aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/acl.c4
-rw-r--r--fs/btrfs/btrfs_inode.h50
-rw-r--r--fs/btrfs/check-integrity.c585
-rw-r--r--fs/btrfs/compression.c12
-rw-r--r--fs/btrfs/ctree.c12
-rw-r--r--fs/btrfs/ctree.h45
-rw-r--r--fs/btrfs/delayed-inode.c8
-rw-r--r--fs/btrfs/disk-io.c50
-rw-r--r--fs/btrfs/disk-io.h1
-rw-r--r--fs/btrfs/extent-tree.c13
-rw-r--r--fs/btrfs/extent_io.c104
-rw-r--r--fs/btrfs/extent_io.h5
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/file.c76
-rw-r--r--fs/btrfs/free-space-cache.c47
-rw-r--r--fs/btrfs/inode.c290
-rw-r--r--fs/btrfs/ioctl.c48
-rw-r--r--fs/btrfs/ioctl.h33
-rw-r--r--fs/btrfs/lzo.c4
-rw-r--r--fs/btrfs/ordered-data.c165
-rw-r--r--fs/btrfs/ordered-data.h13
-rw-r--r--fs/btrfs/print-tree.c3
-rw-r--r--fs/btrfs/reada.c5
-rw-r--r--fs/btrfs/scrub.c92
-rw-r--r--fs/btrfs/super.c125
-rw-r--r--fs/btrfs/transaction.c4
-rw-r--r--fs/btrfs/tree-log.c35
-rw-r--r--fs/btrfs/ulist.c6
-rw-r--r--fs/btrfs/ulist.h6
-rw-r--r--fs/btrfs/volumes.c306
-rw-r--r--fs/btrfs/volumes.h52
-rw-r--r--fs/btrfs/xattr.c1
-rw-r--r--fs/btrfs/zlib.c4
33 files changed, 1535 insertions, 673 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 89b156d85d6..761e2cd8fed 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -227,7 +227,11 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
227 if (ret > 0) { 227 if (ret > 0) {
228 /* we need an acl */ 228 /* we need an acl */
229 ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); 229 ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
230 } else {
231 cache_no_acl(inode);
230 } 232 }
233 } else {
234 cache_no_acl(inode);
231 } 235 }
232failed: 236failed:
233 posix_acl_release(acl); 237 posix_acl_release(acl);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 9b9b15fd520..e616f8872e6 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -24,6 +24,20 @@
24#include "ordered-data.h" 24#include "ordered-data.h"
25#include "delayed-inode.h" 25#include "delayed-inode.h"
26 26
27/*
28 * ordered_data_close is set by truncate when a file that used
29 * to have good data has been truncated to zero. When it is set
30 * the btrfs file release call will add this inode to the
31 * ordered operations list so that we make sure to flush out any
32 * new data the application may have written before commit.
33 */
34#define BTRFS_INODE_ORDERED_DATA_CLOSE 0
35#define BTRFS_INODE_ORPHAN_META_RESERVED 1
36#define BTRFS_INODE_DUMMY 2
37#define BTRFS_INODE_IN_DEFRAG 3
38#define BTRFS_INODE_DELALLOC_META_RESERVED 4
39#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
40
27/* in memory btrfs inode */ 41/* in memory btrfs inode */
28struct btrfs_inode { 42struct btrfs_inode {
29 /* which subvolume this inode belongs to */ 43 /* which subvolume this inode belongs to */
@@ -57,9 +71,6 @@ struct btrfs_inode {
57 /* used to order data wrt metadata */ 71 /* used to order data wrt metadata */
58 struct btrfs_ordered_inode_tree ordered_tree; 72 struct btrfs_ordered_inode_tree ordered_tree;
59 73
60 /* for keeping track of orphaned inodes */
61 struct list_head i_orphan;
62
63 /* list of all the delalloc inodes in the FS. There are times we need 74 /* list of all the delalloc inodes in the FS. There are times we need
64 * to write all the delalloc pages to disk, and this list is used 75 * to write all the delalloc pages to disk, and this list is used
65 * to walk them all. 76 * to walk them all.
@@ -78,14 +89,13 @@ struct btrfs_inode {
78 /* the space_info for where this inode's data allocations are done */ 89 /* the space_info for where this inode's data allocations are done */
79 struct btrfs_space_info *space_info; 90 struct btrfs_space_info *space_info;
80 91
92 unsigned long runtime_flags;
93
81 /* full 64 bit generation number, struct vfs_inode doesn't have a big 94 /* full 64 bit generation number, struct vfs_inode doesn't have a big
82 * enough field for this. 95 * enough field for this.
83 */ 96 */
84 u64 generation; 97 u64 generation;
85 98
86 /* sequence number for NFS changes */
87 u64 sequence;
88
89 /* 99 /*
90 * transid of the trans_handle that last modified this inode 100 * transid of the trans_handle that last modified this inode
91 */ 101 */
@@ -145,22 +155,9 @@ struct btrfs_inode {
145 unsigned reserved_extents; 155 unsigned reserved_extents;
146 156
147 /* 157 /*
148 * ordered_data_close is set by truncate when a file that used
149 * to have good data has been truncated to zero. When it is set
150 * the btrfs file release call will add this inode to the
151 * ordered operations list so that we make sure to flush out any
152 * new data the application may have written before commit.
153 */
154 unsigned ordered_data_close:1;
155 unsigned orphan_meta_reserved:1;
156 unsigned dummy_inode:1;
157 unsigned in_defrag:1;
158 unsigned delalloc_meta_reserved:1;
159
160 /*
161 * always compress this one file 158 * always compress this one file
162 */ 159 */
163 unsigned force_compress:4; 160 unsigned force_compress;
164 161
165 struct btrfs_delayed_node *delayed_node; 162 struct btrfs_delayed_node *delayed_node;
166 163
@@ -202,4 +199,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
202 return false; 199 return false;
203} 200}
204 201
202static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
203{
204 struct btrfs_root *root = BTRFS_I(inode)->root;
205 int ret = 0;
206
207 mutex_lock(&root->log_mutex);
208 if (BTRFS_I(inode)->logged_trans == generation &&
209 BTRFS_I(inode)->last_sub_trans <= root->last_log_commit)
210 ret = 1;
211 mutex_unlock(&root->log_mutex);
212 return ret;
213}
214
205#endif 215#endif
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index d986824bb2b..9cebb1fd6a3 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -89,7 +89,6 @@
89#include "disk-io.h" 89#include "disk-io.h"
90#include "transaction.h" 90#include "transaction.h"
91#include "extent_io.h" 91#include "extent_io.h"
92#include "disk-io.h"
93#include "volumes.h" 92#include "volumes.h"
94#include "print-tree.h" 93#include "print-tree.h"
95#include "locking.h" 94#include "locking.h"
@@ -104,8 +103,6 @@
104#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 103#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
105#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, 104#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters,
106 * excluding " [...]" */ 105 * excluding " [...]" */
107#define BTRFSIC_BLOCK_SIZE PAGE_SIZE
108
109#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) 106#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
110 107
111/* 108/*
@@ -211,8 +208,9 @@ struct btrfsic_block_data_ctx {
211 u64 dev_bytenr; /* physical bytenr on device */ 208 u64 dev_bytenr; /* physical bytenr on device */
212 u32 len; 209 u32 len;
213 struct btrfsic_dev_state *dev; 210 struct btrfsic_dev_state *dev;
214 char *data; 211 char **datav;
215 struct buffer_head *bh; /* do not use if set to NULL */ 212 struct page **pagev;
213 void *mem_to_free;
216}; 214};
217 215
218/* This structure is used to implement recursion without occupying 216/* This structure is used to implement recursion without occupying
@@ -244,6 +242,8 @@ struct btrfsic_state {
244 struct btrfs_root *root; 242 struct btrfs_root *root;
245 u64 max_superblock_generation; 243 u64 max_superblock_generation;
246 struct btrfsic_block *latest_superblock; 244 struct btrfsic_block *latest_superblock;
245 u32 metablock_size;
246 u32 datablock_size;
247}; 247};
248 248
249static void btrfsic_block_init(struct btrfsic_block *b); 249static void btrfsic_block_init(struct btrfsic_block *b);
@@ -291,8 +291,10 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
291static int btrfsic_process_metablock(struct btrfsic_state *state, 291static int btrfsic_process_metablock(struct btrfsic_state *state,
292 struct btrfsic_block *block, 292 struct btrfsic_block *block,
293 struct btrfsic_block_data_ctx *block_ctx, 293 struct btrfsic_block_data_ctx *block_ctx,
294 struct btrfs_header *hdr,
295 int limit_nesting, int force_iodone_flag); 294 int limit_nesting, int force_iodone_flag);
295static void btrfsic_read_from_block_data(
296 struct btrfsic_block_data_ctx *block_ctx,
297 void *dst, u32 offset, size_t len);
296static int btrfsic_create_link_to_next_block( 298static int btrfsic_create_link_to_next_block(
297 struct btrfsic_state *state, 299 struct btrfsic_state *state,
298 struct btrfsic_block *block, 300 struct btrfsic_block *block,
@@ -319,12 +321,13 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
319static int btrfsic_read_block(struct btrfsic_state *state, 321static int btrfsic_read_block(struct btrfsic_state *state,
320 struct btrfsic_block_data_ctx *block_ctx); 322 struct btrfsic_block_data_ctx *block_ctx);
321static void btrfsic_dump_database(struct btrfsic_state *state); 323static void btrfsic_dump_database(struct btrfsic_state *state);
324static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
322static int btrfsic_test_for_metadata(struct btrfsic_state *state, 325static int btrfsic_test_for_metadata(struct btrfsic_state *state,
323 const u8 *data, unsigned int size); 326 char **datav, unsigned int num_pages);
324static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, 327static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
325 u64 dev_bytenr, u8 *mapped_data, 328 u64 dev_bytenr, char **mapped_datav,
326 unsigned int len, struct bio *bio, 329 unsigned int num_pages,
327 int *bio_is_patched, 330 struct bio *bio, int *bio_is_patched,
328 struct buffer_head *bh, 331 struct buffer_head *bh,
329 int submit_bio_bh_rw); 332 int submit_bio_bh_rw);
330static int btrfsic_process_written_superblock( 333static int btrfsic_process_written_superblock(
@@ -376,7 +379,7 @@ static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
376static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 379static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
377 u64 bytenr, 380 u64 bytenr,
378 struct btrfsic_dev_state *dev_state, 381 struct btrfsic_dev_state *dev_state,
379 u64 dev_bytenr, char *data); 382 u64 dev_bytenr);
380 383
381static struct mutex btrfsic_mutex; 384static struct mutex btrfsic_mutex;
382static int btrfsic_is_initialized; 385static int btrfsic_is_initialized;
@@ -652,7 +655,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
652 int pass; 655 int pass;
653 656
654 BUG_ON(NULL == state); 657 BUG_ON(NULL == state);
655 selected_super = kmalloc(sizeof(*selected_super), GFP_NOFS); 658 selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
656 if (NULL == selected_super) { 659 if (NULL == selected_super) {
657 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 660 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
658 return -1; 661 return -1;
@@ -719,7 +722,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
719 722
720 num_copies = 723 num_copies =
721 btrfs_num_copies(&state->root->fs_info->mapping_tree, 724 btrfs_num_copies(&state->root->fs_info->mapping_tree,
722 next_bytenr, PAGE_SIZE); 725 next_bytenr, state->metablock_size);
723 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 726 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
724 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 727 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
725 (unsigned long long)next_bytenr, num_copies); 728 (unsigned long long)next_bytenr, num_copies);
@@ -728,9 +731,9 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
728 struct btrfsic_block *next_block; 731 struct btrfsic_block *next_block;
729 struct btrfsic_block_data_ctx tmp_next_block_ctx; 732 struct btrfsic_block_data_ctx tmp_next_block_ctx;
730 struct btrfsic_block_link *l; 733 struct btrfsic_block_link *l;
731 struct btrfs_header *hdr;
732 734
733 ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, 735 ret = btrfsic_map_block(state, next_bytenr,
736 state->metablock_size,
734 &tmp_next_block_ctx, 737 &tmp_next_block_ctx,
735 mirror_num); 738 mirror_num);
736 if (ret) { 739 if (ret) {
@@ -759,7 +762,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
759 BUG_ON(NULL == l); 762 BUG_ON(NULL == l);
760 763
761 ret = btrfsic_read_block(state, &tmp_next_block_ctx); 764 ret = btrfsic_read_block(state, &tmp_next_block_ctx);
762 if (ret < (int)BTRFSIC_BLOCK_SIZE) { 765 if (ret < (int)PAGE_CACHE_SIZE) {
763 printk(KERN_INFO 766 printk(KERN_INFO
764 "btrfsic: read @logical %llu failed!\n", 767 "btrfsic: read @logical %llu failed!\n",
765 (unsigned long long) 768 (unsigned long long)
@@ -769,11 +772,9 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
769 return -1; 772 return -1;
770 } 773 }
771 774
772 hdr = (struct btrfs_header *)tmp_next_block_ctx.data;
773 ret = btrfsic_process_metablock(state, 775 ret = btrfsic_process_metablock(state,
774 next_block, 776 next_block,
775 &tmp_next_block_ctx, 777 &tmp_next_block_ctx,
776 hdr,
777 BTRFS_MAX_LEVEL + 3, 1); 778 BTRFS_MAX_LEVEL + 3, 1);
778 btrfsic_release_block_ctx(&tmp_next_block_ctx); 779 btrfsic_release_block_ctx(&tmp_next_block_ctx);
779 } 780 }
@@ -800,7 +801,10 @@ static int btrfsic_process_superblock_dev_mirror(
800 801
801 /* super block bytenr is always the unmapped device bytenr */ 802 /* super block bytenr is always the unmapped device bytenr */
802 dev_bytenr = btrfs_sb_offset(superblock_mirror_num); 803 dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
803 bh = __bread(superblock_bdev, dev_bytenr / 4096, 4096); 804 if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
805 return -1;
806 bh = __bread(superblock_bdev, dev_bytenr / 4096,
807 BTRFS_SUPER_INFO_SIZE);
804 if (NULL == bh) 808 if (NULL == bh)
805 return -1; 809 return -1;
806 super_tmp = (struct btrfs_super_block *) 810 super_tmp = (struct btrfs_super_block *)
@@ -809,7 +813,10 @@ static int btrfsic_process_superblock_dev_mirror(
809 if (btrfs_super_bytenr(super_tmp) != dev_bytenr || 813 if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
810 strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, 814 strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
811 sizeof(super_tmp->magic)) || 815 sizeof(super_tmp->magic)) ||
812 memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE)) { 816 memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
817 btrfs_super_nodesize(super_tmp) != state->metablock_size ||
818 btrfs_super_leafsize(super_tmp) != state->metablock_size ||
819 btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
813 brelse(bh); 820 brelse(bh);
814 return 0; 821 return 0;
815 } 822 }
@@ -894,7 +901,7 @@ static int btrfsic_process_superblock_dev_mirror(
894 901
895 num_copies = 902 num_copies =
896 btrfs_num_copies(&state->root->fs_info->mapping_tree, 903 btrfs_num_copies(&state->root->fs_info->mapping_tree,
897 next_bytenr, PAGE_SIZE); 904 next_bytenr, state->metablock_size);
898 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 905 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
899 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 906 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
900 (unsigned long long)next_bytenr, num_copies); 907 (unsigned long long)next_bytenr, num_copies);
@@ -903,7 +910,8 @@ static int btrfsic_process_superblock_dev_mirror(
903 struct btrfsic_block_data_ctx tmp_next_block_ctx; 910 struct btrfsic_block_data_ctx tmp_next_block_ctx;
904 struct btrfsic_block_link *l; 911 struct btrfsic_block_link *l;
905 912
906 if (btrfsic_map_block(state, next_bytenr, PAGE_SIZE, 913 if (btrfsic_map_block(state, next_bytenr,
914 state->metablock_size,
907 &tmp_next_block_ctx, 915 &tmp_next_block_ctx,
908 mirror_num)) { 916 mirror_num)) {
909 printk(KERN_INFO "btrfsic: btrfsic_map_block(" 917 printk(KERN_INFO "btrfsic: btrfsic_map_block("
@@ -967,13 +975,15 @@ static int btrfsic_process_metablock(
967 struct btrfsic_state *state, 975 struct btrfsic_state *state,
968 struct btrfsic_block *const first_block, 976 struct btrfsic_block *const first_block,
969 struct btrfsic_block_data_ctx *const first_block_ctx, 977 struct btrfsic_block_data_ctx *const first_block_ctx,
970 struct btrfs_header *const first_hdr,
971 int first_limit_nesting, int force_iodone_flag) 978 int first_limit_nesting, int force_iodone_flag)
972{ 979{
973 struct btrfsic_stack_frame initial_stack_frame = { 0 }; 980 struct btrfsic_stack_frame initial_stack_frame = { 0 };
974 struct btrfsic_stack_frame *sf; 981 struct btrfsic_stack_frame *sf;
975 struct btrfsic_stack_frame *next_stack; 982 struct btrfsic_stack_frame *next_stack;
983 struct btrfs_header *const first_hdr =
984 (struct btrfs_header *)first_block_ctx->datav[0];
976 985
986 BUG_ON(!first_hdr);
977 sf = &initial_stack_frame; 987 sf = &initial_stack_frame;
978 sf->error = 0; 988 sf->error = 0;
979 sf->i = -1; 989 sf->i = -1;
@@ -1013,21 +1023,47 @@ continue_with_current_leaf_stack_frame:
1013 } 1023 }
1014 1024
1015 if (sf->i < sf->nr) { 1025 if (sf->i < sf->nr) {
1016 struct btrfs_item *disk_item = leafhdr->items + sf->i; 1026 struct btrfs_item disk_item;
1017 struct btrfs_disk_key *disk_key = &disk_item->key; 1027 u32 disk_item_offset =
1028 (uintptr_t)(leafhdr->items + sf->i) -
1029 (uintptr_t)leafhdr;
1030 struct btrfs_disk_key *disk_key;
1018 u8 type; 1031 u8 type;
1019 const u32 item_offset = le32_to_cpu(disk_item->offset); 1032 u32 item_offset;
1020 1033
1034 if (disk_item_offset + sizeof(struct btrfs_item) >
1035 sf->block_ctx->len) {
1036leaf_item_out_of_bounce_error:
1037 printk(KERN_INFO
1038 "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
1039 sf->block_ctx->start,
1040 sf->block_ctx->dev->name);
1041 goto one_stack_frame_backwards;
1042 }
1043 btrfsic_read_from_block_data(sf->block_ctx,
1044 &disk_item,
1045 disk_item_offset,
1046 sizeof(struct btrfs_item));
1047 item_offset = le32_to_cpu(disk_item.offset);
1048 disk_key = &disk_item.key;
1021 type = disk_key->type; 1049 type = disk_key->type;
1022 1050
1023 if (BTRFS_ROOT_ITEM_KEY == type) { 1051 if (BTRFS_ROOT_ITEM_KEY == type) {
1024 const struct btrfs_root_item *const root_item = 1052 struct btrfs_root_item root_item;
1025 (struct btrfs_root_item *) 1053 u32 root_item_offset;
1026 (sf->block_ctx->data + 1054 u64 next_bytenr;
1027 offsetof(struct btrfs_leaf, items) + 1055
1028 item_offset); 1056 root_item_offset = item_offset +
1029 const u64 next_bytenr = 1057 offsetof(struct btrfs_leaf, items);
1030 le64_to_cpu(root_item->bytenr); 1058 if (root_item_offset +
1059 sizeof(struct btrfs_root_item) >
1060 sf->block_ctx->len)
1061 goto leaf_item_out_of_bounce_error;
1062 btrfsic_read_from_block_data(
1063 sf->block_ctx, &root_item,
1064 root_item_offset,
1065 sizeof(struct btrfs_root_item));
1066 next_bytenr = le64_to_cpu(root_item.bytenr);
1031 1067
1032 sf->error = 1068 sf->error =
1033 btrfsic_create_link_to_next_block( 1069 btrfsic_create_link_to_next_block(
@@ -1042,7 +1078,7 @@ continue_with_current_leaf_stack_frame:
1042 &sf->num_copies, 1078 &sf->num_copies,
1043 &sf->mirror_num, 1079 &sf->mirror_num,
1044 disk_key, 1080 disk_key,
1045 le64_to_cpu(root_item-> 1081 le64_to_cpu(root_item.
1046 generation)); 1082 generation));
1047 if (sf->error) 1083 if (sf->error)
1048 goto one_stack_frame_backwards; 1084 goto one_stack_frame_backwards;
@@ -1050,7 +1086,7 @@ continue_with_current_leaf_stack_frame:
1050 if (NULL != sf->next_block) { 1086 if (NULL != sf->next_block) {
1051 struct btrfs_header *const next_hdr = 1087 struct btrfs_header *const next_hdr =
1052 (struct btrfs_header *) 1088 (struct btrfs_header *)
1053 sf->next_block_ctx.data; 1089 sf->next_block_ctx.datav[0];
1054 1090
1055 next_stack = 1091 next_stack =
1056 btrfsic_stack_frame_alloc(); 1092 btrfsic_stack_frame_alloc();
@@ -1112,10 +1148,24 @@ continue_with_current_node_stack_frame:
1112 } 1148 }
1113 1149
1114 if (sf->i < sf->nr) { 1150 if (sf->i < sf->nr) {
1115 struct btrfs_key_ptr *disk_key_ptr = 1151 struct btrfs_key_ptr key_ptr;
1116 nodehdr->ptrs + sf->i; 1152 u32 key_ptr_offset;
1117 const u64 next_bytenr = 1153 u64 next_bytenr;
1118 le64_to_cpu(disk_key_ptr->blockptr); 1154
1155 key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
1156 (uintptr_t)nodehdr;
1157 if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
1158 sf->block_ctx->len) {
1159 printk(KERN_INFO
1160 "btrfsic: node item out of bounce at logical %llu, dev %s\n",
1161 sf->block_ctx->start,
1162 sf->block_ctx->dev->name);
1163 goto one_stack_frame_backwards;
1164 }
1165 btrfsic_read_from_block_data(
1166 sf->block_ctx, &key_ptr, key_ptr_offset,
1167 sizeof(struct btrfs_key_ptr));
1168 next_bytenr = le64_to_cpu(key_ptr.blockptr);
1119 1169
1120 sf->error = btrfsic_create_link_to_next_block( 1170 sf->error = btrfsic_create_link_to_next_block(
1121 state, 1171 state,
@@ -1128,15 +1178,15 @@ continue_with_current_node_stack_frame:
1128 force_iodone_flag, 1178 force_iodone_flag,
1129 &sf->num_copies, 1179 &sf->num_copies,
1130 &sf->mirror_num, 1180 &sf->mirror_num,
1131 &disk_key_ptr->key, 1181 &key_ptr.key,
1132 le64_to_cpu(disk_key_ptr->generation)); 1182 le64_to_cpu(key_ptr.generation));
1133 if (sf->error) 1183 if (sf->error)
1134 goto one_stack_frame_backwards; 1184 goto one_stack_frame_backwards;
1135 1185
1136 if (NULL != sf->next_block) { 1186 if (NULL != sf->next_block) {
1137 struct btrfs_header *const next_hdr = 1187 struct btrfs_header *const next_hdr =
1138 (struct btrfs_header *) 1188 (struct btrfs_header *)
1139 sf->next_block_ctx.data; 1189 sf->next_block_ctx.datav[0];
1140 1190
1141 next_stack = btrfsic_stack_frame_alloc(); 1191 next_stack = btrfsic_stack_frame_alloc();
1142 if (NULL == next_stack) 1192 if (NULL == next_stack)
@@ -1182,6 +1232,35 @@ one_stack_frame_backwards:
1182 return sf->error; 1232 return sf->error;
1183} 1233}
1184 1234
1235static void btrfsic_read_from_block_data(
1236 struct btrfsic_block_data_ctx *block_ctx,
1237 void *dstv, u32 offset, size_t len)
1238{
1239 size_t cur;
1240 size_t offset_in_page;
1241 char *kaddr;
1242 char *dst = (char *)dstv;
1243 size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
1244 unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1245
1246 WARN_ON(offset + len > block_ctx->len);
1247 offset_in_page = (start_offset + offset) &
1248 ((unsigned long)PAGE_CACHE_SIZE - 1);
1249
1250 while (len > 0) {
1251 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
1252 BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
1253 PAGE_CACHE_SHIFT);
1254 kaddr = block_ctx->datav[i];
1255 memcpy(dst, kaddr + offset_in_page, cur);
1256
1257 dst += cur;
1258 len -= cur;
1259 offset_in_page = 0;
1260 i++;
1261 }
1262}
1263
1185static int btrfsic_create_link_to_next_block( 1264static int btrfsic_create_link_to_next_block(
1186 struct btrfsic_state *state, 1265 struct btrfsic_state *state,
1187 struct btrfsic_block *block, 1266 struct btrfsic_block *block,
@@ -1205,7 +1284,7 @@ static int btrfsic_create_link_to_next_block(
1205 if (0 == *num_copiesp) { 1284 if (0 == *num_copiesp) {
1206 *num_copiesp = 1285 *num_copiesp =
1207 btrfs_num_copies(&state->root->fs_info->mapping_tree, 1286 btrfs_num_copies(&state->root->fs_info->mapping_tree,
1208 next_bytenr, PAGE_SIZE); 1287 next_bytenr, state->metablock_size);
1209 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1288 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1210 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 1289 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1211 (unsigned long long)next_bytenr, *num_copiesp); 1290 (unsigned long long)next_bytenr, *num_copiesp);
@@ -1220,7 +1299,7 @@ static int btrfsic_create_link_to_next_block(
1220 "btrfsic_create_link_to_next_block(mirror_num=%d)\n", 1299 "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
1221 *mirror_nump); 1300 *mirror_nump);
1222 ret = btrfsic_map_block(state, next_bytenr, 1301 ret = btrfsic_map_block(state, next_bytenr,
1223 BTRFSIC_BLOCK_SIZE, 1302 state->metablock_size,
1224 next_block_ctx, *mirror_nump); 1303 next_block_ctx, *mirror_nump);
1225 if (ret) { 1304 if (ret) {
1226 printk(KERN_INFO 1305 printk(KERN_INFO
@@ -1315,7 +1394,7 @@ static int btrfsic_create_link_to_next_block(
1315 1394
1316 if (limit_nesting > 0 && did_alloc_block_link) { 1395 if (limit_nesting > 0 && did_alloc_block_link) {
1317 ret = btrfsic_read_block(state, next_block_ctx); 1396 ret = btrfsic_read_block(state, next_block_ctx);
1318 if (ret < (int)BTRFSIC_BLOCK_SIZE) { 1397 if (ret < (int)next_block_ctx->len) {
1319 printk(KERN_INFO 1398 printk(KERN_INFO
1320 "btrfsic: read block @logical %llu failed!\n", 1399 "btrfsic: read block @logical %llu failed!\n",
1321 (unsigned long long)next_bytenr); 1400 (unsigned long long)next_bytenr);
@@ -1340,43 +1419,74 @@ static int btrfsic_handle_extent_data(
1340 u32 item_offset, int force_iodone_flag) 1419 u32 item_offset, int force_iodone_flag)
1341{ 1420{
1342 int ret; 1421 int ret;
1343 struct btrfs_file_extent_item *file_extent_item = 1422 struct btrfs_file_extent_item file_extent_item;
1344 (struct btrfs_file_extent_item *)(block_ctx->data + 1423 u64 file_extent_item_offset;
1345 offsetof(struct btrfs_leaf, 1424 u64 next_bytenr;
1346 items) + item_offset); 1425 u64 num_bytes;
1347 u64 next_bytenr = 1426 u64 generation;
1348 le64_to_cpu(file_extent_item->disk_bytenr) +
1349 le64_to_cpu(file_extent_item->offset);
1350 u64 num_bytes = le64_to_cpu(file_extent_item->num_bytes);
1351 u64 generation = le64_to_cpu(file_extent_item->generation);
1352 struct btrfsic_block_link *l; 1427 struct btrfsic_block_link *l;
1353 1428
1429 file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
1430 item_offset;
1431 if (file_extent_item_offset +
1432 offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
1433 block_ctx->len) {
1434 printk(KERN_INFO
1435 "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1436 block_ctx->start, block_ctx->dev->name);
1437 return -1;
1438 }
1439
1440 btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1441 file_extent_item_offset,
1442 offsetof(struct btrfs_file_extent_item, disk_num_bytes));
1443 if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1444 ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) {
1445 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1446 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
1447 file_extent_item.type,
1448 (unsigned long long)
1449 le64_to_cpu(file_extent_item.disk_bytenr));
1450 return 0;
1451 }
1452
1453 if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
1454 block_ctx->len) {
1455 printk(KERN_INFO
1456 "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1457 block_ctx->start, block_ctx->dev->name);
1458 return -1;
1459 }
1460 btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1461 file_extent_item_offset,
1462 sizeof(struct btrfs_file_extent_item));
1463 next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) +
1464 le64_to_cpu(file_extent_item.offset);
1465 generation = le64_to_cpu(file_extent_item.generation);
1466 num_bytes = le64_to_cpu(file_extent_item.num_bytes);
1467 generation = le64_to_cpu(file_extent_item.generation);
1468
1354 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1469 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1355 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," 1470 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
1356 " offset = %llu, num_bytes = %llu\n", 1471 " offset = %llu, num_bytes = %llu\n",
1357 file_extent_item->type, 1472 file_extent_item.type,
1358 (unsigned long long)
1359 le64_to_cpu(file_extent_item->disk_bytenr),
1360 (unsigned long long)
1361 le64_to_cpu(file_extent_item->offset),
1362 (unsigned long long) 1473 (unsigned long long)
1363 le64_to_cpu(file_extent_item->num_bytes)); 1474 le64_to_cpu(file_extent_item.disk_bytenr),
1364 if (BTRFS_FILE_EXTENT_REG != file_extent_item->type || 1475 (unsigned long long)le64_to_cpu(file_extent_item.offset),
1365 ((u64)0) == le64_to_cpu(file_extent_item->disk_bytenr)) 1476 (unsigned long long)num_bytes);
1366 return 0;
1367 while (num_bytes > 0) { 1477 while (num_bytes > 0) {
1368 u32 chunk_len; 1478 u32 chunk_len;
1369 int num_copies; 1479 int num_copies;
1370 int mirror_num; 1480 int mirror_num;
1371 1481
1372 if (num_bytes > BTRFSIC_BLOCK_SIZE) 1482 if (num_bytes > state->datablock_size)
1373 chunk_len = BTRFSIC_BLOCK_SIZE; 1483 chunk_len = state->datablock_size;
1374 else 1484 else
1375 chunk_len = num_bytes; 1485 chunk_len = num_bytes;
1376 1486
1377 num_copies = 1487 num_copies =
1378 btrfs_num_copies(&state->root->fs_info->mapping_tree, 1488 btrfs_num_copies(&state->root->fs_info->mapping_tree,
1379 next_bytenr, PAGE_SIZE); 1489 next_bytenr, state->datablock_size);
1380 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1490 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1381 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 1491 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1382 (unsigned long long)next_bytenr, num_copies); 1492 (unsigned long long)next_bytenr, num_copies);
@@ -1476,8 +1586,9 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1476 block_ctx_out->dev_bytenr = multi->stripes[0].physical; 1586 block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1477 block_ctx_out->start = bytenr; 1587 block_ctx_out->start = bytenr;
1478 block_ctx_out->len = len; 1588 block_ctx_out->len = len;
1479 block_ctx_out->data = NULL; 1589 block_ctx_out->datav = NULL;
1480 block_ctx_out->bh = NULL; 1590 block_ctx_out->pagev = NULL;
1591 block_ctx_out->mem_to_free = NULL;
1481 1592
1482 if (0 == ret) 1593 if (0 == ret)
1483 kfree(multi); 1594 kfree(multi);
@@ -1497,8 +1608,9 @@ static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
1497 block_ctx_out->dev_bytenr = bytenr; 1608 block_ctx_out->dev_bytenr = bytenr;
1498 block_ctx_out->start = bytenr; 1609 block_ctx_out->start = bytenr;
1499 block_ctx_out->len = len; 1610 block_ctx_out->len = len;
1500 block_ctx_out->data = NULL; 1611 block_ctx_out->datav = NULL;
1501 block_ctx_out->bh = NULL; 1612 block_ctx_out->pagev = NULL;
1613 block_ctx_out->mem_to_free = NULL;
1502 if (NULL != block_ctx_out->dev) { 1614 if (NULL != block_ctx_out->dev) {
1503 return 0; 1615 return 0;
1504 } else { 1616 } else {
@@ -1509,38 +1621,127 @@ static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
1509 1621
1510static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) 1622static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1511{ 1623{
1512 if (NULL != block_ctx->bh) { 1624 if (block_ctx->mem_to_free) {
1513 brelse(block_ctx->bh); 1625 unsigned int num_pages;
1514 block_ctx->bh = NULL; 1626
1627 BUG_ON(!block_ctx->datav);
1628 BUG_ON(!block_ctx->pagev);
1629 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1630 PAGE_CACHE_SHIFT;
1631 while (num_pages > 0) {
1632 num_pages--;
1633 if (block_ctx->datav[num_pages]) {
1634 kunmap(block_ctx->pagev[num_pages]);
1635 block_ctx->datav[num_pages] = NULL;
1636 }
1637 if (block_ctx->pagev[num_pages]) {
1638 __free_page(block_ctx->pagev[num_pages]);
1639 block_ctx->pagev[num_pages] = NULL;
1640 }
1641 }
1642
1643 kfree(block_ctx->mem_to_free);
1644 block_ctx->mem_to_free = NULL;
1645 block_ctx->pagev = NULL;
1646 block_ctx->datav = NULL;
1515 } 1647 }
1516} 1648}
1517 1649
1518static int btrfsic_read_block(struct btrfsic_state *state, 1650static int btrfsic_read_block(struct btrfsic_state *state,
1519 struct btrfsic_block_data_ctx *block_ctx) 1651 struct btrfsic_block_data_ctx *block_ctx)
1520{ 1652{
1521 block_ctx->bh = NULL; 1653 unsigned int num_pages;
1522 if (block_ctx->dev_bytenr & 4095) { 1654 unsigned int i;
1655 u64 dev_bytenr;
1656 int ret;
1657
1658 BUG_ON(block_ctx->datav);
1659 BUG_ON(block_ctx->pagev);
1660 BUG_ON(block_ctx->mem_to_free);
1661 if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
1523 printk(KERN_INFO 1662 printk(KERN_INFO
1524 "btrfsic: read_block() with unaligned bytenr %llu\n", 1663 "btrfsic: read_block() with unaligned bytenr %llu\n",
1525 (unsigned long long)block_ctx->dev_bytenr); 1664 (unsigned long long)block_ctx->dev_bytenr);
1526 return -1; 1665 return -1;
1527 } 1666 }
1528 if (block_ctx->len > 4096) { 1667
1529 printk(KERN_INFO 1668 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1530 "btrfsic: read_block() with too huge size %d\n", 1669 PAGE_CACHE_SHIFT;
1531 block_ctx->len); 1670 block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
1671 sizeof(*block_ctx->pagev)) *
1672 num_pages, GFP_NOFS);
1673 if (!block_ctx->mem_to_free)
1532 return -1; 1674 return -1;
1675 block_ctx->datav = block_ctx->mem_to_free;
1676 block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
1677 for (i = 0; i < num_pages; i++) {
1678 block_ctx->pagev[i] = alloc_page(GFP_NOFS);
1679 if (!block_ctx->pagev[i])
1680 return -1;
1533 } 1681 }
1534 1682
1535 block_ctx->bh = __bread(block_ctx->dev->bdev, 1683 dev_bytenr = block_ctx->dev_bytenr;
1536 block_ctx->dev_bytenr >> 12, 4096); 1684 for (i = 0; i < num_pages;) {
1537 if (NULL == block_ctx->bh) 1685 struct bio *bio;
1538 return -1; 1686 unsigned int j;
1539 block_ctx->data = block_ctx->bh->b_data; 1687 DECLARE_COMPLETION_ONSTACK(complete);
1688
1689 bio = bio_alloc(GFP_NOFS, num_pages - i);
1690 if (!bio) {
1691 printk(KERN_INFO
1692 "btrfsic: bio_alloc() for %u pages failed!\n",
1693 num_pages - i);
1694 return -1;
1695 }
1696 bio->bi_bdev = block_ctx->dev->bdev;
1697 bio->bi_sector = dev_bytenr >> 9;
1698 bio->bi_end_io = btrfsic_complete_bio_end_io;
1699 bio->bi_private = &complete;
1700
1701 for (j = i; j < num_pages; j++) {
1702 ret = bio_add_page(bio, block_ctx->pagev[j],
1703 PAGE_CACHE_SIZE, 0);
1704 if (PAGE_CACHE_SIZE != ret)
1705 break;
1706 }
1707 if (j == i) {
1708 printk(KERN_INFO
1709 "btrfsic: error, failed to add a single page!\n");
1710 return -1;
1711 }
1712 submit_bio(READ, bio);
1713
1714 /* this will also unplug the queue */
1715 wait_for_completion(&complete);
1716
1717 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
1718 printk(KERN_INFO
1719 "btrfsic: read error at logical %llu dev %s!\n",
1720 block_ctx->start, block_ctx->dev->name);
1721 bio_put(bio);
1722 return -1;
1723 }
1724 bio_put(bio);
1725 dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
1726 i = j;
1727 }
1728 for (i = 0; i < num_pages; i++) {
1729 block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
1730 if (!block_ctx->datav[i]) {
1731 printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
1732 block_ctx->dev->name);
1733 return -1;
1734 }
1735 }
1540 1736
1541 return block_ctx->len; 1737 return block_ctx->len;
1542} 1738}
1543 1739
1740static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
1741{
1742 complete((struct completion *)bio->bi_private);
1743}
1744
1544static void btrfsic_dump_database(struct btrfsic_state *state) 1745static void btrfsic_dump_database(struct btrfsic_state *state)
1545{ 1746{
1546 struct list_head *elem_all; 1747 struct list_head *elem_all;
@@ -1618,32 +1819,39 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
1618 * (note that this test fails for the super block) 1819 * (note that this test fails for the super block)
1619 */ 1820 */
1620static int btrfsic_test_for_metadata(struct btrfsic_state *state, 1821static int btrfsic_test_for_metadata(struct btrfsic_state *state,
1621 const u8 *data, unsigned int size) 1822 char **datav, unsigned int num_pages)
1622{ 1823{
1623 struct btrfs_header *h; 1824 struct btrfs_header *h;
1624 u8 csum[BTRFS_CSUM_SIZE]; 1825 u8 csum[BTRFS_CSUM_SIZE];
1625 u32 crc = ~(u32)0; 1826 u32 crc = ~(u32)0;
1626 int fail = 0; 1827 unsigned int i;
1627 int crc_fail = 0;
1628 1828
1629 h = (struct btrfs_header *)data; 1829 if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
1830 return 1; /* not metadata */
1831 num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
1832 h = (struct btrfs_header *)datav[0];
1630 1833
1631 if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE)) 1834 if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
1632 fail++; 1835 return 1;
1836
1837 for (i = 0; i < num_pages; i++) {
1838 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1839 size_t sublen = i ? PAGE_CACHE_SIZE :
1840 (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
1633 1841
1634 crc = crc32c(crc, data + BTRFS_CSUM_SIZE, PAGE_SIZE - BTRFS_CSUM_SIZE); 1842 crc = crc32c(crc, data, sublen);
1843 }
1635 btrfs_csum_final(crc, csum); 1844 btrfs_csum_final(crc, csum);
1636 if (memcmp(csum, h->csum, state->csum_size)) 1845 if (memcmp(csum, h->csum, state->csum_size))
1637 crc_fail++; 1846 return 1;
1638 1847
1639 return fail || crc_fail; 1848 return 0; /* is metadata */
1640} 1849}
1641 1850
1642static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, 1851static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1643 u64 dev_bytenr, 1852 u64 dev_bytenr, char **mapped_datav,
1644 u8 *mapped_data, unsigned int len, 1853 unsigned int num_pages,
1645 struct bio *bio, 1854 struct bio *bio, int *bio_is_patched,
1646 int *bio_is_patched,
1647 struct buffer_head *bh, 1855 struct buffer_head *bh,
1648 int submit_bio_bh_rw) 1856 int submit_bio_bh_rw)
1649{ 1857{
@@ -1653,12 +1861,19 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1653 int ret; 1861 int ret;
1654 struct btrfsic_state *state = dev_state->state; 1862 struct btrfsic_state *state = dev_state->state;
1655 struct block_device *bdev = dev_state->bdev; 1863 struct block_device *bdev = dev_state->bdev;
1864 unsigned int processed_len;
1656 1865
1657 WARN_ON(len > PAGE_SIZE);
1658 is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_data, len));
1659 if (NULL != bio_is_patched) 1866 if (NULL != bio_is_patched)
1660 *bio_is_patched = 0; 1867 *bio_is_patched = 0;
1661 1868
1869again:
1870 if (num_pages == 0)
1871 return;
1872
1873 processed_len = 0;
1874 is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
1875 num_pages));
1876
1662 block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, 1877 block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
1663 &state->block_hashtable); 1878 &state->block_hashtable);
1664 if (NULL != block) { 1879 if (NULL != block) {
@@ -1668,8 +1883,16 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1668 1883
1669 if (block->is_superblock) { 1884 if (block->is_superblock) {
1670 bytenr = le64_to_cpu(((struct btrfs_super_block *) 1885 bytenr = le64_to_cpu(((struct btrfs_super_block *)
1671 mapped_data)->bytenr); 1886 mapped_datav[0])->bytenr);
1887 if (num_pages * PAGE_CACHE_SIZE <
1888 BTRFS_SUPER_INFO_SIZE) {
1889 printk(KERN_INFO
1890 "btrfsic: cannot work with too short bios!\n");
1891 return;
1892 }
1672 is_metadata = 1; 1893 is_metadata = 1;
1894 BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
1895 processed_len = BTRFS_SUPER_INFO_SIZE;
1673 if (state->print_mask & 1896 if (state->print_mask &
1674 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { 1897 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
1675 printk(KERN_INFO 1898 printk(KERN_INFO
@@ -1679,12 +1902,18 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1679 } 1902 }
1680 if (is_metadata) { 1903 if (is_metadata) {
1681 if (!block->is_superblock) { 1904 if (!block->is_superblock) {
1905 if (num_pages * PAGE_CACHE_SIZE <
1906 state->metablock_size) {
1907 printk(KERN_INFO
1908 "btrfsic: cannot work with too short bios!\n");
1909 return;
1910 }
1911 processed_len = state->metablock_size;
1682 bytenr = le64_to_cpu(((struct btrfs_header *) 1912 bytenr = le64_to_cpu(((struct btrfs_header *)
1683 mapped_data)->bytenr); 1913 mapped_datav[0])->bytenr);
1684 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, 1914 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1685 dev_state, 1915 dev_state,
1686 dev_bytenr, 1916 dev_bytenr);
1687 mapped_data);
1688 } 1917 }
1689 if (block->logical_bytenr != bytenr) { 1918 if (block->logical_bytenr != bytenr) {
1690 printk(KERN_INFO 1919 printk(KERN_INFO
@@ -1711,6 +1940,13 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1711 block->mirror_num, 1940 block->mirror_num,
1712 btrfsic_get_block_type(state, block)); 1941 btrfsic_get_block_type(state, block));
1713 } else { 1942 } else {
1943 if (num_pages * PAGE_CACHE_SIZE <
1944 state->datablock_size) {
1945 printk(KERN_INFO
1946 "btrfsic: cannot work with too short bios!\n");
1947 return;
1948 }
1949 processed_len = state->datablock_size;
1714 bytenr = block->logical_bytenr; 1950 bytenr = block->logical_bytenr;
1715 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1951 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1716 printk(KERN_INFO 1952 printk(KERN_INFO
@@ -1748,7 +1984,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1748 le64_to_cpu(block->disk_key.offset), 1984 le64_to_cpu(block->disk_key.offset),
1749 (unsigned long long) 1985 (unsigned long long)
1750 le64_to_cpu(((struct btrfs_header *) 1986 le64_to_cpu(((struct btrfs_header *)
1751 mapped_data)->generation), 1987 mapped_datav[0])->generation),
1752 (unsigned long long) 1988 (unsigned long long)
1753 state->max_superblock_generation); 1989 state->max_superblock_generation);
1754 btrfsic_dump_tree(state); 1990 btrfsic_dump_tree(state);
@@ -1766,10 +2002,10 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1766 (unsigned long long)block->generation, 2002 (unsigned long long)block->generation,
1767 (unsigned long long) 2003 (unsigned long long)
1768 le64_to_cpu(((struct btrfs_header *) 2004 le64_to_cpu(((struct btrfs_header *)
1769 mapped_data)->generation)); 2005 mapped_datav[0])->generation));
1770 /* it would not be safe to go on */ 2006 /* it would not be safe to go on */
1771 btrfsic_dump_tree(state); 2007 btrfsic_dump_tree(state);
1772 return; 2008 goto continue_loop;
1773 } 2009 }
1774 2010
1775 /* 2011 /*
@@ -1797,18 +2033,19 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1797 } 2033 }
1798 2034
1799 if (block->is_superblock) 2035 if (block->is_superblock)
1800 ret = btrfsic_map_superblock(state, bytenr, len, 2036 ret = btrfsic_map_superblock(state, bytenr,
2037 processed_len,
1801 bdev, &block_ctx); 2038 bdev, &block_ctx);
1802 else 2039 else
1803 ret = btrfsic_map_block(state, bytenr, len, 2040 ret = btrfsic_map_block(state, bytenr, processed_len,
1804 &block_ctx, 0); 2041 &block_ctx, 0);
1805 if (ret) { 2042 if (ret) {
1806 printk(KERN_INFO 2043 printk(KERN_INFO
1807 "btrfsic: btrfsic_map_block(root @%llu)" 2044 "btrfsic: btrfsic_map_block(root @%llu)"
1808 " failed!\n", (unsigned long long)bytenr); 2045 " failed!\n", (unsigned long long)bytenr);
1809 return; 2046 goto continue_loop;
1810 } 2047 }
1811 block_ctx.data = mapped_data; 2048 block_ctx.datav = mapped_datav;
1812 /* the following is required in case of writes to mirrors, 2049 /* the following is required in case of writes to mirrors,
1813 * use the same that was used for the lookup */ 2050 * use the same that was used for the lookup */
1814 block_ctx.dev = dev_state; 2051 block_ctx.dev = dev_state;
@@ -1864,11 +2101,13 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1864 block->logical_bytenr = bytenr; 2101 block->logical_bytenr = bytenr;
1865 block->is_metadata = 1; 2102 block->is_metadata = 1;
1866 if (block->is_superblock) { 2103 if (block->is_superblock) {
2104 BUG_ON(PAGE_CACHE_SIZE !=
2105 BTRFS_SUPER_INFO_SIZE);
1867 ret = btrfsic_process_written_superblock( 2106 ret = btrfsic_process_written_superblock(
1868 state, 2107 state,
1869 block, 2108 block,
1870 (struct btrfs_super_block *) 2109 (struct btrfs_super_block *)
1871 mapped_data); 2110 mapped_datav[0]);
1872 if (state->print_mask & 2111 if (state->print_mask &
1873 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { 2112 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
1874 printk(KERN_INFO 2113 printk(KERN_INFO
@@ -1881,8 +2120,6 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1881 state, 2120 state,
1882 block, 2121 block,
1883 &block_ctx, 2122 &block_ctx,
1884 (struct btrfs_header *)
1885 block_ctx.data,
1886 0, 0); 2123 0, 0);
1887 } 2124 }
1888 if (ret) 2125 if (ret)
@@ -1913,26 +2150,30 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1913 u64 bytenr; 2150 u64 bytenr;
1914 2151
1915 if (!is_metadata) { 2152 if (!is_metadata) {
2153 processed_len = state->datablock_size;
1916 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2154 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1917 printk(KERN_INFO "Written block (%s/%llu/?)" 2155 printk(KERN_INFO "Written block (%s/%llu/?)"
1918 " !found in hash table, D.\n", 2156 " !found in hash table, D.\n",
1919 dev_state->name, 2157 dev_state->name,
1920 (unsigned long long)dev_bytenr); 2158 (unsigned long long)dev_bytenr);
1921 if (!state->include_extent_data) 2159 if (!state->include_extent_data) {
1922 return; /* ignore that written D block */ 2160 /* ignore that written D block */
2161 goto continue_loop;
2162 }
1923 2163
1924 /* this is getting ugly for the 2164 /* this is getting ugly for the
1925 * include_extent_data case... */ 2165 * include_extent_data case... */
1926 bytenr = 0; /* unknown */ 2166 bytenr = 0; /* unknown */
1927 block_ctx.start = bytenr; 2167 block_ctx.start = bytenr;
1928 block_ctx.len = len; 2168 block_ctx.len = processed_len;
1929 block_ctx.bh = NULL; 2169 block_ctx.mem_to_free = NULL;
2170 block_ctx.pagev = NULL;
1930 } else { 2171 } else {
2172 processed_len = state->metablock_size;
1931 bytenr = le64_to_cpu(((struct btrfs_header *) 2173 bytenr = le64_to_cpu(((struct btrfs_header *)
1932 mapped_data)->bytenr); 2174 mapped_datav[0])->bytenr);
1933 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, 2175 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
1934 dev_bytenr, 2176 dev_bytenr);
1935 mapped_data);
1936 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2177 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1937 printk(KERN_INFO 2178 printk(KERN_INFO
1938 "Written block @%llu (%s/%llu/?)" 2179 "Written block @%llu (%s/%llu/?)"
@@ -1941,17 +2182,17 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1941 dev_state->name, 2182 dev_state->name,
1942 (unsigned long long)dev_bytenr); 2183 (unsigned long long)dev_bytenr);
1943 2184
1944 ret = btrfsic_map_block(state, bytenr, len, &block_ctx, 2185 ret = btrfsic_map_block(state, bytenr, processed_len,
1945 0); 2186 &block_ctx, 0);
1946 if (ret) { 2187 if (ret) {
1947 printk(KERN_INFO 2188 printk(KERN_INFO
1948 "btrfsic: btrfsic_map_block(root @%llu)" 2189 "btrfsic: btrfsic_map_block(root @%llu)"
1949 " failed!\n", 2190 " failed!\n",
1950 (unsigned long long)dev_bytenr); 2191 (unsigned long long)dev_bytenr);
1951 return; 2192 goto continue_loop;
1952 } 2193 }
1953 } 2194 }
1954 block_ctx.data = mapped_data; 2195 block_ctx.datav = mapped_datav;
1955 /* the following is required in case of writes to mirrors, 2196 /* the following is required in case of writes to mirrors,
1956 * use the same that was used for the lookup */ 2197 * use the same that was used for the lookup */
1957 block_ctx.dev = dev_state; 2198 block_ctx.dev = dev_state;
@@ -1961,7 +2202,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1961 if (NULL == block) { 2202 if (NULL == block) {
1962 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 2203 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
1963 btrfsic_release_block_ctx(&block_ctx); 2204 btrfsic_release_block_ctx(&block_ctx);
1964 return; 2205 goto continue_loop;
1965 } 2206 }
1966 block->dev_state = dev_state; 2207 block->dev_state = dev_state;
1967 block->dev_bytenr = dev_bytenr; 2208 block->dev_bytenr = dev_bytenr;
@@ -2021,9 +2262,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
2021 2262
2022 if (is_metadata) { 2263 if (is_metadata) {
2023 ret = btrfsic_process_metablock(state, block, 2264 ret = btrfsic_process_metablock(state, block,
2024 &block_ctx, 2265 &block_ctx, 0, 0);
2025 (struct btrfs_header *)
2026 block_ctx.data, 0, 0);
2027 if (ret) 2266 if (ret)
2028 printk(KERN_INFO 2267 printk(KERN_INFO
2029 "btrfsic: process_metablock(root @%llu)" 2268 "btrfsic: process_metablock(root @%llu)"
@@ -2032,6 +2271,13 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
2032 } 2271 }
2033 btrfsic_release_block_ctx(&block_ctx); 2272 btrfsic_release_block_ctx(&block_ctx);
2034 } 2273 }
2274
2275continue_loop:
2276 BUG_ON(!processed_len);
2277 dev_bytenr += processed_len;
2278 mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
2279 num_pages -= processed_len >> PAGE_CACHE_SHIFT;
2280 goto again;
2035} 2281}
2036 2282
2037static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) 2283static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
@@ -2214,7 +2460,7 @@ static int btrfsic_process_written_superblock(
2214 2460
2215 num_copies = 2461 num_copies =
2216 btrfs_num_copies(&state->root->fs_info->mapping_tree, 2462 btrfs_num_copies(&state->root->fs_info->mapping_tree,
2217 next_bytenr, PAGE_SIZE); 2463 next_bytenr, BTRFS_SUPER_INFO_SIZE);
2218 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 2464 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2219 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 2465 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
2220 (unsigned long long)next_bytenr, num_copies); 2466 (unsigned long long)next_bytenr, num_copies);
@@ -2225,7 +2471,8 @@ static int btrfsic_process_written_superblock(
2225 printk(KERN_INFO 2471 printk(KERN_INFO
2226 "btrfsic_process_written_superblock(" 2472 "btrfsic_process_written_superblock("
2227 "mirror_num=%d)\n", mirror_num); 2473 "mirror_num=%d)\n", mirror_num);
2228 ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, 2474 ret = btrfsic_map_block(state, next_bytenr,
2475 BTRFS_SUPER_INFO_SIZE,
2229 &tmp_next_block_ctx, 2476 &tmp_next_block_ctx,
2230 mirror_num); 2477 mirror_num);
2231 if (ret) { 2478 if (ret) {
@@ -2690,7 +2937,7 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add(
2690static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 2937static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2691 u64 bytenr, 2938 u64 bytenr,
2692 struct btrfsic_dev_state *dev_state, 2939 struct btrfsic_dev_state *dev_state,
2693 u64 dev_bytenr, char *data) 2940 u64 dev_bytenr)
2694{ 2941{
2695 int num_copies; 2942 int num_copies;
2696 int mirror_num; 2943 int mirror_num;
@@ -2699,10 +2946,10 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2699 int match = 0; 2946 int match = 0;
2700 2947
2701 num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, 2948 num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
2702 bytenr, PAGE_SIZE); 2949 bytenr, state->metablock_size);
2703 2950
2704 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2951 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2705 ret = btrfsic_map_block(state, bytenr, PAGE_SIZE, 2952 ret = btrfsic_map_block(state, bytenr, state->metablock_size,
2706 &block_ctx, mirror_num); 2953 &block_ctx, mirror_num);
2707 if (ret) { 2954 if (ret) {
2708 printk(KERN_INFO "btrfsic:" 2955 printk(KERN_INFO "btrfsic:"
@@ -2728,7 +2975,8 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2728 (unsigned long long)bytenr, dev_state->name, 2975 (unsigned long long)bytenr, dev_state->name,
2729 (unsigned long long)dev_bytenr); 2976 (unsigned long long)dev_bytenr);
2730 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2977 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2731 ret = btrfsic_map_block(state, bytenr, PAGE_SIZE, 2978 ret = btrfsic_map_block(state, bytenr,
2979 state->metablock_size,
2732 &block_ctx, mirror_num); 2980 &block_ctx, mirror_num);
2733 if (ret) 2981 if (ret)
2734 continue; 2982 continue;
@@ -2782,13 +3030,13 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
2782 (unsigned long)bh->b_size, bh->b_data, 3030 (unsigned long)bh->b_size, bh->b_data,
2783 bh->b_bdev); 3031 bh->b_bdev);
2784 btrfsic_process_written_block(dev_state, dev_bytenr, 3032 btrfsic_process_written_block(dev_state, dev_bytenr,
2785 bh->b_data, bh->b_size, NULL, 3033 &bh->b_data, 1, NULL,
2786 NULL, bh, rw); 3034 NULL, bh, rw);
2787 } else if (NULL != dev_state && (rw & REQ_FLUSH)) { 3035 } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
2788 if (dev_state->state->print_mask & 3036 if (dev_state->state->print_mask &
2789 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3037 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2790 printk(KERN_INFO 3038 printk(KERN_INFO
2791 "submit_bh(rw=0x%x) FLUSH, bdev=%p)\n", 3039 "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
2792 rw, bh->b_bdev); 3040 rw, bh->b_bdev);
2793 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { 3041 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
2794 if ((dev_state->state->print_mask & 3042 if ((dev_state->state->print_mask &
@@ -2837,6 +3085,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
2837 unsigned int i; 3085 unsigned int i;
2838 u64 dev_bytenr; 3086 u64 dev_bytenr;
2839 int bio_is_patched; 3087 int bio_is_patched;
3088 char **mapped_datav;
2840 3089
2841 dev_bytenr = 512 * bio->bi_sector; 3090 dev_bytenr = 512 * bio->bi_sector;
2842 bio_is_patched = 0; 3091 bio_is_patched = 0;
@@ -2849,35 +3098,46 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
2849 (unsigned long long)dev_bytenr, 3098 (unsigned long long)dev_bytenr,
2850 bio->bi_bdev); 3099 bio->bi_bdev);
2851 3100
3101 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
3102 GFP_NOFS);
3103 if (!mapped_datav)
3104 goto leave;
2852 for (i = 0; i < bio->bi_vcnt; i++) { 3105 for (i = 0; i < bio->bi_vcnt; i++) {
2853 u8 *mapped_data; 3106 BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
2854 3107 mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
2855 mapped_data = kmap(bio->bi_io_vec[i].bv_page); 3108 if (!mapped_datav[i]) {
3109 while (i > 0) {
3110 i--;
3111 kunmap(bio->bi_io_vec[i].bv_page);
3112 }
3113 kfree(mapped_datav);
3114 goto leave;
3115 }
2856 if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3116 if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
2857 BTRFSIC_PRINT_MASK_VERBOSE) == 3117 BTRFSIC_PRINT_MASK_VERBOSE) ==
2858 (dev_state->state->print_mask & 3118 (dev_state->state->print_mask &
2859 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3119 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
2860 BTRFSIC_PRINT_MASK_VERBOSE))) 3120 BTRFSIC_PRINT_MASK_VERBOSE)))
2861 printk(KERN_INFO 3121 printk(KERN_INFO
2862 "#%u: page=%p, mapped=%p, len=%u," 3122 "#%u: page=%p, len=%u, offset=%u\n",
2863 " offset=%u\n",
2864 i, bio->bi_io_vec[i].bv_page, 3123 i, bio->bi_io_vec[i].bv_page,
2865 mapped_data,
2866 bio->bi_io_vec[i].bv_len, 3124 bio->bi_io_vec[i].bv_len,
2867 bio->bi_io_vec[i].bv_offset); 3125 bio->bi_io_vec[i].bv_offset);
2868 btrfsic_process_written_block(dev_state, dev_bytenr, 3126 }
2869 mapped_data, 3127 btrfsic_process_written_block(dev_state, dev_bytenr,
2870 bio->bi_io_vec[i].bv_len, 3128 mapped_datav, bio->bi_vcnt,
2871 bio, &bio_is_patched, 3129 bio, &bio_is_patched,
2872 NULL, rw); 3130 NULL, rw);
3131 while (i > 0) {
3132 i--;
2873 kunmap(bio->bi_io_vec[i].bv_page); 3133 kunmap(bio->bi_io_vec[i].bv_page);
2874 dev_bytenr += bio->bi_io_vec[i].bv_len;
2875 } 3134 }
3135 kfree(mapped_datav);
2876 } else if (NULL != dev_state && (rw & REQ_FLUSH)) { 3136 } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
2877 if (dev_state->state->print_mask & 3137 if (dev_state->state->print_mask &
2878 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3138 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2879 printk(KERN_INFO 3139 printk(KERN_INFO
2880 "submit_bio(rw=0x%x) FLUSH, bdev=%p)\n", 3140 "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
2881 rw, bio->bi_bdev); 3141 rw, bio->bi_bdev);
2882 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { 3142 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
2883 if ((dev_state->state->print_mask & 3143 if ((dev_state->state->print_mask &
@@ -2904,6 +3164,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
2904 bio->bi_end_io = btrfsic_bio_end_io; 3164 bio->bi_end_io = btrfsic_bio_end_io;
2905 } 3165 }
2906 } 3166 }
3167leave:
2907 mutex_unlock(&btrfsic_mutex); 3168 mutex_unlock(&btrfsic_mutex);
2908 3169
2909 submit_bio(rw, bio); 3170 submit_bio(rw, bio);
@@ -2918,6 +3179,30 @@ int btrfsic_mount(struct btrfs_root *root,
2918 struct list_head *dev_head = &fs_devices->devices; 3179 struct list_head *dev_head = &fs_devices->devices;
2919 struct btrfs_device *device; 3180 struct btrfs_device *device;
2920 3181
3182 if (root->nodesize != root->leafsize) {
3183 printk(KERN_INFO
3184 "btrfsic: cannot handle nodesize %d != leafsize %d!\n",
3185 root->nodesize, root->leafsize);
3186 return -1;
3187 }
3188 if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3189 printk(KERN_INFO
3190 "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3191 root->nodesize, (unsigned long)PAGE_CACHE_SIZE);
3192 return -1;
3193 }
3194 if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3195 printk(KERN_INFO
3196 "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3197 root->leafsize, (unsigned long)PAGE_CACHE_SIZE);
3198 return -1;
3199 }
3200 if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3201 printk(KERN_INFO
3202 "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3203 root->sectorsize, (unsigned long)PAGE_CACHE_SIZE);
3204 return -1;
3205 }
2921 state = kzalloc(sizeof(*state), GFP_NOFS); 3206 state = kzalloc(sizeof(*state), GFP_NOFS);
2922 if (NULL == state) { 3207 if (NULL == state) {
2923 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n"); 3208 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n");
@@ -2934,6 +3219,8 @@ int btrfsic_mount(struct btrfs_root *root,
2934 state->print_mask = print_mask; 3219 state->print_mask = print_mask;
2935 state->include_extent_data = including_extent_data; 3220 state->include_extent_data = including_extent_data;
2936 state->csum_size = 0; 3221 state->csum_size = 0;
3222 state->metablock_size = root->nodesize;
3223 state->datablock_size = root->sectorsize;
2937 INIT_LIST_HEAD(&state->all_blocks_list); 3224 INIT_LIST_HEAD(&state->all_blocks_list);
2938 btrfsic_block_hashtable_init(&state->block_hashtable); 3225 btrfsic_block_hashtable_init(&state->block_hashtable);
2939 btrfsic_block_link_hashtable_init(&state->block_link_hashtable); 3226 btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
@@ -3050,7 +3337,7 @@ void btrfsic_unmount(struct btrfs_root *root,
3050 btrfsic_block_link_free(l); 3337 btrfsic_block_link_free(l);
3051 } 3338 }
3052 3339
3053 if (b_all->is_iodone) 3340 if (b_all->is_iodone || b_all->never_written)
3054 btrfsic_block_free(b_all); 3341 btrfsic_block_free(b_all);
3055 else 3342 else
3056 printk(KERN_INFO "btrfs: attempt to free %c-block" 3343 printk(KERN_INFO "btrfs: attempt to free %c-block"
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 646f5e6f256..86eff48dab7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -120,10 +120,10 @@ static int check_compressed_csum(struct inode *inode,
120 page = cb->compressed_pages[i]; 120 page = cb->compressed_pages[i];
121 csum = ~(u32)0; 121 csum = ~(u32)0;
122 122
123 kaddr = kmap_atomic(page, KM_USER0); 123 kaddr = kmap_atomic(page);
124 csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE); 124 csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
125 btrfs_csum_final(csum, (char *)&csum); 125 btrfs_csum_final(csum, (char *)&csum);
126 kunmap_atomic(kaddr, KM_USER0); 126 kunmap_atomic(kaddr);
127 127
128 if (csum != *cb_sum) { 128 if (csum != *cb_sum) {
129 printk(KERN_INFO "btrfs csum failed ino %llu " 129 printk(KERN_INFO "btrfs csum failed ino %llu "
@@ -521,10 +521,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
521 if (zero_offset) { 521 if (zero_offset) {
522 int zeros; 522 int zeros;
523 zeros = PAGE_CACHE_SIZE - zero_offset; 523 zeros = PAGE_CACHE_SIZE - zero_offset;
524 userpage = kmap_atomic(page, KM_USER0); 524 userpage = kmap_atomic(page);
525 memset(userpage + zero_offset, 0, zeros); 525 memset(userpage + zero_offset, 0, zeros);
526 flush_dcache_page(page); 526 flush_dcache_page(page);
527 kunmap_atomic(userpage, KM_USER0); 527 kunmap_atomic(userpage);
528 } 528 }
529 } 529 }
530 530
@@ -993,9 +993,9 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
993 bytes = min(PAGE_CACHE_SIZE - *pg_offset, 993 bytes = min(PAGE_CACHE_SIZE - *pg_offset,
994 PAGE_CACHE_SIZE - buf_offset); 994 PAGE_CACHE_SIZE - buf_offset);
995 bytes = min(bytes, working_bytes); 995 bytes = min(bytes, working_bytes);
996 kaddr = kmap_atomic(page_out, KM_USER0); 996 kaddr = kmap_atomic(page_out);
997 memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); 997 memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
998 kunmap_atomic(kaddr, KM_USER0); 998 kunmap_atomic(kaddr);
999 flush_dcache_page(page_out); 999 flush_dcache_page(page_out);
1000 1000
1001 *pg_offset += bytes; 1001 *pg_offset += bytes;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b4534d918e4..d7a96cfdc50 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1390,7 +1390,11 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1390 if (!cur) 1390 if (!cur)
1391 return -EIO; 1391 return -EIO;
1392 } else if (!uptodate) { 1392 } else if (!uptodate) {
1393 btrfs_read_buffer(cur, gen); 1393 err = btrfs_read_buffer(cur, gen);
1394 if (err) {
1395 free_extent_buffer(cur);
1396 return err;
1397 }
1394 } 1398 }
1395 } 1399 }
1396 if (search_start == 0) 1400 if (search_start == 0)
@@ -1505,20 +1509,18 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
1505static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, 1509static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
1506 int level, int *slot) 1510 int level, int *slot)
1507{ 1511{
1508 if (level == 0) { 1512 if (level == 0)
1509 return generic_bin_search(eb, 1513 return generic_bin_search(eb,
1510 offsetof(struct btrfs_leaf, items), 1514 offsetof(struct btrfs_leaf, items),
1511 sizeof(struct btrfs_item), 1515 sizeof(struct btrfs_item),
1512 key, btrfs_header_nritems(eb), 1516 key, btrfs_header_nritems(eb),
1513 slot); 1517 slot);
1514 } else { 1518 else
1515 return generic_bin_search(eb, 1519 return generic_bin_search(eb,
1516 offsetof(struct btrfs_node, ptrs), 1520 offsetof(struct btrfs_node, ptrs),
1517 sizeof(struct btrfs_key_ptr), 1521 sizeof(struct btrfs_key_ptr),
1518 key, btrfs_header_nritems(eb), 1522 key, btrfs_header_nritems(eb),
1519 slot); 1523 slot);
1520 }
1521 return -1;
1522} 1524}
1523 1525
1524int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 1526int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f5f11a6c5e9..0151ca1ac65 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -173,6 +173,9 @@ static int btrfs_csum_sizes[] = { 4, 0 };
173#define BTRFS_FT_XATTR 8 173#define BTRFS_FT_XATTR 8
174#define BTRFS_FT_MAX 9 174#define BTRFS_FT_MAX 9
175 175
176/* ioprio of readahead is set to idle */
177#define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
178
176/* 179/*
177 * The key defines the order in the tree, and so it also defines (optimal) 180 * The key defines the order in the tree, and so it also defines (optimal)
178 * block layout. 181 * block layout.
@@ -823,6 +826,14 @@ struct btrfs_csum_item {
823 u8 csum; 826 u8 csum;
824} __attribute__ ((__packed__)); 827} __attribute__ ((__packed__));
825 828
829struct btrfs_dev_stats_item {
830 /*
831 * grow this item struct at the end for future enhancements and keep
832 * the existing values unchanged
833 */
834 __le64 values[BTRFS_DEV_STAT_VALUES_MAX];
835} __attribute__ ((__packed__));
836
826/* different types of block groups (and chunks) */ 837/* different types of block groups (and chunks) */
827#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) 838#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
828#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) 839#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
@@ -1384,7 +1395,7 @@ struct btrfs_root {
1384 struct list_head root_list; 1395 struct list_head root_list;
1385 1396
1386 spinlock_t orphan_lock; 1397 spinlock_t orphan_lock;
1387 struct list_head orphan_list; 1398 atomic_t orphan_inodes;
1388 struct btrfs_block_rsv *orphan_block_rsv; 1399 struct btrfs_block_rsv *orphan_block_rsv;
1389 int orphan_item_inserted; 1400 int orphan_item_inserted;
1390 int orphan_cleanup_state; 1401 int orphan_cleanup_state;
@@ -1517,6 +1528,12 @@ struct btrfs_ioctl_defrag_range_args {
1517#define BTRFS_BALANCE_ITEM_KEY 248 1528#define BTRFS_BALANCE_ITEM_KEY 248
1518 1529
1519/* 1530/*
1531 * Persistantly stores the io stats in the device tree.
1532 * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid).
1533 */
1534#define BTRFS_DEV_STATS_KEY 249
1535
1536/*
1520 * string items are for debugging. They just store a short string of 1537 * string items are for debugging. They just store a short string of
1521 * data in the FS 1538 * data in the FS
1522 */ 1539 */
@@ -2175,7 +2192,7 @@ BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
2175 2192
2176static inline bool btrfs_root_readonly(struct btrfs_root *root) 2193static inline bool btrfs_root_readonly(struct btrfs_root *root)
2177{ 2194{
2178 return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; 2195 return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
2179} 2196}
2180 2197
2181/* struct btrfs_root_backup */ 2198/* struct btrfs_root_backup */
@@ -2424,6 +2441,30 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
2424 return btrfs_item_size(eb, e) - offset; 2441 return btrfs_item_size(eb, e) - offset;
2425} 2442}
2426 2443
2444/* btrfs_dev_stats_item */
2445static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb,
2446 struct btrfs_dev_stats_item *ptr,
2447 int index)
2448{
2449 u64 val;
2450
2451 read_extent_buffer(eb, &val,
2452 offsetof(struct btrfs_dev_stats_item, values) +
2453 ((unsigned long)ptr) + (index * sizeof(u64)),
2454 sizeof(val));
2455 return val;
2456}
2457
2458static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb,
2459 struct btrfs_dev_stats_item *ptr,
2460 int index, u64 val)
2461{
2462 write_extent_buffer(eb, &val,
2463 offsetof(struct btrfs_dev_stats_item, values) +
2464 ((unsigned long)ptr) + (index * sizeof(u64)),
2465 sizeof(val));
2466}
2467
2427static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) 2468static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
2428{ 2469{
2429 return sb->s_fs_info; 2470 return sb->s_fs_info;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 03e3748d84d..c18d0442ae6 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -669,8 +669,8 @@ static int btrfs_delayed_inode_reserve_metadata(
669 return ret; 669 return ret;
670 } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { 670 } else if (src_rsv == &root->fs_info->delalloc_block_rsv) {
671 spin_lock(&BTRFS_I(inode)->lock); 671 spin_lock(&BTRFS_I(inode)->lock);
672 if (BTRFS_I(inode)->delalloc_meta_reserved) { 672 if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
673 BTRFS_I(inode)->delalloc_meta_reserved = 0; 673 &BTRFS_I(inode)->runtime_flags)) {
674 spin_unlock(&BTRFS_I(inode)->lock); 674 spin_unlock(&BTRFS_I(inode)->lock);
675 release = true; 675 release = true;
676 goto migrate; 676 goto migrate;
@@ -1706,7 +1706,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1706 btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode)); 1706 btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1707 btrfs_set_stack_inode_generation(inode_item, 1707 btrfs_set_stack_inode_generation(inode_item,
1708 BTRFS_I(inode)->generation); 1708 BTRFS_I(inode)->generation);
1709 btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence); 1709 btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
1710 btrfs_set_stack_inode_transid(inode_item, trans->transid); 1710 btrfs_set_stack_inode_transid(inode_item, trans->transid);
1711 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); 1711 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1712 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); 1712 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
@@ -1754,7 +1754,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1754 set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); 1754 set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1755 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); 1755 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1756 BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); 1756 BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1757 BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item); 1757 inode->i_version = btrfs_stack_inode_sequence(inode_item);
1758 inode->i_rdev = 0; 1758 inode->i_rdev = 0;
1759 *rdev = btrfs_stack_inode_rdev(inode_item); 1759 *rdev = btrfs_stack_inode_rdev(inode_item);
1760 BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); 1760 BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f51ad8477f1..b99d5127ba1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1153,7 +1153,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1153 root->orphan_block_rsv = NULL; 1153 root->orphan_block_rsv = NULL;
1154 1154
1155 INIT_LIST_HEAD(&root->dirty_list); 1155 INIT_LIST_HEAD(&root->dirty_list);
1156 INIT_LIST_HEAD(&root->orphan_list);
1157 INIT_LIST_HEAD(&root->root_list); 1156 INIT_LIST_HEAD(&root->root_list);
1158 spin_lock_init(&root->orphan_lock); 1157 spin_lock_init(&root->orphan_lock);
1159 spin_lock_init(&root->inode_lock); 1158 spin_lock_init(&root->inode_lock);
@@ -1166,6 +1165,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1166 atomic_set(&root->log_commit[0], 0); 1165 atomic_set(&root->log_commit[0], 0);
1167 atomic_set(&root->log_commit[1], 0); 1166 atomic_set(&root->log_commit[1], 0);
1168 atomic_set(&root->log_writers, 0); 1167 atomic_set(&root->log_writers, 0);
1168 atomic_set(&root->orphan_inodes, 0);
1169 root->log_batch = 0; 1169 root->log_batch = 0;
1170 root->log_transid = 0; 1170 root->log_transid = 0;
1171 root->last_log_commit = 0; 1171 root->last_log_commit = 0;
@@ -2006,7 +2006,8 @@ int open_ctree(struct super_block *sb,
2006 BTRFS_I(fs_info->btree_inode)->root = tree_root; 2006 BTRFS_I(fs_info->btree_inode)->root = tree_root;
2007 memset(&BTRFS_I(fs_info->btree_inode)->location, 0, 2007 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
2008 sizeof(struct btrfs_key)); 2008 sizeof(struct btrfs_key));
2009 BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; 2009 set_bit(BTRFS_INODE_DUMMY,
2010 &BTRFS_I(fs_info->btree_inode)->runtime_flags);
2010 insert_inode_hash(fs_info->btree_inode); 2011 insert_inode_hash(fs_info->btree_inode);
2011 2012
2012 spin_lock_init(&fs_info->block_group_cache_lock); 2013 spin_lock_init(&fs_info->block_group_cache_lock);
@@ -2358,6 +2359,13 @@ retry_root_backup:
2358 fs_info->generation = generation; 2359 fs_info->generation = generation;
2359 fs_info->last_trans_committed = generation; 2360 fs_info->last_trans_committed = generation;
2360 2361
2362 ret = btrfs_init_dev_stats(fs_info);
2363 if (ret) {
2364 printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
2365 ret);
2366 goto fail_block_groups;
2367 }
2368
2361 ret = btrfs_init_space_info(fs_info); 2369 ret = btrfs_init_space_info(fs_info);
2362 if (ret) { 2370 if (ret) {
2363 printk(KERN_ERR "Failed to initial space info: %d\n", ret); 2371 printk(KERN_ERR "Failed to initial space info: %d\n", ret);
@@ -2561,18 +2569,19 @@ recovery_tree_root:
2561 2569
2562static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) 2570static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
2563{ 2571{
2564 char b[BDEVNAME_SIZE];
2565
2566 if (uptodate) { 2572 if (uptodate) {
2567 set_buffer_uptodate(bh); 2573 set_buffer_uptodate(bh);
2568 } else { 2574 } else {
2575 struct btrfs_device *device = (struct btrfs_device *)
2576 bh->b_private;
2577
2569 printk_ratelimited(KERN_WARNING "lost page write due to " 2578 printk_ratelimited(KERN_WARNING "lost page write due to "
2570 "I/O error on %s\n", 2579 "I/O error on %s\n", device->name);
2571 bdevname(bh->b_bdev, b));
2572 /* note, we dont' set_buffer_write_io_error because we have 2580 /* note, we dont' set_buffer_write_io_error because we have
2573 * our own ways of dealing with the IO errors 2581 * our own ways of dealing with the IO errors
2574 */ 2582 */
2575 clear_buffer_uptodate(bh); 2583 clear_buffer_uptodate(bh);
2584 btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
2576 } 2585 }
2577 unlock_buffer(bh); 2586 unlock_buffer(bh);
2578 put_bh(bh); 2587 put_bh(bh);
@@ -2687,6 +2696,7 @@ static int write_dev_supers(struct btrfs_device *device,
2687 set_buffer_uptodate(bh); 2696 set_buffer_uptodate(bh);
2688 lock_buffer(bh); 2697 lock_buffer(bh);
2689 bh->b_end_io = btrfs_end_buffer_write_sync; 2698 bh->b_end_io = btrfs_end_buffer_write_sync;
2699 bh->b_private = device;
2690 } 2700 }
2691 2701
2692 /* 2702 /*
@@ -2745,6 +2755,9 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
2745 } 2755 }
2746 if (!bio_flagged(bio, BIO_UPTODATE)) { 2756 if (!bio_flagged(bio, BIO_UPTODATE)) {
2747 ret = -EIO; 2757 ret = -EIO;
2758 if (!bio_flagged(bio, BIO_EOPNOTSUPP))
2759 btrfs_dev_stat_inc_and_print(device,
2760 BTRFS_DEV_STAT_FLUSH_ERRS);
2748 } 2761 }
2749 2762
2750 /* drop the reference from the wait == 0 run */ 2763 /* drop the reference from the wait == 0 run */
@@ -2907,19 +2920,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
2907 return ret; 2920 return ret;
2908} 2921}
2909 2922
2910/* Kill all outstanding I/O */
2911void btrfs_abort_devices(struct btrfs_root *root)
2912{
2913 struct list_head *head;
2914 struct btrfs_device *dev;
2915 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2916 head = &root->fs_info->fs_devices->devices;
2917 list_for_each_entry_rcu(dev, head, dev_list) {
2918 blk_abort_queue(dev->bdev->bd_disk->queue);
2919 }
2920 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2921}
2922
2923void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 2923void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2924{ 2924{
2925 spin_lock(&fs_info->fs_roots_radix_lock); 2925 spin_lock(&fs_info->fs_roots_radix_lock);
@@ -3676,17 +3676,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
3676 return 0; 3676 return 0;
3677} 3677}
3678 3678
3679static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page,
3680 u64 start, u64 end,
3681 struct extent_state *state)
3682{
3683 struct super_block *sb = page->mapping->host->i_sb;
3684 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
3685 btrfs_error(fs_info, -EIO,
3686 "Error occured while writing out btree at %llu", start);
3687 return -EIO;
3688}
3689
3690static struct extent_io_ops btree_extent_io_ops = { 3679static struct extent_io_ops btree_extent_io_ops = {
3691 .write_cache_pages_lock_hook = btree_lock_page_hook, 3680 .write_cache_pages_lock_hook = btree_lock_page_hook,
3692 .readpage_end_io_hook = btree_readpage_end_io_hook, 3681 .readpage_end_io_hook = btree_readpage_end_io_hook,
@@ -3694,5 +3683,4 @@ static struct extent_io_ops btree_extent_io_ops = {
3694 .submit_bio_hook = btree_submit_bio_hook, 3683 .submit_bio_hook = btree_submit_bio_hook,
3695 /* note we're sharing with inode.c for the merge bio hook */ 3684 /* note we're sharing with inode.c for the merge bio hook */
3696 .merge_bio_hook = btrfs_merge_bio_hook, 3685 .merge_bio_hook = btrfs_merge_bio_hook,
3697 .writepage_io_failed_hook = btree_writepage_io_failed_hook,
3698}; 3686};
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index ab1830aaf0e..05b3fab39f7 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -89,7 +89,6 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
89int btrfs_cleanup_transaction(struct btrfs_root *root); 89int btrfs_cleanup_transaction(struct btrfs_root *root);
90void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, 90void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
91 struct btrfs_root *root); 91 struct btrfs_root *root);
92void btrfs_abort_devices(struct btrfs_root *root);
93 92
94#ifdef CONFIG_DEBUG_LOCK_ALLOC 93#ifdef CONFIG_DEBUG_LOCK_ALLOC
95void btrfs_init_lockdep(void); 94void btrfs_init_lockdep(void);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b68eb7ad05a..4b5a1e1bdef 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3578,7 +3578,7 @@ again:
3578 space_info->chunk_alloc = 0; 3578 space_info->chunk_alloc = 0;
3579 spin_unlock(&space_info->lock); 3579 spin_unlock(&space_info->lock);
3580out: 3580out:
3581 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3581 mutex_unlock(&fs_info->chunk_mutex);
3582 return ret; 3582 return ret;
3583} 3583}
3584 3584
@@ -4355,10 +4355,9 @@ static unsigned drop_outstanding_extent(struct inode *inode)
4355 BTRFS_I(inode)->outstanding_extents--; 4355 BTRFS_I(inode)->outstanding_extents--;
4356 4356
4357 if (BTRFS_I(inode)->outstanding_extents == 0 && 4357 if (BTRFS_I(inode)->outstanding_extents == 0 &&
4358 BTRFS_I(inode)->delalloc_meta_reserved) { 4358 test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
4359 &BTRFS_I(inode)->runtime_flags))
4359 drop_inode_space = 1; 4360 drop_inode_space = 1;
4360 BTRFS_I(inode)->delalloc_meta_reserved = 0;
4361 }
4362 4361
4363 /* 4362 /*
4364 * If we have more or the same amount of outsanding extents than we have 4363 * If we have more or the same amount of outsanding extents than we have
@@ -4465,7 +4464,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4465 * Add an item to reserve for updating the inode when we complete the 4464 * Add an item to reserve for updating the inode when we complete the
4466 * delalloc io. 4465 * delalloc io.
4467 */ 4466 */
4468 if (!BTRFS_I(inode)->delalloc_meta_reserved) { 4467 if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
4468 &BTRFS_I(inode)->runtime_flags)) {
4469 nr_extents++; 4469 nr_extents++;
4470 extra_reserve = 1; 4470 extra_reserve = 1;
4471 } 4471 }
@@ -4511,7 +4511,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4511 4511
4512 spin_lock(&BTRFS_I(inode)->lock); 4512 spin_lock(&BTRFS_I(inode)->lock);
4513 if (extra_reserve) { 4513 if (extra_reserve) {
4514 BTRFS_I(inode)->delalloc_meta_reserved = 1; 4514 set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
4515 &BTRFS_I(inode)->runtime_flags);
4515 nr_extents--; 4516 nr_extents--;
4516 } 4517 }
4517 BTRFS_I(inode)->reserved_extents += nr_extents; 4518 BTRFS_I(inode)->reserved_extents += nr_extents;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3daed70a401..2c8f7b20461 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -186,7 +186,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
186 return parent; 186 return parent;
187 } 187 }
188 188
189 entry = rb_entry(node, struct tree_entry, rb_node);
190 rb_link_node(node, parent, p); 189 rb_link_node(node, parent, p);
191 rb_insert_color(node, root); 190 rb_insert_color(node, root);
192 return NULL; 191 return NULL;
@@ -413,7 +412,7 @@ static struct extent_state *next_state(struct extent_state *state)
413 412
414/* 413/*
415 * utility function to clear some bits in an extent state struct. 414 * utility function to clear some bits in an extent state struct.
416 * it will optionally wake up any one waiting on this state (wake == 1) 415 * it will optionally wake up any one waiting on this state (wake == 1).
417 * 416 *
418 * If no bits are set on the state struct after clearing things, the 417 * If no bits are set on the state struct after clearing things, the
419 * struct is freed and removed from the tree 418 * struct is freed and removed from the tree
@@ -570,10 +569,8 @@ hit_next:
570 if (err) 569 if (err)
571 goto out; 570 goto out;
572 if (state->end <= end) { 571 if (state->end <= end) {
573 clear_state_bit(tree, state, &bits, wake); 572 state = clear_state_bit(tree, state, &bits, wake);
574 if (last_end == (u64)-1) 573 goto next;
575 goto out;
576 start = last_end + 1;
577 } 574 }
578 goto search_again; 575 goto search_again;
579 } 576 }
@@ -781,7 +778,6 @@ hit_next:
781 * Just lock what we found and keep going 778 * Just lock what we found and keep going
782 */ 779 */
783 if (state->start == start && state->end <= end) { 780 if (state->start == start && state->end <= end) {
784 struct rb_node *next_node;
785 if (state->state & exclusive_bits) { 781 if (state->state & exclusive_bits) {
786 *failed_start = state->start; 782 *failed_start = state->start;
787 err = -EEXIST; 783 err = -EEXIST;
@@ -789,20 +785,15 @@ hit_next:
789 } 785 }
790 786
791 set_state_bits(tree, state, &bits); 787 set_state_bits(tree, state, &bits);
792
793 cache_state(state, cached_state); 788 cache_state(state, cached_state);
794 merge_state(tree, state); 789 merge_state(tree, state);
795 if (last_end == (u64)-1) 790 if (last_end == (u64)-1)
796 goto out; 791 goto out;
797
798 start = last_end + 1; 792 start = last_end + 1;
799 next_node = rb_next(&state->rb_node); 793 state = next_state(state);
800 if (next_node && start < end && prealloc && !need_resched()) { 794 if (start < end && state && state->start == start &&
801 state = rb_entry(next_node, struct extent_state, 795 !need_resched())
802 rb_node); 796 goto hit_next;
803 if (state->start == start)
804 goto hit_next;
805 }
806 goto search_again; 797 goto search_again;
807 } 798 }
808 799
@@ -845,6 +836,10 @@ hit_next:
845 if (last_end == (u64)-1) 836 if (last_end == (u64)-1)
846 goto out; 837 goto out;
847 start = last_end + 1; 838 start = last_end + 1;
839 state = next_state(state);
840 if (start < end && state && state->start == start &&
841 !need_resched())
842 goto hit_next;
848 } 843 }
849 goto search_again; 844 goto search_again;
850 } 845 }
@@ -994,21 +989,14 @@ hit_next:
994 * Just lock what we found and keep going 989 * Just lock what we found and keep going
995 */ 990 */
996 if (state->start == start && state->end <= end) { 991 if (state->start == start && state->end <= end) {
997 struct rb_node *next_node;
998
999 set_state_bits(tree, state, &bits); 992 set_state_bits(tree, state, &bits);
1000 clear_state_bit(tree, state, &clear_bits, 0); 993 state = clear_state_bit(tree, state, &clear_bits, 0);
1001 if (last_end == (u64)-1) 994 if (last_end == (u64)-1)
1002 goto out; 995 goto out;
1003
1004 start = last_end + 1; 996 start = last_end + 1;
1005 next_node = rb_next(&state->rb_node); 997 if (start < end && state && state->start == start &&
1006 if (next_node && start < end && prealloc && !need_resched()) { 998 !need_resched())
1007 state = rb_entry(next_node, struct extent_state, 999 goto hit_next;
1008 rb_node);
1009 if (state->start == start)
1010 goto hit_next;
1011 }
1012 goto search_again; 1000 goto search_again;
1013 } 1001 }
1014 1002
@@ -1042,10 +1030,13 @@ hit_next:
1042 goto out; 1030 goto out;
1043 if (state->end <= end) { 1031 if (state->end <= end) {
1044 set_state_bits(tree, state, &bits); 1032 set_state_bits(tree, state, &bits);
1045 clear_state_bit(tree, state, &clear_bits, 0); 1033 state = clear_state_bit(tree, state, &clear_bits, 0);
1046 if (last_end == (u64)-1) 1034 if (last_end == (u64)-1)
1047 goto out; 1035 goto out;
1048 start = last_end + 1; 1036 start = last_end + 1;
1037 if (start < end && state && state->start == start &&
1038 !need_resched())
1039 goto hit_next;
1049 } 1040 }
1050 goto search_again; 1041 goto search_again;
1051 } 1042 }
@@ -1173,9 +1164,8 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
1173 cached_state, mask); 1164 cached_state, mask);
1174} 1165}
1175 1166
1176static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 1167int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
1177 u64 end, struct extent_state **cached_state, 1168 struct extent_state **cached_state, gfp_t mask)
1178 gfp_t mask)
1179{ 1169{
1180 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, 1170 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
1181 cached_state, mask); 1171 cached_state, mask);
@@ -1293,7 +1283,7 @@ out:
1293 * returned if we find something, and *start_ret and *end_ret are 1283 * returned if we find something, and *start_ret and *end_ret are
1294 * set to reflect the state struct that was found. 1284 * set to reflect the state struct that was found.
1295 * 1285 *
1296 * If nothing was found, 1 is returned, < 0 on error 1286 * If nothing was found, 1 is returned. If found something, return 0.
1297 */ 1287 */
1298int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 1288int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1299 u64 *start_ret, u64 *end_ret, int bits) 1289 u64 *start_ret, u64 *end_ret, int bits)
@@ -1923,6 +1913,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
1923 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { 1913 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
1924 /* try to remap that extent elsewhere? */ 1914 /* try to remap that extent elsewhere? */
1925 bio_put(bio); 1915 bio_put(bio);
1916 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
1926 return -EIO; 1917 return -EIO;
1927 } 1918 }
1928 1919
@@ -2222,17 +2213,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2222 uptodate = 0; 2213 uptodate = 0;
2223 } 2214 }
2224 2215
2225 if (!uptodate && tree->ops &&
2226 tree->ops->writepage_io_failed_hook) {
2227 ret = tree->ops->writepage_io_failed_hook(NULL, page,
2228 start, end, NULL);
2229 /* Writeback already completed */
2230 if (ret == 0)
2231 return 1;
2232 }
2233
2234 if (!uptodate) { 2216 if (!uptodate) {
2235 clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
2236 ClearPageUptodate(page); 2217 ClearPageUptodate(page);
2237 SetPageError(page); 2218 SetPageError(page);
2238 } 2219 }
@@ -2347,10 +2328,23 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2347 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 2328 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
2348 ret = tree->ops->readpage_end_io_hook(page, start, end, 2329 ret = tree->ops->readpage_end_io_hook(page, start, end,
2349 state, mirror); 2330 state, mirror);
2350 if (ret) 2331 if (ret) {
2332 /* no IO indicated but software detected errors
2333 * in the block, either checksum errors or
2334 * issues with the contents */
2335 struct btrfs_root *root =
2336 BTRFS_I(page->mapping->host)->root;
2337 struct btrfs_device *device;
2338
2351 uptodate = 0; 2339 uptodate = 0;
2352 else 2340 device = btrfs_find_device_for_logical(
2341 root, start, mirror);
2342 if (device)
2343 btrfs_dev_stat_inc_and_print(device,
2344 BTRFS_DEV_STAT_CORRUPTION_ERRS);
2345 } else {
2353 clean_io_failure(start, page); 2346 clean_io_failure(start, page);
2347 }
2354 } 2348 }
2355 2349
2356 if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { 2350 if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
@@ -2612,10 +2606,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2612 2606
2613 if (zero_offset) { 2607 if (zero_offset) {
2614 iosize = PAGE_CACHE_SIZE - zero_offset; 2608 iosize = PAGE_CACHE_SIZE - zero_offset;
2615 userpage = kmap_atomic(page, KM_USER0); 2609 userpage = kmap_atomic(page);
2616 memset(userpage + zero_offset, 0, iosize); 2610 memset(userpage + zero_offset, 0, iosize);
2617 flush_dcache_page(page); 2611 flush_dcache_page(page);
2618 kunmap_atomic(userpage, KM_USER0); 2612 kunmap_atomic(userpage);
2619 } 2613 }
2620 } 2614 }
2621 while (cur <= end) { 2615 while (cur <= end) {
@@ -2624,10 +2618,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2624 struct extent_state *cached = NULL; 2618 struct extent_state *cached = NULL;
2625 2619
2626 iosize = PAGE_CACHE_SIZE - pg_offset; 2620 iosize = PAGE_CACHE_SIZE - pg_offset;
2627 userpage = kmap_atomic(page, KM_USER0); 2621 userpage = kmap_atomic(page);
2628 memset(userpage + pg_offset, 0, iosize); 2622 memset(userpage + pg_offset, 0, iosize);
2629 flush_dcache_page(page); 2623 flush_dcache_page(page);
2630 kunmap_atomic(userpage, KM_USER0); 2624 kunmap_atomic(userpage);
2631 set_extent_uptodate(tree, cur, cur + iosize - 1, 2625 set_extent_uptodate(tree, cur, cur + iosize - 1,
2632 &cached, GFP_NOFS); 2626 &cached, GFP_NOFS);
2633 unlock_extent_cached(tree, cur, cur + iosize - 1, 2627 unlock_extent_cached(tree, cur, cur + iosize - 1,
@@ -2673,10 +2667,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2673 char *userpage; 2667 char *userpage;
2674 struct extent_state *cached = NULL; 2668 struct extent_state *cached = NULL;
2675 2669
2676 userpage = kmap_atomic(page, KM_USER0); 2670 userpage = kmap_atomic(page);
2677 memset(userpage + pg_offset, 0, iosize); 2671 memset(userpage + pg_offset, 0, iosize);
2678 flush_dcache_page(page); 2672 flush_dcache_page(page);
2679 kunmap_atomic(userpage, KM_USER0); 2673 kunmap_atomic(userpage);
2680 2674
2681 set_extent_uptodate(tree, cur, cur + iosize - 1, 2675 set_extent_uptodate(tree, cur, cur + iosize - 1,
2682 &cached, GFP_NOFS); 2676 &cached, GFP_NOFS);
@@ -2823,10 +2817,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2823 if (page->index == end_index) { 2817 if (page->index == end_index) {
2824 char *userpage; 2818 char *userpage;
2825 2819
2826 userpage = kmap_atomic(page, KM_USER0); 2820 userpage = kmap_atomic(page);
2827 memset(userpage + pg_offset, 0, 2821 memset(userpage + pg_offset, 0,
2828 PAGE_CACHE_SIZE - pg_offset); 2822 PAGE_CACHE_SIZE - pg_offset);
2829 kunmap_atomic(userpage, KM_USER0); 2823 kunmap_atomic(userpage);
2830 flush_dcache_page(page); 2824 flush_dcache_page(page);
2831 } 2825 }
2832 pg_offset = 0; 2826 pg_offset = 0;
@@ -3164,7 +3158,7 @@ static int write_one_eb(struct extent_buffer *eb,
3164 u64 offset = eb->start; 3158 u64 offset = eb->start;
3165 unsigned long i, num_pages; 3159 unsigned long i, num_pages;
3166 int rw = (epd->sync_io ? WRITE_SYNC : WRITE); 3160 int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
3167 int ret; 3161 int ret = 0;
3168 3162
3169 clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); 3163 clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
3170 num_pages = num_extent_pages(eb->start, eb->len); 3164 num_pages = num_extent_pages(eb->start, eb->len);
@@ -4036,12 +4030,14 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
4036 unsigned long start_idx) 4030 unsigned long start_idx)
4037{ 4031{
4038 unsigned long index; 4032 unsigned long index;
4033 unsigned long num_pages;
4039 struct page *page; 4034 struct page *page;
4040 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); 4035 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4041 4036
4042 BUG_ON(extent_buffer_under_io(eb)); 4037 BUG_ON(extent_buffer_under_io(eb));
4043 4038
4044 index = num_extent_pages(eb->start, eb->len); 4039 num_pages = num_extent_pages(eb->start, eb->len);
4040 index = start_idx + num_pages;
4045 if (start_idx >= index) 4041 if (start_idx >= index)
4046 return; 4042 return;
4047 4043
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 96434a61d7c..25900af5b15 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -76,9 +76,6 @@ struct extent_io_ops {
76 unsigned long bio_flags); 76 unsigned long bio_flags);
77 int (*readpage_io_hook)(struct page *page, u64 start, u64 end); 77 int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
78 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); 78 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
79 int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
80 u64 start, u64 end,
81 struct extent_state *state);
82 int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, 79 int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
83 struct extent_state *state, int mirror); 80 struct extent_state *state, int mirror);
84 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, 81 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
@@ -226,6 +223,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
226 struct extent_state **cached_state, gfp_t mask); 223 struct extent_state **cached_state, gfp_t mask);
227int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 224int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
228 struct extent_state **cached_state, gfp_t mask); 225 struct extent_state **cached_state, gfp_t mask);
226int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
227 struct extent_state **cached_state, gfp_t mask);
229int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 228int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
230 gfp_t mask); 229 gfp_t mask);
231int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 230int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index cab0ffb5ef3..5d158d32023 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -460,13 +460,13 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
460 sums->bytenr = ordered->start; 460 sums->bytenr = ordered->start;
461 } 461 }
462 462
463 data = kmap_atomic(bvec->bv_page, KM_USER0); 463 data = kmap_atomic(bvec->bv_page);
464 sector_sum->sum = ~(u32)0; 464 sector_sum->sum = ~(u32)0;
465 sector_sum->sum = btrfs_csum_data(root, 465 sector_sum->sum = btrfs_csum_data(root,
466 data + bvec->bv_offset, 466 data + bvec->bv_offset,
467 sector_sum->sum, 467 sector_sum->sum,
468 bvec->bv_len); 468 bvec->bv_len);
469 kunmap_atomic(data, KM_USER0); 469 kunmap_atomic(data);
470 btrfs_csum_final(sector_sum->sum, 470 btrfs_csum_final(sector_sum->sum,
471 (char *)&sector_sum->sum); 471 (char *)&sector_sum->sum);
472 sector_sum->bytenr = disk_bytenr; 472 sector_sum->bytenr = disk_bytenr;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 53bf2d764bb..876cddd6b2f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -65,6 +65,21 @@ struct inode_defrag {
65 int cycled; 65 int cycled;
66}; 66};
67 67
68static int __compare_inode_defrag(struct inode_defrag *defrag1,
69 struct inode_defrag *defrag2)
70{
71 if (defrag1->root > defrag2->root)
72 return 1;
73 else if (defrag1->root < defrag2->root)
74 return -1;
75 else if (defrag1->ino > defrag2->ino)
76 return 1;
77 else if (defrag1->ino < defrag2->ino)
78 return -1;
79 else
80 return 0;
81}
82
68/* pop a record for an inode into the defrag tree. The lock 83/* pop a record for an inode into the defrag tree. The lock
69 * must be held already 84 * must be held already
70 * 85 *
@@ -81,15 +96,17 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
81 struct inode_defrag *entry; 96 struct inode_defrag *entry;
82 struct rb_node **p; 97 struct rb_node **p;
83 struct rb_node *parent = NULL; 98 struct rb_node *parent = NULL;
99 int ret;
84 100
85 p = &root->fs_info->defrag_inodes.rb_node; 101 p = &root->fs_info->defrag_inodes.rb_node;
86 while (*p) { 102 while (*p) {
87 parent = *p; 103 parent = *p;
88 entry = rb_entry(parent, struct inode_defrag, rb_node); 104 entry = rb_entry(parent, struct inode_defrag, rb_node);
89 105
90 if (defrag->ino < entry->ino) 106 ret = __compare_inode_defrag(defrag, entry);
107 if (ret < 0)
91 p = &parent->rb_left; 108 p = &parent->rb_left;
92 else if (defrag->ino > entry->ino) 109 else if (ret > 0)
93 p = &parent->rb_right; 110 p = &parent->rb_right;
94 else { 111 else {
95 /* if we're reinserting an entry for 112 /* if we're reinserting an entry for
@@ -103,7 +120,7 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
103 goto exists; 120 goto exists;
104 } 121 }
105 } 122 }
106 BTRFS_I(inode)->in_defrag = 1; 123 set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
107 rb_link_node(&defrag->rb_node, parent, p); 124 rb_link_node(&defrag->rb_node, parent, p);
108 rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); 125 rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
109 return; 126 return;
@@ -131,7 +148,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
131 if (btrfs_fs_closing(root->fs_info)) 148 if (btrfs_fs_closing(root->fs_info))
132 return 0; 149 return 0;
133 150
134 if (BTRFS_I(inode)->in_defrag) 151 if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
135 return 0; 152 return 0;
136 153
137 if (trans) 154 if (trans)
@@ -148,7 +165,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
148 defrag->root = root->root_key.objectid; 165 defrag->root = root->root_key.objectid;
149 166
150 spin_lock(&root->fs_info->defrag_inodes_lock); 167 spin_lock(&root->fs_info->defrag_inodes_lock);
151 if (!BTRFS_I(inode)->in_defrag) 168 if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
152 __btrfs_add_inode_defrag(inode, defrag); 169 __btrfs_add_inode_defrag(inode, defrag);
153 else 170 else
154 kfree(defrag); 171 kfree(defrag);
@@ -159,28 +176,35 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
159/* 176/*
160 * must be called with the defrag_inodes lock held 177 * must be called with the defrag_inodes lock held
161 */ 178 */
162struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino, 179struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
180 u64 root, u64 ino,
163 struct rb_node **next) 181 struct rb_node **next)
164{ 182{
165 struct inode_defrag *entry = NULL; 183 struct inode_defrag *entry = NULL;
184 struct inode_defrag tmp;
166 struct rb_node *p; 185 struct rb_node *p;
167 struct rb_node *parent = NULL; 186 struct rb_node *parent = NULL;
187 int ret;
188
189 tmp.ino = ino;
190 tmp.root = root;
168 191
169 p = info->defrag_inodes.rb_node; 192 p = info->defrag_inodes.rb_node;
170 while (p) { 193 while (p) {
171 parent = p; 194 parent = p;
172 entry = rb_entry(parent, struct inode_defrag, rb_node); 195 entry = rb_entry(parent, struct inode_defrag, rb_node);
173 196
174 if (ino < entry->ino) 197 ret = __compare_inode_defrag(&tmp, entry);
198 if (ret < 0)
175 p = parent->rb_left; 199 p = parent->rb_left;
176 else if (ino > entry->ino) 200 else if (ret > 0)
177 p = parent->rb_right; 201 p = parent->rb_right;
178 else 202 else
179 return entry; 203 return entry;
180 } 204 }
181 205
182 if (next) { 206 if (next) {
183 while (parent && ino > entry->ino) { 207 while (parent && __compare_inode_defrag(&tmp, entry) > 0) {
184 parent = rb_next(parent); 208 parent = rb_next(parent);
185 entry = rb_entry(parent, struct inode_defrag, rb_node); 209 entry = rb_entry(parent, struct inode_defrag, rb_node);
186 } 210 }
@@ -202,6 +226,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
202 struct btrfs_key key; 226 struct btrfs_key key;
203 struct btrfs_ioctl_defrag_range_args range; 227 struct btrfs_ioctl_defrag_range_args range;
204 u64 first_ino = 0; 228 u64 first_ino = 0;
229 u64 root_objectid = 0;
205 int num_defrag; 230 int num_defrag;
206 int defrag_batch = 1024; 231 int defrag_batch = 1024;
207 232
@@ -214,11 +239,14 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
214 n = NULL; 239 n = NULL;
215 240
216 /* find an inode to defrag */ 241 /* find an inode to defrag */
217 defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n); 242 defrag = btrfs_find_defrag_inode(fs_info, root_objectid,
243 first_ino, &n);
218 if (!defrag) { 244 if (!defrag) {
219 if (n) 245 if (n) {
220 defrag = rb_entry(n, struct inode_defrag, rb_node); 246 defrag = rb_entry(n, struct inode_defrag,
221 else if (first_ino) { 247 rb_node);
248 } else if (root_objectid || first_ino) {
249 root_objectid = 0;
222 first_ino = 0; 250 first_ino = 0;
223 continue; 251 continue;
224 } else { 252 } else {
@@ -228,6 +256,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
228 256
229 /* remove it from the rbtree */ 257 /* remove it from the rbtree */
230 first_ino = defrag->ino + 1; 258 first_ino = defrag->ino + 1;
259 root_objectid = defrag->root;
231 rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); 260 rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
232 261
233 if (btrfs_fs_closing(fs_info)) 262 if (btrfs_fs_closing(fs_info))
@@ -252,7 +281,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
252 goto next; 281 goto next;
253 282
254 /* do a chunk of defrag */ 283 /* do a chunk of defrag */
255 BTRFS_I(inode)->in_defrag = 0; 284 clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
256 range.start = defrag->last_offset; 285 range.start = defrag->last_offset;
257 num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, 286 num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
258 defrag_batch); 287 defrag_batch);
@@ -1409,7 +1438,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1409 mutex_unlock(&inode->i_mutex); 1438 mutex_unlock(&inode->i_mutex);
1410 goto out; 1439 goto out;
1411 } 1440 }
1412 BTRFS_I(inode)->sequence++;
1413 1441
1414 start_pos = round_down(pos, root->sectorsize); 1442 start_pos = round_down(pos, root->sectorsize);
1415 if (start_pos > i_size_read(inode)) { 1443 if (start_pos > i_size_read(inode)) {
@@ -1466,8 +1494,8 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
1466 * flush down new bytes that may have been written if the 1494 * flush down new bytes that may have been written if the
1467 * application were using truncate to replace a file in place. 1495 * application were using truncate to replace a file in place.
1468 */ 1496 */
1469 if (BTRFS_I(inode)->ordered_data_close) { 1497 if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
1470 BTRFS_I(inode)->ordered_data_close = 0; 1498 &BTRFS_I(inode)->runtime_flags)) {
1471 btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); 1499 btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
1472 if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) 1500 if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
1473 filemap_flush(inode->i_mapping); 1501 filemap_flush(inode->i_mapping);
@@ -1498,14 +1526,15 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1498 1526
1499 trace_btrfs_sync_file(file, datasync); 1527 trace_btrfs_sync_file(file, datasync);
1500 1528
1501 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
1502 if (ret)
1503 return ret;
1504 mutex_lock(&inode->i_mutex); 1529 mutex_lock(&inode->i_mutex);
1505 1530
1506 /* we wait first, since the writeback may change the inode */ 1531 /*
1532 * we wait first, since the writeback may change the inode, also wait
1533 * ordered range does a filemap_write_and_wait_range which is why we
1534 * don't do it above like other file systems.
1535 */
1507 root->log_batch++; 1536 root->log_batch++;
1508 btrfs_wait_ordered_range(inode, 0, (u64)-1); 1537 btrfs_wait_ordered_range(inode, start, end);
1509 root->log_batch++; 1538 root->log_batch++;
1510 1539
1511 /* 1540 /*
@@ -1523,7 +1552,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1523 * syncing 1552 * syncing
1524 */ 1553 */
1525 smp_mb(); 1554 smp_mb();
1526 if (BTRFS_I(inode)->last_trans <= 1555 if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
1556 BTRFS_I(inode)->last_trans <=
1527 root->fs_info->last_trans_committed) { 1557 root->fs_info->last_trans_committed) {
1528 BTRFS_I(inode)->last_trans = 0; 1558 BTRFS_I(inode)->last_trans = 0;
1529 mutex_unlock(&inode->i_mutex); 1559 mutex_unlock(&inode->i_mutex);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index baaa518baaf..19a0d85b451 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -33,6 +33,8 @@
33 33
34static int link_free_space(struct btrfs_free_space_ctl *ctl, 34static int link_free_space(struct btrfs_free_space_ctl *ctl,
35 struct btrfs_free_space *info); 35 struct btrfs_free_space *info);
36static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
37 struct btrfs_free_space *info);
36 38
37static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 39static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38 struct btrfs_path *path, 40 struct btrfs_path *path,
@@ -584,6 +586,44 @@ static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
584 return 0; 586 return 0;
585} 587}
586 588
589/*
590 * Since we attach pinned extents after the fact we can have contiguous sections
591 * of free space that are split up in entries. This poses a problem with the
592 * tree logging stuff since it could have allocated across what appears to be 2
593 * entries since we would have merged the entries when adding the pinned extents
594 * back to the free space cache. So run through the space cache that we just
595 * loaded and merge contiguous entries. This will make the log replay stuff not
596 * blow up and it will make for nicer allocator behavior.
597 */
598static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
599{
600 struct btrfs_free_space *e, *prev = NULL;
601 struct rb_node *n;
602
603again:
604 spin_lock(&ctl->tree_lock);
605 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
606 e = rb_entry(n, struct btrfs_free_space, offset_index);
607 if (!prev)
608 goto next;
609 if (e->bitmap || prev->bitmap)
610 goto next;
611 if (prev->offset + prev->bytes == e->offset) {
612 unlink_free_space(ctl, prev);
613 unlink_free_space(ctl, e);
614 prev->bytes += e->bytes;
615 kmem_cache_free(btrfs_free_space_cachep, e);
616 link_free_space(ctl, prev);
617 prev = NULL;
618 spin_unlock(&ctl->tree_lock);
619 goto again;
620 }
621next:
622 prev = e;
623 }
624 spin_unlock(&ctl->tree_lock);
625}
626
587int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 627int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
588 struct btrfs_free_space_ctl *ctl, 628 struct btrfs_free_space_ctl *ctl,
589 struct btrfs_path *path, u64 offset) 629 struct btrfs_path *path, u64 offset)
@@ -726,6 +766,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
726 } 766 }
727 767
728 io_ctl_drop_pages(&io_ctl); 768 io_ctl_drop_pages(&io_ctl);
769 merge_space_tree(ctl);
729 ret = 1; 770 ret = 1;
730out: 771out:
731 io_ctl_free(&io_ctl); 772 io_ctl_free(&io_ctl);
@@ -972,9 +1013,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
972 goto out; 1013 goto out;
973 1014
974 1015
975 ret = filemap_write_and_wait(inode->i_mapping); 1016 btrfs_wait_ordered_range(inode, 0, (u64)-1);
976 if (ret)
977 goto out;
978 1017
979 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 1018 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
980 key.offset = offset; 1019 key.offset = offset;
@@ -1065,7 +1104,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1065 spin_unlock(&block_group->lock); 1104 spin_unlock(&block_group->lock);
1066 ret = 0; 1105 ret = 0;
1067#ifdef DEBUG 1106#ifdef DEBUG
1068 printk(KERN_ERR "btrfs: failed to write free space cace " 1107 printk(KERN_ERR "btrfs: failed to write free space cache "
1069 "for block group %llu\n", block_group->key.objectid); 1108 "for block group %llu\n", block_group->key.objectid);
1070#endif 1109#endif
1071 } 1110 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3ce7805d111..92df0a5d1d9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -89,7 +89,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
89 89
90static int btrfs_setsize(struct inode *inode, loff_t newsize); 90static int btrfs_setsize(struct inode *inode, loff_t newsize);
91static int btrfs_truncate(struct inode *inode); 91static int btrfs_truncate(struct inode *inode);
92static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); 92static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
93static noinline int cow_file_range(struct inode *inode, 93static noinline int cow_file_range(struct inode *inode,
94 struct page *locked_page, 94 struct page *locked_page,
95 u64 start, u64 end, int *page_started, 95 u64 start, u64 end, int *page_started,
@@ -172,9 +172,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
172 cur_size = min_t(unsigned long, compressed_size, 172 cur_size = min_t(unsigned long, compressed_size,
173 PAGE_CACHE_SIZE); 173 PAGE_CACHE_SIZE);
174 174
175 kaddr = kmap_atomic(cpage, KM_USER0); 175 kaddr = kmap_atomic(cpage);
176 write_extent_buffer(leaf, kaddr, ptr, cur_size); 176 write_extent_buffer(leaf, kaddr, ptr, cur_size);
177 kunmap_atomic(kaddr, KM_USER0); 177 kunmap_atomic(kaddr);
178 178
179 i++; 179 i++;
180 ptr += cur_size; 180 ptr += cur_size;
@@ -186,10 +186,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
186 page = find_get_page(inode->i_mapping, 186 page = find_get_page(inode->i_mapping,
187 start >> PAGE_CACHE_SHIFT); 187 start >> PAGE_CACHE_SHIFT);
188 btrfs_set_file_extent_compression(leaf, ei, 0); 188 btrfs_set_file_extent_compression(leaf, ei, 0);
189 kaddr = kmap_atomic(page, KM_USER0); 189 kaddr = kmap_atomic(page);
190 offset = start & (PAGE_CACHE_SIZE - 1); 190 offset = start & (PAGE_CACHE_SIZE - 1);
191 write_extent_buffer(leaf, kaddr + offset, ptr, size); 191 write_extent_buffer(leaf, kaddr + offset, ptr, size);
192 kunmap_atomic(kaddr, KM_USER0); 192 kunmap_atomic(kaddr);
193 page_cache_release(page); 193 page_cache_release(page);
194 } 194 }
195 btrfs_mark_buffer_dirty(leaf); 195 btrfs_mark_buffer_dirty(leaf);
@@ -257,10 +257,13 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
257 ret = insert_inline_extent(trans, root, inode, start, 257 ret = insert_inline_extent(trans, root, inode, start,
258 inline_len, compressed_size, 258 inline_len, compressed_size,
259 compress_type, compressed_pages); 259 compress_type, compressed_pages);
260 if (ret) { 260 if (ret && ret != -ENOSPC) {
261 btrfs_abort_transaction(trans, root, ret); 261 btrfs_abort_transaction(trans, root, ret);
262 return ret; 262 return ret;
263 } else if (ret == -ENOSPC) {
264 return 1;
263 } 265 }
266
264 btrfs_delalloc_release_metadata(inode, end + 1 - start); 267 btrfs_delalloc_release_metadata(inode, end + 1 - start);
265 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 268 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
266 return 0; 269 return 0;
@@ -426,10 +429,10 @@ again:
426 * sending it down to disk 429 * sending it down to disk
427 */ 430 */
428 if (offset) { 431 if (offset) {
429 kaddr = kmap_atomic(page, KM_USER0); 432 kaddr = kmap_atomic(page);
430 memset(kaddr + offset, 0, 433 memset(kaddr + offset, 0,
431 PAGE_CACHE_SIZE - offset); 434 PAGE_CACHE_SIZE - offset);
432 kunmap_atomic(kaddr, KM_USER0); 435 kunmap_atomic(kaddr);
433 } 436 }
434 will_compress = 1; 437 will_compress = 1;
435 } 438 }
@@ -1572,11 +1575,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1572 if (btrfs_is_free_space_inode(root, inode)) 1575 if (btrfs_is_free_space_inode(root, inode))
1573 metadata = 2; 1576 metadata = 2;
1574 1577
1575 ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
1576 if (ret)
1577 return ret;
1578
1579 if (!(rw & REQ_WRITE)) { 1578 if (!(rw & REQ_WRITE)) {
1579 ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
1580 if (ret)
1581 return ret;
1582
1580 if (bio_flags & EXTENT_BIO_COMPRESSED) { 1583 if (bio_flags & EXTENT_BIO_COMPRESSED) {
1581 return btrfs_submit_compressed_read(inode, bio, 1584 return btrfs_submit_compressed_read(inode, bio,
1582 mirror_num, bio_flags); 1585 mirror_num, bio_flags);
@@ -1815,25 +1818,24 @@ out:
1815 * an ordered extent if the range of bytes in the file it covers are 1818 * an ordered extent if the range of bytes in the file it covers are
1816 * fully written. 1819 * fully written.
1817 */ 1820 */
1818static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) 1821static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
1819{ 1822{
1823 struct inode *inode = ordered_extent->inode;
1820 struct btrfs_root *root = BTRFS_I(inode)->root; 1824 struct btrfs_root *root = BTRFS_I(inode)->root;
1821 struct btrfs_trans_handle *trans = NULL; 1825 struct btrfs_trans_handle *trans = NULL;
1822 struct btrfs_ordered_extent *ordered_extent = NULL;
1823 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1826 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1824 struct extent_state *cached_state = NULL; 1827 struct extent_state *cached_state = NULL;
1825 int compress_type = 0; 1828 int compress_type = 0;
1826 int ret; 1829 int ret;
1827 bool nolock; 1830 bool nolock;
1828 1831
1829 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1830 end - start + 1);
1831 if (!ret)
1832 return 0;
1833 BUG_ON(!ordered_extent); /* Logic error */
1834
1835 nolock = btrfs_is_free_space_inode(root, inode); 1832 nolock = btrfs_is_free_space_inode(root, inode);
1836 1833
1834 if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
1835 ret = -EIO;
1836 goto out;
1837 }
1838
1837 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1839 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1838 BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ 1840 BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
1839 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1841 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
@@ -1889,12 +1891,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1889 ordered_extent->file_offset, 1891 ordered_extent->file_offset,
1890 ordered_extent->len); 1892 ordered_extent->len);
1891 } 1893 }
1892 unlock_extent_cached(io_tree, ordered_extent->file_offset, 1894
1893 ordered_extent->file_offset +
1894 ordered_extent->len - 1, &cached_state, GFP_NOFS);
1895 if (ret < 0) { 1895 if (ret < 0) {
1896 btrfs_abort_transaction(trans, root, ret); 1896 btrfs_abort_transaction(trans, root, ret);
1897 goto out; 1897 goto out_unlock;
1898 } 1898 }
1899 1899
1900 add_pending_csums(trans, inode, ordered_extent->file_offset, 1900 add_pending_csums(trans, inode, ordered_extent->file_offset,
@@ -1905,10 +1905,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1905 ret = btrfs_update_inode_fallback(trans, root, inode); 1905 ret = btrfs_update_inode_fallback(trans, root, inode);
1906 if (ret) { /* -ENOMEM or corruption */ 1906 if (ret) { /* -ENOMEM or corruption */
1907 btrfs_abort_transaction(trans, root, ret); 1907 btrfs_abort_transaction(trans, root, ret);
1908 goto out; 1908 goto out_unlock;
1909 } 1909 }
1910 } 1910 }
1911 ret = 0; 1911 ret = 0;
1912out_unlock:
1913 unlock_extent_cached(io_tree, ordered_extent->file_offset,
1914 ordered_extent->file_offset +
1915 ordered_extent->len - 1, &cached_state, GFP_NOFS);
1912out: 1916out:
1913 if (root != root->fs_info->tree_root) 1917 if (root != root->fs_info->tree_root)
1914 btrfs_delalloc_release_metadata(inode, ordered_extent->len); 1918 btrfs_delalloc_release_metadata(inode, ordered_extent->len);
@@ -1919,26 +1923,57 @@ out:
1919 btrfs_end_transaction(trans, root); 1923 btrfs_end_transaction(trans, root);
1920 } 1924 }
1921 1925
1926 if (ret)
1927 clear_extent_uptodate(io_tree, ordered_extent->file_offset,
1928 ordered_extent->file_offset +
1929 ordered_extent->len - 1, NULL, GFP_NOFS);
1930
1931 /*
1932 * This needs to be done to make sure anybody waiting knows we are done
1933 * updating everything for this ordered extent.
1934 */
1935 btrfs_remove_ordered_extent(inode, ordered_extent);
1936
1922 /* once for us */ 1937 /* once for us */
1923 btrfs_put_ordered_extent(ordered_extent); 1938 btrfs_put_ordered_extent(ordered_extent);
1924 /* once for the tree */ 1939 /* once for the tree */
1925 btrfs_put_ordered_extent(ordered_extent); 1940 btrfs_put_ordered_extent(ordered_extent);
1926 1941
1927 return 0; 1942 return ret;
1928out_unlock: 1943}
1929 unlock_extent_cached(io_tree, ordered_extent->file_offset, 1944
1930 ordered_extent->file_offset + 1945static void finish_ordered_fn(struct btrfs_work *work)
1931 ordered_extent->len - 1, &cached_state, GFP_NOFS); 1946{
1932 goto out; 1947 struct btrfs_ordered_extent *ordered_extent;
1948 ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
1949 btrfs_finish_ordered_io(ordered_extent);
1933} 1950}
1934 1951
1935static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, 1952static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
1936 struct extent_state *state, int uptodate) 1953 struct extent_state *state, int uptodate)
1937{ 1954{
1955 struct inode *inode = page->mapping->host;
1956 struct btrfs_root *root = BTRFS_I(inode)->root;
1957 struct btrfs_ordered_extent *ordered_extent = NULL;
1958 struct btrfs_workers *workers;
1959
1938 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); 1960 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
1939 1961
1940 ClearPagePrivate2(page); 1962 ClearPagePrivate2(page);
1941 return btrfs_finish_ordered_io(page->mapping->host, start, end); 1963 if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1964 end - start + 1, uptodate))
1965 return 0;
1966
1967 ordered_extent->work.func = finish_ordered_fn;
1968 ordered_extent->work.flags = 0;
1969
1970 if (btrfs_is_free_space_inode(root, inode))
1971 workers = &root->fs_info->endio_freespace_worker;
1972 else
1973 workers = &root->fs_info->endio_write_workers;
1974 btrfs_queue_worker(workers, &ordered_extent->work);
1975
1976 return 0;
1942} 1977}
1943 1978
1944/* 1979/*
@@ -1979,7 +2014,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1979 } else { 2014 } else {
1980 ret = get_state_private(io_tree, start, &private); 2015 ret = get_state_private(io_tree, start, &private);
1981 } 2016 }
1982 kaddr = kmap_atomic(page, KM_USER0); 2017 kaddr = kmap_atomic(page);
1983 if (ret) 2018 if (ret)
1984 goto zeroit; 2019 goto zeroit;
1985 2020
@@ -1988,7 +2023,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1988 if (csum != private) 2023 if (csum != private)
1989 goto zeroit; 2024 goto zeroit;
1990 2025
1991 kunmap_atomic(kaddr, KM_USER0); 2026 kunmap_atomic(kaddr);
1992good: 2027good:
1993 return 0; 2028 return 0;
1994 2029
@@ -2000,7 +2035,7 @@ zeroit:
2000 (unsigned long long)private); 2035 (unsigned long long)private);
2001 memset(kaddr + offset, 1, end - start + 1); 2036 memset(kaddr + offset, 1, end - start + 1);
2002 flush_dcache_page(page); 2037 flush_dcache_page(page);
2003 kunmap_atomic(kaddr, KM_USER0); 2038 kunmap_atomic(kaddr);
2004 if (private == 0) 2039 if (private == 0)
2005 return 0; 2040 return 0;
2006 return -EIO; 2041 return -EIO;
@@ -2072,12 +2107,12 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
2072 struct btrfs_block_rsv *block_rsv; 2107 struct btrfs_block_rsv *block_rsv;
2073 int ret; 2108 int ret;
2074 2109
2075 if (!list_empty(&root->orphan_list) || 2110 if (atomic_read(&root->orphan_inodes) ||
2076 root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) 2111 root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
2077 return; 2112 return;
2078 2113
2079 spin_lock(&root->orphan_lock); 2114 spin_lock(&root->orphan_lock);
2080 if (!list_empty(&root->orphan_list)) { 2115 if (atomic_read(&root->orphan_inodes)) {
2081 spin_unlock(&root->orphan_lock); 2116 spin_unlock(&root->orphan_lock);
2082 return; 2117 return;
2083 } 2118 }
@@ -2134,8 +2169,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2134 block_rsv = NULL; 2169 block_rsv = NULL;
2135 } 2170 }
2136 2171
2137 if (list_empty(&BTRFS_I(inode)->i_orphan)) { 2172 if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
2138 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); 2173 &BTRFS_I(inode)->runtime_flags)) {
2139#if 0 2174#if 0
2140 /* 2175 /*
2141 * For proper ENOSPC handling, we should do orphan 2176 * For proper ENOSPC handling, we should do orphan
@@ -2148,12 +2183,12 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2148 insert = 1; 2183 insert = 1;
2149#endif 2184#endif
2150 insert = 1; 2185 insert = 1;
2186 atomic_dec(&root->orphan_inodes);
2151 } 2187 }
2152 2188
2153 if (!BTRFS_I(inode)->orphan_meta_reserved) { 2189 if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
2154 BTRFS_I(inode)->orphan_meta_reserved = 1; 2190 &BTRFS_I(inode)->runtime_flags))
2155 reserve = 1; 2191 reserve = 1;
2156 }
2157 spin_unlock(&root->orphan_lock); 2192 spin_unlock(&root->orphan_lock);
2158 2193
2159 /* grab metadata reservation from transaction handle */ 2194 /* grab metadata reservation from transaction handle */
@@ -2166,6 +2201,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2166 if (insert >= 1) { 2201 if (insert >= 1) {
2167 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); 2202 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
2168 if (ret && ret != -EEXIST) { 2203 if (ret && ret != -EEXIST) {
2204 clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
2205 &BTRFS_I(inode)->runtime_flags);
2169 btrfs_abort_transaction(trans, root, ret); 2206 btrfs_abort_transaction(trans, root, ret);
2170 return ret; 2207 return ret;
2171 } 2208 }
@@ -2196,15 +2233,13 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
2196 int ret = 0; 2233 int ret = 0;
2197 2234
2198 spin_lock(&root->orphan_lock); 2235 spin_lock(&root->orphan_lock);
2199 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 2236 if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
2200 list_del_init(&BTRFS_I(inode)->i_orphan); 2237 &BTRFS_I(inode)->runtime_flags))
2201 delete_item = 1; 2238 delete_item = 1;
2202 }
2203 2239
2204 if (BTRFS_I(inode)->orphan_meta_reserved) { 2240 if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
2205 BTRFS_I(inode)->orphan_meta_reserved = 0; 2241 &BTRFS_I(inode)->runtime_flags))
2206 release_rsv = 1; 2242 release_rsv = 1;
2207 }
2208 spin_unlock(&root->orphan_lock); 2243 spin_unlock(&root->orphan_lock);
2209 2244
2210 if (trans && delete_item) { 2245 if (trans && delete_item) {
@@ -2212,8 +2247,10 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
2212 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ 2247 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
2213 } 2248 }
2214 2249
2215 if (release_rsv) 2250 if (release_rsv) {
2216 btrfs_orphan_release_metadata(inode); 2251 btrfs_orphan_release_metadata(inode);
2252 atomic_dec(&root->orphan_inodes);
2253 }
2217 2254
2218 return 0; 2255 return 0;
2219} 2256}
@@ -2341,6 +2378,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2341 ret = PTR_ERR(trans); 2378 ret = PTR_ERR(trans);
2342 goto out; 2379 goto out;
2343 } 2380 }
2381 printk(KERN_ERR "auto deleting %Lu\n",
2382 found_key.objectid);
2344 ret = btrfs_del_orphan_item(trans, root, 2383 ret = btrfs_del_orphan_item(trans, root,
2345 found_key.objectid); 2384 found_key.objectid);
2346 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ 2385 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
@@ -2352,9 +2391,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2352 * add this inode to the orphan list so btrfs_orphan_del does 2391 * add this inode to the orphan list so btrfs_orphan_del does
2353 * the proper thing when we hit it 2392 * the proper thing when we hit it
2354 */ 2393 */
2355 spin_lock(&root->orphan_lock); 2394 set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
2356 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); 2395 &BTRFS_I(inode)->runtime_flags);
2357 spin_unlock(&root->orphan_lock);
2358 2396
2359 /* if we have links, this was a truncate, lets do that */ 2397 /* if we have links, this was a truncate, lets do that */
2360 if (inode->i_nlink) { 2398 if (inode->i_nlink) {
@@ -2510,7 +2548,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
2510 2548
2511 inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); 2549 inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
2512 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); 2550 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
2513 BTRFS_I(inode)->sequence = btrfs_inode_sequence(leaf, inode_item); 2551 inode->i_version = btrfs_inode_sequence(leaf, inode_item);
2514 inode->i_generation = BTRFS_I(inode)->generation; 2552 inode->i_generation = BTRFS_I(inode)->generation;
2515 inode->i_rdev = 0; 2553 inode->i_rdev = 0;
2516 rdev = btrfs_inode_rdev(leaf, inode_item); 2554 rdev = btrfs_inode_rdev(leaf, inode_item);
@@ -2594,7 +2632,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2594 2632
2595 btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); 2633 btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
2596 btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); 2634 btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
2597 btrfs_set_inode_sequence(leaf, item, BTRFS_I(inode)->sequence); 2635 btrfs_set_inode_sequence(leaf, item, inode->i_version);
2598 btrfs_set_inode_transid(leaf, item, trans->transid); 2636 btrfs_set_inode_transid(leaf, item, trans->transid);
2599 btrfs_set_inode_rdev(leaf, item, inode->i_rdev); 2637 btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
2600 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); 2638 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
@@ -2752,6 +2790,8 @@ err:
2752 goto out; 2790 goto out;
2753 2791
2754 btrfs_i_size_write(dir, dir->i_size - name_len * 2); 2792 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
2793 inode_inc_iversion(inode);
2794 inode_inc_iversion(dir);
2755 inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; 2795 inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
2756 btrfs_update_inode(trans, root, dir); 2796 btrfs_update_inode(trans, root, dir);
2757out: 2797out:
@@ -3089,6 +3129,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
3089 } 3129 }
3090 3130
3091 btrfs_i_size_write(dir, dir->i_size - name_len * 2); 3131 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
3132 inode_inc_iversion(dir);
3092 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 3133 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
3093 ret = btrfs_update_inode(trans, root, dir); 3134 ret = btrfs_update_inode(trans, root, dir);
3094 if (ret) 3135 if (ret)
@@ -3607,7 +3648,8 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
3607 * any new writes get down to disk quickly. 3648 * any new writes get down to disk quickly.
3608 */ 3649 */
3609 if (newsize == 0) 3650 if (newsize == 0)
3610 BTRFS_I(inode)->ordered_data_close = 1; 3651 set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
3652 &BTRFS_I(inode)->runtime_flags);
3611 3653
3612 /* we don't support swapfiles, so vmtruncate shouldn't fail */ 3654 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3613 truncate_setsize(inode, newsize); 3655 truncate_setsize(inode, newsize);
@@ -3638,6 +3680,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3638 3680
3639 if (attr->ia_valid) { 3681 if (attr->ia_valid) {
3640 setattr_copy(inode, attr); 3682 setattr_copy(inode, attr);
3683 inode_inc_iversion(inode);
3641 err = btrfs_dirty_inode(inode); 3684 err = btrfs_dirty_inode(inode);
3642 3685
3643 if (!err && attr->ia_valid & ATTR_MODE) 3686 if (!err && attr->ia_valid & ATTR_MODE)
@@ -3671,7 +3714,8 @@ void btrfs_evict_inode(struct inode *inode)
3671 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3714 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3672 3715
3673 if (root->fs_info->log_root_recovering) { 3716 if (root->fs_info->log_root_recovering) {
3674 BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); 3717 BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
3718 &BTRFS_I(inode)->runtime_flags));
3675 goto no_delete; 3719 goto no_delete;
3676 } 3720 }
3677 3721
@@ -4066,7 +4110,7 @@ static struct inode *new_simple_dir(struct super_block *s,
4066 4110
4067 BTRFS_I(inode)->root = root; 4111 BTRFS_I(inode)->root = root;
4068 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); 4112 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
4069 BTRFS_I(inode)->dummy_inode = 1; 4113 set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
4070 4114
4071 inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; 4115 inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
4072 inode->i_op = &btrfs_dir_ro_inode_operations; 4116 inode->i_op = &btrfs_dir_ro_inode_operations;
@@ -4370,7 +4414,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4370 int ret = 0; 4414 int ret = 0;
4371 bool nolock = false; 4415 bool nolock = false;
4372 4416
4373 if (BTRFS_I(inode)->dummy_inode) 4417 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
4374 return 0; 4418 return 0;
4375 4419
4376 if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) 4420 if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
@@ -4403,7 +4447,7 @@ int btrfs_dirty_inode(struct inode *inode)
4403 struct btrfs_trans_handle *trans; 4447 struct btrfs_trans_handle *trans;
4404 int ret; 4448 int ret;
4405 4449
4406 if (BTRFS_I(inode)->dummy_inode) 4450 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
4407 return 0; 4451 return 0;
4408 4452
4409 trans = btrfs_join_transaction(root); 4453 trans = btrfs_join_transaction(root);
@@ -4730,6 +4774,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
4730 4774
4731 btrfs_i_size_write(parent_inode, parent_inode->i_size + 4775 btrfs_i_size_write(parent_inode, parent_inode->i_size +
4732 name_len * 2); 4776 name_len * 2);
4777 inode_inc_iversion(parent_inode);
4733 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; 4778 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
4734 ret = btrfs_update_inode(trans, root, parent_inode); 4779 ret = btrfs_update_inode(trans, root, parent_inode);
4735 if (ret) 4780 if (ret)
@@ -4937,6 +4982,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4937 } 4982 }
4938 4983
4939 btrfs_inc_nlink(inode); 4984 btrfs_inc_nlink(inode);
4985 inode_inc_iversion(inode);
4940 inode->i_ctime = CURRENT_TIME; 4986 inode->i_ctime = CURRENT_TIME;
4941 ihold(inode); 4987 ihold(inode);
4942 4988
@@ -5079,12 +5125,12 @@ static noinline int uncompress_inline(struct btrfs_path *path,
5079 ret = btrfs_decompress(compress_type, tmp, page, 5125 ret = btrfs_decompress(compress_type, tmp, page,
5080 extent_offset, inline_size, max_size); 5126 extent_offset, inline_size, max_size);
5081 if (ret) { 5127 if (ret) {
5082 char *kaddr = kmap_atomic(page, KM_USER0); 5128 char *kaddr = kmap_atomic(page);
5083 unsigned long copy_size = min_t(u64, 5129 unsigned long copy_size = min_t(u64,
5084 PAGE_CACHE_SIZE - pg_offset, 5130 PAGE_CACHE_SIZE - pg_offset,
5085 max_size - extent_offset); 5131 max_size - extent_offset);
5086 memset(kaddr + pg_offset, 0, copy_size); 5132 memset(kaddr + pg_offset, 0, copy_size);
5087 kunmap_atomic(kaddr, KM_USER0); 5133 kunmap_atomic(kaddr);
5088 } 5134 }
5089 kfree(tmp); 5135 kfree(tmp);
5090 return 0; 5136 return 0;
@@ -5862,11 +5908,11 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5862 unsigned long flags; 5908 unsigned long flags;
5863 5909
5864 local_irq_save(flags); 5910 local_irq_save(flags);
5865 kaddr = kmap_atomic(page, KM_IRQ0); 5911 kaddr = kmap_atomic(page);
5866 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, 5912 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
5867 csum, bvec->bv_len); 5913 csum, bvec->bv_len);
5868 btrfs_csum_final(csum, (char *)&csum); 5914 btrfs_csum_final(csum, (char *)&csum);
5869 kunmap_atomic(kaddr, KM_IRQ0); 5915 kunmap_atomic(kaddr);
5870 local_irq_restore(flags); 5916 local_irq_restore(flags);
5871 5917
5872 flush_dcache_page(bvec->bv_page); 5918 flush_dcache_page(bvec->bv_page);
@@ -5903,9 +5949,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
5903 struct btrfs_dio_private *dip = bio->bi_private; 5949 struct btrfs_dio_private *dip = bio->bi_private;
5904 struct inode *inode = dip->inode; 5950 struct inode *inode = dip->inode;
5905 struct btrfs_root *root = BTRFS_I(inode)->root; 5951 struct btrfs_root *root = BTRFS_I(inode)->root;
5906 struct btrfs_trans_handle *trans;
5907 struct btrfs_ordered_extent *ordered = NULL; 5952 struct btrfs_ordered_extent *ordered = NULL;
5908 struct extent_state *cached_state = NULL;
5909 u64 ordered_offset = dip->logical_offset; 5953 u64 ordered_offset = dip->logical_offset;
5910 u64 ordered_bytes = dip->bytes; 5954 u64 ordered_bytes = dip->bytes;
5911 int ret; 5955 int ret;
@@ -5915,73 +5959,14 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
5915again: 5959again:
5916 ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, 5960 ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
5917 &ordered_offset, 5961 &ordered_offset,
5918 ordered_bytes); 5962 ordered_bytes, !err);
5919 if (!ret) 5963 if (!ret)
5920 goto out_test; 5964 goto out_test;
5921 5965
5922 BUG_ON(!ordered); 5966 ordered->work.func = finish_ordered_fn;
5923 5967 ordered->work.flags = 0;
5924 trans = btrfs_join_transaction(root); 5968 btrfs_queue_worker(&root->fs_info->endio_write_workers,
5925 if (IS_ERR(trans)) { 5969 &ordered->work);
5926 err = -ENOMEM;
5927 goto out;
5928 }
5929 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
5930
5931 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
5932 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5933 if (!ret)
5934 err = btrfs_update_inode_fallback(trans, root, inode);
5935 goto out;
5936 }
5937
5938 lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5939 ordered->file_offset + ordered->len - 1, 0,
5940 &cached_state);
5941
5942 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
5943 ret = btrfs_mark_extent_written(trans, inode,
5944 ordered->file_offset,
5945 ordered->file_offset +
5946 ordered->len);
5947 if (ret) {
5948 err = ret;
5949 goto out_unlock;
5950 }
5951 } else {
5952 ret = insert_reserved_file_extent(trans, inode,
5953 ordered->file_offset,
5954 ordered->start,
5955 ordered->disk_len,
5956 ordered->len,
5957 ordered->len,
5958 0, 0, 0,
5959 BTRFS_FILE_EXTENT_REG);
5960 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
5961 ordered->file_offset, ordered->len);
5962 if (ret) {
5963 err = ret;
5964 WARN_ON(1);
5965 goto out_unlock;
5966 }
5967 }
5968
5969 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
5970 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5971 if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
5972 btrfs_update_inode_fallback(trans, root, inode);
5973 ret = 0;
5974out_unlock:
5975 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5976 ordered->file_offset + ordered->len - 1,
5977 &cached_state, GFP_NOFS);
5978out:
5979 btrfs_delalloc_release_metadata(inode, ordered->len);
5980 btrfs_end_transaction(trans, root);
5981 ordered_offset = ordered->file_offset + ordered->len;
5982 btrfs_put_ordered_extent(ordered);
5983 btrfs_put_ordered_extent(ordered);
5984
5985out_test: 5970out_test:
5986 /* 5971 /*
5987 * our bio might span multiple ordered extents. If we haven't 5972 * our bio might span multiple ordered extents. If we haven't
@@ -5990,12 +5975,12 @@ out_test:
5990 if (ordered_offset < dip->logical_offset + dip->bytes) { 5975 if (ordered_offset < dip->logical_offset + dip->bytes) {
5991 ordered_bytes = dip->logical_offset + dip->bytes - 5976 ordered_bytes = dip->logical_offset + dip->bytes -
5992 ordered_offset; 5977 ordered_offset;
5978 ordered = NULL;
5993 goto again; 5979 goto again;
5994 } 5980 }
5995out_done: 5981out_done:
5996 bio->bi_private = dip->private; 5982 bio->bi_private = dip->private;
5997 5983
5998 kfree(dip->csums);
5999 kfree(dip); 5984 kfree(dip);
6000 5985
6001 /* If we had an error make sure to clear the uptodate flag */ 5986 /* If we had an error make sure to clear the uptodate flag */
@@ -6063,9 +6048,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
6063 int ret; 6048 int ret;
6064 6049
6065 bio_get(bio); 6050 bio_get(bio);
6066 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 6051
6067 if (ret) 6052 if (!write) {
6068 goto err; 6053 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
6054 if (ret)
6055 goto err;
6056 }
6069 6057
6070 if (skip_sum) 6058 if (skip_sum)
6071 goto map; 6059 goto map;
@@ -6485,13 +6473,13 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
6485 6473
6486static void btrfs_invalidatepage(struct page *page, unsigned long offset) 6474static void btrfs_invalidatepage(struct page *page, unsigned long offset)
6487{ 6475{
6476 struct inode *inode = page->mapping->host;
6488 struct extent_io_tree *tree; 6477 struct extent_io_tree *tree;
6489 struct btrfs_ordered_extent *ordered; 6478 struct btrfs_ordered_extent *ordered;
6490 struct extent_state *cached_state = NULL; 6479 struct extent_state *cached_state = NULL;
6491 u64 page_start = page_offset(page); 6480 u64 page_start = page_offset(page);
6492 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 6481 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
6493 6482
6494
6495 /* 6483 /*
6496 * we have the page locked, so new writeback can't start, 6484 * we have the page locked, so new writeback can't start,
6497 * and the dirty bit won't be cleared while we are here. 6485 * and the dirty bit won't be cleared while we are here.
@@ -6501,13 +6489,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
6501 */ 6489 */
6502 wait_on_page_writeback(page); 6490 wait_on_page_writeback(page);
6503 6491
6504 tree = &BTRFS_I(page->mapping->host)->io_tree; 6492 tree = &BTRFS_I(inode)->io_tree;
6505 if (offset) { 6493 if (offset) {
6506 btrfs_releasepage(page, GFP_NOFS); 6494 btrfs_releasepage(page, GFP_NOFS);
6507 return; 6495 return;
6508 } 6496 }
6509 lock_extent_bits(tree, page_start, page_end, 0, &cached_state); 6497 lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
6510 ordered = btrfs_lookup_ordered_extent(page->mapping->host, 6498 ordered = btrfs_lookup_ordered_extent(inode,
6511 page_offset(page)); 6499 page_offset(page));
6512 if (ordered) { 6500 if (ordered) {
6513 /* 6501 /*
@@ -6522,9 +6510,10 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
6522 * whoever cleared the private bit is responsible 6510 * whoever cleared the private bit is responsible
6523 * for the finish_ordered_io 6511 * for the finish_ordered_io
6524 */ 6512 */
6525 if (TestClearPagePrivate2(page)) { 6513 if (TestClearPagePrivate2(page) &&
6526 btrfs_finish_ordered_io(page->mapping->host, 6514 btrfs_dec_test_ordered_pending(inode, &ordered, page_start,
6527 page_start, page_end); 6515 PAGE_CACHE_SIZE, 1)) {
6516 btrfs_finish_ordered_io(ordered);
6528 } 6517 }
6529 btrfs_put_ordered_extent(ordered); 6518 btrfs_put_ordered_extent(ordered);
6530 cached_state = NULL; 6519 cached_state = NULL;
@@ -6771,7 +6760,8 @@ static int btrfs_truncate(struct inode *inode)
6771 * using truncate to replace the contents of the file will 6760 * using truncate to replace the contents of the file will
6772 * end up with a zero length file after a crash. 6761 * end up with a zero length file after a crash.
6773 */ 6762 */
6774 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) 6763 if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
6764 &BTRFS_I(inode)->runtime_flags))
6775 btrfs_add_ordered_operation(trans, root, inode); 6765 btrfs_add_ordered_operation(trans, root, inode);
6776 6766
6777 while (1) { 6767 while (1) {
@@ -6894,7 +6884,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6894 ei->root = NULL; 6884 ei->root = NULL;
6895 ei->space_info = NULL; 6885 ei->space_info = NULL;
6896 ei->generation = 0; 6886 ei->generation = 0;
6897 ei->sequence = 0;
6898 ei->last_trans = 0; 6887 ei->last_trans = 0;
6899 ei->last_sub_trans = 0; 6888 ei->last_sub_trans = 0;
6900 ei->logged_trans = 0; 6889 ei->logged_trans = 0;
@@ -6909,11 +6898,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6909 ei->outstanding_extents = 0; 6898 ei->outstanding_extents = 0;
6910 ei->reserved_extents = 0; 6899 ei->reserved_extents = 0;
6911 6900
6912 ei->ordered_data_close = 0; 6901 ei->runtime_flags = 0;
6913 ei->orphan_meta_reserved = 0;
6914 ei->dummy_inode = 0;
6915 ei->in_defrag = 0;
6916 ei->delalloc_meta_reserved = 0;
6917 ei->force_compress = BTRFS_COMPRESS_NONE; 6902 ei->force_compress = BTRFS_COMPRESS_NONE;
6918 6903
6919 ei->delayed_node = NULL; 6904 ei->delayed_node = NULL;
@@ -6927,7 +6912,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6927 mutex_init(&ei->log_mutex); 6912 mutex_init(&ei->log_mutex);
6928 mutex_init(&ei->delalloc_mutex); 6913 mutex_init(&ei->delalloc_mutex);
6929 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 6914 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
6930 INIT_LIST_HEAD(&ei->i_orphan);
6931 INIT_LIST_HEAD(&ei->delalloc_inodes); 6915 INIT_LIST_HEAD(&ei->delalloc_inodes);
6932 INIT_LIST_HEAD(&ei->ordered_operations); 6916 INIT_LIST_HEAD(&ei->ordered_operations);
6933 RB_CLEAR_NODE(&ei->rb_node); 6917 RB_CLEAR_NODE(&ei->rb_node);
@@ -6972,13 +6956,12 @@ void btrfs_destroy_inode(struct inode *inode)
6972 spin_unlock(&root->fs_info->ordered_extent_lock); 6956 spin_unlock(&root->fs_info->ordered_extent_lock);
6973 } 6957 }
6974 6958
6975 spin_lock(&root->orphan_lock); 6959 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
6976 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6960 &BTRFS_I(inode)->runtime_flags)) {
6977 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", 6961 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
6978 (unsigned long long)btrfs_ino(inode)); 6962 (unsigned long long)btrfs_ino(inode));
6979 list_del_init(&BTRFS_I(inode)->i_orphan); 6963 atomic_dec(&root->orphan_inodes);
6980 } 6964 }
6981 spin_unlock(&root->orphan_lock);
6982 6965
6983 while (1) { 6966 while (1) {
6984 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); 6967 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
@@ -7193,6 +7176,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7193 if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode)) 7176 if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
7194 btrfs_add_ordered_operation(trans, root, old_inode); 7177 btrfs_add_ordered_operation(trans, root, old_inode);
7195 7178
7179 inode_inc_iversion(old_dir);
7180 inode_inc_iversion(new_dir);
7181 inode_inc_iversion(old_inode);
7196 old_dir->i_ctime = old_dir->i_mtime = ctime; 7182 old_dir->i_ctime = old_dir->i_mtime = ctime;
7197 new_dir->i_ctime = new_dir->i_mtime = ctime; 7183 new_dir->i_ctime = new_dir->i_mtime = ctime;
7198 old_inode->i_ctime = ctime; 7184 old_inode->i_ctime = ctime;
@@ -7219,6 +7205,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7219 } 7205 }
7220 7206
7221 if (new_inode) { 7207 if (new_inode) {
7208 inode_inc_iversion(new_inode);
7222 new_inode->i_ctime = CURRENT_TIME; 7209 new_inode->i_ctime = CURRENT_TIME;
7223 if (unlikely(btrfs_ino(new_inode) == 7210 if (unlikely(btrfs_ino(new_inode) ==
7224 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 7211 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
@@ -7490,6 +7477,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
7490 cur_offset += ins.offset; 7477 cur_offset += ins.offset;
7491 *alloc_hint = ins.objectid + ins.offset; 7478 *alloc_hint = ins.objectid + ins.offset;
7492 7479
7480 inode_inc_iversion(inode);
7493 inode->i_ctime = CURRENT_TIME; 7481 inode->i_ctime = CURRENT_TIME;
7494 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 7482 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
7495 if (!(mode & FALLOC_FL_KEEP_SIZE) && 7483 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7f3a91367d7..24b776c08d9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -261,6 +261,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
261 } 261 }
262 262
263 btrfs_update_iflags(inode); 263 btrfs_update_iflags(inode);
264 inode_inc_iversion(inode);
264 inode->i_ctime = CURRENT_TIME; 265 inode->i_ctime = CURRENT_TIME;
265 ret = btrfs_update_inode(trans, root, inode); 266 ret = btrfs_update_inode(trans, root, inode);
266 267
@@ -2262,10 +2263,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2262 di_args->bytes_used = dev->bytes_used; 2263 di_args->bytes_used = dev->bytes_used;
2263 di_args->total_bytes = dev->total_bytes; 2264 di_args->total_bytes = dev->total_bytes;
2264 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); 2265 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
2265 if (dev->name) 2266 if (dev->name) {
2266 strncpy(di_args->path, dev->name, sizeof(di_args->path)); 2267 strncpy(di_args->path, dev->name, sizeof(di_args->path));
2267 else 2268 di_args->path[sizeof(di_args->path) - 1] = 0;
2269 } else {
2268 di_args->path[0] = '\0'; 2270 di_args->path[0] = '\0';
2271 }
2269 2272
2270out: 2273out:
2271 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) 2274 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
@@ -2622,6 +2625,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2622 btrfs_mark_buffer_dirty(leaf); 2625 btrfs_mark_buffer_dirty(leaf);
2623 btrfs_release_path(path); 2626 btrfs_release_path(path);
2624 2627
2628 inode_inc_iversion(inode);
2625 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2629 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2626 2630
2627 /* 2631 /*
@@ -2914,7 +2918,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2914 up_read(&info->groups_sem); 2918 up_read(&info->groups_sem);
2915 } 2919 }
2916 2920
2917 user_dest = (struct btrfs_ioctl_space_info *) 2921 user_dest = (struct btrfs_ioctl_space_info __user *)
2918 (arg + sizeof(struct btrfs_ioctl_space_args)); 2922 (arg + sizeof(struct btrfs_ioctl_space_args));
2919 2923
2920 if (copy_to_user(user_dest, dest_orig, alloc_size)) 2924 if (copy_to_user(user_dest, dest_orig, alloc_size))
@@ -3042,6 +3046,28 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
3042 return ret; 3046 return ret;
3043} 3047}
3044 3048
3049static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root,
3050 void __user *arg, int reset_after_read)
3051{
3052 struct btrfs_ioctl_get_dev_stats *sa;
3053 int ret;
3054
3055 if (reset_after_read && !capable(CAP_SYS_ADMIN))
3056 return -EPERM;
3057
3058 sa = memdup_user(arg, sizeof(*sa));
3059 if (IS_ERR(sa))
3060 return PTR_ERR(sa);
3061
3062 ret = btrfs_get_dev_stats(root, sa, reset_after_read);
3063
3064 if (copy_to_user(arg, sa, sizeof(*sa)))
3065 ret = -EFAULT;
3066
3067 kfree(sa);
3068 return ret;
3069}
3070
3045static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) 3071static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
3046{ 3072{
3047 int ret = 0; 3073 int ret = 0;
@@ -3212,8 +3238,9 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
3212 } 3238 }
3213} 3239}
3214 3240
3215static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) 3241static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3216{ 3242{
3243 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3217 struct btrfs_fs_info *fs_info = root->fs_info; 3244 struct btrfs_fs_info *fs_info = root->fs_info;
3218 struct btrfs_ioctl_balance_args *bargs; 3245 struct btrfs_ioctl_balance_args *bargs;
3219 struct btrfs_balance_control *bctl; 3246 struct btrfs_balance_control *bctl;
@@ -3225,6 +3252,10 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
3225 if (fs_info->sb->s_flags & MS_RDONLY) 3252 if (fs_info->sb->s_flags & MS_RDONLY)
3226 return -EROFS; 3253 return -EROFS;
3227 3254
3255 ret = mnt_want_write(file->f_path.mnt);
3256 if (ret)
3257 return ret;
3258
3228 mutex_lock(&fs_info->volume_mutex); 3259 mutex_lock(&fs_info->volume_mutex);
3229 mutex_lock(&fs_info->balance_mutex); 3260 mutex_lock(&fs_info->balance_mutex);
3230 3261
@@ -3291,6 +3322,7 @@ out_bargs:
3291out: 3322out:
3292 mutex_unlock(&fs_info->balance_mutex); 3323 mutex_unlock(&fs_info->balance_mutex);
3293 mutex_unlock(&fs_info->volume_mutex); 3324 mutex_unlock(&fs_info->volume_mutex);
3325 mnt_drop_write(file->f_path.mnt);
3294 return ret; 3326 return ret;
3295} 3327}
3296 3328
@@ -3386,7 +3418,7 @@ long btrfs_ioctl(struct file *file, unsigned int
3386 case BTRFS_IOC_DEV_INFO: 3418 case BTRFS_IOC_DEV_INFO:
3387 return btrfs_ioctl_dev_info(root, argp); 3419 return btrfs_ioctl_dev_info(root, argp);
3388 case BTRFS_IOC_BALANCE: 3420 case BTRFS_IOC_BALANCE:
3389 return btrfs_ioctl_balance(root, NULL); 3421 return btrfs_ioctl_balance(file, NULL);
3390 case BTRFS_IOC_CLONE: 3422 case BTRFS_IOC_CLONE:
3391 return btrfs_ioctl_clone(file, arg, 0, 0, 0); 3423 return btrfs_ioctl_clone(file, arg, 0, 0, 0);
3392 case BTRFS_IOC_CLONE_RANGE: 3424 case BTRFS_IOC_CLONE_RANGE:
@@ -3419,11 +3451,15 @@ long btrfs_ioctl(struct file *file, unsigned int
3419 case BTRFS_IOC_SCRUB_PROGRESS: 3451 case BTRFS_IOC_SCRUB_PROGRESS:
3420 return btrfs_ioctl_scrub_progress(root, argp); 3452 return btrfs_ioctl_scrub_progress(root, argp);
3421 case BTRFS_IOC_BALANCE_V2: 3453 case BTRFS_IOC_BALANCE_V2:
3422 return btrfs_ioctl_balance(root, argp); 3454 return btrfs_ioctl_balance(file, argp);
3423 case BTRFS_IOC_BALANCE_CTL: 3455 case BTRFS_IOC_BALANCE_CTL:
3424 return btrfs_ioctl_balance_ctl(root, arg); 3456 return btrfs_ioctl_balance_ctl(root, arg);
3425 case BTRFS_IOC_BALANCE_PROGRESS: 3457 case BTRFS_IOC_BALANCE_PROGRESS:
3426 return btrfs_ioctl_balance_progress(root, argp); 3458 return btrfs_ioctl_balance_progress(root, argp);
3459 case BTRFS_IOC_GET_DEV_STATS:
3460 return btrfs_ioctl_get_dev_stats(root, argp, 0);
3461 case BTRFS_IOC_GET_AND_RESET_DEV_STATS:
3462 return btrfs_ioctl_get_dev_stats(root, argp, 1);
3427 } 3463 }
3428 3464
3429 return -ENOTTY; 3465 return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 086e6bdae1c..497c530724c 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -266,6 +266,35 @@ struct btrfs_ioctl_logical_ino_args {
266 __u64 inodes; 266 __u64 inodes;
267}; 267};
268 268
269enum btrfs_dev_stat_values {
270 /* disk I/O failure stats */
271 BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */
272 BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */
273 BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */
274
275 /* stats for indirect indications for I/O failures */
276 BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or
277 * contents is illegal: this is an
278 * indication that the block was damaged
279 * during read or write, or written to
280 * wrong location or read from wrong
281 * location */
282 BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not
283 * been written */
284
285 BTRFS_DEV_STAT_VALUES_MAX
286};
287
288struct btrfs_ioctl_get_dev_stats {
289 __u64 devid; /* in */
290 __u64 nr_items; /* in/out */
291
292 /* out values: */
293 __u64 values[BTRFS_DEV_STAT_VALUES_MAX];
294
295 __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */
296};
297
269#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ 298#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
270 struct btrfs_ioctl_vol_args) 299 struct btrfs_ioctl_vol_args)
271#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ 300#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
@@ -330,5 +359,9 @@ struct btrfs_ioctl_logical_ino_args {
330 struct btrfs_ioctl_ino_path_args) 359 struct btrfs_ioctl_ino_path_args)
331#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ 360#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
332 struct btrfs_ioctl_ino_path_args) 361 struct btrfs_ioctl_ino_path_args)
362#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
363 struct btrfs_ioctl_get_dev_stats)
364#define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \
365 struct btrfs_ioctl_get_dev_stats)
333 366
334#endif 367#endif
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index a178f5ebea7..743b86fa4fc 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -411,9 +411,9 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
411 411
412 bytes = min_t(unsigned long, destlen, out_len - start_byte); 412 bytes = min_t(unsigned long, destlen, out_len - start_byte);
413 413
414 kaddr = kmap_atomic(dest_page, KM_USER0); 414 kaddr = kmap_atomic(dest_page);
415 memcpy(kaddr, workspace->buf + start_byte, bytes); 415 memcpy(kaddr, workspace->buf + start_byte, bytes);
416 kunmap_atomic(kaddr, KM_USER0); 416 kunmap_atomic(kaddr);
417out: 417out:
418 return ret; 418 return ret;
419} 419}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index bbf6d0d9aeb..9e138cdc36c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -196,7 +196,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
196 entry->len = len; 196 entry->len = len;
197 entry->disk_len = disk_len; 197 entry->disk_len = disk_len;
198 entry->bytes_left = len; 198 entry->bytes_left = len;
199 entry->inode = inode; 199 entry->inode = igrab(inode);
200 entry->compress_type = compress_type; 200 entry->compress_type = compress_type;
201 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 201 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
202 set_bit(type, &entry->flags); 202 set_bit(type, &entry->flags);
@@ -212,12 +212,12 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
212 212
213 trace_btrfs_ordered_extent_add(inode, entry); 213 trace_btrfs_ordered_extent_add(inode, entry);
214 214
215 spin_lock(&tree->lock); 215 spin_lock_irq(&tree->lock);
216 node = tree_insert(&tree->tree, file_offset, 216 node = tree_insert(&tree->tree, file_offset,
217 &entry->rb_node); 217 &entry->rb_node);
218 if (node) 218 if (node)
219 ordered_data_tree_panic(inode, -EEXIST, file_offset); 219 ordered_data_tree_panic(inode, -EEXIST, file_offset);
220 spin_unlock(&tree->lock); 220 spin_unlock_irq(&tree->lock);
221 221
222 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 222 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
223 list_add_tail(&entry->root_extent_list, 223 list_add_tail(&entry->root_extent_list,
@@ -264,9 +264,9 @@ void btrfs_add_ordered_sum(struct inode *inode,
264 struct btrfs_ordered_inode_tree *tree; 264 struct btrfs_ordered_inode_tree *tree;
265 265
266 tree = &BTRFS_I(inode)->ordered_tree; 266 tree = &BTRFS_I(inode)->ordered_tree;
267 spin_lock(&tree->lock); 267 spin_lock_irq(&tree->lock);
268 list_add_tail(&sum->list, &entry->list); 268 list_add_tail(&sum->list, &entry->list);
269 spin_unlock(&tree->lock); 269 spin_unlock_irq(&tree->lock);
270} 270}
271 271
272/* 272/*
@@ -283,18 +283,19 @@ void btrfs_add_ordered_sum(struct inode *inode,
283 */ 283 */
284int btrfs_dec_test_first_ordered_pending(struct inode *inode, 284int btrfs_dec_test_first_ordered_pending(struct inode *inode,
285 struct btrfs_ordered_extent **cached, 285 struct btrfs_ordered_extent **cached,
286 u64 *file_offset, u64 io_size) 286 u64 *file_offset, u64 io_size, int uptodate)
287{ 287{
288 struct btrfs_ordered_inode_tree *tree; 288 struct btrfs_ordered_inode_tree *tree;
289 struct rb_node *node; 289 struct rb_node *node;
290 struct btrfs_ordered_extent *entry = NULL; 290 struct btrfs_ordered_extent *entry = NULL;
291 int ret; 291 int ret;
292 unsigned long flags;
292 u64 dec_end; 293 u64 dec_end;
293 u64 dec_start; 294 u64 dec_start;
294 u64 to_dec; 295 u64 to_dec;
295 296
296 tree = &BTRFS_I(inode)->ordered_tree; 297 tree = &BTRFS_I(inode)->ordered_tree;
297 spin_lock(&tree->lock); 298 spin_lock_irqsave(&tree->lock, flags);
298 node = tree_search(tree, *file_offset); 299 node = tree_search(tree, *file_offset);
299 if (!node) { 300 if (!node) {
300 ret = 1; 301 ret = 1;
@@ -323,6 +324,9 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
323 (unsigned long long)to_dec); 324 (unsigned long long)to_dec);
324 } 325 }
325 entry->bytes_left -= to_dec; 326 entry->bytes_left -= to_dec;
327 if (!uptodate)
328 set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
329
326 if (entry->bytes_left == 0) 330 if (entry->bytes_left == 0)
327 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); 331 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
328 else 332 else
@@ -332,7 +336,7 @@ out:
332 *cached = entry; 336 *cached = entry;
333 atomic_inc(&entry->refs); 337 atomic_inc(&entry->refs);
334 } 338 }
335 spin_unlock(&tree->lock); 339 spin_unlock_irqrestore(&tree->lock, flags);
336 return ret == 0; 340 return ret == 0;
337} 341}
338 342
@@ -347,15 +351,21 @@ out:
347 */ 351 */
348int btrfs_dec_test_ordered_pending(struct inode *inode, 352int btrfs_dec_test_ordered_pending(struct inode *inode,
349 struct btrfs_ordered_extent **cached, 353 struct btrfs_ordered_extent **cached,
350 u64 file_offset, u64 io_size) 354 u64 file_offset, u64 io_size, int uptodate)
351{ 355{
352 struct btrfs_ordered_inode_tree *tree; 356 struct btrfs_ordered_inode_tree *tree;
353 struct rb_node *node; 357 struct rb_node *node;
354 struct btrfs_ordered_extent *entry = NULL; 358 struct btrfs_ordered_extent *entry = NULL;
359 unsigned long flags;
355 int ret; 360 int ret;
356 361
357 tree = &BTRFS_I(inode)->ordered_tree; 362 tree = &BTRFS_I(inode)->ordered_tree;
358 spin_lock(&tree->lock); 363 spin_lock_irqsave(&tree->lock, flags);
364 if (cached && *cached) {
365 entry = *cached;
366 goto have_entry;
367 }
368
359 node = tree_search(tree, file_offset); 369 node = tree_search(tree, file_offset);
360 if (!node) { 370 if (!node) {
361 ret = 1; 371 ret = 1;
@@ -363,6 +373,7 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
363 } 373 }
364 374
365 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 375 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
376have_entry:
366 if (!offset_in_entry(entry, file_offset)) { 377 if (!offset_in_entry(entry, file_offset)) {
367 ret = 1; 378 ret = 1;
368 goto out; 379 goto out;
@@ -374,6 +385,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
374 (unsigned long long)io_size); 385 (unsigned long long)io_size);
375 } 386 }
376 entry->bytes_left -= io_size; 387 entry->bytes_left -= io_size;
388 if (!uptodate)
389 set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
390
377 if (entry->bytes_left == 0) 391 if (entry->bytes_left == 0)
378 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); 392 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
379 else 393 else
@@ -383,7 +397,7 @@ out:
383 *cached = entry; 397 *cached = entry;
384 atomic_inc(&entry->refs); 398 atomic_inc(&entry->refs);
385 } 399 }
386 spin_unlock(&tree->lock); 400 spin_unlock_irqrestore(&tree->lock, flags);
387 return ret == 0; 401 return ret == 0;
388} 402}
389 403
@@ -399,6 +413,8 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
399 trace_btrfs_ordered_extent_put(entry->inode, entry); 413 trace_btrfs_ordered_extent_put(entry->inode, entry);
400 414
401 if (atomic_dec_and_test(&entry->refs)) { 415 if (atomic_dec_and_test(&entry->refs)) {
416 if (entry->inode)
417 btrfs_add_delayed_iput(entry->inode);
402 while (!list_empty(&entry->list)) { 418 while (!list_empty(&entry->list)) {
403 cur = entry->list.next; 419 cur = entry->list.next;
404 sum = list_entry(cur, struct btrfs_ordered_sum, list); 420 sum = list_entry(cur, struct btrfs_ordered_sum, list);
@@ -411,21 +427,22 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
411 427
412/* 428/*
413 * remove an ordered extent from the tree. No references are dropped 429 * remove an ordered extent from the tree. No references are dropped
414 * and you must wake_up entry->wait. You must hold the tree lock 430 * and waiters are woken up.
415 * while you call this function.
416 */ 431 */
417static void __btrfs_remove_ordered_extent(struct inode *inode, 432void btrfs_remove_ordered_extent(struct inode *inode,
418 struct btrfs_ordered_extent *entry) 433 struct btrfs_ordered_extent *entry)
419{ 434{
420 struct btrfs_ordered_inode_tree *tree; 435 struct btrfs_ordered_inode_tree *tree;
421 struct btrfs_root *root = BTRFS_I(inode)->root; 436 struct btrfs_root *root = BTRFS_I(inode)->root;
422 struct rb_node *node; 437 struct rb_node *node;
423 438
424 tree = &BTRFS_I(inode)->ordered_tree; 439 tree = &BTRFS_I(inode)->ordered_tree;
440 spin_lock_irq(&tree->lock);
425 node = &entry->rb_node; 441 node = &entry->rb_node;
426 rb_erase(node, &tree->tree); 442 rb_erase(node, &tree->tree);
427 tree->last = NULL; 443 tree->last = NULL;
428 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 444 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
445 spin_unlock_irq(&tree->lock);
429 446
430 spin_lock(&root->fs_info->ordered_extent_lock); 447 spin_lock(&root->fs_info->ordered_extent_lock);
431 list_del_init(&entry->root_extent_list); 448 list_del_init(&entry->root_extent_list);
@@ -442,21 +459,6 @@ static void __btrfs_remove_ordered_extent(struct inode *inode,
442 list_del_init(&BTRFS_I(inode)->ordered_operations); 459 list_del_init(&BTRFS_I(inode)->ordered_operations);
443 } 460 }
444 spin_unlock(&root->fs_info->ordered_extent_lock); 461 spin_unlock(&root->fs_info->ordered_extent_lock);
445}
446
447/*
448 * remove an ordered extent from the tree. No references are dropped
449 * but any waiters are woken.
450 */
451void btrfs_remove_ordered_extent(struct inode *inode,
452 struct btrfs_ordered_extent *entry)
453{
454 struct btrfs_ordered_inode_tree *tree;
455
456 tree = &BTRFS_I(inode)->ordered_tree;
457 spin_lock(&tree->lock);
458 __btrfs_remove_ordered_extent(inode, entry);
459 spin_unlock(&tree->lock);
460 wake_up(&entry->wait); 462 wake_up(&entry->wait);
461} 463}
462 464
@@ -621,19 +623,11 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
621 if (orig_end > INT_LIMIT(loff_t)) 623 if (orig_end > INT_LIMIT(loff_t))
622 orig_end = INT_LIMIT(loff_t); 624 orig_end = INT_LIMIT(loff_t);
623 } 625 }
624again: 626
625 /* start IO across the range first to instantiate any delalloc 627 /* start IO across the range first to instantiate any delalloc
626 * extents 628 * extents
627 */ 629 */
628 filemap_fdatawrite_range(inode->i_mapping, start, orig_end); 630 filemap_write_and_wait_range(inode->i_mapping, start, orig_end);
629
630 /* The compression code will leave pages locked but return from
631 * writepage without setting the page writeback. Starting again
632 * with WB_SYNC_ALL will end up waiting for the IO to actually start.
633 */
634 filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
635
636 filemap_fdatawait_range(inode->i_mapping, start, orig_end);
637 631
638 end = orig_end; 632 end = orig_end;
639 found = 0; 633 found = 0;
@@ -657,11 +651,6 @@ again:
657 break; 651 break;
658 end--; 652 end--;
659 } 653 }
660 if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
661 EXTENT_DELALLOC, 0, NULL)) {
662 schedule_timeout(1);
663 goto again;
664 }
665} 654}
666 655
667/* 656/*
@@ -676,7 +665,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
676 struct btrfs_ordered_extent *entry = NULL; 665 struct btrfs_ordered_extent *entry = NULL;
677 666
678 tree = &BTRFS_I(inode)->ordered_tree; 667 tree = &BTRFS_I(inode)->ordered_tree;
679 spin_lock(&tree->lock); 668 spin_lock_irq(&tree->lock);
680 node = tree_search(tree, file_offset); 669 node = tree_search(tree, file_offset);
681 if (!node) 670 if (!node)
682 goto out; 671 goto out;
@@ -687,7 +676,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
687 if (entry) 676 if (entry)
688 atomic_inc(&entry->refs); 677 atomic_inc(&entry->refs);
689out: 678out:
690 spin_unlock(&tree->lock); 679 spin_unlock_irq(&tree->lock);
691 return entry; 680 return entry;
692} 681}
693 682
@@ -703,7 +692,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
703 struct btrfs_ordered_extent *entry = NULL; 692 struct btrfs_ordered_extent *entry = NULL;
704 693
705 tree = &BTRFS_I(inode)->ordered_tree; 694 tree = &BTRFS_I(inode)->ordered_tree;
706 spin_lock(&tree->lock); 695 spin_lock_irq(&tree->lock);
707 node = tree_search(tree, file_offset); 696 node = tree_search(tree, file_offset);
708 if (!node) { 697 if (!node) {
709 node = tree_search(tree, file_offset + len); 698 node = tree_search(tree, file_offset + len);
@@ -728,7 +717,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
728out: 717out:
729 if (entry) 718 if (entry)
730 atomic_inc(&entry->refs); 719 atomic_inc(&entry->refs);
731 spin_unlock(&tree->lock); 720 spin_unlock_irq(&tree->lock);
732 return entry; 721 return entry;
733} 722}
734 723
@@ -744,7 +733,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
744 struct btrfs_ordered_extent *entry = NULL; 733 struct btrfs_ordered_extent *entry = NULL;
745 734
746 tree = &BTRFS_I(inode)->ordered_tree; 735 tree = &BTRFS_I(inode)->ordered_tree;
747 spin_lock(&tree->lock); 736 spin_lock_irq(&tree->lock);
748 node = tree_search(tree, file_offset); 737 node = tree_search(tree, file_offset);
749 if (!node) 738 if (!node)
750 goto out; 739 goto out;
@@ -752,7 +741,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
752 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 741 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
753 atomic_inc(&entry->refs); 742 atomic_inc(&entry->refs);
754out: 743out:
755 spin_unlock(&tree->lock); 744 spin_unlock_irq(&tree->lock);
756 return entry; 745 return entry;
757} 746}
758 747
@@ -764,7 +753,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
764 struct btrfs_ordered_extent *ordered) 753 struct btrfs_ordered_extent *ordered)
765{ 754{
766 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; 755 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
767 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
768 u64 disk_i_size; 756 u64 disk_i_size;
769 u64 new_i_size; 757 u64 new_i_size;
770 u64 i_size_test; 758 u64 i_size_test;
@@ -779,7 +767,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
779 else 767 else
780 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); 768 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
781 769
782 spin_lock(&tree->lock); 770 spin_lock_irq(&tree->lock);
783 disk_i_size = BTRFS_I(inode)->disk_i_size; 771 disk_i_size = BTRFS_I(inode)->disk_i_size;
784 772
785 /* truncate file */ 773 /* truncate file */
@@ -798,14 +786,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
798 } 786 }
799 787
800 /* 788 /*
801 * we can't update the disk_isize if there are delalloc bytes
802 * between disk_i_size and this ordered extent
803 */
804 if (test_range_bit(io_tree, disk_i_size, offset - 1,
805 EXTENT_DELALLOC, 0, NULL)) {
806 goto out;
807 }
808 /*
809 * walk backward from this ordered extent to disk_i_size. 789 * walk backward from this ordered extent to disk_i_size.
810 * if we find an ordered extent then we can't update disk i_size 790 * if we find an ordered extent then we can't update disk i_size
811 * yet 791 * yet
@@ -825,15 +805,18 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
825 } 805 }
826 node = prev; 806 node = prev;
827 } 807 }
828 while (node) { 808 for (; node; node = rb_prev(node)) {
829 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 809 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
810
811 /* We treat this entry as if it doesnt exist */
812 if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
813 continue;
830 if (test->file_offset + test->len <= disk_i_size) 814 if (test->file_offset + test->len <= disk_i_size)
831 break; 815 break;
832 if (test->file_offset >= i_size) 816 if (test->file_offset >= i_size)
833 break; 817 break;
834 if (test->file_offset >= disk_i_size) 818 if (test->file_offset >= disk_i_size)
835 goto out; 819 goto out;
836 node = rb_prev(node);
837 } 820 }
838 new_i_size = min_t(u64, offset, i_size); 821 new_i_size = min_t(u64, offset, i_size);
839 822
@@ -851,43 +834,49 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
851 else 834 else
852 node = rb_first(&tree->tree); 835 node = rb_first(&tree->tree);
853 } 836 }
854 i_size_test = 0; 837
855 if (node) { 838 /*
856 /* 839 * We are looking for an area between our current extent and the next
857 * do we have an area where IO might have finished 840 * ordered extent to update the i_size to. There are 3 cases here
858 * between our ordered extent and the next one. 841 *
859 */ 842 * 1) We don't actually have anything and we can update to i_size.
843 * 2) We have stuff but they already did their i_size update so again we
844 * can just update to i_size.
845 * 3) We have an outstanding ordered extent so the most we can update
846 * our disk_i_size to is the start of the next offset.
847 */
848 i_size_test = i_size;
849 for (; node; node = rb_next(node)) {
860 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 850 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
861 if (test->file_offset > offset) 851
852 if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
853 continue;
854 if (test->file_offset > offset) {
862 i_size_test = test->file_offset; 855 i_size_test = test->file_offset;
863 } else { 856 break;
864 i_size_test = i_size; 857 }
865 } 858 }
866 859
867 /* 860 /*
868 * i_size_test is the end of a region after this ordered 861 * i_size_test is the end of a region after this ordered
869 * extent where there are no ordered extents. As long as there 862 * extent where there are no ordered extents, we can safely set
870 * are no delalloc bytes in this area, it is safe to update 863 * disk_i_size to this.
871 * disk_i_size to the end of the region.
872 */ 864 */
873 if (i_size_test > offset && 865 if (i_size_test > offset)
874 !test_range_bit(io_tree, offset, i_size_test - 1,
875 EXTENT_DELALLOC, 0, NULL)) {
876 new_i_size = min_t(u64, i_size_test, i_size); 866 new_i_size = min_t(u64, i_size_test, i_size);
877 }
878 BTRFS_I(inode)->disk_i_size = new_i_size; 867 BTRFS_I(inode)->disk_i_size = new_i_size;
879 ret = 0; 868 ret = 0;
880out: 869out:
881 /* 870 /*
882 * we need to remove the ordered extent with the tree lock held 871 * We need to do this because we can't remove ordered extents until
883 * so that other people calling this function don't find our fully 872 * after the i_disk_size has been updated and then the inode has been
884 * processed ordered entry and skip updating the i_size 873 * updated to reflect the change, so we need to tell anybody who finds
874 * this ordered extent that we've already done all the real work, we
875 * just haven't completed all the other work.
885 */ 876 */
886 if (ordered) 877 if (ordered)
887 __btrfs_remove_ordered_extent(inode, ordered); 878 set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags);
888 spin_unlock(&tree->lock); 879 spin_unlock_irq(&tree->lock);
889 if (ordered)
890 wake_up(&ordered->wait);
891 return ret; 880 return ret;
892} 881}
893 882
@@ -912,7 +901,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
912 if (!ordered) 901 if (!ordered)
913 return 1; 902 return 1;
914 903
915 spin_lock(&tree->lock); 904 spin_lock_irq(&tree->lock);
916 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { 905 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
917 if (disk_bytenr >= ordered_sum->bytenr) { 906 if (disk_bytenr >= ordered_sum->bytenr) {
918 num_sectors = ordered_sum->len / sectorsize; 907 num_sectors = ordered_sum->len / sectorsize;
@@ -927,7 +916,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
927 } 916 }
928 } 917 }
929out: 918out:
930 spin_unlock(&tree->lock); 919 spin_unlock_irq(&tree->lock);
931 btrfs_put_ordered_extent(ordered); 920 btrfs_put_ordered_extent(ordered);
932 return ret; 921 return ret;
933} 922}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index c355ad4dc1a..e03c560d299 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -74,6 +74,12 @@ struct btrfs_ordered_sum {
74 74
75#define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ 75#define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */
76 76
77#define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */
78
79#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates wether this ordered extent
80 * has done its due diligence in updating
81 * the isize. */
82
77struct btrfs_ordered_extent { 83struct btrfs_ordered_extent {
78 /* logical offset in the file */ 84 /* logical offset in the file */
79 u64 file_offset; 85 u64 file_offset;
@@ -113,6 +119,8 @@ struct btrfs_ordered_extent {
113 119
114 /* a per root list of all the pending ordered extents */ 120 /* a per root list of all the pending ordered extents */
115 struct list_head root_extent_list; 121 struct list_head root_extent_list;
122
123 struct btrfs_work work;
116}; 124};
117 125
118 126
@@ -143,10 +151,11 @@ void btrfs_remove_ordered_extent(struct inode *inode,
143 struct btrfs_ordered_extent *entry); 151 struct btrfs_ordered_extent *entry);
144int btrfs_dec_test_ordered_pending(struct inode *inode, 152int btrfs_dec_test_ordered_pending(struct inode *inode,
145 struct btrfs_ordered_extent **cached, 153 struct btrfs_ordered_extent **cached,
146 u64 file_offset, u64 io_size); 154 u64 file_offset, u64 io_size, int uptodate);
147int btrfs_dec_test_first_ordered_pending(struct inode *inode, 155int btrfs_dec_test_first_ordered_pending(struct inode *inode,
148 struct btrfs_ordered_extent **cached, 156 struct btrfs_ordered_extent **cached,
149 u64 *file_offset, u64 io_size); 157 u64 *file_offset, u64 io_size,
158 int uptodate);
150int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, 159int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
151 u64 start, u64 len, u64 disk_len, int type); 160 u64 start, u64 len, u64 disk_len, int type);
152int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, 161int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index f38e452486b..5e23684887e 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -294,6 +294,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
294 btrfs_dev_extent_chunk_offset(l, dev_extent), 294 btrfs_dev_extent_chunk_offset(l, dev_extent),
295 (unsigned long long) 295 (unsigned long long)
296 btrfs_dev_extent_length(l, dev_extent)); 296 btrfs_dev_extent_length(l, dev_extent));
297 case BTRFS_DEV_STATS_KEY:
298 printk(KERN_INFO "\t\tdevice stats\n");
299 break;
297 }; 300 };
298 } 301 }
299} 302}
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index ac5d0108588..48a4882d8ad 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -718,13 +718,18 @@ static void reada_start_machine_worker(struct btrfs_work *work)
718{ 718{
719 struct reada_machine_work *rmw; 719 struct reada_machine_work *rmw;
720 struct btrfs_fs_info *fs_info; 720 struct btrfs_fs_info *fs_info;
721 int old_ioprio;
721 722
722 rmw = container_of(work, struct reada_machine_work, work); 723 rmw = container_of(work, struct reada_machine_work, work);
723 fs_info = rmw->fs_info; 724 fs_info = rmw->fs_info;
724 725
725 kfree(rmw); 726 kfree(rmw);
726 727
728 old_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current),
729 task_nice_ioprio(current));
730 set_task_ioprio(current, BTRFS_IOPRIO_READA);
727 __reada_start_machine(fs_info); 731 __reada_start_machine(fs_info);
732 set_task_ioprio(current, old_ioprio);
728} 733}
729 734
730static void __reada_start_machine(struct btrfs_fs_info *fs_info) 735static void __reada_start_machine(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 7e487be0094..a38cfa4f251 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -50,7 +50,7 @@ struct scrub_dev;
50struct scrub_page { 50struct scrub_page {
51 struct scrub_block *sblock; 51 struct scrub_block *sblock;
52 struct page *page; 52 struct page *page;
53 struct block_device *bdev; 53 struct btrfs_device *dev;
54 u64 flags; /* extent flags */ 54 u64 flags; /* extent flags */
55 u64 generation; 55 u64 generation;
56 u64 logical; 56 u64 logical;
@@ -86,6 +86,7 @@ struct scrub_block {
86 unsigned int header_error:1; 86 unsigned int header_error:1;
87 unsigned int checksum_error:1; 87 unsigned int checksum_error:1;
88 unsigned int no_io_error_seen:1; 88 unsigned int no_io_error_seen:1;
89 unsigned int generation_error:1; /* also sets header_error */
89 }; 90 };
90}; 91};
91 92
@@ -675,6 +676,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
675 sdev->stat.read_errors++; 676 sdev->stat.read_errors++;
676 sdev->stat.uncorrectable_errors++; 677 sdev->stat.uncorrectable_errors++;
677 spin_unlock(&sdev->stat_lock); 678 spin_unlock(&sdev->stat_lock);
679 btrfs_dev_stat_inc_and_print(sdev->dev,
680 BTRFS_DEV_STAT_READ_ERRS);
678 goto out; 681 goto out;
679 } 682 }
680 683
@@ -686,6 +689,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
686 sdev->stat.read_errors++; 689 sdev->stat.read_errors++;
687 sdev->stat.uncorrectable_errors++; 690 sdev->stat.uncorrectable_errors++;
688 spin_unlock(&sdev->stat_lock); 691 spin_unlock(&sdev->stat_lock);
692 btrfs_dev_stat_inc_and_print(sdev->dev,
693 BTRFS_DEV_STAT_READ_ERRS);
689 goto out; 694 goto out;
690 } 695 }
691 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS); 696 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
@@ -699,6 +704,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
699 sdev->stat.read_errors++; 704 sdev->stat.read_errors++;
700 sdev->stat.uncorrectable_errors++; 705 sdev->stat.uncorrectable_errors++;
701 spin_unlock(&sdev->stat_lock); 706 spin_unlock(&sdev->stat_lock);
707 btrfs_dev_stat_inc_and_print(sdev->dev,
708 BTRFS_DEV_STAT_READ_ERRS);
702 goto out; 709 goto out;
703 } 710 }
704 711
@@ -725,12 +732,16 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
725 spin_unlock(&sdev->stat_lock); 732 spin_unlock(&sdev->stat_lock);
726 if (__ratelimit(&_rs)) 733 if (__ratelimit(&_rs))
727 scrub_print_warning("i/o error", sblock_to_check); 734 scrub_print_warning("i/o error", sblock_to_check);
735 btrfs_dev_stat_inc_and_print(sdev->dev,
736 BTRFS_DEV_STAT_READ_ERRS);
728 } else if (sblock_bad->checksum_error) { 737 } else if (sblock_bad->checksum_error) {
729 spin_lock(&sdev->stat_lock); 738 spin_lock(&sdev->stat_lock);
730 sdev->stat.csum_errors++; 739 sdev->stat.csum_errors++;
731 spin_unlock(&sdev->stat_lock); 740 spin_unlock(&sdev->stat_lock);
732 if (__ratelimit(&_rs)) 741 if (__ratelimit(&_rs))
733 scrub_print_warning("checksum error", sblock_to_check); 742 scrub_print_warning("checksum error", sblock_to_check);
743 btrfs_dev_stat_inc_and_print(sdev->dev,
744 BTRFS_DEV_STAT_CORRUPTION_ERRS);
734 } else if (sblock_bad->header_error) { 745 } else if (sblock_bad->header_error) {
735 spin_lock(&sdev->stat_lock); 746 spin_lock(&sdev->stat_lock);
736 sdev->stat.verify_errors++; 747 sdev->stat.verify_errors++;
@@ -738,6 +749,12 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
738 if (__ratelimit(&_rs)) 749 if (__ratelimit(&_rs))
739 scrub_print_warning("checksum/header error", 750 scrub_print_warning("checksum/header error",
740 sblock_to_check); 751 sblock_to_check);
752 if (sblock_bad->generation_error)
753 btrfs_dev_stat_inc_and_print(sdev->dev,
754 BTRFS_DEV_STAT_GENERATION_ERRS);
755 else
756 btrfs_dev_stat_inc_and_print(sdev->dev,
757 BTRFS_DEV_STAT_CORRUPTION_ERRS);
741 } 758 }
742 759
743 if (sdev->readonly) 760 if (sdev->readonly)
@@ -998,8 +1015,8 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev,
998 page = sblock->pagev + page_index; 1015 page = sblock->pagev + page_index;
999 page->logical = logical; 1016 page->logical = logical;
1000 page->physical = bbio->stripes[mirror_index].physical; 1017 page->physical = bbio->stripes[mirror_index].physical;
1001 /* for missing devices, bdev is NULL */ 1018 /* for missing devices, dev->bdev is NULL */
1002 page->bdev = bbio->stripes[mirror_index].dev->bdev; 1019 page->dev = bbio->stripes[mirror_index].dev;
1003 page->mirror_num = mirror_index + 1; 1020 page->mirror_num = mirror_index + 1;
1004 page->page = alloc_page(GFP_NOFS); 1021 page->page = alloc_page(GFP_NOFS);
1005 if (!page->page) { 1022 if (!page->page) {
@@ -1043,7 +1060,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
1043 struct scrub_page *page = sblock->pagev + page_num; 1060 struct scrub_page *page = sblock->pagev + page_num;
1044 DECLARE_COMPLETION_ONSTACK(complete); 1061 DECLARE_COMPLETION_ONSTACK(complete);
1045 1062
1046 if (page->bdev == NULL) { 1063 if (page->dev->bdev == NULL) {
1047 page->io_error = 1; 1064 page->io_error = 1;
1048 sblock->no_io_error_seen = 0; 1065 sblock->no_io_error_seen = 0;
1049 continue; 1066 continue;
@@ -1053,7 +1070,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
1053 bio = bio_alloc(GFP_NOFS, 1); 1070 bio = bio_alloc(GFP_NOFS, 1);
1054 if (!bio) 1071 if (!bio)
1055 return -EIO; 1072 return -EIO;
1056 bio->bi_bdev = page->bdev; 1073 bio->bi_bdev = page->dev->bdev;
1057 bio->bi_sector = page->physical >> 9; 1074 bio->bi_sector = page->physical >> 9;
1058 bio->bi_end_io = scrub_complete_bio_end_io; 1075 bio->bi_end_io = scrub_complete_bio_end_io;
1059 bio->bi_private = &complete; 1076 bio->bi_private = &complete;
@@ -1098,21 +1115,24 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1098 if (is_metadata) { 1115 if (is_metadata) {
1099 struct btrfs_header *h; 1116 struct btrfs_header *h;
1100 1117
1101 mapped_buffer = kmap_atomic(sblock->pagev[0].page, KM_USER0); 1118 mapped_buffer = kmap_atomic(sblock->pagev[0].page);
1102 h = (struct btrfs_header *)mapped_buffer; 1119 h = (struct btrfs_header *)mapped_buffer;
1103 1120
1104 if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) || 1121 if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) ||
1105 generation != le64_to_cpu(h->generation) ||
1106 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || 1122 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
1107 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, 1123 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1108 BTRFS_UUID_SIZE)) 1124 BTRFS_UUID_SIZE)) {
1109 sblock->header_error = 1; 1125 sblock->header_error = 1;
1126 } else if (generation != le64_to_cpu(h->generation)) {
1127 sblock->header_error = 1;
1128 sblock->generation_error = 1;
1129 }
1110 csum = h->csum; 1130 csum = h->csum;
1111 } else { 1131 } else {
1112 if (!have_csum) 1132 if (!have_csum)
1113 return; 1133 return;
1114 1134
1115 mapped_buffer = kmap_atomic(sblock->pagev[0].page, KM_USER0); 1135 mapped_buffer = kmap_atomic(sblock->pagev[0].page);
1116 } 1136 }
1117 1137
1118 for (page_num = 0;;) { 1138 for (page_num = 0;;) {
@@ -1124,14 +1144,13 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1124 crc = btrfs_csum_data(root, mapped_buffer, crc, 1144 crc = btrfs_csum_data(root, mapped_buffer, crc,
1125 PAGE_SIZE); 1145 PAGE_SIZE);
1126 1146
1127 kunmap_atomic(mapped_buffer, KM_USER0); 1147 kunmap_atomic(mapped_buffer);
1128 page_num++; 1148 page_num++;
1129 if (page_num >= sblock->page_count) 1149 if (page_num >= sblock->page_count)
1130 break; 1150 break;
1131 BUG_ON(!sblock->pagev[page_num].page); 1151 BUG_ON(!sblock->pagev[page_num].page);
1132 1152
1133 mapped_buffer = kmap_atomic(sblock->pagev[page_num].page, 1153 mapped_buffer = kmap_atomic(sblock->pagev[page_num].page);
1134 KM_USER0);
1135 } 1154 }
1136 1155
1137 btrfs_csum_final(crc, calculated_csum); 1156 btrfs_csum_final(crc, calculated_csum);
@@ -1183,7 +1202,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1183 bio = bio_alloc(GFP_NOFS, 1); 1202 bio = bio_alloc(GFP_NOFS, 1);
1184 if (!bio) 1203 if (!bio)
1185 return -EIO; 1204 return -EIO;
1186 bio->bi_bdev = page_bad->bdev; 1205 bio->bi_bdev = page_bad->dev->bdev;
1187 bio->bi_sector = page_bad->physical >> 9; 1206 bio->bi_sector = page_bad->physical >> 9;
1188 bio->bi_end_io = scrub_complete_bio_end_io; 1207 bio->bi_end_io = scrub_complete_bio_end_io;
1189 bio->bi_private = &complete; 1208 bio->bi_private = &complete;
@@ -1197,6 +1216,12 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1197 1216
1198 /* this will also unplug the queue */ 1217 /* this will also unplug the queue */
1199 wait_for_completion(&complete); 1218 wait_for_completion(&complete);
1219 if (!bio_flagged(bio, BIO_UPTODATE)) {
1220 btrfs_dev_stat_inc_and_print(page_bad->dev,
1221 BTRFS_DEV_STAT_WRITE_ERRS);
1222 bio_put(bio);
1223 return -EIO;
1224 }
1200 bio_put(bio); 1225 bio_put(bio);
1201 } 1226 }
1202 1227
@@ -1242,7 +1267,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
1242 1267
1243 on_disk_csum = sblock->pagev[0].csum; 1268 on_disk_csum = sblock->pagev[0].csum;
1244 page = sblock->pagev[0].page; 1269 page = sblock->pagev[0].page;
1245 buffer = kmap_atomic(page, KM_USER0); 1270 buffer = kmap_atomic(page);
1246 1271
1247 len = sdev->sectorsize; 1272 len = sdev->sectorsize;
1248 index = 0; 1273 index = 0;
@@ -1250,7 +1275,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
1250 u64 l = min_t(u64, len, PAGE_SIZE); 1275 u64 l = min_t(u64, len, PAGE_SIZE);
1251 1276
1252 crc = btrfs_csum_data(root, buffer, crc, l); 1277 crc = btrfs_csum_data(root, buffer, crc, l);
1253 kunmap_atomic(buffer, KM_USER0); 1278 kunmap_atomic(buffer);
1254 len -= l; 1279 len -= l;
1255 if (len == 0) 1280 if (len == 0)
1256 break; 1281 break;
@@ -1258,7 +1283,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
1258 BUG_ON(index >= sblock->page_count); 1283 BUG_ON(index >= sblock->page_count);
1259 BUG_ON(!sblock->pagev[index].page); 1284 BUG_ON(!sblock->pagev[index].page);
1260 page = sblock->pagev[index].page; 1285 page = sblock->pagev[index].page;
1261 buffer = kmap_atomic(page, KM_USER0); 1286 buffer = kmap_atomic(page);
1262 } 1287 }
1263 1288
1264 btrfs_csum_final(crc, csum); 1289 btrfs_csum_final(crc, csum);
@@ -1288,7 +1313,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1288 1313
1289 BUG_ON(sblock->page_count < 1); 1314 BUG_ON(sblock->page_count < 1);
1290 page = sblock->pagev[0].page; 1315 page = sblock->pagev[0].page;
1291 mapped_buffer = kmap_atomic(page, KM_USER0); 1316 mapped_buffer = kmap_atomic(page);
1292 h = (struct btrfs_header *)mapped_buffer; 1317 h = (struct btrfs_header *)mapped_buffer;
1293 memcpy(on_disk_csum, h->csum, sdev->csum_size); 1318 memcpy(on_disk_csum, h->csum, sdev->csum_size);
1294 1319
@@ -1320,7 +1345,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1320 u64 l = min_t(u64, len, mapped_size); 1345 u64 l = min_t(u64, len, mapped_size);
1321 1346
1322 crc = btrfs_csum_data(root, p, crc, l); 1347 crc = btrfs_csum_data(root, p, crc, l);
1323 kunmap_atomic(mapped_buffer, KM_USER0); 1348 kunmap_atomic(mapped_buffer);
1324 len -= l; 1349 len -= l;
1325 if (len == 0) 1350 if (len == 0)
1326 break; 1351 break;
@@ -1328,7 +1353,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1328 BUG_ON(index >= sblock->page_count); 1353 BUG_ON(index >= sblock->page_count);
1329 BUG_ON(!sblock->pagev[index].page); 1354 BUG_ON(!sblock->pagev[index].page);
1330 page = sblock->pagev[index].page; 1355 page = sblock->pagev[index].page;
1331 mapped_buffer = kmap_atomic(page, KM_USER0); 1356 mapped_buffer = kmap_atomic(page);
1332 mapped_size = PAGE_SIZE; 1357 mapped_size = PAGE_SIZE;
1333 p = mapped_buffer; 1358 p = mapped_buffer;
1334 } 1359 }
@@ -1353,24 +1378,25 @@ static int scrub_checksum_super(struct scrub_block *sblock)
1353 u64 mapped_size; 1378 u64 mapped_size;
1354 void *p; 1379 void *p;
1355 u32 crc = ~(u32)0; 1380 u32 crc = ~(u32)0;
1356 int fail = 0; 1381 int fail_gen = 0;
1382 int fail_cor = 0;
1357 u64 len; 1383 u64 len;
1358 int index; 1384 int index;
1359 1385
1360 BUG_ON(sblock->page_count < 1); 1386 BUG_ON(sblock->page_count < 1);
1361 page = sblock->pagev[0].page; 1387 page = sblock->pagev[0].page;
1362 mapped_buffer = kmap_atomic(page, KM_USER0); 1388 mapped_buffer = kmap_atomic(page);
1363 s = (struct btrfs_super_block *)mapped_buffer; 1389 s = (struct btrfs_super_block *)mapped_buffer;
1364 memcpy(on_disk_csum, s->csum, sdev->csum_size); 1390 memcpy(on_disk_csum, s->csum, sdev->csum_size);
1365 1391
1366 if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr)) 1392 if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr))
1367 ++fail; 1393 ++fail_cor;
1368 1394
1369 if (sblock->pagev[0].generation != le64_to_cpu(s->generation)) 1395 if (sblock->pagev[0].generation != le64_to_cpu(s->generation))
1370 ++fail; 1396 ++fail_gen;
1371 1397
1372 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) 1398 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
1373 ++fail; 1399 ++fail_cor;
1374 1400
1375 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE; 1401 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
1376 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; 1402 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
@@ -1380,7 +1406,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
1380 u64 l = min_t(u64, len, mapped_size); 1406 u64 l = min_t(u64, len, mapped_size);
1381 1407
1382 crc = btrfs_csum_data(root, p, crc, l); 1408 crc = btrfs_csum_data(root, p, crc, l);
1383 kunmap_atomic(mapped_buffer, KM_USER0); 1409 kunmap_atomic(mapped_buffer);
1384 len -= l; 1410 len -= l;
1385 if (len == 0) 1411 if (len == 0)
1386 break; 1412 break;
@@ -1388,16 +1414,16 @@ static int scrub_checksum_super(struct scrub_block *sblock)
1388 BUG_ON(index >= sblock->page_count); 1414 BUG_ON(index >= sblock->page_count);
1389 BUG_ON(!sblock->pagev[index].page); 1415 BUG_ON(!sblock->pagev[index].page);
1390 page = sblock->pagev[index].page; 1416 page = sblock->pagev[index].page;
1391 mapped_buffer = kmap_atomic(page, KM_USER0); 1417 mapped_buffer = kmap_atomic(page);
1392 mapped_size = PAGE_SIZE; 1418 mapped_size = PAGE_SIZE;
1393 p = mapped_buffer; 1419 p = mapped_buffer;
1394 } 1420 }
1395 1421
1396 btrfs_csum_final(crc, calculated_csum); 1422 btrfs_csum_final(crc, calculated_csum);
1397 if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) 1423 if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
1398 ++fail; 1424 ++fail_cor;
1399 1425
1400 if (fail) { 1426 if (fail_cor + fail_gen) {
1401 /* 1427 /*
1402 * if we find an error in a super block, we just report it. 1428 * if we find an error in a super block, we just report it.
1403 * They will get written with the next transaction commit 1429 * They will get written with the next transaction commit
@@ -1406,9 +1432,15 @@ static int scrub_checksum_super(struct scrub_block *sblock)
1406 spin_lock(&sdev->stat_lock); 1432 spin_lock(&sdev->stat_lock);
1407 ++sdev->stat.super_errors; 1433 ++sdev->stat.super_errors;
1408 spin_unlock(&sdev->stat_lock); 1434 spin_unlock(&sdev->stat_lock);
1435 if (fail_cor)
1436 btrfs_dev_stat_inc_and_print(sdev->dev,
1437 BTRFS_DEV_STAT_CORRUPTION_ERRS);
1438 else
1439 btrfs_dev_stat_inc_and_print(sdev->dev,
1440 BTRFS_DEV_STAT_GENERATION_ERRS);
1409 } 1441 }
1410 1442
1411 return fail; 1443 return fail_cor + fail_gen;
1412} 1444}
1413 1445
1414static void scrub_block_get(struct scrub_block *sblock) 1446static void scrub_block_get(struct scrub_block *sblock)
@@ -1552,7 +1584,7 @@ static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
1552 return -ENOMEM; 1584 return -ENOMEM;
1553 } 1585 }
1554 spage->sblock = sblock; 1586 spage->sblock = sblock;
1555 spage->bdev = sdev->dev->bdev; 1587 spage->dev = sdev->dev;
1556 spage->flags = flags; 1588 spage->flags = flags;
1557 spage->generation = gen; 1589 spage->generation = gen;
1558 spage->logical = logical; 1590 spage->logical = logical;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f267718cbd1..96eb9fef7bd 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -188,7 +188,8 @@ void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
188 va_start(args, fmt); 188 va_start(args, fmt);
189 189
190 if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') { 190 if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') {
191 strncpy(lvl, fmt, 3); 191 memcpy(lvl, fmt, 3);
192 lvl[3] = '\0';
192 fmt += 3; 193 fmt += 3;
193 type = logtypes[fmt[1] - '0']; 194 type = logtypes[fmt[1] - '0'];
194 } else 195 } else
@@ -435,11 +436,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
435 case Opt_thread_pool: 436 case Opt_thread_pool:
436 intarg = 0; 437 intarg = 0;
437 match_int(&args[0], &intarg); 438 match_int(&args[0], &intarg);
438 if (intarg) { 439 if (intarg)
439 info->thread_pool_size = intarg; 440 info->thread_pool_size = intarg;
440 printk(KERN_INFO "btrfs: thread pool %d\n",
441 info->thread_pool_size);
442 }
443 break; 441 break;
444 case Opt_max_inline: 442 case Opt_max_inline:
445 num = match_strdup(&args[0]); 443 num = match_strdup(&args[0]);
@@ -755,7 +753,6 @@ static int btrfs_fill_super(struct super_block *sb,
755 void *data, int silent) 753 void *data, int silent)
756{ 754{
757 struct inode *inode; 755 struct inode *inode;
758 struct dentry *root_dentry;
759 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 756 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
760 struct btrfs_key key; 757 struct btrfs_key key;
761 int err; 758 int err;
@@ -770,7 +767,7 @@ static int btrfs_fill_super(struct super_block *sb,
770#ifdef CONFIG_BTRFS_FS_POSIX_ACL 767#ifdef CONFIG_BTRFS_FS_POSIX_ACL
771 sb->s_flags |= MS_POSIXACL; 768 sb->s_flags |= MS_POSIXACL;
772#endif 769#endif
773 770 sb->s_flags |= MS_I_VERSION;
774 err = open_ctree(sb, fs_devices, (char *)data); 771 err = open_ctree(sb, fs_devices, (char *)data);
775 if (err) { 772 if (err) {
776 printk("btrfs: open_ctree failed\n"); 773 printk("btrfs: open_ctree failed\n");
@@ -786,15 +783,12 @@ static int btrfs_fill_super(struct super_block *sb,
786 goto fail_close; 783 goto fail_close;
787 } 784 }
788 785
789 root_dentry = d_alloc_root(inode); 786 sb->s_root = d_make_root(inode);
790 if (!root_dentry) { 787 if (!sb->s_root) {
791 iput(inode);
792 err = -ENOMEM; 788 err = -ENOMEM;
793 goto fail_close; 789 goto fail_close;
794 } 790 }
795 791
796 sb->s_root = root_dentry;
797
798 save_mount_options(sb, data); 792 save_mount_options(sb, data);
799 cleancache_init_fs(sb); 793 cleancache_init_fs(sb);
800 sb->s_flags |= MS_ACTIVE; 794 sb->s_flags |= MS_ACTIVE;
@@ -929,63 +923,48 @@ static inline int is_subvolume_inode(struct inode *inode)
929 */ 923 */
930static char *setup_root_args(char *args) 924static char *setup_root_args(char *args)
931{ 925{
932 unsigned copied = 0; 926 unsigned len = strlen(args) + 2 + 1;
933 unsigned len = strlen(args) + 2; 927 char *src, *dst, *buf;
934 char *pos;
935 char *ret;
936 928
937 /* 929 /*
938 * We need the same args as before, but minus 930 * We need the same args as before, but with this substitution:
939 * 931 * s!subvol=[^,]+!subvolid=0!
940 * subvol=a
941 *
942 * and add
943 * 932 *
944 * subvolid=0 933 * Since the replacement string is up to 2 bytes longer than the
945 * 934 * original, allocate strlen(args) + 2 + 1 bytes.
946 * which is a difference of 2 characters, so we allocate strlen(args) +
947 * 2 characters.
948 */ 935 */
949 ret = kzalloc(len * sizeof(char), GFP_NOFS);
950 if (!ret)
951 return NULL;
952 pos = strstr(args, "subvol=");
953 936
937 src = strstr(args, "subvol=");
954 /* This shouldn't happen, but just in case.. */ 938 /* This shouldn't happen, but just in case.. */
955 if (!pos) { 939 if (!src)
956 kfree(ret); 940 return NULL;
941
942 buf = dst = kmalloc(len, GFP_NOFS);
943 if (!buf)
957 return NULL; 944 return NULL;
958 }
959 945
960 /* 946 /*
961 * The subvol=<> arg is not at the front of the string, copy everybody 947 * If the subvol= arg is not at the start of the string,
962 * up to that into ret. 948 * copy whatever precedes it into buf.
963 */ 949 */
964 if (pos != args) { 950 if (src != args) {
965 *pos = '\0'; 951 *src++ = '\0';
966 strcpy(ret, args); 952 strcpy(buf, args);
967 copied += strlen(args); 953 dst += strlen(args);
968 pos++;
969 } 954 }
970 955
971 strncpy(ret + copied, "subvolid=0", len - copied); 956 strcpy(dst, "subvolid=0");
972 957 dst += strlen("subvolid=0");
973 /* Length of subvolid=0 */
974 copied += 10;
975 958
976 /* 959 /*
977 * If there is no , after the subvol= option then we know there's no 960 * If there is a "," after the original subvol=... string,
978 * other options and we can just return. 961 * copy that suffix into our buffer. Otherwise, we're done.
979 */ 962 */
980 pos = strchr(pos, ','); 963 src = strchr(src, ',');
981 if (!pos) 964 if (src)
982 return ret; 965 strcpy(dst, src);
983
984 /* Copy the rest of the arguments into our buffer */
985 strncpy(ret + copied, pos, len - copied);
986 copied += strlen(pos);
987 966
988 return ret; 967 return buf;
989} 968}
990 969
991static struct dentry *mount_subvol(const char *subvol_name, int flags, 970static struct dentry *mount_subvol(const char *subvol_name, int flags,
@@ -1122,6 +1101,40 @@ error_fs_info:
1122 return ERR_PTR(error); 1101 return ERR_PTR(error);
1123} 1102}
1124 1103
1104static void btrfs_set_max_workers(struct btrfs_workers *workers, int new_limit)
1105{
1106 spin_lock_irq(&workers->lock);
1107 workers->max_workers = new_limit;
1108 spin_unlock_irq(&workers->lock);
1109}
1110
1111static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
1112 int new_pool_size, int old_pool_size)
1113{
1114 if (new_pool_size == old_pool_size)
1115 return;
1116
1117 fs_info->thread_pool_size = new_pool_size;
1118
1119 printk(KERN_INFO "btrfs: resize thread pool %d -> %d\n",
1120 old_pool_size, new_pool_size);
1121
1122 btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size);
1123 btrfs_set_max_workers(&fs_info->workers, new_pool_size);
1124 btrfs_set_max_workers(&fs_info->delalloc_workers, new_pool_size);
1125 btrfs_set_max_workers(&fs_info->submit_workers, new_pool_size);
1126 btrfs_set_max_workers(&fs_info->caching_workers, new_pool_size);
1127 btrfs_set_max_workers(&fs_info->fixup_workers, new_pool_size);
1128 btrfs_set_max_workers(&fs_info->endio_workers, new_pool_size);
1129 btrfs_set_max_workers(&fs_info->endio_meta_workers, new_pool_size);
1130 btrfs_set_max_workers(&fs_info->endio_meta_write_workers, new_pool_size);
1131 btrfs_set_max_workers(&fs_info->endio_write_workers, new_pool_size);
1132 btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size);
1133 btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size);
1134 btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size);
1135 btrfs_set_max_workers(&fs_info->scrub_workers, new_pool_size);
1136}
1137
1125static int btrfs_remount(struct super_block *sb, int *flags, char *data) 1138static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1126{ 1139{
1127 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 1140 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
@@ -1141,6 +1154,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1141 goto restore; 1154 goto restore;
1142 } 1155 }
1143 1156
1157 btrfs_resize_thread_pool(fs_info,
1158 fs_info->thread_pool_size, old_thread_pool_size);
1159
1144 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 1160 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
1145 return 0; 1161 return 0;
1146 1162
@@ -1184,7 +1200,8 @@ restore:
1184 fs_info->compress_type = old_compress_type; 1200 fs_info->compress_type = old_compress_type;
1185 fs_info->max_inline = old_max_inline; 1201 fs_info->max_inline = old_max_inline;
1186 fs_info->alloc_start = old_alloc_start; 1202 fs_info->alloc_start = old_alloc_start;
1187 fs_info->thread_pool_size = old_thread_pool_size; 1203 btrfs_resize_thread_pool(fs_info,
1204 old_thread_pool_size, fs_info->thread_pool_size);
1188 fs_info->metadata_ratio = old_metadata_ratio; 1205 fs_info->metadata_ratio = old_metadata_ratio;
1189 return ret; 1206 return ret;
1190} 1207}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 667735fb45e..1791c6e3d83 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -28,6 +28,7 @@
28#include "locking.h" 28#include "locking.h"
29#include "tree-log.h" 29#include "tree-log.h"
30#include "inode-map.h" 30#include "inode-map.h"
31#include "volumes.h"
31 32
32#define BTRFS_ROOT_TRANS_TAG 0 33#define BTRFS_ROOT_TRANS_TAG 0
33 34
@@ -777,6 +778,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
777 if (ret) 778 if (ret)
778 return ret; 779 return ret;
779 780
781 ret = btrfs_run_dev_stats(trans, root->fs_info);
782 BUG_ON(ret);
783
780 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 784 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
781 next = fs_info->dirty_cowonly_roots.next; 785 next = fs_info->dirty_cowonly_roots.next;
782 list_del_init(next); 786 list_del_init(next);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index eb1ae908582..2017d0ff511 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1628,7 +1628,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1628 int i; 1628 int i;
1629 int ret; 1629 int ret;
1630 1630
1631 btrfs_read_buffer(eb, gen); 1631 ret = btrfs_read_buffer(eb, gen);
1632 if (ret)
1633 return ret;
1632 1634
1633 level = btrfs_header_level(eb); 1635 level = btrfs_header_level(eb);
1634 1636
@@ -1749,7 +1751,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1749 1751
1750 path->slots[*level]++; 1752 path->slots[*level]++;
1751 if (wc->free) { 1753 if (wc->free) {
1752 btrfs_read_buffer(next, ptr_gen); 1754 ret = btrfs_read_buffer(next, ptr_gen);
1755 if (ret) {
1756 free_extent_buffer(next);
1757 return ret;
1758 }
1753 1759
1754 btrfs_tree_lock(next); 1760 btrfs_tree_lock(next);
1755 btrfs_set_lock_blocking(next); 1761 btrfs_set_lock_blocking(next);
@@ -1766,7 +1772,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1766 free_extent_buffer(next); 1772 free_extent_buffer(next);
1767 continue; 1773 continue;
1768 } 1774 }
1769 btrfs_read_buffer(next, ptr_gen); 1775 ret = btrfs_read_buffer(next, ptr_gen);
1776 if (ret) {
1777 free_extent_buffer(next);
1778 return ret;
1779 }
1770 1780
1771 WARN_ON(*level <= 0); 1781 WARN_ON(*level <= 0);
1772 if (path->nodes[*level-1]) 1782 if (path->nodes[*level-1])
@@ -2657,6 +2667,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
2657 btrfs_release_path(path); 2667 btrfs_release_path(path);
2658 } 2668 }
2659 btrfs_release_path(path); 2669 btrfs_release_path(path);
2670 if (ret > 0)
2671 ret = 0;
2660 return ret; 2672 return ret;
2661} 2673}
2662 2674
@@ -3028,21 +3040,6 @@ out:
3028 return ret; 3040 return ret;
3029} 3041}
3030 3042
3031static int inode_in_log(struct btrfs_trans_handle *trans,
3032 struct inode *inode)
3033{
3034 struct btrfs_root *root = BTRFS_I(inode)->root;
3035 int ret = 0;
3036
3037 mutex_lock(&root->log_mutex);
3038 if (BTRFS_I(inode)->logged_trans == trans->transid &&
3039 BTRFS_I(inode)->last_sub_trans <= root->last_log_commit)
3040 ret = 1;
3041 mutex_unlock(&root->log_mutex);
3042 return ret;
3043}
3044
3045
3046/* 3043/*
3047 * helper function around btrfs_log_inode to make sure newly created 3044 * helper function around btrfs_log_inode to make sure newly created
3048 * parent directories also end up in the log. A minimal inode and backref 3045 * parent directories also end up in the log. A minimal inode and backref
@@ -3083,7 +3080,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
3083 if (ret) 3080 if (ret)
3084 goto end_no_trans; 3081 goto end_no_trans;
3085 3082
3086 if (inode_in_log(trans, inode)) { 3083 if (btrfs_inode_in_log(inode, trans->transid)) {
3087 ret = BTRFS_NO_LOG_SYNC; 3084 ret = BTRFS_NO_LOG_SYNC;
3088 goto end_no_trans; 3085 goto end_no_trans;
3089 } 3086 }
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
index 2ef59400ad6..ab942f46b3d 100644
--- a/fs/btrfs/ulist.c
+++ b/fs/btrfs/ulist.c
@@ -95,7 +95,7 @@ EXPORT_SYMBOL(ulist_reinit);
95 * 95 *
96 * The allocated ulist will be returned in an initialized state. 96 * The allocated ulist will be returned in an initialized state.
97 */ 97 */
98struct ulist *ulist_alloc(unsigned long gfp_mask) 98struct ulist *ulist_alloc(gfp_t gfp_mask)
99{ 99{
100 struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask); 100 struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask);
101 101
@@ -144,13 +144,13 @@ EXPORT_SYMBOL(ulist_free);
144 * unaltered. 144 * unaltered.
145 */ 145 */
146int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, 146int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
147 unsigned long gfp_mask) 147 gfp_t gfp_mask)
148{ 148{
149 return ulist_add_merge(ulist, val, aux, NULL, gfp_mask); 149 return ulist_add_merge(ulist, val, aux, NULL, gfp_mask);
150} 150}
151 151
152int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, 152int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux,
153 unsigned long *old_aux, unsigned long gfp_mask) 153 unsigned long *old_aux, gfp_t gfp_mask)
154{ 154{
155 int i; 155 int i;
156 156
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index f1b1bf00c5a..21bdc8ec813 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -63,12 +63,12 @@ struct ulist {
63void ulist_init(struct ulist *ulist); 63void ulist_init(struct ulist *ulist);
64void ulist_fini(struct ulist *ulist); 64void ulist_fini(struct ulist *ulist);
65void ulist_reinit(struct ulist *ulist); 65void ulist_reinit(struct ulist *ulist);
66struct ulist *ulist_alloc(unsigned long gfp_mask); 66struct ulist *ulist_alloc(gfp_t gfp_mask);
67void ulist_free(struct ulist *ulist); 67void ulist_free(struct ulist *ulist);
68int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, 68int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
69 unsigned long gfp_mask); 69 gfp_t gfp_mask);
70int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, 70int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux,
71 unsigned long *old_aux, unsigned long gfp_mask); 71 unsigned long *old_aux, gfp_t gfp_mask);
72struct ulist_node *ulist_next(struct ulist *ulist, 72struct ulist_node *ulist_next(struct ulist *ulist,
73 struct ulist_iterator *uiter); 73 struct ulist_iterator *uiter);
74 74
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1411b99555a..7782020996f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -23,6 +23,7 @@
23#include <linux/random.h> 23#include <linux/random.h>
24#include <linux/iocontext.h> 24#include <linux/iocontext.h>
25#include <linux/capability.h> 25#include <linux/capability.h>
26#include <linux/ratelimit.h>
26#include <linux/kthread.h> 27#include <linux/kthread.h>
27#include <asm/div64.h> 28#include <asm/div64.h>
28#include "compat.h" 29#include "compat.h"
@@ -39,6 +40,8 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans,
39 struct btrfs_root *root, 40 struct btrfs_root *root,
40 struct btrfs_device *device); 41 struct btrfs_device *device);
41static int btrfs_relocate_sys_chunks(struct btrfs_root *root); 42static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
43static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
44static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
42 45
43static DEFINE_MUTEX(uuid_mutex); 46static DEFINE_MUTEX(uuid_mutex);
44static LIST_HEAD(fs_uuids); 47static LIST_HEAD(fs_uuids);
@@ -361,6 +364,7 @@ static noinline int device_list_add(const char *path,
361 return -ENOMEM; 364 return -ENOMEM;
362 } 365 }
363 device->devid = devid; 366 device->devid = devid;
367 device->dev_stats_valid = 0;
364 device->work.func = pending_bios_fn; 368 device->work.func = pending_bios_fn;
365 memcpy(device->uuid, disk_super->dev_item.uuid, 369 memcpy(device->uuid, disk_super->dev_item.uuid,
366 BTRFS_UUID_SIZE); 370 BTRFS_UUID_SIZE);
@@ -1633,7 +1637,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1633 int ret = 0; 1637 int ret = 0;
1634 1638
1635 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) 1639 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
1636 return -EINVAL; 1640 return -EROFS;
1637 1641
1638 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, 1642 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
1639 root->fs_info->bdev_holder); 1643 root->fs_info->bdev_holder);
@@ -4001,13 +4005,58 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
4001 return 0; 4005 return 0;
4002} 4006}
4003 4007
4008static void *merge_stripe_index_into_bio_private(void *bi_private,
4009 unsigned int stripe_index)
4010{
4011 /*
4012 * with single, dup, RAID0, RAID1 and RAID10, stripe_index is
4013 * at most 1.
4014 * The alternative solution (instead of stealing bits from the
4015 * pointer) would be to allocate an intermediate structure
4016 * that contains the old private pointer plus the stripe_index.
4017 */
4018 BUG_ON((((uintptr_t)bi_private) & 3) != 0);
4019 BUG_ON(stripe_index > 3);
4020 return (void *)(((uintptr_t)bi_private) | stripe_index);
4021}
4022
4023static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private)
4024{
4025 return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3));
4026}
4027
4028static unsigned int extract_stripe_index_from_bio_private(void *bi_private)
4029{
4030 return (unsigned int)((uintptr_t)bi_private) & 3;
4031}
4032
4004static void btrfs_end_bio(struct bio *bio, int err) 4033static void btrfs_end_bio(struct bio *bio, int err)
4005{ 4034{
4006 struct btrfs_bio *bbio = bio->bi_private; 4035 struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private);
4007 int is_orig_bio = 0; 4036 int is_orig_bio = 0;
4008 4037
4009 if (err) 4038 if (err) {
4010 atomic_inc(&bbio->error); 4039 atomic_inc(&bbio->error);
4040 if (err == -EIO || err == -EREMOTEIO) {
4041 unsigned int stripe_index =
4042 extract_stripe_index_from_bio_private(
4043 bio->bi_private);
4044 struct btrfs_device *dev;
4045
4046 BUG_ON(stripe_index >= bbio->num_stripes);
4047 dev = bbio->stripes[stripe_index].dev;
4048 if (bio->bi_rw & WRITE)
4049 btrfs_dev_stat_inc(dev,
4050 BTRFS_DEV_STAT_WRITE_ERRS);
4051 else
4052 btrfs_dev_stat_inc(dev,
4053 BTRFS_DEV_STAT_READ_ERRS);
4054 if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
4055 btrfs_dev_stat_inc(dev,
4056 BTRFS_DEV_STAT_FLUSH_ERRS);
4057 btrfs_dev_stat_print_on_error(dev);
4058 }
4059 }
4011 4060
4012 if (bio == bbio->orig_bio) 4061 if (bio == bbio->orig_bio)
4013 is_orig_bio = 1; 4062 is_orig_bio = 1;
@@ -4149,6 +4198,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
4149 bio = first_bio; 4198 bio = first_bio;
4150 } 4199 }
4151 bio->bi_private = bbio; 4200 bio->bi_private = bbio;
4201 bio->bi_private = merge_stripe_index_into_bio_private(
4202 bio->bi_private, (unsigned int)dev_nr);
4152 bio->bi_end_io = btrfs_end_bio; 4203 bio->bi_end_io = btrfs_end_bio;
4153 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; 4204 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
4154 dev = bbio->stripes[dev_nr].dev; 4205 dev = bbio->stripes[dev_nr].dev;
@@ -4509,6 +4560,28 @@ int btrfs_read_sys_array(struct btrfs_root *root)
4509 return ret; 4560 return ret;
4510} 4561}
4511 4562
4563struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
4564 u64 logical, int mirror_num)
4565{
4566 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
4567 int ret;
4568 u64 map_length = 0;
4569 struct btrfs_bio *bbio = NULL;
4570 struct btrfs_device *device;
4571
4572 BUG_ON(mirror_num == 0);
4573 ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio,
4574 mirror_num);
4575 if (ret) {
4576 BUG_ON(bbio != NULL);
4577 return NULL;
4578 }
4579 BUG_ON(mirror_num != bbio->mirror_num);
4580 device = bbio->stripes[mirror_num - 1].dev;
4581 kfree(bbio);
4582 return device;
4583}
4584
4512int btrfs_read_chunk_tree(struct btrfs_root *root) 4585int btrfs_read_chunk_tree(struct btrfs_root *root)
4513{ 4586{
4514 struct btrfs_path *path; 4587 struct btrfs_path *path;
@@ -4583,3 +4656,230 @@ error:
4583 btrfs_free_path(path); 4656 btrfs_free_path(path);
4584 return ret; 4657 return ret;
4585} 4658}
4659
4660static void __btrfs_reset_dev_stats(struct btrfs_device *dev)
4661{
4662 int i;
4663
4664 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
4665 btrfs_dev_stat_reset(dev, i);
4666}
4667
4668int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
4669{
4670 struct btrfs_key key;
4671 struct btrfs_key found_key;
4672 struct btrfs_root *dev_root = fs_info->dev_root;
4673 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
4674 struct extent_buffer *eb;
4675 int slot;
4676 int ret = 0;
4677 struct btrfs_device *device;
4678 struct btrfs_path *path = NULL;
4679 int i;
4680
4681 path = btrfs_alloc_path();
4682 if (!path) {
4683 ret = -ENOMEM;
4684 goto out;
4685 }
4686
4687 mutex_lock(&fs_devices->device_list_mutex);
4688 list_for_each_entry(device, &fs_devices->devices, dev_list) {
4689 int item_size;
4690 struct btrfs_dev_stats_item *ptr;
4691
4692 key.objectid = 0;
4693 key.type = BTRFS_DEV_STATS_KEY;
4694 key.offset = device->devid;
4695 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
4696 if (ret) {
4697 printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
4698 device->name, (unsigned long long)device->devid);
4699 __btrfs_reset_dev_stats(device);
4700 device->dev_stats_valid = 1;
4701 btrfs_release_path(path);
4702 continue;
4703 }
4704 slot = path->slots[0];
4705 eb = path->nodes[0];
4706 btrfs_item_key_to_cpu(eb, &found_key, slot);
4707 item_size = btrfs_item_size_nr(eb, slot);
4708
4709 ptr = btrfs_item_ptr(eb, slot,
4710 struct btrfs_dev_stats_item);
4711
4712 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
4713 if (item_size >= (1 + i) * sizeof(__le64))
4714 btrfs_dev_stat_set(device, i,
4715 btrfs_dev_stats_value(eb, ptr, i));
4716 else
4717 btrfs_dev_stat_reset(device, i);
4718 }
4719
4720 device->dev_stats_valid = 1;
4721 btrfs_dev_stat_print_on_load(device);
4722 btrfs_release_path(path);
4723 }
4724 mutex_unlock(&fs_devices->device_list_mutex);
4725
4726out:
4727 btrfs_free_path(path);
4728 return ret < 0 ? ret : 0;
4729}
4730
4731static int update_dev_stat_item(struct btrfs_trans_handle *trans,
4732 struct btrfs_root *dev_root,
4733 struct btrfs_device *device)
4734{
4735 struct btrfs_path *path;
4736 struct btrfs_key key;
4737 struct extent_buffer *eb;
4738 struct btrfs_dev_stats_item *ptr;
4739 int ret;
4740 int i;
4741
4742 key.objectid = 0;
4743 key.type = BTRFS_DEV_STATS_KEY;
4744 key.offset = device->devid;
4745
4746 path = btrfs_alloc_path();
4747 BUG_ON(!path);
4748 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
4749 if (ret < 0) {
4750 printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
4751 ret, device->name);
4752 goto out;
4753 }
4754
4755 if (ret == 0 &&
4756 btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
4757 /* need to delete old one and insert a new one */
4758 ret = btrfs_del_item(trans, dev_root, path);
4759 if (ret != 0) {
4760 printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
4761 device->name, ret);
4762 goto out;
4763 }
4764 ret = 1;
4765 }
4766
4767 if (ret == 1) {
4768 /* need to insert a new item */
4769 btrfs_release_path(path);
4770 ret = btrfs_insert_empty_item(trans, dev_root, path,
4771 &key, sizeof(*ptr));
4772 if (ret < 0) {
4773 printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
4774 device->name, ret);
4775 goto out;
4776 }
4777 }
4778
4779 eb = path->nodes[0];
4780 ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item);
4781 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
4782 btrfs_set_dev_stats_value(eb, ptr, i,
4783 btrfs_dev_stat_read(device, i));
4784 btrfs_mark_buffer_dirty(eb);
4785
4786out:
4787 btrfs_free_path(path);
4788 return ret;
4789}
4790
4791/*
4792 * called from commit_transaction. Writes all changed device stats to disk.
4793 */
4794int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
4795 struct btrfs_fs_info *fs_info)
4796{
4797 struct btrfs_root *dev_root = fs_info->dev_root;
4798 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
4799 struct btrfs_device *device;
4800 int ret = 0;
4801
4802 mutex_lock(&fs_devices->device_list_mutex);
4803 list_for_each_entry(device, &fs_devices->devices, dev_list) {
4804 if (!device->dev_stats_valid || !device->dev_stats_dirty)
4805 continue;
4806
4807 ret = update_dev_stat_item(trans, dev_root, device);
4808 if (!ret)
4809 device->dev_stats_dirty = 0;
4810 }
4811 mutex_unlock(&fs_devices->device_list_mutex);
4812
4813 return ret;
4814}
4815
4816void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
4817{
4818 btrfs_dev_stat_inc(dev, index);
4819 btrfs_dev_stat_print_on_error(dev);
4820}
4821
4822void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
4823{
4824 if (!dev->dev_stats_valid)
4825 return;
4826 printk_ratelimited(KERN_ERR
4827 "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
4828 dev->name,
4829 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
4830 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
4831 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
4832 btrfs_dev_stat_read(dev,
4833 BTRFS_DEV_STAT_CORRUPTION_ERRS),
4834 btrfs_dev_stat_read(dev,
4835 BTRFS_DEV_STAT_GENERATION_ERRS));
4836}
4837
4838static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
4839{
4840 printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
4841 dev->name,
4842 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
4843 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
4844 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
4845 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
4846 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
4847}
4848
4849int btrfs_get_dev_stats(struct btrfs_root *root,
4850 struct btrfs_ioctl_get_dev_stats *stats,
4851 int reset_after_read)
4852{
4853 struct btrfs_device *dev;
4854 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
4855 int i;
4856
4857 mutex_lock(&fs_devices->device_list_mutex);
4858 dev = btrfs_find_device(root, stats->devid, NULL, NULL);
4859 mutex_unlock(&fs_devices->device_list_mutex);
4860
4861 if (!dev) {
4862 printk(KERN_WARNING
4863 "btrfs: get dev_stats failed, device not found\n");
4864 return -ENODEV;
4865 } else if (!dev->dev_stats_valid) {
4866 printk(KERN_WARNING
4867 "btrfs: get dev_stats failed, not yet valid\n");
4868 return -ENODEV;
4869 } else if (reset_after_read) {
4870 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
4871 if (stats->nr_items > i)
4872 stats->values[i] =
4873 btrfs_dev_stat_read_and_reset(dev, i);
4874 else
4875 btrfs_dev_stat_reset(dev, i);
4876 }
4877 } else {
4878 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
4879 if (stats->nr_items > i)
4880 stats->values[i] = btrfs_dev_stat_read(dev, i);
4881 }
4882 if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX)
4883 stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
4884 return 0;
4885}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index bb6b03f97aa..3406a88ca83 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -22,6 +22,7 @@
22#include <linux/bio.h> 22#include <linux/bio.h>
23#include <linux/sort.h> 23#include <linux/sort.h>
24#include "async-thread.h" 24#include "async-thread.h"
25#include "ioctl.h"
25 26
26#define BTRFS_STRIPE_LEN (64 * 1024) 27#define BTRFS_STRIPE_LEN (64 * 1024)
27 28
@@ -106,6 +107,11 @@ struct btrfs_device {
106 struct completion flush_wait; 107 struct completion flush_wait;
107 int nobarriers; 108 int nobarriers;
108 109
110 /* disk I/O failure stats. For detailed description refer to
111 * enum btrfs_dev_stat_values in ioctl.h */
112 int dev_stats_valid;
113 int dev_stats_dirty; /* counters need to be written to disk */
114 atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];
109}; 115};
110 116
111struct btrfs_fs_devices { 117struct btrfs_fs_devices {
@@ -281,4 +287,50 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
281int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 287int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
282int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, 288int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
283 u64 *start, u64 *max_avail); 289 u64 *start, u64 *max_avail);
290struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
291 u64 logical, int mirror_num);
292void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
293void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
294int btrfs_get_dev_stats(struct btrfs_root *root,
295 struct btrfs_ioctl_get_dev_stats *stats,
296 int reset_after_read);
297int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
298int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
299 struct btrfs_fs_info *fs_info);
300
301static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
302 int index)
303{
304 atomic_inc(dev->dev_stat_values + index);
305 dev->dev_stats_dirty = 1;
306}
307
308static inline int btrfs_dev_stat_read(struct btrfs_device *dev,
309 int index)
310{
311 return atomic_read(dev->dev_stat_values + index);
312}
313
314static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev,
315 int index)
316{
317 int ret;
318
319 ret = atomic_xchg(dev->dev_stat_values + index, 0);
320 dev->dev_stats_dirty = 1;
321 return ret;
322}
323
324static inline void btrfs_dev_stat_set(struct btrfs_device *dev,
325 int index, unsigned long val)
326{
327 atomic_set(dev->dev_stat_values + index, val);
328 dev->dev_stats_dirty = 1;
329}
330
/*
 * Clear the per-device statistics counter selected by @index.
 *
 * @dev:   device whose counter is cleared
 * @index: slot in dev->dev_stat_values[]; not range-checked here
 *
 * Implemented as btrfs_dev_stat_set() with a value of zero, so the device
 * is flagged dirty as a side effect.
 */
static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, int index)
{
	btrfs_dev_stat_set(dev, index, 0UL);
}
284#endif 336#endif
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index e7a5659087e..3f4e2d69e83 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -196,6 +196,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
196 if (ret) 196 if (ret)
197 goto out; 197 goto out;
198 198
199 inode_inc_iversion(inode);
199 inode->i_ctime = CURRENT_TIME; 200 inode->i_ctime = CURRENT_TIME;
200 ret = btrfs_update_inode(trans, root, inode); 201 ret = btrfs_update_inode(trans, root, inode);
201 BUG_ON(ret); 202 BUG_ON(ret);
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index faccd47c6c4..92c20654cc5 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -370,9 +370,9 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
370 PAGE_CACHE_SIZE - buf_offset); 370 PAGE_CACHE_SIZE - buf_offset);
371 bytes = min(bytes, bytes_left); 371 bytes = min(bytes, bytes_left);
372 372
373 kaddr = kmap_atomic(dest_page, KM_USER0); 373 kaddr = kmap_atomic(dest_page);
374 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes); 374 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
375 kunmap_atomic(kaddr, KM_USER0); 375 kunmap_atomic(kaddr);
376 376
377 pg_offset += bytes; 377 pg_offset += bytes;
378 bytes_left -= bytes; 378 bytes_left -= bytes;