about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2007-03-29 15:15:27 -0400
committer David Woodhouse <dwmw2@hera.kernel.org> 2007-03-29 15:15:27 -0400
commit f254e52c1ce550fdaa0d31f5e068f0d67c2485d4 (patch)
tree 1232b7e16b6f0b1b1b150c5df396452cac5e3853
parent 75dfe3960e602e63ea42ac7a2a0520832b189ffa (diff)
Btrfs: verify csums on read
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- fs/btrfs/ctree.c 9
-rw-r--r-- fs/btrfs/ctree.h 47
-rw-r--r-- fs/btrfs/disk-io.c 37
-rw-r--r-- fs/btrfs/disk-io.h 2
-rw-r--r-- fs/btrfs/file-item.c 60
-rw-r--r-- fs/btrfs/super.c 184
6 files changed, 279 insertions, 60 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 48c611948d11..1eb333e2e18b 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -109,14 +109,14 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
109 return 1; 109 return 1;
110 if (k1.objectid < k2->objectid) 110 if (k1.objectid < k2->objectid)
111 return -1; 111 return -1;
112 if (k1.flags > k2->flags)
113 return 1;
114 if (k1.flags < k2->flags)
115 return -1;
116 if (k1.offset > k2->offset) 112 if (k1.offset > k2->offset)
117 return 1; 113 return 1;
118 if (k1.offset < k2->offset) 114 if (k1.offset < k2->offset)
119 return -1; 115 return -1;
116 if (k1.flags > k2->flags)
117 return 1;
118 if (k1.flags < k2->flags)
119 return -1;
120 return 0; 120 return 0;
121} 121}
122 122
@@ -1165,7 +1165,6 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root
1165 BUG(); 1165 BUG();
1166 ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1); 1166 ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
1167 if (ret == 0) { 1167 if (ret == 0) {
1168 btrfs_release_path(root, path);
1169 return -EEXIST; 1168 return -EEXIST;
1170 } 1169 }
1171 if (ret < 0) 1170 if (ret < 0)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6ff87f44c5d8..df1a025a771c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -21,6 +21,9 @@ struct btrfs_transaction;
21 */ 21 */
22#define BTRFS_NAME_LEN 255 22#define BTRFS_NAME_LEN 255
23 23
24/* 32 bytes in various csum fields */
25#define BTRFS_CSUM_SIZE 32
26
24/* 27/*
25 * the key defines the order in the tree, and so it also defines (optimal) 28 * the key defines the order in the tree, and so it also defines (optimal)
26 * block layout. objectid corresonds to the inode number. The flags 29 * block layout. objectid corresonds to the inode number. The flags
@@ -37,21 +40,21 @@ struct btrfs_transaction;
37 */ 40 */
38struct btrfs_disk_key { 41struct btrfs_disk_key {
39 __le64 objectid; 42 __le64 objectid;
40 __le32 flags;
41 __le64 offset; 43 __le64 offset;
44 __le32 flags;
42} __attribute__ ((__packed__)); 45} __attribute__ ((__packed__));
43 46
44struct btrfs_key { 47struct btrfs_key {
45 u64 objectid; 48 u64 objectid;
46 u32 flags;
47 u64 offset; 49 u64 offset;
50 u32 flags;
48} __attribute__ ((__packed__)); 51} __attribute__ ((__packed__));
49 52
50/* 53/*
51 * every tree block (leaf or node) starts with this header. 54 * every tree block (leaf or node) starts with this header.
52 */ 55 */
53struct btrfs_header { 56struct btrfs_header {
54 __le32 csum[8]; 57 u8 csum[BTRFS_CSUM_SIZE];
55 u8 fsid[16]; /* FS specific uuid */ 58 u8 fsid[16]; /* FS specific uuid */
56 __le64 blocknr; /* which block this node is supposed to live in */ 59 __le64 blocknr; /* which block this node is supposed to live in */
57 __le64 generation; 60 __le64 generation;
@@ -75,7 +78,7 @@ struct buffer_head;
75 * it currently lacks any block count etc etc 78 * it currently lacks any block count etc etc
76 */ 79 */
77struct btrfs_super_block { 80struct btrfs_super_block {
78 __le32 csum[8]; 81 u8 csum[BTRFS_CSUM_SIZE];
79 /* the first 3 fields must match struct btrfs_header */ 82 /* the first 3 fields must match struct btrfs_header */
80 u8 fsid[16]; /* FS specific uuid */ 83 u8 fsid[16]; /* FS specific uuid */
81 __le64 blocknr; /* this block number */ 84 __le64 blocknr; /* this block number */
@@ -147,7 +150,7 @@ struct btrfs_extent_item {
147} __attribute__ ((__packed__)); 150} __attribute__ ((__packed__));
148 151
149struct btrfs_inode_timespec { 152struct btrfs_inode_timespec {
150 __le32 sec; 153 __le64 sec;
151 __le32 nsec; 154 __le32 nsec;
152} __attribute__ ((__packed__)); 155} __attribute__ ((__packed__));
153 156
@@ -214,6 +217,10 @@ struct btrfs_file_extent_item {
214 __le64 num_blocks; 217 __le64 num_blocks;
215} __attribute__ ((__packed__)); 218} __attribute__ ((__packed__));
216 219
220struct btrfs_csum_item {
221 u8 csum[BTRFS_CSUM_SIZE];
222} __attribute__ ((__packed__));
223
217struct btrfs_inode_map_item { 224struct btrfs_inode_map_item {
218 struct btrfs_disk_key key; 225 struct btrfs_disk_key key;
219} __attribute__ ((__packed__)); 226} __attribute__ ((__packed__));
@@ -284,26 +291,31 @@ struct btrfs_root {
284 */ 291 */
285#define BTRFS_EXTENT_DATA_KEY 4 292#define BTRFS_EXTENT_DATA_KEY 4
286/* 293/*
294 * csum items have the checksums for data in the extents
295 */
296#define BTRFS_CSUM_ITEM_KEY 5
297
298/*
287 * root items point to tree roots. There are typically in the root 299 * root items point to tree roots. There are typically in the root
288 * tree used by the super block to find all the other trees 300 * tree used by the super block to find all the other trees
289 */ 301 */
290#define BTRFS_ROOT_ITEM_KEY 5 302#define BTRFS_ROOT_ITEM_KEY 6
291/* 303/*
292 * extent items are in the extent map tree. These record which blocks 304 * extent items are in the extent map tree. These record which blocks
293 * are used, and how many references there are to each block 305 * are used, and how many references there are to each block
294 */ 306 */
295#define BTRFS_EXTENT_ITEM_KEY 6 307#define BTRFS_EXTENT_ITEM_KEY 7
296 308
297/* 309/*
298 * the inode map records which inode numbers are in use and where 310 * the inode map records which inode numbers are in use and where
299 * they actually live on disk 311 * they actually live on disk
300 */ 312 */
301#define BTRFS_INODE_MAP_ITEM_KEY 7 313#define BTRFS_INODE_MAP_ITEM_KEY 8
302/* 314/*
303 * string items are for debugging. They just store a short string of 315 * string items are for debugging. They just store a short string of
304 * data in the FS 316 * data in the FS
305 */ 317 */
306#define BTRFS_STRING_ITEM_KEY 8 318#define BTRFS_STRING_ITEM_KEY 9
307 319
308static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) 320static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i)
309{ 321{
@@ -407,15 +419,15 @@ static inline void btrfs_set_inode_compat_flags(struct btrfs_inode_item *i,
407 i->compat_flags = cpu_to_le16(val); 419 i->compat_flags = cpu_to_le16(val);
408} 420}
409 421
410static inline u32 btrfs_timespec_sec(struct btrfs_inode_timespec *ts) 422static inline u64 btrfs_timespec_sec(struct btrfs_inode_timespec *ts)
411{ 423{
412 return le32_to_cpu(ts->sec); 424 return le64_to_cpu(ts->sec);
413} 425}
414 426
415static inline void btrfs_set_timespec_sec(struct btrfs_inode_timespec *ts, 427static inline void btrfs_set_timespec_sec(struct btrfs_inode_timespec *ts,
416 u32 val) 428 u64 val)
417{ 429{
418 ts->sec = cpu_to_le32(val); 430 ts->sec = cpu_to_le64(val);
419} 431}
420 432
421static inline u32 btrfs_timespec_nsec(struct btrfs_inode_timespec *ts) 433static inline u32 btrfs_timespec_nsec(struct btrfs_inode_timespec *ts)
@@ -429,8 +441,6 @@ static inline void btrfs_set_timespec_nsec(struct btrfs_inode_timespec *ts,
429 ts->nsec = cpu_to_le32(val); 441 ts->nsec = cpu_to_le32(val);
430} 442}
431 443
432
433
434static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) 444static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei)
435{ 445{
436 return le64_to_cpu(ei->owner); 446 return le64_to_cpu(ei->owner);
@@ -905,4 +915,11 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
905 struct btrfs_root *root, 915 struct btrfs_root *root,
906 struct btrfs_path *path, u64 objectid, 916 struct btrfs_path *path, u64 objectid,
907 u64 blocknr, int mod); 917 u64 blocknr, int mod);
918int btrfs_csum_file_block(struct btrfs_trans_handle *trans,
919 struct btrfs_root *root,
920 u64 objectid, u64 offset,
921 char *data, size_t len);
922int btrfs_csum_verify_file_block(struct btrfs_root *root,
923 u64 objectid, u64 offset,
924 char *data, size_t len);
908#endif 925#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f2c1dd40c087..f5db2b936502 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -127,31 +127,46 @@ static int btree_get_block(struct inode *inode, sector_t iblock,
127 return 0; 127 return 0;
128} 128}
129 129
130static int csum_tree_block(struct btrfs_root * root, struct buffer_head *bh, 130int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len,
131 int verify) 131 char *result)
132{ 132{
133 struct btrfs_node *node = btrfs_buffer_node(bh);
134 struct scatterlist sg; 133 struct scatterlist sg;
135 struct crypto_hash *tfm = root->fs_info->hash_tfm; 134 struct crypto_hash *tfm = root->fs_info->hash_tfm;
136 struct hash_desc desc; 135 struct hash_desc desc;
137 int ret; 136 int ret;
138 char result[32];
139 137
140 desc.tfm = tfm; 138 desc.tfm = tfm;
141 desc.flags = 0; 139 desc.flags = 0;
142 sg_init_one(&sg, bh->b_data + 32, bh->b_size - 32); 140 sg_init_one(&sg, data, len);
143 spin_lock(&root->fs_info->hash_lock); 141 spin_lock(&root->fs_info->hash_lock);
144 ret = crypto_hash_digest(&desc, &sg, bh->b_size - 32, result); 142 ret = crypto_hash_digest(&desc, &sg, len, result);
145 spin_unlock(&root->fs_info->hash_lock); 143 spin_unlock(&root->fs_info->hash_lock);
146 if (ret) { 144 if (ret) {
147 printk("sha256 digest failed\n"); 145 printk("sha256 digest failed\n");
148 } 146 }
147 return ret;
148}
149static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh,
150 int verify)
151{
152 char result[BTRFS_CSUM_SIZE];
153 int ret;
154 struct btrfs_node *node;
155
156 ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE,
157 bh->b_size - BTRFS_CSUM_SIZE, result);
158 if (ret)
159 return ret;
149 if (verify) { 160 if (verify) {
150 if (memcmp(node->header.csum, result, sizeof(result))) 161 if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) {
151 printk("csum verify failed on %Lu\n", bh->b_blocknr); 162 printk("checksum verify failed on %lu\n",
152 return -EINVAL; 163 bh->b_blocknr);
153 } else 164 return 1;
154 memcpy(node->header.csum, result, sizeof(node->header.csum)); 165 }
166 } else {
167 node = btrfs_buffer_node(bh);
168 memcpy(&node->header.csum, result, BTRFS_CSUM_SIZE);
169 }
155 return 0; 170 return 0;
156} 171}
157 172
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index c2c38bda704d..f6998e2192ce 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -39,4 +39,6 @@ void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf);
39int write_ctree_super(struct btrfs_trans_handle *trans, 39int write_ctree_super(struct btrfs_trans_handle *trans,
40 struct btrfs_root *root); 40 struct btrfs_root *root);
41struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); 41struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr);
42int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len,
43 char *result);
42#endif 44#endif
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 5230a44cb19b..2d2c23ca7cbf 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -57,3 +57,63 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
57 ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); 57 ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
58 return ret; 58 return ret;
59} 59}
60
61int btrfs_csum_file_block(struct btrfs_trans_handle *trans,
62 struct btrfs_root *root,
63 u64 objectid, u64 offset,
64 char *data, size_t len)
65{
66 int ret;
67 struct btrfs_key file_key;
68 struct btrfs_path path;
69 struct btrfs_csum_item *item;
70
71 btrfs_init_path(&path);
72 file_key.objectid = objectid;
73 file_key.offset = offset;
74 file_key.flags = 0;
75 btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
76 ret = btrfs_insert_empty_item(trans, root, &path, &file_key,
77 BTRFS_CSUM_SIZE);
78 if (ret != 0 && ret != -EEXIST)
79 goto fail;
80 item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0],
81 struct btrfs_csum_item);
82 ret = 0;
83 ret = btrfs_csum_data(root, data, len, item->csum);
84 mark_buffer_dirty(path.nodes[0]);
85fail:
86 btrfs_release_path(root, &path);
87 return ret;
88}
89
90int btrfs_csum_verify_file_block(struct btrfs_root *root,
91 u64 objectid, u64 offset,
92 char *data, size_t len)
93{
94 int ret;
95 struct btrfs_key file_key;
96 struct btrfs_path path;
97 struct btrfs_csum_item *item;
98 char result[BTRFS_CSUM_SIZE];
99
100 btrfs_init_path(&path);
101 file_key.objectid = objectid;
102 file_key.offset = offset;
103 file_key.flags = 0;
104 btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
105 ret = btrfs_search_slot(NULL, root, &file_key, &path, 0, 0);
106 if (ret)
107 goto fail;
108 item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0],
109 struct btrfs_csum_item);
110 ret = 0;
111 ret = btrfs_csum_data(root, data, len, result);
112 WARN_ON(ret);
113 if (memcmp(result, item->csum, BTRFS_CSUM_SIZE))
114 ret = 1;
115fail:
116 btrfs_release_path(root, &path);
117 return ret;
118}
119
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 7914b31f5bcd..04428137d75f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -249,15 +249,16 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
249 struct btrfs_key key; 249 struct btrfs_key key;
250 struct btrfs_disk_key *found_key; 250 struct btrfs_disk_key *found_key;
251 struct btrfs_leaf *leaf; 251 struct btrfs_leaf *leaf;
252 struct btrfs_file_extent_item *fi; 252 struct btrfs_file_extent_item *fi = NULL;
253 u64 extent_start; 253 u64 extent_start = 0;
254 u64 extent_num_blocks; 254 u64 extent_num_blocks = 0;
255 int found_extent;
255 256
256 /* FIXME, add redo link to tree so we don't leak on crash */ 257 /* FIXME, add redo link to tree so we don't leak on crash */
257 key.objectid = inode->i_ino; 258 key.objectid = inode->i_ino;
258 key.offset = (u64)-1; 259 key.offset = (u64)-1;
259 key.flags = 0; 260 key.flags = 0;
260 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); 261 btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY);
261 while(1) { 262 while(1) {
262 btrfs_init_path(&path); 263 btrfs_init_path(&path);
263 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); 264 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
@@ -273,25 +274,32 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
273 found_key = &leaf->items[path.slots[0]].key; 274 found_key = &leaf->items[path.slots[0]].key;
274 if (btrfs_disk_key_objectid(found_key) != inode->i_ino) 275 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
275 break; 276 break;
276 if (btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) 277 if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY &&
278 btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY)
277 break; 279 break;
278 if (btrfs_disk_key_offset(found_key) < inode->i_size) 280 if (btrfs_disk_key_offset(found_key) < inode->i_size)
279 break; 281 break;
280 fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), 282 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
281 path.slots[0], 283 fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]),
282 struct btrfs_file_extent_item); 284 path.slots[0],
283 extent_start = btrfs_file_extent_disk_blocknr(fi); 285 struct btrfs_file_extent_item);
284 extent_num_blocks = btrfs_file_extent_disk_num_blocks(fi); 286 extent_start = btrfs_file_extent_disk_blocknr(fi);
285 key.offset = btrfs_disk_key_offset(found_key) - 1; 287 extent_num_blocks =
288 btrfs_file_extent_disk_num_blocks(fi);
289 inode->i_blocks -=
290 btrfs_file_extent_num_blocks(fi) >> 9;
291 found_extent = 1;
292 } else {
293 found_extent = 0;
294 }
286 ret = btrfs_del_item(trans, root, &path); 295 ret = btrfs_del_item(trans, root, &path);
287 BUG_ON(ret); 296 BUG_ON(ret);
288 inode->i_blocks -= btrfs_file_extent_num_blocks(fi) >> 9;
289 btrfs_release_path(root, &path); 297 btrfs_release_path(root, &path);
290 ret = btrfs_free_extent(trans, root, extent_start, 298 if (found_extent) {
291 extent_num_blocks, 0); 299 ret = btrfs_free_extent(trans, root, extent_start,
292 BUG_ON(ret); 300 extent_num_blocks, 0);
293 if (key.offset + 1 == 0) 301 BUG_ON(ret);
294 break; 302 }
295 } 303 }
296 btrfs_release_path(root, &path); 304 btrfs_release_path(root, &path);
297 ret = 0; 305 ret = 0;
@@ -975,10 +983,24 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
975 int err = 0; 983 int err = 0;
976 int ret; 984 int ret;
977 int this_write; 985 int this_write;
986 struct inode *inode = file->f_path.dentry->d_inode;
978 987
979 for (i = 0; i < num_pages; i++) { 988 for (i = 0; i < num_pages; i++) {
980 offset = pos & (PAGE_CACHE_SIZE -1); 989 offset = pos & (PAGE_CACHE_SIZE -1);
981 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); 990 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
991 /* FIXME, one block at a time */
992
993 mutex_lock(&root->fs_info->fs_mutex);
994 trans = btrfs_start_transaction(root, 1);
995 btrfs_csum_file_block(trans, root, inode->i_ino,
996 pages[i]->index << PAGE_CACHE_SHIFT,
997 kmap(pages[i]), PAGE_CACHE_SIZE);
998 kunmap(pages[i]);
999 SetPageChecked(pages[i]);
1000 ret = btrfs_end_transaction(trans, root);
1001 BUG_ON(ret);
1002 mutex_unlock(&root->fs_info->fs_mutex);
1003
982 ret = nobh_commit_write(file, pages[i], offset, 1004 ret = nobh_commit_write(file, pages[i], offset,
983 offset + this_write); 1005 offset + this_write);
984 pos += this_write; 1006 pos += this_write;
@@ -1022,7 +1044,7 @@ static int prepare_pages(struct btrfs_trans_handle *trans,
1022 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); 1044 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1023 ret = nobh_prepare_write(pages[i], offset, 1045 ret = nobh_prepare_write(pages[i], offset,
1024 offset + this_write, 1046 offset + this_write,
1025 btrfs_get_block_lock); 1047 btrfs_get_block);
1026 pos += this_write; 1048 pos += this_write;
1027 if (ret) { 1049 if (ret) {
1028 err = ret; 1050 err = ret;
@@ -1051,7 +1073,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1051 size_t num_written = 0; 1073 size_t num_written = 0;
1052 int err = 0; 1074 int err = 0;
1053 int ret = 0; 1075 int ret = 0;
1054 struct btrfs_trans_handle *trans;
1055 struct inode *inode = file->f_path.dentry->d_inode; 1076 struct inode *inode = file->f_path.dentry->d_inode;
1056 struct btrfs_root *root = btrfs_sb(inode->i_sb); 1077 struct btrfs_root *root = btrfs_sb(inode->i_sb);
1057 struct page *pages[1]; 1078 struct page *pages[1];
@@ -1077,25 +1098,18 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1077 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); 1098 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
1078 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> 1099 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
1079 PAGE_CACHE_SHIFT; 1100 PAGE_CACHE_SHIFT;
1080 mutex_lock(&root->fs_info->fs_mutex); 1101 ret = prepare_pages(NULL, root, file, pages, num_pages,
1081 trans = btrfs_start_transaction(root, 1);
1082
1083 ret = prepare_pages(trans, root, file, pages, num_pages,
1084 pos, write_bytes); 1102 pos, write_bytes);
1085 BUG_ON(ret); 1103 BUG_ON(ret);
1086 ret = btrfs_copy_from_user(pos, num_pages, 1104 ret = btrfs_copy_from_user(pos, num_pages,
1087 write_bytes, pages, buf); 1105 write_bytes, pages, buf);
1088 BUG_ON(ret); 1106 BUG_ON(ret);
1089 1107
1090 mutex_unlock(&root->fs_info->fs_mutex); 1108 ret = dirty_and_release_pages(NULL, root, file, pages,
1091
1092 ret = dirty_and_release_pages(trans, root, file, pages,
1093 num_pages, pos, write_bytes); 1109 num_pages, pos, write_bytes);
1094 BUG_ON(ret); 1110 BUG_ON(ret);
1095 btrfs_drop_pages(pages, num_pages); 1111 btrfs_drop_pages(pages, num_pages);
1096 1112
1097 ret = btrfs_end_transaction(trans, root);
1098
1099 buf += write_bytes; 1113 buf += write_bytes;
1100 count -= write_bytes; 1114 count -= write_bytes;
1101 pos += write_bytes; 1115 pos += write_bytes;
@@ -1111,6 +1125,118 @@ out:
1111 return num_written ? num_written : err; 1125 return num_written ? num_written : err;
1112} 1126}
1113 1127
1128static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
1129 unsigned long offset, unsigned long size)
1130{
1131 char *kaddr;
1132 unsigned long left, count = desc->count;
1133
1134 if (size > count)
1135 size = count;
1136
1137 if (!PageChecked(page)) {
1138 /* FIXME, do it per block */
1139 struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb);
1140 int ret = btrfs_csum_verify_file_block(root,
1141 page->mapping->host->i_ino,
1142 page->index << PAGE_CACHE_SHIFT,
1143 kmap(page), PAGE_CACHE_SIZE);
1144 if (ret) {
1145 printk("failed to verify ino %lu page %lu\n",
1146 page->mapping->host->i_ino,
1147 page->index);
1148 memset(page_address(page), 0, PAGE_CACHE_SIZE);
1149 }
1150 SetPageChecked(page);
1151 kunmap(page);
1152 }
1153 /*
1154 * Faults on the destination of a read are common, so do it before
1155 * taking the kmap.
1156 */
1157 if (!fault_in_pages_writeable(desc->arg.buf, size)) {
1158 kaddr = kmap_atomic(page, KM_USER0);
1159 left = __copy_to_user_inatomic(desc->arg.buf,
1160 kaddr + offset, size);
1161 kunmap_atomic(kaddr, KM_USER0);
1162 if (left == 0)
1163 goto success;
1164 }
1165
1166 /* Do it the slow way */
1167 kaddr = kmap(page);
1168 left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
1169 kunmap(page);
1170
1171 if (left) {
1172 size -= left;
1173 desc->error = -EFAULT;
1174 }
1175success:
1176 desc->count = count - size;
1177 desc->written += size;
1178 desc->arg.buf += size;
1179 return size;
1180}
1181
1182/**
1183 * btrfs_file_aio_read - filesystem read routine
1184 * @iocb: kernel I/O control block
1185 * @iov: io vector request
1186 * @nr_segs: number of segments in the iovec
1187 * @pos: current file position
1188 */
1189static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1190 unsigned long nr_segs, loff_t pos)
1191{
1192 struct file *filp = iocb->ki_filp;
1193 ssize_t retval;
1194 unsigned long seg;
1195 size_t count;
1196 loff_t *ppos = &iocb->ki_pos;
1197
1198 count = 0;
1199 for (seg = 0; seg < nr_segs; seg++) {
1200 const struct iovec *iv = &iov[seg];
1201
1202 /*
1203 * If any segment has a negative length, or the cumulative
1204 * length ever wraps negative then return -EINVAL.
1205 */
1206 count += iv->iov_len;
1207 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
1208 return -EINVAL;
1209 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
1210 continue;
1211 if (seg == 0)
1212 return -EFAULT;
1213 nr_segs = seg;
1214 count -= iv->iov_len; /* This segment is no good */
1215 break;
1216 }
1217 retval = 0;
1218 if (count) {
1219 for (seg = 0; seg < nr_segs; seg++) {
1220 read_descriptor_t desc;
1221
1222 desc.written = 0;
1223 desc.arg.buf = iov[seg].iov_base;
1224 desc.count = iov[seg].iov_len;
1225 if (desc.count == 0)
1226 continue;
1227 desc.error = 0;
1228 do_generic_file_read(filp, ppos, &desc,
1229 btrfs_read_actor);
1230 retval += desc.written;
1231 if (desc.error) {
1232 retval = retval ?: desc.error;
1233 break;
1234 }
1235 }
1236 }
1237 return retval;
1238}
1239
1114static int btrfs_get_sb(struct file_system_type *fs_type, 1240static int btrfs_get_sb(struct file_system_type *fs_type,
1115 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1241 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
1116{ 1242{
@@ -1166,7 +1292,7 @@ static struct inode_operations btrfs_file_inode_operations = {
1166static struct file_operations btrfs_file_operations = { 1292static struct file_operations btrfs_file_operations = {
1167 .llseek = generic_file_llseek, 1293 .llseek = generic_file_llseek,
1168 .read = do_sync_read, 1294 .read = do_sync_read,
1169 .aio_read = generic_file_aio_read, 1295 .aio_read = btrfs_file_aio_read,
1170 .write = btrfs_file_write, 1296 .write = btrfs_file_write,
1171 .mmap = generic_file_mmap, 1297 .mmap = generic_file_mmap,
1172 .open = generic_file_open, 1298 .open = generic_file_open,