aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2008-03-24 15:01:56 -0400
committer: Chris Mason <chris.mason@oracle.com> 2008-09-25 11:04:00 -0400
commit: 0b86a832a1f38abec695864ec2eaedc9d2383f1b (patch)
tree: 8ec0db0b55f77eb6a23fe7db3c1064d298db55c1
parent: 7f93bf8d27653726e3721c01fefc523487ecf2af (diff)
Btrfs: Add support for multiple devices per filesystem
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- fs/btrfs/Makefile 2
-rw-r--r-- fs/btrfs/ctree.c 38
-rw-r--r-- fs/btrfs/ctree.h 313
-rw-r--r-- fs/btrfs/disk-io.c 97
-rw-r--r-- fs/btrfs/disk-io.h 2
-rw-r--r-- fs/btrfs/extent-tree.c 429
-rw-r--r-- fs/btrfs/extent_io.c 6
-rw-r--r-- fs/btrfs/extent_io.h 2
-rw-r--r-- fs/btrfs/inode.c 26
-rw-r--r-- fs/btrfs/print-tree.c 48
-rw-r--r-- fs/btrfs/transaction.c 50
-rw-r--r-- fs/btrfs/volumes.c 852
-rw-r--r-- fs/btrfs/volumes.h 78
13 files changed, 1563 insertions, 380 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9c61609f3714..5ac6f2946c21 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,7 +6,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 hash.o file-item.o inode-item.o inode-map.o disk-io.o \ 6 hash.o file-item.o inode-item.o inode-map.o disk-io.o \
7 transaction.o bit-radix.o inode.o file.o tree-defrag.o \ 7 transaction.o bit-radix.o inode.o file.o tree-defrag.o \
8 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 8 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
9 extent_io.o 9 extent_io.o volumes.o
10 10
11ifeq ($(CONFIG_FS_POSIX_ACL),y) 11ifeq ($(CONFIG_FS_POSIX_ACL),y)
12btrfs-y += acl.o 12btrfs-y += acl.o
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ef8809c1e69c..70b6ddfe15a1 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -70,6 +70,14 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
70 memset(p, 0, sizeof(*p)); 70 memset(p, 0, sizeof(*p));
71} 71}
72 72
73static void add_root_to_dirty_list(struct btrfs_root *root)
74{
75 if (root->track_dirty && list_empty(&root->dirty_list)) {
76 list_add(&root->dirty_list,
77 &root->fs_info->dirty_cowonly_roots);
78 }
79}
80
73int btrfs_copy_root(struct btrfs_trans_handle *trans, 81int btrfs_copy_root(struct btrfs_trans_handle *trans,
74 struct btrfs_root *root, 82 struct btrfs_root *root,
75 struct extent_buffer *buf, 83 struct extent_buffer *buf,
@@ -196,6 +204,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
196 root_gen, 0, 0, 1); 204 root_gen, 0, 0, 1);
197 } 205 }
198 free_extent_buffer(buf); 206 free_extent_buffer(buf);
207 add_root_to_dirty_list(root);
199 } else { 208 } else {
200 root_gen = btrfs_header_generation(parent); 209 root_gen = btrfs_header_generation(parent);
201 btrfs_set_node_blockptr(parent, parent_slot, 210 btrfs_set_node_blockptr(parent, parent_slot,
@@ -241,7 +250,7 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
241 return 0; 250 return 0;
242 } 251 }
243 252
244 search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); 253 search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
245 ret = __btrfs_cow_block(trans, root, buf, parent, 254 ret = __btrfs_cow_block(trans, root, buf, parent,
246 parent_slot, cow_ret, search_start, 0); 255 parent_slot, cow_ret, search_start, 0);
247 return ret; 256 return ret;
@@ -724,6 +733,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
724 BUG_ON(ret); 733 BUG_ON(ret);
725 734
726 root->node = child; 735 root->node = child;
736 add_root_to_dirty_list(root);
727 path->nodes[level] = NULL; 737 path->nodes[level] = NULL;
728 clean_tree_block(trans, root, mid); 738 clean_tree_block(trans, root, mid);
729 wait_on_tree_block_writeback(root, mid); 739 wait_on_tree_block_writeback(root, mid);
@@ -1369,6 +1379,7 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1369 /* the super has an extra ref to root->node */ 1379 /* the super has an extra ref to root->node */
1370 free_extent_buffer(root->node); 1380 free_extent_buffer(root->node);
1371 root->node = c; 1381 root->node = c;
1382 add_root_to_dirty_list(root);
1372 extent_buffer_get(c); 1383 extent_buffer_get(c);
1373 path->nodes[level] = c; 1384 path->nodes[level] = c;
1374 path->slots[level] = 0; 1385 path->slots[level] = 0;
@@ -2777,3 +2788,28 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2777 } 2788 }
2778 return 0; 2789 return 0;
2779} 2790}
2791
2792int btrfs_previous_item(struct btrfs_root *root,
2793 struct btrfs_path *path, u64 min_objectid,
2794 int type)
2795{
2796 struct btrfs_key found_key;
2797 struct extent_buffer *leaf;
2798 int ret;
2799
2800 while(1) {
2801 if (path->slots[0] == 0) {
2802 ret = btrfs_prev_leaf(root, path);
2803 if (ret != 0)
2804 return ret;
2805 } else {
2806 path->slots[0]--;
2807 }
2808 leaf = path->nodes[0];
2809 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2810 if (found_key.type == type)
2811 return 0;
2812 }
2813 return 1;
2814}
2815
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 92d892f92075..1453d995fef6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -40,12 +40,44 @@ extern struct kmem_cache *btrfs_path_cachep;
40#define BTRFS_MAGIC "_B4RfS_M" 40#define BTRFS_MAGIC "_B4RfS_M"
41 41
42#define BTRFS_MAX_LEVEL 8 42#define BTRFS_MAX_LEVEL 8
43
44/* holds pointers to all of the tree roots */
43#define BTRFS_ROOT_TREE_OBJECTID 1ULL 45#define BTRFS_ROOT_TREE_OBJECTID 1ULL
46
47/* stores information about which extents are in use, and reference counts */
44#define BTRFS_EXTENT_TREE_OBJECTID 2ULL 48#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
49
50/* one per subvolume, storing files and directories */
45#define BTRFS_FS_TREE_OBJECTID 3ULL 51#define BTRFS_FS_TREE_OBJECTID 3ULL
52
53/* directory objectid inside the root tree */
46#define BTRFS_ROOT_TREE_DIR_OBJECTID 4ULL 54#define BTRFS_ROOT_TREE_DIR_OBJECTID 4ULL
55
56
57/*
58 * chunk tree stores translations from logical -> physical block numbering
59 * the super block points to the chunk tree
60 */
61#define BTRFS_CHUNK_TREE_OBJECTID 5ULL
62
63/*
64 * stores information about which areas of a given device are in use.
65 * one per device. The tree of tree roots points to the device tree
66 */
67#define BTRFS_DEV_TREE_OBJECTID 6ULL
68
69/*
70 * All files have objectids higher than this.
71 */
47#define BTRFS_FIRST_FREE_OBJECTID 256ULL 72#define BTRFS_FIRST_FREE_OBJECTID 256ULL
48 73
74
75/*
76 * the device items go into the chunk tree. The key is in the form
77 * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
78 */
79#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
80
49/* 81/*
50 * we can actually store much bigger names, but lets not confuse the rest 82 * we can actually store much bigger names, but lets not confuse the rest
51 * of linux 83 * of linux
@@ -95,6 +127,81 @@ struct btrfs_key {
95 u64 offset; 127 u64 offset;
96} __attribute__ ((__packed__)); 128} __attribute__ ((__packed__));
97 129
130struct btrfs_mapping_tree {
131 struct extent_map_tree map_tree;
132};
133
134#define BTRFS_DEV_UUID_SIZE 16
135struct btrfs_dev_item {
136 /* the internal btrfs device id */
137 __le64 devid;
138
139 /* size of the device */
140 __le64 total_bytes;
141
142 /* bytes used */
143 __le64 bytes_used;
144
145 /* optimal io alignment for this device */
146 __le32 io_align;
147
148 /* optimal io width for this device */
149 __le32 io_width;
150
151 /* minimal io size for this device */
152 __le32 sector_size;
153
154 /* the kernel device number */
155 __le64 rdev;
156
157 /* type and info about this device */
158 __le64 type;
159
160 /* partition number, 0 for whole dev */
161 __le32 partition;
162
163 /* length of the name data at the end of the item */
164 __le16 name_len;
165
166 /* physical drive uuid (or lvm uuid) */
167 u8 uuid[BTRFS_DEV_UUID_SIZE];
168 /* name goes here */
169} __attribute__ ((__packed__));
170
171struct btrfs_stripe {
172 __le64 devid;
173 __le64 offset;
174} __attribute__ ((__packed__));
175
176struct btrfs_chunk {
177 __le64 owner;
178 __le64 stripe_len;
179 __le64 type;
180
181 /* optimal io alignment for this chunk */
182 __le32 io_align;
183
184 /* optimal io width for this chunk */
185 __le32 io_width;
186
187 /* minimal io size for this chunk */
188 __le32 sector_size;
189
190 /* 2^16 stripes is quite a lot, a second limit is the size of a single
191 * item in the btree
192 */
193 __le16 num_stripes;
194 struct btrfs_stripe stripe;
195 /* additional stripes go here */
196} __attribute__ ((__packed__));
197
198static inline unsigned long btrfs_chunk_item_size(int num_stripes)
199{
200 BUG_ON(num_stripes == 0);
201 return sizeof(struct btrfs_chunk) +
202 sizeof(struct btrfs_stripe) * (num_stripes - 1);
203}
204
98#define BTRFS_FSID_SIZE 16 205#define BTRFS_FSID_SIZE 16
99/* 206/*
100 * every tree block (leaf or node) starts with this header. 207 * every tree block (leaf or node) starts with this header.
@@ -119,6 +226,13 @@ struct btrfs_header {
119 sizeof(struct btrfs_item) - \ 226 sizeof(struct btrfs_item) - \
120 sizeof(struct btrfs_file_extent_item)) 227 sizeof(struct btrfs_file_extent_item))
121 228
229
230/*
231 * this is a very generous portion of the super block, giving us
232 * room to translate 14 chunks with 3 stripes each.
233 */
234#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
235
122/* 236/*
123 * the super block basically lists the main trees of the FS 237 * the super block basically lists the main trees of the FS
124 * it currently lacks any block count etc etc 238 * it currently lacks any block count etc etc
@@ -131,6 +245,7 @@ struct btrfs_super_block {
131 __le64 magic; 245 __le64 magic;
132 __le64 generation; 246 __le64 generation;
133 __le64 root; 247 __le64 root;
248 __le64 chunk_root;
134 __le64 total_bytes; 249 __le64 total_bytes;
135 __le64 bytes_used; 250 __le64 bytes_used;
136 __le64 root_dir_objectid; 251 __le64 root_dir_objectid;
@@ -138,7 +253,10 @@ struct btrfs_super_block {
138 __le32 nodesize; 253 __le32 nodesize;
139 __le32 leafsize; 254 __le32 leafsize;
140 __le32 stripesize; 255 __le32 stripesize;
256 __le32 sys_chunk_array_size;
141 u8 root_level; 257 u8 root_level;
258 u8 chunk_root_level;
259 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
142} __attribute__ ((__packed__)); 260} __attribute__ ((__packed__));
143 261
144/* 262/*
@@ -208,12 +326,22 @@ struct btrfs_extent_ref {
208 __le64 offset; 326 __le64 offset;
209} __attribute__ ((__packed__)); 327} __attribute__ ((__packed__));
210 328
329/* dev extents record free space on individual devices. The owner
330 * field points back to the chunk allocation mapping tree that allocated
331 * the extent
332 */
333struct btrfs_dev_extent {
334 __le64 owner;
335 __le64 length;
336} __attribute__ ((__packed__));
337
338
211struct btrfs_inode_ref { 339struct btrfs_inode_ref {
212 __le16 name_len; 340 __le16 name_len;
213 /* name goes here */ 341 /* name goes here */
214} __attribute__ ((__packed__)); 342} __attribute__ ((__packed__));
215 343
216struct btrfs_inode_timespec { 344struct btrfs_timespec {
217 __le64 sec; 345 __le64 sec;
218 __le32 nsec; 346 __le32 nsec;
219} __attribute__ ((__packed__)); 347} __attribute__ ((__packed__));
@@ -231,13 +359,13 @@ struct btrfs_inode_item {
231 __le32 uid; 359 __le32 uid;
232 __le32 gid; 360 __le32 gid;
233 __le32 mode; 361 __le32 mode;
234 __le32 rdev; 362 __le64 rdev;
235 __le16 flags; 363 __le16 flags;
236 __le16 compat_flags; 364 __le16 compat_flags;
237 struct btrfs_inode_timespec atime; 365 struct btrfs_timespec atime;
238 struct btrfs_inode_timespec ctime; 366 struct btrfs_timespec ctime;
239 struct btrfs_inode_timespec mtime; 367 struct btrfs_timespec mtime;
240 struct btrfs_inode_timespec otime; 368 struct btrfs_timespec otime;
241} __attribute__ ((__packed__)); 369} __attribute__ ((__packed__));
242 370
243struct btrfs_dir_item { 371struct btrfs_dir_item {
@@ -290,29 +418,34 @@ struct btrfs_csum_item {
290 u8 csum; 418 u8 csum;
291} __attribute__ ((__packed__)); 419} __attribute__ ((__packed__));
292 420
293/* tag for the radix tree of block groups in ram */ 421/* different types of block groups (and chunks) */
294#define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) 422#define BTRFS_BLOCK_GROUP_DATA (1 << 0)
295 423#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1)
424#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
296 425
297#define BTRFS_BLOCK_GROUP_DATA 1
298#define BTRFS_BLOCK_GROUP_MIXED 2
299 426
300struct btrfs_block_group_item { 427struct btrfs_block_group_item {
301 __le64 used; 428 __le64 used;
302 u8 flags; 429 __le64 chunk_tree;
430 __le64 chunk_objectid;
431 __le64 flags;
303} __attribute__ ((__packed__)); 432} __attribute__ ((__packed__));
304 433
305struct btrfs_block_group_cache { 434struct btrfs_block_group_cache {
306 struct btrfs_key key; 435 struct btrfs_key key;
307 struct btrfs_block_group_item item; 436 struct btrfs_block_group_item item;
308 int data;
309 int cached;
310 u64 pinned; 437 u64 pinned;
438 u64 flags;
439 int cached;
311}; 440};
441
442struct btrfs_device;
312struct btrfs_fs_info { 443struct btrfs_fs_info {
313 u8 fsid[BTRFS_FSID_SIZE]; 444 u8 fsid[BTRFS_FSID_SIZE];
314 struct btrfs_root *extent_root; 445 struct btrfs_root *extent_root;
315 struct btrfs_root *tree_root; 446 struct btrfs_root *tree_root;
447 struct btrfs_root *chunk_root;
448 struct btrfs_root *dev_root;
316 struct radix_tree_root fs_roots_radix; 449 struct radix_tree_root fs_roots_radix;
317 450
318 struct extent_io_tree free_space_cache; 451 struct extent_io_tree free_space_cache;
@@ -321,6 +454,9 @@ struct btrfs_fs_info {
321 struct extent_io_tree pending_del; 454 struct extent_io_tree pending_del;
322 struct extent_io_tree extent_ins; 455 struct extent_io_tree extent_ins;
323 456
457 /* logical->physical extent mapping */
458 struct btrfs_mapping_tree mapping_tree;
459
324 u64 generation; 460 u64 generation;
325 u64 last_trans_committed; 461 u64 last_trans_committed;
326 unsigned long mount_opt; 462 unsigned long mount_opt;
@@ -330,6 +466,7 @@ struct btrfs_fs_info {
330 struct btrfs_transaction *running_transaction; 466 struct btrfs_transaction *running_transaction;
331 struct btrfs_super_block super_copy; 467 struct btrfs_super_block super_copy;
332 struct extent_buffer *sb_buffer; 468 struct extent_buffer *sb_buffer;
469 struct block_device *__bdev;
333 struct super_block *sb; 470 struct super_block *sb;
334 struct inode *btree_inode; 471 struct inode *btree_inode;
335 spinlock_t hash_lock; 472 spinlock_t hash_lock;
@@ -350,12 +487,17 @@ struct btrfs_fs_info {
350 unsigned long throttles; 487 unsigned long throttles;
351 488
352 u64 total_pinned; 489 u64 total_pinned;
490 struct list_head dirty_cowonly_roots;
491
492 struct list_head devices;
493 struct list_head *last_device;
353 spinlock_t delalloc_lock; 494 spinlock_t delalloc_lock;
354 spinlock_t new_trans_lock; 495 spinlock_t new_trans_lock;
355 u64 delalloc_bytes; 496 u64 delalloc_bytes;
356 u64 last_alloc; 497 u64 last_alloc;
357 u64 last_data_alloc; 498 u64 last_data_alloc;
358}; 499};
500
359/* 501/*
360 * in ram representation of the tree. extent_root is used for all allocations 502 * in ram representation of the tree. extent_root is used for all allocations
361 * and for the extent tree extent_root root. 503 * and for the extent tree extent_root root.
@@ -387,14 +529,19 @@ struct btrfs_root {
387 u64 highest_inode; 529 u64 highest_inode;
388 u64 last_inode_alloc; 530 u64 last_inode_alloc;
389 int ref_cows; 531 int ref_cows;
532 int track_dirty;
390 struct btrfs_key defrag_progress; 533 struct btrfs_key defrag_progress;
391 int defrag_running; 534 int defrag_running;
392 int defrag_level; 535 int defrag_level;
393 char *name; 536 char *name;
394 int in_sysfs; 537 int in_sysfs;
538
539 /* the dirty list is only used by non-reference counted roots */
540 struct list_head dirty_list;
395}; 541};
396 542
397/* 543/*
544
398 * inode items have the data typically returned from stat and store other 545 * inode items have the data typically returned from stat and store other
399 * info about object characteristics. There is one for every file and dir in 546 * info about object characteristics. There is one for every file and dir in
400 * the FS 547 * the FS
@@ -439,6 +586,10 @@ struct btrfs_root {
439 */ 586 */
440#define BTRFS_BLOCK_GROUP_ITEM_KEY 50 587#define BTRFS_BLOCK_GROUP_ITEM_KEY 50
441 588
589#define BTRFS_DEV_EXTENT_KEY 75
590#define BTRFS_DEV_ITEM_KEY 76
591#define BTRFS_CHUNK_ITEM_KEY 77
592
442/* 593/*
443 * string items are for debugging. They just store a short string of 594 * string items are for debugging. They just store a short string of
444 * data in the FS 595 * data in the FS
@@ -518,13 +669,104 @@ static inline void btrfs_set_##name(type *s, u##bits val) \
518 s->member = cpu_to_le##bits(val); \ 669 s->member = cpu_to_le##bits(val); \
519} 670}
520 671
672BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64);
673BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64);
674BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64);
675BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32);
676BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32);
677BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32);
678BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64);
679BTRFS_SETGET_FUNCS(device_rdev, struct btrfs_dev_item, rdev, 64);
680BTRFS_SETGET_FUNCS(device_partition, struct btrfs_dev_item, partition, 32);
681BTRFS_SETGET_FUNCS(device_name_len, struct btrfs_dev_item, name_len, 16);
682
683static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
684{
685 return (char *)d + offsetof(struct btrfs_dev_item, uuid);
686}
687
688static inline char *btrfs_device_name(struct btrfs_dev_item *d)
689{
690 return (char *)(d + 1);
691}
692
693BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64);
694BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64);
695BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32);
696BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32);
697BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32);
698BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64);
699BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16);
700BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64);
701BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64);
702
703BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64);
704BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk,
705 stripe_len, 64);
706BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk,
707 io_align, 32);
708BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk,
709 io_width, 32);
710BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk,
711 sector_size, 32);
712BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64);
713BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk,
714 num_stripes, 16);
715BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64);
716BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64);
717
718static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c,
719 int nr)
720{
721 unsigned long offset = (unsigned long)c;
722 offset += offsetof(struct btrfs_chunk, stripe);
723 offset += nr * sizeof(struct btrfs_stripe);
724 return (struct btrfs_stripe *)offset;
725}
726
727static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
728 struct btrfs_chunk *c, int nr)
729{
730 return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr));
731}
732
733static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb,
734 struct btrfs_chunk *c, int nr,
735 u64 val)
736{
737 btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val);
738}
739
740static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
741 struct btrfs_chunk *c, int nr)
742{
743 return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr));
744}
745
746static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb,
747 struct btrfs_chunk *c, int nr,
748 u64 val)
749{
750 btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val);
751}
752
521/* struct btrfs_block_group_item */ 753/* struct btrfs_block_group_item */
522BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, 754BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
523 used, 64); 755 used, 64);
524BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, 756BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item,
525 used, 64); 757 used, 64);
526BTRFS_SETGET_FUNCS(disk_block_group_flags, struct btrfs_block_group_item, 758BTRFS_SETGET_STACK_FUNCS(block_group_chunk_tree, struct btrfs_block_group_item,
527 flags, 8); 759 chunk_tree, 64);
760BTRFS_SETGET_FUNCS(disk_block_group_chunk_tree, struct btrfs_block_group_item,
761 chunk_tree, 64);
762BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid,
763 struct btrfs_block_group_item, chunk_objectid, 64);
764BTRFS_SETGET_FUNCS(disk_block_group_chunk_objecitd,
765 struct btrfs_block_group_item, chunk_objectid, 64);
766BTRFS_SETGET_FUNCS(disk_block_group_flags,
767 struct btrfs_block_group_item, flags, 64);
768BTRFS_SETGET_STACK_FUNCS(block_group_flags,
769 struct btrfs_block_group_item, flags, 64);
528 770
529/* struct btrfs_inode_ref */ 771/* struct btrfs_inode_ref */
530BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); 772BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
@@ -538,49 +780,53 @@ BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
538BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); 780BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
539BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); 781BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
540BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); 782BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
541BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 32); 783BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
542BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16); 784BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16);
543BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item, 785BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item,
544 compat_flags, 16); 786 compat_flags, 16);
545 787
546static inline struct btrfs_inode_timespec * 788static inline struct btrfs_timespec *
547btrfs_inode_atime(struct btrfs_inode_item *inode_item) 789btrfs_inode_atime(struct btrfs_inode_item *inode_item)
548{ 790{
549 unsigned long ptr = (unsigned long)inode_item; 791 unsigned long ptr = (unsigned long)inode_item;
550 ptr += offsetof(struct btrfs_inode_item, atime); 792 ptr += offsetof(struct btrfs_inode_item, atime);
551 return (struct btrfs_inode_timespec *)ptr; 793 return (struct btrfs_timespec *)ptr;
552} 794}
553 795
554static inline struct btrfs_inode_timespec * 796static inline struct btrfs_timespec *
555btrfs_inode_mtime(struct btrfs_inode_item *inode_item) 797btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
556{ 798{
557 unsigned long ptr = (unsigned long)inode_item; 799 unsigned long ptr = (unsigned long)inode_item;
558 ptr += offsetof(struct btrfs_inode_item, mtime); 800 ptr += offsetof(struct btrfs_inode_item, mtime);
559 return (struct btrfs_inode_timespec *)ptr; 801 return (struct btrfs_timespec *)ptr;
560} 802}
561 803
562static inline struct btrfs_inode_timespec * 804static inline struct btrfs_timespec *
563btrfs_inode_ctime(struct btrfs_inode_item *inode_item) 805btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
564{ 806{
565 unsigned long ptr = (unsigned long)inode_item; 807 unsigned long ptr = (unsigned long)inode_item;
566 ptr += offsetof(struct btrfs_inode_item, ctime); 808 ptr += offsetof(struct btrfs_inode_item, ctime);
567 return (struct btrfs_inode_timespec *)ptr; 809 return (struct btrfs_timespec *)ptr;
568} 810}
569 811
570static inline struct btrfs_inode_timespec * 812static inline struct btrfs_timespec *
571btrfs_inode_otime(struct btrfs_inode_item *inode_item) 813btrfs_inode_otime(struct btrfs_inode_item *inode_item)
572{ 814{
573 unsigned long ptr = (unsigned long)inode_item; 815 unsigned long ptr = (unsigned long)inode_item;
574 ptr += offsetof(struct btrfs_inode_item, otime); 816 ptr += offsetof(struct btrfs_inode_item, otime);
575 return (struct btrfs_inode_timespec *)ptr; 817 return (struct btrfs_timespec *)ptr;
576} 818}
577 819
578BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_inode_timespec, sec, 64); 820BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
579BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_inode_timespec, nsec, 32); 821BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
580 822
581/* struct btrfs_extent_item */ 823/* struct btrfs_extent_item */
582BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); 824BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32);
583 825
826/* struct btrfs_dev_extent */
827BTRFS_SETGET_FUNCS(dev_extent_owner, struct btrfs_dev_extent, owner, 64);
828BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
829
584/* struct btrfs_extent_ref */ 830/* struct btrfs_extent_ref */
585BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); 831BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64);
586BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); 832BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64);
@@ -846,8 +1092,14 @@ BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
846BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, 1092BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
847 generation, 64); 1093 generation, 64);
848BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); 1094BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64);
1095BTRFS_SETGET_STACK_FUNCS(super_sys_array_size,
1096 struct btrfs_super_block, sys_chunk_array_size, 32);
849BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, 1097BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block,
850 root_level, 8); 1098 root_level, 8);
1099BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block,
1100 chunk_root, 64);
1101BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block,
1102 chunk_root_level, 64);
851BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, 1103BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block,
852 total_bytes, 64); 1104 total_bytes, 64);
853BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, 1105BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block,
@@ -1009,7 +1261,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1009 struct btrfs_root *root); 1261 struct btrfs_root *root);
1010int btrfs_free_block_groups(struct btrfs_fs_info *info); 1262int btrfs_free_block_groups(struct btrfs_fs_info *info);
1011int btrfs_read_block_groups(struct btrfs_root *root); 1263int btrfs_read_block_groups(struct btrfs_root *root);
1264int btrfs_make_block_group(struct btrfs_trans_handle *trans,
1265 struct btrfs_root *root, u64 bytes_used,
1266 u64 type, u64 chunk_tree, u64 chunk_objectid,
1267 u64 size);
1012/* ctree.c */ 1268/* ctree.c */
1269int btrfs_previous_item(struct btrfs_root *root,
1270 struct btrfs_path *path, u64 min_objectid,
1271 int type);
1013int btrfs_cow_block(struct btrfs_trans_handle *trans, 1272int btrfs_cow_block(struct btrfs_trans_handle *trans,
1014 struct btrfs_root *root, struct extent_buffer *buf, 1273 struct btrfs_root *root, struct extent_buffer *buf,
1015 struct extent_buffer *parent, int parent_slot, 1274 struct extent_buffer *parent, int parent_slot,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 88e21bdbc478..8e37fa120cc8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -28,6 +28,7 @@
28#include "disk-io.h" 28#include "disk-io.h"
29#include "transaction.h" 29#include "transaction.h"
30#include "btrfs_inode.h" 30#include "btrfs_inode.h"
31#include "volumes.h"
31#include "print-tree.h" 32#include "print-tree.h"
32 33
33#if 0 34#if 0
@@ -234,6 +235,19 @@ static int btree_writepage_io_hook(struct page *page, u64 start, u64 end)
234 return 0; 235 return 0;
235} 236}
236 237
238static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio)
239{
240 struct btrfs_root *root = BTRFS_I(inode)->root;
241 u64 offset;
242 offset = bio->bi_sector << 9;
243 if (offset == BTRFS_SUPER_INFO_OFFSET) {
244 bio->bi_bdev = root->fs_info->sb->s_bdev;
245 submit_bio(rw, bio);
246 return 0;
247 }
248 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio);
249}
250
237static int btree_writepage(struct page *page, struct writeback_control *wbc) 251static int btree_writepage(struct page *page, struct writeback_control *wbc)
238{ 252{
239 struct extent_io_tree *tree; 253 struct extent_io_tree *tree;
@@ -345,6 +359,23 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
345 return ret; 359 return ret;
346} 360}
347 361
362static int close_all_devices(struct btrfs_fs_info *fs_info)
363{
364 struct list_head *list;
365 struct list_head *next;
366 struct btrfs_device *device;
367
368 list = &fs_info->devices;
369 while(!list_empty(list)) {
370 next = list->next;
371 list_del(next);
372 device = list_entry(next, struct btrfs_device, dev_list);
373 kfree(device->name);
374 kfree(device);
375 }
376 return 0;
377}
378
348struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, 379struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
349 u32 blocksize) 380 u32 blocksize)
350{ 381{
@@ -420,6 +451,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
420 root->leafsize = leafsize; 451 root->leafsize = leafsize;
421 root->stripesize = stripesize; 452 root->stripesize = stripesize;
422 root->ref_cows = 0; 453 root->ref_cows = 0;
454 root->track_dirty = 0;
455
423 root->fs_info = fs_info; 456 root->fs_info = fs_info;
424 root->objectid = objectid; 457 root->objectid = objectid;
425 root->last_trans = 0; 458 root->last_trans = 0;
@@ -427,6 +460,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
427 root->last_inode_alloc = 0; 460 root->last_inode_alloc = 0;
428 root->name = NULL; 461 root->name = NULL;
429 root->in_sysfs = 0; 462 root->in_sysfs = 0;
463
464 INIT_LIST_HEAD(&root->dirty_list);
430 memset(&root->root_key, 0, sizeof(root->root_key)); 465 memset(&root->root_key, 0, sizeof(root->root_key));
431 memset(&root->root_item, 0, sizeof(root->root_item)); 466 memset(&root->root_item, 0, sizeof(root->root_item));
432 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); 467 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
@@ -634,6 +669,10 @@ struct btrfs_root *open_ctree(struct super_block *sb)
634 GFP_NOFS); 669 GFP_NOFS);
635 struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), 670 struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
636 GFP_NOFS); 671 GFP_NOFS);
672 struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root),
673 GFP_NOFS);
674 struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root),
675 GFP_NOFS);
637 int ret; 676 int ret;
638 int err = -EIO; 677 int err = -EIO;
639 struct btrfs_super_block *disk_super; 678 struct btrfs_super_block *disk_super;
@@ -657,6 +696,12 @@ struct btrfs_root *open_ctree(struct super_block *sb)
657 fs_info->last_trans_committed = 0; 696 fs_info->last_trans_committed = 0;
658 fs_info->tree_root = tree_root; 697 fs_info->tree_root = tree_root;
659 fs_info->extent_root = extent_root; 698 fs_info->extent_root = extent_root;
699 fs_info->chunk_root = chunk_root;
700 fs_info->dev_root = dev_root;
701 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
702 INIT_LIST_HEAD(&fs_info->devices);
703 btrfs_mapping_init(&fs_info->mapping_tree);
704 fs_info->last_device = &fs_info->devices;
660 fs_info->sb = sb; 705 fs_info->sb = sb;
661 fs_info->throttles = 0; 706 fs_info->throttles = 0;
662 fs_info->mount_opt = 0; 707 fs_info->mount_opt = 0;
@@ -714,12 +759,12 @@ struct btrfs_root *open_ctree(struct super_block *sb)
714 goto fail_iput; 759 goto fail_iput;
715 } 760 }
716#endif 761#endif
717 __setup_root(512, 512, 512, 512, tree_root, 762 __setup_root(4096, 4096, 4096, 4096, tree_root,
718 fs_info, BTRFS_ROOT_TREE_OBJECTID); 763 fs_info, BTRFS_ROOT_TREE_OBJECTID);
719 764
720 fs_info->sb_buffer = read_tree_block(tree_root, 765 fs_info->sb_buffer = read_tree_block(tree_root,
721 BTRFS_SUPER_INFO_OFFSET, 766 BTRFS_SUPER_INFO_OFFSET,
722 512); 767 4096);
723 768
724 if (!fs_info->sb_buffer) 769 if (!fs_info->sb_buffer)
725 goto fail_iput; 770 goto fail_iput;
@@ -730,6 +775,7 @@ struct btrfs_root *open_ctree(struct super_block *sb)
730 read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, 775 read_extent_buffer(fs_info->sb_buffer, fs_info->fsid,
731 (unsigned long)btrfs_super_fsid(fs_info->sb_buffer), 776 (unsigned long)btrfs_super_fsid(fs_info->sb_buffer),
732 BTRFS_FSID_SIZE); 777 BTRFS_FSID_SIZE);
778
733 disk_super = &fs_info->super_copy; 779 disk_super = &fs_info->super_copy;
734 if (!btrfs_super_root(disk_super)) 780 if (!btrfs_super_root(disk_super))
735 goto fail_sb_buffer; 781 goto fail_sb_buffer;
@@ -753,23 +799,47 @@ struct btrfs_root *open_ctree(struct super_block *sb)
753 goto fail_sb_buffer; 799 goto fail_sb_buffer;
754 } 800 }
755 801
802 mutex_lock(&fs_info->fs_mutex);
803 ret = btrfs_read_sys_array(tree_root);
804 BUG_ON(ret);
805
806 blocksize = btrfs_level_size(tree_root,
807 btrfs_super_chunk_root_level(disk_super));
808
809 __setup_root(nodesize, leafsize, sectorsize, stripesize,
810 chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
811
812 chunk_root->node = read_tree_block(chunk_root,
813 btrfs_super_chunk_root(disk_super),
814 blocksize);
815 BUG_ON(!chunk_root->node);
816
817 ret = btrfs_read_chunk_tree(chunk_root);
818 BUG_ON(ret);
819
756 blocksize = btrfs_level_size(tree_root, 820 blocksize = btrfs_level_size(tree_root,
757 btrfs_super_root_level(disk_super)); 821 btrfs_super_root_level(disk_super));
758 822
823
759 tree_root->node = read_tree_block(tree_root, 824 tree_root->node = read_tree_block(tree_root,
760 btrfs_super_root(disk_super), 825 btrfs_super_root(disk_super),
761 blocksize); 826 blocksize);
762 if (!tree_root->node) 827 if (!tree_root->node)
763 goto fail_sb_buffer; 828 goto fail_sb_buffer;
764 829
765 mutex_lock(&fs_info->fs_mutex);
766 830
767 ret = find_and_setup_root(tree_root, fs_info, 831 ret = find_and_setup_root(tree_root, fs_info,
768 BTRFS_EXTENT_TREE_OBJECTID, extent_root); 832 BTRFS_EXTENT_TREE_OBJECTID, extent_root);
769 if (ret) { 833 if (ret)
770 mutex_unlock(&fs_info->fs_mutex);
771 goto fail_tree_root; 834 goto fail_tree_root;
772 } 835 extent_root->track_dirty = 1;
836
837 ret = find_and_setup_root(tree_root, fs_info,
838 BTRFS_DEV_TREE_OBJECTID, dev_root);
839 dev_root->track_dirty = 1;
840
841 if (ret)
842 goto fail_extent_root;
773 843
774 btrfs_read_block_groups(extent_root); 844 btrfs_read_block_groups(extent_root);
775 845
@@ -777,7 +847,10 @@ struct btrfs_root *open_ctree(struct super_block *sb)
777 mutex_unlock(&fs_info->fs_mutex); 847 mutex_unlock(&fs_info->fs_mutex);
778 return tree_root; 848 return tree_root;
779 849
850fail_extent_root:
851 free_extent_buffer(extent_root->node);
780fail_tree_root: 852fail_tree_root:
853 mutex_unlock(&fs_info->fs_mutex);
781 free_extent_buffer(tree_root->node); 854 free_extent_buffer(tree_root->node);
782fail_sb_buffer: 855fail_sb_buffer:
783 free_extent_buffer(fs_info->sb_buffer); 856 free_extent_buffer(fs_info->sb_buffer);
@@ -874,6 +947,12 @@ int close_ctree(struct btrfs_root *root)
874 if (fs_info->tree_root->node) 947 if (fs_info->tree_root->node)
875 free_extent_buffer(fs_info->tree_root->node); 948 free_extent_buffer(fs_info->tree_root->node);
876 949
950 if (root->fs_info->chunk_root->node);
951 free_extent_buffer(root->fs_info->chunk_root->node);
952
953 if (root->fs_info->dev_root->node);
954 free_extent_buffer(root->fs_info->dev_root->node);
955
877 free_extent_buffer(fs_info->sb_buffer); 956 free_extent_buffer(fs_info->sb_buffer);
878 957
879 btrfs_free_block_groups(root->fs_info); 958 btrfs_free_block_groups(root->fs_info);
@@ -901,8 +980,13 @@ int close_ctree(struct btrfs_root *root)
901 kfree(hasher); 980 kfree(hasher);
902 } 981 }
903#endif 982#endif
983 close_all_devices(fs_info);
984 btrfs_mapping_tree_free(&fs_info->mapping_tree);
985
904 kfree(fs_info->extent_root); 986 kfree(fs_info->extent_root);
905 kfree(fs_info->tree_root); 987 kfree(fs_info->tree_root);
988 kfree(fs_info->chunk_root);
989 kfree(fs_info->dev_root);
906 return 0; 990 return 0;
907} 991}
908 992
@@ -1016,4 +1100,5 @@ int btrfs_read_buffer(struct extent_buffer *buf)
1016 1100
1017static struct extent_io_ops btree_extent_io_ops = { 1101static struct extent_io_ops btree_extent_io_ops = {
1018 .writepage_io_hook = btree_writepage_io_hook, 1102 .writepage_io_hook = btree_writepage_io_hook,
1103 .submit_bio_hook = btree_submit_bio_hook,
1019}; 1104};
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 828f3a2081b6..206cb48638f7 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -20,6 +20,7 @@
20#define __DISKIO__ 20#define __DISKIO__
21 21
22#define BTRFS_SUPER_INFO_OFFSET (16 * 1024) 22#define BTRFS_SUPER_INFO_OFFSET (16 * 1024)
23struct btrfs_device;
23 24
24struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, 25struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
25 u32 blocksize); 26 u32 blocksize);
@@ -65,4 +66,5 @@ int btrfs_read_buffer(struct extent_buffer *buf);
65u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); 66u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
66void btrfs_csum_final(u32 crc, char *result); 67void btrfs_csum_final(u32 crc, char *result);
67void btrfs_throttle(struct btrfs_root *root); 68void btrfs_throttle(struct btrfs_root *root);
69int btrfs_open_device(struct btrfs_device *dev);
68#endif 70#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ebfd304138ca..2cd957d6e8d8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -24,37 +24,19 @@
24#include "disk-io.h" 24#include "disk-io.h"
25#include "print-tree.h" 25#include "print-tree.h"
26#include "transaction.h" 26#include "transaction.h"
27#include "volumes.h"
27 28
28#define BLOCK_GROUP_DATA EXTENT_WRITEBACK 29#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
29#define BLOCK_GROUP_METADATA EXTENT_UPTODATE 30#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
31#define BLOCK_GROUP_SYSTEM EXTENT_NEW
32
30#define BLOCK_GROUP_DIRTY EXTENT_DIRTY 33#define BLOCK_GROUP_DIRTY EXTENT_DIRTY
31 34
32static int finish_current_insert(struct btrfs_trans_handle *trans, struct 35static int finish_current_insert(struct btrfs_trans_handle *trans, struct
33 btrfs_root *extent_root); 36 btrfs_root *extent_root);
34static int del_pending_extents(struct btrfs_trans_handle *trans, struct 37static int del_pending_extents(struct btrfs_trans_handle *trans, struct
35 btrfs_root *extent_root); 38 btrfs_root *extent_root);
36static int find_previous_extent(struct btrfs_root *root,
37 struct btrfs_path *path)
38{
39 struct btrfs_key found_key;
40 struct extent_buffer *leaf;
41 int ret;
42 39
43 while(1) {
44 if (path->slots[0] == 0) {
45 ret = btrfs_prev_leaf(root, path);
46 if (ret != 0)
47 return ret;
48 } else {
49 path->slots[0]--;
50 }
51 leaf = path->nodes[0];
52 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
53 if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
54 return 0;
55 }
56 return 1;
57}
58 40
59static int cache_block_group(struct btrfs_root *root, 41static int cache_block_group(struct btrfs_root *root,
60 struct btrfs_block_group_cache *block_group) 42 struct btrfs_block_group_cache *block_group)
@@ -91,7 +73,7 @@ static int cache_block_group(struct btrfs_root *root,
91 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 73 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
92 if (ret < 0) 74 if (ret < 0)
93 return ret; 75 return ret;
94 ret = find_previous_extent(root, path); 76 ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
95 if (ret < 0) 77 if (ret < 0)
96 return ret; 78 return ret;
97 if (ret == 0) { 79 if (ret == 0) {
@@ -168,7 +150,8 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
168 block_group_cache = &info->block_group_cache; 150 block_group_cache = &info->block_group_cache;
169 ret = find_first_extent_bit(block_group_cache, 151 ret = find_first_extent_bit(block_group_cache,
170 bytenr, &start, &end, 152 bytenr, &start, &end,
171 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA); 153 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
154 BLOCK_GROUP_SYSTEM);
172 if (ret) { 155 if (ret) {
173 return NULL; 156 return NULL;
174 } 157 }
@@ -182,23 +165,38 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
182 return block_group; 165 return block_group;
183 return NULL; 166 return NULL;
184} 167}
185static u64 noinline find_search_start(struct btrfs_root *root, 168
169static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
170{
171 if ((bits & BLOCK_GROUP_DATA) &&
172 (cache->flags & BTRFS_BLOCK_GROUP_DATA))
173 return 1;
174 if ((bits & BLOCK_GROUP_METADATA) &&
175 (cache->flags & BTRFS_BLOCK_GROUP_METADATA))
176 return 1;
177 if ((bits & BLOCK_GROUP_SYSTEM) &&
178 (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM))
179 return 1;
180 return 0;
181}
182
183static int noinline find_search_start(struct btrfs_root *root,
186 struct btrfs_block_group_cache **cache_ret, 184 struct btrfs_block_group_cache **cache_ret,
187 u64 search_start, int num, int data) 185 u64 *start_ret, int num, int data)
188{ 186{
189 int ret; 187 int ret;
190 struct btrfs_block_group_cache *cache = *cache_ret; 188 struct btrfs_block_group_cache *cache = *cache_ret;
191 struct extent_io_tree *free_space_cache; 189 struct extent_io_tree *free_space_cache;
192 struct extent_state *state;
193 u64 last; 190 u64 last;
194 u64 start = 0; 191 u64 start = 0;
192 u64 end = 0;
195 u64 cache_miss = 0; 193 u64 cache_miss = 0;
196 u64 total_fs_bytes; 194 u64 total_fs_bytes;
195 u64 search_start = *start_ret;
197 int wrapped = 0; 196 int wrapped = 0;
198 197
199 if (!cache) { 198 if (!cache)
200 goto out; 199 goto out;
201 }
202 total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); 200 total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
203 free_space_cache = &root->fs_info->free_space_cache; 201 free_space_cache = &root->fs_info->free_space_cache;
204 202
@@ -208,6 +206,9 @@ again:
208 goto out; 206 goto out;
209 207
210 last = max(search_start, cache->key.objectid); 208 last = max(search_start, cache->key.objectid);
209 if (!block_group_bits(cache, data)) {
210 goto new_group;
211 }
211 212
212 while(1) { 213 while(1) {
213 ret = find_first_extent_bit(&root->fs_info->free_space_cache, 214 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
@@ -225,22 +226,20 @@ again:
225 cache_miss = start; 226 cache_miss = start;
226 continue; 227 continue;
227 } 228 }
228 if (data != BTRFS_BLOCK_GROUP_MIXED && 229 if (start + num > cache->key.objectid + cache->key.offset)
229 start + num > cache->key.objectid + cache->key.offset)
230 goto new_group; 230 goto new_group;
231 if (start + num > total_fs_bytes) 231 if (start + num > total_fs_bytes)
232 goto new_group; 232 goto new_group;
233 return start; 233 *start_ret = start;
234 return 0;
234 } 235 }
235out: 236out:
236 cache = btrfs_lookup_block_group(root->fs_info, search_start); 237 cache = btrfs_lookup_block_group(root->fs_info, search_start);
237 if (!cache) { 238 if (!cache) {
238 printk("Unable to find block group for %Lu\n", 239 printk("Unable to find block group for %Lu\n", search_start);
239 search_start);
240 WARN_ON(1); 240 WARN_ON(1);
241 return search_start;
242 } 241 }
243 return search_start; 242 return -ENOSPC;
244 243
245new_group: 244new_group:
246 last = cache->key.objectid + cache->key.offset; 245 last = cache->key.objectid + cache->key.offset;
@@ -251,7 +250,6 @@ no_cache:
251 if (!wrapped) { 250 if (!wrapped) {
252 wrapped = 1; 251 wrapped = 1;
253 last = search_start; 252 last = search_start;
254 data = BTRFS_BLOCK_GROUP_MIXED;
255 goto wrapped; 253 goto wrapped;
256 } 254 }
257 goto out; 255 goto out;
@@ -299,7 +297,6 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
299 int ret; 297 int ret;
300 int full_search = 0; 298 int full_search = 0;
301 int factor = 8; 299 int factor = 8;
302 int data_swap = 0;
303 300
304 block_group_cache = &info->block_group_cache; 301 block_group_cache = &info->block_group_cache;
305 total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); 302 total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
@@ -307,19 +304,12 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
307 if (!owner) 304 if (!owner)
308 factor = 8; 305 factor = 8;
309 306
310 if (data == BTRFS_BLOCK_GROUP_MIXED) { 307 bit = data;
311 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
312 factor = 10;
313 } else if (data)
314 bit = BLOCK_GROUP_DATA;
315 else
316 bit = BLOCK_GROUP_METADATA;
317 308
318 if (search_start && search_start < total_fs_bytes) { 309 if (search_start && search_start < total_fs_bytes) {
319 struct btrfs_block_group_cache *shint; 310 struct btrfs_block_group_cache *shint;
320 shint = btrfs_lookup_block_group(info, search_start); 311 shint = btrfs_lookup_block_group(info, search_start);
321 if (shint && (shint->data == data || 312 if (shint && block_group_bits(shint, data)) {
322 shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
323 used = btrfs_block_group_used(&shint->item); 313 used = btrfs_block_group_used(&shint->item);
324 if (used + shint->pinned < 314 if (used + shint->pinned <
325 div_factor(shint->key.offset, factor)) { 315 div_factor(shint->key.offset, factor)) {
@@ -327,8 +317,8 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
327 } 317 }
328 } 318 }
329 } 319 }
330 if (hint && hint->key.objectid < total_fs_bytes && 320 if (hint && block_group_bits(hint, data) &&
331 (hint->data == data || hint->data == BTRFS_BLOCK_GROUP_MIXED)) { 321 hint->key.objectid < total_fs_bytes) {
332 used = btrfs_block_group_used(&hint->item); 322 used = btrfs_block_group_used(&hint->item);
333 if (used + hint->pinned < 323 if (used + hint->pinned <
334 div_factor(hint->key.offset, factor)) { 324 div_factor(hint->key.offset, factor)) {
@@ -379,12 +369,6 @@ again:
379 full_search = 1; 369 full_search = 1;
380 goto again; 370 goto again;
381 } 371 }
382 if (!data_swap) {
383 data_swap = 1;
384 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
385 last = search_start;
386 goto again;
387 }
388found: 372found:
389 return found_group; 373 return found_group;
390} 374}
@@ -1002,7 +986,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1002static int update_block_group(struct btrfs_trans_handle *trans, 986static int update_block_group(struct btrfs_trans_handle *trans,
1003 struct btrfs_root *root, 987 struct btrfs_root *root,
1004 u64 bytenr, u64 num_bytes, int alloc, 988 u64 bytenr, u64 num_bytes, int alloc,
1005 int mark_free, int data) 989 int mark_free)
1006{ 990{
1007 struct btrfs_block_group_cache *cache; 991 struct btrfs_block_group_cache *cache;
1008 struct btrfs_fs_info *info = root->fs_info; 992 struct btrfs_fs_info *info = root->fs_info;
@@ -1027,41 +1011,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
1027 old_val = btrfs_block_group_used(&cache->item); 1011 old_val = btrfs_block_group_used(&cache->item);
1028 num_bytes = min(total, cache->key.offset - byte_in_group); 1012 num_bytes = min(total, cache->key.offset - byte_in_group);
1029 if (alloc) { 1013 if (alloc) {
1030 if (cache->data != data &&
1031 old_val < (cache->key.offset >> 1)) {
1032 int bit_to_clear;
1033 int bit_to_set;
1034 cache->data = data;
1035 if (data) {
1036 bit_to_clear = BLOCK_GROUP_METADATA;
1037 bit_to_set = BLOCK_GROUP_DATA;
1038 cache->item.flags &=
1039 ~BTRFS_BLOCK_GROUP_MIXED;
1040 cache->item.flags |=
1041 BTRFS_BLOCK_GROUP_DATA;
1042 } else {
1043 bit_to_clear = BLOCK_GROUP_DATA;
1044 bit_to_set = BLOCK_GROUP_METADATA;
1045 cache->item.flags &=
1046 ~BTRFS_BLOCK_GROUP_MIXED;
1047 cache->item.flags &=
1048 ~BTRFS_BLOCK_GROUP_DATA;
1049 }
1050 clear_extent_bits(&info->block_group_cache,
1051 start, end, bit_to_clear,
1052 GFP_NOFS);
1053 set_extent_bits(&info->block_group_cache,
1054 start, end, bit_to_set,
1055 GFP_NOFS);
1056 } else if (cache->data != data &&
1057 cache->data != BTRFS_BLOCK_GROUP_MIXED) {
1058 cache->data = BTRFS_BLOCK_GROUP_MIXED;
1059 set_extent_bits(&info->block_group_cache,
1060 start, end,
1061 BLOCK_GROUP_DATA |
1062 BLOCK_GROUP_METADATA,
1063 GFP_NOFS);
1064 }
1065 old_val += num_bytes; 1014 old_val += num_bytes;
1066 } else { 1015 } else {
1067 old_val -= num_bytes; 1016 old_val -= num_bytes;
@@ -1357,7 +1306,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1357 return ret; 1306 return ret;
1358 } 1307 }
1359 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 1308 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1360 mark_free, 0); 1309 mark_free);
1361 BUG_ON(ret); 1310 BUG_ON(ret);
1362 } 1311 }
1363 btrfs_free_path(path); 1312 btrfs_free_path(path);
@@ -1450,38 +1399,21 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1450 u64 exclude_start, u64 exclude_nr, 1399 u64 exclude_start, u64 exclude_nr,
1451 int data) 1400 int data)
1452{ 1401{
1453 struct btrfs_path *path;
1454 struct btrfs_key key;
1455 u64 hole_size = 0;
1456 u64 aligned;
1457 int ret; 1402 int ret;
1458 int slot = 0;
1459 u64 last_byte = 0;
1460 u64 *last_ptr = NULL;
1461 u64 orig_search_start = search_start; 1403 u64 orig_search_start = search_start;
1462 int start_found;
1463 struct extent_buffer *l;
1464 struct btrfs_root * root = orig_root->fs_info->extent_root; 1404 struct btrfs_root * root = orig_root->fs_info->extent_root;
1465 struct btrfs_fs_info *info = root->fs_info; 1405 struct btrfs_fs_info *info = root->fs_info;
1466 u64 total_needed = num_bytes; 1406 u64 total_needed = num_bytes;
1467 int level;
1468 struct btrfs_block_group_cache *block_group; 1407 struct btrfs_block_group_cache *block_group;
1469 int full_scan = 0; 1408 int full_scan = 0;
1470 int wrapped = 0; 1409 int wrapped = 0;
1471 int empty_cluster;
1472 u64 cached_start;
1473 1410
1474 WARN_ON(num_bytes < root->sectorsize); 1411 WARN_ON(num_bytes < root->sectorsize);
1475 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 1412 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1476 1413
1477 level = btrfs_header_level(root->node);
1478
1479 if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
1480 data = BTRFS_BLOCK_GROUP_MIXED;
1481 }
1482
1483 if (search_end == (u64)-1) 1414 if (search_end == (u64)-1)
1484 search_end = btrfs_super_total_bytes(&info->super_copy); 1415 search_end = btrfs_super_total_bytes(&info->super_copy);
1416
1485 if (hint_byte) { 1417 if (hint_byte) {
1486 block_group = btrfs_lookup_block_group(info, hint_byte); 1418 block_group = btrfs_lookup_block_group(info, hint_byte);
1487 if (!block_group) 1419 if (!block_group)
@@ -1495,7 +1427,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1495 } 1427 }
1496 1428
1497 total_needed += empty_size; 1429 total_needed += empty_size;
1498 path = btrfs_alloc_path(); 1430
1499check_failed: 1431check_failed:
1500 if (!block_group) { 1432 if (!block_group) {
1501 block_group = btrfs_lookup_block_group(info, search_start); 1433 block_group = btrfs_lookup_block_group(info, search_start);
@@ -1503,135 +1435,49 @@ check_failed:
1503 block_group = btrfs_lookup_block_group(info, 1435 block_group = btrfs_lookup_block_group(info,
1504 orig_search_start); 1436 orig_search_start);
1505 } 1437 }
1506 search_start = find_search_start(root, &block_group, search_start, 1438 ret = find_search_start(root, &block_group, &search_start,
1507 total_needed, data); 1439 total_needed, data);
1508 search_start = stripe_align(root, search_start); 1440 if (ret)
1509 cached_start = search_start;
1510 btrfs_init_path(path);
1511 ins->objectid = search_start;
1512 ins->offset = 0;
1513 start_found = 0;
1514 path->reada = 2;
1515
1516 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1517 if (ret < 0)
1518 goto error;
1519 ret = find_previous_extent(root, path);
1520 if (ret < 0)
1521 goto error; 1441 goto error;
1522 l = path->nodes[0];
1523 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1524 while (1) {
1525 l = path->nodes[0];
1526 slot = path->slots[0];
1527 if (slot >= btrfs_header_nritems(l)) {
1528 ret = btrfs_next_leaf(root, path);
1529 if (ret == 0)
1530 continue;
1531 if (ret < 0)
1532 goto error;
1533 1442
1534 search_start = max(search_start, 1443 search_start = stripe_align(root, search_start);
1535 block_group->key.objectid); 1444 ins->objectid = search_start;
1536 if (!start_found) { 1445 ins->offset = num_bytes;
1537 aligned = stripe_align(root, search_start);
1538 ins->objectid = aligned;
1539 if (aligned >= search_end) {
1540 ret = -ENOSPC;
1541 goto error;
1542 }
1543 ins->offset = search_end - aligned;
1544 start_found = 1;
1545 goto check_pending;
1546 }
1547 ins->objectid = stripe_align(root,
1548 last_byte > search_start ?
1549 last_byte : search_start);
1550 if (search_end <= ins->objectid) {
1551 ret = -ENOSPC;
1552 goto error;
1553 }
1554 ins->offset = search_end - ins->objectid;
1555 BUG_ON(ins->objectid >= search_end);
1556 goto check_pending;
1557 }
1558 btrfs_item_key_to_cpu(l, &key, slot);
1559
1560 if (key.objectid >= search_start && key.objectid > last_byte &&
1561 start_found) {
1562 if (last_byte < search_start)
1563 last_byte = search_start;
1564 aligned = stripe_align(root, last_byte);
1565 hole_size = key.objectid - aligned;
1566 if (key.objectid > aligned && hole_size >= num_bytes) {
1567 ins->objectid = aligned;
1568 ins->offset = hole_size;
1569 goto check_pending;
1570 }
1571 }
1572 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
1573 if (!start_found && btrfs_key_type(&key) ==
1574 BTRFS_BLOCK_GROUP_ITEM_KEY) {
1575 last_byte = key.objectid;
1576 start_found = 1;
1577 }
1578 goto next;
1579 }
1580
1581
1582 start_found = 1;
1583 last_byte = key.objectid + key.offset;
1584
1585 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1586 last_byte >= block_group->key.objectid +
1587 block_group->key.offset) {
1588 btrfs_release_path(root, path);
1589 search_start = block_group->key.objectid +
1590 block_group->key.offset;
1591 goto new_group;
1592 }
1593next:
1594 path->slots[0]++;
1595 cond_resched();
1596 }
1597check_pending:
1598 /* we have to make sure we didn't find an extent that has already
1599 * been allocated by the map tree or the original allocation
1600 */
1601 btrfs_release_path(root, path);
1602 BUG_ON(ins->objectid < search_start);
1603 1446
1604 if (ins->objectid + num_bytes >= search_end) 1447 if (ins->objectid + num_bytes >= search_end)
1605 goto enospc; 1448 goto enospc;
1606 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED && 1449
1607 ins->objectid + num_bytes > block_group-> 1450 if (ins->objectid + num_bytes >
1608 key.objectid + block_group->key.offset) { 1451 block_group->key.objectid + block_group->key.offset) {
1609 search_start = block_group->key.objectid + 1452 search_start = block_group->key.objectid +
1610 block_group->key.offset; 1453 block_group->key.offset;
1611 goto new_group; 1454 goto new_group;
1612 } 1455 }
1456
1613 if (test_range_bit(&info->extent_ins, ins->objectid, 1457 if (test_range_bit(&info->extent_ins, ins->objectid,
1614 ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { 1458 ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1615 search_start = ins->objectid + num_bytes; 1459 search_start = ins->objectid + num_bytes;
1616 goto new_group; 1460 goto new_group;
1617 } 1461 }
1462
1618 if (test_range_bit(&info->pinned_extents, ins->objectid, 1463 if (test_range_bit(&info->pinned_extents, ins->objectid,
1619 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { 1464 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1620 search_start = ins->objectid + num_bytes; 1465 search_start = ins->objectid + num_bytes;
1621 goto new_group; 1466 goto new_group;
1622 } 1467 }
1468
1623 if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && 1469 if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1624 ins->objectid < exclude_start + exclude_nr)) { 1470 ins->objectid < exclude_start + exclude_nr)) {
1625 search_start = exclude_start + exclude_nr; 1471 search_start = exclude_start + exclude_nr;
1626 goto new_group; 1472 goto new_group;
1627 } 1473 }
1628 if (!data) { 1474
1475 if (!(data & BLOCK_GROUP_DATA)) {
1629 block_group = btrfs_lookup_block_group(info, ins->objectid); 1476 block_group = btrfs_lookup_block_group(info, ins->objectid);
1630 if (block_group) 1477 if (block_group)
1631 trans->block_group = block_group; 1478 trans->block_group = block_group;
1632 } 1479 }
1633 ins->offset = num_bytes; 1480 ins->offset = num_bytes;
1634 btrfs_free_path(path);
1635 return 0; 1481 return 0;
1636 1482
1637new_group: 1483new_group:
@@ -1646,7 +1492,6 @@ enospc:
1646 if (!full_scan) 1492 if (!full_scan)
1647 total_needed -= empty_size; 1493 total_needed -= empty_size;
1648 full_scan = 1; 1494 full_scan = 1;
1649 data = BTRFS_BLOCK_GROUP_MIXED;
1650 } else 1495 } else
1651 wrapped = 1; 1496 wrapped = 1;
1652 } 1497 }
@@ -1657,8 +1502,6 @@ enospc:
1657 goto check_failed; 1502 goto check_failed;
1658 1503
1659error: 1504error:
1660 btrfs_release_path(root, path);
1661 btrfs_free_path(path);
1662 return ret; 1505 return ret;
1663} 1506}
1664/* 1507/*
@@ -1689,6 +1532,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1689 struct btrfs_path *path; 1532 struct btrfs_path *path;
1690 struct btrfs_key keys[2]; 1533 struct btrfs_key keys[2];
1691 1534
1535 if (data)
1536 data = BLOCK_GROUP_DATA;
1537 else if (root == root->fs_info->chunk_root)
1538 data = BLOCK_GROUP_SYSTEM;
1539 else
1540 data = BLOCK_GROUP_METADATA;
1541
1692 new_hint = max(hint_byte, root->fs_info->alloc_start); 1542 new_hint = max(hint_byte, root->fs_info->alloc_start);
1693 if (new_hint < btrfs_super_total_bytes(&info->super_copy)) 1543 if (new_hint < btrfs_super_total_bytes(&info->super_copy))
1694 hint_byte = new_hint; 1544 hint_byte = new_hint;
@@ -1718,7 +1568,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1718 set_extent_bits(&root->fs_info->extent_ins, ins->objectid, 1568 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1719 ins->objectid + ins->offset - 1, 1569 ins->objectid + ins->offset - 1,
1720 EXTENT_LOCKED, GFP_NOFS); 1570 EXTENT_LOCKED, GFP_NOFS);
1721 WARN_ON(data == 1);
1722 goto update_block; 1571 goto update_block;
1723 } 1572 }
1724 1573
@@ -1768,8 +1617,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1768 } 1617 }
1769 1618
1770update_block: 1619update_block:
1771 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, 1620 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
1772 data);
1773 if (ret) { 1621 if (ret) {
1774 printk("update block group failed for %Lu %Lu\n", 1622 printk("update block group failed for %Lu %Lu\n",
1775 ins->objectid, ins->offset); 1623 ins->objectid, ins->offset);
@@ -2457,7 +2305,7 @@ again:
2457 if (ret < 0) 2305 if (ret < 0)
2458 goto out; 2306 goto out;
2459 2307
2460 ret = find_previous_extent(root, path); 2308 ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
2461 if (ret < 0) 2309 if (ret < 0)
2462 goto out; 2310 goto out;
2463 if (ret == 0) { 2311 if (ret == 0) {
@@ -2604,95 +2452,48 @@ out:
2604int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, 2452int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
2605 struct btrfs_root *root, u64 new_size) 2453 struct btrfs_root *root, u64 new_size)
2606{ 2454{
2607 struct btrfs_path *path; 2455 btrfs_set_super_total_bytes(&root->fs_info->super_copy, new_size);
2608 u64 nr = 0; 2456 return 0;
2609 u64 cur_byte; 2457}
2610 u64 old_size;
2611 unsigned long rem;
2612 struct btrfs_block_group_cache *cache;
2613 struct btrfs_block_group_item *item;
2614 struct btrfs_fs_info *info = root->fs_info;
2615 struct extent_io_tree *block_group_cache;
2616 struct btrfs_key key;
2617 struct extent_buffer *leaf;
2618 int ret;
2619 int bit;
2620
2621 old_size = btrfs_super_total_bytes(&info->super_copy);
2622 block_group_cache = &info->block_group_cache;
2623
2624 root = info->extent_root;
2625
2626 cache = btrfs_lookup_block_group(root->fs_info, old_size - 1);
2627
2628 cur_byte = cache->key.objectid + cache->key.offset;
2629 if (cur_byte >= new_size)
2630 goto set_size;
2631
2632 key.offset = BTRFS_BLOCK_GROUP_SIZE;
2633 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2634 2458
2635 path = btrfs_alloc_path(); 2459int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
2636 if (!path) 2460 struct btrfs_key *key)
2637 return -ENOMEM; 2461{
2462 int ret;
2463 struct btrfs_key found_key;
2464 struct extent_buffer *leaf;
2465 int slot;
2638 2466
2639 while(cur_byte < new_size) { 2467 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2640 key.objectid = cur_byte; 2468 if (ret < 0)
2641 ret = btrfs_insert_empty_item(trans, root, path, &key, 2469 return ret;
2642 sizeof(struct btrfs_block_group_item)); 2470 while(1) {
2643 BUG_ON(ret); 2471 slot = path->slots[0];
2644 leaf = path->nodes[0]; 2472 leaf = path->nodes[0];
2645 item = btrfs_item_ptr(leaf, path->slots[0], 2473 if (slot >= btrfs_header_nritems(leaf)) {
2646 struct btrfs_block_group_item); 2474 ret = btrfs_next_leaf(root, path);
2647 2475 if (ret == 0)
2648 btrfs_set_disk_block_group_used(leaf, item, 0); 2476 continue;
2649 div_long_long_rem(nr, 3, &rem); 2477 if (ret < 0)
2650 if (rem) { 2478 goto error;
2651 btrfs_set_disk_block_group_flags(leaf, item, 2479 break;
2652 BTRFS_BLOCK_GROUP_DATA);
2653 } else {
2654 btrfs_set_disk_block_group_flags(leaf, item, 0);
2655 }
2656 nr++;
2657
2658 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2659 BUG_ON(!cache);
2660
2661 read_extent_buffer(leaf, &cache->item, (unsigned long)item,
2662 sizeof(cache->item));
2663
2664 memcpy(&cache->key, &key, sizeof(key));
2665 cache->cached = 0;
2666 cache->pinned = 0;
2667 cur_byte = key.objectid + key.offset;
2668 btrfs_release_path(root, path);
2669
2670 if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
2671 bit = BLOCK_GROUP_DATA;
2672 cache->data = BTRFS_BLOCK_GROUP_DATA;
2673 } else {
2674 bit = BLOCK_GROUP_METADATA;
2675 cache->data = 0;
2676 } 2480 }
2481 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2677 2482
2678 /* use EXTENT_LOCKED to prevent merging */ 2483 if (found_key.objectid >= key->objectid &&
2679 set_extent_bits(block_group_cache, key.objectid, 2484 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
2680 key.objectid + key.offset - 1, 2485 return 0;
2681 bit | EXTENT_LOCKED, GFP_NOFS); 2486 path->slots[0]++;
2682 set_state_private(block_group_cache, key.objectid,
2683 (unsigned long)cache);
2684 } 2487 }
2685 btrfs_free_path(path); 2488 ret = -ENOENT;
2686set_size: 2489error:
2687 btrfs_set_super_total_bytes(&info->super_copy, new_size); 2490 return ret;
2688 return 0;
2689} 2491}
2690 2492
2691int btrfs_read_block_groups(struct btrfs_root *root) 2493int btrfs_read_block_groups(struct btrfs_root *root)
2692{ 2494{
2693 struct btrfs_path *path; 2495 struct btrfs_path *path;
2694 int ret; 2496 int ret;
2695 int err = 0;
2696 int bit; 2497 int bit;
2697 struct btrfs_block_group_cache *cache; 2498 struct btrfs_block_group_cache *cache;
2698 struct btrfs_fs_info *info = root->fs_info; 2499 struct btrfs_fs_info *info = root->fs_info;
@@ -2702,28 +2503,28 @@ int btrfs_read_block_groups(struct btrfs_root *root)
2702 struct extent_buffer *leaf; 2503 struct extent_buffer *leaf;
2703 2504
2704 block_group_cache = &info->block_group_cache; 2505 block_group_cache = &info->block_group_cache;
2705
2706 root = info->extent_root; 2506 root = info->extent_root;
2707 key.objectid = 0; 2507 key.objectid = 0;
2708 key.offset = BTRFS_BLOCK_GROUP_SIZE; 2508 key.offset = 0;
2709 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 2509 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2710
2711 path = btrfs_alloc_path(); 2510 path = btrfs_alloc_path();
2712 if (!path) 2511 if (!path)
2713 return -ENOMEM; 2512 return -ENOMEM;
2714 2513
2715 while(1) { 2514 while(1) {
2716 ret = btrfs_search_slot(NULL, info->extent_root, 2515 ret = find_first_block_group(root, path, &key);
2717 &key, path, 0, 0); 2516 if (ret > 0) {
2718 if (ret != 0) { 2517 ret = 0;
2719 err = ret; 2518 goto error;
2720 break;
2721 } 2519 }
2520 if (ret != 0)
2521 goto error;
2522
2722 leaf = path->nodes[0]; 2523 leaf = path->nodes[0];
2723 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2524 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2724 cache = kmalloc(sizeof(*cache), GFP_NOFS); 2525 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2725 if (!cache) { 2526 if (!cache) {
2726 err = -1; 2527 ret = -ENOMEM;
2727 break; 2528 break;
2728 } 2529 }
2729 2530
@@ -2733,18 +2534,17 @@ int btrfs_read_block_groups(struct btrfs_root *root)
2733 memcpy(&cache->key, &found_key, sizeof(found_key)); 2534 memcpy(&cache->key, &found_key, sizeof(found_key));
2734 cache->cached = 0; 2535 cache->cached = 0;
2735 cache->pinned = 0; 2536 cache->pinned = 0;
2537
2736 key.objectid = found_key.objectid + found_key.offset; 2538 key.objectid = found_key.objectid + found_key.offset;
2737 btrfs_release_path(root, path); 2539 btrfs_release_path(root, path);
2738 2540 cache->flags = btrfs_block_group_flags(&cache->item);
2739 if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) { 2541 bit = 0;
2740 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; 2542 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
2741 cache->data = BTRFS_BLOCK_GROUP_MIXED;
2742 } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
2743 bit = BLOCK_GROUP_DATA; 2543 bit = BLOCK_GROUP_DATA;
2744 cache->data = BTRFS_BLOCK_GROUP_DATA; 2544 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2745 } else { 2545 bit = BLOCK_GROUP_SYSTEM;
2546 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
2746 bit = BLOCK_GROUP_METADATA; 2547 bit = BLOCK_GROUP_METADATA;
2747 cache->data = 0;
2748 } 2548 }
2749 2549
2750 /* use EXTENT_LOCKED to prevent merging */ 2550 /* use EXTENT_LOCKED to prevent merging */
@@ -2758,7 +2558,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
2758 btrfs_super_total_bytes(&info->super_copy)) 2558 btrfs_super_total_bytes(&info->super_copy))
2759 break; 2559 break;
2760 } 2560 }
2761 2561 ret = 0;
2562error:
2762 btrfs_free_path(path); 2563 btrfs_free_path(path);
2763 return 0; 2564 return ret;
2764} 2565}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e8130c876330..7e3a1ebde9fc 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1706,9 +1706,9 @@ static int submit_one_bio(int rw, struct bio *bio)
1706 WARN_ON(1); 1706 WARN_ON(1);
1707 } 1707 }
1708 if (tree->ops && tree->ops->submit_bio_hook) 1708 if (tree->ops && tree->ops->submit_bio_hook)
1709 tree->ops->submit_bio_hook(rw, bio); 1709 tree->ops->submit_bio_hook(page->mapping->host, rw, bio);
1710 1710 else
1711 submit_bio(rw, bio); 1711 submit_bio(rw, bio);
1712 if (bio_flagged(bio, BIO_EOPNOTSUPP)) 1712 if (bio_flagged(bio, BIO_EOPNOTSUPP))
1713 ret = -EOPNOTSUPP; 1713 ret = -EOPNOTSUPP;
1714 bio_put(bio); 1714 bio_put(bio);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 9d6654667089..0dca89328f98 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -28,7 +28,7 @@ struct extent_state;
28struct extent_io_ops { 28struct extent_io_ops {
29 int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); 29 int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
30 int (*writepage_io_hook)(struct page *page, u64 start, u64 end); 30 int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
31 int (*submit_bio_hook)(int rw, struct bio *bio); 31 int (*submit_bio_hook)(struct inode *inode, int rw, struct bio *bio);
32 int (*readpage_io_hook)(struct page *page, u64 start, u64 end); 32 int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
33 int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, 33 int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
34 struct extent_state *state); 34 struct extent_state *state);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 315dd550d560..17063cd2cb73 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -40,6 +40,7 @@
40#include "btrfs_inode.h" 40#include "btrfs_inode.h"
41#include "ioctl.h" 41#include "ioctl.h"
42#include "print-tree.h" 42#include "print-tree.h"
43#include "volumes.h"
43 44
44struct btrfs_iget_args { 45struct btrfs_iget_args {
45 u64 ino; 46 u64 ino;
@@ -295,20 +296,20 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
295 return 0; 296 return 0;
296} 297}
297 298
298int btrfs_submit_bio_hook(int rw, struct bio *bio) 299int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio)
299{ 300{
300 struct bio_vec *bvec = bio->bi_io_vec;
301 struct inode *inode = bvec->bv_page->mapping->host;
302 struct btrfs_root *root = BTRFS_I(inode)->root; 301 struct btrfs_root *root = BTRFS_I(inode)->root;
303 struct btrfs_trans_handle *trans; 302 struct btrfs_trans_handle *trans;
304 int ret = 0; 303 int ret = 0;
305 304
306 if (rw != WRITE) 305 if (rw != WRITE) {
307 return 0; 306 goto mapit;
307 }
308 308
309 if (btrfs_test_opt(root, NODATASUM) || 309 if (btrfs_test_opt(root, NODATASUM) ||
310 btrfs_test_flag(inode, NODATASUM)) 310 btrfs_test_flag(inode, NODATASUM)) {
311 return 0; 311 goto mapit;
312 }
312 313
313 mutex_lock(&root->fs_info->fs_mutex); 314 mutex_lock(&root->fs_info->fs_mutex);
314 trans = btrfs_start_transaction(root, 1); 315 trans = btrfs_start_transaction(root, 1);
@@ -317,7 +318,8 @@ int btrfs_submit_bio_hook(int rw, struct bio *bio)
317 ret = btrfs_end_transaction(trans, root); 318 ret = btrfs_end_transaction(trans, root);
318 BUG_ON(ret); 319 BUG_ON(ret);
319 mutex_unlock(&root->fs_info->fs_mutex); 320 mutex_unlock(&root->fs_info->fs_mutex);
320 return ret; 321mapit:
322 return btrfs_map_bio(root, rw, bio);
321} 323}
322 324
323int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) 325int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
@@ -406,7 +408,7 @@ void btrfs_read_locked_inode(struct inode *inode)
406 struct btrfs_path *path; 408 struct btrfs_path *path;
407 struct extent_buffer *leaf; 409 struct extent_buffer *leaf;
408 struct btrfs_inode_item *inode_item; 410 struct btrfs_inode_item *inode_item;
409 struct btrfs_inode_timespec *tspec; 411 struct btrfs_timespec *tspec;
410 struct btrfs_root *root = BTRFS_I(inode)->root; 412 struct btrfs_root *root = BTRFS_I(inode)->root;
411 struct btrfs_key location; 413 struct btrfs_key location;
412 u64 alloc_group_block; 414 u64 alloc_group_block;
@@ -455,7 +457,8 @@ void btrfs_read_locked_inode(struct inode *inode)
455 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 457 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
456 if (!BTRFS_I(inode)->block_group) { 458 if (!BTRFS_I(inode)->block_group) {
457 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 459 BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
458 NULL, 0, 0, 0); 460 NULL, 0,
461 BTRFS_BLOCK_GROUP_METADATA, 0);
459 } 462 }
460 btrfs_free_path(path); 463 btrfs_free_path(path);
461 inode_item = NULL; 464 inode_item = NULL;
@@ -1550,7 +1553,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1550 owner = 0; 1553 owner = 0;
1551 else 1554 else
1552 owner = 1; 1555 owner = 1;
1553 group = btrfs_find_block_group(root, group, 0, 0, owner); 1556 group = btrfs_find_block_group(root, group, 0,
1557 BTRFS_BLOCK_GROUP_METADATA, owner);
1554 BTRFS_I(inode)->block_group = group; 1558 BTRFS_I(inode)->block_group = group;
1555 BTRFS_I(inode)->flags = 0; 1559 BTRFS_I(inode)->flags = 0;
1556 1560
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index da0b4dcf3617..9c1335dad40c 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -20,6 +20,40 @@
20#include "disk-io.h" 20#include "disk-io.h"
21#include "print-tree.h" 21#include "print-tree.h"
22 22
/*
 * Dump one chunk item: a summary line with owner, type and stripe count,
 * followed by one line per stripe giving its device id and offset.
 */
static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
{
	int nr_stripes = btrfs_chunk_num_stripes(eb, chunk);
	int stripe = 0;
	unsigned long long owner;
	unsigned long long type;

	owner = (unsigned long long)btrfs_chunk_owner(eb, chunk);
	type = (unsigned long long)btrfs_chunk_type(eb, chunk);
	printk("\t\tchunk owner %llu type %llu num_stripes %d\n",
	       owner, type, nr_stripes);

	while (stripe < nr_stripes) {
		unsigned long long devid;
		unsigned long long offset;

		devid = (unsigned long long)btrfs_stripe_devid_nr(eb, chunk,
								  stripe);
		offset = (unsigned long long)btrfs_stripe_offset_nr(eb, chunk,
								    stripe);
		printk("\t\t\tstripe %d devid %llu offset %llu\n", stripe,
		       devid, offset);
		stripe++;
	}
}
37static void print_dev_item(struct extent_buffer *eb,
38 struct btrfs_dev_item *dev_item)
39{
40 char *name;
41 int name_len;
42
43 name_len = btrfs_device_name_len(eb, dev_item);
44 name = kmalloc(name_len, GFP_NOFS);
45 if (name) {
46 read_extent_buffer(eb, name,
47 (unsigned long)btrfs_device_name(dev_item),
48 name_len);
49 }
50 printk("\t\tdev item name %.*s devid %llu "
51 "total_bytes %llu bytes used %Lu\n", name_len, name,
52 (unsigned long long)btrfs_device_id(eb, dev_item),
53 (unsigned long long)btrfs_device_total_bytes(eb, dev_item),
54 (unsigned long long)btrfs_device_bytes_used(eb, dev_item));
55 kfree(name);
56}
23void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) 57void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
24{ 58{
25 int i; 59 int i;
@@ -34,6 +68,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
34 struct btrfs_key key; 68 struct btrfs_key key;
35 struct btrfs_key found_key; 69 struct btrfs_key found_key;
36 struct btrfs_extent_ref *ref; 70 struct btrfs_extent_ref *ref;
71 struct btrfs_dev_extent *dev_extent;
37 u32 type; 72 u32 type;
38 73
39 printk("leaf %llu total ptrs %d free space %d\n", 74 printk("leaf %llu total ptrs %d free space %d\n",
@@ -106,6 +141,19 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
106 printk("\t\tblock group used %llu\n", 141 printk("\t\tblock group used %llu\n",
107 (unsigned long long)btrfs_disk_block_group_used(l, bi)); 142 (unsigned long long)btrfs_disk_block_group_used(l, bi));
108 break; 143 break;
144 case BTRFS_CHUNK_ITEM_KEY:
145 print_chunk(l, btrfs_item_ptr(l, i, struct btrfs_chunk));
146 break;
147 case BTRFS_DEV_ITEM_KEY:
148 print_dev_item(l, btrfs_item_ptr(l, i,
149 struct btrfs_dev_item));
150 break;
151 case BTRFS_DEV_EXTENT_KEY:
152 dev_extent = btrfs_item_ptr(l, i,
153 struct btrfs_dev_extent);
154 printk("\t\tdev extent owner %llu length %llu\n",
155 (unsigned long long)btrfs_dev_extent_owner(l, dev_extent),
156 (unsigned long long)btrfs_dev_extent_length(l, dev_extent));
109 }; 157 };
110 } 158 }
111} 159}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e9a0983897f3..5e9f69244f9f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -198,29 +198,42 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
198 return werr; 198 return werr;
199} 199}
200 200
201int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, 201static int update_cowonly_root(struct btrfs_trans_handle *trans,
202 struct btrfs_root *root) 202 struct btrfs_root *root)
203{ 203{
204 int ret; 204 int ret;
205 u64 old_extent_block; 205 u64 old_root_bytenr;
206 struct btrfs_fs_info *fs_info = root->fs_info; 206 struct btrfs_root *tree_root = root->fs_info->tree_root;
207 struct btrfs_root *tree_root = fs_info->tree_root;
208 struct btrfs_root *extent_root = fs_info->extent_root;
209 207
210 btrfs_write_dirty_block_groups(trans, extent_root); 208 btrfs_write_dirty_block_groups(trans, root);
211 while(1) { 209 while(1) {
212 old_extent_block = btrfs_root_bytenr(&extent_root->root_item); 210 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
213 if (old_extent_block == extent_root->node->start) 211 if (old_root_bytenr == root->node->start)
214 break; 212 break;
215 btrfs_set_root_bytenr(&extent_root->root_item, 213 btrfs_set_root_bytenr(&root->root_item,
216 extent_root->node->start); 214 root->node->start);
217 btrfs_set_root_level(&extent_root->root_item, 215 btrfs_set_root_level(&root->root_item,
218 btrfs_header_level(extent_root->node)); 216 btrfs_header_level(root->node));
219 ret = btrfs_update_root(trans, tree_root, 217 ret = btrfs_update_root(trans, tree_root,
220 &extent_root->root_key, 218 &root->root_key,
221 &extent_root->root_item); 219 &root->root_item);
222 BUG_ON(ret); 220 BUG_ON(ret);
223 btrfs_write_dirty_block_groups(trans, extent_root); 221 btrfs_write_dirty_block_groups(trans, root);
222 }
223 return 0;
224}
225
226int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
227 struct btrfs_root *root)
228{
229 struct btrfs_fs_info *fs_info = root->fs_info;
230 struct list_head *next;
231
232 while(!list_empty(&fs_info->dirty_cowonly_roots)) {
233 next = fs_info->dirty_cowonly_roots.next;
234 list_del_init(next);
235 root = list_entry(next, struct btrfs_root, dirty_list);
236 update_cowonly_root(trans, root);
224 } 237 }
225 return 0; 238 return 0;
226} 239}
@@ -616,6 +629,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
616 unsigned long timeout = 1; 629 unsigned long timeout = 1;
617 struct btrfs_transaction *cur_trans; 630 struct btrfs_transaction *cur_trans;
618 struct btrfs_transaction *prev_trans = NULL; 631 struct btrfs_transaction *prev_trans = NULL;
632 struct btrfs_root *chunk_root = root->fs_info->chunk_root;
619 struct list_head dirty_fs_roots; 633 struct list_head dirty_fs_roots;
620 struct extent_io_tree *pinned_copy; 634 struct extent_io_tree *pinned_copy;
621 DEFINE_WAIT(wait); 635 DEFINE_WAIT(wait);
@@ -714,6 +728,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
714 btrfs_set_super_root_level(&root->fs_info->super_copy, 728 btrfs_set_super_root_level(&root->fs_info->super_copy,
715 btrfs_header_level(root->fs_info->tree_root->node)); 729 btrfs_header_level(root->fs_info->tree_root->node));
716 730
731 btrfs_set_super_chunk_root(&root->fs_info->super_copy,
732 chunk_root->node->start);
733 btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
734 btrfs_header_level(chunk_root->node));
717 write_extent_buffer(root->fs_info->sb_buffer, 735 write_extent_buffer(root->fs_info->sb_buffer,
718 &root->fs_info->super_copy, 0, 736 &root->fs_info->super_copy, 0,
719 sizeof(root->fs_info->super_copy)); 737 sizeof(root->fs_info->super_copy));
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
new file mode 100644
index 000000000000..90a8d45dc6d7
--- /dev/null
+++ b/fs/btrfs/volumes.c
@@ -0,0 +1,852 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18#include <linux/sched.h>
19#include <linux/bio.h>
20#include "ctree.h"
21#include "extent_map.h"
22#include "disk-io.h"
23#include "transaction.h"
24#include "print-tree.h"
25#include "volumes.h"
26
/*
 * Payload attached to each entry of the chunk mapping tree.  This file
 * stores a pointer to it in extent_map->bdev (cast to struct block_device *)
 * and casts it back when translating a logical address.
 */
27struct map_lookup {
28 struct btrfs_device *dev; /* device the mapped range lives on */
29 u64 physical; /* byte offset on dev where the mapped range starts */
30};
31
32/*
33 * this uses a pretty simple search, the expectation is that it is
34 * called very infrequently and that a given device has a small number
35 * of extents
36 */
37static int find_free_dev_extent(struct btrfs_trans_handle *trans,
38 struct btrfs_device *device,
39 struct btrfs_path *path,
40 u64 num_bytes, u64 *start)
41{
42 struct btrfs_key key;
43 struct btrfs_root *root = device->dev_root;
44 struct btrfs_dev_extent *dev_extent = NULL;
45 u64 hole_size = 0;
46 u64 last_byte = 0;
47 u64 search_start = 0;
48 u64 search_end = device->total_bytes;
49 int ret;
50 int slot = 0;
51 int start_found;
52 struct extent_buffer *l;
53
54 start_found = 0;
55 path->reada = 2;
56
57 /* FIXME use last free of some kind */
58
59 key.objectid = device->devid;
60 key.offset = search_start;
61 key.type = BTRFS_DEV_EXTENT_KEY;
62 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
63 if (ret < 0)
64 goto error;
65 ret = btrfs_previous_item(root, path, 0, key.type);
66 if (ret < 0)
67 goto error;
68 l = path->nodes[0];
69 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
70 while (1) {
71 l = path->nodes[0];
72 slot = path->slots[0];
73 if (slot >= btrfs_header_nritems(l)) {
74 ret = btrfs_next_leaf(root, path);
75 if (ret == 0)
76 continue;
77 if (ret < 0)
78 goto error;
79no_more_items:
80 if (!start_found) {
81 if (search_start >= search_end) {
82 ret = -ENOSPC;
83 goto error;
84 }
85 *start = search_start;
86 start_found = 1;
87 goto check_pending;
88 }
89 *start = last_byte > search_start ?
90 last_byte : search_start;
91 if (search_end <= *start) {
92 ret = -ENOSPC;
93 goto error;
94 }
95 goto check_pending;
96 }
97 btrfs_item_key_to_cpu(l, &key, slot);
98
99 if (key.objectid < device->devid)
100 goto next;
101
102 if (key.objectid > device->devid)
103 goto no_more_items;
104
105 if (key.offset >= search_start && key.offset > last_byte &&
106 start_found) {
107 if (last_byte < search_start)
108 last_byte = search_start;
109 hole_size = key.offset - last_byte;
110 if (key.offset > last_byte &&
111 hole_size >= num_bytes) {
112 *start = last_byte;
113 goto check_pending;
114 }
115 }
116 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
117 goto next;
118 }
119
120 start_found = 1;
121 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
122 last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
123next:
124 path->slots[0]++;
125 cond_resched();
126 }
127check_pending:
128 /* we have to make sure we didn't find an extent that has already
129 * been allocated by the map tree or the original allocation
130 */
131 btrfs_release_path(root, path);
132 BUG_ON(*start < search_start);
133
134 if (*start + num_bytes >= search_end) {
135 ret = -ENOSPC;
136 goto error;
137 }
138 /* check for pending inserts here */
139 return 0;
140
141error:
142 btrfs_release_path(root, path);
143 return ret;
144}
145
146int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
147 struct btrfs_device *device,
148 u64 owner, u64 num_bytes, u64 *start)
149{
150 int ret;
151 struct btrfs_path *path;
152 struct btrfs_root *root = device->dev_root;
153 struct btrfs_dev_extent *extent;
154 struct extent_buffer *leaf;
155 struct btrfs_key key;
156
157 path = btrfs_alloc_path();
158 if (!path)
159 return -ENOMEM;
160
161 ret = find_free_dev_extent(trans, device, path, num_bytes, start);
162 if (ret)
163 goto err;
164
165 key.objectid = device->devid;
166 key.offset = *start;
167 key.type = BTRFS_DEV_EXTENT_KEY;
168 ret = btrfs_insert_empty_item(trans, root, path, &key,
169 sizeof(*extent));
170 BUG_ON(ret);
171
172 leaf = path->nodes[0];
173 extent = btrfs_item_ptr(leaf, path->slots[0],
174 struct btrfs_dev_extent);
175 btrfs_set_dev_extent_owner(leaf, extent, owner);
176 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
177 btrfs_mark_buffer_dirty(leaf);
178err:
179 btrfs_free_path(path);
180 return ret;
181}
182
183static int find_next_chunk(struct btrfs_root *root, u64 *objectid)
184{
185 struct btrfs_path *path;
186 int ret;
187 struct btrfs_key key;
188 struct btrfs_key found_key;
189
190 path = btrfs_alloc_path();
191 BUG_ON(!path);
192
193 key.objectid = (u64)-1;
194 key.offset = (u64)-1;
195 key.type = BTRFS_CHUNK_ITEM_KEY;
196
197 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
198 if (ret < 0)
199 goto error;
200
201 BUG_ON(ret == 0);
202
203 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
204 if (ret) {
205 *objectid = 0;
206 } else {
207 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
208 path->slots[0]);
209 *objectid = found_key.objectid + found_key.offset;
210 }
211 ret = 0;
212error:
213 btrfs_free_path(path);
214 return ret;
215}
216
217static struct btrfs_device *next_device(struct list_head *head,
218 struct list_head *last)
219{
220 struct list_head *next = last->next;
221 struct btrfs_device *dev;
222
223 if (list_empty(head))
224 return NULL;
225
226 if (next == head)
227 next = next->next;
228
229 dev = list_entry(next, struct btrfs_device, dev_list);
230 return dev;
231}
232
233static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
234 u64 *objectid)
235{
236 int ret;
237 struct btrfs_key key;
238 struct btrfs_key found_key;
239
240 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
241 key.type = BTRFS_DEV_ITEM_KEY;
242 key.offset = (u64)-1;
243
244 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
245 if (ret < 0)
246 goto error;
247
248 BUG_ON(ret == 0);
249
250 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
251 BTRFS_DEV_ITEM_KEY);
252 if (ret) {
253 *objectid = 1;
254 } else {
255 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
256 path->slots[0]);
257 *objectid = found_key.offset + 1;
258 }
259 ret = 0;
260error:
261 btrfs_release_path(root, path);
262 return ret;
263}
264
265/*
266 * the device information is stored in the chunk root
267 * the btrfs_device struct should be fully filled in
268 */
269int btrfs_add_device(struct btrfs_trans_handle *trans,
270 struct btrfs_root *root,
271 struct btrfs_device *device)
272{
273 int ret;
274 struct btrfs_path *path;
275 struct btrfs_dev_item *dev_item;
276 struct extent_buffer *leaf;
277 struct btrfs_key key;
278 unsigned long ptr;
279 u64 free_devid;
280
281 root = root->fs_info->chunk_root;
282
283 path = btrfs_alloc_path();
284 if (!path)
285 return -ENOMEM;
286
287 ret = find_next_devid(root, path, &free_devid);
288 if (ret)
289 goto out;
290
291 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
292 key.type = BTRFS_DEV_ITEM_KEY;
293 key.offset = free_devid;
294
295 ret = btrfs_insert_empty_item(trans, root, path, &key,
296 sizeof(*dev_item) + device->name_len);
297 if (ret)
298 goto out;
299
300 leaf = path->nodes[0];
301 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
302
303 btrfs_set_device_id(leaf, dev_item, device->devid);
304 btrfs_set_device_type(leaf, dev_item, device->type);
305 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
306 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
307 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
308 btrfs_set_device_rdev(leaf, dev_item, device->rdev);
309 btrfs_set_device_partition(leaf, dev_item, device->partition);
310 btrfs_set_device_name_len(leaf, dev_item, device->name_len);
311 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
312 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
313
314 ptr = (unsigned long)btrfs_device_name(dev_item);
315 write_extent_buffer(leaf, device->name, ptr, device->name_len);
316
317 ptr = (unsigned long)btrfs_device_uuid(dev_item);
318 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
319 btrfs_mark_buffer_dirty(leaf);
320 ret = 0;
321
322out:
323 btrfs_free_path(path);
324 return ret;
325}
326int btrfs_update_device(struct btrfs_trans_handle *trans,
327 struct btrfs_device *device)
328{
329 int ret;
330 struct btrfs_path *path;
331 struct btrfs_root *root;
332 struct btrfs_dev_item *dev_item;
333 struct extent_buffer *leaf;
334 struct btrfs_key key;
335
336 root = device->dev_root->fs_info->chunk_root;
337
338 path = btrfs_alloc_path();
339 if (!path)
340 return -ENOMEM;
341
342 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
343 key.type = BTRFS_DEV_ITEM_KEY;
344 key.offset = device->devid;
345
346 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
347 if (ret < 0)
348 goto out;
349
350 if (ret > 0) {
351 ret = -ENOENT;
352 goto out;
353 }
354
355 leaf = path->nodes[0];
356 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
357
358 btrfs_set_device_id(leaf, dev_item, device->devid);
359 btrfs_set_device_type(leaf, dev_item, device->type);
360 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
361 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
362 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
363 btrfs_set_device_rdev(leaf, dev_item, device->rdev);
364 btrfs_set_device_partition(leaf, dev_item, device->partition);
365 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
366 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
367 btrfs_mark_buffer_dirty(leaf);
368
369out:
370 btrfs_free_path(path);
371 return ret;
372}
373
374int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
375 struct btrfs_root *root,
376 struct btrfs_key *key,
377 struct btrfs_chunk *chunk, int item_size)
378{
379 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
380 struct btrfs_disk_key disk_key;
381 u32 array_size;
382 u8 *ptr;
383
384 array_size = btrfs_super_sys_array_size(super_copy);
385 if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
386 return -EFBIG;
387
388 ptr = super_copy->sys_chunk_array + array_size;
389 btrfs_cpu_key_to_disk(&disk_key, key);
390 memcpy(ptr, &disk_key, sizeof(disk_key));
391 ptr += sizeof(disk_key);
392 memcpy(ptr, chunk, item_size);
393 item_size += sizeof(disk_key);
394 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
395 return 0;
396}
397
398int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
399 struct btrfs_root *extent_root, u64 *start,
400 u64 *num_bytes, u32 type)
401{
402 u64 dev_offset;
403 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
404 struct btrfs_stripe *stripes;
405 struct btrfs_device *device = NULL;
406 struct btrfs_chunk *chunk;
407 struct list_head *dev_list = &extent_root->fs_info->devices;
408 struct list_head *last_dev = extent_root->fs_info->last_device;
409 struct extent_map_tree *em_tree;
410 struct map_lookup *map;
411 struct extent_map *em;
412 u64 physical;
413 u64 calc_size = 1024 * 1024 * 1024;
414 int num_stripes;
415 int ret;
416 int index = 0;
417 struct btrfs_key key;
418
419
420 ret = find_next_chunk(chunk_root, &key.objectid);
421 if (ret)
422 return ret;
423
424 num_stripes = 1;
425 chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
426 if (!chunk)
427 return -ENOMEM;
428
429 stripes = &chunk->stripe;
430
431 *num_bytes = calc_size;
432 while(index < num_stripes) {
433 device = next_device(dev_list, last_dev);
434 BUG_ON(!device);
435 last_dev = &device->dev_list;
436 extent_root->fs_info->last_device = last_dev;
437
438 ret = btrfs_alloc_dev_extent(trans, device,
439 key.objectid,
440 calc_size, &dev_offset);
441 BUG_ON(ret);
442
443 device->bytes_used += calc_size;
444 ret = btrfs_update_device(trans, device);
445 BUG_ON(ret);
446
447 btrfs_set_stack_stripe_devid(stripes + index, device->devid);
448 btrfs_set_stack_stripe_offset(stripes + index, dev_offset);
449 physical = dev_offset;
450 index++;
451 }
452
453 /* key.objectid was set above */
454 key.offset = *num_bytes;
455 key.type = BTRFS_CHUNK_ITEM_KEY;
456 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
457 btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
458 btrfs_set_stack_chunk_type(chunk, type);
459 btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
460 btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize);
461 btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize);
462 btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
463
464 ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
465 btrfs_chunk_item_size(num_stripes));
466 BUG_ON(ret);
467 *start = key.objectid;
468
469 em = alloc_extent_map(GFP_NOFS);
470 if (!em)
471 return -ENOMEM;
472 map = kmalloc(sizeof(*map), GFP_NOFS);
473 if (!map) {
474 free_extent_map(em);
475 return -ENOMEM;
476 }
477
478 em->bdev = (struct block_device *)map;
479 em->start = key.objectid;
480 em->len = key.offset;
481 em->block_start = 0;
482
483 map->physical = physical;
484 map->dev = device;
485
486 if (!map->dev) {
487 kfree(map);
488 free_extent_map(em);
489 return -EIO;
490 }
491 kfree(chunk);
492
493 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
494 spin_lock(&em_tree->lock);
495 ret = add_extent_mapping(em_tree, em);
496 BUG_ON(ret);
497 spin_unlock(&em_tree->lock);
498 free_extent_map(em);
499 return ret;
500}
501
502void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
503{
504 extent_map_tree_init(&tree->map_tree, GFP_NOFS);
505}
506
507void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
508{
509 struct extent_map *em;
510
511 while(1) {
512 spin_lock(&tree->map_tree.lock);
513 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
514 if (em)
515 remove_extent_mapping(&tree->map_tree, em);
516 spin_unlock(&tree->map_tree.lock);
517 if (!em)
518 break;
519 kfree(em->bdev);
520 /* once for us */
521 free_extent_map(em);
522 /* once for the tree */
523 free_extent_map(em);
524 }
525}
526
527int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
528 u64 logical, u64 *phys, u64 *length,
529 struct btrfs_device **dev)
530{
531 struct extent_map *em;
532 struct map_lookup *map;
533 struct extent_map_tree *em_tree = &map_tree->map_tree;
534 u64 offset;
535
536
537 spin_lock(&em_tree->lock);
538 em = lookup_extent_mapping(em_tree, logical, *length);
539 BUG_ON(!em);
540
541 BUG_ON(em->start > logical || em->start + em->len < logical);
542 map = (struct map_lookup *)em->bdev;
543 offset = logical - em->start;
544 *phys = map->physical + offset;
545 *length = em->len - offset;
546 *dev = map->dev;
547 free_extent_map(em);
548 spin_unlock(&em_tree->lock);
549 return 0;
550}
551
552int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio)
553{
554 struct btrfs_mapping_tree *map_tree;
555 struct btrfs_device *dev;
556 u64 logical = bio->bi_sector << 9;
557 u64 physical;
558 u64 length = 0;
559 u64 map_length;
560 struct bio_vec *bvec;
561 int i;
562 int ret;
563
564 bio_for_each_segment(bvec, bio, i) {
565 length += bvec->bv_len;
566 }
567 map_tree = &root->fs_info->mapping_tree;
568 map_length = length;
569 ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev);
570 BUG_ON(map_length < length);
571 bio->bi_sector = physical >> 9;
572 bio->bi_bdev = dev->bdev;
573 submit_bio(rw, bio);
574 return 0;
575}
576
577struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid)
578{
579 struct btrfs_device *dev;
580 struct list_head *cur = root->fs_info->devices.next;
581 struct list_head *head = &root->fs_info->devices;
582
583 while(cur != head) {
584 dev = list_entry(cur, struct btrfs_device, dev_list);
585 if (dev->devid == devid)
586 return dev;
587 cur = cur->next;
588 }
589 return NULL;
590}
591
592static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
593 struct extent_buffer *leaf,
594 struct btrfs_chunk *chunk)
595{
596 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
597 struct map_lookup *map;
598 struct extent_map *em;
599 u64 logical;
600 u64 length;
601 u64 devid;
602 int ret;
603
604 logical = key->objectid;
605 length = key->offset;
606 spin_lock(&map_tree->map_tree.lock);
607 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
608
609 /* already mapped? */
610 if (em && em->start <= logical && em->start + em->len > logical) {
611 free_extent_map(em);
612 spin_unlock(&map_tree->map_tree.lock);
613 return 0;
614 } else if (em) {
615 free_extent_map(em);
616 }
617 spin_unlock(&map_tree->map_tree.lock);
618
619 map = kzalloc(sizeof(*map), GFP_NOFS);
620 if (!map)
621 return -ENOMEM;
622
623 em = alloc_extent_map(GFP_NOFS);
624 if (!em)
625 return -ENOMEM;
626 map = kmalloc(sizeof(*map), GFP_NOFS);
627 if (!map) {
628 free_extent_map(em);
629 return -ENOMEM;
630 }
631
632 em->bdev = (struct block_device *)map;
633 em->start = logical;
634 em->len = length;
635 em->block_start = 0;
636
637 map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0);
638 devid = btrfs_stripe_devid_nr(leaf, chunk, 0);
639 map->dev = btrfs_find_device(root, devid);
640 if (!map->dev) {
641 kfree(map);
642 free_extent_map(em);
643 return -EIO;
644 }
645
646 spin_lock(&map_tree->map_tree.lock);
647 ret = add_extent_mapping(&map_tree->map_tree, em);
648 BUG_ON(ret);
649 spin_unlock(&map_tree->map_tree.lock);
650 free_extent_map(em);
651
652 return 0;
653}
654
655static int fill_device_from_item(struct extent_buffer *leaf,
656 struct btrfs_dev_item *dev_item,
657 struct btrfs_device *device)
658{
659 unsigned long ptr;
660 char *name;
661
662 device->devid = btrfs_device_id(leaf, dev_item);
663 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
664 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
665 device->type = btrfs_device_type(leaf, dev_item);
666 device->io_align = btrfs_device_io_align(leaf, dev_item);
667 device->io_width = btrfs_device_io_width(leaf, dev_item);
668 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
669 device->rdev = btrfs_device_rdev(leaf, dev_item);
670 device->partition = btrfs_device_partition(leaf, dev_item);
671 device->name_len = btrfs_device_name_len(leaf, dev_item);
672
673 ptr = (unsigned long)btrfs_device_uuid(dev_item);
674 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
675
676 name = kmalloc(device->name_len + 1, GFP_NOFS);
677 if (!name)
678 return -ENOMEM;
679 device->name = name;
680 ptr = (unsigned long)btrfs_device_name(dev_item);
681 read_extent_buffer(leaf, name, ptr, device->name_len);
682 name[device->name_len] = '\0';
683 return 0;
684}
685
686static int read_one_dev(struct btrfs_root *root, struct btrfs_key *key,
687 struct extent_buffer *leaf,
688 struct btrfs_dev_item *dev_item)
689{
690 struct btrfs_device *device;
691 u64 devid;
692 int ret;
693
694 devid = btrfs_device_id(leaf, dev_item);
695 if (btrfs_find_device(root, devid))
696 return 0;
697
698 device = kmalloc(sizeof(*device), GFP_NOFS);
699 if (!device)
700 return -ENOMEM;
701
702 fill_device_from_item(leaf, dev_item, device);
703 device->dev_root = root->fs_info->dev_root;
704 device->bdev = root->fs_info->sb->s_bdev;
705 list_add(&device->dev_list, &root->fs_info->devices);
706 memcpy(&device->dev_key, key, sizeof(*key));
707 ret = 0;
708#if 0
709 ret = btrfs_open_device(device);
710 if (ret) {
711 kfree(device);
712 }
713#endif
714 return ret;
715}
716
717int btrfs_read_sys_array(struct btrfs_root *root)
718{
719 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
720 struct extent_buffer *sb = root->fs_info->sb_buffer;
721 struct btrfs_disk_key *disk_key;
722 struct btrfs_dev_item *dev_item;
723 struct btrfs_chunk *chunk;
724 struct btrfs_key key;
725 u32 num_stripes;
726 u32 array_size;
727 u32 len = 0;
728 u8 *ptr;
729 unsigned long sb_ptr;
730 u32 cur;
731 int ret;
732 int dev_only = 1;
733
734 array_size = btrfs_super_sys_array_size(super_copy);
735
736 /*
737 * we do this loop twice, once for the device items and
738 * once for all of the chunks. This way there are device
739 * structs filled in for every chunk
740 */
741again:
742 ptr = super_copy->sys_chunk_array;
743 sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
744 cur = 0;
745
746 while (cur < array_size) {
747 disk_key = (struct btrfs_disk_key *)ptr;
748 btrfs_disk_key_to_cpu(&key, disk_key);
749
750 len = sizeof(*disk_key);
751 ptr += len;
752 sb_ptr += len;
753 cur += len;
754
755 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID &&
756 key.type == BTRFS_DEV_ITEM_KEY) {
757 dev_item = (struct btrfs_dev_item *)sb_ptr;
758 if (dev_only) {
759 ret = read_one_dev(root, &key, sb, dev_item);
760 BUG_ON(ret);
761 }
762 len = sizeof(*dev_item);
763 len += btrfs_device_name_len(sb, dev_item);
764 } else if (key.type == BTRFS_CHUNK_ITEM_KEY) {
765
766 chunk = (struct btrfs_chunk *)sb_ptr;
767 if (!dev_only) {
768 ret = read_one_chunk(root, &key, sb, chunk);
769 BUG_ON(ret);
770 }
771 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
772 len = btrfs_chunk_item_size(num_stripes);
773 } else {
774 BUG();
775 }
776 ptr += len;
777 sb_ptr += len;
778 cur += len;
779 }
780 if (dev_only == 1) {
781 dev_only = 0;
782 goto again;
783 }
784 return 0;
785}
786
787int btrfs_read_chunk_tree(struct btrfs_root *root)
788{
789 struct btrfs_path *path;
790 struct extent_buffer *leaf;
791 struct btrfs_key key;
792 struct btrfs_key found_key;
793 int ret;
794 int slot;
795
796 root = root->fs_info->chunk_root;
797
798 path = btrfs_alloc_path();
799 if (!path)
800 return -ENOMEM;
801
802 /* first we search for all of the device items, and then we
803 * read in all of the chunk items. This way we can create chunk
804 * mappings that reference all of the devices that are afound
805 */
806 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
807 key.offset = 0;
808 key.type = 0;
809again:
810 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
811 while(1) {
812 leaf = path->nodes[0];
813 slot = path->slots[0];
814 if (slot >= btrfs_header_nritems(leaf)) {
815 ret = btrfs_next_leaf(root, path);
816 if (ret == 0)
817 continue;
818 if (ret < 0)
819 goto error;
820 break;
821 }
822 btrfs_item_key_to_cpu(leaf, &found_key, slot);
823 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
824 if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
825 break;
826 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
827 struct btrfs_dev_item *dev_item;
828 dev_item = btrfs_item_ptr(leaf, slot,
829 struct btrfs_dev_item);
830 ret = read_one_dev(root, &found_key, leaf,
831 dev_item);
832 BUG_ON(ret);
833 }
834 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
835 struct btrfs_chunk *chunk;
836 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
837 ret = read_one_chunk(root, &found_key, leaf, chunk);
838 }
839 path->slots[0]++;
840 }
841 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
842 key.objectid = 0;
843 btrfs_release_path(root, path);
844 goto again;
845 }
846
847 btrfs_free_path(path);
848 ret = 0;
849error:
850 return ret;
851}
852
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
new file mode 100644
index 000000000000..4a47dcb00a73
--- /dev/null
+++ b/fs/btrfs/volumes.h
@@ -0,0 +1,78 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19#ifndef __BTRFS_VOLUMES_
20#define __BTRFS_VOLUMES_
21struct btrfs_device {
22 struct list_head dev_list;
23 struct btrfs_root *dev_root;
24 struct btrfs_key dev_key;
25
26 struct block_device *bdev;
27
28 /* the internal btrfs device id */
29 u64 devid;
30
31 /* size of the device */
32 u64 total_bytes;
33
34 /* bytes used */
35 u64 bytes_used;
36
37 /* optimal io alignment for this device */
38 u32 io_align;
39
40 /* optimal io width for this device */
41 u32 io_width;
42
43 /* minimal io size for this device */
44 u32 sector_size;
45
46 /* the kernel device number */
47 u64 rdev;
48
49 /* type and info about this device */
50 u64 type;
51
52 /* partition number, 0 for whole dev */
53 int partition;
54
55 /* length of the name data at the end of the item */
56 int name_len;
57
58 /* physical drive uuid (or lvm uuid) */
59 u8 uuid[BTRFS_DEV_UUID_SIZE];
60
61 char *name;
62};
63
64int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
65 struct btrfs_device *device,
66 u64 owner, u64 num_bytes, u64 *start);
67int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
68 u64 logical, u64 *phys, u64 *length,
69 struct btrfs_device **dev);
70int btrfs_read_sys_array(struct btrfs_root *root);
71int btrfs_read_chunk_tree(struct btrfs_root *root);
72int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
73 struct btrfs_root *extent_root, u64 *start,
74 u64 *num_bytes, u32 type);
75void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
76void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
77int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio);
78#endif