diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-09-29 15:18:18 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-29 15:18:18 -0400 |
commit | d352ac68148b69937d39ca5d48bcc4478e118dbf (patch) | |
tree | 7951dd7311999d9e77766acdc7f8e93de97874d8 /fs/btrfs | |
parent | 9a5e1ea1e1e539e244a54afffc330fc368376ab9 (diff) |
Btrfs: add and improve comments
This improves the comments at the top of many functions. It didn't
dive into the guts of functions because I was trying to
avoid merging problems with the new allocator and back reference work.
extent-tree.c and volumes.c were both skipped, and there is definitely
more work todo in cleaning and commenting the code.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/Makefile | 2 | ||||
-rw-r--r-- | fs/btrfs/TODO | 20 | ||||
-rw-r--r-- | fs/btrfs/async-thread.c | 10 | ||||
-rw-r--r-- | fs/btrfs/async-thread.h | 7 | ||||
-rw-r--r-- | fs/btrfs/bit-radix.c | 130 | ||||
-rw-r--r-- | fs/btrfs/bit-radix.h | 33 | ||||
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 54 | ||||
-rw-r--r-- | fs/btrfs/crc32c.h | 18 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 127 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 1 | ||||
-rw-r--r-- | fs/btrfs/dir-item.c | 41 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 33 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 34 | ||||
-rw-r--r-- | fs/btrfs/extent_map.c | 10 | ||||
-rw-r--r-- | fs/btrfs/file.c | 44 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 189 | ||||
-rw-r--r-- | fs/btrfs/locking.c | 13 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 19 | ||||
-rw-r--r-- | fs/btrfs/ref-cache.c | 26 | ||||
-rw-r--r-- | fs/btrfs/ref-cache.h | 3 | ||||
-rw-r--r-- | fs/btrfs/root-tree.c | 21 | ||||
-rw-r--r-- | fs/btrfs/struct-funcs.c | 21 | ||||
-rw-r--r-- | fs/btrfs/super.c | 3 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 67 | ||||
-rw-r--r-- | fs/btrfs/tree-defrag.c | 4 |
25 files changed, 653 insertions, 277 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d5c28557fba9..48b7909ca8d1 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
@@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),) | |||
4 | obj-m := btrfs.o | 4 | obj-m := btrfs.o |
5 | btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | 5 | btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ |
6 | file-item.o inode-item.o inode-map.o disk-io.o \ | 6 | file-item.o inode-item.o inode-map.o disk-io.o \ |
7 | transaction.o bit-radix.o inode.o file.o tree-defrag.o \ | 7 | transaction.o inode.o file.o tree-defrag.o \ |
8 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ | 8 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ |
9 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 9 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
10 | ref-cache.o export.o tree-log.o acl.o free-space-cache.o | 10 | ref-cache.o export.o tree-log.o acl.o free-space-cache.o |
diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO deleted file mode 100644 index d9b6d38c603a..000000000000 --- a/fs/btrfs/TODO +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | * cleanup, add more error checking, get rid of BUG_ONs | ||
2 | * Fix ENOSPC handling | ||
3 | * Make allocator smarter | ||
4 | * add a block group to struct inode | ||
5 | * Do actual block accounting | ||
6 | * Check compat and incompat flags on the inode | ||
7 | * Get rid of struct ctree_path, limiting tree levels held at one time | ||
8 | * Add generation number to key pointer in nodes | ||
9 | * Add generation number to inode | ||
10 | * forbid cross subvolume renames and hardlinks | ||
11 | * Release | ||
12 | * Do real tree locking | ||
13 | * Add extent mirroring (backup copies of blocks) | ||
14 | * Add fancy interface to get access to incremental backups | ||
15 | * Add fancy striped extents to make big reads faster | ||
16 | * Use relocation to try and fix write errors | ||
17 | * Make allocator much smarter | ||
18 | * xattrs (directory streams for regular files) | ||
19 | * Scrub & defrag | ||
20 | |||
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 4e780b279de6..04fb9702d14c 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
@@ -231,17 +231,25 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) | |||
231 | 231 | ||
232 | /* | 232 | /* |
233 | * if we pick a busy task, move the task to the end of the list. | 233 | * if we pick a busy task, move the task to the end of the list. |
234 | * hopefully this will keep things somewhat evenly balanced | 234 | * hopefully this will keep things somewhat evenly balanced. |
235 | * Do the move in batches based on the sequence number. This groups | ||
236 | * requests submitted at roughly the same time onto the same worker. | ||
235 | */ | 237 | */ |
236 | next = workers->worker_list.next; | 238 | next = workers->worker_list.next; |
237 | worker = list_entry(next, struct btrfs_worker_thread, worker_list); | 239 | worker = list_entry(next, struct btrfs_worker_thread, worker_list); |
238 | atomic_inc(&worker->num_pending); | 240 | atomic_inc(&worker->num_pending); |
239 | worker->sequence++; | 241 | worker->sequence++; |
242 | |||
240 | if (worker->sequence % workers->idle_thresh == 0) | 243 | if (worker->sequence % workers->idle_thresh == 0) |
241 | list_move_tail(next, &workers->worker_list); | 244 | list_move_tail(next, &workers->worker_list); |
242 | return worker; | 245 | return worker; |
243 | } | 246 | } |
244 | 247 | ||
248 | /* | ||
249 | * selects a worker thread to take the next job. This will either find | ||
250 | * an idle worker, start a new worker up to the max count, or just return | ||
251 | * one of the existing busy workers. | ||
252 | */ | ||
245 | static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) | 253 | static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) |
246 | { | 254 | { |
247 | struct btrfs_worker_thread *worker; | 255 | struct btrfs_worker_thread *worker; |
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 43e44d115dd1..4ec9a2ee0f9d 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h | |||
@@ -63,14 +63,17 @@ struct btrfs_workers { | |||
63 | /* once a worker has this many requests or fewer, it is idle */ | 63 | /* once a worker has this many requests or fewer, it is idle */ |
64 | int idle_thresh; | 64 | int idle_thresh; |
65 | 65 | ||
66 | /* list with all the work threads */ | 66 | /* list with all the work threads. The workers on the idle thread |
67 | * may be actively servicing jobs, but they haven't yet hit the | ||
68 | * idle thresh limit above. | ||
69 | */ | ||
67 | struct list_head worker_list; | 70 | struct list_head worker_list; |
68 | struct list_head idle_list; | 71 | struct list_head idle_list; |
69 | 72 | ||
70 | /* lock for finding the next worker thread to queue on */ | 73 | /* lock for finding the next worker thread to queue on */ |
71 | spinlock_t lock; | 74 | spinlock_t lock; |
72 | 75 | ||
73 | /* extra name for this worker */ | 76 | /* extra name for this worker, used for current->name */ |
74 | char *name; | 77 | char *name; |
75 | }; | 78 | }; |
76 | 79 | ||
diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c deleted file mode 100644 index e8bf876db393..000000000000 --- a/fs/btrfs/bit-radix.c +++ /dev/null | |||
@@ -1,130 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include "bit-radix.h" | ||
20 | |||
21 | #define BIT_ARRAY_BYTES 256 | ||
22 | #define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8) | ||
23 | |||
24 | extern struct kmem_cache *btrfs_bit_radix_cachep; | ||
25 | int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) | ||
26 | { | ||
27 | unsigned long *bits; | ||
28 | unsigned long slot; | ||
29 | int bit_slot; | ||
30 | int ret; | ||
31 | |||
32 | slot = bit / BIT_RADIX_BITS_PER_ARRAY; | ||
33 | bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; | ||
34 | |||
35 | bits = radix_tree_lookup(radix, slot); | ||
36 | if (!bits) { | ||
37 | bits = kmem_cache_alloc(btrfs_bit_radix_cachep, GFP_NOFS); | ||
38 | if (!bits) | ||
39 | return -ENOMEM; | ||
40 | memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); | ||
41 | bits[0] = slot; | ||
42 | ret = radix_tree_insert(radix, slot, bits); | ||
43 | if (ret) | ||
44 | return ret; | ||
45 | } | ||
46 | ret = test_and_set_bit(bit_slot, bits + 1); | ||
47 | if (ret < 0) | ||
48 | ret = 1; | ||
49 | return ret; | ||
50 | } | ||
51 | |||
52 | int test_radix_bit(struct radix_tree_root *radix, unsigned long bit) | ||
53 | { | ||
54 | unsigned long *bits; | ||
55 | unsigned long slot; | ||
56 | int bit_slot; | ||
57 | |||
58 | slot = bit / BIT_RADIX_BITS_PER_ARRAY; | ||
59 | bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; | ||
60 | |||
61 | bits = radix_tree_lookup(radix, slot); | ||
62 | if (!bits) | ||
63 | return 0; | ||
64 | return test_bit(bit_slot, bits + 1); | ||
65 | } | ||
66 | |||
67 | int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) | ||
68 | { | ||
69 | unsigned long *bits; | ||
70 | unsigned long slot; | ||
71 | int bit_slot; | ||
72 | int i; | ||
73 | int empty = 1; | ||
74 | |||
75 | slot = bit / BIT_RADIX_BITS_PER_ARRAY; | ||
76 | bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; | ||
77 | |||
78 | bits = radix_tree_lookup(radix, slot); | ||
79 | if (!bits) | ||
80 | return 0; | ||
81 | clear_bit(bit_slot, bits + 1); | ||
82 | for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) { | ||
83 | if (bits[i]) { | ||
84 | empty = 0; | ||
85 | break; | ||
86 | } | ||
87 | } | ||
88 | if (empty) { | ||
89 | bits = radix_tree_delete(radix, slot); | ||
90 | BUG_ON(!bits); | ||
91 | kmem_cache_free(btrfs_bit_radix_cachep, bits); | ||
92 | } | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, | ||
97 | unsigned long start, int nr) | ||
98 | { | ||
99 | unsigned long *bits; | ||
100 | unsigned long *gang[4]; | ||
101 | int found; | ||
102 | int ret; | ||
103 | int i; | ||
104 | int total_found = 0; | ||
105 | unsigned long slot; | ||
106 | |||
107 | slot = start / BIT_RADIX_BITS_PER_ARRAY; | ||
108 | ret = radix_tree_gang_lookup(radix, (void **)gang, slot, | ||
109 | ARRAY_SIZE(gang)); | ||
110 | found = start % BIT_RADIX_BITS_PER_ARRAY; | ||
111 | for (i = 0; i < ret && nr > 0; i++) { | ||
112 | bits = gang[i]; | ||
113 | while(nr > 0) { | ||
114 | found = find_next_bit(bits + 1, | ||
115 | BIT_RADIX_BITS_PER_ARRAY, | ||
116 | found); | ||
117 | if (found < BIT_RADIX_BITS_PER_ARRAY) { | ||
118 | *retbits = bits[0] * | ||
119 | BIT_RADIX_BITS_PER_ARRAY + found; | ||
120 | retbits++; | ||
121 | nr--; | ||
122 | total_found++; | ||
123 | found++; | ||
124 | } else | ||
125 | break; | ||
126 | } | ||
127 | found = 0; | ||
128 | } | ||
129 | return total_found; | ||
130 | } | ||
diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h deleted file mode 100644 index c100f54d5c32..000000000000 --- a/fs/btrfs/bit-radix.h +++ /dev/null | |||
@@ -1,33 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #ifndef __BIT_RADIX__ | ||
20 | #define __BIT_RADIX__ | ||
21 | #include <linux/radix-tree.h> | ||
22 | |||
23 | int set_radix_bit(struct radix_tree_root *radix, unsigned long bit); | ||
24 | int test_radix_bit(struct radix_tree_root *radix, unsigned long bit); | ||
25 | int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit); | ||
26 | int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, | ||
27 | unsigned long start, int nr); | ||
28 | |||
29 | static inline void init_bit_radix(struct radix_tree_root *radix) | ||
30 | { | ||
31 | INIT_RADIX_TREE(radix, GFP_NOFS); | ||
32 | } | ||
33 | #endif | ||
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 0577fda2168a..0b2e623cf421 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -25,27 +25,58 @@ | |||
25 | 25 | ||
26 | /* in memory btrfs inode */ | 26 | /* in memory btrfs inode */ |
27 | struct btrfs_inode { | 27 | struct btrfs_inode { |
28 | /* which subvolume this inode belongs to */ | ||
28 | struct btrfs_root *root; | 29 | struct btrfs_root *root; |
30 | |||
31 | /* the block group preferred for allocations. This pointer is buggy | ||
32 | * and needs to be replaced with a bytenr instead | ||
33 | */ | ||
29 | struct btrfs_block_group_cache *block_group; | 34 | struct btrfs_block_group_cache *block_group; |
35 | |||
36 | /* key used to find this inode on disk. This is used by the code | ||
37 | * to read in roots of subvolumes | ||
38 | */ | ||
30 | struct btrfs_key location; | 39 | struct btrfs_key location; |
40 | |||
41 | /* the extent_tree has caches of all the extent mappings to disk */ | ||
31 | struct extent_map_tree extent_tree; | 42 | struct extent_map_tree extent_tree; |
43 | |||
44 | /* the io_tree does range state (DIRTY, LOCKED etc) */ | ||
32 | struct extent_io_tree io_tree; | 45 | struct extent_io_tree io_tree; |
46 | |||
47 | /* special utility tree used to record which mirrors have already been | ||
48 | * tried when checksums fail for a given block | ||
49 | */ | ||
33 | struct extent_io_tree io_failure_tree; | 50 | struct extent_io_tree io_failure_tree; |
51 | |||
52 | /* held while inserting checksums to avoid races */ | ||
34 | struct mutex csum_mutex; | 53 | struct mutex csum_mutex; |
54 | |||
55 | /* held while inesrting or deleting extents from files */ | ||
35 | struct mutex extent_mutex; | 56 | struct mutex extent_mutex; |
57 | |||
58 | /* held while logging the inode in tree-log.c */ | ||
36 | struct mutex log_mutex; | 59 | struct mutex log_mutex; |
37 | struct inode vfs_inode; | 60 | |
61 | /* used to order data wrt metadata */ | ||
38 | struct btrfs_ordered_inode_tree ordered_tree; | 62 | struct btrfs_ordered_inode_tree ordered_tree; |
39 | 63 | ||
64 | /* standard acl pointers */ | ||
40 | struct posix_acl *i_acl; | 65 | struct posix_acl *i_acl; |
41 | struct posix_acl *i_default_acl; | 66 | struct posix_acl *i_default_acl; |
42 | 67 | ||
43 | /* for keeping track of orphaned inodes */ | 68 | /* for keeping track of orphaned inodes */ |
44 | struct list_head i_orphan; | 69 | struct list_head i_orphan; |
45 | 70 | ||
71 | /* list of all the delalloc inodes in the FS. There are times we need | ||
72 | * to write all the delalloc pages to disk, and this list is used | ||
73 | * to walk them all. | ||
74 | */ | ||
46 | struct list_head delalloc_inodes; | 75 | struct list_head delalloc_inodes; |
47 | 76 | ||
48 | /* full 64 bit generation number */ | 77 | /* full 64 bit generation number, struct vfs_inode doesn't have a big |
78 | * enough field for this. | ||
79 | */ | ||
49 | u64 generation; | 80 | u64 generation; |
50 | 81 | ||
51 | /* | 82 | /* |
@@ -57,10 +88,25 @@ struct btrfs_inode { | |||
57 | */ | 88 | */ |
58 | u64 logged_trans; | 89 | u64 logged_trans; |
59 | 90 | ||
60 | /* trans that last made a change that should be fully fsync'd */ | 91 | /* |
92 | * trans that last made a change that should be fully fsync'd. This | ||
93 | * gets reset to zero each time the inode is logged | ||
94 | */ | ||
61 | u64 log_dirty_trans; | 95 | u64 log_dirty_trans; |
96 | |||
97 | /* total number of bytes pending delalloc, used by stat to calc the | ||
98 | * real block usage of the file | ||
99 | */ | ||
62 | u64 delalloc_bytes; | 100 | u64 delalloc_bytes; |
101 | |||
102 | /* | ||
103 | * the size of the file stored in the metadata on disk. data=ordered | ||
104 | * means the in-memory i_size might be larger than the size on disk | ||
105 | * because not all the blocks are written yet. | ||
106 | */ | ||
63 | u64 disk_i_size; | 107 | u64 disk_i_size; |
108 | |||
109 | /* flags field from the on disk inode */ | ||
64 | u32 flags; | 110 | u32 flags; |
65 | 111 | ||
66 | /* | 112 | /* |
@@ -68,6 +114,8 @@ struct btrfs_inode { | |||
68 | * number for new files that are created | 114 | * number for new files that are created |
69 | */ | 115 | */ |
70 | u64 index_cnt; | 116 | u64 index_cnt; |
117 | |||
118 | struct inode vfs_inode; | ||
71 | }; | 119 | }; |
72 | 120 | ||
73 | static inline struct btrfs_inode *BTRFS_I(struct inode *inode) | 121 | static inline struct btrfs_inode *BTRFS_I(struct inode *inode) |
diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h index 4f0fefed132a..1eaf11d334fd 100644 --- a/fs/btrfs/crc32c.h +++ b/fs/btrfs/crc32c.h | |||
@@ -1,3 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
1 | #ifndef __BTRFS_CRC32C__ | 19 | #ifndef __BTRFS_CRC32C__ |
2 | #define __BTRFS_CRC32C__ | 20 | #define __BTRFS_CRC32C__ |
3 | #include <asm/byteorder.h> | 21 | #include <asm/byteorder.h> |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 50e81f43e6d4..ff3261ff2e19 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007 Oracle. All rights reserved. | 2 | * Copyright (C) 2007,2008 Oracle. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or | 4 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU General Public | 5 | * modify it under the terms of the GNU General Public |
@@ -54,12 +54,19 @@ struct btrfs_path *btrfs_alloc_path(void) | |||
54 | return path; | 54 | return path; |
55 | } | 55 | } |
56 | 56 | ||
57 | /* this also releases the path */ | ||
57 | void btrfs_free_path(struct btrfs_path *p) | 58 | void btrfs_free_path(struct btrfs_path *p) |
58 | { | 59 | { |
59 | btrfs_release_path(NULL, p); | 60 | btrfs_release_path(NULL, p); |
60 | kmem_cache_free(btrfs_path_cachep, p); | 61 | kmem_cache_free(btrfs_path_cachep, p); |
61 | } | 62 | } |
62 | 63 | ||
64 | /* | ||
65 | * path release drops references on the extent buffers in the path | ||
66 | * and it drops any locks held by this path | ||
67 | * | ||
68 | * It is safe to call this on paths that no locks or extent buffers held. | ||
69 | */ | ||
63 | void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) | 70 | void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) |
64 | { | 71 | { |
65 | int i; | 72 | int i; |
@@ -77,6 +84,16 @@ void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) | |||
77 | } | 84 | } |
78 | } | 85 | } |
79 | 86 | ||
87 | /* | ||
88 | * safely gets a reference on the root node of a tree. A lock | ||
89 | * is not taken, so a concurrent writer may put a different node | ||
90 | * at the root of the tree. See btrfs_lock_root_node for the | ||
91 | * looping required. | ||
92 | * | ||
93 | * The extent buffer returned by this has a reference taken, so | ||
94 | * it won't disappear. It may stop being the root of the tree | ||
95 | * at any time because there are no locks held. | ||
96 | */ | ||
80 | struct extent_buffer *btrfs_root_node(struct btrfs_root *root) | 97 | struct extent_buffer *btrfs_root_node(struct btrfs_root *root) |
81 | { | 98 | { |
82 | struct extent_buffer *eb; | 99 | struct extent_buffer *eb; |
@@ -87,6 +104,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root) | |||
87 | return eb; | 104 | return eb; |
88 | } | 105 | } |
89 | 106 | ||
107 | /* loop around taking references on and locking the root node of the | ||
108 | * tree until you end up with a lock on the root. A locked buffer | ||
109 | * is returned, with a reference held. | ||
110 | */ | ||
90 | struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) | 111 | struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) |
91 | { | 112 | { |
92 | struct extent_buffer *eb; | 113 | struct extent_buffer *eb; |
@@ -108,6 +129,10 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) | |||
108 | return eb; | 129 | return eb; |
109 | } | 130 | } |
110 | 131 | ||
132 | /* cowonly root (everything not a reference counted cow subvolume), just get | ||
133 | * put onto a simple dirty list. transaction.c walks this to make sure they | ||
134 | * get properly updated on disk. | ||
135 | */ | ||
111 | static void add_root_to_dirty_list(struct btrfs_root *root) | 136 | static void add_root_to_dirty_list(struct btrfs_root *root) |
112 | { | 137 | { |
113 | if (root->track_dirty && list_empty(&root->dirty_list)) { | 138 | if (root->track_dirty && list_empty(&root->dirty_list)) { |
@@ -116,6 +141,11 @@ static void add_root_to_dirty_list(struct btrfs_root *root) | |||
116 | } | 141 | } |
117 | } | 142 | } |
118 | 143 | ||
144 | /* | ||
145 | * used by snapshot creation to make a copy of a root for a tree with | ||
146 | * a given objectid. The buffer with the new root node is returned in | ||
147 | * cow_ret, and this func returns zero on success or a negative error code. | ||
148 | */ | ||
119 | int btrfs_copy_root(struct btrfs_trans_handle *trans, | 149 | int btrfs_copy_root(struct btrfs_trans_handle *trans, |
120 | struct btrfs_root *root, | 150 | struct btrfs_root *root, |
121 | struct extent_buffer *buf, | 151 | struct extent_buffer *buf, |
@@ -167,6 +197,22 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
167 | return 0; | 197 | return 0; |
168 | } | 198 | } |
169 | 199 | ||
200 | /* | ||
201 | * does the dirty work in cow of a single block. The parent block | ||
202 | * (if supplied) is updated to point to the new cow copy. The new | ||
203 | * buffer is marked dirty and returned locked. If you modify the block | ||
204 | * it needs to be marked dirty again. | ||
205 | * | ||
206 | * search_start -- an allocation hint for the new block | ||
207 | * | ||
208 | * empty_size -- a hint that you plan on doing more cow. This is the size in bytes | ||
209 | * the allocator should try to find free next to the block it returns. This is | ||
210 | * just a hint and may be ignored by the allocator. | ||
211 | * | ||
212 | * prealloc_dest -- if you have already reserved a destination for the cow, | ||
213 | * this uses that block instead of allocating a new one. btrfs_alloc_reserved_extent | ||
214 | * is used to finish the allocation. | ||
215 | */ | ||
170 | int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, | 216 | int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, |
171 | struct btrfs_root *root, | 217 | struct btrfs_root *root, |
172 | struct extent_buffer *buf, | 218 | struct extent_buffer *buf, |
@@ -311,6 +357,11 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
311 | return 0; | 357 | return 0; |
312 | } | 358 | } |
313 | 359 | ||
360 | /* | ||
361 | * cows a single block, see __btrfs_cow_block for the real work. | ||
362 | * This version of it has extra checks so that a block isn't cow'd more than | ||
363 | * once per transaction, as long as it hasn't been written yet | ||
364 | */ | ||
314 | int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, | 365 | int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, |
315 | struct btrfs_root *root, struct extent_buffer *buf, | 366 | struct btrfs_root *root, struct extent_buffer *buf, |
316 | struct extent_buffer *parent, int parent_slot, | 367 | struct extent_buffer *parent, int parent_slot, |
@@ -347,6 +398,10 @@ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
347 | return ret; | 398 | return ret; |
348 | } | 399 | } |
349 | 400 | ||
401 | /* | ||
402 | * helper function for defrag to decide if two blocks pointed to by a | ||
403 | * node are actually close by | ||
404 | */ | ||
350 | static int close_blocks(u64 blocknr, u64 other, u32 blocksize) | 405 | static int close_blocks(u64 blocknr, u64 other, u32 blocksize) |
351 | { | 406 | { |
352 | if (blocknr < other && other - (blocknr + blocksize) < 32768) | 407 | if (blocknr < other && other - (blocknr + blocksize) < 32768) |
@@ -381,6 +436,11 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) | |||
381 | } | 436 | } |
382 | 437 | ||
383 | 438 | ||
439 | /* | ||
440 | * this is used by the defrag code to go through all the | ||
441 | * leaves pointed to by a node and reallocate them so that | ||
442 | * disk order is close to key order | ||
443 | */ | ||
384 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, | 444 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, |
385 | struct btrfs_root *root, struct extent_buffer *parent, | 445 | struct btrfs_root *root, struct extent_buffer *parent, |
386 | int start_slot, int cache_only, u64 *last_ret, | 446 | int start_slot, int cache_only, u64 *last_ret, |
@@ -521,6 +581,10 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root, | |||
521 | return btrfs_item_offset_nr(leaf, nr - 1); | 581 | return btrfs_item_offset_nr(leaf, nr - 1); |
522 | } | 582 | } |
523 | 583 | ||
584 | /* | ||
585 | * extra debugging checks to make sure all the items in a key are | ||
586 | * well formed and in the proper order | ||
587 | */ | ||
524 | static int check_node(struct btrfs_root *root, struct btrfs_path *path, | 588 | static int check_node(struct btrfs_root *root, struct btrfs_path *path, |
525 | int level) | 589 | int level) |
526 | { | 590 | { |
@@ -561,6 +625,10 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, | |||
561 | return 0; | 625 | return 0; |
562 | } | 626 | } |
563 | 627 | ||
628 | /* | ||
629 | * extra checking to make sure all the items in a leaf are | ||
630 | * well formed and in the proper order | ||
631 | */ | ||
564 | static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, | 632 | static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, |
565 | int level) | 633 | int level) |
566 | { | 634 | { |
@@ -782,6 +850,10 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, | |||
782 | return -1; | 850 | return -1; |
783 | } | 851 | } |
784 | 852 | ||
853 | /* given a node and slot number, this reads the blocks it points to. The | ||
854 | * extent buffer is returned with a reference taken (but unlocked). | ||
855 | * NULL is returned on error. | ||
856 | */ | ||
785 | static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, | 857 | static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, |
786 | struct extent_buffer *parent, int slot) | 858 | struct extent_buffer *parent, int slot) |
787 | { | 859 | { |
@@ -798,6 +870,11 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, | |||
798 | btrfs_node_ptr_generation(parent, slot)); | 870 | btrfs_node_ptr_generation(parent, slot)); |
799 | } | 871 | } |
800 | 872 | ||
873 | /* | ||
874 | * node level balancing, used to make sure nodes are in proper order for | ||
875 | * item deletion. We balance from the top down, so we have to make sure | ||
876 | * that a deletion won't leave an node completely empty later on. | ||
877 | */ | ||
801 | static noinline int balance_level(struct btrfs_trans_handle *trans, | 878 | static noinline int balance_level(struct btrfs_trans_handle *trans, |
802 | struct btrfs_root *root, | 879 | struct btrfs_root *root, |
803 | struct btrfs_path *path, int level) | 880 | struct btrfs_path *path, int level) |
@@ -1024,7 +1101,10 @@ enospc: | |||
1024 | return ret; | 1101 | return ret; |
1025 | } | 1102 | } |
1026 | 1103 | ||
1027 | /* returns zero if the push worked, non-zero otherwise */ | 1104 | /* Node balancing for insertion. Here we only split or push nodes around |
1105 | * when they are completely full. This is also done top down, so we | ||
1106 | * have to be pessimistic. | ||
1107 | */ | ||
1028 | static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, | 1108 | static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, |
1029 | struct btrfs_root *root, | 1109 | struct btrfs_root *root, |
1030 | struct btrfs_path *path, int level) | 1110 | struct btrfs_path *path, int level) |
@@ -1150,7 +1230,8 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1150 | } | 1230 | } |
1151 | 1231 | ||
1152 | /* | 1232 | /* |
1153 | * readahead one full node of leaves | 1233 | * readahead one full node of leaves, finding things that are close |
1234 | * to the block in 'slot', and triggering ra on them. | ||
1154 | */ | 1235 | */ |
1155 | static noinline void reada_for_search(struct btrfs_root *root, | 1236 | static noinline void reada_for_search(struct btrfs_root *root, |
1156 | struct btrfs_path *path, | 1237 | struct btrfs_path *path, |
@@ -1226,6 +1307,19 @@ static noinline void reada_for_search(struct btrfs_root *root, | |||
1226 | } | 1307 | } |
1227 | } | 1308 | } |
1228 | 1309 | ||
1310 | /* | ||
1311 | * when we walk down the tree, it is usually safe to unlock the higher layers in | ||
1312 | * the tree. The exceptions are when our path goes through slot 0, because operations | ||
1313 | * on the tree might require changing key pointers higher up in the tree. | ||
1314 | * | ||
1315 | * callers might also have set path->keep_locks, which tells this code to | ||
1316 | * keep the lock if the path points to the last slot in the block. This is | ||
1317 | * part of walking through the tree, and selecting the next slot in the higher | ||
1318 | * block. | ||
1319 | * | ||
1320 | * lowest_unlock sets the lowest level in the tree we're allowed to unlock. | ||
1321 | * so if lowest_unlock is 1, level 0 won't be unlocked | ||
1322 | */ | ||
1229 | static noinline void unlock_up(struct btrfs_path *path, int level, | 1323 | static noinline void unlock_up(struct btrfs_path *path, int level, |
1230 | int lowest_unlock) | 1324 | int lowest_unlock) |
1231 | { | 1325 | { |
@@ -2705,6 +2799,12 @@ again: | |||
2705 | return ret; | 2799 | return ret; |
2706 | } | 2800 | } |
2707 | 2801 | ||
2802 | /* | ||
2803 | * make the item pointed to by the path smaller. new_size indicates | ||
2804 | * how small to make it, and from_end tells us if we just chop bytes | ||
2805 | * off the end of the item or if we shift the item to chop bytes off | ||
2806 | * the front. | ||
2807 | */ | ||
2708 | int btrfs_truncate_item(struct btrfs_trans_handle *trans, | 2808 | int btrfs_truncate_item(struct btrfs_trans_handle *trans, |
2709 | struct btrfs_root *root, | 2809 | struct btrfs_root *root, |
2710 | struct btrfs_path *path, | 2810 | struct btrfs_path *path, |
@@ -2818,6 +2918,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, | |||
2818 | return ret; | 2918 | return ret; |
2819 | } | 2919 | } |
2820 | 2920 | ||
2921 | /* | ||
2922 | * make the item pointed to by the path bigger, data_size is the new size. | ||
2923 | */ | ||
2821 | int btrfs_extend_item(struct btrfs_trans_handle *trans, | 2924 | int btrfs_extend_item(struct btrfs_trans_handle *trans, |
2822 | struct btrfs_root *root, struct btrfs_path *path, | 2925 | struct btrfs_root *root, struct btrfs_path *path, |
2823 | u32 data_size) | 2926 | u32 data_size) |
@@ -2897,7 +3000,7 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, | |||
2897 | } | 3000 | } |
2898 | 3001 | ||
2899 | /* | 3002 | /* |
2900 | * Given a key and some data, insert an item into the tree. | 3003 | * Given a key and some data, insert items into the tree. |
2901 | * This does all the path init required, making room in the tree if needed. | 3004 | * This does all the path init required, making room in the tree if needed. |
2902 | */ | 3005 | */ |
2903 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | 3006 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, |
@@ -3046,9 +3149,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
3046 | /* | 3149 | /* |
3047 | * delete the pointer from a given node. | 3150 | * delete the pointer from a given node. |
3048 | * | 3151 | * |
3049 | * If the delete empties a node, the node is removed from the tree, | 3152 | * the tree should have been previously balanced so the deletion does not |
3050 | * continuing all the way the root if required. The root is converted into | 3153 | * empty a node. |
3051 | * a leaf if all the nodes are emptied. | ||
3052 | */ | 3154 | */ |
3053 | static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 3155 | static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
3054 | struct btrfs_path *path, int level, int slot) | 3156 | struct btrfs_path *path, int level, int slot) |
@@ -3233,6 +3335,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3233 | * search the tree again to find a leaf with lesser keys | 3335 | * search the tree again to find a leaf with lesser keys |
3234 | * returns 0 if it found something or 1 if there are no lesser leaves. | 3336 | * returns 0 if it found something or 1 if there are no lesser leaves. |
3235 | * returns < 0 on io errors. | 3337 | * returns < 0 on io errors. |
3338 | * | ||
3339 | * This may release the path, and so you may lose any locks held at the | ||
3340 | * time you call it. | ||
3236 | */ | 3341 | */ |
3237 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) | 3342 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) |
3238 | { | 3343 | { |
@@ -3265,9 +3370,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
3265 | /* | 3370 | /* |
3266 | * A helper function to walk down the tree starting at min_key, and looking | 3371 | * A helper function to walk down the tree starting at min_key, and looking |
3267 | * for nodes or leaves that are either in cache or have a minimum | 3372 | * for nodes or leaves that are either in cache or have a minimum |
3268 | * transaction id. This is used by the btree defrag code, but could | 3373 | * transaction id. This is used by the btree defrag code, and tree logging |
3269 | * also be used to search for blocks that have changed since a given | ||
3270 | * transaction id. | ||
3271 | * | 3374 | * |
3272 | * This does not cow, but it does stuff the starting key it finds back | 3375 | * This does not cow, but it does stuff the starting key it finds back |
3273 | * into min_key, so you can call btrfs_search_slot with cow=1 on the | 3376 | * into min_key, so you can call btrfs_search_slot with cow=1 on the |
@@ -3279,6 +3382,10 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
3279 | * This honors path->lowest_level to prevent descent past a given level | 3382 | * This honors path->lowest_level to prevent descent past a given level |
3280 | * of the tree. | 3383 | * of the tree. |
3281 | * | 3384 | * |
3385 | * min_trans indicates the oldest transaction that you are interested | ||
3386 | * in walking through. Any nodes or leaves older than min_trans are | ||
3387 | * skipped over (without reading them). | ||
3388 | * | ||
3282 | * returns zero if something useful was found, < 0 on error and 1 if there | 3389 | * returns zero if something useful was found, < 0 on error and 1 if there |
3283 | * was nothing in the tree that matched the search criteria. | 3390 | * was nothing in the tree that matched the search criteria. |
3284 | */ | 3391 | */ |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0079b60b18f3..ded1643c0273 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -27,7 +27,6 @@ | |||
27 | #include <linux/backing-dev.h> | 27 | #include <linux/backing-dev.h> |
28 | #include <linux/wait.h> | 28 | #include <linux/wait.h> |
29 | #include <asm/kmap_types.h> | 29 | #include <asm/kmap_types.h> |
30 | #include "bit-radix.h" | ||
31 | #include "extent_io.h" | 30 | #include "extent_io.h" |
32 | #include "extent_map.h" | 31 | #include "extent_map.h" |
33 | #include "async-thread.h" | 32 | #include "async-thread.h" |
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index e4f30090d640..5040b71f1900 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
@@ -21,6 +21,14 @@ | |||
21 | #include "hash.h" | 21 | #include "hash.h" |
22 | #include "transaction.h" | 22 | #include "transaction.h" |
23 | 23 | ||
24 | /* | ||
25 | * insert a name into a directory, doing overflow properly if there is a hash | ||
26 | * collision. data_size indicates how big the item inserted should be. On | ||
27 | * success a struct btrfs_dir_item pointer is returned, otherwise it is | ||
28 | * an ERR_PTR. | ||
29 | * | ||
30 | * The name is not copied into the dir item, you have to do that yourself. | ||
31 | */ | ||
24 | static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle | 32 | static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle |
25 | *trans, | 33 | *trans, |
26 | struct btrfs_root *root, | 34 | struct btrfs_root *root, |
@@ -55,6 +63,10 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle | |||
55 | return (struct btrfs_dir_item *)ptr; | 63 | return (struct btrfs_dir_item *)ptr; |
56 | } | 64 | } |
57 | 65 | ||
66 | /* | ||
67 | * xattrs work a lot like directories, this inserts an xattr item | ||
68 | * into the tree | ||
69 | */ | ||
58 | int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, | 70 | int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, |
59 | struct btrfs_root *root, const char *name, | 71 | struct btrfs_root *root, const char *name, |
60 | u16 name_len, const void *data, u16 data_len, | 72 | u16 name_len, const void *data, u16 data_len, |
@@ -109,6 +121,13 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, | |||
109 | return ret; | 121 | return ret; |
110 | } | 122 | } |
111 | 123 | ||
124 | /* | ||
125 | * insert a directory item in the tree, doing all the magic for | ||
126 | * both indexes. 'dir' indicates which objectid to insert it into, | ||
127 | * 'location' is the key to stuff into the directory item, 'type' is the | ||
128 | * type of the inode we're pointing to, and 'index' is the sequence number | ||
129 | * to use for the second index (if one is created). | ||
130 | */ | ||
112 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root | 131 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root |
113 | *root, const char *name, int name_len, u64 dir, | 132 | *root, const char *name, int name_len, u64 dir, |
114 | struct btrfs_key *location, u8 type, u64 index) | 133 | struct btrfs_key *location, u8 type, u64 index) |
@@ -184,6 +203,11 @@ out: | |||
184 | return 0; | 203 | return 0; |
185 | } | 204 | } |
186 | 205 | ||
206 | /* | ||
207 | * lookup a directory item based on name. 'dir' is the objectid | ||
208 | * we're searching in, and 'mod' tells us if you plan on deleting the | ||
209 | * item (use mod < 0) or changing the options (use mod > 0) | ||
210 | */ | ||
187 | struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | 211 | struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, |
188 | struct btrfs_root *root, | 212 | struct btrfs_root *root, |
189 | struct btrfs_path *path, u64 dir, | 213 | struct btrfs_path *path, u64 dir, |
@@ -222,6 +246,14 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | |||
222 | return btrfs_match_dir_item_name(root, path, name, name_len); | 246 | return btrfs_match_dir_item_name(root, path, name, name_len); |
223 | } | 247 | } |
224 | 248 | ||
249 | /* | ||
250 | * lookup a directory item based on index. 'dir' is the objectid | ||
251 | * we're searching in, and 'mod' tells us if you plan on deleting the | ||
252 | * item (use mod < 0) or changing the options (use mod > 0) | ||
253 | * | ||
254 | * The name is used to make sure the index really points to the name you were | ||
255 | * looking for. | ||
256 | */ | ||
225 | struct btrfs_dir_item * | 257 | struct btrfs_dir_item * |
226 | btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | 258 | btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, |
227 | struct btrfs_root *root, | 259 | struct btrfs_root *root, |
@@ -282,6 +314,11 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | |||
282 | return btrfs_match_dir_item_name(root, path, name, name_len); | 314 | return btrfs_match_dir_item_name(root, path, name, name_len); |
283 | } | 315 | } |
284 | 316 | ||
317 | /* | ||
318 | * helper function to look at the directory item pointed to by 'path' | ||
319 | * this walks through all the entries in a dir item and finds one | ||
320 | * for a specific name. | ||
321 | */ | ||
285 | struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, | 322 | struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, |
286 | struct btrfs_path *path, | 323 | struct btrfs_path *path, |
287 | const char *name, int name_len) | 324 | const char *name, int name_len) |
@@ -313,6 +350,10 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, | |||
313 | return NULL; | 350 | return NULL; |
314 | } | 351 | } |
315 | 352 | ||
353 | /* | ||
354 | * given a pointer into a directory item, delete it. This | ||
355 | * handles items that have more than one entry in them. | ||
356 | */ | ||
316 | int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, | 357 | int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, |
317 | struct btrfs_root *root, | 358 | struct btrfs_root *root, |
318 | struct btrfs_path *path, | 359 | struct btrfs_path *path, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 45b4f7285275..5ee10d3136f5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -55,6 +55,11 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) | |||
55 | static struct extent_io_ops btree_extent_io_ops; | 55 | static struct extent_io_ops btree_extent_io_ops; |
56 | static void end_workqueue_fn(struct btrfs_work *work); | 56 | static void end_workqueue_fn(struct btrfs_work *work); |
57 | 57 | ||
58 | /* | ||
59 | * end_io_wq structs are used to do processing in task context when an IO is | ||
60 | * complete. This is used during reads to verify checksums, and it is used | ||
61 | * by writes to insert metadata for new file extents after IO is complete. | ||
62 | */ | ||
58 | struct end_io_wq { | 63 | struct end_io_wq { |
59 | struct bio *bio; | 64 | struct bio *bio; |
60 | bio_end_io_t *end_io; | 65 | bio_end_io_t *end_io; |
@@ -66,6 +71,11 @@ struct end_io_wq { | |||
66 | struct btrfs_work work; | 71 | struct btrfs_work work; |
67 | }; | 72 | }; |
68 | 73 | ||
74 | /* | ||
75 | * async submit bios are used to offload expensive checksumming | ||
76 | * onto the worker threads. They checksum file and metadata bios | ||
77 | * just before they are sent down the IO stack. | ||
78 | */ | ||
69 | struct async_submit_bio { | 79 | struct async_submit_bio { |
70 | struct inode *inode; | 80 | struct inode *inode; |
71 | struct bio *bio; | 81 | struct bio *bio; |
@@ -76,6 +86,10 @@ struct async_submit_bio { | |||
76 | struct btrfs_work work; | 86 | struct btrfs_work work; |
77 | }; | 87 | }; |
78 | 88 | ||
89 | /* | ||
90 | * extents on the btree inode are pretty simple, there's one extent | ||
91 | * that covers the entire device | ||
92 | */ | ||
79 | struct extent_map *btree_get_extent(struct inode *inode, struct page *page, | 93 | struct extent_map *btree_get_extent(struct inode *inode, struct page *page, |
80 | size_t page_offset, u64 start, u64 len, | 94 | size_t page_offset, u64 start, u64 len, |
81 | int create) | 95 | int create) |
@@ -151,6 +165,10 @@ void btrfs_csum_final(u32 crc, char *result) | |||
151 | *(__le32 *)result = ~cpu_to_le32(crc); | 165 | *(__le32 *)result = ~cpu_to_le32(crc); |
152 | } | 166 | } |
153 | 167 | ||
168 | /* | ||
169 | * compute the csum for a btree block, and either verify it or write it | ||
170 | * into the csum field of the block. | ||
171 | */ | ||
154 | static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | 172 | static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, |
155 | int verify) | 173 | int verify) |
156 | { | 174 | { |
@@ -204,6 +222,12 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
204 | return 0; | 222 | return 0; |
205 | } | 223 | } |
206 | 224 | ||
225 | /* | ||
226 | * we can't consider a given block up to date unless the transid of the | ||
227 | * block matches the transid in the parent node's pointer. This is how we | ||
228 | * detect blocks that either didn't get written at all or got written | ||
229 | * in the wrong place. | ||
230 | */ | ||
207 | static int verify_parent_transid(struct extent_io_tree *io_tree, | 231 | static int verify_parent_transid(struct extent_io_tree *io_tree, |
208 | struct extent_buffer *eb, u64 parent_transid) | 232 | struct extent_buffer *eb, u64 parent_transid) |
209 | { | 233 | { |
@@ -228,9 +252,12 @@ out: | |||
228 | unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, | 252 | unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, |
229 | GFP_NOFS); | 253 | GFP_NOFS); |
230 | return ret; | 254 | return ret; |
231 | |||
232 | } | 255 | } |
233 | 256 | ||
257 | /* | ||
258 | * helper to read a given tree block, doing retries as required when | ||
259 | * the checksums don't match and we have alternate mirrors to try. | ||
260 | */ | ||
234 | static int btree_read_extent_buffer_pages(struct btrfs_root *root, | 261 | static int btree_read_extent_buffer_pages(struct btrfs_root *root, |
235 | struct extent_buffer *eb, | 262 | struct extent_buffer *eb, |
236 | u64 start, u64 parent_transid) | 263 | u64 start, u64 parent_transid) |
@@ -260,6 +287,10 @@ printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror | |||
260 | return -EIO; | 287 | return -EIO; |
261 | } | 288 | } |
262 | 289 | ||
290 | /* | ||
291 | * checksum a dirty tree block before IO. This has extra checks to make | ||
292 | * sure we only fill in the checksum field in the first page of a multi-page block | ||
293 | */ | ||
263 | int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | 294 | int csum_dirty_buffer(struct btrfs_root *root, struct page *page) |
264 | { | 295 | { |
265 | struct extent_io_tree *tree; | 296 | struct extent_io_tree *tree; |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8bd1b402f3fd..563b2d12f4f2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -914,6 +914,10 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
914 | } | 914 | } |
915 | EXPORT_SYMBOL(wait_on_extent_writeback); | 915 | EXPORT_SYMBOL(wait_on_extent_writeback); |
916 | 916 | ||
917 | /* | ||
918 | * either insert or lock state struct between start and end use mask to tell | ||
919 | * us if waiting is desired. | ||
920 | */ | ||
917 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | 921 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) |
918 | { | 922 | { |
919 | int err; | 923 | int err; |
@@ -982,6 +986,13 @@ int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
982 | } | 986 | } |
983 | EXPORT_SYMBOL(set_range_writeback); | 987 | EXPORT_SYMBOL(set_range_writeback); |
984 | 988 | ||
989 | /* | ||
990 | * find the first offset in the io tree with 'bits' set. zero is | ||
991 | * returned if we find something, and *start_ret and *end_ret are | ||
992 | * set to reflect the state struct that was found. | ||
993 | * | ||
994 | * If nothing was found, 1 is returned, < 0 on error | ||
995 | */ | ||
985 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 996 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
986 | u64 *start_ret, u64 *end_ret, int bits) | 997 | u64 *start_ret, u64 *end_ret, int bits) |
987 | { | 998 | { |
@@ -1017,6 +1028,10 @@ out: | |||
1017 | } | 1028 | } |
1018 | EXPORT_SYMBOL(find_first_extent_bit); | 1029 | EXPORT_SYMBOL(find_first_extent_bit); |
1019 | 1030 | ||
1031 | /* find the first state struct with 'bits' set after 'start', and | ||
1032 | * return it. tree->lock must be held. NULL will returned if | ||
1033 | * nothing was found after 'start' | ||
1034 | */ | ||
1020 | struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, | 1035 | struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, |
1021 | u64 start, int bits) | 1036 | u64 start, int bits) |
1022 | { | 1037 | { |
@@ -1046,8 +1061,14 @@ out: | |||
1046 | } | 1061 | } |
1047 | EXPORT_SYMBOL(find_first_extent_bit_state); | 1062 | EXPORT_SYMBOL(find_first_extent_bit_state); |
1048 | 1063 | ||
1049 | u64 find_lock_delalloc_range(struct extent_io_tree *tree, | 1064 | /* |
1050 | u64 *start, u64 *end, u64 max_bytes) | 1065 | * find a contiguous range of bytes in the file marked as delalloc, not |
1066 | * more than 'max_bytes'. start and end are used to return the range, | ||
1067 | * | ||
1068 | * 1 is returned if we find something, 0 if nothing was in the tree | ||
1069 | */ | ||
1070 | static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree, | ||
1071 | u64 *start, u64 *end, u64 max_bytes) | ||
1051 | { | 1072 | { |
1052 | struct rb_node *node; | 1073 | struct rb_node *node; |
1053 | struct extent_state *state; | 1074 | struct extent_state *state; |
@@ -1130,6 +1151,11 @@ out: | |||
1130 | return found; | 1151 | return found; |
1131 | } | 1152 | } |
1132 | 1153 | ||
1154 | /* | ||
1155 | * count the number of bytes in the tree that have a given bit(s) | ||
1156 | * set. This can be fairly slow, except for EXTENT_DIRTY which is | ||
1157 | * cached. The total number found is returned. | ||
1158 | */ | ||
1133 | u64 count_range_bits(struct extent_io_tree *tree, | 1159 | u64 count_range_bits(struct extent_io_tree *tree, |
1134 | u64 *start, u64 search_end, u64 max_bytes, | 1160 | u64 *start, u64 search_end, u64 max_bytes, |
1135 | unsigned long bits) | 1161 | unsigned long bits) |
@@ -1245,6 +1271,10 @@ int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) | |||
1245 | } | 1271 | } |
1246 | EXPORT_SYMBOL(unlock_range); | 1272 | EXPORT_SYMBOL(unlock_range); |
1247 | 1273 | ||
1274 | /* | ||
1275 | * set the private field for a given byte offset in the tree. If there isn't | ||
1276 | * an extent_state there already, this does nothing. | ||
1277 | */ | ||
1248 | int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) | 1278 | int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) |
1249 | { | 1279 | { |
1250 | struct rb_node *node; | 1280 | struct rb_node *node; |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 78ced11d18c7..74b2a29880d3 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -114,6 +114,10 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | |||
114 | return NULL; | 114 | return NULL; |
115 | } | 115 | } |
116 | 116 | ||
117 | /* | ||
118 | * search through the tree for an extent_map with a given offset. If | ||
119 | * it can't be found, try to find some neighboring extents | ||
120 | */ | ||
117 | static struct rb_node *__tree_search(struct rb_root *root, u64 offset, | 121 | static struct rb_node *__tree_search(struct rb_root *root, u64 offset, |
118 | struct rb_node **prev_ret, | 122 | struct rb_node **prev_ret, |
119 | struct rb_node **next_ret) | 123 | struct rb_node **next_ret) |
@@ -160,6 +164,10 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, | |||
160 | return NULL; | 164 | return NULL; |
161 | } | 165 | } |
162 | 166 | ||
167 | /* | ||
168 | * look for an offset in the tree, and if it can't be found, return | ||
169 | * the first offset we can find smaller than 'offset'. | ||
170 | */ | ||
163 | static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) | 171 | static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) |
164 | { | 172 | { |
165 | struct rb_node *prev; | 173 | struct rb_node *prev; |
@@ -170,6 +178,7 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) | |||
170 | return ret; | 178 | return ret; |
171 | } | 179 | } |
172 | 180 | ||
181 | /* check to see if two extent_map structs are adjacent and safe to merge */ | ||
173 | static int mergable_maps(struct extent_map *prev, struct extent_map *next) | 182 | static int mergable_maps(struct extent_map *prev, struct extent_map *next) |
174 | { | 183 | { |
175 | if (test_bit(EXTENT_FLAG_PINNED, &prev->flags)) | 184 | if (test_bit(EXTENT_FLAG_PINNED, &prev->flags)) |
@@ -250,6 +259,7 @@ out: | |||
250 | } | 259 | } |
251 | EXPORT_SYMBOL(add_extent_mapping); | 260 | EXPORT_SYMBOL(add_extent_mapping); |
252 | 261 | ||
262 | /* simple helper to do math around the end of an extent, handling wrap */ | ||
253 | static u64 range_end(u64 start, u64 len) | 263 | static u64 range_end(u64 start, u64 len) |
254 | { | 264 | { |
255 | if (start + len < start) | 265 | if (start + len < start) |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1b7e51a9db0f..3088a1184483 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -41,6 +41,9 @@ | |||
41 | #include "compat.h" | 41 | #include "compat.h" |
42 | 42 | ||
43 | 43 | ||
44 | /* simple helper to fault in pages and copy. This should go away | ||
45 | * and be replaced with calls into generic code. | ||
46 | */ | ||
44 | static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, | 47 | static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, |
45 | int write_bytes, | 48 | int write_bytes, |
46 | struct page **prepared_pages, | 49 | struct page **prepared_pages, |
@@ -72,12 +75,19 @@ static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, | |||
72 | return page_fault ? -EFAULT : 0; | 75 | return page_fault ? -EFAULT : 0; |
73 | } | 76 | } |
74 | 77 | ||
78 | /* | ||
79 | * unlocks pages after btrfs_file_write is done with them | ||
80 | */ | ||
75 | static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) | 81 | static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) |
76 | { | 82 | { |
77 | size_t i; | 83 | size_t i; |
78 | for (i = 0; i < num_pages; i++) { | 84 | for (i = 0; i < num_pages; i++) { |
79 | if (!pages[i]) | 85 | if (!pages[i]) |
80 | break; | 86 | break; |
87 | /* page checked is some magic around finding pages that | ||
88 | * have been modified without going through btrfs_set_page_dirty | ||
89 | * clear it here | ||
90 | */ | ||
81 | ClearPageChecked(pages[i]); | 91 | ClearPageChecked(pages[i]); |
82 | unlock_page(pages[i]); | 92 | unlock_page(pages[i]); |
83 | mark_page_accessed(pages[i]); | 93 | mark_page_accessed(pages[i]); |
@@ -85,6 +95,10 @@ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) | |||
85 | } | 95 | } |
86 | } | 96 | } |
87 | 97 | ||
98 | /* this does all the hard work for inserting an inline extent into | ||
99 | * the btree. Any existing inline extent is extended as required to make room, | ||
100 | * otherwise things are inserted as required into the btree | ||
101 | */ | ||
88 | static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, | 102 | static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, |
89 | struct btrfs_root *root, struct inode *inode, | 103 | struct btrfs_root *root, struct inode *inode, |
90 | u64 offset, size_t size, | 104 | u64 offset, size_t size, |
@@ -228,6 +242,14 @@ fail: | |||
228 | return err; | 242 | return err; |
229 | } | 243 | } |
230 | 244 | ||
245 | /* | ||
246 | * after copy_from_user, pages need to be dirtied and we need to make | ||
247 | * sure holes are created between the current EOF and the start of | ||
248 | * any next extents (if required). | ||
249 | * | ||
250 | * this also makes the decision about creating an inline extent vs | ||
251 | * doing real data extents, marking pages dirty and delalloc as required. | ||
252 | */ | ||
231 | static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, | 253 | static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, |
232 | struct btrfs_root *root, | 254 | struct btrfs_root *root, |
233 | struct file *file, | 255 | struct file *file, |
@@ -362,6 +384,10 @@ out_unlock: | |||
362 | return err; | 384 | return err; |
363 | } | 385 | } |
364 | 386 | ||
387 | /* | ||
388 | * this drops all the extents in the cache that intersect the range | ||
389 | * [start, end]. Existing extents are split as required. | ||
390 | */ | ||
365 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 391 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
366 | int skip_pinned) | 392 | int skip_pinned) |
367 | { | 393 | { |
@@ -536,6 +562,9 @@ out: | |||
536 | * If an extent intersects the range but is not entirely inside the range | 562 | * If an extent intersects the range but is not entirely inside the range |
537 | * it is either truncated or split. Anything entirely inside the range | 563 | * it is either truncated or split. Anything entirely inside the range |
538 | * is deleted from the tree. | 564 | * is deleted from the tree. |
565 | * | ||
566 | * inline_limit is used to tell this code which offsets in the file to keep | ||
567 | * if they contain inline extents. | ||
539 | */ | 568 | */ |
540 | int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, | 569 | int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, |
541 | struct btrfs_root *root, struct inode *inode, | 570 | struct btrfs_root *root, struct inode *inode, |
@@ -796,7 +825,9 @@ out: | |||
796 | } | 825 | } |
797 | 826 | ||
798 | /* | 827 | /* |
799 | * this gets pages into the page cache and locks them down | 828 | * this gets pages into the page cache and locks them down, it also properly |
829 | * waits for data=ordered extents to finish before allowing the pages to be | ||
830 | * modified. | ||
800 | */ | 831 | */ |
801 | static int noinline prepare_pages(struct btrfs_root *root, struct file *file, | 832 | static int noinline prepare_pages(struct btrfs_root *root, struct file *file, |
802 | struct page **pages, size_t num_pages, | 833 | struct page **pages, size_t num_pages, |
@@ -1034,6 +1065,17 @@ int btrfs_release_file(struct inode * inode, struct file * filp) | |||
1034 | return 0; | 1065 | return 0; |
1035 | } | 1066 | } |
1036 | 1067 | ||
1068 | /* | ||
1069 | * fsync call for both files and directories. This logs the inode into | ||
1070 | * the tree log instead of forcing full commits whenever possible. | ||
1071 | * | ||
1072 | * It needs to call filemap_fdatawait so that all ordered extent updates are | ||
1073 | * in the metadata btree are up to date for copying to the log. | ||
1074 | * | ||
1075 | * It drops the inode mutex before doing the tree log commit. This is an | ||
1076 | * important optimization for directories because holding the mutex prevents | ||
1077 | * new operations on the dir while we write to disk. | ||
1078 | */ | ||
1037 | int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | 1079 | int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) |
1038 | { | 1080 | { |
1039 | struct inode *inode = dentry->d_inode; | 1081 | struct inode *inode = dentry->d_inode; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 404704d26822..f3abecc2d14c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -83,6 +83,10 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | |||
83 | 83 | ||
84 | static void btrfs_truncate(struct inode *inode); | 84 | static void btrfs_truncate(struct inode *inode); |
85 | 85 | ||
86 | /* | ||
87 | * a very lame attempt at stopping writes when the FS is 85% full. There | ||
88 | * are countless ways this is incorrect, but it is better than nothing. | ||
89 | */ | ||
86 | int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, | 90 | int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, |
87 | int for_del) | 91 | int for_del) |
88 | { | 92 | { |
@@ -108,6 +112,12 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, | |||
108 | return ret; | 112 | return ret; |
109 | } | 113 | } |
110 | 114 | ||
115 | /* | ||
116 | * when extent_io.c finds a delayed allocation range in the file, | ||
117 | * the call backs end up in this code. The basic idea is to | ||
118 | * allocate extents on disk for the range, and create ordered data structs | ||
119 | * in ram to track those extents. | ||
120 | */ | ||
111 | static int cow_file_range(struct inode *inode, u64 start, u64 end) | 121 | static int cow_file_range(struct inode *inode, u64 start, u64 end) |
112 | { | 122 | { |
113 | struct btrfs_root *root = BTRFS_I(inode)->root; | 123 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -185,6 +195,13 @@ out: | |||
185 | return ret; | 195 | return ret; |
186 | } | 196 | } |
187 | 197 | ||
198 | /* | ||
199 | * when nowcow writeback call back. This checks for snapshots or COW copies | ||
200 | * of the extents that exist in the file, and COWs the file as required. | ||
201 | * | ||
202 | * If no cow copies or snapshots exist, we write directly to the existing | ||
203 | * blocks on disk | ||
204 | */ | ||
188 | static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) | 205 | static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) |
189 | { | 206 | { |
190 | u64 extent_start; | 207 | u64 extent_start; |
@@ -291,6 +308,9 @@ out: | |||
291 | return err; | 308 | return err; |
292 | } | 309 | } |
293 | 310 | ||
311 | /* | ||
312 | * extent_io.c call back to do delayed allocation processing | ||
313 | */ | ||
294 | static int run_delalloc_range(struct inode *inode, u64 start, u64 end) | 314 | static int run_delalloc_range(struct inode *inode, u64 start, u64 end) |
295 | { | 315 | { |
296 | struct btrfs_root *root = BTRFS_I(inode)->root; | 316 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -305,6 +325,11 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) | |||
305 | return ret; | 325 | return ret; |
306 | } | 326 | } |
307 | 327 | ||
328 | /* | ||
329 | * extent_io.c set_bit_hook, used to track delayed allocation | ||
330 | * bytes in this file, and to maintain the list of inodes that | ||
331 | * have pending delalloc work to be done. | ||
332 | */ | ||
308 | int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | 333 | int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, |
309 | unsigned long old, unsigned long bits) | 334 | unsigned long old, unsigned long bits) |
310 | { | 335 | { |
@@ -323,6 +348,9 @@ int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
323 | return 0; | 348 | return 0; |
324 | } | 349 | } |
325 | 350 | ||
351 | /* | ||
352 | * extent_io.c clear_bit_hook, see set_bit_hook for why | ||
353 | */ | ||
326 | int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | 354 | int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, |
327 | unsigned long old, unsigned long bits) | 355 | unsigned long old, unsigned long bits) |
328 | { | 356 | { |
@@ -349,6 +377,10 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | |||
349 | return 0; | 377 | return 0; |
350 | } | 378 | } |
351 | 379 | ||
380 | /* | ||
381 | * extent_io.c merge_bio_hook, this must check the chunk tree to make sure | ||
382 | * we don't create bios that span stripes or chunks | ||
383 | */ | ||
352 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | 384 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
353 | size_t size, struct bio *bio) | 385 | size_t size, struct bio *bio) |
354 | { | 386 | { |
@@ -371,6 +403,14 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
371 | return 0; | 403 | return 0; |
372 | } | 404 | } |
373 | 405 | ||
406 | /* | ||
407 | * in order to insert checksums into the metadata in large chunks, | ||
408 | * we wait until bio submission time. All the pages in the bio are | ||
409 | * checksummed and sums are attached onto the ordered extent record. | ||
410 | * | ||
411 | * At IO completion time the cums attached on the ordered extent record | ||
412 | * are inserted into the btree | ||
413 | */ | ||
374 | int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 414 | int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
375 | int mirror_num) | 415 | int mirror_num) |
376 | { | 416 | { |
@@ -383,6 +423,10 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
383 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); | 423 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); |
384 | } | 424 | } |
385 | 425 | ||
426 | /* | ||
427 | * extent_io.c submission hook. This does the right thing for csum calculation on write, | ||
428 | * or reading the csums from the tree before a read | ||
429 | */ | ||
386 | int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 430 | int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
387 | int mirror_num) | 431 | int mirror_num) |
388 | { | 432 | { |
@@ -408,6 +452,10 @@ mapit: | |||
408 | return btrfs_map_bio(root, rw, bio, mirror_num, 0); | 452 | return btrfs_map_bio(root, rw, bio, mirror_num, 0); |
409 | } | 453 | } |
410 | 454 | ||
455 | /* | ||
456 | * given a list of ordered sums record them in the inode. This happens | ||
457 | * at IO completion time based on sums calculated at bio submission time. | ||
458 | */ | ||
411 | static noinline int add_pending_csums(struct btrfs_trans_handle *trans, | 459 | static noinline int add_pending_csums(struct btrfs_trans_handle *trans, |
412 | struct inode *inode, u64 file_offset, | 460 | struct inode *inode, u64 file_offset, |
413 | struct list_head *list) | 461 | struct list_head *list) |
@@ -430,12 +478,12 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) | |||
430 | GFP_NOFS); | 478 | GFP_NOFS); |
431 | } | 479 | } |
432 | 480 | ||
481 | /* see btrfs_writepage_start_hook for details on why this is required */ | ||
433 | struct btrfs_writepage_fixup { | 482 | struct btrfs_writepage_fixup { |
434 | struct page *page; | 483 | struct page *page; |
435 | struct btrfs_work work; | 484 | struct btrfs_work work; |
436 | }; | 485 | }; |
437 | 486 | ||
438 | /* see btrfs_writepage_start_hook for details on why this is required */ | ||
439 | void btrfs_writepage_fixup_worker(struct btrfs_work *work) | 487 | void btrfs_writepage_fixup_worker(struct btrfs_work *work) |
440 | { | 488 | { |
441 | struct btrfs_writepage_fixup *fixup; | 489 | struct btrfs_writepage_fixup *fixup; |
@@ -522,6 +570,10 @@ int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) | |||
522 | return -EAGAIN; | 570 | return -EAGAIN; |
523 | } | 571 | } |
524 | 572 | ||
573 | /* as ordered data IO finishes, this gets called so we can finish | ||
574 | * an ordered extent if the range of bytes in the file it covers are | ||
575 | * fully written. | ||
576 | */ | ||
525 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | 577 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) |
526 | { | 578 | { |
527 | struct btrfs_root *root = BTRFS_I(inode)->root; | 579 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -631,6 +683,14 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
631 | return btrfs_finish_ordered_io(page->mapping->host, start, end); | 683 | return btrfs_finish_ordered_io(page->mapping->host, start, end); |
632 | } | 684 | } |
633 | 685 | ||
686 | /* | ||
687 | * When IO fails, either with EIO or csum verification fails, we | ||
688 | * try other mirrors that might have a good copy of the data. This | ||
689 | * io_failure_record is used to record state as we go through all the | ||
690 | * mirrors. If another mirror has good data, the page is set up to date | ||
691 | * and things continue. If a good mirror can't be found, the original | ||
692 | * bio end_io callback is called to indicate things have failed. | ||
693 | */ | ||
634 | struct io_failure_record { | 694 | struct io_failure_record { |
635 | struct page *page; | 695 | struct page *page; |
636 | u64 start; | 696 | u64 start; |
@@ -725,6 +785,10 @@ int btrfs_io_failed_hook(struct bio *failed_bio, | |||
725 | return 0; | 785 | return 0; |
726 | } | 786 | } |
727 | 787 | ||
788 | /* | ||
789 | * each time an IO finishes, we do a fast check in the IO failure tree | ||
790 | * to see if we need to process or clean up an io_failure_record | ||
791 | */ | ||
728 | int btrfs_clean_io_failures(struct inode *inode, u64 start) | 792 | int btrfs_clean_io_failures(struct inode *inode, u64 start) |
729 | { | 793 | { |
730 | u64 private; | 794 | u64 private; |
@@ -753,6 +817,11 @@ int btrfs_clean_io_failures(struct inode *inode, u64 start) | |||
753 | return 0; | 817 | return 0; |
754 | } | 818 | } |
755 | 819 | ||
820 | /* | ||
821 | * when reads are done, we need to check csums to verify the data is correct | ||
822 | * if there's a match, we allow the bio to finish. If not, we go through | ||
823 | * the io_failure_record routines to find good copies | ||
824 | */ | ||
756 | int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | 825 | int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, |
757 | struct extent_state *state) | 826 | struct extent_state *state) |
758 | { | 827 | { |
@@ -990,6 +1059,9 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
990 | btrfs_free_path(path); | 1059 | btrfs_free_path(path); |
991 | } | 1060 | } |
992 | 1061 | ||
1062 | /* | ||
1063 | * read an inode from the btree into the in-memory inode | ||
1064 | */ | ||
993 | void btrfs_read_locked_inode(struct inode *inode) | 1065 | void btrfs_read_locked_inode(struct inode *inode) |
994 | { | 1066 | { |
995 | struct btrfs_path *path; | 1067 | struct btrfs_path *path; |
@@ -1083,6 +1155,9 @@ make_bad: | |||
1083 | make_bad_inode(inode); | 1155 | make_bad_inode(inode); |
1084 | } | 1156 | } |
1085 | 1157 | ||
1158 | /* | ||
1159 | * given a leaf and an inode, copy the inode fields into the leaf | ||
1160 | */ | ||
1086 | static void fill_inode_item(struct btrfs_trans_handle *trans, | 1161 | static void fill_inode_item(struct btrfs_trans_handle *trans, |
1087 | struct extent_buffer *leaf, | 1162 | struct extent_buffer *leaf, |
1088 | struct btrfs_inode_item *item, | 1163 | struct btrfs_inode_item *item, |
@@ -1118,6 +1193,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
1118 | BTRFS_I(inode)->block_group->key.objectid); | 1193 | BTRFS_I(inode)->block_group->key.objectid); |
1119 | } | 1194 | } |
1120 | 1195 | ||
1196 | /* | ||
1197 | * copy everything in the in-memory inode into the btree. | ||
1198 | */ | ||
1121 | int noinline btrfs_update_inode(struct btrfs_trans_handle *trans, | 1199 | int noinline btrfs_update_inode(struct btrfs_trans_handle *trans, |
1122 | struct btrfs_root *root, | 1200 | struct btrfs_root *root, |
1123 | struct inode *inode) | 1201 | struct inode *inode) |
@@ -1151,6 +1229,11 @@ failed: | |||
1151 | } | 1229 | } |
1152 | 1230 | ||
1153 | 1231 | ||
1232 | /* | ||
1233 | * unlink helper that gets used here in inode.c and in the tree logging | ||
1234 | * recovery code. It remove a link in a directory with a given name, and | ||
1235 | * also drops the back refs in the inode to the directory | ||
1236 | */ | ||
1154 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | 1237 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, |
1155 | struct btrfs_root *root, | 1238 | struct btrfs_root *root, |
1156 | struct inode *dir, struct inode *inode, | 1239 | struct inode *dir, struct inode *inode, |
@@ -1309,7 +1392,7 @@ fail: | |||
1309 | /* | 1392 | /* |
1310 | * this can truncate away extent items, csum items and directory items. | 1393 | * this can truncate away extent items, csum items and directory items. |
1311 | * It starts at a high offset and removes keys until it can't find | 1394 | * It starts at a high offset and removes keys until it can't find |
1312 | * any higher than i_size. | 1395 | * any higher than new_size |
1313 | * | 1396 | * |
1314 | * csum items that cross the new i_size are truncated to the new size | 1397 | * csum items that cross the new i_size are truncated to the new size |
1315 | * as well. | 1398 | * as well. |
@@ -2123,6 +2206,11 @@ void btrfs_dirty_inode(struct inode *inode) | |||
2123 | btrfs_end_transaction(trans, root); | 2206 | btrfs_end_transaction(trans, root); |
2124 | } | 2207 | } |
2125 | 2208 | ||
2209 | /* | ||
2210 | * find the highest existing sequence number in a directory | ||
2211 | * and then set the in-memory index_cnt variable to reflect | ||
2212 | * free sequence numbers | ||
2213 | */ | ||
2126 | static int btrfs_set_inode_index_count(struct inode *inode) | 2214 | static int btrfs_set_inode_index_count(struct inode *inode) |
2127 | { | 2215 | { |
2128 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2216 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -2175,6 +2263,10 @@ out: | |||
2175 | return ret; | 2263 | return ret; |
2176 | } | 2264 | } |
2177 | 2265 | ||
2266 | /* | ||
2267 | * helper to find a free sequence number in a given directory. This current | ||
2268 | * code is very simple, later versions will do smarter things in the btree | ||
2269 | */ | ||
2178 | static int btrfs_set_inode_index(struct inode *dir, struct inode *inode, | 2270 | static int btrfs_set_inode_index(struct inode *dir, struct inode *inode, |
2179 | u64 *index) | 2271 | u64 *index) |
2180 | { | 2272 | { |
@@ -2305,6 +2397,12 @@ static inline u8 btrfs_inode_type(struct inode *inode) | |||
2305 | return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; | 2397 | return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; |
2306 | } | 2398 | } |
2307 | 2399 | ||
2400 | /* | ||
2401 | * utility function to add 'inode' into 'parent_inode' with | ||
2402 | * a give name and a given sequence number. | ||
2403 | * if 'add_backref' is true, also insert a backref from the | ||
2404 | * inode to the parent directory. | ||
2405 | */ | ||
2308 | int btrfs_add_link(struct btrfs_trans_handle *trans, | 2406 | int btrfs_add_link(struct btrfs_trans_handle *trans, |
2309 | struct inode *parent_inode, struct inode *inode, | 2407 | struct inode *parent_inode, struct inode *inode, |
2310 | const char *name, int name_len, int add_backref, u64 index) | 2408 | const char *name, int name_len, int add_backref, u64 index) |
@@ -2611,6 +2709,10 @@ out_unlock: | |||
2611 | return err; | 2709 | return err; |
2612 | } | 2710 | } |
2613 | 2711 | ||
2712 | /* helper for btfs_get_extent. Given an existing extent in the tree, | ||
2713 | * and an extent that you want to insert, deal with overlap and insert | ||
2714 | * the new extent into the tree. | ||
2715 | */ | ||
2614 | static int merge_extent_mapping(struct extent_map_tree *em_tree, | 2716 | static int merge_extent_mapping(struct extent_map_tree *em_tree, |
2615 | struct extent_map *existing, | 2717 | struct extent_map *existing, |
2616 | struct extent_map *em, | 2718 | struct extent_map *em, |
@@ -2627,6 +2729,14 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree, | |||
2627 | return add_extent_mapping(em_tree, em); | 2729 | return add_extent_mapping(em_tree, em); |
2628 | } | 2730 | } |
2629 | 2731 | ||
2732 | /* | ||
2733 | * a bit scary, this does extent mapping from logical file offset to the disk. | ||
2734 | * the ugly parts come from merging extents from the disk with the | ||
2735 | * in-ram representation. This gets more complex because of the data=ordered code, | ||
2736 | * where the in-ram extents might be locked pending data=ordered completion. | ||
2737 | * | ||
2738 | * This also copies inline extents directly into the page. | ||
2739 | */ | ||
2630 | struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | 2740 | struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, |
2631 | size_t pg_offset, u64 start, u64 len, | 2741 | size_t pg_offset, u64 start, u64 len, |
2632 | int create) | 2742 | int create) |
@@ -2869,76 +2979,11 @@ out: | |||
2869 | return em; | 2979 | return em; |
2870 | } | 2980 | } |
2871 | 2981 | ||
2872 | #if 0 /* waiting for O_DIRECT reads */ | ||
2873 | static int btrfs_get_block(struct inode *inode, sector_t iblock, | ||
2874 | struct buffer_head *bh_result, int create) | ||
2875 | { | ||
2876 | struct extent_map *em; | ||
2877 | u64 start = (u64)iblock << inode->i_blkbits; | ||
2878 | struct btrfs_multi_bio *multi = NULL; | ||
2879 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
2880 | u64 len; | ||
2881 | u64 logical; | ||
2882 | u64 map_length; | ||
2883 | int ret = 0; | ||
2884 | |||
2885 | em = btrfs_get_extent(inode, NULL, 0, start, bh_result->b_size, 0); | ||
2886 | |||
2887 | if (!em || IS_ERR(em)) | ||
2888 | goto out; | ||
2889 | |||
2890 | if (em->start > start || em->start + em->len <= start) { | ||
2891 | goto out; | ||
2892 | } | ||
2893 | |||
2894 | if (em->block_start == EXTENT_MAP_INLINE) { | ||
2895 | ret = -EINVAL; | ||
2896 | goto out; | ||
2897 | } | ||
2898 | |||
2899 | len = em->start + em->len - start; | ||
2900 | len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size))); | ||
2901 | |||
2902 | if (em->block_start == EXTENT_MAP_HOLE || | ||
2903 | em->block_start == EXTENT_MAP_DELALLOC) { | ||
2904 | bh_result->b_size = len; | ||
2905 | goto out; | ||
2906 | } | ||
2907 | |||
2908 | logical = start - em->start; | ||
2909 | logical = em->block_start + logical; | ||
2910 | |||
2911 | map_length = len; | ||
2912 | ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, | ||
2913 | logical, &map_length, &multi, 0); | ||
2914 | BUG_ON(ret); | ||
2915 | bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits; | ||
2916 | bh_result->b_size = min(map_length, len); | ||
2917 | |||
2918 | bh_result->b_bdev = multi->stripes[0].dev->bdev; | ||
2919 | set_buffer_mapped(bh_result); | ||
2920 | kfree(multi); | ||
2921 | out: | ||
2922 | free_extent_map(em); | ||
2923 | return ret; | ||
2924 | } | ||
2925 | #endif | ||
2926 | |||
2927 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | 2982 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, |
2928 | const struct iovec *iov, loff_t offset, | 2983 | const struct iovec *iov, loff_t offset, |
2929 | unsigned long nr_segs) | 2984 | unsigned long nr_segs) |
2930 | { | 2985 | { |
2931 | return -EINVAL; | 2986 | return -EINVAL; |
2932 | #if 0 | ||
2933 | struct file *file = iocb->ki_filp; | ||
2934 | struct inode *inode = file->f_mapping->host; | ||
2935 | |||
2936 | if (rw == WRITE) | ||
2937 | return -EINVAL; | ||
2938 | |||
2939 | return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | ||
2940 | offset, nr_segs, btrfs_get_block, NULL); | ||
2941 | #endif | ||
2942 | } | 2987 | } |
2943 | 2988 | ||
2944 | static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) | 2989 | static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) |
@@ -3202,6 +3247,9 @@ void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, | |||
3202 | } | 3247 | } |
3203 | } | 3248 | } |
3204 | 3249 | ||
3250 | /* | ||
3251 | * create a new subvolume directory/inode (helper for the ioctl). | ||
3252 | */ | ||
3205 | int btrfs_create_subvol_root(struct btrfs_root *new_root, | 3253 | int btrfs_create_subvol_root(struct btrfs_root *new_root, |
3206 | struct btrfs_trans_handle *trans, u64 new_dirid, | 3254 | struct btrfs_trans_handle *trans, u64 new_dirid, |
3207 | struct btrfs_block_group_cache *block_group) | 3255 | struct btrfs_block_group_cache *block_group) |
@@ -3223,6 +3271,9 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, | |||
3223 | return btrfs_update_inode(trans, new_root, inode); | 3271 | return btrfs_update_inode(trans, new_root, inode); |
3224 | } | 3272 | } |
3225 | 3273 | ||
3274 | /* helper function for file defrag and space balancing. This | ||
3275 | * forces readahead on a given range of bytes in an inode | ||
3276 | */ | ||
3226 | unsigned long btrfs_force_ra(struct address_space *mapping, | 3277 | unsigned long btrfs_force_ra(struct address_space *mapping, |
3227 | struct file_ra_state *ra, struct file *file, | 3278 | struct file_ra_state *ra, struct file *file, |
3228 | pgoff_t offset, pgoff_t last_index) | 3279 | pgoff_t offset, pgoff_t last_index) |
@@ -3424,6 +3475,10 @@ out_unlock: | |||
3424 | return ret; | 3475 | return ret; |
3425 | } | 3476 | } |
3426 | 3477 | ||
3478 | /* | ||
3479 | * some fairly slow code that needs optimization. This walks the list | ||
3480 | * of all the inodes with pending delalloc and forces them to disk. | ||
3481 | */ | ||
3427 | int btrfs_start_delalloc_inodes(struct btrfs_root *root) | 3482 | int btrfs_start_delalloc_inodes(struct btrfs_root *root) |
3428 | { | 3483 | { |
3429 | struct list_head *head = &root->fs_info->delalloc_inodes; | 3484 | struct list_head *head = &root->fs_info->delalloc_inodes; |
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 0cc314c10d66..e30aa6e2958f 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
@@ -25,6 +25,15 @@ | |||
25 | #include "extent_io.h" | 25 | #include "extent_io.h" |
26 | #include "locking.h" | 26 | #include "locking.h" |
27 | 27 | ||
28 | /* | ||
29 | * locks the per buffer mutex in an extent buffer. This uses adaptive locks | ||
30 | * and the spin is not tuned very extensively. The spinning does make a big | ||
31 | * difference in almost every workload, but spinning for the right amount of | ||
32 | * time needs some help. | ||
33 | * | ||
34 | * In general, we want to spin as long as the lock holder is doing btree searches, | ||
35 | * and we should give up if they are in more expensive code. | ||
36 | */ | ||
28 | int btrfs_tree_lock(struct extent_buffer *eb) | 37 | int btrfs_tree_lock(struct extent_buffer *eb) |
29 | { | 38 | { |
30 | int i; | 39 | int i; |
@@ -57,6 +66,10 @@ int btrfs_tree_locked(struct extent_buffer *eb) | |||
57 | return mutex_is_locked(&eb->mutex); | 66 | return mutex_is_locked(&eb->mutex); |
58 | } | 67 | } |
59 | 68 | ||
69 | /* | ||
70 | * btrfs_search_slot uses this to decide if it should drop its locks | ||
71 | * before doing something expensive like allocating free blocks for cow. | ||
72 | */ | ||
60 | int btrfs_path_lock_waiting(struct btrfs_path *path, int level) | 73 | int btrfs_path_lock_waiting(struct btrfs_path *path, int level) |
61 | { | 74 | { |
62 | int i; | 75 | int i; |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 951eacff2420..dcc1730dd837 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include "btrfs_inode.h" | 26 | #include "btrfs_inode.h" |
27 | #include "extent_io.h" | 27 | #include "extent_io.h" |
28 | 28 | ||
29 | |||
30 | static u64 entry_end(struct btrfs_ordered_extent *entry) | 29 | static u64 entry_end(struct btrfs_ordered_extent *entry) |
31 | { | 30 | { |
32 | if (entry->file_offset + entry->len < entry->file_offset) | 31 | if (entry->file_offset + entry->len < entry->file_offset) |
@@ -34,6 +33,9 @@ static u64 entry_end(struct btrfs_ordered_extent *entry) | |||
34 | return entry->file_offset + entry->len; | 33 | return entry->file_offset + entry->len; |
35 | } | 34 | } |
36 | 35 | ||
36 | /* returns NULL if the insertion worked, or it returns the node it did find | ||
37 | * in the tree | ||
38 | */ | ||
37 | static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, | 39 | static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, |
38 | struct rb_node *node) | 40 | struct rb_node *node) |
39 | { | 41 | { |
@@ -58,6 +60,10 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, | |||
58 | return NULL; | 60 | return NULL; |
59 | } | 61 | } |
60 | 62 | ||
63 | /* | ||
64 | * look for a given offset in the tree, and if it can't be found return the | ||
65 | * first lesser offset | ||
66 | */ | ||
61 | static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, | 67 | static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, |
62 | struct rb_node **prev_ret) | 68 | struct rb_node **prev_ret) |
63 | { | 69 | { |
@@ -108,6 +114,9 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, | |||
108 | return NULL; | 114 | return NULL; |
109 | } | 115 | } |
110 | 116 | ||
117 | /* | ||
118 | * helper to check if a given offset is inside a given entry | ||
119 | */ | ||
111 | static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) | 120 | static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) |
112 | { | 121 | { |
113 | if (file_offset < entry->file_offset || | 122 | if (file_offset < entry->file_offset || |
@@ -116,6 +125,10 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) | |||
116 | return 1; | 125 | return 1; |
117 | } | 126 | } |
118 | 127 | ||
128 | /* | ||
129 | * look find the first ordered struct that has this offset, otherwise | ||
130 | * the first one less than this offset | ||
131 | */ | ||
119 | static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | 132 | static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, |
120 | u64 file_offset) | 133 | u64 file_offset) |
121 | { | 134 | { |
@@ -305,6 +318,10 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
305 | return 0; | 318 | return 0; |
306 | } | 319 | } |
307 | 320 | ||
321 | /* | ||
322 | * wait for all the ordered extents in a root. This is done when balancing | ||
323 | * space between drives. | ||
324 | */ | ||
308 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) | 325 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) |
309 | { | 326 | { |
310 | struct list_head splice; | 327 | struct list_head splice; |
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index 30fcb7aea5b5..a50ebb67055d 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c | |||
@@ -21,6 +21,16 @@ | |||
21 | #include "ref-cache.h" | 21 | #include "ref-cache.h" |
22 | #include "transaction.h" | 22 | #include "transaction.h" |
23 | 23 | ||
24 | /* | ||
25 | * leaf refs are used to cache the information about which extents | ||
26 | * a given leaf has references on. This allows us to process that leaf | ||
27 | * in btrfs_drop_snapshot without needing to read it back from disk. | ||
28 | */ | ||
29 | |||
30 | /* | ||
31 | * kmalloc a leaf reference struct and update the counters for the | ||
32 | * total ref cache size | ||
33 | */ | ||
24 | struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, | 34 | struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, |
25 | int nr_extents) | 35 | int nr_extents) |
26 | { | 36 | { |
@@ -40,6 +50,10 @@ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, | |||
40 | return ref; | 50 | return ref; |
41 | } | 51 | } |
42 | 52 | ||
53 | /* | ||
54 | * free a leaf reference struct and update the counters for the | ||
55 | * total ref cache size | ||
56 | */ | ||
43 | void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) | 57 | void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) |
44 | { | 58 | { |
45 | if (!ref) | 59 | if (!ref) |
@@ -135,6 +149,10 @@ int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, | |||
135 | return 0; | 149 | return 0; |
136 | } | 150 | } |
137 | 151 | ||
152 | /* | ||
153 | * find the leaf ref for a given extent. This returns the ref struct with | ||
154 | * a usage reference incremented | ||
155 | */ | ||
138 | struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, | 156 | struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, |
139 | u64 bytenr) | 157 | u64 bytenr) |
140 | { | 158 | { |
@@ -160,6 +178,10 @@ again: | |||
160 | return NULL; | 178 | return NULL; |
161 | } | 179 | } |
162 | 180 | ||
181 | /* | ||
182 | * add a fully filled in leaf ref struct | ||
183 | * remove all the refs older than a given root generation | ||
184 | */ | ||
163 | int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, | 185 | int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, |
164 | int shared) | 186 | int shared) |
165 | { | 187 | { |
@@ -184,6 +206,10 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, | |||
184 | return ret; | 206 | return ret; |
185 | } | 207 | } |
186 | 208 | ||
209 | /* | ||
210 | * remove a single leaf ref from the tree. This drops the ref held by the tree | ||
211 | * only | ||
212 | */ | ||
187 | int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) | 213 | int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) |
188 | { | 214 | { |
189 | struct btrfs_leaf_ref_tree *tree; | 215 | struct btrfs_leaf_ref_tree *tree; |
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h index 617564787f52..16f3183d7c59 100644 --- a/fs/btrfs/ref-cache.h +++ b/fs/btrfs/ref-cache.h | |||
@@ -19,8 +19,11 @@ | |||
19 | #define __REFCACHE__ | 19 | #define __REFCACHE__ |
20 | 20 | ||
21 | struct btrfs_extent_info { | 21 | struct btrfs_extent_info { |
22 | /* bytenr and num_bytes find the extent in the extent allocation tree */ | ||
22 | u64 bytenr; | 23 | u64 bytenr; |
23 | u64 num_bytes; | 24 | u64 num_bytes; |
25 | |||
26 | /* objectid and offset find the back reference for the file */ | ||
24 | u64 objectid; | 27 | u64 objectid; |
25 | u64 offset; | 28 | u64 offset; |
26 | }; | 29 | }; |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0091c01abb06..eb7f7655e9d5 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -22,8 +22,10 @@ | |||
22 | #include "print-tree.h" | 22 | #include "print-tree.h" |
23 | 23 | ||
24 | /* | 24 | /* |
25 | * returns 0 on finding something, 1 if no more roots are there | 25 | * search forward for a root, starting with objectid 'search_start' |
26 | * and < 0 on error | 26 | * if a root key is found, the objectid we find is filled into 'found_objectid' |
27 | * and 0 is returned. < 0 is returned on error, 1 if there is nothing | ||
28 | * left in the tree. | ||
27 | */ | 29 | */ |
28 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, | 30 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, |
29 | u64 *found_objectid) | 31 | u64 *found_objectid) |
@@ -66,6 +68,11 @@ out: | |||
66 | return ret; | 68 | return ret; |
67 | } | 69 | } |
68 | 70 | ||
71 | /* | ||
72 | * lookup the root with the highest offset for a given objectid. The key we do | ||
73 | * find is copied into 'key'. If we find something return 0, otherwise 1, < 0 | ||
74 | * on error. | ||
75 | */ | ||
69 | int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, | 76 | int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, |
70 | struct btrfs_root_item *item, struct btrfs_key *key) | 77 | struct btrfs_root_item *item, struct btrfs_key *key) |
71 | { | 78 | { |
@@ -104,6 +111,9 @@ out: | |||
104 | return ret; | 111 | return ret; |
105 | } | 112 | } |
106 | 113 | ||
114 | /* | ||
115 | * copy the data in 'item' into the btree | ||
116 | */ | ||
107 | int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | 117 | int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root |
108 | *root, struct btrfs_key *key, struct btrfs_root_item | 118 | *root, struct btrfs_key *key, struct btrfs_root_item |
109 | *item) | 119 | *item) |
@@ -147,6 +157,12 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
147 | return ret; | 157 | return ret; |
148 | } | 158 | } |
149 | 159 | ||
160 | /* | ||
161 | * at mount time we want to find all the old transaction snapshots that were in | ||
162 | * the process of being deleted if we crashed. This is any root item with an offset | ||
163 | * lower than the latest root. They need to be queued for deletion to finish | ||
164 | * what was happening when we crashed. | ||
165 | */ | ||
150 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, | 166 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, |
151 | struct btrfs_root *latest) | 167 | struct btrfs_root *latest) |
152 | { | 168 | { |
@@ -227,6 +243,7 @@ err: | |||
227 | return ret; | 243 | return ret; |
228 | } | 244 | } |
229 | 245 | ||
246 | /* drop the root item for 'key' from 'root' */ | ||
230 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 247 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
231 | struct btrfs_key *key) | 248 | struct btrfs_key *key) |
232 | { | 249 | { |
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index ad03a32d1116..cdedbe144d45 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c | |||
@@ -17,6 +17,27 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/highmem.h> | 19 | #include <linux/highmem.h> |
20 | |||
21 | /* this is some deeply nasty code. ctree.h has a different | ||
22 | * definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef | ||
23 | * | ||
24 | * The end result is that anyone who #includes ctree.h gets a | ||
25 | * declaration for the btrfs_set_foo functions and btrfs_foo functions | ||
26 | * | ||
27 | * This file declares the macros and then #includes ctree.h, which results | ||
28 | * in cpp creating the function here based on the template below. | ||
29 | * | ||
30 | * These setget functions do all the extent_buffer related mapping | ||
31 | * required to efficiently read and write specific fields in the extent | ||
32 | * buffers. Every pointer to metadata items in btrfs is really just | ||
33 | * an unsigned long offset into the extent buffer which has been | ||
34 | * cast to a specific type. This gives us all the gcc type checking. | ||
35 | * | ||
36 | * The extent buffer api is used to do all the kmapping and page | ||
37 | * spanning work required to get extent buffers in highmem and have | ||
38 | * a metadata blocksize different from the page size. | ||
39 | */ | ||
40 | |||
20 | #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ | 41 | #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ |
21 | u##bits btrfs_##name(struct extent_buffer *eb, \ | 42 | u##bits btrfs_##name(struct extent_buffer *eb, \ |
22 | type *s) \ | 43 | type *s) \ |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8399d6d05d63..2e6039825b7b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -519,6 +519,9 @@ static struct file_system_type btrfs_fs_type = { | |||
519 | .fs_flags = FS_REQUIRES_DEV, | 519 | .fs_flags = FS_REQUIRES_DEV, |
520 | }; | 520 | }; |
521 | 521 | ||
522 | /* | ||
523 | * used by btrfsctl to scan devices when no FS is mounted | ||
524 | */ | ||
522 | static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | 525 | static long btrfs_control_ioctl(struct file *file, unsigned int cmd, |
523 | unsigned long arg) | 526 | unsigned long arg) |
524 | { | 527 | { |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 444abe0796ae..11266d68a6c9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -46,6 +46,9 @@ static noinline void put_transaction(struct btrfs_transaction *transaction) | |||
46 | } | 46 | } |
47 | } | 47 | } |
48 | 48 | ||
49 | /* | ||
50 | * either allocate a new transaction or hop into the existing one | ||
51 | */ | ||
49 | static noinline int join_transaction(struct btrfs_root *root) | 52 | static noinline int join_transaction(struct btrfs_root *root) |
50 | { | 53 | { |
51 | struct btrfs_transaction *cur_trans; | 54 | struct btrfs_transaction *cur_trans; |
@@ -85,6 +88,12 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
85 | return 0; | 88 | return 0; |
86 | } | 89 | } |
87 | 90 | ||
91 | /* | ||
92 | * this does all the record keeping required to make sure that a | ||
93 | * reference counted root is properly recorded in a given transaction. | ||
94 | * This is required to make sure the old root from before we joined the transaction | ||
95 | * is deleted when the transaction commits | ||
96 | */ | ||
88 | noinline int btrfs_record_root_in_trans(struct btrfs_root *root) | 97 | noinline int btrfs_record_root_in_trans(struct btrfs_root *root) |
89 | { | 98 | { |
90 | struct btrfs_dirty_root *dirty; | 99 | struct btrfs_dirty_root *dirty; |
@@ -127,6 +136,10 @@ noinline int btrfs_record_root_in_trans(struct btrfs_root *root) | |||
127 | return 0; | 136 | return 0; |
128 | } | 137 | } |
129 | 138 | ||
139 | /* wait for commit against the current transaction to become unblocked | ||
140 | * when this is done, it is safe to start a new transaction, but the current | ||
141 | * transaction might not be fully on disk. | ||
142 | */ | ||
130 | static void wait_current_trans(struct btrfs_root *root) | 143 | static void wait_current_trans(struct btrfs_root *root) |
131 | { | 144 | { |
132 | struct btrfs_transaction *cur_trans; | 145 | struct btrfs_transaction *cur_trans; |
@@ -198,7 +211,7 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | |||
198 | return start_transaction(r, num_blocks, 2); | 211 | return start_transaction(r, num_blocks, 2); |
199 | } | 212 | } |
200 | 213 | ||
201 | 214 | /* wait for a transaction commit to be fully complete */ | |
202 | static noinline int wait_for_commit(struct btrfs_root *root, | 215 | static noinline int wait_for_commit(struct btrfs_root *root, |
203 | struct btrfs_transaction *commit) | 216 | struct btrfs_transaction *commit) |
204 | { | 217 | { |
@@ -218,6 +231,10 @@ static noinline int wait_for_commit(struct btrfs_root *root, | |||
218 | return 0; | 231 | return 0; |
219 | } | 232 | } |
220 | 233 | ||
234 | /* | ||
235 | * rate limit against the drop_snapshot code. This helps to slow down new operations | ||
236 | * if the drop_snapshot code isn't able to keep up. | ||
237 | */ | ||
221 | static void throttle_on_drops(struct btrfs_root *root) | 238 | static void throttle_on_drops(struct btrfs_root *root) |
222 | { | 239 | { |
223 | struct btrfs_fs_info *info = root->fs_info; | 240 | struct btrfs_fs_info *info = root->fs_info; |
@@ -302,7 +319,11 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | |||
302 | return __btrfs_end_transaction(trans, root, 1); | 319 | return __btrfs_end_transaction(trans, root, 1); |
303 | } | 320 | } |
304 | 321 | ||
305 | 322 | /* | |
323 | * when btree blocks are allocated, they have some corresponding bits set for | ||
324 | * them in one of two extent_io trees. This is used to make sure all of | ||
325 | * those extents are on disk for transaction or log commit | ||
326 | */ | ||
306 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | 327 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, |
307 | struct extent_io_tree *dirty_pages) | 328 | struct extent_io_tree *dirty_pages) |
308 | { | 329 | { |
@@ -393,6 +414,16 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | |||
393 | &trans->transaction->dirty_pages); | 414 | &trans->transaction->dirty_pages); |
394 | } | 415 | } |
395 | 416 | ||
417 | /* | ||
418 | * this is used to update the root pointer in the tree of tree roots. | ||
419 | * | ||
420 | * But, in the case of the extent allocation tree, updating the root | ||
421 | * pointer may allocate blocks which may change the root of the extent | ||
422 | * allocation tree. | ||
423 | * | ||
424 | * So, this loops and repeats and makes sure the cowonly root didn't | ||
425 | * change while the root pointer was being updated in the metadata. | ||
426 | */ | ||
396 | static int update_cowonly_root(struct btrfs_trans_handle *trans, | 427 | static int update_cowonly_root(struct btrfs_trans_handle *trans, |
397 | struct btrfs_root *root) | 428 | struct btrfs_root *root) |
398 | { | 429 | { |
@@ -418,6 +449,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
418 | return 0; | 449 | return 0; |
419 | } | 450 | } |
420 | 451 | ||
452 | /* | ||
453 | * update all the cowonly tree roots on disk | ||
454 | */ | ||
421 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | 455 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, |
422 | struct btrfs_root *root) | 456 | struct btrfs_root *root) |
423 | { | 457 | { |
@@ -433,6 +467,11 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | |||
433 | return 0; | 467 | return 0; |
434 | } | 468 | } |
435 | 469 | ||
470 | /* | ||
471 | * dead roots are old snapshots that need to be deleted. This allocates | ||
472 | * a dirty root struct and adds it into the list of dead roots that need to | ||
473 | * be deleted | ||
474 | */ | ||
436 | int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) | 475 | int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) |
437 | { | 476 | { |
438 | struct btrfs_dirty_root *dirty; | 477 | struct btrfs_dirty_root *dirty; |
@@ -449,6 +488,12 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) | |||
449 | return 0; | 488 | return 0; |
450 | } | 489 | } |
451 | 490 | ||
491 | /* | ||
492 | * at transaction commit time we need to schedule the old roots for | ||
493 | * deletion via btrfs_drop_snapshot. This runs through all the | ||
494 | * reference counted roots that were modified in the current | ||
495 | * transaction and puts them into the drop list | ||
496 | */ | ||
452 | static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, | 497 | static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, |
453 | struct radix_tree_root *radix, | 498 | struct radix_tree_root *radix, |
454 | struct list_head *list) | 499 | struct list_head *list) |
@@ -541,6 +586,10 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, | |||
541 | return err; | 586 | return err; |
542 | } | 587 | } |
543 | 588 | ||
589 | /* | ||
590 | * defrag a given btree. If cacheonly == 1, this won't read from the disk, | ||
591 | * otherwise every leaf in the btree is read and defragged. | ||
592 | */ | ||
544 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | 593 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) |
545 | { | 594 | { |
546 | struct btrfs_fs_info *info = root->fs_info; | 595 | struct btrfs_fs_info *info = root->fs_info; |
@@ -570,6 +619,10 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
570 | return 0; | 619 | return 0; |
571 | } | 620 | } |
572 | 621 | ||
622 | /* | ||
623 | * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on | ||
624 | * all of them | ||
625 | */ | ||
573 | static noinline int drop_dirty_roots(struct btrfs_root *tree_root, | 626 | static noinline int drop_dirty_roots(struct btrfs_root *tree_root, |
574 | struct list_head *list) | 627 | struct list_head *list) |
575 | { | 628 | { |
@@ -664,6 +717,10 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, | |||
664 | return ret; | 717 | return ret; |
665 | } | 718 | } |
666 | 719 | ||
720 | /* | ||
721 | * new snapshots need to be created at a very specific time in the | ||
722 | * transaction commit. This does the actual creation | ||
723 | */ | ||
667 | static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | 724 | static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, |
668 | struct btrfs_fs_info *fs_info, | 725 | struct btrfs_fs_info *fs_info, |
669 | struct btrfs_pending_snapshot *pending) | 726 | struct btrfs_pending_snapshot *pending) |
@@ -734,6 +791,9 @@ fail: | |||
734 | return ret; | 791 | return ret; |
735 | } | 792 | } |
736 | 793 | ||
794 | /* | ||
795 | * create all the snapshots we've scheduled for creation | ||
796 | */ | ||
737 | static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, | 797 | static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, |
738 | struct btrfs_fs_info *fs_info) | 798 | struct btrfs_fs_info *fs_info) |
739 | { | 799 | { |
@@ -944,6 +1004,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
944 | return ret; | 1004 | return ret; |
945 | } | 1005 | } |
946 | 1006 | ||
1007 | /* | ||
1008 | * interface function to delete all the snapshots we have scheduled for deletion | ||
1009 | */ | ||
947 | int btrfs_clean_old_snapshots(struct btrfs_root *root) | 1010 | int btrfs_clean_old_snapshots(struct btrfs_root *root) |
948 | { | 1011 | { |
949 | struct list_head dirty_roots; | 1012 | struct list_head dirty_roots; |
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b3bb5bbad76e..6f57d0889b1e 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c | |||
@@ -23,6 +23,10 @@ | |||
23 | #include "transaction.h" | 23 | #include "transaction.h" |
24 | #include "locking.h" | 24 | #include "locking.h" |
25 | 25 | ||
26 | /* defrag all the leaves in a given btree. If cache_only == 1, don't read things | ||
27 | * from disk, otherwise read all the leaves and try to get key order to | ||
28 | * better reflect disk order | ||
29 | */ | ||
26 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | 30 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, |
27 | struct btrfs_root *root, int cache_only) | 31 | struct btrfs_root *root, int cache_only) |
28 | { | 32 | { |