Diffstat (limited to 'fs/btrfs')
56 files changed, 15026 insertions, 6963 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 7bb3c020e570..ecb9fd3be143 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,6 +4,8 @@ config BTRFS_FS
 	select LIBCRC32C
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
+	select LZO_COMPRESS
+	select LZO_DECOMPRESS
 	help
 	  Btrfs is a new filesystem with extents, writable snapshotting,
 	  support for multiple devices and many more features.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index a35eb36b32fd..9b72dcf1cd25 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   transaction.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-	   export.o tree-log.o acl.o free-space-cache.o zlib.o \
-	   compression.o delayed-ref.o relocation.o
+	   export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
+	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 2222d161c7b6..f66fc9959733 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -37,6 +37,9 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 	char *value = NULL;
 	struct posix_acl *acl;
 
+	if (!IS_POSIXACL(inode))
+		return NULL;
+
 	acl = get_cached_acl(inode, type);
 	if (acl != ACL_NOT_CACHED)
 		return acl;
@@ -60,8 +63,10 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 	size = __btrfs_getxattr(inode, name, value, size);
 	if (size > 0) {
 		acl = posix_acl_from_xattr(value, size);
-		if (IS_ERR(acl))
+		if (IS_ERR(acl)) {
+			kfree(value);
 			return acl;
+		}
 		set_cached_acl(inode, type, acl);
 	}
 	kfree(value);
@@ -82,6 +87,9 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
 	struct posix_acl *acl;
 	int ret = 0;
 
+	if (!IS_POSIXACL(dentry->d_inode))
+		return -EOPNOTSUPP;
+
 	acl = btrfs_get_acl(dentry->d_inode, type);
 
 	if (IS_ERR(acl))
@@ -162,7 +170,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 	int ret;
 	struct posix_acl *acl = NULL;
 
-	if (!is_owner_or_cap(dentry->d_inode))
+	if (!inode_owner_or_capable(dentry->d_inode))
 		return -EPERM;
 
 	if (!IS_POSIXACL(dentry->d_inode))
@@ -170,33 +178,40 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 
 	if (value) {
 		acl = posix_acl_from_xattr(value, size);
-		if (acl == NULL) {
-			value = NULL;
-			size = 0;
-		} else if (IS_ERR(acl)) {
+		if (IS_ERR(acl))
 			return PTR_ERR(acl);
+
+		if (acl) {
+			ret = posix_acl_valid(acl);
+			if (ret)
+				goto out;
 		}
 	}
 
 	ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
-
+out:
 	posix_acl_release(acl);
 
 	return ret;
 }
 
-int btrfs_check_acl(struct inode *inode, int mask)
+int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
-	struct posix_acl *acl;
 	int error = -EAGAIN;
 
-	acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+	if (flags & IPERM_FLAG_RCU) {
+		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+			error = -ECHILD;
 
-	if (IS_ERR(acl))
-		return PTR_ERR(acl);
-	if (acl) {
-		error = posix_acl_permission(inode, acl, mask);
-		posix_acl_release(acl);
+	} else {
+		struct posix_acl *acl;
+		acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+		if (acl) {
+			error = posix_acl_permission(inode, acl, mask);
+			posix_acl_release(acl);
+		}
 	}
 
 	return error;
@@ -273,7 +288,7 @@ int btrfs_acl_chmod(struct inode *inode)
 		return 0;
 
 	acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
-	if (IS_ERR(acl) || !acl)
+	if (IS_ERR_OR_NULL(acl))
 		return PTR_ERR(acl);
 
 	clone = posix_acl_clone(acl, GFP_KERNEL);
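The acl.c hunks above do three things: bail out early when the mount does not use POSIX ACLs, free the xattr scratch buffer when posix_acl_from_xattr() returns an error pointer (previously leaked), and run posix_acl_valid() on a user-supplied ACL before applying it. A minimal user-space sketch of the leak fix follows; struct acl, parse_acl() and get_acl() are illustrative stand-ins, not kernel APIs.

/*
 * Model of the fix: when the parser returns an error, the scratch
 * buffer it parsed from must still be freed on every path.
 */
#include <errno.h>
#include <stdlib.h>
#include <string.h>

struct acl { int count; };

static struct acl *parse_acl(const char *buf, size_t len, int *err)
{
	if (len == 0) {			/* malformed input */
		*err = -EINVAL;
		return NULL;
	}
	struct acl *a = malloc(sizeof(*a));
	if (!a) {
		*err = -ENOMEM;
		return NULL;
	}
	a->count = (int)len;
	(void)buf;			/* a real parser would read the bytes */
	*err = 0;
	return a;
}

/* mirrors the corrected btrfs_get_acl() flow: value is freed on all paths */
static struct acl *get_acl(const char *xattr, size_t size, int *err)
{
	char *value = malloc(size ? size : 1);
	if (!value) {
		*err = -ENOMEM;
		return NULL;
	}
	memcpy(value, xattr, size);
	struct acl *acl = parse_acl(value, size, err);
	free(value);			/* the old code leaked this on the error path */
	return acl;
}

int main(void)
{
	int err;
	struct acl *acl = get_acl("u:1000:rw", 9, &err);
	free(acl);
	return err ? 1 : 0;
}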
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 6ad63f17eca0..52d7eca8c7bf 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -22,6 +22,7 @@
 #include "extent_map.h"
 #include "extent_io.h"
 #include "ordered-data.h"
+#include "delayed-inode.h"
 
 /* in memory btrfs inode */
 struct btrfs_inode {
@@ -120,9 +121,6 @@ struct btrfs_inode {
 	 */
 	u64 index_cnt;
 
-	/* the start of block group preferred for allocations. */
-	u64 block_group;
-
 	/* the fsync log has some corner cases that mean we have to check
 	 * directories to see if any unlinks have been done before
 	 * the directory was logged. See tree-log.c for all the
@@ -136,9 +134,8 @@ struct btrfs_inode {
 	 * items we think we'll end up using, and reserved_extents is the number
 	 * of extent items we've reserved metadata for.
	 */
-	spinlock_t accounting_lock;
 	atomic_t outstanding_extents;
-	int reserved_extents;
+	atomic_t reserved_extents;
 
 	/*
 	 * ordered_data_close is set by truncate when a file that used
@@ -153,20 +150,34 @@ struct btrfs_inode {
 	unsigned ordered_data_close:1;
 	unsigned orphan_meta_reserved:1;
 	unsigned dummy_inode:1;
+	unsigned in_defrag:1;
 
 	/*
 	 * always compress this one file
 	 */
-	unsigned force_compress:1;
+	unsigned force_compress:4;
+
+	struct btrfs_delayed_node *delayed_node;
 
 	struct inode vfs_inode;
 };
 
+extern unsigned char btrfs_filetype_table[];
+
 static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
 {
 	return container_of(inode, struct btrfs_inode, vfs_inode);
 }
 
+static inline u64 btrfs_ino(struct inode *inode)
+{
+	u64 ino = BTRFS_I(inode)->location.objectid;
+
+	if (ino <= BTRFS_FIRST_FREE_OBJECTID)
+		ino = inode->i_ino;
+	return ino;
+}
+
 static inline void btrfs_i_size_write(struct inode *inode, u64 size)
 {
 	i_size_write(inode, size);
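The btrfs_ino() helper added above replaces direct inode->i_ino use elsewhere in this series: for regular inodes the btrfs key objectid is the inode number, but objectids at or below BTRFS_FIRST_FREE_OBJECTID (256) denote tree roots, so the helper falls back to the VFS-assigned i_ino. A stand-alone model of that fallback, with toy types in place of the kernel structs:

#include <stdint.h>
#include <stdio.h>

#define FIRST_FREE_OBJECTID 256ULL	/* mirrors BTRFS_FIRST_FREE_OBJECTID */

struct toy_inode {
	uint64_t key_objectid;	/* stand-in for BTRFS_I(inode)->location.objectid */
	uint64_t i_ino;		/* stand-in for inode->i_ino */
};

static uint64_t toy_ino(const struct toy_inode *inode)
{
	uint64_t ino = inode->key_objectid;

	if (ino <= FIRST_FREE_OBJECTID)	/* tree root: objectid isn't a file ino */
		ino = inode->i_ino;
	return ino;
}

int main(void)
{
	struct toy_inode file = { .key_objectid = 4242, .i_ino = 4242 };
	struct toy_inode subvol_root = { .key_objectid = 256, .i_ino = 99 };

	printf("%llu %llu\n",
	       (unsigned long long)toy_ino(&file),
	       (unsigned long long)toy_ino(&subvol_root));	/* prints: 4242 99 */
	return 0;
}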
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 396039b3a8a2..bfe42b03eaf9 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -62,6 +62,9 @@ struct compressed_bio {
 	/* number of bytes on disk */
 	unsigned long compressed_len;
 
+	/* the compression algorithm for this bio */
+	int compress_type;
+
 	/* number of compressed pages in the array */
 	unsigned long nr_pages;
 
@@ -91,23 +94,10 @@ static inline int compressed_bio_size(struct btrfs_root *root,
 static struct bio *compressed_bio_alloc(struct block_device *bdev,
 					u64 first_byte, gfp_t gfp_flags)
 {
-	struct bio *bio;
 	int nr_vecs;
 
 	nr_vecs = bio_get_nr_vecs(bdev);
-	bio = bio_alloc(gfp_flags, nr_vecs);
-
-	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
-		while (!bio && (nr_vecs /= 2))
-			bio = bio_alloc(gfp_flags, nr_vecs);
-	}
-
-	if (bio) {
-		bio->bi_size = 0;
-		bio->bi_bdev = bdev;
-		bio->bi_sector = first_byte >> 9;
-	}
-	return bio;
+	return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags);
 }
 
 static int check_compressed_csum(struct inode *inode,
@@ -135,9 +125,10 @@ static int check_compressed_csum(struct inode *inode,
 	kunmap_atomic(kaddr, KM_USER0);
 
 	if (csum != *cb_sum) {
-		printk(KERN_INFO "btrfs csum failed ino %lu "
+		printk(KERN_INFO "btrfs csum failed ino %llu "
 		       "extent %llu csum %u "
-		       "wanted %u mirror %d\n", inode->i_ino,
+		       "wanted %u mirror %d\n",
+		       (unsigned long long)btrfs_ino(inode),
 		       (unsigned long long)disk_start,
 		       csum, *cb_sum, cb->mirror_num);
 		ret = -EIO;
@@ -163,7 +154,6 @@ fail:
  */
 static void end_compressed_bio_read(struct bio *bio, int err)
 {
-	struct extent_io_tree *tree;
 	struct compressed_bio *cb = bio->bi_private;
 	struct inode *inode;
 	struct page *page;
@@ -187,12 +177,12 @@ static void end_compressed_bio_read(struct bio *bio, int err)
 	/* ok, we're the last bio for this extent, lets start
 	 * the decompression.
 	 */
-	tree = &BTRFS_I(inode)->io_tree;
-	ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
+	ret = btrfs_decompress_biovec(cb->compress_type,
+				      cb->compressed_pages,
 					cb->start,
 					cb->orig_bio->bi_io_vec,
 					cb->orig_bio->bi_vcnt,
 					cb->compressed_len);
 csum_failed:
 	if (ret)
 		cb->errors = 1;
@@ -343,7 +333,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	struct compressed_bio *cb;
 	unsigned long bytes_left;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-	int page_index = 0;
+	int pg_index = 0;
 	struct page *page;
 	u64 first_byte = disk_start;
 	struct block_device *bdev;
@@ -351,6 +341,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
 	WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
+	if (!cb)
+		return -ENOMEM;
 	atomic_set(&cb->pending_bios, 0);
 	cb->errors = 0;
 	cb->inode = inode;
@@ -365,14 +357,18 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
 	bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
+	if(!bio) {
+		kfree(cb);
+		return -ENOMEM;
+	}
 	bio->bi_private = cb;
 	bio->bi_end_io = end_compressed_bio_write;
 	atomic_inc(&cb->pending_bios);
 
 	/* create and submit bios for the compressed pages */
 	bytes_left = compressed_len;
-	for (page_index = 0; page_index < cb->nr_pages; page_index++) {
-		page = compressed_pages[page_index];
+	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
+		page = compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
 		if (bio->bi_size)
 			ret = io_tree->ops->merge_bio_hook(page, 0,
@@ -437,7 +433,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 				     struct compressed_bio *cb)
 {
 	unsigned long end_index;
-	unsigned long page_index;
+	unsigned long pg_index;
 	u64 last_offset;
 	u64 isize = i_size_read(inode);
 	int ret;
@@ -461,13 +457,13 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 	end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
 
 	while (last_offset < compressed_end) {
-		page_index = last_offset >> PAGE_CACHE_SHIFT;
+		pg_index = last_offset >> PAGE_CACHE_SHIFT;
 
-		if (page_index > end_index)
+		if (pg_index > end_index)
 			break;
 
 		rcu_read_lock();
-		page = radix_tree_lookup(&mapping->page_tree, page_index);
+		page = radix_tree_lookup(&mapping->page_tree, pg_index);
 		rcu_read_unlock();
 		if (page) {
 			misses++;
@@ -481,7 +477,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 		if (!page)
 			break;
 
-		if (add_to_page_cache_lru(page, mapping, page_index,
+		if (add_to_page_cache_lru(page, mapping, pg_index,
 					  GFP_NOFS)) {
 			page_cache_release(page);
 			goto next;
@@ -565,7 +561,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
 	unsigned long compressed_len;
 	unsigned long nr_pages;
-	unsigned long page_index;
+	unsigned long pg_index;
 	struct page *page;
 	struct block_device *bdev;
 	struct bio *comp_bio;
@@ -573,7 +569,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	u64 em_len;
 	u64 em_start;
 	struct extent_map *em;
-	int ret;
+	int ret = -ENOMEM;
 	u32 *sums;
 
 	tree = &BTRFS_I(inode)->io_tree;
@@ -588,6 +584,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	compressed_len = em->block_len;
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
+	if (!cb)
+		goto out;
+
 	atomic_set(&cb->pending_bios, 0);
 	cb->errors = 0;
 	cb->inode = inode;
@@ -603,17 +602,23 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	cb->len = uncompressed_len;
 	cb->compressed_len = compressed_len;
+	cb->compress_type = extent_compress_type(bio_flags);
 	cb->orig_bio = bio;
 
 	nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
 				 PAGE_CACHE_SIZE;
-	cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages,
+	cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
 				       GFP_NOFS);
+	if (!cb->compressed_pages)
+		goto fail1;
+
 	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
-	for (page_index = 0; page_index < nr_pages; page_index++) {
-		cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
+	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+		cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS |
 							      __GFP_HIGHMEM);
+		if (!cb->compressed_pages[pg_index])
+			goto fail2;
 	}
 	cb->nr_pages = nr_pages;
 
@@ -624,12 +629,14 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	cb->len = uncompressed_len;
 
 	comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
+	if (!comp_bio)
+		goto fail2;
 	comp_bio->bi_private = cb;
 	comp_bio->bi_end_io = end_compressed_bio_read;
 	atomic_inc(&cb->pending_bios);
 
-	for (page_index = 0; page_index < nr_pages; page_index++) {
-		page = cb->compressed_pages[page_index];
+	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+		page = cb->compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
 		page->index = em_start >> PAGE_CACHE_SHIFT;
 
@@ -657,8 +664,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 		atomic_inc(&cb->pending_bios);
 
 		if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
-			btrfs_lookup_bio_sums(root, inode, comp_bio,
-					      sums);
+			ret = btrfs_lookup_bio_sums(root, inode,
+						    comp_bio, sums);
+			BUG_ON(ret);
 		}
 		sums += (comp_bio->bi_size + root->sectorsize - 1) /
 			root->sectorsize;
@@ -683,12 +691,339 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
 	BUG_ON(ret);
 
-	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
-		btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+		ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+		BUG_ON(ret);
+	}
 
 	ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
 	BUG_ON(ret);
 
 	bio_put(comp_bio);
 	return 0;
+
+fail2:
+	for (pg_index = 0; pg_index < nr_pages; pg_index++)
+		free_page((unsigned long)cb->compressed_pages[pg_index]);
+
+	kfree(cb->compressed_pages);
+fail1:
+	kfree(cb);
+out:
+	free_extent_map(em);
+	return ret;
+}
+
+static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
+static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
+static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
+static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
+static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
+
+struct btrfs_compress_op *btrfs_compress_op[] = {
+	&btrfs_zlib_compress,
+	&btrfs_lzo_compress,
+};
+
+int __init btrfs_init_compress(void)
+{
+	int i;
+
+	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+		INIT_LIST_HEAD(&comp_idle_workspace[i]);
+		spin_lock_init(&comp_workspace_lock[i]);
+		atomic_set(&comp_alloc_workspace[i], 0);
+		init_waitqueue_head(&comp_workspace_wait[i]);
+	}
+	return 0;
+}
+
+/*
+ * this finds an available workspace or allocates a new one
+ * ERR_PTR is returned if things go bad.
+ */
+static struct list_head *find_workspace(int type)
+{
+	struct list_head *workspace;
+	int cpus = num_online_cpus();
+	int idx = type - 1;
+
+	struct list_head *idle_workspace = &comp_idle_workspace[idx];
+	spinlock_t *workspace_lock = &comp_workspace_lock[idx];
+	atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
+	wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
+	int *num_workspace = &comp_num_workspace[idx];
+again:
+	spin_lock(workspace_lock);
+	if (!list_empty(idle_workspace)) {
+		workspace = idle_workspace->next;
+		list_del(workspace);
+		(*num_workspace)--;
+		spin_unlock(workspace_lock);
+		return workspace;
+
+	}
+	if (atomic_read(alloc_workspace) > cpus) {
+		DEFINE_WAIT(wait);
+
+		spin_unlock(workspace_lock);
+		prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
+		if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
+			schedule();
+		finish_wait(workspace_wait, &wait);
+		goto again;
+	}
+	atomic_inc(alloc_workspace);
+	spin_unlock(workspace_lock);
+
+	workspace = btrfs_compress_op[idx]->alloc_workspace();
+	if (IS_ERR(workspace)) {
+		atomic_dec(alloc_workspace);
+		wake_up(workspace_wait);
+	}
+	return workspace;
+}
+
+/*
+ * put a workspace struct back on the list or free it if we have enough
+ * idle ones sitting around
+ */
+static void free_workspace(int type, struct list_head *workspace)
+{
+	int idx = type - 1;
+	struct list_head *idle_workspace = &comp_idle_workspace[idx];
+	spinlock_t *workspace_lock = &comp_workspace_lock[idx];
+	atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
+	wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
+	int *num_workspace = &comp_num_workspace[idx];
+
+	spin_lock(workspace_lock);
+	if (*num_workspace < num_online_cpus()) {
+		list_add_tail(workspace, idle_workspace);
+		(*num_workspace)++;
+		spin_unlock(workspace_lock);
+		goto wake;
+	}
+	spin_unlock(workspace_lock);
+
+	btrfs_compress_op[idx]->free_workspace(workspace);
+	atomic_dec(alloc_workspace);
+wake:
+	if (waitqueue_active(workspace_wait))
+		wake_up(workspace_wait);
+}
+
+/*
+ * cleanup function for module exit
+ */
+static void free_workspaces(void)
+{
+	struct list_head *workspace;
+	int i;
+
+	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+		while (!list_empty(&comp_idle_workspace[i])) {
+			workspace = comp_idle_workspace[i].next;
+			list_del(workspace);
+			btrfs_compress_op[i]->free_workspace(workspace);
+			atomic_dec(&comp_alloc_workspace[i]);
+		}
+	}
+}
+
+/*
+ * given an address space and start/len, compress the bytes.
+ *
+ * pages are allocated to hold the compressed result and stored
+ * in 'pages'
+ *
+ * out_pages is used to return the number of pages allocated.  There
+ * may be pages allocated even if we return an error
+ *
+ * total_in is used to return the number of bytes actually read.  It
+ * may be smaller then len if we had to exit early because we
+ * ran out of room in the pages array or because we cross the
+ * max_out threshold.
+ *
+ * total_out is used to return the total number of compressed bytes
+ *
+ * max_out tells us the max number of bytes that we're allowed to
+ * stuff into pages
+ */
+int btrfs_compress_pages(int type, struct address_space *mapping,
+			 u64 start, unsigned long len,
+			 struct page **pages,
+			 unsigned long nr_dest_pages,
+			 unsigned long *out_pages,
+			 unsigned long *total_in,
+			 unsigned long *total_out,
+			 unsigned long max_out)
+{
+	struct list_head *workspace;
+	int ret;
+
+	workspace = find_workspace(type);
+	if (IS_ERR(workspace))
+		return -1;
+
+	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
+						      start, len, pages,
+						      nr_dest_pages, out_pages,
+						      total_in, total_out,
+						      max_out);
+	free_workspace(type, workspace);
+	return ret;
+}
+
+/*
+ * pages_in is an array of pages with compressed data.
+ *
+ * disk_start is the starting logical offset of this array in the file
+ *
+ * bvec is a bio_vec of pages from the file that we want to decompress into
+ *
+ * vcnt is the count of pages in the biovec
+ *
+ * srclen is the number of bytes in pages_in
+ *
+ * The basic idea is that we have a bio that was created by readpages.
+ * The pages in the bio are for the uncompressed data, and they may not
+ * be contiguous.  They all correspond to the range of bytes covered by
+ * the compressed extent.
+ */
+int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
+			    struct bio_vec *bvec, int vcnt, size_t srclen)
+{
+	struct list_head *workspace;
+	int ret;
+
+	workspace = find_workspace(type);
+	if (IS_ERR(workspace))
+		return -ENOMEM;
+
+	ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
+							 disk_start,
+							 bvec, vcnt, srclen);
+	free_workspace(type, workspace);
+	return ret;
+}
+
+/*
+ * a less complex decompression routine.  Our compressed data fits in a
+ * single page, and we want to read a single page out of it.
+ * start_byte tells us the offset into the compressed data we're interested in
+ */
+int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
+		     unsigned long start_byte, size_t srclen, size_t destlen)
+{
+	struct list_head *workspace;
+	int ret;
+
+	workspace = find_workspace(type);
+	if (IS_ERR(workspace))
+		return -ENOMEM;
+
+	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
+						  dest_page, start_byte,
+						  srclen, destlen);
+
+	free_workspace(type, workspace);
+	return ret;
+}
+
+void btrfs_exit_compress(void)
+{
+	free_workspaces();
+}
+
+/*
+ * Copy uncompressed data from working buffer to pages.
+ *
+ * buf_start is the byte offset we're of the start of our workspace buffer.
+ *
+ * total_out is the last byte of the buffer
+ */
+int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+			      unsigned long total_out, u64 disk_start,
+			      struct bio_vec *bvec, int vcnt,
+			      unsigned long *pg_index,
+			      unsigned long *pg_offset)
+{
+	unsigned long buf_offset;
+	unsigned long current_buf_start;
+	unsigned long start_byte;
+	unsigned long working_bytes = total_out - buf_start;
+	unsigned long bytes;
+	char *kaddr;
+	struct page *page_out = bvec[*pg_index].bv_page;
+
+	/*
+	 * start byte is the first byte of the page we're currently
+	 * copying into relative to the start of the compressed data.
+	 */
+	start_byte = page_offset(page_out) - disk_start;
+
+	/* we haven't yet hit data corresponding to this page */
+	if (total_out <= start_byte)
+		return 1;
+
+	/*
+	 * the start of the data we care about is offset into
+	 * the middle of our working buffer
+	 */
+	if (total_out > start_byte && buf_start < start_byte) {
+		buf_offset = start_byte - buf_start;
+		working_bytes -= buf_offset;
+	} else {
+		buf_offset = 0;
+	}
+	current_buf_start = buf_start;
+
+	/* copy bytes from the working buffer into the pages */
+	while (working_bytes > 0) {
+		bytes = min(PAGE_CACHE_SIZE - *pg_offset,
+			    PAGE_CACHE_SIZE - buf_offset);
+		bytes = min(bytes, working_bytes);
+		kaddr = kmap_atomic(page_out, KM_USER0);
+		memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
+		kunmap_atomic(kaddr, KM_USER0);
+		flush_dcache_page(page_out);
+
+		*pg_offset += bytes;
+		buf_offset += bytes;
+		working_bytes -= bytes;
+		current_buf_start += bytes;
+
+		/* check if we need to pick another page */
+		if (*pg_offset == PAGE_CACHE_SIZE) {
+			(*pg_index)++;
+			if (*pg_index >= vcnt)
+				return 0;
+
+			page_out = bvec[*pg_index].bv_page;
+			*pg_offset = 0;
+			start_byte = page_offset(page_out) - disk_start;
+
+			/*
+			 * make sure our new page is covered by this
+			 * working buffer
+			 */
+			if (total_out <= start_byte)
+				return 1;
+
+			/*
+			 * the next page in the biovec might not be adjacent
+			 * to the last page, but it might still be found
+			 * inside this working buffer. bump our offset pointer
+			 */
+			if (total_out > start_byte &&
+			    current_buf_start < start_byte) {
+				buf_offset = start_byte - buf_start;
+				working_bytes = total_out - start_byte;
+				current_buf_start = buf_start + buf_offset;
+			}
+		}
+	}
+
+	return 1;
 }
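find_workspace()/free_workspace() above implement a bounded per-algorithm pool: reuse an idle workspace when one exists, allocate new ones until the count passes num_online_cpus(), and otherwise sleep on a waitqueue until someone returns one. The sketch below models the same idea in portable C with a mutex and condition variable standing in for the kernel's spinlock and waitqueue; every name is illustrative, and unlike the kernel version it never frees surplus idle workspaces.

#include <pthread.h>
#include <stdlib.h>

struct workspace {
	struct workspace *next;	/* free-list link */
	char buf[4096];		/* per-algorithm scratch memory */
};

struct pool {
	pthread_mutex_t lock;
	pthread_cond_t freed;	/* signalled when a workspace is returned */
	struct workspace *idle;	/* free list of cached workspaces */
	int alloc;		/* total workspaces in existence */
	int max;		/* cap, like num_online_cpus() */
};

static struct workspace *pool_get(struct pool *p)
{
	struct workspace *ws;

	pthread_mutex_lock(&p->lock);
	for (;;) {
		if (p->idle) {			/* fast path: reuse an idle one */
			ws = p->idle;
			p->idle = ws->next;
			pthread_mutex_unlock(&p->lock);
			return ws;
		}
		if (p->alloc < p->max) {	/* still allowed to grow the pool */
			p->alloc++;
			pthread_mutex_unlock(&p->lock);
			ws = calloc(1, sizeof(*ws));
			if (!ws) {		/* give the slot back on failure */
				pthread_mutex_lock(&p->lock);
				p->alloc--;
				pthread_mutex_unlock(&p->lock);
				pthread_cond_signal(&p->freed);
			}
			return ws;		/* NULL maps to -ENOMEM upstream */
		}
		/* over the cap: sleep until pool_put() wakes us */
		pthread_cond_wait(&p->freed, &p->lock);
	}
}

static void pool_put(struct pool *p, struct workspace *ws)
{
	pthread_mutex_lock(&p->lock);
	ws->next = p->idle;		/* cache it for the next caller */
	p->idle = ws;
	pthread_mutex_unlock(&p->lock);
	pthread_cond_signal(&p->freed);
}

int main(void)
{
	struct pool p = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.freed = PTHREAD_COND_INITIALIZER,
		.idle = NULL, .alloc = 0, .max = 4,
	};
	struct workspace *ws = pool_get(&p);
	if (!ws)
		return 1;
	pool_put(&p, ws);
	return 0;
}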
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 421f5b4aa715..a12059f4f0fd 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -19,24 +19,27 @@
 #ifndef __BTRFS_COMPRESSION_
 #define __BTRFS_COMPRESSION_
 
-int btrfs_zlib_decompress(unsigned char *data_in,
-			  struct page *dest_page,
-			  unsigned long start_byte,
-			  size_t srclen, size_t destlen);
-int btrfs_zlib_compress_pages(struct address_space *mapping,
-			      u64 start, unsigned long len,
-			      struct page **pages,
-			      unsigned long nr_dest_pages,
-			      unsigned long *out_pages,
-			      unsigned long *total_in,
-			      unsigned long *total_out,
-			      unsigned long max_out);
-int btrfs_zlib_decompress_biovec(struct page **pages_in,
-			      u64 disk_start,
-			      struct bio_vec *bvec,
-			      int vcnt,
-			      size_t srclen);
-void btrfs_zlib_exit(void);
+int btrfs_init_compress(void);
+void btrfs_exit_compress(void);
+
+int btrfs_compress_pages(int type, struct address_space *mapping,
+			 u64 start, unsigned long len,
+			 struct page **pages,
+			 unsigned long nr_dest_pages,
+			 unsigned long *out_pages,
+			 unsigned long *total_in,
+			 unsigned long *total_out,
+			 unsigned long max_out);
+int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
+			    struct bio_vec *bvec, int vcnt, size_t srclen);
+int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
+		     unsigned long start_byte, size_t srclen, size_t destlen);
+int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+			      unsigned long total_out, u64 disk_start,
+			      struct bio_vec *bvec, int vcnt,
+			      unsigned long *pg_index,
+			      unsigned long *pg_offset);
+
 int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				  unsigned long len, u64 disk_start,
 				  unsigned long compressed_len,
@@ -44,4 +47,37 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				  unsigned long nr_pages);
 int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags);
+
+struct btrfs_compress_op {
+	struct list_head *(*alloc_workspace)(void);
+
+	void (*free_workspace)(struct list_head *workspace);
+
+	int (*compress_pages)(struct list_head *workspace,
+			      struct address_space *mapping,
+			      u64 start, unsigned long len,
+			      struct page **pages,
+			      unsigned long nr_dest_pages,
+			      unsigned long *out_pages,
+			      unsigned long *total_in,
+			      unsigned long *total_out,
+			      unsigned long max_out);
+
+	int (*decompress_biovec)(struct list_head *workspace,
+				 struct page **pages_in,
+				 u64 disk_start,
+				 struct bio_vec *bvec,
+				 int vcnt,
+				 size_t srclen);
+
+	int (*decompress)(struct list_head *workspace,
+			  unsigned char *data_in,
+			  struct page *dest_page,
+			  unsigned long start_byte,
+			  size_t srclen, size_t destlen);
+};
+
+extern struct btrfs_compress_op btrfs_zlib_compress;
+extern struct btrfs_compress_op btrfs_lzo_compress;
+
 #endif
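The header now routes everything through struct btrfs_compress_op, a per-algorithm table of callbacks selected by (type - 1), so LZO plugs in beside zlib without touching any caller. A toy model of that dispatch pattern follows; the "store" and "rle" algorithms are made up for illustration and stand in for zlib/LZO.

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct compress_op {
	const char *name;
	size_t (*compress)(const char *in, size_t len, char *out);
};

static size_t copy_compress(const char *in, size_t len, char *out)
{
	memcpy(out, in, len);	/* "store" mode: no real compression */
	return len;
}

static size_t rle_compress(const char *in, size_t len, char *out)
{
	size_t o = 0;
	for (size_t i = 0; i < len; ) {
		size_t run = 1;
		while (i + run < len && in[i + run] == in[i] && run < 255)
			run++;
		out[o++] = (char)run;	/* (count, byte) pairs */
		out[o++] = in[i];
		i += run;
	}
	return o;
}

static const struct compress_op ops_store = { "store", copy_compress };
static const struct compress_op ops_rle = { "rle", rle_compress };

/* mirrors: struct btrfs_compress_op *btrfs_compress_op[] = { &zlib, &lzo }; */
static const struct compress_op *compress_ops[] = { &ops_store, &ops_rle };

enum { TYPE_STORE = 1, TYPE_RLE = 2 };	/* types are 1-based, like btrfs */

static size_t do_compress(int type, const char *in, size_t len, char *out)
{
	return compress_ops[type - 1]->compress(in, len, out);
}

int main(void)
{
	char out[64];
	size_t n = do_compress(TYPE_RLE, "aaaabbb", 7, out);
	printf("rle packed 7 bytes into %zu\n", n);	/* prints 4 */
	return 0;
}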
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c3df14ce2cc2..2e667868e0d2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -38,18 +38,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 			      struct extent_buffer *src_buf);
 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		   struct btrfs_path *path, int level, int slot);
-static int setup_items_for_insert(struct btrfs_trans_handle *trans,
-			struct btrfs_root *root, struct btrfs_path *path,
-			struct btrfs_key *cpu_key, u32 *data_size,
-			u32 total_data, u32 total_size, int nr);
-
 
 struct btrfs_path *btrfs_alloc_path(void)
 {
 	struct btrfs_path *path;
 	path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
-	if (path)
-		path->reada = 1;
 	return path;
 }
 
@@ -105,7 +98,9 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
 /* this also releases the path */
 void btrfs_free_path(struct btrfs_path *p)
 {
-	btrfs_release_path(NULL, p);
+	if (!p)
+		return;
+	btrfs_release_path(p);
 	kmem_cache_free(btrfs_path_cachep, p);
 }
 
@@ -115,7 +110,7 @@ void btrfs_free_path(struct btrfs_path *p)
  *
  * It is safe to call this on paths that no locks or extent buffers held.
  */
-noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
+noinline void btrfs_release_path(struct btrfs_path *p)
 {
 	int i;
 
@@ -145,10 +140,11 @@ noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
 struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
 {
 	struct extent_buffer *eb;
-	spin_lock(&root->node_lock);
-	eb = root->node;
+
+	rcu_read_lock();
+	eb = rcu_dereference(root->node);
 	extent_buffer_get(eb);
-	spin_unlock(&root->node_lock);
+	rcu_read_unlock();
 	return eb;
 }
 
@@ -163,14 +159,8 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
 	while (1) {
 		eb = btrfs_root_node(root);
 		btrfs_tree_lock(eb);
-
-		spin_lock(&root->node_lock);
-		if (eb == root->node) {
-			spin_unlock(&root->node_lock);
+		if (eb == root->node)
 			break;
-		}
-		spin_unlock(&root->node_lock);
-
 		btrfs_tree_unlock(eb);
 		free_extent_buffer(eb);
 	}
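The two hunks above convert root->node from spinlock protection to RCU: readers dereference the pointer under rcu_read_lock() and pin it with a reference, writers publish a new root with rcu_assign_pointer() (see the __btrfs_cow_block and balance_level hunks below), and btrfs_lock_root_node() re-checks eb == root->node under the tree lock because the root can be COWed between the lookup and the lock. A compact model of that publish/re-check loop, using C11 atomics as stand-ins for the RCU primitives; real RCU additionally defers freeing across a grace period, which is what makes the unlocked reference bump safe.

#include <stdatomic.h>
#include <pthread.h>

struct node {
	_Atomic int refs;
	pthread_mutex_t lock;
};

struct root {
	struct node *_Atomic node;	/* published pointer, like root->node */
};

static struct node *root_node(struct root *root)
{
	/* acquire pairs with the release store in publish_root() */
	struct node *n = atomic_load_explicit(&root->node, memory_order_acquire);
	atomic_fetch_add(&n->refs, 1);	/* like extent_buffer_get() */
	return n;
}

static struct node *lock_root_node(struct root *root)
{
	struct node *n;

	for (;;) {
		n = root_node(root);
		pthread_mutex_lock(&n->lock);
		/* re-check: the root may have been COWed before we got the lock */
		if (n == atomic_load_explicit(&root->node, memory_order_acquire))
			return n;
		pthread_mutex_unlock(&n->lock);
		atomic_fetch_sub(&n->refs, 1);	/* like free_extent_buffer() */
	}
}

static void publish_root(struct root *root, struct node *cow)
{
	atomic_fetch_add(&cow->refs, 1);
	/* release store is the rcu_assign_pointer() analogue */
	atomic_store_explicit(&root->node, cow, memory_order_release);
}

int main(void)
{
	struct node a = { 1, PTHREAD_MUTEX_INITIALIZER };
	struct node b = { 1, PTHREAD_MUTEX_INITIALIZER };
	struct root r;

	atomic_init(&r.node, &a);
	struct node *n = lock_root_node(&r);
	publish_root(&r, &b);		/* a writer swaps the root */
	pthread_mutex_unlock(&n->lock);
	return 0;
}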
@@ -200,7 +190,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
200 | struct extent_buffer **cow_ret, u64 new_root_objectid) | 190 | struct extent_buffer **cow_ret, u64 new_root_objectid) |
201 | { | 191 | { |
202 | struct extent_buffer *cow; | 192 | struct extent_buffer *cow; |
203 | u32 nritems; | ||
204 | int ret = 0; | 193 | int ret = 0; |
205 | int level; | 194 | int level; |
206 | struct btrfs_disk_key disk_key; | 195 | struct btrfs_disk_key disk_key; |
@@ -210,7 +199,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
210 | WARN_ON(root->ref_cows && trans->transid != root->last_trans); | 199 | WARN_ON(root->ref_cows && trans->transid != root->last_trans); |
211 | 200 | ||
212 | level = btrfs_header_level(buf); | 201 | level = btrfs_header_level(buf); |
213 | nritems = btrfs_header_nritems(buf); | ||
214 | if (level == 0) | 202 | if (level == 0) |
215 | btrfs_item_key(buf, &disk_key, 0); | 203 | btrfs_item_key(buf, &disk_key, 0); |
216 | else | 204 | else |
@@ -458,10 +446,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
458 | else | 446 | else |
459 | parent_start = 0; | 447 | parent_start = 0; |
460 | 448 | ||
461 | spin_lock(&root->node_lock); | ||
462 | root->node = cow; | ||
463 | extent_buffer_get(cow); | 449 | extent_buffer_get(cow); |
464 | spin_unlock(&root->node_lock); | 450 | rcu_assign_pointer(root->node, cow); |
465 | 451 | ||
466 | btrfs_free_tree_block(trans, root, buf, parent_start, | 452 | btrfs_free_tree_block(trans, root, buf, parent_start, |
467 | last_ref); | 453 | last_ref); |
@@ -542,6 +528,9 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
542 | 528 | ||
543 | ret = __btrfs_cow_block(trans, root, buf, parent, | 529 | ret = __btrfs_cow_block(trans, root, buf, parent, |
544 | parent_slot, cow_ret, search_start, 0); | 530 | parent_slot, cow_ret, search_start, 0); |
531 | |||
532 | trace_btrfs_cow_block(root, buf, *cow_ret); | ||
533 | |||
545 | return ret; | 534 | return ret; |
546 | } | 535 | } |
547 | 536 | ||
@@ -686,6 +675,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
686 | if (!cur) { | 675 | if (!cur) { |
687 | cur = read_tree_block(root, blocknr, | 676 | cur = read_tree_block(root, blocknr, |
688 | blocksize, gen); | 677 | blocksize, gen); |
678 | if (!cur) | ||
679 | return -EIO; | ||
689 | } else if (!uptodate) { | 680 | } else if (!uptodate) { |
690 | btrfs_read_buffer(cur, gen); | 681 | btrfs_read_buffer(cur, gen); |
691 | } | 682 | } |
@@ -732,122 +723,6 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root, | |||
732 | return btrfs_item_offset_nr(leaf, nr - 1); | 723 | return btrfs_item_offset_nr(leaf, nr - 1); |
733 | } | 724 | } |
734 | 725 | ||
735 | /* | ||
736 | * extra debugging checks to make sure all the items in a key are | ||
737 | * well formed and in the proper order | ||
738 | */ | ||
739 | static int check_node(struct btrfs_root *root, struct btrfs_path *path, | ||
740 | int level) | ||
741 | { | ||
742 | struct extent_buffer *parent = NULL; | ||
743 | struct extent_buffer *node = path->nodes[level]; | ||
744 | struct btrfs_disk_key parent_key; | ||
745 | struct btrfs_disk_key node_key; | ||
746 | int parent_slot; | ||
747 | int slot; | ||
748 | struct btrfs_key cpukey; | ||
749 | u32 nritems = btrfs_header_nritems(node); | ||
750 | |||
751 | if (path->nodes[level + 1]) | ||
752 | parent = path->nodes[level + 1]; | ||
753 | |||
754 | slot = path->slots[level]; | ||
755 | BUG_ON(nritems == 0); | ||
756 | if (parent) { | ||
757 | parent_slot = path->slots[level + 1]; | ||
758 | btrfs_node_key(parent, &parent_key, parent_slot); | ||
759 | btrfs_node_key(node, &node_key, 0); | ||
760 | BUG_ON(memcmp(&parent_key, &node_key, | ||
761 | sizeof(struct btrfs_disk_key))); | ||
762 | BUG_ON(btrfs_node_blockptr(parent, parent_slot) != | ||
763 | btrfs_header_bytenr(node)); | ||
764 | } | ||
765 | BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); | ||
766 | if (slot != 0) { | ||
767 | btrfs_node_key_to_cpu(node, &cpukey, slot - 1); | ||
768 | btrfs_node_key(node, &node_key, slot); | ||
769 | BUG_ON(comp_keys(&node_key, &cpukey) <= 0); | ||
770 | } | ||
771 | if (slot < nritems - 1) { | ||
772 | btrfs_node_key_to_cpu(node, &cpukey, slot + 1); | ||
773 | btrfs_node_key(node, &node_key, slot); | ||
774 | BUG_ON(comp_keys(&node_key, &cpukey) >= 0); | ||
775 | } | ||
776 | return 0; | ||
777 | } | ||
778 | |||
779 | /* | ||
780 | * extra checking to make sure all the items in a leaf are | ||
781 | * well formed and in the proper order | ||
782 | */ | ||
783 | static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, | ||
784 | int level) | ||
785 | { | ||
786 | struct extent_buffer *leaf = path->nodes[level]; | ||
787 | struct extent_buffer *parent = NULL; | ||
788 | int parent_slot; | ||
789 | struct btrfs_key cpukey; | ||
790 | struct btrfs_disk_key parent_key; | ||
791 | struct btrfs_disk_key leaf_key; | ||
792 | int slot = path->slots[0]; | ||
793 | |||
794 | u32 nritems = btrfs_header_nritems(leaf); | ||
795 | |||
796 | if (path->nodes[level + 1]) | ||
797 | parent = path->nodes[level + 1]; | ||
798 | |||
799 | if (nritems == 0) | ||
800 | return 0; | ||
801 | |||
802 | if (parent) { | ||
803 | parent_slot = path->slots[level + 1]; | ||
804 | btrfs_node_key(parent, &parent_key, parent_slot); | ||
805 | btrfs_item_key(leaf, &leaf_key, 0); | ||
806 | |||
807 | BUG_ON(memcmp(&parent_key, &leaf_key, | ||
808 | sizeof(struct btrfs_disk_key))); | ||
809 | BUG_ON(btrfs_node_blockptr(parent, parent_slot) != | ||
810 | btrfs_header_bytenr(leaf)); | ||
811 | } | ||
812 | if (slot != 0 && slot < nritems - 1) { | ||
813 | btrfs_item_key(leaf, &leaf_key, slot); | ||
814 | btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1); | ||
815 | if (comp_keys(&leaf_key, &cpukey) <= 0) { | ||
816 | btrfs_print_leaf(root, leaf); | ||
817 | printk(KERN_CRIT "slot %d offset bad key\n", slot); | ||
818 | BUG_ON(1); | ||
819 | } | ||
820 | if (btrfs_item_offset_nr(leaf, slot - 1) != | ||
821 | btrfs_item_end_nr(leaf, slot)) { | ||
822 | btrfs_print_leaf(root, leaf); | ||
823 | printk(KERN_CRIT "slot %d offset bad\n", slot); | ||
824 | BUG_ON(1); | ||
825 | } | ||
826 | } | ||
827 | if (slot < nritems - 1) { | ||
828 | btrfs_item_key(leaf, &leaf_key, slot); | ||
829 | btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1); | ||
830 | BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0); | ||
831 | if (btrfs_item_offset_nr(leaf, slot) != | ||
832 | btrfs_item_end_nr(leaf, slot + 1)) { | ||
833 | btrfs_print_leaf(root, leaf); | ||
834 | printk(KERN_CRIT "slot %d offset bad\n", slot); | ||
835 | BUG_ON(1); | ||
836 | } | ||
837 | } | ||
838 | BUG_ON(btrfs_item_offset_nr(leaf, 0) + | ||
839 | btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)); | ||
840 | return 0; | ||
841 | } | ||
842 | |||
843 | static noinline int check_block(struct btrfs_root *root, | ||
844 | struct btrfs_path *path, int level) | ||
845 | { | ||
846 | return 0; | ||
847 | if (level == 0) | ||
848 | return check_leaf(root, path, level); | ||
849 | return check_node(root, path, level); | ||
850 | } | ||
851 | 726 | ||
852 | /* | 727 | /* |
853 | * search for key in the extent_buffer. The items start at offset p, | 728 | * search for key in the extent_buffer. The items start at offset p, |
@@ -1008,7 +883,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1008 | int wret; | 883 | int wret; |
1009 | int pslot; | 884 | int pslot; |
1010 | int orig_slot = path->slots[level]; | 885 | int orig_slot = path->slots[level]; |
1011 | int err_on_enospc = 0; | ||
1012 | u64 orig_ptr; | 886 | u64 orig_ptr; |
1013 | 887 | ||
1014 | if (level == 0) | 888 | if (level == 0) |
@@ -1047,9 +921,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1047 | goto enospc; | 921 | goto enospc; |
1048 | } | 922 | } |
1049 | 923 | ||
1050 | spin_lock(&root->node_lock); | 924 | rcu_assign_pointer(root->node, child); |
1051 | root->node = child; | ||
1052 | spin_unlock(&root->node_lock); | ||
1053 | 925 | ||
1054 | add_root_to_dirty_list(root); | 926 | add_root_to_dirty_list(root); |
1055 | btrfs_tree_unlock(child); | 927 | btrfs_tree_unlock(child); |
@@ -1071,8 +943,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1071 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) | 943 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) |
1072 | return 0; | 944 | return 0; |
1073 | 945 | ||
1074 | if (btrfs_header_nritems(mid) < 2) | 946 | btrfs_header_nritems(mid); |
1075 | err_on_enospc = 1; | ||
1076 | 947 | ||
1077 | left = read_node_slot(root, parent, pslot - 1); | 948 | left = read_node_slot(root, parent, pslot - 1); |
1078 | if (left) { | 949 | if (left) { |
@@ -1103,8 +974,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1103 | wret = push_node_left(trans, root, left, mid, 1); | 974 | wret = push_node_left(trans, root, left, mid, 1); |
1104 | if (wret < 0) | 975 | if (wret < 0) |
1105 | ret = wret; | 976 | ret = wret; |
1106 | if (btrfs_header_nritems(mid) < 2) | 977 | btrfs_header_nritems(mid); |
1107 | err_on_enospc = 1; | ||
1108 | } | 978 | } |
1109 | 979 | ||
1110 | /* | 980 | /* |
@@ -1191,7 +1061,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1191 | } | 1061 | } |
1192 | } | 1062 | } |
1193 | /* double check we haven't messed things up */ | 1063 | /* double check we haven't messed things up */ |
1194 | check_block(root, path, level); | ||
1195 | if (orig_ptr != | 1064 | if (orig_ptr != |
1196 | btrfs_node_blockptr(path->nodes[level], path->slots[level])) | 1065 | btrfs_node_blockptr(path->nodes[level], path->slots[level])) |
1197 | BUG(); | 1066 | BUG(); |
@@ -1224,14 +1093,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1224 | int wret; | 1093 | int wret; |
1225 | int pslot; | 1094 | int pslot; |
1226 | int orig_slot = path->slots[level]; | 1095 | int orig_slot = path->slots[level]; |
1227 | u64 orig_ptr; | ||
1228 | 1096 | ||
1229 | if (level == 0) | 1097 | if (level == 0) |
1230 | return 1; | 1098 | return 1; |
1231 | 1099 | ||
1232 | mid = path->nodes[level]; | 1100 | mid = path->nodes[level]; |
1233 | WARN_ON(btrfs_header_generation(mid) != trans->transid); | 1101 | WARN_ON(btrfs_header_generation(mid) != trans->transid); |
1234 | orig_ptr = btrfs_node_blockptr(mid, orig_slot); | ||
1235 | 1102 | ||
1236 | if (level < BTRFS_MAX_LEVEL - 1) | 1103 | if (level < BTRFS_MAX_LEVEL - 1) |
1237 | parent = path->nodes[level + 1]; | 1104 | parent = path->nodes[level + 1]; |
@@ -1355,11 +1222,13 @@ static void reada_for_search(struct btrfs_root *root, | |||
1355 | u64 search; | 1222 | u64 search; |
1356 | u64 target; | 1223 | u64 target; |
1357 | u64 nread = 0; | 1224 | u64 nread = 0; |
1225 | u64 gen; | ||
1358 | int direction = path->reada; | 1226 | int direction = path->reada; |
1359 | struct extent_buffer *eb; | 1227 | struct extent_buffer *eb; |
1360 | u32 nr; | 1228 | u32 nr; |
1361 | u32 blocksize; | 1229 | u32 blocksize; |
1362 | u32 nscan = 0; | 1230 | u32 nscan = 0; |
1231 | bool map = true; | ||
1363 | 1232 | ||
1364 | if (level != 1) | 1233 | if (level != 1) |
1365 | return; | 1234 | return; |
@@ -1381,7 +1250,19 @@ static void reada_for_search(struct btrfs_root *root, | |||
1381 | 1250 | ||
1382 | nritems = btrfs_header_nritems(node); | 1251 | nritems = btrfs_header_nritems(node); |
1383 | nr = slot; | 1252 | nr = slot; |
1253 | if (node->map_token || path->skip_locking) | ||
1254 | map = false; | ||
1255 | |||
1384 | while (1) { | 1256 | while (1) { |
1257 | if (map && !node->map_token) { | ||
1258 | unsigned long offset = btrfs_node_key_ptr_offset(nr); | ||
1259 | map_private_extent_buffer(node, offset, | ||
1260 | sizeof(struct btrfs_key_ptr), | ||
1261 | &node->map_token, | ||
1262 | &node->kaddr, | ||
1263 | &node->map_start, | ||
1264 | &node->map_len, KM_USER1); | ||
1265 | } | ||
1385 | if (direction < 0) { | 1266 | if (direction < 0) { |
1386 | if (nr == 0) | 1267 | if (nr == 0) |
1387 | break; | 1268 | break; |
@@ -1399,14 +1280,23 @@ static void reada_for_search(struct btrfs_root *root, | |||
1399 | search = btrfs_node_blockptr(node, nr); | 1280 | search = btrfs_node_blockptr(node, nr); |
1400 | if ((search <= target && target - search <= 65536) || | 1281 | if ((search <= target && target - search <= 65536) || |
1401 | (search > target && search - target <= 65536)) { | 1282 | (search > target && search - target <= 65536)) { |
1402 | readahead_tree_block(root, search, blocksize, | 1283 | gen = btrfs_node_ptr_generation(node, nr); |
1403 | btrfs_node_ptr_generation(node, nr)); | 1284 | if (map && node->map_token) { |
1285 | unmap_extent_buffer(node, node->map_token, | ||
1286 | KM_USER1); | ||
1287 | node->map_token = NULL; | ||
1288 | } | ||
1289 | readahead_tree_block(root, search, blocksize, gen); | ||
1404 | nread += blocksize; | 1290 | nread += blocksize; |
1405 | } | 1291 | } |
1406 | nscan++; | 1292 | nscan++; |
1407 | if ((nread > 65536 || nscan > 32)) | 1293 | if ((nread > 65536 || nscan > 32)) |
1408 | break; | 1294 | break; |
1409 | } | 1295 | } |
1296 | if (map && node->map_token) { | ||
1297 | unmap_extent_buffer(node, node->map_token, KM_USER1); | ||
1298 | node->map_token = NULL; | ||
1299 | } | ||
1410 | } | 1300 | } |
1411 | 1301 | ||
1412 | /* | 1302 | /* |
@@ -1454,7 +1344,7 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
1454 | ret = -EAGAIN; | 1344 | ret = -EAGAIN; |
1455 | 1345 | ||
1456 | /* release the whole path */ | 1346 | /* release the whole path */ |
1457 | btrfs_release_path(root, path); | 1347 | btrfs_release_path(path); |
1458 | 1348 | ||
1459 | /* read the blocks */ | 1349 | /* read the blocks */ |
1460 | if (block1) | 1350 | if (block1) |
@@ -1577,13 +1467,33 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
1577 | blocksize = btrfs_level_size(root, level - 1); | 1467 | blocksize = btrfs_level_size(root, level - 1); |
1578 | 1468 | ||
1579 | tmp = btrfs_find_tree_block(root, blocknr, blocksize); | 1469 | tmp = btrfs_find_tree_block(root, blocknr, blocksize); |
1580 | if (tmp && btrfs_buffer_uptodate(tmp, gen)) { | 1470 | if (tmp) { |
1581 | /* | 1471 | if (btrfs_buffer_uptodate(tmp, 0)) { |
1582 | * we found an up to date block without sleeping, return | 1472 | if (btrfs_buffer_uptodate(tmp, gen)) { |
1583 | * right away | 1473 | /* |
1584 | */ | 1474 | * we found an up to date block without |
1585 | *eb_ret = tmp; | 1475 | * sleeping, return |
1586 | return 0; | 1476 | * right away |
1477 | */ | ||
1478 | *eb_ret = tmp; | ||
1479 | return 0; | ||
1480 | } | ||
1481 | /* the pages were up to date, but we failed | ||
1482 | * the generation number check. Do a full | ||
1483 | * read for the generation number that is correct. | ||
1484 | * We must do this without dropping locks so | ||
1485 | * we can trust our generation number | ||
1486 | */ | ||
1487 | free_extent_buffer(tmp); | ||
1488 | tmp = read_tree_block(root, blocknr, blocksize, gen); | ||
1489 | if (tmp && btrfs_buffer_uptodate(tmp, gen)) { | ||
1490 | *eb_ret = tmp; | ||
1491 | return 0; | ||
1492 | } | ||
1493 | free_extent_buffer(tmp); | ||
1494 | btrfs_release_path(p); | ||
1495 | return -EIO; | ||
1496 | } | ||
1587 | } | 1497 | } |
1588 | 1498 | ||
1589 | /* | 1499 | /* |
@@ -1596,12 +1506,11 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
1596 | btrfs_unlock_up_safe(p, level + 1); | 1506 | btrfs_unlock_up_safe(p, level + 1); |
1597 | btrfs_set_path_blocking(p); | 1507 | btrfs_set_path_blocking(p); |
1598 | 1508 | ||
1599 | if (tmp) | 1509 | free_extent_buffer(tmp); |
1600 | free_extent_buffer(tmp); | ||
1601 | if (p->reada) | 1510 | if (p->reada) |
1602 | reada_for_search(root, p, level, slot, key->objectid); | 1511 | reada_for_search(root, p, level, slot, key->objectid); |
1603 | 1512 | ||
1604 | btrfs_release_path(NULL, p); | 1513 | btrfs_release_path(p); |
1605 | 1514 | ||
1606 | ret = -EAGAIN; | 1515 | ret = -EAGAIN; |
1607 | tmp = read_tree_block(root, blocknr, blocksize, 0); | 1516 | tmp = read_tree_block(root, blocknr, blocksize, 0); |
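To summarize the cache-hit path added above: if the cached buffer's pages are up to date but the generation check fails, the block is re-read while locks are still held, so the generation being validated cannot change underneath; a failed re-read releases the path and returns -EIO. A hedged sketch, assuming read_tree_block(), btrfs_buffer_uptodate() and free_extent_buffer() behave as in the hunk (the helper name is illustrative):

    /* re-read a cached-but-stale buffer without dropping locks */
    static struct extent_buffer *reread_for_gen(struct btrfs_root *root,
                                                u64 blocknr, u32 blocksize,
                                                u64 gen)
    {
            struct extent_buffer *eb;

            eb = read_tree_block(root, blocknr, blocksize, gen);
            if (eb && btrfs_buffer_uptodate(eb, gen))
                    return eb;      /* caller sets *eb_ret and returns 0 */
            free_extent_buffer(eb);
            return NULL;            /* caller releases the path, -EIO */
    }
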
@@ -1670,7 +1579,7 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans, | |||
1670 | } | 1579 | } |
1671 | b = p->nodes[level]; | 1580 | b = p->nodes[level]; |
1672 | if (!b) { | 1581 | if (!b) { |
1673 | btrfs_release_path(NULL, p); | 1582 | btrfs_release_path(p); |
1674 | goto again; | 1583 | goto again; |
1675 | } | 1584 | } |
1676 | BUG_ON(btrfs_header_nritems(b) == 1); | 1585 | BUG_ON(btrfs_header_nritems(b) == 1); |
@@ -1760,9 +1669,6 @@ again: | |||
1760 | } | 1669 | } |
1761 | cow_done: | 1670 | cow_done: |
1762 | BUG_ON(!cow && ins_len); | 1671 | BUG_ON(!cow && ins_len); |
1763 | if (level != btrfs_header_level(b)) | ||
1764 | WARN_ON(1); | ||
1765 | level = btrfs_header_level(b); | ||
1766 | 1672 | ||
1767 | p->nodes[level] = b; | 1673 | p->nodes[level] = b; |
1768 | if (!p->skip_locking) | 1674 | if (!p->skip_locking) |
@@ -1784,12 +1690,6 @@ cow_done: | |||
1784 | if (!cow) | 1690 | if (!cow) |
1785 | btrfs_unlock_up_safe(p, level + 1); | 1691 | btrfs_unlock_up_safe(p, level + 1); |
1786 | 1692 | ||
1787 | ret = check_block(root, p, level); | ||
1788 | if (ret) { | ||
1789 | ret = -1; | ||
1790 | goto done; | ||
1791 | } | ||
1792 | |||
1793 | ret = bin_search(b, key, level, &slot); | 1693 | ret = bin_search(b, key, level, &slot); |
1794 | 1694 | ||
1795 | if (level != 0) { | 1695 | if (level != 0) { |
@@ -1866,7 +1766,7 @@ done: | |||
1866 | if (!p->leave_spinning) | 1766 | if (!p->leave_spinning) |
1867 | btrfs_set_path_blocking(p); | 1767 | btrfs_set_path_blocking(p); |
1868 | if (ret < 0) | 1768 | if (ret < 0) |
1869 | btrfs_release_path(root, p); | 1769 | btrfs_release_path(p); |
1870 | return ret; | 1770 | return ret; |
1871 | } | 1771 | } |
1872 | 1772 | ||
@@ -2116,10 +2016,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2116 | 2016 | ||
2117 | btrfs_mark_buffer_dirty(c); | 2017 | btrfs_mark_buffer_dirty(c); |
2118 | 2018 | ||
2119 | spin_lock(&root->node_lock); | ||
2120 | old = root->node; | 2019 | old = root->node; |
2121 | root->node = c; | 2020 | rcu_assign_pointer(root->node, c); |
2122 | spin_unlock(&root->node_lock); | ||
2123 | 2021 | ||
2124 | /* the super has an extra ref to root->node */ | 2022 | /* the super has an extra ref to root->node */ |
2125 | free_extent_buffer(old); | 2023 | free_extent_buffer(old); |
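Replacing the node_lock'd pointer swap with rcu_assign_pointer() means the root node pointer is now published with the proper write barrier, and readers are expected to dereference it inside an RCU read-side section. A sketch of the matching reader pattern, assuming extent_buffer_get() takes the extra reference (illustrative, not a quote of the kernel helper):

    struct extent_buffer *eb;

    rcu_read_lock();
    eb = rcu_dereference(root->node);
    extent_buffer_get(eb);          /* pin it before leaving the section */
    rcu_read_unlock();
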
@@ -2502,6 +2400,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2502 | btrfs_assert_tree_locked(path->nodes[1]); | 2400 | btrfs_assert_tree_locked(path->nodes[1]); |
2503 | 2401 | ||
2504 | right = read_node_slot(root, upper, slot + 1); | 2402 | right = read_node_slot(root, upper, slot + 1); |
2403 | if (right == NULL) | ||
2404 | return 1; | ||
2405 | |||
2505 | btrfs_tree_lock(right); | 2406 | btrfs_tree_lock(right); |
2506 | btrfs_set_lock_blocking(right); | 2407 | btrfs_set_lock_blocking(right); |
2507 | 2408 | ||
@@ -2548,7 +2449,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2548 | { | 2449 | { |
2549 | struct btrfs_disk_key disk_key; | 2450 | struct btrfs_disk_key disk_key; |
2550 | struct extent_buffer *right = path->nodes[0]; | 2451 | struct extent_buffer *right = path->nodes[0]; |
2551 | int slot; | ||
2552 | int i; | 2452 | int i; |
2553 | int push_space = 0; | 2453 | int push_space = 0; |
2554 | int push_items = 0; | 2454 | int push_items = 0; |
@@ -2560,8 +2460,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2560 | u32 this_item_size; | 2460 | u32 this_item_size; |
2561 | u32 old_left_item_size; | 2461 | u32 old_left_item_size; |
2562 | 2462 | ||
2563 | slot = path->slots[1]; | ||
2564 | |||
2565 | if (empty) | 2463 | if (empty) |
2566 | nr = min(right_nritems, max_slot); | 2464 | nr = min(right_nritems, max_slot); |
2567 | else | 2465 | else |
@@ -2755,6 +2653,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2755 | btrfs_assert_tree_locked(path->nodes[1]); | 2653 | btrfs_assert_tree_locked(path->nodes[1]); |
2756 | 2654 | ||
2757 | left = read_node_slot(root, path->nodes[1], slot - 1); | 2655 | left = read_node_slot(root, path->nodes[1], slot - 1); |
2656 | if (left == NULL) | ||
2657 | return 1; | ||
2658 | |||
2758 | btrfs_tree_lock(left); | 2659 | btrfs_tree_lock(left); |
2759 | btrfs_set_lock_blocking(left); | 2660 | btrfs_set_lock_blocking(left); |
2760 | 2661 | ||
@@ -3138,7 +3039,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, | |||
3138 | struct btrfs_file_extent_item); | 3039 | struct btrfs_file_extent_item); |
3139 | extent_len = btrfs_file_extent_num_bytes(leaf, fi); | 3040 | extent_len = btrfs_file_extent_num_bytes(leaf, fi); |
3140 | } | 3041 | } |
3141 | btrfs_release_path(root, path); | 3042 | btrfs_release_path(path); |
3142 | 3043 | ||
3143 | path->keep_locks = 1; | 3044 | path->keep_locks = 1; |
3144 | path->search_for_split = 1; | 3045 | path->search_for_split = 1; |
@@ -3328,9 +3229,7 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, | |||
3328 | struct btrfs_path *path, | 3229 | struct btrfs_path *path, |
3329 | u32 new_size, int from_end) | 3230 | u32 new_size, int from_end) |
3330 | { | 3231 | { |
3331 | int ret = 0; | ||
3332 | int slot; | 3232 | int slot; |
3333 | int slot_orig; | ||
3334 | struct extent_buffer *leaf; | 3233 | struct extent_buffer *leaf; |
3335 | struct btrfs_item *item; | 3234 | struct btrfs_item *item; |
3336 | u32 nritems; | 3235 | u32 nritems; |
@@ -3340,7 +3239,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, | |||
3340 | unsigned int size_diff; | 3239 | unsigned int size_diff; |
3341 | int i; | 3240 | int i; |
3342 | 3241 | ||
3343 | slot_orig = path->slots[0]; | ||
3344 | leaf = path->nodes[0]; | 3242 | leaf = path->nodes[0]; |
3345 | slot = path->slots[0]; | 3243 | slot = path->slots[0]; |
3346 | 3244 | ||
@@ -3428,12 +3326,11 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, | |||
3428 | btrfs_set_item_size(leaf, item, new_size); | 3326 | btrfs_set_item_size(leaf, item, new_size); |
3429 | btrfs_mark_buffer_dirty(leaf); | 3327 | btrfs_mark_buffer_dirty(leaf); |
3430 | 3328 | ||
3431 | ret = 0; | ||
3432 | if (btrfs_leaf_free_space(root, leaf) < 0) { | 3329 | if (btrfs_leaf_free_space(root, leaf) < 0) { |
3433 | btrfs_print_leaf(root, leaf); | 3330 | btrfs_print_leaf(root, leaf); |
3434 | BUG(); | 3331 | BUG(); |
3435 | } | 3332 | } |
3436 | return ret; | 3333 | return 0; |
3437 | } | 3334 | } |
3438 | 3335 | ||
3439 | /* | 3336 | /* |
@@ -3443,9 +3340,7 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, | |||
3443 | struct btrfs_root *root, struct btrfs_path *path, | 3340 | struct btrfs_root *root, struct btrfs_path *path, |
3444 | u32 data_size) | 3341 | u32 data_size) |
3445 | { | 3342 | { |
3446 | int ret = 0; | ||
3447 | int slot; | 3343 | int slot; |
3448 | int slot_orig; | ||
3449 | struct extent_buffer *leaf; | 3344 | struct extent_buffer *leaf; |
3450 | struct btrfs_item *item; | 3345 | struct btrfs_item *item; |
3451 | u32 nritems; | 3346 | u32 nritems; |
@@ -3454,7 +3349,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, | |||
3454 | unsigned int old_size; | 3349 | unsigned int old_size; |
3455 | int i; | 3350 | int i; |
3456 | 3351 | ||
3457 | slot_orig = path->slots[0]; | ||
3458 | leaf = path->nodes[0]; | 3352 | leaf = path->nodes[0]; |
3459 | 3353 | ||
3460 | nritems = btrfs_header_nritems(leaf); | 3354 | nritems = btrfs_header_nritems(leaf); |
@@ -3510,12 +3404,11 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, | |||
3510 | btrfs_set_item_size(leaf, item, old_size + data_size); | 3404 | btrfs_set_item_size(leaf, item, old_size + data_size); |
3511 | btrfs_mark_buffer_dirty(leaf); | 3405 | btrfs_mark_buffer_dirty(leaf); |
3512 | 3406 | ||
3513 | ret = 0; | ||
3514 | if (btrfs_leaf_free_space(root, leaf) < 0) { | 3407 | if (btrfs_leaf_free_space(root, leaf) < 0) { |
3515 | btrfs_print_leaf(root, leaf); | 3408 | btrfs_print_leaf(root, leaf); |
3516 | BUG(); | 3409 | BUG(); |
3517 | } | 3410 | } |
3518 | return ret; | 3411 | return 0; |
3519 | } | 3412 | } |
3520 | 3413 | ||
3521 | /* | 3414 | /* |
@@ -3675,11 +3568,10 @@ out: | |||
3675 | * to save stack depth by doing the bulk of the work in a function | 3568 | * to save stack depth by doing the bulk of the work in a function |
3676 | * that doesn't call btrfs_search_slot | 3569 | * that doesn't call btrfs_search_slot |
3677 | */ | 3570 | */ |
3678 | static noinline_for_stack int | 3571 | int setup_items_for_insert(struct btrfs_trans_handle *trans, |
3679 | setup_items_for_insert(struct btrfs_trans_handle *trans, | 3572 | struct btrfs_root *root, struct btrfs_path *path, |
3680 | struct btrfs_root *root, struct btrfs_path *path, | 3573 | struct btrfs_key *cpu_key, u32 *data_size, |
3681 | struct btrfs_key *cpu_key, u32 *data_size, | 3574 | u32 total_data, u32 total_size, int nr) |
3682 | u32 total_data, u32 total_size, int nr) | ||
3683 | { | 3575 | { |
3684 | struct btrfs_item *item; | 3576 | struct btrfs_item *item; |
3685 | int i; | 3577 | int i; |
@@ -3763,7 +3655,6 @@ setup_items_for_insert(struct btrfs_trans_handle *trans, | |||
3763 | 3655 | ||
3764 | ret = 0; | 3656 | ret = 0; |
3765 | if (slot == 0) { | 3657 | if (slot == 0) { |
3766 | struct btrfs_disk_key disk_key; | ||
3767 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); | 3658 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); |
3768 | ret = fixup_low_keys(trans, root, path, &disk_key, 1); | 3659 | ret = fixup_low_keys(trans, root, path, &disk_key, 1); |
3769 | } | 3660 | } |
@@ -3787,7 +3678,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | |||
3787 | struct btrfs_key *cpu_key, u32 *data_size, | 3678 | struct btrfs_key *cpu_key, u32 *data_size, |
3788 | int nr) | 3679 | int nr) |
3789 | { | 3680 | { |
3790 | struct extent_buffer *leaf; | ||
3791 | int ret = 0; | 3681 | int ret = 0; |
3792 | int slot; | 3682 | int slot; |
3793 | int i; | 3683 | int i; |
@@ -3804,7 +3694,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | |||
3804 | if (ret < 0) | 3694 | if (ret < 0) |
3805 | goto out; | 3695 | goto out; |
3806 | 3696 | ||
3807 | leaf = path->nodes[0]; | ||
3808 | slot = path->slots[0]; | 3697 | slot = path->slots[0]; |
3809 | BUG_ON(slot < 0); | 3698 | BUG_ON(slot < 0); |
3810 | 3699 | ||
@@ -3829,7 +3718,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
3829 | unsigned long ptr; | 3718 | unsigned long ptr; |
3830 | 3719 | ||
3831 | path = btrfs_alloc_path(); | 3720 | path = btrfs_alloc_path(); |
3832 | BUG_ON(!path); | 3721 | if (!path) |
3722 | return -ENOMEM; | ||
3833 | ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); | 3723 | ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); |
3834 | if (!ret) { | 3724 | if (!ret) { |
3835 | leaf = path->nodes[0]; | 3725 | leaf = path->nodes[0]; |
@@ -4066,7 +3956,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4066 | else | 3956 | else |
4067 | return 1; | 3957 | return 1; |
4068 | 3958 | ||
4069 | btrfs_release_path(root, path); | 3959 | btrfs_release_path(path); |
4070 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 3960 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
4071 | if (ret < 0) | 3961 | if (ret < 0) |
4072 | return ret; | 3962 | return ret; |
@@ -4190,7 +4080,7 @@ find_next_key: | |||
4190 | sret = btrfs_find_next_key(root, path, min_key, level, | 4080 | sret = btrfs_find_next_key(root, path, min_key, level, |
4191 | cache_only, min_trans); | 4081 | cache_only, min_trans); |
4192 | if (sret == 0) { | 4082 | if (sret == 0) { |
4193 | btrfs_release_path(root, path); | 4083 | btrfs_release_path(path); |
4194 | goto again; | 4084 | goto again; |
4195 | } else { | 4085 | } else { |
4196 | goto out; | 4086 | goto out; |
@@ -4206,6 +4096,7 @@ find_next_key: | |||
4206 | } | 4096 | } |
4207 | btrfs_set_path_blocking(path); | 4097 | btrfs_set_path_blocking(path); |
4208 | cur = read_node_slot(root, cur, slot); | 4098 | cur = read_node_slot(root, cur, slot); |
4099 | BUG_ON(!cur); | ||
4209 | 4100 | ||
4210 | btrfs_tree_lock(cur); | 4101 | btrfs_tree_lock(cur); |
4211 | 4102 | ||
@@ -4268,7 +4159,7 @@ next: | |||
4268 | btrfs_node_key_to_cpu(c, &cur_key, slot); | 4159 | btrfs_node_key_to_cpu(c, &cur_key, slot); |
4269 | 4160 | ||
4270 | orig_lowest = path->lowest_level; | 4161 | orig_lowest = path->lowest_level; |
4271 | btrfs_release_path(root, path); | 4162 | btrfs_release_path(path); |
4272 | path->lowest_level = level; | 4163 | path->lowest_level = level; |
4273 | ret = btrfs_search_slot(NULL, root, &cur_key, path, | 4164 | ret = btrfs_search_slot(NULL, root, &cur_key, path, |
4274 | 0, 0); | 4165 | 0, 0); |
@@ -4345,7 +4236,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4345 | again: | 4236 | again: |
4346 | level = 1; | 4237 | level = 1; |
4347 | next = NULL; | 4238 | next = NULL; |
4348 | btrfs_release_path(root, path); | 4239 | btrfs_release_path(path); |
4349 | 4240 | ||
4350 | path->keep_locks = 1; | 4241 | path->keep_locks = 1; |
4351 | 4242 | ||
@@ -4401,7 +4292,7 @@ again: | |||
4401 | goto again; | 4292 | goto again; |
4402 | 4293 | ||
4403 | if (ret < 0) { | 4294 | if (ret < 0) { |
4404 | btrfs_release_path(root, path); | 4295 | btrfs_release_path(path); |
4405 | goto done; | 4296 | goto done; |
4406 | } | 4297 | } |
4407 | 4298 | ||
@@ -4440,7 +4331,7 @@ again: | |||
4440 | goto again; | 4331 | goto again; |
4441 | 4332 | ||
4442 | if (ret < 0) { | 4333 | if (ret < 0) { |
4443 | btrfs_release_path(root, path); | 4334 | btrfs_release_path(path); |
4444 | goto done; | 4335 | goto done; |
4445 | } | 4336 | } |
4446 | 4337 | ||
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index eaf286abad17..3b859a3e6a0e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -19,18 +19,21 @@ | |||
19 | #ifndef __BTRFS_CTREE__ | 19 | #ifndef __BTRFS_CTREE__ |
20 | #define __BTRFS_CTREE__ | 20 | #define __BTRFS_CTREE__ |
21 | 21 | ||
22 | #include <linux/version.h> | ||
23 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
24 | #include <linux/highmem.h> | 23 | #include <linux/highmem.h> |
25 | #include <linux/fs.h> | 24 | #include <linux/fs.h> |
25 | #include <linux/rwsem.h> | ||
26 | #include <linux/completion.h> | 26 | #include <linux/completion.h> |
27 | #include <linux/backing-dev.h> | 27 | #include <linux/backing-dev.h> |
28 | #include <linux/wait.h> | 28 | #include <linux/wait.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/kobject.h> | ||
31 | #include <trace/events/btrfs.h> | ||
30 | #include <asm/kmap_types.h> | 32 | #include <asm/kmap_types.h> |
31 | #include "extent_io.h" | 33 | #include "extent_io.h" |
32 | #include "extent_map.h" | 34 | #include "extent_map.h" |
33 | #include "async-thread.h" | 35 | #include "async-thread.h" |
36 | #include "ioctl.h" | ||
34 | 37 | ||
35 | struct btrfs_trans_handle; | 38 | struct btrfs_trans_handle; |
36 | struct btrfs_transaction; | 39 | struct btrfs_transaction; |
@@ -39,6 +42,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep; | |||
39 | extern struct kmem_cache *btrfs_transaction_cachep; | 42 | extern struct kmem_cache *btrfs_transaction_cachep; |
40 | extern struct kmem_cache *btrfs_bit_radix_cachep; | 43 | extern struct kmem_cache *btrfs_bit_radix_cachep; |
41 | extern struct kmem_cache *btrfs_path_cachep; | 44 | extern struct kmem_cache *btrfs_path_cachep; |
45 | extern struct kmem_cache *btrfs_free_space_cachep; | ||
42 | struct btrfs_ordered_sum; | 46 | struct btrfs_ordered_sum; |
43 | 47 | ||
44 | #define BTRFS_MAGIC "_BHRfS_M" | 48 | #define BTRFS_MAGIC "_BHRfS_M" |
@@ -99,6 +103,15 @@ struct btrfs_ordered_sum; | |||
99 | */ | 103 | */ |
100 | #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL | 104 | #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL |
101 | 105 | ||
106 | /* For storing free space cache */ | ||
107 | #define BTRFS_FREE_SPACE_OBJECTID -11ULL | ||
108 | |||
109 | /* | ||
110 | * The inode number assigned to the special inode for storing | ||
111 | * free ino cache | ||
112 | */ | ||
113 | #define BTRFS_FREE_INO_OBJECTID -12ULL | ||
114 | |||
102 | /* dummy objectid represents multiple objectids */ | 115 | /* dummy objectid represents multiple objectids */ |
103 | #define BTRFS_MULTIPLE_OBJECTIDS -255ULL | 116 | #define BTRFS_MULTIPLE_OBJECTIDS -255ULL |
104 | 117 | ||
@@ -181,7 +194,6 @@ struct btrfs_mapping_tree { | |||
181 | struct extent_map_tree map_tree; | 194 | struct extent_map_tree map_tree; |
182 | }; | 195 | }; |
183 | 196 | ||
184 | #define BTRFS_UUID_SIZE 16 | ||
185 | struct btrfs_dev_item { | 197 | struct btrfs_dev_item { |
186 | /* the internal btrfs device id */ | 198 | /* the internal btrfs device id */ |
187 | __le64 devid; | 199 | __le64 devid; |
@@ -265,6 +277,22 @@ struct btrfs_chunk { | |||
265 | /* additional stripes go here */ | 277 | /* additional stripes go here */ |
266 | } __attribute__ ((__packed__)); | 278 | } __attribute__ ((__packed__)); |
267 | 279 | ||
280 | #define BTRFS_FREE_SPACE_EXTENT 1 | ||
281 | #define BTRFS_FREE_SPACE_BITMAP 2 | ||
282 | |||
283 | struct btrfs_free_space_entry { | ||
284 | __le64 offset; | ||
285 | __le64 bytes; | ||
286 | u8 type; | ||
287 | } __attribute__ ((__packed__)); | ||
288 | |||
289 | struct btrfs_free_space_header { | ||
290 | struct btrfs_disk_key location; | ||
291 | __le64 generation; | ||
292 | __le64 num_entries; | ||
293 | __le64 num_bitmaps; | ||
294 | } __attribute__ ((__packed__)); | ||
295 | |||
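The two structures above define the on-disk format of the free space cache: a header describing the generation and entry counts, followed by a stream of entries that are each either a plain extent or a reference to a bitmap. A hedged sketch of decoding one entry once its bytes are in memory (the helper name is illustrative):

    static void decode_free_space_entry(const struct btrfs_free_space_entry *e,
                                        u64 *offset, u64 *bytes, u8 *type)
    {
            *offset = le64_to_cpu(e->offset);
            *bytes  = le64_to_cpu(e->bytes);
            *type   = e->type;  /* BTRFS_FREE_SPACE_EXTENT or _BITMAP */
    }
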
268 | static inline unsigned long btrfs_chunk_item_size(int num_stripes) | 296 | static inline unsigned long btrfs_chunk_item_size(int num_stripes) |
269 | { | 297 | { |
270 | BUG_ON(num_stripes == 0); | 298 | BUG_ON(num_stripes == 0); |
@@ -272,9 +300,16 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) | |||
272 | sizeof(struct btrfs_stripe) * (num_stripes - 1); | 300 | sizeof(struct btrfs_stripe) * (num_stripes - 1); |
273 | } | 301 | } |
274 | 302 | ||
275 | #define BTRFS_FSID_SIZE 16 | ||
276 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) | 303 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) |
277 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) | 304 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) |
305 | |||
306 | /* | ||
307 | * File system states | ||
308 | */ | ||
309 | |||
310 | /* Errors detected */ | ||
311 | #define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) | ||
312 | |||
278 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) | 313 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) |
279 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) | 314 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) |
280 | 315 | ||
@@ -365,8 +400,10 @@ struct btrfs_super_block { | |||
365 | 400 | ||
366 | char label[BTRFS_LABEL_SIZE]; | 401 | char label[BTRFS_LABEL_SIZE]; |
367 | 402 | ||
403 | __le64 cache_generation; | ||
404 | |||
368 | /* future expansion */ | 405 | /* future expansion */ |
369 | __le64 reserved[32]; | 406 | __le64 reserved[31]; |
370 | u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; | 407 | u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; |
371 | } __attribute__ ((__packed__)); | 408 | } __attribute__ ((__packed__)); |
372 | 409 | ||
@@ -375,13 +412,17 @@ struct btrfs_super_block { | |||
375 | * ones specified below then we will fail to mount | 412 | * ones specified below then we will fail to mount |
376 | */ | 413 | */ |
377 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) | 414 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) |
378 | #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (2ULL << 0) | 415 | #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) |
416 | #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) | ||
417 | #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) | ||
379 | 418 | ||
380 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL | 419 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL |
381 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL | 420 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL |
382 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ | 421 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ |
383 | (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ | 422 | (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ |
384 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL) | 423 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ |
424 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ | ||
425 | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) | ||
385 | 426 | ||
386 | /* | 427 | /* |
387 | * A leaf is full of items. offset and size tell us where to find | 428 | * A leaf is full of items. offset and size tell us where to find |
@@ -474,6 +515,12 @@ struct btrfs_extent_item_v0 { | |||
474 | /* use full backrefs for extent pointers in the block */ | 515 | /* use full backrefs for extent pointers in the block */ |
475 | #define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8) | 516 | #define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8) |
476 | 517 | ||
518 | /* | ||
519 | * this flag is only used internally by scrub and may be changed at any time | ||
520 | * it is only declared here to avoid collisions | ||
521 | */ | ||
522 | #define BTRFS_EXTENT_FLAG_SUPER (1ULL << 48) | ||
523 | |||
477 | struct btrfs_tree_block_info { | 524 | struct btrfs_tree_block_info { |
478 | struct btrfs_disk_key key; | 525 | struct btrfs_disk_key key; |
479 | u8 level; | 526 | u8 level; |
@@ -528,9 +575,11 @@ struct btrfs_timespec { | |||
528 | } __attribute__ ((__packed__)); | 575 | } __attribute__ ((__packed__)); |
529 | 576 | ||
530 | enum btrfs_compression_type { | 577 | enum btrfs_compression_type { |
531 | BTRFS_COMPRESS_NONE = 0, | 578 | BTRFS_COMPRESS_NONE = 0, |
532 | BTRFS_COMPRESS_ZLIB = 1, | 579 | BTRFS_COMPRESS_ZLIB = 1, |
533 | BTRFS_COMPRESS_LAST = 2, | 580 | BTRFS_COMPRESS_LZO = 2, |
581 | BTRFS_COMPRESS_TYPES = 2, | ||
582 | BTRFS_COMPRESS_LAST = 3, | ||
534 | }; | 583 | }; |
535 | 584 | ||
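With LZO added, the enum now distinguishes the number of real compression types (BTRFS_COMPRESS_TYPES) from the end-of-range sentinel (BTRFS_COMPRESS_LAST). A sketch of how a mount-option string might map onto the enum; the actual parsing lives in super.c and this helper is purely illustrative:

    static int compress_type_from_name(const char *name)
    {
            if (strcmp(name, "zlib") == 0)
                    return BTRFS_COMPRESS_ZLIB;
            if (strcmp(name, "lzo") == 0)
                    return BTRFS_COMPRESS_LZO;
            return BTRFS_COMPRESS_NONE;
    }
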
536 | struct btrfs_inode_item { | 585 | struct btrfs_inode_item { |
@@ -574,6 +623,8 @@ struct btrfs_dir_item { | |||
574 | u8 type; | 623 | u8 type; |
575 | } __attribute__ ((__packed__)); | 624 | } __attribute__ ((__packed__)); |
576 | 625 | ||
626 | #define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) | ||
627 | |||
577 | struct btrfs_root_item { | 628 | struct btrfs_root_item { |
578 | struct btrfs_inode_item inode; | 629 | struct btrfs_inode_item inode; |
579 | __le64 generation; | 630 | __le64 generation; |
@@ -675,9 +726,10 @@ struct btrfs_block_group_item { | |||
675 | struct btrfs_space_info { | 726 | struct btrfs_space_info { |
676 | u64 flags; | 727 | u64 flags; |
677 | 728 | ||
678 | u64 total_bytes; /* total bytes in the space */ | 729 | u64 total_bytes; /* total bytes in the space, |
730 | this doesn't take mirrors into account */ | ||
679 | u64 bytes_used; /* total bytes used, | 731 | u64 bytes_used; /* total bytes used, |
680 | this does't take mirrors into account */ | 732 | this doesn't take mirrors into account */ |
681 | u64 bytes_pinned; /* total bytes pinned, will be freed when the | 733 | u64 bytes_pinned; /* total bytes pinned, will be freed when the |
682 | transaction finishes */ | 734 | transaction finishes */ |
683 | u64 bytes_reserved; /* total bytes the allocator has reserved for | 735 | u64 bytes_reserved; /* total bytes the allocator has reserved for |
@@ -687,11 +739,24 @@ struct btrfs_space_info { | |||
687 | u64 bytes_may_use; /* number of bytes that may be used for | 739 | u64 bytes_may_use; /* number of bytes that may be used for |
688 | delalloc/allocations */ | 740 | delalloc/allocations */ |
689 | u64 disk_used; /* total bytes used on disk */ | 741 | u64 disk_used; /* total bytes used on disk */ |
742 | u64 disk_total; /* total bytes on disk, takes mirrors into | ||
743 | account */ | ||
744 | |||
745 | /* | ||
746 | * we bump reservation progress every time we decrement | ||
747 | * bytes_reserved. This way people waiting for reservations | ||
748 | * know something good has happened and they can check | ||
749 | * for progress. The number here isn't to be trusted, it | ||
750 | * just shows reclaim activity | ||
751 | */ | ||
752 | unsigned long reservation_progress; | ||
690 | 753 | ||
691 | int full; /* indicates that we cannot allocate any more | 754 | unsigned int full:1; /* indicates that we cannot allocate any more |
692 | chunks for this space */ | 755 | chunks for this space */ |
693 | int force_alloc; /* set if we need to force a chunk alloc for | 756 | unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ |
694 | this space */ | 757 | |
758 | unsigned int force_alloc; /* set if we need to force a chunk | ||
759 | alloc for this space */ | ||
695 | 760 | ||
696 | struct list_head list; | 761 | struct list_head list; |
697 | 762 | ||
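The reservation_progress counter added above is deliberately weak: it only tells waiters that some reservation was released since they last looked, not how much. A hedged sketch of how a waiter might use it (illustrative fragment; the real wait loop is in extent-tree.c):

    /* info: the struct btrfs_space_info we are trying to reserve from */
    unsigned long snap = info->reservation_progress;

    /* ... wait briefly / flush delalloc ... */

    if (info->reservation_progress != snap)
            goto retry;     /* someone released space, try the reservation again */
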
@@ -732,9 +797,6 @@ struct btrfs_free_cluster { | |||
732 | /* first extent starting offset */ | 797 | /* first extent starting offset */ |
733 | u64 window_start; | 798 | u64 window_start; |
734 | 799 | ||
735 | /* if this cluster simply points at a bitmap in the block group */ | ||
736 | bool points_to_bitmap; | ||
737 | |||
738 | struct btrfs_block_group_cache *block_group; | 800 | struct btrfs_block_group_cache *block_group; |
739 | /* | 801 | /* |
740 | * when a cluster is allocated from a block group, we put the | 802 | * when a cluster is allocated from a block group, we put the |
@@ -750,6 +812,14 @@ enum btrfs_caching_type { | |||
750 | BTRFS_CACHE_FINISHED = 2, | 812 | BTRFS_CACHE_FINISHED = 2, |
751 | }; | 813 | }; |
752 | 814 | ||
815 | enum btrfs_disk_cache_state { | ||
816 | BTRFS_DC_WRITTEN = 0, | ||
817 | BTRFS_DC_ERROR = 1, | ||
818 | BTRFS_DC_CLEAR = 2, | ||
819 | BTRFS_DC_SETUP = 3, | ||
820 | BTRFS_DC_NEED_WRITE = 4, | ||
821 | }; | ||
822 | |||
753 | struct btrfs_caching_control { | 823 | struct btrfs_caching_control { |
754 | struct list_head list; | 824 | struct list_head list; |
755 | struct mutex mutex; | 825 | struct mutex mutex; |
@@ -763,6 +833,7 @@ struct btrfs_block_group_cache { | |||
763 | struct btrfs_key key; | 833 | struct btrfs_key key; |
764 | struct btrfs_block_group_item item; | 834 | struct btrfs_block_group_item item; |
765 | struct btrfs_fs_info *fs_info; | 835 | struct btrfs_fs_info *fs_info; |
836 | struct inode *inode; | ||
766 | spinlock_t lock; | 837 | spinlock_t lock; |
767 | u64 pinned; | 838 | u64 pinned; |
768 | u64 reserved; | 839 | u64 reserved; |
@@ -770,11 +841,11 @@ struct btrfs_block_group_cache { | |||
770 | u64 bytes_super; | 841 | u64 bytes_super; |
771 | u64 flags; | 842 | u64 flags; |
772 | u64 sectorsize; | 843 | u64 sectorsize; |
773 | int extents_thresh; | 844 | unsigned int ro:1; |
774 | int free_extents; | 845 | unsigned int dirty:1; |
775 | int total_bitmaps; | 846 | unsigned int iref:1; |
776 | int ro; | 847 | |
777 | int dirty; | 848 | int disk_cache_state; |
778 | 849 | ||
779 | /* cache tracking stuff */ | 850 | /* cache tracking stuff */ |
780 | int cached; | 851 | int cached; |
@@ -784,9 +855,7 @@ struct btrfs_block_group_cache { | |||
784 | struct btrfs_space_info *space_info; | 855 | struct btrfs_space_info *space_info; |
785 | 856 | ||
786 | /* free space cache stuff */ | 857 | /* free space cache stuff */ |
787 | spinlock_t tree_lock; | 858 | struct btrfs_free_space_ctl *free_space_ctl; |
788 | struct rb_root free_space_offset; | ||
789 | u64 free_space; | ||
790 | 859 | ||
791 | /* block group cache stuff */ | 860 | /* block group cache stuff */ |
792 | struct rb_node cache_node; | 861 | struct rb_node cache_node; |
@@ -806,6 +875,7 @@ struct btrfs_block_group_cache { | |||
806 | struct reloc_control; | 875 | struct reloc_control; |
807 | struct btrfs_device; | 876 | struct btrfs_device; |
808 | struct btrfs_fs_devices; | 877 | struct btrfs_fs_devices; |
878 | struct btrfs_delayed_root; | ||
809 | struct btrfs_fs_info { | 879 | struct btrfs_fs_info { |
810 | u8 fsid[BTRFS_FSID_SIZE]; | 880 | u8 fsid[BTRFS_FSID_SIZE]; |
811 | u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; | 881 | u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; |
@@ -832,7 +902,10 @@ struct btrfs_fs_info { | |||
832 | /* logical->physical extent mapping */ | 902 | /* logical->physical extent mapping */ |
833 | struct btrfs_mapping_tree mapping_tree; | 903 | struct btrfs_mapping_tree mapping_tree; |
834 | 904 | ||
835 | /* block reservation for extent, checksum and root tree */ | 905 | /* |
906 | * block reservation for extent, checksum, root tree and | ||
907 | * delayed dir index item | ||
908 | */ | ||
836 | struct btrfs_block_rsv global_block_rsv; | 909 | struct btrfs_block_rsv global_block_rsv; |
837 | /* block reservation for delay allocation */ | 910 | /* block reservation for delay allocation */ |
838 | struct btrfs_block_rsv delalloc_block_rsv; | 911 | struct btrfs_block_rsv delalloc_block_rsv; |
@@ -856,13 +929,14 @@ struct btrfs_fs_info { | |||
856 | * is required instead of the faster short fsync log commits | 929 | * is required instead of the faster short fsync log commits |
857 | */ | 930 | */ |
858 | u64 last_trans_log_full_commit; | 931 | u64 last_trans_log_full_commit; |
859 | u64 open_ioctl_trans; | 932 | unsigned long mount_opt:20; |
860 | unsigned long mount_opt; | 933 | unsigned long compress_type:4; |
861 | u64 max_inline; | 934 | u64 max_inline; |
862 | u64 alloc_start; | 935 | u64 alloc_start; |
863 | struct btrfs_transaction *running_transaction; | 936 | struct btrfs_transaction *running_transaction; |
864 | wait_queue_head_t transaction_throttle; | 937 | wait_queue_head_t transaction_throttle; |
865 | wait_queue_head_t transaction_wait; | 938 | wait_queue_head_t transaction_wait; |
939 | wait_queue_head_t transaction_blocked_wait; | ||
866 | wait_queue_head_t async_submit_wait; | 940 | wait_queue_head_t async_submit_wait; |
867 | 941 | ||
868 | struct btrfs_super_block super_copy; | 942 | struct btrfs_super_block super_copy; |
@@ -871,7 +945,6 @@ struct btrfs_fs_info { | |||
871 | struct super_block *sb; | 945 | struct super_block *sb; |
872 | struct inode *btree_inode; | 946 | struct inode *btree_inode; |
873 | struct backing_dev_info bdi; | 947 | struct backing_dev_info bdi; |
874 | struct mutex trans_mutex; | ||
875 | struct mutex tree_log_mutex; | 948 | struct mutex tree_log_mutex; |
876 | struct mutex transaction_kthread_mutex; | 949 | struct mutex transaction_kthread_mutex; |
877 | struct mutex cleaner_mutex; | 950 | struct mutex cleaner_mutex; |
@@ -892,6 +965,13 @@ struct btrfs_fs_info { | |||
892 | struct rw_semaphore subvol_sem; | 965 | struct rw_semaphore subvol_sem; |
893 | struct srcu_struct subvol_srcu; | 966 | struct srcu_struct subvol_srcu; |
894 | 967 | ||
968 | spinlock_t trans_lock; | ||
969 | /* | ||
970 | * the reloc mutex goes with the trans lock, it is taken | ||
971 | * during commit to protect us from the relocation code | ||
972 | */ | ||
973 | struct mutex reloc_mutex; | ||
974 | |||
895 | struct list_head trans_list; | 975 | struct list_head trans_list; |
896 | struct list_head hashers; | 976 | struct list_head hashers; |
897 | struct list_head dead_roots; | 977 | struct list_head dead_roots; |
@@ -904,6 +984,7 @@ struct btrfs_fs_info { | |||
904 | atomic_t async_submit_draining; | 984 | atomic_t async_submit_draining; |
905 | atomic_t nr_async_bios; | 985 | atomic_t nr_async_bios; |
906 | atomic_t async_delalloc_pages; | 986 | atomic_t async_delalloc_pages; |
987 | atomic_t open_ioctl_trans; | ||
907 | 988 | ||
908 | /* | 989 | /* |
909 | * this is used by the balancing code to wait for all the pending | 990 | * this is used by the balancing code to wait for all the pending |
@@ -949,6 +1030,7 @@ struct btrfs_fs_info { | |||
949 | struct btrfs_workers endio_meta_workers; | 1030 | struct btrfs_workers endio_meta_workers; |
950 | struct btrfs_workers endio_meta_write_workers; | 1031 | struct btrfs_workers endio_meta_write_workers; |
951 | struct btrfs_workers endio_write_workers; | 1032 | struct btrfs_workers endio_write_workers; |
1033 | struct btrfs_workers endio_freespace_worker; | ||
952 | struct btrfs_workers submit_workers; | 1034 | struct btrfs_workers submit_workers; |
953 | /* | 1035 | /* |
954 | * fixup workers take dirty pages that didn't properly go through | 1036 | * fixup workers take dirty pages that didn't properly go through |
@@ -956,6 +1038,7 @@ struct btrfs_fs_info { | |||
956 | * for the sys_munmap function call path | 1038 | * for the sys_munmap function call path |
957 | */ | 1039 | */ |
958 | struct btrfs_workers fixup_workers; | 1040 | struct btrfs_workers fixup_workers; |
1041 | struct btrfs_workers delayed_workers; | ||
959 | struct task_struct *transaction_kthread; | 1042 | struct task_struct *transaction_kthread; |
960 | struct task_struct *cleaner_kthread; | 1043 | struct task_struct *cleaner_kthread; |
961 | int thread_pool_size; | 1044 | int thread_pool_size; |
@@ -966,6 +1049,7 @@ struct btrfs_fs_info { | |||
966 | int closing; | 1049 | int closing; |
967 | int log_root_recovering; | 1050 | int log_root_recovering; |
968 | int enospc_unlink; | 1051 | int enospc_unlink; |
1052 | int trans_no_join; | ||
969 | 1053 | ||
970 | u64 total_pinned; | 1054 | u64 total_pinned; |
971 | 1055 | ||
@@ -987,7 +1071,6 @@ struct btrfs_fs_info { | |||
987 | struct reloc_control *reloc_ctl; | 1071 | struct reloc_control *reloc_ctl; |
988 | 1072 | ||
989 | spinlock_t delalloc_lock; | 1073 | spinlock_t delalloc_lock; |
990 | spinlock_t new_trans_lock; | ||
991 | u64 delalloc_bytes; | 1074 | u64 delalloc_bytes; |
992 | 1075 | ||
993 | /* data_alloc_cluster is only used in ssd mode */ | 1076 | /* data_alloc_cluster is only used in ssd mode */ |
@@ -996,6 +1079,11 @@ struct btrfs_fs_info { | |||
996 | /* all metadata allocations go through this cluster */ | 1079 | /* all metadata allocations go through this cluster */ |
997 | struct btrfs_free_cluster meta_alloc_cluster; | 1080 | struct btrfs_free_cluster meta_alloc_cluster; |
998 | 1081 | ||
1082 | /* auto defrag inodes go here */ | ||
1083 | spinlock_t defrag_inodes_lock; | ||
1084 | struct rb_root defrag_inodes; | ||
1085 | atomic_t defrag_running; | ||
1086 | |||
999 | spinlock_t ref_cache_lock; | 1087 | spinlock_t ref_cache_lock; |
1000 | u64 total_ref_cache_size; | 1088 | u64 total_ref_cache_size; |
1001 | 1089 | ||
@@ -1010,6 +1098,22 @@ struct btrfs_fs_info { | |||
1010 | unsigned metadata_ratio; | 1098 | unsigned metadata_ratio; |
1011 | 1099 | ||
1012 | void *bdev_holder; | 1100 | void *bdev_holder; |
1101 | |||
1102 | /* private scrub information */ | ||
1103 | struct mutex scrub_lock; | ||
1104 | atomic_t scrubs_running; | ||
1105 | atomic_t scrub_pause_req; | ||
1106 | atomic_t scrubs_paused; | ||
1107 | atomic_t scrub_cancel_req; | ||
1108 | wait_queue_head_t scrub_pause_wait; | ||
1109 | struct rw_semaphore scrub_super_lock; | ||
1110 | int scrub_workers_refcnt; | ||
1111 | struct btrfs_workers scrub_workers; | ||
1112 | |||
1113 | /* filesystem state */ | ||
1114 | u64 fs_state; | ||
1115 | |||
1116 | struct btrfs_delayed_root *delayed_root; | ||
1013 | }; | 1117 | }; |
1014 | 1118 | ||
1015 | /* | 1119 | /* |
@@ -1019,9 +1123,6 @@ struct btrfs_fs_info { | |||
1019 | struct btrfs_root { | 1123 | struct btrfs_root { |
1020 | struct extent_buffer *node; | 1124 | struct extent_buffer *node; |
1021 | 1125 | ||
1022 | /* the node lock is held while changing the node pointer */ | ||
1023 | spinlock_t node_lock; | ||
1024 | |||
1025 | struct extent_buffer *commit_root; | 1126 | struct extent_buffer *commit_root; |
1026 | struct btrfs_root *log_root; | 1127 | struct btrfs_root *log_root; |
1027 | struct btrfs_root *reloc_root; | 1128 | struct btrfs_root *reloc_root; |
@@ -1038,6 +1139,16 @@ struct btrfs_root { | |||
1038 | spinlock_t accounting_lock; | 1139 | spinlock_t accounting_lock; |
1039 | struct btrfs_block_rsv *block_rsv; | 1140 | struct btrfs_block_rsv *block_rsv; |
1040 | 1141 | ||
1142 | /* free ino cache stuff */ | ||
1143 | struct mutex fs_commit_mutex; | ||
1144 | struct btrfs_free_space_ctl *free_ino_ctl; | ||
1145 | enum btrfs_caching_type cached; | ||
1146 | spinlock_t cache_lock; | ||
1147 | wait_queue_head_t cache_wait; | ||
1148 | struct btrfs_free_space_ctl *free_ino_pinned; | ||
1149 | u64 cache_progress; | ||
1150 | struct inode *cache_inode; | ||
1151 | |||
1041 | struct mutex log_mutex; | 1152 | struct mutex log_mutex; |
1042 | wait_queue_head_t log_writer_wait; | 1153 | wait_queue_head_t log_writer_wait; |
1043 | wait_queue_head_t log_commit_wait[2]; | 1154 | wait_queue_head_t log_commit_wait[2]; |
@@ -1066,6 +1177,14 @@ struct btrfs_root { | |||
1066 | u32 type; | 1177 | u32 type; |
1067 | 1178 | ||
1068 | u64 highest_objectid; | 1179 | u64 highest_objectid; |
1180 | |||
1181 | /* btrfs_record_root_in_trans is a multi-step process, | ||
1182 | * and it can race with the balancing code. But the | ||
1183 | * race window is very small, and exists only the first | ||
1184 | * time the root is added to each transaction. So in_trans_setup | ||
1185 | * is used to tell us when more checks are required | ||
1186 | */ | ||
1187 | unsigned long in_trans_setup; | ||
1069 | int ref_cows; | 1188 | int ref_cows; |
1070 | int track_dirty; | 1189 | int track_dirty; |
1071 | int in_radix; | 1190 | int in_radix; |
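The in_trans_setup flag above closes a small race in btrfs_record_root_in_trans: the root may already belong to the running transaction but not yet be fully set up. A hedged sketch of the lockless fast path this enables (assumed shape; the real check and its memory barriers live in transaction.c):

    smp_rmb();      /* pairs with the barrier after in_trans_setup clears */
    if (root->last_trans == trans->transid && !root->in_trans_setup)
            return 0;       /* already recorded, skip the locked slow path */
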
@@ -1075,7 +1194,6 @@ struct btrfs_root { | |||
1075 | struct btrfs_key defrag_max; | 1194 | struct btrfs_key defrag_max; |
1076 | int defrag_running; | 1195 | int defrag_running; |
1077 | char *name; | 1196 | char *name; |
1078 | int in_sysfs; | ||
1079 | 1197 | ||
1080 | /* the dirty list is only used by non-reference counted roots */ | 1198 | /* the dirty list is only used by non-reference counted roots */ |
1081 | struct list_head dirty_list; | 1199 | struct list_head dirty_list; |
@@ -1093,12 +1211,49 @@ struct btrfs_root { | |||
1093 | struct rb_root inode_tree; | 1211 | struct rb_root inode_tree; |
1094 | 1212 | ||
1095 | /* | 1213 | /* |
1214 | * radix tree that keeps track of delayed nodes of every inode, | ||
1215 | * protected by inode_lock | ||
1216 | */ | ||
1217 | struct radix_tree_root delayed_nodes_tree; | ||
1218 | /* | ||
1096 | * right now this just gets used so that a root has its own devid | 1219 | * right now this just gets used so that a root has its own devid |
1097 | * for stat. It may be used for more later | 1220 | * for stat. It may be used for more later |
1098 | */ | 1221 | */ |
1099 | struct super_block anon_super; | 1222 | struct super_block anon_super; |
1100 | }; | 1223 | }; |
1101 | 1224 | ||
1225 | struct btrfs_ioctl_defrag_range_args { | ||
1226 | /* start of the defrag operation */ | ||
1227 | __u64 start; | ||
1228 | |||
1229 | /* number of bytes to defrag, use (u64)-1 to say all */ | ||
1230 | __u64 len; | ||
1231 | |||
1232 | /* | ||
1233 | * flags for the operation, which can include turning | ||
1234 | * on compression for this one defrag | ||
1235 | */ | ||
1236 | __u64 flags; | ||
1237 | |||
1238 | /* | ||
1239 | * any extent bigger than this will be considered | ||
1240 | * already defragged. Use 0 to take the kernel default. | ||
1241 | * Use 1 to say every single extent must be rewritten | ||
1242 | */ | ||
1243 | __u32 extent_thresh; | ||
1244 | |||
1245 | /* | ||
1246 | * which compression method to use if turning on compression | ||
1247 | * for this defrag operation. If unspecified, zlib will | ||
1248 | * be used | ||
1249 | */ | ||
1250 | __u32 compress_type; | ||
1251 | |||
1252 | /* spare for later */ | ||
1253 | __u32 unused[4]; | ||
1254 | }; | ||
1255 | |||
1256 | |||
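btrfs_ioctl_defrag_range_args is the userspace-visible control block for range defrag. A minimal userspace sketch, assuming BTRFS_IOC_DEFRAG_RANGE and BTRFS_DEFRAG_RANGE_COMPRESS from fs/btrfs/ioctl.h plus <sys/ioctl.h> and <string.h>; error handling trimmed:

    /* fd: an open, writable file descriptor on the file to defragment */
    int defrag_with_lzo(int fd)
    {
            struct btrfs_ioctl_defrag_range_args args;

            memset(&args, 0, sizeof(args));
            args.start = 0;
            args.len = (__u64)-1;                     /* whole file */
            args.flags = BTRFS_DEFRAG_RANGE_COMPRESS;
            args.compress_type = BTRFS_COMPRESS_LZO;
            args.extent_thresh = 0;                   /* kernel default */
            return ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, &args);
    }
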
1102 | /* | 1257 | /* |
1103 | * inode items have the data typically returned from stat and store other | 1258 | * inode items have the data typically returned from stat and store other |
1104 | * info about object characteristics. There is one for every file and dir in | 1259 | * info about object characteristics. There is one for every file and dir in |
@@ -1180,6 +1335,11 @@ struct btrfs_root { | |||
1180 | */ | 1335 | */ |
1181 | #define BTRFS_STRING_ITEM_KEY 253 | 1336 | #define BTRFS_STRING_ITEM_KEY 253 |
1182 | 1337 | ||
1338 | /* | ||
1339 | * Flags for mount options. | ||
1340 | * | ||
1341 | * Note: don't forget to add new options to btrfs_show_options() | ||
1342 | */ | ||
1183 | #define BTRFS_MOUNT_NODATASUM (1 << 0) | 1343 | #define BTRFS_MOUNT_NODATASUM (1 << 0) |
1184 | #define BTRFS_MOUNT_NODATACOW (1 << 1) | 1344 | #define BTRFS_MOUNT_NODATACOW (1 << 1) |
1185 | #define BTRFS_MOUNT_NOBARRIER (1 << 2) | 1345 | #define BTRFS_MOUNT_NOBARRIER (1 << 2) |
@@ -1192,6 +1352,12 @@ struct btrfs_root { | |||
1192 | #define BTRFS_MOUNT_NOSSD (1 << 9) | 1352 | #define BTRFS_MOUNT_NOSSD (1 << 9) |
1193 | #define BTRFS_MOUNT_DISCARD (1 << 10) | 1353 | #define BTRFS_MOUNT_DISCARD (1 << 10) |
1194 | #define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) | 1354 | #define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) |
1355 | #define BTRFS_MOUNT_SPACE_CACHE (1 << 12) | ||
1356 | #define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) | ||
1357 | #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) | ||
1358 | #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) | ||
1359 | #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) | ||
1360 | #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) | ||
1195 | 1361 | ||
1196 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 1362 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
1197 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 1363 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
@@ -1211,6 +1377,9 @@ struct btrfs_root { | |||
1211 | #define BTRFS_INODE_NODUMP (1 << 8) | 1377 | #define BTRFS_INODE_NODUMP (1 << 8) |
1212 | #define BTRFS_INODE_NOATIME (1 << 9) | 1378 | #define BTRFS_INODE_NOATIME (1 << 9) |
1213 | #define BTRFS_INODE_DIRSYNC (1 << 10) | 1379 | #define BTRFS_INODE_DIRSYNC (1 << 10) |
1380 | #define BTRFS_INODE_COMPRESS (1 << 11) | ||
1381 | |||
1382 | #define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31) | ||
1214 | 1383 | ||
1215 | /* some macros to generate set/get funcs for the struct fields. This | 1384 | /* some macros to generate set/get funcs for the struct fields. This |
1216 | * assumes there is a lefoo_to_cpu for every type, so lets make a simple | 1385 | * assumes there is a lefoo_to_cpu for every type, so lets make a simple |
@@ -1364,26 +1533,12 @@ static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb, | |||
1364 | return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr)); | 1533 | return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr)); |
1365 | } | 1534 | } |
1366 | 1535 | ||
1367 | static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb, | ||
1368 | struct btrfs_chunk *c, int nr, | ||
1369 | u64 val) | ||
1370 | { | ||
1371 | btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val); | ||
1372 | } | ||
1373 | |||
1374 | static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb, | 1536 | static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb, |
1375 | struct btrfs_chunk *c, int nr) | 1537 | struct btrfs_chunk *c, int nr) |
1376 | { | 1538 | { |
1377 | return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr)); | 1539 | return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr)); |
1378 | } | 1540 | } |
1379 | 1541 | ||
1380 | static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb, | ||
1381 | struct btrfs_chunk *c, int nr, | ||
1382 | u64 val) | ||
1383 | { | ||
1384 | btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val); | ||
1385 | } | ||
1386 | |||
1387 | /* struct btrfs_block_group_item */ | 1542 | /* struct btrfs_block_group_item */ |
1388 | BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, | 1543 | BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, |
1389 | used, 64); | 1544 | used, 64); |
@@ -1441,14 +1596,6 @@ btrfs_inode_ctime(struct btrfs_inode_item *inode_item) | |||
1441 | return (struct btrfs_timespec *)ptr; | 1596 | return (struct btrfs_timespec *)ptr; |
1442 | } | 1597 | } |
1443 | 1598 | ||
1444 | static inline struct btrfs_timespec * | ||
1445 | btrfs_inode_otime(struct btrfs_inode_item *inode_item) | ||
1446 | { | ||
1447 | unsigned long ptr = (unsigned long)inode_item; | ||
1448 | ptr += offsetof(struct btrfs_inode_item, otime); | ||
1449 | return (struct btrfs_timespec *)ptr; | ||
1450 | } | ||
1451 | |||
1452 | BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); | 1599 | BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); |
1453 | BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); | 1600 | BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); |
1454 | 1601 | ||
@@ -1665,6 +1812,27 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, | |||
1665 | write_eb_member(eb, item, struct btrfs_dir_item, location, key); | 1812 | write_eb_member(eb, item, struct btrfs_dir_item, location, key); |
1666 | } | 1813 | } |
1667 | 1814 | ||
1815 | BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header, | ||
1816 | num_entries, 64); | ||
1817 | BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header, | ||
1818 | num_bitmaps, 64); | ||
1819 | BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header, | ||
1820 | generation, 64); | ||
1821 | |||
1822 | static inline void btrfs_free_space_key(struct extent_buffer *eb, | ||
1823 | struct btrfs_free_space_header *h, | ||
1824 | struct btrfs_disk_key *key) | ||
1825 | { | ||
1826 | read_eb_member(eb, h, struct btrfs_free_space_header, location, key); | ||
1827 | } | ||
1828 | |||
1829 | static inline void btrfs_set_free_space_key(struct extent_buffer *eb, | ||
1830 | struct btrfs_free_space_header *h, | ||
1831 | struct btrfs_disk_key *key) | ||
1832 | { | ||
1833 | write_eb_member(eb, h, struct btrfs_free_space_header, location, key); | ||
1834 | } | ||
1835 | |||
1668 | /* struct btrfs_disk_key */ | 1836 | /* struct btrfs_disk_key */ |
1669 | BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, | 1837 | BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, |
1670 | objectid, 64); | 1838 | objectid, 64); |
@@ -1778,33 +1946,6 @@ static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) | |||
1778 | return (u8 *)ptr; | 1946 | return (u8 *)ptr; |
1779 | } | 1947 | } |
1780 | 1948 | ||
1781 | static inline u8 *btrfs_super_fsid(struct extent_buffer *eb) | ||
1782 | { | ||
1783 | unsigned long ptr = offsetof(struct btrfs_super_block, fsid); | ||
1784 | return (u8 *)ptr; | ||
1785 | } | ||
1786 | |||
1787 | static inline u8 *btrfs_header_csum(struct extent_buffer *eb) | ||
1788 | { | ||
1789 | unsigned long ptr = offsetof(struct btrfs_header, csum); | ||
1790 | return (u8 *)ptr; | ||
1791 | } | ||
1792 | |||
1793 | static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb) | ||
1794 | { | ||
1795 | return NULL; | ||
1796 | } | ||
1797 | |||
1798 | static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb) | ||
1799 | { | ||
1800 | return NULL; | ||
1801 | } | ||
1802 | |||
1803 | static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb) | ||
1804 | { | ||
1805 | return NULL; | ||
1806 | } | ||
1807 | |||
1808 | static inline int btrfs_is_leaf(struct extent_buffer *eb) | 1949 | static inline int btrfs_is_leaf(struct extent_buffer *eb) |
1809 | { | 1950 | { |
1810 | return btrfs_header_level(eb) == 0; | 1951 | return btrfs_header_level(eb) == 0; |
@@ -1829,6 +1970,11 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); | |||
1829 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, | 1970 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, |
1830 | last_snapshot, 64); | 1971 | last_snapshot, 64); |
1831 | 1972 | ||
1973 | static inline bool btrfs_root_readonly(struct btrfs_root *root) | ||
1974 | { | ||
1975 | return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; | ||
1976 | } | ||
1977 | |||
1832 | /* struct btrfs_super_block */ | 1978 | /* struct btrfs_super_block */ |
1833 | 1979 | ||
1834 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); | 1980 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); |
@@ -1876,6 +2022,8 @@ BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block, | |||
1876 | incompat_flags, 64); | 2022 | incompat_flags, 64); |
1877 | BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, | 2023 | BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, |
1878 | csum_type, 16); | 2024 | csum_type, 16); |
2025 | BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, | ||
2026 | cache_generation, 64); | ||
1879 | 2027 | ||
1880 | static inline int btrfs_super_csum_size(struct btrfs_super_block *s) | 2028 | static inline int btrfs_super_csum_size(struct btrfs_super_block *s) |
1881 | { | 2029 | { |
@@ -1951,22 +2099,6 @@ static inline struct btrfs_root *btrfs_sb(struct super_block *sb) | |||
1951 | return sb->s_fs_info; | 2099 | return sb->s_fs_info; |
1952 | } | 2100 | } |
1953 | 2101 | ||
1954 | static inline int btrfs_set_root_name(struct btrfs_root *root, | ||
1955 | const char *name, int len) | ||
1956 | { | ||
1957 | /* if we already have a name just free it */ | ||
1958 | kfree(root->name); | ||
1959 | |||
1960 | root->name = kmalloc(len+1, GFP_KERNEL); | ||
1961 | if (!root->name) | ||
1962 | return -ENOMEM; | ||
1963 | |||
1964 | memcpy(root->name, name, len); | ||
1965 | root->name[len] = '\0'; | ||
1966 | |||
1967 | return 0; | ||
1968 | } | ||
1969 | |||
1970 | static inline u32 btrfs_level_size(struct btrfs_root *root, int level) | 2102 | static inline u32 btrfs_level_size(struct btrfs_root *root, int level) |
1971 | { | 2103 | { |
1972 | if (level == 0) | 2104 | if (level == 0) |
@@ -1988,7 +2120,20 @@ static inline struct dentry *fdentry(struct file *file) | |||
1988 | return file->f_path.dentry; | 2120 | return file->f_path.dentry; |
1989 | } | 2121 | } |
1990 | 2122 | ||
2123 | static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) | ||
2124 | { | ||
2125 | return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) && | ||
2126 | (space_info->flags & BTRFS_BLOCK_GROUP_DATA)); | ||
2127 | } | ||
2128 | |||
1991 | /* extent-tree.c */ | 2129 | /* extent-tree.c */ |
2130 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, | ||
2131 | int num_items) | ||
2132 | { | ||
2133 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | ||
2134 | 3 * num_items; | ||
2135 | } | ||
2136 | |||
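As a worked example of the helper above: assuming 4KiB leaves and nodes and BTRFS_MAX_LEVEL = 8, reserving for one item costs (4096 + 4096 * 7) * 3 = 98304 bytes, i.e. 96KiB per item - enough to copy-on-write a full path from leaf to root three times over.
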
1992 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | 2137 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); |
1993 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 2138 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
1994 | struct btrfs_root *root, unsigned long count); | 2139 | struct btrfs_root *root, unsigned long count); |
@@ -1998,12 +2143,9 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | |||
1998 | u64 num_bytes, u64 *refs, u64 *flags); | 2143 | u64 num_bytes, u64 *refs, u64 *flags); |
1999 | int btrfs_pin_extent(struct btrfs_root *root, | 2144 | int btrfs_pin_extent(struct btrfs_root *root, |
2000 | u64 bytenr, u64 num, int reserved); | 2145 | u64 bytenr, u64 num, int reserved); |
2001 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | ||
2002 | struct btrfs_root *root, struct extent_buffer *leaf); | ||
2003 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 2146 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, |
2004 | struct btrfs_root *root, | 2147 | struct btrfs_root *root, |
2005 | u64 objectid, u64 offset, u64 bytenr); | 2148 | u64 objectid, u64 offset, u64 bytenr); |
2006 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); | ||
2007 | struct btrfs_block_group_cache *btrfs_lookup_block_group( | 2149 | struct btrfs_block_group_cache *btrfs_lookup_block_group( |
2008 | struct btrfs_fs_info *info, | 2150 | struct btrfs_fs_info *info, |
2009 | u64 bytenr); | 2151 | u64 bytenr); |
@@ -2051,6 +2193,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
2051 | u64 root_objectid, u64 owner, u64 offset); | 2193 | u64 root_objectid, u64 owner, u64 offset); |
2052 | 2194 | ||
2053 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); | 2195 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); |
2196 | int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
2197 | u64 num_bytes, int reserve, int sinfo); | ||
2054 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | 2198 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
2055 | struct btrfs_root *root); | 2199 | struct btrfs_root *root); |
2056 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | 2200 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, |
@@ -2073,13 +2217,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
2073 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 2217 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
2074 | struct btrfs_root *root, u64 group_start); | 2218 | struct btrfs_root *root, u64 group_start); |
2075 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 2219 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
2220 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); | ||
2076 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode); | 2221 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode); |
2077 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2222 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
2078 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | 2223 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); |
2079 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); | 2224 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); |
2080 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | 2225 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, |
2081 | struct btrfs_root *root, | 2226 | struct btrfs_root *root, |
2082 | int num_items, int *retries); | 2227 | int num_items); |
2083 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 2228 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
2084 | struct btrfs_root *root); | 2229 | struct btrfs_root *root); |
2085 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | 2230 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, |
@@ -2100,7 +2245,7 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | |||
2100 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | 2245 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, |
2101 | struct btrfs_root *root, | 2246 | struct btrfs_root *root, |
2102 | struct btrfs_block_rsv *block_rsv, | 2247 | struct btrfs_block_rsv *block_rsv, |
2103 | u64 num_bytes, int *retries); | 2248 | u64 num_bytes); |
2104 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | 2249 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, |
2105 | struct btrfs_root *root, | 2250 | struct btrfs_root *root, |
2106 | struct btrfs_block_rsv *block_rsv, | 2251 | struct btrfs_block_rsv *block_rsv, |
@@ -2111,10 +2256,24 @@ int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | |||
2111 | void btrfs_block_rsv_release(struct btrfs_root *root, | 2256 | void btrfs_block_rsv_release(struct btrfs_root *root, |
2112 | struct btrfs_block_rsv *block_rsv, | 2257 | struct btrfs_block_rsv *block_rsv, |
2113 | u64 num_bytes); | 2258 | u64 num_bytes); |
2259 | int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, | ||
2260 | struct btrfs_root *root, | ||
2261 | struct btrfs_block_rsv *rsv); | ||
2114 | int btrfs_set_block_group_ro(struct btrfs_root *root, | 2262 | int btrfs_set_block_group_ro(struct btrfs_root *root, |
2115 | struct btrfs_block_group_cache *cache); | 2263 | struct btrfs_block_group_cache *cache); |
2116 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 2264 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
2117 | struct btrfs_block_group_cache *cache); | 2265 | struct btrfs_block_group_cache *cache); |
2266 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info); | ||
2267 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); | ||
2268 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, | ||
2269 | u64 start, u64 end); | ||
2270 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | ||
2271 | u64 num_bytes, u64 *actual_bytes); | ||
2272 | int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, | ||
2273 | struct btrfs_root *root, u64 type); | ||
2274 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); | ||
2275 | |||
2276 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info); | ||
2118 | /* ctree.c */ | 2277 | /* ctree.c */ |
2119 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2278 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
2120 | int level, int *slot); | 2279 | int level, int *slot); |
@@ -2166,10 +2325,12 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
2166 | struct btrfs_root *root, struct extent_buffer *parent, | 2325 | struct btrfs_root *root, struct extent_buffer *parent, |
2167 | int start_slot, int cache_only, u64 *last_ret, | 2326 | int start_slot, int cache_only, u64 *last_ret, |
2168 | struct btrfs_key *progress); | 2327 | struct btrfs_key *progress); |
2169 | void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); | 2328 | void btrfs_release_path(struct btrfs_path *p); |
2170 | struct btrfs_path *btrfs_alloc_path(void); | 2329 | struct btrfs_path *btrfs_alloc_path(void); |
2171 | void btrfs_free_path(struct btrfs_path *p); | 2330 | void btrfs_free_path(struct btrfs_path *p); |
2172 | void btrfs_set_path_blocking(struct btrfs_path *p); | 2331 | void btrfs_set_path_blocking(struct btrfs_path *p); |
2332 | void btrfs_clear_path_blocking(struct btrfs_path *p, | ||
2333 | struct extent_buffer *held); | ||
2173 | void btrfs_unlock_up_safe(struct btrfs_path *p, int level); | 2334 | void btrfs_unlock_up_safe(struct btrfs_path *p, int level); |
2174 | 2335 | ||
2175 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2336 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
@@ -2181,13 +2342,12 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans, | |||
2181 | return btrfs_del_items(trans, root, path, path->slots[0], 1); | 2342 | return btrfs_del_items(trans, root, path, path->slots[0], 1); |
2182 | } | 2343 | } |
2183 | 2344 | ||
2345 | int setup_items_for_insert(struct btrfs_trans_handle *trans, | ||
2346 | struct btrfs_root *root, struct btrfs_path *path, | ||
2347 | struct btrfs_key *cpu_key, u32 *data_size, | ||
2348 | u32 total_data, u32 total_size, int nr); | ||
2184 | int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root | 2349 | int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root |
2185 | *root, struct btrfs_key *key, void *data, u32 data_size); | 2350 | *root, struct btrfs_key *key, void *data, u32 data_size); |
2186 | int btrfs_insert_some_items(struct btrfs_trans_handle *trans, | ||
2187 | struct btrfs_root *root, | ||
2188 | struct btrfs_path *path, | ||
2189 | struct btrfs_key *cpu_key, u32 *data_size, | ||
2190 | int nr); | ||
2191 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | 2351 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, |
2192 | struct btrfs_root *root, | 2352 | struct btrfs_root *root, |
2193 | struct btrfs_path *path, | 2353 | struct btrfs_path *path, |
@@ -2211,6 +2371,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
2211 | struct btrfs_root *root, | 2371 | struct btrfs_root *root, |
2212 | struct extent_buffer *node, | 2372 | struct extent_buffer *node, |
2213 | struct extent_buffer *parent); | 2373 | struct extent_buffer *parent); |
2374 | static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) | ||
2375 | { | ||
2376 | /* | ||
2377 | * Get synced with close_ctree() | ||
2378 | */ | ||
2379 | smp_mb(); | ||
2380 | return fs_info->closing; | ||
2381 | } | ||
2382 | |||
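btrfs_fs_closing() pairs its smp_mb() with close_ctree() so that long-running background workers notice an unmount in progress without taking any lock. A minimal sketch of the intended polling use (the loop body is hypothetical):

	while (!btrfs_fs_closing(fs_info)) {
		/* one unit of background work, then yield */
		cond_resched();
	}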
2214 | /* root-item.c */ | 2383 | /* root-item.c */ |
2215 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 2384 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
2216 | struct btrfs_path *path, | 2385 | struct btrfs_path *path, |
@@ -2233,16 +2402,16 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2233 | *item); | 2402 | *item); |
2234 | int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct | 2403 | int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct |
2235 | btrfs_root_item *item, struct btrfs_key *key); | 2404 | btrfs_root_item *item, struct btrfs_key *key); |
2236 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, | ||
2237 | u64 *found_objectid); | ||
2238 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); | 2405 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); |
2239 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); | 2406 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); |
2240 | int btrfs_set_root_node(struct btrfs_root_item *item, | 2407 | int btrfs_set_root_node(struct btrfs_root_item *item, |
2241 | struct extent_buffer *node); | 2408 | struct extent_buffer *node); |
2409 | void btrfs_check_and_init_root_item(struct btrfs_root_item *item); | ||
2410 | |||
2242 | /* dir-item.c */ | 2411 | /* dir-item.c */ |
2243 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, | 2412 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, |
2244 | struct btrfs_root *root, const char *name, | 2413 | struct btrfs_root *root, const char *name, |
2245 | int name_len, u64 dir, | 2414 | int name_len, struct inode *dir, |
2246 | struct btrfs_key *location, u8 type, u64 index); | 2415 | struct btrfs_key *location, u8 type, u64 index); |
2247 | struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | 2416 | struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, |
2248 | struct btrfs_root *root, | 2417 | struct btrfs_root *root, |
@@ -2276,6 +2445,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | |||
2276 | struct btrfs_path *path, u64 dir, | 2445 | struct btrfs_path *path, u64 dir, |
2277 | const char *name, u16 name_len, | 2446 | const char *name, u16 name_len, |
2278 | int mod); | 2447 | int mod); |
2448 | int verify_dir_item(struct btrfs_root *root, | ||
2449 | struct extent_buffer *leaf, | ||
2450 | struct btrfs_dir_item *dir_item); | ||
2279 | 2451 | ||
2280 | /* orphan.c */ | 2452 | /* orphan.c */ |
2281 | int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, | 2453 | int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, |
@@ -2284,12 +2456,6 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, | |||
2284 | struct btrfs_root *root, u64 offset); | 2456 | struct btrfs_root *root, u64 offset); |
2285 | int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset); | 2457 | int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset); |
2286 | 2458 | ||
2287 | /* inode-map.c */ | ||
2288 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | ||
2289 | struct btrfs_root *fs_root, | ||
2290 | u64 dirid, u64 *objectid); | ||
2291 | int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid); | ||
2292 | |||
2293 | /* inode-item.c */ | 2459 | /* inode-item.c */ |
2294 | int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | 2460 | int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, |
2295 | struct btrfs_root *root, | 2461 | struct btrfs_root *root, |
@@ -2334,8 +2500,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | |||
2334 | struct btrfs_ordered_sum *sums); | 2500 | struct btrfs_ordered_sum *sums); |
2335 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | 2501 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, |
2336 | struct bio *bio, u64 file_start, int contig); | 2502 | struct bio *bio, u64 file_start, int contig); |
2337 | int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode, | ||
2338 | u64 start, unsigned long len); | ||
2339 | struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, | 2503 | struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, |
2340 | struct btrfs_root *root, | 2504 | struct btrfs_root *root, |
2341 | struct btrfs_path *path, | 2505 | struct btrfs_path *path, |
@@ -2343,8 +2507,8 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, | |||
2343 | int btrfs_csum_truncate(struct btrfs_trans_handle *trans, | 2507 | int btrfs_csum_truncate(struct btrfs_trans_handle *trans, |
2344 | struct btrfs_root *root, struct btrfs_path *path, | 2508 | struct btrfs_root *root, struct btrfs_path *path, |
2345 | u64 isize); | 2509 | u64 isize); |
2346 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, | 2510 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
2347 | u64 end, struct list_head *list); | 2511 | struct list_head *list, int search_commit); |
2348 | /* inode.c */ | 2512 | /* inode.c */ |
2349 | 2513 | ||
2350 | /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ | 2514 | /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ |
@@ -2373,14 +2537,12 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
2373 | u32 min_type); | 2537 | u32 min_type); |
2374 | 2538 | ||
2375 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 2539 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
2376 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput); | ||
2377 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 2540 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
2378 | struct extent_state **cached_state); | 2541 | struct extent_state **cached_state); |
2379 | int btrfs_writepages(struct address_space *mapping, | 2542 | int btrfs_writepages(struct address_space *mapping, |
2380 | struct writeback_control *wbc); | 2543 | struct writeback_control *wbc); |
2381 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 2544 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
2382 | struct btrfs_root *new_root, | 2545 | struct btrfs_root *new_root, u64 new_dirid); |
2383 | u64 new_dirid, u64 alloc_hint); | ||
2384 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | 2546 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
2385 | size_t size, struct bio *bio, unsigned long bio_flags); | 2547 | size_t size, struct bio *bio, unsigned long bio_flags); |
2386 | 2548 | ||
@@ -2390,9 +2552,8 @@ unsigned long btrfs_force_ra(struct address_space *mapping, | |||
2390 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 2552 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
2391 | int btrfs_readpage(struct file *file, struct page *page); | 2553 | int btrfs_readpage(struct file *file, struct page *page); |
2392 | void btrfs_evict_inode(struct inode *inode); | 2554 | void btrfs_evict_inode(struct inode *inode); |
2393 | void btrfs_put_inode(struct inode *inode); | ||
2394 | int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); | 2555 | int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); |
2395 | void btrfs_dirty_inode(struct inode *inode); | 2556 | void btrfs_dirty_inode(struct inode *inode, int flags); |
2396 | struct inode *btrfs_alloc_inode(struct super_block *sb); | 2557 | struct inode *btrfs_alloc_inode(struct super_block *sb); |
2397 | void btrfs_destroy_inode(struct inode *inode); | 2558 | void btrfs_destroy_inode(struct inode *inode); |
2398 | int btrfs_drop_inode(struct inode *inode); | 2559 | int btrfs_drop_inode(struct inode *inode); |
@@ -2401,17 +2562,15 @@ void btrfs_destroy_cachep(void); | |||
2401 | long btrfs_ioctl_trans_end(struct file *file); | 2562 | long btrfs_ioctl_trans_end(struct file *file); |
2402 | struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | 2563 | struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, |
2403 | struct btrfs_root *root, int *was_new); | 2564 | struct btrfs_root *root, int *was_new); |
2404 | int btrfs_commit_write(struct file *file, struct page *page, | ||
2405 | unsigned from, unsigned to); | ||
2406 | struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | 2565 | struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, |
2407 | size_t page_offset, u64 start, u64 end, | 2566 | size_t pg_offset, u64 start, u64 end, |
2408 | int create); | 2567 | int create); |
2409 | int btrfs_update_inode(struct btrfs_trans_handle *trans, | 2568 | int btrfs_update_inode(struct btrfs_trans_handle *trans, |
2410 | struct btrfs_root *root, | 2569 | struct btrfs_root *root, |
2411 | struct inode *inode); | 2570 | struct inode *inode); |
2412 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | 2571 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); |
2413 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | 2572 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); |
2414 | void btrfs_orphan_cleanup(struct btrfs_root *root); | 2573 | int btrfs_orphan_cleanup(struct btrfs_root *root); |
2415 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | 2574 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, |
2416 | struct btrfs_pending_snapshot *pending, | 2575 | struct btrfs_pending_snapshot *pending, |
2417 | u64 *bytes_to_reserve); | 2576 | u64 *bytes_to_reserve); |
@@ -2419,31 +2578,44 @@ void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | |||
2419 | struct btrfs_pending_snapshot *pending); | 2578 | struct btrfs_pending_snapshot *pending); |
2420 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | 2579 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, |
2421 | struct btrfs_root *root); | 2580 | struct btrfs_root *root); |
2422 | int btrfs_cont_expand(struct inode *inode, loff_t size); | 2581 | int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size); |
2423 | int btrfs_invalidate_inodes(struct btrfs_root *root); | 2582 | int btrfs_invalidate_inodes(struct btrfs_root *root); |
2424 | void btrfs_add_delayed_iput(struct inode *inode); | 2583 | void btrfs_add_delayed_iput(struct inode *inode); |
2425 | void btrfs_run_delayed_iputs(struct btrfs_root *root); | 2584 | void btrfs_run_delayed_iputs(struct btrfs_root *root); |
2426 | int btrfs_prealloc_file_range(struct inode *inode, int mode, | 2585 | int btrfs_prealloc_file_range(struct inode *inode, int mode, |
2427 | u64 start, u64 num_bytes, u64 min_size, | 2586 | u64 start, u64 num_bytes, u64 min_size, |
2428 | loff_t actual_len, u64 *alloc_hint); | 2587 | loff_t actual_len, u64 *alloc_hint); |
2588 | int btrfs_prealloc_file_range_trans(struct inode *inode, | ||
2589 | struct btrfs_trans_handle *trans, int mode, | ||
2590 | u64 start, u64 num_bytes, u64 min_size, | ||
2591 | loff_t actual_len, u64 *alloc_hint); | ||
2429 | extern const struct dentry_operations btrfs_dentry_operations; | 2592 | extern const struct dentry_operations btrfs_dentry_operations; |
2430 | 2593 | ||
2431 | /* ioctl.c */ | 2594 | /* ioctl.c */ |
2432 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 2595 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
2433 | void btrfs_update_iflags(struct inode *inode); | 2596 | void btrfs_update_iflags(struct inode *inode); |
2434 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); | 2597 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); |
2435 | 2598 | int btrfs_defrag_file(struct inode *inode, struct file *file, | |
2599 | struct btrfs_ioctl_defrag_range_args *range, | ||
2600 | u64 newer_than, unsigned long max_pages); | ||
2436 | /* file.c */ | 2601 | /* file.c */ |
2602 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | ||
2603 | struct inode *inode); | ||
2604 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); | ||
2437 | int btrfs_sync_file(struct file *file, int datasync); | 2605 | int btrfs_sync_file(struct file *file, int datasync); |
2438 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 2606 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
2439 | int skip_pinned); | 2607 | int skip_pinned); |
2440 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); | ||
2441 | extern const struct file_operations btrfs_file_operations; | 2608 | extern const struct file_operations btrfs_file_operations; |
2442 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, | 2609 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, |
2443 | u64 start, u64 end, u64 *hint_byte, int drop_cache); | 2610 | u64 start, u64 end, u64 *hint_byte, int drop_cache); |
2444 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | 2611 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, |
2445 | struct inode *inode, u64 start, u64 end); | 2612 | struct inode *inode, u64 start, u64 end); |
2446 | int btrfs_release_file(struct inode *inode, struct file *file); | 2613 | int btrfs_release_file(struct inode *inode, struct file *file); |
2614 | void btrfs_drop_pages(struct page **pages, size_t num_pages); | ||
2615 | int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, | ||
2616 | struct page **pages, size_t num_pages, | ||
2617 | loff_t pos, size_t write_bytes, | ||
2618 | struct extent_state **cached); | ||
2447 | 2619 | ||
2448 | /* tree-defrag.c */ | 2620 | /* tree-defrag.c */ |
2449 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | 2621 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, |
@@ -2452,10 +2624,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
2452 | /* sysfs.c */ | 2624 | /* sysfs.c */ |
2453 | int btrfs_init_sysfs(void); | 2625 | int btrfs_init_sysfs(void); |
2454 | void btrfs_exit_sysfs(void); | 2626 | void btrfs_exit_sysfs(void); |
2455 | int btrfs_sysfs_add_super(struct btrfs_fs_info *fs); | ||
2456 | int btrfs_sysfs_add_root(struct btrfs_root *root); | ||
2457 | void btrfs_sysfs_del_root(struct btrfs_root *root); | ||
2458 | void btrfs_sysfs_del_super(struct btrfs_fs_info *root); | ||
2459 | 2627 | ||
2460 | /* xattr.c */ | 2628 | /* xattr.c */ |
2461 | ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); | 2629 | ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); |
@@ -2463,10 +2631,18 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); | |||
2463 | /* super.c */ | 2631 | /* super.c */ |
2464 | int btrfs_parse_options(struct btrfs_root *root, char *options); | 2632 | int btrfs_parse_options(struct btrfs_root *root, char *options); |
2465 | int btrfs_sync_fs(struct super_block *sb, int wait); | 2633 | int btrfs_sync_fs(struct super_block *sb, int wait); |
2634 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | ||
2635 | unsigned int line, int errno); | ||
2636 | |||
2637 | #define btrfs_std_error(fs_info, errno) \ | ||
2638 | do { \ | ||
2639 | if ((errno)) \ | ||
2640 | __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\ | ||
2641 | } while (0) | ||
2466 | 2642 | ||
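btrfs_std_error() is a wrapper macro so that every call site reports its own __func__ and __LINE__ without passing them by hand, and so the common errno == 0 path costs no function call. The same pattern in self-contained C (all names here are illustrative, not btrfs API):

	#include <stdio.h>

	static void __report_error(const char *function, unsigned int line, int err)
	{
		fprintf(stderr, "error %d in %s:%u\n", err, function, line);
	}

	#define report_error(err)					\
	do {								\
		if ((err))						\
			__report_error(__func__, __LINE__, (err));	\
	} while (0)

	int main(void)
	{
		report_error(0);	/* success: macro skips the call */
		report_error(-5);	/* prints "error -5 in main:<line>" */
		return 0;
	}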
2467 | /* acl.c */ | 2643 | /* acl.c */ |
2468 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 2644 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
2469 | int btrfs_check_acl(struct inode *inode, int mask); | 2645 | int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags); |
2470 | #else | 2646 | #else |
2471 | #define btrfs_check_acl NULL | 2647 | #define btrfs_check_acl NULL |
2472 | #endif | 2648 | #endif |
@@ -2490,4 +2666,18 @@ void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, | |||
2490 | u64 *bytes_to_reserve); | 2666 | u64 *bytes_to_reserve); |
2491 | void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | 2667 | void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, |
2492 | struct btrfs_pending_snapshot *pending); | 2668 | struct btrfs_pending_snapshot *pending); |
2669 | |||
2670 | /* scrub.c */ | ||
2671 | int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, | ||
2672 | struct btrfs_scrub_progress *progress, int readonly); | ||
2673 | int btrfs_scrub_pause(struct btrfs_root *root); | ||
2674 | int btrfs_scrub_pause_super(struct btrfs_root *root); | ||
2675 | int btrfs_scrub_continue(struct btrfs_root *root); | ||
2676 | int btrfs_scrub_continue_super(struct btrfs_root *root); | ||
2677 | int btrfs_scrub_cancel(struct btrfs_root *root); | ||
2678 | int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev); | ||
2679 | int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid); | ||
2680 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, | ||
2681 | struct btrfs_scrub_progress *progress); | ||
2682 | |||
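A hedged usage sketch for the new scrub entry point: passing start = 0 and end = (u64)-1 plausibly scrubs a whole device, with counters returned through the caller-supplied progress structure. The devid and the meaning of readonly = 0 (repairs allowed) are assumptions for illustration, not taken from this header:

	struct btrfs_scrub_progress progress = {0};
	int ret;

	/* hypothetical: scrub all of device 1, allowing repairs */
	ret = btrfs_scrub_dev(root, 1, 0, (u64)-1, &progress, 0);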
2493 | #endif | 2683 | #endif |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c new file mode 100644 index 000000000000..98c68e658a9b --- /dev/null +++ b/fs/btrfs/delayed-inode.c | |||
@@ -0,0 +1,1773 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2011 Fujitsu. All rights reserved. | ||
3 | * Written by Miao Xie <miaox@cn.fujitsu.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public | ||
7 | * License v2 as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | * General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public | ||
15 | * License along with this program; if not, write to the | ||
16 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
17 | * Boston, MA 02111-1307, USA. | ||
18 | */ | ||
19 | |||
20 | #include <linux/slab.h> | ||
21 | #include "delayed-inode.h" | ||
22 | #include "disk-io.h" | ||
23 | #include "transaction.h" | ||
24 | |||
25 | #define BTRFS_DELAYED_WRITEBACK 400 | ||
26 | #define BTRFS_DELAYED_BACKGROUND 100 | ||
27 | |||
28 | static struct kmem_cache *delayed_node_cache; | ||
29 | |||
30 | int __init btrfs_delayed_inode_init(void) | ||
31 | { | ||
32 | delayed_node_cache = kmem_cache_create("delayed_node", | ||
33 | sizeof(struct btrfs_delayed_node), | ||
34 | 0, | ||
35 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | ||
36 | NULL); | ||
37 | if (!delayed_node_cache) | ||
38 | return -ENOMEM; | ||
39 | return 0; | ||
40 | } | ||
41 | |||
42 | void btrfs_delayed_inode_exit(void) | ||
43 | { | ||
44 | if (delayed_node_cache) | ||
45 | kmem_cache_destroy(delayed_node_cache); | ||
46 | } | ||
47 | |||
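btrfs_delayed_inode_init() has to run before the first delayed node is allocated, and btrfs_delayed_inode_exit() only after every node has been freed back to the cache. A sketch of the expected ordering during module init; the surrounding function and its other duties are assumed, not shown in this patch:

	static int __init init_btrfs_fs(void)
	{
		int err;

		err = btrfs_delayed_inode_init();
		if (err)
			return err;
		/* register the filesystem, etc.; undo with
		 * btrfs_delayed_inode_exit() on any later failure */
		return 0;
	}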
48 | static inline void btrfs_init_delayed_node( | ||
49 | struct btrfs_delayed_node *delayed_node, | ||
50 | struct btrfs_root *root, u64 inode_id) | ||
51 | { | ||
52 | delayed_node->root = root; | ||
53 | delayed_node->inode_id = inode_id; | ||
54 | atomic_set(&delayed_node->refs, 0); | ||
55 | delayed_node->count = 0; | ||
56 | delayed_node->in_list = 0; | ||
57 | delayed_node->inode_dirty = 0; | ||
58 | delayed_node->ins_root = RB_ROOT; | ||
59 | delayed_node->del_root = RB_ROOT; | ||
60 | mutex_init(&delayed_node->mutex); | ||
61 | delayed_node->index_cnt = 0; | ||
62 | INIT_LIST_HEAD(&delayed_node->n_list); | ||
63 | INIT_LIST_HEAD(&delayed_node->p_list); | ||
64 | delayed_node->bytes_reserved = 0; | ||
65 | } | ||
66 | |||
67 | static inline int btrfs_is_continuous_delayed_item( | ||
68 | struct btrfs_delayed_item *item1, | ||
69 | struct btrfs_delayed_item *item2) | ||
70 | { | ||
71 | if (item1->key.type == BTRFS_DIR_INDEX_KEY && | ||
72 | item1->key.objectid == item2->key.objectid && | ||
73 | item1->key.type == item2->key.type && | ||
74 | item1->key.offset + 1 == item2->key.offset) | ||
75 | return 1; | ||
76 | return 0; | ||
77 | } | ||
78 | |||
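Two delayed items count as "continuous" only when both are DIR_INDEX items for the same inode with adjacent offsets; that is what later makes them safe to batch into a single leaf. A worked example with hypothetical key values (item1/item2 point at allocated delayed items):

	item1->key = (struct btrfs_key){ .objectid = 257,
					 .type = BTRFS_DIR_INDEX_KEY,
					 .offset = 5 };
	item2->key = (struct btrfs_key){ .objectid = 257,
					 .type = BTRFS_DIR_INDEX_KEY,
					 .offset = 6 };
	btrfs_is_continuous_delayed_item(item1, item2);	/* returns 1 */

	item2->key.offset = 7;	/* gap between 5 and 7 */
	btrfs_is_continuous_delayed_item(item1, item2);	/* returns 0 */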
79 | static inline struct btrfs_delayed_root *btrfs_get_delayed_root( | ||
80 | struct btrfs_root *root) | ||
81 | { | ||
82 | return root->fs_info->delayed_root; | ||
83 | } | ||
84 | |||
85 | static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode) | ||
86 | { | ||
87 | struct btrfs_inode *btrfs_inode = BTRFS_I(inode); | ||
88 | struct btrfs_root *root = btrfs_inode->root; | ||
89 | u64 ino = btrfs_ino(inode); | ||
90 | struct btrfs_delayed_node *node; | ||
91 | |||
92 | node = ACCESS_ONCE(btrfs_inode->delayed_node); | ||
93 | if (node) { | ||
94 | atomic_inc(&node->refs); | ||
95 | return node; | ||
96 | } | ||
97 | |||
98 | spin_lock(&root->inode_lock); | ||
99 | node = radix_tree_lookup(&root->delayed_nodes_tree, ino); | ||
100 | if (node) { | ||
101 | if (btrfs_inode->delayed_node) { | ||
102 | atomic_inc(&node->refs); /* can be accessed */ | ||
103 | BUG_ON(btrfs_inode->delayed_node != node); | ||
104 | spin_unlock(&root->inode_lock); | ||
105 | return node; | ||
106 | } | ||
107 | btrfs_inode->delayed_node = node; | ||
108 | atomic_inc(&node->refs); /* can be accessed */ | ||
109 | atomic_inc(&node->refs); /* cached in the inode */ | ||
110 | spin_unlock(&root->inode_lock); | ||
111 | return node; | ||
112 | } | ||
113 | spin_unlock(&root->inode_lock); | ||
114 | |||
115 | return NULL; | ||
116 | } | ||
117 | |||
118 | static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( | ||
119 | struct inode *inode) | ||
120 | { | ||
121 | struct btrfs_delayed_node *node; | ||
122 | struct btrfs_inode *btrfs_inode = BTRFS_I(inode); | ||
123 | struct btrfs_root *root = btrfs_inode->root; | ||
124 | u64 ino = btrfs_ino(inode); | ||
125 | int ret; | ||
126 | |||
127 | again: | ||
128 | node = btrfs_get_delayed_node(inode); | ||
129 | if (node) | ||
130 | return node; | ||
131 | |||
132 | node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS); | ||
133 | if (!node) | ||
134 | return ERR_PTR(-ENOMEM); | ||
135 | btrfs_init_delayed_node(node, root, ino); | ||
136 | |||
137 | atomic_inc(&node->refs); /* cached in the btrfs inode */ | ||
138 | atomic_inc(&node->refs); /* can be accessed */ | ||
139 | |||
140 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | ||
141 | if (ret) { | ||
142 | kmem_cache_free(delayed_node_cache, node); | ||
143 | return ERR_PTR(ret); | ||
144 | } | ||
145 | |||
146 | spin_lock(&root->inode_lock); | ||
147 | ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node); | ||
148 | if (ret == -EEXIST) { | ||
149 | kmem_cache_free(delayed_node_cache, node); | ||
150 | spin_unlock(&root->inode_lock); | ||
151 | radix_tree_preload_end(); | ||
152 | goto again; | ||
153 | } | ||
154 | btrfs_inode->delayed_node = node; | ||
155 | spin_unlock(&root->inode_lock); | ||
156 | radix_tree_preload_end(); | ||
157 | |||
158 | return node; | ||
159 | } | ||
160 | |||
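The -EEXIST retry closes the window where two tasks race to create a node for the same inode: the loser frees its allocation and loops back to take a reference on the winner's node. Callers see only a node or an ERR_PTR; a minimal usage sketch:

	struct btrfs_delayed_node *node;

	node = btrfs_get_or_create_delayed_node(inode);
	if (IS_ERR(node))
		return PTR_ERR(node);
	/* ... queue delayed work on node ... */
	btrfs_release_delayed_node(node);	/* drop the "can be accessed" ref */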
161 | /* | ||
162 | * Call this with delayed_node->mutex held. | ||
163 | * | ||
164 | * If mod = 1, also add this node to the prepared list. | ||
165 | */ | ||
166 | static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root, | ||
167 | struct btrfs_delayed_node *node, | ||
168 | int mod) | ||
169 | { | ||
170 | spin_lock(&root->lock); | ||
171 | if (node->in_list) { | ||
172 | if (!list_empty(&node->p_list)) | ||
173 | list_move_tail(&node->p_list, &root->prepare_list); | ||
174 | else if (mod) | ||
175 | list_add_tail(&node->p_list, &root->prepare_list); | ||
176 | } else { | ||
177 | list_add_tail(&node->n_list, &root->node_list); | ||
178 | list_add_tail(&node->p_list, &root->prepare_list); | ||
179 | atomic_inc(&node->refs); /* inserted into list */ | ||
180 | root->nodes++; | ||
181 | node->in_list = 1; | ||
182 | } | ||
183 | spin_unlock(&root->lock); | ||
184 | } | ||
185 | |||
186 | /* Call this with delayed_node->mutex held. */ | ||
187 | static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root, | ||
188 | struct btrfs_delayed_node *node) | ||
189 | { | ||
190 | spin_lock(&root->lock); | ||
191 | if (node->in_list) { | ||
192 | root->nodes--; | ||
193 | atomic_dec(&node->refs); /* not in the list */ | ||
194 | list_del_init(&node->n_list); | ||
195 | if (!list_empty(&node->p_list)) | ||
196 | list_del_init(&node->p_list); | ||
197 | node->in_list = 0; | ||
198 | } | ||
199 | spin_unlock(&root->lock); | ||
200 | } | ||
201 | |||
202 | struct btrfs_delayed_node *btrfs_first_delayed_node( | ||
203 | struct btrfs_delayed_root *delayed_root) | ||
204 | { | ||
205 | struct list_head *p; | ||
206 | struct btrfs_delayed_node *node = NULL; | ||
207 | |||
208 | spin_lock(&delayed_root->lock); | ||
209 | if (list_empty(&delayed_root->node_list)) | ||
210 | goto out; | ||
211 | |||
212 | p = delayed_root->node_list.next; | ||
213 | node = list_entry(p, struct btrfs_delayed_node, n_list); | ||
214 | atomic_inc(&node->refs); | ||
215 | out: | ||
216 | spin_unlock(&delayed_root->lock); | ||
217 | |||
218 | return node; | ||
219 | } | ||
220 | |||
221 | struct btrfs_delayed_node *btrfs_next_delayed_node( | ||
222 | struct btrfs_delayed_node *node) | ||
223 | { | ||
224 | struct btrfs_delayed_root *delayed_root; | ||
225 | struct list_head *p; | ||
226 | struct btrfs_delayed_node *next = NULL; | ||
227 | |||
228 | delayed_root = node->root->fs_info->delayed_root; | ||
229 | spin_lock(&delayed_root->lock); | ||
230 | if (!node->in_list) { /* not in the list */ | ||
231 | if (list_empty(&delayed_root->node_list)) | ||
232 | goto out; | ||
233 | p = delayed_root->node_list.next; | ||
234 | } else if (list_is_last(&node->n_list, &delayed_root->node_list)) | ||
235 | goto out; | ||
236 | else | ||
237 | p = node->n_list.next; | ||
238 | |||
239 | next = list_entry(p, struct btrfs_delayed_node, n_list); | ||
240 | atomic_inc(&next->refs); | ||
241 | out: | ||
242 | spin_unlock(&delayed_root->lock); | ||
243 | |||
244 | return next; | ||
245 | } | ||
246 | |||
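btrfs_first_delayed_node() and btrfs_next_delayed_node() both return their result with an extra reference held, so a walker can drop the current node after fetching its successor even while other tasks dequeue nodes concurrently. The resulting traversal shape, as used later by btrfs_run_delayed_items() (sketch):

	struct btrfs_delayed_node *curr, *next;

	curr = btrfs_first_delayed_node(delayed_root);
	while (curr) {
		/* ... process curr ... */
		next = btrfs_next_delayed_node(curr);
		btrfs_release_delayed_node(curr);	/* drop our reference */
		curr = next;
	}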
247 | static void __btrfs_release_delayed_node( | ||
248 | struct btrfs_delayed_node *delayed_node, | ||
249 | int mod) | ||
250 | { | ||
251 | struct btrfs_delayed_root *delayed_root; | ||
252 | |||
253 | if (!delayed_node) | ||
254 | return; | ||
255 | |||
256 | delayed_root = delayed_node->root->fs_info->delayed_root; | ||
257 | |||
258 | mutex_lock(&delayed_node->mutex); | ||
259 | if (delayed_node->count) | ||
260 | btrfs_queue_delayed_node(delayed_root, delayed_node, mod); | ||
261 | else | ||
262 | btrfs_dequeue_delayed_node(delayed_root, delayed_node); | ||
263 | mutex_unlock(&delayed_node->mutex); | ||
264 | |||
265 | if (atomic_dec_and_test(&delayed_node->refs)) { | ||
266 | struct btrfs_root *root = delayed_node->root; | ||
267 | spin_lock(&root->inode_lock); | ||
268 | if (atomic_read(&delayed_node->refs) == 0) { | ||
269 | radix_tree_delete(&root->delayed_nodes_tree, | ||
270 | delayed_node->inode_id); | ||
271 | kmem_cache_free(delayed_node_cache, delayed_node); | ||
272 | } | ||
273 | spin_unlock(&root->inode_lock); | ||
274 | } | ||
275 | } | ||
276 | |||
277 | static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node) | ||
278 | { | ||
279 | __btrfs_release_delayed_node(node, 0); | ||
280 | } | ||
281 | |||
282 | struct btrfs_delayed_node *btrfs_first_prepared_delayed_node( | ||
283 | struct btrfs_delayed_root *delayed_root) | ||
284 | { | ||
285 | struct list_head *p; | ||
286 | struct btrfs_delayed_node *node = NULL; | ||
287 | |||
288 | spin_lock(&delayed_root->lock); | ||
289 | if (list_empty(&delayed_root->prepare_list)) | ||
290 | goto out; | ||
291 | |||
292 | p = delayed_root->prepare_list.next; | ||
293 | list_del_init(p); | ||
294 | node = list_entry(p, struct btrfs_delayed_node, p_list); | ||
295 | atomic_inc(&node->refs); | ||
296 | out: | ||
297 | spin_unlock(&delayed_root->lock); | ||
298 | |||
299 | return node; | ||
300 | } | ||
301 | |||
302 | static inline void btrfs_release_prepared_delayed_node( | ||
303 | struct btrfs_delayed_node *node) | ||
304 | { | ||
305 | __btrfs_release_delayed_node(node, 1); | ||
306 | } | ||
307 | |||
308 | struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len) | ||
309 | { | ||
310 | struct btrfs_delayed_item *item; | ||
311 | item = kmalloc(sizeof(*item) + data_len, GFP_NOFS); | ||
312 | if (item) { | ||
313 | item->data_len = data_len; | ||
314 | item->ins_or_del = 0; | ||
315 | item->bytes_reserved = 0; | ||
316 | item->delayed_node = NULL; | ||
317 | atomic_set(&item->refs, 1); | ||
318 | } | ||
319 | return item; | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * __btrfs_lookup_delayed_item - look up the delayed item by key | ||
324 | * @delayed_node: pointer to the delayed node | ||
325 | * @key: the key to look up | ||
326 | * @prev: used to store the prev item if the right item isn't found | ||
327 | * @next: used to store the next item if the right item isn't found | ||
328 | * | ||
329 | * Note: if we don't find the right item, we will return the prev item and | ||
330 | * the next item. | ||
331 | */ | ||
332 | static struct btrfs_delayed_item *__btrfs_lookup_delayed_item( | ||
333 | struct rb_root *root, | ||
334 | struct btrfs_key *key, | ||
335 | struct btrfs_delayed_item **prev, | ||
336 | struct btrfs_delayed_item **next) | ||
337 | { | ||
338 | struct rb_node *node, *prev_node = NULL; | ||
339 | struct btrfs_delayed_item *delayed_item = NULL; | ||
340 | int ret = 0; | ||
341 | |||
342 | node = root->rb_node; | ||
343 | |||
344 | while (node) { | ||
345 | delayed_item = rb_entry(node, struct btrfs_delayed_item, | ||
346 | rb_node); | ||
347 | prev_node = node; | ||
348 | ret = btrfs_comp_cpu_keys(&delayed_item->key, key); | ||
349 | if (ret < 0) | ||
350 | node = node->rb_right; | ||
351 | else if (ret > 0) | ||
352 | node = node->rb_left; | ||
353 | else | ||
354 | return delayed_item; | ||
355 | } | ||
356 | |||
357 | if (prev) { | ||
358 | if (!prev_node) | ||
359 | *prev = NULL; | ||
360 | else if (ret < 0) | ||
361 | *prev = delayed_item; | ||
362 | else if ((node = rb_prev(prev_node)) != NULL) { | ||
363 | *prev = rb_entry(node, struct btrfs_delayed_item, | ||
364 | rb_node); | ||
365 | } else | ||
366 | *prev = NULL; | ||
367 | } | ||
368 | |||
369 | if (next) { | ||
370 | if (!prev_node) | ||
371 | *next = NULL; | ||
372 | else if (ret > 0) | ||
373 | *next = delayed_item; | ||
374 | else if ((node = rb_next(prev_node)) != NULL) { | ||
375 | *next = rb_entry(node, struct btrfs_delayed_item, | ||
376 | rb_node); | ||
377 | } else | ||
378 | *next = NULL; | ||
379 | } | ||
380 | return NULL; | ||
381 | } | ||
382 | |||
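On a miss the search records where it fell off the tree: *prev gets the greatest item below the key and *next the smallest item above it, so callers can resume a range scan without a second walk. A hypothetical lookup, assuming the tree holds items at offsets 3, 5 and 9 for inode 257:

	struct btrfs_delayed_item *item, *prev, *next;
	struct btrfs_key key = { .objectid = 257,
				 .type = BTRFS_DIR_INDEX_KEY,
				 .offset = 7 };

	item = __btrfs_lookup_delayed_item(&node->ins_root, &key, &prev, &next);
	/* item == NULL; prev is the offset-5 item, next the offset-9 item */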
383 | struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item( | ||
384 | struct btrfs_delayed_node *delayed_node, | ||
385 | struct btrfs_key *key) | ||
386 | { | ||
387 | struct btrfs_delayed_item *item; | ||
388 | |||
389 | item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key, | ||
390 | NULL, NULL); | ||
391 | return item; | ||
392 | } | ||
393 | |||
394 | struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item( | ||
395 | struct btrfs_delayed_node *delayed_node, | ||
396 | struct btrfs_key *key) | ||
397 | { | ||
398 | struct btrfs_delayed_item *item; | ||
399 | |||
400 | item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key, | ||
401 | NULL, NULL); | ||
402 | return item; | ||
403 | } | ||
404 | |||
405 | struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item( | ||
406 | struct btrfs_delayed_node *delayed_node, | ||
407 | struct btrfs_key *key) | ||
408 | { | ||
409 | struct btrfs_delayed_item *item, *next; | ||
410 | |||
411 | item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key, | ||
412 | NULL, &next); | ||
413 | if (!item) | ||
414 | item = next; | ||
415 | |||
416 | return item; | ||
417 | } | ||
418 | |||
419 | struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item( | ||
420 | struct btrfs_delayed_node *delayed_node, | ||
421 | struct btrfs_key *key) | ||
422 | { | ||
423 | struct btrfs_delayed_item *item, *next; | ||
424 | |||
425 | item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key, | ||
426 | NULL, &next); | ||
427 | if (!item) | ||
428 | item = next; | ||
429 | |||
430 | return item; | ||
431 | } | ||
432 | |||
433 | static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node, | ||
434 | struct btrfs_delayed_item *ins, | ||
435 | int action) | ||
436 | { | ||
437 | struct rb_node **p, *node; | ||
438 | struct rb_node *parent_node = NULL; | ||
439 | struct rb_root *root; | ||
440 | struct btrfs_delayed_item *item; | ||
441 | int cmp; | ||
442 | |||
443 | if (action == BTRFS_DELAYED_INSERTION_ITEM) | ||
444 | root = &delayed_node->ins_root; | ||
445 | else if (action == BTRFS_DELAYED_DELETION_ITEM) | ||
446 | root = &delayed_node->del_root; | ||
447 | else | ||
448 | BUG(); | ||
449 | p = &root->rb_node; | ||
450 | node = &ins->rb_node; | ||
451 | |||
452 | while (*p) { | ||
453 | parent_node = *p; | ||
454 | item = rb_entry(parent_node, struct btrfs_delayed_item, | ||
455 | rb_node); | ||
456 | |||
457 | cmp = btrfs_comp_cpu_keys(&item->key, &ins->key); | ||
458 | if (cmp < 0) | ||
459 | p = &(*p)->rb_right; | ||
460 | else if (cmp > 0) | ||
461 | p = &(*p)->rb_left; | ||
462 | else | ||
463 | return -EEXIST; | ||
464 | } | ||
465 | |||
466 | rb_link_node(node, parent_node, p); | ||
467 | rb_insert_color(node, root); | ||
468 | ins->delayed_node = delayed_node; | ||
469 | ins->ins_or_del = action; | ||
470 | |||
471 | if (ins->key.type == BTRFS_DIR_INDEX_KEY && | ||
472 | action == BTRFS_DELAYED_INSERTION_ITEM && | ||
473 | ins->key.offset >= delayed_node->index_cnt) | ||
474 | delayed_node->index_cnt = ins->key.offset + 1; | ||
475 | |||
476 | delayed_node->count++; | ||
477 | atomic_inc(&delayed_node->root->fs_info->delayed_root->items); | ||
478 | return 0; | ||
479 | } | ||
480 | |||
481 | static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node, | ||
482 | struct btrfs_delayed_item *item) | ||
483 | { | ||
484 | return __btrfs_add_delayed_item(node, item, | ||
485 | BTRFS_DELAYED_INSERTION_ITEM); | ||
486 | } | ||
487 | |||
488 | static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node, | ||
489 | struct btrfs_delayed_item *item) | ||
490 | { | ||
491 | return __btrfs_add_delayed_item(node, item, | ||
492 | BTRFS_DELAYED_DELETION_ITEM); | ||
493 | } | ||
494 | |||
495 | static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) | ||
496 | { | ||
497 | struct rb_root *root; | ||
498 | struct btrfs_delayed_root *delayed_root; | ||
499 | |||
500 | delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root; | ||
501 | |||
502 | BUG_ON(!delayed_root); | ||
503 | BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM && | ||
504 | delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM); | ||
505 | |||
506 | if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM) | ||
507 | root = &delayed_item->delayed_node->ins_root; | ||
508 | else | ||
509 | root = &delayed_item->delayed_node->del_root; | ||
510 | |||
511 | rb_erase(&delayed_item->rb_node, root); | ||
512 | delayed_item->delayed_node->count--; | ||
513 | atomic_dec(&delayed_root->items); | ||
514 | if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND && | ||
515 | waitqueue_active(&delayed_root->wait)) | ||
516 | wake_up(&delayed_root->wait); | ||
517 | } | ||
518 | |||
519 | static void btrfs_release_delayed_item(struct btrfs_delayed_item *item) | ||
520 | { | ||
521 | if (item) { | ||
522 | __btrfs_remove_delayed_item(item); | ||
523 | if (atomic_dec_and_test(&item->refs)) | ||
524 | kfree(item); | ||
525 | } | ||
526 | } | ||
527 | |||
528 | struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item( | ||
529 | struct btrfs_delayed_node *delayed_node) | ||
530 | { | ||
531 | struct rb_node *p; | ||
532 | struct btrfs_delayed_item *item = NULL; | ||
533 | |||
534 | p = rb_first(&delayed_node->ins_root); | ||
535 | if (p) | ||
536 | item = rb_entry(p, struct btrfs_delayed_item, rb_node); | ||
537 | |||
538 | return item; | ||
539 | } | ||
540 | |||
541 | struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item( | ||
542 | struct btrfs_delayed_node *delayed_node) | ||
543 | { | ||
544 | struct rb_node *p; | ||
545 | struct btrfs_delayed_item *item = NULL; | ||
546 | |||
547 | p = rb_first(&delayed_node->del_root); | ||
548 | if (p) | ||
549 | item = rb_entry(p, struct btrfs_delayed_item, rb_node); | ||
550 | |||
551 | return item; | ||
552 | } | ||
553 | |||
554 | struct btrfs_delayed_item *__btrfs_next_delayed_item( | ||
555 | struct btrfs_delayed_item *item) | ||
556 | { | ||
557 | struct rb_node *p; | ||
558 | struct btrfs_delayed_item *next = NULL; | ||
559 | |||
560 | p = rb_next(&item->rb_node); | ||
561 | if (p) | ||
562 | next = rb_entry(p, struct btrfs_delayed_item, rb_node); | ||
563 | |||
564 | return next; | ||
565 | } | ||
566 | |||
567 | static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root, | ||
568 | u64 root_id) | ||
569 | { | ||
570 | struct btrfs_key root_key; | ||
571 | |||
572 | if (root->objectid == root_id) | ||
573 | return root; | ||
574 | |||
575 | root_key.objectid = root_id; | ||
576 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
577 | root_key.offset = (u64)-1; | ||
578 | return btrfs_read_fs_root_no_name(root->fs_info, &root_key); | ||
579 | } | ||
580 | |||
581 | static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, | ||
582 | struct btrfs_root *root, | ||
583 | struct btrfs_delayed_item *item) | ||
584 | { | ||
585 | struct btrfs_block_rsv *src_rsv; | ||
586 | struct btrfs_block_rsv *dst_rsv; | ||
587 | u64 num_bytes; | ||
588 | int ret; | ||
589 | |||
590 | if (!trans->bytes_reserved) | ||
591 | return 0; | ||
592 | |||
593 | src_rsv = trans->block_rsv; | ||
594 | dst_rsv = &root->fs_info->global_block_rsv; | ||
595 | |||
596 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); | ||
597 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); | ||
598 | if (!ret) | ||
599 | item->bytes_reserved = num_bytes; | ||
600 | |||
601 | return ret; | ||
602 | } | ||
603 | |||
604 | static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, | ||
605 | struct btrfs_delayed_item *item) | ||
606 | { | ||
607 | struct btrfs_block_rsv *rsv; | ||
608 | |||
609 | if (!item->bytes_reserved) | ||
610 | return; | ||
611 | |||
612 | rsv = &root->fs_info->global_block_rsv; | ||
613 | btrfs_block_rsv_release(root, rsv, | ||
614 | item->bytes_reserved); | ||
615 | } | ||
616 | |||
617 | static int btrfs_delayed_inode_reserve_metadata( | ||
618 | struct btrfs_trans_handle *trans, | ||
619 | struct btrfs_root *root, | ||
620 | struct btrfs_delayed_node *node) | ||
621 | { | ||
622 | struct btrfs_block_rsv *src_rsv; | ||
623 | struct btrfs_block_rsv *dst_rsv; | ||
624 | u64 num_bytes; | ||
625 | int ret; | ||
626 | |||
627 | if (!trans->bytes_reserved) | ||
628 | return 0; | ||
629 | |||
630 | src_rsv = trans->block_rsv; | ||
631 | dst_rsv = &root->fs_info->global_block_rsv; | ||
632 | |||
633 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); | ||
634 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); | ||
635 | if (!ret) | ||
636 | node->bytes_reserved = num_bytes; | ||
637 | |||
638 | return ret; | ||
639 | } | ||
640 | |||
641 | static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root, | ||
642 | struct btrfs_delayed_node *node) | ||
643 | { | ||
644 | struct btrfs_block_rsv *rsv; | ||
645 | |||
646 | if (!node->bytes_reserved) | ||
647 | return; | ||
648 | |||
649 | rsv = &root->fs_info->global_block_rsv; | ||
650 | btrfs_block_rsv_release(root, rsv, | ||
651 | node->bytes_reserved); | ||
652 | node->bytes_reserved = 0; | ||
653 | } | ||
654 | |||
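Both reserve helpers migrate btrfs_calc_trans_metadata_size(root, 1) bytes out of the transaction's reservation into the global block reservation, and the release helpers return them once the delayed item or inode actually reaches the tree. The pairing a caller must keep (sketch):

	ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
	if (ret)
		return ret;	/* nothing reserved, nothing to undo */

	/* ... later, after the item has been written into a leaf ... */
	btrfs_delayed_item_release_metadata(root, item);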
655 | /* | ||
656 | * This helper will insert some continuous items into the same leaf according | ||
657 | * to the free space of the leaf. | ||
658 | */ | ||
659 | static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans, | ||
660 | struct btrfs_root *root, | ||
661 | struct btrfs_path *path, | ||
662 | struct btrfs_delayed_item *item) | ||
663 | { | ||
664 | struct btrfs_delayed_item *curr, *next; | ||
665 | int free_space; | ||
666 | int total_data_size = 0, total_size = 0; | ||
667 | struct extent_buffer *leaf; | ||
668 | char *data_ptr; | ||
669 | struct btrfs_key *keys; | ||
670 | u32 *data_size; | ||
671 | struct list_head head; | ||
672 | int slot; | ||
673 | int nitems; | ||
674 | int i; | ||
675 | int ret = 0; | ||
676 | |||
677 | BUG_ON(!path->nodes[0]); | ||
678 | |||
679 | leaf = path->nodes[0]; | ||
680 | free_space = btrfs_leaf_free_space(root, leaf); | ||
681 | INIT_LIST_HEAD(&head); | ||
682 | |||
683 | next = item; | ||
684 | nitems = 0; | ||
685 | |||
686 | /* | ||
687 | * count the number of continuous items that we can insert in one batch | ||
688 | */ | ||
689 | while (total_size + next->data_len + sizeof(struct btrfs_item) <= | ||
690 | free_space) { | ||
691 | total_data_size += next->data_len; | ||
692 | total_size += next->data_len + sizeof(struct btrfs_item); | ||
693 | list_add_tail(&next->tree_list, &head); | ||
694 | nitems++; | ||
695 | |||
696 | curr = next; | ||
697 | next = __btrfs_next_delayed_item(curr); | ||
698 | if (!next) | ||
699 | break; | ||
700 | |||
701 | if (!btrfs_is_continuous_delayed_item(curr, next)) | ||
702 | break; | ||
703 | } | ||
704 | |||
705 | if (!nitems) { | ||
706 | ret = 0; | ||
707 | goto out; | ||
708 | } | ||
709 | |||
710 | /* | ||
711 | * we need to allocate some memory, but that might cause the task | ||
712 | * to sleep, so we set all locked nodes in the path to blocking locks | ||
713 | * first. | ||
714 | */ | ||
715 | btrfs_set_path_blocking(path); | ||
716 | |||
717 | keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS); | ||
718 | if (!keys) { | ||
719 | ret = -ENOMEM; | ||
720 | goto out; | ||
721 | } | ||
722 | |||
723 | data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS); | ||
724 | if (!data_size) { | ||
725 | ret = -ENOMEM; | ||
726 | goto error; | ||
727 | } | ||
728 | |||
729 | /* get keys of all the delayed items */ | ||
730 | i = 0; | ||
731 | list_for_each_entry(next, &head, tree_list) { | ||
732 | keys[i] = next->key; | ||
733 | data_size[i] = next->data_len; | ||
734 | i++; | ||
735 | } | ||
736 | |||
737 | /* reset all the locked nodes in the path to spinning locks. */ | ||
738 | btrfs_clear_path_blocking(path, NULL); | ||
739 | |||
740 | /* insert the keys of the items */ | ||
741 | ret = setup_items_for_insert(trans, root, path, keys, data_size, | ||
742 | total_data_size, total_size, nitems); | ||
743 | if (ret) | ||
744 | goto error; | ||
745 | |||
746 | /* insert the dir index items */ | ||
747 | slot = path->slots[0]; | ||
748 | list_for_each_entry_safe(curr, next, &head, tree_list) { | ||
749 | data_ptr = btrfs_item_ptr(leaf, slot, char); | ||
750 | write_extent_buffer(leaf, &curr->data, | ||
751 | (unsigned long)data_ptr, | ||
752 | curr->data_len); | ||
753 | slot++; | ||
754 | |||
755 | btrfs_delayed_item_release_metadata(root, curr); | ||
756 | |||
757 | list_del(&curr->tree_list); | ||
758 | btrfs_release_delayed_item(curr); | ||
759 | } | ||
760 | |||
761 | error: | ||
762 | kfree(data_size); | ||
763 | kfree(keys); | ||
764 | out: | ||
765 | return ret; | ||
766 | } | ||
767 | |||
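The batching loop charges each candidate's data_len plus sizeof(struct btrfs_item), the per-item leaf header (25 bytes on disk), against the leaf's remaining free space. A worked example with illustrative numbers:

	/*
	 * free_space = 200, three dir-index items of 40 bytes of data each:
	 *   3 * (40 + 25) = 195 <= 200  -> all three are batched
	 * a fourth identical item would need 260 bytes total, so the
	 * counting loop stops after the third.
	 */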
768 | /* | ||
769 | * This helper handles simple insertions that don't need to extend an | ||
770 | * existing item with new data, e.g. directory index and inode insertion. | ||
771 | */ | ||
772 | static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans, | ||
773 | struct btrfs_root *root, | ||
774 | struct btrfs_path *path, | ||
775 | struct btrfs_delayed_item *delayed_item) | ||
776 | { | ||
777 | struct extent_buffer *leaf; | ||
778 | struct btrfs_item *item; | ||
779 | char *ptr; | ||
780 | int ret; | ||
781 | |||
782 | ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key, | ||
783 | delayed_item->data_len); | ||
784 | if (ret < 0 && ret != -EEXIST) | ||
785 | return ret; | ||
786 | |||
787 | leaf = path->nodes[0]; | ||
788 | |||
789 | item = btrfs_item_nr(leaf, path->slots[0]); | ||
790 | ptr = btrfs_item_ptr(leaf, path->slots[0], char); | ||
791 | |||
792 | write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr, | ||
793 | delayed_item->data_len); | ||
794 | btrfs_mark_buffer_dirty(leaf); | ||
795 | |||
796 | btrfs_delayed_item_release_metadata(root, delayed_item); | ||
797 | return 0; | ||
798 | } | ||
799 | |||
800 | /* | ||
801 | * insert one item first; then, if continuous items follow, try to | ||
802 | * insert them into the same leaf as well. | ||
803 | */ | ||
804 | static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans, | ||
805 | struct btrfs_path *path, | ||
806 | struct btrfs_root *root, | ||
807 | struct btrfs_delayed_node *node) | ||
808 | { | ||
809 | struct btrfs_delayed_item *curr, *prev; | ||
810 | int ret = 0; | ||
811 | |||
812 | do_again: | ||
813 | mutex_lock(&node->mutex); | ||
814 | curr = __btrfs_first_delayed_insertion_item(node); | ||
815 | if (!curr) | ||
816 | goto insert_end; | ||
817 | |||
818 | ret = btrfs_insert_delayed_item(trans, root, path, curr); | ||
819 | if (ret < 0) { | ||
820 | btrfs_release_path(path); | ||
821 | goto insert_end; | ||
822 | } | ||
823 | |||
824 | prev = curr; | ||
825 | curr = __btrfs_next_delayed_item(prev); | ||
826 | if (curr && btrfs_is_continuous_delayed_item(prev, curr)) { | ||
827 | /* insert the continuous items into the same leaf */ | ||
828 | path->slots[0]++; | ||
829 | btrfs_batch_insert_items(trans, root, path, curr); | ||
830 | } | ||
831 | btrfs_release_delayed_item(prev); | ||
832 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
833 | |||
834 | btrfs_release_path(path); | ||
835 | mutex_unlock(&node->mutex); | ||
836 | goto do_again; | ||
837 | |||
838 | insert_end: | ||
839 | mutex_unlock(&node->mutex); | ||
840 | return ret; | ||
841 | } | ||
842 | |||
843 | static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans, | ||
844 | struct btrfs_root *root, | ||
845 | struct btrfs_path *path, | ||
846 | struct btrfs_delayed_item *item) | ||
847 | { | ||
848 | struct btrfs_delayed_item *curr, *next; | ||
849 | struct extent_buffer *leaf; | ||
850 | struct btrfs_key key; | ||
851 | struct list_head head; | ||
852 | int nitems, i, last_item; | ||
853 | int ret = 0; | ||
854 | |||
855 | BUG_ON(!path->nodes[0]); | ||
856 | |||
857 | leaf = path->nodes[0]; | ||
858 | |||
859 | i = path->slots[0]; | ||
860 | last_item = btrfs_header_nritems(leaf) - 1; | ||
861 | if (i > last_item) | ||
862 | return -ENOENT; /* FIXME: Is errno suitable? */ | ||
863 | |||
864 | next = item; | ||
865 | INIT_LIST_HEAD(&head); | ||
866 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
867 | nitems = 0; | ||
868 | /* | ||
869 | * count the number of dir index items that we can delete in one batch | ||
870 | */ | ||
871 | while (btrfs_comp_cpu_keys(&next->key, &key) == 0) { | ||
872 | list_add_tail(&next->tree_list, &head); | ||
873 | nitems++; | ||
874 | |||
875 | curr = next; | ||
876 | next = __btrfs_next_delayed_item(curr); | ||
877 | if (!next) | ||
878 | break; | ||
879 | |||
880 | if (!btrfs_is_continuous_delayed_item(curr, next)) | ||
881 | break; | ||
882 | |||
883 | i++; | ||
884 | if (i > last_item) | ||
885 | break; | ||
886 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
887 | } | ||
888 | |||
889 | if (!nitems) | ||
890 | return 0; | ||
891 | |||
892 | ret = btrfs_del_items(trans, root, path, path->slots[0], nitems); | ||
893 | if (ret) | ||
894 | goto out; | ||
895 | |||
896 | list_for_each_entry_safe(curr, next, &head, tree_list) { | ||
897 | btrfs_delayed_item_release_metadata(root, curr); | ||
898 | list_del(&curr->tree_list); | ||
899 | btrfs_release_delayed_item(curr); | ||
900 | } | ||
901 | |||
902 | out: | ||
903 | return ret; | ||
904 | } | ||
905 | |||
906 | static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans, | ||
907 | struct btrfs_path *path, | ||
908 | struct btrfs_root *root, | ||
909 | struct btrfs_delayed_node *node) | ||
910 | { | ||
911 | struct btrfs_delayed_item *curr, *prev; | ||
912 | int ret = 0; | ||
913 | |||
914 | do_again: | ||
915 | mutex_lock(&node->mutex); | ||
916 | curr = __btrfs_first_delayed_deletion_item(node); | ||
917 | if (!curr) | ||
918 | goto delete_fail; | ||
919 | |||
920 | ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1); | ||
921 | if (ret < 0) | ||
922 | goto delete_fail; | ||
923 | else if (ret > 0) { | ||
924 | /* | ||
925 | * can't find the item this node points to, so the node is | ||
926 | * invalid; just drop it. | ||
927 | */ | ||
928 | prev = curr; | ||
929 | curr = __btrfs_next_delayed_item(prev); | ||
930 | btrfs_release_delayed_item(prev); | ||
931 | ret = 0; | ||
932 | btrfs_release_path(path); | ||
933 | if (curr) | ||
934 | goto do_again; | ||
935 | else | ||
936 | goto delete_fail; | ||
937 | } | ||
938 | |||
939 | btrfs_batch_delete_items(trans, root, path, curr); | ||
940 | btrfs_release_path(path); | ||
941 | mutex_unlock(&node->mutex); | ||
942 | goto do_again; | ||
943 | |||
944 | delete_fail: | ||
945 | btrfs_release_path(path); | ||
946 | mutex_unlock(&node->mutex); | ||
947 | return ret; | ||
948 | } | ||
949 | |||
950 | static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node) | ||
951 | { | ||
952 | struct btrfs_delayed_root *delayed_root; | ||
953 | |||
954 | if (delayed_node && delayed_node->inode_dirty) { | ||
955 | BUG_ON(!delayed_node->root); | ||
956 | delayed_node->inode_dirty = 0; | ||
957 | delayed_node->count--; | ||
958 | |||
959 | delayed_root = delayed_node->root->fs_info->delayed_root; | ||
960 | atomic_dec(&delayed_root->items); | ||
961 | if (atomic_read(&delayed_root->items) < | ||
962 | BTRFS_DELAYED_BACKGROUND && | ||
963 | waitqueue_active(&delayed_root->wait)) | ||
964 | wake_up(&delayed_root->wait); | ||
965 | } | ||
966 | } | ||
967 | |||
968 | static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, | ||
969 | struct btrfs_root *root, | ||
970 | struct btrfs_path *path, | ||
971 | struct btrfs_delayed_node *node) | ||
972 | { | ||
973 | struct btrfs_key key; | ||
974 | struct btrfs_inode_item *inode_item; | ||
975 | struct extent_buffer *leaf; | ||
976 | int ret; | ||
977 | |||
978 | mutex_lock(&node->mutex); | ||
979 | if (!node->inode_dirty) { | ||
980 | mutex_unlock(&node->mutex); | ||
981 | return 0; | ||
982 | } | ||
983 | |||
984 | key.objectid = node->inode_id; | ||
985 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
986 | key.offset = 0; | ||
987 | ret = btrfs_lookup_inode(trans, root, path, &key, 1); | ||
988 | if (ret > 0) { | ||
989 | btrfs_release_path(path); | ||
990 | mutex_unlock(&node->mutex); | ||
991 | return -ENOENT; | ||
992 | } else if (ret < 0) { | ||
993 | mutex_unlock(&node->mutex); | ||
994 | return ret; | ||
995 | } | ||
996 | |||
997 | btrfs_unlock_up_safe(path, 1); | ||
998 | leaf = path->nodes[0]; | ||
999 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | ||
1000 | struct btrfs_inode_item); | ||
1001 | write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item, | ||
1002 | sizeof(struct btrfs_inode_item)); | ||
1003 | btrfs_mark_buffer_dirty(leaf); | ||
1004 | btrfs_release_path(path); | ||
1005 | |||
1006 | btrfs_delayed_inode_release_metadata(root, node); | ||
1007 | btrfs_release_delayed_inode(node); | ||
1008 | mutex_unlock(&node->mutex); | ||
1009 | |||
1010 | return 0; | ||
1011 | } | ||
1012 | |||
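
btrfs_update_delayed_inode() is the write-back half of the delayed inode: the btree item is located once, and the cached btrfs_inode_item is copied into the leaf wholesale with write_extent_buffer() instead of being set field by field. A hedged user-space sketch of that serialize-a-cached-struct idea, with an invented struct layout standing in for the real on-disk format:

#include <string.h>
#include <stdio.h>

struct inode_item { long size; int nlink; };    /* stand-in, not btrfs_inode_item */

/* Stand-in for write_extent_buffer(): copy the cached struct into the
 * "leaf" buffer at the item's offset in one flat copy. */
static void write_item(char *leaf, size_t off, const struct inode_item *src)
{
        memcpy(leaf + off, src, sizeof(*src));
}

int main(void)
{
        char leaf[256] = { 0 };
        struct inode_item cached = { .size = 4096, .nlink = 1 };

        write_item(leaf, 64, &cached);          /* no field-by-field I/O */
        printf("size on 'disk': %ld\n", ((struct inode_item *)(leaf + 64))->size);
        return 0;
}
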
1013 | /* Called when committing the transaction. */ | ||
1014 | int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, | ||
1015 | struct btrfs_root *root) | ||
1016 | { | ||
1017 | struct btrfs_delayed_root *delayed_root; | ||
1018 | struct btrfs_delayed_node *curr_node, *prev_node; | ||
1019 | struct btrfs_path *path; | ||
1020 | struct btrfs_block_rsv *block_rsv; | ||
1021 | int ret = 0; | ||
1022 | |||
1023 | path = btrfs_alloc_path(); | ||
1024 | if (!path) | ||
1025 | return -ENOMEM; | ||
1026 | path->leave_spinning = 1; | ||
1027 | |||
1028 | block_rsv = trans->block_rsv; | ||
1029 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
1030 | |||
1031 | delayed_root = btrfs_get_delayed_root(root); | ||
1032 | |||
1033 | curr_node = btrfs_first_delayed_node(delayed_root); | ||
1034 | while (curr_node) { | ||
1035 | root = curr_node->root; | ||
1036 | ret = btrfs_insert_delayed_items(trans, path, root, | ||
1037 | curr_node); | ||
1038 | if (!ret) | ||
1039 | ret = btrfs_delete_delayed_items(trans, path, root, | ||
1040 | curr_node); | ||
1041 | if (!ret) | ||
1042 | ret = btrfs_update_delayed_inode(trans, root, path, | ||
1043 | curr_node); | ||
1044 | if (ret) { | ||
1045 | btrfs_release_delayed_node(curr_node); | ||
1046 | break; | ||
1047 | } | ||
1048 | |||
1049 | prev_node = curr_node; | ||
1050 | curr_node = btrfs_next_delayed_node(curr_node); | ||
1051 | btrfs_release_delayed_node(prev_node); | ||
1052 | } | ||
1053 | |||
1054 | btrfs_free_path(path); | ||
1055 | trans->block_rsv = block_rsv; | ||
1056 | return ret; | ||
1057 | } | ||
1058 | |||
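
Every flush path in this file, the commit-time loop above included, applies the same fixed order per node: pending insertions first, then pending deletions, then the inode item itself, with each stage short-circuited by the previous one's error. A trivial model of that "if (!ret) ret = ..." pipeline, all names invented:

#include <stdio.h>

static int do_inserts(void)  { puts("inserts");  return 0; }
static int do_deletes(void)  { puts("deletes");  return 0; }
static int do_inode(void)    { puts("inode");    return 0; }

/* Each stage runs only if the previous one succeeded. */
static int flush_node(void)
{
        int ret = do_inserts();

        if (!ret)
                ret = do_deletes();
        if (!ret)
                ret = do_inode();
        return ret;
}

int main(void)
{
        return flush_node();
}
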
1059 | static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, | ||
1060 | struct btrfs_delayed_node *node) | ||
1061 | { | ||
1062 | struct btrfs_path *path; | ||
1063 | struct btrfs_block_rsv *block_rsv; | ||
1064 | int ret; | ||
1065 | |||
1066 | path = btrfs_alloc_path(); | ||
1067 | if (!path) | ||
1068 | return -ENOMEM; | ||
1069 | path->leave_spinning = 1; | ||
1070 | |||
1071 | block_rsv = trans->block_rsv; | ||
1072 | trans->block_rsv = &node->root->fs_info->global_block_rsv; | ||
1073 | |||
1074 | ret = btrfs_insert_delayed_items(trans, path, node->root, node); | ||
1075 | if (!ret) | ||
1076 | ret = btrfs_delete_delayed_items(trans, path, node->root, node); | ||
1077 | if (!ret) | ||
1078 | ret = btrfs_update_delayed_inode(trans, node->root, path, node); | ||
1079 | btrfs_free_path(path); | ||
1080 | |||
1081 | trans->block_rsv = block_rsv; | ||
1082 | return ret; | ||
1083 | } | ||
1084 | |||
1085 | int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, | ||
1086 | struct inode *inode) | ||
1087 | { | ||
1088 | struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); | ||
1089 | int ret; | ||
1090 | |||
1091 | if (!delayed_node) | ||
1092 | return 0; | ||
1093 | |||
1094 | mutex_lock(&delayed_node->mutex); | ||
1095 | if (!delayed_node->count) { | ||
1096 | mutex_unlock(&delayed_node->mutex); | ||
1097 | btrfs_release_delayed_node(delayed_node); | ||
1098 | return 0; | ||
1099 | } | ||
1100 | mutex_unlock(&delayed_node->mutex); | ||
1101 | |||
1102 | ret = __btrfs_commit_inode_delayed_items(trans, delayed_node); | ||
1103 | btrfs_release_delayed_node(delayed_node); | ||
1104 | return ret; | ||
1105 | } | ||
1106 | |||
1107 | void btrfs_remove_delayed_node(struct inode *inode) | ||
1108 | { | ||
1109 | struct btrfs_delayed_node *delayed_node; | ||
1110 | |||
1111 | delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node); | ||
1112 | if (!delayed_node) | ||
1113 | return; | ||
1114 | |||
1115 | BTRFS_I(inode)->delayed_node = NULL; | ||
1116 | btrfs_release_delayed_node(delayed_node); | ||
1117 | } | ||
1118 | |||
1119 | struct btrfs_async_delayed_node { | ||
1120 | struct btrfs_root *root; | ||
1121 | struct btrfs_delayed_node *delayed_node; | ||
1122 | struct btrfs_work work; | ||
1123 | }; | ||
1124 | |||
1125 | static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) | ||
1126 | { | ||
1127 | struct btrfs_async_delayed_node *async_node; | ||
1128 | struct btrfs_trans_handle *trans; | ||
1129 | struct btrfs_path *path; | ||
1130 | struct btrfs_delayed_node *delayed_node = NULL; | ||
1131 | struct btrfs_root *root; | ||
1132 | struct btrfs_block_rsv *block_rsv; | ||
1133 | unsigned long nr = 0; | ||
1134 | int need_requeue = 0; | ||
1135 | int ret; | ||
1136 | |||
1137 | async_node = container_of(work, struct btrfs_async_delayed_node, work); | ||
1138 | |||
1139 | path = btrfs_alloc_path(); | ||
1140 | if (!path) | ||
1141 | goto out; | ||
1142 | path->leave_spinning = 1; | ||
1143 | |||
1144 | delayed_node = async_node->delayed_node; | ||
1145 | root = delayed_node->root; | ||
1146 | |||
1147 | trans = btrfs_join_transaction(root); | ||
1148 | if (IS_ERR(trans)) | ||
1149 | goto free_path; | ||
1150 | |||
1151 | block_rsv = trans->block_rsv; | ||
1152 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
1153 | |||
1154 | ret = btrfs_insert_delayed_items(trans, path, root, delayed_node); | ||
1155 | if (!ret) | ||
1156 | ret = btrfs_delete_delayed_items(trans, path, root, | ||
1157 | delayed_node); | ||
1158 | |||
1159 | if (!ret) | ||
1160 | btrfs_update_delayed_inode(trans, root, path, delayed_node); | ||
1161 | |||
1162 | /* | ||
1163 | * New delayed items may have been inserted, so we may need to | ||
1164 | * requeue the work. Besides that, we must dequeue empty delayed | ||
1165 | * nodes to avoid a race between delayed item balancing and the | ||
1166 | * worker. The race looks like this: | ||
1167 | * Task1 Worker thread | ||
1168 | * count == 0, needn't requeue | ||
1169 | * also needn't insert the | ||
1170 | * delayed node into prepare | ||
1171 | * list again. | ||
1172 | * add lots of delayed items | ||
1173 | * queue the delayed node | ||
1174 | * already in the list, | ||
1175 | * and not in the prepare | ||
1176 | * list, it means the delayed | ||
1177 | * node is being dealt with | ||
1178 | * by the worker. | ||
1179 | * do delayed items balance | ||
1180 | * the delayed node is being | ||
1181 | * dealt with by the worker | ||
1182 | * now, just wait. | ||
1183 | * the worker goto idle. | ||
1184 | * Task1 will sleep until the transaction is committed; a sketch of this requeue decision follows the function. | ||
1185 | */ | ||
1186 | mutex_lock(&delayed_node->mutex); | ||
1187 | if (delayed_node->count) | ||
1188 | need_requeue = 1; | ||
1189 | else | ||
1190 | btrfs_dequeue_delayed_node(root->fs_info->delayed_root, | ||
1191 | delayed_node); | ||
1192 | mutex_unlock(&delayed_node->mutex); | ||
1193 | |||
1194 | nr = trans->blocks_used; | ||
1195 | |||
1196 | trans->block_rsv = block_rsv; | ||
1197 | btrfs_end_transaction_dmeta(trans, root); | ||
1198 | __btrfs_btree_balance_dirty(root, nr); | ||
1199 | free_path: | ||
1200 | btrfs_free_path(path); | ||
1201 | out: | ||
1202 | if (need_requeue) | ||
1203 | btrfs_requeue_work(&async_node->work); | ||
1204 | else { | ||
1205 | btrfs_release_prepared_delayed_node(delayed_node); | ||
1206 | kfree(async_node); | ||
1207 | } | ||
1208 | } | ||
1209 | |||
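
The requeue decision at the end of the worker is what closes the race described in the comment above: the node's item count is re-checked under the node mutex, and the node is either requeued (new items arrived while it was being flushed) or dequeued so a later balance sees a consistent state. A user-space model of that check, using a plain pthread mutex and invented names:

#include <pthread.h>
#include <stdio.h>

struct node {
        pthread_mutex_t lock;
        int count;              /* pending delayed items */
        int queued;             /* still on the prepare list? */
};

/* Re-check under the lock: requeue if new work arrived, dequeue otherwise. */
static int worker_finish(struct node *n)
{
        int requeue;

        pthread_mutex_lock(&n->lock);
        requeue = n->count > 0;
        if (!requeue)
                n->queued = 0;  /* safe: no one can add items while we hold the lock */
        pthread_mutex_unlock(&n->lock);
        return requeue;
}

int main(void)
{
        struct node n = { PTHREAD_MUTEX_INITIALIZER, 2, 1 };

        while (worker_finish(&n)) {
                printf("requeued with %d items\n", n.count);
                n.count = 0;    /* pretend the next pass flushed them */
        }
        printf("dequeued\n");
        return 0;
}
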
1210 | static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, | ||
1211 | struct btrfs_root *root, int all) | ||
1212 | { | ||
1213 | struct btrfs_async_delayed_node *async_node; | ||
1214 | struct btrfs_delayed_node *curr; | ||
1215 | int count = 0; | ||
1216 | |||
1217 | again: | ||
1218 | curr = btrfs_first_prepared_delayed_node(delayed_root); | ||
1219 | if (!curr) | ||
1220 | return 0; | ||
1221 | |||
1222 | async_node = kmalloc(sizeof(*async_node), GFP_NOFS); | ||
1223 | if (!async_node) { | ||
1224 | btrfs_release_prepared_delayed_node(curr); | ||
1225 | return -ENOMEM; | ||
1226 | } | ||
1227 | |||
1228 | async_node->root = root; | ||
1229 | async_node->delayed_node = curr; | ||
1230 | |||
1231 | async_node->work.func = btrfs_async_run_delayed_node_done; | ||
1232 | async_node->work.flags = 0; | ||
1233 | |||
1234 | btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work); | ||
1235 | count++; | ||
1236 | |||
1237 | if (all || count < 4) | ||
1238 | goto again; | ||
1239 | |||
1240 | return 0; | ||
1241 | } | ||
1242 | |||
1243 | void btrfs_assert_delayed_root_empty(struct btrfs_root *root) | ||
1244 | { | ||
1245 | struct btrfs_delayed_root *delayed_root; | ||
1246 | delayed_root = btrfs_get_delayed_root(root); | ||
1247 | WARN_ON(btrfs_first_delayed_node(delayed_root)); | ||
1248 | } | ||
1249 | |||
1250 | void btrfs_balance_delayed_items(struct btrfs_root *root) | ||
1251 | { | ||
1252 | struct btrfs_delayed_root *delayed_root; | ||
1253 | |||
1254 | delayed_root = btrfs_get_delayed_root(root); | ||
1255 | |||
1256 | if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) | ||
1257 | return; | ||
1258 | |||
1259 | if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) { | ||
1260 | int ret; | ||
1261 | ret = btrfs_wq_run_delayed_node(delayed_root, root, 1); | ||
1262 | if (ret) | ||
1263 | return; | ||
1264 | |||
1265 | wait_event_interruptible_timeout( | ||
1266 | delayed_root->wait, | ||
1267 | (atomic_read(&delayed_root->items) < | ||
1268 | BTRFS_DELAYED_BACKGROUND), | ||
1269 | HZ); | ||
1270 | return; | ||
1271 | } | ||
1272 | |||
1273 | btrfs_wq_run_delayed_node(delayed_root, root, 0); | ||
1274 | } | ||
1275 | |||
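
btrfs_balance_delayed_items() is a two-level throttle: below BTRFS_DELAYED_BACKGROUND it does nothing, above it it kicks the async workers, and at or above BTRFS_DELAYED_WRITEBACK the caller itself flushes everything and then waits (up to HZ) for the item count to fall. A compact model of those thresholds; the constant values here are invented, not the kernel's:

#include <stdio.h>

#define BACKGROUND 128          /* invented stand-ins for the kernel constants */
#define WRITEBACK  512

/* Returns 0 = do nothing, 1 = kick async flush, 2 = flush all and wait. */
static int balance_action(int items)
{
        if (items < BACKGROUND)
                return 0;
        if (items >= WRITEBACK)
                return 2;
        return 1;
}

int main(void)
{
        int samples[] = { 10, 200, 900 };

        for (int i = 0; i < 3; i++)
                printf("%d items -> action %d\n", samples[i],
                       balance_action(samples[i]));
        return 0;
}
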
1276 | int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, | ||
1277 | struct btrfs_root *root, const char *name, | ||
1278 | int name_len, struct inode *dir, | ||
1279 | struct btrfs_disk_key *disk_key, u8 type, | ||
1280 | u64 index) | ||
1281 | { | ||
1282 | struct btrfs_delayed_node *delayed_node; | ||
1283 | struct btrfs_delayed_item *delayed_item; | ||
1284 | struct btrfs_dir_item *dir_item; | ||
1285 | int ret; | ||
1286 | |||
1287 | delayed_node = btrfs_get_or_create_delayed_node(dir); | ||
1288 | if (IS_ERR(delayed_node)) | ||
1289 | return PTR_ERR(delayed_node); | ||
1290 | |||
1291 | delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len); | ||
1292 | if (!delayed_item) { | ||
1293 | ret = -ENOMEM; | ||
1294 | goto release_node; | ||
1295 | } | ||
1296 | |||
1297 | ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item); | ||
1298 | /* | ||
1299 | * we reserved enough space when we started the transaction, so | ||
1300 | * a metadata reservation failure here is impossible | ||
1301 | */ | ||
1302 | BUG_ON(ret); | ||
1303 | |||
1304 | delayed_item->key.objectid = btrfs_ino(dir); | ||
1305 | btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY); | ||
1306 | delayed_item->key.offset = index; | ||
1307 | |||
1308 | dir_item = (struct btrfs_dir_item *)delayed_item->data; | ||
1309 | dir_item->location = *disk_key; | ||
1310 | dir_item->transid = cpu_to_le64(trans->transid); | ||
1311 | dir_item->data_len = 0; | ||
1312 | dir_item->name_len = cpu_to_le16(name_len); | ||
1313 | dir_item->type = type; | ||
1314 | memcpy((char *)(dir_item + 1), name, name_len); | ||
1315 | |||
1316 | mutex_lock(&delayed_node->mutex); | ||
1317 | ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item); | ||
1318 | if (unlikely(ret)) { | ||
1319 | printk(KERN_ERR "error adding delayed dir index item (name: %s) into " | ||
1320 | "the insertion tree of the delayed node " | ||
1321 | "(root id: %llu, inode id: %llu, errno: %d)\n", | ||
1322 | name, | ||
1323 | (unsigned long long)delayed_node->root->objectid, | ||
1324 | (unsigned long long)delayed_node->inode_id, | ||
1325 | ret); | ||
1326 | BUG(); | ||
1327 | } | ||
1328 | mutex_unlock(&delayed_node->mutex); | ||
1329 | |||
1330 | release_node: | ||
1331 | btrfs_release_delayed_node(delayed_node); | ||
1332 | return ret; | ||
1333 | } | ||
1334 | |||
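
Note that the insertion path above never touches the btree: a fully formed directory item, name bytes included, is staged in the delayed item's trailing data[] buffer. A user-space sketch of carrying a fixed header plus a variable-length name in one allocation; the layout is illustrative, not the btrfs on-disk format:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

struct dir_item {
        unsigned short name_len;
        unsigned char type;
        char name[];            /* name bytes follow the header, as with data[0] */
};

static struct dir_item *stage_dir_item(const char *name, unsigned char type)
{
        size_t len = strlen(name);
        struct dir_item *di = malloc(sizeof(*di) + len);

        if (!di)
                return NULL;
        di->name_len = (unsigned short)len;
        di->type = type;
        memcpy(di->name, name, len);    /* one buffer: header + name */
        return di;
}

int main(void)
{
        struct dir_item *di = stage_dir_item("hello.txt", 1);

        if (di) {
                printf("staged %.*s\n", di->name_len, di->name);
                free(di);
        }
        return 0;
}
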
1335 | static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root, | ||
1336 | struct btrfs_delayed_node *node, | ||
1337 | struct btrfs_key *key) | ||
1338 | { | ||
1339 | struct btrfs_delayed_item *item; | ||
1340 | |||
1341 | mutex_lock(&node->mutex); | ||
1342 | item = __btrfs_lookup_delayed_insertion_item(node, key); | ||
1343 | if (!item) { | ||
1344 | mutex_unlock(&node->mutex); | ||
1345 | return 1; | ||
1346 | } | ||
1347 | |||
1348 | btrfs_delayed_item_release_metadata(root, item); | ||
1349 | btrfs_release_delayed_item(item); | ||
1350 | mutex_unlock(&node->mutex); | ||
1351 | return 0; | ||
1352 | } | ||
1353 | |||
1354 | int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, | ||
1355 | struct btrfs_root *root, struct inode *dir, | ||
1356 | u64 index) | ||
1357 | { | ||
1358 | struct btrfs_delayed_node *node; | ||
1359 | struct btrfs_delayed_item *item; | ||
1360 | struct btrfs_key item_key; | ||
1361 | int ret; | ||
1362 | |||
1363 | node = btrfs_get_or_create_delayed_node(dir); | ||
1364 | if (IS_ERR(node)) | ||
1365 | return PTR_ERR(node); | ||
1366 | |||
1367 | item_key.objectid = btrfs_ino(dir); | ||
1368 | btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY); | ||
1369 | item_key.offset = index; | ||
1370 | |||
1371 | ret = btrfs_delete_delayed_insertion_item(root, node, &item_key); | ||
1372 | if (!ret) | ||
1373 | goto end; | ||
1374 | |||
1375 | item = btrfs_alloc_delayed_item(0); | ||
1376 | if (!item) { | ||
1377 | ret = -ENOMEM; | ||
1378 | goto end; | ||
1379 | } | ||
1380 | |||
1381 | item->key = item_key; | ||
1382 | |||
1383 | ret = btrfs_delayed_item_reserve_metadata(trans, root, item); | ||
1384 | /* | ||
1385 | * we reserved enough space when we started the transaction, so | ||
1386 | * a metadata reservation failure here is impossible. | ||
1387 | */ | ||
1388 | BUG_ON(ret); | ||
1389 | |||
1390 | mutex_lock(&node->mutex); | ||
1391 | ret = __btrfs_add_delayed_deletion_item(node, item); | ||
1392 | if (unlikely(ret)) { | ||
1393 | printk(KERN_ERR "error adding delayed dir index item (index: %llu) " | ||
1394 | "into the deletion tree of the delayed node " | ||
1395 | "(root id: %llu, inode id: %llu, errno: %d)\n", | ||
1396 | (unsigned long long)index, | ||
1397 | (unsigned long long)node->root->objectid, | ||
1398 | (unsigned long long)node->inode_id, | ||
1399 | ret); | ||
1400 | BUG(); | ||
1401 | } | ||
1402 | mutex_unlock(&node->mutex); | ||
1403 | end: | ||
1404 | btrfs_release_delayed_node(node); | ||
1405 | return ret; | ||
1406 | } | ||
1407 | |||
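
The deletion path tries btrfs_delete_delayed_insertion_item() first: if an insertion for the same key is still queued in memory, the two operations cancel out and no deletion item is created at all. A tiny model of that in-memory annihilation, with a hypothetical pending set:

#include <stdio.h>

static int pending_insert[16];          /* 1 = key has a queued insertion */

/* Returns 1 if the delete was absorbed by cancelling a queued insert. */
static int delete_index(int key)
{
        if (pending_insert[key]) {
                pending_insert[key] = 0;        /* insert + delete annihilate */
                return 1;
        }
        printf("queueing a real deletion item for key %d\n", key);
        return 0;
}

int main(void)
{
        pending_insert[3] = 1;
        delete_index(3);        /* cancelled in memory; the btree never sees it */
        delete_index(7);        /* needs a real deletion item */
        return 0;
}
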
1408 | int btrfs_inode_delayed_dir_index_count(struct inode *inode) | ||
1409 | { | ||
1410 | struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); | ||
1411 | |||
1412 | if (!delayed_node) | ||
1413 | return -ENOENT; | ||
1414 | |||
1415 | /* | ||
1416 | * Since we hold the i_mutex of this directory, no new directory index | ||
1417 | * can be added to the delayed node and index_cnt cannot be updated | ||
1418 | * now, so we need not lock the delayed node. | ||
1419 | */ | ||
1420 | if (!delayed_node->index_cnt) { | ||
1421 | btrfs_release_delayed_node(delayed_node); | ||
1422 | return -EINVAL; | ||
1423 | } | ||
1424 | |||
1425 | BTRFS_I(inode)->index_cnt = delayed_node->index_cnt; | ||
1426 | btrfs_release_delayed_node(delayed_node); | ||
1427 | return 0; | ||
1428 | } | ||
1429 | |||
1430 | void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, | ||
1431 | struct list_head *del_list) | ||
1432 | { | ||
1433 | struct btrfs_delayed_node *delayed_node; | ||
1434 | struct btrfs_delayed_item *item; | ||
1435 | |||
1436 | delayed_node = btrfs_get_delayed_node(inode); | ||
1437 | if (!delayed_node) | ||
1438 | return; | ||
1439 | |||
1440 | mutex_lock(&delayed_node->mutex); | ||
1441 | item = __btrfs_first_delayed_insertion_item(delayed_node); | ||
1442 | while (item) { | ||
1443 | atomic_inc(&item->refs); | ||
1444 | list_add_tail(&item->readdir_list, ins_list); | ||
1445 | item = __btrfs_next_delayed_item(item); | ||
1446 | } | ||
1447 | |||
1448 | item = __btrfs_first_delayed_deletion_item(delayed_node); | ||
1449 | while (item) { | ||
1450 | atomic_inc(&item->refs); | ||
1451 | list_add_tail(&item->readdir_list, del_list); | ||
1452 | item = __btrfs_next_delayed_item(item); | ||
1453 | } | ||
1454 | mutex_unlock(&delayed_node->mutex); | ||
1455 | /* | ||
1456 | * This delayed node is still cached in the btrfs inode, so refs | ||
1457 | * must be > 1 now, and we need not check whether it is about to | ||
1458 | * be freed. | ||
1459 | * | ||
1460 | * Besides that, this function is only used for readdir, and no | ||
1461 | * delayed items are inserted or deleted during that period, so we | ||
1462 | * also need not requeue or dequeue this delayed node. | ||
1463 | */ | ||
1464 | atomic_dec(&delayed_node->refs); | ||
1465 | } | ||
1466 | |||
1467 | void btrfs_put_delayed_items(struct list_head *ins_list, | ||
1468 | struct list_head *del_list) | ||
1469 | { | ||
1470 | struct btrfs_delayed_item *curr, *next; | ||
1471 | |||
1472 | list_for_each_entry_safe(curr, next, ins_list, readdir_list) { | ||
1473 | list_del(&curr->readdir_list); | ||
1474 | if (atomic_dec_and_test(&curr->refs)) | ||
1475 | kfree(curr); | ||
1476 | } | ||
1477 | |||
1478 | list_for_each_entry_safe(curr, next, del_list, readdir_list) { | ||
1479 | list_del(&curr->readdir_list); | ||
1480 | if (atomic_dec_and_test(&curr->refs)) | ||
1481 | kfree(curr); | ||
1482 | } | ||
1483 | } | ||
1484 | |||
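
Both loops above rely on the atomic_dec_and_test() idiom: whoever drops the reference count to zero frees the object, which lets readdir and the delayed machinery share items without further coordination. A user-space model using C11 atomics, names invented:

#include <stdatomic.h>
#include <stdlib.h>
#include <stdio.h>

struct item {
        atomic_int refs;
        int payload;
};

/* The last dropper frees the object, as with atomic_dec_and_test(). */
static void item_put(struct item *it)
{
        if (atomic_fetch_sub(&it->refs, 1) == 1) {
                printf("freeing item %d\n", it->payload);
                free(it);
        }
}

int main(void)
{
        struct item *it = malloc(sizeof(*it));

        atomic_init(&it->refs, 2);      /* e.g. tree + readdir list */
        it->payload = 42;
        item_put(it);                   /* readdir is done with it */
        item_put(it);                   /* tree releases it: freed here */
        return 0;
}
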
1485 | int btrfs_should_delete_dir_index(struct list_head *del_list, | ||
1486 | u64 index) | ||
1487 | { | ||
1488 | struct btrfs_delayed_item *curr, *next; | ||
1489 | int ret; | ||
1490 | |||
1491 | if (list_empty(del_list)) | ||
1492 | return 0; | ||
1493 | |||
1494 | list_for_each_entry_safe(curr, next, del_list, readdir_list) { | ||
1495 | if (curr->key.offset > index) | ||
1496 | break; | ||
1497 | |||
1498 | list_del(&curr->readdir_list); | ||
1499 | ret = (curr->key.offset == index); | ||
1500 | |||
1501 | if (atomic_dec_and_test(&curr->refs)) | ||
1502 | kfree(curr); | ||
1503 | |||
1504 | if (ret) | ||
1505 | return 1; | ||
1506 | else | ||
1507 | continue; | ||
1508 | } | ||
1509 | return 0; | ||
1510 | } | ||
1511 | |||
1512 | /* | ||
1513 | * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree | ||
1514 | * Emit the directory entries that are still queued in the delayed tree. | ||
1515 | */ | ||
1516 | int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent, | ||
1517 | filldir_t filldir, | ||
1518 | struct list_head *ins_list) | ||
1519 | { | ||
1520 | struct btrfs_dir_item *di; | ||
1521 | struct btrfs_delayed_item *curr, *next; | ||
1522 | struct btrfs_key location; | ||
1523 | char *name; | ||
1524 | int name_len; | ||
1525 | int over = 0; | ||
1526 | unsigned char d_type; | ||
1527 | |||
1528 | if (list_empty(ins_list)) | ||
1529 | return 0; | ||
1530 | |||
1531 | /* | ||
1532 | * The data of a delayed item never changes, so we need not | ||
1533 | * lock the items. And since we hold the i_mutex of the | ||
1534 | * directory, nobody can delete any directory index now. | ||
1535 | */ | ||
1536 | list_for_each_entry_safe(curr, next, ins_list, readdir_list) { | ||
1537 | list_del(&curr->readdir_list); | ||
1538 | |||
1539 | if (curr->key.offset < filp->f_pos) { | ||
1540 | if (atomic_dec_and_test(&curr->refs)) | ||
1541 | kfree(curr); | ||
1542 | continue; | ||
1543 | } | ||
1544 | |||
1545 | filp->f_pos = curr->key.offset; | ||
1546 | |||
1547 | di = (struct btrfs_dir_item *)curr->data; | ||
1548 | name = (char *)(di + 1); | ||
1549 | name_len = le16_to_cpu(di->name_len); | ||
1550 | |||
1551 | d_type = btrfs_filetype_table[di->type]; | ||
1552 | btrfs_disk_key_to_cpu(&location, &di->location); | ||
1553 | |||
1554 | over = filldir(dirent, name, name_len, curr->key.offset, | ||
1555 | location.objectid, d_type); | ||
1556 | |||
1557 | if (atomic_dec_and_test(&curr->refs)) | ||
1558 | kfree(curr); | ||
1559 | |||
1560 | if (over) | ||
1561 | return 1; | ||
1562 | } | ||
1563 | return 0; | ||
1564 | } | ||
1565 | |||
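
During readdir the delayed insertion list acts as a second, already-sorted source of entries: items below f_pos are skipped and dropped, and everything else is emitted through filldir exactly as an on-disk entry would be. A sketch of consuming a sorted pending list from a resume position, with invented names:

#include <stdio.h>

struct entry { long offset; const char *name; };

/* Emit pending entries at or after 'pos'; returns how many were emitted. */
static int emit_pending(const struct entry *list, int n, long pos)
{
        int emitted = 0;

        for (int i = 0; i < n; i++) {
                if (list[i].offset < pos)
                        continue;       /* already consumed by a prior readdir */
                printf("%ld %s\n", list[i].offset, list[i].name);
                emitted++;
        }
        return emitted;
}

int main(void)
{
        struct entry pending[] = { { 2, "a" }, { 5, "b" }, { 9, "c" } };

        emit_pending(pending, 3, 4);    /* resumes mid-stream, like f_pos */
        return 0;
}
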
1566 | BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item, | ||
1567 | generation, 64); | ||
1568 | BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item, | ||
1569 | sequence, 64); | ||
1570 | BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item, | ||
1571 | transid, 64); | ||
1572 | BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64); | ||
1573 | BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item, | ||
1574 | nbytes, 64); | ||
1575 | BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item, | ||
1576 | block_group, 64); | ||
1577 | BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32); | ||
1578 | BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32); | ||
1579 | BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32); | ||
1580 | BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32); | ||
1581 | BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64); | ||
1582 | BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64); | ||
1583 | |||
1584 | BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); | ||
1585 | BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32); | ||
1586 | |||
1587 | static void fill_stack_inode_item(struct btrfs_trans_handle *trans, | ||
1588 | struct btrfs_inode_item *inode_item, | ||
1589 | struct inode *inode) | ||
1590 | { | ||
1591 | btrfs_set_stack_inode_uid(inode_item, inode->i_uid); | ||
1592 | btrfs_set_stack_inode_gid(inode_item, inode->i_gid); | ||
1593 | btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size); | ||
1594 | btrfs_set_stack_inode_mode(inode_item, inode->i_mode); | ||
1595 | btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink); | ||
1596 | btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode)); | ||
1597 | btrfs_set_stack_inode_generation(inode_item, | ||
1598 | BTRFS_I(inode)->generation); | ||
1599 | btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence); | ||
1600 | btrfs_set_stack_inode_transid(inode_item, trans->transid); | ||
1601 | btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); | ||
1602 | btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); | ||
1603 | btrfs_set_stack_inode_block_group(inode_item, 0); | ||
1604 | |||
1605 | btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item), | ||
1606 | inode->i_atime.tv_sec); | ||
1607 | btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item), | ||
1608 | inode->i_atime.tv_nsec); | ||
1609 | |||
1610 | btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item), | ||
1611 | inode->i_mtime.tv_sec); | ||
1612 | btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item), | ||
1613 | inode->i_mtime.tv_nsec); | ||
1614 | |||
1615 | btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item), | ||
1616 | inode->i_ctime.tv_sec); | ||
1617 | btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item), | ||
1618 | inode->i_ctime.tv_nsec); | ||
1619 | } | ||
1620 | |||
1621 | int btrfs_fill_inode(struct inode *inode, u32 *rdev) | ||
1622 | { | ||
1623 | struct btrfs_delayed_node *delayed_node; | ||
1624 | struct btrfs_inode_item *inode_item; | ||
1625 | struct btrfs_timespec *tspec; | ||
1626 | |||
1627 | delayed_node = btrfs_get_delayed_node(inode); | ||
1628 | if (!delayed_node) | ||
1629 | return -ENOENT; | ||
1630 | |||
1631 | mutex_lock(&delayed_node->mutex); | ||
1632 | if (!delayed_node->inode_dirty) { | ||
1633 | mutex_unlock(&delayed_node->mutex); | ||
1634 | btrfs_release_delayed_node(delayed_node); | ||
1635 | return -ENOENT; | ||
1636 | } | ||
1637 | |||
1638 | inode_item = &delayed_node->inode_item; | ||
1639 | |||
1640 | inode->i_uid = btrfs_stack_inode_uid(inode_item); | ||
1641 | inode->i_gid = btrfs_stack_inode_gid(inode_item); | ||
1642 | btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); | ||
1643 | inode->i_mode = btrfs_stack_inode_mode(inode_item); | ||
1644 | inode->i_nlink = btrfs_stack_inode_nlink(inode_item); | ||
1645 | inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); | ||
1646 | BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); | ||
1647 | BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item); | ||
1648 | inode->i_rdev = 0; | ||
1649 | *rdev = btrfs_stack_inode_rdev(inode_item); | ||
1650 | BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); | ||
1651 | |||
1652 | tspec = btrfs_inode_atime(inode_item); | ||
1653 | inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec); | ||
1654 | inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec); | ||
1655 | |||
1656 | tspec = btrfs_inode_mtime(inode_item); | ||
1657 | inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec); | ||
1658 | inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec); | ||
1659 | |||
1660 | tspec = btrfs_inode_ctime(inode_item); | ||
1661 | inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec); | ||
1662 | inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec); | ||
1663 | |||
1664 | inode->i_generation = BTRFS_I(inode)->generation; | ||
1665 | BTRFS_I(inode)->index_cnt = (u64)-1; | ||
1666 | |||
1667 | mutex_unlock(&delayed_node->mutex); | ||
1668 | btrfs_release_delayed_node(delayed_node); | ||
1669 | return 0; | ||
1670 | } | ||
1671 | |||
1672 | int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, | ||
1673 | struct btrfs_root *root, struct inode *inode) | ||
1674 | { | ||
1675 | struct btrfs_delayed_node *delayed_node; | ||
1676 | int ret = 0; | ||
1677 | |||
1678 | delayed_node = btrfs_get_or_create_delayed_node(inode); | ||
1679 | if (IS_ERR(delayed_node)) | ||
1680 | return PTR_ERR(delayed_node); | ||
1681 | |||
1682 | mutex_lock(&delayed_node->mutex); | ||
1683 | if (delayed_node->inode_dirty) { | ||
1684 | fill_stack_inode_item(trans, &delayed_node->inode_item, inode); | ||
1685 | goto release_node; | ||
1686 | } | ||
1687 | |||
1688 | ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node); | ||
1689 | /* | ||
1690 | * we must have reserved enough space when we started the | ||
1691 | * transaction, so a metadata reservation failure is impossible | ||
1692 | */ | ||
1693 | BUG_ON(ret); | ||
1694 | |||
1695 | fill_stack_inode_item(trans, &delayed_node->inode_item, inode); | ||
1696 | delayed_node->inode_dirty = 1; | ||
1697 | delayed_node->count++; | ||
1698 | atomic_inc(&root->fs_info->delayed_root->items); | ||
1699 | release_node: | ||
1700 | mutex_unlock(&delayed_node->mutex); | ||
1701 | btrfs_release_delayed_node(delayed_node); | ||
1702 | return ret; | ||
1703 | } | ||
1704 | |||
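
btrfs_delayed_update_inode() makes dirtying idempotent: when the node is already dirty only the cached copy is refreshed, so the reservation and count bookkeeping happen exactly once per dirty cycle. A tiny model of mark-once dirtying, all names invented:

#include <stdio.h>

struct node { int dirty; int snapshot; };

static int reservations;        /* models the metadata reservation */

/* Refresh the cached copy; reserve and count only on the 0 -> 1 edge. */
static void mark_dirty(struct node *n, int value)
{
        n->snapshot = value;    /* always take the newest copy */
        if (n->dirty)
                return;         /* bookkeeping already done this cycle */
        reservations++;
        n->dirty = 1;
}

int main(void)
{
        struct node n = { 0, 0 };

        mark_dirty(&n, 1);
        mark_dirty(&n, 2);      /* second call only refreshes the snapshot */
        printf("snapshot=%d reservations=%d\n", n.snapshot, reservations);
        return 0;
}
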
1705 | static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node) | ||
1706 | { | ||
1707 | struct btrfs_root *root = delayed_node->root; | ||
1708 | struct btrfs_delayed_item *curr_item, *prev_item; | ||
1709 | |||
1710 | mutex_lock(&delayed_node->mutex); | ||
1711 | curr_item = __btrfs_first_delayed_insertion_item(delayed_node); | ||
1712 | while (curr_item) { | ||
1713 | btrfs_delayed_item_release_metadata(root, curr_item); | ||
1714 | prev_item = curr_item; | ||
1715 | curr_item = __btrfs_next_delayed_item(prev_item); | ||
1716 | btrfs_release_delayed_item(prev_item); | ||
1717 | } | ||
1718 | |||
1719 | curr_item = __btrfs_first_delayed_deletion_item(delayed_node); | ||
1720 | while (curr_item) { | ||
1721 | btrfs_delayed_item_release_metadata(root, curr_item); | ||
1722 | prev_item = curr_item; | ||
1723 | curr_item = __btrfs_next_delayed_item(prev_item); | ||
1724 | btrfs_release_delayed_item(prev_item); | ||
1725 | } | ||
1726 | |||
1727 | if (delayed_node->inode_dirty) { | ||
1728 | btrfs_delayed_inode_release_metadata(root, delayed_node); | ||
1729 | btrfs_release_delayed_inode(delayed_node); | ||
1730 | } | ||
1731 | mutex_unlock(&delayed_node->mutex); | ||
1732 | } | ||
1733 | |||
1734 | void btrfs_kill_delayed_inode_items(struct inode *inode) | ||
1735 | { | ||
1736 | struct btrfs_delayed_node *delayed_node; | ||
1737 | |||
1738 | delayed_node = btrfs_get_delayed_node(inode); | ||
1739 | if (!delayed_node) | ||
1740 | return; | ||
1741 | |||
1742 | __btrfs_kill_delayed_node(delayed_node); | ||
1743 | btrfs_release_delayed_node(delayed_node); | ||
1744 | } | ||
1745 | |||
1746 | void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) | ||
1747 | { | ||
1748 | u64 inode_id = 0; | ||
1749 | struct btrfs_delayed_node *delayed_nodes[8]; | ||
1750 | int i, n; | ||
1751 | |||
1752 | while (1) { | ||
1753 | spin_lock(&root->inode_lock); | ||
1754 | n = radix_tree_gang_lookup(&root->delayed_nodes_tree, | ||
1755 | (void **)delayed_nodes, inode_id, | ||
1756 | ARRAY_SIZE(delayed_nodes)); | ||
1757 | if (!n) { | ||
1758 | spin_unlock(&root->inode_lock); | ||
1759 | break; | ||
1760 | } | ||
1761 | |||
1762 | inode_id = delayed_nodes[n - 1]->inode_id + 1; | ||
1763 | |||
1764 | for (i = 0; i < n; i++) | ||
1765 | atomic_inc(&delayed_nodes[i]->refs); | ||
1766 | spin_unlock(&root->inode_lock); | ||
1767 | |||
1768 | for (i = 0; i < n; i++) { | ||
1769 | __btrfs_kill_delayed_node(delayed_nodes[i]); | ||
1770 | btrfs_release_delayed_node(delayed_nodes[i]); | ||
1771 | } | ||
1772 | } | ||
1773 | } | ||
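
btrfs_kill_all_delayed_nodes() uses the classic gang-lookup pagination idiom: fetch up to ARRAY_SIZE() results starting at a cursor, take references under the lock, advance the cursor past the last id returned, and loop until the lookup comes back empty. A user-space model of that cursor loop over a plain sorted array, names invented:

#include <stdio.h>

/* Model of radix_tree_gang_lookup(): copy up to 'max' ids >= 'first'. */
static int gang_lookup(const long *ids, int n, long first, long *out, int max)
{
        int found = 0;

        for (int i = 0; i < n && found < max; i++)
                if (ids[i] >= first)
                        out[found++] = ids[i];
        return found;
}

int main(void)
{
        long ids[] = { 1, 4, 9, 16, 25, 36, 49 };
        long batch[3];
        long cursor = 0;
        int n;

        while ((n = gang_lookup(ids, 7, cursor, batch, 3)) != 0) {
                for (int i = 0; i < n; i++)
                        printf("killing node %ld\n", batch[i]);
                cursor = batch[n - 1] + 1;      /* resume after the last hit */
        }
        return 0;
}
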
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h new file mode 100644 index 000000000000..8d27af4bd8b9 --- /dev/null +++ b/fs/btrfs/delayed-inode.h | |||
@@ -0,0 +1,145 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2011 Fujitsu. All rights reserved. | ||
3 | * Written by Miao Xie <miaox@cn.fujitsu.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public | ||
7 | * License v2 as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | * General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public | ||
15 | * License along with this program; if not, write to the | ||
16 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
17 | * Boston, MA 021110-1307, USA. | ||
18 | */ | ||
19 | |||
20 | #ifndef __DELAYED_TREE_OPERATION_H | ||
21 | #define __DELAYED_TREE_OPERATION_H | ||
22 | |||
23 | #include <linux/rbtree.h> | ||
24 | #include <linux/spinlock.h> | ||
25 | #include <linux/mutex.h> | ||
26 | #include <linux/list.h> | ||
27 | #include <linux/wait.h> | ||
28 | #include <asm/atomic.h> | ||
29 | |||
30 | #include "ctree.h" | ||
31 | |||
32 | /* types of the delayed item */ | ||
33 | #define BTRFS_DELAYED_INSERTION_ITEM 1 | ||
34 | #define BTRFS_DELAYED_DELETION_ITEM 2 | ||
35 | |||
36 | struct btrfs_delayed_root { | ||
37 | spinlock_t lock; | ||
38 | struct list_head node_list; | ||
39 | /* | ||
40 | * Used for delayed nodes that are waiting to be dealt with by the | ||
41 | * worker. If the delayed node is inserted into the work queue, we | ||
42 | * drop it from this list. | ||
43 | */ | ||
44 | struct list_head prepare_list; | ||
45 | atomic_t items; /* for delayed items */ | ||
46 | int nodes; /* for delayed nodes */ | ||
47 | wait_queue_head_t wait; | ||
48 | }; | ||
49 | |||
50 | struct btrfs_delayed_node { | ||
51 | u64 inode_id; | ||
52 | u64 bytes_reserved; | ||
53 | struct btrfs_root *root; | ||
54 | /* Used to add the node into the delayed root's node list. */ | ||
55 | struct list_head n_list; | ||
56 | /* | ||
57 | * Used to add the node into the prepare list; the nodes in this list | ||
58 | * are waiting to be dealt with by the async worker. | ||
59 | */ | ||
60 | struct list_head p_list; | ||
61 | struct rb_root ins_root; | ||
62 | struct rb_root del_root; | ||
63 | struct mutex mutex; | ||
64 | struct btrfs_inode_item inode_item; | ||
65 | atomic_t refs; | ||
66 | u64 index_cnt; | ||
67 | bool in_list; | ||
68 | bool inode_dirty; | ||
69 | int count; | ||
70 | }; | ||
71 | |||
72 | struct btrfs_delayed_item { | ||
73 | struct rb_node rb_node; | ||
74 | struct btrfs_key key; | ||
75 | struct list_head tree_list; /* used for batch insert/delete items */ | ||
76 | struct list_head readdir_list; /* used for readdir items */ | ||
77 | u64 bytes_reserved; | ||
78 | struct btrfs_delayed_node *delayed_node; | ||
79 | atomic_t refs; | ||
80 | int ins_or_del; | ||
81 | u32 data_len; | ||
82 | char data[0]; | ||
83 | }; | ||
84 | |||
85 | static inline void btrfs_init_delayed_root( | ||
86 | struct btrfs_delayed_root *delayed_root) | ||
87 | { | ||
88 | atomic_set(&delayed_root->items, 0); | ||
89 | delayed_root->nodes = 0; | ||
90 | spin_lock_init(&delayed_root->lock); | ||
91 | init_waitqueue_head(&delayed_root->wait); | ||
92 | INIT_LIST_HEAD(&delayed_root->node_list); | ||
93 | INIT_LIST_HEAD(&delayed_root->prepare_list); | ||
94 | } | ||
95 | |||
96 | int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, | ||
97 | struct btrfs_root *root, const char *name, | ||
98 | int name_len, struct inode *dir, | ||
99 | struct btrfs_disk_key *disk_key, u8 type, | ||
100 | u64 index); | ||
101 | |||
102 | int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, | ||
103 | struct btrfs_root *root, struct inode *dir, | ||
104 | u64 index); | ||
105 | |||
106 | int btrfs_inode_delayed_dir_index_count(struct inode *inode); | ||
107 | |||
108 | int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, | ||
109 | struct btrfs_root *root); | ||
110 | |||
111 | void btrfs_balance_delayed_items(struct btrfs_root *root); | ||
112 | |||
113 | int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, | ||
114 | struct inode *inode); | ||
115 | /* Used for evicting the inode. */ | ||
116 | void btrfs_remove_delayed_node(struct inode *inode); | ||
117 | void btrfs_kill_delayed_inode_items(struct inode *inode); | ||
118 | |||
119 | |||
120 | int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, | ||
121 | struct btrfs_root *root, struct inode *inode); | ||
122 | int btrfs_fill_inode(struct inode *inode, u32 *rdev); | ||
123 | |||
124 | /* Used for drop dead root */ | ||
125 | void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); | ||
126 | |||
127 | /* Used for readdir() */ | ||
128 | void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, | ||
129 | struct list_head *del_list); | ||
130 | void btrfs_put_delayed_items(struct list_head *ins_list, | ||
131 | struct list_head *del_list); | ||
132 | int btrfs_should_delete_dir_index(struct list_head *del_list, | ||
133 | u64 index); | ||
134 | int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent, | ||
135 | filldir_t filldir, | ||
136 | struct list_head *ins_list); | ||
137 | |||
138 | /* for init */ | ||
139 | int __init btrfs_delayed_inode_init(void); | ||
140 | void btrfs_delayed_inode_exit(void); | ||
141 | |||
142 | /* for debugging */ | ||
143 | void btrfs_assert_delayed_root_empty(struct btrfs_root *root); | ||
144 | |||
145 | #endif | ||
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index e807b143b857..125cf76fcd08 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -281,44 +281,6 @@ again: | |||
281 | } | 281 | } |
282 | 282 | ||
283 | /* | 283 | /* |
284 | * This checks to see if there are any delayed refs in the | ||
285 | * btree for a given bytenr. It returns one if it finds any | ||
286 | * and zero otherwise. | ||
287 | * | ||
288 | * If it only finds a head node, it returns 0. | ||
289 | * | ||
290 | * The idea is to use this when deciding if you can safely delete an | ||
291 | * extent from the extent allocation tree. There may be a pending | ||
292 | * ref in the rbtree that adds or removes references, so as long as this | ||
293 | * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent | ||
294 | * allocation tree. | ||
295 | */ | ||
296 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr) | ||
297 | { | ||
298 | struct btrfs_delayed_ref_node *ref; | ||
299 | struct btrfs_delayed_ref_root *delayed_refs; | ||
300 | struct rb_node *prev_node; | ||
301 | int ret = 0; | ||
302 | |||
303 | delayed_refs = &trans->transaction->delayed_refs; | ||
304 | spin_lock(&delayed_refs->lock); | ||
305 | |||
306 | ref = find_ref_head(&delayed_refs->root, bytenr, NULL); | ||
307 | if (ref) { | ||
308 | prev_node = rb_prev(&ref->rb_node); | ||
309 | if (!prev_node) | ||
310 | goto out; | ||
311 | ref = rb_entry(prev_node, struct btrfs_delayed_ref_node, | ||
312 | rb_node); | ||
313 | if (ref->bytenr == bytenr) | ||
314 | ret = 1; | ||
315 | } | ||
316 | out: | ||
317 | spin_unlock(&delayed_refs->lock); | ||
318 | return ret; | ||
319 | } | ||
320 | |||
321 | /* | ||
322 | * helper function to update an extent delayed ref in the | 284 | * helper function to update an extent delayed ref in the |
323 | * rbtree. existing and update must both have the same | 285 | * rbtree. existing and update must both have the same |
324 | * bytenr and parent | 286 | * bytenr and parent |
@@ -483,6 +445,8 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans, | |||
483 | INIT_LIST_HEAD(&head_ref->cluster); | 445 | INIT_LIST_HEAD(&head_ref->cluster); |
484 | mutex_init(&head_ref->mutex); | 446 | mutex_init(&head_ref->mutex); |
485 | 447 | ||
448 | trace_btrfs_delayed_ref_head(ref, head_ref, action); | ||
449 | |||
486 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); | 450 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); |
487 | 451 | ||
488 | if (existing) { | 452 | if (existing) { |
@@ -537,6 +501,8 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
537 | } | 501 | } |
538 | full_ref->level = level; | 502 | full_ref->level = level; |
539 | 503 | ||
504 | trace_btrfs_delayed_tree_ref(ref, full_ref, action); | ||
505 | |||
540 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); | 506 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); |
541 | 507 | ||
542 | if (existing) { | 508 | if (existing) { |
@@ -591,6 +557,8 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans, | |||
591 | full_ref->objectid = owner; | 557 | full_ref->objectid = owner; |
592 | full_ref->offset = offset; | 558 | full_ref->offset = offset; |
593 | 559 | ||
560 | trace_btrfs_delayed_data_ref(ref, full_ref, action); | ||
561 | |||
594 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); | 562 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); |
595 | 563 | ||
596 | if (existing) { | 564 | if (existing) { |
@@ -741,79 +709,3 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) | |||
741 | return btrfs_delayed_node_to_head(ref); | 709 | return btrfs_delayed_node_to_head(ref); |
742 | return NULL; | 710 | return NULL; |
743 | } | 711 | } |
744 | |||
745 | /* | ||
746 | * add a delayed ref to the tree. This does all of the accounting required | ||
747 | * to make sure the delayed ref is eventually processed before this | ||
748 | * transaction commits. | ||
749 | * | ||
750 | * The main point of this call is to add and remove a backreference in a single | ||
751 | * shot, taking the lock only once, and only searching for the head node once. | ||
752 | * | ||
753 | * It is the same as doing a ref add and delete in two separate calls. | ||
754 | */ | ||
755 | #if 0 | ||
756 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | ||
757 | u64 bytenr, u64 num_bytes, u64 orig_parent, | ||
758 | u64 parent, u64 orig_ref_root, u64 ref_root, | ||
759 | u64 orig_ref_generation, u64 ref_generation, | ||
760 | u64 owner_objectid, int pin) | ||
761 | { | ||
762 | struct btrfs_delayed_ref *ref; | ||
763 | struct btrfs_delayed_ref *old_ref; | ||
764 | struct btrfs_delayed_ref_head *head_ref; | ||
765 | struct btrfs_delayed_ref_root *delayed_refs; | ||
766 | int ret; | ||
767 | |||
768 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | ||
769 | if (!ref) | ||
770 | return -ENOMEM; | ||
771 | |||
772 | old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS); | ||
773 | if (!old_ref) { | ||
774 | kfree(ref); | ||
775 | return -ENOMEM; | ||
776 | } | ||
777 | |||
778 | /* | ||
779 | * the parent = 0 case comes from cases where we don't actually | ||
780 | * know the parent yet. It will get updated later via a add/drop | ||
781 | * pair. | ||
782 | */ | ||
783 | if (parent == 0) | ||
784 | parent = bytenr; | ||
785 | if (orig_parent == 0) | ||
786 | orig_parent = bytenr; | ||
787 | |||
788 | head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); | ||
789 | if (!head_ref) { | ||
790 | kfree(ref); | ||
791 | kfree(old_ref); | ||
792 | return -ENOMEM; | ||
793 | } | ||
794 | delayed_refs = &trans->transaction->delayed_refs; | ||
795 | spin_lock(&delayed_refs->lock); | ||
796 | |||
797 | /* | ||
798 | * insert both the head node and the new ref without dropping | ||
799 | * the spin lock | ||
800 | */ | ||
801 | ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, | ||
802 | (u64)-1, 0, 0, 0, | ||
803 | BTRFS_UPDATE_DELAYED_HEAD, 0); | ||
804 | BUG_ON(ret); | ||
805 | |||
806 | ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, | ||
807 | parent, ref_root, ref_generation, | ||
808 | owner_objectid, BTRFS_ADD_DELAYED_REF, 0); | ||
809 | BUG_ON(ret); | ||
810 | |||
811 | ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes, | ||
812 | orig_parent, orig_ref_root, | ||
813 | orig_ref_generation, owner_objectid, | ||
814 | BTRFS_DROP_DELAYED_REF, pin); | ||
815 | BUG_ON(ret); | ||
816 | spin_unlock(&delayed_refs->lock); | ||
817 | return 0; | ||
818 | } | ||
819 | #endif | ||
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 50e3cf92fbda..e287e3b0eab0 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -166,12 +166,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, | |||
166 | 166 | ||
167 | struct btrfs_delayed_ref_head * | 167 | struct btrfs_delayed_ref_head * |
168 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); | 168 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); |
169 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); | ||
170 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | ||
171 | u64 bytenr, u64 num_bytes, u64 orig_parent, | ||
172 | u64 parent, u64 orig_ref_root, u64 ref_root, | ||
173 | u64 orig_ref_generation, u64 ref_generation, | ||
174 | u64 owner_objectid, int pin); | ||
175 | int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | 169 | int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, |
176 | struct btrfs_delayed_ref_head *head); | 170 | struct btrfs_delayed_ref_head *head); |
177 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | 171 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index e9103b3baa49..685f2593c4f0 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
@@ -50,7 +50,6 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle | |||
50 | if (di) | 50 | if (di) |
51 | return ERR_PTR(-EEXIST); | 51 | return ERR_PTR(-EEXIST); |
52 | ret = btrfs_extend_item(trans, root, path, data_size); | 52 | ret = btrfs_extend_item(trans, root, path, data_size); |
53 | WARN_ON(ret > 0); | ||
54 | } | 53 | } |
55 | if (ret < 0) | 54 | if (ret < 0) |
56 | return ERR_PTR(ret); | 55 | return ERR_PTR(ret); |
@@ -124,8 +123,9 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, | |||
124 | * to use for the second index (if one is created). | 123 | * to use for the second index (if one is created). |
125 | */ | 124 | */ |
126 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root | 125 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root |
127 | *root, const char *name, int name_len, u64 dir, | 126 | *root, const char *name, int name_len, |
128 | struct btrfs_key *location, u8 type, u64 index) | 127 | struct inode *dir, struct btrfs_key *location, |
128 | u8 type, u64 index) | ||
129 | { | 129 | { |
130 | int ret = 0; | 130 | int ret = 0; |
131 | int ret2 = 0; | 131 | int ret2 = 0; |
@@ -137,13 +137,17 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
137 | struct btrfs_disk_key disk_key; | 137 | struct btrfs_disk_key disk_key; |
138 | u32 data_size; | 138 | u32 data_size; |
139 | 139 | ||
140 | key.objectid = dir; | 140 | key.objectid = btrfs_ino(dir); |
141 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | 141 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); |
142 | key.offset = btrfs_name_hash(name, name_len); | 142 | key.offset = btrfs_name_hash(name, name_len); |
143 | 143 | ||
144 | path = btrfs_alloc_path(); | 144 | path = btrfs_alloc_path(); |
145 | if (!path) | ||
146 | return -ENOMEM; | ||
145 | path->leave_spinning = 1; | 147 | path->leave_spinning = 1; |
146 | 148 | ||
149 | btrfs_cpu_key_to_disk(&disk_key, location); | ||
150 | |||
147 | data_size = sizeof(*dir_item) + name_len; | 151 | data_size = sizeof(*dir_item) + name_len; |
148 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, | 152 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, |
149 | name, name_len); | 153 | name, name_len); |
@@ -151,11 +155,10 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
151 | ret = PTR_ERR(dir_item); | 155 | ret = PTR_ERR(dir_item); |
152 | if (ret == -EEXIST) | 156 | if (ret == -EEXIST) |
153 | goto second_insert; | 157 | goto second_insert; |
154 | goto out; | 158 | goto out_free; |
155 | } | 159 | } |
156 | 160 | ||
157 | leaf = path->nodes[0]; | 161 | leaf = path->nodes[0]; |
158 | btrfs_cpu_key_to_disk(&disk_key, location); | ||
159 | btrfs_set_dir_item_key(leaf, dir_item, &disk_key); | 162 | btrfs_set_dir_item_key(leaf, dir_item, &disk_key); |
160 | btrfs_set_dir_type(leaf, dir_item, type); | 163 | btrfs_set_dir_type(leaf, dir_item, type); |
161 | btrfs_set_dir_data_len(leaf, dir_item, 0); | 164 | btrfs_set_dir_data_len(leaf, dir_item, 0); |
@@ -170,29 +173,13 @@ second_insert: | |||
170 | /* FIXME, use some real flag for selecting the extra index */ | 173 | /* FIXME, use some real flag for selecting the extra index */ |
171 | if (root == root->fs_info->tree_root) { | 174 | if (root == root->fs_info->tree_root) { |
172 | ret = 0; | 175 | ret = 0; |
173 | goto out; | 176 | goto out_free; |
174 | } | 177 | } |
175 | btrfs_release_path(root, path); | 178 | btrfs_release_path(path); |
176 | 179 | ||
177 | btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); | 180 | ret2 = btrfs_insert_delayed_dir_index(trans, root, name, name_len, dir, |
178 | key.offset = index; | 181 | &disk_key, type, index); |
179 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, | 182 | out_free: |
180 | name, name_len); | ||
181 | if (IS_ERR(dir_item)) { | ||
182 | ret2 = PTR_ERR(dir_item); | ||
183 | goto out; | ||
184 | } | ||
185 | leaf = path->nodes[0]; | ||
186 | btrfs_cpu_key_to_disk(&disk_key, location); | ||
187 | btrfs_set_dir_item_key(leaf, dir_item, &disk_key); | ||
188 | btrfs_set_dir_type(leaf, dir_item, type); | ||
189 | btrfs_set_dir_data_len(leaf, dir_item, 0); | ||
190 | btrfs_set_dir_name_len(leaf, dir_item, name_len); | ||
191 | btrfs_set_dir_transid(leaf, dir_item, trans->transid); | ||
192 | name_ptr = (unsigned long)(dir_item + 1); | ||
193 | write_extent_buffer(leaf, name, name_ptr, name_len); | ||
194 | btrfs_mark_buffer_dirty(leaf); | ||
195 | out: | ||
196 | btrfs_free_path(path); | 183 | btrfs_free_path(path); |
197 | if (ret) | 184 | if (ret) |
198 | return ret; | 185 | return ret; |
@@ -377,6 +364,9 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, | |||
377 | 364 | ||
378 | leaf = path->nodes[0]; | 365 | leaf = path->nodes[0]; |
379 | dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); | 366 | dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); |
367 | if (verify_dir_item(root, leaf, dir_item)) | ||
368 | return NULL; | ||
369 | |||
380 | total_len = btrfs_item_size_nr(leaf, path->slots[0]); | 370 | total_len = btrfs_item_size_nr(leaf, path->slots[0]); |
381 | while (cur < total_len) { | 371 | while (cur < total_len) { |
382 | this_len = sizeof(*dir_item) + | 372 | this_len = sizeof(*dir_item) + |
@@ -427,5 +417,37 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, | |||
427 | ret = btrfs_truncate_item(trans, root, path, | 417 | ret = btrfs_truncate_item(trans, root, path, |
428 | item_len - sub_item_len, 1); | 418 | item_len - sub_item_len, 1); |
429 | } | 419 | } |
420 | return ret; | ||
421 | } | ||
422 | |||
423 | int verify_dir_item(struct btrfs_root *root, | ||
424 | struct extent_buffer *leaf, | ||
425 | struct btrfs_dir_item *dir_item) | ||
426 | { | ||
427 | u16 namelen = BTRFS_NAME_LEN; | ||
428 | u8 type = btrfs_dir_type(leaf, dir_item); | ||
429 | |||
430 | if (type >= BTRFS_FT_MAX) { | ||
431 | printk(KERN_CRIT "btrfs: invalid dir item type: %d\n", | ||
432 | (int)type); | ||
433 | return 1; | ||
434 | } | ||
435 | |||
436 | if (type == BTRFS_FT_XATTR) | ||
437 | namelen = XATTR_NAME_MAX; | ||
438 | |||
439 | if (btrfs_dir_name_len(leaf, dir_item) > namelen) { | ||
440 | printk(KERN_CRIT "btrfs: invalid dir item name len: %u\n", | ||
441 | (unsigned)btrfs_dir_name_len(leaf, dir_item)); | ||
442 | return 1; | ||
443 | } | ||
444 | |||
445 | /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */ | ||
446 | if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) { | ||
447 | printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n", | ||
448 | (unsigned)btrfs_dir_data_len(leaf, dir_item)); | ||
449 | return 1; | ||
450 | } | ||
451 | |||
430 | return 0; | 452 | return 0; |
431 | } | 453 | } |
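
verify_dir_item() is a defensive-validation pattern: length fields read from disk are checked against hard limits before anyone indexes past the item, and callers treat a nonzero return as corruption and skip the entry. A small model of validate-before-use with invented limits:

#include <stdio.h>

#define NAME_MAX_LEN 255        /* invented stand-in for BTRFS_NAME_LEN */

struct raw_item { unsigned name_len; unsigned data_len; };

/* Returns 1 on corruption, 0 if the lengths are sane (mirrors the
 * "nonzero means bad" convention above). */
static int verify_item(const struct raw_item *it, unsigned item_size)
{
        if (it->name_len > NAME_MAX_LEN)
                return 1;
        if (sizeof(*it) + it->name_len + it->data_len > item_size)
                return 1;       /* lengths would run past the item */
        return 0;
}

int main(void)
{
        struct raw_item bad = { .name_len = 10000, .data_len = 0 };

        if (verify_item(&bad, 300))
                printf("corrupt item, skipping\n");
        return 0;
}
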
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 64f10082f048..1ac8db5dc0a3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -28,6 +28,9 @@ | |||
28 | #include <linux/freezer.h> | 28 | #include <linux/freezer.h> |
29 | #include <linux/crc32c.h> | 29 | #include <linux/crc32c.h> |
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/migrate.h> | ||
32 | #include <linux/ratelimit.h> | ||
33 | #include <asm/unaligned.h> | ||
31 | #include "compat.h" | 34 | #include "compat.h" |
32 | #include "ctree.h" | 35 | #include "ctree.h" |
33 | #include "disk-io.h" | 36 | #include "disk-io.h" |
@@ -39,10 +42,25 @@ | |||
39 | #include "locking.h" | 42 | #include "locking.h" |
40 | #include "tree-log.h" | 43 | #include "tree-log.h" |
41 | #include "free-space-cache.h" | 44 | #include "free-space-cache.h" |
45 | #include "inode-map.h" | ||
42 | 46 | ||
43 | static struct extent_io_ops btree_extent_io_ops; | 47 | static struct extent_io_ops btree_extent_io_ops; |
44 | static void end_workqueue_fn(struct btrfs_work *work); | 48 | static void end_workqueue_fn(struct btrfs_work *work); |
45 | static void free_fs_root(struct btrfs_root *root); | 49 | static void free_fs_root(struct btrfs_root *root); |
50 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | ||
51 | int read_only); | ||
52 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root); | ||
53 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root); | ||
54 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
55 | struct btrfs_root *root); | ||
56 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); | ||
57 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); | ||
58 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
59 | struct extent_io_tree *dirty_pages, | ||
60 | int mark); | ||
61 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
62 | struct extent_io_tree *pinned_extents); | ||
63 | static int btrfs_cleanup_transaction(struct btrfs_root *root); | ||
46 | 64 | ||
47 | /* | 65 | /* |
48 | * end_io_wq structs are used to do processing in task context when an IO is | 66 | * end_io_wq structs are used to do processing in task context when an IO is |
@@ -121,7 +139,7 @@ static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = { | |||
121 | * that covers the entire device | 139 | * that covers the entire device |
122 | */ | 140 | */ |
123 | static struct extent_map *btree_get_extent(struct inode *inode, | 141 | static struct extent_map *btree_get_extent(struct inode *inode, |
124 | struct page *page, size_t page_offset, u64 start, u64 len, | 142 | struct page *page, size_t pg_offset, u64 start, u64 len, |
125 | int create) | 143 | int create) |
126 | { | 144 | { |
127 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 145 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
@@ -138,7 +156,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
138 | } | 156 | } |
139 | read_unlock(&em_tree->lock); | 157 | read_unlock(&em_tree->lock); |
140 | 158 | ||
141 | em = alloc_extent_map(GFP_NOFS); | 159 | em = alloc_extent_map(); |
142 | if (!em) { | 160 | if (!em) { |
143 | em = ERR_PTR(-ENOMEM); | 161 | em = ERR_PTR(-ENOMEM); |
144 | goto out; | 162 | goto out; |
@@ -183,7 +201,7 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) | |||
183 | 201 | ||
184 | void btrfs_csum_final(u32 crc, char *result) | 202 | void btrfs_csum_final(u32 crc, char *result) |
185 | { | 203 | { |
186 | *(__le32 *)result = ~cpu_to_le32(crc); | 204 | put_unaligned_le32(~crc, result); |
187 | } | 205 | } |
188 | 206 | ||
189 | /* | 207 | /* |
@@ -238,14 +256,12 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
238 | memcpy(&found, result, csum_size); | 256 | memcpy(&found, result, csum_size); |
239 | 257 | ||
240 | read_extent_buffer(buf, &val, 0, csum_size); | 258 | read_extent_buffer(buf, &val, 0, csum_size); |
241 | if (printk_ratelimit()) { | 259 | printk_ratelimited(KERN_INFO "btrfs: %s checksum verify " |
242 | printk(KERN_INFO "btrfs: %s checksum verify " | ||
243 | "failed on %llu wanted %X found %X " | 260 | "failed on %llu wanted %X found %X " |
244 | "level %d\n", | 261 | "level %d\n", |
245 | root->fs_info->sb->s_id, | 262 | root->fs_info->sb->s_id, |
246 | (unsigned long long)buf->start, val, found, | 263 | (unsigned long long)buf->start, val, found, |
247 | btrfs_header_level(buf)); | 264 | btrfs_header_level(buf)); |
248 | } | ||
249 | if (result != (char *)&inline_result) | 265 | if (result != (char *)&inline_result) |
250 | kfree(result); | 266 | kfree(result); |
251 | return 1; | 267 | return 1; |
@@ -280,13 +296,11 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
280 | ret = 0; | 296 | ret = 0; |
281 | goto out; | 297 | goto out; |
282 | } | 298 | } |
283 | if (printk_ratelimit()) { | 299 | printk_ratelimited("parent transid verify failed on %llu wanted %llu " |
284 | printk("parent transid verify failed on %llu wanted %llu " | ||
285 | "found %llu\n", | 300 | "found %llu\n", |
286 | (unsigned long long)eb->start, | 301 | (unsigned long long)eb->start, |
287 | (unsigned long long)parent_transid, | 302 | (unsigned long long)parent_transid, |
288 | (unsigned long long)btrfs_header_generation(eb)); | 303 | (unsigned long long)btrfs_header_generation(eb)); |
289 | } | ||
290 | ret = 1; | 304 | ret = 1; |
291 | clear_extent_buffer_uptodate(io_tree, eb, &cached_state); | 305 | clear_extent_buffer_uptodate(io_tree, eb, &cached_state); |
292 | out: | 306 | out: |
@@ -308,6 +322,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
308 | int num_copies = 0; | 322 | int num_copies = 0; |
309 | int mirror_num = 0; | 323 | int mirror_num = 0; |
310 | 324 | ||
325 | clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | ||
311 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | 326 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; |
312 | while (1) { | 327 | while (1) { |
313 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, | 328 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, |
@@ -316,6 +331,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
316 | !verify_parent_transid(io_tree, eb, parent_transid)) | 331 | !verify_parent_transid(io_tree, eb, parent_transid)) |
317 | return ret; | 332 | return ret; |
318 | 333 | ||
334 | /* | ||
335 | * This buffer's crc is fine, but its contents are corrupted, so | ||
336 | * there is no reason to read the other copies; they won't be | ||
337 | * any less wrong. | ||
338 | */ | ||
339 | if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) | ||
340 | return ret; | ||
341 | |||
319 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, | 342 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, |
320 | eb->start, eb->len); | 343 | eb->start, eb->len); |
321 | if (num_copies == 1) | 344 | if (num_copies == 1) |
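
The new EXTENT_BUFFER_CORRUPT bit short-circuits the mirror retry loop above: a block whose checksum verified but whose contents failed sanity checks is identical on every mirror, so rereading is pointless. A hedged model of that policy (eb_model and read_one_copy() are hypothetical stand-ins for the extent buffer machinery):

#include <errno.h>

struct eb_model {
	int corrupt;    /* crc was fine, contents failed validation */
};

int read_one_copy(struct eb_model *eb, int mirror);   /* 0 on clean read */

/* Try each mirror in turn, but stop as soon as the corrupt bit shows
 * that another copy cannot be any less wrong. */
static int read_block_model(struct eb_model *eb, int num_copies)
{
	int mirror;

	for (mirror = 0; mirror < num_copies; mirror++) {
		if (read_one_copy(eb, mirror) == 0)
			return 0;
		if (eb->corrupt)
			return -EIO;
	}
	return -EIO;
}
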
@@ -338,24 +361,33 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
338 | struct extent_io_tree *tree; | 361 | struct extent_io_tree *tree; |
339 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 362 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
340 | u64 found_start; | 363 | u64 found_start; |
341 | int found_level; | ||
342 | unsigned long len; | 364 | unsigned long len; |
343 | struct extent_buffer *eb; | 365 | struct extent_buffer *eb; |
344 | int ret; | 366 | int ret; |
345 | 367 | ||
346 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 368 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
347 | 369 | ||
348 | if (page->private == EXTENT_PAGE_PRIVATE) | 370 | if (page->private == EXTENT_PAGE_PRIVATE) { |
371 | WARN_ON(1); | ||
349 | goto out; | 372 | goto out; |
350 | if (!page->private) | 373 | } |
374 | if (!page->private) { | ||
375 | WARN_ON(1); | ||
351 | goto out; | 376 | goto out; |
377 | } | ||
352 | len = page->private >> 2; | 378 | len = page->private >> 2; |
353 | WARN_ON(len == 0); | 379 | WARN_ON(len == 0); |
354 | 380 | ||
355 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 381 | eb = alloc_extent_buffer(tree, start, len, page); |
382 | if (eb == NULL) { | ||
383 | WARN_ON(1); | ||
384 | goto out; | ||
385 | } | ||
356 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, | 386 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, |
357 | btrfs_header_generation(eb)); | 387 | btrfs_header_generation(eb)); |
358 | BUG_ON(ret); | 388 | BUG_ON(ret); |
389 | WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN)); | ||
390 | |||
359 | found_start = btrfs_header_bytenr(eb); | 391 | found_start = btrfs_header_bytenr(eb); |
360 | if (found_start != start) { | 392 | if (found_start != start) { |
361 | WARN_ON(1); | 393 | WARN_ON(1); |
@@ -369,8 +401,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
369 | WARN_ON(1); | 401 | WARN_ON(1); |
370 | goto err; | 402 | goto err; |
371 | } | 403 | } |
372 | found_level = btrfs_header_level(eb); | ||
373 | |||
374 | csum_tree_block(root, eb, 0); | 404 | csum_tree_block(root, eb, 0); |
375 | err: | 405 | err: |
376 | free_extent_buffer(eb); | 406 | free_extent_buffer(eb); |
@@ -397,6 +427,73 @@ static int check_tree_block_fsid(struct btrfs_root *root, | |||
397 | return ret; | 427 | return ret; |
398 | } | 428 | } |
399 | 429 | ||
430 | #define CORRUPT(reason, eb, root, slot) \ | ||
431 | printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu, " \ | ||
432 | "root=%llu, slot=%d\n", reason, \ | ||
433 | (unsigned long long)btrfs_header_bytenr(eb), \ | ||
434 | (unsigned long long)root->objectid, slot) | ||
435 | |||
436 | static noinline int check_leaf(struct btrfs_root *root, | ||
437 | struct extent_buffer *leaf) | ||
438 | { | ||
439 | struct btrfs_key key; | ||
440 | struct btrfs_key leaf_key; | ||
441 | u32 nritems = btrfs_header_nritems(leaf); | ||
442 | int slot; | ||
443 | |||
444 | if (nritems == 0) | ||
445 | return 0; | ||
446 | |||
447 | /* Check the first item (slot 0) */ | ||
448 | if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != | ||
449 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
450 | CORRUPT("invalid item offset size pair", leaf, root, 0); | ||
451 | return -EIO; | ||
452 | } | ||
453 | |||
454 | /* | ||
455 | * Check to make sure each item's keys are in the correct order and their | ||
456 | * offsets make sense. We only have to loop through nritems-1 because | ||
457 | * we check the current slot against the next slot, which verifies the | ||
458 | * next slot's offset+size makes sense and that the current slot's | ||
459 | * offset is correct. | ||
460 | */ | ||
461 | for (slot = 0; slot < nritems - 1; slot++) { | ||
462 | btrfs_item_key_to_cpu(leaf, &leaf_key, slot); | ||
463 | btrfs_item_key_to_cpu(leaf, &key, slot + 1); | ||
464 | |||
465 | /* Make sure the keys are in the right order */ | ||
466 | if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) { | ||
467 | CORRUPT("bad key order", leaf, root, slot); | ||
468 | return -EIO; | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * Make sure the offset and ends are right, remember that the | ||
473 | * item data starts at the end of the leaf and grows towards the | ||
474 | * front. | ||
475 | */ | ||
476 | if (btrfs_item_offset_nr(leaf, slot) != | ||
477 | btrfs_item_end_nr(leaf, slot + 1)) { | ||
478 | CORRUPT("slot offset bad", leaf, root, slot); | ||
479 | return -EIO; | ||
480 | } | ||
481 | |||
482 | /* | ||
483 | * Check to make sure that we don't point outside of the leaf, | ||
484 | * just incase all the items are consistent to eachother, but | ||
485 | * all point outside of the leaf. | ||
486 | */ | ||
487 | if (btrfs_item_end_nr(leaf, slot) > | ||
488 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
489 | CORRUPT("slot end outside of leaf", leaf, root, slot); | ||
490 | return -EIO; | ||
491 | } | ||
492 | } | ||
493 | |||
494 | return 0; | ||
495 | } | ||
496 | |||
400 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 497 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
401 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) | 498 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) |
402 | { | 499 | { |
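
check_leaf() above enforces that item data tiles the leaf exactly: slot 0's data ends at the end of the data area, each slot's data begins where the next slot's data ends, and no item reaches past the data area. A self-contained model of just the offset arithmetic (hypothetical types, key ordering omitted):

#include <stdint.h>

struct item_model {
	uint32_t offset;   /* start of this item's data within the leaf */
	uint32_t size;     /* length of this item's data */
};

/* Item data grows from the back of the leaf towards the front, so
 * slot N's data must begin exactly where slot N+1's data ends. */
static int leaf_layout_ok(const struct item_model *items, int nritems,
			  uint32_t leaf_data_size)
{
	int slot;

	if (nritems == 0)
		return 1;
	if (items[0].offset + items[0].size != leaf_data_size)
		return 0;               /* "invalid item offset size pair" */
	for (slot = 0; slot < nritems - 1; slot++) {
		if (items[slot].offset !=
		    items[slot + 1].offset + items[slot + 1].size)
			return 0;       /* "slot offset bad" */
		if (items[slot].offset + items[slot].size > leaf_data_size)
			return 0;       /* "slot end outside of leaf" */
	}
	return 1;
}
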
@@ -426,16 +523,18 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
426 | len = page->private >> 2; | 523 | len = page->private >> 2; |
427 | WARN_ON(len == 0); | 524 | WARN_ON(len == 0); |
428 | 525 | ||
429 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 526 | eb = alloc_extent_buffer(tree, start, len, page); |
527 | if (eb == NULL) { | ||
528 | ret = -EIO; | ||
529 | goto out; | ||
530 | } | ||
430 | 531 | ||
431 | found_start = btrfs_header_bytenr(eb); | 532 | found_start = btrfs_header_bytenr(eb); |
432 | if (found_start != start) { | 533 | if (found_start != start) { |
433 | if (printk_ratelimit()) { | 534 | printk_ratelimited(KERN_INFO "btrfs bad tree block start " |
434 | printk(KERN_INFO "btrfs bad tree block start " | ||
435 | "%llu %llu\n", | 535 | "%llu %llu\n", |
436 | (unsigned long long)found_start, | 536 | (unsigned long long)found_start, |
437 | (unsigned long long)eb->start); | 537 | (unsigned long long)eb->start); |
438 | } | ||
439 | ret = -EIO; | 538 | ret = -EIO; |
440 | goto err; | 539 | goto err; |
441 | } | 540 | } |
@@ -447,10 +546,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
447 | goto err; | 546 | goto err; |
448 | } | 547 | } |
449 | if (check_tree_block_fsid(root, eb)) { | 548 | if (check_tree_block_fsid(root, eb)) { |
450 | if (printk_ratelimit()) { | 549 | printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", |
451 | printk(KERN_INFO "btrfs bad fsid on block %llu\n", | ||
452 | (unsigned long long)eb->start); | 550 | (unsigned long long)eb->start); |
453 | } | ||
454 | ret = -EIO; | 551 | ret = -EIO; |
455 | goto err; | 552 | goto err; |
456 | } | 553 | } |
@@ -459,8 +556,20 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
459 | btrfs_set_buffer_lockdep_class(eb, found_level); | 556 | btrfs_set_buffer_lockdep_class(eb, found_level); |
460 | 557 | ||
461 | ret = csum_tree_block(root, eb, 1); | 558 | ret = csum_tree_block(root, eb, 1); |
462 | if (ret) | 559 | if (ret) { |
463 | ret = -EIO; | 560 | ret = -EIO; |
561 | goto err; | ||
562 | } | ||
563 | |||
564 | /* | ||
565 | * If this is a leaf block and it is corrupt, set the corrupt bit so | ||
566 | * that we don't try and read the other copies of this block, just | ||
567 | * return -EIO. | ||
568 | */ | ||
569 | if (found_level == 0 && check_leaf(root, eb)) { | ||
570 | set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | ||
571 | ret = -EIO; | ||
572 | } | ||
464 | 573 | ||
465 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); | 574 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); |
466 | end = eb->start + end - 1; | 575 | end = eb->start + end - 1; |
@@ -481,9 +590,12 @@ static void end_workqueue_bio(struct bio *bio, int err) | |||
481 | end_io_wq->work.flags = 0; | 590 | end_io_wq->work.flags = 0; |
482 | 591 | ||
483 | if (bio->bi_rw & REQ_WRITE) { | 592 | if (bio->bi_rw & REQ_WRITE) { |
484 | if (end_io_wq->metadata) | 593 | if (end_io_wq->metadata == 1) |
485 | btrfs_queue_worker(&fs_info->endio_meta_write_workers, | 594 | btrfs_queue_worker(&fs_info->endio_meta_write_workers, |
486 | &end_io_wq->work); | 595 | &end_io_wq->work); |
596 | else if (end_io_wq->metadata == 2) | ||
597 | btrfs_queue_worker(&fs_info->endio_freespace_worker, | ||
598 | &end_io_wq->work); | ||
487 | else | 599 | else |
488 | btrfs_queue_worker(&fs_info->endio_write_workers, | 600 | btrfs_queue_worker(&fs_info->endio_write_workers, |
489 | &end_io_wq->work); | 601 | &end_io_wq->work); |
@@ -497,6 +609,13 @@ static void end_workqueue_bio(struct bio *bio, int err) | |||
497 | } | 609 | } |
498 | } | 610 | } |
499 | 611 | ||
612 | /* | ||
613 | * For the metadata arg you want | ||
614 | * | ||
615 | * 0 - if data | ||
616 | * 1 - if normal metadata | ||
617 | * 2 - if writing to the free space cache area | ||
618 | */ | ||
500 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | 619 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, |
501 | int metadata) | 620 | int metadata) |
502 | { | 621 | { |
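
The 0/1/2 convention documented above is passed around as a plain int; written as an enum for clarity (hypothetical names, not kernel identifiers):

enum endio_wq_type {
	ENDIO_WQ_DATA = 0,         /* ordinary data IO */
	ENDIO_WQ_METADATA = 1,     /* normal metadata (tree blocks) */
	ENDIO_WQ_FREE_SPACE = 2,   /* free space cache writes */
};
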
@@ -525,19 +644,11 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) | |||
525 | return 256 * limit; | 644 | return 256 * limit; |
526 | } | 645 | } |
527 | 646 | ||
528 | int btrfs_congested_async(struct btrfs_fs_info *info, int iodone) | ||
529 | { | ||
530 | return atomic_read(&info->nr_async_bios) > | ||
531 | btrfs_async_submit_limit(info); | ||
532 | } | ||
533 | |||
534 | static void run_one_async_start(struct btrfs_work *work) | 647 | static void run_one_async_start(struct btrfs_work *work) |
535 | { | 648 | { |
536 | struct btrfs_fs_info *fs_info; | ||
537 | struct async_submit_bio *async; | 649 | struct async_submit_bio *async; |
538 | 650 | ||
539 | async = container_of(work, struct async_submit_bio, work); | 651 | async = container_of(work, struct async_submit_bio, work); |
540 | fs_info = BTRFS_I(async->inode)->root->fs_info; | ||
541 | async->submit_bio_start(async->inode, async->rw, async->bio, | 652 | async->submit_bio_start(async->inode, async->rw, async->bio, |
542 | async->mirror_num, async->bio_flags, | 653 | async->mirror_num, async->bio_flags, |
543 | async->bio_offset); | 654 | async->bio_offset); |
@@ -688,6 +799,27 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
688 | __btree_submit_bio_done); | 799 | __btree_submit_bio_done); |
689 | } | 800 | } |
690 | 801 | ||
802 | #ifdef CONFIG_MIGRATION | ||
803 | static int btree_migratepage(struct address_space *mapping, | ||
804 | struct page *newpage, struct page *page) | ||
805 | { | ||
806 | /* | ||
807 | * we can't safely write a btree page from here, | ||
808 | * because we haven't done the locking hook | ||
809 | */ | ||
810 | if (PageDirty(page)) | ||
811 | return -EAGAIN; | ||
812 | /* | ||
813 | * Buffers may be managed in a filesystem specific way. | ||
814 | * We must have no buffers or drop them. | ||
815 | */ | ||
816 | if (page_has_private(page) && | ||
817 | !try_to_release_page(page, GFP_KERNEL)) | ||
818 | return -EAGAIN; | ||
819 | return migrate_page(mapping, newpage, page); | ||
820 | } | ||
821 | #endif | ||
822 | |||
691 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | 823 | static int btree_writepage(struct page *page, struct writeback_control *wbc) |
692 | { | 824 | { |
693 | struct extent_io_tree *tree; | 825 | struct extent_io_tree *tree; |
@@ -702,8 +834,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) | |||
702 | } | 834 | } |
703 | 835 | ||
704 | redirty_page_for_writepage(wbc, page); | 836 | redirty_page_for_writepage(wbc, page); |
705 | eb = btrfs_find_tree_block(root, page_offset(page), | 837 | eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE); |
706 | PAGE_CACHE_SIZE); | ||
707 | WARN_ON(!eb); | 838 | WARN_ON(!eb); |
708 | 839 | ||
709 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); | 840 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); |
@@ -793,7 +924,9 @@ static const struct address_space_operations btree_aops = { | |||
793 | .writepages = btree_writepages, | 924 | .writepages = btree_writepages, |
794 | .releasepage = btree_releasepage, | 925 | .releasepage = btree_releasepage, |
795 | .invalidatepage = btree_invalidatepage, | 926 | .invalidatepage = btree_invalidatepage, |
796 | .sync_page = block_sync_page, | 927 | #ifdef CONFIG_MIGRATION |
928 | .migratepage = btree_migratepage, | ||
929 | #endif | ||
797 | }; | 930 | }; |
798 | 931 | ||
799 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 932 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, |
@@ -818,7 +951,7 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | |||
818 | struct inode *btree_inode = root->fs_info->btree_inode; | 951 | struct inode *btree_inode = root->fs_info->btree_inode; |
819 | struct extent_buffer *eb; | 952 | struct extent_buffer *eb; |
820 | eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, | 953 | eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, |
821 | bytenr, blocksize, GFP_NOFS); | 954 | bytenr, blocksize); |
822 | return eb; | 955 | return eb; |
823 | } | 956 | } |
824 | 957 | ||
@@ -829,7 +962,7 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | |||
829 | struct extent_buffer *eb; | 962 | struct extent_buffer *eb; |
830 | 963 | ||
831 | eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, | 964 | eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, |
832 | bytenr, blocksize, NULL, GFP_NOFS); | 965 | bytenr, blocksize, NULL); |
833 | return eb; | 966 | return eb; |
834 | } | 967 | } |
835 | 968 | ||
@@ -850,12 +983,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | |||
850 | u32 blocksize, u64 parent_transid) | 983 | u32 blocksize, u64 parent_transid) |
851 | { | 984 | { |
852 | struct extent_buffer *buf = NULL; | 985 | struct extent_buffer *buf = NULL; |
853 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
854 | struct extent_io_tree *io_tree; | ||
855 | int ret; | 986 | int ret; |
856 | 987 | ||
857 | io_tree = &BTRFS_I(btree_inode)->io_tree; | ||
858 | |||
859 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | 988 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); |
860 | if (!buf) | 989 | if (!buf) |
861 | return NULL; | 990 | return NULL; |
@@ -915,15 +1044,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
915 | root->last_trans = 0; | 1044 | root->last_trans = 0; |
916 | root->highest_objectid = 0; | 1045 | root->highest_objectid = 0; |
917 | root->name = NULL; | 1046 | root->name = NULL; |
918 | root->in_sysfs = 0; | ||
919 | root->inode_tree = RB_ROOT; | 1047 | root->inode_tree = RB_ROOT; |
1048 | INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); | ||
920 | root->block_rsv = NULL; | 1049 | root->block_rsv = NULL; |
921 | root->orphan_block_rsv = NULL; | 1050 | root->orphan_block_rsv = NULL; |
922 | 1051 | ||
923 | INIT_LIST_HEAD(&root->dirty_list); | 1052 | INIT_LIST_HEAD(&root->dirty_list); |
924 | INIT_LIST_HEAD(&root->orphan_list); | 1053 | INIT_LIST_HEAD(&root->orphan_list); |
925 | INIT_LIST_HEAD(&root->root_list); | 1054 | INIT_LIST_HEAD(&root->root_list); |
926 | spin_lock_init(&root->node_lock); | ||
927 | spin_lock_init(&root->orphan_lock); | 1055 | spin_lock_init(&root->orphan_lock); |
928 | spin_lock_init(&root->inode_lock); | 1056 | spin_lock_init(&root->inode_lock); |
929 | spin_lock_init(&root->accounting_lock); | 1057 | spin_lock_init(&root->accounting_lock); |
@@ -939,7 +1067,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
939 | root->log_transid = 0; | 1067 | root->log_transid = 0; |
940 | root->last_log_commit = 0; | 1068 | root->last_log_commit = 0; |
941 | extent_io_tree_init(&root->dirty_log_pages, | 1069 | extent_io_tree_init(&root->dirty_log_pages, |
942 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1070 | fs_info->btree_inode->i_mapping); |
943 | 1071 | ||
944 | memset(&root->root_key, 0, sizeof(root->root_key)); | 1072 | memset(&root->root_key, 0, sizeof(root->root_key)); |
945 | memset(&root->root_item, 0, sizeof(root->root_item)); | 1073 | memset(&root->root_item, 0, sizeof(root->root_item)); |
@@ -980,7 +1108,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
980 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 1108 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
981 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1109 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
982 | blocksize, generation); | 1110 | blocksize, generation); |
983 | BUG_ON(!root->node); | 1111 | if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { |
1112 | free_extent_buffer(root->node); | ||
1113 | return -EIO; | ||
1114 | } | ||
984 | root->commit_root = btrfs_root_node(root); | 1115 | root->commit_root = btrfs_root_node(root); |
985 | return 0; | 1116 | return 0; |
986 | } | 1117 | } |
@@ -1104,7 +1235,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1104 | root, fs_info, location->objectid); | 1235 | root, fs_info, location->objectid); |
1105 | 1236 | ||
1106 | path = btrfs_alloc_path(); | 1237 | path = btrfs_alloc_path(); |
1107 | BUG_ON(!path); | 1238 | if (!path) { |
1239 | kfree(root); | ||
1240 | return ERR_PTR(-ENOMEM); | ||
1241 | } | ||
1108 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | 1242 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); |
1109 | if (ret == 0) { | 1243 | if (ret == 0) { |
1110 | l = path->nodes[0]; | 1244 | l = path->nodes[0]; |
@@ -1115,6 +1249,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1115 | } | 1249 | } |
1116 | btrfs_free_path(path); | 1250 | btrfs_free_path(path); |
1117 | if (ret) { | 1251 | if (ret) { |
1252 | kfree(root); | ||
1118 | if (ret > 0) | 1253 | if (ret > 0) |
1119 | ret = -ENOENT; | 1254 | ret = -ENOENT; |
1120 | return ERR_PTR(ret); | 1255 | return ERR_PTR(ret); |
@@ -1127,27 +1262,14 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1127 | root->commit_root = btrfs_root_node(root); | 1262 | root->commit_root = btrfs_root_node(root); |
1128 | BUG_ON(!root->node); | 1263 | BUG_ON(!root->node); |
1129 | out: | 1264 | out: |
1130 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) | 1265 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { |
1131 | root->ref_cows = 1; | 1266 | root->ref_cows = 1; |
1267 | btrfs_check_and_init_root_item(&root->root_item); | ||
1268 | } | ||
1132 | 1269 | ||
1133 | return root; | 1270 | return root; |
1134 | } | 1271 | } |
1135 | 1272 | ||
1136 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | ||
1137 | u64 root_objectid) | ||
1138 | { | ||
1139 | struct btrfs_root *root; | ||
1140 | |||
1141 | if (root_objectid == BTRFS_ROOT_TREE_OBJECTID) | ||
1142 | return fs_info->tree_root; | ||
1143 | if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) | ||
1144 | return fs_info->extent_root; | ||
1145 | |||
1146 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | ||
1147 | (unsigned long)root_objectid); | ||
1148 | return root; | ||
1149 | } | ||
1150 | |||
1151 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | 1273 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, |
1152 | struct btrfs_key *location) | 1274 | struct btrfs_key *location) |
1153 | { | 1275 | { |
@@ -1176,7 +1298,22 @@ again: | |||
1176 | if (IS_ERR(root)) | 1298 | if (IS_ERR(root)) |
1177 | return root; | 1299 | return root; |
1178 | 1300 | ||
1179 | set_anon_super(&root->anon_super, NULL); | 1301 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); |
1302 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), | ||
1303 | GFP_NOFS); | ||
1304 | if (!root->free_ino_pinned || !root->free_ino_ctl) { | ||
1305 | ret = -ENOMEM; | ||
1306 | goto fail; | ||
1307 | } | ||
1308 | |||
1309 | btrfs_init_free_ino_ctl(root); | ||
1310 | mutex_init(&root->fs_commit_mutex); | ||
1311 | spin_lock_init(&root->cache_lock); | ||
1312 | init_waitqueue_head(&root->cache_wait); | ||
1313 | |||
1314 | ret = set_anon_super(&root->anon_super, NULL); | ||
1315 | if (ret) | ||
1316 | goto fail; | ||
1180 | 1317 | ||
1181 | if (btrfs_root_refs(&root->root_item) == 0) { | 1318 | if (btrfs_root_refs(&root->root_item) == 0) { |
1182 | ret = -ENOENT; | 1319 | ret = -ENOENT; |
@@ -1219,41 +1356,6 @@ fail: | |||
1219 | return ERR_PTR(ret); | 1356 | return ERR_PTR(ret); |
1220 | } | 1357 | } |
1221 | 1358 | ||
1222 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | ||
1223 | struct btrfs_key *location, | ||
1224 | const char *name, int namelen) | ||
1225 | { | ||
1226 | return btrfs_read_fs_root_no_name(fs_info, location); | ||
1227 | #if 0 | ||
1228 | struct btrfs_root *root; | ||
1229 | int ret; | ||
1230 | |||
1231 | root = btrfs_read_fs_root_no_name(fs_info, location); | ||
1232 | if (!root) | ||
1233 | return NULL; | ||
1234 | |||
1235 | if (root->in_sysfs) | ||
1236 | return root; | ||
1237 | |||
1238 | ret = btrfs_set_root_name(root, name, namelen); | ||
1239 | if (ret) { | ||
1240 | free_extent_buffer(root->node); | ||
1241 | kfree(root); | ||
1242 | return ERR_PTR(ret); | ||
1243 | } | ||
1244 | |||
1245 | ret = btrfs_sysfs_add_root(root); | ||
1246 | if (ret) { | ||
1247 | free_extent_buffer(root->node); | ||
1248 | kfree(root->name); | ||
1249 | kfree(root); | ||
1250 | return ERR_PTR(ret); | ||
1251 | } | ||
1252 | root->in_sysfs = 1; | ||
1253 | return root; | ||
1254 | #endif | ||
1255 | } | ||
1256 | |||
1257 | static int btrfs_congested_fn(void *congested_data, int bdi_bits) | 1359 | static int btrfs_congested_fn(void *congested_data, int bdi_bits) |
1258 | { | 1360 | { |
1259 | struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; | 1361 | struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; |
@@ -1261,7 +1363,8 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) | |||
1261 | struct btrfs_device *device; | 1363 | struct btrfs_device *device; |
1262 | struct backing_dev_info *bdi; | 1364 | struct backing_dev_info *bdi; |
1263 | 1365 | ||
1264 | list_for_each_entry(device, &info->fs_devices->devices, dev_list) { | 1366 | rcu_read_lock(); |
1367 | list_for_each_entry_rcu(device, &info->fs_devices->devices, dev_list) { | ||
1265 | if (!device->bdev) | 1368 | if (!device->bdev) |
1266 | continue; | 1369 | continue; |
1267 | bdi = blk_get_backing_dev_info(device->bdev); | 1370 | bdi = blk_get_backing_dev_info(device->bdev); |
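
btrfs_congested_fn() now walks the device list under rcu_read_lock(), so devices can be added or removed concurrently without blocking this read-mostly path. A module-style sketch of the pattern, with hypothetical types:

#include <linux/list.h>
#include <linux/rcupdate.h>

struct dev_model {
	struct list_head dev_list;
	int congested;
};

/* Readers pay only rcu_read_lock(); writers must publish changes with
 * list_add_rcu()/list_del_rcu() and free entries after a grace period. */
static int any_device_congested(struct list_head *devices)
{
	struct dev_model *d;
	int ret = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(d, devices, dev_list) {
		if (d->congested) {
			ret = 1;
			break;
		}
	}
	rcu_read_unlock();
	return ret;
}
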
@@ -1270,86 +1373,11 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) | |||
1270 | break; | 1373 | break; |
1271 | } | 1374 | } |
1272 | } | 1375 | } |
1376 | rcu_read_unlock(); | ||
1273 | return ret; | 1377 | return ret; |
1274 | } | 1378 | } |
1275 | 1379 | ||
1276 | /* | 1380 | /* |
1277 | * this unplugs every device on the box, and it is only used when page | ||
1278 | * is null | ||
1279 | */ | ||
1280 | static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | ||
1281 | { | ||
1282 | struct btrfs_device *device; | ||
1283 | struct btrfs_fs_info *info; | ||
1284 | |||
1285 | info = (struct btrfs_fs_info *)bdi->unplug_io_data; | ||
1286 | list_for_each_entry(device, &info->fs_devices->devices, dev_list) { | ||
1287 | if (!device->bdev) | ||
1288 | continue; | ||
1289 | |||
1290 | bdi = blk_get_backing_dev_info(device->bdev); | ||
1291 | if (bdi->unplug_io_fn) | ||
1292 | bdi->unplug_io_fn(bdi, page); | ||
1293 | } | ||
1294 | } | ||
1295 | |||
1296 | static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | ||
1297 | { | ||
1298 | struct inode *inode; | ||
1299 | struct extent_map_tree *em_tree; | ||
1300 | struct extent_map *em; | ||
1301 | struct address_space *mapping; | ||
1302 | u64 offset; | ||
1303 | |||
1304 | /* the generic O_DIRECT read code does this */ | ||
1305 | if (1 || !page) { | ||
1306 | __unplug_io_fn(bdi, page); | ||
1307 | return; | ||
1308 | } | ||
1309 | |||
1310 | /* | ||
1311 | * page->mapping may change at any time. Get a consistent copy | ||
1312 | * and use that for everything below | ||
1313 | */ | ||
1314 | smp_mb(); | ||
1315 | mapping = page->mapping; | ||
1316 | if (!mapping) | ||
1317 | return; | ||
1318 | |||
1319 | inode = mapping->host; | ||
1320 | |||
1321 | /* | ||
1322 | * don't do the expensive searching for a small number of | ||
1323 | * devices | ||
1324 | */ | ||
1325 | if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) { | ||
1326 | __unplug_io_fn(bdi, page); | ||
1327 | return; | ||
1328 | } | ||
1329 | |||
1330 | offset = page_offset(page); | ||
1331 | |||
1332 | em_tree = &BTRFS_I(inode)->extent_tree; | ||
1333 | read_lock(&em_tree->lock); | ||
1334 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); | ||
1335 | read_unlock(&em_tree->lock); | ||
1336 | if (!em) { | ||
1337 | __unplug_io_fn(bdi, page); | ||
1338 | return; | ||
1339 | } | ||
1340 | |||
1341 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
1342 | free_extent_map(em); | ||
1343 | __unplug_io_fn(bdi, page); | ||
1344 | return; | ||
1345 | } | ||
1346 | offset = offset - em->start; | ||
1347 | btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree, | ||
1348 | em->block_start + offset, page); | ||
1349 | free_extent_map(em); | ||
1350 | } | ||
1351 | |||
1352 | /* | ||
1353 | * If this fails, caller must call bdi_destroy() to get rid of the | 1381 | * If this fails, caller must call bdi_destroy() to get rid of the |
1354 | * bdi again. | 1382 | * bdi again. |
1355 | */ | 1383 | */ |
@@ -1363,8 +1391,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
1363 | return err; | 1391 | return err; |
1364 | 1392 | ||
1365 | bdi->ra_pages = default_backing_dev_info.ra_pages; | 1393 | bdi->ra_pages = default_backing_dev_info.ra_pages; |
1366 | bdi->unplug_io_fn = btrfs_unplug_io_fn; | ||
1367 | bdi->unplug_io_data = info; | ||
1368 | bdi->congested_fn = btrfs_congested_fn; | 1394 | bdi->congested_fn = btrfs_congested_fn; |
1369 | bdi->congested_data = info; | 1395 | bdi->congested_data = info; |
1370 | return 0; | 1396 | return 0; |
@@ -1377,7 +1403,6 @@ static int bio_ready_for_csum(struct bio *bio) | |||
1377 | u64 start = 0; | 1403 | u64 start = 0; |
1378 | struct page *page; | 1404 | struct page *page; |
1379 | struct extent_io_tree *io_tree = NULL; | 1405 | struct extent_io_tree *io_tree = NULL; |
1380 | struct btrfs_fs_info *info = NULL; | ||
1381 | struct bio_vec *bvec; | 1406 | struct bio_vec *bvec; |
1382 | int i; | 1407 | int i; |
1383 | int ret; | 1408 | int ret; |
@@ -1396,7 +1421,6 @@ static int bio_ready_for_csum(struct bio *bio) | |||
1396 | buf_len = page->private >> 2; | 1421 | buf_len = page->private >> 2; |
1397 | start = page_offset(page) + bvec->bv_offset; | 1422 | start = page_offset(page) + bvec->bv_offset; |
1398 | io_tree = &BTRFS_I(page->mapping->host)->io_tree; | 1423 | io_tree = &BTRFS_I(page->mapping->host)->io_tree; |
1399 | info = BTRFS_I(page->mapping->host)->root->fs_info; | ||
1400 | } | 1424 | } |
1401 | /* are we fully contained in this bio? */ | 1425 | /* are we fully contained in this bio? */ |
1402 | if (buf_len <= length) | 1426 | if (buf_len <= length) |
@@ -1452,6 +1476,7 @@ static int cleaner_kthread(void *arg) | |||
1452 | btrfs_run_delayed_iputs(root); | 1476 | btrfs_run_delayed_iputs(root); |
1453 | btrfs_clean_old_snapshots(root); | 1477 | btrfs_clean_old_snapshots(root); |
1454 | mutex_unlock(&root->fs_info->cleaner_mutex); | 1478 | mutex_unlock(&root->fs_info->cleaner_mutex); |
1479 | btrfs_run_defrag_inodes(root->fs_info); | ||
1455 | } | 1480 | } |
1456 | 1481 | ||
1457 | if (freezing(current)) { | 1482 | if (freezing(current)) { |
@@ -1481,24 +1506,25 @@ static int transaction_kthread(void *arg) | |||
1481 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1506 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1482 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1507 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
1483 | 1508 | ||
1484 | spin_lock(&root->fs_info->new_trans_lock); | 1509 | spin_lock(&root->fs_info->trans_lock); |
1485 | cur = root->fs_info->running_transaction; | 1510 | cur = root->fs_info->running_transaction; |
1486 | if (!cur) { | 1511 | if (!cur) { |
1487 | spin_unlock(&root->fs_info->new_trans_lock); | 1512 | spin_unlock(&root->fs_info->trans_lock); |
1488 | goto sleep; | 1513 | goto sleep; |
1489 | } | 1514 | } |
1490 | 1515 | ||
1491 | now = get_seconds(); | 1516 | now = get_seconds(); |
1492 | if (!cur->blocked && | 1517 | if (!cur->blocked && |
1493 | (now < cur->start_time || now - cur->start_time < 30)) { | 1518 | (now < cur->start_time || now - cur->start_time < 30)) { |
1494 | spin_unlock(&root->fs_info->new_trans_lock); | 1519 | spin_unlock(&root->fs_info->trans_lock); |
1495 | delay = HZ * 5; | 1520 | delay = HZ * 5; |
1496 | goto sleep; | 1521 | goto sleep; |
1497 | } | 1522 | } |
1498 | transid = cur->transid; | 1523 | transid = cur->transid; |
1499 | spin_unlock(&root->fs_info->new_trans_lock); | 1524 | spin_unlock(&root->fs_info->trans_lock); |
1500 | 1525 | ||
1501 | trans = btrfs_join_transaction(root, 1); | 1526 | trans = btrfs_join_transaction(root); |
1527 | BUG_ON(IS_ERR(trans)); | ||
1502 | if (transid == trans->transid) { | 1528 | if (transid == trans->transid) { |
1503 | ret = btrfs_commit_transaction(trans, root); | 1529 | ret = btrfs_commit_transaction(trans, root); |
1504 | BUG_ON(ret); | 1530 | BUG_ON(ret); |
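
The condition above is written as the sleep case and is easy to misread; inverted, the transaction kthread commits when the running transaction is blocked or at least 30 seconds old. A small model of that decision (hypothetical helper, plain userspace types):

#include <stdbool.h>
#include <time.h>

/* Commit when blocked, or when the transaction is >= 30 seconds old.
 * The now < start_time test in the original guards against
 * get_seconds() jumping backwards. */
static bool should_commit(bool blocked, time_t start_time, time_t now)
{
	if (blocked)
		return true;
	return now >= start_time && now - start_time >= 30;
}
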
@@ -1539,10 +1565,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1539 | GFP_NOFS); | 1565 | GFP_NOFS); |
1540 | struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), | 1566 | struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), |
1541 | GFP_NOFS); | 1567 | GFP_NOFS); |
1542 | struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), | 1568 | struct btrfs_root *tree_root = btrfs_sb(sb); |
1543 | GFP_NOFS); | 1569 | struct btrfs_fs_info *fs_info = NULL; |
1544 | struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), | ||
1545 | GFP_NOFS); | ||
1546 | struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), | 1570 | struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), |
1547 | GFP_NOFS); | 1571 | GFP_NOFS); |
1548 | struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), | 1572 | struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), |
@@ -1554,11 +1578,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1554 | 1578 | ||
1555 | struct btrfs_super_block *disk_super; | 1579 | struct btrfs_super_block *disk_super; |
1556 | 1580 | ||
1557 | if (!extent_root || !tree_root || !fs_info || | 1581 | if (!extent_root || !tree_root || !tree_root->fs_info || |
1558 | !chunk_root || !dev_root || !csum_root) { | 1582 | !chunk_root || !dev_root || !csum_root) { |
1559 | err = -ENOMEM; | 1583 | err = -ENOMEM; |
1560 | goto fail; | 1584 | goto fail; |
1561 | } | 1585 | } |
1586 | fs_info = tree_root->fs_info; | ||
1562 | 1587 | ||
1563 | ret = init_srcu_struct(&fs_info->subvol_srcu); | 1588 | ret = init_srcu_struct(&fs_info->subvol_srcu); |
1564 | if (ret) { | 1589 | if (ret) { |
@@ -1578,6 +1603,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1578 | goto fail_bdi; | 1603 | goto fail_bdi; |
1579 | } | 1604 | } |
1580 | 1605 | ||
1606 | fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS; | ||
1607 | |||
1581 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | 1608 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); |
1582 | INIT_LIST_HEAD(&fs_info->trans_list); | 1609 | INIT_LIST_HEAD(&fs_info->trans_list); |
1583 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1610 | INIT_LIST_HEAD(&fs_info->dead_roots); |
@@ -1587,10 +1614,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1587 | INIT_LIST_HEAD(&fs_info->ordered_operations); | 1614 | INIT_LIST_HEAD(&fs_info->ordered_operations); |
1588 | INIT_LIST_HEAD(&fs_info->caching_block_groups); | 1615 | INIT_LIST_HEAD(&fs_info->caching_block_groups); |
1589 | spin_lock_init(&fs_info->delalloc_lock); | 1616 | spin_lock_init(&fs_info->delalloc_lock); |
1590 | spin_lock_init(&fs_info->new_trans_lock); | 1617 | spin_lock_init(&fs_info->trans_lock); |
1591 | spin_lock_init(&fs_info->ref_cache_lock); | 1618 | spin_lock_init(&fs_info->ref_cache_lock); |
1592 | spin_lock_init(&fs_info->fs_roots_radix_lock); | 1619 | spin_lock_init(&fs_info->fs_roots_radix_lock); |
1593 | spin_lock_init(&fs_info->delayed_iput_lock); | 1620 | spin_lock_init(&fs_info->delayed_iput_lock); |
1621 | spin_lock_init(&fs_info->defrag_inodes_lock); | ||
1622 | mutex_init(&fs_info->reloc_mutex); | ||
1594 | 1623 | ||
1595 | init_completion(&fs_info->kobj_unregister); | 1624 | init_completion(&fs_info->kobj_unregister); |
1596 | fs_info->tree_root = tree_root; | 1625 | fs_info->tree_root = tree_root; |
@@ -1613,15 +1642,34 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1613 | atomic_set(&fs_info->async_delalloc_pages, 0); | 1642 | atomic_set(&fs_info->async_delalloc_pages, 0); |
1614 | atomic_set(&fs_info->async_submit_draining, 0); | 1643 | atomic_set(&fs_info->async_submit_draining, 0); |
1615 | atomic_set(&fs_info->nr_async_bios, 0); | 1644 | atomic_set(&fs_info->nr_async_bios, 0); |
1645 | atomic_set(&fs_info->defrag_running, 0); | ||
1616 | fs_info->sb = sb; | 1646 | fs_info->sb = sb; |
1617 | fs_info->max_inline = 8192 * 1024; | 1647 | fs_info->max_inline = 8192 * 1024; |
1618 | fs_info->metadata_ratio = 0; | 1648 | fs_info->metadata_ratio = 0; |
1649 | fs_info->defrag_inodes = RB_ROOT; | ||
1650 | fs_info->trans_no_join = 0; | ||
1619 | 1651 | ||
1620 | fs_info->thread_pool_size = min_t(unsigned long, | 1652 | fs_info->thread_pool_size = min_t(unsigned long, |
1621 | num_online_cpus() + 2, 8); | 1653 | num_online_cpus() + 2, 8); |
1622 | 1654 | ||
1623 | INIT_LIST_HEAD(&fs_info->ordered_extents); | 1655 | INIT_LIST_HEAD(&fs_info->ordered_extents); |
1624 | spin_lock_init(&fs_info->ordered_extent_lock); | 1656 | spin_lock_init(&fs_info->ordered_extent_lock); |
1657 | fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), | ||
1658 | GFP_NOFS); | ||
1659 | if (!fs_info->delayed_root) { | ||
1660 | err = -ENOMEM; | ||
1661 | goto fail_iput; | ||
1662 | } | ||
1663 | btrfs_init_delayed_root(fs_info->delayed_root); | ||
1664 | |||
1665 | mutex_init(&fs_info->scrub_lock); | ||
1666 | atomic_set(&fs_info->scrubs_running, 0); | ||
1667 | atomic_set(&fs_info->scrub_pause_req, 0); | ||
1668 | atomic_set(&fs_info->scrubs_paused, 0); | ||
1669 | atomic_set(&fs_info->scrub_cancel_req, 0); | ||
1670 | init_waitqueue_head(&fs_info->scrub_pause_wait); | ||
1671 | init_rwsem(&fs_info->scrub_super_lock); | ||
1672 | fs_info->scrub_workers_refcnt = 0; | ||
1625 | 1673 | ||
1626 | sb->s_blocksize = 4096; | 1674 | sb->s_blocksize = 4096; |
1627 | sb->s_blocksize_bits = blksize_bits(4096); | 1675 | sb->s_blocksize_bits = blksize_bits(4096); |
@@ -1640,10 +1688,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1640 | 1688 | ||
1641 | RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); | 1689 | RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); |
1642 | extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, | 1690 | extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, |
1643 | fs_info->btree_inode->i_mapping, | 1691 | fs_info->btree_inode->i_mapping); |
1644 | GFP_NOFS); | 1692 | extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree); |
1645 | extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, | ||
1646 | GFP_NOFS); | ||
1647 | 1693 | ||
1648 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; | 1694 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; |
1649 | 1695 | ||
@@ -1657,14 +1703,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1657 | fs_info->block_group_cache_tree = RB_ROOT; | 1703 | fs_info->block_group_cache_tree = RB_ROOT; |
1658 | 1704 | ||
1659 | extent_io_tree_init(&fs_info->freed_extents[0], | 1705 | extent_io_tree_init(&fs_info->freed_extents[0], |
1660 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1706 | fs_info->btree_inode->i_mapping); |
1661 | extent_io_tree_init(&fs_info->freed_extents[1], | 1707 | extent_io_tree_init(&fs_info->freed_extents[1], |
1662 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1708 | fs_info->btree_inode->i_mapping); |
1663 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | 1709 | fs_info->pinned_extents = &fs_info->freed_extents[0]; |
1664 | fs_info->do_barriers = 1; | 1710 | fs_info->do_barriers = 1; |
1665 | 1711 | ||
1666 | 1712 | ||
1667 | mutex_init(&fs_info->trans_mutex); | ||
1668 | mutex_init(&fs_info->ordered_operations_mutex); | 1713 | mutex_init(&fs_info->ordered_operations_mutex); |
1669 | mutex_init(&fs_info->tree_log_mutex); | 1714 | mutex_init(&fs_info->tree_log_mutex); |
1670 | mutex_init(&fs_info->chunk_mutex); | 1715 | mutex_init(&fs_info->chunk_mutex); |
@@ -1680,15 +1725,17 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1680 | 1725 | ||
1681 | init_waitqueue_head(&fs_info->transaction_throttle); | 1726 | init_waitqueue_head(&fs_info->transaction_throttle); |
1682 | init_waitqueue_head(&fs_info->transaction_wait); | 1727 | init_waitqueue_head(&fs_info->transaction_wait); |
1728 | init_waitqueue_head(&fs_info->transaction_blocked_wait); | ||
1683 | init_waitqueue_head(&fs_info->async_submit_wait); | 1729 | init_waitqueue_head(&fs_info->async_submit_wait); |
1684 | 1730 | ||
1685 | __setup_root(4096, 4096, 4096, 4096, tree_root, | 1731 | __setup_root(4096, 4096, 4096, 4096, tree_root, |
1686 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | 1732 | fs_info, BTRFS_ROOT_TREE_OBJECTID); |
1687 | 1733 | ||
1688 | |||
1689 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); | 1734 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); |
1690 | if (!bh) | 1735 | if (!bh) { |
1691 | goto fail_iput; | 1736 | err = -EINVAL; |
1737 | goto fail_alloc; | ||
1738 | } | ||
1692 | 1739 | ||
1693 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); | 1740 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); |
1694 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, | 1741 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, |
@@ -1699,12 +1746,23 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1699 | 1746 | ||
1700 | disk_super = &fs_info->super_copy; | 1747 | disk_super = &fs_info->super_copy; |
1701 | if (!btrfs_super_root(disk_super)) | 1748 | if (!btrfs_super_root(disk_super)) |
1702 | goto fail_iput; | 1749 | goto fail_alloc; |
1750 | |||
1751 | /* check the FS state to see whether the FS is broken */ | ||
1752 | fs_info->fs_state |= btrfs_super_flags(disk_super); | ||
1753 | |||
1754 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | ||
1755 | |||
1756 | /* | ||
1757 | * In the long term, we'll store the compression type in the super | ||
1758 | * block, and it'll be used for per file compression control. | ||
1759 | */ | ||
1760 | fs_info->compress_type = BTRFS_COMPRESS_ZLIB; | ||
1703 | 1761 | ||
1704 | ret = btrfs_parse_options(tree_root, options); | 1762 | ret = btrfs_parse_options(tree_root, options); |
1705 | if (ret) { | 1763 | if (ret) { |
1706 | err = ret; | 1764 | err = ret; |
1707 | goto fail_iput; | 1765 | goto fail_alloc; |
1708 | } | 1766 | } |
1709 | 1767 | ||
1710 | features = btrfs_super_incompat_flags(disk_super) & | 1768 | features = btrfs_super_incompat_flags(disk_super) & |
@@ -1714,14 +1772,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1714 | "unsupported optional features (%Lx).\n", | 1772 | "unsupported optional features (%Lx).\n", |
1715 | (unsigned long long)features); | 1773 | (unsigned long long)features); |
1716 | err = -EINVAL; | 1774 | err = -EINVAL; |
1717 | goto fail_iput; | 1775 | goto fail_alloc; |
1718 | } | 1776 | } |
1719 | 1777 | ||
1720 | features = btrfs_super_incompat_flags(disk_super); | 1778 | features = btrfs_super_incompat_flags(disk_super); |
1721 | if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { | 1779 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; |
1722 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; | 1780 | if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) |
1723 | btrfs_set_super_incompat_flags(disk_super, features); | 1781 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; |
1724 | } | 1782 | btrfs_set_super_incompat_flags(disk_super, features); |
1725 | 1783 | ||
1726 | features = btrfs_super_compat_ro_flags(disk_super) & | 1784 | features = btrfs_super_compat_ro_flags(disk_super) & |
1727 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; | 1785 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; |
@@ -1730,7 +1788,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1730 | "unsupported option features (%Lx).\n", | 1788 | "unsupported option features (%Lx).\n", |
1731 | (unsigned long long)features); | 1789 | (unsigned long long)features); |
1732 | err = -EINVAL; | 1790 | err = -EINVAL; |
1733 | goto fail_iput; | 1791 | goto fail_alloc; |
1734 | } | 1792 | } |
1735 | 1793 | ||
1736 | btrfs_init_workers(&fs_info->generic_worker, | 1794 | btrfs_init_workers(&fs_info->generic_worker, |
@@ -1775,6 +1833,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1775 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", | 1833 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", |
1776 | fs_info->thread_pool_size, | 1834 | fs_info->thread_pool_size, |
1777 | &fs_info->generic_worker); | 1835 | &fs_info->generic_worker); |
1836 | btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write", | ||
1837 | 1, &fs_info->generic_worker); | ||
1838 | btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", | ||
1839 | fs_info->thread_pool_size, | ||
1840 | &fs_info->generic_worker); | ||
1778 | 1841 | ||
1779 | /* | 1842 | /* |
1780 | * endios are largely parallel and should have a very | 1843 | * endios are largely parallel and should have a very |
@@ -1795,6 +1858,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1795 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); | 1858 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); |
1796 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); | 1859 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); |
1797 | btrfs_start_workers(&fs_info->endio_write_workers, 1); | 1860 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
1861 | btrfs_start_workers(&fs_info->endio_freespace_worker, 1); | ||
1862 | btrfs_start_workers(&fs_info->delayed_workers, 1); | ||
1798 | 1863 | ||
1799 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1864 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
1800 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1865 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
@@ -1903,6 +1968,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1903 | fs_info->metadata_alloc_profile = (u64)-1; | 1968 | fs_info->metadata_alloc_profile = (u64)-1; |
1904 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | 1969 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; |
1905 | 1970 | ||
1971 | ret = btrfs_init_space_info(fs_info); | ||
1972 | if (ret) { | ||
1973 | printk(KERN_ERR "Failed to initialize space info: %d\n", ret); | ||
1974 | goto fail_block_groups; | ||
1975 | } | ||
1976 | |||
1906 | ret = btrfs_read_block_groups(extent_root); | 1977 | ret = btrfs_read_block_groups(extent_root); |
1907 | if (ret) { | 1978 | if (ret) { |
1908 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); | 1979 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); |
@@ -1928,7 +1999,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1928 | btrfs_set_opt(fs_info->mount_opt, SSD); | 1999 | btrfs_set_opt(fs_info->mount_opt, SSD); |
1929 | } | 2000 | } |
1930 | 2001 | ||
1931 | if (btrfs_super_log_root(disk_super) != 0) { | 2002 | /* do not make disk changes in broken FS */ |
2003 | if (btrfs_super_log_root(disk_super) != 0 && | ||
2004 | !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { | ||
1932 | u64 bytenr = btrfs_super_log_root(disk_super); | 2005 | u64 bytenr = btrfs_super_log_root(disk_super); |
1933 | 2006 | ||
1934 | if (fs_devices->rw_devices == 0) { | 2007 | if (fs_devices->rw_devices == 0) { |
@@ -1992,8 +2065,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1992 | 2065 | ||
1993 | if (!(sb->s_flags & MS_RDONLY)) { | 2066 | if (!(sb->s_flags & MS_RDONLY)) { |
1994 | down_read(&fs_info->cleanup_work_sem); | 2067 | down_read(&fs_info->cleanup_work_sem); |
1995 | btrfs_orphan_cleanup(fs_info->fs_root); | 2068 | err = btrfs_orphan_cleanup(fs_info->fs_root); |
2069 | if (!err) | ||
2070 | err = btrfs_orphan_cleanup(fs_info->tree_root); | ||
1996 | up_read(&fs_info->cleanup_work_sem); | 2071 | up_read(&fs_info->cleanup_work_sem); |
2072 | if (err) { | ||
2073 | close_ctree(tree_root); | ||
2074 | return ERR_PTR(err); | ||
2075 | } | ||
1997 | } | 2076 | } |
1998 | 2077 | ||
1999 | return tree_root; | 2078 | return tree_root; |
@@ -2035,7 +2114,11 @@ fail_sb_buffer: | |||
2035 | btrfs_stop_workers(&fs_info->endio_meta_workers); | 2114 | btrfs_stop_workers(&fs_info->endio_meta_workers); |
2036 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2115 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
2037 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2116 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2117 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | ||
2038 | btrfs_stop_workers(&fs_info->submit_workers); | 2118 | btrfs_stop_workers(&fs_info->submit_workers); |
2119 | btrfs_stop_workers(&fs_info->delayed_workers); | ||
2120 | fail_alloc: | ||
2121 | kfree(fs_info->delayed_root); | ||
2039 | fail_iput: | 2122 | fail_iput: |
2040 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2123 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
2041 | iput(fs_info->btree_inode); | 2124 | iput(fs_info->btree_inode); |
@@ -2063,11 +2146,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) | |||
2063 | if (uptodate) { | 2146 | if (uptodate) { |
2064 | set_buffer_uptodate(bh); | 2147 | set_buffer_uptodate(bh); |
2065 | } else { | 2148 | } else { |
2066 | if (!buffer_eopnotsupp(bh) && printk_ratelimit()) { | 2149 | printk_ratelimited(KERN_WARNING "lost page write due to " |
2067 | printk(KERN_WARNING "lost page write due to " | ||
2068 | "I/O error on %s\n", | 2150 | "I/O error on %s\n", |
2069 | bdevname(bh->b_bdev, b)); | 2151 | bdevname(bh->b_bdev, b)); |
2070 | } | ||
2071 | /* note, we don't set_buffer_write_io_error because we have | 2152 |
2072 | * our own ways of dealing with the IO errors | 2153 | * our own ways of dealing with the IO errors |
2073 | */ | 2154 | */ |
@@ -2200,21 +2281,10 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2200 | bh->b_end_io = btrfs_end_buffer_write_sync; | 2281 | bh->b_end_io = btrfs_end_buffer_write_sync; |
2201 | } | 2282 | } |
2202 | 2283 | ||
2203 | if (i == last_barrier && do_barriers && device->barriers) { | 2284 | if (i == last_barrier && do_barriers) |
2204 | ret = submit_bh(WRITE_BARRIER, bh); | 2285 | ret = submit_bh(WRITE_FLUSH_FUA, bh); |
2205 | if (ret == -EOPNOTSUPP) { | 2286 | else |
2206 | printk("btrfs: disabling barriers on dev %s\n", | ||
2207 | device->name); | ||
2208 | set_buffer_uptodate(bh); | ||
2209 | device->barriers = 0; | ||
2210 | /* one reference for submit_bh */ | ||
2211 | get_bh(bh); | ||
2212 | lock_buffer(bh); | ||
2213 | ret = submit_bh(WRITE_SYNC, bh); | ||
2214 | } | ||
2215 | } else { | ||
2216 | ret = submit_bh(WRITE_SYNC, bh); | 2287 | ret = submit_bh(WRITE_SYNC, bh); |
2217 | } | ||
2218 | 2288 | ||
2219 | if (ret) | 2289 | if (ret) |
2220 | errors++; | 2290 | errors++; |
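
write_dev_supers() drops WRITE_BARRIER and its -EOPNOTSUPP fallback: since the barrier rework in 2.6.37, flush/FUA requests degrade gracefully on devices without a volatile write cache, so the retry path can never trigger. A hedged helper (hypothetical name) capturing the new submission rule:

#include <linux/buffer_head.h>

/* Use flush+FUA only for the barrier copy of the super block;
 * everything else goes out as a plain synchronous write. */
static int submit_super_bh(struct buffer_head *bh, int barrier)
{
	return submit_bh(barrier ? WRITE_FLUSH_FUA : WRITE_SYNC, bh);
}
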
@@ -2242,7 +2312,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2242 | 2312 | ||
2243 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 2313 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
2244 | head = &root->fs_info->fs_devices->devices; | 2314 | head = &root->fs_info->fs_devices->devices; |
2245 | list_for_each_entry(dev, head, dev_list) { | 2315 | list_for_each_entry_rcu(dev, head, dev_list) { |
2246 | if (!dev->bdev) { | 2316 | if (!dev->bdev) { |
2247 | total_errors++; | 2317 | total_errors++; |
2248 | continue; | 2318 | continue; |
@@ -2275,7 +2345,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2275 | } | 2345 | } |
2276 | 2346 | ||
2277 | total_errors = 0; | 2347 | total_errors = 0; |
2278 | list_for_each_entry(dev, head, dev_list) { | 2348 | list_for_each_entry_rcu(dev, head, dev_list) { |
2279 | if (!dev->bdev) | 2349 | if (!dev->bdev) |
2280 | continue; | 2350 | continue; |
2281 | if (!dev->in_fs_metadata || !dev->writeable) | 2351 | if (!dev->in_fs_metadata || !dev->writeable) |
@@ -2313,12 +2383,15 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | |||
2313 | if (btrfs_root_refs(&root->root_item) == 0) | 2383 | if (btrfs_root_refs(&root->root_item) == 0) |
2314 | synchronize_srcu(&fs_info->subvol_srcu); | 2384 | synchronize_srcu(&fs_info->subvol_srcu); |
2315 | 2385 | ||
2386 | __btrfs_remove_free_space_cache(root->free_ino_pinned); | ||
2387 | __btrfs_remove_free_space_cache(root->free_ino_ctl); | ||
2316 | free_fs_root(root); | 2388 | free_fs_root(root); |
2317 | return 0; | 2389 | return 0; |
2318 | } | 2390 | } |
2319 | 2391 | ||
2320 | static void free_fs_root(struct btrfs_root *root) | 2392 | static void free_fs_root(struct btrfs_root *root) |
2321 | { | 2393 | { |
2394 | iput(root->cache_inode); | ||
2322 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); | 2395 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); |
2323 | if (root->anon_super.s_dev) { | 2396 | if (root->anon_super.s_dev) { |
2324 | down_write(&root->anon_super.s_umount); | 2397 | down_write(&root->anon_super.s_umount); |
@@ -2326,6 +2399,8 @@ static void free_fs_root(struct btrfs_root *root) | |||
2326 | } | 2399 | } |
2327 | free_extent_buffer(root->node); | 2400 | free_extent_buffer(root->node); |
2328 | free_extent_buffer(root->commit_root); | 2401 | free_extent_buffer(root->commit_root); |
2402 | kfree(root->free_ino_ctl); | ||
2403 | kfree(root->free_ino_pinned); | ||
2329 | kfree(root->name); | 2404 | kfree(root->name); |
2330 | kfree(root); | 2405 | kfree(root); |
2331 | } | 2406 | } |
@@ -2378,8 +2453,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | |||
2378 | 2453 | ||
2379 | root_objectid = gang[ret - 1]->root_key.objectid + 1; | 2454 | root_objectid = gang[ret - 1]->root_key.objectid + 1; |
2380 | for (i = 0; i < ret; i++) { | 2455 | for (i = 0; i < ret; i++) { |
2456 | int err; | ||
2457 | |||
2381 | root_objectid = gang[i]->root_key.objectid; | 2458 | root_objectid = gang[i]->root_key.objectid; |
2382 | btrfs_orphan_cleanup(gang[i]); | 2459 | err = btrfs_orphan_cleanup(gang[i]); |
2460 | if (err) | ||
2461 | return err; | ||
2383 | } | 2462 | } |
2384 | root_objectid++; | 2463 | root_objectid++; |
2385 | } | 2464 | } |
@@ -2400,11 +2479,15 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
2400 | down_write(&root->fs_info->cleanup_work_sem); | 2479 | down_write(&root->fs_info->cleanup_work_sem); |
2401 | up_write(&root->fs_info->cleanup_work_sem); | 2480 | up_write(&root->fs_info->cleanup_work_sem); |
2402 | 2481 | ||
2403 | trans = btrfs_join_transaction(root, 1); | 2482 | trans = btrfs_join_transaction(root); |
2483 | if (IS_ERR(trans)) | ||
2484 | return PTR_ERR(trans); | ||
2404 | ret = btrfs_commit_transaction(trans, root); | 2485 | ret = btrfs_commit_transaction(trans, root); |
2405 | BUG_ON(ret); | 2486 | BUG_ON(ret); |
2406 | /* run commit again to drop the original snapshot */ | 2487 | /* run commit again to drop the original snapshot */ |
2407 | trans = btrfs_join_transaction(root, 1); | 2488 | trans = btrfs_join_transaction(root); |
2489 | if (IS_ERR(trans)) | ||
2490 | return PTR_ERR(trans); | ||
2408 | btrfs_commit_transaction(trans, root); | 2491 | btrfs_commit_transaction(trans, root); |
2409 | ret = btrfs_write_and_wait_transaction(NULL, root); | 2492 | ret = btrfs_write_and_wait_transaction(NULL, root); |
2410 | BUG_ON(ret); | 2493 | BUG_ON(ret); |
@@ -2421,8 +2504,38 @@ int close_ctree(struct btrfs_root *root) | |||
2421 | fs_info->closing = 1; | 2504 | fs_info->closing = 1; |
2422 | smp_mb(); | 2505 | smp_mb(); |
2423 | 2506 | ||
2507 | btrfs_scrub_cancel(root); | ||
2508 | |||
2509 | /* wait for any defraggers to finish */ | ||
2510 | wait_event(fs_info->transaction_wait, | ||
2511 | (atomic_read(&fs_info->defrag_running) == 0)); | ||
2512 | |||
2513 | /* clear out the rbtree of defraggable inodes */ | ||
2514 | btrfs_run_defrag_inodes(root->fs_info); | ||
2515 | |||
2516 | btrfs_put_block_group_cache(fs_info); | ||
2517 | |||
2518 | /* | ||
2519 | * There are two situations in which a broken btrfs flips read-only: | ||
2520 | * | ||
2521 | * 1. btrfs flips read-only somewhere else before | ||
2522 | * btrfs_commit_super; sb->s_flags has the MS_RDONLY flag set, | ||
2523 | * so btrfs skips writing the sb directly, keeping the | ||
2524 | * ERROR state on disk. | ||
2525 | * | ||
2526 | * 2. btrfs flips read-only inside btrfs_commit_super itself; | ||
2527 | * in that case btrfs cannot write the sb via btrfs_commit_super, | ||
2528 | * and since fs_state has the BTRFS_SUPER_FLAG_ERROR flag set, | ||
2529 | * btrfs cleans up all FS resources first and writes the sb last. | ||
2530 | */ | ||
2424 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 2531 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
2425 | ret = btrfs_commit_super(root); | 2532 | ret = btrfs_commit_super(root); |
2533 | if (ret) | ||
2534 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | ||
2535 | } | ||
2536 | |||
2537 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
2538 | ret = btrfs_error_commit_super(root); | ||
2426 | if (ret) | 2539 | if (ret) |
2427 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2540 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
2428 | } | 2541 | } |
@@ -2458,6 +2571,7 @@ int close_ctree(struct btrfs_root *root) | |||
2458 | del_fs_roots(fs_info); | 2571 | del_fs_roots(fs_info); |
2459 | 2572 | ||
2460 | iput(fs_info->btree_inode); | 2573 | iput(fs_info->btree_inode); |
2574 | kfree(fs_info->delayed_root); | ||
2461 | 2575 | ||
2462 | btrfs_stop_workers(&fs_info->generic_worker); | 2576 | btrfs_stop_workers(&fs_info->generic_worker); |
2463 | btrfs_stop_workers(&fs_info->fixup_workers); | 2577 | btrfs_stop_workers(&fs_info->fixup_workers); |
@@ -2467,7 +2581,9 @@ int close_ctree(struct btrfs_root *root) | |||
2467 | btrfs_stop_workers(&fs_info->endio_meta_workers); | 2581 | btrfs_stop_workers(&fs_info->endio_meta_workers); |
2468 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2582 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
2469 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2583 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2584 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | ||
2470 | btrfs_stop_workers(&fs_info->submit_workers); | 2585 | btrfs_stop_workers(&fs_info->submit_workers); |
2586 | btrfs_stop_workers(&fs_info->delayed_workers); | ||
2471 | 2587 | ||
2472 | btrfs_close_devices(fs_info->fs_devices); | 2588 | btrfs_close_devices(fs_info->fs_devices); |
2473 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2589 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
@@ -2480,6 +2596,8 @@ int close_ctree(struct btrfs_root *root) | |||
2480 | kfree(fs_info->chunk_root); | 2596 | kfree(fs_info->chunk_root); |
2481 | kfree(fs_info->dev_root); | 2597 | kfree(fs_info->dev_root); |
2482 | kfree(fs_info->csum_root); | 2598 | kfree(fs_info->csum_root); |
2599 | kfree(fs_info); | ||
2600 | |||
2483 | return 0; | 2601 | return 0; |
2484 | } | 2602 | } |
2485 | 2603 | ||
@@ -2542,6 +2660,29 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | |||
2542 | if (current->flags & PF_MEMALLOC) | 2660 | if (current->flags & PF_MEMALLOC) |
2543 | return; | 2661 | return; |
2544 | 2662 | ||
2663 | btrfs_balance_delayed_items(root); | ||
2664 | |||
2665 | num_dirty = root->fs_info->dirty_metadata_bytes; | ||
2666 | |||
2667 | if (num_dirty > thresh) { | ||
2668 | balance_dirty_pages_ratelimited_nr( | ||
2669 | root->fs_info->btree_inode->i_mapping, 1); | ||
2670 | } | ||
2671 | return; | ||
2672 | } | ||
2673 | |||
2674 | void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | ||
2675 | { | ||
2676 | /* | ||
2677 | * it looks as though older kernels can get into trouble with | ||
2678 | * this code; they end up stuck in balance_dirty_pages forever | ||
2679 | */ | ||
2680 | u64 num_dirty; | ||
2681 | unsigned long thresh = 32 * 1024 * 1024; | ||
2682 | |||
2683 | if (current->flags & PF_MEMALLOC) | ||
2684 | return; | ||
2685 | |||
2545 | num_dirty = root->fs_info->dirty_metadata_bytes; | 2686 | num_dirty = root->fs_info->dirty_metadata_bytes; |
2546 | 2687 | ||
2547 | if (num_dirty > thresh) { | 2688 | if (num_dirty > thresh) { |
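The two balance helpers above share one idea: unless the caller is already in memory-reclaim context, throttle writers once dirty btree metadata crosses a fixed threshold. A minimal userspace sketch of that shape follows; the counter and the throttle hook are stand-ins for fs_info->dirty_metadata_bytes and balance_dirty_pages_ratelimited_nr(), not the real kernel API.

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins: in the kernel these are
 * fs_info->dirty_metadata_bytes and balance_dirty_pages_ratelimited_nr(). */
static uint64_t dirty_metadata_bytes;

static void throttle_writeback(void)
{
	printf("throttling at %llu dirty bytes\n",
	       (unsigned long long)dirty_metadata_bytes);
}

/* Same shape as btrfs_btree_balance_dirty(): never throttle a caller
 * that is itself reclaiming memory, otherwise throttle once dirty
 * metadata crosses a fixed 32 MiB threshold. */
static void balance_dirty(int in_memalloc)
{
	const uint64_t thresh = 32ULL * 1024 * 1024;

	if (in_memalloc)	/* like current->flags & PF_MEMALLOC */
		return;

	if (dirty_metadata_bytes > thresh)
		throttle_writeback();
}

int main(void)
{
	dirty_metadata_bytes = 64ULL * 1024 * 1024;
	balance_dirty(0);	/* throttles */
	balance_dirty(1);	/* skipped: reclaim context */
	return 0;
}
```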
@@ -2574,7 +2715,7 @@ int btree_lock_page_hook(struct page *page) | |||
2574 | goto out; | 2715 | goto out; |
2575 | 2716 | ||
2576 | len = page->private >> 2; | 2717 | len = page->private >> 2; |
2577 | eb = find_extent_buffer(io_tree, bytenr, len, GFP_NOFS); | 2718 | eb = find_extent_buffer(io_tree, bytenr, len); |
2578 | if (!eb) | 2719 | if (!eb) |
2579 | goto out; | 2720 | goto out; |
2580 | 2721 | ||
@@ -2597,6 +2738,355 @@ out: | |||
2597 | return 0; | 2738 | return 0; |
2598 | } | 2739 | } |
2599 | 2740 | ||
2741 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | ||
2742 | int read_only) | ||
2743 | { | ||
2744 | if (read_only) | ||
2745 | return; | ||
2746 | |||
2747 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | ||
2748 | printk(KERN_WARNING "warning: mount fs with errors, " | ||
2749 | "running btrfsck is recommended\n"); | ||
2750 | } | ||
2751 | |||
2752 | int btrfs_error_commit_super(struct btrfs_root *root) | ||
2753 | { | ||
2754 | int ret; | ||
2755 | |||
2756 | mutex_lock(&root->fs_info->cleaner_mutex); | ||
2757 | btrfs_run_delayed_iputs(root); | ||
2758 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
2759 | |||
2760 | down_write(&root->fs_info->cleanup_work_sem); | ||
2761 | up_write(&root->fs_info->cleanup_work_sem); | ||
2762 | |||
2763 | /* cleanup FS via transaction */ | ||
2764 | btrfs_cleanup_transaction(root); | ||
2765 | |||
2766 | ret = write_ctree_super(NULL, root, 0); | ||
2767 | |||
2768 | return ret; | ||
2769 | } | ||
2770 | |||
2771 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root) | ||
2772 | { | ||
2773 | struct btrfs_inode *btrfs_inode; | ||
2774 | struct list_head splice; | ||
2775 | |||
2776 | INIT_LIST_HEAD(&splice); | ||
2777 | |||
2778 | mutex_lock(&root->fs_info->ordered_operations_mutex); | ||
2779 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2780 | |||
2781 | list_splice_init(&root->fs_info->ordered_operations, &splice); | ||
2782 | while (!list_empty(&splice)) { | ||
2783 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
2784 | ordered_operations); | ||
2785 | |||
2786 | list_del_init(&btrfs_inode->ordered_operations); | ||
2787 | |||
2788 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
2789 | } | ||
2790 | |||
2791 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2792 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | ||
2793 | |||
2794 | return 0; | ||
2795 | } | ||
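btrfs_destroy_ordered_operations() uses a pattern that recurs in the destroy helpers below: splice the shared list onto a private head while the lock is held, then drain the private copy entry by entry. A self-contained userspace sketch of that shape, with a minimal reimplementation of the kernel's intrusive list and a hypothetical fake_inode in place of struct btrfs_inode (locking is elided here):

```c
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Userspace mimic of the kernel's intrusive circular list. */
struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }
static int list_empty(const struct list_head *h) { return h->next == h; }

static void list_add(struct list_head *e, struct list_head *head)
{
	e->next = head->next;
	e->prev = head;
	head->next->prev = e;
	head->next = e;
}

static void list_del_init(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
	INIT_LIST_HEAD(e);
}

/* Move every entry from @list onto @head and reinitialise @list. */
static void list_splice_init(struct list_head *list, struct list_head *head)
{
	struct list_head *first = list->next, *last = list->prev;

	if (list_empty(list))
		return;
	first->prev = head;
	last->next = head->next;
	head->next->prev = last;
	head->next = first;
	INIT_LIST_HEAD(list);
}

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct fake_inode {			/* stand-in for struct btrfs_inode */
	int ino;
	struct list_head ordered_operations;
};

static struct list_head ordered_ops;	/* fs_info->ordered_operations */

/* Splice the shared list onto a private head (done under
 * ordered_extent_lock in the kernel), then drain the private copy. */
static void destroy_ordered_operations(void)
{
	struct list_head splice;

	INIT_LIST_HEAD(&splice);
	list_splice_init(&ordered_ops, &splice);

	while (!list_empty(&splice)) {
		struct fake_inode *in = container_of(splice.next,
				struct fake_inode, ordered_operations);

		list_del_init(&in->ordered_operations);
		printf("invalidate inode %d\n", in->ino);
		free(in);
	}
}

int main(void)
{
	INIT_LIST_HEAD(&ordered_ops);
	for (int i = 1; i <= 3; i++) {
		struct fake_inode *in = malloc(sizeof(*in));

		in->ino = i;
		list_add(&in->ordered_operations, &ordered_ops);
	}
	destroy_ordered_operations();
	return 0;
}
```

The point of the splice is that the shared list is emptied in one O(1) step, so the lock never has to be held across per-entry work.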
2796 | |||
2797 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root) | ||
2798 | { | ||
2799 | struct list_head splice; | ||
2800 | struct btrfs_ordered_extent *ordered; | ||
2801 | struct inode *inode; | ||
2802 | |||
2803 | INIT_LIST_HEAD(&splice); | ||
2804 | |||
2805 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2806 | |||
2807 | list_splice_init(&root->fs_info->ordered_extents, &splice); | ||
2808 | while (!list_empty(&splice)) { | ||
2809 | ordered = list_entry(splice.next, struct btrfs_ordered_extent, | ||
2810 | root_extent_list); | ||
2811 | |||
2812 | list_del_init(&ordered->root_extent_list); | ||
2813 | atomic_inc(&ordered->refs); | ||
2814 | |||
2815 | /* the inode may be getting freed (in sys_unlink path). */ | ||
2816 | inode = igrab(ordered->inode); | ||
2817 | |||
2818 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2819 | if (inode) | ||
2820 | iput(inode); | ||
2821 | |||
2822 | atomic_set(&ordered->refs, 1); | ||
2823 | btrfs_put_ordered_extent(ordered); | ||
2824 | |||
2825 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2826 | } | ||
2827 | |||
2828 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2829 | |||
2830 | return 0; | ||
2831 | } | ||
2832 | |||
2833 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
2834 | struct btrfs_root *root) | ||
2835 | { | ||
2836 | struct rb_node *node; | ||
2837 | struct btrfs_delayed_ref_root *delayed_refs; | ||
2838 | struct btrfs_delayed_ref_node *ref; | ||
2839 | int ret = 0; | ||
2840 | |||
2841 | delayed_refs = &trans->delayed_refs; | ||
2842 | |||
2843 | spin_lock(&delayed_refs->lock); | ||
2844 | if (delayed_refs->num_entries == 0) { | ||
2845 | spin_unlock(&delayed_refs->lock); | ||
2846 | printk(KERN_INFO "delayed_refs has NO entry\n"); | ||
2847 | return ret; | ||
2848 | } | ||
2849 | |||
2850 | node = rb_first(&delayed_refs->root); | ||
2851 | while (node) { | ||
2852 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
2853 | node = rb_next(node); | ||
2854 | |||
2855 | ref->in_tree = 0; | ||
2856 | rb_erase(&ref->rb_node, &delayed_refs->root); | ||
2857 | delayed_refs->num_entries--; | ||
2858 | |||
2859 | atomic_set(&ref->refs, 1); | ||
2860 | if (btrfs_delayed_ref_is_head(ref)) { | ||
2861 | struct btrfs_delayed_ref_head *head; | ||
2862 | |||
2863 | head = btrfs_delayed_node_to_head(ref); | ||
2864 | mutex_lock(&head->mutex); | ||
2865 | kfree(head->extent_op); | ||
2866 | delayed_refs->num_heads--; | ||
2867 | if (list_empty(&head->cluster)) | ||
2868 | delayed_refs->num_heads_ready--; | ||
2869 | list_del_init(&head->cluster); | ||
2870 | mutex_unlock(&head->mutex); | ||
2871 | } | ||
2872 | |||
2873 | spin_unlock(&delayed_refs->lock); | ||
2874 | btrfs_put_delayed_ref(ref); | ||
2875 | |||
2876 | cond_resched(); | ||
2877 | spin_lock(&delayed_refs->lock); | ||
2878 | } | ||
2879 | |||
2880 | spin_unlock(&delayed_refs->lock); | ||
2881 | |||
2882 | return ret; | ||
2883 | } | ||
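The loop above also shows the usual way to tear down a shared structure without monopolizing its lock: detach one node, drop the lock for the slow part (the ref put), reschedule, then retake the lock. A sketch of that locking rhythm using pthreads, with a plain linked list standing in for the delayed-ref rbtree (compile with -pthread):

```c
#include <pthread.h>
#include <sched.h>
#include <stdlib.h>

struct ref { struct ref *next; int id; };

static pthread_mutex_t refs_lock = PTHREAD_MUTEX_INITIALIZER;
static struct ref *refs_head;	/* stands in for the delayed-ref rbtree */

/* Same rhythm as btrfs_destroy_delayed_refs(): detach one entry under
 * the lock, drop the lock for the slow put, yield the CPU, then retake
 * the lock before touching the next entry. */
static void destroy_refs(void)
{
	pthread_mutex_lock(&refs_lock);
	while (refs_head) {
		struct ref *r = refs_head;

		refs_head = r->next;		/* rb_erase() stand-in */

		pthread_mutex_unlock(&refs_lock);
		free(r);			/* btrfs_put_delayed_ref() */
		sched_yield();			/* cond_resched() */
		pthread_mutex_lock(&refs_lock);
	}
	pthread_mutex_unlock(&refs_lock);
}

int main(void)
{
	for (int i = 0; i < 5; i++) {
		struct ref *r = malloc(sizeof(*r));

		r->id = i;
		r->next = refs_head;
		refs_head = r;
	}
	destroy_refs();
	return 0;
}
```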
2884 | |||
2885 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) | ||
2886 | { | ||
2887 | struct btrfs_pending_snapshot *snapshot; | ||
2888 | struct list_head splice; | ||
2889 | |||
2890 | INIT_LIST_HEAD(&splice); | ||
2891 | |||
2892 | list_splice_init(&t->pending_snapshots, &splice); | ||
2893 | |||
2894 | while (!list_empty(&splice)) { | ||
2895 | snapshot = list_entry(splice.next, | ||
2896 | struct btrfs_pending_snapshot, | ||
2897 | list); | ||
2898 | |||
2899 | list_del_init(&snapshot->list); | ||
2900 | |||
2901 | kfree(snapshot); | ||
2902 | } | ||
2903 | |||
2904 | return 0; | ||
2905 | } | ||
2906 | |||
2907 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) | ||
2908 | { | ||
2909 | struct btrfs_inode *btrfs_inode; | ||
2910 | struct list_head splice; | ||
2911 | |||
2912 | INIT_LIST_HEAD(&splice); | ||
2913 | |||
2914 | spin_lock(&root->fs_info->delalloc_lock); | ||
2915 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); | ||
2916 | |||
2917 | while (!list_empty(&splice)) { | ||
2918 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
2919 | delalloc_inodes); | ||
2920 | |||
2921 | list_del_init(&btrfs_inode->delalloc_inodes); | ||
2922 | |||
2923 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
2924 | } | ||
2925 | |||
2926 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2927 | |||
2928 | return 0; | ||
2929 | } | ||
2930 | |||
2931 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
2932 | struct extent_io_tree *dirty_pages, | ||
2933 | int mark) | ||
2934 | { | ||
2935 | int ret; | ||
2936 | struct page *page; | ||
2937 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
2938 | struct extent_buffer *eb; | ||
2939 | u64 start = 0; | ||
2940 | u64 end; | ||
2941 | u64 offset; | ||
2942 | unsigned long index; | ||
2943 | |||
2944 | while (1) { | ||
2945 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | ||
2946 | mark); | ||
2947 | if (ret) | ||
2948 | break; | ||
2949 | |||
2950 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); | ||
2951 | while (start <= end) { | ||
2952 | index = start >> PAGE_CACHE_SHIFT; | ||
2953 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | ||
2954 | page = find_get_page(btree_inode->i_mapping, index); | ||
2955 | if (!page) | ||
2956 | continue; | ||
2957 | offset = page_offset(page); | ||
2958 | |||
2959 | spin_lock(&dirty_pages->buffer_lock); | ||
2960 | eb = radix_tree_lookup( | ||
2961 | &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, | ||
2962 | offset >> PAGE_CACHE_SHIFT); | ||
2963 | spin_unlock(&dirty_pages->buffer_lock); | ||
2964 | if (eb) { | ||
2965 | ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, | ||
2966 | &eb->bflags); | ||
2967 | atomic_set(&eb->refs, 1); | ||
2968 | } | ||
2969 | if (PageWriteback(page)) | ||
2970 | end_page_writeback(page); | ||
2971 | |||
2972 | lock_page(page); | ||
2973 | if (PageDirty(page)) { | ||
2974 | clear_page_dirty_for_io(page); | ||
2975 | spin_lock_irq(&page->mapping->tree_lock); | ||
2976 | radix_tree_tag_clear(&page->mapping->page_tree, | ||
2977 | page_index(page), | ||
2978 | PAGECACHE_TAG_DIRTY); | ||
2979 | spin_unlock_irq(&page->mapping->tree_lock); | ||
2980 | } | ||
2981 | |||
2982 | page->mapping->a_ops->invalidatepage(page, 0); | ||
2983 | unlock_page(page); | ||
2984 | } | ||
2985 | } | ||
2986 | |||
2987 | return ret; | ||
2988 | } | ||
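The inner loop converts a byte offset into a page index and then bumps start to the first byte of the following page, so every page overlapping [start, end] is visited exactly once. A quick userspace check of that arithmetic, assuming 4 KiB pages:

```c
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12		/* assuming 4 KiB pages */

/* The range walk from btrfs_destroy_marked_extents(): byte offset ->
 * page index, then advance start past the end of that page. */
static void for_each_page(uint64_t start, uint64_t end)
{
	while (start <= end) {
		uint64_t index = start >> PAGE_SHIFT;

		start = (index + 1) << PAGE_SHIFT;
		printf("visit page %llu\n", (unsigned long long)index);
	}
}

int main(void)
{
	for_each_page(4000, 12000);	/* visits pages 0, 1, 2 */
	return 0;
}
```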
2989 | |||
2990 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
2991 | struct extent_io_tree *pinned_extents) | ||
2992 | { | ||
2993 | struct extent_io_tree *unpin; | ||
2994 | u64 start; | ||
2995 | u64 end; | ||
2996 | int ret; | ||
2997 | |||
2998 | unpin = pinned_extents; | ||
2999 | while (1) { | ||
3000 | ret = find_first_extent_bit(unpin, 0, &start, &end, | ||
3001 | EXTENT_DIRTY); | ||
3002 | if (ret) | ||
3003 | break; | ||
3004 | |||
3005 | /* honour the DISCARD mount option */ | ||
3006 | if (btrfs_test_opt(root, DISCARD)) | ||
3007 | ret = btrfs_error_discard_extent(root, start, | ||
3008 | end + 1 - start, | ||
3009 | NULL); | ||
3010 | |||
3011 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | ||
3012 | btrfs_error_unpin_extent_range(root, start, end); | ||
3013 | cond_resched(); | ||
3014 | } | ||
3015 | |||
3016 | return 0; | ||
3017 | } | ||
3018 | |||
3019 | static int btrfs_cleanup_transaction(struct btrfs_root *root) | ||
3020 | { | ||
3021 | struct btrfs_transaction *t; | ||
3022 | LIST_HEAD(list); | ||
3023 | |||
3024 | WARN_ON(1); | ||
3025 | |||
3026 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | ||
3027 | |||
3028 | spin_lock(&root->fs_info->trans_lock); | ||
3029 | list_splice_init(&root->fs_info->trans_list, &list); | ||
3030 | root->fs_info->trans_no_join = 1; | ||
3031 | spin_unlock(&root->fs_info->trans_lock); | ||
3032 | |||
3033 | while (!list_empty(&list)) { | ||
3034 | t = list_entry(list.next, struct btrfs_transaction, list); | ||
3035 | if (!t) | ||
3036 | break; | ||
3037 | |||
3038 | btrfs_destroy_ordered_operations(root); | ||
3039 | |||
3040 | btrfs_destroy_ordered_extents(root); | ||
3041 | |||
3042 | btrfs_destroy_delayed_refs(t, root); | ||
3043 | |||
3044 | btrfs_block_rsv_release(root, | ||
3045 | &root->fs_info->trans_block_rsv, | ||
3046 | t->dirty_pages.dirty_bytes); | ||
3047 | |||
3048 | /* FIXME: cleanup wait for commit */ | ||
3049 | t->in_commit = 1; | ||
3050 | t->blocked = 1; | ||
3051 | if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) | ||
3052 | wake_up(&root->fs_info->transaction_blocked_wait); | ||
3053 | |||
3054 | t->blocked = 0; | ||
3055 | if (waitqueue_active(&root->fs_info->transaction_wait)) | ||
3056 | wake_up(&root->fs_info->transaction_wait); | ||
3057 | |||
3058 | t->commit_done = 1; | ||
3059 | if (waitqueue_active(&t->commit_wait)) | ||
3060 | wake_up(&t->commit_wait); | ||
3061 | |||
3062 | btrfs_destroy_pending_snapshots(t); | ||
3063 | |||
3064 | btrfs_destroy_delalloc_inodes(root); | ||
3065 | |||
3066 | spin_lock(&root->fs_info->trans_lock); | ||
3067 | root->fs_info->running_transaction = NULL; | ||
3068 | spin_unlock(&root->fs_info->trans_lock); | ||
3069 | |||
3070 | btrfs_destroy_marked_extents(root, &t->dirty_pages, | ||
3071 | EXTENT_DIRTY); | ||
3072 | |||
3073 | btrfs_destroy_pinned_extent(root, | ||
3074 | root->fs_info->pinned_extents); | ||
3075 | |||
3076 | atomic_set(&t->use_count, 0); | ||
3077 | list_del_init(&t->list); | ||
3078 | memset(t, 0, sizeof(*t)); | ||
3079 | kmem_cache_free(btrfs_transaction_cachep, t); | ||
3080 | } | ||
3081 | |||
3082 | spin_lock(&root->fs_info->trans_lock); | ||
3083 | root->fs_info->trans_no_join = 0; | ||
3084 | spin_unlock(&root->fs_info->trans_lock); | ||
3085 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | ||
3086 | |||
3087 | return 0; | ||
3088 | } | ||
3089 | |||
2600 | static struct extent_io_ops btree_extent_io_ops = { | 3090 | static struct extent_io_ops btree_extent_io_ops = { |
2601 | .write_cache_pages_lock_hook = btree_lock_page_hook, | 3091 | .write_cache_pages_lock_hook = btree_lock_page_hook, |
2602 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3092 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 88e825a0bf21..a0b610a67aae 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -52,37 +52,23 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
52 | struct btrfs_root *root, int max_mirrors); | 52 | struct btrfs_root *root, int max_mirrors); |
53 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); | 53 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); |
54 | int btrfs_commit_super(struct btrfs_root *root); | 54 | int btrfs_commit_super(struct btrfs_root *root); |
55 | int btrfs_error_commit_super(struct btrfs_root *root); | ||
55 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 56 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
56 | u64 bytenr, u32 blocksize); | 57 | u64 bytenr, u32 blocksize); |
57 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | ||
58 | u64 root_objectid); | ||
59 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | ||
60 | struct btrfs_key *location, | ||
61 | const char *name, int namelen); | ||
62 | struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | 58 | struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, |
63 | struct btrfs_key *location); | 59 | struct btrfs_key *location); |
64 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | 60 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, |
65 | struct btrfs_key *location); | 61 | struct btrfs_key *location); |
66 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); | 62 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); |
67 | int btrfs_insert_dev_radix(struct btrfs_root *root, | ||
68 | struct block_device *bdev, | ||
69 | u64 device_id, | ||
70 | u64 block_start, | ||
71 | u64 num_blocks); | ||
72 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); | 63 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); |
64 | void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); | ||
73 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); | 65 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); |
74 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); | 66 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); |
75 | void btrfs_mark_buffer_dirty_nonblocking(struct extent_buffer *buf); | ||
76 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); | 67 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); |
77 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf); | 68 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf); |
78 | int wait_on_tree_block_writeback(struct btrfs_root *root, | ||
79 | struct extent_buffer *buf); | ||
80 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); | 69 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); |
81 | u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); | 70 | u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); |
82 | void btrfs_csum_final(u32 crc, char *result); | 71 | void btrfs_csum_final(u32 crc, char *result); |
83 | int btrfs_open_device(struct btrfs_device *dev); | ||
84 | int btrfs_verify_block_csum(struct btrfs_root *root, | ||
85 | struct extent_buffer *buf); | ||
86 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | 72 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, |
87 | int metadata); | 73 | int metadata); |
88 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 74 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
@@ -90,8 +76,6 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
90 | unsigned long bio_flags, u64 bio_offset, | 76 | unsigned long bio_flags, u64 bio_offset, |
91 | extent_submit_bio_hook_t *submit_bio_start, | 77 | extent_submit_bio_hook_t *submit_bio_start, |
92 | extent_submit_bio_hook_t *submit_bio_done); | 78 | extent_submit_bio_hook_t *submit_bio_done); |
93 | |||
94 | int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); | ||
95 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); | 79 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); |
96 | int btrfs_write_tree_block(struct extent_buffer *buf); | 80 | int btrfs_write_tree_block(struct extent_buffer *buf); |
97 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); | 81 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 951ef09b82f4..1b8dc33778f9 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
@@ -21,14 +21,18 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
21 | int len = *max_len; | 21 | int len = *max_len; |
22 | int type; | 22 | int type; |
23 | 23 | ||
24 | if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || | 24 | if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) { |
25 | (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) | 25 | *max_len = BTRFS_FID_SIZE_CONNECTABLE; |
26 | return 255; | 26 | return 255; |
27 | } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) { | ||
28 | *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE; | ||
29 | return 255; | ||
30 | } | ||
27 | 31 | ||
28 | len = BTRFS_FID_SIZE_NON_CONNECTABLE; | 32 | len = BTRFS_FID_SIZE_NON_CONNECTABLE; |
29 | type = FILEID_BTRFS_WITHOUT_PARENT; | 33 | type = FILEID_BTRFS_WITHOUT_PARENT; |
30 | 34 | ||
31 | fid->objectid = inode->i_ino; | 35 | fid->objectid = btrfs_ino(inode); |
32 | fid->root_objectid = BTRFS_I(inode)->root->objectid; | 36 | fid->root_objectid = BTRFS_I(inode)->root->objectid; |
33 | fid->gen = inode->i_generation; | 37 | fid->gen = inode->i_generation; |
34 | 38 | ||
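The reworked btrfs_encode_fh() now follows the VFS encode_fh contract more helpfully: when the caller's buffer is too small, it reports the length that would have been needed back through *max_len and returns 255. A userspace sketch of that contract; the FID_SIZE_* values and the returned fid type below are made up for illustration, not the real btrfs constants:

```c
#include <stdio.h>
#include <string.h>

/* Hypothetical sizes standing in for BTRFS_FID_SIZE_* (measured in
 * 4-byte words, as the VFS encode_fh contract does). */
#define FID_SIZE_NON_CONNECTABLE 5
#define FID_SIZE_CONNECTABLE     8
#define FILEID_INVALID           255

/* If the caller's buffer is too small, report the needed size in
 * *max_len and return 255 instead of failing silently. */
static int encode_fh(unsigned int *fh, int *max_len, int connectable)
{
	int len = *max_len;

	if (connectable && len < FID_SIZE_CONNECTABLE) {
		*max_len = FID_SIZE_CONNECTABLE;
		return FILEID_INVALID;
	} else if (len < FID_SIZE_NON_CONNECTABLE) {
		*max_len = FID_SIZE_NON_CONNECTABLE;
		return FILEID_INVALID;
	}

	memset(fh, 0, FID_SIZE_NON_CONNECTABLE * sizeof(*fh));
	*max_len = FID_SIZE_NON_CONNECTABLE;
	return 1;			/* hypothetical fid type */
}

int main(void)
{
	unsigned int fh[8];
	int max_len = 2;		/* deliberately too small */

	if (encode_fh(fh, &max_len, 1) == FILEID_INVALID)
		printf("buffer too small, need %d words\n", max_len);
	return 0;
}
```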
@@ -65,7 +69,6 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
65 | { | 69 | { |
66 | struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; | 70 | struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; |
67 | struct btrfs_root *root; | 71 | struct btrfs_root *root; |
68 | struct dentry *dentry; | ||
69 | struct inode *inode; | 72 | struct inode *inode; |
70 | struct btrfs_key key; | 73 | struct btrfs_key key; |
71 | int index; | 74 | int index; |
@@ -108,10 +111,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
108 | return ERR_PTR(-ESTALE); | 111 | return ERR_PTR(-ESTALE); |
109 | } | 112 | } |
110 | 113 | ||
111 | dentry = d_obtain_alias(inode); | 114 | return d_obtain_alias(inode); |
112 | if (!IS_ERR(dentry)) | ||
113 | dentry->d_op = &btrfs_dentry_operations; | ||
114 | return dentry; | ||
115 | fail: | 115 | fail: |
116 | srcu_read_unlock(&fs_info->subvol_srcu, index); | 116 | srcu_read_unlock(&fs_info->subvol_srcu, index); |
117 | return ERR_PTR(err); | 117 | return ERR_PTR(err); |
@@ -166,7 +166,6 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | |||
166 | static struct dentry *btrfs_get_parent(struct dentry *child) | 166 | static struct dentry *btrfs_get_parent(struct dentry *child) |
167 | { | 167 | { |
168 | struct inode *dir = child->d_inode; | 168 | struct inode *dir = child->d_inode; |
169 | static struct dentry *dentry; | ||
170 | struct btrfs_root *root = BTRFS_I(dir)->root; | 169 | struct btrfs_root *root = BTRFS_I(dir)->root; |
171 | struct btrfs_path *path; | 170 | struct btrfs_path *path; |
172 | struct extent_buffer *leaf; | 171 | struct extent_buffer *leaf; |
@@ -176,14 +175,16 @@ static struct dentry *btrfs_get_parent(struct dentry *child) | |||
176 | int ret; | 175 | int ret; |
177 | 176 | ||
178 | path = btrfs_alloc_path(); | 177 | path = btrfs_alloc_path(); |
178 | if (!path) | ||
179 | return ERR_PTR(-ENOMEM); | ||
179 | 180 | ||
180 | if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | 181 | if (btrfs_ino(dir) == BTRFS_FIRST_FREE_OBJECTID) { |
181 | key.objectid = root->root_key.objectid; | 182 | key.objectid = root->root_key.objectid; |
182 | key.type = BTRFS_ROOT_BACKREF_KEY; | 183 | key.type = BTRFS_ROOT_BACKREF_KEY; |
183 | key.offset = (u64)-1; | 184 | key.offset = (u64)-1; |
184 | root = root->fs_info->tree_root; | 185 | root = root->fs_info->tree_root; |
185 | } else { | 186 | } else { |
186 | key.objectid = dir->i_ino; | 187 | key.objectid = btrfs_ino(dir); |
187 | key.type = BTRFS_INODE_REF_KEY; | 188 | key.type = BTRFS_INODE_REF_KEY; |
188 | key.offset = (u64)-1; | 189 | key.offset = (u64)-1; |
189 | } | 190 | } |
@@ -223,18 +224,94 @@ static struct dentry *btrfs_get_parent(struct dentry *child) | |||
223 | 224 | ||
224 | key.type = BTRFS_INODE_ITEM_KEY; | 225 | key.type = BTRFS_INODE_ITEM_KEY; |
225 | key.offset = 0; | 226 | key.offset = 0; |
226 | dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); | 227 | return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); |
227 | if (!IS_ERR(dentry)) | ||
228 | dentry->d_op = &btrfs_dentry_operations; | ||
229 | return dentry; | ||
230 | fail: | 228 | fail: |
231 | btrfs_free_path(path); | 229 | btrfs_free_path(path); |
232 | return ERR_PTR(ret); | 230 | return ERR_PTR(ret); |
233 | } | 231 | } |
234 | 232 | ||
233 | static int btrfs_get_name(struct dentry *parent, char *name, | ||
234 | struct dentry *child) | ||
235 | { | ||
236 | struct inode *inode = child->d_inode; | ||
237 | struct inode *dir = parent->d_inode; | ||
238 | struct btrfs_path *path; | ||
239 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
240 | struct btrfs_inode_ref *iref; | ||
241 | struct btrfs_root_ref *rref; | ||
242 | struct extent_buffer *leaf; | ||
243 | unsigned long name_ptr; | ||
244 | struct btrfs_key key; | ||
245 | int name_len; | ||
246 | int ret; | ||
247 | u64 ino; | ||
248 | |||
249 | if (!dir || !inode) | ||
250 | return -EINVAL; | ||
251 | |||
252 | if (!S_ISDIR(dir->i_mode)) | ||
253 | return -EINVAL; | ||
254 | |||
255 | ino = btrfs_ino(inode); | ||
256 | |||
257 | path = btrfs_alloc_path(); | ||
258 | if (!path) | ||
259 | return -ENOMEM; | ||
260 | path->leave_spinning = 1; | ||
261 | |||
262 | if (ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
263 | key.objectid = BTRFS_I(inode)->root->root_key.objectid; | ||
264 | key.type = BTRFS_ROOT_BACKREF_KEY; | ||
265 | key.offset = (u64)-1; | ||
266 | root = root->fs_info->tree_root; | ||
267 | } else { | ||
268 | key.objectid = ino; | ||
269 | key.offset = btrfs_ino(dir); | ||
270 | key.type = BTRFS_INODE_REF_KEY; | ||
271 | } | ||
272 | |||
273 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
274 | if (ret < 0) { | ||
275 | btrfs_free_path(path); | ||
276 | return ret; | ||
277 | } else if (ret > 0) { | ||
278 | if (ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
279 | path->slots[0]--; | ||
280 | } else { | ||
281 | btrfs_free_path(path); | ||
282 | return -ENOENT; | ||
283 | } | ||
284 | } | ||
285 | leaf = path->nodes[0]; | ||
286 | |||
287 | if (ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
288 | rref = btrfs_item_ptr(leaf, path->slots[0], | ||
289 | struct btrfs_root_ref); | ||
290 | name_ptr = (unsigned long)(rref + 1); | ||
291 | name_len = btrfs_root_ref_name_len(leaf, rref); | ||
292 | } else { | ||
293 | iref = btrfs_item_ptr(leaf, path->slots[0], | ||
294 | struct btrfs_inode_ref); | ||
295 | name_ptr = (unsigned long)(iref + 1); | ||
296 | name_len = btrfs_inode_ref_name_len(leaf, iref); | ||
297 | } | ||
298 | |||
299 | read_extent_buffer(leaf, name, name_ptr, name_len); | ||
300 | btrfs_free_path(path); | ||
301 | |||
302 | /* | ||
303 | * we have to add the NUL termination to make sure that reconnect_path | ||
304 | * gets the right length from strlen | ||
305 | */ | ||
306 | name[name_len] = '\0'; | ||
307 | |||
308 | return 0; | ||
309 | } | ||
310 | |||
235 | const struct export_operations btrfs_export_ops = { | 311 | const struct export_operations btrfs_export_ops = { |
236 | .encode_fh = btrfs_encode_fh, | 312 | .encode_fh = btrfs_encode_fh, |
237 | .fh_to_dentry = btrfs_fh_to_dentry, | 313 | .fh_to_dentry = btrfs_fh_to_dentry, |
238 | .fh_to_parent = btrfs_fh_to_parent, | 314 | .fh_to_parent = btrfs_fh_to_parent, |
239 | .get_parent = btrfs_get_parent, | 315 | .get_parent = btrfs_get_parent, |
316 | .get_name = btrfs_get_name, | ||
240 | }; | 317 | }; |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 32d094002a57..71cd456fdb60 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -33,11 +33,28 @@ | |||
33 | #include "locking.h" | 33 | #include "locking.h" |
34 | #include "free-space-cache.h" | 34 | #include "free-space-cache.h" |
35 | 35 | ||
36 | /* control flags for do_chunk_alloc's force field. | ||
37 | * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk | ||
38 | * if we really need one. | ||
39 | * | ||
40 | * CHUNK_ALLOC_FORCE means it must try to allocate one. | ||
41 | * | ||
42 | * CHUNK_ALLOC_LIMITED means to only try to allocate one | ||
43 | * if we have very few chunks already allocated. This is | ||
44 | * used as part of the clustering code to help make sure | ||
45 | * we have a good pool of storage to cluster in, without | ||
46 | * filling the FS with empty chunks. | ||
47 | * | ||
48 | */ | ||
49 | enum { | ||
50 | CHUNK_ALLOC_NO_FORCE = 0, | ||
51 | CHUNK_ALLOC_FORCE = 1, | ||
52 | CHUNK_ALLOC_LIMITED = 2, | ||
53 | }; | ||
54 | |||
36 | static int update_block_group(struct btrfs_trans_handle *trans, | 55 | static int update_block_group(struct btrfs_trans_handle *trans, |
37 | struct btrfs_root *root, | 56 | struct btrfs_root *root, |
38 | u64 bytenr, u64 num_bytes, int alloc); | 57 | u64 bytenr, u64 num_bytes, int alloc); |
39 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
40 | u64 num_bytes, int reserve, int sinfo); | ||
41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 58 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
42 | struct btrfs_root *root, | 59 | struct btrfs_root *root, |
43 | u64 bytenr, u64 num_bytes, u64 parent, | 60 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -77,7 +94,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) | |||
77 | return (cache->flags & bits) == bits; | 94 | return (cache->flags & bits) == bits; |
78 | } | 95 | } |
79 | 96 | ||
80 | void btrfs_get_block_group(struct btrfs_block_group_cache *cache) | 97 | static void btrfs_get_block_group(struct btrfs_block_group_cache *cache) |
81 | { | 98 | { |
82 | atomic_inc(&cache->count); | 99 | atomic_inc(&cache->count); |
83 | } | 100 | } |
@@ -88,6 +105,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache) | |||
88 | WARN_ON(cache->pinned > 0); | 105 | WARN_ON(cache->pinned > 0); |
89 | WARN_ON(cache->reserved > 0); | 106 | WARN_ON(cache->reserved > 0); |
90 | WARN_ON(cache->reserved_pinned > 0); | 107 | WARN_ON(cache->reserved_pinned > 0); |
108 | kfree(cache->free_space_ctl); | ||
91 | kfree(cache); | 109 | kfree(cache); |
92 | } | 110 | } |
93 | } | 111 | } |
@@ -242,6 +260,12 @@ get_caching_control(struct btrfs_block_group_cache *cache) | |||
242 | return NULL; | 260 | return NULL; |
243 | } | 261 | } |
244 | 262 | ||
263 | /* We're loading it the fast way, so we don't have a caching_ctl. */ | ||
264 | if (!cache->caching_ctl) { | ||
265 | spin_unlock(&cache->lock); | ||
266 | return NULL; | ||
267 | } | ||
268 | |||
245 | ctl = cache->caching_ctl; | 269 | ctl = cache->caching_ctl; |
246 | atomic_inc(&ctl->count); | 270 | atomic_inc(&ctl->count); |
247 | spin_unlock(&cache->lock); | 271 | spin_unlock(&cache->lock); |
@@ -314,11 +338,6 @@ static int caching_kthread(void *data) | |||
314 | if (!path) | 338 | if (!path) |
315 | return -ENOMEM; | 339 | return -ENOMEM; |
316 | 340 | ||
317 | exclude_super_stripes(extent_root, block_group); | ||
318 | spin_lock(&block_group->space_info->lock); | ||
319 | block_group->space_info->bytes_readonly += block_group->bytes_super; | ||
320 | spin_unlock(&block_group->space_info->lock); | ||
321 | |||
322 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 341 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
323 | 342 | ||
324 | /* | 343 | /* |
@@ -329,7 +348,7 @@ static int caching_kthread(void *data) | |||
329 | */ | 348 | */ |
330 | path->skip_locking = 1; | 349 | path->skip_locking = 1; |
331 | path->search_commit_root = 1; | 350 | path->search_commit_root = 1; |
332 | path->reada = 2; | 351 | path->reada = 1; |
333 | 352 | ||
334 | key.objectid = last; | 353 | key.objectid = last; |
335 | key.offset = 0; | 354 | key.offset = 0; |
@@ -347,8 +366,7 @@ again: | |||
347 | nritems = btrfs_header_nritems(leaf); | 366 | nritems = btrfs_header_nritems(leaf); |
348 | 367 | ||
349 | while (1) { | 368 | while (1) { |
350 | smp_mb(); | 369 | if (btrfs_fs_closing(fs_info) > 1) { |
351 | if (fs_info->closing > 1) { | ||
352 | last = (u64)-1; | 370 | last = (u64)-1; |
353 | break; | 371 | break; |
354 | } | 372 | } |
@@ -360,15 +378,18 @@ again: | |||
360 | if (ret) | 378 | if (ret) |
361 | break; | 379 | break; |
362 | 380 | ||
363 | caching_ctl->progress = last; | 381 | if (need_resched() || |
364 | btrfs_release_path(extent_root, path); | 382 | btrfs_next_leaf(extent_root, path)) { |
365 | up_read(&fs_info->extent_commit_sem); | 383 | caching_ctl->progress = last; |
366 | mutex_unlock(&caching_ctl->mutex); | 384 | btrfs_release_path(path); |
367 | if (btrfs_transaction_in_commit(fs_info)) | 385 | up_read(&fs_info->extent_commit_sem); |
368 | schedule_timeout(1); | 386 | mutex_unlock(&caching_ctl->mutex); |
369 | else | ||
370 | cond_resched(); | 387 | cond_resched(); |
371 | goto again; | 388 | goto again; |
389 | } | ||
390 | leaf = path->nodes[0]; | ||
391 | nritems = btrfs_header_nritems(leaf); | ||
392 | continue; | ||
372 | } | 393 | } |
373 | 394 | ||
374 | if (key.objectid < block_group->key.objectid) { | 395 | if (key.objectid < block_group->key.objectid) { |
@@ -421,7 +442,10 @@ err: | |||
421 | return 0; | 442 | return 0; |
422 | } | 443 | } |
423 | 444 | ||
424 | static int cache_block_group(struct btrfs_block_group_cache *cache) | 445 | static int cache_block_group(struct btrfs_block_group_cache *cache, |
446 | struct btrfs_trans_handle *trans, | ||
447 | struct btrfs_root *root, | ||
448 | int load_cache_only) | ||
425 | { | 449 | { |
426 | struct btrfs_fs_info *fs_info = cache->fs_info; | 450 | struct btrfs_fs_info *fs_info = cache->fs_info; |
427 | struct btrfs_caching_control *caching_ctl; | 451 | struct btrfs_caching_control *caching_ctl; |
@@ -432,7 +456,42 @@ static int cache_block_group(struct btrfs_block_group_cache *cache) | |||
432 | if (cache->cached != BTRFS_CACHE_NO) | 456 | if (cache->cached != BTRFS_CACHE_NO) |
433 | return 0; | 457 | return 0; |
434 | 458 | ||
435 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); | 459 | /* |
460 | * We can't do the read from on-disk cache during a commit since we need | ||
461 | * to have the normal tree locking. Also if we are currently trying to | ||
462 | * allocate blocks for the tree root we can't do the fast caching since | ||
463 | * we likely hold important locks. | ||
464 | */ | ||
465 | if (trans && (!trans->transaction->in_commit) && | ||
466 | (root && root != root->fs_info->tree_root)) { | ||
467 | spin_lock(&cache->lock); | ||
468 | if (cache->cached != BTRFS_CACHE_NO) { | ||
469 | spin_unlock(&cache->lock); | ||
470 | return 0; | ||
471 | } | ||
472 | cache->cached = BTRFS_CACHE_STARTED; | ||
473 | spin_unlock(&cache->lock); | ||
474 | |||
475 | ret = load_free_space_cache(fs_info, cache); | ||
476 | |||
477 | spin_lock(&cache->lock); | ||
478 | if (ret == 1) { | ||
479 | cache->cached = BTRFS_CACHE_FINISHED; | ||
480 | cache->last_byte_to_unpin = (u64)-1; | ||
481 | } else { | ||
482 | cache->cached = BTRFS_CACHE_NO; | ||
483 | } | ||
484 | spin_unlock(&cache->lock); | ||
485 | if (ret == 1) { | ||
486 | free_excluded_extents(fs_info->extent_root, cache); | ||
487 | return 0; | ||
488 | } | ||
489 | } | ||
490 | |||
491 | if (load_cache_only) | ||
492 | return 0; | ||
493 | |||
494 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); | ||
436 | BUG_ON(!caching_ctl); | 495 | BUG_ON(!caching_ctl); |
437 | 496 | ||
438 | INIT_LIST_HEAD(&caching_ctl->list); | 497 | INIT_LIST_HEAD(&caching_ctl->list); |
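The fast-cache path added to cache_block_group() is a double-checked state transition: recheck the state under the lock, claim the work by setting BTRFS_CACHE_STARTED, drop the lock for the actual load, then publish the outcome. A pthread sketch of that handshake, where load_cache() is a stand-in for load_free_space_cache() (compile with -pthread):

```c
#include <pthread.h>
#include <stdio.h>

enum cache_state { CACHE_NO, CACHE_STARTED, CACHE_FINISHED };

static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;
static enum cache_state cached = CACHE_NO;

/* stand-in for load_free_space_cache(); returns 1 on success */
static int load_cache(void) { return 1; }

/* Recheck under the lock, claim by moving to STARTED, load without
 * the lock held, then publish FINISHED or fall back to NO. */
static int try_fast_cache(void)
{
	int ret;

	pthread_mutex_lock(&cache_lock);
	if (cached != CACHE_NO) {	/* someone beat us to it */
		pthread_mutex_unlock(&cache_lock);
		return 0;
	}
	cached = CACHE_STARTED;
	pthread_mutex_unlock(&cache_lock);

	ret = load_cache();

	pthread_mutex_lock(&cache_lock);
	cached = (ret == 1) ? CACHE_FINISHED : CACHE_NO;
	pthread_mutex_unlock(&cache_lock);
	return 0;
}

int main(void)
{
	try_fast_cache();
	printf("state=%d\n", cached);
	return 0;
}
```

Claiming the STARTED state before dropping the lock is what stops two callers from loading the same cache concurrently.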
@@ -509,7 +568,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, | |||
509 | 568 | ||
510 | rcu_read_lock(); | 569 | rcu_read_lock(); |
511 | list_for_each_entry_rcu(found, head, list) { | 570 | list_for_each_entry_rcu(found, head, list) { |
512 | if (found->flags == flags) { | 571 | if (found->flags & flags) { |
513 | rcu_read_unlock(); | 572 | rcu_read_unlock(); |
514 | return found; | 573 | return found; |
515 | } | 574 | } |
@@ -542,6 +601,15 @@ static u64 div_factor(u64 num, int factor) | |||
542 | return num; | 601 | return num; |
543 | } | 602 | } |
544 | 603 | ||
604 | static u64 div_factor_fine(u64 num, int factor) | ||
605 | { | ||
606 | if (factor == 100) | ||
607 | return num; | ||
608 | num *= factor; | ||
609 | do_div(num, 100); | ||
610 | return num; | ||
611 | } | ||
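div_factor_fine() scales a 64-bit value by a whole percentage; the kernel uses do_div() because plain 64-bit division isn't directly available on all 32-bit targets. A userspace twin for a quick check, with ordinary C division in its place (note that num * factor can overflow for very large inputs, just as in the original):

```c
#include <stdint.h>
#include <stdio.h>

/* Userspace twin of div_factor_fine(): scale @num by @factor percent. */
static uint64_t div_factor_fine(uint64_t num, int factor)
{
	if (factor == 100)
		return num;
	num *= factor;
	num /= 100;	/* do_div(num, 100) in the kernel */
	return num;
}

int main(void)
{
	/* e.g. 75% of a 1 GiB block group */
	printf("%llu\n",
	       (unsigned long long)div_factor_fine(1ULL << 30, 75));
	return 0;
}
```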
612 | |||
545 | u64 btrfs_find_block_group(struct btrfs_root *root, | 613 | u64 btrfs_find_block_group(struct btrfs_root *root, |
546 | u64 search_start, u64 search_hint, int owner) | 614 | u64 search_start, u64 search_hint, int owner) |
547 | { | 615 | { |
@@ -689,8 +757,12 @@ again: | |||
689 | atomic_inc(&head->node.refs); | 757 | atomic_inc(&head->node.refs); |
690 | spin_unlock(&delayed_refs->lock); | 758 | spin_unlock(&delayed_refs->lock); |
691 | 759 | ||
692 | btrfs_release_path(root->fs_info->extent_root, path); | 760 | btrfs_release_path(path); |
693 | 761 | ||
762 | /* | ||
763 | * Mutex was contended, block until it's released and try | ||
764 | * again | ||
765 | */ | ||
694 | mutex_lock(&head->mutex); | 766 | mutex_lock(&head->mutex); |
695 | mutex_unlock(&head->mutex); | 767 | mutex_unlock(&head->mutex); |
696 | btrfs_put_delayed_ref(&head->node); | 768 | btrfs_put_delayed_ref(&head->node); |
@@ -869,7 +941,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, | |||
869 | break; | 941 | break; |
870 | } | 942 | } |
871 | } | 943 | } |
872 | btrfs_release_path(root, path); | 944 | btrfs_release_path(path); |
873 | 945 | ||
874 | if (owner < BTRFS_FIRST_FREE_OBJECTID) | 946 | if (owner < BTRFS_FIRST_FREE_OBJECTID) |
875 | new_size += sizeof(*bi); | 947 | new_size += sizeof(*bi); |
@@ -882,7 +954,6 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, | |||
882 | BUG_ON(ret); | 954 | BUG_ON(ret); |
883 | 955 | ||
884 | ret = btrfs_extend_item(trans, root, path, new_size); | 956 | ret = btrfs_extend_item(trans, root, path, new_size); |
885 | BUG_ON(ret); | ||
886 | 957 | ||
887 | leaf = path->nodes[0]; | 958 | leaf = path->nodes[0]; |
888 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); | 959 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); |
@@ -977,7 +1048,7 @@ again: | |||
977 | return 0; | 1048 | return 0; |
978 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 1049 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
979 | key.type = BTRFS_EXTENT_REF_V0_KEY; | 1050 | key.type = BTRFS_EXTENT_REF_V0_KEY; |
980 | btrfs_release_path(root, path); | 1051 | btrfs_release_path(path); |
981 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 1052 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
982 | if (ret < 0) { | 1053 | if (ret < 0) { |
983 | err = ret; | 1054 | err = ret; |
@@ -1015,7 +1086,7 @@ again: | |||
1015 | if (match_extent_data_ref(leaf, ref, root_objectid, | 1086 | if (match_extent_data_ref(leaf, ref, root_objectid, |
1016 | owner, offset)) { | 1087 | owner, offset)) { |
1017 | if (recow) { | 1088 | if (recow) { |
1018 | btrfs_release_path(root, path); | 1089 | btrfs_release_path(path); |
1019 | goto again; | 1090 | goto again; |
1020 | } | 1091 | } |
1021 | err = 0; | 1092 | err = 0; |
@@ -1076,7 +1147,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, | |||
1076 | if (match_extent_data_ref(leaf, ref, root_objectid, | 1147 | if (match_extent_data_ref(leaf, ref, root_objectid, |
1077 | owner, offset)) | 1148 | owner, offset)) |
1078 | break; | 1149 | break; |
1079 | btrfs_release_path(root, path); | 1150 | btrfs_release_path(path); |
1080 | key.offset++; | 1151 | key.offset++; |
1081 | ret = btrfs_insert_empty_item(trans, root, path, &key, | 1152 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
1082 | size); | 1153 | size); |
@@ -1102,7 +1173,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, | |||
1102 | btrfs_mark_buffer_dirty(leaf); | 1173 | btrfs_mark_buffer_dirty(leaf); |
1103 | ret = 0; | 1174 | ret = 0; |
1104 | fail: | 1175 | fail: |
1105 | btrfs_release_path(root, path); | 1176 | btrfs_release_path(path); |
1106 | return ret; | 1177 | return ret; |
1107 | } | 1178 | } |
1108 | 1179 | ||
@@ -1228,7 +1299,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, | |||
1228 | ret = -ENOENT; | 1299 | ret = -ENOENT; |
1229 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 1300 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
1230 | if (ret == -ENOENT && parent) { | 1301 | if (ret == -ENOENT && parent) { |
1231 | btrfs_release_path(root, path); | 1302 | btrfs_release_path(path); |
1232 | key.type = BTRFS_EXTENT_REF_V0_KEY; | 1303 | key.type = BTRFS_EXTENT_REF_V0_KEY; |
1233 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 1304 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
1234 | if (ret > 0) | 1305 | if (ret > 0) |
@@ -1257,7 +1328,7 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, | |||
1257 | } | 1328 | } |
1258 | 1329 | ||
1259 | ret = btrfs_insert_empty_item(trans, root, path, &key, 0); | 1330 | ret = btrfs_insert_empty_item(trans, root, path, &key, 0); |
1260 | btrfs_release_path(root, path); | 1331 | btrfs_release_path(path); |
1261 | return ret; | 1332 | return ret; |
1262 | } | 1333 | } |
1263 | 1334 | ||
@@ -1490,7 +1561,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans, | |||
1490 | size = btrfs_extent_inline_ref_size(type); | 1561 | size = btrfs_extent_inline_ref_size(type); |
1491 | 1562 | ||
1492 | ret = btrfs_extend_item(trans, root, path, size); | 1563 | ret = btrfs_extend_item(trans, root, path, size); |
1493 | BUG_ON(ret); | ||
1494 | 1564 | ||
1495 | ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); | 1565 | ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); |
1496 | refs = btrfs_extent_refs(leaf, ei); | 1566 | refs = btrfs_extent_refs(leaf, ei); |
@@ -1543,7 +1613,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans, | |||
1543 | if (ret != -ENOENT) | 1613 | if (ret != -ENOENT) |
1544 | return ret; | 1614 | return ret; |
1545 | 1615 | ||
1546 | btrfs_release_path(root, path); | 1616 | btrfs_release_path(path); |
1547 | *ref_ret = NULL; | 1617 | *ref_ret = NULL; |
1548 | 1618 | ||
1549 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { | 1619 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
@@ -1619,7 +1689,6 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans, | |||
1619 | end - ptr - size); | 1689 | end - ptr - size); |
1620 | item_size -= size; | 1690 | item_size -= size; |
1621 | ret = btrfs_truncate_item(trans, root, path, item_size, 1); | 1691 | ret = btrfs_truncate_item(trans, root, path, item_size, 1); |
1622 | BUG_ON(ret); | ||
1623 | } | 1692 | } |
1624 | btrfs_mark_buffer_dirty(leaf); | 1693 | btrfs_mark_buffer_dirty(leaf); |
1625 | return 0; | 1694 | return 0; |
@@ -1692,40 +1761,45 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
1692 | return ret; | 1761 | return ret; |
1693 | } | 1762 | } |
1694 | 1763 | ||
1695 | static void btrfs_issue_discard(struct block_device *bdev, | 1764 | static int btrfs_issue_discard(struct block_device *bdev, |
1696 | u64 start, u64 len) | 1765 | u64 start, u64 len) |
1697 | { | 1766 | { |
1698 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, | 1767 | return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); |
1699 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); | ||
1700 | } | 1768 | } |
1701 | 1769 | ||
1702 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1770 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
1703 | u64 num_bytes) | 1771 | u64 num_bytes, u64 *actual_bytes) |
1704 | { | 1772 | { |
1705 | int ret; | 1773 | int ret; |
1706 | u64 map_length = num_bytes; | 1774 | u64 discarded_bytes = 0; |
1707 | struct btrfs_multi_bio *multi = NULL; | 1775 | struct btrfs_multi_bio *multi = NULL; |
1708 | 1776 | ||
1709 | if (!btrfs_test_opt(root, DISCARD)) | ||
1710 | return 0; | ||
1711 | 1777 | ||
1712 | /* Tell the block device(s) that the sectors can be discarded */ | 1778 | /* Tell the block device(s) that the sectors can be discarded */ |
1713 | ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, | 1779 | ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, |
1714 | bytenr, &map_length, &multi, 0); | 1780 | bytenr, &num_bytes, &multi, 0); |
1715 | if (!ret) { | 1781 | if (!ret) { |
1716 | struct btrfs_bio_stripe *stripe = multi->stripes; | 1782 | struct btrfs_bio_stripe *stripe = multi->stripes; |
1717 | int i; | 1783 | int i; |
1718 | 1784 | ||
1719 | if (map_length > num_bytes) | ||
1720 | map_length = num_bytes; | ||
1721 | 1785 | ||
1722 | for (i = 0; i < multi->num_stripes; i++, stripe++) { | 1786 | for (i = 0; i < multi->num_stripes; i++, stripe++) { |
1723 | btrfs_issue_discard(stripe->dev->bdev, | 1787 | ret = btrfs_issue_discard(stripe->dev->bdev, |
1724 | stripe->physical, | 1788 | stripe->physical, |
1725 | map_length); | 1789 | stripe->length); |
1790 | if (!ret) | ||
1791 | discarded_bytes += stripe->length; | ||
1792 | else if (ret != -EOPNOTSUPP) | ||
1793 | break; | ||
1726 | } | 1794 | } |
1727 | kfree(multi); | 1795 | kfree(multi); |
1728 | } | 1796 | } |
1797 | if (discarded_bytes && ret == -EOPNOTSUPP) | ||
1798 | ret = 0; | ||
1799 | |||
1800 | if (actual_bytes) | ||
1801 | *actual_bytes = discarded_bytes; | ||
1802 | |||
1729 | 1803 | ||
1730 | return ret; | 1804 | return ret; |
1731 | } | 1805 | } |
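The reworked discard path adds up how much was actually discarded per stripe and treats -EOPNOTSUPP as success as long as at least one device accepted the discard. A userspace sketch of just that accounting, with a hypothetical per-device issue_discard() in which device 1 lacks discard support:

```c
#include <errno.h>
#include <stdio.h>

/* Hypothetical per-stripe discard; device 1 refuses in this sketch. */
static int issue_discard(int dev, unsigned long long len)
{
	(void)len;
	return dev == 1 ? -EOPNOTSUPP : 0;
}

/* Same accounting as btrfs_discard_extent(): sum what was actually
 * discarded, stop on hard errors, and forgive EOPNOTSUPP when at
 * least one stripe went through. */
static int discard_stripes(const unsigned long long *len, int n,
			   unsigned long long *actual)
{
	unsigned long long discarded = 0;
	int i, ret = 0;

	for (i = 0; i < n; i++) {
		ret = issue_discard(i, len[i]);
		if (!ret)
			discarded += len[i];
		else if (ret != -EOPNOTSUPP)
			break;
	}
	if (discarded && ret == -EOPNOTSUPP)
		ret = 0;
	*actual = discarded;
	return ret;
}

int main(void)
{
	unsigned long long lens[3] = { 4096, 4096, 4096 }, got;
	int ret = discard_stripes(lens, 3, &got);

	printf("ret=%d discarded=%llu\n", ret, got);	/* ret=0, 8192 */
	return 0;
}
```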
@@ -1792,7 +1866,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1792 | __run_delayed_extent_op(extent_op, leaf, item); | 1866 | __run_delayed_extent_op(extent_op, leaf, item); |
1793 | 1867 | ||
1794 | btrfs_mark_buffer_dirty(leaf); | 1868 | btrfs_mark_buffer_dirty(leaf); |
1795 | btrfs_release_path(root->fs_info->extent_root, path); | 1869 | btrfs_release_path(path); |
1796 | 1870 | ||
1797 | path->reada = 1; | 1871 | path->reada = 1; |
1798 | path->leave_spinning = 1; | 1872 | path->leave_spinning = 1; |
@@ -2227,6 +2301,10 @@ again: | |||
2227 | atomic_inc(&ref->refs); | 2301 | atomic_inc(&ref->refs); |
2228 | 2302 | ||
2229 | spin_unlock(&delayed_refs->lock); | 2303 | spin_unlock(&delayed_refs->lock); |
2304 | /* | ||
2305 | * Mutex was contended, block until it's | ||
2306 | * released and try again | ||
2307 | */ | ||
2230 | mutex_lock(&head->mutex); | 2308 | mutex_lock(&head->mutex); |
2231 | mutex_unlock(&head->mutex); | 2309 | mutex_unlock(&head->mutex); |
2232 | 2310 | ||
@@ -2291,8 +2369,12 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, | |||
2291 | atomic_inc(&head->node.refs); | 2369 | atomic_inc(&head->node.refs); |
2292 | spin_unlock(&delayed_refs->lock); | 2370 | spin_unlock(&delayed_refs->lock); |
2293 | 2371 | ||
2294 | btrfs_release_path(root->fs_info->extent_root, path); | 2372 | btrfs_release_path(path); |
2295 | 2373 | ||
2374 | /* | ||
2375 | * Mutex was contended, block until it's released and let | ||
2376 | * caller try again | ||
2377 | */ | ||
2296 | mutex_lock(&head->mutex); | 2378 | mutex_lock(&head->mutex); |
2297 | mutex_unlock(&head->mutex); | 2379 | mutex_unlock(&head->mutex); |
2298 | btrfs_put_delayed_ref(&head->node); | 2380 | btrfs_put_delayed_ref(&head->node); |
@@ -2440,126 +2522,6 @@ out: | |||
2440 | return ret; | 2522 | return ret; |
2441 | } | 2523 | } |
2442 | 2524 | ||
2443 | #if 0 | ||
2444 | int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
2445 | struct extent_buffer *buf, u32 nr_extents) | ||
2446 | { | ||
2447 | struct btrfs_key key; | ||
2448 | struct btrfs_file_extent_item *fi; | ||
2449 | u64 root_gen; | ||
2450 | u32 nritems; | ||
2451 | int i; | ||
2452 | int level; | ||
2453 | int ret = 0; | ||
2454 | int shared = 0; | ||
2455 | |||
2456 | if (!root->ref_cows) | ||
2457 | return 0; | ||
2458 | |||
2459 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { | ||
2460 | shared = 0; | ||
2461 | root_gen = root->root_key.offset; | ||
2462 | } else { | ||
2463 | shared = 1; | ||
2464 | root_gen = trans->transid - 1; | ||
2465 | } | ||
2466 | |||
2467 | level = btrfs_header_level(buf); | ||
2468 | nritems = btrfs_header_nritems(buf); | ||
2469 | |||
2470 | if (level == 0) { | ||
2471 | struct btrfs_leaf_ref *ref; | ||
2472 | struct btrfs_extent_info *info; | ||
2473 | |||
2474 | ref = btrfs_alloc_leaf_ref(root, nr_extents); | ||
2475 | if (!ref) { | ||
2476 | ret = -ENOMEM; | ||
2477 | goto out; | ||
2478 | } | ||
2479 | |||
2480 | ref->root_gen = root_gen; | ||
2481 | ref->bytenr = buf->start; | ||
2482 | ref->owner = btrfs_header_owner(buf); | ||
2483 | ref->generation = btrfs_header_generation(buf); | ||
2484 | ref->nritems = nr_extents; | ||
2485 | info = ref->extents; | ||
2486 | |||
2487 | for (i = 0; nr_extents > 0 && i < nritems; i++) { | ||
2488 | u64 disk_bytenr; | ||
2489 | btrfs_item_key_to_cpu(buf, &key, i); | ||
2490 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
2491 | continue; | ||
2492 | fi = btrfs_item_ptr(buf, i, | ||
2493 | struct btrfs_file_extent_item); | ||
2494 | if (btrfs_file_extent_type(buf, fi) == | ||
2495 | BTRFS_FILE_EXTENT_INLINE) | ||
2496 | continue; | ||
2497 | disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); | ||
2498 | if (disk_bytenr == 0) | ||
2499 | continue; | ||
2500 | |||
2501 | info->bytenr = disk_bytenr; | ||
2502 | info->num_bytes = | ||
2503 | btrfs_file_extent_disk_num_bytes(buf, fi); | ||
2504 | info->objectid = key.objectid; | ||
2505 | info->offset = key.offset; | ||
2506 | info++; | ||
2507 | } | ||
2508 | |||
2509 | ret = btrfs_add_leaf_ref(root, ref, shared); | ||
2510 | if (ret == -EEXIST && shared) { | ||
2511 | struct btrfs_leaf_ref *old; | ||
2512 | old = btrfs_lookup_leaf_ref(root, ref->bytenr); | ||
2513 | BUG_ON(!old); | ||
2514 | btrfs_remove_leaf_ref(root, old); | ||
2515 | btrfs_free_leaf_ref(root, old); | ||
2516 | ret = btrfs_add_leaf_ref(root, ref, shared); | ||
2517 | } | ||
2518 | WARN_ON(ret); | ||
2519 | btrfs_free_leaf_ref(root, ref); | ||
2520 | } | ||
2521 | out: | ||
2522 | return ret; | ||
2523 | } | ||
2524 | |||
2525 | /* when a block goes through cow, we update the reference counts of | ||
2526 | * everything that block points to. The internal pointers of the block | ||
2527 | * can be in just about any order, and it is likely to have clusters of | ||
2528 | * things that are close together and clusters of things that are not. | ||
2529 | * | ||
2530 | * To help reduce the seeks that come with updating all of these reference | ||
2531 | * counts, sort them by byte number before actual updates are done. | ||
2532 | * | ||
2533 | * struct refsort is used to match byte number to slot in the btree block. | ||
2534 | * we sort based on the byte number and then use the slot to actually | ||
2535 | * find the item. | ||
2536 | * | ||
2537 | * struct refsort is smaller than strcut btrfs_item and smaller than | ||
2538 | * struct btrfs_key_ptr. Since we're currently limited to the page size | ||
2539 | * for a btree block, there's no way for a kmalloc of refsorts for a | ||
2540 | * single node to be bigger than a page. | ||
2541 | */ | ||
2542 | struct refsort { | ||
2543 | u64 bytenr; | ||
2544 | u32 slot; | ||
2545 | }; | ||
2546 | |||
2547 | /* | ||
2548 | * for passing into sort() | ||
2549 | */ | ||
2550 | static int refsort_cmp(const void *a_void, const void *b_void) | ||
2551 | { | ||
2552 | const struct refsort *a = a_void; | ||
2553 | const struct refsort *b = b_void; | ||
2554 | |||
2555 | if (a->bytenr < b->bytenr) | ||
2556 | return -1; | ||
2557 | if (a->bytenr > b->bytenr) | ||
2558 | return 1; | ||
2559 | return 0; | ||
2560 | } | ||
2561 | #endif | ||
2562 | |||
2563 | static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | 2525 | static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, |
2564 | struct btrfs_root *root, | 2526 | struct btrfs_root *root, |
2565 | struct extent_buffer *buf, | 2527 | struct extent_buffer *buf, |
@@ -2662,7 +2624,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, | |||
2662 | bi = btrfs_item_ptr_offset(leaf, path->slots[0]); | 2624 | bi = btrfs_item_ptr_offset(leaf, path->slots[0]); |
2663 | write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); | 2625 | write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); |
2664 | btrfs_mark_buffer_dirty(leaf); | 2626 | btrfs_mark_buffer_dirty(leaf); |
2665 | btrfs_release_path(extent_root, path); | 2627 | btrfs_release_path(path); |
2666 | fail: | 2628 | fail: |
2667 | if (ret) | 2629 | if (ret) |
2668 | return ret; | 2630 | return ret; |
@@ -2688,6 +2650,111 @@ next_block_group(struct btrfs_root *root, | |||
2688 | return cache; | 2650 | return cache; |
2689 | } | 2651 | } |
2690 | 2652 | ||
2653 | static int cache_save_setup(struct btrfs_block_group_cache *block_group, | ||
2654 | struct btrfs_trans_handle *trans, | ||
2655 | struct btrfs_path *path) | ||
2656 | { | ||
2657 | struct btrfs_root *root = block_group->fs_info->tree_root; | ||
2658 | struct inode *inode = NULL; | ||
2659 | u64 alloc_hint = 0; | ||
2660 | int dcs = BTRFS_DC_ERROR; | ||
2661 | int num_pages = 0; | ||
2662 | int retries = 0; | ||
2663 | int ret = 0; | ||
2664 | |||
2665 | /* | ||
2666 | * If this block group is smaller than 100 megs, don't bother | ||
2667 | * caching it. | ||
2668 | */ | ||
2669 | if (block_group->key.offset < (100 * 1024 * 1024)) { | ||
2670 | spin_lock(&block_group->lock); | ||
2671 | block_group->disk_cache_state = BTRFS_DC_WRITTEN; | ||
2672 | spin_unlock(&block_group->lock); | ||
2673 | return 0; | ||
2674 | } | ||
2675 | |||
2676 | again: | ||
2677 | inode = lookup_free_space_inode(root, block_group, path); | ||
2678 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { | ||
2679 | ret = PTR_ERR(inode); | ||
2680 | btrfs_release_path(path); | ||
2681 | goto out; | ||
2682 | } | ||
2683 | |||
2684 | if (IS_ERR(inode)) { | ||
2685 | BUG_ON(retries); | ||
2686 | retries++; | ||
2687 | |||
2688 | if (block_group->ro) | ||
2689 | goto out_free; | ||
2690 | |||
2691 | ret = create_free_space_inode(root, trans, block_group, path); | ||
2692 | if (ret) | ||
2693 | goto out_free; | ||
2694 | goto again; | ||
2695 | } | ||
2696 | |||
2697 | /* | ||
2698 | * We want to set the generation to 0 so that if anything goes wrong | ||
2699 | * from here on out we know not to trust this cache when we load up | ||
2700 | * next time. | ||
2701 | */ | ||
2702 | BTRFS_I(inode)->generation = 0; | ||
2703 | ret = btrfs_update_inode(trans, root, inode); | ||
2704 | WARN_ON(ret); | ||
2705 | |||
2706 | if (i_size_read(inode) > 0) { | ||
2707 | ret = btrfs_truncate_free_space_cache(root, trans, path, | ||
2708 | inode); | ||
2709 | if (ret) | ||
2710 | goto out_put; | ||
2711 | } | ||
2712 | |||
2713 | spin_lock(&block_group->lock); | ||
2714 | if (block_group->cached != BTRFS_CACHE_FINISHED) { | ||
2715 | /* We're not cached, don't bother trying to write stuff out */ | ||
2716 | dcs = BTRFS_DC_WRITTEN; | ||
2717 | spin_unlock(&block_group->lock); | ||
2718 | goto out_put; | ||
2719 | } | ||
2720 | spin_unlock(&block_group->lock); | ||
2721 | |||
2722 | num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); | ||
2723 | if (!num_pages) | ||
2724 | num_pages = 1; | ||
2725 | |||
2726 | /* | ||
2727 | * Just to make absolutely sure we have enough space, we're going to | ||
2728 | * preallocate 16 pages worth of space for each block group. In | ||
2729 | * practice we ought to use at most 8, but we need extra space so we can | ||
2730 | * add our header and have a terminator between the extents and the | ||
2731 | * bitmaps. | ||
2732 | */ | ||
2733 | num_pages *= 16; | ||
2734 | num_pages *= PAGE_CACHE_SIZE; | ||
2735 | |||
2736 | ret = btrfs_check_data_free_space(inode, num_pages); | ||
2737 | if (ret) | ||
2738 | goto out_put; | ||
2739 | |||
2740 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, | ||
2741 | num_pages, num_pages, | ||
2742 | &alloc_hint); | ||
2743 | if (!ret) | ||
2744 | dcs = BTRFS_DC_SETUP; | ||
2745 | btrfs_free_reserved_data_space(inode, num_pages); | ||
2746 | out_put: | ||
2747 | iput(inode); | ||
2748 | out_free: | ||
2749 | btrfs_release_path(path); | ||
2750 | out: | ||
2751 | spin_lock(&block_group->lock); | ||
2752 | block_group->disk_cache_state = dcs; | ||
2753 | spin_unlock(&block_group->lock); | ||
2754 | |||
2755 | return ret; | ||
2756 | } | ||
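The sizing logic in cache_save_setup() works out to one page of cache space per GiB of block group, bumped to at least one, then multiplied by 16 for headroom. A quick worked check of that arithmetic, assuming 4 KiB pages:

```c
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL	/* assuming 4 KiB pages */

/* Reproduce the sizing arithmetic from cache_save_setup(): one page
 * per GiB of block group, at least 1, times 16, in bytes. */
static uint64_t cache_prealloc_bytes(uint64_t block_group_bytes)
{
	uint64_t num_pages = block_group_bytes / (1024ULL * 1024 * 1024);

	if (!num_pages)
		num_pages = 1;
	return num_pages * 16 * PAGE_SIZE;
}

int main(void)
{
	/* a 1 GiB block group -> 16 pages -> 64 KiB of cache space */
	printf("%llu bytes\n",
	       (unsigned long long)cache_prealloc_bytes(1ULL << 30));
	return 0;
}
```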
2757 | |||
2691 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | 2758 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, |
2692 | struct btrfs_root *root) | 2759 | struct btrfs_root *root) |
2693 | { | 2760 | { |
@@ -2700,6 +2767,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2700 | if (!path) | 2767 | if (!path) |
2701 | return -ENOMEM; | 2768 | return -ENOMEM; |
2702 | 2769 | ||
2770 | again: | ||
2771 | while (1) { | ||
2772 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | ||
2773 | while (cache) { | ||
2774 | if (cache->disk_cache_state == BTRFS_DC_CLEAR) | ||
2775 | break; | ||
2776 | cache = next_block_group(root, cache); | ||
2777 | } | ||
2778 | if (!cache) { | ||
2779 | if (last == 0) | ||
2780 | break; | ||
2781 | last = 0; | ||
2782 | continue; | ||
2783 | } | ||
2784 | err = cache_save_setup(cache, trans, path); | ||
2785 | last = cache->key.objectid + cache->key.offset; | ||
2786 | btrfs_put_block_group(cache); | ||
2787 | } | ||
2788 | |||
2703 | while (1) { | 2789 | while (1) { |
2704 | if (last == 0) { | 2790 | if (last == 0) { |
2705 | err = btrfs_run_delayed_refs(trans, root, | 2791 | err = btrfs_run_delayed_refs(trans, root, |
@@ -2709,6 +2795,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2709 | 2795 | ||
2710 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | 2796 | cache = btrfs_lookup_first_block_group(root->fs_info, last); |
2711 | while (cache) { | 2797 | while (cache) { |
2798 | if (cache->disk_cache_state == BTRFS_DC_CLEAR) { | ||
2799 | btrfs_put_block_group(cache); | ||
2800 | goto again; | ||
2801 | } | ||
2802 | |||
2712 | if (cache->dirty) | 2803 | if (cache->dirty) |
2713 | break; | 2804 | break; |
2714 | cache = next_block_group(root, cache); | 2805 | cache = next_block_group(root, cache); |
@@ -2720,6 +2811,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2720 | continue; | 2811 | continue; |
2721 | } | 2812 | } |
2722 | 2813 | ||
2814 | if (cache->disk_cache_state == BTRFS_DC_SETUP) | ||
2815 | cache->disk_cache_state = BTRFS_DC_NEED_WRITE; | ||
2723 | cache->dirty = 0; | 2816 | cache->dirty = 0; |
2724 | last = cache->key.objectid + cache->key.offset; | 2817 | last = cache->key.objectid + cache->key.offset; |
2725 | 2818 | ||
@@ -2728,6 +2821,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2728 | btrfs_put_block_group(cache); | 2821 | btrfs_put_block_group(cache); |
2729 | } | 2822 | } |
2730 | 2823 | ||
2824 | while (1) { | ||
2825 | /* | ||
2826 | * I don't think this is needed since we're just marking our | ||
2827 | * preallocated extent as written, but it can't hurt, just in | ||
2828 | * case. | ||
2829 | */ | ||
2830 | if (last == 0) { | ||
2831 | err = btrfs_run_delayed_refs(trans, root, | ||
2832 | (unsigned long)-1); | ||
2833 | BUG_ON(err); | ||
2834 | } | ||
2835 | |||
2836 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | ||
2837 | while (cache) { | ||
2838 | /* | ||
2839 | * Really this shouldn't happen, but it could if we | ||
2840 | * couldn't write the entire preallocated extent and | ||
2841 | * splitting the extent resulted in a new block. | ||
2842 | */ | ||
2843 | if (cache->dirty) { | ||
2844 | btrfs_put_block_group(cache); | ||
2845 | goto again; | ||
2846 | } | ||
2847 | if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) | ||
2848 | break; | ||
2849 | cache = next_block_group(root, cache); | ||
2850 | } | ||
2851 | if (!cache) { | ||
2852 | if (last == 0) | ||
2853 | break; | ||
2854 | last = 0; | ||
2855 | continue; | ||
2856 | } | ||
2857 | |||
2858 | btrfs_write_out_cache(root, trans, cache, path); | ||
2859 | |||
2860 | /* | ||
2861 | * If we didn't have an error then the cache state is still | ||
2862 | * NEED_WRITE, so we can set it to WRITTEN. | ||
2863 | */ | ||
2864 | if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) | ||
2865 | cache->disk_cache_state = BTRFS_DC_WRITTEN; | ||
2866 | last = cache->key.objectid + cache->key.offset; | ||
2867 | btrfs_put_block_group(cache); | ||
2868 | } | ||
2869 | |||
2731 | btrfs_free_path(path); | 2870 | btrfs_free_path(path); |
2732 | return 0; | 2871 | return 0; |
2733 | } | 2872 | } |
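With these hunks, btrfs_write_dirty_block_groups() makes three passes: one that runs cache_save_setup() on every BTRFS_DC_CLEAR group, one that writes dirty block group items (promoting BTRFS_DC_SETUP to BTRFS_DC_NEED_WRITE), and one that writes the caches out. A sketch of the state-to-pass mapping; the enum values are local stand-ins, not the kernel's BTRFS_DC_* definitions:

/* local model of the disk_cache_state lifecycle driven by the three
 * passes in btrfs_write_dirty_block_groups() */
enum dc_state { DC_WRITTEN, DC_CLEAR, DC_SETUP, DC_NEED_WRITE };

static int pass_for_state(enum dc_state s)
{
	switch (s) {
	case DC_CLEAR:      return 1; /* cache_save_setup(): -> SETUP or WRITTEN */
	case DC_SETUP:      return 2; /* written with dirty groups: -> NEED_WRITE */
	case DC_NEED_WRITE: return 3; /* btrfs_write_out_cache(): -> WRITTEN */
	default:            return 0; /* WRITTEN: nothing left to do */
	}
}

The later passes jump back to again: whenever they find a group that regressed (a new DC_CLEAR group, or a group re-dirtied because writing the cache split an extent), which is why the first loop has to be safe to re-run.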
@@ -2763,6 +2902,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2763 | if (found) { | 2902 | if (found) { |
2764 | spin_lock(&found->lock); | 2903 | spin_lock(&found->lock); |
2765 | found->total_bytes += total_bytes; | 2904 | found->total_bytes += total_bytes; |
2905 | found->disk_total += total_bytes * factor; | ||
2766 | found->bytes_used += bytes_used; | 2906 | found->bytes_used += bytes_used; |
2767 | found->disk_used += bytes_used * factor; | 2907 | found->disk_used += bytes_used * factor; |
2768 | found->full = 0; | 2908 | found->full = 0; |
@@ -2782,6 +2922,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2782 | BTRFS_BLOCK_GROUP_SYSTEM | | 2922 | BTRFS_BLOCK_GROUP_SYSTEM | |
2783 | BTRFS_BLOCK_GROUP_METADATA); | 2923 | BTRFS_BLOCK_GROUP_METADATA); |
2784 | found->total_bytes = total_bytes; | 2924 | found->total_bytes = total_bytes; |
2925 | found->disk_total = total_bytes * factor; | ||
2785 | found->bytes_used = bytes_used; | 2926 | found->bytes_used = bytes_used; |
2786 | found->disk_used = bytes_used * factor; | 2927 | found->disk_used = bytes_used * factor; |
2787 | found->bytes_pinned = 0; | 2928 | found->bytes_pinned = 0; |
@@ -2789,7 +2930,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2789 | found->bytes_readonly = 0; | 2930 | found->bytes_readonly = 0; |
2790 | found->bytes_may_use = 0; | 2931 | found->bytes_may_use = 0; |
2791 | found->full = 0; | 2932 | found->full = 0; |
2792 | found->force_alloc = 0; | 2933 | found->force_alloc = CHUNK_ALLOC_NO_FORCE; |
2934 | found->chunk_alloc = 0; | ||
2793 | *space_info = found; | 2935 | *space_info = found; |
2794 | list_add_rcu(&found->list, &info->space_info); | 2936 | list_add_rcu(&found->list, &info->space_info); |
2795 | atomic_set(&found->caching_threads, 0); | 2937 | atomic_set(&found->caching_threads, 0); |
@@ -2814,7 +2956,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
2814 | 2956 | ||
2815 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | 2957 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) |
2816 | { | 2958 | { |
2817 | u64 num_devices = root->fs_info->fs_devices->rw_devices; | 2959 | /* |
2960 | * we add in the count of missing devices because we want | ||
2961 | * to make sure that any RAID levels on a degraded FS | ||
2962 | * continue to be honored. | ||
2963 | */ | ||
2964 | u64 num_devices = root->fs_info->fs_devices->rw_devices + | ||
2965 | root->fs_info->fs_devices->missing_devices; | ||
2818 | 2966 | ||
2819 | if (num_devices == 1) | 2967 | if (num_devices == 1) |
2820 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); | 2968 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); |
@@ -2854,7 +3002,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) | |||
2854 | return btrfs_reduce_alloc_profile(root, flags); | 3002 | return btrfs_reduce_alloc_profile(root, flags); |
2855 | } | 3003 | } |
2856 | 3004 | ||
2857 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | 3005 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) |
2858 | { | 3006 | { |
2859 | u64 flags; | 3007 | u64 flags; |
2860 | 3008 | ||
@@ -2883,11 +3031,17 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) | |||
2883 | struct btrfs_space_info *data_sinfo; | 3031 | struct btrfs_space_info *data_sinfo; |
2884 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3032 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2885 | u64 used; | 3033 | u64 used; |
2886 | int ret = 0, committed = 0; | 3034 | int ret = 0, committed = 0, alloc_chunk = 1; |
2887 | 3035 | ||
2888 | /* make sure bytes are sectorsize aligned */ | 3036 | /* make sure bytes are sectorsize aligned */ |
2889 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 3037 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
2890 | 3038 | ||
3039 | if (root == root->fs_info->tree_root || | ||
3040 | BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) { | ||
3041 | alloc_chunk = 0; | ||
3042 | committed = 1; | ||
3043 | } | ||
3044 | |||
2891 | data_sinfo = BTRFS_I(inode)->space_info; | 3045 | data_sinfo = BTRFS_I(inode)->space_info; |
2892 | if (!data_sinfo) | 3046 | if (!data_sinfo) |
2893 | goto alloc; | 3047 | goto alloc; |
@@ -2906,23 +3060,28 @@ again: | |||
2906 | * if we don't have enough free bytes in this space then we need | 3060 | * if we don't have enough free bytes in this space then we need |
2907 | * to alloc a new chunk. | 3061 | * to alloc a new chunk. |
2908 | */ | 3062 | */ |
2909 | if (!data_sinfo->full) { | 3063 | if (!data_sinfo->full && alloc_chunk) { |
2910 | u64 alloc_target; | 3064 | u64 alloc_target; |
2911 | 3065 | ||
2912 | data_sinfo->force_alloc = 1; | 3066 | data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; |
2913 | spin_unlock(&data_sinfo->lock); | 3067 | spin_unlock(&data_sinfo->lock); |
2914 | alloc: | 3068 | alloc: |
2915 | alloc_target = btrfs_get_alloc_profile(root, 1); | 3069 | alloc_target = btrfs_get_alloc_profile(root, 1); |
2916 | trans = btrfs_join_transaction(root, 1); | 3070 | trans = btrfs_join_transaction(root); |
2917 | if (IS_ERR(trans)) | 3071 | if (IS_ERR(trans)) |
2918 | return PTR_ERR(trans); | 3072 | return PTR_ERR(trans); |
2919 | 3073 | ||
2920 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 3074 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
2921 | bytes + 2 * 1024 * 1024, | 3075 | bytes + 2 * 1024 * 1024, |
2922 | alloc_target, 0); | 3076 | alloc_target, |
3077 | CHUNK_ALLOC_NO_FORCE); | ||
2923 | btrfs_end_transaction(trans, root); | 3078 | btrfs_end_transaction(trans, root); |
2924 | if (ret < 0) | 3079 | if (ret < 0) { |
2925 | return ret; | 3080 | if (ret != -ENOSPC) |
3081 | return ret; | ||
3082 | else | ||
3083 | goto commit_trans; | ||
3084 | } | ||
2926 | 3085 | ||
2927 | if (!data_sinfo) { | 3086 | if (!data_sinfo) { |
2928 | btrfs_set_inode_space_info(root, inode); | 3087 | btrfs_set_inode_space_info(root, inode); |
@@ -2930,12 +3089,21 @@ alloc: | |||
2930 | } | 3089 | } |
2931 | goto again; | 3090 | goto again; |
2932 | } | 3091 | } |
3092 | |||
3093 | /* | ||
3094 | * If we have fewer pinned bytes than we want to allocate, then | ||
3095 | * don't bother committing the transaction; it won't help us. | ||
3096 | */ | ||
3097 | if (data_sinfo->bytes_pinned < bytes) | ||
3098 | committed = 1; | ||
2933 | spin_unlock(&data_sinfo->lock); | 3099 | spin_unlock(&data_sinfo->lock); |
2934 | 3100 | ||
2935 | /* commit the current transaction and try again */ | 3101 | /* commit the current transaction and try again */ |
2936 | if (!committed && !root->fs_info->open_ioctl_trans) { | 3102 | commit_trans: |
3103 | if (!committed && | ||
3104 | !atomic_read(&root->fs_info->open_ioctl_trans)) { | ||
2937 | committed = 1; | 3105 | committed = 1; |
2938 | trans = btrfs_join_transaction(root, 1); | 3106 | trans = btrfs_join_transaction(root); |
2939 | if (IS_ERR(trans)) | 3107 | if (IS_ERR(trans)) |
2940 | return PTR_ERR(trans); | 3108 | return PTR_ERR(trans); |
2941 | ret = btrfs_commit_transaction(trans, root); | 3109 | ret = btrfs_commit_transaction(trans, root); |
@@ -2944,18 +3112,6 @@ alloc: | |||
2944 | goto again; | 3112 | goto again; |
2945 | } | 3113 | } |
2946 | 3114 | ||
2947 | #if 0 /* I hope we never need this code again, just in case */ | ||
2948 | printk(KERN_ERR "no space left, need %llu, %llu bytes_used, " | ||
2949 | "%llu bytes_reserved, " "%llu bytes_pinned, " | ||
2950 | "%llu bytes_readonly, %llu may use %llu total\n", | ||
2951 | (unsigned long long)bytes, | ||
2952 | (unsigned long long)data_sinfo->bytes_used, | ||
2953 | (unsigned long long)data_sinfo->bytes_reserved, | ||
2954 | (unsigned long long)data_sinfo->bytes_pinned, | ||
2955 | (unsigned long long)data_sinfo->bytes_readonly, | ||
2956 | (unsigned long long)data_sinfo->bytes_may_use, | ||
2957 | (unsigned long long)data_sinfo->total_bytes); | ||
2958 | #endif | ||
2959 | return -ENOSPC; | 3115 | return -ENOSPC; |
2960 | } | 3116 | } |
2961 | data_sinfo->bytes_may_use += bytes; | 3117 | data_sinfo->bytes_may_use += bytes; |
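The bytes_pinned check added above encodes a simple heuristic: committing a transaction can return at most the pinned bytes to the free pool, so if less is pinned than we need, the commit is pointless. As a one-line predicate (names are local stand-ins):

/* sketch of the "is committing worth it" test added to
 * btrfs_check_data_free_space() above */
#include <stdint.h>
#include <stdbool.h>

static bool commit_might_help(uint64_t bytes_pinned, uint64_t bytes_needed)
{
	/* a commit frees at most bytes_pinned back to the space_info */
	return bytes_pinned >= bytes_needed;
}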
@@ -2993,24 +3149,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) | |||
2993 | rcu_read_lock(); | 3149 | rcu_read_lock(); |
2994 | list_for_each_entry_rcu(found, head, list) { | 3150 | list_for_each_entry_rcu(found, head, list) { |
2995 | if (found->flags & BTRFS_BLOCK_GROUP_METADATA) | 3151 | if (found->flags & BTRFS_BLOCK_GROUP_METADATA) |
2996 | found->force_alloc = 1; | 3152 | found->force_alloc = CHUNK_ALLOC_FORCE; |
2997 | } | 3153 | } |
2998 | rcu_read_unlock(); | 3154 | rcu_read_unlock(); |
2999 | } | 3155 | } |
3000 | 3156 | ||
3001 | static int should_alloc_chunk(struct btrfs_space_info *sinfo, | 3157 | static int should_alloc_chunk(struct btrfs_root *root, |
3002 | u64 alloc_bytes) | 3158 | struct btrfs_space_info *sinfo, u64 alloc_bytes, |
3159 | int force) | ||
3003 | { | 3160 | { |
3004 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; | 3161 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; |
3162 | u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; | ||
3163 | u64 thresh; | ||
3164 | |||
3165 | if (force == CHUNK_ALLOC_FORCE) | ||
3166 | return 1; | ||
3005 | 3167 | ||
3006 | if (sinfo->bytes_used + sinfo->bytes_reserved + | 3168 | /* |
3007 | alloc_bytes + 256 * 1024 * 1024 < num_bytes) | 3169 | * in limited mode, we want to have some free space up to |
3170 | * about 1% of the FS size. | ||
3171 | */ | ||
3172 | if (force == CHUNK_ALLOC_LIMITED) { | ||
3173 | thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); | ||
3174 | thresh = max_t(u64, 64 * 1024 * 1024, | ||
3175 | div_factor_fine(thresh, 1)); | ||
3176 | |||
3177 | if (num_bytes - num_allocated < thresh) | ||
3178 | return 1; | ||
3179 | } | ||
3180 | |||
3181 | /* | ||
3182 | * we have two similar checks here, one based on a percentage | ||
3183 | * and one based on a hard number of 256MB. The idea | ||
3184 | * is that if we have a good amount of free | ||
3185 | * room, don't allocate a chunk. A good amount is | ||
3186 | * less than 80% of the chunks we have allocated utilized, | ||
3187 | * or more than 256MB free. | ||
3188 | */ | ||
3189 | if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes) | ||
3008 | return 0; | 3190 | return 0; |
3009 | 3191 | ||
3010 | if (sinfo->bytes_used + sinfo->bytes_reserved + | 3192 | if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) |
3011 | alloc_bytes < div_factor(num_bytes, 8)) | ||
3012 | return 0; | 3193 | return 0; |
3013 | 3194 | ||
3195 | thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); | ||
3196 | |||
3197 | /* 256MB or 5% of the FS */ | ||
3198 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); | ||
3199 | |||
3200 | if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) | ||
3201 | return 0; | ||
3014 | return 1; | 3202 | return 1; |
3015 | } | 3203 | } |
3016 | 3204 | ||
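The rewritten should_alloc_chunk() is easier to follow as a standalone function. The sketch below mirrors the logic in the hunk above in plain userspace C; the CHUNK_ALLOC_* values and the percentage helpers are local stand-ins for the kernel's definitions (div_factor() is tenths, div_factor_fine() is hundredths):

/* userspace model of the should_alloc_chunk() policy above */
#include <stdint.h>

enum { CHUNK_ALLOC_NO_FORCE, CHUNK_ALLOC_LIMITED, CHUNK_ALLOC_FORCE };

static uint64_t pct10(uint64_t num, int f)  { return num * f / 10; }
static uint64_t pct100(uint64_t num, int f) { return num * f / 100; }
static uint64_t max_u64(uint64_t a, uint64_t b) { return a > b ? a : b; }

static int should_alloc_chunk_model(uint64_t fs_total_bytes,
				    uint64_t total, uint64_t readonly,
				    uint64_t used, uint64_t reserved,
				    uint64_t alloc_bytes, int force)
{
	uint64_t num_bytes = total - readonly;
	uint64_t num_allocated = used + reserved;
	uint64_t thresh;

	if (force == CHUNK_ALLOC_FORCE)
		return 1;

	/* limited mode: keep ~1% of the FS (at least 64MB) unallocated */
	if (force == CHUNK_ALLOC_LIMITED) {
		thresh = max_u64(64ULL << 20, pct100(fs_total_bytes, 1));
		if (num_bytes - num_allocated < thresh)
			return 1;
	}

	/* plenty of room: more than 256MB free, or under 80% utilized */
	if (num_allocated + alloc_bytes + (256ULL << 20) < num_bytes)
		return 0;
	if (num_allocated + alloc_bytes < pct10(num_bytes, 8))
		return 0;

	/* big FS with lightly used chunks (<30%): hold off as well */
	thresh = max_u64(256ULL << 20, pct100(fs_total_bytes, 5));
	if (num_bytes > thresh && used < pct10(num_bytes, 3))
		return 0;

	return 1;
}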
@@ -3020,10 +3208,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3020 | { | 3208 | { |
3021 | struct btrfs_space_info *space_info; | 3209 | struct btrfs_space_info *space_info; |
3022 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3210 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
3211 | int wait_for_alloc = 0; | ||
3023 | int ret = 0; | 3212 | int ret = 0; |
3024 | 3213 | ||
3025 | mutex_lock(&fs_info->chunk_mutex); | ||
3026 | |||
3027 | flags = btrfs_reduce_alloc_profile(extent_root, flags); | 3214 | flags = btrfs_reduce_alloc_profile(extent_root, flags); |
3028 | 3215 | ||
3029 | space_info = __find_space_info(extent_root->fs_info, flags); | 3216 | space_info = __find_space_info(extent_root->fs_info, flags); |
@@ -3034,20 +3221,47 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3034 | } | 3221 | } |
3035 | BUG_ON(!space_info); | 3222 | BUG_ON(!space_info); |
3036 | 3223 | ||
3224 | again: | ||
3037 | spin_lock(&space_info->lock); | 3225 | spin_lock(&space_info->lock); |
3038 | if (space_info->force_alloc) | 3226 | if (space_info->force_alloc) |
3039 | force = 1; | 3227 | force = space_info->force_alloc; |
3040 | if (space_info->full) { | 3228 | if (space_info->full) { |
3041 | spin_unlock(&space_info->lock); | 3229 | spin_unlock(&space_info->lock); |
3042 | goto out; | 3230 | return 0; |
3043 | } | 3231 | } |
3044 | 3232 | ||
3045 | if (!force && !should_alloc_chunk(space_info, alloc_bytes)) { | 3233 | if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) { |
3046 | spin_unlock(&space_info->lock); | 3234 | spin_unlock(&space_info->lock); |
3047 | goto out; | 3235 | return 0; |
3236 | } else if (space_info->chunk_alloc) { | ||
3237 | wait_for_alloc = 1; | ||
3238 | } else { | ||
3239 | space_info->chunk_alloc = 1; | ||
3048 | } | 3240 | } |
3241 | |||
3049 | spin_unlock(&space_info->lock); | 3242 | spin_unlock(&space_info->lock); |
3050 | 3243 | ||
3244 | mutex_lock(&fs_info->chunk_mutex); | ||
3245 | |||
3246 | /* | ||
3247 | * The chunk_mutex is held throughout the entirety of a chunk | ||
3248 | * allocation, so once we've acquired the chunk_mutex we know that the | ||
3249 | * other guy is done and we need to recheck and see if we should | ||
3250 | * allocate. | ||
3251 | */ | ||
3252 | if (wait_for_alloc) { | ||
3253 | mutex_unlock(&fs_info->chunk_mutex); | ||
3254 | wait_for_alloc = 0; | ||
3255 | goto again; | ||
3256 | } | ||
3257 | |||
3258 | /* | ||
3259 | * If we have mixed data/metadata chunks we want to make sure we keep | ||
3260 | * allocating mixed chunks instead of individual chunks. | ||
3261 | */ | ||
3262 | if (btrfs_mixed_space_info(space_info)) | ||
3263 | flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA); | ||
3264 | |||
3051 | /* | 3265 | /* |
3052 | * if we're doing a data chunk, go ahead and make sure that | 3266 | * if we're doing a data chunk, go ahead and make sure that |
3053 | * we keep a reasonable number of metadata chunks allocated in the | 3267 | * we keep a reasonable number of metadata chunks allocated in the |
@@ -3066,167 +3280,220 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3066 | space_info->full = 1; | 3280 | space_info->full = 1; |
3067 | else | 3281 | else |
3068 | ret = 1; | 3282 | ret = 1; |
3069 | space_info->force_alloc = 0; | 3283 | |
3284 | space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; | ||
3285 | space_info->chunk_alloc = 0; | ||
3070 | spin_unlock(&space_info->lock); | 3286 | spin_unlock(&space_info->lock); |
3071 | out: | ||
3072 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3287 | mutex_unlock(&extent_root->fs_info->chunk_mutex); |
3073 | return ret; | 3288 | return ret; |
3074 | } | 3289 | } |
3075 | 3290 | ||
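The do_chunk_alloc() rework above also changes the locking: instead of taking chunk_mutex up front, a chunk_alloc flag under space_info->lock elects a single allocator, and everyone else blocks on chunk_mutex and then re-evaluates from scratch. A pthread model of that handoff; the types are stand-ins and the policy check itself is elided:

/* pthread sketch of the single-allocator handoff in do_chunk_alloc() */
#include <pthread.h>
#include <stdbool.h>

struct space_info_model {
	pthread_mutex_t lock;     /* stands in for the spinlock */
	bool full;
	bool chunk_alloc;         /* an allocation is in flight */
};

static pthread_mutex_t chunk_mutex = PTHREAD_MUTEX_INITIALIZER;

static int do_chunk_alloc_model(struct space_info_model *si)
{
	bool wait_for_alloc;
again:
	pthread_mutex_lock(&si->lock);
	if (si->full) {
		pthread_mutex_unlock(&si->lock);
		return 0;
	}
	wait_for_alloc = si->chunk_alloc;
	if (!wait_for_alloc)
		si->chunk_alloc = true;
	pthread_mutex_unlock(&si->lock);

	pthread_mutex_lock(&chunk_mutex);
	if (wait_for_alloc) {
		/* the winner is done once we get here; recheck everything */
		pthread_mutex_unlock(&chunk_mutex);
		goto again;
	}

	/* ... allocate the chunk, set si->full on -ENOSPC ... */

	pthread_mutex_lock(&si->lock);
	si->chunk_alloc = false;
	pthread_mutex_unlock(&si->lock);
	pthread_mutex_unlock(&chunk_mutex);
	return 1;
}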
3076 | static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, | ||
3077 | struct btrfs_root *root, | ||
3078 | struct btrfs_space_info *sinfo, u64 num_bytes) | ||
3079 | { | ||
3080 | int ret; | ||
3081 | int end_trans = 0; | ||
3082 | |||
3083 | if (sinfo->full) | ||
3084 | return 0; | ||
3085 | |||
3086 | spin_lock(&sinfo->lock); | ||
3087 | ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024); | ||
3088 | spin_unlock(&sinfo->lock); | ||
3089 | if (!ret) | ||
3090 | return 0; | ||
3091 | |||
3092 | if (!trans) { | ||
3093 | trans = btrfs_join_transaction(root, 1); | ||
3094 | BUG_ON(IS_ERR(trans)); | ||
3095 | end_trans = 1; | ||
3096 | } | ||
3097 | |||
3098 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
3099 | num_bytes + 2 * 1024 * 1024, | ||
3100 | get_alloc_profile(root, sinfo->flags), 0); | ||
3101 | |||
3102 | if (end_trans) | ||
3103 | btrfs_end_transaction(trans, root); | ||
3104 | |||
3105 | return ret == 1 ? 1 : 0; | ||
3106 | } | ||
3107 | |||
3108 | /* | 3291 | /* |
3109 | * shrink metadata reservation for delalloc | 3292 | * shrink metadata reservation for delalloc |
3110 | */ | 3293 | */ |
3111 | static int shrink_delalloc(struct btrfs_trans_handle *trans, | 3294 | static int shrink_delalloc(struct btrfs_trans_handle *trans, |
3112 | struct btrfs_root *root, u64 to_reclaim) | 3295 | struct btrfs_root *root, u64 to_reclaim, int sync) |
3113 | { | 3296 | { |
3114 | struct btrfs_block_rsv *block_rsv; | 3297 | struct btrfs_block_rsv *block_rsv; |
3298 | struct btrfs_space_info *space_info; | ||
3115 | u64 reserved; | 3299 | u64 reserved; |
3116 | u64 max_reclaim; | 3300 | u64 max_reclaim; |
3117 | u64 reclaimed = 0; | 3301 | u64 reclaimed = 0; |
3118 | int pause = 1; | 3302 | long time_left; |
3119 | int ret; | 3303 | int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; |
3304 | int loops = 0; | ||
3305 | unsigned long progress; | ||
3120 | 3306 | ||
3121 | block_rsv = &root->fs_info->delalloc_block_rsv; | 3307 | block_rsv = &root->fs_info->delalloc_block_rsv; |
3122 | spin_lock(&block_rsv->lock); | 3308 | space_info = block_rsv->space_info; |
3123 | reserved = block_rsv->reserved; | 3309 | |
3124 | spin_unlock(&block_rsv->lock); | 3310 | smp_mb(); |
3311 | reserved = space_info->bytes_reserved; | ||
3312 | progress = space_info->reservation_progress; | ||
3125 | 3313 | ||
3126 | if (reserved == 0) | 3314 | if (reserved == 0) |
3127 | return 0; | 3315 | return 0; |
3128 | 3316 | ||
3129 | max_reclaim = min(reserved, to_reclaim); | 3317 | max_reclaim = min(reserved, to_reclaim); |
3130 | 3318 | ||
3131 | while (1) { | 3319 | while (loops < 1024) { |
3132 | ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0); | 3320 | /* have the flusher threads jump in and do some IO */ |
3133 | if (!ret) { | 3321 | smp_mb(); |
3134 | __set_current_state(TASK_INTERRUPTIBLE); | 3322 | nr_pages = min_t(unsigned long, nr_pages, |
3135 | schedule_timeout(pause); | 3323 | root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); |
3136 | pause <<= 1; | 3324 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); |
3137 | if (pause > HZ / 10) | ||
3138 | pause = HZ / 10; | ||
3139 | } else { | ||
3140 | pause = 1; | ||
3141 | } | ||
3142 | 3325 | ||
3143 | spin_lock(&block_rsv->lock); | 3326 | spin_lock(&space_info->lock); |
3144 | if (reserved > block_rsv->reserved) | 3327 | if (reserved > space_info->bytes_reserved) |
3145 | reclaimed = reserved - block_rsv->reserved; | 3328 | reclaimed += reserved - space_info->bytes_reserved; |
3146 | reserved = block_rsv->reserved; | 3329 | reserved = space_info->bytes_reserved; |
3147 | spin_unlock(&block_rsv->lock); | 3330 | spin_unlock(&space_info->lock); |
3331 | |||
3332 | loops++; | ||
3148 | 3333 | ||
3149 | if (reserved == 0 || reclaimed >= max_reclaim) | 3334 | if (reserved == 0 || reclaimed >= max_reclaim) |
3150 | break; | 3335 | break; |
3151 | 3336 | ||
3152 | if (trans && trans->transaction->blocked) | 3337 | if (trans && trans->transaction->blocked) |
3153 | return -EAGAIN; | 3338 | return -EAGAIN; |
3339 | |||
3340 | time_left = schedule_timeout_interruptible(1); | ||
3341 | |||
3342 | /* We were interrupted, exit */ | ||
3343 | if (time_left) | ||
3344 | break; | ||
3345 | |||
3346 | /* we've kicked the IO a few times; if anything has been freed, | ||
3347 | * exit. There is no sense in looping here for a long time | ||
3348 | * when we really need to commit the transaction, or there are | ||
3349 | * just too many writers without enough free space. | ||
3350 | */ | ||
3351 | |||
3352 | if (loops > 3) { | ||
3353 | smp_mb(); | ||
3354 | if (progress != space_info->reservation_progress) | ||
3355 | break; | ||
3356 | } | ||
3357 | |||
3154 | } | 3358 | } |
3155 | return reclaimed >= to_reclaim; | 3359 | return reclaimed >= to_reclaim; |
3156 | } | 3360 | } |
3157 | 3361 | ||
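The rewritten shrink_delalloc() replaces the exponential-backoff loop with a bounded one: kick the flusher threads, sleep a jiffy, and give up once progress stalls. A sketch of just the exit policy, with the parameters standing in for the kernel state (reservation_progress is sampled before the loop and compared each pass):

/* sketch of the loop-exit policy in shrink_delalloc() above */
#include <stdint.h>
#include <stdbool.h>

static bool should_stop_shrinking(int loops, uint64_t reserved,
				  uint64_t reclaimed, uint64_t max_reclaim,
				  unsigned long start_progress,
				  unsigned long cur_progress,
				  bool interrupted)
{
	if (loops >= 1024)			/* hard cap on iterations */
		return true;
	if (reserved == 0 || reclaimed >= max_reclaim)
		return true;			/* done, or nothing left */
	if (interrupted)			/* signal during the 1-jiffy sleep */
		return true;
	/* after a few IO kicks, stop as soon as anyone made progress */
	if (loops > 3 && cur_progress != start_progress)
		return true;
	return false;
}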
3158 | static int should_retry_reserve(struct btrfs_trans_handle *trans, | 3362 | /* |
3159 | struct btrfs_root *root, | 3363 | * Retries tells us how many times we've called reserve_metadata_bytes. The |
3160 | struct btrfs_block_rsv *block_rsv, | 3364 | * idea is if this is the first call (retries == 0) then we will add to our |
3161 | u64 num_bytes, int *retries) | 3365 | * reserved count if we can't make the allocation in order to hold our place |
3366 | * while we go and try and free up space. That way for retries > 1 we don't try | ||
3367 | * and add space; we just check to see if the amount of unused space is >= the | ||
3368 | * total space, meaning that our reservation is valid. | ||
3369 | * | ||
3370 | * However if we don't intend to retry this reservation, pass 0 as flush so | ||
3371 | * that it short circuits this logic. | ||
3372 | */ | ||
3373 | static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, | ||
3374 | struct btrfs_root *root, | ||
3375 | struct btrfs_block_rsv *block_rsv, | ||
3376 | u64 orig_bytes, int flush) | ||
3162 | { | 3377 | { |
3163 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3378 | struct btrfs_space_info *space_info = block_rsv->space_info; |
3164 | int ret; | 3379 | u64 unused; |
3380 | u64 num_bytes = orig_bytes; | ||
3381 | int retries = 0; | ||
3382 | int ret = 0; | ||
3383 | bool reserved = false; | ||
3384 | bool committed = false; | ||
3165 | 3385 | ||
3166 | if ((*retries) > 2) | 3386 | again: |
3167 | return -ENOSPC; | 3387 | ret = -ENOSPC; |
3388 | if (reserved) | ||
3389 | num_bytes = 0; | ||
3168 | 3390 | ||
3169 | ret = maybe_allocate_chunk(trans, root, space_info, num_bytes); | 3391 | spin_lock(&space_info->lock); |
3170 | if (ret) | 3392 | unused = space_info->bytes_used + space_info->bytes_reserved + |
3171 | return 1; | 3393 | space_info->bytes_pinned + space_info->bytes_readonly + |
3394 | space_info->bytes_may_use; | ||
3172 | 3395 | ||
3173 | if (trans && trans->transaction->in_commit) | 3396 | /* |
3174 | return -ENOSPC; | 3397 | * The idea here is that if we've not already over-reserved the block |
3398 | * group then we can go ahead and save our reservation first and then | ||
3399 | * start flushing if we need to. Otherwise if we've already | ||
3400 | * overcommitted, let's start flushing stuff first and then come back | ||
3401 | * and try to make our reservation. | ||
3402 | */ | ||
3403 | if (unused <= space_info->total_bytes) { | ||
3404 | unused = space_info->total_bytes - unused; | ||
3405 | if (unused >= num_bytes) { | ||
3406 | if (!reserved) | ||
3407 | space_info->bytes_reserved += orig_bytes; | ||
3408 | ret = 0; | ||
3409 | } else { | ||
3410 | /* | ||
3411 | * Ok, set num_bytes to orig_bytes since we aren't | ||
3412 | * overcommitted; this way we only try to reclaim what | ||
3413 | * we need. | ||
3414 | */ | ||
3415 | num_bytes = orig_bytes; | ||
3416 | } | ||
3417 | } else { | ||
3418 | /* | ||
3419 | * Ok, we're overcommitted; set num_bytes to the overcommitted | ||
3420 | * amount plus the amount of bytes that we need for this | ||
3421 | * reservation. | ||
3422 | */ | ||
3423 | num_bytes = unused - space_info->total_bytes + | ||
3424 | (orig_bytes * (retries + 1)); | ||
3425 | } | ||
3175 | 3426 | ||
3176 | ret = shrink_delalloc(trans, root, num_bytes); | 3427 | /* |
3177 | if (ret) | 3428 | * Couldn't make our reservation; save our place so while we're trying |
3178 | return ret; | 3429 | * to reclaim space we can actually use it instead of somebody else |
3430 | * stealing it from us. | ||
3431 | */ | ||
3432 | if (ret && !reserved) { | ||
3433 | space_info->bytes_reserved += orig_bytes; | ||
3434 | reserved = true; | ||
3435 | } | ||
3179 | 3436 | ||
3180 | spin_lock(&space_info->lock); | ||
3181 | if (space_info->bytes_pinned < num_bytes) | ||
3182 | ret = 1; | ||
3183 | spin_unlock(&space_info->lock); | 3437 | spin_unlock(&space_info->lock); |
3184 | if (ret) | ||
3185 | return -ENOSPC; | ||
3186 | 3438 | ||
3187 | (*retries)++; | 3439 | if (!ret) |
3188 | 3440 | return 0; | |
3189 | if (trans) | ||
3190 | return -EAGAIN; | ||
3191 | 3441 | ||
3192 | trans = btrfs_join_transaction(root, 1); | 3442 | if (!flush) |
3193 | BUG_ON(IS_ERR(trans)); | 3443 | goto out; |
3194 | ret = btrfs_commit_transaction(trans, root); | ||
3195 | BUG_ON(ret); | ||
3196 | 3444 | ||
3197 | return 1; | 3445 | /* |
3198 | } | 3446 | * We do synchronous shrinking since we don't actually unreserve |
3447 | * metadata until after the IO is completed. | ||
3448 | */ | ||
3449 | ret = shrink_delalloc(trans, root, num_bytes, 1); | ||
3450 | if (ret > 0) | ||
3451 | return 0; | ||
3452 | else if (ret < 0) | ||
3453 | goto out; | ||
3199 | 3454 | ||
3200 | static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv, | 3455 | /* |
3201 | u64 num_bytes) | 3456 | * So if we were overcommitted it's possible that somebody else flushed |
3202 | { | 3457 | * out enough space and we simply didn't have enough space to reclaim, |
3203 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3458 | * so go back around and try again. |
3204 | u64 unused; | 3459 | */ |
3205 | int ret = -ENOSPC; | 3460 | if (retries < 2) { |
3461 | retries++; | ||
3462 | goto again; | ||
3463 | } | ||
3206 | 3464 | ||
3207 | spin_lock(&space_info->lock); | 3465 | spin_lock(&space_info->lock); |
3208 | unused = space_info->bytes_used + space_info->bytes_reserved + | 3466 | /* |
3209 | space_info->bytes_pinned + space_info->bytes_readonly; | 3467 | * Not enough space to be reclaimed; don't bother committing the |
3468 | * transaction. | ||
3469 | */ | ||
3470 | if (space_info->bytes_pinned < orig_bytes) | ||
3471 | ret = -ENOSPC; | ||
3472 | spin_unlock(&space_info->lock); | ||
3473 | if (ret) | ||
3474 | goto out; | ||
3210 | 3475 | ||
3211 | if (unused < space_info->total_bytes) | 3476 | ret = -EAGAIN; |
3212 | unused = space_info->total_bytes - unused; | 3477 | if (trans || committed) |
3213 | else | 3478 | goto out; |
3214 | unused = 0; | ||
3215 | 3479 | ||
3216 | if (unused >= num_bytes) { | 3480 | ret = -ENOSPC; |
3217 | if (block_rsv->priority >= 10) { | 3481 | trans = btrfs_join_transaction(root); |
3218 | space_info->bytes_reserved += num_bytes; | 3482 | if (IS_ERR(trans)) |
3219 | ret = 0; | 3483 | goto out; |
3220 | } else { | 3484 | ret = btrfs_commit_transaction(trans, root); |
3221 | if ((unused + block_rsv->reserved) * | 3485 | if (!ret) { |
3222 | block_rsv->priority >= | 3486 | trans = NULL; |
3223 | (num_bytes + block_rsv->reserved) * 10) { | 3487 | committed = true; |
3224 | space_info->bytes_reserved += num_bytes; | 3488 | goto again; |
3225 | ret = 0; | 3489 | } |
3226 | } | 3490 | |
3227 | } | 3491 | out: |
3492 | if (reserved) { | ||
3493 | spin_lock(&space_info->lock); | ||
3494 | space_info->bytes_reserved -= orig_bytes; | ||
3495 | spin_unlock(&space_info->lock); | ||
3228 | } | 3496 | } |
3229 | spin_unlock(&space_info->lock); | ||
3230 | 3497 | ||
3231 | return ret; | 3498 | return ret; |
3232 | } | 3499 | } |
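The core trick in the new reserve_metadata_bytes() is the "hold our place" accounting: a failed reservation still bumps bytes_reserved, so any space reclaimed while this caller flushes is earmarked for it rather than stolen by another task, and the hold is only dropped on final failure in the out: path. A userspace model of just that accounting step; the struct and names are stand-ins, and the flushing itself is left to the caller:

/* model of the hold-our-place step in reserve_metadata_bytes() above */
#include <stdint.h>
#include <stdbool.h>

struct sinfo_model {
	uint64_t total, used, reserved, pinned, readonly, may_use;
};

/* 0 on success; on failure *held is set and must be undone by caller */
static int try_reserve_model(struct sinfo_model *s, uint64_t bytes,
			     bool *held)
{
	uint64_t unused = s->used + s->reserved + s->pinned +
			  s->readonly + s->may_use;
	/* once held, the bytes are already counted in s->reserved */
	uint64_t need = *held ? 0 : bytes;

	if (unused <= s->total && s->total - unused >= need) {
		if (!*held)
			s->reserved += bytes;	/* reservation granted */
		return 0;
	}
	if (!*held) {
		s->reserved += bytes;		/* park it while we flush */
		*held = true;
	}
	return -1;	/* caller flushes delalloc / commits and retries */
}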
@@ -3273,8 +3540,8 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, | |||
3273 | spin_unlock(&block_rsv->lock); | 3540 | spin_unlock(&block_rsv->lock); |
3274 | } | 3541 | } |
3275 | 3542 | ||
3276 | void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, | 3543 | static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, |
3277 | struct btrfs_block_rsv *dest, u64 num_bytes) | 3544 | struct btrfs_block_rsv *dest, u64 num_bytes) |
3278 | { | 3545 | { |
3279 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3546 | struct btrfs_space_info *space_info = block_rsv->space_info; |
3280 | 3547 | ||
@@ -3293,10 +3560,23 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, | |||
3293 | 3560 | ||
3294 | if (num_bytes > 0) { | 3561 | if (num_bytes > 0) { |
3295 | if (dest) { | 3562 | if (dest) { |
3296 | block_rsv_add_bytes(dest, num_bytes, 0); | 3563 | spin_lock(&dest->lock); |
3297 | } else { | 3564 | if (!dest->full) { |
3565 | u64 bytes_to_add; | ||
3566 | |||
3567 | bytes_to_add = dest->size - dest->reserved; | ||
3568 | bytes_to_add = min(num_bytes, bytes_to_add); | ||
3569 | dest->reserved += bytes_to_add; | ||
3570 | if (dest->reserved >= dest->size) | ||
3571 | dest->full = 1; | ||
3572 | num_bytes -= bytes_to_add; | ||
3573 | } | ||
3574 | spin_unlock(&dest->lock); | ||
3575 | } | ||
3576 | if (num_bytes) { | ||
3298 | spin_lock(&space_info->lock); | 3577 | spin_lock(&space_info->lock); |
3299 | space_info->bytes_reserved -= num_bytes; | 3578 | space_info->bytes_reserved -= num_bytes; |
3579 | space_info->reservation_progress++; | ||
3300 | spin_unlock(&space_info->lock); | 3580 | spin_unlock(&space_info->lock); |
3301 | } | 3581 | } |
3302 | } | 3582 | } |
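The hunk above changes block_rsv_release_bytes() so freed bytes first top up the destination rsv, and only the excess is returned to the space_info (bumping reservation_progress so waiters in shrink_delalloc() notice). The refill itself, as a standalone sketch with a stand-in struct:

/* model of the refill added to block_rsv_release_bytes() above */
#include <stdint.h>

struct rsv_model { uint64_t size, reserved; int full; };

static uint64_t release_to_dest(struct rsv_model *dest, uint64_t num_bytes)
{
	if (dest && !dest->full) {
		uint64_t room = dest->size - dest->reserved;
		uint64_t add = num_bytes < room ? num_bytes : room;

		dest->reserved += add;
		if (dest->reserved >= dest->size)
			dest->full = 1;
		num_bytes -= add;
	}
	return num_bytes;	/* remainder goes back to the space_info */
}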
@@ -3328,18 +3608,14 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | |||
3328 | { | 3608 | { |
3329 | struct btrfs_block_rsv *block_rsv; | 3609 | struct btrfs_block_rsv *block_rsv; |
3330 | struct btrfs_fs_info *fs_info = root->fs_info; | 3610 | struct btrfs_fs_info *fs_info = root->fs_info; |
3331 | u64 alloc_target; | ||
3332 | 3611 | ||
3333 | block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); | 3612 | block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); |
3334 | if (!block_rsv) | 3613 | if (!block_rsv) |
3335 | return NULL; | 3614 | return NULL; |
3336 | 3615 | ||
3337 | btrfs_init_block_rsv(block_rsv); | 3616 | btrfs_init_block_rsv(block_rsv); |
3338 | |||
3339 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3340 | block_rsv->space_info = __find_space_info(fs_info, | 3617 | block_rsv->space_info = __find_space_info(fs_info, |
3341 | BTRFS_BLOCK_GROUP_METADATA); | 3618 | BTRFS_BLOCK_GROUP_METADATA); |
3342 | |||
3343 | return block_rsv; | 3619 | return block_rsv; |
3344 | } | 3620 | } |
3345 | 3621 | ||
@@ -3370,23 +3646,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | |||
3370 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | 3646 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, |
3371 | struct btrfs_root *root, | 3647 | struct btrfs_root *root, |
3372 | struct btrfs_block_rsv *block_rsv, | 3648 | struct btrfs_block_rsv *block_rsv, |
3373 | u64 num_bytes, int *retries) | 3649 | u64 num_bytes) |
3374 | { | 3650 | { |
3375 | int ret; | 3651 | int ret; |
3376 | 3652 | ||
3377 | if (num_bytes == 0) | 3653 | if (num_bytes == 0) |
3378 | return 0; | 3654 | return 0; |
3379 | again: | 3655 | |
3380 | ret = reserve_metadata_bytes(block_rsv, num_bytes); | 3656 | ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1); |
3381 | if (!ret) { | 3657 | if (!ret) { |
3382 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | 3658 | block_rsv_add_bytes(block_rsv, num_bytes, 1); |
3383 | return 0; | 3659 | return 0; |
3384 | } | 3660 | } |
3385 | 3661 | ||
3386 | ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries); | ||
3387 | if (ret > 0) | ||
3388 | goto again; | ||
3389 | |||
3390 | return ret; | 3662 | return ret; |
3391 | } | 3663 | } |
3392 | 3664 | ||
@@ -3421,7 +3693,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3421 | return 0; | 3693 | return 0; |
3422 | 3694 | ||
3423 | if (block_rsv->refill_used) { | 3695 | if (block_rsv->refill_used) { |
3424 | ret = reserve_metadata_bytes(block_rsv, num_bytes); | 3696 | ret = reserve_metadata_bytes(trans, root, block_rsv, |
3697 | num_bytes, 0); | ||
3425 | if (!ret) { | 3698 | if (!ret) { |
3426 | block_rsv_add_bytes(block_rsv, num_bytes, 0); | 3699 | block_rsv_add_bytes(block_rsv, num_bytes, 0); |
3427 | return 0; | 3700 | return 0; |
@@ -3432,17 +3705,12 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3432 | if (trans) | 3705 | if (trans) |
3433 | return -EAGAIN; | 3706 | return -EAGAIN; |
3434 | 3707 | ||
3435 | trans = btrfs_join_transaction(root, 1); | 3708 | trans = btrfs_join_transaction(root); |
3436 | BUG_ON(IS_ERR(trans)); | 3709 | BUG_ON(IS_ERR(trans)); |
3437 | ret = btrfs_commit_transaction(trans, root); | 3710 | ret = btrfs_commit_transaction(trans, root); |
3438 | return 0; | 3711 | return 0; |
3439 | } | 3712 | } |
3440 | 3713 | ||
3441 | WARN_ON(1); | ||
3442 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
3443 | block_rsv->size, block_rsv->reserved, | ||
3444 | block_rsv->freed[0], block_rsv->freed[1]); | ||
3445 | |||
3446 | return -ENOSPC; | 3714 | return -ENOSPC; |
3447 | } | 3715 | } |
3448 | 3716 | ||
@@ -3476,23 +3744,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) | |||
3476 | u64 meta_used; | 3744 | u64 meta_used; |
3477 | u64 data_used; | 3745 | u64 data_used; |
3478 | int csum_size = btrfs_super_csum_size(&fs_info->super_copy); | 3746 | int csum_size = btrfs_super_csum_size(&fs_info->super_copy); |
3479 | #if 0 | ||
3480 | /* | ||
3481 | * per tree used space accounting can be inaccuracy, so we | ||
3482 | * can't rely on it. | ||
3483 | */ | ||
3484 | spin_lock(&fs_info->extent_root->accounting_lock); | ||
3485 | num_bytes = btrfs_root_used(&fs_info->extent_root->root_item); | ||
3486 | spin_unlock(&fs_info->extent_root->accounting_lock); | ||
3487 | |||
3488 | spin_lock(&fs_info->csum_root->accounting_lock); | ||
3489 | num_bytes += btrfs_root_used(&fs_info->csum_root->root_item); | ||
3490 | spin_unlock(&fs_info->csum_root->accounting_lock); | ||
3491 | 3747 | ||
3492 | spin_lock(&fs_info->tree_root->accounting_lock); | ||
3493 | num_bytes += btrfs_root_used(&fs_info->tree_root->root_item); | ||
3494 | spin_unlock(&fs_info->tree_root->accounting_lock); | ||
3495 | #endif | ||
3496 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); | 3748 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); |
3497 | spin_lock(&sinfo->lock); | 3749 | spin_lock(&sinfo->lock); |
3498 | data_used = sinfo->bytes_used; | 3750 | data_used = sinfo->bytes_used; |
@@ -3500,6 +3752,8 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) | |||
3500 | 3752 | ||
3501 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | 3753 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
3502 | spin_lock(&sinfo->lock); | 3754 | spin_lock(&sinfo->lock); |
3755 | if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) | ||
3756 | data_used = 0; | ||
3503 | meta_used = sinfo->bytes_used; | 3757 | meta_used = sinfo->bytes_used; |
3504 | spin_unlock(&sinfo->lock); | 3758 | spin_unlock(&sinfo->lock); |
3505 | 3759 | ||
@@ -3527,7 +3781,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
3527 | block_rsv->size = num_bytes; | 3781 | block_rsv->size = num_bytes; |
3528 | 3782 | ||
3529 | num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + | 3783 | num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + |
3530 | sinfo->bytes_reserved + sinfo->bytes_readonly; | 3784 | sinfo->bytes_reserved + sinfo->bytes_readonly + |
3785 | sinfo->bytes_may_use; | ||
3531 | 3786 | ||
3532 | if (sinfo->total_bytes > num_bytes) { | 3787 | if (sinfo->total_bytes > num_bytes) { |
3533 | num_bytes = sinfo->total_bytes - num_bytes; | 3788 | num_bytes = sinfo->total_bytes - num_bytes; |
@@ -3538,13 +3793,11 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
3538 | if (block_rsv->reserved >= block_rsv->size) { | 3793 | if (block_rsv->reserved >= block_rsv->size) { |
3539 | num_bytes = block_rsv->reserved - block_rsv->size; | 3794 | num_bytes = block_rsv->reserved - block_rsv->size; |
3540 | sinfo->bytes_reserved -= num_bytes; | 3795 | sinfo->bytes_reserved -= num_bytes; |
3796 | sinfo->reservation_progress++; | ||
3541 | block_rsv->reserved = block_rsv->size; | 3797 | block_rsv->reserved = block_rsv->size; |
3542 | block_rsv->full = 1; | 3798 | block_rsv->full = 1; |
3543 | } | 3799 | } |
3544 | #if 0 | 3800 | |
3545 | printk(KERN_INFO"global block rsv size %llu reserved %llu\n", | ||
3546 | block_rsv->size, block_rsv->reserved); | ||
3547 | #endif | ||
3548 | spin_unlock(&sinfo->lock); | 3801 | spin_unlock(&sinfo->lock); |
3549 | spin_unlock(&block_rsv->lock); | 3802 | spin_unlock(&block_rsv->lock); |
3550 | } | 3803 | } |
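update_global_block_rsv() now also counts bytes_may_use when computing headroom, and bumps reservation_progress whenever excess reservation is clamped off. A sketch of that clamp, under the assumption that the surrounding (unshown) parts of the function are unchanged; the struct only reflects fields visible in the hunk above:

/* sketch of the clamp at the end of update_global_block_rsv() */
#include <stdint.h>

struct rsv_clamp_model {
	uint64_t size, reserved;
	int full;
};

/* returns the bytes handed back to the space_info; the real code also
 * increments reservation_progress for this release */
static uint64_t clamp_global_rsv(struct rsv_clamp_model *rsv)
{
	uint64_t excess = 0;

	if (rsv->reserved >= rsv->size) {
		excess = rsv->reserved - rsv->size;
		rsv->reserved = rsv->size;
		rsv->full = 1;
	}
	return excess;
}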
@@ -3590,15 +3843,40 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
3590 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); | 3843 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); |
3591 | } | 3844 | } |
3592 | 3845 | ||
3593 | static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items) | 3846 | int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, |
3847 | struct btrfs_root *root, | ||
3848 | struct btrfs_block_rsv *rsv) | ||
3594 | { | 3849 | { |
3595 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | 3850 | struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; |
3596 | 3 * num_items; | 3851 | u64 num_bytes; |
3852 | int ret; | ||
3853 | |||
3854 | /* | ||
3855 | * Truncate should be freeing data, but give us 2 items just in case it | ||
3856 | * needs to use some space. We may want to be smarter about this in the | ||
3857 | * future. | ||
3858 | */ | ||
3859 | num_bytes = btrfs_calc_trans_metadata_size(root, 2); | ||
3860 | |||
3861 | /* We already have enough bytes, just return */ | ||
3862 | if (rsv->reserved >= num_bytes) | ||
3863 | return 0; | ||
3864 | |||
3865 | num_bytes -= rsv->reserved; | ||
3866 | |||
3867 | /* | ||
3868 | * You should have reserved enough space beforehand to do this, so this | ||
3869 | * should not fail. | ||
3870 | */ | ||
3871 | ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); | ||
3872 | BUG_ON(ret); | ||
3873 | |||
3874 | return 0; | ||
3597 | } | 3875 | } |
3598 | 3876 | ||
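The per-item cost helper is renamed from the local calc_trans_metadata_size() to the exported btrfs_calc_trans_metadata_size(). The removed body charges one full tree path (a leaf plus BTRFS_MAX_LEVEL - 1 nodes), times three for the CoW'd trees, per item; assuming the renamed version keeps the same formula, it works out like this:

/* standalone version of the removed calc_trans_metadata_size();
 * MODEL_BTRFS_MAX_LEVEL mirrors the kernel's BTRFS_MAX_LEVEL of 8 */
#include <stdint.h>

#define MODEL_BTRFS_MAX_LEVEL 8

static uint64_t calc_trans_metadata_size_model(uint32_t leafsize,
					       uint32_t nodesize,
					       int num_items)
{
	return (uint64_t)(leafsize +
			  nodesize * (MODEL_BTRFS_MAX_LEVEL - 1)) *
	       3 * num_items;
}

With 4K leaves and nodes that is 96K per item, so the two items reserved by btrfs_truncate_reserve_metadata() above come to 192K.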
3599 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | 3877 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, |
3600 | struct btrfs_root *root, | 3878 | struct btrfs_root *root, |
3601 | int num_items, int *retries) | 3879 | int num_items) |
3602 | { | 3880 | { |
3603 | u64 num_bytes; | 3881 | u64 num_bytes; |
3604 | int ret; | 3882 | int ret; |
@@ -3606,9 +3884,9 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3606 | if (num_items == 0 || root->fs_info->chunk_root == root) | 3884 | if (num_items == 0 || root->fs_info->chunk_root == root) |
3607 | return 0; | 3885 | return 0; |
3608 | 3886 | ||
3609 | num_bytes = calc_trans_metadata_size(root, num_items); | 3887 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); |
3610 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, | 3888 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, |
3611 | num_bytes, retries); | 3889 | num_bytes); |
3612 | if (!ret) { | 3890 | if (!ret) { |
3613 | trans->bytes_reserved += num_bytes; | 3891 | trans->bytes_reserved += num_bytes; |
3614 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 3892 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
@@ -3636,23 +3914,18 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3636 | struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; | 3914 | struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; |
3637 | 3915 | ||
3638 | /* | 3916 | /* |
3639 | * one for deleting orphan item, one for updating inode and | 3917 | * We need to hold space in order to delete our orphan item once we've |
3642 | * two for calling btrfs_truncate_inode_items. | 3918 | * added it, so this takes the reservation, which we can release later |
3641 | * | 3919 | * when we are truly done with the orphan item. |
3642 | * btrfs_truncate_inode_items is a delete operation, it frees | ||
3643 | * more space than it uses in most cases. So two units of | ||
3644 | * metadata space should be enough for calling it many times. | ||
3645 | * If all of the metadata space is used, we can commit | ||
3646 | * transaction and use space it freed. | ||
3647 | */ | 3920 | */ |
3648 | u64 num_bytes = calc_trans_metadata_size(root, 4); | 3921 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
3649 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 3922 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
3650 | } | 3923 | } |
3651 | 3924 | ||
3652 | void btrfs_orphan_release_metadata(struct inode *inode) | 3925 | void btrfs_orphan_release_metadata(struct inode *inode) |
3653 | { | 3926 | { |
3654 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3927 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3655 | u64 num_bytes = calc_trans_metadata_size(root, 4); | 3928 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
3656 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); | 3929 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); |
3657 | } | 3930 | } |
3658 | 3931 | ||
@@ -3666,7 +3939,7 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3666 | * two for root back/forward refs, two for directory entries | 3939 | * two for root back/forward refs, two for directory entries |
3667 | * and one for root of the snapshot. | 3940 | * and one for root of the snapshot. |
3668 | */ | 3941 | */ |
3669 | u64 num_bytes = calc_trans_metadata_size(root, 5); | 3942 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); |
3670 | dst_rsv->space_info = src_rsv->space_info; | 3943 | dst_rsv->space_info = src_rsv->space_info; |
3671 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 3944 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
3672 | } | 3945 | } |
@@ -3682,43 +3955,37 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3682 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | 3955 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
3683 | u64 to_reserve; | 3956 | u64 to_reserve; |
3684 | int nr_extents; | 3957 | int nr_extents; |
3685 | int retries = 0; | 3958 | int reserved_extents; |
3686 | int ret; | 3959 | int ret; |
3687 | 3960 | ||
3688 | if (btrfs_transaction_in_commit(root->fs_info)) | 3961 | if (btrfs_transaction_in_commit(root->fs_info)) |
3689 | schedule_timeout(1); | 3962 | schedule_timeout(1); |
3690 | 3963 | ||
3691 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 3964 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
3692 | again: | 3965 | |
3693 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
3694 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; | 3966 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; |
3695 | if (nr_extents > BTRFS_I(inode)->reserved_extents) { | 3967 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); |
3696 | nr_extents -= BTRFS_I(inode)->reserved_extents; | 3968 | |
3697 | to_reserve = calc_trans_metadata_size(root, nr_extents); | 3969 | if (nr_extents > reserved_extents) { |
3970 | nr_extents -= reserved_extents; | ||
3971 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); | ||
3698 | } else { | 3972 | } else { |
3699 | nr_extents = 0; | 3973 | nr_extents = 0; |
3700 | to_reserve = 0; | 3974 | to_reserve = 0; |
3701 | } | 3975 | } |
3702 | 3976 | ||
3703 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | 3977 | to_reserve += calc_csum_metadata_size(inode, num_bytes); |
3704 | ret = reserve_metadata_bytes(block_rsv, to_reserve); | 3978 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); |
3705 | if (ret) { | 3979 | if (ret) |
3706 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3707 | ret = should_retry_reserve(NULL, root, block_rsv, to_reserve, | ||
3708 | &retries); | ||
3709 | if (ret > 0) | ||
3710 | goto again; | ||
3711 | return ret; | 3980 | return ret; |
3712 | } | ||
3713 | 3981 | ||
3714 | BTRFS_I(inode)->reserved_extents += nr_extents; | 3982 | atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); |
3715 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 3983 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
3716 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3717 | 3984 | ||
3718 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 3985 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
3719 | 3986 | ||
3720 | if (block_rsv->size > 512 * 1024 * 1024) | 3987 | if (block_rsv->size > 512 * 1024 * 1024) |
3721 | shrink_delalloc(NULL, root, to_reserve); | 3988 | shrink_delalloc(NULL, root, to_reserve, 0); |
3722 | 3989 | ||
3723 | return 0; | 3990 | return 0; |
3724 | } | 3991 | } |
@@ -3728,23 +3995,34 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
3728 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3995 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3729 | u64 to_free; | 3996 | u64 to_free; |
3730 | int nr_extents; | 3997 | int nr_extents; |
3998 | int reserved_extents; | ||
3731 | 3999 | ||
3732 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4000 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
3733 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 4001 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
4002 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); | ||
3734 | 4003 | ||
3735 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 4004 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); |
3736 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | 4005 | do { |
3737 | if (nr_extents < BTRFS_I(inode)->reserved_extents) { | 4006 | int old, new; |
3738 | nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents; | 4007 | |
3739 | BTRFS_I(inode)->reserved_extents -= nr_extents; | 4008 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); |
3740 | } else { | 4009 | if (nr_extents >= reserved_extents) { |
3741 | nr_extents = 0; | 4010 | nr_extents = 0; |
3742 | } | 4011 | break; |
3743 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 4012 | } |
4013 | old = reserved_extents; | ||
4014 | nr_extents = reserved_extents - nr_extents; | ||
4015 | new = reserved_extents - nr_extents; | ||
4016 | old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, | ||
4017 | reserved_extents, new); | ||
4018 | if (likely(old == reserved_extents)) | ||
4019 | break; | ||
4020 | reserved_extents = old; | ||
4021 | } while (1); | ||
3744 | 4022 | ||
3745 | to_free = calc_csum_metadata_size(inode, num_bytes); | 4023 | to_free = calc_csum_metadata_size(inode, num_bytes); |
3746 | if (nr_extents > 0) | 4024 | if (nr_extents > 0) |
3747 | to_free += calc_trans_metadata_size(root, nr_extents); | 4025 | to_free += btrfs_calc_trans_metadata_size(root, nr_extents); |
3748 | 4026 | ||
3749 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 4027 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, |
3750 | to_free); | 4028 | to_free); |
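btrfs_delalloc_release_metadata() drops the accounting_lock in favor of an atomic_cmpxchg() loop on reserved_extents. A C11 stdatomic sketch of the same pattern, shrinking the reserved count down to the outstanding count; the kernel re-reads outstanding_extents each pass, which is simplified to a parameter here:

/* stdatomic model of the cmpxchg loop above: lower *reserved to match
 * outstanding, retrying if another task changes it under us */
#include <stdatomic.h>

static int release_extents_model(atomic_int *reserved, int outstanding)
{
	int old = atomic_load(reserved);

	for (;;) {
		int to_free;

		if (outstanding >= old)
			return 0;	/* nothing over-reserved */
		to_free = old - outstanding;
		/* on failure, old is reloaded with the current value */
		if (atomic_compare_exchange_weak(reserved, &old,
						 old - to_free))
			return to_free;
	}
}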
@@ -3777,12 +4055,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3777 | struct btrfs_root *root, | 4055 | struct btrfs_root *root, |
3778 | u64 bytenr, u64 num_bytes, int alloc) | 4056 | u64 bytenr, u64 num_bytes, int alloc) |
3779 | { | 4057 | { |
3780 | struct btrfs_block_group_cache *cache; | 4058 | struct btrfs_block_group_cache *cache = NULL; |
3781 | struct btrfs_fs_info *info = root->fs_info; | 4059 | struct btrfs_fs_info *info = root->fs_info; |
3782 | int factor; | ||
3783 | u64 total = num_bytes; | 4060 | u64 total = num_bytes; |
3784 | u64 old_val; | 4061 | u64 old_val; |
3785 | u64 byte_in_group; | 4062 | u64 byte_in_group; |
4063 | int factor; | ||
3786 | 4064 | ||
3787 | /* block accounting for super block */ | 4065 | /* block accounting for super block */ |
3788 | spin_lock(&info->delalloc_lock); | 4066 | spin_lock(&info->delalloc_lock); |
@@ -3804,11 +4082,25 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3804 | factor = 2; | 4082 | factor = 2; |
3805 | else | 4083 | else |
3806 | factor = 1; | 4084 | factor = 1; |
4085 | /* | ||
4086 | * If this block group has its free space cache written out, we | ||
4087 | * need to make sure to load it if we are removing space. This | ||
4088 | * is because we need the unpinning stage to actually add the | ||
4089 | * space back to the block group; otherwise we will leak space. | ||
4090 | */ | ||
4091 | if (!alloc && cache->cached == BTRFS_CACHE_NO) | ||
4092 | cache_block_group(cache, trans, NULL, 1); | ||
4093 | |||
3807 | byte_in_group = bytenr - cache->key.objectid; | 4094 | byte_in_group = bytenr - cache->key.objectid; |
3808 | WARN_ON(byte_in_group > cache->key.offset); | 4095 | WARN_ON(byte_in_group > cache->key.offset); |
3809 | 4096 | ||
3810 | spin_lock(&cache->space_info->lock); | 4097 | spin_lock(&cache->space_info->lock); |
3811 | spin_lock(&cache->lock); | 4098 | spin_lock(&cache->lock); |
4099 | |||
4100 | if (btrfs_super_cache_generation(&info->super_copy) != 0 && | ||
4101 | cache->disk_cache_state < BTRFS_DC_CLEAR) | ||
4102 | cache->disk_cache_state = BTRFS_DC_CLEAR; | ||
4103 | |||
3812 | cache->dirty = 1; | 4104 | cache->dirty = 1; |
3813 | old_val = btrfs_block_group_used(&cache->item); | 4105 | old_val = btrfs_block_group_used(&cache->item); |
3814 | num_bytes = min(total, cache->key.offset - byte_in_group); | 4106 | num_bytes = min(total, cache->key.offset - byte_in_group); |
@@ -3817,6 +4109,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3817 | btrfs_set_block_group_used(&cache->item, old_val); | 4109 | btrfs_set_block_group_used(&cache->item, old_val); |
3818 | cache->reserved -= num_bytes; | 4110 | cache->reserved -= num_bytes; |
3819 | cache->space_info->bytes_reserved -= num_bytes; | 4111 | cache->space_info->bytes_reserved -= num_bytes; |
4112 | cache->space_info->reservation_progress++; | ||
3820 | cache->space_info->bytes_used += num_bytes; | 4113 | cache->space_info->bytes_used += num_bytes; |
3821 | cache->space_info->disk_used += num_bytes * factor; | 4114 | cache->space_info->disk_used += num_bytes * factor; |
3822 | spin_unlock(&cache->lock); | 4115 | spin_unlock(&cache->lock); |
@@ -3868,6 +4161,7 @@ static int pin_down_extent(struct btrfs_root *root, | |||
3868 | if (reserved) { | 4161 | if (reserved) { |
3869 | cache->reserved -= num_bytes; | 4162 | cache->reserved -= num_bytes; |
3870 | cache->space_info->bytes_reserved -= num_bytes; | 4163 | cache->space_info->bytes_reserved -= num_bytes; |
4164 | cache->space_info->reservation_progress++; | ||
3871 | } | 4165 | } |
3872 | spin_unlock(&cache->lock); | 4166 | spin_unlock(&cache->lock); |
3873 | spin_unlock(&cache->space_info->lock); | 4167 | spin_unlock(&cache->space_info->lock); |
@@ -3898,8 +4192,8 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
3898 | * update size of reserved extents. this function may return -EAGAIN | 4192 | * update size of reserved extents. this function may return -EAGAIN |
3899 | * if 'reserve' is true or 'sinfo' is false. | 4193 | * if 'reserve' is true or 'sinfo' is false. |
3900 | */ | 4194 | */ |
3901 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | 4195 | int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, |
3902 | u64 num_bytes, int reserve, int sinfo) | 4196 | u64 num_bytes, int reserve, int sinfo) |
3903 | { | 4197 | { |
3904 | int ret = 0; | 4198 | int ret = 0; |
3905 | if (sinfo) { | 4199 | if (sinfo) { |
@@ -3918,6 +4212,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | |||
3918 | space_info->bytes_readonly += num_bytes; | 4212 | space_info->bytes_readonly += num_bytes; |
3919 | cache->reserved -= num_bytes; | 4213 | cache->reserved -= num_bytes; |
3920 | space_info->bytes_reserved -= num_bytes; | 4214 | space_info->bytes_reserved -= num_bytes; |
4215 | space_info->reservation_progress++; | ||
3921 | } | 4216 | } |
3922 | spin_unlock(&cache->lock); | 4217 | spin_unlock(&cache->lock); |
3923 | spin_unlock(&space_info->lock); | 4218 | spin_unlock(&space_info->lock); |
@@ -4037,7 +4332,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
4037 | if (ret) | 4332 | if (ret) |
4038 | break; | 4333 | break; |
4039 | 4334 | ||
4040 | ret = btrfs_discard_extent(root, start, end + 1 - start); | 4335 | if (btrfs_test_opt(root, DISCARD)) |
4336 | ret = btrfs_discard_extent(root, start, | ||
4337 | end + 1 - start, NULL); | ||
4041 | 4338 | ||
4042 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 4339 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
4043 | unpin_extent_range(root, start, end); | 4340 | unpin_extent_range(root, start, end); |
@@ -4134,7 +4431,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4134 | NULL, refs_to_drop, | 4431 | NULL, refs_to_drop, |
4135 | is_data); | 4432 | is_data); |
4136 | BUG_ON(ret); | 4433 | BUG_ON(ret); |
4137 | btrfs_release_path(extent_root, path); | 4434 | btrfs_release_path(path); |
4138 | path->leave_spinning = 1; | 4435 | path->leave_spinning = 1; |
4139 | 4436 | ||
4140 | key.objectid = bytenr; | 4437 | key.objectid = bytenr; |
@@ -4173,7 +4470,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4173 | owner_objectid, 0); | 4470 | owner_objectid, 0); |
4174 | BUG_ON(ret < 0); | 4471 | BUG_ON(ret < 0); |
4175 | 4472 | ||
4176 | btrfs_release_path(extent_root, path); | 4473 | btrfs_release_path(path); |
4177 | path->leave_spinning = 1; | 4474 | path->leave_spinning = 1; |
4178 | 4475 | ||
4179 | key.objectid = bytenr; | 4476 | key.objectid = bytenr; |
@@ -4243,7 +4540,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4243 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 4540 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
4244 | num_to_del); | 4541 | num_to_del); |
4245 | BUG_ON(ret); | 4542 | BUG_ON(ret); |
4246 | btrfs_release_path(extent_root, path); | 4543 | btrfs_release_path(path); |
4247 | 4544 | ||
4248 | if (is_data) { | 4545 | if (is_data) { |
4249 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); | 4546 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); |
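
The btrfs_release_path() calls in these hunks have lost their root argument: releasing a path only drops the locks and extent-buffer references stored in the path itself, so the root was never used. A reduced userspace model of such a helper (the structure layout is illustrative):

    #include <stddef.h>

    #define MAX_LEVEL 8

    struct node { int refs; };

    struct path {
        struct node *nodes[MAX_LEVEL];
        int locks[MAX_LEVEL];
    };

    /* everything to unwind lives in the path; no tree root needed */
    static void release_path(struct path *p)
    {
        for (int i = 0; i < MAX_LEVEL; i++) {
            if (!p->nodes[i])
                continue;
            p->locks[i] = 0;      /* model: drop the lock */
            p->nodes[i]->refs--;  /* model: free_extent_buffer() */
            p->nodes[i] = NULL;
        }
    }
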
@@ -4378,10 +4675,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
4378 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | 4675 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); |
4379 | 4676 | ||
4380 | btrfs_add_free_space(cache, buf->start, buf->len); | 4677 | btrfs_add_free_space(cache, buf->start, buf->len); |
4381 | ret = update_reserved_bytes(cache, buf->len, 0, 0); | 4678 | ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); |
4382 | if (ret == -EAGAIN) { | 4679 | if (ret == -EAGAIN) { |
4383 | /* block group became read-only */ | 4680 | /* block group became read-only */ |
4384 | update_reserved_bytes(cache, buf->len, 0, 1); | 4681 | btrfs_update_reserved_bytes(cache, buf->len, 0, 1); |
4385 | goto out; | 4682 | goto out; |
4386 | } | 4683 | } |
4387 | 4684 | ||
@@ -4396,6 +4693,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
4396 | if (ret) { | 4693 | if (ret) { |
4397 | spin_lock(&cache->space_info->lock); | 4694 | spin_lock(&cache->space_info->lock); |
4398 | cache->space_info->bytes_reserved -= buf->len; | 4695 | cache->space_info->bytes_reserved -= buf->len; |
4696 | cache->space_info->reservation_progress++; | ||
4399 | spin_unlock(&cache->space_info->lock); | 4697 | spin_unlock(&cache->space_info->lock); |
4400 | } | 4698 | } |
4401 | goto out; | 4699 | goto out; |
@@ -4417,6 +4715,11 @@ pin: | |||
4417 | } | 4715 | } |
4418 | } | 4716 | } |
4419 | out: | 4717 | out: |
4718 | /* | ||
4719 | * Deleting the buffer, clear the corrupt flag since it doesn't matter | ||
4720 | * anymore. | ||
4721 | */ | ||
4722 | clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); | ||
4420 | btrfs_put_block_group(cache); | 4723 | btrfs_put_block_group(cache); |
4421 | } | 4724 | } |
4422 | 4725 | ||
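
The new clear_bit() records that a corruption verdict on a buffer is meaningless once the buffer is being deleted, so the bit must not survive into a reuse of the buffer. A tiny model of that flag lifecycle, with hypothetical flag bits in place of the kernel's EXTENT_BUFFER_* constants:

    #include <stdbool.h>
    #include <stdio.h>

    enum { EB_DIRTY, EB_CORRUPT };   /* hypothetical bit numbers */

    struct extent_buffer { unsigned long bflags; };

    static void set_flag(struct extent_buffer *b, int f)   { b->bflags |= 1UL << f; }
    static void clear_flag(struct extent_buffer *b, int f) { b->bflags &= ~(1UL << f); }
    static bool test_flag(struct extent_buffer *b, int f)  { return b->bflags & (1UL << f); }

    static void free_tree_block(struct extent_buffer *buf)
    {
        /* deleting the buffer: the corrupt verdict no longer matters */
        clear_flag(buf, EB_CORRUPT);
    }

    int main(void)
    {
        struct extent_buffer buf = { 0 };

        set_flag(&buf, EB_CORRUPT);
        free_tree_block(&buf);
        printf("corrupt after free: %d\n", test_flag(&buf, EB_CORRUPT));
        return 0;
    }
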
@@ -4480,7 +4783,7 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, | |||
4480 | return 0; | 4783 | return 0; |
4481 | 4784 | ||
4482 | wait_event(caching_ctl->wait, block_group_cache_done(cache) || | 4785 | wait_event(caching_ctl->wait, block_group_cache_done(cache) || |
4483 | (cache->free_space >= num_bytes)); | 4786 | (cache->free_space_ctl->free_space >= num_bytes)); |
4484 | 4787 | ||
4485 | put_caching_control(caching_ctl); | 4788 | put_caching_control(caching_ctl); |
4486 | return 0; | 4789 | return 0; |
@@ -4539,7 +4842,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4539 | u64 num_bytes, u64 empty_size, | 4842 | u64 num_bytes, u64 empty_size, |
4540 | u64 search_start, u64 search_end, | 4843 | u64 search_start, u64 search_end, |
4541 | u64 hint_byte, struct btrfs_key *ins, | 4844 | u64 hint_byte, struct btrfs_key *ins, |
4542 | int data) | 4845 | u64 data) |
4543 | { | 4846 | { |
4544 | int ret = 0; | 4847 | int ret = 0; |
4545 | struct btrfs_root *root = orig_root->fs_info->extent_root; | 4848 | struct btrfs_root *root = orig_root->fs_info->extent_root; |
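
The data parameter is widened from int to u64 here because it carries the 64-bit block-group flags mask, and the printk in the next hunk switches from %d to %llu to match the wider type. The rule it illustrates, as a standalone program:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long flags = 1ULL << 34;   /* wider than any int */

        /* %llu matches unsigned long long (u64); %d would be undefined */
        printf("No space info for %llu\n", flags);
        return 0;
    }
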
@@ -4555,6 +4858,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4555 | bool found_uncached_bg = false; | 4858 | bool found_uncached_bg = false; |
4556 | bool failed_cluster_refill = false; | 4859 | bool failed_cluster_refill = false; |
4557 | bool failed_alloc = false; | 4860 | bool failed_alloc = false; |
4861 | bool use_cluster = true; | ||
4558 | u64 ideal_cache_percent = 0; | 4862 | u64 ideal_cache_percent = 0; |
4559 | u64 ideal_cache_offset = 0; | 4863 | u64 ideal_cache_offset = 0; |
4560 | 4864 | ||
@@ -4565,20 +4869,28 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4565 | 4869 | ||
4566 | space_info = __find_space_info(root->fs_info, data); | 4870 | space_info = __find_space_info(root->fs_info, data); |
4567 | if (!space_info) { | 4871 | if (!space_info) { |
4568 | printk(KERN_ERR "No space info for %d\n", data); | 4872 | printk(KERN_ERR "No space info for %llu\n", data); |
4569 | return -ENOSPC; | 4873 | return -ENOSPC; |
4570 | } | 4874 | } |
4571 | 4875 | ||
4876 | /* | ||
4877 | * If the space info is for both data and metadata it means we have a | ||
4878 | * small filesystem and we can't use the clustering stuff. | ||
4879 | */ | ||
4880 | if (btrfs_mixed_space_info(space_info)) | ||
4881 | use_cluster = false; | ||
4882 | |||
4572 | if (orig_root->ref_cows || empty_size) | 4883 | if (orig_root->ref_cows || empty_size) |
4573 | allowed_chunk_alloc = 1; | 4884 | allowed_chunk_alloc = 1; |
4574 | 4885 | ||
4575 | if (data & BTRFS_BLOCK_GROUP_METADATA) { | 4886 | if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { |
4576 | last_ptr = &root->fs_info->meta_alloc_cluster; | 4887 | last_ptr = &root->fs_info->meta_alloc_cluster; |
4577 | if (!btrfs_test_opt(root, SSD)) | 4888 | if (!btrfs_test_opt(root, SSD)) |
4578 | empty_cluster = 64 * 1024; | 4889 | empty_cluster = 64 * 1024; |
4579 | } | 4890 | } |
4580 | 4891 | ||
4581 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { | 4892 | if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster && |
4893 | btrfs_test_opt(root, SSD)) { | ||
4582 | last_ptr = &root->fs_info->data_alloc_cluster; | 4894 | last_ptr = &root->fs_info->data_alloc_cluster; |
4583 | } | 4895 | } |
4584 | 4896 | ||
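
btrfs_mixed_space_info() turns the cluster allocator off when one space_info serves both data and metadata, the small-filesystem case the new comment describes. That test boils down to both type bits being set on the same flags word; a sketch with illustrative bit values:

    #include <stdbool.h>

    #define BLOCK_GROUP_DATA     (1ULL << 0)   /* illustrative bit layout */
    #define BLOCK_GROUP_METADATA (1ULL << 2)

    struct space_info { unsigned long long flags; };

    static bool mixed_space_info(const struct space_info *si)
    {
        return (si->flags & BLOCK_GROUP_DATA) &&
               (si->flags & BLOCK_GROUP_METADATA);
    }
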
@@ -4638,10 +4950,34 @@ search: | |||
4638 | btrfs_get_block_group(block_group); | 4950 | btrfs_get_block_group(block_group); |
4639 | search_start = block_group->key.objectid; | 4951 | search_start = block_group->key.objectid; |
4640 | 4952 | ||
4953 | /* | ||
4954 | * this can happen if we end up cycling through all the | ||
4955 | * raid types, but we want to make sure we only allocate | ||
4956 | * for the proper type. | ||
4957 | */ | ||
4958 | if (!block_group_bits(block_group, data)) { | ||
4959 | u64 extra = BTRFS_BLOCK_GROUP_DUP | | ||
4960 | BTRFS_BLOCK_GROUP_RAID1 | | ||
4961 | BTRFS_BLOCK_GROUP_RAID10; | ||
4962 | |||
4963 | /* | ||
4964 | * if they asked for extra copies and this block group | ||
4965 | * doesn't provide them, bail. This does allow us to | ||
4966 | * fill raid0 from raid1. | ||
4967 | */ | ||
4968 | if ((data & extra) && !(block_group->flags & extra)) | ||
4969 | goto loop; | ||
4970 | } | ||
4971 | |||
4641 | have_block_group: | 4972 | have_block_group: |
4642 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { | 4973 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { |
4643 | u64 free_percent; | 4974 | u64 free_percent; |
4644 | 4975 | ||
4976 | ret = cache_block_group(block_group, trans, | ||
4977 | orig_root, 1); | ||
4978 | if (block_group->cached == BTRFS_CACHE_FINISHED) | ||
4979 | goto have_block_group; | ||
4980 | |||
4645 | free_percent = btrfs_block_group_used(&block_group->item); | 4981 | free_percent = btrfs_block_group_used(&block_group->item); |
4646 | free_percent *= 100; | 4982 | free_percent *= 100; |
4647 | free_percent = div64_u64(free_percent, | 4983 | free_percent = div64_u64(free_percent, |
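
The block_group_bits() check added above keeps the allocator from satisfying a request that asked for redundancy (DUP/RAID1/RAID10) out of a block group that holds no extra copies, while still letting a plain RAID0 request be filled from a RAID1 group. The same decision as a bitmask exercise (the bit layout is illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    #define BG_RAID0  (1ULL << 3)   /* illustrative bit layout */
    #define BG_RAID1  (1ULL << 4)
    #define BG_DUP    (1ULL << 5)
    #define BG_RAID10 (1ULL << 6)

    static bool may_use_block_group(unsigned long long wanted,
                                    unsigned long long group_flags)
    {
        unsigned long long extra = BG_DUP | BG_RAID1 | BG_RAID10;

        /* asked for extra copies, group provides none: bail */
        if ((wanted & extra) && !(group_flags & extra))
            return false;
        return true;
    }

    int main(void)
    {
        printf("raid1 from raid0 group: %d\n",
               may_use_block_group(BG_RAID1, BG_RAID0));  /* 0 */
        printf("raid0 from raid1 group: %d\n",
               may_use_block_group(BG_RAID0, BG_RAID1));  /* 1 */
        return 0;
    }
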
@@ -4662,7 +4998,8 @@ have_block_group: | |||
4662 | if (loop > LOOP_CACHING_NOWAIT || | 4998 | if (loop > LOOP_CACHING_NOWAIT || |
4663 | (loop > LOOP_FIND_IDEAL && | 4999 | (loop > LOOP_FIND_IDEAL && |
4664 | atomic_read(&space_info->caching_threads) < 2)) { | 5000 | atomic_read(&space_info->caching_threads) < 2)) { |
4665 | ret = cache_block_group(block_group); | 5001 | ret = cache_block_group(block_group, trans, |
5002 | orig_root, 0); | ||
4666 | BUG_ON(ret); | 5003 | BUG_ON(ret); |
4667 | } | 5004 | } |
4668 | found_uncached_bg = true; | 5005 | found_uncached_bg = true; |
@@ -4682,6 +5019,15 @@ have_block_group: | |||
4682 | if (unlikely(block_group->ro)) | 5019 | if (unlikely(block_group->ro)) |
4683 | goto loop; | 5020 | goto loop; |
4684 | 5021 | ||
5022 | spin_lock(&block_group->free_space_ctl->tree_lock); | ||
5023 | if (cached && | ||
5024 | block_group->free_space_ctl->free_space < | ||
5025 | num_bytes + empty_size) { | ||
5026 | spin_unlock(&block_group->free_space_ctl->tree_lock); | ||
5027 | goto loop; | ||
5028 | } | ||
5029 | spin_unlock(&block_group->free_space_ctl->tree_lock); | ||
5030 | |||
4685 | /* | 5031 | /* |
4686 | * Ok we want to try and use the cluster allocator, so lets look | 5032 | * Ok we want to try and use the cluster allocator, so lets look |
4687 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will | 5033 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will |
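
The tree_lock section added in this hunk is a cheap short-circuit: if the cached free_space total of a fully cached group cannot cover num_bytes + empty_size, the group is skipped without walking its free-space tree at all. A reduced model, with a mutex standing in for the spinlock:

    #include <pthread.h>
    #include <stdbool.h>

    struct free_space_ctl {
        pthread_mutex_t tree_lock;      /* spinlock in the kernel */
        unsigned long long free_space;  /* cached running total */
    };

    static bool group_may_fit(struct free_space_ctl *ctl,
                              unsigned long long num_bytes,
                              unsigned long long empty_size)
    {
        bool fits;

        pthread_mutex_lock(&ctl->tree_lock);
        fits = ctl->free_space >= num_bytes + empty_size;
        pthread_mutex_unlock(&ctl->tree_lock);
        return fits;   /* false: skip this block group outright */
    }
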
@@ -4830,7 +5176,7 @@ checks: | |||
4830 | search_start - offset); | 5176 | search_start - offset); |
4831 | BUG_ON(offset > search_start); | 5177 | BUG_ON(offset > search_start); |
4832 | 5178 | ||
4833 | ret = update_reserved_bytes(block_group, num_bytes, 1, | 5179 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, |
4834 | (data & BTRFS_BLOCK_GROUP_DATA)); | 5180 | (data & BTRFS_BLOCK_GROUP_DATA)); |
4835 | if (ret == -EAGAIN) { | 5181 | if (ret == -EAGAIN) { |
4836 | btrfs_add_free_space(block_group, offset, num_bytes); | 5182 | btrfs_add_free_space(block_group, offset, num_bytes); |
@@ -4845,6 +5191,7 @@ checks: | |||
4845 | btrfs_add_free_space(block_group, offset, | 5191 | btrfs_add_free_space(block_group, offset, |
4846 | search_start - offset); | 5192 | search_start - offset); |
4847 | BUG_ON(offset > search_start); | 5193 | BUG_ON(offset > search_start); |
5194 | btrfs_put_block_group(block_group); | ||
4848 | break; | 5195 | break; |
4849 | loop: | 5196 | loop: |
4850 | failed_cluster_refill = false; | 5197 | failed_cluster_refill = false; |
@@ -4867,9 +5214,7 @@ loop: | |||
4867 | * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try | 5214 | * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try |
4868 | * again | 5215 | * again |
4869 | */ | 5216 | */ |
4870 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && | 5217 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) { |
4871 | (found_uncached_bg || empty_size || empty_cluster || | ||
4872 | allowed_chunk_alloc)) { | ||
4873 | index = 0; | 5218 | index = 0; |
4874 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { | 5219 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
4875 | found_uncached_bg = false; | 5220 | found_uncached_bg = false; |
@@ -4909,40 +5254,39 @@ loop: | |||
4909 | goto search; | 5254 | goto search; |
4910 | } | 5255 | } |
4911 | 5256 | ||
4912 | if (loop < LOOP_CACHING_WAIT) { | 5257 | loop++; |
4913 | loop++; | ||
4914 | goto search; | ||
4915 | } | ||
4916 | 5258 | ||
4917 | if (loop == LOOP_ALLOC_CHUNK) { | 5259 | if (loop == LOOP_ALLOC_CHUNK) { |
4918 | empty_size = 0; | 5260 | if (allowed_chunk_alloc) { |
4919 | empty_cluster = 0; | 5261 | ret = do_chunk_alloc(trans, root, num_bytes + |
4920 | } | 5262 | 2 * 1024 * 1024, data, |
5263 | CHUNK_ALLOC_LIMITED); | ||
5264 | allowed_chunk_alloc = 0; | ||
5265 | if (ret == 1) | ||
5266 | done_chunk_alloc = 1; | ||
5267 | } else if (!done_chunk_alloc && | ||
5268 | space_info->force_alloc == | ||
5269 | CHUNK_ALLOC_NO_FORCE) { | ||
5270 | space_info->force_alloc = CHUNK_ALLOC_LIMITED; | ||
5271 | } | ||
4921 | 5272 | ||
4922 | if (allowed_chunk_alloc) { | 5273 | /* |
4923 | ret = do_chunk_alloc(trans, root, num_bytes + | 5274 | * We didn't allocate a chunk, go ahead and drop the |
4924 | 2 * 1024 * 1024, data, 1); | 5275 | * empty size and loop again. |
4925 | allowed_chunk_alloc = 0; | 5276 | */ |
4926 | done_chunk_alloc = 1; | 5277 | if (!done_chunk_alloc) |
4927 | } else if (!done_chunk_alloc) { | 5278 | loop = LOOP_NO_EMPTY_SIZE; |
4928 | space_info->force_alloc = 1; | ||
4929 | } | 5279 | } |
4930 | 5280 | ||
4931 | if (loop < LOOP_NO_EMPTY_SIZE) { | 5281 | if (loop == LOOP_NO_EMPTY_SIZE) { |
4932 | loop++; | 5282 | empty_size = 0; |
4933 | goto search; | 5283 | empty_cluster = 0; |
4934 | } | 5284 | } |
4935 | ret = -ENOSPC; | 5285 | |
5286 | goto search; | ||
4936 | } else if (!ins->objectid) { | 5287 | } else if (!ins->objectid) { |
4937 | ret = -ENOSPC; | 5288 | ret = -ENOSPC; |
4938 | } | 5289 | } else if (ins->objectid) { |
4939 | |||
4940 | /* we found what we needed */ | ||
4941 | if (ins->objectid) { | ||
4942 | if (!(data & BTRFS_BLOCK_GROUP_DATA)) | ||
4943 | trans->block_group = block_group->key.objectid; | ||
4944 | |||
4945 | btrfs_put_block_group(block_group); | ||
4946 | ret = 0; | 5290 | ret = 0; |
4947 | } | 5291 | } |
4948 | 5292 | ||
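
The net effect of this rewrite is a forward-marching state machine: loop now always advances one stage per pass, chunk allocation happens at LOOP_ALLOC_CHUNK, a failed allocation skips straight ahead to LOOP_NO_EMPTY_SIZE, and that final stage retries once with empty_size and empty_cluster forced to zero before -ENOSPC is returned. A compilable skeleton of the control flow, with the stage names taken from the diff and the searches stubbed to always miss:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    enum loop_stage {
        LOOP_FIND_IDEAL,
        LOOP_CACHING_NOWAIT,
        LOOP_CACHING_WAIT,
        LOOP_ALLOC_CHUNK,
        LOOP_NO_EMPTY_SIZE,   /* last pass before -ENOSPC */
    };

    /* stubs standing in for the real search and chunk allocation */
    static bool search_all_block_groups(unsigned long long empty_size,
                                        unsigned long long empty_cluster)
    {
        (void)empty_size; (void)empty_cluster;
        return false;   /* pretend every search misses */
    }

    static bool chunk_alloc(void) { return false; }   /* pretend it fails */

    static int find_free_extent_skeleton(void)
    {
        unsigned long long empty_size = 65536, empty_cluster = 65536;
        enum loop_stage loop = LOOP_FIND_IDEAL;

        for (;;) {
            if (search_all_block_groups(empty_size, empty_cluster))
                return 0;                        /* found space */
            if (loop == LOOP_NO_EMPTY_SIZE)
                return -ENOSPC;                  /* tried everything */
            loop++;
            if (loop == LOOP_ALLOC_CHUNK && !chunk_alloc())
                loop = LOOP_NO_EMPTY_SIZE;       /* skip ahead */
            if (loop == LOOP_NO_EMPTY_SIZE)
                empty_size = empty_cluster = 0;  /* last-chance retry */
        }
    }

    int main(void)
    {
        printf("allocator returned %d\n", find_free_extent_skeleton());
        return 0;
    }
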
@@ -5011,7 +5355,8 @@ again: | |||
5011 | */ | 5355 | */ |
5012 | if (empty_size || root->ref_cows) | 5356 | if (empty_size || root->ref_cows) |
5013 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 5357 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
5014 | num_bytes + 2 * 1024 * 1024, data, 0); | 5358 | num_bytes + 2 * 1024 * 1024, data, |
5359 | CHUNK_ALLOC_NO_FORCE); | ||
5015 | 5360 | ||
5016 | WARN_ON(num_bytes < root->sectorsize); | 5361 | WARN_ON(num_bytes < root->sectorsize); |
5017 | ret = find_free_extent(trans, root, num_bytes, empty_size, | 5362 | ret = find_free_extent(trans, root, num_bytes, empty_size, |
@@ -5023,10 +5368,10 @@ again: | |||
5023 | num_bytes = num_bytes & ~(root->sectorsize - 1); | 5368 | num_bytes = num_bytes & ~(root->sectorsize - 1); |
5024 | num_bytes = max(num_bytes, min_alloc_size); | 5369 | num_bytes = max(num_bytes, min_alloc_size); |
5025 | do_chunk_alloc(trans, root->fs_info->extent_root, | 5370 | do_chunk_alloc(trans, root->fs_info->extent_root, |
5026 | num_bytes, data, 1); | 5371 | num_bytes, data, CHUNK_ALLOC_FORCE); |
5027 | goto again; | 5372 | goto again; |
5028 | } | 5373 | } |
5029 | if (ret == -ENOSPC) { | 5374 | if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { |
5030 | struct btrfs_space_info *sinfo; | 5375 | struct btrfs_space_info *sinfo; |
5031 | 5376 | ||
5032 | sinfo = __find_space_info(root->fs_info, data); | 5377 | sinfo = __find_space_info(root->fs_info, data); |
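
do_chunk_alloc()'s last argument is no longer a bare 0/1; these hunks thread through three escalation levels, CHUNK_ALLOC_NO_FORCE, CHUNK_ALLOC_LIMITED and CHUNK_ALLOC_FORCE, which tell the allocator how hard to try to create a new chunk. A sketch of how such a tri-state gate might be consulted; the thresholds below are illustrative, not the kernel's actual policy:

    #include <stdbool.h>
    #include <stdio.h>

    enum chunk_alloc_ctl {
        CHUNK_ALLOC_NO_FORCE,   /* only if space is genuinely tight */
        CHUNK_ALLOC_LIMITED,    /* a caller already hit trouble once */
        CHUNK_ALLOC_FORCE,      /* allocate unconditionally */
    };

    static bool should_alloc_chunk(unsigned long long bytes_used,
                                   unsigned long long total_bytes,
                                   enum chunk_alloc_ctl force)
    {
        switch (force) {
        case CHUNK_ALLOC_FORCE:
            return true;
        case CHUNK_ALLOC_LIMITED:
            return bytes_used * 2 >= total_bytes;      /* >= 50% used */
        case CHUNK_ALLOC_NO_FORCE:
        default:
            return bytes_used * 10 >= total_bytes * 8; /* >= 80% used */
        }
    }

    int main(void)
    {
        /* 60 used of 100: LIMITED says allocate, NO_FORCE does not */
        printf("%d %d\n",
               should_alloc_chunk(60, 100, CHUNK_ALLOC_LIMITED),
               should_alloc_chunk(60, 100, CHUNK_ALLOC_NO_FORCE));
        return 0;
    }
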
@@ -5036,6 +5381,8 @@ again: | |||
5036 | dump_space_info(sinfo, num_bytes, 1); | 5381 | dump_space_info(sinfo, num_bytes, 1); |
5037 | } | 5382 | } |
5038 | 5383 | ||
5384 | trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); | ||
5385 | |||
5039 | return ret; | 5386 | return ret; |
5040 | } | 5387 | } |
5041 | 5388 | ||
@@ -5051,12 +5398,15 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
5051 | return -ENOSPC; | 5398 | return -ENOSPC; |
5052 | } | 5399 | } |
5053 | 5400 | ||
5054 | ret = btrfs_discard_extent(root, start, len); | 5401 | if (btrfs_test_opt(root, DISCARD)) |
5402 | ret = btrfs_discard_extent(root, start, len, NULL); | ||
5055 | 5403 | ||
5056 | btrfs_add_free_space(cache, start, len); | 5404 | btrfs_add_free_space(cache, start, len); |
5057 | update_reserved_bytes(cache, len, 0, 1); | 5405 | btrfs_update_reserved_bytes(cache, len, 0, 1); |
5058 | btrfs_put_block_group(cache); | 5406 | btrfs_put_block_group(cache); |
5059 | 5407 | ||
5408 | trace_btrfs_reserved_extent_free(root, start, len); | ||
5409 | |||
5060 | return ret; | 5410 | return ret; |
5061 | } | 5411 | } |
5062 | 5412 | ||
@@ -5083,7 +5433,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
5083 | size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); | 5433 | size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); |
5084 | 5434 | ||
5085 | path = btrfs_alloc_path(); | 5435 | path = btrfs_alloc_path(); |
5086 | BUG_ON(!path); | 5436 | if (!path) |
5437 | return -ENOMEM; | ||
5087 | 5438 | ||
5088 | path->leave_spinning = 1; | 5439 | path->leave_spinning = 1; |
5089 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, | 5440 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, |
@@ -5219,7 +5570,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
5219 | u64 num_bytes = ins->offset; | 5570 | u64 num_bytes = ins->offset; |
5220 | 5571 | ||
5221 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 5572 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
5222 | cache_block_group(block_group); | 5573 | cache_block_group(block_group, trans, NULL, 0); |
5223 | caching_ctl = get_caching_control(block_group); | 5574 | caching_ctl = get_caching_control(block_group); |
5224 | 5575 | ||
5225 | if (!caching_ctl) { | 5576 | if (!caching_ctl) { |
@@ -5253,7 +5604,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
5253 | put_caching_control(caching_ctl); | 5604 | put_caching_control(caching_ctl); |
5254 | } | 5605 | } |
5255 | 5606 | ||
5256 | ret = update_reserved_bytes(block_group, ins->offset, 1, 1); | 5607 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); |
5257 | BUG_ON(ret); | 5608 | BUG_ON(ret); |
5258 | btrfs_put_block_group(block_group); | 5609 | btrfs_put_block_group(block_group); |
5259 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 5610 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
@@ -5304,25 +5655,47 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
5304 | struct btrfs_root *root, u32 blocksize) | 5655 | struct btrfs_root *root, u32 blocksize) |
5305 | { | 5656 | { |
5306 | struct btrfs_block_rsv *block_rsv; | 5657 | struct btrfs_block_rsv *block_rsv; |
5658 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | ||
5307 | int ret; | 5659 | int ret; |
5308 | 5660 | ||
5309 | block_rsv = get_block_rsv(trans, root); | 5661 | block_rsv = get_block_rsv(trans, root); |
5310 | 5662 | ||
5311 | if (block_rsv->size == 0) { | 5663 | if (block_rsv->size == 0) { |
5312 | ret = reserve_metadata_bytes(block_rsv, blocksize); | 5664 | ret = reserve_metadata_bytes(trans, root, block_rsv, |
5313 | if (ret) | 5665 | blocksize, 0); |
5666 | /* | ||
5667 | * If we couldn't reserve metadata bytes try and use some from | ||
5668 | * the global reserve. | ||
5669 | */ | ||
5670 | if (ret && block_rsv != global_rsv) { | ||
5671 | ret = block_rsv_use_bytes(global_rsv, blocksize); | ||
5672 | if (!ret) | ||
5673 | return global_rsv; | ||
5314 | return ERR_PTR(ret); | 5674 | return ERR_PTR(ret); |
5675 | } else if (ret) { | ||
5676 | return ERR_PTR(ret); | ||
5677 | } | ||
5315 | return block_rsv; | 5678 | return block_rsv; |
5316 | } | 5679 | } |
5317 | 5680 | ||
5318 | ret = block_rsv_use_bytes(block_rsv, blocksize); | 5681 | ret = block_rsv_use_bytes(block_rsv, blocksize); |
5319 | if (!ret) | 5682 | if (!ret) |
5320 | return block_rsv; | 5683 | return block_rsv; |
5321 | 5684 | if (ret) { | |
5322 | WARN_ON(1); | 5685 | WARN_ON(1); |
5323 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | 5686 | ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, |
5324 | block_rsv->size, block_rsv->reserved, | 5687 | 0); |
5325 | block_rsv->freed[0], block_rsv->freed[1]); | 5688 | if (!ret) { |
5689 | spin_lock(&block_rsv->lock); | ||
5690 | block_rsv->size += blocksize; | ||
5691 | spin_unlock(&block_rsv->lock); | ||
5692 | return block_rsv; | ||
5693 | } else if (ret && block_rsv != global_rsv) { | ||
5694 | ret = block_rsv_use_bytes(global_rsv, blocksize); | ||
5695 | if (!ret) | ||
5696 | return global_rsv; | ||
5697 | } | ||
5698 | } | ||
5326 | 5699 | ||
5327 | return ERR_PTR(-ENOSPC); | 5700 | return ERR_PTR(-ENOSPC); |
5328 | } | 5701 | } |
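
The rewritten use_block_rsv() now has a fallback chain: take from the transaction's block reserve if it has bytes, otherwise try to reserve fresh metadata bytes, and only as a last resort draw on the global reserve. A single-threaded model with the reserves reduced to plain counters (reserve_metadata_bytes() is stubbed to always fail so the whole chain is visible):

    #include <errno.h>
    #include <stddef.h>

    struct block_rsv { unsigned long long reserved; };

    static int rsv_use_bytes(struct block_rsv *rsv, unsigned long long n)
    {
        if (rsv->reserved < n)
            return -ENOSPC;
        rsv->reserved -= n;
        return 0;
    }

    /* stub: the kernel version may flush delalloc, commit, etc. */
    static int reserve_metadata_bytes(struct block_rsv *rsv,
                                      unsigned long long n)
    {
        (void)rsv; (void)n;
        return -ENOSPC;
    }

    static struct block_rsv *use_block_rsv(struct block_rsv *trans_rsv,
                                           struct block_rsv *global_rsv,
                                           unsigned long long blocksize)
    {
        if (!rsv_use_bytes(trans_rsv, blocksize))
            return trans_rsv;
        if (!reserve_metadata_bytes(trans_rsv, blocksize))
            return trans_rsv;
        if (!rsv_use_bytes(global_rsv, blocksize))   /* last resort */
            return global_rsv;
        return NULL;                                 /* -ENOSPC */
    }
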
@@ -5422,7 +5795,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, | |||
5422 | u64 generation; | 5795 | u64 generation; |
5423 | u64 refs; | 5796 | u64 refs; |
5424 | u64 flags; | 5797 | u64 flags; |
5425 | u64 last = 0; | ||
5426 | u32 nritems; | 5798 | u32 nritems; |
5427 | u32 blocksize; | 5799 | u32 blocksize; |
5428 | struct btrfs_key key; | 5800 | struct btrfs_key key; |
@@ -5490,7 +5862,6 @@ reada: | |||
5490 | generation); | 5862 | generation); |
5491 | if (ret) | 5863 | if (ret) |
5492 | break; | 5864 | break; |
5493 | last = bytenr + blocksize; | ||
5494 | nread++; | 5865 | nread++; |
5495 | } | 5866 | } |
5496 | wc->reada_slot = slot; | 5867 | wc->reada_slot = slot; |
@@ -5666,6 +6037,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
5666 | if (reada && level == 1) | 6037 | if (reada && level == 1) |
5667 | reada_walk_down(trans, root, wc, path); | 6038 | reada_walk_down(trans, root, wc, path); |
5668 | next = read_tree_block(root, bytenr, blocksize, generation); | 6039 | next = read_tree_block(root, bytenr, blocksize, generation); |
6040 | if (!next) | ||
6041 | return -EIO; | ||
5669 | btrfs_tree_lock(next); | 6042 | btrfs_tree_lock(next); |
5670 | btrfs_set_lock_blocking(next); | 6043 | btrfs_set_lock_blocking(next); |
5671 | } | 6044 | } |
@@ -5898,6 +6271,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
5898 | BUG_ON(!wc); | 6271 | BUG_ON(!wc); |
5899 | 6272 | ||
5900 | trans = btrfs_start_transaction(tree_root, 0); | 6273 | trans = btrfs_start_transaction(tree_root, 0); |
6274 | BUG_ON(IS_ERR(trans)); | ||
6275 | |||
5901 | if (block_rsv) | 6276 | if (block_rsv) |
5902 | trans->block_rsv = block_rsv; | 6277 | trans->block_rsv = block_rsv; |
5903 | 6278 | ||
@@ -5995,11 +6370,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
5995 | 6370 | ||
5996 | btrfs_end_transaction_throttle(trans, tree_root); | 6371 | btrfs_end_transaction_throttle(trans, tree_root); |
5997 | trans = btrfs_start_transaction(tree_root, 0); | 6372 | trans = btrfs_start_transaction(tree_root, 0); |
6373 | BUG_ON(IS_ERR(trans)); | ||
5998 | if (block_rsv) | 6374 | if (block_rsv) |
5999 | trans->block_rsv = block_rsv; | 6375 | trans->block_rsv = block_rsv; |
6000 | } | 6376 | } |
6001 | } | 6377 | } |
6002 | btrfs_release_path(root, path); | 6378 | btrfs_release_path(path); |
6003 | BUG_ON(err); | 6379 | BUG_ON(err); |
6004 | 6380 | ||
6005 | ret = btrfs_del_root(trans, tree_root, &root->root_key); | 6381 | ret = btrfs_del_root(trans, tree_root, &root->root_key); |
@@ -6010,9 +6386,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6010 | NULL, NULL); | 6386 | NULL, NULL); |
6011 | BUG_ON(ret < 0); | 6387 | BUG_ON(ret < 0); |
6012 | if (ret > 0) { | 6388 | if (ret > 0) { |
6013 | ret = btrfs_del_orphan_item(trans, tree_root, | 6389 | /* if we fail to delete the orphan item this time |
6014 | root->root_key.objectid); | 6390 | * around, it'll get picked up the next time. |
6015 | BUG_ON(ret); | 6391 | * |
6392 | * The most common failure here is just -ENOENT. | ||
6393 | */ | ||
6394 | btrfs_del_orphan_item(trans, tree_root, | ||
6395 | root->root_key.objectid); | ||
6016 | } | 6396 | } |
6017 | } | 6397 | } |
6018 | 6398 | ||
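
Dropping the BUG_ON() around the orphan-item deletion reflects that the failure is harmless: the common error is -ENOENT, meaning the item is already gone, and if an item does survive it will be removed by the next orphan scan. The tolerant shape, in miniature:

    #include <errno.h>
    #include <stdio.h>

    /* stub: returns -ENOENT when the item is already gone */
    static int del_orphan_item(unsigned long long objectid)
    {
        (void)objectid;
        return -ENOENT;
    }

    static void drop_snapshot_cleanup(unsigned long long objectid)
    {
        int ret = del_orphan_item(objectid);

        /*
         * Ignore the error: a leftover item is picked up by the
         * next orphan scan, so crashing here would gain nothing.
         */
        if (ret)
            fprintf(stderr, "orphan item not removed (%d), deferring\n", ret);
    }

    int main(void)
    {
        drop_snapshot_cleanup(256);
        return 0;
    }
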
@@ -6050,10 +6430,14 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
6050 | BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | 6430 | BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); |
6051 | 6431 | ||
6052 | path = btrfs_alloc_path(); | 6432 | path = btrfs_alloc_path(); |
6053 | BUG_ON(!path); | 6433 | if (!path) |
6434 | return -ENOMEM; | ||
6054 | 6435 | ||
6055 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 6436 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
6056 | BUG_ON(!wc); | 6437 | if (!wc) { |
6438 | btrfs_free_path(path); | ||
6439 | return -ENOMEM; | ||
6440 | } | ||
6057 | 6441 | ||
6058 | btrfs_assert_tree_locked(parent); | 6442 | btrfs_assert_tree_locked(parent); |
6059 | parent_level = btrfs_header_level(parent); | 6443 | parent_level = btrfs_header_level(parent); |
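
btrfs_drop_subtree() now unwinds on allocation failure instead of BUG_ON()ing, taking care to free the already-allocated path when the walk-control allocation fails. The canonical two-step unwind, extracted into a standalone sketch (names are illustrative):

    #include <errno.h>
    #include <stdlib.h>

    struct path { int level; };
    struct walk_control { int stage; };

    static int drop_subtree_setup(struct path **pp, struct walk_control **wp)
    {
        struct path *path = calloc(1, sizeof(*path));
        if (!path)
            return -ENOMEM;

        struct walk_control *wc = calloc(1, sizeof(*wc));
        if (!wc) {
            free(path);   /* unwind the allocation that succeeded */
            return -ENOMEM;
        }

        *pp = path;
        *wp = wc;
        return 0;
    }
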
@@ -6095,1500 +6479,20 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
6095 | return ret; | 6479 | return ret; |
6096 | } | 6480 | } |
6097 | 6481 | ||
6098 | #if 0 | ||
6099 | static unsigned long calc_ra(unsigned long start, unsigned long last, | ||
6100 | unsigned long nr) | ||
6101 | { | ||
6102 | return min(last, start + nr - 1); | ||
6103 | } | ||
6104 | |||
6105 | static noinline int relocate_inode_pages(struct inode *inode, u64 start, | ||
6106 | u64 len) | ||
6107 | { | ||
6108 | u64 page_start; | ||
6109 | u64 page_end; | ||
6110 | unsigned long first_index; | ||
6111 | unsigned long last_index; | ||
6112 | unsigned long i; | ||
6113 | struct page *page; | ||
6114 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
6115 | struct file_ra_state *ra; | ||
6116 | struct btrfs_ordered_extent *ordered; | ||
6117 | unsigned int total_read = 0; | ||
6118 | unsigned int total_dirty = 0; | ||
6119 | int ret = 0; | ||
6120 | |||
6121 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | ||
6122 | |||
6123 | mutex_lock(&inode->i_mutex); | ||
6124 | first_index = start >> PAGE_CACHE_SHIFT; | ||
6125 | last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; | ||
6126 | |||
6127 | /* make sure the dirty trick played by the caller works */ | ||
6128 | ret = invalidate_inode_pages2_range(inode->i_mapping, | ||
6129 | first_index, last_index); | ||
6130 | if (ret) | ||
6131 | goto out_unlock; | ||
6132 | |||
6133 | file_ra_state_init(ra, inode->i_mapping); | ||
6134 | |||
6135 | for (i = first_index ; i <= last_index; i++) { | ||
6136 | if (total_read % ra->ra_pages == 0) { | ||
6137 | btrfs_force_ra(inode->i_mapping, ra, NULL, i, | ||
6138 | calc_ra(i, last_index, ra->ra_pages)); | ||
6139 | } | ||
6140 | total_read++; | ||
6141 | again: | ||
6142 | if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) | ||
6143 | BUG_ON(1); | ||
6144 | page = grab_cache_page(inode->i_mapping, i); | ||
6145 | if (!page) { | ||
6146 | ret = -ENOMEM; | ||
6147 | goto out_unlock; | ||
6148 | } | ||
6149 | if (!PageUptodate(page)) { | ||
6150 | btrfs_readpage(NULL, page); | ||
6151 | lock_page(page); | ||
6152 | if (!PageUptodate(page)) { | ||
6153 | unlock_page(page); | ||
6154 | page_cache_release(page); | ||
6155 | ret = -EIO; | ||
6156 | goto out_unlock; | ||
6157 | } | ||
6158 | } | ||
6159 | wait_on_page_writeback(page); | ||
6160 | |||
6161 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
6162 | page_end = page_start + PAGE_CACHE_SIZE - 1; | ||
6163 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
6164 | |||
6165 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | ||
6166 | if (ordered) { | ||
6167 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
6168 | unlock_page(page); | ||
6169 | page_cache_release(page); | ||
6170 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
6171 | btrfs_put_ordered_extent(ordered); | ||
6172 | goto again; | ||
6173 | } | ||
6174 | set_page_extent_mapped(page); | ||
6175 | |||
6176 | if (i == first_index) | ||
6177 | set_extent_bits(io_tree, page_start, page_end, | ||
6178 | EXTENT_BOUNDARY, GFP_NOFS); | ||
6179 | btrfs_set_extent_delalloc(inode, page_start, page_end); | ||
6180 | |||
6181 | set_page_dirty(page); | ||
6182 | total_dirty++; | ||
6183 | |||
6184 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
6185 | unlock_page(page); | ||
6186 | page_cache_release(page); | ||
6187 | } | ||
6188 | |||
6189 | out_unlock: | ||
6190 | kfree(ra); | ||
6191 | mutex_unlock(&inode->i_mutex); | ||
6192 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); | ||
6193 | return ret; | ||
6194 | } | ||
6195 | |||
6196 | static noinline int relocate_data_extent(struct inode *reloc_inode, | ||
6197 | struct btrfs_key *extent_key, | ||
6198 | u64 offset) | ||
6199 | { | ||
6200 | struct btrfs_root *root = BTRFS_I(reloc_inode)->root; | ||
6201 | struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; | ||
6202 | struct extent_map *em; | ||
6203 | u64 start = extent_key->objectid - offset; | ||
6204 | u64 end = start + extent_key->offset - 1; | ||
6205 | |||
6206 | em = alloc_extent_map(GFP_NOFS); | ||
6207 | BUG_ON(!em || IS_ERR(em)); | ||
6208 | |||
6209 | em->start = start; | ||
6210 | em->len = extent_key->offset; | ||
6211 | em->block_len = extent_key->offset; | ||
6212 | em->block_start = extent_key->objectid; | ||
6213 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
6214 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
6215 | |||
6216 | /* setup extent map to cheat btrfs_readpage */ | ||
6217 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); | ||
6218 | while (1) { | ||
6219 | int ret; | ||
6220 | write_lock(&em_tree->lock); | ||
6221 | ret = add_extent_mapping(em_tree, em); | ||
6222 | write_unlock(&em_tree->lock); | ||
6223 | if (ret != -EEXIST) { | ||
6224 | free_extent_map(em); | ||
6225 | break; | ||
6226 | } | ||
6227 | btrfs_drop_extent_cache(reloc_inode, start, end, 0); | ||
6228 | } | ||
6229 | unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); | ||
6230 | |||
6231 | return relocate_inode_pages(reloc_inode, start, extent_key->offset); | ||
6232 | } | ||
6233 | |||
6234 | struct btrfs_ref_path { | ||
6235 | u64 extent_start; | ||
6236 | u64 nodes[BTRFS_MAX_LEVEL]; | ||
6237 | u64 root_objectid; | ||
6238 | u64 root_generation; | ||
6239 | u64 owner_objectid; | ||
6240 | u32 num_refs; | ||
6241 | int lowest_level; | ||
6242 | int current_level; | ||
6243 | int shared_level; | ||
6244 | |||
6245 | struct btrfs_key node_keys[BTRFS_MAX_LEVEL]; | ||
6246 | u64 new_nodes[BTRFS_MAX_LEVEL]; | ||
6247 | }; | ||
6248 | |||
6249 | struct disk_extent { | ||
6250 | u64 ram_bytes; | ||
6251 | u64 disk_bytenr; | ||
6252 | u64 disk_num_bytes; | ||
6253 | u64 offset; | ||
6254 | u64 num_bytes; | ||
6255 | u8 compression; | ||
6256 | u8 encryption; | ||
6257 | u16 other_encoding; | ||
6258 | }; | ||
6259 | |||
6260 | static int is_cowonly_root(u64 root_objectid) | ||
6261 | { | ||
6262 | if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || | ||
6263 | root_objectid == BTRFS_EXTENT_TREE_OBJECTID || | ||
6264 | root_objectid == BTRFS_CHUNK_TREE_OBJECTID || | ||
6265 | root_objectid == BTRFS_DEV_TREE_OBJECTID || | ||
6266 | root_objectid == BTRFS_TREE_LOG_OBJECTID || | ||
6267 | root_objectid == BTRFS_CSUM_TREE_OBJECTID) | ||
6268 | return 1; | ||
6269 | return 0; | ||
6270 | } | ||
6271 | |||
6272 | static noinline int __next_ref_path(struct btrfs_trans_handle *trans, | ||
6273 | struct btrfs_root *extent_root, | ||
6274 | struct btrfs_ref_path *ref_path, | ||
6275 | int first_time) | ||
6276 | { | ||
6277 | struct extent_buffer *leaf; | ||
6278 | struct btrfs_path *path; | ||
6279 | struct btrfs_extent_ref *ref; | ||
6280 | struct btrfs_key key; | ||
6281 | struct btrfs_key found_key; | ||
6282 | u64 bytenr; | ||
6283 | u32 nritems; | ||
6284 | int level; | ||
6285 | int ret = 1; | ||
6286 | |||
6287 | path = btrfs_alloc_path(); | ||
6288 | if (!path) | ||
6289 | return -ENOMEM; | ||
6290 | |||
6291 | if (first_time) { | ||
6292 | ref_path->lowest_level = -1; | ||
6293 | ref_path->current_level = -1; | ||
6294 | ref_path->shared_level = -1; | ||
6295 | goto walk_up; | ||
6296 | } | ||
6297 | walk_down: | ||
6298 | level = ref_path->current_level - 1; | ||
6299 | while (level >= -1) { | ||
6300 | u64 parent; | ||
6301 | if (level < ref_path->lowest_level) | ||
6302 | break; | ||
6303 | |||
6304 | if (level >= 0) | ||
6305 | bytenr = ref_path->nodes[level]; | ||
6306 | else | ||
6307 | bytenr = ref_path->extent_start; | ||
6308 | BUG_ON(bytenr == 0); | ||
6309 | |||
6310 | parent = ref_path->nodes[level + 1]; | ||
6311 | ref_path->nodes[level + 1] = 0; | ||
6312 | ref_path->current_level = level; | ||
6313 | BUG_ON(parent == 0); | ||
6314 | |||
6315 | key.objectid = bytenr; | ||
6316 | key.offset = parent + 1; | ||
6317 | key.type = BTRFS_EXTENT_REF_KEY; | ||
6318 | |||
6319 | ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); | ||
6320 | if (ret < 0) | ||
6321 | goto out; | ||
6322 | BUG_ON(ret == 0); | ||
6323 | |||
6324 | leaf = path->nodes[0]; | ||
6325 | nritems = btrfs_header_nritems(leaf); | ||
6326 | if (path->slots[0] >= nritems) { | ||
6327 | ret = btrfs_next_leaf(extent_root, path); | ||
6328 | if (ret < 0) | ||
6329 | goto out; | ||
6330 | if (ret > 0) | ||
6331 | goto next; | ||
6332 | leaf = path->nodes[0]; | ||
6333 | } | ||
6334 | |||
6335 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
6336 | if (found_key.objectid == bytenr && | ||
6337 | found_key.type == BTRFS_EXTENT_REF_KEY) { | ||
6338 | if (level < ref_path->shared_level) | ||
6339 | ref_path->shared_level = level; | ||
6340 | goto found; | ||
6341 | } | ||
6342 | next: | ||
6343 | level--; | ||
6344 | btrfs_release_path(extent_root, path); | ||
6345 | cond_resched(); | ||
6346 | } | ||
6347 | /* reached lowest level */ | ||
6348 | ret = 1; | ||
6349 | goto out; | ||
6350 | walk_up: | ||
6351 | level = ref_path->current_level; | ||
6352 | while (level < BTRFS_MAX_LEVEL - 1) { | ||
6353 | u64 ref_objectid; | ||
6354 | |||
6355 | if (level >= 0) | ||
6356 | bytenr = ref_path->nodes[level]; | ||
6357 | else | ||
6358 | bytenr = ref_path->extent_start; | ||
6359 | |||
6360 | BUG_ON(bytenr == 0); | ||
6361 | |||
6362 | key.objectid = bytenr; | ||
6363 | key.offset = 0; | ||
6364 | key.type = BTRFS_EXTENT_REF_KEY; | ||
6365 | |||
6366 | ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); | ||
6367 | if (ret < 0) | ||
6368 | goto out; | ||
6369 | |||
6370 | leaf = path->nodes[0]; | ||
6371 | nritems = btrfs_header_nritems(leaf); | ||
6372 | if (path->slots[0] >= nritems) { | ||
6373 | ret = btrfs_next_leaf(extent_root, path); | ||
6374 | if (ret < 0) | ||
6375 | goto out; | ||
6376 | if (ret > 0) { | ||
6377 | /* the extent was freed by someone */ | ||
6378 | if (ref_path->lowest_level == level) | ||
6379 | goto out; | ||
6380 | btrfs_release_path(extent_root, path); | ||
6381 | goto walk_down; | ||
6382 | } | ||
6383 | leaf = path->nodes[0]; | ||
6384 | } | ||
6385 | |||
6386 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
6387 | if (found_key.objectid != bytenr || | ||
6388 | found_key.type != BTRFS_EXTENT_REF_KEY) { | ||
6389 | /* the extent was freed by someone */ | ||
6390 | if (ref_path->lowest_level == level) { | ||
6391 | ret = 1; | ||
6392 | goto out; | ||
6393 | } | ||
6394 | btrfs_release_path(extent_root, path); | ||
6395 | goto walk_down; | ||
6396 | } | ||
6397 | found: | ||
6398 | ref = btrfs_item_ptr(leaf, path->slots[0], | ||
6399 | struct btrfs_extent_ref); | ||
6400 | ref_objectid = btrfs_ref_objectid(leaf, ref); | ||
6401 | if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { | ||
6402 | if (first_time) { | ||
6403 | level = (int)ref_objectid; | ||
6404 | BUG_ON(level >= BTRFS_MAX_LEVEL); | ||
6405 | ref_path->lowest_level = level; | ||
6406 | ref_path->current_level = level; | ||
6407 | ref_path->nodes[level] = bytenr; | ||
6408 | } else { | ||
6409 | WARN_ON(ref_objectid != level); | ||
6410 | } | ||
6411 | } else { | ||
6412 | WARN_ON(level != -1); | ||
6413 | } | ||
6414 | first_time = 0; | ||
6415 | |||
6416 | if (ref_path->lowest_level == level) { | ||
6417 | ref_path->owner_objectid = ref_objectid; | ||
6418 | ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); | ||
6419 | } | ||
6420 | |||
6421 | /* | ||
6422 | * the block is tree root or the block isn't in reference | ||
6423 | * counted tree. | ||
6424 | */ | ||
6425 | if (found_key.objectid == found_key.offset || | ||
6426 | is_cowonly_root(btrfs_ref_root(leaf, ref))) { | ||
6427 | ref_path->root_objectid = btrfs_ref_root(leaf, ref); | ||
6428 | ref_path->root_generation = | ||
6429 | btrfs_ref_generation(leaf, ref); | ||
6430 | if (level < 0) { | ||
6431 | /* special reference from the tree log */ | ||
6432 | ref_path->nodes[0] = found_key.offset; | ||
6433 | ref_path->current_level = 0; | ||
6434 | } | ||
6435 | ret = 0; | ||
6436 | goto out; | ||
6437 | } | ||
6438 | |||
6439 | level++; | ||
6440 | BUG_ON(ref_path->nodes[level] != 0); | ||
6441 | ref_path->nodes[level] = found_key.offset; | ||
6442 | ref_path->current_level = level; | ||
6443 | |||
6444 | /* | ||
6445 | * the reference was created in the running transaction, | ||
6446 | * no need to continue walking up. | ||
6447 | */ | ||
6448 | if (btrfs_ref_generation(leaf, ref) == trans->transid) { | ||
6449 | ref_path->root_objectid = btrfs_ref_root(leaf, ref); | ||
6450 | ref_path->root_generation = | ||
6451 | btrfs_ref_generation(leaf, ref); | ||
6452 | ret = 0; | ||
6453 | goto out; | ||
6454 | } | ||
6455 | |||
6456 | btrfs_release_path(extent_root, path); | ||
6457 | cond_resched(); | ||
6458 | } | ||
6459 | /* reached max tree level, but no tree root found. */ | ||
6460 | BUG(); | ||
6461 | out: | ||
6462 | btrfs_free_path(path); | ||
6463 | return ret; | ||
6464 | } | ||
6465 | |||
6466 | static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, | ||
6467 | struct btrfs_root *extent_root, | ||
6468 | struct btrfs_ref_path *ref_path, | ||
6469 | u64 extent_start) | ||
6470 | { | ||
6471 | memset(ref_path, 0, sizeof(*ref_path)); | ||
6472 | ref_path->extent_start = extent_start; | ||
6473 | |||
6474 | return __next_ref_path(trans, extent_root, ref_path, 1); | ||
6475 | } | ||
6476 | |||
6477 | static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, | ||
6478 | struct btrfs_root *extent_root, | ||
6479 | struct btrfs_ref_path *ref_path) | ||
6480 | { | ||
6481 | return __next_ref_path(trans, extent_root, ref_path, 0); | ||
6482 | } | ||
6483 | |||
6484 | static noinline int get_new_locations(struct inode *reloc_inode, | ||
6485 | struct btrfs_key *extent_key, | ||
6486 | u64 offset, int no_fragment, | ||
6487 | struct disk_extent **extents, | ||
6488 | int *nr_extents) | ||
6489 | { | ||
6490 | struct btrfs_root *root = BTRFS_I(reloc_inode)->root; | ||
6491 | struct btrfs_path *path; | ||
6492 | struct btrfs_file_extent_item *fi; | ||
6493 | struct extent_buffer *leaf; | ||
6494 | struct disk_extent *exts = *extents; | ||
6495 | struct btrfs_key found_key; | ||
6496 | u64 cur_pos; | ||
6497 | u64 last_byte; | ||
6498 | u32 nritems; | ||
6499 | int nr = 0; | ||
6500 | int max = *nr_extents; | ||
6501 | int ret; | ||
6502 | |||
6503 | WARN_ON(!no_fragment && *extents); | ||
6504 | if (!exts) { | ||
6505 | max = 1; | ||
6506 | exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); | ||
6507 | if (!exts) | ||
6508 | return -ENOMEM; | ||
6509 | } | ||
6510 | |||
6511 | path = btrfs_alloc_path(); | ||
6512 | BUG_ON(!path); | ||
6513 | |||
6514 | cur_pos = extent_key->objectid - offset; | ||
6515 | last_byte = extent_key->objectid + extent_key->offset; | ||
6516 | ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, | ||
6517 | cur_pos, 0); | ||
6518 | if (ret < 0) | ||
6519 | goto out; | ||
6520 | if (ret > 0) { | ||
6521 | ret = -ENOENT; | ||
6522 | goto out; | ||
6523 | } | ||
6524 | |||
6525 | while (1) { | ||
6526 | leaf = path->nodes[0]; | ||
6527 | nritems = btrfs_header_nritems(leaf); | ||
6528 | if (path->slots[0] >= nritems) { | ||
6529 | ret = btrfs_next_leaf(root, path); | ||
6530 | if (ret < 0) | ||
6531 | goto out; | ||
6532 | if (ret > 0) | ||
6533 | break; | ||
6534 | leaf = path->nodes[0]; | ||
6535 | } | ||
6536 | |||
6537 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
6538 | if (found_key.offset != cur_pos || | ||
6539 | found_key.type != BTRFS_EXTENT_DATA_KEY || | ||
6540 | found_key.objectid != reloc_inode->i_ino) | ||
6541 | break; | ||
6542 | |||
6543 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
6544 | struct btrfs_file_extent_item); | ||
6545 | if (btrfs_file_extent_type(leaf, fi) != | ||
6546 | BTRFS_FILE_EXTENT_REG || | ||
6547 | btrfs_file_extent_disk_bytenr(leaf, fi) == 0) | ||
6548 | break; | ||
6549 | |||
6550 | if (nr == max) { | ||
6551 | struct disk_extent *old = exts; | ||
6552 | max *= 2; | ||
6553 | exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); | ||
6554 | memcpy(exts, old, sizeof(*exts) * nr); | ||
6555 | if (old != *extents) | ||
6556 | kfree(old); | ||
6557 | } | ||
6558 | |||
6559 | exts[nr].disk_bytenr = | ||
6560 | btrfs_file_extent_disk_bytenr(leaf, fi); | ||
6561 | exts[nr].disk_num_bytes = | ||
6562 | btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
6563 | exts[nr].offset = btrfs_file_extent_offset(leaf, fi); | ||
6564 | exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); | ||
6565 | exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); | ||
6566 | exts[nr].compression = btrfs_file_extent_compression(leaf, fi); | ||
6567 | exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi); | ||
6568 | exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf, | ||
6569 | fi); | ||
6570 | BUG_ON(exts[nr].offset > 0); | ||
6571 | BUG_ON(exts[nr].compression || exts[nr].encryption); | ||
6572 | BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); | ||
6573 | |||
6574 | cur_pos += exts[nr].num_bytes; | ||
6575 | nr++; | ||
6576 | |||
6577 | if (cur_pos + offset >= last_byte) | ||
6578 | break; | ||
6579 | |||
6580 | if (no_fragment) { | ||
6581 | ret = 1; | ||
6582 | goto out; | ||
6583 | } | ||
6584 | path->slots[0]++; | ||
6585 | } | ||
6586 | |||
6587 | BUG_ON(cur_pos + offset > last_byte); | ||
6588 | if (cur_pos + offset < last_byte) { | ||
6589 | ret = -ENOENT; | ||
6590 | goto out; | ||
6591 | } | ||
6592 | ret = 0; | ||
6593 | out: | ||
6594 | btrfs_free_path(path); | ||
6595 | if (ret) { | ||
6596 | if (exts != *extents) | ||
6597 | kfree(exts); | ||
6598 | } else { | ||
6599 | *extents = exts; | ||
6600 | *nr_extents = nr; | ||
6601 | } | ||
6602 | return ret; | ||
6603 | } | ||
6604 | |||
6605 | static noinline int replace_one_extent(struct btrfs_trans_handle *trans, | ||
6606 | struct btrfs_root *root, | ||
6607 | struct btrfs_path *path, | ||
6608 | struct btrfs_key *extent_key, | ||
6609 | struct btrfs_key *leaf_key, | ||
6610 | struct btrfs_ref_path *ref_path, | ||
6611 | struct disk_extent *new_extents, | ||
6612 | int nr_extents) | ||
6613 | { | ||
6614 | struct extent_buffer *leaf; | ||
6615 | struct btrfs_file_extent_item *fi; | ||
6616 | struct inode *inode = NULL; | ||
6617 | struct btrfs_key key; | ||
6618 | u64 lock_start = 0; | ||
6619 | u64 lock_end = 0; | ||
6620 | u64 num_bytes; | ||
6621 | u64 ext_offset; | ||
6622 | u64 search_end = (u64)-1; | ||
6623 | u32 nritems; | ||
6624 | int nr_scaned = 0; | ||
6625 | int extent_locked = 0; | ||
6626 | int extent_type; | ||
6627 | int ret; | ||
6628 | |||
6629 | memcpy(&key, leaf_key, sizeof(key)); | ||
6630 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { | ||
6631 | if (key.objectid < ref_path->owner_objectid || | ||
6632 | (key.objectid == ref_path->owner_objectid && | ||
6633 | key.type < BTRFS_EXTENT_DATA_KEY)) { | ||
6634 | key.objectid = ref_path->owner_objectid; | ||
6635 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
6636 | key.offset = 0; | ||
6637 | } | ||
6638 | } | ||
6639 | |||
6640 | while (1) { | ||
6641 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
6642 | if (ret < 0) | ||
6643 | goto out; | ||
6644 | |||
6645 | leaf = path->nodes[0]; | ||
6646 | nritems = btrfs_header_nritems(leaf); | ||
6647 | next: | ||
6648 | if (extent_locked && ret > 0) { | ||
6649 | /* | ||
6650 | * the file extent item was modified by someone | ||
6651 | * before the extent got locked. | ||
6652 | */ | ||
6653 | unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, | ||
6654 | lock_end, GFP_NOFS); | ||
6655 | extent_locked = 0; | ||
6656 | } | ||
6657 | |||
6658 | if (path->slots[0] >= nritems) { | ||
6659 | if (++nr_scaned > 2) | ||
6660 | break; | ||
6661 | |||
6662 | BUG_ON(extent_locked); | ||
6663 | ret = btrfs_next_leaf(root, path); | ||
6664 | if (ret < 0) | ||
6665 | goto out; | ||
6666 | if (ret > 0) | ||
6667 | break; | ||
6668 | leaf = path->nodes[0]; | ||
6669 | nritems = btrfs_header_nritems(leaf); | ||
6670 | } | ||
6671 | |||
6672 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
6673 | |||
6674 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { | ||
6675 | if ((key.objectid > ref_path->owner_objectid) || | ||
6676 | (key.objectid == ref_path->owner_objectid && | ||
6677 | key.type > BTRFS_EXTENT_DATA_KEY) || | ||
6678 | key.offset >= search_end) | ||
6679 | break; | ||
6680 | } | ||
6681 | |||
6682 | if (inode && key.objectid != inode->i_ino) { | ||
6683 | BUG_ON(extent_locked); | ||
6684 | btrfs_release_path(root, path); | ||
6685 | mutex_unlock(&inode->i_mutex); | ||
6686 | iput(inode); | ||
6687 | inode = NULL; | ||
6688 | continue; | ||
6689 | } | ||
6690 | |||
6691 | if (key.type != BTRFS_EXTENT_DATA_KEY) { | ||
6692 | path->slots[0]++; | ||
6693 | ret = 1; | ||
6694 | goto next; | ||
6695 | } | ||
6696 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
6697 | struct btrfs_file_extent_item); | ||
6698 | extent_type = btrfs_file_extent_type(leaf, fi); | ||
6699 | if ((extent_type != BTRFS_FILE_EXTENT_REG && | ||
6700 | extent_type != BTRFS_FILE_EXTENT_PREALLOC) || | ||
6701 | (btrfs_file_extent_disk_bytenr(leaf, fi) != | ||
6702 | extent_key->objectid)) { | ||
6703 | path->slots[0]++; | ||
6704 | ret = 1; | ||
6705 | goto next; | ||
6706 | } | ||
6707 | |||
6708 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi); | ||
6709 | ext_offset = btrfs_file_extent_offset(leaf, fi); | ||
6710 | |||
6711 | if (search_end == (u64)-1) { | ||
6712 | search_end = key.offset - ext_offset + | ||
6713 | btrfs_file_extent_ram_bytes(leaf, fi); | ||
6714 | } | ||
6715 | |||
6716 | if (!extent_locked) { | ||
6717 | lock_start = key.offset; | ||
6718 | lock_end = lock_start + num_bytes - 1; | ||
6719 | } else { | ||
6720 | if (lock_start > key.offset || | ||
6721 | lock_end + 1 < key.offset + num_bytes) { | ||
6722 | unlock_extent(&BTRFS_I(inode)->io_tree, | ||
6723 | lock_start, lock_end, GFP_NOFS); | ||
6724 | extent_locked = 0; | ||
6725 | } | ||
6726 | } | ||
6727 | |||
6728 | if (!inode) { | ||
6729 | btrfs_release_path(root, path); | ||
6730 | |||
6731 | inode = btrfs_iget_locked(root->fs_info->sb, | ||
6732 | key.objectid, root); | ||
6733 | if (inode->i_state & I_NEW) { | ||
6734 | BTRFS_I(inode)->root = root; | ||
6735 | BTRFS_I(inode)->location.objectid = | ||
6736 | key.objectid; | ||
6737 | BTRFS_I(inode)->location.type = | ||
6738 | BTRFS_INODE_ITEM_KEY; | ||
6739 | BTRFS_I(inode)->location.offset = 0; | ||
6740 | btrfs_read_locked_inode(inode); | ||
6741 | unlock_new_inode(inode); | ||
6742 | } | ||
6743 | /* | ||
6744 | * some code calls btrfs_commit_transaction while | ||
6745 | * holding the i_mutex, so we can't use mutex_lock | ||
6746 | * here. | ||
6747 | */ | ||
6748 | if (is_bad_inode(inode) || | ||
6749 | !mutex_trylock(&inode->i_mutex)) { | ||
6750 | iput(inode); | ||
6751 | inode = NULL; | ||
6752 | key.offset = (u64)-1; | ||
6753 | goto skip; | ||
6754 | } | ||
6755 | } | ||
6756 | |||
6757 | if (!extent_locked) { | ||
6758 | struct btrfs_ordered_extent *ordered; | ||
6759 | |||
6760 | btrfs_release_path(root, path); | ||
6761 | |||
6762 | lock_extent(&BTRFS_I(inode)->io_tree, lock_start, | ||
6763 | lock_end, GFP_NOFS); | ||
6764 | ordered = btrfs_lookup_first_ordered_extent(inode, | ||
6765 | lock_end); | ||
6766 | if (ordered && | ||
6767 | ordered->file_offset <= lock_end && | ||
6768 | ordered->file_offset + ordered->len > lock_start) { | ||
6769 | unlock_extent(&BTRFS_I(inode)->io_tree, | ||
6770 | lock_start, lock_end, GFP_NOFS); | ||
6771 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
6772 | btrfs_put_ordered_extent(ordered); | ||
6773 | key.offset += num_bytes; | ||
6774 | goto skip; | ||
6775 | } | ||
6776 | if (ordered) | ||
6777 | btrfs_put_ordered_extent(ordered); | ||
6778 | |||
6779 | extent_locked = 1; | ||
6780 | continue; | ||
6781 | } | ||
6782 | |||
6783 | if (nr_extents == 1) { | ||
6784 | /* update extent pointer in place */ | ||
6785 | btrfs_set_file_extent_disk_bytenr(leaf, fi, | ||
6786 | new_extents[0].disk_bytenr); | ||
6787 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, | ||
6788 | new_extents[0].disk_num_bytes); | ||
6789 | btrfs_mark_buffer_dirty(leaf); | ||
6790 | |||
6791 | btrfs_drop_extent_cache(inode, key.offset, | ||
6792 | key.offset + num_bytes - 1, 0); | ||
6793 | |||
6794 | ret = btrfs_inc_extent_ref(trans, root, | ||
6795 | new_extents[0].disk_bytenr, | ||
6796 | new_extents[0].disk_num_bytes, | ||
6797 | leaf->start, | ||
6798 | root->root_key.objectid, | ||
6799 | trans->transid, | ||
6800 | key.objectid); | ||
6801 | BUG_ON(ret); | ||
6802 | |||
6803 | ret = btrfs_free_extent(trans, root, | ||
6804 | extent_key->objectid, | ||
6805 | extent_key->offset, | ||
6806 | leaf->start, | ||
6807 | btrfs_header_owner(leaf), | ||
6808 | btrfs_header_generation(leaf), | ||
6809 | key.objectid, 0); | ||
6810 | BUG_ON(ret); | ||
6811 | |||
6812 | btrfs_release_path(root, path); | ||
6813 | key.offset += num_bytes; | ||
6814 | } else { | ||
6815 | BUG_ON(1); | ||
6816 | #if 0 | ||
6817 | u64 alloc_hint; | ||
6818 | u64 extent_len; | ||
6819 | int i; | ||
6820 | /* | ||
6821 | * drop old extent pointer at first, then insert the | ||
6822 | * new pointers one by one | ||
6823 | */ | ||
6824 | btrfs_release_path(root, path); | ||
6825 | ret = btrfs_drop_extents(trans, root, inode, key.offset, | ||
6826 | key.offset + num_bytes, | ||
6827 | key.offset, &alloc_hint); | ||
6828 | BUG_ON(ret); | ||
6829 | |||
6830 | for (i = 0; i < nr_extents; i++) { | ||
6831 | if (ext_offset >= new_extents[i].num_bytes) { | ||
6832 | ext_offset -= new_extents[i].num_bytes; | ||
6833 | continue; | ||
6834 | } | ||
6835 | extent_len = min(new_extents[i].num_bytes - | ||
6836 | ext_offset, num_bytes); | ||
6837 | |||
6838 | ret = btrfs_insert_empty_item(trans, root, | ||
6839 | path, &key, | ||
6840 | sizeof(*fi)); | ||
6841 | BUG_ON(ret); | ||
6842 | |||
6843 | leaf = path->nodes[0]; | ||
6844 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
6845 | struct btrfs_file_extent_item); | ||
6846 | btrfs_set_file_extent_generation(leaf, fi, | ||
6847 | trans->transid); | ||
6848 | btrfs_set_file_extent_type(leaf, fi, | ||
6849 | BTRFS_FILE_EXTENT_REG); | ||
6850 | btrfs_set_file_extent_disk_bytenr(leaf, fi, | ||
6851 | new_extents[i].disk_bytenr); | ||
6852 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, | ||
6853 | new_extents[i].disk_num_bytes); | ||
6854 | btrfs_set_file_extent_ram_bytes(leaf, fi, | ||
6855 | new_extents[i].ram_bytes); | ||
6856 | |||
6857 | btrfs_set_file_extent_compression(leaf, fi, | ||
6858 | new_extents[i].compression); | ||
6859 | btrfs_set_file_extent_encryption(leaf, fi, | ||
6860 | new_extents[i].encryption); | ||
6861 | btrfs_set_file_extent_other_encoding(leaf, fi, | ||
6862 | new_extents[i].other_encoding); | ||
6863 | |||
6864 | btrfs_set_file_extent_num_bytes(leaf, fi, | ||
6865 | extent_len); | ||
6866 | ext_offset += new_extents[i].offset; | ||
6867 | btrfs_set_file_extent_offset(leaf, fi, | ||
6868 | ext_offset); | ||
6869 | btrfs_mark_buffer_dirty(leaf); | ||
6870 | |||
6871 | btrfs_drop_extent_cache(inode, key.offset, | ||
6872 | key.offset + extent_len - 1, 0); | ||
6873 | |||
6874 | ret = btrfs_inc_extent_ref(trans, root, | ||
6875 | new_extents[i].disk_bytenr, | ||
6876 | new_extents[i].disk_num_bytes, | ||
6877 | leaf->start, | ||
6878 | root->root_key.objectid, | ||
6879 | trans->transid, key.objectid); | ||
6880 | BUG_ON(ret); | ||
6881 | btrfs_release_path(root, path); | ||
6882 | |||
6883 | inode_add_bytes(inode, extent_len); | ||
6884 | |||
6885 | ext_offset = 0; | ||
6886 | num_bytes -= extent_len; | ||
6887 | key.offset += extent_len; | ||
6888 | |||
6889 | if (num_bytes == 0) | ||
6890 | break; | ||
6891 | } | ||
6892 | BUG_ON(i >= nr_extents); | ||
6893 | #endif | ||
6894 | } | ||
6895 | |||
6896 | if (extent_locked) { | ||
6897 | unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, | ||
6898 | lock_end, GFP_NOFS); | ||
6899 | extent_locked = 0; | ||
6900 | } | ||
6901 | skip: | ||
6902 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && | ||
6903 | key.offset >= search_end) | ||
6904 | break; | ||
6905 | |||
6906 | cond_resched(); | ||
6907 | } | ||
6908 | ret = 0; | ||
6909 | out: | ||
6910 | btrfs_release_path(root, path); | ||
6911 | if (inode) { | ||
6912 | mutex_unlock(&inode->i_mutex); | ||
6913 | if (extent_locked) { | ||
6914 | unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, | ||
6915 | lock_end, GFP_NOFS); | ||
6916 | } | ||
6917 | iput(inode); | ||
6918 | } | ||
6919 | return ret; | ||
6920 | } | ||
6921 | |||
6922 | int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, | ||
6923 | struct btrfs_root *root, | ||
6924 | struct extent_buffer *buf, u64 orig_start) | ||
6925 | { | ||
6926 | int level; | ||
6927 | int ret; | ||
6928 | |||
6929 | BUG_ON(btrfs_header_generation(buf) != trans->transid); | ||
6930 | BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | ||
6931 | |||
6932 | level = btrfs_header_level(buf); | ||
6933 | if (level == 0) { | ||
6934 | struct btrfs_leaf_ref *ref; | ||
6935 | struct btrfs_leaf_ref *orig_ref; | ||
6936 | |||
6937 | orig_ref = btrfs_lookup_leaf_ref(root, orig_start); | ||
6938 | if (!orig_ref) | ||
6939 | return -ENOENT; | ||
6940 | |||
6941 | ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); | ||
6942 | if (!ref) { | ||
6943 | btrfs_free_leaf_ref(root, orig_ref); | ||
6944 | return -ENOMEM; | ||
6945 | } | ||
6946 | |||
6947 | ref->nritems = orig_ref->nritems; | ||
6948 | memcpy(ref->extents, orig_ref->extents, | ||
6949 | sizeof(ref->extents[0]) * ref->nritems); | ||
6950 | |||
6951 | btrfs_free_leaf_ref(root, orig_ref); | ||
6952 | |||
6953 | ref->root_gen = trans->transid; | ||
6954 | ref->bytenr = buf->start; | ||
6955 | ref->owner = btrfs_header_owner(buf); | ||
6956 | ref->generation = btrfs_header_generation(buf); | ||
6957 | |||
6958 | ret = btrfs_add_leaf_ref(root, ref, 0); | ||
6959 | WARN_ON(ret); | ||
6960 | btrfs_free_leaf_ref(root, ref); | ||
6961 | } | ||
6962 | return 0; | ||
6963 | } | ||
6964 | |||
6965 | static noinline int invalidate_extent_cache(struct btrfs_root *root, | ||
6966 | struct extent_buffer *leaf, | ||
6967 | struct btrfs_block_group_cache *group, | ||
6968 | struct btrfs_root *target_root) | ||
6969 | { | ||
6970 | struct btrfs_key key; | ||
6971 | struct inode *inode = NULL; | ||
6972 | struct btrfs_file_extent_item *fi; | ||
6973 | struct extent_state *cached_state = NULL; | ||
6974 | u64 num_bytes; | ||
6975 | u64 skip_objectid = 0; | ||
6976 | u32 nritems; | ||
6977 | u32 i; | ||
6978 | |||
6979 | nritems = btrfs_header_nritems(leaf); | ||
6980 | for (i = 0; i < nritems; i++) { | ||
6981 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
6982 | if (key.objectid == skip_objectid || | ||
6983 | key.type != BTRFS_EXTENT_DATA_KEY) | ||
6984 | continue; | ||
6985 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
6986 | if (btrfs_file_extent_type(leaf, fi) == | ||
6987 | BTRFS_FILE_EXTENT_INLINE) | ||
6988 | continue; | ||
6989 | if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) | ||
6990 | continue; | ||
6991 | if (!inode || inode->i_ino != key.objectid) { | ||
6992 | iput(inode); | ||
6993 | inode = btrfs_ilookup(target_root->fs_info->sb, | ||
6994 | key.objectid, target_root, 1); | ||
6995 | } | ||
6996 | if (!inode) { | ||
6997 | skip_objectid = key.objectid; | ||
6998 | continue; | ||
6999 | } | ||
7000 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi); | ||
7001 | |||
7002 | lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset, | ||
7003 | key.offset + num_bytes - 1, 0, &cached_state, | ||
7004 | GFP_NOFS); | ||
7005 | btrfs_drop_extent_cache(inode, key.offset, | ||
7006 | key.offset + num_bytes - 1, 1); | ||
7007 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset, | ||
7008 | key.offset + num_bytes - 1, &cached_state, | ||
7009 | GFP_NOFS); | ||
7010 | cond_resched(); | ||
7011 | } | ||
7012 | iput(inode); | ||
7013 | return 0; | ||
7014 | } | ||
7015 | |||
7016 | static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, | ||
7017 | struct btrfs_root *root, | ||
7018 | struct extent_buffer *leaf, | ||
7019 | struct btrfs_block_group_cache *group, | ||
7020 | struct inode *reloc_inode) | ||
7021 | { | ||
7022 | struct btrfs_key key; | ||
7023 | struct btrfs_key extent_key; | ||
7024 | struct btrfs_file_extent_item *fi; | ||
7025 | struct btrfs_leaf_ref *ref; | ||
7026 | struct disk_extent *new_extent; | ||
7027 | u64 bytenr; | ||
7028 | u64 num_bytes; | ||
7029 | u32 nritems; | ||
7030 | u32 i; | ||
7031 | int ext_index; | ||
7032 | int nr_extent; | ||
7033 | int ret; | ||
7034 | |||
7035 | new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); | ||
7036 | BUG_ON(!new_extent); | ||
7037 | |||
7038 | ref = btrfs_lookup_leaf_ref(root, leaf->start); | ||
7039 | BUG_ON(!ref); | ||
7040 | |||
7041 | ext_index = -1; | ||
7042 | nritems = btrfs_header_nritems(leaf); | ||
7043 | for (i = 0; i < nritems; i++) { | ||
7044 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
7045 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
7046 | continue; | ||
7047 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
7048 | if (btrfs_file_extent_type(leaf, fi) == | ||
7049 | BTRFS_FILE_EXTENT_INLINE) | ||
7050 | continue; | ||
7051 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
7052 | num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
7053 | if (bytenr == 0) | ||
7054 | continue; | ||
7055 | |||
7056 | ext_index++; | ||
7057 | if (bytenr >= group->key.objectid + group->key.offset || | ||
7058 | bytenr + num_bytes <= group->key.objectid) | ||
7059 | continue; | ||
7060 | |||
7061 | extent_key.objectid = bytenr; | ||
7062 | extent_key.offset = num_bytes; | ||
7063 | extent_key.type = BTRFS_EXTENT_ITEM_KEY; | ||
7064 | nr_extent = 1; | ||
7065 | ret = get_new_locations(reloc_inode, &extent_key, | ||
7066 | group->key.objectid, 1, | ||
7067 | &new_extent, &nr_extent); | ||
7068 | if (ret > 0) | ||
7069 | continue; | ||
7070 | BUG_ON(ret < 0); | ||
7071 | |||
7072 | BUG_ON(ref->extents[ext_index].bytenr != bytenr); | ||
7073 | BUG_ON(ref->extents[ext_index].num_bytes != num_bytes); | ||
7074 | ref->extents[ext_index].bytenr = new_extent->disk_bytenr; | ||
7075 | ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; | ||
7076 | |||
7077 | btrfs_set_file_extent_disk_bytenr(leaf, fi, | ||
7078 | new_extent->disk_bytenr); | ||
7079 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, | ||
7080 | new_extent->disk_num_bytes); | ||
7081 | btrfs_mark_buffer_dirty(leaf); | ||
7082 | |||
7083 | ret = btrfs_inc_extent_ref(trans, root, | ||
7084 | new_extent->disk_bytenr, | ||
7085 | new_extent->disk_num_bytes, | ||
7086 | leaf->start, | ||
7087 | root->root_key.objectid, | ||
7088 | trans->transid, key.objectid); | ||
7089 | BUG_ON(ret); | ||
7090 | |||
7091 | ret = btrfs_free_extent(trans, root, | ||
7092 | bytenr, num_bytes, leaf->start, | ||
7093 | btrfs_header_owner(leaf), | ||
7094 | btrfs_header_generation(leaf), | ||
7095 | key.objectid, 0); | ||
7096 | BUG_ON(ret); | ||
7097 | cond_resched(); | ||
7098 | } | ||
7099 | kfree(new_extent); | ||
7100 | BUG_ON(ext_index + 1 != ref->nritems); | ||
7101 | btrfs_free_leaf_ref(root, ref); | ||
7102 | return 0; | ||
7103 | } | ||
7104 | |||
7105 | int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, | ||
7106 | struct btrfs_root *root) | ||
7107 | { | ||
7108 | struct btrfs_root *reloc_root; | ||
7109 | int ret; | ||
7110 | |||
7111 | if (root->reloc_root) { | ||
7112 | reloc_root = root->reloc_root; | ||
7113 | root->reloc_root = NULL; | ||
7114 | list_add(&reloc_root->dead_list, | ||
7115 | &root->fs_info->dead_reloc_roots); | ||
7116 | |||
7117 | btrfs_set_root_bytenr(&reloc_root->root_item, | ||
7118 | reloc_root->node->start); | ||
7119 | btrfs_set_root_level(&reloc_root->root_item, | ||
7120 | btrfs_header_level(reloc_root->node)); | ||
7121 | memset(&reloc_root->root_item.drop_progress, 0, | ||
7122 | sizeof(struct btrfs_disk_key)); | ||
7123 | reloc_root->root_item.drop_level = 0; | ||
7124 | |||
7125 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | ||
7126 | &reloc_root->root_key, | ||
7127 | &reloc_root->root_item); | ||
7128 | BUG_ON(ret); | ||
7129 | } | ||
7130 | return 0; | ||
7131 | } | ||
7132 | |||
7133 | int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) | ||
7134 | { | ||
7135 | struct btrfs_trans_handle *trans; | ||
7136 | struct btrfs_root *reloc_root; | ||
7137 | struct btrfs_root *prev_root = NULL; | ||
7138 | struct list_head dead_roots; | ||
7139 | int ret; | ||
7140 | unsigned long nr; | ||
7141 | |||
7142 | INIT_LIST_HEAD(&dead_roots); | ||
7143 | list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots); | ||
7144 | |||
7145 | while (!list_empty(&dead_roots)) { | ||
7146 | reloc_root = list_entry(dead_roots.prev, | ||
7147 | struct btrfs_root, dead_list); | ||
7148 | list_del_init(&reloc_root->dead_list); | ||
7149 | |||
7150 | BUG_ON(reloc_root->commit_root != NULL); | ||
7151 | while (1) { | ||
7152 | trans = btrfs_join_transaction(root, 1); | ||
7153 | BUG_ON(!trans); | ||
7154 | |||
7155 | mutex_lock(&root->fs_info->drop_mutex); | ||
7156 | ret = btrfs_drop_snapshot(trans, reloc_root); | ||
7157 | if (ret != -EAGAIN) | ||
7158 | break; | ||
7159 | mutex_unlock(&root->fs_info->drop_mutex); | ||
7160 | |||
7161 | nr = trans->blocks_used; | ||
7162 | ret = btrfs_end_transaction(trans, root); | ||
7163 | BUG_ON(ret); | ||
7164 | btrfs_btree_balance_dirty(root, nr); | ||
7165 | } | ||
7166 | |||
7167 | free_extent_buffer(reloc_root->node); | ||
7168 | |||
7169 | ret = btrfs_del_root(trans, root->fs_info->tree_root, | ||
7170 | &reloc_root->root_key); | ||
7171 | BUG_ON(ret); | ||
7172 | mutex_unlock(&root->fs_info->drop_mutex); | ||
7173 | |||
7174 | nr = trans->blocks_used; | ||
7175 | ret = btrfs_end_transaction(trans, root); | ||
7176 | BUG_ON(ret); | ||
7177 | btrfs_btree_balance_dirty(root, nr); | ||
7178 | |||
7179 | kfree(prev_root); | ||
7180 | prev_root = reloc_root; | ||
7181 | } | ||
7182 | if (prev_root) { | ||
7183 | btrfs_remove_leaf_refs(prev_root, (u64)-1, 0); | ||
7184 | kfree(prev_root); | ||
7185 | } | ||
7186 | return 0; | ||
7187 | } | ||
7188 | |||
7189 | int btrfs_add_dead_reloc_root(struct btrfs_root *root) | ||
7190 | { | ||
7191 | list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); | ||
7192 | return 0; | ||
7193 | } | ||
7194 | |||
7195 | int btrfs_cleanup_reloc_trees(struct btrfs_root *root) | ||
7196 | { | ||
7197 | struct btrfs_root *reloc_root; | ||
7198 | struct btrfs_trans_handle *trans; | ||
7199 | struct btrfs_key location; | ||
7200 | int found; | ||
7201 | int ret; | ||
7202 | |||
7203 | mutex_lock(&root->fs_info->tree_reloc_mutex); | ||
7204 | ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL); | ||
7205 | BUG_ON(ret); | ||
7206 | found = !list_empty(&root->fs_info->dead_reloc_roots); | ||
7207 | mutex_unlock(&root->fs_info->tree_reloc_mutex); | ||
7208 | |||
7209 | if (found) { | ||
7210 | trans = btrfs_start_transaction(root, 1); | ||
7211 | BUG_ON(!trans); | ||
7212 | ret = btrfs_commit_transaction(trans, root); | ||
7213 | BUG_ON(ret); | ||
7214 | } | ||
7215 | |||
7216 | location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; | ||
7217 | location.offset = (u64)-1; | ||
7218 | location.type = BTRFS_ROOT_ITEM_KEY; | ||
7219 | |||
7220 | reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); | ||
7221 | BUG_ON(!reloc_root); | ||
7222 | btrfs_orphan_cleanup(reloc_root); | ||
7223 | return 0; | ||
7224 | } | ||
7225 | |||
7226 | static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, | ||
7227 | struct btrfs_root *root) | ||
7228 | { | ||
7229 | struct btrfs_root *reloc_root; | ||
7230 | struct extent_buffer *eb; | ||
7231 | struct btrfs_root_item *root_item; | ||
7232 | struct btrfs_key root_key; | ||
7233 | int ret; | ||
7234 | |||
7235 | BUG_ON(!root->ref_cows); | ||
7236 | if (root->reloc_root) | ||
7237 | return 0; | ||
7238 | |||
7239 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | ||
7240 | BUG_ON(!root_item); | ||
7241 | |||
7242 | ret = btrfs_copy_root(trans, root, root->commit_root, | ||
7243 | &eb, BTRFS_TREE_RELOC_OBJECTID); | ||
7244 | BUG_ON(ret); | ||
7245 | |||
7246 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; | ||
7247 | root_key.offset = root->root_key.objectid; | ||
7248 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
7249 | |||
7250 | memcpy(root_item, &root->root_item, sizeof(*root_item)); | ||
7251 | btrfs_set_root_refs(root_item, 0); | ||
7252 | btrfs_set_root_bytenr(root_item, eb->start); | ||
7253 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); | ||
7254 | btrfs_set_root_generation(root_item, trans->transid); | ||
7255 | |||
7256 | btrfs_tree_unlock(eb); | ||
7257 | free_extent_buffer(eb); | ||
7258 | |||
7259 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, | ||
7260 | &root_key, root_item); | ||
7261 | BUG_ON(ret); | ||
7262 | kfree(root_item); | ||
7263 | |||
7264 | reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, | ||
7265 | &root_key); | ||
7266 | BUG_ON(!reloc_root); | ||
7267 | reloc_root->last_trans = trans->transid; | ||
7268 | reloc_root->commit_root = NULL; | ||
7269 | reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; | ||
7270 | |||
7271 | root->reloc_root = reloc_root; | ||
7272 | return 0; | ||
7273 | } | ||
7274 | |||
7275 | /* | ||
7276 | * Core function of space balance. | ||
7277 | * | ||
7278 | * The idea is to use reloc trees to relocate tree blocks in reference | ||
7279 | * counted roots. There is one reloc tree for each subvol, and all | ||
7280 | * reloc trees share the same root key objectid. Reloc trees are | ||
7281 | * snapshots of the latest committed roots of subvols (root->commit_root). | ||
7282 | * | ||
7283 | * Relocating a tree block referenced by a subvol takes two steps: | ||
7284 | * COW the block through the subvol's reloc tree, then update the block | ||
7285 | * pointer in the subvol to point to the new block. Since all reloc | ||
7286 | * trees share the same root key objectid, special handling for tree | ||
7287 | * blocks owned by them is easy. Once a tree block has been COWed in | ||
7288 | * one reloc tree, the resulting new block can be used directly when | ||
7289 | * the same block needs to be COWed through another reloc tree. In this | ||
7290 | * way, relocated tree blocks are shared between reloc trees, and thus | ||
7291 | * also between subvols. | ||
7292 | */ | ||
7293 | static noinline int relocate_one_path(struct btrfs_trans_handle *trans, | ||
7294 | struct btrfs_root *root, | ||
7295 | struct btrfs_path *path, | ||
7296 | struct btrfs_key *first_key, | ||
7297 | struct btrfs_ref_path *ref_path, | ||
7298 | struct btrfs_block_group_cache *group, | ||
7299 | struct inode *reloc_inode) | ||
7300 | { | ||
7301 | struct btrfs_root *reloc_root; | ||
7302 | struct extent_buffer *eb = NULL; | ||
7303 | struct btrfs_key *keys; | ||
7304 | u64 *nodes; | ||
7305 | int level; | ||
7306 | int shared_level; | ||
7307 | int lowest_level = 0; | ||
7308 | int ret; | ||
7309 | |||
7310 | if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) | ||
7311 | lowest_level = ref_path->owner_objectid; | ||
7312 | |||
7313 | if (!root->ref_cows) { | ||
7314 | path->lowest_level = lowest_level; | ||
7315 | ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); | ||
7316 | BUG_ON(ret < 0); | ||
7317 | path->lowest_level = 0; | ||
7318 | btrfs_release_path(root, path); | ||
7319 | return 0; | ||
7320 | } | ||
7321 | |||
7322 | mutex_lock(&root->fs_info->tree_reloc_mutex); | ||
7323 | ret = init_reloc_tree(trans, root); | ||
7324 | BUG_ON(ret); | ||
7325 | reloc_root = root->reloc_root; | ||
7326 | |||
7327 | shared_level = ref_path->shared_level; | ||
7328 | ref_path->shared_level = BTRFS_MAX_LEVEL - 1; | ||
7329 | |||
7330 | keys = ref_path->node_keys; | ||
7331 | nodes = ref_path->new_nodes; | ||
7332 | memset(&keys[shared_level + 1], 0, | ||
7333 | sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1)); | ||
7334 | memset(&nodes[shared_level + 1], 0, | ||
7335 | sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1)); | ||
7336 | |||
7337 | if (nodes[lowest_level] == 0) { | ||
7338 | path->lowest_level = lowest_level; | ||
7339 | ret = btrfs_search_slot(trans, reloc_root, first_key, path, | ||
7340 | 0, 1); | ||
7341 | BUG_ON(ret); | ||
7342 | for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) { | ||
7343 | eb = path->nodes[level]; | ||
7344 | if (!eb || eb == reloc_root->node) | ||
7345 | break; | ||
7346 | nodes[level] = eb->start; | ||
7347 | if (level == 0) | ||
7348 | btrfs_item_key_to_cpu(eb, &keys[level], 0); | ||
7349 | else | ||
7350 | btrfs_node_key_to_cpu(eb, &keys[level], 0); | ||
7351 | } | ||
7352 | if (nodes[0] && | ||
7353 | ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
7354 | eb = path->nodes[0]; | ||
7355 | ret = replace_extents_in_leaf(trans, reloc_root, eb, | ||
7356 | group, reloc_inode); | ||
7357 | BUG_ON(ret); | ||
7358 | } | ||
7359 | btrfs_release_path(reloc_root, path); | ||
7360 | } else { | ||
7361 | ret = btrfs_merge_path(trans, reloc_root, keys, nodes, | ||
7362 | lowest_level); | ||
7363 | BUG_ON(ret); | ||
7364 | } | ||
7365 | |||
7366 | /* | ||
7367 | * replace tree blocks in the fs tree with tree blocks in | ||
7368 | * the reloc tree. | ||
7369 | */ | ||
7370 | ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level); | ||
7371 | BUG_ON(ret < 0); | ||
7372 | |||
7373 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
7374 | ret = btrfs_search_slot(trans, reloc_root, first_key, path, | ||
7375 | 0, 0); | ||
7376 | BUG_ON(ret); | ||
7377 | extent_buffer_get(path->nodes[0]); | ||
7378 | eb = path->nodes[0]; | ||
7379 | btrfs_release_path(reloc_root, path); | ||
7380 | ret = invalidate_extent_cache(reloc_root, eb, group, root); | ||
7381 | BUG_ON(ret); | ||
7382 | free_extent_buffer(eb); | ||
7383 | } | ||
7384 | |||
7385 | mutex_unlock(&root->fs_info->tree_reloc_mutex); | ||
7386 | path->lowest_level = 0; | ||
7387 | return 0; | ||
7388 | } | ||
7389 | |||
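As a reading aid, here is a condensed sketch of what relocate_one_path() below boils down to for the common lowest_level == 0 case. It is not replacement code: error handling, the shared-level caching via ref_path, and the leaf extent rewriting are all omitted, and relocate_path_sketch is a made-up name.

static int relocate_path_sketch(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_key *first_key)
{
	struct btrfs_root *reloc_root = root->reloc_root;
	struct btrfs_key keys[BTRFS_MAX_LEVEL];
	u64 nodes[BTRFS_MAX_LEVEL] = {0};
	struct extent_buffer *eb;
	int level;
	int ret;

	/* step 1: COW the whole path through the reloc tree (cow == 1)
	 * and remember the new block address and first key per level */
	ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 1);
	BUG_ON(ret < 0);
	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
		eb = path->nodes[level];
		if (!eb || eb == reloc_root->node)
			break;
		nodes[level] = eb->start;
		if (level == 0)
			btrfs_item_key_to_cpu(eb, &keys[level], 0);
		else
			btrfs_node_key_to_cpu(eb, &keys[level], 0);
	}
	btrfs_release_path(reloc_root, path);

	/* step 2: swap the subvol's block pointers over to the blocks
	 * that are now owned by the reloc tree */
	return btrfs_merge_path(trans, root, keys, nodes, 0);
}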
7390 | static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, | ||
7391 | struct btrfs_root *root, | ||
7392 | struct btrfs_path *path, | ||
7393 | struct btrfs_key *first_key, | ||
7394 | struct btrfs_ref_path *ref_path) | ||
7395 | { | ||
7396 | int ret; | ||
7397 | |||
7398 | ret = relocate_one_path(trans, root, path, first_key, | ||
7399 | ref_path, NULL, NULL); | ||
7400 | BUG_ON(ret); | ||
7401 | |||
7402 | return 0; | ||
7403 | } | ||
7404 | |||
7405 | static noinline int del_extent_zero(struct btrfs_trans_handle *trans, | ||
7406 | struct btrfs_root *extent_root, | ||
7407 | struct btrfs_path *path, | ||
7408 | struct btrfs_key *extent_key) | ||
7409 | { | ||
7410 | int ret; | ||
7411 | |||
7412 | ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); | ||
7413 | if (ret) | ||
7414 | goto out; | ||
7415 | ret = btrfs_del_item(trans, extent_root, path); | ||
7416 | out: | ||
7417 | btrfs_release_path(extent_root, path); | ||
7418 | return ret; | ||
7419 | } | ||
7420 | |||
7421 | static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info, | ||
7422 | struct btrfs_ref_path *ref_path) | ||
7423 | { | ||
7424 | struct btrfs_key root_key; | ||
7425 | |||
7426 | root_key.objectid = ref_path->root_objectid; | ||
7427 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
7428 | if (is_cowonly_root(ref_path->root_objectid)) | ||
7429 | root_key.offset = 0; | ||
7430 | else | ||
7431 | root_key.offset = (u64)-1; | ||
7432 | |||
7433 | return btrfs_read_fs_root_no_name(fs_info, &root_key); | ||
7434 | } | ||
7435 | |||
7436 | static noinline int relocate_one_extent(struct btrfs_root *extent_root, | ||
7437 | struct btrfs_path *path, | ||
7438 | struct btrfs_key *extent_key, | ||
7439 | struct btrfs_block_group_cache *group, | ||
7440 | struct inode *reloc_inode, int pass) | ||
7441 | { | ||
7442 | struct btrfs_trans_handle *trans; | ||
7443 | struct btrfs_root *found_root; | ||
7444 | struct btrfs_ref_path *ref_path = NULL; | ||
7445 | struct disk_extent *new_extents = NULL; | ||
7446 | int nr_extents = 0; | ||
7447 | int loops; | ||
7448 | int ret; | ||
7449 | int level; | ||
7450 | struct btrfs_key first_key; | ||
7451 | u64 prev_block = 0; | ||
7452 | |||
7453 | |||
7454 | trans = btrfs_start_transaction(extent_root, 1); | ||
7455 | BUG_ON(!trans); | ||
7456 | |||
7457 | if (extent_key->objectid == 0) { | ||
7458 | ret = del_extent_zero(trans, extent_root, path, extent_key); | ||
7459 | goto out; | ||
7460 | } | ||
7461 | |||
7462 | ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); | ||
7463 | if (!ref_path) { | ||
7464 | ret = -ENOMEM; | ||
7465 | goto out; | ||
7466 | } | ||
7467 | |||
7468 | for (loops = 0; ; loops++) { | ||
7469 | if (loops == 0) { | ||
7470 | ret = btrfs_first_ref_path(trans, extent_root, ref_path, | ||
7471 | extent_key->objectid); | ||
7472 | } else { | ||
7473 | ret = btrfs_next_ref_path(trans, extent_root, ref_path); | ||
7474 | } | ||
7475 | if (ret < 0) | ||
7476 | goto out; | ||
7477 | if (ret > 0) | ||
7478 | break; | ||
7479 | |||
7480 | if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID || | ||
7481 | ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
7482 | continue; | ||
7483 | |||
7484 | found_root = read_ref_root(extent_root->fs_info, ref_path); | ||
7485 | BUG_ON(!found_root); | ||
7486 | /* | ||
7487 | * for reference counted trees, only process reference paths | ||
7488 | * rooted at the latest committed root. | ||
7489 | */ | ||
7490 | if (found_root->ref_cows && | ||
7491 | ref_path->root_generation != found_root->root_key.offset) | ||
7492 | continue; | ||
7493 | |||
7494 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
7495 | if (pass == 0) { | ||
7496 | /* | ||
7497 | * copy data extents to new locations | ||
7498 | */ | ||
7499 | u64 group_start = group->key.objectid; | ||
7500 | ret = relocate_data_extent(reloc_inode, | ||
7501 | extent_key, | ||
7502 | group_start); | ||
7503 | if (ret < 0) | ||
7504 | goto out; | ||
7505 | break; | ||
7506 | } | ||
7507 | level = 0; | ||
7508 | } else { | ||
7509 | level = ref_path->owner_objectid; | ||
7510 | } | ||
7511 | |||
7512 | if (prev_block != ref_path->nodes[level]) { | ||
7513 | struct extent_buffer *eb; | ||
7514 | u64 block_start = ref_path->nodes[level]; | ||
7515 | u64 block_size = btrfs_level_size(found_root, level); | ||
7516 | |||
7517 | eb = read_tree_block(found_root, block_start, | ||
7518 | block_size, 0); | ||
7519 | btrfs_tree_lock(eb); | ||
7520 | BUG_ON(level != btrfs_header_level(eb)); | ||
7521 | |||
7522 | if (level == 0) | ||
7523 | btrfs_item_key_to_cpu(eb, &first_key, 0); | ||
7524 | else | ||
7525 | btrfs_node_key_to_cpu(eb, &first_key, 0); | ||
7526 | |||
7527 | btrfs_tree_unlock(eb); | ||
7528 | free_extent_buffer(eb); | ||
7529 | prev_block = block_start; | ||
7530 | } | ||
7531 | |||
7532 | mutex_lock(&extent_root->fs_info->trans_mutex); | ||
7533 | btrfs_record_root_in_trans(found_root); | ||
7534 | mutex_unlock(&extent_root->fs_info->trans_mutex); | ||
7535 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
7536 | /* | ||
7537 | * try to update data extent references while | ||
7538 | * keeping metadata shared between snapshots. | ||
7539 | */ | ||
7540 | if (pass == 1) { | ||
7541 | ret = relocate_one_path(trans, found_root, | ||
7542 | path, &first_key, ref_path, | ||
7543 | group, reloc_inode); | ||
7544 | if (ret < 0) | ||
7545 | goto out; | ||
7546 | continue; | ||
7547 | } | ||
7548 | /* | ||
7549 | * use the fallback method to process the remaining | ||
7550 | * references. | ||
7551 | */ | ||
7552 | if (!new_extents) { | ||
7553 | u64 group_start = group->key.objectid; | ||
7554 | new_extents = kmalloc(sizeof(*new_extents), | ||
7555 | GFP_NOFS); | ||
7556 | nr_extents = 1; | ||
7557 | ret = get_new_locations(reloc_inode, | ||
7558 | extent_key, | ||
7559 | group_start, 1, | ||
7560 | &new_extents, | ||
7561 | &nr_extents); | ||
7562 | if (ret) | ||
7563 | goto out; | ||
7564 | } | ||
7565 | ret = replace_one_extent(trans, found_root, | ||
7566 | path, extent_key, | ||
7567 | &first_key, ref_path, | ||
7568 | new_extents, nr_extents); | ||
7569 | } else { | ||
7570 | ret = relocate_tree_block(trans, found_root, path, | ||
7571 | &first_key, ref_path); | ||
7572 | } | ||
7573 | if (ret < 0) | ||
7574 | goto out; | ||
7575 | } | ||
7576 | ret = 0; | ||
7577 | out: | ||
7578 | btrfs_end_transaction(trans, extent_root); | ||
7579 | kfree(new_extents); | ||
7580 | kfree(ref_path); | ||
7581 | return ret; | ||
7582 | } | ||
7583 | #endif | ||
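For orientation, the pass structure of relocate_one_extent() above, for data references (owner >= BTRFS_FIRST_FREE_OBJECTID), condenses to roughly the following. This is a sketch that glosses over the metadata branch and the per-reference loop; all the names are taken from the function itself.

	if (pass == 0) {
		/* copy the data extent's contents into the reloc inode */
		ret = relocate_data_extent(reloc_inode, extent_key,
					   group_start);
	} else if (pass == 1) {
		/* rewrite references through the reloc trees so metadata
		 * stays shared between snapshots */
		ret = relocate_one_path(trans, found_root, path, &first_key,
					ref_path, group, reloc_inode);
	} else {
		/* fallback: rewrite each remaining reference in place */
		ret = replace_one_extent(trans, found_root, path, extent_key,
					 &first_key, ref_path,
					 new_extents, nr_extents);
	}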
7584 | |||
7585 | static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | 6482 | static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) |
7586 | { | 6483 | { |
7587 | u64 num_devices; | 6484 | u64 num_devices; |
7588 | u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | | 6485 | u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | |
7589 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; | 6486 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; |
7590 | 6487 | ||
7591 | num_devices = root->fs_info->fs_devices->rw_devices; | 6488 | /* |
6489 | * we add in the count of missing devices because we want | ||
6490 | * to make sure that any RAID levels on a degraded FS | ||
6491 | * continue to be honored. | ||
6492 | */ | ||
6493 | num_devices = root->fs_info->fs_devices->rw_devices + | ||
6494 | root->fs_info->fs_devices->missing_devices; | ||
6495 | |||
7592 | if (num_devices == 1) { | 6496 | if (num_devices == 1) { |
7593 | stripped |= BTRFS_BLOCK_GROUP_DUP; | 6497 | stripped |= BTRFS_BLOCK_GROUP_DUP; |
7594 | stripped = flags & ~stripped; | 6498 | stripped = flags & ~stripped; |
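An illustrative case for the missing-devices adjustment above (made-up values): a two-device RAID1 filesystem mounted degraded.

	u64 rw_devices = 1;		/* one device still writable */
	u64 missing_devices = 1;	/* the other device is absent */
	u64 num_devices = rw_devices + missing_devices;	/* == 2 */
	/* num_devices != 1, so the RAID1 bits survive instead of being
	 * folded into the single-device DUP handling below */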
@@ -7636,13 +6540,14 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) | |||
7636 | 6540 | ||
7637 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | 6541 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + |
7638 | sinfo->bytes_may_use + sinfo->bytes_readonly + | 6542 | sinfo->bytes_may_use + sinfo->bytes_readonly + |
7639 | cache->reserved_pinned + num_bytes < sinfo->total_bytes) { | 6543 | cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { |
7640 | sinfo->bytes_readonly += num_bytes; | 6544 | sinfo->bytes_readonly += num_bytes; |
7641 | sinfo->bytes_reserved += cache->reserved_pinned; | 6545 | sinfo->bytes_reserved += cache->reserved_pinned; |
7642 | cache->reserved_pinned = 0; | 6546 | cache->reserved_pinned = 0; |
7643 | cache->ro = 1; | 6547 | cache->ro = 1; |
7644 | ret = 0; | 6548 | ret = 0; |
7645 | } | 6549 | } |
6550 | |||
7646 | spin_unlock(&cache->lock); | 6551 | spin_unlock(&cache->lock); |
7647 | spin_unlock(&sinfo->lock); | 6552 | spin_unlock(&sinfo->lock); |
7648 | return ret; | 6553 | return ret; |
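The '<' to '<=' change above matters exactly when the accounting sums to total_bytes; with made-up numbers:

	/* used + reserved + pinned + may_use + readonly + num_bytes
	 *   == 60 + 20 + 10 + 5 + 5 + 0 == 100 == total_bytes */
	u64 total_bytes = 100;
	u64 sum = 60 + 20 + 10 + 5 + 5 + 0;
	/* old: sum <  total_bytes -> false, the group could not go ro
	 * new: sum <= total_bytes -> true,  set_block_group_ro() succeeds */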
@@ -7658,18 +6563,20 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
7658 | 6563 | ||
7659 | BUG_ON(cache->ro); | 6564 | BUG_ON(cache->ro); |
7660 | 6565 | ||
7661 | trans = btrfs_join_transaction(root, 1); | 6566 | trans = btrfs_join_transaction(root); |
7662 | BUG_ON(IS_ERR(trans)); | 6567 | BUG_ON(IS_ERR(trans)); |
7663 | 6568 | ||
7664 | alloc_flags = update_block_group_flags(root, cache->flags); | 6569 | alloc_flags = update_block_group_flags(root, cache->flags); |
7665 | if (alloc_flags != cache->flags) | 6570 | if (alloc_flags != cache->flags) |
7666 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | 6571 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, |
6572 | CHUNK_ALLOC_FORCE); | ||
7667 | 6573 | ||
7668 | ret = set_block_group_ro(cache); | 6574 | ret = set_block_group_ro(cache); |
7669 | if (!ret) | 6575 | if (!ret) |
7670 | goto out; | 6576 | goto out; |
7671 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | 6577 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); |
7672 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | 6578 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, |
6579 | CHUNK_ALLOC_FORCE); | ||
7673 | if (ret < 0) | 6580 | if (ret < 0) |
7674 | goto out; | 6581 | goto out; |
7675 | ret = set_block_group_ro(cache); | 6582 | ret = set_block_group_ro(cache); |
@@ -7678,6 +6585,70 @@ out: | |||
7678 | return ret; | 6585 | return ret; |
7679 | } | 6586 | } |
7680 | 6587 | ||
6588 | int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, | ||
6589 | struct btrfs_root *root, u64 type) | ||
6590 | { | ||
6591 | u64 alloc_flags = get_alloc_profile(root, type); | ||
6592 | return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | ||
6593 | CHUNK_ALLOC_FORCE); | ||
6594 | } | ||
6595 | |||
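A hypothetical caller of the new helper (the real callers live outside this hunk):

	/* make sure a fresh data chunk exists before restriping */
	ret = btrfs_force_chunk_alloc(trans, root, BTRFS_BLOCK_GROUP_DATA);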
6596 | /* | ||
6597 | * helper to account for the unused space of all the read-only block groups | ||
6598 | * in the list. Takes mirrors into account. | ||
6599 | */ | ||
6600 | static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list) | ||
6601 | { | ||
6602 | struct btrfs_block_group_cache *block_group; | ||
6603 | u64 free_bytes = 0; | ||
6604 | int factor; | ||
6605 | |||
6606 | list_for_each_entry(block_group, groups_list, list) { | ||
6607 | spin_lock(&block_group->lock); | ||
6608 | |||
6609 | if (!block_group->ro) { | ||
6610 | spin_unlock(&block_group->lock); | ||
6611 | continue; | ||
6612 | } | ||
6613 | |||
6614 | if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
6615 | BTRFS_BLOCK_GROUP_RAID10 | | ||
6616 | BTRFS_BLOCK_GROUP_DUP)) | ||
6617 | factor = 2; | ||
6618 | else | ||
6619 | factor = 1; | ||
6620 | |||
6621 | free_bytes += (block_group->key.offset - | ||
6622 | btrfs_block_group_used(&block_group->item)) * | ||
6623 | factor; | ||
6624 | |||
6625 | spin_unlock(&block_group->lock); | ||
6626 | } | ||
6627 | |||
6628 | return free_bytes; | ||
6629 | } | ||
6630 | |||
6631 | /* | ||
6632 | * helper to account for the unused space of all the read-only block groups | ||
6633 | * in the space_info. Takes mirrors into account. | ||
6634 | */ | ||
6635 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) | ||
6636 | { | ||
6637 | int i; | ||
6638 | u64 free_bytes = 0; | ||
6639 | |||
6640 | spin_lock(&sinfo->lock); | ||
6641 | |||
6642 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) | ||
6643 | if (!list_empty(&sinfo->block_groups[i])) | ||
6644 | free_bytes += __btrfs_get_ro_block_group_free_space( | ||
6645 | &sinfo->block_groups[i]); | ||
6646 | |||
6647 | spin_unlock(&sinfo->lock); | ||
6648 | |||
6649 | return free_bytes; | ||
6650 | } | ||
6651 | |||
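Worked example for the factor accounting above (illustrative sizes): a 1 GiB RAID1 block group with 300 MiB used.

	u64 size = 1024ULL << 20;	/* block_group->key.offset       */
	u64 used = 300ULL << 20;	/* btrfs_block_group_used(&item) */
	int factor = 2;			/* RAID1/RAID10/DUP store mirrors */
	u64 free_bytes = (size - used) * factor;	/* 1448 MiB raw */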
7681 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 6652 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
7682 | struct btrfs_block_group_cache *cache) | 6653 | struct btrfs_block_group_cache *cache) |
7683 | { | 6654 | { |
@@ -7758,7 +6729,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7758 | mutex_lock(&root->fs_info->chunk_mutex); | 6729 | mutex_lock(&root->fs_info->chunk_mutex); |
7759 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 6730 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
7760 | u64 min_free = btrfs_block_group_used(&block_group->item); | 6731 | u64 min_free = btrfs_block_group_used(&block_group->item); |
7761 | u64 dev_offset, max_avail; | 6732 | u64 dev_offset; |
7762 | 6733 | ||
7763 | /* | 6734 | /* |
7764 | * check to make sure we can actually find a chunk with enough | 6735 | * check to make sure we can actually find a chunk with enough |
@@ -7766,7 +6737,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7766 | */ | 6737 | */ |
7767 | if (device->total_bytes > device->bytes_used + min_free) { | 6738 | if (device->total_bytes > device->bytes_used + min_free) { |
7768 | ret = find_free_dev_extent(NULL, device, min_free, | 6739 | ret = find_free_dev_extent(NULL, device, min_free, |
7769 | &dev_offset, &max_avail); | 6740 | &dev_offset, NULL); |
7770 | if (!ret) | 6741 | if (!ret) |
7771 | break; | 6742 | break; |
7772 | ret = -1; | 6743 | ret = -1; |
@@ -7814,6 +6785,40 @@ out: | |||
7814 | return ret; | 6785 | return ret; |
7815 | } | 6786 | } |
7816 | 6787 | ||
6788 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info) | ||
6789 | { | ||
6790 | struct btrfs_block_group_cache *block_group; | ||
6791 | u64 last = 0; | ||
6792 | |||
6793 | while (1) { | ||
6794 | struct inode *inode; | ||
6795 | |||
6796 | block_group = btrfs_lookup_first_block_group(info, last); | ||
6797 | while (block_group) { | ||
6798 | spin_lock(&block_group->lock); | ||
6799 | if (block_group->iref) | ||
6800 | break; | ||
6801 | spin_unlock(&block_group->lock); | ||
6802 | block_group = next_block_group(info->tree_root, | ||
6803 | block_group); | ||
6804 | } | ||
6805 | if (!block_group) { | ||
6806 | if (last == 0) | ||
6807 | break; | ||
6808 | last = 0; | ||
6809 | continue; | ||
6810 | } | ||
6811 | |||
6812 | inode = block_group->inode; | ||
6813 | block_group->iref = 0; | ||
6814 | block_group->inode = NULL; | ||
6815 | spin_unlock(&block_group->lock); | ||
6816 | iput(inode); | ||
6817 | last = block_group->key.objectid + block_group->key.offset; | ||
6818 | btrfs_put_block_group(block_group); | ||
6819 | } | ||
6820 | } | ||
6821 | |||
7817 | int btrfs_free_block_groups(struct btrfs_fs_info *info) | 6822 | int btrfs_free_block_groups(struct btrfs_fs_info *info) |
7818 | { | 6823 | { |
7819 | struct btrfs_block_group_cache *block_group; | 6824 | struct btrfs_block_group_cache *block_group; |
@@ -7845,6 +6850,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7845 | if (block_group->cached == BTRFS_CACHE_STARTED) | 6850 | if (block_group->cached == BTRFS_CACHE_STARTED) |
7846 | wait_block_group_cache_done(block_group); | 6851 | wait_block_group_cache_done(block_group); |
7847 | 6852 | ||
6853 | /* | ||
6854 | * We haven't cached this block group, which means we may still | ||
6855 | * have excluded extents left on this block group. | ||
6856 | */ | ||
6857 | if (block_group->cached == BTRFS_CACHE_NO) | ||
6858 | free_excluded_extents(info->extent_root, block_group); | ||
6859 | |||
7848 | btrfs_remove_free_space_cache(block_group); | 6860 | btrfs_remove_free_space_cache(block_group); |
7849 | btrfs_put_block_group(block_group); | 6861 | btrfs_put_block_group(block_group); |
7850 | 6862 | ||
@@ -7897,6 +6909,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7897 | struct btrfs_key key; | 6909 | struct btrfs_key key; |
7898 | struct btrfs_key found_key; | 6910 | struct btrfs_key found_key; |
7899 | struct extent_buffer *leaf; | 6911 | struct extent_buffer *leaf; |
6912 | int need_clear = 0; | ||
6913 | u64 cache_gen; | ||
7900 | 6914 | ||
7901 | root = info->extent_root; | 6915 | root = info->extent_root; |
7902 | key.objectid = 0; | 6916 | key.objectid = 0; |
@@ -7905,6 +6919,16 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7905 | path = btrfs_alloc_path(); | 6919 | path = btrfs_alloc_path(); |
7906 | if (!path) | 6920 | if (!path) |
7907 | return -ENOMEM; | 6921 | return -ENOMEM; |
6922 | path->reada = 1; | ||
6923 | |||
6924 | cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); | ||
6925 | if (cache_gen != 0 && | ||
6926 | btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) | ||
6927 | need_clear = 1; | ||
6928 | if (btrfs_test_opt(root, CLEAR_CACHE)) | ||
6929 | need_clear = 1; | ||
6930 | if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen) | ||
6931 | printk(KERN_INFO "btrfs: disk space caching is enabled\n"); | ||
7908 | 6932 | ||
7909 | while (1) { | 6933 | while (1) { |
7910 | ret = find_first_block_group(root, path, &key); | 6934 | ret = find_first_block_group(root, path, &key); |
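The generation check added above can be read as follows (illustrative values):

	u64 cache_gen = 100;	/* generation stamped into the space cache */
	u64 super_gen = 104;	/* current superblock generation */
	/* the fs was last written by something that did not maintain the
	 * cache, so every block group gets disk_cache_state = DC_CLEAR */
	int need_clear = (cache_gen != 0 && super_gen != cache_gen); /* 1 */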
@@ -7912,7 +6936,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7912 | break; | 6936 | break; |
7913 | if (ret != 0) | 6937 | if (ret != 0) |
7914 | goto error; | 6938 | goto error; |
7915 | |||
7916 | leaf = path->nodes[0]; | 6939 | leaf = path->nodes[0]; |
7917 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 6940 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
7918 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 6941 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
@@ -7920,21 +6943,22 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7920 | ret = -ENOMEM; | 6943 | ret = -ENOMEM; |
7921 | goto error; | 6944 | goto error; |
7922 | } | 6945 | } |
6946 | cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), | ||
6947 | GFP_NOFS); | ||
6948 | if (!cache->free_space_ctl) { | ||
6949 | kfree(cache); | ||
6950 | ret = -ENOMEM; | ||
6951 | goto error; | ||
6952 | } | ||
7923 | 6953 | ||
7924 | atomic_set(&cache->count, 1); | 6954 | atomic_set(&cache->count, 1); |
7925 | spin_lock_init(&cache->lock); | 6955 | spin_lock_init(&cache->lock); |
7926 | spin_lock_init(&cache->tree_lock); | ||
7927 | cache->fs_info = info; | 6956 | cache->fs_info = info; |
7928 | INIT_LIST_HEAD(&cache->list); | 6957 | INIT_LIST_HEAD(&cache->list); |
7929 | INIT_LIST_HEAD(&cache->cluster_list); | 6958 | INIT_LIST_HEAD(&cache->cluster_list); |
7930 | 6959 | ||
7931 | /* | 6960 | if (need_clear) |
7932 | * we only want to have 32k of ram per block group for keeping | 6961 | cache->disk_cache_state = BTRFS_DC_CLEAR; |
7933 | * track of free space, and if we pass 1/2 of that we want to | ||
7934 | * start converting things over to using bitmaps | ||
7935 | */ | ||
7936 | cache->extents_thresh = ((1024 * 32) / 2) / | ||
7937 | sizeof(struct btrfs_free_space); | ||
7938 | 6962 | ||
7939 | read_extent_buffer(leaf, &cache->item, | 6963 | read_extent_buffer(leaf, &cache->item, |
7940 | btrfs_item_ptr_offset(leaf, path->slots[0]), | 6964 | btrfs_item_ptr_offset(leaf, path->slots[0]), |
@@ -7942,10 +6966,19 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7942 | memcpy(&cache->key, &found_key, sizeof(found_key)); | 6966 | memcpy(&cache->key, &found_key, sizeof(found_key)); |
7943 | 6967 | ||
7944 | key.objectid = found_key.objectid + found_key.offset; | 6968 | key.objectid = found_key.objectid + found_key.offset; |
7945 | btrfs_release_path(root, path); | 6969 | btrfs_release_path(path); |
7946 | cache->flags = btrfs_block_group_flags(&cache->item); | 6970 | cache->flags = btrfs_block_group_flags(&cache->item); |
7947 | cache->sectorsize = root->sectorsize; | 6971 | cache->sectorsize = root->sectorsize; |
7948 | 6972 | ||
6973 | btrfs_init_free_space_ctl(cache); | ||
6974 | |||
6975 | /* | ||
6976 | * We need to exclude the super stripes now so that the space | ||
6977 | * info has super bytes accounted for, otherwise we'll think | ||
6978 | * we have more space than we actually do. | ||
6979 | */ | ||
6980 | exclude_super_stripes(root, cache); | ||
6981 | |||
7949 | /* | 6982 | /* |
7950 | * check for two cases, either we are full, and therefore | 6983 | * check for two cases, either we are full, and therefore |
7951 | * don't need to bother with the caching work since we won't | 6984 | * don't need to bother with the caching work since we won't |
@@ -7954,12 +6987,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7954 | * time, particularly in the full case. | 6987 | * time, particularly in the full case. |
7955 | */ | 6988 | */ |
7956 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { | 6989 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { |
7957 | exclude_super_stripes(root, cache); | ||
7958 | cache->last_byte_to_unpin = (u64)-1; | 6990 | cache->last_byte_to_unpin = (u64)-1; |
7959 | cache->cached = BTRFS_CACHE_FINISHED; | 6991 | cache->cached = BTRFS_CACHE_FINISHED; |
7960 | free_excluded_extents(root, cache); | 6992 | free_excluded_extents(root, cache); |
7961 | } else if (btrfs_block_group_used(&cache->item) == 0) { | 6993 | } else if (btrfs_block_group_used(&cache->item) == 0) { |
7962 | exclude_super_stripes(root, cache); | ||
7963 | cache->last_byte_to_unpin = (u64)-1; | 6994 | cache->last_byte_to_unpin = (u64)-1; |
7964 | cache->cached = BTRFS_CACHE_FINISHED; | 6995 | cache->cached = BTRFS_CACHE_FINISHED; |
7965 | add_new_free_space(cache, root->fs_info, | 6996 | add_new_free_space(cache, root->fs_info, |
@@ -8027,25 +7058,26 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
8027 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 7058 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
8028 | if (!cache) | 7059 | if (!cache) |
8029 | return -ENOMEM; | 7060 | return -ENOMEM; |
7061 | cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), | ||
7062 | GFP_NOFS); | ||
7063 | if (!cache->free_space_ctl) { | ||
7064 | kfree(cache); | ||
7065 | return -ENOMEM; | ||
7066 | } | ||
8030 | 7067 | ||
8031 | cache->key.objectid = chunk_offset; | 7068 | cache->key.objectid = chunk_offset; |
8032 | cache->key.offset = size; | 7069 | cache->key.offset = size; |
8033 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; | 7070 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
8034 | cache->sectorsize = root->sectorsize; | 7071 | cache->sectorsize = root->sectorsize; |
7072 | cache->fs_info = root->fs_info; | ||
8035 | 7073 | ||
8036 | /* | ||
8037 | * we only want to have 32k of ram per block group for keeping track | ||
8038 | * of free space, and if we pass 1/2 of that we want to start | ||
8039 | * converting things over to using bitmaps | ||
8040 | */ | ||
8041 | cache->extents_thresh = ((1024 * 32) / 2) / | ||
8042 | sizeof(struct btrfs_free_space); | ||
8043 | atomic_set(&cache->count, 1); | 7074 | atomic_set(&cache->count, 1); |
8044 | spin_lock_init(&cache->lock); | 7075 | spin_lock_init(&cache->lock); |
8045 | spin_lock_init(&cache->tree_lock); | ||
8046 | INIT_LIST_HEAD(&cache->list); | 7076 | INIT_LIST_HEAD(&cache->list); |
8047 | INIT_LIST_HEAD(&cache->cluster_list); | 7077 | INIT_LIST_HEAD(&cache->cluster_list); |
8048 | 7078 | ||
7079 | btrfs_init_free_space_ctl(cache); | ||
7080 | |||
8049 | btrfs_set_block_group_used(&cache->item, bytes_used); | 7081 | btrfs_set_block_group_used(&cache->item, bytes_used); |
8050 | btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); | 7082 | btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); |
8051 | cache->flags = type; | 7083 | cache->flags = type; |
@@ -8088,8 +7120,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8088 | struct btrfs_path *path; | 7120 | struct btrfs_path *path; |
8089 | struct btrfs_block_group_cache *block_group; | 7121 | struct btrfs_block_group_cache *block_group; |
8090 | struct btrfs_free_cluster *cluster; | 7122 | struct btrfs_free_cluster *cluster; |
7123 | struct btrfs_root *tree_root = root->fs_info->tree_root; | ||
8091 | struct btrfs_key key; | 7124 | struct btrfs_key key; |
7125 | struct inode *inode; | ||
8092 | int ret; | 7126 | int ret; |
7127 | int factor; | ||
8093 | 7128 | ||
8094 | root = root->fs_info->extent_root; | 7129 | root = root->fs_info->extent_root; |
8095 | 7130 | ||
@@ -8097,7 +7132,19 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8097 | BUG_ON(!block_group); | 7132 | BUG_ON(!block_group); |
8098 | BUG_ON(!block_group->ro); | 7133 | BUG_ON(!block_group->ro); |
8099 | 7134 | ||
7135 | /* | ||
7136 | * Free the reserved super bytes from this block group before | ||
7137 | * removing it. | ||
7138 | */ | ||
7139 | free_excluded_extents(root, block_group); | ||
7140 | |||
8100 | memcpy(&key, &block_group->key, sizeof(key)); | 7141 | memcpy(&key, &block_group->key, sizeof(key)); |
7142 | if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | | ||
7143 | BTRFS_BLOCK_GROUP_RAID1 | | ||
7144 | BTRFS_BLOCK_GROUP_RAID10)) | ||
7145 | factor = 2; | ||
7146 | else | ||
7147 | factor = 1; | ||
8101 | 7148 | ||
8102 | /* make sure this block group isn't part of an allocation cluster */ | 7149 | /* make sure this block group isn't part of an allocation cluster */ |
8103 | cluster = &root->fs_info->data_alloc_cluster; | 7150 | cluster = &root->fs_info->data_alloc_cluster; |
@@ -8117,6 +7164,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8117 | path = btrfs_alloc_path(); | 7164 | path = btrfs_alloc_path(); |
8118 | BUG_ON(!path); | 7165 | BUG_ON(!path); |
8119 | 7166 | ||
7167 | inode = lookup_free_space_inode(root, block_group, path); | ||
7168 | if (!IS_ERR(inode)) { | ||
7169 | btrfs_orphan_add(trans, inode); | ||
7170 | clear_nlink(inode); | ||
7171 | /* One for the block group's ref */ | ||
7172 | spin_lock(&block_group->lock); | ||
7173 | if (block_group->iref) { | ||
7174 | block_group->iref = 0; | ||
7175 | block_group->inode = NULL; | ||
7176 | spin_unlock(&block_group->lock); | ||
7177 | iput(inode); | ||
7178 | } else { | ||
7179 | spin_unlock(&block_group->lock); | ||
7180 | } | ||
7181 | /* One for our lookup ref */ | ||
7182 | iput(inode); | ||
7183 | } | ||
7184 | |||
7185 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | ||
7186 | key.offset = block_group->key.objectid; | ||
7187 | key.type = 0; | ||
7188 | |||
7189 | ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); | ||
7190 | if (ret < 0) | ||
7191 | goto out; | ||
7192 | if (ret > 0) | ||
7193 | btrfs_release_path(path); | ||
7194 | if (ret == 0) { | ||
7195 | ret = btrfs_del_item(trans, tree_root, path); | ||
7196 | if (ret) | ||
7197 | goto out; | ||
7198 | btrfs_release_path(path); | ||
7199 | } | ||
7200 | |||
8120 | spin_lock(&root->fs_info->block_group_cache_lock); | 7201 | spin_lock(&root->fs_info->block_group_cache_lock); |
8121 | rb_erase(&block_group->cache_node, | 7202 | rb_erase(&block_group->cache_node, |
8122 | &root->fs_info->block_group_cache_tree); | 7203 | &root->fs_info->block_group_cache_tree); |
@@ -8138,8 +7219,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8138 | spin_lock(&block_group->space_info->lock); | 7219 | spin_lock(&block_group->space_info->lock); |
8139 | block_group->space_info->total_bytes -= block_group->key.offset; | 7220 | block_group->space_info->total_bytes -= block_group->key.offset; |
8140 | block_group->space_info->bytes_readonly -= block_group->key.offset; | 7221 | block_group->space_info->bytes_readonly -= block_group->key.offset; |
7222 | block_group->space_info->disk_total -= block_group->key.offset * factor; | ||
8141 | spin_unlock(&block_group->space_info->lock); | 7223 | spin_unlock(&block_group->space_info->lock); |
8142 | 7224 | ||
7225 | memcpy(&key, &block_group->key, sizeof(key)); | ||
7226 | |||
8143 | btrfs_clear_space_info_full(root->fs_info); | 7227 | btrfs_clear_space_info_full(root->fs_info); |
8144 | 7228 | ||
8145 | btrfs_put_block_group(block_group); | 7229 | btrfs_put_block_group(block_group); |
@@ -8156,3 +7240,100 @@ out: | |||
8156 | btrfs_free_path(path); | 7240 | btrfs_free_path(path); |
8157 | return ret; | 7241 | return ret; |
8158 | } | 7242 | } |
7243 | |||
7244 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info) | ||
7245 | { | ||
7246 | struct btrfs_space_info *space_info; | ||
7247 | struct btrfs_super_block *disk_super; | ||
7248 | u64 features; | ||
7249 | u64 flags; | ||
7250 | int mixed = 0; | ||
7251 | int ret; | ||
7252 | |||
7253 | disk_super = &fs_info->super_copy; | ||
7254 | if (!btrfs_super_root(disk_super)) | ||
7255 | return 1; | ||
7256 | |||
7257 | features = btrfs_super_incompat_flags(disk_super); | ||
7258 | if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) | ||
7259 | mixed = 1; | ||
7260 | |||
7261 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | ||
7262 | ret = update_space_info(fs_info, flags, 0, 0, &space_info); | ||
7263 | if (ret) | ||
7264 | goto out; | ||
7265 | |||
7266 | if (mixed) { | ||
7267 | flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; | ||
7268 | ret = update_space_info(fs_info, flags, 0, 0, &space_info); | ||
7269 | } else { | ||
7270 | flags = BTRFS_BLOCK_GROUP_METADATA; | ||
7271 | ret = update_space_info(fs_info, flags, 0, 0, &space_info); | ||
7272 | if (ret) | ||
7273 | goto out; | ||
7274 | |||
7275 | flags = BTRFS_BLOCK_GROUP_DATA; | ||
7276 | ret = update_space_info(fs_info, flags, 0, 0, &space_info); | ||
7277 | } | ||
7278 | out: | ||
7279 | return ret; | ||
7280 | } | ||
7281 | |||
7282 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | ||
7283 | { | ||
7284 | return unpin_extent_range(root, start, end); | ||
7285 | } | ||
7286 | |||
7287 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | ||
7288 | u64 num_bytes, u64 *actual_bytes) | ||
7289 | { | ||
7290 | return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); | ||
7291 | } | ||
7292 | |||
7293 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | ||
7294 | { | ||
7295 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
7296 | struct btrfs_block_group_cache *cache = NULL; | ||
7297 | u64 group_trimmed; | ||
7298 | u64 start; | ||
7299 | u64 end; | ||
7300 | u64 trimmed = 0; | ||
7301 | int ret = 0; | ||
7302 | |||
7303 | cache = btrfs_lookup_block_group(fs_info, range->start); | ||
7304 | |||
7305 | while (cache) { | ||
7306 | if (cache->key.objectid >= (range->start + range->len)) { | ||
7307 | btrfs_put_block_group(cache); | ||
7308 | break; | ||
7309 | } | ||
7310 | |||
7311 | start = max(range->start, cache->key.objectid); | ||
7312 | end = min(range->start + range->len, | ||
7313 | cache->key.objectid + cache->key.offset); | ||
7314 | |||
7315 | if (end - start >= range->minlen) { | ||
7316 | if (!block_group_cache_done(cache)) { | ||
7317 | ret = cache_block_group(cache, NULL, root, 0); | ||
7318 | if (!ret) | ||
7319 | wait_block_group_cache_done(cache); | ||
7320 | } | ||
7321 | ret = btrfs_trim_block_group(cache, | ||
7322 | &group_trimmed, | ||
7323 | start, | ||
7324 | end, | ||
7325 | range->minlen); | ||
7326 | |||
7327 | trimmed += group_trimmed; | ||
7328 | if (ret) { | ||
7329 | btrfs_put_block_group(cache); | ||
7330 | break; | ||
7331 | } | ||
7332 | } | ||
7333 | |||
7334 | cache = next_block_group(fs_info->tree_root, cache); | ||
7335 | } | ||
7336 | |||
7337 | range->len = trimmed; | ||
7338 | return ret; | ||
7339 | } | ||
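A hedged sketch of how the FITRIM path would drive btrfs_trim_fs() (the ioctl plumbing itself is not in this hunk, and the minlen value is an arbitrary example):

	struct fstrim_range range = {
		.start  = 0,
		.len    = (u64)-1,	/* visit every block group */
		.minlen = 4096,		/* skip free runs shorter than 4K */
	};
	ret = btrfs_trim_fs(root, &range);
	/* on return, range.len holds the number of bytes actually trimmed */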
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d74e6af9b53a..7055d11c1efd 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -10,6 +10,8 @@ | |||
10 | #include <linux/swap.h> | 10 | #include <linux/swap.h> |
11 | #include <linux/writeback.h> | 11 | #include <linux/writeback.h> |
12 | #include <linux/pagevec.h> | 12 | #include <linux/pagevec.h> |
13 | #include <linux/prefetch.h> | ||
14 | #include <linux/cleancache.h> | ||
13 | #include "extent_io.h" | 15 | #include "extent_io.h" |
14 | #include "extent_map.h" | 16 | #include "extent_map.h" |
15 | #include "compat.h" | 17 | #include "compat.h" |
@@ -101,10 +103,10 @@ void extent_io_exit(void) | |||
101 | } | 103 | } |
102 | 104 | ||
103 | void extent_io_tree_init(struct extent_io_tree *tree, | 105 | void extent_io_tree_init(struct extent_io_tree *tree, |
104 | struct address_space *mapping, gfp_t mask) | 106 | struct address_space *mapping) |
105 | { | 107 | { |
106 | tree->state = RB_ROOT; | 108 | tree->state = RB_ROOT; |
107 | tree->buffer = RB_ROOT; | 109 | INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC); |
108 | tree->ops = NULL; | 110 | tree->ops = NULL; |
109 | tree->dirty_bytes = 0; | 111 | tree->dirty_bytes = 0; |
110 | spin_lock_init(&tree->lock); | 112 | spin_lock_init(&tree->lock); |
@@ -235,50 +237,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree, | |||
235 | return ret; | 237 | return ret; |
236 | } | 238 | } |
237 | 239 | ||
238 | static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree, | ||
239 | u64 offset, struct rb_node *node) | ||
240 | { | ||
241 | struct rb_root *root = &tree->buffer; | ||
242 | struct rb_node **p = &root->rb_node; | ||
243 | struct rb_node *parent = NULL; | ||
244 | struct extent_buffer *eb; | ||
245 | |||
246 | while (*p) { | ||
247 | parent = *p; | ||
248 | eb = rb_entry(parent, struct extent_buffer, rb_node); | ||
249 | |||
250 | if (offset < eb->start) | ||
251 | p = &(*p)->rb_left; | ||
252 | else if (offset > eb->start) | ||
253 | p = &(*p)->rb_right; | ||
254 | else | ||
255 | return eb; | ||
256 | } | ||
257 | |||
258 | rb_link_node(node, parent, p); | ||
259 | rb_insert_color(node, root); | ||
260 | return NULL; | ||
261 | } | ||
262 | |||
263 | static struct extent_buffer *buffer_search(struct extent_io_tree *tree, | ||
264 | u64 offset) | ||
265 | { | ||
266 | struct rb_root *root = &tree->buffer; | ||
267 | struct rb_node *n = root->rb_node; | ||
268 | struct extent_buffer *eb; | ||
269 | |||
270 | while (n) { | ||
271 | eb = rb_entry(n, struct extent_buffer, rb_node); | ||
272 | if (offset < eb->start) | ||
273 | n = n->rb_left; | ||
274 | else if (offset > eb->start) | ||
275 | n = n->rb_right; | ||
276 | else | ||
277 | return eb; | ||
278 | } | ||
279 | return NULL; | ||
280 | } | ||
281 | |||
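With the rb-tree helpers above gone, extent buffer lookup goes through the radix tree initialized in extent_io_tree_init(); a minimal sketch of the replacement lookup, where the page-shifted index is an assumption based on how buffers are keyed elsewhere in this patch:

	struct extent_buffer *eb;

	rcu_read_lock();
	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
	rcu_read_unlock();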
282 | static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, | 240 | static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, |
283 | struct extent_state *other) | 241 | struct extent_state *other) |
284 | { | 242 | { |
@@ -483,6 +441,15 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
483 | return ret; | 441 | return ret; |
484 | } | 442 | } |
485 | 443 | ||
444 | static struct extent_state * | ||
445 | alloc_extent_state_atomic(struct extent_state *prealloc) | ||
446 | { | ||
447 | if (!prealloc) | ||
448 | prealloc = alloc_extent_state(GFP_ATOMIC); | ||
449 | |||
450 | return prealloc; | ||
451 | } | ||
452 | |||
486 | /* | 453 | /* |
487 | * clear some bits on a range in the tree. This may require splitting | 454 | * clear some bits on a range in the tree. This may require splitting |
488 | * or inserting elements in the tree, so the gfp mask is used to | 455 | * or inserting elements in the tree, so the gfp mask is used to |
@@ -573,8 +540,8 @@ hit_next: | |||
573 | */ | 540 | */ |
574 | 541 | ||
575 | if (state->start < start) { | 542 | if (state->start < start) { |
576 | if (!prealloc) | 543 | prealloc = alloc_extent_state_atomic(prealloc); |
577 | prealloc = alloc_extent_state(GFP_ATOMIC); | 544 | BUG_ON(!prealloc); |
578 | err = split_state(tree, state, prealloc, start); | 545 | err = split_state(tree, state, prealloc, start); |
579 | BUG_ON(err == -EEXIST); | 546 | BUG_ON(err == -EEXIST); |
580 | prealloc = NULL; | 547 | prealloc = NULL; |
@@ -595,8 +562,8 @@ hit_next: | |||
595 | * on the first half | 562 | * on the first half |
596 | */ | 563 | */ |
597 | if (state->start <= end && state->end > end) { | 564 | if (state->start <= end && state->end > end) { |
598 | if (!prealloc) | 565 | prealloc = alloc_extent_state_atomic(prealloc); |
599 | prealloc = alloc_extent_state(GFP_ATOMIC); | 566 | BUG_ON(!prealloc); |
600 | err = split_state(tree, state, prealloc, end + 1); | 567 | err = split_state(tree, state, prealloc, end + 1); |
601 | BUG_ON(err == -EEXIST); | 568 | BUG_ON(err == -EEXIST); |
602 | if (wake) | 569 | if (wake) |
@@ -734,6 +701,15 @@ static void cache_state(struct extent_state *state, | |||
734 | } | 701 | } |
735 | } | 702 | } |
736 | 703 | ||
704 | static void uncache_state(struct extent_state **cached_ptr) | ||
705 | { | ||
706 | if (cached_ptr && (*cached_ptr)) { | ||
707 | struct extent_state *state = *cached_ptr; | ||
708 | *cached_ptr = NULL; | ||
709 | free_extent_state(state); | ||
710 | } | ||
711 | } | ||
712 | |||
737 | /* | 713 | /* |
738 | * set some bits on a range in the tree. This may require allocations or | 714 | * set some bits on a range in the tree. This may require allocations or |
739 | * sleeping, so the gfp mask is used to indicate what is allowed. | 715 | * sleeping, so the gfp mask is used to indicate what is allowed. |
@@ -760,8 +736,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
760 | again: | 736 | again: |
761 | if (!prealloc && (mask & __GFP_WAIT)) { | 737 | if (!prealloc && (mask & __GFP_WAIT)) { |
762 | prealloc = alloc_extent_state(mask); | 738 | prealloc = alloc_extent_state(mask); |
763 | if (!prealloc) | 739 | BUG_ON(!prealloc); |
764 | return -ENOMEM; | ||
765 | } | 740 | } |
766 | 741 | ||
767 | spin_lock(&tree->lock); | 742 | spin_lock(&tree->lock); |
@@ -778,6 +753,8 @@ again: | |||
778 | */ | 753 | */ |
779 | node = tree_search(tree, start); | 754 | node = tree_search(tree, start); |
780 | if (!node) { | 755 | if (!node) { |
756 | prealloc = alloc_extent_state_atomic(prealloc); | ||
757 | BUG_ON(!prealloc); | ||
781 | err = insert_state(tree, prealloc, start, end, &bits); | 758 | err = insert_state(tree, prealloc, start, end, &bits); |
782 | prealloc = NULL; | 759 | prealloc = NULL; |
783 | BUG_ON(err == -EEXIST); | 760 | BUG_ON(err == -EEXIST); |
@@ -806,20 +783,18 @@ hit_next: | |||
806 | if (err) | 783 | if (err) |
807 | goto out; | 784 | goto out; |
808 | 785 | ||
786 | next_node = rb_next(node); | ||
809 | cache_state(state, cached_state); | 787 | cache_state(state, cached_state); |
810 | merge_state(tree, state); | 788 | merge_state(tree, state); |
811 | if (last_end == (u64)-1) | 789 | if (last_end == (u64)-1) |
812 | goto out; | 790 | goto out; |
813 | 791 | ||
814 | start = last_end + 1; | 792 | start = last_end + 1; |
815 | if (start < end && prealloc && !need_resched()) { | 793 | if (next_node && start < end && prealloc && !need_resched()) { |
816 | next_node = rb_next(node); | 794 | state = rb_entry(next_node, struct extent_state, |
817 | if (next_node) { | 795 | rb_node); |
818 | state = rb_entry(next_node, struct extent_state, | 796 | if (state->start == start) |
819 | rb_node); | 797 | goto hit_next; |
820 | if (state->start == start) | ||
821 | goto hit_next; | ||
822 | } | ||
823 | } | 798 | } |
824 | goto search_again; | 799 | goto search_again; |
825 | } | 800 | } |
@@ -846,6 +821,9 @@ hit_next: | |||
846 | err = -EEXIST; | 821 | err = -EEXIST; |
847 | goto out; | 822 | goto out; |
848 | } | 823 | } |
824 | |||
825 | prealloc = alloc_extent_state_atomic(prealloc); | ||
826 | BUG_ON(!prealloc); | ||
849 | err = split_state(tree, state, prealloc, start); | 827 | err = split_state(tree, state, prealloc, start); |
850 | BUG_ON(err == -EEXIST); | 828 | BUG_ON(err == -EEXIST); |
851 | prealloc = NULL; | 829 | prealloc = NULL; |
@@ -876,14 +854,25 @@ hit_next: | |||
876 | this_end = end; | 854 | this_end = end; |
877 | else | 855 | else |
878 | this_end = last_start - 1; | 856 | this_end = last_start - 1; |
857 | |||
858 | prealloc = alloc_extent_state_atomic(prealloc); | ||
859 | BUG_ON(!prealloc); | ||
860 | |||
861 | /* | ||
862 | * Avoid freeing 'prealloc' if it can be merged with | ||
863 | * the later extent. | ||
864 | */ | ||
865 | atomic_inc(&prealloc->refs); | ||
879 | err = insert_state(tree, prealloc, start, this_end, | 866 | err = insert_state(tree, prealloc, start, this_end, |
880 | &bits); | 867 | &bits); |
881 | BUG_ON(err == -EEXIST); | 868 | BUG_ON(err == -EEXIST); |
882 | if (err) { | 869 | if (err) { |
870 | free_extent_state(prealloc); | ||
883 | prealloc = NULL; | 871 | prealloc = NULL; |
884 | goto out; | 872 | goto out; |
885 | } | 873 | } |
886 | cache_state(prealloc, cached_state); | 874 | cache_state(prealloc, cached_state); |
875 | free_extent_state(prealloc); | ||
887 | prealloc = NULL; | 876 | prealloc = NULL; |
888 | start = this_end + 1; | 877 | start = this_end + 1; |
889 | goto search_again; | 878 | goto search_again; |
@@ -900,6 +889,9 @@ hit_next: | |||
900 | err = -EEXIST; | 889 | err = -EEXIST; |
901 | goto out; | 890 | goto out; |
902 | } | 891 | } |
892 | |||
893 | prealloc = alloc_extent_state_atomic(prealloc); | ||
894 | BUG_ON(!prealloc); | ||
903 | err = split_state(tree, state, prealloc, end + 1); | 895 | err = split_state(tree, state, prealloc, end + 1); |
904 | BUG_ON(err == -EEXIST); | 896 | BUG_ON(err == -EEXIST); |
905 | 897 | ||
@@ -976,18 +968,11 @@ int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | |||
976 | NULL, mask); | 968 | NULL, mask); |
977 | } | 969 | } |
978 | 970 | ||
979 | static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | ||
980 | gfp_t mask) | ||
981 | { | ||
982 | return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, | ||
983 | NULL, mask); | ||
984 | } | ||
985 | |||
986 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 971 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
987 | gfp_t mask) | 972 | struct extent_state **cached_state, gfp_t mask) |
988 | { | 973 | { |
989 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, | 974 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, |
990 | NULL, mask); | 975 | NULL, cached_state, mask); |
991 | } | 976 | } |
992 | 977 | ||
993 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, | 978 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, |
@@ -998,11 +983,6 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, | |||
998 | cached_state, mask); | 983 | cached_state, mask); |
999 | } | 984 | } |
1000 | 985 | ||
1001 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) | ||
1002 | { | ||
1003 | return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); | ||
1004 | } | ||
1005 | |||
1006 | /* | 986 | /* |
1007 | * either insert or lock state struct between start and end. Use mask to tell | 987 | * either insert or lock state struct between start and end. Use mask to tell
1008 | * us if waiting is desired. | 988 | * us if waiting is desired. |
@@ -1056,33 +1036,13 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, | |||
1056 | mask); | 1036 | mask); |
1057 | } | 1037 | } |
1058 | 1038 | ||
1059 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 1039 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) |
1060 | gfp_t mask) | ||
1061 | { | 1040 | { |
1062 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, | 1041 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, |
1063 | mask); | 1042 | mask); |
1064 | } | 1043 | } |
1065 | 1044 | ||
1066 | /* | 1045 | /* |
1067 | * helper function to set pages and extents in the tree dirty | ||
1068 | */ | ||
1069 | int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) | ||
1070 | { | ||
1071 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
1072 | unsigned long end_index = end >> PAGE_CACHE_SHIFT; | ||
1073 | struct page *page; | ||
1074 | |||
1075 | while (index <= end_index) { | ||
1076 | page = find_get_page(tree->mapping, index); | ||
1077 | BUG_ON(!page); | ||
1078 | __set_page_dirty_nobuffers(page); | ||
1079 | page_cache_release(page); | ||
1080 | index++; | ||
1081 | } | ||
1082 | return 0; | ||
1083 | } | ||
1084 | |||
1085 | /* | ||
1086 | * helper function to set both pages and extents in the tree writeback | 1046 | * helper function to set both pages and extents in the tree writeback |
1087 | */ | 1047 | */ |
1088 | static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) | 1048 | static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) |
@@ -1477,12 +1437,13 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
1477 | */ | 1437 | */ |
1478 | u64 count_range_bits(struct extent_io_tree *tree, | 1438 | u64 count_range_bits(struct extent_io_tree *tree, |
1479 | u64 *start, u64 search_end, u64 max_bytes, | 1439 | u64 *start, u64 search_end, u64 max_bytes, |
1480 | unsigned long bits) | 1440 | unsigned long bits, int contig) |
1481 | { | 1441 | { |
1482 | struct rb_node *node; | 1442 | struct rb_node *node; |
1483 | struct extent_state *state; | 1443 | struct extent_state *state; |
1484 | u64 cur_start = *start; | 1444 | u64 cur_start = *start; |
1485 | u64 total_bytes = 0; | 1445 | u64 total_bytes = 0; |
1446 | u64 last = 0; | ||
1486 | int found = 0; | 1447 | int found = 0; |
1487 | 1448 | ||
1488 | if (search_end <= cur_start) { | 1449 | if (search_end <= cur_start) { |
@@ -1507,15 +1468,20 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
1507 | state = rb_entry(node, struct extent_state, rb_node); | 1468 | state = rb_entry(node, struct extent_state, rb_node); |
1508 | if (state->start > search_end) | 1469 | if (state->start > search_end) |
1509 | break; | 1470 | break; |
1510 | if (state->end >= cur_start && (state->state & bits)) { | 1471 | if (contig && found && state->start > last + 1) |
1472 | break; | ||
1473 | if (state->end >= cur_start && (state->state & bits) == bits) { | ||
1511 | total_bytes += min(search_end, state->end) + 1 - | 1474 | total_bytes += min(search_end, state->end) + 1 - |
1512 | max(cur_start, state->start); | 1475 | max(cur_start, state->start); |
1513 | if (total_bytes >= max_bytes) | 1476 | if (total_bytes >= max_bytes) |
1514 | break; | 1477 | break; |
1515 | if (!found) { | 1478 | if (!found) { |
1516 | *start = state->start; | 1479 | *start = max(cur_start, state->start); |
1517 | found = 1; | 1480 | found = 1; |
1518 | } | 1481 | } |
1482 | last = state->end; | ||
1483 | } else if (contig && found) { | ||
1484 | break; | ||
1519 | } | 1485 | } |
1520 | node = rb_next(node); | 1486 | node = rb_next(node); |
1521 | if (!node) | 1487 | if (!node) |
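The new contig flag changes the accounting: the walk stops at the first gap between matching ranges, so callers measure one contiguous run instead of the sum of scattered ranges, and *start is clamped so it never reports a position before the search window. A minimal userspace sketch of the same walk, with a sorted array standing in for the rbtree (all names here are illustrative, not the kernel API):

#include <stdint.h>
#include <stdio.h>

struct range { uint64_t start, end; unsigned long state; };

/* Count bytes whose state has all of 'bits' set, optionally stopping
 * at the first gap when 'contig' is nonzero. Mirrors the walk in
 * count_range_bits(), with a sorted array in place of the rbtree. */
static uint64_t count_bits(const struct range *r, int n,
                           uint64_t *start, uint64_t search_end,
                           uint64_t max_bytes, unsigned long bits,
                           int contig)
{
    uint64_t cur_start = *start, total = 0, last = 0;
    int found = 0;

    for (int i = 0; i < n; i++) {
        if (r[i].start > search_end)
            break;
        if (contig && found && r[i].start > last + 1)
            break;                       /* gap: contiguous run ended */
        if (r[i].end >= cur_start && (r[i].state & bits) == bits) {
            uint64_t lo = r[i].start > cur_start ? r[i].start : cur_start;
            uint64_t hi = r[i].end < search_end ? r[i].end : search_end;

            total += hi + 1 - lo;
            if (total >= max_bytes)
                break;
            if (!found) {
                *start = lo;             /* clamped, as in the fix */
                found = 1;
            }
            last = r[i].end;
        } else if (contig && found) {
            break;                       /* non-matching state ends the run */
        }
    }
    return total;
}

int main(void)
{
    struct range r[] = { {0, 4095, 1}, {4096, 8191, 1}, {16384, 20479, 1} };
    uint64_t start = 0;

    /* contig=1 stops at the hole after 8191: prints 8192, not 12288 */
    printf("%llu\n", (unsigned long long)count_bits(r, 3, &start,
           (uint64_t)-1, (uint64_t)-1, 1, 1));
    return 0;
}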
@@ -1773,6 +1739,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
1773 | 1739 | ||
1774 | do { | 1740 | do { |
1775 | struct page *page = bvec->bv_page; | 1741 | struct page *page = bvec->bv_page; |
1742 | struct extent_state *cached = NULL; | ||
1743 | struct extent_state *state; | ||
1744 | |||
1776 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 1745 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
1777 | 1746 | ||
1778 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + | 1747 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + |
@@ -1787,9 +1756,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
1787 | if (++bvec <= bvec_end) | 1756 | if (++bvec <= bvec_end) |
1788 | prefetchw(&bvec->bv_page->flags); | 1757 | prefetchw(&bvec->bv_page->flags); |
1789 | 1758 | ||
1759 | spin_lock(&tree->lock); | ||
1760 | state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED); | ||
1761 | if (state && state->start == start) { | ||
1762 | /* | ||
1763 | * take a reference on the state; unlock will drop | ||
1764 | * the ref | ||
1765 | */ | ||
1766 | cache_state(state, &cached); | ||
1767 | } | ||
1768 | spin_unlock(&tree->lock); | ||
1769 | |||
1790 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { | 1770 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { |
1791 | ret = tree->ops->readpage_end_io_hook(page, start, end, | 1771 | ret = tree->ops->readpage_end_io_hook(page, start, end, |
1792 | NULL); | 1772 | state); |
1793 | if (ret) | 1773 | if (ret) |
1794 | uptodate = 0; | 1774 | uptodate = 0; |
1795 | } | 1775 | } |
@@ -1802,15 +1782,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
1802 | test_bit(BIO_UPTODATE, &bio->bi_flags); | 1782 | test_bit(BIO_UPTODATE, &bio->bi_flags); |
1803 | if (err) | 1783 | if (err) |
1804 | uptodate = 0; | 1784 | uptodate = 0; |
1785 | uncache_state(&cached); | ||
1805 | continue; | 1786 | continue; |
1806 | } | 1787 | } |
1807 | } | 1788 | } |
1808 | 1789 | ||
1809 | if (uptodate) { | 1790 | if (uptodate) { |
1810 | set_extent_uptodate(tree, start, end, | 1791 | set_extent_uptodate(tree, start, end, &cached, |
1811 | GFP_ATOMIC); | 1792 | GFP_ATOMIC); |
1812 | } | 1793 | } |
1813 | unlock_extent(tree, start, end, GFP_ATOMIC); | 1794 | unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); |
1814 | 1795 | ||
1815 | if (whole_page) { | 1796 | if (whole_page) { |
1816 | if (uptodate) { | 1797 | if (uptodate) { |
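The read-completion path now pins the EXTENT_LOCKED state once under the tree lock and threads that cached pointer through set_extent_uptodate() and unlock_extent_cached(), avoiding a second rbtree search per page. A hedged userspace model of the pin/unpin refcount pattern (the struct and helpers are stand-ins, not the kernel types):

#include <stdatomic.h>
#include <stdlib.h>

struct state { atomic_int refs; /* plus range and bits in the real thing */ };

/* Pin: take an extra reference so the pointer stays valid after the
 * tree lock is dropped (the job cache_state() does for 'cached'). */
static void cache_state(struct state *s, struct state **cached)
{
    if (s && !*cached) {
        atomic_fetch_add(&s->refs, 1);
        *cached = s;
    }
}

/* Unpin: drop the reference; the last put frees the object (what
 * unlock_extent_cached()/free_extent_state() do when IO completes). */
static void uncache_state(struct state **cached)
{
    struct state *s = *cached;

    *cached = NULL;
    if (s && atomic_fetch_sub(&s->refs, 1) == 1)
        free(s);
}

int main(void)
{
    struct state *s = malloc(sizeof(*s));
    struct state *cached = NULL;

    atomic_init(&s->refs, 1);       /* the tree's own reference */
    cache_state(s, &cached);        /* endio pins it once, no relookup */
    /* ... set uptodate bits and unlock through 'cached' ... */
    uncache_state(&cached);         /* unlock path drops the pin */
    if (atomic_fetch_sub(&s->refs, 1) == 1)  /* tree drops its ref */
        free(s);
    return 0;
}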
@@ -1834,47 +1815,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
1834 | bio_put(bio); | 1815 | bio_put(bio); |
1835 | } | 1816 | } |
1836 | 1817 | ||
1837 | /* | 1818 | struct bio * |
1838 | * IO done from prepare_write is pretty simple, we just unlock | 1819 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, |
1839 | * the structs in the extent tree when done, and set the uptodate bits | 1820 | gfp_t gfp_flags) |
1840 | * as appropriate. | ||
1841 | */ | ||
1842 | static void end_bio_extent_preparewrite(struct bio *bio, int err) | ||
1843 | { | ||
1844 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
1845 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
1846 | struct extent_io_tree *tree; | ||
1847 | u64 start; | ||
1848 | u64 end; | ||
1849 | |||
1850 | do { | ||
1851 | struct page *page = bvec->bv_page; | ||
1852 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
1853 | |||
1854 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + | ||
1855 | bvec->bv_offset; | ||
1856 | end = start + bvec->bv_len - 1; | ||
1857 | |||
1858 | if (--bvec >= bio->bi_io_vec) | ||
1859 | prefetchw(&bvec->bv_page->flags); | ||
1860 | |||
1861 | if (uptodate) { | ||
1862 | set_extent_uptodate(tree, start, end, GFP_ATOMIC); | ||
1863 | } else { | ||
1864 | ClearPageUptodate(page); | ||
1865 | SetPageError(page); | ||
1866 | } | ||
1867 | |||
1868 | unlock_extent(tree, start, end, GFP_ATOMIC); | ||
1869 | |||
1870 | } while (bvec >= bio->bi_io_vec); | ||
1871 | |||
1872 | bio_put(bio); | ||
1873 | } | ||
1874 | |||
1875 | static struct bio * | ||
1876 | extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | ||
1877 | gfp_t gfp_flags) | ||
1878 | { | 1821 | { |
1879 | struct bio *bio; | 1822 | struct bio *bio; |
1880 | 1823 | ||
@@ -1901,17 +1844,15 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, | |||
1901 | struct page *page = bvec->bv_page; | 1844 | struct page *page = bvec->bv_page; |
1902 | struct extent_io_tree *tree = bio->bi_private; | 1845 | struct extent_io_tree *tree = bio->bi_private; |
1903 | u64 start; | 1846 | u64 start; |
1904 | u64 end; | ||
1905 | 1847 | ||
1906 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; | 1848 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; |
1907 | end = start + bvec->bv_len - 1; | ||
1908 | 1849 | ||
1909 | bio->bi_private = NULL; | 1850 | bio->bi_private = NULL; |
1910 | 1851 | ||
1911 | bio_get(bio); | 1852 | bio_get(bio); |
1912 | 1853 | ||
1913 | if (tree->ops && tree->ops->submit_bio_hook) | 1854 | if (tree->ops && tree->ops->submit_bio_hook) |
1914 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, | 1855 | ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio, |
1915 | mirror_num, bio_flags, start); | 1856 | mirror_num, bio_flags, start); |
1916 | else | 1857 | else |
1917 | submit_bio(rw, bio); | 1858 | submit_bio(rw, bio); |
@@ -1965,7 +1906,9 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, | |||
1965 | else | 1906 | else |
1966 | nr = bio_get_nr_vecs(bdev); | 1907 | nr = bio_get_nr_vecs(bdev); |
1967 | 1908 | ||
1968 | bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); | 1909 | bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); |
1910 | if (!bio) | ||
1911 | return -ENOMEM; | ||
1969 | 1912 | ||
1970 | bio_add_page(bio, page, page_size, offset); | 1913 | bio_add_page(bio, page, page_size, offset); |
1971 | bio->bi_end_io = end_io_func; | 1914 | bio->bi_end_io = end_io_func; |
@@ -1990,6 +1933,7 @@ void set_page_extent_mapped(struct page *page) | |||
1990 | 1933 | ||
1991 | static void set_page_extent_head(struct page *page, unsigned long len) | 1934 | static void set_page_extent_head(struct page *page, unsigned long len) |
1992 | { | 1935 | { |
1936 | WARN_ON(!PagePrivate(page)); | ||
1993 | set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); | 1937 | set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); |
1994 | } | 1938 | } |
1995 | 1939 | ||
@@ -2019,7 +1963,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2019 | struct btrfs_ordered_extent *ordered; | 1963 | struct btrfs_ordered_extent *ordered; |
2020 | int ret; | 1964 | int ret; |
2021 | int nr = 0; | 1965 | int nr = 0; |
2022 | size_t page_offset = 0; | 1966 | size_t pg_offset = 0; |
2023 | size_t iosize; | 1967 | size_t iosize; |
2024 | size_t disk_io_size; | 1968 | size_t disk_io_size; |
2025 | size_t blocksize = inode->i_sb->s_blocksize; | 1969 | size_t blocksize = inode->i_sb->s_blocksize; |
@@ -2027,6 +1971,13 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2027 | 1971 | ||
2028 | set_page_extent_mapped(page); | 1972 | set_page_extent_mapped(page); |
2029 | 1973 | ||
1974 | if (!PageUptodate(page)) { | ||
1975 | if (cleancache_get_page(page) == 0) { | ||
1976 | BUG_ON(blocksize != PAGE_SIZE); | ||
1977 | goto out; | ||
1978 | } | ||
1979 | } | ||
1980 | |||
2030 | end = page_end; | 1981 | end = page_end; |
2031 | while (1) { | 1982 | while (1) { |
2032 | lock_extent(tree, start, end, GFP_NOFS); | 1983 | lock_extent(tree, start, end, GFP_NOFS); |
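The cleancache hook gives readpage a fast path: when a transcendent-memory copy of the page is valid, the function jumps to the new out: label without walking extents or building a bio. Roughly this shape, as a toy model (the single-slot cache and all names here are invented for illustration):

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

struct page { char data[PAGE_SIZE]; int uptodate; };

/* Stand-in for cleancache_get_page(): returns 0 on a hit and fills
 * the page, nonzero on a miss. The "cache" is one static slot. */
static struct page side_cache = { "cached contents", 1 };

static int fast_cache_get(struct page *p)
{
    if (!side_cache.uptodate)
        return -1;
    memcpy(p->data, side_cache.data, PAGE_SIZE);
    p->uptodate = 1;
    return 0;
}

static int submit_read_bio(struct page *p)
{
    /* the real extent walk and bio submission would go here */
    p->uptodate = 1;
    return 0;
}

/* Shape of the new fast path: consult the side cache before building
 * any bio; on a hit, skip straight past the extent walk. */
static int read_page(struct page *p)
{
    if (!p->uptodate && fast_cache_get(p) == 0)
        return 0;                    /* hit: no IO submitted */
    return submit_read_bio(p);       /* miss: normal read path */
}

int main(void)
{
    struct page p = { {0}, 0 };

    read_page(&p);
    printf("%s\n", p.data);
    return 0;
}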
@@ -2053,19 +2004,22 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2053 | while (cur <= end) { | 2004 | while (cur <= end) { |
2054 | if (cur >= last_byte) { | 2005 | if (cur >= last_byte) { |
2055 | char *userpage; | 2006 | char *userpage; |
2056 | iosize = PAGE_CACHE_SIZE - page_offset; | 2007 | struct extent_state *cached = NULL; |
2008 | |||
2009 | iosize = PAGE_CACHE_SIZE - pg_offset; | ||
2057 | userpage = kmap_atomic(page, KM_USER0); | 2010 | userpage = kmap_atomic(page, KM_USER0); |
2058 | memset(userpage + page_offset, 0, iosize); | 2011 | memset(userpage + pg_offset, 0, iosize); |
2059 | flush_dcache_page(page); | 2012 | flush_dcache_page(page); |
2060 | kunmap_atomic(userpage, KM_USER0); | 2013 | kunmap_atomic(userpage, KM_USER0); |
2061 | set_extent_uptodate(tree, cur, cur + iosize - 1, | 2014 | set_extent_uptodate(tree, cur, cur + iosize - 1, |
2062 | GFP_NOFS); | 2015 | &cached, GFP_NOFS); |
2063 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | 2016 | unlock_extent_cached(tree, cur, cur + iosize - 1, |
2017 | &cached, GFP_NOFS); | ||
2064 | break; | 2018 | break; |
2065 | } | 2019 | } |
2066 | em = get_extent(inode, page, page_offset, cur, | 2020 | em = get_extent(inode, page, pg_offset, cur, |
2067 | end - cur + 1, 0); | 2021 | end - cur + 1, 0); |
2068 | if (IS_ERR(em) || !em) { | 2022 | if (IS_ERR_OR_NULL(em)) { |
2069 | SetPageError(page); | 2023 | SetPageError(page); |
2070 | unlock_extent(tree, cur, end, GFP_NOFS); | 2024 | unlock_extent(tree, cur, end, GFP_NOFS); |
2071 | break; | 2025 | break; |
@@ -2074,8 +2028,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2074 | BUG_ON(extent_map_end(em) <= cur); | 2028 | BUG_ON(extent_map_end(em) <= cur); |
2075 | BUG_ON(end < cur); | 2029 | BUG_ON(end < cur); |
2076 | 2030 | ||
2077 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 2031 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
2078 | this_bio_flag = EXTENT_BIO_COMPRESSED; | 2032 | this_bio_flag = EXTENT_BIO_COMPRESSED; |
2033 | extent_set_compress_type(&this_bio_flag, | ||
2034 | em->compress_type); | ||
2035 | } | ||
2079 | 2036 | ||
2080 | iosize = min(extent_map_end(em) - cur, end - cur + 1); | 2037 | iosize = min(extent_map_end(em) - cur, end - cur + 1); |
2081 | cur_end = min(extent_map_end(em) - 1, end); | 2038 | cur_end = min(extent_map_end(em) - 1, end); |
@@ -2097,16 +2054,19 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2097 | /* we've found a hole, just zero and go on */ | 2054 | /* we've found a hole, just zero and go on */ |
2098 | if (block_start == EXTENT_MAP_HOLE) { | 2055 | if (block_start == EXTENT_MAP_HOLE) { |
2099 | char *userpage; | 2056 | char *userpage; |
2057 | struct extent_state *cached = NULL; | ||
2058 | |||
2100 | userpage = kmap_atomic(page, KM_USER0); | 2059 | userpage = kmap_atomic(page, KM_USER0); |
2101 | memset(userpage + page_offset, 0, iosize); | 2060 | memset(userpage + pg_offset, 0, iosize); |
2102 | flush_dcache_page(page); | 2061 | flush_dcache_page(page); |
2103 | kunmap_atomic(userpage, KM_USER0); | 2062 | kunmap_atomic(userpage, KM_USER0); |
2104 | 2063 | ||
2105 | set_extent_uptodate(tree, cur, cur + iosize - 1, | 2064 | set_extent_uptodate(tree, cur, cur + iosize - 1, |
2106 | GFP_NOFS); | 2065 | &cached, GFP_NOFS); |
2107 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | 2066 | unlock_extent_cached(tree, cur, cur + iosize - 1, |
2067 | &cached, GFP_NOFS); | ||
2108 | cur = cur + iosize; | 2068 | cur = cur + iosize; |
2109 | page_offset += iosize; | 2069 | pg_offset += iosize; |
2110 | continue; | 2070 | continue; |
2111 | } | 2071 | } |
2112 | /* the get_extent function already copied into the page */ | 2072 | /* the get_extent function already copied into the page */ |
@@ -2115,7 +2075,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2115 | check_page_uptodate(tree, page); | 2075 | check_page_uptodate(tree, page); |
2116 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | 2076 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); |
2117 | cur = cur + iosize; | 2077 | cur = cur + iosize; |
2118 | page_offset += iosize; | 2078 | pg_offset += iosize; |
2119 | continue; | 2079 | continue; |
2120 | } | 2080 | } |
2121 | /* we have an inline extent but it didn't get marked up | 2081 | /* we have an inline extent but it didn't get marked up |
@@ -2125,7 +2085,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2125 | SetPageError(page); | 2085 | SetPageError(page); |
2126 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | 2086 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); |
2127 | cur = cur + iosize; | 2087 | cur = cur + iosize; |
2128 | page_offset += iosize; | 2088 | pg_offset += iosize; |
2129 | continue; | 2089 | continue; |
2130 | } | 2090 | } |
2131 | 2091 | ||
@@ -2138,7 +2098,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2138 | unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; | 2098 | unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; |
2139 | pnr -= page->index; | 2099 | pnr -= page->index; |
2140 | ret = submit_extent_page(READ, tree, page, | 2100 | ret = submit_extent_page(READ, tree, page, |
2141 | sector, disk_io_size, page_offset, | 2101 | sector, disk_io_size, pg_offset, |
2142 | bdev, bio, pnr, | 2102 | bdev, bio, pnr, |
2143 | end_bio_extent_readpage, mirror_num, | 2103 | end_bio_extent_readpage, mirror_num, |
2144 | *bio_flags, | 2104 | *bio_flags, |
@@ -2149,8 +2109,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2149 | if (ret) | 2109 | if (ret) |
2150 | SetPageError(page); | 2110 | SetPageError(page); |
2151 | cur = cur + iosize; | 2111 | cur = cur + iosize; |
2152 | page_offset += iosize; | 2112 | pg_offset += iosize; |
2153 | } | 2113 | } |
2114 | out: | ||
2154 | if (!nr) { | 2115 | if (!nr) { |
2155 | if (!PageError(page)) | 2116 | if (!PageError(page)) |
2156 | SetPageUptodate(page); | 2117 | SetPageUptodate(page); |
@@ -2169,7 +2130,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | |||
2169 | ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, | 2130 | ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, |
2170 | &bio_flags); | 2131 | &bio_flags); |
2171 | if (bio) | 2132 | if (bio) |
2172 | submit_one_bio(READ, bio, 0, bio_flags); | 2133 | ret = submit_one_bio(READ, bio, 0, bio_flags); |
2173 | return ret; | 2134 | return ret; |
2174 | } | 2135 | } |
2175 | 2136 | ||
@@ -2204,7 +2165,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2204 | u64 last_byte = i_size_read(inode); | 2165 | u64 last_byte = i_size_read(inode); |
2205 | u64 block_start; | 2166 | u64 block_start; |
2206 | u64 iosize; | 2167 | u64 iosize; |
2207 | u64 unlock_start; | ||
2208 | sector_t sector; | 2168 | sector_t sector; |
2209 | struct extent_state *cached_state = NULL; | 2169 | struct extent_state *cached_state = NULL; |
2210 | struct extent_map *em; | 2170 | struct extent_map *em; |
@@ -2223,10 +2183,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2223 | unsigned long nr_written = 0; | 2183 | unsigned long nr_written = 0; |
2224 | 2184 | ||
2225 | if (wbc->sync_mode == WB_SYNC_ALL) | 2185 | if (wbc->sync_mode == WB_SYNC_ALL) |
2226 | write_flags = WRITE_SYNC_PLUG; | 2186 | write_flags = WRITE_SYNC; |
2227 | else | 2187 | else |
2228 | write_flags = WRITE; | 2188 | write_flags = WRITE; |
2229 | 2189 | ||
2190 | trace___extent_writepage(page, inode, wbc); | ||
2191 | |||
2230 | WARN_ON(!PageLocked(page)); | 2192 | WARN_ON(!PageLocked(page)); |
2231 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | 2193 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); |
2232 | if (page->index > end_index || | 2194 | if (page->index > end_index || |
@@ -2329,7 +2291,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2329 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2291 | if (tree->ops && tree->ops->writepage_end_io_hook) |
2330 | tree->ops->writepage_end_io_hook(page, start, | 2292 | tree->ops->writepage_end_io_hook(page, start, |
2331 | page_end, NULL, 1); | 2293 | page_end, NULL, 1); |
2332 | unlock_start = page_end + 1; | ||
2333 | goto done; | 2294 | goto done; |
2334 | } | 2295 | } |
2335 | 2296 | ||
@@ -2340,12 +2301,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2340 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2301 | if (tree->ops && tree->ops->writepage_end_io_hook) |
2341 | tree->ops->writepage_end_io_hook(page, cur, | 2302 | tree->ops->writepage_end_io_hook(page, cur, |
2342 | page_end, NULL, 1); | 2303 | page_end, NULL, 1); |
2343 | unlock_start = page_end + 1; | ||
2344 | break; | 2304 | break; |
2345 | } | 2305 | } |
2346 | em = epd->get_extent(inode, page, pg_offset, cur, | 2306 | em = epd->get_extent(inode, page, pg_offset, cur, |
2347 | end - cur + 1, 1); | 2307 | end - cur + 1, 1); |
2348 | if (IS_ERR(em) || !em) { | 2308 | if (IS_ERR_OR_NULL(em)) { |
2349 | SetPageError(page); | 2309 | SetPageError(page); |
2350 | break; | 2310 | break; |
2351 | } | 2311 | } |
@@ -2387,7 +2347,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2387 | 2347 | ||
2388 | cur += iosize; | 2348 | cur += iosize; |
2389 | pg_offset += iosize; | 2349 | pg_offset += iosize; |
2390 | unlock_start = cur; | ||
2391 | continue; | 2350 | continue; |
2392 | } | 2351 | } |
2393 | /* leave this out until we have a page_mkwrite call */ | 2352 | /* leave this out until we have a page_mkwrite call */ |
@@ -2473,7 +2432,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2473 | pgoff_t index; | 2432 | pgoff_t index; |
2474 | pgoff_t end; /* Inclusive */ | 2433 | pgoff_t end; /* Inclusive */ |
2475 | int scanned = 0; | 2434 | int scanned = 0; |
2476 | int range_whole = 0; | ||
2477 | 2435 | ||
2478 | pagevec_init(&pvec, 0); | 2436 | pagevec_init(&pvec, 0); |
2479 | if (wbc->range_cyclic) { | 2437 | if (wbc->range_cyclic) { |
@@ -2482,8 +2440,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2482 | } else { | 2440 | } else { |
2483 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2441 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2484 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2442 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
2485 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | ||
2486 | range_whole = 1; | ||
2487 | scanned = 1; | 2443 | scanned = 1; |
2488 | } | 2444 | } |
2489 | retry: | 2445 | retry: |
@@ -2689,7 +2645,7 @@ int extent_readpages(struct extent_io_tree *tree, | |||
2689 | prefetchw(&page->flags); | 2645 | prefetchw(&page->flags); |
2690 | list_del(&page->lru); | 2646 | list_del(&page->lru); |
2691 | if (!add_to_page_cache_lru(page, mapping, | 2647 | if (!add_to_page_cache_lru(page, mapping, |
2692 | page->index, GFP_KERNEL)) { | 2648 | page->index, GFP_NOFS)) { |
2693 | __extent_read_full_page(tree, page, get_extent, | 2649 | __extent_read_full_page(tree, page, get_extent, |
2694 | &bio, 0, &bio_flags); | 2650 | &bio, 0, &bio_flags); |
2695 | } | 2651 | } |
@@ -2728,123 +2684,6 @@ int extent_invalidatepage(struct extent_io_tree *tree, | |||
2728 | } | 2684 | } |
2729 | 2685 | ||
2730 | /* | 2686 | /* |
2731 | * simple commit_write call, set_range_dirty is used to mark both | ||
2732 | * the pages and the extent records as dirty | ||
2733 | */ | ||
2734 | int extent_commit_write(struct extent_io_tree *tree, | ||
2735 | struct inode *inode, struct page *page, | ||
2736 | unsigned from, unsigned to) | ||
2737 | { | ||
2738 | loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; | ||
2739 | |||
2740 | set_page_extent_mapped(page); | ||
2741 | set_page_dirty(page); | ||
2742 | |||
2743 | if (pos > inode->i_size) { | ||
2744 | i_size_write(inode, pos); | ||
2745 | mark_inode_dirty(inode); | ||
2746 | } | ||
2747 | return 0; | ||
2748 | } | ||
2749 | |||
2750 | int extent_prepare_write(struct extent_io_tree *tree, | ||
2751 | struct inode *inode, struct page *page, | ||
2752 | unsigned from, unsigned to, get_extent_t *get_extent) | ||
2753 | { | ||
2754 | u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
2755 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; | ||
2756 | u64 block_start; | ||
2757 | u64 orig_block_start; | ||
2758 | u64 block_end; | ||
2759 | u64 cur_end; | ||
2760 | struct extent_map *em; | ||
2761 | unsigned blocksize = 1 << inode->i_blkbits; | ||
2762 | size_t page_offset = 0; | ||
2763 | size_t block_off_start; | ||
2764 | size_t block_off_end; | ||
2765 | int err = 0; | ||
2766 | int iocount = 0; | ||
2767 | int ret = 0; | ||
2768 | int isnew; | ||
2769 | |||
2770 | set_page_extent_mapped(page); | ||
2771 | |||
2772 | block_start = (page_start + from) & ~((u64)blocksize - 1); | ||
2773 | block_end = (page_start + to - 1) | (blocksize - 1); | ||
2774 | orig_block_start = block_start; | ||
2775 | |||
2776 | lock_extent(tree, page_start, page_end, GFP_NOFS); | ||
2777 | while (block_start <= block_end) { | ||
2778 | em = get_extent(inode, page, page_offset, block_start, | ||
2779 | block_end - block_start + 1, 1); | ||
2780 | if (IS_ERR(em) || !em) | ||
2781 | goto err; | ||
2782 | |||
2783 | cur_end = min(block_end, extent_map_end(em) - 1); | ||
2784 | block_off_start = block_start & (PAGE_CACHE_SIZE - 1); | ||
2785 | block_off_end = block_off_start + blocksize; | ||
2786 | isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); | ||
2787 | |||
2788 | if (!PageUptodate(page) && isnew && | ||
2789 | (block_off_end > to || block_off_start < from)) { | ||
2790 | void *kaddr; | ||
2791 | |||
2792 | kaddr = kmap_atomic(page, KM_USER0); | ||
2793 | if (block_off_end > to) | ||
2794 | memset(kaddr + to, 0, block_off_end - to); | ||
2795 | if (block_off_start < from) | ||
2796 | memset(kaddr + block_off_start, 0, | ||
2797 | from - block_off_start); | ||
2798 | flush_dcache_page(page); | ||
2799 | kunmap_atomic(kaddr, KM_USER0); | ||
2800 | } | ||
2801 | if ((em->block_start != EXTENT_MAP_HOLE && | ||
2802 | em->block_start != EXTENT_MAP_INLINE) && | ||
2803 | !isnew && !PageUptodate(page) && | ||
2804 | (block_off_end > to || block_off_start < from) && | ||
2805 | !test_range_bit(tree, block_start, cur_end, | ||
2806 | EXTENT_UPTODATE, 1, NULL)) { | ||
2807 | u64 sector; | ||
2808 | u64 extent_offset = block_start - em->start; | ||
2809 | size_t iosize; | ||
2810 | sector = (em->block_start + extent_offset) >> 9; | ||
2811 | iosize = (cur_end - block_start + blocksize) & | ||
2812 | ~((u64)blocksize - 1); | ||
2813 | /* | ||
2814 | * we've already got the extent locked, but we | ||
2815 | * need to split the state such that our end_bio | ||
2816 | * handler can clear the lock. | ||
2817 | */ | ||
2818 | set_extent_bit(tree, block_start, | ||
2819 | block_start + iosize - 1, | ||
2820 | EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS); | ||
2821 | ret = submit_extent_page(READ, tree, page, | ||
2822 | sector, iosize, page_offset, em->bdev, | ||
2823 | NULL, 1, | ||
2824 | end_bio_extent_preparewrite, 0, | ||
2825 | 0, 0); | ||
2826 | iocount++; | ||
2827 | block_start = block_start + iosize; | ||
2828 | } else { | ||
2829 | set_extent_uptodate(tree, block_start, cur_end, | ||
2830 | GFP_NOFS); | ||
2831 | unlock_extent(tree, block_start, cur_end, GFP_NOFS); | ||
2832 | block_start = cur_end + 1; | ||
2833 | } | ||
2834 | page_offset = block_start & (PAGE_CACHE_SIZE - 1); | ||
2835 | free_extent_map(em); | ||
2836 | } | ||
2837 | if (iocount) { | ||
2838 | wait_extent_bit(tree, orig_block_start, | ||
2839 | block_end, EXTENT_LOCKED); | ||
2840 | } | ||
2841 | check_page_uptodate(tree, page); | ||
2842 | err: | ||
2843 | /* FIXME, zero out newly allocated blocks on error */ | ||
2844 | return err; | ||
2845 | } | ||
2846 | |||
2847 | /* | ||
2848 | * a helper for releasepage, this tests for areas of the page that | 2687 | * a helper for releasepage, this tests for areas of the page that |
2849 | * are locked or under IO and drops the related state bits if it is safe | 2688 | * are locked or under IO and drops the related state bits if it is safe |
2850 | * to drop the page. | 2689 | * to drop the page. |
@@ -2867,9 +2706,17 @@ int try_release_extent_state(struct extent_map_tree *map, | |||
2867 | * at this point we can safely clear everything except the | 2706 | * at this point we can safely clear everything except the |
2868 | * locked bit and the nodatasum bit | 2707 | * locked bit and the nodatasum bit |
2869 | */ | 2708 | */ |
2870 | clear_extent_bit(tree, start, end, | 2709 | ret = clear_extent_bit(tree, start, end, |
2871 | ~(EXTENT_LOCKED | EXTENT_NODATASUM), | 2710 | ~(EXTENT_LOCKED | EXTENT_NODATASUM), |
2872 | 0, 0, NULL, mask); | 2711 | 0, 0, NULL, mask); |
2712 | |||
2713 | /* if clear_extent_bit failed due to -ENOMEM, | ||
2714 | * we can't allow the release to continue. | ||
2715 | */ | ||
2716 | if (ret < 0) | ||
2717 | ret = 0; | ||
2718 | else | ||
2719 | ret = 1; | ||
2873 | } | 2720 | } |
2874 | return ret; | 2721 | return ret; |
2875 | } | 2722 | } |
@@ -2894,7 +2741,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, | |||
2894 | len = end - start + 1; | 2741 | len = end - start + 1; |
2895 | write_lock(&map->lock); | 2742 | write_lock(&map->lock); |
2896 | em = lookup_extent_mapping(map, start, len); | 2743 | em = lookup_extent_mapping(map, start, len); |
2897 | if (!em || IS_ERR(em)) { | 2744 | if (IS_ERR_OR_NULL(em)) { |
2898 | write_unlock(&map->lock); | 2745 | write_unlock(&map->lock); |
2899 | break; | 2746 | break; |
2900 | } | 2747 | } |
@@ -2922,76 +2769,169 @@ int try_release_extent_mapping(struct extent_map_tree *map, | |||
2922 | return try_release_extent_state(map, tree, page, mask); | 2769 | return try_release_extent_state(map, tree, page, mask); |
2923 | } | 2770 | } |
2924 | 2771 | ||
2925 | sector_t extent_bmap(struct address_space *mapping, sector_t iblock, | 2772 | /* |
2926 | get_extent_t *get_extent) | 2773 | * helper function for fiemap, which doesn't want to see any holes. |
2774 | * This maps until we find something past 'last'. | ||
2775 | */ | ||
2776 | static struct extent_map *get_extent_skip_holes(struct inode *inode, | ||
2777 | u64 offset, | ||
2778 | u64 last, | ||
2779 | get_extent_t *get_extent) | ||
2927 | { | 2780 | { |
2928 | struct inode *inode = mapping->host; | 2781 | u64 sectorsize = BTRFS_I(inode)->root->sectorsize; |
2929 | struct extent_state *cached_state = NULL; | ||
2930 | u64 start = iblock << inode->i_blkbits; | ||
2931 | sector_t sector = 0; | ||
2932 | size_t blksize = (1 << inode->i_blkbits); | ||
2933 | struct extent_map *em; | 2782 | struct extent_map *em; |
2783 | u64 len; | ||
2934 | 2784 | ||
2935 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, | 2785 | if (offset >= last) |
2936 | 0, &cached_state, GFP_NOFS); | 2786 | return NULL; |
2937 | em = get_extent(inode, NULL, 0, start, blksize, 0); | ||
2938 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, | ||
2939 | start + blksize - 1, &cached_state, GFP_NOFS); | ||
2940 | if (!em || IS_ERR(em)) | ||
2941 | return 0; | ||
2942 | 2787 | ||
2943 | if (em->block_start > EXTENT_MAP_LAST_BYTE) | 2788 | while (1) { |
2944 | goto out; | 2789 | len = last - offset; |
2790 | if (len == 0) | ||
2791 | break; | ||
2792 | len = (len + sectorsize - 1) & ~(sectorsize - 1); | ||
2793 | em = get_extent(inode, NULL, 0, offset, len, 0); | ||
2794 | if (IS_ERR_OR_NULL(em)) | ||
2795 | return em; | ||
2945 | 2796 | ||
2946 | sector = (em->block_start + start - em->start) >> inode->i_blkbits; | 2797 | /* if this isn't a hole return it */ |
2947 | out: | 2798 | if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) && |
2948 | free_extent_map(em); | 2799 | em->block_start != EXTENT_MAP_HOLE) { |
2949 | return sector; | 2800 | return em; |
2801 | } | ||
2802 | |||
2803 | /* this is a hole, advance to the next extent */ | ||
2804 | offset = extent_map_end(em); | ||
2805 | free_extent_map(em); | ||
2806 | if (offset >= last) | ||
2807 | break; | ||
2808 | } | ||
2809 | return NULL; | ||
2950 | } | 2810 | } |
2951 | 2811 | ||
2952 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 2812 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
2953 | __u64 start, __u64 len, get_extent_t *get_extent) | 2813 | __u64 start, __u64 len, get_extent_t *get_extent) |
2954 | { | 2814 | { |
2955 | int ret; | 2815 | int ret = 0; |
2956 | u64 off = start; | 2816 | u64 off = start; |
2957 | u64 max = start + len; | 2817 | u64 max = start + len; |
2958 | u32 flags = 0; | 2818 | u32 flags = 0; |
2819 | u32 found_type; | ||
2820 | u64 last; | ||
2821 | u64 last_for_get_extent = 0; | ||
2959 | u64 disko = 0; | 2822 | u64 disko = 0; |
2823 | u64 isize = i_size_read(inode); | ||
2824 | struct btrfs_key found_key; | ||
2960 | struct extent_map *em = NULL; | 2825 | struct extent_map *em = NULL; |
2961 | struct extent_state *cached_state = NULL; | 2826 | struct extent_state *cached_state = NULL; |
2827 | struct btrfs_path *path; | ||
2828 | struct btrfs_file_extent_item *item; | ||
2962 | int end = 0; | 2829 | int end = 0; |
2963 | u64 em_start = 0, em_len = 0; | 2830 | u64 em_start = 0; |
2831 | u64 em_len = 0; | ||
2832 | u64 em_end = 0; | ||
2964 | unsigned long emflags; | 2833 | unsigned long emflags; |
2965 | ret = 0; | ||
2966 | 2834 | ||
2967 | if (len == 0) | 2835 | if (len == 0) |
2968 | return -EINVAL; | 2836 | return -EINVAL; |
2969 | 2837 | ||
2838 | path = btrfs_alloc_path(); | ||
2839 | if (!path) | ||
2840 | return -ENOMEM; | ||
2841 | path->leave_spinning = 1; | ||
2842 | |||
2843 | /* | ||
2844 | * look up the last file extent. We're not using i_size here | ||
2845 | * because there might be preallocation past i_size | ||
2846 | */ | ||
2847 | ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, | ||
2848 | path, btrfs_ino(inode), -1, 0); | ||
2849 | if (ret < 0) { | ||
2850 | btrfs_free_path(path); | ||
2851 | return ret; | ||
2852 | } | ||
2853 | WARN_ON(!ret); | ||
2854 | path->slots[0]--; | ||
2855 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
2856 | struct btrfs_file_extent_item); | ||
2857 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); | ||
2858 | found_type = btrfs_key_type(&found_key); | ||
2859 | |||
2860 | /* No extents, but there might be delalloc bits */ | ||
2861 | if (found_key.objectid != btrfs_ino(inode) || | ||
2862 | found_type != BTRFS_EXTENT_DATA_KEY) { | ||
2863 | /* have to trust i_size as the end */ | ||
2864 | last = (u64)-1; | ||
2865 | last_for_get_extent = isize; | ||
2866 | } else { | ||
2867 | /* | ||
2868 | * remember the start of the last extent. There are a | ||
2869 | * bunch of different factors that go into the length of the | ||
2870 | * extent, so it's much less complex to remember where it started | ||
2871 | */ | ||
2872 | last = found_key.offset; | ||
2873 | last_for_get_extent = last + 1; | ||
2874 | } | ||
2875 | btrfs_free_path(path); | ||
2876 | |||
2877 | /* | ||
2878 | * we might have some extents allocated but more delalloc past those | ||
2879 | * extents. so, we trust isize unless the start of the last extent is | ||
2880 | * beyond isize | ||
2881 | */ | ||
2882 | if (last < isize) { | ||
2883 | last = (u64)-1; | ||
2884 | last_for_get_extent = isize; | ||
2885 | } | ||
2886 | |||
2970 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, | 2887 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, |
2971 | &cached_state, GFP_NOFS); | 2888 | &cached_state, GFP_NOFS); |
2972 | em = get_extent(inode, NULL, 0, off, max - off, 0); | 2889 | |
2890 | em = get_extent_skip_holes(inode, off, last_for_get_extent, | ||
2891 | get_extent); | ||
2973 | if (!em) | 2892 | if (!em) |
2974 | goto out; | 2893 | goto out; |
2975 | if (IS_ERR(em)) { | 2894 | if (IS_ERR(em)) { |
2976 | ret = PTR_ERR(em); | 2895 | ret = PTR_ERR(em); |
2977 | goto out; | 2896 | goto out; |
2978 | } | 2897 | } |
2898 | |||
2979 | while (!end) { | 2899 | while (!end) { |
2980 | off = em->start + em->len; | 2900 | u64 offset_in_extent; |
2981 | if (off >= max) | 2901 | |
2982 | end = 1; | 2902 | /* break if the extent we found is outside the range */ |
2903 | if (em->start >= max || extent_map_end(em) < off) | ||
2904 | break; | ||
2983 | 2905 | ||
2984 | em_start = em->start; | 2906 | /* |
2985 | em_len = em->len; | 2907 | * get_extent may return an extent that starts before our |
2908 | * requested range. We have to make sure the ranges | ||
2909 | * we return to fiemap always move forward and don't | ||
2910 | * overlap, so adjust the offsets here | ||
2911 | */ | ||
2912 | em_start = max(em->start, off); | ||
2986 | 2913 | ||
2914 | /* | ||
2915 | * record the offset from the start of the extent | ||
2916 | * for adjusting the disk offset below | ||
2917 | */ | ||
2918 | offset_in_extent = em_start - em->start; | ||
2919 | em_end = extent_map_end(em); | ||
2920 | em_len = em_end - em_start; | ||
2921 | emflags = em->flags; | ||
2987 | disko = 0; | 2922 | disko = 0; |
2988 | flags = 0; | 2923 | flags = 0; |
2989 | 2924 | ||
2925 | /* | ||
2926 | * bump off for our next call to get_extent | ||
2927 | */ | ||
2928 | off = extent_map_end(em); | ||
2929 | if (off >= max) | ||
2930 | end = 1; | ||
2931 | |||
2990 | if (em->block_start == EXTENT_MAP_LAST_BYTE) { | 2932 | if (em->block_start == EXTENT_MAP_LAST_BYTE) { |
2991 | end = 1; | 2933 | end = 1; |
2992 | flags |= FIEMAP_EXTENT_LAST; | 2934 | flags |= FIEMAP_EXTENT_LAST; |
2993 | } else if (em->block_start == EXTENT_MAP_HOLE) { | ||
2994 | flags |= FIEMAP_EXTENT_UNWRITTEN; | ||
2995 | } else if (em->block_start == EXTENT_MAP_INLINE) { | 2935 | } else if (em->block_start == EXTENT_MAP_INLINE) { |
2996 | flags |= (FIEMAP_EXTENT_DATA_INLINE | | 2936 | flags |= (FIEMAP_EXTENT_DATA_INLINE | |
2997 | FIEMAP_EXTENT_NOT_ALIGNED); | 2937 | FIEMAP_EXTENT_NOT_ALIGNED); |
@@ -2999,32 +2939,32 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2999 | flags |= (FIEMAP_EXTENT_DELALLOC | | 2939 | flags |= (FIEMAP_EXTENT_DELALLOC | |
3000 | FIEMAP_EXTENT_UNKNOWN); | 2940 | FIEMAP_EXTENT_UNKNOWN); |
3001 | } else { | 2941 | } else { |
3002 | disko = em->block_start; | 2942 | disko = em->block_start + offset_in_extent; |
3003 | } | 2943 | } |
3004 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 2944 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) |
3005 | flags |= FIEMAP_EXTENT_ENCODED; | 2945 | flags |= FIEMAP_EXTENT_ENCODED; |
3006 | 2946 | ||
3007 | emflags = em->flags; | ||
3008 | free_extent_map(em); | 2947 | free_extent_map(em); |
3009 | em = NULL; | 2948 | em = NULL; |
2949 | if ((em_start >= last) || em_len == (u64)-1 || | ||
2950 | (last == (u64)-1 && isize <= em_end)) { | ||
2951 | flags |= FIEMAP_EXTENT_LAST; | ||
2952 | end = 1; | ||
2953 | } | ||
3010 | 2954 | ||
3011 | if (!end) { | 2955 | /* now scan forward to see if this is really the last extent. */ |
3012 | em = get_extent(inode, NULL, 0, off, max - off, 0); | 2956 | em = get_extent_skip_holes(inode, off, last_for_get_extent, |
3013 | if (!em) | 2957 | get_extent); |
3014 | goto out; | 2958 | if (IS_ERR(em)) { |
3015 | if (IS_ERR(em)) { | 2959 | ret = PTR_ERR(em); |
3016 | ret = PTR_ERR(em); | 2960 | goto out; |
3017 | goto out; | ||
3018 | } | ||
3019 | emflags = em->flags; | ||
3020 | } | 2961 | } |
3021 | if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { | 2962 | if (!em) { |
3022 | flags |= FIEMAP_EXTENT_LAST; | 2963 | flags |= FIEMAP_EXTENT_LAST; |
3023 | end = 1; | 2964 | end = 1; |
3024 | } | 2965 | } |
3025 | |||
3026 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, | 2966 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, |
3027 | em_len, flags); | 2967 | em_len, flags); |
3028 | if (ret) | 2968 | if (ret) |
3029 | goto out_free; | 2969 | goto out_free; |
3030 | } | 2970 | } |
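The reworked fiemap loop clamps each record to start at the current offset, derives the disk offset from offset_in_extent, and bumps off to the end of the mapping before the next lookup, so the ranges handed to fiemap_fill_next_extent() always move forward and never overlap. A compact userspace model of that loop (the toy extent list and skip_holes() stand in for get_extent_skip_holes()):

#include <stdint.h>
#include <stdio.h>

struct em { uint64_t start, len, block_start; };

/* Toy extent list: skip_holes() returns the first extent ending past
 * 'off', or NULL when nothing is left before 'last'. */
static struct em extents[] = { {0, 8192, 1 << 20}, {12288, 4096, 2 << 20} };

static struct em *skip_holes(uint64_t off, uint64_t last)
{
    if (off >= last)
        return NULL;
    for (unsigned i = 0; i < sizeof(extents) / sizeof(extents[0]); i++)
        if (off < extents[i].start + extents[i].len)
            return &extents[i];
    return NULL;
}

int main(void)
{
    uint64_t off = 4096, max = 32768;

    /* Each record is clamped to begin at 'off', so successive ranges
     * are strictly forward-moving and non-overlapping. */
    for (struct em *e = skip_holes(off, max); e; e = skip_holes(off, max)) {
        uint64_t em_start = e->start > off ? e->start : off;
        uint64_t offset_in_extent = em_start - e->start;
        uint64_t em_end = e->start + e->len;
        uint64_t disko = e->block_start + offset_in_extent;

        printf("logical %llu len %llu physical %llu\n",
               (unsigned long long)em_start,
               (unsigned long long)(em_end - em_start),
               (unsigned long long)disko);
        off = em_end;                 /* bump for the next lookup */
    }
    return 0;
}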
@@ -3078,6 +3018,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3078 | #endif | 3018 | #endif |
3079 | 3019 | ||
3080 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); | 3020 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); |
3021 | if (eb == NULL) | ||
3022 | return NULL; | ||
3081 | eb->start = start; | 3023 | eb->start = start; |
3082 | eb->len = len; | 3024 | eb->len = len; |
3083 | spin_lock_init(&eb->lock); | 3025 | spin_lock_init(&eb->lock); |
@@ -3104,10 +3046,42 @@ static void __free_extent_buffer(struct extent_buffer *eb) | |||
3104 | kmem_cache_free(extent_buffer_cache, eb); | 3046 | kmem_cache_free(extent_buffer_cache, eb); |
3105 | } | 3047 | } |
3106 | 3048 | ||
3049 | /* | ||
3050 | * Helper for releasing extent buffer pages. | ||
3051 | */ | ||
3052 | static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | ||
3053 | unsigned long start_idx) | ||
3054 | { | ||
3055 | unsigned long index; | ||
3056 | struct page *page; | ||
3057 | |||
3058 | if (!eb->first_page) | ||
3059 | return; | ||
3060 | |||
3061 | index = num_extent_pages(eb->start, eb->len); | ||
3062 | if (start_idx >= index) | ||
3063 | return; | ||
3064 | |||
3065 | do { | ||
3066 | index--; | ||
3067 | page = extent_buffer_page(eb, index); | ||
3068 | if (page) | ||
3069 | page_cache_release(page); | ||
3070 | } while (index != start_idx); | ||
3071 | } | ||
3072 | |||
3073 | /* | ||
3074 | * Helper for releasing the extent buffer. | ||
3075 | */ | ||
3076 | static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) | ||
3077 | { | ||
3078 | btrfs_release_extent_buffer_page(eb, 0); | ||
3079 | __free_extent_buffer(eb); | ||
3080 | } | ||
3081 | |||
3107 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | 3082 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, |
3108 | u64 start, unsigned long len, | 3083 | u64 start, unsigned long len, |
3109 | struct page *page0, | 3084 | struct page *page0) |
3110 | gfp_t mask) | ||
3111 | { | 3085 | { |
3112 | unsigned long num_pages = num_extent_pages(start, len); | 3086 | unsigned long num_pages = num_extent_pages(start, len); |
3113 | unsigned long i; | 3087 | unsigned long i; |
@@ -3117,18 +3091,18 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3117 | struct page *p; | 3091 | struct page *p; |
3118 | struct address_space *mapping = tree->mapping; | 3092 | struct address_space *mapping = tree->mapping; |
3119 | int uptodate = 1; | 3093 | int uptodate = 1; |
3094 | int ret; | ||
3120 | 3095 | ||
3121 | spin_lock(&tree->buffer_lock); | 3096 | rcu_read_lock(); |
3122 | eb = buffer_search(tree, start); | 3097 | eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); |
3123 | if (eb) { | 3098 | if (eb && atomic_inc_not_zero(&eb->refs)) { |
3124 | atomic_inc(&eb->refs); | 3099 | rcu_read_unlock(); |
3125 | spin_unlock(&tree->buffer_lock); | ||
3126 | mark_page_accessed(eb->first_page); | 3100 | mark_page_accessed(eb->first_page); |
3127 | return eb; | 3101 | return eb; |
3128 | } | 3102 | } |
3129 | spin_unlock(&tree->buffer_lock); | 3103 | rcu_read_unlock(); |
3130 | 3104 | ||
3131 | eb = __alloc_extent_buffer(tree, start, len, mask); | 3105 | eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS); |
3132 | if (!eb) | 3106 | if (!eb) |
3133 | return NULL; | 3107 | return NULL; |
3134 | 3108 | ||
@@ -3145,7 +3119,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3145 | i = 0; | 3119 | i = 0; |
3146 | } | 3120 | } |
3147 | for (; i < num_pages; i++, index++) { | 3121 | for (; i < num_pages; i++, index++) { |
3148 | p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); | 3122 | p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM); |
3149 | if (!p) { | 3123 | if (!p) { |
3150 | WARN_ON(1); | 3124 | WARN_ON(1); |
3151 | goto free_eb; | 3125 | goto free_eb; |
@@ -3160,50 +3134,77 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3160 | } | 3134 | } |
3161 | if (!PageUptodate(p)) | 3135 | if (!PageUptodate(p)) |
3162 | uptodate = 0; | 3136 | uptodate = 0; |
3163 | unlock_page(p); | 3137 | |
3138 | /* | ||
3139 | * see below about how we avoid a nasty race with release page | ||
3140 | * and why we unlock later | ||
3141 | */ | ||
3142 | if (i != 0) | ||
3143 | unlock_page(p); | ||
3164 | } | 3144 | } |
3165 | if (uptodate) | 3145 | if (uptodate) |
3166 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 3146 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
3167 | 3147 | ||
3148 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | ||
3149 | if (ret) | ||
3150 | goto free_eb; | ||
3151 | |||
3168 | spin_lock(&tree->buffer_lock); | 3152 | spin_lock(&tree->buffer_lock); |
3169 | exists = buffer_tree_insert(tree, start, &eb->rb_node); | 3153 | ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb); |
3170 | if (exists) { | 3154 | if (ret == -EEXIST) { |
3155 | exists = radix_tree_lookup(&tree->buffer, | ||
3156 | start >> PAGE_CACHE_SHIFT); | ||
3171 | /* add one reference for the caller */ | 3157 | /* add one reference for the caller */ |
3172 | atomic_inc(&exists->refs); | 3158 | atomic_inc(&exists->refs); |
3173 | spin_unlock(&tree->buffer_lock); | 3159 | spin_unlock(&tree->buffer_lock); |
3160 | radix_tree_preload_end(); | ||
3174 | goto free_eb; | 3161 | goto free_eb; |
3175 | } | 3162 | } |
3176 | /* add one reference for the tree */ | 3163 | /* add one reference for the tree */ |
3177 | atomic_inc(&eb->refs); | 3164 | atomic_inc(&eb->refs); |
3178 | spin_unlock(&tree->buffer_lock); | 3165 | spin_unlock(&tree->buffer_lock); |
3166 | radix_tree_preload_end(); | ||
3167 | |||
3168 | /* | ||
3169 | * there is a race where release page may have | ||
3170 | * tried to find this extent buffer in the radix | ||
3171 | * but failed. It will tell the VM it is safe to | ||
3172 | * reclaim the page, and it will clear the page private bit. | ||
3173 | * We must make sure to set the page private bit properly | ||
3174 | * after the extent buffer is in the radix tree so | ||
3175 | * it doesn't get lost | ||
3176 | */ | ||
3177 | set_page_extent_mapped(eb->first_page); | ||
3178 | set_page_extent_head(eb->first_page, eb->len); | ||
3179 | if (!page0) | ||
3180 | unlock_page(eb->first_page); | ||
3179 | return eb; | 3181 | return eb; |
3180 | 3182 | ||
3181 | free_eb: | 3183 | free_eb: |
3184 | if (eb->first_page && !page0) | ||
3185 | unlock_page(eb->first_page); | ||
3186 | |||
3182 | if (!atomic_dec_and_test(&eb->refs)) | 3187 | if (!atomic_dec_and_test(&eb->refs)) |
3183 | return exists; | 3188 | return exists; |
3184 | for (index = 1; index < i; index++) | 3189 | btrfs_release_extent_buffer(eb); |
3185 | page_cache_release(extent_buffer_page(eb, index)); | ||
3186 | page_cache_release(extent_buffer_page(eb, 0)); | ||
3187 | __free_extent_buffer(eb); | ||
3188 | return exists; | 3190 | return exists; |
3189 | } | 3191 | } |
3190 | 3192 | ||
3191 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, | 3193 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, |
3192 | u64 start, unsigned long len, | 3194 | u64 start, unsigned long len) |
3193 | gfp_t mask) | ||
3194 | { | 3195 | { |
3195 | struct extent_buffer *eb; | 3196 | struct extent_buffer *eb; |
3196 | 3197 | ||
3197 | spin_lock(&tree->buffer_lock); | 3198 | rcu_read_lock(); |
3198 | eb = buffer_search(tree, start); | 3199 | eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); |
3199 | if (eb) | 3200 | if (eb && atomic_inc_not_zero(&eb->refs)) { |
3200 | atomic_inc(&eb->refs); | 3201 | rcu_read_unlock(); |
3201 | spin_unlock(&tree->buffer_lock); | ||
3202 | |||
3203 | if (eb) | ||
3204 | mark_page_accessed(eb->first_page); | 3202 | mark_page_accessed(eb->first_page); |
3203 | return eb; | ||
3204 | } | ||
3205 | rcu_read_unlock(); | ||
3205 | 3206 | ||
3206 | return eb; | 3207 | return NULL; |
3207 | } | 3208 | } |
3208 | 3209 | ||
3209 | void free_extent_buffer(struct extent_buffer *eb) | 3210 | void free_extent_buffer(struct extent_buffer *eb) |
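Replacing the rbtree with a radix tree lets lookups run under rcu_read_lock() with no spinlock; atomic_inc_not_zero() refuses buffers whose refcount already reached zero, since those may be queued for RCU freeing. A sketch of that lookup discipline in C11 atomics (a single atomic pointer models the tree slot; the RCU read section is only noted in comments):

#include <stdatomic.h>
#include <stddef.h>

struct buffer { atomic_int refs; /* pages, flags, ... */ };

/* Take a reference only if the object is still live: a zero refcount
 * means a concurrent release already won and the memory is headed for
 * the RCU-deferred free, so it must not be resurrected. */
static int inc_not_zero(atomic_int *refs)
{
    int v = atomic_load(refs);

    while (v != 0)
        if (atomic_compare_exchange_weak(refs, &v, v + 1))
            return 1;
    return 0;
}

/* Lock-free lookup shape: read the tree slot (a plain atomic pointer
 * models the radix tree here), then pin it with inc_not_zero. */
static struct buffer *find_buffer(_Atomic(struct buffer *) *slot)
{
    /* rcu_read_lock() would open the read-side section here */
    struct buffer *b = atomic_load(slot);

    if (b && inc_not_zero(&b->refs))
        return b;        /* pinned: usable after rcu_read_unlock() */
    return NULL;
}

int main(void)
{
    static struct buffer eb;
    _Atomic(struct buffer *) slot = &eb;

    atomic_init(&eb.refs, 1);
    return find_buffer(&slot) ? 0 : 1;
}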
@@ -3232,10 +3233,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3232 | continue; | 3233 | continue; |
3233 | 3234 | ||
3234 | lock_page(page); | 3235 | lock_page(page); |
3236 | WARN_ON(!PagePrivate(page)); | ||
3237 | |||
3238 | set_page_extent_mapped(page); | ||
3235 | if (i == 0) | 3239 | if (i == 0) |
3236 | set_page_extent_head(page, eb->len); | 3240 | set_page_extent_head(page, eb->len); |
3237 | else | ||
3238 | set_page_private(page, EXTENT_PAGE_PRIVATE); | ||
3239 | 3241 | ||
3240 | clear_page_dirty_for_io(page); | 3242 | clear_page_dirty_for_io(page); |
3241 | spin_lock_irq(&page->mapping->tree_lock); | 3243 | spin_lock_irq(&page->mapping->tree_lock); |
@@ -3250,13 +3252,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3250 | return 0; | 3252 | return 0; |
3251 | } | 3253 | } |
3252 | 3254 | ||
3253 | int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, | ||
3254 | struct extent_buffer *eb) | ||
3255 | { | ||
3256 | return wait_on_extent_writeback(tree, eb->start, | ||
3257 | eb->start + eb->len - 1); | ||
3258 | } | ||
3259 | |||
3260 | int set_extent_buffer_dirty(struct extent_io_tree *tree, | 3255 | int set_extent_buffer_dirty(struct extent_io_tree *tree, |
3261 | struct extent_buffer *eb) | 3256 | struct extent_buffer *eb) |
3262 | { | 3257 | { |
@@ -3302,7 +3297,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3302 | num_pages = num_extent_pages(eb->start, eb->len); | 3297 | num_pages = num_extent_pages(eb->start, eb->len); |
3303 | 3298 | ||
3304 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | 3299 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, |
3305 | GFP_NOFS); | 3300 | NULL, GFP_NOFS); |
3306 | for (i = 0; i < num_pages; i++) { | 3301 | for (i = 0; i < num_pages; i++) { |
3307 | page = extent_buffer_page(eb, i); | 3302 | page = extent_buffer_page(eb, i); |
3308 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || | 3303 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || |
@@ -3425,6 +3420,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
3425 | 3420 | ||
3426 | for (i = start_i; i < num_pages; i++) { | 3421 | for (i = start_i; i < num_pages; i++) { |
3427 | page = extent_buffer_page(eb, i); | 3422 | page = extent_buffer_page(eb, i); |
3423 | |||
3424 | WARN_ON(!PagePrivate(page)); | ||
3425 | |||
3426 | set_page_extent_mapped(page); | ||
3427 | if (i == 0) | ||
3428 | set_page_extent_head(page, eb->len); | ||
3429 | |||
3428 | if (inc_all_pages) | 3430 | if (inc_all_pages) |
3429 | page_cache_get(page); | 3431 | page_cache_get(page); |
3430 | if (!PageUptodate(page)) { | 3432 | if (!PageUptodate(page)) { |
@@ -3530,6 +3532,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | |||
3530 | "wanted %lu %lu\n", (unsigned long long)eb->start, | 3532 | "wanted %lu %lu\n", (unsigned long long)eb->start, |
3531 | eb->len, start, min_len); | 3533 | eb->len, start, min_len); |
3532 | WARN_ON(1); | 3534 | WARN_ON(1); |
3535 | return -EINVAL; | ||
3533 | } | 3536 | } |
3534 | 3537 | ||
3535 | p = extent_buffer_page(eb, i); | 3538 | p = extent_buffer_page(eb, i); |
@@ -3722,6 +3725,12 @@ static void move_pages(struct page *dst_page, struct page *src_page, | |||
3722 | kunmap_atomic(dst_kaddr, KM_USER0); | 3725 | kunmap_atomic(dst_kaddr, KM_USER0); |
3723 | } | 3726 | } |
3724 | 3727 | ||
3728 | static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) | ||
3729 | { | ||
3730 | unsigned long distance = (src > dst) ? src - dst : dst - src; | ||
3731 | return distance < len; | ||
3732 | } | ||
3733 | |||
3725 | static void copy_pages(struct page *dst_page, struct page *src_page, | 3734 | static void copy_pages(struct page *dst_page, struct page *src_page, |
3726 | unsigned long dst_off, unsigned long src_off, | 3735 | unsigned long dst_off, unsigned long src_off, |
3727 | unsigned long len) | 3736 | unsigned long len) |
@@ -3729,10 +3738,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page, | |||
3729 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); | 3738 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); |
3730 | char *src_kaddr; | 3739 | char *src_kaddr; |
3731 | 3740 | ||
3732 | if (dst_page != src_page) | 3741 | if (dst_page != src_page) { |
3733 | src_kaddr = kmap_atomic(src_page, KM_USER1); | 3742 | src_kaddr = kmap_atomic(src_page, KM_USER1); |
3734 | else | 3743 | } else { |
3735 | src_kaddr = dst_kaddr; | 3744 | src_kaddr = dst_kaddr; |
3745 | BUG_ON(areas_overlap(src_off, dst_off, len)); | ||
3746 | } | ||
3736 | 3747 | ||
3737 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); | 3748 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); |
3738 | kunmap_atomic(dst_kaddr, KM_USER0); | 3749 | kunmap_atomic(dst_kaddr, KM_USER0); |
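areas_overlap() reduces the overlap test for two same-length regions to a distance check: [src, src+len) and [dst, dst+len) intersect exactly when |src - dst| < len, which is why copy_pages() can BUG on same-page overlap instead of silently corrupting data with memcpy. A standalone check of the arithmetic:

#include <assert.h>
#include <stdbool.h>

/* Two len-byte regions overlap iff their starts are closer than len. */
static bool areas_overlap(unsigned long src, unsigned long dst,
                          unsigned long len)
{
    unsigned long distance = (src > dst) ? src - dst : dst - src;

    return distance < len;
}

int main(void)
{
    assert(areas_overlap(0, 10, 11));    /* [0,11) and [10,21) share 10 */
    assert(!areas_overlap(0, 10, 10));   /* [0,10) and [10,20) just touch */
    assert(areas_overlap(100, 96, 8));   /* order of src/dst is irrelevant */
    return 0;
}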
@@ -3807,7 +3818,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | |||
3807 | "len %lu len %lu\n", dst_offset, len, dst->len); | 3818 | "len %lu len %lu\n", dst_offset, len, dst->len); |
3808 | BUG_ON(1); | 3819 | BUG_ON(1); |
3809 | } | 3820 | } |
3810 | if (dst_offset < src_offset) { | 3821 | if (!areas_overlap(src_offset, dst_offset, len)) { |
3811 | memcpy_extent_buffer(dst, dst_offset, src_offset, len); | 3822 | memcpy_extent_buffer(dst, dst_offset, src_offset, len); |
3812 | return; | 3823 | return; |
3813 | } | 3824 | } |
@@ -3833,34 +3844,47 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | |||
3833 | } | 3844 | } |
3834 | } | 3845 | } |
3835 | 3846 | ||
3847 | static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) | ||
3848 | { | ||
3849 | struct extent_buffer *eb = | ||
3850 | container_of(head, struct extent_buffer, rcu_head); | ||
3851 | |||
3852 | btrfs_release_extent_buffer(eb); | ||
3853 | } | ||
3854 | |||
3836 | int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) | 3855 | int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) |
3837 | { | 3856 | { |
3838 | u64 start = page_offset(page); | 3857 | u64 start = page_offset(page); |
3839 | struct extent_buffer *eb; | 3858 | struct extent_buffer *eb; |
3840 | int ret = 1; | 3859 | int ret = 1; |
3841 | unsigned long i; | ||
3842 | unsigned long num_pages; | ||
3843 | 3860 | ||
3844 | spin_lock(&tree->buffer_lock); | 3861 | spin_lock(&tree->buffer_lock); |
3845 | eb = buffer_search(tree, start); | 3862 | eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); |
3846 | if (!eb) | 3863 | if (!eb) { |
3847 | goto out; | 3864 | spin_unlock(&tree->buffer_lock); |
3865 | return ret; | ||
3866 | } | ||
3848 | 3867 | ||
3849 | if (atomic_read(&eb->refs) > 1) { | 3868 | if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { |
3850 | ret = 0; | 3869 | ret = 0; |
3851 | goto out; | 3870 | goto out; |
3852 | } | 3871 | } |
3853 | if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | 3872 | |
3873 | /* | ||
3874 | * set @eb->refs to 0 if it is already 1, and then release the @eb. | ||
3875 | * Or go back. | ||
3876 | */ | ||
3877 | if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) { | ||
3854 | ret = 0; | 3878 | ret = 0; |
3855 | goto out; | 3879 | goto out; |
3856 | } | 3880 | } |
3857 | /* at this point we can safely release the extent buffer */ | 3881 | |
3858 | num_pages = num_extent_pages(eb->start, eb->len); | 3882 | radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT); |
3859 | for (i = 0; i < num_pages; i++) | ||
3860 | page_cache_release(extent_buffer_page(eb, i)); | ||
3861 | rb_erase(&eb->rb_node, &tree->buffer); | ||
3862 | __free_extent_buffer(eb); | ||
3863 | out: | 3883 | out: |
3864 | spin_unlock(&tree->buffer_lock); | 3884 | spin_unlock(&tree->buffer_lock); |
3885 | |||
3886 | /* at this point we can safely release the extent buffer */ | ||
3887 | if (atomic_read(&eb->refs) == 0) | ||
3888 | call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); | ||
3865 | return ret; | 3889 | return ret; |
3866 | } | 3890 | } |
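try_release_extent_buffer() now hands off the last reference with atomic_cmpxchg(&eb->refs, 1, 0): only the thread that wins the 1 -> 0 transition deletes the radix slot and schedules the RCU-deferred free, so readers that already took a reference keep a valid buffer. A hedged userspace model of the release side, pairing with the lookup sketch above (a direct free() stands in for call_rcu and the grace period):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct buffer { atomic_int refs; };

/* Release only if we are the sole owner: move refs 1 -> 0 atomically.
 * If a lookup raced and pinned the buffer (refs > 1), back off. */
static bool try_release(struct buffer *b, _Atomic(struct buffer *) *slot)
{
    int one = 1;

    if (!atomic_compare_exchange_strong(&b->refs, &one, 0))
        return false;                 /* still referenced elsewhere */

    atomic_store(slot, NULL);         /* unpublish: no new lookups */
    /* call_rcu(...) would defer this past all RCU readers; freeing
     * directly here stands in for that grace period. */
    free(b);
    return true;
}

int main(void)
{
    struct buffer *b = malloc(sizeof(*b));
    _Atomic(struct buffer *) slot = b;

    atomic_init(&b->refs, 1);
    return try_release(b, &slot) ? 0 : 1;
}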
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 5691c7b590da..a11a92ee2d30 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -20,13 +20,18 @@ | |||
20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | 21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) |
22 | 22 | ||
23 | /* flags for bio submission */ | 23 | /* |
24 | * flags for bio submission. The high bits indicate the compression | ||
25 | * type for this bio | ||
26 | */ | ||
24 | #define EXTENT_BIO_COMPRESSED 1 | 27 | #define EXTENT_BIO_COMPRESSED 1 |
28 | #define EXTENT_BIO_FLAG_SHIFT 16 | ||
25 | 29 | ||
26 | /* these are bit numbers for test/set bit */ | 30 | /* these are bit numbers for test/set bit */ |
27 | #define EXTENT_BUFFER_UPTODATE 0 | 31 | #define EXTENT_BUFFER_UPTODATE 0 |
28 | #define EXTENT_BUFFER_BLOCKING 1 | 32 | #define EXTENT_BUFFER_BLOCKING 1 |
29 | #define EXTENT_BUFFER_DIRTY 2 | 33 | #define EXTENT_BUFFER_DIRTY 2 |
34 | #define EXTENT_BUFFER_CORRUPT 3 | ||
30 | 35 | ||
31 | /* these are flags for extent_clear_unlock_delalloc */ | 36 | /* these are flags for extent_clear_unlock_delalloc */ |
32 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 | 37 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 |
@@ -85,7 +90,7 @@ struct extent_io_ops { | |||
85 | 90 | ||
86 | struct extent_io_tree { | 91 | struct extent_io_tree { |
87 | struct rb_root state; | 92 | struct rb_root state; |
88 | struct rb_root buffer; | 93 | struct radix_tree_root buffer; |
89 | struct address_space *mapping; | 94 | struct address_space *mapping; |
90 | u64 dirty_bytes; | 95 | u64 dirty_bytes; |
91 | spinlock_t lock; | 96 | spinlock_t lock; |
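The buffer tree changes from an rb_root to a radix_tree_root indexed by start >> PAGE_CACHE_SHIFT, which is what enables the lookup shape shown in try_release_extent_buffer() above. A hedged sketch of the same insert/lookup pattern under a spinlock (the demo_* names are hypothetical):

#include <linux/radix-tree.h>
#include <linux/spinlock.h>

static RADIX_TREE(buffer_tree, GFP_ATOMIC);     /* keyed by page index */
static DEFINE_SPINLOCK(buffer_lock);

static int demo_insert(unsigned long index, void *item)
{
        int ret;

        /*
         * GFP_ATOMIC allows insertion under the spinlock; callers that
         * must not see -ENOMEM can radix_tree_preload() beforehand.
         */
        spin_lock(&buffer_lock);
        ret = radix_tree_insert(&buffer_tree, index, item);
        spin_unlock(&buffer_lock);
        return ret;             /* 0, -EEXIST or -ENOMEM */
}

static void *demo_lookup(unsigned long index)
{
        void *item;

        spin_lock(&buffer_lock);
        item = radix_tree_lookup(&buffer_tree, index);
        spin_unlock(&buffer_lock);
        return item;
}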
@@ -121,9 +126,9 @@ struct extent_buffer { | |||
121 | unsigned long map_len; | 126 | unsigned long map_len; |
122 | struct page *first_page; | 127 | struct page *first_page; |
123 | unsigned long bflags; | 128 | unsigned long bflags; |
124 | atomic_t refs; | ||
125 | struct list_head leak_list; | 129 | struct list_head leak_list; |
126 | struct rb_node rb_node; | 130 | struct rcu_head rcu_head; |
131 | atomic_t refs; | ||
127 | 132 | ||
128 | /* the spinlock is used to protect most operations */ | 133 | /* the spinlock is used to protect most operations */ |
129 | spinlock_t lock; | 134 | spinlock_t lock; |
@@ -135,25 +140,27 @@ struct extent_buffer { | |||
135 | wait_queue_head_t lock_wq; | 140 | wait_queue_head_t lock_wq; |
136 | }; | 141 | }; |
137 | 142 | ||
138 | struct extent_map_tree; | 143 | static inline void extent_set_compress_type(unsigned long *bio_flags, |
144 | int compress_type) | ||
145 | { | ||
146 | *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT; | ||
147 | } | ||
139 | 148 | ||
140 | static inline struct extent_state *extent_state_next(struct extent_state *state) | 149 | static inline int extent_compress_type(unsigned long bio_flags) |
141 | { | 150 | { |
142 | struct rb_node *node; | 151 | return bio_flags >> EXTENT_BIO_FLAG_SHIFT; |
143 | node = rb_next(&state->rb_node); | ||
144 | if (!node) | ||
145 | return NULL; | ||
146 | return rb_entry(node, struct extent_state, rb_node); | ||
147 | } | 152 | } |
148 | 153 | ||
154 | struct extent_map_tree; | ||
155 | |||
149 | typedef struct extent_map *(get_extent_t)(struct inode *inode, | 156 | typedef struct extent_map *(get_extent_t)(struct inode *inode, |
150 | struct page *page, | 157 | struct page *page, |
151 | size_t page_offset, | 158 | size_t pg_offset, |
152 | u64 start, u64 len, | 159 | u64 start, u64 len, |
153 | int create); | 160 | int create); |
154 | 161 | ||
155 | void extent_io_tree_init(struct extent_io_tree *tree, | 162 | void extent_io_tree_init(struct extent_io_tree *tree, |
156 | struct address_space *mapping, gfp_t mask); | 163 | struct address_space *mapping); |
157 | int try_release_extent_mapping(struct extent_map_tree *map, | 164 | int try_release_extent_mapping(struct extent_map_tree *map, |
158 | struct extent_io_tree *tree, struct page *page, | 165 | struct extent_io_tree *tree, struct page *page, |
159 | gfp_t mask); | 166 | gfp_t mask); |
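With EXTENT_BIO_FLAG_SHIFT the compression type rides in the high 16 bits of bio_flags while the low bits keep flags such as EXTENT_BIO_COMPRESSED. A small sketch of the round trip through the two inline helpers above, assuming the BTRFS_COMPRESS_* constants from ctree.h:

static int demo_pack_compression(void)
{
        unsigned long bio_flags = EXTENT_BIO_COMPRESSED;

        extent_set_compress_type(&bio_flags, BTRFS_COMPRESS_LZO);

        /* the low 16 bits still carry the ordinary bio flags */
        WARN_ON(!(bio_flags & EXTENT_BIO_COMPRESSED));

        return extent_compress_type(bio_flags);  /* BTRFS_COMPRESS_LZO */
}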
@@ -176,7 +183,7 @@ void extent_io_exit(void); | |||
176 | 183 | ||
177 | u64 count_range_bits(struct extent_io_tree *tree, | 184 | u64 count_range_bits(struct extent_io_tree *tree, |
178 | u64 *start, u64 search_end, | 185 | u64 *start, u64 search_end, |
179 | u64 max_bytes, unsigned long bits); | 186 | u64 max_bytes, unsigned long bits, int contig); |
180 | 187 | ||
181 | void free_extent_state(struct extent_state *state); | 188 | void free_extent_state(struct extent_state *state); |
182 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 189 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -192,21 +199,15 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
192 | int bits, int exclusive_bits, u64 *failed_start, | 199 | int bits, int exclusive_bits, u64 *failed_start, |
193 | struct extent_state **cached_state, gfp_t mask); | 200 | struct extent_state **cached_state, gfp_t mask); |
194 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 201 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
195 | gfp_t mask); | 202 | struct extent_state **cached_state, gfp_t mask); |
196 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 203 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
197 | gfp_t mask); | 204 | gfp_t mask); |
198 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 205 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
199 | gfp_t mask); | 206 | gfp_t mask); |
200 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 207 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
201 | gfp_t mask); | 208 | gfp_t mask); |
202 | int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
203 | gfp_t mask); | ||
204 | int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start, | ||
205 | u64 end, gfp_t mask); | ||
206 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | 209 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, |
207 | struct extent_state **cached_state, gfp_t mask); | 210 | struct extent_state **cached_state, gfp_t mask); |
208 | int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
209 | gfp_t mask); | ||
210 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 211 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
211 | u64 *start_ret, u64 *end_ret, int bits); | 212 | u64 *start_ret, u64 *end_ret, int bits); |
212 | struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, | 213 | struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, |
@@ -227,28 +228,17 @@ int extent_readpages(struct extent_io_tree *tree, | |||
227 | struct address_space *mapping, | 228 | struct address_space *mapping, |
228 | struct list_head *pages, unsigned nr_pages, | 229 | struct list_head *pages, unsigned nr_pages, |
229 | get_extent_t get_extent); | 230 | get_extent_t get_extent); |
230 | int extent_prepare_write(struct extent_io_tree *tree, | ||
231 | struct inode *inode, struct page *page, | ||
232 | unsigned from, unsigned to, get_extent_t *get_extent); | ||
233 | int extent_commit_write(struct extent_io_tree *tree, | ||
234 | struct inode *inode, struct page *page, | ||
235 | unsigned from, unsigned to); | ||
236 | sector_t extent_bmap(struct address_space *mapping, sector_t iblock, | ||
237 | get_extent_t *get_extent); | ||
238 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 231 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
239 | __u64 start, __u64 len, get_extent_t *get_extent); | 232 | __u64 start, __u64 len, get_extent_t *get_extent); |
240 | int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end); | ||
241 | int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); | 233 | int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); |
242 | int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); | 234 | int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); |
243 | void set_page_extent_mapped(struct page *page); | 235 | void set_page_extent_mapped(struct page *page); |
244 | 236 | ||
245 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | 237 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, |
246 | u64 start, unsigned long len, | 238 | u64 start, unsigned long len, |
247 | struct page *page0, | 239 | struct page *page0); |
248 | gfp_t mask); | ||
249 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, | 240 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, |
250 | u64 start, unsigned long len, | 241 | u64 start, unsigned long len); |
251 | gfp_t mask); | ||
252 | void free_extent_buffer(struct extent_buffer *eb); | 242 | void free_extent_buffer(struct extent_buffer *eb); |
253 | int read_extent_buffer_pages(struct extent_io_tree *tree, | 243 | int read_extent_buffer_pages(struct extent_io_tree *tree, |
254 | struct extent_buffer *eb, u64 start, int wait, | 244 | struct extent_buffer *eb, u64 start, int wait, |
@@ -276,16 +266,11 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | |||
276 | unsigned long src_offset, unsigned long len); | 266 | unsigned long src_offset, unsigned long len); |
277 | void memset_extent_buffer(struct extent_buffer *eb, char c, | 267 | void memset_extent_buffer(struct extent_buffer *eb, char c, |
278 | unsigned long start, unsigned long len); | 268 | unsigned long start, unsigned long len); |
279 | int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, | ||
280 | struct extent_buffer *eb); | ||
281 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end); | ||
282 | int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits); | 269 | int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits); |
283 | int clear_extent_buffer_dirty(struct extent_io_tree *tree, | 270 | int clear_extent_buffer_dirty(struct extent_io_tree *tree, |
284 | struct extent_buffer *eb); | 271 | struct extent_buffer *eb); |
285 | int set_extent_buffer_dirty(struct extent_io_tree *tree, | 272 | int set_extent_buffer_dirty(struct extent_io_tree *tree, |
286 | struct extent_buffer *eb); | 273 | struct extent_buffer *eb); |
287 | int test_extent_buffer_dirty(struct extent_io_tree *tree, | ||
288 | struct extent_buffer *eb); | ||
289 | int set_extent_buffer_uptodate(struct extent_io_tree *tree, | 274 | int set_extent_buffer_uptodate(struct extent_io_tree *tree, |
290 | struct extent_buffer *eb); | 275 | struct extent_buffer *eb); |
291 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | 276 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, |
@@ -303,11 +288,13 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, | |||
303 | unsigned long *map_start, | 288 | unsigned long *map_start, |
304 | unsigned long *map_len, int km); | 289 | unsigned long *map_len, int km); |
305 | void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); | 290 | void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); |
306 | int release_extent_buffer_tail_pages(struct extent_buffer *eb); | ||
307 | int extent_range_uptodate(struct extent_io_tree *tree, | 291 | int extent_range_uptodate(struct extent_io_tree *tree, |
308 | u64 start, u64 end); | 292 | u64 start, u64 end); |
309 | int extent_clear_unlock_delalloc(struct inode *inode, | 293 | int extent_clear_unlock_delalloc(struct inode *inode, |
310 | struct extent_io_tree *tree, | 294 | struct extent_io_tree *tree, |
311 | u64 start, u64 end, struct page *locked_page, | 295 | u64 start, u64 end, struct page *locked_page, |
312 | unsigned long op); | 296 | unsigned long op); |
297 | struct bio * | ||
298 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | ||
299 | gfp_t gfp_flags); | ||
313 | #endif | 300 | #endif |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 454ca52d6451..2d0410344ea3 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/spinlock.h> | 4 | #include <linux/spinlock.h> |
5 | #include <linux/hardirq.h> | 5 | #include <linux/hardirq.h> |
6 | #include "ctree.h" | ||
6 | #include "extent_map.h" | 7 | #include "extent_map.h" |
7 | 8 | ||
8 | 9 | ||
@@ -27,12 +28,11 @@ void extent_map_exit(void) | |||
27 | /** | 28 | /** |
28 | * extent_map_tree_init - initialize extent map tree | 29 | * extent_map_tree_init - initialize extent map tree |
29 | * @tree: tree to initialize | 30 | * @tree: tree to initialize |
30 | * @mask: flags for memory allocations during tree operations | ||
31 | * | 31 | * |
32 | * Initialize the extent tree @tree. Should be called for each new inode | 32 | * Initialize the extent tree @tree. Should be called for each new inode |
33 | * or other user of the extent_map interface. | 33 | * or other user of the extent_map interface. |
34 | */ | 34 | */ |
35 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) | 35 | void extent_map_tree_init(struct extent_map_tree *tree) |
36 | { | 36 | { |
37 | tree->map = RB_ROOT; | 37 | tree->map = RB_ROOT; |
38 | rwlock_init(&tree->lock); | 38 | rwlock_init(&tree->lock); |
@@ -40,20 +40,20 @@ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) | |||
40 | 40 | ||
41 | /** | 41 | /** |
42 | * alloc_extent_map - allocate new extent map structure | 42 | * alloc_extent_map - allocate new extent map structure |
43 | * @mask: memory allocation flags | ||
44 | * | 43 | * |
45 | * Allocate a new extent_map structure. The new structure is | 44 | * Allocate a new extent_map structure. The new structure is |
46 | * returned with a reference count of one and needs to be | 45 | * returned with a reference count of one and needs to be |
47 | * freed using free_extent_map() | 46 | * freed using free_extent_map() |
48 | */ | 47 | */ |
49 | struct extent_map *alloc_extent_map(gfp_t mask) | 48 | struct extent_map *alloc_extent_map(void) |
50 | { | 49 | { |
51 | struct extent_map *em; | 50 | struct extent_map *em; |
52 | em = kmem_cache_alloc(extent_map_cache, mask); | 51 | em = kmem_cache_alloc(extent_map_cache, GFP_NOFS); |
53 | if (!em || IS_ERR(em)) | 52 | if (!em) |
54 | return em; | 53 | return NULL; |
55 | em->in_tree = 0; | 54 | em->in_tree = 0; |
56 | em->flags = 0; | 55 | em->flags = 0; |
56 | em->compress_type = BTRFS_COMPRESS_NONE; | ||
57 | atomic_set(&em->refs, 1); | 57 | atomic_set(&em->refs, 1); |
58 | return em; | 58 | return em; |
59 | } | 59 | } |
@@ -241,7 +241,7 @@ out: | |||
241 | * Insert @em into @tree or perform a simple forward/backward merge with | 241 | * Insert @em into @tree or perform a simple forward/backward merge with |
242 | * existing mappings. The extent_map struct passed in will be inserted | 242 | * existing mappings. The extent_map struct passed in will be inserted |
243 | * into the tree directly, with an additional reference taken, or a | 243 | * into the tree directly, with an additional reference taken, or a |
244 | * reference dropped if the merge attempt was successfull. | 244 | * reference dropped if the merge attempt was successful. |
245 | */ | 245 | */ |
246 | int add_extent_mapping(struct extent_map_tree *tree, | 246 | int add_extent_mapping(struct extent_map_tree *tree, |
247 | struct extent_map *em) | 247 | struct extent_map *em) |
@@ -335,7 +335,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
335 | goto out; | 335 | goto out; |
336 | } | 336 | } |
337 | if (IS_ERR(rb_node)) { | 337 | if (IS_ERR(rb_node)) { |
338 | em = ERR_PTR(PTR_ERR(rb_node)); | 338 | em = ERR_CAST(rb_node); |
339 | goto out; | 339 | goto out; |
340 | } | 340 | } |
341 | em = rb_entry(rb_node, struct extent_map, rb_node); | 341 | em = rb_entry(rb_node, struct extent_map, rb_node); |
@@ -384,7 +384,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | |||
384 | goto out; | 384 | goto out; |
385 | } | 385 | } |
386 | if (IS_ERR(rb_node)) { | 386 | if (IS_ERR(rb_node)) { |
387 | em = ERR_PTR(PTR_ERR(rb_node)); | 387 | em = ERR_CAST(rb_node); |
388 | goto out; | 388 | goto out; |
389 | } | 389 | } |
390 | em = rb_entry(rb_node, struct extent_map, rb_node); | 390 | em = rb_entry(rb_node, struct extent_map, rb_node); |
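The two hunks above replace ERR_PTR(PTR_ERR(rb_node)) with ERR_CAST(rb_node): the same error pointer is forwarded under a different pointer type without bouncing through a long. A one-function sketch of the idiom (demo_lookup is hypothetical):

#include <linux/err.h>
#include <linux/rbtree.h>

static struct extent_map *demo_lookup(struct rb_node *rb_node)
{
        if (IS_ERR(rb_node))
                return ERR_CAST(rb_node); /* == ERR_PTR(PTR_ERR(rb_node)) */
        return rb_entry(rb_node, struct extent_map, rb_node);
}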
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index ab6d74b6e647..33a7890b1f40 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
@@ -26,7 +26,8 @@ struct extent_map { | |||
26 | unsigned long flags; | 26 | unsigned long flags; |
27 | struct block_device *bdev; | 27 | struct block_device *bdev; |
28 | atomic_t refs; | 28 | atomic_t refs; |
29 | int in_tree; | 29 | unsigned int in_tree:1; |
30 | unsigned int compress_type:4; | ||
30 | }; | 31 | }; |
31 | 32 | ||
32 | struct extent_map_tree { | 33 | struct extent_map_tree { |
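Turning in_tree into a one-bit flag lets the new compress_type share the same word; four bits bound the compression enum to 16 values, which comfortably covers none/zlib/lzo. The layout in isolation (a sketch only):

struct demo_em_flags {
        unsigned int in_tree:1;         /* boolean membership flag */
        unsigned int compress_type:4;   /* BTRFS_COMPRESS_* (0..15) */
};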
@@ -48,14 +49,14 @@ static inline u64 extent_map_block_end(struct extent_map *em) | |||
48 | return em->block_start + em->block_len; | 49 | return em->block_start + em->block_len; |
49 | } | 50 | } |
50 | 51 | ||
51 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask); | 52 | void extent_map_tree_init(struct extent_map_tree *tree); |
52 | struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | 53 | struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, |
53 | u64 start, u64 len); | 54 | u64 start, u64 len); |
54 | int add_extent_mapping(struct extent_map_tree *tree, | 55 | int add_extent_mapping(struct extent_map_tree *tree, |
55 | struct extent_map *em); | 56 | struct extent_map *em); |
56 | int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); | 57 | int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); |
57 | 58 | ||
58 | struct extent_map *alloc_extent_map(gfp_t mask); | 59 | struct extent_map *alloc_extent_map(void); |
59 | void free_extent_map(struct extent_map *em); | 60 | void free_extent_map(struct extent_map *em); |
60 | int __init extent_map_init(void); | 61 | int __init extent_map_init(void); |
61 | void extent_map_exit(void); | 62 | void extent_map_exit(void); |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a562a250ae77..90d4ee52cd45 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -48,7 +48,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | |||
48 | struct extent_buffer *leaf; | 48 | struct extent_buffer *leaf; |
49 | 49 | ||
50 | path = btrfs_alloc_path(); | 50 | path = btrfs_alloc_path(); |
51 | BUG_ON(!path); | 51 | if (!path) |
52 | return -ENOMEM; | ||
52 | file_key.objectid = objectid; | 53 | file_key.objectid = objectid; |
53 | file_key.offset = pos; | 54 | file_key.offset = pos; |
54 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); | 55 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); |
@@ -169,6 +170,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
169 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 170 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
170 | 171 | ||
171 | path = btrfs_alloc_path(); | 172 | path = btrfs_alloc_path(); |
173 | if (!path) | ||
174 | return -ENOMEM; | ||
172 | if (bio->bi_size > PAGE_CACHE_SIZE * 8) | 175 | if (bio->bi_size > PAGE_CACHE_SIZE * 8) |
173 | path->reada = 2; | 176 | path->reada = 2; |
174 | 177 | ||
@@ -190,7 +193,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
190 | u32 item_size; | 193 | u32 item_size; |
191 | 194 | ||
192 | if (item) | 195 | if (item) |
193 | btrfs_release_path(root, path); | 196 | btrfs_release_path(path); |
194 | item = btrfs_lookup_csum(NULL, root->fs_info->csum_root, | 197 | item = btrfs_lookup_csum(NULL, root->fs_info->csum_root, |
195 | path, disk_bytenr, 0); | 198 | path, disk_bytenr, 0); |
196 | if (IS_ERR(item)) { | 199 | if (IS_ERR(item)) { |
@@ -205,12 +208,13 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
205 | EXTENT_NODATASUM, GFP_NOFS); | 208 | EXTENT_NODATASUM, GFP_NOFS); |
206 | } else { | 209 | } else { |
207 | printk(KERN_INFO "btrfs no csum found " | 210 | printk(KERN_INFO "btrfs no csum found " |
208 | "for inode %lu start %llu\n", | 211 | "for inode %llu start %llu\n", |
209 | inode->i_ino, | 212 | (unsigned long long) |
213 | btrfs_ino(inode), | ||
210 | (unsigned long long)offset); | 214 | (unsigned long long)offset); |
211 | } | 215 | } |
212 | item = NULL; | 216 | item = NULL; |
213 | btrfs_release_path(root, path); | 217 | btrfs_release_path(path); |
214 | goto found; | 218 | goto found; |
215 | } | 219 | } |
216 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | 220 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, |
@@ -263,7 +267,7 @@ int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | |||
263 | } | 267 | } |
264 | 268 | ||
265 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 269 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
266 | struct list_head *list) | 270 | struct list_head *list, int search_commit) |
267 | { | 271 | { |
268 | struct btrfs_key key; | 272 | struct btrfs_key key; |
269 | struct btrfs_path *path; | 273 | struct btrfs_path *path; |
@@ -280,6 +284,12 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
280 | path = btrfs_alloc_path(); | 284 | path = btrfs_alloc_path(); |
281 | BUG_ON(!path); | 285 | BUG_ON(!path); |
282 | 286 | ||
287 | if (search_commit) { | ||
288 | path->skip_locking = 1; | ||
289 | path->reada = 2; | ||
290 | path->search_commit_root = 1; | ||
291 | } | ||
292 | |||
283 | key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | 293 | key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
284 | key.offset = start; | 294 | key.offset = start; |
285 | key.type = BTRFS_EXTENT_CSUM_KEY; | 295 | key.type = BTRFS_EXTENT_CSUM_KEY; |
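The new search_commit flag makes btrfs_lookup_csums_range() scan the last committed root: the commit root is immutable, so tree locking can be skipped and aggressive readahead helps the long sequential walk (this is what scrub wants). A hedged sketch of such a read-only commit-root search (demo_commit_root_search is hypothetical):

static int demo_commit_root_search(struct btrfs_root *root,
                                   struct btrfs_key *key)
{
        struct btrfs_path *path = btrfs_alloc_path();
        int ret;

        if (!path)
                return -ENOMEM;
        path->skip_locking = 1;         /* commit root never changes */
        path->search_commit_root = 1;
        path->reada = 2;                /* forward readahead */

        /* trans == NULL, ins_len == 0, cow == 0: pure read */
        ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
        btrfs_free_path(path);
        return ret;
}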
@@ -492,7 +502,6 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans, | |||
492 | u32 new_size = (bytenr - key->offset) >> blocksize_bits; | 502 | u32 new_size = (bytenr - key->offset) >> blocksize_bits; |
493 | new_size *= csum_size; | 503 | new_size *= csum_size; |
494 | ret = btrfs_truncate_item(trans, root, path, new_size, 1); | 504 | ret = btrfs_truncate_item(trans, root, path, new_size, 1); |
495 | BUG_ON(ret); | ||
496 | } else if (key->offset >= bytenr && csum_end > end_byte && | 505 | } else if (key->offset >= bytenr && csum_end > end_byte && |
497 | end_byte > key->offset) { | 506 | end_byte > key->offset) { |
498 | /* | 507 | /* |
@@ -505,7 +514,6 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans, | |||
505 | new_size *= csum_size; | 514 | new_size *= csum_size; |
506 | 515 | ||
507 | ret = btrfs_truncate_item(trans, root, path, new_size, 0); | 516 | ret = btrfs_truncate_item(trans, root, path, new_size, 0); |
508 | BUG_ON(ret); | ||
509 | 517 | ||
510 | key->offset = end_byte; | 518 | key->offset = end_byte; |
511 | ret = btrfs_set_item_key_safe(trans, root, path, key); | 519 | ret = btrfs_set_item_key_safe(trans, root, path, key); |
@@ -536,6 +544,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
536 | root = root->fs_info->csum_root; | 544 | root = root->fs_info->csum_root; |
537 | 545 | ||
538 | path = btrfs_alloc_path(); | 546 | path = btrfs_alloc_path(); |
547 | if (!path) | ||
548 | return -ENOMEM; | ||
539 | 549 | ||
540 | while (1) { | 550 | while (1) { |
541 | key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | 551 | key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
@@ -546,9 +556,12 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
546 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 556 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
547 | if (ret > 0) { | 557 | if (ret > 0) { |
548 | if (path->slots[0] == 0) | 558 | if (path->slots[0] == 0) |
549 | goto out; | 559 | break; |
550 | path->slots[0]--; | 560 | path->slots[0]--; |
561 | } else if (ret < 0) { | ||
562 | break; | ||
551 | } | 563 | } |
564 | |||
552 | leaf = path->nodes[0]; | 565 | leaf = path->nodes[0]; |
553 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | 566 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
554 | 567 | ||
@@ -571,7 +584,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
571 | /* delete the entire item, it is inside our range */ | 584 | /* delete the entire item, it is inside our range */ |
572 | if (key.offset >= bytenr && csum_end <= end_byte) { | 585 | if (key.offset >= bytenr && csum_end <= end_byte) { |
573 | ret = btrfs_del_item(trans, root, path); | 586 | ret = btrfs_del_item(trans, root, path); |
574 | BUG_ON(ret); | 587 | if (ret) |
588 | goto out; | ||
575 | if (key.offset == bytenr) | 589 | if (key.offset == bytenr) |
576 | break; | 590 | break; |
577 | } else if (key.offset < bytenr && csum_end > end_byte) { | 591 | } else if (key.offset < bytenr && csum_end > end_byte) { |
@@ -623,11 +637,12 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
623 | if (key.offset < bytenr) | 637 | if (key.offset < bytenr) |
624 | break; | 638 | break; |
625 | } | 639 | } |
626 | btrfs_release_path(root, path); | 640 | btrfs_release_path(path); |
627 | } | 641 | } |
642 | ret = 0; | ||
628 | out: | 643 | out: |
629 | btrfs_free_path(path); | 644 | btrfs_free_path(path); |
630 | return 0; | 645 | return ret; |
631 | } | 646 | } |
632 | 647 | ||
633 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | 648 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, |
@@ -714,7 +729,7 @@ again: | |||
714 | * at this point, we know the tree has an item, but it isn't big | 729 | * at this point, we know the tree has an item, but it isn't big |
715 | * enough yet to put our csum in. Grow it | 730 | * enough yet to put our csum in. Grow it |
716 | */ | 731 | */ |
717 | btrfs_release_path(root, path); | 732 | btrfs_release_path(path); |
718 | ret = btrfs_search_slot(trans, root, &file_key, path, | 733 | ret = btrfs_search_slot(trans, root, &file_key, path, |
719 | csum_size, 1); | 734 | csum_size, 1); |
720 | if (ret < 0) | 735 | if (ret < 0) |
@@ -753,12 +768,11 @@ again: | |||
753 | goto insert; | 768 | goto insert; |
754 | 769 | ||
755 | ret = btrfs_extend_item(trans, root, path, diff); | 770 | ret = btrfs_extend_item(trans, root, path, diff); |
756 | BUG_ON(ret); | ||
757 | goto csum; | 771 | goto csum; |
758 | } | 772 | } |
759 | 773 | ||
760 | insert: | 774 | insert: |
761 | btrfs_release_path(root, path); | 775 | btrfs_release_path(path); |
762 | csum_offset = 0; | 776 | csum_offset = 0; |
763 | if (found_next) { | 777 | if (found_next) { |
764 | u64 tmp = total_bytes + root->sectorsize; | 778 | u64 tmp = total_bytes + root->sectorsize; |
@@ -842,7 +856,7 @@ next_sector: | |||
842 | } | 856 | } |
843 | btrfs_mark_buffer_dirty(path->nodes[0]); | 857 | btrfs_mark_buffer_dirty(path->nodes[0]); |
844 | if (total_bytes < sums->len) { | 858 | if (total_bytes < sums->len) { |
845 | btrfs_release_path(root, path); | 859 | btrfs_release_path(path); |
846 | cond_resched(); | 860 | cond_resched(); |
847 | goto again; | 861 | goto again; |
848 | } | 862 | } |
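Several hunks in file-item.c convert BUG_ON(ret) into error propagation: btrfs_del_csums() now breaks out of its loop on failure, sets ret = 0 only on the success path, and frees the path before returning ret. The general shape, with more_work()/do_one_step() as hypothetical placeholders:

static bool more_work(void);                    /* hypothetical */
static int do_one_step(struct btrfs_path *p);   /* hypothetical */

static int demo_loop(void)
{
        struct btrfs_path *path;
        int ret = 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        while (more_work()) {
                ret = do_one_step(path);
                if (ret)
                        goto out;       /* propagate instead of BUG_ON() */
        }
        ret = 0;
out:
        btrfs_free_path(path);
        return ret;
}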
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e354c33df082..fa4ef18b66b1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/string.h> | 24 | #include <linux/string.h> |
25 | #include <linux/backing-dev.h> | 25 | #include <linux/backing-dev.h> |
26 | #include <linux/mpage.h> | 26 | #include <linux/mpage.h> |
27 | #include <linux/falloc.h> | ||
27 | #include <linux/swap.h> | 28 | #include <linux/swap.h> |
28 | #include <linux/writeback.h> | 29 | #include <linux/writeback.h> |
29 | #include <linux/statfs.h> | 30 | #include <linux/statfs.h> |
@@ -39,16 +40,274 @@ | |||
39 | #include "locking.h" | 40 | #include "locking.h" |
40 | #include "compat.h" | 41 | #include "compat.h" |
41 | 42 | ||
43 | /* | ||
44 | * when auto defrag is enabled we | ||
45 | * queue up these defrag structs to remember which | ||
46 | * inodes need defragging passes | ||
47 | */ | ||
48 | struct inode_defrag { | ||
49 | struct rb_node rb_node; | ||
50 | /* objectid */ | ||
51 | u64 ino; | ||
52 | /* | ||
53 | * transid where the defrag was added, we search for | ||
54 | * extents newer than this | ||
55 | */ | ||
56 | u64 transid; | ||
57 | |||
58 | /* root objectid */ | ||
59 | u64 root; | ||
60 | |||
61 | /* last offset we were able to defrag */ | ||
62 | u64 last_offset; | ||
63 | |||
64 | /* if we've wrapped around back to zero once already */ | ||
65 | int cycled; | ||
66 | }; | ||
67 | |||
68 | /* pop a record for an inode into the defrag tree. The lock | ||
69 | * must be held already | ||
70 | * | ||
71 | * If you're inserting a record for an older transid than an | ||
72 | * existing record, the transid already in the tree is lowered | ||
73 | * | ||
74 | * If an existing record is found the defrag item you | ||
75 | * pass in is freed | ||
76 | */ | ||
77 | static int __btrfs_add_inode_defrag(struct inode *inode, | ||
78 | struct inode_defrag *defrag) | ||
79 | { | ||
80 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
81 | struct inode_defrag *entry; | ||
82 | struct rb_node **p; | ||
83 | struct rb_node *parent = NULL; | ||
84 | |||
85 | p = &root->fs_info->defrag_inodes.rb_node; | ||
86 | while (*p) { | ||
87 | parent = *p; | ||
88 | entry = rb_entry(parent, struct inode_defrag, rb_node); | ||
89 | |||
90 | if (defrag->ino < entry->ino) | ||
91 | p = &parent->rb_left; | ||
92 | else if (defrag->ino > entry->ino) | ||
93 | p = &parent->rb_right; | ||
94 | else { | ||
95 | /* if we're reinserting an entry for | ||
96 | * an old defrag run, make sure to | ||
97 | * lower the transid of our existing record | ||
98 | */ | ||
99 | if (defrag->transid < entry->transid) | ||
100 | entry->transid = defrag->transid; | ||
101 | if (defrag->last_offset > entry->last_offset) | ||
102 | entry->last_offset = defrag->last_offset; | ||
103 | goto exists; | ||
104 | } | ||
105 | } | ||
106 | BTRFS_I(inode)->in_defrag = 1; | ||
107 | rb_link_node(&defrag->rb_node, parent, p); | ||
108 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); | ||
109 | return 0; | ||
110 | |||
111 | exists: | ||
112 | kfree(defrag); | ||
113 | return 0; | ||
114 | |||
115 | } | ||
116 | |||
117 | /* | ||
118 | * insert a defrag record for this inode if auto defrag is | ||
119 | * enabled | ||
120 | */ | ||
121 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | ||
122 | struct inode *inode) | ||
123 | { | ||
124 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
125 | struct inode_defrag *defrag; | ||
126 | int ret = 0; | ||
127 | u64 transid; | ||
128 | |||
129 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) | ||
130 | return 0; | ||
131 | |||
132 | if (btrfs_fs_closing(root->fs_info)) | ||
133 | return 0; | ||
134 | |||
135 | if (BTRFS_I(inode)->in_defrag) | ||
136 | return 0; | ||
137 | |||
138 | if (trans) | ||
139 | transid = trans->transid; | ||
140 | else | ||
141 | transid = BTRFS_I(inode)->root->last_trans; | ||
142 | |||
143 | defrag = kzalloc(sizeof(*defrag), GFP_NOFS); | ||
144 | if (!defrag) | ||
145 | return -ENOMEM; | ||
146 | |||
147 | defrag->ino = btrfs_ino(inode); | ||
148 | defrag->transid = transid; | ||
149 | defrag->root = root->root_key.objectid; | ||
150 | |||
151 | spin_lock(&root->fs_info->defrag_inodes_lock); | ||
152 | if (!BTRFS_I(inode)->in_defrag) | ||
153 | ret = __btrfs_add_inode_defrag(inode, defrag); | ||
154 | spin_unlock(&root->fs_info->defrag_inodes_lock); | ||
155 | return ret; | ||
156 | } | ||
157 | |||
158 | /* | ||
159 | * must be called with the defrag_inodes lock held | ||
160 | */ | ||
161 | struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino, | ||
162 | struct rb_node **next) | ||
163 | { | ||
164 | struct inode_defrag *entry = NULL; | ||
165 | struct rb_node *p; | ||
166 | struct rb_node *parent = NULL; | ||
167 | |||
168 | p = info->defrag_inodes.rb_node; | ||
169 | while (p) { | ||
170 | parent = p; | ||
171 | entry = rb_entry(parent, struct inode_defrag, rb_node); | ||
172 | |||
173 | if (ino < entry->ino) | ||
174 | p = parent->rb_left; | ||
175 | else if (ino > entry->ino) | ||
176 | p = parent->rb_right; | ||
177 | else | ||
178 | return entry; | ||
179 | } | ||
180 | |||
181 | if (next) { | ||
182 | while (parent && ino > entry->ino) { | ||
183 | parent = rb_next(parent); | ||
184 | entry = rb_entry(parent, struct inode_defrag, rb_node); | ||
185 | } | ||
186 | *next = parent; | ||
187 | } | ||
188 | return NULL; | ||
189 | } | ||
190 | |||
191 | /* | ||
192 | * run through the list of inodes in the FS that need | ||
193 | * defragging | ||
194 | */ | ||
195 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | ||
196 | { | ||
197 | struct inode_defrag *defrag; | ||
198 | struct btrfs_root *inode_root; | ||
199 | struct inode *inode; | ||
200 | struct rb_node *n; | ||
201 | struct btrfs_key key; | ||
202 | struct btrfs_ioctl_defrag_range_args range; | ||
203 | u64 first_ino = 0; | ||
204 | int num_defrag; | ||
205 | int defrag_batch = 1024; | ||
206 | |||
207 | memset(&range, 0, sizeof(range)); | ||
208 | range.len = (u64)-1; | ||
209 | |||
210 | atomic_inc(&fs_info->defrag_running); | ||
211 | spin_lock(&fs_info->defrag_inodes_lock); | ||
212 | while (1) { | ||
213 | n = NULL; | ||
214 | |||
215 | /* find an inode to defrag */ | ||
216 | defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n); | ||
217 | if (!defrag) { | ||
218 | if (n) | ||
219 | defrag = rb_entry(n, struct inode_defrag, rb_node); | ||
220 | else if (first_ino) { | ||
221 | first_ino = 0; | ||
222 | continue; | ||
223 | } else { | ||
224 | break; | ||
225 | } | ||
226 | } | ||
227 | |||
228 | /* remove it from the rbtree */ | ||
229 | first_ino = defrag->ino + 1; | ||
230 | rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); | ||
231 | |||
232 | if (btrfs_fs_closing(fs_info)) | ||
233 | goto next_free; | ||
234 | |||
235 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
236 | |||
237 | /* get the inode */ | ||
238 | key.objectid = defrag->root; | ||
239 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
240 | key.offset = (u64)-1; | ||
241 | inode_root = btrfs_read_fs_root_no_name(fs_info, &key); | ||
242 | if (IS_ERR(inode_root)) | ||
243 | goto next; | ||
244 | |||
245 | key.objectid = defrag->ino; | ||
246 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
247 | key.offset = 0; | ||
248 | |||
249 | inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); | ||
250 | if (IS_ERR(inode)) | ||
251 | goto next; | ||
252 | |||
253 | /* do a chunk of defrag */ | ||
254 | BTRFS_I(inode)->in_defrag = 0; | ||
255 | range.start = defrag->last_offset; | ||
256 | num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, | ||
257 | defrag_batch); | ||
258 | /* | ||
259 | * if we filled the whole defrag batch, there | ||
260 | * must be more work to do. Queue this defrag | ||
261 | * again | ||
262 | */ | ||
263 | if (num_defrag == defrag_batch) { | ||
264 | defrag->last_offset = range.start; | ||
265 | __btrfs_add_inode_defrag(inode, defrag); | ||
266 | /* | ||
267 | * we don't want to kfree defrag, we added it back to | ||
268 | * the rbtree | ||
269 | */ | ||
270 | defrag = NULL; | ||
271 | } else if (defrag->last_offset && !defrag->cycled) { | ||
272 | /* | ||
273 | * we didn't fill our defrag batch, but | ||
274 | * we didn't start at zero. Make sure we loop | ||
275 | * around to the start of the file. | ||
276 | */ | ||
277 | defrag->last_offset = 0; | ||
278 | defrag->cycled = 1; | ||
279 | __btrfs_add_inode_defrag(inode, defrag); | ||
280 | defrag = NULL; | ||
281 | } | ||
282 | |||
283 | iput(inode); | ||
284 | next: | ||
285 | spin_lock(&fs_info->defrag_inodes_lock); | ||
286 | next_free: | ||
287 | kfree(defrag); | ||
288 | } | ||
289 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
290 | |||
291 | atomic_dec(&fs_info->defrag_running); | ||
292 | |||
293 | /* | ||
294 | * during unmount, we use the transaction_wait queue to | ||
295 | * wait for the defragger to stop | ||
296 | */ | ||
297 | wake_up(&fs_info->transaction_wait); | ||
298 | return 0; | ||
299 | } | ||
42 | 300 | ||
43 | /* simple helper to fault in pages and copy. This should go away | 301 | /* simple helper to fault in pages and copy. This should go away |
44 | * and be replaced with calls into generic code. | 302 | * and be replaced with calls into generic code. |
45 | */ | 303 | */ |
46 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | 304 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, |
47 | int write_bytes, | 305 | size_t write_bytes, |
48 | struct page **prepared_pages, | 306 | struct page **prepared_pages, |
49 | struct iov_iter *i) | 307 | struct iov_iter *i) |
50 | { | 308 | { |
51 | size_t copied; | 309 | size_t copied = 0; |
310 | size_t total_copied = 0; | ||
52 | int pg = 0; | 311 | int pg = 0; |
53 | int offset = pos & (PAGE_CACHE_SIZE - 1); | 312 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
54 | 313 | ||
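The auto-defrag records above live in fs_info->defrag_inodes, an rbtree keyed by inode number and protected by defrag_inodes_lock; __btrfs_add_inode_defrag() is the classic walk-to-leaf insertion. The same shape in miniature (struct demo_rec and demo_insert are hypothetical):

#include <linux/rbtree.h>
#include <linux/types.h>

struct demo_rec {
        struct rb_node rb_node;
        u64 ino;                        /* search key */
};

/* returns the existing record on collision, NULL when inserted */
static struct demo_rec *demo_insert(struct rb_root *root,
                                    struct demo_rec *rec)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct demo_rec *entry;

        while (*p) {
                parent = *p;
                entry = rb_entry(parent, struct demo_rec, rb_node);
                if (rec->ino < entry->ino)
                        p = &parent->rb_left;
                else if (rec->ino > entry->ino)
                        p = &parent->rb_right;
                else
                        return entry;   /* caller merges and frees rec */
        }
        rb_link_node(&rec->rb_node, parent, p);
        rb_insert_color(&rec->rb_node, root);
        return NULL;
}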
@@ -56,23 +315,38 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | |||
56 | size_t count = min_t(size_t, | 315 | size_t count = min_t(size_t, |
57 | PAGE_CACHE_SIZE - offset, write_bytes); | 316 | PAGE_CACHE_SIZE - offset, write_bytes); |
58 | struct page *page = prepared_pages[pg]; | 317 | struct page *page = prepared_pages[pg]; |
59 | again: | 318 | /* |
60 | if (unlikely(iov_iter_fault_in_readable(i, count))) | 319 | * Copy data from userspace to the current page |
61 | return -EFAULT; | 320 | * |
62 | 321 | * Disable pagefault to avoid recursive lock since | |
63 | /* Copy data from userspace to the current page */ | 322 | * the pages are already locked |
64 | copied = iov_iter_copy_from_user(page, i, offset, count); | 323 | */ |
324 | pagefault_disable(); | ||
325 | copied = iov_iter_copy_from_user_atomic(page, i, offset, count); | ||
326 | pagefault_enable(); | ||
65 | 327 | ||
66 | /* Flush processor's dcache for this page */ | 328 | /* Flush processor's dcache for this page */ |
67 | flush_dcache_page(page); | 329 | flush_dcache_page(page); |
330 | |||
331 | /* | ||
332 | * if we get a partial write, we can end up with | ||
333 | * partially up to date pages. These add | ||
334 | * a lot of complexity, so make sure they don't | ||
335 | * happen by forcing this copy to be retried. | ||
336 | * | ||
337 | * The rest of the btrfs_file_write code will fall | ||
338 | * back to page at a time copies after we return 0. | ||
339 | */ | ||
340 | if (!PageUptodate(page) && copied < count) | ||
341 | copied = 0; | ||
342 | |||
68 | iov_iter_advance(i, copied); | 343 | iov_iter_advance(i, copied); |
69 | write_bytes -= copied; | 344 | write_bytes -= copied; |
345 | total_copied += copied; | ||
70 | 346 | ||
71 | if (unlikely(copied == 0)) { | 347 | /* Return to btrfs_file_aio_write to fault page */ |
72 | count = min_t(size_t, PAGE_CACHE_SIZE - offset, | 348 | if (unlikely(copied == 0)) |
73 | iov_iter_single_seg_count(i)); | 349 | break; |
74 | goto again; | ||
75 | } | ||
76 | 350 | ||
77 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | 351 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { |
78 | offset += copied; | 352 | offset += copied; |
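The copy loop above switches to iov_iter_copy_from_user_atomic() under pagefault_disable(): faulting on the user buffer while holding locked pagecache pages could recurse into the filesystem (the buffer may be mmapped from the same file), so the pages are faulted in beforehand and a short copy simply returns 0 to force a retry. A hedged sketch of that split (lock_page stands in for prepare_pages()):

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/uaccess.h>

static size_t demo_copy(struct page *page, struct iov_iter *i,
                        unsigned long offset, size_t count)
{
        size_t copied;

        /* 1) fault the user pages in while we hold no page locks */
        if (iov_iter_fault_in_readable(i, count))
                return 0;

        lock_page(page);                /* stand-in for prepare_pages() */

        /* 2) copy with page faults disabled; may come up short */
        pagefault_disable();
        copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
        pagefault_enable();

        unlock_page(page);
        return copied;          /* 0 means: fault in again and retry */
}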
@@ -81,18 +355,16 @@ again: | |||
81 | offset = 0; | 355 | offset = 0; |
82 | } | 356 | } |
83 | } | 357 | } |
84 | return 0; | 358 | return total_copied; |
85 | } | 359 | } |
86 | 360 | ||
87 | /* | 361 | /* |
88 | * unlocks pages after btrfs_file_write is done with them | 362 | * unlocks pages after btrfs_file_write is done with them |
89 | */ | 363 | */ |
90 | static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) | 364 | void btrfs_drop_pages(struct page **pages, size_t num_pages) |
91 | { | 365 | { |
92 | size_t i; | 366 | size_t i; |
93 | for (i = 0; i < num_pages; i++) { | 367 | for (i = 0; i < num_pages; i++) { |
94 | if (!pages[i]) | ||
95 | break; | ||
96 | /* page checked is some magic around finding pages that | 368 | /* page checked is some magic around finding pages that |
97 | * have been modified without going through btrfs_set_page_dirty | 369 | * have been modified without going through btrfs_set_page_dirty |
98 | * clear it here | 370 | * clear it here |
@@ -112,17 +384,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) | |||
112 | * this also makes the decision about creating an inline extent vs | 384 | * this also makes the decision about creating an inline extent vs |
113 | * doing real data extents, marking pages dirty and delalloc as required. | 385 | * doing real data extents, marking pages dirty and delalloc as required. |
114 | */ | 386 | */ |
115 | static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | 387 | int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, |
116 | struct btrfs_root *root, | 388 | struct page **pages, size_t num_pages, |
117 | struct file *file, | 389 | loff_t pos, size_t write_bytes, |
118 | struct page **pages, | 390 | struct extent_state **cached) |
119 | size_t num_pages, | ||
120 | loff_t pos, | ||
121 | size_t write_bytes) | ||
122 | { | 391 | { |
123 | int err = 0; | 392 | int err = 0; |
124 | int i; | 393 | int i; |
125 | struct inode *inode = fdentry(file)->d_inode; | ||
126 | u64 num_bytes; | 394 | u64 num_bytes; |
127 | u64 start_pos; | 395 | u64 start_pos; |
128 | u64 end_of_last_block; | 396 | u64 end_of_last_block; |
@@ -135,8 +403,9 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
135 | 403 | ||
136 | end_of_last_block = start_pos + num_bytes - 1; | 404 | end_of_last_block = start_pos + num_bytes - 1; |
137 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, | 405 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
138 | NULL); | 406 | cached); |
139 | BUG_ON(err); | 407 | if (err) |
408 | return err; | ||
140 | 409 | ||
141 | for (i = 0; i < num_pages; i++) { | 410 | for (i = 0; i < num_pages; i++) { |
142 | struct page *p = pages[i]; | 411 | struct page *p = pages[i]; |
@@ -144,13 +413,14 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
144 | ClearPageChecked(p); | 413 | ClearPageChecked(p); |
145 | set_page_dirty(p); | 414 | set_page_dirty(p); |
146 | } | 415 | } |
147 | if (end_pos > isize) { | 416 | |
417 | /* | ||
418 | * we've only changed i_size in ram, and we haven't updated | ||
419 | * the disk i_size. There is no need to log the inode | ||
420 | * at this time. | ||
421 | */ | ||
422 | if (end_pos > isize) | ||
148 | i_size_write(inode, end_pos); | 423 | i_size_write(inode, end_pos); |
149 | /* we've only changed i_size in ram, and we haven't updated | ||
150 | * the disk i_size. There is no need to log the inode | ||
151 | * at this time. | ||
152 | */ | ||
153 | } | ||
154 | return 0; | 424 | return 0; |
155 | } | 425 | } |
156 | 426 | ||
@@ -178,9 +448,10 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
178 | } | 448 | } |
179 | while (1) { | 449 | while (1) { |
180 | if (!split) | 450 | if (!split) |
181 | split = alloc_extent_map(GFP_NOFS); | 451 | split = alloc_extent_map(); |
182 | if (!split2) | 452 | if (!split2) |
183 | split2 = alloc_extent_map(GFP_NOFS); | 453 | split2 = alloc_extent_map(); |
454 | BUG_ON(!split || !split2); | ||
184 | 455 | ||
185 | write_lock(&em_tree->lock); | 456 | write_lock(&em_tree->lock); |
186 | em = lookup_extent_mapping(em_tree, start, len); | 457 | em = lookup_extent_mapping(em_tree, start, len); |
@@ -220,6 +491,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
220 | 491 | ||
221 | split->bdev = em->bdev; | 492 | split->bdev = em->bdev; |
222 | split->flags = flags; | 493 | split->flags = flags; |
494 | split->compress_type = em->compress_type; | ||
223 | ret = add_extent_mapping(em_tree, split); | 495 | ret = add_extent_mapping(em_tree, split); |
224 | BUG_ON(ret); | 496 | BUG_ON(ret); |
225 | free_extent_map(split); | 497 | free_extent_map(split); |
@@ -234,6 +506,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
234 | split->len = em->start + em->len - (start + len); | 506 | split->len = em->start + em->len - (start + len); |
235 | split->bdev = em->bdev; | 507 | split->bdev = em->bdev; |
236 | split->flags = flags; | 508 | split->flags = flags; |
509 | split->compress_type = em->compress_type; | ||
237 | 510 | ||
238 | if (compressed) { | 511 | if (compressed) { |
239 | split->block_len = em->block_len; | 512 | split->block_len = em->block_len; |
@@ -282,6 +555,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, | |||
282 | struct btrfs_path *path; | 555 | struct btrfs_path *path; |
283 | struct btrfs_key key; | 556 | struct btrfs_key key; |
284 | struct btrfs_key new_key; | 557 | struct btrfs_key new_key; |
558 | u64 ino = btrfs_ino(inode); | ||
285 | u64 search_start = start; | 559 | u64 search_start = start; |
286 | u64 disk_bytenr = 0; | 560 | u64 disk_bytenr = 0; |
287 | u64 num_bytes = 0; | 561 | u64 num_bytes = 0; |
@@ -302,14 +576,14 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, | |||
302 | 576 | ||
303 | while (1) { | 577 | while (1) { |
304 | recow = 0; | 578 | recow = 0; |
305 | ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, | 579 | ret = btrfs_lookup_file_extent(trans, root, path, ino, |
306 | search_start, -1); | 580 | search_start, -1); |
307 | if (ret < 0) | 581 | if (ret < 0) |
308 | break; | 582 | break; |
309 | if (ret > 0 && path->slots[0] > 0 && search_start == start) { | 583 | if (ret > 0 && path->slots[0] > 0 && search_start == start) { |
310 | leaf = path->nodes[0]; | 584 | leaf = path->nodes[0]; |
311 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); | 585 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); |
312 | if (key.objectid == inode->i_ino && | 586 | if (key.objectid == ino && |
313 | key.type == BTRFS_EXTENT_DATA_KEY) | 587 | key.type == BTRFS_EXTENT_DATA_KEY) |
314 | path->slots[0]--; | 588 | path->slots[0]--; |
315 | } | 589 | } |
@@ -330,7 +604,7 @@ next_slot: | |||
330 | } | 604 | } |
331 | 605 | ||
332 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | 606 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
333 | if (key.objectid > inode->i_ino || | 607 | if (key.objectid > ino || |
334 | key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) | 608 | key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) |
335 | break; | 609 | break; |
336 | 610 | ||
@@ -360,7 +634,7 @@ next_slot: | |||
360 | 634 | ||
361 | search_start = max(key.offset, start); | 635 | search_start = max(key.offset, start); |
362 | if (recow) { | 636 | if (recow) { |
363 | btrfs_release_path(root, path); | 637 | btrfs_release_path(path); |
364 | continue; | 638 | continue; |
365 | } | 639 | } |
366 | 640 | ||
@@ -377,7 +651,7 @@ next_slot: | |||
377 | ret = btrfs_duplicate_item(trans, root, path, | 651 | ret = btrfs_duplicate_item(trans, root, path, |
378 | &new_key); | 652 | &new_key); |
379 | if (ret == -EAGAIN) { | 653 | if (ret == -EAGAIN) { |
380 | btrfs_release_path(root, path); | 654 | btrfs_release_path(path); |
381 | continue; | 655 | continue; |
382 | } | 656 | } |
383 | if (ret < 0) | 657 | if (ret < 0) |
@@ -500,7 +774,7 @@ next_slot: | |||
500 | del_nr = 0; | 774 | del_nr = 0; |
501 | del_slot = 0; | 775 | del_slot = 0; |
502 | 776 | ||
503 | btrfs_release_path(root, path); | 777 | btrfs_release_path(path); |
504 | continue; | 778 | continue; |
505 | } | 779 | } |
506 | 780 | ||
@@ -576,6 +850,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | |||
576 | int del_slot = 0; | 850 | int del_slot = 0; |
577 | int recow; | 851 | int recow; |
578 | int ret; | 852 | int ret; |
853 | u64 ino = btrfs_ino(inode); | ||
579 | 854 | ||
580 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 855 | btrfs_drop_extent_cache(inode, start, end - 1, 0); |
581 | 856 | ||
@@ -584,18 +859,19 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | |||
584 | again: | 859 | again: |
585 | recow = 0; | 860 | recow = 0; |
586 | split = start; | 861 | split = start; |
587 | key.objectid = inode->i_ino; | 862 | key.objectid = ino; |
588 | key.type = BTRFS_EXTENT_DATA_KEY; | 863 | key.type = BTRFS_EXTENT_DATA_KEY; |
589 | key.offset = split; | 864 | key.offset = split; |
590 | 865 | ||
591 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 866 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
867 | if (ret < 0) | ||
868 | goto out; | ||
592 | if (ret > 0 && path->slots[0] > 0) | 869 | if (ret > 0 && path->slots[0] > 0) |
593 | path->slots[0]--; | 870 | path->slots[0]--; |
594 | 871 | ||
595 | leaf = path->nodes[0]; | 872 | leaf = path->nodes[0]; |
596 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | 873 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
597 | BUG_ON(key.objectid != inode->i_ino || | 874 | BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY); |
598 | key.type != BTRFS_EXTENT_DATA_KEY); | ||
599 | fi = btrfs_item_ptr(leaf, path->slots[0], | 875 | fi = btrfs_item_ptr(leaf, path->slots[0], |
600 | struct btrfs_file_extent_item); | 876 | struct btrfs_file_extent_item); |
601 | BUG_ON(btrfs_file_extent_type(leaf, fi) != | 877 | BUG_ON(btrfs_file_extent_type(leaf, fi) != |
@@ -612,7 +888,7 @@ again: | |||
612 | other_start = 0; | 888 | other_start = 0; |
613 | other_end = start; | 889 | other_end = start; |
614 | if (extent_mergeable(leaf, path->slots[0] - 1, | 890 | if (extent_mergeable(leaf, path->slots[0] - 1, |
615 | inode->i_ino, bytenr, orig_offset, | 891 | ino, bytenr, orig_offset, |
616 | &other_start, &other_end)) { | 892 | &other_start, &other_end)) { |
617 | new_key.offset = end; | 893 | new_key.offset = end; |
618 | btrfs_set_item_key_safe(trans, root, path, &new_key); | 894 | btrfs_set_item_key_safe(trans, root, path, &new_key); |
@@ -635,7 +911,7 @@ again: | |||
635 | other_start = end; | 911 | other_start = end; |
636 | other_end = 0; | 912 | other_end = 0; |
637 | if (extent_mergeable(leaf, path->slots[0] + 1, | 913 | if (extent_mergeable(leaf, path->slots[0] + 1, |
638 | inode->i_ino, bytenr, orig_offset, | 914 | ino, bytenr, orig_offset, |
639 | &other_start, &other_end)) { | 915 | &other_start, &other_end)) { |
640 | fi = btrfs_item_ptr(leaf, path->slots[0], | 916 | fi = btrfs_item_ptr(leaf, path->slots[0], |
641 | struct btrfs_file_extent_item); | 917 | struct btrfs_file_extent_item); |
@@ -663,7 +939,7 @@ again: | |||
663 | new_key.offset = split; | 939 | new_key.offset = split; |
664 | ret = btrfs_duplicate_item(trans, root, path, &new_key); | 940 | ret = btrfs_duplicate_item(trans, root, path, &new_key); |
665 | if (ret == -EAGAIN) { | 941 | if (ret == -EAGAIN) { |
666 | btrfs_release_path(root, path); | 942 | btrfs_release_path(path); |
667 | goto again; | 943 | goto again; |
668 | } | 944 | } |
669 | BUG_ON(ret < 0); | 945 | BUG_ON(ret < 0); |
@@ -684,7 +960,7 @@ again: | |||
684 | 960 | ||
685 | ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, | 961 | ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, |
686 | root->root_key.objectid, | 962 | root->root_key.objectid, |
687 | inode->i_ino, orig_offset); | 963 | ino, orig_offset); |
688 | BUG_ON(ret); | 964 | BUG_ON(ret); |
689 | 965 | ||
690 | if (split == start) { | 966 | if (split == start) { |
@@ -700,10 +976,10 @@ again: | |||
700 | other_start = end; | 976 | other_start = end; |
701 | other_end = 0; | 977 | other_end = 0; |
702 | if (extent_mergeable(leaf, path->slots[0] + 1, | 978 | if (extent_mergeable(leaf, path->slots[0] + 1, |
703 | inode->i_ino, bytenr, orig_offset, | 979 | ino, bytenr, orig_offset, |
704 | &other_start, &other_end)) { | 980 | &other_start, &other_end)) { |
705 | if (recow) { | 981 | if (recow) { |
706 | btrfs_release_path(root, path); | 982 | btrfs_release_path(path); |
707 | goto again; | 983 | goto again; |
708 | } | 984 | } |
709 | extent_end = other_end; | 985 | extent_end = other_end; |
@@ -711,16 +987,16 @@ again: | |||
711 | del_nr++; | 987 | del_nr++; |
712 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, | 988 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, |
713 | 0, root->root_key.objectid, | 989 | 0, root->root_key.objectid, |
714 | inode->i_ino, orig_offset); | 990 | ino, orig_offset); |
715 | BUG_ON(ret); | 991 | BUG_ON(ret); |
716 | } | 992 | } |
717 | other_start = 0; | 993 | other_start = 0; |
718 | other_end = start; | 994 | other_end = start; |
719 | if (extent_mergeable(leaf, path->slots[0] - 1, | 995 | if (extent_mergeable(leaf, path->slots[0] - 1, |
720 | inode->i_ino, bytenr, orig_offset, | 996 | ino, bytenr, orig_offset, |
721 | &other_start, &other_end)) { | 997 | &other_start, &other_end)) { |
722 | if (recow) { | 998 | if (recow) { |
723 | btrfs_release_path(root, path); | 999 | btrfs_release_path(path); |
724 | goto again; | 1000 | goto again; |
725 | } | 1001 | } |
726 | key.offset = other_start; | 1002 | key.offset = other_start; |
@@ -728,7 +1004,7 @@ again: | |||
728 | del_nr++; | 1004 | del_nr++; |
729 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, | 1005 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, |
730 | 0, root->root_key.objectid, | 1006 | 0, root->root_key.objectid, |
731 | inode->i_ino, orig_offset); | 1007 | ino, orig_offset); |
732 | BUG_ON(ret); | 1008 | BUG_ON(ret); |
733 | } | 1009 | } |
734 | if (del_nr == 0) { | 1010 | if (del_nr == 0) { |
@@ -755,6 +1031,27 @@ out: | |||
755 | } | 1031 | } |
756 | 1032 | ||
757 | /* | 1033 | /* |
1034 | * on error we return an unlocked page and the error value | ||
1035 | * on success we return a locked page and 0 | ||
1036 | */ | ||
1037 | static int prepare_uptodate_page(struct page *page, u64 pos) | ||
1038 | { | ||
1039 | int ret = 0; | ||
1040 | |||
1041 | if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) { | ||
1042 | ret = btrfs_readpage(NULL, page); | ||
1043 | if (ret) | ||
1044 | return ret; | ||
1045 | lock_page(page); | ||
1046 | if (!PageUptodate(page)) { | ||
1047 | unlock_page(page); | ||
1048 | return -EIO; | ||
1049 | } | ||
1050 | } | ||
1051 | return 0; | ||
1052 | } | ||
1053 | |||
1054 | /* | ||
758 | * this gets pages into the page cache and locks them down, it also properly | 1055 | * this gets pages into the page cache and locks them down, it also properly |
759 | * waits for data=ordered extents to finish before allowing the pages to be | 1056 | * waits for data=ordered extents to finish before allowing the pages to be |
760 | * modified. | 1057 | * modified. |
@@ -769,6 +1066,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
769 | unsigned long index = pos >> PAGE_CACHE_SHIFT; | 1066 | unsigned long index = pos >> PAGE_CACHE_SHIFT; |
770 | struct inode *inode = fdentry(file)->d_inode; | 1067 | struct inode *inode = fdentry(file)->d_inode; |
771 | int err = 0; | 1068 | int err = 0; |
1069 | int faili = 0; | ||
772 | u64 start_pos; | 1070 | u64 start_pos; |
773 | u64 last_pos; | 1071 | u64 last_pos; |
774 | 1072 | ||
@@ -776,21 +1074,33 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
776 | last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; | 1074 | last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; |
777 | 1075 | ||
778 | if (start_pos > inode->i_size) { | 1076 | if (start_pos > inode->i_size) { |
779 | err = btrfs_cont_expand(inode, start_pos); | 1077 | err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); |
780 | if (err) | 1078 | if (err) |
781 | return err; | 1079 | return err; |
782 | } | 1080 | } |
783 | 1081 | ||
784 | memset(pages, 0, num_pages * sizeof(struct page *)); | ||
785 | again: | 1082 | again: |
786 | for (i = 0; i < num_pages; i++) { | 1083 | for (i = 0; i < num_pages; i++) { |
787 | pages[i] = grab_cache_page(inode->i_mapping, index + i); | 1084 | pages[i] = grab_cache_page(inode->i_mapping, index + i); |
788 | if (!pages[i]) { | 1085 | if (!pages[i]) { |
1086 | faili = i - 1; | ||
789 | err = -ENOMEM; | 1087 | err = -ENOMEM; |
790 | BUG_ON(1); | 1088 | goto fail; |
1089 | } | ||
1090 | |||
1091 | if (i == 0) | ||
1092 | err = prepare_uptodate_page(pages[i], pos); | ||
1093 | if (i == num_pages - 1) | ||
1094 | err = prepare_uptodate_page(pages[i], | ||
1095 | pos + write_bytes); | ||
1096 | if (err) { | ||
1097 | page_cache_release(pages[i]); | ||
1098 | faili = i - 1; | ||
1099 | goto fail; | ||
791 | } | 1100 | } |
792 | wait_on_page_writeback(pages[i]); | 1101 | wait_on_page_writeback(pages[i]); |
793 | } | 1102 | } |
1103 | err = 0; | ||
794 | if (start_pos < inode->i_size) { | 1104 | if (start_pos < inode->i_size) { |
795 | struct btrfs_ordered_extent *ordered; | 1105 | struct btrfs_ordered_extent *ordered; |
796 | lock_extent_bits(&BTRFS_I(inode)->io_tree, | 1106 | lock_extent_bits(&BTRFS_I(inode)->io_tree, |
@@ -830,199 +1140,264 @@ again: | |||
830 | WARN_ON(!PageLocked(pages[i])); | 1140 | WARN_ON(!PageLocked(pages[i])); |
831 | } | 1141 | } |
832 | return 0; | 1142 | return 0; |
1143 | fail: | ||
1144 | while (faili >= 0) { | ||
1145 | unlock_page(pages[faili]); | ||
1146 | page_cache_release(pages[faili]); | ||
1147 | faili--; | ||
1148 | } | ||
1149 | return err; | ||
1150 | |||
833 | } | 1151 | } |
834 | 1152 | ||
835 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | 1153 | static noinline ssize_t __btrfs_buffered_write(struct file *file, |
836 | const struct iovec *iov, | 1154 | struct iov_iter *i, |
837 | unsigned long nr_segs, loff_t pos) | 1155 | loff_t pos) |
838 | { | 1156 | { |
839 | struct file *file = iocb->ki_filp; | ||
840 | struct inode *inode = fdentry(file)->d_inode; | 1157 | struct inode *inode = fdentry(file)->d_inode; |
841 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1158 | struct btrfs_root *root = BTRFS_I(inode)->root; |
842 | struct page *pinned[2]; | ||
843 | struct page **pages = NULL; | 1159 | struct page **pages = NULL; |
844 | struct iov_iter i; | ||
845 | loff_t *ppos = &iocb->ki_pos; | ||
846 | loff_t start_pos; | ||
847 | ssize_t num_written = 0; | ||
848 | ssize_t err = 0; | ||
849 | size_t count; | ||
850 | size_t ocount; | ||
851 | int ret = 0; | ||
852 | int nrptrs; | ||
853 | unsigned long first_index; | 1160 | unsigned long first_index; |
854 | unsigned long last_index; | 1161 | unsigned long last_index; |
855 | int will_write; | 1162 | size_t num_written = 0; |
856 | int buffered = 0; | 1163 | int nrptrs; |
1164 | int ret = 0; | ||
857 | 1165 | ||
858 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || | 1166 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / |
859 | (file->f_flags & O_DIRECT)); | 1167 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / |
1168 | (sizeof(struct page *))); | ||
1169 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | ||
1170 | if (!pages) | ||
1171 | return -ENOMEM; | ||
860 | 1172 | ||
861 | pinned[0] = NULL; | 1173 | first_index = pos >> PAGE_CACHE_SHIFT; |
862 | pinned[1] = NULL; | 1174 | last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT; |
863 | 1175 | ||
864 | start_pos = pos; | 1176 | while (iov_iter_count(i) > 0) { |
1177 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | ||
1178 | size_t write_bytes = min(iov_iter_count(i), | ||
1179 | nrptrs * (size_t)PAGE_CACHE_SIZE - | ||
1180 | offset); | ||
1181 | size_t num_pages = (write_bytes + offset + | ||
1182 | PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
1183 | size_t dirty_pages; | ||
1184 | size_t copied; | ||
865 | 1185 | ||
866 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 1186 | WARN_ON(num_pages > nrptrs); |
867 | 1187 | ||
868 | mutex_lock(&inode->i_mutex); | 1188 | /* |
1189 | * Fault pages before locking them in prepare_pages | ||
1190 | * to avoid a recursive lock | ||
1191 | */ | ||
1192 | if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) { | ||
1193 | ret = -EFAULT; | ||
1194 | break; | ||
1195 | } | ||
869 | 1196 | ||
870 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | 1197 | ret = btrfs_delalloc_reserve_space(inode, |
871 | if (err) | 1198 | num_pages << PAGE_CACHE_SHIFT); |
872 | goto out; | 1199 | if (ret) |
873 | count = ocount; | 1200 | break; |
874 | 1201 | ||
875 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 1202 | /* |
876 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 1203 | * This is going to set up the pages array with the number of
877 | if (err) | 1204 | * pages we want, so we don't really need to worry about the |
878 | goto out; | 1205 | * contents of pages from loop to loop |
1206 | */ | ||
1207 | ret = prepare_pages(root, file, pages, num_pages, | ||
1208 | pos, first_index, last_index, | ||
1209 | write_bytes); | ||
1210 | if (ret) { | ||
1211 | btrfs_delalloc_release_space(inode, | ||
1212 | num_pages << PAGE_CACHE_SHIFT); | ||
1213 | break; | ||
1214 | } | ||
879 | 1215 | ||
880 | if (count == 0) | 1216 | copied = btrfs_copy_from_user(pos, num_pages, |
881 | goto out; | 1217 | write_bytes, pages, i); |
882 | 1218 | ||
883 | err = file_remove_suid(file); | 1219 | /* |
884 | if (err) | 1220 | * if we have trouble faulting in the pages, fall |
885 | goto out; | 1221 | * back to one page at a time |
1222 | */ | ||
1223 | if (copied < write_bytes) | ||
1224 | nrptrs = 1; | ||
886 | 1225 | ||
887 | file_update_time(file); | 1226 | if (copied == 0) |
888 | BTRFS_I(inode)->sequence++; | 1227 | dirty_pages = 0; |
1228 | else | ||
1229 | dirty_pages = (copied + offset + | ||
1230 | PAGE_CACHE_SIZE - 1) >> | ||
1231 | PAGE_CACHE_SHIFT; | ||
889 | 1232 | ||
890 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
891 | num_written = generic_file_direct_write(iocb, iov, &nr_segs, | ||
892 | pos, ppos, count, | ||
893 | ocount); | ||
894 | /* | 1233 | /* |
895 | * the generic O_DIRECT will update in-memory i_size after the | 1234 | * If we had a short copy we need to release the excess delalloc
896 | * DIOs are done. But our endio handlers that update the on | 1235 | * bytes we reserved. We need to increment outstanding_extents |
897 | * disk i_size never update past the in memory i_size. So we | 1236 | * because btrfs_delalloc_release_space will decrement it, but |
898 | * need one more update here to catch any additions to the | 1237 | * we still have an outstanding extent for the chunk we actually |
899 | * file | 1238 | * managed to copy. |
900 | */ | 1239 | */ |
901 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | 1240 | if (num_pages > dirty_pages) { |
902 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 1241 | if (copied > 0) |
903 | mark_inode_dirty(inode); | 1242 | atomic_inc( |
1243 | &BTRFS_I(inode)->outstanding_extents); | ||
1244 | btrfs_delalloc_release_space(inode, | ||
1245 | (num_pages - dirty_pages) << | ||
1246 | PAGE_CACHE_SHIFT); | ||
904 | } | 1247 | } |
905 | 1248 | ||
906 | if (num_written < 0) { | 1249 | if (copied > 0) { |
907 | ret = num_written; | 1250 | ret = btrfs_dirty_pages(root, inode, pages, |
908 | num_written = 0; | 1251 | dirty_pages, pos, copied, |
909 | goto out; | 1252 | NULL); |
910 | } else if (num_written == count) { | 1253 | if (ret) { |
911 | /* pick up pos changes done by the generic code */ | 1254 | btrfs_delalloc_release_space(inode, |
912 | pos = *ppos; | 1255 | dirty_pages << PAGE_CACHE_SHIFT); |
913 | goto out; | 1256 | btrfs_drop_pages(pages, num_pages); |
1257 | break; | ||
1258 | } | ||
914 | } | 1259 | } |
915 | /* | 1260 | |
916 | * We are going to do buffered for the rest of the range, so we | 1261 | btrfs_drop_pages(pages, num_pages); |
917 | * need to make sure to invalidate the buffered pages when we're | 1262 | |
918 | * done. | 1263 | cond_resched(); |
919 | */ | 1264 | |
920 | buffered = 1; | 1265 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, |
921 | pos += num_written; | 1266 | dirty_pages); |
1267 | if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | ||
1268 | btrfs_btree_balance_dirty(root, 1); | ||
1269 | btrfs_throttle(root); | ||
1270 | |||
1271 | pos += copied; | ||
1272 | num_written += copied; | ||
922 | } | 1273 | } |
923 | 1274 | ||
924 | iov_iter_init(&i, iov, nr_segs, count, num_written); | 1275 | kfree(pages); |
925 | nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) / | ||
926 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | ||
927 | (sizeof(struct page *))); | ||
928 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | ||
929 | 1276 | ||
930 | /* generic_write_checks can change our pos */ | 1277 | return num_written ? num_written : ret; |
931 | start_pos = pos; | 1278 | } |
932 | 1279 | ||
933 | first_index = pos >> PAGE_CACHE_SHIFT; | 1280 | static ssize_t __btrfs_direct_write(struct kiocb *iocb, |
934 | last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; | 1281 | const struct iovec *iov, |
1282 | unsigned long nr_segs, loff_t pos, | ||
1283 | loff_t *ppos, size_t count, size_t ocount) | ||
1284 | { | ||
1285 | struct file *file = iocb->ki_filp; | ||
1286 | struct inode *inode = fdentry(file)->d_inode; | ||
1287 | struct iov_iter i; | ||
1288 | ssize_t written; | ||
1289 | ssize_t written_buffered; | ||
1290 | loff_t endbyte; | ||
1291 | int err; | ||
1292 | |||
1293 | written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos, | ||
1294 | count, ocount); | ||
935 | 1295 | ||
936 | /* | 1296 | /* |
937 | * there are lots of better ways to do this, but this code | 1297 | * the generic O_DIRECT will update in-memory i_size after the |
938 | * makes sure the first and last page in the file range are | 1298 | * DIOs are done. But our endio handlers that update the on |
939 | * up to date and ready for cow | 1299 | * disk i_size never update past the in memory i_size. So we |
1300 | * need one more update here to catch any additions to the | ||
1301 | * file | ||
940 | */ | 1302 | */ |
941 | if ((pos & (PAGE_CACHE_SIZE - 1))) { | 1303 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { |
942 | pinned[0] = grab_cache_page(inode->i_mapping, first_index); | 1304 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
943 | if (!PageUptodate(pinned[0])) { | 1305 | mark_inode_dirty(inode); |
944 | ret = btrfs_readpage(NULL, pinned[0]); | ||
945 | BUG_ON(ret); | ||
946 | wait_on_page_locked(pinned[0]); | ||
947 | } else { | ||
948 | unlock_page(pinned[0]); | ||
949 | } | ||
950 | } | 1306 | } |
951 | if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) { | 1307 | |
952 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); | 1308 | if (written < 0 || written == count) |
953 | if (!PageUptodate(pinned[1])) { | 1309 | return written; |
954 | ret = btrfs_readpage(NULL, pinned[1]); | 1310 | |
955 | BUG_ON(ret); | 1311 | pos += written; |
956 | wait_on_page_locked(pinned[1]); | 1312 | count -= written; |
957 | } else { | 1313 | iov_iter_init(&i, iov, nr_segs, count, written); |
958 | unlock_page(pinned[1]); | 1314 | written_buffered = __btrfs_buffered_write(file, &i, pos); |
959 | } | 1315 | if (written_buffered < 0) { |
1316 | err = written_buffered; | ||
1317 | goto out; | ||
960 | } | 1318 | } |
1319 | endbyte = pos + written_buffered - 1; | ||
1320 | err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); | ||
1321 | if (err) | ||
1322 | goto out; | ||
1323 | written += written_buffered; | ||
1324 | *ppos = pos + written_buffered; | ||
1325 | invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT, | ||
1326 | endbyte >> PAGE_CACHE_SHIFT); | ||
1327 | out: | ||
1328 | return written ? written : err; | ||
1329 | } | ||
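__btrfs_direct_write captures a pattern worth calling out: when the O_DIRECT portion comes up short, the remainder goes through the buffered path, and the dirtied range is then written back and dropped from the page cache so later direct I/O never sees stale pages. A toy model of that control flow (the two write functions are placeholders, not the kernel helpers):

```c
#include <stdio.h>

/* Placeholders: "direct" returns a short count, "buffered" finishes. */
static long direct_write(long count)   { return count / 2; }
static long buffered_write(long count) { return count; }

int main(void)
{
	long pos = 0, count = 8192;
	long written = direct_write(count);

	if (written >= 0 && written < count) {
		pos   += written;
		count -= written;
		long more = buffered_write(count);
		if (more > 0) {
			long endbyte = pos + more - 1;
			/* flush, then invalidate [pos, endbyte] so a later
			 * O_DIRECT read can't hit stale cached pages */
			printf("invalidate bytes %ld..%ld\n", pos, endbyte);
			written += more;
		}
	}
	printf("total written = %ld\n", written);
	return 0;
}
```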
961 | 1330 | ||
962 | while (iov_iter_count(&i) > 0) { | 1331 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, |
963 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 1332 | const struct iovec *iov, |
964 | size_t write_bytes = min(iov_iter_count(&i), | 1333 | unsigned long nr_segs, loff_t pos) |
965 | nrptrs * (size_t)PAGE_CACHE_SIZE - | 1334 | { |
966 | offset); | 1335 | struct file *file = iocb->ki_filp; |
967 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> | 1336 | struct inode *inode = fdentry(file)->d_inode; |
968 | PAGE_CACHE_SHIFT; | 1337 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1338 | loff_t *ppos = &iocb->ki_pos; | ||
1339 | ssize_t num_written = 0; | ||
1340 | ssize_t err = 0; | ||
1341 | size_t count, ocount; | ||
969 | 1342 | ||
970 | WARN_ON(num_pages > nrptrs); | 1343 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
971 | memset(pages, 0, sizeof(struct page *) * nrptrs); | ||
972 | 1344 | ||
973 | ret = btrfs_delalloc_reserve_space(inode, write_bytes); | 1345 | mutex_lock(&inode->i_mutex); |
974 | if (ret) | ||
975 | goto out; | ||
976 | 1346 | ||
977 | ret = prepare_pages(root, file, pages, num_pages, | 1347 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); |
978 | pos, first_index, last_index, | 1348 | if (err) { |
979 | write_bytes); | 1349 | mutex_unlock(&inode->i_mutex); |
980 | if (ret) { | 1350 | goto out; |
981 | btrfs_delalloc_release_space(inode, write_bytes); | 1351 | } |
982 | goto out; | 1352 | count = ocount; |
983 | } | ||
984 | 1353 | ||
985 | ret = btrfs_copy_from_user(pos, num_pages, | 1354 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
986 | write_bytes, pages, &i); | 1355 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
987 | if (ret == 0) { | 1356 | if (err) { |
988 | dirty_and_release_pages(NULL, root, file, pages, | 1357 | mutex_unlock(&inode->i_mutex); |
989 | num_pages, pos, write_bytes); | 1358 | goto out; |
990 | } | 1359 | } |
991 | 1360 | ||
992 | btrfs_drop_pages(pages, num_pages); | 1361 | if (count == 0) { |
993 | if (ret) { | 1362 | mutex_unlock(&inode->i_mutex); |
994 | btrfs_delalloc_release_space(inode, write_bytes); | 1363 | goto out; |
995 | goto out; | 1364 | } |
996 | } | ||
997 | 1365 | ||
998 | if (will_write) { | 1366 | err = file_remove_suid(file); |
999 | filemap_fdatawrite_range(inode->i_mapping, pos, | 1367 | if (err) { |
1000 | pos + write_bytes - 1); | 1368 | mutex_unlock(&inode->i_mutex); |
1001 | } else { | 1369 | goto out; |
1002 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | 1370 | } |
1003 | num_pages); | ||
1004 | if (num_pages < | ||
1005 | (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | ||
1006 | btrfs_btree_balance_dirty(root, 1); | ||
1007 | btrfs_throttle(root); | ||
1008 | } | ||
1009 | 1371 | ||
1010 | pos += write_bytes; | 1372 | /* |
1011 | num_written += write_bytes; | 1373 | * If BTRFS flips read-only due to some impossible error
1374 | * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), | ||
1375 | * although we have opened a file as writable, we have | ||
1376 | * to stop this write operation to ensure FS consistency. | ||
1377 | */ | ||
1378 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
1379 | mutex_unlock(&inode->i_mutex); | ||
1380 | err = -EROFS; | ||
1381 | goto out; | ||
1382 | } | ||
1012 | 1383 | ||
1013 | cond_resched(); | 1384 | file_update_time(file); |
1385 | BTRFS_I(inode)->sequence++; | ||
1386 | |||
1387 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
1388 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, | ||
1389 | pos, ppos, count, ocount); | ||
1390 | } else { | ||
1391 | struct iov_iter i; | ||
1392 | |||
1393 | iov_iter_init(&i, iov, nr_segs, count, num_written); | ||
1394 | |||
1395 | num_written = __btrfs_buffered_write(file, &i, pos); | ||
1396 | if (num_written > 0) | ||
1397 | *ppos = pos + num_written; | ||
1014 | } | 1398 | } |
1015 | out: | ||
1016 | mutex_unlock(&inode->i_mutex); | ||
1017 | if (ret) | ||
1018 | err = ret; | ||
1019 | 1399 | ||
1020 | kfree(pages); | 1400 | mutex_unlock(&inode->i_mutex); |
1021 | if (pinned[0]) | ||
1022 | page_cache_release(pinned[0]); | ||
1023 | if (pinned[1]) | ||
1024 | page_cache_release(pinned[1]); | ||
1025 | *ppos = pos; | ||
1026 | 1401 | ||
1027 | /* | 1402 | /* |
1028 | * we want to make sure fsync finds this change | 1403 | * we want to make sure fsync finds this change |
@@ -1037,36 +1412,12 @@ out: | |||
1037 | * one running right now. | 1412 | * one running right now. |
1038 | */ | 1413 | */ |
1039 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | 1414 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; |
1040 | 1415 | if (num_written > 0 || num_written == -EIOCBQUEUED) { | |
1041 | if (num_written > 0 && will_write) { | 1416 | err = generic_write_sync(file, pos, num_written); |
1042 | struct btrfs_trans_handle *trans; | 1417 | if (err < 0 && num_written > 0) |
1043 | |||
1044 | err = btrfs_wait_ordered_range(inode, start_pos, num_written); | ||
1045 | if (err) | ||
1046 | num_written = err; | 1418 | num_written = err; |
1047 | |||
1048 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | ||
1049 | trans = btrfs_start_transaction(root, 0); | ||
1050 | ret = btrfs_log_dentry_safe(trans, root, | ||
1051 | file->f_dentry); | ||
1052 | if (ret == 0) { | ||
1053 | ret = btrfs_sync_log(trans, root); | ||
1054 | if (ret == 0) | ||
1055 | btrfs_end_transaction(trans, root); | ||
1056 | else | ||
1057 | btrfs_commit_transaction(trans, root); | ||
1058 | } else if (ret != BTRFS_NO_LOG_SYNC) { | ||
1059 | btrfs_commit_transaction(trans, root); | ||
1060 | } else { | ||
1061 | btrfs_end_transaction(trans, root); | ||
1062 | } | ||
1063 | } | ||
1064 | if (file->f_flags & O_DIRECT && buffered) { | ||
1065 | invalidate_mapping_pages(inode->i_mapping, | ||
1066 | start_pos >> PAGE_CACHE_SHIFT, | ||
1067 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | ||
1068 | } | ||
1069 | } | 1419 | } |
1420 | out: | ||
1070 | current->backing_dev_info = NULL; | 1421 | current->backing_dev_info = NULL; |
1071 | return num_written ? num_written : err; | 1422 | return num_written ? num_written : err; |
1072 | } | 1423 | } |
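The short-copy accounting earlier in __btrfs_buffered_write is easiest to see with numbers: delalloc space is reserved for num_pages up front, dirty_pages is a round-up of (copied + offset) over the page size, and only the difference is handed back. A worked example assuming 4 KiB pages (values chosen for illustration):

```c
#include <stdio.h>

#define PAGE_SZ 4096UL

int main(void)
{
	unsigned long pos = 6144, write_bytes = 10000, copied = 5000;
	unsigned long offset = pos & (PAGE_SZ - 1);		  /* 2048 */
	unsigned long num_pages =
		(write_bytes + offset + PAGE_SZ - 1) / PAGE_SZ;   /* 3 */
	unsigned long dirty_pages = copied ?
		(copied + offset + PAGE_SZ - 1) / PAGE_SZ : 0;	  /* 2 */

	/* one page's worth of reservation is released after the short copy */
	printf("reserved %lu, keep %lu, release %lu bytes\n",
	       num_pages * PAGE_SZ, dirty_pages * PAGE_SZ,
	       (num_pages - dirty_pages) * PAGE_SZ);
	return 0;
}
```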
@@ -1109,6 +1460,7 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1109 | int ret = 0; | 1460 | int ret = 0; |
1110 | struct btrfs_trans_handle *trans; | 1461 | struct btrfs_trans_handle *trans; |
1111 | 1462 | ||
1463 | trace_btrfs_sync_file(file, datasync); | ||
1112 | 1464 | ||
1113 | /* we wait first, since the writeback may change the inode */ | 1465 | /* we wait first, since the writeback may change the inode */ |
1114 | root->log_batch++; | 1466 | root->log_batch++; |
@@ -1128,14 +1480,12 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1128 | * the current transaction, we can bail out now without any | 1480 | * the current transaction, we can bail out now without any |
1129 | * syncing | 1481 | * syncing |
1130 | */ | 1482 | */ |
1131 | mutex_lock(&root->fs_info->trans_mutex); | 1483 | smp_mb(); |
1132 | if (BTRFS_I(inode)->last_trans <= | 1484 | if (BTRFS_I(inode)->last_trans <= |
1133 | root->fs_info->last_trans_committed) { | 1485 | root->fs_info->last_trans_committed) { |
1134 | BTRFS_I(inode)->last_trans = 0; | 1486 | BTRFS_I(inode)->last_trans = 0; |
1135 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1136 | goto out; | 1487 | goto out; |
1137 | } | 1488 | } |
1138 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1139 | 1489 | ||
1140 | /* | 1490 | /* |
1141 | * ok we haven't committed the transaction yet, let's do a commit | 1491 |
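The fsync fast path above now avoids trans_mutex entirely: a full memory barrier before comparing last_trans with last_trans_committed is enough to observe a commit that completed on another CPU. A loose userspace analogue using a C11 fence (this models the idea only, not the kernel's exact memory-ordering contract):

```c
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long last_trans = 5;
static _Atomic unsigned long last_trans_committed = 5;

int main(void)
{
	/* full fence, roughly analogous to the kernel's smp_mb() */
	atomic_thread_fence(memory_order_seq_cst);

	if (atomic_load(&last_trans) <= atomic_load(&last_trans_committed))
		puts("inode already committed, skip the log sync");
	else
		puts("needs a commit");
	return 0;
}
```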
@@ -1202,6 +1552,118 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | |||
1202 | return 0; | 1552 | return 0; |
1203 | } | 1553 | } |
1204 | 1554 | ||
1555 | static long btrfs_fallocate(struct file *file, int mode, | ||
1556 | loff_t offset, loff_t len) | ||
1557 | { | ||
1558 | struct inode *inode = file->f_path.dentry->d_inode; | ||
1559 | struct extent_state *cached_state = NULL; | ||
1560 | u64 cur_offset; | ||
1561 | u64 last_byte; | ||
1562 | u64 alloc_start; | ||
1563 | u64 alloc_end; | ||
1564 | u64 alloc_hint = 0; | ||
1565 | u64 locked_end; | ||
1566 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
1567 | struct extent_map *em; | ||
1568 | int ret; | ||
1569 | |||
1570 | alloc_start = offset & ~mask; | ||
1571 | alloc_end = (offset + len + mask) & ~mask; | ||
1572 | |||
1573 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | ||
1574 | if (mode & ~FALLOC_FL_KEEP_SIZE) | ||
1575 | return -EOPNOTSUPP; | ||
1576 | |||
1577 | /* | ||
1578 | * wait for ordered IO before we have any locks. We'll loop again | ||
1579 | * below with the locks held. | ||
1580 | */ | ||
1581 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
1582 | |||
1583 | mutex_lock(&inode->i_mutex); | ||
1584 | ret = inode_newsize_ok(inode, alloc_end); | ||
1585 | if (ret) | ||
1586 | goto out; | ||
1587 | |||
1588 | if (alloc_start > inode->i_size) { | ||
1589 | ret = btrfs_cont_expand(inode, i_size_read(inode), | ||
1590 | alloc_start); | ||
1591 | if (ret) | ||
1592 | goto out; | ||
1593 | } | ||
1594 | |||
1595 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); | ||
1596 | if (ret) | ||
1597 | goto out; | ||
1598 | |||
1599 | locked_end = alloc_end - 1; | ||
1600 | while (1) { | ||
1601 | struct btrfs_ordered_extent *ordered; | ||
1602 | |||
1603 | /* the extent lock is ordered inside the running | ||
1604 | * transaction | ||
1605 | */ | ||
1606 | lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, | ||
1607 | locked_end, 0, &cached_state, GFP_NOFS); | ||
1608 | ordered = btrfs_lookup_first_ordered_extent(inode, | ||
1609 | alloc_end - 1); | ||
1610 | if (ordered && | ||
1611 | ordered->file_offset + ordered->len > alloc_start && | ||
1612 | ordered->file_offset < alloc_end) { | ||
1613 | btrfs_put_ordered_extent(ordered); | ||
1614 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | ||
1615 | alloc_start, locked_end, | ||
1616 | &cached_state, GFP_NOFS); | ||
1617 | /* | ||
1618 | * we can't wait on the range with the transaction | ||
1619 | * running or with the extent lock held | ||
1620 | */ | ||
1621 | btrfs_wait_ordered_range(inode, alloc_start, | ||
1622 | alloc_end - alloc_start); | ||
1623 | } else { | ||
1624 | if (ordered) | ||
1625 | btrfs_put_ordered_extent(ordered); | ||
1626 | break; | ||
1627 | } | ||
1628 | } | ||
1629 | |||
1630 | cur_offset = alloc_start; | ||
1631 | while (1) { | ||
1632 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | ||
1633 | alloc_end - cur_offset, 0); | ||
1634 | BUG_ON(IS_ERR_OR_NULL(em)); | ||
1635 | last_byte = min(extent_map_end(em), alloc_end); | ||
1636 | last_byte = (last_byte + mask) & ~mask; | ||
1637 | if (em->block_start == EXTENT_MAP_HOLE || | ||
1638 | (cur_offset >= inode->i_size && | ||
1639 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
1640 | ret = btrfs_prealloc_file_range(inode, mode, cur_offset, | ||
1641 | last_byte - cur_offset, | ||
1642 | 1 << inode->i_blkbits, | ||
1643 | offset + len, | ||
1644 | &alloc_hint); | ||
1645 | if (ret < 0) { | ||
1646 | free_extent_map(em); | ||
1647 | break; | ||
1648 | } | ||
1649 | } | ||
1650 | free_extent_map(em); | ||
1651 | |||
1652 | cur_offset = last_byte; | ||
1653 | if (cur_offset >= alloc_end) { | ||
1654 | ret = 0; | ||
1655 | break; | ||
1656 | } | ||
1657 | } | ||
1658 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||
1659 | &cached_state, GFP_NOFS); | ||
1660 | |||
1661 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); | ||
1662 | out: | ||
1663 | mutex_unlock(&inode->i_mutex); | ||
1664 | return ret; | ||
1665 | } | ||
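btrfs_fallocate rounds the requested range out to sector boundaries before doing anything else: alloc_start rounds down and alloc_end rounds up, both via mask = sectorsize - 1. A worked example assuming 4 KiB sectors:

```c
#include <stdio.h>

int main(void)
{
	unsigned long long sectorsize = 4096, mask = sectorsize - 1;
	unsigned long long offset = 5000, len = 3000;

	unsigned long long alloc_start = offset & ~mask;	      /* 4096 */
	unsigned long long alloc_end = (offset + len + mask) & ~mask; /* 8192 */

	/* the byte range [5000, 8000) grows to the sector range [4096, 8192) */
	printf("preallocate [%llu, %llu)\n", alloc_start, alloc_end);
	return 0;
}
```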
1666 | |||
1205 | const struct file_operations btrfs_file_operations = { | 1667 | const struct file_operations btrfs_file_operations = { |
1206 | .llseek = generic_file_llseek, | 1668 | .llseek = generic_file_llseek, |
1207 | .read = do_sync_read, | 1669 | .read = do_sync_read, |
@@ -1213,6 +1675,7 @@ const struct file_operations btrfs_file_operations = { | |||
1213 | .open = generic_file_open, | 1675 | .open = generic_file_open, |
1214 | .release = btrfs_release_file, | 1676 | .release = btrfs_release_file, |
1215 | .fsync = btrfs_sync_file, | 1677 | .fsync = btrfs_sync_file, |
1678 | .fallocate = btrfs_fallocate, | ||
1216 | .unlocked_ioctl = btrfs_ioctl, | 1679 | .unlocked_ioctl = btrfs_ioctl, |
1217 | #ifdef CONFIG_COMPAT | 1680 | #ifdef CONFIG_COMPAT |
1218 | .compat_ioctl = btrfs_ioctl, | 1681 | .compat_ioctl = btrfs_ioctl, |
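With .fallocate wired into btrfs_file_operations, userspace reaches this code through the ordinary syscall; any mode other than 0 or FALLOC_FL_KEEP_SIZE is refused with EOPNOTSUPP. A minimal usage sketch (the mount point path is made up):

```c
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/btrfs/testfile", O_RDWR | O_CREAT, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* preallocate 1 MiB at offset 0 without changing i_size */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20))
		perror("fallocate");
	close(fd);
	return 0;
}
```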
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f488fac04d99..bf0d61567f3d 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -23,34 +23,937 @@ | |||
23 | #include "ctree.h" | 23 | #include "ctree.h" |
24 | #include "free-space-cache.h" | 24 | #include "free-space-cache.h" |
25 | #include "transaction.h" | 25 | #include "transaction.h" |
26 | #include "disk-io.h" | ||
27 | #include "extent_io.h" | ||
28 | #include "inode-map.h" | ||
26 | 29 | ||
27 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) | 30 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) |
28 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) | 31 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) |
29 | 32 | ||
30 | static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, | 33 | static int link_free_space(struct btrfs_free_space_ctl *ctl, |
34 | struct btrfs_free_space *info); | ||
35 | |||
36 | static struct inode *__lookup_free_space_inode(struct btrfs_root *root, | ||
37 | struct btrfs_path *path, | ||
38 | u64 offset) | ||
39 | { | ||
40 | struct btrfs_key key; | ||
41 | struct btrfs_key location; | ||
42 | struct btrfs_disk_key disk_key; | ||
43 | struct btrfs_free_space_header *header; | ||
44 | struct extent_buffer *leaf; | ||
45 | struct inode *inode = NULL; | ||
46 | int ret; | ||
47 | |||
48 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | ||
49 | key.offset = offset; | ||
50 | key.type = 0; | ||
51 | |||
52 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
53 | if (ret < 0) | ||
54 | return ERR_PTR(ret); | ||
55 | if (ret > 0) { | ||
56 | btrfs_release_path(path); | ||
57 | return ERR_PTR(-ENOENT); | ||
58 | } | ||
59 | |||
60 | leaf = path->nodes[0]; | ||
61 | header = btrfs_item_ptr(leaf, path->slots[0], | ||
62 | struct btrfs_free_space_header); | ||
63 | btrfs_free_space_key(leaf, header, &disk_key); | ||
64 | btrfs_disk_key_to_cpu(&location, &disk_key); | ||
65 | btrfs_release_path(path); | ||
66 | |||
67 | inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); | ||
68 | if (!inode) | ||
69 | return ERR_PTR(-ENOENT); | ||
70 | if (IS_ERR(inode)) | ||
71 | return inode; | ||
72 | if (is_bad_inode(inode)) { | ||
73 | iput(inode); | ||
74 | return ERR_PTR(-ENOENT); | ||
75 | } | ||
76 | |||
77 | inode->i_mapping->flags &= ~__GFP_FS; | ||
78 | |||
79 | return inode; | ||
80 | } | ||
81 | |||
82 | struct inode *lookup_free_space_inode(struct btrfs_root *root, | ||
83 | struct btrfs_block_group_cache | ||
84 | *block_group, struct btrfs_path *path) | ||
85 | { | ||
86 | struct inode *inode = NULL; | ||
87 | |||
88 | spin_lock(&block_group->lock); | ||
89 | if (block_group->inode) | ||
90 | inode = igrab(block_group->inode); | ||
91 | spin_unlock(&block_group->lock); | ||
92 | if (inode) | ||
93 | return inode; | ||
94 | |||
95 | inode = __lookup_free_space_inode(root, path, | ||
96 | block_group->key.objectid); | ||
97 | if (IS_ERR(inode)) | ||
98 | return inode; | ||
99 | |||
100 | spin_lock(&block_group->lock); | ||
101 | if (!btrfs_fs_closing(root->fs_info)) { | ||
102 | block_group->inode = igrab(inode); | ||
103 | block_group->iref = 1; | ||
104 | } | ||
105 | spin_unlock(&block_group->lock); | ||
106 | |||
107 | return inode; | ||
108 | } | ||
109 | |||
110 | int __create_free_space_inode(struct btrfs_root *root, | ||
111 | struct btrfs_trans_handle *trans, | ||
112 | struct btrfs_path *path, u64 ino, u64 offset) | ||
113 | { | ||
114 | struct btrfs_key key; | ||
115 | struct btrfs_disk_key disk_key; | ||
116 | struct btrfs_free_space_header *header; | ||
117 | struct btrfs_inode_item *inode_item; | ||
118 | struct extent_buffer *leaf; | ||
119 | int ret; | ||
120 | |||
121 | ret = btrfs_insert_empty_inode(trans, root, path, ino); | ||
122 | if (ret) | ||
123 | return ret; | ||
124 | |||
125 | leaf = path->nodes[0]; | ||
126 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | ||
127 | struct btrfs_inode_item); | ||
128 | btrfs_item_key(leaf, &disk_key, path->slots[0]); | ||
129 | memset_extent_buffer(leaf, 0, (unsigned long)inode_item, | ||
130 | sizeof(*inode_item)); | ||
131 | btrfs_set_inode_generation(leaf, inode_item, trans->transid); | ||
132 | btrfs_set_inode_size(leaf, inode_item, 0); | ||
133 | btrfs_set_inode_nbytes(leaf, inode_item, 0); | ||
134 | btrfs_set_inode_uid(leaf, inode_item, 0); | ||
135 | btrfs_set_inode_gid(leaf, inode_item, 0); | ||
136 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); | ||
137 | btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | | ||
138 | BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); | ||
139 | btrfs_set_inode_nlink(leaf, inode_item, 1); | ||
140 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); | ||
141 | btrfs_set_inode_block_group(leaf, inode_item, offset); | ||
142 | btrfs_mark_buffer_dirty(leaf); | ||
143 | btrfs_release_path(path); | ||
144 | |||
145 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | ||
146 | key.offset = offset; | ||
147 | key.type = 0; | ||
148 | |||
149 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
150 | sizeof(struct btrfs_free_space_header)); | ||
151 | if (ret < 0) { | ||
152 | btrfs_release_path(path); | ||
153 | return ret; | ||
154 | } | ||
155 | leaf = path->nodes[0]; | ||
156 | header = btrfs_item_ptr(leaf, path->slots[0], | ||
157 | struct btrfs_free_space_header); | ||
158 | memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header)); | ||
159 | btrfs_set_free_space_key(leaf, header, &disk_key); | ||
160 | btrfs_mark_buffer_dirty(leaf); | ||
161 | btrfs_release_path(path); | ||
162 | |||
163 | return 0; | ||
164 | } | ||
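Both the lookup and creation paths address the cache header with the same fixed key: objectid BTRFS_FREE_SPACE_OBJECTID, type 0, and the block group's start as the offset. A small sketch of building that key (the struct is a simplified stand-in for btrfs_key, and the objectid value is quoted from ctree.h of this era):

```c
#include <stdio.h>

#define BTRFS_FREE_SPACE_OBJECTID (-11ULL)	/* per ctree.h */

/* simplified stand-in for struct btrfs_key */
struct key {
	unsigned long long objectid;
	unsigned char type;
	unsigned long long offset;
};

int main(void)
{
	unsigned long long block_group_start = 12582912ULL; /* example */
	struct key k = {
		.objectid = BTRFS_FREE_SPACE_OBJECTID,
		.type	  = 0,
		.offset	  = block_group_start,
	};

	printf("header key: (%llu, %u, %llu)\n",
	       k.objectid, k.type, k.offset);
	return 0;
}
```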
165 | |||
166 | int create_free_space_inode(struct btrfs_root *root, | ||
167 | struct btrfs_trans_handle *trans, | ||
168 | struct btrfs_block_group_cache *block_group, | ||
169 | struct btrfs_path *path) | ||
170 | { | ||
171 | int ret; | ||
172 | u64 ino; | ||
173 | |||
174 | ret = btrfs_find_free_objectid(root, &ino); | ||
175 | if (ret < 0) | ||
176 | return ret; | ||
177 | |||
178 | return __create_free_space_inode(root, trans, path, ino, | ||
179 | block_group->key.objectid); | ||
180 | } | ||
181 | |||
182 | int btrfs_truncate_free_space_cache(struct btrfs_root *root, | ||
183 | struct btrfs_trans_handle *trans, | ||
184 | struct btrfs_path *path, | ||
185 | struct inode *inode) | ||
186 | { | ||
187 | loff_t oldsize; | ||
188 | int ret = 0; | ||
189 | |||
190 | trans->block_rsv = root->orphan_block_rsv; | ||
191 | ret = btrfs_block_rsv_check(trans, root, | ||
192 | root->orphan_block_rsv, | ||
193 | 0, 5); | ||
194 | if (ret) | ||
195 | return ret; | ||
196 | |||
197 | oldsize = i_size_read(inode); | ||
198 | btrfs_i_size_write(inode, 0); | ||
199 | truncate_pagecache(inode, oldsize, 0); | ||
200 | |||
201 | /* | ||
202 | * We don't need an orphan item because truncating the free space cache | ||
203 | * will never be split across transactions. | ||
204 | */ | ||
205 | ret = btrfs_truncate_inode_items(trans, root, inode, | ||
206 | 0, BTRFS_EXTENT_DATA_KEY); | ||
207 | if (ret) { | ||
208 | WARN_ON(1); | ||
209 | return ret; | ||
210 | } | ||
211 | |||
212 | ret = btrfs_update_inode(trans, root, inode); | ||
213 | return ret; | ||
214 | } | ||
215 | |||
216 | static int readahead_cache(struct inode *inode) | ||
217 | { | ||
218 | struct file_ra_state *ra; | ||
219 | unsigned long last_index; | ||
220 | |||
221 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | ||
222 | if (!ra) | ||
223 | return -ENOMEM; | ||
224 | |||
225 | file_ra_state_init(ra, inode->i_mapping); | ||
226 | last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; | ||
227 | |||
228 | page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); | ||
229 | |||
230 | kfree(ra); | ||
231 | |||
232 | return 0; | ||
233 | } | ||
234 | |||
235 | int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | ||
236 | struct btrfs_free_space_ctl *ctl, | ||
237 | struct btrfs_path *path, u64 offset) | ||
238 | { | ||
239 | struct btrfs_free_space_header *header; | ||
240 | struct extent_buffer *leaf; | ||
241 | struct page *page; | ||
242 | u32 *checksums = NULL, *crc; | ||
243 | char *disk_crcs = NULL; | ||
244 | struct btrfs_key key; | ||
245 | struct list_head bitmaps; | ||
246 | u64 num_entries; | ||
247 | u64 num_bitmaps; | ||
248 | u64 generation; | ||
249 | u32 cur_crc = ~(u32)0; | ||
250 | pgoff_t index = 0; | ||
251 | unsigned long first_page_offset; | ||
252 | int num_checksums; | ||
253 | int ret = 0; | ||
254 | |||
255 | INIT_LIST_HEAD(&bitmaps); | ||
256 | |||
257 | /* Nothing in the space cache, goodbye */ | ||
258 | if (!i_size_read(inode)) | ||
259 | goto out; | ||
260 | |||
261 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | ||
262 | key.offset = offset; | ||
263 | key.type = 0; | ||
264 | |||
265 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
266 | if (ret < 0) | ||
267 | goto out; | ||
268 | else if (ret > 0) { | ||
269 | btrfs_release_path(path); | ||
270 | ret = 0; | ||
271 | goto out; | ||
272 | } | ||
273 | |||
274 | ret = -1; | ||
275 | |||
276 | leaf = path->nodes[0]; | ||
277 | header = btrfs_item_ptr(leaf, path->slots[0], | ||
278 | struct btrfs_free_space_header); | ||
279 | num_entries = btrfs_free_space_entries(leaf, header); | ||
280 | num_bitmaps = btrfs_free_space_bitmaps(leaf, header); | ||
281 | generation = btrfs_free_space_generation(leaf, header); | ||
282 | btrfs_release_path(path); | ||
283 | |||
284 | if (BTRFS_I(inode)->generation != generation) { | ||
285 | printk(KERN_ERR "btrfs: free space inode generation (%llu) did" | ||
286 | " not match free space cache generation (%llu)\n", | ||
287 | (unsigned long long)BTRFS_I(inode)->generation, | ||
288 | (unsigned long long)generation); | ||
289 | goto out; | ||
290 | } | ||
291 | |||
292 | if (!num_entries) | ||
293 | goto out; | ||
294 | |||
295 | /* Setup everything for doing checksumming */ | ||
296 | num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; | ||
297 | checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); | ||
298 | if (!checksums) | ||
299 | goto out; | ||
300 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); | ||
301 | disk_crcs = kzalloc(first_page_offset, GFP_NOFS); | ||
302 | if (!disk_crcs) | ||
303 | goto out; | ||
304 | |||
305 | ret = readahead_cache(inode); | ||
306 | if (ret) | ||
307 | goto out; | ||
308 | |||
309 | while (1) { | ||
310 | struct btrfs_free_space_entry *entry; | ||
311 | struct btrfs_free_space *e; | ||
312 | void *addr; | ||
313 | unsigned long offset = 0; | ||
314 | unsigned long start_offset = 0; | ||
315 | int need_loop = 0; | ||
316 | |||
317 | if (!num_entries && !num_bitmaps) | ||
318 | break; | ||
319 | |||
320 | if (index == 0) { | ||
321 | start_offset = first_page_offset; | ||
322 | offset = start_offset; | ||
323 | } | ||
324 | |||
325 | page = grab_cache_page(inode->i_mapping, index); | ||
326 | if (!page) | ||
327 | goto free_cache; | ||
328 | |||
329 | if (!PageUptodate(page)) { | ||
330 | btrfs_readpage(NULL, page); | ||
331 | lock_page(page); | ||
332 | if (!PageUptodate(page)) { | ||
333 | unlock_page(page); | ||
334 | page_cache_release(page); | ||
335 | printk(KERN_ERR "btrfs: error reading free " | ||
336 | "space cache\n"); | ||
337 | goto free_cache; | ||
338 | } | ||
339 | } | ||
340 | addr = kmap(page); | ||
341 | |||
342 | if (index == 0) { | ||
343 | u64 *gen; | ||
344 | |||
345 | memcpy(disk_crcs, addr, first_page_offset); | ||
346 | gen = addr + (sizeof(u32) * num_checksums); | ||
347 | if (*gen != BTRFS_I(inode)->generation) { | ||
348 | printk(KERN_ERR "btrfs: space cache generation" | ||
349 | " (%llu) does not match inode (%llu)\n", | ||
350 | (unsigned long long)*gen, | ||
351 | (unsigned long long) | ||
352 | BTRFS_I(inode)->generation); | ||
353 | kunmap(page); | ||
354 | unlock_page(page); | ||
355 | page_cache_release(page); | ||
356 | goto free_cache; | ||
357 | } | ||
358 | crc = (u32 *)disk_crcs; | ||
359 | } | ||
360 | entry = addr + start_offset; | ||
361 | |||
362 | /* First let's check our crc before we do anything fun */ | ||
363 | cur_crc = ~(u32)0; | ||
364 | cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc, | ||
365 | PAGE_CACHE_SIZE - start_offset); | ||
366 | btrfs_csum_final(cur_crc, (char *)&cur_crc); | ||
367 | if (cur_crc != *crc) { | ||
368 | printk(KERN_ERR "btrfs: crc mismatch for page %lu\n", | ||
369 | index); | ||
370 | kunmap(page); | ||
371 | unlock_page(page); | ||
372 | page_cache_release(page); | ||
373 | goto free_cache; | ||
374 | } | ||
375 | crc++; | ||
376 | |||
377 | while (1) { | ||
378 | if (!num_entries) | ||
379 | break; | ||
380 | |||
381 | need_loop = 1; | ||
382 | e = kmem_cache_zalloc(btrfs_free_space_cachep, | ||
383 | GFP_NOFS); | ||
384 | if (!e) { | ||
385 | kunmap(page); | ||
386 | unlock_page(page); | ||
387 | page_cache_release(page); | ||
388 | goto free_cache; | ||
389 | } | ||
390 | |||
391 | e->offset = le64_to_cpu(entry->offset); | ||
392 | e->bytes = le64_to_cpu(entry->bytes); | ||
393 | if (!e->bytes) { | ||
394 | kunmap(page); | ||
395 | kmem_cache_free(btrfs_free_space_cachep, e); | ||
396 | unlock_page(page); | ||
397 | page_cache_release(page); | ||
398 | goto free_cache; | ||
399 | } | ||
400 | |||
401 | if (entry->type == BTRFS_FREE_SPACE_EXTENT) { | ||
402 | spin_lock(&ctl->tree_lock); | ||
403 | ret = link_free_space(ctl, e); | ||
404 | spin_unlock(&ctl->tree_lock); | ||
405 | if (ret) { | ||
406 | printk(KERN_ERR "Duplicate entries in " | ||
407 | "free space cache, dumping\n"); | ||
408 | kunmap(page); | ||
409 | unlock_page(page); | ||
410 | page_cache_release(page); | ||
411 | goto free_cache; | ||
412 | } | ||
413 | } else { | ||
414 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); | ||
415 | if (!e->bitmap) { | ||
416 | kunmap(page); | ||
417 | kmem_cache_free( | ||
418 | btrfs_free_space_cachep, e); | ||
419 | unlock_page(page); | ||
420 | page_cache_release(page); | ||
421 | goto free_cache; | ||
422 | } | ||
423 | spin_lock(&ctl->tree_lock); | ||
424 | ret = link_free_space(ctl, e); | ||
425 | ctl->total_bitmaps++; | ||
426 | ctl->op->recalc_thresholds(ctl); | ||
427 | spin_unlock(&ctl->tree_lock); | ||
428 | if (ret) { | ||
429 | printk(KERN_ERR "Duplicate entries in " | ||
430 | "free space cache, dumping\n"); | ||
431 | kunmap(page); | ||
432 | unlock_page(page); | ||
433 | page_cache_release(page); | ||
434 | goto free_cache; | ||
435 | } | ||
436 | list_add_tail(&e->list, &bitmaps); | ||
437 | } | ||
438 | |||
439 | num_entries--; | ||
440 | offset += sizeof(struct btrfs_free_space_entry); | ||
441 | if (offset + sizeof(struct btrfs_free_space_entry) >= | ||
442 | PAGE_CACHE_SIZE) | ||
443 | break; | ||
444 | entry++; | ||
445 | } | ||
446 | |||
447 | /* | ||
448 | * We read an entry out of this page, so we need to move on to the | ||
449 | * next page. | ||
450 | */ | ||
451 | if (need_loop) { | ||
452 | kunmap(page); | ||
453 | goto next; | ||
454 | } | ||
455 | |||
456 | /* | ||
457 | * We add the bitmaps at the end of the entries, in the same order | ||
458 | * that the bitmap entries were added to the cache. | ||
459 | */ | ||
460 | e = list_entry(bitmaps.next, struct btrfs_free_space, list); | ||
461 | list_del_init(&e->list); | ||
462 | memcpy(e->bitmap, addr, PAGE_CACHE_SIZE); | ||
463 | kunmap(page); | ||
464 | num_bitmaps--; | ||
465 | next: | ||
466 | unlock_page(page); | ||
467 | page_cache_release(page); | ||
468 | index++; | ||
469 | } | ||
470 | |||
471 | ret = 1; | ||
472 | out: | ||
473 | kfree(checksums); | ||
474 | kfree(disk_crcs); | ||
475 | return ret; | ||
476 | free_cache: | ||
477 | __btrfs_remove_free_space_cache(ctl); | ||
478 | goto out; | ||
479 | } | ||
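The cache file layout that __load_free_space_cache expects is simple: page 0 begins with one u32 crc per page, then a u64 generation, and the entries start right after that, at first_page_offset. A worked example, assuming 4 KiB pages, an 8-page cache file, and common type sizes:

```c
#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;
	unsigned long cache_size = 8 * page_size;	/* example file size */
	unsigned long num_pages = cache_size / page_size;

	/* 8 crcs (4 bytes each) + an 8-byte generation: entries at byte 40 */
	unsigned long first_page_offset =
		sizeof(unsigned int) * num_pages + sizeof(unsigned long long);

	printf("entries begin at byte %lu of page 0\n", first_page_offset);
	return 0;
}
```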
480 | |||
481 | int load_free_space_cache(struct btrfs_fs_info *fs_info, | ||
482 | struct btrfs_block_group_cache *block_group) | ||
483 | { | ||
484 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
485 | struct btrfs_root *root = fs_info->tree_root; | ||
486 | struct inode *inode; | ||
487 | struct btrfs_path *path; | ||
488 | int ret; | ||
489 | bool matched; | ||
490 | u64 used = btrfs_block_group_used(&block_group->item); | ||
491 | |||
492 | /* | ||
493 | * If we're unmounting then just return, since this does a search on the | ||
494 | * normal root and not the commit root and we could deadlock. | ||
495 | */ | ||
496 | if (btrfs_fs_closing(fs_info)) | ||
497 | return 0; | ||
498 | |||
499 | /* | ||
500 | * If this block group has been marked to be cleared for one reason or | ||
501 | * another then we can't trust the on disk cache, so just return. | ||
502 | */ | ||
503 | spin_lock(&block_group->lock); | ||
504 | if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { | ||
505 | spin_unlock(&block_group->lock); | ||
506 | return 0; | ||
507 | } | ||
508 | spin_unlock(&block_group->lock); | ||
509 | |||
510 | path = btrfs_alloc_path(); | ||
511 | if (!path) | ||
512 | return 0; | ||
513 | |||
514 | inode = lookup_free_space_inode(root, block_group, path); | ||
515 | if (IS_ERR(inode)) { | ||
516 | btrfs_free_path(path); | ||
517 | return 0; | ||
518 | } | ||
519 | |||
520 | ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, | ||
521 | path, block_group->key.objectid); | ||
522 | btrfs_free_path(path); | ||
523 | if (ret <= 0) | ||
524 | goto out; | ||
525 | |||
526 | spin_lock(&ctl->tree_lock); | ||
527 | matched = (ctl->free_space == (block_group->key.offset - used - | ||
528 | block_group->bytes_super)); | ||
529 | spin_unlock(&ctl->tree_lock); | ||
530 | |||
531 | if (!matched) { | ||
532 | __btrfs_remove_free_space_cache(ctl); | ||
533 | printk(KERN_ERR "block group %llu has the wrong amount of free " | ||
534 | "space\n", block_group->key.objectid); | ||
535 | ret = -1; | ||
536 | } | ||
537 | out: | ||
538 | if (ret < 0) { | ||
539 | /* This cache is bogus, make sure it gets cleared */ | ||
540 | spin_lock(&block_group->lock); | ||
541 | block_group->disk_cache_state = BTRFS_DC_CLEAR; | ||
542 | spin_unlock(&block_group->lock); | ||
543 | ret = 0; | ||
544 | |||
545 | printk(KERN_ERR "btrfs: failed to load free space cache " | ||
546 | "for block group %llu\n", block_group->key.objectid); | ||
547 | } | ||
548 | |||
549 | iput(inode); | ||
550 | return ret; | ||
551 | } | ||
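The "matched" test above is plain bookkeeping: the loaded free space must equal the block group's size minus the allocated bytes and the superblock-reserved bytes, otherwise the cache is thrown away. In numbers (all values illustrative):

```c
#include <stdio.h>

int main(void)
{
	unsigned long long bg_size	= 1073741824ULL; /* 1 GiB group */
	unsigned long long used		= 268435456ULL;	 /* 256 MiB allocated */
	unsigned long long bytes_super	= 1048576ULL;	 /* 1 MiB reserved */
	unsigned long long loaded_free	= 804257792ULL;	 /* from the cache */

	if (loaded_free == bg_size - used - bytes_super)
		puts("cache matches, keep it");
	else
		puts("mismatch, dump the cache and rebuild from disk");
	return 0;
}
```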
552 | |||
553 | int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | ||
554 | struct btrfs_free_space_ctl *ctl, | ||
555 | struct btrfs_block_group_cache *block_group, | ||
556 | struct btrfs_trans_handle *trans, | ||
557 | struct btrfs_path *path, u64 offset) | ||
558 | { | ||
559 | struct btrfs_free_space_header *header; | ||
560 | struct extent_buffer *leaf; | ||
561 | struct rb_node *node; | ||
562 | struct list_head *pos, *n; | ||
563 | struct page **pages; | ||
564 | struct page *page; | ||
565 | struct extent_state *cached_state = NULL; | ||
566 | struct btrfs_free_cluster *cluster = NULL; | ||
567 | struct extent_io_tree *unpin = NULL; | ||
568 | struct list_head bitmap_list; | ||
569 | struct btrfs_key key; | ||
570 | u64 start, end, len; | ||
571 | u64 bytes = 0; | ||
572 | u32 *crc, *checksums; | ||
573 | unsigned long first_page_offset; | ||
574 | int index = 0, num_pages = 0; | ||
575 | int entries = 0; | ||
576 | int bitmaps = 0; | ||
577 | int ret = -1; | ||
578 | bool next_page = false; | ||
579 | bool out_of_space = false; | ||
580 | |||
581 | INIT_LIST_HEAD(&bitmap_list); | ||
582 | |||
583 | node = rb_first(&ctl->free_space_offset); | ||
584 | if (!node) | ||
585 | return 0; | ||
586 | |||
587 | if (!i_size_read(inode)) | ||
588 | return -1; | ||
589 | |||
590 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | ||
591 | PAGE_CACHE_SHIFT; | ||
592 | |||
593 | /* Since the first page has all of our checksums and our generation, we | ||
594 | * need to calculate the offset into the page at which we can start writing | ||
595 | * our entries. | ||
596 | */ | ||
597 | first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); | ||
598 | |||
599 | filemap_write_and_wait(inode->i_mapping); | ||
600 | btrfs_wait_ordered_range(inode, inode->i_size & | ||
601 | ~(root->sectorsize - 1), (u64)-1); | ||
602 | |||
603 | /* make sure we don't overflow that first page */ | ||
604 | if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) { | ||
605 | /* this is really the same as running out of space, where we also return 0 */ | ||
606 | printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n"); | ||
607 | ret = 0; | ||
608 | goto out_update; | ||
609 | } | ||
610 | |||
611 | /* We need a checksum per page. */ | ||
612 | crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); | ||
613 | if (!crc) | ||
614 | return -1; | ||
615 | |||
616 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); | ||
617 | if (!pages) { | ||
618 | kfree(crc); | ||
619 | return -1; | ||
620 | } | ||
621 | |||
622 | /* Get the cluster for this block_group if it exists */ | ||
623 | if (block_group && !list_empty(&block_group->cluster_list)) | ||
624 | cluster = list_entry(block_group->cluster_list.next, | ||
625 | struct btrfs_free_cluster, | ||
626 | block_group_list); | ||
627 | |||
628 | /* | ||
629 | * We shouldn't have switched the pinned extents yet so this is the | ||
630 | * right one | ||
631 | */ | ||
632 | unpin = root->fs_info->pinned_extents; | ||
633 | |||
634 | /* | ||
635 | * Lock all pages first so we can lock the extent safely. | ||
636 | * | ||
637 | * NOTE: Because we hold the ref the entire time we're going to write to | ||
638 | * the page, find_get_page should never fail, so we don't do a check | ||
639 | * after find_get_page at this point. Just putting this here so people | ||
640 | * know and don't freak out. | ||
641 | */ | ||
642 | while (index < num_pages) { | ||
643 | page = grab_cache_page(inode->i_mapping, index); | ||
644 | if (!page) { | ||
645 | int i; | ||
646 | |||
647 | for (i = 0; i < num_pages; i++) { | ||
648 | unlock_page(pages[i]); | ||
649 | page_cache_release(pages[i]); | ||
650 | } | ||
651 | goto out_free; | ||
652 | } | ||
653 | pages[index] = page; | ||
654 | index++; | ||
655 | } | ||
656 | |||
657 | index = 0; | ||
658 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, | ||
659 | 0, &cached_state, GFP_NOFS); | ||
660 | |||
661 | /* | ||
662 | * When searching for pinned extents, we need to start at our start | ||
663 | * offset. | ||
664 | */ | ||
665 | if (block_group) | ||
666 | start = block_group->key.objectid; | ||
667 | |||
668 | /* Write out the extent entries */ | ||
669 | do { | ||
670 | struct btrfs_free_space_entry *entry; | ||
671 | void *addr; | ||
672 | unsigned long offset = 0; | ||
673 | unsigned long start_offset = 0; | ||
674 | |||
675 | next_page = false; | ||
676 | |||
677 | if (index == 0) { | ||
678 | start_offset = first_page_offset; | ||
679 | offset = start_offset; | ||
680 | } | ||
681 | |||
682 | if (index >= num_pages) { | ||
683 | out_of_space = true; | ||
684 | break; | ||
685 | } | ||
686 | |||
687 | page = pages[index]; | ||
688 | |||
689 | addr = kmap(page); | ||
690 | entry = addr + start_offset; | ||
691 | |||
692 | memset(addr, 0, PAGE_CACHE_SIZE); | ||
693 | while (node && !next_page) { | ||
694 | struct btrfs_free_space *e; | ||
695 | |||
696 | e = rb_entry(node, struct btrfs_free_space, offset_index); | ||
697 | entries++; | ||
698 | |||
699 | entry->offset = cpu_to_le64(e->offset); | ||
700 | entry->bytes = cpu_to_le64(e->bytes); | ||
701 | if (e->bitmap) { | ||
702 | entry->type = BTRFS_FREE_SPACE_BITMAP; | ||
703 | list_add_tail(&e->list, &bitmap_list); | ||
704 | bitmaps++; | ||
705 | } else { | ||
706 | entry->type = BTRFS_FREE_SPACE_EXTENT; | ||
707 | } | ||
708 | node = rb_next(node); | ||
709 | if (!node && cluster) { | ||
710 | node = rb_first(&cluster->root); | ||
711 | cluster = NULL; | ||
712 | } | ||
713 | offset += sizeof(struct btrfs_free_space_entry); | ||
714 | if (offset + sizeof(struct btrfs_free_space_entry) >= | ||
715 | PAGE_CACHE_SIZE) | ||
716 | next_page = true; | ||
717 | entry++; | ||
718 | } | ||
719 | |||
720 | /* | ||
721 | * We want to add any pinned extents to our free space cache | ||
722 | * so we don't leak the space | ||
723 | */ | ||
724 | while (block_group && !next_page && | ||
725 | (start < block_group->key.objectid + | ||
726 | block_group->key.offset)) { | ||
727 | ret = find_first_extent_bit(unpin, start, &start, &end, | ||
728 | EXTENT_DIRTY); | ||
729 | if (ret) { | ||
730 | ret = 0; | ||
731 | break; | ||
732 | } | ||
733 | |||
734 | /* This pinned extent is out of our range */ | ||
735 | if (start >= block_group->key.objectid + | ||
736 | block_group->key.offset) | ||
737 | break; | ||
738 | |||
739 | len = block_group->key.objectid + | ||
740 | block_group->key.offset - start; | ||
741 | len = min(len, end + 1 - start); | ||
742 | |||
743 | entries++; | ||
744 | entry->offset = cpu_to_le64(start); | ||
745 | entry->bytes = cpu_to_le64(len); | ||
746 | entry->type = BTRFS_FREE_SPACE_EXTENT; | ||
747 | |||
748 | start = end + 1; | ||
749 | offset += sizeof(struct btrfs_free_space_entry); | ||
750 | if (offset + sizeof(struct btrfs_free_space_entry) >= | ||
751 | PAGE_CACHE_SIZE) | ||
752 | next_page = true; | ||
753 | entry++; | ||
754 | } | ||
755 | *crc = ~(u32)0; | ||
756 | *crc = btrfs_csum_data(root, addr + start_offset, *crc, | ||
757 | PAGE_CACHE_SIZE - start_offset); | ||
758 | kunmap(page); | ||
759 | |||
760 | btrfs_csum_final(*crc, (char *)crc); | ||
761 | crc++; | ||
762 | |||
763 | bytes += PAGE_CACHE_SIZE; | ||
764 | |||
765 | index++; | ||
766 | } while (node || next_page); | ||
767 | |||
768 | /* Write out the bitmaps */ | ||
769 | list_for_each_safe(pos, n, &bitmap_list) { | ||
770 | void *addr; | ||
771 | struct btrfs_free_space *entry = | ||
772 | list_entry(pos, struct btrfs_free_space, list); | ||
773 | |||
774 | if (index >= num_pages) { | ||
775 | out_of_space = true; | ||
776 | break; | ||
777 | } | ||
778 | page = pages[index]; | ||
779 | |||
780 | addr = kmap(page); | ||
781 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); | ||
782 | *crc = ~(u32)0; | ||
783 | *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE); | ||
784 | kunmap(page); | ||
785 | btrfs_csum_final(*crc, (char *)crc); | ||
786 | crc++; | ||
787 | bytes += PAGE_CACHE_SIZE; | ||
788 | |||
789 | list_del_init(&entry->list); | ||
790 | index++; | ||
791 | } | ||
792 | |||
793 | if (out_of_space) { | ||
794 | btrfs_drop_pages(pages, num_pages); | ||
795 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
796 | i_size_read(inode) - 1, &cached_state, | ||
797 | GFP_NOFS); | ||
798 | ret = 0; | ||
799 | goto out_free; | ||
800 | } | ||
801 | |||
802 | /* Zero out the rest of the pages just to make sure */ | ||
803 | while (index < num_pages) { | ||
804 | void *addr; | ||
805 | |||
806 | page = pages[index]; | ||
807 | addr = kmap(page); | ||
808 | memset(addr, 0, PAGE_CACHE_SIZE); | ||
809 | kunmap(page); | ||
810 | bytes += PAGE_CACHE_SIZE; | ||
811 | index++; | ||
812 | } | ||
813 | |||
814 | /* Write the checksums and trans id to the first page */ | ||
815 | { | ||
816 | void *addr; | ||
817 | u64 *gen; | ||
818 | |||
819 | page = pages[0]; | ||
820 | |||
821 | addr = kmap(page); | ||
822 | memcpy(addr, checksums, sizeof(u32) * num_pages); | ||
823 | gen = addr + (sizeof(u32) * num_pages); | ||
824 | *gen = trans->transid; | ||
825 | kunmap(page); | ||
826 | } | ||
827 | |||
828 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, | ||
829 | bytes, &cached_state); | ||
830 | btrfs_drop_pages(pages, num_pages); | ||
831 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
832 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); | ||
833 | |||
834 | if (ret) { | ||
835 | ret = 0; | ||
836 | goto out_free; | ||
837 | } | ||
838 | |||
839 | BTRFS_I(inode)->generation = trans->transid; | ||
840 | |||
841 | filemap_write_and_wait(inode->i_mapping); | ||
842 | |||
843 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | ||
844 | key.offset = offset; | ||
845 | key.type = 0; | ||
846 | |||
847 | ret = btrfs_search_slot(trans, root, &key, path, 1, 1); | ||
848 | if (ret < 0) { | ||
849 | ret = -1; | ||
850 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, | ||
851 | EXTENT_DIRTY | EXTENT_DELALLOC | | ||
852 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); | ||
853 | goto out_free; | ||
854 | } | ||
855 | leaf = path->nodes[0]; | ||
856 | if (ret > 0) { | ||
857 | struct btrfs_key found_key; | ||
858 | BUG_ON(!path->slots[0]); | ||
859 | path->slots[0]--; | ||
860 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
861 | if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || | ||
862 | found_key.offset != offset) { | ||
863 | ret = -1; | ||
864 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, | ||
865 | EXTENT_DIRTY | EXTENT_DELALLOC | | ||
866 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, | ||
867 | GFP_NOFS); | ||
868 | btrfs_release_path(path); | ||
869 | goto out_free; | ||
870 | } | ||
871 | } | ||
872 | header = btrfs_item_ptr(leaf, path->slots[0], | ||
873 | struct btrfs_free_space_header); | ||
874 | btrfs_set_free_space_entries(leaf, header, entries); | ||
875 | btrfs_set_free_space_bitmaps(leaf, header, bitmaps); | ||
876 | btrfs_set_free_space_generation(leaf, header, trans->transid); | ||
877 | btrfs_mark_buffer_dirty(leaf); | ||
878 | btrfs_release_path(path); | ||
879 | |||
880 | ret = 1; | ||
881 | |||
882 | out_free: | ||
883 | kfree(checksums); | ||
884 | kfree(pages); | ||
885 | |||
886 | out_update: | ||
887 | if (ret != 1) { | ||
888 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); | ||
889 | BTRFS_I(inode)->generation = 0; | ||
890 | } | ||
891 | btrfs_update_inode(trans, root, inode); | ||
892 | return ret; | ||
893 | } | ||
894 | |||
895 | int btrfs_write_out_cache(struct btrfs_root *root, | ||
896 | struct btrfs_trans_handle *trans, | ||
897 | struct btrfs_block_group_cache *block_group, | ||
898 | struct btrfs_path *path) | ||
899 | { | ||
900 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
901 | struct inode *inode; | ||
902 | int ret = 0; | ||
903 | |||
904 | root = root->fs_info->tree_root; | ||
905 | |||
906 | spin_lock(&block_group->lock); | ||
907 | if (block_group->disk_cache_state < BTRFS_DC_SETUP) { | ||
908 | spin_unlock(&block_group->lock); | ||
909 | return 0; | ||
910 | } | ||
911 | spin_unlock(&block_group->lock); | ||
912 | |||
913 | inode = lookup_free_space_inode(root, block_group, path); | ||
914 | if (IS_ERR(inode)) | ||
915 | return 0; | ||
916 | |||
917 | ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, | ||
918 | path, block_group->key.objectid); | ||
919 | if (ret < 0) { | ||
920 | spin_lock(&block_group->lock); | ||
921 | block_group->disk_cache_state = BTRFS_DC_ERROR; | ||
922 | spin_unlock(&block_group->lock); | ||
923 | ret = 0; | ||
924 | |||
925 | printk(KERN_ERR "btrfs: failed to write free space cache " | ||
926 | "for block group %llu\n", block_group->key.objectid); | ||
927 | } | ||
928 | |||
929 | iput(inode); | ||
930 | return ret; | ||
931 | } | ||
932 | |||
933 | static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, | ||
31 | u64 offset) | 934 | u64 offset) |
32 | { | 935 | { |
33 | BUG_ON(offset < bitmap_start); | 936 | BUG_ON(offset < bitmap_start); |
34 | offset -= bitmap_start; | 937 | offset -= bitmap_start; |
35 | return (unsigned long)(div64_u64(offset, sectorsize)); | 938 | return (unsigned long)(div_u64(offset, unit)); |
36 | } | 939 | } |
37 | 940 | ||
38 | static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize) | 941 | static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) |
39 | { | 942 | { |
40 | return (unsigned long)(div64_u64(bytes, sectorsize)); | 943 | return (unsigned long)(div_u64(bytes, unit)); |
41 | } | 944 | } |
42 | 945 | ||
43 | static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group, | 946 | static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, |
44 | u64 offset) | 947 | u64 offset) |
45 | { | 948 | { |
46 | u64 bitmap_start; | 949 | u64 bitmap_start; |
47 | u64 bytes_per_bitmap; | 950 | u64 bytes_per_bitmap; |
48 | 951 | ||
49 | bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize; | 952 | bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; |
50 | bitmap_start = offset - block_group->key.objectid; | 953 | bitmap_start = offset - ctl->start; |
51 | bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); | 954 | bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); |
52 | bitmap_start *= bytes_per_bitmap; | 955 | bitmap_start *= bytes_per_bitmap; |
53 | bitmap_start += block_group->key.objectid; | 956 | bitmap_start += ctl->start; |
54 | 957 | ||
55 | return bitmap_start; | 958 | return bitmap_start; |
56 | } | 959 | } |
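After this refactor the bitmap helpers work purely off ctl->unit (the sector size) and ctl->start instead of reaching into the block group, so the same code can serve other free-space consumers. With 4 KiB units one bitmap covers BITS_PER_BITMAP * unit = 32768 * 4096 bytes = 128 MiB; a worked example of the two conversions:

```c
#include <stdio.h>

#define BITS_PER_BITMAP (4096 * 8)	/* one page of bits */

int main(void)
{
	unsigned long long unit = 4096, start = 0;
	unsigned long long bytes_per_bitmap =
		(unsigned long long)BITS_PER_BITMAP * unit; /* 128 MiB */
	unsigned long long offset = 200ULL * 1024 * 1024;   /* example */

	/* offset_to_bitmap: round down to the covering bitmap's start */
	unsigned long long bitmap_start =
		(offset - start) / bytes_per_bitmap * bytes_per_bitmap + start;
	/* offset_to_bit: index of the unit inside that bitmap */
	unsigned long bit = (offset - bitmap_start) / unit;

	printf("bitmap_start=%llu bit=%lu\n", bitmap_start, bit);
	return 0;
}
```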
@@ -85,10 +988,16 @@ static int tree_insert_offset(struct rb_root *root, u64 offset, | |||
85 | * logically. | 988 | * logically. |
86 | */ | 989 | */ |
87 | if (bitmap) { | 990 | if (bitmap) { |
88 | WARN_ON(info->bitmap); | 991 | if (info->bitmap) { |
992 | WARN_ON_ONCE(1); | ||
993 | return -EEXIST; | ||
994 | } | ||
89 | p = &(*p)->rb_right; | 995 | p = &(*p)->rb_right; |
90 | } else { | 996 | } else { |
91 | WARN_ON(!info->bitmap); | 997 | if (!info->bitmap) { |
998 | WARN_ON_ONCE(1); | ||
999 | return -EEXIST; | ||
1000 | } | ||
92 | p = &(*p)->rb_left; | 1001 | p = &(*p)->rb_left; |
93 | } | 1002 | } |
94 | } | 1003 | } |
@@ -108,10 +1017,10 @@ static int tree_insert_offset(struct rb_root *root, u64 offset, | |||
108 | * offset. | 1017 | * offset. |
109 | */ | 1018 | */ |
110 | static struct btrfs_free_space * | 1019 | static struct btrfs_free_space * |
111 | tree_search_offset(struct btrfs_block_group_cache *block_group, | 1020 | tree_search_offset(struct btrfs_free_space_ctl *ctl, |
112 | u64 offset, int bitmap_only, int fuzzy) | 1021 | u64 offset, int bitmap_only, int fuzzy) |
113 | { | 1022 | { |
114 | struct rb_node *n = block_group->free_space_offset.rb_node; | 1023 | struct rb_node *n = ctl->free_space_offset.rb_node; |
115 | struct btrfs_free_space *entry, *prev = NULL; | 1024 | struct btrfs_free_space *entry, *prev = NULL; |
116 | 1025 | ||
117 | /* find entry that is closest to the 'offset' */ | 1026 | /* find entry that is closest to the 'offset' */ |
@@ -207,8 +1116,7 @@ tree_search_offset(struct btrfs_block_group_cache *block_group, | |||
207 | break; | 1116 | break; |
208 | } | 1117 | } |
209 | } | 1118 | } |
210 | if (entry->offset + BITS_PER_BITMAP * | 1119 | if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) |
211 | block_group->sectorsize > offset) | ||
212 | return entry; | 1120 | return entry; |
213 | } else if (entry->offset + entry->bytes > offset) | 1121 | } else if (entry->offset + entry->bytes > offset) |
214 | return entry; | 1122 | return entry; |
@@ -219,7 +1127,7 @@ tree_search_offset(struct btrfs_block_group_cache *block_group, | |||
219 | while (1) { | 1127 | while (1) { |
220 | if (entry->bitmap) { | 1128 | if (entry->bitmap) { |
221 | if (entry->offset + BITS_PER_BITMAP * | 1129 | if (entry->offset + BITS_PER_BITMAP * |
222 | block_group->sectorsize > offset) | 1130 | ctl->unit > offset) |
223 | break; | 1131 | break; |
224 | } else { | 1132 | } else { |
225 | if (entry->offset + entry->bytes > offset) | 1133 | if (entry->offset + entry->bytes > offset) |
@@ -234,53 +1142,69 @@ tree_search_offset(struct btrfs_block_group_cache *block_group, | |||
234 | return entry; | 1142 | return entry; |
235 | } | 1143 | } |
236 | 1144 | ||
237 | static void unlink_free_space(struct btrfs_block_group_cache *block_group, | 1145 | static inline void |
1146 | __unlink_free_space(struct btrfs_free_space_ctl *ctl, | ||
1147 | struct btrfs_free_space *info) | ||
1148 | { | ||
1149 | rb_erase(&info->offset_index, &ctl->free_space_offset); | ||
1150 | ctl->free_extents--; | ||
1151 | } | ||
1152 | |||
1153 | static void unlink_free_space(struct btrfs_free_space_ctl *ctl, | ||
238 | struct btrfs_free_space *info) | 1154 | struct btrfs_free_space *info) |
239 | { | 1155 | { |
240 | rb_erase(&info->offset_index, &block_group->free_space_offset); | 1156 | __unlink_free_space(ctl, info); |
241 | block_group->free_extents--; | 1157 | ctl->free_space -= info->bytes; |
242 | block_group->free_space -= info->bytes; | ||
243 | } | 1158 | } |
244 | 1159 | ||
245 | static int link_free_space(struct btrfs_block_group_cache *block_group, | 1160 | static int link_free_space(struct btrfs_free_space_ctl *ctl, |
246 | struct btrfs_free_space *info) | 1161 | struct btrfs_free_space *info) |
247 | { | 1162 | { |
248 | int ret = 0; | 1163 | int ret = 0; |
249 | 1164 | ||
250 | BUG_ON(!info->bitmap && !info->bytes); | 1165 | BUG_ON(!info->bitmap && !info->bytes); |
251 | ret = tree_insert_offset(&block_group->free_space_offset, info->offset, | 1166 | ret = tree_insert_offset(&ctl->free_space_offset, info->offset, |
252 | &info->offset_index, (info->bitmap != NULL)); | 1167 | &info->offset_index, (info->bitmap != NULL)); |
253 | if (ret) | 1168 | if (ret) |
254 | return ret; | 1169 | return ret; |
255 | 1170 | ||
256 | block_group->free_space += info->bytes; | 1171 | ctl->free_space += info->bytes; |
257 | block_group->free_extents++; | 1172 | ctl->free_extents++; |
258 | return ret; | 1173 | return ret; |
259 | } | 1174 | } |
260 | 1175 | ||
261 | static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) | 1176 | static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) |
262 | { | 1177 | { |
1178 | struct btrfs_block_group_cache *block_group = ctl->private; | ||
263 | u64 max_bytes; | 1179 | u64 max_bytes; |
264 | u64 bitmap_bytes; | 1180 | u64 bitmap_bytes; |
265 | u64 extent_bytes; | 1181 | u64 extent_bytes; |
1182 | u64 size = block_group->key.offset; | ||
1183 | u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize; | ||
1184 | int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); | ||
1185 | |||
1186 | BUG_ON(ctl->total_bitmaps > max_bitmaps); | ||
266 | 1187 | ||
267 | /* | 1188 | /* |
268 | * The goal is to keep the total amount of memory used per 1gb of space | 1189 | * The goal is to keep the total amount of memory used per 1gb of space |
269 | * at or below 32k, so we need to adjust how much memory we allow to be | 1190 | * at or below 32k, so we need to adjust how much memory we allow to be |
270 | * used by extent based free space tracking | 1191 | * used by extent based free space tracking |
271 | */ | 1192 | */ |
272 | max_bytes = MAX_CACHE_BYTES_PER_GIG * | 1193 | if (size < 1024 * 1024 * 1024) |
273 | (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); | 1194 | max_bytes = MAX_CACHE_BYTES_PER_GIG; |
1195 | else | ||
1196 | max_bytes = MAX_CACHE_BYTES_PER_GIG * | ||
1197 | div64_u64(size, 1024 * 1024 * 1024); | ||
274 | 1198 | ||
275 | /* | 1199 | /* |
276 | * we want to account for 1 more bitmap than what we have so we can make | 1200 | * we want to account for 1 more bitmap than what we have so we can make |
277 | * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as | 1201 | * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as |
278 | * we add more bitmaps. | 1202 | * we add more bitmaps. |
279 | */ | 1203 | */ |
280 | bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE; | 1204 | bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE; |
281 | 1205 | ||
282 | if (bitmap_bytes >= max_bytes) { | 1206 | if (bitmap_bytes >= max_bytes) { |
283 | block_group->extents_thresh = 0; | 1207 | ctl->extents_thresh = 0; |
284 | return; | 1208 | return; |
285 | } | 1209 | } |
286 | 1210 | ||
@@ -291,47 +1215,43 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) | |||
291 | extent_bytes = max_bytes - bitmap_bytes; | 1215 | extent_bytes = max_bytes - bitmap_bytes; |
292 | extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); | 1216 | extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); |
293 | 1217 | ||
294 | block_group->extents_thresh = | 1218 | ctl->extents_thresh = |
295 | div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); | 1219 | div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); |
296 | } | 1220 | } |
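
The policy above budgets roughly 32 KiB of tracking memory per GiB of block group: bitmaps are charged first (plus one spare page), extent entries may use what remains, capped at half the budget, and the threshold is that remainder divided by the per-entry size. A hedged userspace rendering of the same computation (both constants are assumptions matching a common 4 KiB-page configuration):

	/*
	 * Sketch of the threshold calculation; the two constants are
	 * assumptions matching a typical 4 KiB-page configuration.
	 */
	#include <stdint.h>

	#define MAX_CACHE_BYTES_PER_GIG	(32ULL * 1024)
	#define PAGE_CACHE_SIZE		4096ULL
	#define GIG			(1024ULL * 1024 * 1024)

	struct free_space_entry { uint64_t offset, bytes; };

	static uint64_t extents_thresh(uint64_t size, unsigned int total_bitmaps)
	{
		uint64_t max_bytes, bitmap_bytes, extent_bytes;

		/* groups under 1 GiB still get the full per-GiB budget */
		if (size < GIG)
			max_bytes = MAX_CACHE_BYTES_PER_GIG;
		else
			max_bytes = MAX_CACHE_BYTES_PER_GIG * (size / GIG);

		/* charge one bitmap beyond what exists so growth stays in budget */
		bitmap_bytes = (uint64_t)(total_bitmaps + 1) * PAGE_CACHE_SIZE;
		if (bitmap_bytes >= max_bytes)
			return 0;

		/* extents get the remainder, capped at half the budget */
		extent_bytes = max_bytes - bitmap_bytes;
		if (extent_bytes > max_bytes / 2)
			extent_bytes = max_bytes / 2;

		return extent_bytes / sizeof(struct free_space_entry);
	}
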
297 | 1221 | ||
298 | static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, | 1222 | static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, |
299 | struct btrfs_free_space *info, u64 offset, | 1223 | struct btrfs_free_space *info, u64 offset, |
300 | u64 bytes) | 1224 | u64 bytes) |
301 | { | 1225 | { |
302 | unsigned long start, end; | 1226 | unsigned long start, count; |
303 | unsigned long i; | ||
304 | 1227 | ||
305 | start = offset_to_bit(info->offset, block_group->sectorsize, offset); | 1228 | start = offset_to_bit(info->offset, ctl->unit, offset); |
306 | end = start + bytes_to_bits(bytes, block_group->sectorsize); | 1229 | count = bytes_to_bits(bytes, ctl->unit); |
307 | BUG_ON(end > BITS_PER_BITMAP); | 1230 | BUG_ON(start + count > BITS_PER_BITMAP); |
308 | 1231 | ||
309 | for (i = start; i < end; i++) | 1232 | bitmap_clear(info->bitmap, start, count); |
310 | clear_bit(i, info->bitmap); | ||
311 | 1233 | ||
312 | info->bytes -= bytes; | 1234 | info->bytes -= bytes; |
313 | block_group->free_space -= bytes; | 1235 | ctl->free_space -= bytes; |
314 | } | 1236 | } |
315 | 1237 | ||
316 | static void bitmap_set_bits(struct btrfs_block_group_cache *block_group, | 1238 | static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, |
317 | struct btrfs_free_space *info, u64 offset, | 1239 | struct btrfs_free_space *info, u64 offset, |
318 | u64 bytes) | 1240 | u64 bytes) |
319 | { | 1241 | { |
320 | unsigned long start, end; | 1242 | unsigned long start, count; |
321 | unsigned long i; | ||
322 | 1243 | ||
323 | start = offset_to_bit(info->offset, block_group->sectorsize, offset); | 1244 | start = offset_to_bit(info->offset, ctl->unit, offset); |
324 | end = start + bytes_to_bits(bytes, block_group->sectorsize); | 1245 | count = bytes_to_bits(bytes, ctl->unit); |
325 | BUG_ON(end > BITS_PER_BITMAP); | 1246 | BUG_ON(start + count > BITS_PER_BITMAP); |
326 | 1247 | ||
327 | for (i = start; i < end; i++) | 1248 | bitmap_set(info->bitmap, start, count); |
328 | set_bit(i, info->bitmap); | ||
329 | 1249 | ||
330 | info->bytes += bytes; | 1250 | info->bytes += bytes; |
331 | block_group->free_space += bytes; | 1251 | ctl->free_space += bytes; |
332 | } | 1252 | } |
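
The rewritten helpers defer to bitmap_set() and bitmap_clear(), which fill whole words at a time rather than looping set_bit()/clear_bit() per bit as the old code did. Their contract, shown with a deliberately naive per-bit stand-in:

	/*
	 * Naive per-bit stand-in for the kernel's bitmap_set()/bitmap_clear(),
	 * showing only the semantics; the real helpers work a word at a time.
	 */
	#include <limits.h>

	#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)

	static void bitmap_set(unsigned long *map, unsigned int start,
			       unsigned int count)
	{
		for (unsigned int i = start; i < start + count; i++)
			map[i / BITS_PER_LONG] |= 1UL << (i % BITS_PER_LONG);
	}

	static void bitmap_clear(unsigned long *map, unsigned int start,
				 unsigned int count)
	{
		for (unsigned int i = start; i < start + count; i++)
			map[i / BITS_PER_LONG] &= ~(1UL << (i % BITS_PER_LONG));
	}
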
333 | 1253 | ||
334 | static int search_bitmap(struct btrfs_block_group_cache *block_group, | 1254 | static int search_bitmap(struct btrfs_free_space_ctl *ctl, |
335 | struct btrfs_free_space *bitmap_info, u64 *offset, | 1255 | struct btrfs_free_space *bitmap_info, u64 *offset, |
336 | u64 *bytes) | 1256 | u64 *bytes) |
337 | { | 1257 | { |
@@ -339,9 +1259,9 @@ static int search_bitmap(struct btrfs_block_group_cache *block_group, | |||
339 | unsigned long bits, i; | 1259 | unsigned long bits, i; |
340 | unsigned long next_zero; | 1260 | unsigned long next_zero; |
341 | 1261 | ||
342 | i = offset_to_bit(bitmap_info->offset, block_group->sectorsize, | 1262 | i = offset_to_bit(bitmap_info->offset, ctl->unit, |
343 | max_t(u64, *offset, bitmap_info->offset)); | 1263 | max_t(u64, *offset, bitmap_info->offset)); |
344 | bits = bytes_to_bits(*bytes, block_group->sectorsize); | 1264 | bits = bytes_to_bits(*bytes, ctl->unit); |
345 | 1265 | ||
346 | for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i); | 1266 | for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i); |
347 | i < BITS_PER_BITMAP; | 1267 | i < BITS_PER_BITMAP; |
@@ -356,29 +1276,25 @@ static int search_bitmap(struct btrfs_block_group_cache *block_group, | |||
356 | } | 1276 | } |
357 | 1277 | ||
358 | if (found_bits) { | 1278 | if (found_bits) { |
359 | *offset = (u64)(i * block_group->sectorsize) + | 1279 | *offset = (u64)(i * ctl->unit) + bitmap_info->offset; |
360 | bitmap_info->offset; | 1280 | *bytes = (u64)(found_bits) * ctl->unit; |
361 | *bytes = (u64)(found_bits) * block_group->sectorsize; | ||
362 | return 0; | 1281 | return 0; |
363 | } | 1282 | } |
364 | 1283 | ||
365 | return -1; | 1284 | return -1; |
366 | } | 1285 | } |
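
search_bitmap() is the core scan: jump to the next set bit, measure the run up to the next zero bit, and stop once the run is long enough to satisfy the request. The same control flow in a self-contained form (the find_* helpers below are naive stand-ins for the kernel's word-at-a-time APIs):

	/*
	 * Self-contained rendering of the scan in search_bitmap(); the two
	 * find_* helpers are naive stand-ins for the kernel APIs.
	 */
	#include <limits.h>
	#include <stddef.h>

	#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)

	static int test_bit(const unsigned long *map, size_t i)
	{
		return (map[i / BITS_PER_LONG] >> (i % BITS_PER_LONG)) & 1;
	}

	static size_t find_next_bit(const unsigned long *map, size_t size,
				    size_t i)
	{
		for (; i < size; i++)
			if (test_bit(map, i))
				return i;
		return size;
	}

	static size_t find_next_zero_bit(const unsigned long *map, size_t size,
					 size_t i)
	{
		for (; i < size; i++)
			if (!test_bit(map, i))
				return i;
		return size;
	}

	/* first run of at least 'want' consecutive set bits, or 'size' if none */
	static size_t find_set_run(const unsigned long *map, size_t size,
				   size_t want)
	{
		size_t i = find_next_bit(map, size, 0);

		while (i < size) {
			size_t next_zero = find_next_zero_bit(map, size, i + 1);

			if (next_zero - i >= want)
				return i;
			i = find_next_bit(map, size, next_zero);
		}
		return size;
	}
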
367 | 1286 | ||
368 | static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache | 1287 | static struct btrfs_free_space * |
369 | *block_group, u64 *offset, | 1288 | find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes) |
370 | u64 *bytes, int debug) | ||
371 | { | 1289 | { |
372 | struct btrfs_free_space *entry; | 1290 | struct btrfs_free_space *entry; |
373 | struct rb_node *node; | 1291 | struct rb_node *node; |
374 | int ret; | 1292 | int ret; |
375 | 1293 | ||
376 | if (!block_group->free_space_offset.rb_node) | 1294 | if (!ctl->free_space_offset.rb_node) |
377 | return NULL; | 1295 | return NULL; |
378 | 1296 | ||
379 | entry = tree_search_offset(block_group, | 1297 | entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); |
380 | offset_to_bitmap(block_group, *offset), | ||
381 | 0, 1); | ||
382 | if (!entry) | 1298 | if (!entry) |
383 | return NULL; | 1299 | return NULL; |
384 | 1300 | ||
@@ -388,7 +1304,7 @@ static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache | |||
388 | continue; | 1304 | continue; |
389 | 1305 | ||
390 | if (entry->bitmap) { | 1306 | if (entry->bitmap) { |
391 | ret = search_bitmap(block_group, entry, offset, bytes); | 1307 | ret = search_bitmap(ctl, entry, offset, bytes); |
392 | if (!ret) | 1308 | if (!ret) |
393 | return entry; | 1309 | return entry; |
394 | continue; | 1310 | continue; |
@@ -402,23 +1318,28 @@ static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache | |||
402 | return NULL; | 1318 | return NULL; |
403 | } | 1319 | } |
404 | 1320 | ||
405 | static void add_new_bitmap(struct btrfs_block_group_cache *block_group, | 1321 | static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, |
406 | struct btrfs_free_space *info, u64 offset) | 1322 | struct btrfs_free_space *info, u64 offset) |
407 | { | 1323 | { |
408 | u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize; | 1324 | info->offset = offset_to_bitmap(ctl, offset); |
409 | int max_bitmaps = (int)div64_u64(block_group->key.offset + | ||
410 | bytes_per_bg - 1, bytes_per_bg); | ||
411 | BUG_ON(block_group->total_bitmaps >= max_bitmaps); | ||
412 | |||
413 | info->offset = offset_to_bitmap(block_group, offset); | ||
414 | info->bytes = 0; | 1325 | info->bytes = 0; |
415 | link_free_space(block_group, info); | 1326 | link_free_space(ctl, info); |
416 | block_group->total_bitmaps++; | 1327 | ctl->total_bitmaps++; |
417 | 1328 | ||
418 | recalculate_thresholds(block_group); | 1329 | ctl->op->recalc_thresholds(ctl); |
419 | } | 1330 | } |
420 | 1331 | ||
421 | static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, | 1332 | static void free_bitmap(struct btrfs_free_space_ctl *ctl, |
1333 | struct btrfs_free_space *bitmap_info) | ||
1334 | { | ||
1335 | unlink_free_space(ctl, bitmap_info); | ||
1336 | kfree(bitmap_info->bitmap); | ||
1337 | kmem_cache_free(btrfs_free_space_cachep, bitmap_info); | ||
1338 | ctl->total_bitmaps--; | ||
1339 | ctl->op->recalc_thresholds(ctl); | ||
1340 | } | ||
1341 | |||
1342 | static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, | ||
422 | struct btrfs_free_space *bitmap_info, | 1343 | struct btrfs_free_space *bitmap_info, |
423 | u64 *offset, u64 *bytes) | 1344 | u64 *offset, u64 *bytes) |
424 | { | 1345 | { |
@@ -427,8 +1348,7 @@ static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_gro | |||
427 | int ret; | 1348 | int ret; |
428 | 1349 | ||
429 | again: | 1350 | again: |
430 | end = bitmap_info->offset + | 1351 | end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1; |
431 | (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1; | ||
432 | 1352 | ||
433 | /* | 1353 | /* |
434 | * XXX - this can go away after a few releases. | 1354 | * XXX - this can go away after a few releases. |
@@ -442,29 +1362,23 @@ again: | |||
442 | */ | 1362 | */ |
443 | search_start = *offset; | 1363 | search_start = *offset; |
444 | search_bytes = *bytes; | 1364 | search_bytes = *bytes; |
445 | ret = search_bitmap(block_group, bitmap_info, &search_start, | 1365 | search_bytes = min(search_bytes, end - search_start + 1); |
446 | &search_bytes); | 1366 | ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes); |
447 | BUG_ON(ret < 0 || search_start != *offset); | 1367 | BUG_ON(ret < 0 || search_start != *offset); |
448 | 1368 | ||
449 | if (*offset > bitmap_info->offset && *offset + *bytes > end) { | 1369 | if (*offset > bitmap_info->offset && *offset + *bytes > end) { |
450 | bitmap_clear_bits(block_group, bitmap_info, *offset, | 1370 | bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1); |
451 | end - *offset + 1); | ||
452 | *bytes -= end - *offset + 1; | 1371 | *bytes -= end - *offset + 1; |
453 | *offset = end + 1; | 1372 | *offset = end + 1; |
454 | } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { | 1373 | } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { |
455 | bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes); | 1374 | bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes); |
456 | *bytes = 0; | 1375 | *bytes = 0; |
457 | } | 1376 | } |
458 | 1377 | ||
459 | if (*bytes) { | 1378 | if (*bytes) { |
460 | struct rb_node *next = rb_next(&bitmap_info->offset_index); | 1379 | struct rb_node *next = rb_next(&bitmap_info->offset_index); |
461 | if (!bitmap_info->bytes) { | 1380 | if (!bitmap_info->bytes) |
462 | unlink_free_space(block_group, bitmap_info); | 1381 | free_bitmap(ctl, bitmap_info); |
463 | kfree(bitmap_info->bitmap); | ||
464 | kfree(bitmap_info); | ||
465 | block_group->total_bitmaps--; | ||
466 | recalculate_thresholds(block_group); | ||
467 | } | ||
468 | 1382 | ||
469 | /* | 1383 | /* |
470 | * no entry after this bitmap, but we still have bytes to | 1384 | * no entry after this bitmap, but we still have bytes to |
@@ -491,38 +1405,59 @@ again: | |||
491 | */ | 1405 | */ |
492 | search_start = *offset; | 1406 | search_start = *offset; |
493 | search_bytes = *bytes; | 1407 | search_bytes = *bytes; |
494 | ret = search_bitmap(block_group, bitmap_info, &search_start, | 1408 | ret = search_bitmap(ctl, bitmap_info, &search_start, |
495 | &search_bytes); | 1409 | &search_bytes); |
496 | if (ret < 0 || search_start != *offset) | 1410 | if (ret < 0 || search_start != *offset) |
497 | return -EAGAIN; | 1411 | return -EAGAIN; |
498 | 1412 | ||
499 | goto again; | 1413 | goto again; |
500 | } else if (!bitmap_info->bytes) { | 1414 | } else if (!bitmap_info->bytes) |
501 | unlink_free_space(block_group, bitmap_info); | 1415 | free_bitmap(ctl, bitmap_info); |
502 | kfree(bitmap_info->bitmap); | ||
503 | kfree(bitmap_info); | ||
504 | block_group->total_bitmaps--; | ||
505 | recalculate_thresholds(block_group); | ||
506 | } | ||
507 | 1416 | ||
508 | return 0; | 1417 | return 0; |
509 | } | 1418 | } |
510 | 1419 | ||
511 | static int insert_into_bitmap(struct btrfs_block_group_cache *block_group, | 1420 | static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl, |
512 | struct btrfs_free_space *info) | 1421 | struct btrfs_free_space *info, u64 offset, |
1422 | u64 bytes) | ||
513 | { | 1423 | { |
514 | struct btrfs_free_space *bitmap_info; | 1424 | u64 bytes_to_set = 0; |
515 | int added = 0; | 1425 | u64 end; |
516 | u64 bytes, offset, end; | 1426 | |
517 | int ret; | 1427 | end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); |
1428 | |||
1429 | bytes_to_set = min(end - offset, bytes); | ||
1430 | |||
1431 | bitmap_set_bits(ctl, info, offset, bytes_to_set); | ||
1432 | |||
1433 | return bytes_to_set; | ||
1434 | |||
1435 | } | ||
1436 | |||
1437 | static bool use_bitmap(struct btrfs_free_space_ctl *ctl, | ||
1438 | struct btrfs_free_space *info) | ||
1439 | { | ||
1440 | struct btrfs_block_group_cache *block_group = ctl->private; | ||
518 | 1441 | ||
519 | /* | 1442 | /* |
520 | * If we are below the extents threshold then we can add this as an | 1443 | * If we are below the extents threshold then we can add this as an |
521 | * extent, and don't have to deal with the bitmap | 1444 | * extent, and don't have to deal with the bitmap |
522 | */ | 1445 | */ |
523 | if (block_group->free_extents < block_group->extents_thresh && | 1446 | if (ctl->free_extents < ctl->extents_thresh) { |
524 | info->bytes > block_group->sectorsize * 4) | 1447 | /* |
525 | return 0; | 1448 | * If this block group has some small extents we don't want to |
1449 | * use up all of our free slots in the cache with them, we want | ||
1450 | * to reserve them for larger extents; however, if we have plenty | ||
1451 | * of cache left then go ahead and add them, no sense in adding | ||
1452 | * the overhead of a bitmap if we don't have to. | ||
1453 | */ | ||
1454 | if (info->bytes <= block_group->sectorsize * 4) { | ||
1455 | if (ctl->free_extents * 2 <= ctl->extents_thresh) | ||
1456 | return false; | ||
1457 | } else { | ||
1458 | return false; | ||
1459 | } | ||
1460 | } | ||
526 | 1461 | ||
527 | /* | 1462 | /* |
528 | * some block groups are so tiny they can't be enveloped by a bitmap, so | 1463 | * some block groups are so tiny they can't be enveloped by a bitmap, so |
@@ -530,35 +1465,85 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group, | |||
530 | */ | 1465 | */ |
531 | if (BITS_PER_BITMAP * block_group->sectorsize > | 1466 | if (BITS_PER_BITMAP * block_group->sectorsize > |
532 | block_group->key.offset) | 1467 | block_group->key.offset) |
533 | return 0; | 1468 | return false; |
1469 | |||
1470 | return true; | ||
1471 | } | ||
1472 | |||
1473 | static struct btrfs_free_space_op free_space_op = { | ||
1474 | .recalc_thresholds = recalculate_thresholds, | ||
1475 | .use_bitmap = use_bitmap, | ||
1476 | }; | ||
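
This ops table is the pivot of the refactor: the rbtree, lock, and accounting move out of the block group into a btrfs_free_space_ctl, and policy choices (when to recalculate thresholds, when an extent should be folded into a bitmap) go through the op pointer so another user can plug in different policy. A simplified sketch of that shape (the field list is inferred from the code in this diff; the layout itself is an assumption):

	/*
	 * Compilable sketch of the ctl/ops indirection; fields inferred from
	 * the code in this diff, with userspace stand-in types.
	 */
	#include <stdbool.h>
	#include <stdint.h>

	struct free_space_ctl;
	struct free_space { uint64_t offset, bytes; unsigned long *bitmap; };

	struct free_space_op {
		/* re-derive extents_thresh after the bitmap count changes */
		void (*recalc_thresholds)(struct free_space_ctl *ctl);
		/* policy: should this extent be folded into a bitmap? */
		bool (*use_bitmap)(struct free_space_ctl *ctl,
				   struct free_space *info);
	};

	struct free_space_ctl {
		uint64_t free_space;	/* total free bytes tracked */
		uint64_t start;		/* logical start of the managed range */
		uint32_t unit;		/* allocation granularity (sectorsize) */
		int extents_thresh;
		int free_extents;
		int total_bitmaps;
		const struct free_space_op *op;
		void *private_data;	/* e.g. the owning block group */
	};
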
1477 | |||
1478 | static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, | ||
1479 | struct btrfs_free_space *info) | ||
1480 | { | ||
1481 | struct btrfs_free_space *bitmap_info; | ||
1482 | struct btrfs_block_group_cache *block_group = NULL; | ||
1483 | int added = 0; | ||
1484 | u64 bytes, offset, bytes_added; | ||
1485 | int ret; | ||
534 | 1486 | ||
535 | bytes = info->bytes; | 1487 | bytes = info->bytes; |
536 | offset = info->offset; | 1488 | offset = info->offset; |
537 | 1489 | ||
1490 | if (!ctl->op->use_bitmap(ctl, info)) | ||
1491 | return 0; | ||
1492 | |||
1493 | if (ctl->op == &free_space_op) | ||
1494 | block_group = ctl->private; | ||
538 | again: | 1495 | again: |
539 | bitmap_info = tree_search_offset(block_group, | 1496 | /* |
540 | offset_to_bitmap(block_group, offset), | 1497 | * Since we link bitmaps right into the cluster we need to see if we |
1498 | * have a cluster here, and if so whether it has our bitmap; if it | ||
1499 | * does, we need to add the free space to that bitmap. | ||
1500 | */ | ||
1501 | if (block_group && !list_empty(&block_group->cluster_list)) { | ||
1502 | struct btrfs_free_cluster *cluster; | ||
1503 | struct rb_node *node; | ||
1504 | struct btrfs_free_space *entry; | ||
1505 | |||
1506 | cluster = list_entry(block_group->cluster_list.next, | ||
1507 | struct btrfs_free_cluster, | ||
1508 | block_group_list); | ||
1509 | spin_lock(&cluster->lock); | ||
1510 | node = rb_first(&cluster->root); | ||
1511 | if (!node) { | ||
1512 | spin_unlock(&cluster->lock); | ||
1513 | goto no_cluster_bitmap; | ||
1514 | } | ||
1515 | |||
1516 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
1517 | if (!entry->bitmap) { | ||
1518 | spin_unlock(&cluster->lock); | ||
1519 | goto no_cluster_bitmap; | ||
1520 | } | ||
1521 | |||
1522 | if (entry->offset == offset_to_bitmap(ctl, offset)) { | ||
1523 | bytes_added = add_bytes_to_bitmap(ctl, entry, | ||
1524 | offset, bytes); | ||
1525 | bytes -= bytes_added; | ||
1526 | offset += bytes_added; | ||
1527 | } | ||
1528 | spin_unlock(&cluster->lock); | ||
1529 | if (!bytes) { | ||
1530 | ret = 1; | ||
1531 | goto out; | ||
1532 | } | ||
1533 | } | ||
1534 | |||
1535 | no_cluster_bitmap: | ||
1536 | bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), | ||
541 | 1, 0); | 1537 | 1, 0); |
542 | if (!bitmap_info) { | 1538 | if (!bitmap_info) { |
543 | BUG_ON(added); | 1539 | BUG_ON(added); |
544 | goto new_bitmap; | 1540 | goto new_bitmap; |
545 | } | 1541 | } |
546 | 1542 | ||
547 | end = bitmap_info->offset + | 1543 | bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); |
548 | (u64)(BITS_PER_BITMAP * block_group->sectorsize); | 1544 | bytes -= bytes_added; |
549 | 1545 | offset += bytes_added; | |
550 | if (offset >= bitmap_info->offset && offset + bytes > end) { | 1546 | added = 0; |
551 | bitmap_set_bits(block_group, bitmap_info, offset, | ||
552 | end - offset); | ||
553 | bytes -= end - offset; | ||
554 | offset = end; | ||
555 | added = 0; | ||
556 | } else if (offset >= bitmap_info->offset && offset + bytes <= end) { | ||
557 | bitmap_set_bits(block_group, bitmap_info, offset, bytes); | ||
558 | bytes = 0; | ||
559 | } else { | ||
560 | BUG(); | ||
561 | } | ||
562 | 1547 | ||
563 | if (!bytes) { | 1548 | if (!bytes) { |
564 | ret = 1; | 1549 | ret = 1; |
@@ -568,19 +1553,19 @@ again: | |||
568 | 1553 | ||
569 | new_bitmap: | 1554 | new_bitmap: |
570 | if (info && info->bitmap) { | 1555 | if (info && info->bitmap) { |
571 | add_new_bitmap(block_group, info, offset); | 1556 | add_new_bitmap(ctl, info, offset); |
572 | added = 1; | 1557 | added = 1; |
573 | info = NULL; | 1558 | info = NULL; |
574 | goto again; | 1559 | goto again; |
575 | } else { | 1560 | } else { |
576 | spin_unlock(&block_group->tree_lock); | 1561 | spin_unlock(&ctl->tree_lock); |
577 | 1562 | ||
578 | /* no pre-allocated info, allocate a new one */ | 1563 | /* no pre-allocated info, allocate a new one */ |
579 | if (!info) { | 1564 | if (!info) { |
580 | info = kzalloc(sizeof(struct btrfs_free_space), | 1565 | info = kmem_cache_zalloc(btrfs_free_space_cachep, |
581 | GFP_NOFS); | 1566 | GFP_NOFS); |
582 | if (!info) { | 1567 | if (!info) { |
583 | spin_lock(&block_group->tree_lock); | 1568 | spin_lock(&ctl->tree_lock); |
584 | ret = -ENOMEM; | 1569 | ret = -ENOMEM; |
585 | goto out; | 1570 | goto out; |
586 | } | 1571 | } |
@@ -588,7 +1573,7 @@ new_bitmap: | |||
588 | 1573 | ||
589 | /* allocate the bitmap */ | 1574 | /* allocate the bitmap */ |
590 | info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); | 1575 | info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
591 | spin_lock(&block_group->tree_lock); | 1576 | spin_lock(&ctl->tree_lock); |
592 | if (!info->bitmap) { | 1577 | if (!info->bitmap) { |
593 | ret = -ENOMEM; | 1578 | ret = -ENOMEM; |
594 | goto out; | 1579 | goto out; |
@@ -600,77 +1585,94 @@ out: | |||
600 | if (info) { | 1585 | if (info) { |
601 | if (info->bitmap) | 1586 | if (info->bitmap) |
602 | kfree(info->bitmap); | 1587 | kfree(info->bitmap); |
603 | kfree(info); | 1588 | kmem_cache_free(btrfs_free_space_cachep, info); |
604 | } | 1589 | } |
605 | 1590 | ||
606 | return ret; | 1591 | return ret; |
607 | } | 1592 | } |
608 | 1593 | ||
609 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | 1594 | static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, |
610 | u64 offset, u64 bytes) | 1595 | struct btrfs_free_space *info, bool update_stat) |
611 | { | 1596 | { |
612 | struct btrfs_free_space *right_info = NULL; | 1597 | struct btrfs_free_space *left_info; |
613 | struct btrfs_free_space *left_info = NULL; | 1598 | struct btrfs_free_space *right_info; |
614 | struct btrfs_free_space *info = NULL; | 1599 | bool merged = false; |
615 | int ret = 0; | 1600 | u64 offset = info->offset; |
616 | 1601 | u64 bytes = info->bytes; | |
617 | info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); | ||
618 | if (!info) | ||
619 | return -ENOMEM; | ||
620 | |||
621 | info->offset = offset; | ||
622 | info->bytes = bytes; | ||
623 | |||
624 | spin_lock(&block_group->tree_lock); | ||
625 | 1602 | ||
626 | /* | 1603 | /* |
627 | * first we want to see if there is free space adjacent to the range we | 1604 | * first we want to see if there is free space adjacent to the range we |
628 | * are adding, if there is remove that struct and add a new one to | 1605 | * are adding, if there is remove that struct and add a new one to |
629 | * cover the entire range | 1606 | * cover the entire range |
630 | */ | 1607 | */ |
631 | right_info = tree_search_offset(block_group, offset + bytes, 0, 0); | 1608 | right_info = tree_search_offset(ctl, offset + bytes, 0, 0); |
632 | if (right_info && rb_prev(&right_info->offset_index)) | 1609 | if (right_info && rb_prev(&right_info->offset_index)) |
633 | left_info = rb_entry(rb_prev(&right_info->offset_index), | 1610 | left_info = rb_entry(rb_prev(&right_info->offset_index), |
634 | struct btrfs_free_space, offset_index); | 1611 | struct btrfs_free_space, offset_index); |
635 | else | 1612 | else |
636 | left_info = tree_search_offset(block_group, offset - 1, 0, 0); | 1613 | left_info = tree_search_offset(ctl, offset - 1, 0, 0); |
637 | |||
638 | /* | ||
639 | * If there was no extent directly to the left or right of this new | ||
640 | * extent then we know we're going to have to allocate a new extent, so | ||
641 | * before we do that see if we need to drop this into a bitmap | ||
642 | */ | ||
643 | if ((!left_info || left_info->bitmap) && | ||
644 | (!right_info || right_info->bitmap)) { | ||
645 | ret = insert_into_bitmap(block_group, info); | ||
646 | |||
647 | if (ret < 0) { | ||
648 | goto out; | ||
649 | } else if (ret) { | ||
650 | ret = 0; | ||
651 | goto out; | ||
652 | } | ||
653 | } | ||
654 | 1614 | ||
655 | if (right_info && !right_info->bitmap) { | 1615 | if (right_info && !right_info->bitmap) { |
656 | unlink_free_space(block_group, right_info); | 1616 | if (update_stat) |
1617 | unlink_free_space(ctl, right_info); | ||
1618 | else | ||
1619 | __unlink_free_space(ctl, right_info); | ||
657 | info->bytes += right_info->bytes; | 1620 | info->bytes += right_info->bytes; |
658 | kfree(right_info); | 1621 | kmem_cache_free(btrfs_free_space_cachep, right_info); |
1622 | merged = true; | ||
659 | } | 1623 | } |
660 | 1624 | ||
661 | if (left_info && !left_info->bitmap && | 1625 | if (left_info && !left_info->bitmap && |
662 | left_info->offset + left_info->bytes == offset) { | 1626 | left_info->offset + left_info->bytes == offset) { |
663 | unlink_free_space(block_group, left_info); | 1627 | if (update_stat) |
1628 | unlink_free_space(ctl, left_info); | ||
1629 | else | ||
1630 | __unlink_free_space(ctl, left_info); | ||
664 | info->offset = left_info->offset; | 1631 | info->offset = left_info->offset; |
665 | info->bytes += left_info->bytes; | 1632 | info->bytes += left_info->bytes; |
666 | kfree(left_info); | 1633 | kmem_cache_free(btrfs_free_space_cachep, left_info); |
1634 | merged = true; | ||
667 | } | 1635 | } |
668 | 1636 | ||
669 | ret = link_free_space(block_group, info); | 1637 | return merged; |
1638 | } | ||
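
try_merge_free_space() factors the coalescing out of the add path: the incoming range absorbs a non-bitmap entry that begins exactly where it ends and one that ends exactly where it begins, with update_stat selecting the stat-adjusting or raw unlink. The interval logic, reduced to a flat array purely for illustration:

	/*
	 * The left/right coalescing above, reduced to a flat array instead
	 * of the offset-indexed rbtree; purely illustrative.
	 */
	#include <stdbool.h>
	#include <stdint.h>

	struct extent { uint64_t offset, bytes; bool live; };

	static bool try_merge(struct extent *pool, int n, struct extent *info)
	{
		bool merged = false;

		for (int i = 0; i < n; i++) {
			if (!pool[i].live)
				continue;
			/* right neighbor: starts exactly where the new range ends */
			if (pool[i].offset == info->offset + info->bytes) {
				info->bytes += pool[i].bytes;
				pool[i].live = false;
				merged = true;
			/* left neighbor: ends exactly where the new range starts */
			} else if (pool[i].offset + pool[i].bytes == info->offset) {
				info->offset = pool[i].offset;
				info->bytes += pool[i].bytes;
				pool[i].live = false;
				merged = true;
			}
		}
		return merged;
	}
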
1639 | |||
1640 | int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, | ||
1641 | u64 offset, u64 bytes) | ||
1642 | { | ||
1643 | struct btrfs_free_space *info; | ||
1644 | int ret = 0; | ||
1645 | |||
1646 | info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); | ||
1647 | if (!info) | ||
1648 | return -ENOMEM; | ||
1649 | |||
1650 | info->offset = offset; | ||
1651 | info->bytes = bytes; | ||
1652 | |||
1653 | spin_lock(&ctl->tree_lock); | ||
1654 | |||
1655 | if (try_merge_free_space(ctl, info, true)) | ||
1656 | goto link; | ||
1657 | |||
1658 | /* | ||
1659 | * There was no extent directly to the left or right of this new | ||
1660 | * extent, so we know we're going to have to allocate a new extent; | ||
1661 | * before we do that, see if we need to drop this into a bitmap | ||
1662 | */ | ||
1663 | ret = insert_into_bitmap(ctl, info); | ||
1664 | if (ret < 0) { | ||
1665 | goto out; | ||
1666 | } else if (ret) { | ||
1667 | ret = 0; | ||
1668 | goto out; | ||
1669 | } | ||
1670 | link: | ||
1671 | ret = link_free_space(ctl, info); | ||
670 | if (ret) | 1672 | if (ret) |
671 | kfree(info); | 1673 | kmem_cache_free(btrfs_free_space_cachep, info); |
672 | out: | 1674 | out: |
673 | spin_unlock(&block_group->tree_lock); | 1675 | spin_unlock(&ctl->tree_lock); |
674 | 1676 | ||
675 | if (ret) { | 1677 | if (ret) { |
676 | printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret); | 1678 | printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret); |
@@ -683,21 +1685,21 @@ out: | |||
683 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | 1685 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, |
684 | u64 offset, u64 bytes) | 1686 | u64 offset, u64 bytes) |
685 | { | 1687 | { |
1688 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
686 | struct btrfs_free_space *info; | 1689 | struct btrfs_free_space *info; |
687 | struct btrfs_free_space *next_info = NULL; | 1690 | struct btrfs_free_space *next_info = NULL; |
688 | int ret = 0; | 1691 | int ret = 0; |
689 | 1692 | ||
690 | spin_lock(&block_group->tree_lock); | 1693 | spin_lock(&ctl->tree_lock); |
691 | 1694 | ||
692 | again: | 1695 | again: |
693 | info = tree_search_offset(block_group, offset, 0, 0); | 1696 | info = tree_search_offset(ctl, offset, 0, 0); |
694 | if (!info) { | 1697 | if (!info) { |
695 | /* | 1698 | /* |
696 | * oops didn't find an extent that matched the space we wanted | 1699 | * oops didn't find an extent that matched the space we wanted |
697 | * to remove, look for a bitmap instead | 1700 | * to remove, look for a bitmap instead |
698 | */ | 1701 | */ |
699 | info = tree_search_offset(block_group, | 1702 | info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), |
700 | offset_to_bitmap(block_group, offset), | ||
701 | 1, 0); | 1703 | 1, 0); |
702 | if (!info) { | 1704 | if (!info) { |
703 | WARN_ON(1); | 1705 | WARN_ON(1); |
@@ -712,8 +1714,8 @@ again: | |||
712 | offset_index); | 1714 | offset_index); |
713 | 1715 | ||
714 | if (next_info->bitmap) | 1716 | if (next_info->bitmap) |
715 | end = next_info->offset + BITS_PER_BITMAP * | 1717 | end = next_info->offset + |
716 | block_group->sectorsize - 1; | 1718 | BITS_PER_BITMAP * ctl->unit - 1; |
717 | else | 1719 | else |
718 | end = next_info->offset + next_info->bytes; | 1720 | end = next_info->offset + next_info->bytes; |
719 | 1721 | ||
@@ -733,20 +1735,20 @@ again: | |||
733 | } | 1735 | } |
734 | 1736 | ||
735 | if (info->bytes == bytes) { | 1737 | if (info->bytes == bytes) { |
736 | unlink_free_space(block_group, info); | 1738 | unlink_free_space(ctl, info); |
737 | if (info->bitmap) { | 1739 | if (info->bitmap) { |
738 | kfree(info->bitmap); | 1740 | kfree(info->bitmap); |
739 | block_group->total_bitmaps--; | 1741 | ctl->total_bitmaps--; |
740 | } | 1742 | } |
741 | kfree(info); | 1743 | kmem_cache_free(btrfs_free_space_cachep, info); |
742 | goto out_lock; | 1744 | goto out_lock; |
743 | } | 1745 | } |
744 | 1746 | ||
745 | if (!info->bitmap && info->offset == offset) { | 1747 | if (!info->bitmap && info->offset == offset) { |
746 | unlink_free_space(block_group, info); | 1748 | unlink_free_space(ctl, info); |
747 | info->offset += bytes; | 1749 | info->offset += bytes; |
748 | info->bytes -= bytes; | 1750 | info->bytes -= bytes; |
749 | link_free_space(block_group, info); | 1751 | link_free_space(ctl, info); |
750 | goto out_lock; | 1752 | goto out_lock; |
751 | } | 1753 | } |
752 | 1754 | ||
@@ -760,13 +1762,13 @@ again: | |||
760 | * first unlink the old info and then | 1762 | * first unlink the old info and then |
761 | * insert it again after the hole we're creating | 1763 | * insert it again after the hole we're creating |
762 | */ | 1764 | */ |
763 | unlink_free_space(block_group, info); | 1765 | unlink_free_space(ctl, info); |
764 | if (offset + bytes < info->offset + info->bytes) { | 1766 | if (offset + bytes < info->offset + info->bytes) { |
765 | u64 old_end = info->offset + info->bytes; | 1767 | u64 old_end = info->offset + info->bytes; |
766 | 1768 | ||
767 | info->offset = offset + bytes; | 1769 | info->offset = offset + bytes; |
768 | info->bytes = old_end - info->offset; | 1770 | info->bytes = old_end - info->offset; |
769 | ret = link_free_space(block_group, info); | 1771 | ret = link_free_space(ctl, info); |
770 | WARN_ON(ret); | 1772 | WARN_ON(ret); |
771 | if (ret) | 1773 | if (ret) |
772 | goto out_lock; | 1774 | goto out_lock; |
@@ -774,9 +1776,9 @@ again: | |||
774 | /* the hole we're creating ends at the end | 1776 | /* the hole we're creating ends at the end |
775 | * of the info struct, just free the info | 1777 | * of the info struct, just free the info |
776 | */ | 1778 | */ |
777 | kfree(info); | 1779 | kmem_cache_free(btrfs_free_space_cachep, info); |
778 | } | 1780 | } |
779 | spin_unlock(&block_group->tree_lock); | 1781 | spin_unlock(&ctl->tree_lock); |
780 | 1782 | ||
781 | /* step two, insert a new info struct to cover | 1783 | /* step two, insert a new info struct to cover |
782 | * anything before the hole | 1784 | * anything before the hole |
@@ -787,12 +1789,12 @@ again: | |||
787 | goto out; | 1789 | goto out; |
788 | } | 1790 | } |
789 | 1791 | ||
790 | ret = remove_from_bitmap(block_group, info, &offset, &bytes); | 1792 | ret = remove_from_bitmap(ctl, info, &offset, &bytes); |
791 | if (ret == -EAGAIN) | 1793 | if (ret == -EAGAIN) |
792 | goto again; | 1794 | goto again; |
793 | BUG_ON(ret); | 1795 | BUG_ON(ret); |
794 | out_lock: | 1796 | out_lock: |
795 | spin_unlock(&block_group->tree_lock); | 1797 | spin_unlock(&ctl->tree_lock); |
796 | out: | 1798 | out: |
797 | return ret; | 1799 | return ret; |
798 | } | 1800 | } |
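
Removing bytes from the middle of an extent is a split: the entry is unlinked, the tail beyond the hole is re-linked immediately, then the lock is dropped and the head is re-added through btrfs_add_free_space(). The splitting arithmetic on its own, as a standalone sketch:

	/*
	 * Just the splitting arithmetic: carve hole [off, off + len) out of
	 * extent [e_off, e_off + e_len); either resulting piece may be empty.
	 */
	#include <assert.h>
	#include <stdint.h>

	struct piece { uint64_t offset, bytes; };

	static void split_extent(uint64_t e_off, uint64_t e_len,
				 uint64_t off, uint64_t len,
				 struct piece *head, struct piece *tail)
	{
		assert(off >= e_off && off + len <= e_off + e_len);

		head->offset = e_off;
		head->bytes = off - e_off;		/* space before the hole */

		tail->offset = off + len;
		tail->bytes = (e_off + e_len) - tail->offset; /* space after it */
	}
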
@@ -800,11 +1802,12 @@ out: | |||
800 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | 1802 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, |
801 | u64 bytes) | 1803 | u64 bytes) |
802 | { | 1804 | { |
1805 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
803 | struct btrfs_free_space *info; | 1806 | struct btrfs_free_space *info; |
804 | struct rb_node *n; | 1807 | struct rb_node *n; |
805 | int count = 0; | 1808 | int count = 0; |
806 | 1809 | ||
807 | for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) { | 1810 | for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { |
808 | info = rb_entry(n, struct btrfs_free_space, offset_index); | 1811 | info = rb_entry(n, struct btrfs_free_space, offset_index); |
809 | if (info->bytes >= bytes) | 1812 | if (info->bytes >= bytes) |
810 | count++; | 1813 | count++; |
@@ -819,19 +1822,23 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | |||
819 | "\n", count); | 1822 | "\n", count); |
820 | } | 1823 | } |
821 | 1824 | ||
822 | u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group) | 1825 | void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) |
823 | { | 1826 | { |
824 | struct btrfs_free_space *info; | 1827 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
825 | struct rb_node *n; | ||
826 | u64 ret = 0; | ||
827 | 1828 | ||
828 | for (n = rb_first(&block_group->free_space_offset); n; | 1829 | spin_lock_init(&ctl->tree_lock); |
829 | n = rb_next(n)) { | 1830 | ctl->unit = block_group->sectorsize; |
830 | info = rb_entry(n, struct btrfs_free_space, offset_index); | 1831 | ctl->start = block_group->key.objectid; |
831 | ret += info->bytes; | 1832 | ctl->private = block_group; |
832 | } | 1833 | ctl->op = &free_space_op; |
833 | 1834 | ||
834 | return ret; | 1835 | /* |
1836 | * we only want to have 32k of ram per block group for keeping | ||
1837 | * track of free space, and if we pass 1/2 of that we want to | ||
1838 | * start converting things over to using bitmaps | ||
1839 | */ | ||
1840 | ctl->extents_thresh = ((1024 * 32) / 2) / | ||
1841 | sizeof(struct btrfs_free_space); | ||
835 | } | 1842 | } |
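
With the values set here, the initial threshold works out to (32768 / 2) / sizeof(struct btrfs_free_space). Assuming an entry of roughly 48 bytes on 64-bit (an estimate; the exact figure depends on the struct layout), that allows about 341 plain extent entries per block group before use_bitmap() starts steering new free space into bitmaps.
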
836 | 1843 | ||
837 | /* | 1844 | /* |
@@ -845,31 +1852,31 @@ __btrfs_return_cluster_to_free_space( | |||
845 | struct btrfs_block_group_cache *block_group, | 1852 | struct btrfs_block_group_cache *block_group, |
846 | struct btrfs_free_cluster *cluster) | 1853 | struct btrfs_free_cluster *cluster) |
847 | { | 1854 | { |
1855 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
848 | struct btrfs_free_space *entry; | 1856 | struct btrfs_free_space *entry; |
849 | struct rb_node *node; | 1857 | struct rb_node *node; |
850 | bool bitmap; | ||
851 | 1858 | ||
852 | spin_lock(&cluster->lock); | 1859 | spin_lock(&cluster->lock); |
853 | if (cluster->block_group != block_group) | 1860 | if (cluster->block_group != block_group) |
854 | goto out; | 1861 | goto out; |
855 | 1862 | ||
856 | bitmap = cluster->points_to_bitmap; | ||
857 | cluster->block_group = NULL; | 1863 | cluster->block_group = NULL; |
858 | cluster->window_start = 0; | 1864 | cluster->window_start = 0; |
859 | list_del_init(&cluster->block_group_list); | 1865 | list_del_init(&cluster->block_group_list); |
860 | cluster->points_to_bitmap = false; | ||
861 | |||
862 | if (bitmap) | ||
863 | goto out; | ||
864 | 1866 | ||
865 | node = rb_first(&cluster->root); | 1867 | node = rb_first(&cluster->root); |
866 | while (node) { | 1868 | while (node) { |
1869 | bool bitmap; | ||
1870 | |||
867 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 1871 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
868 | node = rb_next(&entry->offset_index); | 1872 | node = rb_next(&entry->offset_index); |
869 | rb_erase(&entry->offset_index, &cluster->root); | 1873 | rb_erase(&entry->offset_index, &cluster->root); |
870 | BUG_ON(entry->bitmap); | 1874 | |
871 | tree_insert_offset(&block_group->free_space_offset, | 1875 | bitmap = (entry->bitmap != NULL); |
872 | entry->offset, &entry->offset_index, 0); | 1876 | if (!bitmap) |
1877 | try_merge_free_space(ctl, entry, false); | ||
1878 | tree_insert_offset(&ctl->free_space_offset, | ||
1879 | entry->offset, &entry->offset_index, bitmap); | ||
873 | } | 1880 | } |
874 | cluster->root = RB_ROOT; | 1881 | cluster->root = RB_ROOT; |
875 | 1882 | ||
@@ -879,14 +1886,41 @@ out: | |||
879 | return 0; | 1886 | return 0; |
880 | } | 1887 | } |
881 | 1888 | ||
882 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | 1889 | void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl) |
883 | { | 1890 | { |
884 | struct btrfs_free_space *info; | 1891 | struct btrfs_free_space *info; |
885 | struct rb_node *node; | 1892 | struct rb_node *node; |
1893 | |||
1894 | while ((node = rb_last(&ctl->free_space_offset)) != NULL) { | ||
1895 | info = rb_entry(node, struct btrfs_free_space, offset_index); | ||
1896 | if (!info->bitmap) { | ||
1897 | unlink_free_space(ctl, info); | ||
1898 | kmem_cache_free(btrfs_free_space_cachep, info); | ||
1899 | } else { | ||
1900 | free_bitmap(ctl, info); | ||
1901 | } | ||
1902 | if (need_resched()) { | ||
1903 | spin_unlock(&ctl->tree_lock); | ||
1904 | cond_resched(); | ||
1905 | spin_lock(&ctl->tree_lock); | ||
1906 | } | ||
1907 | } | ||
1908 | } | ||
1909 | |||
1910 | void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) | ||
1911 | { | ||
1912 | spin_lock(&ctl->tree_lock); | ||
1913 | __btrfs_remove_free_space_cache_locked(ctl); | ||
1914 | spin_unlock(&ctl->tree_lock); | ||
1915 | } | ||
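
The teardown loop uses a standard kernel idiom: free one entry at a time under the tree lock, and whenever need_resched() fires, drop the lock, cond_resched(), and retake it, re-reading rb_last() since the tree may have changed meanwhile. A userspace analogue (a pthread mutex stands in for the spinlock, and it yields unconditionally because need_resched() has no userspace equivalent):

	/*
	 * Userspace analogue of the drop-lock-and-reschedule idiom, with a
	 * pthread mutex for the spinlock and an unconditional sched_yield()
	 * where the kernel would check need_resched() first.
	 */
	#include <pthread.h>
	#include <sched.h>
	#include <stdlib.h>

	struct node { struct node *next; };

	static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;

	/* pop and free the first entry; caller holds tree_lock */
	static void free_one(struct node **head)
	{
		struct node *n = *head;

		*head = n->next;
		free(n);
	}

	static void drain_all(struct node **head)
	{
		pthread_mutex_lock(&tree_lock);
		while (*head) {
			free_one(head);
			/* let other threads run, then retake and re-read the head */
			pthread_mutex_unlock(&tree_lock);
			sched_yield();
			pthread_mutex_lock(&tree_lock);
		}
		pthread_mutex_unlock(&tree_lock);
	}
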
1916 | |||
1917 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | ||
1918 | { | ||
1919 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
886 | struct btrfs_free_cluster *cluster; | 1920 | struct btrfs_free_cluster *cluster; |
887 | struct list_head *head; | 1921 | struct list_head *head; |
888 | 1922 | ||
889 | spin_lock(&block_group->tree_lock); | 1923 | spin_lock(&ctl->tree_lock); |
890 | while ((head = block_group->cluster_list.next) != | 1924 | while ((head = block_group->cluster_list.next) != |
891 | &block_group->cluster_list) { | 1925 | &block_group->cluster_list) { |
892 | cluster = list_entry(head, struct btrfs_free_cluster, | 1926 | cluster = list_entry(head, struct btrfs_free_cluster, |
@@ -895,62 +1929,46 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | |||
895 | WARN_ON(cluster->block_group != block_group); | 1929 | WARN_ON(cluster->block_group != block_group); |
896 | __btrfs_return_cluster_to_free_space(block_group, cluster); | 1930 | __btrfs_return_cluster_to_free_space(block_group, cluster); |
897 | if (need_resched()) { | 1931 | if (need_resched()) { |
898 | spin_unlock(&block_group->tree_lock); | 1932 | spin_unlock(&ctl->tree_lock); |
899 | cond_resched(); | ||
900 | spin_lock(&block_group->tree_lock); | ||
901 | } | ||
902 | } | ||
903 | |||
904 | while ((node = rb_last(&block_group->free_space_offset)) != NULL) { | ||
905 | info = rb_entry(node, struct btrfs_free_space, offset_index); | ||
906 | unlink_free_space(block_group, info); | ||
907 | if (info->bitmap) | ||
908 | kfree(info->bitmap); | ||
909 | kfree(info); | ||
910 | if (need_resched()) { | ||
911 | spin_unlock(&block_group->tree_lock); | ||
912 | cond_resched(); | 1933 | cond_resched(); |
913 | spin_lock(&block_group->tree_lock); | 1934 | spin_lock(&ctl->tree_lock); |
914 | } | 1935 | } |
915 | } | 1936 | } |
1937 | __btrfs_remove_free_space_cache_locked(ctl); | ||
1938 | spin_unlock(&ctl->tree_lock); | ||
916 | 1939 | ||
917 | spin_unlock(&block_group->tree_lock); | ||
918 | } | 1940 | } |
919 | 1941 | ||
920 | u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, | 1942 | u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, |
921 | u64 offset, u64 bytes, u64 empty_size) | 1943 | u64 offset, u64 bytes, u64 empty_size) |
922 | { | 1944 | { |
1945 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
923 | struct btrfs_free_space *entry = NULL; | 1946 | struct btrfs_free_space *entry = NULL; |
924 | u64 bytes_search = bytes + empty_size; | 1947 | u64 bytes_search = bytes + empty_size; |
925 | u64 ret = 0; | 1948 | u64 ret = 0; |
926 | 1949 | ||
927 | spin_lock(&block_group->tree_lock); | 1950 | spin_lock(&ctl->tree_lock); |
928 | entry = find_free_space(block_group, &offset, &bytes_search, 0); | 1951 | entry = find_free_space(ctl, &offset, &bytes_search); |
929 | if (!entry) | 1952 | if (!entry) |
930 | goto out; | 1953 | goto out; |
931 | 1954 | ||
932 | ret = offset; | 1955 | ret = offset; |
933 | if (entry->bitmap) { | 1956 | if (entry->bitmap) { |
934 | bitmap_clear_bits(block_group, entry, offset, bytes); | 1957 | bitmap_clear_bits(ctl, entry, offset, bytes); |
935 | if (!entry->bytes) { | 1958 | if (!entry->bytes) |
936 | unlink_free_space(block_group, entry); | 1959 | free_bitmap(ctl, entry); |
937 | kfree(entry->bitmap); | ||
938 | kfree(entry); | ||
939 | block_group->total_bitmaps--; | ||
940 | recalculate_thresholds(block_group); | ||
941 | } | ||
942 | } else { | 1960 | } else { |
943 | unlink_free_space(block_group, entry); | 1961 | unlink_free_space(ctl, entry); |
944 | entry->offset += bytes; | 1962 | entry->offset += bytes; |
945 | entry->bytes -= bytes; | 1963 | entry->bytes -= bytes; |
946 | if (!entry->bytes) | 1964 | if (!entry->bytes) |
947 | kfree(entry); | 1965 | kmem_cache_free(btrfs_free_space_cachep, entry); |
948 | else | 1966 | else |
949 | link_free_space(block_group, entry); | 1967 | link_free_space(ctl, entry); |
950 | } | 1968 | } |
951 | 1969 | ||
952 | out: | 1970 | out: |
953 | spin_unlock(&block_group->tree_lock); | 1971 | spin_unlock(&ctl->tree_lock); |
954 | 1972 | ||
955 | return ret; | 1973 | return ret; |
956 | } | 1974 | } |
@@ -967,6 +1985,7 @@ int btrfs_return_cluster_to_free_space( | |||
967 | struct btrfs_block_group_cache *block_group, | 1985 | struct btrfs_block_group_cache *block_group, |
968 | struct btrfs_free_cluster *cluster) | 1986 | struct btrfs_free_cluster *cluster) |
969 | { | 1987 | { |
1988 | struct btrfs_free_space_ctl *ctl; | ||
970 | int ret; | 1989 | int ret; |
971 | 1990 | ||
972 | /* first, get a safe pointer to the block group */ | 1991 | /* first, get a safe pointer to the block group */ |
@@ -985,10 +2004,12 @@ int btrfs_return_cluster_to_free_space( | |||
985 | atomic_inc(&block_group->count); | 2004 | atomic_inc(&block_group->count); |
986 | spin_unlock(&cluster->lock); | 2005 | spin_unlock(&cluster->lock); |
987 | 2006 | ||
2007 | ctl = block_group->free_space_ctl; | ||
2008 | |||
988 | /* now return any extents the cluster had on it */ | 2009 | /* now return any extents the cluster had on it */ |
989 | spin_lock(&block_group->tree_lock); | 2010 | spin_lock(&ctl->tree_lock); |
990 | ret = __btrfs_return_cluster_to_free_space(block_group, cluster); | 2011 | ret = __btrfs_return_cluster_to_free_space(block_group, cluster); |
991 | spin_unlock(&block_group->tree_lock); | 2012 | spin_unlock(&ctl->tree_lock); |
992 | 2013 | ||
993 | /* finally drop our ref */ | 2014 | /* finally drop our ref */ |
994 | btrfs_put_block_group(block_group); | 2015 | btrfs_put_block_group(block_group); |
@@ -997,48 +2018,24 @@ int btrfs_return_cluster_to_free_space( | |||
997 | 2018 | ||
998 | static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, | 2019 | static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, |
999 | struct btrfs_free_cluster *cluster, | 2020 | struct btrfs_free_cluster *cluster, |
2021 | struct btrfs_free_space *entry, | ||
1000 | u64 bytes, u64 min_start) | 2022 | u64 bytes, u64 min_start) |
1001 | { | 2023 | { |
1002 | struct btrfs_free_space *entry; | 2024 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
1003 | int err; | 2025 | int err; |
1004 | u64 search_start = cluster->window_start; | 2026 | u64 search_start = cluster->window_start; |
1005 | u64 search_bytes = bytes; | 2027 | u64 search_bytes = bytes; |
1006 | u64 ret = 0; | 2028 | u64 ret = 0; |
1007 | 2029 | ||
1008 | spin_lock(&block_group->tree_lock); | ||
1009 | spin_lock(&cluster->lock); | ||
1010 | |||
1011 | if (!cluster->points_to_bitmap) | ||
1012 | goto out; | ||
1013 | |||
1014 | if (cluster->block_group != block_group) | ||
1015 | goto out; | ||
1016 | |||
1017 | /* | ||
1018 | * search_start is the beginning of the bitmap, but at some point it may | ||
1019 | * be a good idea to point to the actual start of the free area in the | ||
1020 | * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only | ||
1021 | * to 1 to make sure we get the bitmap entry | ||
1022 | */ | ||
1023 | entry = tree_search_offset(block_group, | ||
1024 | offset_to_bitmap(block_group, search_start), | ||
1025 | 1, 0); | ||
1026 | if (!entry || !entry->bitmap) | ||
1027 | goto out; | ||
1028 | |||
1029 | search_start = min_start; | 2030 | search_start = min_start; |
1030 | search_bytes = bytes; | 2031 | search_bytes = bytes; |
1031 | 2032 | ||
1032 | err = search_bitmap(block_group, entry, &search_start, | 2033 | err = search_bitmap(ctl, entry, &search_start, &search_bytes); |
1033 | &search_bytes); | ||
1034 | if (err) | 2034 | if (err) |
1035 | goto out; | 2035 | return 0; |
1036 | 2036 | ||
1037 | ret = search_start; | 2037 | ret = search_start; |
1038 | bitmap_clear_bits(block_group, entry, ret, bytes); | 2038 | bitmap_clear_bits(ctl, entry, ret, bytes); |
1039 | out: | ||
1040 | spin_unlock(&cluster->lock); | ||
1041 | spin_unlock(&block_group->tree_lock); | ||
1042 | 2039 | ||
1043 | return ret; | 2040 | return ret; |
1044 | } | 2041 | } |
@@ -1052,14 +2049,11 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
1052 | struct btrfs_free_cluster *cluster, u64 bytes, | 2049 | struct btrfs_free_cluster *cluster, u64 bytes, |
1053 | u64 min_start) | 2050 | u64 min_start) |
1054 | { | 2051 | { |
2052 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
1055 | struct btrfs_free_space *entry = NULL; | 2053 | struct btrfs_free_space *entry = NULL; |
1056 | struct rb_node *node; | 2054 | struct rb_node *node; |
1057 | u64 ret = 0; | 2055 | u64 ret = 0; |
1058 | 2056 | ||
1059 | if (cluster->points_to_bitmap) | ||
1060 | return btrfs_alloc_from_bitmap(block_group, cluster, bytes, | ||
1061 | min_start); | ||
1062 | |||
1063 | spin_lock(&cluster->lock); | 2057 | spin_lock(&cluster->lock); |
1064 | if (bytes > cluster->max_size) | 2058 | if (bytes > cluster->max_size) |
1065 | goto out; | 2059 | goto out; |
@@ -1072,11 +2066,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
1072 | goto out; | 2066 | goto out; |
1073 | 2067 | ||
1074 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 2068 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
1075 | |||
1076 | while (1) { | 2069 | while (1) {
1077 | if (entry->bytes < bytes || entry->offset < min_start) { | 2070 | if (entry->bytes < bytes || |
1078 | struct rb_node *node; | 2071 | (!entry->bitmap && entry->offset < min_start)) { |
1079 | |||
1080 | node = rb_next(&entry->offset_index); | 2072 | node = rb_next(&entry->offset_index); |
1081 | if (!node) | 2073 | if (!node) |
1082 | break; | 2074 | break; |
@@ -1084,20 +2076,52 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
1084 | offset_index); | 2076 | offset_index); |
1085 | continue; | 2077 | continue; |
1086 | } | 2078 | } |
1087 | ret = entry->offset; | ||
1088 | 2079 | ||
1089 | entry->offset += bytes; | 2080 | if (entry->bitmap) { |
1090 | entry->bytes -= bytes; | 2081 | ret = btrfs_alloc_from_bitmap(block_group, |
2082 | cluster, entry, bytes, | ||
2083 | min_start); | ||
2084 | if (ret == 0) { | ||
2085 | node = rb_next(&entry->offset_index); | ||
2086 | if (!node) | ||
2087 | break; | ||
2088 | entry = rb_entry(node, struct btrfs_free_space, | ||
2089 | offset_index); | ||
2090 | continue; | ||
2091 | } | ||
2092 | } else { | ||
1091 | 2093 | ||
1092 | if (entry->bytes == 0) { | 2094 | ret = entry->offset; |
1093 | rb_erase(&entry->offset_index, &cluster->root); | 2095 | |
1094 | kfree(entry); | 2096 | entry->offset += bytes; |
2097 | entry->bytes -= bytes; | ||
1095 | } | 2098 | } |
2099 | |||
2100 | if (entry->bytes == 0) | ||
2101 | rb_erase(&entry->offset_index, &cluster->root); | ||
1096 | break; | 2102 | break; |
1097 | } | 2103 | } |
1098 | out: | 2104 | out: |
1099 | spin_unlock(&cluster->lock); | 2105 | spin_unlock(&cluster->lock); |
1100 | 2106 | ||
2107 | if (!ret) | ||
2108 | return 0; | ||
2109 | |||
2110 | spin_lock(&ctl->tree_lock); | ||
2111 | |||
2112 | ctl->free_space -= bytes; | ||
2113 | if (entry->bytes == 0) { | ||
2114 | ctl->free_extents--; | ||
2115 | if (entry->bitmap) { | ||
2116 | kfree(entry->bitmap); | ||
2117 | ctl->total_bitmaps--; | ||
2118 | ctl->op->recalc_thresholds(ctl); | ||
2119 | } | ||
2120 | kmem_cache_free(btrfs_free_space_cachep, entry); | ||
2121 | } | ||
2122 | |||
2123 | spin_unlock(&ctl->tree_lock); | ||
2124 | |||
1101 | return ret; | 2125 | return ret; |
1102 | } | 2126 | } |
1103 | 2127 | ||
@@ -1106,6 +2130,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, | |||
1106 | struct btrfs_free_cluster *cluster, | 2130 | struct btrfs_free_cluster *cluster, |
1107 | u64 offset, u64 bytes, u64 min_bytes) | 2131 | u64 offset, u64 bytes, u64 min_bytes) |
1108 | { | 2132 | { |
2133 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
1109 | unsigned long next_zero; | 2134 | unsigned long next_zero; |
1110 | unsigned long i; | 2135 | unsigned long i; |
1111 | unsigned long search_bits; | 2136 | unsigned long search_bits; |
@@ -1113,12 +2138,13 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, | |||
1113 | unsigned long found_bits; | 2138 | unsigned long found_bits; |
1114 | unsigned long start = 0; | 2139 | unsigned long start = 0; |
1115 | unsigned long total_found = 0; | 2140 | unsigned long total_found = 0; |
2141 | int ret; | ||
1116 | bool found = false; | 2142 | bool found = false; |
1117 | 2143 | ||
1118 | i = offset_to_bit(entry->offset, block_group->sectorsize, | 2144 | i = offset_to_bit(entry->offset, block_group->sectorsize, |
1119 | max_t(u64, offset, entry->offset)); | 2145 | max_t(u64, offset, entry->offset)); |
1120 | search_bits = bytes_to_bits(min_bytes, block_group->sectorsize); | 2146 | search_bits = bytes_to_bits(bytes, block_group->sectorsize); |
1121 | total_bits = bytes_to_bits(bytes, block_group->sectorsize); | 2147 | total_bits = bytes_to_bits(min_bytes, block_group->sectorsize); |
1122 | 2148 | ||
1123 | again: | 2149 | again: |
1124 | found_bits = 0; | 2150 | found_bits = 0; |
@@ -1135,7 +2161,7 @@ again: | |||
1135 | } | 2161 | } |
1136 | 2162 | ||
1137 | if (!found_bits) | 2163 | if (!found_bits) |
1138 | return -1; | 2164 | return -ENOSPC; |
1139 | 2165 | ||
1140 | if (!found) { | 2166 | if (!found) { |
1141 | start = i; | 2167 | start = i; |
@@ -1159,131 +2185,67 @@ again: | |||
1159 | 2185 | ||
1160 | cluster->window_start = start * block_group->sectorsize + | 2186 | cluster->window_start = start * block_group->sectorsize + |
1161 | entry->offset; | 2187 | entry->offset; |
1162 | cluster->points_to_bitmap = true; | 2188 | rb_erase(&entry->offset_index, &ctl->free_space_offset); |
2189 | ret = tree_insert_offset(&cluster->root, entry->offset, | ||
2190 | &entry->offset_index, 1); | ||
2191 | BUG_ON(ret); | ||
1163 | 2192 | ||
1164 | return 0; | 2193 | return 0; |
1165 | } | 2194 | } |
1166 | 2195 | ||
1167 | /* | 2196 | /* |
1168 | * here we try to find a cluster of blocks in a block group. The goal | 2197 | * This searches the block group for just extents to fill the cluster with. |
1169 | * is to find at least bytes free and up to empty_size + bytes free. | ||
1170 | * We might not find them all in one contiguous area. | ||
1171 | * | ||
1172 | * returns zero and sets up cluster if things worked out, otherwise | ||
1173 | * it returns -enospc | ||
1174 | */ | 2198 | */ |
1175 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | 2199 | static noinline int |
1176 | struct btrfs_root *root, | 2200 | setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, |
1177 | struct btrfs_block_group_cache *block_group, | 2201 | struct btrfs_free_cluster *cluster, |
1178 | struct btrfs_free_cluster *cluster, | 2202 | struct list_head *bitmaps, u64 offset, u64 bytes, |
1179 | u64 offset, u64 bytes, u64 empty_size) | 2203 | u64 min_bytes) |
1180 | { | 2204 | { |
2205 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
2206 | struct btrfs_free_space *first = NULL; | ||
1181 | struct btrfs_free_space *entry = NULL; | 2207 | struct btrfs_free_space *entry = NULL; |
2208 | struct btrfs_free_space *prev = NULL; | ||
2209 | struct btrfs_free_space *last; | ||
1182 | struct rb_node *node; | 2210 | struct rb_node *node; |
1183 | struct btrfs_free_space *next; | ||
1184 | struct btrfs_free_space *last = NULL; | ||
1185 | u64 min_bytes; | ||
1186 | u64 window_start; | 2211 | u64 window_start; |
1187 | u64 window_free; | 2212 | u64 window_free; |
1188 | u64 max_extent = 0; | 2213 | u64 max_extent; |
1189 | bool found_bitmap = false; | 2214 | u64 max_gap = 128 * 1024; |
1190 | int ret; | ||
1191 | 2215 | ||
1192 | /* for metadata, allow allocates with more holes */ | 2216 | entry = tree_search_offset(ctl, offset, 0, 1); |
1193 | if (btrfs_test_opt(root, SSD_SPREAD)) { | 2217 | if (!entry) |
1194 | min_bytes = bytes + empty_size; | 2218 | return -ENOSPC; |
1195 | } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
1196 | /* | ||
1197 | * we want to do larger allocations when we are | ||
1198 | * flushing out the delayed refs, it helps prevent | ||
1199 | * making more work as we go along. | ||
1200 | */ | ||
1201 | if (trans->transaction->delayed_refs.flushing) | ||
1202 | min_bytes = max(bytes, (bytes + empty_size) >> 1); | ||
1203 | else | ||
1204 | min_bytes = max(bytes, (bytes + empty_size) >> 4); | ||
1205 | } else | ||
1206 | min_bytes = max(bytes, (bytes + empty_size) >> 2); | ||
1207 | |||
1208 | spin_lock(&block_group->tree_lock); | ||
1209 | spin_lock(&cluster->lock); | ||
1210 | |||
1211 | /* someone already found a cluster, hooray */ | ||
1212 | if (cluster->block_group) { | ||
1213 | ret = 0; | ||
1214 | goto out; | ||
1215 | } | ||
1216 | again: | ||
1217 | entry = tree_search_offset(block_group, offset, found_bitmap, 1); | ||
1218 | if (!entry) { | ||
1219 | ret = -ENOSPC; | ||
1220 | goto out; | ||
1221 | } | ||
1222 | 2219 | ||
1223 | /* | 2220 | /* |
1224 | * If found_bitmap is true, we exhausted our search for extent entries, | 2221 | * We don't want bitmaps, so just move along until we find a normal |
1225 | * and we just want to search all of the bitmaps that we can find, and | 2222 | * extent entry. |
1226 | * ignore any extent entries we find. | ||
1227 | */ | 2223 | */ |
1228 | while (entry->bitmap || found_bitmap || | 2224 | while (entry->bitmap) { |
1229 | (!entry->bitmap && entry->bytes < min_bytes)) { | 2225 | if (list_empty(&entry->list)) |
1230 | struct rb_node *node = rb_next(&entry->offset_index); | 2226 | list_add_tail(&entry->list, bitmaps); |
1231 | 2227 | node = rb_next(&entry->offset_index); | |
1232 | if (entry->bitmap && entry->bytes > bytes + empty_size) { | 2228 | if (!node) |
1233 | ret = btrfs_bitmap_cluster(block_group, entry, cluster, | 2229 | return -ENOSPC; |
1234 | offset, bytes + empty_size, | ||
1235 | min_bytes); | ||
1236 | if (!ret) | ||
1237 | goto got_it; | ||
1238 | } | ||
1239 | |||
1240 | if (!node) { | ||
1241 | ret = -ENOSPC; | ||
1242 | goto out; | ||
1243 | } | ||
1244 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 2230 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
1245 | } | 2231 | } |
1246 | 2232 | ||
1247 | /* | ||
1248 | * We already searched all the extent entries from the passed in offset | ||
1249 | * to the end and didn't find enough space for the cluster, and we also | ||
1250 | * didn't find any bitmaps that met our criteria, just go ahead and exit | ||
1251 | */ | ||
1252 | if (found_bitmap) { | ||
1253 | ret = -ENOSPC; | ||
1254 | goto out; | ||
1255 | } | ||
1256 | |||
1257 | cluster->points_to_bitmap = false; | ||
1258 | window_start = entry->offset; | 2233 | window_start = entry->offset; |
1259 | window_free = entry->bytes; | 2234 | window_free = entry->bytes; |
1260 | last = entry; | ||
1261 | max_extent = entry->bytes; | 2235 | max_extent = entry->bytes; |
2236 | first = entry; | ||
2237 | last = entry; | ||
2238 | prev = entry; | ||
1262 | 2239 | ||
1263 | while (1) { | 2240 | while (window_free <= min_bytes) { |
1264 | /* out window is just right, lets fill it */ | 2241 | node = rb_next(&entry->offset_index); |
1265 | if (window_free >= bytes + empty_size) | 2242 | if (!node) |
1266 | break; | 2243 | return -ENOSPC; |
1267 | 2244 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | |
1268 | node = rb_next(&last->offset_index); | ||
1269 | if (!node) { | ||
1270 | if (found_bitmap) | ||
1271 | goto again; | ||
1272 | ret = -ENOSPC; | ||
1273 | goto out; | ||
1274 | } | ||
1275 | next = rb_entry(node, struct btrfs_free_space, offset_index); | ||
1276 | 2245 | ||
1277 | /* | 2246 | if (entry->bitmap) { |
1278 | * we found a bitmap, so if this search doesn't result in a | 2247 | if (list_empty(&entry->list)) |
1279 | * cluster, we know to go and search again for the bitmaps and | 2248 | list_add_tail(&entry->list, bitmaps); |
1280 | * start looking for space there | ||
1281 | */ | ||
1282 | if (next->bitmap) { | ||
1283 | if (!found_bitmap) | ||
1284 | offset = next->offset; | ||
1285 | found_bitmap = true; | ||
1286 | last = next; | ||
1287 | continue; | 2249 | continue; |
1288 | } | 2250 | } |
1289 | 2251 | ||
@@ -1291,60 +2253,190 @@ again: | |||
1291 | * we haven't filled the empty size and the window is | 2253 | * we haven't filled the empty size and the window is |
1292 | * very large. reset and try again | 2254 | * very large. reset and try again |
1293 | */ | 2255 | */ |
1294 | if (next->offset - (last->offset + last->bytes) > 128 * 1024 || | 2256 | if (entry->offset - (prev->offset + prev->bytes) > max_gap || |
1295 | next->offset - window_start > (bytes + empty_size) * 2) { | 2257 | entry->offset - window_start > (min_bytes * 2)) { |
1296 | entry = next; | 2258 | first = entry; |
1297 | window_start = entry->offset; | 2259 | window_start = entry->offset; |
1298 | window_free = entry->bytes; | 2260 | window_free = entry->bytes; |
1299 | last = entry; | 2261 | last = entry; |
1300 | max_extent = entry->bytes; | 2262 | max_extent = entry->bytes; |
1301 | } else { | 2263 | } else { |
1302 | last = next; | 2264 | last = entry; |
1303 | window_free += next->bytes; | 2265 | window_free += entry->bytes; |
1304 | if (entry->bytes > max_extent) | 2266 | if (entry->bytes > max_extent) |
1305 | max_extent = entry->bytes; | 2267 | max_extent = entry->bytes; |
1306 | } | 2268 | } |
2269 | prev = entry; | ||
1307 | } | 2270 | } |
1308 | 2271 | ||
1309 | cluster->window_start = entry->offset; | 2272 | cluster->window_start = first->offset; |
2273 | |||
2274 | node = &first->offset_index; | ||
1310 | 2275 | ||
1311 | /* | 2276 | /* |
1312 | * now we've found our entries, pull them out of the free space | 2277 | * now we've found our entries, pull them out of the free space |
1313 | * cache and put them into the cluster rbtree | 2278 | * cache and put them into the cluster rbtree |
1314 | * | ||
1315 | * The cluster includes an rbtree, but only uses the offset index | ||
1316 | * of each free space cache entry. | ||
1317 | */ | 2279 | */ |
1318 | while (1) { | 2280 | do { |
2281 | int ret; | ||
2282 | |||
2283 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
1319 | node = rb_next(&entry->offset_index); | 2284 | node = rb_next(&entry->offset_index); |
1320 | if (entry->bitmap && node) { | 2285 | if (entry->bitmap) |
1321 | entry = rb_entry(node, struct btrfs_free_space, | ||
1322 | offset_index); | ||
1323 | continue; | 2286 | continue; |
1324 | } else if (entry->bitmap && !node) { | ||
1325 | break; | ||
1326 | } | ||
1327 | 2287 | ||
1328 | rb_erase(&entry->offset_index, &block_group->free_space_offset); | 2288 | rb_erase(&entry->offset_index, &ctl->free_space_offset); |
1329 | ret = tree_insert_offset(&cluster->root, entry->offset, | 2289 | ret = tree_insert_offset(&cluster->root, entry->offset, |
1330 | &entry->offset_index, 0); | 2290 | &entry->offset_index, 0); |
1331 | BUG_ON(ret); | 2291 | BUG_ON(ret); |
2292 | } while (node && entry != last); | ||
1332 | 2293 | ||
1333 | if (!node || entry == last) | 2294 | cluster->max_size = max_extent; |
1334 | break; | ||
1335 | 2295 | ||
2296 | return 0; | ||
2297 | } | ||
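
setup_cluster_no_bitmap is a sliding-window pass over extent entries sorted by
offset: the window keeps growing while window_free <= min_bytes, and it restarts
whenever the gap to the previous extent exceeds max_gap (128K) or the window
spreads too wide relative to min_bytes. A reduced sketch of the gap rule, with a
sorted array standing in for the rbtree and the width check omitted for brevity
(all names and numbers are illustrative):

#include <stdint.h>
#include <stdio.h>

struct extent { uint64_t offset, bytes; };

/* Return the index of the first extent of a window holding at least
 * min_bytes, or -1 (-ENOSPC in the kernel) when none exists. */
static long find_window(const struct extent *e, int n,
			uint64_t min_bytes, uint64_t max_gap)
{
	uint64_t window_free = 0;
	int first = 0, i;

	for (i = 0; i < n; i++) {
		uint64_t gap = i ?
			e[i].offset - (e[i - 1].offset + e[i - 1].bytes) : 0;

		if (i == first)
			window_free = e[i].bytes;
		else if (gap > max_gap) {
			first = i;		/* gap too big: reset */
			window_free = e[i].bytes;
		} else
			window_free += e[i].bytes;

		if (window_free >= min_bytes)
			return first;
	}
	return -1;
}

int main(void)
{
	struct extent e[] = {
		{ 0,       16384 },
		{ 32768,   16384 },	/* 16K gap: same window */
		{ 4194304, 65536 },	/* 4M gap: window resets */
	};
	printf("window starts at extent %ld\n",
	       find_window(e, 3, 65536, 128 * 1024));	/* prints 2 */
	return 0;
}
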
2298 | |||
2299 | /* | ||
2300 | * This specifically looks for bitmaps that may work in the cluster; we assume | ||
2301 | * that we have already failed to find extents that will work. | ||
2302 | */ | ||
2303 | static noinline int | ||
2304 | setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, | ||
2305 | struct btrfs_free_cluster *cluster, | ||
2306 | struct list_head *bitmaps, u64 offset, u64 bytes, | ||
2307 | u64 min_bytes) | ||
2308 | { | ||
2309 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
2310 | struct btrfs_free_space *entry; | ||
2311 | struct rb_node *node; | ||
2312 | int ret = -ENOSPC; | ||
2313 | |||
2314 | if (ctl->total_bitmaps == 0) | ||
2315 | return -ENOSPC; | ||
2316 | |||
2317 | /* | ||
2318 | * First check our cached list of bitmaps and see if there is an entry | ||
2319 | * here that will work. | ||
2320 | */ | ||
2321 | list_for_each_entry(entry, bitmaps, list) { | ||
2322 | if (entry->bytes < min_bytes) | ||
2323 | continue; | ||
2324 | ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, | ||
2325 | bytes, min_bytes); | ||
2326 | if (!ret) | ||
2327 | return 0; | ||
2328 | } | ||
2329 | |||
2330 | /* | ||
2331 | * If we do have entries on our list and we are here, then we didn't find | ||
2332 | * anything, so go ahead and get the next entry after the last entry in | ||
2333 | * this list and start the search from there. | ||
2334 | */ | ||
2335 | if (!list_empty(bitmaps)) { | ||
2336 | entry = list_entry(bitmaps->prev, struct btrfs_free_space, | ||
2337 | list); | ||
2338 | node = rb_next(&entry->offset_index); | ||
2339 | if (!node) | ||
2340 | return -ENOSPC; | ||
1336 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 2341 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
2342 | goto search; | ||
1337 | } | 2343 | } |
1338 | 2344 | ||
1339 | cluster->max_size = max_extent; | 2345 | entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1); |
1340 | got_it: | 2346 | if (!entry) |
1341 | ret = 0; | 2347 | return -ENOSPC; |
1342 | atomic_inc(&block_group->count); | 2348 | |
1343 | list_add_tail(&cluster->block_group_list, &block_group->cluster_list); | 2349 | search: |
1344 | cluster->block_group = block_group; | 2350 | node = &entry->offset_index; |
2351 | do { | ||
2352 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
2353 | node = rb_next(&entry->offset_index); | ||
2354 | if (!entry->bitmap) | ||
2355 | continue; | ||
2356 | if (entry->bytes < min_bytes) | ||
2357 | continue; | ||
2358 | ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, | ||
2359 | bytes, min_bytes); | ||
2360 | } while (ret && node); | ||
2361 | |||
2362 | return ret; | ||
2363 | } | ||
2364 | |||
2365 | /* | ||
2366 | * here we try to find a cluster of blocks in a block group. The goal | ||
2367 | * is to find at least bytes free and up to empty_size + bytes free. | ||
2368 | * We might not find them all in one contiguous area. | ||
2369 | * | ||
2370 | * returns zero and sets up cluster if things worked out, otherwise | ||
2371 | * it returns -ENOSPC | ||
2372 | */ | ||
2373 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | ||
2374 | struct btrfs_root *root, | ||
2375 | struct btrfs_block_group_cache *block_group, | ||
2376 | struct btrfs_free_cluster *cluster, | ||
2377 | u64 offset, u64 bytes, u64 empty_size) | ||
2378 | { | ||
2379 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
2380 | struct list_head bitmaps; | ||
2381 | struct btrfs_free_space *entry, *tmp; | ||
2382 | u64 min_bytes; | ||
2383 | int ret; | ||
2384 | |||
2385 | /* for metadata, allow allocations with more holes */ | ||
2386 | if (btrfs_test_opt(root, SSD_SPREAD)) { | ||
2387 | min_bytes = bytes + empty_size; | ||
2388 | } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
2389 | /* | ||
2390 | * we want to do larger allocations when we are | ||
2391 | * flushing out the delayed refs, it helps prevent | ||
2392 | * making more work as we go along. | ||
2393 | */ | ||
2394 | if (trans->transaction->delayed_refs.flushing) | ||
2395 | min_bytes = max(bytes, (bytes + empty_size) >> 1); | ||
2396 | else | ||
2397 | min_bytes = max(bytes, (bytes + empty_size) >> 4); | ||
2398 | } else | ||
2399 | min_bytes = max(bytes, (bytes + empty_size) >> 2); | ||
2400 | |||
2401 | spin_lock(&ctl->tree_lock); | ||
2402 | |||
2403 | /* | ||
2404 | * If we know we don't have enough space to make a cluster, don't even | ||
2405 | * bother doing all the work to try and find one. | ||
2406 | */ | ||
2407 | if (ctl->free_space < min_bytes) { | ||
2408 | spin_unlock(&ctl->tree_lock); | ||
2409 | return -ENOSPC; | ||
2410 | } | ||
2411 | |||
2412 | spin_lock(&cluster->lock); | ||
2413 | |||
2414 | /* someone already found a cluster, hooray */ | ||
2415 | if (cluster->block_group) { | ||
2416 | ret = 0; | ||
2417 | goto out; | ||
2418 | } | ||
2419 | |||
2420 | INIT_LIST_HEAD(&bitmaps); | ||
2421 | ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, | ||
2422 | bytes, min_bytes); | ||
2423 | if (ret) | ||
2424 | ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, | ||
2425 | offset, bytes, min_bytes); | ||
2426 | |||
2427 | /* Clear our temporary list */ | ||
2428 | list_for_each_entry_safe(entry, tmp, &bitmaps, list) | ||
2429 | list_del_init(&entry->list); | ||
2430 | |||
2431 | if (!ret) { | ||
2432 | atomic_inc(&block_group->count); | ||
2433 | list_add_tail(&cluster->block_group_list, | ||
2434 | &block_group->cluster_list); | ||
2435 | cluster->block_group = block_group; | ||
2436 | } | ||
1345 | out: | 2437 | out: |
1346 | spin_unlock(&cluster->lock); | 2438 | spin_unlock(&cluster->lock); |
1347 | spin_unlock(&block_group->tree_lock); | 2439 | spin_unlock(&ctl->tree_lock); |
1348 | 2440 | ||
1349 | return ret; | 2441 | return ret; |
1350 | } | 2442 | } |
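
The min_bytes policy at the top of btrfs_find_space_cluster decides how
fragmented a cluster may be: ssd_spread demands the whole allocation in one
contiguous area, metadata tolerates more holes (fewer while delayed refs are
flushing), and data sits in between. The same arithmetic as standalone C, a
sketch of the policy only (the flag plumbing is assumed away):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t max64(uint64_t a, uint64_t b)
{
	return a > b ? a : b;
}

static uint64_t min_bytes_for(uint64_t bytes, uint64_t empty_size,
			      bool ssd_spread, bool metadata, bool flushing)
{
	if (ssd_spread)
		return bytes + empty_size;		/* one contiguous run */
	if (metadata)
		return flushing ?
			max64(bytes, (bytes + empty_size) >> 1) :
			max64(bytes, (bytes + empty_size) >> 4);
	return max64(bytes, (bytes + empty_size) >> 2);	/* data */
}

int main(void)
{
	uint64_t bytes = 256 << 10, empty = 1 << 20;	/* 256K + 1M */

	printf("ssd_spread %llu, metadata %llu, data %llu\n",
	       (unsigned long long)min_bytes_for(bytes, empty, true, false, false),
	       (unsigned long long)min_bytes_for(bytes, empty, false, true, false),
	       (unsigned long long)min_bytes_for(bytes, empty, false, false, false));
	return 0;
}
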
@@ -1358,8 +2450,244 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) | |||
1358 | spin_lock_init(&cluster->refill_lock); | 2450 | spin_lock_init(&cluster->refill_lock); |
1359 | cluster->root = RB_ROOT; | 2451 | cluster->root = RB_ROOT; |
1360 | cluster->max_size = 0; | 2452 | cluster->max_size = 0; |
1361 | cluster->points_to_bitmap = false; | ||
1362 | INIT_LIST_HEAD(&cluster->block_group_list); | 2453 | INIT_LIST_HEAD(&cluster->block_group_list); |
1363 | cluster->block_group = NULL; | 2454 | cluster->block_group = NULL; |
1364 | } | 2455 | } |
1365 | 2456 | ||
2457 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | ||
2458 | u64 *trimmed, u64 start, u64 end, u64 minlen) | ||
2459 | { | ||
2460 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
2461 | struct btrfs_free_space *entry = NULL; | ||
2462 | struct btrfs_fs_info *fs_info = block_group->fs_info; | ||
2463 | u64 bytes = 0; | ||
2464 | u64 actually_trimmed; | ||
2465 | int ret = 0; | ||
2466 | |||
2467 | *trimmed = 0; | ||
2468 | |||
2469 | while (start < end) { | ||
2470 | spin_lock(&ctl->tree_lock); | ||
2471 | |||
2472 | if (ctl->free_space < minlen) { | ||
2473 | spin_unlock(&ctl->tree_lock); | ||
2474 | break; | ||
2475 | } | ||
2476 | |||
2477 | entry = tree_search_offset(ctl, start, 0, 1); | ||
2478 | if (!entry) | ||
2479 | entry = tree_search_offset(ctl, | ||
2480 | offset_to_bitmap(ctl, start), | ||
2481 | 1, 1); | ||
2482 | |||
2483 | if (!entry || entry->offset >= end) { | ||
2484 | spin_unlock(&ctl->tree_lock); | ||
2485 | break; | ||
2486 | } | ||
2487 | |||
2488 | if (entry->bitmap) { | ||
2489 | ret = search_bitmap(ctl, entry, &start, &bytes); | ||
2490 | if (!ret) { | ||
2491 | if (start >= end) { | ||
2492 | spin_unlock(&ctl->tree_lock); | ||
2493 | break; | ||
2494 | } | ||
2495 | bytes = min(bytes, end - start); | ||
2496 | bitmap_clear_bits(ctl, entry, start, bytes); | ||
2497 | if (entry->bytes == 0) | ||
2498 | free_bitmap(ctl, entry); | ||
2499 | } else { | ||
2500 | start = entry->offset + BITS_PER_BITMAP * | ||
2501 | block_group->sectorsize; | ||
2502 | spin_unlock(&ctl->tree_lock); | ||
2503 | ret = 0; | ||
2504 | continue; | ||
2505 | } | ||
2506 | } else { | ||
2507 | start = entry->offset; | ||
2508 | bytes = min(entry->bytes, end - start); | ||
2509 | unlink_free_space(ctl, entry); | ||
2510 | kmem_cache_free(btrfs_free_space_cachep, entry); | ||
2511 | } | ||
2512 | |||
2513 | spin_unlock(&ctl->tree_lock); | ||
2514 | |||
2515 | if (bytes >= minlen) { | ||
2516 | int update_ret; | ||
2517 | update_ret = btrfs_update_reserved_bytes(block_group, | ||
2518 | bytes, 1, 1); | ||
2519 | |||
2520 | ret = btrfs_error_discard_extent(fs_info->extent_root, | ||
2521 | start, | ||
2522 | bytes, | ||
2523 | &actually_trimmed); | ||
2524 | |||
2525 | btrfs_add_free_space(block_group, start, bytes); | ||
2526 | if (!update_ret) | ||
2527 | btrfs_update_reserved_bytes(block_group, | ||
2528 | bytes, 0, 1); | ||
2529 | |||
2530 | if (ret) | ||
2531 | break; | ||
2532 | *trimmed += actually_trimmed; | ||
2533 | } | ||
2534 | start += bytes; | ||
2535 | bytes = 0; | ||
2536 | |||
2537 | if (fatal_signal_pending(current)) { | ||
2538 | ret = -ERESTARTSYS; | ||
2539 | break; | ||
2540 | } | ||
2541 | |||
2542 | cond_resched(); | ||
2543 | } | ||
2544 | |||
2545 | return ret; | ||
2546 | } | ||
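
btrfs_trim_block_group is the worker behind the FITRIM support added in this
series: it walks the free-space entries in [start, end), discards each run of at
least minlen bytes, and accumulates the result in *trimmed. From userspace the
whole path is driven by a single ioctl; a minimal caller could look like this
(the mount point is an assumption and error handling is pared down):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>		/* FITRIM, struct fstrim_range */

int main(void)
{
	struct fstrim_range range;
	int fd = open("/mnt/btrfs", O_RDONLY);	/* assumed mount point */

	if (fd < 0)
		return 1;
	memset(&range, 0, sizeof(range));
	range.len = (__u64)-1;		/* trim the whole filesystem */
	range.minlen = 64 * 1024;	/* skip runs shorter than 64K */
	if (ioctl(fd, FITRIM, &range) == 0)
		printf("trimmed %llu bytes\n",
		       (unsigned long long)range.len);
	close(fd);
	return 0;
}
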
2547 | |||
2548 | /* | ||
2549 | * Find the left-most item in the cache tree, and then return the | ||
2550 | * smallest inode number in the item. | ||
2551 | * | ||
2552 | * Note: the returned inode number may not be the smallest one in | ||
2553 | * the tree, if the left-most item is a bitmap. | ||
2554 | */ | ||
2555 | u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) | ||
2556 | { | ||
2557 | struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; | ||
2558 | struct btrfs_free_space *entry = NULL; | ||
2559 | u64 ino = 0; | ||
2560 | |||
2561 | spin_lock(&ctl->tree_lock); | ||
2562 | |||
2563 | if (RB_EMPTY_ROOT(&ctl->free_space_offset)) | ||
2564 | goto out; | ||
2565 | |||
2566 | entry = rb_entry(rb_first(&ctl->free_space_offset), | ||
2567 | struct btrfs_free_space, offset_index); | ||
2568 | |||
2569 | if (!entry->bitmap) { | ||
2570 | ino = entry->offset; | ||
2571 | |||
2572 | unlink_free_space(ctl, entry); | ||
2573 | entry->offset++; | ||
2574 | entry->bytes--; | ||
2575 | if (!entry->bytes) | ||
2576 | kmem_cache_free(btrfs_free_space_cachep, entry); | ||
2577 | else | ||
2578 | link_free_space(ctl, entry); | ||
2579 | } else { | ||
2580 | u64 offset = 0; | ||
2581 | u64 count = 1; | ||
2582 | int ret; | ||
2583 | |||
2584 | ret = search_bitmap(ctl, entry, &offset, &count); | ||
2585 | BUG_ON(ret); | ||
2586 | |||
2587 | ino = offset; | ||
2588 | bitmap_clear_bits(ctl, entry, offset, 1); | ||
2589 | if (entry->bytes == 0) | ||
2590 | free_bitmap(ctl, entry); | ||
2591 | } | ||
2592 | out: | ||
2593 | spin_unlock(&ctl->tree_lock); | ||
2594 | |||
2595 | return ino; | ||
2596 | } | ||
2597 | |||
2598 | struct inode *lookup_free_ino_inode(struct btrfs_root *root, | ||
2599 | struct btrfs_path *path) | ||
2600 | { | ||
2601 | struct inode *inode = NULL; | ||
2602 | |||
2603 | spin_lock(&root->cache_lock); | ||
2604 | if (root->cache_inode) | ||
2605 | inode = igrab(root->cache_inode); | ||
2606 | spin_unlock(&root->cache_lock); | ||
2607 | if (inode) | ||
2608 | return inode; | ||
2609 | |||
2610 | inode = __lookup_free_space_inode(root, path, 0); | ||
2611 | if (IS_ERR(inode)) | ||
2612 | return inode; | ||
2613 | |||
2614 | spin_lock(&root->cache_lock); | ||
2615 | if (!btrfs_fs_closing(root->fs_info)) | ||
2616 | root->cache_inode = igrab(inode); | ||
2617 | spin_unlock(&root->cache_lock); | ||
2618 | |||
2619 | return inode; | ||
2620 | } | ||
2621 | |||
2622 | int create_free_ino_inode(struct btrfs_root *root, | ||
2623 | struct btrfs_trans_handle *trans, | ||
2624 | struct btrfs_path *path) | ||
2625 | { | ||
2626 | return __create_free_space_inode(root, trans, path, | ||
2627 | BTRFS_FREE_INO_OBJECTID, 0); | ||
2628 | } | ||
2629 | |||
2630 | int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | ||
2631 | { | ||
2632 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | ||
2633 | struct btrfs_path *path; | ||
2634 | struct inode *inode; | ||
2635 | int ret = 0; | ||
2636 | u64 root_gen = btrfs_root_generation(&root->root_item); | ||
2637 | |||
2638 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
2639 | return 0; | ||
2640 | |||
2641 | /* | ||
2642 | * If we're unmounting then just return, since this does a search on the | ||
2643 | * normal root and not the commit root and we could deadlock. | ||
2644 | */ | ||
2645 | if (btrfs_fs_closing(fs_info)) | ||
2646 | return 0; | ||
2647 | |||
2648 | path = btrfs_alloc_path(); | ||
2649 | if (!path) | ||
2650 | return 0; | ||
2651 | |||
2652 | inode = lookup_free_ino_inode(root, path); | ||
2653 | if (IS_ERR(inode)) | ||
2654 | goto out; | ||
2655 | |||
2656 | if (root_gen != BTRFS_I(inode)->generation) | ||
2657 | goto out_put; | ||
2658 | |||
2659 | ret = __load_free_space_cache(root, inode, ctl, path, 0); | ||
2660 | |||
2661 | if (ret < 0) | ||
2662 | printk(KERN_ERR "btrfs: failed to load free ino cache for " | ||
2663 | "root %llu\n", root->root_key.objectid); | ||
2664 | out_put: | ||
2665 | iput(inode); | ||
2666 | out: | ||
2667 | btrfs_free_path(path); | ||
2668 | return ret; | ||
2669 | } | ||
2670 | |||
2671 | int btrfs_write_out_ino_cache(struct btrfs_root *root, | ||
2672 | struct btrfs_trans_handle *trans, | ||
2673 | struct btrfs_path *path) | ||
2674 | { | ||
2675 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | ||
2676 | struct inode *inode; | ||
2677 | int ret; | ||
2678 | |||
2679 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
2680 | return 0; | ||
2681 | |||
2682 | inode = lookup_free_ino_inode(root, path); | ||
2683 | if (IS_ERR(inode)) | ||
2684 | return 0; | ||
2685 | |||
2686 | ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); | ||
2687 | if (ret < 0) | ||
2688 | printk(KERN_ERR "btrfs: failed to write free ino cache " | ||
2689 | "for root %llu\n", root->root_key.objectid); | ||
2690 | |||
2691 | iput(inode); | ||
2692 | return ret; | ||
2693 | } | ||
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 890a8e79011b..8f2613f779ed 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h | |||
@@ -27,17 +27,75 @@ struct btrfs_free_space { | |||
27 | struct list_head list; | 27 | struct list_head list; |
28 | }; | 28 | }; |
29 | 29 | ||
30 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | 30 | struct btrfs_free_space_ctl { |
31 | u64 bytenr, u64 size); | 31 | spinlock_t tree_lock; |
32 | struct rb_root free_space_offset; | ||
33 | u64 free_space; | ||
34 | int extents_thresh; | ||
35 | int free_extents; | ||
36 | int total_bitmaps; | ||
37 | int unit; | ||
38 | u64 start; | ||
39 | struct btrfs_free_space_op *op; | ||
40 | void *private; | ||
41 | }; | ||
42 | |||
43 | struct btrfs_free_space_op { | ||
44 | void (*recalc_thresholds)(struct btrfs_free_space_ctl *ctl); | ||
45 | bool (*use_bitmap)(struct btrfs_free_space_ctl *ctl, | ||
46 | struct btrfs_free_space *info); | ||
47 | }; | ||
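
The btrfs_free_space_ctl/btrfs_free_space_op split introduced here is a plain C
ops-table (strategy) pattern: one tree implementation now serves both block-group
free space and the free-inode-number caches, and only the threshold and bitmap
policies vary per user. The shape of the pattern, reduced to a standalone sketch
(the names and fields are illustrative, not the kernel types):

#include <stdbool.h>
#include <stdio.h>

struct ctl;

struct space_op {
	void (*recalc_thresholds)(struct ctl *ctl);
	bool (*use_bitmap)(struct ctl *ctl, unsigned long bytes);
};

struct ctl {
	int free_extents;
	int extents_thresh;
	const struct space_op *op;
};

static void noop_recalc(struct ctl *ctl)
{
	(void)ctl;			/* pinned tree: nothing to adjust */
}

static bool never_bitmap(struct ctl *ctl, unsigned long bytes)
{
	(void)ctl; (void)bytes;
	return false;			/* pinned tree always uses extents */
}

static const struct space_op pinned_op = {
	.recalc_thresholds	= noop_recalc,
	.use_bitmap		= never_bitmap,
};

int main(void)
{
	struct ctl ctl = { .op = &pinned_op };

	ctl.op->recalc_thresholds(&ctl);
	printf("use bitmap? %d\n", ctl.op->use_bitmap(&ctl, 4096));
	return 0;
}
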
48 | |||
49 | struct inode *lookup_free_space_inode(struct btrfs_root *root, | ||
50 | struct btrfs_block_group_cache | ||
51 | *block_group, struct btrfs_path *path); | ||
52 | int create_free_space_inode(struct btrfs_root *root, | ||
53 | struct btrfs_trans_handle *trans, | ||
54 | struct btrfs_block_group_cache *block_group, | ||
55 | struct btrfs_path *path); | ||
56 | |||
57 | int btrfs_truncate_free_space_cache(struct btrfs_root *root, | ||
58 | struct btrfs_trans_handle *trans, | ||
59 | struct btrfs_path *path, | ||
60 | struct inode *inode); | ||
61 | int load_free_space_cache(struct btrfs_fs_info *fs_info, | ||
62 | struct btrfs_block_group_cache *block_group); | ||
63 | int btrfs_write_out_cache(struct btrfs_root *root, | ||
64 | struct btrfs_trans_handle *trans, | ||
65 | struct btrfs_block_group_cache *block_group, | ||
66 | struct btrfs_path *path); | ||
67 | |||
68 | struct inode *lookup_free_ino_inode(struct btrfs_root *root, | ||
69 | struct btrfs_path *path); | ||
70 | int create_free_ino_inode(struct btrfs_root *root, | ||
71 | struct btrfs_trans_handle *trans, | ||
72 | struct btrfs_path *path); | ||
73 | int load_free_ino_cache(struct btrfs_fs_info *fs_info, | ||
74 | struct btrfs_root *root); | ||
75 | int btrfs_write_out_ino_cache(struct btrfs_root *root, | ||
76 | struct btrfs_trans_handle *trans, | ||
77 | struct btrfs_path *path); | ||
78 | |||
79 | void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group); | ||
80 | int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, | ||
81 | u64 bytenr, u64 size); | ||
82 | static inline int | ||
83 | btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | ||
84 | u64 bytenr, u64 size) | ||
85 | { | ||
86 | return __btrfs_add_free_space(block_group->free_space_ctl, | ||
87 | bytenr, size); | ||
88 | } | ||
32 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | 89 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, |
33 | u64 bytenr, u64 size); | 90 | u64 bytenr, u64 size); |
91 | void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl); | ||
34 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache | 92 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache |
35 | *block_group); | 93 | *block_group); |
36 | u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, | 94 | u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, |
37 | u64 offset, u64 bytes, u64 empty_size); | 95 | u64 offset, u64 bytes, u64 empty_size); |
96 | u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root); | ||
38 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | 97 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, |
39 | u64 bytes); | 98 | u64 bytes); |
40 | u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group); | ||
41 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | 99 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, |
42 | struct btrfs_root *root, | 100 | struct btrfs_root *root, |
43 | struct btrfs_block_group_cache *block_group, | 101 | struct btrfs_block_group_cache *block_group, |
@@ -50,4 +108,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
50 | int btrfs_return_cluster_to_free_space( | 108 | int btrfs_return_cluster_to_free_space( |
51 | struct btrfs_block_group_cache *block_group, | 109 | struct btrfs_block_group_cache *block_group, |
52 | struct btrfs_free_cluster *cluster); | 110 | struct btrfs_free_cluster *cluster); |
111 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | ||
112 | u64 *trimmed, u64 start, u64 end, u64 minlen); | ||
53 | #endif | 113 | #endif |
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 64f1150bb48d..baa74f3db691 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
@@ -130,7 +130,6 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
130 | item_size - (ptr + sub_item_len - item_start)); | 130 | item_size - (ptr + sub_item_len - item_start)); |
131 | ret = btrfs_truncate_item(trans, root, path, | 131 | ret = btrfs_truncate_item(trans, root, path, |
132 | item_size - sub_item_len, 1); | 132 | item_size - sub_item_len, 1); |
133 | BUG_ON(ret); | ||
134 | out: | 133 | out: |
135 | btrfs_free_path(path); | 134 | btrfs_free_path(path); |
136 | return ret; | 135 | return ret; |
@@ -167,7 +166,6 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | |||
167 | 166 | ||
168 | old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); | 167 | old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); |
169 | ret = btrfs_extend_item(trans, root, path, ins_len); | 168 | ret = btrfs_extend_item(trans, root, path, ins_len); |
170 | BUG_ON(ret); | ||
171 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], | 169 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], |
172 | struct btrfs_inode_ref); | 170 | struct btrfs_inode_ref); |
173 | ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); | 171 | ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index c56eb5909172..b4087e0fa871 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -16,11 +16,476 @@ | |||
16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/delay.h> | ||
20 | #include <linux/kthread.h> | ||
21 | #include <linux/pagemap.h> | ||
22 | |||
19 | #include "ctree.h" | 23 | #include "ctree.h" |
20 | #include "disk-io.h" | 24 | #include "disk-io.h" |
25 | #include "free-space-cache.h" | ||
26 | #include "inode-map.h" | ||
21 | #include "transaction.h" | 27 | #include "transaction.h" |
22 | 28 | ||
23 | int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) | 29 | static int caching_kthread(void *data) |
30 | { | ||
31 | struct btrfs_root *root = data; | ||
32 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
33 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | ||
34 | struct btrfs_key key; | ||
35 | struct btrfs_path *path; | ||
36 | struct extent_buffer *leaf; | ||
37 | u64 last = (u64)-1; | ||
38 | int slot; | ||
39 | int ret; | ||
40 | |||
41 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
42 | return 0; | ||
43 | |||
44 | path = btrfs_alloc_path(); | ||
45 | if (!path) | ||
46 | return -ENOMEM; | ||
47 | |||
48 | /* Since the commit root is read-only, we can safely skip locking. */ | ||
49 | path->skip_locking = 1; | ||
50 | path->search_commit_root = 1; | ||
51 | path->reada = 2; | ||
52 | |||
53 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; | ||
54 | key.offset = 0; | ||
55 | key.type = BTRFS_INODE_ITEM_KEY; | ||
56 | again: | ||
57 | /* need to make sure the commit_root doesn't disappear */ | ||
58 | mutex_lock(&root->fs_commit_mutex); | ||
59 | |||
60 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
61 | if (ret < 0) | ||
62 | goto out; | ||
63 | |||
64 | while (1) { | ||
65 | if (btrfs_fs_closing(fs_info)) | ||
66 | goto out; | ||
67 | |||
68 | leaf = path->nodes[0]; | ||
69 | slot = path->slots[0]; | ||
70 | if (slot >= btrfs_header_nritems(leaf)) { | ||
71 | ret = btrfs_next_leaf(root, path); | ||
72 | if (ret < 0) | ||
73 | goto out; | ||
74 | else if (ret > 0) | ||
75 | break; | ||
76 | |||
77 | if (need_resched() || | ||
78 | btrfs_transaction_in_commit(fs_info)) { | ||
79 | leaf = path->nodes[0]; | ||
80 | |||
81 | if (btrfs_header_nritems(leaf) == 0) { | ||
82 | WARN_ON(1); | ||
83 | break; | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * Save the key so we can advance forward | ||
88 | * in the next search. | ||
89 | */ | ||
90 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
91 | btrfs_release_path(path); | ||
92 | root->cache_progress = last; | ||
93 | mutex_unlock(&root->fs_commit_mutex); | ||
94 | schedule_timeout(1); | ||
95 | goto again; | ||
96 | } else | ||
97 | continue; | ||
98 | } | ||
99 | |||
100 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
101 | |||
102 | if (key.type != BTRFS_INODE_ITEM_KEY) | ||
103 | goto next; | ||
104 | |||
105 | if (key.objectid >= root->highest_objectid) | ||
106 | break; | ||
107 | |||
108 | if (last != (u64)-1 && last + 1 != key.objectid) { | ||
109 | __btrfs_add_free_space(ctl, last + 1, | ||
110 | key.objectid - last - 1); | ||
111 | wake_up(&root->cache_wait); | ||
112 | } | ||
113 | |||
114 | last = key.objectid; | ||
115 | next: | ||
116 | path->slots[0]++; | ||
117 | } | ||
118 | |||
119 | if (last < root->highest_objectid - 1) { | ||
120 | __btrfs_add_free_space(ctl, last + 1, | ||
121 | root->highest_objectid - last - 1); | ||
122 | } | ||
123 | |||
124 | spin_lock(&root->cache_lock); | ||
125 | root->cached = BTRFS_CACHE_FINISHED; | ||
126 | spin_unlock(&root->cache_lock); | ||
127 | |||
128 | root->cache_progress = (u64)-1; | ||
129 | btrfs_unpin_free_ino(root); | ||
130 | out: | ||
131 | wake_up(&root->cache_wait); | ||
132 | mutex_unlock(&root->fs_commit_mutex); | ||
133 | |||
134 | btrfs_free_path(path); | ||
135 | |||
136 | return ret; | ||
137 | } | ||
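
The caching thread above turns the sorted stream of existing inode numbers into
free ranges: every gap between consecutive objectids becomes a free chunk, and
the tail up to highest_objectid is added at the end. The same accumulation as a
self-contained sketch (add_free_range stands in for __btrfs_add_free_space, and
the numbers are made up):

#include <stdint.h>
#include <stdio.h>

static void add_free_range(uint64_t start, uint64_t len)
{
	printf("free: [%llu, %llu)\n", (unsigned long long)start,
	       (unsigned long long)(start + len));
}

int main(void)
{
	/* Inode numbers found while walking the tree, in key order. */
	uint64_t inodes[] = { 256, 257, 260, 300 };
	uint64_t highest = 305;		/* root->highest_objectid */
	uint64_t last = (uint64_t)-1;
	size_t i;

	for (i = 0; i < sizeof(inodes) / sizeof(inodes[0]); i++) {
		if (last != (uint64_t)-1 && last + 1 != inodes[i])
			add_free_range(last + 1, inodes[i] - last - 1);
		last = inodes[i];
	}
	if (last < highest - 1)		/* trailing gap up to the highest ino */
		add_free_range(last + 1, highest - last - 1);
	return 0;
}
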
138 | |||
139 | static void start_caching(struct btrfs_root *root) | ||
140 | { | ||
141 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | ||
142 | struct task_struct *tsk; | ||
143 | int ret; | ||
144 | u64 objectid; | ||
145 | |||
146 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
147 | return; | ||
148 | |||
149 | spin_lock(&root->cache_lock); | ||
150 | if (root->cached != BTRFS_CACHE_NO) { | ||
151 | spin_unlock(&root->cache_lock); | ||
152 | return; | ||
153 | } | ||
154 | |||
155 | root->cached = BTRFS_CACHE_STARTED; | ||
156 | spin_unlock(&root->cache_lock); | ||
157 | |||
158 | ret = load_free_ino_cache(root->fs_info, root); | ||
159 | if (ret == 1) { | ||
160 | spin_lock(&root->cache_lock); | ||
161 | root->cached = BTRFS_CACHE_FINISHED; | ||
162 | spin_unlock(&root->cache_lock); | ||
163 | return; | ||
164 | } | ||
165 | |||
166 | /* | ||
167 | * It can be quite time-consuming to fill the cache by searching | ||
168 | * through the extent tree, and this can keep the ino allocation path | ||
169 | * waiting. Therefore at the start we quickly find out the highest | ||
170 | * inode number and we know we can use inode numbers which fall in | ||
171 | * [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID]. | ||
172 | */ | ||
173 | ret = btrfs_find_free_objectid(root, &objectid); | ||
174 | if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) { | ||
175 | __btrfs_add_free_space(ctl, objectid, | ||
176 | BTRFS_LAST_FREE_OBJECTID - objectid + 1); | ||
177 | } | ||
178 | |||
179 | tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n", | ||
180 | root->root_key.objectid); | ||
181 | BUG_ON(IS_ERR(tsk)); | ||
182 | } | ||
183 | |||
184 | int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) | ||
185 | { | ||
186 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
187 | return btrfs_find_free_objectid(root, objectid); | ||
188 | |||
189 | again: | ||
190 | *objectid = btrfs_find_ino_for_alloc(root); | ||
191 | |||
192 | if (*objectid != 0) | ||
193 | return 0; | ||
194 | |||
195 | start_caching(root); | ||
196 | |||
197 | wait_event(root->cache_wait, | ||
198 | root->cached == BTRFS_CACHE_FINISHED || | ||
199 | root->free_ino_ctl->free_space > 0); | ||
200 | |||
201 | if (root->cached == BTRFS_CACHE_FINISHED && | ||
202 | root->free_ino_ctl->free_space == 0) | ||
203 | return -ENOSPC; | ||
204 | else | ||
205 | goto again; | ||
206 | } | ||
207 | |||
208 | void btrfs_return_ino(struct btrfs_root *root, u64 objectid) | ||
209 | { | ||
210 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | ||
211 | struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; | ||
212 | |||
213 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
214 | return; | ||
215 | |||
216 | again: | ||
217 | if (root->cached == BTRFS_CACHE_FINISHED) { | ||
218 | __btrfs_add_free_space(ctl, objectid, 1); | ||
219 | } else { | ||
220 | /* | ||
221 | * If we are in the process of caching free ino chunks, | ||
222 | * to avoid adding the same inode number to the free_ino | ||
223 | * tree twice across transactions, we'll leave it | ||
224 | * in the pinned tree until a transaction is committed | ||
225 | * or the caching work is done. | ||
226 | */ | ||
227 | |||
228 | mutex_lock(&root->fs_commit_mutex); | ||
229 | spin_lock(&root->cache_lock); | ||
230 | if (root->cached == BTRFS_CACHE_FINISHED) { | ||
231 | spin_unlock(&root->cache_lock); | ||
232 | mutex_unlock(&root->fs_commit_mutex); | ||
233 | goto again; | ||
234 | } | ||
235 | spin_unlock(&root->cache_lock); | ||
236 | |||
237 | start_caching(root); | ||
238 | |||
239 | if (objectid <= root->cache_progress || | ||
240 | objectid > root->highest_objectid) | ||
241 | __btrfs_add_free_space(ctl, objectid, 1); | ||
242 | else | ||
243 | __btrfs_add_free_space(pinned, objectid, 1); | ||
244 | |||
245 | mutex_unlock(&root->fs_commit_mutex); | ||
246 | } | ||
247 | } | ||
248 | |||
249 | /* | ||
250 | * When a transaction is committed, we'll move those inode numbers which | ||
251 | * are smaller than root->cache_progress from pinned tree to free_ino tree, | ||
252 | * and others will just be dropped, because the commit root we were | ||
253 | * searching has changed. | ||
254 | * | ||
255 | * Must be called with root->fs_commit_mutex held | ||
256 | */ | ||
257 | void btrfs_unpin_free_ino(struct btrfs_root *root) | ||
258 | { | ||
259 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | ||
260 | struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset; | ||
261 | struct btrfs_free_space *info; | ||
262 | struct rb_node *n; | ||
263 | u64 count; | ||
264 | |||
265 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
266 | return; | ||
267 | |||
268 | while (1) { | ||
269 | n = rb_first(rbroot); | ||
270 | if (!n) | ||
271 | break; | ||
272 | |||
273 | info = rb_entry(n, struct btrfs_free_space, offset_index); | ||
274 | BUG_ON(info->bitmap); | ||
275 | |||
276 | if (info->offset > root->cache_progress) | ||
277 | goto free; | ||
278 | else if (info->offset + info->bytes > root->cache_progress) | ||
279 | count = root->cache_progress - info->offset + 1; | ||
280 | else | ||
281 | count = info->bytes; | ||
282 | |||
283 | __btrfs_add_free_space(ctl, info->offset, count); | ||
284 | free: | ||
285 | rb_erase(&info->offset_index, rbroot); | ||
286 | kfree(info); | ||
287 | } | ||
288 | } | ||
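
btrfs_unpin_free_ino splits every pinned range against cache_progress: a range
entirely past the progress point is dropped (the caching pass will rediscover
it), a straddling range is clipped so only the already-scanned head moves to the
free tree, and everything else moves wholesale. A plain-C sketch of that
partitioning (an array instead of the rbtree; the values are made up):

#include <stdint.h>
#include <stdio.h>

struct range { uint64_t offset, bytes; };

int main(void)
{
	struct range pinned[] = { { 260, 5 }, { 270, 10 }, { 300, 4 } };
	uint64_t progress = 275;	/* root->cache_progress */
	size_t i;

	for (i = 0; i < sizeof(pinned) / sizeof(pinned[0]); i++) {
		uint64_t off = pinned[i].offset, n = pinned[i].bytes;

		if (off > progress)
			continue;		/* dropped entirely */
		if (off + n > progress)
			n = progress - off + 1;	/* keep the scanned head */
		printf("unpin: [%llu, %llu)\n", (unsigned long long)off,
		       (unsigned long long)(off + n));
	}
	return 0;
}
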
289 | |||
290 | #define INIT_THRESHOLD (((1024 * 32) / 2) / sizeof(struct btrfs_free_space)) | ||
291 | #define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8) | ||
292 | |||
293 | /* | ||
294 | * The goal is to keep the memory used by the free_ino tree from | ||
295 | * exceeding the memory we would use if we used bitmaps only. | ||
296 | */ | ||
297 | static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) | ||
298 | { | ||
299 | struct btrfs_free_space *info; | ||
300 | struct rb_node *n; | ||
301 | int max_ino; | ||
302 | int max_bitmaps; | ||
303 | |||
304 | n = rb_last(&ctl->free_space_offset); | ||
305 | if (!n) { | ||
306 | ctl->extents_thresh = INIT_THRESHOLD; | ||
307 | return; | ||
308 | } | ||
309 | info = rb_entry(n, struct btrfs_free_space, offset_index); | ||
310 | |||
311 | /* | ||
312 | * Find the maximum inode number in the filesystem. Note we | ||
313 | * ignore the fact that this can be a bitmap, because we are | ||
314 | * not doing a precise calculation. | ||
315 | */ | ||
316 | max_ino = info->bytes - 1; | ||
317 | |||
318 | max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP; | ||
319 | if (max_bitmaps <= ctl->total_bitmaps) { | ||
320 | ctl->extents_thresh = 0; | ||
321 | return; | ||
322 | } | ||
323 | |||
324 | ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) * | ||
325 | PAGE_CACHE_SIZE / sizeof(*info); | ||
326 | } | ||
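
Concretely, recalculate_thresholds budgets memory as if everything were bitmaps:
whatever pages the bitmaps have not yet consumed may be spent on extent entries
instead. A worked example assuming a 4K page and a 48-byte struct
btrfs_free_space (both sizes are assumptions for illustration only):

#include <stdio.h>

#define PAGE_SIZE_B	4096
#define INFO_SIZE	48			/* assumed entry size */
#define INODES_PER_BITMAP (PAGE_SIZE_B * 8)	/* 32768 inos per page */

int main(void)
{
	int max_ino = 100000;	/* rough highest cached inode number */
	int total_bitmaps = 1;	/* bitmap pages already in use */
	int max_bitmaps = (max_ino + INODES_PER_BITMAP - 1) / INODES_PER_BITMAP;

	/* Same rule as recalculate_thresholds(): the pages not consumed
	 * by bitmaps buy extent entries instead. */
	int thresh = (max_bitmaps - total_bitmaps) * PAGE_SIZE_B / INFO_SIZE;

	printf("max_bitmaps=%d extents_thresh=%d\n", max_bitmaps, thresh);
	return 0;	/* prints max_bitmaps=4 extents_thresh=256 */
}
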
327 | |||
328 | /* | ||
329 | * We don't fall back to bitmap, if we are below the extents threshold | ||
330 | * or this chunk of inode numbers is a big one. | ||
331 | */ | ||
332 | static bool use_bitmap(struct btrfs_free_space_ctl *ctl, | ||
333 | struct btrfs_free_space *info) | ||
334 | { | ||
335 | if (ctl->free_extents < ctl->extents_thresh || | ||
336 | info->bytes > INODES_PER_BITMAP / 10) | ||
337 | return false; | ||
338 | |||
339 | return true; | ||
340 | } | ||
341 | |||
342 | static struct btrfs_free_space_op free_ino_op = { | ||
343 | .recalc_thresholds = recalculate_thresholds, | ||
344 | .use_bitmap = use_bitmap, | ||
345 | }; | ||
346 | |||
347 | static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl) | ||
348 | { | ||
349 | } | ||
350 | |||
351 | static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl, | ||
352 | struct btrfs_free_space *info) | ||
353 | { | ||
354 | /* | ||
355 | * We always use extents for two reasons: | ||
356 | * | ||
357 | * - The pinned tree is only used while the caching work is | ||
358 | * in progress. | ||
359 | * - It keeps the code simpler. See btrfs_unpin_free_ino(). | ||
360 | */ | ||
361 | return false; | ||
362 | } | ||
363 | |||
364 | static struct btrfs_free_space_op pinned_free_ino_op = { | ||
365 | .recalc_thresholds = pinned_recalc_thresholds, | ||
366 | .use_bitmap = pinned_use_bitmap, | ||
367 | }; | ||
368 | |||
369 | void btrfs_init_free_ino_ctl(struct btrfs_root *root) | ||
370 | { | ||
371 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | ||
372 | struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; | ||
373 | |||
374 | spin_lock_init(&ctl->tree_lock); | ||
375 | ctl->unit = 1; | ||
376 | ctl->start = 0; | ||
377 | ctl->private = NULL; | ||
378 | ctl->op = &free_ino_op; | ||
379 | |||
380 | /* | ||
381 | * Initially we allow ourselves to use 16K of RAM to cache chunks of | ||
382 | * inode numbers before we resort to bitmaps. This is somewhat | ||
383 | * arbitrary, but it will be adjusted at runtime. | ||
384 | */ | ||
385 | ctl->extents_thresh = INIT_THRESHOLD; | ||
386 | |||
387 | spin_lock_init(&pinned->tree_lock); | ||
388 | pinned->unit = 1; | ||
389 | pinned->start = 0; | ||
390 | pinned->private = NULL; | ||
391 | pinned->extents_thresh = 0; | ||
392 | pinned->op = &pinned_free_ino_op; | ||
393 | } | ||
394 | |||
395 | int btrfs_save_ino_cache(struct btrfs_root *root, | ||
396 | struct btrfs_trans_handle *trans) | ||
397 | { | ||
398 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | ||
399 | struct btrfs_path *path; | ||
400 | struct inode *inode; | ||
401 | u64 alloc_hint = 0; | ||
402 | int ret; | ||
403 | int prealloc; | ||
404 | bool retry = false; | ||
405 | |||
406 | /* only the fs tree and subvol/snapshot trees need the ino cache */ | ||
407 | if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID && | ||
408 | (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID || | ||
409 | root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID)) | ||
410 | return 0; | ||
411 | |||
412 | /* Don't save inode cache if we are deleting this root */ | ||
413 | if (btrfs_root_refs(&root->root_item) == 0 && | ||
414 | root != root->fs_info->tree_root) | ||
415 | return 0; | ||
416 | |||
417 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
418 | return 0; | ||
419 | |||
420 | path = btrfs_alloc_path(); | ||
421 | if (!path) | ||
422 | return -ENOMEM; | ||
423 | |||
424 | again: | ||
425 | inode = lookup_free_ino_inode(root, path); | ||
426 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { | ||
427 | ret = PTR_ERR(inode); | ||
428 | goto out; | ||
429 | } | ||
430 | |||
431 | if (IS_ERR(inode)) { | ||
432 | BUG_ON(retry); | ||
433 | retry = true; | ||
434 | |||
435 | ret = create_free_ino_inode(root, trans, path); | ||
436 | if (ret) | ||
437 | goto out; | ||
438 | goto again; | ||
439 | } | ||
440 | |||
441 | BTRFS_I(inode)->generation = 0; | ||
442 | ret = btrfs_update_inode(trans, root, inode); | ||
443 | WARN_ON(ret); | ||
444 | |||
445 | if (i_size_read(inode) > 0) { | ||
446 | ret = btrfs_truncate_free_space_cache(root, trans, path, inode); | ||
447 | if (ret) | ||
448 | goto out_put; | ||
449 | } | ||
450 | |||
451 | spin_lock(&root->cache_lock); | ||
452 | if (root->cached != BTRFS_CACHE_FINISHED) { | ||
453 | ret = -1; | ||
454 | spin_unlock(&root->cache_lock); | ||
455 | goto out_put; | ||
456 | } | ||
457 | spin_unlock(&root->cache_lock); | ||
458 | |||
459 | spin_lock(&ctl->tree_lock); | ||
460 | prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents; | ||
461 | prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE); | ||
462 | prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE; | ||
463 | spin_unlock(&ctl->tree_lock); | ||
464 | |||
465 | /* Just to make sure we have enough space */ | ||
466 | prealloc += 8 * PAGE_CACHE_SIZE; | ||
467 | |||
468 | ret = btrfs_check_data_free_space(inode, prealloc); | ||
469 | if (ret) | ||
470 | goto out_put; | ||
471 | |||
472 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, | ||
473 | prealloc, prealloc, &alloc_hint); | ||
474 | if (ret) | ||
475 | goto out_put; | ||
476 | btrfs_free_reserved_data_space(inode, prealloc); | ||
477 | |||
478 | out_put: | ||
479 | iput(inode); | ||
480 | out: | ||
481 | if (ret == 0) | ||
482 | ret = btrfs_write_out_ino_cache(root, trans, path); | ||
483 | |||
484 | btrfs_free_path(path); | ||
485 | return ret; | ||
486 | } | ||
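
The preallocation in btrfs_save_ino_cache sizes the cache file up front: the
extent entries' bytes rounded up to whole pages, one page per bitmap, plus eight
slack pages "just to make sure". The same sizing as arithmetic, reusing the
assumed 48-byte entry size from the sketch above:

#include <stdio.h>

#define PAGE_SIZE_B 4096UL

static unsigned long align_up(unsigned long x, unsigned long a)
{
	return (x + a - 1) & ~(a - 1);
}

int main(void)
{
	unsigned long free_extents = 1000, total_bitmaps = 2;
	unsigned long prealloc = align_up(48 * free_extents, PAGE_SIZE_B);

	prealloc += total_bitmaps * PAGE_SIZE_B;	/* bitmap pages */
	prealloc += 8 * PAGE_SIZE_B;			/* safety slack */
	printf("prealloc = %lu bytes\n", prealloc);	/* 90112 */
	return 0;
}
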
487 | |||
488 | static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid) | ||
24 | { | 489 | { |
25 | struct btrfs_path *path; | 490 | struct btrfs_path *path; |
26 | int ret; | 491 | int ret; |
@@ -30,7 +495,8 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) | |||
30 | int slot; | 495 | int slot; |
31 | 496 | ||
32 | path = btrfs_alloc_path(); | 497 | path = btrfs_alloc_path(); |
33 | BUG_ON(!path); | 498 | if (!path) |
499 | return -ENOMEM; | ||
34 | 500 | ||
35 | search_key.objectid = BTRFS_LAST_FREE_OBJECTID; | 501 | search_key.objectid = BTRFS_LAST_FREE_OBJECTID; |
36 | search_key.type = -1; | 502 | search_key.type = -1; |
@@ -54,15 +520,14 @@ error: | |||
54 | return ret; | 520 | return ret; |
55 | } | 521 | } |
56 | 522 | ||
57 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | 523 | int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid) |
58 | struct btrfs_root *root, | ||
59 | u64 dirid, u64 *objectid) | ||
60 | { | 524 | { |
61 | int ret; | 525 | int ret; |
62 | mutex_lock(&root->objectid_mutex); | 526 | mutex_lock(&root->objectid_mutex); |
63 | 527 | ||
64 | if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { | 528 | if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { |
65 | ret = btrfs_find_highest_inode(root, &root->highest_objectid); | 529 | ret = btrfs_find_highest_objectid(root, |
530 | &root->highest_objectid); | ||
66 | if (ret) | 531 | if (ret) |
67 | goto out; | 532 | goto out; |
68 | } | 533 | } |
diff --git a/fs/btrfs/inode-map.h b/fs/btrfs/inode-map.h new file mode 100644 index 000000000000..ddb347bfee23 --- /dev/null +++ b/fs/btrfs/inode-map.h | |||
@@ -0,0 +1,13 @@ | |||
1 | #ifndef __BTRFS_INODE_MAP | ||
2 | #define __BTRFS_INODE_MAP | ||
3 | |||
4 | void btrfs_init_free_ino_ctl(struct btrfs_root *root); | ||
5 | void btrfs_unpin_free_ino(struct btrfs_root *root); | ||
6 | void btrfs_return_ino(struct btrfs_root *root, u64 objectid); | ||
7 | int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid); | ||
8 | int btrfs_save_ino_cache(struct btrfs_root *root, | ||
9 | struct btrfs_trans_handle *trans); | ||
10 | |||
11 | int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid); | ||
12 | |||
13 | #endif | ||
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c03864406af3..3601f0aebddf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/posix_acl.h> | 37 | #include <linux/posix_acl.h> |
38 | #include <linux/falloc.h> | 38 | #include <linux/falloc.h> |
39 | #include <linux/slab.h> | 39 | #include <linux/slab.h> |
40 | #include <linux/ratelimit.h> | ||
40 | #include "compat.h" | 41 | #include "compat.h" |
41 | #include "ctree.h" | 42 | #include "ctree.h" |
42 | #include "disk-io.h" | 43 | #include "disk-io.h" |
@@ -50,6 +51,8 @@ | |||
50 | #include "tree-log.h" | 51 | #include "tree-log.h" |
51 | #include "compression.h" | 52 | #include "compression.h" |
52 | #include "locking.h" | 53 | #include "locking.h" |
54 | #include "free-space-cache.h" | ||
55 | #include "inode-map.h" | ||
53 | 56 | ||
54 | struct btrfs_iget_args { | 57 | struct btrfs_iget_args { |
55 | u64 ino; | 58 | u64 ino; |
@@ -70,6 +73,7 @@ static struct kmem_cache *btrfs_inode_cachep; | |||
70 | struct kmem_cache *btrfs_trans_handle_cachep; | 73 | struct kmem_cache *btrfs_trans_handle_cachep; |
71 | struct kmem_cache *btrfs_transaction_cachep; | 74 | struct kmem_cache *btrfs_transaction_cachep; |
72 | struct kmem_cache *btrfs_path_cachep; | 75 | struct kmem_cache *btrfs_path_cachep; |
76 | struct kmem_cache *btrfs_free_space_cachep; | ||
73 | 77 | ||
74 | #define S_SHIFT 12 | 78 | #define S_SHIFT 12 |
75 | static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | 79 | static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { |
@@ -82,7 +86,8 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | |||
82 | [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, | 86 | [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, |
83 | }; | 87 | }; |
84 | 88 | ||
85 | static void btrfs_truncate(struct inode *inode); | 89 | static int btrfs_setsize(struct inode *inode, loff_t newsize); |
90 | static int btrfs_truncate(struct inode *inode); | ||
86 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); | 91 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); |
87 | static noinline int cow_file_range(struct inode *inode, | 92 | static noinline int cow_file_range(struct inode *inode, |
88 | struct page *locked_page, | 93 | struct page *locked_page, |
@@ -90,13 +95,14 @@ static noinline int cow_file_range(struct inode *inode, | |||
90 | unsigned long *nr_written, int unlock); | 95 | unsigned long *nr_written, int unlock); |
91 | 96 | ||
92 | static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, | 97 | static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, |
93 | struct inode *inode, struct inode *dir) | 98 | struct inode *inode, struct inode *dir, |
99 | const struct qstr *qstr) | ||
94 | { | 100 | { |
95 | int err; | 101 | int err; |
96 | 102 | ||
97 | err = btrfs_init_acl(trans, inode, dir); | 103 | err = btrfs_init_acl(trans, inode, dir); |
98 | if (!err) | 104 | if (!err) |
99 | err = btrfs_xattr_security_init(trans, inode, dir); | 105 | err = btrfs_xattr_security_init(trans, inode, dir, qstr); |
100 | return err; | 106 | return err; |
101 | } | 107 | } |
102 | 108 | ||
@@ -108,6 +114,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, | |||
108 | static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | 114 | static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, |
109 | struct btrfs_root *root, struct inode *inode, | 115 | struct btrfs_root *root, struct inode *inode, |
110 | u64 start, size_t size, size_t compressed_size, | 116 | u64 start, size_t size, size_t compressed_size, |
117 | int compress_type, | ||
111 | struct page **compressed_pages) | 118 | struct page **compressed_pages) |
112 | { | 119 | { |
113 | struct btrfs_key key; | 120 | struct btrfs_key key; |
@@ -122,21 +129,17 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
122 | size_t cur_size = size; | 129 | size_t cur_size = size; |
123 | size_t datasize; | 130 | size_t datasize; |
124 | unsigned long offset; | 131 | unsigned long offset; |
125 | int use_compress = 0; | ||
126 | 132 | ||
127 | if (compressed_size && compressed_pages) { | 133 | if (compressed_size && compressed_pages) |
128 | use_compress = 1; | ||
129 | cur_size = compressed_size; | 134 | cur_size = compressed_size; |
130 | } | ||
131 | 135 | ||
132 | path = btrfs_alloc_path(); | 136 | path = btrfs_alloc_path(); |
133 | if (!path) | 137 | if (!path) |
134 | return -ENOMEM; | 138 | return -ENOMEM; |
135 | 139 | ||
136 | path->leave_spinning = 1; | 140 | path->leave_spinning = 1; |
137 | btrfs_set_trans_block_group(trans, inode); | ||
138 | 141 | ||
139 | key.objectid = inode->i_ino; | 142 | key.objectid = btrfs_ino(inode); |
140 | key.offset = start; | 143 | key.offset = start; |
141 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); | 144 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); |
142 | datasize = btrfs_file_extent_calc_inline_size(cur_size); | 145 | datasize = btrfs_file_extent_calc_inline_size(cur_size); |
@@ -159,7 +162,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
159 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); | 162 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); |
160 | ptr = btrfs_file_extent_inline_start(ei); | 163 | ptr = btrfs_file_extent_inline_start(ei); |
161 | 164 | ||
162 | if (use_compress) { | 165 | if (compress_type != BTRFS_COMPRESS_NONE) { |
163 | struct page *cpage; | 166 | struct page *cpage; |
164 | int i = 0; | 167 | int i = 0; |
165 | while (compressed_size > 0) { | 168 | while (compressed_size > 0) { |
@@ -176,7 +179,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
176 | compressed_size -= cur_size; | 179 | compressed_size -= cur_size; |
177 | } | 180 | } |
178 | btrfs_set_file_extent_compression(leaf, ei, | 181 | btrfs_set_file_extent_compression(leaf, ei, |
179 | BTRFS_COMPRESS_ZLIB); | 182 | compress_type); |
180 | } else { | 183 | } else { |
181 | page = find_get_page(inode->i_mapping, | 184 | page = find_get_page(inode->i_mapping, |
182 | start >> PAGE_CACHE_SHIFT); | 185 | start >> PAGE_CACHE_SHIFT); |
@@ -217,7 +220,7 @@ fail: | |||
217 | static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | 220 | static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, |
218 | struct btrfs_root *root, | 221 | struct btrfs_root *root, |
219 | struct inode *inode, u64 start, u64 end, | 222 | struct inode *inode, u64 start, u64 end, |
220 | size_t compressed_size, | 223 | size_t compressed_size, int compress_type, |
221 | struct page **compressed_pages) | 224 | struct page **compressed_pages) |
222 | { | 225 | { |
223 | u64 isize = i_size_read(inode); | 226 | u64 isize = i_size_read(inode); |
@@ -250,7 +253,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
250 | inline_len = min_t(u64, isize, actual_end); | 253 | inline_len = min_t(u64, isize, actual_end); |
251 | ret = insert_inline_extent(trans, root, inode, start, | 254 | ret = insert_inline_extent(trans, root, inode, start, |
252 | inline_len, compressed_size, | 255 | inline_len, compressed_size, |
253 | compressed_pages); | 256 | compress_type, compressed_pages); |
254 | BUG_ON(ret); | 257 | BUG_ON(ret); |
255 | btrfs_delalloc_release_metadata(inode, end + 1 - start); | 258 | btrfs_delalloc_release_metadata(inode, end + 1 - start); |
256 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); | 259 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
@@ -263,6 +266,7 @@ struct async_extent { | |||
263 | u64 compressed_size; | 266 | u64 compressed_size; |
264 | struct page **pages; | 267 | struct page **pages; |
265 | unsigned long nr_pages; | 268 | unsigned long nr_pages; |
269 | int compress_type; | ||
266 | struct list_head list; | 270 | struct list_head list; |
267 | }; | 271 | }; |
268 | 272 | ||
@@ -280,16 +284,19 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
280 | u64 start, u64 ram_size, | 284 | u64 start, u64 ram_size, |
281 | u64 compressed_size, | 285 | u64 compressed_size, |
282 | struct page **pages, | 286 | struct page **pages, |
283 | unsigned long nr_pages) | 287 | unsigned long nr_pages, |
288 | int compress_type) | ||
284 | { | 289 | { |
285 | struct async_extent *async_extent; | 290 | struct async_extent *async_extent; |
286 | 291 | ||
287 | async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); | 292 | async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); |
293 | BUG_ON(!async_extent); | ||
288 | async_extent->start = start; | 294 | async_extent->start = start; |
289 | async_extent->ram_size = ram_size; | 295 | async_extent->ram_size = ram_size; |
290 | async_extent->compressed_size = compressed_size; | 296 | async_extent->compressed_size = compressed_size; |
291 | async_extent->pages = pages; | 297 | async_extent->pages = pages; |
292 | async_extent->nr_pages = nr_pages; | 298 | async_extent->nr_pages = nr_pages; |
299 | async_extent->compress_type = compress_type; | ||
293 | list_add_tail(&async_extent->list, &cow->extents); | 300 | list_add_tail(&async_extent->list, &cow->extents); |
294 | return 0; | 301 | return 0; |
295 | } | 302 | } |
@@ -319,8 +326,6 @@ static noinline int compress_file_range(struct inode *inode, | |||
319 | struct btrfs_root *root = BTRFS_I(inode)->root; | 326 | struct btrfs_root *root = BTRFS_I(inode)->root; |
320 | struct btrfs_trans_handle *trans; | 327 | struct btrfs_trans_handle *trans; |
321 | u64 num_bytes; | 328 | u64 num_bytes; |
322 | u64 orig_start; | ||
323 | u64 disk_num_bytes; | ||
324 | u64 blocksize = root->sectorsize; | 329 | u64 blocksize = root->sectorsize; |
325 | u64 actual_end; | 330 | u64 actual_end; |
326 | u64 isize = i_size_read(inode); | 331 | u64 isize = i_size_read(inode); |
@@ -334,8 +339,11 @@ static noinline int compress_file_range(struct inode *inode, | |||
334 | unsigned long max_uncompressed = 128 * 1024; | 339 | unsigned long max_uncompressed = 128 * 1024; |
335 | int i; | 340 | int i; |
336 | int will_compress; | 341 | int will_compress; |
342 | int compress_type = root->fs_info->compress_type; | ||
337 | 343 | ||
338 | orig_start = start; | 344 | /* if this is a small write inside eof, kick off a defrag */ |
345 | if (end <= BTRFS_I(inode)->disk_i_size && (end - start + 1) < 16 * 1024) | ||
346 | btrfs_add_inode_defrag(NULL, inode); | ||
339 | 347 | ||
340 | actual_end = min_t(u64, isize, end + 1); | 348 | actual_end = min_t(u64, isize, end + 1); |
341 | again: | 349 | again: |
@@ -371,7 +379,6 @@ again: | |||
371 | total_compressed = min(total_compressed, max_uncompressed); | 379 | total_compressed = min(total_compressed, max_uncompressed); |
372 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); | 380 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); |
373 | num_bytes = max(blocksize, num_bytes); | 381 | num_bytes = max(blocksize, num_bytes); |
374 | disk_num_bytes = num_bytes; | ||
375 | total_in = 0; | 382 | total_in = 0; |
376 | ret = 0; | 383 | ret = 0; |
377 | 384 | ||
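The num_bytes arithmetic above rounds the inclusive [start, end] byte range up to whole blocks: since the range length is end - start + 1, the sum end - start + blocksize equals length + blocksize - 1, and masking with ~(blocksize - 1) then rounds down to a multiple of the power-of-two block size. A standalone check of that identity:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* round the inclusive range [start, end] up to whole blocks;
 * bs must be a power of two, as the sector size is in btrfs */
static uint64_t range_to_blocks(uint64_t start, uint64_t end, uint64_t bs)
{
    uint64_t n = (end - start + bs) & ~(bs - 1);
    return n > bs ? n : bs;    /* num_bytes = max(blocksize, num_bytes) */
}

int main(void)
{
    assert(range_to_blocks(0, 0, 4096) == 4096);       /* 1 byte -> 1 block  */
    assert(range_to_blocks(0, 4095, 4096) == 4096);    /* exactly 1 block    */
    assert(range_to_blocks(0, 4096, 4096) == 8192);    /* 1 block + 1 byte   */
    printf("range rounding behaves as expected\n");
    return 0;
}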
@@ -382,16 +389,22 @@ again: | |||
382 | */ | 389 | */ |
383 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && | 390 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && |
384 | (btrfs_test_opt(root, COMPRESS) || | 391 | (btrfs_test_opt(root, COMPRESS) || |
385 | (BTRFS_I(inode)->force_compress))) { | 392 | (BTRFS_I(inode)->force_compress) || |
393 | (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { | ||
386 | WARN_ON(pages); | 394 | WARN_ON(pages); |
387 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 395 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
396 | BUG_ON(!pages); | ||
397 | |||
398 | if (BTRFS_I(inode)->force_compress) | ||
399 | compress_type = BTRFS_I(inode)->force_compress; | ||
388 | 400 | ||
389 | ret = btrfs_zlib_compress_pages(inode->i_mapping, start, | 401 | ret = btrfs_compress_pages(compress_type, |
390 | total_compressed, pages, | 402 | inode->i_mapping, start, |
391 | nr_pages, &nr_pages_ret, | 403 | total_compressed, pages, |
392 | &total_in, | 404 | nr_pages, &nr_pages_ret, |
393 | &total_compressed, | 405 | &total_in, |
394 | max_compressed); | 406 | &total_compressed, |
407 | max_compressed); | ||
395 | 408 | ||
396 | if (!ret) { | 409 | if (!ret) { |
397 | unsigned long offset = total_compressed & | 410 | unsigned long offset = total_compressed & |
@@ -412,9 +425,8 @@ again: | |||
412 | } | 425 | } |
413 | } | 426 | } |
414 | if (start == 0) { | 427 | if (start == 0) { |
415 | trans = btrfs_join_transaction(root, 1); | 428 | trans = btrfs_join_transaction(root); |
416 | BUG_ON(!trans); | 429 | BUG_ON(IS_ERR(trans)); |
417 | btrfs_set_trans_block_group(trans, inode); | ||
418 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 430 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
419 | 431 | ||
420 | /* lets try to make an inline extent */ | 432 | /* lets try to make an inline extent */ |
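Throughout these hunks btrfs_join_transaction() loses its always-1 second argument and reports failure via the kernel's ERR_PTR convention rather than returning NULL, hence the switch from BUG_ON(!trans) to BUG_ON(IS_ERR(trans)). A userspace model of that convention, with MAX_ERRNO as in the kernel's err.h:

#include <stdio.h>

#define MAX_ERRNO 4095

/* userspace model of the kernel's ERR_PTR/IS_ERR/PTR_ERR helpers */
static void *ERR_PTR(long err) { return (void *)err; }
static long PTR_ERR(const void *p) { return (long)p; }
static int IS_ERR(const void *p)
{
    return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

static void *join_transaction(int fail)
{
    static int dummy;
    return fail ? ERR_PTR(-12 /* -ENOMEM */) : &dummy;
}

int main(void)
{
    void *trans = join_transaction(1);
    if (IS_ERR(trans))
        printf("join failed: errno %ld\n", -PTR_ERR(trans));
    trans = join_transaction(0);
    printf("join %s\n", IS_ERR(trans) ? "failed" : "succeeded");
    return 0;
}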
@@ -423,12 +435,13 @@ again: | |||
423 | * to make an uncompressed inline extent. | 435 | * to make an uncompressed inline extent. |
424 | */ | 436 | */ |
425 | ret = cow_file_range_inline(trans, root, inode, | 437 | ret = cow_file_range_inline(trans, root, inode, |
426 | start, end, 0, NULL); | 438 | start, end, 0, 0, NULL); |
427 | } else { | 439 | } else { |
428 | /* try making a compressed inline extent */ | 440 | /* try making a compressed inline extent */ |
429 | ret = cow_file_range_inline(trans, root, inode, | 441 | ret = cow_file_range_inline(trans, root, inode, |
430 | start, end, | 442 | start, end, |
431 | total_compressed, pages); | 443 | total_compressed, |
444 | compress_type, pages); | ||
432 | } | 445 | } |
433 | if (ret == 0) { | 446 | if (ret == 0) { |
434 | /* | 447 | /* |
@@ -467,7 +480,6 @@ again: | |||
467 | if (total_compressed >= total_in) { | 480 | if (total_compressed >= total_in) { |
468 | will_compress = 0; | 481 | will_compress = 0; |
469 | } else { | 482 | } else { |
470 | disk_num_bytes = total_compressed; | ||
471 | num_bytes = total_in; | 483 | num_bytes = total_in; |
472 | } | 484 | } |
473 | } | 485 | } |
@@ -499,9 +511,10 @@ again: | |||
499 | * and will submit them to the elevator. | 511 | * and will submit them to the elevator. |
500 | */ | 512 | */ |
501 | add_async_extent(async_cow, start, num_bytes, | 513 | add_async_extent(async_cow, start, num_bytes, |
502 | total_compressed, pages, nr_pages_ret); | 514 | total_compressed, pages, nr_pages_ret, |
515 | compress_type); | ||
503 | 516 | ||
504 | if (start + num_bytes < end && start + num_bytes < actual_end) { | 517 | if (start + num_bytes < end) { |
505 | start += num_bytes; | 518 | start += num_bytes; |
506 | pages = NULL; | 519 | pages = NULL; |
507 | cond_resched(); | 520 | cond_resched(); |
@@ -521,7 +534,8 @@ cleanup_and_bail_uncompressed: | |||
521 | __set_page_dirty_nobuffers(locked_page); | 534 | __set_page_dirty_nobuffers(locked_page); |
522 | /* unlocked later on in the async handlers */ | 535 | /* unlocked later on in the async handlers */ |
523 | } | 536 | } |
524 | add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0); | 537 | add_async_extent(async_cow, start, end - start + 1, |
538 | 0, NULL, 0, BTRFS_COMPRESS_NONE); | ||
525 | *num_added += 1; | 539 | *num_added += 1; |
526 | } | 540 | } |
527 | 541 | ||
@@ -607,7 +621,9 @@ retry: | |||
607 | async_extent->start + async_extent->ram_size - 1, | 621 | async_extent->start + async_extent->ram_size - 1, |
608 | GFP_NOFS); | 622 | GFP_NOFS); |
609 | 623 | ||
610 | trans = btrfs_join_transaction(root, 1); | 624 | trans = btrfs_join_transaction(root); |
625 | BUG_ON(IS_ERR(trans)); | ||
626 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
611 | ret = btrfs_reserve_extent(trans, root, | 627 | ret = btrfs_reserve_extent(trans, root, |
612 | async_extent->compressed_size, | 628 | async_extent->compressed_size, |
613 | async_extent->compressed_size, | 629 | async_extent->compressed_size, |
@@ -638,7 +654,8 @@ retry: | |||
638 | async_extent->start + | 654 | async_extent->start + |
639 | async_extent->ram_size - 1, 0); | 655 | async_extent->ram_size - 1, 0); |
640 | 656 | ||
641 | em = alloc_extent_map(GFP_NOFS); | 657 | em = alloc_extent_map(); |
658 | BUG_ON(!em); | ||
642 | em->start = async_extent->start; | 659 | em->start = async_extent->start; |
643 | em->len = async_extent->ram_size; | 660 | em->len = async_extent->ram_size; |
644 | em->orig_start = em->start; | 661 | em->orig_start = em->start; |
@@ -646,6 +663,7 @@ retry: | |||
646 | em->block_start = ins.objectid; | 663 | em->block_start = ins.objectid; |
647 | em->block_len = ins.offset; | 664 | em->block_len = ins.offset; |
648 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 665 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
666 | em->compress_type = async_extent->compress_type; | ||
649 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 667 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
650 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 668 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
651 | 669 | ||
@@ -662,11 +680,13 @@ retry: | |||
662 | async_extent->ram_size - 1, 0); | 680 | async_extent->ram_size - 1, 0); |
663 | } | 681 | } |
664 | 682 | ||
665 | ret = btrfs_add_ordered_extent(inode, async_extent->start, | 683 | ret = btrfs_add_ordered_extent_compress(inode, |
666 | ins.objectid, | 684 | async_extent->start, |
667 | async_extent->ram_size, | 685 | ins.objectid, |
668 | ins.offset, | 686 | async_extent->ram_size, |
669 | BTRFS_ORDERED_COMPRESSED); | 687 | ins.offset, |
688 | BTRFS_ORDERED_COMPRESSED, | ||
689 | async_extent->compress_type); | ||
670 | BUG_ON(ret); | 690 | BUG_ON(ret); |
671 | 691 | ||
672 | /* | 692 | /* |
@@ -730,6 +750,15 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, | |||
730 | return alloc_hint; | 750 | return alloc_hint; |
731 | } | 751 | } |
732 | 752 | ||
753 | static inline bool is_free_space_inode(struct btrfs_root *root, | ||
754 | struct inode *inode) | ||
755 | { | ||
756 | if (root == root->fs_info->tree_root || | ||
757 | BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) | ||
758 | return true; | ||
759 | return false; | ||
760 | } | ||
761 | |||
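The new is_free_space_inode() predicate singles out btrfs's own cache inodes: the free-space-cache inodes living in the tree_root, and each root's free-ino cache inode at BTRFS_FREE_INO_OBJECTID. Later hunks key off it to take the nolock transaction paths and to keep these inodes off the per-fs delalloc list. A compilable restatement with stub types; all names and the objectid value are modeled assumptions, not kernel code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FREE_INO_OBJECTID ((uint64_t)-12)    /* assumed BTRFS_FREE_INO_OBJECTID */

struct model_root { const struct model_root *tree_root; };
struct model_inode { const struct model_root *root; uint64_t objectid; };

static bool model_is_free_space_inode(const struct model_root *root,
                                      const struct model_inode *inode)
{
    return root == root->tree_root ||
           inode->objectid == FREE_INO_OBJECTID;
}

int main(void)
{
    struct model_root tree = { &tree };    /* tree_root points at itself */
    struct model_root fs = { &tree };
    struct model_inode cache = { &fs, FREE_INO_OBJECTID };
    struct model_inode file = { &fs, 257 };

    printf("cache inode: %d, ordinary file: %d\n",
           model_is_free_space_inode(cache.root, &cache),
           model_is_free_space_inode(file.root, &file));
    return 0;
}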
733 | /* | 762 | /* |
734 | * when extent_io.c finds a delayed allocation range in the file, | 763 | * when extent_io.c finds a delayed allocation range in the file, |
735 | * the call backs end up in this code. The basic idea is to | 764 | * the call backs end up in this code. The basic idea is to |
@@ -757,29 +786,29 @@ static noinline int cow_file_range(struct inode *inode, | |||
757 | u64 disk_num_bytes; | 786 | u64 disk_num_bytes; |
758 | u64 cur_alloc_size; | 787 | u64 cur_alloc_size; |
759 | u64 blocksize = root->sectorsize; | 788 | u64 blocksize = root->sectorsize; |
760 | u64 actual_end; | ||
761 | u64 isize = i_size_read(inode); | ||
762 | struct btrfs_key ins; | 789 | struct btrfs_key ins; |
763 | struct extent_map *em; | 790 | struct extent_map *em; |
764 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 791 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
765 | int ret = 0; | 792 | int ret = 0; |
766 | 793 | ||
767 | trans = btrfs_join_transaction(root, 1); | 794 | BUG_ON(is_free_space_inode(root, inode)); |
768 | BUG_ON(!trans); | 795 | trans = btrfs_join_transaction(root); |
769 | btrfs_set_trans_block_group(trans, inode); | 796 | BUG_ON(IS_ERR(trans)); |
770 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 797 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
771 | 798 | ||
772 | actual_end = min_t(u64, isize, end + 1); | ||
773 | |||
774 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); | 799 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); |
775 | num_bytes = max(blocksize, num_bytes); | 800 | num_bytes = max(blocksize, num_bytes); |
776 | disk_num_bytes = num_bytes; | 801 | disk_num_bytes = num_bytes; |
777 | ret = 0; | 802 | ret = 0; |
778 | 803 | ||
804 | /* if this is a small write inside eof, kick off defrag */ | ||
805 | if (end <= BTRFS_I(inode)->disk_i_size && num_bytes < 64 * 1024) | ||
806 | btrfs_add_inode_defrag(trans, inode); | ||
807 | |||
779 | if (start == 0) { | 808 | if (start == 0) { |
780 | /* lets try to make an inline extent */ | 809 | /* lets try to make an inline extent */ |
781 | ret = cow_file_range_inline(trans, root, inode, | 810 | ret = cow_file_range_inline(trans, root, inode, |
782 | start, end, 0, NULL); | 811 | start, end, 0, 0, NULL); |
783 | if (ret == 0) { | 812 | if (ret == 0) { |
784 | extent_clear_unlock_delalloc(inode, | 813 | extent_clear_unlock_delalloc(inode, |
785 | &BTRFS_I(inode)->io_tree, | 814 | &BTRFS_I(inode)->io_tree, |
@@ -814,7 +843,8 @@ static noinline int cow_file_range(struct inode *inode, | |||
814 | (u64)-1, &ins, 1); | 843 | (u64)-1, &ins, 1); |
815 | BUG_ON(ret); | 844 | BUG_ON(ret); |
816 | 845 | ||
817 | em = alloc_extent_map(GFP_NOFS); | 846 | em = alloc_extent_map(); |
847 | BUG_ON(!em); | ||
818 | em->start = start; | 848 | em->start = start; |
819 | em->orig_start = em->start; | 849 | em->orig_start = em->start; |
820 | ram_size = ins.offset; | 850 | ram_size = ins.offset; |
@@ -941,6 +971,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
941 | 1, 0, NULL, GFP_NOFS); | 971 | 1, 0, NULL, GFP_NOFS); |
942 | while (start < end) { | 972 | while (start < end) { |
943 | async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); | 973 | async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); |
974 | BUG_ON(!async_cow); | ||
944 | async_cow->inode = inode; | 975 | async_cow->inode = inode; |
945 | async_cow->root = root; | 976 | async_cow->root = root; |
946 | async_cow->locked_page = locked_page; | 977 | async_cow->locked_page = locked_page; |
@@ -994,7 +1025,7 @@ static noinline int csum_exist_in_range(struct btrfs_root *root, | |||
994 | LIST_HEAD(list); | 1025 | LIST_HEAD(list); |
995 | 1026 | ||
996 | ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr, | 1027 | ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr, |
997 | bytenr + num_bytes - 1, &list); | 1028 | bytenr + num_bytes - 1, &list, 0); |
998 | if (ret == 0 && list_empty(&list)) | 1029 | if (ret == 0 && list_empty(&list)) |
999 | return 0; | 1030 | return 0; |
1000 | 1031 | ||
@@ -1035,23 +1066,33 @@ static noinline int run_delalloc_nocow(struct inode *inode, | |||
1035 | int type; | 1066 | int type; |
1036 | int nocow; | 1067 | int nocow; |
1037 | int check_prev = 1; | 1068 | int check_prev = 1; |
1069 | bool nolock; | ||
1070 | u64 ino = btrfs_ino(inode); | ||
1038 | 1071 | ||
1039 | path = btrfs_alloc_path(); | 1072 | path = btrfs_alloc_path(); |
1040 | BUG_ON(!path); | 1073 | BUG_ON(!path); |
1041 | trans = btrfs_join_transaction(root, 1); | 1074 | |
1042 | BUG_ON(!trans); | 1075 | nolock = is_free_space_inode(root, inode); |
1076 | |||
1077 | if (nolock) | ||
1078 | trans = btrfs_join_transaction_nolock(root); | ||
1079 | else | ||
1080 | trans = btrfs_join_transaction(root); | ||
1081 | |||
1082 | BUG_ON(IS_ERR(trans)); | ||
1083 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
1043 | 1084 | ||
1044 | cow_start = (u64)-1; | 1085 | cow_start = (u64)-1; |
1045 | cur_offset = start; | 1086 | cur_offset = start; |
1046 | while (1) { | 1087 | while (1) { |
1047 | ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, | 1088 | ret = btrfs_lookup_file_extent(trans, root, path, ino, |
1048 | cur_offset, 0); | 1089 | cur_offset, 0); |
1049 | BUG_ON(ret < 0); | 1090 | BUG_ON(ret < 0); |
1050 | if (ret > 0 && path->slots[0] > 0 && check_prev) { | 1091 | if (ret > 0 && path->slots[0] > 0 && check_prev) { |
1051 | leaf = path->nodes[0]; | 1092 | leaf = path->nodes[0]; |
1052 | btrfs_item_key_to_cpu(leaf, &found_key, | 1093 | btrfs_item_key_to_cpu(leaf, &found_key, |
1053 | path->slots[0] - 1); | 1094 | path->slots[0] - 1); |
1054 | if (found_key.objectid == inode->i_ino && | 1095 | if (found_key.objectid == ino && |
1055 | found_key.type == BTRFS_EXTENT_DATA_KEY) | 1096 | found_key.type == BTRFS_EXTENT_DATA_KEY) |
1056 | path->slots[0]--; | 1097 | path->slots[0]--; |
1057 | } | 1098 | } |
@@ -1072,7 +1113,7 @@ next_slot: | |||
1072 | num_bytes = 0; | 1113 | num_bytes = 0; |
1073 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 1114 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
1074 | 1115 | ||
1075 | if (found_key.objectid > inode->i_ino || | 1116 | if (found_key.objectid > ino || |
1076 | found_key.type > BTRFS_EXTENT_DATA_KEY || | 1117 | found_key.type > BTRFS_EXTENT_DATA_KEY || |
1077 | found_key.offset > end) | 1118 | found_key.offset > end) |
1078 | break; | 1119 | break; |
@@ -1107,7 +1148,7 @@ next_slot: | |||
1107 | goto out_check; | 1148 | goto out_check; |
1108 | if (btrfs_extent_readonly(root, disk_bytenr)) | 1149 | if (btrfs_extent_readonly(root, disk_bytenr)) |
1109 | goto out_check; | 1150 | goto out_check; |
1110 | if (btrfs_cross_ref_exist(trans, root, inode->i_ino, | 1151 | if (btrfs_cross_ref_exist(trans, root, ino, |
1111 | found_key.offset - | 1152 | found_key.offset - |
1112 | extent_offset, disk_bytenr)) | 1153 | extent_offset, disk_bytenr)) |
1113 | goto out_check; | 1154 | goto out_check; |
@@ -1144,7 +1185,7 @@ out_check: | |||
1144 | goto next_slot; | 1185 | goto next_slot; |
1145 | } | 1186 | } |
1146 | 1187 | ||
1147 | btrfs_release_path(root, path); | 1188 | btrfs_release_path(path); |
1148 | if (cow_start != (u64)-1) { | 1189 | if (cow_start != (u64)-1) { |
1149 | ret = cow_file_range(inode, locked_page, cow_start, | 1190 | ret = cow_file_range(inode, locked_page, cow_start, |
1150 | found_key.offset - 1, page_started, | 1191 | found_key.offset - 1, page_started, |
@@ -1157,7 +1198,8 @@ out_check: | |||
1157 | struct extent_map *em; | 1198 | struct extent_map *em; |
1158 | struct extent_map_tree *em_tree; | 1199 | struct extent_map_tree *em_tree; |
1159 | em_tree = &BTRFS_I(inode)->extent_tree; | 1200 | em_tree = &BTRFS_I(inode)->extent_tree; |
1160 | em = alloc_extent_map(GFP_NOFS); | 1201 | em = alloc_extent_map(); |
1202 | BUG_ON(!em); | ||
1161 | em->start = cur_offset; | 1203 | em->start = cur_offset; |
1162 | em->orig_start = em->start; | 1204 | em->orig_start = em->start; |
1163 | em->len = num_bytes; | 1205 | em->len = num_bytes; |
@@ -1201,7 +1243,7 @@ out_check: | |||
1201 | if (cur_offset > end) | 1243 | if (cur_offset > end) |
1202 | break; | 1244 | break; |
1203 | } | 1245 | } |
1204 | btrfs_release_path(root, path); | 1246 | btrfs_release_path(path); |
1205 | 1247 | ||
1206 | if (cur_offset <= end && cow_start == (u64)-1) | 1248 | if (cur_offset <= end && cow_start == (u64)-1) |
1207 | cow_start = cur_offset; | 1249 | cow_start = cur_offset; |
@@ -1211,8 +1253,13 @@ out_check: | |||
1211 | BUG_ON(ret); | 1253 | BUG_ON(ret); |
1212 | } | 1254 | } |
1213 | 1255 | ||
1214 | ret = btrfs_end_transaction(trans, root); | 1256 | if (nolock) { |
1215 | BUG_ON(ret); | 1257 | ret = btrfs_end_transaction_nolock(trans, root); |
1258 | BUG_ON(ret); | ||
1259 | } else { | ||
1260 | ret = btrfs_end_transaction(trans, root); | ||
1261 | BUG_ON(ret); | ||
1262 | } | ||
1216 | btrfs_free_path(path); | 1263 | btrfs_free_path(path); |
1217 | return 0; | 1264 | return 0; |
1218 | } | 1265 | } |
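run_delalloc_nocow() now decides once, via is_free_space_inode(), whether to take the nolock transaction variants, and reuses that decision at the end: a join through btrfs_join_transaction_nolock() has to be paired with btrfs_end_transaction_nolock(). A stubbed sketch of keeping the pair consistent:

#include <stdbool.h>
#include <stdio.h>

/* stubs standing in for the locked/nolock join and end variants */
static void join_transaction(bool nolock)
{
    printf("join_%s\n", nolock ? "nolock" : "locked");
}

static void end_transaction(bool nolock)
{
    printf("end_%s\n", nolock ? "nolock" : "locked");
}

static void run_delalloc(bool free_space_inode)
{
    bool nolock = free_space_inode;    /* decide once up front... */

    join_transaction(nolock);
    /* ... walk the extents ... */
    end_transaction(nolock);           /* ...and end with the matching variant */
}

int main(void)
{
    run_delalloc(false);
    run_delalloc(true);
    return 0;
}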
@@ -1234,7 +1281,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1234 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1281 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
1235 | page_started, 0, nr_written); | 1282 | page_started, 0, nr_written); |
1236 | else if (!btrfs_test_opt(root, COMPRESS) && | 1283 | else if (!btrfs_test_opt(root, COMPRESS) && |
1237 | !(BTRFS_I(inode)->force_compress)) | 1284 | !(BTRFS_I(inode)->force_compress) && |
1285 | !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) | ||
1238 | ret = cow_file_range(inode, locked_page, start, end, | 1286 | ret = cow_file_range(inode, locked_page, start, end, |
1239 | page_started, nr_written, 1); | 1287 | page_started, nr_written, 1); |
1240 | else | 1288 | else |
@@ -1283,12 +1331,13 @@ static int btrfs_set_bit_hook(struct inode *inode, | |||
1283 | 1331 | ||
1284 | /* | 1332 | /* |
1285 | * set_bit and clear bit hooks normally require _irqsave/restore | 1333 | * set_bit and clear bit hooks normally require _irqsave/restore |
1286 | * but in this case, we are only testeing for the DELALLOC | 1334 | * but in this case, we are only testing for the DELALLOC |
1287 | * bit, which is only set or cleared with irqs on | 1335 | * bit, which is only set or cleared with irqs on |
1288 | */ | 1336 | */ |
1289 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { | 1337 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1290 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1338 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1291 | u64 len = state->end + 1 - state->start; | 1339 | u64 len = state->end + 1 - state->start; |
1340 | bool do_list = !is_free_space_inode(root, inode); | ||
1292 | 1341 | ||
1293 | if (*bits & EXTENT_FIRST_DELALLOC) | 1342 | if (*bits & EXTENT_FIRST_DELALLOC) |
1294 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1343 | *bits &= ~EXTENT_FIRST_DELALLOC; |
@@ -1298,7 +1347,7 @@ static int btrfs_set_bit_hook(struct inode *inode, | |||
1298 | spin_lock(&root->fs_info->delalloc_lock); | 1347 | spin_lock(&root->fs_info->delalloc_lock); |
1299 | BTRFS_I(inode)->delalloc_bytes += len; | 1348 | BTRFS_I(inode)->delalloc_bytes += len; |
1300 | root->fs_info->delalloc_bytes += len; | 1349 | root->fs_info->delalloc_bytes += len; |
1301 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1350 | if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
1302 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, | 1351 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, |
1303 | &root->fs_info->delalloc_inodes); | 1352 | &root->fs_info->delalloc_inodes); |
1304 | } | 1353 | } |
@@ -1315,12 +1364,13 @@ static int btrfs_clear_bit_hook(struct inode *inode, | |||
1315 | { | 1364 | { |
1316 | /* | 1365 | /* |
1317 | * set_bit and clear bit hooks normally require _irqsave/restore | 1366 | * set_bit and clear bit hooks normally require _irqsave/restore |
1318 | * but in this case, we are only testeing for the DELALLOC | 1367 | * but in this case, we are only testing for the DELALLOC |
1319 | * bit, which is only set or cleared with irqs on | 1368 | * bit, which is only set or cleared with irqs on |
1320 | */ | 1369 | */ |
1321 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { | 1370 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1322 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1371 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1323 | u64 len = state->end + 1 - state->start; | 1372 | u64 len = state->end + 1 - state->start; |
1373 | bool do_list = !is_free_space_inode(root, inode); | ||
1324 | 1374 | ||
1325 | if (*bits & EXTENT_FIRST_DELALLOC) | 1375 | if (*bits & EXTENT_FIRST_DELALLOC) |
1326 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1376 | *bits &= ~EXTENT_FIRST_DELALLOC; |
@@ -1330,14 +1380,15 @@ static int btrfs_clear_bit_hook(struct inode *inode, | |||
1330 | if (*bits & EXTENT_DO_ACCOUNTING) | 1380 | if (*bits & EXTENT_DO_ACCOUNTING) |
1331 | btrfs_delalloc_release_metadata(inode, len); | 1381 | btrfs_delalloc_release_metadata(inode, len); |
1332 | 1382 | ||
1333 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) | 1383 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID |
1384 | && do_list) | ||
1334 | btrfs_free_reserved_data_space(inode, len); | 1385 | btrfs_free_reserved_data_space(inode, len); |
1335 | 1386 | ||
1336 | spin_lock(&root->fs_info->delalloc_lock); | 1387 | spin_lock(&root->fs_info->delalloc_lock); |
1337 | root->fs_info->delalloc_bytes -= len; | 1388 | root->fs_info->delalloc_bytes -= len; |
1338 | BTRFS_I(inode)->delalloc_bytes -= len; | 1389 | BTRFS_I(inode)->delalloc_bytes -= len; |
1339 | 1390 | ||
1340 | if (BTRFS_I(inode)->delalloc_bytes == 0 && | 1391 | if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && |
1341 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1392 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
1342 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); | 1393 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); |
1343 | } | 1394 | } |
@@ -1372,7 +1423,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
1372 | 1423 | ||
1373 | if (map_length < length + size) | 1424 | if (map_length < length + size) |
1374 | return 1; | 1425 | return 1; |
1375 | return 0; | 1426 | return ret; |
1376 | } | 1427 | } |
1377 | 1428 | ||
1378 | /* | 1429 | /* |
@@ -1426,15 +1477,21 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1426 | 1477 | ||
1427 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 1478 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
1428 | 1479 | ||
1429 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 1480 | if (is_free_space_inode(root, inode)) |
1481 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); | ||
1482 | else | ||
1483 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
1430 | BUG_ON(ret); | 1484 | BUG_ON(ret); |
1431 | 1485 | ||
1432 | if (!(rw & REQ_WRITE)) { | 1486 | if (!(rw & REQ_WRITE)) { |
1433 | if (bio_flags & EXTENT_BIO_COMPRESSED) { | 1487 | if (bio_flags & EXTENT_BIO_COMPRESSED) { |
1434 | return btrfs_submit_compressed_read(inode, bio, | 1488 | return btrfs_submit_compressed_read(inode, bio, |
1435 | mirror_num, bio_flags); | 1489 | mirror_num, bio_flags); |
1436 | } else if (!skip_sum) | 1490 | } else if (!skip_sum) { |
1437 | btrfs_lookup_bio_sums(root, inode, bio, NULL); | 1491 | ret = btrfs_lookup_bio_sums(root, inode, bio, NULL); |
1492 | if (ret) | ||
1493 | return ret; | ||
1494 | } | ||
1438 | goto mapit; | 1495 | goto mapit; |
1439 | } else if (!skip_sum) { | 1496 | } else if (!skip_sum) { |
1440 | /* csum items have already been cloned */ | 1497 | /* csum items have already been cloned */ |
@@ -1462,8 +1519,6 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, | |||
1462 | { | 1519 | { |
1463 | struct btrfs_ordered_sum *sum; | 1520 | struct btrfs_ordered_sum *sum; |
1464 | 1521 | ||
1465 | btrfs_set_trans_block_group(trans, inode); | ||
1466 | |||
1467 | list_for_each_entry(sum, list, list) { | 1522 | list_for_each_entry(sum, list, list) { |
1468 | btrfs_csum_file_blocks(trans, | 1523 | btrfs_csum_file_blocks(trans, |
1469 | BTRFS_I(inode)->root->fs_info->csum_root, sum); | 1524 | BTRFS_I(inode)->root->fs_info->csum_root, sum); |
@@ -1534,6 +1589,7 @@ out: | |||
1534 | out_page: | 1589 | out_page: |
1535 | unlock_page(page); | 1590 | unlock_page(page); |
1536 | page_cache_release(page); | 1591 | page_cache_release(page); |
1592 | kfree(fixup); | ||
1537 | } | 1593 | } |
1538 | 1594 | ||
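The kfree(fixup) added at out_page closes a small leak: the fixup work item allocated when the page was queued was not released once the worker ran. The ownership rule, as a standalone model: the worker consumes the queued item and frees it on every exit path.

#include <stdio.h>
#include <stdlib.h>

struct fixup { long page_index; };

/* the worker owns the item handed to it and frees it on every path */
static void fixup_worker(struct fixup *fixup)
{
    if (fixup->page_index < 0)
        goto out_page;    /* early-exit path */
    printf("redirtied page %ld\n", fixup->page_index);
out_page:
    free(fixup);          /* counterpart of the added kfree(fixup) */
}

int main(void)
{
    struct fixup *f = malloc(sizeof(*f));
    if (!f)
        return 1;
    f->page_index = 42;
    fixup_worker(f);
    return 0;
}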
1539 | /* | 1595 | /* |
@@ -1605,7 +1661,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1605 | &hint, 0); | 1661 | &hint, 0); |
1606 | BUG_ON(ret); | 1662 | BUG_ON(ret); |
1607 | 1663 | ||
1608 | ins.objectid = inode->i_ino; | 1664 | ins.objectid = btrfs_ino(inode); |
1609 | ins.offset = file_pos; | 1665 | ins.offset = file_pos; |
1610 | ins.type = BTRFS_EXTENT_DATA_KEY; | 1666 | ins.type = BTRFS_EXTENT_DATA_KEY; |
1611 | ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); | 1667 | ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); |
@@ -1636,7 +1692,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1636 | ins.type = BTRFS_EXTENT_ITEM_KEY; | 1692 | ins.type = BTRFS_EXTENT_ITEM_KEY; |
1637 | ret = btrfs_alloc_reserved_file_extent(trans, root, | 1693 | ret = btrfs_alloc_reserved_file_extent(trans, root, |
1638 | root->root_key.objectid, | 1694 | root->root_key.objectid, |
1639 | inode->i_ino, file_pos, &ins); | 1695 | btrfs_ino(inode), file_pos, &ins); |
1640 | BUG_ON(ret); | 1696 | BUG_ON(ret); |
1641 | btrfs_free_path(path); | 1697 | btrfs_free_path(path); |
1642 | 1698 | ||
@@ -1660,8 +1716,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1660 | struct btrfs_ordered_extent *ordered_extent = NULL; | 1716 | struct btrfs_ordered_extent *ordered_extent = NULL; |
1661 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1717 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
1662 | struct extent_state *cached_state = NULL; | 1718 | struct extent_state *cached_state = NULL; |
1663 | int compressed = 0; | 1719 | int compress_type = 0; |
1664 | int ret; | 1720 | int ret; |
1721 | bool nolock; | ||
1665 | 1722 | ||
1666 | ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, | 1723 | ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, |
1667 | end - start + 1); | 1724 | end - start + 1); |
@@ -1669,12 +1726,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1669 | return 0; | 1726 | return 0; |
1670 | BUG_ON(!ordered_extent); | 1727 | BUG_ON(!ordered_extent); |
1671 | 1728 | ||
1729 | nolock = is_free_space_inode(root, inode); | ||
1730 | |||
1672 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { | 1731 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { |
1673 | BUG_ON(!list_empty(&ordered_extent->list)); | 1732 | BUG_ON(!list_empty(&ordered_extent->list)); |
1674 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1733 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1675 | if (!ret) { | 1734 | if (!ret) { |
1676 | trans = btrfs_join_transaction(root, 1); | 1735 | if (nolock) |
1677 | btrfs_set_trans_block_group(trans, inode); | 1736 | trans = btrfs_join_transaction_nolock(root); |
1737 | else | ||
1738 | trans = btrfs_join_transaction(root); | ||
1739 | BUG_ON(IS_ERR(trans)); | ||
1678 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 1740 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
1679 | ret = btrfs_update_inode(trans, root, inode); | 1741 | ret = btrfs_update_inode(trans, root, inode); |
1680 | BUG_ON(ret); | 1742 | BUG_ON(ret); |
@@ -1686,27 +1748,31 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1686 | ordered_extent->file_offset + ordered_extent->len - 1, | 1748 | ordered_extent->file_offset + ordered_extent->len - 1, |
1687 | 0, &cached_state, GFP_NOFS); | 1749 | 0, &cached_state, GFP_NOFS); |
1688 | 1750 | ||
1689 | trans = btrfs_join_transaction(root, 1); | 1751 | if (nolock) |
1690 | btrfs_set_trans_block_group(trans, inode); | 1752 | trans = btrfs_join_transaction_nolock(root); |
1753 | else | ||
1754 | trans = btrfs_join_transaction(root); | ||
1755 | BUG_ON(IS_ERR(trans)); | ||
1691 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 1756 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
1692 | 1757 | ||
1693 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 1758 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
1694 | compressed = 1; | 1759 | compress_type = ordered_extent->compress_type; |
1695 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { | 1760 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { |
1696 | BUG_ON(compressed); | 1761 | BUG_ON(compress_type); |
1697 | ret = btrfs_mark_extent_written(trans, inode, | 1762 | ret = btrfs_mark_extent_written(trans, inode, |
1698 | ordered_extent->file_offset, | 1763 | ordered_extent->file_offset, |
1699 | ordered_extent->file_offset + | 1764 | ordered_extent->file_offset + |
1700 | ordered_extent->len); | 1765 | ordered_extent->len); |
1701 | BUG_ON(ret); | 1766 | BUG_ON(ret); |
1702 | } else { | 1767 | } else { |
1768 | BUG_ON(root == root->fs_info->tree_root); | ||
1703 | ret = insert_reserved_file_extent(trans, inode, | 1769 | ret = insert_reserved_file_extent(trans, inode, |
1704 | ordered_extent->file_offset, | 1770 | ordered_extent->file_offset, |
1705 | ordered_extent->start, | 1771 | ordered_extent->start, |
1706 | ordered_extent->disk_len, | 1772 | ordered_extent->disk_len, |
1707 | ordered_extent->len, | 1773 | ordered_extent->len, |
1708 | ordered_extent->len, | 1774 | ordered_extent->len, |
1709 | compressed, 0, 0, | 1775 | compress_type, 0, 0, |
1710 | BTRFS_FILE_EXTENT_REG); | 1776 | BTRFS_FILE_EXTENT_REG); |
1711 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | 1777 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, |
1712 | ordered_extent->file_offset, | 1778 | ordered_extent->file_offset, |
@@ -1720,13 +1786,22 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1720 | add_pending_csums(trans, inode, ordered_extent->file_offset, | 1786 | add_pending_csums(trans, inode, ordered_extent->file_offset, |
1721 | &ordered_extent->list); | 1787 | &ordered_extent->list); |
1722 | 1788 | ||
1723 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1789 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1724 | ret = btrfs_update_inode(trans, root, inode); | 1790 | if (!ret) { |
1725 | BUG_ON(ret); | 1791 | ret = btrfs_update_inode(trans, root, inode); |
1792 | BUG_ON(ret); | ||
1793 | } | ||
1794 | ret = 0; | ||
1726 | out: | 1795 | out: |
1727 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | 1796 | if (nolock) { |
1728 | if (trans) | 1797 | if (trans) |
1729 | btrfs_end_transaction(trans, root); | 1798 | btrfs_end_transaction_nolock(trans, root); |
1799 | } else { | ||
1800 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | ||
1801 | if (trans) | ||
1802 | btrfs_end_transaction(trans, root); | ||
1803 | } | ||
1804 | |||
1730 | /* once for us */ | 1805 | /* once for us */ |
1731 | btrfs_put_ordered_extent(ordered_extent); | 1806 | btrfs_put_ordered_extent(ordered_extent); |
1732 | /* once for the tree */ | 1807 | /* once for the tree */ |
@@ -1738,6 +1813,8 @@ out: | |||
1738 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | 1813 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, |
1739 | struct extent_state *state, int uptodate) | 1814 | struct extent_state *state, int uptodate) |
1740 | { | 1815 | { |
1816 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); | ||
1817 | |||
1741 | ClearPagePrivate2(page); | 1818 | ClearPagePrivate2(page); |
1742 | return btrfs_finish_ordered_io(page->mapping->host, start, end); | 1819 | return btrfs_finish_ordered_io(page->mapping->host, start, end); |
1743 | } | 1820 | } |
@@ -1793,7 +1870,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1793 | } | 1870 | } |
1794 | read_unlock(&em_tree->lock); | 1871 | read_unlock(&em_tree->lock); |
1795 | 1872 | ||
1796 | if (!em || IS_ERR(em)) { | 1873 | if (IS_ERR_OR_NULL(em)) { |
1797 | kfree(failrec); | 1874 | kfree(failrec); |
1798 | return -EIO; | 1875 | return -EIO; |
1799 | } | 1876 | } |
@@ -1802,6 +1879,8 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1802 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | 1879 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
1803 | logical = em->block_start; | 1880 | logical = em->block_start; |
1804 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; | 1881 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; |
1882 | extent_set_compress_type(&failrec->bio_flags, | ||
1883 | em->compress_type); | ||
1805 | } | 1884 | } |
1806 | failrec->logical = logical; | 1885 | failrec->logical = logical; |
1807 | free_extent_map(em); | 1886 | free_extent_map(em); |
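extent_set_compress_type() stores the algorithm number in the upper bits of bio_flags, so EXTENT_BIO_COMPRESSED keeps meaning "this bio is compressed" while the type travels in the same word. The shift position below follows the extent_io.h change in this series but should be read as an assumption; the packing itself is straightforward:

#include <assert.h>
#include <stdio.h>

#define BIO_COMPRESSED 0x1ul    /* low bit: compressed at all? */
#define TYPE_SHIFT     16       /* assumed position of the type bits */

static void set_compress_type(unsigned long *bio_flags, int type)
{
    *bio_flags |= (unsigned long)type << TYPE_SHIFT;
}

static int get_compress_type(unsigned long bio_flags)
{
    return (int)(bio_flags >> TYPE_SHIFT);
}

int main(void)
{
    unsigned long bio_flags = BIO_COMPRESSED;

    set_compress_type(&bio_flags, 2);    /* 2 = lzo in this series' numbering */
    assert(bio_flags & BIO_COMPRESSED);
    assert(get_compress_type(bio_flags) == 2);
    printf("bio_flags=0x%lx type=%d\n", bio_flags, get_compress_type(bio_flags));
    return 0;
}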
@@ -1846,10 +1925,10 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1846 | else | 1925 | else |
1847 | rw = READ; | 1926 | rw = READ; |
1848 | 1927 | ||
1849 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | 1928 | ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, |
1850 | failrec->last_mirror, | 1929 | failrec->last_mirror, |
1851 | failrec->bio_flags, 0); | 1930 | failrec->bio_flags, 0); |
1852 | return 0; | 1931 | return ret; |
1853 | } | 1932 | } |
1854 | 1933 | ||
1855 | /* | 1934 | /* |
@@ -1865,7 +1944,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start) | |||
1865 | 1944 | ||
1866 | private = 0; | 1945 | private = 0; |
1867 | if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, | 1946 | if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, |
1868 | (u64)-1, 1, EXTENT_DIRTY)) { | 1947 | (u64)-1, 1, EXTENT_DIRTY, 0)) { |
1869 | ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, | 1948 | ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, |
1870 | start, &private_failure); | 1949 | start, &private_failure); |
1871 | if (ret == 0) { | 1950 | if (ret == 0) { |
@@ -1907,7 +1986,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
1907 | } | 1986 | } |
1908 | 1987 | ||
1909 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) | 1988 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) |
1910 | return 0; | 1989 | goto good; |
1911 | 1990 | ||
1912 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && | 1991 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && |
1913 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { | 1992 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { |
@@ -1940,12 +2019,11 @@ good: | |||
1940 | return 0; | 2019 | return 0; |
1941 | 2020 | ||
1942 | zeroit: | 2021 | zeroit: |
1943 | if (printk_ratelimit()) { | 2022 | printk_ratelimited(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u " |
1944 | printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " | 2023 | "private %llu\n", |
1945 | "private %llu\n", page->mapping->host->i_ino, | 2024 | (unsigned long long)btrfs_ino(page->mapping->host), |
1946 | (unsigned long long)start, csum, | 2025 | (unsigned long long)start, csum, |
1947 | (unsigned long long)private); | 2026 | (unsigned long long)private); |
1948 | } | ||
1949 | memset(kaddr + offset, 1, end - start + 1); | 2027 | memset(kaddr + offset, 1, end - start + 1); |
1950 | flush_dcache_page(page); | 2028 | flush_dcache_page(page); |
1951 | kunmap_atomic(kaddr, KM_USER0); | 2029 | kunmap_atomic(kaddr, KM_USER0); |
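The checksum-failure message switches from the VFS i_ino, an unsigned long printed with %lu, to btrfs_ino() printed as a u64: btrfs inode numbers are 64-bit objectids that can overflow a 32-bit unsigned long, so they are now fetched and printed explicitly. The truncation hazard, modeled with fixed-width types:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t objectid = 0x100000001ULL;      /* a 64-bit btrfs objectid */
    uint32_t i_ino32 = (uint32_t)objectid;   /* what a 32-bit i_ino would keep */

    printf("objectid = %" PRIu64 "\n", objectid);
    printf("32-bit i_ino = %" PRIu32 " (high bits lost)\n", i_ino32);
    return 0;
}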
@@ -2161,8 +2239,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2161 | insert = 1; | 2239 | insert = 1; |
2162 | #endif | 2240 | #endif |
2163 | insert = 1; | 2241 | insert = 1; |
2164 | } else { | ||
2165 | WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved); | ||
2166 | } | 2242 | } |
2167 | 2243 | ||
2168 | if (!BTRFS_I(inode)->orphan_meta_reserved) { | 2244 | if (!BTRFS_I(inode)->orphan_meta_reserved) { |
@@ -2182,7 +2258,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2182 | 2258 | ||
2183 | /* insert an orphan item to track this unlinked/truncated file */ | 2259 | /* insert an orphan item to track this unlinked/truncated file */ |
2184 | if (insert >= 1) { | 2260 | if (insert >= 1) { |
2185 | ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); | 2261 | ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); |
2186 | BUG_ON(ret); | 2262 | BUG_ON(ret); |
2187 | } | 2263 | } |
2188 | 2264 | ||
@@ -2219,7 +2295,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2219 | spin_unlock(&root->orphan_lock); | 2295 | spin_unlock(&root->orphan_lock); |
2220 | 2296 | ||
2221 | if (trans && delete_item) { | 2297 | if (trans && delete_item) { |
2222 | ret = btrfs_del_orphan_item(trans, root, inode->i_ino); | 2298 | ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); |
2223 | BUG_ON(ret); | 2299 | BUG_ON(ret); |
2224 | } | 2300 | } |
2225 | 2301 | ||
@@ -2233,21 +2309,23 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2233 | * this cleans up any orphans that may be left on the list from the last use | 2309 | * this cleans up any orphans that may be left on the list from the last use |
2234 | * of this root. | 2310 | * of this root. |
2235 | */ | 2311 | */ |
2236 | void btrfs_orphan_cleanup(struct btrfs_root *root) | 2312 | int btrfs_orphan_cleanup(struct btrfs_root *root) |
2237 | { | 2313 | { |
2238 | struct btrfs_path *path; | 2314 | struct btrfs_path *path; |
2239 | struct extent_buffer *leaf; | 2315 | struct extent_buffer *leaf; |
2240 | struct btrfs_item *item; | ||
2241 | struct btrfs_key key, found_key; | 2316 | struct btrfs_key key, found_key; |
2242 | struct btrfs_trans_handle *trans; | 2317 | struct btrfs_trans_handle *trans; |
2243 | struct inode *inode; | 2318 | struct inode *inode; |
2244 | int ret = 0, nr_unlink = 0, nr_truncate = 0; | 2319 | int ret = 0, nr_unlink = 0, nr_truncate = 0; |
2245 | 2320 | ||
2246 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) | 2321 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) |
2247 | return; | 2322 | return 0; |
2248 | 2323 | ||
2249 | path = btrfs_alloc_path(); | 2324 | path = btrfs_alloc_path(); |
2250 | BUG_ON(!path); | 2325 | if (!path) { |
2326 | ret = -ENOMEM; | ||
2327 | goto out; | ||
2328 | } | ||
2251 | path->reada = -1; | 2329 | path->reada = -1; |
2252 | 2330 | ||
2253 | key.objectid = BTRFS_ORPHAN_OBJECTID; | 2331 | key.objectid = BTRFS_ORPHAN_OBJECTID; |
@@ -2256,18 +2334,16 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2256 | 2334 | ||
2257 | while (1) { | 2335 | while (1) { |
2258 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 2336 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
2259 | if (ret < 0) { | 2337 | if (ret < 0) |
2260 | printk(KERN_ERR "Error searching slot for orphan: %d" | 2338 | goto out; |
2261 | "\n", ret); | ||
2262 | break; | ||
2263 | } | ||
2264 | 2339 | ||
2265 | /* | 2340 | /* |
2266 | * if ret == 0 means we found what we were searching for, which | 2341 | * if ret == 0 means we found what we were searching for, which |
2267 | * is weird, but possible, so only screw with path if we didnt | 2342 | * is weird, but possible, so only screw with path if we didn't |
2268 | * find the key and see if we have stuff that matches | 2343 | * find the key and see if we have stuff that matches |
2269 | */ | 2344 | */ |
2270 | if (ret > 0) { | 2345 | if (ret > 0) { |
2346 | ret = 0; | ||
2271 | if (path->slots[0] == 0) | 2347 | if (path->slots[0] == 0) |
2272 | break; | 2348 | break; |
2273 | path->slots[0]--; | 2349 | path->slots[0]--; |
@@ -2275,7 +2351,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2275 | 2351 | ||
2276 | /* pull out the item */ | 2352 | /* pull out the item */ |
2277 | leaf = path->nodes[0]; | 2353 | leaf = path->nodes[0]; |
2278 | item = btrfs_item_nr(leaf, path->slots[0]); | ||
2279 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 2354 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
2280 | 2355 | ||
2281 | /* make sure the item matches what we want */ | 2356 | /* make sure the item matches what we want */ |
@@ -2285,7 +2360,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2285 | break; | 2360 | break; |
2286 | 2361 | ||
2287 | /* release the path since we're done with it */ | 2362 | /* release the path since we're done with it */ |
2288 | btrfs_release_path(root, path); | 2363 | btrfs_release_path(path); |
2289 | 2364 | ||
2290 | /* | 2365 | /* |
2291 | * this is where we are basically btrfs_lookup, without the | 2366 | * this is where we are basically btrfs_lookup, without the |
@@ -2296,7 +2371,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2296 | found_key.type = BTRFS_INODE_ITEM_KEY; | 2371 | found_key.type = BTRFS_INODE_ITEM_KEY; |
2297 | found_key.offset = 0; | 2372 | found_key.offset = 0; |
2298 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); | 2373 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); |
2299 | BUG_ON(IS_ERR(inode)); | 2374 | if (IS_ERR(inode)) { |
2375 | ret = PTR_ERR(inode); | ||
2376 | goto out; | ||
2377 | } | ||
2300 | 2378 | ||
2301 | /* | 2379 | /* |
2302 | * add this inode to the orphan list so btrfs_orphan_del does | 2380 | * add this inode to the orphan list so btrfs_orphan_del does |
@@ -2314,6 +2392,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2314 | */ | 2392 | */ |
2315 | if (is_bad_inode(inode)) { | 2393 | if (is_bad_inode(inode)) { |
2316 | trans = btrfs_start_transaction(root, 0); | 2394 | trans = btrfs_start_transaction(root, 0); |
2395 | if (IS_ERR(trans)) { | ||
2396 | ret = PTR_ERR(trans); | ||
2397 | goto out; | ||
2398 | } | ||
2317 | btrfs_orphan_del(trans, inode); | 2399 | btrfs_orphan_del(trans, inode); |
2318 | btrfs_end_transaction(trans, root); | 2400 | btrfs_end_transaction(trans, root); |
2319 | iput(inode); | 2401 | iput(inode); |
@@ -2322,17 +2404,22 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2322 | 2404 | ||
2323 | /* if we have links, this was a truncate, lets do that */ | 2405 | /* if we have links, this was a truncate, lets do that */ |
2324 | if (inode->i_nlink) { | 2406 | if (inode->i_nlink) { |
2407 | if (!S_ISREG(inode->i_mode)) { | ||
2408 | WARN_ON(1); | ||
2409 | iput(inode); | ||
2410 | continue; | ||
2411 | } | ||
2325 | nr_truncate++; | 2412 | nr_truncate++; |
2326 | btrfs_truncate(inode); | 2413 | ret = btrfs_truncate(inode); |
2327 | } else { | 2414 | } else { |
2328 | nr_unlink++; | 2415 | nr_unlink++; |
2329 | } | 2416 | } |
2330 | 2417 | ||
2331 | /* this will do delete_inode and everything for us */ | 2418 | /* this will do delete_inode and everything for us */ |
2332 | iput(inode); | 2419 | iput(inode); |
2420 | if (ret) | ||
2421 | goto out; | ||
2333 | } | 2422 | } |
2334 | btrfs_free_path(path); | ||
2335 | |||
2336 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; | 2423 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; |
2337 | 2424 | ||
2338 | if (root->orphan_block_rsv) | 2425 | if (root->orphan_block_rsv) |
@@ -2340,14 +2427,21 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2340 | (u64)-1); | 2427 | (u64)-1); |
2341 | 2428 | ||
2342 | if (root->orphan_block_rsv || root->orphan_item_inserted) { | 2429 | if (root->orphan_block_rsv || root->orphan_item_inserted) { |
2343 | trans = btrfs_join_transaction(root, 1); | 2430 | trans = btrfs_join_transaction(root); |
2344 | btrfs_end_transaction(trans, root); | 2431 | if (!IS_ERR(trans)) |
2432 | btrfs_end_transaction(trans, root); | ||
2345 | } | 2433 | } |
2346 | 2434 | ||
2347 | if (nr_unlink) | 2435 | if (nr_unlink) |
2348 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); | 2436 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); |
2349 | if (nr_truncate) | 2437 | if (nr_truncate) |
2350 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); | 2438 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); |
2439 | |||
2440 | out: | ||
2441 | if (ret) | ||
2442 | printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret); | ||
2443 | btrfs_free_path(path); | ||
2444 | return ret; | ||
2351 | } | 2445 | } |
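btrfs_orphan_cleanup() changes from void to int, and every failure now funnels through one out: label that reports the error and frees the path, replacing the per-site printk-and-break. The single-exit idiom in a standalone sketch; note that btrfs_free_path(), like free(), accepts NULL:

#include <stdio.h>
#include <stdlib.h>

/* one exit label owns the cleanup and the error report */
static int orphan_cleanup(int fail_step)
{
    int ret = 0;
    char *path = malloc(64);
    if (!path) {
        ret = -1;
        goto out;
    }
    if (fail_step == 1) { ret = -2; goto out; }    /* search failed */
    if (fail_step == 2) { ret = -3; goto out; }    /* iget failed   */
out:
    if (ret)
        fprintf(stderr, "could not do orphan cleanup %d\n", ret);
    free(path);    /* free(NULL) is safe, like btrfs_free_path(NULL) */
    return ret;
}

int main(void)
{
    return orphan_cleanup(0);
}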
2352 | 2446 | ||
2353 | /* | 2447 | /* |
@@ -2413,12 +2507,17 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2413 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2507 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2414 | struct btrfs_key location; | 2508 | struct btrfs_key location; |
2415 | int maybe_acls; | 2509 | int maybe_acls; |
2416 | u64 alloc_group_block; | ||
2417 | u32 rdev; | 2510 | u32 rdev; |
2418 | int ret; | 2511 | int ret; |
2512 | bool filled = false; | ||
2513 | |||
2514 | ret = btrfs_fill_inode(inode, &rdev); | ||
2515 | if (!ret) | ||
2516 | filled = true; | ||
2419 | 2517 | ||
2420 | path = btrfs_alloc_path(); | 2518 | path = btrfs_alloc_path(); |
2421 | BUG_ON(!path); | 2519 | BUG_ON(!path); |
2520 | path->leave_spinning = 1; | ||
2422 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); | 2521 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); |
2423 | 2522 | ||
2424 | ret = btrfs_lookup_inode(NULL, root, path, &location, 0); | 2523 | ret = btrfs_lookup_inode(NULL, root, path, &location, 0); |
@@ -2426,8 +2525,18 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2426 | goto make_bad; | 2525 | goto make_bad; |
2427 | 2526 | ||
2428 | leaf = path->nodes[0]; | 2527 | leaf = path->nodes[0]; |
2528 | |||
2529 | if (filled) | ||
2530 | goto cache_acl; | ||
2531 | |||
2429 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | 2532 | inode_item = btrfs_item_ptr(leaf, path->slots[0], |
2430 | struct btrfs_inode_item); | 2533 | struct btrfs_inode_item); |
2534 | if (!leaf->map_token) | ||
2535 | map_private_extent_buffer(leaf, (unsigned long)inode_item, | ||
2536 | sizeof(struct btrfs_inode_item), | ||
2537 | &leaf->map_token, &leaf->kaddr, | ||
2538 | &leaf->map_start, &leaf->map_len, | ||
2539 | KM_USER1); | ||
2431 | 2540 | ||
2432 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); | 2541 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); |
2433 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); | 2542 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); |
@@ -2456,21 +2565,22 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2456 | 2565 | ||
2457 | BTRFS_I(inode)->index_cnt = (u64)-1; | 2566 | BTRFS_I(inode)->index_cnt = (u64)-1; |
2458 | BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); | 2567 | BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); |
2459 | 2568 | cache_acl: | |
2460 | alloc_group_block = btrfs_inode_block_group(leaf, inode_item); | ||
2461 | |||
2462 | /* | 2569 | /* |
2463 | * try to precache a NULL acl entry for files that don't have | 2570 | * try to precache a NULL acl entry for files that don't have |
2464 | * any xattrs or acls | 2571 | * any xattrs or acls |
2465 | */ | 2572 | */ |
2466 | maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); | 2573 | maybe_acls = acls_after_inode_item(leaf, path->slots[0], |
2574 | btrfs_ino(inode)); | ||
2467 | if (!maybe_acls) | 2575 | if (!maybe_acls) |
2468 | cache_no_acl(inode); | 2576 | cache_no_acl(inode); |
2469 | 2577 | ||
2470 | BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, | 2578 | if (leaf->map_token) { |
2471 | alloc_group_block, 0); | 2579 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); |
2580 | leaf->map_token = NULL; | ||
2581 | } | ||
2582 | |||
2472 | btrfs_free_path(path); | 2583 | btrfs_free_path(path); |
2473 | inode_item = NULL; | ||
2474 | 2584 | ||
2475 | switch (inode->i_mode & S_IFMT) { | 2585 | switch (inode->i_mode & S_IFMT) { |
2476 | case S_IFREG: | 2586 | case S_IFREG: |
@@ -2514,6 +2624,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2514 | struct btrfs_inode_item *item, | 2624 | struct btrfs_inode_item *item, |
2515 | struct inode *inode) | 2625 | struct inode *inode) |
2516 | { | 2626 | { |
2627 | if (!leaf->map_token) | ||
2628 | map_private_extent_buffer(leaf, (unsigned long)item, | ||
2629 | sizeof(struct btrfs_inode_item), | ||
2630 | &leaf->map_token, &leaf->kaddr, | ||
2631 | &leaf->map_start, &leaf->map_len, | ||
2632 | KM_USER1); | ||
2633 | |||
2517 | btrfs_set_inode_uid(leaf, item, inode->i_uid); | 2634 | btrfs_set_inode_uid(leaf, item, inode->i_uid); |
2518 | btrfs_set_inode_gid(leaf, item, inode->i_gid); | 2635 | btrfs_set_inode_gid(leaf, item, inode->i_gid); |
2519 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); | 2636 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); |
@@ -2541,7 +2658,12 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2541 | btrfs_set_inode_transid(leaf, item, trans->transid); | 2658 | btrfs_set_inode_transid(leaf, item, trans->transid); |
2542 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | 2659 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); |
2543 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | 2660 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); |
2544 | btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); | 2661 | btrfs_set_inode_block_group(leaf, item, 0); |
2662 | |||
2663 | if (leaf->map_token) { | ||
2664 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
2665 | leaf->map_token = NULL; | ||
2666 | } | ||
2545 | } | 2667 | } |
2546 | 2668 | ||
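Both btrfs_read_locked_inode() and fill_inode_item() now bracket their run of per-field accessors with one map_private_extent_buffer()/unmap_extent_buffer() pair, holding a single mapping in leaf->map_token across all the inode-item fields instead of letting each accessor map the buffer on its own. A userspace model that just counts the mappings:

#include <stdio.h>

static int map_calls;

static void map_item(void)   { map_calls++; }
static void unmap_item(void) { }

/* one mapping per accessor: what the unhinted per-field helpers amount to */
static void read_fields_per_access(int nfields)
{
    for (int i = 0; i < nfields; i++) {
        map_item();
        /* read field i */
        unmap_item();
    }
}

/* token style: map once, read every field via the held mapping, unmap once */
static void read_fields_with_token(int nfields)
{
    map_item();
    for (int i = 0; i < nfields; i++)
        ;    /* read field i through the cached address */
    unmap_item();
}

int main(void)
{
    map_calls = 0;
    read_fields_per_access(20);
    printf("per-access: %d mappings\n", map_calls);

    map_calls = 0;
    read_fields_with_token(20);
    printf("with token: %d mappings\n", map_calls);
    return 0;
}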
2547 | /* | 2669 | /* |
@@ -2555,11 +2677,28 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2555 | struct extent_buffer *leaf; | 2677 | struct extent_buffer *leaf; |
2556 | int ret; | 2678 | int ret; |
2557 | 2679 | ||
2680 | /* | ||
2681 | * If the inode is a free space inode, we can deadlock during commit | ||
2682 | * if we put it into the delayed code. | ||
2683 | * | ||
2684 | * The data relocation inode should also be directly updated | ||
2685 | * without delay | ||
2686 | */ | ||
2687 | if (!is_free_space_inode(root, inode) | ||
2688 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { | ||
2689 | ret = btrfs_delayed_update_inode(trans, root, inode); | ||
2690 | if (!ret) | ||
2691 | btrfs_set_inode_last_trans(trans, inode); | ||
2692 | return ret; | ||
2693 | } | ||
2694 | |||
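Per the comment above, btrfs_update_inode() now routes ordinary inodes through btrfs_delayed_update_inode() and reserves the direct in-tree update for the free-space and data-relocation inodes, whose updates must not be deferred to commit time. The control flow, as a stubbed sketch:

#include <stdbool.h>
#include <stdio.h>

/* modeled routing: ordinary inodes go through the delayed-inode code,
 * the cache and relocation inodes write the item directly */
static int update_inode(bool cache_inode, bool data_reloc_root)
{
    if (!cache_inode && !data_reloc_root) {
        printf("queued via delayed-inode update\n");
        return 0;    /* no fallback: the delayed path's result is final */
    }
    printf("direct in-tree item update\n");
    return 0;
}

int main(void)
{
    update_inode(false, false);    /* a normal file */
    update_inode(true, false);     /* free-space cache inode */
    return 0;
}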
2558 | path = btrfs_alloc_path(); | 2695 | path = btrfs_alloc_path(); |
2559 | BUG_ON(!path); | 2696 | if (!path) |
2697 | return -ENOMEM; | ||
2698 | |||
2560 | path->leave_spinning = 1; | 2699 | path->leave_spinning = 1; |
2561 | ret = btrfs_lookup_inode(trans, root, path, | 2700 | ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, |
2562 | &BTRFS_I(inode)->location, 1); | 2701 | 1); |
2563 | if (ret) { | 2702 | if (ret) { |
2564 | if (ret > 0) | 2703 | if (ret > 0) |
2565 | ret = -ENOENT; | 2704 | ret = -ENOENT; |
@@ -2569,7 +2708,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2569 | btrfs_unlock_up_safe(path, 1); | 2708 | btrfs_unlock_up_safe(path, 1); |
2570 | leaf = path->nodes[0]; | 2709 | leaf = path->nodes[0]; |
2571 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | 2710 | inode_item = btrfs_item_ptr(leaf, path->slots[0], |
2572 | struct btrfs_inode_item); | 2711 | struct btrfs_inode_item); |
2573 | 2712 | ||
2574 | fill_inode_item(trans, leaf, inode_item, inode); | 2713 | fill_inode_item(trans, leaf, inode_item, inode); |
2575 | btrfs_mark_buffer_dirty(leaf); | 2714 | btrfs_mark_buffer_dirty(leaf); |
@@ -2580,16 +2719,15 @@ failed: | |||
2580 | return ret; | 2719 | return ret; |
2581 | } | 2720 | } |
2582 | 2721 | ||
2583 | |||
2584 | /* | 2722 | /* |
2585 | * unlink helper that gets used here in inode.c and in the tree logging | 2723 | * unlink helper that gets used here in inode.c and in the tree logging |
2586 | * recovery code. It removes a link in a directory with a given name, and | 2724 | * recovery code. It removes a link in a directory with a given name, and |
2587 | * also drops the back refs in the inode to the directory | 2725 | * also drops the back refs in the inode to the directory |
2588 | */ | 2726 | */ |
2589 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | 2727 | static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, |
2590 | struct btrfs_root *root, | 2728 | struct btrfs_root *root, |
2591 | struct inode *dir, struct inode *inode, | 2729 | struct inode *dir, struct inode *inode, |
2592 | const char *name, int name_len) | 2730 | const char *name, int name_len) |
2593 | { | 2731 | { |
2594 | struct btrfs_path *path; | 2732 | struct btrfs_path *path; |
2595 | int ret = 0; | 2733 | int ret = 0; |
@@ -2597,15 +2735,17 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
2597 | struct btrfs_dir_item *di; | 2735 | struct btrfs_dir_item *di; |
2598 | struct btrfs_key key; | 2736 | struct btrfs_key key; |
2599 | u64 index; | 2737 | u64 index; |
2738 | u64 ino = btrfs_ino(inode); | ||
2739 | u64 dir_ino = btrfs_ino(dir); | ||
2600 | 2740 | ||
2601 | path = btrfs_alloc_path(); | 2741 | path = btrfs_alloc_path(); |
2602 | if (!path) { | 2742 | if (!path) { |
2603 | ret = -ENOMEM; | 2743 | ret = -ENOMEM; |
2604 | goto err; | 2744 | goto out; |
2605 | } | 2745 | } |
2606 | 2746 | ||
2607 | path->leave_spinning = 1; | 2747 | path->leave_spinning = 1; |
2608 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | 2748 | di = btrfs_lookup_dir_item(trans, root, path, dir_ino, |
2609 | name, name_len, -1); | 2749 | name, name_len, -1); |
2610 | if (IS_ERR(di)) { | 2750 | if (IS_ERR(di)) { |
2611 | ret = PTR_ERR(di); | 2751 | ret = PTR_ERR(di); |
@@ -2620,38 +2760,29 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
2620 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | 2760 | ret = btrfs_delete_one_dir_name(trans, root, path, di); |
2621 | if (ret) | 2761 | if (ret) |
2622 | goto err; | 2762 | goto err; |
2623 | btrfs_release_path(root, path); | 2763 | btrfs_release_path(path); |
2624 | 2764 | ||
2625 | ret = btrfs_del_inode_ref(trans, root, name, name_len, | 2765 | ret = btrfs_del_inode_ref(trans, root, name, name_len, ino, |
2626 | inode->i_ino, | 2766 | dir_ino, &index); |
2627 | dir->i_ino, &index); | ||
2628 | if (ret) { | 2767 | if (ret) { |
2629 | printk(KERN_INFO "btrfs failed to delete reference to %.*s, " | 2768 | printk(KERN_INFO "btrfs failed to delete reference to %.*s, " |
2630 | "inode %lu parent %lu\n", name_len, name, | 2769 | "inode %llu parent %llu\n", name_len, name, |
2631 | inode->i_ino, dir->i_ino); | 2770 | (unsigned long long)ino, (unsigned long long)dir_ino); |
2632 | goto err; | 2771 | goto err; |
2633 | } | 2772 | } |
2634 | 2773 | ||
2635 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, | 2774 | ret = btrfs_delete_delayed_dir_index(trans, root, dir, index); |
2636 | index, name, name_len, -1); | 2775 | if (ret) |
2637 | if (IS_ERR(di)) { | ||
2638 | ret = PTR_ERR(di); | ||
2639 | goto err; | ||
2640 | } | ||
2641 | if (!di) { | ||
2642 | ret = -ENOENT; | ||
2643 | goto err; | 2776 | goto err; |
2644 | } | ||
2645 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
2646 | btrfs_release_path(root, path); | ||
2647 | 2777 | ||
2648 | ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, | 2778 | ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, |
2649 | inode, dir->i_ino); | 2779 | inode, dir_ino); |
2650 | BUG_ON(ret != 0 && ret != -ENOENT); | 2780 | BUG_ON(ret != 0 && ret != -ENOENT); |
2651 | 2781 | ||
2652 | ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, | 2782 | ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, |
2653 | dir, index); | 2783 | dir, index); |
2654 | BUG_ON(ret); | 2784 | if (ret == -ENOENT) |
2785 | ret = 0; | ||
2655 | err: | 2786 | err: |
2656 | btrfs_free_path(path); | 2787 | btrfs_free_path(path); |
2657 | if (ret) | 2788 | if (ret) |
@@ -2660,22 +2791,36 @@ err: | |||
2660 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | 2791 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); |
2661 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 2792 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
2662 | btrfs_update_inode(trans, root, dir); | 2793 | btrfs_update_inode(trans, root, dir); |
2663 | btrfs_drop_nlink(inode); | ||
2664 | ret = btrfs_update_inode(trans, root, inode); | ||
2665 | out: | 2794 | out: |
2666 | return ret; | 2795 | return ret; |
2667 | } | 2796 | } |
2668 | 2797 | ||
2798 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | ||
2799 | struct btrfs_root *root, | ||
2800 | struct inode *dir, struct inode *inode, | ||
2801 | const char *name, int name_len) | ||
2802 | { | ||
2803 | int ret; | ||
2804 | ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len); | ||
2805 | if (!ret) { | ||
2806 | btrfs_drop_nlink(inode); | ||
2807 | ret = btrfs_update_inode(trans, root, inode); | ||
2808 | } | ||
2809 | return ret; | ||
2810 | } | ||
2811 | |||
2812 | |||
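
The old body becomes __btrfs_unlink_inode(), which removes the name without touching the link count, while btrfs_unlink_inode() is now a thin wrapper that additionally drops i_nlink and writes the inode back. Callers that move a name rather than destroy it can use the double-underscore variant directly; a sketch of such a caller, patterned on what the rename path turns into (illustrative, not quoted from this hunk):

    /* rename: the name leaves old_dir but the inode keeps its nlink */
    ret = __btrfs_unlink_inode(trans, root, old_dir, old_inode,
                               old_dentry->d_name.name,
                               old_dentry->d_name.len);
    if (!ret)
            ret = btrfs_update_inode(trans, root, old_inode);
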
2669 | /* helper to check if there is any shared block in the path */ | 2813 | /* helper to check if there is any shared block in the path */ |
2670 | static int check_path_shared(struct btrfs_root *root, | 2814 | static int check_path_shared(struct btrfs_root *root, |
2671 | struct btrfs_path *path) | 2815 | struct btrfs_path *path) |
2672 | { | 2816 | { |
2673 | struct extent_buffer *eb; | 2817 | struct extent_buffer *eb; |
2674 | int level; | 2818 | int level; |
2675 | int ret; | ||
2676 | u64 refs = 1; | 2819 | u64 refs = 1; |
2677 | 2820 | ||
2678 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | 2821 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { |
2822 | int ret; | ||
2823 | |||
2679 | if (!path->nodes[level]) | 2824 | if (!path->nodes[level]) |
2680 | break; | 2825 | break; |
2681 | eb = path->nodes[level]; | 2826 | eb = path->nodes[level]; |
@@ -2709,12 +2854,14 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
2709 | int check_link = 1; | 2854 | int check_link = 1; |
2710 | int err = -ENOSPC; | 2855 | int err = -ENOSPC; |
2711 | int ret; | 2856 | int ret; |
2857 | u64 ino = btrfs_ino(inode); | ||
2858 | u64 dir_ino = btrfs_ino(dir); | ||
2712 | 2859 | ||
2713 | trans = btrfs_start_transaction(root, 10); | 2860 | trans = btrfs_start_transaction(root, 10); |
2714 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) | 2861 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) |
2715 | return trans; | 2862 | return trans; |
2716 | 2863 | ||
2717 | if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) | 2864 | if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
2718 | return ERR_PTR(-ENOSPC); | 2865 | return ERR_PTR(-ENOSPC); |
2719 | 2866 | ||
2720 | /* check if someone else holds a reference */ | 2867 |
@@ -2755,7 +2902,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
2755 | } else { | 2902 | } else { |
2756 | check_link = 0; | 2903 | check_link = 0; |
2757 | } | 2904 | } |
2758 | btrfs_release_path(root, path); | 2905 | btrfs_release_path(path); |
2759 | 2906 | ||
2760 | ret = btrfs_lookup_inode(trans, root, path, | 2907 | ret = btrfs_lookup_inode(trans, root, path, |
2761 | &BTRFS_I(inode)->location, 0); | 2908 | &BTRFS_I(inode)->location, 0); |
@@ -2769,11 +2916,11 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
2769 | } else { | 2916 | } else { |
2770 | check_link = 0; | 2917 | check_link = 0; |
2771 | } | 2918 | } |
2772 | btrfs_release_path(root, path); | 2919 | btrfs_release_path(path); |
2773 | 2920 | ||
2774 | if (ret == 0 && S_ISREG(inode->i_mode)) { | 2921 | if (ret == 0 && S_ISREG(inode->i_mode)) { |
2775 | ret = btrfs_lookup_file_extent(trans, root, path, | 2922 | ret = btrfs_lookup_file_extent(trans, root, path, |
2776 | inode->i_ino, (u64)-1, 0); | 2923 | ino, (u64)-1, 0); |
2777 | if (ret < 0) { | 2924 | if (ret < 0) { |
2778 | err = ret; | 2925 | err = ret; |
2779 | goto out; | 2926 | goto out; |
@@ -2781,7 +2928,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
2781 | BUG_ON(ret == 0); | 2928 | BUG_ON(ret == 0); |
2782 | if (check_path_shared(root, path)) | 2929 | if (check_path_shared(root, path)) |
2783 | goto out; | 2930 | goto out; |
2784 | btrfs_release_path(root, path); | 2931 | btrfs_release_path(path); |
2785 | } | 2932 | } |
2786 | 2933 | ||
2787 | if (!check_link) { | 2934 | if (!check_link) { |
@@ -2789,7 +2936,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
2789 | goto out; | 2936 | goto out; |
2790 | } | 2937 | } |
2791 | 2938 | ||
2792 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | 2939 | di = btrfs_lookup_dir_item(trans, root, path, dir_ino, |
2793 | dentry->d_name.name, dentry->d_name.len, 0); | 2940 | dentry->d_name.name, dentry->d_name.len, 0); |
2794 | if (IS_ERR(di)) { | 2941 | if (IS_ERR(di)) { |
2795 | err = PTR_ERR(di); | 2942 | err = PTR_ERR(di); |
@@ -2802,11 +2949,11 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
2802 | err = 0; | 2949 | err = 0; |
2803 | goto out; | 2950 | goto out; |
2804 | } | 2951 | } |
2805 | btrfs_release_path(root, path); | 2952 | btrfs_release_path(path); |
2806 | 2953 | ||
2807 | ref = btrfs_lookup_inode_ref(trans, root, path, | 2954 | ref = btrfs_lookup_inode_ref(trans, root, path, |
2808 | dentry->d_name.name, dentry->d_name.len, | 2955 | dentry->d_name.name, dentry->d_name.len, |
2809 | inode->i_ino, dir->i_ino, 0); | 2956 | ino, dir_ino, 0); |
2810 | if (IS_ERR(ref)) { | 2957 | if (IS_ERR(ref)) { |
2811 | err = PTR_ERR(ref); | 2958 | err = PTR_ERR(ref); |
2812 | goto out; | 2959 | goto out; |
@@ -2815,9 +2962,17 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
2815 | if (check_path_shared(root, path)) | 2962 | if (check_path_shared(root, path)) |
2816 | goto out; | 2963 | goto out; |
2817 | index = btrfs_inode_ref_index(path->nodes[0], ref); | 2964 | index = btrfs_inode_ref_index(path->nodes[0], ref); |
2818 | btrfs_release_path(root, path); | 2965 | btrfs_release_path(path); |
2819 | 2966 | ||
2820 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, | 2967 | /* |
2968 | * This is a commit root search, if we can lookup inode item and other | ||
2969 | * related items in the commit root, it means the transaction of | ||
2970 | * dir/file creation has been committed, and the dir index item whose | ||
2971 | * insertion we delayed has also been inserted into the commit root. So | ||
2972 | * we needn't worry about the delayed insertion of the dir index item | ||
2973 | * here. | ||
2974 | */ | ||
2975 | di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index, | ||
2821 | dentry->d_name.name, dentry->d_name.len, 0); | 2976 | dentry->d_name.name, dentry->d_name.len, 0); |
2822 | if (IS_ERR(di)) { | 2977 | if (IS_ERR(di)) { |
2823 | err = PTR_ERR(di); | 2978 | err = PTR_ERR(di); |
@@ -2862,8 +3017,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
2862 | if (IS_ERR(trans)) | 3017 | if (IS_ERR(trans)) |
2863 | return PTR_ERR(trans); | 3018 | return PTR_ERR(trans); |
2864 | 3019 | ||
2865 | btrfs_set_trans_block_group(trans, dir); | ||
2866 | |||
2867 | btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); | 3020 | btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); |
2868 | 3021 | ||
2869 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 3022 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
@@ -2892,47 +3045,41 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | |||
2892 | struct btrfs_key key; | 3045 | struct btrfs_key key; |
2893 | u64 index; | 3046 | u64 index; |
2894 | int ret; | 3047 | int ret; |
3048 | u64 dir_ino = btrfs_ino(dir); | ||
2895 | 3049 | ||
2896 | path = btrfs_alloc_path(); | 3050 | path = btrfs_alloc_path(); |
2897 | if (!path) | 3051 | if (!path) |
2898 | return -ENOMEM; | 3052 | return -ENOMEM; |
2899 | 3053 | ||
2900 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | 3054 | di = btrfs_lookup_dir_item(trans, root, path, dir_ino, |
2901 | name, name_len, -1); | 3055 | name, name_len, -1); |
2902 | BUG_ON(!di || IS_ERR(di)); | 3056 | BUG_ON(IS_ERR_OR_NULL(di)); |
2903 | 3057 | ||
2904 | leaf = path->nodes[0]; | 3058 | leaf = path->nodes[0]; |
2905 | btrfs_dir_item_key_to_cpu(leaf, di, &key); | 3059 | btrfs_dir_item_key_to_cpu(leaf, di, &key); |
2906 | WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); | 3060 | WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); |
2907 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | 3061 | ret = btrfs_delete_one_dir_name(trans, root, path, di); |
2908 | BUG_ON(ret); | 3062 | BUG_ON(ret); |
2909 | btrfs_release_path(root, path); | 3063 | btrfs_release_path(path); |
2910 | 3064 | ||
2911 | ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, | 3065 | ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, |
2912 | objectid, root->root_key.objectid, | 3066 | objectid, root->root_key.objectid, |
2913 | dir->i_ino, &index, name, name_len); | 3067 | dir_ino, &index, name, name_len); |
2914 | if (ret < 0) { | 3068 | if (ret < 0) { |
2915 | BUG_ON(ret != -ENOENT); | 3069 | BUG_ON(ret != -ENOENT); |
2916 | di = btrfs_search_dir_index_item(root, path, dir->i_ino, | 3070 | di = btrfs_search_dir_index_item(root, path, dir_ino, |
2917 | name, name_len); | 3071 | name, name_len); |
2918 | BUG_ON(!di || IS_ERR(di)); | 3072 | BUG_ON(IS_ERR_OR_NULL(di)); |
2919 | 3073 | ||
2920 | leaf = path->nodes[0]; | 3074 | leaf = path->nodes[0]; |
2921 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | 3075 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
2922 | btrfs_release_path(root, path); | 3076 | btrfs_release_path(path); |
2923 | index = key.offset; | 3077 | index = key.offset; |
2924 | } | 3078 | } |
3079 | btrfs_release_path(path); | ||
2925 | 3080 | ||
2926 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, | 3081 | ret = btrfs_delete_delayed_dir_index(trans, root, dir, index); |
2927 | index, name, name_len, -1); | ||
2928 | BUG_ON(!di || IS_ERR(di)); | ||
2929 | |||
2930 | leaf = path->nodes[0]; | ||
2931 | btrfs_dir_item_key_to_cpu(leaf, di, &key); | ||
2932 | WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); | ||
2933 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
2934 | BUG_ON(ret); | 3082 | BUG_ON(ret); |
2935 | btrfs_release_path(root, path); | ||
2936 | 3083 | ||
2937 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | 3084 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); |
2938 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 3085 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
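
The BUG_ON(!di || IS_ERR(di)) pairs collapse into IS_ERR_OR_NULL(), the <linux/err.h> helper that covers both failure conventions at once; at this point it reads roughly:

    static inline long __must_check IS_ERR_OR_NULL(const void *ptr)
    {
            return !ptr || IS_ERR_VALUE((unsigned long)ptr);
    }
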
@@ -2952,16 +3099,14 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2952 | unsigned long nr = 0; | 3099 | unsigned long nr = 0; |
2953 | 3100 | ||
2954 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || | 3101 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || |
2955 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 3102 | btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) |
2956 | return -ENOTEMPTY; | 3103 | return -ENOTEMPTY; |
2957 | 3104 | ||
2958 | trans = __unlink_start_trans(dir, dentry); | 3105 | trans = __unlink_start_trans(dir, dentry); |
2959 | if (IS_ERR(trans)) | 3106 | if (IS_ERR(trans)) |
2960 | return PTR_ERR(trans); | 3107 | return PTR_ERR(trans); |
2961 | 3108 | ||
2962 | btrfs_set_trans_block_group(trans, dir); | 3109 | if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { |
2963 | |||
2964 | if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { | ||
2965 | err = btrfs_unlink_subvol(trans, root, dir, | 3110 | err = btrfs_unlink_subvol(trans, root, dir, |
2966 | BTRFS_I(inode)->location.objectid, | 3111 | BTRFS_I(inode)->location.objectid, |
2967 | dentry->d_name.name, | 3112 | dentry->d_name.name, |
@@ -2986,178 +3131,6 @@ out: | |||
2986 | return err; | 3131 | return err; |
2987 | } | 3132 | } |
2988 | 3133 | ||
2989 | #if 0 | ||
2990 | /* | ||
2991 | * when truncating bytes in a file, it is possible to avoid reading | ||
2992 | * the leaves that contain only checksum items. This can be the | ||
2993 | * majority of the IO required to delete a large file, but it must | ||
2994 | * be done carefully. | ||
2995 | * | ||
2996 | * The keys in the level just above the leaves are checked to make sure | ||
2997 | * the lowest key in a given leaf is a csum key, and starts at an offset | ||
2998 | * after the new size. | ||
2999 | * | ||
3000 | * Then the key for the next leaf is checked to make sure it also has | ||
3001 | * a checksum item for the same file. If it does, we know our target leaf | ||
3002 | * contains only checksum items, and it can be safely freed without reading | ||
3003 | * it. | ||
3004 | * | ||
3005 | * This is just an optimization targeted at large files. It may do | ||
3006 | * nothing. It will return 0 unless things went badly. | ||
3007 | */ | ||
3008 | static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans, | ||
3009 | struct btrfs_root *root, | ||
3010 | struct btrfs_path *path, | ||
3011 | struct inode *inode, u64 new_size) | ||
3012 | { | ||
3013 | struct btrfs_key key; | ||
3014 | int ret; | ||
3015 | int nritems; | ||
3016 | struct btrfs_key found_key; | ||
3017 | struct btrfs_key other_key; | ||
3018 | struct btrfs_leaf_ref *ref; | ||
3019 | u64 leaf_gen; | ||
3020 | u64 leaf_start; | ||
3021 | |||
3022 | path->lowest_level = 1; | ||
3023 | key.objectid = inode->i_ino; | ||
3024 | key.type = BTRFS_CSUM_ITEM_KEY; | ||
3025 | key.offset = new_size; | ||
3026 | again: | ||
3027 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
3028 | if (ret < 0) | ||
3029 | goto out; | ||
3030 | |||
3031 | if (path->nodes[1] == NULL) { | ||
3032 | ret = 0; | ||
3033 | goto out; | ||
3034 | } | ||
3035 | ret = 0; | ||
3036 | btrfs_node_key_to_cpu(path->nodes[1], &found_key, path->slots[1]); | ||
3037 | nritems = btrfs_header_nritems(path->nodes[1]); | ||
3038 | |||
3039 | if (!nritems) | ||
3040 | goto out; | ||
3041 | |||
3042 | if (path->slots[1] >= nritems) | ||
3043 | goto next_node; | ||
3044 | |||
3045 | /* did we find a key greater than anything we want to delete? */ | ||
3046 | if (found_key.objectid > inode->i_ino || | ||
3047 | (found_key.objectid == inode->i_ino && found_key.type > key.type)) | ||
3048 | goto out; | ||
3049 | |||
3050 | /* we check the next key in the node to make sure the leave contains | ||
3051 | * only checksum items. This comparison doesn't work if our | ||
3052 | * leaf is the last one in the node | ||
3053 | */ | ||
3054 | if (path->slots[1] + 1 >= nritems) { | ||
3055 | next_node: | ||
3056 | /* search forward from the last key in the node, this | ||
3057 | * will bring us into the next node in the tree | ||
3058 | */ | ||
3059 | btrfs_node_key_to_cpu(path->nodes[1], &found_key, nritems - 1); | ||
3060 | |||
3061 | /* unlikely, but we inc below, so check to be safe */ | ||
3062 | if (found_key.offset == (u64)-1) | ||
3063 | goto out; | ||
3064 | |||
3065 | /* search_forward needs a path with locks held, do the | ||
3066 | * search again for the original key. It is possible | ||
3067 | * this will race with a balance and return a path that | ||
3068 | * we could modify, but this drop is just an optimization | ||
3069 | * and is allowed to miss some leaves. | ||
3070 | */ | ||
3071 | btrfs_release_path(root, path); | ||
3072 | found_key.offset++; | ||
3073 | |||
3074 | /* setup a max key for search_forward */ | ||
3075 | other_key.offset = (u64)-1; | ||
3076 | other_key.type = key.type; | ||
3077 | other_key.objectid = key.objectid; | ||
3078 | |||
3079 | path->keep_locks = 1; | ||
3080 | ret = btrfs_search_forward(root, &found_key, &other_key, | ||
3081 | path, 0, 0); | ||
3082 | path->keep_locks = 0; | ||
3083 | if (ret || found_key.objectid != key.objectid || | ||
3084 | found_key.type != key.type) { | ||
3085 | ret = 0; | ||
3086 | goto out; | ||
3087 | } | ||
3088 | |||
3089 | key.offset = found_key.offset; | ||
3090 | btrfs_release_path(root, path); | ||
3091 | cond_resched(); | ||
3092 | goto again; | ||
3093 | } | ||
3094 | |||
3095 | /* we know there's one more slot after us in the tree, | ||
3096 | * read that key so we can verify it is also a checksum item | ||
3097 | */ | ||
3098 | btrfs_node_key_to_cpu(path->nodes[1], &other_key, path->slots[1] + 1); | ||
3099 | |||
3100 | if (found_key.objectid < inode->i_ino) | ||
3101 | goto next_key; | ||
3102 | |||
3103 | if (found_key.type != key.type || found_key.offset < new_size) | ||
3104 | goto next_key; | ||
3105 | |||
3106 | /* | ||
3107 | * if the key for the next leaf isn't a csum key from this objectid, | ||
3108 | * we can't be sure there aren't good items inside this leaf. | ||
3109 | * Bail out | ||
3110 | */ | ||
3111 | if (other_key.objectid != inode->i_ino || other_key.type != key.type) | ||
3112 | goto out; | ||
3113 | |||
3114 | leaf_start = btrfs_node_blockptr(path->nodes[1], path->slots[1]); | ||
3115 | leaf_gen = btrfs_node_ptr_generation(path->nodes[1], path->slots[1]); | ||
3116 | /* | ||
3117 | * it is safe to delete this leaf, it contains only | ||
3118 | * csum items from this inode at an offset >= new_size | ||
3119 | */ | ||
3120 | ret = btrfs_del_leaf(trans, root, path, leaf_start); | ||
3121 | BUG_ON(ret); | ||
3122 | |||
3123 | if (root->ref_cows && leaf_gen < trans->transid) { | ||
3124 | ref = btrfs_alloc_leaf_ref(root, 0); | ||
3125 | if (ref) { | ||
3126 | ref->root_gen = root->root_key.offset; | ||
3127 | ref->bytenr = leaf_start; | ||
3128 | ref->owner = 0; | ||
3129 | ref->generation = leaf_gen; | ||
3130 | ref->nritems = 0; | ||
3131 | |||
3132 | btrfs_sort_leaf_ref(ref); | ||
3133 | |||
3134 | ret = btrfs_add_leaf_ref(root, ref, 0); | ||
3135 | WARN_ON(ret); | ||
3136 | btrfs_free_leaf_ref(root, ref); | ||
3137 | } else { | ||
3138 | WARN_ON(1); | ||
3139 | } | ||
3140 | } | ||
3141 | next_key: | ||
3142 | btrfs_release_path(root, path); | ||
3143 | |||
3144 | if (other_key.objectid == inode->i_ino && | ||
3145 | other_key.type == key.type && other_key.offset > key.offset) { | ||
3146 | key.offset = other_key.offset; | ||
3147 | cond_resched(); | ||
3148 | goto again; | ||
3149 | } | ||
3150 | ret = 0; | ||
3151 | out: | ||
3152 | /* fixup any changes we've made to the path */ | ||
3153 | path->lowest_level = 0; | ||
3154 | path->keep_locks = 0; | ||
3155 | btrfs_release_path(root, path); | ||
3156 | return ret; | ||
3157 | } | ||
3158 | |||
3159 | #endif | ||
3160 | |||
3161 | /* | 3134 | /* |
3162 | * this can truncate away extent items, csum items and directory items. | 3135 | * this can truncate away extent items, csum items and directory items. |
3163 | * It starts at a high offset and removes keys until it can't find | 3136 | * It starts at a high offset and removes keys until it can't find |
@@ -3193,17 +3166,27 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
3193 | int encoding; | 3166 | int encoding; |
3194 | int ret; | 3167 | int ret; |
3195 | int err = 0; | 3168 | int err = 0; |
3169 | u64 ino = btrfs_ino(inode); | ||
3196 | 3170 | ||
3197 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); | 3171 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); |
3198 | 3172 | ||
3199 | if (root->ref_cows) | 3173 | if (root->ref_cows || root == root->fs_info->tree_root) |
3200 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); | 3174 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); |
3201 | 3175 | ||
3176 | /* | ||
3177 | * This function is also used to drop the items in the log tree before | ||
3178 | * we relog the inode, so if root != BTRFS_I(inode)->root, it means | ||
3179 | * it is used to drop the logged items. So we shouldn't kill the delayed | ||
3180 | * items. | ||
3181 | */ | ||
3182 | if (min_type == 0 && root == BTRFS_I(inode)->root) | ||
3183 | btrfs_kill_delayed_inode_items(inode); | ||
3184 | |||
3202 | path = btrfs_alloc_path(); | 3185 | path = btrfs_alloc_path(); |
3203 | BUG_ON(!path); | 3186 | BUG_ON(!path); |
3204 | path->reada = -1; | 3187 | path->reada = -1; |
3205 | 3188 | ||
3206 | key.objectid = inode->i_ino; | 3189 | key.objectid = ino; |
3207 | key.offset = (u64)-1; | 3190 | key.offset = (u64)-1; |
3208 | key.type = (u8)-1; | 3191 | key.type = (u8)-1; |
3209 | 3192 | ||
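
The new guard above keeps delayed items alive when btrfs_truncate_inode_items() runs against a log tree, since only the real truncate path should discard them. For orientation, min_type selects the smallest key type that may be removed; the two typical call patterns, sketched the way the callers in this file use them:

    /* eviction / full delete: remove every key belonging to the inode */
    ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);

    /* shrinking a live file: keep the inode item and xattrs, drop data
     * extents and csums past new_size */
    ret = btrfs_truncate_inode_items(trans, root, inode, new_size,
                                     BTRFS_EXTENT_DATA_KEY);
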
@@ -3231,7 +3214,7 @@ search_again: | |||
3231 | found_type = btrfs_key_type(&found_key); | 3214 | found_type = btrfs_key_type(&found_key); |
3232 | encoding = 0; | 3215 | encoding = 0; |
3233 | 3216 | ||
3234 | if (found_key.objectid != inode->i_ino) | 3217 | if (found_key.objectid != ino) |
3235 | break; | 3218 | break; |
3236 | 3219 | ||
3237 | if (found_type < min_type) | 3220 | if (found_type < min_type) |
@@ -3321,7 +3304,6 @@ search_again: | |||
3321 | btrfs_file_extent_calc_inline_size(size); | 3304 | btrfs_file_extent_calc_inline_size(size); |
3322 | ret = btrfs_truncate_item(trans, root, path, | 3305 | ret = btrfs_truncate_item(trans, root, path, |
3323 | size, 1); | 3306 | size, 1); |
3324 | BUG_ON(ret); | ||
3325 | } else if (root->ref_cows) { | 3307 | } else if (root->ref_cows) { |
3326 | inode_sub_bytes(inode, item_end + 1 - | 3308 | inode_sub_bytes(inode, item_end + 1 - |
3327 | found_key.offset); | 3309 | found_key.offset); |
@@ -3344,12 +3326,13 @@ delete: | |||
3344 | } else { | 3326 | } else { |
3345 | break; | 3327 | break; |
3346 | } | 3328 | } |
3347 | if (found_extent && root->ref_cows) { | 3329 | if (found_extent && (root->ref_cows || |
3330 | root == root->fs_info->tree_root)) { | ||
3348 | btrfs_set_path_blocking(path); | 3331 | btrfs_set_path_blocking(path); |
3349 | ret = btrfs_free_extent(trans, root, extent_start, | 3332 | ret = btrfs_free_extent(trans, root, extent_start, |
3350 | extent_num_bytes, 0, | 3333 | extent_num_bytes, 0, |
3351 | btrfs_header_owner(leaf), | 3334 | btrfs_header_owner(leaf), |
3352 | inode->i_ino, extent_offset); | 3335 | ino, extent_offset); |
3353 | BUG_ON(ret); | 3336 | BUG_ON(ret); |
3354 | } | 3337 | } |
3355 | 3338 | ||
@@ -3358,7 +3341,9 @@ delete: | |||
3358 | 3341 | ||
3359 | if (path->slots[0] == 0 || | 3342 | if (path->slots[0] == 0 || |
3360 | path->slots[0] != pending_del_slot) { | 3343 | path->slots[0] != pending_del_slot) { |
3361 | if (root->ref_cows) { | 3344 | if (root->ref_cows && |
3345 | BTRFS_I(inode)->location.objectid != | ||
3346 | BTRFS_FREE_INO_OBJECTID) { | ||
3362 | err = -EAGAIN; | 3347 | err = -EAGAIN; |
3363 | goto out; | 3348 | goto out; |
3364 | } | 3349 | } |
@@ -3369,7 +3354,7 @@ delete: | |||
3369 | BUG_ON(ret); | 3354 | BUG_ON(ret); |
3370 | pending_del_nr = 0; | 3355 | pending_del_nr = 0; |
3371 | } | 3356 | } |
3372 | btrfs_release_path(root, path); | 3357 | btrfs_release_path(path); |
3373 | goto search_again; | 3358 | goto search_again; |
3374 | } else { | 3359 | } else { |
3375 | path->slots[0]--; | 3360 | path->slots[0]--; |
@@ -3485,7 +3470,13 @@ out: | |||
3485 | return ret; | 3470 | return ret; |
3486 | } | 3471 | } |
3487 | 3472 | ||
3488 | int btrfs_cont_expand(struct inode *inode, loff_t size) | 3473 | /* |
3474 | * This function puts in dummy file extents for the area we're creating a hole | ||
3475 | * for. So if we are truncating this file to a larger size we need to insert | ||
3476 | * these file extents so that btrfs_get_extent will return an EXTENT_MAP_HOLE for | ||
3477 | * the range between oldsize and size | ||
3478 | */ | ||
3479 | int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | ||
3489 | { | 3480 | { |
3490 | struct btrfs_trans_handle *trans; | 3481 | struct btrfs_trans_handle *trans; |
3491 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3482 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -3493,7 +3484,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3493 | struct extent_map *em = NULL; | 3484 | struct extent_map *em = NULL; |
3494 | struct extent_state *cached_state = NULL; | 3485 | struct extent_state *cached_state = NULL; |
3495 | u64 mask = root->sectorsize - 1; | 3486 | u64 mask = root->sectorsize - 1; |
3496 | u64 hole_start = (inode->i_size + mask) & ~mask; | 3487 | u64 hole_start = (oldsize + mask) & ~mask; |
3497 | u64 block_end = (size + mask) & ~mask; | 3488 | u64 block_end = (size + mask) & ~mask; |
3498 | u64 last_byte; | 3489 | u64 last_byte; |
3499 | u64 cur_offset; | 3490 | u64 cur_offset; |
@@ -3521,7 +3512,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3521 | while (1) { | 3512 | while (1) { |
3522 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | 3513 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, |
3523 | block_end - cur_offset, 0); | 3514 | block_end - cur_offset, 0); |
3524 | BUG_ON(IS_ERR(em) || !em); | 3515 | BUG_ON(IS_ERR_OR_NULL(em)); |
3525 | last_byte = min(extent_map_end(em), block_end); | 3516 | last_byte = min(extent_map_end(em), block_end); |
3526 | last_byte = (last_byte + mask) & ~mask; | 3517 | last_byte = (last_byte + mask) & ~mask; |
3527 | if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { | 3518 | if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { |
@@ -3533,18 +3524,19 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3533 | err = PTR_ERR(trans); | 3524 | err = PTR_ERR(trans); |
3534 | break; | 3525 | break; |
3535 | } | 3526 | } |
3536 | btrfs_set_trans_block_group(trans, inode); | ||
3537 | 3527 | ||
3538 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3528 | err = btrfs_drop_extents(trans, inode, cur_offset, |
3539 | cur_offset + hole_size, | 3529 | cur_offset + hole_size, |
3540 | &hint_byte, 1); | 3530 | &hint_byte, 1); |
3541 | BUG_ON(err); | 3531 | if (err) |
3532 | break; | ||
3542 | 3533 | ||
3543 | err = btrfs_insert_file_extent(trans, root, | 3534 | err = btrfs_insert_file_extent(trans, root, |
3544 | inode->i_ino, cur_offset, 0, | 3535 | btrfs_ino(inode), cur_offset, 0, |
3545 | 0, hole_size, 0, hole_size, | 3536 | 0, hole_size, 0, hole_size, |
3546 | 0, 0, 0); | 3537 | 0, 0, 0); |
3547 | BUG_ON(err); | 3538 | if (err) |
3539 | break; | ||
3548 | 3540 | ||
3549 | btrfs_drop_extent_cache(inode, hole_start, | 3541 | btrfs_drop_extent_cache(inode, hole_start, |
3550 | last_byte - 1, 0); | 3542 | last_byte - 1, 0); |
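
Per the new comment above btrfs_cont_expand(), growing a file works by inserting dummy file extents over the unwritten range. The disk_bytenr of 0 in the btrfs_insert_file_extent() call is the hole marker, which is what makes btrfs_get_extent() hand back EXTENT_MAP_HOLE later. An annotated reading of that call (parameter names assumed from the function's prototype, which this diff does not show):

    err = btrfs_insert_file_extent(trans, root, btrfs_ino(inode),
                                   cur_offset,
                                   0,          /* disk_bytenr: 0 marks a hole */
                                   0,          /* disk_num_bytes */
                                   hole_size,  /* num_bytes */
                                   0,          /* offset into the extent */
                                   hole_size,  /* ram_bytes */
                                   0, 0, 0);   /* compression, encryption,
                                                  other_encoding */

Turning the two BUG_ON(err) calls into break also means an allocation failure while filling holes now unwinds instead of crashing.
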
@@ -3564,94 +3556,58 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3564 | return err; | 3556 | return err; |
3565 | } | 3557 | } |
3566 | 3558 | ||
3567 | static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | 3559 | static int btrfs_setsize(struct inode *inode, loff_t newsize) |
3568 | { | 3560 | { |
3569 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3561 | loff_t oldsize = i_size_read(inode); |
3570 | struct btrfs_trans_handle *trans; | ||
3571 | unsigned long nr; | ||
3572 | int ret; | 3562 | int ret; |
3573 | 3563 | ||
3574 | if (attr->ia_size == inode->i_size) | 3564 | if (newsize == oldsize) |
3575 | return 0; | 3565 | return 0; |
3576 | 3566 | ||
3577 | if (attr->ia_size > inode->i_size) { | 3567 | if (newsize > oldsize) { |
3578 | unsigned long limit; | 3568 | i_size_write(inode, newsize); |
3579 | limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | 3569 | btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); |
3580 | if (attr->ia_size > inode->i_sb->s_maxbytes) | 3570 | truncate_pagecache(inode, oldsize, newsize); |
3581 | return -EFBIG; | 3571 | ret = btrfs_cont_expand(inode, oldsize, newsize); |
3582 | if (limit != RLIM_INFINITY && attr->ia_size > limit) { | ||
3583 | send_sig(SIGXFSZ, current, 0); | ||
3584 | return -EFBIG; | ||
3585 | } | ||
3586 | } | ||
3587 | |||
3588 | trans = btrfs_start_transaction(root, 5); | ||
3589 | if (IS_ERR(trans)) | ||
3590 | return PTR_ERR(trans); | ||
3591 | |||
3592 | btrfs_set_trans_block_group(trans, inode); | ||
3593 | |||
3594 | ret = btrfs_orphan_add(trans, inode); | ||
3595 | BUG_ON(ret); | ||
3596 | |||
3597 | nr = trans->blocks_used; | ||
3598 | btrfs_end_transaction(trans, root); | ||
3599 | btrfs_btree_balance_dirty(root, nr); | ||
3600 | |||
3601 | if (attr->ia_size > inode->i_size) { | ||
3602 | ret = btrfs_cont_expand(inode, attr->ia_size); | ||
3603 | if (ret) { | 3572 | if (ret) { |
3604 | btrfs_truncate(inode); | 3573 | btrfs_setsize(inode, oldsize); |
3605 | return ret; | 3574 | return ret; |
3606 | } | 3575 | } |
3607 | 3576 | ||
3608 | i_size_write(inode, attr->ia_size); | 3577 | mark_inode_dirty(inode); |
3609 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 3578 | } else { |
3610 | 3579 | ||
3611 | trans = btrfs_start_transaction(root, 0); | 3580 | /* |
3612 | BUG_ON(IS_ERR(trans)); | 3581 | * We're truncating a file that used to have good data down to |
3613 | btrfs_set_trans_block_group(trans, inode); | 3582 | * zero. Make sure it gets into the ordered flush list so that |
3614 | trans->block_rsv = root->orphan_block_rsv; | 3583 | * any new writes get down to disk quickly. |
3615 | BUG_ON(!trans->block_rsv); | 3584 | */ |
3585 | if (newsize == 0) | ||
3586 | BTRFS_I(inode)->ordered_data_close = 1; | ||
3616 | 3587 | ||
3617 | ret = btrfs_update_inode(trans, root, inode); | 3588 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ |
3618 | BUG_ON(ret); | 3589 | truncate_setsize(inode, newsize); |
3619 | if (inode->i_nlink > 0) { | 3590 | ret = btrfs_truncate(inode); |
3620 | ret = btrfs_orphan_del(trans, inode); | ||
3621 | BUG_ON(ret); | ||
3622 | } | ||
3623 | nr = trans->blocks_used; | ||
3624 | btrfs_end_transaction(trans, root); | ||
3625 | btrfs_btree_balance_dirty(root, nr); | ||
3626 | return 0; | ||
3627 | } | 3591 | } |
3628 | 3592 | ||
3629 | /* | 3593 | return ret; |
3630 | * We're truncating a file that used to have good data down to | ||
3631 | * zero. Make sure it gets into the ordered flush list so that | ||
3632 | * any new writes get down to disk quickly. | ||
3633 | */ | ||
3634 | if (attr->ia_size == 0) | ||
3635 | BTRFS_I(inode)->ordered_data_close = 1; | ||
3636 | |||
3637 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ | ||
3638 | ret = vmtruncate(inode, attr->ia_size); | ||
3639 | BUG_ON(ret); | ||
3640 | |||
3641 | return 0; | ||
3642 | } | 3594 | } |
3643 | 3595 | ||
3644 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | 3596 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) |
3645 | { | 3597 | { |
3646 | struct inode *inode = dentry->d_inode; | 3598 | struct inode *inode = dentry->d_inode; |
3599 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3647 | int err; | 3600 | int err; |
3648 | 3601 | ||
3602 | if (btrfs_root_readonly(root)) | ||
3603 | return -EROFS; | ||
3604 | |||
3649 | err = inode_change_ok(inode, attr); | 3605 | err = inode_change_ok(inode, attr); |
3650 | if (err) | 3606 | if (err) |
3651 | return err; | 3607 | return err; |
3652 | 3608 | ||
3653 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { | 3609 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { |
3654 | err = btrfs_setattr_size(inode, attr); | 3610 | err = btrfs_setsize(inode, attr->ia_size); |
3655 | if (err) | 3611 | if (err) |
3656 | return err; | 3612 | return err; |
3657 | } | 3613 | } |
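
btrfs_setattr_size() turns into btrfs_setsize(), and the hand-rolled s_maxbytes/RLIMIT_FSIZE checks go away because inode_change_ok() (via inode_newsize_ok()) already performs them before this point. Reduced to its two branches, the new flow is:

    if (newsize > oldsize) {
            /* grow: publish the new i_size, then materialize the hole */
            i_size_write(inode, newsize);
            truncate_pagecache(inode, oldsize, newsize);
            ret = btrfs_cont_expand(inode, oldsize, newsize);
    } else {
            /* shrink: i_size and pagecache in one step, then drop items */
            truncate_setsize(inode, newsize);
            ret = btrfs_truncate(inode);
    }

truncate_setsize() is the vmtruncate() replacement from the new VFS truncate sequence; it cannot fail, which is why the old BUG_ON(ret) disappears.
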
@@ -3674,8 +3630,11 @@ void btrfs_evict_inode(struct inode *inode) | |||
3674 | unsigned long nr; | 3630 | unsigned long nr; |
3675 | int ret; | 3631 | int ret; |
3676 | 3632 | ||
3633 | trace_btrfs_inode_evict(inode); | ||
3634 | |||
3677 | truncate_inode_pages(&inode->i_data, 0); | 3635 | truncate_inode_pages(&inode->i_data, 0); |
3678 | if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0) | 3636 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || |
3637 | is_free_space_inode(root, inode))) | ||
3679 | goto no_delete; | 3638 | goto no_delete; |
3680 | 3639 | ||
3681 | if (is_bad_inode(inode)) { | 3640 | if (is_bad_inode(inode)) { |
@@ -3698,9 +3657,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
3698 | btrfs_i_size_write(inode, 0); | 3657 | btrfs_i_size_write(inode, 0); |
3699 | 3658 | ||
3700 | while (1) { | 3659 | while (1) { |
3701 | trans = btrfs_start_transaction(root, 0); | 3660 | trans = btrfs_join_transaction(root); |
3702 | BUG_ON(IS_ERR(trans)); | 3661 | BUG_ON(IS_ERR(trans)); |
3703 | btrfs_set_trans_block_group(trans, inode); | ||
3704 | trans->block_rsv = root->orphan_block_rsv; | 3662 | trans->block_rsv = root->orphan_block_rsv; |
3705 | 3663 | ||
3706 | ret = btrfs_block_rsv_check(trans, root, | 3664 | ret = btrfs_block_rsv_check(trans, root, |
@@ -3728,6 +3686,10 @@ void btrfs_evict_inode(struct inode *inode) | |||
3728 | BUG_ON(ret); | 3686 | BUG_ON(ret); |
3729 | } | 3687 | } |
3730 | 3688 | ||
3689 | if (!(root == root->fs_info->tree_root || | ||
3690 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) | ||
3691 | btrfs_return_ino(root, btrfs_ino(inode)); | ||
3692 | |||
3731 | nr = trans->blocks_used; | 3693 | nr = trans->blocks_used; |
3732 | btrfs_end_transaction(trans, root); | 3694 | btrfs_end_transaction(trans, root); |
3733 | btrfs_btree_balance_dirty(root, nr); | 3695 | btrfs_btree_balance_dirty(root, nr); |
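
btrfs_return_ino() is the eviction-side half of the per-root inode number cache: once the inode's items are gone, the objectid becomes reusable. Paired with the allocation side it looks like this (btrfs_find_free_ino() is named from the same patch series; treat the pairing as a sketch):

    /* creating: pull a free objectid from the cache (or a tree scan) */
    ret = btrfs_find_free_ino(root, &objectid);

    /* evicting: hand the objectid back, except for roots that do not
     * use the cache (the tree root and relocation trees, as above) */
    btrfs_return_ino(root, btrfs_ino(inode));
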
@@ -3753,12 +3715,12 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, | |||
3753 | path = btrfs_alloc_path(); | 3715 | path = btrfs_alloc_path(); |
3754 | BUG_ON(!path); | 3716 | BUG_ON(!path); |
3755 | 3717 | ||
3756 | di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, | 3718 | di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, |
3757 | namelen, 0); | 3719 | namelen, 0); |
3758 | if (IS_ERR(di)) | 3720 | if (IS_ERR(di)) |
3759 | ret = PTR_ERR(di); | 3721 | ret = PTR_ERR(di); |
3760 | 3722 | ||
3761 | if (!di || IS_ERR(di)) | 3723 | if (IS_ERR_OR_NULL(di)) |
3762 | goto out_err; | 3724 | goto out_err; |
3763 | 3725 | ||
3764 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); | 3726 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); |
@@ -3806,7 +3768,7 @@ static int fixup_tree_root_location(struct btrfs_root *root, | |||
3806 | 3768 | ||
3807 | leaf = path->nodes[0]; | 3769 | leaf = path->nodes[0]; |
3808 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); | 3770 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); |
3809 | if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino || | 3771 | if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) || |
3810 | btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) | 3772 | btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) |
3811 | goto out; | 3773 | goto out; |
3812 | 3774 | ||
@@ -3816,7 +3778,7 @@ static int fixup_tree_root_location(struct btrfs_root *root, | |||
3816 | if (ret) | 3778 | if (ret) |
3817 | goto out; | 3779 | goto out; |
3818 | 3780 | ||
3819 | btrfs_release_path(root->fs_info->tree_root, path); | 3781 | btrfs_release_path(path); |
3820 | 3782 | ||
3821 | new_root = btrfs_read_fs_root_no_name(root->fs_info, location); | 3783 | new_root = btrfs_read_fs_root_no_name(root->fs_info, location); |
3822 | if (IS_ERR(new_root)) { | 3784 | if (IS_ERR(new_root)) { |
@@ -3845,11 +3807,12 @@ static void inode_tree_add(struct inode *inode) | |||
3845 | struct btrfs_inode *entry; | 3807 | struct btrfs_inode *entry; |
3846 | struct rb_node **p; | 3808 | struct rb_node **p; |
3847 | struct rb_node *parent; | 3809 | struct rb_node *parent; |
3810 | u64 ino = btrfs_ino(inode); | ||
3848 | again: | 3811 | again: |
3849 | p = &root->inode_tree.rb_node; | 3812 | p = &root->inode_tree.rb_node; |
3850 | parent = NULL; | 3813 | parent = NULL; |
3851 | 3814 | ||
3852 | if (hlist_unhashed(&inode->i_hash)) | 3815 | if (inode_unhashed(inode)) |
3853 | return; | 3816 | return; |
3854 | 3817 | ||
3855 | spin_lock(&root->inode_lock); | 3818 | spin_lock(&root->inode_lock); |
@@ -3857,9 +3820,9 @@ again: | |||
3857 | parent = *p; | 3820 | parent = *p; |
3858 | entry = rb_entry(parent, struct btrfs_inode, rb_node); | 3821 | entry = rb_entry(parent, struct btrfs_inode, rb_node); |
3859 | 3822 | ||
3860 | if (inode->i_ino < entry->vfs_inode.i_ino) | 3823 | if (ino < btrfs_ino(&entry->vfs_inode)) |
3861 | p = &parent->rb_left; | 3824 | p = &parent->rb_left; |
3862 | else if (inode->i_ino > entry->vfs_inode.i_ino) | 3825 | else if (ino > btrfs_ino(&entry->vfs_inode)) |
3863 | p = &parent->rb_right; | 3826 | p = &parent->rb_right; |
3864 | else { | 3827 | else { |
3865 | WARN_ON(!(entry->vfs_inode.i_state & | 3828 | WARN_ON(!(entry->vfs_inode.i_state & |
@@ -3888,7 +3851,14 @@ static void inode_tree_del(struct inode *inode) | |||
3888 | } | 3851 | } |
3889 | spin_unlock(&root->inode_lock); | 3852 | spin_unlock(&root->inode_lock); |
3890 | 3853 | ||
3891 | if (empty && btrfs_root_refs(&root->root_item) == 0) { | 3854 | /* |
3855 | * Free space cache has inodes in the tree root, but the tree root has a | ||
3856 | * root_refs of 0, so this could end up dropping the tree root as a | ||
3857 | * snapshot, so we need the extra !root->fs_info->tree_root check to | ||
3858 | * make sure we don't drop it. | ||
3859 | */ | ||
3860 | if (empty && btrfs_root_refs(&root->root_item) == 0 && | ||
3861 | root != root->fs_info->tree_root) { | ||
3892 | synchronize_srcu(&root->fs_info->subvol_srcu); | 3862 | synchronize_srcu(&root->fs_info->subvol_srcu); |
3893 | spin_lock(&root->inode_lock); | 3863 | spin_lock(&root->inode_lock); |
3894 | empty = RB_EMPTY_ROOT(&root->inode_tree); | 3864 | empty = RB_EMPTY_ROOT(&root->inode_tree); |
@@ -3916,9 +3886,9 @@ again: | |||
3916 | prev = node; | 3886 | prev = node; |
3917 | entry = rb_entry(node, struct btrfs_inode, rb_node); | 3887 | entry = rb_entry(node, struct btrfs_inode, rb_node); |
3918 | 3888 | ||
3919 | if (objectid < entry->vfs_inode.i_ino) | 3889 | if (objectid < btrfs_ino(&entry->vfs_inode)) |
3920 | node = node->rb_left; | 3890 | node = node->rb_left; |
3921 | else if (objectid > entry->vfs_inode.i_ino) | 3891 | else if (objectid > btrfs_ino(&entry->vfs_inode)) |
3922 | node = node->rb_right; | 3892 | node = node->rb_right; |
3923 | else | 3893 | else |
3924 | break; | 3894 | break; |
@@ -3926,7 +3896,7 @@ again: | |||
3926 | if (!node) { | 3896 | if (!node) { |
3927 | while (prev) { | 3897 | while (prev) { |
3928 | entry = rb_entry(prev, struct btrfs_inode, rb_node); | 3898 | entry = rb_entry(prev, struct btrfs_inode, rb_node); |
3929 | if (objectid <= entry->vfs_inode.i_ino) { | 3899 | if (objectid <= btrfs_ino(&entry->vfs_inode)) { |
3930 | node = prev; | 3900 | node = prev; |
3931 | break; | 3901 | break; |
3932 | } | 3902 | } |
@@ -3935,7 +3905,7 @@ again: | |||
3935 | } | 3905 | } |
3936 | while (node) { | 3906 | while (node) { |
3937 | entry = rb_entry(node, struct btrfs_inode, rb_node); | 3907 | entry = rb_entry(node, struct btrfs_inode, rb_node); |
3938 | objectid = entry->vfs_inode.i_ino + 1; | 3908 | objectid = btrfs_ino(&entry->vfs_inode) + 1; |
3939 | inode = igrab(&entry->vfs_inode); | 3909 | inode = igrab(&entry->vfs_inode); |
3940 | if (inode) { | 3910 | if (inode) { |
3941 | spin_unlock(&root->inode_lock); | 3911 | spin_unlock(&root->inode_lock); |
@@ -3973,7 +3943,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) | |||
3973 | static int btrfs_find_actor(struct inode *inode, void *opaque) | 3943 | static int btrfs_find_actor(struct inode *inode, void *opaque) |
3974 | { | 3944 | { |
3975 | struct btrfs_iget_args *args = opaque; | 3945 | struct btrfs_iget_args *args = opaque; |
3976 | return args->ino == inode->i_ino && | 3946 | return args->ino == btrfs_ino(inode) && |
3977 | args->root == BTRFS_I(inode)->root; | 3947 | args->root == BTRFS_I(inode)->root; |
3978 | } | 3948 | } |
3979 | 3949 | ||
@@ -4008,7 +3978,6 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
4008 | BTRFS_I(inode)->root = root; | 3978 | BTRFS_I(inode)->root = root; |
4009 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); | 3979 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); |
4010 | btrfs_read_locked_inode(inode); | 3980 | btrfs_read_locked_inode(inode); |
4011 | |||
4012 | inode_tree_add(inode); | 3981 | inode_tree_add(inode); |
4013 | unlock_new_inode(inode); | 3982 | unlock_new_inode(inode); |
4014 | if (new) | 3983 | if (new) |
@@ -4049,8 +4018,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
4049 | int index; | 4018 | int index; |
4050 | int ret; | 4019 | int ret; |
4051 | 4020 | ||
4052 | dentry->d_op = &btrfs_dentry_operations; | ||
4053 | |||
4054 | if (dentry->d_name.len > BTRFS_NAME_LEN) | 4021 | if (dentry->d_name.len > BTRFS_NAME_LEN) |
4055 | return ERR_PTR(-ENAMETOOLONG); | 4022 | return ERR_PTR(-ENAMETOOLONG); |
4056 | 4023 | ||
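
Dropping the dentry->d_op assignment here is part of the dcache conversion in this kernel cycle: the operations are set once per superblock and d_alloc() copies them onto every new dentry. Presumably the mount path gains a line along these lines (location in super.c assumed):

    sb->s_d_op = &btrfs_dentry_operations;

The const struct dentry * in btrfs_dentry_delete() further down is the matching RCU-walk signature change.
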
@@ -4082,17 +4049,19 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
4082 | } | 4049 | } |
4083 | srcu_read_unlock(&root->fs_info->subvol_srcu, index); | 4050 | srcu_read_unlock(&root->fs_info->subvol_srcu, index); |
4084 | 4051 | ||
4085 | if (root != sub_root) { | 4052 | if (!IS_ERR(inode) && root != sub_root) { |
4086 | down_read(&root->fs_info->cleanup_work_sem); | 4053 | down_read(&root->fs_info->cleanup_work_sem); |
4087 | if (!(inode->i_sb->s_flags & MS_RDONLY)) | 4054 | if (!(inode->i_sb->s_flags & MS_RDONLY)) |
4088 | btrfs_orphan_cleanup(sub_root); | 4055 | ret = btrfs_orphan_cleanup(sub_root); |
4089 | up_read(&root->fs_info->cleanup_work_sem); | 4056 | up_read(&root->fs_info->cleanup_work_sem); |
4057 | if (ret) | ||
4058 | inode = ERR_PTR(ret); | ||
4090 | } | 4059 | } |
4091 | 4060 | ||
4092 | return inode; | 4061 | return inode; |
4093 | } | 4062 | } |
4094 | 4063 | ||
4095 | static int btrfs_dentry_delete(struct dentry *dentry) | 4064 | static int btrfs_dentry_delete(const struct dentry *dentry) |
4096 | { | 4065 | { |
4097 | struct btrfs_root *root; | 4066 | struct btrfs_root *root; |
4098 | 4067 | ||
@@ -4119,7 +4088,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, | |||
4119 | return d_splice_alias(inode, dentry); | 4088 | return d_splice_alias(inode, dentry); |
4120 | } | 4089 | } |
4121 | 4090 | ||
4122 | static unsigned char btrfs_filetype_table[] = { | 4091 | unsigned char btrfs_filetype_table[] = { |
4123 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | 4092 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
4124 | }; | 4093 | }; |
4125 | 4094 | ||
@@ -4133,11 +4102,11 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4133 | struct btrfs_key key; | 4102 | struct btrfs_key key; |
4134 | struct btrfs_key found_key; | 4103 | struct btrfs_key found_key; |
4135 | struct btrfs_path *path; | 4104 | struct btrfs_path *path; |
4105 | struct list_head ins_list; | ||
4106 | struct list_head del_list; | ||
4136 | int ret; | 4107 | int ret; |
4137 | u32 nritems; | ||
4138 | struct extent_buffer *leaf; | 4108 | struct extent_buffer *leaf; |
4139 | int slot; | 4109 | int slot; |
4140 | int advance; | ||
4141 | unsigned char d_type; | 4110 | unsigned char d_type; |
4142 | int over = 0; | 4111 | int over = 0; |
4143 | u32 di_cur; | 4112 | u32 di_cur; |
@@ -4147,6 +4116,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4147 | char tmp_name[32]; | 4116 | char tmp_name[32]; |
4148 | char *name_ptr; | 4117 | char *name_ptr; |
4149 | int name_len; | 4118 | int name_len; |
4119 | int is_curr = 0; /* filp->f_pos points to the current index? */ | ||
4150 | 4120 | ||
4151 | /* FIXME, use a real flag for deciding about the key type */ | 4121 | /* FIXME, use a real flag for deciding about the key type */ |
4152 | if (root->fs_info->tree_root == root) | 4122 | if (root->fs_info->tree_root == root) |
@@ -4154,9 +4124,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4154 | 4124 | ||
4155 | /* special case for "." */ | 4125 | /* special case for "." */ |
4156 | if (filp->f_pos == 0) { | 4126 | if (filp->f_pos == 0) { |
4157 | over = filldir(dirent, ".", 1, | 4127 | over = filldir(dirent, ".", 1, 1, btrfs_ino(inode), DT_DIR); |
4158 | 1, inode->i_ino, | ||
4159 | DT_DIR); | ||
4160 | if (over) | 4128 | if (over) |
4161 | return 0; | 4129 | return 0; |
4162 | filp->f_pos = 1; | 4130 | filp->f_pos = 1; |
@@ -4171,36 +4139,37 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4171 | filp->f_pos = 2; | 4139 | filp->f_pos = 2; |
4172 | } | 4140 | } |
4173 | path = btrfs_alloc_path(); | 4141 | path = btrfs_alloc_path(); |
4174 | path->reada = 2; | 4142 | if (!path) |
4143 | return -ENOMEM; | ||
4144 | |||
4145 | path->reada = 1; | ||
4146 | |||
4147 | if (key_type == BTRFS_DIR_INDEX_KEY) { | ||
4148 | INIT_LIST_HEAD(&ins_list); | ||
4149 | INIT_LIST_HEAD(&del_list); | ||
4150 | btrfs_get_delayed_items(inode, &ins_list, &del_list); | ||
4151 | } | ||
4175 | 4152 | ||
4176 | btrfs_set_key_type(&key, key_type); | 4153 | btrfs_set_key_type(&key, key_type); |
4177 | key.offset = filp->f_pos; | 4154 | key.offset = filp->f_pos; |
4178 | key.objectid = inode->i_ino; | 4155 | key.objectid = btrfs_ino(inode); |
4179 | 4156 | ||
4180 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 4157 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
4181 | if (ret < 0) | 4158 | if (ret < 0) |
4182 | goto err; | 4159 | goto err; |
4183 | advance = 0; | ||
4184 | 4160 | ||
4185 | while (1) { | 4161 | while (1) { |
4186 | leaf = path->nodes[0]; | 4162 | leaf = path->nodes[0]; |
4187 | nritems = btrfs_header_nritems(leaf); | ||
4188 | slot = path->slots[0]; | 4163 | slot = path->slots[0]; |
4189 | if (advance || slot >= nritems) { | 4164 | if (slot >= btrfs_header_nritems(leaf)) { |
4190 | if (slot >= nritems - 1) { | 4165 | ret = btrfs_next_leaf(root, path); |
4191 | ret = btrfs_next_leaf(root, path); | 4166 | if (ret < 0) |
4192 | if (ret) | 4167 | goto err; |
4193 | break; | 4168 | else if (ret > 0) |
4194 | leaf = path->nodes[0]; | 4169 | break; |
4195 | nritems = btrfs_header_nritems(leaf); | 4170 | continue; |
4196 | slot = path->slots[0]; | ||
4197 | } else { | ||
4198 | slot++; | ||
4199 | path->slots[0]++; | ||
4200 | } | ||
4201 | } | 4171 | } |
4202 | 4172 | ||
4203 | advance = 1; | ||
4204 | item = btrfs_item_nr(leaf, slot); | 4173 | item = btrfs_item_nr(leaf, slot); |
4205 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 4174 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
4206 | 4175 | ||
@@ -4209,9 +4178,14 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4209 | if (btrfs_key_type(&found_key) != key_type) | 4178 | if (btrfs_key_type(&found_key) != key_type) |
4210 | break; | 4179 | break; |
4211 | if (found_key.offset < filp->f_pos) | 4180 | if (found_key.offset < filp->f_pos) |
4212 | continue; | 4181 | goto next; |
4182 | if (key_type == BTRFS_DIR_INDEX_KEY && | ||
4183 | btrfs_should_delete_dir_index(&del_list, | ||
4184 | found_key.offset)) | ||
4185 | goto next; | ||
4213 | 4186 | ||
4214 | filp->f_pos = found_key.offset; | 4187 | filp->f_pos = found_key.offset; |
4188 | is_curr = 1; | ||
4215 | 4189 | ||
4216 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | 4190 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); |
4217 | di_cur = 0; | 4191 | di_cur = 0; |
@@ -4220,6 +4194,9 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4220 | while (di_cur < di_total) { | 4194 | while (di_cur < di_total) { |
4221 | struct btrfs_key location; | 4195 | struct btrfs_key location; |
4222 | 4196 | ||
4197 | if (verify_dir_item(root, leaf, di)) | ||
4198 | break; | ||
4199 | |||
4223 | name_len = btrfs_dir_name_len(leaf, di); | 4200 | name_len = btrfs_dir_name_len(leaf, di); |
4224 | if (name_len <= sizeof(tmp_name)) { | 4201 | if (name_len <= sizeof(tmp_name)) { |
4225 | name_ptr = tmp_name; | 4202 | name_ptr = tmp_name; |
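
verify_dir_item() is a new defensive check so that a corrupted leaf cannot push the name copy past the end of the item. A sketch of the kind of bounds it enforces (condensed, not the verbatim helper):

    u8 type = btrfs_dir_type(leaf, di);

    if (type >= BTRFS_FT_MAX)
            return 1;                               /* bogus file type */
    if (btrfs_dir_name_len(leaf, di) > BTRFS_NAME_LEN)
            return 1;                               /* name overruns the item */
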
@@ -4259,6 +4236,17 @@ skip: | |||
4259 | di_cur += di_len; | 4236 | di_cur += di_len; |
4260 | di = (struct btrfs_dir_item *)((char *)di + di_len); | 4237 | di = (struct btrfs_dir_item *)((char *)di + di_len); |
4261 | } | 4238 | } |
4239 | next: | ||
4240 | path->slots[0]++; | ||
4241 | } | ||
4242 | |||
4243 | if (key_type == BTRFS_DIR_INDEX_KEY) { | ||
4244 | if (is_curr) | ||
4245 | filp->f_pos++; | ||
4246 | ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir, | ||
4247 | &ins_list); | ||
4248 | if (ret) | ||
4249 | goto nopos; | ||
4262 | } | 4250 | } |
4263 | 4251 | ||
4264 | /* Reached end of directory/root. Bump pos past the last item. */ | 4252 | /* Reached end of directory/root. Bump pos past the last item. */ |
@@ -4273,6 +4261,8 @@ skip: | |||
4273 | nopos: | 4261 | nopos: |
4274 | ret = 0; | 4262 | ret = 0; |
4275 | err: | 4263 | err: |
4264 | if (key_type == BTRFS_DIR_INDEX_KEY) | ||
4265 | btrfs_put_delayed_items(&ins_list, &del_list); | ||
4276 | btrfs_free_path(path); | 4266 | btrfs_free_path(path); |
4277 | return ret; | 4267 | return ret; |
4278 | } | 4268 | } |
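
With delayed items, a directory listing is the union of what is on disk and what is still queued in memory, and the readdir rework above implements exactly that merge: grab both delayed lists up front, skip on-disk DIR_INDEX entries that have a pending deletion, then emit the pending insertions once the disk walk is done. The protocol in miniature, using the helpers as introduced here:

    btrfs_get_delayed_items(inode, &ins_list, &del_list);

    /* during the on-disk walk */
    if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
            goto next;      /* deleted in memory, not yet on disk */

    /* after the walk: entries inserted in memory, not yet on disk */
    ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir, &ins_list);

    btrfs_put_delayed_items(&ins_list, &del_list);

The is_curr flag and the f_pos++ before the delayed pass keep the cursor from replaying the index the disk walk stopped on.
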
@@ -4282,14 +4272,25 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4282 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4272 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4283 | struct btrfs_trans_handle *trans; | 4273 | struct btrfs_trans_handle *trans; |
4284 | int ret = 0; | 4274 | int ret = 0; |
4275 | bool nolock = false; | ||
4285 | 4276 | ||
4286 | if (BTRFS_I(inode)->dummy_inode) | 4277 | if (BTRFS_I(inode)->dummy_inode) |
4287 | return 0; | 4278 | return 0; |
4288 | 4279 | ||
4280 | if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) | ||
4281 | nolock = true; | ||
4282 | |||
4289 | if (wbc->sync_mode == WB_SYNC_ALL) { | 4283 | if (wbc->sync_mode == WB_SYNC_ALL) { |
4290 | trans = btrfs_join_transaction(root, 1); | 4284 | if (nolock) |
4291 | btrfs_set_trans_block_group(trans, inode); | 4285 | trans = btrfs_join_transaction_nolock(root); |
4292 | ret = btrfs_commit_transaction(trans, root); | 4286 | else |
4287 | trans = btrfs_join_transaction(root); | ||
4288 | if (IS_ERR(trans)) | ||
4289 | return PTR_ERR(trans); | ||
4290 | if (nolock) | ||
4291 | ret = btrfs_end_transaction_nolock(trans, root); | ||
4292 | else | ||
4293 | ret = btrfs_commit_transaction(trans, root); | ||
4293 | } | 4294 | } |
4294 | return ret; | 4295 | return ret; |
4295 | } | 4296 | } |
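
Free-space-cache inodes get written while a transaction commit is already in flight, so during shutdown a normal join could deadlock against the commit; the _nolock join/end pair avoids that. The branch structure above, condensed:

    bool nolock = btrfs_fs_closing(root->fs_info) &&
                  is_free_space_inode(root, inode);

    trans = nolock ? btrfs_join_transaction_nolock(root)
                   : btrfs_join_transaction(root);
    if (IS_ERR(trans))
            return PTR_ERR(trans);

    ret = nolock ? btrfs_end_transaction_nolock(trans, root)
                 : btrfs_commit_transaction(trans, root);

The error handling is new as well: btrfs_join_transaction() can now return ERR_PTR, where the old code used the result unchecked.
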
@@ -4300,7 +4301,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4300 | * FIXME, needs more benchmarking...there are no reasons other than performance | 4301 | * FIXME, needs more benchmarking...there are no reasons other than performance |
4301 | * to keep or drop this code. | 4302 | * to keep or drop this code. |
4302 | */ | 4303 | */ |
4303 | void btrfs_dirty_inode(struct inode *inode) | 4304 | void btrfs_dirty_inode(struct inode *inode, int flags) |
4304 | { | 4305 | { |
4305 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4306 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4306 | struct btrfs_trans_handle *trans; | 4307 | struct btrfs_trans_handle *trans; |
@@ -4309,8 +4310,8 @@ void btrfs_dirty_inode(struct inode *inode) | |||
4309 | if (BTRFS_I(inode)->dummy_inode) | 4310 | if (BTRFS_I(inode)->dummy_inode) |
4310 | return; | 4311 | return; |
4311 | 4312 | ||
4312 | trans = btrfs_join_transaction(root, 1); | 4313 | trans = btrfs_join_transaction(root); |
4313 | btrfs_set_trans_block_group(trans, inode); | 4314 | BUG_ON(IS_ERR(trans)); |
4314 | 4315 | ||
4315 | ret = btrfs_update_inode(trans, root, inode); | 4316 | ret = btrfs_update_inode(trans, root, inode); |
4316 | if (ret && ret == -ENOSPC) { | 4317 | if (ret && ret == -ENOSPC) { |
@@ -4318,25 +4319,24 @@ void btrfs_dirty_inode(struct inode *inode) | |||
4318 | btrfs_end_transaction(trans, root); | 4319 | btrfs_end_transaction(trans, root); |
4319 | trans = btrfs_start_transaction(root, 1); | 4320 | trans = btrfs_start_transaction(root, 1); |
4320 | if (IS_ERR(trans)) { | 4321 | if (IS_ERR(trans)) { |
4321 | if (printk_ratelimit()) { | 4322 | printk_ratelimited(KERN_ERR "btrfs: fail to " |
4322 | printk(KERN_ERR "btrfs: fail to " | 4323 | "dirty inode %llu error %ld\n", |
4323 | "dirty inode %lu error %ld\n", | 4324 | (unsigned long long)btrfs_ino(inode), |
4324 | inode->i_ino, PTR_ERR(trans)); | 4325 | PTR_ERR(trans)); |
4325 | } | ||
4326 | return; | 4326 | return; |
4327 | } | 4327 | } |
4328 | btrfs_set_trans_block_group(trans, inode); | ||
4329 | 4328 | ||
4330 | ret = btrfs_update_inode(trans, root, inode); | 4329 | ret = btrfs_update_inode(trans, root, inode); |
4331 | if (ret) { | 4330 | if (ret) { |
4332 | if (printk_ratelimit()) { | 4331 | printk_ratelimited(KERN_ERR "btrfs: fail to " |
4333 | printk(KERN_ERR "btrfs: fail to " | 4332 | "dirty inode %llu error %d\n", |
4334 | "dirty inode %lu error %d\n", | 4333 | (unsigned long long)btrfs_ino(inode), |
4335 | inode->i_ino, ret); | 4334 | ret); |
4336 | } | ||
4337 | } | 4335 | } |
4338 | } | 4336 | } |
4339 | btrfs_end_transaction(trans, root); | 4337 | btrfs_end_transaction(trans, root); |
4338 | if (BTRFS_I(inode)->delayed_node) | ||
4339 | btrfs_balance_delayed_items(root); | ||
4340 | } | 4340 | } |
4341 | 4341 | ||
4342 | /* | 4342 | /* |
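
The open-coded if (printk_ratelimit()) guards become printk_ratelimited(), which keeps a private ratelimit state per call site instead of sharing the global one; its definition is roughly:

    #define printk_ratelimited(fmt, ...)  ({                          \
            static DEFINE_RATELIMIT_STATE(_rs,                        \
                                          DEFAULT_RATELIMIT_INTERVAL, \
                                          DEFAULT_RATELIMIT_BURST);   \
                                                                      \
            if (__ratelimit(&_rs))                                    \
                    printk(fmt, ##__VA_ARGS__);                       \
    })
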
@@ -4352,7 +4352,7 @@ static int btrfs_set_inode_index_count(struct inode *inode) | |||
4352 | struct extent_buffer *leaf; | 4352 | struct extent_buffer *leaf; |
4353 | int ret; | 4353 | int ret; |
4354 | 4354 | ||
4355 | key.objectid = inode->i_ino; | 4355 | key.objectid = btrfs_ino(inode); |
4356 | btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); | 4356 | btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); |
4357 | key.offset = (u64)-1; | 4357 | key.offset = (u64)-1; |
4358 | 4358 | ||
@@ -4384,7 +4384,7 @@ static int btrfs_set_inode_index_count(struct inode *inode) | |||
4384 | leaf = path->nodes[0]; | 4384 | leaf = path->nodes[0]; |
4385 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 4385 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
4386 | 4386 | ||
4387 | if (found_key.objectid != inode->i_ino || | 4387 | if (found_key.objectid != btrfs_ino(inode) || |
4388 | btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) { | 4388 | btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) { |
4389 | BTRFS_I(inode)->index_cnt = 2; | 4389 | BTRFS_I(inode)->index_cnt = 2; |
4390 | goto out; | 4390 | goto out; |
@@ -4405,9 +4405,12 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index) | |||
4405 | int ret = 0; | 4405 | int ret = 0; |
4406 | 4406 | ||
4407 | if (BTRFS_I(dir)->index_cnt == (u64)-1) { | 4407 | if (BTRFS_I(dir)->index_cnt == (u64)-1) { |
4408 | ret = btrfs_set_inode_index_count(dir); | 4408 | ret = btrfs_inode_delayed_dir_index_count(dir); |
4409 | if (ret) | 4409 | if (ret) { |
4410 | return ret; | 4410 | ret = btrfs_set_inode_index_count(dir); |
4411 | if (ret) | ||
4412 | return ret; | ||
4413 | } | ||
4411 | } | 4414 | } |
4412 | 4415 | ||
4413 | *index = BTRFS_I(dir)->index_cnt; | 4416 | *index = BTRFS_I(dir)->index_cnt; |
@@ -4420,8 +4423,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4420 | struct btrfs_root *root, | 4423 | struct btrfs_root *root, |
4421 | struct inode *dir, | 4424 | struct inode *dir, |
4422 | const char *name, int name_len, | 4425 | const char *name, int name_len, |
4423 | u64 ref_objectid, u64 objectid, | 4426 | u64 ref_objectid, u64 objectid, int mode, |
4424 | u64 alloc_hint, int mode, u64 *index) | 4427 | u64 *index) |
4425 | { | 4428 | { |
4426 | struct inode *inode; | 4429 | struct inode *inode; |
4427 | struct btrfs_inode_item *inode_item; | 4430 | struct btrfs_inode_item *inode_item; |
@@ -4438,12 +4441,23 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4438 | BUG_ON(!path); | 4441 | BUG_ON(!path); |
4439 | 4442 | ||
4440 | inode = new_inode(root->fs_info->sb); | 4443 | inode = new_inode(root->fs_info->sb); |
4441 | if (!inode) | 4444 | if (!inode) { |
4445 | btrfs_free_path(path); | ||
4442 | return ERR_PTR(-ENOMEM); | 4446 | return ERR_PTR(-ENOMEM); |
4447 | } | ||
4448 | |||
4449 | /* | ||
4450 | * we have to initialize this early, so we can reclaim the inode | ||
4451 | * number if we fail afterwards in this function. | ||
4452 | */ | ||
4453 | inode->i_ino = objectid; | ||
4443 | 4454 | ||
4444 | if (dir) { | 4455 | if (dir) { |
4456 | trace_btrfs_inode_request(dir); | ||
4457 | |||
4445 | ret = btrfs_set_inode_index(dir, index); | 4458 | ret = btrfs_set_inode_index(dir, index); |
4446 | if (ret) { | 4459 | if (ret) { |
4460 | btrfs_free_path(path); | ||
4447 | iput(inode); | 4461 | iput(inode); |
4448 | return ERR_PTR(ret); | 4462 | return ERR_PTR(ret); |
4449 | } | 4463 | } |
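
Setting inode->i_ino = objectid before anything can fail is the point of the new comment: if btrfs_new_inode() errors out later, the iput() on the error path evicts the half-built inode, and eviction (see btrfs_evict_inode above) is what returns the number to the free-ino cache. The tail of the function presumably keeps its usual shape, along these lines:

    fail:
            if (dir)
                    BTRFS_I(dir)->index_cnt--;
            btrfs_free_path(path);
            iput(inode);    /* evict returns the objectid to the cache */
            return ERR_PTR(ret);

The removed inode->i_ino = objectid assignment a few hunks further down was the old, too-late location: it ran only after the inode item had already been inserted.
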
@@ -4456,14 +4470,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4456 | BTRFS_I(inode)->index_cnt = 2; | 4470 | BTRFS_I(inode)->index_cnt = 2; |
4457 | BTRFS_I(inode)->root = root; | 4471 | BTRFS_I(inode)->root = root; |
4458 | BTRFS_I(inode)->generation = trans->transid; | 4472 | BTRFS_I(inode)->generation = trans->transid; |
4473 | inode->i_generation = BTRFS_I(inode)->generation; | ||
4459 | btrfs_set_inode_space_info(root, inode); | 4474 | btrfs_set_inode_space_info(root, inode); |
4460 | 4475 | ||
4461 | if (mode & S_IFDIR) | 4476 | if (mode & S_IFDIR) |
4462 | owner = 0; | 4477 | owner = 0; |
4463 | else | 4478 | else |
4464 | owner = 1; | 4479 | owner = 1; |
4465 | BTRFS_I(inode)->block_group = | ||
4466 | btrfs_find_block_group(root, 0, alloc_hint, owner); | ||
4467 | 4480 | ||
4468 | key[0].objectid = objectid; | 4481 | key[0].objectid = objectid; |
4469 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); | 4482 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); |
@@ -4482,7 +4495,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4482 | goto fail; | 4495 | goto fail; |
4483 | 4496 | ||
4484 | inode_init_owner(inode, dir, mode); | 4497 | inode_init_owner(inode, dir, mode); |
4485 | inode->i_ino = objectid; | ||
4486 | inode_set_bytes(inode, 0); | 4498 | inode_set_bytes(inode, 0); |
4487 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 4499 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
4488 | inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 4500 | inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
@@ -4509,12 +4521,17 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4509 | if ((mode & S_IFREG)) { | 4521 | if ((mode & S_IFREG)) { |
4510 | if (btrfs_test_opt(root, NODATASUM)) | 4522 | if (btrfs_test_opt(root, NODATASUM)) |
4511 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; | 4523 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; |
4512 | if (btrfs_test_opt(root, NODATACOW)) | 4524 | if (btrfs_test_opt(root, NODATACOW) || |
4525 | (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW)) | ||
4513 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; | 4526 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; |
4514 | } | 4527 | } |
4515 | 4528 | ||
4516 | insert_inode_hash(inode); | 4529 | insert_inode_hash(inode); |
4517 | inode_tree_add(inode); | 4530 | inode_tree_add(inode); |
4531 | |||
4532 | trace_btrfs_inode_new(inode); | ||
4533 | btrfs_set_inode_last_trans(trans, inode); | ||
4534 | |||
4518 | return inode; | 4535 | return inode; |
4519 | fail: | 4536 | fail: |
4520 | if (dir) | 4537 | if (dir) |
@@ -4542,29 +4559,29 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
4542 | int ret = 0; | 4559 | int ret = 0; |
4543 | struct btrfs_key key; | 4560 | struct btrfs_key key; |
4544 | struct btrfs_root *root = BTRFS_I(parent_inode)->root; | 4561 | struct btrfs_root *root = BTRFS_I(parent_inode)->root; |
4562 | u64 ino = btrfs_ino(inode); | ||
4563 | u64 parent_ino = btrfs_ino(parent_inode); | ||
4545 | 4564 | ||
4546 | if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { | 4565 | if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { |
4547 | memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); | 4566 | memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); |
4548 | } else { | 4567 | } else { |
4549 | key.objectid = inode->i_ino; | 4568 | key.objectid = ino; |
4550 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 4569 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); |
4551 | key.offset = 0; | 4570 | key.offset = 0; |
4552 | } | 4571 | } |
4553 | 4572 | ||
4554 | if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { | 4573 | if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { |
4555 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, | 4574 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, |
4556 | key.objectid, root->root_key.objectid, | 4575 | key.objectid, root->root_key.objectid, |
4557 | parent_inode->i_ino, | 4576 | parent_ino, index, name, name_len); |
4558 | index, name, name_len); | ||
4559 | } else if (add_backref) { | 4577 | } else if (add_backref) { |
4560 | ret = btrfs_insert_inode_ref(trans, root, | 4578 | ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino, |
4561 | name, name_len, inode->i_ino, | 4579 | parent_ino, index); |
4562 | parent_inode->i_ino, index); | ||
4563 | } | 4580 | } |
4564 | 4581 | ||
4565 | if (ret == 0) { | 4582 | if (ret == 0) { |
4566 | ret = btrfs_insert_dir_item(trans, root, name, name_len, | 4583 | ret = btrfs_insert_dir_item(trans, root, name, name_len, |
4567 | parent_inode->i_ino, &key, | 4584 | parent_inode, &key, |
4568 | btrfs_inode_type(inode), index); | 4585 | btrfs_inode_type(inode), index); |
4569 | BUG_ON(ret); | 4586 | BUG_ON(ret); |
4570 | 4587 | ||
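Throughout these hunks inode->i_ino gives way to btrfs_ino(). Reconstructed from memory of btrfs_inode.h in this series, the helper is approximately the following; the exact guard condition may differ, so treat this as a sketch rather than the canonical definition:

    static inline u64 btrfs_ino(struct inode *inode)
    {
            u64 ino = BTRFS_I(inode)->location.objectid;

            /* subvolume roots (and the btree inode) keep using i_ino */
            if (ino <= BTRFS_FIRST_FREE_OBJECTID)
                    ino = inode->i_ino;
            return ino;
    }

The payoff is a stable 64-bit objectid even on 32-bit hosts, where the VFS i_ino is an unsigned long and would truncate.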
@@ -4577,12 +4594,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
4577 | } | 4594 | } |
4578 | 4595 | ||
4579 | static int btrfs_add_nondir(struct btrfs_trans_handle *trans, | 4596 | static int btrfs_add_nondir(struct btrfs_trans_handle *trans, |
4580 | struct dentry *dentry, struct inode *inode, | 4597 | struct inode *dir, struct dentry *dentry, |
4581 | int backref, u64 index) | 4598 | struct inode *inode, int backref, u64 index) |
4582 | { | 4599 | { |
4583 | int err = btrfs_add_link(trans, dentry->d_parent->d_inode, | 4600 | int err = btrfs_add_link(trans, dir, inode, |
4584 | inode, dentry->d_name.name, | 4601 | dentry->d_name.name, dentry->d_name.len, |
4585 | dentry->d_name.len, backref, index); | 4602 | backref, index); |
4586 | if (!err) { | 4603 | if (!err) { |
4587 | d_instantiate(dentry, inode); | 4604 | d_instantiate(dentry, inode); |
4588 | return 0; | 4605 | return 0; |
@@ -4607,10 +4624,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4607 | if (!new_valid_dev(rdev)) | 4624 | if (!new_valid_dev(rdev)) |
4608 | return -EINVAL; | 4625 | return -EINVAL; |
4609 | 4626 | ||
4610 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
4611 | if (err) | ||
4612 | return err; | ||
4613 | |||
4614 | /* | 4627 | /* |
4615 | * 2 for inode item and ref | 4628 | * 2 for inode item and ref |
4616 | * 2 for dir items | 4629 | * 2 for dir items |
@@ -4620,24 +4633,25 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4620 | if (IS_ERR(trans)) | 4633 | if (IS_ERR(trans)) |
4621 | return PTR_ERR(trans); | 4634 | return PTR_ERR(trans); |
4622 | 4635 | ||
4623 | btrfs_set_trans_block_group(trans, dir); | 4636 | err = btrfs_find_free_ino(root, &objectid); |
4637 | if (err) | ||
4638 | goto out_unlock; | ||
4624 | 4639 | ||
4625 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4640 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4626 | dentry->d_name.len, | 4641 | dentry->d_name.len, btrfs_ino(dir), objectid, |
4627 | dentry->d_parent->d_inode->i_ino, objectid, | 4642 | mode, &index); |
4628 | BTRFS_I(dir)->block_group, mode, &index); | 4643 | if (IS_ERR(inode)) { |
4629 | err = PTR_ERR(inode); | 4644 | err = PTR_ERR(inode); |
4630 | if (IS_ERR(inode)) | ||
4631 | goto out_unlock; | 4645 | goto out_unlock; |
4646 | } | ||
4632 | 4647 | ||
4633 | err = btrfs_init_inode_security(trans, inode, dir); | 4648 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); |
4634 | if (err) { | 4649 | if (err) { |
4635 | drop_inode = 1; | 4650 | drop_inode = 1; |
4636 | goto out_unlock; | 4651 | goto out_unlock; |
4637 | } | 4652 | } |
4638 | 4653 | ||
4639 | btrfs_set_trans_block_group(trans, inode); | 4654 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
4640 | err = btrfs_add_nondir(trans, dentry, inode, 0, index); | ||
4641 | if (err) | 4655 | if (err) |
4642 | drop_inode = 1; | 4656 | drop_inode = 1; |
4643 | else { | 4657 | else { |
@@ -4645,8 +4659,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4645 | init_special_inode(inode, inode->i_mode, rdev); | 4659 | init_special_inode(inode, inode->i_mode, rdev); |
4646 | btrfs_update_inode(trans, root, inode); | 4660 | btrfs_update_inode(trans, root, inode); |
4647 | } | 4661 | } |
4648 | btrfs_update_inode_block_group(trans, inode); | ||
4649 | btrfs_update_inode_block_group(trans, dir); | ||
4650 | out_unlock: | 4662 | out_unlock: |
4651 | nr = trans->blocks_used; | 4663 | nr = trans->blocks_used; |
4652 | btrfs_end_transaction_throttle(trans, root); | 4664 | btrfs_end_transaction_throttle(trans, root); |
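btrfs_mknod() here, and btrfs_create()/btrfs_mkdir() below, now pick the objectid after the transaction starts: btrfs_find_free_ino() draws from the cached free-ino pool rather than walking the tree, so it is cheap inside the transaction, and a failure can reuse the existing unwind path. Sketched with the labels used above:

    trans = btrfs_start_transaction(root, 5);   /* reserve items up front */
    if (IS_ERR(trans))
            return PTR_ERR(trans);

    err = btrfs_find_free_ino(root, &objectid);
    if (err)
            goto out_unlock;    /* nothing created yet; just end the trans */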
@@ -4670,9 +4682,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4670 | u64 objectid; | 4682 | u64 objectid; |
4671 | u64 index = 0; | 4683 | u64 index = 0; |
4672 | 4684 | ||
4673 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
4674 | if (err) | ||
4675 | return err; | ||
4676 | /* | 4685 | /* |
4677 | * 2 for inode item and ref | 4686 | * 2 for inode item and ref |
4678 | * 2 for dir items | 4687 | * 2 for dir items |
@@ -4682,25 +4691,25 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4682 | if (IS_ERR(trans)) | 4691 | if (IS_ERR(trans)) |
4683 | return PTR_ERR(trans); | 4692 | return PTR_ERR(trans); |
4684 | 4693 | ||
4685 | btrfs_set_trans_block_group(trans, dir); | 4694 | err = btrfs_find_free_ino(root, &objectid); |
4695 | if (err) | ||
4696 | goto out_unlock; | ||
4686 | 4697 | ||
4687 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4698 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4688 | dentry->d_name.len, | 4699 | dentry->d_name.len, btrfs_ino(dir), objectid, |
4689 | dentry->d_parent->d_inode->i_ino, | 4700 | mode, &index); |
4690 | objectid, BTRFS_I(dir)->block_group, mode, | 4701 | if (IS_ERR(inode)) { |
4691 | &index); | 4702 | err = PTR_ERR(inode); |
4692 | err = PTR_ERR(inode); | ||
4693 | if (IS_ERR(inode)) | ||
4694 | goto out_unlock; | 4703 | goto out_unlock; |
4704 | } | ||
4695 | 4705 | ||
4696 | err = btrfs_init_inode_security(trans, inode, dir); | 4706 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); |
4697 | if (err) { | 4707 | if (err) { |
4698 | drop_inode = 1; | 4708 | drop_inode = 1; |
4699 | goto out_unlock; | 4709 | goto out_unlock; |
4700 | } | 4710 | } |
4701 | 4711 | ||
4702 | btrfs_set_trans_block_group(trans, inode); | 4712 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
4703 | err = btrfs_add_nondir(trans, dentry, inode, 0, index); | ||
4704 | if (err) | 4713 | if (err) |
4705 | drop_inode = 1; | 4714 | drop_inode = 1; |
4706 | else { | 4715 | else { |
@@ -4710,8 +4719,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4710 | inode->i_op = &btrfs_file_inode_operations; | 4719 | inode->i_op = &btrfs_file_inode_operations; |
4711 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 4720 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
4712 | } | 4721 | } |
4713 | btrfs_update_inode_block_group(trans, inode); | ||
4714 | btrfs_update_inode_block_group(trans, dir); | ||
4715 | out_unlock: | 4722 | out_unlock: |
4716 | nr = trans->blocks_used; | 4723 | nr = trans->blocks_used; |
4717 | btrfs_end_transaction_throttle(trans, root); | 4724 | btrfs_end_transaction_throttle(trans, root); |
@@ -4734,41 +4741,42 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4734 | int err; | 4741 | int err; |
4735 | int drop_inode = 0; | 4742 | int drop_inode = 0; |
4736 | 4743 | ||
4737 | if (inode->i_nlink == 0) | ||
4738 | return -ENOENT; | ||
4739 | |||
4740 | /* do not allow sys_link's with other subvols of the same device */ | 4744 | /* do not allow sys_link's with other subvols of the same device */ |
4741 | if (root->objectid != BTRFS_I(inode)->root->objectid) | 4745 | if (root->objectid != BTRFS_I(inode)->root->objectid) |
4742 | return -EPERM; | 4746 | return -EXDEV; |
4743 | 4747 | ||
4744 | btrfs_inc_nlink(inode); | 4748 | if (inode->i_nlink == ~0U) |
4749 | return -EMLINK; | ||
4745 | 4750 | ||
4746 | err = btrfs_set_inode_index(dir, &index); | 4751 | err = btrfs_set_inode_index(dir, &index); |
4747 | if (err) | 4752 | if (err) |
4748 | goto fail; | 4753 | goto fail; |
4749 | 4754 | ||
4750 | /* | 4755 | /* |
4751 | * 1 item for inode ref | 4756 | * 2 items for inode and inode ref |
4752 | * 2 items for dir items | 4757 | * 2 items for dir items |
4758 | * 1 item for parent inode | ||
4753 | */ | 4759 | */ |
4754 | trans = btrfs_start_transaction(root, 3); | 4760 | trans = btrfs_start_transaction(root, 5); |
4755 | if (IS_ERR(trans)) { | 4761 | if (IS_ERR(trans)) { |
4756 | err = PTR_ERR(trans); | 4762 | err = PTR_ERR(trans); |
4757 | goto fail; | 4763 | goto fail; |
4758 | } | 4764 | } |
4759 | 4765 | ||
4760 | btrfs_set_trans_block_group(trans, dir); | 4766 | btrfs_inc_nlink(inode); |
4761 | atomic_inc(&inode->i_count); | 4767 | inode->i_ctime = CURRENT_TIME; |
4768 | ihold(inode); | ||
4762 | 4769 | ||
4763 | err = btrfs_add_nondir(trans, dentry, inode, 1, index); | 4770 | err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); |
4764 | 4771 | ||
4765 | if (err) { | 4772 | if (err) { |
4766 | drop_inode = 1; | 4773 | drop_inode = 1; |
4767 | } else { | 4774 | } else { |
4768 | btrfs_update_inode_block_group(trans, dir); | 4775 | struct dentry *parent = dget_parent(dentry); |
4769 | err = btrfs_update_inode(trans, root, inode); | 4776 | err = btrfs_update_inode(trans, root, inode); |
4770 | BUG_ON(err); | 4777 | BUG_ON(err); |
4771 | btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); | 4778 | btrfs_log_new_name(trans, inode, NULL, parent); |
4779 | dput(parent); | ||
4772 | } | 4780 | } |
4773 | 4781 | ||
4774 | nr = trans->blocks_used; | 4782 | nr = trans->blocks_used; |
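Two behavioral notes on this btrfs_link() hunk: cross-subvolume links now fail with -EXDEV, the "cross-device" error userspace already knows how to handle, instead of -EPERM, and the link count is capped before anything is reserved. The dget_parent() dance replaces a bare dentry->d_parent dereference because the parent can change under a concurrent rename; the safe pattern, as used above:

    struct dentry *parent = dget_parent(dentry);   /* pin a stable parent */
    btrfs_log_new_name(trans, inode, NULL, parent);
    dput(parent);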
@@ -4793,10 +4801,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4793 | u64 index = 0; | 4801 | u64 index = 0; |
4794 | unsigned long nr = 1; | 4802 | unsigned long nr = 1; |
4795 | 4803 | ||
4796 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
4797 | if (err) | ||
4798 | return err; | ||
4799 | |||
4800 | /* | 4804 | /* |
4801 | * 2 items for inode and ref | 4805 | * 2 items for inode and ref |
4802 | * 2 items for dir items | 4806 | * 2 items for dir items |
@@ -4805,13 +4809,14 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4805 | trans = btrfs_start_transaction(root, 5); | 4809 | trans = btrfs_start_transaction(root, 5); |
4806 | if (IS_ERR(trans)) | 4810 | if (IS_ERR(trans)) |
4807 | return PTR_ERR(trans); | 4811 | return PTR_ERR(trans); |
4808 | btrfs_set_trans_block_group(trans, dir); | 4812 | |
4813 | err = btrfs_find_free_ino(root, &objectid); | ||
4814 | if (err) | ||
4815 | goto out_fail; | ||
4809 | 4816 | ||
4810 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4817 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4811 | dentry->d_name.len, | 4818 | dentry->d_name.len, btrfs_ino(dir), objectid, |
4812 | dentry->d_parent->d_inode->i_ino, objectid, | 4819 | S_IFDIR | mode, &index); |
4813 | BTRFS_I(dir)->block_group, S_IFDIR | mode, | ||
4814 | &index); | ||
4815 | if (IS_ERR(inode)) { | 4820 | if (IS_ERR(inode)) { |
4816 | err = PTR_ERR(inode); | 4821 | err = PTR_ERR(inode); |
4817 | goto out_fail; | 4822 | goto out_fail; |
@@ -4819,29 +4824,25 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4819 | 4824 | ||
4820 | drop_on_err = 1; | 4825 | drop_on_err = 1; |
4821 | 4826 | ||
4822 | err = btrfs_init_inode_security(trans, inode, dir); | 4827 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); |
4823 | if (err) | 4828 | if (err) |
4824 | goto out_fail; | 4829 | goto out_fail; |
4825 | 4830 | ||
4826 | inode->i_op = &btrfs_dir_inode_operations; | 4831 | inode->i_op = &btrfs_dir_inode_operations; |
4827 | inode->i_fop = &btrfs_dir_file_operations; | 4832 | inode->i_fop = &btrfs_dir_file_operations; |
4828 | btrfs_set_trans_block_group(trans, inode); | ||
4829 | 4833 | ||
4830 | btrfs_i_size_write(inode, 0); | 4834 | btrfs_i_size_write(inode, 0); |
4831 | err = btrfs_update_inode(trans, root, inode); | 4835 | err = btrfs_update_inode(trans, root, inode); |
4832 | if (err) | 4836 | if (err) |
4833 | goto out_fail; | 4837 | goto out_fail; |
4834 | 4838 | ||
4835 | err = btrfs_add_link(trans, dentry->d_parent->d_inode, | 4839 | err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, |
4836 | inode, dentry->d_name.name, | 4840 | dentry->d_name.len, 0, index); |
4837 | dentry->d_name.len, 0, index); | ||
4838 | if (err) | 4841 | if (err) |
4839 | goto out_fail; | 4842 | goto out_fail; |
4840 | 4843 | ||
4841 | d_instantiate(dentry, inode); | 4844 | d_instantiate(dentry, inode); |
4842 | drop_on_err = 0; | 4845 | drop_on_err = 0; |
4843 | btrfs_update_inode_block_group(trans, inode); | ||
4844 | btrfs_update_inode_block_group(trans, dir); | ||
4845 | 4846 | ||
4846 | out_fail: | 4847 | out_fail: |
4847 | nr = trans->blocks_used; | 4848 | nr = trans->blocks_used; |
@@ -4886,19 +4887,23 @@ static noinline int uncompress_inline(struct btrfs_path *path, | |||
4886 | size_t max_size; | 4887 | size_t max_size; |
4887 | unsigned long inline_size; | 4888 | unsigned long inline_size; |
4888 | unsigned long ptr; | 4889 | unsigned long ptr; |
4890 | int compress_type; | ||
4889 | 4891 | ||
4890 | WARN_ON(pg_offset != 0); | 4892 | WARN_ON(pg_offset != 0); |
4893 | compress_type = btrfs_file_extent_compression(leaf, item); | ||
4891 | max_size = btrfs_file_extent_ram_bytes(leaf, item); | 4894 | max_size = btrfs_file_extent_ram_bytes(leaf, item); |
4892 | inline_size = btrfs_file_extent_inline_item_len(leaf, | 4895 | inline_size = btrfs_file_extent_inline_item_len(leaf, |
4893 | btrfs_item_nr(leaf, path->slots[0])); | 4896 | btrfs_item_nr(leaf, path->slots[0])); |
4894 | tmp = kmalloc(inline_size, GFP_NOFS); | 4897 | tmp = kmalloc(inline_size, GFP_NOFS); |
4898 | if (!tmp) | ||
4899 | return -ENOMEM; | ||
4895 | ptr = btrfs_file_extent_inline_start(item); | 4900 | ptr = btrfs_file_extent_inline_start(item); |
4896 | 4901 | ||
4897 | read_extent_buffer(leaf, tmp, ptr, inline_size); | 4902 | read_extent_buffer(leaf, tmp, ptr, inline_size); |
4898 | 4903 | ||
4899 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); | 4904 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); |
4900 | ret = btrfs_zlib_decompress(tmp, page, extent_offset, | 4905 | ret = btrfs_decompress(compress_type, tmp, page, |
4901 | inline_size, max_size); | 4906 | extent_offset, inline_size, max_size); |
4902 | if (ret) { | 4907 | if (ret) { |
4903 | char *kaddr = kmap_atomic(page, KM_USER0); | 4908 | char *kaddr = kmap_atomic(page, KM_USER0); |
4904 | unsigned long copy_size = min_t(u64, | 4909 | unsigned long copy_size = min_t(u64, |
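uncompress_inline() now routes through the type-dispatching btrfs_decompress() rather than calling zlib directly, which is what lets LZO-compressed inline extents read back transparently. Paraphrased from memory of compression.c in this series (find_workspace()/free_workspace() are that file's internals, so take the exact shape as illustrative):

    int btrfs_decompress(int type, unsigned char *data_in,
                         struct page *dest_page, unsigned long start_byte,
                         size_t srclen, size_t destlen)
    {
            struct list_head *workspace;
            int ret;

            workspace = find_workspace(type);
            if (IS_ERR(workspace))
                    return -ENOMEM;

            /* btrfs_compress_op[] holds one ops table per algorithm */
            ret = btrfs_compress_op[type - 1]->decompress(workspace, data_in,
                                                          dest_page, start_byte,
                                                          srclen, destlen);
            free_workspace(type, workspace);
            return ret;
    }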
@@ -4929,7 +4934,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
4929 | u64 bytenr; | 4934 | u64 bytenr; |
4930 | u64 extent_start = 0; | 4935 | u64 extent_start = 0; |
4931 | u64 extent_end = 0; | 4936 | u64 extent_end = 0; |
4932 | u64 objectid = inode->i_ino; | 4937 | u64 objectid = btrfs_ino(inode); |
4933 | u32 found_type; | 4938 | u32 found_type; |
4934 | struct btrfs_path *path = NULL; | 4939 | struct btrfs_path *path = NULL; |
4935 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4940 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -4940,7 +4945,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
4940 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 4945 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
4941 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 4946 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
4942 | struct btrfs_trans_handle *trans = NULL; | 4947 | struct btrfs_trans_handle *trans = NULL; |
4943 | int compressed; | 4948 | int compress_type; |
4944 | 4949 | ||
4945 | again: | 4950 | again: |
4946 | read_lock(&em_tree->lock); | 4951 | read_lock(&em_tree->lock); |
@@ -4957,7 +4962,7 @@ again: | |||
4957 | else | 4962 | else |
4958 | goto out; | 4963 | goto out; |
4959 | } | 4964 | } |
4960 | em = alloc_extent_map(GFP_NOFS); | 4965 | em = alloc_extent_map(); |
4961 | if (!em) { | 4966 | if (!em) { |
4962 | err = -ENOMEM; | 4967 | err = -ENOMEM; |
4963 | goto out; | 4968 | goto out; |
@@ -4970,7 +4975,15 @@ again: | |||
4970 | 4975 | ||
4971 | if (!path) { | 4976 | if (!path) { |
4972 | path = btrfs_alloc_path(); | 4977 | path = btrfs_alloc_path(); |
4973 | BUG_ON(!path); | 4978 | if (!path) { |
4979 | err = -ENOMEM; | ||
4980 | goto out; | ||
4981 | } | ||
4982 | /* | ||
4983 | * Chances are we'll be called again, so go ahead and do | ||
4984 | * readahead | ||
4985 | */ | ||
4986 | path->reada = 1; | ||
4974 | } | 4987 | } |
4975 | 4988 | ||
4976 | ret = btrfs_lookup_file_extent(trans, root, path, | 4989 | ret = btrfs_lookup_file_extent(trans, root, path, |
@@ -4999,7 +5012,7 @@ again: | |||
4999 | 5012 | ||
5000 | found_type = btrfs_file_extent_type(leaf, item); | 5013 | found_type = btrfs_file_extent_type(leaf, item); |
5001 | extent_start = found_key.offset; | 5014 | extent_start = found_key.offset; |
5002 | compressed = btrfs_file_extent_compression(leaf, item); | 5015 | compress_type = btrfs_file_extent_compression(leaf, item); |
5003 | if (found_type == BTRFS_FILE_EXTENT_REG || | 5016 | if (found_type == BTRFS_FILE_EXTENT_REG || |
5004 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | 5017 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
5005 | extent_end = extent_start + | 5018 | extent_end = extent_start + |
@@ -5045,8 +5058,9 @@ again: | |||
5045 | em->block_start = EXTENT_MAP_HOLE; | 5058 | em->block_start = EXTENT_MAP_HOLE; |
5046 | goto insert; | 5059 | goto insert; |
5047 | } | 5060 | } |
5048 | if (compressed) { | 5061 | if (compress_type != BTRFS_COMPRESS_NONE) { |
5049 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5062 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
5063 | em->compress_type = compress_type; | ||
5050 | em->block_start = bytenr; | 5064 | em->block_start = bytenr; |
5051 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, | 5065 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, |
5052 | item); | 5066 | item); |
@@ -5080,12 +5094,14 @@ again: | |||
5080 | em->len = (copy_size + root->sectorsize - 1) & | 5094 | em->len = (copy_size + root->sectorsize - 1) & |
5081 | ~((u64)root->sectorsize - 1); | 5095 | ~((u64)root->sectorsize - 1); |
5082 | em->orig_start = EXTENT_MAP_INLINE; | 5096 | em->orig_start = EXTENT_MAP_INLINE; |
5083 | if (compressed) | 5097 | if (compress_type) { |
5084 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5098 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
5099 | em->compress_type = compress_type; | ||
5100 | } | ||
5085 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; | 5101 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; |
5086 | if (create == 0 && !PageUptodate(page)) { | 5102 | if (create == 0 && !PageUptodate(page)) { |
5087 | if (btrfs_file_extent_compression(leaf, item) == | 5103 | if (btrfs_file_extent_compression(leaf, item) != |
5088 | BTRFS_COMPRESS_ZLIB) { | 5104 | BTRFS_COMPRESS_NONE) { |
5089 | ret = uncompress_inline(path, inode, page, | 5105 | ret = uncompress_inline(path, inode, page, |
5090 | pg_offset, | 5106 | pg_offset, |
5091 | extent_offset, item); | 5107 | extent_offset, item); |
@@ -5108,8 +5124,12 @@ again: | |||
5108 | kunmap(page); | 5124 | kunmap(page); |
5109 | free_extent_map(em); | 5125 | free_extent_map(em); |
5110 | em = NULL; | 5126 | em = NULL; |
5111 | btrfs_release_path(root, path); | 5127 | |
5112 | trans = btrfs_join_transaction(root, 1); | 5128 | btrfs_release_path(path); |
5129 | trans = btrfs_join_transaction(root); | ||
5130 | |||
5131 | if (IS_ERR(trans)) | ||
5132 | return ERR_CAST(trans); | ||
5113 | goto again; | 5133 | goto again; |
5114 | } | 5134 | } |
5115 | map = kmap(page); | 5135 | map = kmap(page); |
@@ -5119,7 +5139,7 @@ again: | |||
5119 | btrfs_mark_buffer_dirty(leaf); | 5139 | btrfs_mark_buffer_dirty(leaf); |
5120 | } | 5140 | } |
5121 | set_extent_uptodate(io_tree, em->start, | 5141 | set_extent_uptodate(io_tree, em->start, |
5122 | extent_map_end(em) - 1, GFP_NOFS); | 5142 | extent_map_end(em) - 1, NULL, GFP_NOFS); |
5123 | goto insert; | 5143 | goto insert; |
5124 | } else { | 5144 | } else { |
5125 | printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); | 5145 | printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); |
@@ -5132,7 +5152,7 @@ not_found_em: | |||
5132 | em->block_start = EXTENT_MAP_HOLE; | 5152 | em->block_start = EXTENT_MAP_HOLE; |
5133 | set_bit(EXTENT_FLAG_VACANCY, &em->flags); | 5153 | set_bit(EXTENT_FLAG_VACANCY, &em->flags); |
5134 | insert: | 5154 | insert: |
5135 | btrfs_release_path(root, path); | 5155 | btrfs_release_path(path); |
5136 | if (em->start > start || extent_map_end(em) <= start) { | 5156 | if (em->start > start || extent_map_end(em) <= start) { |
5137 | printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed " | 5157 | printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed " |
5138 | "[%llu %llu]\n", (unsigned long long)em->start, | 5158 | "[%llu %llu]\n", (unsigned long long)em->start, |
@@ -5186,6 +5206,9 @@ insert: | |||
5186 | } | 5206 | } |
5187 | write_unlock(&em_tree->lock); | 5207 | write_unlock(&em_tree->lock); |
5188 | out: | 5208 | out: |
5209 | |||
5210 | trace_btrfs_get_extent(root, em); | ||
5211 | |||
5189 | if (path) | 5212 | if (path) |
5190 | btrfs_free_path(path); | 5213 | btrfs_free_path(path); |
5191 | if (trans) { | 5214 | if (trans) { |
@@ -5200,22 +5223,160 @@ out: | |||
5200 | return em; | 5223 | return em; |
5201 | } | 5224 | } |
5202 | 5225 | ||
5226 | struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, | ||
5227 | size_t pg_offset, u64 start, u64 len, | ||
5228 | int create) | ||
5229 | { | ||
5230 | struct extent_map *em; | ||
5231 | struct extent_map *hole_em = NULL; | ||
5232 | u64 range_start = start; | ||
5233 | u64 end; | ||
5234 | u64 found; | ||
5235 | u64 found_end; | ||
5236 | int err = 0; | ||
5237 | |||
5238 | em = btrfs_get_extent(inode, page, pg_offset, start, len, create); | ||
5239 | if (IS_ERR(em)) | ||
5240 | return em; | ||
5241 | if (em) { | ||
5242 | /* | ||
5243 | * if our em maps to a hole, there might | ||
5244 | * actually be delalloc bytes behind it | ||
5245 | */ | ||
5246 | if (em->block_start != EXTENT_MAP_HOLE) | ||
5247 | return em; | ||
5248 | else | ||
5249 | hole_em = em; | ||
5250 | } | ||
5251 | |||
5252 | /* check to see if we've wrapped (len == -1 or similar) */ | ||
5253 | end = start + len; | ||
5254 | if (end < start) | ||
5255 | end = (u64)-1; | ||
5256 | else | ||
5257 | end -= 1; | ||
5258 | |||
5259 | em = NULL; | ||
5260 | |||
5261 | /* ok, we didn't find anything, let's look for delalloc */ | ||
5262 | found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start, | ||
5263 | end, len, EXTENT_DELALLOC, 1); | ||
5264 | found_end = range_start + found; | ||
5265 | if (found_end < range_start) | ||
5266 | found_end = (u64)-1; | ||
5267 | |||
5268 | /* | ||
5269 | * we didn't find anything useful, return | ||
5270 | * the original results from get_extent() | ||
5271 | */ | ||
5272 | if (range_start > end || found_end <= start) { | ||
5273 | em = hole_em; | ||
5274 | hole_em = NULL; | ||
5275 | goto out; | ||
5276 | } | ||
5277 | |||
5278 | /* adjust the range_start to make sure it doesn't | ||
5279 | * go backwards from the start they passed in | ||
5280 | */ | ||
5281 | range_start = max(start, range_start); | ||
5282 | found = found_end - range_start; | ||
5283 | |||
5284 | if (found > 0) { | ||
5285 | u64 hole_start = start; | ||
5286 | u64 hole_len = len; | ||
5287 | |||
5288 | em = alloc_extent_map(); | ||
5289 | if (!em) { | ||
5290 | err = -ENOMEM; | ||
5291 | goto out; | ||
5292 | } | ||
5293 | /* | ||
5294 | * when btrfs_get_extent can't find anything it | ||
5295 | * returns one huge hole | ||
5296 | * | ||
5297 | * make sure what it found really fits our range, and | ||
5298 | * adjust to make sure it is based on the start from | ||
5299 | * the caller | ||
5300 | */ | ||
5301 | if (hole_em) { | ||
5302 | u64 calc_end = extent_map_end(hole_em); | ||
5303 | |||
5304 | if (calc_end <= start || (hole_em->start > end)) { | ||
5305 | free_extent_map(hole_em); | ||
5306 | hole_em = NULL; | ||
5307 | } else { | ||
5308 | hole_start = max(hole_em->start, start); | ||
5309 | hole_len = calc_end - hole_start; | ||
5310 | } | ||
5311 | } | ||
5312 | em->bdev = NULL; | ||
5313 | if (hole_em && range_start > hole_start) { | ||
5314 | /* our hole starts before our delalloc, so we | ||
5315 | * have to return just the parts of the hole | ||
5316 | * that go until the delalloc starts | ||
5317 | */ | ||
5318 | em->len = min(hole_len, | ||
5319 | range_start - hole_start); | ||
5320 | em->start = hole_start; | ||
5321 | em->orig_start = hole_start; | ||
5322 | /* | ||
5323 | * don't adjust block start at all, | ||
5324 | * it is fixed at EXTENT_MAP_HOLE | ||
5325 | */ | ||
5326 | em->block_start = hole_em->block_start; | ||
5327 | em->block_len = hole_len; | ||
5328 | } else { | ||
5329 | em->start = range_start; | ||
5330 | em->len = found; | ||
5331 | em->orig_start = range_start; | ||
5332 | em->block_start = EXTENT_MAP_DELALLOC; | ||
5333 | em->block_len = found; | ||
5334 | } | ||
5335 | } else if (hole_em) { | ||
5336 | return hole_em; | ||
5337 | } | ||
5338 | out: | ||
5339 | |||
5340 | free_extent_map(hole_em); | ||
5341 | if (err) { | ||
5342 | free_extent_map(em); | ||
5343 | return ERR_PTR(err); | ||
5344 | } | ||
5345 | return em; | ||
5346 | } | ||
5347 | |||
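To make the clamping in btrfs_get_extent_fiemap() concrete, a worked example with hypothetical numbers: fiemap asks for start=0, len=1M; btrfs_get_extent() reports a hole covering [0, 1M) (hole_em), while count_range_bits() finds EXTENT_DELALLOC bits over [64K, 128K). range_start (64K) exceeds hole_start (0), so the first call returns a hole extent map with start=0 and len=min(1M, 64K-0)=64K. When extent_fiemap() advances to start=64K and calls again, the else branch produces an EXTENT_MAP_DELALLOC mapping of [64K, 128K), so dirty-but-unwritten data shows up as a delalloc extent instead of being misreported as a hole.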
5203 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | 5348 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, |
5349 | struct extent_map *em, | ||
5204 | u64 start, u64 len) | 5350 | u64 start, u64 len) |
5205 | { | 5351 | { |
5206 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5352 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5207 | struct btrfs_trans_handle *trans; | 5353 | struct btrfs_trans_handle *trans; |
5208 | struct extent_map *em; | ||
5209 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 5354 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
5210 | struct btrfs_key ins; | 5355 | struct btrfs_key ins; |
5211 | u64 alloc_hint; | 5356 | u64 alloc_hint; |
5212 | int ret; | 5357 | int ret; |
5358 | bool insert = false; | ||
5213 | 5359 | ||
5214 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | 5360 | /* |
5361 | * Ok if the extent map we looked up is a hole and is for the exact | ||
5362 | * range we want, there is no reason to allocate a new one, however if | ||
5363 | * it is not right then we need to free this one and drop the cache for | ||
5364 | * our range. | ||
5365 | */ | ||
5366 | if (em->block_start != EXTENT_MAP_HOLE || em->start != start || | ||
5367 | em->len != len) { | ||
5368 | free_extent_map(em); | ||
5369 | em = NULL; | ||
5370 | insert = true; | ||
5371 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | ||
5372 | } | ||
5215 | 5373 | ||
5216 | trans = btrfs_join_transaction(root, 0); | 5374 | trans = btrfs_join_transaction(root); |
5217 | if (!trans) | 5375 | if (IS_ERR(trans)) |
5218 | return ERR_PTR(-ENOMEM); | 5376 | return ERR_CAST(trans); |
5377 | |||
5378 | if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024) | ||
5379 | btrfs_add_inode_defrag(trans, inode); | ||
5219 | 5380 | ||
5220 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 5381 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
5221 | 5382 | ||
@@ -5227,10 +5388,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
5227 | goto out; | 5388 | goto out; |
5228 | } | 5389 | } |
5229 | 5390 | ||
5230 | em = alloc_extent_map(GFP_NOFS); | ||
5231 | if (!em) { | 5391 | if (!em) { |
5232 | em = ERR_PTR(-ENOMEM); | 5392 | em = alloc_extent_map(); |
5233 | goto out; | 5393 | if (!em) { |
5394 | em = ERR_PTR(-ENOMEM); | ||
5395 | goto out; | ||
5396 | } | ||
5234 | } | 5397 | } |
5235 | 5398 | ||
5236 | em->start = start; | 5399 | em->start = start; |
@@ -5240,9 +5403,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
5240 | em->block_start = ins.objectid; | 5403 | em->block_start = ins.objectid; |
5241 | em->block_len = ins.offset; | 5404 | em->block_len = ins.offset; |
5242 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 5405 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
5406 | |||
5407 | /* | ||
5408 | * We need to do this because if we're using the original em we searched | ||
5409 | * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that. | ||
5410 | */ | ||
5411 | em->flags = 0; | ||
5243 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 5412 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
5244 | 5413 | ||
5245 | while (1) { | 5414 | while (insert) { |
5246 | write_lock(&em_tree->lock); | 5415 | write_lock(&em_tree->lock); |
5247 | ret = add_extent_mapping(em_tree, em); | 5416 | ret = add_extent_mapping(em_tree, em); |
5248 | write_unlock(&em_tree->lock); | 5417 | write_unlock(&em_tree->lock); |
@@ -5286,7 +5455,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, | |||
5286 | if (!path) | 5455 | if (!path) |
5287 | return -ENOMEM; | 5456 | return -ENOMEM; |
5288 | 5457 | ||
5289 | ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, | 5458 | ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), |
5290 | offset, 0); | 5459 | offset, 0); |
5291 | if (ret < 0) | 5460 | if (ret < 0) |
5292 | goto out; | 5461 | goto out; |
@@ -5303,7 +5472,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, | |||
5303 | ret = 0; | 5472 | ret = 0; |
5304 | leaf = path->nodes[0]; | 5473 | leaf = path->nodes[0]; |
5305 | btrfs_item_key_to_cpu(leaf, &key, slot); | 5474 | btrfs_item_key_to_cpu(leaf, &key, slot); |
5306 | if (key.objectid != inode->i_ino || | 5475 | if (key.objectid != btrfs_ino(inode) || |
5307 | key.type != BTRFS_EXTENT_DATA_KEY) { | 5476 | key.type != BTRFS_EXTENT_DATA_KEY) { |
5308 | /* not our file or wrong item type, must cow */ | 5477 | /* not our file or wrong item type, must cow */ |
5309 | goto out; | 5478 | goto out; |
@@ -5337,7 +5506,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, | |||
5337 | * look for other files referencing this extent, if we | 5506 | * look for other files referencing this extent, if we |
5338 | * find any we must cow | 5507 | * find any we must cow |
5339 | */ | 5508 | */ |
5340 | if (btrfs_cross_ref_exist(trans, root, inode->i_ino, | 5509 | if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode), |
5341 | key.offset - backref_offset, disk_bytenr)) | 5510 | key.offset - backref_offset, disk_bytenr)) |
5342 | goto out; | 5511 | goto out; |
5343 | 5512 | ||
@@ -5438,8 +5607,8 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
5438 | * to make sure the current transaction stays open | 5607 | * to make sure the current transaction stays open |
5439 | * while we look for nocow cross refs | 5608 | * while we look for nocow cross refs |
5440 | */ | 5609 | */ |
5441 | trans = btrfs_join_transaction(root, 0); | 5610 | trans = btrfs_join_transaction(root); |
5442 | if (!trans) | 5611 | if (IS_ERR(trans)) |
5443 | goto must_cow; | 5612 | goto must_cow; |
5444 | 5613 | ||
5445 | if (can_nocow_odirect(trans, inode, start, len) == 1) { | 5614 | if (can_nocow_odirect(trans, inode, start, len) == 1) { |
@@ -5460,8 +5629,7 @@ must_cow: | |||
5460 | * it above | 5629 | * it above |
5461 | */ | 5630 | */ |
5462 | len = bh_result->b_size; | 5631 | len = bh_result->b_size; |
5463 | free_extent_map(em); | 5632 | em = btrfs_new_extent_direct(inode, em, start, len); |
5464 | em = btrfs_new_extent_direct(inode, start, len); | ||
5465 | if (IS_ERR(em)) | 5633 | if (IS_ERR(em)) |
5466 | return PTR_ERR(em); | 5634 | return PTR_ERR(em); |
5467 | len = min(len, em->len - (start - em->start)); | 5635 | len = min(len, em->len - (start - em->start)); |
@@ -5490,13 +5658,21 @@ struct btrfs_dio_private { | |||
5490 | u64 bytes; | 5658 | u64 bytes; |
5491 | u32 *csums; | 5659 | u32 *csums; |
5492 | void *private; | 5660 | void *private; |
5661 | |||
5662 | /* number of bios pending for this dio */ | ||
5663 | atomic_t pending_bios; | ||
5664 | |||
5665 | /* IO errors */ | ||
5666 | int errors; | ||
5667 | |||
5668 | struct bio *orig_bio; | ||
5493 | }; | 5669 | }; |
5494 | 5670 | ||
5495 | static void btrfs_endio_direct_read(struct bio *bio, int err) | 5671 | static void btrfs_endio_direct_read(struct bio *bio, int err) |
5496 | { | 5672 | { |
5673 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5497 | struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; | 5674 | struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; |
5498 | struct bio_vec *bvec = bio->bi_io_vec; | 5675 | struct bio_vec *bvec = bio->bi_io_vec; |
5499 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5500 | struct inode *inode = dip->inode; | 5676 | struct inode *inode = dip->inode; |
5501 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5677 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5502 | u64 start; | 5678 | u64 start; |
@@ -5520,9 +5696,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) | |||
5520 | 5696 | ||
5521 | flush_dcache_page(bvec->bv_page); | 5697 | flush_dcache_page(bvec->bv_page); |
5522 | if (csum != *private) { | 5698 | if (csum != *private) { |
5523 | printk(KERN_ERR "btrfs csum failed ino %lu off" | 5699 | printk(KERN_ERR "btrfs csum failed ino %llu off" |
5524 | " %llu csum %u private %u\n", | 5700 | " %llu csum %u private %u\n", |
5525 | inode->i_ino, (unsigned long long)start, | 5701 | (unsigned long long)btrfs_ino(inode), |
5702 | (unsigned long long)start, | ||
5526 | csum, *private); | 5703 | csum, *private); |
5527 | err = -EIO; | 5704 | err = -EIO; |
5528 | } | 5705 | } |
@@ -5539,6 +5716,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) | |||
5539 | 5716 | ||
5540 | kfree(dip->csums); | 5717 | kfree(dip->csums); |
5541 | kfree(dip); | 5718 | kfree(dip); |
5719 | |||
5720 | /* If we had a csum failure make sure to clear the uptodate flag */ | ||
5721 | if (err) | ||
5722 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
5542 | dio_end_io(bio, err); | 5723 | dio_end_io(bio, err); |
5543 | } | 5724 | } |
5544 | 5725 | ||
@@ -5550,20 +5731,23 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) | |||
5550 | struct btrfs_trans_handle *trans; | 5731 | struct btrfs_trans_handle *trans; |
5551 | struct btrfs_ordered_extent *ordered = NULL; | 5732 | struct btrfs_ordered_extent *ordered = NULL; |
5552 | struct extent_state *cached_state = NULL; | 5733 | struct extent_state *cached_state = NULL; |
5734 | u64 ordered_offset = dip->logical_offset; | ||
5735 | u64 ordered_bytes = dip->bytes; | ||
5553 | int ret; | 5736 | int ret; |
5554 | 5737 | ||
5555 | if (err) | 5738 | if (err) |
5556 | goto out_done; | 5739 | goto out_done; |
5557 | 5740 | again: | |
5558 | ret = btrfs_dec_test_ordered_pending(inode, &ordered, | 5741 | ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, |
5559 | dip->logical_offset, dip->bytes); | 5742 | &ordered_offset, |
5743 | ordered_bytes); | ||
5560 | if (!ret) | 5744 | if (!ret) |
5561 | goto out_done; | 5745 | goto out_test; |
5562 | 5746 | ||
5563 | BUG_ON(!ordered); | 5747 | BUG_ON(!ordered); |
5564 | 5748 | ||
5565 | trans = btrfs_join_transaction(root, 1); | 5749 | trans = btrfs_join_transaction(root); |
5566 | if (!trans) { | 5750 | if (IS_ERR(trans)) { |
5567 | err = -ENOMEM; | 5751 | err = -ENOMEM; |
5568 | goto out; | 5752 | goto out; |
5569 | } | 5753 | } |
@@ -5609,8 +5793,10 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) | |||
5609 | } | 5793 | } |
5610 | 5794 | ||
5611 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | 5795 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); |
5612 | btrfs_ordered_update_i_size(inode, 0, ordered); | 5796 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); |
5613 | btrfs_update_inode(trans, root, inode); | 5797 | if (!ret) |
5798 | btrfs_update_inode(trans, root, inode); | ||
5799 | ret = 0; | ||
5614 | out_unlock: | 5800 | out_unlock: |
5615 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, | 5801 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, |
5616 | ordered->file_offset + ordered->len - 1, | 5802 | ordered->file_offset + ordered->len - 1, |
@@ -5618,13 +5804,29 @@ out_unlock: | |||
5618 | out: | 5804 | out: |
5619 | btrfs_delalloc_release_metadata(inode, ordered->len); | 5805 | btrfs_delalloc_release_metadata(inode, ordered->len); |
5620 | btrfs_end_transaction(trans, root); | 5806 | btrfs_end_transaction(trans, root); |
5807 | ordered_offset = ordered->file_offset + ordered->len; | ||
5621 | btrfs_put_ordered_extent(ordered); | 5808 | btrfs_put_ordered_extent(ordered); |
5622 | btrfs_put_ordered_extent(ordered); | 5809 | btrfs_put_ordered_extent(ordered); |
5810 | |||
5811 | out_test: | ||
5812 | /* | ||
5813 | * our bio might span multiple ordered extents. If we haven't | ||
5814 | * completed the accounting for the whole dio, go back and try again | ||
5815 | */ | ||
5816 | if (ordered_offset < dip->logical_offset + dip->bytes) { | ||
5817 | ordered_bytes = dip->logical_offset + dip->bytes - | ||
5818 | ordered_offset; | ||
5819 | goto again; | ||
5820 | } | ||
5623 | out_done: | 5821 | out_done: |
5624 | bio->bi_private = dip->private; | 5822 | bio->bi_private = dip->private; |
5625 | 5823 | ||
5626 | kfree(dip->csums); | 5824 | kfree(dip->csums); |
5627 | kfree(dip); | 5825 | kfree(dip); |
5826 | |||
5827 | /* If we had an error make sure to clear the uptodate flag */ | ||
5828 | if (err) | ||
5829 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
5628 | dio_end_io(bio, err); | 5830 | dio_end_io(bio, err); |
5629 | } | 5831 | } |
5630 | 5832 | ||
@@ -5639,13 +5841,207 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, | |||
5639 | return 0; | 5841 | return 0; |
5640 | } | 5842 | } |
5641 | 5843 | ||
5844 | static void btrfs_end_dio_bio(struct bio *bio, int err) | ||
5845 | { | ||
5846 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5847 | |||
5848 | if (err) { | ||
5849 | printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu " | ||
5850 | "sector %#Lx len %u err no %d\n", | ||
5851 | (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw, | ||
5852 | (unsigned long long)bio->bi_sector, bio->bi_size, err); | ||
5853 | dip->errors = 1; | ||
5854 | |||
5855 | /* | ||
5856 | * before the atomic variable goes to zero, we must make sure | ||
5857 | * dip->errors is perceived to be set. | ||
5858 | */ | ||
5859 | smp_mb__before_atomic_dec(); | ||
5860 | } | ||
5861 | |||
5862 | /* if there are more bios still pending for this dio, just exit */ | ||
5863 | if (!atomic_dec_and_test(&dip->pending_bios)) | ||
5864 | goto out; | ||
5865 | |||
5866 | if (dip->errors) | ||
5867 | bio_io_error(dip->orig_bio); | ||
5868 | else { | ||
5869 | set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags); | ||
5870 | bio_endio(dip->orig_bio, 0); | ||
5871 | } | ||
5872 | out: | ||
5873 | bio_put(bio); | ||
5874 | } | ||
5875 | |||
5876 | static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, | ||
5877 | u64 first_sector, gfp_t gfp_flags) | ||
5878 | { | ||
5879 | int nr_vecs = bio_get_nr_vecs(bdev); | ||
5880 | return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags); | ||
5881 | } | ||
5882 | |||
5883 | static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | ||
5884 | int rw, u64 file_offset, int skip_sum, | ||
5885 | u32 *csums, int async_submit) | ||
5886 | { | ||
5887 | int write = rw & REQ_WRITE; | ||
5888 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5889 | int ret; | ||
5890 | |||
5891 | bio_get(bio); | ||
5892 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
5893 | if (ret) | ||
5894 | goto err; | ||
5895 | |||
5896 | if (skip_sum) | ||
5897 | goto map; | ||
5898 | |||
5899 | if (write && async_submit) { | ||
5900 | ret = btrfs_wq_submit_bio(root->fs_info, | ||
5901 | inode, rw, bio, 0, 0, | ||
5902 | file_offset, | ||
5903 | __btrfs_submit_bio_start_direct_io, | ||
5904 | __btrfs_submit_bio_done); | ||
5905 | goto err; | ||
5906 | } else if (write) { | ||
5907 | /* | ||
5908 | * If we aren't doing async submit, calculate the csum of the | ||
5909 | * bio now. | ||
5910 | */ | ||
5911 | ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1); | ||
5912 | if (ret) | ||
5913 | goto err; | ||
5914 | } else if (!skip_sum) { | ||
5915 | ret = btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
5916 | file_offset, csums); | ||
5917 | if (ret) | ||
5918 | goto err; | ||
5919 | } | ||
5920 | |||
5921 | map: | ||
5922 | ret = btrfs_map_bio(root, rw, bio, 0, async_submit); | ||
5923 | err: | ||
5924 | bio_put(bio); | ||
5925 | return ret; | ||
5926 | } | ||
5927 | |||
5928 | static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | ||
5929 | int skip_sum) | ||
5930 | { | ||
5931 | struct inode *inode = dip->inode; | ||
5932 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5933 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
5934 | struct bio *bio; | ||
5935 | struct bio *orig_bio = dip->orig_bio; | ||
5936 | struct bio_vec *bvec = orig_bio->bi_io_vec; | ||
5937 | u64 start_sector = orig_bio->bi_sector; | ||
5938 | u64 file_offset = dip->logical_offset; | ||
5939 | u64 submit_len = 0; | ||
5940 | u64 map_length; | ||
5941 | int nr_pages = 0; | ||
5942 | u32 *csums = dip->csums; | ||
5943 | int ret = 0; | ||
5944 | int async_submit = 0; | ||
5945 | int write = rw & REQ_WRITE; | ||
5946 | |||
5947 | map_length = orig_bio->bi_size; | ||
5948 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | ||
5949 | &map_length, NULL, 0); | ||
5950 | if (ret) { | ||
5951 | bio_put(orig_bio); | ||
5952 | return -EIO; | ||
5953 | } | ||
5954 | |||
5955 | if (map_length >= orig_bio->bi_size) { | ||
5956 | bio = orig_bio; | ||
5957 | goto submit; | ||
5958 | } | ||
5959 | |||
5960 | async_submit = 1; | ||
5961 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); | ||
5962 | if (!bio) | ||
5963 | return -ENOMEM; | ||
5964 | bio->bi_private = dip; | ||
5965 | bio->bi_end_io = btrfs_end_dio_bio; | ||
5966 | atomic_inc(&dip->pending_bios); | ||
5967 | |||
5968 | while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { | ||
5969 | if (unlikely(map_length < submit_len + bvec->bv_len || | ||
5970 | bio_add_page(bio, bvec->bv_page, bvec->bv_len, | ||
5971 | bvec->bv_offset) < bvec->bv_len)) { | ||
5972 | /* | ||
5973 | * inc the count before we submit the bio so | ||
5974 | * the end IO handler can't run until the count | ||
5975 | * covers this sub-bio. Otherwise, the dip might get freed | ||
5976 | * before we're done setting it up | ||
5977 | */ | ||
5978 | atomic_inc(&dip->pending_bios); | ||
5979 | ret = __btrfs_submit_dio_bio(bio, inode, rw, | ||
5980 | file_offset, skip_sum, | ||
5981 | csums, async_submit); | ||
5982 | if (ret) { | ||
5983 | bio_put(bio); | ||
5984 | atomic_dec(&dip->pending_bios); | ||
5985 | goto out_err; | ||
5986 | } | ||
5987 | |||
5988 | /* Writes use the ordered csums */ | ||
5989 | if (!write && !skip_sum) | ||
5990 | csums = csums + nr_pages; | ||
5991 | start_sector += submit_len >> 9; | ||
5992 | file_offset += submit_len; | ||
5993 | |||
5994 | submit_len = 0; | ||
5995 | nr_pages = 0; | ||
5996 | |||
5997 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, | ||
5998 | start_sector, GFP_NOFS); | ||
5999 | if (!bio) | ||
6000 | goto out_err; | ||
6001 | bio->bi_private = dip; | ||
6002 | bio->bi_end_io = btrfs_end_dio_bio; | ||
6003 | |||
6004 | map_length = orig_bio->bi_size; | ||
6005 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | ||
6006 | &map_length, NULL, 0); | ||
6007 | if (ret) { | ||
6008 | bio_put(bio); | ||
6009 | goto out_err; | ||
6010 | } | ||
6011 | } else { | ||
6012 | submit_len += bvec->bv_len; | ||
6013 | nr_pages++; | ||
6014 | bvec++; | ||
6015 | } | ||
6016 | } | ||
6017 | |||
6018 | submit: | ||
6019 | ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, | ||
6020 | csums, async_submit); | ||
6021 | if (!ret) | ||
6022 | return 0; | ||
6023 | |||
6024 | bio_put(bio); | ||
6025 | out_err: | ||
6026 | dip->errors = 1; | ||
6027 | /* | ||
6028 | * before the atomic variable goes to zero, we must | ||
6029 | * make sure dip->errors is perceived to be set. | ||
6030 | */ | ||
6031 | smp_mb__before_atomic_dec(); | ||
6032 | if (atomic_dec_and_test(&dip->pending_bios)) | ||
6033 | bio_io_error(dip->orig_bio); | ||
6034 | |||
6035 | /* bio_end_io() will handle error, so we needn't return it */ | ||
6036 | return 0; | ||
6037 | } | ||
6038 | |||
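The splitting logic above rests on one refcounting rule: dip->pending_bios counts every in-flight sub-bio plus one reference held by the submitter while bios are still being built, so a fast completion cannot free the dip mid-submission. Stripped to its skeleton (a sketch of the flow, not verbatim code):

    atomic_inc(&dip->pending_bios);          /* submitter's own reference */

    /* per full sub-bio, before handing it to the block layer: */
    atomic_inc(&dip->pending_bios);
    ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset,
                                 skip_sum, csums, async_submit);
    if (ret)
            atomic_dec(&dip->pending_bios);  /* it will never complete */

    /* in btrfs_end_dio_bio(), the last decrement finishes orig_bio: */
    if (atomic_dec_and_test(&dip->pending_bios)) {
            if (dip->errors)
                    bio_io_error(dip->orig_bio);
            else
                    bio_endio(dip->orig_bio, 0);
    }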
5642 | static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | 6039 | static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, |
5643 | loff_t file_offset) | 6040 | loff_t file_offset) |
5644 | { | 6041 | { |
5645 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6042 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5646 | struct btrfs_dio_private *dip; | 6043 | struct btrfs_dio_private *dip; |
5647 | struct bio_vec *bvec = bio->bi_io_vec; | 6044 | struct bio_vec *bvec = bio->bi_io_vec; |
5648 | u64 start; | ||
5649 | int skip_sum; | 6045 | int skip_sum; |
5650 | int write = rw & REQ_WRITE; | 6046 | int write = rw & REQ_WRITE; |
5651 | int ret = 0; | 6047 | int ret = 0; |
@@ -5659,9 +6055,11 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | |||
5659 | } | 6055 | } |
5660 | dip->csums = NULL; | 6056 | dip->csums = NULL; |
5661 | 6057 | ||
5662 | if (!skip_sum) { | 6058 | /* Writes use the ordered csum stuff, so we don't need dip->csums */ |
6059 | if (!write && !skip_sum) { | ||
5663 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); | 6060 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); |
5664 | if (!dip->csums) { | 6061 | if (!dip->csums) { |
6062 | kfree(dip); | ||
5665 | ret = -ENOMEM; | 6063 | ret = -ENOMEM; |
5666 | goto free_ordered; | 6064 | goto free_ordered; |
5667 | } | 6065 | } |
@@ -5671,7 +6069,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | |||
5671 | dip->inode = inode; | 6069 | dip->inode = inode; |
5672 | dip->logical_offset = file_offset; | 6070 | dip->logical_offset = file_offset; |
5673 | 6071 | ||
5674 | start = dip->logical_offset; | ||
5675 | dip->bytes = 0; | 6072 | dip->bytes = 0; |
5676 | do { | 6073 | do { |
5677 | dip->bytes += bvec->bv_len; | 6074 | dip->bytes += bvec->bv_len; |
@@ -5680,36 +6077,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | |||
5680 | 6077 | ||
5681 | dip->disk_bytenr = (u64)bio->bi_sector << 9; | 6078 | dip->disk_bytenr = (u64)bio->bi_sector << 9; |
5682 | bio->bi_private = dip; | 6079 | bio->bi_private = dip; |
6080 | dip->errors = 0; | ||
6081 | dip->orig_bio = bio; | ||
6082 | atomic_set(&dip->pending_bios, 0); | ||
5683 | 6083 | ||
5684 | if (write) | 6084 | if (write) |
5685 | bio->bi_end_io = btrfs_endio_direct_write; | 6085 | bio->bi_end_io = btrfs_endio_direct_write; |
5686 | else | 6086 | else |
5687 | bio->bi_end_io = btrfs_endio_direct_read; | 6087 | bio->bi_end_io = btrfs_endio_direct_read; |
5688 | 6088 | ||
5689 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 6089 | ret = btrfs_submit_direct_hook(rw, dip, skip_sum); |
5690 | if (ret) | 6090 | if (!ret) |
5691 | goto out_err; | ||
5692 | |||
5693 | if (write && !skip_sum) { | ||
5694 | ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | ||
5695 | inode, rw, bio, 0, 0, | ||
5696 | dip->logical_offset, | ||
5697 | __btrfs_submit_bio_start_direct_io, | ||
5698 | __btrfs_submit_bio_done); | ||
5699 | if (ret) | ||
5700 | goto out_err; | ||
5701 | return; | 6091 | return; |
5702 | } else if (!skip_sum) | ||
5703 | btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
5704 | dip->logical_offset, dip->csums); | ||
5705 | |||
5706 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | ||
5707 | if (ret) | ||
5708 | goto out_err; | ||
5709 | return; | ||
5710 | out_err: | ||
5711 | kfree(dip->csums); | ||
5712 | kfree(dip); | ||
5713 | free_ordered: | 6092 | free_ordered: |
5714 | /* | 6093 | /* |
5715 | * If this is a write, we need to clean up the reserved space and kill | 6094 | * If this is a write, we need to clean up the reserved space and kill |
@@ -5717,8 +6096,7 @@ free_ordered: | |||
5717 | */ | 6096 | */ |
5718 | if (write) { | 6097 | if (write) { |
5719 | struct btrfs_ordered_extent *ordered; | 6098 | struct btrfs_ordered_extent *ordered; |
5720 | ordered = btrfs_lookup_ordered_extent(inode, | 6099 | ordered = btrfs_lookup_ordered_extent(inode, file_offset); |
5721 | dip->logical_offset); | ||
5722 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && | 6100 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && |
5723 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) | 6101 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) |
5724 | btrfs_free_reserved_extent(root, ordered->start, | 6102 | btrfs_free_reserved_extent(root, ordered->start, |
@@ -5734,6 +6112,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io | |||
5734 | unsigned long nr_segs) | 6112 | unsigned long nr_segs) |
5735 | { | 6113 | { |
5736 | int seg; | 6114 | int seg; |
6115 | int i; | ||
5737 | size_t size; | 6116 | size_t size; |
5738 | unsigned long addr; | 6117 | unsigned long addr; |
5739 | unsigned blocksize_mask = root->sectorsize - 1; | 6118 | unsigned blocksize_mask = root->sectorsize - 1; |
@@ -5748,8 +6127,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io | |||
5748 | addr = (unsigned long)iov[seg].iov_base; | 6127 | addr = (unsigned long)iov[seg].iov_base; |
5749 | size = iov[seg].iov_len; | 6128 | size = iov[seg].iov_len; |
5750 | end += size; | 6129 | end += size; |
5751 | if ((addr & blocksize_mask) || (size & blocksize_mask)) | 6130 | if ((addr & blocksize_mask) || (size & blocksize_mask)) |
5752 | goto out; | 6131 | goto out; |
6132 | |||
6133 | /* If this is a write we don't need to check anymore */ | ||
6134 | if (rw & WRITE) | ||
6135 | continue; | ||
6136 | |||
6137 | /* | ||
6138 | * Check to make sure we don't have duplicate iov_base's in this | ||
6139 | * iovec; if so, return EINVAL, otherwise we'll get csum errors | ||
6140 | * when reading back. | ||
6141 | */ | ||
6142 | for (i = seg + 1; i < nr_segs; i++) { | ||
6143 | if (iov[seg].iov_base == iov[i].iov_base) | ||
6144 | goto out; | ||
6145 | } | ||
5753 | } | 6146 | } |
5754 | retval = 0; | 6147 | retval = 0; |
5755 | out: | 6148 | out: |
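The duplicate-iov_base check matters only for reads: two segments aimed at the same user page let the second bio's completion overwrite data whose checksum was already verified, yielding spurious csum failures. A hypothetical userspace readv() setup the check now rejects:

    char buf[4096];
    struct iovec iov[2] = {
            { .iov_base = buf, .iov_len = 4096 },
            { .iov_base = buf, .iov_len = 4096 },   /* duplicate base */
    };
    /* readv(fd, iov, 2) on an O_DIRECT fd now fails with -EINVAL */

The scan is O(nr_segs^2), which is acceptable for the small segment counts O_DIRECT callers typically pass.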
@@ -5850,7 +6243,7 @@ out: | |||
5850 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 6243 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
5851 | __u64 start, __u64 len) | 6244 | __u64 start, __u64 len) |
5852 | { | 6245 | { |
5853 | return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent); | 6246 | return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap); |
5854 | } | 6247 | } |
5855 | 6248 | ||
5856 | int btrfs_readpage(struct file *file, struct page *page) | 6249 | int btrfs_readpage(struct file *file, struct page *page) |
@@ -6100,30 +6493,97 @@ out: | |||
6100 | return ret; | 6493 | return ret; |
6101 | } | 6494 | } |
6102 | 6495 | ||
6103 | static void btrfs_truncate(struct inode *inode) | 6496 | static int btrfs_truncate(struct inode *inode) |
6104 | { | 6497 | { |
6105 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6498 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6499 | struct btrfs_block_rsv *rsv; | ||
6106 | int ret; | 6500 | int ret; |
6501 | int err = 0; | ||
6107 | struct btrfs_trans_handle *trans; | 6502 | struct btrfs_trans_handle *trans; |
6108 | unsigned long nr; | 6503 | unsigned long nr; |
6109 | u64 mask = root->sectorsize - 1; | 6504 | u64 mask = root->sectorsize - 1; |
6110 | 6505 | ||
6111 | if (!S_ISREG(inode->i_mode)) { | ||
6112 | WARN_ON(1); | ||
6113 | return; | ||
6114 | } | ||
6115 | |||
6116 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); | 6506 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); |
6117 | if (ret) | 6507 | if (ret) |
6118 | return; | 6508 | return ret; |
6119 | 6509 | ||
6120 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 6510 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
6121 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 6511 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
6122 | 6512 | ||
6123 | trans = btrfs_start_transaction(root, 0); | 6513 | /* |
6124 | BUG_ON(IS_ERR(trans)); | 6514 | * Yes, ladies and gentlemen, this is indeed ugly. The fact is we have |
6125 | btrfs_set_trans_block_group(trans, inode); | 6515 | * 3 things going on here |
6126 | trans->block_rsv = root->orphan_block_rsv; | 6516 | * |
6517 | * 1) We need to reserve space for our orphan item and the space to | ||
6518 | * delete our orphan item. Lord knows we don't want to have a dangling | ||
6519 | * orphan item because we didn't reserve space to remove it. | ||
6520 | * | ||
6521 | * 2) We need to reserve space to update our inode. | ||
6522 | * | ||
6523 | * 3) We need to have something to cache all the space that is going to | ||
6524 | * be freed up by the truncate operation, but also have some slack | ||
6525 | * space reserved in case it uses space during the truncate (thank you | ||
6526 | * very much snapshotting). | ||
6527 | * | ||
6528 | * And we need these to all be separate. The fact is we can use a lot of | ||
6529 | * space doing the truncate, and we have no earthly idea how much space | ||
6530 | * we will use, so we need the truncate reservation to be separate so it | ||
6531 | * doesn't end up using space reserved for updating the inode or | ||
6532 | * removing the orphan item. We also need to be able to stop the | ||
6533 | * transaction and start a new one, which means we need to be able to | ||
6534 | * update the inode several times, and we have no way of knowing how | ||
6535 | * many times that will be, so we can't just reserve 1 item for the | ||
6536 | * entirety of the operation, so that has to be done separately as well. | ||
6537 | * Then there is the orphan item, which does indeed need to be held on | ||
6538 | * to for the whole operation, and we need nobody to touch this reserved | ||
6539 | * space except the orphan code. | ||
6540 | * | ||
6541 | * So that leaves us with | ||
6542 | * | ||
6543 | * 1) root->orphan_block_rsv - for the orphan deletion. | ||
6544 | * 2) rsv - for the truncate reservation, which we will steal from the | ||
6545 | * transaction reservation. | ||
6546 | * 3) fs_info->trans_block_rsv - this will have 1 items worth left for | ||
6547 | * updating the inode. | ||
6548 | */ | ||
6549 | rsv = btrfs_alloc_block_rsv(root); | ||
6550 | if (!rsv) | ||
6551 | return -ENOMEM; | ||
6552 | btrfs_add_durable_block_rsv(root->fs_info, rsv); | ||
6553 | |||
6554 | trans = btrfs_start_transaction(root, 4); | ||
6555 | if (IS_ERR(trans)) { | ||
6556 | err = PTR_ERR(trans); | ||
6557 | goto out; | ||
6558 | } | ||
6559 | |||
6560 | /* | ||
6561 | * Reserve space for the truncate process. Truncate should be adding | ||
6562 | * space, but if there are snapshots it may end up using space. | ||
6563 | */ | ||
6564 | ret = btrfs_truncate_reserve_metadata(trans, root, rsv); | ||
6565 | BUG_ON(ret); | ||
6566 | |||
6567 | ret = btrfs_orphan_add(trans, inode); | ||
6568 | if (ret) { | ||
6569 | btrfs_end_transaction(trans, root); | ||
6570 | goto out; | ||
6571 | } | ||
6572 | |||
6573 | nr = trans->blocks_used; | ||
6574 | btrfs_end_transaction(trans, root); | ||
6575 | btrfs_btree_balance_dirty(root, nr); | ||
6576 | |||
6577 | /* | ||
6578 | * Ok so we've already migrated our bytes over for the truncate, so here | ||
6579 | * just reserve the one slot we need for updating the inode. | ||
6580 | */ | ||
6581 | trans = btrfs_start_transaction(root, 1); | ||
6582 | if (IS_ERR(trans)) { | ||
6583 | err = PTR_ERR(trans); | ||
6584 | goto out; | ||
6585 | } | ||
6586 | trans->block_rsv = rsv; | ||
6127 | 6587 | ||
6128 | /* | 6588 | /* |
6129 | * setattr is responsible for setting the ordered_data_close flag, | 6589 | * setattr is responsible for setting the ordered_data_close flag, |
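
Note: the long comment above motivates three disjoint reservations (the orphan item,
the inode updates, and the truncate work itself). The toy accounting model below only
illustrates why the unpredictable truncate budget must not draw from the other two
pools; every name and number in it is made up and nothing here is a btrfs API:

    #include <stdio.h>

    struct rsv { const char *name; long bytes; };

    /* Take n bytes from one pool; fail loudly if that pool is exhausted. */
    static int use(struct rsv *r, long n)
    {
            if (r->bytes < n) {
                    printf("%s exhausted (need %ld, have %ld)\n",
                           r->name, n, r->bytes);
                    return -1;
            }
            r->bytes -= n;
            return 0;
    }

    int main(void)
    {
            struct rsv orphan = { "orphan rsv",   4096 };
            struct rsv trunc  = { "truncate rsv", 16384 };
            struct rsv trans  = { "trans rsv",    4096 };

            /* truncate may consume an unpredictable amount (snapshots!) ... */
            use(&trunc, 12288);
            /* ...but inode updates and orphan removal keep their own budgets,
             * so the truncate cannot starve them */
            use(&trans, 4096);
            use(&orphan, 4096);
            return 0;
    }
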
@@ -6147,30 +6607,33 @@ static void btrfs_truncate(struct inode *inode) | |||
6147 | 6607 | ||
6148 | while (1) { | 6608 | while (1) { |
6149 | if (!trans) { | 6609 | if (!trans) { |
6150 | trans = btrfs_start_transaction(root, 0); | 6610 | trans = btrfs_start_transaction(root, 3); |
6151 | BUG_ON(IS_ERR(trans)); | 6611 | if (IS_ERR(trans)) { |
6152 | btrfs_set_trans_block_group(trans, inode); | 6612 | err = PTR_ERR(trans); |
6153 | trans->block_rsv = root->orphan_block_rsv; | 6613 | goto out; |
6154 | } | 6614 | } |
6155 | 6615 | ||
6156 | ret = btrfs_block_rsv_check(trans, root, | 6616 | ret = btrfs_truncate_reserve_metadata(trans, root, |
6157 | root->orphan_block_rsv, 0, 5); | 6617 | rsv); |
6158 | if (ret) { | ||
6159 | BUG_ON(ret != -EAGAIN); | ||
6160 | ret = btrfs_commit_transaction(trans, root); | ||
6161 | BUG_ON(ret); | 6618 | BUG_ON(ret); |
6162 | trans = NULL; | 6619 | |
6163 | continue; | 6620 | trans->block_rsv = rsv; |
6164 | } | 6621 | } |
6165 | 6622 | ||
6166 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6623 | ret = btrfs_truncate_inode_items(trans, root, inode, |
6167 | inode->i_size, | 6624 | inode->i_size, |
6168 | BTRFS_EXTENT_DATA_KEY); | 6625 | BTRFS_EXTENT_DATA_KEY); |
6169 | if (ret != -EAGAIN) | 6626 | if (ret != -EAGAIN) { |
6627 | err = ret; | ||
6170 | break; | 6628 | break; |
6629 | } | ||
6171 | 6630 | ||
6631 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
6172 | ret = btrfs_update_inode(trans, root, inode); | 6632 | ret = btrfs_update_inode(trans, root, inode); |
6173 | BUG_ON(ret); | 6633 | if (ret) { |
6634 | err = ret; | ||
6635 | break; | ||
6636 | } | ||
6174 | 6637 | ||
6175 | nr = trans->blocks_used; | 6638 | nr = trans->blocks_used; |
6176 | btrfs_end_transaction(trans, root); | 6639 | btrfs_end_transaction(trans, root); |
@@ -6179,32 +6642,48 @@ static void btrfs_truncate(struct inode *inode) | |||
6179 | } | 6642 | } |
6180 | 6643 | ||
6181 | if (ret == 0 && inode->i_nlink > 0) { | 6644 | if (ret == 0 && inode->i_nlink > 0) { |
6645 | trans->block_rsv = root->orphan_block_rsv; | ||
6182 | ret = btrfs_orphan_del(trans, inode); | 6646 | ret = btrfs_orphan_del(trans, inode); |
6183 | BUG_ON(ret); | 6647 | if (ret) |
6648 | err = ret; | ||
6649 | } else if (ret && inode->i_nlink > 0) { | ||
6650 | /* | ||
6651 | * Failed to do the truncate, remove us from the in memory | ||
6652 | * orphan list. | ||
6653 | */ | ||
6654 | ret = btrfs_orphan_del(NULL, inode); | ||
6184 | } | 6655 | } |
6185 | 6656 | ||
6657 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
6186 | ret = btrfs_update_inode(trans, root, inode); | 6658 | ret = btrfs_update_inode(trans, root, inode); |
6187 | BUG_ON(ret); | 6659 | if (ret && !err) |
6660 | err = ret; | ||
6188 | 6661 | ||
6189 | nr = trans->blocks_used; | 6662 | nr = trans->blocks_used; |
6190 | ret = btrfs_end_transaction_throttle(trans, root); | 6663 | ret = btrfs_end_transaction_throttle(trans, root); |
6191 | BUG_ON(ret); | ||
6192 | btrfs_btree_balance_dirty(root, nr); | 6664 | btrfs_btree_balance_dirty(root, nr); |
6665 | |||
6666 | out: | ||
6667 | btrfs_free_block_rsv(root, rsv); | ||
6668 | |||
6669 | if (ret && !err) | ||
6670 | err = ret; | ||
6671 | |||
6672 | return err; | ||
6193 | } | 6673 | } |
6194 | 6674 | ||
6195 | /* | 6675 | /* |
6196 | * create a new subvolume directory/inode (helper for the ioctl). | 6676 | * create a new subvolume directory/inode (helper for the ioctl). |
6197 | */ | 6677 | */ |
6198 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 6678 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
6199 | struct btrfs_root *new_root, | 6679 | struct btrfs_root *new_root, u64 new_dirid) |
6200 | u64 new_dirid, u64 alloc_hint) | ||
6201 | { | 6680 | { |
6202 | struct inode *inode; | 6681 | struct inode *inode; |
6203 | int err; | 6682 | int err; |
6204 | u64 index = 0; | 6683 | u64 index = 0; |
6205 | 6684 | ||
6206 | inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, | 6685 | inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, |
6207 | new_dirid, alloc_hint, S_IFDIR | 0700, &index); | 6686 | new_dirid, S_IFDIR | 0700, &index); |
6208 | if (IS_ERR(inode)) | 6687 | if (IS_ERR(inode)) |
6209 | return PTR_ERR(inode); | 6688 | return PTR_ERR(inode); |
6210 | inode->i_op = &btrfs_dir_inode_operations; | 6689 | inode->i_op = &btrfs_dir_inode_operations; |
@@ -6256,19 +6735,21 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6256 | ei->index_cnt = (u64)-1; | 6735 | ei->index_cnt = (u64)-1; |
6257 | ei->last_unlink_trans = 0; | 6736 | ei->last_unlink_trans = 0; |
6258 | 6737 | ||
6259 | spin_lock_init(&ei->accounting_lock); | ||
6260 | atomic_set(&ei->outstanding_extents, 0); | 6738 | atomic_set(&ei->outstanding_extents, 0); |
6261 | ei->reserved_extents = 0; | 6739 | atomic_set(&ei->reserved_extents, 0); |
6262 | 6740 | ||
6263 | ei->ordered_data_close = 0; | 6741 | ei->ordered_data_close = 0; |
6264 | ei->orphan_meta_reserved = 0; | 6742 | ei->orphan_meta_reserved = 0; |
6265 | ei->dummy_inode = 0; | 6743 | ei->dummy_inode = 0; |
6266 | ei->force_compress = 0; | 6744 | ei->in_defrag = 0; |
6745 | ei->force_compress = BTRFS_COMPRESS_NONE; | ||
6746 | |||
6747 | ei->delayed_node = NULL; | ||
6267 | 6748 | ||
6268 | inode = &ei->vfs_inode; | 6749 | inode = &ei->vfs_inode; |
6269 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); | 6750 | extent_map_tree_init(&ei->extent_tree); |
6270 | extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS); | 6751 | extent_io_tree_init(&ei->io_tree, &inode->i_data); |
6271 | extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS); | 6752 | extent_io_tree_init(&ei->io_failure_tree, &inode->i_data); |
6272 | mutex_init(&ei->log_mutex); | 6753 | mutex_init(&ei->log_mutex); |
6273 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 6754 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
6274 | INIT_LIST_HEAD(&ei->i_orphan); | 6755 | INIT_LIST_HEAD(&ei->i_orphan); |
@@ -6279,6 +6760,13 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6279 | return inode; | 6760 | return inode; |
6280 | } | 6761 | } |
6281 | 6762 | ||
6763 | static void btrfs_i_callback(struct rcu_head *head) | ||
6764 | { | ||
6765 | struct inode *inode = container_of(head, struct inode, i_rcu); | ||
6766 | INIT_LIST_HEAD(&inode->i_dentry); | ||
6767 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); | ||
6768 | } | ||
6769 | |||
6282 | void btrfs_destroy_inode(struct inode *inode) | 6770 | void btrfs_destroy_inode(struct inode *inode) |
6283 | { | 6771 | { |
6284 | struct btrfs_ordered_extent *ordered; | 6772 | struct btrfs_ordered_extent *ordered; |
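
Note: btrfs_destroy_inode() now defers the kmem_cache_free() through call_rcu(), and
btrfs_i_callback() recovers the inode from the embedded rcu_head via container_of().
A self-contained demonstration of that pointer recovery; the struct types here are
stand-ins, not the kernel's:

    #include <stdio.h>
    #include <stddef.h>

    /* Same pointer arithmetic the kernel macro performs. */
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct rcu_head { void *next; };
    struct fake_inode { int ino; struct rcu_head i_rcu; };

    int main(void)
    {
            struct fake_inode in = { .ino = 42 };
            struct rcu_head *head = &in.i_rcu;      /* what the callback gets */
            struct fake_inode *back =
                    container_of(head, struct fake_inode, i_rcu);

            printf("recovered ino = %d\n", back->ino);
            return 0;
    }
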
@@ -6287,7 +6775,7 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6287 | WARN_ON(!list_empty(&inode->i_dentry)); | 6775 | WARN_ON(!list_empty(&inode->i_dentry)); |
6288 | WARN_ON(inode->i_data.nrpages); | 6776 | WARN_ON(inode->i_data.nrpages); |
6289 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); | 6777 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); |
6290 | WARN_ON(BTRFS_I(inode)->reserved_extents); | 6778 | WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); |
6291 | 6779 | ||
6292 | /* | 6780 | /* |
6293 | * This can happen where we create an inode, but somebody else also | 6781 | * This can happen where we create an inode, but somebody else also |
@@ -6310,8 +6798,8 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6310 | 6798 | ||
6311 | spin_lock(&root->orphan_lock); | 6799 | spin_lock(&root->orphan_lock); |
6312 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 6800 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { |
6313 | printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", | 6801 | printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", |
6314 | inode->i_ino); | 6802 | (unsigned long long)btrfs_ino(inode)); |
6315 | list_del_init(&BTRFS_I(inode)->i_orphan); | 6803 | list_del_init(&BTRFS_I(inode)->i_orphan); |
6316 | } | 6804 | } |
6317 | spin_unlock(&root->orphan_lock); | 6805 | spin_unlock(&root->orphan_lock); |
@@ -6333,14 +6821,16 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6333 | inode_tree_del(inode); | 6821 | inode_tree_del(inode); |
6334 | btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); | 6822 | btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); |
6335 | free: | 6823 | free: |
6336 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); | 6824 | btrfs_remove_delayed_node(inode); |
6825 | call_rcu(&inode->i_rcu, btrfs_i_callback); | ||
6337 | } | 6826 | } |
6338 | 6827 | ||
6339 | int btrfs_drop_inode(struct inode *inode) | 6828 | int btrfs_drop_inode(struct inode *inode) |
6340 | { | 6829 | { |
6341 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6830 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6342 | 6831 | ||
6343 | if (btrfs_root_refs(&root->root_item) == 0) | 6832 | if (btrfs_root_refs(&root->root_item) == 0 && |
6833 | !is_free_space_inode(root, inode)) | ||
6344 | return 1; | 6834 | return 1; |
6345 | else | 6835 | else |
6346 | return generic_drop_inode(inode); | 6836 | return generic_drop_inode(inode); |
@@ -6363,6 +6853,8 @@ void btrfs_destroy_cachep(void) | |||
6363 | kmem_cache_destroy(btrfs_transaction_cachep); | 6853 | kmem_cache_destroy(btrfs_transaction_cachep); |
6364 | if (btrfs_path_cachep) | 6854 | if (btrfs_path_cachep) |
6365 | kmem_cache_destroy(btrfs_path_cachep); | 6855 | kmem_cache_destroy(btrfs_path_cachep); |
6856 | if (btrfs_free_space_cachep) | ||
6857 | kmem_cache_destroy(btrfs_free_space_cachep); | ||
6366 | } | 6858 | } |
6367 | 6859 | ||
6368 | int btrfs_init_cachep(void) | 6860 | int btrfs_init_cachep(void) |
@@ -6391,6 +6883,12 @@ int btrfs_init_cachep(void) | |||
6391 | if (!btrfs_path_cachep) | 6883 | if (!btrfs_path_cachep) |
6392 | goto fail; | 6884 | goto fail; |
6393 | 6885 | ||
6886 | btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache", | ||
6887 | sizeof(struct btrfs_free_space), 0, | ||
6888 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | ||
6889 | if (!btrfs_free_space_cachep) | ||
6890 | goto fail; | ||
6891 | |||
6394 | return 0; | 6892 | return 0; |
6395 | fail: | 6893 | fail: |
6396 | btrfs_destroy_cachep(); | 6894 | btrfs_destroy_cachep(); |
@@ -6409,6 +6907,26 @@ static int btrfs_getattr(struct vfsmount *mnt, | |||
6409 | return 0; | 6907 | return 0; |
6410 | } | 6908 | } |
6411 | 6909 | ||
6910 | /* | ||
6911 | * If a file is moved, it will inherit the cow and compression flags of the new | ||
6912 | * directory. | ||
6913 | */ | ||
6914 | static void fixup_inode_flags(struct inode *dir, struct inode *inode) | ||
6915 | { | ||
6916 | struct btrfs_inode *b_dir = BTRFS_I(dir); | ||
6917 | struct btrfs_inode *b_inode = BTRFS_I(inode); | ||
6918 | |||
6919 | if (b_dir->flags & BTRFS_INODE_NODATACOW) | ||
6920 | b_inode->flags |= BTRFS_INODE_NODATACOW; | ||
6921 | else | ||
6922 | b_inode->flags &= ~BTRFS_INODE_NODATACOW; | ||
6923 | |||
6924 | if (b_dir->flags & BTRFS_INODE_COMPRESS) | ||
6925 | b_inode->flags |= BTRFS_INODE_COMPRESS; | ||
6926 | else | ||
6927 | b_inode->flags &= ~BTRFS_INODE_COMPRESS; | ||
6928 | } | ||
6929 | |||
6412 | static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | 6930 | static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, |
6413 | struct inode *new_dir, struct dentry *new_dentry) | 6931 | struct inode *new_dir, struct dentry *new_dentry) |
6414 | { | 6932 | { |
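
Note: fixup_inode_flags() above makes a renamed inode adopt the destination directory's
NODATACOW and COMPRESS bits. The per-flag if/else collapses into one mask expression;
a hedged userspace equivalent with stand-in flag values (the real BTRFS_INODE_* bits
live in btrfs_inode.h):

    #include <stdio.h>

    #define NODATACOW 0x1   /* illustrative values only */
    #define COMPRESS  0x2

    /* Clear both inherited bits on the inode, then copy them from the dir. */
    static unsigned int inherit_flags(unsigned int dir, unsigned int inode)
    {
            return (inode & ~(NODATACOW | COMPRESS)) |
                   (dir & (NODATACOW | COMPRESS));
    }

    int main(void)
    {
            unsigned int dir = COMPRESS;      /* compressed directory */
            unsigned int inode = NODATACOW;   /* nodatacow file */

            printf("after move: %#x\n", inherit_flags(dir, inode)); /* 0x2 */
            return 0;
    }
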
@@ -6421,16 +6939,17 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6421 | u64 index = 0; | 6939 | u64 index = 0; |
6422 | u64 root_objectid; | 6940 | u64 root_objectid; |
6423 | int ret; | 6941 | int ret; |
6942 | u64 old_ino = btrfs_ino(old_inode); | ||
6424 | 6943 | ||
6425 | if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) | 6944 | if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
6426 | return -EPERM; | 6945 | return -EPERM; |
6427 | 6946 | ||
6428 | /* we only allow rename subvolume link between subvolumes */ | 6947 | /* we only allow rename subvolume link between subvolumes */ |
6429 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) | 6948 | if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) |
6430 | return -EXDEV; | 6949 | return -EXDEV; |
6431 | 6950 | ||
6432 | if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || | 6951 | if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || |
6433 | (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) | 6952 | (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID)) |
6434 | return -ENOTEMPTY; | 6953 | return -ENOTEMPTY; |
6435 | 6954 | ||
6436 | if (S_ISDIR(old_inode->i_mode) && new_inode && | 6955 | if (S_ISDIR(old_inode->i_mode) && new_inode && |
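
Note: the rename path now reads btrfs_ino(old_inode) once into old_ino and uses it
throughout, because the btrfs objectid can differ from the VFS i_ino (for example for
the free-space cache inode). Roughly what such a helper does, paraphrased from memory
of btrfs_inode.h; treat the struct and the fallback condition as assumptions:

    #include <stdio.h>

    struct btrfs_inode_stub {               /* stand-in, not the real struct */
            unsigned long long location_objectid;
            unsigned long long vfs_ino;
    };

    /* Prefer the on-disk objectid; fall back to the VFS number for
     * special inodes that have no meaningful location key (assumption). */
    static unsigned long long btrfs_ino_sketch(const struct btrfs_inode_stub *bi)
    {
            unsigned long long ino = bi->location_objectid;

            if (!ino)
                    ino = bi->vfs_ino;
            return ino;
    }

    int main(void)
    {
            struct btrfs_inode_stub bi = { .location_objectid = 0, .vfs_ino = 257 };

            printf("ino = %llu\n", btrfs_ino_sketch(&bi)); /* falls back: 257 */
            return 0;
    }
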
@@ -6446,7 +6965,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6446 | filemap_flush(old_inode->i_mapping); | 6965 | filemap_flush(old_inode->i_mapping); |
6447 | 6966 | ||
6448 | /* close the racy window with snapshot create/destroy ioctl */ | 6967 | /* close the racy window with snapshot create/destroy ioctl */ |
6449 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 6968 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) |
6450 | down_read(&root->fs_info->subvol_sem); | 6969 | down_read(&root->fs_info->subvol_sem); |
6451 | /* | 6970 | /* |
6452 | * We want to reserve the absolute worst case amount of items. So if | 6971 | * We want to reserve the absolute worst case amount of items. So if |
@@ -6457,10 +6976,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6457 | * should cover the worst case number of items we'll modify. | 6976 | * should cover the worst case number of items we'll modify. |
6458 | */ | 6977 | */ |
6459 | trans = btrfs_start_transaction(root, 20); | 6978 | trans = btrfs_start_transaction(root, 20); |
6460 | if (IS_ERR(trans)) | 6979 | if (IS_ERR(trans)) { |
6461 | return PTR_ERR(trans); | 6980 | ret = PTR_ERR(trans); |
6462 | 6981 | goto out_notrans; | |
6463 | btrfs_set_trans_block_group(trans, new_dir); | 6982 | } |
6464 | 6983 | ||
6465 | if (dest != root) | 6984 | if (dest != root) |
6466 | btrfs_record_root_in_trans(trans, dest); | 6985 | btrfs_record_root_in_trans(trans, dest); |
@@ -6469,15 +6988,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6469 | if (ret) | 6988 | if (ret) |
6470 | goto out_fail; | 6989 | goto out_fail; |
6471 | 6990 | ||
6472 | if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { | 6991 | if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { |
6473 | /* force full log commit if subvolume involved. */ | 6992 | /* force full log commit if subvolume involved. */ |
6474 | root->fs_info->last_trans_log_full_commit = trans->transid; | 6993 | root->fs_info->last_trans_log_full_commit = trans->transid; |
6475 | } else { | 6994 | } else { |
6476 | ret = btrfs_insert_inode_ref(trans, dest, | 6995 | ret = btrfs_insert_inode_ref(trans, dest, |
6477 | new_dentry->d_name.name, | 6996 | new_dentry->d_name.name, |
6478 | new_dentry->d_name.len, | 6997 | new_dentry->d_name.len, |
6479 | old_inode->i_ino, | 6998 | old_ino, |
6480 | new_dir->i_ino, index); | 6999 | btrfs_ino(new_dir), index); |
6481 | if (ret) | 7000 | if (ret) |
6482 | goto out_fail; | 7001 | goto out_fail; |
6483 | /* | 7002 | /* |
@@ -6493,10 +7012,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6493 | * make sure the inode gets flushed if it is replacing | 7012 | * make sure the inode gets flushed if it is replacing |
6494 | * something. | 7013 | * something. |
6495 | */ | 7014 | */ |
6496 | if (new_inode && new_inode->i_size && | 7015 | if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode)) |
6497 | old_inode && S_ISREG(old_inode->i_mode)) { | ||
6498 | btrfs_add_ordered_operation(trans, root, old_inode); | 7016 | btrfs_add_ordered_operation(trans, root, old_inode); |
6499 | } | ||
6500 | 7017 | ||
6501 | old_dir->i_ctime = old_dir->i_mtime = ctime; | 7018 | old_dir->i_ctime = old_dir->i_mtime = ctime; |
6502 | new_dir->i_ctime = new_dir->i_mtime = ctime; | 7019 | new_dir->i_ctime = new_dir->i_mtime = ctime; |
@@ -6505,23 +7022,24 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6505 | if (old_dentry->d_parent != new_dentry->d_parent) | 7022 | if (old_dentry->d_parent != new_dentry->d_parent) |
6506 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); | 7023 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); |
6507 | 7024 | ||
6508 | if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { | 7025 | if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { |
6509 | root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; | 7026 | root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; |
6510 | ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, | 7027 | ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, |
6511 | old_dentry->d_name.name, | 7028 | old_dentry->d_name.name, |
6512 | old_dentry->d_name.len); | 7029 | old_dentry->d_name.len); |
6513 | } else { | 7030 | } else { |
6514 | btrfs_inc_nlink(old_dentry->d_inode); | 7031 | ret = __btrfs_unlink_inode(trans, root, old_dir, |
6515 | ret = btrfs_unlink_inode(trans, root, old_dir, | 7032 | old_dentry->d_inode, |
6516 | old_dentry->d_inode, | 7033 | old_dentry->d_name.name, |
6517 | old_dentry->d_name.name, | 7034 | old_dentry->d_name.len); |
6518 | old_dentry->d_name.len); | 7035 | if (!ret) |
7036 | ret = btrfs_update_inode(trans, root, old_inode); | ||
6519 | } | 7037 | } |
6520 | BUG_ON(ret); | 7038 | BUG_ON(ret); |
6521 | 7039 | ||
6522 | if (new_inode) { | 7040 | if (new_inode) { |
6523 | new_inode->i_ctime = CURRENT_TIME; | 7041 | new_inode->i_ctime = CURRENT_TIME; |
6524 | if (unlikely(new_inode->i_ino == | 7042 | if (unlikely(btrfs_ino(new_inode) == |
6525 | BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { | 7043 | BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { |
6526 | root_objectid = BTRFS_I(new_inode)->location.objectid; | 7044 | root_objectid = BTRFS_I(new_inode)->location.objectid; |
6527 | ret = btrfs_unlink_subvol(trans, dest, new_dir, | 7045 | ret = btrfs_unlink_subvol(trans, dest, new_dir, |
@@ -6542,20 +7060,23 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6542 | } | 7060 | } |
6543 | } | 7061 | } |
6544 | 7062 | ||
7063 | fixup_inode_flags(new_dir, old_inode); | ||
7064 | |||
6545 | ret = btrfs_add_link(trans, new_dir, old_inode, | 7065 | ret = btrfs_add_link(trans, new_dir, old_inode, |
6546 | new_dentry->d_name.name, | 7066 | new_dentry->d_name.name, |
6547 | new_dentry->d_name.len, 0, index); | 7067 | new_dentry->d_name.len, 0, index); |
6548 | BUG_ON(ret); | 7068 | BUG_ON(ret); |
6549 | 7069 | ||
6550 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { | 7070 | if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { |
6551 | btrfs_log_new_name(trans, old_inode, old_dir, | 7071 | struct dentry *parent = dget_parent(new_dentry); |
6552 | new_dentry->d_parent); | 7072 | btrfs_log_new_name(trans, old_inode, old_dir, parent); |
7073 | dput(parent); | ||
6553 | btrfs_end_log_trans(root); | 7074 | btrfs_end_log_trans(root); |
6554 | } | 7075 | } |
6555 | out_fail: | 7076 | out_fail: |
6556 | btrfs_end_transaction_throttle(trans, root); | 7077 | btrfs_end_transaction_throttle(trans, root); |
6557 | 7078 | out_notrans: | |
6558 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 7079 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) |
6559 | up_read(&root->fs_info->subvol_sem); | 7080 | up_read(&root->fs_info->subvol_sem); |
6560 | 7081 | ||
6561 | return ret; | 7082 | return ret; |
@@ -6609,38 +7130,6 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
6609 | return 0; | 7130 | return 0; |
6610 | } | 7131 | } |
6611 | 7132 | ||
6612 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput) | ||
6613 | { | ||
6614 | struct btrfs_inode *binode; | ||
6615 | struct inode *inode = NULL; | ||
6616 | |||
6617 | spin_lock(&root->fs_info->delalloc_lock); | ||
6618 | while (!list_empty(&root->fs_info->delalloc_inodes)) { | ||
6619 | binode = list_entry(root->fs_info->delalloc_inodes.next, | ||
6620 | struct btrfs_inode, delalloc_inodes); | ||
6621 | inode = igrab(&binode->vfs_inode); | ||
6622 | if (inode) { | ||
6623 | list_move_tail(&binode->delalloc_inodes, | ||
6624 | &root->fs_info->delalloc_inodes); | ||
6625 | break; | ||
6626 | } | ||
6627 | |||
6628 | list_del_init(&binode->delalloc_inodes); | ||
6629 | cond_resched_lock(&root->fs_info->delalloc_lock); | ||
6630 | } | ||
6631 | spin_unlock(&root->fs_info->delalloc_lock); | ||
6632 | |||
6633 | if (inode) { | ||
6634 | write_inode_now(inode, 0); | ||
6635 | if (delay_iput) | ||
6636 | btrfs_add_delayed_iput(inode); | ||
6637 | else | ||
6638 | iput(inode); | ||
6639 | return 1; | ||
6640 | } | ||
6641 | return 0; | ||
6642 | } | ||
6643 | |||
6644 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | 7133 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, |
6645 | const char *symname) | 7134 | const char *symname) |
6646 | { | 7135 | { |
@@ -6664,9 +7153,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
6664 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | 7153 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) |
6665 | return -ENAMETOOLONG; | 7154 | return -ENAMETOOLONG; |
6666 | 7155 | ||
6667 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
6668 | if (err) | ||
6669 | return err; | ||
6670 | /* | 7156 | /* |
6671 | * 2 items for inode item and ref | 7157 | * 2 items for inode item and ref |
6672 | * 2 items for dir items | 7158 | * 2 items for dir items |
@@ -6676,25 +7162,25 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
6676 | if (IS_ERR(trans)) | 7162 | if (IS_ERR(trans)) |
6677 | return PTR_ERR(trans); | 7163 | return PTR_ERR(trans); |
6678 | 7164 | ||
6679 | btrfs_set_trans_block_group(trans, dir); | 7165 | err = btrfs_find_free_ino(root, &objectid); |
7166 | if (err) | ||
7167 | goto out_unlock; | ||
6680 | 7168 | ||
6681 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 7169 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
6682 | dentry->d_name.len, | 7170 | dentry->d_name.len, btrfs_ino(dir), objectid, |
6683 | dentry->d_parent->d_inode->i_ino, objectid, | 7171 | S_IFLNK|S_IRWXUGO, &index); |
6684 | BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, | 7172 | if (IS_ERR(inode)) { |
6685 | &index); | 7173 | err = PTR_ERR(inode); |
6686 | err = PTR_ERR(inode); | ||
6687 | if (IS_ERR(inode)) | ||
6688 | goto out_unlock; | 7174 | goto out_unlock; |
7175 | } | ||
6689 | 7176 | ||
6690 | err = btrfs_init_inode_security(trans, inode, dir); | 7177 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); |
6691 | if (err) { | 7178 | if (err) { |
6692 | drop_inode = 1; | 7179 | drop_inode = 1; |
6693 | goto out_unlock; | 7180 | goto out_unlock; |
6694 | } | 7181 | } |
6695 | 7182 | ||
6696 | btrfs_set_trans_block_group(trans, inode); | 7183 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
6697 | err = btrfs_add_nondir(trans, dentry, inode, 0, index); | ||
6698 | if (err) | 7184 | if (err) |
6699 | drop_inode = 1; | 7185 | drop_inode = 1; |
6700 | else { | 7186 | else { |
@@ -6704,14 +7190,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
6704 | inode->i_op = &btrfs_file_inode_operations; | 7190 | inode->i_op = &btrfs_file_inode_operations; |
6705 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 7191 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
6706 | } | 7192 | } |
6707 | btrfs_update_inode_block_group(trans, inode); | ||
6708 | btrfs_update_inode_block_group(trans, dir); | ||
6709 | if (drop_inode) | 7193 | if (drop_inode) |
6710 | goto out_unlock; | 7194 | goto out_unlock; |
6711 | 7195 | ||
6712 | path = btrfs_alloc_path(); | 7196 | path = btrfs_alloc_path(); |
6713 | BUG_ON(!path); | 7197 | BUG_ON(!path); |
6714 | key.objectid = inode->i_ino; | 7198 | key.objectid = btrfs_ino(inode); |
6715 | key.offset = 0; | 7199 | key.offset = 0; |
6716 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); | 7200 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); |
6717 | datasize = btrfs_file_extent_calc_inline_size(name_len); | 7201 | datasize = btrfs_file_extent_calc_inline_size(name_len); |
@@ -6719,6 +7203,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
6719 | datasize); | 7203 | datasize); |
6720 | if (err) { | 7204 | if (err) { |
6721 | drop_inode = 1; | 7205 | drop_inode = 1; |
7206 | btrfs_free_path(path); | ||
6722 | goto out_unlock; | 7207 | goto out_unlock; |
6723 | } | 7208 | } |
6724 | leaf = path->nodes[0]; | 7209 | leaf = path->nodes[0]; |
@@ -6757,27 +7242,34 @@ out_unlock: | |||
6757 | return err; | 7242 | return err; |
6758 | } | 7243 | } |
6759 | 7244 | ||
6760 | int btrfs_prealloc_file_range(struct inode *inode, int mode, | 7245 | static int __btrfs_prealloc_file_range(struct inode *inode, int mode, |
6761 | u64 start, u64 num_bytes, u64 min_size, | 7246 | u64 start, u64 num_bytes, u64 min_size, |
6762 | loff_t actual_len, u64 *alloc_hint) | 7247 | loff_t actual_len, u64 *alloc_hint, |
7248 | struct btrfs_trans_handle *trans) | ||
6763 | { | 7249 | { |
6764 | struct btrfs_trans_handle *trans; | ||
6765 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7250 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6766 | struct btrfs_key ins; | 7251 | struct btrfs_key ins; |
6767 | u64 cur_offset = start; | 7252 | u64 cur_offset = start; |
7253 | u64 i_size; | ||
6768 | int ret = 0; | 7254 | int ret = 0; |
7255 | bool own_trans = true; | ||
6769 | 7256 | ||
7257 | if (trans) | ||
7258 | own_trans = false; | ||
6770 | while (num_bytes > 0) { | 7259 | while (num_bytes > 0) { |
6771 | trans = btrfs_start_transaction(root, 3); | 7260 | if (own_trans) { |
6772 | if (IS_ERR(trans)) { | 7261 | trans = btrfs_start_transaction(root, 3); |
6773 | ret = PTR_ERR(trans); | 7262 | if (IS_ERR(trans)) { |
6774 | break; | 7263 | ret = PTR_ERR(trans); |
7264 | break; | ||
7265 | } | ||
6775 | } | 7266 | } |
6776 | 7267 | ||
6777 | ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, | 7268 | ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, |
6778 | 0, *alloc_hint, (u64)-1, &ins, 1); | 7269 | 0, *alloc_hint, (u64)-1, &ins, 1); |
6779 | if (ret) { | 7270 | if (ret) { |
6780 | btrfs_end_transaction(trans, root); | 7271 | if (own_trans) |
7272 | btrfs_end_transaction(trans, root); | ||
6781 | break; | 7273 | break; |
6782 | } | 7274 | } |
6783 | 7275 | ||
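
Note: __btrfs_prealloc_file_range() above gains an optional caller-supplied transaction:
own_trans records whether the function opened the handle itself and therefore whether it
may end it per iteration. A toy version of the same ownership pattern; all names below
are illustrative, not btrfs APIs:

    #include <stdio.h>

    struct trans { int id; };

    /* Use the caller's transaction if given, otherwise manage our own. */
    static int do_work(struct trans *trans)
    {
            struct trans local = { .id = 99 };
            int own_trans = (trans == NULL);

            if (own_trans)
                    trans = &local;             /* "start" our own */
            printf("working under transaction %d (own=%d)\n",
                   trans->id, own_trans);
            if (own_trans)
                    trans = NULL;               /* "end" only what we started */
            return 0;
    }

    int main(void)
    {
            struct trans outer = { .id = 1 };

            do_work(&outer);   /* caller-managed lifetime */
            do_work(NULL);     /* self-managed lifetime */
            return 0;
    }
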
@@ -6800,121 +7292,38 @@ int btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
6800 | (actual_len > inode->i_size) && | 7292 | (actual_len > inode->i_size) && |
6801 | (cur_offset > inode->i_size)) { | 7293 | (cur_offset > inode->i_size)) { |
6802 | if (cur_offset > actual_len) | 7294 | if (cur_offset > actual_len) |
6803 | i_size_write(inode, actual_len); | 7295 | i_size = actual_len; |
6804 | else | 7296 | else |
6805 | i_size_write(inode, cur_offset); | 7297 | i_size = cur_offset; |
6806 | i_size_write(inode, cur_offset); | 7298 | i_size_write(inode, i_size); |
6807 | btrfs_ordered_update_i_size(inode, cur_offset, NULL); | 7299 | btrfs_ordered_update_i_size(inode, i_size, NULL); |
6808 | } | 7300 | } |
6809 | 7301 | ||
6810 | ret = btrfs_update_inode(trans, root, inode); | 7302 | ret = btrfs_update_inode(trans, root, inode); |
6811 | BUG_ON(ret); | 7303 | BUG_ON(ret); |
6812 | 7304 | ||
6813 | btrfs_end_transaction(trans, root); | 7305 | if (own_trans) |
7306 | btrfs_end_transaction(trans, root); | ||
6814 | } | 7307 | } |
6815 | return ret; | 7308 | return ret; |
6816 | } | 7309 | } |
6817 | 7310 | ||
6818 | static long btrfs_fallocate(struct inode *inode, int mode, | 7311 | int btrfs_prealloc_file_range(struct inode *inode, int mode, |
6819 | loff_t offset, loff_t len) | 7312 | u64 start, u64 num_bytes, u64 min_size, |
7313 | loff_t actual_len, u64 *alloc_hint) | ||
6820 | { | 7314 | { |
6821 | struct extent_state *cached_state = NULL; | 7315 | return __btrfs_prealloc_file_range(inode, mode, start, num_bytes, |
6822 | u64 cur_offset; | 7316 | min_size, actual_len, alloc_hint, |
6823 | u64 last_byte; | 7317 | NULL); |
6824 | u64 alloc_start; | 7318 | } |
6825 | u64 alloc_end; | ||
6826 | u64 alloc_hint = 0; | ||
6827 | u64 locked_end; | ||
6828 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
6829 | struct extent_map *em; | ||
6830 | int ret; | ||
6831 | |||
6832 | alloc_start = offset & ~mask; | ||
6833 | alloc_end = (offset + len + mask) & ~mask; | ||
6834 | |||
6835 | /* | ||
6836 | * wait for ordered IO before we have any locks. We'll loop again | ||
6837 | * below with the locks held. | ||
6838 | */ | ||
6839 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
6840 | |||
6841 | mutex_lock(&inode->i_mutex); | ||
6842 | if (alloc_start > inode->i_size) { | ||
6843 | ret = btrfs_cont_expand(inode, alloc_start); | ||
6844 | if (ret) | ||
6845 | goto out; | ||
6846 | } | ||
6847 | |||
6848 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); | ||
6849 | if (ret) | ||
6850 | goto out; | ||
6851 | |||
6852 | locked_end = alloc_end - 1; | ||
6853 | while (1) { | ||
6854 | struct btrfs_ordered_extent *ordered; | ||
6855 | |||
6856 | /* the extent lock is ordered inside the running | ||
6857 | * transaction | ||
6858 | */ | ||
6859 | lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, | ||
6860 | locked_end, 0, &cached_state, GFP_NOFS); | ||
6861 | ordered = btrfs_lookup_first_ordered_extent(inode, | ||
6862 | alloc_end - 1); | ||
6863 | if (ordered && | ||
6864 | ordered->file_offset + ordered->len > alloc_start && | ||
6865 | ordered->file_offset < alloc_end) { | ||
6866 | btrfs_put_ordered_extent(ordered); | ||
6867 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | ||
6868 | alloc_start, locked_end, | ||
6869 | &cached_state, GFP_NOFS); | ||
6870 | /* | ||
6871 | * we can't wait on the range with the transaction | ||
6872 | * running or with the extent lock held | ||
6873 | */ | ||
6874 | btrfs_wait_ordered_range(inode, alloc_start, | ||
6875 | alloc_end - alloc_start); | ||
6876 | } else { | ||
6877 | if (ordered) | ||
6878 | btrfs_put_ordered_extent(ordered); | ||
6879 | break; | ||
6880 | } | ||
6881 | } | ||
6882 | |||
6883 | cur_offset = alloc_start; | ||
6884 | while (1) { | ||
6885 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | ||
6886 | alloc_end - cur_offset, 0); | ||
6887 | BUG_ON(IS_ERR(em) || !em); | ||
6888 | last_byte = min(extent_map_end(em), alloc_end); | ||
6889 | last_byte = (last_byte + mask) & ~mask; | ||
6890 | if (em->block_start == EXTENT_MAP_HOLE || | ||
6891 | (cur_offset >= inode->i_size && | ||
6892 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
6893 | ret = btrfs_prealloc_file_range(inode, mode, cur_offset, | ||
6894 | last_byte - cur_offset, | ||
6895 | 1 << inode->i_blkbits, | ||
6896 | offset + len, | ||
6897 | &alloc_hint); | ||
6898 | if (ret < 0) { | ||
6899 | free_extent_map(em); | ||
6900 | break; | ||
6901 | } | ||
6902 | } | ||
6903 | free_extent_map(em); | ||
6904 | |||
6905 | cur_offset = last_byte; | ||
6906 | if (cur_offset >= alloc_end) { | ||
6907 | ret = 0; | ||
6908 | break; | ||
6909 | } | ||
6910 | } | ||
6911 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||
6912 | &cached_state, GFP_NOFS); | ||
6913 | 7319 | ||
6914 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); | 7320 | int btrfs_prealloc_file_range_trans(struct inode *inode, |
6915 | out: | 7321 | struct btrfs_trans_handle *trans, int mode, |
6916 | mutex_unlock(&inode->i_mutex); | 7322 | u64 start, u64 num_bytes, u64 min_size, |
6917 | return ret; | 7323 | loff_t actual_len, u64 *alloc_hint) |
7324 | { | ||
7325 | return __btrfs_prealloc_file_range(inode, mode, start, num_bytes, | ||
7326 | min_size, actual_len, alloc_hint, trans); | ||
6918 | } | 7327 | } |
6919 | 7328 | ||
6920 | static int btrfs_set_page_dirty(struct page *page) | 7329 | static int btrfs_set_page_dirty(struct page *page) |
@@ -6922,11 +7331,15 @@ static int btrfs_set_page_dirty(struct page *page) | |||
6922 | return __set_page_dirty_nobuffers(page); | 7331 | return __set_page_dirty_nobuffers(page); |
6923 | } | 7332 | } |
6924 | 7333 | ||
6925 | static int btrfs_permission(struct inode *inode, int mask) | 7334 | static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) |
6926 | { | 7335 | { |
7336 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
7337 | |||
7338 | if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) | ||
7339 | return -EROFS; | ||
6927 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) | 7340 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) |
6928 | return -EACCES; | 7341 | return -EACCES; |
6929 | return generic_permission(inode, mask, btrfs_check_acl); | 7342 | return generic_permission(inode, mask, flags, btrfs_check_acl); |
6930 | } | 7343 | } |
6931 | 7344 | ||
6932 | static const struct inode_operations btrfs_dir_inode_operations = { | 7345 | static const struct inode_operations btrfs_dir_inode_operations = { |
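
Note: btrfs_permission() now checks the read-only subvolume root before the per-inode
READONLY flag, so writes into a read-only snapshot fail with EROFS rather than EACCES.
A userspace mirror of just that ordering; the two boolean parameters stand in for the
kernel-side state:

    #include <stdio.h>
    #include <errno.h>

    #define MAY_WRITE 0x2

    static int permission(int root_readonly, int inode_readonly, int mask)
    {
            if (root_readonly && (mask & MAY_WRITE))
                    return -EROFS;
            if (inode_readonly && (mask & MAY_WRITE))
                    return -EACCES;
            return 0;   /* would fall through to generic_permission() */
    }

    int main(void)
    {
            printf("%d\n", permission(1, 0, MAY_WRITE));  /* -EROFS  (-30) */
            printf("%d\n", permission(0, 1, MAY_WRITE));  /* -EACCES (-13) */
            return 0;
    }
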
@@ -6995,7 +7408,6 @@ static const struct address_space_operations btrfs_aops = { | |||
6995 | .writepage = btrfs_writepage, | 7408 | .writepage = btrfs_writepage, |
6996 | .writepages = btrfs_writepages, | 7409 | .writepages = btrfs_writepages, |
6997 | .readpages = btrfs_readpages, | 7410 | .readpages = btrfs_readpages, |
6998 | .sync_page = block_sync_page, | ||
6999 | .direct_IO = btrfs_direct_IO, | 7411 | .direct_IO = btrfs_direct_IO, |
7000 | .invalidatepage = btrfs_invalidatepage, | 7412 | .invalidatepage = btrfs_invalidatepage, |
7001 | .releasepage = btrfs_releasepage, | 7413 | .releasepage = btrfs_releasepage, |
@@ -7011,7 +7423,6 @@ static const struct address_space_operations btrfs_symlink_aops = { | |||
7011 | }; | 7423 | }; |
7012 | 7424 | ||
7013 | static const struct inode_operations btrfs_file_inode_operations = { | 7425 | static const struct inode_operations btrfs_file_inode_operations = { |
7014 | .truncate = btrfs_truncate, | ||
7015 | .getattr = btrfs_getattr, | 7426 | .getattr = btrfs_getattr, |
7016 | .setattr = btrfs_setattr, | 7427 | .setattr = btrfs_setattr, |
7017 | .setxattr = btrfs_setxattr, | 7428 | .setxattr = btrfs_setxattr, |
@@ -7019,7 +7430,6 @@ static const struct inode_operations btrfs_file_inode_operations = { | |||
7019 | .listxattr = btrfs_listxattr, | 7430 | .listxattr = btrfs_listxattr, |
7020 | .removexattr = btrfs_removexattr, | 7431 | .removexattr = btrfs_removexattr, |
7021 | .permission = btrfs_permission, | 7432 | .permission = btrfs_permission, |
7022 | .fallocate = btrfs_fallocate, | ||
7023 | .fiemap = btrfs_fiemap, | 7433 | .fiemap = btrfs_fiemap, |
7024 | }; | 7434 | }; |
7025 | static const struct inode_operations btrfs_special_inode_operations = { | 7435 | static const struct inode_operations btrfs_special_inode_operations = { |
@@ -7035,6 +7445,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { | |||
7035 | .readlink = generic_readlink, | 7445 | .readlink = generic_readlink, |
7036 | .follow_link = page_follow_link_light, | 7446 | .follow_link = page_follow_link_light, |
7037 | .put_link = page_put_link, | 7447 | .put_link = page_put_link, |
7448 | .getattr = btrfs_getattr, | ||
7038 | .permission = btrfs_permission, | 7449 | .permission = btrfs_permission, |
7039 | .setxattr = btrfs_setxattr, | 7450 | .setxattr = btrfs_setxattr, |
7040 | .getxattr = btrfs_getxattr, | 7451 | .getxattr = btrfs_getxattr, |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9254b3d58dbe..a3c4751e07db 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/xattr.h> | 40 | #include <linux/xattr.h> |
41 | #include <linux/vmalloc.h> | 41 | #include <linux/vmalloc.h> |
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | #include <linux/blkdev.h> | ||
43 | #include "compat.h" | 44 | #include "compat.h" |
44 | #include "ctree.h" | 45 | #include "ctree.h" |
45 | #include "disk-io.h" | 46 | #include "disk-io.h" |
@@ -49,6 +50,7 @@ | |||
49 | #include "print-tree.h" | 50 | #include "print-tree.h" |
50 | #include "volumes.h" | 51 | #include "volumes.h" |
51 | #include "locking.h" | 52 | #include "locking.h" |
53 | #include "inode-map.h" | ||
52 | 54 | ||
53 | /* Mask out flags that are inappropriate for the given type of inode. */ | 55 | /* Mask out flags that are inappropriate for the given type of inode. */ |
54 | static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) | 56 | static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) |
@@ -80,6 +82,13 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags) | |||
80 | iflags |= FS_NOATIME_FL; | 82 | iflags |= FS_NOATIME_FL; |
81 | if (flags & BTRFS_INODE_DIRSYNC) | 83 | if (flags & BTRFS_INODE_DIRSYNC) |
82 | iflags |= FS_DIRSYNC_FL; | 84 | iflags |= FS_DIRSYNC_FL; |
85 | if (flags & BTRFS_INODE_NODATACOW) | ||
86 | iflags |= FS_NOCOW_FL; | ||
87 | |||
88 | if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS)) | ||
89 | iflags |= FS_COMPR_FL; | ||
90 | else if (flags & BTRFS_INODE_NOCOMPRESS) | ||
91 | iflags |= FS_NOCOMP_FL; | ||
83 | 92 | ||
84 | return iflags; | 93 | return iflags; |
85 | } | 94 | } |
@@ -138,6 +147,21 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg) | |||
138 | return 0; | 147 | return 0; |
139 | } | 148 | } |
140 | 149 | ||
150 | static int check_flags(unsigned int flags) | ||
151 | { | ||
152 | if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ | ||
153 | FS_NOATIME_FL | FS_NODUMP_FL | \ | ||
154 | FS_SYNC_FL | FS_DIRSYNC_FL | \ | ||
155 | FS_NOCOMP_FL | FS_COMPR_FL | | ||
156 | FS_NOCOW_FL)) | ||
157 | return -EOPNOTSUPP; | ||
158 | |||
159 | if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) | ||
160 | return -EINVAL; | ||
161 | |||
162 | return 0; | ||
163 | } | ||
164 | |||
141 | static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | 165 | static int btrfs_ioctl_setflags(struct file *file, void __user *arg) |
142 | { | 166 | { |
143 | struct inode *inode = file->f_path.dentry->d_inode; | 167 | struct inode *inode = file->f_path.dentry->d_inode; |
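
Note: the new check_flags() rejects any FS_*_FL bit btrfs does not handle with
EOPNOTSUPP, and treats COMPR together with NOCOMP as contradictory (EINVAL). The same
logic compiles in userspace against the kernel UAPI headers, assuming headers new
enough to define FS_NOCOW_FL (2.6.39+):

    #include <stdio.h>
    #include <errno.h>
    #include <linux/fs.h>   /* FS_*_FL definitions */

    static int check_flags(unsigned int flags)
    {
            if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL |
                          FS_NOATIME_FL | FS_NODUMP_FL |
                          FS_SYNC_FL | FS_DIRSYNC_FL |
                          FS_NOCOMP_FL | FS_COMPR_FL |
                          FS_NOCOW_FL))
                    return -EOPNOTSUPP;

            if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
                    return -EINVAL;

            return 0;
    }

    int main(void)
    {
            printf("%d\n", check_flags(FS_COMPR_FL));                /* 0 */
            printf("%d\n", check_flags(FS_COMPR_FL | FS_NOCOMP_FL)); /* -EINVAL */
            return 0;
    }
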
@@ -147,15 +171,17 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
147 | unsigned int flags, oldflags; | 171 | unsigned int flags, oldflags; |
148 | int ret; | 172 | int ret; |
149 | 173 | ||
174 | if (btrfs_root_readonly(root)) | ||
175 | return -EROFS; | ||
176 | |||
150 | if (copy_from_user(&flags, arg, sizeof(flags))) | 177 | if (copy_from_user(&flags, arg, sizeof(flags))) |
151 | return -EFAULT; | 178 | return -EFAULT; |
152 | 179 | ||
153 | if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ | 180 | ret = check_flags(flags); |
154 | FS_NOATIME_FL | FS_NODUMP_FL | \ | 181 | if (ret) |
155 | FS_SYNC_FL | FS_DIRSYNC_FL)) | 182 | return ret; |
156 | return -EOPNOTSUPP; | ||
157 | 183 | ||
158 | if (!is_owner_or_cap(inode)) | 184 | if (!inode_owner_or_capable(inode)) |
159 | return -EACCES; | 185 | return -EACCES; |
160 | 186 | ||
161 | mutex_lock(&inode->i_mutex); | 187 | mutex_lock(&inode->i_mutex); |
@@ -197,10 +223,28 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
197 | ip->flags |= BTRFS_INODE_DIRSYNC; | 223 | ip->flags |= BTRFS_INODE_DIRSYNC; |
198 | else | 224 | else |
199 | ip->flags &= ~BTRFS_INODE_DIRSYNC; | 225 | ip->flags &= ~BTRFS_INODE_DIRSYNC; |
226 | if (flags & FS_NOCOW_FL) | ||
227 | ip->flags |= BTRFS_INODE_NODATACOW; | ||
228 | else | ||
229 | ip->flags &= ~BTRFS_INODE_NODATACOW; | ||
200 | 230 | ||
231 | /* | ||
232 | * The COMPRESS flag can only be changed by users, while the NOCOMPRESS | ||
233 | * flag may be changed automatically if compression code won't make | ||
234 | * things smaller. | ||
235 | */ | ||
236 | if (flags & FS_NOCOMP_FL) { | ||
237 | ip->flags &= ~BTRFS_INODE_COMPRESS; | ||
238 | ip->flags |= BTRFS_INODE_NOCOMPRESS; | ||
239 | } else if (flags & FS_COMPR_FL) { | ||
240 | ip->flags |= BTRFS_INODE_COMPRESS; | ||
241 | ip->flags &= ~BTRFS_INODE_NOCOMPRESS; | ||
242 | } else { | ||
243 | ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); | ||
244 | } | ||
201 | 245 | ||
202 | trans = btrfs_join_transaction(root, 1); | 246 | trans = btrfs_join_transaction(root); |
203 | BUG_ON(!trans); | 247 | BUG_ON(IS_ERR(trans)); |
204 | 248 | ||
205 | ret = btrfs_update_inode(trans, root, inode); | 249 | ret = btrfs_update_inode(trans, root, inode); |
206 | BUG_ON(ret); | 250 | BUG_ON(ret); |
@@ -210,9 +254,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
210 | btrfs_end_transaction(trans, root); | 254 | btrfs_end_transaction(trans, root); |
211 | 255 | ||
212 | mnt_drop_write(file->f_path.mnt); | 256 | mnt_drop_write(file->f_path.mnt); |
257 | |||
258 | ret = 0; | ||
213 | out_unlock: | 259 | out_unlock: |
214 | mutex_unlock(&inode->i_mutex); | 260 | mutex_unlock(&inode->i_mutex); |
215 | return 0; | 261 | return ret; |
216 | } | 262 | } |
217 | 263 | ||
218 | static int btrfs_ioctl_getversion(struct file *file, int __user *arg) | 264 | static int btrfs_ioctl_getversion(struct file *file, int __user *arg) |
@@ -222,9 +268,54 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg) | |||
222 | return put_user(inode->i_generation, arg); | 268 | return put_user(inode->i_generation, arg); |
223 | } | 269 | } |
224 | 270 | ||
271 | static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) | ||
272 | { | ||
273 | struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info; | ||
274 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
275 | struct btrfs_device *device; | ||
276 | struct request_queue *q; | ||
277 | struct fstrim_range range; | ||
278 | u64 minlen = ULLONG_MAX; | ||
279 | u64 num_devices = 0; | ||
280 | int ret; | ||
281 | |||
282 | if (!capable(CAP_SYS_ADMIN)) | ||
283 | return -EPERM; | ||
284 | |||
285 | rcu_read_lock(); | ||
286 | list_for_each_entry_rcu(device, &fs_info->fs_devices->devices, | ||
287 | dev_list) { | ||
288 | if (!device->bdev) | ||
289 | continue; | ||
290 | q = bdev_get_queue(device->bdev); | ||
291 | if (blk_queue_discard(q)) { | ||
292 | num_devices++; | ||
293 | minlen = min((u64)q->limits.discard_granularity, | ||
294 | minlen); | ||
295 | } | ||
296 | } | ||
297 | rcu_read_unlock(); | ||
298 | if (!num_devices) | ||
299 | return -EOPNOTSUPP; | ||
300 | |||
301 | if (copy_from_user(&range, arg, sizeof(range))) | ||
302 | return -EFAULT; | ||
303 | |||
304 | range.minlen = max(range.minlen, minlen); | ||
305 | ret = btrfs_trim_fs(root, &range); | ||
306 | if (ret < 0) | ||
307 | return ret; | ||
308 | |||
309 | if (copy_to_user(arg, &range, sizeof(range))) | ||
310 | return -EFAULT; | ||
311 | |||
312 | return 0; | ||
313 | } | ||
314 | |||
225 | static noinline int create_subvol(struct btrfs_root *root, | 315 | static noinline int create_subvol(struct btrfs_root *root, |
226 | struct dentry *dentry, | 316 | struct dentry *dentry, |
227 | char *name, int namelen) | 317 | char *name, int namelen, |
318 | u64 *async_transid) | ||
228 | { | 319 | { |
229 | struct btrfs_trans_handle *trans; | 320 | struct btrfs_trans_handle *trans; |
230 | struct btrfs_key key; | 321 | struct btrfs_key key; |
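
Note: btrfs_ioctl_fitrim() above is the receiving end of the standard FITRIM ioctl: it
collects the discard granularity across devices, clamps range.minlen, calls
btrfs_trim_fs(), and copies the range back with the trimmed byte count. A minimal
userspace caller, essentially what fstrim(8) does; needs CAP_SYS_ADMIN and headers
that define FITRIM:

    #include <stdio.h>
    #include <string.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>   /* FITRIM, struct fstrim_range */

    int main(int argc, char **argv)
    {
            struct fstrim_range range;
            int fd;

            if (argc != 2) {
                    fprintf(stderr, "usage: %s <mountpoint>\n", argv[0]);
                    return 1;
            }
            fd = open(argv[1], O_RDONLY);
            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            memset(&range, 0, sizeof(range));
            range.len = (__u64)-1;                  /* whole filesystem */
            if (ioctl(fd, FITRIM, &range) < 0) {    /* kernel clamps minlen */
                    perror("FITRIM");
                    close(fd);
                    return 1;
            }
            printf("trimmed %llu bytes\n", (unsigned long long)range.len);
            close(fd);
            return 0;
    }
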
@@ -232,17 +323,22 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
232 | struct btrfs_inode_item *inode_item; | 323 | struct btrfs_inode_item *inode_item; |
233 | struct extent_buffer *leaf; | 324 | struct extent_buffer *leaf; |
234 | struct btrfs_root *new_root; | 325 | struct btrfs_root *new_root; |
235 | struct inode *dir = dentry->d_parent->d_inode; | 326 | struct dentry *parent = dget_parent(dentry); |
327 | struct inode *dir; | ||
236 | int ret; | 328 | int ret; |
237 | int err; | 329 | int err; |
238 | u64 objectid; | 330 | u64 objectid; |
239 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; | 331 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; |
240 | u64 index = 0; | 332 | u64 index = 0; |
241 | 333 | ||
242 | ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, | 334 | ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); |
243 | 0, &objectid); | 335 | if (ret) { |
244 | if (ret) | 336 | dput(parent); |
245 | return ret; | 337 | return ret; |
338 | } | ||
339 | |||
340 | dir = parent->d_inode; | ||
341 | |||
246 | /* | 342 | /* |
247 | * 1 - inode item | 343 | * 1 - inode item |
248 | * 2 - refs | 344 | * 2 - refs |
@@ -250,8 +346,10 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
250 | * 2 - dir items | 346 | * 2 - dir items |
251 | */ | 347 | */ |
252 | trans = btrfs_start_transaction(root, 6); | 348 | trans = btrfs_start_transaction(root, 6); |
253 | if (IS_ERR(trans)) | 349 | if (IS_ERR(trans)) { |
350 | dput(parent); | ||
254 | return PTR_ERR(trans); | 351 | return PTR_ERR(trans); |
352 | } | ||
255 | 353 | ||
256 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 354 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
257 | 0, objectid, NULL, 0, 0, 0); | 355 | 0, objectid, NULL, 0, 0, 0); |
@@ -282,6 +380,10 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
282 | inode_item->nbytes = cpu_to_le64(root->leafsize); | 380 | inode_item->nbytes = cpu_to_le64(root->leafsize); |
283 | inode_item->mode = cpu_to_le32(S_IFDIR | 0755); | 381 | inode_item->mode = cpu_to_le32(S_IFDIR | 0755); |
284 | 382 | ||
383 | root_item.flags = 0; | ||
384 | root_item.byte_limit = 0; | ||
385 | inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT); | ||
386 | |||
285 | btrfs_set_root_bytenr(&root_item, leaf->start); | 387 | btrfs_set_root_bytenr(&root_item, leaf->start); |
286 | btrfs_set_root_generation(&root_item, trans->transid); | 388 | btrfs_set_root_generation(&root_item, trans->transid); |
287 | btrfs_set_root_level(&root_item, 0); | 389 | btrfs_set_root_level(&root_item, 0); |
@@ -312,8 +414,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
312 | 414 | ||
313 | btrfs_record_root_in_trans(trans, new_root); | 415 | btrfs_record_root_in_trans(trans, new_root); |
314 | 416 | ||
315 | ret = btrfs_create_subvol_root(trans, new_root, new_dirid, | 417 | ret = btrfs_create_subvol_root(trans, new_root, new_dirid); |
316 | BTRFS_I(dir)->block_group); | ||
317 | /* | 418 | /* |
318 | * insert the directory item | 419 | * insert the directory item |
319 | */ | 420 | */ |
@@ -321,7 +422,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
321 | BUG_ON(ret); | 422 | BUG_ON(ret); |
322 | 423 | ||
323 | ret = btrfs_insert_dir_item(trans, root, | 424 | ret = btrfs_insert_dir_item(trans, root, |
324 | name, namelen, dir->i_ino, &key, | 425 | name, namelen, dir, &key, |
325 | BTRFS_FT_DIR, index); | 426 | BTRFS_FT_DIR, index); |
326 | if (ret) | 427 | if (ret) |
327 | goto fail; | 428 | goto fail; |
@@ -332,21 +433,30 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
332 | 433 | ||
333 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, | 434 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, |
334 | objectid, root->root_key.objectid, | 435 | objectid, root->root_key.objectid, |
335 | dir->i_ino, index, name, namelen); | 436 | btrfs_ino(dir), index, name, namelen); |
336 | 437 | ||
337 | BUG_ON(ret); | 438 | BUG_ON(ret); |
338 | 439 | ||
339 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); | 440 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); |
340 | fail: | 441 | fail: |
341 | err = btrfs_commit_transaction(trans, root); | 442 | dput(parent); |
443 | if (async_transid) { | ||
444 | *async_transid = trans->transid; | ||
445 | err = btrfs_commit_transaction_async(trans, root, 1); | ||
446 | } else { | ||
447 | err = btrfs_commit_transaction(trans, root); | ||
448 | } | ||
342 | if (err && !ret) | 449 | if (err && !ret) |
343 | ret = err; | 450 | ret = err; |
344 | return ret; | 451 | return ret; |
345 | } | 452 | } |
346 | 453 | ||
347 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry) | 454 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, |
455 | char *name, int namelen, u64 *async_transid, | ||
456 | bool readonly) | ||
348 | { | 457 | { |
349 | struct inode *inode; | 458 | struct inode *inode; |
459 | struct dentry *parent; | ||
350 | struct btrfs_pending_snapshot *pending_snapshot; | 460 | struct btrfs_pending_snapshot *pending_snapshot; |
351 | struct btrfs_trans_handle *trans; | 461 | struct btrfs_trans_handle *trans; |
352 | int ret; | 462 | int ret; |
@@ -361,6 +471,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry) | |||
361 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); | 471 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); |
362 | pending_snapshot->dentry = dentry; | 472 | pending_snapshot->dentry = dentry; |
363 | pending_snapshot->root = root; | 473 | pending_snapshot->root = root; |
474 | pending_snapshot->readonly = readonly; | ||
364 | 475 | ||
365 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | 476 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); |
366 | if (IS_ERR(trans)) { | 477 | if (IS_ERR(trans)) { |
@@ -371,18 +482,31 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry) | |||
371 | ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); | 482 | ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); |
372 | BUG_ON(ret); | 483 | BUG_ON(ret); |
373 | 484 | ||
485 | spin_lock(&root->fs_info->trans_lock); | ||
374 | list_add(&pending_snapshot->list, | 486 | list_add(&pending_snapshot->list, |
375 | &trans->transaction->pending_snapshots); | 487 | &trans->transaction->pending_snapshots); |
376 | ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); | 488 | spin_unlock(&root->fs_info->trans_lock); |
489 | if (async_transid) { | ||
490 | *async_transid = trans->transid; | ||
491 | ret = btrfs_commit_transaction_async(trans, | ||
492 | root->fs_info->extent_root, 1); | ||
493 | } else { | ||
494 | ret = btrfs_commit_transaction(trans, | ||
495 | root->fs_info->extent_root); | ||
496 | } | ||
377 | BUG_ON(ret); | 497 | BUG_ON(ret); |
378 | 498 | ||
379 | ret = pending_snapshot->error; | 499 | ret = pending_snapshot->error; |
380 | if (ret) | 500 | if (ret) |
381 | goto fail; | 501 | goto fail; |
382 | 502 | ||
383 | btrfs_orphan_cleanup(pending_snapshot->snap); | 503 | ret = btrfs_orphan_cleanup(pending_snapshot->snap); |
504 | if (ret) | ||
505 | goto fail; | ||
384 | 506 | ||
385 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); | 507 | parent = dget_parent(dentry); |
508 | inode = btrfs_lookup_dentry(parent->d_inode, dentry); | ||
509 | dput(parent); | ||
386 | if (IS_ERR(inode)) { | 510 | if (IS_ERR(inode)) { |
387 | ret = PTR_ERR(inode); | 511 | ret = PTR_ERR(inode); |
388 | goto fail; | 512 | goto fail; |
@@ -395,6 +519,76 @@ fail: | |||
395 | return ret; | 519 | return ret; |
396 | } | 520 | } |
397 | 521 | ||
522 | /* copy of check_sticky in fs/namei.c | ||
523 | * It's inline, so penalty for filesystems that don't use sticky bit is | ||
524 | * minimal. | ||
525 | */ | ||
526 | static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) | ||
527 | { | ||
528 | uid_t fsuid = current_fsuid(); | ||
529 | |||
530 | if (!(dir->i_mode & S_ISVTX)) | ||
531 | return 0; | ||
532 | if (inode->i_uid == fsuid) | ||
533 | return 0; | ||
534 | if (dir->i_uid == fsuid) | ||
535 | return 0; | ||
536 | return !capable(CAP_FOWNER); | ||
537 | } | ||
538 | |||
539 | /* copy of may_delete in fs/namei.c | ||
540 | * Check whether we can remove a link victim from directory dir, check | ||
541 | * whether the type of victim is right. | ||
542 | * 1. We can't do it if dir is read-only (done in permission()) | ||
543 | * 2. We should have write and exec permissions on dir | ||
544 | * 3. We can't remove anything from append-only dir | ||
545 | * 4. We can't do anything with immutable dir (done in permission()) | ||
546 | * 5. If the sticky bit on dir is set we should either | ||
547 | * a. be owner of dir, or | ||
548 | * b. be owner of victim, or | ||
549 | * c. have CAP_FOWNER capability | ||
550 | * 6. If the victim is append-only or immutable we can't do anything with | ||
551 | * links pointing to it. | ||
552 | * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. | ||
553 | * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. | ||
554 | * 9. We can't remove a root or mountpoint. | ||
555 | * 10. We don't allow removal of NFS sillyrenamed files; it's handled by | ||
556 | * nfs_async_unlink(). | ||
557 | */ | ||
558 | |||
559 | static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir) | ||
560 | { | ||
561 | int error; | ||
562 | |||
563 | if (!victim->d_inode) | ||
564 | return -ENOENT; | ||
565 | |||
566 | BUG_ON(victim->d_parent->d_inode != dir); | ||
567 | audit_inode_child(victim, dir); | ||
568 | |||
569 | error = inode_permission(dir, MAY_WRITE | MAY_EXEC); | ||
570 | if (error) | ||
571 | return error; | ||
572 | if (IS_APPEND(dir)) | ||
573 | return -EPERM; | ||
574 | if (btrfs_check_sticky(dir, victim->d_inode) || | ||
575 | IS_APPEND(victim->d_inode) || | ||
576 | IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) | ||
577 | return -EPERM; | ||
578 | if (isdir) { | ||
579 | if (!S_ISDIR(victim->d_inode->i_mode)) | ||
580 | return -ENOTDIR; | ||
581 | if (IS_ROOT(victim)) | ||
582 | return -EBUSY; | ||
583 | } else if (S_ISDIR(victim->d_inode->i_mode)) | ||
584 | return -EISDIR; | ||
585 | if (IS_DEADDIR(dir)) | ||
586 | return -ENOENT; | ||
587 | if (victim->d_flags & DCACHE_NFSFS_RENAMED) | ||
588 | return -EBUSY; | ||
589 | return 0; | ||
590 | } | ||
591 | |||
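The sticky-bit rule (5a-5c above) can be mirrored from plain stat() data. The helper below is a hypothetical userspace illustration of the same decision, not code from this patch; note that CAP_FOWNER can also override the rule, which uid checks alone cannot see:

	#include <stdbool.h>
	#include <sys/stat.h>
	#include <unistd.h>

	/* Hypothetical userspace mirror of btrfs_check_sticky() above: in a
	 * sticky directory, deletion requires owning the victim or the
	 * directory itself. */
	static bool sticky_allows_delete(const struct stat *dir,
					 const struct stat *victim)
	{
		uid_t fsuid = geteuid();

		if (!(dir->st_mode & S_ISVTX))
			return true;	/* no sticky bit, no extra restriction */
		return victim->st_uid == fsuid || dir->st_uid == fsuid;
	}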
398 | /* copy of may_create in fs/namei.c() */ | 592 | /* copy of may_create in fs/namei.c() */ |
399 | static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | 593 | static inline int btrfs_may_create(struct inode *dir, struct dentry *child) |
400 | { | 594 | { |
@@ -412,7 +606,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | |||
412 | */ | 606 | */ |
413 | static noinline int btrfs_mksubvol(struct path *parent, | 607 | static noinline int btrfs_mksubvol(struct path *parent, |
414 | char *name, int namelen, | 608 | char *name, int namelen, |
415 | struct btrfs_root *snap_src) | 609 | struct btrfs_root *snap_src, |
610 | u64 *async_transid, bool readonly) | ||
416 | { | 611 | { |
417 | struct inode *dir = parent->dentry->d_inode; | 612 | struct inode *dir = parent->dentry->d_inode; |
418 | struct dentry *dentry; | 613 | struct dentry *dentry; |
@@ -443,10 +638,11 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
443 | goto out_up_read; | 638 | goto out_up_read; |
444 | 639 | ||
445 | if (snap_src) { | 640 | if (snap_src) { |
446 | error = create_snapshot(snap_src, dentry); | 641 | error = create_snapshot(snap_src, dentry, |
642 | name, namelen, async_transid, readonly); | ||
447 | } else { | 643 | } else { |
448 | error = create_subvol(BTRFS_I(dir)->root, dentry, | 644 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
449 | name, namelen); | 645 | name, namelen, async_transid); |
450 | } | 646 | } |
451 | if (!error) | 647 | if (!error) |
452 | fsnotify_mkdir(dir, dentry); | 648 | fsnotify_mkdir(dir, dentry); |
@@ -461,6 +657,107 @@ out_unlock: | |||
461 | return error; | 657 | return error; |
462 | } | 658 | } |
463 | 659 | ||
660 | /* | ||
661 | * When we're defragging a range, we don't want to kick it off again | ||
662 | * if it is really just waiting for delalloc to send it down. | ||
663 | * If we find a nice big extent or delalloc range for the bytes in the | ||
664 | * file you want to defrag, we return 0 to let you know to skip this | ||
665 | * part of the file | ||
666 | */ | ||
667 | static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh) | ||
668 | { | ||
669 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
670 | struct extent_map *em = NULL; | ||
671 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
672 | u64 end; | ||
673 | |||
674 | read_lock(&em_tree->lock); | ||
675 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); | ||
676 | read_unlock(&em_tree->lock); | ||
677 | |||
678 | if (em) { | ||
679 | end = extent_map_end(em); | ||
680 | free_extent_map(em); | ||
681 | if (end - offset > thresh) | ||
682 | return 0; | ||
683 | } | ||
684 | /* if we already have a nice delalloc here, just stop */ | ||
685 | thresh /= 2; | ||
686 | end = count_range_bits(io_tree, &offset, offset + thresh, | ||
687 | thresh, EXTENT_DELALLOC, 1); | ||
688 | if (end >= thresh) | ||
689 | return 0; | ||
690 | return 1; | ||
691 | } | ||
692 | |||
693 | /* | ||
694 | * helper function to walk through a file and find extents | ||
695 | * newer than a specific transid, and smaller than thresh. | ||
696 | * | ||
697 | * This is used by the defragging code to find new and small | ||
698 | * extents | ||
699 | */ | ||
700 | static int find_new_extents(struct btrfs_root *root, | ||
701 | struct inode *inode, u64 newer_than, | ||
702 | u64 *off, int thresh) | ||
703 | { | ||
704 | struct btrfs_path *path; | ||
705 | struct btrfs_key min_key; | ||
706 | struct btrfs_key max_key; | ||
707 | struct extent_buffer *leaf; | ||
708 | struct btrfs_file_extent_item *extent; | ||
709 | int type; | ||
710 | int ret; | ||
711 | u64 ino = btrfs_ino(inode); | ||
712 | |||
713 | path = btrfs_alloc_path(); | ||
714 | if (!path) | ||
715 | return -ENOMEM; | ||
716 | |||
717 | min_key.objectid = ino; | ||
718 | min_key.type = BTRFS_EXTENT_DATA_KEY; | ||
719 | min_key.offset = *off; | ||
720 | |||
721 | max_key.objectid = ino; | ||
722 | max_key.type = (u8)-1; | ||
723 | max_key.offset = (u64)-1; | ||
724 | |||
725 | path->keep_locks = 1; | ||
726 | |||
727 | while (1) { | ||
728 | ret = btrfs_search_forward(root, &min_key, &max_key, | ||
729 | path, 0, newer_than); | ||
730 | if (ret != 0) | ||
731 | goto none; | ||
732 | if (min_key.objectid != ino) | ||
733 | goto none; | ||
734 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) | ||
735 | goto none; | ||
736 | |||
737 | leaf = path->nodes[0]; | ||
738 | extent = btrfs_item_ptr(leaf, path->slots[0], | ||
739 | struct btrfs_file_extent_item); | ||
740 | |||
741 | type = btrfs_file_extent_type(leaf, extent); | ||
742 | if (type == BTRFS_FILE_EXTENT_REG && | ||
743 | btrfs_file_extent_num_bytes(leaf, extent) < thresh && | ||
744 | check_defrag_in_cache(inode, min_key.offset, thresh)) { | ||
745 | *off = min_key.offset; | ||
746 | btrfs_free_path(path); | ||
747 | return 0; | ||
748 | } | ||
749 | |||
750 | if (min_key.offset == (u64)-1) | ||
751 | goto none; | ||
752 | |||
753 | min_key.offset++; | ||
754 | btrfs_release_path(path); | ||
755 | } | ||
756 | none: | ||
757 | btrfs_free_path(path); | ||
758 | return -ENOENT; | ||
759 | } | ||
760 | |||
464 | static int should_defrag_range(struct inode *inode, u64 start, u64 len, | 761 | static int should_defrag_range(struct inode *inode, u64 start, u64 len, |
465 | int thresh, u64 *last_len, u64 *skip, | 762 | int thresh, u64 *last_len, u64 *skip, |
466 | u64 *defrag_end) | 763 | u64 *defrag_end) |
@@ -470,10 +767,6 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, | |||
470 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 767 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
471 | int ret = 1; | 768 | int ret = 1; |
472 | 769 | ||
473 | |||
474 | if (thresh == 0) | ||
475 | thresh = 256 * 1024; | ||
476 | |||
477 | /* | 770 | /* |
478 | * make sure that once we start defragging an extent, we keep on | 771 | * make sure that once we start defragging an extent, we keep on |
479 | * defragging it | 772 | * defragging it |
@@ -532,28 +825,208 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, | |||
532 | return ret; | 825 | return ret; |
533 | } | 826 | } |
534 | 827 | ||
535 | static int btrfs_defrag_file(struct file *file, | 828 | /* |
536 | struct btrfs_ioctl_defrag_range_args *range) | 829 | * it doesn't do much good to defrag one or two pages |
830 | * at a time. This pulls in a nice chunk of pages | ||
831 | * to COW and defrag. | ||
832 | * | ||
833 | * It also makes sure the delalloc code has enough | ||
834 | * dirty data to avoid making new small extents as part | ||
835 | * of the defrag | ||
836 | * | ||
837 | * It's a good idea to start readahead on this range | ||
838 | * before calling this. | ||
839 | */ | ||
840 | static int cluster_pages_for_defrag(struct inode *inode, | ||
841 | struct page **pages, | ||
842 | unsigned long start_index, | ||
843 | int num_pages) | ||
537 | { | 844 | { |
538 | struct inode *inode = fdentry(file)->d_inode; | 845 | unsigned long file_end; |
539 | struct btrfs_root *root = BTRFS_I(inode)->root; | 846 | u64 isize = i_size_read(inode); |
540 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
541 | struct btrfs_ordered_extent *ordered; | ||
542 | struct page *page; | ||
543 | unsigned long last_index; | ||
544 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; | ||
545 | unsigned long total_read = 0; | ||
546 | u64 page_start; | 847 | u64 page_start; |
547 | u64 page_end; | 848 | u64 page_end; |
849 | int ret; | ||
850 | int i; | ||
851 | int i_done; | ||
852 | struct btrfs_ordered_extent *ordered; | ||
853 | struct extent_state *cached_state = NULL; | ||
854 | |||
855 | if (isize == 0) | ||
856 | return 0; | ||
857 | file_end = (isize - 1) >> PAGE_CACHE_SHIFT; | ||
858 | |||
859 | ret = btrfs_delalloc_reserve_space(inode, | ||
860 | num_pages << PAGE_CACHE_SHIFT); | ||
861 | if (ret) | ||
862 | return ret; | ||
863 | again: | ||
864 | ret = 0; | ||
865 | i_done = 0; | ||
866 | |||
867 | /* step one, lock all the pages */ | ||
868 | for (i = 0; i < num_pages; i++) { | ||
869 | struct page *page; | ||
870 | page = grab_cache_page(inode->i_mapping, | ||
871 | start_index + i); | ||
872 | if (!page) | ||
873 | break; | ||
874 | |||
875 | if (!PageUptodate(page)) { | ||
876 | btrfs_readpage(NULL, page); | ||
877 | lock_page(page); | ||
878 | if (!PageUptodate(page)) { | ||
879 | unlock_page(page); | ||
880 | page_cache_release(page); | ||
881 | ret = -EIO; | ||
882 | break; | ||
883 | } | ||
884 | } | ||
885 | isize = i_size_read(inode); | ||
886 | file_end = (isize - 1) >> PAGE_CACHE_SHIFT; | ||
887 | if (!isize || page->index > file_end || | ||
888 | page->mapping != inode->i_mapping) { | ||
889 | /* whoops, we blew past eof, skip this page */ | ||
890 | unlock_page(page); | ||
891 | page_cache_release(page); | ||
892 | break; | ||
893 | } | ||
894 | pages[i] = page; | ||
895 | i_done++; | ||
896 | } | ||
897 | if (!i_done || ret) | ||
898 | goto out; | ||
899 | |||
900 | if (!(inode->i_sb->s_flags & MS_ACTIVE)) | ||
901 | goto out; | ||
902 | |||
903 | /* | ||
904 | * so now we have a nice long stream of locked | ||
905 | * and up-to-date pages, let's wait on them | ||
906 | */ | ||
907 | for (i = 0; i < i_done; i++) | ||
908 | wait_on_page_writeback(pages[i]); | ||
909 | |||
910 | page_start = page_offset(pages[0]); | ||
911 | page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE; | ||
912 | |||
913 | lock_extent_bits(&BTRFS_I(inode)->io_tree, | ||
914 | page_start, page_end - 1, 0, &cached_state, | ||
915 | GFP_NOFS); | ||
916 | ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1); | ||
917 | if (ordered && | ||
918 | ordered->file_offset + ordered->len > page_start && | ||
919 | ordered->file_offset < page_end) { | ||
920 | btrfs_put_ordered_extent(ordered); | ||
921 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | ||
922 | page_start, page_end - 1, | ||
923 | &cached_state, GFP_NOFS); | ||
924 | for (i = 0; i < i_done; i++) { | ||
925 | unlock_page(pages[i]); | ||
926 | page_cache_release(pages[i]); | ||
927 | } | ||
928 | btrfs_wait_ordered_range(inode, page_start, | ||
929 | page_end - page_start); | ||
930 | goto again; | ||
931 | } | ||
932 | if (ordered) | ||
933 | btrfs_put_ordered_extent(ordered); | ||
934 | |||
935 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, | ||
936 | page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | | ||
937 | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, | ||
938 | GFP_NOFS); | ||
939 | |||
940 | if (i_done != num_pages) { | ||
941 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | ||
942 | btrfs_delalloc_release_space(inode, | ||
943 | (num_pages - i_done) << PAGE_CACHE_SHIFT); | ||
944 | } | ||
945 | |||
946 | |||
947 | btrfs_set_extent_delalloc(inode, page_start, page_end - 1, | ||
948 | &cached_state); | ||
949 | |||
950 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | ||
951 | page_start, page_end - 1, &cached_state, | ||
952 | GFP_NOFS); | ||
953 | |||
954 | for (i = 0; i < i_done; i++) { | ||
955 | clear_page_dirty_for_io(pages[i]); | ||
956 | ClearPageChecked(pages[i]); | ||
957 | set_page_extent_mapped(pages[i]); | ||
958 | set_page_dirty(pages[i]); | ||
959 | unlock_page(pages[i]); | ||
960 | page_cache_release(pages[i]); | ||
961 | } | ||
962 | return i_done; | ||
963 | out: | ||
964 | for (i = 0; i < i_done; i++) { | ||
965 | unlock_page(pages[i]); | ||
966 | page_cache_release(pages[i]); | ||
967 | } | ||
968 | btrfs_delalloc_release_space(inode, num_pages << PAGE_CACHE_SHIFT); | ||
969 | return ret; | ||
970 | |||
971 | } | ||
972 | |||
973 | int btrfs_defrag_file(struct inode *inode, struct file *file, | ||
974 | struct btrfs_ioctl_defrag_range_args *range, | ||
975 | u64 newer_than, unsigned long max_to_defrag) | ||
976 | { | ||
977 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
978 | struct btrfs_super_block *disk_super; | ||
979 | struct file_ra_state *ra = NULL; | ||
980 | unsigned long last_index; | ||
981 | u64 features; | ||
548 | u64 last_len = 0; | 982 | u64 last_len = 0; |
549 | u64 skip = 0; | 983 | u64 skip = 0; |
550 | u64 defrag_end = 0; | 984 | u64 defrag_end = 0; |
985 | u64 newer_off = range->start; | ||
986 | int newer_left = 0; | ||
551 | unsigned long i; | 987 | unsigned long i; |
552 | int ret; | 988 | int ret; |
989 | int defrag_count = 0; | ||
990 | int compress_type = BTRFS_COMPRESS_ZLIB; | ||
991 | int extent_thresh = range->extent_thresh; | ||
992 | int newer_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; | ||
993 | u64 new_align = ~((u64)128 * 1024 - 1); | ||
994 | struct page **pages = NULL; | ||
995 | |||
996 | if (extent_thresh == 0) | ||
997 | extent_thresh = 256 * 1024; | ||
998 | |||
999 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { | ||
1000 | if (range->compress_type > BTRFS_COMPRESS_TYPES) | ||
1001 | return -EINVAL; | ||
1002 | if (range->compress_type) | ||
1003 | compress_type = range->compress_type; | ||
1004 | } | ||
553 | 1005 | ||
554 | if (inode->i_size == 0) | 1006 | if (inode->i_size == 0) |
555 | return 0; | 1007 | return 0; |
556 | 1008 | ||
1009 | /* | ||
1010 | * if we were not given a file, allocate a readahead | ||
1011 | * context | ||
1012 | */ | ||
1013 | if (!file) { | ||
1014 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | ||
1015 | if (!ra) | ||
1016 | return -ENOMEM; | ||
1017 | file_ra_state_init(ra, inode->i_mapping); | ||
1018 | } else { | ||
1019 | ra = &file->f_ra; | ||
1020 | } | ||
1021 | |||
1022 | pages = kmalloc(sizeof(struct page *) * newer_cluster, | ||
1023 | GFP_NOFS); | ||
1024 | if (!pages) { | ||
1025 | ret = -ENOMEM; | ||
1026 | goto out_ra; | ||
1027 | } | ||
1028 | |||
1029 | /* find the last page to defrag */ | ||
557 | if (range->start + range->len > range->start) { | 1030 | if (range->start + range->len > range->start) { |
558 | last_index = min_t(u64, inode->i_size - 1, | 1031 | last_index = min_t(u64, inode->i_size - 1, |
559 | range->start + range->len - 1) >> PAGE_CACHE_SHIFT; | 1032 | range->start + range->len - 1) >> PAGE_CACHE_SHIFT; |
@@ -561,11 +1034,37 @@ static int btrfs_defrag_file(struct file *file, | |||
561 | last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; | 1034 | last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; |
562 | } | 1035 | } |
563 | 1036 | ||
564 | i = range->start >> PAGE_CACHE_SHIFT; | 1037 | if (newer_than) { |
565 | while (i <= last_index) { | 1038 | ret = find_new_extents(root, inode, newer_than, |
566 | if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, | 1039 | &newer_off, 64 * 1024); |
1040 | if (!ret) { | ||
1041 | range->start = newer_off; | ||
1042 | /* | ||
1043 | * we always align our defrag to help keep | ||
1044 | * the extents in the file evenly spaced | ||
1045 | */ | ||
1046 | i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; | ||
1047 | newer_left = newer_cluster; | ||
1048 | } else | ||
1049 | goto out_ra; | ||
1050 | } else { | ||
1051 | i = range->start >> PAGE_CACHE_SHIFT; | ||
1052 | } | ||
1053 | if (!max_to_defrag) | ||
1054 | max_to_defrag = last_index - 1; | ||
1055 | |||
1056 | while (i <= last_index && defrag_count < max_to_defrag) { | ||
1057 | /* | ||
1058 | * make sure we stop running if someone unmounts | ||
1059 | * the FS | ||
1060 | */ | ||
1061 | if (!(inode->i_sb->s_flags & MS_ACTIVE)) | ||
1062 | break; | ||
1063 | |||
1064 | if (!newer_than && | ||
1065 | !should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, | ||
567 | PAGE_CACHE_SIZE, | 1066 | PAGE_CACHE_SIZE, |
568 | range->extent_thresh, | 1067 | extent_thresh, |
569 | &last_len, &skip, | 1068 | &last_len, &skip, |
570 | &defrag_end)) { | 1069 | &defrag_end)) { |
571 | unsigned long next; | 1070 | unsigned long next; |
@@ -577,92 +1076,39 @@ static int btrfs_defrag_file(struct file *file, | |||
577 | i = max(i + 1, next); | 1076 | i = max(i + 1, next); |
578 | continue; | 1077 | continue; |
579 | } | 1078 | } |
580 | |||
581 | if (total_read % ra_pages == 0) { | ||
582 | btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i, | ||
583 | min(last_index, i + ra_pages - 1)); | ||
584 | } | ||
585 | total_read++; | ||
586 | mutex_lock(&inode->i_mutex); | ||
587 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) | 1079 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) |
588 | BTRFS_I(inode)->force_compress = 1; | 1080 | BTRFS_I(inode)->force_compress = compress_type; |
589 | 1081 | ||
590 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | 1082 | btrfs_force_ra(inode->i_mapping, ra, file, i, newer_cluster); |
591 | if (ret) | ||
592 | goto err_unlock; | ||
593 | again: | ||
594 | if (inode->i_size == 0 || | ||
595 | i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { | ||
596 | ret = 0; | ||
597 | goto err_reservations; | ||
598 | } | ||
599 | 1083 | ||
600 | page = grab_cache_page(inode->i_mapping, i); | 1084 | ret = cluster_pages_for_defrag(inode, pages, i, newer_cluster); |
601 | if (!page) { | 1085 | if (ret < 0) |
602 | ret = -ENOMEM; | 1086 | goto out_ra; |
603 | goto err_reservations; | ||
604 | } | ||
605 | |||
606 | if (!PageUptodate(page)) { | ||
607 | btrfs_readpage(NULL, page); | ||
608 | lock_page(page); | ||
609 | if (!PageUptodate(page)) { | ||
610 | unlock_page(page); | ||
611 | page_cache_release(page); | ||
612 | ret = -EIO; | ||
613 | goto err_reservations; | ||
614 | } | ||
615 | } | ||
616 | |||
617 | if (page->mapping != inode->i_mapping) { | ||
618 | unlock_page(page); | ||
619 | page_cache_release(page); | ||
620 | goto again; | ||
621 | } | ||
622 | |||
623 | wait_on_page_writeback(page); | ||
624 | 1087 | ||
625 | if (PageDirty(page)) { | 1088 | defrag_count += ret; |
626 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | 1089 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); |
627 | goto loop_unlock; | 1090 | i += ret; |
628 | } | ||
629 | 1091 | ||
630 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | 1092 | if (newer_than) { |
631 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 1093 | if (newer_off == (u64)-1) |
632 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | 1094 | break; |
633 | 1095 | ||
634 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 1096 | newer_off = max(newer_off + 1, |
635 | if (ordered) { | 1097 | (u64)i << PAGE_CACHE_SHIFT); |
636 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 1098 | |
637 | unlock_page(page); | 1099 | ret = find_new_extents(root, inode, |
638 | page_cache_release(page); | 1100 | newer_than, &newer_off, |
639 | btrfs_start_ordered_extent(inode, ordered, 1); | 1101 | 64 * 1024); |
640 | btrfs_put_ordered_extent(ordered); | 1102 | if (!ret) { |
641 | goto again; | 1103 | range->start = newer_off; |
1104 | i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; | ||
1105 | newer_left = newer_cluster; | ||
1106 | } else { | ||
1107 | break; | ||
1108 | } | ||
1109 | } else { | ||
1110 | i++; | ||
642 | } | 1111 | } |
643 | set_page_extent_mapped(page); | ||
644 | |||
645 | /* | ||
646 | * this makes sure page_mkwrite is called on the | ||
647 | * page if it is dirtied again later | ||
648 | */ | ||
649 | clear_page_dirty_for_io(page); | ||
650 | clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, | ||
651 | page_end, EXTENT_DIRTY | EXTENT_DELALLOC | | ||
652 | EXTENT_DO_ACCOUNTING, GFP_NOFS); | ||
653 | |||
654 | btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); | ||
655 | ClearPageChecked(page); | ||
656 | set_page_dirty(page); | ||
657 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
658 | |||
659 | loop_unlock: | ||
660 | unlock_page(page); | ||
661 | page_cache_release(page); | ||
662 | mutex_unlock(&inode->i_mutex); | ||
663 | |||
664 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | ||
665 | i++; | ||
666 | } | 1112 | } |
667 | 1113 | ||
668 | if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) | 1114 | if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) |
@@ -683,16 +1129,25 @@ loop_unlock: | |||
683 | atomic_dec(&root->fs_info->async_submit_draining); | 1129 | atomic_dec(&root->fs_info->async_submit_draining); |
684 | 1130 | ||
685 | mutex_lock(&inode->i_mutex); | 1131 | mutex_lock(&inode->i_mutex); |
686 | BTRFS_I(inode)->force_compress = 0; | 1132 | BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; |
687 | mutex_unlock(&inode->i_mutex); | 1133 | mutex_unlock(&inode->i_mutex); |
688 | } | 1134 | } |
689 | 1135 | ||
690 | return 0; | 1136 | disk_super = &root->fs_info->super_copy; |
1137 | features = btrfs_super_incompat_flags(disk_super); | ||
1138 | if (range->compress_type == BTRFS_COMPRESS_LZO) { | ||
1139 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; | ||
1140 | btrfs_set_super_incompat_flags(disk_super, features); | ||
1141 | } | ||
691 | 1142 | ||
692 | err_reservations: | 1143 | if (!file) |
693 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | 1144 | kfree(ra); |
694 | err_unlock: | 1145 | return defrag_count; |
695 | mutex_unlock(&inode->i_mutex); | 1146 | |
1147 | out_ra: | ||
1148 | if (!file) | ||
1149 | kfree(ra); | ||
1150 | kfree(pages); | ||
696 | return ret; | 1151 | return ret; |
697 | } | 1152 | } |
698 | 1153 | ||
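With compress_type plumbed through, userspace can ask for recompression during defrag. A hedged sketch of the call, assuming the btrfs ioctl definitions are visible (older userspace copied them from fs/btrfs/ioctl.h; the value 2 matches the kernel's BTRFS_COMPRESS_LZO):

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	static int defrag_file_lzo(int fd)
	{
		struct btrfs_ioctl_defrag_range_args range;

		memset(&range, 0, sizeof(range));
		range.len = (__u64)-1;				/* whole file */
		range.flags = BTRFS_DEFRAG_RANGE_COMPRESS;	/* rewrite compressed */
		range.compress_type = 2;	/* BTRFS_COMPRESS_LZO in the kernel */
		return ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, &range);
	}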
@@ -708,7 +1163,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
708 | char *sizestr; | 1163 | char *sizestr; |
709 | char *devstr = NULL; | 1164 | char *devstr = NULL; |
710 | int ret = 0; | 1165 | int ret = 0; |
711 | int namelen; | ||
712 | int mod = 0; | 1166 | int mod = 0; |
713 | 1167 | ||
714 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 1168 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
@@ -722,7 +1176,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
722 | return PTR_ERR(vol_args); | 1176 | return PTR_ERR(vol_args); |
723 | 1177 | ||
724 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 1178 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
725 | namelen = strlen(vol_args->name); | ||
726 | 1179 | ||
727 | mutex_lock(&root->fs_info->volume_mutex); | 1180 | mutex_lock(&root->fs_info->volume_mutex); |
728 | sizestr = vol_args->name; | 1181 | sizestr = vol_args->name; |
@@ -789,6 +1242,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
789 | 1242 | ||
790 | if (new_size > old_size) { | 1243 | if (new_size > old_size) { |
791 | trans = btrfs_start_transaction(root, 0); | 1244 | trans = btrfs_start_transaction(root, 0); |
1245 | if (IS_ERR(trans)) { | ||
1246 | ret = PTR_ERR(trans); | ||
1247 | goto out_unlock; | ||
1248 | } | ||
792 | ret = btrfs_grow_device(trans, device, new_size); | 1249 | ret = btrfs_grow_device(trans, device, new_size); |
793 | btrfs_commit_transaction(trans, root); | 1250 | btrfs_commit_transaction(trans, root); |
794 | } else { | 1251 | } else { |
@@ -801,11 +1258,14 @@ out_unlock: | |||
801 | return ret; | 1258 | return ret; |
802 | } | 1259 | } |
803 | 1260 | ||
804 | static noinline int btrfs_ioctl_snap_create(struct file *file, | 1261 | static noinline int btrfs_ioctl_snap_create_transid(struct file *file, |
805 | void __user *arg, int subvol) | 1262 | char *name, |
1263 | unsigned long fd, | ||
1264 | int subvol, | ||
1265 | u64 *transid, | ||
1266 | bool readonly) | ||
806 | { | 1267 | { |
807 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 1268 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; |
808 | struct btrfs_ioctl_vol_args *vol_args; | ||
809 | struct file *src_file; | 1269 | struct file *src_file; |
810 | int namelen; | 1270 | int namelen; |
811 | int ret = 0; | 1271 | int ret = 0; |
@@ -813,23 +1273,18 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
813 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 1273 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
814 | return -EROFS; | 1274 | return -EROFS; |
815 | 1275 | ||
816 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 1276 | namelen = strlen(name); |
817 | if (IS_ERR(vol_args)) | 1277 | if (strchr(name, '/')) { |
818 | return PTR_ERR(vol_args); | ||
819 | |||
820 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
821 | namelen = strlen(vol_args->name); | ||
822 | if (strchr(vol_args->name, '/')) { | ||
823 | ret = -EINVAL; | 1278 | ret = -EINVAL; |
824 | goto out; | 1279 | goto out; |
825 | } | 1280 | } |
826 | 1281 | ||
827 | if (subvol) { | 1282 | if (subvol) { |
828 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, | 1283 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
829 | NULL); | 1284 | NULL, transid, readonly); |
830 | } else { | 1285 | } else { |
831 | struct inode *src_inode; | 1286 | struct inode *src_inode; |
832 | src_file = fget(vol_args->fd); | 1287 | src_file = fget(fd); |
833 | if (!src_file) { | 1288 | if (!src_file) { |
834 | ret = -EINVAL; | 1289 | ret = -EINVAL; |
835 | goto out; | 1290 | goto out; |
@@ -843,15 +1298,155 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
843 | fput(src_file); | 1298 | fput(src_file); |
844 | goto out; | 1299 | goto out; |
845 | } | 1300 | } |
846 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, | 1301 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
847 | BTRFS_I(src_inode)->root); | 1302 | BTRFS_I(src_inode)->root, |
1303 | transid, readonly); | ||
848 | fput(src_file); | 1304 | fput(src_file); |
849 | } | 1305 | } |
850 | out: | 1306 | out: |
1307 | return ret; | ||
1308 | } | ||
1309 | |||
1310 | static noinline int btrfs_ioctl_snap_create(struct file *file, | ||
1311 | void __user *arg, int subvol) | ||
1312 | { | ||
1313 | struct btrfs_ioctl_vol_args *vol_args; | ||
1314 | int ret; | ||
1315 | |||
1316 | vol_args = memdup_user(arg, sizeof(*vol_args)); | ||
1317 | if (IS_ERR(vol_args)) | ||
1318 | return PTR_ERR(vol_args); | ||
1319 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
1320 | |||
1321 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, | ||
1322 | vol_args->fd, subvol, | ||
1323 | NULL, false); | ||
1324 | |||
1325 | kfree(vol_args); | ||
1326 | return ret; | ||
1327 | } | ||
1328 | |||
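For comparison with the v2 variant that follows, the v1 path above still takes a plain btrfs_ioctl_vol_args; fd is only consulted for snapshots, where it names the source subvolume. A hedged sketch of a bare subvolume create:

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	/* dirfd: open fd of the directory that will contain the new subvolume */
	static int create_subvol_v1(int dirfd, const char *name)
	{
		struct btrfs_ioctl_vol_args args;

		memset(&args, 0, sizeof(args));
		strncpy(args.name, name, BTRFS_PATH_NAME_MAX);
		return ioctl(dirfd, BTRFS_IOC_SUBVOL_CREATE, &args);
	}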
1329 | static noinline int btrfs_ioctl_snap_create_v2(struct file *file, | ||
1330 | void __user *arg, int subvol) | ||
1331 | { | ||
1332 | struct btrfs_ioctl_vol_args_v2 *vol_args; | ||
1333 | int ret; | ||
1334 | u64 transid = 0; | ||
1335 | u64 *ptr = NULL; | ||
1336 | bool readonly = false; | ||
1337 | |||
1338 | vol_args = memdup_user(arg, sizeof(*vol_args)); | ||
1339 | if (IS_ERR(vol_args)) | ||
1340 | return PTR_ERR(vol_args); | ||
1341 | vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; | ||
1342 | |||
1343 | if (vol_args->flags & | ||
1344 | ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { | ||
1345 | ret = -EOPNOTSUPP; | ||
1346 | goto out; | ||
1347 | } | ||
1348 | |||
1349 | if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) | ||
1350 | ptr = &transid; | ||
1351 | if (vol_args->flags & BTRFS_SUBVOL_RDONLY) | ||
1352 | readonly = true; | ||
1353 | |||
1354 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, | ||
1355 | vol_args->fd, subvol, | ||
1356 | ptr, readonly); | ||
1357 | |||
1358 | if (ret == 0 && ptr && | ||
1359 | copy_to_user(arg + | ||
1360 | offsetof(struct btrfs_ioctl_vol_args_v2, | ||
1361 | transid), ptr, sizeof(*ptr))) | ||
1362 | ret = -EFAULT; | ||
1363 | out: | ||
851 | kfree(vol_args); | 1364 | kfree(vol_args); |
852 | return ret; | 1365 | return ret; |
853 | } | 1366 | } |
854 | 1367 | ||
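The v2 call exercises both new flags: CREATE_ASYNC skips the synchronous commit and hands back the transid written above, RDONLY seals the snapshot at creation. A hedged userspace sketch:

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	static int snapshot_async_ro(int destdirfd, int srcfd, const char *name,
				     __u64 *transid)
	{
		struct btrfs_ioctl_vol_args_v2 args;
		int ret;

		memset(&args, 0, sizeof(args));
		args.fd = srcfd;			/* subvolume being snapshotted */
		args.flags = BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY;
		strncpy(args.name, name, BTRFS_SUBVOL_NAME_MAX);
		ret = ioctl(destdirfd, BTRFS_IOC_SNAP_CREATE_V2, &args);
		if (!ret)
			*transid = args.transid;	/* commit to wait for later */
		return ret;
	}

The returned transid pairs with the BTRFS_IOC_WAIT_SYNC path further down, so a caller can batch many snapshots into one commit and wait once.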
1368 | static noinline int btrfs_ioctl_subvol_getflags(struct file *file, | ||
1369 | void __user *arg) | ||
1370 | { | ||
1371 | struct inode *inode = fdentry(file)->d_inode; | ||
1372 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1373 | int ret = 0; | ||
1374 | u64 flags = 0; | ||
1375 | |||
1376 | if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) | ||
1377 | return -EINVAL; | ||
1378 | |||
1379 | down_read(&root->fs_info->subvol_sem); | ||
1380 | if (btrfs_root_readonly(root)) | ||
1381 | flags |= BTRFS_SUBVOL_RDONLY; | ||
1382 | up_read(&root->fs_info->subvol_sem); | ||
1383 | |||
1384 | if (copy_to_user(arg, &flags, sizeof(flags))) | ||
1385 | ret = -EFAULT; | ||
1386 | |||
1387 | return ret; | ||
1388 | } | ||
1389 | |||
1390 | static noinline int btrfs_ioctl_subvol_setflags(struct file *file, | ||
1391 | void __user *arg) | ||
1392 | { | ||
1393 | struct inode *inode = fdentry(file)->d_inode; | ||
1394 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1395 | struct btrfs_trans_handle *trans; | ||
1396 | u64 root_flags; | ||
1397 | u64 flags; | ||
1398 | int ret = 0; | ||
1399 | |||
1400 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
1401 | return -EROFS; | ||
1402 | |||
1403 | if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) | ||
1404 | return -EINVAL; | ||
1405 | |||
1406 | if (copy_from_user(&flags, arg, sizeof(flags))) | ||
1407 | return -EFAULT; | ||
1408 | |||
1409 | if (flags & BTRFS_SUBVOL_CREATE_ASYNC) | ||
1410 | return -EINVAL; | ||
1411 | |||
1412 | if (flags & ~BTRFS_SUBVOL_RDONLY) | ||
1413 | return -EOPNOTSUPP; | ||
1414 | |||
1415 | if (!inode_owner_or_capable(inode)) | ||
1416 | return -EACCES; | ||
1417 | |||
1418 | down_write(&root->fs_info->subvol_sem); | ||
1419 | |||
1420 | /* nothing to do */ | ||
1421 | if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) | ||
1422 | goto out; | ||
1423 | |||
1424 | root_flags = btrfs_root_flags(&root->root_item); | ||
1425 | if (flags & BTRFS_SUBVOL_RDONLY) | ||
1426 | btrfs_set_root_flags(&root->root_item, | ||
1427 | root_flags | BTRFS_ROOT_SUBVOL_RDONLY); | ||
1428 | else | ||
1429 | btrfs_set_root_flags(&root->root_item, | ||
1430 | root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); | ||
1431 | |||
1432 | trans = btrfs_start_transaction(root, 1); | ||
1433 | if (IS_ERR(trans)) { | ||
1434 | ret = PTR_ERR(trans); | ||
1435 | goto out_reset; | ||
1436 | } | ||
1437 | |||
1438 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | ||
1439 | &root->root_key, &root->root_item); | ||
1440 | |||
1441 | btrfs_commit_transaction(trans, root); | ||
1442 | out_reset: | ||
1443 | if (ret) | ||
1444 | btrfs_set_root_flags(&root->root_item, root_flags); | ||
1445 | out: | ||
1446 | up_write(&root->fs_info->subvol_sem); | ||
1447 | return ret; | ||
1448 | } | ||
1449 | |||
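From userspace the pair is a plain get/modify/set on a u64, issued against an fd of the subvolume root (anything else fails the BTRFS_FIRST_FREE_OBJECTID check above). A sketch:

	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	static int set_subvol_ro(int subvol_fd, int ro)
	{
		__u64 flags;

		if (ioctl(subvol_fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags))
			return -1;
		if (ro)
			flags |= BTRFS_SUBVOL_RDONLY;
		else
			flags &= ~BTRFS_SUBVOL_RDONLY;
		return ioctl(subvol_fd, BTRFS_IOC_SUBVOL_SETFLAGS, &flags);
	}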
855 | /* | 1450 | /* |
856 | * helper to check if the subvolume references other subvolumes | 1451 | * helper to check if the subvolume references other subvolumes |
857 | */ | 1452 | */ |
@@ -928,7 +1523,6 @@ static noinline int copy_to_sk(struct btrfs_root *root, | |||
928 | int nritems; | 1523 | int nritems; |
929 | int i; | 1524 | int i; |
930 | int slot; | 1525 | int slot; |
931 | int found = 0; | ||
932 | int ret = 0; | 1526 | int ret = 0; |
933 | 1527 | ||
934 | leaf = path->nodes[0]; | 1528 | leaf = path->nodes[0]; |
@@ -975,7 +1569,7 @@ static noinline int copy_to_sk(struct btrfs_root *root, | |||
975 | item_off, item_len); | 1569 | item_off, item_len); |
976 | *sk_offset += item_len; | 1570 | *sk_offset += item_len; |
977 | } | 1571 | } |
978 | found++; | 1572 | (*num_found)++; |
979 | 1573 | ||
980 | if (*num_found >= sk->nr_items) | 1574 | if (*num_found >= sk->nr_items) |
981 | break; | 1575 | break; |
@@ -994,7 +1588,6 @@ advance_key: | |||
994 | } else | 1588 | } else |
995 | ret = 1; | 1589 | ret = 1; |
996 | overflow: | 1590 | overflow: |
997 | *num_found += found; | ||
998 | return ret; | 1591 | return ret; |
999 | } | 1592 | } |
1000 | 1593 | ||
@@ -1051,7 +1644,7 @@ static noinline int search_ioctl(struct inode *inode, | |||
1051 | } | 1644 | } |
1052 | ret = copy_to_sk(root, path, &key, sk, args->buf, | 1645 | ret = copy_to_sk(root, path, &key, sk, args->buf, |
1053 | &sk_offset, &num_found); | 1646 | &sk_offset, &num_found); |
1054 | btrfs_release_path(root, path); | 1647 | btrfs_release_path(path); |
1055 | if (ret || num_found >= sk->nr_items) | 1648 | if (ret || num_found >= sk->nr_items) |
1056 | break; | 1649 | break; |
1057 | 1650 | ||
@@ -1073,14 +1666,10 @@ static noinline int btrfs_ioctl_tree_search(struct file *file, | |||
1073 | if (!capable(CAP_SYS_ADMIN)) | 1666 | if (!capable(CAP_SYS_ADMIN)) |
1074 | return -EPERM; | 1667 | return -EPERM; |
1075 | 1668 | ||
1076 | args = kmalloc(sizeof(*args), GFP_KERNEL); | 1669 | args = memdup_user(argp, sizeof(*args)); |
1077 | if (!args) | 1670 | if (IS_ERR(args)) |
1078 | return -ENOMEM; | 1671 | return PTR_ERR(args); |
1079 | 1672 | ||
1080 | if (copy_from_user(args, argp, sizeof(*args))) { | ||
1081 | kfree(args); | ||
1082 | return -EFAULT; | ||
1083 | } | ||
1084 | inode = fdentry(file)->d_inode; | 1673 | inode = fdentry(file)->d_inode; |
1085 | ret = search_ioctl(inode, args); | 1674 | ret = search_ioctl(inode, args); |
1086 | if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) | 1675 | if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) |
@@ -1162,7 +1751,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, | |||
1162 | if (key.offset == BTRFS_FIRST_FREE_OBJECTID) | 1751 | if (key.offset == BTRFS_FIRST_FREE_OBJECTID) |
1163 | break; | 1752 | break; |
1164 | 1753 | ||
1165 | btrfs_release_path(root, path); | 1754 | btrfs_release_path(path); |
1166 | key.objectid = key.offset; | 1755 | key.objectid = key.offset; |
1167 | key.offset = (u64)-1; | 1756 | key.offset = (u64)-1; |
1168 | dirid = key.objectid; | 1757 | dirid = key.objectid; |
@@ -1188,14 +1777,10 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file, | |||
1188 | if (!capable(CAP_SYS_ADMIN)) | 1777 | if (!capable(CAP_SYS_ADMIN)) |
1189 | return -EPERM; | 1778 | return -EPERM; |
1190 | 1779 | ||
1191 | args = kmalloc(sizeof(*args), GFP_KERNEL); | 1780 | args = memdup_user(argp, sizeof(*args)); |
1192 | if (!args) | 1781 | if (IS_ERR(args)) |
1193 | return -ENOMEM; | 1782 | return PTR_ERR(args); |
1194 | 1783 | ||
1195 | if (copy_from_user(args, argp, sizeof(*args))) { | ||
1196 | kfree(args); | ||
1197 | return -EFAULT; | ||
1198 | } | ||
1199 | inode = fdentry(file)->d_inode; | 1784 | inode = fdentry(file)->d_inode; |
1200 | 1785 | ||
1201 | if (args->treeid == 0) | 1786 | if (args->treeid == 0) |
@@ -1227,9 +1812,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
1227 | int ret; | 1812 | int ret; |
1228 | int err = 0; | 1813 | int err = 0; |
1229 | 1814 | ||
1230 | if (!capable(CAP_SYS_ADMIN)) | ||
1231 | return -EPERM; | ||
1232 | |||
1233 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 1815 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
1234 | if (IS_ERR(vol_args)) | 1816 | if (IS_ERR(vol_args)) |
1235 | return PTR_ERR(vol_args); | 1817 | return PTR_ERR(vol_args); |
@@ -1259,12 +1841,50 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
1259 | } | 1841 | } |
1260 | 1842 | ||
1261 | inode = dentry->d_inode; | 1843 | inode = dentry->d_inode; |
1262 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { | 1844 | dest = BTRFS_I(inode)->root; |
1845 | if (!capable(CAP_SYS_ADMIN)) { | ||
1846 | /* | ||
1847 | * Regular user. Only allow this with a special mount | ||
1848 | * option, when the user has write+exec access to the | ||
1849 | * subvol root, and when rmdir(2) would have been | ||
1850 | * allowed. | ||
1851 | * | ||
1852 | * Note that this is _not_ a check that the subvol is | ||
1853 | * empty or doesn't contain data that we wouldn't | ||
1854 | * otherwise be able to delete. | ||
1855 | * | ||
1856 | * Users who want to delete empty subvols should try | ||
1857 | * rmdir(2). | ||
1858 | */ | ||
1859 | err = -EPERM; | ||
1860 | if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) | ||
1861 | goto out_dput; | ||
1862 | |||
1863 | /* | ||
1864 | * Do not allow deletion if the parent dir is the same | ||
1865 | * as the dir to be deleted. That means the ioctl | ||
1866 | * must be called on the dentry referencing the root | ||
1867 | * of the subvol, not a random directory contained | ||
1868 | * within it. | ||
1869 | */ | ||
1263 | err = -EINVAL; | 1870 | err = -EINVAL; |
1264 | goto out_dput; | 1871 | if (root == dest) |
1872 | goto out_dput; | ||
1873 | |||
1874 | err = inode_permission(inode, MAY_WRITE | MAY_EXEC); | ||
1875 | if (err) | ||
1876 | goto out_dput; | ||
1877 | |||
1878 | /* check if subvolume may be deleted by a non-root user */ | ||
1879 | err = btrfs_may_delete(dir, dentry, 1); | ||
1880 | if (err) | ||
1881 | goto out_dput; | ||
1265 | } | 1882 | } |
1266 | 1883 | ||
1267 | dest = BTRFS_I(inode)->root; | 1884 | if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { |
1885 | err = -EINVAL; | ||
1886 | goto out_dput; | ||
1887 | } | ||
1268 | 1888 | ||
1269 | mutex_lock(&inode->i_mutex); | 1889 | mutex_lock(&inode->i_mutex); |
1270 | err = d_invalidate(dentry); | 1890 | err = d_invalidate(dentry); |
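With these checks, deletion no longer requires CAP_SYS_ADMIN: a user who could rmdir() the subvolume root may destroy it, provided the filesystem is mounted with user_subvol_rm_allowed. The call itself is the unchanged v1 ioctl, sketched here:

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	/* dirfd: the parent directory of the subvolume to delete */
	static int delete_subvol(int dirfd, const char *name)
	{
		struct btrfs_ioctl_vol_args args;

		memset(&args, 0, sizeof(args));
		strncpy(args.name, name, BTRFS_PATH_NAME_MAX);
		return ioctl(dirfd, BTRFS_IOC_SNAP_DESTROY, &args);
	}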
@@ -1304,7 +1924,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
1304 | BUG_ON(ret); | 1924 | BUG_ON(ret); |
1305 | } | 1925 | } |
1306 | 1926 | ||
1307 | ret = btrfs_commit_transaction(trans, root); | 1927 | ret = btrfs_end_transaction(trans, root); |
1308 | BUG_ON(ret); | 1928 | BUG_ON(ret); |
1309 | inode->i_flags |= S_DEAD; | 1929 | inode->i_flags |= S_DEAD; |
1310 | out_up_write: | 1930 | out_up_write: |
@@ -1333,6 +1953,9 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
1333 | struct btrfs_ioctl_defrag_range_args *range; | 1953 | struct btrfs_ioctl_defrag_range_args *range; |
1334 | int ret; | 1954 | int ret; |
1335 | 1955 | ||
1956 | if (btrfs_root_readonly(root)) | ||
1957 | return -EROFS; | ||
1958 | |||
1336 | ret = mnt_want_write(file->f_path.mnt); | 1959 | ret = mnt_want_write(file->f_path.mnt); |
1337 | if (ret) | 1960 | if (ret) |
1338 | return ret; | 1961 | return ret; |
@@ -1376,7 +1999,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
1376 | /* the rest are all set to zero by kzalloc */ | 1999 | /* the rest are all set to zero by kzalloc */ |
1377 | range->len = (u64)-1; | 2000 | range->len = (u64)-1; |
1378 | } | 2001 | } |
1379 | ret = btrfs_defrag_file(file, range); | 2002 | ret = btrfs_defrag_file(fdentry(file)->d_inode, file, |
2003 | range, 0, 0); | ||
2004 | if (ret > 0) | ||
2005 | ret = 0; | ||
1380 | kfree(range); | 2006 | kfree(range); |
1381 | break; | 2007 | break; |
1382 | default: | 2008 | default: |
@@ -1428,6 +2054,80 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | |||
1428 | return ret; | 2054 | return ret; |
1429 | } | 2055 | } |
1430 | 2056 | ||
2057 | static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) | ||
2058 | { | ||
2059 | struct btrfs_ioctl_fs_info_args *fi_args; | ||
2060 | struct btrfs_device *device; | ||
2061 | struct btrfs_device *next; | ||
2062 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | ||
2063 | int ret = 0; | ||
2064 | |||
2065 | if (!capable(CAP_SYS_ADMIN)) | ||
2066 | return -EPERM; | ||
2067 | |||
2068 | fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL); | ||
2069 | if (!fi_args) | ||
2070 | return -ENOMEM; | ||
2071 | |||
2072 | fi_args->num_devices = fs_devices->num_devices; | ||
2073 | memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid)); | ||
2074 | |||
2075 | mutex_lock(&fs_devices->device_list_mutex); | ||
2076 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { | ||
2077 | if (device->devid > fi_args->max_id) | ||
2078 | fi_args->max_id = device->devid; | ||
2079 | } | ||
2080 | mutex_unlock(&fs_devices->device_list_mutex); | ||
2081 | |||
2082 | if (copy_to_user(arg, fi_args, sizeof(*fi_args))) | ||
2083 | ret = -EFAULT; | ||
2084 | |||
2085 | kfree(fi_args); | ||
2086 | return ret; | ||
2087 | } | ||
2088 | |||
2089 | static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) | ||
2090 | { | ||
2091 | struct btrfs_ioctl_dev_info_args *di_args; | ||
2092 | struct btrfs_device *dev; | ||
2093 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | ||
2094 | int ret = 0; | ||
2095 | char *s_uuid = NULL; | ||
2096 | char empty_uuid[BTRFS_UUID_SIZE] = {0}; | ||
2097 | |||
2098 | if (!capable(CAP_SYS_ADMIN)) | ||
2099 | return -EPERM; | ||
2100 | |||
2101 | di_args = memdup_user(arg, sizeof(*di_args)); | ||
2102 | if (IS_ERR(di_args)) | ||
2103 | return PTR_ERR(di_args); | ||
2104 | |||
2105 | if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0) | ||
2106 | s_uuid = di_args->uuid; | ||
2107 | |||
2108 | mutex_lock(&fs_devices->device_list_mutex); | ||
2109 | dev = btrfs_find_device(root, di_args->devid, s_uuid, NULL); | ||
2110 | mutex_unlock(&fs_devices->device_list_mutex); | ||
2111 | |||
2112 | if (!dev) { | ||
2113 | ret = -ENODEV; | ||
2114 | goto out; | ||
2115 | } | ||
2116 | |||
2117 | di_args->devid = dev->devid; | ||
2118 | di_args->bytes_used = dev->bytes_used; | ||
2119 | di_args->total_bytes = dev->total_bytes; | ||
2120 | memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); | ||
2121 | strncpy(di_args->path, dev->name, sizeof(di_args->path)); | ||
2122 | |||
2123 | out: | ||
2124 | if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) | ||
2125 | ret = -EFAULT; | ||
2126 | |||
2127 | kfree(di_args); | ||
2128 | return ret; | ||
2129 | } | ||
2130 | |||
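Together the two ioctls let tools enumerate devices without parsing /proc/mounts: FS_INFO reports num_devices and the highest devid, then DEV_INFO is probed per devid (a zeroed uuid means match by devid alone; holes in the devid space return -ENODEV). A hedged sketch:

	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	static void list_devices(int fd)
	{
		struct btrfs_ioctl_fs_info_args fi;
		struct btrfs_ioctl_dev_info_args di;
		__u64 id;

		if (ioctl(fd, BTRFS_IOC_FS_INFO, &fi))
			return;
		for (id = 0; id <= fi.max_id; id++) {
			memset(&di, 0, sizeof(di));
			di.devid = id;		/* zero uuid: match by devid only */
			if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) == 0)
				printf("devid %llu: %s\n",
				       (unsigned long long)di.devid,
				       (char *)di.path);
		}
	}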
1431 | static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | 2131 | static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, |
1432 | u64 off, u64 olen, u64 destoff) | 2132 | u64 off, u64 olen, u64 destoff) |
1433 | { | 2133 | { |
@@ -1461,6 +2161,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1461 | if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) | 2161 | if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) |
1462 | return -EINVAL; | 2162 | return -EINVAL; |
1463 | 2163 | ||
2164 | if (btrfs_root_readonly(root)) | ||
2165 | return -EROFS; | ||
2166 | |||
1464 | ret = mnt_want_write(file->f_path.mnt); | 2167 | ret = mnt_want_write(file->f_path.mnt); |
1465 | if (ret) | 2168 | if (ret) |
1466 | return ret; | 2169 | return ret; |
@@ -1502,11 +2205,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1502 | path->reada = 2; | 2205 | path->reada = 2; |
1503 | 2206 | ||
1504 | if (inode < src) { | 2207 | if (inode < src) { |
1505 | mutex_lock(&inode->i_mutex); | 2208 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); |
1506 | mutex_lock(&src->i_mutex); | 2209 | mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); |
1507 | } else { | 2210 | } else { |
1508 | mutex_lock(&src->i_mutex); | 2211 | mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); |
1509 | mutex_lock(&inode->i_mutex); | 2212 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); |
1510 | } | 2213 | } |
1511 | 2214 | ||
1512 | /* determine range to clone */ | 2215 | /* determine range to clone */ |
@@ -1517,12 +2220,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1517 | olen = len = src->i_size - off; | 2220 | olen = len = src->i_size - off; |
1518 | /* if we extend to eof, continue to block boundary */ | 2221 | /* if we extend to eof, continue to block boundary */ |
1519 | if (off + len == src->i_size) | 2222 | if (off + len == src->i_size) |
1520 | len = ((src->i_size + bs-1) & ~(bs-1)) | 2223 | len = ALIGN(src->i_size, bs) - off; |
1521 | - off; | ||
1522 | 2224 | ||
1523 | /* verify the end result is block aligned */ | 2225 | /* verify the end result is block aligned */ |
1524 | if ((off & (bs-1)) || | 2226 | if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || |
1525 | ((off + len) & (bs-1))) | 2227 | !IS_ALIGNED(destoff, bs)) |
1526 | goto out_unlock; | 2228 | goto out_unlock; |
1527 | 2229 | ||
1528 | /* do any pending delalloc/csum calc on src, one way or | 2230 | /* do any pending delalloc/csum calc on src, one way or |
@@ -1530,17 +2232,19 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1530 | while (1) { | 2232 | while (1) { |
1531 | struct btrfs_ordered_extent *ordered; | 2233 | struct btrfs_ordered_extent *ordered; |
1532 | lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); | 2234 | lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); |
1533 | ordered = btrfs_lookup_first_ordered_extent(inode, off+len); | 2235 | ordered = btrfs_lookup_first_ordered_extent(src, off+len); |
1534 | if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered) | 2236 | if (!ordered && |
2237 | !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len, | ||
2238 | EXTENT_DELALLOC, 0, NULL)) | ||
1535 | break; | 2239 | break; |
1536 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); | 2240 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); |
1537 | if (ordered) | 2241 | if (ordered) |
1538 | btrfs_put_ordered_extent(ordered); | 2242 | btrfs_put_ordered_extent(ordered); |
1539 | btrfs_wait_ordered_range(src, off, off+len); | 2243 | btrfs_wait_ordered_range(src, off, len); |
1540 | } | 2244 | } |
1541 | 2245 | ||
1542 | /* clone data */ | 2246 | /* clone data */ |
1543 | key.objectid = src->i_ino; | 2247 | key.objectid = btrfs_ino(src); |
1544 | key.type = BTRFS_EXTENT_DATA_KEY; | 2248 | key.type = BTRFS_EXTENT_DATA_KEY; |
1545 | key.offset = 0; | 2249 | key.offset = 0; |
1546 | 2250 | ||
@@ -1567,7 +2271,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1567 | 2271 | ||
1568 | btrfs_item_key_to_cpu(leaf, &key, slot); | 2272 | btrfs_item_key_to_cpu(leaf, &key, slot); |
1569 | if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || | 2273 | if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || |
1570 | key.objectid != src->i_ino) | 2274 | key.objectid != btrfs_ino(src)) |
1571 | break; | 2275 | break; |
1572 | 2276 | ||
1573 | if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { | 2277 | if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { |
@@ -1603,15 +2307,18 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1603 | datal = btrfs_file_extent_ram_bytes(leaf, | 2307 | datal = btrfs_file_extent_ram_bytes(leaf, |
1604 | extent); | 2308 | extent); |
1605 | } | 2309 | } |
1606 | btrfs_release_path(root, path); | 2310 | btrfs_release_path(path); |
1607 | 2311 | ||
1608 | if (key.offset + datal < off || | 2312 | if (key.offset + datal <= off || |
1609 | key.offset >= off+len) | 2313 | key.offset >= off+len) |
1610 | goto next; | 2314 | goto next; |
1611 | 2315 | ||
1612 | memcpy(&new_key, &key, sizeof(new_key)); | 2316 | memcpy(&new_key, &key, sizeof(new_key)); |
1613 | new_key.objectid = inode->i_ino; | 2317 | new_key.objectid = btrfs_ino(inode); |
1614 | new_key.offset = key.offset + destoff - off; | 2318 | if (off <= key.offset) |
2319 | new_key.offset = key.offset + destoff - off; | ||
2320 | else | ||
2321 | new_key.offset = destoff; | ||
1615 | 2322 | ||
1616 | trans = btrfs_start_transaction(root, 1); | 2323 | trans = btrfs_start_transaction(root, 1); |
1617 | if (IS_ERR(trans)) { | 2324 | if (IS_ERR(trans)) { |
@@ -1661,7 +2368,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1661 | ret = btrfs_inc_extent_ref(trans, root, | 2368 | ret = btrfs_inc_extent_ref(trans, root, |
1662 | disko, diskl, 0, | 2369 | disko, diskl, 0, |
1663 | root->root_key.objectid, | 2370 | root->root_key.objectid, |
1664 | inode->i_ino, | 2371 | btrfs_ino(inode), |
1665 | new_key.offset - datao); | 2372 | new_key.offset - datao); |
1666 | BUG_ON(ret); | 2373 | BUG_ON(ret); |
1667 | } | 2374 | } |
@@ -1710,7 +2417,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1710 | } | 2417 | } |
1711 | 2418 | ||
1712 | btrfs_mark_buffer_dirty(leaf); | 2419 | btrfs_mark_buffer_dirty(leaf); |
1713 | btrfs_release_path(root, path); | 2420 | btrfs_release_path(path); |
1714 | 2421 | ||
1715 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 2422 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
1716 | 2423 | ||
@@ -1720,8 +2427,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1720 | * but shouldn't round up the file size | 2427 | * but shouldn't round up the file size |
1721 | */ | 2428 | */ |
1722 | endoff = new_key.offset + datal; | 2429 | endoff = new_key.offset + datal; |
1723 | if (endoff > off+olen) | 2430 | if (endoff > destoff+olen) |
1724 | endoff = off+olen; | 2431 | endoff = destoff+olen; |
1725 | if (endoff > inode->i_size) | 2432 | if (endoff > inode->i_size) |
1726 | btrfs_i_size_write(inode, endoff); | 2433 | btrfs_i_size_write(inode, endoff); |
1727 | 2434 | ||
@@ -1731,12 +2438,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1731 | btrfs_end_transaction(trans, root); | 2438 | btrfs_end_transaction(trans, root); |
1732 | } | 2439 | } |
1733 | next: | 2440 | next: |
1734 | btrfs_release_path(root, path); | 2441 | btrfs_release_path(path); |
1735 | key.offset++; | 2442 | key.offset++; |
1736 | } | 2443 | } |
1737 | ret = 0; | 2444 | ret = 0; |
1738 | out: | 2445 | out: |
1739 | btrfs_release_path(root, path); | 2446 | btrfs_release_path(path); |
1740 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); | 2447 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); |
1741 | out_unlock: | 2448 | out_unlock: |
1742 | mutex_unlock(&src->i_mutex); | 2449 | mutex_unlock(&src->i_mutex); |
@@ -1781,26 +2488,26 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
1781 | if (file->private_data) | 2488 | if (file->private_data) |
1782 | goto out; | 2489 | goto out; |
1783 | 2490 | ||
2491 | ret = -EROFS; | ||
2492 | if (btrfs_root_readonly(root)) | ||
2493 | goto out; | ||
2494 | |||
1784 | ret = mnt_want_write(file->f_path.mnt); | 2495 | ret = mnt_want_write(file->f_path.mnt); |
1785 | if (ret) | 2496 | if (ret) |
1786 | goto out; | 2497 | goto out; |
1787 | 2498 | ||
1788 | mutex_lock(&root->fs_info->trans_mutex); | 2499 | atomic_inc(&root->fs_info->open_ioctl_trans); |
1789 | root->fs_info->open_ioctl_trans++; | ||
1790 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1791 | 2500 | ||
1792 | ret = -ENOMEM; | 2501 | ret = -ENOMEM; |
1793 | trans = btrfs_start_ioctl_transaction(root, 0); | 2502 | trans = btrfs_start_ioctl_transaction(root); |
1794 | if (!trans) | 2503 | if (IS_ERR(trans)) |
1795 | goto out_drop; | 2504 | goto out_drop; |
1796 | 2505 | ||
1797 | file->private_data = trans; | 2506 | file->private_data = trans; |
1798 | return 0; | 2507 | return 0; |
1799 | 2508 | ||
1800 | out_drop: | 2509 | out_drop: |
1801 | mutex_lock(&root->fs_info->trans_mutex); | 2510 | atomic_dec(&root->fs_info->open_ioctl_trans); |
1802 | root->fs_info->open_ioctl_trans--; | ||
1803 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1804 | mnt_drop_write(file->f_path.mnt); | 2511 | mnt_drop_write(file->f_path.mnt); |
1805 | out: | 2512 | out: |
1806 | return ret; | 2513 | return ret; |
@@ -1847,9 +2554,9 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
1847 | path->leave_spinning = 1; | 2554 | path->leave_spinning = 1; |
1848 | 2555 | ||
1849 | trans = btrfs_start_transaction(root, 1); | 2556 | trans = btrfs_start_transaction(root, 1); |
1850 | if (!trans) { | 2557 | if (IS_ERR(trans)) { |
1851 | btrfs_free_path(path); | 2558 | btrfs_free_path(path); |
1852 | return -ENOMEM; | 2559 | return PTR_ERR(trans); |
1853 | } | 2560 | } |
1854 | 2561 | ||
1855 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); | 2562 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); |
@@ -1879,35 +2586,80 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
1879 | return 0; | 2586 | return 0; |
1880 | } | 2587 | } |
1881 | 2588 | ||
2589 | static void get_block_group_info(struct list_head *groups_list, | ||
2590 | struct btrfs_ioctl_space_info *space) | ||
2591 | { | ||
2592 | struct btrfs_block_group_cache *block_group; | ||
2593 | |||
2594 | space->total_bytes = 0; | ||
2595 | space->used_bytes = 0; | ||
2596 | space->flags = 0; | ||
2597 | list_for_each_entry(block_group, groups_list, list) { | ||
2598 | space->flags = block_group->flags; | ||
2599 | space->total_bytes += block_group->key.offset; | ||
2600 | space->used_bytes += | ||
2601 | btrfs_block_group_used(&block_group->item); | ||
2602 | } | ||
2603 | } | ||
2604 | |||
1882 | long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | 2605 | long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) |
1883 | { | 2606 | { |
1884 | struct btrfs_ioctl_space_args space_args; | 2607 | struct btrfs_ioctl_space_args space_args; |
1885 | struct btrfs_ioctl_space_info space; | 2608 | struct btrfs_ioctl_space_info space; |
1886 | struct btrfs_ioctl_space_info *dest; | 2609 | struct btrfs_ioctl_space_info *dest; |
1887 | struct btrfs_ioctl_space_info *dest_orig; | 2610 | struct btrfs_ioctl_space_info *dest_orig; |
1888 | struct btrfs_ioctl_space_info *user_dest; | 2611 | struct btrfs_ioctl_space_info __user *user_dest; |
1889 | struct btrfs_space_info *info; | 2612 | struct btrfs_space_info *info; |
2613 | u64 types[] = {BTRFS_BLOCK_GROUP_DATA, | ||
2614 | BTRFS_BLOCK_GROUP_SYSTEM, | ||
2615 | BTRFS_BLOCK_GROUP_METADATA, | ||
2616 | BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; | ||
2617 | int num_types = 4; | ||
1890 | int alloc_size; | 2618 | int alloc_size; |
1891 | int ret = 0; | 2619 | int ret = 0; |
1892 | int slot_count = 0; | 2620 | u64 slot_count = 0; |
2621 | int i, c; | ||
1893 | 2622 | ||
1894 | if (copy_from_user(&space_args, | 2623 | if (copy_from_user(&space_args, |
1895 | (struct btrfs_ioctl_space_args __user *)arg, | 2624 | (struct btrfs_ioctl_space_args __user *)arg, |
1896 | sizeof(space_args))) | 2625 | sizeof(space_args))) |
1897 | return -EFAULT; | 2626 | return -EFAULT; |
1898 | 2627 | ||
1899 | /* first we count slots */ | 2628 | for (i = 0; i < num_types; i++) { |
1900 | rcu_read_lock(); | 2629 | struct btrfs_space_info *tmp; |
1901 | list_for_each_entry_rcu(info, &root->fs_info->space_info, list) | 2630 | |
1902 | slot_count++; | 2631 | info = NULL; |
1903 | rcu_read_unlock(); | 2632 | rcu_read_lock(); |
2633 | list_for_each_entry_rcu(tmp, &root->fs_info->space_info, | ||
2634 | list) { | ||
2635 | if (tmp->flags == types[i]) { | ||
2636 | info = tmp; | ||
2637 | break; | ||
2638 | } | ||
2639 | } | ||
2640 | rcu_read_unlock(); | ||
2641 | |||
2642 | if (!info) | ||
2643 | continue; | ||
2644 | |||
2645 | down_read(&info->groups_sem); | ||
2646 | for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { | ||
2647 | if (!list_empty(&info->block_groups[c])) | ||
2648 | slot_count++; | ||
2649 | } | ||
2650 | up_read(&info->groups_sem); | ||
2651 | } | ||
1904 | 2652 | ||
1905 | /* space_slots == 0 means they are asking for a count */ | 2653 | /* space_slots == 0 means they are asking for a count */ |
1906 | if (space_args.space_slots == 0) { | 2654 | if (space_args.space_slots == 0) { |
1907 | space_args.total_spaces = slot_count; | 2655 | space_args.total_spaces = slot_count; |
1908 | goto out; | 2656 | goto out; |
1909 | } | 2657 | } |
2658 | |||
2659 | slot_count = min_t(u64, space_args.space_slots, slot_count); | ||
2660 | |||
1910 | alloc_size = sizeof(*dest) * slot_count; | 2661 | alloc_size = sizeof(*dest) * slot_count; |
2662 | |||
1911 | /* we generally have at most 6 or so space infos, one for each raid | 2663 | /* we generally have at most 6 or so space infos, one for each raid |
1912 | * level. So, a whole page should be more than enough for everyone | 2664 | * level. So, a whole page should be more than enough for everyone |
1913 | */ | 2665 | */ |
@@ -1921,27 +2673,40 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | |||
1921 | dest_orig = dest; | 2673 | dest_orig = dest; |
1922 | 2674 | ||
1923 | /* now we have a buffer to copy into */ | 2675 | /* now we have a buffer to copy into */ |
1924 | rcu_read_lock(); | 2676 | for (i = 0; i < num_types; i++) { |
1925 | list_for_each_entry_rcu(info, &root->fs_info->space_info, list) { | 2677 | struct btrfs_space_info *tmp; |
1926 | /* make sure we don't copy more than we allocated | ||
1927 | * in our buffer | ||
1928 | */ | ||
1929 | if (slot_count == 0) | ||
1930 | break; | ||
1931 | slot_count--; | ||
1932 | 2678 | ||
1933 | /* make sure userland has enough room in their buffer */ | 2679 | if (!slot_count) |
1934 | if (space_args.total_spaces >= space_args.space_slots) | ||
1935 | break; | 2680 | break; |
1936 | 2681 | ||
1937 | space.flags = info->flags; | 2682 | info = NULL; |
1938 | space.total_bytes = info->total_bytes; | 2683 | rcu_read_lock(); |
1939 | space.used_bytes = info->bytes_used; | 2684 | list_for_each_entry_rcu(tmp, &root->fs_info->space_info, |
1940 | memcpy(dest, &space, sizeof(space)); | 2685 | list) { |
1941 | dest++; | 2686 | if (tmp->flags == types[i]) { |
1942 | space_args.total_spaces++; | 2687 | info = tmp; |
2688 | break; | ||
2689 | } | ||
2690 | } | ||
2691 | rcu_read_unlock(); | ||
2692 | |||
2693 | if (!info) | ||
2694 | continue; | ||
2695 | down_read(&info->groups_sem); | ||
2696 | for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { | ||
2697 | if (!list_empty(&info->block_groups[c])) { | ||
2698 | get_block_group_info(&info->block_groups[c], | ||
2699 | &space); | ||
2700 | memcpy(dest, &space, sizeof(space)); | ||
2701 | dest++; | ||
2702 | space_args.total_spaces++; | ||
2703 | slot_count--; | ||
2704 | } | ||
2705 | if (!slot_count) | ||
2706 | break; | ||
2707 | } | ||
2708 | up_read(&info->groups_sem); | ||
1943 | } | 2709 | } |
1944 | rcu_read_unlock(); | ||
1945 | 2710 | ||
1946 | user_dest = (struct btrfs_ioctl_space_info *) | 2711 | user_dest = (struct btrfs_ioctl_space_info *) |
1947 | (arg + sizeof(struct btrfs_ioctl_space_args)); | 2712 | (arg + sizeof(struct btrfs_ioctl_space_args)); |
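
The rewritten loop above serves a two-pass protocol: userspace first calls with space_slots == 0 to learn the count, then calls again with a buffer sized from it. A minimal userspace sketch (error handling trimmed; the trailing spaces[] flexible array is assumed from the struct btrfs_ioctl_space_args definition):

	struct btrfs_ioctl_space_args probe, *args;

	memset(&probe, 0, sizeof(probe));	/* space_slots == 0: count only */
	if (ioctl(fd, BTRFS_IOC_SPACE_INFO, &probe) < 0)
		return -1;

	args = calloc(1, sizeof(*args) +
		      probe.total_spaces * sizeof(struct btrfs_ioctl_space_info));
	if (!args)
		return -1;
	args->space_slots = probe.total_spaces;	/* second pass: fetch the data */
	if (ioctl(fd, BTRFS_IOC_SPACE_INFO, args) < 0)
		return -1;
	/* args->total_spaces entries in args->spaces[] are now valid */

The min_t() clamp above means a slot count that grew between the two calls is truncated to the user's buffer rather than overflowing it.
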
@@ -1976,14 +2741,101 @@ long btrfs_ioctl_trans_end(struct file *file) | |||
1976 | 2741 | ||
1977 | btrfs_end_transaction(trans, root); | 2742 | btrfs_end_transaction(trans, root); |
1978 | 2743 | ||
1979 | mutex_lock(&root->fs_info->trans_mutex); | 2744 | atomic_dec(&root->fs_info->open_ioctl_trans); |
1980 | root->fs_info->open_ioctl_trans--; | ||
1981 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1982 | 2745 | ||
1983 | mnt_drop_write(file->f_path.mnt); | 2746 | mnt_drop_write(file->f_path.mnt); |
1984 | return 0; | 2747 | return 0; |
1985 | } | 2748 | } |
1986 | 2749 | ||
2750 | static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp) | ||
2751 | { | ||
2752 | struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; | ||
2753 | struct btrfs_trans_handle *trans; | ||
2754 | u64 transid; | ||
2755 | int ret; | ||
2756 | |||
2757 | trans = btrfs_start_transaction(root, 0); | ||
2758 | if (IS_ERR(trans)) | ||
2759 | return PTR_ERR(trans); | ||
2760 | transid = trans->transid; | ||
2761 | ret = btrfs_commit_transaction_async(trans, root, 0); | ||
2762 | if (ret) { | ||
2763 | btrfs_end_transaction(trans, root); | ||
2764 | return ret; | ||
2765 | } | ||
2766 | |||
2767 | if (argp) | ||
2768 | if (copy_to_user(argp, &transid, sizeof(transid))) | ||
2769 | return -EFAULT; | ||
2770 | return 0; | ||
2771 | } | ||
2772 | |||
2773 | static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp) | ||
2774 | { | ||
2775 | struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; | ||
2776 | u64 transid; | ||
2777 | |||
2778 | if (argp) { | ||
2779 | if (copy_from_user(&transid, argp, sizeof(transid))) | ||
2780 | return -EFAULT; | ||
2781 | } else { | ||
2782 | transid = 0; /* current trans */ | ||
2783 | } | ||
2784 | return btrfs_wait_for_commit(root, transid); | ||
2785 | } | ||
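
Together these two handlers split "commit" from "wait": START_SYNC kicks off an async commit and reports its transaction id, WAIT_SYNC blocks until a given (or the current) transaction is on disk. A sketch of the intended pairing from userspace:

	__u64 transid;

	if (ioctl(fd, BTRFS_IOC_START_SYNC, &transid) == 0) {
		/* ... overlap other work with the running commit ... */
		ioctl(fd, BTRFS_IOC_WAIT_SYNC, &transid);
	}

Passing a NULL pointer to WAIT_SYNC waits on the current transaction instead, per the transid = 0 branch above.
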
2786 | |||
2787 | static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg) | ||
2788 | { | ||
2789 | int ret; | ||
2790 | struct btrfs_ioctl_scrub_args *sa; | ||
2791 | |||
2792 | if (!capable(CAP_SYS_ADMIN)) | ||
2793 | return -EPERM; | ||
2794 | |||
2795 | sa = memdup_user(arg, sizeof(*sa)); | ||
2796 | if (IS_ERR(sa)) | ||
2797 | return PTR_ERR(sa); | ||
2798 | |||
2799 | ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end, | ||
2800 | &sa->progress, sa->flags & BTRFS_SCRUB_READONLY); | ||
2801 | |||
2802 | if (copy_to_user(arg, sa, sizeof(*sa))) | ||
2803 | ret = -EFAULT; | ||
2804 | |||
2805 | kfree(sa); | ||
2806 | return ret; | ||
2807 | } | ||
2808 | |||
2809 | static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg) | ||
2810 | { | ||
2811 | if (!capable(CAP_SYS_ADMIN)) | ||
2812 | return -EPERM; | ||
2813 | |||
2814 | return btrfs_scrub_cancel(root); | ||
2815 | } | ||
2816 | |||
2817 | static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, | ||
2818 | void __user *arg) | ||
2819 | { | ||
2820 | struct btrfs_ioctl_scrub_args *sa; | ||
2821 | int ret; | ||
2822 | |||
2823 | if (!capable(CAP_SYS_ADMIN)) | ||
2824 | return -EPERM; | ||
2825 | |||
2826 | sa = memdup_user(arg, sizeof(*sa)); | ||
2827 | if (IS_ERR(sa)) | ||
2828 | return PTR_ERR(sa); | ||
2829 | |||
2830 | ret = btrfs_scrub_progress(root, sa->devid, &sa->progress); | ||
2831 | |||
2832 | if (copy_to_user(arg, sa, sizeof(*sa))) | ||
2833 | ret = -EFAULT; | ||
2834 | |||
2835 | kfree(sa); | ||
2836 | return ret; | ||
2837 | } | ||
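
A userspace sketch of driving the three scrub handlers: BTRFS_IOC_SCRUB blocks until the scrub finishes or is cancelled, so SCRUB_PROGRESS and SCRUB_CANCEL are expected to be issued from a second thread or process. Error handling is trimmed:

	struct btrfs_ioctl_scrub_args sa;

	memset(&sa, 0, sizeof(sa));
	sa.devid = 1;				/* first device of the filesystem */
	sa.start = 0;
	sa.end = (__u64)-1;			/* scrub the whole device */
	sa.flags = BTRFS_SCRUB_READONLY;	/* report errors, do not repair */
	if (ioctl(fd, BTRFS_IOC_SCRUB, &sa) == 0)
		printf("csum errors: %llu, corrected: %llu\n",
		       (unsigned long long)sa.progress.csum_errors,
		       (unsigned long long)sa.progress.corrected_errors);
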
2838 | |||
1987 | long btrfs_ioctl(struct file *file, unsigned int | 2839 | long btrfs_ioctl(struct file *file, unsigned int |
1988 | cmd, unsigned long arg) | 2840 | cmd, unsigned long arg) |
1989 | { | 2841 | { |
@@ -1997,12 +2849,20 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
1997 | return btrfs_ioctl_setflags(file, argp); | 2849 | return btrfs_ioctl_setflags(file, argp); |
1998 | case FS_IOC_GETVERSION: | 2850 | case FS_IOC_GETVERSION: |
1999 | return btrfs_ioctl_getversion(file, argp); | 2851 | return btrfs_ioctl_getversion(file, argp); |
2852 | case FITRIM: | ||
2853 | return btrfs_ioctl_fitrim(file, argp); | ||
2000 | case BTRFS_IOC_SNAP_CREATE: | 2854 | case BTRFS_IOC_SNAP_CREATE: |
2001 | return btrfs_ioctl_snap_create(file, argp, 0); | 2855 | return btrfs_ioctl_snap_create(file, argp, 0); |
2856 | case BTRFS_IOC_SNAP_CREATE_V2: | ||
2857 | return btrfs_ioctl_snap_create_v2(file, argp, 0); | ||
2002 | case BTRFS_IOC_SUBVOL_CREATE: | 2858 | case BTRFS_IOC_SUBVOL_CREATE: |
2003 | return btrfs_ioctl_snap_create(file, argp, 1); | 2859 | return btrfs_ioctl_snap_create(file, argp, 1); |
2004 | case BTRFS_IOC_SNAP_DESTROY: | 2860 | case BTRFS_IOC_SNAP_DESTROY: |
2005 | return btrfs_ioctl_snap_destroy(file, argp); | 2861 | return btrfs_ioctl_snap_destroy(file, argp); |
2862 | case BTRFS_IOC_SUBVOL_GETFLAGS: | ||
2863 | return btrfs_ioctl_subvol_getflags(file, argp); | ||
2864 | case BTRFS_IOC_SUBVOL_SETFLAGS: | ||
2865 | return btrfs_ioctl_subvol_setflags(file, argp); | ||
2006 | case BTRFS_IOC_DEFAULT_SUBVOL: | 2866 | case BTRFS_IOC_DEFAULT_SUBVOL: |
2007 | return btrfs_ioctl_default_subvol(file, argp); | 2867 | return btrfs_ioctl_default_subvol(file, argp); |
2008 | case BTRFS_IOC_DEFRAG: | 2868 | case BTRFS_IOC_DEFRAG: |
@@ -2015,6 +2875,10 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
2015 | return btrfs_ioctl_add_dev(root, argp); | 2875 | return btrfs_ioctl_add_dev(root, argp); |
2016 | case BTRFS_IOC_RM_DEV: | 2876 | case BTRFS_IOC_RM_DEV: |
2017 | return btrfs_ioctl_rm_dev(root, argp); | 2877 | return btrfs_ioctl_rm_dev(root, argp); |
2878 | case BTRFS_IOC_FS_INFO: | ||
2879 | return btrfs_ioctl_fs_info(root, argp); | ||
2880 | case BTRFS_IOC_DEV_INFO: | ||
2881 | return btrfs_ioctl_dev_info(root, argp); | ||
2018 | case BTRFS_IOC_BALANCE: | 2882 | case BTRFS_IOC_BALANCE: |
2019 | return btrfs_balance(root->fs_info->dev_root); | 2883 | return btrfs_balance(root->fs_info->dev_root); |
2020 | case BTRFS_IOC_CLONE: | 2884 | case BTRFS_IOC_CLONE: |
@@ -2034,6 +2898,16 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
2034 | case BTRFS_IOC_SYNC: | 2898 | case BTRFS_IOC_SYNC: |
2035 | btrfs_sync_fs(file->f_dentry->d_sb, 1); | 2899 | btrfs_sync_fs(file->f_dentry->d_sb, 1); |
2036 | return 0; | 2900 | return 0; |
2901 | case BTRFS_IOC_START_SYNC: | ||
2902 | return btrfs_ioctl_start_sync(file, argp); | ||
2903 | case BTRFS_IOC_WAIT_SYNC: | ||
2904 | return btrfs_ioctl_wait_sync(file, argp); | ||
2905 | case BTRFS_IOC_SCRUB: | ||
2906 | return btrfs_ioctl_scrub(root, argp); | ||
2907 | case BTRFS_IOC_SCRUB_CANCEL: | ||
2908 | return btrfs_ioctl_scrub_cancel(root, argp); | ||
2909 | case BTRFS_IOC_SCRUB_PROGRESS: | ||
2910 | return btrfs_ioctl_scrub_progress(root, argp); | ||
2037 | } | 2911 | } |
2038 | 2912 | ||
2039 | return -ENOTTY; | 2913 | return -ENOTTY; |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 424694aa517f..ad1ea789fcb4 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
@@ -22,14 +22,93 @@ | |||
22 | 22 | ||
23 | #define BTRFS_IOCTL_MAGIC 0x94 | 23 | #define BTRFS_IOCTL_MAGIC 0x94 |
24 | #define BTRFS_VOL_NAME_MAX 255 | 24 | #define BTRFS_VOL_NAME_MAX 255 |
25 | #define BTRFS_PATH_NAME_MAX 4087 | ||
26 | 25 | ||
27 | /* this should be 4k */ | 26 | /* this should be 4k */ |
27 | #define BTRFS_PATH_NAME_MAX 4087 | ||
28 | struct btrfs_ioctl_vol_args { | 28 | struct btrfs_ioctl_vol_args { |
29 | __s64 fd; | 29 | __s64 fd; |
30 | char name[BTRFS_PATH_NAME_MAX + 1]; | 30 | char name[BTRFS_PATH_NAME_MAX + 1]; |
31 | }; | 31 | }; |
32 | 32 | ||
33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) | ||
34 | #define BTRFS_SUBVOL_RDONLY (1ULL << 1) | ||
35 | #define BTRFS_FSID_SIZE 16 | ||
36 | #define BTRFS_UUID_SIZE 16 | ||
37 | |||
38 | #define BTRFS_SUBVOL_NAME_MAX 4039 | ||
39 | struct btrfs_ioctl_vol_args_v2 { | ||
40 | __s64 fd; | ||
41 | __u64 transid; | ||
42 | __u64 flags; | ||
43 | __u64 unused[4]; | ||
44 | char name[BTRFS_SUBVOL_NAME_MAX + 1]; | ||
45 | }; | ||
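
The v2 args exist mainly to carry the flags above. A sketch of an async snapshot that is waited on later (assuming, as the field layout suggests, that the kernel copies the commit's transaction id back into transid when BTRFS_SUBVOL_CREATE_ASYNC is set):

	struct btrfs_ioctl_vol_args_v2 v2;

	memset(&v2, 0, sizeof(v2));
	v2.fd = src_fd;				/* fd of the subvolume to snapshot */
	v2.flags = BTRFS_SUBVOL_CREATE_ASYNC;
	strncpy(v2.name, "snap1", BTRFS_SUBVOL_NAME_MAX);
	if (ioctl(dest_dir_fd, BTRFS_IOC_SNAP_CREATE_V2, &v2) == 0)
		ioctl(dest_dir_fd, BTRFS_IOC_WAIT_SYNC, &v2.transid);
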
46 | |||
47 | /* | ||
48 | * structure to report errors and progress to userspace, either as a | ||
49 | * result of a finished scrub, a canceled scrub or a progress inquiry | ||
50 | */ | ||
51 | struct btrfs_scrub_progress { | ||
52 | __u64 data_extents_scrubbed; /* # of data extents scrubbed */ | ||
53 | __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */ | ||
54 | __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */ | ||
55 | __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */ | ||
56 | __u64 read_errors; /* # of read errors encountered (EIO) */ | ||
57 | __u64 csum_errors; /* # of failed csum checks */ | ||
58 | __u64 verify_errors; /* # of occurrences where the metadata | ||
59 | * of a tree block did not match the | ||
60 | * expected values, like generation or | ||
61 | * logical */ | ||
62 | __u64 no_csum; /* # of 4k data blocks for which no csum | ||
63 | * is present, probably the result of | ||
64 | * data written with nodatasum */ | ||
65 | __u64 csum_discards; /* # of csums for which no data was found | ||
66 | * in the extent tree. */ | ||
67 | __u64 super_errors; /* # of bad super blocks encountered */ | ||
68 | __u64 malloc_errors; /* # of internal kmalloc errors. These | ||
69 | * will likely cause an incomplete | ||
70 | * scrub */ | ||
71 | __u64 uncorrectable_errors; /* # of errors where either no intact | ||
72 | * copy was found or the writeback | ||
73 | * failed */ | ||
74 | __u64 corrected_errors; /* # of errors corrected */ | ||
75 | __u64 last_physical; /* last physical address scrubbed. In | ||
76 | * case a scrub was aborted, this can | ||
77 | * be used to restart the scrub */ | ||
78 | __u64 unverified_errors; /* # of occurrences where a read for a | ||
79 | * full (64k) bio failed, but the re- | ||
80 | * check succeeded for each 4k piece. | ||
81 | * Intermittent error. */ | ||
82 | }; | ||
83 | |||
84 | #define BTRFS_SCRUB_READONLY 1 | ||
85 | struct btrfs_ioctl_scrub_args { | ||
86 | __u64 devid; /* in */ | ||
87 | __u64 start; /* in */ | ||
88 | __u64 end; /* in */ | ||
89 | __u64 flags; /* in */ | ||
90 | struct btrfs_scrub_progress progress; /* out */ | ||
91 | /* pad to 1k */ | ||
92 | __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; | ||
93 | }; | ||
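
The unused[] arithmetic pins the args at exactly 1 KiB: four leading __u64 fields (32 bytes), the progress block, then pad. If that invariant ever needs guarding, a build-time check inside any function in ioctl.c would be the natural form (a sketch; no such assertion exists in this patch):

	BUILD_BUG_ON(sizeof(struct btrfs_ioctl_scrub_args) != 1024);
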
94 | |||
95 | #define BTRFS_DEVICE_PATH_NAME_MAX 1024 | ||
96 | struct btrfs_ioctl_dev_info_args { | ||
97 | __u64 devid; /* in/out */ | ||
98 | __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ | ||
99 | __u64 bytes_used; /* out */ | ||
100 | __u64 total_bytes; /* out */ | ||
101 | __u64 unused[379]; /* pad to 4k */ | ||
102 | __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */ | ||
103 | }; | ||
104 | |||
105 | struct btrfs_ioctl_fs_info_args { | ||
106 | __u64 max_id; /* out */ | ||
107 | __u64 num_devices; /* out */ | ||
108 | __u8 fsid[BTRFS_FSID_SIZE]; /* out */ | ||
109 | __u64 reserved[124]; /* pad to 1k */ | ||
110 | }; | ||
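
FS_INFO and DEV_INFO are designed to be used together: max_id bounds a devid scan, and devids freed by device removal simply fail the per-device call. A trimmed userspace sketch (assuming a missing devid is reported as an error such as -ENODEV):

	struct btrfs_ioctl_fs_info_args fi;
	struct btrfs_ioctl_dev_info_args di;
	__u64 id;

	if (ioctl(fd, BTRFS_IOC_FS_INFO, &fi) < 0)
		return -1;
	for (id = 1; id <= fi.max_id; id++) {
		memset(&di, 0, sizeof(di));
		di.devid = id;
		if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0)
			continue;		/* devid holes are expected */
		printf("devid %llu: %s (%llu of %llu bytes used)\n",
		       (unsigned long long)di.devid, di.path,
		       (unsigned long long)di.bytes_used,
		       (unsigned long long)di.total_bytes);
	}
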
111 | |||
33 | #define BTRFS_INO_LOOKUP_PATH_MAX 4080 | 112 | #define BTRFS_INO_LOOKUP_PATH_MAX 4080 |
34 | struct btrfs_ioctl_ino_lookup_args { | 113 | struct btrfs_ioctl_ino_lookup_args { |
35 | __u64 treeid; | 114 | __u64 treeid; |
@@ -102,30 +181,6 @@ struct btrfs_ioctl_clone_range_args { | |||
102 | #define BTRFS_DEFRAG_RANGE_COMPRESS 1 | 181 | #define BTRFS_DEFRAG_RANGE_COMPRESS 1 |
103 | #define BTRFS_DEFRAG_RANGE_START_IO 2 | 182 | #define BTRFS_DEFRAG_RANGE_START_IO 2 |
104 | 183 | ||
105 | struct btrfs_ioctl_defrag_range_args { | ||
106 | /* start of the defrag operation */ | ||
107 | __u64 start; | ||
108 | |||
109 | /* number of bytes to defrag, use (u64)-1 to say all */ | ||
110 | __u64 len; | ||
111 | |||
112 | /* | ||
113 | * flags for the operation, which can include turning | ||
114 | * on compression for this one defrag | ||
115 | */ | ||
116 | __u64 flags; | ||
117 | |||
118 | /* | ||
119 | * any extent bigger than this will be considered | ||
120 | * already defragged. Use 0 to take the kernel default | ||
121 | * Use 1 to say every single extent must be rewritten | ||
122 | */ | ||
123 | __u32 extent_thresh; | ||
124 | |||
125 | /* spare for later */ | ||
126 | __u32 unused[5]; | ||
127 | }; | ||
128 | |||
129 | struct btrfs_ioctl_space_info { | 184 | struct btrfs_ioctl_space_info { |
130 | __u64 flags; | 185 | __u64 flags; |
131 | __u64 total_bytes; | 186 | __u64 total_bytes; |
@@ -178,4 +233,19 @@ struct btrfs_ioctl_space_args { | |||
178 | #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) | 233 | #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) |
179 | #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ | 234 | #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ |
180 | struct btrfs_ioctl_space_args) | 235 | struct btrfs_ioctl_space_args) |
236 | #define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) | ||
237 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) | ||
238 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ | ||
239 | struct btrfs_ioctl_vol_args_v2) | ||
240 | #define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) | ||
241 | #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) | ||
242 | #define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ | ||
243 | struct btrfs_ioctl_scrub_args) | ||
244 | #define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28) | ||
245 | #define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \ | ||
246 | struct btrfs_ioctl_scrub_args) | ||
247 | #define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \ | ||
248 | struct btrfs_ioctl_dev_info_args) | ||
249 | #define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ | ||
250 | struct btrfs_ioctl_fs_info_args) | ||
181 | #endif | 251 | #endif |
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 6151f2ea38bb..66fa43dc3f0f 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
@@ -185,31 +185,6 @@ sleep: | |||
185 | return 0; | 185 | return 0; |
186 | } | 186 | } |
187 | 187 | ||
188 | /* | ||
189 | * Very quick trylock, this does not spin or schedule. It returns | ||
190 | * 1 with the spinlock held if it was able to take the lock, or it | ||
191 | * returns zero if it was unable to take the lock. | ||
192 | * | ||
193 | * After this call, scheduling is not safe without first calling | ||
194 | * btrfs_set_lock_blocking() | ||
195 | */ | ||
196 | int btrfs_try_tree_lock(struct extent_buffer *eb) | ||
197 | { | ||
198 | if (spin_trylock(&eb->lock)) { | ||
199 | if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { | ||
200 | /* | ||
201 | * we've got the spinlock, but the real owner is | ||
202 | * blocking. Drop the spinlock and return failure | ||
203 | */ | ||
204 | spin_unlock(&eb->lock); | ||
205 | return 0; | ||
206 | } | ||
207 | return 1; | ||
208 | } | ||
209 | /* someone else has the spinlock; give up */ | ||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | int btrfs_tree_unlock(struct extent_buffer *eb) | 188 | int btrfs_tree_unlock(struct extent_buffer *eb) |
214 | { | 189 | { |
215 | /* | 190 | /* |
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index 6c4ce457168c..5c33a560a2f1 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h | |||
@@ -21,8 +21,6 @@ | |||
21 | 21 | ||
22 | int btrfs_tree_lock(struct extent_buffer *eb); | 22 | int btrfs_tree_lock(struct extent_buffer *eb); |
23 | int btrfs_tree_unlock(struct extent_buffer *eb); | 23 | int btrfs_tree_unlock(struct extent_buffer *eb); |
24 | |||
25 | int btrfs_try_tree_lock(struct extent_buffer *eb); | ||
26 | int btrfs_try_spin_lock(struct extent_buffer *eb); | 24 | int btrfs_try_spin_lock(struct extent_buffer *eb); |
27 | 25 | ||
28 | void btrfs_set_lock_blocking(struct extent_buffer *eb); | 26 | void btrfs_set_lock_blocking(struct extent_buffer *eb); |
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c new file mode 100644 index 000000000000..a178f5ebea78 --- /dev/null +++ b/fs/btrfs/lzo.c | |||
@@ -0,0 +1,427 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/slab.h> | ||
21 | #include <linux/vmalloc.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/err.h> | ||
24 | #include <linux/sched.h> | ||
25 | #include <linux/pagemap.h> | ||
26 | #include <linux/bio.h> | ||
27 | #include <linux/lzo.h> | ||
28 | #include "compression.h" | ||
29 | |||
30 | #define LZO_LEN 4 | ||
31 | |||
32 | struct workspace { | ||
33 | void *mem; | ||
34 | void *buf; /* where decompressed data goes */ | ||
35 | void *cbuf; /* where compressed data goes */ | ||
36 | struct list_head list; | ||
37 | }; | ||
38 | |||
39 | static void lzo_free_workspace(struct list_head *ws) | ||
40 | { | ||
41 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
42 | |||
43 | vfree(workspace->buf); | ||
44 | vfree(workspace->cbuf); | ||
45 | vfree(workspace->mem); | ||
46 | kfree(workspace); | ||
47 | } | ||
48 | |||
49 | static struct list_head *lzo_alloc_workspace(void) | ||
50 | { | ||
51 | struct workspace *workspace; | ||
52 | |||
53 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | ||
54 | if (!workspace) | ||
55 | return ERR_PTR(-ENOMEM); | ||
56 | |||
57 | workspace->mem = vmalloc(LZO1X_MEM_COMPRESS); | ||
58 | workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); | ||
59 | workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); | ||
60 | if (!workspace->mem || !workspace->buf || !workspace->cbuf) | ||
61 | goto fail; | ||
62 | |||
63 | INIT_LIST_HEAD(&workspace->list); | ||
64 | |||
65 | return &workspace->list; | ||
66 | fail: | ||
67 | lzo_free_workspace(&workspace->list); | ||
68 | return ERR_PTR(-ENOMEM); | ||
69 | } | ||
70 | |||
71 | static inline void write_compress_length(char *buf, size_t len) | ||
72 | { | ||
73 | __le32 dlen; | ||
74 | |||
75 | dlen = cpu_to_le32(len); | ||
76 | memcpy(buf, &dlen, LZO_LEN); | ||
77 | } | ||
78 | |||
79 | static inline size_t read_compress_length(char *buf) | ||
80 | { | ||
81 | __le32 dlen; | ||
82 | |||
83 | memcpy(&dlen, buf, LZO_LEN); | ||
84 | return le32_to_cpu(dlen); | ||
85 | } | ||
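
These two helpers define the on-disk framing for LZO extents: a 4-byte little-endian total length, then repeated (4-byte segment length, payload) pairs, one segment per chunk of at most one page of input. A simplified walker over a flat buffer (it ignores the page-boundary rule the real code below enforces, where a length header is never split across the last <4 bytes of a page):

	size_t total = read_compress_length(buf);	/* stream size, headers included */
	size_t off = LZO_LEN;

	while (off < total) {
		size_t seg = read_compress_length(buf + off);

		off += LZO_LEN;
		/* seg bytes of LZO1X data decompress to one input chunk */
		off += seg;
	}
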
86 | |||
87 | static int lzo_compress_pages(struct list_head *ws, | ||
88 | struct address_space *mapping, | ||
89 | u64 start, unsigned long len, | ||
90 | struct page **pages, | ||
91 | unsigned long nr_dest_pages, | ||
92 | unsigned long *out_pages, | ||
93 | unsigned long *total_in, | ||
94 | unsigned long *total_out, | ||
95 | unsigned long max_out) | ||
96 | { | ||
97 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
98 | int ret = 0; | ||
99 | char *data_in; | ||
100 | char *cpage_out; | ||
101 | int nr_pages = 0; | ||
102 | struct page *in_page = NULL; | ||
103 | struct page *out_page = NULL; | ||
104 | unsigned long bytes_left; | ||
105 | |||
106 | size_t in_len; | ||
107 | size_t out_len; | ||
108 | char *buf; | ||
109 | unsigned long tot_in = 0; | ||
110 | unsigned long tot_out = 0; | ||
111 | unsigned long pg_bytes_left; | ||
112 | unsigned long out_offset; | ||
113 | unsigned long bytes; | ||
114 | |||
115 | *out_pages = 0; | ||
116 | *total_out = 0; | ||
117 | *total_in = 0; | ||
118 | |||
119 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | ||
120 | data_in = kmap(in_page); | ||
121 | |||
122 | /* | ||
123 | * store the size of all chunks of compressed data in | ||
124 | * the first 4 bytes | ||
125 | */ | ||
126 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
127 | if (out_page == NULL) { | ||
128 | ret = -ENOMEM; | ||
129 | goto out; | ||
130 | } | ||
131 | cpage_out = kmap(out_page); | ||
132 | out_offset = LZO_LEN; | ||
133 | tot_out = LZO_LEN; | ||
134 | pages[0] = out_page; | ||
135 | nr_pages = 1; | ||
136 | pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; | ||
137 | |||
138 | /* compress at most one page of data each time */ | ||
139 | in_len = min(len, PAGE_CACHE_SIZE); | ||
140 | while (tot_in < len) { | ||
141 | ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, | ||
142 | &out_len, workspace->mem); | ||
143 | if (ret != LZO_E_OK) { | ||
144 | printk(KERN_DEBUG "btrfs lzo compress in loop returned %d\n", | ||
145 | ret); | ||
146 | ret = -1; | ||
147 | goto out; | ||
148 | } | ||
149 | |||
150 | /* store the size of this chunk of compressed data */ | ||
151 | write_compress_length(cpage_out + out_offset, out_len); | ||
152 | tot_out += LZO_LEN; | ||
153 | out_offset += LZO_LEN; | ||
154 | pg_bytes_left -= LZO_LEN; | ||
155 | |||
156 | tot_in += in_len; | ||
157 | tot_out += out_len; | ||
158 | |||
159 | /* copy bytes from the working buffer into the pages */ | ||
160 | buf = workspace->cbuf; | ||
161 | while (out_len) { | ||
162 | bytes = min_t(unsigned long, pg_bytes_left, out_len); | ||
163 | |||
164 | memcpy(cpage_out + out_offset, buf, bytes); | ||
165 | |||
166 | out_len -= bytes; | ||
167 | pg_bytes_left -= bytes; | ||
168 | buf += bytes; | ||
169 | out_offset += bytes; | ||
170 | |||
171 | /* | ||
172 | * we need another page for writing out. | ||
173 | * | ||
174 | * Note if there's less than 4 bytes left, we just | ||
175 | * skip to a new page. | ||
176 | */ | ||
177 | if ((out_len == 0 && pg_bytes_left < LZO_LEN) || | ||
178 | pg_bytes_left == 0) { | ||
179 | if (pg_bytes_left) { | ||
180 | memset(cpage_out + out_offset, 0, | ||
181 | pg_bytes_left); | ||
182 | tot_out += pg_bytes_left; | ||
183 | } | ||
184 | |||
185 | /* we're done, don't allocate new page */ | ||
186 | if (out_len == 0 && tot_in >= len) | ||
187 | break; | ||
188 | |||
189 | kunmap(out_page); | ||
190 | if (nr_pages == nr_dest_pages) { | ||
191 | out_page = NULL; | ||
192 | ret = -1; | ||
193 | goto out; | ||
194 | } | ||
195 | |||
196 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
197 | if (out_page == NULL) { | ||
198 | ret = -ENOMEM; | ||
199 | goto out; | ||
200 | } | ||
201 | cpage_out = kmap(out_page); | ||
202 | pages[nr_pages++] = out_page; | ||
203 | |||
204 | pg_bytes_left = PAGE_CACHE_SIZE; | ||
205 | out_offset = 0; | ||
206 | } | ||
207 | } | ||
208 | |||
209 | /* we're making it bigger, give up */ | ||
210 | if (tot_in > 8192 && tot_in < tot_out) | ||
211 | goto out; | ||
212 | |||
213 | /* we're all done */ | ||
214 | if (tot_in >= len) | ||
215 | break; | ||
216 | |||
217 | if (tot_out > max_out) | ||
218 | break; | ||
219 | |||
220 | bytes_left = len - tot_in; | ||
221 | kunmap(in_page); | ||
222 | page_cache_release(in_page); | ||
223 | |||
224 | start += PAGE_CACHE_SIZE; | ||
225 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | ||
226 | data_in = kmap(in_page); | ||
227 | in_len = min(bytes_left, PAGE_CACHE_SIZE); | ||
228 | } | ||
229 | |||
230 | if (tot_out > tot_in) | ||
231 | goto out; | ||
232 | |||
233 | /* store the size of all chunks of compressed data */ | ||
234 | cpage_out = kmap(pages[0]); | ||
235 | write_compress_length(cpage_out, tot_out); | ||
236 | |||
237 | kunmap(pages[0]); | ||
238 | |||
239 | ret = 0; | ||
240 | *total_out = tot_out; | ||
241 | *total_in = tot_in; | ||
242 | out: | ||
243 | *out_pages = nr_pages; | ||
244 | if (out_page) | ||
245 | kunmap(out_page); | ||
246 | |||
247 | if (in_page) { | ||
248 | kunmap(in_page); | ||
249 | page_cache_release(in_page); | ||
250 | } | ||
251 | |||
252 | return ret; | ||
253 | } | ||
254 | |||
255 | static int lzo_decompress_biovec(struct list_head *ws, | ||
256 | struct page **pages_in, | ||
257 | u64 disk_start, | ||
258 | struct bio_vec *bvec, | ||
259 | int vcnt, | ||
260 | size_t srclen) | ||
261 | { | ||
262 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
263 | int ret = 0, ret2; | ||
264 | char *data_in; | ||
265 | unsigned long page_in_index = 0; | ||
266 | unsigned long page_out_index = 0; | ||
267 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | ||
268 | PAGE_CACHE_SIZE; | ||
269 | unsigned long buf_start; | ||
270 | unsigned long buf_offset = 0; | ||
271 | unsigned long bytes; | ||
272 | unsigned long working_bytes; | ||
273 | unsigned long pg_offset; | ||
274 | |||
275 | size_t in_len; | ||
276 | size_t out_len; | ||
277 | unsigned long in_offset; | ||
278 | unsigned long in_page_bytes_left; | ||
279 | unsigned long tot_in; | ||
280 | unsigned long tot_out; | ||
281 | unsigned long tot_len; | ||
282 | char *buf; | ||
283 | bool may_late_unmap, need_unmap; | ||
284 | |||
285 | data_in = kmap(pages_in[0]); | ||
286 | tot_len = read_compress_length(data_in); | ||
287 | |||
288 | tot_in = LZO_LEN; | ||
289 | in_offset = LZO_LEN; | ||
290 | tot_len = min_t(size_t, srclen, tot_len); | ||
291 | in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; | ||
292 | |||
293 | tot_out = 0; | ||
294 | pg_offset = 0; | ||
295 | |||
296 | while (tot_in < tot_len) { | ||
297 | in_len = read_compress_length(data_in + in_offset); | ||
298 | in_page_bytes_left -= LZO_LEN; | ||
299 | in_offset += LZO_LEN; | ||
300 | tot_in += LZO_LEN; | ||
301 | |||
302 | tot_in += in_len; | ||
303 | working_bytes = in_len; | ||
304 | may_late_unmap = need_unmap = false; | ||
305 | |||
306 | /* fast path: avoid using the working buffer */ | ||
307 | if (in_page_bytes_left >= in_len) { | ||
308 | buf = data_in + in_offset; | ||
309 | bytes = in_len; | ||
310 | may_late_unmap = true; | ||
311 | goto cont; | ||
312 | } | ||
313 | |||
314 | /* copy bytes from the pages into the working buffer */ | ||
315 | buf = workspace->cbuf; | ||
316 | buf_offset = 0; | ||
317 | while (working_bytes) { | ||
318 | bytes = min(working_bytes, in_page_bytes_left); | ||
319 | |||
320 | memcpy(buf + buf_offset, data_in + in_offset, bytes); | ||
321 | buf_offset += bytes; | ||
322 | cont: | ||
323 | working_bytes -= bytes; | ||
324 | in_page_bytes_left -= bytes; | ||
325 | in_offset += bytes; | ||
326 | |||
327 | /* check if we need to pick another page */ | ||
328 | if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN) | ||
329 | || in_page_bytes_left == 0) { | ||
330 | tot_in += in_page_bytes_left; | ||
331 | |||
332 | if (working_bytes == 0 && tot_in >= tot_len) | ||
333 | break; | ||
334 | |||
335 | if (page_in_index + 1 >= total_pages_in) { | ||
336 | ret = -1; | ||
337 | goto done; | ||
338 | } | ||
339 | |||
340 | if (may_late_unmap) | ||
341 | need_unmap = true; | ||
342 | else | ||
343 | kunmap(pages_in[page_in_index]); | ||
344 | |||
345 | data_in = kmap(pages_in[++page_in_index]); | ||
346 | |||
347 | in_page_bytes_left = PAGE_CACHE_SIZE; | ||
348 | in_offset = 0; | ||
349 | } | ||
350 | } | ||
351 | |||
352 | out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); | ||
353 | ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, | ||
354 | &out_len); | ||
355 | if (need_unmap) | ||
356 | kunmap(pages_in[page_in_index - 1]); | ||
357 | if (ret != LZO_E_OK) { | ||
358 | printk(KERN_WARNING "btrfs decompress failed\n"); | ||
359 | ret = -1; | ||
360 | break; | ||
361 | } | ||
362 | |||
363 | buf_start = tot_out; | ||
364 | tot_out += out_len; | ||
365 | |||
366 | ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, | ||
367 | tot_out, disk_start, | ||
368 | bvec, vcnt, | ||
369 | &page_out_index, &pg_offset); | ||
370 | if (ret2 == 0) | ||
371 | break; | ||
372 | } | ||
373 | done: | ||
374 | kunmap(pages_in[page_in_index]); | ||
375 | return ret; | ||
376 | } | ||
377 | |||
378 | static int lzo_decompress(struct list_head *ws, unsigned char *data_in, | ||
379 | struct page *dest_page, | ||
380 | unsigned long start_byte, | ||
381 | size_t srclen, size_t destlen) | ||
382 | { | ||
383 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
384 | size_t in_len; | ||
385 | size_t out_len; | ||
386 | size_t tot_len; | ||
387 | int ret = 0; | ||
388 | char *kaddr; | ||
389 | unsigned long bytes; | ||
390 | |||
391 | BUG_ON(srclen < LZO_LEN); | ||
392 | |||
393 | tot_len = read_compress_length(data_in); | ||
394 | data_in += LZO_LEN; | ||
395 | |||
396 | in_len = read_compress_length(data_in); | ||
397 | data_in += LZO_LEN; | ||
398 | |||
399 | out_len = PAGE_CACHE_SIZE; | ||
400 | ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); | ||
401 | if (ret != LZO_E_OK) { | ||
402 | printk(KERN_WARNING "btrfs decompress failed!\n"); | ||
403 | ret = -1; | ||
404 | goto out; | ||
405 | } | ||
406 | |||
407 | if (out_len < start_byte) { | ||
408 | ret = -1; | ||
409 | goto out; | ||
410 | } | ||
411 | |||
412 | bytes = min_t(unsigned long, destlen, out_len - start_byte); | ||
413 | |||
414 | kaddr = kmap_atomic(dest_page, KM_USER0); | ||
415 | memcpy(kaddr, workspace->buf + start_byte, bytes); | ||
416 | kunmap_atomic(kaddr, KM_USER0); | ||
417 | out: | ||
418 | return ret; | ||
419 | } | ||
420 | |||
421 | struct btrfs_compress_op btrfs_lzo_compress = { | ||
422 | .alloc_workspace = lzo_alloc_workspace, | ||
423 | .free_workspace = lzo_free_workspace, | ||
424 | .compress_pages = lzo_compress_pages, | ||
425 | .decompress_biovec = lzo_decompress_biovec, | ||
426 | .decompress = lzo_decompress, | ||
427 | }; | ||
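
Exporting an ops vector is what keeps the generic compression code algorithm-agnostic. The dispatch in compression.c is, roughly (a sketch following the existing zlib wiring), an array indexed by the stored compress type:

	static struct btrfs_compress_op *btrfs_compress_op[] = {
		&btrfs_zlib_compress,
		&btrfs_lzo_compress,
	};

	/* BTRFS_COMPRESS_NONE is 0, so real algorithms live at index type - 1 */
	ret = btrfs_compress_op[type - 1]->compress_pages(ws, mapping, start, len,
							  pages, nr_dest_pages,
							  out_pages, total_in,
							  total_out, max_out);
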
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index e56c72bc5add..a1c940425307 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -141,7 +141,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
141 | u64 file_offset) | 141 | u64 file_offset) |
142 | { | 142 | { |
143 | struct rb_root *root = &tree->tree; | 143 | struct rb_root *root = &tree->tree; |
144 | struct rb_node *prev; | 144 | struct rb_node *prev = NULL; |
145 | struct rb_node *ret; | 145 | struct rb_node *ret; |
146 | struct btrfs_ordered_extent *entry; | 146 | struct btrfs_ordered_extent *entry; |
147 | 147 | ||
@@ -172,7 +172,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
172 | */ | 172 | */ |
173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
174 | u64 start, u64 len, u64 disk_len, | 174 | u64 start, u64 len, u64 disk_len, |
175 | int type, int dio) | 175 | int type, int dio, int compress_type) |
176 | { | 176 | { |
177 | struct btrfs_ordered_inode_tree *tree; | 177 | struct btrfs_ordered_inode_tree *tree; |
178 | struct rb_node *node; | 178 | struct rb_node *node; |
@@ -189,6 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
189 | entry->disk_len = disk_len; | 189 | entry->disk_len = disk_len; |
190 | entry->bytes_left = len; | 190 | entry->bytes_left = len; |
191 | entry->inode = inode; | 191 | entry->inode = inode; |
192 | entry->compress_type = compress_type; | ||
192 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 193 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
193 | set_bit(type, &entry->flags); | 194 | set_bit(type, &entry->flags); |
194 | 195 | ||
@@ -201,6 +202,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
201 | INIT_LIST_HEAD(&entry->list); | 202 | INIT_LIST_HEAD(&entry->list); |
202 | INIT_LIST_HEAD(&entry->root_extent_list); | 203 | INIT_LIST_HEAD(&entry->root_extent_list); |
203 | 204 | ||
205 | trace_btrfs_ordered_extent_add(inode, entry); | ||
206 | |||
204 | spin_lock(&tree->lock); | 207 | spin_lock(&tree->lock); |
205 | node = tree_insert(&tree->tree, file_offset, | 208 | node = tree_insert(&tree->tree, file_offset, |
206 | &entry->rb_node); | 209 | &entry->rb_node); |
@@ -220,14 +223,25 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
220 | u64 start, u64 len, u64 disk_len, int type) | 223 | u64 start, u64 len, u64 disk_len, int type) |
221 | { | 224 | { |
222 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | 225 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, |
223 | disk_len, type, 0); | 226 | disk_len, type, 0, |
227 | BTRFS_COMPRESS_NONE); | ||
224 | } | 228 | } |
225 | 229 | ||
226 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 230 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
227 | u64 start, u64 len, u64 disk_len, int type) | 231 | u64 start, u64 len, u64 disk_len, int type) |
228 | { | 232 | { |
229 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | 233 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, |
230 | disk_len, type, 1); | 234 | disk_len, type, 1, |
235 | BTRFS_COMPRESS_NONE); | ||
236 | } | ||
237 | |||
238 | int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, | ||
239 | u64 start, u64 len, u64 disk_len, | ||
240 | int type, int compress_type) | ||
241 | { | ||
242 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
243 | disk_len, type, 0, | ||
244 | compress_type); | ||
231 | } | 245 | } |
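
All three wrappers funnel into __btrfs_add_ordered_extent(); the new one exists so the compressed write path can record which algorithm produced the extent. A simplified caller (the async_extent names stand in for the real plumbing in inode.c):

	ret = btrfs_add_ordered_extent_compress(inode,
					async_extent->start,	/* file offset */
					ins.objectid,		/* disk start */
					async_extent->ram_size,	/* uncompressed len */
					ins.offset,		/* compressed disk len */
					BTRFS_ORDERED_COMPRESSED,
					async_extent->compress_type);
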
232 | 246 | ||
233 | /* | 247 | /* |
@@ -250,6 +264,73 @@ int btrfs_add_ordered_sum(struct inode *inode, | |||
250 | 264 | ||
251 | /* | 265 | /* |
252 | * this is used to account for finished IO across a given range | 266 | * this is used to account for finished IO across a given range |
267 | * of the file. The IO may span ordered extents. If | ||
268 | * a given ordered_extent is completely done, 1 is returned, otherwise | ||
269 | * 0. | ||
270 | * | ||
271 | * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used | ||
272 | * to make sure this function only returns 1 once for a given ordered extent. | ||
273 | * | ||
274 | * file_offset is updated to one byte past the range that is recorded as | ||
275 | * complete. This allows you to walk forward in the file. | ||
276 | */ | ||
277 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, | ||
278 | struct btrfs_ordered_extent **cached, | ||
279 | u64 *file_offset, u64 io_size) | ||
280 | { | ||
281 | struct btrfs_ordered_inode_tree *tree; | ||
282 | struct rb_node *node; | ||
283 | struct btrfs_ordered_extent *entry = NULL; | ||
284 | int ret; | ||
285 | u64 dec_end; | ||
286 | u64 dec_start; | ||
287 | u64 to_dec; | ||
288 | |||
289 | tree = &BTRFS_I(inode)->ordered_tree; | ||
290 | spin_lock(&tree->lock); | ||
291 | node = tree_search(tree, *file_offset); | ||
292 | if (!node) { | ||
293 | ret = 1; | ||
294 | goto out; | ||
295 | } | ||
296 | |||
297 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
298 | if (!offset_in_entry(entry, *file_offset)) { | ||
299 | ret = 1; | ||
300 | goto out; | ||
301 | } | ||
302 | |||
303 | dec_start = max(*file_offset, entry->file_offset); | ||
304 | dec_end = min(*file_offset + io_size, entry->file_offset + | ||
305 | entry->len); | ||
306 | *file_offset = dec_end; | ||
307 | if (dec_start > dec_end) { | ||
308 | printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n", | ||
309 | (unsigned long long)dec_start, | ||
310 | (unsigned long long)dec_end); | ||
311 | } | ||
312 | to_dec = dec_end - dec_start; | ||
313 | if (to_dec > entry->bytes_left) { | ||
314 | printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", | ||
315 | (unsigned long long)entry->bytes_left, | ||
316 | (unsigned long long)to_dec); | ||
317 | } | ||
318 | entry->bytes_left -= to_dec; | ||
319 | if (entry->bytes_left == 0) | ||
320 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | ||
321 | else | ||
322 | ret = 1; | ||
323 | out: | ||
324 | if (!ret && cached && entry) { | ||
325 | *cached = entry; | ||
326 | atomic_inc(&entry->refs); | ||
327 | } | ||
328 | spin_unlock(&tree->lock); | ||
329 | return ret == 0; | ||
330 | } | ||
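
A simplified sketch of the walk-forward pattern this enables when one IO spans several ordered extents (the real user, direct IO write completion, adds the actual finishing work and error paths; finish_one_ordered_extent() is hypothetical):

	u64 offset = start;
	u64 end = start + len;

	while (offset < end) {
		struct btrfs_ordered_extent *ordered = NULL;
		u64 prev = offset;

		if (btrfs_dec_test_first_ordered_pending(inode, &ordered,
							 &offset, end - offset)) {
			/* this ordered extent is fully written: finish it */
			finish_one_ordered_extent(inode, ordered);
			btrfs_put_ordered_extent(ordered);
		}
		if (offset == prev)
			break;	/* no ordered extent covered this offset */
	}
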
331 | |||
332 | /* | ||
333 | * this is used to account for finished IO across a given range | ||
253 | * of the file. The IO should not span ordered extents. If | 334 | * of the file. The IO should not span ordered extents. If |
254 | * a given ordered_extent is completely done, 1 is returned, otherwise | 335 | * a given ordered_extent is completely done, 1 is returned, otherwise |
255 | * 0. | 336 | * 0. |
@@ -308,6 +389,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | |||
308 | struct list_head *cur; | 389 | struct list_head *cur; |
309 | struct btrfs_ordered_sum *sum; | 390 | struct btrfs_ordered_sum *sum; |
310 | 391 | ||
392 | trace_btrfs_ordered_extent_put(entry->inode, entry); | ||
393 | |||
311 | if (atomic_dec_and_test(&entry->refs)) { | 394 | if (atomic_dec_and_test(&entry->refs)) { |
312 | while (!list_empty(&entry->list)) { | 395 | while (!list_empty(&entry->list)) { |
313 | cur = entry->list.next; | 396 | cur = entry->list.next; |
@@ -341,6 +424,8 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, | |||
341 | spin_lock(&root->fs_info->ordered_extent_lock); | 424 | spin_lock(&root->fs_info->ordered_extent_lock); |
342 | list_del_init(&entry->root_extent_list); | 425 | list_del_init(&entry->root_extent_list); |
343 | 426 | ||
427 | trace_btrfs_ordered_extent_remove(inode, entry); | ||
428 | |||
344 | /* | 429 | /* |
345 | * we have no more ordered extents for this inode and | 430 | * we have no more ordered extents for this inode and |
346 | * no dirty pages. We can safely remove it from the | 431 | * no dirty pages. We can safely remove it from the |
@@ -506,6 +591,8 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
506 | u64 start = entry->file_offset; | 591 | u64 start = entry->file_offset; |
507 | u64 end = start + entry->len - 1; | 592 | u64 end = start + entry->len - 1; |
508 | 593 | ||
594 | trace_btrfs_ordered_extent_start(inode, entry); | ||
595 | |||
509 | /* | 596 | /* |
510 | * pages in the range can be dirty, clean or writeback. We | 597 | * pages in the range can be dirty, clean or writeback. We |
511 | * start IO on any dirty ones so the wait doesn't stall waiting | 598 | * start IO on any dirty ones so the wait doesn't stall waiting |
@@ -526,7 +613,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
526 | { | 613 | { |
527 | u64 end; | 614 | u64 end; |
528 | u64 orig_end; | 615 | u64 orig_end; |
529 | u64 wait_end; | ||
530 | struct btrfs_ordered_extent *ordered; | 616 | struct btrfs_ordered_extent *ordered; |
531 | int found; | 617 | int found; |
532 | 618 | ||
@@ -537,7 +623,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
537 | if (orig_end > INT_LIMIT(loff_t)) | 623 | if (orig_end > INT_LIMIT(loff_t)) |
538 | orig_end = INT_LIMIT(loff_t); | 624 | orig_end = INT_LIMIT(loff_t); |
539 | } | 625 | } |
540 | wait_end = orig_end; | ||
541 | again: | 626 | again: |
542 | /* start IO across the range first to instantiate any delalloc | 627 | /* start IO across the range first to instantiate any delalloc |
543 | * extents | 628 | * extents |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 8ac365492a3f..ff1f69aa1883 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -68,7 +68,7 @@ struct btrfs_ordered_sum { | |||
68 | 68 | ||
69 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ | 69 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ |
70 | 70 | ||
71 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */ | 71 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ |
72 | 72 | ||
73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ | 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ |
74 | 74 | ||
@@ -93,6 +93,9 @@ struct btrfs_ordered_extent { | |||
93 | /* flags (described above) */ | 93 | /* flags (described above) */ |
94 | unsigned long flags; | 94 | unsigned long flags; |
95 | 95 | ||
96 | /* compression algorithm */ | ||
97 | int compress_type; | ||
98 | |||
96 | /* reference count */ | 99 | /* reference count */ |
97 | atomic_t refs; | 100 | atomic_t refs; |
98 | 101 | ||
@@ -141,10 +144,16 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
141 | int btrfs_dec_test_ordered_pending(struct inode *inode, | 144 | int btrfs_dec_test_ordered_pending(struct inode *inode, |
142 | struct btrfs_ordered_extent **cached, | 145 | struct btrfs_ordered_extent **cached, |
143 | u64 file_offset, u64 io_size); | 146 | u64 file_offset, u64 io_size); |
147 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, | ||
148 | struct btrfs_ordered_extent **cached, | ||
149 | u64 *file_offset, u64 io_size); | ||
144 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 150 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
145 | u64 start, u64 len, u64 disk_len, int type); | 151 | u64 start, u64 len, u64 disk_len, int type); |
146 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 152 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
147 | u64 start, u64 len, u64 disk_len, int type); | 153 | u64 start, u64 len, u64 disk_len, int type); |
154 | int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, | ||
155 | u64 start, u64 len, u64 disk_len, | ||
156 | int type, int compress_type); | ||
148 | int btrfs_add_ordered_sum(struct inode *inode, | 157 | int btrfs_add_ordered_sum(struct inode *inode, |
149 | struct btrfs_ordered_extent *entry, | 158 | struct btrfs_ordered_extent *entry, |
150 | struct btrfs_ordered_sum *sum); | 159 | struct btrfs_ordered_sum *sum); |
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c index 79cba5fbc28e..f8be250963a0 100644 --- a/fs/btrfs/orphan.c +++ b/fs/btrfs/orphan.c | |||
@@ -56,8 +56,12 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, | |||
56 | return -ENOMEM; | 56 | return -ENOMEM; |
57 | 57 | ||
58 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 58 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
59 | if (ret) | 59 | if (ret < 0) |
60 | goto out; | 60 | goto out; |
61 | if (ret) { | ||
62 | ret = -ENOENT; | ||
63 | goto out; | ||
64 | } | ||
61 | 65 | ||
62 | ret = btrfs_del_item(trans, root, path); | 66 | ret = btrfs_del_item(trans, root, path); |
63 | 67 | ||
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 0d126be22b63..fb2605d998e9 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
@@ -260,6 +260,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
260 | #else | 260 | #else |
261 | BUG(); | 261 | BUG(); |
262 | #endif | 262 | #endif |
263 | break; | ||
263 | case BTRFS_BLOCK_GROUP_ITEM_KEY: | 264 | case BTRFS_BLOCK_GROUP_ITEM_KEY: |
264 | bi = btrfs_item_ptr(l, i, | 265 | bi = btrfs_item_ptr(l, i, |
265 | struct btrfs_block_group_item); | 266 | struct btrfs_block_group_item); |
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index a97314cf6bd6..82d569cb6267 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c | |||
@@ -23,56 +23,6 @@ | |||
23 | #include "ref-cache.h" | 23 | #include "ref-cache.h" |
24 | #include "transaction.h" | 24 | #include "transaction.h" |
25 | 25 | ||
26 | /* | ||
27 | * leaf refs are used to cache the information about which extents | ||
28 | * a given leaf has references on. This allows us to process that leaf | ||
29 | * in btrfs_drop_snapshot without needing to read it back from disk. | ||
30 | */ | ||
31 | |||
32 | /* | ||
33 | * kmalloc a leaf reference struct and update the counters for the | ||
34 | * total ref cache size | ||
35 | */ | ||
36 | struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, | ||
37 | int nr_extents) | ||
38 | { | ||
39 | struct btrfs_leaf_ref *ref; | ||
40 | size_t size = btrfs_leaf_ref_size(nr_extents); | ||
41 | |||
42 | ref = kmalloc(size, GFP_NOFS); | ||
43 | if (ref) { | ||
44 | spin_lock(&root->fs_info->ref_cache_lock); | ||
45 | root->fs_info->total_ref_cache_size += size; | ||
46 | spin_unlock(&root->fs_info->ref_cache_lock); | ||
47 | |||
48 | memset(ref, 0, sizeof(*ref)); | ||
49 | atomic_set(&ref->usage, 1); | ||
50 | INIT_LIST_HEAD(&ref->list); | ||
51 | } | ||
52 | return ref; | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * free a leaf reference struct and update the counters for the | ||
57 | * total ref cache size | ||
58 | */ | ||
59 | void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) | ||
60 | { | ||
61 | if (!ref) | ||
62 | return; | ||
63 | WARN_ON(atomic_read(&ref->usage) == 0); | ||
64 | if (atomic_dec_and_test(&ref->usage)) { | ||
65 | size_t size = btrfs_leaf_ref_size(ref->nritems); | ||
66 | |||
67 | BUG_ON(ref->in_tree); | ||
68 | kfree(ref); | ||
69 | |||
70 | spin_lock(&root->fs_info->ref_cache_lock); | ||
71 | root->fs_info->total_ref_cache_size -= size; | ||
72 | spin_unlock(&root->fs_info->ref_cache_lock); | ||
73 | } | ||
74 | } | ||
75 | |||
76 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, | 26 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, |
77 | struct rb_node *node) | 27 | struct rb_node *node) |
78 | { | 28 | { |
@@ -116,117 +66,3 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) | |||
116 | } | 66 | } |
117 | return NULL; | 67 | return NULL; |
118 | } | 68 | } |
119 | |||
120 | int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, | ||
121 | int shared) | ||
122 | { | ||
123 | struct btrfs_leaf_ref *ref = NULL; | ||
124 | struct btrfs_leaf_ref_tree *tree = root->ref_tree; | ||
125 | |||
126 | if (shared) | ||
127 | tree = &root->fs_info->shared_ref_tree; | ||
128 | if (!tree) | ||
129 | return 0; | ||
130 | |||
131 | spin_lock(&tree->lock); | ||
132 | while (!list_empty(&tree->list)) { | ||
133 | ref = list_entry(tree->list.next, struct btrfs_leaf_ref, list); | ||
134 | BUG_ON(ref->tree != tree); | ||
135 | if (ref->root_gen > max_root_gen) | ||
136 | break; | ||
137 | if (!xchg(&ref->in_tree, 0)) { | ||
138 | cond_resched_lock(&tree->lock); | ||
139 | continue; | ||
140 | } | ||
141 | |||
142 | rb_erase(&ref->rb_node, &tree->root); | ||
143 | list_del_init(&ref->list); | ||
144 | |||
145 | spin_unlock(&tree->lock); | ||
146 | btrfs_free_leaf_ref(root, ref); | ||
147 | cond_resched(); | ||
148 | spin_lock(&tree->lock); | ||
149 | } | ||
150 | spin_unlock(&tree->lock); | ||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | /* | ||
155 | * find the leaf ref for a given extent. This returns the ref struct with | ||
156 | * a usage reference incremented | ||
157 | */ | ||
158 | struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, | ||
159 | u64 bytenr) | ||
160 | { | ||
161 | struct rb_node *rb; | ||
162 | struct btrfs_leaf_ref *ref = NULL; | ||
163 | struct btrfs_leaf_ref_tree *tree = root->ref_tree; | ||
164 | again: | ||
165 | if (tree) { | ||
166 | spin_lock(&tree->lock); | ||
167 | rb = tree_search(&tree->root, bytenr); | ||
168 | if (rb) | ||
169 | ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); | ||
170 | if (ref) | ||
171 | atomic_inc(&ref->usage); | ||
172 | spin_unlock(&tree->lock); | ||
173 | if (ref) | ||
174 | return ref; | ||
175 | } | ||
176 | if (tree != &root->fs_info->shared_ref_tree) { | ||
177 | tree = &root->fs_info->shared_ref_tree; | ||
178 | goto again; | ||
179 | } | ||
180 | return NULL; | ||
181 | } | ||
182 | |||
183 | /* | ||
184 | * add a fully filled in leaf ref struct | ||
185 | * remove all the refs older than a given root generation | ||
186 | */ | ||
187 | int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, | ||
188 | int shared) | ||
189 | { | ||
190 | int ret = 0; | ||
191 | struct rb_node *rb; | ||
192 | struct btrfs_leaf_ref_tree *tree = root->ref_tree; | ||
193 | |||
194 | if (shared) | ||
195 | tree = &root->fs_info->shared_ref_tree; | ||
196 | |||
197 | spin_lock(&tree->lock); | ||
198 | rb = tree_insert(&tree->root, ref->bytenr, &ref->rb_node); | ||
199 | if (rb) { | ||
200 | ret = -EEXIST; | ||
201 | } else { | ||
202 | atomic_inc(&ref->usage); | ||
203 | ref->tree = tree; | ||
204 | ref->in_tree = 1; | ||
205 | list_add_tail(&ref->list, &tree->list); | ||
206 | } | ||
207 | spin_unlock(&tree->lock); | ||
208 | return ret; | ||
209 | } | ||
210 | |||
211 | /* | ||
212 | * remove a single leaf ref from the tree. This drops the ref held by the tree | ||
213 | * only | ||
214 | */ | ||
215 | int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) | ||
216 | { | ||
217 | struct btrfs_leaf_ref_tree *tree; | ||
218 | |||
219 | if (!xchg(&ref->in_tree, 0)) | ||
220 | return 0; | ||
221 | |||
222 | tree = ref->tree; | ||
223 | spin_lock(&tree->lock); | ||
224 | |||
225 | rb_erase(&ref->rb_node, &tree->root); | ||
226 | list_del_init(&ref->list); | ||
227 | |||
228 | spin_unlock(&tree->lock); | ||
229 | |||
230 | btrfs_free_leaf_ref(root, ref); | ||
231 | return 0; | ||
232 | } | ||
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h index e2a55cb2072b..24f7001f6387 100644 --- a/fs/btrfs/ref-cache.h +++ b/fs/btrfs/ref-cache.h | |||
@@ -49,28 +49,4 @@ static inline size_t btrfs_leaf_ref_size(int nr_extents) | |||
49 | return sizeof(struct btrfs_leaf_ref) + | 49 | return sizeof(struct btrfs_leaf_ref) + |
50 | sizeof(struct btrfs_extent_info) * nr_extents; | 50 | sizeof(struct btrfs_extent_info) * nr_extents; |
51 | } | 51 | } |
52 | |||
53 | static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree) | ||
54 | { | ||
55 | tree->root = RB_ROOT; | ||
56 | INIT_LIST_HEAD(&tree->list); | ||
57 | spin_lock_init(&tree->lock); | ||
58 | } | ||
59 | |||
60 | static inline int btrfs_leaf_ref_tree_empty(struct btrfs_leaf_ref_tree *tree) | ||
61 | { | ||
62 | return RB_EMPTY_ROOT(&tree->root); | ||
63 | } | ||
64 | |||
65 | void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree); | ||
66 | struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, | ||
67 | int nr_extents); | ||
68 | void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); | ||
69 | struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, | ||
70 | u64 bytenr); | ||
71 | int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, | ||
72 | int shared); | ||
73 | int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, | ||
74 | int shared); | ||
75 | int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); | ||
76 | #endif | 52 | #endif |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b37d723b9d4a..5e0a3dc79a45 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -29,6 +29,8 @@ | |||
29 | #include "locking.h" | 29 | #include "locking.h" |
30 | #include "btrfs_inode.h" | 30 | #include "btrfs_inode.h" |
31 | #include "async-thread.h" | 31 | #include "async-thread.h" |
32 | #include "free-space-cache.h" | ||
33 | #include "inode-map.h" | ||
32 | 34 | ||
33 | /* | 35 | /* |
34 | * backref_node, mapping_node and tree_block start with this | 36 | * backref_node, mapping_node and tree_block start with this |
@@ -178,8 +180,6 @@ struct reloc_control { | |||
178 | u64 search_start; | 180 | u64 search_start; |
179 | u64 extents_found; | 181 | u64 extents_found; |
180 | 182 | ||
181 | int block_rsv_retries; | ||
182 | |||
183 | unsigned int stage:8; | 183 | unsigned int stage:8; |
184 | unsigned int create_reloc_tree:1; | 184 | unsigned int create_reloc_tree:1; |
185 | unsigned int merge_reloc_tree:1; | 185 | unsigned int merge_reloc_tree:1; |
@@ -508,6 +508,7 @@ static int update_backref_cache(struct btrfs_trans_handle *trans, | |||
508 | return 1; | 508 | return 1; |
509 | } | 509 | } |
510 | 510 | ||
511 | |||
511 | static int should_ignore_root(struct btrfs_root *root) | 512 | static int should_ignore_root(struct btrfs_root *root) |
512 | { | 513 | { |
513 | struct btrfs_root *reloc_root; | 514 | struct btrfs_root *reloc_root; |
@@ -530,7 +531,6 @@ static int should_ignore_root(struct btrfs_root *root) | |||
530 | */ | 531 | */ |
531 | return 1; | 532 | return 1; |
532 | } | 533 | } |
533 | |||
534 | /* | 534 | /* |
535 | * find reloc tree by address of tree root | 535 | * find reloc tree by address of tree root |
536 | */ | 536 | */ |
@@ -677,6 +677,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, | |||
677 | err = -ENOMEM; | 677 | err = -ENOMEM; |
678 | goto out; | 678 | goto out; |
679 | } | 679 | } |
680 | path1->reada = 1; | ||
681 | path2->reada = 2; | ||
680 | 682 | ||
681 | node = alloc_backref_node(cache); | 683 | node = alloc_backref_node(cache); |
682 | if (!node) { | 684 | if (!node) { |
@@ -710,7 +712,7 @@ again: | |||
710 | WARN_ON(cur->checked); | 712 | WARN_ON(cur->checked); |
711 | if (!list_empty(&cur->upper)) { | 713 | if (!list_empty(&cur->upper)) { |
712 | /* | 714 | /* |
713 | * the backref was added previously when processsing | 715 | * the backref was added previously when processing |
714 | * backref of type BTRFS_TREE_BLOCK_REF_KEY | 716 | * backref of type BTRFS_TREE_BLOCK_REF_KEY |
715 | */ | 717 | */ |
716 | BUG_ON(!list_is_singular(&cur->upper)); | 718 | BUG_ON(!list_is_singular(&cur->upper)); |
@@ -962,7 +964,7 @@ again: | |||
962 | lower = upper; | 964 | lower = upper; |
963 | upper = NULL; | 965 | upper = NULL; |
964 | } | 966 | } |
965 | btrfs_release_path(root, path2); | 967 | btrfs_release_path(path2); |
966 | next: | 968 | next: |
967 | if (ptr < end) { | 969 | if (ptr < end) { |
968 | ptr += btrfs_extent_inline_ref_size(key.type); | 970 | ptr += btrfs_extent_inline_ref_size(key.type); |
@@ -975,7 +977,7 @@ next: | |||
975 | if (ptr >= end) | 977 | if (ptr >= end) |
976 | path1->slots[0]++; | 978 | path1->slots[0]++; |
977 | } | 979 | } |
978 | btrfs_release_path(rc->extent_root, path1); | 980 | btrfs_release_path(path1); |
979 | 981 | ||
980 | cur->checked = 1; | 982 | cur->checked = 1; |
981 | WARN_ON(exist); | 983 | WARN_ON(exist); |
@@ -1158,6 +1160,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans, | |||
1158 | new_node->bytenr = dest->node->start; | 1160 | new_node->bytenr = dest->node->start; |
1159 | new_node->level = node->level; | 1161 | new_node->level = node->level; |
1160 | new_node->lowest = node->lowest; | 1162 | new_node->lowest = node->lowest; |
1163 | new_node->checked = 1; | ||
1161 | new_node->root = dest; | 1164 | new_node->root = dest; |
1162 | 1165 | ||
1163 | if (!node->lowest) { | 1166 | if (!node->lowest) { |
@@ -1365,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
1365 | int ret; | 1368 | int ret; |
1366 | 1369 | ||
1367 | if (!root->reloc_root) | 1370 | if (!root->reloc_root) |
1368 | return 0; | 1371 | goto out; |
1369 | 1372 | ||
1370 | reloc_root = root->reloc_root; | 1373 | reloc_root = root->reloc_root; |
1371 | root_item = &reloc_root->root_item; | 1374 | root_item = &reloc_root->root_item; |
@@ -1387,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
1387 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | 1390 | ret = btrfs_update_root(trans, root->fs_info->tree_root, |
1388 | &reloc_root->root_key, root_item); | 1391 | &reloc_root->root_key, root_item); |
1389 | BUG_ON(ret); | 1392 | BUG_ON(ret); |
1393 | |||
1394 | out: | ||
1390 | return 0; | 1395 | return 0; |
1391 | } | 1396 | } |
1392 | 1397 | ||
@@ -1409,9 +1414,9 @@ again: | |||
1409 | prev = node; | 1414 | prev = node; |
1410 | entry = rb_entry(node, struct btrfs_inode, rb_node); | 1415 | entry = rb_entry(node, struct btrfs_inode, rb_node); |
1411 | 1416 | ||
1412 | if (objectid < entry->vfs_inode.i_ino) | 1417 | if (objectid < btrfs_ino(&entry->vfs_inode)) |
1413 | node = node->rb_left; | 1418 | node = node->rb_left; |
1414 | else if (objectid > entry->vfs_inode.i_ino) | 1419 | else if (objectid > btrfs_ino(&entry->vfs_inode)) |
1415 | node = node->rb_right; | 1420 | node = node->rb_right; |
1416 | else | 1421 | else |
1417 | break; | 1422 | break; |
@@ -1419,7 +1424,7 @@ again: | |||
1419 | if (!node) { | 1424 | if (!node) { |
1420 | while (prev) { | 1425 | while (prev) { |
1421 | entry = rb_entry(prev, struct btrfs_inode, rb_node); | 1426 | entry = rb_entry(prev, struct btrfs_inode, rb_node); |
1422 | if (objectid <= entry->vfs_inode.i_ino) { | 1427 | if (objectid <= btrfs_ino(&entry->vfs_inode)) { |
1423 | node = prev; | 1428 | node = prev; |
1424 | break; | 1429 | break; |
1425 | } | 1430 | } |
@@ -1434,7 +1439,7 @@ again: | |||
1434 | return inode; | 1439 | return inode; |
1435 | } | 1440 | } |
1436 | 1441 | ||
1437 | objectid = entry->vfs_inode.i_ino + 1; | 1442 | objectid = btrfs_ino(&entry->vfs_inode) + 1; |
1438 | if (cond_resched_lock(&root->inode_lock)) | 1443 | if (cond_resched_lock(&root->inode_lock)) |
1439 | goto again; | 1444 | goto again; |
1440 | 1445 | ||
@@ -1470,7 +1475,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, | |||
1470 | return -ENOMEM; | 1475 | return -ENOMEM; |
1471 | 1476 | ||
1472 | bytenr -= BTRFS_I(reloc_inode)->index_cnt; | 1477 | bytenr -= BTRFS_I(reloc_inode)->index_cnt; |
1473 | ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, | 1478 | ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode), |
1474 | bytenr, 0); | 1479 | bytenr, 0); |
1475 | if (ret < 0) | 1480 | if (ret < 0) |
1476 | goto out; | 1481 | goto out; |
@@ -1558,11 +1563,11 @@ int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1558 | if (first) { | 1563 | if (first) { |
1559 | inode = find_next_inode(root, key.objectid); | 1564 | inode = find_next_inode(root, key.objectid); |
1560 | first = 0; | 1565 | first = 0; |
1561 | } else if (inode && inode->i_ino < key.objectid) { | 1566 | } else if (inode && btrfs_ino(inode) < key.objectid) { |
1562 | btrfs_add_delayed_iput(inode); | 1567 | btrfs_add_delayed_iput(inode); |
1563 | inode = find_next_inode(root, key.objectid); | 1568 | inode = find_next_inode(root, key.objectid); |
1564 | } | 1569 | } |
1565 | if (inode && inode->i_ino == key.objectid) { | 1570 | if (inode && btrfs_ino(inode) == key.objectid) { |
1566 | end = key.offset + | 1571 | end = key.offset + |
1567 | btrfs_file_extent_num_bytes(leaf, fi); | 1572 | btrfs_file_extent_num_bytes(leaf, fi); |
1568 | WARN_ON(!IS_ALIGNED(key.offset, | 1573 | WARN_ON(!IS_ALIGNED(key.offset, |
@@ -1724,6 +1729,7 @@ again: | |||
1724 | 1729 | ||
1725 | eb = read_tree_block(dest, old_bytenr, blocksize, | 1730 | eb = read_tree_block(dest, old_bytenr, blocksize, |
1726 | old_ptr_gen); | 1731 | old_ptr_gen); |
1732 | BUG_ON(!eb); | ||
1727 | btrfs_tree_lock(eb); | 1733 | btrfs_tree_lock(eb); |
1728 | if (cow) { | 1734 | if (cow) { |
1729 | ret = btrfs_cow_block(trans, dest, eb, parent, | 1735 | ret = btrfs_cow_block(trans, dest, eb, parent, |
@@ -1748,7 +1754,7 @@ again: | |||
1748 | 1754 | ||
1749 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 1755 | btrfs_node_key_to_cpu(path->nodes[level], &key, |
1750 | path->slots[level]); | 1756 | path->slots[level]); |
1751 | btrfs_release_path(src, path); | 1757 | btrfs_release_path(path); |
1752 | 1758 | ||
1753 | path->lowest_level = level; | 1759 | path->lowest_level = level; |
1754 | ret = btrfs_search_slot(trans, src, &key, path, 0, 1); | 1760 | ret = btrfs_search_slot(trans, src, &key, path, 0, 1); |
@@ -1892,6 +1898,7 @@ static int invalidate_extent_cache(struct btrfs_root *root, | |||
1892 | struct inode *inode = NULL; | 1898 | struct inode *inode = NULL; |
1893 | u64 objectid; | 1899 | u64 objectid; |
1894 | u64 start, end; | 1900 | u64 start, end; |
1901 | u64 ino; | ||
1895 | 1902 | ||
1896 | objectid = min_key->objectid; | 1903 | objectid = min_key->objectid; |
1897 | while (1) { | 1904 | while (1) { |
@@ -1904,17 +1911,18 @@ static int invalidate_extent_cache(struct btrfs_root *root, | |||
1904 | inode = find_next_inode(root, objectid); | 1911 | inode = find_next_inode(root, objectid); |
1905 | if (!inode) | 1912 | if (!inode) |
1906 | break; | 1913 | break; |
1914 | ino = btrfs_ino(inode); | ||
1907 | 1915 | ||
1908 | if (inode->i_ino > max_key->objectid) { | 1916 | if (ino > max_key->objectid) { |
1909 | iput(inode); | 1917 | iput(inode); |
1910 | break; | 1918 | break; |
1911 | } | 1919 | } |
1912 | 1920 | ||
1913 | objectid = inode->i_ino + 1; | 1921 | objectid = ino + 1; |
1914 | if (!S_ISREG(inode->i_mode)) | 1922 | if (!S_ISREG(inode->i_mode)) |
1915 | continue; | 1923 | continue; |
1916 | 1924 | ||
1917 | if (unlikely(min_key->objectid == inode->i_ino)) { | 1925 | if (unlikely(min_key->objectid == ino)) { |
1918 | if (min_key->type > BTRFS_EXTENT_DATA_KEY) | 1926 | if (min_key->type > BTRFS_EXTENT_DATA_KEY) |
1919 | continue; | 1927 | continue; |
1920 | if (min_key->type < BTRFS_EXTENT_DATA_KEY) | 1928 | if (min_key->type < BTRFS_EXTENT_DATA_KEY) |
@@ -1927,7 +1935,7 @@ static int invalidate_extent_cache(struct btrfs_root *root, | |||
1927 | start = 0; | 1935 | start = 0; |
1928 | } | 1936 | } |
1929 | 1937 | ||
1930 | if (unlikely(max_key->objectid == inode->i_ino)) { | 1938 | if (unlikely(max_key->objectid == ino)) { |
1931 | if (max_key->type < BTRFS_EXTENT_DATA_KEY) | 1939 | if (max_key->type < BTRFS_EXTENT_DATA_KEY) |
1932 | continue; | 1940 | continue; |
1933 | if (max_key->type > BTRFS_EXTENT_DATA_KEY) { | 1941 | if (max_key->type > BTRFS_EXTENT_DATA_KEY) { |
@@ -1995,6 +2003,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1995 | path = btrfs_alloc_path(); | 2003 | path = btrfs_alloc_path(); |
1996 | if (!path) | 2004 | if (!path) |
1997 | return -ENOMEM; | 2005 | return -ENOMEM; |
2006 | path->reada = 1; | ||
1998 | 2007 | ||
1999 | reloc_root = root->reloc_root; | 2008 | reloc_root = root->reloc_root; |
2000 | root_item = &reloc_root->root_item; | 2009 | root_item = &reloc_root->root_item; |
@@ -2029,6 +2038,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
2029 | 2038 | ||
2030 | while (1) { | 2039 | while (1) { |
2031 | trans = btrfs_start_transaction(root, 0); | 2040 | trans = btrfs_start_transaction(root, 0); |
2041 | BUG_ON(IS_ERR(trans)); | ||
2032 | trans->block_rsv = rc->block_rsv; | 2042 | trans->block_rsv = rc->block_rsv; |
2033 | 2043 | ||
2034 | ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, | 2044 | ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, |
@@ -2133,29 +2143,34 @@ int prepare_to_merge(struct reloc_control *rc, int err) | |||
2133 | LIST_HEAD(reloc_roots); | 2143 | LIST_HEAD(reloc_roots); |
2134 | u64 num_bytes = 0; | 2144 | u64 num_bytes = 0; |
2135 | int ret; | 2145 | int ret; |
2136 | int retries = 0; | ||
2137 | 2146 | ||
2138 | mutex_lock(&root->fs_info->trans_mutex); | 2147 | mutex_lock(&root->fs_info->reloc_mutex); |
2139 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; | 2148 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; |
2140 | rc->merging_rsv_size += rc->nodes_relocated * 2; | 2149 | rc->merging_rsv_size += rc->nodes_relocated * 2; |
2141 | mutex_unlock(&root->fs_info->trans_mutex); | 2150 | mutex_unlock(&root->fs_info->reloc_mutex); |
2151 | |||
2142 | again: | 2152 | again: |
2143 | if (!err) { | 2153 | if (!err) { |
2144 | num_bytes = rc->merging_rsv_size; | 2154 | num_bytes = rc->merging_rsv_size; |
2145 | ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, | 2155 | ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, |
2146 | num_bytes, &retries); | 2156 | num_bytes); |
2147 | if (ret) | 2157 | if (ret) |
2148 | err = ret; | 2158 | err = ret; |
2149 | } | 2159 | } |
2150 | 2160 | ||
2151 | trans = btrfs_join_transaction(rc->extent_root, 1); | 2161 | trans = btrfs_join_transaction(rc->extent_root); |
2162 | if (IS_ERR(trans)) { | ||
2163 | if (!err) | ||
2164 | btrfs_block_rsv_release(rc->extent_root, | ||
2165 | rc->block_rsv, num_bytes); | ||
2166 | return PTR_ERR(trans); | ||
2167 | } | ||
2152 | 2168 | ||
2153 | if (!err) { | 2169 | if (!err) { |
2154 | if (num_bytes != rc->merging_rsv_size) { | 2170 | if (num_bytes != rc->merging_rsv_size) { |
2155 | btrfs_end_transaction(trans, rc->extent_root); | 2171 | btrfs_end_transaction(trans, rc->extent_root); |
2156 | btrfs_block_rsv_release(rc->extent_root, | 2172 | btrfs_block_rsv_release(rc->extent_root, |
2157 | rc->block_rsv, num_bytes); | 2173 | rc->block_rsv, num_bytes); |
2158 | retries = 0; | ||
2159 | goto again; | 2174 | goto again; |
2160 | } | 2175 | } |
2161 | } | 2176 | } |
@@ -2202,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc) | |||
2202 | int ret; | 2217 | int ret; |
2203 | again: | 2218 | again: |
2204 | root = rc->extent_root; | 2219 | root = rc->extent_root; |
2205 | mutex_lock(&root->fs_info->trans_mutex); | 2220 | |
2221 | /* | ||
2222 | * this serializes us with btrfs_record_root_in_transaction; | ||
2223 | * we have to make sure nobody is in the middle of | ||
2224 | * adding their roots to the list while we are | ||
2225 | * doing this splice | ||
2226 | */ | ||
2227 | mutex_lock(&root->fs_info->reloc_mutex); | ||
2206 | list_splice_init(&rc->reloc_roots, &reloc_roots); | 2228 | list_splice_init(&rc->reloc_roots, &reloc_roots); |
2207 | mutex_unlock(&root->fs_info->trans_mutex); | 2229 | mutex_unlock(&root->fs_info->reloc_mutex); |
2208 | 2230 | ||
2209 | while (!list_empty(&reloc_roots)) { | 2231 | while (!list_empty(&reloc_roots)) { |
2210 | found = 1; | 2232 | found = 1; |
@@ -2340,7 +2362,7 @@ struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, | |||
2340 | root = next->root; | 2362 | root = next->root; |
2341 | BUG_ON(!root); | 2363 | BUG_ON(!root); |
2342 | 2364 | ||
2343 | /* no other choice for non-refernce counted tree */ | 2365 | /* no other choice for non-reference counted tree */ |
2344 | if (!root->ref_cows) | 2366 | if (!root->ref_cows) |
2345 | return root; | 2367 | return root; |
2346 | 2368 | ||
@@ -2405,15 +2427,13 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans, | |||
2405 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; | 2427 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; |
2406 | 2428 | ||
2407 | trans->block_rsv = rc->block_rsv; | 2429 | trans->block_rsv = rc->block_rsv; |
2408 | ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes, | 2430 | ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes); |
2409 | &rc->block_rsv_retries); | ||
2410 | if (ret) { | 2431 | if (ret) { |
2411 | if (ret == -EAGAIN) | 2432 | if (ret == -EAGAIN) |
2412 | rc->commit_transaction = 1; | 2433 | rc->commit_transaction = 1; |
2413 | return ret; | 2434 | return ret; |
2414 | } | 2435 | } |
2415 | 2436 | ||
2416 | rc->block_rsv_retries = 0; | ||
2417 | return 0; | 2437 | return 0; |
2418 | } | 2438 | } |
2419 | 2439 | ||
@@ -2492,7 +2512,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2492 | path->locks[upper->level] = 0; | 2512 | path->locks[upper->level] = 0; |
2493 | 2513 | ||
2494 | slot = path->slots[upper->level]; | 2514 | slot = path->slots[upper->level]; |
2495 | btrfs_release_path(NULL, path); | 2515 | btrfs_release_path(path); |
2496 | } else { | 2516 | } else { |
2497 | ret = btrfs_bin_search(upper->eb, key, upper->level, | 2517 | ret = btrfs_bin_search(upper->eb, key, upper->level, |
2498 | &slot); | 2518 | &slot); |
@@ -2510,6 +2530,10 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2510 | blocksize = btrfs_level_size(root, node->level); | 2530 | blocksize = btrfs_level_size(root, node->level); |
2511 | generation = btrfs_node_ptr_generation(upper->eb, slot); | 2531 | generation = btrfs_node_ptr_generation(upper->eb, slot); |
2512 | eb = read_tree_block(root, bytenr, blocksize, generation); | 2532 | eb = read_tree_block(root, bytenr, blocksize, generation); |
2533 | if (!eb) { | ||
2534 | err = -EIO; | ||
2535 | goto next; | ||
2536 | } | ||
2513 | btrfs_tree_lock(eb); | 2537 | btrfs_tree_lock(eb); |
2514 | btrfs_set_lock_blocking(eb); | 2538 | btrfs_set_lock_blocking(eb); |
2515 | 2539 | ||
@@ -2667,6 +2691,7 @@ static int get_tree_block_key(struct reloc_control *rc, | |||
2667 | BUG_ON(block->key_ready); | 2691 | BUG_ON(block->key_ready); |
2668 | eb = read_tree_block(rc->extent_root, block->bytenr, | 2692 | eb = read_tree_block(rc->extent_root, block->bytenr, |
2669 | block->key.objectid, block->key.offset); | 2693 | block->key.objectid, block->key.offset); |
2694 | BUG_ON(!eb); | ||
2670 | WARN_ON(btrfs_header_level(eb) != block->level); | 2695 | WARN_ON(btrfs_header_level(eb) != block->level); |
2671 | if (block->level == 0) | 2696 | if (block->level == 0) |
2672 | btrfs_item_key_to_cpu(eb, &block->key, 0); | 2697 | btrfs_item_key_to_cpu(eb, &block->key, 0); |
@@ -2728,7 +2753,7 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, | |||
2728 | } else { | 2753 | } else { |
2729 | path->lowest_level = node->level; | 2754 | path->lowest_level = node->level; |
2730 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | 2755 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); |
2731 | btrfs_release_path(root, path); | 2756 | btrfs_release_path(path); |
2732 | if (ret > 0) | 2757 | if (ret > 0) |
2733 | ret = 0; | 2758 | ret = 0; |
2734 | } | 2759 | } |
@@ -2861,7 +2886,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end, | |||
2861 | struct extent_map *em; | 2886 | struct extent_map *em; |
2862 | int ret = 0; | 2887 | int ret = 0; |
2863 | 2888 | ||
2864 | em = alloc_extent_map(GFP_NOFS); | 2889 | em = alloc_extent_map(); |
2865 | if (!em) | 2890 | if (!em) |
2866 | return -ENOMEM; | 2891 | return -ENOMEM; |
2867 | 2892 | ||
@@ -3099,6 +3124,8 @@ static int add_tree_block(struct reloc_control *rc, | |||
3099 | BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); | 3124 | BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); |
3100 | ret = get_ref_objectid_v0(rc, path, extent_key, | 3125 | ret = get_ref_objectid_v0(rc, path, extent_key, |
3101 | &ref_owner, NULL); | 3126 | &ref_owner, NULL); |
3127 | if (ret < 0) | ||
3128 | return ret; | ||
3102 | BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); | 3129 | BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); |
3103 | level = (int)ref_owner; | 3130 | level = (int)ref_owner; |
3104 | /* FIXME: get real generation */ | 3131 | /* FIXME: get real generation */ |
@@ -3108,7 +3135,7 @@ static int add_tree_block(struct reloc_control *rc, | |||
3108 | #endif | 3135 | #endif |
3109 | } | 3136 | } |
3110 | 3137 | ||
3111 | btrfs_release_path(rc->extent_root, path); | 3138 | btrfs_release_path(path); |
3112 | 3139 | ||
3113 | BUG_ON(level == -1); | 3140 | BUG_ON(level == -1); |
3114 | 3141 | ||
@@ -3191,6 +3218,55 @@ static int block_use_full_backref(struct reloc_control *rc, | |||
3191 | return ret; | 3218 | return ret; |
3192 | } | 3219 | } |
3193 | 3220 | ||
3221 | static int delete_block_group_cache(struct btrfs_fs_info *fs_info, | ||
3222 | struct inode *inode, u64 ino) | ||
3223 | { | ||
3224 | struct btrfs_key key; | ||
3225 | struct btrfs_path *path; | ||
3226 | struct btrfs_root *root = fs_info->tree_root; | ||
3227 | struct btrfs_trans_handle *trans; | ||
3228 | unsigned long nr; | ||
3229 | int ret = 0; | ||
3230 | |||
3231 | if (inode) | ||
3232 | goto truncate; | ||
3233 | |||
3234 | key.objectid = ino; | ||
3235 | key.type = BTRFS_INODE_ITEM_KEY; | ||
3236 | key.offset = 0; | ||
3237 | |||
3238 | inode = btrfs_iget(fs_info->sb, &key, root, NULL); | ||
3239 | if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) { | ||
3240 | if (inode && !IS_ERR(inode)) | ||
3241 | iput(inode); | ||
3242 | return -ENOENT; | ||
3243 | } | ||
3244 | |||
3245 | truncate: | ||
3246 | path = btrfs_alloc_path(); | ||
3247 | if (!path) { | ||
3248 | ret = -ENOMEM; | ||
3249 | goto out; | ||
3250 | } | ||
3251 | |||
3252 | trans = btrfs_join_transaction(root); | ||
3253 | if (IS_ERR(trans)) { | ||
3254 | btrfs_free_path(path); | ||
3255 | ret = PTR_ERR(trans); | ||
3256 | goto out; | ||
3257 | } | ||
3258 | |||
3259 | ret = btrfs_truncate_free_space_cache(root, trans, path, inode); | ||
3260 | |||
3261 | btrfs_free_path(path); | ||
3262 | nr = trans->blocks_used; | ||
3263 | btrfs_end_transaction(trans, root); | ||
3264 | btrfs_btree_balance_dirty(root, nr); | ||
3265 | out: | ||
3266 | iput(inode); | ||
3267 | return ret; | ||
3268 | } | ||
3269 | |||
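The helper above is invoked in two modes by later hunks in this file; a minimal usage sketch of both (error handling elided; fs_info, inode and ref_objectid come from the surrounding callers):

	/* the block group's cache inode is already loaded */
	ret = delete_block_group_cache(fs_info, inode, 0);

	/* only the inode number of a free space cache extent is known;
	 * the helper does the btrfs_iget() lookup itself */
	ret = delete_block_group_cache(fs_info, NULL, ref_objectid);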
3194 | /* | 3270 | /* |
3195 | * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY | 3271 | * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY |
3196 | * this function scans fs tree to find blocks reference the data extent | 3272 | * this function scans fs tree to find blocks reference the data extent |
@@ -3217,15 +3293,28 @@ static int find_data_references(struct reloc_control *rc, | |||
3217 | int counted; | 3293 | int counted; |
3218 | int ret; | 3294 | int ret; |
3219 | 3295 | ||
3220 | path = btrfs_alloc_path(); | ||
3221 | if (!path) | ||
3222 | return -ENOMEM; | ||
3223 | |||
3224 | ref_root = btrfs_extent_data_ref_root(leaf, ref); | 3296 | ref_root = btrfs_extent_data_ref_root(leaf, ref); |
3225 | ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); | 3297 | ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); |
3226 | ref_offset = btrfs_extent_data_ref_offset(leaf, ref); | 3298 | ref_offset = btrfs_extent_data_ref_offset(leaf, ref); |
3227 | ref_count = btrfs_extent_data_ref_count(leaf, ref); | 3299 | ref_count = btrfs_extent_data_ref_count(leaf, ref); |
3228 | 3300 | ||
3301 | /* | ||
3302 | * This is an extent belonging to the free space cache; let's just delete | ||
3303 | * it and redo the search. | ||
3304 | */ | ||
3305 | if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { | ||
3306 | ret = delete_block_group_cache(rc->extent_root->fs_info, | ||
3307 | NULL, ref_objectid); | ||
3308 | if (ret != -ENOENT) | ||
3309 | return ret; | ||
3310 | ret = 0; | ||
3311 | } | ||
3312 | |||
3313 | path = btrfs_alloc_path(); | ||
3314 | if (!path) | ||
3315 | return -ENOMEM; | ||
3316 | path->reada = 1; | ||
3317 | |||
3229 | root = read_fs_root(rc->extent_root->fs_info, ref_root); | 3318 | root = read_fs_root(rc->extent_root->fs_info, ref_root); |
3230 | if (IS_ERR(root)) { | 3319 | if (IS_ERR(root)) { |
3231 | err = PTR_ERR(root); | 3320 | err = PTR_ERR(root); |
@@ -3433,7 +3522,7 @@ int add_data_references(struct reloc_control *rc, | |||
3433 | } | 3522 | } |
3434 | path->slots[0]++; | 3523 | path->slots[0]++; |
3435 | } | 3524 | } |
3436 | btrfs_release_path(rc->extent_root, path); | 3525 | btrfs_release_path(path); |
3437 | if (err) | 3526 | if (err) |
3438 | free_block_list(blocks); | 3527 | free_block_list(blocks); |
3439 | return err; | 3528 | return err; |
@@ -3496,7 +3585,7 @@ next: | |||
3496 | EXTENT_DIRTY); | 3585 | EXTENT_DIRTY); |
3497 | 3586 | ||
3498 | if (ret == 0 && start <= key.objectid) { | 3587 | if (ret == 0 && start <= key.objectid) { |
3499 | btrfs_release_path(rc->extent_root, path); | 3588 | btrfs_release_path(path); |
3500 | rc->search_start = end + 1; | 3589 | rc->search_start = end + 1; |
3501 | } else { | 3590 | } else { |
3502 | rc->search_start = key.objectid + key.offset; | 3591 | rc->search_start = key.objectid + key.offset; |
@@ -3504,24 +3593,26 @@ next: | |||
3504 | return 0; | 3593 | return 0; |
3505 | } | 3594 | } |
3506 | } | 3595 | } |
3507 | btrfs_release_path(rc->extent_root, path); | 3596 | btrfs_release_path(path); |
3508 | return ret; | 3597 | return ret; |
3509 | } | 3598 | } |
3510 | 3599 | ||
3511 | static void set_reloc_control(struct reloc_control *rc) | 3600 | static void set_reloc_control(struct reloc_control *rc) |
3512 | { | 3601 | { |
3513 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; | 3602 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; |
3514 | mutex_lock(&fs_info->trans_mutex); | 3603 | |
3604 | mutex_lock(&fs_info->reloc_mutex); | ||
3515 | fs_info->reloc_ctl = rc; | 3605 | fs_info->reloc_ctl = rc; |
3516 | mutex_unlock(&fs_info->trans_mutex); | 3606 | mutex_unlock(&fs_info->reloc_mutex); |
3517 | } | 3607 | } |
3518 | 3608 | ||
3519 | static void unset_reloc_control(struct reloc_control *rc) | 3609 | static void unset_reloc_control(struct reloc_control *rc) |
3520 | { | 3610 | { |
3521 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; | 3611 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; |
3522 | mutex_lock(&fs_info->trans_mutex); | 3612 | |
3613 | mutex_lock(&fs_info->reloc_mutex); | ||
3523 | fs_info->reloc_ctl = NULL; | 3614 | fs_info->reloc_ctl = NULL; |
3524 | mutex_unlock(&fs_info->trans_mutex); | 3615 | mutex_unlock(&fs_info->reloc_mutex); |
3525 | } | 3616 | } |
3526 | 3617 | ||
3527 | static int check_extent_flags(u64 flags) | 3618 | static int check_extent_flags(u64 flags) |
@@ -3554,8 +3645,7 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
3554 | * is no reservation in transaction handle. | 3645 | * is no reservation in transaction handle. |
3555 | */ | 3646 | */ |
3556 | ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, | 3647 | ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, |
3557 | rc->extent_root->nodesize * 256, | 3648 | rc->extent_root->nodesize * 256); |
3558 | &rc->block_rsv_retries); | ||
3559 | if (ret) | 3649 | if (ret) |
3560 | return ret; | 3650 | return ret; |
3561 | 3651 | ||
@@ -3567,12 +3657,12 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
3567 | rc->extents_found = 0; | 3657 | rc->extents_found = 0; |
3568 | rc->nodes_relocated = 0; | 3658 | rc->nodes_relocated = 0; |
3569 | rc->merging_rsv_size = 0; | 3659 | rc->merging_rsv_size = 0; |
3570 | rc->block_rsv_retries = 0; | ||
3571 | 3660 | ||
3572 | rc->create_reloc_tree = 1; | 3661 | rc->create_reloc_tree = 1; |
3573 | set_reloc_control(rc); | 3662 | set_reloc_control(rc); |
3574 | 3663 | ||
3575 | trans = btrfs_join_transaction(rc->extent_root, 1); | 3664 | trans = btrfs_join_transaction(rc->extent_root); |
3665 | BUG_ON(IS_ERR(trans)); | ||
3576 | btrfs_commit_transaction(trans, rc->extent_root); | 3666 | btrfs_commit_transaction(trans, rc->extent_root); |
3577 | return 0; | 3667 | return 0; |
3578 | } | 3668 | } |
@@ -3589,10 +3679,12 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3589 | u32 item_size; | 3679 | u32 item_size; |
3590 | int ret; | 3680 | int ret; |
3591 | int err = 0; | 3681 | int err = 0; |
3682 | int progress = 0; | ||
3592 | 3683 | ||
3593 | path = btrfs_alloc_path(); | 3684 | path = btrfs_alloc_path(); |
3594 | if (!path) | 3685 | if (!path) |
3595 | return -ENOMEM; | 3686 | return -ENOMEM; |
3687 | path->reada = 1; | ||
3596 | 3688 | ||
3597 | ret = prepare_to_relocate(rc); | 3689 | ret = prepare_to_relocate(rc); |
3598 | if (ret) { | 3690 | if (ret) { |
@@ -3601,8 +3693,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3601 | } | 3693 | } |
3602 | 3694 | ||
3603 | while (1) { | 3695 | while (1) { |
3696 | progress++; | ||
3604 | trans = btrfs_start_transaction(rc->extent_root, 0); | 3697 | trans = btrfs_start_transaction(rc->extent_root, 0); |
3605 | | 3698 | BUG_ON(IS_ERR(trans)); |
3699 | restart: | ||
3606 | if (update_backref_cache(trans, &rc->backref_cache)) { | 3700 | if (update_backref_cache(trans, &rc->backref_cache)) { |
3607 | btrfs_end_transaction(trans, rc->extent_root); | 3701 | btrfs_end_transaction(trans, rc->extent_root); |
3608 | continue; | 3702 | continue; |
@@ -3639,7 +3733,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3639 | flags = BTRFS_EXTENT_FLAG_DATA; | 3733 | flags = BTRFS_EXTENT_FLAG_DATA; |
3640 | 3734 | ||
3641 | if (path_change) { | 3735 | if (path_change) { |
3642 | btrfs_release_path(rc->extent_root, path); | 3736 | btrfs_release_path(path); |
3643 | 3737 | ||
3644 | path->search_commit_root = 1; | 3738 | path->search_commit_root = 1; |
3645 | path->skip_locking = 1; | 3739 | path->skip_locking = 1; |
@@ -3662,7 +3756,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3662 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3756 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
3663 | ret = add_data_references(rc, &key, path, &blocks); | 3757 | ret = add_data_references(rc, &key, path, &blocks); |
3664 | } else { | 3758 | } else { |
3665 | btrfs_release_path(rc->extent_root, path); | 3759 | btrfs_release_path(path); |
3666 | ret = 0; | 3760 | ret = 0; |
3667 | } | 3761 | } |
3668 | if (ret < 0) { | 3762 | if (ret < 0) { |
@@ -3715,8 +3809,17 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3715 | } | 3809 | } |
3716 | } | 3810 | } |
3717 | } | 3811 | } |
3812 | if (trans && progress && err == -ENOSPC) { | ||
3813 | ret = btrfs_force_chunk_alloc(trans, rc->extent_root, | ||
3814 | rc->block_group->flags); | ||
3815 | if (ret == 0) { | ||
3816 | err = 0; | ||
3817 | progress = 0; | ||
3818 | goto restart; | ||
3819 | } | ||
3820 | } | ||
3718 | 3821 | ||
3719 | btrfs_release_path(rc->extent_root, path); | 3822 | btrfs_release_path(path); |
3720 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | 3823 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, |
3721 | GFP_NOFS); | 3824 | GFP_NOFS); |
3722 | 3825 | ||
@@ -3748,8 +3851,11 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3748 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); | 3851 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); |
3749 | 3852 | ||
3750 | /* get rid of pinned extents */ | 3853 | /* get rid of pinned extents */ |
3751 | trans = btrfs_join_transaction(rc->extent_root, 1); | 3854 | trans = btrfs_join_transaction(rc->extent_root); |
3752 | btrfs_commit_transaction(trans, rc->extent_root); | 3855 | if (IS_ERR(trans)) |
3856 | err = PTR_ERR(trans); | ||
3857 | else | ||
3858 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3753 | out_free: | 3859 | out_free: |
3754 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); | 3860 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); |
3755 | btrfs_free_path(path); | 3861 | btrfs_free_path(path); |
@@ -3781,7 +3887,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
3781 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | | 3887 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | |
3782 | BTRFS_INODE_PREALLOC); | 3888 | BTRFS_INODE_PREALLOC); |
3783 | btrfs_mark_buffer_dirty(leaf); | 3889 | btrfs_mark_buffer_dirty(leaf); |
3784 | btrfs_release_path(root, path); | 3890 | btrfs_release_path(path); |
3785 | out: | 3891 | out: |
3786 | btrfs_free_path(path); | 3892 | btrfs_free_path(path); |
3787 | return ret; | 3893 | return ret; |
@@ -3811,7 +3917,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
3811 | if (IS_ERR(trans)) | 3917 | if (IS_ERR(trans)) |
3812 | return ERR_CAST(trans); | 3918 | return ERR_CAST(trans); |
3813 | 3919 | ||
3814 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | 3920 | err = btrfs_find_free_objectid(root, &objectid); |
3815 | if (err) | 3921 | if (err) |
3816 | goto out; | 3922 | goto out; |
3817 | 3923 | ||
@@ -3849,7 +3955,7 @@ static struct reloc_control *alloc_reloc_control(void) | |||
3849 | INIT_LIST_HEAD(&rc->reloc_roots); | 3955 | INIT_LIST_HEAD(&rc->reloc_roots); |
3850 | backref_cache_init(&rc->backref_cache); | 3956 | backref_cache_init(&rc->backref_cache); |
3851 | mapping_tree_init(&rc->reloc_root_tree); | 3957 | mapping_tree_init(&rc->reloc_root_tree); |
3852 | extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); | 3958 | extent_io_tree_init(&rc->processed_blocks, NULL); |
3853 | return rc; | 3959 | return rc; |
3854 | } | 3960 | } |
3855 | 3961 | ||
@@ -3860,6 +3966,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3860 | { | 3966 | { |
3861 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3967 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
3862 | struct reloc_control *rc; | 3968 | struct reloc_control *rc; |
3969 | struct inode *inode; | ||
3970 | struct btrfs_path *path; | ||
3863 | int ret; | 3971 | int ret; |
3864 | int rw = 0; | 3972 | int rw = 0; |
3865 | int err = 0; | 3973 | int err = 0; |
@@ -3882,6 +3990,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3882 | rw = 1; | 3990 | rw = 1; |
3883 | } | 3991 | } |
3884 | 3992 | ||
3993 | path = btrfs_alloc_path(); | ||
3994 | if (!path) { | ||
3995 | err = -ENOMEM; | ||
3996 | goto out; | ||
3997 | } | ||
3998 | |||
3999 | inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group, | ||
4000 | path); | ||
4001 | btrfs_free_path(path); | ||
4002 | |||
4003 | if (!IS_ERR(inode)) | ||
4004 | ret = delete_block_group_cache(fs_info, inode, 0); | ||
4005 | else | ||
4006 | ret = PTR_ERR(inode); | ||
4007 | |||
4008 | if (ret && ret != -ENOENT) { | ||
4009 | err = ret; | ||
4010 | goto out; | ||
4011 | } | ||
4012 | |||
3885 | rc->data_inode = create_reloc_inode(fs_info, rc->block_group); | 4013 | rc->data_inode = create_reloc_inode(fs_info, rc->block_group); |
3886 | if (IS_ERR(rc->data_inode)) { | 4014 | if (IS_ERR(rc->data_inode)) { |
3887 | err = PTR_ERR(rc->data_inode); | 4015 | err = PTR_ERR(rc->data_inode); |
@@ -3945,6 +4073,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) | |||
3945 | int ret; | 4073 | int ret; |
3946 | 4074 | ||
3947 | trans = btrfs_start_transaction(root->fs_info->tree_root, 0); | 4075 | trans = btrfs_start_transaction(root->fs_info->tree_root, 0); |
4076 | BUG_ON(IS_ERR(trans)); | ||
3948 | 4077 | ||
3949 | memset(&root->root_item.drop_progress, 0, | 4078 | memset(&root->root_item.drop_progress, 0, |
3950 | sizeof(root->root_item.drop_progress)); | 4079 | sizeof(root->root_item.drop_progress)); |
@@ -3981,6 +4110,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
3981 | path = btrfs_alloc_path(); | 4110 | path = btrfs_alloc_path(); |
3982 | if (!path) | 4111 | if (!path) |
3983 | return -ENOMEM; | 4112 | return -ENOMEM; |
4113 | path->reada = -1; | ||
3984 | 4114 | ||
3985 | key.objectid = BTRFS_TREE_RELOC_OBJECTID; | 4115 | key.objectid = BTRFS_TREE_RELOC_OBJECTID; |
3986 | key.type = BTRFS_ROOT_ITEM_KEY; | 4116 | key.type = BTRFS_ROOT_ITEM_KEY; |
@@ -4000,7 +4130,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4000 | } | 4130 | } |
4001 | leaf = path->nodes[0]; | 4131 | leaf = path->nodes[0]; |
4002 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | 4132 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
4003 | btrfs_release_path(root->fs_info->tree_root, path); | 4133 | btrfs_release_path(path); |
4004 | 4134 | ||
4005 | if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || | 4135 | if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || |
4006 | key.type != BTRFS_ROOT_ITEM_KEY) | 4136 | key.type != BTRFS_ROOT_ITEM_KEY) |
@@ -4032,7 +4162,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4032 | 4162 | ||
4033 | key.offset--; | 4163 | key.offset--; |
4034 | } | 4164 | } |
4035 | btrfs_release_path(root->fs_info->tree_root, path); | 4165 | btrfs_release_path(path); |
4036 | 4166 | ||
4037 | if (list_empty(&reloc_roots)) | 4167 | if (list_empty(&reloc_roots)) |
4038 | goto out; | 4168 | goto out; |
@@ -4047,7 +4177,12 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4047 | 4177 | ||
4048 | set_reloc_control(rc); | 4178 | set_reloc_control(rc); |
4049 | 4179 | ||
4050 | trans = btrfs_join_transaction(rc->extent_root, 1); | 4180 | trans = btrfs_join_transaction(rc->extent_root); |
4181 | if (IS_ERR(trans)) { | ||
4182 | unset_reloc_control(rc); | ||
4183 | err = PTR_ERR(trans); | ||
4184 | goto out_free; | ||
4185 | } | ||
4051 | 4186 | ||
4052 | rc->merge_reloc_tree = 1; | 4187 | rc->merge_reloc_tree = 1; |
4053 | 4188 | ||
@@ -4076,10 +4211,14 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4076 | 4211 | ||
4077 | unset_reloc_control(rc); | 4212 | unset_reloc_control(rc); |
4078 | 4213 | ||
4079 | trans = btrfs_join_transaction(rc->extent_root, 1); | 4214 | trans = btrfs_join_transaction(rc->extent_root); |
4080 | btrfs_commit_transaction(trans, rc->extent_root); | 4215 | if (IS_ERR(trans)) |
4081 | out: | 4216 | err = PTR_ERR(trans); |
4217 | else | ||
4218 | btrfs_commit_transaction(trans, rc->extent_root); | ||
4219 | out_free: | ||
4082 | kfree(rc); | 4220 | kfree(rc); |
4221 | out: | ||
4083 | while (!list_empty(&reloc_roots)) { | 4222 | while (!list_empty(&reloc_roots)) { |
4084 | reloc_root = list_entry(reloc_roots.next, | 4223 | reloc_root = list_entry(reloc_roots.next, |
4085 | struct btrfs_root, root_list); | 4224 | struct btrfs_root, root_list); |
@@ -4097,7 +4236,7 @@ out: | |||
4097 | if (IS_ERR(fs_root)) | 4236 | if (IS_ERR(fs_root)) |
4098 | err = PTR_ERR(fs_root); | 4237 | err = PTR_ERR(fs_root); |
4099 | else | 4238 | else |
4100 | btrfs_orphan_cleanup(fs_root); | 4239 | err = btrfs_orphan_cleanup(fs_root); |
4101 | } | 4240 | } |
4102 | return err; | 4241 | return err; |
4103 | } | 4242 | } |
@@ -4124,7 +4263,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | |||
4124 | 4263 | ||
4125 | disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; | 4264 | disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; |
4126 | ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, | 4265 | ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, |
4127 | disk_bytenr + len - 1, &list); | 4266 | disk_bytenr + len - 1, &list, 0); |
4128 | 4267 | ||
4129 | while (!list_empty(&list)) { | 4268 | while (!list_empty(&list)) { |
4130 | sums = list_entry(list.next, struct btrfs_ordered_sum, list); | 4269 | sums = list_entry(list.next, struct btrfs_ordered_sum, list); |
@@ -4143,7 +4282,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | |||
4143 | btrfs_add_ordered_sum(inode, ordered, sums); | 4282 | btrfs_add_ordered_sum(inode, ordered, sums); |
4144 | } | 4283 | } |
4145 | btrfs_put_ordered_extent(ordered); | 4284 | btrfs_put_ordered_extent(ordered); |
4146 | return 0; | 4285 | return ret; |
4147 | } | 4286 | } |
4148 | 4287 | ||
4149 | void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, | 4288 | void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, |
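A change that repeats throughout the relocation.c hunks above is the btrfs_release_path() signature: the root argument is dropped, because releasing a path (unlocking and dropping its extent buffer references) never needed to know which tree it was searched on. A minimal sketch of the new calling convention, using a hypothetical read-only lookup helper:

	static int lookup_example(struct btrfs_root *root, struct btrfs_key *key)
	{
		struct btrfs_path *path;
		int ret;

		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;
		ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
		/* no root argument anymore; the path alone is enough */
		btrfs_release_path(path);
		btrfs_free_path(path);
		return ret;
	}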
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 2d958be761c8..ebe45443de06 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -22,53 +22,6 @@ | |||
22 | #include "print-tree.h" | 22 | #include "print-tree.h" |
23 | 23 | ||
24 | /* | 24 | /* |
25 | * search forward for a root, starting with objectid 'search_start' | ||
26 | * if a root key is found, the objectid we find is filled into 'found_objectid' | ||
27 | * and 0 is returned. < 0 is returned on error, 1 if there is nothing | ||
28 | * left in the tree. | ||
29 | */ | ||
30 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, | ||
31 | u64 *found_objectid) | ||
32 | { | ||
33 | struct btrfs_path *path; | ||
34 | struct btrfs_key search_key; | ||
35 | int ret; | ||
36 | |||
37 | root = root->fs_info->tree_root; | ||
38 | search_key.objectid = search_start; | ||
39 | search_key.type = (u8)-1; | ||
40 | search_key.offset = (u64)-1; | ||
41 | |||
42 | path = btrfs_alloc_path(); | ||
43 | BUG_ON(!path); | ||
44 | again: | ||
45 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); | ||
46 | if (ret < 0) | ||
47 | goto out; | ||
48 | if (ret == 0) { | ||
49 | ret = 1; | ||
50 | goto out; | ||
51 | } | ||
52 | if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { | ||
53 | ret = btrfs_next_leaf(root, path); | ||
54 | if (ret) | ||
55 | goto out; | ||
56 | } | ||
57 | btrfs_item_key_to_cpu(path->nodes[0], &search_key, path->slots[0]); | ||
58 | if (search_key.type != BTRFS_ROOT_ITEM_KEY) { | ||
59 | search_key.offset++; | ||
60 | btrfs_release_path(root, path); | ||
61 | goto again; | ||
62 | } | ||
63 | ret = 0; | ||
64 | *found_objectid = search_key.objectid; | ||
65 | |||
66 | out: | ||
67 | btrfs_free_path(path); | ||
68 | return ret; | ||
69 | } | ||
70 | |||
71 | /* | ||
72 | * lookup the root with the highest offset for a given objectid. The key we do | 25 | * lookup the root with the highest offset for a given objectid. The key we do |
73 | * find is copied into 'key'. If we find something return 0, otherwise 1, < 0 | 26 | * find is copied into 'key'. If we find something return 0, otherwise 1, < 0 |
74 | * on error. | 27 | * on error. |
@@ -88,7 +41,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, | |||
88 | search_key.offset = (u64)-1; | 41 | search_key.offset = (u64)-1; |
89 | 42 | ||
90 | path = btrfs_alloc_path(); | 43 | path = btrfs_alloc_path(); |
91 | BUG_ON(!path); | 44 | if (!path) |
45 | return -ENOMEM; | ||
92 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); | 46 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); |
93 | if (ret < 0) | 47 | if (ret < 0) |
94 | goto out; | 48 | goto out; |
@@ -181,7 +135,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
181 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid) | 135 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid) |
182 | { | 136 | { |
183 | struct btrfs_root *dead_root; | 137 | struct btrfs_root *dead_root; |
184 | struct btrfs_item *item; | ||
185 | struct btrfs_root_item *ri; | 138 | struct btrfs_root_item *ri; |
186 | struct btrfs_key key; | 139 | struct btrfs_key key; |
187 | struct btrfs_key found_key; | 140 | struct btrfs_key found_key; |
@@ -214,7 +167,6 @@ again: | |||
214 | nritems = btrfs_header_nritems(leaf); | 167 | nritems = btrfs_header_nritems(leaf); |
215 | slot = path->slots[0]; | 168 | slot = path->slots[0]; |
216 | } | 169 | } |
217 | item = btrfs_item_nr(leaf, slot); | ||
218 | btrfs_item_key_to_cpu(leaf, &key, slot); | 170 | btrfs_item_key_to_cpu(leaf, &key, slot); |
219 | if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) | 171 | if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) |
220 | goto next; | 172 | goto next; |
@@ -231,7 +183,7 @@ again: | |||
231 | 183 | ||
232 | memcpy(&found_key, &key, sizeof(key)); | 184 | memcpy(&found_key, &key, sizeof(key)); |
233 | key.offset++; | 185 | key.offset++; |
234 | btrfs_release_path(root, path); | 186 | btrfs_release_path(path); |
235 | dead_root = | 187 | dead_root = |
236 | btrfs_read_fs_root_no_radix(root->fs_info->tree_root, | 188 | btrfs_read_fs_root_no_radix(root->fs_info->tree_root, |
237 | &found_key); | 189 | &found_key); |
@@ -293,7 +245,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
293 | } | 245 | } |
294 | 246 | ||
295 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | 247 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
296 | btrfs_release_path(tree_root, path); | 248 | btrfs_release_path(path); |
297 | 249 | ||
298 | if (key.objectid != BTRFS_ORPHAN_OBJECTID || | 250 | if (key.objectid != BTRFS_ORPHAN_OBJECTID || |
299 | key.type != BTRFS_ORPHAN_ITEM_KEY) | 251 | key.type != BTRFS_ORPHAN_ITEM_KEY) |
@@ -334,7 +286,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
334 | struct extent_buffer *leaf; | 286 | struct extent_buffer *leaf; |
335 | 287 | ||
336 | path = btrfs_alloc_path(); | 288 | path = btrfs_alloc_path(); |
337 | BUG_ON(!path); | 289 | if (!path) |
290 | return -ENOMEM; | ||
338 | ret = btrfs_search_slot(trans, root, key, path, -1, 1); | 291 | ret = btrfs_search_slot(trans, root, key, path, -1, 1); |
339 | if (ret < 0) | 292 | if (ret < 0) |
340 | goto out; | 293 | goto out; |
@@ -385,18 +338,22 @@ again: | |||
385 | *sequence = btrfs_root_ref_sequence(leaf, ref); | 338 | *sequence = btrfs_root_ref_sequence(leaf, ref); |
386 | 339 | ||
387 | ret = btrfs_del_item(trans, tree_root, path); | 340 | ret = btrfs_del_item(trans, tree_root, path); |
388 | BUG_ON(ret); | 341 | if (ret) { |
342 | err = ret; | ||
343 | goto out; | ||
344 | } | ||
389 | } else | 345 | } else |
390 | err = -ENOENT; | 346 | err = -ENOENT; |
391 | 347 | ||
392 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { | 348 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { |
393 | btrfs_release_path(tree_root, path); | 349 | btrfs_release_path(path); |
394 | key.objectid = ref_id; | 350 | key.objectid = ref_id; |
395 | key.type = BTRFS_ROOT_REF_KEY; | 351 | key.type = BTRFS_ROOT_REF_KEY; |
396 | key.offset = root_id; | 352 | key.offset = root_id; |
397 | goto again; | 353 | goto again; |
398 | } | 354 | } |
399 | 355 | ||
356 | out: | ||
400 | btrfs_free_path(path); | 357 | btrfs_free_path(path); |
401 | return err; | 358 | return err; |
402 | } | 359 | } |
@@ -463,7 +420,7 @@ again: | |||
463 | btrfs_mark_buffer_dirty(leaf); | 420 | btrfs_mark_buffer_dirty(leaf); |
464 | 421 | ||
465 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { | 422 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { |
466 | btrfs_release_path(tree_root, path); | 423 | btrfs_release_path(path); |
467 | key.objectid = ref_id; | 424 | key.objectid = ref_id; |
468 | key.type = BTRFS_ROOT_REF_KEY; | 425 | key.type = BTRFS_ROOT_REF_KEY; |
469 | key.offset = root_id; | 426 | key.offset = root_id; |
@@ -473,3 +430,21 @@ again: | |||
473 | btrfs_free_path(path); | 430 | btrfs_free_path(path); |
474 | return 0; | 431 | return 0; |
475 | } | 432 | } |
433 | |||
434 | /* | ||
435 | * Old btrfs versions forget to init root_item->flags and root_item->byte_limit | ||
436 | * for subvolumes. To work around this problem, we steal a bit from | ||
437 | * root_item->inode.flags, and use it to indicate if those fields | ||
438 | * have been properly initialized. | ||
439 | */ | ||
440 | void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item) | ||
441 | { | ||
442 | u64 inode_flags = le64_to_cpu(root_item->inode.flags); | ||
443 | |||
444 | if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) { | ||
445 | inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT; | ||
446 | root_item->inode.flags = cpu_to_le64(inode_flags); | ||
447 | root_item->flags = 0; | ||
448 | root_item->byte_limit = 0; | ||
449 | } | ||
450 | } | ||
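A hypothetical caller sketch for the helper above: code that has just read a root item out of a leaf runs it through the check before trusting the new fields (read_extent_buffer() and btrfs_item_ptr_offset() as used elsewhere in the tree code; leaf and slot assumed from a prior search):

	struct btrfs_root_item item;

	read_extent_buffer(leaf, &item, btrfs_item_ptr_offset(leaf, slot),
			   sizeof(item));
	btrfs_check_and_init_root_item(&item);
	/* item.flags and item.byte_limit can now be interpreted, even for
	 * subvolumes created by kernels that left them uninitialized */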
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c new file mode 100644 index 000000000000..a8d03d5efb5d --- /dev/null +++ b/fs/btrfs/scrub.c | |||
@@ -0,0 +1,1395 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2011 STRATO. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/blkdev.h> | ||
20 | #include "ctree.h" | ||
21 | #include "volumes.h" | ||
22 | #include "disk-io.h" | ||
23 | #include "ordered-data.h" | ||
24 | |||
25 | /* | ||
26 | * This is only the first step towards a full-featured scrub. It reads all | ||
27 | * extents and super blocks and verifies the checksums. In case a bad checksum | ||
28 | * is found or the extent cannot be read, good data will be written back if | ||
29 | * any can be found. | ||
30 | * | ||
31 | * Future enhancements: | ||
32 | * - To enhance the performance, better read-ahead strategies for the | ||
33 | * extent-tree can be employed. | ||
34 | * - In case an unrepairable extent is encountered, track which files are | ||
35 | * affected and report them | ||
36 | * - In case of a read error on files with nodatasum, map the file and read | ||
37 | * the extent to trigger a writeback of the good copy | ||
38 | * - track and record media errors, throw out bad devices | ||
39 | * - add a mode to also read unallocated space | ||
40 | * - make the prefetch cancellable | ||
41 | */ | ||
42 | |||
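As a reading aid, a rough sketch of the verify/repair control flow implemented by the functions below (names are the real ones from this file; the bio submission side is not part of this excerpt):

	/*
	 * bio completes -> scrub_bio_end_io() queues the scrub_bio on the
	 * scrub_workers thread pool
	 *   -> scrub_checksum() walks every page of the bio:
	 *        scrub_checksum_data() / scrub_checksum_tree_block() /
	 *        scrub_checksum_super() verify the page
	 *      on a checksum mismatch or read error:
	 *        scrub_recheck_error() re-reads and re-checks the page
	 *          -> scrub_fixup() tries the other mirrors and, when a good
	 *             copy is found, writes it back in place
	 */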
43 | struct scrub_bio; | ||
44 | struct scrub_page; | ||
45 | struct scrub_dev; | ||
46 | static void scrub_bio_end_io(struct bio *bio, int err); | ||
47 | static void scrub_checksum(struct btrfs_work *work); | ||
48 | static int scrub_checksum_data(struct scrub_dev *sdev, | ||
49 | struct scrub_page *spag, void *buffer); | ||
50 | static int scrub_checksum_tree_block(struct scrub_dev *sdev, | ||
51 | struct scrub_page *spag, u64 logical, | ||
52 | void *buffer); | ||
53 | static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer); | ||
54 | static int scrub_fixup_check(struct scrub_bio *sbio, int ix); | ||
55 | static void scrub_fixup_end_io(struct bio *bio, int err); | ||
56 | static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, | ||
57 | struct page *page); | ||
58 | static void scrub_fixup(struct scrub_bio *sbio, int ix); | ||
59 | |||
60 | #define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */ | ||
61 | #define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */ | ||
62 | |||
63 | struct scrub_page { | ||
64 | u64 flags; /* extent flags */ | ||
65 | u64 generation; | ||
66 | u64 mirror_num; | ||
67 | int have_csum; | ||
68 | u8 csum[BTRFS_CSUM_SIZE]; | ||
69 | }; | ||
70 | |||
71 | struct scrub_bio { | ||
72 | int index; | ||
73 | struct scrub_dev *sdev; | ||
74 | struct bio *bio; | ||
75 | int err; | ||
76 | u64 logical; | ||
77 | u64 physical; | ||
78 | struct scrub_page spag[SCRUB_PAGES_PER_BIO]; | ||
79 | u64 count; | ||
80 | int next_free; | ||
81 | struct btrfs_work work; | ||
82 | }; | ||
83 | |||
84 | struct scrub_dev { | ||
85 | struct scrub_bio *bios[SCRUB_BIOS_PER_DEV]; | ||
86 | struct btrfs_device *dev; | ||
87 | int first_free; | ||
88 | int curr; | ||
89 | atomic_t in_flight; | ||
90 | spinlock_t list_lock; | ||
91 | wait_queue_head_t list_wait; | ||
92 | u16 csum_size; | ||
93 | struct list_head csum_list; | ||
94 | atomic_t cancel_req; | ||
95 | int readonly; | ||
96 | /* | ||
97 | * statistics | ||
98 | */ | ||
99 | struct btrfs_scrub_progress stat; | ||
100 | spinlock_t stat_lock; | ||
101 | }; | ||
102 | |||
103 | static void scrub_free_csums(struct scrub_dev *sdev) | ||
104 | { | ||
105 | while (!list_empty(&sdev->csum_list)) { | ||
106 | struct btrfs_ordered_sum *sum; | ||
107 | sum = list_first_entry(&sdev->csum_list, | ||
108 | struct btrfs_ordered_sum, list); | ||
109 | list_del(&sum->list); | ||
110 | kfree(sum); | ||
111 | } | ||
112 | } | ||
113 | |||
114 | static void scrub_free_bio(struct bio *bio) | ||
115 | { | ||
116 | int i; | ||
117 | struct page *last_page = NULL; | ||
118 | |||
119 | if (!bio) | ||
120 | return; | ||
121 | |||
122 | for (i = 0; i < bio->bi_vcnt; ++i) { | ||
123 | if (bio->bi_io_vec[i].bv_page == last_page) | ||
124 | continue; | ||
125 | last_page = bio->bi_io_vec[i].bv_page; | ||
126 | __free_page(last_page); | ||
127 | } | ||
128 | bio_put(bio); | ||
129 | } | ||
130 | |||
131 | static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) | ||
132 | { | ||
133 | int i; | ||
134 | |||
135 | if (!sdev) | ||
136 | return; | ||
137 | |||
138 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { | ||
139 | struct scrub_bio *sbio = sdev->bios[i]; | ||
140 | |||
141 | if (!sbio) | ||
142 | break; | ||
143 | |||
144 | scrub_free_bio(sbio->bio); | ||
145 | kfree(sbio); | ||
146 | } | ||
147 | |||
148 | scrub_free_csums(sdev); | ||
149 | kfree(sdev); | ||
150 | } | ||
151 | |||
152 | static noinline_for_stack | ||
153 | struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) | ||
154 | { | ||
155 | struct scrub_dev *sdev; | ||
156 | int i; | ||
157 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; | ||
158 | |||
159 | sdev = kzalloc(sizeof(*sdev), GFP_NOFS); | ||
160 | if (!sdev) | ||
161 | goto nomem; | ||
162 | sdev->dev = dev; | ||
163 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { | ||
164 | struct scrub_bio *sbio; | ||
165 | |||
166 | sbio = kzalloc(sizeof(*sbio), GFP_NOFS); | ||
167 | if (!sbio) | ||
168 | goto nomem; | ||
169 | sdev->bios[i] = sbio; | ||
170 | |||
171 | sbio->index = i; | ||
172 | sbio->sdev = sdev; | ||
173 | sbio->count = 0; | ||
174 | sbio->work.func = scrub_checksum; | ||
175 | |||
176 | if (i != SCRUB_BIOS_PER_DEV-1) | ||
177 | sdev->bios[i]->next_free = i + 1; | ||
178 | else | ||
179 | sdev->bios[i]->next_free = -1; | ||
180 | } | ||
181 | sdev->first_free = 0; | ||
182 | sdev->curr = -1; | ||
183 | atomic_set(&sdev->in_flight, 0); | ||
184 | atomic_set(&sdev->cancel_req, 0); | ||
185 | sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy); | ||
186 | INIT_LIST_HEAD(&sdev->csum_list); | ||
187 | |||
188 | spin_lock_init(&sdev->list_lock); | ||
189 | spin_lock_init(&sdev->stat_lock); | ||
190 | init_waitqueue_head(&sdev->list_wait); | ||
191 | return sdev; | ||
192 | |||
193 | nomem: | ||
194 | scrub_free_dev(sdev); | ||
195 | return ERR_PTR(-ENOMEM); | ||
196 | } | ||
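The bios array set up here doubles as a free list threaded through next_free/first_free. The release half of that protocol appears at the end of scrub_checksum() below; the claim half lives in the submission code outside this excerpt, roughly along these lines (a hedged sketch, not the actual code):

	struct scrub_bio *sbio;

	/* claim a free scrub_bio slot, or wait until one is released */
	spin_lock(&sdev->list_lock);
	while (sdev->first_free == -1) {
		spin_unlock(&sdev->list_lock);
		wait_event(sdev->list_wait, sdev->first_free != -1);
		spin_lock(&sdev->list_lock);
	}
	sbio = sdev->bios[sdev->first_free];
	sdev->first_free = sbio->next_free;
	spin_unlock(&sdev->list_lock);
	atomic_inc(&sdev->in_flight);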
197 | |||
198 | /* | ||
199 | * scrub_recheck_error gets called when either verification of the page | ||
200 | * failed or the bio failed to read, e.g. with EIO. In the latter case, | ||
201 | * recheck_error gets called for every page in the bio, even though only | ||
202 | * one may be bad | ||
203 | */ | ||
204 | static void scrub_recheck_error(struct scrub_bio *sbio, int ix) | ||
205 | { | ||
206 | if (sbio->err) { | ||
207 | if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, | ||
208 | (sbio->physical + ix * PAGE_SIZE) >> 9, | ||
209 | sbio->bio->bi_io_vec[ix].bv_page) == 0) { | ||
210 | if (scrub_fixup_check(sbio, ix) == 0) | ||
211 | return; | ||
212 | } | ||
213 | } | ||
214 | |||
215 | scrub_fixup(sbio, ix); | ||
216 | } | ||
217 | |||
218 | static int scrub_fixup_check(struct scrub_bio *sbio, int ix) | ||
219 | { | ||
220 | int ret = 1; | ||
221 | struct page *page; | ||
222 | void *buffer; | ||
223 | u64 flags = sbio->spag[ix].flags; | ||
224 | |||
225 | page = sbio->bio->bi_io_vec[ix].bv_page; | ||
226 | buffer = kmap_atomic(page, KM_USER0); | ||
227 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | ||
228 | ret = scrub_checksum_data(sbio->sdev, | ||
229 | sbio->spag + ix, buffer); | ||
230 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
231 | ret = scrub_checksum_tree_block(sbio->sdev, | ||
232 | sbio->spag + ix, | ||
233 | sbio->logical + ix * PAGE_SIZE, | ||
234 | buffer); | ||
235 | } else { | ||
236 | WARN_ON(1); | ||
237 | } | ||
238 | kunmap_atomic(buffer, KM_USER0); | ||
239 | |||
240 | return ret; | ||
241 | } | ||
242 | |||
243 | static void scrub_fixup_end_io(struct bio *bio, int err) | ||
244 | { | ||
245 | complete((struct completion *)bio->bi_private); | ||
246 | } | ||
247 | |||
248 | static void scrub_fixup(struct scrub_bio *sbio, int ix) | ||
249 | { | ||
250 | struct scrub_dev *sdev = sbio->sdev; | ||
251 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | ||
252 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | ||
253 | struct btrfs_multi_bio *multi = NULL; | ||
254 | u64 logical = sbio->logical + ix * PAGE_SIZE; | ||
255 | u64 length; | ||
256 | int i; | ||
257 | int ret; | ||
258 | DECLARE_COMPLETION_ONSTACK(complete); | ||
259 | |||
260 | if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) && | ||
261 | (sbio->spag[ix].have_csum == 0)) { | ||
262 | /* | ||
263 | * nodatasum, don't try to fix anything | ||
264 | * FIXME: we can do better, open the inode and trigger a | ||
265 | * writeback | ||
266 | */ | ||
267 | goto uncorrectable; | ||
268 | } | ||
269 | |||
270 | length = PAGE_SIZE; | ||
271 | ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, | ||
272 | &multi, 0); | ||
273 | if (ret || !multi || length < PAGE_SIZE) { | ||
274 | printk(KERN_ERR | ||
275 | "scrub_fixup: btrfs_map_block failed us for %llu\n", | ||
276 | (unsigned long long)logical); | ||
277 | WARN_ON(1); | ||
278 | return; | ||
279 | } | ||
280 | |||
281 | if (multi->num_stripes == 1) | ||
282 | /* there aren't any replicas */ | ||
283 | goto uncorrectable; | ||
284 | |||
285 | /* | ||
286 | * first find a good copy | ||
287 | */ | ||
288 | for (i = 0; i < multi->num_stripes; ++i) { | ||
289 | if (i == sbio->spag[ix].mirror_num) | ||
290 | continue; | ||
291 | |||
292 | if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev, | ||
293 | multi->stripes[i].physical >> 9, | ||
294 | sbio->bio->bi_io_vec[ix].bv_page)) { | ||
295 | /* I/O-error, this is not a good copy */ | ||
296 | continue; | ||
297 | } | ||
298 | |||
299 | if (scrub_fixup_check(sbio, ix) == 0) | ||
300 | break; | ||
301 | } | ||
302 | if (i == multi->num_stripes) | ||
303 | goto uncorrectable; | ||
304 | |||
305 | if (!sdev->readonly) { | ||
306 | /* | ||
307 | * bi_io_vec[ix].bv_page now contains good data, write it back | ||
308 | */ | ||
309 | if (scrub_fixup_io(WRITE, sdev->dev->bdev, | ||
310 | (sbio->physical + ix * PAGE_SIZE) >> 9, | ||
311 | sbio->bio->bi_io_vec[ix].bv_page)) { | ||
312 | /* I/O-error, writeback failed, give up */ | ||
313 | goto uncorrectable; | ||
314 | } | ||
315 | } | ||
316 | |||
317 | kfree(multi); | ||
318 | spin_lock(&sdev->stat_lock); | ||
319 | ++sdev->stat.corrected_errors; | ||
320 | spin_unlock(&sdev->stat_lock); | ||
321 | |||
322 | if (printk_ratelimit()) | ||
323 | printk(KERN_ERR "btrfs: fixed up at %llu\n", | ||
324 | (unsigned long long)logical); | ||
325 | return; | ||
326 | |||
327 | uncorrectable: | ||
328 | kfree(multi); | ||
329 | spin_lock(&sdev->stat_lock); | ||
330 | ++sdev->stat.uncorrectable_errors; | ||
331 | spin_unlock(&sdev->stat_lock); | ||
332 | |||
333 | if (printk_ratelimit()) | ||
334 | printk(KERN_ERR "btrfs: unable to fixup at %llu\n", | ||
335 | (unsigned long long)logical); | ||
336 | } | ||
337 | |||
338 | static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, | ||
339 | struct page *page) | ||
340 | { | ||
341 | struct bio *bio = NULL; | ||
342 | int ret; | ||
343 | DECLARE_COMPLETION_ONSTACK(complete); | ||
344 | |||
345 | bio = bio_alloc(GFP_NOFS, 1); | ||
346 | bio->bi_bdev = bdev; | ||
347 | bio->bi_sector = sector; | ||
348 | bio_add_page(bio, page, PAGE_SIZE, 0); | ||
349 | bio->bi_end_io = scrub_fixup_end_io; | ||
350 | bio->bi_private = &complete; | ||
351 | submit_bio(rw, bio); | ||
352 | |||
353 | /* this will also unplug the queue */ | ||
354 | wait_for_completion(&complete); | ||
355 | |||
356 | ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
357 | bio_put(bio); | ||
358 | return ret; | ||
359 | } | ||
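scrub_fixup_io() is a small synchronous I/O helper: it submits a single-page bio, blocks on the on-stack completion, and returns nonzero if BIO_UPTODATE was not set. Its callers shift byte offsets down by 9 because bi_sector counts 512-byte sectors. A usage sketch (bdev, physical and page assumed from a caller):

	/* synchronously re-read one page; 0 means the read succeeded */
	if (scrub_fixup_io(READ, bdev, physical >> 9, page) == 0) {
		/* page now holds the current on-disk contents */
	}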
360 | |||
361 | static void scrub_bio_end_io(struct bio *bio, int err) | ||
362 | { | ||
363 | struct scrub_bio *sbio = bio->bi_private; | ||
364 | struct scrub_dev *sdev = sbio->sdev; | ||
365 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | ||
366 | |||
367 | sbio->err = err; | ||
368 | sbio->bio = bio; | ||
369 | |||
370 | btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); | ||
371 | } | ||
372 | |||
373 | static void scrub_checksum(struct btrfs_work *work) | ||
374 | { | ||
375 | struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); | ||
376 | struct scrub_dev *sdev = sbio->sdev; | ||
377 | struct page *page; | ||
378 | void *buffer; | ||
379 | int i; | ||
380 | u64 flags; | ||
381 | u64 logical; | ||
382 | int ret; | ||
383 | |||
384 | if (sbio->err) { | ||
385 | for (i = 0; i < sbio->count; ++i) | ||
386 | scrub_recheck_error(sbio, i); | ||
387 | |||
388 | sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1); | ||
389 | sbio->bio->bi_flags |= 1 << BIO_UPTODATE; | ||
390 | sbio->bio->bi_phys_segments = 0; | ||
391 | sbio->bio->bi_idx = 0; | ||
392 | |||
393 | for (i = 0; i < sbio->count; i++) { | ||
394 | struct bio_vec *bi; | ||
395 | bi = &sbio->bio->bi_io_vec[i]; | ||
396 | bi->bv_offset = 0; | ||
397 | bi->bv_len = PAGE_SIZE; | ||
398 | } | ||
399 | |||
400 | spin_lock(&sdev->stat_lock); | ||
401 | ++sdev->stat.read_errors; | ||
402 | spin_unlock(&sdev->stat_lock); | ||
403 | goto out; | ||
404 | } | ||
405 | for (i = 0; i < sbio->count; ++i) { | ||
406 | page = sbio->bio->bi_io_vec[i].bv_page; | ||
407 | buffer = kmap_atomic(page, KM_USER0); | ||
408 | flags = sbio->spag[i].flags; | ||
409 | logical = sbio->logical + i * PAGE_SIZE; | ||
410 | ret = 0; | ||
411 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | ||
412 | ret = scrub_checksum_data(sdev, sbio->spag + i, buffer); | ||
413 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
414 | ret = scrub_checksum_tree_block(sdev, sbio->spag + i, | ||
415 | logical, buffer); | ||
416 | } else if (flags & BTRFS_EXTENT_FLAG_SUPER) { | ||
417 | BUG_ON(i); | ||
418 | (void)scrub_checksum_super(sbio, buffer); | ||
419 | } else { | ||
420 | WARN_ON(1); | ||
421 | } | ||
422 | kunmap_atomic(buffer, KM_USER0); | ||
423 | if (ret) | ||
424 | scrub_recheck_error(sbio, i); | ||
425 | } | ||
426 | |||
427 | out: | ||
428 | scrub_free_bio(sbio->bio); | ||
429 | sbio->bio = NULL; | ||
430 | spin_lock(&sdev->list_lock); | ||
431 | sbio->next_free = sdev->first_free; | ||
432 | sdev->first_free = sbio->index; | ||
433 | spin_unlock(&sdev->list_lock); | ||
434 | atomic_dec(&sdev->in_flight); | ||
435 | wake_up(&sdev->list_wait); | ||
436 | } | ||
437 | |||
438 | static int scrub_checksum_data(struct scrub_dev *sdev, | ||
439 | struct scrub_page *spag, void *buffer) | ||
440 | { | ||
441 | u8 csum[BTRFS_CSUM_SIZE]; | ||
442 | u32 crc = ~(u32)0; | ||
443 | int fail = 0; | ||
444 | struct btrfs_root *root = sdev->dev->dev_root; | ||
445 | |||
446 | if (!spag->have_csum) | ||
447 | return 0; | ||
448 | |||
449 | crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE); | ||
450 | btrfs_csum_final(crc, csum); | ||
451 | if (memcmp(csum, spag->csum, sdev->csum_size)) | ||
452 | fail = 1; | ||
453 | |||
454 | spin_lock(&sdev->stat_lock); | ||
455 | ++sdev->stat.data_extents_scrubbed; | ||
456 | sdev->stat.data_bytes_scrubbed += PAGE_SIZE; | ||
457 | if (fail) | ||
458 | ++sdev->stat.csum_errors; | ||
459 | spin_unlock(&sdev->stat_lock); | ||
460 | |||
461 | return fail; | ||
462 | } | ||
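
btrfs data checksums are CRC32C over the page, seeded with ~0 and inverted on finalization; scrub just recomputes and memcmp()s against the csum carried in spag->csum. A user-space sketch of the same check — the bitwise CRC32C below is standard, but the little-endian storage of the final value is an assumption:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* bitwise CRC32C (Castagnoli), reflected polynomial 0x82F63B78 */
static uint32_t crc32c(uint32_t crc, const void *buf, size_t len)
{
        const uint8_t *p = buf;
        int k;

        while (len--) {
                crc ^= *p++;
                for (k = 0; k < 8; k++)
                        crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
        }
        return crc;
}

int main(void)
{
        uint8_t page[4096];
        uint8_t stored[4];      /* what spag->csum would hold */
        uint32_t crc;

        memset(page, 0x5a, sizeof(page));

        /* seed with ~0 as the scrub code does, then finalize */
        crc = ~crc32c(~(uint32_t)0, page, sizeof(page));
        memcpy(stored, &crc, sizeof(crc));      /* assume little-endian storage */

        /* re-check, the way scrub_checksum_data compares */
        crc = ~crc32c(~(uint32_t)0, page, sizeof(page));
        printf("csum %s\n", memcmp(stored, &crc, 4) ? "FAILS" : "ok");
        return 0;
}
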
463 | |||
464 | static int scrub_checksum_tree_block(struct scrub_dev *sdev, | ||
465 | struct scrub_page *spag, u64 logical, | ||
466 | void *buffer) | ||
467 | { | ||
468 | struct btrfs_header *h; | ||
469 | struct btrfs_root *root = sdev->dev->dev_root; | ||
470 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
471 | u8 csum[BTRFS_CSUM_SIZE]; | ||
472 | u32 crc = ~(u32)0; | ||
473 | int fail = 0; | ||
474 | int crc_fail = 0; | ||
475 | |||
476 | /* | ||
477 | * we don't use the getter functions here, as we | ||
478 | * a) don't have an extent buffer and | ||
479 | * b) the page is already kmapped | ||
480 | */ | ||
481 | h = (struct btrfs_header *)buffer; | ||
482 | |||
483 | if (logical != le64_to_cpu(h->bytenr)) | ||
484 | ++fail; | ||
485 | |||
486 | if (spag->generation != le64_to_cpu(h->generation)) | ||
487 | ++fail; | ||
488 | |||
489 | if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) | ||
490 | ++fail; | ||
491 | |||
492 | if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, | ||
493 | BTRFS_UUID_SIZE)) | ||
494 | ++fail; | ||
495 | |||
496 | crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc, | ||
497 | PAGE_SIZE - BTRFS_CSUM_SIZE); | ||
498 | btrfs_csum_final(crc, csum); | ||
499 | if (memcmp(csum, h->csum, sdev->csum_size)) | ||
500 | ++crc_fail; | ||
501 | |||
502 | spin_lock(&sdev->stat_lock); | ||
503 | ++sdev->stat.tree_extents_scrubbed; | ||
504 | sdev->stat.tree_bytes_scrubbed += PAGE_SIZE; | ||
505 | if (crc_fail) | ||
506 | ++sdev->stat.csum_errors; | ||
507 | if (fail) | ||
508 | ++sdev->stat.verify_errors; | ||
509 | spin_unlock(&sdev->stat_lock); | ||
510 | |||
511 | return fail || crc_fail; | ||
512 | } | ||
513 | |||
514 | static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer) | ||
515 | { | ||
516 | struct btrfs_super_block *s; | ||
517 | u64 logical; | ||
518 | struct scrub_dev *sdev = sbio->sdev; | ||
519 | struct btrfs_root *root = sdev->dev->dev_root; | ||
520 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
521 | u8 csum[BTRFS_CSUM_SIZE]; | ||
522 | u32 crc = ~(u32)0; | ||
523 | int fail = 0; | ||
524 | |||
525 | s = (struct btrfs_super_block *)buffer; | ||
526 | logical = sbio->logical; | ||
527 | |||
528 | if (logical != le64_to_cpu(s->bytenr)) | ||
529 | ++fail; | ||
530 | |||
531 | if (sbio->spag[0].generation != le64_to_cpu(s->generation)) | ||
532 | ++fail; | ||
533 | |||
534 | if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) | ||
535 | ++fail; | ||
536 | |||
537 | crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc, | ||
538 | PAGE_SIZE - BTRFS_CSUM_SIZE); | ||
539 | btrfs_csum_final(crc, csum); | ||
540 | if (memcmp(csum, s->csum, sbio->sdev->csum_size)) | ||
541 | ++fail; | ||
542 | |||
543 | if (fail) { | ||
544 | /* | ||
545 | * if we find an error in a super block, we just report it. | ||
546 | * They will get written with the next transaction commit | ||
547 | * anyway | ||
548 | */ | ||
549 | spin_lock(&sdev->stat_lock); | ||
550 | ++sdev->stat.super_errors; | ||
551 | spin_unlock(&sdev->stat_lock); | ||
552 | } | ||
553 | |||
554 | return fail; | ||
555 | } | ||
556 | |||
557 | static int scrub_submit(struct scrub_dev *sdev) | ||
558 | { | ||
559 | struct scrub_bio *sbio; | ||
560 | struct bio *bio; | ||
561 | int i; | ||
562 | |||
563 | if (sdev->curr == -1) | ||
564 | return 0; | ||
565 | |||
566 | sbio = sdev->bios[sdev->curr]; | ||
567 | |||
568 | bio = bio_alloc(GFP_NOFS, sbio->count); | ||
569 | if (!bio) | ||
570 | goto nomem; | ||
571 | |||
572 | bio->bi_private = sbio; | ||
573 | bio->bi_end_io = scrub_bio_end_io; | ||
574 | bio->bi_bdev = sdev->dev->bdev; | ||
575 | bio->bi_sector = sbio->physical >> 9; | ||
576 | |||
577 | for (i = 0; i < sbio->count; ++i) { | ||
578 | struct page *page; | ||
579 | int ret; | ||
580 | |||
581 | page = alloc_page(GFP_NOFS); | ||
582 | if (!page) | ||
583 | goto nomem; | ||
584 | |||
585 | ret = bio_add_page(bio, page, PAGE_SIZE, 0); | ||
586 | if (!ret) { | ||
587 | __free_page(page); | ||
588 | goto nomem; | ||
589 | } | ||
590 | } | ||
591 | |||
592 | sbio->err = 0; | ||
593 | sdev->curr = -1; | ||
594 | atomic_inc(&sdev->in_flight); | ||
595 | |||
596 | submit_bio(READ, bio); | ||
597 | |||
598 | return 0; | ||
599 | |||
600 | nomem: | ||
601 | scrub_free_bio(bio); | ||
602 | |||
603 | return -ENOMEM; | ||
604 | } | ||
605 | |||
606 | static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, | ||
607 | u64 physical, u64 flags, u64 gen, u64 mirror_num, | ||
608 | u8 *csum, int force) | ||
609 | { | ||
610 | struct scrub_bio *sbio; | ||
611 | |||
612 | again: | ||
613 | /* | ||
614 | * grab a fresh bio or wait for one to become available | ||
615 | */ | ||
616 | while (sdev->curr == -1) { | ||
617 | spin_lock(&sdev->list_lock); | ||
618 | sdev->curr = sdev->first_free; | ||
619 | if (sdev->curr != -1) { | ||
620 | sdev->first_free = sdev->bios[sdev->curr]->next_free; | ||
621 | sdev->bios[sdev->curr]->next_free = -1; | ||
622 | sdev->bios[sdev->curr]->count = 0; | ||
623 | spin_unlock(&sdev->list_lock); | ||
624 | } else { | ||
625 | spin_unlock(&sdev->list_lock); | ||
626 | wait_event(sdev->list_wait, sdev->first_free != -1); | ||
627 | } | ||
628 | } | ||
629 | sbio = sdev->bios[sdev->curr]; | ||
630 | if (sbio->count == 0) { | ||
631 | sbio->physical = physical; | ||
632 | sbio->logical = logical; | ||
633 | } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || | ||
634 | sbio->logical + sbio->count * PAGE_SIZE != logical) { | ||
635 | int ret; | ||
636 | |||
637 | ret = scrub_submit(sdev); | ||
638 | if (ret) | ||
639 | return ret; | ||
640 | goto again; | ||
641 | } | ||
642 | sbio->spag[sbio->count].flags = flags; | ||
643 | sbio->spag[sbio->count].generation = gen; | ||
644 | sbio->spag[sbio->count].have_csum = 0; | ||
645 | sbio->spag[sbio->count].mirror_num = mirror_num; | ||
646 | if (csum) { | ||
647 | sbio->spag[sbio->count].have_csum = 1; | ||
648 | memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); | ||
649 | } | ||
650 | ++sbio->count; | ||
651 | if (sbio->count == SCRUB_PAGES_PER_BIO || force) { | ||
652 | int ret; | ||
653 | |||
654 | ret = scrub_submit(sdev); | ||
655 | if (ret) | ||
656 | return ret; | ||
657 | } | ||
658 | |||
659 | return 0; | ||
660 | } | ||
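
scrub_page() batches pages into the current bio only while both the physical and the logical addresses stay contiguous; any gap, or a full bio, forces a submit. A small user-space model of that batching rule (PAGES_PER_BIO = 16 is an assumption, consistent with the 64 kB-per-bio comment further down):

#include <stdio.h>

#define PAGE_SZ            4096
#define PAGES_PER_BIO      16          /* assumed; matches a 64 kB bio */

struct batch {
        unsigned long long phys, log;  /* start of the pending bio */
        int count;                     /* pages queued so far */
};

static void submit(struct batch *b)
{
        if (b->count)
                printf("submit %d pages at phys %llu\n", b->count, b->phys);
        b->count = 0;
}

/* queue one page, flushing first if it would break contiguity */
static void queue_page(struct batch *b, unsigned long long log,
                       unsigned long long phys)
{
        if (b->count &&
            (b->phys + b->count * PAGE_SZ != phys ||
             b->log  + b->count * PAGE_SZ != log))
                submit(b);             /* not contiguous: flush and restart */

        if (b->count == 0) {
                b->phys = phys;
                b->log = log;
        }
        if (++b->count == PAGES_PER_BIO)
                submit(b);             /* bio is full */
}

int main(void)
{
        struct batch b = { 0 };
        unsigned long long off;

        for (off = 0; off < 8 * PAGE_SZ; off += PAGE_SZ)
                queue_page(&b, 1000000 + off, 5000000 + off);
        queue_page(&b, 9000000, 7000000);       /* discontiguous: forces a flush */
        submit(&b);                             /* final flush */
        return 0;
}
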
661 | |||
662 | static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, | ||
663 | u8 *csum) | ||
664 | { | ||
665 | struct btrfs_ordered_sum *sum = NULL; | ||
666 | int ret = 0; | ||
667 | unsigned long i; | ||
668 | unsigned long num_sectors; | ||
669 | u32 sectorsize = sdev->dev->dev_root->sectorsize; | ||
670 | |||
671 | while (!list_empty(&sdev->csum_list)) { | ||
672 | sum = list_first_entry(&sdev->csum_list, | ||
673 | struct btrfs_ordered_sum, list); | ||
674 | if (sum->bytenr > logical) | ||
675 | return 0; | ||
676 | if (sum->bytenr + sum->len > logical) | ||
677 | break; | ||
678 | |||
679 | ++sdev->stat.csum_discards; | ||
680 | list_del(&sum->list); | ||
681 | kfree(sum); | ||
682 | sum = NULL; | ||
683 | } | ||
684 | if (!sum) | ||
685 | return 0; | ||
686 | |||
687 | num_sectors = sum->len / sectorsize; | ||
688 | for (i = 0; i < num_sectors; ++i) { | ||
689 | if (sum->sums[i].bytenr == logical) { | ||
690 | memcpy(csum, &sum->sums[i].sum, sdev->csum_size); | ||
691 | ret = 1; | ||
692 | break; | ||
693 | } | ||
694 | } | ||
695 | if (ret && i == num_sectors - 1) { | ||
696 | list_del(&sum->list); | ||
697 | kfree(sum); | ||
698 | } | ||
699 | return ret; | ||
700 | } | ||
701 | |||
702 | /* scrub_extent tries to collect up to 64 kB for each bio */ | ||
703 | static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len, | ||
704 | u64 physical, u64 flags, u64 gen, u64 mirror_num) | ||
705 | { | ||
706 | int ret; | ||
707 | u8 csum[BTRFS_CSUM_SIZE]; | ||
708 | |||
709 | while (len) { | ||
710 | u64 l = min_t(u64, len, PAGE_SIZE); | ||
711 | int have_csum = 0; | ||
712 | |||
713 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | ||
714 | /* push csums to sbio */ | ||
715 | have_csum = scrub_find_csum(sdev, logical, l, csum); | ||
716 | if (have_csum == 0) | ||
717 | ++sdev->stat.no_csum; | ||
718 | } | ||
719 | ret = scrub_page(sdev, logical, l, physical, flags, gen, | ||
720 | mirror_num, have_csum ? csum : NULL, 0); | ||
721 | if (ret) | ||
722 | return ret; | ||
723 | len -= l; | ||
724 | logical += l; | ||
725 | physical += l; | ||
726 | } | ||
727 | return 0; | ||
728 | } | ||
729 | |||
730 | static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | ||
731 | struct map_lookup *map, int num, u64 base, u64 length) | ||
732 | { | ||
733 | struct btrfs_path *path; | ||
734 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | ||
735 | struct btrfs_root *root = fs_info->extent_root; | ||
736 | struct btrfs_root *csum_root = fs_info->csum_root; | ||
737 | struct btrfs_extent_item *extent; | ||
738 | struct blk_plug plug; | ||
739 | u64 flags; | ||
740 | int ret; | ||
741 | int slot; | ||
742 | int i; | ||
743 | u64 nstripes; | ||
744 | int start_stripe; | ||
745 | struct extent_buffer *l; | ||
746 | struct btrfs_key key; | ||
747 | u64 physical; | ||
748 | u64 logical; | ||
749 | u64 generation; | ||
750 | u64 mirror_num; | ||
751 | |||
752 | u64 increment = map->stripe_len; | ||
753 | u64 offset; | ||
754 | |||
755 | nstripes = length; | ||
756 | offset = 0; | ||
757 | do_div(nstripes, map->stripe_len); | ||
758 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | ||
759 | offset = map->stripe_len * num; | ||
760 | increment = map->stripe_len * map->num_stripes; | ||
761 | mirror_num = 0; | ||
762 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | ||
763 | int factor = map->num_stripes / map->sub_stripes; | ||
764 | offset = map->stripe_len * (num / map->sub_stripes); | ||
765 | increment = map->stripe_len * factor; | ||
766 | mirror_num = num % map->sub_stripes; | ||
767 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | ||
768 | increment = map->stripe_len; | ||
769 | mirror_num = num % map->num_stripes; | ||
770 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | ||
771 | increment = map->stripe_len; | ||
772 | mirror_num = num % map->num_stripes; | ||
773 | } else { | ||
774 | increment = map->stripe_len; | ||
775 | mirror_num = 0; | ||
776 | } | ||
777 | |||
778 | path = btrfs_alloc_path(); | ||
779 | if (!path) | ||
780 | return -ENOMEM; | ||
781 | |||
782 | path->reada = 2; | ||
783 | path->search_commit_root = 1; | ||
784 | path->skip_locking = 1; | ||
785 | |||
786 | /* | ||
787 | * find all extents for each stripe and just read them to get | ||
788 | * them into the page cache | ||
789 | * FIXME: we can do better. build more intelligent prefetching | ||
790 | */ | ||
791 | logical = base + offset; | ||
792 | physical = map->stripes[num].physical; | ||
793 | ret = 0; | ||
794 | for (i = 0; i < nstripes; ++i) { | ||
795 | key.objectid = logical; | ||
796 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
797 | key.offset = (u64)0; | ||
798 | |||
799 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
800 | if (ret < 0) | ||
801 | goto out_noplug; | ||
802 | |||
803 | /* | ||
804 | * we might miss half an extent here, but that doesn't matter, | ||
805 | * as it's only the prefetch | ||
806 | */ | ||
807 | while (1) { | ||
808 | l = path->nodes[0]; | ||
809 | slot = path->slots[0]; | ||
810 | if (slot >= btrfs_header_nritems(l)) { | ||
811 | ret = btrfs_next_leaf(root, path); | ||
812 | if (ret == 0) | ||
813 | continue; | ||
814 | if (ret < 0) | ||
815 | goto out_noplug; | ||
816 | |||
817 | break; | ||
818 | } | ||
819 | btrfs_item_key_to_cpu(l, &key, slot); | ||
820 | |||
821 | if (key.objectid >= logical + map->stripe_len) | ||
822 | break; | ||
823 | |||
824 | path->slots[0]++; | ||
825 | } | ||
826 | btrfs_release_path(path); | ||
827 | logical += increment; | ||
828 | physical += map->stripe_len; | ||
829 | cond_resched(); | ||
830 | } | ||
831 | |||
832 | /* | ||
833 | * collect all data csums for the stripe to avoid seeking during | ||
834 | * the scrub. This might currently (crc32) end up being about 1MB | ||
835 | */ | ||
836 | start_stripe = 0; | ||
837 | blk_start_plug(&plug); | ||
838 | again: | ||
839 | logical = base + offset + start_stripe * increment; | ||
840 | for (i = start_stripe; i < nstripes; ++i) { | ||
841 | ret = btrfs_lookup_csums_range(csum_root, logical, | ||
842 | logical + map->stripe_len - 1, | ||
843 | &sdev->csum_list, 1); | ||
844 | if (ret) | ||
845 | goto out; | ||
846 | |||
847 | logical += increment; | ||
848 | cond_resched(); | ||
849 | } | ||
850 | /* | ||
851 | * now find all extents for each stripe and scrub them | ||
852 | */ | ||
853 | logical = base + offset + start_stripe * increment; | ||
854 | physical = map->stripes[num].physical + start_stripe * map->stripe_len; | ||
855 | ret = 0; | ||
856 | for (i = start_stripe; i < nstripes; ++i) { | ||
857 | /* | ||
858 | * canceled? | ||
859 | */ | ||
860 | if (atomic_read(&fs_info->scrub_cancel_req) || | ||
861 | atomic_read(&sdev->cancel_req)) { | ||
862 | ret = -ECANCELED; | ||
863 | goto out; | ||
864 | } | ||
865 | /* | ||
866 | * check to see if we have to pause | ||
867 | */ | ||
868 | if (atomic_read(&fs_info->scrub_pause_req)) { | ||
869 | /* push queued extents */ | ||
870 | scrub_submit(sdev); | ||
871 | wait_event(sdev->list_wait, | ||
872 | atomic_read(&sdev->in_flight) == 0); | ||
873 | atomic_inc(&fs_info->scrubs_paused); | ||
874 | wake_up(&fs_info->scrub_pause_wait); | ||
875 | mutex_lock(&fs_info->scrub_lock); | ||
876 | while (atomic_read(&fs_info->scrub_pause_req)) { | ||
877 | mutex_unlock(&fs_info->scrub_lock); | ||
878 | wait_event(fs_info->scrub_pause_wait, | ||
879 | atomic_read(&fs_info->scrub_pause_req) == 0); | ||
880 | mutex_lock(&fs_info->scrub_lock); | ||
881 | } | ||
882 | atomic_dec(&fs_info->scrubs_paused); | ||
883 | mutex_unlock(&fs_info->scrub_lock); | ||
884 | wake_up(&fs_info->scrub_pause_wait); | ||
885 | scrub_free_csums(sdev); | ||
886 | start_stripe = i; | ||
887 | goto again; | ||
888 | } | ||
889 | |||
890 | key.objectid = logical; | ||
891 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
892 | key.offset = (u64)0; | ||
893 | |||
894 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
895 | if (ret < 0) | ||
896 | goto out; | ||
897 | if (ret > 0) { | ||
898 | ret = btrfs_previous_item(root, path, 0, | ||
899 | BTRFS_EXTENT_ITEM_KEY); | ||
900 | if (ret < 0) | ||
901 | goto out; | ||
902 | if (ret > 0) { | ||
903 | /* there's no smaller item, so stick with the | ||
904 | * larger one */ | ||
905 | btrfs_release_path(path); | ||
906 | ret = btrfs_search_slot(NULL, root, &key, | ||
907 | path, 0, 0); | ||
908 | if (ret < 0) | ||
909 | goto out; | ||
910 | } | ||
911 | } | ||
912 | |||
913 | while (1) { | ||
914 | l = path->nodes[0]; | ||
915 | slot = path->slots[0]; | ||
916 | if (slot >= btrfs_header_nritems(l)) { | ||
917 | ret = btrfs_next_leaf(root, path); | ||
918 | if (ret == 0) | ||
919 | continue; | ||
920 | if (ret < 0) | ||
921 | goto out; | ||
922 | |||
923 | break; | ||
924 | } | ||
925 | btrfs_item_key_to_cpu(l, &key, slot); | ||
926 | |||
927 | if (key.objectid + key.offset <= logical) | ||
928 | goto next; | ||
929 | |||
930 | if (key.objectid >= logical + map->stripe_len) | ||
931 | break; | ||
932 | |||
933 | if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) | ||
934 | goto next; | ||
935 | |||
936 | extent = btrfs_item_ptr(l, slot, | ||
937 | struct btrfs_extent_item); | ||
938 | flags = btrfs_extent_flags(l, extent); | ||
939 | generation = btrfs_extent_generation(l, extent); | ||
940 | |||
941 | if (key.objectid < logical && | ||
942 | (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { | ||
943 | printk(KERN_ERR | ||
944 | "btrfs scrub: tree block %llu spanning " | ||
945 | "stripes, ignored. logical=%llu\n", | ||
946 | (unsigned long long)key.objectid, | ||
947 | (unsigned long long)logical); | ||
948 | goto next; | ||
949 | } | ||
950 | |||
951 | /* | ||
952 | * trim extent to this stripe | ||
953 | */ | ||
954 | if (key.objectid < logical) { | ||
955 | key.offset -= logical - key.objectid; | ||
956 | key.objectid = logical; | ||
957 | } | ||
958 | if (key.objectid + key.offset > | ||
959 | logical + map->stripe_len) { | ||
960 | key.offset = logical + map->stripe_len - | ||
961 | key.objectid; | ||
962 | } | ||
963 | |||
964 | ret = scrub_extent(sdev, key.objectid, key.offset, | ||
965 | key.objectid - logical + physical, | ||
966 | flags, generation, mirror_num); | ||
967 | if (ret) | ||
968 | goto out; | ||
969 | |||
970 | next: | ||
971 | path->slots[0]++; | ||
972 | } | ||
973 | btrfs_release_path(path); | ||
974 | logical += increment; | ||
975 | physical += map->stripe_len; | ||
976 | spin_lock(&sdev->stat_lock); | ||
977 | sdev->stat.last_physical = physical; | ||
978 | spin_unlock(&sdev->stat_lock); | ||
979 | } | ||
980 | /* push queued extents */ | ||
981 | scrub_submit(sdev); | ||
982 | |||
983 | out: | ||
984 | blk_finish_plug(&plug); | ||
985 | out_noplug: | ||
986 | btrfs_free_path(path); | ||
987 | return ret < 0 ? ret : 0; | ||
988 | } | ||
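
The offset/increment/mirror_num arithmetic at the top of scrub_stripe() decides which slices of the chunk's logical address space live on stripe num of the device being scrubbed: RAID0 gives each device every num_stripes-th stripe_len, while RAID10 groups sub_stripes devices as mirrors of the same slice. A runnable worked example for a 4-device RAID10 chunk:

#include <stdio.h>

int main(void)
{
        unsigned long long stripe_len = 64 * 1024;      /* typical 64 kB */
        int num_stripes = 4, sub_stripes = 2;           /* RAID10, 4 devices */
        int num;

        for (num = 0; num < num_stripes; num++) {
                int factor = num_stripes / sub_stripes;
                unsigned long long offset =
                        stripe_len * (num / sub_stripes);
                unsigned long long increment = stripe_len * factor;
                int mirror = num % sub_stripes;

                printf("stripe %d: first slice at offset %7llu, "
                       "step %7llu, mirror %d\n",
                       num, offset, increment, mirror);
        }
        return 0;
}

Stripes 0 and 1 come out as the two mirrors of the slice starting at offset 0, stripes 2 and 3 as the two copies of the slice at 64 kB, and every device steps through logical space in 128 kB increments.
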
989 | |||
990 | static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev, | ||
991 | u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length) | ||
992 | { | ||
993 | struct btrfs_mapping_tree *map_tree = | ||
994 | &sdev->dev->dev_root->fs_info->mapping_tree; | ||
995 | struct map_lookup *map; | ||
996 | struct extent_map *em; | ||
997 | int i; | ||
998 | int ret = -EINVAL; | ||
999 | |||
1000 | read_lock(&map_tree->map_tree.lock); | ||
1001 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); | ||
1002 | read_unlock(&map_tree->map_tree.lock); | ||
1003 | |||
1004 | if (!em) | ||
1005 | return -EINVAL; | ||
1006 | |||
1007 | map = (struct map_lookup *)em->bdev; | ||
1008 | if (em->start != chunk_offset) | ||
1009 | goto out; | ||
1010 | |||
1011 | if (em->len < length) | ||
1012 | goto out; | ||
1013 | |||
1014 | for (i = 0; i < map->num_stripes; ++i) { | ||
1015 | if (map->stripes[i].dev == sdev->dev) { | ||
1016 | ret = scrub_stripe(sdev, map, i, chunk_offset, length); | ||
1017 | if (ret) | ||
1018 | goto out; | ||
1019 | } | ||
1020 | } | ||
1021 | out: | ||
1022 | free_extent_map(em); | ||
1023 | |||
1024 | return ret; | ||
1025 | } | ||
1026 | |||
1027 | static noinline_for_stack | ||
1028 | int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | ||
1029 | { | ||
1030 | struct btrfs_dev_extent *dev_extent = NULL; | ||
1031 | struct btrfs_path *path; | ||
1032 | struct btrfs_root *root = sdev->dev->dev_root; | ||
1033 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1034 | u64 length; | ||
1035 | u64 chunk_tree; | ||
1036 | u64 chunk_objectid; | ||
1037 | u64 chunk_offset; | ||
1038 | int ret; | ||
1039 | int slot; | ||
1040 | struct extent_buffer *l; | ||
1041 | struct btrfs_key key; | ||
1042 | struct btrfs_key found_key; | ||
1043 | struct btrfs_block_group_cache *cache; | ||
1044 | |||
1045 | path = btrfs_alloc_path(); | ||
1046 | if (!path) | ||
1047 | return -ENOMEM; | ||
1048 | |||
1049 | path->reada = 2; | ||
1050 | path->search_commit_root = 1; | ||
1051 | path->skip_locking = 1; | ||
1052 | |||
1053 | key.objectid = sdev->dev->devid; | ||
1054 | key.offset = 0ull; | ||
1055 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
1056 | |||
1057 | |||
1058 | while (1) { | ||
1059 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
1060 | if (ret < 0) | ||
1061 | break; | ||
1062 | if (ret > 0) { | ||
1063 | if (path->slots[0] >= | ||
1064 | btrfs_header_nritems(path->nodes[0])) { | ||
1065 | ret = btrfs_next_leaf(root, path); | ||
1066 | if (ret) | ||
1067 | break; | ||
1068 | } | ||
1069 | } | ||
1070 | |||
1071 | l = path->nodes[0]; | ||
1072 | slot = path->slots[0]; | ||
1073 | |||
1074 | btrfs_item_key_to_cpu(l, &found_key, slot); | ||
1075 | |||
1076 | if (found_key.objectid != sdev->dev->devid) | ||
1077 | break; | ||
1078 | |||
1079 | if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY) | ||
1080 | break; | ||
1081 | |||
1082 | if (found_key.offset >= end) | ||
1083 | break; | ||
1084 | |||
1085 | if (found_key.offset < key.offset) | ||
1086 | break; | ||
1087 | |||
1088 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | ||
1089 | length = btrfs_dev_extent_length(l, dev_extent); | ||
1090 | |||
1091 | if (found_key.offset + length <= start) { | ||
1092 | key.offset = found_key.offset + length; | ||
1093 | btrfs_release_path(path); | ||
1094 | continue; | ||
1095 | } | ||
1096 | |||
1097 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); | ||
1098 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); | ||
1099 | chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); | ||
1100 | |||
1101 | /* | ||
1102 | * get a reference on the corresponding block group to prevent | ||
1103 | * the chunk from going away while we scrub it | ||
1104 | */ | ||
1105 | cache = btrfs_lookup_block_group(fs_info, chunk_offset); | ||
1106 | if (!cache) { | ||
1107 | ret = -ENOENT; | ||
1108 | break; | ||
1109 | } | ||
1110 | ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, | ||
1111 | chunk_offset, length); | ||
1112 | btrfs_put_block_group(cache); | ||
1113 | if (ret) | ||
1114 | break; | ||
1115 | |||
1116 | key.offset = found_key.offset + length; | ||
1117 | btrfs_release_path(path); | ||
1118 | } | ||
1119 | |||
1120 | btrfs_free_path(path); | ||
1121 | |||
1122 | /* | ||
1123 | * ret can still be 1 from search_slot or next_leaf, | ||
1124 | * that's not an error | ||
1125 | */ | ||
1126 | return ret < 0 ? ret : 0; | ||
1127 | } | ||
1128 | |||
1129 | static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) | ||
1130 | { | ||
1131 | int i; | ||
1132 | u64 bytenr; | ||
1133 | u64 gen; | ||
1134 | int ret; | ||
1135 | struct btrfs_device *device = sdev->dev; | ||
1136 | struct btrfs_root *root = device->dev_root; | ||
1137 | |||
1138 | gen = root->fs_info->last_trans_committed; | ||
1139 | |||
1140 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { | ||
1141 | bytenr = btrfs_sb_offset(i); | ||
1142 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) | ||
1143 | break; | ||
1144 | |||
1145 | ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr, | ||
1146 | BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1); | ||
1147 | if (ret) | ||
1148 | return ret; | ||
1149 | } | ||
1150 | wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0); | ||
1151 | |||
1152 | return 0; | ||
1153 | } | ||
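
scrub_supers() walks the fixed superblock mirror locations and stops at the first one that no longer fits on the device. A sketch with the offsets spelled out — the 16 kB base and the 12-bit shift per mirror are assumptions mirroring btrfs_sb_offset(), which is defined elsewhere:

#include <stdio.h>

#define SUPER_INFO_OFFSET   (64 * 1024ULL)   /* primary super */
#define SUPER_MIRROR_MAX    3
#define SUPER_MIRROR_SHIFT  12               /* assumed, as in btrfs_sb_offset */

static unsigned long long sb_offset(int mirror)
{
        unsigned long long start = 16 * 1024ULL;

        if (mirror)
                return start << (SUPER_MIRROR_SHIFT * mirror);
        return SUPER_INFO_OFFSET;
}

int main(void)
{
        unsigned long long dev_size = 1ULL << 40;       /* 1 TiB device */
        int i;

        for (i = 0; i < SUPER_MIRROR_MAX; i++) {
                unsigned long long bytenr = sb_offset(i);

                if (bytenr + 4096 >= dev_size)
                        break;          /* mirror falls off the device */
                printf("super mirror %d at %llu\n", i, bytenr);
        }
        return 0;
}
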
1154 | |||
1155 | /* | ||
1156 | * get a reference on fs_info->scrub_workers. start the workers if necessary | ||
1157 | */ | ||
1158 | static noinline_for_stack int scrub_workers_get(struct btrfs_root *root) | ||
1159 | { | ||
1160 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1161 | |||
1162 | mutex_lock(&fs_info->scrub_lock); | ||
1163 | if (fs_info->scrub_workers_refcnt == 0) { | ||
1164 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", | ||
1165 | fs_info->thread_pool_size, &fs_info->generic_worker); | ||
1166 | fs_info->scrub_workers.idle_thresh = 4; | ||
1167 | btrfs_start_workers(&fs_info->scrub_workers, 1); | ||
1168 | } | ||
1169 | ++fs_info->scrub_workers_refcnt; | ||
1170 | mutex_unlock(&fs_info->scrub_lock); | ||
1171 | |||
1172 | return 0; | ||
1173 | } | ||
1174 | |||
1175 | static noinline_for_stack void scrub_workers_put(struct btrfs_root *root) | ||
1176 | { | ||
1177 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1178 | |||
1179 | mutex_lock(&fs_info->scrub_lock); | ||
1180 | if (--fs_info->scrub_workers_refcnt == 0) | ||
1181 | btrfs_stop_workers(&fs_info->scrub_workers); | ||
1182 | WARN_ON(fs_info->scrub_workers_refcnt < 0); | ||
1183 | mutex_unlock(&fs_info->scrub_lock); | ||
1184 | } | ||
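
scrub_workers_get()/scrub_workers_put() are a refcounted singleton under scrub_lock: the first scrub brings the thread pool up, the last one tears it down. The same pattern in a self-contained pthread sketch:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int refcnt;

static void pool_start(void) { printf("start workers\n"); }
static void pool_stop(void)  { printf("stop workers\n"); }

static void workers_get(void)
{
        pthread_mutex_lock(&lock);
        if (refcnt++ == 0)
                pool_start();           /* first user brings the pool up */
        pthread_mutex_unlock(&lock);
}

static void workers_put(void)
{
        pthread_mutex_lock(&lock);
        if (--refcnt == 0)
                pool_stop();            /* last user tears it down */
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        workers_get();          /* scrub on device A */
        workers_get();          /* concurrent scrub on device B */
        workers_put();
        workers_put();          /* pool stops here */
        return 0;
}
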
1185 | |||
1186 | |||
1187 | int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, | ||
1188 | struct btrfs_scrub_progress *progress, int readonly) | ||
1189 | { | ||
1190 | struct scrub_dev *sdev; | ||
1191 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1192 | int ret; | ||
1193 | struct btrfs_device *dev; | ||
1194 | |||
1195 | if (btrfs_fs_closing(root->fs_info)) | ||
1196 | return -EINVAL; | ||
1197 | |||
1198 | /* | ||
1199 | * check some assumptions | ||
1200 | */ | ||
1201 | if (root->sectorsize != PAGE_SIZE || | ||
1202 | root->sectorsize != root->leafsize || | ||
1203 | root->sectorsize != root->nodesize) { | ||
1204 | printk(KERN_ERR "btrfs_scrub: size assumptions fail\n"); | ||
1205 | return -EINVAL; | ||
1206 | } | ||
1207 | |||
1208 | ret = scrub_workers_get(root); | ||
1209 | if (ret) | ||
1210 | return ret; | ||
1211 | |||
1212 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
1213 | dev = btrfs_find_device(root, devid, NULL, NULL); | ||
1214 | if (!dev || dev->missing) { | ||
1215 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1216 | scrub_workers_put(root); | ||
1217 | return -ENODEV; | ||
1218 | } | ||
1219 | mutex_lock(&fs_info->scrub_lock); | ||
1220 | |||
1221 | if (!dev->in_fs_metadata) { | ||
1222 | mutex_unlock(&fs_info->scrub_lock); | ||
1223 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1224 | scrub_workers_put(root); | ||
1225 | return -ENODEV; | ||
1226 | } | ||
1227 | |||
1228 | if (dev->scrub_device) { | ||
1229 | mutex_unlock(&fs_info->scrub_lock); | ||
1230 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1231 | scrub_workers_put(root); | ||
1232 | return -EINPROGRESS; | ||
1233 | } | ||
1234 | sdev = scrub_setup_dev(dev); | ||
1235 | if (IS_ERR(sdev)) { | ||
1236 | mutex_unlock(&fs_info->scrub_lock); | ||
1237 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1238 | scrub_workers_put(root); | ||
1239 | return PTR_ERR(sdev); | ||
1240 | } | ||
1241 | sdev->readonly = readonly; | ||
1242 | dev->scrub_device = sdev; | ||
1243 | |||
1244 | atomic_inc(&fs_info->scrubs_running); | ||
1245 | mutex_unlock(&fs_info->scrub_lock); | ||
1246 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1247 | |||
1248 | down_read(&fs_info->scrub_super_lock); | ||
1249 | ret = scrub_supers(sdev); | ||
1250 | up_read(&fs_info->scrub_super_lock); | ||
1251 | |||
1252 | if (!ret) | ||
1253 | ret = scrub_enumerate_chunks(sdev, start, end); | ||
1254 | |||
1255 | wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0); | ||
1256 | |||
1257 | atomic_dec(&fs_info->scrubs_running); | ||
1258 | wake_up(&fs_info->scrub_pause_wait); | ||
1259 | |||
1260 | if (progress) | ||
1261 | memcpy(progress, &sdev->stat, sizeof(*progress)); | ||
1262 | |||
1263 | mutex_lock(&fs_info->scrub_lock); | ||
1264 | dev->scrub_device = NULL; | ||
1265 | mutex_unlock(&fs_info->scrub_lock); | ||
1266 | |||
1267 | scrub_free_dev(sdev); | ||
1268 | scrub_workers_put(root); | ||
1269 | |||
1270 | return ret; | ||
1271 | } | ||
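
btrfs_scrub_dev() is the entry point a scrub ioctl would call; note that it copies the statistics out before dropping the scrub_dev, even when the run was canceled. A hedged sketch of such a caller — struct scrub_args and its fields are invented here, not the real ioctl ABI:

/*
 * sketch of a caller, modeled on what a scrub ioctl handler would do;
 * "struct scrub_args" and its layout are invented for illustration
 */
struct scrub_args {
        u64 devid;
        u64 start;
        u64 end;
        struct btrfs_scrub_progress progress;
        u64 flags;              /* e.g. bit 0 = readonly */
};

static long demo_scrub_ioctl(struct btrfs_root *root, struct scrub_args *sa)
{
        int ret;

        ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end,
                              &sa->progress, sa->flags & 1 /* readonly */);
        /* sa->progress is filled in even when the scrub was canceled */
        return ret;
}
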
1272 | |||
1273 | int btrfs_scrub_pause(struct btrfs_root *root) | ||
1274 | { | ||
1275 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1276 | |||
1277 | mutex_lock(&fs_info->scrub_lock); | ||
1278 | atomic_inc(&fs_info->scrub_pause_req); | ||
1279 | while (atomic_read(&fs_info->scrubs_paused) != | ||
1280 | atomic_read(&fs_info->scrubs_running)) { | ||
1281 | mutex_unlock(&fs_info->scrub_lock); | ||
1282 | wait_event(fs_info->scrub_pause_wait, | ||
1283 | atomic_read(&fs_info->scrubs_paused) == | ||
1284 | atomic_read(&fs_info->scrubs_running)); | ||
1285 | mutex_lock(&fs_info->scrub_lock); | ||
1286 | } | ||
1287 | mutex_unlock(&fs_info->scrub_lock); | ||
1288 | |||
1289 | return 0; | ||
1290 | } | ||
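
The pause handshake lets the transaction commit quiesce all running scrubs: the pauser raises scrub_pause_req and waits until every scrub has drained its in-flight bios and parked itself in scrubs_paused. A sketch of the intended pairing (do_commit_work() is a stand-in for the real commit steps):

/*
 * sketch of the intended pairing around a transaction commit;
 * do_commit_work() is a stand-in, not a real function
 */
static void commit_with_scrub_quiesced(struct btrfs_root *root)
{
        btrfs_scrub_pause(root);        /* returns once every scrub parked */
        do_commit_work(root);           /* scrubs see a stable commit root */
        btrfs_scrub_continue(root);     /* wake the parked scrubs */
}
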
1291 | |||
1292 | int btrfs_scrub_continue(struct btrfs_root *root) | ||
1293 | { | ||
1294 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1295 | |||
1296 | atomic_dec(&fs_info->scrub_pause_req); | ||
1297 | wake_up(&fs_info->scrub_pause_wait); | ||
1298 | return 0; | ||
1299 | } | ||
1300 | |||
1301 | int btrfs_scrub_pause_super(struct btrfs_root *root) | ||
1302 | { | ||
1303 | down_write(&root->fs_info->scrub_super_lock); | ||
1304 | return 0; | ||
1305 | } | ||
1306 | |||
1307 | int btrfs_scrub_continue_super(struct btrfs_root *root) | ||
1308 | { | ||
1309 | up_write(&root->fs_info->scrub_super_lock); | ||
1310 | return 0; | ||
1311 | } | ||
1312 | |||
1313 | int btrfs_scrub_cancel(struct btrfs_root *root) | ||
1314 | { | ||
1315 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1316 | |||
1317 | mutex_lock(&fs_info->scrub_lock); | ||
1318 | if (!atomic_read(&fs_info->scrubs_running)) { | ||
1319 | mutex_unlock(&fs_info->scrub_lock); | ||
1320 | return -ENOTCONN; | ||
1321 | } | ||
1322 | |||
1323 | atomic_inc(&fs_info->scrub_cancel_req); | ||
1324 | while (atomic_read(&fs_info->scrubs_running)) { | ||
1325 | mutex_unlock(&fs_info->scrub_lock); | ||
1326 | wait_event(fs_info->scrub_pause_wait, | ||
1327 | atomic_read(&fs_info->scrubs_running) == 0); | ||
1328 | mutex_lock(&fs_info->scrub_lock); | ||
1329 | } | ||
1330 | atomic_dec(&fs_info->scrub_cancel_req); | ||
1331 | mutex_unlock(&fs_info->scrub_lock); | ||
1332 | |||
1333 | return 0; | ||
1334 | } | ||
1335 | |||
1336 | int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev) | ||
1337 | { | ||
1338 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1339 | struct scrub_dev *sdev; | ||
1340 | |||
1341 | mutex_lock(&fs_info->scrub_lock); | ||
1342 | sdev = dev->scrub_device; | ||
1343 | if (!sdev) { | ||
1344 | mutex_unlock(&fs_info->scrub_lock); | ||
1345 | return -ENOTCONN; | ||
1346 | } | ||
1347 | atomic_inc(&sdev->cancel_req); | ||
1348 | while (dev->scrub_device) { | ||
1349 | mutex_unlock(&fs_info->scrub_lock); | ||
1350 | wait_event(fs_info->scrub_pause_wait, | ||
1351 | dev->scrub_device == NULL); | ||
1352 | mutex_lock(&fs_info->scrub_lock); | ||
1353 | } | ||
1354 | mutex_unlock(&fs_info->scrub_lock); | ||
1355 | |||
1356 | return 0; | ||
1357 | } | ||
1358 | int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid) | ||
1359 | { | ||
1360 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1361 | struct btrfs_device *dev; | ||
1362 | int ret; | ||
1363 | |||
1364 | /* | ||
1365 | * we have to hold the device_list_mutex here so the device | ||
1366 | * does not go away in cancel_dev. FIXME: find a better solution | ||
1367 | */ | ||
1368 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | ||
1369 | dev = btrfs_find_device(root, devid, NULL, NULL); | ||
1370 | if (!dev) { | ||
1371 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
1372 | return -ENODEV; | ||
1373 | } | ||
1374 | ret = btrfs_scrub_cancel_dev(root, dev); | ||
1375 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
1376 | |||
1377 | return ret; | ||
1378 | } | ||
1379 | |||
1380 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, | ||
1381 | struct btrfs_scrub_progress *progress) | ||
1382 | { | ||
1383 | struct btrfs_device *dev; | ||
1384 | struct scrub_dev *sdev = NULL; | ||
1385 | |||
1386 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
1387 | dev = btrfs_find_device(root, devid, NULL, NULL); | ||
1388 | if (dev) | ||
1389 | sdev = dev->scrub_device; | ||
1390 | if (sdev) | ||
1391 | memcpy(progress, &sdev->stat, sizeof(*progress)); | ||
1392 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1393 | |||
1394 | return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV; | ||
1395 | } | ||
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1776dbd8dc98..15634d4648d7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -39,7 +39,9 @@ | |||
39 | #include <linux/miscdevice.h> | 39 | #include <linux/miscdevice.h> |
40 | #include <linux/magic.h> | 40 | #include <linux/magic.h> |
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #include <linux/cleancache.h> | ||
42 | #include "compat.h" | 43 | #include "compat.h" |
44 | #include "delayed-inode.h" | ||
43 | #include "ctree.h" | 45 | #include "ctree.h" |
44 | #include "disk-io.h" | 46 | #include "disk-io.h" |
45 | #include "transaction.h" | 47 | #include "transaction.h" |
@@ -52,8 +54,95 @@ | |||
52 | #include "export.h" | 54 | #include "export.h" |
53 | #include "compression.h" | 55 | #include "compression.h" |
54 | 56 | ||
57 | #define CREATE_TRACE_POINTS | ||
58 | #include <trace/events/btrfs.h> | ||
59 | |||
55 | static const struct super_operations btrfs_super_ops; | 60 | static const struct super_operations btrfs_super_ops; |
56 | 61 | ||
62 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, | ||
63 | char nbuf[16]) | ||
64 | { | ||
65 | char *errstr = NULL; | ||
66 | |||
67 | switch (errno) { | ||
68 | case -EIO: | ||
69 | errstr = "IO failure"; | ||
70 | break; | ||
71 | case -ENOMEM: | ||
72 | errstr = "Out of memory"; | ||
73 | break; | ||
74 | case -EROFS: | ||
75 | errstr = "Readonly filesystem"; | ||
76 | break; | ||
77 | default: | ||
78 | if (nbuf) { | ||
79 | if (snprintf(nbuf, 16, "error %d", -errno) >= 0) | ||
80 | errstr = nbuf; | ||
81 | } | ||
82 | break; | ||
83 | } | ||
84 | |||
85 | return errstr; | ||
86 | } | ||
87 | |||
88 | static void __save_error_info(struct btrfs_fs_info *fs_info) | ||
89 | { | ||
90 | /* | ||
91 | * today we only save the error info in RAM. Long term we'll | ||
92 | * also send it down to the disk | ||
93 | */ | ||
94 | fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; | ||
95 | } | ||
96 | |||
97 | /* NOTE: | ||
98 | * We move the write_super work to umount in order to avoid a | ||
99 | * deadlock, since umount holds all the locks. | ||
100 | */ | ||
101 | static void save_error_info(struct btrfs_fs_info *fs_info) | ||
102 | { | ||
103 | __save_error_info(fs_info); | ||
104 | } | ||
105 | |||
106 | /* btrfs handle error by forcing the filesystem readonly */ | ||
107 | static void btrfs_handle_error(struct btrfs_fs_info *fs_info) | ||
108 | { | ||
109 | struct super_block *sb = fs_info->sb; | ||
110 | |||
111 | if (sb->s_flags & MS_RDONLY) | ||
112 | return; | ||
113 | |||
114 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
115 | sb->s_flags |= MS_RDONLY; | ||
116 | printk(KERN_INFO "btrfs is forced readonly\n"); | ||
117 | } | ||
118 | } | ||
119 | |||
120 | /* | ||
121 | * __btrfs_std_error decodes expected errors from the caller and | ||
122 | * invokes the appropriate error response. | ||
123 | */ | ||
124 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | ||
125 | unsigned int line, int errno) | ||
126 | { | ||
127 | struct super_block *sb = fs_info->sb; | ||
128 | char nbuf[16]; | ||
129 | const char *errstr; | ||
130 | |||
131 | /* | ||
132 | * Special case: if the error is EROFS, and we're already | ||
133 | * under MS_RDONLY, then it is safe here. | ||
134 | */ | ||
135 | if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) | ||
136 | return; | ||
137 | |||
138 | errstr = btrfs_decode_error(fs_info, errno, nbuf); | ||
139 | printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", | ||
140 | sb->s_id, function, line, errstr); | ||
141 | save_error_info(fs_info); | ||
142 | |||
143 | btrfs_handle_error(fs_info); | ||
144 | } | ||
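
Callers are not expected to pass __func__ and __LINE__ by hand; a wrapper macro captures the call site. A plausible definition — an assumption, since the real macro lives in a header outside this hunk:

/* assumed wrapper macro; the real definition is in a header, not this hunk */
#define btrfs_std_error(fs_info, errno)                         \
do {                                                            \
        if ((errno))                                            \
                __btrfs_std_error((fs_info), __func__,          \
                                  __LINE__, (errno));           \
} while (0)

/* example: flip the fs read-only on a failed write */
/*      btrfs_std_error(root->fs_info, -EIO);       */
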
145 | |||
57 | static void btrfs_put_super(struct super_block *sb) | 146 | static void btrfs_put_super(struct super_block *sb) |
58 | { | 147 | { |
59 | struct btrfs_root *root = btrfs_sb(sb); | 148 | struct btrfs_root *root = btrfs_sb(sb); |
@@ -61,14 +150,19 @@ static void btrfs_put_super(struct super_block *sb) | |||
61 | 150 | ||
62 | ret = close_ctree(root); | 151 | ret = close_ctree(root); |
63 | sb->s_fs_info = NULL; | 152 | sb->s_fs_info = NULL; |
153 | |||
154 | (void)ret; /* FIXME: need to fix VFS to return error? */ | ||
64 | } | 155 | } |
65 | 156 | ||
66 | enum { | 157 | enum { |
67 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, | 158 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, |
68 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, | 159 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, |
69 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, | 160 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, |
70 | Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, | 161 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, |
71 | Opt_discard, Opt_err, | 162 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, |
163 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, | ||
164 | Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, | ||
165 | Opt_inode_cache, Opt_err, | ||
72 | }; | 166 | }; |
73 | 167 | ||
74 | static match_table_t tokens = { | 168 | static match_table_t tokens = { |
@@ -83,7 +177,9 @@ static match_table_t tokens = { | |||
83 | {Opt_alloc_start, "alloc_start=%s"}, | 177 | {Opt_alloc_start, "alloc_start=%s"}, |
84 | {Opt_thread_pool, "thread_pool=%d"}, | 178 | {Opt_thread_pool, "thread_pool=%d"}, |
85 | {Opt_compress, "compress"}, | 179 | {Opt_compress, "compress"}, |
180 | {Opt_compress_type, "compress=%s"}, | ||
86 | {Opt_compress_force, "compress-force"}, | 181 | {Opt_compress_force, "compress-force"}, |
182 | {Opt_compress_force_type, "compress-force=%s"}, | ||
87 | {Opt_ssd, "ssd"}, | 183 | {Opt_ssd, "ssd"}, |
88 | {Opt_ssd_spread, "ssd_spread"}, | 184 | {Opt_ssd_spread, "ssd_spread"}, |
89 | {Opt_nossd, "nossd"}, | 185 | {Opt_nossd, "nossd"}, |
@@ -92,6 +188,13 @@ static match_table_t tokens = { | |||
92 | {Opt_flushoncommit, "flushoncommit"}, | 188 | {Opt_flushoncommit, "flushoncommit"}, |
93 | {Opt_ratio, "metadata_ratio=%d"}, | 189 | {Opt_ratio, "metadata_ratio=%d"}, |
94 | {Opt_discard, "discard"}, | 190 | {Opt_discard, "discard"}, |
191 | {Opt_space_cache, "space_cache"}, | ||
192 | {Opt_clear_cache, "clear_cache"}, | ||
193 | {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, | ||
194 | {Opt_enospc_debug, "enospc_debug"}, | ||
195 | {Opt_subvolrootid, "subvolrootid=%d"}, | ||
196 | {Opt_defrag, "autodefrag"}, | ||
197 | {Opt_inode_cache, "inode_cache"}, | ||
95 | {Opt_err, NULL}, | 198 | {Opt_err, NULL}, |
96 | }; | 199 | }; |
97 | 200 | ||
@@ -106,6 +209,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
106 | char *p, *num, *orig; | 209 | char *p, *num, *orig; |
107 | int intarg; | 210 | int intarg; |
108 | int ret = 0; | 211 | int ret = 0; |
212 | char *compress_type; | ||
213 | bool compress_force = false; | ||
109 | 214 | ||
110 | if (!options) | 215 | if (!options) |
111 | return 0; | 216 | return 0; |
@@ -133,6 +238,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
133 | break; | 238 | break; |
134 | case Opt_subvol: | 239 | case Opt_subvol: |
135 | case Opt_subvolid: | 240 | case Opt_subvolid: |
241 | case Opt_subvolrootid: | ||
136 | case Opt_device: | 242 | case Opt_device: |
137 | /* | 243 | /* |
138 | * These are parsed by btrfs_parse_early_options | 244 | * These are parsed by btrfs_parse_early_options |
@@ -148,14 +254,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
148 | btrfs_set_opt(info->mount_opt, NODATACOW); | 254 | btrfs_set_opt(info->mount_opt, NODATACOW); |
149 | btrfs_set_opt(info->mount_opt, NODATASUM); | 255 | btrfs_set_opt(info->mount_opt, NODATASUM); |
150 | break; | 256 | break; |
151 | case Opt_compress: | ||
152 | printk(KERN_INFO "btrfs: use compression\n"); | ||
153 | btrfs_set_opt(info->mount_opt, COMPRESS); | ||
154 | break; | ||
155 | case Opt_compress_force: | 257 | case Opt_compress_force: |
156 | printk(KERN_INFO "btrfs: forcing compression\n"); | 258 | case Opt_compress_force_type: |
157 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | 259 | compress_force = true; |
260 | case Opt_compress: | ||
261 | case Opt_compress_type: | ||
262 | if (token == Opt_compress || | ||
263 | token == Opt_compress_force || | ||
264 | strcmp(args[0].from, "zlib") == 0) { | ||
265 | compress_type = "zlib"; | ||
266 | info->compress_type = BTRFS_COMPRESS_ZLIB; | ||
267 | } else if (strcmp(args[0].from, "lzo") == 0) { | ||
268 | compress_type = "lzo"; | ||
269 | info->compress_type = BTRFS_COMPRESS_LZO; | ||
270 | } else { | ||
271 | ret = -EINVAL; | ||
272 | goto out; | ||
273 | } | ||
274 | |||
158 | btrfs_set_opt(info->mount_opt, COMPRESS); | 275 | btrfs_set_opt(info->mount_opt, COMPRESS); |
276 | if (compress_force) { | ||
277 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | ||
278 | pr_info("btrfs: force %s compression\n", | ||
279 | compress_type); | ||
280 | } else | ||
281 | pr_info("btrfs: use %s compression\n", | ||
282 | compress_type); | ||
159 | break; | 283 | break; |
160 | case Opt_ssd: | 284 | case Opt_ssd: |
161 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); | 285 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); |
@@ -235,6 +359,28 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
235 | case Opt_discard: | 359 | case Opt_discard: |
236 | btrfs_set_opt(info->mount_opt, DISCARD); | 360 | btrfs_set_opt(info->mount_opt, DISCARD); |
237 | break; | 361 | break; |
362 | case Opt_space_cache: | ||
363 | printk(KERN_INFO "btrfs: enabling disk space caching\n"); | ||
364 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); | ||
365 | break; | ||
366 | case Opt_inode_cache: | ||
367 | printk(KERN_INFO "btrfs: enabling inode map caching\n"); | ||
368 | btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE); | ||
369 | break; | ||
370 | case Opt_clear_cache: | ||
371 | printk(KERN_INFO "btrfs: force clearing of disk cache\n"); | ||
372 | btrfs_set_opt(info->mount_opt, CLEAR_CACHE); | ||
373 | break; | ||
374 | case Opt_user_subvol_rm_allowed: | ||
375 | btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); | ||
376 | break; | ||
377 | case Opt_enospc_debug: | ||
378 | btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); | ||
379 | break; | ||
380 | case Opt_defrag: | ||
381 | printk(KERN_INFO "btrfs: enabling auto defrag\n"); | ||
382 | btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); | ||
383 | break; | ||
238 | case Opt_err: | 384 | case Opt_err: |
239 | printk(KERN_INFO "btrfs: unrecognized mount option " | 385 | printk(KERN_INFO "btrfs: unrecognized mount option " |
240 | "'%s'\n", p); | 386 | "'%s'\n", p); |
@@ -257,10 +403,10 @@ out: | |||
257 | */ | 403 | */ |
258 | static int btrfs_parse_early_options(const char *options, fmode_t flags, | 404 | static int btrfs_parse_early_options(const char *options, fmode_t flags, |
259 | void *holder, char **subvol_name, u64 *subvol_objectid, | 405 | void *holder, char **subvol_name, u64 *subvol_objectid, |
260 | struct btrfs_fs_devices **fs_devices) | 406 | u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices) |
261 | { | 407 | { |
262 | substring_t args[MAX_OPT_ARGS]; | 408 | substring_t args[MAX_OPT_ARGS]; |
263 | char *opts, *p; | 409 | char *opts, *orig, *p; |
264 | int error = 0; | 410 | int error = 0; |
265 | int intarg; | 411 | int intarg; |
266 | 412 | ||
@@ -274,6 +420,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
274 | opts = kstrdup(options, GFP_KERNEL); | 420 | opts = kstrdup(options, GFP_KERNEL); |
275 | if (!opts) | 421 | if (!opts) |
276 | return -ENOMEM; | 422 | return -ENOMEM; |
423 | orig = opts; | ||
277 | 424 | ||
278 | while ((p = strsep(&opts, ",")) != NULL) { | 425 | while ((p = strsep(&opts, ",")) != NULL) { |
279 | int token; | 426 | int token; |
@@ -297,6 +444,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
297 | *subvol_objectid = intarg; | 444 | *subvol_objectid = intarg; |
298 | } | 445 | } |
299 | break; | 446 | break; |
447 | case Opt_subvolrootid: | ||
448 | intarg = 0; | ||
449 | error = match_int(&args[0], &intarg); | ||
450 | if (!error) { | ||
451 | /* we want the original fs_tree */ | ||
452 | if (!intarg) | ||
453 | *subvol_rootid = | ||
454 | BTRFS_FS_TREE_OBJECTID; | ||
455 | else | ||
456 | *subvol_rootid = intarg; | ||
457 | } | ||
458 | break; | ||
300 | case Opt_device: | 459 | case Opt_device: |
301 | error = btrfs_scan_one_device(match_strdup(&args[0]), | 460 | error = btrfs_scan_one_device(match_strdup(&args[0]), |
302 | flags, holder, fs_devices); | 461 | flags, holder, fs_devices); |
@@ -309,7 +468,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
309 | } | 468 | } |
310 | 469 | ||
311 | out_free_opts: | 470 | out_free_opts: |
312 | kfree(opts); | 471 | kfree(orig); |
313 | out: | 472 | out: |
314 | /* | 473 | /* |
315 | * If no subvolume name is specified we use the default one. Allocate | 474 | * If no subvolume name is specified we use the default one. Allocate |
@@ -360,8 +519,10 @@ static struct dentry *get_default_root(struct super_block *sb, | |||
360 | */ | 519 | */ |
361 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); | 520 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); |
362 | di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); | 521 | di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); |
363 | if (IS_ERR(di)) | 522 | if (IS_ERR(di)) { |
523 | btrfs_free_path(path); | ||
364 | return ERR_CAST(di); | 524 | return ERR_CAST(di); |
525 | } | ||
365 | if (!di) { | 526 | if (!di) { |
366 | /* | 527 | /* |
367 | * Ok the default dir item isn't there. This is weird since | 528 | * Ok the default dir item isn't there. This is weird since |
@@ -380,7 +541,7 @@ static struct dentry *get_default_root(struct super_block *sb, | |||
380 | find_root: | 541 | find_root: |
381 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); | 542 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); |
382 | if (IS_ERR(new_root)) | 543 | if (IS_ERR(new_root)) |
383 | return ERR_PTR(PTR_ERR(new_root)); | 544 | return ERR_CAST(new_root); |
384 | 545 | ||
385 | if (btrfs_root_refs(&new_root->root_item) == 0) | 546 | if (btrfs_root_refs(&new_root->root_item) == 0) |
386 | return ERR_PTR(-ENOENT); | 547 | return ERR_PTR(-ENOENT); |
@@ -436,7 +597,6 @@ static int btrfs_fill_super(struct super_block *sb, | |||
436 | { | 597 | { |
437 | struct inode *inode; | 598 | struct inode *inode; |
438 | struct dentry *root_dentry; | 599 | struct dentry *root_dentry; |
439 | struct btrfs_super_block *disk_super; | ||
440 | struct btrfs_root *tree_root; | 600 | struct btrfs_root *tree_root; |
441 | struct btrfs_key key; | 601 | struct btrfs_key key; |
442 | int err; | 602 | int err; |
@@ -444,6 +604,7 @@ static int btrfs_fill_super(struct super_block *sb, | |||
444 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 604 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
445 | sb->s_magic = BTRFS_SUPER_MAGIC; | 605 | sb->s_magic = BTRFS_SUPER_MAGIC; |
446 | sb->s_op = &btrfs_super_ops; | 606 | sb->s_op = &btrfs_super_ops; |
607 | sb->s_d_op = &btrfs_dentry_operations; | ||
447 | sb->s_export_op = &btrfs_export_ops; | 608 | sb->s_export_op = &btrfs_export_ops; |
448 | sb->s_xattr = btrfs_xattr_handlers; | 609 | sb->s_xattr = btrfs_xattr_handlers; |
449 | sb->s_time_gran = 1; | 610 | sb->s_time_gran = 1; |
@@ -458,7 +619,6 @@ static int btrfs_fill_super(struct super_block *sb, | |||
458 | return PTR_ERR(tree_root); | 619 | return PTR_ERR(tree_root); |
459 | } | 620 | } |
460 | sb->s_fs_info = tree_root; | 621 | sb->s_fs_info = tree_root; |
461 | disk_super = &tree_root->fs_info->super_copy; | ||
462 | 622 | ||
463 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; | 623 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; |
464 | key.type = BTRFS_INODE_ITEM_KEY; | 624 | key.type = BTRFS_INODE_ITEM_KEY; |
@@ -479,6 +639,7 @@ static int btrfs_fill_super(struct super_block *sb, | |||
479 | sb->s_root = root_dentry; | 639 | sb->s_root = root_dentry; |
480 | 640 | ||
481 | save_mount_options(sb, data); | 641 | save_mount_options(sb, data); |
642 | cleancache_init_fs(sb); | ||
482 | return 0; | 643 | return 0; |
483 | 644 | ||
484 | fail_close: | 645 | fail_close: |
@@ -492,6 +653,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
492 | struct btrfs_root *root = btrfs_sb(sb); | 653 | struct btrfs_root *root = btrfs_sb(sb); |
493 | int ret; | 654 | int ret; |
494 | 655 | ||
656 | trace_btrfs_sync_fs(wait); | ||
657 | |||
495 | if (!wait) { | 658 | if (!wait) { |
496 | filemap_flush(root->fs_info->btree_inode->i_mapping); | 659 | filemap_flush(root->fs_info->btree_inode->i_mapping); |
497 | return 0; | 660 | return 0; |
@@ -501,6 +664,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
501 | btrfs_wait_ordered_extents(root, 0, 0); | 664 | btrfs_wait_ordered_extents(root, 0, 0); |
502 | 665 | ||
503 | trans = btrfs_start_transaction(root, 0); | 666 | trans = btrfs_start_transaction(root, 0); |
667 | if (IS_ERR(trans)) | ||
668 | return PTR_ERR(trans); | ||
504 | ret = btrfs_commit_transaction(trans, root); | 669 | ret = btrfs_commit_transaction(trans, root); |
505 | return ret; | 670 | return ret; |
506 | } | 671 | } |
@@ -509,6 +674,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
509 | { | 674 | { |
510 | struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); | 675 | struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); |
511 | struct btrfs_fs_info *info = root->fs_info; | 676 | struct btrfs_fs_info *info = root->fs_info; |
677 | char *compress_type; | ||
512 | 678 | ||
513 | if (btrfs_test_opt(root, DEGRADED)) | 679 | if (btrfs_test_opt(root, DEGRADED)) |
514 | seq_puts(seq, ",degraded"); | 680 | seq_puts(seq, ",degraded"); |
@@ -527,8 +693,16 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
527 | if (info->thread_pool_size != min_t(unsigned long, | 693 | if (info->thread_pool_size != min_t(unsigned long, |
528 | num_online_cpus() + 2, 8)) | 694 | num_online_cpus() + 2, 8)) |
529 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); | 695 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); |
530 | if (btrfs_test_opt(root, COMPRESS)) | 696 | if (btrfs_test_opt(root, COMPRESS)) { |
531 | seq_puts(seq, ",compress"); | 697 | if (info->compress_type == BTRFS_COMPRESS_ZLIB) |
698 | compress_type = "zlib"; | ||
699 | else | ||
700 | compress_type = "lzo"; | ||
701 | if (btrfs_test_opt(root, FORCE_COMPRESS)) | ||
702 | seq_printf(seq, ",compress-force=%s", compress_type); | ||
703 | else | ||
704 | seq_printf(seq, ",compress=%s", compress_type); | ||
705 | } | ||
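
With this, the compression algorithm is chosen at mount time and faithfully round-trips through /proc/mounts. A user-space example selecting lzo via mount(2) — the device and mountpoint paths are placeholders:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
        /* paths are placeholders; the data string may be any of:
         * compress=zlib, compress=lzo, compress-force=zlib, compress-force=lzo */
        if (mount("/dev/sdb1", "/mnt/btrfs", "btrfs", 0, "compress=lzo")) {
                perror("mount");
                return 1;
        }
        return 0;
}
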
532 | if (btrfs_test_opt(root, NOSSD)) | 706 | if (btrfs_test_opt(root, NOSSD)) |
533 | seq_puts(seq, ",nossd"); | 707 | seq_puts(seq, ",nossd"); |
534 | if (btrfs_test_opt(root, SSD_SPREAD)) | 708 | if (btrfs_test_opt(root, SSD_SPREAD)) |
@@ -543,46 +717,74 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
543 | seq_puts(seq, ",discard"); | 717 | seq_puts(seq, ",discard"); |
544 | if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) | 718 | if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) |
545 | seq_puts(seq, ",noacl"); | 719 | seq_puts(seq, ",noacl"); |
720 | if (btrfs_test_opt(root, SPACE_CACHE)) | ||
721 | seq_puts(seq, ",space_cache"); | ||
722 | if (btrfs_test_opt(root, CLEAR_CACHE)) | ||
723 | seq_puts(seq, ",clear_cache"); | ||
724 | if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) | ||
725 | seq_puts(seq, ",user_subvol_rm_allowed"); | ||
726 | if (btrfs_test_opt(root, ENOSPC_DEBUG)) | ||
727 | seq_puts(seq, ",enospc_debug"); | ||
728 | if (btrfs_test_opt(root, AUTO_DEFRAG)) | ||
729 | seq_puts(seq, ",autodefrag"); | ||
730 | if (btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
731 | seq_puts(seq, ",inode_cache"); | ||
546 | return 0; | 732 | return 0; |
547 | } | 733 | } |
548 | 734 | ||
549 | static int btrfs_test_super(struct super_block *s, void *data) | 735 | static int btrfs_test_super(struct super_block *s, void *data) |
550 | { | 736 | { |
551 | struct btrfs_fs_devices *test_fs_devices = data; | 737 | struct btrfs_root *test_root = data; |
552 | struct btrfs_root *root = btrfs_sb(s); | 738 | struct btrfs_root *root = btrfs_sb(s); |
553 | 739 | ||
554 | return root->fs_info->fs_devices == test_fs_devices; | 740 | /* |
741 | * If this super block is going away, return false as it | ||
742 | * can't match as an existing super block. | ||
743 | */ | ||
744 | if (!atomic_read(&s->s_active)) | ||
745 | return 0; | ||
746 | return root->fs_info->fs_devices == test_root->fs_info->fs_devices; | ||
555 | } | 747 | } |
556 | 748 | ||
749 | static int btrfs_set_super(struct super_block *s, void *data) | ||
750 | { | ||
751 | s->s_fs_info = data; | ||
752 | |||
753 | return set_anon_super(s, data); | ||
754 | } | ||
755 | |||
756 | |||
557 | /* | 757 | /* |
558 | * Find a superblock for the given device / mount point. | 758 | * Find a superblock for the given device / mount point. |
559 | * | 759 | * |
560 | * Note: This is based on get_sb_bdev from fs/super.c with a few additions | 760 | * Note: This is based on get_sb_bdev from fs/super.c with a few additions |
561 | * for multiple device setup. Make sure to keep it in sync. | 761 | * for multiple device setup. Make sure to keep it in sync. |
562 | */ | 762 | */ |
563 | static int btrfs_get_sb(struct file_system_type *fs_type, int flags, | 763 | static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, |
564 | const char *dev_name, void *data, struct vfsmount *mnt) | 764 | const char *device_name, void *data) |
565 | { | 765 | { |
566 | struct block_device *bdev = NULL; | 766 | struct block_device *bdev = NULL; |
567 | struct super_block *s; | 767 | struct super_block *s; |
568 | struct dentry *root; | 768 | struct dentry *root; |
569 | struct btrfs_fs_devices *fs_devices = NULL; | 769 | struct btrfs_fs_devices *fs_devices = NULL; |
770 | struct btrfs_root *tree_root = NULL; | ||
771 | struct btrfs_fs_info *fs_info = NULL; | ||
570 | fmode_t mode = FMODE_READ; | 772 | fmode_t mode = FMODE_READ; |
571 | char *subvol_name = NULL; | 773 | char *subvol_name = NULL; |
572 | u64 subvol_objectid = 0; | 774 | u64 subvol_objectid = 0; |
775 | u64 subvol_rootid = 0; | ||
573 | int error = 0; | 776 | int error = 0; |
574 | int found = 0; | ||
575 | 777 | ||
576 | if (!(flags & MS_RDONLY)) | 778 | if (!(flags & MS_RDONLY)) |
577 | mode |= FMODE_WRITE; | 779 | mode |= FMODE_WRITE; |
578 | 780 | ||
579 | error = btrfs_parse_early_options(data, mode, fs_type, | 781 | error = btrfs_parse_early_options(data, mode, fs_type, |
580 | &subvol_name, &subvol_objectid, | 782 | &subvol_name, &subvol_objectid, |
581 | &fs_devices); | 783 | &subvol_rootid, &fs_devices); |
582 | if (error) | 784 | if (error) |
583 | return error; | 785 | return ERR_PTR(error); |
584 | 786 | ||
585 | error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices); | 787 | error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); |
586 | if (error) | 788 | if (error) |
587 | goto error_free_subvol_name; | 789 | goto error_free_subvol_name; |
588 | 790 | ||
@@ -595,8 +797,24 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, | |||
595 | goto error_close_devices; | 797 | goto error_close_devices; |
596 | } | 798 | } |
597 | 799 | ||
800 | /* | ||
801 | * Set up a dummy root and fs_info for test/set super. This is because | ||
802 | * we don't actually fill this stuff out until open_ctree, but we need | ||
803 | * it for searching for existing supers, so this lets us do that and | ||
804 | * then open_ctree will properly initialize everything later. | ||
805 | */ | ||
806 | fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS); | ||
807 | tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | ||
808 | if (!fs_info || !tree_root) { | ||
809 | error = -ENOMEM; | ||
810 | goto error_close_devices; | ||
811 | } | ||
812 | fs_info->tree_root = tree_root; | ||
813 | fs_info->fs_devices = fs_devices; | ||
814 | tree_root->fs_info = fs_info; | ||
815 | |||
598 | bdev = fs_devices->latest_bdev; | 816 | bdev = fs_devices->latest_bdev; |
599 | s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); | 817 | s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root); |
600 | if (IS_ERR(s)) | 818 | if (IS_ERR(s)) |
601 | goto error_s; | 819 | goto error_s; |
602 | 820 | ||
@@ -607,12 +825,13 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, | |||
607 | goto error_close_devices; | 825 | goto error_close_devices; |
608 | } | 826 | } |
609 | 827 | ||
610 | found = 1; | ||
611 | btrfs_close_devices(fs_devices); | 828 | btrfs_close_devices(fs_devices); |
829 | kfree(fs_info); | ||
830 | kfree(tree_root); | ||
612 | } else { | 831 | } else { |
613 | char b[BDEVNAME_SIZE]; | 832 | char b[BDEVNAME_SIZE]; |
614 | 833 | ||
615 | s->s_flags = flags; | 834 | s->s_flags = flags | MS_NOSEC; |
616 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); | 835 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); |
617 | error = btrfs_fill_super(s, fs_devices, data, | 836 | error = btrfs_fill_super(s, fs_devices, data, |
618 | flags & MS_SILENT ? 1 : 0); | 837 | flags & MS_SILENT ? 1 : 0); |
@@ -625,51 +844,58 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, | |||
625 | s->s_flags |= MS_ACTIVE; | 844 | s->s_flags |= MS_ACTIVE; |
626 | } | 845 | } |
627 | 846 | ||
628 | root = get_default_root(s, subvol_objectid); | ||
629 | if (IS_ERR(root)) { | ||
630 | error = PTR_ERR(root); | ||
631 | deactivate_locked_super(s); | ||
632 | goto error; | ||
633 | } | ||
634 | /* if they gave us a subvolume name, bind mount into that */ | 847 | /* if they gave us a subvolume name, bind mount into that */ |
635 | if (strcmp(subvol_name, ".")) { | 848 | if (strcmp(subvol_name, ".")) { |
636 | struct dentry *new_root; | 849 | struct dentry *new_root; |
850 | |||
851 | root = get_default_root(s, subvol_rootid); | ||
852 | if (IS_ERR(root)) { | ||
853 | error = PTR_ERR(root); | ||
854 | deactivate_locked_super(s); | ||
855 | goto error_free_subvol_name; | ||
856 | } | ||
857 | |||
637 | mutex_lock(&root->d_inode->i_mutex); | 858 | mutex_lock(&root->d_inode->i_mutex); |
638 | new_root = lookup_one_len(subvol_name, root, | 859 | new_root = lookup_one_len(subvol_name, root, |
639 | strlen(subvol_name)); | 860 | strlen(subvol_name)); |
640 | mutex_unlock(&root->d_inode->i_mutex); | 861 | mutex_unlock(&root->d_inode->i_mutex); |
641 | 862 | ||
642 | if (IS_ERR(new_root)) { | 863 | if (IS_ERR(new_root)) { |
864 | dput(root); | ||
643 | deactivate_locked_super(s); | 865 | deactivate_locked_super(s); |
644 | error = PTR_ERR(new_root); | 866 | error = PTR_ERR(new_root); |
645 | dput(root); | 867 | goto error_free_subvol_name; |
646 | goto error_close_devices; | ||
647 | } | 868 | } |
648 | if (!new_root->d_inode) { | 869 | if (!new_root->d_inode) { |
649 | dput(root); | 870 | dput(root); |
650 | dput(new_root); | 871 | dput(new_root); |
651 | deactivate_locked_super(s); | 872 | deactivate_locked_super(s); |
652 | error = -ENXIO; | 873 | error = -ENXIO; |
653 | goto error_close_devices; | 874 | goto error_free_subvol_name; |
654 | } | 875 | } |
655 | dput(root); | 876 | dput(root); |
656 | root = new_root; | 877 | root = new_root; |
878 | } else { | ||
879 | root = get_default_root(s, subvol_objectid); | ||
880 | if (IS_ERR(root)) { | ||
881 | error = PTR_ERR(root); | ||
882 | deactivate_locked_super(s); | ||
883 | goto error_free_subvol_name; | ||
884 | } | ||
657 | } | 885 | } |
658 | 886 | ||
659 | mnt->mnt_sb = s; | ||
660 | mnt->mnt_root = root; | ||
661 | |||
662 | kfree(subvol_name); | 887 | kfree(subvol_name); |
663 | return 0; | 888 | return root; |
664 | 889 | ||
665 | error_s: | 890 | error_s: |
666 | error = PTR_ERR(s); | 891 | error = PTR_ERR(s); |
667 | error_close_devices: | 892 | error_close_devices: |
668 | btrfs_close_devices(fs_devices); | 893 | btrfs_close_devices(fs_devices); |
894 | kfree(fs_info); | ||
895 | kfree(tree_root); | ||
669 | error_free_subvol_name: | 896 | error_free_subvol_name: |
670 | kfree(subvol_name); | 897 | kfree(subvol_name); |
671 | error: | 898 | return ERR_PTR(error); |
672 | return error; | ||
673 | } | 899 | } |
674 | 900 | ||
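With .get_sb gone, btrfs_mount() reports failure by encoding an errno into the returned struct dentry pointer rather than by filling in a vfsmount and returning an int. That relies on the kernel's ERR_PTR()/PTR_ERR()/IS_ERR() helpers, which reserve the top page of the address space for error codes. A self-contained userspace rendition of the idiom (simplified; open_thing() is hypothetical):

#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)        /* encode -errno as a pointer */
{
        return (void *)error;
}

static inline long PTR_ERR(const void *ptr)    /* decode it back */
{
        return (long)ptr;
}

static inline int IS_ERR(const void *ptr)      /* top 4095 addresses = error */
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *open_thing(int fail)
{
        static int thing;

        if (fail)
                return ERR_PTR(-12 /* ENOMEM */);
        return &thing;
}

int main(void)
{
        void *p = open_thing(1);

        if (IS_ERR(p))
                printf("error %ld\n", PTR_ERR(p));      /* prints "error -12" */
        return 0;
}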
675 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) | 901 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) |
@@ -709,6 +935,153 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
709 | return 0; | 935 | return 0; |
710 | } | 936 | } |
711 | 937 | ||
938 | /* Used to sort the devices by max_avail (descending sort) */ | ||
939 | static int btrfs_cmp_device_free_bytes(const void *dev_info1, | ||
940 | const void *dev_info2) | ||
941 | { | ||
942 | if (((struct btrfs_device_info *)dev_info1)->max_avail > | ||
943 | ((struct btrfs_device_info *)dev_info2)->max_avail) | ||
944 | return -1; | ||
945 | else if (((struct btrfs_device_info *)dev_info1)->max_avail < | ||
946 | ((struct btrfs_device_info *)dev_info2)->max_avail) | ||
947 | return 1; | ||
948 | else | ||
949 | return 0; | ||
950 | } | ||
951 | |||
952 | /* | ||
953 | * Sort the devices by max_avail, in which the max free extent size of | ||
954 | * each device is stored (descending sort). | ||
955 | */ | ||
956 | static inline void btrfs_descending_sort_devices( | ||
957 | struct btrfs_device_info *devices, | ||
958 | size_t nr_devices) | ||
959 | { | ||
960 | sort(devices, nr_devices, sizeof(struct btrfs_device_info), | ||
961 | btrfs_cmp_device_free_bytes, NULL); | ||
962 | } | ||
963 | |||
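The comparator spells out both comparisons instead of returning a subtraction because max_avail is a u64: the difference can wrap, and truncating it to int would corrupt the sort order. The same descending comparator run against libc's qsort() as a standalone check (hypothetical struct, sizes made up):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct dev_info { uint64_t max_avail; };

/* Descending order; explicit compares avoid truncating a u64 difference. */
static int cmp_free_bytes_desc(const void *a, const void *b)
{
        const struct dev_info *da = a, *db = b;

        if (da->max_avail > db->max_avail)
                return -1;
        if (da->max_avail < db->max_avail)
                return 1;
        return 0;
}

int main(void)
{
        struct dev_info devs[] = { {3}, {10}, {8} };

        qsort(devs, 3, sizeof(devs[0]), cmp_free_bytes_desc);
        for (int i = 0; i < 3; i++)
                printf("%llu ", (unsigned long long)devs[i].max_avail);
        printf("\n");                           /* prints: 10 8 3 */
        return 0;
}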
964 | /* | ||
965 | * The helper to calc the free space on the devices that can be used to store | ||
966 | * file data. | ||
967 | */ | ||
968 | static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | ||
969 | { | ||
970 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
971 | struct btrfs_device_info *devices_info; | ||
972 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
973 | struct btrfs_device *device; | ||
974 | u64 skip_space; | ||
975 | u64 type; | ||
976 | u64 avail_space; | ||
977 | u64 used_space; | ||
978 | u64 min_stripe_size; | ||
979 | int min_stripes = 1; | ||
980 | int i = 0, nr_devices; | ||
981 | int ret; | ||
982 | |||
983 | nr_devices = fs_info->fs_devices->rw_devices; | ||
984 | BUG_ON(!nr_devices); | ||
985 | |||
986 | devices_info = kmalloc(sizeof(*devices_info) * nr_devices, | ||
987 | GFP_NOFS); | ||
988 | if (!devices_info) | ||
989 | return -ENOMEM; | ||
990 | |||
991 | /* calc the min stripe number for data space allocation */ | ||
992 | type = btrfs_get_alloc_profile(root, 1); | ||
993 | if (type & BTRFS_BLOCK_GROUP_RAID0) | ||
994 | min_stripes = 2; | ||
995 | else if (type & BTRFS_BLOCK_GROUP_RAID1) | ||
996 | min_stripes = 2; | ||
997 | else if (type & BTRFS_BLOCK_GROUP_RAID10) | ||
998 | min_stripes = 4; | ||
999 | |||
1000 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
1001 | min_stripe_size = 2 * BTRFS_STRIPE_LEN; | ||
1002 | else | ||
1003 | min_stripe_size = BTRFS_STRIPE_LEN; | ||
1004 | |||
1005 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | ||
1006 | if (!device->in_fs_metadata) | ||
1007 | continue; | ||
1008 | |||
1009 | avail_space = device->total_bytes - device->bytes_used; | ||
1010 | |||
1011 | /* align with stripe_len */ | ||
1012 | do_div(avail_space, BTRFS_STRIPE_LEN); | ||
1013 | avail_space *= BTRFS_STRIPE_LEN; | ||
1014 | |||
1015 | /* | ||
1016 | * In order to avoid overwriting the superblock on the drive, | ||
1017 | * btrfs starts at an offset of at least 1MB when doing chunk | ||
1018 | * allocation. | ||
1019 | */ | ||
1020 | skip_space = 1024 * 1024; | ||
1021 | |||
1022 | /* The user can set this offset in fs_info->alloc_start. */ | ||
1023 | if (fs_info->alloc_start + BTRFS_STRIPE_LEN <= | ||
1024 | device->total_bytes) | ||
1025 | skip_space = max(fs_info->alloc_start, skip_space); | ||
1026 | |||
1027 | /* | ||
1028 | * btrfs cannot use the free space in [0, skip_space - 1], so | ||
1029 | * we must subtract it from the total. To implement this, we | ||
1030 | * account the used space in this range first. | ||
1031 | */ | ||
1032 | ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1, | ||
1033 | &used_space); | ||
1034 | if (ret) { | ||
1035 | kfree(devices_info); | ||
1036 | return ret; | ||
1037 | } | ||
1038 | |||
1039 | /* calc the free space in [0, skip_space - 1] */ | ||
1040 | skip_space -= used_space; | ||
1041 | |||
1042 | /* | ||
1043 | * btrfs cannot use the free space in [0, skip_space - 1], so | ||
1044 | * subtract it from the total. | ||
1045 | */ | ||
1046 | if (avail_space && avail_space >= skip_space) | ||
1047 | avail_space -= skip_space; | ||
1048 | else | ||
1049 | avail_space = 0; | ||
1050 | |||
1051 | if (avail_space < min_stripe_size) | ||
1052 | continue; | ||
1053 | |||
1054 | devices_info[i].dev = device; | ||
1055 | devices_info[i].max_avail = avail_space; | ||
1056 | |||
1057 | i++; | ||
1058 | } | ||
1059 | |||
1060 | nr_devices = i; | ||
1061 | |||
1062 | btrfs_descending_sort_devices(devices_info, nr_devices); | ||
1063 | |||
1064 | i = nr_devices - 1; | ||
1065 | avail_space = 0; | ||
1066 | while (nr_devices >= min_stripes) { | ||
1067 | if (devices_info[i].max_avail >= min_stripe_size) { | ||
1068 | int j; | ||
1069 | u64 alloc_size; | ||
1070 | |||
1071 | avail_space += devices_info[i].max_avail * min_stripes; | ||
1072 | alloc_size = devices_info[i].max_avail; | ||
1073 | for (j = i + 1 - min_stripes; j <= i; j++) | ||
1074 | devices_info[j].max_avail -= alloc_size; | ||
1075 | } | ||
1076 | i--; | ||
1077 | nr_devices--; | ||
1078 | } | ||
1079 | |||
1080 | kfree(devices_info); | ||
1081 | *free_bytes = avail_space; | ||
1082 | return 0; | ||
1083 | } | ||
1084 | |||
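The loop above walks the descending-sorted array from the smallest device: each pass stripes the smallest remaining device across the min_stripes devices ending at index i, credits min_stripes times that size, and drops the now-empty device. It is a deliberately cheap estimate, not an exact optimum. A userspace transcription so the arithmetic can be checked in isolation (sizes hypothetical):

#include <stdint.h>
#include <stdio.h>

/*
 * Free-space estimate from the loop above, transcribed for userspace.
 * avail[] must be sorted descending; min_stripes is the stripe width.
 */
static uint64_t estimate(uint64_t *avail, int n, int min_stripes,
                         uint64_t min_stripe_size)
{
        uint64_t total = 0;
        int i = n - 1;

        while (n >= min_stripes) {
                if (avail[i] >= min_stripe_size) {
                        uint64_t alloc = avail[i];

                        /* stripe the smallest device across the window */
                        total += alloc * min_stripes;
                        for (int j = i + 1 - min_stripes; j <= i; j++)
                                avail[j] -= alloc;
                }
                i--;
                n--;
        }
        return total;
}

int main(void)
{
        uint64_t avail[] = { 10, 8, 3 };        /* GiB, sorted descending */

        /*
         * RAID0-style pairs: stripes of 3+3, then 5+5 -> prints 16.
         * An ideal packing could reach 20 here; the estimate is conservative.
         */
        printf("%llu\n", (unsigned long long)estimate(avail, 3, 2, 1));
        return 0;
}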
712 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 1085 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
713 | { | 1086 | { |
714 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); | 1087 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); |
@@ -716,20 +1089,39 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
716 | struct list_head *head = &root->fs_info->space_info; | 1089 | struct list_head *head = &root->fs_info->space_info; |
717 | struct btrfs_space_info *found; | 1090 | struct btrfs_space_info *found; |
718 | u64 total_used = 0; | 1091 | u64 total_used = 0; |
1092 | u64 total_free_data = 0; | ||
719 | int bits = dentry->d_sb->s_blocksize_bits; | 1093 | int bits = dentry->d_sb->s_blocksize_bits; |
720 | __be32 *fsid = (__be32 *)root->fs_info->fsid; | 1094 | __be32 *fsid = (__be32 *)root->fs_info->fsid; |
1095 | int ret; | ||
721 | 1096 | ||
1097 | /* hold chunk_mutex to avoid allocating new chunks */ | ||
1098 | mutex_lock(&root->fs_info->chunk_mutex); | ||
722 | rcu_read_lock(); | 1099 | rcu_read_lock(); |
723 | list_for_each_entry_rcu(found, head, list) | 1100 | list_for_each_entry_rcu(found, head, list) { |
1101 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) { | ||
1102 | total_free_data += found->disk_total - found->disk_used; | ||
1103 | total_free_data -= | ||
1104 | btrfs_account_ro_block_groups_free_space(found); | ||
1105 | } | ||
1106 | |||
724 | total_used += found->disk_used; | 1107 | total_used += found->disk_used; |
1108 | } | ||
725 | rcu_read_unlock(); | 1109 | rcu_read_unlock(); |
726 | 1110 | ||
727 | buf->f_namelen = BTRFS_NAME_LEN; | 1111 | buf->f_namelen = BTRFS_NAME_LEN; |
728 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; | 1112 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; |
729 | buf->f_bfree = buf->f_blocks - (total_used >> bits); | 1113 | buf->f_bfree = buf->f_blocks - (total_used >> bits); |
730 | buf->f_bavail = buf->f_bfree; | ||
731 | buf->f_bsize = dentry->d_sb->s_blocksize; | 1114 | buf->f_bsize = dentry->d_sb->s_blocksize; |
732 | buf->f_type = BTRFS_SUPER_MAGIC; | 1115 | buf->f_type = BTRFS_SUPER_MAGIC; |
1116 | buf->f_bavail = total_free_data; | ||
1117 | ret = btrfs_calc_avail_data_space(root, &total_free_data); | ||
1118 | if (ret) { | ||
1119 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
1120 | return ret; | ||
1121 | } | ||
1122 | buf->f_bavail += total_free_data; | ||
1123 | buf->f_bavail = buf->f_bavail >> bits; | ||
1124 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
733 | 1125 | ||
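f_bavail now counts the free bytes inside already-allocated data chunks plus the btrfs_calc_avail_data_space() estimate for unallocated raw space, and statfs reports everything in filesystem blocks, so the byte totals are shifted right by s_blocksize_bits. A quick arithmetic check of that conversion (numbers hypothetical):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        int bits = 12;                          /* 4 KiB blocks */
        uint64_t total = 1ULL << 40;            /* 1 TiB volume */
        uint64_t used  = 600ULL << 30;          /* 600 GiB used */

        uint64_t f_blocks = total >> bits;
        uint64_t f_bfree  = f_blocks - (used >> bits);

        /* prints: 268435456 111149056 */
        printf("%llu %llu\n", (unsigned long long)f_blocks,
               (unsigned long long)f_bfree);
        return 0;
}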
734 | /* We treat it as constant endianness (it doesn't matter _which_) | 1126 | /* We treat it as constant endianness (it doesn't matter _which_) |
735 | because we want the fsid to come out the same whether mounted | 1127 | because we want the fsid to come out the same whether mounted |
@@ -746,7 +1138,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
746 | static struct file_system_type btrfs_fs_type = { | 1138 | static struct file_system_type btrfs_fs_type = { |
747 | .owner = THIS_MODULE, | 1139 | .owner = THIS_MODULE, |
748 | .name = "btrfs", | 1140 | .name = "btrfs", |
749 | .get_sb = btrfs_get_sb, | 1141 | .mount = btrfs_mount, |
750 | .kill_sb = kill_anon_super, | 1142 | .kill_sb = kill_anon_super, |
751 | .fs_flags = FS_REQUIRES_DEV, | 1143 | .fs_flags = FS_REQUIRES_DEV, |
752 | }; | 1144 | }; |
@@ -815,6 +1207,7 @@ static const struct file_operations btrfs_ctl_fops = { | |||
815 | .unlocked_ioctl = btrfs_control_ioctl, | 1207 | .unlocked_ioctl = btrfs_control_ioctl, |
816 | .compat_ioctl = btrfs_control_ioctl, | 1208 | .compat_ioctl = btrfs_control_ioctl, |
817 | .owner = THIS_MODULE, | 1209 | .owner = THIS_MODULE, |
1210 | .llseek = noop_llseek, | ||
818 | }; | 1211 | }; |
819 | 1212 | ||
820 | static struct miscdevice btrfs_misc = { | 1213 | static struct miscdevice btrfs_misc = { |
@@ -845,10 +1238,14 @@ static int __init init_btrfs_fs(void) | |||
845 | if (err) | 1238 | if (err) |
846 | return err; | 1239 | return err; |
847 | 1240 | ||
848 | err = btrfs_init_cachep(); | 1241 | err = btrfs_init_compress(); |
849 | if (err) | 1242 | if (err) |
850 | goto free_sysfs; | 1243 | goto free_sysfs; |
851 | 1244 | ||
1245 | err = btrfs_init_cachep(); | ||
1246 | if (err) | ||
1247 | goto free_compress; | ||
1248 | |||
852 | err = extent_io_init(); | 1249 | err = extent_io_init(); |
853 | if (err) | 1250 | if (err) |
854 | goto free_cachep; | 1251 | goto free_cachep; |
@@ -857,10 +1254,14 @@ static int __init init_btrfs_fs(void) | |||
857 | if (err) | 1254 | if (err) |
858 | goto free_extent_io; | 1255 | goto free_extent_io; |
859 | 1256 | ||
860 | err = btrfs_interface_init(); | 1257 | err = btrfs_delayed_inode_init(); |
861 | if (err) | 1258 | if (err) |
862 | goto free_extent_map; | 1259 | goto free_extent_map; |
863 | 1260 | ||
1261 | err = btrfs_interface_init(); | ||
1262 | if (err) | ||
1263 | goto free_delayed_inode; | ||
1264 | |||
864 | err = register_filesystem(&btrfs_fs_type); | 1265 | err = register_filesystem(&btrfs_fs_type); |
865 | if (err) | 1266 | if (err) |
866 | goto unregister_ioctl; | 1267 | goto unregister_ioctl; |
@@ -870,12 +1271,16 @@ static int __init init_btrfs_fs(void) | |||
870 | 1271 | ||
871 | unregister_ioctl: | 1272 | unregister_ioctl: |
872 | btrfs_interface_exit(); | 1273 | btrfs_interface_exit(); |
1274 | free_delayed_inode: | ||
1275 | btrfs_delayed_inode_exit(); | ||
873 | free_extent_map: | 1276 | free_extent_map: |
874 | extent_map_exit(); | 1277 | extent_map_exit(); |
875 | free_extent_io: | 1278 | free_extent_io: |
876 | extent_io_exit(); | 1279 | extent_io_exit(); |
877 | free_cachep: | 1280 | free_cachep: |
878 | btrfs_destroy_cachep(); | 1281 | btrfs_destroy_cachep(); |
1282 | free_compress: | ||
1283 | btrfs_exit_compress(); | ||
879 | free_sysfs: | 1284 | free_sysfs: |
880 | btrfs_exit_sysfs(); | 1285 | btrfs_exit_sysfs(); |
881 | return err; | 1286 | return err; |
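The new free_compress and free_delayed_inode labels follow the standard staged-init pattern: each step that succeeds gains a cleanup label, and a later failure jumps to the label that unwinds everything initialized so far, in strict reverse order. A compilable sketch of the shape (the subsystems are hypothetical stand-ins for cachep/compress/etc.):

#include <stdio.h>

static int init_a(void)  { puts("init a"); return 0; }
static int init_b(void)  { puts("init b"); return 0; }
static int init_c(void)  { puts("init c"); return -1; }        /* simulated failure */
static void exit_a(void) { puts("exit a"); }
static void exit_b(void) { puts("exit b"); }

static int init_all(void)
{
        int err;

        err = init_a();
        if (err)
                return err;
        err = init_b();
        if (err)
                goto free_a;
        err = init_c();
        if (err)
                goto free_b;
        return 0;

free_b:                                 /* unwind in strict reverse order */
        exit_b();
free_a:
        exit_a();
        return err;
}

int main(void)
{
        /* prints: init a, init b, init c, exit b, exit a */
        return init_all() ? 1 : 0;
}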
@@ -884,13 +1289,14 @@ free_sysfs: | |||
884 | static void __exit exit_btrfs_fs(void) | 1289 | static void __exit exit_btrfs_fs(void) |
885 | { | 1290 | { |
886 | btrfs_destroy_cachep(); | 1291 | btrfs_destroy_cachep(); |
1292 | btrfs_delayed_inode_exit(); | ||
887 | extent_map_exit(); | 1293 | extent_map_exit(); |
888 | extent_io_exit(); | 1294 | extent_io_exit(); |
889 | btrfs_interface_exit(); | 1295 | btrfs_interface_exit(); |
890 | unregister_filesystem(&btrfs_fs_type); | 1296 | unregister_filesystem(&btrfs_fs_type); |
891 | btrfs_exit_sysfs(); | 1297 | btrfs_exit_sysfs(); |
892 | btrfs_cleanup_fs_uuids(); | 1298 | btrfs_cleanup_fs_uuids(); |
893 | btrfs_zlib_exit(); | 1299 | btrfs_exit_compress(); |
894 | } | 1300 | } |
895 | 1301 | ||
896 | module_init(init_btrfs_fs) | 1302 | module_init(init_btrfs_fs) |
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 4ce16ef702a3..daac9ae6d731 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
@@ -28,232 +28,9 @@ | |||
28 | #include "disk-io.h" | 28 | #include "disk-io.h" |
29 | #include "transaction.h" | 29 | #include "transaction.h" |
30 | 30 | ||
31 | static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) | ||
32 | { | ||
33 | return snprintf(buf, PAGE_SIZE, "%llu\n", | ||
34 | (unsigned long long)btrfs_root_used(&root->root_item)); | ||
35 | } | ||
36 | |||
37 | static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) | ||
38 | { | ||
39 | return snprintf(buf, PAGE_SIZE, "%llu\n", | ||
40 | (unsigned long long)btrfs_root_limit(&root->root_item)); | ||
41 | } | ||
42 | |||
43 | static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) | ||
44 | { | ||
45 | |||
46 | return snprintf(buf, PAGE_SIZE, "%llu\n", | ||
47 | (unsigned long long)btrfs_super_bytes_used(&fs->super_copy)); | ||
48 | } | ||
49 | |||
50 | static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) | ||
51 | { | ||
52 | return snprintf(buf, PAGE_SIZE, "%llu\n", | ||
53 | (unsigned long long)btrfs_super_total_bytes(&fs->super_copy)); | ||
54 | } | ||
55 | |||
56 | static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) | ||
57 | { | ||
58 | return snprintf(buf, PAGE_SIZE, "%llu\n", | ||
59 | (unsigned long long)btrfs_super_sectorsize(&fs->super_copy)); | ||
60 | } | ||
61 | |||
62 | /* this is for root attrs (subvols/snapshots) */ | ||
63 | struct btrfs_root_attr { | ||
64 | struct attribute attr; | ||
65 | ssize_t (*show)(struct btrfs_root *, char *); | ||
66 | ssize_t (*store)(struct btrfs_root *, const char *, size_t); | ||
67 | }; | ||
68 | |||
69 | #define ROOT_ATTR(name, mode, show, store) \ | ||
70 | static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \ | ||
71 | show, store) | ||
72 | |||
73 | ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL); | ||
74 | ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL); | ||
75 | |||
76 | static struct attribute *btrfs_root_attrs[] = { | ||
77 | &btrfs_root_attr_blocks_used.attr, | ||
78 | &btrfs_root_attr_block_limit.attr, | ||
79 | NULL, | ||
80 | }; | ||
81 | |||
82 | /* this is for super attrs (actual full fs) */ | ||
83 | struct btrfs_super_attr { | ||
84 | struct attribute attr; | ||
85 | ssize_t (*show)(struct btrfs_fs_info *, char *); | ||
86 | ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t); | ||
87 | }; | ||
88 | |||
89 | #define SUPER_ATTR(name, mode, show, store) \ | ||
90 | static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \ | ||
91 | show, store) | ||
92 | |||
93 | SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL); | ||
94 | SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL); | ||
95 | SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL); | ||
96 | |||
97 | static struct attribute *btrfs_super_attrs[] = { | ||
98 | &btrfs_super_attr_blocks_used.attr, | ||
99 | &btrfs_super_attr_total_blocks.attr, | ||
100 | &btrfs_super_attr_blocksize.attr, | ||
101 | NULL, | ||
102 | }; | ||
103 | |||
104 | static ssize_t btrfs_super_attr_show(struct kobject *kobj, | ||
105 | struct attribute *attr, char *buf) | ||
106 | { | ||
107 | struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, | ||
108 | super_kobj); | ||
109 | struct btrfs_super_attr *a = container_of(attr, | ||
110 | struct btrfs_super_attr, | ||
111 | attr); | ||
112 | |||
113 | return a->show ? a->show(fs, buf) : 0; | ||
114 | } | ||
115 | |||
116 | static ssize_t btrfs_super_attr_store(struct kobject *kobj, | ||
117 | struct attribute *attr, | ||
118 | const char *buf, size_t len) | ||
119 | { | ||
120 | struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, | ||
121 | super_kobj); | ||
122 | struct btrfs_super_attr *a = container_of(attr, | ||
123 | struct btrfs_super_attr, | ||
124 | attr); | ||
125 | |||
126 | return a->store ? a->store(fs, buf, len) : 0; | ||
127 | } | ||
128 | |||
129 | static ssize_t btrfs_root_attr_show(struct kobject *kobj, | ||
130 | struct attribute *attr, char *buf) | ||
131 | { | ||
132 | struct btrfs_root *root = container_of(kobj, struct btrfs_root, | ||
133 | root_kobj); | ||
134 | struct btrfs_root_attr *a = container_of(attr, | ||
135 | struct btrfs_root_attr, | ||
136 | attr); | ||
137 | |||
138 | return a->show ? a->show(root, buf) : 0; | ||
139 | } | ||
140 | |||
141 | static ssize_t btrfs_root_attr_store(struct kobject *kobj, | ||
142 | struct attribute *attr, | ||
143 | const char *buf, size_t len) | ||
144 | { | ||
145 | struct btrfs_root *root = container_of(kobj, struct btrfs_root, | ||
146 | root_kobj); | ||
147 | struct btrfs_root_attr *a = container_of(attr, | ||
148 | struct btrfs_root_attr, | ||
149 | attr); | ||
150 | return a->store ? a->store(root, buf, len) : 0; | ||
151 | } | ||
152 | |||
153 | static void btrfs_super_release(struct kobject *kobj) | ||
154 | { | ||
155 | struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, | ||
156 | super_kobj); | ||
157 | complete(&fs->kobj_unregister); | ||
158 | } | ||
159 | |||
160 | static void btrfs_root_release(struct kobject *kobj) | ||
161 | { | ||
162 | struct btrfs_root *root = container_of(kobj, struct btrfs_root, | ||
163 | root_kobj); | ||
164 | complete(&root->kobj_unregister); | ||
165 | } | ||
166 | |||
167 | static const struct sysfs_ops btrfs_super_attr_ops = { | ||
168 | .show = btrfs_super_attr_show, | ||
169 | .store = btrfs_super_attr_store, | ||
170 | }; | ||
171 | |||
172 | static const struct sysfs_ops btrfs_root_attr_ops = { | ||
173 | .show = btrfs_root_attr_show, | ||
174 | .store = btrfs_root_attr_store, | ||
175 | }; | ||
176 | |||
177 | static struct kobj_type btrfs_root_ktype = { | ||
178 | .default_attrs = btrfs_root_attrs, | ||
179 | .sysfs_ops = &btrfs_root_attr_ops, | ||
180 | .release = btrfs_root_release, | ||
181 | }; | ||
182 | |||
183 | static struct kobj_type btrfs_super_ktype = { | ||
184 | .default_attrs = btrfs_super_attrs, | ||
185 | .sysfs_ops = &btrfs_super_attr_ops, | ||
186 | .release = btrfs_super_release, | ||
187 | }; | ||
188 | |||
189 | /* /sys/fs/btrfs/ entry */ | 31 | /* /sys/fs/btrfs/ entry */ |
190 | static struct kset *btrfs_kset; | 32 | static struct kset *btrfs_kset; |
191 | 33 | ||
192 | int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) | ||
193 | { | ||
194 | int error; | ||
195 | char *name; | ||
196 | char c; | ||
197 | int len = strlen(fs->sb->s_id) + 1; | ||
198 | int i; | ||
199 | |||
200 | name = kmalloc(len, GFP_NOFS); | ||
201 | if (!name) { | ||
202 | error = -ENOMEM; | ||
203 | goto fail; | ||
204 | } | ||
205 | |||
206 | for (i = 0; i < len; i++) { | ||
207 | c = fs->sb->s_id[i]; | ||
208 | if (c == '/' || c == '\\') | ||
209 | c = '!'; | ||
210 | name[i] = c; | ||
211 | } | ||
212 | name[len] = '\0'; | ||
213 | |||
214 | fs->super_kobj.kset = btrfs_kset; | ||
215 | error = kobject_init_and_add(&fs->super_kobj, &btrfs_super_ktype, | ||
216 | NULL, "%s", name); | ||
217 | kfree(name); | ||
218 | if (error) | ||
219 | goto fail; | ||
220 | |||
221 | return 0; | ||
222 | |||
223 | fail: | ||
224 | printk(KERN_ERR "btrfs: sysfs creation for super failed\n"); | ||
225 | return error; | ||
226 | } | ||
227 | |||
228 | int btrfs_sysfs_add_root(struct btrfs_root *root) | ||
229 | { | ||
230 | int error; | ||
231 | |||
232 | error = kobject_init_and_add(&root->root_kobj, &btrfs_root_ktype, | ||
233 | &root->fs_info->super_kobj, | ||
234 | "%s", root->name); | ||
235 | if (error) | ||
236 | goto fail; | ||
237 | |||
238 | return 0; | ||
239 | |||
240 | fail: | ||
241 | printk(KERN_ERR "btrfs: sysfs creation for root failed\n"); | ||
242 | return error; | ||
243 | } | ||
244 | |||
245 | void btrfs_sysfs_del_root(struct btrfs_root *root) | ||
246 | { | ||
247 | kobject_put(&root->root_kobj); | ||
248 | wait_for_completion(&root->kobj_unregister); | ||
249 | } | ||
250 | |||
251 | void btrfs_sysfs_del_super(struct btrfs_fs_info *fs) | ||
252 | { | ||
253 | kobject_put(&fs->super_kobj); | ||
254 | wait_for_completion(&fs->kobj_unregister); | ||
255 | } | ||
256 | |||
257 | int btrfs_init_sysfs(void) | 34 | int btrfs_init_sysfs(void) |
258 | { | 35 | { |
259 | btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); | 36 | btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 66e4c66cc63b..51dcec86757f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -27,15 +27,15 @@ | |||
27 | #include "transaction.h" | 27 | #include "transaction.h" |
28 | #include "locking.h" | 28 | #include "locking.h" |
29 | #include "tree-log.h" | 29 | #include "tree-log.h" |
30 | #include "inode-map.h" | ||
30 | 31 | ||
31 | #define BTRFS_ROOT_TRANS_TAG 0 | 32 | #define BTRFS_ROOT_TRANS_TAG 0 |
32 | 33 | ||
33 | static noinline void put_transaction(struct btrfs_transaction *transaction) | 34 | static noinline void put_transaction(struct btrfs_transaction *transaction) |
34 | { | 35 | { |
35 | WARN_ON(transaction->use_count == 0); | 36 | WARN_ON(atomic_read(&transaction->use_count) == 0); |
36 | transaction->use_count--; | 37 | if (atomic_dec_and_test(&transaction->use_count)) { |
37 | if (transaction->use_count == 0) { | 38 | BUG_ON(!list_empty(&transaction->list)); |
38 | list_del_init(&transaction->list); | ||
39 | memset(transaction, 0, sizeof(*transaction)); | 39 | memset(transaction, 0, sizeof(*transaction)); |
40 | kmem_cache_free(btrfs_transaction_cachep, transaction); | 40 | kmem_cache_free(btrfs_transaction_cachep, transaction); |
41 | } | 41 | } |
@@ -50,46 +50,72 @@ static noinline void switch_commit_root(struct btrfs_root *root) | |||
50 | /* | 50 | /* |
51 | * either allocate a new transaction or hop into the existing one | 51 | * either allocate a new transaction or hop into the existing one |
52 | */ | 52 | */ |
53 | static noinline int join_transaction(struct btrfs_root *root) | 53 | static noinline int join_transaction(struct btrfs_root *root, int nofail) |
54 | { | 54 | { |
55 | struct btrfs_transaction *cur_trans; | 55 | struct btrfs_transaction *cur_trans; |
56 | |||
57 | spin_lock(&root->fs_info->trans_lock); | ||
58 | if (root->fs_info->trans_no_join) { | ||
59 | if (!nofail) { | ||
60 | spin_unlock(&root->fs_info->trans_lock); | ||
61 | return -EBUSY; | ||
62 | } | ||
63 | } | ||
64 | |||
56 | cur_trans = root->fs_info->running_transaction; | 65 | cur_trans = root->fs_info->running_transaction; |
57 | if (!cur_trans) { | 66 | if (cur_trans) { |
58 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, | 67 | atomic_inc(&cur_trans->use_count); |
59 | GFP_NOFS); | 68 | atomic_inc(&cur_trans->num_writers); |
60 | BUG_ON(!cur_trans); | ||
61 | root->fs_info->generation++; | ||
62 | cur_trans->num_writers = 1; | ||
63 | cur_trans->num_joined = 0; | ||
64 | cur_trans->transid = root->fs_info->generation; | ||
65 | init_waitqueue_head(&cur_trans->writer_wait); | ||
66 | init_waitqueue_head(&cur_trans->commit_wait); | ||
67 | cur_trans->in_commit = 0; | ||
68 | cur_trans->blocked = 0; | ||
69 | cur_trans->use_count = 1; | ||
70 | cur_trans->commit_done = 0; | ||
71 | cur_trans->start_time = get_seconds(); | ||
72 | |||
73 | cur_trans->delayed_refs.root = RB_ROOT; | ||
74 | cur_trans->delayed_refs.num_entries = 0; | ||
75 | cur_trans->delayed_refs.num_heads_ready = 0; | ||
76 | cur_trans->delayed_refs.num_heads = 0; | ||
77 | cur_trans->delayed_refs.flushing = 0; | ||
78 | cur_trans->delayed_refs.run_delayed_start = 0; | ||
79 | spin_lock_init(&cur_trans->delayed_refs.lock); | ||
80 | |||
81 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | ||
82 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | ||
83 | extent_io_tree_init(&cur_trans->dirty_pages, | ||
84 | root->fs_info->btree_inode->i_mapping, | ||
85 | GFP_NOFS); | ||
86 | spin_lock(&root->fs_info->new_trans_lock); | ||
87 | root->fs_info->running_transaction = cur_trans; | ||
88 | spin_unlock(&root->fs_info->new_trans_lock); | ||
89 | } else { | ||
90 | cur_trans->num_writers++; | ||
91 | cur_trans->num_joined++; | 69 | cur_trans->num_joined++; |
70 | spin_unlock(&root->fs_info->trans_lock); | ||
71 | return 0; | ||
92 | } | 72 | } |
73 | spin_unlock(&root->fs_info->trans_lock); | ||
74 | |||
75 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); | ||
76 | if (!cur_trans) | ||
77 | return -ENOMEM; | ||
78 | spin_lock(&root->fs_info->trans_lock); | ||
79 | if (root->fs_info->running_transaction) { | ||
80 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | ||
81 | cur_trans = root->fs_info->running_transaction; | ||
82 | atomic_inc(&cur_trans->use_count); | ||
83 | atomic_inc(&cur_trans->num_writers); | ||
84 | cur_trans->num_joined++; | ||
85 | spin_unlock(&root->fs_info->trans_lock); | ||
86 | return 0; | ||
87 | } | ||
88 | atomic_set(&cur_trans->num_writers, 1); | ||
89 | cur_trans->num_joined = 0; | ||
90 | init_waitqueue_head(&cur_trans->writer_wait); | ||
91 | init_waitqueue_head(&cur_trans->commit_wait); | ||
92 | cur_trans->in_commit = 0; | ||
93 | cur_trans->blocked = 0; | ||
94 | /* | ||
95 | * One for this trans handle, one so it will live on until we | ||
96 | * commit the transaction. | ||
97 | */ | ||
98 | atomic_set(&cur_trans->use_count, 2); | ||
99 | cur_trans->commit_done = 0; | ||
100 | cur_trans->start_time = get_seconds(); | ||
101 | |||
102 | cur_trans->delayed_refs.root = RB_ROOT; | ||
103 | cur_trans->delayed_refs.num_entries = 0; | ||
104 | cur_trans->delayed_refs.num_heads_ready = 0; | ||
105 | cur_trans->delayed_refs.num_heads = 0; | ||
106 | cur_trans->delayed_refs.flushing = 0; | ||
107 | cur_trans->delayed_refs.run_delayed_start = 0; | ||
108 | spin_lock_init(&cur_trans->commit_lock); | ||
109 | spin_lock_init(&cur_trans->delayed_refs.lock); | ||
110 | |||
111 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | ||
112 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | ||
113 | extent_io_tree_init(&cur_trans->dirty_pages, | ||
114 | root->fs_info->btree_inode->i_mapping); | ||
115 | root->fs_info->generation++; | ||
116 | cur_trans->transid = root->fs_info->generation; | ||
117 | root->fs_info->running_transaction = cur_trans; | ||
118 | spin_unlock(&root->fs_info->trans_lock); | ||
93 | 119 | ||
94 | return 0; | 120 | return 0; |
95 | } | 121 | } |
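Because trans_lock is a spinlock, join_transaction() can no longer allocate while holding it. The rewritten version drops the lock, allocates, retakes the lock, and rechecks whether another thread installed a transaction in the meantime, freeing its own copy if it lost the race. A generic sketch of that optimistic-allocation pattern (names hypothetical; a mutex stands in for the spinlock):

#include <pthread.h>
#include <stdlib.h>

struct trans { int dummy; };

static struct trans *running;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static struct trans *join(void)
{
        struct trans *t;

        pthread_mutex_lock(&lock);
        if (running) {                  /* fast path: reuse the current one */
                t = running;
                pthread_mutex_unlock(&lock);
                return t;
        }
        pthread_mutex_unlock(&lock);

        t = calloc(1, sizeof(*t));      /* allocate with no lock held */
        if (!t)
                return NULL;

        pthread_mutex_lock(&lock);
        if (running) {                  /* lost the race: use theirs */
                free(t);
                t = running;
        } else {
                running = t;
        }
        pthread_mutex_unlock(&lock);
        return t;
}

int main(void)
{
        return join() == join() ? 0 : 1;        /* second call reuses the first */
}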
@@ -100,36 +126,82 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
100 | * to make sure the old root from before we joined the transaction is deleted | 126 | * to make sure the old root from before we joined the transaction is deleted |
101 | * when the transaction commits | 127 | * when the transaction commits |
102 | */ | 128 | */ |
103 | static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, | 129 | static int record_root_in_trans(struct btrfs_trans_handle *trans, |
104 | struct btrfs_root *root) | 130 | struct btrfs_root *root) |
105 | { | 131 | { |
106 | if (root->ref_cows && root->last_trans < trans->transid) { | 132 | if (root->ref_cows && root->last_trans < trans->transid) { |
107 | WARN_ON(root == root->fs_info->extent_root); | 133 | WARN_ON(root == root->fs_info->extent_root); |
108 | WARN_ON(root->commit_root != root->node); | 134 | WARN_ON(root->commit_root != root->node); |
109 | 135 | ||
136 | /* | ||
137 | * see below for in_trans_setup usage rules | ||
138 | * we have the reloc mutex held now, so there | ||
139 | * is only one writer in this function | ||
140 | */ | ||
141 | root->in_trans_setup = 1; | ||
142 | |||
143 | /* make sure readers find in_trans_setup before | ||
144 | * they find our root->last_trans update | ||
145 | */ | ||
146 | smp_wmb(); | ||
147 | |||
148 | spin_lock(&root->fs_info->fs_roots_radix_lock); | ||
149 | if (root->last_trans == trans->transid) { | ||
150 | spin_unlock(&root->fs_info->fs_roots_radix_lock); | ||
151 | return 0; | ||
152 | } | ||
110 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, | 153 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, |
111 | (unsigned long)root->root_key.objectid, | 154 | (unsigned long)root->root_key.objectid, |
112 | BTRFS_ROOT_TRANS_TAG); | 155 | BTRFS_ROOT_TRANS_TAG); |
156 | spin_unlock(&root->fs_info->fs_roots_radix_lock); | ||
113 | root->last_trans = trans->transid; | 157 | root->last_trans = trans->transid; |
158 | |||
159 | /* this is pretty tricky. We don't want to | ||
160 | * take the relocation lock in btrfs_record_root_in_trans | ||
161 | * unless we're really doing the first setup for this root in | ||
162 | * this transaction. | ||
163 | * | ||
164 | * Normally we'd use root->last_trans as a flag to decide | ||
165 | * if we want to take the expensive mutex. | ||
166 | * | ||
167 | * But, we have to set root->last_trans before we | ||
168 | * init the relocation root, otherwise, we trip over warnings | ||
169 | * in ctree.c. The solution used here is to flag ourselves | ||
170 | * with root->in_trans_setup. When this is 1, we're still | ||
171 | * fixing up the reloc trees and everyone must wait. | ||
172 | * | ||
173 | * When this is zero, they can trust root->last_trans and fly | ||
174 | * through btrfs_record_root_in_trans without having to take the | ||
175 | * lock. smp_wmb() makes sure that all the writes above are | ||
176 | * done before we pop in the zero below | ||
177 | */ | ||
114 | btrfs_init_reloc_root(trans, root); | 178 | btrfs_init_reloc_root(trans, root); |
179 | smp_wmb(); | ||
180 | root->in_trans_setup = 0; | ||
115 | } | 181 | } |
116 | return 0; | 182 | return 0; |
117 | } | 183 | } |
118 | 184 | ||
185 | |||
119 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | 186 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, |
120 | struct btrfs_root *root) | 187 | struct btrfs_root *root) |
121 | { | 188 | { |
122 | if (!root->ref_cows) | 189 | if (!root->ref_cows) |
123 | return 0; | 190 | return 0; |
124 | 191 | ||
125 | mutex_lock(&root->fs_info->trans_mutex); | 192 | /* |
126 | if (root->last_trans == trans->transid) { | 193 | * see record_root_in_trans for comments about in_trans_setup usage |
127 | mutex_unlock(&root->fs_info->trans_mutex); | 194 | * and barriers |
195 | */ | ||
196 | smp_rmb(); | ||
197 | if (root->last_trans == trans->transid && | ||
198 | !root->in_trans_setup) | ||
128 | return 0; | 199 | return 0; |
129 | } | ||
130 | 200 | ||
201 | mutex_lock(&root->fs_info->reloc_mutex); | ||
131 | record_root_in_trans(trans, root); | 202 | record_root_in_trans(trans, root); |
132 | mutex_unlock(&root->fs_info->trans_mutex); | 203 | mutex_unlock(&root->fs_info->reloc_mutex); |
204 | |||
133 | return 0; | 205 | return 0; |
134 | } | 206 | } |
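The smp_wmb() in record_root_in_trans() pairs with the smp_rmb() in btrfs_record_root_in_trans(): a writer publishes in_trans_setup before touching last_trans and clears it only after the reloc root is initialized, so a reader that sees last_trans current and in_trans_setup clear may safely skip reloc_mutex. Roughly the same ordering expressed in C11 release/acquire terms (a simplified sketch, not a literal translation of the kernel barriers):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static long reloc_data;                 /* stands in for the reloc root */
static atomic_bool in_setup;
static atomic_long last_trans;

static void writer(long transid)
{
        atomic_store(&in_setup, true);
        /* release: the in_setup store is visible before last_trans */
        atomic_store_explicit(&last_trans, transid, memory_order_release);
        reloc_data = transid * 100;     /* the expensive setup */
        /* release: reloc_data is published before in_setup is cleared */
        atomic_store_explicit(&in_setup, false, memory_order_release);
}

/* Fast path: safe to skip the lock only if setup already finished. */
static bool reader_fast_path(long transid)
{
        if (atomic_load_explicit(&last_trans, memory_order_acquire) == transid &&
            !atomic_load_explicit(&in_setup, memory_order_acquire))
                return true;            /* reloc_data is fully visible here */
        return false;                   /* fall back to taking the mutex */
}

int main(void)
{
        writer(7);
        printf("%d\n", reader_fast_path(7));    /* prints 1 */
        return 0;
}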
135 | 207 | ||
@@ -141,21 +213,23 @@ static void wait_current_trans(struct btrfs_root *root) | |||
141 | { | 213 | { |
142 | struct btrfs_transaction *cur_trans; | 214 | struct btrfs_transaction *cur_trans; |
143 | 215 | ||
216 | spin_lock(&root->fs_info->trans_lock); | ||
144 | cur_trans = root->fs_info->running_transaction; | 217 | cur_trans = root->fs_info->running_transaction; |
145 | if (cur_trans && cur_trans->blocked) { | 218 | if (cur_trans && cur_trans->blocked) { |
146 | DEFINE_WAIT(wait); | 219 | DEFINE_WAIT(wait); |
147 | cur_trans->use_count++; | 220 | atomic_inc(&cur_trans->use_count); |
221 | spin_unlock(&root->fs_info->trans_lock); | ||
148 | while (1) { | 222 | while (1) { |
149 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | 223 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, |
150 | TASK_UNINTERRUPTIBLE); | 224 | TASK_UNINTERRUPTIBLE); |
151 | if (!cur_trans->blocked) | 225 | if (!cur_trans->blocked) |
152 | break; | 226 | break; |
153 | mutex_unlock(&root->fs_info->trans_mutex); | ||
154 | schedule(); | 227 | schedule(); |
155 | mutex_lock(&root->fs_info->trans_mutex); | ||
156 | } | 228 | } |
157 | finish_wait(&root->fs_info->transaction_wait, &wait); | 229 | finish_wait(&root->fs_info->transaction_wait, &wait); |
158 | put_transaction(cur_trans); | 230 | put_transaction(cur_trans); |
231 | } else { | ||
232 | spin_unlock(&root->fs_info->trans_lock); | ||
159 | } | 233 | } |
160 | } | 234 | } |
161 | 235 | ||
@@ -163,14 +237,21 @@ enum btrfs_trans_type { | |||
163 | TRANS_START, | 237 | TRANS_START, |
164 | TRANS_JOIN, | 238 | TRANS_JOIN, |
165 | TRANS_USERSPACE, | 239 | TRANS_USERSPACE, |
240 | TRANS_JOIN_NOLOCK, | ||
166 | }; | 241 | }; |
167 | 242 | ||
168 | static int may_wait_transaction(struct btrfs_root *root, int type) | 243 | static int may_wait_transaction(struct btrfs_root *root, int type) |
169 | { | 244 | { |
170 | if (!root->fs_info->log_root_recovering && | 245 | if (root->fs_info->log_root_recovering) |
171 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || | 246 | return 0; |
172 | type == TRANS_USERSPACE)) | 247 | |
248 | if (type == TRANS_USERSPACE) | ||
173 | return 1; | 249 | return 1; |
250 | |||
251 | if (type == TRANS_START && | ||
252 | !atomic_read(&root->fs_info->open_ioctl_trans)) | ||
253 | return 1; | ||
254 | |||
174 | return 0; | 255 | return 0; |
175 | } | 256 | } |
176 | 257 | ||
@@ -181,29 +262,47 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
181 | struct btrfs_transaction *cur_trans; | 262 | struct btrfs_transaction *cur_trans; |
182 | int retries = 0; | 263 | int retries = 0; |
183 | int ret; | 264 | int ret; |
265 | |||
266 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | ||
267 | return ERR_PTR(-EROFS); | ||
268 | |||
269 | if (current->journal_info) { | ||
270 | WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK); | ||
271 | h = current->journal_info; | ||
272 | h->use_count++; | ||
273 | h->orig_rsv = h->block_rsv; | ||
274 | h->block_rsv = NULL; | ||
275 | goto got_it; | ||
276 | } | ||
184 | again: | 277 | again: |
185 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 278 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); |
186 | if (!h) | 279 | if (!h) |
187 | return ERR_PTR(-ENOMEM); | 280 | return ERR_PTR(-ENOMEM); |
188 | 281 | ||
189 | mutex_lock(&root->fs_info->trans_mutex); | ||
190 | if (may_wait_transaction(root, type)) | 282 | if (may_wait_transaction(root, type)) |
191 | wait_current_trans(root); | 283 | wait_current_trans(root); |
192 | 284 | ||
193 | ret = join_transaction(root); | 285 | do { |
194 | BUG_ON(ret); | 286 | ret = join_transaction(root, type == TRANS_JOIN_NOLOCK); |
287 | if (ret == -EBUSY) | ||
288 | wait_current_trans(root); | ||
289 | } while (ret == -EBUSY); | ||
290 | |||
291 | if (ret < 0) { | ||
292 | kmem_cache_free(btrfs_trans_handle_cachep, h); | ||
293 | return ERR_PTR(ret); | ||
294 | } | ||
195 | 295 | ||
196 | cur_trans = root->fs_info->running_transaction; | 296 | cur_trans = root->fs_info->running_transaction; |
197 | cur_trans->use_count++; | ||
198 | mutex_unlock(&root->fs_info->trans_mutex); | ||
199 | 297 | ||
200 | h->transid = cur_trans->transid; | 298 | h->transid = cur_trans->transid; |
201 | h->transaction = cur_trans; | 299 | h->transaction = cur_trans; |
202 | h->blocks_used = 0; | 300 | h->blocks_used = 0; |
203 | h->block_group = 0; | ||
204 | h->bytes_reserved = 0; | 301 | h->bytes_reserved = 0; |
205 | h->delayed_ref_updates = 0; | 302 | h->delayed_ref_updates = 0; |
303 | h->use_count = 1; | ||
206 | h->block_rsv = NULL; | 304 | h->block_rsv = NULL; |
305 | h->orig_rsv = NULL; | ||
207 | 306 | ||
208 | smp_mb(); | 307 | smp_mb(); |
209 | if (cur_trans->blocked && may_wait_transaction(root, type)) { | 308 | if (cur_trans->blocked && may_wait_transaction(root, type)) { |
@@ -212,21 +311,27 @@ again: | |||
212 | } | 311 | } |
213 | 312 | ||
214 | if (num_items > 0) { | 313 | if (num_items > 0) { |
215 | ret = btrfs_trans_reserve_metadata(h, root, num_items, | 314 | ret = btrfs_trans_reserve_metadata(h, root, num_items); |
216 | &retries); | 315 | if (ret == -EAGAIN && !retries) { |
217 | if (ret == -EAGAIN) { | 316 | retries++; |
218 | btrfs_commit_transaction(h, root); | 317 | btrfs_commit_transaction(h, root); |
219 | goto again; | 318 | goto again; |
319 | } else if (ret == -EAGAIN) { | ||
320 | /* | ||
321 | * We have already retried and got EAGAIN, so really we | ||
322 | * don't have space, so set ret to -ENOSPC. | ||
323 | */ | ||
324 | ret = -ENOSPC; | ||
220 | } | 325 | } |
326 | |||
221 | if (ret < 0) { | 327 | if (ret < 0) { |
222 | btrfs_end_transaction(h, root); | 328 | btrfs_end_transaction(h, root); |
223 | return ERR_PTR(ret); | 329 | return ERR_PTR(ret); |
224 | } | 330 | } |
225 | } | 331 | } |
226 | 332 | ||
227 | mutex_lock(&root->fs_info->trans_mutex); | 333 | got_it: |
228 | record_root_in_trans(h, root); | 334 | btrfs_record_root_in_trans(h, root); |
229 | mutex_unlock(&root->fs_info->trans_mutex); | ||
230 | 335 | ||
231 | if (!current->journal_info && type != TRANS_USERSPACE) | 336 | if (!current->journal_info && type != TRANS_USERSPACE) |
232 | current->journal_info = h; | 337 | current->journal_info = h; |
@@ -238,16 +343,19 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | |||
238 | { | 343 | { |
239 | return start_transaction(root, num_items, TRANS_START); | 344 | return start_transaction(root, num_items, TRANS_START); |
240 | } | 345 | } |
241 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 346 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) |
242 | int num_blocks) | ||
243 | { | 347 | { |
244 | return start_transaction(root, 0, TRANS_JOIN); | 348 | return start_transaction(root, 0, TRANS_JOIN); |
245 | } | 349 | } |
246 | 350 | ||
247 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 351 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root) |
248 | int num_blocks) | 352 | { |
353 | return start_transaction(root, 0, TRANS_JOIN_NOLOCK); | ||
354 | } | ||
355 | |||
356 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) | ||
249 | { | 357 | { |
250 | return start_transaction(r, 0, TRANS_USERSPACE); | 358 | return start_transaction(root, 0, TRANS_USERSPACE); |
251 | } | 359 | } |
252 | 360 | ||
253 | /* wait for a transaction commit to be fully complete */ | 361 | /* wait for a transaction commit to be fully complete */ |
@@ -255,70 +363,72 @@ static noinline int wait_for_commit(struct btrfs_root *root, | |||
255 | struct btrfs_transaction *commit) | 363 | struct btrfs_transaction *commit) |
256 | { | 364 | { |
257 | DEFINE_WAIT(wait); | 365 | DEFINE_WAIT(wait); |
258 | mutex_lock(&root->fs_info->trans_mutex); | ||
259 | while (!commit->commit_done) { | 366 | while (!commit->commit_done) { |
260 | prepare_to_wait(&commit->commit_wait, &wait, | 367 | prepare_to_wait(&commit->commit_wait, &wait, |
261 | TASK_UNINTERRUPTIBLE); | 368 | TASK_UNINTERRUPTIBLE); |
262 | if (commit->commit_done) | 369 | if (commit->commit_done) |
263 | break; | 370 | break; |
264 | mutex_unlock(&root->fs_info->trans_mutex); | ||
265 | schedule(); | 371 | schedule(); |
266 | mutex_lock(&root->fs_info->trans_mutex); | ||
267 | } | 372 | } |
268 | mutex_unlock(&root->fs_info->trans_mutex); | ||
269 | finish_wait(&commit->commit_wait, &wait); | 373 | finish_wait(&commit->commit_wait, &wait); |
270 | return 0; | 374 | return 0; |
271 | } | 375 | } |
272 | 376 | ||
273 | #if 0 | 377 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) |
274 | /* | ||
275 | * rate limit against the drop_snapshot code. This helps to slow down new | ||
276 | * operations if the drop_snapshot code isn't able to keep up. | ||
277 | */ | ||
278 | static void throttle_on_drops(struct btrfs_root *root) | ||
279 | { | 378 | { |
280 | struct btrfs_fs_info *info = root->fs_info; | 379 | struct btrfs_transaction *cur_trans = NULL, *t; |
281 | int harder_count = 0; | 380 | int ret; |
282 | 381 | ||
283 | harder: | 382 | ret = 0; |
284 | if (atomic_read(&info->throttles)) { | 383 | if (transid) { |
285 | DEFINE_WAIT(wait); | 384 | if (transid <= root->fs_info->last_trans_committed) |
286 | int thr; | 385 | goto out; |
287 | thr = atomic_read(&info->throttle_gen); | 386 | |
288 | 387 | /* find specified transaction */ | |
289 | do { | 388 | spin_lock(&root->fs_info->trans_lock); |
290 | prepare_to_wait(&info->transaction_throttle, | 389 | list_for_each_entry(t, &root->fs_info->trans_list, list) { |
291 | &wait, TASK_UNINTERRUPTIBLE); | 390 | if (t->transid == transid) { |
292 | if (!atomic_read(&info->throttles)) { | 391 | cur_trans = t; |
293 | finish_wait(&info->transaction_throttle, &wait); | 392 | atomic_inc(&cur_trans->use_count); |
294 | break; | 393 | break; |
295 | } | 394 | } |
296 | schedule(); | 395 | if (t->transid > transid) |
297 | finish_wait(&info->transaction_throttle, &wait); | 396 | break; |
298 | } while (thr == atomic_read(&info->throttle_gen)); | 397 | } |
299 | harder_count++; | 398 | spin_unlock(&root->fs_info->trans_lock); |
300 | 399 | ret = -EINVAL; | |
301 | if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 && | 400 | if (!cur_trans) |
302 | harder_count < 2) | 401 | goto out; /* bad transid */ |
303 | goto harder; | 402 | } else { |
403 | /* find newest transaction that is committing | committed */ | ||
404 | spin_lock(&root->fs_info->trans_lock); | ||
405 | list_for_each_entry_reverse(t, &root->fs_info->trans_list, | ||
406 | list) { | ||
407 | if (t->in_commit) { | ||
408 | if (t->commit_done) | ||
409 | break; | ||
410 | cur_trans = t; | ||
411 | atomic_inc(&cur_trans->use_count); | ||
412 | break; | ||
413 | } | ||
414 | } | ||
415 | spin_unlock(&root->fs_info->trans_lock); | ||
416 | if (!cur_trans) | ||
417 | goto out; /* nothing committing|committed */ | ||
418 | } | ||
304 | 419 | ||
305 | if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 && | 420 | wait_for_commit(root, cur_trans); |
306 | harder_count < 10) | ||
307 | goto harder; | ||
308 | 421 | ||
309 | if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 && | 422 | put_transaction(cur_trans); |
310 | harder_count < 20) | 423 | ret = 0; |
311 | goto harder; | 424 | out: |
312 | } | 425 | return ret; |
313 | } | 426 | } |
314 | #endif | ||
315 | 427 | ||
316 | void btrfs_throttle(struct btrfs_root *root) | 428 | void btrfs_throttle(struct btrfs_root *root) |
317 | { | 429 | { |
318 | mutex_lock(&root->fs_info->trans_mutex); | 430 | if (!atomic_read(&root->fs_info->open_ioctl_trans)) |
319 | if (!root->fs_info->open_ioctl_trans) | ||
320 | wait_current_trans(root); | 431 | wait_current_trans(root); |
321 | mutex_unlock(&root->fs_info->trans_mutex); | ||
322 | } | 432 | } |
323 | 433 | ||
324 | static int should_end_transaction(struct btrfs_trans_handle *trans, | 434 | static int should_end_transaction(struct btrfs_trans_handle *trans, |
@@ -336,6 +446,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
336 | struct btrfs_transaction *cur_trans = trans->transaction; | 446 | struct btrfs_transaction *cur_trans = trans->transaction; |
337 | int updates; | 447 | int updates; |
338 | 448 | ||
449 | smp_mb(); | ||
339 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) | 450 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) |
340 | return 1; | 451 | return 1; |
341 | 452 | ||
@@ -348,12 +459,17 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
348 | } | 459 | } |
349 | 460 | ||
350 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | 461 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, |
351 | struct btrfs_root *root, int throttle) | 462 | struct btrfs_root *root, int throttle, int lock) |
352 | { | 463 | { |
353 | struct btrfs_transaction *cur_trans = trans->transaction; | 464 | struct btrfs_transaction *cur_trans = trans->transaction; |
354 | struct btrfs_fs_info *info = root->fs_info; | 465 | struct btrfs_fs_info *info = root->fs_info; |
355 | int count = 0; | 466 | int count = 0; |
356 | 467 | ||
468 | if (--trans->use_count) { | ||
469 | trans->block_rsv = trans->orig_rsv; | ||
470 | return 0; | ||
471 | } | ||
472 | |||
357 | while (count < 4) { | 473 | while (count < 4) { |
358 | unsigned long cur = trans->delayed_ref_updates; | 474 | unsigned long cur = trans->delayed_ref_updates; |
359 | trans->delayed_ref_updates = 0; | 475 | trans->delayed_ref_updates = 0; |
@@ -376,26 +492,27 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
376 | 492 | ||
377 | btrfs_trans_release_metadata(trans, root); | 493 | btrfs_trans_release_metadata(trans, root); |
378 | 494 | ||
379 | if (!root->fs_info->open_ioctl_trans && | 495 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && |
380 | should_end_transaction(trans, root)) | 496 | should_end_transaction(trans, root)) { |
381 | trans->transaction->blocked = 1; | 497 | trans->transaction->blocked = 1; |
498 | smp_wmb(); | ||
499 | } | ||
382 | 500 | ||
383 | if (cur_trans->blocked && !cur_trans->in_commit) { | 501 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { |
384 | if (throttle) | 502 | if (throttle) |
385 | return btrfs_commit_transaction(trans, root); | 503 | return btrfs_commit_transaction(trans, root); |
386 | else | 504 | else |
387 | wake_up_process(info->transaction_kthread); | 505 | wake_up_process(info->transaction_kthread); |
388 | } | 506 | } |
389 | 507 | ||
390 | mutex_lock(&info->trans_mutex); | ||
391 | WARN_ON(cur_trans != info->running_transaction); | 508 | WARN_ON(cur_trans != info->running_transaction); |
392 | WARN_ON(cur_trans->num_writers < 1); | 509 | WARN_ON(atomic_read(&cur_trans->num_writers) < 1); |
393 | cur_trans->num_writers--; | 510 | atomic_dec(&cur_trans->num_writers); |
394 | 511 | ||
512 | smp_mb(); | ||
395 | if (waitqueue_active(&cur_trans->writer_wait)) | 513 | if (waitqueue_active(&cur_trans->writer_wait)) |
396 | wake_up(&cur_trans->writer_wait); | 514 | wake_up(&cur_trans->writer_wait); |
397 | put_transaction(cur_trans); | 515 | put_transaction(cur_trans); |
398 | mutex_unlock(&info->trans_mutex); | ||
399 | 516 | ||
400 | if (current->journal_info == trans) | 517 | if (current->journal_info == trans) |
401 | current->journal_info = NULL; | 518 | current->journal_info = NULL; |
@@ -411,13 +528,40 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
411 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | 528 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, |
412 | struct btrfs_root *root) | 529 | struct btrfs_root *root) |
413 | { | 530 | { |
414 | return __btrfs_end_transaction(trans, root, 0); | 531 | int ret; |
532 | |||
533 | ret = __btrfs_end_transaction(trans, root, 0, 1); | ||
534 | if (ret) | ||
535 | return ret; | ||
536 | return 0; | ||
415 | } | 537 | } |
416 | 538 | ||
417 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | 539 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, |
418 | struct btrfs_root *root) | 540 | struct btrfs_root *root) |
419 | { | 541 | { |
420 | return __btrfs_end_transaction(trans, root, 1); | 542 | int ret; |
543 | |||
544 | ret = __btrfs_end_transaction(trans, root, 1, 1); | ||
545 | if (ret) | ||
546 | return ret; | ||
547 | return 0; | ||
548 | } | ||
549 | |||
550 | int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, | ||
551 | struct btrfs_root *root) | ||
552 | { | ||
553 | int ret; | ||
554 | |||
555 | ret = __btrfs_end_transaction(trans, root, 0, 0); | ||
556 | if (ret) | ||
557 | return ret; | ||
558 | return 0; | ||
559 | } | ||
560 | |||
561 | int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans, | ||
562 | struct btrfs_root *root) | ||
563 | { | ||
564 | return __btrfs_end_transaction(trans, root, 1, 1); | ||
421 | } | 565 | } |
422 | 566 | ||
423 | /* | 567 | /* |
@@ -643,9 +787,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
643 | */ | 787 | */ |
644 | int btrfs_add_dead_root(struct btrfs_root *root) | 788 | int btrfs_add_dead_root(struct btrfs_root *root) |
645 | { | 789 | { |
646 | mutex_lock(&root->fs_info->trans_mutex); | 790 | spin_lock(&root->fs_info->trans_lock); |
647 | list_add(&root->root_list, &root->fs_info->dead_roots); | 791 | list_add(&root->root_list, &root->fs_info->dead_roots); |
648 | mutex_unlock(&root->fs_info->trans_mutex); | 792 | spin_unlock(&root->fs_info->trans_lock); |
649 | return 0; | 793 | return 0; |
650 | } | 794 | } |
651 | 795 | ||
@@ -661,6 +805,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
661 | int ret; | 805 | int ret; |
662 | int err = 0; | 806 | int err = 0; |
663 | 807 | ||
808 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
664 | while (1) { | 809 | while (1) { |
665 | ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, | 810 | ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, |
666 | (void **)gang, 0, | 811 | (void **)gang, 0, |
@@ -673,13 +818,20 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
673 | radix_tree_tag_clear(&fs_info->fs_roots_radix, | 818 | radix_tree_tag_clear(&fs_info->fs_roots_radix, |
674 | (unsigned long)root->root_key.objectid, | 819 | (unsigned long)root->root_key.objectid, |
675 | BTRFS_ROOT_TRANS_TAG); | 820 | BTRFS_ROOT_TRANS_TAG); |
821 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
676 | 822 | ||
677 | btrfs_free_log(trans, root); | 823 | btrfs_free_log(trans, root); |
678 | btrfs_update_reloc_root(trans, root); | 824 | btrfs_update_reloc_root(trans, root); |
679 | btrfs_orphan_commit_root(trans, root); | 825 | btrfs_orphan_commit_root(trans, root); |
680 | 826 | ||
827 | btrfs_save_ino_cache(root, trans); | ||
828 | |||
681 | if (root->commit_root != root->node) { | 829 | if (root->commit_root != root->node) { |
830 | mutex_lock(&root->fs_commit_mutex); | ||
682 | switch_commit_root(root); | 831 | switch_commit_root(root); |
832 | btrfs_unpin_free_ino(root); | ||
833 | mutex_unlock(&root->fs_commit_mutex); | ||
834 | |||
683 | btrfs_set_root_node(&root->root_item, | 835 | btrfs_set_root_node(&root->root_item, |
684 | root->node); | 836 | root->node); |
685 | } | 837 | } |
@@ -687,10 +839,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
687 | err = btrfs_update_root(trans, fs_info->tree_root, | 839 | err = btrfs_update_root(trans, fs_info->tree_root, |
688 | &root->root_key, | 840 | &root->root_key, |
689 | &root->root_item); | 841 | &root->root_item); |
842 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
690 | if (err) | 843 | if (err) |
691 | break; | 844 | break; |
692 | } | 845 | } |
693 | } | 846 | } |
847 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
694 | return err; | 848 | return err; |
695 | } | 849 | } |
696 | 850 | ||
@@ -720,104 +874,13 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
720 | btrfs_btree_balance_dirty(info->tree_root, nr); | 874 | btrfs_btree_balance_dirty(info->tree_root, nr); |
721 | cond_resched(); | 875 | cond_resched(); |
722 | 876 | ||
723 | if (root->fs_info->closing || ret != -EAGAIN) | 877 | if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN) |
724 | break; | 878 | break; |
725 | } | 879 | } |
726 | root->defrag_running = 0; | 880 | root->defrag_running = 0; |
727 | return ret; | 881 | return ret; |
728 | } | 882 | } |
729 | 883 | ||
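fs_info->closing is no longer read raw; btrfs_fs_closing() wraps it. The helper's body is not part of this diff, but a plausible shape is simply a barrier plus the load:

    /* assumed sketch of the accessor; the barrier pairs with the setter */
    static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
    {
            smp_mb();
            return fs_info->closing;
    }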
730 | #if 0 | ||
731 | /* | ||
732 | * when dropping snapshots, we generate a ton of delayed refs, and it makes | ||
733 | * sense not to join the transaction while it is trying to flush the current | ||
734 | * queue of delayed refs out. | ||
735 | * | ||
736 | * This is used by the drop snapshot code only | ||
737 | */ | ||
738 | static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) | ||
739 | { | ||
740 | DEFINE_WAIT(wait); | ||
741 | |||
742 | mutex_lock(&info->trans_mutex); | ||
743 | while (info->running_transaction && | ||
744 | info->running_transaction->delayed_refs.flushing) { | ||
745 | prepare_to_wait(&info->transaction_wait, &wait, | ||
746 | TASK_UNINTERRUPTIBLE); | ||
747 | mutex_unlock(&info->trans_mutex); | ||
748 | |||
749 | schedule(); | ||
750 | |||
751 | mutex_lock(&info->trans_mutex); | ||
752 | finish_wait(&info->transaction_wait, &wait); | ||
753 | } | ||
754 | mutex_unlock(&info->trans_mutex); | ||
755 | return 0; | ||
756 | } | ||
757 | |||
758 | /* | ||
759 | * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on | ||
760 | * all of them | ||
761 | */ | ||
762 | int btrfs_drop_dead_root(struct btrfs_root *root) | ||
763 | { | ||
764 | struct btrfs_trans_handle *trans; | ||
765 | struct btrfs_root *tree_root = root->fs_info->tree_root; | ||
766 | unsigned long nr; | ||
767 | int ret; | ||
768 | |||
769 | while (1) { | ||
770 | /* | ||
771 | * we don't want to jump in and create a bunch of | ||
772 | * delayed refs if the transaction is starting to close | ||
773 | */ | ||
774 | wait_transaction_pre_flush(tree_root->fs_info); | ||
775 | trans = btrfs_start_transaction(tree_root, 1); | ||
776 | |||
777 | /* | ||
778 | * we've joined a transaction, make sure it isn't | ||
779 | * closing right now | ||
780 | */ | ||
781 | if (trans->transaction->delayed_refs.flushing) { | ||
782 | btrfs_end_transaction(trans, tree_root); | ||
783 | continue; | ||
784 | } | ||
785 | |||
786 | ret = btrfs_drop_snapshot(trans, root); | ||
787 | if (ret != -EAGAIN) | ||
788 | break; | ||
789 | |||
790 | ret = btrfs_update_root(trans, tree_root, | ||
791 | &root->root_key, | ||
792 | &root->root_item); | ||
793 | if (ret) | ||
794 | break; | ||
795 | |||
796 | nr = trans->blocks_used; | ||
797 | ret = btrfs_end_transaction(trans, tree_root); | ||
798 | BUG_ON(ret); | ||
799 | |||
800 | btrfs_btree_balance_dirty(tree_root, nr); | ||
801 | cond_resched(); | ||
802 | } | ||
803 | BUG_ON(ret); | ||
804 | |||
805 | ret = btrfs_del_root(trans, tree_root, &root->root_key); | ||
806 | BUG_ON(ret); | ||
807 | |||
808 | nr = trans->blocks_used; | ||
809 | ret = btrfs_end_transaction(trans, tree_root); | ||
810 | BUG_ON(ret); | ||
811 | |||
812 | free_extent_buffer(root->node); | ||
813 | free_extent_buffer(root->commit_root); | ||
814 | kfree(root); | ||
815 | |||
816 | btrfs_btree_balance_dirty(tree_root, nr); | ||
817 | return ret; | ||
818 | } | ||
819 | #endif | ||
820 | |||
821 | /* | 884 | /* |
822 | * new snapshots need to be created at a very specific time in the | 885 | * new snapshots need to be created at a very specific time in the |
823 | * transaction commit. This does the actual creation | 886 | * transaction commit. This does the actual creation |
@@ -832,14 +895,15 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
832 | struct btrfs_root *root = pending->root; | 895 | struct btrfs_root *root = pending->root; |
833 | struct btrfs_root *parent_root; | 896 | struct btrfs_root *parent_root; |
834 | struct inode *parent_inode; | 897 | struct inode *parent_inode; |
898 | struct dentry *parent; | ||
835 | struct dentry *dentry; | 899 | struct dentry *dentry; |
836 | struct extent_buffer *tmp; | 900 | struct extent_buffer *tmp; |
837 | struct extent_buffer *old; | 901 | struct extent_buffer *old; |
838 | int ret; | 902 | int ret; |
839 | int retries = 0; | ||
840 | u64 to_reserve = 0; | 903 | u64 to_reserve = 0; |
841 | u64 index = 0; | 904 | u64 index = 0; |
842 | u64 objectid; | 905 | u64 objectid; |
906 | u64 root_flags; | ||
843 | 907 | ||
844 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 908 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
845 | if (!new_root_item) { | 909 | if (!new_root_item) { |
@@ -847,7 +911,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
847 | goto fail; | 911 | goto fail; |
848 | } | 912 | } |
849 | 913 | ||
850 | ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); | 914 | ret = btrfs_find_free_objectid(tree_root, &objectid); |
851 | if (ret) { | 915 | if (ret) { |
852 | pending->error = ret; | 916 | pending->error = ret; |
853 | goto fail; | 917 | goto fail; |
@@ -858,7 +922,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
858 | 922 | ||
859 | if (to_reserve > 0) { | 923 | if (to_reserve > 0) { |
860 | ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, | 924 | ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, |
861 | to_reserve, &retries); | 925 | to_reserve); |
862 | if (ret) { | 926 | if (ret) { |
863 | pending->error = ret; | 927 | pending->error = ret; |
864 | goto fail; | 928 | goto fail; |
@@ -872,7 +936,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
872 | trans->block_rsv = &pending->block_rsv; | 936 | trans->block_rsv = &pending->block_rsv; |
873 | 937 | ||
874 | dentry = pending->dentry; | 938 | dentry = pending->dentry; |
875 | parent_inode = dentry->d_parent->d_inode; | 939 | parent = dget_parent(dentry); |
940 | parent_inode = parent->d_inode; | ||
876 | parent_root = BTRFS_I(parent_inode)->root; | 941 | parent_root = BTRFS_I(parent_inode)->root; |
877 | record_root_in_trans(trans, parent_root); | 942 | record_root_in_trans(trans, parent_root); |
878 | 943 | ||
@@ -883,7 +948,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
883 | BUG_ON(ret); | 948 | BUG_ON(ret); |
884 | ret = btrfs_insert_dir_item(trans, parent_root, | 949 | ret = btrfs_insert_dir_item(trans, parent_root, |
885 | dentry->d_name.name, dentry->d_name.len, | 950 | dentry->d_name.name, dentry->d_name.len, |
886 | parent_inode->i_ino, &key, | 951 | parent_inode, &key, |
887 | BTRFS_FT_DIR, index); | 952 | BTRFS_FT_DIR, index); |
888 | BUG_ON(ret); | 953 | BUG_ON(ret); |
889 | 954 | ||
@@ -892,9 +957,26 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
892 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 957 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
893 | BUG_ON(ret); | 958 | BUG_ON(ret); |
894 | 959 | ||
960 | /* | ||
961 | * pull in the delayed directory update | ||
962 | * and the delayed inode item; | ||
963 | * otherwise we corrupt the FS during | ||
964 | * the snapshot | ||
965 | */ | ||
966 | ret = btrfs_run_delayed_items(trans, root); | ||
967 | BUG_ON(ret); | ||
968 | |||
895 | record_root_in_trans(trans, root); | 969 | record_root_in_trans(trans, root); |
896 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 970 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
897 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 971 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
972 | btrfs_check_and_init_root_item(new_root_item); | ||
973 | |||
974 | root_flags = btrfs_root_flags(new_root_item); | ||
975 | if (pending->readonly) | ||
976 | root_flags |= BTRFS_ROOT_SUBVOL_RDONLY; | ||
977 | else | ||
978 | root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY; | ||
979 | btrfs_set_root_flags(new_root_item, root_flags); | ||
898 | 980 | ||
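The BTRFS_ROOT_SUBVOL_RDONLY bit copied into the new root item is what makes read-only snapshots persistent: it travels with the root item on disk. A reader on the other side would test the same bit, roughly as below (the helper name is an assumption, not shown in this hunk):

    /* assumed sketch: how a mount-time or ioctl check could read the flag */
    static inline bool btrfs_root_readonly(struct btrfs_root *root)
    {
            return root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY);
    }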
899 | old = btrfs_lock_root_node(root); | 981 | old = btrfs_lock_root_node(root); |
900 | btrfs_cow_block(trans, root, old, NULL, 0, &old); | 982 | btrfs_cow_block(trans, root, old, NULL, 0, &old); |
@@ -917,9 +999,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
917 | */ | 999 | */ |
918 | ret = btrfs_add_root_ref(trans, tree_root, objectid, | 1000 | ret = btrfs_add_root_ref(trans, tree_root, objectid, |
919 | parent_root->root_key.objectid, | 1001 | parent_root->root_key.objectid, |
920 | parent_inode->i_ino, index, | 1002 | btrfs_ino(parent_inode), index, |
921 | dentry->d_name.name, dentry->d_name.len); | 1003 | dentry->d_name.name, dentry->d_name.len); |
922 | BUG_ON(ret); | 1004 | BUG_ON(ret); |
1005 | dput(parent); | ||
923 | 1006 | ||
924 | key.offset = (u64)-1; | 1007 | key.offset = (u64)-1; |
925 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); | 1008 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); |
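Two conversions in this hunk recur through the whole series. dget_parent()/dput() pins the parent dentry for the duration of the snapshot instead of trusting dentry->d_parent to stay put. And parent_inode->i_ino becomes btrfs_ino(parent_inode): with the new inode-number cache the on-disk objectid lives in btrfs_inode and can differ from the VFS number. A plausible shape for that helper (assumed, not shown here):

    /* assumed sketch: prefer the cached on-disk objectid over i_ino */
    static inline u64 btrfs_ino(struct inode *inode)
    {
            u64 ino = BTRFS_I(inode)->location.objectid;

            /* the btree inode and subvol dirs fall back to the VFS number */
            if (!ino || BTRFS_I(inode)->location.type == BTRFS_ROOT_ITEM_KEY)
                    ino = inode->i_ino;
            return ino;
    }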
@@ -966,33 +1049,152 @@ static void update_super_roots(struct btrfs_root *root) | |||
966 | super->root = root_item->bytenr; | 1049 | super->root = root_item->bytenr; |
967 | super->generation = root_item->generation; | 1050 | super->generation = root_item->generation; |
968 | super->root_level = root_item->level; | 1051 | super->root_level = root_item->level; |
1052 | if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE)) | ||
1053 | super->cache_generation = root_item->generation; | ||
969 | } | 1054 | } |
970 | 1055 | ||
971 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) | 1056 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) |
972 | { | 1057 | { |
973 | int ret = 0; | 1058 | int ret = 0; |
974 | spin_lock(&info->new_trans_lock); | 1059 | spin_lock(&info->trans_lock); |
975 | if (info->running_transaction) | 1060 | if (info->running_transaction) |
976 | ret = info->running_transaction->in_commit; | 1061 | ret = info->running_transaction->in_commit; |
977 | spin_unlock(&info->new_trans_lock); | 1062 | spin_unlock(&info->trans_lock); |
978 | return ret; | 1063 | return ret; |
979 | } | 1064 | } |
980 | 1065 | ||
981 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) | 1066 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) |
982 | { | 1067 | { |
983 | int ret = 0; | 1068 | int ret = 0; |
984 | spin_lock(&info->new_trans_lock); | 1069 | spin_lock(&info->trans_lock); |
985 | if (info->running_transaction) | 1070 | if (info->running_transaction) |
986 | ret = info->running_transaction->blocked; | 1071 | ret = info->running_transaction->blocked; |
987 | spin_unlock(&info->new_trans_lock); | 1072 | spin_unlock(&info->trans_lock); |
988 | return ret; | 1073 | return ret; |
989 | } | 1074 | } |
990 | 1075 | ||
1076 | /* | ||
1077 | * wait for the current transaction commit to start and block subsequent | ||
1078 | * transaction joins | ||
1079 | */ | ||
1080 | static void wait_current_trans_commit_start(struct btrfs_root *root, | ||
1081 | struct btrfs_transaction *trans) | ||
1082 | { | ||
1083 | DEFINE_WAIT(wait); | ||
1084 | |||
1085 | if (trans->in_commit) | ||
1086 | return; | ||
1087 | |||
1088 | while (1) { | ||
1089 | prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait, | ||
1090 | TASK_UNINTERRUPTIBLE); | ||
1091 | if (trans->in_commit) { | ||
1092 | finish_wait(&root->fs_info->transaction_blocked_wait, | ||
1093 | &wait); | ||
1094 | break; | ||
1095 | } | ||
1096 | schedule(); | ||
1097 | finish_wait(&root->fs_info->transaction_blocked_wait, &wait); | ||
1098 | } | ||
1099 | } | ||
1100 | |||
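The loop above is the textbook expansion of wait_event(): prepare_to_wait() in TASK_UNINTERRUPTIBLE, recheck the condition, schedule(), finish_wait(). Written with the helper it would collapse to a single line (same fields as above):

    wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit);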
1101 | /* | ||
1102 | * wait for the current transaction to start and then become unblocked. | ||
1103 | * caller holds ref. | ||
1104 | */ | ||
1105 | static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, | ||
1106 | struct btrfs_transaction *trans) | ||
1107 | { | ||
1108 | DEFINE_WAIT(wait); | ||
1109 | |||
1110 | if (trans->commit_done || (trans->in_commit && !trans->blocked)) | ||
1111 | return; | ||
1112 | |||
1113 | while (1) { | ||
1114 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | ||
1115 | TASK_UNINTERRUPTIBLE); | ||
1116 | if (trans->commit_done || | ||
1117 | (trans->in_commit && !trans->blocked)) { | ||
1118 | finish_wait(&root->fs_info->transaction_wait, | ||
1119 | &wait); | ||
1120 | break; | ||
1121 | } | ||
1122 | schedule(); | ||
1123 | finish_wait(&root->fs_info->transaction_wait, | ||
1124 | &wait); | ||
1125 | } | ||
1126 | } | ||
1127 | |||
1128 | /* | ||
1129 | * commit transactions asynchronously. once btrfs_commit_transaction_async | ||
1130 | * returns, any subsequent transaction will not be allowed to join. | ||
1131 | */ | ||
1132 | struct btrfs_async_commit { | ||
1133 | struct btrfs_trans_handle *newtrans; | ||
1134 | struct btrfs_root *root; | ||
1135 | struct delayed_work work; | ||
1136 | }; | ||
1137 | |||
1138 | static void do_async_commit(struct work_struct *work) | ||
1139 | { | ||
1140 | struct btrfs_async_commit *ac = | ||
1141 | container_of(work, struct btrfs_async_commit, work.work); | ||
1142 | |||
1143 | btrfs_commit_transaction(ac->newtrans, ac->root); | ||
1144 | kfree(ac); | ||
1145 | } | ||
1146 | |||
1147 | int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | ||
1148 | struct btrfs_root *root, | ||
1149 | int wait_for_unblock) | ||
1150 | { | ||
1151 | struct btrfs_async_commit *ac; | ||
1152 | struct btrfs_transaction *cur_trans; | ||
1153 | |||
1154 | ac = kmalloc(sizeof(*ac), GFP_NOFS); | ||
1155 | if (!ac) | ||
1156 | return -ENOMEM; | ||
1157 | |||
1158 | INIT_DELAYED_WORK(&ac->work, do_async_commit); | ||
1159 | ac->root = root; | ||
1160 | ac->newtrans = btrfs_join_transaction(root); | ||
1161 | if (IS_ERR(ac->newtrans)) { | ||
1162 | int err = PTR_ERR(ac->newtrans); | ||
1163 | kfree(ac); | ||
1164 | return err; | ||
1165 | } | ||
1166 | |||
1167 | /* take transaction reference */ | ||
1168 | cur_trans = trans->transaction; | ||
1169 | atomic_inc(&cur_trans->use_count); | ||
1170 | |||
1171 | btrfs_end_transaction(trans, root); | ||
1172 | schedule_delayed_work(&ac->work, 0); | ||
1173 | |||
1174 | /* wait for transaction to start and unblock */ | ||
1175 | if (wait_for_unblock) | ||
1176 | wait_current_trans_commit_start_and_unblock(root, cur_trans); | ||
1177 | else | ||
1178 | wait_current_trans_commit_start(root, cur_trans); | ||
1179 | |||
1180 | if (current->journal_info == trans) | ||
1181 | current->journal_info = NULL; | ||
1182 | |||
1183 | put_transaction(cur_trans); | ||
1184 | return 0; | ||
1185 | } | ||
1186 | |||
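A caller hands its handle to btrfs_commit_transaction_async() and picks how long to wait: only until joins are blocked, or until the commit has unblocked them again. A hypothetical caller sketch (the snapshot ioctl is the natural user, but that is an assumption here):

    /* hypothetical caller: commit in the background, return early */
    static int start_background_commit(struct btrfs_trans_handle *trans,
                                       struct btrfs_root *root)
    {
            int ret;

            /* wait_for_unblock = 1: return once joins are allowed again */
            ret = btrfs_commit_transaction_async(trans, root, 1);
            if (ret)
                    /* both failure paths above leave trans un-ended */
                    btrfs_end_transaction(trans, root);
            return ret;
    }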
1187 | /* | ||
1188 | * btrfs_transaction state sequence: | ||
1189 | * in_commit = 0, blocked = 0 (initial) | ||
1190 | * in_commit = 1, blocked = 1 | ||
1191 | * blocked = 0 | ||
1192 | * commit_done = 1 | ||
1193 | */ | ||
991 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 1194 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
992 | struct btrfs_root *root) | 1195 | struct btrfs_root *root) |
993 | { | 1196 | { |
994 | unsigned long joined = 0; | 1197 | unsigned long joined = 0; |
995 | unsigned long timeout = 1; | ||
996 | struct btrfs_transaction *cur_trans; | 1198 | struct btrfs_transaction *cur_trans; |
997 | struct btrfs_transaction *prev_trans = NULL; | 1199 | struct btrfs_transaction *prev_trans = NULL; |
998 | DEFINE_WAIT(wait); | 1200 | DEFINE_WAIT(wait); |
@@ -1021,36 +1223,41 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1021 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1223 | ret = btrfs_run_delayed_refs(trans, root, 0); |
1022 | BUG_ON(ret); | 1224 | BUG_ON(ret); |
1023 | 1225 | ||
1024 | mutex_lock(&root->fs_info->trans_mutex); | 1226 | spin_lock(&cur_trans->commit_lock); |
1025 | if (cur_trans->in_commit) { | 1227 | if (cur_trans->in_commit) { |
1026 | cur_trans->use_count++; | 1228 | spin_unlock(&cur_trans->commit_lock); |
1027 | mutex_unlock(&root->fs_info->trans_mutex); | 1229 | atomic_inc(&cur_trans->use_count); |
1028 | btrfs_end_transaction(trans, root); | 1230 | btrfs_end_transaction(trans, root); |
1029 | 1231 | ||
1030 | ret = wait_for_commit(root, cur_trans); | 1232 | ret = wait_for_commit(root, cur_trans); |
1031 | BUG_ON(ret); | 1233 | BUG_ON(ret); |
1032 | 1234 | ||
1033 | mutex_lock(&root->fs_info->trans_mutex); | ||
1034 | put_transaction(cur_trans); | 1235 | put_transaction(cur_trans); |
1035 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1036 | 1236 | ||
1037 | return 0; | 1237 | return 0; |
1038 | } | 1238 | } |
1039 | 1239 | ||
1040 | trans->transaction->in_commit = 1; | 1240 | trans->transaction->in_commit = 1; |
1041 | trans->transaction->blocked = 1; | 1241 | trans->transaction->blocked = 1; |
1242 | spin_unlock(&cur_trans->commit_lock); | ||
1243 | wake_up(&root->fs_info->transaction_blocked_wait); | ||
1244 | |||
1245 | spin_lock(&root->fs_info->trans_lock); | ||
1042 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 1246 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
1043 | prev_trans = list_entry(cur_trans->list.prev, | 1247 | prev_trans = list_entry(cur_trans->list.prev, |
1044 | struct btrfs_transaction, list); | 1248 | struct btrfs_transaction, list); |
1045 | if (!prev_trans->commit_done) { | 1249 | if (!prev_trans->commit_done) { |
1046 | prev_trans->use_count++; | 1250 | atomic_inc(&prev_trans->use_count); |
1047 | mutex_unlock(&root->fs_info->trans_mutex); | 1251 | spin_unlock(&root->fs_info->trans_lock); |
1048 | 1252 | ||
1049 | wait_for_commit(root, prev_trans); | 1253 | wait_for_commit(root, prev_trans); |
1050 | 1254 | ||
1051 | mutex_lock(&root->fs_info->trans_mutex); | ||
1052 | put_transaction(prev_trans); | 1255 | put_transaction(prev_trans); |
1256 | } else { | ||
1257 | spin_unlock(&root->fs_info->trans_lock); | ||
1053 | } | 1258 | } |
1259 | } else { | ||
1260 | spin_unlock(&root->fs_info->trans_lock); | ||
1054 | } | 1261 | } |
1055 | 1262 | ||
1056 | if (now < cur_trans->start_time || now - cur_trans->start_time < 1) | 1263 | if (now < cur_trans->start_time || now - cur_trans->start_time < 1) |
@@ -1058,17 +1265,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1058 | 1265 | ||
1059 | do { | 1266 | do { |
1060 | int snap_pending = 0; | 1267 | int snap_pending = 0; |
1268 | |||
1061 | joined = cur_trans->num_joined; | 1269 | joined = cur_trans->num_joined; |
1062 | if (!list_empty(&trans->transaction->pending_snapshots)) | 1270 | if (!list_empty(&trans->transaction->pending_snapshots)) |
1063 | snap_pending = 1; | 1271 | snap_pending = 1; |
1064 | 1272 | ||
1065 | WARN_ON(cur_trans != trans->transaction); | 1273 | WARN_ON(cur_trans != trans->transaction); |
1066 | if (cur_trans->num_writers > 1) | ||
1067 | timeout = MAX_SCHEDULE_TIMEOUT; | ||
1068 | else if (should_grow) | ||
1069 | timeout = 1; | ||
1070 | |||
1071 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1072 | 1274 | ||
1073 | if (flush_on_commit || snap_pending) { | 1275 | if (flush_on_commit || snap_pending) { |
1074 | btrfs_start_delalloc_inodes(root, 1); | 1276 | btrfs_start_delalloc_inodes(root, 1); |
@@ -1076,6 +1278,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1076 | BUG_ON(ret); | 1278 | BUG_ON(ret); |
1077 | } | 1279 | } |
1078 | 1280 | ||
1281 | ret = btrfs_run_delayed_items(trans, root); | ||
1282 | BUG_ON(ret); | ||
1283 | |||
1079 | /* | 1284 | /* |
1080 | * rename doesn't use btrfs_join_transaction, so once we | 1285 | * rename doesn't use btrfs_join_transaction, so once we |
1081 | * set the transaction to blocked above, we aren't going | 1286 | * set the transaction to blocked above, we aren't going |
@@ -1088,23 +1293,51 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1088 | prepare_to_wait(&cur_trans->writer_wait, &wait, | 1293 | prepare_to_wait(&cur_trans->writer_wait, &wait, |
1089 | TASK_UNINTERRUPTIBLE); | 1294 | TASK_UNINTERRUPTIBLE); |
1090 | 1295 | ||
1091 | smp_mb(); | 1296 | if (atomic_read(&cur_trans->num_writers) > 1) |
1092 | if (cur_trans->num_writers > 1 || should_grow) | 1297 | schedule_timeout(MAX_SCHEDULE_TIMEOUT); |
1093 | schedule_timeout(timeout); | 1298 | else if (should_grow) |
1299 | schedule_timeout(1); | ||
1094 | 1300 | ||
1095 | mutex_lock(&root->fs_info->trans_mutex); | ||
1096 | finish_wait(&cur_trans->writer_wait, &wait); | 1301 | finish_wait(&cur_trans->writer_wait, &wait); |
1097 | } while (cur_trans->num_writers > 1 || | 1302 | } while (atomic_read(&cur_trans->num_writers) > 1 || |
1098 | (should_grow && cur_trans->num_joined != joined)); | 1303 | (should_grow && cur_trans->num_joined != joined)); |
1099 | 1304 | ||
1305 | /* | ||
1306 | * Ok now we need to make sure to block out any other joins while we | ||
1307 | * commit the transaction. We could have started a join before setting | ||
1308 | * no_join, so make sure to wait for num_writers to drop to 1 again. | ||
1309 | */ | ||
1310 | spin_lock(&root->fs_info->trans_lock); | ||
1311 | root->fs_info->trans_no_join = 1; | ||
1312 | spin_unlock(&root->fs_info->trans_lock); | ||
1313 | wait_event(cur_trans->writer_wait, | ||
1314 | atomic_read(&cur_trans->num_writers) == 1); | ||
1315 | |||
1316 | /* | ||
1317 | * the reloc mutex makes sure that we stop | ||
1318 | * the balancing code from coming in and moving | ||
1319 | * extents around in the middle of the commit | ||
1320 | */ | ||
1321 | mutex_lock(&root->fs_info->reloc_mutex); | ||
1322 | |||
1323 | ret = btrfs_run_delayed_items(trans, root); | ||
1324 | BUG_ON(ret); | ||
1325 | |||
1100 | ret = create_pending_snapshots(trans, root->fs_info); | 1326 | ret = create_pending_snapshots(trans, root->fs_info); |
1101 | BUG_ON(ret); | 1327 | BUG_ON(ret); |
1102 | 1328 | ||
1103 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | 1329 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
1104 | BUG_ON(ret); | 1330 | BUG_ON(ret); |
1105 | 1331 | ||
1332 | /* | ||
1333 | * make sure none of the code above managed to slip in a | ||
1334 | * delayed item | ||
1335 | */ | ||
1336 | btrfs_assert_delayed_root_empty(root); | ||
1337 | |||
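Delayed items are flushed twice on purpose: once in the loop while writers are still trickling out, and once more under the reloc mutex after num_writers has dropped to 1, so create_pending_snapshots() sees a quiesced delayed root. The assertion then pins the invariant. The ordering contract, condensed from the hunks above:

    ret = btrfs_run_delayed_items(trans, root);     /* flush before snapshots */
    BUG_ON(ret);
    ret = create_pending_snapshots(trans, root->fs_info);
    BUG_ON(ret);
    ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
    BUG_ON(ret);
    btrfs_assert_delayed_root_empty(root);          /* nothing slipped back in */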
1106 | WARN_ON(cur_trans != trans->transaction); | 1338 | WARN_ON(cur_trans != trans->transaction); |
1107 | 1339 | ||
1340 | btrfs_scrub_pause(root); | ||
1108 | /* btrfs_commit_tree_roots is responsible for getting the | 1341 | /* btrfs_commit_tree_roots is responsible for getting the |
1109 | * various roots consistent with each other. Every pointer | 1342 | * various roots consistent with each other. Every pointer |
1110 | * in the tree of tree roots has to point to the most up to date | 1343 | * in the tree of tree roots has to point to the most up to date |
@@ -1134,9 +1367,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1134 | btrfs_prepare_extent_commit(trans, root); | 1367 | btrfs_prepare_extent_commit(trans, root); |
1135 | 1368 | ||
1136 | cur_trans = root->fs_info->running_transaction; | 1369 | cur_trans = root->fs_info->running_transaction; |
1137 | spin_lock(&root->fs_info->new_trans_lock); | ||
1138 | root->fs_info->running_transaction = NULL; | ||
1139 | spin_unlock(&root->fs_info->new_trans_lock); | ||
1140 | 1370 | ||
1141 | btrfs_set_root_node(&root->fs_info->tree_root->root_item, | 1371 | btrfs_set_root_node(&root->fs_info->tree_root->root_item, |
1142 | root->fs_info->tree_root->node); | 1372 | root->fs_info->tree_root->node); |
@@ -1157,10 +1387,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1157 | sizeof(root->fs_info->super_copy)); | 1387 | sizeof(root->fs_info->super_copy)); |
1158 | 1388 | ||
1159 | trans->transaction->blocked = 0; | 1389 | trans->transaction->blocked = 0; |
1390 | spin_lock(&root->fs_info->trans_lock); | ||
1391 | root->fs_info->running_transaction = NULL; | ||
1392 | root->fs_info->trans_no_join = 0; | ||
1393 | spin_unlock(&root->fs_info->trans_lock); | ||
1394 | mutex_unlock(&root->fs_info->reloc_mutex); | ||
1160 | 1395 | ||
1161 | wake_up(&root->fs_info->transaction_wait); | 1396 | wake_up(&root->fs_info->transaction_wait); |
1162 | 1397 | ||
1163 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1164 | ret = btrfs_write_and_wait_transaction(trans, root); | 1398 | ret = btrfs_write_and_wait_transaction(trans, root); |
1165 | BUG_ON(ret); | 1399 | BUG_ON(ret); |
1166 | write_ctree_super(trans, root, 0); | 1400 | write_ctree_super(trans, root, 0); |
@@ -1173,18 +1407,22 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1173 | 1407 | ||
1174 | btrfs_finish_extent_commit(trans, root); | 1408 | btrfs_finish_extent_commit(trans, root); |
1175 | 1409 | ||
1176 | mutex_lock(&root->fs_info->trans_mutex); | ||
1177 | |||
1178 | cur_trans->commit_done = 1; | 1410 | cur_trans->commit_done = 1; |
1179 | 1411 | ||
1180 | root->fs_info->last_trans_committed = cur_trans->transid; | 1412 | root->fs_info->last_trans_committed = cur_trans->transid; |
1181 | 1413 | ||
1182 | wake_up(&cur_trans->commit_wait); | 1414 | wake_up(&cur_trans->commit_wait); |
1183 | 1415 | ||
1416 | spin_lock(&root->fs_info->trans_lock); | ||
1417 | list_del_init(&cur_trans->list); | ||
1418 | spin_unlock(&root->fs_info->trans_lock); | ||
1419 | |||
1184 | put_transaction(cur_trans); | 1420 | put_transaction(cur_trans); |
1185 | put_transaction(cur_trans); | 1421 | put_transaction(cur_trans); |
1186 | 1422 | ||
1187 | mutex_unlock(&root->fs_info->trans_mutex); | 1423 | trace_btrfs_transaction_commit(root); |
1424 | |||
1425 | btrfs_scrub_continue(root); | ||
1188 | 1426 | ||
1189 | if (current->journal_info == trans) | 1427 | if (current->journal_info == trans) |
1190 | current->journal_info = NULL; | 1428 | current->journal_info = NULL; |
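The two back-to-back put_transaction() calls are deliberate: one drops this handle's reference, the other drops the reference held on behalf of fs_info->trans_list, which list_del_init() just severed. With use_count now an atomic_t the release path needs no mutex; its likely shape (assumed, not part of this hunk):

    /* assumed sketch: the last reference frees the transaction */
    static void put_transaction(struct btrfs_transaction *transaction)
    {
            WARN_ON(atomic_read(&transaction->use_count) == 0);
            if (atomic_dec_and_test(&transaction->use_count)) {
                    BUG_ON(!list_empty(&transaction->list));
                    kfree(transaction);
            }
    }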
@@ -1205,14 +1443,16 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) | |||
1205 | LIST_HEAD(list); | 1443 | LIST_HEAD(list); |
1206 | struct btrfs_fs_info *fs_info = root->fs_info; | 1444 | struct btrfs_fs_info *fs_info = root->fs_info; |
1207 | 1445 | ||
1208 | mutex_lock(&fs_info->trans_mutex); | 1446 | spin_lock(&fs_info->trans_lock); |
1209 | list_splice_init(&fs_info->dead_roots, &list); | 1447 | list_splice_init(&fs_info->dead_roots, &list); |
1210 | mutex_unlock(&fs_info->trans_mutex); | 1448 | spin_unlock(&fs_info->trans_lock); |
1211 | 1449 | ||
1212 | while (!list_empty(&list)) { | 1450 | while (!list_empty(&list)) { |
1213 | root = list_entry(list.next, struct btrfs_root, root_list); | 1451 | root = list_entry(list.next, struct btrfs_root, root_list); |
1214 | list_del(&root->root_list); | 1452 | list_del(&root->root_list); |
1215 | 1453 | ||
1454 | btrfs_kill_all_delayed_nodes(root); | ||
1455 | |||
1216 | if (btrfs_header_backref_rev(root->node) < | 1456 | if (btrfs_header_backref_rev(root->node) < |
1217 | BTRFS_MIXED_BACKREF_REV) | 1457 | BTRFS_MIXED_BACKREF_REV) |
1218 | btrfs_drop_snapshot(root, NULL, 0); | 1458 | btrfs_drop_snapshot(root, NULL, 0); |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index e104986d0bfd..02564e6230ac 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -27,11 +27,13 @@ struct btrfs_transaction { | |||
27 | * total writers in this transaction, it must be zero before the | 27 | * total writers in this transaction, it must be zero before the |
28 | * transaction can end | 28 | * transaction can end |
29 | */ | 29 | */ |
30 | unsigned long num_writers; | 30 | atomic_t num_writers; |
31 | atomic_t use_count; | ||
31 | 32 | ||
32 | unsigned long num_joined; | 33 | unsigned long num_joined; |
34 | |||
35 | spinlock_t commit_lock; | ||
33 | int in_commit; | 36 | int in_commit; |
34 | int use_count; | ||
35 | int commit_done; | 37 | int commit_done; |
36 | int blocked; | 38 | int blocked; |
37 | struct list_head list; | 39 | struct list_head list; |
@@ -45,13 +47,14 @@ struct btrfs_transaction { | |||
45 | 47 | ||
46 | struct btrfs_trans_handle { | 48 | struct btrfs_trans_handle { |
47 | u64 transid; | 49 | u64 transid; |
48 | u64 block_group; | ||
49 | u64 bytes_reserved; | 50 | u64 bytes_reserved; |
51 | unsigned long use_count; | ||
50 | unsigned long blocks_reserved; | 52 | unsigned long blocks_reserved; |
51 | unsigned long blocks_used; | 53 | unsigned long blocks_used; |
52 | unsigned long delayed_ref_updates; | 54 | unsigned long delayed_ref_updates; |
53 | struct btrfs_transaction *transaction; | 55 | struct btrfs_transaction *transaction; |
54 | struct btrfs_block_rsv *block_rsv; | 56 | struct btrfs_block_rsv *block_rsv; |
57 | struct btrfs_block_rsv *orig_rsv; | ||
55 | }; | 58 | }; |
56 | 59 | ||
57 | struct btrfs_pending_snapshot { | 60 | struct btrfs_pending_snapshot { |
@@ -62,22 +65,10 @@ struct btrfs_pending_snapshot { | |||
62 | struct btrfs_block_rsv block_rsv; | 65 | struct btrfs_block_rsv block_rsv; |
63 | /* extra metadata reservation for relocation */ | 66 | /* extra metadata reservation for relocation */ |
64 | int error; | 67 | int error; |
68 | bool readonly; | ||
65 | struct list_head list; | 69 | struct list_head list; |
66 | }; | 70 | }; |
67 | 71 | ||
68 | static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, | ||
69 | struct inode *inode) | ||
70 | { | ||
71 | trans->block_group = BTRFS_I(inode)->block_group; | ||
72 | } | ||
73 | |||
74 | static inline void btrfs_update_inode_block_group( | ||
75 | struct btrfs_trans_handle *trans, | ||
76 | struct inode *inode) | ||
77 | { | ||
78 | BTRFS_I(inode)->block_group = trans->block_group; | ||
79 | } | ||
80 | |||
81 | static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | 72 | static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, |
82 | struct inode *inode) | 73 | struct inode *inode) |
83 | { | 74 | { |
@@ -87,25 +78,29 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | |||
87 | 78 | ||
88 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | 79 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, |
89 | struct btrfs_root *root); | 80 | struct btrfs_root *root); |
81 | int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, | ||
82 | struct btrfs_root *root); | ||
90 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 83 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
91 | int num_items); | 84 | int num_items); |
92 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 85 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); |
93 | int num_blocks); | 86 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); |
94 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 87 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); |
95 | int num_blocks); | 88 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); |
96 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 89 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
97 | struct btrfs_root *root); | 90 | struct btrfs_root *root); |
98 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | ||
99 | struct btrfs_root *root); | ||
100 | 91 | ||
101 | int btrfs_add_dead_root(struct btrfs_root *root); | 92 | int btrfs_add_dead_root(struct btrfs_root *root); |
102 | int btrfs_drop_dead_root(struct btrfs_root *root); | ||
103 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); | 93 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); |
104 | int btrfs_clean_old_snapshots(struct btrfs_root *root); | 94 | int btrfs_clean_old_snapshots(struct btrfs_root *root); |
105 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 95 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
106 | struct btrfs_root *root); | 96 | struct btrfs_root *root); |
97 | int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | ||
98 | struct btrfs_root *root, | ||
99 | int wait_for_unblock); | ||
107 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | 100 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, |
108 | struct btrfs_root *root); | 101 | struct btrfs_root *root); |
102 | int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans, | ||
103 | struct btrfs_root *root); | ||
109 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | 104 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, |
110 | struct btrfs_root *root); | 105 | struct btrfs_root *root); |
111 | void btrfs_throttle(struct btrfs_root *root); | 106 | void btrfs_throttle(struct btrfs_root *root); |
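The join/start prototypes lose their num_blocks argument and, as the async-commit hunk above already relies on, report failure through ERR_PTR. Call sites migrate mechanically (sketch with a hypothetical caller):

    static int example_caller(struct btrfs_root *root)
    {
            struct btrfs_trans_handle *trans;

            trans = btrfs_join_transaction(root);   /* was: (root, 1) */
            if (IS_ERR(trans))                      /* joins can fail now */
                    return PTR_ERR(trans);

            /* ... modify the filesystem ... */

            return btrfs_end_transaction(trans, root);
    }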
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index f7ac8e013ed7..3b580ee8ab1d 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c | |||
@@ -36,7 +36,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
36 | int ret = 0; | 36 | int ret = 0; |
37 | int wret; | 37 | int wret; |
38 | int level; | 38 | int level; |
39 | int orig_level; | ||
40 | int is_extent = 0; | 39 | int is_extent = 0; |
41 | int next_key_ret = 0; | 40 | int next_key_ret = 0; |
42 | u64 last_ret = 0; | 41 | u64 last_ret = 0; |
@@ -64,7 +63,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
64 | return -ENOMEM; | 63 | return -ENOMEM; |
65 | 64 | ||
66 | level = btrfs_header_level(root->node); | 65 | level = btrfs_header_level(root->node); |
67 | orig_level = level; | ||
68 | 66 | ||
69 | if (level == 0) | 67 | if (level == 0) |
70 | goto out; | 68 | goto out; |
@@ -99,7 +97,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
99 | ret = 0; | 97 | ret = 0; |
100 | goto out; | 98 | goto out; |
101 | } | 99 | } |
102 | btrfs_release_path(root, path); | 100 | btrfs_release_path(path); |
103 | wret = btrfs_search_slot(trans, root, &key, path, 0, 1); | 101 | wret = btrfs_search_slot(trans, root, &key, path, 0, 1); |
104 | 102 | ||
105 | if (wret < 0) { | 103 | if (wret < 0) { |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index fb102a9aee9c..4ce8a9f41d1e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -333,11 +333,17 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, | |||
333 | goto insert; | 333 | goto insert; |
334 | 334 | ||
335 | if (item_size == 0) { | 335 | if (item_size == 0) { |
336 | btrfs_release_path(root, path); | 336 | btrfs_release_path(path); |
337 | return 0; | 337 | return 0; |
338 | } | 338 | } |
339 | dst_copy = kmalloc(item_size, GFP_NOFS); | 339 | dst_copy = kmalloc(item_size, GFP_NOFS); |
340 | src_copy = kmalloc(item_size, GFP_NOFS); | 340 | src_copy = kmalloc(item_size, GFP_NOFS); |
341 | if (!dst_copy || !src_copy) { | ||
342 | btrfs_release_path(path); | ||
343 | kfree(dst_copy); | ||
344 | kfree(src_copy); | ||
345 | return -ENOMEM; | ||
346 | } | ||
341 | 347 | ||
342 | read_extent_buffer(eb, src_copy, src_ptr, item_size); | 348 | read_extent_buffer(eb, src_copy, src_ptr, item_size); |
343 | 349 | ||
@@ -355,13 +361,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, | |||
355 | * sync | 361 | * sync |
356 | */ | 362 | */ |
357 | if (ret == 0) { | 363 | if (ret == 0) { |
358 | btrfs_release_path(root, path); | 364 | btrfs_release_path(path); |
359 | return 0; | 365 | return 0; |
360 | } | 366 | } |
361 | 367 | ||
362 | } | 368 | } |
363 | insert: | 369 | insert: |
364 | btrfs_release_path(root, path); | 370 | btrfs_release_path(path); |
365 | /* try to insert the key into the destination tree */ | 371 | /* try to insert the key into the destination tree */ |
366 | ret = btrfs_insert_empty_item(trans, root, path, | 372 | ret = btrfs_insert_empty_item(trans, root, path, |
367 | key, item_size); | 373 | key, item_size); |
@@ -376,7 +382,6 @@ insert: | |||
376 | } else if (found_size < item_size) { | 382 | } else if (found_size < item_size) { |
377 | ret = btrfs_extend_item(trans, root, path, | 383 | ret = btrfs_extend_item(trans, root, path, |
378 | item_size - found_size); | 384 | item_size - found_size); |
379 | BUG_ON(ret); | ||
380 | } | 385 | } |
381 | } else if (ret) { | 386 | } else if (ret) { |
382 | return ret; | 387 | return ret; |
@@ -432,7 +437,7 @@ insert: | |||
432 | } | 437 | } |
433 | no_copy: | 438 | no_copy: |
434 | btrfs_mark_buffer_dirty(path->nodes[0]); | 439 | btrfs_mark_buffer_dirty(path->nodes[0]); |
435 | btrfs_release_path(root, path); | 440 | btrfs_release_path(path); |
436 | return 0; | 441 | return 0; |
437 | } | 442 | } |
438 | 443 | ||
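btrfs_release_path() drops its root argument throughout the series; the path itself carries the locks and extent-buffer references being released, so the extra parameter was dead weight. The call-site change is mechanical:

    btrfs_release_path(root, path);   /* before */
    btrfs_release_path(path);         /* after: root was never needed */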
@@ -513,7 +518,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
513 | * file. This must be done before btrfs_drop_extents runs, | 518 | * file. This must be done before btrfs_drop_extents runs, |
514 | * so we don't try to drop this extent. | 519 | * so we don't try to drop this extent. |
515 | */ | 520 | */ |
516 | ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, | 521 | ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), |
517 | start, 0); | 522 | start, 0); |
518 | 523 | ||
519 | if (ret == 0 && | 524 | if (ret == 0 && |
@@ -538,11 +543,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
538 | * we don't have to do anything | 543 | * we don't have to do anything |
539 | */ | 544 | */ |
540 | if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { | 545 | if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { |
541 | btrfs_release_path(root, path); | 546 | btrfs_release_path(path); |
542 | goto out; | 547 | goto out; |
543 | } | 548 | } |
544 | } | 549 | } |
545 | btrfs_release_path(root, path); | 550 | btrfs_release_path(path); |
546 | 551 | ||
547 | saved_nbytes = inode_get_bytes(inode); | 552 | saved_nbytes = inode_get_bytes(inode); |
548 | /* drop any overlapping extents */ | 553 | /* drop any overlapping extents */ |
@@ -584,6 +589,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
584 | ins.objectid, ins.offset, | 589 | ins.objectid, ins.offset, |
585 | 0, root->root_key.objectid, | 590 | 0, root->root_key.objectid, |
586 | key->objectid, offset); | 591 | key->objectid, offset); |
592 | BUG_ON(ret); | ||
587 | } else { | 593 | } else { |
588 | /* | 594 | /* |
589 | * insert the extent pointer in the extent | 595 | * insert the extent pointer in the extent |
@@ -594,7 +600,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
594 | key->objectid, offset, &ins); | 600 | key->objectid, offset, &ins); |
595 | BUG_ON(ret); | 601 | BUG_ON(ret); |
596 | } | 602 | } |
597 | btrfs_release_path(root, path); | 603 | btrfs_release_path(path); |
598 | 604 | ||
599 | if (btrfs_file_extent_compression(eb, item)) { | 605 | if (btrfs_file_extent_compression(eb, item)) { |
600 | csum_start = ins.objectid; | 606 | csum_start = ins.objectid; |
@@ -608,7 +614,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
608 | 614 | ||
609 | ret = btrfs_lookup_csums_range(root->log_root, | 615 | ret = btrfs_lookup_csums_range(root->log_root, |
610 | csum_start, csum_end - 1, | 616 | csum_start, csum_end - 1, |
611 | &ordered_sums); | 617 | &ordered_sums, 0); |
612 | BUG_ON(ret); | 618 | BUG_ON(ret); |
613 | while (!list_empty(&ordered_sums)) { | 619 | while (!list_empty(&ordered_sums)) { |
614 | struct btrfs_ordered_sum *sums; | 620 | struct btrfs_ordered_sum *sums; |
@@ -623,7 +629,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
623 | kfree(sums); | 629 | kfree(sums); |
624 | } | 630 | } |
625 | } else { | 631 | } else { |
626 | btrfs_release_path(root, path); | 632 | btrfs_release_path(path); |
627 | } | 633 | } |
628 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | 634 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
629 | /* inline extents are easy, we just overwrite them */ | 635 | /* inline extents are easy, we just overwrite them */ |
@@ -665,11 +671,17 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, | |||
665 | btrfs_dir_item_key_to_cpu(leaf, di, &location); | 671 | btrfs_dir_item_key_to_cpu(leaf, di, &location); |
666 | name_len = btrfs_dir_name_len(leaf, di); | 672 | name_len = btrfs_dir_name_len(leaf, di); |
667 | name = kmalloc(name_len, GFP_NOFS); | 673 | name = kmalloc(name_len, GFP_NOFS); |
674 | if (!name) | ||
675 | return -ENOMEM; | ||
676 | |||
668 | read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); | 677 | read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); |
669 | btrfs_release_path(root, path); | 678 | btrfs_release_path(path); |
670 | 679 | ||
671 | inode = read_one_inode(root, location.objectid); | 680 | inode = read_one_inode(root, location.objectid); |
672 | BUG_ON(!inode); | 681 | if (!inode) { |
682 | kfree(name); | ||
683 | return -EIO; | ||
684 | } | ||
673 | 685 | ||
674 | ret = link_to_fixup_dir(trans, root, path, location.objectid); | 686 | ret = link_to_fixup_dir(trans, root, path, location.objectid); |
675 | BUG_ON(ret); | 687 | BUG_ON(ret); |
@@ -704,7 +716,7 @@ static noinline int inode_in_dir(struct btrfs_root *root, | |||
704 | goto out; | 716 | goto out; |
705 | } else | 717 | } else |
706 | goto out; | 718 | goto out; |
707 | btrfs_release_path(root, path); | 719 | btrfs_release_path(path); |
708 | 720 | ||
709 | di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); | 721 | di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); |
710 | if (di && !IS_ERR(di)) { | 722 | if (di && !IS_ERR(di)) { |
@@ -715,7 +727,7 @@ static noinline int inode_in_dir(struct btrfs_root *root, | |||
715 | goto out; | 727 | goto out; |
716 | match = 1; | 728 | match = 1; |
717 | out: | 729 | out: |
718 | btrfs_release_path(root, path); | 730 | btrfs_release_path(path); |
719 | return match; | 731 | return match; |
720 | } | 732 | } |
721 | 733 | ||
@@ -744,6 +756,9 @@ static noinline int backref_in_log(struct btrfs_root *log, | |||
744 | int match = 0; | 756 | int match = 0; |
745 | 757 | ||
746 | path = btrfs_alloc_path(); | 758 | path = btrfs_alloc_path(); |
759 | if (!path) | ||
760 | return -ENOMEM; | ||
761 | |||
747 | ret = btrfs_search_slot(NULL, log, key, path, 0, 0); | 762 | ret = btrfs_search_slot(NULL, log, key, path, 0, 0); |
748 | if (ret != 0) | 763 | if (ret != 0) |
749 | goto out; | 764 | goto out; |
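Log replay now fails gracefully on allocation errors instead of oopsing later; every site unwinds whatever it already holds before returning. The recurring shape, condensed from the hunks around here:

    path = btrfs_alloc_path();
    if (!path)
            return -ENOMEM;

    dst_copy = kmalloc(item_size, GFP_NOFS);
    src_copy = kmalloc(item_size, GFP_NOFS);
    if (!dst_copy || !src_copy) {
            btrfs_release_path(path);
            kfree(dst_copy);        /* kfree(NULL) is a no-op */
            kfree(src_copy);
            return -ENOMEM;
    }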
@@ -786,18 +801,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | |||
786 | { | 801 | { |
787 | struct inode *dir; | 802 | struct inode *dir; |
788 | int ret; | 803 | int ret; |
789 | struct btrfs_key location; | ||
790 | struct btrfs_inode_ref *ref; | 804 | struct btrfs_inode_ref *ref; |
791 | struct btrfs_dir_item *di; | ||
792 | struct inode *inode; | 805 | struct inode *inode; |
793 | char *name; | 806 | char *name; |
794 | int namelen; | 807 | int namelen; |
795 | unsigned long ref_ptr; | 808 | unsigned long ref_ptr; |
796 | unsigned long ref_end; | 809 | unsigned long ref_end; |
797 | 810 | int search_done = 0; | |
798 | location.objectid = key->objectid; | ||
799 | location.type = BTRFS_INODE_ITEM_KEY; | ||
800 | location.offset = 0; | ||
801 | 811 | ||
802 | /* | 812 | /* |
803 | * it is possible that we didn't log all the parent directories | 813 | * it is possible that we didn't log all the parent directories |
@@ -810,7 +820,10 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | |||
810 | return -ENOENT; | 820 | return -ENOENT; |
811 | 821 | ||
812 | inode = read_one_inode(root, key->objectid); | 822 | inode = read_one_inode(root, key->objectid); |
813 | BUG_ON(!inode); | 823 | if (!inode) { |
824 | iput(dir); | ||
825 | return -EIO; | ||
826 | } | ||
814 | 827 | ||
815 | ref_ptr = btrfs_item_ptr_offset(eb, slot); | 828 | ref_ptr = btrfs_item_ptr_offset(eb, slot); |
816 | ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); | 829 | ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); |
@@ -825,7 +838,7 @@ again: | |||
825 | read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); | 838 | read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); |
826 | 839 | ||
827 | /* if we already have a perfect match, we're done */ | 840 | /* if we already have a perfect match, we're done */ |
828 | if (inode_in_dir(root, path, dir->i_ino, inode->i_ino, | 841 | if (inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), |
829 | btrfs_inode_ref_index(eb, ref), | 842 | btrfs_inode_ref_index(eb, ref), |
830 | name, namelen)) { | 843 | name, namelen)) { |
831 | goto out; | 844 | goto out; |
@@ -838,7 +851,10 @@ again: | |||
838 | * existing back reference, and we don't want to create | 851 | * existing back reference, and we don't want to create |
839 | * dangling pointers in the directory. | 852 | * dangling pointers in the directory. |
840 | */ | 853 | */ |
841 | conflict_again: | 854 | |
855 | if (search_done) | ||
856 | goto insert; | ||
857 | |||
842 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); | 858 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); |
843 | if (ret == 0) { | 859 | if (ret == 0) { |
844 | char *victim_name; | 860 | char *victim_name; |
@@ -874,42 +890,26 @@ conflict_again: | |||
874 | if (!backref_in_log(log, key, victim_name, | 890 | if (!backref_in_log(log, key, victim_name, |
875 | victim_name_len)) { | 891 | victim_name_len)) { |
876 | btrfs_inc_nlink(inode); | 892 | btrfs_inc_nlink(inode); |
877 | btrfs_release_path(root, path); | 893 | btrfs_release_path(path); |
878 | 894 | ||
879 | ret = btrfs_unlink_inode(trans, root, dir, | 895 | ret = btrfs_unlink_inode(trans, root, dir, |
880 | inode, victim_name, | 896 | inode, victim_name, |
881 | victim_name_len); | 897 | victim_name_len); |
882 | kfree(victim_name); | ||
883 | btrfs_release_path(root, path); | ||
884 | goto conflict_again; | ||
885 | } | 898 | } |
886 | kfree(victim_name); | 899 | kfree(victim_name); |
887 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; | 900 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; |
888 | } | 901 | } |
889 | BUG_ON(ret); | 902 | BUG_ON(ret); |
890 | } | ||
891 | btrfs_release_path(root, path); | ||
892 | 903 | ||
893 | /* look for a conflicting sequence number */ | 904 | /* |
894 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, | 905 | * NOTE: we have searched the root tree and checked the |
895 | btrfs_inode_ref_index(eb, ref), | 906 | * corresponding ref, so we do not need to check it again. |
896 | name, namelen, 0); | 907 | */ |
897 | if (di && !IS_ERR(di)) { | 908 | search_done = 1; |
898 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
899 | BUG_ON(ret); | ||
900 | } | ||
901 | btrfs_release_path(root, path); | ||
902 | |||
903 | |||
904 | /* look for a conflicting name */ | ||
905 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | ||
906 | name, namelen, 0); | ||
907 | if (di && !IS_ERR(di)) { | ||
908 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
909 | BUG_ON(ret); | ||
910 | } | 909 | } |
911 | btrfs_release_path(root, path); | 910 | btrfs_release_path(path); |
912 | 911 | ||
912 | insert: | ||
913 | /* insert our name */ | 913 | /* insert our name */ |
914 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, | 914 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, |
915 | btrfs_inode_ref_index(eb, ref)); | 915 | btrfs_inode_ref_index(eb, ref)); |
@@ -928,7 +928,7 @@ out: | |||
928 | BUG_ON(ret); | 928 | BUG_ON(ret); |
929 | 929 | ||
930 | out_nowrite: | 930 | out_nowrite: |
931 | btrfs_release_path(root, path); | 931 | btrfs_release_path(path); |
932 | iput(dir); | 932 | iput(dir); |
933 | iput(inode); | 933 | iput(inode); |
934 | return 0; | 934 | return 0; |
@@ -966,12 +966,15 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
966 | unsigned long ptr; | 966 | unsigned long ptr; |
967 | unsigned long ptr_end; | 967 | unsigned long ptr_end; |
968 | int name_len; | 968 | int name_len; |
969 | u64 ino = btrfs_ino(inode); | ||
969 | 970 | ||
970 | key.objectid = inode->i_ino; | 971 | key.objectid = ino; |
971 | key.type = BTRFS_INODE_REF_KEY; | 972 | key.type = BTRFS_INODE_REF_KEY; |
972 | key.offset = (u64)-1; | 973 | key.offset = (u64)-1; |
973 | 974 | ||
974 | path = btrfs_alloc_path(); | 975 | path = btrfs_alloc_path(); |
976 | if (!path) | ||
977 | return -ENOMEM; | ||
975 | 978 | ||
976 | while (1) { | 979 | while (1) { |
977 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 980 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
@@ -984,7 +987,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
984 | } | 987 | } |
985 | btrfs_item_key_to_cpu(path->nodes[0], &key, | 988 | btrfs_item_key_to_cpu(path->nodes[0], &key, |
986 | path->slots[0]); | 989 | path->slots[0]); |
987 | if (key.objectid != inode->i_ino || | 990 | if (key.objectid != ino || |
988 | key.type != BTRFS_INODE_REF_KEY) | 991 | key.type != BTRFS_INODE_REF_KEY) |
989 | break; | 992 | break; |
990 | ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); | 993 | ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); |
@@ -1003,9 +1006,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
1003 | if (key.offset == 0) | 1006 | if (key.offset == 0) |
1004 | break; | 1007 | break; |
1005 | key.offset--; | 1008 | key.offset--; |
1006 | btrfs_release_path(root, path); | 1009 | btrfs_release_path(path); |
1007 | } | 1010 | } |
1008 | btrfs_release_path(root, path); | 1011 | btrfs_release_path(path); |
1009 | if (nlink != inode->i_nlink) { | 1012 | if (nlink != inode->i_nlink) { |
1010 | inode->i_nlink = nlink; | 1013 | inode->i_nlink = nlink; |
1011 | btrfs_update_inode(trans, root, inode); | 1014 | btrfs_update_inode(trans, root, inode); |
@@ -1015,10 +1018,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
1015 | if (inode->i_nlink == 0) { | 1018 | if (inode->i_nlink == 0) { |
1016 | if (S_ISDIR(inode->i_mode)) { | 1019 | if (S_ISDIR(inode->i_mode)) { |
1017 | ret = replay_dir_deletes(trans, root, NULL, path, | 1020 | ret = replay_dir_deletes(trans, root, NULL, path, |
1018 | inode->i_ino, 1); | 1021 | ino, 1); |
1019 | BUG_ON(ret); | 1022 | BUG_ON(ret); |
1020 | } | 1023 | } |
1021 | ret = insert_orphan_item(trans, root, inode->i_ino); | 1024 | ret = insert_orphan_item(trans, root, ino); |
1022 | BUG_ON(ret); | 1025 | BUG_ON(ret); |
1023 | } | 1026 | } |
1024 | btrfs_free_path(path); | 1027 | btrfs_free_path(path); |
@@ -1054,11 +1057,13 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, | |||
1054 | break; | 1057 | break; |
1055 | 1058 | ||
1056 | ret = btrfs_del_item(trans, root, path); | 1059 | ret = btrfs_del_item(trans, root, path); |
1057 | BUG_ON(ret); | 1060 | if (ret) |
1061 | goto out; | ||
1058 | 1062 | ||
1059 | btrfs_release_path(root, path); | 1063 | btrfs_release_path(path); |
1060 | inode = read_one_inode(root, key.offset); | 1064 | inode = read_one_inode(root, key.offset); |
1061 | BUG_ON(!inode); | 1065 | if (!inode) |
1066 | return -EIO; | ||
1062 | 1067 | ||
1063 | ret = fixup_inode_link_count(trans, root, inode); | 1068 | ret = fixup_inode_link_count(trans, root, inode); |
1064 | BUG_ON(ret); | 1069 | BUG_ON(ret); |
@@ -1072,8 +1077,10 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, | |||
1072 | */ | 1077 | */ |
1073 | key.offset = (u64)-1; | 1078 | key.offset = (u64)-1; |
1074 | } | 1079 | } |
1075 | btrfs_release_path(root, path); | 1080 | ret = 0; |
1076 | return 0; | 1081 | out: |
1082 | btrfs_release_path(path); | ||
1083 | return ret; | ||
1077 | } | 1084 | } |
1078 | 1085 | ||
1079 | 1086 | ||
@@ -1092,7 +1099,8 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, | |||
1092 | struct inode *inode; | 1099 | struct inode *inode; |
1093 | 1100 | ||
1094 | inode = read_one_inode(root, objectid); | 1101 | inode = read_one_inode(root, objectid); |
1095 | BUG_ON(!inode); | 1102 | if (!inode) |
1103 | return -EIO; | ||
1096 | 1104 | ||
1097 | key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; | 1105 | key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; |
1098 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); | 1106 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); |
@@ -1100,7 +1108,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, | |||
1100 | 1108 | ||
1101 | ret = btrfs_insert_empty_item(trans, root, path, &key, 0); | 1109 | ret = btrfs_insert_empty_item(trans, root, path, &key, 0); |
1102 | 1110 | ||
1103 | btrfs_release_path(root, path); | 1111 | btrfs_release_path(path); |
1104 | if (ret == 0) { | 1112 | if (ret == 0) { |
1105 | btrfs_inc_nlink(inode); | 1113 | btrfs_inc_nlink(inode); |
1106 | btrfs_update_inode(trans, root, inode); | 1114 | btrfs_update_inode(trans, root, inode); |
@@ -1179,10 +1187,14 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, | |||
1179 | int ret; | 1187 | int ret; |
1180 | 1188 | ||
1181 | dir = read_one_inode(root, key->objectid); | 1189 | dir = read_one_inode(root, key->objectid); |
1182 | BUG_ON(!dir); | 1190 | if (!dir) |
1191 | return -EIO; | ||
1183 | 1192 | ||
1184 | name_len = btrfs_dir_name_len(eb, di); | 1193 | name_len = btrfs_dir_name_len(eb, di); |
1185 | name = kmalloc(name_len, GFP_NOFS); | 1194 | name = kmalloc(name_len, GFP_NOFS); |
1195 | if (!name) | ||
1196 | return -ENOMEM; | ||
1197 | |||
1186 | log_type = btrfs_dir_type(eb, di); | 1198 | log_type = btrfs_dir_type(eb, di); |
1187 | read_extent_buffer(eb, name, (unsigned long)(di + 1), | 1199 | read_extent_buffer(eb, name, (unsigned long)(di + 1), |
1188 | name_len); | 1200 | name_len); |
@@ -1193,7 +1205,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, | |||
1193 | exists = 1; | 1205 | exists = 1; |
1194 | else | 1206 | else |
1195 | exists = 0; | 1207 | exists = 0; |
1196 | btrfs_release_path(root, path); | 1208 | btrfs_release_path(path); |
1197 | 1209 | ||
1198 | if (key->type == BTRFS_DIR_ITEM_KEY) { | 1210 | if (key->type == BTRFS_DIR_ITEM_KEY) { |
1199 | dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, | 1211 | dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, |
@@ -1206,7 +1218,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, | |||
1206 | } else { | 1218 | } else { |
1207 | BUG(); | 1219 | BUG(); |
1208 | } | 1220 | } |
1209 | if (!dst_di || IS_ERR(dst_di)) { | 1221 | if (IS_ERR_OR_NULL(dst_di)) { |
1210 | /* we need a sequence number to insert, so we only | 1222 | /* we need a sequence number to insert, so we only |
1211 | * do inserts for the BTRFS_DIR_INDEX_KEY types | 1223 | * do inserts for the BTRFS_DIR_INDEX_KEY types |
1212 | */ | 1224 | */ |
@@ -1237,13 +1249,13 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, | |||
1237 | if (key->type == BTRFS_DIR_INDEX_KEY) | 1249 | if (key->type == BTRFS_DIR_INDEX_KEY) |
1238 | goto insert; | 1250 | goto insert; |
1239 | out: | 1251 | out: |
1240 | btrfs_release_path(root, path); | 1252 | btrfs_release_path(path); |
1241 | kfree(name); | 1253 | kfree(name); |
1242 | iput(dir); | 1254 | iput(dir); |
1243 | return 0; | 1255 | return 0; |
1244 | 1256 | ||
1245 | insert: | 1257 | insert: |
1246 | btrfs_release_path(root, path); | 1258 | btrfs_release_path(path); |
1247 | ret = insert_one_name(trans, root, path, key->objectid, key->offset, | 1259 | ret = insert_one_name(trans, root, path, key->objectid, key->offset, |
1248 | name, name_len, log_type, &log_key); | 1260 | name, name_len, log_type, &log_key); |
1249 | 1261 | ||
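The `!dst_di || IS_ERR(dst_di)` tests collapse into IS_ERR_OR_NULL(). The generic helper from include/linux/err.h is effectively:

    static inline long IS_ERR_OR_NULL(const void *ptr)
    {
            return !ptr || IS_ERR_VALUE((unsigned long)ptr);
    }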
@@ -1274,6 +1286,8 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, | |||
1274 | ptr_end = ptr + item_size; | 1286 | ptr_end = ptr + item_size; |
1275 | while (ptr < ptr_end) { | 1287 | while (ptr < ptr_end) { |
1276 | di = (struct btrfs_dir_item *)ptr; | 1288 | di = (struct btrfs_dir_item *)ptr; |
1289 | if (verify_dir_item(root, eb, di)) | ||
1290 | return -EIO; | ||
1277 | name_len = btrfs_dir_name_len(eb, di); | 1291 | name_len = btrfs_dir_name_len(eb, di); |
1278 | ret = replay_one_name(trans, root, path, eb, di, key); | 1292 | ret = replay_one_name(trans, root, path, eb, di, key); |
1279 | BUG_ON(ret); | 1293 | BUG_ON(ret); |
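replay_one_dir_item() now refuses to trust a name length read from a possibly corrupted leaf: verify_dir_item() sanity-checks the entry first. Roughly what such a check does (a sketch under assumed limits; the real helper lives elsewhere in the series):

    /* sketch: reject dir items whose type or name length is implausible */
    static int verify_dir_item_sketch(struct btrfs_root *root,
                                      struct extent_buffer *leaf,
                                      struct btrfs_dir_item *di)
    {
            u16 namelen = BTRFS_NAME_LEN;
            u8 type = btrfs_dir_type(leaf, di);

            if (type >= BTRFS_FT_MAX)
                    return 1;                       /* unknown entry type */
            if (type == BTRFS_FT_XATTR)
                    namelen = XATTR_NAME_MAX;
            if (btrfs_dir_name_len(leaf, di) > namelen)
                    return 1;                       /* name overflows the item */
            return 0;
    }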
@@ -1362,7 +1376,7 @@ next: | |||
1362 | *end_ret = found_end; | 1376 | *end_ret = found_end; |
1363 | ret = 0; | 1377 | ret = 0; |
1364 | out: | 1378 | out: |
1365 | btrfs_release_path(root, path); | 1379 | btrfs_release_path(path); |
1366 | return ret; | 1380 | return ret; |
1367 | } | 1381 | } |
1368 | 1382 | ||
@@ -1400,6 +1414,11 @@ again: | |||
1400 | ptr_end = ptr + item_size; | 1414 | ptr_end = ptr + item_size; |
1401 | while (ptr < ptr_end) { | 1415 | while (ptr < ptr_end) { |
1402 | di = (struct btrfs_dir_item *)ptr; | 1416 | di = (struct btrfs_dir_item *)ptr; |
1417 | if (verify_dir_item(root, eb, di)) { | ||
1418 | ret = -EIO; | ||
1419 | goto out; | ||
1420 | } | ||
1421 | |||
1403 | name_len = btrfs_dir_name_len(eb, di); | 1422 | name_len = btrfs_dir_name_len(eb, di); |
1404 | name = kmalloc(name_len, GFP_NOFS); | 1423 | name = kmalloc(name_len, GFP_NOFS); |
1405 | if (!name) { | 1424 | if (!name) { |
@@ -1420,12 +1439,15 @@ again: | |||
1420 | dir_key->offset, | 1439 | dir_key->offset, |
1421 | name, name_len, 0); | 1440 | name, name_len, 0); |
1422 | } | 1441 | } |
1423 | if (!log_di || IS_ERR(log_di)) { | 1442 | if (IS_ERR_OR_NULL(log_di)) { |
1424 | btrfs_dir_item_key_to_cpu(eb, di, &location); | 1443 | btrfs_dir_item_key_to_cpu(eb, di, &location); |
1425 | btrfs_release_path(root, path); | 1444 | btrfs_release_path(path); |
1426 | btrfs_release_path(log, log_path); | 1445 | btrfs_release_path(log_path); |
1427 | inode = read_one_inode(root, location.objectid); | 1446 | inode = read_one_inode(root, location.objectid); |
1428 | BUG_ON(!inode); | 1447 | if (!inode) { |
1448 | kfree(name); | ||
1449 | return -EIO; | ||
1450 | } | ||
1429 | 1451 | ||
1430 | ret = link_to_fixup_dir(trans, root, | 1452 | ret = link_to_fixup_dir(trans, root, |
1431 | path, location.objectid); | 1453 | path, location.objectid); |
@@ -1447,7 +1469,7 @@ again: | |||
1447 | ret = 0; | 1469 | ret = 0; |
1448 | goto out; | 1470 | goto out; |
1449 | } | 1471 | } |
1450 | btrfs_release_path(log, log_path); | 1472 | btrfs_release_path(log_path); |
1451 | kfree(name); | 1473 | kfree(name); |
1452 | 1474 | ||
1453 | ptr = (unsigned long)(di + 1); | 1475 | ptr = (unsigned long)(di + 1); |
@@ -1455,8 +1477,8 @@ again: | |||
1455 | } | 1477 | } |
1456 | ret = 0; | 1478 | ret = 0; |
1457 | out: | 1479 | out: |
1458 | btrfs_release_path(root, path); | 1480 | btrfs_release_path(path); |
1459 | btrfs_release_path(log, log_path); | 1481 | btrfs_release_path(log_path); |
1460 | return ret; | 1482 | return ret; |
1461 | } | 1483 | } |
1462 | 1484 | ||
@@ -1544,7 +1566,7 @@ again: | |||
1544 | break; | 1566 | break; |
1545 | dir_key.offset = found_key.offset + 1; | 1567 | dir_key.offset = found_key.offset + 1; |
1546 | } | 1568 | } |
1547 | btrfs_release_path(root, path); | 1569 | btrfs_release_path(path); |
1548 | if (range_end == (u64)-1) | 1570 | if (range_end == (u64)-1) |
1549 | break; | 1571 | break; |
1550 | range_start = range_end + 1; | 1572 | range_start = range_end + 1; |
@@ -1555,11 +1577,11 @@ next_type: | |||
1555 | if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { | 1577 | if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { |
1556 | key_type = BTRFS_DIR_LOG_INDEX_KEY; | 1578 | key_type = BTRFS_DIR_LOG_INDEX_KEY; |
1557 | dir_key.type = BTRFS_DIR_INDEX_KEY; | 1579 | dir_key.type = BTRFS_DIR_INDEX_KEY; |
1558 | btrfs_release_path(root, path); | 1580 | btrfs_release_path(path); |
1559 | goto again; | 1581 | goto again; |
1560 | } | 1582 | } |
1561 | out: | 1583 | out: |
1562 | btrfs_release_path(root, path); | 1584 | btrfs_release_path(path); |
1563 | btrfs_free_path(log_path); | 1585 | btrfs_free_path(log_path); |
1564 | iput(dir); | 1586 | iput(dir); |
1565 | return ret; | 1587 | return ret; |
@@ -1583,7 +1605,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1583 | struct btrfs_path *path; | 1605 | struct btrfs_path *path; |
1584 | struct btrfs_root *root = wc->replay_dest; | 1606 | struct btrfs_root *root = wc->replay_dest; |
1585 | struct btrfs_key key; | 1607 | struct btrfs_key key; |
1586 | u32 item_size; | ||
1587 | int level; | 1608 | int level; |
1588 | int i; | 1609 | int i; |
1589 | int ret; | 1610 | int ret; |
@@ -1601,7 +1622,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1601 | nritems = btrfs_header_nritems(eb); | 1622 | nritems = btrfs_header_nritems(eb); |
1602 | for (i = 0; i < nritems; i++) { | 1623 | for (i = 0; i < nritems; i++) { |
1603 | btrfs_item_key_to_cpu(eb, &key, i); | 1624 | btrfs_item_key_to_cpu(eb, &key, i); |
1604 | item_size = btrfs_item_size_nr(eb, i); | ||
1605 | 1625 | ||
1606 | /* inode keys are done during the first stage */ | 1626 | /* inode keys are done during the first stage */ |
1607 | if (key.type == BTRFS_INODE_ITEM_KEY && | 1627 | if (key.type == BTRFS_INODE_ITEM_KEY && |
@@ -1668,7 +1688,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1668 | struct walk_control *wc) | 1688 | struct walk_control *wc) |
1669 | { | 1689 | { |
1670 | u64 root_owner; | 1690 | u64 root_owner; |
1671 | u64 root_gen; | ||
1672 | u64 bytenr; | 1691 | u64 bytenr; |
1673 | u64 ptr_gen; | 1692 | u64 ptr_gen; |
1674 | struct extent_buffer *next; | 1693 | struct extent_buffer *next; |
@@ -1698,9 +1717,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1698 | 1717 | ||
1699 | parent = path->nodes[*level]; | 1718 | parent = path->nodes[*level]; |
1700 | root_owner = btrfs_header_owner(parent); | 1719 | root_owner = btrfs_header_owner(parent); |
1701 | root_gen = btrfs_header_generation(parent); | ||
1702 | 1720 | ||
1703 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 1721 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); |
1722 | if (!next) | ||
1723 | return -ENOMEM; | ||
1704 | 1724 | ||
1705 | if (*level == 1) { | 1725 | if (*level == 1) { |
1706 | wc->process_func(root, next, wc, ptr_gen); | 1726 | wc->process_func(root, next, wc, ptr_gen); |
@@ -1749,7 +1769,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
1749 | struct walk_control *wc) | 1769 | struct walk_control *wc) |
1750 | { | 1770 | { |
1751 | u64 root_owner; | 1771 | u64 root_owner; |
1752 | u64 root_gen; | ||
1753 | int i; | 1772 | int i; |
1754 | int slot; | 1773 | int slot; |
1755 | int ret; | 1774 | int ret; |
@@ -1757,8 +1776,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
1757 | for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { | 1776 | for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { |
1758 | slot = path->slots[i]; | 1777 | slot = path->slots[i]; |
1759 | if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { | 1778 | if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { |
1760 | struct extent_buffer *node; | ||
1761 | node = path->nodes[i]; | ||
1762 | path->slots[i]++; | 1779 | path->slots[i]++; |
1763 | *level = i; | 1780 | *level = i; |
1764 | WARN_ON(*level == 0); | 1781 | WARN_ON(*level == 0); |
@@ -1771,7 +1788,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
1771 | parent = path->nodes[*level + 1]; | 1788 | parent = path->nodes[*level + 1]; |
1772 | 1789 | ||
1773 | root_owner = btrfs_header_owner(parent); | 1790 | root_owner = btrfs_header_owner(parent); |
1774 | root_gen = btrfs_header_generation(parent); | ||
1775 | wc->process_func(root, path->nodes[*level], wc, | 1791 | wc->process_func(root, path->nodes[*level], wc, |
1776 | btrfs_header_generation(path->nodes[*level])); | 1792 | btrfs_header_generation(path->nodes[*level])); |
1777 | if (wc->free) { | 1793 | if (wc->free) { |
@@ -1815,7 +1831,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, | |||
1815 | int orig_level; | 1831 | int orig_level; |
1816 | 1832 | ||
1817 | path = btrfs_alloc_path(); | 1833 | path = btrfs_alloc_path(); |
1818 | BUG_ON(!path); | 1834 | if (!path) |
1835 | return -ENOMEM; | ||
1819 | 1836 | ||
1820 | level = btrfs_header_level(log->node); | 1837 | level = btrfs_header_level(log->node); |
1821 | orig_level = level; | 1838 | orig_level = level; |
@@ -2045,6 +2062,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2045 | wait_log_commit(trans, log_root_tree, | 2062 | wait_log_commit(trans, log_root_tree, |
2046 | log_root_tree->log_transid); | 2063 | log_root_tree->log_transid); |
2047 | mutex_unlock(&log_root_tree->log_mutex); | 2064 | mutex_unlock(&log_root_tree->log_mutex); |
2065 | ret = 0; | ||
2048 | goto out; | 2066 | goto out; |
2049 | } | 2067 | } |
2050 | atomic_set(&log_root_tree->log_commit[index2], 1); | 2068 | atomic_set(&log_root_tree->log_commit[index2], 1); |
@@ -2091,7 +2109,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2091 | * the running transaction open, so a full commit can't hop | 2109 | * the running transaction open, so a full commit can't hop |
2092 | * in and cause problems either. | 2110 | * in and cause problems either. |
2093 | */ | 2111 | */ |
2112 | btrfs_scrub_pause_super(root); | ||
2094 | write_ctree_super(trans, root->fs_info->tree_root, 1); | 2113 | write_ctree_super(trans, root->fs_info->tree_root, 1); |
2114 | btrfs_scrub_continue_super(root); | ||
2095 | ret = 0; | 2115 | ret = 0; |
2096 | 2116 | ||
2097 | mutex_lock(&root->log_mutex); | 2117 | mutex_lock(&root->log_mutex); |
@@ -2109,7 +2129,7 @@ out: | |||
2109 | smp_mb(); | 2129 | smp_mb(); |
2110 | if (waitqueue_active(&root->log_commit_wait[index1])) | 2130 | if (waitqueue_active(&root->log_commit_wait[index1])) |
2111 | wake_up(&root->log_commit_wait[index1]); | 2131 | wake_up(&root->log_commit_wait[index1]); |
2112 | return 0; | 2132 | return ret; |
2113 | } | 2133 | } |
2114 | 2134 | ||
2115 | static void free_log_tree(struct btrfs_trans_handle *trans, | 2135 | static void free_log_tree(struct btrfs_trans_handle *trans, |
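Two fixes meet in btrfs_sync_log(): the super block write is bracketed by btrfs_scrub_pause_super()/btrfs_scrub_continue_super() so a running scrub cannot race with the super update, and the function now returns ret instead of a hard-coded 0, which is why the early "another committer is already running" exit must set ret = 0 before jumping to out. A condensed sketch of the changed control flow:

    if (atomic_read(&log_root_tree->log_commit[index2])) {
            /* someone else is committing this log tree; wait and succeed */
            wait_log_commit(trans, log_root_tree, log_root_tree->log_transid);
            mutex_unlock(&log_root_tree->log_mutex);
            ret = 0;                  /* don't leak a stale error through 'out' */
            goto out;
    }
    /* ... commit the log root tree ... */
    btrfs_scrub_pause_super(root);
    write_ctree_super(trans, root->fs_info->tree_root, 1);
    btrfs_scrub_continue_super(root);
    /* ... */
    out:
    return ret;                       /* previously: return 0 */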
@@ -2195,6 +2215,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2195 | int ret; | 2215 | int ret; |
2196 | int err = 0; | 2216 | int err = 0; |
2197 | int bytes_del = 0; | 2217 | int bytes_del = 0; |
2218 | u64 dir_ino = btrfs_ino(dir); | ||
2198 | 2219 | ||
2199 | if (BTRFS_I(dir)->logged_trans < trans->transid) | 2220 | if (BTRFS_I(dir)->logged_trans < trans->transid) |
2200 | return 0; | 2221 | return 0; |
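dir->i_ino is replaced by a cached dir_ino = btrfs_ino(dir), and the same substitution recurs through the rest of the file. btrfs_ino() returns the btrfs objectid for the inode, which with the delayed-inode work is no longer guaranteed to equal the VFS i_ino. Assumed shape of the helper (the real one is a static inline in btrfs_inode.h):

    static inline u64 btrfs_ino(struct inode *inode)
    {
            u64 ino = BTRFS_I(inode)->location.objectid;

            /* fall back to i_ino for special/root inodes */
            if (ino <= BTRFS_FIRST_FREE_OBJECTID)
                    ino = inode->i_ino;
            return ino;
    }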
@@ -2207,7 +2228,12 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2207 | 2228 | ||
2208 | log = root->log_root; | 2229 | log = root->log_root; |
2209 | path = btrfs_alloc_path(); | 2230 | path = btrfs_alloc_path(); |
2210 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, | 2231 | if (!path) { |
2232 | err = -ENOMEM; | ||
2233 | goto out_unlock; | ||
2234 | } | ||
2235 | |||
2236 | di = btrfs_lookup_dir_item(trans, log, path, dir_ino, | ||
2211 | name, name_len, -1); | 2237 | name, name_len, -1); |
2212 | if (IS_ERR(di)) { | 2238 | if (IS_ERR(di)) { |
2213 | err = PTR_ERR(di); | 2239 | err = PTR_ERR(di); |
@@ -2218,8 +2244,8 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2218 | bytes_del += name_len; | 2244 | bytes_del += name_len; |
2219 | BUG_ON(ret); | 2245 | BUG_ON(ret); |
2220 | } | 2246 | } |
2221 | btrfs_release_path(log, path); | 2247 | btrfs_release_path(path); |
2222 | di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, | 2248 | di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino, |
2223 | index, name, name_len, -1); | 2249 | index, name, name_len, -1); |
2224 | if (IS_ERR(di)) { | 2250 | if (IS_ERR(di)) { |
2225 | err = PTR_ERR(di); | 2251 | err = PTR_ERR(di); |
@@ -2237,10 +2263,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2237 | if (bytes_del) { | 2263 | if (bytes_del) { |
2238 | struct btrfs_key key; | 2264 | struct btrfs_key key; |
2239 | 2265 | ||
2240 | key.objectid = dir->i_ino; | 2266 | key.objectid = dir_ino; |
2241 | key.offset = 0; | 2267 | key.offset = 0; |
2242 | key.type = BTRFS_INODE_ITEM_KEY; | 2268 | key.type = BTRFS_INODE_ITEM_KEY; |
2243 | btrfs_release_path(log, path); | 2269 | btrfs_release_path(path); |
2244 | 2270 | ||
2245 | ret = btrfs_search_slot(trans, log, &key, path, 0, 1); | 2271 | ret = btrfs_search_slot(trans, log, &key, path, 0, 1); |
2246 | if (ret < 0) { | 2272 | if (ret < 0) { |
@@ -2262,10 +2288,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2262 | btrfs_mark_buffer_dirty(path->nodes[0]); | 2288 | btrfs_mark_buffer_dirty(path->nodes[0]); |
2263 | } else | 2289 | } else |
2264 | ret = 0; | 2290 | ret = 0; |
2265 | btrfs_release_path(log, path); | 2291 | btrfs_release_path(path); |
2266 | } | 2292 | } |
2267 | fail: | 2293 | fail: |
2268 | btrfs_free_path(path); | 2294 | btrfs_free_path(path); |
2295 | out_unlock: | ||
2269 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | 2296 | mutex_unlock(&BTRFS_I(dir)->log_mutex); |
2270 | if (ret == -ENOSPC) { | 2297 | if (ret == -ENOSPC) { |
2271 | root->fs_info->last_trans_log_full_commit = trans->transid; | 2298 | root->fs_info->last_trans_log_full_commit = trans->transid; |
@@ -2273,7 +2300,7 @@ fail: | |||
2273 | } | 2300 | } |
2274 | btrfs_end_log_trans(root); | 2301 | btrfs_end_log_trans(root); |
2275 | 2302 | ||
2276 | return 0; | 2303 | return err; |
2277 | } | 2304 | } |
2278 | 2305 | ||
2279 | /* see comments for btrfs_del_dir_entries_in_log */ | 2306 | /* see comments for btrfs_del_dir_entries_in_log */ |
@@ -2295,7 +2322,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
2295 | log = root->log_root; | 2322 | log = root->log_root; |
2296 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 2323 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
2297 | 2324 | ||
2298 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, | 2325 | ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode), |
2299 | dirid, &index); | 2326 | dirid, &index); |
2300 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2327 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
2301 | if (ret == -ENOSPC) { | 2328 | if (ret == -ENOSPC) { |
@@ -2336,7 +2363,7 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, | |||
2336 | struct btrfs_dir_log_item); | 2363 | struct btrfs_dir_log_item); |
2337 | btrfs_set_dir_log_end(path->nodes[0], item, last_offset); | 2364 | btrfs_set_dir_log_end(path->nodes[0], item, last_offset); |
2338 | btrfs_mark_buffer_dirty(path->nodes[0]); | 2365 | btrfs_mark_buffer_dirty(path->nodes[0]); |
2339 | btrfs_release_path(log, path); | 2366 | btrfs_release_path(path); |
2340 | return 0; | 2367 | return 0; |
2341 | } | 2368 | } |
2342 | 2369 | ||
@@ -2361,13 +2388,14 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2361 | int nritems; | 2388 | int nritems; |
2362 | u64 first_offset = min_offset; | 2389 | u64 first_offset = min_offset; |
2363 | u64 last_offset = (u64)-1; | 2390 | u64 last_offset = (u64)-1; |
2391 | u64 ino = btrfs_ino(inode); | ||
2364 | 2392 | ||
2365 | log = root->log_root; | 2393 | log = root->log_root; |
2366 | max_key.objectid = inode->i_ino; | 2394 | max_key.objectid = ino; |
2367 | max_key.offset = (u64)-1; | 2395 | max_key.offset = (u64)-1; |
2368 | max_key.type = key_type; | 2396 | max_key.type = key_type; |
2369 | 2397 | ||
2370 | min_key.objectid = inode->i_ino; | 2398 | min_key.objectid = ino; |
2371 | min_key.type = key_type; | 2399 | min_key.type = key_type; |
2372 | min_key.offset = min_offset; | 2400 | min_key.offset = min_offset; |
2373 | 2401 | ||
@@ -2380,18 +2408,17 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2380 | * we didn't find anything from this transaction, see if there | 2408 | * we didn't find anything from this transaction, see if there |
2381 | * is anything at all | 2409 | * is anything at all |
2382 | */ | 2410 | */ |
2383 | if (ret != 0 || min_key.objectid != inode->i_ino || | 2411 | if (ret != 0 || min_key.objectid != ino || min_key.type != key_type) { |
2384 | min_key.type != key_type) { | 2412 | min_key.objectid = ino; |
2385 | min_key.objectid = inode->i_ino; | ||
2386 | min_key.type = key_type; | 2413 | min_key.type = key_type; |
2387 | min_key.offset = (u64)-1; | 2414 | min_key.offset = (u64)-1; |
2388 | btrfs_release_path(root, path); | 2415 | btrfs_release_path(path); |
2389 | ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); | 2416 | ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); |
2390 | if (ret < 0) { | 2417 | if (ret < 0) { |
2391 | btrfs_release_path(root, path); | 2418 | btrfs_release_path(path); |
2392 | return ret; | 2419 | return ret; |
2393 | } | 2420 | } |
2394 | ret = btrfs_previous_item(root, path, inode->i_ino, key_type); | 2421 | ret = btrfs_previous_item(root, path, ino, key_type); |
2395 | 2422 | ||
2396 | /* if ret == 0 there are items for this type, | 2423 | /* if ret == 0 there are items for this type, |
2397 | * create a range to tell us the last key of this type. | 2424 | * create a range to tell us the last key of this type. |
@@ -2409,7 +2436,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2409 | } | 2436 | } |
2410 | 2437 | ||
2411 | /* go backward to find any previous key */ | 2438 | /* go backward to find any previous key */ |
2412 | ret = btrfs_previous_item(root, path, inode->i_ino, key_type); | 2439 | ret = btrfs_previous_item(root, path, ino, key_type); |
2413 | if (ret == 0) { | 2440 | if (ret == 0) { |
2414 | struct btrfs_key tmp; | 2441 | struct btrfs_key tmp; |
2415 | btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); | 2442 | btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); |
@@ -2424,7 +2451,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2424 | } | 2451 | } |
2425 | } | 2452 | } |
2426 | } | 2453 | } |
2427 | btrfs_release_path(root, path); | 2454 | btrfs_release_path(path); |
2428 | 2455 | ||
2429 | /* find the first key from this transaction again */ | 2456 | /* find the first key from this transaction again */ |
2430 | ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); | 2457 | ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); |
@@ -2444,8 +2471,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2444 | for (i = path->slots[0]; i < nritems; i++) { | 2471 | for (i = path->slots[0]; i < nritems; i++) { |
2445 | btrfs_item_key_to_cpu(src, &min_key, i); | 2472 | btrfs_item_key_to_cpu(src, &min_key, i); |
2446 | 2473 | ||
2447 | if (min_key.objectid != inode->i_ino || | 2474 | if (min_key.objectid != ino || min_key.type != key_type) |
2448 | min_key.type != key_type) | ||
2449 | goto done; | 2475 | goto done; |
2450 | ret = overwrite_item(trans, log, dst_path, src, i, | 2476 | ret = overwrite_item(trans, log, dst_path, src, i, |
2451 | &min_key); | 2477 | &min_key); |
@@ -2466,7 +2492,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2466 | goto done; | 2492 | goto done; |
2467 | } | 2493 | } |
2468 | btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); | 2494 | btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); |
2469 | if (tmp.objectid != inode->i_ino || tmp.type != key_type) { | 2495 | if (tmp.objectid != ino || tmp.type != key_type) { |
2470 | last_offset = (u64)-1; | 2496 | last_offset = (u64)-1; |
2471 | goto done; | 2497 | goto done; |
2472 | } | 2498 | } |
@@ -2482,8 +2508,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2482 | } | 2508 | } |
2483 | } | 2509 | } |
2484 | done: | 2510 | done: |
2485 | btrfs_release_path(root, path); | 2511 | btrfs_release_path(path); |
2486 | btrfs_release_path(log, dst_path); | 2512 | btrfs_release_path(dst_path); |
2487 | 2513 | ||
2488 | if (err == 0) { | 2514 | if (err == 0) { |
2489 | *last_offset_ret = last_offset; | 2515 | *last_offset_ret = last_offset; |
@@ -2492,8 +2518,7 @@ done: | |||
2492 | * is valid | 2518 | * is valid |
2493 | */ | 2519 | */ |
2494 | ret = insert_dir_log_key(trans, log, path, key_type, | 2520 | ret = insert_dir_log_key(trans, log, path, key_type, |
2495 | inode->i_ino, first_offset, | 2521 | ino, first_offset, last_offset); |
2496 | last_offset); | ||
2497 | if (ret) | 2522 | if (ret) |
2498 | err = ret; | 2523 | err = ret; |
2499 | } | 2524 | } |
@@ -2579,10 +2604,11 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2579 | break; | 2604 | break; |
2580 | 2605 | ||
2581 | ret = btrfs_del_item(trans, log, path); | 2606 | ret = btrfs_del_item(trans, log, path); |
2582 | BUG_ON(ret); | 2607 | if (ret) |
2583 | btrfs_release_path(log, path); | 2608 | break; |
2609 | btrfs_release_path(path); | ||
2584 | } | 2610 | } |
2585 | btrfs_release_path(log, path); | 2611 | btrfs_release_path(path); |
2586 | return ret; | 2612 | return ret; |
2587 | } | 2613 | } |
2588 | 2614 | ||
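drop_objectid_items() stops asserting on btrfs_del_item() failure and instead breaks out of the delete loop and returns the error. A sketch of the loop after the change (the search and key-match checks are elided):

    while (1) {
            ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
            if (ret < 0)
                    break;
            /* ... verify the found key still belongs to this objectid ... */
            ret = btrfs_del_item(trans, log, path);
            if (ret)
                    break;            /* was BUG_ON(ret) */
            btrfs_release_path(path);
    }
    btrfs_release_path(path);
    return ret;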
@@ -2607,6 +2633,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2607 | 2633 | ||
2608 | ins_data = kmalloc(nr * sizeof(struct btrfs_key) + | 2634 | ins_data = kmalloc(nr * sizeof(struct btrfs_key) + |
2609 | nr * sizeof(u32), GFP_NOFS); | 2635 | nr * sizeof(u32), GFP_NOFS); |
2636 | if (!ins_data) | ||
2637 | return -ENOMEM; | ||
2638 | |||
2610 | ins_sizes = (u32 *)ins_data; | 2639 | ins_sizes = (u32 *)ins_data; |
2611 | ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); | 2640 | ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); |
2612 | 2641 | ||
@@ -2654,6 +2683,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2654 | extent = btrfs_item_ptr(src, start_slot + i, | 2683 | extent = btrfs_item_ptr(src, start_slot + i, |
2655 | struct btrfs_file_extent_item); | 2684 | struct btrfs_file_extent_item); |
2656 | 2685 | ||
2686 | if (btrfs_file_extent_generation(src, extent) < trans->transid) | ||
2687 | continue; | ||
2688 | |||
2657 | found_type = btrfs_file_extent_type(src, extent); | 2689 | found_type = btrfs_file_extent_type(src, extent); |
2658 | if (found_type == BTRFS_FILE_EXTENT_REG || | 2690 | if (found_type == BTRFS_FILE_EXTENT_REG || |
2659 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | 2691 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
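copy_items() now skips file extent items whose generation predates the running transaction: those extents were written by an already-committed transaction, so their data and checksums are safely on disk and need not be looked up and re-logged. The test, in context:

    extent = btrfs_item_ptr(src, start_slot + i,
                            struct btrfs_file_extent_item);

    /* only log extents created in this transaction */
    if (btrfs_file_extent_generation(src, extent) < trans->transid)
            continue;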
@@ -2678,14 +2710,14 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2678 | ret = btrfs_lookup_csums_range( | 2710 | ret = btrfs_lookup_csums_range( |
2679 | log->fs_info->csum_root, | 2711 | log->fs_info->csum_root, |
2680 | ds + cs, ds + cs + cl - 1, | 2712 | ds + cs, ds + cs + cl - 1, |
2681 | &ordered_sums); | 2713 | &ordered_sums, 0); |
2682 | BUG_ON(ret); | 2714 | BUG_ON(ret); |
2683 | } | 2715 | } |
2684 | } | 2716 | } |
2685 | } | 2717 | } |
2686 | 2718 | ||
2687 | btrfs_mark_buffer_dirty(dst_path->nodes[0]); | 2719 | btrfs_mark_buffer_dirty(dst_path->nodes[0]); |
2688 | btrfs_release_path(log, dst_path); | 2720 | btrfs_release_path(dst_path); |
2689 | kfree(ins_data); | 2721 | kfree(ins_data); |
2690 | 2722 | ||
2691 | /* | 2723 | /* |
@@ -2729,23 +2761,29 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2729 | struct btrfs_key max_key; | 2761 | struct btrfs_key max_key; |
2730 | struct btrfs_root *log = root->log_root; | 2762 | struct btrfs_root *log = root->log_root; |
2731 | struct extent_buffer *src = NULL; | 2763 | struct extent_buffer *src = NULL; |
2732 | u32 size; | ||
2733 | int err = 0; | 2764 | int err = 0; |
2734 | int ret; | 2765 | int ret; |
2735 | int nritems; | 2766 | int nritems; |
2736 | int ins_start_slot = 0; | 2767 | int ins_start_slot = 0; |
2737 | int ins_nr; | 2768 | int ins_nr; |
2769 | u64 ino = btrfs_ino(inode); | ||
2738 | 2770 | ||
2739 | log = root->log_root; | 2771 | log = root->log_root; |
2740 | 2772 | ||
2741 | path = btrfs_alloc_path(); | 2773 | path = btrfs_alloc_path(); |
2774 | if (!path) | ||
2775 | return -ENOMEM; | ||
2742 | dst_path = btrfs_alloc_path(); | 2776 | dst_path = btrfs_alloc_path(); |
2777 | if (!dst_path) { | ||
2778 | btrfs_free_path(path); | ||
2779 | return -ENOMEM; | ||
2780 | } | ||
2743 | 2781 | ||
2744 | min_key.objectid = inode->i_ino; | 2782 | min_key.objectid = ino; |
2745 | min_key.type = BTRFS_INODE_ITEM_KEY; | 2783 | min_key.type = BTRFS_INODE_ITEM_KEY; |
2746 | min_key.offset = 0; | 2784 | min_key.offset = 0; |
2747 | 2785 | ||
2748 | max_key.objectid = inode->i_ino; | 2786 | max_key.objectid = ino; |
2749 | 2787 | ||
2750 | /* today the code can only do partial logging of directories */ | 2788 | /* today the code can only do partial logging of directories */ |
2751 | if (!S_ISDIR(inode->i_mode)) | 2789 | if (!S_ISDIR(inode->i_mode)) |
@@ -2757,6 +2795,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2757 | max_key.type = (u8)-1; | 2795 | max_key.type = (u8)-1; |
2758 | max_key.offset = (u64)-1; | 2796 | max_key.offset = (u64)-1; |
2759 | 2797 | ||
2798 | ret = btrfs_commit_inode_delayed_items(trans, inode); | ||
2799 | if (ret) { | ||
2800 | btrfs_free_path(path); | ||
2801 | btrfs_free_path(dst_path); | ||
2802 | return ret; | ||
2803 | } | ||
2804 | |||
2760 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 2805 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
2761 | 2806 | ||
2762 | /* | 2807 | /* |
@@ -2768,8 +2813,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2768 | 2813 | ||
2769 | if (inode_only == LOG_INODE_EXISTS) | 2814 | if (inode_only == LOG_INODE_EXISTS) |
2770 | max_key_type = BTRFS_XATTR_ITEM_KEY; | 2815 | max_key_type = BTRFS_XATTR_ITEM_KEY; |
2771 | ret = drop_objectid_items(trans, log, path, | 2816 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); |
2772 | inode->i_ino, max_key_type); | ||
2773 | } else { | 2817 | } else { |
2774 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); | 2818 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); |
2775 | } | 2819 | } |
@@ -2787,13 +2831,12 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2787 | break; | 2831 | break; |
2788 | again: | 2832 | again: |
2789 | /* note, ins_nr might be > 0 here, cleanup outside the loop */ | 2833 | /* note, ins_nr might be > 0 here, cleanup outside the loop */ |
2790 | if (min_key.objectid != inode->i_ino) | 2834 | if (min_key.objectid != ino) |
2791 | break; | 2835 | break; |
2792 | if (min_key.type > max_key.type) | 2836 | if (min_key.type > max_key.type) |
2793 | break; | 2837 | break; |
2794 | 2838 | ||
2795 | src = path->nodes[0]; | 2839 | src = path->nodes[0]; |
2796 | size = btrfs_item_size_nr(src, path->slots[0]); | ||
2797 | if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { | 2840 | if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { |
2798 | ins_nr++; | 2841 | ins_nr++; |
2799 | goto next_slot; | 2842 | goto next_slot; |
@@ -2830,7 +2873,7 @@ next_slot: | |||
2830 | } | 2873 | } |
2831 | ins_nr = 0; | 2874 | ins_nr = 0; |
2832 | } | 2875 | } |
2833 | btrfs_release_path(root, path); | 2876 | btrfs_release_path(path); |
2834 | 2877 | ||
2835 | if (min_key.offset < (u64)-1) | 2878 | if (min_key.offset < (u64)-1) |
2836 | min_key.offset++; | 2879 | min_key.offset++; |
@@ -2853,8 +2896,8 @@ next_slot: | |||
2853 | } | 2896 | } |
2854 | WARN_ON(ins_nr); | 2897 | WARN_ON(ins_nr); |
2855 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { | 2898 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { |
2856 | btrfs_release_path(root, path); | 2899 | btrfs_release_path(path); |
2857 | btrfs_release_path(log, dst_path); | 2900 | btrfs_release_path(dst_path); |
2858 | ret = log_directory_changes(trans, root, inode, path, dst_path); | 2901 | ret = log_directory_changes(trans, root, inode, path, dst_path); |
2859 | if (ret) { | 2902 | if (ret) { |
2860 | err = ret; | 2903 | err = ret; |
@@ -2884,6 +2927,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
2884 | { | 2927 | { |
2885 | int ret = 0; | 2928 | int ret = 0; |
2886 | struct btrfs_root *root; | 2929 | struct btrfs_root *root; |
2930 | struct dentry *old_parent = NULL; | ||
2887 | 2931 | ||
2888 | /* | 2932 | /* |
2889 | * for regular files, if its inode is already on disk, we don't | 2933 | * for regular files, if its inode is already on disk, we don't |
@@ -2925,10 +2969,13 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
2925 | if (IS_ROOT(parent)) | 2969 | if (IS_ROOT(parent)) |
2926 | break; | 2970 | break; |
2927 | 2971 | ||
2928 | parent = parent->d_parent; | 2972 | parent = dget_parent(parent); |
2973 | dput(old_parent); | ||
2974 | old_parent = parent; | ||
2929 | inode = parent->d_inode; | 2975 | inode = parent->d_inode; |
2930 | 2976 | ||
2931 | } | 2977 | } |
2978 | dput(old_parent); | ||
2932 | out: | 2979 | out: |
2933 | return ret; | 2980 | return ret; |
2934 | } | 2981 | } |
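All three parent walks in this file (here, in btrfs_log_inode_parent() below, and in btrfs_log_dentry_safe()) switch from bare parent->d_parent chasing to dget_parent(), which takes a reference on a stable snapshot of the parent; without it, a concurrent rename can change or free the dentry under the walker. Sketch of the safe walk-up, assuming <linux/dcache.h>:

    #include <linux/dcache.h>

    struct dentry *parent = dget_parent(dentry);
    struct dentry *old_parent;

    while (!IS_ROOT(parent)) {
            /* pin the next ancestor before releasing the current one */
            old_parent = parent;
            parent = dget_parent(old_parent);
            dput(old_parent);
    }
    dput(parent);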
@@ -2960,6 +3007,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2960 | { | 3007 | { |
2961 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; | 3008 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; |
2962 | struct super_block *sb; | 3009 | struct super_block *sb; |
3010 | struct dentry *old_parent = NULL; | ||
2963 | int ret = 0; | 3011 | int ret = 0; |
2964 | u64 last_committed = root->fs_info->last_trans_committed; | 3012 | u64 last_committed = root->fs_info->last_trans_committed; |
2965 | 3013 | ||
@@ -3031,10 +3079,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
3031 | if (IS_ROOT(parent)) | 3079 | if (IS_ROOT(parent)) |
3032 | break; | 3080 | break; |
3033 | 3081 | ||
3034 | parent = parent->d_parent; | 3082 | parent = dget_parent(parent); |
3083 | dput(old_parent); | ||
3084 | old_parent = parent; | ||
3035 | } | 3085 | } |
3036 | ret = 0; | 3086 | ret = 0; |
3037 | end_trans: | 3087 | end_trans: |
3088 | dput(old_parent); | ||
3038 | if (ret < 0) { | 3089 | if (ret < 0) { |
3039 | BUG_ON(ret != -ENOSPC); | 3090 | BUG_ON(ret != -ENOSPC); |
3040 | root->fs_info->last_trans_log_full_commit = trans->transid; | 3091 | root->fs_info->last_trans_log_full_commit = trans->transid; |
@@ -3054,8 +3105,13 @@ end_no_trans: | |||
3054 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 3105 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
3055 | struct btrfs_root *root, struct dentry *dentry) | 3106 | struct btrfs_root *root, struct dentry *dentry) |
3056 | { | 3107 | { |
3057 | return btrfs_log_inode_parent(trans, root, dentry->d_inode, | 3108 | struct dentry *parent = dget_parent(dentry); |
3058 | dentry->d_parent, 0); | 3109 | int ret; |
3110 | |||
3111 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0); | ||
3112 | dput(parent); | ||
3113 | |||
3114 | return ret; | ||
3059 | } | 3115 | } |
3060 | 3116 | ||
3061 | /* | 3117 | /* |
@@ -3077,16 +3133,20 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
3077 | .stage = 0, | 3133 | .stage = 0, |
3078 | }; | 3134 | }; |
3079 | 3135 | ||
3080 | fs_info->log_root_recovering = 1; | ||
3081 | path = btrfs_alloc_path(); | 3136 | path = btrfs_alloc_path(); |
3082 | BUG_ON(!path); | 3137 | if (!path) |
3138 | return -ENOMEM; | ||
3139 | |||
3140 | fs_info->log_root_recovering = 1; | ||
3083 | 3141 | ||
3084 | trans = btrfs_start_transaction(fs_info->tree_root, 0); | 3142 | trans = btrfs_start_transaction(fs_info->tree_root, 0); |
3143 | BUG_ON(IS_ERR(trans)); | ||
3085 | 3144 | ||
3086 | wc.trans = trans; | 3145 | wc.trans = trans; |
3087 | wc.pin = 1; | 3146 | wc.pin = 1; |
3088 | 3147 | ||
3089 | walk_log_tree(trans, log_root_tree, &wc); | 3148 | ret = walk_log_tree(trans, log_root_tree, &wc); |
3149 | BUG_ON(ret); | ||
3090 | 3150 | ||
3091 | again: | 3151 | again: |
3092 | key.objectid = BTRFS_TREE_LOG_OBJECTID; | 3152 | key.objectid = BTRFS_TREE_LOG_OBJECTID; |
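Log recovery now checks results it previously ignored: btrfs_alloc_path() failure returns -ENOMEM before any recovery state is touched, and btrfs_start_transaction() and walk_log_tree() are at least asserted on (the patch uses BUG_ON rather than unwinding). The general idiom for the transaction handle, sketched; the patch itself asserts instead of returning:

    trans = btrfs_start_transaction(fs_info->tree_root, 0);
    if (IS_ERR(trans))
            return PTR_ERR(trans);    /* ERR_PTR, never NULL, on failure */

    ret = walk_log_tree(trans, log_root_tree, &wc);
    if (ret)
            return ret;               /* can now fail with -ENOMEM */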
@@ -3104,21 +3164,20 @@ again: | |||
3104 | } | 3164 | } |
3105 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | 3165 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, |
3106 | path->slots[0]); | 3166 | path->slots[0]); |
3107 | btrfs_release_path(log_root_tree, path); | 3167 | btrfs_release_path(path); |
3108 | if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) | 3168 | if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) |
3109 | break; | 3169 | break; |
3110 | 3170 | ||
3111 | log = btrfs_read_fs_root_no_radix(log_root_tree, | 3171 | log = btrfs_read_fs_root_no_radix(log_root_tree, |
3112 | &found_key); | 3172 | &found_key); |
3113 | BUG_ON(!log); | 3173 | BUG_ON(IS_ERR(log)); |
3114 | |||
3115 | 3174 | ||
3116 | tmp_key.objectid = found_key.offset; | 3175 | tmp_key.objectid = found_key.offset; |
3117 | tmp_key.type = BTRFS_ROOT_ITEM_KEY; | 3176 | tmp_key.type = BTRFS_ROOT_ITEM_KEY; |
3118 | tmp_key.offset = (u64)-1; | 3177 | tmp_key.offset = (u64)-1; |
3119 | 3178 | ||
3120 | wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); | 3179 | wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); |
3121 | BUG_ON(!wc.replay_dest); | 3180 | BUG_ON(IS_ERR_OR_NULL(wc.replay_dest)); |
3122 | 3181 | ||
3123 | wc.replay_dest->log_root = log; | 3182 | wc.replay_dest->log_root = log; |
3124 | btrfs_record_root_in_trans(trans, wc.replay_dest); | 3183 | btrfs_record_root_in_trans(trans, wc.replay_dest); |
@@ -3140,7 +3199,7 @@ again: | |||
3140 | if (found_key.offset == 0) | 3199 | if (found_key.offset == 0) |
3141 | break; | 3200 | break; |
3142 | } | 3201 | } |
3143 | btrfs_release_path(log_root_tree, path); | 3202 | btrfs_release_path(path); |
3144 | 3203 | ||
3145 | /* step one is to pin it all, step two is to replay just inodes */ | 3204 | /* step one is to pin it all, step two is to replay just inodes */ |
3146 | if (wc.pin) { | 3205 | if (wc.pin) { |
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 3dfae84c8cc8..2270ac58d746 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -38,7 +38,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
38 | struct btrfs_root *root, | 38 | struct btrfs_root *root, |
39 | const char *name, int name_len, | 39 | const char *name, int name_len, |
40 | struct inode *inode, u64 dirid); | 40 | struct inode *inode, u64 dirid); |
41 | int btrfs_join_running_log_trans(struct btrfs_root *root); | ||
42 | int btrfs_end_log_trans(struct btrfs_root *root); | 41 | int btrfs_end_log_trans(struct btrfs_root *root); |
43 | int btrfs_pin_log_trans(struct btrfs_root *root); | 42 | int btrfs_pin_log_trans(struct btrfs_root *root); |
44 | int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | 43 | int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/version.sh b/fs/btrfs/version.sh deleted file mode 100644 index 1ca1952fd917..000000000000 --- a/fs/btrfs/version.sh +++ /dev/null | |||
@@ -1,43 +0,0 @@ | |||
1 | #!/bin/bash | ||
2 | # | ||
3 | # determine-version -- report a useful version for releases | ||
4 | # | ||
5 | # Copyright 2008, Aron Griffis <agriffis@n01se.net> | ||
6 | # Copyright 2008, Oracle | ||
7 | # Released under the GNU GPLv2 | ||
8 | |||
9 | v="v0.16" | ||
10 | |||
11 | which git &> /dev/null | ||
12 | if [ $? == 0 ]; then | ||
13 | git branch >& /dev/null | ||
14 | if [ $? == 0 ]; then | ||
15 | if head=`git rev-parse --verify HEAD 2>/dev/null`; then | ||
16 | if tag=`git describe --tags 2>/dev/null`; then | ||
17 | v="$tag" | ||
18 | fi | ||
19 | |||
20 | # Are there uncommitted changes? | ||
21 | git update-index --refresh --unmerged > /dev/null | ||
22 | if git diff-index --name-only HEAD | \ | ||
23 | grep -v "^scripts/package" \ | ||
24 | | read dummy; then | ||
25 | v="$v"-dirty | ||
26 | fi | ||
27 | fi | ||
28 | fi | ||
29 | fi | ||
30 | |||
31 | echo "#ifndef __BUILD_VERSION" > .build-version.h | ||
32 | echo "#define __BUILD_VERSION" >> .build-version.h | ||
33 | echo "#define BTRFS_BUILD_VERSION \"Btrfs $v\"" >> .build-version.h | ||
34 | echo "#endif" >> .build-version.h | ||
35 | |||
36 | diff -q version.h .build-version.h >& /dev/null | ||
37 | |||
38 | if [ $? == 0 ]; then | ||
39 | rm .build-version.h | ||
40 | exit 0 | ||
41 | fi | ||
42 | |||
43 | mv .build-version.h version.h | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dd318ff280b2..19450bc53632 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
23 | #include <linux/random.h> | 23 | #include <linux/random.h> |
24 | #include <linux/iocontext.h> | 24 | #include <linux/iocontext.h> |
25 | #include <linux/capability.h> | ||
25 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
26 | #include "compat.h" | 27 | #include "compat.h" |
27 | #include "ctree.h" | 28 | #include "ctree.h" |
@@ -32,38 +33,14 @@ | |||
32 | #include "volumes.h" | 33 | #include "volumes.h" |
33 | #include "async-thread.h" | 34 | #include "async-thread.h" |
34 | 35 | ||
35 | struct map_lookup { | ||
36 | u64 type; | ||
37 | int io_align; | ||
38 | int io_width; | ||
39 | int stripe_len; | ||
40 | int sector_size; | ||
41 | int num_stripes; | ||
42 | int sub_stripes; | ||
43 | struct btrfs_bio_stripe stripes[]; | ||
44 | }; | ||
45 | |||
46 | static int init_first_rw_device(struct btrfs_trans_handle *trans, | 36 | static int init_first_rw_device(struct btrfs_trans_handle *trans, |
47 | struct btrfs_root *root, | 37 | struct btrfs_root *root, |
48 | struct btrfs_device *device); | 38 | struct btrfs_device *device); |
49 | static int btrfs_relocate_sys_chunks(struct btrfs_root *root); | 39 | static int btrfs_relocate_sys_chunks(struct btrfs_root *root); |
50 | 40 | ||
51 | #define map_lookup_size(n) (sizeof(struct map_lookup) + \ | ||
52 | (sizeof(struct btrfs_bio_stripe) * (n))) | ||
53 | |||
54 | static DEFINE_MUTEX(uuid_mutex); | 41 | static DEFINE_MUTEX(uuid_mutex); |
55 | static LIST_HEAD(fs_uuids); | 42 | static LIST_HEAD(fs_uuids); |
56 | 43 | ||
57 | void btrfs_lock_volumes(void) | ||
58 | { | ||
59 | mutex_lock(&uuid_mutex); | ||
60 | } | ||
61 | |||
62 | void btrfs_unlock_volumes(void) | ||
63 | { | ||
64 | mutex_unlock(&uuid_mutex); | ||
65 | } | ||
66 | |||
67 | static void lock_chunks(struct btrfs_root *root) | 44 | static void lock_chunks(struct btrfs_root *root) |
68 | { | 45 | { |
69 | mutex_lock(&root->fs_info->chunk_mutex); | 46 | mutex_lock(&root->fs_info->chunk_mutex); |
@@ -161,22 +138,25 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
161 | struct bio *cur; | 138 | struct bio *cur; |
162 | int again = 0; | 139 | int again = 0; |
163 | unsigned long num_run; | 140 | unsigned long num_run; |
164 | unsigned long num_sync_run; | ||
165 | unsigned long batch_run = 0; | 141 | unsigned long batch_run = 0; |
166 | unsigned long limit; | 142 | unsigned long limit; |
167 | unsigned long last_waited = 0; | 143 | unsigned long last_waited = 0; |
168 | int force_reg = 0; | 144 | int force_reg = 0; |
145 | struct blk_plug plug; | ||
146 | |||
147 | /* | ||
148 | * this function runs all the bios we've collected for | ||
149 | * a particular device. We don't want to wander off to | ||
150 | * another device without first sending all of these down. | ||
151 | * So, set up a plug here and finish it off before we return | ||
152 | */ | ||
153 | blk_start_plug(&plug); | ||
169 | 154 | ||
170 | bdi = blk_get_backing_dev_info(device->bdev); | 155 | bdi = blk_get_backing_dev_info(device->bdev); |
171 | fs_info = device->dev_root->fs_info; | 156 | fs_info = device->dev_root->fs_info; |
172 | limit = btrfs_async_submit_limit(fs_info); | 157 | limit = btrfs_async_submit_limit(fs_info); |
173 | limit = limit * 2 / 3; | 158 | limit = limit * 2 / 3; |
174 | 159 | ||
175 | /* we want to make sure that every time we switch from the sync | ||
176 | * list to the normal list, we unplug | ||
177 | */ | ||
178 | num_sync_run = 0; | ||
179 | |||
180 | loop: | 160 | loop: |
181 | spin_lock(&device->io_lock); | 161 | spin_lock(&device->io_lock); |
182 | 162 | ||
@@ -222,15 +202,6 @@ loop_lock: | |||
222 | 202 | ||
223 | spin_unlock(&device->io_lock); | 203 | spin_unlock(&device->io_lock); |
224 | 204 | ||
225 | /* | ||
226 | * if we're doing the regular priority list, make sure we unplug | ||
227 | * for any high prio bios we've sent down | ||
228 | */ | ||
229 | if (pending_bios == &device->pending_bios && num_sync_run > 0) { | ||
230 | num_sync_run = 0; | ||
231 | blk_run_backing_dev(bdi, NULL); | ||
232 | } | ||
233 | |||
234 | while (pending) { | 205 | while (pending) { |
235 | 206 | ||
236 | rmb(); | 207 | rmb(); |
@@ -258,19 +229,11 @@ loop_lock: | |||
258 | 229 | ||
259 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | 230 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); |
260 | 231 | ||
261 | if (cur->bi_rw & REQ_SYNC) | ||
262 | num_sync_run++; | ||
263 | |||
264 | submit_bio(cur->bi_rw, cur); | 232 | submit_bio(cur->bi_rw, cur); |
265 | num_run++; | 233 | num_run++; |
266 | batch_run++; | 234 | batch_run++; |
267 | if (need_resched()) { | 235 | if (need_resched()) |
268 | if (num_sync_run) { | ||
269 | blk_run_backing_dev(bdi, NULL); | ||
270 | num_sync_run = 0; | ||
271 | } | ||
272 | cond_resched(); | 236 | cond_resched(); |
273 | } | ||
274 | 237 | ||
275 | /* | 238 | /* |
276 | * we made progress, there is more work to do and the bdi | 239 | * we made progress, there is more work to do and the bdi |
@@ -303,13 +266,8 @@ loop_lock: | |||
303 | * against it before looping | 266 | * against it before looping |
304 | */ | 267 | */ |
305 | last_waited = ioc->last_waited; | 268 | last_waited = ioc->last_waited; |
306 | if (need_resched()) { | 269 | if (need_resched()) |
307 | if (num_sync_run) { | ||
308 | blk_run_backing_dev(bdi, NULL); | ||
309 | num_sync_run = 0; | ||
310 | } | ||
311 | cond_resched(); | 270 | cond_resched(); |
312 | } | ||
313 | continue; | 271 | continue; |
314 | } | 272 | } |
315 | spin_lock(&device->io_lock); | 273 | spin_lock(&device->io_lock); |
@@ -322,22 +280,6 @@ loop_lock: | |||
322 | } | 280 | } |
323 | } | 281 | } |
324 | 282 | ||
325 | if (num_sync_run) { | ||
326 | num_sync_run = 0; | ||
327 | blk_run_backing_dev(bdi, NULL); | ||
328 | } | ||
329 | /* | ||
330 | * IO has already been through a long path to get here. Checksumming, | ||
331 | * async helper threads, perhaps compression. We've done a pretty | ||
332 | * good job of collecting a batch of IO and should just unplug | ||
333 | * the device right away. | ||
334 | * | ||
335 | * This will help anyone who is waiting on the IO, they might have | ||
336 | * already unplugged, but managed to do so before the bio they | ||
337 | * cared about found its way down here. | ||
338 | */ | ||
339 | blk_run_backing_dev(bdi, NULL); | ||
340 | |||
341 | cond_resched(); | 283 | cond_resched(); |
342 | if (again) | 284 | if (again) |
343 | goto loop; | 285 | goto loop; |
@@ -348,6 +290,7 @@ loop_lock: | |||
348 | spin_unlock(&device->io_lock); | 290 | spin_unlock(&device->io_lock); |
349 | 291 | ||
350 | done: | 292 | done: |
293 | blk_finish_plug(&plug); | ||
351 | return 0; | 294 | return 0; |
352 | } | 295 | } |
353 | 296 | ||
@@ -398,7 +341,6 @@ static noinline int device_list_add(const char *path, | |||
398 | device->work.func = pending_bios_fn; | 341 | device->work.func = pending_bios_fn; |
399 | memcpy(device->uuid, disk_super->dev_item.uuid, | 342 | memcpy(device->uuid, disk_super->dev_item.uuid, |
400 | BTRFS_UUID_SIZE); | 343 | BTRFS_UUID_SIZE); |
401 | device->barriers = 1; | ||
402 | spin_lock_init(&device->io_lock); | 344 | spin_lock_init(&device->io_lock); |
403 | device->name = kstrdup(path, GFP_NOFS); | 345 | device->name = kstrdup(path, GFP_NOFS); |
404 | if (!device->name) { | 346 | if (!device->name) { |
@@ -408,17 +350,21 @@ static noinline int device_list_add(const char *path, | |||
408 | INIT_LIST_HEAD(&device->dev_alloc_list); | 350 | INIT_LIST_HEAD(&device->dev_alloc_list); |
409 | 351 | ||
410 | mutex_lock(&fs_devices->device_list_mutex); | 352 | mutex_lock(&fs_devices->device_list_mutex); |
411 | list_add(&device->dev_list, &fs_devices->devices); | 353 | list_add_rcu(&device->dev_list, &fs_devices->devices); |
412 | mutex_unlock(&fs_devices->device_list_mutex); | 354 | mutex_unlock(&fs_devices->device_list_mutex); |
413 | 355 | ||
414 | device->fs_devices = fs_devices; | 356 | device->fs_devices = fs_devices; |
415 | fs_devices->num_devices++; | 357 | fs_devices->num_devices++; |
416 | } else if (strcmp(device->name, path)) { | 358 | } else if (!device->name || strcmp(device->name, path)) { |
417 | name = kstrdup(path, GFP_NOFS); | 359 | name = kstrdup(path, GFP_NOFS); |
418 | if (!name) | 360 | if (!name) |
419 | return -ENOMEM; | 361 | return -ENOMEM; |
420 | kfree(device->name); | 362 | kfree(device->name); |
421 | device->name = name; | 363 | device->name = name; |
364 | if (device->missing) { | ||
365 | fs_devices->missing_devices--; | ||
366 | device->missing = 0; | ||
367 | } | ||
422 | } | 368 | } |
423 | 369 | ||
424 | if (found_transid > fs_devices->latest_trans) { | 370 | if (found_transid > fs_devices->latest_trans) { |
@@ -447,7 +393,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
447 | fs_devices->latest_trans = orig->latest_trans; | 393 | fs_devices->latest_trans = orig->latest_trans; |
448 | memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); | 394 | memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); |
449 | 395 | ||
450 | mutex_lock(&orig->device_list_mutex); | 396 | /* We hold the volume lock; it is safe to get the devices. */ |
451 | list_for_each_entry(orig_dev, &orig->devices, dev_list) { | 397 | list_for_each_entry(orig_dev, &orig->devices, dev_list) { |
452 | device = kzalloc(sizeof(*device), GFP_NOFS); | 398 | device = kzalloc(sizeof(*device), GFP_NOFS); |
453 | if (!device) | 399 | if (!device) |
@@ -462,7 +408,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
462 | device->devid = orig_dev->devid; | 408 | device->devid = orig_dev->devid; |
463 | device->work.func = pending_bios_fn; | 409 | device->work.func = pending_bios_fn; |
464 | memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); | 410 | memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); |
465 | device->barriers = 1; | ||
466 | spin_lock_init(&device->io_lock); | 411 | spin_lock_init(&device->io_lock); |
467 | INIT_LIST_HEAD(&device->dev_list); | 412 | INIT_LIST_HEAD(&device->dev_list); |
468 | INIT_LIST_HEAD(&device->dev_alloc_list); | 413 | INIT_LIST_HEAD(&device->dev_alloc_list); |
@@ -471,10 +416,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
471 | device->fs_devices = fs_devices; | 416 | device->fs_devices = fs_devices; |
472 | fs_devices->num_devices++; | 417 | fs_devices->num_devices++; |
473 | } | 418 | } |
474 | mutex_unlock(&orig->device_list_mutex); | ||
475 | return fs_devices; | 419 | return fs_devices; |
476 | error: | 420 | error: |
477 | mutex_unlock(&orig->device_list_mutex); | ||
478 | free_fs_devices(fs_devices); | 421 | free_fs_devices(fs_devices); |
479 | return ERR_PTR(-ENOMEM); | 422 | return ERR_PTR(-ENOMEM); |
480 | } | 423 | } |
@@ -485,13 +428,13 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) | |||
485 | 428 | ||
486 | mutex_lock(&uuid_mutex); | 429 | mutex_lock(&uuid_mutex); |
487 | again: | 430 | again: |
488 | mutex_lock(&fs_devices->device_list_mutex); | 431 | /* This is the initialized path; it is safe to release the devices. */ |
489 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { | 432 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { |
490 | if (device->in_fs_metadata) | 433 | if (device->in_fs_metadata) |
491 | continue; | 434 | continue; |
492 | 435 | ||
493 | if (device->bdev) { | 436 | if (device->bdev) { |
494 | close_bdev_exclusive(device->bdev, device->mode); | 437 | blkdev_put(device->bdev, device->mode); |
495 | device->bdev = NULL; | 438 | device->bdev = NULL; |
496 | fs_devices->open_devices--; | 439 | fs_devices->open_devices--; |
497 | } | 440 | } |
@@ -505,7 +448,6 @@ again: | |||
505 | kfree(device->name); | 448 | kfree(device->name); |
506 | kfree(device); | 449 | kfree(device); |
507 | } | 450 | } |
508 | mutex_unlock(&fs_devices->device_list_mutex); | ||
509 | 451 | ||
510 | if (fs_devices->seed) { | 452 | if (fs_devices->seed) { |
511 | fs_devices = fs_devices->seed; | 453 | fs_devices = fs_devices->seed; |
@@ -516,6 +458,29 @@ again: | |||
516 | return 0; | 458 | return 0; |
517 | } | 459 | } |
518 | 460 | ||
461 | static void __free_device(struct work_struct *work) | ||
462 | { | ||
463 | struct btrfs_device *device; | ||
464 | |||
465 | device = container_of(work, struct btrfs_device, rcu_work); | ||
466 | |||
467 | if (device->bdev) | ||
468 | blkdev_put(device->bdev, device->mode); | ||
469 | |||
470 | kfree(device->name); | ||
471 | kfree(device); | ||
472 | } | ||
473 | |||
474 | static void free_device(struct rcu_head *head) | ||
475 | { | ||
476 | struct btrfs_device *device; | ||
477 | |||
478 | device = container_of(head, struct btrfs_device, rcu); | ||
479 | |||
480 | INIT_WORK(&device->rcu_work, __free_device); | ||
481 | schedule_work(&device->rcu_work); | ||
482 | } | ||
483 | |||
519 | static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | 484 | static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) |
520 | { | 485 | { |
521 | struct btrfs_device *device; | 486 | struct btrfs_device *device; |
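The device list becomes RCU-protected: additions use list_add_rcu(), a closed device is swapped for a sanitized copy via list_replace_rcu(), and the old structure goes through call_rcu(), whose callback (free_device() above) only bounces to a workqueue because blkdev_put() can sleep while RCU callbacks run in softirq context. Readers can then walk the list without the mutex; a sketch of the reader side, assuming <linux/rculist.h>:

    #include <linux/rculist.h>

    struct btrfs_device *device;

    rcu_read_lock();
    list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
            /* entries are never freed while we hold the read lock */
            if (device->bdev)
                    pr_info("device %s open\n", device->name);
    }
    rcu_read_unlock();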
@@ -523,20 +488,32 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
523 | if (--fs_devices->opened > 0) | 488 | if (--fs_devices->opened > 0) |
524 | return 0; | 489 | return 0; |
525 | 490 | ||
491 | mutex_lock(&fs_devices->device_list_mutex); | ||
526 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | 492 | list_for_each_entry(device, &fs_devices->devices, dev_list) { |
527 | if (device->bdev) { | 493 | struct btrfs_device *new_device; |
528 | close_bdev_exclusive(device->bdev, device->mode); | 494 | |
495 | if (device->bdev) | ||
529 | fs_devices->open_devices--; | 496 | fs_devices->open_devices--; |
530 | } | 497 | |
531 | if (device->writeable) { | 498 | if (device->writeable) { |
532 | list_del_init(&device->dev_alloc_list); | 499 | list_del_init(&device->dev_alloc_list); |
533 | fs_devices->rw_devices--; | 500 | fs_devices->rw_devices--; |
534 | } | 501 | } |
535 | 502 | ||
536 | device->bdev = NULL; | 503 | new_device = kmalloc(sizeof(*new_device), GFP_NOFS); |
537 | device->writeable = 0; | 504 | BUG_ON(!new_device); |
538 | device->in_fs_metadata = 0; | 505 | memcpy(new_device, device, sizeof(*new_device)); |
506 | new_device->name = kstrdup(device->name, GFP_NOFS); | ||
507 | BUG_ON(device->name && !new_device->name); | ||
508 | new_device->bdev = NULL; | ||
509 | new_device->writeable = 0; | ||
510 | new_device->in_fs_metadata = 0; | ||
511 | list_replace_rcu(&device->dev_list, &new_device->dev_list); | ||
512 | |||
513 | call_rcu(&device->rcu, free_device); | ||
539 | } | 514 | } |
515 | mutex_unlock(&fs_devices->device_list_mutex); | ||
516 | |||
540 | WARN_ON(fs_devices->open_devices); | 517 | WARN_ON(fs_devices->open_devices); |
541 | WARN_ON(fs_devices->rw_devices); | 518 | WARN_ON(fs_devices->rw_devices); |
542 | fs_devices->opened = 0; | 519 | fs_devices->opened = 0; |
@@ -582,13 +559,15 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
582 | int seeding = 1; | 559 | int seeding = 1; |
583 | int ret = 0; | 560 | int ret = 0; |
584 | 561 | ||
562 | flags |= FMODE_EXCL; | ||
563 | |||
585 | list_for_each_entry(device, head, dev_list) { | 564 | list_for_each_entry(device, head, dev_list) { |
586 | if (device->bdev) | 565 | if (device->bdev) |
587 | continue; | 566 | continue; |
588 | if (!device->name) | 567 | if (!device->name) |
589 | continue; | 568 | continue; |
590 | 569 | ||
591 | bdev = open_bdev_exclusive(device->name, flags, holder); | 570 | bdev = blkdev_get_by_path(device->name, flags, holder); |
592 | if (IS_ERR(bdev)) { | 571 | if (IS_ERR(bdev)) { |
593 | printk(KERN_INFO "open %s failed\n", device->name); | 572 | printk(KERN_INFO "open %s failed\n", device->name); |
594 | goto error; | 573 | goto error; |
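open_bdev_exclusive()/close_bdev_exclusive() are gone from the block layer; callers now OR FMODE_EXCL into the mode and call blkdev_get_by_path()/blkdev_put() directly, with the holder pointer identifying the exclusive owner. Sketch of the new pattern:

    #include <linux/fs.h>

    struct block_device *bdev;

    flags |= FMODE_EXCL;              /* exclusive open; 'holder' identifies us */
    bdev = blkdev_get_by_path(device->name, flags, holder);
    if (IS_ERR(bdev))
            return PTR_ERR(bdev);
    /* ... use the device ... */
    blkdev_put(bdev, flags);          /* mode must match the corresponding get */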
@@ -596,8 +575,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
596 | set_blocksize(bdev, 4096); | 575 | set_blocksize(bdev, 4096); |
597 | 576 | ||
598 | bh = btrfs_read_dev_super(bdev); | 577 | bh = btrfs_read_dev_super(bdev); |
599 | if (!bh) | 578 | if (!bh) { |
579 | ret = -EINVAL; | ||
600 | goto error_close; | 580 | goto error_close; |
581 | } | ||
601 | 582 | ||
602 | disk_super = (struct btrfs_super_block *)bh->b_data; | 583 | disk_super = (struct btrfs_super_block *)bh->b_data; |
603 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 584 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
@@ -635,12 +616,13 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
635 | list_add(&device->dev_alloc_list, | 616 | list_add(&device->dev_alloc_list, |
636 | &fs_devices->alloc_list); | 617 | &fs_devices->alloc_list); |
637 | } | 618 | } |
619 | brelse(bh); | ||
638 | continue; | 620 | continue; |
639 | 621 | ||
640 | error_brelse: | 622 | error_brelse: |
641 | brelse(bh); | 623 | brelse(bh); |
642 | error_close: | 624 | error_close: |
643 | close_bdev_exclusive(bdev, FMODE_READ); | 625 | blkdev_put(bdev, flags); |
644 | error: | 626 | error: |
645 | continue; | 627 | continue; |
646 | } | 628 | } |
@@ -686,7 +668,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
686 | 668 | ||
687 | mutex_lock(&uuid_mutex); | 669 | mutex_lock(&uuid_mutex); |
688 | 670 | ||
689 | bdev = open_bdev_exclusive(path, flags, holder); | 671 | flags |= FMODE_EXCL; |
672 | bdev = blkdev_get_by_path(path, flags, holder); | ||
690 | 673 | ||
691 | if (IS_ERR(bdev)) { | 674 | if (IS_ERR(bdev)) { |
692 | ret = PTR_ERR(bdev); | 675 | ret = PTR_ERR(bdev); |
@@ -698,7 +681,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
698 | goto error_close; | 681 | goto error_close; |
699 | bh = btrfs_read_dev_super(bdev); | 682 | bh = btrfs_read_dev_super(bdev); |
700 | if (!bh) { | 683 | if (!bh) { |
701 | ret = -EIO; | 684 | ret = -EINVAL; |
702 | goto error_close; | 685 | goto error_close; |
703 | } | 686 | } |
704 | disk_super = (struct btrfs_super_block *)bh->b_data; | 687 | disk_super = (struct btrfs_super_block *)bh->b_data; |
@@ -706,77 +689,178 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
706 | transid = btrfs_super_generation(disk_super); | 689 | transid = btrfs_super_generation(disk_super); |
707 | if (disk_super->label[0]) | 690 | if (disk_super->label[0]) |
708 | printk(KERN_INFO "device label %s ", disk_super->label); | 691 | printk(KERN_INFO "device label %s ", disk_super->label); |
709 | else { | 692 | else |
710 | /* FIXME, make a readl uuid parser */ | 693 | printk(KERN_INFO "device fsid %pU ", disk_super->fsid); |
711 | printk(KERN_INFO "device fsid %llx-%llx ", | ||
712 | *(unsigned long long *)disk_super->fsid, | ||
713 | *(unsigned long long *)(disk_super->fsid + 8)); | ||
714 | } | ||
715 | printk(KERN_CONT "devid %llu transid %llu %s\n", | 694 | printk(KERN_CONT "devid %llu transid %llu %s\n", |
716 | (unsigned long long)devid, (unsigned long long)transid, path); | 695 | (unsigned long long)devid, (unsigned long long)transid, path); |
717 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); | 696 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); |
718 | 697 | ||
719 | brelse(bh); | 698 | brelse(bh); |
720 | error_close: | 699 | error_close: |
721 | close_bdev_exclusive(bdev, flags); | 700 | blkdev_put(bdev, flags); |
722 | error: | 701 | error: |
723 | mutex_unlock(&uuid_mutex); | 702 | mutex_unlock(&uuid_mutex); |
724 | return ret; | 703 | return ret; |
725 | } | 704 | } |
726 | 705 | ||
706 | /* helper to account the used device space in the range */ | ||
707 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | ||
708 | u64 end, u64 *length) | ||
709 | { | ||
710 | struct btrfs_key key; | ||
711 | struct btrfs_root *root = device->dev_root; | ||
712 | struct btrfs_dev_extent *dev_extent; | ||
713 | struct btrfs_path *path; | ||
714 | u64 extent_end; | ||
715 | int ret; | ||
716 | int slot; | ||
717 | struct extent_buffer *l; | ||
718 | |||
719 | *length = 0; | ||
720 | |||
721 | if (start >= device->total_bytes) | ||
722 | return 0; | ||
723 | |||
724 | path = btrfs_alloc_path(); | ||
725 | if (!path) | ||
726 | return -ENOMEM; | ||
727 | path->reada = 2; | ||
728 | |||
729 | key.objectid = device->devid; | ||
730 | key.offset = start; | ||
731 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
732 | |||
733 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
734 | if (ret < 0) | ||
735 | goto out; | ||
736 | if (ret > 0) { | ||
737 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | ||
738 | if (ret < 0) | ||
739 | goto out; | ||
740 | } | ||
741 | |||
742 | while (1) { | ||
743 | l = path->nodes[0]; | ||
744 | slot = path->slots[0]; | ||
745 | if (slot >= btrfs_header_nritems(l)) { | ||
746 | ret = btrfs_next_leaf(root, path); | ||
747 | if (ret == 0) | ||
748 | continue; | ||
749 | if (ret < 0) | ||
750 | goto out; | ||
751 | |||
752 | break; | ||
753 | } | ||
754 | btrfs_item_key_to_cpu(l, &key, slot); | ||
755 | |||
756 | if (key.objectid < device->devid) | ||
757 | goto next; | ||
758 | |||
759 | if (key.objectid > device->devid) | ||
760 | break; | ||
761 | |||
762 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
763 | goto next; | ||
764 | |||
765 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | ||
766 | extent_end = key.offset + btrfs_dev_extent_length(l, | ||
767 | dev_extent); | ||
768 | if (key.offset <= start && extent_end > end) { | ||
769 | *length = end - start + 1; | ||
770 | break; | ||
771 | } else if (key.offset <= start && extent_end > start) | ||
772 | *length += extent_end - start; | ||
773 | else if (key.offset > start && extent_end <= end) | ||
774 | *length += extent_end - key.offset; | ||
775 | else if (key.offset > start && key.offset <= end) { | ||
776 | *length += end - key.offset + 1; | ||
777 | break; | ||
778 | } else if (key.offset > end) | ||
779 | break; | ||
780 | |||
781 | next: | ||
782 | path->slots[0]++; | ||
783 | } | ||
784 | ret = 0; | ||
785 | out: | ||
786 | btrfs_free_path(path); | ||
787 | return ret; | ||
788 | } | ||
789 | |||
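btrfs_account_dev_extents_size() walks the DEV_EXTENT items of one device and sums how much of each extent [key.offset, extent_end) intersects the inclusive query range [start, end]; the if/else ladder above is just the four possible overlap cases. As a standalone sketch of the per-extent contribution (hypothetical helper, not part of the patch):

    static u64 dev_extent_overlap(u64 ext_start, u64 ext_end,
                                  u64 start, u64 end)
    {
            if (ext_start <= start && ext_end > end)
                    return end - start + 1;     /* extent covers the whole range */
            if (ext_start <= start && ext_end > start)
                    return ext_end - start;     /* overlaps the head of the range */
            if (ext_start > start && ext_end <= end)
                    return ext_end - ext_start; /* extent fully inside the range */
            if (ext_start > start && ext_start <= end)
                    return end - ext_start + 1; /* overlaps the tail of the range */
            return 0;                           /* disjoint */
    }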
727 | /* | 790 | /* |
791 | * find_free_dev_extent - find free space in the specified device | ||
792 | * @trans: transaction handler | ||
793 | * @device: the device which we search the free space in | ||
794 | * @num_bytes: the size of the free space that we need | ||
795 | * @start: store the start of the free space. | ||
796 | * @len: the size of the free space that we find, or the size of the max | ||
797 | * free space if we don't find suitable free space | ||
798 | * | ||
728 | * this uses a pretty simple search, the expectation is that it is | 799 | * this uses a pretty simple search, the expectation is that it is |
729 | * called very infrequently and that a given device has a small number | 800 | * called very infrequently and that a given device has a small number |
730 | * of extents | 801 | * of extents |
802 | * | ||
803 | * @start is used to store the start of the free space if we find it. But if we | ||
804 | * don't find suitable free space, it will be used to store the start position | ||
805 | * of the max free space. | ||
806 | * | ||
807 | * @len is used to store the size of the free space that we find. | ||
808 | * But if we don't find suitable free space, it is used to store the size of | ||
809 | * the max free space. | ||
731 | */ | 810 | */ |
732 | int find_free_dev_extent(struct btrfs_trans_handle *trans, | 811 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
733 | struct btrfs_device *device, u64 num_bytes, | 812 | struct btrfs_device *device, u64 num_bytes, |
734 | u64 *start, u64 *max_avail) | 813 | u64 *start, u64 *len) |
735 | { | 814 | { |
736 | struct btrfs_key key; | 815 | struct btrfs_key key; |
737 | struct btrfs_root *root = device->dev_root; | 816 | struct btrfs_root *root = device->dev_root; |
738 | struct btrfs_dev_extent *dev_extent = NULL; | 817 | struct btrfs_dev_extent *dev_extent; |
739 | struct btrfs_path *path; | 818 | struct btrfs_path *path; |
740 | u64 hole_size = 0; | 819 | u64 hole_size; |
741 | u64 last_byte = 0; | 820 | u64 max_hole_start; |
742 | u64 search_start = 0; | 821 | u64 max_hole_size; |
822 | u64 extent_end; | ||
823 | u64 search_start; | ||
743 | u64 search_end = device->total_bytes; | 824 | u64 search_end = device->total_bytes; |
744 | int ret; | 825 | int ret; |
745 | int slot = 0; | 826 | int slot; |
746 | int start_found; | ||
747 | struct extent_buffer *l; | 827 | struct extent_buffer *l; |
748 | 828 | ||
749 | path = btrfs_alloc_path(); | ||
750 | if (!path) | ||
751 | return -ENOMEM; | ||
752 | path->reada = 2; | ||
753 | start_found = 0; | ||
754 | |||
755 | /* FIXME use last free of some kind */ | 829 | /* FIXME use last free of some kind */ |
756 | 830 | ||
757 | /* we don't want to overwrite the superblock on the drive, | 831 | /* we don't want to overwrite the superblock on the drive, |
758 | * so we make sure to start at an offset of at least 1MB | 832 | * so we make sure to start at an offset of at least 1MB |
759 | */ | 833 | */ |
760 | search_start = max((u64)1024 * 1024, search_start); | 834 | search_start = max(root->fs_info->alloc_start, 1024ull * 1024); |
835 | |||
836 | max_hole_start = search_start; | ||
837 | max_hole_size = 0; | ||
761 | 838 | ||
762 | if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) | 839 | if (search_start >= search_end) { |
763 | search_start = max(root->fs_info->alloc_start, search_start); | 840 | ret = -ENOSPC; |
841 | goto error; | ||
842 | } | ||
843 | |||
844 | path = btrfs_alloc_path(); | ||
845 | if (!path) { | ||
846 | ret = -ENOMEM; | ||
847 | goto error; | ||
848 | } | ||
849 | path->reada = 2; | ||
764 | 850 | ||
765 | key.objectid = device->devid; | 851 | key.objectid = device->devid; |
766 | key.offset = search_start; | 852 | key.offset = search_start; |
767 | key.type = BTRFS_DEV_EXTENT_KEY; | 853 | key.type = BTRFS_DEV_EXTENT_KEY; |
854 | |||
768 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 855 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); |
769 | if (ret < 0) | 856 | if (ret < 0) |
770 | goto error; | 857 | goto out; |
771 | if (ret > 0) { | 858 | if (ret > 0) { |
772 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | 859 | ret = btrfs_previous_item(root, path, key.objectid, key.type); |
773 | if (ret < 0) | 860 | if (ret < 0) |
774 | goto error; | 861 | goto out; |
775 | if (ret > 0) | ||
776 | start_found = 1; | ||
777 | } | 862 | } |
778 | l = path->nodes[0]; | 863 | |
779 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | ||
780 | while (1) { | 864 | while (1) { |
781 | l = path->nodes[0]; | 865 | l = path->nodes[0]; |
782 | slot = path->slots[0]; | 866 | slot = path->slots[0]; |
@@ -785,24 +869,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
785 | if (ret == 0) | 869 | if (ret == 0) |
786 | continue; | 870 | continue; |
787 | if (ret < 0) | 871 | if (ret < 0) |
788 | goto error; | 872 | goto out; |
789 | no_more_items: | 873 | |
790 | if (!start_found) { | 874 | break; |
791 | if (search_start >= search_end) { | ||
792 | ret = -ENOSPC; | ||
793 | goto error; | ||
794 | } | ||
795 | *start = search_start; | ||
796 | start_found = 1; | ||
797 | goto check_pending; | ||
798 | } | ||
799 | *start = last_byte > search_start ? | ||
800 | last_byte : search_start; | ||
801 | if (search_end <= *start) { | ||
802 | ret = -ENOSPC; | ||
803 | goto error; | ||
804 | } | ||
805 | goto check_pending; | ||
806 | } | 875 | } |
807 | btrfs_item_key_to_cpu(l, &key, slot); | 876 | btrfs_item_key_to_cpu(l, &key, slot); |
808 | 877 | ||
@@ -810,48 +879,62 @@ no_more_items: | |||
810 | goto next; | 879 | goto next; |
811 | 880 | ||
812 | if (key.objectid > device->devid) | 881 | if (key.objectid > device->devid) |
813 | goto no_more_items; | 882 | break; |
814 | 883 | ||
815 | if (key.offset >= search_start && key.offset > last_byte && | 884 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) |
816 | start_found) { | 885 | goto next; |
817 | if (last_byte < search_start) | ||
818 | last_byte = search_start; | ||
819 | hole_size = key.offset - last_byte; | ||
820 | 886 | ||
821 | if (hole_size > *max_avail) | 887 | if (key.offset > search_start) { |
822 | *max_avail = hole_size; | 888 | hole_size = key.offset - search_start; |
889 | |||
890 | if (hole_size > max_hole_size) { | ||
891 | max_hole_start = search_start; | ||
892 | max_hole_size = hole_size; | ||
893 | } | ||
823 | 894 | ||
824 | if (key.offset > last_byte && | 895 | /* |
825 | hole_size >= num_bytes) { | 896 | * If this free space is greater than what we need, |
826 | *start = last_byte; | 897 | * it must be the max free space that we have found |
827 | goto check_pending; | 898 | * so far, so max_hole_start must point to the start |
899 | * of this free space and the length of this free space | ||
900 | * is stored in max_hole_size. Thus, we return | ||
901 | * max_hole_start and max_hole_size and go back to the | ||
902 | * caller. | ||
903 | */ | ||
904 | if (hole_size >= num_bytes) { | ||
905 | ret = 0; | ||
906 | goto out; | ||
828 | } | 907 | } |
829 | } | 908 | } |
830 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
831 | goto next; | ||
832 | 909 | ||
833 | start_found = 1; | ||
834 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 910 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
835 | last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); | 911 | extent_end = key.offset + btrfs_dev_extent_length(l, |
912 | dev_extent); | ||
913 | if (extent_end > search_start) | ||
914 | search_start = extent_end; | ||
836 | next: | 915 | next: |
837 | path->slots[0]++; | 916 | path->slots[0]++; |
838 | cond_resched(); | 917 | cond_resched(); |
839 | } | 918 | } |
840 | check_pending: | ||
841 | /* we have to make sure we didn't find an extent that has already | ||
842 | * been allocated by the map tree or the original allocation | ||
843 | */ | ||
844 | BUG_ON(*start < search_start); | ||
845 | 919 | ||
846 | if (*start + num_bytes > search_end) { | 920 | hole_size = search_end - search_start; |
847 | ret = -ENOSPC; | 921 | if (hole_size > max_hole_size) { |
848 | goto error; | 922 | max_hole_start = search_start; |
923 | max_hole_size = hole_size; | ||
849 | } | 924 | } |
850 | /* check for pending inserts here */ | ||
851 | ret = 0; | ||
852 | 925 | ||
853 | error: | 926 | /* See above. */ |
927 | if (hole_size < num_bytes) | ||
928 | ret = -ENOSPC; | ||
929 | else | ||
930 | ret = 0; | ||
931 | |||
932 | out: | ||
854 | btrfs_free_path(path); | 933 | btrfs_free_path(path); |
934 | error: | ||
935 | *start = max_hole_start; | ||
936 | if (len) | ||
937 | *len = max_hole_size; | ||
855 | return ret; | 938 | return ret; |
856 | } | 939 | } |
857 | 940 | ||
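The restructured find_free_dev_extent() is now a single ordered pass: every gap between the running search_start and the next dev extent is a candidate hole, the span from the last extent up to search_end is the final candidate, and the scan stops early as soon as a hole of at least num_bytes turns up. Condensed to a loop over an already-collected, offset-sorted extent array (hypothetical helper, for illustration only):

    struct hole { u64 start; u64 len; };

    static struct hole max_hole(const u64 *ext_start, const u64 *ext_len, int n,
                                u64 search_start, u64 search_end)
    {
            struct hole h = { .start = search_start, .len = 0 };
            int i;

            for (i = 0; i < n; i++) {
                    if (ext_start[i] > search_start &&
                        ext_start[i] - search_start > h.len) {
                            h.start = search_start;
                            h.len = ext_start[i] - search_start;
                    }
                    /* skip past this extent */
                    if (ext_start[i] + ext_len[i] > search_start)
                            search_start = ext_start[i] + ext_len[i];
            }
            /* trailing hole up to the end of the device */
            if (search_end > search_start &&
                search_end - search_start > h.len) {
                    h.start = search_start;
                    h.len = search_end - search_start;
            }
            return h;
    }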
@@ -879,14 +962,14 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, | |||
879 | if (ret > 0) { | 962 | if (ret > 0) { |
880 | ret = btrfs_previous_item(root, path, key.objectid, | 963 | ret = btrfs_previous_item(root, path, key.objectid, |
881 | BTRFS_DEV_EXTENT_KEY); | 964 | BTRFS_DEV_EXTENT_KEY); |
882 | BUG_ON(ret); | 965 | if (ret) |
966 | goto out; | ||
883 | leaf = path->nodes[0]; | 967 | leaf = path->nodes[0]; |
884 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 968 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
885 | extent = btrfs_item_ptr(leaf, path->slots[0], | 969 | extent = btrfs_item_ptr(leaf, path->slots[0], |
886 | struct btrfs_dev_extent); | 970 | struct btrfs_dev_extent); |
887 | BUG_ON(found_key.offset > start || found_key.offset + | 971 | BUG_ON(found_key.offset > start || found_key.offset + |
888 | btrfs_dev_extent_length(leaf, extent) < start); | 972 | btrfs_dev_extent_length(leaf, extent) < start); |
889 | ret = 0; | ||
890 | } else if (ret == 0) { | 973 | } else if (ret == 0) { |
891 | leaf = path->nodes[0]; | 974 | leaf = path->nodes[0]; |
892 | extent = btrfs_item_ptr(leaf, path->slots[0], | 975 | extent = btrfs_item_ptr(leaf, path->slots[0], |
@@ -897,8 +980,8 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, | |||
897 | if (device->bytes_used > 0) | 980 | if (device->bytes_used > 0) |
898 | device->bytes_used -= btrfs_dev_extent_length(leaf, extent); | 981 | device->bytes_used -= btrfs_dev_extent_length(leaf, extent); |
899 | ret = btrfs_del_item(trans, root, path); | 982 | ret = btrfs_del_item(trans, root, path); |
900 | BUG_ON(ret); | ||
901 | 983 | ||
984 | out: | ||
902 | btrfs_free_path(path); | 985 | btrfs_free_path(path); |
903 | return ret; | 986 | return ret; |
904 | } | 987 | } |
@@ -1098,6 +1181,10 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, | |||
1098 | return -ENOMEM; | 1181 | return -ENOMEM; |
1099 | 1182 | ||
1100 | trans = btrfs_start_transaction(root, 0); | 1183 | trans = btrfs_start_transaction(root, 0); |
1184 | if (IS_ERR(trans)) { | ||
1185 | btrfs_free_path(path); | ||
1186 | return PTR_ERR(trans); | ||
1187 | } | ||
1101 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | 1188 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; |
1102 | key.type = BTRFS_DEV_ITEM_KEY; | 1189 | key.type = BTRFS_DEV_ITEM_KEY; |
1103 | key.offset = device->devid; | 1190 | key.offset = device->devid; |
@@ -1129,11 +1216,13 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1129 | struct block_device *bdev; | 1216 | struct block_device *bdev; |
1130 | struct buffer_head *bh = NULL; | 1217 | struct buffer_head *bh = NULL; |
1131 | struct btrfs_super_block *disk_super; | 1218 | struct btrfs_super_block *disk_super; |
1219 | struct btrfs_fs_devices *cur_devices; | ||
1132 | u64 all_avail; | 1220 | u64 all_avail; |
1133 | u64 devid; | 1221 | u64 devid; |
1134 | u64 num_devices; | 1222 | u64 num_devices; |
1135 | u8 *dev_uuid; | 1223 | u8 *dev_uuid; |
1136 | int ret = 0; | 1224 | int ret = 0; |
1225 | bool clear_super = false; | ||
1137 | 1226 | ||
1138 | mutex_lock(&uuid_mutex); | 1227 | mutex_lock(&uuid_mutex); |
1139 | mutex_lock(&root->fs_info->volume_mutex); | 1228 | mutex_lock(&root->fs_info->volume_mutex); |
@@ -1164,14 +1253,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1164 | 1253 | ||
1165 | device = NULL; | 1254 | device = NULL; |
1166 | devices = &root->fs_info->fs_devices->devices; | 1255 | devices = &root->fs_info->fs_devices->devices; |
1167 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 1256 | /* |
1257 | * It is safe to read the devices since the volume_mutex | ||
1258 | * is held. | ||
1259 | */ | ||
1168 | list_for_each_entry(tmp, devices, dev_list) { | 1260 | list_for_each_entry(tmp, devices, dev_list) { |
1169 | if (tmp->in_fs_metadata && !tmp->bdev) { | 1261 | if (tmp->in_fs_metadata && !tmp->bdev) { |
1170 | device = tmp; | 1262 | device = tmp; |
1171 | break; | 1263 | break; |
1172 | } | 1264 | } |
1173 | } | 1265 | } |
1174 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1175 | bdev = NULL; | 1266 | bdev = NULL; |
1176 | bh = NULL; | 1267 | bh = NULL; |
1177 | disk_super = NULL; | 1268 | disk_super = NULL; |
@@ -1181,8 +1272,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1181 | goto out; | 1272 | goto out; |
1182 | } | 1273 | } |
1183 | } else { | 1274 | } else { |
1184 | bdev = open_bdev_exclusive(device_path, FMODE_READ, | 1275 | bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL, |
1185 | root->fs_info->bdev_holder); | 1276 | root->fs_info->bdev_holder); |
1186 | if (IS_ERR(bdev)) { | 1277 | if (IS_ERR(bdev)) { |
1187 | ret = PTR_ERR(bdev); | 1278 | ret = PTR_ERR(bdev); |
1188 | goto out; | 1279 | goto out; |
@@ -1191,7 +1282,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1191 | set_blocksize(bdev, 4096); | 1282 | set_blocksize(bdev, 4096); |
1192 | bh = btrfs_read_dev_super(bdev); | 1283 | bh = btrfs_read_dev_super(bdev); |
1193 | if (!bh) { | 1284 | if (!bh) { |
1194 | ret = -EIO; | 1285 | ret = -EINVAL; |
1195 | goto error_close; | 1286 | goto error_close; |
1196 | } | 1287 | } |
1197 | disk_super = (struct btrfs_super_block *)bh->b_data; | 1288 | disk_super = (struct btrfs_super_block *)bh->b_data; |
@@ -1213,31 +1304,39 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1213 | } | 1304 | } |
1214 | 1305 | ||
1215 | if (device->writeable) { | 1306 | if (device->writeable) { |
1307 | lock_chunks(root); | ||
1216 | list_del_init(&device->dev_alloc_list); | 1308 | list_del_init(&device->dev_alloc_list); |
1309 | unlock_chunks(root); | ||
1217 | root->fs_info->fs_devices->rw_devices--; | 1310 | root->fs_info->fs_devices->rw_devices--; |
1311 | clear_super = true; | ||
1218 | } | 1312 | } |
1219 | 1313 | ||
1220 | ret = btrfs_shrink_device(device, 0); | 1314 | ret = btrfs_shrink_device(device, 0); |
1221 | if (ret) | 1315 | if (ret) |
1222 | goto error_brelse; | 1316 | goto error_undo; |
1223 | 1317 | ||
1224 | ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); | 1318 | ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); |
1225 | if (ret) | 1319 | if (ret) |
1226 | goto error_brelse; | 1320 | goto error_undo; |
1227 | 1321 | ||
1228 | device->in_fs_metadata = 0; | 1322 | device->in_fs_metadata = 0; |
1323 | btrfs_scrub_cancel_dev(root, device); | ||
1229 | 1324 | ||
1230 | /* | 1325 | /* |
1231 | * the device list mutex makes sure that we don't change | 1326 | * the device list mutex makes sure that we don't change |
1232 | * the device list while someone else is writing out all | 1327 | * the device list while someone else is writing out all |
1233 | * the device supers. | 1328 | * the device supers. |
1234 | */ | 1329 | */ |
1330 | |||
1331 | cur_devices = device->fs_devices; | ||
1235 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 1332 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
1236 | list_del_init(&device->dev_list); | 1333 | list_del_rcu(&device->dev_list); |
1237 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1238 | 1334 | ||
1239 | device->fs_devices->num_devices--; | 1335 | device->fs_devices->num_devices--; |
1240 | 1336 | ||
1337 | if (device->missing) | ||
1338 | root->fs_info->fs_devices->missing_devices--; | ||
1339 | |||
1241 | next_device = list_entry(root->fs_info->fs_devices->devices.next, | 1340 | next_device = list_entry(root->fs_info->fs_devices->devices.next, |
1242 | struct btrfs_device, dev_list); | 1341 | struct btrfs_device, dev_list); |
1243 | if (device->bdev == root->fs_info->sb->s_bdev) | 1342 | if (device->bdev == root->fs_info->sb->s_bdev) |
@@ -1245,34 +1344,36 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1245 | if (device->bdev == root->fs_info->fs_devices->latest_bdev) | 1344 | if (device->bdev == root->fs_info->fs_devices->latest_bdev) |
1246 | root->fs_info->fs_devices->latest_bdev = next_device->bdev; | 1345 | root->fs_info->fs_devices->latest_bdev = next_device->bdev; |
1247 | 1346 | ||
1248 | if (device->bdev) { | 1347 | if (device->bdev) |
1249 | close_bdev_exclusive(device->bdev, device->mode); | ||
1250 | device->bdev = NULL; | ||
1251 | device->fs_devices->open_devices--; | 1348 | device->fs_devices->open_devices--; |
1252 | } | 1349 | |
1350 | call_rcu(&device->rcu, free_device); | ||
1351 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1253 | 1352 | ||
1254 | num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; | 1353 | num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; |
1255 | btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices); | 1354 | btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices); |
1256 | 1355 | ||
1257 | if (device->fs_devices->open_devices == 0) { | 1356 | if (cur_devices->open_devices == 0) { |
1258 | struct btrfs_fs_devices *fs_devices; | 1357 | struct btrfs_fs_devices *fs_devices; |
1259 | fs_devices = root->fs_info->fs_devices; | 1358 | fs_devices = root->fs_info->fs_devices; |
1260 | while (fs_devices) { | 1359 | while (fs_devices) { |
1261 | if (fs_devices->seed == device->fs_devices) | 1360 | if (fs_devices->seed == cur_devices) |
1262 | break; | 1361 | break; |
1263 | fs_devices = fs_devices->seed; | 1362 | fs_devices = fs_devices->seed; |
1264 | } | 1363 | } |
1265 | fs_devices->seed = device->fs_devices->seed; | 1364 | fs_devices->seed = cur_devices->seed; |
1266 | device->fs_devices->seed = NULL; | 1365 | cur_devices->seed = NULL; |
1267 | __btrfs_close_devices(device->fs_devices); | 1366 | lock_chunks(root); |
1268 | free_fs_devices(device->fs_devices); | 1367 | __btrfs_close_devices(cur_devices); |
1368 | unlock_chunks(root); | ||
1369 | free_fs_devices(cur_devices); | ||
1269 | } | 1370 | } |
1270 | 1371 | ||
1271 | /* | 1372 | /* |
1272 | * at this point, the device is zero sized. We want to | 1373 | * at this point, the device is zero sized. We want to |
1273 | * remove it from the devices list and zero out the old super | 1374 | * remove it from the devices list and zero out the old super |
1274 | */ | 1375 | */ |
1275 | if (device->writeable) { | 1376 | if (clear_super) { |
1276 | /* make sure this device isn't detected as part of | 1377 | /* make sure this device isn't detected as part of |
1277 | * the FS anymore | 1378 | * the FS anymore |
1278 | */ | 1379 | */ |
@@ -1281,19 +1382,26 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1281 | sync_dirty_buffer(bh); | 1382 | sync_dirty_buffer(bh); |
1282 | } | 1383 | } |
1283 | 1384 | ||
1284 | kfree(device->name); | ||
1285 | kfree(device); | ||
1286 | ret = 0; | 1385 | ret = 0; |
1287 | 1386 | ||
1288 | error_brelse: | 1387 | error_brelse: |
1289 | brelse(bh); | 1388 | brelse(bh); |
1290 | error_close: | 1389 | error_close: |
1291 | if (bdev) | 1390 | if (bdev) |
1292 | close_bdev_exclusive(bdev, FMODE_READ); | 1391 | blkdev_put(bdev, FMODE_READ | FMODE_EXCL); |
1293 | out: | 1392 | out: |
1294 | mutex_unlock(&root->fs_info->volume_mutex); | 1393 | mutex_unlock(&root->fs_info->volume_mutex); |
1295 | mutex_unlock(&uuid_mutex); | 1394 | mutex_unlock(&uuid_mutex); |
1296 | return ret; | 1395 | return ret; |
1396 | error_undo: | ||
1397 | if (device->writeable) { | ||
1398 | lock_chunks(root); | ||
1399 | list_add(&device->dev_alloc_list, | ||
1400 | &root->fs_info->fs_devices->alloc_list); | ||
1401 | unlock_chunks(root); | ||
1402 | root->fs_info->fs_devices->rw_devices++; | ||
1403 | } | ||
1404 | goto error_brelse; | ||
1297 | } | 1405 | } |
1298 | 1406 | ||
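Replacing list_del_init() with list_del_rcu() plus a call_rcu() callback is what lets readers, such as the path that writes out all the device supers, walk the device list without taking device_list_mutex. The pattern in isolation (simplified sketch; walk_devices() and do_something() are placeholders):

    static void remove_device(struct btrfs_device *device, struct mutex *lock)
    {
            mutex_lock(lock);                       /* writers still serialize */
            list_del_rcu(&device->dev_list);
            call_rcu(&device->rcu, free_device);    /* freed after a grace period */
            mutex_unlock(lock);
    }

    static void walk_devices(struct list_head *devices)
    {
            struct btrfs_device *dev;

            rcu_read_lock();
            list_for_each_entry_rcu(dev, devices, dev_list)
                    do_something(dev);              /* must not sleep */
            rcu_read_unlock();
    }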
1299 | /* | 1407 | /* |
@@ -1330,7 +1438,12 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans, | |||
1330 | INIT_LIST_HEAD(&seed_devices->devices); | 1438 | INIT_LIST_HEAD(&seed_devices->devices); |
1331 | INIT_LIST_HEAD(&seed_devices->alloc_list); | 1439 | INIT_LIST_HEAD(&seed_devices->alloc_list); |
1332 | mutex_init(&seed_devices->device_list_mutex); | 1440 | mutex_init(&seed_devices->device_list_mutex); |
1333 | list_splice_init(&fs_devices->devices, &seed_devices->devices); | 1441 | |
1442 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
1443 | list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, | ||
1444 | synchronize_rcu); | ||
1445 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1446 | |||
1334 | list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); | 1447 | list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); |
1335 | list_for_each_entry(device, &seed_devices->devices, dev_list) { | 1448 | list_for_each_entry(device, &seed_devices->devices, dev_list) { |
1336 | device->fs_devices = seed_devices; | 1449 | device->fs_devices = seed_devices; |
@@ -1391,7 +1504,7 @@ next_slot: | |||
1391 | goto error; | 1504 | goto error; |
1392 | leaf = path->nodes[0]; | 1505 | leaf = path->nodes[0]; |
1393 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | 1506 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
1394 | btrfs_release_path(root, path); | 1507 | btrfs_release_path(path); |
1395 | continue; | 1508 | continue; |
1396 | } | 1509 | } |
1397 | 1510 | ||
@@ -1441,7 +1554,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1441 | if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) | 1554 | if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) |
1442 | return -EINVAL; | 1555 | return -EINVAL; |
1443 | 1556 | ||
1444 | bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); | 1557 | bdev = blkdev_get_by_path(device_path, FMODE_EXCL, |
1558 | root->fs_info->bdev_holder); | ||
1445 | if (IS_ERR(bdev)) | 1559 | if (IS_ERR(bdev)) |
1446 | return PTR_ERR(bdev); | 1560 | return PTR_ERR(bdev); |
1447 | 1561 | ||
@@ -1482,14 +1596,21 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1482 | 1596 | ||
1483 | ret = find_next_devid(root, &device->devid); | 1597 | ret = find_next_devid(root, &device->devid); |
1484 | if (ret) { | 1598 | if (ret) { |
1599 | kfree(device->name); | ||
1485 | kfree(device); | 1600 | kfree(device); |
1486 | goto error; | 1601 | goto error; |
1487 | } | 1602 | } |
1488 | 1603 | ||
1489 | trans = btrfs_start_transaction(root, 0); | 1604 | trans = btrfs_start_transaction(root, 0); |
1605 | if (IS_ERR(trans)) { | ||
1606 | kfree(device->name); | ||
1607 | kfree(device); | ||
1608 | ret = PTR_ERR(trans); | ||
1609 | goto error; | ||
1610 | } | ||
1611 | |||
1490 | lock_chunks(root); | 1612 | lock_chunks(root); |
1491 | 1613 | ||
1492 | device->barriers = 1; | ||
1493 | device->writeable = 1; | 1614 | device->writeable = 1; |
1494 | device->work.func = pending_bios_fn; | 1615 | device->work.func = pending_bios_fn; |
1495 | generate_random_uuid(device->uuid); | 1616 | generate_random_uuid(device->uuid); |
@@ -1503,7 +1624,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1503 | device->dev_root = root->fs_info->dev_root; | 1624 | device->dev_root = root->fs_info->dev_root; |
1504 | device->bdev = bdev; | 1625 | device->bdev = bdev; |
1505 | device->in_fs_metadata = 1; | 1626 | device->in_fs_metadata = 1; |
1506 | device->mode = 0; | 1627 | device->mode = FMODE_EXCL; |
1507 | set_blocksize(device->bdev, 4096); | 1628 | set_blocksize(device->bdev, 4096); |
1508 | 1629 | ||
1509 | if (seeding_dev) { | 1630 | if (seeding_dev) { |
@@ -1519,7 +1640,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1519 | * half setup | 1640 | * half setup |
1520 | */ | 1641 | */ |
1521 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 1642 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
1522 | list_add(&device->dev_list, &root->fs_info->fs_devices->devices); | 1643 | list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices); |
1523 | list_add(&device->dev_alloc_list, | 1644 | list_add(&device->dev_alloc_list, |
1524 | &root->fs_info->fs_devices->alloc_list); | 1645 | &root->fs_info->fs_devices->alloc_list); |
1525 | root->fs_info->fs_devices->num_devices++; | 1646 | root->fs_info->fs_devices->num_devices++; |
@@ -1568,7 +1689,7 @@ out: | |||
1568 | mutex_unlock(&root->fs_info->volume_mutex); | 1689 | mutex_unlock(&root->fs_info->volume_mutex); |
1569 | return ret; | 1690 | return ret; |
1570 | error: | 1691 | error: |
1571 | close_bdev_exclusive(bdev, 0); | 1692 | blkdev_put(bdev, FMODE_EXCL); |
1572 | if (seeding_dev) { | 1693 | if (seeding_dev) { |
1573 | mutex_unlock(&uuid_mutex); | 1694 | mutex_unlock(&uuid_mutex); |
1574 | up_write(&sb->s_umount); | 1695 | up_write(&sb->s_umount); |
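Throughout the patch, open_bdev_exclusive()/close_bdev_exclusive() give way to the blkdev_get_by_path()/blkdev_put() API; the mode passed to blkdev_put() must match the one the device was opened with. The pairing in a sketch (path and holder are placeholders):

    struct block_device *bdev;

    bdev = blkdev_get_by_path(path, FMODE_READ | FMODE_EXCL, holder);
    if (IS_ERR(bdev))
            return PTR_ERR(bdev);

    /* ... read the super block, etc. ... */

    blkdev_put(bdev, FMODE_READ | FMODE_EXCL); /* same mode as the get */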
@@ -1677,10 +1798,9 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, | |||
1677 | BUG_ON(ret); | 1798 | BUG_ON(ret); |
1678 | 1799 | ||
1679 | ret = btrfs_del_item(trans, root, path); | 1800 | ret = btrfs_del_item(trans, root, path); |
1680 | BUG_ON(ret); | ||
1681 | 1801 | ||
1682 | btrfs_free_path(path); | 1802 | btrfs_free_path(path); |
1683 | return 0; | 1803 | return ret; |
1684 | } | 1804 | } |
1685 | 1805 | ||
1686 | static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 | 1806 | static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 |
@@ -1755,7 +1875,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1755 | return ret; | 1875 | return ret; |
1756 | 1876 | ||
1757 | trans = btrfs_start_transaction(root, 0); | 1877 | trans = btrfs_start_transaction(root, 0); |
1758 | BUG_ON(!trans); | 1878 | BUG_ON(IS_ERR(trans)); |
1759 | 1879 | ||
1760 | lock_chunks(root); | 1880 | lock_chunks(root); |
1761 | 1881 | ||
@@ -1786,6 +1906,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1786 | 1906 | ||
1787 | BUG_ON(ret); | 1907 | BUG_ON(ret); |
1788 | 1908 | ||
1909 | trace_btrfs_chunk_free(root, map, chunk_offset, em->len); | ||
1910 | |||
1789 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { | 1911 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { |
1790 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); | 1912 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); |
1791 | BUG_ON(ret); | 1913 | BUG_ON(ret); |
@@ -1853,7 +1975,7 @@ again: | |||
1853 | chunk = btrfs_item_ptr(leaf, path->slots[0], | 1975 | chunk = btrfs_item_ptr(leaf, path->slots[0], |
1854 | struct btrfs_chunk); | 1976 | struct btrfs_chunk); |
1855 | chunk_type = btrfs_chunk_type(leaf, chunk); | 1977 | chunk_type = btrfs_chunk_type(leaf, chunk); |
1856 | btrfs_release_path(chunk_root, path); | 1978 | btrfs_release_path(path); |
1857 | 1979 | ||
1858 | if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) { | 1980 | if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) { |
1859 | ret = btrfs_relocate_chunk(chunk_root, chunk_tree, | 1981 | ret = btrfs_relocate_chunk(chunk_root, chunk_tree, |
@@ -1901,7 +2023,6 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1901 | u64 size_to_free; | 2023 | u64 size_to_free; |
1902 | struct btrfs_path *path; | 2024 | struct btrfs_path *path; |
1903 | struct btrfs_key key; | 2025 | struct btrfs_key key; |
1904 | struct btrfs_chunk *chunk; | ||
1905 | struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; | 2026 | struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; |
1906 | struct btrfs_trans_handle *trans; | 2027 | struct btrfs_trans_handle *trans; |
1907 | struct btrfs_key found_key; | 2028 | struct btrfs_key found_key; |
@@ -1909,6 +2030,9 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1909 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) | 2030 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) |
1910 | return -EROFS; | 2031 | return -EROFS; |
1911 | 2032 | ||
2033 | if (!capable(CAP_SYS_ADMIN)) | ||
2034 | return -EPERM; | ||
2035 | |||
1912 | mutex_lock(&dev_root->fs_info->volume_mutex); | 2036 | mutex_lock(&dev_root->fs_info->volume_mutex); |
1913 | dev_root = dev_root->fs_info->dev_root; | 2037 | dev_root = dev_root->fs_info->dev_root; |
1914 | 2038 | ||
@@ -1927,7 +2051,7 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1927 | BUG_ON(ret); | 2051 | BUG_ON(ret); |
1928 | 2052 | ||
1929 | trans = btrfs_start_transaction(dev_root, 0); | 2053 | trans = btrfs_start_transaction(dev_root, 0); |
1930 | BUG_ON(!trans); | 2054 | BUG_ON(IS_ERR(trans)); |
1931 | 2055 | ||
1932 | ret = btrfs_grow_device(trans, device, old_size); | 2056 | ret = btrfs_grow_device(trans, device, old_size); |
1933 | BUG_ON(ret); | 2057 | BUG_ON(ret); |
@@ -1965,19 +2089,17 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1965 | if (found_key.objectid != key.objectid) | 2089 | if (found_key.objectid != key.objectid) |
1966 | break; | 2090 | break; |
1967 | 2091 | ||
1968 | chunk = btrfs_item_ptr(path->nodes[0], | ||
1969 | path->slots[0], | ||
1970 | struct btrfs_chunk); | ||
1971 | /* chunk zero is special */ | 2092 | /* chunk zero is special */ |
1972 | if (found_key.offset == 0) | 2093 | if (found_key.offset == 0) |
1973 | break; | 2094 | break; |
1974 | 2095 | ||
1975 | btrfs_release_path(chunk_root, path); | 2096 | btrfs_release_path(path); |
1976 | ret = btrfs_relocate_chunk(chunk_root, | 2097 | ret = btrfs_relocate_chunk(chunk_root, |
1977 | chunk_root->root_key.objectid, | 2098 | chunk_root->root_key.objectid, |
1978 | found_key.objectid, | 2099 | found_key.objectid, |
1979 | found_key.offset); | 2100 | found_key.offset); |
1980 | BUG_ON(ret && ret != -ENOSPC); | 2101 | if (ret && ret != -ENOSPC) |
2102 | goto error; | ||
1981 | key.offset = found_key.offset - 1; | 2103 | key.offset = found_key.offset - 1; |
1982 | } | 2104 | } |
1983 | ret = 0; | 2105 | ret = 0; |
@@ -2044,7 +2166,7 @@ again: | |||
2044 | goto done; | 2166 | goto done; |
2045 | if (ret) { | 2167 | if (ret) { |
2046 | ret = 0; | 2168 | ret = 0; |
2047 | btrfs_release_path(root, path); | 2169 | btrfs_release_path(path); |
2048 | break; | 2170 | break; |
2049 | } | 2171 | } |
2050 | 2172 | ||
@@ -2053,7 +2175,7 @@ again: | |||
2053 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | 2175 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); |
2054 | 2176 | ||
2055 | if (key.objectid != device->devid) { | 2177 | if (key.objectid != device->devid) { |
2056 | btrfs_release_path(root, path); | 2178 | btrfs_release_path(path); |
2057 | break; | 2179 | break; |
2058 | } | 2180 | } |
2059 | 2181 | ||
@@ -2061,14 +2183,14 @@ again: | |||
2061 | length = btrfs_dev_extent_length(l, dev_extent); | 2183 | length = btrfs_dev_extent_length(l, dev_extent); |
2062 | 2184 | ||
2063 | if (key.offset + length <= new_size) { | 2185 | if (key.offset + length <= new_size) { |
2064 | btrfs_release_path(root, path); | 2186 | btrfs_release_path(path); |
2065 | break; | 2187 | break; |
2066 | } | 2188 | } |
2067 | 2189 | ||
2068 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); | 2190 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); |
2069 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); | 2191 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); |
2070 | chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); | 2192 | chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); |
2071 | btrfs_release_path(root, path); | 2193 | btrfs_release_path(path); |
2072 | 2194 | ||
2073 | ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, | 2195 | ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, |
2074 | chunk_offset); | 2196 | chunk_offset); |
@@ -2096,6 +2218,11 @@ again: | |||
2096 | 2218 | ||
2097 | /* Shrinking succeeded, else we would be at "done". */ | 2219 | /* Shrinking succeeded, else we would be at "done". */ |
2098 | trans = btrfs_start_transaction(root, 0); | 2220 | trans = btrfs_start_transaction(root, 0); |
2221 | if (IS_ERR(trans)) { | ||
2222 | ret = PTR_ERR(trans); | ||
2223 | goto done; | ||
2224 | } | ||
2225 | |||
2099 | lock_chunks(root); | 2226 | lock_chunks(root); |
2100 | 2227 | ||
2101 | device->disk_total_bytes = new_size; | 2228 | device->disk_total_bytes = new_size; |
@@ -2139,211 +2266,243 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, | |||
2139 | return 0; | 2266 | return 0; |
2140 | } | 2267 | } |
2141 | 2268 | ||
2142 | static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, | 2269 | /* |
2143 | int num_stripes, int sub_stripes) | 2270 | * sort the devices in descending order by max_avail, total_avail |
2271 | */ | ||
2272 | static int btrfs_cmp_device_info(const void *a, const void *b) | ||
2144 | { | 2273 | { |
2145 | if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) | 2274 | const struct btrfs_device_info *di_a = a; |
2146 | return calc_size; | 2275 | const struct btrfs_device_info *di_b = b; |
2147 | else if (type & BTRFS_BLOCK_GROUP_RAID10) | 2276 | |
2148 | return calc_size * (num_stripes / sub_stripes); | 2277 | if (di_a->max_avail > di_b->max_avail) |
2149 | else | 2278 | return -1; |
2150 | return calc_size * num_stripes; | 2279 | if (di_a->max_avail < di_b->max_avail) |
2280 | return 1; | ||
2281 | if (di_a->total_avail > di_b->total_avail) | ||
2282 | return -1; | ||
2283 | if (di_a->total_avail < di_b->total_avail) | ||
2284 | return 1; | ||
2285 | return 0; | ||
2151 | } | 2286 | } |
2152 | 2287 | ||
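lib/sort.c orders ascending according to the comparator, so returning -1 when a is larger produces the descending order the allocator wants: biggest contiguous hole first, total free space as the tie-breaker. A usage sketch with made-up sizes:

    struct btrfs_device_info info[] = {
            { .max_avail = 1ULL << 30, .total_avail = 4ULL << 30 },
            { .max_avail = 2ULL << 30, .total_avail = 2ULL << 30 },
            { .max_avail = 2ULL << 30, .total_avail = 8ULL << 30 },
    };

    sort(info, ARRAY_SIZE(info), sizeof(*info), btrfs_cmp_device_info, NULL);
    /* resulting order: {2G, 8G}, {2G, 2G}, {1G, 4G} */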
2153 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 2288 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, |
2154 | struct btrfs_root *extent_root, | 2289 | struct btrfs_root *extent_root, |
2155 | struct map_lookup **map_ret, | 2290 | struct map_lookup **map_ret, |
2156 | u64 *num_bytes, u64 *stripe_size, | 2291 | u64 *num_bytes_out, u64 *stripe_size_out, |
2157 | u64 start, u64 type) | 2292 | u64 start, u64 type) |
2158 | { | 2293 | { |
2159 | struct btrfs_fs_info *info = extent_root->fs_info; | 2294 | struct btrfs_fs_info *info = extent_root->fs_info; |
2160 | struct btrfs_device *device = NULL; | ||
2161 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | 2295 | struct btrfs_fs_devices *fs_devices = info->fs_devices; |
2162 | struct list_head *cur; | 2296 | struct list_head *cur; |
2163 | struct map_lookup *map = NULL; | 2297 | struct map_lookup *map = NULL; |
2164 | struct extent_map_tree *em_tree; | 2298 | struct extent_map_tree *em_tree; |
2165 | struct extent_map *em; | 2299 | struct extent_map *em; |
2166 | struct list_head private_devs; | 2300 | struct btrfs_device_info *devices_info = NULL; |
2167 | int min_stripe_size = 1 * 1024 * 1024; | 2301 | u64 total_avail; |
2168 | u64 calc_size = 1024 * 1024 * 1024; | 2302 | int num_stripes; /* total number of stripes to allocate */ |
2169 | u64 max_chunk_size = calc_size; | 2303 | int sub_stripes; /* sub_stripes info for map */ |
2170 | u64 min_free; | 2304 | int dev_stripes; /* stripes per dev */ |
2171 | u64 avail; | 2305 | int devs_max; /* max devs to use */ |
2172 | u64 max_avail = 0; | 2306 | int devs_min; /* min devs needed */ |
2173 | u64 dev_offset; | 2307 | int devs_increment; /* ndevs has to be a multiple of this */ |
2174 | int num_stripes = 1; | 2308 | int ncopies; /* how many copies the data has */ |
2175 | int min_stripes = 1; | ||
2176 | int sub_stripes = 0; | ||
2177 | int looped = 0; | ||
2178 | int ret; | 2309 | int ret; |
2179 | int index; | 2310 | u64 max_stripe_size; |
2180 | int stripe_len = 64 * 1024; | 2311 | u64 max_chunk_size; |
2312 | u64 stripe_size; | ||
2313 | u64 num_bytes; | ||
2314 | int ndevs; | ||
2315 | int i; | ||
2316 | int j; | ||
2181 | 2317 | ||
2182 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | 2318 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && |
2183 | (type & BTRFS_BLOCK_GROUP_DUP)) { | 2319 | (type & BTRFS_BLOCK_GROUP_DUP)) { |
2184 | WARN_ON(1); | 2320 | WARN_ON(1); |
2185 | type &= ~BTRFS_BLOCK_GROUP_DUP; | 2321 | type &= ~BTRFS_BLOCK_GROUP_DUP; |
2186 | } | 2322 | } |
2323 | |||
2187 | if (list_empty(&fs_devices->alloc_list)) | 2324 | if (list_empty(&fs_devices->alloc_list)) |
2188 | return -ENOSPC; | 2325 | return -ENOSPC; |
2189 | 2326 | ||
2190 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { | 2327 | sub_stripes = 1; |
2191 | num_stripes = fs_devices->rw_devices; | 2328 | dev_stripes = 1; |
2192 | min_stripes = 2; | 2329 | devs_increment = 1; |
2193 | } | 2330 | ncopies = 1; |
2331 | devs_max = 0; /* 0 == as many as possible */ | ||
2332 | devs_min = 1; | ||
2333 | |||
2334 | /* | ||
2335 | * define the properties of each RAID type. | ||
2336 | * FIXME: move this to a global table and use it in all RAID | ||
2337 | * calculation code | ||
2338 | */ | ||
2194 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { | 2339 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { |
2195 | num_stripes = 2; | 2340 | dev_stripes = 2; |
2196 | min_stripes = 2; | 2341 | ncopies = 2; |
2197 | } | 2342 | devs_max = 1; |
2198 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { | 2343 | } else if (type & (BTRFS_BLOCK_GROUP_RAID0)) { |
2199 | if (fs_devices->rw_devices < 2) | 2344 | devs_min = 2; |
2200 | return -ENOSPC; | 2345 | } else if (type & (BTRFS_BLOCK_GROUP_RAID1)) { |
2201 | num_stripes = 2; | 2346 | devs_increment = 2; |
2202 | min_stripes = 2; | 2347 | ncopies = 2; |
2203 | } | 2348 | devs_max = 2; |
2204 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | 2349 | devs_min = 2; |
2205 | num_stripes = fs_devices->rw_devices; | 2350 | } else if (type & (BTRFS_BLOCK_GROUP_RAID10)) { |
2206 | if (num_stripes < 4) | ||
2207 | return -ENOSPC; | ||
2208 | num_stripes &= ~(u32)1; | ||
2209 | sub_stripes = 2; | 2351 | sub_stripes = 2; |
2210 | min_stripes = 4; | 2352 | devs_increment = 2; |
2353 | ncopies = 2; | ||
2354 | devs_min = 4; | ||
2355 | } else { | ||
2356 | devs_max = 1; | ||
2211 | } | 2357 | } |
2212 | 2358 | ||
2213 | if (type & BTRFS_BLOCK_GROUP_DATA) { | 2359 | if (type & BTRFS_BLOCK_GROUP_DATA) { |
2214 | max_chunk_size = 10 * calc_size; | 2360 | max_stripe_size = 1024 * 1024 * 1024; |
2215 | min_stripe_size = 64 * 1024 * 1024; | 2361 | max_chunk_size = 10 * max_stripe_size; |
2216 | } else if (type & BTRFS_BLOCK_GROUP_METADATA) { | 2362 | } else if (type & BTRFS_BLOCK_GROUP_METADATA) { |
2217 | max_chunk_size = 256 * 1024 * 1024; | 2363 | max_stripe_size = 256 * 1024 * 1024; |
2218 | min_stripe_size = 32 * 1024 * 1024; | 2364 | max_chunk_size = max_stripe_size; |
2219 | } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { | 2365 | } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { |
2220 | calc_size = 8 * 1024 * 1024; | 2366 | max_stripe_size = 8 * 1024 * 1024; |
2221 | max_chunk_size = calc_size * 2; | 2367 | max_chunk_size = 2 * max_stripe_size; |
2222 | min_stripe_size = 1 * 1024 * 1024; | 2368 | } else { |
2369 | printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n", | ||
2370 | type); | ||
2371 | BUG_ON(1); | ||
2223 | } | 2372 | } |
2224 | 2373 | ||
2225 | /* we don't want a chunk larger than 10% of writeable space */ | 2374 | /* we don't want a chunk larger than 10% of writeable space */ |
2226 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), | 2375 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), |
2227 | max_chunk_size); | 2376 | max_chunk_size); |
2228 | 2377 | ||
2229 | again: | 2378 | devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, |
2230 | max_avail = 0; | 2379 | GFP_NOFS); |
2231 | if (!map || map->num_stripes != num_stripes) { | 2380 | if (!devices_info) |
2232 | kfree(map); | 2381 | return -ENOMEM; |
2233 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
2234 | if (!map) | ||
2235 | return -ENOMEM; | ||
2236 | map->num_stripes = num_stripes; | ||
2237 | } | ||
2238 | |||
2239 | if (calc_size * num_stripes > max_chunk_size) { | ||
2240 | calc_size = max_chunk_size; | ||
2241 | do_div(calc_size, num_stripes); | ||
2242 | do_div(calc_size, stripe_len); | ||
2243 | calc_size *= stripe_len; | ||
2244 | } | ||
2245 | 2382 | ||
2246 | /* we don't want tiny stripes */ | 2383 | cur = fs_devices->alloc_list.next; |
2247 | if (!looped) | ||
2248 | calc_size = max_t(u64, min_stripe_size, calc_size); | ||
2249 | 2384 | ||
2250 | /* | 2385 | /* |
2251 | * we're about to do_div by the stripe_len so lets make sure | 2386 | * in the first pass through the devices list, we gather information |
2252 | * we end up with something bigger than a stripe | 2387 | * about the available holes on each device. |
2253 | */ | 2388 | */ |
2254 | calc_size = max_t(u64, calc_size, stripe_len * 4); | 2389 | ndevs = 0; |
2390 | while (cur != &fs_devices->alloc_list) { | ||
2391 | struct btrfs_device *device; | ||
2392 | u64 max_avail; | ||
2393 | u64 dev_offset; | ||
2255 | 2394 | ||
2256 | do_div(calc_size, stripe_len); | 2395 | device = list_entry(cur, struct btrfs_device, dev_alloc_list); |
2257 | calc_size *= stripe_len; | ||
2258 | 2396 | ||
2259 | cur = fs_devices->alloc_list.next; | 2397 | cur = cur->next; |
2260 | index = 0; | ||
2261 | 2398 | ||
2262 | if (type & BTRFS_BLOCK_GROUP_DUP) | 2399 | if (!device->writeable) { |
2263 | min_free = calc_size * 2; | 2400 | printk(KERN_ERR |
2264 | else | 2401 | "btrfs: read-only device in alloc_list\n"); |
2265 | min_free = calc_size; | 2402 | WARN_ON(1); |
2403 | continue; | ||
2404 | } | ||
2266 | 2405 | ||
2267 | /* | 2406 | if (!device->in_fs_metadata) |
2268 | * we add 1MB because we never use the first 1MB of the device, unless | 2407 | continue; |
2269 | * we've looped, then we are likely allocating the maximum amount of | ||
2270 | * space left already | ||
2271 | */ | ||
2272 | if (!looped) | ||
2273 | min_free += 1024 * 1024; | ||
2274 | 2408 | ||
2275 | INIT_LIST_HEAD(&private_devs); | ||
2276 | while (index < num_stripes) { | ||
2277 | device = list_entry(cur, struct btrfs_device, dev_alloc_list); | ||
2278 | BUG_ON(!device->writeable); | ||
2279 | if (device->total_bytes > device->bytes_used) | 2409 | if (device->total_bytes > device->bytes_used) |
2280 | avail = device->total_bytes - device->bytes_used; | 2410 | total_avail = device->total_bytes - device->bytes_used; |
2281 | else | 2411 | else |
2282 | avail = 0; | 2412 | total_avail = 0; |
2283 | cur = cur->next; | 2413 | /* avail is off by max(alloc_start, 1MB), but that is the same |
2414 | * for all devices, so it doesn't hurt the sorting later on | ||
2415 | */ | ||
2284 | 2416 | ||
2285 | if (device->in_fs_metadata && avail >= min_free) { | 2417 | ret = find_free_dev_extent(trans, device, |
2286 | ret = find_free_dev_extent(trans, device, | 2418 | max_stripe_size * dev_stripes, |
2287 | min_free, &dev_offset, | 2419 | &dev_offset, &max_avail); |
2288 | &max_avail); | 2420 | if (ret && ret != -ENOSPC) |
2289 | if (ret == 0) { | 2421 | goto error; |
2290 | list_move_tail(&device->dev_alloc_list, | 2422 | |
2291 | &private_devs); | 2423 | if (ret == 0) |
2292 | map->stripes[index].dev = device; | 2424 | max_avail = max_stripe_size * dev_stripes; |
2293 | map->stripes[index].physical = dev_offset; | 2425 | |
2294 | index++; | 2426 | if (max_avail < BTRFS_STRIPE_LEN * dev_stripes) |
2295 | if (type & BTRFS_BLOCK_GROUP_DUP) { | 2427 | continue; |
2296 | map->stripes[index].dev = device; | 2428 | |
2297 | map->stripes[index].physical = | 2429 | devices_info[ndevs].dev_offset = dev_offset; |
2298 | dev_offset + calc_size; | 2430 | devices_info[ndevs].max_avail = max_avail; |
2299 | index++; | 2431 | devices_info[ndevs].total_avail = total_avail; |
2300 | } | 2432 | devices_info[ndevs].dev = device; |
2301 | } | 2433 | ++ndevs; |
2302 | } else if (device->in_fs_metadata && avail > max_avail) | ||
2303 | max_avail = avail; | ||
2304 | if (cur == &fs_devices->alloc_list) | ||
2305 | break; | ||
2306 | } | 2434 | } |
2307 | list_splice(&private_devs, &fs_devices->alloc_list); | 2435 | |
2308 | if (index < num_stripes) { | 2436 | /* |
2309 | if (index >= min_stripes) { | 2437 | * now sort the devices by hole size / available space |
2310 | num_stripes = index; | 2438 | */ |
2311 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | 2439 | sort(devices_info, ndevs, sizeof(struct btrfs_device_info), |
2312 | num_stripes /= sub_stripes; | 2440 | btrfs_cmp_device_info, NULL); |
2313 | num_stripes *= sub_stripes; | 2441 | |
2314 | } | 2442 | /* round down to number of usable stripes */ |
2315 | looped = 1; | 2443 | ndevs -= ndevs % devs_increment; |
2316 | goto again; | 2444 | |
2317 | } | 2445 | if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) { |
2318 | if (!looped && max_avail > 0) { | 2446 | ret = -ENOSPC; |
2319 | looped = 1; | 2447 | goto error; |
2320 | calc_size = max_avail; | 2448 | } |
2321 | goto again; | 2449 | |
2450 | if (devs_max && ndevs > devs_max) | ||
2451 | ndevs = devs_max; | ||
2452 | /* | ||
2453 | * the primary goal is to maximize the number of stripes, so use as many | ||
2454 | * devices as possible, even if the stripes are not maximum sized. | ||
2455 | */ | ||
2456 | stripe_size = devices_info[ndevs-1].max_avail; | ||
2457 | num_stripes = ndevs * dev_stripes; | ||
2458 | |||
2459 | if (stripe_size * num_stripes > max_chunk_size * ncopies) { | ||
2460 | stripe_size = max_chunk_size * ncopies; | ||
2461 | do_div(stripe_size, num_stripes); | ||
2462 | } | ||
2463 | |||
2464 | do_div(stripe_size, dev_stripes); | ||
2465 | do_div(stripe_size, BTRFS_STRIPE_LEN); | ||
2466 | stripe_size *= BTRFS_STRIPE_LEN; | ||
2467 | |||
2468 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
2469 | if (!map) { | ||
2470 | ret = -ENOMEM; | ||
2471 | goto error; | ||
2472 | } | ||
2473 | map->num_stripes = num_stripes; | ||
2474 | |||
2475 | for (i = 0; i < ndevs; ++i) { | ||
2476 | for (j = 0; j < dev_stripes; ++j) { | ||
2477 | int s = i * dev_stripes + j; | ||
2478 | map->stripes[s].dev = devices_info[i].dev; | ||
2479 | map->stripes[s].physical = devices_info[i].dev_offset + | ||
2480 | j * stripe_size; | ||
2322 | } | 2481 | } |
2323 | kfree(map); | ||
2324 | return -ENOSPC; | ||
2325 | } | 2482 | } |
2326 | map->sector_size = extent_root->sectorsize; | 2483 | map->sector_size = extent_root->sectorsize; |
2327 | map->stripe_len = stripe_len; | 2484 | map->stripe_len = BTRFS_STRIPE_LEN; |
2328 | map->io_align = stripe_len; | 2485 | map->io_align = BTRFS_STRIPE_LEN; |
2329 | map->io_width = stripe_len; | 2486 | map->io_width = BTRFS_STRIPE_LEN; |
2330 | map->type = type; | 2487 | map->type = type; |
2331 | map->num_stripes = num_stripes; | ||
2332 | map->sub_stripes = sub_stripes; | 2488 | map->sub_stripes = sub_stripes; |
2333 | 2489 | ||
2334 | *map_ret = map; | 2490 | *map_ret = map; |
2335 | *stripe_size = calc_size; | 2491 | num_bytes = stripe_size * (num_stripes / ncopies); |
2336 | *num_bytes = chunk_bytes_by_type(type, calc_size, | 2492 | |
2337 | num_stripes, sub_stripes); | 2493 | *stripe_size_out = stripe_size; |
2494 | *num_bytes_out = num_bytes; | ||
2338 | 2495 | ||
2339 | em = alloc_extent_map(GFP_NOFS); | 2496 | trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes); |
2497 | |||
2498 | em = alloc_extent_map(); | ||
2340 | if (!em) { | 2499 | if (!em) { |
2341 | kfree(map); | 2500 | ret = -ENOMEM; |
2342 | return -ENOMEM; | 2501 | goto error; |
2343 | } | 2502 | } |
2344 | em->bdev = (struct block_device *)map; | 2503 | em->bdev = (struct block_device *)map; |
2345 | em->start = start; | 2504 | em->start = start; |
2346 | em->len = *num_bytes; | 2505 | em->len = num_bytes; |
2347 | em->block_start = 0; | 2506 | em->block_start = 0; |
2348 | em->block_len = em->len; | 2507 | em->block_len = em->len; |
2349 | 2508 | ||
@@ -2356,23 +2515,30 @@ again: | |||
2356 | 2515 | ||
2357 | ret = btrfs_make_block_group(trans, extent_root, 0, type, | 2516 | ret = btrfs_make_block_group(trans, extent_root, 0, type, |
2358 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, | 2517 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, |
2359 | start, *num_bytes); | 2518 | start, num_bytes); |
2360 | BUG_ON(ret); | 2519 | BUG_ON(ret); |
2361 | 2520 | ||
2362 | index = 0; | 2521 | for (i = 0; i < map->num_stripes; ++i) { |
2363 | while (index < map->num_stripes) { | 2522 | struct btrfs_device *device; |
2364 | device = map->stripes[index].dev; | 2523 | u64 dev_offset; |
2365 | dev_offset = map->stripes[index].physical; | 2524 | |
2525 | device = map->stripes[i].dev; | ||
2526 | dev_offset = map->stripes[i].physical; | ||
2366 | 2527 | ||
2367 | ret = btrfs_alloc_dev_extent(trans, device, | 2528 | ret = btrfs_alloc_dev_extent(trans, device, |
2368 | info->chunk_root->root_key.objectid, | 2529 | info->chunk_root->root_key.objectid, |
2369 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, | 2530 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, |
2370 | start, dev_offset, calc_size); | 2531 | start, dev_offset, stripe_size); |
2371 | BUG_ON(ret); | 2532 | BUG_ON(ret); |
2372 | index++; | ||
2373 | } | 2533 | } |
2374 | 2534 | ||
2535 | kfree(devices_info); | ||
2375 | return 0; | 2536 | return 0; |
2537 | |||
2538 | error: | ||
2539 | kfree(map); | ||
2540 | kfree(devices_info); | ||
2541 | return ret; | ||
2376 | } | 2542 | } |
2377 | 2543 | ||
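A worked example of the sizing arithmetic above, under assumed numbers:

    /*
     * RAID10 data chunk: dev_stripes = 1, sub_stripes = 2, ncopies = 2,
     * devs_increment = 2, devs_min = 4, max_stripe_size = 1G, and assume
     * the 10%-of-rw-space clamp leaves max_chunk_size at 10G.
     * Five devices qualify, sorted by max_avail: {1G, 1G, 1G, 1G, 512M}.
     *   ndevs = 5, rounded down to a multiple of devs_increment -> 4
     *   stripe_size = devices_info[3].max_avail = 1G (smallest of the 4)
     *   num_stripes = ndevs * dev_stripes = 4
     *   stripe_size * num_stripes = 4G <= max_chunk_size * ncopies = 20G,
     *   so no scaling; rounding to BTRFS_STRIPE_LEN leaves 1G intact.
     *   num_bytes = stripe_size * (num_stripes / ncopies) = 2G
     * i.e. a 2G chunk that consumes a 1G stripe on each of four devices.
     */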
2378 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | 2544 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, |
@@ -2438,6 +2604,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
2438 | item_size); | 2604 | item_size); |
2439 | BUG_ON(ret); | 2605 | BUG_ON(ret); |
2440 | } | 2606 | } |
2607 | |||
2441 | kfree(chunk); | 2608 | kfree(chunk); |
2442 | return 0; | 2609 | return 0; |
2443 | } | 2610 | } |
@@ -2569,7 +2736,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) | |||
2569 | 2736 | ||
2570 | void btrfs_mapping_init(struct btrfs_mapping_tree *tree) | 2737 | void btrfs_mapping_init(struct btrfs_mapping_tree *tree) |
2571 | { | 2738 | { |
2572 | extent_map_tree_init(&tree->map_tree, GFP_NOFS); | 2739 | extent_map_tree_init(&tree->map_tree); |
2573 | } | 2740 | } |
2574 | 2741 | ||
2575 | void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) | 2742 | void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) |
@@ -2635,14 +2802,17 @@ static int find_live_mirror(struct map_lookup *map, int first, int num, | |||
2635 | static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | 2802 | static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, |
2636 | u64 logical, u64 *length, | 2803 | u64 logical, u64 *length, |
2637 | struct btrfs_multi_bio **multi_ret, | 2804 | struct btrfs_multi_bio **multi_ret, |
2638 | int mirror_num, struct page *unplug_page) | 2805 | int mirror_num) |
2639 | { | 2806 | { |
2640 | struct extent_map *em; | 2807 | struct extent_map *em; |
2641 | struct map_lookup *map; | 2808 | struct map_lookup *map; |
2642 | struct extent_map_tree *em_tree = &map_tree->map_tree; | 2809 | struct extent_map_tree *em_tree = &map_tree->map_tree; |
2643 | u64 offset; | 2810 | u64 offset; |
2644 | u64 stripe_offset; | 2811 | u64 stripe_offset; |
2812 | u64 stripe_end_offset; | ||
2645 | u64 stripe_nr; | 2813 | u64 stripe_nr; |
2814 | u64 stripe_nr_orig; | ||
2815 | u64 stripe_nr_end; | ||
2646 | int stripes_allocated = 8; | 2816 | int stripes_allocated = 8; |
2647 | int stripes_required = 1; | 2817 | int stripes_required = 1; |
2648 | int stripe_index; | 2818 | int stripe_index; |
@@ -2651,7 +2821,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
2651 | int max_errors = 0; | 2821 | int max_errors = 0; |
2652 | struct btrfs_multi_bio *multi = NULL; | 2822 | struct btrfs_multi_bio *multi = NULL; |
2653 | 2823 | ||
2654 | if (multi_ret && !(rw & REQ_WRITE)) | 2824 | if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD))) |
2655 | stripes_allocated = 1; | 2825 | stripes_allocated = 1; |
2656 | again: | 2826 | again: |
2657 | if (multi_ret) { | 2827 | if (multi_ret) { |
@@ -2667,11 +2837,6 @@ again: | |||
2667 | em = lookup_extent_mapping(em_tree, logical, *length); | 2837 | em = lookup_extent_mapping(em_tree, logical, *length); |
2668 | read_unlock(&em_tree->lock); | 2838 | read_unlock(&em_tree->lock); |
2669 | 2839 | ||
2670 | if (!em && unplug_page) { | ||
2671 | kfree(multi); | ||
2672 | return 0; | ||
2673 | } | ||
2674 | |||
2675 | if (!em) { | 2840 | if (!em) { |
2676 | printk(KERN_CRIT "unable to find logical %llu len %llu\n", | 2841 | printk(KERN_CRIT "unable to find logical %llu len %llu\n", |
2677 | (unsigned long long)logical, | 2842 | (unsigned long long)logical, |
@@ -2697,7 +2862,15 @@ again: | |||
2697 | max_errors = 1; | 2862 | max_errors = 1; |
2698 | } | 2863 | } |
2699 | } | 2864 | } |
2700 | if (multi_ret && (rw & REQ_WRITE) && | 2865 | if (rw & REQ_DISCARD) { |
2866 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | | ||
2867 | BTRFS_BLOCK_GROUP_RAID1 | | ||
2868 | BTRFS_BLOCK_GROUP_DUP | | ||
2869 | BTRFS_BLOCK_GROUP_RAID10)) { | ||
2870 | stripes_required = map->num_stripes; | ||
2871 | } | ||
2872 | } | ||
2873 | if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) && | ||
2701 | stripes_allocated < stripes_required) { | 2874 | stripes_allocated < stripes_required) { |
2702 | stripes_allocated = map->num_stripes; | 2875 | stripes_allocated = map->num_stripes; |
2703 | free_extent_map(em); | 2876 | free_extent_map(em); |
@@ -2717,23 +2890,37 @@ again: | |||
2717 | /* stripe_offset is the offset of this block in its stripe*/ | 2890 | /* stripe_offset is the offset of this block in its stripe*/ |
2718 | stripe_offset = offset - stripe_offset; | 2891 | stripe_offset = offset - stripe_offset; |
2719 | 2892 | ||
2720 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | | 2893 | if (rw & REQ_DISCARD) |
2721 | BTRFS_BLOCK_GROUP_RAID10 | | 2894 | *length = min_t(u64, em->len - offset, *length); |
2722 | BTRFS_BLOCK_GROUP_DUP)) { | 2895 | else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | |
2896 | BTRFS_BLOCK_GROUP_RAID1 | | ||
2897 | BTRFS_BLOCK_GROUP_RAID10 | | ||
2898 | BTRFS_BLOCK_GROUP_DUP)) { | ||
2723 | /* we limit the length of each bio to what fits in a stripe */ | 2899 | /* we limit the length of each bio to what fits in a stripe */ |
2724 | *length = min_t(u64, em->len - offset, | 2900 | *length = min_t(u64, em->len - offset, |
2725 | map->stripe_len - stripe_offset); | 2901 | map->stripe_len - stripe_offset); |
2726 | } else { | 2902 | } else { |
2727 | *length = em->len - offset; | 2903 | *length = em->len - offset; |
2728 | } | 2904 | } |
2729 | 2905 | ||
2730 | if (!multi_ret && !unplug_page) | 2906 | if (!multi_ret) |
2731 | goto out; | 2907 | goto out; |
2732 | 2908 | ||
2733 | num_stripes = 1; | 2909 | num_stripes = 1; |
2734 | stripe_index = 0; | 2910 | stripe_index = 0; |
2735 | if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | 2911 | stripe_nr_orig = stripe_nr; |
2736 | if (unplug_page || (rw & REQ_WRITE)) | 2912 | stripe_nr_end = (offset + *length + map->stripe_len - 1) & |
2913 | (~(map->stripe_len - 1)); | ||
2914 | do_div(stripe_nr_end, map->stripe_len); | ||
2915 | stripe_end_offset = stripe_nr_end * map->stripe_len - | ||
2916 | (offset + *length); | ||
2917 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | ||
2918 | if (rw & REQ_DISCARD) | ||
2919 | num_stripes = min_t(u64, map->num_stripes, | ||
2920 | stripe_nr_end - stripe_nr_orig); | ||
2921 | stripe_index = do_div(stripe_nr, map->num_stripes); | ||
2922 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | ||
2923 | if (rw & (REQ_WRITE | REQ_DISCARD)) | ||
2737 | num_stripes = map->num_stripes; | 2924 | num_stripes = map->num_stripes; |
2738 | else if (mirror_num) | 2925 | else if (mirror_num) |
2739 | stripe_index = mirror_num - 1; | 2926 | stripe_index = mirror_num - 1; |
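
The REQ_DISCARD path above converts the byte range into stripe numbers: stripe_nr_end is the end of the range rounded up to a stripe boundary and then divided by stripe_len, so it names one past the last stripe touched, and stripe_end_offset is how far that rounded-up end overshoots the real end of the range. A minimal userspace sketch of the same arithmetic, with assumed example values (64K stripe_len, matching BTRFS_STRIPE_LEN introduced in volumes.h below):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t stripe_len = 64 * 1024;   /* BTRFS_STRIPE_LEN */
        uint64_t offset = 100 * 1024;      /* range start within the chunk */
        uint64_t length = 200 * 1024;      /* discard length */

        uint64_t stripe_nr = offset;
        /* round the end of the range up to a stripe boundary */
        uint64_t stripe_nr_end = (offset + length + stripe_len - 1) &
                                 ~(stripe_len - 1);
        uint64_t stripe_end_offset;

        stripe_nr /= stripe_len;           /* first stripe: 1 */
        stripe_nr_end /= stripe_len;       /* one past the last stripe: 5 */
        /* bytes by which the rounded-up end overshoots the real end */
        stripe_end_offset = stripe_nr_end * stripe_len - (offset + length);

        printf("stripes [%llu, %llu), end offset %llu\n",
               (unsigned long long)stripe_nr,
               (unsigned long long)stripe_nr_end,
               (unsigned long long)stripe_end_offset);   /* 20K overshoot */
        return 0;
    }
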
@@ -2744,7 +2931,7 @@ again: | |||
2744 | } | 2931 | } |
2745 | 2932 | ||
2746 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | 2933 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { |
2747 | if (rw & REQ_WRITE) | 2934 | if (rw & (REQ_WRITE | REQ_DISCARD)) |
2748 | num_stripes = map->num_stripes; | 2935 | num_stripes = map->num_stripes; |
2749 | else if (mirror_num) | 2936 | else if (mirror_num) |
2750 | stripe_index = mirror_num - 1; | 2937 | stripe_index = mirror_num - 1; |
@@ -2755,8 +2942,12 @@ again: | |||
2755 | stripe_index = do_div(stripe_nr, factor); | 2942 | stripe_index = do_div(stripe_nr, factor); |
2756 | stripe_index *= map->sub_stripes; | 2943 | stripe_index *= map->sub_stripes; |
2757 | 2944 | ||
2758 | if (unplug_page || (rw & REQ_WRITE)) | 2945 | if (rw & REQ_WRITE) |
2759 | num_stripes = map->sub_stripes; | 2946 | num_stripes = map->sub_stripes; |
2947 | else if (rw & REQ_DISCARD) | ||
2948 | num_stripes = min_t(u64, map->sub_stripes * | ||
2949 | (stripe_nr_end - stripe_nr_orig), | ||
2950 | map->num_stripes); | ||
2760 | else if (mirror_num) | 2951 | else if (mirror_num) |
2761 | stripe_index += mirror_num - 1; | 2952 | stripe_index += mirror_num - 1; |
2762 | else { | 2953 | else { |
@@ -2774,24 +2965,101 @@ again: | |||
2774 | } | 2965 | } |
2775 | BUG_ON(stripe_index >= map->num_stripes); | 2966 | BUG_ON(stripe_index >= map->num_stripes); |
2776 | 2967 | ||
2777 | for (i = 0; i < num_stripes; i++) { | 2968 | if (rw & REQ_DISCARD) { |
2778 | if (unplug_page) { | 2969 | for (i = 0; i < num_stripes; i++) { |
2779 | struct btrfs_device *device; | ||
2780 | struct backing_dev_info *bdi; | ||
2781 | |||
2782 | device = map->stripes[stripe_index].dev; | ||
2783 | if (device->bdev) { | ||
2784 | bdi = blk_get_backing_dev_info(device->bdev); | ||
2785 | if (bdi->unplug_io_fn) | ||
2786 | bdi->unplug_io_fn(bdi, unplug_page); | ||
2787 | } | ||
2788 | } else { | ||
2789 | multi->stripes[i].physical = | 2970 | multi->stripes[i].physical = |
2790 | map->stripes[stripe_index].physical + | 2971 | map->stripes[stripe_index].physical + |
2791 | stripe_offset + stripe_nr * map->stripe_len; | 2972 | stripe_offset + stripe_nr * map->stripe_len; |
2792 | multi->stripes[i].dev = map->stripes[stripe_index].dev; | 2973 | multi->stripes[i].dev = map->stripes[stripe_index].dev; |
2974 | |||
2975 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | ||
2976 | u64 stripes; | ||
2977 | u32 last_stripe = 0; | ||
2978 | int j; | ||
2979 | |||
2980 | div_u64_rem(stripe_nr_end - 1, | ||
2981 | map->num_stripes, | ||
2982 | &last_stripe); | ||
2983 | |||
2984 | for (j = 0; j < map->num_stripes; j++) { | ||
2985 | u32 test; | ||
2986 | |||
2987 | div_u64_rem(stripe_nr_end - 1 - j, | ||
2988 | map->num_stripes, &test); | ||
2989 | if (test == stripe_index) | ||
2990 | break; | ||
2991 | } | ||
2992 | stripes = stripe_nr_end - 1 - j; | ||
2993 | do_div(stripes, map->num_stripes); | ||
2994 | multi->stripes[i].length = map->stripe_len * | ||
2995 | (stripes - stripe_nr + 1); | ||
2996 | |||
2997 | if (i == 0) { | ||
2998 | multi->stripes[i].length -= | ||
2999 | stripe_offset; | ||
3000 | stripe_offset = 0; | ||
3001 | } | ||
3002 | if (stripe_index == last_stripe) | ||
3003 | multi->stripes[i].length -= | ||
3004 | stripe_end_offset; | ||
3005 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | ||
3006 | u64 stripes; | ||
3007 | int j; | ||
3008 | int factor = map->num_stripes / | ||
3009 | map->sub_stripes; | ||
3010 | u32 last_stripe = 0; | ||
3011 | |||
3012 | div_u64_rem(stripe_nr_end - 1, | ||
3013 | factor, &last_stripe); | ||
3014 | last_stripe *= map->sub_stripes; | ||
3015 | |||
3016 | for (j = 0; j < factor; j++) { | ||
3017 | u32 test; | ||
3018 | |||
3019 | div_u64_rem(stripe_nr_end - 1 - j, | ||
3020 | factor, &test); | ||
3021 | |||
3022 | if (test == | ||
3023 | stripe_index / map->sub_stripes) | ||
3024 | break; | ||
3025 | } | ||
3026 | stripes = stripe_nr_end - 1 - j; | ||
3027 | do_div(stripes, factor); | ||
3028 | multi->stripes[i].length = map->stripe_len * | ||
3029 | (stripes - stripe_nr + 1); | ||
3030 | |||
3031 | if (i < map->sub_stripes) { | ||
3032 | multi->stripes[i].length -= | ||
3033 | stripe_offset; | ||
3034 | if (i == map->sub_stripes - 1) | ||
3035 | stripe_offset = 0; | ||
3036 | } | ||
3037 | if (stripe_index >= last_stripe && | ||
3038 | stripe_index <= (last_stripe + | ||
3039 | map->sub_stripes - 1)) { | ||
3040 | multi->stripes[i].length -= | ||
3041 | stripe_end_offset; | ||
3042 | } | ||
3043 | } else | ||
3044 | multi->stripes[i].length = *length; | ||
3045 | |||
3046 | stripe_index++; | ||
3047 | if (stripe_index == map->num_stripes) { | ||
3048 | /* This could only happen for RAID0/10 */ | ||
3049 | stripe_index = 0; | ||
3050 | stripe_nr++; | ||
3051 | } | ||
3052 | } | ||
3053 | } else { | ||
3054 | for (i = 0; i < num_stripes; i++) { | ||
3055 | multi->stripes[i].physical = | ||
3056 | map->stripes[stripe_index].physical + | ||
3057 | stripe_offset + | ||
3058 | stripe_nr * map->stripe_len; | ||
3059 | multi->stripes[i].dev = | ||
3060 | map->stripes[stripe_index].dev; | ||
3061 | stripe_index++; | ||
2793 | } | 3062 | } |
2794 | stripe_index++; | ||
2795 | } | 3063 | } |
2796 | if (multi_ret) { | 3064 | if (multi_ret) { |
2797 | *multi_ret = multi; | 3065 | *multi_ret = multi; |
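
For RAID0 and RAID10 discards, each device receives a different number of bytes: the loop above finds, for each device, the highest global stripe in the range that lands on that device, derives a per-device length from it, and then trims the first and last stripes by stripe_offset and stripe_end_offset. A userspace sketch of the RAID0 case, with assumed values (3 devices and the [1, 5) stripe range from the sketch above; "row" stands in for the quotient that do_div() leaves behind in stripe_nr):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t stripe_len = 64 * 1024;
        const int num_stripes = 3;              /* devices in the RAID0 */
        uint64_t stripe_nr = 1, stripe_nr_end = 5;
        uint64_t stripe_offset = 36 * 1024;     /* used part of the first stripe */
        uint64_t stripe_end_offset = 20 * 1024; /* unused tail of the last stripe */
        uint32_t last_stripe = (uint32_t)((stripe_nr_end - 1) % num_stripes);
        int stripe_index = (int)(stripe_nr % num_stripes);
        uint64_t row = stripe_nr / num_stripes;
        uint64_t num = stripe_nr_end - stripe_nr;
        int i;

        if (num > (uint64_t)num_stripes)
            num = num_stripes;

        for (i = 0; i < (int)num; i++) {
            uint64_t stripes, length;
            int j;

            /* highest global stripe <= stripe_nr_end - 1 on this device */
            for (j = 0; j < num_stripes; j++)
                if ((stripe_nr_end - 1 - j) % num_stripes ==
                    (uint64_t)stripe_index)
                    break;
            stripes = (stripe_nr_end - 1 - j) / num_stripes;
            length = stripe_len * (stripes - row + 1);

            if (i == 0) {                       /* first stripe is partial */
                length -= stripe_offset;
                stripe_offset = 0;
            }
            if ((uint32_t)stripe_index == last_stripe)
                length -= stripe_end_offset;

            printf("device %d: %llu bytes\n", stripe_index,
                   (unsigned long long)length);

            if (++stripe_index == num_stripes) {
                stripe_index = 0;
                row++;                          /* wrapped to the next row */
            }
        }
        return 0;
    }

The three per-device lengths it prints (72K + 64K + 64K) sum back to the original 200K range.
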
@@ -2808,7 +3076,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
2808 | struct btrfs_multi_bio **multi_ret, int mirror_num) | 3076 | struct btrfs_multi_bio **multi_ret, int mirror_num) |
2809 | { | 3077 | { |
2810 | return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, | 3078 | return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, |
2811 | mirror_num, NULL); | 3079 | mirror_num); |
2812 | } | 3080 | } |
2813 | 3081 | ||
2814 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | 3082 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, |
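
With the unplug_page argument gone, btrfs_map_block() is a thin wrapper around __btrfs_map_block(). A hedged, caller-side sketch of a typical lookup (map_tree and logical are assumed to be in scope; the caller owns the returned multi and must kfree() it):

    struct btrfs_multi_bio *multi = NULL;
    u64 length = PAGE_CACHE_SIZE;
    int ret;

    ret = btrfs_map_block(map_tree, READ, logical, &length, &multi, 0);
    if (ret)
        return ret;
    /* multi->num_stripes entries; stripes[i].dev and stripes[i].physical
     * give the device and byte offset backing this logical range */
    kfree(multi);
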
@@ -2876,14 +3144,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
2876 | return 0; | 3144 | return 0; |
2877 | } | 3145 | } |
2878 | 3146 | ||
2879 | int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, | ||
2880 | u64 logical, struct page *page) | ||
2881 | { | ||
2882 | u64 length = PAGE_CACHE_SIZE; | ||
2883 | return __btrfs_map_block(map_tree, READ, logical, &length, | ||
2884 | NULL, 0, page); | ||
2885 | } | ||
2886 | |||
2887 | static void end_bio_multi_stripe(struct bio *bio, int err) | 3147 | static void end_bio_multi_stripe(struct bio *bio, int err) |
2888 | { | 3148 | { |
2889 | struct btrfs_multi_bio *multi = bio->bi_private; | 3149 | struct btrfs_multi_bio *multi = bio->bi_private; |
@@ -3034,8 +3294,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
3034 | } | 3294 | } |
3035 | bio->bi_sector = multi->stripes[dev_nr].physical >> 9; | 3295 | bio->bi_sector = multi->stripes[dev_nr].physical >> 9; |
3036 | dev = multi->stripes[dev_nr].dev; | 3296 | dev = multi->stripes[dev_nr].dev; |
3037 | BUG_ON(rw == WRITE && !dev->writeable); | 3297 | if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { |
3038 | if (dev && dev->bdev) { | ||
3039 | bio->bi_bdev = dev->bdev; | 3298 | bio->bi_bdev = dev->bdev; |
3040 | if (async_submit) | 3299 | if (async_submit) |
3041 | schedule_bio(root, dev, rw, bio); | 3300 | schedule_bio(root, dev, rw, bio); |
@@ -3084,12 +3343,13 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, | |||
3084 | return NULL; | 3343 | return NULL; |
3085 | list_add(&device->dev_list, | 3344 | list_add(&device->dev_list, |
3086 | &fs_devices->devices); | 3345 | &fs_devices->devices); |
3087 | device->barriers = 1; | ||
3088 | device->dev_root = root->fs_info->dev_root; | 3346 | device->dev_root = root->fs_info->dev_root; |
3089 | device->devid = devid; | 3347 | device->devid = devid; |
3090 | device->work.func = pending_bios_fn; | 3348 | device->work.func = pending_bios_fn; |
3091 | device->fs_devices = fs_devices; | 3349 | device->fs_devices = fs_devices; |
3350 | device->missing = 1; | ||
3092 | fs_devices->num_devices++; | 3351 | fs_devices->num_devices++; |
3352 | fs_devices->missing_devices++; | ||
3093 | spin_lock_init(&device->io_lock); | 3353 | spin_lock_init(&device->io_lock); |
3094 | INIT_LIST_HEAD(&device->dev_alloc_list); | 3354 | INIT_LIST_HEAD(&device->dev_alloc_list); |
3095 | memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); | 3355 | memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); |
@@ -3126,7 +3386,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
3126 | free_extent_map(em); | 3386 | free_extent_map(em); |
3127 | } | 3387 | } |
3128 | 3388 | ||
3129 | em = alloc_extent_map(GFP_NOFS); | 3389 | em = alloc_extent_map(); |
3130 | if (!em) | 3390 | if (!em) |
3131 | return -ENOMEM; | 3391 | return -ENOMEM; |
3132 | num_stripes = btrfs_chunk_num_stripes(leaf, chunk); | 3392 | num_stripes = btrfs_chunk_num_stripes(leaf, chunk); |
@@ -3287,6 +3547,15 @@ static int read_one_dev(struct btrfs_root *root, | |||
3287 | device = add_missing_dev(root, devid, dev_uuid); | 3547 | device = add_missing_dev(root, devid, dev_uuid); |
3288 | if (!device) | 3548 | if (!device) |
3289 | return -ENOMEM; | 3549 | return -ENOMEM; |
3550 | } else if (!device->missing) { | ||
3551 | /* | ||
3552 | * this happens when a device that was properly set up | ||
3553 | * in the device info lists suddenly goes bad. | ||
3554 | * device->bdev is NULL, and so we have to set | ||
3555 | * device->missing to one here | ||
3556 | */ | ||
3557 | root->fs_info->fs_devices->missing_devices++; | ||
3558 | device->missing = 1; | ||
3290 | } | 3559 | } |
3291 | } | 3560 | } |
3292 | 3561 | ||
@@ -3306,15 +3575,6 @@ static int read_one_dev(struct btrfs_root *root, | |||
3306 | return ret; | 3575 | return ret; |
3307 | } | 3576 | } |
3308 | 3577 | ||
3309 | int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf) | ||
3310 | { | ||
3311 | struct btrfs_dev_item *dev_item; | ||
3312 | |||
3313 | dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block, | ||
3314 | dev_item); | ||
3315 | return read_one_dev(root, buf, dev_item); | ||
3316 | } | ||
3317 | |||
3318 | int btrfs_read_sys_array(struct btrfs_root *root) | 3578 | int btrfs_read_sys_array(struct btrfs_root *root) |
3319 | { | 3579 | { |
3320 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | 3580 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; |
@@ -3431,7 +3691,7 @@ again: | |||
3431 | } | 3691 | } |
3432 | if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { | 3692 | if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { |
3433 | key.objectid = 0; | 3693 | key.objectid = 0; |
3434 | btrfs_release_path(root, path); | 3694 | btrfs_release_path(path); |
3435 | goto again; | 3695 | goto again; |
3436 | } | 3696 | } |
3437 | ret = 0; | 3697 | ret = 0; |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 31b0fabdd2ea..7c12d61ae7ae 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -20,8 +20,11 @@ | |||
20 | #define __BTRFS_VOLUMES_ | 20 | #define __BTRFS_VOLUMES_ |
21 | 21 | ||
22 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
23 | #include <linux/sort.h> | ||
23 | #include "async-thread.h" | 24 | #include "async-thread.h" |
24 | 25 | ||
26 | #define BTRFS_STRIPE_LEN (64 * 1024) | ||
27 | |||
25 | struct buffer_head; | 28 | struct buffer_head; |
26 | struct btrfs_pending_bios { | 29 | struct btrfs_pending_bios { |
27 | struct bio *head; | 30 | struct bio *head; |
@@ -42,15 +45,15 @@ struct btrfs_device { | |||
42 | int running_pending; | 45 | int running_pending; |
43 | u64 generation; | 46 | u64 generation; |
44 | 47 | ||
45 | int barriers; | ||
46 | int writeable; | 48 | int writeable; |
47 | int in_fs_metadata; | 49 | int in_fs_metadata; |
50 | int missing; | ||
48 | 51 | ||
49 | spinlock_t io_lock; | 52 | spinlock_t io_lock; |
50 | 53 | ||
51 | struct block_device *bdev; | 54 | struct block_device *bdev; |
52 | 55 | ||
53 | /* the mode sent to open_bdev_exclusive */ | 56 | /* the mode sent to blkdev_get */ |
54 | fmode_t mode; | 57 | fmode_t mode; |
55 | 58 | ||
56 | char *name; | 59 | char *name; |
@@ -82,7 +85,12 @@ struct btrfs_device { | |||
82 | /* physical drive uuid (or lvm uuid) */ | 85 | /* physical drive uuid (or lvm uuid) */ |
83 | u8 uuid[BTRFS_UUID_SIZE]; | 86 | u8 uuid[BTRFS_UUID_SIZE]; |
84 | 87 | ||
88 | /* per-device scrub information */ | ||
89 | struct scrub_dev *scrub_device; | ||
90 | |||
85 | struct btrfs_work work; | 91 | struct btrfs_work work; |
92 | struct rcu_head rcu; | ||
93 | struct work_struct rcu_work; | ||
86 | }; | 94 | }; |
87 | 95 | ||
88 | struct btrfs_fs_devices { | 96 | struct btrfs_fs_devices { |
@@ -94,6 +102,7 @@ struct btrfs_fs_devices { | |||
94 | u64 num_devices; | 102 | u64 num_devices; |
95 | u64 open_devices; | 103 | u64 open_devices; |
96 | u64 rw_devices; | 104 | u64 rw_devices; |
105 | u64 missing_devices; | ||
97 | u64 total_rw_bytes; | 106 | u64 total_rw_bytes; |
98 | struct block_device *latest_bdev; | 107 | struct block_device *latest_bdev; |
99 | 108 | ||
@@ -122,6 +131,7 @@ struct btrfs_fs_devices { | |||
122 | struct btrfs_bio_stripe { | 131 | struct btrfs_bio_stripe { |
123 | struct btrfs_device *dev; | 132 | struct btrfs_device *dev; |
124 | u64 physical; | 133 | u64 physical; |
134 | u64 length; /* only used for discard mappings */ | ||
125 | }; | 135 | }; |
126 | 136 | ||
127 | struct btrfs_multi_bio { | 137 | struct btrfs_multi_bio { |
@@ -135,6 +145,30 @@ struct btrfs_multi_bio { | |||
135 | struct btrfs_bio_stripe stripes[]; | 145 | struct btrfs_bio_stripe stripes[]; |
136 | }; | 146 | }; |
137 | 147 | ||
148 | struct btrfs_device_info { | ||
149 | struct btrfs_device *dev; | ||
150 | u64 dev_offset; | ||
151 | u64 max_avail; | ||
152 | u64 total_avail; | ||
153 | }; | ||
154 | |||
155 | struct map_lookup { | ||
156 | u64 type; | ||
157 | int io_align; | ||
158 | int io_width; | ||
159 | int stripe_len; | ||
160 | int sector_size; | ||
161 | int num_stripes; | ||
162 | int sub_stripes; | ||
163 | struct btrfs_bio_stripe stripes[]; | ||
164 | }; | ||
165 | |||
166 | #define map_lookup_size(n) (sizeof(struct map_lookup) + \ | ||
167 | (sizeof(struct btrfs_bio_stripe) * (n))) | ||
168 | |||
169 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | ||
170 | u64 end, u64 *length); | ||
171 | |||
138 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ | 172 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ |
139 | (sizeof(struct btrfs_bio_stripe) * (n))) | 173 | (sizeof(struct btrfs_bio_stripe) * (n))) |
140 | 174 | ||
@@ -156,7 +190,6 @@ void btrfs_mapping_init(struct btrfs_mapping_tree *tree); | |||
156 | void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); | 190 | void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); |
157 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | 191 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, |
158 | int mirror_num, int async_submit); | 192 | int mirror_num, int async_submit); |
159 | int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf); | ||
160 | int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | 193 | int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, |
161 | fmode_t flags, void *holder); | 194 | fmode_t flags, void *holder); |
162 | int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | 195 | int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, |
@@ -169,8 +202,6 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, | |||
169 | int btrfs_rm_device(struct btrfs_root *root, char *device_path); | 202 | int btrfs_rm_device(struct btrfs_root *root, char *device_path); |
170 | int btrfs_cleanup_fs_uuids(void); | 203 | int btrfs_cleanup_fs_uuids(void); |
171 | int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); | 204 | int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); |
172 | int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, | ||
173 | u64 logical, struct page *page); | ||
174 | int btrfs_grow_device(struct btrfs_trans_handle *trans, | 205 | int btrfs_grow_device(struct btrfs_trans_handle *trans, |
175 | struct btrfs_device *device, u64 new_size); | 206 | struct btrfs_device *device, u64 new_size); |
176 | struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, | 207 | struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, |
@@ -178,8 +209,6 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, | |||
178 | int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); | 209 | int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); |
179 | int btrfs_init_new_device(struct btrfs_root *root, char *path); | 210 | int btrfs_init_new_device(struct btrfs_root *root, char *path); |
180 | int btrfs_balance(struct btrfs_root *dev_root); | 211 | int btrfs_balance(struct btrfs_root *dev_root); |
181 | void btrfs_unlock_volumes(void); | ||
182 | void btrfs_lock_volumes(void); | ||
183 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); | 212 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); |
184 | int find_free_dev_extent(struct btrfs_trans_handle *trans, | 213 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
185 | struct btrfs_device *device, u64 num_bytes, | 214 | struct btrfs_device *device, u64 num_bytes, |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 88ecbb215878..5366fe452ab0 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -44,7 +44,7 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name, | |||
44 | return -ENOMEM; | 44 | return -ENOMEM; |
45 | 45 | ||
46 | /* lookup the xattr by name */ | 46 | /* lookup the xattr by name */ |
47 | di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, | 47 | di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), name, |
48 | strlen(name), 0); | 48 | strlen(name), 0); |
49 | if (!di) { | 49 | if (!di) { |
50 | ret = -ENODATA; | 50 | ret = -ENODATA; |
@@ -103,7 +103,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans, | |||
103 | return -ENOMEM; | 103 | return -ENOMEM; |
104 | 104 | ||
105 | /* first lets see if we already have this xattr */ | 105 | /* first lets see if we already have this xattr */ |
106 | di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, | 106 | di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, |
107 | strlen(name), -1); | 107 | strlen(name), -1); |
108 | if (IS_ERR(di)) { | 108 | if (IS_ERR(di)) { |
109 | ret = PTR_ERR(di); | 109 | ret = PTR_ERR(di); |
@@ -120,13 +120,13 @@ static int do_setxattr(struct btrfs_trans_handle *trans, | |||
120 | 120 | ||
121 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | 121 | ret = btrfs_delete_one_dir_name(trans, root, path, di); |
122 | BUG_ON(ret); | 122 | BUG_ON(ret); |
123 | btrfs_release_path(root, path); | 123 | btrfs_release_path(path); |
124 | 124 | ||
125 | /* if we don't have a value then we are removing the xattr */ | 125 | /* if we don't have a value then we are removing the xattr */ |
126 | if (!value) | 126 | if (!value) |
127 | goto out; | 127 | goto out; |
128 | } else { | 128 | } else { |
129 | btrfs_release_path(root, path); | 129 | btrfs_release_path(path); |
130 | 130 | ||
131 | if (flags & XATTR_REPLACE) { | 131 | if (flags & XATTR_REPLACE) { |
132 | /* we couldn't find the attr to replace */ | 132 | /* we couldn't find the attr to replace */ |
@@ -136,7 +136,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans, | |||
136 | } | 136 | } |
137 | 137 | ||
138 | /* ok we have to create a completely new xattr */ | 138 | /* ok we have to create a completely new xattr */ |
139 | ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino, | 139 | ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), |
140 | name, name_len, value, size); | 140 | name, name_len, value, size); |
141 | BUG_ON(ret); | 141 | BUG_ON(ret); |
142 | out: | 142 | out: |
@@ -158,8 +158,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
158 | if (IS_ERR(trans)) | 158 | if (IS_ERR(trans)) |
159 | return PTR_ERR(trans); | 159 | return PTR_ERR(trans); |
160 | 160 | ||
161 | btrfs_set_trans_block_group(trans, inode); | ||
162 | |||
163 | ret = do_setxattr(trans, inode, name, value, size, flags); | 161 | ret = do_setxattr(trans, inode, name, value, size, flags); |
164 | if (ret) | 162 | if (ret) |
165 | goto out; | 163 | goto out; |
@@ -178,21 +176,19 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
178 | struct inode *inode = dentry->d_inode; | 176 | struct inode *inode = dentry->d_inode; |
179 | struct btrfs_root *root = BTRFS_I(inode)->root; | 177 | struct btrfs_root *root = BTRFS_I(inode)->root; |
180 | struct btrfs_path *path; | 178 | struct btrfs_path *path; |
181 | struct btrfs_item *item; | ||
182 | struct extent_buffer *leaf; | 179 | struct extent_buffer *leaf; |
183 | struct btrfs_dir_item *di; | 180 | struct btrfs_dir_item *di; |
184 | int ret = 0, slot, advance; | 181 | int ret = 0, slot; |
185 | size_t total_size = 0, size_left = size; | 182 | size_t total_size = 0, size_left = size; |
186 | unsigned long name_ptr; | 183 | unsigned long name_ptr; |
187 | size_t name_len; | 184 | size_t name_len; |
188 | u32 nritems; | ||
189 | 185 | ||
190 | /* | 186 | /* |
191 | * ok we want all objects associated with this id. | 187 | * ok we want all objects associated with this id. |
192 | * NOTE: we set key.offset = 0; because we want to start with the | 188 | * NOTE: we set key.offset = 0; because we want to start with the |
193 | * first xattr that we find and walk forward | 189 | * first xattr that we find and walk forward |
194 | */ | 190 | */ |
195 | key.objectid = inode->i_ino; | 191 | key.objectid = btrfs_ino(inode); |
196 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); | 192 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); |
197 | key.offset = 0; | 193 | key.offset = 0; |
198 | 194 | ||
@@ -205,36 +201,25 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
205 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 201 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
206 | if (ret < 0) | 202 | if (ret < 0) |
207 | goto err; | 203 | goto err; |
208 | advance = 0; | 204 | |
209 | while (1) { | 205 | while (1) { |
210 | leaf = path->nodes[0]; | 206 | leaf = path->nodes[0]; |
211 | nritems = btrfs_header_nritems(leaf); | ||
212 | slot = path->slots[0]; | 207 | slot = path->slots[0]; |
213 | 208 | ||
214 | /* this is where we start walking through the path */ | 209 | /* this is where we start walking through the path */ |
215 | if (advance || slot >= nritems) { | 210 | if (slot >= btrfs_header_nritems(leaf)) { |
216 | /* | 211 | /* |
217 | * if we've reached the last slot in this leaf we need | 212 | * if we've reached the last slot in this leaf we need |
218 | * to go to the next leaf and reset everything | 213 | * to go to the next leaf and reset everything |
219 | */ | 214 | */ |
220 | if (slot >= nritems-1) { | 215 | ret = btrfs_next_leaf(root, path); |
221 | ret = btrfs_next_leaf(root, path); | 216 | if (ret < 0) |
222 | if (ret) | 217 | goto err; |
223 | break; | 218 | else if (ret > 0) |
224 | leaf = path->nodes[0]; | 219 | break; |
225 | nritems = btrfs_header_nritems(leaf); | 220 | continue; |
226 | slot = path->slots[0]; | ||
227 | } else { | ||
228 | /* | ||
229 | * just walking through the slots on this leaf | ||
230 | */ | ||
231 | slot++; | ||
232 | path->slots[0]++; | ||
233 | } | ||
234 | } | 221 | } |
235 | advance = 1; | ||
236 | 222 | ||
237 | item = btrfs_item_nr(leaf, slot); | ||
238 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 223 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
239 | 224 | ||
240 | /* check to make sure this item is what we want */ | 225 | /* check to make sure this item is what we want */ |
@@ -244,13 +229,15 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
244 | break; | 229 | break; |
245 | 230 | ||
246 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | 231 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); |
232 | if (verify_dir_item(root, leaf, di)) | ||
233 | continue; | ||
247 | 234 | ||
248 | name_len = btrfs_dir_name_len(leaf, di); | 235 | name_len = btrfs_dir_name_len(leaf, di); |
249 | total_size += name_len + 1; | 236 | total_size += name_len + 1; |
250 | 237 | ||
251 | /* we are just looking for how big our buffer needs to be */ | 238 | /* we are just looking for how big our buffer needs to be */ |
252 | if (!size) | 239 | if (!size) |
253 | continue; | 240 | goto next; |
254 | 241 | ||
255 | if (!buffer || (name_len + 1) > size_left) { | 242 | if (!buffer || (name_len + 1) > size_left) { |
256 | ret = -ERANGE; | 243 | ret = -ERANGE; |
@@ -263,6 +250,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
263 | 250 | ||
264 | size_left -= name_len + 1; | 251 | size_left -= name_len + 1; |
265 | buffer += name_len + 1; | 252 | buffer += name_len + 1; |
253 | next: | ||
254 | path->slots[0]++; | ||
266 | } | 255 | } |
267 | ret = total_size; | 256 | ret = total_size; |
268 | 257 | ||
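
The rewritten listxattr loop drops the advance/nritems bookkeeping in favor of the standard btrfs tree-walk idiom: if the current slot runs off the leaf, btrfs_next_leaf() loads the next one (returning a positive value at the end of the tree), and the slot is only advanced at the bottom, via the next: label. The skeleton of that idiom (not compilable on its own; error handling trimmed):

    while (1) {
        leaf = path->nodes[0];
        slot = path->slots[0];
        if (slot >= btrfs_header_nritems(leaf)) {
            ret = btrfs_next_leaf(root, path);
            if (ret < 0)
                goto err;       /* I/O or tree error */
            else if (ret > 0)
                break;          /* walked off the last leaf */
            continue;           /* leaf and slot changed, re-check */
        }
        btrfs_item_key_to_cpu(leaf, &key, slot);
        /* ... stop if the key no longer matches, else handle the item ... */
        path->slots[0]++;
    }
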
@@ -318,6 +307,15 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, | |||
318 | int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | 307 | int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, |
319 | size_t size, int flags) | 308 | size_t size, int flags) |
320 | { | 309 | { |
310 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | ||
311 | |||
312 | /* | ||
313 | * The permission on security.* and system.* is not checked | ||
314 | * in permission(). | ||
315 | */ | ||
316 | if (btrfs_root_readonly(root)) | ||
317 | return -EROFS; | ||
318 | |||
321 | /* | 319 | /* |
322 | * If this is a request for a synthetic attribute in the system.* | 320 | * If this is a request for a synthetic attribute in the system.* |
323 | * namespace use the generic infrastructure to resolve a handler | 321 | * namespace use the generic infrastructure to resolve a handler |
@@ -338,6 +336,15 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
338 | 336 | ||
339 | int btrfs_removexattr(struct dentry *dentry, const char *name) | 337 | int btrfs_removexattr(struct dentry *dentry, const char *name) |
340 | { | 338 | { |
339 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | ||
340 | |||
341 | /* | ||
342 | * The permission on security.* and system.* is not checked | ||
343 | * in permission(). | ||
344 | */ | ||
345 | if (btrfs_root_readonly(root)) | ||
346 | return -EROFS; | ||
347 | |||
341 | /* | 348 | /* |
342 | * If this is a request for a synthetic attribute in the system.* | 349 | * If this is a request for a synthetic attribute in the system.* |
343 | * namespace use the generic infrastructure to resolve a handler | 350 | * namespace use the generic infrastructure to resolve a handler |
@@ -354,7 +361,8 @@ int btrfs_removexattr(struct dentry *dentry, const char *name) | |||
354 | } | 361 | } |
355 | 362 | ||
356 | int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, | 363 | int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, |
357 | struct inode *inode, struct inode *dir) | 364 | struct inode *inode, struct inode *dir, |
365 | const struct qstr *qstr) | ||
358 | { | 366 | { |
359 | int err; | 367 | int err; |
360 | size_t len; | 368 | size_t len; |
@@ -362,7 +370,8 @@ int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, | |||
362 | char *suffix; | 370 | char *suffix; |
363 | char *name; | 371 | char *name; |
364 | 372 | ||
365 | err = security_inode_init_security(inode, dir, &suffix, &value, &len); | 373 | err = security_inode_init_security(inode, dir, qstr, &suffix, &value, |
374 | &len); | ||
366 | if (err) { | 375 | if (err) { |
367 | if (err == -EOPNOTSUPP) | 376 | if (err == -EOPNOTSUPP) |
368 | return 0; | 377 | return 0; |
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h index 7a43fd640bbb..b3cc8039134b 100644 --- a/fs/btrfs/xattr.h +++ b/fs/btrfs/xattr.h | |||
@@ -37,6 +37,7 @@ extern int btrfs_setxattr(struct dentry *dentry, const char *name, | |||
37 | extern int btrfs_removexattr(struct dentry *dentry, const char *name); | 37 | extern int btrfs_removexattr(struct dentry *dentry, const char *name); |
38 | 38 | ||
39 | extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, | 39 | extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, |
40 | struct inode *inode, struct inode *dir); | 40 | struct inode *inode, struct inode *dir, |
41 | const struct qstr *qstr); | ||
41 | 42 | ||
42 | #endif /* __XATTR__ */ | 43 | #endif /* __XATTR__ */ |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 3e2b90eaa239..faccd47c6c46 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -32,15 +32,6 @@ | |||
32 | #include <linux/bio.h> | 32 | #include <linux/bio.h> |
33 | #include "compression.h" | 33 | #include "compression.h" |
34 | 34 | ||
35 | /* Plan: call deflate() with avail_in == *sourcelen, | ||
36 | avail_out = *dstlen - 12 and flush == Z_FINISH. | ||
37 | If it doesn't manage to finish, call it again with | ||
38 | avail_in == 0 and avail_out set to the remaining 12 | ||
39 | bytes for it to clean up. | ||
40 | Q: Is 12 bytes sufficient? | ||
41 | */ | ||
42 | #define STREAM_END_SPACE 12 | ||
43 | |||
44 | struct workspace { | 35 | struct workspace { |
45 | z_stream inf_strm; | 36 | z_stream inf_strm; |
46 | z_stream def_strm; | 37 | z_stream def_strm; |
@@ -48,169 +39,63 @@ struct workspace { | |||
48 | struct list_head list; | 39 | struct list_head list; |
49 | }; | 40 | }; |
50 | 41 | ||
51 | static LIST_HEAD(idle_workspace); | 42 | static void zlib_free_workspace(struct list_head *ws) |
52 | static DEFINE_SPINLOCK(workspace_lock); | ||
53 | static unsigned long num_workspace; | ||
54 | static atomic_t alloc_workspace = ATOMIC_INIT(0); | ||
55 | static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); | ||
56 | |||
57 | /* | ||
58 | * this finds an available zlib workspace or allocates a new one | ||
59 | * NULL or an ERR_PTR is returned if things go bad. | ||
60 | */ | ||
61 | static struct workspace *find_zlib_workspace(void) | ||
62 | { | 43 | { |
63 | struct workspace *workspace; | 44 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
64 | int ret; | ||
65 | int cpus = num_online_cpus(); | ||
66 | |||
67 | again: | ||
68 | spin_lock(&workspace_lock); | ||
69 | if (!list_empty(&idle_workspace)) { | ||
70 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
71 | list); | ||
72 | list_del(&workspace->list); | ||
73 | num_workspace--; | ||
74 | spin_unlock(&workspace_lock); | ||
75 | return workspace; | ||
76 | |||
77 | } | ||
78 | spin_unlock(&workspace_lock); | ||
79 | if (atomic_read(&alloc_workspace) > cpus) { | ||
80 | DEFINE_WAIT(wait); | ||
81 | prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
82 | if (atomic_read(&alloc_workspace) > cpus) | ||
83 | schedule(); | ||
84 | finish_wait(&workspace_wait, &wait); | ||
85 | goto again; | ||
86 | } | ||
87 | atomic_inc(&alloc_workspace); | ||
88 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | ||
89 | if (!workspace) { | ||
90 | ret = -ENOMEM; | ||
91 | goto fail; | ||
92 | } | ||
93 | |||
94 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); | ||
95 | if (!workspace->def_strm.workspace) { | ||
96 | ret = -ENOMEM; | ||
97 | goto fail; | ||
98 | } | ||
99 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); | ||
100 | if (!workspace->inf_strm.workspace) { | ||
101 | ret = -ENOMEM; | ||
102 | goto fail_inflate; | ||
103 | } | ||
104 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); | ||
105 | if (!workspace->buf) { | ||
106 | ret = -ENOMEM; | ||
107 | goto fail_kmalloc; | ||
108 | } | ||
109 | return workspace; | ||
110 | 45 | ||
111 | fail_kmalloc: | ||
112 | vfree(workspace->inf_strm.workspace); | ||
113 | fail_inflate: | ||
114 | vfree(workspace->def_strm.workspace); | ||
115 | fail: | ||
116 | kfree(workspace); | ||
117 | atomic_dec(&alloc_workspace); | ||
118 | wake_up(&workspace_wait); | ||
119 | return ERR_PTR(ret); | ||
120 | } | ||
121 | |||
122 | /* | ||
123 | * put a workspace struct back on the list or free it if we have enough | ||
124 | * idle ones sitting around | ||
125 | */ | ||
126 | static int free_workspace(struct workspace *workspace) | ||
127 | { | ||
128 | spin_lock(&workspace_lock); | ||
129 | if (num_workspace < num_online_cpus()) { | ||
130 | list_add_tail(&workspace->list, &idle_workspace); | ||
131 | num_workspace++; | ||
132 | spin_unlock(&workspace_lock); | ||
133 | if (waitqueue_active(&workspace_wait)) | ||
134 | wake_up(&workspace_wait); | ||
135 | return 0; | ||
136 | } | ||
137 | spin_unlock(&workspace_lock); | ||
138 | vfree(workspace->def_strm.workspace); | 46 | vfree(workspace->def_strm.workspace); |
139 | vfree(workspace->inf_strm.workspace); | 47 | vfree(workspace->inf_strm.workspace); |
140 | kfree(workspace->buf); | 48 | kfree(workspace->buf); |
141 | kfree(workspace); | 49 | kfree(workspace); |
142 | |||
143 | atomic_dec(&alloc_workspace); | ||
144 | if (waitqueue_active(&workspace_wait)) | ||
145 | wake_up(&workspace_wait); | ||
146 | return 0; | ||
147 | } | 50 | } |
148 | 51 | ||
149 | /* | 52 | static struct list_head *zlib_alloc_workspace(void) |
150 | * cleanup function for module exit | ||
151 | */ | ||
152 | static void free_workspaces(void) | ||
153 | { | 53 | { |
154 | struct workspace *workspace; | 54 | struct workspace *workspace; |
155 | while (!list_empty(&idle_workspace)) { | 55 | |
156 | workspace = list_entry(idle_workspace.next, struct workspace, | 56 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); |
157 | list); | 57 | if (!workspace) |
158 | list_del(&workspace->list); | 58 | return ERR_PTR(-ENOMEM); |
159 | vfree(workspace->def_strm.workspace); | 59 | |
160 | vfree(workspace->inf_strm.workspace); | 60 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize( |
161 | kfree(workspace->buf); | 61 | MAX_WBITS, MAX_MEM_LEVEL)); |
162 | kfree(workspace); | 62 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); |
163 | atomic_dec(&alloc_workspace); | 63 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
164 | } | 64 | if (!workspace->def_strm.workspace || |
65 | !workspace->inf_strm.workspace || !workspace->buf) | ||
66 | goto fail; | ||
67 | |||
68 | INIT_LIST_HEAD(&workspace->list); | ||
69 | |||
70 | return &workspace->list; | ||
71 | fail: | ||
72 | zlib_free_workspace(&workspace->list); | ||
73 | return ERR_PTR(-ENOMEM); | ||
165 | } | 74 | } |
166 | 75 | ||
167 | /* | 76 | static int zlib_compress_pages(struct list_head *ws, |
168 | * given an address space and start/len, compress the bytes. | 77 | struct address_space *mapping, |
169 | * | 78 | u64 start, unsigned long len, |
170 | * pages are allocated to hold the compressed result and stored | 79 | struct page **pages, |
171 | * in 'pages' | 80 | unsigned long nr_dest_pages, |
172 | * | 81 | unsigned long *out_pages, |
173 | * out_pages is used to return the number of pages allocated. There | 82 | unsigned long *total_in, |
174 | * may be pages allocated even if we return an error | 83 | unsigned long *total_out, |
175 | * | 84 | unsigned long max_out) |
176 | * total_in is used to return the number of bytes actually read. It | ||
177 | * may be smaller then len if we had to exit early because we | ||
178 | * ran out of room in the pages array or because we cross the | ||
179 | * max_out threshold. | ||
180 | * | ||
181 | * total_out is used to return the total number of compressed bytes | ||
182 | * | ||
183 | * max_out tells us the max number of bytes that we're allowed to | ||
184 | * stuff into pages | ||
185 | */ | ||
186 | int btrfs_zlib_compress_pages(struct address_space *mapping, | ||
187 | u64 start, unsigned long len, | ||
188 | struct page **pages, | ||
189 | unsigned long nr_dest_pages, | ||
190 | unsigned long *out_pages, | ||
191 | unsigned long *total_in, | ||
192 | unsigned long *total_out, | ||
193 | unsigned long max_out) | ||
194 | { | 85 | { |
86 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
195 | int ret; | 87 | int ret; |
196 | struct workspace *workspace; | ||
197 | char *data_in; | 88 | char *data_in; |
198 | char *cpage_out; | 89 | char *cpage_out; |
199 | int nr_pages = 0; | 90 | int nr_pages = 0; |
200 | struct page *in_page = NULL; | 91 | struct page *in_page = NULL; |
201 | struct page *out_page = NULL; | 92 | struct page *out_page = NULL; |
202 | int out_written = 0; | ||
203 | int in_read = 0; | ||
204 | unsigned long bytes_left; | 93 | unsigned long bytes_left; |
205 | 94 | ||
206 | *out_pages = 0; | 95 | *out_pages = 0; |
207 | *total_out = 0; | 96 | *total_out = 0; |
208 | *total_in = 0; | 97 | *total_in = 0; |
209 | 98 | ||
210 | workspace = find_zlib_workspace(); | ||
211 | if (IS_ERR(workspace)) | ||
212 | return -1; | ||
213 | |||
214 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | 99 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { |
215 | printk(KERN_WARNING "deflateInit failed\n"); | 100 | printk(KERN_WARNING "deflateInit failed\n"); |
216 | ret = -1; | 101 | ret = -1; |
@@ -224,6 +109,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
224 | data_in = kmap(in_page); | 109 | data_in = kmap(in_page); |
225 | 110 | ||
226 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 111 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
112 | if (out_page == NULL) { | ||
113 | ret = -1; | ||
114 | goto out; | ||
115 | } | ||
227 | cpage_out = kmap(out_page); | 116 | cpage_out = kmap(out_page); |
228 | pages[0] = out_page; | 117 | pages[0] = out_page; |
229 | nr_pages = 1; | 118 | nr_pages = 1; |
@@ -233,9 +122,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
233 | workspace->def_strm.avail_out = PAGE_CACHE_SIZE; | 122 | workspace->def_strm.avail_out = PAGE_CACHE_SIZE; |
234 | workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); | 123 | workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); |
235 | 124 | ||
236 | out_written = 0; | ||
237 | in_read = 0; | ||
238 | |||
239 | while (workspace->def_strm.total_in < len) { | 125 | while (workspace->def_strm.total_in < len) { |
240 | ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); | 126 | ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); |
241 | if (ret != Z_OK) { | 127 | if (ret != Z_OK) { |
@@ -265,6 +151,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
265 | goto out; | 151 | goto out; |
266 | } | 152 | } |
267 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 153 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
154 | if (out_page == NULL) { | ||
155 | ret = -1; | ||
156 | goto out; | ||
157 | } | ||
268 | cpage_out = kmap(out_page); | 158 | cpage_out = kmap(out_page); |
269 | pages[nr_pages] = out_page; | 159 | pages[nr_pages] = out_page; |
270 | nr_pages++; | 160 | nr_pages++; |
@@ -319,55 +209,26 @@ out: | |||
319 | kunmap(in_page); | 209 | kunmap(in_page); |
320 | page_cache_release(in_page); | 210 | page_cache_release(in_page); |
321 | } | 211 | } |
322 | free_workspace(workspace); | ||
323 | return ret; | 212 | return ret; |
324 | } | 213 | } |
325 | 214 | ||
326 | /* | 215 | static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, |
327 | * pages_in is an array of pages with compressed data. | 216 | u64 disk_start, |
328 | * | 217 | struct bio_vec *bvec, |
329 | * disk_start is the starting logical offset of this array in the file | 218 | int vcnt, |
330 | * | 219 | size_t srclen) |
331 | * bvec is a bio_vec of pages from the file that we want to decompress into | ||
332 | * | ||
333 | * vcnt is the count of pages in the biovec | ||
334 | * | ||
335 | * srclen is the number of bytes in pages_in | ||
336 | * | ||
337 | * The basic idea is that we have a bio that was created by readpages. | ||
338 | * The pages in the bio are for the uncompressed data, and they may not | ||
339 | * be contiguous. They all correspond to the range of bytes covered by | ||
340 | * the compressed extent. | ||
341 | */ | ||
342 | int btrfs_zlib_decompress_biovec(struct page **pages_in, | ||
343 | u64 disk_start, | ||
344 | struct bio_vec *bvec, | ||
345 | int vcnt, | ||
346 | size_t srclen) | ||
347 | { | 220 | { |
348 | int ret = 0; | 221 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
222 | int ret = 0, ret2; | ||
349 | int wbits = MAX_WBITS; | 223 | int wbits = MAX_WBITS; |
350 | struct workspace *workspace; | ||
351 | char *data_in; | 224 | char *data_in; |
352 | size_t total_out = 0; | 225 | size_t total_out = 0; |
353 | unsigned long page_bytes_left; | ||
354 | unsigned long page_in_index = 0; | 226 | unsigned long page_in_index = 0; |
355 | unsigned long page_out_index = 0; | 227 | unsigned long page_out_index = 0; |
356 | struct page *page_out; | ||
357 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | 228 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / |
358 | PAGE_CACHE_SIZE; | 229 | PAGE_CACHE_SIZE; |
359 | unsigned long buf_start; | 230 | unsigned long buf_start; |
360 | unsigned long buf_offset; | ||
361 | unsigned long bytes; | ||
362 | unsigned long working_bytes; | ||
363 | unsigned long pg_offset; | 231 | unsigned long pg_offset; |
364 | unsigned long start_byte; | ||
365 | unsigned long current_buf_start; | ||
366 | char *kaddr; | ||
367 | |||
368 | workspace = find_zlib_workspace(); | ||
369 | if (IS_ERR(workspace)) | ||
370 | return -ENOMEM; | ||
371 | 232 | ||
372 | data_in = kmap(pages_in[page_in_index]); | 233 | data_in = kmap(pages_in[page_in_index]); |
373 | workspace->inf_strm.next_in = data_in; | 234 | workspace->inf_strm.next_in = data_in; |
@@ -377,8 +238,6 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
377 | workspace->inf_strm.total_out = 0; | 238 | workspace->inf_strm.total_out = 0; |
378 | workspace->inf_strm.next_out = workspace->buf; | 239 | workspace->inf_strm.next_out = workspace->buf; |
379 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 240 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
380 | page_out = bvec[page_out_index].bv_page; | ||
381 | page_bytes_left = PAGE_CACHE_SIZE; | ||
382 | pg_offset = 0; | 241 | pg_offset = 0; |
383 | 242 | ||
384 | /* If it's deflate, and it's got no preset dictionary, then | 243 | /* If it's deflate, and it's got no preset dictionary, then |
@@ -394,107 +253,29 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
394 | 253 | ||
395 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 254 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
396 | printk(KERN_WARNING "inflateInit failed\n"); | 255 | printk(KERN_WARNING "inflateInit failed\n"); |
397 | ret = -1; | 256 | return -1; |
398 | goto out; | ||
399 | } | 257 | } |
400 | while (workspace->inf_strm.total_in < srclen) { | 258 | while (workspace->inf_strm.total_in < srclen) { |
401 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | 259 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); |
402 | if (ret != Z_OK && ret != Z_STREAM_END) | 260 | if (ret != Z_OK && ret != Z_STREAM_END) |
403 | break; | 261 | break; |
404 | /* | ||
405 | * buf start is the byte offset we're of the start of | ||
406 | * our workspace buffer | ||
407 | */ | ||
408 | buf_start = total_out; | ||
409 | 262 | ||
410 | /* total_out is the last byte of the workspace buffer */ | 263 | buf_start = total_out; |
411 | total_out = workspace->inf_strm.total_out; | 264 | total_out = workspace->inf_strm.total_out; |
412 | 265 | ||
413 | working_bytes = total_out - buf_start; | 266 | /* we didn't make progress in this inflate call; we're done */ |
414 | 267 | if (buf_start == total_out) | |
415 | /* | ||
416 | * start byte is the first byte of the page we're currently | ||
417 | * copying into relative to the start of the compressed data. | ||
418 | */ | ||
419 | start_byte = page_offset(page_out) - disk_start; | ||
420 | |||
421 | if (working_bytes == 0) { | ||
422 | /* we didn't make progress in this inflate | ||
423 | * call, we're done | ||
424 | */ | ||
425 | if (ret != Z_STREAM_END) | ||
426 | ret = -1; | ||
427 | break; | 268 | break; |
428 | } | ||
429 | 269 | ||
430 | /* we haven't yet hit data corresponding to this page */ | 270 | ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, |
431 | if (total_out <= start_byte) | 271 | total_out, disk_start, |
432 | goto next; | 272 | bvec, vcnt, |
433 | 273 | &page_out_index, &pg_offset); | |
434 | /* | 274 | if (ret2 == 0) { |
435 | * the start of the data we care about is offset into | 275 | ret = 0; |
436 | * the middle of our working buffer | 276 | goto done; |
437 | */ | ||
438 | if (total_out > start_byte && buf_start < start_byte) { | ||
439 | buf_offset = start_byte - buf_start; | ||
440 | working_bytes -= buf_offset; | ||
441 | } else { | ||
442 | buf_offset = 0; | ||
443 | } | ||
444 | current_buf_start = buf_start; | ||
445 | |||
446 | /* copy bytes from the working buffer into the pages */ | ||
447 | while (working_bytes > 0) { | ||
448 | bytes = min(PAGE_CACHE_SIZE - pg_offset, | ||
449 | PAGE_CACHE_SIZE - buf_offset); | ||
450 | bytes = min(bytes, working_bytes); | ||
451 | kaddr = kmap_atomic(page_out, KM_USER0); | ||
452 | memcpy(kaddr + pg_offset, workspace->buf + buf_offset, | ||
453 | bytes); | ||
454 | kunmap_atomic(kaddr, KM_USER0); | ||
455 | flush_dcache_page(page_out); | ||
456 | |||
457 | pg_offset += bytes; | ||
458 | page_bytes_left -= bytes; | ||
459 | buf_offset += bytes; | ||
460 | working_bytes -= bytes; | ||
461 | current_buf_start += bytes; | ||
462 | |||
463 | /* check if we need to pick another page */ | ||
464 | if (page_bytes_left == 0) { | ||
465 | page_out_index++; | ||
466 | if (page_out_index >= vcnt) { | ||
467 | ret = 0; | ||
468 | goto done; | ||
469 | } | ||
470 | |||
471 | page_out = bvec[page_out_index].bv_page; | ||
472 | pg_offset = 0; | ||
473 | page_bytes_left = PAGE_CACHE_SIZE; | ||
474 | start_byte = page_offset(page_out) - disk_start; | ||
475 | |||
476 | /* | ||
477 | * make sure our new page is covered by this | ||
478 | * working buffer | ||
479 | */ | ||
480 | if (total_out <= start_byte) | ||
481 | goto next; | ||
482 | |||
483 | /* the next page in the biovec might not | ||
484 | * be adjacent to the last page, but it | ||
485 | * might still be found inside this working | ||
486 | * buffer. bump our offset pointer | ||
487 | */ | ||
488 | if (total_out > start_byte && | ||
489 | current_buf_start < start_byte) { | ||
490 | buf_offset = start_byte - buf_start; | ||
491 | working_bytes = total_out - start_byte; | ||
492 | current_buf_start = buf_start + | ||
493 | buf_offset; | ||
494 | } | ||
495 | } | ||
496 | } | 277 | } |
497 | next: | 278 | |
498 | workspace->inf_strm.next_out = workspace->buf; | 279 | workspace->inf_strm.next_out = workspace->buf; |
499 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 280 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
500 | 281 | ||
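
The long copy loop deleted above now lives in btrfs_decompress_buf2page(), which maps the freshly inflated bytes [buf_start, total_out) of the uncompressed stream onto the bio pages and returns 0 once the biovec is full. A simplified model of that helper, assuming the destination pages are contiguous from the start of the extent (the real helper also uses disk_start and the page offsets to skip bytes that belong to no destination page):

    /* Illustrative model only, not the real btrfs_decompress_buf2page(). */
    static int buf2page_sketch(char *buf, unsigned long buf_start,
                               unsigned long total_out,
                               struct bio_vec *bvec, int vcnt,
                               unsigned long *page_out_index,
                               unsigned long *pg_offset)
    {
        unsigned long working_bytes = total_out - buf_start;
        unsigned long buf_offset = 0;

        while (working_bytes > 0) {
            struct page *page = bvec[*page_out_index].bv_page;
            unsigned long bytes = min(PAGE_CACHE_SIZE - *pg_offset,
                                      working_bytes);
            char *kaddr = kmap_atomic(page, KM_USER0);

            memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
            kunmap_atomic(kaddr, KM_USER0);
            flush_dcache_page(page);

            *pg_offset += bytes;
            buf_offset += bytes;
            working_bytes -= bytes;

            if (*pg_offset == PAGE_CACHE_SIZE) {
                *pg_offset = 0;
                if (++(*page_out_index) >= (unsigned long)vcnt)
                    return 0;   /* out of destination pages */
            }
        }
        return 1;   /* more room; the caller keeps inflating */
    }
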
@@ -521,35 +302,21 @@ done: | |||
521 | zlib_inflateEnd(&workspace->inf_strm); | 302 | zlib_inflateEnd(&workspace->inf_strm); |
522 | if (data_in) | 303 | if (data_in) |
523 | kunmap(pages_in[page_in_index]); | 304 | kunmap(pages_in[page_in_index]); |
524 | out: | ||
525 | free_workspace(workspace); | ||
526 | return ret; | 305 | return ret; |
527 | } | 306 | } |
528 | 307 | ||
529 | /* | 308 | static int zlib_decompress(struct list_head *ws, unsigned char *data_in, |
530 | * a less complex decompression routine. Our compressed data fits in a | 309 | struct page *dest_page, |
531 | * single page, and we want to read a single page out of it. | 310 | unsigned long start_byte, |
532 | * start_byte tells us the offset into the compressed data we're interested in | 311 | size_t srclen, size_t destlen) |
533 | */ | ||
534 | int btrfs_zlib_decompress(unsigned char *data_in, | ||
535 | struct page *dest_page, | ||
536 | unsigned long start_byte, | ||
537 | size_t srclen, size_t destlen) | ||
538 | { | 312 | { |
313 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
539 | int ret = 0; | 314 | int ret = 0; |
540 | int wbits = MAX_WBITS; | 315 | int wbits = MAX_WBITS; |
541 | struct workspace *workspace; | ||
542 | unsigned long bytes_left = destlen; | 316 | unsigned long bytes_left = destlen; |
543 | unsigned long total_out = 0; | 317 | unsigned long total_out = 0; |
544 | char *kaddr; | 318 | char *kaddr; |
545 | 319 | ||
546 | if (destlen > PAGE_CACHE_SIZE) | ||
547 | return -ENOMEM; | ||
548 | |||
549 | workspace = find_zlib_workspace(); | ||
550 | if (IS_ERR(workspace)) | ||
551 | return -ENOMEM; | ||
552 | |||
553 | workspace->inf_strm.next_in = data_in; | 320 | workspace->inf_strm.next_in = data_in; |
554 | workspace->inf_strm.avail_in = srclen; | 321 | workspace->inf_strm.avail_in = srclen; |
555 | workspace->inf_strm.total_in = 0; | 322 | workspace->inf_strm.total_in = 0; |
@@ -570,8 +337,7 @@ int btrfs_zlib_decompress(unsigned char *data_in, | |||
570 | 337 | ||
571 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 338 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
572 | printk(KERN_WARNING "inflateInit failed\n"); | 339 | printk(KERN_WARNING "inflateInit failed\n"); |
573 | ret = -1; | 340 | return -1; |
574 | goto out; | ||
575 | } | 341 | } |
576 | 342 | ||
577 | while (bytes_left > 0) { | 343 | while (bytes_left > 0) { |
@@ -621,12 +387,13 @@ next: | |||
621 | ret = 0; | 387 | ret = 0; |
622 | 388 | ||
623 | zlib_inflateEnd(&workspace->inf_strm); | 389 | zlib_inflateEnd(&workspace->inf_strm); |
624 | out: | ||
625 | free_workspace(workspace); | ||
626 | return ret; | 390 | return ret; |
627 | } | 391 | } |
628 | 392 | ||
629 | void btrfs_zlib_exit(void) | 393 | struct btrfs_compress_op btrfs_zlib_compress = { |
630 | { | 394 | .alloc_workspace = zlib_alloc_workspace, |
631 | free_workspaces(); | 395 | .free_workspace = zlib_free_workspace, |
632 | } | 396 | .compress_pages = zlib_compress_pages, |
397 | .decompress_biovec = zlib_decompress_biovec, | ||
398 | .decompress = zlib_decompress, | ||
399 | }; | ||
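
The file now exports a btrfs_compress_op vtable instead of btrfs_zlib_* entry points, so the generic compression layer can treat zlib and the new lzo backend uniformly. A hedged sketch of how a caller might drive the table; the wrapper name and the direct alloc/free calls are illustrative only (the real compression.c keeps a pool of cached workspaces rather than allocating one per call):

    static const struct btrfs_compress_op *ops = &btrfs_zlib_compress;

    static int compress_one(struct address_space *mapping, u64 start,
                            unsigned long len, struct page **pages,
                            unsigned long nr_dest_pages,
                            unsigned long *out_pages,
                            unsigned long *total_in,
                            unsigned long *total_out,
                            unsigned long max_out)
    {
        struct list_head *ws;
        int ret;

        ws = ops->alloc_workspace();
        if (IS_ERR(ws))
            return PTR_ERR(ws);
        ret = ops->compress_pages(ws, mapping, start, len, pages,
                                  nr_dest_pages, out_pages,
                                  total_in, total_out, max_out);
        ops->free_workspace(ws);
        return ret;
    }
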