diff options
author | Michal Marek <mmarek@suse.cz> | 2011-03-09 10:15:44 -0500 |
---|---|---|
committer | Michal Marek <mmarek@suse.cz> | 2011-03-09 10:15:44 -0500 |
commit | 2d8ad8719591fa803b0d589ed057fa46f49b7155 (patch) | |
tree | 4ae051577dad1161c91dafbf4207bb10a9dc91bb /fs/btrfs | |
parent | 9b4ce7bce5f30712fd926ab4599a803314a07719 (diff) | |
parent | c56eb8fb6dccb83d9fe62fd4dc00c834de9bc470 (diff) |
Merge commit 'v2.6.38-rc1' into kbuild/packaging
Diffstat (limited to 'fs/btrfs')
49 files changed, 11815 insertions, 4282 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 7bb3c020e570..ecb9fd3be143 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig | |||
@@ -4,6 +4,8 @@ config BTRFS_FS | |||
4 | select LIBCRC32C | 4 | select LIBCRC32C |
5 | select ZLIB_INFLATE | 5 | select ZLIB_INFLATE |
6 | select ZLIB_DEFLATE | 6 | select ZLIB_DEFLATE |
7 | select LZO_COMPRESS | ||
8 | select LZO_DECOMPRESS | ||
7 | help | 9 | help |
8 | Btrfs is a new filesystem with extents, writable snapshotting, | 10 | Btrfs is a new filesystem with extents, writable snapshotting, |
9 | support for multiple devices and many more features. | 11 | support for multiple devices and many more features. |
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a35eb36b32fd..31610ea73aec 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | |||
6 | transaction.o inode.o file.o tree-defrag.o \ | 6 | transaction.o inode.o file.o tree-defrag.o \ |
7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ | 7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ |
8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
9 | export.o tree-log.o acl.o free-space-cache.o zlib.o \ | 9 | export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \ |
10 | compression.o delayed-ref.o relocation.o | 10 | compression.o delayed-ref.o relocation.o |
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 6df6d6ed74fd..15b5ca2a2606 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/posix_acl_xattr.h> | 22 | #include <linux/posix_acl_xattr.h> |
23 | #include <linux/posix_acl.h> | 23 | #include <linux/posix_acl.h> |
24 | #include <linux/sched.h> | 24 | #include <linux/sched.h> |
25 | #include <linux/slab.h> | ||
25 | 26 | ||
26 | #include "ctree.h" | 27 | #include "ctree.h" |
27 | #include "btrfs_inode.h" | 28 | #include "btrfs_inode.h" |
@@ -59,6 +60,10 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | |||
59 | size = __btrfs_getxattr(inode, name, value, size); | 60 | size = __btrfs_getxattr(inode, name, value, size); |
60 | if (size > 0) { | 61 | if (size > 0) { |
61 | acl = posix_acl_from_xattr(value, size); | 62 | acl = posix_acl_from_xattr(value, size); |
63 | if (IS_ERR(acl)) { | ||
64 | kfree(value); | ||
65 | return acl; | ||
66 | } | ||
62 | set_cached_acl(inode, type, acl); | 67 | set_cached_acl(inode, type, acl); |
63 | } | 68 | } |
64 | kfree(value); | 69 | kfree(value); |
@@ -159,6 +164,12 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, | |||
159 | int ret; | 164 | int ret; |
160 | struct posix_acl *acl = NULL; | 165 | struct posix_acl *acl = NULL; |
161 | 166 | ||
167 | if (!is_owner_or_cap(dentry->d_inode)) | ||
168 | return -EPERM; | ||
169 | |||
170 | if (!IS_POSIXACL(dentry->d_inode)) | ||
171 | return -EOPNOTSUPP; | ||
172 | |||
162 | if (value) { | 173 | if (value) { |
163 | acl = posix_acl_from_xattr(value, size); | 174 | acl = posix_acl_from_xattr(value, size); |
164 | if (acl == NULL) { | 175 | if (acl == NULL) { |
@@ -176,18 +187,23 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, | |||
176 | return ret; | 187 | return ret; |
177 | } | 188 | } |
178 | 189 | ||
179 | int btrfs_check_acl(struct inode *inode, int mask) | 190 | int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags) |
180 | { | 191 | { |
181 | struct posix_acl *acl; | ||
182 | int error = -EAGAIN; | 192 | int error = -EAGAIN; |
183 | 193 | ||
184 | acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); | 194 | if (flags & IPERM_FLAG_RCU) { |
195 | if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) | ||
196 | error = -ECHILD; | ||
185 | 197 | ||
186 | if (IS_ERR(acl)) | 198 | } else { |
187 | return PTR_ERR(acl); | 199 | struct posix_acl *acl; |
188 | if (acl) { | 200 | acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); |
189 | error = posix_acl_permission(inode, acl, mask); | 201 | if (IS_ERR(acl)) |
190 | posix_acl_release(acl); | 202 | return PTR_ERR(acl); |
203 | if (acl) { | ||
204 | error = posix_acl_permission(inode, acl, mask); | ||
205 | posix_acl_release(acl); | ||
206 | } | ||
191 | } | 207 | } |
192 | 208 | ||
193 | return error; | 209 | return error; |
@@ -281,14 +297,14 @@ int btrfs_acl_chmod(struct inode *inode) | |||
281 | return ret; | 297 | return ret; |
282 | } | 298 | } |
283 | 299 | ||
284 | struct xattr_handler btrfs_xattr_acl_default_handler = { | 300 | const struct xattr_handler btrfs_xattr_acl_default_handler = { |
285 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 301 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
286 | .flags = ACL_TYPE_DEFAULT, | 302 | .flags = ACL_TYPE_DEFAULT, |
287 | .get = btrfs_xattr_acl_get, | 303 | .get = btrfs_xattr_acl_get, |
288 | .set = btrfs_xattr_acl_set, | 304 | .set = btrfs_xattr_acl_set, |
289 | }; | 305 | }; |
290 | 306 | ||
291 | struct xattr_handler btrfs_xattr_acl_access_handler = { | 307 | const struct xattr_handler btrfs_xattr_acl_access_handler = { |
292 | .prefix = POSIX_ACL_XATTR_ACCESS, | 308 | .prefix = POSIX_ACL_XATTR_ACCESS, |
293 | .flags = ACL_TYPE_ACCESS, | 309 | .flags = ACL_TYPE_ACCESS, |
294 | .get = btrfs_xattr_acl_get, | 310 | .get = btrfs_xattr_acl_get, |
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index c0861e781cdb..7ec14097fef1 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
@@ -17,6 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/kthread.h> | 19 | #include <linux/kthread.h> |
20 | #include <linux/slab.h> | ||
20 | #include <linux/list.h> | 21 | #include <linux/list.h> |
21 | #include <linux/spinlock.h> | 22 | #include <linux/spinlock.h> |
22 | #include <linux/freezer.h> | 23 | #include <linux/freezer.h> |
@@ -376,6 +377,7 @@ again: | |||
376 | if (!list_empty(&worker->pending) || | 377 | if (!list_empty(&worker->pending) || |
377 | !list_empty(&worker->prio_pending)) { | 378 | !list_empty(&worker->prio_pending)) { |
378 | spin_unlock_irq(&worker->lock); | 379 | spin_unlock_irq(&worker->lock); |
380 | set_current_state(TASK_RUNNING); | ||
379 | goto again; | 381 | goto again; |
380 | } | 382 | } |
381 | 383 | ||
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 3f1f50d9d916..ccc991c542df 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -137,8 +137,8 @@ struct btrfs_inode { | |||
137 | * of extent items we've reserved metadata for. | 137 | * of extent items we've reserved metadata for. |
138 | */ | 138 | */ |
139 | spinlock_t accounting_lock; | 139 | spinlock_t accounting_lock; |
140 | atomic_t outstanding_extents; | ||
140 | int reserved_extents; | 141 | int reserved_extents; |
141 | int outstanding_extents; | ||
142 | 142 | ||
143 | /* | 143 | /* |
144 | * ordered_data_close is set by truncate when a file that used | 144 | * ordered_data_close is set by truncate when a file that used |
@@ -151,8 +151,14 @@ struct btrfs_inode { | |||
151 | * of these. | 151 | * of these. |
152 | */ | 152 | */ |
153 | unsigned ordered_data_close:1; | 153 | unsigned ordered_data_close:1; |
154 | unsigned orphan_meta_reserved:1; | ||
154 | unsigned dummy_inode:1; | 155 | unsigned dummy_inode:1; |
155 | 156 | ||
157 | /* | ||
158 | * always compress this one file | ||
159 | */ | ||
160 | unsigned force_compress:4; | ||
161 | |||
156 | struct inode vfs_inode; | 162 | struct inode vfs_inode; |
157 | }; | 163 | }; |
158 | 164 | ||
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index a11a32058b50..f745287fbf2e 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -31,7 +31,7 @@ | |||
31 | #include <linux/swap.h> | 31 | #include <linux/swap.h> |
32 | #include <linux/writeback.h> | 32 | #include <linux/writeback.h> |
33 | #include <linux/bit_spinlock.h> | 33 | #include <linux/bit_spinlock.h> |
34 | #include <linux/pagevec.h> | 34 | #include <linux/slab.h> |
35 | #include "compat.h" | 35 | #include "compat.h" |
36 | #include "ctree.h" | 36 | #include "ctree.h" |
37 | #include "disk-io.h" | 37 | #include "disk-io.h" |
@@ -62,6 +62,9 @@ struct compressed_bio { | |||
62 | /* number of bytes on disk */ | 62 | /* number of bytes on disk */ |
63 | unsigned long compressed_len; | 63 | unsigned long compressed_len; |
64 | 64 | ||
65 | /* the compression algorithm for this bio */ | ||
66 | int compress_type; | ||
67 | |||
65 | /* number of compressed pages in the array */ | 68 | /* number of compressed pages in the array */ |
66 | unsigned long nr_pages; | 69 | unsigned long nr_pages; |
67 | 70 | ||
@@ -91,23 +94,10 @@ static inline int compressed_bio_size(struct btrfs_root *root, | |||
91 | static struct bio *compressed_bio_alloc(struct block_device *bdev, | 94 | static struct bio *compressed_bio_alloc(struct block_device *bdev, |
92 | u64 first_byte, gfp_t gfp_flags) | 95 | u64 first_byte, gfp_t gfp_flags) |
93 | { | 96 | { |
94 | struct bio *bio; | ||
95 | int nr_vecs; | 97 | int nr_vecs; |
96 | 98 | ||
97 | nr_vecs = bio_get_nr_vecs(bdev); | 99 | nr_vecs = bio_get_nr_vecs(bdev); |
98 | bio = bio_alloc(gfp_flags, nr_vecs); | 100 | return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags); |
99 | |||
100 | if (bio == NULL && (current->flags & PF_MEMALLOC)) { | ||
101 | while (!bio && (nr_vecs /= 2)) | ||
102 | bio = bio_alloc(gfp_flags, nr_vecs); | ||
103 | } | ||
104 | |||
105 | if (bio) { | ||
106 | bio->bi_size = 0; | ||
107 | bio->bi_bdev = bdev; | ||
108 | bio->bi_sector = first_byte >> 9; | ||
109 | } | ||
110 | return bio; | ||
111 | } | 101 | } |
112 | 102 | ||
113 | static int check_compressed_csum(struct inode *inode, | 103 | static int check_compressed_csum(struct inode *inode, |
@@ -163,7 +153,6 @@ fail: | |||
163 | */ | 153 | */ |
164 | static void end_compressed_bio_read(struct bio *bio, int err) | 154 | static void end_compressed_bio_read(struct bio *bio, int err) |
165 | { | 155 | { |
166 | struct extent_io_tree *tree; | ||
167 | struct compressed_bio *cb = bio->bi_private; | 156 | struct compressed_bio *cb = bio->bi_private; |
168 | struct inode *inode; | 157 | struct inode *inode; |
169 | struct page *page; | 158 | struct page *page; |
@@ -187,12 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err) | |||
187 | /* ok, we're the last bio for this extent, lets start | 176 | /* ok, we're the last bio for this extent, lets start |
188 | * the decompression. | 177 | * the decompression. |
189 | */ | 178 | */ |
190 | tree = &BTRFS_I(inode)->io_tree; | 179 | ret = btrfs_decompress_biovec(cb->compress_type, |
191 | ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, | 180 | cb->compressed_pages, |
192 | cb->start, | 181 | cb->start, |
193 | cb->orig_bio->bi_io_vec, | 182 | cb->orig_bio->bi_io_vec, |
194 | cb->orig_bio->bi_vcnt, | 183 | cb->orig_bio->bi_vcnt, |
195 | cb->compressed_len); | 184 | cb->compressed_len); |
196 | csum_failed: | 185 | csum_failed: |
197 | if (ret) | 186 | if (ret) |
198 | cb->errors = 1; | 187 | cb->errors = 1; |
@@ -445,7 +434,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
445 | unsigned long nr_pages = 0; | 434 | unsigned long nr_pages = 0; |
446 | struct extent_map *em; | 435 | struct extent_map *em; |
447 | struct address_space *mapping = inode->i_mapping; | 436 | struct address_space *mapping = inode->i_mapping; |
448 | struct pagevec pvec; | ||
449 | struct extent_map_tree *em_tree; | 437 | struct extent_map_tree *em_tree; |
450 | struct extent_io_tree *tree; | 438 | struct extent_io_tree *tree; |
451 | u64 end; | 439 | u64 end; |
@@ -461,7 +449,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
461 | 449 | ||
462 | end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; | 450 | end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; |
463 | 451 | ||
464 | pagevec_init(&pvec, 0); | ||
465 | while (last_offset < compressed_end) { | 452 | while (last_offset < compressed_end) { |
466 | page_index = last_offset >> PAGE_CACHE_SHIFT; | 453 | page_index = last_offset >> PAGE_CACHE_SHIFT; |
467 | 454 | ||
@@ -478,26 +465,17 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
478 | goto next; | 465 | goto next; |
479 | } | 466 | } |
480 | 467 | ||
481 | page = alloc_page(mapping_gfp_mask(mapping) | GFP_NOFS); | 468 | page = __page_cache_alloc(mapping_gfp_mask(mapping) & |
469 | ~__GFP_FS); | ||
482 | if (!page) | 470 | if (!page) |
483 | break; | 471 | break; |
484 | 472 | ||
485 | page->index = page_index; | 473 | if (add_to_page_cache_lru(page, mapping, page_index, |
486 | /* | 474 | GFP_NOFS)) { |
487 | * what we want to do here is call add_to_page_cache_lru, | ||
488 | * but that isn't exported, so we reproduce it here | ||
489 | */ | ||
490 | if (add_to_page_cache(page, mapping, | ||
491 | page->index, GFP_NOFS)) { | ||
492 | page_cache_release(page); | 475 | page_cache_release(page); |
493 | goto next; | 476 | goto next; |
494 | } | 477 | } |
495 | 478 | ||
496 | /* open coding of lru_cache_add, also not exported */ | ||
497 | page_cache_get(page); | ||
498 | if (!pagevec_add(&pvec, page)) | ||
499 | __pagevec_lru_add_file(&pvec); | ||
500 | |||
501 | end = last_offset + PAGE_CACHE_SIZE - 1; | 479 | end = last_offset + PAGE_CACHE_SIZE - 1; |
502 | /* | 480 | /* |
503 | * at this point, we have a locked page in the page cache | 481 | * at this point, we have a locked page in the page cache |
@@ -551,8 +529,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
551 | next: | 529 | next: |
552 | last_offset += PAGE_CACHE_SIZE; | 530 | last_offset += PAGE_CACHE_SIZE; |
553 | } | 531 | } |
554 | if (pagevec_count(&pvec)) | ||
555 | __pagevec_lru_add_file(&pvec); | ||
556 | return 0; | 532 | return 0; |
557 | } | 533 | } |
558 | 534 | ||
@@ -616,6 +592,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
616 | 592 | ||
617 | cb->len = uncompressed_len; | 593 | cb->len = uncompressed_len; |
618 | cb->compressed_len = compressed_len; | 594 | cb->compressed_len = compressed_len; |
595 | cb->compress_type = extent_compress_type(bio_flags); | ||
619 | cb->orig_bio = bio; | 596 | cb->orig_bio = bio; |
620 | 597 | ||
621 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / | 598 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / |
@@ -705,3 +682,317 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
705 | bio_put(comp_bio); | 682 | bio_put(comp_bio); |
706 | return 0; | 683 | return 0; |
707 | } | 684 | } |
685 | |||
686 | static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES]; | ||
687 | static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES]; | ||
688 | static int comp_num_workspace[BTRFS_COMPRESS_TYPES]; | ||
689 | static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; | ||
690 | static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; | ||
691 | |||
692 | struct btrfs_compress_op *btrfs_compress_op[] = { | ||
693 | &btrfs_zlib_compress, | ||
694 | &btrfs_lzo_compress, | ||
695 | }; | ||
696 | |||
697 | int __init btrfs_init_compress(void) | ||
698 | { | ||
699 | int i; | ||
700 | |||
701 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | ||
702 | INIT_LIST_HEAD(&comp_idle_workspace[i]); | ||
703 | spin_lock_init(&comp_workspace_lock[i]); | ||
704 | atomic_set(&comp_alloc_workspace[i], 0); | ||
705 | init_waitqueue_head(&comp_workspace_wait[i]); | ||
706 | } | ||
707 | return 0; | ||
708 | } | ||
709 | |||
710 | /* | ||
711 | * this finds an available workspace or allocates a new one | ||
712 | * ERR_PTR is returned if things go bad. | ||
713 | */ | ||
714 | static struct list_head *find_workspace(int type) | ||
715 | { | ||
716 | struct list_head *workspace; | ||
717 | int cpus = num_online_cpus(); | ||
718 | int idx = type - 1; | ||
719 | |||
720 | struct list_head *idle_workspace = &comp_idle_workspace[idx]; | ||
721 | spinlock_t *workspace_lock = &comp_workspace_lock[idx]; | ||
722 | atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; | ||
723 | wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; | ||
724 | int *num_workspace = &comp_num_workspace[idx]; | ||
725 | again: | ||
726 | spin_lock(workspace_lock); | ||
727 | if (!list_empty(idle_workspace)) { | ||
728 | workspace = idle_workspace->next; | ||
729 | list_del(workspace); | ||
730 | (*num_workspace)--; | ||
731 | spin_unlock(workspace_lock); | ||
732 | return workspace; | ||
733 | |||
734 | } | ||
735 | if (atomic_read(alloc_workspace) > cpus) { | ||
736 | DEFINE_WAIT(wait); | ||
737 | |||
738 | spin_unlock(workspace_lock); | ||
739 | prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
740 | if (atomic_read(alloc_workspace) > cpus && !*num_workspace) | ||
741 | schedule(); | ||
742 | finish_wait(workspace_wait, &wait); | ||
743 | goto again; | ||
744 | } | ||
745 | atomic_inc(alloc_workspace); | ||
746 | spin_unlock(workspace_lock); | ||
747 | |||
748 | workspace = btrfs_compress_op[idx]->alloc_workspace(); | ||
749 | if (IS_ERR(workspace)) { | ||
750 | atomic_dec(alloc_workspace); | ||
751 | wake_up(workspace_wait); | ||
752 | } | ||
753 | return workspace; | ||
754 | } | ||
755 | |||
756 | /* | ||
757 | * put a workspace struct back on the list or free it if we have enough | ||
758 | * idle ones sitting around | ||
759 | */ | ||
760 | static void free_workspace(int type, struct list_head *workspace) | ||
761 | { | ||
762 | int idx = type - 1; | ||
763 | struct list_head *idle_workspace = &comp_idle_workspace[idx]; | ||
764 | spinlock_t *workspace_lock = &comp_workspace_lock[idx]; | ||
765 | atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; | ||
766 | wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; | ||
767 | int *num_workspace = &comp_num_workspace[idx]; | ||
768 | |||
769 | spin_lock(workspace_lock); | ||
770 | if (*num_workspace < num_online_cpus()) { | ||
771 | list_add_tail(workspace, idle_workspace); | ||
772 | (*num_workspace)++; | ||
773 | spin_unlock(workspace_lock); | ||
774 | goto wake; | ||
775 | } | ||
776 | spin_unlock(workspace_lock); | ||
777 | |||
778 | btrfs_compress_op[idx]->free_workspace(workspace); | ||
779 | atomic_dec(alloc_workspace); | ||
780 | wake: | ||
781 | if (waitqueue_active(workspace_wait)) | ||
782 | wake_up(workspace_wait); | ||
783 | } | ||
784 | |||
785 | /* | ||
786 | * cleanup function for module exit | ||
787 | */ | ||
788 | static void free_workspaces(void) | ||
789 | { | ||
790 | struct list_head *workspace; | ||
791 | int i; | ||
792 | |||
793 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | ||
794 | while (!list_empty(&comp_idle_workspace[i])) { | ||
795 | workspace = comp_idle_workspace[i].next; | ||
796 | list_del(workspace); | ||
797 | btrfs_compress_op[i]->free_workspace(workspace); | ||
798 | atomic_dec(&comp_alloc_workspace[i]); | ||
799 | } | ||
800 | } | ||
801 | } | ||
802 | |||
803 | /* | ||
804 | * given an address space and start/len, compress the bytes. | ||
805 | * | ||
806 | * pages are allocated to hold the compressed result and stored | ||
807 | * in 'pages' | ||
808 | * | ||
809 | * out_pages is used to return the number of pages allocated. There | ||
810 | * may be pages allocated even if we return an error | ||
811 | * | ||
812 | * total_in is used to return the number of bytes actually read. It | ||
813 | * may be smaller then len if we had to exit early because we | ||
814 | * ran out of room in the pages array or because we cross the | ||
815 | * max_out threshold. | ||
816 | * | ||
817 | * total_out is used to return the total number of compressed bytes | ||
818 | * | ||
819 | * max_out tells us the max number of bytes that we're allowed to | ||
820 | * stuff into pages | ||
821 | */ | ||
822 | int btrfs_compress_pages(int type, struct address_space *mapping, | ||
823 | u64 start, unsigned long len, | ||
824 | struct page **pages, | ||
825 | unsigned long nr_dest_pages, | ||
826 | unsigned long *out_pages, | ||
827 | unsigned long *total_in, | ||
828 | unsigned long *total_out, | ||
829 | unsigned long max_out) | ||
830 | { | ||
831 | struct list_head *workspace; | ||
832 | int ret; | ||
833 | |||
834 | workspace = find_workspace(type); | ||
835 | if (IS_ERR(workspace)) | ||
836 | return -1; | ||
837 | |||
838 | ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, | ||
839 | start, len, pages, | ||
840 | nr_dest_pages, out_pages, | ||
841 | total_in, total_out, | ||
842 | max_out); | ||
843 | free_workspace(type, workspace); | ||
844 | return ret; | ||
845 | } | ||
846 | |||
847 | /* | ||
848 | * pages_in is an array of pages with compressed data. | ||
849 | * | ||
850 | * disk_start is the starting logical offset of this array in the file | ||
851 | * | ||
852 | * bvec is a bio_vec of pages from the file that we want to decompress into | ||
853 | * | ||
854 | * vcnt is the count of pages in the biovec | ||
855 | * | ||
856 | * srclen is the number of bytes in pages_in | ||
857 | * | ||
858 | * The basic idea is that we have a bio that was created by readpages. | ||
859 | * The pages in the bio are for the uncompressed data, and they may not | ||
860 | * be contiguous. They all correspond to the range of bytes covered by | ||
861 | * the compressed extent. | ||
862 | */ | ||
863 | int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, | ||
864 | struct bio_vec *bvec, int vcnt, size_t srclen) | ||
865 | { | ||
866 | struct list_head *workspace; | ||
867 | int ret; | ||
868 | |||
869 | workspace = find_workspace(type); | ||
870 | if (IS_ERR(workspace)) | ||
871 | return -ENOMEM; | ||
872 | |||
873 | ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, | ||
874 | disk_start, | ||
875 | bvec, vcnt, srclen); | ||
876 | free_workspace(type, workspace); | ||
877 | return ret; | ||
878 | } | ||
879 | |||
880 | /* | ||
881 | * a less complex decompression routine. Our compressed data fits in a | ||
882 | * single page, and we want to read a single page out of it. | ||
883 | * start_byte tells us the offset into the compressed data we're interested in | ||
884 | */ | ||
885 | int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, | ||
886 | unsigned long start_byte, size_t srclen, size_t destlen) | ||
887 | { | ||
888 | struct list_head *workspace; | ||
889 | int ret; | ||
890 | |||
891 | workspace = find_workspace(type); | ||
892 | if (IS_ERR(workspace)) | ||
893 | return -ENOMEM; | ||
894 | |||
895 | ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, | ||
896 | dest_page, start_byte, | ||
897 | srclen, destlen); | ||
898 | |||
899 | free_workspace(type, workspace); | ||
900 | return ret; | ||
901 | } | ||
902 | |||
903 | void __exit btrfs_exit_compress(void) | ||
904 | { | ||
905 | free_workspaces(); | ||
906 | } | ||
907 | |||
908 | /* | ||
909 | * Copy uncompressed data from working buffer to pages. | ||
910 | * | ||
911 | * buf_start is the byte offset we're of the start of our workspace buffer. | ||
912 | * | ||
913 | * total_out is the last byte of the buffer | ||
914 | */ | ||
915 | int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, | ||
916 | unsigned long total_out, u64 disk_start, | ||
917 | struct bio_vec *bvec, int vcnt, | ||
918 | unsigned long *page_index, | ||
919 | unsigned long *pg_offset) | ||
920 | { | ||
921 | unsigned long buf_offset; | ||
922 | unsigned long current_buf_start; | ||
923 | unsigned long start_byte; | ||
924 | unsigned long working_bytes = total_out - buf_start; | ||
925 | unsigned long bytes; | ||
926 | char *kaddr; | ||
927 | struct page *page_out = bvec[*page_index].bv_page; | ||
928 | |||
929 | /* | ||
930 | * start byte is the first byte of the page we're currently | ||
931 | * copying into relative to the start of the compressed data. | ||
932 | */ | ||
933 | start_byte = page_offset(page_out) - disk_start; | ||
934 | |||
935 | /* we haven't yet hit data corresponding to this page */ | ||
936 | if (total_out <= start_byte) | ||
937 | return 1; | ||
938 | |||
939 | /* | ||
940 | * the start of the data we care about is offset into | ||
941 | * the middle of our working buffer | ||
942 | */ | ||
943 | if (total_out > start_byte && buf_start < start_byte) { | ||
944 | buf_offset = start_byte - buf_start; | ||
945 | working_bytes -= buf_offset; | ||
946 | } else { | ||
947 | buf_offset = 0; | ||
948 | } | ||
949 | current_buf_start = buf_start; | ||
950 | |||
951 | /* copy bytes from the working buffer into the pages */ | ||
952 | while (working_bytes > 0) { | ||
953 | bytes = min(PAGE_CACHE_SIZE - *pg_offset, | ||
954 | PAGE_CACHE_SIZE - buf_offset); | ||
955 | bytes = min(bytes, working_bytes); | ||
956 | kaddr = kmap_atomic(page_out, KM_USER0); | ||
957 | memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); | ||
958 | kunmap_atomic(kaddr, KM_USER0); | ||
959 | flush_dcache_page(page_out); | ||
960 | |||
961 | *pg_offset += bytes; | ||
962 | buf_offset += bytes; | ||
963 | working_bytes -= bytes; | ||
964 | current_buf_start += bytes; | ||
965 | |||
966 | /* check if we need to pick another page */ | ||
967 | if (*pg_offset == PAGE_CACHE_SIZE) { | ||
968 | (*page_index)++; | ||
969 | if (*page_index >= vcnt) | ||
970 | return 0; | ||
971 | |||
972 | page_out = bvec[*page_index].bv_page; | ||
973 | *pg_offset = 0; | ||
974 | start_byte = page_offset(page_out) - disk_start; | ||
975 | |||
976 | /* | ||
977 | * make sure our new page is covered by this | ||
978 | * working buffer | ||
979 | */ | ||
980 | if (total_out <= start_byte) | ||
981 | return 1; | ||
982 | |||
983 | /* | ||
984 | * the next page in the biovec might not be adjacent | ||
985 | * to the last page, but it might still be found | ||
986 | * inside this working buffer. bump our offset pointer | ||
987 | */ | ||
988 | if (total_out > start_byte && | ||
989 | current_buf_start < start_byte) { | ||
990 | buf_offset = start_byte - buf_start; | ||
991 | working_bytes = total_out - start_byte; | ||
992 | current_buf_start = buf_start + buf_offset; | ||
993 | } | ||
994 | } | ||
995 | } | ||
996 | |||
997 | return 1; | ||
998 | } | ||
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 421f5b4aa715..51000174b9d7 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h | |||
@@ -19,24 +19,27 @@ | |||
19 | #ifndef __BTRFS_COMPRESSION_ | 19 | #ifndef __BTRFS_COMPRESSION_ |
20 | #define __BTRFS_COMPRESSION_ | 20 | #define __BTRFS_COMPRESSION_ |
21 | 21 | ||
22 | int btrfs_zlib_decompress(unsigned char *data_in, | 22 | int btrfs_init_compress(void); |
23 | struct page *dest_page, | 23 | void btrfs_exit_compress(void); |
24 | unsigned long start_byte, | 24 | |
25 | size_t srclen, size_t destlen); | 25 | int btrfs_compress_pages(int type, struct address_space *mapping, |
26 | int btrfs_zlib_compress_pages(struct address_space *mapping, | 26 | u64 start, unsigned long len, |
27 | u64 start, unsigned long len, | 27 | struct page **pages, |
28 | struct page **pages, | 28 | unsigned long nr_dest_pages, |
29 | unsigned long nr_dest_pages, | 29 | unsigned long *out_pages, |
30 | unsigned long *out_pages, | 30 | unsigned long *total_in, |
31 | unsigned long *total_in, | 31 | unsigned long *total_out, |
32 | unsigned long *total_out, | 32 | unsigned long max_out); |
33 | unsigned long max_out); | 33 | int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, |
34 | int btrfs_zlib_decompress_biovec(struct page **pages_in, | 34 | struct bio_vec *bvec, int vcnt, size_t srclen); |
35 | u64 disk_start, | 35 | int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, |
36 | struct bio_vec *bvec, | 36 | unsigned long start_byte, size_t srclen, size_t destlen); |
37 | int vcnt, | 37 | int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, |
38 | size_t srclen); | 38 | unsigned long total_out, u64 disk_start, |
39 | void btrfs_zlib_exit(void); | 39 | struct bio_vec *bvec, int vcnt, |
40 | unsigned long *page_index, | ||
41 | unsigned long *pg_offset); | ||
42 | |||
40 | int btrfs_submit_compressed_write(struct inode *inode, u64 start, | 43 | int btrfs_submit_compressed_write(struct inode *inode, u64 start, |
41 | unsigned long len, u64 disk_start, | 44 | unsigned long len, u64 disk_start, |
42 | unsigned long compressed_len, | 45 | unsigned long compressed_len, |
@@ -44,4 +47,37 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, | |||
44 | unsigned long nr_pages); | 47 | unsigned long nr_pages); |
45 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | 48 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, |
46 | int mirror_num, unsigned long bio_flags); | 49 | int mirror_num, unsigned long bio_flags); |
50 | |||
51 | struct btrfs_compress_op { | ||
52 | struct list_head *(*alloc_workspace)(void); | ||
53 | |||
54 | void (*free_workspace)(struct list_head *workspace); | ||
55 | |||
56 | int (*compress_pages)(struct list_head *workspace, | ||
57 | struct address_space *mapping, | ||
58 | u64 start, unsigned long len, | ||
59 | struct page **pages, | ||
60 | unsigned long nr_dest_pages, | ||
61 | unsigned long *out_pages, | ||
62 | unsigned long *total_in, | ||
63 | unsigned long *total_out, | ||
64 | unsigned long max_out); | ||
65 | |||
66 | int (*decompress_biovec)(struct list_head *workspace, | ||
67 | struct page **pages_in, | ||
68 | u64 disk_start, | ||
69 | struct bio_vec *bvec, | ||
70 | int vcnt, | ||
71 | size_t srclen); | ||
72 | |||
73 | int (*decompress)(struct list_head *workspace, | ||
74 | unsigned char *data_in, | ||
75 | struct page *dest_page, | ||
76 | unsigned long start_byte, | ||
77 | size_t srclen, size_t destlen); | ||
78 | }; | ||
79 | |||
80 | extern struct btrfs_compress_op btrfs_zlib_compress; | ||
81 | extern struct btrfs_compress_op btrfs_lzo_compress; | ||
82 | |||
47 | #endif | 83 | #endif |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c4bc570a396e..b5baff0dccfe 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -17,6 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/slab.h> | ||
20 | #include "ctree.h" | 21 | #include "ctree.h" |
21 | #include "disk-io.h" | 22 | #include "disk-io.h" |
22 | #include "transaction.h" | 23 | #include "transaction.h" |
@@ -104,6 +105,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, | |||
104 | /* this also releases the path */ | 105 | /* this also releases the path */ |
105 | void btrfs_free_path(struct btrfs_path *p) | 106 | void btrfs_free_path(struct btrfs_path *p) |
106 | { | 107 | { |
108 | if (!p) | ||
109 | return; | ||
107 | btrfs_release_path(NULL, p); | 110 | btrfs_release_path(NULL, p); |
108 | kmem_cache_free(btrfs_path_cachep, p); | 111 | kmem_cache_free(btrfs_path_cachep, p); |
109 | } | 112 | } |
@@ -199,7 +202,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
199 | struct extent_buffer **cow_ret, u64 new_root_objectid) | 202 | struct extent_buffer **cow_ret, u64 new_root_objectid) |
200 | { | 203 | { |
201 | struct extent_buffer *cow; | 204 | struct extent_buffer *cow; |
202 | u32 nritems; | ||
203 | int ret = 0; | 205 | int ret = 0; |
204 | int level; | 206 | int level; |
205 | struct btrfs_disk_key disk_key; | 207 | struct btrfs_disk_key disk_key; |
@@ -209,7 +211,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
209 | WARN_ON(root->ref_cows && trans->transid != root->last_trans); | 211 | WARN_ON(root->ref_cows && trans->transid != root->last_trans); |
210 | 212 | ||
211 | level = btrfs_header_level(buf); | 213 | level = btrfs_header_level(buf); |
212 | nritems = btrfs_header_nritems(buf); | ||
213 | if (level == 0) | 214 | if (level == 0) |
214 | btrfs_item_key(buf, &disk_key, 0); | 215 | btrfs_item_key(buf, &disk_key, 0); |
215 | else | 216 | else |
@@ -279,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root, | |||
279 | static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | 280 | static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, |
280 | struct btrfs_root *root, | 281 | struct btrfs_root *root, |
281 | struct extent_buffer *buf, | 282 | struct extent_buffer *buf, |
282 | struct extent_buffer *cow) | 283 | struct extent_buffer *cow, |
284 | int *last_ref) | ||
283 | { | 285 | { |
284 | u64 refs; | 286 | u64 refs; |
285 | u64 owner; | 287 | u64 owner; |
@@ -365,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
365 | BUG_ON(ret); | 367 | BUG_ON(ret); |
366 | } | 368 | } |
367 | clean_tree_block(trans, root, buf); | 369 | clean_tree_block(trans, root, buf); |
370 | *last_ref = 1; | ||
368 | } | 371 | } |
369 | return 0; | 372 | return 0; |
370 | } | 373 | } |
@@ -391,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
391 | struct btrfs_disk_key disk_key; | 394 | struct btrfs_disk_key disk_key; |
392 | struct extent_buffer *cow; | 395 | struct extent_buffer *cow; |
393 | int level; | 396 | int level; |
397 | int last_ref = 0; | ||
394 | int unlock_orig = 0; | 398 | int unlock_orig = 0; |
395 | u64 parent_start; | 399 | u64 parent_start; |
396 | 400 | ||
@@ -441,7 +445,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
441 | (unsigned long)btrfs_header_fsid(cow), | 445 | (unsigned long)btrfs_header_fsid(cow), |
442 | BTRFS_FSID_SIZE); | 446 | BTRFS_FSID_SIZE); |
443 | 447 | ||
444 | update_ref_for_cow(trans, root, buf, cow); | 448 | update_ref_for_cow(trans, root, buf, cow, &last_ref); |
449 | |||
450 | if (root->ref_cows) | ||
451 | btrfs_reloc_cow_block(trans, root, buf, cow); | ||
445 | 452 | ||
446 | if (buf == root->node) { | 453 | if (buf == root->node) { |
447 | WARN_ON(parent && parent != buf); | 454 | WARN_ON(parent && parent != buf); |
@@ -456,8 +463,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
456 | extent_buffer_get(cow); | 463 | extent_buffer_get(cow); |
457 | spin_unlock(&root->node_lock); | 464 | spin_unlock(&root->node_lock); |
458 | 465 | ||
459 | btrfs_free_tree_block(trans, root, buf->start, buf->len, | 466 | btrfs_free_tree_block(trans, root, buf, parent_start, |
460 | parent_start, root->root_key.objectid, level); | 467 | last_ref); |
461 | free_extent_buffer(buf); | 468 | free_extent_buffer(buf); |
462 | add_root_to_dirty_list(root); | 469 | add_root_to_dirty_list(root); |
463 | } else { | 470 | } else { |
@@ -472,8 +479,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
472 | btrfs_set_node_ptr_generation(parent, parent_slot, | 479 | btrfs_set_node_ptr_generation(parent, parent_slot, |
473 | trans->transid); | 480 | trans->transid); |
474 | btrfs_mark_buffer_dirty(parent); | 481 | btrfs_mark_buffer_dirty(parent); |
475 | btrfs_free_tree_block(trans, root, buf->start, buf->len, | 482 | btrfs_free_tree_block(trans, root, buf, parent_start, |
476 | parent_start, root->root_key.objectid, level); | 483 | last_ref); |
477 | } | 484 | } |
478 | if (unlock_orig) | 485 | if (unlock_orig) |
479 | btrfs_tree_unlock(buf); | 486 | btrfs_tree_unlock(buf); |
@@ -948,6 +955,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | |||
948 | return bin_search(eb, key, level, slot); | 955 | return bin_search(eb, key, level, slot); |
949 | } | 956 | } |
950 | 957 | ||
958 | static void root_add_used(struct btrfs_root *root, u32 size) | ||
959 | { | ||
960 | spin_lock(&root->accounting_lock); | ||
961 | btrfs_set_root_used(&root->root_item, | ||
962 | btrfs_root_used(&root->root_item) + size); | ||
963 | spin_unlock(&root->accounting_lock); | ||
964 | } | ||
965 | |||
966 | static void root_sub_used(struct btrfs_root *root, u32 size) | ||
967 | { | ||
968 | spin_lock(&root->accounting_lock); | ||
969 | btrfs_set_root_used(&root->root_item, | ||
970 | btrfs_root_used(&root->root_item) - size); | ||
971 | spin_unlock(&root->accounting_lock); | ||
972 | } | ||
973 | |||
951 | /* given a node and slot number, this reads the blocks it points to. The | 974 | /* given a node and slot number, this reads the blocks it points to. The |
952 | * extent buffer is returned with a reference taken (but unlocked). | 975 | * extent buffer is returned with a reference taken (but unlocked). |
953 | * NULL is returned on error. | 976 | * NULL is returned on error. |
@@ -985,7 +1008,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
985 | int wret; | 1008 | int wret; |
986 | int pslot; | 1009 | int pslot; |
987 | int orig_slot = path->slots[level]; | 1010 | int orig_slot = path->slots[level]; |
988 | int err_on_enospc = 0; | ||
989 | u64 orig_ptr; | 1011 | u64 orig_ptr; |
990 | 1012 | ||
991 | if (level == 0) | 1013 | if (level == 0) |
@@ -1018,7 +1040,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1018 | btrfs_tree_lock(child); | 1040 | btrfs_tree_lock(child); |
1019 | btrfs_set_lock_blocking(child); | 1041 | btrfs_set_lock_blocking(child); |
1020 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child); | 1042 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child); |
1021 | BUG_ON(ret); | 1043 | if (ret) { |
1044 | btrfs_tree_unlock(child); | ||
1045 | free_extent_buffer(child); | ||
1046 | goto enospc; | ||
1047 | } | ||
1022 | 1048 | ||
1023 | spin_lock(&root->node_lock); | 1049 | spin_lock(&root->node_lock); |
1024 | root->node = child; | 1050 | root->node = child; |
@@ -1033,18 +1059,18 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1033 | btrfs_tree_unlock(mid); | 1059 | btrfs_tree_unlock(mid); |
1034 | /* once for the path */ | 1060 | /* once for the path */ |
1035 | free_extent_buffer(mid); | 1061 | free_extent_buffer(mid); |
1036 | ret = btrfs_free_tree_block(trans, root, mid->start, mid->len, | 1062 | |
1037 | 0, root->root_key.objectid, level); | 1063 | root_sub_used(root, mid->len); |
1064 | btrfs_free_tree_block(trans, root, mid, 0, 1); | ||
1038 | /* once for the root ptr */ | 1065 | /* once for the root ptr */ |
1039 | free_extent_buffer(mid); | 1066 | free_extent_buffer(mid); |
1040 | return ret; | 1067 | return 0; |
1041 | } | 1068 | } |
1042 | if (btrfs_header_nritems(mid) > | 1069 | if (btrfs_header_nritems(mid) > |
1043 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) | 1070 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) |
1044 | return 0; | 1071 | return 0; |
1045 | 1072 | ||
1046 | if (btrfs_header_nritems(mid) < 2) | 1073 | btrfs_header_nritems(mid); |
1047 | err_on_enospc = 1; | ||
1048 | 1074 | ||
1049 | left = read_node_slot(root, parent, pslot - 1); | 1075 | left = read_node_slot(root, parent, pslot - 1); |
1050 | if (left) { | 1076 | if (left) { |
@@ -1075,8 +1101,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1075 | wret = push_node_left(trans, root, left, mid, 1); | 1101 | wret = push_node_left(trans, root, left, mid, 1); |
1076 | if (wret < 0) | 1102 | if (wret < 0) |
1077 | ret = wret; | 1103 | ret = wret; |
1078 | if (btrfs_header_nritems(mid) < 2) | 1104 | btrfs_header_nritems(mid); |
1079 | err_on_enospc = 1; | ||
1080 | } | 1105 | } |
1081 | 1106 | ||
1082 | /* | 1107 | /* |
@@ -1087,23 +1112,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1087 | if (wret < 0 && wret != -ENOSPC) | 1112 | if (wret < 0 && wret != -ENOSPC) |
1088 | ret = wret; | 1113 | ret = wret; |
1089 | if (btrfs_header_nritems(right) == 0) { | 1114 | if (btrfs_header_nritems(right) == 0) { |
1090 | u64 bytenr = right->start; | ||
1091 | u32 blocksize = right->len; | ||
1092 | |||
1093 | clean_tree_block(trans, root, right); | 1115 | clean_tree_block(trans, root, right); |
1094 | btrfs_tree_unlock(right); | 1116 | btrfs_tree_unlock(right); |
1095 | free_extent_buffer(right); | ||
1096 | right = NULL; | ||
1097 | wret = del_ptr(trans, root, path, level + 1, pslot + | 1117 | wret = del_ptr(trans, root, path, level + 1, pslot + |
1098 | 1); | 1118 | 1); |
1099 | if (wret) | 1119 | if (wret) |
1100 | ret = wret; | 1120 | ret = wret; |
1101 | wret = btrfs_free_tree_block(trans, root, | 1121 | root_sub_used(root, right->len); |
1102 | bytenr, blocksize, 0, | 1122 | btrfs_free_tree_block(trans, root, right, 0, 1); |
1103 | root->root_key.objectid, | 1123 | free_extent_buffer(right); |
1104 | level); | 1124 | right = NULL; |
1105 | if (wret) | ||
1106 | ret = wret; | ||
1107 | } else { | 1125 | } else { |
1108 | struct btrfs_disk_key right_key; | 1126 | struct btrfs_disk_key right_key; |
1109 | btrfs_node_key(right, &right_key, 0); | 1127 | btrfs_node_key(right, &right_key, 0); |
@@ -1135,21 +1153,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1135 | BUG_ON(wret == 1); | 1153 | BUG_ON(wret == 1); |
1136 | } | 1154 | } |
1137 | if (btrfs_header_nritems(mid) == 0) { | 1155 | if (btrfs_header_nritems(mid) == 0) { |
1138 | /* we've managed to empty the middle node, drop it */ | ||
1139 | u64 bytenr = mid->start; | ||
1140 | u32 blocksize = mid->len; | ||
1141 | |||
1142 | clean_tree_block(trans, root, mid); | 1156 | clean_tree_block(trans, root, mid); |
1143 | btrfs_tree_unlock(mid); | 1157 | btrfs_tree_unlock(mid); |
1144 | free_extent_buffer(mid); | ||
1145 | mid = NULL; | ||
1146 | wret = del_ptr(trans, root, path, level + 1, pslot); | 1158 | wret = del_ptr(trans, root, path, level + 1, pslot); |
1147 | if (wret) | 1159 | if (wret) |
1148 | ret = wret; | 1160 | ret = wret; |
1149 | wret = btrfs_free_tree_block(trans, root, bytenr, blocksize, | 1161 | root_sub_used(root, mid->len); |
1150 | 0, root->root_key.objectid, level); | 1162 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
1151 | if (wret) | 1163 | free_extent_buffer(mid); |
1152 | ret = wret; | 1164 | mid = NULL; |
1153 | } else { | 1165 | } else { |
1154 | /* update the parent key to reflect our changes */ | 1166 | /* update the parent key to reflect our changes */ |
1155 | struct btrfs_disk_key mid_key; | 1167 | struct btrfs_disk_key mid_key; |
@@ -1209,14 +1221,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1209 | int wret; | 1221 | int wret; |
1210 | int pslot; | 1222 | int pslot; |
1211 | int orig_slot = path->slots[level]; | 1223 | int orig_slot = path->slots[level]; |
1212 | u64 orig_ptr; | ||
1213 | 1224 | ||
1214 | if (level == 0) | 1225 | if (level == 0) |
1215 | return 1; | 1226 | return 1; |
1216 | 1227 | ||
1217 | mid = path->nodes[level]; | 1228 | mid = path->nodes[level]; |
1218 | WARN_ON(btrfs_header_generation(mid) != trans->transid); | 1229 | WARN_ON(btrfs_header_generation(mid) != trans->transid); |
1219 | orig_ptr = btrfs_node_blockptr(mid, orig_slot); | ||
1220 | 1230 | ||
1221 | if (level < BTRFS_MAX_LEVEL - 1) | 1231 | if (level < BTRFS_MAX_LEVEL - 1) |
1222 | parent = path->nodes[level + 1]; | 1232 | parent = path->nodes[level + 1]; |
@@ -1562,13 +1572,33 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
1562 | blocksize = btrfs_level_size(root, level - 1); | 1572 | blocksize = btrfs_level_size(root, level - 1); |
1563 | 1573 | ||
1564 | tmp = btrfs_find_tree_block(root, blocknr, blocksize); | 1574 | tmp = btrfs_find_tree_block(root, blocknr, blocksize); |
1565 | if (tmp && btrfs_buffer_uptodate(tmp, gen)) { | 1575 | if (tmp) { |
1566 | /* | 1576 | if (btrfs_buffer_uptodate(tmp, 0)) { |
1567 | * we found an up to date block without sleeping, return | 1577 | if (btrfs_buffer_uptodate(tmp, gen)) { |
1568 | * right away | 1578 | /* |
1569 | */ | 1579 | * we found an up to date block without |
1570 | *eb_ret = tmp; | 1580 | * sleeping, return |
1571 | return 0; | 1581 | * right away |
1582 | */ | ||
1583 | *eb_ret = tmp; | ||
1584 | return 0; | ||
1585 | } | ||
1586 | /* the pages were up to date, but we failed | ||
1587 | * the generation number check. Do a full | ||
1588 | * read for the generation number that is correct. | ||
1589 | * We must do this without dropping locks so | ||
1590 | * we can trust our generation number | ||
1591 | */ | ||
1592 | free_extent_buffer(tmp); | ||
1593 | tmp = read_tree_block(root, blocknr, blocksize, gen); | ||
1594 | if (tmp && btrfs_buffer_uptodate(tmp, gen)) { | ||
1595 | *eb_ret = tmp; | ||
1596 | return 0; | ||
1597 | } | ||
1598 | free_extent_buffer(tmp); | ||
1599 | btrfs_release_path(NULL, p); | ||
1600 | return -EIO; | ||
1601 | } | ||
1572 | } | 1602 | } |
1573 | 1603 | ||
1574 | /* | 1604 | /* |
@@ -1581,15 +1611,14 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
1581 | btrfs_unlock_up_safe(p, level + 1); | 1611 | btrfs_unlock_up_safe(p, level + 1); |
1582 | btrfs_set_path_blocking(p); | 1612 | btrfs_set_path_blocking(p); |
1583 | 1613 | ||
1584 | if (tmp) | 1614 | free_extent_buffer(tmp); |
1585 | free_extent_buffer(tmp); | ||
1586 | if (p->reada) | 1615 | if (p->reada) |
1587 | reada_for_search(root, p, level, slot, key->objectid); | 1616 | reada_for_search(root, p, level, slot, key->objectid); |
1588 | 1617 | ||
1589 | btrfs_release_path(NULL, p); | 1618 | btrfs_release_path(NULL, p); |
1590 | 1619 | ||
1591 | ret = -EAGAIN; | 1620 | ret = -EAGAIN; |
1592 | tmp = read_tree_block(root, blocknr, blocksize, gen); | 1621 | tmp = read_tree_block(root, blocknr, blocksize, 0); |
1593 | if (tmp) { | 1622 | if (tmp) { |
1594 | /* | 1623 | /* |
1595 | * If the read above didn't mark this buffer up to date, | 1624 | * If the read above didn't mark this buffer up to date, |
@@ -1739,7 +1768,6 @@ again: | |||
1739 | p->nodes[level + 1], | 1768 | p->nodes[level + 1], |
1740 | p->slots[level + 1], &b); | 1769 | p->slots[level + 1], &b); |
1741 | if (err) { | 1770 | if (err) { |
1742 | free_extent_buffer(b); | ||
1743 | ret = err; | 1771 | ret = err; |
1744 | goto done; | 1772 | goto done; |
1745 | } | 1773 | } |
@@ -2075,6 +2103,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2075 | if (IS_ERR(c)) | 2103 | if (IS_ERR(c)) |
2076 | return PTR_ERR(c); | 2104 | return PTR_ERR(c); |
2077 | 2105 | ||
2106 | root_add_used(root, root->nodesize); | ||
2107 | |||
2078 | memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); | 2108 | memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); |
2079 | btrfs_set_header_nritems(c, 1); | 2109 | btrfs_set_header_nritems(c, 1); |
2080 | btrfs_set_header_level(c, level); | 2110 | btrfs_set_header_level(c, level); |
@@ -2133,6 +2163,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2133 | int nritems; | 2163 | int nritems; |
2134 | 2164 | ||
2135 | BUG_ON(!path->nodes[level]); | 2165 | BUG_ON(!path->nodes[level]); |
2166 | btrfs_assert_tree_locked(path->nodes[level]); | ||
2136 | lower = path->nodes[level]; | 2167 | lower = path->nodes[level]; |
2137 | nritems = btrfs_header_nritems(lower); | 2168 | nritems = btrfs_header_nritems(lower); |
2138 | BUG_ON(slot > nritems); | 2169 | BUG_ON(slot > nritems); |
@@ -2201,6 +2232,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2201 | if (IS_ERR(split)) | 2232 | if (IS_ERR(split)) |
2202 | return PTR_ERR(split); | 2233 | return PTR_ERR(split); |
2203 | 2234 | ||
2235 | root_add_used(root, root->nodesize); | ||
2236 | |||
2204 | memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); | 2237 | memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); |
2205 | btrfs_set_header_level(split, btrfs_header_level(c)); | 2238 | btrfs_set_header_level(split, btrfs_header_level(c)); |
2206 | btrfs_set_header_bytenr(split, split->start); | 2239 | btrfs_set_header_bytenr(split, split->start); |
@@ -2285,12 +2318,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root, | |||
2285 | return ret; | 2318 | return ret; |
2286 | } | 2319 | } |
2287 | 2320 | ||
2321 | /* | ||
2322 | * min slot controls the lowest index we're willing to push to the | ||
2323 | * right. We'll push up to and including min_slot, but no lower | ||
2324 | */ | ||
2288 | static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | 2325 | static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, |
2289 | struct btrfs_root *root, | 2326 | struct btrfs_root *root, |
2290 | struct btrfs_path *path, | 2327 | struct btrfs_path *path, |
2291 | int data_size, int empty, | 2328 | int data_size, int empty, |
2292 | struct extent_buffer *right, | 2329 | struct extent_buffer *right, |
2293 | int free_space, u32 left_nritems) | 2330 | int free_space, u32 left_nritems, |
2331 | u32 min_slot) | ||
2294 | { | 2332 | { |
2295 | struct extent_buffer *left = path->nodes[0]; | 2333 | struct extent_buffer *left = path->nodes[0]; |
2296 | struct extent_buffer *upper = path->nodes[1]; | 2334 | struct extent_buffer *upper = path->nodes[1]; |
@@ -2308,7 +2346,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
2308 | if (empty) | 2346 | if (empty) |
2309 | nr = 0; | 2347 | nr = 0; |
2310 | else | 2348 | else |
2311 | nr = 1; | 2349 | nr = max_t(u32, 1, min_slot); |
2312 | 2350 | ||
2313 | if (path->slots[0] >= left_nritems) | 2351 | if (path->slots[0] >= left_nritems) |
2314 | push_space += data_size; | 2352 | push_space += data_size; |
@@ -2414,6 +2452,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
2414 | 2452 | ||
2415 | if (left_nritems) | 2453 | if (left_nritems) |
2416 | btrfs_mark_buffer_dirty(left); | 2454 | btrfs_mark_buffer_dirty(left); |
2455 | else | ||
2456 | clean_tree_block(trans, root, left); | ||
2457 | |||
2417 | btrfs_mark_buffer_dirty(right); | 2458 | btrfs_mark_buffer_dirty(right); |
2418 | 2459 | ||
2419 | btrfs_item_key(right, &disk_key, 0); | 2460 | btrfs_item_key(right, &disk_key, 0); |
@@ -2447,10 +2488,14 @@ out_unlock: | |||
2447 | * | 2488 | * |
2448 | * returns 1 if the push failed because the other node didn't have enough | 2489 | * returns 1 if the push failed because the other node didn't have enough |
2449 | * room, 0 if everything worked out and < 0 if there were major errors. | 2490 | * room, 0 if everything worked out and < 0 if there were major errors. |
2491 | * | ||
2492 | * this will push starting from min_slot to the end of the leaf. It won't | ||
2493 | * push any slot lower than min_slot | ||
2450 | */ | 2494 | */ |
2451 | static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | 2495 | static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root |
2452 | *root, struct btrfs_path *path, int data_size, | 2496 | *root, struct btrfs_path *path, |
2453 | int empty) | 2497 | int min_data_size, int data_size, |
2498 | int empty, u32 min_slot) | ||
2454 | { | 2499 | { |
2455 | struct extent_buffer *left = path->nodes[0]; | 2500 | struct extent_buffer *left = path->nodes[0]; |
2456 | struct extent_buffer *right; | 2501 | struct extent_buffer *right; |
@@ -2471,6 +2516,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2471 | btrfs_assert_tree_locked(path->nodes[1]); | 2516 | btrfs_assert_tree_locked(path->nodes[1]); |
2472 | 2517 | ||
2473 | right = read_node_slot(root, upper, slot + 1); | 2518 | right = read_node_slot(root, upper, slot + 1); |
2519 | if (right == NULL) | ||
2520 | return 1; | ||
2521 | |||
2474 | btrfs_tree_lock(right); | 2522 | btrfs_tree_lock(right); |
2475 | btrfs_set_lock_blocking(right); | 2523 | btrfs_set_lock_blocking(right); |
2476 | 2524 | ||
@@ -2492,8 +2540,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2492 | if (left_nritems == 0) | 2540 | if (left_nritems == 0) |
2493 | goto out_unlock; | 2541 | goto out_unlock; |
2494 | 2542 | ||
2495 | return __push_leaf_right(trans, root, path, data_size, empty, | 2543 | return __push_leaf_right(trans, root, path, min_data_size, empty, |
2496 | right, free_space, left_nritems); | 2544 | right, free_space, left_nritems, min_slot); |
2497 | out_unlock: | 2545 | out_unlock: |
2498 | btrfs_tree_unlock(right); | 2546 | btrfs_tree_unlock(right); |
2499 | free_extent_buffer(right); | 2547 | free_extent_buffer(right); |
@@ -2503,16 +2551,20 @@ out_unlock: | |||
2503 | /* | 2551 | /* |
2504 | * push some data in the path leaf to the left, trying to free up at | 2552 | * push some data in the path leaf to the left, trying to free up at |
2505 | * least data_size bytes. returns zero if the push worked, nonzero otherwise | 2553 | * least data_size bytes. returns zero if the push worked, nonzero otherwise |
2554 | * | ||
2555 | * max_slot can put a limit on how far into the leaf we'll push items. The | ||
2556 | * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the | ||
2557 | * items | ||
2506 | */ | 2558 | */ |
2507 | static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | 2559 | static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, |
2508 | struct btrfs_root *root, | 2560 | struct btrfs_root *root, |
2509 | struct btrfs_path *path, int data_size, | 2561 | struct btrfs_path *path, int data_size, |
2510 | int empty, struct extent_buffer *left, | 2562 | int empty, struct extent_buffer *left, |
2511 | int free_space, int right_nritems) | 2563 | int free_space, u32 right_nritems, |
2564 | u32 max_slot) | ||
2512 | { | 2565 | { |
2513 | struct btrfs_disk_key disk_key; | 2566 | struct btrfs_disk_key disk_key; |
2514 | struct extent_buffer *right = path->nodes[0]; | 2567 | struct extent_buffer *right = path->nodes[0]; |
2515 | int slot; | ||
2516 | int i; | 2568 | int i; |
2517 | int push_space = 0; | 2569 | int push_space = 0; |
2518 | int push_items = 0; | 2570 | int push_items = 0; |
@@ -2524,12 +2576,10 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2524 | u32 this_item_size; | 2576 | u32 this_item_size; |
2525 | u32 old_left_item_size; | 2577 | u32 old_left_item_size; |
2526 | 2578 | ||
2527 | slot = path->slots[1]; | ||
2528 | |||
2529 | if (empty) | 2579 | if (empty) |
2530 | nr = right_nritems; | 2580 | nr = min(right_nritems, max_slot); |
2531 | else | 2581 | else |
2532 | nr = right_nritems - 1; | 2582 | nr = min(right_nritems - 1, max_slot); |
2533 | 2583 | ||
2534 | for (i = 0; i < nr; i++) { | 2584 | for (i = 0; i < nr; i++) { |
2535 | item = btrfs_item_nr(right, i); | 2585 | item = btrfs_item_nr(right, i); |
@@ -2659,6 +2709,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2659 | btrfs_mark_buffer_dirty(left); | 2709 | btrfs_mark_buffer_dirty(left); |
2660 | if (right_nritems) | 2710 | if (right_nritems) |
2661 | btrfs_mark_buffer_dirty(right); | 2711 | btrfs_mark_buffer_dirty(right); |
2712 | else | ||
2713 | clean_tree_block(trans, root, right); | ||
2662 | 2714 | ||
2663 | btrfs_item_key(right, &disk_key, 0); | 2715 | btrfs_item_key(right, &disk_key, 0); |
2664 | wret = fixup_low_keys(trans, root, path, &disk_key, 1); | 2716 | wret = fixup_low_keys(trans, root, path, &disk_key, 1); |
@@ -2668,8 +2720,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2668 | /* then fixup the leaf pointer in the path */ | 2720 | /* then fixup the leaf pointer in the path */ |
2669 | if (path->slots[0] < push_items) { | 2721 | if (path->slots[0] < push_items) { |
2670 | path->slots[0] += old_left_nritems; | 2722 | path->slots[0] += old_left_nritems; |
2671 | if (btrfs_header_nritems(path->nodes[0]) == 0) | ||
2672 | clean_tree_block(trans, root, path->nodes[0]); | ||
2673 | btrfs_tree_unlock(path->nodes[0]); | 2723 | btrfs_tree_unlock(path->nodes[0]); |
2674 | free_extent_buffer(path->nodes[0]); | 2724 | free_extent_buffer(path->nodes[0]); |
2675 | path->nodes[0] = left; | 2725 | path->nodes[0] = left; |
@@ -2690,10 +2740,14 @@ out: | |||
2690 | /* | 2740 | /* |
2691 | * push some data in the path leaf to the left, trying to free up at | 2741 | * push some data in the path leaf to the left, trying to free up at |
2692 | * least data_size bytes. returns zero if the push worked, nonzero otherwise | 2742 | * least data_size bytes. returns zero if the push worked, nonzero otherwise |
2743 | * | ||
2744 | * max_slot can put a limit on how far into the leaf we'll push items. The | ||
2745 | * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the | ||
2746 | * items | ||
2693 | */ | 2747 | */ |
2694 | static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | 2748 | static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root |
2695 | *root, struct btrfs_path *path, int data_size, | 2749 | *root, struct btrfs_path *path, int min_data_size, |
2696 | int empty) | 2750 | int data_size, int empty, u32 max_slot) |
2697 | { | 2751 | { |
2698 | struct extent_buffer *right = path->nodes[0]; | 2752 | struct extent_buffer *right = path->nodes[0]; |
2699 | struct extent_buffer *left; | 2753 | struct extent_buffer *left; |
@@ -2715,6 +2769,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2715 | btrfs_assert_tree_locked(path->nodes[1]); | 2769 | btrfs_assert_tree_locked(path->nodes[1]); |
2716 | 2770 | ||
2717 | left = read_node_slot(root, path->nodes[1], slot - 1); | 2771 | left = read_node_slot(root, path->nodes[1], slot - 1); |
2772 | if (left == NULL) | ||
2773 | return 1; | ||
2774 | |||
2718 | btrfs_tree_lock(left); | 2775 | btrfs_tree_lock(left); |
2719 | btrfs_set_lock_blocking(left); | 2776 | btrfs_set_lock_blocking(left); |
2720 | 2777 | ||
@@ -2739,8 +2796,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2739 | goto out; | 2796 | goto out; |
2740 | } | 2797 | } |
2741 | 2798 | ||
2742 | return __push_leaf_left(trans, root, path, data_size, | 2799 | return __push_leaf_left(trans, root, path, min_data_size, |
2743 | empty, left, free_space, right_nritems); | 2800 | empty, left, free_space, right_nritems, |
2801 | max_slot); | ||
2744 | out: | 2802 | out: |
2745 | btrfs_tree_unlock(left); | 2803 | btrfs_tree_unlock(left); |
2746 | free_extent_buffer(left); | 2804 | free_extent_buffer(left); |
@@ -2833,6 +2891,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans, | |||
2833 | } | 2891 | } |
2834 | 2892 | ||
2835 | /* | 2893 | /* |
2894 | * double splits happen when we need to insert a big item in the middle | ||
2895 | * of a leaf. A double split can leave us with 3 mostly empty leaves: | ||
2896 | * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ] | ||
2897 | * A B C | ||
2898 | * | ||
2899 | * We avoid this by trying to push the items on either side of our target | ||
2900 | * into the adjacent leaves. If all goes well we can avoid the double split | ||
2901 | * completely. | ||
2902 | */ | ||
2903 | static noinline int push_for_double_split(struct btrfs_trans_handle *trans, | ||
2904 | struct btrfs_root *root, | ||
2905 | struct btrfs_path *path, | ||
2906 | int data_size) | ||
2907 | { | ||
2908 | int ret; | ||
2909 | int progress = 0; | ||
2910 | int slot; | ||
2911 | u32 nritems; | ||
2912 | |||
2913 | slot = path->slots[0]; | ||
2914 | |||
2915 | /* | ||
2916 | * try to push all the items after our slot into the | ||
2917 | * right leaf | ||
2918 | */ | ||
2919 | ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot); | ||
2920 | if (ret < 0) | ||
2921 | return ret; | ||
2922 | |||
2923 | if (ret == 0) | ||
2924 | progress++; | ||
2925 | |||
2926 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
2927 | /* | ||
2928 | * our goal is to get our slot at the start or end of a leaf. If | ||
2929 | * we've done so we're done | ||
2930 | */ | ||
2931 | if (path->slots[0] == 0 || path->slots[0] == nritems) | ||
2932 | return 0; | ||
2933 | |||
2934 | if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) | ||
2935 | return 0; | ||
2936 | |||
2937 | /* try to push all the items before our slot into the next leaf */ | ||
2938 | slot = path->slots[0]; | ||
2939 | ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot); | ||
2940 | if (ret < 0) | ||
2941 | return ret; | ||
2942 | |||
2943 | if (ret == 0) | ||
2944 | progress++; | ||
2945 | |||
2946 | if (progress) | ||
2947 | return 0; | ||
2948 | return 1; | ||
2949 | } | ||
2950 | |||
2951 | /* | ||
2836 | * split the path's leaf in two, making sure there is at least data_size | 2952 | * split the path's leaf in two, making sure there is at least data_size |
2837 | * available for the resulting leaf level of the path. | 2953 | * available for the resulting leaf level of the path. |
2838 | * | 2954 | * |
@@ -2854,6 +2970,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
2854 | int wret; | 2970 | int wret; |
2855 | int split; | 2971 | int split; |
2856 | int num_doubles = 0; | 2972 | int num_doubles = 0; |
2973 | int tried_avoid_double = 0; | ||
2857 | 2974 | ||
2858 | l = path->nodes[0]; | 2975 | l = path->nodes[0]; |
2859 | slot = path->slots[0]; | 2976 | slot = path->slots[0]; |
@@ -2862,12 +2979,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
2862 | return -EOVERFLOW; | 2979 | return -EOVERFLOW; |
2863 | 2980 | ||
2864 | /* first try to make some room by pushing left and right */ | 2981 | /* first try to make some room by pushing left and right */ |
2865 | if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { | 2982 | if (data_size) { |
2866 | wret = push_leaf_right(trans, root, path, data_size, 0); | 2983 | wret = push_leaf_right(trans, root, path, data_size, |
2984 | data_size, 0, 0); | ||
2867 | if (wret < 0) | 2985 | if (wret < 0) |
2868 | return wret; | 2986 | return wret; |
2869 | if (wret) { | 2987 | if (wret) { |
2870 | wret = push_leaf_left(trans, root, path, data_size, 0); | 2988 | wret = push_leaf_left(trans, root, path, data_size, |
2989 | data_size, 0, (u32)-1); | ||
2871 | if (wret < 0) | 2990 | if (wret < 0) |
2872 | return wret; | 2991 | return wret; |
2873 | } | 2992 | } |
@@ -2901,6 +3020,8 @@ again: | |||
2901 | if (mid != nritems && | 3020 | if (mid != nritems && |
2902 | leaf_space_used(l, mid, nritems - mid) + | 3021 | leaf_space_used(l, mid, nritems - mid) + |
2903 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { | 3022 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { |
3023 | if (data_size && !tried_avoid_double) | ||
3024 | goto push_for_double; | ||
2904 | split = 2; | 3025 | split = 2; |
2905 | } | 3026 | } |
2906 | } | 3027 | } |
@@ -2917,6 +3038,8 @@ again: | |||
2917 | if (mid != nritems && | 3038 | if (mid != nritems && |
2918 | leaf_space_used(l, mid, nritems - mid) + | 3039 | leaf_space_used(l, mid, nritems - mid) + |
2919 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { | 3040 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { |
3041 | if (data_size && !tried_avoid_double) | ||
3042 | goto push_for_double; | ||
2920 | split = 2 ; | 3043 | split = 2 ; |
2921 | } | 3044 | } |
2922 | } | 3045 | } |
@@ -2931,10 +3054,10 @@ again: | |||
2931 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 3054 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
2932 | root->root_key.objectid, | 3055 | root->root_key.objectid, |
2933 | &disk_key, 0, l->start, 0); | 3056 | &disk_key, 0, l->start, 0); |
2934 | if (IS_ERR(right)) { | 3057 | if (IS_ERR(right)) |
2935 | BUG_ON(1); | ||
2936 | return PTR_ERR(right); | 3058 | return PTR_ERR(right); |
2937 | } | 3059 | |
3060 | root_add_used(root, root->leafsize); | ||
2938 | 3061 | ||
2939 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); | 3062 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); |
2940 | btrfs_set_header_bytenr(right, right->start); | 3063 | btrfs_set_header_bytenr(right, right->start); |
@@ -2997,6 +3120,13 @@ again: | |||
2997 | } | 3120 | } |
2998 | 3121 | ||
2999 | return ret; | 3122 | return ret; |
3123 | |||
3124 | push_for_double: | ||
3125 | push_for_double_split(trans, root, path, data_size); | ||
3126 | tried_avoid_double = 1; | ||
3127 | if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) | ||
3128 | return 0; | ||
3129 | goto again; | ||
3000 | } | 3130 | } |
3001 | 3131 | ||
3002 | static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, | 3132 | static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, |
@@ -3040,6 +3170,10 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, | |||
3040 | if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0])) | 3170 | if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0])) |
3041 | goto err; | 3171 | goto err; |
3042 | 3172 | ||
3173 | /* the leaf has changed, it now has room. return now */ | ||
3174 | if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len) | ||
3175 | goto err; | ||
3176 | |||
3043 | if (key.type == BTRFS_EXTENT_DATA_KEY) { | 3177 | if (key.type == BTRFS_EXTENT_DATA_KEY) { |
3044 | fi = btrfs_item_ptr(leaf, path->slots[0], | 3178 | fi = btrfs_item_ptr(leaf, path->slots[0], |
3045 | struct btrfs_file_extent_item); | 3179 | struct btrfs_file_extent_item); |
@@ -3049,7 +3183,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, | |||
3049 | 3183 | ||
3050 | btrfs_set_path_blocking(path); | 3184 | btrfs_set_path_blocking(path); |
3051 | ret = split_leaf(trans, root, &key, path, ins_len, 1); | 3185 | ret = split_leaf(trans, root, &key, path, ins_len, 1); |
3052 | BUG_ON(ret); | 3186 | if (ret) |
3187 | goto err; | ||
3053 | 3188 | ||
3054 | path->keep_locks = 0; | 3189 | path->keep_locks = 0; |
3055 | btrfs_unlock_up_safe(path, 1); | 3190 | btrfs_unlock_up_safe(path, 1); |
@@ -3212,7 +3347,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, | |||
3212 | { | 3347 | { |
3213 | int ret = 0; | 3348 | int ret = 0; |
3214 | int slot; | 3349 | int slot; |
3215 | int slot_orig; | ||
3216 | struct extent_buffer *leaf; | 3350 | struct extent_buffer *leaf; |
3217 | struct btrfs_item *item; | 3351 | struct btrfs_item *item; |
3218 | u32 nritems; | 3352 | u32 nritems; |
@@ -3222,7 +3356,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, | |||
3222 | unsigned int size_diff; | 3356 | unsigned int size_diff; |
3223 | int i; | 3357 | int i; |
3224 | 3358 | ||
3225 | slot_orig = path->slots[0]; | ||
3226 | leaf = path->nodes[0]; | 3359 | leaf = path->nodes[0]; |
3227 | slot = path->slots[0]; | 3360 | slot = path->slots[0]; |
3228 | 3361 | ||
@@ -3327,7 +3460,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, | |||
3327 | { | 3460 | { |
3328 | int ret = 0; | 3461 | int ret = 0; |
3329 | int slot; | 3462 | int slot; |
3330 | int slot_orig; | ||
3331 | struct extent_buffer *leaf; | 3463 | struct extent_buffer *leaf; |
3332 | struct btrfs_item *item; | 3464 | struct btrfs_item *item; |
3333 | u32 nritems; | 3465 | u32 nritems; |
@@ -3336,7 +3468,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, | |||
3336 | unsigned int old_size; | 3468 | unsigned int old_size; |
3337 | int i; | 3469 | int i; |
3338 | 3470 | ||
3339 | slot_orig = path->slots[0]; | ||
3340 | leaf = path->nodes[0]; | 3471 | leaf = path->nodes[0]; |
3341 | 3472 | ||
3342 | nritems = btrfs_header_nritems(leaf); | 3473 | nritems = btrfs_header_nritems(leaf); |
@@ -3669,7 +3800,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | |||
3669 | struct btrfs_key *cpu_key, u32 *data_size, | 3800 | struct btrfs_key *cpu_key, u32 *data_size, |
3670 | int nr) | 3801 | int nr) |
3671 | { | 3802 | { |
3672 | struct extent_buffer *leaf; | ||
3673 | int ret = 0; | 3803 | int ret = 0; |
3674 | int slot; | 3804 | int slot; |
3675 | int i; | 3805 | int i; |
@@ -3686,7 +3816,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | |||
3686 | if (ret < 0) | 3816 | if (ret < 0) |
3687 | goto out; | 3817 | goto out; |
3688 | 3818 | ||
3689 | leaf = path->nodes[0]; | ||
3690 | slot = path->slots[0]; | 3819 | slot = path->slots[0]; |
3691 | BUG_ON(slot < 0); | 3820 | BUG_ON(slot < 0); |
3692 | 3821 | ||
@@ -3791,9 +3920,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
3791 | */ | 3920 | */ |
3792 | btrfs_unlock_up_safe(path, 0); | 3921 | btrfs_unlock_up_safe(path, 0); |
3793 | 3922 | ||
3794 | ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len, | 3923 | root_sub_used(root, leaf->len); |
3795 | 0, root->root_key.objectid, 0); | 3924 | |
3796 | return ret; | 3925 | btrfs_free_tree_block(trans, root, leaf, 0, 1); |
3926 | return 0; | ||
3797 | } | 3927 | } |
3798 | /* | 3928 | /* |
3799 | * delete the item at the leaf level in path. If that empties | 3929 | * delete the item at the leaf level in path. If that empties |
@@ -3860,6 +3990,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3860 | if (leaf == root->node) { | 3990 | if (leaf == root->node) { |
3861 | btrfs_set_header_level(leaf, 0); | 3991 | btrfs_set_header_level(leaf, 0); |
3862 | } else { | 3992 | } else { |
3993 | btrfs_set_path_blocking(path); | ||
3994 | clean_tree_block(trans, root, leaf); | ||
3863 | ret = btrfs_del_leaf(trans, root, path, leaf); | 3995 | ret = btrfs_del_leaf(trans, root, path, leaf); |
3864 | BUG_ON(ret); | 3996 | BUG_ON(ret); |
3865 | } | 3997 | } |
@@ -3885,13 +4017,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3885 | extent_buffer_get(leaf); | 4017 | extent_buffer_get(leaf); |
3886 | 4018 | ||
3887 | btrfs_set_path_blocking(path); | 4019 | btrfs_set_path_blocking(path); |
3888 | wret = push_leaf_left(trans, root, path, 1, 1); | 4020 | wret = push_leaf_left(trans, root, path, 1, 1, |
4021 | 1, (u32)-1); | ||
3889 | if (wret < 0 && wret != -ENOSPC) | 4022 | if (wret < 0 && wret != -ENOSPC) |
3890 | ret = wret; | 4023 | ret = wret; |
3891 | 4024 | ||
3892 | if (path->nodes[0] == leaf && | 4025 | if (path->nodes[0] == leaf && |
3893 | btrfs_header_nritems(leaf)) { | 4026 | btrfs_header_nritems(leaf)) { |
3894 | wret = push_leaf_right(trans, root, path, 1, 1); | 4027 | wret = push_leaf_right(trans, root, path, 1, |
4028 | 1, 1, 0); | ||
3895 | if (wret < 0 && wret != -ENOSPC) | 4029 | if (wret < 0 && wret != -ENOSPC) |
3896 | ret = wret; | 4030 | ret = wret; |
3897 | } | 4031 | } |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2aa8ec6a0981..2c98b3af6052 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -26,6 +26,8 @@ | |||
26 | #include <linux/completion.h> | 26 | #include <linux/completion.h> |
27 | #include <linux/backing-dev.h> | 27 | #include <linux/backing-dev.h> |
28 | #include <linux/wait.h> | 28 | #include <linux/wait.h> |
29 | #include <linux/slab.h> | ||
30 | #include <linux/kobject.h> | ||
29 | #include <asm/kmap_types.h> | 31 | #include <asm/kmap_types.h> |
30 | #include "extent_io.h" | 32 | #include "extent_io.h" |
31 | #include "extent_map.h" | 33 | #include "extent_map.h" |
@@ -33,6 +35,7 @@ | |||
33 | 35 | ||
34 | struct btrfs_trans_handle; | 36 | struct btrfs_trans_handle; |
35 | struct btrfs_transaction; | 37 | struct btrfs_transaction; |
38 | struct btrfs_pending_snapshot; | ||
36 | extern struct kmem_cache *btrfs_trans_handle_cachep; | 39 | extern struct kmem_cache *btrfs_trans_handle_cachep; |
37 | extern struct kmem_cache *btrfs_transaction_cachep; | 40 | extern struct kmem_cache *btrfs_transaction_cachep; |
38 | extern struct kmem_cache *btrfs_bit_radix_cachep; | 41 | extern struct kmem_cache *btrfs_bit_radix_cachep; |
@@ -97,6 +100,9 @@ struct btrfs_ordered_sum; | |||
97 | */ | 100 | */ |
98 | #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL | 101 | #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL |
99 | 102 | ||
103 | /* For storing free space cache */ | ||
104 | #define BTRFS_FREE_SPACE_OBJECTID -11ULL | ||
105 | |||
100 | /* dummy objectid represents multiple objectids */ | 106 | /* dummy objectid represents multiple objectids */ |
101 | #define BTRFS_MULTIPLE_OBJECTIDS -255ULL | 107 | #define BTRFS_MULTIPLE_OBJECTIDS -255ULL |
102 | 108 | ||
@@ -263,6 +269,22 @@ struct btrfs_chunk { | |||
263 | /* additional stripes go here */ | 269 | /* additional stripes go here */ |
264 | } __attribute__ ((__packed__)); | 270 | } __attribute__ ((__packed__)); |
265 | 271 | ||
272 | #define BTRFS_FREE_SPACE_EXTENT 1 | ||
273 | #define BTRFS_FREE_SPACE_BITMAP 2 | ||
274 | |||
275 | struct btrfs_free_space_entry { | ||
276 | __le64 offset; | ||
277 | __le64 bytes; | ||
278 | u8 type; | ||
279 | } __attribute__ ((__packed__)); | ||
280 | |||
281 | struct btrfs_free_space_header { | ||
282 | struct btrfs_disk_key location; | ||
283 | __le64 generation; | ||
284 | __le64 num_entries; | ||
285 | __le64 num_bitmaps; | ||
286 | } __attribute__ ((__packed__)); | ||
287 | |||
266 | static inline unsigned long btrfs_chunk_item_size(int num_stripes) | 288 | static inline unsigned long btrfs_chunk_item_size(int num_stripes) |
267 | { | 289 | { |
268 | BUG_ON(num_stripes == 0); | 290 | BUG_ON(num_stripes == 0); |
@@ -273,6 +295,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) | |||
273 | #define BTRFS_FSID_SIZE 16 | 295 | #define BTRFS_FSID_SIZE 16 |
274 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) | 296 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) |
275 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) | 297 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) |
298 | |||
299 | /* | ||
300 | * File system states | ||
301 | */ | ||
302 | |||
303 | /* Errors detected */ | ||
304 | #define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) | ||
305 | |||
276 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) | 306 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) |
277 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) | 307 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) |
278 | 308 | ||
@@ -363,8 +393,10 @@ struct btrfs_super_block { | |||
363 | 393 | ||
364 | char label[BTRFS_LABEL_SIZE]; | 394 | char label[BTRFS_LABEL_SIZE]; |
365 | 395 | ||
396 | __le64 cache_generation; | ||
397 | |||
366 | /* future expansion */ | 398 | /* future expansion */ |
367 | __le64 reserved[32]; | 399 | __le64 reserved[31]; |
368 | u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; | 400 | u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; |
369 | } __attribute__ ((__packed__)); | 401 | } __attribute__ ((__packed__)); |
370 | 402 | ||
@@ -373,11 +405,17 @@ struct btrfs_super_block { | |||
373 | * ones specified below then we will fail to mount | 405 | * ones specified below then we will fail to mount |
374 | */ | 406 | */ |
375 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) | 407 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) |
408 | #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) | ||
409 | #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) | ||
410 | #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) | ||
376 | 411 | ||
377 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL | 412 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL |
378 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL | 413 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL |
379 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ | 414 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ |
380 | BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | 415 | (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ |
416 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ | ||
417 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ | ||
418 | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) | ||
381 | 419 | ||
382 | /* | 420 | /* |
383 | * A leaf is full of items. offset and size tell us where to find | 421 | * A leaf is full of items. offset and size tell us where to find |
@@ -524,9 +562,11 @@ struct btrfs_timespec { | |||
524 | } __attribute__ ((__packed__)); | 562 | } __attribute__ ((__packed__)); |
525 | 563 | ||
526 | enum btrfs_compression_type { | 564 | enum btrfs_compression_type { |
527 | BTRFS_COMPRESS_NONE = 0, | 565 | BTRFS_COMPRESS_NONE = 0, |
528 | BTRFS_COMPRESS_ZLIB = 1, | 566 | BTRFS_COMPRESS_ZLIB = 1, |
529 | BTRFS_COMPRESS_LAST = 2, | 567 | BTRFS_COMPRESS_LZO = 2, |
568 | BTRFS_COMPRESS_TYPES = 2, | ||
569 | BTRFS_COMPRESS_LAST = 3, | ||
530 | }; | 570 | }; |
531 | 571 | ||
532 | struct btrfs_inode_item { | 572 | struct btrfs_inode_item { |
@@ -570,6 +610,8 @@ struct btrfs_dir_item { | |||
570 | u8 type; | 610 | u8 type; |
571 | } __attribute__ ((__packed__)); | 611 | } __attribute__ ((__packed__)); |
572 | 612 | ||
613 | #define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) | ||
614 | |||
573 | struct btrfs_root_item { | 615 | struct btrfs_root_item { |
574 | struct btrfs_inode_item inode; | 616 | struct btrfs_inode_item inode; |
575 | __le64 generation; | 617 | __le64 generation; |
@@ -660,6 +702,7 @@ struct btrfs_csum_item { | |||
660 | #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) | 702 | #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) |
661 | #define BTRFS_BLOCK_GROUP_DUP (1 << 5) | 703 | #define BTRFS_BLOCK_GROUP_DUP (1 << 5) |
662 | #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) | 704 | #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) |
705 | #define BTRFS_NR_RAID_TYPES 5 | ||
663 | 706 | ||
664 | struct btrfs_block_group_item { | 707 | struct btrfs_block_group_item { |
665 | __le64 used; | 708 | __le64 used; |
@@ -670,43 +713,50 @@ struct btrfs_block_group_item { | |||
670 | struct btrfs_space_info { | 713 | struct btrfs_space_info { |
671 | u64 flags; | 714 | u64 flags; |
672 | 715 | ||
673 | u64 total_bytes; /* total bytes in the space */ | 716 | u64 total_bytes; /* total bytes in the space, |
674 | u64 bytes_used; /* total bytes used on disk */ | 717 | this doesn't take mirrors into account */ |
718 | u64 bytes_used; /* total bytes used, | ||
719 | this does't take mirrors into account */ | ||
675 | u64 bytes_pinned; /* total bytes pinned, will be freed when the | 720 | u64 bytes_pinned; /* total bytes pinned, will be freed when the |
676 | transaction finishes */ | 721 | transaction finishes */ |
677 | u64 bytes_reserved; /* total bytes the allocator has reserved for | 722 | u64 bytes_reserved; /* total bytes the allocator has reserved for |
678 | current allocations */ | 723 | current allocations */ |
679 | u64 bytes_readonly; /* total bytes that are read only */ | 724 | u64 bytes_readonly; /* total bytes that are read only */ |
680 | u64 bytes_super; /* total bytes reserved for the super blocks */ | 725 | |
681 | u64 bytes_root; /* the number of bytes needed to commit a | ||
682 | transaction */ | ||
683 | u64 bytes_may_use; /* number of bytes that may be used for | 726 | u64 bytes_may_use; /* number of bytes that may be used for |
684 | delalloc/allocations */ | 727 | delalloc/allocations */ |
685 | u64 bytes_delalloc; /* number of bytes currently reserved for | 728 | u64 disk_used; /* total bytes used on disk */ |
686 | delayed allocation */ | 729 | u64 disk_total; /* total bytes on disk, takes mirrors into |
730 | account */ | ||
687 | 731 | ||
688 | int full; /* indicates that we cannot allocate any more | 732 | int full; /* indicates that we cannot allocate any more |
689 | chunks for this space */ | 733 | chunks for this space */ |
690 | int force_alloc; /* set if we need to force a chunk alloc for | 734 | int force_alloc; /* set if we need to force a chunk alloc for |
691 | this space */ | 735 | this space */ |
692 | int force_delalloc; /* make people start doing filemap_flush until | ||
693 | we're under a threshold */ | ||
694 | 736 | ||
695 | struct list_head list; | 737 | struct list_head list; |
696 | 738 | ||
697 | /* for controlling how we free up space for allocations */ | ||
698 | wait_queue_head_t allocate_wait; | ||
699 | wait_queue_head_t flush_wait; | ||
700 | int allocating_chunk; | ||
701 | int flushing; | ||
702 | |||
703 | /* for block groups in our same type */ | 739 | /* for block groups in our same type */ |
704 | struct list_head block_groups; | 740 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; |
705 | spinlock_t lock; | 741 | spinlock_t lock; |
706 | struct rw_semaphore groups_sem; | 742 | struct rw_semaphore groups_sem; |
707 | atomic_t caching_threads; | 743 | atomic_t caching_threads; |
708 | }; | 744 | }; |
709 | 745 | ||
746 | struct btrfs_block_rsv { | ||
747 | u64 size; | ||
748 | u64 reserved; | ||
749 | u64 freed[2]; | ||
750 | struct btrfs_space_info *space_info; | ||
751 | struct list_head list; | ||
752 | spinlock_t lock; | ||
753 | atomic_t usage; | ||
754 | unsigned int priority:8; | ||
755 | unsigned int durable:1; | ||
756 | unsigned int refill_used:1; | ||
757 | unsigned int full:1; | ||
758 | }; | ||
759 | |||
710 | /* | 760 | /* |
711 | * free clusters are used to claim free space in relatively large chunks, | 761 | * free clusters are used to claim free space in relatively large chunks, |
712 | * allowing us to do less seeky writes. They are used for all metadata | 762 | * allowing us to do less seeky writes. They are used for all metadata |
@@ -741,6 +791,14 @@ enum btrfs_caching_type { | |||
741 | BTRFS_CACHE_FINISHED = 2, | 791 | BTRFS_CACHE_FINISHED = 2, |
742 | }; | 792 | }; |
743 | 793 | ||
794 | enum btrfs_disk_cache_state { | ||
795 | BTRFS_DC_WRITTEN = 0, | ||
796 | BTRFS_DC_ERROR = 1, | ||
797 | BTRFS_DC_CLEAR = 2, | ||
798 | BTRFS_DC_SETUP = 3, | ||
799 | BTRFS_DC_NEED_WRITE = 4, | ||
800 | }; | ||
801 | |||
744 | struct btrfs_caching_control { | 802 | struct btrfs_caching_control { |
745 | struct list_head list; | 803 | struct list_head list; |
746 | struct mutex mutex; | 804 | struct mutex mutex; |
@@ -754,17 +812,22 @@ struct btrfs_block_group_cache { | |||
754 | struct btrfs_key key; | 812 | struct btrfs_key key; |
755 | struct btrfs_block_group_item item; | 813 | struct btrfs_block_group_item item; |
756 | struct btrfs_fs_info *fs_info; | 814 | struct btrfs_fs_info *fs_info; |
815 | struct inode *inode; | ||
757 | spinlock_t lock; | 816 | spinlock_t lock; |
758 | u64 pinned; | 817 | u64 pinned; |
759 | u64 reserved; | 818 | u64 reserved; |
819 | u64 reserved_pinned; | ||
760 | u64 bytes_super; | 820 | u64 bytes_super; |
761 | u64 flags; | 821 | u64 flags; |
762 | u64 sectorsize; | 822 | u64 sectorsize; |
763 | int extents_thresh; | 823 | int extents_thresh; |
764 | int free_extents; | 824 | int free_extents; |
765 | int total_bitmaps; | 825 | int total_bitmaps; |
766 | int ro; | 826 | unsigned int ro:1; |
767 | int dirty; | 827 | unsigned int dirty:1; |
828 | unsigned int iref:1; | ||
829 | |||
830 | int disk_cache_state; | ||
768 | 831 | ||
769 | /* cache tracking stuff */ | 832 | /* cache tracking stuff */ |
770 | int cached; | 833 | int cached; |
@@ -822,6 +885,22 @@ struct btrfs_fs_info { | |||
822 | /* logical->physical extent mapping */ | 885 | /* logical->physical extent mapping */ |
823 | struct btrfs_mapping_tree mapping_tree; | 886 | struct btrfs_mapping_tree mapping_tree; |
824 | 887 | ||
888 | /* block reservation for extent, checksum and root tree */ | ||
889 | struct btrfs_block_rsv global_block_rsv; | ||
890 | /* block reservation for delay allocation */ | ||
891 | struct btrfs_block_rsv delalloc_block_rsv; | ||
892 | /* block reservation for metadata operations */ | ||
893 | struct btrfs_block_rsv trans_block_rsv; | ||
894 | /* block reservation for chunk tree */ | ||
895 | struct btrfs_block_rsv chunk_block_rsv; | ||
896 | |||
897 | struct btrfs_block_rsv empty_block_rsv; | ||
898 | |||
899 | /* list of block reservations that cross multiple transactions */ | ||
900 | struct list_head durable_block_rsv_list; | ||
901 | |||
902 | struct mutex durable_block_rsv_mutex; | ||
903 | |||
825 | u64 generation; | 904 | u64 generation; |
826 | u64 last_trans_committed; | 905 | u64 last_trans_committed; |
827 | 906 | ||
@@ -831,13 +910,14 @@ struct btrfs_fs_info { | |||
831 | */ | 910 | */ |
832 | u64 last_trans_log_full_commit; | 911 | u64 last_trans_log_full_commit; |
833 | u64 open_ioctl_trans; | 912 | u64 open_ioctl_trans; |
834 | unsigned long mount_opt; | 913 | unsigned long mount_opt:20; |
835 | u64 max_extent; | 914 | unsigned long compress_type:4; |
836 | u64 max_inline; | 915 | u64 max_inline; |
837 | u64 alloc_start; | 916 | u64 alloc_start; |
838 | struct btrfs_transaction *running_transaction; | 917 | struct btrfs_transaction *running_transaction; |
839 | wait_queue_head_t transaction_throttle; | 918 | wait_queue_head_t transaction_throttle; |
840 | wait_queue_head_t transaction_wait; | 919 | wait_queue_head_t transaction_wait; |
920 | wait_queue_head_t transaction_blocked_wait; | ||
841 | wait_queue_head_t async_submit_wait; | 921 | wait_queue_head_t async_submit_wait; |
842 | 922 | ||
843 | struct btrfs_super_block super_copy; | 923 | struct btrfs_super_block super_copy; |
@@ -924,8 +1004,8 @@ struct btrfs_fs_info { | |||
924 | struct btrfs_workers endio_meta_workers; | 1004 | struct btrfs_workers endio_meta_workers; |
925 | struct btrfs_workers endio_meta_write_workers; | 1005 | struct btrfs_workers endio_meta_write_workers; |
926 | struct btrfs_workers endio_write_workers; | 1006 | struct btrfs_workers endio_write_workers; |
1007 | struct btrfs_workers endio_freespace_worker; | ||
927 | struct btrfs_workers submit_workers; | 1008 | struct btrfs_workers submit_workers; |
928 | struct btrfs_workers enospc_workers; | ||
929 | /* | 1009 | /* |
930 | * fixup workers take dirty pages that didn't properly go through | 1010 | * fixup workers take dirty pages that didn't properly go through |
931 | * the cow mechanism and make them safe to write. It happens | 1011 | * the cow mechanism and make them safe to write. It happens |
@@ -941,6 +1021,7 @@ struct btrfs_fs_info { | |||
941 | int do_barriers; | 1021 | int do_barriers; |
942 | int closing; | 1022 | int closing; |
943 | int log_root_recovering; | 1023 | int log_root_recovering; |
1024 | int enospc_unlink; | ||
944 | 1025 | ||
945 | u64 total_pinned; | 1026 | u64 total_pinned; |
946 | 1027 | ||
@@ -985,6 +1066,9 @@ struct btrfs_fs_info { | |||
985 | unsigned metadata_ratio; | 1066 | unsigned metadata_ratio; |
986 | 1067 | ||
987 | void *bdev_holder; | 1068 | void *bdev_holder; |
1069 | |||
1070 | /* filesystem state */ | ||
1071 | u64 fs_state; | ||
988 | }; | 1072 | }; |
989 | 1073 | ||
990 | /* | 1074 | /* |
@@ -1010,6 +1094,9 @@ struct btrfs_root { | |||
1010 | struct completion kobj_unregister; | 1094 | struct completion kobj_unregister; |
1011 | struct mutex objectid_mutex; | 1095 | struct mutex objectid_mutex; |
1012 | 1096 | ||
1097 | spinlock_t accounting_lock; | ||
1098 | struct btrfs_block_rsv *block_rsv; | ||
1099 | |||
1013 | struct mutex log_mutex; | 1100 | struct mutex log_mutex; |
1014 | wait_queue_head_t log_writer_wait; | 1101 | wait_queue_head_t log_writer_wait; |
1015 | wait_queue_head_t log_commit_wait[2]; | 1102 | wait_queue_head_t log_commit_wait[2]; |
@@ -1041,7 +1128,6 @@ struct btrfs_root { | |||
1041 | int ref_cows; | 1128 | int ref_cows; |
1042 | int track_dirty; | 1129 | int track_dirty; |
1043 | int in_radix; | 1130 | int in_radix; |
1044 | int clean_orphans; | ||
1045 | 1131 | ||
1046 | u64 defrag_trans_start; | 1132 | u64 defrag_trans_start; |
1047 | struct btrfs_key defrag_progress; | 1133 | struct btrfs_key defrag_progress; |
@@ -1055,8 +1141,11 @@ struct btrfs_root { | |||
1055 | 1141 | ||
1056 | struct list_head root_list; | 1142 | struct list_head root_list; |
1057 | 1143 | ||
1058 | spinlock_t list_lock; | 1144 | spinlock_t orphan_lock; |
1059 | struct list_head orphan_list; | 1145 | struct list_head orphan_list; |
1146 | struct btrfs_block_rsv *orphan_block_rsv; | ||
1147 | int orphan_item_inserted; | ||
1148 | int orphan_cleanup_state; | ||
1060 | 1149 | ||
1061 | spinlock_t inode_lock; | 1150 | spinlock_t inode_lock; |
1062 | /* red-black tree that keeps track of in-memory inodes */ | 1151 | /* red-black tree that keeps track of in-memory inodes */ |
@@ -1162,6 +1251,9 @@ struct btrfs_root { | |||
1162 | #define BTRFS_MOUNT_NOSSD (1 << 9) | 1251 | #define BTRFS_MOUNT_NOSSD (1 << 9) |
1163 | #define BTRFS_MOUNT_DISCARD (1 << 10) | 1252 | #define BTRFS_MOUNT_DISCARD (1 << 10) |
1164 | #define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) | 1253 | #define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) |
1254 | #define BTRFS_MOUNT_SPACE_CACHE (1 << 12) | ||
1255 | #define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) | ||
1256 | #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) | ||
1165 | 1257 | ||
1166 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 1258 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
1167 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 1259 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
@@ -1182,7 +1274,6 @@ struct btrfs_root { | |||
1182 | #define BTRFS_INODE_NOATIME (1 << 9) | 1274 | #define BTRFS_INODE_NOATIME (1 << 9) |
1183 | #define BTRFS_INODE_DIRSYNC (1 << 10) | 1275 | #define BTRFS_INODE_DIRSYNC (1 << 10) |
1184 | 1276 | ||
1185 | |||
1186 | /* some macros to generate set/get funcs for the struct fields. This | 1277 | /* some macros to generate set/get funcs for the struct fields. This |
1187 | * assumes there is a lefoo_to_cpu for every type, so lets make a simple | 1278 | * assumes there is a lefoo_to_cpu for every type, so lets make a simple |
1188 | * one for u8: | 1279 | * one for u8: |
@@ -1636,6 +1727,27 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, | |||
1636 | write_eb_member(eb, item, struct btrfs_dir_item, location, key); | 1727 | write_eb_member(eb, item, struct btrfs_dir_item, location, key); |
1637 | } | 1728 | } |
1638 | 1729 | ||
1730 | BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header, | ||
1731 | num_entries, 64); | ||
1732 | BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header, | ||
1733 | num_bitmaps, 64); | ||
1734 | BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header, | ||
1735 | generation, 64); | ||
1736 | |||
1737 | static inline void btrfs_free_space_key(struct extent_buffer *eb, | ||
1738 | struct btrfs_free_space_header *h, | ||
1739 | struct btrfs_disk_key *key) | ||
1740 | { | ||
1741 | read_eb_member(eb, h, struct btrfs_free_space_header, location, key); | ||
1742 | } | ||
1743 | |||
1744 | static inline void btrfs_set_free_space_key(struct extent_buffer *eb, | ||
1745 | struct btrfs_free_space_header *h, | ||
1746 | struct btrfs_disk_key *key) | ||
1747 | { | ||
1748 | write_eb_member(eb, h, struct btrfs_free_space_header, location, key); | ||
1749 | } | ||
1750 | |||
1639 | /* struct btrfs_disk_key */ | 1751 | /* struct btrfs_disk_key */ |
1640 | BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, | 1752 | BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, |
1641 | objectid, 64); | 1753 | objectid, 64); |
@@ -1800,6 +1912,11 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); | |||
1800 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, | 1912 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, |
1801 | last_snapshot, 64); | 1913 | last_snapshot, 64); |
1802 | 1914 | ||
1915 | static inline bool btrfs_root_readonly(struct btrfs_root *root) | ||
1916 | { | ||
1917 | return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; | ||
1918 | } | ||
1919 | |||
1803 | /* struct btrfs_super_block */ | 1920 | /* struct btrfs_super_block */ |
1804 | 1921 | ||
1805 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); | 1922 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); |
@@ -1842,11 +1959,13 @@ BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block, | |||
1842 | BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block, | 1959 | BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block, |
1843 | compat_flags, 64); | 1960 | compat_flags, 64); |
1844 | BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block, | 1961 | BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block, |
1845 | compat_flags, 64); | 1962 | compat_ro_flags, 64); |
1846 | BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block, | 1963 | BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block, |
1847 | incompat_flags, 64); | 1964 | incompat_flags, 64); |
1848 | BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, | 1965 | BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, |
1849 | csum_type, 16); | 1966 | csum_type, 16); |
1967 | BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, | ||
1968 | cache_generation, 64); | ||
1850 | 1969 | ||
1851 | static inline int btrfs_super_csum_size(struct btrfs_super_block *s) | 1970 | static inline int btrfs_super_csum_size(struct btrfs_super_block *s) |
1852 | { | 1971 | { |
@@ -1959,11 +2078,20 @@ static inline struct dentry *fdentry(struct file *file) | |||
1959 | return file->f_path.dentry; | 2078 | return file->f_path.dentry; |
1960 | } | 2079 | } |
1961 | 2080 | ||
2081 | static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) | ||
2082 | { | ||
2083 | return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) && | ||
2084 | (space_info->flags & BTRFS_BLOCK_GROUP_DATA)); | ||
2085 | } | ||
2086 | |||
1962 | /* extent-tree.c */ | 2087 | /* extent-tree.c */ |
1963 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | 2088 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); |
1964 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 2089 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
1965 | struct btrfs_root *root, unsigned long count); | 2090 | struct btrfs_root *root, unsigned long count); |
1966 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 2091 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
2092 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
2093 | struct btrfs_root *root, u64 bytenr, | ||
2094 | u64 num_bytes, u64 *refs, u64 *flags); | ||
1967 | int btrfs_pin_extent(struct btrfs_root *root, | 2095 | int btrfs_pin_extent(struct btrfs_root *root, |
1968 | u64 bytenr, u64 num, int reserved); | 2096 | u64 bytenr, u64 num, int reserved); |
1969 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 2097 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
@@ -1983,10 +2111,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
1983 | u64 parent, u64 root_objectid, | 2111 | u64 parent, u64 root_objectid, |
1984 | struct btrfs_disk_key *key, int level, | 2112 | struct btrfs_disk_key *key, int level, |
1985 | u64 hint, u64 empty_size); | 2113 | u64 hint, u64 empty_size); |
1986 | int btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 2114 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
1987 | struct btrfs_root *root, | 2115 | struct btrfs_root *root, |
1988 | u64 bytenr, u32 blocksize, | 2116 | struct extent_buffer *buf, |
1989 | u64 parent, u64 root_objectid, int level); | 2117 | u64 parent, int last_ref); |
1990 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 2118 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
1991 | struct btrfs_root *root, | 2119 | struct btrfs_root *root, |
1992 | u64 bytenr, u32 blocksize, | 2120 | u64 bytenr, u32 blocksize, |
@@ -2040,27 +2168,57 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
2040 | u64 size); | 2168 | u64 size); |
2041 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 2169 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
2042 | struct btrfs_root *root, u64 group_start); | 2170 | struct btrfs_root *root, u64 group_start); |
2043 | int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | ||
2044 | struct btrfs_block_group_cache *group); | ||
2045 | |||
2046 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 2171 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
2172 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); | ||
2047 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2173 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
2048 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2174 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
2175 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | ||
2176 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); | ||
2177 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | ||
2178 | struct btrfs_root *root, | ||
2179 | int num_items); | ||
2180 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | ||
2181 | struct btrfs_root *root); | ||
2182 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | ||
2183 | struct inode *inode); | ||
2184 | void btrfs_orphan_release_metadata(struct inode *inode); | ||
2185 | int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | ||
2186 | struct btrfs_pending_snapshot *pending); | ||
2187 | int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes); | ||
2188 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); | ||
2189 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes); | ||
2190 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes); | ||
2191 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv); | ||
2192 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); | ||
2193 | void btrfs_free_block_rsv(struct btrfs_root *root, | ||
2194 | struct btrfs_block_rsv *rsv); | ||
2195 | void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | ||
2196 | struct btrfs_block_rsv *rsv); | ||
2197 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | ||
2198 | struct btrfs_root *root, | ||
2199 | struct btrfs_block_rsv *block_rsv, | ||
2200 | u64 num_bytes); | ||
2201 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | ||
2202 | struct btrfs_root *root, | ||
2203 | struct btrfs_block_rsv *block_rsv, | ||
2204 | u64 min_reserved, int min_factor); | ||
2205 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | ||
2206 | struct btrfs_block_rsv *dst_rsv, | ||
2207 | u64 num_bytes); | ||
2208 | void btrfs_block_rsv_release(struct btrfs_root *root, | ||
2209 | struct btrfs_block_rsv *block_rsv, | ||
2210 | u64 num_bytes); | ||
2211 | int btrfs_set_block_group_ro(struct btrfs_root *root, | ||
2212 | struct btrfs_block_group_cache *cache); | ||
2213 | int btrfs_set_block_group_rw(struct btrfs_root *root, | ||
2214 | struct btrfs_block_group_cache *cache); | ||
2215 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info); | ||
2216 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); | ||
2217 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, | ||
2218 | u64 start, u64 end); | ||
2219 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | ||
2220 | u64 num_bytes); | ||
2049 | 2221 | ||
2050 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items); | ||
2051 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items); | ||
2052 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | ||
2053 | struct inode *inode, int num_items); | ||
2054 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
2055 | struct inode *inode, int num_items); | ||
2056 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | ||
2057 | u64 bytes); | ||
2058 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | ||
2059 | struct inode *inode, u64 bytes); | ||
2060 | void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | ||
2061 | u64 bytes); | ||
2062 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | ||
2063 | u64 bytes); | ||
2064 | /* ctree.c */ | 2222 | /* ctree.c */ |
2065 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2223 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
2066 | int level, int *slot); | 2224 | int level, int *slot); |
@@ -2151,7 +2309,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, | |||
2151 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2309 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2152 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2310 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2153 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); | 2311 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); |
2154 | int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref); | 2312 | int btrfs_drop_snapshot(struct btrfs_root *root, |
2313 | struct btrfs_block_rsv *block_rsv, int update_ref); | ||
2155 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | 2314 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, |
2156 | struct btrfs_root *root, | 2315 | struct btrfs_root *root, |
2157 | struct extent_buffer *node, | 2316 | struct extent_buffer *node, |
@@ -2244,6 +2403,12 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
2244 | struct btrfs_root *root, | 2403 | struct btrfs_root *root, |
2245 | const char *name, int name_len, | 2404 | const char *name, int name_len, |
2246 | u64 inode_objectid, u64 ref_objectid, u64 *index); | 2405 | u64 inode_objectid, u64 ref_objectid, u64 *index); |
2406 | struct btrfs_inode_ref * | ||
2407 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | ||
2408 | struct btrfs_root *root, | ||
2409 | struct btrfs_path *path, | ||
2410 | const char *name, int name_len, | ||
2411 | u64 inode_objectid, u64 ref_objectid, int mod); | ||
2247 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | 2412 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, |
2248 | struct btrfs_root *root, | 2413 | struct btrfs_root *root, |
2249 | struct btrfs_path *path, u64 objectid); | 2414 | struct btrfs_path *path, u64 objectid); |
@@ -2256,6 +2421,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
2256 | struct btrfs_root *root, u64 bytenr, u64 len); | 2421 | struct btrfs_root *root, u64 bytenr, u64 len); |
2257 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 2422 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, |
2258 | struct bio *bio, u32 *dst); | 2423 | struct bio *bio, u32 *dst); |
2424 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | ||
2425 | struct bio *bio, u64 logical_offset, u32 *dst); | ||
2259 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | 2426 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, |
2260 | struct btrfs_root *root, | 2427 | struct btrfs_root *root, |
2261 | u64 objectid, u64 pos, | 2428 | u64 objectid, u64 pos, |
@@ -2310,7 +2477,10 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
2310 | u32 min_type); | 2477 | u32 min_type); |
2311 | 2478 | ||
2312 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 2479 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
2313 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); | 2480 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput, |
2481 | int sync); | ||
2482 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | ||
2483 | struct extent_state **cached_state); | ||
2314 | int btrfs_writepages(struct address_space *mapping, | 2484 | int btrfs_writepages(struct address_space *mapping, |
2315 | struct writeback_control *wbc); | 2485 | struct writeback_control *wbc); |
2316 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 2486 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
@@ -2324,18 +2494,18 @@ unsigned long btrfs_force_ra(struct address_space *mapping, | |||
2324 | pgoff_t offset, pgoff_t last_index); | 2494 | pgoff_t offset, pgoff_t last_index); |
2325 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 2495 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
2326 | int btrfs_readpage(struct file *file, struct page *page); | 2496 | int btrfs_readpage(struct file *file, struct page *page); |
2327 | void btrfs_delete_inode(struct inode *inode); | 2497 | void btrfs_evict_inode(struct inode *inode); |
2328 | void btrfs_put_inode(struct inode *inode); | 2498 | void btrfs_put_inode(struct inode *inode); |
2329 | int btrfs_write_inode(struct inode *inode, int wait); | 2499 | int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); |
2330 | void btrfs_dirty_inode(struct inode *inode); | 2500 | void btrfs_dirty_inode(struct inode *inode); |
2331 | struct inode *btrfs_alloc_inode(struct super_block *sb); | 2501 | struct inode *btrfs_alloc_inode(struct super_block *sb); |
2332 | void btrfs_destroy_inode(struct inode *inode); | 2502 | void btrfs_destroy_inode(struct inode *inode); |
2333 | void btrfs_drop_inode(struct inode *inode); | 2503 | int btrfs_drop_inode(struct inode *inode); |
2334 | int btrfs_init_cachep(void); | 2504 | int btrfs_init_cachep(void); |
2335 | void btrfs_destroy_cachep(void); | 2505 | void btrfs_destroy_cachep(void); |
2336 | long btrfs_ioctl_trans_end(struct file *file); | 2506 | long btrfs_ioctl_trans_end(struct file *file); |
2337 | struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | 2507 | struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, |
2338 | struct btrfs_root *root); | 2508 | struct btrfs_root *root, int *was_new); |
2339 | int btrfs_commit_write(struct file *file, struct page *page, | 2509 | int btrfs_commit_write(struct file *file, struct page *page, |
2340 | unsigned from, unsigned to); | 2510 | unsigned from, unsigned to); |
2341 | struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | 2511 | struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, |
@@ -2347,10 +2517,24 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2347 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | 2517 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); |
2348 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | 2518 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); |
2349 | void btrfs_orphan_cleanup(struct btrfs_root *root); | 2519 | void btrfs_orphan_cleanup(struct btrfs_root *root); |
2520 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
2521 | struct btrfs_pending_snapshot *pending, | ||
2522 | u64 *bytes_to_reserve); | ||
2523 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
2524 | struct btrfs_pending_snapshot *pending); | ||
2525 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | ||
2526 | struct btrfs_root *root); | ||
2350 | int btrfs_cont_expand(struct inode *inode, loff_t size); | 2527 | int btrfs_cont_expand(struct inode *inode, loff_t size); |
2351 | int btrfs_invalidate_inodes(struct btrfs_root *root); | 2528 | int btrfs_invalidate_inodes(struct btrfs_root *root); |
2352 | void btrfs_add_delayed_iput(struct inode *inode); | 2529 | void btrfs_add_delayed_iput(struct inode *inode); |
2353 | void btrfs_run_delayed_iputs(struct btrfs_root *root); | 2530 | void btrfs_run_delayed_iputs(struct btrfs_root *root); |
2531 | int btrfs_prealloc_file_range(struct inode *inode, int mode, | ||
2532 | u64 start, u64 num_bytes, u64 min_size, | ||
2533 | loff_t actual_len, u64 *alloc_hint); | ||
2534 | int btrfs_prealloc_file_range_trans(struct inode *inode, | ||
2535 | struct btrfs_trans_handle *trans, int mode, | ||
2536 | u64 start, u64 num_bytes, u64 min_size, | ||
2537 | loff_t actual_len, u64 *alloc_hint); | ||
2354 | extern const struct dentry_operations btrfs_dentry_operations; | 2538 | extern const struct dentry_operations btrfs_dentry_operations; |
2355 | 2539 | ||
2356 | /* ioctl.c */ | 2540 | /* ioctl.c */ |
@@ -2359,7 +2543,7 @@ void btrfs_update_iflags(struct inode *inode); | |||
2359 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); | 2543 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); |
2360 | 2544 | ||
2361 | /* file.c */ | 2545 | /* file.c */ |
2362 | int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); | 2546 | int btrfs_sync_file(struct file *file, int datasync); |
2363 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 2547 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
2364 | int skip_pinned); | 2548 | int skip_pinned); |
2365 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); | 2549 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); |
@@ -2386,13 +2570,20 @@ void btrfs_sysfs_del_super(struct btrfs_fs_info *root); | |||
2386 | ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); | 2570 | ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); |
2387 | 2571 | ||
2388 | /* super.c */ | 2572 | /* super.c */ |
2389 | u64 btrfs_parse_size(char *str); | ||
2390 | int btrfs_parse_options(struct btrfs_root *root, char *options); | 2573 | int btrfs_parse_options(struct btrfs_root *root, char *options); |
2391 | int btrfs_sync_fs(struct super_block *sb, int wait); | 2574 | int btrfs_sync_fs(struct super_block *sb, int wait); |
2575 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | ||
2576 | unsigned int line, int errno); | ||
2577 | |||
2578 | #define btrfs_std_error(fs_info, errno) \ | ||
2579 | do { \ | ||
2580 | if ((errno)) \ | ||
2581 | __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\ | ||
2582 | } while (0) | ||
2392 | 2583 | ||
2393 | /* acl.c */ | 2584 | /* acl.c */ |
2394 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 2585 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
2395 | int btrfs_check_acl(struct inode *inode, int mask); | 2586 | int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags); |
2396 | #else | 2587 | #else |
2397 | #define btrfs_check_acl NULL | 2588 | #define btrfs_check_acl NULL |
2398 | #endif | 2589 | #endif |
@@ -2408,4 +2599,12 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
2408 | struct btrfs_root *root); | 2599 | struct btrfs_root *root); |
2409 | int btrfs_recover_relocation(struct btrfs_root *root); | 2600 | int btrfs_recover_relocation(struct btrfs_root *root); |
2410 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); | 2601 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); |
2602 | void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, | ||
2603 | struct btrfs_root *root, struct extent_buffer *buf, | ||
2604 | struct extent_buffer *cow); | ||
2605 | void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, | ||
2606 | struct btrfs_pending_snapshot *pending, | ||
2607 | u64 *bytes_to_reserve); | ||
2608 | void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | ||
2609 | struct btrfs_pending_snapshot *pending); | ||
2411 | #endif | 2610 | #endif |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 84e6781413b1..e807b143b857 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -17,6 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/slab.h> | ||
20 | #include <linux/sort.h> | 21 | #include <linux/sort.h> |
21 | #include "ctree.h" | 22 | #include "ctree.h" |
22 | #include "delayed-ref.h" | 23 | #include "delayed-ref.h" |
@@ -318,107 +319,6 @@ out: | |||
318 | } | 319 | } |
319 | 320 | ||
320 | /* | 321 | /* |
321 | * helper function to lookup reference count and flags of extent. | ||
322 | * | ||
323 | * the head node for delayed ref is used to store the sum of all the | ||
324 | * reference count modifications queued up in the rbtree. the head | ||
325 | * node may also store the extent flags to set. This way you can check | ||
326 | * to see what the reference count and extent flags would be if all of | ||
327 | * the delayed refs are not processed. | ||
328 | */ | ||
329 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
330 | struct btrfs_root *root, u64 bytenr, | ||
331 | u64 num_bytes, u64 *refs, u64 *flags) | ||
332 | { | ||
333 | struct btrfs_delayed_ref_node *ref; | ||
334 | struct btrfs_delayed_ref_head *head; | ||
335 | struct btrfs_delayed_ref_root *delayed_refs; | ||
336 | struct btrfs_path *path; | ||
337 | struct btrfs_extent_item *ei; | ||
338 | struct extent_buffer *leaf; | ||
339 | struct btrfs_key key; | ||
340 | u32 item_size; | ||
341 | u64 num_refs; | ||
342 | u64 extent_flags; | ||
343 | int ret; | ||
344 | |||
345 | path = btrfs_alloc_path(); | ||
346 | if (!path) | ||
347 | return -ENOMEM; | ||
348 | |||
349 | key.objectid = bytenr; | ||
350 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
351 | key.offset = num_bytes; | ||
352 | delayed_refs = &trans->transaction->delayed_refs; | ||
353 | again: | ||
354 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | ||
355 | &key, path, 0, 0); | ||
356 | if (ret < 0) | ||
357 | goto out; | ||
358 | |||
359 | if (ret == 0) { | ||
360 | leaf = path->nodes[0]; | ||
361 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
362 | if (item_size >= sizeof(*ei)) { | ||
363 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
364 | struct btrfs_extent_item); | ||
365 | num_refs = btrfs_extent_refs(leaf, ei); | ||
366 | extent_flags = btrfs_extent_flags(leaf, ei); | ||
367 | } else { | ||
368 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
369 | struct btrfs_extent_item_v0 *ei0; | ||
370 | BUG_ON(item_size != sizeof(*ei0)); | ||
371 | ei0 = btrfs_item_ptr(leaf, path->slots[0], | ||
372 | struct btrfs_extent_item_v0); | ||
373 | num_refs = btrfs_extent_refs_v0(leaf, ei0); | ||
374 | /* FIXME: this isn't correct for data */ | ||
375 | extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
376 | #else | ||
377 | BUG(); | ||
378 | #endif | ||
379 | } | ||
380 | BUG_ON(num_refs == 0); | ||
381 | } else { | ||
382 | num_refs = 0; | ||
383 | extent_flags = 0; | ||
384 | ret = 0; | ||
385 | } | ||
386 | |||
387 | spin_lock(&delayed_refs->lock); | ||
388 | ref = find_ref_head(&delayed_refs->root, bytenr, NULL); | ||
389 | if (ref) { | ||
390 | head = btrfs_delayed_node_to_head(ref); | ||
391 | if (!mutex_trylock(&head->mutex)) { | ||
392 | atomic_inc(&ref->refs); | ||
393 | spin_unlock(&delayed_refs->lock); | ||
394 | |||
395 | btrfs_release_path(root->fs_info->extent_root, path); | ||
396 | |||
397 | mutex_lock(&head->mutex); | ||
398 | mutex_unlock(&head->mutex); | ||
399 | btrfs_put_delayed_ref(ref); | ||
400 | goto again; | ||
401 | } | ||
402 | if (head->extent_op && head->extent_op->update_flags) | ||
403 | extent_flags |= head->extent_op->flags_to_set; | ||
404 | else | ||
405 | BUG_ON(num_refs == 0); | ||
406 | |||
407 | num_refs += ref->ref_mod; | ||
408 | mutex_unlock(&head->mutex); | ||
409 | } | ||
410 | WARN_ON(num_refs == 0); | ||
411 | if (refs) | ||
412 | *refs = num_refs; | ||
413 | if (flags) | ||
414 | *flags = extent_flags; | ||
415 | out: | ||
416 | spin_unlock(&delayed_refs->lock); | ||
417 | btrfs_free_path(path); | ||
418 | return ret; | ||
419 | } | ||
420 | |||
421 | /* | ||
422 | * helper function to update an extent delayed ref in the | 322 | * helper function to update an extent delayed ref in the |
423 | * rbtree. existing and update must both have the same | 323 | * rbtree. existing and update must both have the same |
424 | * bytenr and parent | 324 | * bytenr and parent |
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index f6fc67ddad36..50e3cf92fbda 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -167,9 +167,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, | |||
167 | struct btrfs_delayed_ref_head * | 167 | struct btrfs_delayed_ref_head * |
168 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); | 168 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); |
169 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); | 169 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); |
170 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
171 | struct btrfs_root *root, u64 bytenr, | ||
172 | u64 num_bytes, u64 *refs, u64 *flags); | ||
173 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | 170 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, |
174 | u64 bytenr, u64 num_bytes, u64 orig_parent, | 171 | u64 bytenr, u64 num_bytes, u64 orig_parent, |
175 | u64 parent, u64 orig_ref_root, u64 ref_root, | 172 | u64 parent, u64 orig_ref_root, u64 ref_root, |
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index e9103b3baa49..f0cad5ae5be7 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
@@ -427,5 +427,5 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, | |||
427 | ret = btrfs_truncate_item(trans, root, path, | 427 | ret = btrfs_truncate_item(trans, root, path, |
428 | item_len - sub_item_len, 1); | 428 | item_len - sub_item_len, 1); |
429 | } | 429 | } |
430 | return 0; | 430 | return ret; |
431 | } | 431 | } |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 87b25543d7d1..b531c36455d8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -27,6 +27,8 @@ | |||
27 | #include <linux/kthread.h> | 27 | #include <linux/kthread.h> |
28 | #include <linux/freezer.h> | 28 | #include <linux/freezer.h> |
29 | #include <linux/crc32c.h> | 29 | #include <linux/crc32c.h> |
30 | #include <linux/slab.h> | ||
31 | #include <linux/migrate.h> | ||
30 | #include "compat.h" | 32 | #include "compat.h" |
31 | #include "ctree.h" | 33 | #include "ctree.h" |
32 | #include "disk-io.h" | 34 | #include "disk-io.h" |
@@ -42,8 +44,20 @@ | |||
42 | static struct extent_io_ops btree_extent_io_ops; | 44 | static struct extent_io_ops btree_extent_io_ops; |
43 | static void end_workqueue_fn(struct btrfs_work *work); | 45 | static void end_workqueue_fn(struct btrfs_work *work); |
44 | static void free_fs_root(struct btrfs_root *root); | 46 | static void free_fs_root(struct btrfs_root *root); |
45 | 47 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | |
46 | static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); | 48 | int read_only); |
49 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root); | ||
50 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root); | ||
51 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
52 | struct btrfs_root *root); | ||
53 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); | ||
54 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); | ||
55 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
56 | struct extent_io_tree *dirty_pages, | ||
57 | int mark); | ||
58 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
59 | struct extent_io_tree *pinned_extents); | ||
60 | static int btrfs_cleanup_transaction(struct btrfs_root *root); | ||
47 | 61 | ||
48 | /* | 62 | /* |
49 | * end_io_wq structs are used to do processing in task context when an IO is | 63 | * end_io_wq structs are used to do processing in task context when an IO is |
@@ -75,6 +89,11 @@ struct async_submit_bio { | |||
75 | int rw; | 89 | int rw; |
76 | int mirror_num; | 90 | int mirror_num; |
77 | unsigned long bio_flags; | 91 | unsigned long bio_flags; |
92 | /* | ||
93 | * bio_offset is optional, can be used if the pages in the bio | ||
94 | * can't tell us where in the file the bio should go | ||
95 | */ | ||
96 | u64 bio_offset; | ||
78 | struct btrfs_work work; | 97 | struct btrfs_work work; |
79 | }; | 98 | }; |
80 | 99 | ||
@@ -263,13 +282,15 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
263 | static int verify_parent_transid(struct extent_io_tree *io_tree, | 282 | static int verify_parent_transid(struct extent_io_tree *io_tree, |
264 | struct extent_buffer *eb, u64 parent_transid) | 283 | struct extent_buffer *eb, u64 parent_transid) |
265 | { | 284 | { |
285 | struct extent_state *cached_state = NULL; | ||
266 | int ret; | 286 | int ret; |
267 | 287 | ||
268 | if (!parent_transid || btrfs_header_generation(eb) == parent_transid) | 288 | if (!parent_transid || btrfs_header_generation(eb) == parent_transid) |
269 | return 0; | 289 | return 0; |
270 | 290 | ||
271 | lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); | 291 | lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, |
272 | if (extent_buffer_uptodate(io_tree, eb) && | 292 | 0, &cached_state, GFP_NOFS); |
293 | if (extent_buffer_uptodate(io_tree, eb, cached_state) && | ||
273 | btrfs_header_generation(eb) == parent_transid) { | 294 | btrfs_header_generation(eb) == parent_transid) { |
274 | ret = 0; | 295 | ret = 0; |
275 | goto out; | 296 | goto out; |
@@ -282,10 +303,10 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
282 | (unsigned long long)btrfs_header_generation(eb)); | 303 | (unsigned long long)btrfs_header_generation(eb)); |
283 | } | 304 | } |
284 | ret = 1; | 305 | ret = 1; |
285 | clear_extent_buffer_uptodate(io_tree, eb); | 306 | clear_extent_buffer_uptodate(io_tree, eb, &cached_state); |
286 | out: | 307 | out: |
287 | unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, | 308 | unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, |
288 | GFP_NOFS); | 309 | &cached_state, GFP_NOFS); |
289 | return ret; | 310 | return ret; |
290 | } | 311 | } |
291 | 312 | ||
@@ -332,7 +353,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
332 | struct extent_io_tree *tree; | 353 | struct extent_io_tree *tree; |
333 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 354 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
334 | u64 found_start; | 355 | u64 found_start; |
335 | int found_level; | ||
336 | unsigned long len; | 356 | unsigned long len; |
337 | struct extent_buffer *eb; | 357 | struct extent_buffer *eb; |
338 | int ret; | 358 | int ret; |
@@ -347,9 +367,15 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
347 | WARN_ON(len == 0); | 367 | WARN_ON(len == 0); |
348 | 368 | ||
349 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 369 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); |
370 | if (eb == NULL) { | ||
371 | WARN_ON(1); | ||
372 | goto out; | ||
373 | } | ||
350 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, | 374 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, |
351 | btrfs_header_generation(eb)); | 375 | btrfs_header_generation(eb)); |
352 | BUG_ON(ret); | 376 | BUG_ON(ret); |
377 | WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN)); | ||
378 | |||
353 | found_start = btrfs_header_bytenr(eb); | 379 | found_start = btrfs_header_bytenr(eb); |
354 | if (found_start != start) { | 380 | if (found_start != start) { |
355 | WARN_ON(1); | 381 | WARN_ON(1); |
@@ -363,8 +389,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
363 | WARN_ON(1); | 389 | WARN_ON(1); |
364 | goto err; | 390 | goto err; |
365 | } | 391 | } |
366 | found_level = btrfs_header_level(eb); | ||
367 | |||
368 | csum_tree_block(root, eb, 0); | 392 | csum_tree_block(root, eb, 0); |
369 | err: | 393 | err: |
370 | free_extent_buffer(eb); | 394 | free_extent_buffer(eb); |
@@ -421,6 +445,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
421 | WARN_ON(len == 0); | 445 | WARN_ON(len == 0); |
422 | 446 | ||
423 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 447 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); |
448 | if (eb == NULL) { | ||
449 | ret = -EIO; | ||
450 | goto out; | ||
451 | } | ||
424 | 452 | ||
425 | found_start = btrfs_header_bytenr(eb); | 453 | found_start = btrfs_header_bytenr(eb); |
426 | if (found_start != start) { | 454 | if (found_start != start) { |
@@ -474,10 +502,13 @@ static void end_workqueue_bio(struct bio *bio, int err) | |||
474 | end_io_wq->work.func = end_workqueue_fn; | 502 | end_io_wq->work.func = end_workqueue_fn; |
475 | end_io_wq->work.flags = 0; | 503 | end_io_wq->work.flags = 0; |
476 | 504 | ||
477 | if (bio->bi_rw & (1 << BIO_RW)) { | 505 | if (bio->bi_rw & REQ_WRITE) { |
478 | if (end_io_wq->metadata) | 506 | if (end_io_wq->metadata == 1) |
479 | btrfs_queue_worker(&fs_info->endio_meta_write_workers, | 507 | btrfs_queue_worker(&fs_info->endio_meta_write_workers, |
480 | &end_io_wq->work); | 508 | &end_io_wq->work); |
509 | else if (end_io_wq->metadata == 2) | ||
510 | btrfs_queue_worker(&fs_info->endio_freespace_worker, | ||
511 | &end_io_wq->work); | ||
481 | else | 512 | else |
482 | btrfs_queue_worker(&fs_info->endio_write_workers, | 513 | btrfs_queue_worker(&fs_info->endio_write_workers, |
483 | &end_io_wq->work); | 514 | &end_io_wq->work); |
@@ -491,6 +522,13 @@ static void end_workqueue_bio(struct bio *bio, int err) | |||
491 | } | 522 | } |
492 | } | 523 | } |
493 | 524 | ||
525 | /* | ||
526 | * For the metadata arg you want | ||
527 | * | ||
528 | * 0 - if data | ||
529 | * 1 - if normal metadta | ||
530 | * 2 - if writing to the free space cache area | ||
531 | */ | ||
494 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | 532 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, |
495 | int metadata) | 533 | int metadata) |
496 | { | 534 | { |
@@ -527,13 +565,12 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone) | |||
527 | 565 | ||
528 | static void run_one_async_start(struct btrfs_work *work) | 566 | static void run_one_async_start(struct btrfs_work *work) |
529 | { | 567 | { |
530 | struct btrfs_fs_info *fs_info; | ||
531 | struct async_submit_bio *async; | 568 | struct async_submit_bio *async; |
532 | 569 | ||
533 | async = container_of(work, struct async_submit_bio, work); | 570 | async = container_of(work, struct async_submit_bio, work); |
534 | fs_info = BTRFS_I(async->inode)->root->fs_info; | ||
535 | async->submit_bio_start(async->inode, async->rw, async->bio, | 571 | async->submit_bio_start(async->inode, async->rw, async->bio, |
536 | async->mirror_num, async->bio_flags); | 572 | async->mirror_num, async->bio_flags, |
573 | async->bio_offset); | ||
537 | } | 574 | } |
538 | 575 | ||
539 | static void run_one_async_done(struct btrfs_work *work) | 576 | static void run_one_async_done(struct btrfs_work *work) |
@@ -555,7 +592,8 @@ static void run_one_async_done(struct btrfs_work *work) | |||
555 | wake_up(&fs_info->async_submit_wait); | 592 | wake_up(&fs_info->async_submit_wait); |
556 | 593 | ||
557 | async->submit_bio_done(async->inode, async->rw, async->bio, | 594 | async->submit_bio_done(async->inode, async->rw, async->bio, |
558 | async->mirror_num, async->bio_flags); | 595 | async->mirror_num, async->bio_flags, |
596 | async->bio_offset); | ||
559 | } | 597 | } |
560 | 598 | ||
561 | static void run_one_async_free(struct btrfs_work *work) | 599 | static void run_one_async_free(struct btrfs_work *work) |
@@ -569,6 +607,7 @@ static void run_one_async_free(struct btrfs_work *work) | |||
569 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 607 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
570 | int rw, struct bio *bio, int mirror_num, | 608 | int rw, struct bio *bio, int mirror_num, |
571 | unsigned long bio_flags, | 609 | unsigned long bio_flags, |
610 | u64 bio_offset, | ||
572 | extent_submit_bio_hook_t *submit_bio_start, | 611 | extent_submit_bio_hook_t *submit_bio_start, |
573 | extent_submit_bio_hook_t *submit_bio_done) | 612 | extent_submit_bio_hook_t *submit_bio_done) |
574 | { | 613 | { |
@@ -591,10 +630,11 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
591 | 630 | ||
592 | async->work.flags = 0; | 631 | async->work.flags = 0; |
593 | async->bio_flags = bio_flags; | 632 | async->bio_flags = bio_flags; |
633 | async->bio_offset = bio_offset; | ||
594 | 634 | ||
595 | atomic_inc(&fs_info->nr_async_submits); | 635 | atomic_inc(&fs_info->nr_async_submits); |
596 | 636 | ||
597 | if (rw & (1 << BIO_RW_SYNCIO)) | 637 | if (rw & REQ_SYNC) |
598 | btrfs_set_work_high_prio(&async->work); | 638 | btrfs_set_work_high_prio(&async->work); |
599 | 639 | ||
600 | btrfs_queue_worker(&fs_info->workers, &async->work); | 640 | btrfs_queue_worker(&fs_info->workers, &async->work); |
@@ -626,7 +666,8 @@ static int btree_csum_one_bio(struct bio *bio) | |||
626 | 666 | ||
627 | static int __btree_submit_bio_start(struct inode *inode, int rw, | 667 | static int __btree_submit_bio_start(struct inode *inode, int rw, |
628 | struct bio *bio, int mirror_num, | 668 | struct bio *bio, int mirror_num, |
629 | unsigned long bio_flags) | 669 | unsigned long bio_flags, |
670 | u64 bio_offset) | ||
630 | { | 671 | { |
631 | /* | 672 | /* |
632 | * when we're called for a write, we're already in the async | 673 | * when we're called for a write, we're already in the async |
@@ -637,7 +678,8 @@ static int __btree_submit_bio_start(struct inode *inode, int rw, | |||
637 | } | 678 | } |
638 | 679 | ||
639 | static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | 680 | static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, |
640 | int mirror_num, unsigned long bio_flags) | 681 | int mirror_num, unsigned long bio_flags, |
682 | u64 bio_offset) | ||
641 | { | 683 | { |
642 | /* | 684 | /* |
643 | * when we're called for a write, we're already in the async | 685 | * when we're called for a write, we're already in the async |
@@ -647,7 +689,8 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
647 | } | 689 | } |
648 | 690 | ||
649 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 691 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
650 | int mirror_num, unsigned long bio_flags) | 692 | int mirror_num, unsigned long bio_flags, |
693 | u64 bio_offset) | ||
651 | { | 694 | { |
652 | int ret; | 695 | int ret; |
653 | 696 | ||
@@ -655,7 +698,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
655 | bio, 1); | 698 | bio, 1); |
656 | BUG_ON(ret); | 699 | BUG_ON(ret); |
657 | 700 | ||
658 | if (!(rw & (1 << BIO_RW))) { | 701 | if (!(rw & REQ_WRITE)) { |
659 | /* | 702 | /* |
660 | * called for a read, do the setup so that checksum validation | 703 | * called for a read, do the setup so that checksum validation |
661 | * can happen in the async kernel threads | 704 | * can happen in the async kernel threads |
@@ -670,10 +713,32 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
670 | */ | 713 | */ |
671 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 714 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
672 | inode, rw, bio, mirror_num, 0, | 715 | inode, rw, bio, mirror_num, 0, |
716 | bio_offset, | ||
673 | __btree_submit_bio_start, | 717 | __btree_submit_bio_start, |
674 | __btree_submit_bio_done); | 718 | __btree_submit_bio_done); |
675 | } | 719 | } |
676 | 720 | ||
721 | #ifdef CONFIG_MIGRATION | ||
722 | static int btree_migratepage(struct address_space *mapping, | ||
723 | struct page *newpage, struct page *page) | ||
724 | { | ||
725 | /* | ||
726 | * we can't safely write a btree page from here, | ||
727 | * we haven't done the locking hook | ||
728 | */ | ||
729 | if (PageDirty(page)) | ||
730 | return -EAGAIN; | ||
731 | /* | ||
732 | * Buffers may be managed in a filesystem specific way. | ||
733 | * We must have no buffers or drop them. | ||
734 | */ | ||
735 | if (page_has_private(page) && | ||
736 | !try_to_release_page(page, GFP_KERNEL)) | ||
737 | return -EAGAIN; | ||
738 | return migrate_page(mapping, newpage, page); | ||
739 | } | ||
740 | #endif | ||
741 | |||
677 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | 742 | static int btree_writepage(struct page *page, struct writeback_control *wbc) |
678 | { | 743 | { |
679 | struct extent_io_tree *tree; | 744 | struct extent_io_tree *tree; |
@@ -688,8 +753,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) | |||
688 | } | 753 | } |
689 | 754 | ||
690 | redirty_page_for_writepage(wbc, page); | 755 | redirty_page_for_writepage(wbc, page); |
691 | eb = btrfs_find_tree_block(root, page_offset(page), | 756 | eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE); |
692 | PAGE_CACHE_SIZE); | ||
693 | WARN_ON(!eb); | 757 | WARN_ON(!eb); |
694 | 758 | ||
695 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); | 759 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); |
@@ -780,6 +844,9 @@ static const struct address_space_operations btree_aops = { | |||
780 | .releasepage = btree_releasepage, | 844 | .releasepage = btree_releasepage, |
781 | .invalidatepage = btree_invalidatepage, | 845 | .invalidatepage = btree_invalidatepage, |
782 | .sync_page = block_sync_page, | 846 | .sync_page = block_sync_page, |
847 | #ifdef CONFIG_MIGRATION | ||
848 | .migratepage = btree_migratepage, | ||
849 | #endif | ||
783 | }; | 850 | }; |
784 | 851 | ||
785 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 852 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, |
@@ -836,12 +903,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | |||
836 | u32 blocksize, u64 parent_transid) | 903 | u32 blocksize, u64 parent_transid) |
837 | { | 904 | { |
838 | struct extent_buffer *buf = NULL; | 905 | struct extent_buffer *buf = NULL; |
839 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
840 | struct extent_io_tree *io_tree; | ||
841 | int ret; | 906 | int ret; |
842 | 907 | ||
843 | io_tree = &BTRFS_I(btree_inode)->io_tree; | ||
844 | |||
845 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | 908 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); |
846 | if (!buf) | 909 | if (!buf) |
847 | return NULL; | 910 | return NULL; |
@@ -893,7 +956,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
893 | root->ref_cows = 0; | 956 | root->ref_cows = 0; |
894 | root->track_dirty = 0; | 957 | root->track_dirty = 0; |
895 | root->in_radix = 0; | 958 | root->in_radix = 0; |
896 | root->clean_orphans = 0; | 959 | root->orphan_item_inserted = 0; |
960 | root->orphan_cleanup_state = 0; | ||
897 | 961 | ||
898 | root->fs_info = fs_info; | 962 | root->fs_info = fs_info; |
899 | root->objectid = objectid; | 963 | root->objectid = objectid; |
@@ -901,14 +965,17 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
901 | root->highest_objectid = 0; | 965 | root->highest_objectid = 0; |
902 | root->name = NULL; | 966 | root->name = NULL; |
903 | root->in_sysfs = 0; | 967 | root->in_sysfs = 0; |
904 | root->inode_tree.rb_node = NULL; | 968 | root->inode_tree = RB_ROOT; |
969 | root->block_rsv = NULL; | ||
970 | root->orphan_block_rsv = NULL; | ||
905 | 971 | ||
906 | INIT_LIST_HEAD(&root->dirty_list); | 972 | INIT_LIST_HEAD(&root->dirty_list); |
907 | INIT_LIST_HEAD(&root->orphan_list); | 973 | INIT_LIST_HEAD(&root->orphan_list); |
908 | INIT_LIST_HEAD(&root->root_list); | 974 | INIT_LIST_HEAD(&root->root_list); |
909 | spin_lock_init(&root->node_lock); | 975 | spin_lock_init(&root->node_lock); |
910 | spin_lock_init(&root->list_lock); | 976 | spin_lock_init(&root->orphan_lock); |
911 | spin_lock_init(&root->inode_lock); | 977 | spin_lock_init(&root->inode_lock); |
978 | spin_lock_init(&root->accounting_lock); | ||
912 | mutex_init(&root->objectid_mutex); | 979 | mutex_init(&root->objectid_mutex); |
913 | mutex_init(&root->log_mutex); | 980 | mutex_init(&root->log_mutex); |
914 | init_waitqueue_head(&root->log_writer_wait); | 981 | init_waitqueue_head(&root->log_writer_wait); |
@@ -962,44 +1029,11 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
962 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 1029 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
963 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1030 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
964 | blocksize, generation); | 1031 | blocksize, generation); |
965 | BUG_ON(!root->node); | 1032 | if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { |
966 | root->commit_root = btrfs_root_node(root); | 1033 | free_extent_buffer(root->node); |
967 | return 0; | 1034 | return -EIO; |
968 | } | ||
969 | |||
970 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
971 | struct btrfs_fs_info *fs_info) | ||
972 | { | ||
973 | struct extent_buffer *eb; | ||
974 | struct btrfs_root *log_root_tree = fs_info->log_root_tree; | ||
975 | u64 start = 0; | ||
976 | u64 end = 0; | ||
977 | int ret; | ||
978 | |||
979 | if (!log_root_tree) | ||
980 | return 0; | ||
981 | |||
982 | while (1) { | ||
983 | ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, | ||
984 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); | ||
985 | if (ret) | ||
986 | break; | ||
987 | |||
988 | clear_extent_bits(&log_root_tree->dirty_log_pages, start, end, | ||
989 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); | ||
990 | } | 1035 | } |
991 | eb = fs_info->log_root_tree->node; | 1036 | root->commit_root = btrfs_root_node(root); |
992 | |||
993 | WARN_ON(btrfs_header_level(eb) != 0); | ||
994 | WARN_ON(btrfs_header_nritems(eb) != 0); | ||
995 | |||
996 | ret = btrfs_free_reserved_extent(fs_info->tree_root, | ||
997 | eb->start, eb->len); | ||
998 | BUG_ON(ret); | ||
999 | |||
1000 | free_extent_buffer(eb); | ||
1001 | kfree(fs_info->log_root_tree); | ||
1002 | fs_info->log_root_tree = NULL; | ||
1003 | return 0; | 1037 | return 0; |
1004 | } | 1038 | } |
1005 | 1039 | ||
@@ -1133,6 +1167,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1133 | } | 1167 | } |
1134 | btrfs_free_path(path); | 1168 | btrfs_free_path(path); |
1135 | if (ret) { | 1169 | if (ret) { |
1170 | kfree(root); | ||
1136 | if (ret > 0) | 1171 | if (ret > 0) |
1137 | ret = -ENOENT; | 1172 | ret = -ENOENT; |
1138 | return ERR_PTR(ret); | 1173 | return ERR_PTR(ret); |
@@ -1190,19 +1225,23 @@ again: | |||
1190 | if (root) | 1225 | if (root) |
1191 | return root; | 1226 | return root; |
1192 | 1227 | ||
1193 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
1194 | if (ret == 0) | ||
1195 | ret = -ENOENT; | ||
1196 | if (ret < 0) | ||
1197 | return ERR_PTR(ret); | ||
1198 | |||
1199 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); | 1228 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); |
1200 | if (IS_ERR(root)) | 1229 | if (IS_ERR(root)) |
1201 | return root; | 1230 | return root; |
1202 | 1231 | ||
1203 | WARN_ON(btrfs_root_refs(&root->root_item) == 0); | ||
1204 | set_anon_super(&root->anon_super, NULL); | 1232 | set_anon_super(&root->anon_super, NULL); |
1205 | 1233 | ||
1234 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
1235 | ret = -ENOENT; | ||
1236 | goto fail; | ||
1237 | } | ||
1238 | |||
1239 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
1240 | if (ret < 0) | ||
1241 | goto fail; | ||
1242 | if (ret == 0) | ||
1243 | root->orphan_item_inserted = 1; | ||
1244 | |||
1206 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | 1245 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); |
1207 | if (ret) | 1246 | if (ret) |
1208 | goto fail; | 1247 | goto fail; |
@@ -1211,10 +1250,9 @@ again: | |||
1211 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | 1250 | ret = radix_tree_insert(&fs_info->fs_roots_radix, |
1212 | (unsigned long)root->root_key.objectid, | 1251 | (unsigned long)root->root_key.objectid, |
1213 | root); | 1252 | root); |
1214 | if (ret == 0) { | 1253 | if (ret == 0) |
1215 | root->in_radix = 1; | 1254 | root->in_radix = 1; |
1216 | root->clean_orphans = 1; | 1255 | |
1217 | } | ||
1218 | spin_unlock(&fs_info->fs_roots_radix_lock); | 1256 | spin_unlock(&fs_info->fs_roots_radix_lock); |
1219 | radix_tree_preload_end(); | 1257 | radix_tree_preload_end(); |
1220 | if (ret) { | 1258 | if (ret) { |
@@ -1372,19 +1410,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
1372 | { | 1410 | { |
1373 | int err; | 1411 | int err; |
1374 | 1412 | ||
1375 | bdi->name = "btrfs"; | ||
1376 | bdi->capabilities = BDI_CAP_MAP_COPY; | 1413 | bdi->capabilities = BDI_CAP_MAP_COPY; |
1377 | err = bdi_init(bdi); | 1414 | err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY); |
1378 | if (err) | 1415 | if (err) |
1379 | return err; | 1416 | return err; |
1380 | 1417 | ||
1381 | err = bdi_register(bdi, NULL, "btrfs-%d", | ||
1382 | atomic_inc_return(&btrfs_bdi_num)); | ||
1383 | if (err) { | ||
1384 | bdi_destroy(bdi); | ||
1385 | return err; | ||
1386 | } | ||
1387 | |||
1388 | bdi->ra_pages = default_backing_dev_info.ra_pages; | 1418 | bdi->ra_pages = default_backing_dev_info.ra_pages; |
1389 | bdi->unplug_io_fn = btrfs_unplug_io_fn; | 1419 | bdi->unplug_io_fn = btrfs_unplug_io_fn; |
1390 | bdi->unplug_io_data = info; | 1420 | bdi->unplug_io_data = info; |
@@ -1400,7 +1430,6 @@ static int bio_ready_for_csum(struct bio *bio) | |||
1400 | u64 start = 0; | 1430 | u64 start = 0; |
1401 | struct page *page; | 1431 | struct page *page; |
1402 | struct extent_io_tree *io_tree = NULL; | 1432 | struct extent_io_tree *io_tree = NULL; |
1403 | struct btrfs_fs_info *info = NULL; | ||
1404 | struct bio_vec *bvec; | 1433 | struct bio_vec *bvec; |
1405 | int i; | 1434 | int i; |
1406 | int ret; | 1435 | int ret; |
@@ -1419,7 +1448,6 @@ static int bio_ready_for_csum(struct bio *bio) | |||
1419 | buf_len = page->private >> 2; | 1448 | buf_len = page->private >> 2; |
1420 | start = page_offset(page) + bvec->bv_offset; | 1449 | start = page_offset(page) + bvec->bv_offset; |
1421 | io_tree = &BTRFS_I(page->mapping->host)->io_tree; | 1450 | io_tree = &BTRFS_I(page->mapping->host)->io_tree; |
1422 | info = BTRFS_I(page->mapping->host)->root->fs_info; | ||
1423 | } | 1451 | } |
1424 | /* are we fully contained in this bio? */ | 1452 | /* are we fully contained in this bio? */ |
1425 | if (buf_len <= length) | 1453 | if (buf_len <= length) |
@@ -1450,7 +1478,7 @@ static void end_workqueue_fn(struct btrfs_work *work) | |||
1450 | * ram and up to date before trying to verify things. For | 1478 | * ram and up to date before trying to verify things. For |
1451 | * blocksize <= pagesize, it is basically a noop | 1479 | * blocksize <= pagesize, it is basically a noop |
1452 | */ | 1480 | */ |
1453 | if (!(bio->bi_rw & (1 << BIO_RW)) && end_io_wq->metadata && | 1481 | if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata && |
1454 | !bio_ready_for_csum(bio)) { | 1482 | !bio_ready_for_csum(bio)) { |
1455 | btrfs_queue_worker(&fs_info->endio_meta_workers, | 1483 | btrfs_queue_worker(&fs_info->endio_meta_workers, |
1456 | &end_io_wq->work); | 1484 | &end_io_wq->work); |
@@ -1468,10 +1496,6 @@ static int cleaner_kthread(void *arg) | |||
1468 | struct btrfs_root *root = arg; | 1496 | struct btrfs_root *root = arg; |
1469 | 1497 | ||
1470 | do { | 1498 | do { |
1471 | smp_mb(); | ||
1472 | if (root->fs_info->closing) | ||
1473 | break; | ||
1474 | |||
1475 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1499 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1476 | 1500 | ||
1477 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && | 1501 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && |
@@ -1484,11 +1508,9 @@ static int cleaner_kthread(void *arg) | |||
1484 | if (freezing(current)) { | 1508 | if (freezing(current)) { |
1485 | refrigerator(); | 1509 | refrigerator(); |
1486 | } else { | 1510 | } else { |
1487 | smp_mb(); | ||
1488 | if (root->fs_info->closing) | ||
1489 | break; | ||
1490 | set_current_state(TASK_INTERRUPTIBLE); | 1511 | set_current_state(TASK_INTERRUPTIBLE); |
1491 | schedule(); | 1512 | if (!kthread_should_stop()) |
1513 | schedule(); | ||
1492 | __set_current_state(TASK_RUNNING); | 1514 | __set_current_state(TASK_RUNNING); |
1493 | } | 1515 | } |
1494 | } while (!kthread_should_stop()); | 1516 | } while (!kthread_should_stop()); |
@@ -1500,36 +1522,40 @@ static int transaction_kthread(void *arg) | |||
1500 | struct btrfs_root *root = arg; | 1522 | struct btrfs_root *root = arg; |
1501 | struct btrfs_trans_handle *trans; | 1523 | struct btrfs_trans_handle *trans; |
1502 | struct btrfs_transaction *cur; | 1524 | struct btrfs_transaction *cur; |
1525 | u64 transid; | ||
1503 | unsigned long now; | 1526 | unsigned long now; |
1504 | unsigned long delay; | 1527 | unsigned long delay; |
1505 | int ret; | 1528 | int ret; |
1506 | 1529 | ||
1507 | do { | 1530 | do { |
1508 | smp_mb(); | ||
1509 | if (root->fs_info->closing) | ||
1510 | break; | ||
1511 | |||
1512 | delay = HZ * 30; | 1531 | delay = HZ * 30; |
1513 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1532 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1514 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1533 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
1515 | 1534 | ||
1516 | mutex_lock(&root->fs_info->trans_mutex); | 1535 | spin_lock(&root->fs_info->new_trans_lock); |
1517 | cur = root->fs_info->running_transaction; | 1536 | cur = root->fs_info->running_transaction; |
1518 | if (!cur) { | 1537 | if (!cur) { |
1519 | mutex_unlock(&root->fs_info->trans_mutex); | 1538 | spin_unlock(&root->fs_info->new_trans_lock); |
1520 | goto sleep; | 1539 | goto sleep; |
1521 | } | 1540 | } |
1522 | 1541 | ||
1523 | now = get_seconds(); | 1542 | now = get_seconds(); |
1524 | if (now < cur->start_time || now - cur->start_time < 30) { | 1543 | if (!cur->blocked && |
1525 | mutex_unlock(&root->fs_info->trans_mutex); | 1544 | (now < cur->start_time || now - cur->start_time < 30)) { |
1545 | spin_unlock(&root->fs_info->new_trans_lock); | ||
1526 | delay = HZ * 5; | 1546 | delay = HZ * 5; |
1527 | goto sleep; | 1547 | goto sleep; |
1528 | } | 1548 | } |
1529 | mutex_unlock(&root->fs_info->trans_mutex); | 1549 | transid = cur->transid; |
1530 | trans = btrfs_start_transaction(root, 1); | 1550 | spin_unlock(&root->fs_info->new_trans_lock); |
1531 | ret = btrfs_commit_transaction(trans, root); | ||
1532 | 1551 | ||
1552 | trans = btrfs_join_transaction(root, 1); | ||
1553 | if (transid == trans->transid) { | ||
1554 | ret = btrfs_commit_transaction(trans, root); | ||
1555 | BUG_ON(ret); | ||
1556 | } else { | ||
1557 | btrfs_end_transaction(trans, root); | ||
1558 | } | ||
1533 | sleep: | 1559 | sleep: |
1534 | wake_up_process(root->fs_info->cleaner_kthread); | 1560 | wake_up_process(root->fs_info->cleaner_kthread); |
1535 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 1561 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
@@ -1537,10 +1563,10 @@ sleep: | |||
1537 | if (freezing(current)) { | 1563 | if (freezing(current)) { |
1538 | refrigerator(); | 1564 | refrigerator(); |
1539 | } else { | 1565 | } else { |
1540 | if (root->fs_info->closing) | ||
1541 | break; | ||
1542 | set_current_state(TASK_INTERRUPTIBLE); | 1566 | set_current_state(TASK_INTERRUPTIBLE); |
1543 | schedule_timeout(delay); | 1567 | if (!kthread_should_stop() && |
1568 | !btrfs_transaction_blocked(root->fs_info)) | ||
1569 | schedule_timeout(delay); | ||
1544 | __set_current_state(TASK_RUNNING); | 1570 | __set_current_state(TASK_RUNNING); |
1545 | } | 1571 | } |
1546 | } while (!kthread_should_stop()); | 1572 | } while (!kthread_should_stop()); |
@@ -1564,10 +1590,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1564 | GFP_NOFS); | 1590 | GFP_NOFS); |
1565 | struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), | 1591 | struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), |
1566 | GFP_NOFS); | 1592 | GFP_NOFS); |
1567 | struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), | 1593 | struct btrfs_root *tree_root = btrfs_sb(sb); |
1568 | GFP_NOFS); | 1594 | struct btrfs_fs_info *fs_info = tree_root->fs_info; |
1569 | struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), | ||
1570 | GFP_NOFS); | ||
1571 | struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), | 1595 | struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), |
1572 | GFP_NOFS); | 1596 | GFP_NOFS); |
1573 | struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), | 1597 | struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), |
@@ -1627,12 +1651,18 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1627 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 1651 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
1628 | INIT_LIST_HEAD(&fs_info->space_info); | 1652 | INIT_LIST_HEAD(&fs_info->space_info); |
1629 | btrfs_mapping_init(&fs_info->mapping_tree); | 1653 | btrfs_mapping_init(&fs_info->mapping_tree); |
1654 | btrfs_init_block_rsv(&fs_info->global_block_rsv); | ||
1655 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); | ||
1656 | btrfs_init_block_rsv(&fs_info->trans_block_rsv); | ||
1657 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv); | ||
1658 | btrfs_init_block_rsv(&fs_info->empty_block_rsv); | ||
1659 | INIT_LIST_HEAD(&fs_info->durable_block_rsv_list); | ||
1660 | mutex_init(&fs_info->durable_block_rsv_mutex); | ||
1630 | atomic_set(&fs_info->nr_async_submits, 0); | 1661 | atomic_set(&fs_info->nr_async_submits, 0); |
1631 | atomic_set(&fs_info->async_delalloc_pages, 0); | 1662 | atomic_set(&fs_info->async_delalloc_pages, 0); |
1632 | atomic_set(&fs_info->async_submit_draining, 0); | 1663 | atomic_set(&fs_info->async_submit_draining, 0); |
1633 | atomic_set(&fs_info->nr_async_bios, 0); | 1664 | atomic_set(&fs_info->nr_async_bios, 0); |
1634 | fs_info->sb = sb; | 1665 | fs_info->sb = sb; |
1635 | fs_info->max_extent = (u64)-1; | ||
1636 | fs_info->max_inline = 8192 * 1024; | 1666 | fs_info->max_inline = 8192 * 1024; |
1637 | fs_info->metadata_ratio = 0; | 1667 | fs_info->metadata_ratio = 0; |
1638 | 1668 | ||
@@ -1673,7 +1703,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1673 | insert_inode_hash(fs_info->btree_inode); | 1703 | insert_inode_hash(fs_info->btree_inode); |
1674 | 1704 | ||
1675 | spin_lock_init(&fs_info->block_group_cache_lock); | 1705 | spin_lock_init(&fs_info->block_group_cache_lock); |
1676 | fs_info->block_group_cache_tree.rb_node = NULL; | 1706 | fs_info->block_group_cache_tree = RB_ROOT; |
1677 | 1707 | ||
1678 | extent_io_tree_init(&fs_info->freed_extents[0], | 1708 | extent_io_tree_init(&fs_info->freed_extents[0], |
1679 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1709 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
@@ -1699,15 +1729,17 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1699 | 1729 | ||
1700 | init_waitqueue_head(&fs_info->transaction_throttle); | 1730 | init_waitqueue_head(&fs_info->transaction_throttle); |
1701 | init_waitqueue_head(&fs_info->transaction_wait); | 1731 | init_waitqueue_head(&fs_info->transaction_wait); |
1732 | init_waitqueue_head(&fs_info->transaction_blocked_wait); | ||
1702 | init_waitqueue_head(&fs_info->async_submit_wait); | 1733 | init_waitqueue_head(&fs_info->async_submit_wait); |
1703 | 1734 | ||
1704 | __setup_root(4096, 4096, 4096, 4096, tree_root, | 1735 | __setup_root(4096, 4096, 4096, 4096, tree_root, |
1705 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | 1736 | fs_info, BTRFS_ROOT_TREE_OBJECTID); |
1706 | 1737 | ||
1707 | |||
1708 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); | 1738 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); |
1709 | if (!bh) | 1739 | if (!bh) { |
1740 | err = -EINVAL; | ||
1710 | goto fail_iput; | 1741 | goto fail_iput; |
1742 | } | ||
1711 | 1743 | ||
1712 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); | 1744 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); |
1713 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, | 1745 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, |
@@ -1720,6 +1752,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1720 | if (!btrfs_super_root(disk_super)) | 1752 | if (!btrfs_super_root(disk_super)) |
1721 | goto fail_iput; | 1753 | goto fail_iput; |
1722 | 1754 | ||
1755 | /* check FS state, whether FS is broken. */ | ||
1756 | fs_info->fs_state |= btrfs_super_flags(disk_super); | ||
1757 | |||
1758 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | ||
1759 | |||
1723 | ret = btrfs_parse_options(tree_root, options); | 1760 | ret = btrfs_parse_options(tree_root, options); |
1724 | if (ret) { | 1761 | if (ret) { |
1725 | err = ret; | 1762 | err = ret; |
@@ -1737,10 +1774,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1737 | } | 1774 | } |
1738 | 1775 | ||
1739 | features = btrfs_super_incompat_flags(disk_super); | 1776 | features = btrfs_super_incompat_flags(disk_super); |
1740 | if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { | 1777 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; |
1741 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; | 1778 | if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) |
1742 | btrfs_set_super_incompat_flags(disk_super, features); | 1779 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; |
1743 | } | 1780 | btrfs_set_super_incompat_flags(disk_super, features); |
1744 | 1781 | ||
1745 | features = btrfs_super_compat_ro_flags(disk_super) & | 1782 | features = btrfs_super_compat_ro_flags(disk_super) & |
1746 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; | 1783 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; |
@@ -1767,9 +1804,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1767 | min_t(u64, fs_devices->num_devices, | 1804 | min_t(u64, fs_devices->num_devices, |
1768 | fs_info->thread_pool_size), | 1805 | fs_info->thread_pool_size), |
1769 | &fs_info->generic_worker); | 1806 | &fs_info->generic_worker); |
1770 | btrfs_init_workers(&fs_info->enospc_workers, "enospc", | ||
1771 | fs_info->thread_pool_size, | ||
1772 | &fs_info->generic_worker); | ||
1773 | 1807 | ||
1774 | /* a higher idle thresh on the submit workers makes it much more | 1808 | /* a higher idle thresh on the submit workers makes it much more |
1775 | * likely that bios will be send down in a sane order to the | 1809 | * likely that bios will be send down in a sane order to the |
@@ -1797,6 +1831,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1797 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", | 1831 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", |
1798 | fs_info->thread_pool_size, | 1832 | fs_info->thread_pool_size, |
1799 | &fs_info->generic_worker); | 1833 | &fs_info->generic_worker); |
1834 | btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write", | ||
1835 | 1, &fs_info->generic_worker); | ||
1800 | 1836 | ||
1801 | /* | 1837 | /* |
1802 | * endios are largely parallel and should have a very | 1838 | * endios are largely parallel and should have a very |
@@ -1817,7 +1853,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1817 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); | 1853 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); |
1818 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); | 1854 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); |
1819 | btrfs_start_workers(&fs_info->endio_write_workers, 1); | 1855 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
1820 | btrfs_start_workers(&fs_info->enospc_workers, 1); | 1856 | btrfs_start_workers(&fs_info->endio_freespace_worker, 1); |
1821 | 1857 | ||
1822 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1858 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
1823 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1859 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
@@ -1920,17 +1956,22 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1920 | 1956 | ||
1921 | csum_root->track_dirty = 1; | 1957 | csum_root->track_dirty = 1; |
1922 | 1958 | ||
1923 | btrfs_read_block_groups(extent_root); | ||
1924 | |||
1925 | fs_info->generation = generation; | 1959 | fs_info->generation = generation; |
1926 | fs_info->last_trans_committed = generation; | 1960 | fs_info->last_trans_committed = generation; |
1927 | fs_info->data_alloc_profile = (u64)-1; | 1961 | fs_info->data_alloc_profile = (u64)-1; |
1928 | fs_info->metadata_alloc_profile = (u64)-1; | 1962 | fs_info->metadata_alloc_profile = (u64)-1; |
1929 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | 1963 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; |
1964 | |||
1965 | ret = btrfs_read_block_groups(extent_root); | ||
1966 | if (ret) { | ||
1967 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); | ||
1968 | goto fail_block_groups; | ||
1969 | } | ||
1970 | |||
1930 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, | 1971 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, |
1931 | "btrfs-cleaner"); | 1972 | "btrfs-cleaner"); |
1932 | if (IS_ERR(fs_info->cleaner_kthread)) | 1973 | if (IS_ERR(fs_info->cleaner_kthread)) |
1933 | goto fail_csum_root; | 1974 | goto fail_block_groups; |
1934 | 1975 | ||
1935 | fs_info->transaction_kthread = kthread_run(transaction_kthread, | 1976 | fs_info->transaction_kthread = kthread_run(transaction_kthread, |
1936 | tree_root, | 1977 | tree_root, |
@@ -1946,7 +1987,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1946 | btrfs_set_opt(fs_info->mount_opt, SSD); | 1987 | btrfs_set_opt(fs_info->mount_opt, SSD); |
1947 | } | 1988 | } |
1948 | 1989 | ||
1949 | if (btrfs_super_log_root(disk_super) != 0) { | 1990 | /* do not make disk changes in broken FS */ |
1991 | if (btrfs_super_log_root(disk_super) != 0 && | ||
1992 | !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { | ||
1950 | u64 bytenr = btrfs_super_log_root(disk_super); | 1993 | u64 bytenr = btrfs_super_log_root(disk_super); |
1951 | 1994 | ||
1952 | if (fs_devices->rw_devices == 0) { | 1995 | if (fs_devices->rw_devices == 0) { |
@@ -1959,8 +2002,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1959 | btrfs_level_size(tree_root, | 2002 | btrfs_level_size(tree_root, |
1960 | btrfs_super_log_root_level(disk_super)); | 2003 | btrfs_super_log_root_level(disk_super)); |
1961 | 2004 | ||
1962 | log_tree_root = kzalloc(sizeof(struct btrfs_root), | 2005 | log_tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); |
1963 | GFP_NOFS); | 2006 | if (!log_tree_root) { |
2007 | err = -ENOMEM; | ||
2008 | goto fail_trans_kthread; | ||
2009 | } | ||
1964 | 2010 | ||
1965 | __setup_root(nodesize, leafsize, sectorsize, stripesize, | 2011 | __setup_root(nodesize, leafsize, sectorsize, stripesize, |
1966 | log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); | 2012 | log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); |
@@ -1981,8 +2027,16 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1981 | BUG_ON(ret); | 2027 | BUG_ON(ret); |
1982 | 2028 | ||
1983 | if (!(sb->s_flags & MS_RDONLY)) { | 2029 | if (!(sb->s_flags & MS_RDONLY)) { |
1984 | ret = btrfs_recover_relocation(tree_root); | 2030 | ret = btrfs_cleanup_fs_roots(fs_info); |
1985 | BUG_ON(ret); | 2031 | BUG_ON(ret); |
2032 | |||
2033 | ret = btrfs_recover_relocation(tree_root); | ||
2034 | if (ret < 0) { | ||
2035 | printk(KERN_WARNING | ||
2036 | "btrfs: failed to recover relocation\n"); | ||
2037 | err = -EINVAL; | ||
2038 | goto fail_trans_kthread; | ||
2039 | } | ||
1986 | } | 2040 | } |
1987 | 2041 | ||
1988 | location.objectid = BTRFS_FS_TREE_OBJECTID; | 2042 | location.objectid = BTRFS_FS_TREE_OBJECTID; |
@@ -1992,10 +2046,15 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1992 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); | 2046 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); |
1993 | if (!fs_info->fs_root) | 2047 | if (!fs_info->fs_root) |
1994 | goto fail_trans_kthread; | 2048 | goto fail_trans_kthread; |
2049 | if (IS_ERR(fs_info->fs_root)) { | ||
2050 | err = PTR_ERR(fs_info->fs_root); | ||
2051 | goto fail_trans_kthread; | ||
2052 | } | ||
1995 | 2053 | ||
1996 | if (!(sb->s_flags & MS_RDONLY)) { | 2054 | if (!(sb->s_flags & MS_RDONLY)) { |
1997 | down_read(&fs_info->cleanup_work_sem); | 2055 | down_read(&fs_info->cleanup_work_sem); |
1998 | btrfs_orphan_cleanup(fs_info->fs_root); | 2056 | btrfs_orphan_cleanup(fs_info->fs_root); |
2057 | btrfs_orphan_cleanup(fs_info->tree_root); | ||
1999 | up_read(&fs_info->cleanup_work_sem); | 2058 | up_read(&fs_info->cleanup_work_sem); |
2000 | } | 2059 | } |
2001 | 2060 | ||
@@ -2013,7 +2072,8 @@ fail_cleaner: | |||
2013 | filemap_write_and_wait(fs_info->btree_inode->i_mapping); | 2072 | filemap_write_and_wait(fs_info->btree_inode->i_mapping); |
2014 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2073 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
2015 | 2074 | ||
2016 | fail_csum_root: | 2075 | fail_block_groups: |
2076 | btrfs_free_block_groups(fs_info); | ||
2017 | free_extent_buffer(csum_root->node); | 2077 | free_extent_buffer(csum_root->node); |
2018 | free_extent_buffer(csum_root->commit_root); | 2078 | free_extent_buffer(csum_root->commit_root); |
2019 | fail_dev_root: | 2079 | fail_dev_root: |
@@ -2037,8 +2097,8 @@ fail_sb_buffer: | |||
2037 | btrfs_stop_workers(&fs_info->endio_meta_workers); | 2097 | btrfs_stop_workers(&fs_info->endio_meta_workers); |
2038 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2098 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
2039 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2099 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2100 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | ||
2040 | btrfs_stop_workers(&fs_info->submit_workers); | 2101 | btrfs_stop_workers(&fs_info->submit_workers); |
2041 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
2042 | fail_iput: | 2102 | fail_iput: |
2043 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2103 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
2044 | iput(fs_info->btree_inode); | 2104 | iput(fs_info->btree_inode); |
@@ -2066,7 +2126,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) | |||
2066 | if (uptodate) { | 2126 | if (uptodate) { |
2067 | set_buffer_uptodate(bh); | 2127 | set_buffer_uptodate(bh); |
2068 | } else { | 2128 | } else { |
2069 | if (!buffer_eopnotsupp(bh) && printk_ratelimit()) { | 2129 | if (printk_ratelimit()) { |
2070 | printk(KERN_WARNING "lost page write due to " | 2130 | printk(KERN_WARNING "lost page write due to " |
2071 | "I/O error on %s\n", | 2131 | "I/O error on %s\n", |
2072 | bdevname(bh->b_bdev, b)); | 2132 | bdevname(bh->b_bdev, b)); |
@@ -2203,21 +2263,10 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2203 | bh->b_end_io = btrfs_end_buffer_write_sync; | 2263 | bh->b_end_io = btrfs_end_buffer_write_sync; |
2204 | } | 2264 | } |
2205 | 2265 | ||
2206 | if (i == last_barrier && do_barriers && device->barriers) { | 2266 | if (i == last_barrier && do_barriers) |
2207 | ret = submit_bh(WRITE_BARRIER, bh); | 2267 | ret = submit_bh(WRITE_FLUSH_FUA, bh); |
2208 | if (ret == -EOPNOTSUPP) { | 2268 | else |
2209 | printk("btrfs: disabling barriers on dev %s\n", | ||
2210 | device->name); | ||
2211 | set_buffer_uptodate(bh); | ||
2212 | device->barriers = 0; | ||
2213 | /* one reference for submit_bh */ | ||
2214 | get_bh(bh); | ||
2215 | lock_buffer(bh); | ||
2216 | ret = submit_bh(WRITE_SYNC, bh); | ||
2217 | } | ||
2218 | } else { | ||
2219 | ret = submit_bh(WRITE_SYNC, bh); | 2269 | ret = submit_bh(WRITE_SYNC, bh); |
2220 | } | ||
2221 | 2270 | ||
2222 | if (ret) | 2271 | if (ret) |
2223 | errors++; | 2272 | errors++; |
@@ -2403,11 +2452,11 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
2403 | down_write(&root->fs_info->cleanup_work_sem); | 2452 | down_write(&root->fs_info->cleanup_work_sem); |
2404 | up_write(&root->fs_info->cleanup_work_sem); | 2453 | up_write(&root->fs_info->cleanup_work_sem); |
2405 | 2454 | ||
2406 | trans = btrfs_start_transaction(root, 1); | 2455 | trans = btrfs_join_transaction(root, 1); |
2407 | ret = btrfs_commit_transaction(trans, root); | 2456 | ret = btrfs_commit_transaction(trans, root); |
2408 | BUG_ON(ret); | 2457 | BUG_ON(ret); |
2409 | /* run commit again to drop the original snapshot */ | 2458 | /* run commit again to drop the original snapshot */ |
2410 | trans = btrfs_start_transaction(root, 1); | 2459 | trans = btrfs_join_transaction(root, 1); |
2411 | btrfs_commit_transaction(trans, root); | 2460 | btrfs_commit_transaction(trans, root); |
2412 | ret = btrfs_write_and_wait_transaction(NULL, root); | 2461 | ret = btrfs_write_and_wait_transaction(NULL, root); |
2413 | BUG_ON(ret); | 2462 | BUG_ON(ret); |
@@ -2424,15 +2473,36 @@ int close_ctree(struct btrfs_root *root) | |||
2424 | fs_info->closing = 1; | 2473 | fs_info->closing = 1; |
2425 | smp_mb(); | 2474 | smp_mb(); |
2426 | 2475 | ||
2427 | kthread_stop(root->fs_info->transaction_kthread); | 2476 | btrfs_put_block_group_cache(fs_info); |
2428 | kthread_stop(root->fs_info->cleaner_kthread); | ||
2429 | 2477 | ||
2478 | /* | ||
2479 | * Here come 2 situations when btrfs is broken to flip readonly: | ||
2480 | * | ||
2481 | * 1. when btrfs flips readonly somewhere else before | ||
2482 | * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, | ||
2483 | * and btrfs will skip to write sb directly to keep | ||
2484 | * ERROR state on disk. | ||
2485 | * | ||
2486 | * 2. when btrfs flips readonly just in btrfs_commit_super, | ||
2487 | * and in such case, btrfs cannnot write sb via btrfs_commit_super, | ||
2488 | * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, | ||
2489 | * btrfs will cleanup all FS resources first and write sb then. | ||
2490 | */ | ||
2430 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 2491 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
2431 | ret = btrfs_commit_super(root); | 2492 | ret = btrfs_commit_super(root); |
2432 | if (ret) | 2493 | if (ret) |
2433 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2494 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
2434 | } | 2495 | } |
2435 | 2496 | ||
2497 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
2498 | ret = btrfs_error_commit_super(root); | ||
2499 | if (ret) | ||
2500 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | ||
2501 | } | ||
2502 | |||
2503 | kthread_stop(root->fs_info->transaction_kthread); | ||
2504 | kthread_stop(root->fs_info->cleaner_kthread); | ||
2505 | |||
2436 | fs_info->closing = 2; | 2506 | fs_info->closing = 2; |
2437 | smp_mb(); | 2507 | smp_mb(); |
2438 | 2508 | ||
@@ -2470,8 +2540,8 @@ int close_ctree(struct btrfs_root *root) | |||
2470 | btrfs_stop_workers(&fs_info->endio_meta_workers); | 2540 | btrfs_stop_workers(&fs_info->endio_meta_workers); |
2471 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2541 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
2472 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2542 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2543 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | ||
2473 | btrfs_stop_workers(&fs_info->submit_workers); | 2544 | btrfs_stop_workers(&fs_info->submit_workers); |
2474 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
2475 | 2545 | ||
2476 | btrfs_close_devices(fs_info->fs_devices); | 2546 | btrfs_close_devices(fs_info->fs_devices); |
2477 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2547 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
@@ -2492,7 +2562,8 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) | |||
2492 | int ret; | 2562 | int ret; |
2493 | struct inode *btree_inode = buf->first_page->mapping->host; | 2563 | struct inode *btree_inode = buf->first_page->mapping->host; |
2494 | 2564 | ||
2495 | ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); | 2565 | ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf, |
2566 | NULL); | ||
2496 | if (!ret) | 2567 | if (!ret) |
2497 | return ret; | 2568 | return ret; |
2498 | 2569 | ||
@@ -2600,6 +2671,352 @@ out: | |||
2600 | return 0; | 2671 | return 0; |
2601 | } | 2672 | } |
2602 | 2673 | ||
2674 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | ||
2675 | int read_only) | ||
2676 | { | ||
2677 | if (read_only) | ||
2678 | return; | ||
2679 | |||
2680 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | ||
2681 | printk(KERN_WARNING "warning: mount fs with errors, " | ||
2682 | "running btrfsck is recommended\n"); | ||
2683 | } | ||
2684 | |||
2685 | int btrfs_error_commit_super(struct btrfs_root *root) | ||
2686 | { | ||
2687 | int ret; | ||
2688 | |||
2689 | mutex_lock(&root->fs_info->cleaner_mutex); | ||
2690 | btrfs_run_delayed_iputs(root); | ||
2691 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
2692 | |||
2693 | down_write(&root->fs_info->cleanup_work_sem); | ||
2694 | up_write(&root->fs_info->cleanup_work_sem); | ||
2695 | |||
2696 | /* cleanup FS via transaction */ | ||
2697 | btrfs_cleanup_transaction(root); | ||
2698 | |||
2699 | ret = write_ctree_super(NULL, root, 0); | ||
2700 | |||
2701 | return ret; | ||
2702 | } | ||
2703 | |||
2704 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root) | ||
2705 | { | ||
2706 | struct btrfs_inode *btrfs_inode; | ||
2707 | struct list_head splice; | ||
2708 | |||
2709 | INIT_LIST_HEAD(&splice); | ||
2710 | |||
2711 | mutex_lock(&root->fs_info->ordered_operations_mutex); | ||
2712 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2713 | |||
2714 | list_splice_init(&root->fs_info->ordered_operations, &splice); | ||
2715 | while (!list_empty(&splice)) { | ||
2716 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
2717 | ordered_operations); | ||
2718 | |||
2719 | list_del_init(&btrfs_inode->ordered_operations); | ||
2720 | |||
2721 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
2722 | } | ||
2723 | |||
2724 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2725 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | ||
2726 | |||
2727 | return 0; | ||
2728 | } | ||
2729 | |||
2730 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root) | ||
2731 | { | ||
2732 | struct list_head splice; | ||
2733 | struct btrfs_ordered_extent *ordered; | ||
2734 | struct inode *inode; | ||
2735 | |||
2736 | INIT_LIST_HEAD(&splice); | ||
2737 | |||
2738 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2739 | |||
2740 | list_splice_init(&root->fs_info->ordered_extents, &splice); | ||
2741 | while (!list_empty(&splice)) { | ||
2742 | ordered = list_entry(splice.next, struct btrfs_ordered_extent, | ||
2743 | root_extent_list); | ||
2744 | |||
2745 | list_del_init(&ordered->root_extent_list); | ||
2746 | atomic_inc(&ordered->refs); | ||
2747 | |||
2748 | /* the inode may be getting freed (in sys_unlink path). */ | ||
2749 | inode = igrab(ordered->inode); | ||
2750 | |||
2751 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2752 | if (inode) | ||
2753 | iput(inode); | ||
2754 | |||
2755 | atomic_set(&ordered->refs, 1); | ||
2756 | btrfs_put_ordered_extent(ordered); | ||
2757 | |||
2758 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2759 | } | ||
2760 | |||
2761 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2762 | |||
2763 | return 0; | ||
2764 | } | ||
2765 | |||
2766 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
2767 | struct btrfs_root *root) | ||
2768 | { | ||
2769 | struct rb_node *node; | ||
2770 | struct btrfs_delayed_ref_root *delayed_refs; | ||
2771 | struct btrfs_delayed_ref_node *ref; | ||
2772 | int ret = 0; | ||
2773 | |||
2774 | delayed_refs = &trans->delayed_refs; | ||
2775 | |||
2776 | spin_lock(&delayed_refs->lock); | ||
2777 | if (delayed_refs->num_entries == 0) { | ||
2778 | printk(KERN_INFO "delayed_refs has NO entry\n"); | ||
2779 | return ret; | ||
2780 | } | ||
2781 | |||
2782 | node = rb_first(&delayed_refs->root); | ||
2783 | while (node) { | ||
2784 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
2785 | node = rb_next(node); | ||
2786 | |||
2787 | ref->in_tree = 0; | ||
2788 | rb_erase(&ref->rb_node, &delayed_refs->root); | ||
2789 | delayed_refs->num_entries--; | ||
2790 | |||
2791 | atomic_set(&ref->refs, 1); | ||
2792 | if (btrfs_delayed_ref_is_head(ref)) { | ||
2793 | struct btrfs_delayed_ref_head *head; | ||
2794 | |||
2795 | head = btrfs_delayed_node_to_head(ref); | ||
2796 | mutex_lock(&head->mutex); | ||
2797 | kfree(head->extent_op); | ||
2798 | delayed_refs->num_heads--; | ||
2799 | if (list_empty(&head->cluster)) | ||
2800 | delayed_refs->num_heads_ready--; | ||
2801 | list_del_init(&head->cluster); | ||
2802 | mutex_unlock(&head->mutex); | ||
2803 | } | ||
2804 | |||
2805 | spin_unlock(&delayed_refs->lock); | ||
2806 | btrfs_put_delayed_ref(ref); | ||
2807 | |||
2808 | cond_resched(); | ||
2809 | spin_lock(&delayed_refs->lock); | ||
2810 | } | ||
2811 | |||
2812 | spin_unlock(&delayed_refs->lock); | ||
2813 | |||
2814 | return ret; | ||
2815 | } | ||
2816 | |||
2817 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) | ||
2818 | { | ||
2819 | struct btrfs_pending_snapshot *snapshot; | ||
2820 | struct list_head splice; | ||
2821 | |||
2822 | INIT_LIST_HEAD(&splice); | ||
2823 | |||
2824 | list_splice_init(&t->pending_snapshots, &splice); | ||
2825 | |||
2826 | while (!list_empty(&splice)) { | ||
2827 | snapshot = list_entry(splice.next, | ||
2828 | struct btrfs_pending_snapshot, | ||
2829 | list); | ||
2830 | |||
2831 | list_del_init(&snapshot->list); | ||
2832 | |||
2833 | kfree(snapshot); | ||
2834 | } | ||
2835 | |||
2836 | return 0; | ||
2837 | } | ||
2838 | |||
2839 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) | ||
2840 | { | ||
2841 | struct btrfs_inode *btrfs_inode; | ||
2842 | struct list_head splice; | ||
2843 | |||
2844 | INIT_LIST_HEAD(&splice); | ||
2845 | |||
2846 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); | ||
2847 | |||
2848 | spin_lock(&root->fs_info->delalloc_lock); | ||
2849 | |||
2850 | while (!list_empty(&splice)) { | ||
2851 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
2852 | delalloc_inodes); | ||
2853 | |||
2854 | list_del_init(&btrfs_inode->delalloc_inodes); | ||
2855 | |||
2856 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
2857 | } | ||
2858 | |||
2859 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2860 | |||
2861 | return 0; | ||
2862 | } | ||
2863 | |||
2864 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
2865 | struct extent_io_tree *dirty_pages, | ||
2866 | int mark) | ||
2867 | { | ||
2868 | int ret; | ||
2869 | struct page *page; | ||
2870 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
2871 | struct extent_buffer *eb; | ||
2872 | u64 start = 0; | ||
2873 | u64 end; | ||
2874 | u64 offset; | ||
2875 | unsigned long index; | ||
2876 | |||
2877 | while (1) { | ||
2878 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | ||
2879 | mark); | ||
2880 | if (ret) | ||
2881 | break; | ||
2882 | |||
2883 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); | ||
2884 | while (start <= end) { | ||
2885 | index = start >> PAGE_CACHE_SHIFT; | ||
2886 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | ||
2887 | page = find_get_page(btree_inode->i_mapping, index); | ||
2888 | if (!page) | ||
2889 | continue; | ||
2890 | offset = page_offset(page); | ||
2891 | |||
2892 | spin_lock(&dirty_pages->buffer_lock); | ||
2893 | eb = radix_tree_lookup( | ||
2894 | &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, | ||
2895 | offset >> PAGE_CACHE_SHIFT); | ||
2896 | spin_unlock(&dirty_pages->buffer_lock); | ||
2897 | if (eb) { | ||
2898 | ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, | ||
2899 | &eb->bflags); | ||
2900 | atomic_set(&eb->refs, 1); | ||
2901 | } | ||
2902 | if (PageWriteback(page)) | ||
2903 | end_page_writeback(page); | ||
2904 | |||
2905 | lock_page(page); | ||
2906 | if (PageDirty(page)) { | ||
2907 | clear_page_dirty_for_io(page); | ||
2908 | spin_lock_irq(&page->mapping->tree_lock); | ||
2909 | radix_tree_tag_clear(&page->mapping->page_tree, | ||
2910 | page_index(page), | ||
2911 | PAGECACHE_TAG_DIRTY); | ||
2912 | spin_unlock_irq(&page->mapping->tree_lock); | ||
2913 | } | ||
2914 | |||
2915 | page->mapping->a_ops->invalidatepage(page, 0); | ||
2916 | unlock_page(page); | ||
2917 | } | ||
2918 | } | ||
2919 | |||
2920 | return ret; | ||
2921 | } | ||
2922 | |||
2923 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
2924 | struct extent_io_tree *pinned_extents) | ||
2925 | { | ||
2926 | struct extent_io_tree *unpin; | ||
2927 | u64 start; | ||
2928 | u64 end; | ||
2929 | int ret; | ||
2930 | |||
2931 | unpin = pinned_extents; | ||
2932 | while (1) { | ||
2933 | ret = find_first_extent_bit(unpin, 0, &start, &end, | ||
2934 | EXTENT_DIRTY); | ||
2935 | if (ret) | ||
2936 | break; | ||
2937 | |||
2938 | /* opt_discard */ | ||
2939 | ret = btrfs_error_discard_extent(root, start, end + 1 - start); | ||
2940 | |||
2941 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | ||
2942 | btrfs_error_unpin_extent_range(root, start, end); | ||
2943 | cond_resched(); | ||
2944 | } | ||
2945 | |||
2946 | return 0; | ||
2947 | } | ||
2948 | |||
2949 | static int btrfs_cleanup_transaction(struct btrfs_root *root) | ||
2950 | { | ||
2951 | struct btrfs_transaction *t; | ||
2952 | LIST_HEAD(list); | ||
2953 | |||
2954 | WARN_ON(1); | ||
2955 | |||
2956 | mutex_lock(&root->fs_info->trans_mutex); | ||
2957 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | ||
2958 | |||
2959 | list_splice_init(&root->fs_info->trans_list, &list); | ||
2960 | while (!list_empty(&list)) { | ||
2961 | t = list_entry(list.next, struct btrfs_transaction, list); | ||
2962 | if (!t) | ||
2963 | break; | ||
2964 | |||
2965 | btrfs_destroy_ordered_operations(root); | ||
2966 | |||
2967 | btrfs_destroy_ordered_extents(root); | ||
2968 | |||
2969 | btrfs_destroy_delayed_refs(t, root); | ||
2970 | |||
2971 | btrfs_block_rsv_release(root, | ||
2972 | &root->fs_info->trans_block_rsv, | ||
2973 | t->dirty_pages.dirty_bytes); | ||
2974 | |||
2975 | /* FIXME: cleanup wait for commit */ | ||
2976 | t->in_commit = 1; | ||
2977 | t->blocked = 1; | ||
2978 | if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) | ||
2979 | wake_up(&root->fs_info->transaction_blocked_wait); | ||
2980 | |||
2981 | t->blocked = 0; | ||
2982 | if (waitqueue_active(&root->fs_info->transaction_wait)) | ||
2983 | wake_up(&root->fs_info->transaction_wait); | ||
2984 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2985 | |||
2986 | mutex_lock(&root->fs_info->trans_mutex); | ||
2987 | t->commit_done = 1; | ||
2988 | if (waitqueue_active(&t->commit_wait)) | ||
2989 | wake_up(&t->commit_wait); | ||
2990 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2991 | |||
2992 | mutex_lock(&root->fs_info->trans_mutex); | ||
2993 | |||
2994 | btrfs_destroy_pending_snapshots(t); | ||
2995 | |||
2996 | btrfs_destroy_delalloc_inodes(root); | ||
2997 | |||
2998 | spin_lock(&root->fs_info->new_trans_lock); | ||
2999 | root->fs_info->running_transaction = NULL; | ||
3000 | spin_unlock(&root->fs_info->new_trans_lock); | ||
3001 | |||
3002 | btrfs_destroy_marked_extents(root, &t->dirty_pages, | ||
3003 | EXTENT_DIRTY); | ||
3004 | |||
3005 | btrfs_destroy_pinned_extent(root, | ||
3006 | root->fs_info->pinned_extents); | ||
3007 | |||
3008 | t->use_count = 0; | ||
3009 | list_del_init(&t->list); | ||
3010 | memset(t, 0, sizeof(*t)); | ||
3011 | kmem_cache_free(btrfs_transaction_cachep, t); | ||
3012 | } | ||
3013 | |||
3014 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | ||
3015 | mutex_unlock(&root->fs_info->trans_mutex); | ||
3016 | |||
3017 | return 0; | ||
3018 | } | ||
3019 | |||
2603 | static struct extent_io_ops btree_extent_io_ops = { | 3020 | static struct extent_io_ops btree_extent_io_ops = { |
2604 | .write_cache_pages_lock_hook = btree_lock_page_hook, | 3021 | .write_cache_pages_lock_hook = btree_lock_page_hook, |
2605 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3022 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c958ecbc1916..07b20dc2fd95 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -52,6 +52,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
52 | struct btrfs_root *root, int max_mirrors); | 52 | struct btrfs_root *root, int max_mirrors); |
53 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); | 53 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); |
54 | int btrfs_commit_super(struct btrfs_root *root); | 54 | int btrfs_commit_super(struct btrfs_root *root); |
55 | int btrfs_error_commit_super(struct btrfs_root *root); | ||
55 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 56 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
56 | u64 bytenr, u32 blocksize); | 57 | u64 bytenr, u32 blocksize); |
57 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | 58 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, |
@@ -87,7 +88,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | |||
87 | int metadata); | 88 | int metadata); |
88 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 89 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
89 | int rw, struct bio *bio, int mirror_num, | 90 | int rw, struct bio *bio, int mirror_num, |
90 | unsigned long bio_flags, | 91 | unsigned long bio_flags, u64 bio_offset, |
91 | extent_submit_bio_hook_t *submit_bio_start, | 92 | extent_submit_bio_hook_t *submit_bio_start, |
92 | extent_submit_bio_hook_t *submit_bio_done); | 93 | extent_submit_bio_hook_t *submit_bio_done); |
93 | 94 | ||
@@ -95,8 +96,6 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); | |||
95 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); | 96 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); |
96 | int btrfs_write_tree_block(struct extent_buffer *buf); | 97 | int btrfs_write_tree_block(struct extent_buffer *buf); |
97 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); | 98 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); |
98 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
99 | struct btrfs_fs_info *fs_info); | ||
100 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | 99 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, |
101 | struct btrfs_fs_info *fs_info); | 100 | struct btrfs_fs_info *fs_info); |
102 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | 101 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index ba5c3fd5ab8c..9786963b07e5 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
@@ -65,7 +65,6 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
65 | { | 65 | { |
66 | struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; | 66 | struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; |
67 | struct btrfs_root *root; | 67 | struct btrfs_root *root; |
68 | struct dentry *dentry; | ||
69 | struct inode *inode; | 68 | struct inode *inode; |
70 | struct btrfs_key key; | 69 | struct btrfs_key key; |
71 | int index; | 70 | int index; |
@@ -95,7 +94,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
95 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 94 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); |
96 | key.offset = 0; | 95 | key.offset = 0; |
97 | 96 | ||
98 | inode = btrfs_iget(sb, &key, root); | 97 | inode = btrfs_iget(sb, &key, root, NULL); |
99 | if (IS_ERR(inode)) { | 98 | if (IS_ERR(inode)) { |
100 | err = PTR_ERR(inode); | 99 | err = PTR_ERR(inode); |
101 | goto fail; | 100 | goto fail; |
@@ -108,10 +107,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
108 | return ERR_PTR(-ESTALE); | 107 | return ERR_PTR(-ESTALE); |
109 | } | 108 | } |
110 | 109 | ||
111 | dentry = d_obtain_alias(inode); | 110 | return d_obtain_alias(inode); |
112 | if (!IS_ERR(dentry)) | ||
113 | dentry->d_op = &btrfs_dentry_operations; | ||
114 | return dentry; | ||
115 | fail: | 111 | fail: |
116 | srcu_read_unlock(&fs_info->subvol_srcu, index); | 112 | srcu_read_unlock(&fs_info->subvol_srcu, index); |
117 | return ERR_PTR(err); | 113 | return ERR_PTR(err); |
@@ -166,7 +162,6 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | |||
166 | static struct dentry *btrfs_get_parent(struct dentry *child) | 162 | static struct dentry *btrfs_get_parent(struct dentry *child) |
167 | { | 163 | { |
168 | struct inode *dir = child->d_inode; | 164 | struct inode *dir = child->d_inode; |
169 | static struct dentry *dentry; | ||
170 | struct btrfs_root *root = BTRFS_I(dir)->root; | 165 | struct btrfs_root *root = BTRFS_I(dir)->root; |
171 | struct btrfs_path *path; | 166 | struct btrfs_path *path; |
172 | struct extent_buffer *leaf; | 167 | struct extent_buffer *leaf; |
@@ -223,18 +218,91 @@ static struct dentry *btrfs_get_parent(struct dentry *child) | |||
223 | 218 | ||
224 | key.type = BTRFS_INODE_ITEM_KEY; | 219 | key.type = BTRFS_INODE_ITEM_KEY; |
225 | key.offset = 0; | 220 | key.offset = 0; |
226 | dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); | 221 | return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); |
227 | if (!IS_ERR(dentry)) | ||
228 | dentry->d_op = &btrfs_dentry_operations; | ||
229 | return dentry; | ||
230 | fail: | 222 | fail: |
231 | btrfs_free_path(path); | 223 | btrfs_free_path(path); |
232 | return ERR_PTR(ret); | 224 | return ERR_PTR(ret); |
233 | } | 225 | } |
234 | 226 | ||
227 | static int btrfs_get_name(struct dentry *parent, char *name, | ||
228 | struct dentry *child) | ||
229 | { | ||
230 | struct inode *inode = child->d_inode; | ||
231 | struct inode *dir = parent->d_inode; | ||
232 | struct btrfs_path *path; | ||
233 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
234 | struct btrfs_inode_ref *iref; | ||
235 | struct btrfs_root_ref *rref; | ||
236 | struct extent_buffer *leaf; | ||
237 | unsigned long name_ptr; | ||
238 | struct btrfs_key key; | ||
239 | int name_len; | ||
240 | int ret; | ||
241 | |||
242 | if (!dir || !inode) | ||
243 | return -EINVAL; | ||
244 | |||
245 | if (!S_ISDIR(dir->i_mode)) | ||
246 | return -EINVAL; | ||
247 | |||
248 | path = btrfs_alloc_path(); | ||
249 | if (!path) | ||
250 | return -ENOMEM; | ||
251 | path->leave_spinning = 1; | ||
252 | |||
253 | if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
254 | key.objectid = BTRFS_I(inode)->root->root_key.objectid; | ||
255 | key.type = BTRFS_ROOT_BACKREF_KEY; | ||
256 | key.offset = (u64)-1; | ||
257 | root = root->fs_info->tree_root; | ||
258 | } else { | ||
259 | key.objectid = inode->i_ino; | ||
260 | key.offset = dir->i_ino; | ||
261 | key.type = BTRFS_INODE_REF_KEY; | ||
262 | } | ||
263 | |||
264 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
265 | if (ret < 0) { | ||
266 | btrfs_free_path(path); | ||
267 | return ret; | ||
268 | } else if (ret > 0) { | ||
269 | if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
270 | path->slots[0]--; | ||
271 | } else { | ||
272 | btrfs_free_path(path); | ||
273 | return -ENOENT; | ||
274 | } | ||
275 | } | ||
276 | leaf = path->nodes[0]; | ||
277 | |||
278 | if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
279 | rref = btrfs_item_ptr(leaf, path->slots[0], | ||
280 | struct btrfs_root_ref); | ||
281 | name_ptr = (unsigned long)(rref + 1); | ||
282 | name_len = btrfs_root_ref_name_len(leaf, rref); | ||
283 | } else { | ||
284 | iref = btrfs_item_ptr(leaf, path->slots[0], | ||
285 | struct btrfs_inode_ref); | ||
286 | name_ptr = (unsigned long)(iref + 1); | ||
287 | name_len = btrfs_inode_ref_name_len(leaf, iref); | ||
288 | } | ||
289 | |||
290 | read_extent_buffer(leaf, name, name_ptr, name_len); | ||
291 | btrfs_free_path(path); | ||
292 | |||
293 | /* | ||
294 | * have to add the null termination to make sure that reconnect_path | ||
295 | * gets the right len for strlen | ||
296 | */ | ||
297 | name[name_len] = '\0'; | ||
298 | |||
299 | return 0; | ||
300 | } | ||
301 | |||
235 | const struct export_operations btrfs_export_ops = { | 302 | const struct export_operations btrfs_export_ops = { |
236 | .encode_fh = btrfs_encode_fh, | 303 | .encode_fh = btrfs_encode_fh, |
237 | .fh_to_dentry = btrfs_fh_to_dentry, | 304 | .fh_to_dentry = btrfs_fh_to_dentry, |
238 | .fh_to_parent = btrfs_fh_to_parent, | 305 | .fh_to_parent = btrfs_fh_to_parent, |
239 | .get_parent = btrfs_get_parent, | 306 | .get_parent = btrfs_get_parent, |
307 | .get_name = btrfs_get_name, | ||
240 | }; | 308 | }; |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 432a2da4641e..b55269340cec 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/sort.h> | 22 | #include <linux/sort.h> |
23 | #include <linux/rcupdate.h> | 23 | #include <linux/rcupdate.h> |
24 | #include <linux/kthread.h> | 24 | #include <linux/kthread.h> |
25 | #include <linux/slab.h> | ||
25 | #include "compat.h" | 26 | #include "compat.h" |
26 | #include "hash.h" | 27 | #include "hash.h" |
27 | #include "ctree.h" | 28 | #include "ctree.h" |
@@ -34,10 +35,9 @@ | |||
34 | 35 | ||
35 | static int update_block_group(struct btrfs_trans_handle *trans, | 36 | static int update_block_group(struct btrfs_trans_handle *trans, |
36 | struct btrfs_root *root, | 37 | struct btrfs_root *root, |
37 | u64 bytenr, u64 num_bytes, int alloc, | 38 | u64 bytenr, u64 num_bytes, int alloc); |
38 | int mark_free); | 39 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, |
39 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | 40 | u64 num_bytes, int reserve, int sinfo); |
40 | u64 num_bytes, int reserve); | ||
41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
42 | struct btrfs_root *root, | 42 | struct btrfs_root *root, |
43 | u64 bytenr, u64 num_bytes, u64 parent, | 43 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -60,12 +60,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
60 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 60 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
61 | struct btrfs_root *extent_root, u64 alloc_bytes, | 61 | struct btrfs_root *extent_root, u64 alloc_bytes, |
62 | u64 flags, int force); | 62 | u64 flags, int force); |
63 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | ||
64 | struct btrfs_root *root, | ||
65 | struct btrfs_path *path, | ||
66 | u64 bytenr, u64 num_bytes, | ||
67 | int is_data, int reserved, | ||
68 | struct extent_buffer **must_clean); | ||
69 | static int find_next_key(struct btrfs_path *path, int level, | 63 | static int find_next_key(struct btrfs_path *path, int level, |
70 | struct btrfs_key *key); | 64 | struct btrfs_key *key); |
71 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | 65 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
@@ -90,8 +84,12 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache) | |||
90 | 84 | ||
91 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache) | 85 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache) |
92 | { | 86 | { |
93 | if (atomic_dec_and_test(&cache->count)) | 87 | if (atomic_dec_and_test(&cache->count)) { |
88 | WARN_ON(cache->pinned > 0); | ||
89 | WARN_ON(cache->reserved > 0); | ||
90 | WARN_ON(cache->reserved_pinned > 0); | ||
94 | kfree(cache); | 91 | kfree(cache); |
92 | } | ||
95 | } | 93 | } |
96 | 94 | ||
97 | /* | 95 | /* |
@@ -244,6 +242,12 @@ get_caching_control(struct btrfs_block_group_cache *cache) | |||
244 | return NULL; | 242 | return NULL; |
245 | } | 243 | } |
246 | 244 | ||
245 | /* We're loading it the fast way, so we don't have a caching_ctl. */ | ||
246 | if (!cache->caching_ctl) { | ||
247 | spin_unlock(&cache->lock); | ||
248 | return NULL; | ||
249 | } | ||
250 | |||
247 | ctl = cache->caching_ctl; | 251 | ctl = cache->caching_ctl; |
248 | atomic_inc(&ctl->count); | 252 | atomic_inc(&ctl->count); |
249 | spin_unlock(&cache->lock); | 253 | spin_unlock(&cache->lock); |
@@ -318,7 +322,7 @@ static int caching_kthread(void *data) | |||
318 | 322 | ||
319 | exclude_super_stripes(extent_root, block_group); | 323 | exclude_super_stripes(extent_root, block_group); |
320 | spin_lock(&block_group->space_info->lock); | 324 | spin_lock(&block_group->space_info->lock); |
321 | block_group->space_info->bytes_super += block_group->bytes_super; | 325 | block_group->space_info->bytes_readonly += block_group->bytes_super; |
322 | spin_unlock(&block_group->space_info->lock); | 326 | spin_unlock(&block_group->space_info->lock); |
323 | 327 | ||
324 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 328 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
@@ -423,7 +427,10 @@ err: | |||
423 | return 0; | 427 | return 0; |
424 | } | 428 | } |
425 | 429 | ||
426 | static int cache_block_group(struct btrfs_block_group_cache *cache) | 430 | static int cache_block_group(struct btrfs_block_group_cache *cache, |
431 | struct btrfs_trans_handle *trans, | ||
432 | struct btrfs_root *root, | ||
433 | int load_cache_only) | ||
427 | { | 434 | { |
428 | struct btrfs_fs_info *fs_info = cache->fs_info; | 435 | struct btrfs_fs_info *fs_info = cache->fs_info; |
429 | struct btrfs_caching_control *caching_ctl; | 436 | struct btrfs_caching_control *caching_ctl; |
@@ -434,6 +441,39 @@ static int cache_block_group(struct btrfs_block_group_cache *cache) | |||
434 | if (cache->cached != BTRFS_CACHE_NO) | 441 | if (cache->cached != BTRFS_CACHE_NO) |
435 | return 0; | 442 | return 0; |
436 | 443 | ||
444 | /* | ||
445 | * We can't do the read from on-disk cache during a commit since we need | ||
446 | * to have the normal tree locking. Also if we are currently trying to | ||
447 | * allocate blocks for the tree root we can't do the fast caching since | ||
448 | * we likely hold important locks. | ||
449 | */ | ||
450 | if (!trans->transaction->in_commit && | ||
451 | (root && root != root->fs_info->tree_root)) { | ||
452 | spin_lock(&cache->lock); | ||
453 | if (cache->cached != BTRFS_CACHE_NO) { | ||
454 | spin_unlock(&cache->lock); | ||
455 | return 0; | ||
456 | } | ||
457 | cache->cached = BTRFS_CACHE_STARTED; | ||
458 | spin_unlock(&cache->lock); | ||
459 | |||
460 | ret = load_free_space_cache(fs_info, cache); | ||
461 | |||
462 | spin_lock(&cache->lock); | ||
463 | if (ret == 1) { | ||
464 | cache->cached = BTRFS_CACHE_FINISHED; | ||
465 | cache->last_byte_to_unpin = (u64)-1; | ||
466 | } else { | ||
467 | cache->cached = BTRFS_CACHE_NO; | ||
468 | } | ||
469 | spin_unlock(&cache->lock); | ||
470 | if (ret == 1) | ||
471 | return 0; | ||
472 | } | ||
473 | |||
474 | if (load_cache_only) | ||
475 | return 0; | ||
476 | |||
437 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); | 477 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); |
438 | BUG_ON(!caching_ctl); | 478 | BUG_ON(!caching_ctl); |
439 | 479 | ||
@@ -506,9 +546,12 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, | |||
506 | struct list_head *head = &info->space_info; | 546 | struct list_head *head = &info->space_info; |
507 | struct btrfs_space_info *found; | 547 | struct btrfs_space_info *found; |
508 | 548 | ||
549 | flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM | | ||
550 | BTRFS_BLOCK_GROUP_METADATA; | ||
551 | |||
509 | rcu_read_lock(); | 552 | rcu_read_lock(); |
510 | list_for_each_entry_rcu(found, head, list) { | 553 | list_for_each_entry_rcu(found, head, list) { |
511 | if (found->flags == flags) { | 554 | if (found->flags & flags) { |
512 | rcu_read_unlock(); | 555 | rcu_read_unlock(); |
513 | return found; | 556 | return found; |
514 | } | 557 | } |
@@ -541,6 +584,15 @@ static u64 div_factor(u64 num, int factor) | |||
541 | return num; | 584 | return num; |
542 | } | 585 | } |
543 | 586 | ||
587 | static u64 div_factor_fine(u64 num, int factor) | ||
588 | { | ||
589 | if (factor == 100) | ||
590 | return num; | ||
591 | num *= factor; | ||
592 | do_div(num, 100); | ||
593 | return num; | ||
594 | } | ||
595 | |||
544 | u64 btrfs_find_block_group(struct btrfs_root *root, | 596 | u64 btrfs_find_block_group(struct btrfs_root *root, |
545 | u64 search_start, u64 search_hint, int owner) | 597 | u64 search_start, u64 search_hint, int owner) |
546 | { | 598 | { |
@@ -609,6 +661,113 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | |||
609 | } | 661 | } |
610 | 662 | ||
611 | /* | 663 | /* |
664 | * helper function to lookup reference count and flags of extent. | ||
665 | * | ||
666 | * the head node for delayed ref is used to store the sum of all the | ||
667 | * reference count modifications queued up in the rbtree. the head | ||
668 | * node may also store the extent flags to set. This way you can check | ||
669 | * to see what the reference count and extent flags would be if all of | ||
670 | * the delayed refs are not processed. | ||
671 | */ | ||
672 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
673 | struct btrfs_root *root, u64 bytenr, | ||
674 | u64 num_bytes, u64 *refs, u64 *flags) | ||
675 | { | ||
676 | struct btrfs_delayed_ref_head *head; | ||
677 | struct btrfs_delayed_ref_root *delayed_refs; | ||
678 | struct btrfs_path *path; | ||
679 | struct btrfs_extent_item *ei; | ||
680 | struct extent_buffer *leaf; | ||
681 | struct btrfs_key key; | ||
682 | u32 item_size; | ||
683 | u64 num_refs; | ||
684 | u64 extent_flags; | ||
685 | int ret; | ||
686 | |||
687 | path = btrfs_alloc_path(); | ||
688 | if (!path) | ||
689 | return -ENOMEM; | ||
690 | |||
691 | key.objectid = bytenr; | ||
692 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
693 | key.offset = num_bytes; | ||
694 | if (!trans) { | ||
695 | path->skip_locking = 1; | ||
696 | path->search_commit_root = 1; | ||
697 | } | ||
698 | again: | ||
699 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | ||
700 | &key, path, 0, 0); | ||
701 | if (ret < 0) | ||
702 | goto out_free; | ||
703 | |||
704 | if (ret == 0) { | ||
705 | leaf = path->nodes[0]; | ||
706 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
707 | if (item_size >= sizeof(*ei)) { | ||
708 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
709 | struct btrfs_extent_item); | ||
710 | num_refs = btrfs_extent_refs(leaf, ei); | ||
711 | extent_flags = btrfs_extent_flags(leaf, ei); | ||
712 | } else { | ||
713 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
714 | struct btrfs_extent_item_v0 *ei0; | ||
715 | BUG_ON(item_size != sizeof(*ei0)); | ||
716 | ei0 = btrfs_item_ptr(leaf, path->slots[0], | ||
717 | struct btrfs_extent_item_v0); | ||
718 | num_refs = btrfs_extent_refs_v0(leaf, ei0); | ||
719 | /* FIXME: this isn't correct for data */ | ||
720 | extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
721 | #else | ||
722 | BUG(); | ||
723 | #endif | ||
724 | } | ||
725 | BUG_ON(num_refs == 0); | ||
726 | } else { | ||
727 | num_refs = 0; | ||
728 | extent_flags = 0; | ||
729 | ret = 0; | ||
730 | } | ||
731 | |||
732 | if (!trans) | ||
733 | goto out; | ||
734 | |||
735 | delayed_refs = &trans->transaction->delayed_refs; | ||
736 | spin_lock(&delayed_refs->lock); | ||
737 | head = btrfs_find_delayed_ref_head(trans, bytenr); | ||
738 | if (head) { | ||
739 | if (!mutex_trylock(&head->mutex)) { | ||
740 | atomic_inc(&head->node.refs); | ||
741 | spin_unlock(&delayed_refs->lock); | ||
742 | |||
743 | btrfs_release_path(root->fs_info->extent_root, path); | ||
744 | |||
745 | mutex_lock(&head->mutex); | ||
746 | mutex_unlock(&head->mutex); | ||
747 | btrfs_put_delayed_ref(&head->node); | ||
748 | goto again; | ||
749 | } | ||
750 | if (head->extent_op && head->extent_op->update_flags) | ||
751 | extent_flags |= head->extent_op->flags_to_set; | ||
752 | else | ||
753 | BUG_ON(num_refs == 0); | ||
754 | |||
755 | num_refs += head->node.ref_mod; | ||
756 | mutex_unlock(&head->mutex); | ||
757 | } | ||
758 | spin_unlock(&delayed_refs->lock); | ||
759 | out: | ||
760 | WARN_ON(num_refs == 0); | ||
761 | if (refs) | ||
762 | *refs = num_refs; | ||
763 | if (flags) | ||
764 | *flags = extent_flags; | ||
765 | out_free: | ||
766 | btrfs_free_path(path); | ||
767 | return ret; | ||
768 | } | ||
769 | |||
770 | /* | ||
612 | * Back reference rules. Back refs have three main goals: | 771 | * Back reference rules. Back refs have three main goals: |
613 | * | 772 | * |
614 | * 1) differentiate between all holders of references to an extent so that | 773 | * 1) differentiate between all holders of references to an extent so that |
@@ -1587,8 +1746,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
1587 | static void btrfs_issue_discard(struct block_device *bdev, | 1746 | static void btrfs_issue_discard(struct block_device *bdev, |
1588 | u64 start, u64 len) | 1747 | u64 start, u64 len) |
1589 | { | 1748 | { |
1590 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, | 1749 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0); |
1591 | DISCARD_FL_BARRIER); | ||
1592 | } | 1750 | } |
1593 | 1751 | ||
1594 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1752 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
@@ -1870,7 +2028,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
1870 | return ret; | 2028 | return ret; |
1871 | } | 2029 | } |
1872 | 2030 | ||
1873 | |||
1874 | /* helper function to actually process a single delayed ref entry */ | 2031 | /* helper function to actually process a single delayed ref entry */ |
1875 | static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | 2032 | static int run_one_delayed_ref(struct btrfs_trans_handle *trans, |
1876 | struct btrfs_root *root, | 2033 | struct btrfs_root *root, |
@@ -1890,32 +2047,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
1890 | BUG_ON(extent_op); | 2047 | BUG_ON(extent_op); |
1891 | head = btrfs_delayed_node_to_head(node); | 2048 | head = btrfs_delayed_node_to_head(node); |
1892 | if (insert_reserved) { | 2049 | if (insert_reserved) { |
1893 | int mark_free = 0; | 2050 | btrfs_pin_extent(root, node->bytenr, |
1894 | struct extent_buffer *must_clean = NULL; | 2051 | node->num_bytes, 1); |
1895 | |||
1896 | ret = pin_down_bytes(trans, root, NULL, | ||
1897 | node->bytenr, node->num_bytes, | ||
1898 | head->is_data, 1, &must_clean); | ||
1899 | if (ret > 0) | ||
1900 | mark_free = 1; | ||
1901 | |||
1902 | if (must_clean) { | ||
1903 | clean_tree_block(NULL, root, must_clean); | ||
1904 | btrfs_tree_unlock(must_clean); | ||
1905 | free_extent_buffer(must_clean); | ||
1906 | } | ||
1907 | if (head->is_data) { | 2052 | if (head->is_data) { |
1908 | ret = btrfs_del_csums(trans, root, | 2053 | ret = btrfs_del_csums(trans, root, |
1909 | node->bytenr, | 2054 | node->bytenr, |
1910 | node->num_bytes); | 2055 | node->num_bytes); |
1911 | BUG_ON(ret); | 2056 | BUG_ON(ret); |
1912 | } | 2057 | } |
1913 | if (mark_free) { | ||
1914 | ret = btrfs_free_reserved_extent(root, | ||
1915 | node->bytenr, | ||
1916 | node->num_bytes); | ||
1917 | BUG_ON(ret); | ||
1918 | } | ||
1919 | } | 2058 | } |
1920 | mutex_unlock(&head->mutex); | 2059 | mutex_unlock(&head->mutex); |
1921 | return 0; | 2060 | return 0; |
@@ -2346,6 +2485,8 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | |||
2346 | ret = 0; | 2485 | ret = 0; |
2347 | out: | 2486 | out: |
2348 | btrfs_free_path(path); | 2487 | btrfs_free_path(path); |
2488 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) | ||
2489 | WARN_ON(ret > 0); | ||
2349 | return ret; | 2490 | return ret; |
2350 | } | 2491 | } |
2351 | 2492 | ||
@@ -2597,6 +2738,111 @@ next_block_group(struct btrfs_root *root, | |||
2597 | return cache; | 2738 | return cache; |
2598 | } | 2739 | } |
2599 | 2740 | ||
2741 | static int cache_save_setup(struct btrfs_block_group_cache *block_group, | ||
2742 | struct btrfs_trans_handle *trans, | ||
2743 | struct btrfs_path *path) | ||
2744 | { | ||
2745 | struct btrfs_root *root = block_group->fs_info->tree_root; | ||
2746 | struct inode *inode = NULL; | ||
2747 | u64 alloc_hint = 0; | ||
2748 | int dcs = BTRFS_DC_ERROR; | ||
2749 | int num_pages = 0; | ||
2750 | int retries = 0; | ||
2751 | int ret = 0; | ||
2752 | |||
2753 | /* | ||
2754 | * If this block group is smaller than 100 megs don't bother caching the | ||
2755 | * block group. | ||
2756 | */ | ||
2757 | if (block_group->key.offset < (100 * 1024 * 1024)) { | ||
2758 | spin_lock(&block_group->lock); | ||
2759 | block_group->disk_cache_state = BTRFS_DC_WRITTEN; | ||
2760 | spin_unlock(&block_group->lock); | ||
2761 | return 0; | ||
2762 | } | ||
2763 | |||
2764 | again: | ||
2765 | inode = lookup_free_space_inode(root, block_group, path); | ||
2766 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { | ||
2767 | ret = PTR_ERR(inode); | ||
2768 | btrfs_release_path(root, path); | ||
2769 | goto out; | ||
2770 | } | ||
2771 | |||
2772 | if (IS_ERR(inode)) { | ||
2773 | BUG_ON(retries); | ||
2774 | retries++; | ||
2775 | |||
2776 | if (block_group->ro) | ||
2777 | goto out_free; | ||
2778 | |||
2779 | ret = create_free_space_inode(root, trans, block_group, path); | ||
2780 | if (ret) | ||
2781 | goto out_free; | ||
2782 | goto again; | ||
2783 | } | ||
2784 | |||
2785 | /* | ||
2786 | * We want to set the generation to 0, that way if anything goes wrong | ||
2787 | * from here on out we know not to trust this cache when we load up next | ||
2788 | * time. | ||
2789 | */ | ||
2790 | BTRFS_I(inode)->generation = 0; | ||
2791 | ret = btrfs_update_inode(trans, root, inode); | ||
2792 | WARN_ON(ret); | ||
2793 | |||
2794 | if (i_size_read(inode) > 0) { | ||
2795 | ret = btrfs_truncate_free_space_cache(root, trans, path, | ||
2796 | inode); | ||
2797 | if (ret) | ||
2798 | goto out_put; | ||
2799 | } | ||
2800 | |||
2801 | spin_lock(&block_group->lock); | ||
2802 | if (block_group->cached != BTRFS_CACHE_FINISHED) { | ||
2803 | /* We're not cached, don't bother trying to write stuff out */ | ||
2804 | dcs = BTRFS_DC_WRITTEN; | ||
2805 | spin_unlock(&block_group->lock); | ||
2806 | goto out_put; | ||
2807 | } | ||
2808 | spin_unlock(&block_group->lock); | ||
2809 | |||
2810 | num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); | ||
2811 | if (!num_pages) | ||
2812 | num_pages = 1; | ||
2813 | |||
2814 | /* | ||
2815 | * Just to make absolutely sure we have enough space, we're going to | ||
2816 | * preallocate 12 pages worth of space for each block group. In | ||
2817 | * practice we ought to use at most 8, but we need extra space so we can | ||
2818 | * add our header and have a terminator between the extents and the | ||
2819 | * bitmaps. | ||
2820 | */ | ||
2821 | num_pages *= 16; | ||
2822 | num_pages *= PAGE_CACHE_SIZE; | ||
2823 | |||
2824 | ret = btrfs_check_data_free_space(inode, num_pages); | ||
2825 | if (ret) | ||
2826 | goto out_put; | ||
2827 | |||
2828 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, | ||
2829 | num_pages, num_pages, | ||
2830 | &alloc_hint); | ||
2831 | if (!ret) | ||
2832 | dcs = BTRFS_DC_SETUP; | ||
2833 | btrfs_free_reserved_data_space(inode, num_pages); | ||
2834 | out_put: | ||
2835 | iput(inode); | ||
2836 | out_free: | ||
2837 | btrfs_release_path(root, path); | ||
2838 | out: | ||
2839 | spin_lock(&block_group->lock); | ||
2840 | block_group->disk_cache_state = dcs; | ||
2841 | spin_unlock(&block_group->lock); | ||
2842 | |||
2843 | return ret; | ||
2844 | } | ||
2845 | |||
2600 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | 2846 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, |
2601 | struct btrfs_root *root) | 2847 | struct btrfs_root *root) |
2602 | { | 2848 | { |
@@ -2609,6 +2855,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2609 | if (!path) | 2855 | if (!path) |
2610 | return -ENOMEM; | 2856 | return -ENOMEM; |
2611 | 2857 | ||
2858 | again: | ||
2859 | while (1) { | ||
2860 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | ||
2861 | while (cache) { | ||
2862 | if (cache->disk_cache_state == BTRFS_DC_CLEAR) | ||
2863 | break; | ||
2864 | cache = next_block_group(root, cache); | ||
2865 | } | ||
2866 | if (!cache) { | ||
2867 | if (last == 0) | ||
2868 | break; | ||
2869 | last = 0; | ||
2870 | continue; | ||
2871 | } | ||
2872 | err = cache_save_setup(cache, trans, path); | ||
2873 | last = cache->key.objectid + cache->key.offset; | ||
2874 | btrfs_put_block_group(cache); | ||
2875 | } | ||
2876 | |||
2612 | while (1) { | 2877 | while (1) { |
2613 | if (last == 0) { | 2878 | if (last == 0) { |
2614 | err = btrfs_run_delayed_refs(trans, root, | 2879 | err = btrfs_run_delayed_refs(trans, root, |
@@ -2618,6 +2883,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2618 | 2883 | ||
2619 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | 2884 | cache = btrfs_lookup_first_block_group(root->fs_info, last); |
2620 | while (cache) { | 2885 | while (cache) { |
2886 | if (cache->disk_cache_state == BTRFS_DC_CLEAR) { | ||
2887 | btrfs_put_block_group(cache); | ||
2888 | goto again; | ||
2889 | } | ||
2890 | |||
2621 | if (cache->dirty) | 2891 | if (cache->dirty) |
2622 | break; | 2892 | break; |
2623 | cache = next_block_group(root, cache); | 2893 | cache = next_block_group(root, cache); |
@@ -2629,6 +2899,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2629 | continue; | 2899 | continue; |
2630 | } | 2900 | } |
2631 | 2901 | ||
2902 | if (cache->disk_cache_state == BTRFS_DC_SETUP) | ||
2903 | cache->disk_cache_state = BTRFS_DC_NEED_WRITE; | ||
2632 | cache->dirty = 0; | 2904 | cache->dirty = 0; |
2633 | last = cache->key.objectid + cache->key.offset; | 2905 | last = cache->key.objectid + cache->key.offset; |
2634 | 2906 | ||
@@ -2637,6 +2909,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2637 | btrfs_put_block_group(cache); | 2909 | btrfs_put_block_group(cache); |
2638 | } | 2910 | } |
2639 | 2911 | ||
2912 | while (1) { | ||
2913 | /* | ||
2914 | * I don't think this is needed since we're just marking our | ||
2915 | * preallocated extent as written, but just in case it can't | ||
2916 | * hurt. | ||
2917 | */ | ||
2918 | if (last == 0) { | ||
2919 | err = btrfs_run_delayed_refs(trans, root, | ||
2920 | (unsigned long)-1); | ||
2921 | BUG_ON(err); | ||
2922 | } | ||
2923 | |||
2924 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | ||
2925 | while (cache) { | ||
2926 | /* | ||
2927 | * Really this shouldn't happen, but it could if we | ||
2928 | * couldn't write the entire preallocated extent and | ||
2929 | * splitting the extent resulted in a new block. | ||
2930 | */ | ||
2931 | if (cache->dirty) { | ||
2932 | btrfs_put_block_group(cache); | ||
2933 | goto again; | ||
2934 | } | ||
2935 | if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) | ||
2936 | break; | ||
2937 | cache = next_block_group(root, cache); | ||
2938 | } | ||
2939 | if (!cache) { | ||
2940 | if (last == 0) | ||
2941 | break; | ||
2942 | last = 0; | ||
2943 | continue; | ||
2944 | } | ||
2945 | |||
2946 | btrfs_write_out_cache(root, trans, cache, path); | ||
2947 | |||
2948 | /* | ||
2949 | * If we didn't have an error then the cache state is still | ||
2950 | * NEED_WRITE, so we can set it to WRITTEN. | ||
2951 | */ | ||
2952 | if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) | ||
2953 | cache->disk_cache_state = BTRFS_DC_WRITTEN; | ||
2954 | last = cache->key.objectid + cache->key.offset; | ||
2955 | btrfs_put_block_group(cache); | ||
2956 | } | ||
2957 | |||
2640 | btrfs_free_path(path); | 2958 | btrfs_free_path(path); |
2641 | return 0; | 2959 | return 0; |
2642 | } | 2960 | } |
@@ -2659,12 +2977,22 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2659 | struct btrfs_space_info **space_info) | 2977 | struct btrfs_space_info **space_info) |
2660 | { | 2978 | { |
2661 | struct btrfs_space_info *found; | 2979 | struct btrfs_space_info *found; |
2980 | int i; | ||
2981 | int factor; | ||
2982 | |||
2983 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | | ||
2984 | BTRFS_BLOCK_GROUP_RAID10)) | ||
2985 | factor = 2; | ||
2986 | else | ||
2987 | factor = 1; | ||
2662 | 2988 | ||
2663 | found = __find_space_info(info, flags); | 2989 | found = __find_space_info(info, flags); |
2664 | if (found) { | 2990 | if (found) { |
2665 | spin_lock(&found->lock); | 2991 | spin_lock(&found->lock); |
2666 | found->total_bytes += total_bytes; | 2992 | found->total_bytes += total_bytes; |
2993 | found->disk_total += total_bytes * factor; | ||
2667 | found->bytes_used += bytes_used; | 2994 | found->bytes_used += bytes_used; |
2995 | found->disk_used += bytes_used * factor; | ||
2668 | found->full = 0; | 2996 | found->full = 0; |
2669 | spin_unlock(&found->lock); | 2997 | spin_unlock(&found->lock); |
2670 | *space_info = found; | 2998 | *space_info = found; |
@@ -2674,16 +3002,21 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2674 | if (!found) | 3002 | if (!found) |
2675 | return -ENOMEM; | 3003 | return -ENOMEM; |
2676 | 3004 | ||
2677 | INIT_LIST_HEAD(&found->block_groups); | 3005 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
3006 | INIT_LIST_HEAD(&found->block_groups[i]); | ||
2678 | init_rwsem(&found->groups_sem); | 3007 | init_rwsem(&found->groups_sem); |
2679 | spin_lock_init(&found->lock); | 3008 | spin_lock_init(&found->lock); |
2680 | found->flags = flags; | 3009 | found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | |
3010 | BTRFS_BLOCK_GROUP_SYSTEM | | ||
3011 | BTRFS_BLOCK_GROUP_METADATA); | ||
2681 | found->total_bytes = total_bytes; | 3012 | found->total_bytes = total_bytes; |
3013 | found->disk_total = total_bytes * factor; | ||
2682 | found->bytes_used = bytes_used; | 3014 | found->bytes_used = bytes_used; |
3015 | found->disk_used = bytes_used * factor; | ||
2683 | found->bytes_pinned = 0; | 3016 | found->bytes_pinned = 0; |
2684 | found->bytes_reserved = 0; | 3017 | found->bytes_reserved = 0; |
2685 | found->bytes_readonly = 0; | 3018 | found->bytes_readonly = 0; |
2686 | found->bytes_delalloc = 0; | 3019 | found->bytes_may_use = 0; |
2687 | found->full = 0; | 3020 | found->full = 0; |
2688 | found->force_alloc = 0; | 3021 | found->force_alloc = 0; |
2689 | *space_info = found; | 3022 | *space_info = found; |
@@ -2708,22 +3041,15 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
2708 | } | 3041 | } |
2709 | } | 3042 | } |
2710 | 3043 | ||
2711 | static void set_block_group_readonly(struct btrfs_block_group_cache *cache) | ||
2712 | { | ||
2713 | spin_lock(&cache->space_info->lock); | ||
2714 | spin_lock(&cache->lock); | ||
2715 | if (!cache->ro) { | ||
2716 | cache->space_info->bytes_readonly += cache->key.offset - | ||
2717 | btrfs_block_group_used(&cache->item); | ||
2718 | cache->ro = 1; | ||
2719 | } | ||
2720 | spin_unlock(&cache->lock); | ||
2721 | spin_unlock(&cache->space_info->lock); | ||
2722 | } | ||
2723 | |||
2724 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | 3044 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) |
2725 | { | 3045 | { |
2726 | u64 num_devices = root->fs_info->fs_devices->rw_devices; | 3046 | /* |
3047 | * we add in the count of missing devices because we want | ||
3048 | * to make sure that any RAID levels on a degraded FS | ||
3049 | * continue to be honored. | ||
3050 | */ | ||
3051 | u64 num_devices = root->fs_info->fs_devices->rw_devices + | ||
3052 | root->fs_info->fs_devices->missing_devices; | ||
2727 | 3053 | ||
2728 | if (num_devices == 1) | 3054 | if (num_devices == 1) |
2729 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); | 3055 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); |
@@ -2749,718 +3075,995 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
2749 | return flags; | 3075 | return flags; |
2750 | } | 3076 | } |
2751 | 3077 | ||
2752 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) | 3078 | static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) |
2753 | { | 3079 | { |
2754 | struct btrfs_fs_info *info = root->fs_info; | 3080 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
2755 | u64 alloc_profile; | 3081 | flags |= root->fs_info->avail_data_alloc_bits & |
2756 | 3082 | root->fs_info->data_alloc_profile; | |
2757 | if (data) { | 3083 | else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
2758 | alloc_profile = info->avail_data_alloc_bits & | 3084 | flags |= root->fs_info->avail_system_alloc_bits & |
2759 | info->data_alloc_profile; | 3085 | root->fs_info->system_alloc_profile; |
2760 | data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; | 3086 | else if (flags & BTRFS_BLOCK_GROUP_METADATA) |
2761 | } else if (root == root->fs_info->chunk_root) { | 3087 | flags |= root->fs_info->avail_metadata_alloc_bits & |
2762 | alloc_profile = info->avail_system_alloc_bits & | 3088 | root->fs_info->metadata_alloc_profile; |
2763 | info->system_alloc_profile; | 3089 | return btrfs_reduce_alloc_profile(root, flags); |
2764 | data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; | 3090 | } |
2765 | } else { | 3091 | |
2766 | alloc_profile = info->avail_metadata_alloc_bits & | 3092 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) |
2767 | info->metadata_alloc_profile; | 3093 | { |
2768 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; | 3094 | u64 flags; |
2769 | } | 3095 | |
3096 | if (data) | ||
3097 | flags = BTRFS_BLOCK_GROUP_DATA; | ||
3098 | else if (root == root->fs_info->chunk_root) | ||
3099 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | ||
3100 | else | ||
3101 | flags = BTRFS_BLOCK_GROUP_METADATA; | ||
2770 | 3102 | ||
2771 | return btrfs_reduce_alloc_profile(root, data); | 3103 | return get_alloc_profile(root, flags); |
2772 | } | 3104 | } |
2773 | 3105 | ||
2774 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | 3106 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) |
2775 | { | 3107 | { |
2776 | u64 alloc_target; | ||
2777 | |||
2778 | alloc_target = btrfs_get_alloc_profile(root, 1); | ||
2779 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, | 3108 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, |
2780 | alloc_target); | 3109 | BTRFS_BLOCK_GROUP_DATA); |
2781 | } | 3110 | } |
2782 | 3111 | ||
2783 | static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) | 3112 | /* |
3113 | * This will check the space that the inode allocates from to make sure we have | ||
3114 | * enough space for bytes. | ||
3115 | */ | ||
3116 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes) | ||
2784 | { | 3117 | { |
2785 | u64 num_bytes; | 3118 | struct btrfs_space_info *data_sinfo; |
2786 | int level; | 3119 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3120 | u64 used; | ||
3121 | int ret = 0, committed = 0, alloc_chunk = 1; | ||
2787 | 3122 | ||
2788 | level = BTRFS_MAX_LEVEL - 2; | 3123 | /* make sure bytes are sectorsize aligned */ |
2789 | /* | 3124 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
2790 | * NOTE: these calculations are absolutely the worst possible case. | ||
2791 | * This assumes that _every_ item we insert will require a new leaf, and | ||
2792 | * that the tree has grown to its maximum level size. | ||
2793 | */ | ||
2794 | 3125 | ||
2795 | /* | 3126 | if (root == root->fs_info->tree_root) { |
2796 | * for every item we insert we could insert both an extent item and a | 3127 | alloc_chunk = 0; |
2797 | * extent ref item. Then for ever item we insert, we will need to cow | 3128 | committed = 1; |
2798 | * both the original leaf, plus the leaf to the left and right of it. | 3129 | } |
2799 | * | ||
2800 | * Unless we are talking about the extent root, then we just want the | ||
2801 | * number of items * 2, since we just need the extent item plus its ref. | ||
2802 | */ | ||
2803 | if (root == root->fs_info->extent_root) | ||
2804 | num_bytes = num_items * 2; | ||
2805 | else | ||
2806 | num_bytes = (num_items + (2 * num_items)) * 3; | ||
2807 | 3130 | ||
2808 | /* | 3131 | data_sinfo = BTRFS_I(inode)->space_info; |
2809 | * num_bytes is total number of leaves we could need times the leaf | 3132 | if (!data_sinfo) |
2810 | * size, and then for every leaf we could end up cow'ing 2 nodes per | 3133 | goto alloc; |
2811 | * level, down to the leaf level. | ||
2812 | */ | ||
2813 | num_bytes = (num_bytes * root->leafsize) + | ||
2814 | (num_bytes * (level * 2)) * root->nodesize; | ||
2815 | 3134 | ||
2816 | return num_bytes; | 3135 | again: |
2817 | } | 3136 | /* make sure we have enough space to handle the data first */ |
3137 | spin_lock(&data_sinfo->lock); | ||
3138 | used = data_sinfo->bytes_used + data_sinfo->bytes_reserved + | ||
3139 | data_sinfo->bytes_pinned + data_sinfo->bytes_readonly + | ||
3140 | data_sinfo->bytes_may_use; | ||
2818 | 3141 | ||
2819 | /* | 3142 | if (used + bytes > data_sinfo->total_bytes) { |
2820 | * Unreserve metadata space for delalloc. If we have less reserved credits than | 3143 | struct btrfs_trans_handle *trans; |
2821 | * we have extents, this function does nothing. | ||
2822 | */ | ||
2823 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | ||
2824 | struct inode *inode, int num_items) | ||
2825 | { | ||
2826 | struct btrfs_fs_info *info = root->fs_info; | ||
2827 | struct btrfs_space_info *meta_sinfo; | ||
2828 | u64 num_bytes; | ||
2829 | u64 alloc_target; | ||
2830 | bool bug = false; | ||
2831 | 3144 | ||
2832 | /* get the space info for where the metadata will live */ | 3145 | /* |
2833 | alloc_target = btrfs_get_alloc_profile(root, 0); | 3146 | * if we don't have enough free bytes in this space then we need |
2834 | meta_sinfo = __find_space_info(info, alloc_target); | 3147 | * to alloc a new chunk. |
3148 | */ | ||
3149 | if (!data_sinfo->full && alloc_chunk) { | ||
3150 | u64 alloc_target; | ||
2835 | 3151 | ||
2836 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | 3152 | data_sinfo->force_alloc = 1; |
2837 | num_items); | 3153 | spin_unlock(&data_sinfo->lock); |
3154 | alloc: | ||
3155 | alloc_target = btrfs_get_alloc_profile(root, 1); | ||
3156 | trans = btrfs_join_transaction(root, 1); | ||
3157 | if (IS_ERR(trans)) | ||
3158 | return PTR_ERR(trans); | ||
2838 | 3159 | ||
2839 | spin_lock(&meta_sinfo->lock); | 3160 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
2840 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 3161 | bytes + 2 * 1024 * 1024, |
2841 | if (BTRFS_I(inode)->reserved_extents <= | 3162 | alloc_target, 0); |
2842 | BTRFS_I(inode)->outstanding_extents) { | 3163 | btrfs_end_transaction(trans, root); |
2843 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 3164 | if (ret < 0) { |
2844 | spin_unlock(&meta_sinfo->lock); | 3165 | if (ret != -ENOSPC) |
2845 | return 0; | 3166 | return ret; |
2846 | } | 3167 | else |
2847 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 3168 | goto commit_trans; |
3169 | } | ||
2848 | 3170 | ||
2849 | BTRFS_I(inode)->reserved_extents--; | 3171 | if (!data_sinfo) { |
2850 | BUG_ON(BTRFS_I(inode)->reserved_extents < 0); | 3172 | btrfs_set_inode_space_info(root, inode); |
3173 | data_sinfo = BTRFS_I(inode)->space_info; | ||
3174 | } | ||
3175 | goto again; | ||
3176 | } | ||
3177 | spin_unlock(&data_sinfo->lock); | ||
2851 | 3178 | ||
2852 | if (meta_sinfo->bytes_delalloc < num_bytes) { | 3179 | /* commit the current transaction and try again */ |
2853 | bug = true; | 3180 | commit_trans: |
2854 | meta_sinfo->bytes_delalloc = 0; | 3181 | if (!committed && !root->fs_info->open_ioctl_trans) { |
2855 | } else { | 3182 | committed = 1; |
2856 | meta_sinfo->bytes_delalloc -= num_bytes; | 3183 | trans = btrfs_join_transaction(root, 1); |
2857 | } | 3184 | if (IS_ERR(trans)) |
2858 | spin_unlock(&meta_sinfo->lock); | 3185 | return PTR_ERR(trans); |
3186 | ret = btrfs_commit_transaction(trans, root); | ||
3187 | if (ret) | ||
3188 | return ret; | ||
3189 | goto again; | ||
3190 | } | ||
2859 | 3191 | ||
2860 | BUG_ON(bug); | 3192 | #if 0 /* I hope we never need this code again, just in case */ |
3193 | printk(KERN_ERR "no space left, need %llu, %llu bytes_used, " | ||
3194 | "%llu bytes_reserved, " "%llu bytes_pinned, " | ||
3195 | "%llu bytes_readonly, %llu may use %llu total\n", | ||
3196 | (unsigned long long)bytes, | ||
3197 | (unsigned long long)data_sinfo->bytes_used, | ||
3198 | (unsigned long long)data_sinfo->bytes_reserved, | ||
3199 | (unsigned long long)data_sinfo->bytes_pinned, | ||
3200 | (unsigned long long)data_sinfo->bytes_readonly, | ||
3201 | (unsigned long long)data_sinfo->bytes_may_use, | ||
3202 | (unsigned long long)data_sinfo->total_bytes); | ||
3203 | #endif | ||
3204 | return -ENOSPC; | ||
3205 | } | ||
3206 | data_sinfo->bytes_may_use += bytes; | ||
3207 | BTRFS_I(inode)->reserved_bytes += bytes; | ||
3208 | spin_unlock(&data_sinfo->lock); | ||
2861 | 3209 | ||
2862 | return 0; | 3210 | return 0; |
2863 | } | 3211 | } |
2864 | 3212 | ||
2865 | static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | 3213 | /* |
3214 | * called when we are clearing an delalloc extent from the | ||
3215 | * inode's io_tree or there was an error for whatever reason | ||
3216 | * after calling btrfs_check_data_free_space | ||
3217 | */ | ||
3218 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) | ||
2866 | { | 3219 | { |
2867 | u64 thresh; | 3220 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3221 | struct btrfs_space_info *data_sinfo; | ||
2868 | 3222 | ||
2869 | thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 3223 | /* make sure bytes are sectorsize aligned */ |
2870 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | 3224 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
2871 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
2872 | meta_sinfo->bytes_may_use; | ||
2873 | 3225 | ||
2874 | thresh = meta_sinfo->total_bytes - thresh; | 3226 | data_sinfo = BTRFS_I(inode)->space_info; |
2875 | thresh *= 80; | 3227 | spin_lock(&data_sinfo->lock); |
2876 | do_div(thresh, 100); | 3228 | data_sinfo->bytes_may_use -= bytes; |
2877 | if (thresh <= meta_sinfo->bytes_delalloc) | 3229 | BTRFS_I(inode)->reserved_bytes -= bytes; |
2878 | meta_sinfo->force_delalloc = 1; | 3230 | spin_unlock(&data_sinfo->lock); |
2879 | else | ||
2880 | meta_sinfo->force_delalloc = 0; | ||
2881 | } | 3231 | } |
2882 | 3232 | ||
2883 | struct async_flush { | 3233 | static void force_metadata_allocation(struct btrfs_fs_info *info) |
2884 | struct btrfs_root *root; | 3234 | { |
2885 | struct btrfs_space_info *info; | 3235 | struct list_head *head = &info->space_info; |
2886 | struct btrfs_work work; | 3236 | struct btrfs_space_info *found; |
2887 | }; | 3237 | |
3238 | rcu_read_lock(); | ||
3239 | list_for_each_entry_rcu(found, head, list) { | ||
3240 | if (found->flags & BTRFS_BLOCK_GROUP_METADATA) | ||
3241 | found->force_alloc = 1; | ||
3242 | } | ||
3243 | rcu_read_unlock(); | ||
3244 | } | ||
2888 | 3245 | ||
2889 | static noinline void flush_delalloc_async(struct btrfs_work *work) | 3246 | static int should_alloc_chunk(struct btrfs_root *root, |
3247 | struct btrfs_space_info *sinfo, u64 alloc_bytes) | ||
2890 | { | 3248 | { |
2891 | struct async_flush *async; | 3249 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; |
2892 | struct btrfs_root *root; | 3250 | u64 thresh; |
2893 | struct btrfs_space_info *info; | ||
2894 | 3251 | ||
2895 | async = container_of(work, struct async_flush, work); | 3252 | if (sinfo->bytes_used + sinfo->bytes_reserved + |
2896 | root = async->root; | 3253 | alloc_bytes + 256 * 1024 * 1024 < num_bytes) |
2897 | info = async->info; | 3254 | return 0; |
2898 | 3255 | ||
2899 | btrfs_start_delalloc_inodes(root, 0); | 3256 | if (sinfo->bytes_used + sinfo->bytes_reserved + |
2900 | wake_up(&info->flush_wait); | 3257 | alloc_bytes < div_factor(num_bytes, 8)) |
2901 | btrfs_wait_ordered_extents(root, 0, 0); | 3258 | return 0; |
2902 | 3259 | ||
2903 | spin_lock(&info->lock); | 3260 | thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); |
2904 | info->flushing = 0; | 3261 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); |
2905 | spin_unlock(&info->lock); | ||
2906 | wake_up(&info->flush_wait); | ||
2907 | 3262 | ||
2908 | kfree(async); | 3263 | if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) |
3264 | return 0; | ||
3265 | |||
3266 | return 1; | ||
2909 | } | 3267 | } |
2910 | 3268 | ||
2911 | static void wait_on_flush(struct btrfs_space_info *info) | 3269 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
3270 | struct btrfs_root *extent_root, u64 alloc_bytes, | ||
3271 | u64 flags, int force) | ||
2912 | { | 3272 | { |
2913 | DEFINE_WAIT(wait); | 3273 | struct btrfs_space_info *space_info; |
2914 | u64 used; | 3274 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
3275 | int ret = 0; | ||
2915 | 3276 | ||
2916 | while (1) { | 3277 | mutex_lock(&fs_info->chunk_mutex); |
2917 | prepare_to_wait(&info->flush_wait, &wait, | ||
2918 | TASK_UNINTERRUPTIBLE); | ||
2919 | spin_lock(&info->lock); | ||
2920 | if (!info->flushing) { | ||
2921 | spin_unlock(&info->lock); | ||
2922 | break; | ||
2923 | } | ||
2924 | 3278 | ||
2925 | used = info->bytes_used + info->bytes_reserved + | 3279 | flags = btrfs_reduce_alloc_profile(extent_root, flags); |
2926 | info->bytes_pinned + info->bytes_readonly + | 3280 | |
2927 | info->bytes_super + info->bytes_root + | 3281 | space_info = __find_space_info(extent_root->fs_info, flags); |
2928 | info->bytes_may_use + info->bytes_delalloc; | 3282 | if (!space_info) { |
2929 | if (used < info->total_bytes) { | 3283 | ret = update_space_info(extent_root->fs_info, flags, |
2930 | spin_unlock(&info->lock); | 3284 | 0, 0, &space_info); |
2931 | break; | 3285 | BUG_ON(ret); |
2932 | } | 3286 | } |
2933 | spin_unlock(&info->lock); | 3287 | BUG_ON(!space_info); |
2934 | schedule(); | 3288 | |
3289 | spin_lock(&space_info->lock); | ||
3290 | if (space_info->force_alloc) | ||
3291 | force = 1; | ||
3292 | if (space_info->full) { | ||
3293 | spin_unlock(&space_info->lock); | ||
3294 | goto out; | ||
3295 | } | ||
3296 | |||
3297 | if (!force && !should_alloc_chunk(extent_root, space_info, | ||
3298 | alloc_bytes)) { | ||
3299 | spin_unlock(&space_info->lock); | ||
3300 | goto out; | ||
3301 | } | ||
3302 | spin_unlock(&space_info->lock); | ||
3303 | |||
3304 | /* | ||
3305 | * If we have mixed data/metadata chunks we want to make sure we keep | ||
3306 | * allocating mixed chunks instead of individual chunks. | ||
3307 | */ | ||
3308 | if (btrfs_mixed_space_info(space_info)) | ||
3309 | flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA); | ||
3310 | |||
3311 | /* | ||
3312 | * if we're doing a data chunk, go ahead and make sure that | ||
3313 | * we keep a reasonable number of metadata chunks allocated in the | ||
3314 | * FS as well. | ||
3315 | */ | ||
3316 | if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { | ||
3317 | fs_info->data_chunk_allocations++; | ||
3318 | if (!(fs_info->data_chunk_allocations % | ||
3319 | fs_info->metadata_ratio)) | ||
3320 | force_metadata_allocation(fs_info); | ||
2935 | } | 3321 | } |
2936 | finish_wait(&info->flush_wait, &wait); | 3322 | |
3323 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | ||
3324 | spin_lock(&space_info->lock); | ||
3325 | if (ret) | ||
3326 | space_info->full = 1; | ||
3327 | else | ||
3328 | ret = 1; | ||
3329 | space_info->force_alloc = 0; | ||
3330 | spin_unlock(&space_info->lock); | ||
3331 | out: | ||
3332 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | ||
3333 | return ret; | ||
2937 | } | 3334 | } |
2938 | 3335 | ||
2939 | static void flush_delalloc(struct btrfs_root *root, | 3336 | /* |
2940 | struct btrfs_space_info *info) | 3337 | * shrink metadata reservation for delalloc |
3338 | */ | ||
3339 | static int shrink_delalloc(struct btrfs_trans_handle *trans, | ||
3340 | struct btrfs_root *root, u64 to_reclaim, int sync) | ||
2941 | { | 3341 | { |
2942 | struct async_flush *async; | 3342 | struct btrfs_block_rsv *block_rsv; |
2943 | bool wait = false; | 3343 | struct btrfs_space_info *space_info; |
3344 | u64 reserved; | ||
3345 | u64 max_reclaim; | ||
3346 | u64 reclaimed = 0; | ||
3347 | int pause = 1; | ||
3348 | int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; | ||
2944 | 3349 | ||
2945 | spin_lock(&info->lock); | 3350 | block_rsv = &root->fs_info->delalloc_block_rsv; |
3351 | space_info = block_rsv->space_info; | ||
2946 | 3352 | ||
2947 | if (!info->flushing) { | 3353 | smp_mb(); |
2948 | info->flushing = 1; | 3354 | reserved = space_info->bytes_reserved; |
2949 | init_waitqueue_head(&info->flush_wait); | ||
2950 | } else { | ||
2951 | wait = true; | ||
2952 | } | ||
2953 | 3355 | ||
2954 | spin_unlock(&info->lock); | 3356 | if (reserved == 0) |
3357 | return 0; | ||
2955 | 3358 | ||
2956 | if (wait) { | 3359 | max_reclaim = min(reserved, to_reclaim); |
2957 | wait_on_flush(info); | ||
2958 | return; | ||
2959 | } | ||
2960 | 3360 | ||
2961 | async = kzalloc(sizeof(*async), GFP_NOFS); | 3361 | while (1) { |
2962 | if (!async) | 3362 | /* have the flusher threads jump in and do some IO */ |
2963 | goto flush; | 3363 | smp_mb(); |
3364 | nr_pages = min_t(unsigned long, nr_pages, | ||
3365 | root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); | ||
3366 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); | ||
3367 | |||
3368 | spin_lock(&space_info->lock); | ||
3369 | if (reserved > space_info->bytes_reserved) | ||
3370 | reclaimed += reserved - space_info->bytes_reserved; | ||
3371 | reserved = space_info->bytes_reserved; | ||
3372 | spin_unlock(&space_info->lock); | ||
2964 | 3373 | ||
2965 | async->root = root; | 3374 | if (reserved == 0 || reclaimed >= max_reclaim) |
2966 | async->info = info; | 3375 | break; |
2967 | async->work.func = flush_delalloc_async; | ||
2968 | 3376 | ||
2969 | btrfs_queue_worker(&root->fs_info->enospc_workers, | 3377 | if (trans && trans->transaction->blocked) |
2970 | &async->work); | 3378 | return -EAGAIN; |
2971 | wait_on_flush(info); | ||
2972 | return; | ||
2973 | 3379 | ||
2974 | flush: | 3380 | __set_current_state(TASK_INTERRUPTIBLE); |
2975 | btrfs_start_delalloc_inodes(root, 0); | 3381 | schedule_timeout(pause); |
2976 | btrfs_wait_ordered_extents(root, 0, 0); | 3382 | pause <<= 1; |
3383 | if (pause > HZ / 10) | ||
3384 | pause = HZ / 10; | ||
2977 | 3385 | ||
2978 | spin_lock(&info->lock); | 3386 | } |
2979 | info->flushing = 0; | 3387 | return reclaimed >= to_reclaim; |
2980 | spin_unlock(&info->lock); | ||
2981 | wake_up(&info->flush_wait); | ||
2982 | } | 3388 | } |
2983 | 3389 | ||
2984 | static int maybe_allocate_chunk(struct btrfs_root *root, | 3390 | /* |
2985 | struct btrfs_space_info *info) | 3391 | * Retries tells us how many times we've called reserve_metadata_bytes. The |
3392 | * idea is if this is the first call (retries == 0) then we will add to our | ||
3393 | * reserved count if we can't make the allocation in order to hold our place | ||
3394 | * while we go and try and free up space. That way for retries > 1 we don't try | ||
3395 | * and add space, we just check to see if the amount of unused space is >= the | ||
3396 | * total space, meaning that our reservation is valid. | ||
3397 | * | ||
3398 | * However if we don't intend to retry this reservation, pass -1 as retries so | ||
3399 | * that it short circuits this logic. | ||
3400 | */ | ||
3401 | static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, | ||
3402 | struct btrfs_root *root, | ||
3403 | struct btrfs_block_rsv *block_rsv, | ||
3404 | u64 orig_bytes, int flush) | ||
2986 | { | 3405 | { |
2987 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | 3406 | struct btrfs_space_info *space_info = block_rsv->space_info; |
2988 | struct btrfs_trans_handle *trans; | 3407 | u64 unused; |
2989 | bool wait = false; | 3408 | u64 num_bytes = orig_bytes; |
3409 | int retries = 0; | ||
2990 | int ret = 0; | 3410 | int ret = 0; |
2991 | u64 min_metadata; | 3411 | bool reserved = false; |
2992 | u64 free_space; | 3412 | bool committed = false; |
3413 | |||
3414 | again: | ||
3415 | ret = -ENOSPC; | ||
3416 | if (reserved) | ||
3417 | num_bytes = 0; | ||
3418 | |||
3419 | spin_lock(&space_info->lock); | ||
3420 | unused = space_info->bytes_used + space_info->bytes_reserved + | ||
3421 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
3422 | space_info->bytes_may_use; | ||
2993 | 3423 | ||
2994 | free_space = btrfs_super_total_bytes(disk_super); | ||
2995 | /* | 3424 | /* |
2996 | * we allow the metadata to grow to a max of either 10gb or 5% of the | 3425 | * The idea here is that we've not already over-reserved the block group |
2997 | * space in the volume. | 3426 | * then we can go ahead and save our reservation first and then start |
3427 | * flushing if we need to. Otherwise if we've already overcommitted | ||
3428 | * lets start flushing stuff first and then come back and try to make | ||
3429 | * our reservation. | ||
2998 | */ | 3430 | */ |
2999 | min_metadata = min((u64)10 * 1024 * 1024 * 1024, | 3431 | if (unused <= space_info->total_bytes) { |
3000 | div64_u64(free_space * 5, 100)); | 3432 | unused = space_info->total_bytes - unused; |
3001 | if (info->total_bytes >= min_metadata) { | 3433 | if (unused >= num_bytes) { |
3002 | spin_unlock(&info->lock); | 3434 | if (!reserved) |
3003 | return 0; | 3435 | space_info->bytes_reserved += orig_bytes; |
3436 | ret = 0; | ||
3437 | } else { | ||
3438 | /* | ||
3439 | * Ok set num_bytes to orig_bytes since we aren't | ||
3440 | * overocmmitted, this way we only try and reclaim what | ||
3441 | * we need. | ||
3442 | */ | ||
3443 | num_bytes = orig_bytes; | ||
3444 | } | ||
3445 | } else { | ||
3446 | /* | ||
3447 | * Ok we're over committed, set num_bytes to the overcommitted | ||
3448 | * amount plus the amount of bytes that we need for this | ||
3449 | * reservation. | ||
3450 | */ | ||
3451 | num_bytes = unused - space_info->total_bytes + | ||
3452 | (orig_bytes * (retries + 1)); | ||
3004 | } | 3453 | } |
3005 | 3454 | ||
3006 | if (info->full) { | 3455 | /* |
3007 | spin_unlock(&info->lock); | 3456 | * Couldn't make our reservation, save our place so while we're trying |
3008 | return 0; | 3457 | * to reclaim space we can actually use it instead of somebody else |
3458 | * stealing it from us. | ||
3459 | */ | ||
3460 | if (ret && !reserved) { | ||
3461 | space_info->bytes_reserved += orig_bytes; | ||
3462 | reserved = true; | ||
3009 | } | 3463 | } |
3010 | 3464 | ||
3011 | if (!info->allocating_chunk) { | 3465 | spin_unlock(&space_info->lock); |
3012 | info->force_alloc = 1; | ||
3013 | info->allocating_chunk = 1; | ||
3014 | init_waitqueue_head(&info->allocate_wait); | ||
3015 | } else { | ||
3016 | wait = true; | ||
3017 | } | ||
3018 | 3466 | ||
3019 | spin_unlock(&info->lock); | 3467 | if (!ret) |
3468 | return 0; | ||
3020 | 3469 | ||
3021 | if (wait) { | 3470 | if (!flush) |
3022 | wait_event(info->allocate_wait, | 3471 | goto out; |
3023 | !info->allocating_chunk); | ||
3024 | return 1; | ||
3025 | } | ||
3026 | 3472 | ||
3027 | trans = btrfs_start_transaction(root, 1); | 3473 | /* |
3028 | if (!trans) { | 3474 | * We do synchronous shrinking since we don't actually unreserve |
3029 | ret = -ENOMEM; | 3475 | * metadata until after the IO is completed. |
3476 | */ | ||
3477 | ret = shrink_delalloc(trans, root, num_bytes, 1); | ||
3478 | if (ret > 0) | ||
3479 | return 0; | ||
3480 | else if (ret < 0) | ||
3030 | goto out; | 3481 | goto out; |
3482 | |||
3483 | /* | ||
3484 | * So if we were overcommitted it's possible that somebody else flushed | ||
3485 | * out enough space and we simply didn't have enough space to reclaim, | ||
3486 | * so go back around and try again. | ||
3487 | */ | ||
3488 | if (retries < 2) { | ||
3489 | retries++; | ||
3490 | goto again; | ||
3031 | } | 3491 | } |
3032 | 3492 | ||
3033 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 3493 | spin_lock(&space_info->lock); |
3034 | 4096 + 2 * 1024 * 1024, | 3494 | /* |
3035 | info->flags, 0); | 3495 | * Not enough space to be reclaimed, don't bother committing the |
3036 | btrfs_end_transaction(trans, root); | 3496 | * transaction. |
3497 | */ | ||
3498 | if (space_info->bytes_pinned < orig_bytes) | ||
3499 | ret = -ENOSPC; | ||
3500 | spin_unlock(&space_info->lock); | ||
3037 | if (ret) | 3501 | if (ret) |
3038 | goto out; | 3502 | goto out; |
3503 | |||
3504 | ret = -EAGAIN; | ||
3505 | if (trans || committed) | ||
3506 | goto out; | ||
3507 | |||
3508 | ret = -ENOSPC; | ||
3509 | trans = btrfs_join_transaction(root, 1); | ||
3510 | if (IS_ERR(trans)) | ||
3511 | goto out; | ||
3512 | ret = btrfs_commit_transaction(trans, root); | ||
3513 | if (!ret) { | ||
3514 | trans = NULL; | ||
3515 | committed = true; | ||
3516 | goto again; | ||
3517 | } | ||
3518 | |||
3039 | out: | 3519 | out: |
3040 | spin_lock(&info->lock); | 3520 | if (reserved) { |
3041 | info->allocating_chunk = 0; | 3521 | spin_lock(&space_info->lock); |
3042 | spin_unlock(&info->lock); | 3522 | space_info->bytes_reserved -= orig_bytes; |
3043 | wake_up(&info->allocate_wait); | 3523 | spin_unlock(&space_info->lock); |
3524 | } | ||
3044 | 3525 | ||
3045 | if (ret) | 3526 | return ret; |
3046 | return 0; | ||
3047 | return 1; | ||
3048 | } | 3527 | } |
3049 | 3528 | ||
3050 | /* | 3529 | static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, |
3051 | * Reserve metadata space for delalloc. | 3530 | struct btrfs_root *root) |
3052 | */ | ||
3053 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
3054 | struct inode *inode, int num_items) | ||
3055 | { | 3531 | { |
3056 | struct btrfs_fs_info *info = root->fs_info; | 3532 | struct btrfs_block_rsv *block_rsv; |
3057 | struct btrfs_space_info *meta_sinfo; | 3533 | if (root->ref_cows) |
3058 | u64 num_bytes; | 3534 | block_rsv = trans->block_rsv; |
3059 | u64 used; | 3535 | else |
3060 | u64 alloc_target; | 3536 | block_rsv = root->block_rsv; |
3061 | int flushed = 0; | ||
3062 | int force_delalloc; | ||
3063 | |||
3064 | /* get the space info for where the metadata will live */ | ||
3065 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3066 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3067 | 3537 | ||
3068 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | 3538 | if (!block_rsv) |
3069 | num_items); | 3539 | block_rsv = &root->fs_info->empty_block_rsv; |
3070 | again: | ||
3071 | spin_lock(&meta_sinfo->lock); | ||
3072 | 3540 | ||
3073 | force_delalloc = meta_sinfo->force_delalloc; | 3541 | return block_rsv; |
3542 | } | ||
3074 | 3543 | ||
3075 | if (unlikely(!meta_sinfo->bytes_root)) | 3544 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, |
3076 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | 3545 | u64 num_bytes) |
3546 | { | ||
3547 | int ret = -ENOSPC; | ||
3548 | spin_lock(&block_rsv->lock); | ||
3549 | if (block_rsv->reserved >= num_bytes) { | ||
3550 | block_rsv->reserved -= num_bytes; | ||
3551 | if (block_rsv->reserved < block_rsv->size) | ||
3552 | block_rsv->full = 0; | ||
3553 | ret = 0; | ||
3554 | } | ||
3555 | spin_unlock(&block_rsv->lock); | ||
3556 | return ret; | ||
3557 | } | ||
3077 | 3558 | ||
3078 | if (!flushed) | 3559 | static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, |
3079 | meta_sinfo->bytes_delalloc += num_bytes; | 3560 | u64 num_bytes, int update_size) |
3561 | { | ||
3562 | spin_lock(&block_rsv->lock); | ||
3563 | block_rsv->reserved += num_bytes; | ||
3564 | if (update_size) | ||
3565 | block_rsv->size += num_bytes; | ||
3566 | else if (block_rsv->reserved >= block_rsv->size) | ||
3567 | block_rsv->full = 1; | ||
3568 | spin_unlock(&block_rsv->lock); | ||
3569 | } | ||
3080 | 3570 | ||
3081 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 3571 | void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, |
3082 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | 3572 | struct btrfs_block_rsv *dest, u64 num_bytes) |
3083 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | 3573 | { |
3084 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | 3574 | struct btrfs_space_info *space_info = block_rsv->space_info; |
3085 | 3575 | ||
3086 | if (used > meta_sinfo->total_bytes) { | 3576 | spin_lock(&block_rsv->lock); |
3087 | flushed++; | 3577 | if (num_bytes == (u64)-1) |
3578 | num_bytes = block_rsv->size; | ||
3579 | block_rsv->size -= num_bytes; | ||
3580 | if (block_rsv->reserved >= block_rsv->size) { | ||
3581 | num_bytes = block_rsv->reserved - block_rsv->size; | ||
3582 | block_rsv->reserved = block_rsv->size; | ||
3583 | block_rsv->full = 1; | ||
3584 | } else { | ||
3585 | num_bytes = 0; | ||
3586 | } | ||
3587 | spin_unlock(&block_rsv->lock); | ||
3088 | 3588 | ||
3089 | if (flushed == 1) { | 3589 | if (num_bytes > 0) { |
3090 | if (maybe_allocate_chunk(root, meta_sinfo)) | 3590 | if (dest) { |
3091 | goto again; | 3591 | block_rsv_add_bytes(dest, num_bytes, 0); |
3092 | flushed++; | ||
3093 | } else { | 3592 | } else { |
3094 | spin_unlock(&meta_sinfo->lock); | 3593 | spin_lock(&space_info->lock); |
3095 | } | 3594 | space_info->bytes_reserved -= num_bytes; |
3096 | 3595 | spin_unlock(&space_info->lock); | |
3097 | if (flushed == 2) { | ||
3098 | filemap_flush(inode->i_mapping); | ||
3099 | goto again; | ||
3100 | } else if (flushed == 3) { | ||
3101 | flush_delalloc(root, meta_sinfo); | ||
3102 | goto again; | ||
3103 | } | 3596 | } |
3104 | spin_lock(&meta_sinfo->lock); | ||
3105 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
3106 | spin_unlock(&meta_sinfo->lock); | ||
3107 | printk(KERN_ERR "enospc, has %d, reserved %d\n", | ||
3108 | BTRFS_I(inode)->outstanding_extents, | ||
3109 | BTRFS_I(inode)->reserved_extents); | ||
3110 | dump_space_info(meta_sinfo, 0, 0); | ||
3111 | return -ENOSPC; | ||
3112 | } | 3597 | } |
3598 | } | ||
3113 | 3599 | ||
3114 | BTRFS_I(inode)->reserved_extents++; | 3600 | static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, |
3115 | check_force_delalloc(meta_sinfo); | 3601 | struct btrfs_block_rsv *dst, u64 num_bytes) |
3116 | spin_unlock(&meta_sinfo->lock); | 3602 | { |
3603 | int ret; | ||
3117 | 3604 | ||
3118 | if (!flushed && force_delalloc) | 3605 | ret = block_rsv_use_bytes(src, num_bytes); |
3119 | filemap_flush(inode->i_mapping); | 3606 | if (ret) |
3607 | return ret; | ||
3120 | 3608 | ||
3609 | block_rsv_add_bytes(dst, num_bytes, 1); | ||
3121 | return 0; | 3610 | return 0; |
3122 | } | 3611 | } |
3123 | 3612 | ||
3124 | /* | 3613 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) |
3125 | * unreserve num_items number of items worth of metadata space. This needs to | ||
3126 | * be paired with btrfs_reserve_metadata_space. | ||
3127 | * | ||
3128 | * NOTE: if you have the option, run this _AFTER_ you do a | ||
3129 | * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref | ||
3130 | * oprations which will result in more used metadata, so we want to make sure we | ||
3131 | * can do that without issue. | ||
3132 | */ | ||
3133 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) | ||
3134 | { | 3614 | { |
3135 | struct btrfs_fs_info *info = root->fs_info; | 3615 | memset(rsv, 0, sizeof(*rsv)); |
3136 | struct btrfs_space_info *meta_sinfo; | 3616 | spin_lock_init(&rsv->lock); |
3137 | u64 num_bytes; | 3617 | atomic_set(&rsv->usage, 1); |
3138 | u64 alloc_target; | 3618 | rsv->priority = 6; |
3139 | bool bug = false; | 3619 | INIT_LIST_HEAD(&rsv->list); |
3140 | 3620 | } | |
3141 | /* get the space info for where the metadata will live */ | ||
3142 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3143 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3144 | 3621 | ||
3145 | num_bytes = calculate_bytes_needed(root, num_items); | 3622 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) |
3623 | { | ||
3624 | struct btrfs_block_rsv *block_rsv; | ||
3625 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
3146 | 3626 | ||
3147 | spin_lock(&meta_sinfo->lock); | 3627 | block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); |
3148 | if (meta_sinfo->bytes_may_use < num_bytes) { | 3628 | if (!block_rsv) |
3149 | bug = true; | 3629 | return NULL; |
3150 | meta_sinfo->bytes_may_use = 0; | ||
3151 | } else { | ||
3152 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3153 | } | ||
3154 | spin_unlock(&meta_sinfo->lock); | ||
3155 | 3630 | ||
3156 | BUG_ON(bug); | 3631 | btrfs_init_block_rsv(block_rsv); |
3632 | block_rsv->space_info = __find_space_info(fs_info, | ||
3633 | BTRFS_BLOCK_GROUP_METADATA); | ||
3634 | return block_rsv; | ||
3635 | } | ||
3157 | 3636 | ||
3158 | return 0; | 3637 | void btrfs_free_block_rsv(struct btrfs_root *root, |
3638 | struct btrfs_block_rsv *rsv) | ||
3639 | { | ||
3640 | if (rsv && atomic_dec_and_test(&rsv->usage)) { | ||
3641 | btrfs_block_rsv_release(root, rsv, (u64)-1); | ||
3642 | if (!rsv->durable) | ||
3643 | kfree(rsv); | ||
3644 | } | ||
3159 | } | 3645 | } |
3160 | 3646 | ||
3161 | /* | 3647 | /* |
3162 | * Reserve some metadata space for use. We'll calculate the worste case number | 3648 | * make the block_rsv struct be able to capture freed space. |
3163 | * of bytes that would be needed to modify num_items number of items. If we | 3649 | * the captured space will re-add to the the block_rsv struct |
3164 | * have space, fantastic, if not, you get -ENOSPC. Please call | 3650 | * after transaction commit |
3165 | * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of | ||
3166 | * items you reserved, since whatever metadata you needed should have already | ||
3167 | * been allocated. | ||
3168 | * | ||
3169 | * This will commit the transaction to make more space if we don't have enough | ||
3170 | * metadata space. THe only time we don't do this is if we're reserving space | ||
3171 | * inside of a transaction, then we will just return -ENOSPC and it is the | ||
3172 | * callers responsibility to handle it properly. | ||
3173 | */ | 3651 | */ |
3174 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) | 3652 | void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, |
3653 | struct btrfs_block_rsv *block_rsv) | ||
3175 | { | 3654 | { |
3176 | struct btrfs_fs_info *info = root->fs_info; | 3655 | block_rsv->durable = 1; |
3177 | struct btrfs_space_info *meta_sinfo; | 3656 | mutex_lock(&fs_info->durable_block_rsv_mutex); |
3178 | u64 num_bytes; | 3657 | list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list); |
3179 | u64 used; | 3658 | mutex_unlock(&fs_info->durable_block_rsv_mutex); |
3180 | u64 alloc_target; | 3659 | } |
3181 | int retries = 0; | ||
3182 | 3660 | ||
3183 | /* get the space info for where the metadata will live */ | 3661 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, |
3184 | alloc_target = btrfs_get_alloc_profile(root, 0); | 3662 | struct btrfs_root *root, |
3185 | meta_sinfo = __find_space_info(info, alloc_target); | 3663 | struct btrfs_block_rsv *block_rsv, |
3664 | u64 num_bytes) | ||
3665 | { | ||
3666 | int ret; | ||
3186 | 3667 | ||
3187 | num_bytes = calculate_bytes_needed(root, num_items); | 3668 | if (num_bytes == 0) |
3188 | again: | 3669 | return 0; |
3189 | spin_lock(&meta_sinfo->lock); | ||
3190 | 3670 | ||
3191 | if (unlikely(!meta_sinfo->bytes_root)) | 3671 | ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1); |
3192 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | 3672 | if (!ret) { |
3673 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | ||
3674 | return 0; | ||
3675 | } | ||
3193 | 3676 | ||
3194 | if (!retries) | 3677 | return ret; |
3195 | meta_sinfo->bytes_may_use += num_bytes; | 3678 | } |
3196 | 3679 | ||
3197 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 3680 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, |
3198 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | 3681 | struct btrfs_root *root, |
3199 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | 3682 | struct btrfs_block_rsv *block_rsv, |
3200 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | 3683 | u64 min_reserved, int min_factor) |
3684 | { | ||
3685 | u64 num_bytes = 0; | ||
3686 | int commit_trans = 0; | ||
3687 | int ret = -ENOSPC; | ||
3201 | 3688 | ||
3202 | if (used > meta_sinfo->total_bytes) { | 3689 | if (!block_rsv) |
3203 | retries++; | 3690 | return 0; |
3204 | if (retries == 1) { | ||
3205 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
3206 | goto again; | ||
3207 | retries++; | ||
3208 | } else { | ||
3209 | spin_unlock(&meta_sinfo->lock); | ||
3210 | } | ||
3211 | 3691 | ||
3212 | if (retries == 2) { | 3692 | spin_lock(&block_rsv->lock); |
3213 | flush_delalloc(root, meta_sinfo); | 3693 | if (min_factor > 0) |
3214 | goto again; | 3694 | num_bytes = div_factor(block_rsv->size, min_factor); |
3695 | if (min_reserved > num_bytes) | ||
3696 | num_bytes = min_reserved; | ||
3697 | |||
3698 | if (block_rsv->reserved >= num_bytes) { | ||
3699 | ret = 0; | ||
3700 | } else { | ||
3701 | num_bytes -= block_rsv->reserved; | ||
3702 | if (block_rsv->durable && | ||
3703 | block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes) | ||
3704 | commit_trans = 1; | ||
3705 | } | ||
3706 | spin_unlock(&block_rsv->lock); | ||
3707 | if (!ret) | ||
3708 | return 0; | ||
3709 | |||
3710 | if (block_rsv->refill_used) { | ||
3711 | ret = reserve_metadata_bytes(trans, root, block_rsv, | ||
3712 | num_bytes, 0); | ||
3713 | if (!ret) { | ||
3714 | block_rsv_add_bytes(block_rsv, num_bytes, 0); | ||
3715 | return 0; | ||
3215 | } | 3716 | } |
3216 | spin_lock(&meta_sinfo->lock); | 3717 | } |
3217 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3218 | spin_unlock(&meta_sinfo->lock); | ||
3219 | 3718 | ||
3220 | dump_space_info(meta_sinfo, 0, 0); | 3719 | if (commit_trans) { |
3221 | return -ENOSPC; | 3720 | if (trans) |
3721 | return -EAGAIN; | ||
3722 | |||
3723 | trans = btrfs_join_transaction(root, 1); | ||
3724 | BUG_ON(IS_ERR(trans)); | ||
3725 | ret = btrfs_commit_transaction(trans, root); | ||
3726 | return 0; | ||
3222 | } | 3727 | } |
3223 | 3728 | ||
3224 | check_force_delalloc(meta_sinfo); | 3729 | return -ENOSPC; |
3225 | spin_unlock(&meta_sinfo->lock); | 3730 | } |
3226 | 3731 | ||
3227 | return 0; | 3732 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, |
3733 | struct btrfs_block_rsv *dst_rsv, | ||
3734 | u64 num_bytes) | ||
3735 | { | ||
3736 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
3737 | } | ||
3738 | |||
3739 | void btrfs_block_rsv_release(struct btrfs_root *root, | ||
3740 | struct btrfs_block_rsv *block_rsv, | ||
3741 | u64 num_bytes) | ||
3742 | { | ||
3743 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | ||
3744 | if (global_rsv->full || global_rsv == block_rsv || | ||
3745 | block_rsv->space_info != global_rsv->space_info) | ||
3746 | global_rsv = NULL; | ||
3747 | block_rsv_release_bytes(block_rsv, global_rsv, num_bytes); | ||
3228 | } | 3748 | } |
3229 | 3749 | ||
3230 | /* | 3750 | /* |
3231 | * This will check the space that the inode allocates from to make sure we have | 3751 | * helper to calculate size of global block reservation. |
3232 | * enough space for bytes. | 3752 | * the desired value is sum of space used by extent tree, |
3753 | * checksum tree and root tree | ||
3233 | */ | 3754 | */ |
3234 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 3755 | static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) |
3235 | u64 bytes) | ||
3236 | { | 3756 | { |
3237 | struct btrfs_space_info *data_sinfo; | 3757 | struct btrfs_space_info *sinfo; |
3238 | int ret = 0, committed = 0; | 3758 | u64 num_bytes; |
3759 | u64 meta_used; | ||
3760 | u64 data_used; | ||
3761 | int csum_size = btrfs_super_csum_size(&fs_info->super_copy); | ||
3762 | #if 0 | ||
3763 | /* | ||
3764 | * per tree used space accounting can be inaccuracy, so we | ||
3765 | * can't rely on it. | ||
3766 | */ | ||
3767 | spin_lock(&fs_info->extent_root->accounting_lock); | ||
3768 | num_bytes = btrfs_root_used(&fs_info->extent_root->root_item); | ||
3769 | spin_unlock(&fs_info->extent_root->accounting_lock); | ||
3239 | 3770 | ||
3240 | /* make sure bytes are sectorsize aligned */ | 3771 | spin_lock(&fs_info->csum_root->accounting_lock); |
3241 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 3772 | num_bytes += btrfs_root_used(&fs_info->csum_root->root_item); |
3773 | spin_unlock(&fs_info->csum_root->accounting_lock); | ||
3242 | 3774 | ||
3243 | data_sinfo = BTRFS_I(inode)->space_info; | 3775 | spin_lock(&fs_info->tree_root->accounting_lock); |
3244 | if (!data_sinfo) | 3776 | num_bytes += btrfs_root_used(&fs_info->tree_root->root_item); |
3245 | goto alloc; | 3777 | spin_unlock(&fs_info->tree_root->accounting_lock); |
3778 | #endif | ||
3779 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); | ||
3780 | spin_lock(&sinfo->lock); | ||
3781 | data_used = sinfo->bytes_used; | ||
3782 | spin_unlock(&sinfo->lock); | ||
3246 | 3783 | ||
3247 | again: | 3784 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
3248 | /* make sure we have enough space to handle the data first */ | 3785 | spin_lock(&sinfo->lock); |
3249 | spin_lock(&data_sinfo->lock); | 3786 | if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) |
3250 | if (data_sinfo->total_bytes - data_sinfo->bytes_used - | 3787 | data_used = 0; |
3251 | data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - | 3788 | meta_used = sinfo->bytes_used; |
3252 | data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - | 3789 | spin_unlock(&sinfo->lock); |
3253 | data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { | ||
3254 | struct btrfs_trans_handle *trans; | ||
3255 | 3790 | ||
3256 | /* | 3791 | num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * |
3257 | * if we don't have enough free bytes in this space then we need | 3792 | csum_size * 2; |
3258 | * to alloc a new chunk. | 3793 | num_bytes += div64_u64(data_used + meta_used, 50); |
3259 | */ | ||
3260 | if (!data_sinfo->full) { | ||
3261 | u64 alloc_target; | ||
3262 | 3794 | ||
3263 | data_sinfo->force_alloc = 1; | 3795 | if (num_bytes * 3 > meta_used) |
3264 | spin_unlock(&data_sinfo->lock); | 3796 | num_bytes = div64_u64(meta_used, 3); |
3265 | alloc: | ||
3266 | alloc_target = btrfs_get_alloc_profile(root, 1); | ||
3267 | trans = btrfs_start_transaction(root, 1); | ||
3268 | if (!trans) | ||
3269 | return -ENOMEM; | ||
3270 | 3797 | ||
3271 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 3798 | return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); |
3272 | bytes + 2 * 1024 * 1024, | 3799 | } |
3273 | alloc_target, 0); | ||
3274 | btrfs_end_transaction(trans, root); | ||
3275 | if (ret) | ||
3276 | return ret; | ||
3277 | 3800 | ||
3278 | if (!data_sinfo) { | 3801 | static void update_global_block_rsv(struct btrfs_fs_info *fs_info) |
3279 | btrfs_set_inode_space_info(root, inode); | 3802 | { |
3280 | data_sinfo = BTRFS_I(inode)->space_info; | 3803 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; |
3281 | } | 3804 | struct btrfs_space_info *sinfo = block_rsv->space_info; |
3282 | goto again; | 3805 | u64 num_bytes; |
3283 | } | ||
3284 | spin_unlock(&data_sinfo->lock); | ||
3285 | 3806 | ||
3286 | /* commit the current transaction and try again */ | 3807 | num_bytes = calc_global_metadata_size(fs_info); |
3287 | if (!committed && !root->fs_info->open_ioctl_trans) { | ||
3288 | committed = 1; | ||
3289 | trans = btrfs_join_transaction(root, 1); | ||
3290 | if (!trans) | ||
3291 | return -ENOMEM; | ||
3292 | ret = btrfs_commit_transaction(trans, root); | ||
3293 | if (ret) | ||
3294 | return ret; | ||
3295 | goto again; | ||
3296 | } | ||
3297 | 3808 | ||
3298 | printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" | 3809 | spin_lock(&block_rsv->lock); |
3299 | ", %llu bytes_used, %llu bytes_reserved, " | 3810 | spin_lock(&sinfo->lock); |
3300 | "%llu bytes_pinned, %llu bytes_readonly, %llu may use " | 3811 | |
3301 | "%llu total\n", (unsigned long long)bytes, | 3812 | block_rsv->size = num_bytes; |
3302 | (unsigned long long)data_sinfo->bytes_delalloc, | 3813 | |
3303 | (unsigned long long)data_sinfo->bytes_used, | 3814 | num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + |
3304 | (unsigned long long)data_sinfo->bytes_reserved, | 3815 | sinfo->bytes_reserved + sinfo->bytes_readonly + |
3305 | (unsigned long long)data_sinfo->bytes_pinned, | 3816 | sinfo->bytes_may_use; |
3306 | (unsigned long long)data_sinfo->bytes_readonly, | 3817 | |
3307 | (unsigned long long)data_sinfo->bytes_may_use, | 3818 | if (sinfo->total_bytes > num_bytes) { |
3308 | (unsigned long long)data_sinfo->total_bytes); | 3819 | num_bytes = sinfo->total_bytes - num_bytes; |
3309 | return -ENOSPC; | 3820 | block_rsv->reserved += num_bytes; |
3821 | sinfo->bytes_reserved += num_bytes; | ||
3310 | } | 3822 | } |
3311 | data_sinfo->bytes_may_use += bytes; | ||
3312 | BTRFS_I(inode)->reserved_bytes += bytes; | ||
3313 | spin_unlock(&data_sinfo->lock); | ||
3314 | 3823 | ||
3315 | return 0; | 3824 | if (block_rsv->reserved >= block_rsv->size) { |
3825 | num_bytes = block_rsv->reserved - block_rsv->size; | ||
3826 | sinfo->bytes_reserved -= num_bytes; | ||
3827 | block_rsv->reserved = block_rsv->size; | ||
3828 | block_rsv->full = 1; | ||
3829 | } | ||
3830 | #if 0 | ||
3831 | printk(KERN_INFO"global block rsv size %llu reserved %llu\n", | ||
3832 | block_rsv->size, block_rsv->reserved); | ||
3833 | #endif | ||
3834 | spin_unlock(&sinfo->lock); | ||
3835 | spin_unlock(&block_rsv->lock); | ||
3316 | } | 3836 | } |
3317 | 3837 | ||
3318 | /* | 3838 | static void init_global_block_rsv(struct btrfs_fs_info *fs_info) |
3319 | * if there was an error for whatever reason after calling | ||
3320 | * btrfs_check_data_free_space, call this so we can cleanup the counters. | ||
3321 | */ | ||
3322 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | ||
3323 | struct inode *inode, u64 bytes) | ||
3324 | { | 3839 | { |
3325 | struct btrfs_space_info *data_sinfo; | 3840 | struct btrfs_space_info *space_info; |
3326 | 3841 | ||
3327 | /* make sure bytes are sectorsize aligned */ | 3842 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); |
3328 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 3843 | fs_info->chunk_block_rsv.space_info = space_info; |
3844 | fs_info->chunk_block_rsv.priority = 10; | ||
3329 | 3845 | ||
3330 | data_sinfo = BTRFS_I(inode)->space_info; | 3846 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
3331 | spin_lock(&data_sinfo->lock); | 3847 | fs_info->global_block_rsv.space_info = space_info; |
3332 | data_sinfo->bytes_may_use -= bytes; | 3848 | fs_info->global_block_rsv.priority = 10; |
3333 | BTRFS_I(inode)->reserved_bytes -= bytes; | 3849 | fs_info->global_block_rsv.refill_used = 1; |
3334 | spin_unlock(&data_sinfo->lock); | 3850 | fs_info->delalloc_block_rsv.space_info = space_info; |
3851 | fs_info->trans_block_rsv.space_info = space_info; | ||
3852 | fs_info->empty_block_rsv.space_info = space_info; | ||
3853 | fs_info->empty_block_rsv.priority = 10; | ||
3854 | |||
3855 | fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; | ||
3856 | fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; | ||
3857 | fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; | ||
3858 | fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; | ||
3859 | fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; | ||
3860 | |||
3861 | btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv); | ||
3862 | |||
3863 | btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv); | ||
3864 | |||
3865 | update_global_block_rsv(fs_info); | ||
3335 | } | 3866 | } |
3336 | 3867 | ||
3337 | /* called when we are adding a delalloc extent to the inode's io_tree */ | 3868 | static void release_global_block_rsv(struct btrfs_fs_info *fs_info) |
3338 | void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | ||
3339 | u64 bytes) | ||
3340 | { | 3869 | { |
3341 | struct btrfs_space_info *data_sinfo; | 3870 | block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1); |
3871 | WARN_ON(fs_info->delalloc_block_rsv.size > 0); | ||
3872 | WARN_ON(fs_info->delalloc_block_rsv.reserved > 0); | ||
3873 | WARN_ON(fs_info->trans_block_rsv.size > 0); | ||
3874 | WARN_ON(fs_info->trans_block_rsv.reserved > 0); | ||
3875 | WARN_ON(fs_info->chunk_block_rsv.size > 0); | ||
3876 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); | ||
3877 | } | ||
3342 | 3878 | ||
3343 | /* get the space info for where this inode will be storing its data */ | 3879 | static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items) |
3344 | data_sinfo = BTRFS_I(inode)->space_info; | 3880 | { |
3881 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | ||
3882 | 3 * num_items; | ||
3883 | } | ||
3345 | 3884 | ||
3346 | /* make sure we have enough space to handle the data first */ | 3885 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, |
3347 | spin_lock(&data_sinfo->lock); | 3886 | struct btrfs_root *root, |
3348 | data_sinfo->bytes_delalloc += bytes; | 3887 | int num_items) |
3888 | { | ||
3889 | u64 num_bytes; | ||
3890 | int ret; | ||
3349 | 3891 | ||
3350 | /* | 3892 | if (num_items == 0 || root->fs_info->chunk_root == root) |
3351 | * we are adding a delalloc extent without calling | 3893 | return 0; |
3352 | * btrfs_check_data_free_space first. This happens on a weird | 3894 | |
3353 | * writepage condition, but shouldn't hurt our accounting | 3895 | num_bytes = calc_trans_metadata_size(root, num_items); |
3354 | */ | 3896 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, |
3355 | if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { | 3897 | num_bytes); |
3356 | data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; | 3898 | if (!ret) { |
3357 | BTRFS_I(inode)->reserved_bytes = 0; | 3899 | trans->bytes_reserved += num_bytes; |
3358 | } else { | 3900 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
3359 | data_sinfo->bytes_may_use -= bytes; | ||
3360 | BTRFS_I(inode)->reserved_bytes -= bytes; | ||
3361 | } | 3901 | } |
3902 | return ret; | ||
3903 | } | ||
3362 | 3904 | ||
3363 | spin_unlock(&data_sinfo->lock); | 3905 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
3906 | struct btrfs_root *root) | ||
3907 | { | ||
3908 | if (!trans->bytes_reserved) | ||
3909 | return; | ||
3910 | |||
3911 | BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); | ||
3912 | btrfs_block_rsv_release(root, trans->block_rsv, | ||
3913 | trans->bytes_reserved); | ||
3914 | trans->bytes_reserved = 0; | ||
3364 | } | 3915 | } |
3365 | 3916 | ||
3366 | /* called when we are clearing an delalloc extent from the inode's io_tree */ | 3917 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, |
3367 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | 3918 | struct inode *inode) |
3368 | u64 bytes) | ||
3369 | { | 3919 | { |
3370 | struct btrfs_space_info *info; | 3920 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3921 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); | ||
3922 | struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; | ||
3371 | 3923 | ||
3372 | info = BTRFS_I(inode)->space_info; | 3924 | /* |
3925 | * one for deleting orphan item, one for updating inode and | ||
3926 | * two for calling btrfs_truncate_inode_items. | ||
3927 | * | ||
3928 | * btrfs_truncate_inode_items is a delete operation, it frees | ||
3929 | * more space than it uses in most cases. So two units of | ||
3930 | * metadata space should be enough for calling it many times. | ||
3931 | * If all of the metadata space is used, we can commit | ||
3932 | * transaction and use space it freed. | ||
3933 | */ | ||
3934 | u64 num_bytes = calc_trans_metadata_size(root, 4); | ||
3935 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
3936 | } | ||
3373 | 3937 | ||
3374 | spin_lock(&info->lock); | 3938 | void btrfs_orphan_release_metadata(struct inode *inode) |
3375 | info->bytes_delalloc -= bytes; | 3939 | { |
3376 | spin_unlock(&info->lock); | 3940 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3941 | u64 num_bytes = calc_trans_metadata_size(root, 4); | ||
3942 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); | ||
3377 | } | 3943 | } |
3378 | 3944 | ||
3379 | static void force_metadata_allocation(struct btrfs_fs_info *info) | 3945 | int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, |
3946 | struct btrfs_pending_snapshot *pending) | ||
3380 | { | 3947 | { |
3381 | struct list_head *head = &info->space_info; | 3948 | struct btrfs_root *root = pending->root; |
3382 | struct btrfs_space_info *found; | 3949 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); |
3950 | struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; | ||
3951 | /* | ||
3952 | * two for root back/forward refs, two for directory entries | ||
3953 | * and one for root of the snapshot. | ||
3954 | */ | ||
3955 | u64 num_bytes = calc_trans_metadata_size(root, 5); | ||
3956 | dst_rsv->space_info = src_rsv->space_info; | ||
3957 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
3958 | } | ||
3383 | 3959 | ||
3384 | rcu_read_lock(); | 3960 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) |
3385 | list_for_each_entry_rcu(found, head, list) { | 3961 | { |
3386 | if (found->flags & BTRFS_BLOCK_GROUP_METADATA) | 3962 | return num_bytes >>= 3; |
3387 | found->force_alloc = 1; | ||
3388 | } | ||
3389 | rcu_read_unlock(); | ||
3390 | } | 3963 | } |
3391 | 3964 | ||
3392 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 3965 | int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) |
3393 | struct btrfs_root *extent_root, u64 alloc_bytes, | ||
3394 | u64 flags, int force) | ||
3395 | { | 3966 | { |
3396 | struct btrfs_space_info *space_info; | 3967 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3397 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3968 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
3398 | u64 thresh; | 3969 | u64 to_reserve; |
3399 | int ret = 0; | 3970 | int nr_extents; |
3971 | int ret; | ||
3400 | 3972 | ||
3401 | mutex_lock(&fs_info->chunk_mutex); | 3973 | if (btrfs_transaction_in_commit(root->fs_info)) |
3974 | schedule_timeout(1); | ||
3402 | 3975 | ||
3403 | flags = btrfs_reduce_alloc_profile(extent_root, flags); | 3976 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
3404 | 3977 | ||
3405 | space_info = __find_space_info(extent_root->fs_info, flags); | 3978 | spin_lock(&BTRFS_I(inode)->accounting_lock); |
3406 | if (!space_info) { | 3979 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; |
3407 | ret = update_space_info(extent_root->fs_info, flags, | 3980 | if (nr_extents > BTRFS_I(inode)->reserved_extents) { |
3408 | 0, 0, &space_info); | 3981 | nr_extents -= BTRFS_I(inode)->reserved_extents; |
3409 | BUG_ON(ret); | 3982 | to_reserve = calc_trans_metadata_size(root, nr_extents); |
3983 | } else { | ||
3984 | nr_extents = 0; | ||
3985 | to_reserve = 0; | ||
3410 | } | 3986 | } |
3411 | BUG_ON(!space_info); | 3987 | spin_unlock(&BTRFS_I(inode)->accounting_lock); |
3412 | 3988 | ||
3413 | spin_lock(&space_info->lock); | 3989 | to_reserve += calc_csum_metadata_size(inode, num_bytes); |
3414 | if (space_info->force_alloc) | 3990 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); |
3415 | force = 1; | 3991 | if (ret) |
3416 | if (space_info->full) { | 3992 | return ret; |
3417 | spin_unlock(&space_info->lock); | ||
3418 | goto out; | ||
3419 | } | ||
3420 | 3993 | ||
3421 | thresh = space_info->total_bytes - space_info->bytes_readonly; | 3994 | spin_lock(&BTRFS_I(inode)->accounting_lock); |
3422 | thresh = div_factor(thresh, 8); | 3995 | BTRFS_I(inode)->reserved_extents += nr_extents; |
3423 | if (!force && | 3996 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
3424 | (space_info->bytes_used + space_info->bytes_pinned + | 3997 | spin_unlock(&BTRFS_I(inode)->accounting_lock); |
3425 | space_info->bytes_reserved + alloc_bytes) < thresh) { | ||
3426 | spin_unlock(&space_info->lock); | ||
3427 | goto out; | ||
3428 | } | ||
3429 | spin_unlock(&space_info->lock); | ||
3430 | 3998 | ||
3431 | /* | 3999 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
3432 | * if we're doing a data chunk, go ahead and make sure that | 4000 | |
3433 | * we keep a reasonable number of metadata chunks allocated in the | 4001 | if (block_rsv->size > 512 * 1024 * 1024) |
3434 | * FS as well. | 4002 | shrink_delalloc(NULL, root, to_reserve, 0); |
3435 | */ | 4003 | |
3436 | if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { | 4004 | return 0; |
3437 | fs_info->data_chunk_allocations++; | 4005 | } |
3438 | if (!(fs_info->data_chunk_allocations % | 4006 | |
3439 | fs_info->metadata_ratio)) | 4007 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) |
3440 | force_metadata_allocation(fs_info); | 4008 | { |
4009 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
4010 | u64 to_free; | ||
4011 | int nr_extents; | ||
4012 | |||
4013 | num_bytes = ALIGN(num_bytes, root->sectorsize); | ||
4014 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | ||
4015 | |||
4016 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
4017 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | ||
4018 | if (nr_extents < BTRFS_I(inode)->reserved_extents) { | ||
4019 | nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents; | ||
4020 | BTRFS_I(inode)->reserved_extents -= nr_extents; | ||
4021 | } else { | ||
4022 | nr_extents = 0; | ||
3441 | } | 4023 | } |
4024 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3442 | 4025 | ||
3443 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 4026 | to_free = calc_csum_metadata_size(inode, num_bytes); |
3444 | spin_lock(&space_info->lock); | 4027 | if (nr_extents > 0) |
4028 | to_free += calc_trans_metadata_size(root, nr_extents); | ||
4029 | |||
4030 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | ||
4031 | to_free); | ||
4032 | } | ||
4033 | |||
4034 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) | ||
4035 | { | ||
4036 | int ret; | ||
4037 | |||
4038 | ret = btrfs_check_data_free_space(inode, num_bytes); | ||
3445 | if (ret) | 4039 | if (ret) |
3446 | space_info->full = 1; | 4040 | return ret; |
3447 | space_info->force_alloc = 0; | 4041 | |
3448 | spin_unlock(&space_info->lock); | 4042 | ret = btrfs_delalloc_reserve_metadata(inode, num_bytes); |
3449 | out: | 4043 | if (ret) { |
3450 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 4044 | btrfs_free_reserved_data_space(inode, num_bytes); |
3451 | return ret; | 4045 | return ret; |
4046 | } | ||
4047 | |||
4048 | return 0; | ||
4049 | } | ||
4050 | |||
4051 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) | ||
4052 | { | ||
4053 | btrfs_delalloc_release_metadata(inode, num_bytes); | ||
4054 | btrfs_free_reserved_data_space(inode, num_bytes); | ||
3452 | } | 4055 | } |
3453 | 4056 | ||
3454 | static int update_block_group(struct btrfs_trans_handle *trans, | 4057 | static int update_block_group(struct btrfs_trans_handle *trans, |
3455 | struct btrfs_root *root, | 4058 | struct btrfs_root *root, |
3456 | u64 bytenr, u64 num_bytes, int alloc, | 4059 | u64 bytenr, u64 num_bytes, int alloc) |
3457 | int mark_free) | ||
3458 | { | 4060 | { |
3459 | struct btrfs_block_group_cache *cache; | 4061 | struct btrfs_block_group_cache *cache = NULL; |
3460 | struct btrfs_fs_info *info = root->fs_info; | 4062 | struct btrfs_fs_info *info = root->fs_info; |
3461 | u64 total = num_bytes; | 4063 | u64 total = num_bytes; |
3462 | u64 old_val; | 4064 | u64 old_val; |
3463 | u64 byte_in_group; | 4065 | u64 byte_in_group; |
4066 | int factor; | ||
3464 | 4067 | ||
3465 | /* block accounting for super block */ | 4068 | /* block accounting for super block */ |
3466 | spin_lock(&info->delalloc_lock); | 4069 | spin_lock(&info->delalloc_lock); |
@@ -3476,11 +4079,31 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3476 | cache = btrfs_lookup_block_group(info, bytenr); | 4079 | cache = btrfs_lookup_block_group(info, bytenr); |
3477 | if (!cache) | 4080 | if (!cache) |
3478 | return -1; | 4081 | return -1; |
4082 | if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | | ||
4083 | BTRFS_BLOCK_GROUP_RAID1 | | ||
4084 | BTRFS_BLOCK_GROUP_RAID10)) | ||
4085 | factor = 2; | ||
4086 | else | ||
4087 | factor = 1; | ||
4088 | /* | ||
4089 | * If this block group has free space cache written out, we | ||
4090 | * need to make sure to load it if we are removing space. This | ||
4091 | * is because we need the unpinning stage to actually add the | ||
4092 | * space back to the block group, otherwise we will leak space. | ||
4093 | */ | ||
4094 | if (!alloc && cache->cached == BTRFS_CACHE_NO) | ||
4095 | cache_block_group(cache, trans, NULL, 1); | ||
4096 | |||
3479 | byte_in_group = bytenr - cache->key.objectid; | 4097 | byte_in_group = bytenr - cache->key.objectid; |
3480 | WARN_ON(byte_in_group > cache->key.offset); | 4098 | WARN_ON(byte_in_group > cache->key.offset); |
3481 | 4099 | ||
3482 | spin_lock(&cache->space_info->lock); | 4100 | spin_lock(&cache->space_info->lock); |
3483 | spin_lock(&cache->lock); | 4101 | spin_lock(&cache->lock); |
4102 | |||
4103 | if (btrfs_super_cache_generation(&info->super_copy) != 0 && | ||
4104 | cache->disk_cache_state < BTRFS_DC_CLEAR) | ||
4105 | cache->disk_cache_state = BTRFS_DC_CLEAR; | ||
4106 | |||
3484 | cache->dirty = 1; | 4107 | cache->dirty = 1; |
3485 | old_val = btrfs_block_group_used(&cache->item); | 4108 | old_val = btrfs_block_group_used(&cache->item); |
3486 | num_bytes = min(total, cache->key.offset - byte_in_group); | 4109 | num_bytes = min(total, cache->key.offset - byte_in_group); |
@@ -3488,31 +4111,24 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3488 | old_val += num_bytes; | 4111 | old_val += num_bytes; |
3489 | btrfs_set_block_group_used(&cache->item, old_val); | 4112 | btrfs_set_block_group_used(&cache->item, old_val); |
3490 | cache->reserved -= num_bytes; | 4113 | cache->reserved -= num_bytes; |
3491 | cache->space_info->bytes_used += num_bytes; | ||
3492 | cache->space_info->bytes_reserved -= num_bytes; | 4114 | cache->space_info->bytes_reserved -= num_bytes; |
3493 | if (cache->ro) | 4115 | cache->space_info->bytes_used += num_bytes; |
3494 | cache->space_info->bytes_readonly -= num_bytes; | 4116 | cache->space_info->disk_used += num_bytes * factor; |
3495 | spin_unlock(&cache->lock); | 4117 | spin_unlock(&cache->lock); |
3496 | spin_unlock(&cache->space_info->lock); | 4118 | spin_unlock(&cache->space_info->lock); |
3497 | } else { | 4119 | } else { |
3498 | old_val -= num_bytes; | 4120 | old_val -= num_bytes; |
3499 | cache->space_info->bytes_used -= num_bytes; | ||
3500 | if (cache->ro) | ||
3501 | cache->space_info->bytes_readonly += num_bytes; | ||
3502 | btrfs_set_block_group_used(&cache->item, old_val); | 4121 | btrfs_set_block_group_used(&cache->item, old_val); |
4122 | cache->pinned += num_bytes; | ||
4123 | cache->space_info->bytes_pinned += num_bytes; | ||
4124 | cache->space_info->bytes_used -= num_bytes; | ||
4125 | cache->space_info->disk_used -= num_bytes * factor; | ||
3503 | spin_unlock(&cache->lock); | 4126 | spin_unlock(&cache->lock); |
3504 | spin_unlock(&cache->space_info->lock); | 4127 | spin_unlock(&cache->space_info->lock); |
3505 | if (mark_free) { | ||
3506 | int ret; | ||
3507 | |||
3508 | ret = btrfs_discard_extent(root, bytenr, | ||
3509 | num_bytes); | ||
3510 | WARN_ON(ret); | ||
3511 | 4128 | ||
3512 | ret = btrfs_add_free_space(cache, bytenr, | 4129 | set_extent_dirty(info->pinned_extents, |
3513 | num_bytes); | 4130 | bytenr, bytenr + num_bytes - 1, |
3514 | WARN_ON(ret); | 4131 | GFP_NOFS | __GFP_NOFAIL); |
3515 | } | ||
3516 | } | 4132 | } |
3517 | btrfs_put_block_group(cache); | 4133 | btrfs_put_block_group(cache); |
3518 | total -= num_bytes; | 4134 | total -= num_bytes; |
@@ -3536,18 +4152,10 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
3536 | return bytenr; | 4152 | return bytenr; |
3537 | } | 4153 | } |
3538 | 4154 | ||
3539 | /* | 4155 | static int pin_down_extent(struct btrfs_root *root, |
3540 | * this function must be called within transaction | 4156 | struct btrfs_block_group_cache *cache, |
3541 | */ | 4157 | u64 bytenr, u64 num_bytes, int reserved) |
3542 | int btrfs_pin_extent(struct btrfs_root *root, | ||
3543 | u64 bytenr, u64 num_bytes, int reserved) | ||
3544 | { | 4158 | { |
3545 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
3546 | struct btrfs_block_group_cache *cache; | ||
3547 | |||
3548 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
3549 | BUG_ON(!cache); | ||
3550 | |||
3551 | spin_lock(&cache->space_info->lock); | 4159 | spin_lock(&cache->space_info->lock); |
3552 | spin_lock(&cache->lock); | 4160 | spin_lock(&cache->lock); |
3553 | cache->pinned += num_bytes; | 4161 | cache->pinned += num_bytes; |
@@ -3559,28 +4167,68 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
3559 | spin_unlock(&cache->lock); | 4167 | spin_unlock(&cache->lock); |
3560 | spin_unlock(&cache->space_info->lock); | 4168 | spin_unlock(&cache->space_info->lock); |
3561 | 4169 | ||
3562 | btrfs_put_block_group(cache); | 4170 | set_extent_dirty(root->fs_info->pinned_extents, bytenr, |
4171 | bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); | ||
4172 | return 0; | ||
4173 | } | ||
3563 | 4174 | ||
3564 | set_extent_dirty(fs_info->pinned_extents, | 4175 | /* |
3565 | bytenr, bytenr + num_bytes - 1, GFP_NOFS); | 4176 | * this function must be called within transaction |
4177 | */ | ||
4178 | int btrfs_pin_extent(struct btrfs_root *root, | ||
4179 | u64 bytenr, u64 num_bytes, int reserved) | ||
4180 | { | ||
4181 | struct btrfs_block_group_cache *cache; | ||
4182 | |||
4183 | cache = btrfs_lookup_block_group(root->fs_info, bytenr); | ||
4184 | BUG_ON(!cache); | ||
4185 | |||
4186 | pin_down_extent(root, cache, bytenr, num_bytes, reserved); | ||
4187 | |||
4188 | btrfs_put_block_group(cache); | ||
3566 | return 0; | 4189 | return 0; |
3567 | } | 4190 | } |
3568 | 4191 | ||
3569 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | 4192 | /* |
3570 | u64 num_bytes, int reserve) | 4193 | * update size of reserved extents. this function may return -EAGAIN |
4194 | * if 'reserve' is true or 'sinfo' is false. | ||
4195 | */ | ||
4196 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
4197 | u64 num_bytes, int reserve, int sinfo) | ||
3571 | { | 4198 | { |
3572 | spin_lock(&cache->space_info->lock); | 4199 | int ret = 0; |
3573 | spin_lock(&cache->lock); | 4200 | if (sinfo) { |
3574 | if (reserve) { | 4201 | struct btrfs_space_info *space_info = cache->space_info; |
3575 | cache->reserved += num_bytes; | 4202 | spin_lock(&space_info->lock); |
3576 | cache->space_info->bytes_reserved += num_bytes; | 4203 | spin_lock(&cache->lock); |
4204 | if (reserve) { | ||
4205 | if (cache->ro) { | ||
4206 | ret = -EAGAIN; | ||
4207 | } else { | ||
4208 | cache->reserved += num_bytes; | ||
4209 | space_info->bytes_reserved += num_bytes; | ||
4210 | } | ||
4211 | } else { | ||
4212 | if (cache->ro) | ||
4213 | space_info->bytes_readonly += num_bytes; | ||
4214 | cache->reserved -= num_bytes; | ||
4215 | space_info->bytes_reserved -= num_bytes; | ||
4216 | } | ||
4217 | spin_unlock(&cache->lock); | ||
4218 | spin_unlock(&space_info->lock); | ||
3577 | } else { | 4219 | } else { |
3578 | cache->reserved -= num_bytes; | 4220 | spin_lock(&cache->lock); |
3579 | cache->space_info->bytes_reserved -= num_bytes; | 4221 | if (cache->ro) { |
4222 | ret = -EAGAIN; | ||
4223 | } else { | ||
4224 | if (reserve) | ||
4225 | cache->reserved += num_bytes; | ||
4226 | else | ||
4227 | cache->reserved -= num_bytes; | ||
4228 | } | ||
4229 | spin_unlock(&cache->lock); | ||
3580 | } | 4230 | } |
3581 | spin_unlock(&cache->lock); | 4231 | return ret; |
3582 | spin_unlock(&cache->space_info->lock); | ||
3583 | return 0; | ||
3584 | } | 4232 | } |
3585 | 4233 | ||
3586 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | 4234 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
@@ -3611,6 +4259,8 @@ int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
3611 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | 4259 | fs_info->pinned_extents = &fs_info->freed_extents[0]; |
3612 | 4260 | ||
3613 | up_write(&fs_info->extent_commit_sem); | 4261 | up_write(&fs_info->extent_commit_sem); |
4262 | |||
4263 | update_global_block_rsv(fs_info); | ||
3614 | return 0; | 4264 | return 0; |
3615 | } | 4265 | } |
3616 | 4266 | ||
@@ -3637,14 +4287,21 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
3637 | btrfs_add_free_space(cache, start, len); | 4287 | btrfs_add_free_space(cache, start, len); |
3638 | } | 4288 | } |
3639 | 4289 | ||
4290 | start += len; | ||
4291 | |||
3640 | spin_lock(&cache->space_info->lock); | 4292 | spin_lock(&cache->space_info->lock); |
3641 | spin_lock(&cache->lock); | 4293 | spin_lock(&cache->lock); |
3642 | cache->pinned -= len; | 4294 | cache->pinned -= len; |
3643 | cache->space_info->bytes_pinned -= len; | 4295 | cache->space_info->bytes_pinned -= len; |
4296 | if (cache->ro) { | ||
4297 | cache->space_info->bytes_readonly += len; | ||
4298 | } else if (cache->reserved_pinned > 0) { | ||
4299 | len = min(len, cache->reserved_pinned); | ||
4300 | cache->reserved_pinned -= len; | ||
4301 | cache->space_info->bytes_reserved += len; | ||
4302 | } | ||
3644 | spin_unlock(&cache->lock); | 4303 | spin_unlock(&cache->lock); |
3645 | spin_unlock(&cache->space_info->lock); | 4304 | spin_unlock(&cache->space_info->lock); |
3646 | |||
3647 | start += len; | ||
3648 | } | 4305 | } |
3649 | 4306 | ||
3650 | if (cache) | 4307 | if (cache) |
@@ -3657,8 +4314,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3657 | { | 4314 | { |
3658 | struct btrfs_fs_info *fs_info = root->fs_info; | 4315 | struct btrfs_fs_info *fs_info = root->fs_info; |
3659 | struct extent_io_tree *unpin; | 4316 | struct extent_io_tree *unpin; |
4317 | struct btrfs_block_rsv *block_rsv; | ||
4318 | struct btrfs_block_rsv *next_rsv; | ||
3660 | u64 start; | 4319 | u64 start; |
3661 | u64 end; | 4320 | u64 end; |
4321 | int idx; | ||
3662 | int ret; | 4322 | int ret; |
3663 | 4323 | ||
3664 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | 4324 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) |
@@ -3679,59 +4339,30 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3679 | cond_resched(); | 4339 | cond_resched(); |
3680 | } | 4340 | } |
3681 | 4341 | ||
3682 | return ret; | 4342 | mutex_lock(&fs_info->durable_block_rsv_mutex); |
3683 | } | 4343 | list_for_each_entry_safe(block_rsv, next_rsv, |
3684 | 4344 | &fs_info->durable_block_rsv_list, list) { | |
3685 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | ||
3686 | struct btrfs_root *root, | ||
3687 | struct btrfs_path *path, | ||
3688 | u64 bytenr, u64 num_bytes, | ||
3689 | int is_data, int reserved, | ||
3690 | struct extent_buffer **must_clean) | ||
3691 | { | ||
3692 | int err = 0; | ||
3693 | struct extent_buffer *buf; | ||
3694 | |||
3695 | if (is_data) | ||
3696 | goto pinit; | ||
3697 | 4345 | ||
3698 | /* | 4346 | idx = trans->transid & 0x1; |
3699 | * discard is sloooow, and so triggering discards on | 4347 | if (block_rsv->freed[idx] > 0) { |
3700 | * individual btree blocks isn't a good plan. Just | 4348 | block_rsv_add_bytes(block_rsv, |
3701 | * pin everything in discard mode. | 4349 | block_rsv->freed[idx], 0); |
3702 | */ | 4350 | block_rsv->freed[idx] = 0; |
3703 | if (btrfs_test_opt(root, DISCARD)) | 4351 | } |
3704 | goto pinit; | 4352 | if (atomic_read(&block_rsv->usage) == 0) { |
3705 | 4353 | btrfs_block_rsv_release(root, block_rsv, (u64)-1); | |
3706 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); | ||
3707 | if (!buf) | ||
3708 | goto pinit; | ||
3709 | 4354 | ||
3710 | /* we can reuse a block if it hasn't been written | 4355 | if (block_rsv->freed[0] == 0 && |
3711 | * and it is from this transaction. We can't | 4356 | block_rsv->freed[1] == 0) { |
3712 | * reuse anything from the tree log root because | 4357 | list_del_init(&block_rsv->list); |
3713 | * it has tiny sub-transactions. | 4358 | kfree(block_rsv); |
3714 | */ | 4359 | } |
3715 | if (btrfs_buffer_uptodate(buf, 0) && | 4360 | } else { |
3716 | btrfs_try_tree_lock(buf)) { | 4361 | btrfs_block_rsv_release(root, block_rsv, 0); |
3717 | u64 header_owner = btrfs_header_owner(buf); | ||
3718 | u64 header_transid = btrfs_header_generation(buf); | ||
3719 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && | ||
3720 | header_transid == trans->transid && | ||
3721 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
3722 | *must_clean = buf; | ||
3723 | return 1; | ||
3724 | } | 4362 | } |
3725 | btrfs_tree_unlock(buf); | ||
3726 | } | 4363 | } |
3727 | free_extent_buffer(buf); | 4364 | mutex_unlock(&fs_info->durable_block_rsv_mutex); |
3728 | pinit: | ||
3729 | if (path) | ||
3730 | btrfs_set_path_blocking(path); | ||
3731 | /* unlocks the pinned mutex */ | ||
3732 | btrfs_pin_extent(root, bytenr, num_bytes, reserved); | ||
3733 | 4365 | ||
3734 | BUG_ON(err < 0); | ||
3735 | return 0; | 4366 | return 0; |
3736 | } | 4367 | } |
3737 | 4368 | ||
@@ -3892,9 +4523,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3892 | BUG_ON(ret); | 4523 | BUG_ON(ret); |
3893 | } | 4524 | } |
3894 | } else { | 4525 | } else { |
3895 | int mark_free = 0; | ||
3896 | struct extent_buffer *must_clean = NULL; | ||
3897 | |||
3898 | if (found_extent) { | 4526 | if (found_extent) { |
3899 | BUG_ON(is_data && refs_to_drop != | 4527 | BUG_ON(is_data && refs_to_drop != |
3900 | extent_data_ref_count(root, path, iref)); | 4528 | extent_data_ref_count(root, path, iref)); |
@@ -3907,31 +4535,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3907 | } | 4535 | } |
3908 | } | 4536 | } |
3909 | 4537 | ||
3910 | ret = pin_down_bytes(trans, root, path, bytenr, | ||
3911 | num_bytes, is_data, 0, &must_clean); | ||
3912 | if (ret > 0) | ||
3913 | mark_free = 1; | ||
3914 | BUG_ON(ret < 0); | ||
3915 | /* | ||
3916 | * it is going to be very rare for someone to be waiting | ||
3917 | * on the block we're freeing. del_items might need to | ||
3918 | * schedule, so rather than get fancy, just force it | ||
3919 | * to blocking here | ||
3920 | */ | ||
3921 | if (must_clean) | ||
3922 | btrfs_set_lock_blocking(must_clean); | ||
3923 | |||
3924 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 4538 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
3925 | num_to_del); | 4539 | num_to_del); |
3926 | BUG_ON(ret); | 4540 | BUG_ON(ret); |
3927 | btrfs_release_path(extent_root, path); | 4541 | btrfs_release_path(extent_root, path); |
3928 | 4542 | ||
3929 | if (must_clean) { | ||
3930 | clean_tree_block(NULL, root, must_clean); | ||
3931 | btrfs_tree_unlock(must_clean); | ||
3932 | free_extent_buffer(must_clean); | ||
3933 | } | ||
3934 | |||
3935 | if (is_data) { | 4543 | if (is_data) { |
3936 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); | 4544 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); |
3937 | BUG_ON(ret); | 4545 | BUG_ON(ret); |
@@ -3941,8 +4549,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3941 | (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); | 4549 | (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); |
3942 | } | 4550 | } |
3943 | 4551 | ||
3944 | ret = update_block_group(trans, root, bytenr, num_bytes, 0, | 4552 | ret = update_block_group(trans, root, bytenr, num_bytes, 0); |
3945 | mark_free); | ||
3946 | BUG_ON(ret); | 4553 | BUG_ON(ret); |
3947 | } | 4554 | } |
3948 | btrfs_free_path(path); | 4555 | btrfs_free_path(path); |
@@ -3950,7 +4557,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3950 | } | 4557 | } |
3951 | 4558 | ||
3952 | /* | 4559 | /* |
3953 | * when we free an extent, it is possible (and likely) that we free the last | 4560 | * when we free an block, it is possible (and likely) that we free the last |
3954 | * delayed ref for that extent as well. This searches the delayed ref tree for | 4561 | * delayed ref for that extent as well. This searches the delayed ref tree for |
3955 | * a given extent, and if there are no other delayed refs to be processed, it | 4562 | * a given extent, and if there are no other delayed refs to be processed, it |
3956 | * removes it from the tree. | 4563 | * removes it from the tree. |
@@ -3962,7 +4569,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
3962 | struct btrfs_delayed_ref_root *delayed_refs; | 4569 | struct btrfs_delayed_ref_root *delayed_refs; |
3963 | struct btrfs_delayed_ref_node *ref; | 4570 | struct btrfs_delayed_ref_node *ref; |
3964 | struct rb_node *node; | 4571 | struct rb_node *node; |
3965 | int ret; | 4572 | int ret = 0; |
3966 | 4573 | ||
3967 | delayed_refs = &trans->transaction->delayed_refs; | 4574 | delayed_refs = &trans->transaction->delayed_refs; |
3968 | spin_lock(&delayed_refs->lock); | 4575 | spin_lock(&delayed_refs->lock); |
@@ -4014,17 +4621,100 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
4014 | list_del_init(&head->cluster); | 4621 | list_del_init(&head->cluster); |
4015 | spin_unlock(&delayed_refs->lock); | 4622 | spin_unlock(&delayed_refs->lock); |
4016 | 4623 | ||
4017 | ret = run_one_delayed_ref(trans, root->fs_info->tree_root, | 4624 | BUG_ON(head->extent_op); |
4018 | &head->node, head->extent_op, | 4625 | if (head->must_insert_reserved) |
4019 | head->must_insert_reserved); | 4626 | ret = 1; |
4020 | BUG_ON(ret); | 4627 | |
4628 | mutex_unlock(&head->mutex); | ||
4021 | btrfs_put_delayed_ref(&head->node); | 4629 | btrfs_put_delayed_ref(&head->node); |
4022 | return 0; | 4630 | return ret; |
4023 | out: | 4631 | out: |
4024 | spin_unlock(&delayed_refs->lock); | 4632 | spin_unlock(&delayed_refs->lock); |
4025 | return 0; | 4633 | return 0; |
4026 | } | 4634 | } |
4027 | 4635 | ||
4636 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | ||
4637 | struct btrfs_root *root, | ||
4638 | struct extent_buffer *buf, | ||
4639 | u64 parent, int last_ref) | ||
4640 | { | ||
4641 | struct btrfs_block_rsv *block_rsv; | ||
4642 | struct btrfs_block_group_cache *cache = NULL; | ||
4643 | int ret; | ||
4644 | |||
4645 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
4646 | ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len, | ||
4647 | parent, root->root_key.objectid, | ||
4648 | btrfs_header_level(buf), | ||
4649 | BTRFS_DROP_DELAYED_REF, NULL); | ||
4650 | BUG_ON(ret); | ||
4651 | } | ||
4652 | |||
4653 | if (!last_ref) | ||
4654 | return; | ||
4655 | |||
4656 | block_rsv = get_block_rsv(trans, root); | ||
4657 | cache = btrfs_lookup_block_group(root->fs_info, buf->start); | ||
4658 | if (block_rsv->space_info != cache->space_info) | ||
4659 | goto out; | ||
4660 | |||
4661 | if (btrfs_header_generation(buf) == trans->transid) { | ||
4662 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
4663 | ret = check_ref_cleanup(trans, root, buf->start); | ||
4664 | if (!ret) | ||
4665 | goto pin; | ||
4666 | } | ||
4667 | |||
4668 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
4669 | pin_down_extent(root, cache, buf->start, buf->len, 1); | ||
4670 | goto pin; | ||
4671 | } | ||
4672 | |||
4673 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | ||
4674 | |||
4675 | btrfs_add_free_space(cache, buf->start, buf->len); | ||
4676 | ret = update_reserved_bytes(cache, buf->len, 0, 0); | ||
4677 | if (ret == -EAGAIN) { | ||
4678 | /* block group became read-only */ | ||
4679 | update_reserved_bytes(cache, buf->len, 0, 1); | ||
4680 | goto out; | ||
4681 | } | ||
4682 | |||
4683 | ret = 1; | ||
4684 | spin_lock(&block_rsv->lock); | ||
4685 | if (block_rsv->reserved < block_rsv->size) { | ||
4686 | block_rsv->reserved += buf->len; | ||
4687 | ret = 0; | ||
4688 | } | ||
4689 | spin_unlock(&block_rsv->lock); | ||
4690 | |||
4691 | if (ret) { | ||
4692 | spin_lock(&cache->space_info->lock); | ||
4693 | cache->space_info->bytes_reserved -= buf->len; | ||
4694 | spin_unlock(&cache->space_info->lock); | ||
4695 | } | ||
4696 | goto out; | ||
4697 | } | ||
4698 | pin: | ||
4699 | if (block_rsv->durable && !cache->ro) { | ||
4700 | ret = 0; | ||
4701 | spin_lock(&cache->lock); | ||
4702 | if (!cache->ro) { | ||
4703 | cache->reserved_pinned += buf->len; | ||
4704 | ret = 1; | ||
4705 | } | ||
4706 | spin_unlock(&cache->lock); | ||
4707 | |||
4708 | if (ret) { | ||
4709 | spin_lock(&block_rsv->lock); | ||
4710 | block_rsv->freed[trans->transid & 0x1] += buf->len; | ||
4711 | spin_unlock(&block_rsv->lock); | ||
4712 | } | ||
4713 | } | ||
4714 | out: | ||
4715 | btrfs_put_block_group(cache); | ||
4716 | } | ||
4717 | |||
4028 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 4718 | int btrfs_free_extent(struct btrfs_trans_handle *trans, |
4029 | struct btrfs_root *root, | 4719 | struct btrfs_root *root, |
4030 | u64 bytenr, u64 num_bytes, u64 parent, | 4720 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -4046,8 +4736,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4046 | parent, root_objectid, (int)owner, | 4736 | parent, root_objectid, (int)owner, |
4047 | BTRFS_DROP_DELAYED_REF, NULL); | 4737 | BTRFS_DROP_DELAYED_REF, NULL); |
4048 | BUG_ON(ret); | 4738 | BUG_ON(ret); |
4049 | ret = check_ref_cleanup(trans, root, bytenr); | ||
4050 | BUG_ON(ret); | ||
4051 | } else { | 4739 | } else { |
4052 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, | 4740 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, |
4053 | parent, root_objectid, owner, | 4741 | parent, root_objectid, owner, |
@@ -4057,21 +4745,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4057 | return ret; | 4745 | return ret; |
4058 | } | 4746 | } |
4059 | 4747 | ||
4060 | int btrfs_free_tree_block(struct btrfs_trans_handle *trans, | ||
4061 | struct btrfs_root *root, | ||
4062 | u64 bytenr, u32 blocksize, | ||
4063 | u64 parent, u64 root_objectid, int level) | ||
4064 | { | ||
4065 | u64 used; | ||
4066 | spin_lock(&root->node_lock); | ||
4067 | used = btrfs_root_used(&root->root_item) - blocksize; | ||
4068 | btrfs_set_root_used(&root->root_item, used); | ||
4069 | spin_unlock(&root->node_lock); | ||
4070 | |||
4071 | return btrfs_free_extent(trans, root, bytenr, blocksize, | ||
4072 | parent, root_objectid, level, 0); | ||
4073 | } | ||
4074 | |||
4075 | static u64 stripe_align(struct btrfs_root *root, u64 val) | 4748 | static u64 stripe_align(struct btrfs_root *root, u64 val) |
4076 | { | 4749 | { |
4077 | u64 mask = ((u64)root->stripesize - 1); | 4750 | u64 mask = ((u64)root->stripesize - 1); |
@@ -4124,6 +4797,22 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | |||
4124 | return 0; | 4797 | return 0; |
4125 | } | 4798 | } |
4126 | 4799 | ||
4800 | static int get_block_group_index(struct btrfs_block_group_cache *cache) | ||
4801 | { | ||
4802 | int index; | ||
4803 | if (cache->flags & BTRFS_BLOCK_GROUP_RAID10) | ||
4804 | index = 0; | ||
4805 | else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1) | ||
4806 | index = 1; | ||
4807 | else if (cache->flags & BTRFS_BLOCK_GROUP_DUP) | ||
4808 | index = 2; | ||
4809 | else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) | ||
4810 | index = 3; | ||
4811 | else | ||
4812 | index = 4; | ||
4813 | return index; | ||
4814 | } | ||
4815 | |||
4127 | enum btrfs_loop_type { | 4816 | enum btrfs_loop_type { |
4128 | LOOP_FIND_IDEAL = 0, | 4817 | LOOP_FIND_IDEAL = 0, |
4129 | LOOP_CACHING_NOWAIT = 1, | 4818 | LOOP_CACHING_NOWAIT = 1, |
@@ -4145,7 +4834,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4145 | u64 num_bytes, u64 empty_size, | 4834 | u64 num_bytes, u64 empty_size, |
4146 | u64 search_start, u64 search_end, | 4835 | u64 search_start, u64 search_end, |
4147 | u64 hint_byte, struct btrfs_key *ins, | 4836 | u64 hint_byte, struct btrfs_key *ins, |
4148 | u64 exclude_start, u64 exclude_nr, | ||
4149 | int data) | 4837 | int data) |
4150 | { | 4838 | { |
4151 | int ret = 0; | 4839 | int ret = 0; |
@@ -4158,9 +4846,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4158 | struct btrfs_space_info *space_info; | 4846 | struct btrfs_space_info *space_info; |
4159 | int last_ptr_loop = 0; | 4847 | int last_ptr_loop = 0; |
4160 | int loop = 0; | 4848 | int loop = 0; |
4849 | int index = 0; | ||
4161 | bool found_uncached_bg = false; | 4850 | bool found_uncached_bg = false; |
4162 | bool failed_cluster_refill = false; | 4851 | bool failed_cluster_refill = false; |
4163 | bool failed_alloc = false; | 4852 | bool failed_alloc = false; |
4853 | bool use_cluster = true; | ||
4164 | u64 ideal_cache_percent = 0; | 4854 | u64 ideal_cache_percent = 0; |
4165 | u64 ideal_cache_offset = 0; | 4855 | u64 ideal_cache_offset = 0; |
4166 | 4856 | ||
@@ -4170,17 +4860,29 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4170 | ins->offset = 0; | 4860 | ins->offset = 0; |
4171 | 4861 | ||
4172 | space_info = __find_space_info(root->fs_info, data); | 4862 | space_info = __find_space_info(root->fs_info, data); |
4863 | if (!space_info) { | ||
4864 | printk(KERN_ERR "No space info for %d\n", data); | ||
4865 | return -ENOSPC; | ||
4866 | } | ||
4867 | |||
4868 | /* | ||
4869 | * If the space info is for both data and metadata it means we have a | ||
4870 | * small filesystem and we can't use the clustering stuff. | ||
4871 | */ | ||
4872 | if (btrfs_mixed_space_info(space_info)) | ||
4873 | use_cluster = false; | ||
4173 | 4874 | ||
4174 | if (orig_root->ref_cows || empty_size) | 4875 | if (orig_root->ref_cows || empty_size) |
4175 | allowed_chunk_alloc = 1; | 4876 | allowed_chunk_alloc = 1; |
4176 | 4877 | ||
4177 | if (data & BTRFS_BLOCK_GROUP_METADATA) { | 4878 | if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { |
4178 | last_ptr = &root->fs_info->meta_alloc_cluster; | 4879 | last_ptr = &root->fs_info->meta_alloc_cluster; |
4179 | if (!btrfs_test_opt(root, SSD)) | 4880 | if (!btrfs_test_opt(root, SSD)) |
4180 | empty_cluster = 64 * 1024; | 4881 | empty_cluster = 64 * 1024; |
4181 | } | 4882 | } |
4182 | 4883 | ||
4183 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { | 4884 | if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster && |
4885 | btrfs_test_opt(root, SSD)) { | ||
4184 | last_ptr = &root->fs_info->data_alloc_cluster; | 4886 | last_ptr = &root->fs_info->data_alloc_cluster; |
4185 | } | 4887 | } |
4186 | 4888 | ||
@@ -4223,6 +4925,7 @@ ideal_cache: | |||
4223 | btrfs_put_block_group(block_group); | 4925 | btrfs_put_block_group(block_group); |
4224 | up_read(&space_info->groups_sem); | 4926 | up_read(&space_info->groups_sem); |
4225 | } else { | 4927 | } else { |
4928 | index = get_block_group_index(block_group); | ||
4226 | goto have_block_group; | 4929 | goto have_block_group; |
4227 | } | 4930 | } |
4228 | } else if (block_group) { | 4931 | } else if (block_group) { |
@@ -4231,17 +4934,42 @@ ideal_cache: | |||
4231 | } | 4934 | } |
4232 | search: | 4935 | search: |
4233 | down_read(&space_info->groups_sem); | 4936 | down_read(&space_info->groups_sem); |
4234 | list_for_each_entry(block_group, &space_info->block_groups, list) { | 4937 | list_for_each_entry(block_group, &space_info->block_groups[index], |
4938 | list) { | ||
4235 | u64 offset; | 4939 | u64 offset; |
4236 | int cached; | 4940 | int cached; |
4237 | 4941 | ||
4238 | btrfs_get_block_group(block_group); | 4942 | btrfs_get_block_group(block_group); |
4239 | search_start = block_group->key.objectid; | 4943 | search_start = block_group->key.objectid; |
4240 | 4944 | ||
4945 | /* | ||
4946 | * this can happen if we end up cycling through all the | ||
4947 | * raid types, but we want to make sure we only allocate | ||
4948 | * for the proper type. | ||
4949 | */ | ||
4950 | if (!block_group_bits(block_group, data)) { | ||
4951 | u64 extra = BTRFS_BLOCK_GROUP_DUP | | ||
4952 | BTRFS_BLOCK_GROUP_RAID1 | | ||
4953 | BTRFS_BLOCK_GROUP_RAID10; | ||
4954 | |||
4955 | /* | ||
4956 | * if they asked for extra copies and this block group | ||
4957 | * doesn't provide them, bail. This does allow us to | ||
4958 | * fill raid0 from raid1. | ||
4959 | */ | ||
4960 | if ((data & extra) && !(block_group->flags & extra)) | ||
4961 | goto loop; | ||
4962 | } | ||
4963 | |||
4241 | have_block_group: | 4964 | have_block_group: |
4242 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { | 4965 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { |
4243 | u64 free_percent; | 4966 | u64 free_percent; |
4244 | 4967 | ||
4968 | ret = cache_block_group(block_group, trans, | ||
4969 | orig_root, 1); | ||
4970 | if (block_group->cached == BTRFS_CACHE_FINISHED) | ||
4971 | goto have_block_group; | ||
4972 | |||
4245 | free_percent = btrfs_block_group_used(&block_group->item); | 4973 | free_percent = btrfs_block_group_used(&block_group->item); |
4246 | free_percent *= 100; | 4974 | free_percent *= 100; |
4247 | free_percent = div64_u64(free_percent, | 4975 | free_percent = div64_u64(free_percent, |
@@ -4262,7 +4990,8 @@ have_block_group: | |||
4262 | if (loop > LOOP_CACHING_NOWAIT || | 4990 | if (loop > LOOP_CACHING_NOWAIT || |
4263 | (loop > LOOP_FIND_IDEAL && | 4991 | (loop > LOOP_FIND_IDEAL && |
4264 | atomic_read(&space_info->caching_threads) < 2)) { | 4992 | atomic_read(&space_info->caching_threads) < 2)) { |
4265 | ret = cache_block_group(block_group); | 4993 | ret = cache_block_group(block_group, trans, |
4994 | orig_root, 0); | ||
4266 | BUG_ON(ret); | 4995 | BUG_ON(ret); |
4267 | } | 4996 | } |
4268 | found_uncached_bg = true; | 4997 | found_uncached_bg = true; |
@@ -4422,23 +5151,22 @@ checks: | |||
4422 | goto loop; | 5151 | goto loop; |
4423 | } | 5152 | } |
4424 | 5153 | ||
4425 | if (exclude_nr > 0 && | 5154 | ins->objectid = search_start; |
4426 | (search_start + num_bytes > exclude_start && | 5155 | ins->offset = num_bytes; |
4427 | search_start < exclude_start + exclude_nr)) { | ||
4428 | search_start = exclude_start + exclude_nr; | ||
4429 | 5156 | ||
5157 | if (offset < search_start) | ||
5158 | btrfs_add_free_space(block_group, offset, | ||
5159 | search_start - offset); | ||
5160 | BUG_ON(offset > search_start); | ||
5161 | |||
5162 | ret = update_reserved_bytes(block_group, num_bytes, 1, | ||
5163 | (data & BTRFS_BLOCK_GROUP_DATA)); | ||
5164 | if (ret == -EAGAIN) { | ||
4430 | btrfs_add_free_space(block_group, offset, num_bytes); | 5165 | btrfs_add_free_space(block_group, offset, num_bytes); |
4431 | /* | ||
4432 | * if search_start is still in this block group | ||
4433 | * then we just re-search this block group | ||
4434 | */ | ||
4435 | if (search_start >= block_group->key.objectid && | ||
4436 | search_start < (block_group->key.objectid + | ||
4437 | block_group->key.offset)) | ||
4438 | goto have_block_group; | ||
4439 | goto loop; | 5166 | goto loop; |
4440 | } | 5167 | } |
4441 | 5168 | ||
5169 | /* we are all good, lets return */ | ||
4442 | ins->objectid = search_start; | 5170 | ins->objectid = search_start; |
4443 | ins->offset = num_bytes; | 5171 | ins->offset = num_bytes; |
4444 | 5172 | ||
@@ -4446,18 +5174,18 @@ checks: | |||
4446 | btrfs_add_free_space(block_group, offset, | 5174 | btrfs_add_free_space(block_group, offset, |
4447 | search_start - offset); | 5175 | search_start - offset); |
4448 | BUG_ON(offset > search_start); | 5176 | BUG_ON(offset > search_start); |
4449 | |||
4450 | update_reserved_extents(block_group, num_bytes, 1); | ||
4451 | |||
4452 | /* we are all good, lets return */ | ||
4453 | break; | 5177 | break; |
4454 | loop: | 5178 | loop: |
4455 | failed_cluster_refill = false; | 5179 | failed_cluster_refill = false; |
4456 | failed_alloc = false; | 5180 | failed_alloc = false; |
5181 | BUG_ON(index != get_block_group_index(block_group)); | ||
4457 | btrfs_put_block_group(block_group); | 5182 | btrfs_put_block_group(block_group); |
4458 | } | 5183 | } |
4459 | up_read(&space_info->groups_sem); | 5184 | up_read(&space_info->groups_sem); |
4460 | 5185 | ||
5186 | if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) | ||
5187 | goto search; | ||
5188 | |||
4461 | /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for | 5189 | /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for |
4462 | * for them to make caching progress. Also | 5190 | * for them to make caching progress. Also |
4463 | * determine the best possible bg to cache | 5191 | * determine the best possible bg to cache |
@@ -4471,6 +5199,7 @@ loop: | |||
4471 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && | 5199 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && |
4472 | (found_uncached_bg || empty_size || empty_cluster || | 5200 | (found_uncached_bg || empty_size || empty_cluster || |
4473 | allowed_chunk_alloc)) { | 5201 | allowed_chunk_alloc)) { |
5202 | index = 0; | ||
4474 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { | 5203 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
4475 | found_uncached_bg = false; | 5204 | found_uncached_bg = false; |
4476 | loop++; | 5205 | loop++; |
@@ -4553,31 +5282,30 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
4553 | int dump_block_groups) | 5282 | int dump_block_groups) |
4554 | { | 5283 | { |
4555 | struct btrfs_block_group_cache *cache; | 5284 | struct btrfs_block_group_cache *cache; |
5285 | int index = 0; | ||
4556 | 5286 | ||
4557 | spin_lock(&info->lock); | 5287 | spin_lock(&info->lock); |
4558 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 5288 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
4559 | (unsigned long long)(info->total_bytes - info->bytes_used - | 5289 | (unsigned long long)(info->total_bytes - info->bytes_used - |
4560 | info->bytes_pinned - info->bytes_reserved - | 5290 | info->bytes_pinned - info->bytes_reserved - |
4561 | info->bytes_super), | 5291 | info->bytes_readonly), |
4562 | (info->full) ? "" : "not "); | 5292 | (info->full) ? "" : "not "); |
4563 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 5293 | printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " |
4564 | " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" | 5294 | "reserved=%llu, may_use=%llu, readonly=%llu\n", |
4565 | "\n", | ||
4566 | (unsigned long long)info->total_bytes, | 5295 | (unsigned long long)info->total_bytes, |
5296 | (unsigned long long)info->bytes_used, | ||
4567 | (unsigned long long)info->bytes_pinned, | 5297 | (unsigned long long)info->bytes_pinned, |
4568 | (unsigned long long)info->bytes_delalloc, | 5298 | (unsigned long long)info->bytes_reserved, |
4569 | (unsigned long long)info->bytes_may_use, | 5299 | (unsigned long long)info->bytes_may_use, |
4570 | (unsigned long long)info->bytes_used, | 5300 | (unsigned long long)info->bytes_readonly); |
4571 | (unsigned long long)info->bytes_root, | ||
4572 | (unsigned long long)info->bytes_super, | ||
4573 | (unsigned long long)info->bytes_reserved); | ||
4574 | spin_unlock(&info->lock); | 5301 | spin_unlock(&info->lock); |
4575 | 5302 | ||
4576 | if (!dump_block_groups) | 5303 | if (!dump_block_groups) |
4577 | return; | 5304 | return; |
4578 | 5305 | ||
4579 | down_read(&info->groups_sem); | 5306 | down_read(&info->groups_sem); |
4580 | list_for_each_entry(cache, &info->block_groups, list) { | 5307 | again: |
5308 | list_for_each_entry(cache, &info->block_groups[index], list) { | ||
4581 | spin_lock(&cache->lock); | 5309 | spin_lock(&cache->lock); |
4582 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " | 5310 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " |
4583 | "%llu pinned %llu reserved\n", | 5311 | "%llu pinned %llu reserved\n", |
@@ -4589,6 +5317,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
4589 | btrfs_dump_free_space(cache, bytes); | 5317 | btrfs_dump_free_space(cache, bytes); |
4590 | spin_unlock(&cache->lock); | 5318 | spin_unlock(&cache->lock); |
4591 | } | 5319 | } |
5320 | if (++index < BTRFS_NR_RAID_TYPES) | ||
5321 | goto again; | ||
4592 | up_read(&info->groups_sem); | 5322 | up_read(&info->groups_sem); |
4593 | } | 5323 | } |
4594 | 5324 | ||
@@ -4614,9 +5344,8 @@ again: | |||
4614 | 5344 | ||
4615 | WARN_ON(num_bytes < root->sectorsize); | 5345 | WARN_ON(num_bytes < root->sectorsize); |
4616 | ret = find_free_extent(trans, root, num_bytes, empty_size, | 5346 | ret = find_free_extent(trans, root, num_bytes, empty_size, |
4617 | search_start, search_end, hint_byte, ins, | 5347 | search_start, search_end, hint_byte, |
4618 | trans->alloc_exclude_start, | 5348 | ins, data); |
4619 | trans->alloc_exclude_nr, data); | ||
4620 | 5349 | ||
4621 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { | 5350 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { |
4622 | num_bytes = num_bytes >> 1; | 5351 | num_bytes = num_bytes >> 1; |
@@ -4654,7 +5383,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
4654 | ret = btrfs_discard_extent(root, start, len); | 5383 | ret = btrfs_discard_extent(root, start, len); |
4655 | 5384 | ||
4656 | btrfs_add_free_space(cache, start, len); | 5385 | btrfs_add_free_space(cache, start, len); |
4657 | update_reserved_extents(cache, len, 0); | 5386 | update_reserved_bytes(cache, len, 0, 1); |
4658 | btrfs_put_block_group(cache); | 5387 | btrfs_put_block_group(cache); |
4659 | 5388 | ||
4660 | return ret; | 5389 | return ret; |
@@ -4717,8 +5446,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
4717 | btrfs_mark_buffer_dirty(path->nodes[0]); | 5446 | btrfs_mark_buffer_dirty(path->nodes[0]); |
4718 | btrfs_free_path(path); | 5447 | btrfs_free_path(path); |
4719 | 5448 | ||
4720 | ret = update_block_group(trans, root, ins->objectid, ins->offset, | 5449 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); |
4721 | 1, 0); | ||
4722 | if (ret) { | 5450 | if (ret) { |
4723 | printk(KERN_ERR "btrfs update block group failed for %llu " | 5451 | printk(KERN_ERR "btrfs update block group failed for %llu " |
4724 | "%llu\n", (unsigned long long)ins->objectid, | 5452 | "%llu\n", (unsigned long long)ins->objectid, |
@@ -4778,8 +5506,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
4778 | btrfs_mark_buffer_dirty(leaf); | 5506 | btrfs_mark_buffer_dirty(leaf); |
4779 | btrfs_free_path(path); | 5507 | btrfs_free_path(path); |
4780 | 5508 | ||
4781 | ret = update_block_group(trans, root, ins->objectid, ins->offset, | 5509 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); |
4782 | 1, 0); | ||
4783 | if (ret) { | 5510 | if (ret) { |
4784 | printk(KERN_ERR "btrfs update block group failed for %llu " | 5511 | printk(KERN_ERR "btrfs update block group failed for %llu " |
4785 | "%llu\n", (unsigned long long)ins->objectid, | 5512 | "%llu\n", (unsigned long long)ins->objectid, |
@@ -4821,7 +5548,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
4821 | u64 num_bytes = ins->offset; | 5548 | u64 num_bytes = ins->offset; |
4822 | 5549 | ||
4823 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 5550 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
4824 | cache_block_group(block_group); | 5551 | cache_block_group(block_group, trans, NULL, 0); |
4825 | caching_ctl = get_caching_control(block_group); | 5552 | caching_ctl = get_caching_control(block_group); |
4826 | 5553 | ||
4827 | if (!caching_ctl) { | 5554 | if (!caching_ctl) { |
@@ -4855,73 +5582,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
4855 | put_caching_control(caching_ctl); | 5582 | put_caching_control(caching_ctl); |
4856 | } | 5583 | } |
4857 | 5584 | ||
4858 | update_reserved_extents(block_group, ins->offset, 1); | 5585 | ret = update_reserved_bytes(block_group, ins->offset, 1, 1); |
5586 | BUG_ON(ret); | ||
4859 | btrfs_put_block_group(block_group); | 5587 | btrfs_put_block_group(block_group); |
4860 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 5588 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
4861 | 0, owner, offset, ins, 1); | 5589 | 0, owner, offset, ins, 1); |
4862 | return ret; | 5590 | return ret; |
4863 | } | 5591 | } |
4864 | 5592 | ||
4865 | /* | ||
4866 | * finds a free extent and does all the dirty work required for allocation | ||
4867 | * returns the key for the extent through ins, and a tree buffer for | ||
4868 | * the first block of the extent through buf. | ||
4869 | * | ||
4870 | * returns 0 if everything worked, non-zero otherwise. | ||
4871 | */ | ||
4872 | static int alloc_tree_block(struct btrfs_trans_handle *trans, | ||
4873 | struct btrfs_root *root, | ||
4874 | u64 num_bytes, u64 parent, u64 root_objectid, | ||
4875 | struct btrfs_disk_key *key, int level, | ||
4876 | u64 empty_size, u64 hint_byte, u64 search_end, | ||
4877 | struct btrfs_key *ins) | ||
4878 | { | ||
4879 | int ret; | ||
4880 | u64 flags = 0; | ||
4881 | |||
4882 | ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, | ||
4883 | empty_size, hint_byte, search_end, | ||
4884 | ins, 0); | ||
4885 | if (ret) | ||
4886 | return ret; | ||
4887 | |||
4888 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
4889 | if (parent == 0) | ||
4890 | parent = ins->objectid; | ||
4891 | flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
4892 | } else | ||
4893 | BUG_ON(parent > 0); | ||
4894 | |||
4895 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
4896 | struct btrfs_delayed_extent_op *extent_op; | ||
4897 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
4898 | BUG_ON(!extent_op); | ||
4899 | if (key) | ||
4900 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | ||
4901 | else | ||
4902 | memset(&extent_op->key, 0, sizeof(extent_op->key)); | ||
4903 | extent_op->flags_to_set = flags; | ||
4904 | extent_op->update_key = 1; | ||
4905 | extent_op->update_flags = 1; | ||
4906 | extent_op->is_data = 0; | ||
4907 | |||
4908 | ret = btrfs_add_delayed_tree_ref(trans, ins->objectid, | ||
4909 | ins->offset, parent, root_objectid, | ||
4910 | level, BTRFS_ADD_DELAYED_EXTENT, | ||
4911 | extent_op); | ||
4912 | BUG_ON(ret); | ||
4913 | } | ||
4914 | |||
4915 | if (root_objectid == root->root_key.objectid) { | ||
4916 | u64 used; | ||
4917 | spin_lock(&root->node_lock); | ||
4918 | used = btrfs_root_used(&root->root_item) + num_bytes; | ||
4919 | btrfs_set_root_used(&root->root_item, used); | ||
4920 | spin_unlock(&root->node_lock); | ||
4921 | } | ||
4922 | return ret; | ||
4923 | } | ||
4924 | |||
4925 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 5593 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
4926 | struct btrfs_root *root, | 5594 | struct btrfs_root *root, |
4927 | u64 bytenr, u32 blocksize, | 5595 | u64 bytenr, u32 blocksize, |
@@ -4960,8 +5628,41 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
4960 | return buf; | 5628 | return buf; |
4961 | } | 5629 | } |
4962 | 5630 | ||
5631 | static struct btrfs_block_rsv * | ||
5632 | use_block_rsv(struct btrfs_trans_handle *trans, | ||
5633 | struct btrfs_root *root, u32 blocksize) | ||
5634 | { | ||
5635 | struct btrfs_block_rsv *block_rsv; | ||
5636 | int ret; | ||
5637 | |||
5638 | block_rsv = get_block_rsv(trans, root); | ||
5639 | |||
5640 | if (block_rsv->size == 0) { | ||
5641 | ret = reserve_metadata_bytes(trans, root, block_rsv, | ||
5642 | blocksize, 0); | ||
5643 | if (ret) | ||
5644 | return ERR_PTR(ret); | ||
5645 | return block_rsv; | ||
5646 | } | ||
5647 | |||
5648 | ret = block_rsv_use_bytes(block_rsv, blocksize); | ||
5649 | if (!ret) | ||
5650 | return block_rsv; | ||
5651 | |||
5652 | return ERR_PTR(-ENOSPC); | ||
5653 | } | ||
5654 | |||
5655 | static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize) | ||
5656 | { | ||
5657 | block_rsv_add_bytes(block_rsv, blocksize, 0); | ||
5658 | block_rsv_release_bytes(block_rsv, NULL, 0); | ||
5659 | } | ||
5660 | |||
4963 | /* | 5661 | /* |
4964 | * helper function to allocate a block for a given tree | 5662 | * finds a free extent and does all the dirty work required for allocation |
5663 | * returns the key for the extent through ins, and a tree buffer for | ||
5664 | * the first block of the extent through buf. | ||
5665 | * | ||
4965 | * returns the tree buffer or NULL. | 5666 | * returns the tree buffer or NULL. |
4966 | */ | 5667 | */ |
4967 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | 5668 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, |
@@ -4971,18 +5672,53 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
4971 | u64 hint, u64 empty_size) | 5672 | u64 hint, u64 empty_size) |
4972 | { | 5673 | { |
4973 | struct btrfs_key ins; | 5674 | struct btrfs_key ins; |
4974 | int ret; | 5675 | struct btrfs_block_rsv *block_rsv; |
4975 | struct extent_buffer *buf; | 5676 | struct extent_buffer *buf; |
5677 | u64 flags = 0; | ||
5678 | int ret; | ||
4976 | 5679 | ||
4977 | ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, | 5680 | |
4978 | key, level, empty_size, hint, (u64)-1, &ins); | 5681 | block_rsv = use_block_rsv(trans, root, blocksize); |
5682 | if (IS_ERR(block_rsv)) | ||
5683 | return ERR_CAST(block_rsv); | ||
5684 | |||
5685 | ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, | ||
5686 | empty_size, hint, (u64)-1, &ins, 0); | ||
4979 | if (ret) { | 5687 | if (ret) { |
4980 | BUG_ON(ret > 0); | 5688 | unuse_block_rsv(block_rsv, blocksize); |
4981 | return ERR_PTR(ret); | 5689 | return ERR_PTR(ret); |
4982 | } | 5690 | } |
4983 | 5691 | ||
4984 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, | 5692 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, |
4985 | blocksize, level); | 5693 | blocksize, level); |
5694 | BUG_ON(IS_ERR(buf)); | ||
5695 | |||
5696 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
5697 | if (parent == 0) | ||
5698 | parent = ins.objectid; | ||
5699 | flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
5700 | } else | ||
5701 | BUG_ON(parent > 0); | ||
5702 | |||
5703 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
5704 | struct btrfs_delayed_extent_op *extent_op; | ||
5705 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
5706 | BUG_ON(!extent_op); | ||
5707 | if (key) | ||
5708 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | ||
5709 | else | ||
5710 | memset(&extent_op->key, 0, sizeof(extent_op->key)); | ||
5711 | extent_op->flags_to_set = flags; | ||
5712 | extent_op->update_key = 1; | ||
5713 | extent_op->update_flags = 1; | ||
5714 | extent_op->is_data = 0; | ||
5715 | |||
5716 | ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, | ||
5717 | ins.offset, parent, root_objectid, | ||
5718 | level, BTRFS_ADD_DELAYED_EXTENT, | ||
5719 | extent_op); | ||
5720 | BUG_ON(ret); | ||
5721 | } | ||
4986 | return buf; | 5722 | return buf; |
4987 | } | 5723 | } |
4988 | 5724 | ||
@@ -5011,7 +5747,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, | |||
5011 | u64 generation; | 5747 | u64 generation; |
5012 | u64 refs; | 5748 | u64 refs; |
5013 | u64 flags; | 5749 | u64 flags; |
5014 | u64 last = 0; | ||
5015 | u32 nritems; | 5750 | u32 nritems; |
5016 | u32 blocksize; | 5751 | u32 blocksize; |
5017 | struct btrfs_key key; | 5752 | struct btrfs_key key; |
@@ -5079,7 +5814,6 @@ reada: | |||
5079 | generation); | 5814 | generation); |
5080 | if (ret) | 5815 | if (ret) |
5081 | break; | 5816 | break; |
5082 | last = bytenr + blocksize; | ||
5083 | nread++; | 5817 | nread++; |
5084 | } | 5818 | } |
5085 | wc->reada_slot = slot; | 5819 | wc->reada_slot = slot; |
@@ -5205,6 +5939,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
5205 | next = btrfs_find_tree_block(root, bytenr, blocksize); | 5939 | next = btrfs_find_tree_block(root, bytenr, blocksize); |
5206 | if (!next) { | 5940 | if (!next) { |
5207 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 5941 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); |
5942 | if (!next) | ||
5943 | return -ENOMEM; | ||
5208 | reada = 1; | 5944 | reada = 1; |
5209 | } | 5945 | } |
5210 | btrfs_tree_lock(next); | 5946 | btrfs_tree_lock(next); |
@@ -5305,7 +6041,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
5305 | struct btrfs_path *path, | 6041 | struct btrfs_path *path, |
5306 | struct walk_control *wc) | 6042 | struct walk_control *wc) |
5307 | { | 6043 | { |
5308 | int ret = 0; | 6044 | int ret; |
5309 | int level = wc->level; | 6045 | int level = wc->level; |
5310 | struct extent_buffer *eb = path->nodes[level]; | 6046 | struct extent_buffer *eb = path->nodes[level]; |
5311 | u64 parent = 0; | 6047 | u64 parent = 0; |
@@ -5383,13 +6119,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
5383 | btrfs_header_owner(path->nodes[level + 1])); | 6119 | btrfs_header_owner(path->nodes[level + 1])); |
5384 | } | 6120 | } |
5385 | 6121 | ||
5386 | ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, | 6122 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); |
5387 | root->root_key.objectid, level, 0); | ||
5388 | BUG_ON(ret); | ||
5389 | out: | 6123 | out: |
5390 | wc->refs[level] = 0; | 6124 | wc->refs[level] = 0; |
5391 | wc->flags[level] = 0; | 6125 | wc->flags[level] = 0; |
5392 | return ret; | 6126 | return 0; |
5393 | } | 6127 | } |
5394 | 6128 | ||
5395 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | 6129 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, |
@@ -5402,10 +6136,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
5402 | int ret; | 6136 | int ret; |
5403 | 6137 | ||
5404 | while (level >= 0) { | 6138 | while (level >= 0) { |
5405 | if (path->slots[level] >= | ||
5406 | btrfs_header_nritems(path->nodes[level])) | ||
5407 | break; | ||
5408 | |||
5409 | ret = walk_down_proc(trans, root, path, wc, lookup_info); | 6139 | ret = walk_down_proc(trans, root, path, wc, lookup_info); |
5410 | if (ret > 0) | 6140 | if (ret > 0) |
5411 | break; | 6141 | break; |
@@ -5413,11 +6143,16 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
5413 | if (level == 0) | 6143 | if (level == 0) |
5414 | break; | 6144 | break; |
5415 | 6145 | ||
6146 | if (path->slots[level] >= | ||
6147 | btrfs_header_nritems(path->nodes[level])) | ||
6148 | break; | ||
6149 | |||
5416 | ret = do_walk_down(trans, root, path, wc, &lookup_info); | 6150 | ret = do_walk_down(trans, root, path, wc, &lookup_info); |
5417 | if (ret > 0) { | 6151 | if (ret > 0) { |
5418 | path->slots[level]++; | 6152 | path->slots[level]++; |
5419 | continue; | 6153 | continue; |
5420 | } | 6154 | } else if (ret < 0) |
6155 | return ret; | ||
5421 | level = wc->level; | 6156 | level = wc->level; |
5422 | } | 6157 | } |
5423 | return 0; | 6158 | return 0; |
@@ -5466,7 +6201,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
5466 | * also make sure backrefs for the shared block and all lower level | 6201 | * also make sure backrefs for the shared block and all lower level |
5467 | * blocks are properly updated. | 6202 | * blocks are properly updated. |
5468 | */ | 6203 | */ |
5469 | int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | 6204 | int btrfs_drop_snapshot(struct btrfs_root *root, |
6205 | struct btrfs_block_rsv *block_rsv, int update_ref) | ||
5470 | { | 6206 | { |
5471 | struct btrfs_path *path; | 6207 | struct btrfs_path *path; |
5472 | struct btrfs_trans_handle *trans; | 6208 | struct btrfs_trans_handle *trans; |
@@ -5484,7 +6220,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5484 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 6220 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
5485 | BUG_ON(!wc); | 6221 | BUG_ON(!wc); |
5486 | 6222 | ||
5487 | trans = btrfs_start_transaction(tree_root, 1); | 6223 | trans = btrfs_start_transaction(tree_root, 0); |
6224 | if (block_rsv) | ||
6225 | trans->block_rsv = block_rsv; | ||
5488 | 6226 | ||
5489 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { | 6227 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { |
5490 | level = btrfs_header_level(root->node); | 6228 | level = btrfs_header_level(root->node); |
@@ -5572,22 +6310,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5572 | } | 6310 | } |
5573 | 6311 | ||
5574 | BUG_ON(wc->level == 0); | 6312 | BUG_ON(wc->level == 0); |
5575 | if (trans->transaction->in_commit || | 6313 | if (btrfs_should_end_transaction(trans, tree_root)) { |
5576 | trans->transaction->delayed_refs.flushing) { | ||
5577 | ret = btrfs_update_root(trans, tree_root, | 6314 | ret = btrfs_update_root(trans, tree_root, |
5578 | &root->root_key, | 6315 | &root->root_key, |
5579 | root_item); | 6316 | root_item); |
5580 | BUG_ON(ret); | 6317 | BUG_ON(ret); |
5581 | 6318 | ||
5582 | btrfs_end_transaction(trans, tree_root); | 6319 | btrfs_end_transaction_throttle(trans, tree_root); |
5583 | trans = btrfs_start_transaction(tree_root, 1); | 6320 | trans = btrfs_start_transaction(tree_root, 0); |
5584 | } else { | 6321 | if (block_rsv) |
5585 | unsigned long update; | 6322 | trans->block_rsv = block_rsv; |
5586 | update = trans->delayed_ref_updates; | ||
5587 | trans->delayed_ref_updates = 0; | ||
5588 | if (update) | ||
5589 | btrfs_run_delayed_refs(trans, tree_root, | ||
5590 | update); | ||
5591 | } | 6323 | } |
5592 | } | 6324 | } |
5593 | btrfs_release_path(root, path); | 6325 | btrfs_release_path(root, path); |
@@ -5601,9 +6333,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5601 | NULL, NULL); | 6333 | NULL, NULL); |
5602 | BUG_ON(ret < 0); | 6334 | BUG_ON(ret < 0); |
5603 | if (ret > 0) { | 6335 | if (ret > 0) { |
5604 | ret = btrfs_del_orphan_item(trans, tree_root, | 6336 | /* if we fail to delete the orphan item this time |
5605 | root->root_key.objectid); | 6337 | * around, it'll get picked up the next time. |
5606 | BUG_ON(ret); | 6338 | * |
6339 | * The most common failure here is just -ENOENT. | ||
6340 | */ | ||
6341 | btrfs_del_orphan_item(trans, tree_root, | ||
6342 | root->root_key.objectid); | ||
5607 | } | 6343 | } |
5608 | } | 6344 | } |
5609 | 6345 | ||
@@ -5615,7 +6351,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5615 | kfree(root); | 6351 | kfree(root); |
5616 | } | 6352 | } |
5617 | out: | 6353 | out: |
5618 | btrfs_end_transaction(trans, tree_root); | 6354 | btrfs_end_transaction_throttle(trans, tree_root); |
5619 | kfree(wc); | 6355 | kfree(wc); |
5620 | btrfs_free_path(path); | 6356 | btrfs_free_path(path); |
5621 | return err; | 6357 | return err; |
@@ -6561,6 +7297,7 @@ static noinline int invalidate_extent_cache(struct btrfs_root *root, | |||
6561 | struct btrfs_key key; | 7297 | struct btrfs_key key; |
6562 | struct inode *inode = NULL; | 7298 | struct inode *inode = NULL; |
6563 | struct btrfs_file_extent_item *fi; | 7299 | struct btrfs_file_extent_item *fi; |
7300 | struct extent_state *cached_state = NULL; | ||
6564 | u64 num_bytes; | 7301 | u64 num_bytes; |
6565 | u64 skip_objectid = 0; | 7302 | u64 skip_objectid = 0; |
6566 | u32 nritems; | 7303 | u32 nritems; |
@@ -6589,12 +7326,14 @@ static noinline int invalidate_extent_cache(struct btrfs_root *root, | |||
6589 | } | 7326 | } |
6590 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi); | 7327 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi); |
6591 | 7328 | ||
6592 | lock_extent(&BTRFS_I(inode)->io_tree, key.offset, | 7329 | lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset, |
6593 | key.offset + num_bytes - 1, GFP_NOFS); | 7330 | key.offset + num_bytes - 1, 0, &cached_state, |
7331 | GFP_NOFS); | ||
6594 | btrfs_drop_extent_cache(inode, key.offset, | 7332 | btrfs_drop_extent_cache(inode, key.offset, |
6595 | key.offset + num_bytes - 1, 1); | 7333 | key.offset + num_bytes - 1, 1); |
6596 | unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, | 7334 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset, |
6597 | key.offset + num_bytes - 1, GFP_NOFS); | 7335 | key.offset + num_bytes - 1, &cached_state, |
7336 | GFP_NOFS); | ||
6598 | cond_resched(); | 7337 | cond_resched(); |
6599 | } | 7338 | } |
6600 | iput(inode); | 7339 | iput(inode); |
@@ -7176,7 +7915,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
7176 | u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | | 7915 | u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | |
7177 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; | 7916 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; |
7178 | 7917 | ||
7179 | num_devices = root->fs_info->fs_devices->rw_devices; | 7918 | /* |
7919 | * we add in the count of missing devices because we want | ||
7920 | * to make sure that any RAID levels on a degraded FS | ||
7921 | * continue to be honored. | ||
7922 | */ | ||
7923 | num_devices = root->fs_info->fs_devices->rw_devices + | ||
7924 | root->fs_info->fs_devices->missing_devices; | ||
7925 | |||
7180 | if (num_devices == 1) { | 7926 | if (num_devices == 1) { |
7181 | stripped |= BTRFS_BLOCK_GROUP_DUP; | 7927 | stripped |= BTRFS_BLOCK_GROUP_DUP; |
7182 | stripped = flags & ~stripped; | 7928 | stripped = flags & ~stripped; |
@@ -7208,48 +7954,137 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
7208 | return flags; | 7954 | return flags; |
7209 | } | 7955 | } |
7210 | 7956 | ||
7211 | static int __alloc_chunk_for_shrink(struct btrfs_root *root, | 7957 | static int set_block_group_ro(struct btrfs_block_group_cache *cache) |
7212 | struct btrfs_block_group_cache *shrink_block_group, | 7958 | { |
7213 | int force) | 7959 | struct btrfs_space_info *sinfo = cache->space_info; |
7960 | u64 num_bytes; | ||
7961 | int ret = -ENOSPC; | ||
7962 | |||
7963 | if (cache->ro) | ||
7964 | return 0; | ||
7965 | |||
7966 | spin_lock(&sinfo->lock); | ||
7967 | spin_lock(&cache->lock); | ||
7968 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | ||
7969 | cache->bytes_super - btrfs_block_group_used(&cache->item); | ||
7970 | |||
7971 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | ||
7972 | sinfo->bytes_may_use + sinfo->bytes_readonly + | ||
7973 | cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { | ||
7974 | sinfo->bytes_readonly += num_bytes; | ||
7975 | sinfo->bytes_reserved += cache->reserved_pinned; | ||
7976 | cache->reserved_pinned = 0; | ||
7977 | cache->ro = 1; | ||
7978 | ret = 0; | ||
7979 | } | ||
7980 | |||
7981 | spin_unlock(&cache->lock); | ||
7982 | spin_unlock(&sinfo->lock); | ||
7983 | return ret; | ||
7984 | } | ||
7985 | |||
7986 | int btrfs_set_block_group_ro(struct btrfs_root *root, | ||
7987 | struct btrfs_block_group_cache *cache) | ||
7988 | |||
7214 | { | 7989 | { |
7215 | struct btrfs_trans_handle *trans; | 7990 | struct btrfs_trans_handle *trans; |
7216 | u64 new_alloc_flags; | 7991 | u64 alloc_flags; |
7217 | u64 calc; | 7992 | int ret; |
7218 | 7993 | ||
7219 | spin_lock(&shrink_block_group->lock); | 7994 | BUG_ON(cache->ro); |
7220 | if (btrfs_block_group_used(&shrink_block_group->item) + | ||
7221 | shrink_block_group->reserved > 0) { | ||
7222 | spin_unlock(&shrink_block_group->lock); | ||
7223 | 7995 | ||
7224 | trans = btrfs_start_transaction(root, 1); | 7996 | trans = btrfs_join_transaction(root, 1); |
7225 | spin_lock(&shrink_block_group->lock); | 7997 | BUG_ON(IS_ERR(trans)); |
7226 | 7998 | ||
7227 | new_alloc_flags = update_block_group_flags(root, | 7999 | alloc_flags = update_block_group_flags(root, cache->flags); |
7228 | shrink_block_group->flags); | 8000 | if (alloc_flags != cache->flags) |
7229 | if (new_alloc_flags != shrink_block_group->flags) { | 8001 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); |
7230 | calc = | 8002 | |
7231 | btrfs_block_group_used(&shrink_block_group->item); | 8003 | ret = set_block_group_ro(cache); |
7232 | } else { | 8004 | if (!ret) |
7233 | calc = shrink_block_group->key.offset; | 8005 | goto out; |
8006 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | ||
8007 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | ||
8008 | if (ret < 0) | ||
8009 | goto out; | ||
8010 | ret = set_block_group_ro(cache); | ||
8011 | out: | ||
8012 | btrfs_end_transaction(trans, root); | ||
8013 | return ret; | ||
8014 | } | ||
8015 | |||
8016 | /* | ||
8017 | * helper to account the unused space of all the readonly block group in the | ||
8018 | * list. takes mirrors into account. | ||
8019 | */ | ||
8020 | static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list) | ||
8021 | { | ||
8022 | struct btrfs_block_group_cache *block_group; | ||
8023 | u64 free_bytes = 0; | ||
8024 | int factor; | ||
8025 | |||
8026 | list_for_each_entry(block_group, groups_list, list) { | ||
8027 | spin_lock(&block_group->lock); | ||
8028 | |||
8029 | if (!block_group->ro) { | ||
8030 | spin_unlock(&block_group->lock); | ||
8031 | continue; | ||
7234 | } | 8032 | } |
7235 | spin_unlock(&shrink_block_group->lock); | ||
7236 | 8033 | ||
7237 | do_chunk_alloc(trans, root->fs_info->extent_root, | 8034 | if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 | |
7238 | calc + 2 * 1024 * 1024, new_alloc_flags, force); | 8035 | BTRFS_BLOCK_GROUP_RAID10 | |
8036 | BTRFS_BLOCK_GROUP_DUP)) | ||
8037 | factor = 2; | ||
8038 | else | ||
8039 | factor = 1; | ||
7239 | 8040 | ||
7240 | btrfs_end_transaction(trans, root); | 8041 | free_bytes += (block_group->key.offset - |
7241 | } else | 8042 | btrfs_block_group_used(&block_group->item)) * |
7242 | spin_unlock(&shrink_block_group->lock); | 8043 | factor; |
7243 | return 0; | 8044 | |
8045 | spin_unlock(&block_group->lock); | ||
8046 | } | ||
8047 | |||
8048 | return free_bytes; | ||
7244 | } | 8049 | } |
7245 | 8050 | ||
8051 | /* | ||
8052 | * helper to account the unused space of all the readonly block group in the | ||
8053 | * space_info. takes mirrors into account. | ||
8054 | */ | ||
8055 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) | ||
8056 | { | ||
8057 | int i; | ||
8058 | u64 free_bytes = 0; | ||
8059 | |||
8060 | spin_lock(&sinfo->lock); | ||
8061 | |||
8062 | for(i = 0; i < BTRFS_NR_RAID_TYPES; i++) | ||
8063 | if (!list_empty(&sinfo->block_groups[i])) | ||
8064 | free_bytes += __btrfs_get_ro_block_group_free_space( | ||
8065 | &sinfo->block_groups[i]); | ||
8066 | |||
8067 | spin_unlock(&sinfo->lock); | ||
7246 | 8068 | ||
7247 | int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | 8069 | return free_bytes; |
7248 | struct btrfs_block_group_cache *group) | 8070 | } |
7249 | 8071 | ||
8072 | int btrfs_set_block_group_rw(struct btrfs_root *root, | ||
8073 | struct btrfs_block_group_cache *cache) | ||
7250 | { | 8074 | { |
7251 | __alloc_chunk_for_shrink(root, group, 1); | 8075 | struct btrfs_space_info *sinfo = cache->space_info; |
7252 | set_block_group_readonly(group); | 8076 | u64 num_bytes; |
8077 | |||
8078 | BUG_ON(!cache->ro); | ||
8079 | |||
8080 | spin_lock(&sinfo->lock); | ||
8081 | spin_lock(&cache->lock); | ||
8082 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | ||
8083 | cache->bytes_super - btrfs_block_group_used(&cache->item); | ||
8084 | sinfo->bytes_readonly -= num_bytes; | ||
8085 | cache->ro = 0; | ||
8086 | spin_unlock(&cache->lock); | ||
8087 | spin_unlock(&sinfo->lock); | ||
7253 | return 0; | 8088 | return 0; |
7254 | } | 8089 | } |
7255 | 8090 | ||
@@ -7314,7 +8149,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7314 | mutex_lock(&root->fs_info->chunk_mutex); | 8149 | mutex_lock(&root->fs_info->chunk_mutex); |
7315 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 8150 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
7316 | u64 min_free = btrfs_block_group_used(&block_group->item); | 8151 | u64 min_free = btrfs_block_group_used(&block_group->item); |
7317 | u64 dev_offset, max_avail; | 8152 | u64 dev_offset; |
7318 | 8153 | ||
7319 | /* | 8154 | /* |
7320 | * check to make sure we can actually find a chunk with enough | 8155 | * check to make sure we can actually find a chunk with enough |
@@ -7322,7 +8157,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7322 | */ | 8157 | */ |
7323 | if (device->total_bytes > device->bytes_used + min_free) { | 8158 | if (device->total_bytes > device->bytes_used + min_free) { |
7324 | ret = find_free_dev_extent(NULL, device, min_free, | 8159 | ret = find_free_dev_extent(NULL, device, min_free, |
7325 | &dev_offset, &max_avail); | 8160 | &dev_offset, NULL); |
7326 | if (!ret) | 8161 | if (!ret) |
7327 | break; | 8162 | break; |
7328 | ret = -1; | 8163 | ret = -1; |
@@ -7366,11 +8201,44 @@ static int find_first_block_group(struct btrfs_root *root, | |||
7366 | } | 8201 | } |
7367 | path->slots[0]++; | 8202 | path->slots[0]++; |
7368 | } | 8203 | } |
7369 | ret = -ENOENT; | ||
7370 | out: | 8204 | out: |
7371 | return ret; | 8205 | return ret; |
7372 | } | 8206 | } |
7373 | 8207 | ||
8208 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info) | ||
8209 | { | ||
8210 | struct btrfs_block_group_cache *block_group; | ||
8211 | u64 last = 0; | ||
8212 | |||
8213 | while (1) { | ||
8214 | struct inode *inode; | ||
8215 | |||
8216 | block_group = btrfs_lookup_first_block_group(info, last); | ||
8217 | while (block_group) { | ||
8218 | spin_lock(&block_group->lock); | ||
8219 | if (block_group->iref) | ||
8220 | break; | ||
8221 | spin_unlock(&block_group->lock); | ||
8222 | block_group = next_block_group(info->tree_root, | ||
8223 | block_group); | ||
8224 | } | ||
8225 | if (!block_group) { | ||
8226 | if (last == 0) | ||
8227 | break; | ||
8228 | last = 0; | ||
8229 | continue; | ||
8230 | } | ||
8231 | |||
8232 | inode = block_group->inode; | ||
8233 | block_group->iref = 0; | ||
8234 | block_group->inode = NULL; | ||
8235 | spin_unlock(&block_group->lock); | ||
8236 | iput(inode); | ||
8237 | last = block_group->key.objectid + block_group->key.offset; | ||
8238 | btrfs_put_block_group(block_group); | ||
8239 | } | ||
8240 | } | ||
8241 | |||
7374 | int btrfs_free_block_groups(struct btrfs_fs_info *info) | 8242 | int btrfs_free_block_groups(struct btrfs_fs_info *info) |
7375 | { | 8243 | { |
7376 | struct btrfs_block_group_cache *block_group; | 8244 | struct btrfs_block_group_cache *block_group; |
@@ -7417,17 +8285,33 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7417 | */ | 8285 | */ |
7418 | synchronize_rcu(); | 8286 | synchronize_rcu(); |
7419 | 8287 | ||
8288 | release_global_block_rsv(info); | ||
8289 | |||
7420 | while(!list_empty(&info->space_info)) { | 8290 | while(!list_empty(&info->space_info)) { |
7421 | space_info = list_entry(info->space_info.next, | 8291 | space_info = list_entry(info->space_info.next, |
7422 | struct btrfs_space_info, | 8292 | struct btrfs_space_info, |
7423 | list); | 8293 | list); |
7424 | 8294 | if (space_info->bytes_pinned > 0 || | |
8295 | space_info->bytes_reserved > 0) { | ||
8296 | WARN_ON(1); | ||
8297 | dump_space_info(space_info, 0, 0); | ||
8298 | } | ||
7425 | list_del(&space_info->list); | 8299 | list_del(&space_info->list); |
7426 | kfree(space_info); | 8300 | kfree(space_info); |
7427 | } | 8301 | } |
7428 | return 0; | 8302 | return 0; |
7429 | } | 8303 | } |
7430 | 8304 | ||
8305 | static void __link_block_group(struct btrfs_space_info *space_info, | ||
8306 | struct btrfs_block_group_cache *cache) | ||
8307 | { | ||
8308 | int index = get_block_group_index(cache); | ||
8309 | |||
8310 | down_write(&space_info->groups_sem); | ||
8311 | list_add_tail(&cache->list, &space_info->block_groups[index]); | ||
8312 | up_write(&space_info->groups_sem); | ||
8313 | } | ||
8314 | |||
7431 | int btrfs_read_block_groups(struct btrfs_root *root) | 8315 | int btrfs_read_block_groups(struct btrfs_root *root) |
7432 | { | 8316 | { |
7433 | struct btrfs_path *path; | 8317 | struct btrfs_path *path; |
@@ -7438,6 +8322,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7438 | struct btrfs_key key; | 8322 | struct btrfs_key key; |
7439 | struct btrfs_key found_key; | 8323 | struct btrfs_key found_key; |
7440 | struct extent_buffer *leaf; | 8324 | struct extent_buffer *leaf; |
8325 | int need_clear = 0; | ||
8326 | u64 cache_gen; | ||
7441 | 8327 | ||
7442 | root = info->extent_root; | 8328 | root = info->extent_root; |
7443 | key.objectid = 0; | 8329 | key.objectid = 0; |
@@ -7447,21 +8333,27 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7447 | if (!path) | 8333 | if (!path) |
7448 | return -ENOMEM; | 8334 | return -ENOMEM; |
7449 | 8335 | ||
8336 | cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); | ||
8337 | if (cache_gen != 0 && | ||
8338 | btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) | ||
8339 | need_clear = 1; | ||
8340 | if (btrfs_test_opt(root, CLEAR_CACHE)) | ||
8341 | need_clear = 1; | ||
8342 | if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen) | ||
8343 | printk(KERN_INFO "btrfs: disk space caching is enabled\n"); | ||
8344 | |||
7450 | while (1) { | 8345 | while (1) { |
7451 | ret = find_first_block_group(root, path, &key); | 8346 | ret = find_first_block_group(root, path, &key); |
7452 | if (ret > 0) { | 8347 | if (ret > 0) |
7453 | ret = 0; | 8348 | break; |
7454 | goto error; | ||
7455 | } | ||
7456 | if (ret != 0) | 8349 | if (ret != 0) |
7457 | goto error; | 8350 | goto error; |
7458 | |||
7459 | leaf = path->nodes[0]; | 8351 | leaf = path->nodes[0]; |
7460 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 8352 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
7461 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 8353 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
7462 | if (!cache) { | 8354 | if (!cache) { |
7463 | ret = -ENOMEM; | 8355 | ret = -ENOMEM; |
7464 | break; | 8356 | goto error; |
7465 | } | 8357 | } |
7466 | 8358 | ||
7467 | atomic_set(&cache->count, 1); | 8359 | atomic_set(&cache->count, 1); |
@@ -7471,6 +8363,9 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7471 | INIT_LIST_HEAD(&cache->list); | 8363 | INIT_LIST_HEAD(&cache->list); |
7472 | INIT_LIST_HEAD(&cache->cluster_list); | 8364 | INIT_LIST_HEAD(&cache->cluster_list); |
7473 | 8365 | ||
8366 | if (need_clear) | ||
8367 | cache->disk_cache_state = BTRFS_DC_CLEAR; | ||
8368 | |||
7474 | /* | 8369 | /* |
7475 | * we only want to have 32k of ram per block group for keeping | 8370 | * we only want to have 32k of ram per block group for keeping |
7476 | * track of free space, and if we pass 1/2 of that we want to | 8371 | * track of free space, and if we pass 1/2 of that we want to |
@@ -7518,20 +8413,36 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7518 | BUG_ON(ret); | 8413 | BUG_ON(ret); |
7519 | cache->space_info = space_info; | 8414 | cache->space_info = space_info; |
7520 | spin_lock(&cache->space_info->lock); | 8415 | spin_lock(&cache->space_info->lock); |
7521 | cache->space_info->bytes_super += cache->bytes_super; | 8416 | cache->space_info->bytes_readonly += cache->bytes_super; |
7522 | spin_unlock(&cache->space_info->lock); | 8417 | spin_unlock(&cache->space_info->lock); |
7523 | 8418 | ||
7524 | down_write(&space_info->groups_sem); | 8419 | __link_block_group(space_info, cache); |
7525 | list_add_tail(&cache->list, &space_info->block_groups); | ||
7526 | up_write(&space_info->groups_sem); | ||
7527 | 8420 | ||
7528 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | 8421 | ret = btrfs_add_block_group_cache(root->fs_info, cache); |
7529 | BUG_ON(ret); | 8422 | BUG_ON(ret); |
7530 | 8423 | ||
7531 | set_avail_alloc_bits(root->fs_info, cache->flags); | 8424 | set_avail_alloc_bits(root->fs_info, cache->flags); |
7532 | if (btrfs_chunk_readonly(root, cache->key.objectid)) | 8425 | if (btrfs_chunk_readonly(root, cache->key.objectid)) |
7533 | set_block_group_readonly(cache); | 8426 | set_block_group_ro(cache); |
7534 | } | 8427 | } |
8428 | |||
8429 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { | ||
8430 | if (!(get_alloc_profile(root, space_info->flags) & | ||
8431 | (BTRFS_BLOCK_GROUP_RAID10 | | ||
8432 | BTRFS_BLOCK_GROUP_RAID1 | | ||
8433 | BTRFS_BLOCK_GROUP_DUP))) | ||
8434 | continue; | ||
8435 | /* | ||
8436 | * avoid allocating from un-mirrored block group if there are | ||
8437 | * mirrored block groups. | ||
8438 | */ | ||
8439 | list_for_each_entry(cache, &space_info->block_groups[3], list) | ||
8440 | set_block_group_ro(cache); | ||
8441 | list_for_each_entry(cache, &space_info->block_groups[4], list) | ||
8442 | set_block_group_ro(cache); | ||
8443 | } | ||
8444 | |||
8445 | init_global_block_rsv(info); | ||
7535 | ret = 0; | 8446 | ret = 0; |
7536 | error: | 8447 | error: |
7537 | btrfs_free_path(path); | 8448 | btrfs_free_path(path); |
@@ -7559,6 +8470,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7559 | cache->key.offset = size; | 8470 | cache->key.offset = size; |
7560 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; | 8471 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
7561 | cache->sectorsize = root->sectorsize; | 8472 | cache->sectorsize = root->sectorsize; |
8473 | cache->fs_info = root->fs_info; | ||
7562 | 8474 | ||
7563 | /* | 8475 | /* |
7564 | * we only want to have 32k of ram per block group for keeping track | 8476 | * we only want to have 32k of ram per block group for keeping track |
@@ -7592,12 +8504,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7592 | BUG_ON(ret); | 8504 | BUG_ON(ret); |
7593 | 8505 | ||
7594 | spin_lock(&cache->space_info->lock); | 8506 | spin_lock(&cache->space_info->lock); |
7595 | cache->space_info->bytes_super += cache->bytes_super; | 8507 | cache->space_info->bytes_readonly += cache->bytes_super; |
7596 | spin_unlock(&cache->space_info->lock); | 8508 | spin_unlock(&cache->space_info->lock); |
7597 | 8509 | ||
7598 | down_write(&cache->space_info->groups_sem); | 8510 | __link_block_group(cache->space_info, cache); |
7599 | list_add_tail(&cache->list, &cache->space_info->block_groups); | ||
7600 | up_write(&cache->space_info->groups_sem); | ||
7601 | 8511 | ||
7602 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | 8512 | ret = btrfs_add_block_group_cache(root->fs_info, cache); |
7603 | BUG_ON(ret); | 8513 | BUG_ON(ret); |
@@ -7617,8 +8527,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7617 | struct btrfs_path *path; | 8527 | struct btrfs_path *path; |
7618 | struct btrfs_block_group_cache *block_group; | 8528 | struct btrfs_block_group_cache *block_group; |
7619 | struct btrfs_free_cluster *cluster; | 8529 | struct btrfs_free_cluster *cluster; |
8530 | struct btrfs_root *tree_root = root->fs_info->tree_root; | ||
7620 | struct btrfs_key key; | 8531 | struct btrfs_key key; |
8532 | struct inode *inode; | ||
7621 | int ret; | 8533 | int ret; |
8534 | int factor; | ||
7622 | 8535 | ||
7623 | root = root->fs_info->extent_root; | 8536 | root = root->fs_info->extent_root; |
7624 | 8537 | ||
@@ -7627,6 +8540,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7627 | BUG_ON(!block_group->ro); | 8540 | BUG_ON(!block_group->ro); |
7628 | 8541 | ||
7629 | memcpy(&key, &block_group->key, sizeof(key)); | 8542 | memcpy(&key, &block_group->key, sizeof(key)); |
8543 | if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | | ||
8544 | BTRFS_BLOCK_GROUP_RAID1 | | ||
8545 | BTRFS_BLOCK_GROUP_RAID10)) | ||
8546 | factor = 2; | ||
8547 | else | ||
8548 | factor = 1; | ||
7630 | 8549 | ||
7631 | /* make sure this block group isn't part of an allocation cluster */ | 8550 | /* make sure this block group isn't part of an allocation cluster */ |
7632 | cluster = &root->fs_info->data_alloc_cluster; | 8551 | cluster = &root->fs_info->data_alloc_cluster; |
@@ -7646,6 +8565,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7646 | path = btrfs_alloc_path(); | 8565 | path = btrfs_alloc_path(); |
7647 | BUG_ON(!path); | 8566 | BUG_ON(!path); |
7648 | 8567 | ||
8568 | inode = lookup_free_space_inode(root, block_group, path); | ||
8569 | if (!IS_ERR(inode)) { | ||
8570 | btrfs_orphan_add(trans, inode); | ||
8571 | clear_nlink(inode); | ||
8572 | /* One for the block groups ref */ | ||
8573 | spin_lock(&block_group->lock); | ||
8574 | if (block_group->iref) { | ||
8575 | block_group->iref = 0; | ||
8576 | block_group->inode = NULL; | ||
8577 | spin_unlock(&block_group->lock); | ||
8578 | iput(inode); | ||
8579 | } else { | ||
8580 | spin_unlock(&block_group->lock); | ||
8581 | } | ||
8582 | /* One for our lookup ref */ | ||
8583 | iput(inode); | ||
8584 | } | ||
8585 | |||
8586 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | ||
8587 | key.offset = block_group->key.objectid; | ||
8588 | key.type = 0; | ||
8589 | |||
8590 | ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); | ||
8591 | if (ret < 0) | ||
8592 | goto out; | ||
8593 | if (ret > 0) | ||
8594 | btrfs_release_path(tree_root, path); | ||
8595 | if (ret == 0) { | ||
8596 | ret = btrfs_del_item(trans, tree_root, path); | ||
8597 | if (ret) | ||
8598 | goto out; | ||
8599 | btrfs_release_path(tree_root, path); | ||
8600 | } | ||
8601 | |||
7649 | spin_lock(&root->fs_info->block_group_cache_lock); | 8602 | spin_lock(&root->fs_info->block_group_cache_lock); |
7650 | rb_erase(&block_group->cache_node, | 8603 | rb_erase(&block_group->cache_node, |
7651 | &root->fs_info->block_group_cache_tree); | 8604 | &root->fs_info->block_group_cache_tree); |
@@ -7667,8 +8620,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7667 | spin_lock(&block_group->space_info->lock); | 8620 | spin_lock(&block_group->space_info->lock); |
7668 | block_group->space_info->total_bytes -= block_group->key.offset; | 8621 | block_group->space_info->total_bytes -= block_group->key.offset; |
7669 | block_group->space_info->bytes_readonly -= block_group->key.offset; | 8622 | block_group->space_info->bytes_readonly -= block_group->key.offset; |
8623 | block_group->space_info->disk_total -= block_group->key.offset * factor; | ||
7670 | spin_unlock(&block_group->space_info->lock); | 8624 | spin_unlock(&block_group->space_info->lock); |
7671 | 8625 | ||
8626 | memcpy(&key, &block_group->key, sizeof(key)); | ||
8627 | |||
7672 | btrfs_clear_space_info_full(root->fs_info); | 8628 | btrfs_clear_space_info_full(root->fs_info); |
7673 | 8629 | ||
7674 | btrfs_put_block_group(block_group); | 8630 | btrfs_put_block_group(block_group); |
@@ -7685,3 +8641,14 @@ out: | |||
7685 | btrfs_free_path(path); | 8641 | btrfs_free_path(path); |
7686 | return ret; | 8642 | return ret; |
7687 | } | 8643 | } |
8644 | |||
8645 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | ||
8646 | { | ||
8647 | return unpin_extent_range(root, start, end); | ||
8648 | } | ||
8649 | |||
8650 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | ||
8651 | u64 num_bytes) | ||
8652 | { | ||
8653 | return btrfs_discard_extent(root, bytenr, num_bytes); | ||
8654 | } | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 96577e8bf9fd..2e993cf1766e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -2,7 +2,6 @@ | |||
2 | #include <linux/slab.h> | 2 | #include <linux/slab.h> |
3 | #include <linux/bio.h> | 3 | #include <linux/bio.h> |
4 | #include <linux/mm.h> | 4 | #include <linux/mm.h> |
5 | #include <linux/gfp.h> | ||
6 | #include <linux/pagemap.h> | 5 | #include <linux/pagemap.h> |
7 | #include <linux/page-flags.h> | 6 | #include <linux/page-flags.h> |
8 | #include <linux/module.h> | 7 | #include <linux/module.h> |
@@ -104,8 +103,8 @@ void extent_io_exit(void) | |||
104 | void extent_io_tree_init(struct extent_io_tree *tree, | 103 | void extent_io_tree_init(struct extent_io_tree *tree, |
105 | struct address_space *mapping, gfp_t mask) | 104 | struct address_space *mapping, gfp_t mask) |
106 | { | 105 | { |
107 | tree->state.rb_node = NULL; | 106 | tree->state = RB_ROOT; |
108 | tree->buffer.rb_node = NULL; | 107 | INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC); |
109 | tree->ops = NULL; | 108 | tree->ops = NULL; |
110 | tree->dirty_bytes = 0; | 109 | tree->dirty_bytes = 0; |
111 | spin_lock_init(&tree->lock); | 110 | spin_lock_init(&tree->lock); |
@@ -136,7 +135,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) | |||
136 | return state; | 135 | return state; |
137 | } | 136 | } |
138 | 137 | ||
139 | static void free_extent_state(struct extent_state *state) | 138 | void free_extent_state(struct extent_state *state) |
140 | { | 139 | { |
141 | if (!state) | 140 | if (!state) |
142 | return; | 141 | return; |
@@ -236,50 +235,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree, | |||
236 | return ret; | 235 | return ret; |
237 | } | 236 | } |
238 | 237 | ||
239 | static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree, | ||
240 | u64 offset, struct rb_node *node) | ||
241 | { | ||
242 | struct rb_root *root = &tree->buffer; | ||
243 | struct rb_node **p = &root->rb_node; | ||
244 | struct rb_node *parent = NULL; | ||
245 | struct extent_buffer *eb; | ||
246 | |||
247 | while (*p) { | ||
248 | parent = *p; | ||
249 | eb = rb_entry(parent, struct extent_buffer, rb_node); | ||
250 | |||
251 | if (offset < eb->start) | ||
252 | p = &(*p)->rb_left; | ||
253 | else if (offset > eb->start) | ||
254 | p = &(*p)->rb_right; | ||
255 | else | ||
256 | return eb; | ||
257 | } | ||
258 | |||
259 | rb_link_node(node, parent, p); | ||
260 | rb_insert_color(node, root); | ||
261 | return NULL; | ||
262 | } | ||
263 | |||
264 | static struct extent_buffer *buffer_search(struct extent_io_tree *tree, | ||
265 | u64 offset) | ||
266 | { | ||
267 | struct rb_root *root = &tree->buffer; | ||
268 | struct rb_node *n = root->rb_node; | ||
269 | struct extent_buffer *eb; | ||
270 | |||
271 | while (n) { | ||
272 | eb = rb_entry(n, struct extent_buffer, rb_node); | ||
273 | if (offset < eb->start) | ||
274 | n = n->rb_left; | ||
275 | else if (offset > eb->start) | ||
276 | n = n->rb_right; | ||
277 | else | ||
278 | return eb; | ||
279 | } | ||
280 | return NULL; | ||
281 | } | ||
282 | |||
283 | static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, | 238 | static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, |
284 | struct extent_state *other) | 239 | struct extent_state *other) |
285 | { | 240 | { |
@@ -336,21 +291,18 @@ static int merge_state(struct extent_io_tree *tree, | |||
336 | } | 291 | } |
337 | 292 | ||
338 | static int set_state_cb(struct extent_io_tree *tree, | 293 | static int set_state_cb(struct extent_io_tree *tree, |
339 | struct extent_state *state, | 294 | struct extent_state *state, int *bits) |
340 | unsigned long bits) | ||
341 | { | 295 | { |
342 | if (tree->ops && tree->ops->set_bit_hook) { | 296 | if (tree->ops && tree->ops->set_bit_hook) { |
343 | return tree->ops->set_bit_hook(tree->mapping->host, | 297 | return tree->ops->set_bit_hook(tree->mapping->host, |
344 | state->start, state->end, | 298 | state, bits); |
345 | state->state, bits); | ||
346 | } | 299 | } |
347 | 300 | ||
348 | return 0; | 301 | return 0; |
349 | } | 302 | } |
350 | 303 | ||
351 | static void clear_state_cb(struct extent_io_tree *tree, | 304 | static void clear_state_cb(struct extent_io_tree *tree, |
352 | struct extent_state *state, | 305 | struct extent_state *state, int *bits) |
353 | unsigned long bits) | ||
354 | { | 306 | { |
355 | if (tree->ops && tree->ops->clear_bit_hook) | 307 | if (tree->ops && tree->ops->clear_bit_hook) |
356 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); | 308 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
@@ -368,9 +320,10 @@ static void clear_state_cb(struct extent_io_tree *tree, | |||
368 | */ | 320 | */ |
369 | static int insert_state(struct extent_io_tree *tree, | 321 | static int insert_state(struct extent_io_tree *tree, |
370 | struct extent_state *state, u64 start, u64 end, | 322 | struct extent_state *state, u64 start, u64 end, |
371 | int bits) | 323 | int *bits) |
372 | { | 324 | { |
373 | struct rb_node *node; | 325 | struct rb_node *node; |
326 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
374 | int ret; | 327 | int ret; |
375 | 328 | ||
376 | if (end < start) { | 329 | if (end < start) { |
@@ -385,9 +338,9 @@ static int insert_state(struct extent_io_tree *tree, | |||
385 | if (ret) | 338 | if (ret) |
386 | return ret; | 339 | return ret; |
387 | 340 | ||
388 | if (bits & EXTENT_DIRTY) | 341 | if (bits_to_set & EXTENT_DIRTY) |
389 | tree->dirty_bytes += end - start + 1; | 342 | tree->dirty_bytes += end - start + 1; |
390 | state->state |= bits; | 343 | state->state |= bits_to_set; |
391 | node = tree_insert(&tree->state, end, &state->rb_node); | 344 | node = tree_insert(&tree->state, end, &state->rb_node); |
392 | if (node) { | 345 | if (node) { |
393 | struct extent_state *found; | 346 | struct extent_state *found; |
@@ -457,13 +410,13 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
457 | * struct is freed and removed from the tree | 410 | * struct is freed and removed from the tree |
458 | */ | 411 | */ |
459 | static int clear_state_bit(struct extent_io_tree *tree, | 412 | static int clear_state_bit(struct extent_io_tree *tree, |
460 | struct extent_state *state, int bits, int wake, | 413 | struct extent_state *state, |
461 | int delete) | 414 | int *bits, int wake) |
462 | { | 415 | { |
463 | int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING; | 416 | int bits_to_clear = *bits & ~EXTENT_CTLBITS; |
464 | int ret = state->state & bits_to_clear; | 417 | int ret = state->state & bits_to_clear; |
465 | 418 | ||
466 | if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { | 419 | if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { |
467 | u64 range = state->end - state->start + 1; | 420 | u64 range = state->end - state->start + 1; |
468 | WARN_ON(range > tree->dirty_bytes); | 421 | WARN_ON(range > tree->dirty_bytes); |
469 | tree->dirty_bytes -= range; | 422 | tree->dirty_bytes -= range; |
@@ -472,9 +425,8 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
472 | state->state &= ~bits_to_clear; | 425 | state->state &= ~bits_to_clear; |
473 | if (wake) | 426 | if (wake) |
474 | wake_up(&state->wq); | 427 | wake_up(&state->wq); |
475 | if (delete || state->state == 0) { | 428 | if (state->state == 0) { |
476 | if (state->tree) { | 429 | if (state->tree) { |
477 | clear_state_cb(tree, state, state->state); | ||
478 | rb_erase(&state->rb_node, &tree->state); | 430 | rb_erase(&state->rb_node, &tree->state); |
479 | state->tree = NULL; | 431 | state->tree = NULL; |
480 | free_extent_state(state); | 432 | free_extent_state(state); |
@@ -513,7 +465,14 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
513 | u64 last_end; | 465 | u64 last_end; |
514 | int err; | 466 | int err; |
515 | int set = 0; | 467 | int set = 0; |
468 | int clear = 0; | ||
469 | |||
470 | if (delete) | ||
471 | bits |= ~EXTENT_CTLBITS; | ||
472 | bits |= EXTENT_FIRST_DELALLOC; | ||
516 | 473 | ||
474 | if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) | ||
475 | clear = 1; | ||
517 | again: | 476 | again: |
518 | if (!prealloc && (mask & __GFP_WAIT)) { | 477 | if (!prealloc && (mask & __GFP_WAIT)) { |
519 | prealloc = alloc_extent_state(mask); | 478 | prealloc = alloc_extent_state(mask); |
@@ -524,14 +483,20 @@ again: | |||
524 | spin_lock(&tree->lock); | 483 | spin_lock(&tree->lock); |
525 | if (cached_state) { | 484 | if (cached_state) { |
526 | cached = *cached_state; | 485 | cached = *cached_state; |
527 | *cached_state = NULL; | 486 | |
528 | cached_state = NULL; | 487 | if (clear) { |
488 | *cached_state = NULL; | ||
489 | cached_state = NULL; | ||
490 | } | ||
491 | |||
529 | if (cached && cached->tree && cached->start == start) { | 492 | if (cached && cached->tree && cached->start == start) { |
530 | atomic_dec(&cached->refs); | 493 | if (clear) |
494 | atomic_dec(&cached->refs); | ||
531 | state = cached; | 495 | state = cached; |
532 | goto hit_next; | 496 | goto hit_next; |
533 | } | 497 | } |
534 | free_extent_state(cached); | 498 | if (clear) |
499 | free_extent_state(cached); | ||
535 | } | 500 | } |
536 | /* | 501 | /* |
537 | * this search will find the extents that end after | 502 | * this search will find the extents that end after |
@@ -572,8 +537,7 @@ hit_next: | |||
572 | if (err) | 537 | if (err) |
573 | goto out; | 538 | goto out; |
574 | if (state->end <= end) { | 539 | if (state->end <= end) { |
575 | set |= clear_state_bit(tree, state, bits, wake, | 540 | set |= clear_state_bit(tree, state, &bits, wake); |
576 | delete); | ||
577 | if (last_end == (u64)-1) | 541 | if (last_end == (u64)-1) |
578 | goto out; | 542 | goto out; |
579 | start = last_end + 1; | 543 | start = last_end + 1; |
@@ -594,7 +558,7 @@ hit_next: | |||
594 | if (wake) | 558 | if (wake) |
595 | wake_up(&state->wq); | 559 | wake_up(&state->wq); |
596 | 560 | ||
597 | set |= clear_state_bit(tree, prealloc, bits, wake, delete); | 561 | set |= clear_state_bit(tree, prealloc, &bits, wake); |
598 | 562 | ||
599 | prealloc = NULL; | 563 | prealloc = NULL; |
600 | goto out; | 564 | goto out; |
@@ -605,7 +569,7 @@ hit_next: | |||
605 | else | 569 | else |
606 | next_node = NULL; | 570 | next_node = NULL; |
607 | 571 | ||
608 | set |= clear_state_bit(tree, state, bits, wake, delete); | 572 | set |= clear_state_bit(tree, state, &bits, wake); |
609 | if (last_end == (u64)-1) | 573 | if (last_end == (u64)-1) |
610 | goto out; | 574 | goto out; |
611 | start = last_end + 1; | 575 | start = last_end + 1; |
@@ -698,19 +662,19 @@ out: | |||
698 | 662 | ||
699 | static int set_state_bits(struct extent_io_tree *tree, | 663 | static int set_state_bits(struct extent_io_tree *tree, |
700 | struct extent_state *state, | 664 | struct extent_state *state, |
701 | int bits) | 665 | int *bits) |
702 | { | 666 | { |
703 | int ret; | 667 | int ret; |
668 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
704 | 669 | ||
705 | ret = set_state_cb(tree, state, bits); | 670 | ret = set_state_cb(tree, state, bits); |
706 | if (ret) | 671 | if (ret) |
707 | return ret; | 672 | return ret; |
708 | 673 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | |
709 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | ||
710 | u64 range = state->end - state->start + 1; | 674 | u64 range = state->end - state->start + 1; |
711 | tree->dirty_bytes += range; | 675 | tree->dirty_bytes += range; |
712 | } | 676 | } |
713 | state->state |= bits; | 677 | state->state |= bits_to_set; |
714 | 678 | ||
715 | return 0; | 679 | return 0; |
716 | } | 680 | } |
@@ -737,10 +701,9 @@ static void cache_state(struct extent_state *state, | |||
737 | * [start, end] is inclusive This takes the tree lock. | 701 | * [start, end] is inclusive This takes the tree lock. |
738 | */ | 702 | */ |
739 | 703 | ||
740 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 704 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
741 | int bits, int exclusive_bits, u64 *failed_start, | 705 | int bits, int exclusive_bits, u64 *failed_start, |
742 | struct extent_state **cached_state, | 706 | struct extent_state **cached_state, gfp_t mask) |
743 | gfp_t mask) | ||
744 | { | 707 | { |
745 | struct extent_state *state; | 708 | struct extent_state *state; |
746 | struct extent_state *prealloc = NULL; | 709 | struct extent_state *prealloc = NULL; |
@@ -749,6 +712,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
749 | u64 last_start; | 712 | u64 last_start; |
750 | u64 last_end; | 713 | u64 last_end; |
751 | 714 | ||
715 | bits |= EXTENT_FIRST_DELALLOC; | ||
752 | again: | 716 | again: |
753 | if (!prealloc && (mask & __GFP_WAIT)) { | 717 | if (!prealloc && (mask & __GFP_WAIT)) { |
754 | prealloc = alloc_extent_state(mask); | 718 | prealloc = alloc_extent_state(mask); |
@@ -770,7 +734,7 @@ again: | |||
770 | */ | 734 | */ |
771 | node = tree_search(tree, start); | 735 | node = tree_search(tree, start); |
772 | if (!node) { | 736 | if (!node) { |
773 | err = insert_state(tree, prealloc, start, end, bits); | 737 | err = insert_state(tree, prealloc, start, end, &bits); |
774 | prealloc = NULL; | 738 | prealloc = NULL; |
775 | BUG_ON(err == -EEXIST); | 739 | BUG_ON(err == -EEXIST); |
776 | goto out; | 740 | goto out; |
@@ -794,7 +758,7 @@ hit_next: | |||
794 | goto out; | 758 | goto out; |
795 | } | 759 | } |
796 | 760 | ||
797 | err = set_state_bits(tree, state, bits); | 761 | err = set_state_bits(tree, state, &bits); |
798 | if (err) | 762 | if (err) |
799 | goto out; | 763 | goto out; |
800 | 764 | ||
@@ -844,7 +808,7 @@ hit_next: | |||
844 | if (err) | 808 | if (err) |
845 | goto out; | 809 | goto out; |
846 | if (state->end <= end) { | 810 | if (state->end <= end) { |
847 | err = set_state_bits(tree, state, bits); | 811 | err = set_state_bits(tree, state, &bits); |
848 | if (err) | 812 | if (err) |
849 | goto out; | 813 | goto out; |
850 | cache_state(state, cached_state); | 814 | cache_state(state, cached_state); |
@@ -869,7 +833,7 @@ hit_next: | |||
869 | else | 833 | else |
870 | this_end = last_start - 1; | 834 | this_end = last_start - 1; |
871 | err = insert_state(tree, prealloc, start, this_end, | 835 | err = insert_state(tree, prealloc, start, this_end, |
872 | bits); | 836 | &bits); |
873 | BUG_ON(err == -EEXIST); | 837 | BUG_ON(err == -EEXIST); |
874 | if (err) { | 838 | if (err) { |
875 | prealloc = NULL; | 839 | prealloc = NULL; |
@@ -895,7 +859,7 @@ hit_next: | |||
895 | err = split_state(tree, state, prealloc, end + 1); | 859 | err = split_state(tree, state, prealloc, end + 1); |
896 | BUG_ON(err == -EEXIST); | 860 | BUG_ON(err == -EEXIST); |
897 | 861 | ||
898 | err = set_state_bits(tree, prealloc, bits); | 862 | err = set_state_bits(tree, prealloc, &bits); |
899 | if (err) { | 863 | if (err) { |
900 | prealloc = NULL; | 864 | prealloc = NULL; |
901 | goto out; | 865 | goto out; |
@@ -946,11 +910,11 @@ int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | |||
946 | } | 910 | } |
947 | 911 | ||
948 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | 912 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, |
949 | gfp_t mask) | 913 | struct extent_state **cached_state, gfp_t mask) |
950 | { | 914 | { |
951 | return set_extent_bit(tree, start, end, | 915 | return set_extent_bit(tree, start, end, |
952 | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, | 916 | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, |
953 | 0, NULL, NULL, mask); | 917 | 0, NULL, cached_state, mask); |
954 | } | 918 | } |
955 | 919 | ||
956 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 920 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -958,8 +922,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
958 | { | 922 | { |
959 | return clear_extent_bit(tree, start, end, | 923 | return clear_extent_bit(tree, start, end, |
960 | EXTENT_DIRTY | EXTENT_DELALLOC | | 924 | EXTENT_DIRTY | EXTENT_DELALLOC | |
961 | EXTENT_DO_ACCOUNTING, 0, 0, | 925 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask); |
962 | NULL, mask); | ||
963 | } | 926 | } |
964 | 927 | ||
965 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 928 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -984,10 +947,11 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | |||
984 | } | 947 | } |
985 | 948 | ||
986 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, | 949 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, |
987 | u64 end, gfp_t mask) | 950 | u64 end, struct extent_state **cached_state, |
951 | gfp_t mask) | ||
988 | { | 952 | { |
989 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, | 953 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, |
990 | NULL, mask); | 954 | cached_state, mask); |
991 | } | 955 | } |
992 | 956 | ||
993 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) | 957 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) |
@@ -1171,7 +1135,8 @@ out: | |||
1171 | * 1 is returned if we find something, 0 if nothing was in the tree | 1135 | * 1 is returned if we find something, 0 if nothing was in the tree |
1172 | */ | 1136 | */ |
1173 | static noinline u64 find_delalloc_range(struct extent_io_tree *tree, | 1137 | static noinline u64 find_delalloc_range(struct extent_io_tree *tree, |
1174 | u64 *start, u64 *end, u64 max_bytes) | 1138 | u64 *start, u64 *end, u64 max_bytes, |
1139 | struct extent_state **cached_state) | ||
1175 | { | 1140 | { |
1176 | struct rb_node *node; | 1141 | struct rb_node *node; |
1177 | struct extent_state *state; | 1142 | struct extent_state *state; |
@@ -1203,8 +1168,11 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree, | |||
1203 | *end = state->end; | 1168 | *end = state->end; |
1204 | goto out; | 1169 | goto out; |
1205 | } | 1170 | } |
1206 | if (!found) | 1171 | if (!found) { |
1207 | *start = state->start; | 1172 | *start = state->start; |
1173 | *cached_state = state; | ||
1174 | atomic_inc(&state->refs); | ||
1175 | } | ||
1208 | found++; | 1176 | found++; |
1209 | *end = state->end; | 1177 | *end = state->end; |
1210 | cur_start = state->end + 1; | 1178 | cur_start = state->end + 1; |
@@ -1336,10 +1304,11 @@ again: | |||
1336 | delalloc_start = *start; | 1304 | delalloc_start = *start; |
1337 | delalloc_end = 0; | 1305 | delalloc_end = 0; |
1338 | found = find_delalloc_range(tree, &delalloc_start, &delalloc_end, | 1306 | found = find_delalloc_range(tree, &delalloc_start, &delalloc_end, |
1339 | max_bytes); | 1307 | max_bytes, &cached_state); |
1340 | if (!found || delalloc_end <= *start) { | 1308 | if (!found || delalloc_end <= *start) { |
1341 | *start = delalloc_start; | 1309 | *start = delalloc_start; |
1342 | *end = delalloc_end; | 1310 | *end = delalloc_end; |
1311 | free_extent_state(cached_state); | ||
1343 | return found; | 1312 | return found; |
1344 | } | 1313 | } |
1345 | 1314 | ||
@@ -1421,9 +1390,6 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
1421 | if (op & EXTENT_CLEAR_DELALLOC) | 1390 | if (op & EXTENT_CLEAR_DELALLOC) |
1422 | clear_bits |= EXTENT_DELALLOC; | 1391 | clear_bits |= EXTENT_DELALLOC; |
1423 | 1392 | ||
1424 | if (op & EXTENT_CLEAR_ACCOUNTING) | ||
1425 | clear_bits |= EXTENT_DO_ACCOUNTING; | ||
1426 | |||
1427 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); | 1393 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); |
1428 | if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | | 1394 | if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | |
1429 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | | 1395 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | |
@@ -1722,7 +1688,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err) | |||
1722 | } | 1688 | } |
1723 | 1689 | ||
1724 | if (!uptodate) { | 1690 | if (!uptodate) { |
1725 | clear_extent_uptodate(tree, start, end, GFP_NOFS); | 1691 | clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); |
1726 | ClearPageUptodate(page); | 1692 | ClearPageUptodate(page); |
1727 | SetPageError(page); | 1693 | SetPageError(page); |
1728 | } | 1694 | } |
@@ -1750,7 +1716,8 @@ static void end_bio_extent_writepage(struct bio *bio, int err) | |||
1750 | static void end_bio_extent_readpage(struct bio *bio, int err) | 1716 | static void end_bio_extent_readpage(struct bio *bio, int err) |
1751 | { | 1717 | { |
1752 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 1718 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
1753 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | 1719 | struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; |
1720 | struct bio_vec *bvec = bio->bi_io_vec; | ||
1754 | struct extent_io_tree *tree; | 1721 | struct extent_io_tree *tree; |
1755 | u64 start; | 1722 | u64 start; |
1756 | u64 end; | 1723 | u64 end; |
@@ -1773,7 +1740,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
1773 | else | 1740 | else |
1774 | whole_page = 0; | 1741 | whole_page = 0; |
1775 | 1742 | ||
1776 | if (--bvec >= bio->bi_io_vec) | 1743 | if (++bvec <= bvec_end) |
1777 | prefetchw(&bvec->bv_page->flags); | 1744 | prefetchw(&bvec->bv_page->flags); |
1778 | 1745 | ||
1779 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { | 1746 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { |
@@ -1818,7 +1785,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
1818 | } | 1785 | } |
1819 | check_page_locked(tree, page); | 1786 | check_page_locked(tree, page); |
1820 | } | 1787 | } |
1821 | } while (bvec >= bio->bi_io_vec); | 1788 | } while (bvec <= bvec_end); |
1822 | 1789 | ||
1823 | bio_put(bio); | 1790 | bio_put(bio); |
1824 | } | 1791 | } |
@@ -1861,9 +1828,9 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) | |||
1861 | bio_put(bio); | 1828 | bio_put(bio); |
1862 | } | 1829 | } |
1863 | 1830 | ||
1864 | static struct bio * | 1831 | struct bio * |
1865 | extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | 1832 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, |
1866 | gfp_t gfp_flags) | 1833 | gfp_t gfp_flags) |
1867 | { | 1834 | { |
1868 | struct bio *bio; | 1835 | struct bio *bio; |
1869 | 1836 | ||
@@ -1890,10 +1857,8 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, | |||
1890 | struct page *page = bvec->bv_page; | 1857 | struct page *page = bvec->bv_page; |
1891 | struct extent_io_tree *tree = bio->bi_private; | 1858 | struct extent_io_tree *tree = bio->bi_private; |
1892 | u64 start; | 1859 | u64 start; |
1893 | u64 end; | ||
1894 | 1860 | ||
1895 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; | 1861 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; |
1896 | end = start + bvec->bv_len - 1; | ||
1897 | 1862 | ||
1898 | bio->bi_private = NULL; | 1863 | bio->bi_private = NULL; |
1899 | 1864 | ||
@@ -1901,7 +1866,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, | |||
1901 | 1866 | ||
1902 | if (tree->ops && tree->ops->submit_bio_hook) | 1867 | if (tree->ops && tree->ops->submit_bio_hook) |
1903 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, | 1868 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, |
1904 | mirror_num, bio_flags); | 1869 | mirror_num, bio_flags, start); |
1905 | else | 1870 | else |
1906 | submit_bio(rw, bio); | 1871 | submit_bio(rw, bio); |
1907 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | 1872 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) |
@@ -1954,7 +1919,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, | |||
1954 | else | 1919 | else |
1955 | nr = bio_get_nr_vecs(bdev); | 1920 | nr = bio_get_nr_vecs(bdev); |
1956 | 1921 | ||
1957 | bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); | 1922 | bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); |
1958 | 1923 | ||
1959 | bio_add_page(bio, page, page_size, offset); | 1924 | bio_add_page(bio, page, page_size, offset); |
1960 | bio->bi_end_io = end_io_func; | 1925 | bio->bi_end_io = end_io_func; |
@@ -2005,6 +1970,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2005 | sector_t sector; | 1970 | sector_t sector; |
2006 | struct extent_map *em; | 1971 | struct extent_map *em; |
2007 | struct block_device *bdev; | 1972 | struct block_device *bdev; |
1973 | struct btrfs_ordered_extent *ordered; | ||
2008 | int ret; | 1974 | int ret; |
2009 | int nr = 0; | 1975 | int nr = 0; |
2010 | size_t page_offset = 0; | 1976 | size_t page_offset = 0; |
@@ -2016,7 +1982,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2016 | set_page_extent_mapped(page); | 1982 | set_page_extent_mapped(page); |
2017 | 1983 | ||
2018 | end = page_end; | 1984 | end = page_end; |
2019 | lock_extent(tree, start, end, GFP_NOFS); | 1985 | while (1) { |
1986 | lock_extent(tree, start, end, GFP_NOFS); | ||
1987 | ordered = btrfs_lookup_ordered_extent(inode, start); | ||
1988 | if (!ordered) | ||
1989 | break; | ||
1990 | unlock_extent(tree, start, end, GFP_NOFS); | ||
1991 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
1992 | btrfs_put_ordered_extent(ordered); | ||
1993 | } | ||
2020 | 1994 | ||
2021 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { | 1995 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { |
2022 | char *userpage; | 1996 | char *userpage; |
@@ -2054,8 +2028,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2054 | BUG_ON(extent_map_end(em) <= cur); | 2028 | BUG_ON(extent_map_end(em) <= cur); |
2055 | BUG_ON(end < cur); | 2029 | BUG_ON(end < cur); |
2056 | 2030 | ||
2057 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 2031 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
2058 | this_bio_flag = EXTENT_BIO_COMPRESSED; | 2032 | this_bio_flag = EXTENT_BIO_COMPRESSED; |
2033 | extent_set_compress_type(&this_bio_flag, | ||
2034 | em->compress_type); | ||
2035 | } | ||
2059 | 2036 | ||
2060 | iosize = min(extent_map_end(em) - cur, end - cur + 1); | 2037 | iosize = min(extent_map_end(em) - cur, end - cur + 1); |
2061 | cur_end = min(extent_map_end(em) - 1, end); | 2038 | cur_end = min(extent_map_end(em) - 1, end); |
@@ -2184,7 +2161,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2184 | u64 last_byte = i_size_read(inode); | 2161 | u64 last_byte = i_size_read(inode); |
2185 | u64 block_start; | 2162 | u64 block_start; |
2186 | u64 iosize; | 2163 | u64 iosize; |
2187 | u64 unlock_start; | ||
2188 | sector_t sector; | 2164 | sector_t sector; |
2189 | struct extent_state *cached_state = NULL; | 2165 | struct extent_state *cached_state = NULL; |
2190 | struct extent_map *em; | 2166 | struct extent_map *em; |
@@ -2309,7 +2285,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2309 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2285 | if (tree->ops && tree->ops->writepage_end_io_hook) |
2310 | tree->ops->writepage_end_io_hook(page, start, | 2286 | tree->ops->writepage_end_io_hook(page, start, |
2311 | page_end, NULL, 1); | 2287 | page_end, NULL, 1); |
2312 | unlock_start = page_end + 1; | ||
2313 | goto done; | 2288 | goto done; |
2314 | } | 2289 | } |
2315 | 2290 | ||
@@ -2320,7 +2295,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2320 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2295 | if (tree->ops && tree->ops->writepage_end_io_hook) |
2321 | tree->ops->writepage_end_io_hook(page, cur, | 2296 | tree->ops->writepage_end_io_hook(page, cur, |
2322 | page_end, NULL, 1); | 2297 | page_end, NULL, 1); |
2323 | unlock_start = page_end + 1; | ||
2324 | break; | 2298 | break; |
2325 | } | 2299 | } |
2326 | em = epd->get_extent(inode, page, pg_offset, cur, | 2300 | em = epd->get_extent(inode, page, pg_offset, cur, |
@@ -2367,7 +2341,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2367 | 2341 | ||
2368 | cur += iosize; | 2342 | cur += iosize; |
2369 | pg_offset += iosize; | 2343 | pg_offset += iosize; |
2370 | unlock_start = cur; | ||
2371 | continue; | 2344 | continue; |
2372 | } | 2345 | } |
2373 | /* leave this out until we have a page_mkwrite call */ | 2346 | /* leave this out until we have a page_mkwrite call */ |
@@ -2453,7 +2426,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2453 | pgoff_t index; | 2426 | pgoff_t index; |
2454 | pgoff_t end; /* Inclusive */ | 2427 | pgoff_t end; /* Inclusive */ |
2455 | int scanned = 0; | 2428 | int scanned = 0; |
2456 | int range_whole = 0; | ||
2457 | 2429 | ||
2458 | pagevec_init(&pvec, 0); | 2430 | pagevec_init(&pvec, 0); |
2459 | if (wbc->range_cyclic) { | 2431 | if (wbc->range_cyclic) { |
@@ -2462,8 +2434,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2462 | } else { | 2434 | } else { |
2463 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2435 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2464 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2436 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
2465 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | ||
2466 | range_whole = 1; | ||
2467 | scanned = 1; | 2437 | scanned = 1; |
2468 | } | 2438 | } |
2469 | retry: | 2439 | retry: |
@@ -2574,7 +2544,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
2574 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 2544 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
2575 | }; | 2545 | }; |
2576 | struct writeback_control wbc_writepages = { | 2546 | struct writeback_control wbc_writepages = { |
2577 | .bdi = wbc->bdi, | ||
2578 | .sync_mode = wbc->sync_mode, | 2547 | .sync_mode = wbc->sync_mode, |
2579 | .older_than_this = NULL, | 2548 | .older_than_this = NULL, |
2580 | .nr_to_write = 64, | 2549 | .nr_to_write = 64, |
@@ -2608,7 +2577,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
2608 | .sync_io = mode == WB_SYNC_ALL, | 2577 | .sync_io = mode == WB_SYNC_ALL, |
2609 | }; | 2578 | }; |
2610 | struct writeback_control wbc_writepages = { | 2579 | struct writeback_control wbc_writepages = { |
2611 | .bdi = inode->i_mapping->backing_dev_info, | ||
2612 | .sync_mode = mode, | 2580 | .sync_mode = mode, |
2613 | .older_than_this = NULL, | 2581 | .older_than_this = NULL, |
2614 | .nr_to_write = nr_pages * 2, | 2582 | .nr_to_write = nr_pages * 2, |
@@ -2663,33 +2631,20 @@ int extent_readpages(struct extent_io_tree *tree, | |||
2663 | { | 2631 | { |
2664 | struct bio *bio = NULL; | 2632 | struct bio *bio = NULL; |
2665 | unsigned page_idx; | 2633 | unsigned page_idx; |
2666 | struct pagevec pvec; | ||
2667 | unsigned long bio_flags = 0; | 2634 | unsigned long bio_flags = 0; |
2668 | 2635 | ||
2669 | pagevec_init(&pvec, 0); | ||
2670 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | 2636 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
2671 | struct page *page = list_entry(pages->prev, struct page, lru); | 2637 | struct page *page = list_entry(pages->prev, struct page, lru); |
2672 | 2638 | ||
2673 | prefetchw(&page->flags); | 2639 | prefetchw(&page->flags); |
2674 | list_del(&page->lru); | 2640 | list_del(&page->lru); |
2675 | /* | 2641 | if (!add_to_page_cache_lru(page, mapping, |
2676 | * what we want to do here is call add_to_page_cache_lru, | ||
2677 | * but that isn't exported, so we reproduce it here | ||
2678 | */ | ||
2679 | if (!add_to_page_cache(page, mapping, | ||
2680 | page->index, GFP_KERNEL)) { | 2642 | page->index, GFP_KERNEL)) { |
2681 | |||
2682 | /* open coding of lru_cache_add, also not exported */ | ||
2683 | page_cache_get(page); | ||
2684 | if (!pagevec_add(&pvec, page)) | ||
2685 | __pagevec_lru_add_file(&pvec); | ||
2686 | __extent_read_full_page(tree, page, get_extent, | 2643 | __extent_read_full_page(tree, page, get_extent, |
2687 | &bio, 0, &bio_flags); | 2644 | &bio, 0, &bio_flags); |
2688 | } | 2645 | } |
2689 | page_cache_release(page); | 2646 | page_cache_release(page); |
2690 | } | 2647 | } |
2691 | if (pagevec_count(&pvec)) | ||
2692 | __pagevec_lru_add_file(&pvec); | ||
2693 | BUG_ON(!list_empty(pages)); | 2648 | BUG_ON(!list_empty(pages)); |
2694 | if (bio) | 2649 | if (bio) |
2695 | submit_one_bio(READ, bio, 0, bio_flags); | 2650 | submit_one_bio(READ, bio, 0, bio_flags); |
@@ -2704,6 +2659,7 @@ int extent_readpages(struct extent_io_tree *tree, | |||
2704 | int extent_invalidatepage(struct extent_io_tree *tree, | 2659 | int extent_invalidatepage(struct extent_io_tree *tree, |
2705 | struct page *page, unsigned long offset) | 2660 | struct page *page, unsigned long offset) |
2706 | { | 2661 | { |
2662 | struct extent_state *cached_state = NULL; | ||
2707 | u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); | 2663 | u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); |
2708 | u64 end = start + PAGE_CACHE_SIZE - 1; | 2664 | u64 end = start + PAGE_CACHE_SIZE - 1; |
2709 | size_t blocksize = page->mapping->host->i_sb->s_blocksize; | 2665 | size_t blocksize = page->mapping->host->i_sb->s_blocksize; |
@@ -2712,12 +2668,12 @@ int extent_invalidatepage(struct extent_io_tree *tree, | |||
2712 | if (start > end) | 2668 | if (start > end) |
2713 | return 0; | 2669 | return 0; |
2714 | 2670 | ||
2715 | lock_extent(tree, start, end, GFP_NOFS); | 2671 | lock_extent_bits(tree, start, end, 0, &cached_state, GFP_NOFS); |
2716 | wait_on_page_writeback(page); | 2672 | wait_on_page_writeback(page); |
2717 | clear_extent_bit(tree, start, end, | 2673 | clear_extent_bit(tree, start, end, |
2718 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | | 2674 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | |
2719 | EXTENT_DO_ACCOUNTING, | 2675 | EXTENT_DO_ACCOUNTING, |
2720 | 1, 1, NULL, GFP_NOFS); | 2676 | 1, 1, &cached_state, GFP_NOFS); |
2721 | return 0; | 2677 | return 0; |
2722 | } | 2678 | } |
2723 | 2679 | ||
@@ -2817,6 +2773,8 @@ int extent_prepare_write(struct extent_io_tree *tree, | |||
2817 | NULL, 1, | 2773 | NULL, 1, |
2818 | end_bio_extent_preparewrite, 0, | 2774 | end_bio_extent_preparewrite, 0, |
2819 | 0, 0); | 2775 | 0, 0); |
2776 | if (ret && !err) | ||
2777 | err = ret; | ||
2820 | iocount++; | 2778 | iocount++; |
2821 | block_start = block_start + iosize; | 2779 | block_start = block_start + iosize; |
2822 | } else { | 2780 | } else { |
@@ -2920,16 +2878,17 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, | |||
2920 | get_extent_t *get_extent) | 2878 | get_extent_t *get_extent) |
2921 | { | 2879 | { |
2922 | struct inode *inode = mapping->host; | 2880 | struct inode *inode = mapping->host; |
2881 | struct extent_state *cached_state = NULL; | ||
2923 | u64 start = iblock << inode->i_blkbits; | 2882 | u64 start = iblock << inode->i_blkbits; |
2924 | sector_t sector = 0; | 2883 | sector_t sector = 0; |
2925 | size_t blksize = (1 << inode->i_blkbits); | 2884 | size_t blksize = (1 << inode->i_blkbits); |
2926 | struct extent_map *em; | 2885 | struct extent_map *em; |
2927 | 2886 | ||
2928 | lock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, | 2887 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, |
2929 | GFP_NOFS); | 2888 | 0, &cached_state, GFP_NOFS); |
2930 | em = get_extent(inode, NULL, 0, start, blksize, 0); | 2889 | em = get_extent(inode, NULL, 0, start, blksize, 0); |
2931 | unlock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, | 2890 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, |
2932 | GFP_NOFS); | 2891 | start + blksize - 1, &cached_state, GFP_NOFS); |
2933 | if (!em || IS_ERR(em)) | 2892 | if (!em || IS_ERR(em)) |
2934 | return 0; | 2893 | return 0; |
2935 | 2894 | ||
@@ -2945,22 +2904,55 @@ out: | |||
2945 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 2904 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
2946 | __u64 start, __u64 len, get_extent_t *get_extent) | 2905 | __u64 start, __u64 len, get_extent_t *get_extent) |
2947 | { | 2906 | { |
2948 | int ret; | 2907 | int ret = 0; |
2949 | u64 off = start; | 2908 | u64 off = start; |
2950 | u64 max = start + len; | 2909 | u64 max = start + len; |
2951 | u32 flags = 0; | 2910 | u32 flags = 0; |
2911 | u32 found_type; | ||
2912 | u64 last; | ||
2952 | u64 disko = 0; | 2913 | u64 disko = 0; |
2914 | struct btrfs_key found_key; | ||
2953 | struct extent_map *em = NULL; | 2915 | struct extent_map *em = NULL; |
2916 | struct extent_state *cached_state = NULL; | ||
2917 | struct btrfs_path *path; | ||
2918 | struct btrfs_file_extent_item *item; | ||
2954 | int end = 0; | 2919 | int end = 0; |
2955 | u64 em_start = 0, em_len = 0; | 2920 | u64 em_start = 0, em_len = 0; |
2956 | unsigned long emflags; | 2921 | unsigned long emflags; |
2957 | ret = 0; | 2922 | int hole = 0; |
2958 | 2923 | ||
2959 | if (len == 0) | 2924 | if (len == 0) |
2960 | return -EINVAL; | 2925 | return -EINVAL; |
2961 | 2926 | ||
2962 | lock_extent(&BTRFS_I(inode)->io_tree, start, start + len, | 2927 | path = btrfs_alloc_path(); |
2963 | GFP_NOFS); | 2928 | if (!path) |
2929 | return -ENOMEM; | ||
2930 | path->leave_spinning = 1; | ||
2931 | |||
2932 | ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, | ||
2933 | path, inode->i_ino, -1, 0); | ||
2934 | if (ret < 0) { | ||
2935 | btrfs_free_path(path); | ||
2936 | return ret; | ||
2937 | } | ||
2938 | WARN_ON(!ret); | ||
2939 | path->slots[0]--; | ||
2940 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
2941 | struct btrfs_file_extent_item); | ||
2942 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); | ||
2943 | found_type = btrfs_key_type(&found_key); | ||
2944 | |||
2945 | /* No extents, just return */ | ||
2946 | if (found_key.objectid != inode->i_ino || | ||
2947 | found_type != BTRFS_EXTENT_DATA_KEY) { | ||
2948 | btrfs_free_path(path); | ||
2949 | return 0; | ||
2950 | } | ||
2951 | last = found_key.offset; | ||
2952 | btrfs_free_path(path); | ||
2953 | |||
2954 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, | ||
2955 | &cached_state, GFP_NOFS); | ||
2964 | em = get_extent(inode, NULL, 0, off, max - off, 0); | 2956 | em = get_extent(inode, NULL, 0, off, max - off, 0); |
2965 | if (!em) | 2957 | if (!em) |
2966 | goto out; | 2958 | goto out; |
@@ -2968,11 +2960,18 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2968 | ret = PTR_ERR(em); | 2960 | ret = PTR_ERR(em); |
2969 | goto out; | 2961 | goto out; |
2970 | } | 2962 | } |
2963 | |||
2971 | while (!end) { | 2964 | while (!end) { |
2965 | hole = 0; | ||
2972 | off = em->start + em->len; | 2966 | off = em->start + em->len; |
2973 | if (off >= max) | 2967 | if (off >= max) |
2974 | end = 1; | 2968 | end = 1; |
2975 | 2969 | ||
2970 | if (em->block_start == EXTENT_MAP_HOLE) { | ||
2971 | hole = 1; | ||
2972 | goto next; | ||
2973 | } | ||
2974 | |||
2976 | em_start = em->start; | 2975 | em_start = em->start; |
2977 | em_len = em->len; | 2976 | em_len = em->len; |
2978 | 2977 | ||
@@ -2982,8 +2981,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2982 | if (em->block_start == EXTENT_MAP_LAST_BYTE) { | 2981 | if (em->block_start == EXTENT_MAP_LAST_BYTE) { |
2983 | end = 1; | 2982 | end = 1; |
2984 | flags |= FIEMAP_EXTENT_LAST; | 2983 | flags |= FIEMAP_EXTENT_LAST; |
2985 | } else if (em->block_start == EXTENT_MAP_HOLE) { | ||
2986 | flags |= FIEMAP_EXTENT_UNWRITTEN; | ||
2987 | } else if (em->block_start == EXTENT_MAP_INLINE) { | 2984 | } else if (em->block_start == EXTENT_MAP_INLINE) { |
2988 | flags |= (FIEMAP_EXTENT_DATA_INLINE | | 2985 | flags |= (FIEMAP_EXTENT_DATA_INLINE | |
2989 | FIEMAP_EXTENT_NOT_ALIGNED); | 2986 | FIEMAP_EXTENT_NOT_ALIGNED); |
@@ -2996,10 +2993,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2996 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 2993 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) |
2997 | flags |= FIEMAP_EXTENT_ENCODED; | 2994 | flags |= FIEMAP_EXTENT_ENCODED; |
2998 | 2995 | ||
2996 | next: | ||
2999 | emflags = em->flags; | 2997 | emflags = em->flags; |
3000 | free_extent_map(em); | 2998 | free_extent_map(em); |
3001 | em = NULL; | 2999 | em = NULL; |
3002 | |||
3003 | if (!end) { | 3000 | if (!end) { |
3004 | em = get_extent(inode, NULL, 0, off, max - off, 0); | 3001 | em = get_extent(inode, NULL, 0, off, max - off, 0); |
3005 | if (!em) | 3002 | if (!em) |
@@ -3010,21 +3007,29 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
3010 | } | 3007 | } |
3011 | emflags = em->flags; | 3008 | emflags = em->flags; |
3012 | } | 3009 | } |
3010 | |||
3013 | if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { | 3011 | if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { |
3014 | flags |= FIEMAP_EXTENT_LAST; | 3012 | flags |= FIEMAP_EXTENT_LAST; |
3015 | end = 1; | 3013 | end = 1; |
3016 | } | 3014 | } |
3017 | 3015 | ||
3018 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, | 3016 | if (em_start == last) { |
3019 | em_len, flags); | 3017 | flags |= FIEMAP_EXTENT_LAST; |
3020 | if (ret) | 3018 | end = 1; |
3021 | goto out_free; | 3019 | } |
3020 | |||
3021 | if (!hole) { | ||
3022 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, | ||
3023 | em_len, flags); | ||
3024 | if (ret) | ||
3025 | goto out_free; | ||
3026 | } | ||
3022 | } | 3027 | } |
3023 | out_free: | 3028 | out_free: |
3024 | free_extent_map(em); | 3029 | free_extent_map(em); |
3025 | out: | 3030 | out: |
3026 | unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len, | 3031 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len, |
3027 | GFP_NOFS); | 3032 | &cached_state, GFP_NOFS); |
3028 | return ret; | 3033 | return ret; |
3029 | } | 3034 | } |
3030 | 3035 | ||
@@ -3070,6 +3075,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3070 | #endif | 3075 | #endif |
3071 | 3076 | ||
3072 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); | 3077 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); |
3078 | if (eb == NULL) | ||
3079 | return NULL; | ||
3073 | eb->start = start; | 3080 | eb->start = start; |
3074 | eb->len = len; | 3081 | eb->len = len; |
3075 | spin_lock_init(&eb->lock); | 3082 | spin_lock_init(&eb->lock); |
@@ -3096,6 +3103,39 @@ static void __free_extent_buffer(struct extent_buffer *eb) | |||
3096 | kmem_cache_free(extent_buffer_cache, eb); | 3103 | kmem_cache_free(extent_buffer_cache, eb); |
3097 | } | 3104 | } |
3098 | 3105 | ||
3106 | /* | ||
3107 | * Helper for releasing extent buffer page. | ||
3108 | */ | ||
3109 | static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | ||
3110 | unsigned long start_idx) | ||
3111 | { | ||
3112 | unsigned long index; | ||
3113 | struct page *page; | ||
3114 | |||
3115 | if (!eb->first_page) | ||
3116 | return; | ||
3117 | |||
3118 | index = num_extent_pages(eb->start, eb->len); | ||
3119 | if (start_idx >= index) | ||
3120 | return; | ||
3121 | |||
3122 | do { | ||
3123 | index--; | ||
3124 | page = extent_buffer_page(eb, index); | ||
3125 | if (page) | ||
3126 | page_cache_release(page); | ||
3127 | } while (index != start_idx); | ||
3128 | } | ||
3129 | |||
3130 | /* | ||
3131 | * Helper for releasing the extent buffer. | ||
3132 | */ | ||
3133 | static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) | ||
3134 | { | ||
3135 | btrfs_release_extent_buffer_page(eb, 0); | ||
3136 | __free_extent_buffer(eb); | ||
3137 | } | ||
3138 | |||
3099 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | 3139 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, |
3100 | u64 start, unsigned long len, | 3140 | u64 start, unsigned long len, |
3101 | struct page *page0, | 3141 | struct page *page0, |
@@ -3109,16 +3149,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3109 | struct page *p; | 3149 | struct page *p; |
3110 | struct address_space *mapping = tree->mapping; | 3150 | struct address_space *mapping = tree->mapping; |
3111 | int uptodate = 1; | 3151 | int uptodate = 1; |
3152 | int ret; | ||
3112 | 3153 | ||
3113 | spin_lock(&tree->buffer_lock); | 3154 | rcu_read_lock(); |
3114 | eb = buffer_search(tree, start); | 3155 | eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); |
3115 | if (eb) { | 3156 | if (eb && atomic_inc_not_zero(&eb->refs)) { |
3116 | atomic_inc(&eb->refs); | 3157 | rcu_read_unlock(); |
3117 | spin_unlock(&tree->buffer_lock); | ||
3118 | mark_page_accessed(eb->first_page); | 3158 | mark_page_accessed(eb->first_page); |
3119 | return eb; | 3159 | return eb; |
3120 | } | 3160 | } |
3121 | spin_unlock(&tree->buffer_lock); | 3161 | rcu_read_unlock(); |
3122 | 3162 | ||
3123 | eb = __alloc_extent_buffer(tree, start, len, mask); | 3163 | eb = __alloc_extent_buffer(tree, start, len, mask); |
3124 | if (!eb) | 3164 | if (!eb) |
@@ -3157,27 +3197,31 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3157 | if (uptodate) | 3197 | if (uptodate) |
3158 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 3198 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
3159 | 3199 | ||
3200 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | ||
3201 | if (ret) | ||
3202 | goto free_eb; | ||
3203 | |||
3160 | spin_lock(&tree->buffer_lock); | 3204 | spin_lock(&tree->buffer_lock); |
3161 | exists = buffer_tree_insert(tree, start, &eb->rb_node); | 3205 | ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb); |
3162 | if (exists) { | 3206 | if (ret == -EEXIST) { |
3207 | exists = radix_tree_lookup(&tree->buffer, | ||
3208 | start >> PAGE_CACHE_SHIFT); | ||
3163 | /* add one reference for the caller */ | 3209 | /* add one reference for the caller */ |
3164 | atomic_inc(&exists->refs); | 3210 | atomic_inc(&exists->refs); |
3165 | spin_unlock(&tree->buffer_lock); | 3211 | spin_unlock(&tree->buffer_lock); |
3212 | radix_tree_preload_end(); | ||
3166 | goto free_eb; | 3213 | goto free_eb; |
3167 | } | 3214 | } |
3168 | spin_unlock(&tree->buffer_lock); | ||
3169 | |||
3170 | /* add one reference for the tree */ | 3215 | /* add one reference for the tree */ |
3171 | atomic_inc(&eb->refs); | 3216 | atomic_inc(&eb->refs); |
3217 | spin_unlock(&tree->buffer_lock); | ||
3218 | radix_tree_preload_end(); | ||
3172 | return eb; | 3219 | return eb; |
3173 | 3220 | ||
3174 | free_eb: | 3221 | free_eb: |
3175 | if (!atomic_dec_and_test(&eb->refs)) | 3222 | if (!atomic_dec_and_test(&eb->refs)) |
3176 | return exists; | 3223 | return exists; |
3177 | for (index = 1; index < i; index++) | 3224 | btrfs_release_extent_buffer(eb); |
3178 | page_cache_release(extent_buffer_page(eb, index)); | ||
3179 | page_cache_release(extent_buffer_page(eb, 0)); | ||
3180 | __free_extent_buffer(eb); | ||
3181 | return exists; | 3225 | return exists; |
3182 | } | 3226 | } |
3183 | 3227 | ||
@@ -3187,16 +3231,16 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, | |||
3187 | { | 3231 | { |
3188 | struct extent_buffer *eb; | 3232 | struct extent_buffer *eb; |
3189 | 3233 | ||
3190 | spin_lock(&tree->buffer_lock); | 3234 | rcu_read_lock(); |
3191 | eb = buffer_search(tree, start); | 3235 | eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); |
3192 | if (eb) | 3236 | if (eb && atomic_inc_not_zero(&eb->refs)) { |
3193 | atomic_inc(&eb->refs); | 3237 | rcu_read_unlock(); |
3194 | spin_unlock(&tree->buffer_lock); | ||
3195 | |||
3196 | if (eb) | ||
3197 | mark_page_accessed(eb->first_page); | 3238 | mark_page_accessed(eb->first_page); |
3239 | return eb; | ||
3240 | } | ||
3241 | rcu_read_unlock(); | ||
3198 | 3242 | ||
3199 | return eb; | 3243 | return NULL; |
3200 | } | 3244 | } |
3201 | 3245 | ||
3202 | void free_extent_buffer(struct extent_buffer *eb) | 3246 | void free_extent_buffer(struct extent_buffer *eb) |
@@ -3265,7 +3309,8 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3265 | } | 3309 | } |
3266 | 3310 | ||
3267 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | 3311 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, |
3268 | struct extent_buffer *eb) | 3312 | struct extent_buffer *eb, |
3313 | struct extent_state **cached_state) | ||
3269 | { | 3314 | { |
3270 | unsigned long i; | 3315 | unsigned long i; |
3271 | struct page *page; | 3316 | struct page *page; |
@@ -3275,7 +3320,7 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3275 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 3320 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
3276 | 3321 | ||
3277 | clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | 3322 | clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, |
3278 | GFP_NOFS); | 3323 | cached_state, GFP_NOFS); |
3279 | for (i = 0; i < num_pages; i++) { | 3324 | for (i = 0; i < num_pages; i++) { |
3280 | page = extent_buffer_page(eb, i); | 3325 | page = extent_buffer_page(eb, i); |
3281 | if (page) | 3326 | if (page) |
@@ -3335,7 +3380,8 @@ int extent_range_uptodate(struct extent_io_tree *tree, | |||
3335 | } | 3380 | } |
3336 | 3381 | ||
3337 | int extent_buffer_uptodate(struct extent_io_tree *tree, | 3382 | int extent_buffer_uptodate(struct extent_io_tree *tree, |
3338 | struct extent_buffer *eb) | 3383 | struct extent_buffer *eb, |
3384 | struct extent_state *cached_state) | ||
3339 | { | 3385 | { |
3340 | int ret = 0; | 3386 | int ret = 0; |
3341 | unsigned long num_pages; | 3387 | unsigned long num_pages; |
@@ -3347,7 +3393,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3347 | return 1; | 3393 | return 1; |
3348 | 3394 | ||
3349 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3395 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
3350 | EXTENT_UPTODATE, 1, NULL); | 3396 | EXTENT_UPTODATE, 1, cached_state); |
3351 | if (ret) | 3397 | if (ret) |
3352 | return ret; | 3398 | return ret; |
3353 | 3399 | ||
@@ -3824,34 +3870,47 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | |||
3824 | } | 3870 | } |
3825 | } | 3871 | } |
3826 | 3872 | ||
3873 | static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) | ||
3874 | { | ||
3875 | struct extent_buffer *eb = | ||
3876 | container_of(head, struct extent_buffer, rcu_head); | ||
3877 | |||
3878 | btrfs_release_extent_buffer(eb); | ||
3879 | } | ||
3880 | |||
3827 | int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) | 3881 | int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) |
3828 | { | 3882 | { |
3829 | u64 start = page_offset(page); | 3883 | u64 start = page_offset(page); |
3830 | struct extent_buffer *eb; | 3884 | struct extent_buffer *eb; |
3831 | int ret = 1; | 3885 | int ret = 1; |
3832 | unsigned long i; | ||
3833 | unsigned long num_pages; | ||
3834 | 3886 | ||
3835 | spin_lock(&tree->buffer_lock); | 3887 | spin_lock(&tree->buffer_lock); |
3836 | eb = buffer_search(tree, start); | 3888 | eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); |
3837 | if (!eb) | 3889 | if (!eb) { |
3838 | goto out; | 3890 | spin_unlock(&tree->buffer_lock); |
3891 | return ret; | ||
3892 | } | ||
3839 | 3893 | ||
3840 | if (atomic_read(&eb->refs) > 1) { | 3894 | if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { |
3841 | ret = 0; | 3895 | ret = 0; |
3842 | goto out; | 3896 | goto out; |
3843 | } | 3897 | } |
3844 | if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | 3898 | |
3899 | /* | ||
3900 | * set @eb->refs to 0 if it is already 1, and then release the @eb. | ||
3901 | * Or go back. | ||
3902 | */ | ||
3903 | if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) { | ||
3845 | ret = 0; | 3904 | ret = 0; |
3846 | goto out; | 3905 | goto out; |
3847 | } | 3906 | } |
3848 | /* at this point we can safely release the extent buffer */ | 3907 | |
3849 | num_pages = num_extent_pages(eb->start, eb->len); | 3908 | radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT); |
3850 | for (i = 0; i < num_pages; i++) | ||
3851 | page_cache_release(extent_buffer_page(eb, i)); | ||
3852 | rb_erase(&eb->rb_node, &tree->buffer); | ||
3853 | __free_extent_buffer(eb); | ||
3854 | out: | 3909 | out: |
3855 | spin_unlock(&tree->buffer_lock); | 3910 | spin_unlock(&tree->buffer_lock); |
3911 | |||
3912 | /* at this point we can safely release the extent buffer */ | ||
3913 | if (atomic_read(&eb->refs) == 0) | ||
3914 | call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); | ||
3856 | return ret; | 3915 | return ret; |
3857 | } | 3916 | } |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 36de250a7b2b..7083cfafd061 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -16,10 +16,16 @@ | |||
16 | #define EXTENT_BOUNDARY (1 << 9) | 16 | #define EXTENT_BOUNDARY (1 << 9) |
17 | #define EXTENT_NODATASUM (1 << 10) | 17 | #define EXTENT_NODATASUM (1 << 10) |
18 | #define EXTENT_DO_ACCOUNTING (1 << 11) | 18 | #define EXTENT_DO_ACCOUNTING (1 << 11) |
19 | #define EXTENT_FIRST_DELALLOC (1 << 12) | ||
19 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | ||
20 | 22 | ||
21 | /* flags for bio submission */ | 23 | /* |
24 | * flags for bio submission. The high bits indicate the compression | ||
25 | * type for this bio | ||
26 | */ | ||
22 | #define EXTENT_BIO_COMPRESSED 1 | 27 | #define EXTENT_BIO_COMPRESSED 1 |
28 | #define EXTENT_BIO_FLAG_SHIFT 16 | ||
23 | 29 | ||
24 | /* these are bit numbers for test/set bit */ | 30 | /* these are bit numbers for test/set bit */ |
25 | #define EXTENT_BUFFER_UPTODATE 0 | 31 | #define EXTENT_BUFFER_UPTODATE 0 |
@@ -47,7 +53,7 @@ struct extent_state; | |||
47 | 53 | ||
48 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, | 54 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, |
49 | struct bio *bio, int mirror_num, | 55 | struct bio *bio, int mirror_num, |
50 | unsigned long bio_flags); | 56 | unsigned long bio_flags, u64 bio_offset); |
51 | struct extent_io_ops { | 57 | struct extent_io_ops { |
52 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, | 58 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, |
53 | u64 start, u64 end, int *page_started, | 59 | u64 start, u64 end, int *page_started, |
@@ -69,10 +75,10 @@ struct extent_io_ops { | |||
69 | struct extent_state *state); | 75 | struct extent_state *state); |
70 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, | 76 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, |
71 | struct extent_state *state, int uptodate); | 77 | struct extent_state *state, int uptodate); |
72 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, | 78 | int (*set_bit_hook)(struct inode *inode, struct extent_state *state, |
73 | unsigned long old, unsigned long bits); | 79 | int *bits); |
74 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, | 80 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
75 | unsigned long bits); | 81 | int *bits); |
76 | int (*merge_extent_hook)(struct inode *inode, | 82 | int (*merge_extent_hook)(struct inode *inode, |
77 | struct extent_state *new, | 83 | struct extent_state *new, |
78 | struct extent_state *other); | 84 | struct extent_state *other); |
@@ -83,7 +89,7 @@ struct extent_io_ops { | |||
83 | 89 | ||
84 | struct extent_io_tree { | 90 | struct extent_io_tree { |
85 | struct rb_root state; | 91 | struct rb_root state; |
86 | struct rb_root buffer; | 92 | struct radix_tree_root buffer; |
87 | struct address_space *mapping; | 93 | struct address_space *mapping; |
88 | u64 dirty_bytes; | 94 | u64 dirty_bytes; |
89 | spinlock_t lock; | 95 | spinlock_t lock; |
@@ -121,7 +127,7 @@ struct extent_buffer { | |||
121 | unsigned long bflags; | 127 | unsigned long bflags; |
122 | atomic_t refs; | 128 | atomic_t refs; |
123 | struct list_head leak_list; | 129 | struct list_head leak_list; |
124 | struct rb_node rb_node; | 130 | struct rcu_head rcu_head; |
125 | 131 | ||
126 | /* the spinlock is used to protect most operations */ | 132 | /* the spinlock is used to protect most operations */ |
127 | spinlock_t lock; | 133 | spinlock_t lock; |
@@ -133,6 +139,17 @@ struct extent_buffer { | |||
133 | wait_queue_head_t lock_wq; | 139 | wait_queue_head_t lock_wq; |
134 | }; | 140 | }; |
135 | 141 | ||
142 | static inline void extent_set_compress_type(unsigned long *bio_flags, | ||
143 | int compress_type) | ||
144 | { | ||
145 | *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT; | ||
146 | } | ||
147 | |||
148 | static inline int extent_compress_type(unsigned long bio_flags) | ||
149 | { | ||
150 | return bio_flags >> EXTENT_BIO_FLAG_SHIFT; | ||
151 | } | ||
152 | |||
136 | struct extent_map_tree; | 153 | struct extent_map_tree; |
137 | 154 | ||
138 | static inline struct extent_state *extent_state_next(struct extent_state *state) | 155 | static inline struct extent_state *extent_state_next(struct extent_state *state) |
@@ -163,6 +180,8 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); | |||
163 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 180 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
164 | int bits, struct extent_state **cached, gfp_t mask); | 181 | int bits, struct extent_state **cached, gfp_t mask); |
165 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); | 182 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); |
183 | int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, | ||
184 | struct extent_state **cached, gfp_t mask); | ||
166 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 185 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
167 | gfp_t mask); | 186 | gfp_t mask); |
168 | int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | 187 | int extent_read_full_page(struct extent_io_tree *tree, struct page *page, |
@@ -174,6 +193,7 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
174 | u64 *start, u64 search_end, | 193 | u64 *start, u64 search_end, |
175 | u64 max_bytes, unsigned long bits); | 194 | u64 max_bytes, unsigned long bits); |
176 | 195 | ||
196 | void free_extent_state(struct extent_state *state); | ||
177 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 197 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
178 | int bits, int filled, struct extent_state *cached_state); | 198 | int bits, int filled, struct extent_state *cached_state); |
179 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 199 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -183,6 +203,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
183 | gfp_t mask); | 203 | gfp_t mask); |
184 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 204 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
185 | int bits, gfp_t mask); | 205 | int bits, gfp_t mask); |
206 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
207 | int bits, int exclusive_bits, u64 *failed_start, | ||
208 | struct extent_state **cached_state, gfp_t mask); | ||
186 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 209 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
187 | gfp_t mask); | 210 | gfp_t mask); |
188 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 211 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -196,7 +219,7 @@ int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | |||
196 | int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start, | 219 | int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start, |
197 | u64 end, gfp_t mask); | 220 | u64 end, gfp_t mask); |
198 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | 221 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, |
199 | gfp_t mask); | 222 | struct extent_state **cached_state, gfp_t mask); |
200 | int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | 223 | int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, |
201 | gfp_t mask); | 224 | gfp_t mask); |
202 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 225 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
@@ -281,9 +304,11 @@ int test_extent_buffer_dirty(struct extent_io_tree *tree, | |||
281 | int set_extent_buffer_uptodate(struct extent_io_tree *tree, | 304 | int set_extent_buffer_uptodate(struct extent_io_tree *tree, |
282 | struct extent_buffer *eb); | 305 | struct extent_buffer *eb); |
283 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | 306 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, |
284 | struct extent_buffer *eb); | 307 | struct extent_buffer *eb, |
308 | struct extent_state **cached_state); | ||
285 | int extent_buffer_uptodate(struct extent_io_tree *tree, | 309 | int extent_buffer_uptodate(struct extent_io_tree *tree, |
286 | struct extent_buffer *eb); | 310 | struct extent_buffer *eb, |
311 | struct extent_state *cached_state); | ||
287 | int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, | 312 | int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, |
288 | unsigned long min_len, char **token, char **map, | 313 | unsigned long min_len, char **token, char **map, |
289 | unsigned long *map_start, | 314 | unsigned long *map_start, |
@@ -300,4 +325,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
300 | struct extent_io_tree *tree, | 325 | struct extent_io_tree *tree, |
301 | u64 start, u64 end, struct page *locked_page, | 326 | u64 start, u64 end, struct page *locked_page, |
302 | unsigned long op); | 327 | unsigned long op); |
328 | struct bio * | ||
329 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | ||
330 | gfp_t gfp_flags); | ||
303 | #endif | 331 | #endif |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 428fcac45f90..b0e1fce12530 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -1,9 +1,9 @@ | |||
1 | #include <linux/err.h> | 1 | #include <linux/err.h> |
2 | #include <linux/gfp.h> | ||
3 | #include <linux/slab.h> | 2 | #include <linux/slab.h> |
4 | #include <linux/module.h> | 3 | #include <linux/module.h> |
5 | #include <linux/spinlock.h> | 4 | #include <linux/spinlock.h> |
6 | #include <linux/hardirq.h> | 5 | #include <linux/hardirq.h> |
6 | #include "ctree.h" | ||
7 | #include "extent_map.h" | 7 | #include "extent_map.h" |
8 | 8 | ||
9 | 9 | ||
@@ -35,7 +35,7 @@ void extent_map_exit(void) | |||
35 | */ | 35 | */ |
36 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) | 36 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) |
37 | { | 37 | { |
38 | tree->map.rb_node = NULL; | 38 | tree->map = RB_ROOT; |
39 | rwlock_init(&tree->lock); | 39 | rwlock_init(&tree->lock); |
40 | } | 40 | } |
41 | 41 | ||
@@ -55,6 +55,7 @@ struct extent_map *alloc_extent_map(gfp_t mask) | |||
55 | return em; | 55 | return em; |
56 | em->in_tree = 0; | 56 | em->in_tree = 0; |
57 | em->flags = 0; | 57 | em->flags = 0; |
58 | em->compress_type = BTRFS_COMPRESS_NONE; | ||
58 | atomic_set(&em->refs, 1); | 59 | atomic_set(&em->refs, 1); |
59 | return em; | 60 | return em; |
60 | } | 61 | } |
@@ -336,7 +337,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
336 | goto out; | 337 | goto out; |
337 | } | 338 | } |
338 | if (IS_ERR(rb_node)) { | 339 | if (IS_ERR(rb_node)) { |
339 | em = ERR_PTR(PTR_ERR(rb_node)); | 340 | em = ERR_CAST(rb_node); |
340 | goto out; | 341 | goto out; |
341 | } | 342 | } |
342 | em = rb_entry(rb_node, struct extent_map, rb_node); | 343 | em = rb_entry(rb_node, struct extent_map, rb_node); |
@@ -385,7 +386,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | |||
385 | goto out; | 386 | goto out; |
386 | } | 387 | } |
387 | if (IS_ERR(rb_node)) { | 388 | if (IS_ERR(rb_node)) { |
388 | em = ERR_PTR(PTR_ERR(rb_node)); | 389 | em = ERR_CAST(rb_node); |
389 | goto out; | 390 | goto out; |
390 | } | 391 | } |
391 | em = rb_entry(rb_node, struct extent_map, rb_node); | 392 | em = rb_entry(rb_node, struct extent_map, rb_node); |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index ab6d74b6e647..28b44dbd1e35 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
@@ -26,7 +26,8 @@ struct extent_map { | |||
26 | unsigned long flags; | 26 | unsigned long flags; |
27 | struct block_device *bdev; | 27 | struct block_device *bdev; |
28 | atomic_t refs; | 28 | atomic_t refs; |
29 | int in_tree; | 29 | unsigned int in_tree:1; |
30 | unsigned int compress_type:4; | ||
30 | }; | 31 | }; |
31 | 32 | ||
32 | struct extent_map_tree { | 33 | struct extent_map_tree { |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 9b99886562d0..a562a250ae77 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -17,6 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/bio.h> | 19 | #include <linux/bio.h> |
20 | #include <linux/slab.h> | ||
20 | #include <linux/pagemap.h> | 21 | #include <linux/pagemap.h> |
21 | #include <linux/highmem.h> | 22 | #include <linux/highmem.h> |
22 | #include "ctree.h" | 23 | #include "ctree.h" |
@@ -148,13 +149,14 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | |||
148 | } | 149 | } |
149 | 150 | ||
150 | 151 | ||
151 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 152 | static int __btrfs_lookup_bio_sums(struct btrfs_root *root, |
152 | struct bio *bio, u32 *dst) | 153 | struct inode *inode, struct bio *bio, |
154 | u64 logical_offset, u32 *dst, int dio) | ||
153 | { | 155 | { |
154 | u32 sum; | 156 | u32 sum; |
155 | struct bio_vec *bvec = bio->bi_io_vec; | 157 | struct bio_vec *bvec = bio->bi_io_vec; |
156 | int bio_index = 0; | 158 | int bio_index = 0; |
157 | u64 offset; | 159 | u64 offset = 0; |
158 | u64 item_start_offset = 0; | 160 | u64 item_start_offset = 0; |
159 | u64 item_last_offset = 0; | 161 | u64 item_last_offset = 0; |
160 | u64 disk_bytenr; | 162 | u64 disk_bytenr; |
@@ -173,8 +175,11 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | |||
173 | WARN_ON(bio->bi_vcnt <= 0); | 175 | WARN_ON(bio->bi_vcnt <= 0); |
174 | 176 | ||
175 | disk_bytenr = (u64)bio->bi_sector << 9; | 177 | disk_bytenr = (u64)bio->bi_sector << 9; |
178 | if (dio) | ||
179 | offset = logical_offset; | ||
176 | while (bio_index < bio->bi_vcnt) { | 180 | while (bio_index < bio->bi_vcnt) { |
177 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | 181 | if (!dio) |
182 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | ||
178 | ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); | 183 | ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); |
179 | if (ret == 0) | 184 | if (ret == 0) |
180 | goto found; | 185 | goto found; |
@@ -237,6 +242,7 @@ found: | |||
237 | else | 242 | else |
238 | set_state_private(io_tree, offset, sum); | 243 | set_state_private(io_tree, offset, sum); |
239 | disk_bytenr += bvec->bv_len; | 244 | disk_bytenr += bvec->bv_len; |
245 | offset += bvec->bv_len; | ||
240 | bio_index++; | 246 | bio_index++; |
241 | bvec++; | 247 | bvec++; |
242 | } | 248 | } |
@@ -244,6 +250,18 @@ found: | |||
244 | return 0; | 250 | return 0; |
245 | } | 251 | } |
246 | 252 | ||
253 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | ||
254 | struct bio *bio, u32 *dst) | ||
255 | { | ||
256 | return __btrfs_lookup_bio_sums(root, inode, bio, 0, dst, 0); | ||
257 | } | ||
258 | |||
259 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | ||
260 | struct bio *bio, u64 offset, u32 *dst) | ||
261 | { | ||
262 | return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1); | ||
263 | } | ||
264 | |||
247 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 265 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
248 | struct list_head *list) | 266 | struct list_head *list) |
249 | { | 267 | { |
@@ -656,6 +674,9 @@ again: | |||
656 | goto found; | 674 | goto found; |
657 | } | 675 | } |
658 | ret = PTR_ERR(item); | 676 | ret = PTR_ERR(item); |
677 | if (ret != -EFBIG && ret != -ENOENT) | ||
678 | goto fail_unlock; | ||
679 | |||
659 | if (ret == -EFBIG) { | 680 | if (ret == -EFBIG) { |
660 | u32 item_size; | 681 | u32 item_size; |
661 | /* we found one, but it isn't big enough yet */ | 682 | /* we found one, but it isn't big enough yet */ |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c02033596f02..c800d58f3013 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -24,10 +24,12 @@ | |||
24 | #include <linux/string.h> | 24 | #include <linux/string.h> |
25 | #include <linux/backing-dev.h> | 25 | #include <linux/backing-dev.h> |
26 | #include <linux/mpage.h> | 26 | #include <linux/mpage.h> |
27 | #include <linux/falloc.h> | ||
27 | #include <linux/swap.h> | 28 | #include <linux/swap.h> |
28 | #include <linux/writeback.h> | 29 | #include <linux/writeback.h> |
29 | #include <linux/statfs.h> | 30 | #include <linux/statfs.h> |
30 | #include <linux/compat.h> | 31 | #include <linux/compat.h> |
32 | #include <linux/slab.h> | ||
31 | #include "ctree.h" | 33 | #include "ctree.h" |
32 | #include "disk-io.h" | 34 | #include "disk-io.h" |
33 | #include "transaction.h" | 35 | #include "transaction.h" |
@@ -45,32 +47,46 @@ | |||
45 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | 47 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, |
46 | int write_bytes, | 48 | int write_bytes, |
47 | struct page **prepared_pages, | 49 | struct page **prepared_pages, |
48 | const char __user *buf) | 50 | struct iov_iter *i) |
49 | { | 51 | { |
50 | long page_fault = 0; | 52 | size_t copied = 0; |
51 | int i; | 53 | int pg = 0; |
52 | int offset = pos & (PAGE_CACHE_SIZE - 1); | 54 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
55 | int total_copied = 0; | ||
53 | 56 | ||
54 | for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { | 57 | while (write_bytes > 0) { |
55 | size_t count = min_t(size_t, | 58 | size_t count = min_t(size_t, |
56 | PAGE_CACHE_SIZE - offset, write_bytes); | 59 | PAGE_CACHE_SIZE - offset, write_bytes); |
57 | struct page *page = prepared_pages[i]; | 60 | struct page *page = prepared_pages[pg]; |
58 | fault_in_pages_readable(buf, count); | 61 | /* |
62 | * Copy data from userspace to the current page | ||
63 | * | ||
64 | * Disable pagefault to avoid recursive lock since | ||
65 | * the pages are already locked | ||
66 | */ | ||
67 | pagefault_disable(); | ||
68 | copied = iov_iter_copy_from_user_atomic(page, i, offset, count); | ||
69 | pagefault_enable(); | ||
59 | 70 | ||
60 | /* Copy data from userspace to the current page */ | ||
61 | kmap(page); | ||
62 | page_fault = __copy_from_user(page_address(page) + offset, | ||
63 | buf, count); | ||
64 | /* Flush processor's dcache for this page */ | 71 | /* Flush processor's dcache for this page */ |
65 | flush_dcache_page(page); | 72 | flush_dcache_page(page); |
66 | kunmap(page); | 73 | iov_iter_advance(i, copied); |
67 | buf += count; | 74 | write_bytes -= copied; |
68 | write_bytes -= count; | 75 | total_copied += copied; |
69 | 76 | ||
70 | if (page_fault) | 77 | /* Return to btrfs_file_aio_write to fault page */ |
78 | if (unlikely(copied == 0)) { | ||
71 | break; | 79 | break; |
80 | } | ||
81 | |||
82 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | ||
83 | offset += copied; | ||
84 | } else { | ||
85 | pg++; | ||
86 | offset = 0; | ||
87 | } | ||
72 | } | 88 | } |
73 | return page_fault ? -EFAULT : 0; | 89 | return total_copied; |
74 | } | 90 | } |
75 | 91 | ||
76 | /* | 92 | /* |
@@ -123,9 +139,9 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
123 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 139 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
124 | 140 | ||
125 | end_of_last_block = start_pos + num_bytes - 1; | 141 | end_of_last_block = start_pos + num_bytes - 1; |
126 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); | 142 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
127 | if (err) | 143 | NULL); |
128 | return err; | 144 | BUG_ON(err); |
129 | 145 | ||
130 | for (i = 0; i < num_pages; i++) { | 146 | for (i = 0; i < num_pages; i++) { |
131 | struct page *p = pages[i]; | 147 | struct page *p = pages[i]; |
@@ -140,7 +156,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
140 | * at this time. | 156 | * at this time. |
141 | */ | 157 | */ |
142 | } | 158 | } |
143 | return err; | 159 | return 0; |
144 | } | 160 | } |
145 | 161 | ||
146 | /* | 162 | /* |
@@ -209,6 +225,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
209 | 225 | ||
210 | split->bdev = em->bdev; | 226 | split->bdev = em->bdev; |
211 | split->flags = flags; | 227 | split->flags = flags; |
228 | split->compress_type = em->compress_type; | ||
212 | ret = add_extent_mapping(em_tree, split); | 229 | ret = add_extent_mapping(em_tree, split); |
213 | BUG_ON(ret); | 230 | BUG_ON(ret); |
214 | free_extent_map(split); | 231 | free_extent_map(split); |
@@ -223,6 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
223 | split->len = em->start + em->len - (start + len); | 240 | split->len = em->start + em->len - (start + len); |
224 | split->bdev = em->bdev; | 241 | split->bdev = em->bdev; |
225 | split->flags = flags; | 242 | split->flags = flags; |
243 | split->compress_type = em->compress_type; | ||
226 | 244 | ||
227 | if (compressed) { | 245 | if (compressed) { |
228 | split->block_len = em->block_len; | 246 | split->block_len = em->block_len; |
@@ -720,13 +738,15 @@ again: | |||
720 | inode->i_ino, orig_offset); | 738 | inode->i_ino, orig_offset); |
721 | BUG_ON(ret); | 739 | BUG_ON(ret); |
722 | } | 740 | } |
723 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
724 | struct btrfs_file_extent_item); | ||
725 | if (del_nr == 0) { | 741 | if (del_nr == 0) { |
742 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
743 | struct btrfs_file_extent_item); | ||
726 | btrfs_set_file_extent_type(leaf, fi, | 744 | btrfs_set_file_extent_type(leaf, fi, |
727 | BTRFS_FILE_EXTENT_REG); | 745 | BTRFS_FILE_EXTENT_REG); |
728 | btrfs_mark_buffer_dirty(leaf); | 746 | btrfs_mark_buffer_dirty(leaf); |
729 | } else { | 747 | } else { |
748 | fi = btrfs_item_ptr(leaf, del_slot - 1, | ||
749 | struct btrfs_file_extent_item); | ||
730 | btrfs_set_file_extent_type(leaf, fi, | 750 | btrfs_set_file_extent_type(leaf, fi, |
731 | BTRFS_FILE_EXTENT_REG); | 751 | BTRFS_FILE_EXTENT_REG); |
732 | btrfs_set_file_extent_num_bytes(leaf, fi, | 752 | btrfs_set_file_extent_num_bytes(leaf, fi, |
@@ -751,6 +771,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
751 | loff_t pos, unsigned long first_index, | 771 | loff_t pos, unsigned long first_index, |
752 | unsigned long last_index, size_t write_bytes) | 772 | unsigned long last_index, size_t write_bytes) |
753 | { | 773 | { |
774 | struct extent_state *cached_state = NULL; | ||
754 | int i; | 775 | int i; |
755 | unsigned long index = pos >> PAGE_CACHE_SHIFT; | 776 | unsigned long index = pos >> PAGE_CACHE_SHIFT; |
756 | struct inode *inode = fdentry(file)->d_inode; | 777 | struct inode *inode = fdentry(file)->d_inode; |
@@ -779,16 +800,18 @@ again: | |||
779 | } | 800 | } |
780 | if (start_pos < inode->i_size) { | 801 | if (start_pos < inode->i_size) { |
781 | struct btrfs_ordered_extent *ordered; | 802 | struct btrfs_ordered_extent *ordered; |
782 | lock_extent(&BTRFS_I(inode)->io_tree, | 803 | lock_extent_bits(&BTRFS_I(inode)->io_tree, |
783 | start_pos, last_pos - 1, GFP_NOFS); | 804 | start_pos, last_pos - 1, 0, &cached_state, |
805 | GFP_NOFS); | ||
784 | ordered = btrfs_lookup_first_ordered_extent(inode, | 806 | ordered = btrfs_lookup_first_ordered_extent(inode, |
785 | last_pos - 1); | 807 | last_pos - 1); |
786 | if (ordered && | 808 | if (ordered && |
787 | ordered->file_offset + ordered->len > start_pos && | 809 | ordered->file_offset + ordered->len > start_pos && |
788 | ordered->file_offset < last_pos) { | 810 | ordered->file_offset < last_pos) { |
789 | btrfs_put_ordered_extent(ordered); | 811 | btrfs_put_ordered_extent(ordered); |
790 | unlock_extent(&BTRFS_I(inode)->io_tree, | 812 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
791 | start_pos, last_pos - 1, GFP_NOFS); | 813 | start_pos, last_pos - 1, |
814 | &cached_state, GFP_NOFS); | ||
792 | for (i = 0; i < num_pages; i++) { | 815 | for (i = 0; i < num_pages; i++) { |
793 | unlock_page(pages[i]); | 816 | unlock_page(pages[i]); |
794 | page_cache_release(pages[i]); | 817 | page_cache_release(pages[i]); |
@@ -800,12 +823,13 @@ again: | |||
800 | if (ordered) | 823 | if (ordered) |
801 | btrfs_put_ordered_extent(ordered); | 824 | btrfs_put_ordered_extent(ordered); |
802 | 825 | ||
803 | clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, | 826 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, |
804 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | | 827 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | |
805 | EXTENT_DO_ACCOUNTING, | 828 | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, |
806 | GFP_NOFS); | 829 | GFP_NOFS); |
807 | unlock_extent(&BTRFS_I(inode)->io_tree, | 830 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
808 | start_pos, last_pos - 1, GFP_NOFS); | 831 | start_pos, last_pos - 1, &cached_state, |
832 | GFP_NOFS); | ||
809 | } | 833 | } |
810 | for (i = 0; i < num_pages; i++) { | 834 | for (i = 0; i < num_pages; i++) { |
811 | clear_page_dirty_for_io(pages[i]); | 835 | clear_page_dirty_for_io(pages[i]); |
@@ -815,45 +839,48 @@ again: | |||
815 | return 0; | 839 | return 0; |
816 | } | 840 | } |
817 | 841 | ||
818 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | 842 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, |
819 | size_t count, loff_t *ppos) | 843 | const struct iovec *iov, |
844 | unsigned long nr_segs, loff_t pos) | ||
820 | { | 845 | { |
821 | loff_t pos; | 846 | struct file *file = iocb->ki_filp; |
847 | struct inode *inode = fdentry(file)->d_inode; | ||
848 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
849 | struct page *pinned[2]; | ||
850 | struct page **pages = NULL; | ||
851 | struct iov_iter i; | ||
852 | loff_t *ppos = &iocb->ki_pos; | ||
822 | loff_t start_pos; | 853 | loff_t start_pos; |
823 | ssize_t num_written = 0; | 854 | ssize_t num_written = 0; |
824 | ssize_t err = 0; | 855 | ssize_t err = 0; |
856 | size_t count; | ||
857 | size_t ocount; | ||
825 | int ret = 0; | 858 | int ret = 0; |
826 | struct inode *inode = fdentry(file)->d_inode; | ||
827 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
828 | struct page **pages = NULL; | ||
829 | int nrptrs; | 859 | int nrptrs; |
830 | struct page *pinned[2]; | ||
831 | unsigned long first_index; | 860 | unsigned long first_index; |
832 | unsigned long last_index; | 861 | unsigned long last_index; |
833 | int will_write; | 862 | int will_write; |
863 | int buffered = 0; | ||
864 | int copied = 0; | ||
865 | int dirty_pages = 0; | ||
834 | 866 | ||
835 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || | 867 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || |
836 | (file->f_flags & O_DIRECT)); | 868 | (file->f_flags & O_DIRECT)); |
837 | 869 | ||
838 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, | ||
839 | PAGE_CACHE_SIZE / (sizeof(struct page *))); | ||
840 | pinned[0] = NULL; | 870 | pinned[0] = NULL; |
841 | pinned[1] = NULL; | 871 | pinned[1] = NULL; |
842 | 872 | ||
843 | pos = *ppos; | ||
844 | start_pos = pos; | 873 | start_pos = pos; |
845 | 874 | ||
846 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 875 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
847 | 876 | ||
848 | /* do the reserve before the mutex lock in case we have to do some | ||
849 | * flushing. We wouldn't deadlock, but this is more polite. | ||
850 | */ | ||
851 | err = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
852 | if (err) | ||
853 | goto out_nolock; | ||
854 | |||
855 | mutex_lock(&inode->i_mutex); | 877 | mutex_lock(&inode->i_mutex); |
856 | 878 | ||
879 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
880 | if (err) | ||
881 | goto out; | ||
882 | count = ocount; | ||
883 | |||
857 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 884 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
858 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 885 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
859 | if (err) | 886 | if (err) |
@@ -866,16 +893,65 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
866 | if (err) | 893 | if (err) |
867 | goto out; | 894 | goto out; |
868 | 895 | ||
896 | /* | ||
897 | * If BTRFS flips readonly due to some impossible error | ||
898 | * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), | ||
899 | * although we have opened a file as writable, we have | ||
900 | * to stop this write operation to ensure FS consistency. | ||
901 | */ | ||
902 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
903 | err = -EROFS; | ||
904 | goto out; | ||
905 | } | ||
906 | |||
869 | file_update_time(file); | 907 | file_update_time(file); |
908 | BTRFS_I(inode)->sequence++; | ||
909 | |||
910 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
911 | num_written = generic_file_direct_write(iocb, iov, &nr_segs, | ||
912 | pos, ppos, count, | ||
913 | ocount); | ||
914 | /* | ||
915 | * the generic O_DIRECT will update in-memory i_size after the | ||
916 | * DIOs are done. But our endio handlers that update the on | ||
917 | * disk i_size never update past the in memory i_size. So we | ||
918 | * need one more update here to catch any additions to the | ||
919 | * file | ||
920 | */ | ||
921 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | ||
922 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
923 | mark_inode_dirty(inode); | ||
924 | } | ||
925 | |||
926 | if (num_written < 0) { | ||
927 | ret = num_written; | ||
928 | num_written = 0; | ||
929 | goto out; | ||
930 | } else if (num_written == count) { | ||
931 | /* pick up pos changes done by the generic code */ | ||
932 | pos = *ppos; | ||
933 | goto out; | ||
934 | } | ||
935 | /* | ||
936 | * We are going to do buffered for the rest of the range, so we | ||
937 | * need to make sure to invalidate the buffered pages when we're | ||
938 | * done. | ||
939 | */ | ||
940 | buffered = 1; | ||
941 | pos += num_written; | ||
942 | } | ||
870 | 943 | ||
944 | iov_iter_init(&i, iov, nr_segs, count, num_written); | ||
945 | nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) / | ||
946 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | ||
947 | (sizeof(struct page *))); | ||
871 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 948 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
872 | 949 | ||
873 | /* generic_write_checks can change our pos */ | 950 | /* generic_write_checks can change our pos */ |
874 | start_pos = pos; | 951 | start_pos = pos; |
875 | 952 | ||
876 | BTRFS_I(inode)->sequence++; | ||
877 | first_index = pos >> PAGE_CACHE_SHIFT; | 953 | first_index = pos >> PAGE_CACHE_SHIFT; |
878 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | 954 | last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; |
879 | 955 | ||
880 | /* | 956 | /* |
881 | * there are lots of better ways to do this, but this code | 957 | * there are lots of better ways to do this, but this code |
@@ -892,7 +968,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
892 | unlock_page(pinned[0]); | 968 | unlock_page(pinned[0]); |
893 | } | 969 | } |
894 | } | 970 | } |
895 | if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { | 971 | if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) { |
896 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); | 972 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); |
897 | if (!PageUptodate(pinned[1])) { | 973 | if (!PageUptodate(pinned[1])) { |
898 | ret = btrfs_readpage(NULL, pinned[1]); | 974 | ret = btrfs_readpage(NULL, pinned[1]); |
@@ -903,10 +979,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
903 | } | 979 | } |
904 | } | 980 | } |
905 | 981 | ||
906 | while (count > 0) { | 982 | while (iov_iter_count(&i) > 0) { |
907 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 983 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
908 | size_t write_bytes = min(count, nrptrs * | 984 | size_t write_bytes = min(iov_iter_count(&i), |
909 | (size_t)PAGE_CACHE_SIZE - | 985 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
910 | offset); | 986 | offset); |
911 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> | 987 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> |
912 | PAGE_CACHE_SHIFT; | 988 | PAGE_CACHE_SHIFT; |
@@ -914,7 +990,17 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
914 | WARN_ON(num_pages > nrptrs); | 990 | WARN_ON(num_pages > nrptrs); |
915 | memset(pages, 0, sizeof(struct page *) * nrptrs); | 991 | memset(pages, 0, sizeof(struct page *) * nrptrs); |
916 | 992 | ||
917 | ret = btrfs_check_data_free_space(root, inode, write_bytes); | 993 | /* |
994 | * Fault pages before locking them in prepare_pages | ||
995 | * to avoid recursive lock | ||
996 | */ | ||
997 | if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) { | ||
998 | ret = -EFAULT; | ||
999 | goto out; | ||
1000 | } | ||
1001 | |||
1002 | ret = btrfs_delalloc_reserve_space(inode, | ||
1003 | num_pages << PAGE_CACHE_SHIFT); | ||
918 | if (ret) | 1004 | if (ret) |
919 | goto out; | 1005 | goto out; |
920 | 1006 | ||
@@ -922,45 +1008,49 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
922 | pos, first_index, last_index, | 1008 | pos, first_index, last_index, |
923 | write_bytes); | 1009 | write_bytes); |
924 | if (ret) { | 1010 | if (ret) { |
925 | btrfs_free_reserved_data_space(root, inode, | 1011 | btrfs_delalloc_release_space(inode, |
926 | write_bytes); | 1012 | num_pages << PAGE_CACHE_SHIFT); |
927 | goto out; | 1013 | goto out; |
928 | } | 1014 | } |
929 | 1015 | ||
930 | ret = btrfs_copy_from_user(pos, num_pages, | 1016 | copied = btrfs_copy_from_user(pos, num_pages, |
931 | write_bytes, pages, buf); | 1017 | write_bytes, pages, &i); |
932 | if (ret) { | 1018 | dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >> |
933 | btrfs_free_reserved_data_space(root, inode, | 1019 | PAGE_CACHE_SHIFT; |
934 | write_bytes); | 1020 | |
935 | btrfs_drop_pages(pages, num_pages); | 1021 | if (num_pages > dirty_pages) { |
936 | goto out; | 1022 | if (copied > 0) |
1023 | atomic_inc( | ||
1024 | &BTRFS_I(inode)->outstanding_extents); | ||
1025 | btrfs_delalloc_release_space(inode, | ||
1026 | (num_pages - dirty_pages) << | ||
1027 | PAGE_CACHE_SHIFT); | ||
937 | } | 1028 | } |
938 | 1029 | ||
939 | ret = dirty_and_release_pages(NULL, root, file, pages, | 1030 | if (copied > 0) { |
940 | num_pages, pos, write_bytes); | 1031 | dirty_and_release_pages(NULL, root, file, pages, |
941 | btrfs_drop_pages(pages, num_pages); | 1032 | dirty_pages, pos, copied); |
942 | if (ret) { | ||
943 | btrfs_free_reserved_data_space(root, inode, | ||
944 | write_bytes); | ||
945 | goto out; | ||
946 | } | 1033 | } |
947 | 1034 | ||
948 | if (will_write) { | 1035 | btrfs_drop_pages(pages, num_pages); |
949 | filemap_fdatawrite_range(inode->i_mapping, pos, | 1036 | |
950 | pos + write_bytes - 1); | 1037 | if (copied > 0) { |
951 | } else { | 1038 | if (will_write) { |
952 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | 1039 | filemap_fdatawrite_range(inode->i_mapping, pos, |
953 | num_pages); | 1040 | pos + copied - 1); |
954 | if (num_pages < | 1041 | } else { |
955 | (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | 1042 | balance_dirty_pages_ratelimited_nr( |
956 | btrfs_btree_balance_dirty(root, 1); | 1043 | inode->i_mapping, |
957 | btrfs_throttle(root); | 1044 | dirty_pages); |
1045 | if (dirty_pages < | ||
1046 | (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | ||
1047 | btrfs_btree_balance_dirty(root, 1); | ||
1048 | btrfs_throttle(root); | ||
1049 | } | ||
958 | } | 1050 | } |
959 | 1051 | ||
960 | buf += write_bytes; | 1052 | pos += copied; |
961 | count -= write_bytes; | 1053 | num_written += copied; |
962 | pos += write_bytes; | ||
963 | num_written += write_bytes; | ||
964 | 1054 | ||
965 | cond_resched(); | 1055 | cond_resched(); |
966 | } | 1056 | } |
@@ -968,9 +1058,7 @@ out: | |||
968 | mutex_unlock(&inode->i_mutex); | 1058 | mutex_unlock(&inode->i_mutex); |
969 | if (ret) | 1059 | if (ret) |
970 | err = ret; | 1060 | err = ret; |
971 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
972 | 1061 | ||
973 | out_nolock: | ||
974 | kfree(pages); | 1062 | kfree(pages); |
975 | if (pinned[0]) | 1063 | if (pinned[0]) |
976 | page_cache_release(pinned[0]); | 1064 | page_cache_release(pinned[0]); |
@@ -1000,9 +1088,15 @@ out_nolock: | |||
1000 | num_written = err; | 1088 | num_written = err; |
1001 | 1089 | ||
1002 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 1090 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { |
1003 | trans = btrfs_start_transaction(root, 1); | 1091 | trans = btrfs_start_transaction(root, 0); |
1092 | if (IS_ERR(trans)) { | ||
1093 | num_written = PTR_ERR(trans); | ||
1094 | goto done; | ||
1095 | } | ||
1096 | mutex_lock(&inode->i_mutex); | ||
1004 | ret = btrfs_log_dentry_safe(trans, root, | 1097 | ret = btrfs_log_dentry_safe(trans, root, |
1005 | file->f_dentry); | 1098 | file->f_dentry); |
1099 | mutex_unlock(&inode->i_mutex); | ||
1006 | if (ret == 0) { | 1100 | if (ret == 0) { |
1007 | ret = btrfs_sync_log(trans, root); | 1101 | ret = btrfs_sync_log(trans, root); |
1008 | if (ret == 0) | 1102 | if (ret == 0) |
@@ -1015,12 +1109,13 @@ out_nolock: | |||
1015 | btrfs_end_transaction(trans, root); | 1109 | btrfs_end_transaction(trans, root); |
1016 | } | 1110 | } |
1017 | } | 1111 | } |
1018 | if (file->f_flags & O_DIRECT) { | 1112 | if (file->f_flags & O_DIRECT && buffered) { |
1019 | invalidate_mapping_pages(inode->i_mapping, | 1113 | invalidate_mapping_pages(inode->i_mapping, |
1020 | start_pos >> PAGE_CACHE_SHIFT, | 1114 | start_pos >> PAGE_CACHE_SHIFT, |
1021 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | 1115 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); |
1022 | } | 1116 | } |
1023 | } | 1117 | } |
1118 | done: | ||
1024 | current->backing_dev_info = NULL; | 1119 | current->backing_dev_info = NULL; |
1025 | return num_written ? num_written : err; | 1120 | return num_written ? num_written : err; |
1026 | } | 1121 | } |
@@ -1055,8 +1150,9 @@ int btrfs_release_file(struct inode *inode, struct file *filp) | |||
1055 | * important optimization for directories because holding the mutex prevents | 1150 | * important optimization for directories because holding the mutex prevents |
1056 | * new operations on the dir while we write to disk. | 1151 | * new operations on the dir while we write to disk. |
1057 | */ | 1152 | */ |
1058 | int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | 1153 | int btrfs_sync_file(struct file *file, int datasync) |
1059 | { | 1154 | { |
1155 | struct dentry *dentry = file->f_path.dentry; | ||
1060 | struct inode *inode = dentry->d_inode; | 1156 | struct inode *inode = dentry->d_inode; |
1061 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1157 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1062 | int ret = 0; | 1158 | int ret = 0; |
@@ -1093,12 +1189,12 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1093 | /* | 1189 | /* |
1094 | * ok we haven't committed the transaction yet, lets do a commit | 1190 | * ok we haven't committed the transaction yet, lets do a commit |
1095 | */ | 1191 | */ |
1096 | if (file && file->private_data) | 1192 | if (file->private_data) |
1097 | btrfs_ioctl_trans_end(file); | 1193 | btrfs_ioctl_trans_end(file); |
1098 | 1194 | ||
1099 | trans = btrfs_start_transaction(root, 1); | 1195 | trans = btrfs_start_transaction(root, 0); |
1100 | if (!trans) { | 1196 | if (IS_ERR(trans)) { |
1101 | ret = -ENOMEM; | 1197 | ret = PTR_ERR(trans); |
1102 | goto out; | 1198 | goto out; |
1103 | } | 1199 | } |
1104 | 1200 | ||
@@ -1133,7 +1229,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1133 | } | 1229 | } |
1134 | mutex_lock(&dentry->d_inode->i_mutex); | 1230 | mutex_lock(&dentry->d_inode->i_mutex); |
1135 | out: | 1231 | out: |
1136 | return ret > 0 ? EIO : ret; | 1232 | return ret > 0 ? -EIO : ret; |
1137 | } | 1233 | } |
1138 | 1234 | ||
1139 | static const struct vm_operations_struct btrfs_file_vm_ops = { | 1235 | static const struct vm_operations_struct btrfs_file_vm_ops = { |
@@ -1143,21 +1239,141 @@ static const struct vm_operations_struct btrfs_file_vm_ops = { | |||
1143 | 1239 | ||
1144 | static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | 1240 | static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) |
1145 | { | 1241 | { |
1146 | vma->vm_ops = &btrfs_file_vm_ops; | 1242 | struct address_space *mapping = filp->f_mapping; |
1243 | |||
1244 | if (!mapping->a_ops->readpage) | ||
1245 | return -ENOEXEC; | ||
1246 | |||
1147 | file_accessed(filp); | 1247 | file_accessed(filp); |
1248 | vma->vm_ops = &btrfs_file_vm_ops; | ||
1249 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
1250 | |||
1148 | return 0; | 1251 | return 0; |
1149 | } | 1252 | } |
1150 | 1253 | ||
1254 | static long btrfs_fallocate(struct file *file, int mode, | ||
1255 | loff_t offset, loff_t len) | ||
1256 | { | ||
1257 | struct inode *inode = file->f_path.dentry->d_inode; | ||
1258 | struct extent_state *cached_state = NULL; | ||
1259 | u64 cur_offset; | ||
1260 | u64 last_byte; | ||
1261 | u64 alloc_start; | ||
1262 | u64 alloc_end; | ||
1263 | u64 alloc_hint = 0; | ||
1264 | u64 locked_end; | ||
1265 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
1266 | struct extent_map *em; | ||
1267 | int ret; | ||
1268 | |||
1269 | alloc_start = offset & ~mask; | ||
1270 | alloc_end = (offset + len + mask) & ~mask; | ||
1271 | |||
1272 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | ||
1273 | if (mode & ~FALLOC_FL_KEEP_SIZE) | ||
1274 | return -EOPNOTSUPP; | ||
1275 | |||
1276 | /* | ||
1277 | * wait for ordered IO before we have any locks. We'll loop again | ||
1278 | * below with the locks held. | ||
1279 | */ | ||
1280 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
1281 | |||
1282 | mutex_lock(&inode->i_mutex); | ||
1283 | ret = inode_newsize_ok(inode, alloc_end); | ||
1284 | if (ret) | ||
1285 | goto out; | ||
1286 | |||
1287 | if (alloc_start > inode->i_size) { | ||
1288 | ret = btrfs_cont_expand(inode, alloc_start); | ||
1289 | if (ret) | ||
1290 | goto out; | ||
1291 | } | ||
1292 | |||
1293 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); | ||
1294 | if (ret) | ||
1295 | goto out; | ||
1296 | |||
1297 | locked_end = alloc_end - 1; | ||
1298 | while (1) { | ||
1299 | struct btrfs_ordered_extent *ordered; | ||
1300 | |||
1301 | /* the extent lock is ordered inside the running | ||
1302 | * transaction | ||
1303 | */ | ||
1304 | lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, | ||
1305 | locked_end, 0, &cached_state, GFP_NOFS); | ||
1306 | ordered = btrfs_lookup_first_ordered_extent(inode, | ||
1307 | alloc_end - 1); | ||
1308 | if (ordered && | ||
1309 | ordered->file_offset + ordered->len > alloc_start && | ||
1310 | ordered->file_offset < alloc_end) { | ||
1311 | btrfs_put_ordered_extent(ordered); | ||
1312 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | ||
1313 | alloc_start, locked_end, | ||
1314 | &cached_state, GFP_NOFS); | ||
1315 | /* | ||
1316 | * we can't wait on the range with the transaction | ||
1317 | * running or with the extent lock held | ||
1318 | */ | ||
1319 | btrfs_wait_ordered_range(inode, alloc_start, | ||
1320 | alloc_end - alloc_start); | ||
1321 | } else { | ||
1322 | if (ordered) | ||
1323 | btrfs_put_ordered_extent(ordered); | ||
1324 | break; | ||
1325 | } | ||
1326 | } | ||
1327 | |||
1328 | cur_offset = alloc_start; | ||
1329 | while (1) { | ||
1330 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | ||
1331 | alloc_end - cur_offset, 0); | ||
1332 | BUG_ON(IS_ERR(em) || !em); | ||
1333 | last_byte = min(extent_map_end(em), alloc_end); | ||
1334 | last_byte = (last_byte + mask) & ~mask; | ||
1335 | if (em->block_start == EXTENT_MAP_HOLE || | ||
1336 | (cur_offset >= inode->i_size && | ||
1337 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
1338 | ret = btrfs_prealloc_file_range(inode, mode, cur_offset, | ||
1339 | last_byte - cur_offset, | ||
1340 | 1 << inode->i_blkbits, | ||
1341 | offset + len, | ||
1342 | &alloc_hint); | ||
1343 | if (ret < 0) { | ||
1344 | free_extent_map(em); | ||
1345 | break; | ||
1346 | } | ||
1347 | } | ||
1348 | free_extent_map(em); | ||
1349 | |||
1350 | cur_offset = last_byte; | ||
1351 | if (cur_offset >= alloc_end) { | ||
1352 | ret = 0; | ||
1353 | break; | ||
1354 | } | ||
1355 | } | ||
1356 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||
1357 | &cached_state, GFP_NOFS); | ||
1358 | |||
1359 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); | ||
1360 | out: | ||
1361 | mutex_unlock(&inode->i_mutex); | ||
1362 | return ret; | ||
1363 | } | ||
1364 | |||
1151 | const struct file_operations btrfs_file_operations = { | 1365 | const struct file_operations btrfs_file_operations = { |
1152 | .llseek = generic_file_llseek, | 1366 | .llseek = generic_file_llseek, |
1153 | .read = do_sync_read, | 1367 | .read = do_sync_read, |
1368 | .write = do_sync_write, | ||
1154 | .aio_read = generic_file_aio_read, | 1369 | .aio_read = generic_file_aio_read, |
1155 | .splice_read = generic_file_splice_read, | 1370 | .splice_read = generic_file_splice_read, |
1156 | .write = btrfs_file_write, | 1371 | .aio_write = btrfs_file_aio_write, |
1157 | .mmap = btrfs_file_mmap, | 1372 | .mmap = btrfs_file_mmap, |
1158 | .open = generic_file_open, | 1373 | .open = generic_file_open, |
1159 | .release = btrfs_release_file, | 1374 | .release = btrfs_release_file, |
1160 | .fsync = btrfs_sync_file, | 1375 | .fsync = btrfs_sync_file, |
1376 | .fallocate = btrfs_fallocate, | ||
1161 | .unlocked_ioctl = btrfs_ioctl, | 1377 | .unlocked_ioctl = btrfs_ioctl, |
1162 | #ifdef CONFIG_COMPAT | 1378 | #ifdef CONFIG_COMPAT |
1163 | .compat_ioctl = btrfs_ioctl, | 1379 | .compat_ioctl = btrfs_ioctl, |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index cb2849f03251..60d684266959 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -18,14 +18,768 @@ | |||
18 | 18 | ||
19 | #include <linux/pagemap.h> | 19 | #include <linux/pagemap.h> |
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/slab.h> | ||
21 | #include <linux/math64.h> | 22 | #include <linux/math64.h> |
22 | #include "ctree.h" | 23 | #include "ctree.h" |
23 | #include "free-space-cache.h" | 24 | #include "free-space-cache.h" |
24 | #include "transaction.h" | 25 | #include "transaction.h" |
26 | #include "disk-io.h" | ||
25 | 27 | ||
26 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) | 28 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) |
27 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) | 29 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) |
28 | 30 | ||
31 | static void recalculate_thresholds(struct btrfs_block_group_cache | ||
32 | *block_group); | ||
33 | static int link_free_space(struct btrfs_block_group_cache *block_group, | ||
34 | struct btrfs_free_space *info); | ||
35 | |||
36 | struct inode *lookup_free_space_inode(struct btrfs_root *root, | ||
37 | struct btrfs_block_group_cache | ||
38 | *block_group, struct btrfs_path *path) | ||
39 | { | ||
40 | struct btrfs_key key; | ||
41 | struct btrfs_key location; | ||
42 | struct btrfs_disk_key disk_key; | ||
43 | struct btrfs_free_space_header *header; | ||
44 | struct extent_buffer *leaf; | ||
45 | struct inode *inode = NULL; | ||
46 | int ret; | ||
47 | |||
48 | spin_lock(&block_group->lock); | ||
49 | if (block_group->inode) | ||
50 | inode = igrab(block_group->inode); | ||
51 | spin_unlock(&block_group->lock); | ||
52 | if (inode) | ||
53 | return inode; | ||
54 | |||
55 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | ||
56 | key.offset = block_group->key.objectid; | ||
57 | key.type = 0; | ||
58 | |||
59 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
60 | if (ret < 0) | ||
61 | return ERR_PTR(ret); | ||
62 | if (ret > 0) { | ||
63 | btrfs_release_path(root, path); | ||
64 | return ERR_PTR(-ENOENT); | ||
65 | } | ||
66 | |||
67 | leaf = path->nodes[0]; | ||
68 | header = btrfs_item_ptr(leaf, path->slots[0], | ||
69 | struct btrfs_free_space_header); | ||
70 | btrfs_free_space_key(leaf, header, &disk_key); | ||
71 | btrfs_disk_key_to_cpu(&location, &disk_key); | ||
72 | btrfs_release_path(root, path); | ||
73 | |||
74 | inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); | ||
75 | if (!inode) | ||
76 | return ERR_PTR(-ENOENT); | ||
77 | if (IS_ERR(inode)) | ||
78 | return inode; | ||
79 | if (is_bad_inode(inode)) { | ||
80 | iput(inode); | ||
81 | return ERR_PTR(-ENOENT); | ||
82 | } | ||
83 | |||
84 | spin_lock(&block_group->lock); | ||
85 | if (!root->fs_info->closing) { | ||
86 | block_group->inode = igrab(inode); | ||
87 | block_group->iref = 1; | ||
88 | } | ||
89 | spin_unlock(&block_group->lock); | ||
90 | |||
91 | return inode; | ||
92 | } | ||
93 | |||
94 | int create_free_space_inode(struct btrfs_root *root, | ||
95 | struct btrfs_trans_handle *trans, | ||
96 | struct btrfs_block_group_cache *block_group, | ||
97 | struct btrfs_path *path) | ||
98 | { | ||
99 | struct btrfs_key key; | ||
100 | struct btrfs_disk_key disk_key; | ||
101 | struct btrfs_free_space_header *header; | ||
102 | struct btrfs_inode_item *inode_item; | ||
103 | struct extent_buffer *leaf; | ||
104 | u64 objectid; | ||
105 | int ret; | ||
106 | |||
107 | ret = btrfs_find_free_objectid(trans, root, 0, &objectid); | ||
108 | if (ret < 0) | ||
109 | return ret; | ||
110 | |||
111 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); | ||
112 | if (ret) | ||
113 | return ret; | ||
114 | |||
115 | leaf = path->nodes[0]; | ||
116 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | ||
117 | struct btrfs_inode_item); | ||
118 | btrfs_item_key(leaf, &disk_key, path->slots[0]); | ||
119 | memset_extent_buffer(leaf, 0, (unsigned long)inode_item, | ||
120 | sizeof(*inode_item)); | ||
121 | btrfs_set_inode_generation(leaf, inode_item, trans->transid); | ||
122 | btrfs_set_inode_size(leaf, inode_item, 0); | ||
123 | btrfs_set_inode_nbytes(leaf, inode_item, 0); | ||
124 | btrfs_set_inode_uid(leaf, inode_item, 0); | ||
125 | btrfs_set_inode_gid(leaf, inode_item, 0); | ||
126 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); | ||
127 | btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | | ||
128 | BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); | ||
129 | btrfs_set_inode_nlink(leaf, inode_item, 1); | ||
130 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); | ||
131 | btrfs_set_inode_block_group(leaf, inode_item, | ||
132 | block_group->key.objectid); | ||
133 | btrfs_mark_buffer_dirty(leaf); | ||
134 | btrfs_release_path(root, path); | ||
135 | |||
136 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | ||
137 | key.offset = block_group->key.objectid; | ||
138 | key.type = 0; | ||
139 | |||
140 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
141 | sizeof(struct btrfs_free_space_header)); | ||
142 | if (ret < 0) { | ||
143 | btrfs_release_path(root, path); | ||
144 | return ret; | ||
145 | } | ||
146 | leaf = path->nodes[0]; | ||
147 | header = btrfs_item_ptr(leaf, path->slots[0], | ||
148 | struct btrfs_free_space_header); | ||
149 | memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header)); | ||
150 | btrfs_set_free_space_key(leaf, header, &disk_key); | ||
151 | btrfs_mark_buffer_dirty(leaf); | ||
152 | btrfs_release_path(root, path); | ||
153 | |||
154 | return 0; | ||
155 | } | ||
156 | |||
157 | int btrfs_truncate_free_space_cache(struct btrfs_root *root, | ||
158 | struct btrfs_trans_handle *trans, | ||
159 | struct btrfs_path *path, | ||
160 | struct inode *inode) | ||
161 | { | ||
162 | loff_t oldsize; | ||
163 | int ret = 0; | ||
164 | |||
165 | trans->block_rsv = root->orphan_block_rsv; | ||
166 | ret = btrfs_block_rsv_check(trans, root, | ||
167 | root->orphan_block_rsv, | ||
168 | 0, 5); | ||
169 | if (ret) | ||
170 | return ret; | ||
171 | |||
172 | oldsize = i_size_read(inode); | ||
173 | btrfs_i_size_write(inode, 0); | ||
174 | truncate_pagecache(inode, oldsize, 0); | ||
175 | |||
176 | /* | ||
177 | * We don't need an orphan item because truncating the free space cache | ||
178 | * will never be split across transactions. | ||
179 | */ | ||
180 | ret = btrfs_truncate_inode_items(trans, root, inode, | ||
181 | 0, BTRFS_EXTENT_DATA_KEY); | ||
182 | if (ret) { | ||
183 | WARN_ON(1); | ||
184 | return ret; | ||
185 | } | ||
186 | |||
187 | return btrfs_update_inode(trans, root, inode); | ||
188 | } | ||
189 | |||
190 | static int readahead_cache(struct inode *inode) | ||
191 | { | ||
192 | struct file_ra_state *ra; | ||
193 | unsigned long last_index; | ||
194 | |||
195 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | ||
196 | if (!ra) | ||
197 | return -ENOMEM; | ||
198 | |||
199 | file_ra_state_init(ra, inode->i_mapping); | ||
200 | last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; | ||
201 | |||
202 | page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); | ||
203 | |||
204 | kfree(ra); | ||
205 | |||
206 | return 0; | ||
207 | } | ||
208 | |||
209 | int load_free_space_cache(struct btrfs_fs_info *fs_info, | ||
210 | struct btrfs_block_group_cache *block_group) | ||
211 | { | ||
212 | struct btrfs_root *root = fs_info->tree_root; | ||
213 | struct inode *inode; | ||
214 | struct btrfs_free_space_header *header; | ||
215 | struct extent_buffer *leaf; | ||
216 | struct page *page; | ||
217 | struct btrfs_path *path; | ||
218 | u32 *checksums = NULL, *crc; | ||
219 | char *disk_crcs = NULL; | ||
220 | struct btrfs_key key; | ||
221 | struct list_head bitmaps; | ||
222 | u64 num_entries; | ||
223 | u64 num_bitmaps; | ||
224 | u64 generation; | ||
225 | u32 cur_crc = ~(u32)0; | ||
226 | pgoff_t index = 0; | ||
227 | unsigned long first_page_offset; | ||
228 | int num_checksums; | ||
229 | int ret = 0; | ||
230 | |||
231 | /* | ||
232 | * If we're unmounting then just return, since this does a search on the | ||
233 | * normal root and not the commit root and we could deadlock. | ||
234 | */ | ||
235 | smp_mb(); | ||
236 | if (fs_info->closing) | ||
237 | return 0; | ||
238 | |||
239 | /* | ||
240 | * If this block group has been marked to be cleared for one reason or | ||
241 | * another then we can't trust the on disk cache, so just return. | ||
242 | */ | ||
243 | spin_lock(&block_group->lock); | ||
244 | if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { | ||
245 | spin_unlock(&block_group->lock); | ||
246 | return 0; | ||
247 | } | ||
248 | spin_unlock(&block_group->lock); | ||
249 | |||
250 | INIT_LIST_HEAD(&bitmaps); | ||
251 | |||
252 | path = btrfs_alloc_path(); | ||
253 | if (!path) | ||
254 | return 0; | ||
255 | |||
256 | inode = lookup_free_space_inode(root, block_group, path); | ||
257 | if (IS_ERR(inode)) { | ||
258 | btrfs_free_path(path); | ||
259 | return 0; | ||
260 | } | ||
261 | |||
262 | /* Nothing in the space cache, goodbye */ | ||
263 | if (!i_size_read(inode)) { | ||
264 | btrfs_free_path(path); | ||
265 | goto out; | ||
266 | } | ||
267 | |||
268 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | ||
269 | key.offset = block_group->key.objectid; | ||
270 | key.type = 0; | ||
271 | |||
272 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
273 | if (ret) { | ||
274 | btrfs_free_path(path); | ||
275 | goto out; | ||
276 | } | ||
277 | |||
278 | leaf = path->nodes[0]; | ||
279 | header = btrfs_item_ptr(leaf, path->slots[0], | ||
280 | struct btrfs_free_space_header); | ||
281 | num_entries = btrfs_free_space_entries(leaf, header); | ||
282 | num_bitmaps = btrfs_free_space_bitmaps(leaf, header); | ||
283 | generation = btrfs_free_space_generation(leaf, header); | ||
284 | btrfs_free_path(path); | ||
285 | |||
286 | if (BTRFS_I(inode)->generation != generation) { | ||
287 | printk(KERN_ERR "btrfs: free space inode generation (%llu) did" | ||
288 | " not match free space cache generation (%llu) for " | ||
289 | "block group %llu\n", | ||
290 | (unsigned long long)BTRFS_I(inode)->generation, | ||
291 | (unsigned long long)generation, | ||
292 | (unsigned long long)block_group->key.objectid); | ||
293 | goto free_cache; | ||
294 | } | ||
295 | |||
296 | if (!num_entries) | ||
297 | goto out; | ||
298 | |||
299 | /* Setup everything for doing checksumming */ | ||
300 | num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; | ||
301 | checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); | ||
302 | if (!checksums) | ||
303 | goto out; | ||
304 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); | ||
305 | disk_crcs = kzalloc(first_page_offset, GFP_NOFS); | ||
306 | if (!disk_crcs) | ||
307 | goto out; | ||
308 | |||
309 | ret = readahead_cache(inode); | ||
310 | if (ret) { | ||
311 | ret = 0; | ||
312 | goto out; | ||
313 | } | ||
314 | |||
315 | while (1) { | ||
316 | struct btrfs_free_space_entry *entry; | ||
317 | struct btrfs_free_space *e; | ||
318 | void *addr; | ||
319 | unsigned long offset = 0; | ||
320 | unsigned long start_offset = 0; | ||
321 | int need_loop = 0; | ||
322 | |||
323 | if (!num_entries && !num_bitmaps) | ||
324 | break; | ||
325 | |||
326 | if (index == 0) { | ||
327 | start_offset = first_page_offset; | ||
328 | offset = start_offset; | ||
329 | } | ||
330 | |||
331 | page = grab_cache_page(inode->i_mapping, index); | ||
332 | if (!page) { | ||
333 | ret = 0; | ||
334 | goto free_cache; | ||
335 | } | ||
336 | |||
337 | if (!PageUptodate(page)) { | ||
338 | btrfs_readpage(NULL, page); | ||
339 | lock_page(page); | ||
340 | if (!PageUptodate(page)) { | ||
341 | unlock_page(page); | ||
342 | page_cache_release(page); | ||
343 | printk(KERN_ERR "btrfs: error reading free " | ||
344 | "space cache: %llu\n", | ||
345 | (unsigned long long) | ||
346 | block_group->key.objectid); | ||
347 | goto free_cache; | ||
348 | } | ||
349 | } | ||
350 | addr = kmap(page); | ||
351 | |||
352 | if (index == 0) { | ||
353 | u64 *gen; | ||
354 | |||
355 | memcpy(disk_crcs, addr, first_page_offset); | ||
356 | gen = addr + (sizeof(u32) * num_checksums); | ||
357 | if (*gen != BTRFS_I(inode)->generation) { | ||
358 | printk(KERN_ERR "btrfs: space cache generation" | ||
359 | " (%llu) does not match inode (%llu) " | ||
360 | "for block group %llu\n", | ||
361 | (unsigned long long)*gen, | ||
362 | (unsigned long long) | ||
363 | BTRFS_I(inode)->generation, | ||
364 | (unsigned long long) | ||
365 | block_group->key.objectid); | ||
366 | kunmap(page); | ||
367 | unlock_page(page); | ||
368 | page_cache_release(page); | ||
369 | goto free_cache; | ||
370 | } | ||
371 | crc = (u32 *)disk_crcs; | ||
372 | } | ||
373 | entry = addr + start_offset; | ||
374 | |||
375 | /* First lets check our crc before we do anything fun */ | ||
376 | cur_crc = ~(u32)0; | ||
377 | cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc, | ||
378 | PAGE_CACHE_SIZE - start_offset); | ||
379 | btrfs_csum_final(cur_crc, (char *)&cur_crc); | ||
380 | if (cur_crc != *crc) { | ||
381 | printk(KERN_ERR "btrfs: crc mismatch for page %lu in " | ||
382 | "block group %llu\n", index, | ||
383 | (unsigned long long)block_group->key.objectid); | ||
384 | kunmap(page); | ||
385 | unlock_page(page); | ||
386 | page_cache_release(page); | ||
387 | goto free_cache; | ||
388 | } | ||
389 | crc++; | ||
390 | |||
391 | while (1) { | ||
392 | if (!num_entries) | ||
393 | break; | ||
394 | |||
395 | need_loop = 1; | ||
396 | e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); | ||
397 | if (!e) { | ||
398 | kunmap(page); | ||
399 | unlock_page(page); | ||
400 | page_cache_release(page); | ||
401 | goto free_cache; | ||
402 | } | ||
403 | |||
404 | e->offset = le64_to_cpu(entry->offset); | ||
405 | e->bytes = le64_to_cpu(entry->bytes); | ||
406 | if (!e->bytes) { | ||
407 | kunmap(page); | ||
408 | kfree(e); | ||
409 | unlock_page(page); | ||
410 | page_cache_release(page); | ||
411 | goto free_cache; | ||
412 | } | ||
413 | |||
414 | if (entry->type == BTRFS_FREE_SPACE_EXTENT) { | ||
415 | spin_lock(&block_group->tree_lock); | ||
416 | ret = link_free_space(block_group, e); | ||
417 | spin_unlock(&block_group->tree_lock); | ||
418 | BUG_ON(ret); | ||
419 | } else { | ||
420 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); | ||
421 | if (!e->bitmap) { | ||
422 | kunmap(page); | ||
423 | kfree(e); | ||
424 | unlock_page(page); | ||
425 | page_cache_release(page); | ||
426 | goto free_cache; | ||
427 | } | ||
428 | spin_lock(&block_group->tree_lock); | ||
429 | ret = link_free_space(block_group, e); | ||
430 | block_group->total_bitmaps++; | ||
431 | recalculate_thresholds(block_group); | ||
432 | spin_unlock(&block_group->tree_lock); | ||
433 | list_add_tail(&e->list, &bitmaps); | ||
434 | } | ||
435 | |||
436 | num_entries--; | ||
437 | offset += sizeof(struct btrfs_free_space_entry); | ||
438 | if (offset + sizeof(struct btrfs_free_space_entry) >= | ||
439 | PAGE_CACHE_SIZE) | ||
440 | break; | ||
441 | entry++; | ||
442 | } | ||
443 | |||
444 | /* | ||
445 | * We read an entry out of this page, we need to move on to the | ||
446 | * next page. | ||
447 | */ | ||
448 | if (need_loop) { | ||
449 | kunmap(page); | ||
450 | goto next; | ||
451 | } | ||
452 | |||
453 | /* | ||
454 | * We add the bitmaps at the end of the entries in order that | ||
455 | * the bitmap entries are added to the cache. | ||
456 | */ | ||
457 | e = list_entry(bitmaps.next, struct btrfs_free_space, list); | ||
458 | list_del_init(&e->list); | ||
459 | memcpy(e->bitmap, addr, PAGE_CACHE_SIZE); | ||
460 | kunmap(page); | ||
461 | num_bitmaps--; | ||
462 | next: | ||
463 | unlock_page(page); | ||
464 | page_cache_release(page); | ||
465 | index++; | ||
466 | } | ||
467 | |||
468 | ret = 1; | ||
469 | out: | ||
470 | kfree(checksums); | ||
471 | kfree(disk_crcs); | ||
472 | iput(inode); | ||
473 | return ret; | ||
474 | |||
475 | free_cache: | ||
476 | /* This cache is bogus, make sure it gets cleared */ | ||
477 | spin_lock(&block_group->lock); | ||
478 | block_group->disk_cache_state = BTRFS_DC_CLEAR; | ||
479 | spin_unlock(&block_group->lock); | ||
480 | btrfs_remove_free_space_cache(block_group); | ||
481 | goto out; | ||
482 | } | ||
483 | |||
/*
 * Persist a block group's in-memory free-space cache to its cache inode.
 *
 * Serializes every entry in block_group->free_space_offset (extent entries
 * first, then full-page bitmaps), computes a CRC per page, and stores all
 * CRCs plus the transaction generation at the front of page 0.  Finally the
 * on-disk free-space header item is updated with the entry/bitmap counts
 * and the new generation.
 *
 * Returns 1 on success, 0 on any failure (failure also marks the block
 * group BTRFS_DC_ERROR and zeroes the cached generation so the stale cache
 * will not be trusted on the next mount).
 */
int btrfs_write_out_cache(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode;
	struct rb_node *node;
	struct list_head *pos, *n;
	struct page *page;
	struct extent_state *cached_state = NULL;
	struct list_head bitmap_list;
	struct btrfs_key key;
	u64 bytes = 0;
	u32 *crc, *checksums;	/* checksums = base of per-page crc array, crc = cursor */
	pgoff_t index = 0, last_index = 0;
	unsigned long first_page_offset;
	int num_checksums;
	int entries = 0;
	int bitmaps = 0;
	int ret = 0;

	/* The free-space cache inode lives in the tree root, not in @root. */
	root = root->fs_info->tree_root;

	INIT_LIST_HEAD(&bitmap_list);

	/* Nothing to write unless the cache was set up this transaction. */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode))
		return 0;

	/* A zero-size cache inode has no pages to fill in yet. */
	if (!i_size_read(inode)) {
		iput(inode);
		return 0;
	}

	/* No free space recorded at all -> nothing worth caching. */
	node = rb_first(&block_group->free_space_offset);
	if (!node) {
		iput(inode);
		return 0;
	}

	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
	/* Flush any previous cache contents before rewriting the pages. */
	filemap_write_and_wait(inode->i_mapping);
	btrfs_wait_ordered_range(inode, inode->i_size &
				 ~(root->sectorsize - 1), (u64)-1);

	/* We need a checksum per page. */
	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
	crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
	if (!crc) {
		iput(inode);
		return 0;
	}

	/* Since the first page has all of our checksums and our generation we
	 * need to calculate the offset into the page that we can start writing
	 * our entries.
	 */
	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);

	/*
	 * Lock all pages first so we can lock the extent safely.
	 *
	 * NOTE: Because we hold the ref the entire time we're going to write to
	 * the page find_get_page should never fail, so we don't do a check
	 * after find_get_page at this point.  Just putting this here so people
	 * know and don't freak out.
	 */
	while (index <= last_index) {
		page = grab_cache_page(inode->i_mapping, index);
		if (!page) {
			pgoff_t i = 0;

			/*
			 * Unwind: each already-grabbed page is found again
			 * (one extra ref) so we drop two refs per page — the
			 * find_get_page ref and the original grab_cache_page
			 * ref.
			 */
			while (i < index) {
				page = find_get_page(inode->i_mapping, i);
				unlock_page(page);
				page_cache_release(page);
				page_cache_release(page);
				i++;
			}
			goto out_free;
		}
		index++;
	}

	index = 0;
	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 0, &cached_state, GFP_NOFS);

	/* Write out the extent entries */
	do {
		struct btrfs_free_space_entry *entry;
		void *addr;
		unsigned long offset = 0;
		unsigned long start_offset = 0;

		/* Page 0 starts after the checksum array + generation. */
		if (index == 0) {
			start_offset = first_page_offset;
			offset = start_offset;
		}

		page = find_get_page(inode->i_mapping, index);

		addr = kmap(page);
		entry = addr + start_offset;

		memset(addr, 0, PAGE_CACHE_SIZE);
		/* Pack rb-tree entries into this page until it is full. */
		while (1) {
			struct btrfs_free_space *e;

			e = rb_entry(node, struct btrfs_free_space, offset_index);
			entries++;

			entry->offset = cpu_to_le64(e->offset);
			entry->bytes = cpu_to_le64(e->bytes);
			if (e->bitmap) {
				/* Bitmap payloads are written later, after
				 * all extent entries; just queue them. */
				entry->type = BTRFS_FREE_SPACE_BITMAP;
				list_add_tail(&e->list, &bitmap_list);
				bitmaps++;
			} else {
				entry->type = BTRFS_FREE_SPACE_EXTENT;
			}
			node = rb_next(node);
			if (!node)
				break;
			offset += sizeof(struct btrfs_free_space_entry);
			if (offset + sizeof(struct btrfs_free_space_entry) >=
			    PAGE_CACHE_SIZE)
				break;
			entry++;
		}
		/* crc covers only the entry area (skips the header on page 0) */
		*crc = ~(u32)0;
		*crc = btrfs_csum_data(root, addr + start_offset, *crc,
				       PAGE_CACHE_SIZE - start_offset);
		kunmap(page);

		btrfs_csum_final(*crc, (char *)crc);
		crc++;

		bytes += PAGE_CACHE_SIZE;

		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);

		/*
		 * We need to release our reference we got for grab_cache_page,
		 * except for the first page which will hold our checksums, we
		 * do that below.
		 */
		if (index != 0) {
			unlock_page(page);
			page_cache_release(page);
		}

		/* Drop the find_get_page reference taken above. */
		page_cache_release(page);

		index++;
	} while (node);

	/* Write out the bitmaps */
	list_for_each_safe(pos, n, &bitmap_list) {
		void *addr;
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);

		page = find_get_page(inode->i_mapping, index);

		addr = kmap(page);
		/* Each bitmap occupies exactly one whole page. */
		memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
		*crc = ~(u32)0;
		*crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
		kunmap(page);
		btrfs_csum_final(*crc, (char *)crc);
		crc++;
		bytes += PAGE_CACHE_SIZE;

		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		/* Two refs held (grab + find); drop both. */
		page_cache_release(page);
		page_cache_release(page);
		list_del_init(&entry->list);
		index++;
	}

	/* Zero out the rest of the pages just to make sure */
	while (index <= last_index) {
		void *addr;

		page = find_get_page(inode->i_mapping, index);

		addr = kmap(page);
		memset(addr, 0, PAGE_CACHE_SIZE);
		kunmap(page);
		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		page_cache_release(page);
		bytes += PAGE_CACHE_SIZE;
		index++;
	}

	btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);

	/* Write the checksums and trans id to the first page */
	{
		void *addr;
		u64 *gen;

		page = find_get_page(inode->i_mapping, 0);

		addr = kmap(page);
		memcpy(addr, checksums, sizeof(u32) * num_checksums);
		/* Generation sits immediately after the checksum array. */
		gen = addr + (sizeof(u32) * num_checksums);
		*gen = trans->transid;
		kunmap(page);
		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		page_cache_release(page);
	}
	BTRFS_I(inode)->generation = trans->transid;

	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);

	/* NOTE(review): return value of this writeback is not checked —
	 * presumably a later load will catch corruption via the crcs. */
	filemap_write_and_wait(inode->i_mapping);

	/* Update the free-space header item for this block group. */
	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
	if (ret < 0) {
		ret = 0;
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
				 EXTENT_DIRTY | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
		goto out_free;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		/* Exact key not found; the previous slot must hold it. */
		struct btrfs_key found_key;
		BUG_ON(!path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != block_group->key.objectid) {
			ret = 0;
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
					 EXTENT_DIRTY | EXTENT_DELALLOC |
					 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
					 GFP_NOFS);
			btrfs_release_path(root, path);
			goto out_free;
		}
	}
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);

	ret = 1;

out_free:
	if (ret == 0) {
		/* Failure: invalidate what we wrote and poison the cache
		 * state so it will be rebuilt instead of trusted. */
		invalidate_inode_pages2_range(inode->i_mapping, 0, index);
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);
		BTRFS_I(inode)->generation = 0;
	}
	kfree(checksums);
	btrfs_update_inode(trans, root, inode);
	iput(inode);
	return ret;
}
782 | |||
29 | static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, | 783 | static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, |
30 | u64 offset) | 784 | u64 offset) |
31 | { | 785 | { |
@@ -870,7 +1624,7 @@ __btrfs_return_cluster_to_free_space( | |||
870 | tree_insert_offset(&block_group->free_space_offset, | 1624 | tree_insert_offset(&block_group->free_space_offset, |
871 | entry->offset, &entry->offset_index, 0); | 1625 | entry->offset, &entry->offset_index, 0); |
872 | } | 1626 | } |
873 | cluster->root.rb_node = NULL; | 1627 | cluster->root = RB_ROOT; |
874 | 1628 | ||
875 | out: | 1629 | out: |
876 | spin_unlock(&cluster->lock); | 1630 | spin_unlock(&cluster->lock); |
@@ -1355,7 +2109,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) | |||
1355 | { | 2109 | { |
1356 | spin_lock_init(&cluster->lock); | 2110 | spin_lock_init(&cluster->lock); |
1357 | spin_lock_init(&cluster->refill_lock); | 2111 | spin_lock_init(&cluster->refill_lock); |
1358 | cluster->root.rb_node = NULL; | 2112 | cluster->root = RB_ROOT; |
1359 | cluster->max_size = 0; | 2113 | cluster->max_size = 0; |
1360 | cluster->points_to_bitmap = false; | 2114 | cluster->points_to_bitmap = false; |
1361 | INIT_LIST_HEAD(&cluster->block_group_list); | 2115 | INIT_LIST_HEAD(&cluster->block_group_list); |
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 890a8e79011b..e49ca5c321b5 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h | |||
@@ -27,6 +27,24 @@ struct btrfs_free_space { | |||
27 | struct list_head list; | 27 | struct list_head list; |
28 | }; | 28 | }; |
29 | 29 | ||
30 | struct inode *lookup_free_space_inode(struct btrfs_root *root, | ||
31 | struct btrfs_block_group_cache | ||
32 | *block_group, struct btrfs_path *path); | ||
33 | int create_free_space_inode(struct btrfs_root *root, | ||
34 | struct btrfs_trans_handle *trans, | ||
35 | struct btrfs_block_group_cache *block_group, | ||
36 | struct btrfs_path *path); | ||
37 | |||
38 | int btrfs_truncate_free_space_cache(struct btrfs_root *root, | ||
39 | struct btrfs_trans_handle *trans, | ||
40 | struct btrfs_path *path, | ||
41 | struct inode *inode); | ||
42 | int load_free_space_cache(struct btrfs_fs_info *fs_info, | ||
43 | struct btrfs_block_group_cache *block_group); | ||
44 | int btrfs_write_out_cache(struct btrfs_root *root, | ||
45 | struct btrfs_trans_handle *trans, | ||
46 | struct btrfs_block_group_cache *block_group, | ||
47 | struct btrfs_path *path); | ||
30 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | 48 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, |
31 | u64 bytenr, u64 size); | 49 | u64 bytenr, u64 size); |
32 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | 50 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, |
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 72ce3c173d6a..64f1150bb48d 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
@@ -49,6 +49,33 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name, | |||
49 | return 0; | 49 | return 0; |
50 | } | 50 | } |
51 | 51 | ||
52 | struct btrfs_inode_ref * | ||
53 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | ||
54 | struct btrfs_root *root, | ||
55 | struct btrfs_path *path, | ||
56 | const char *name, int name_len, | ||
57 | u64 inode_objectid, u64 ref_objectid, int mod) | ||
58 | { | ||
59 | struct btrfs_key key; | ||
60 | struct btrfs_inode_ref *ref; | ||
61 | int ins_len = mod < 0 ? -1 : 0; | ||
62 | int cow = mod != 0; | ||
63 | int ret; | ||
64 | |||
65 | key.objectid = inode_objectid; | ||
66 | key.type = BTRFS_INODE_REF_KEY; | ||
67 | key.offset = ref_objectid; | ||
68 | |||
69 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | ||
70 | if (ret < 0) | ||
71 | return ERR_PTR(ret); | ||
72 | if (ret > 0) | ||
73 | return NULL; | ||
74 | if (!find_name_in_backref(path, name, name_len, &ref)) | ||
75 | return NULL; | ||
76 | return ref; | ||
77 | } | ||
78 | |||
52 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | 79 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, |
53 | struct btrfs_root *root, | 80 | struct btrfs_root *root, |
54 | const char *name, int name_len, | 81 | const char *name, int name_len, |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8cd109972fa6..160b55b3e132 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/xattr.h> | 36 | #include <linux/xattr.h> |
37 | #include <linux/posix_acl.h> | 37 | #include <linux/posix_acl.h> |
38 | #include <linux/falloc.h> | 38 | #include <linux/falloc.h> |
39 | #include <linux/slab.h> | ||
39 | #include "compat.h" | 40 | #include "compat.h" |
40 | #include "ctree.h" | 41 | #include "ctree.h" |
41 | #include "disk-io.h" | 42 | #include "disk-io.h" |
@@ -121,10 +122,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
121 | size_t cur_size = size; | 122 | size_t cur_size = size; |
122 | size_t datasize; | 123 | size_t datasize; |
123 | unsigned long offset; | 124 | unsigned long offset; |
124 | int use_compress = 0; | 125 | int compress_type = BTRFS_COMPRESS_NONE; |
125 | 126 | ||
126 | if (compressed_size && compressed_pages) { | 127 | if (compressed_size && compressed_pages) { |
127 | use_compress = 1; | 128 | compress_type = root->fs_info->compress_type; |
128 | cur_size = compressed_size; | 129 | cur_size = compressed_size; |
129 | } | 130 | } |
130 | 131 | ||
@@ -158,7 +159,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
158 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); | 159 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); |
159 | ptr = btrfs_file_extent_inline_start(ei); | 160 | ptr = btrfs_file_extent_inline_start(ei); |
160 | 161 | ||
161 | if (use_compress) { | 162 | if (compress_type != BTRFS_COMPRESS_NONE) { |
162 | struct page *cpage; | 163 | struct page *cpage; |
163 | int i = 0; | 164 | int i = 0; |
164 | while (compressed_size > 0) { | 165 | while (compressed_size > 0) { |
@@ -175,7 +176,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
175 | compressed_size -= cur_size; | 176 | compressed_size -= cur_size; |
176 | } | 177 | } |
177 | btrfs_set_file_extent_compression(leaf, ei, | 178 | btrfs_set_file_extent_compression(leaf, ei, |
178 | BTRFS_COMPRESS_ZLIB); | 179 | compress_type); |
179 | } else { | 180 | } else { |
180 | page = find_get_page(inode->i_mapping, | 181 | page = find_get_page(inode->i_mapping, |
181 | start >> PAGE_CACHE_SHIFT); | 182 | start >> PAGE_CACHE_SHIFT); |
@@ -251,6 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
251 | inline_len, compressed_size, | 252 | inline_len, compressed_size, |
252 | compressed_pages); | 253 | compressed_pages); |
253 | BUG_ON(ret); | 254 | BUG_ON(ret); |
255 | btrfs_delalloc_release_metadata(inode, end + 1 - start); | ||
254 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); | 256 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
255 | return 0; | 257 | return 0; |
256 | } | 258 | } |
@@ -261,6 +263,7 @@ struct async_extent { | |||
261 | u64 compressed_size; | 263 | u64 compressed_size; |
262 | struct page **pages; | 264 | struct page **pages; |
263 | unsigned long nr_pages; | 265 | unsigned long nr_pages; |
266 | int compress_type; | ||
264 | struct list_head list; | 267 | struct list_head list; |
265 | }; | 268 | }; |
266 | 269 | ||
@@ -278,7 +281,8 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
278 | u64 start, u64 ram_size, | 281 | u64 start, u64 ram_size, |
279 | u64 compressed_size, | 282 | u64 compressed_size, |
280 | struct page **pages, | 283 | struct page **pages, |
281 | unsigned long nr_pages) | 284 | unsigned long nr_pages, |
285 | int compress_type) | ||
282 | { | 286 | { |
283 | struct async_extent *async_extent; | 287 | struct async_extent *async_extent; |
284 | 288 | ||
@@ -288,6 +292,7 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
288 | async_extent->compressed_size = compressed_size; | 292 | async_extent->compressed_size = compressed_size; |
289 | async_extent->pages = pages; | 293 | async_extent->pages = pages; |
290 | async_extent->nr_pages = nr_pages; | 294 | async_extent->nr_pages = nr_pages; |
295 | async_extent->compress_type = compress_type; | ||
291 | list_add_tail(&async_extent->list, &cow->extents); | 296 | list_add_tail(&async_extent->list, &cow->extents); |
292 | return 0; | 297 | return 0; |
293 | } | 298 | } |
@@ -317,8 +322,6 @@ static noinline int compress_file_range(struct inode *inode, | |||
317 | struct btrfs_root *root = BTRFS_I(inode)->root; | 322 | struct btrfs_root *root = BTRFS_I(inode)->root; |
318 | struct btrfs_trans_handle *trans; | 323 | struct btrfs_trans_handle *trans; |
319 | u64 num_bytes; | 324 | u64 num_bytes; |
320 | u64 orig_start; | ||
321 | u64 disk_num_bytes; | ||
322 | u64 blocksize = root->sectorsize; | 325 | u64 blocksize = root->sectorsize; |
323 | u64 actual_end; | 326 | u64 actual_end; |
324 | u64 isize = i_size_read(inode); | 327 | u64 isize = i_size_read(inode); |
@@ -332,8 +335,7 @@ static noinline int compress_file_range(struct inode *inode, | |||
332 | unsigned long max_uncompressed = 128 * 1024; | 335 | unsigned long max_uncompressed = 128 * 1024; |
333 | int i; | 336 | int i; |
334 | int will_compress; | 337 | int will_compress; |
335 | 338 | int compress_type = root->fs_info->compress_type; | |
336 | orig_start = start; | ||
337 | 339 | ||
338 | actual_end = min_t(u64, isize, end + 1); | 340 | actual_end = min_t(u64, isize, end + 1); |
339 | again: | 341 | again: |
@@ -369,7 +371,6 @@ again: | |||
369 | total_compressed = min(total_compressed, max_uncompressed); | 371 | total_compressed = min(total_compressed, max_uncompressed); |
370 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); | 372 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); |
371 | num_bytes = max(blocksize, num_bytes); | 373 | num_bytes = max(blocksize, num_bytes); |
372 | disk_num_bytes = num_bytes; | ||
373 | total_in = 0; | 374 | total_in = 0; |
374 | ret = 0; | 375 | ret = 0; |
375 | 376 | ||
@@ -379,16 +380,21 @@ again: | |||
379 | * change at any time if we discover bad compression ratios. | 380 | * change at any time if we discover bad compression ratios. |
380 | */ | 381 | */ |
381 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && | 382 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && |
382 | btrfs_test_opt(root, COMPRESS)) { | 383 | (btrfs_test_opt(root, COMPRESS) || |
384 | (BTRFS_I(inode)->force_compress))) { | ||
383 | WARN_ON(pages); | 385 | WARN_ON(pages); |
384 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 386 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
385 | 387 | ||
386 | ret = btrfs_zlib_compress_pages(inode->i_mapping, start, | 388 | if (BTRFS_I(inode)->force_compress) |
387 | total_compressed, pages, | 389 | compress_type = BTRFS_I(inode)->force_compress; |
388 | nr_pages, &nr_pages_ret, | 390 | |
389 | &total_in, | 391 | ret = btrfs_compress_pages(compress_type, |
390 | &total_compressed, | 392 | inode->i_mapping, start, |
391 | max_compressed); | 393 | total_compressed, pages, |
394 | nr_pages, &nr_pages_ret, | ||
395 | &total_in, | ||
396 | &total_compressed, | ||
397 | max_compressed); | ||
392 | 398 | ||
393 | if (!ret) { | 399 | if (!ret) { |
394 | unsigned long offset = total_compressed & | 400 | unsigned long offset = total_compressed & |
@@ -412,6 +418,7 @@ again: | |||
412 | trans = btrfs_join_transaction(root, 1); | 418 | trans = btrfs_join_transaction(root, 1); |
413 | BUG_ON(!trans); | 419 | BUG_ON(!trans); |
414 | btrfs_set_trans_block_group(trans, inode); | 420 | btrfs_set_trans_block_group(trans, inode); |
421 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
415 | 422 | ||
416 | /* lets try to make an inline extent */ | 423 | /* lets try to make an inline extent */ |
417 | if (ret || total_in < (actual_end - start)) { | 424 | if (ret || total_in < (actual_end - start)) { |
@@ -437,7 +444,6 @@ again: | |||
437 | start, end, NULL, | 444 | start, end, NULL, |
438 | EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | | 445 | EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | |
439 | EXTENT_CLEAR_DELALLOC | | 446 | EXTENT_CLEAR_DELALLOC | |
440 | EXTENT_CLEAR_ACCOUNTING | | ||
441 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); | 447 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); |
442 | 448 | ||
443 | btrfs_end_transaction(trans, root); | 449 | btrfs_end_transaction(trans, root); |
@@ -464,7 +470,6 @@ again: | |||
464 | if (total_compressed >= total_in) { | 470 | if (total_compressed >= total_in) { |
465 | will_compress = 0; | 471 | will_compress = 0; |
466 | } else { | 472 | } else { |
467 | disk_num_bytes = total_compressed; | ||
468 | num_bytes = total_in; | 473 | num_bytes = total_in; |
469 | } | 474 | } |
470 | } | 475 | } |
@@ -483,8 +488,10 @@ again: | |||
483 | nr_pages_ret = 0; | 488 | nr_pages_ret = 0; |
484 | 489 | ||
485 | /* flag the file so we don't compress in the future */ | 490 | /* flag the file so we don't compress in the future */ |
486 | if (!btrfs_test_opt(root, FORCE_COMPRESS)) | 491 | if (!btrfs_test_opt(root, FORCE_COMPRESS) && |
492 | !(BTRFS_I(inode)->force_compress)) { | ||
487 | BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; | 493 | BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; |
494 | } | ||
488 | } | 495 | } |
489 | if (will_compress) { | 496 | if (will_compress) { |
490 | *num_added += 1; | 497 | *num_added += 1; |
@@ -494,9 +501,10 @@ again: | |||
494 | * and will submit them to the elevator. | 501 | * and will submit them to the elevator. |
495 | */ | 502 | */ |
496 | add_async_extent(async_cow, start, num_bytes, | 503 | add_async_extent(async_cow, start, num_bytes, |
497 | total_compressed, pages, nr_pages_ret); | 504 | total_compressed, pages, nr_pages_ret, |
505 | compress_type); | ||
498 | 506 | ||
499 | if (start + num_bytes < end && start + num_bytes < actual_end) { | 507 | if (start + num_bytes < end) { |
500 | start += num_bytes; | 508 | start += num_bytes; |
501 | pages = NULL; | 509 | pages = NULL; |
502 | cond_resched(); | 510 | cond_resched(); |
@@ -516,7 +524,8 @@ cleanup_and_bail_uncompressed: | |||
516 | __set_page_dirty_nobuffers(locked_page); | 524 | __set_page_dirty_nobuffers(locked_page); |
517 | /* unlocked later on in the async handlers */ | 525 | /* unlocked later on in the async handlers */ |
518 | } | 526 | } |
519 | add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0); | 527 | add_async_extent(async_cow, start, end - start + 1, |
528 | 0, NULL, 0, BTRFS_COMPRESS_NONE); | ||
520 | *num_added += 1; | 529 | *num_added += 1; |
521 | } | 530 | } |
522 | 531 | ||
@@ -570,8 +579,8 @@ retry: | |||
570 | unsigned long nr_written = 0; | 579 | unsigned long nr_written = 0; |
571 | 580 | ||
572 | lock_extent(io_tree, async_extent->start, | 581 | lock_extent(io_tree, async_extent->start, |
573 | async_extent->start + | 582 | async_extent->start + |
574 | async_extent->ram_size - 1, GFP_NOFS); | 583 | async_extent->ram_size - 1, GFP_NOFS); |
575 | 584 | ||
576 | /* allocate blocks */ | 585 | /* allocate blocks */ |
577 | ret = cow_file_range(inode, async_cow->locked_page, | 586 | ret = cow_file_range(inode, async_cow->locked_page, |
@@ -641,6 +650,7 @@ retry: | |||
641 | em->block_start = ins.objectid; | 650 | em->block_start = ins.objectid; |
642 | em->block_len = ins.offset; | 651 | em->block_len = ins.offset; |
643 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 652 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
653 | em->compress_type = async_extent->compress_type; | ||
644 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 654 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
645 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 655 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
646 | 656 | ||
@@ -657,11 +667,13 @@ retry: | |||
657 | async_extent->ram_size - 1, 0); | 667 | async_extent->ram_size - 1, 0); |
658 | } | 668 | } |
659 | 669 | ||
660 | ret = btrfs_add_ordered_extent(inode, async_extent->start, | 670 | ret = btrfs_add_ordered_extent_compress(inode, |
661 | ins.objectid, | 671 | async_extent->start, |
662 | async_extent->ram_size, | 672 | ins.objectid, |
663 | ins.offset, | 673 | async_extent->ram_size, |
664 | BTRFS_ORDERED_COMPRESSED); | 674 | ins.offset, |
675 | BTRFS_ORDERED_COMPRESSED, | ||
676 | async_extent->compress_type); | ||
665 | BUG_ON(ret); | 677 | BUG_ON(ret); |
666 | 678 | ||
667 | /* | 679 | /* |
@@ -693,6 +705,38 @@ retry: | |||
693 | return 0; | 705 | return 0; |
694 | } | 706 | } |
695 | 707 | ||
708 | static u64 get_extent_allocation_hint(struct inode *inode, u64 start, | ||
709 | u64 num_bytes) | ||
710 | { | ||
711 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
712 | struct extent_map *em; | ||
713 | u64 alloc_hint = 0; | ||
714 | |||
715 | read_lock(&em_tree->lock); | ||
716 | em = search_extent_mapping(em_tree, start, num_bytes); | ||
717 | if (em) { | ||
718 | /* | ||
719 | * if block start isn't an actual block number then find the | ||
720 | * first block in this inode and use that as a hint. If that | ||
721 | * block is also bogus then just don't worry about it. | ||
722 | */ | ||
723 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
724 | free_extent_map(em); | ||
725 | em = search_extent_mapping(em_tree, 0, 0); | ||
726 | if (em && em->block_start < EXTENT_MAP_LAST_BYTE) | ||
727 | alloc_hint = em->block_start; | ||
728 | if (em) | ||
729 | free_extent_map(em); | ||
730 | } else { | ||
731 | alloc_hint = em->block_start; | ||
732 | free_extent_map(em); | ||
733 | } | ||
734 | } | ||
735 | read_unlock(&em_tree->lock); | ||
736 | |||
737 | return alloc_hint; | ||
738 | } | ||
739 | |||
696 | /* | 740 | /* |
697 | * when extent_io.c finds a delayed allocation range in the file, | 741 | * when extent_io.c finds a delayed allocation range in the file, |
698 | * the call backs end up in this code. The basic idea is to | 742 | * the call backs end up in this code. The basic idea is to |
@@ -720,18 +764,16 @@ static noinline int cow_file_range(struct inode *inode, | |||
720 | u64 disk_num_bytes; | 764 | u64 disk_num_bytes; |
721 | u64 cur_alloc_size; | 765 | u64 cur_alloc_size; |
722 | u64 blocksize = root->sectorsize; | 766 | u64 blocksize = root->sectorsize; |
723 | u64 actual_end; | ||
724 | u64 isize = i_size_read(inode); | ||
725 | struct btrfs_key ins; | 767 | struct btrfs_key ins; |
726 | struct extent_map *em; | 768 | struct extent_map *em; |
727 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 769 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
728 | int ret = 0; | 770 | int ret = 0; |
729 | 771 | ||
772 | BUG_ON(root == root->fs_info->tree_root); | ||
730 | trans = btrfs_join_transaction(root, 1); | 773 | trans = btrfs_join_transaction(root, 1); |
731 | BUG_ON(!trans); | 774 | BUG_ON(!trans); |
732 | btrfs_set_trans_block_group(trans, inode); | 775 | btrfs_set_trans_block_group(trans, inode); |
733 | 776 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | |
734 | actual_end = min_t(u64, isize, end + 1); | ||
735 | 777 | ||
736 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); | 778 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); |
737 | num_bytes = max(blocksize, num_bytes); | 779 | num_bytes = max(blocksize, num_bytes); |
@@ -749,7 +791,6 @@ static noinline int cow_file_range(struct inode *inode, | |||
749 | EXTENT_CLEAR_UNLOCK_PAGE | | 791 | EXTENT_CLEAR_UNLOCK_PAGE | |
750 | EXTENT_CLEAR_UNLOCK | | 792 | EXTENT_CLEAR_UNLOCK | |
751 | EXTENT_CLEAR_DELALLOC | | 793 | EXTENT_CLEAR_DELALLOC | |
752 | EXTENT_CLEAR_ACCOUNTING | | ||
753 | EXTENT_CLEAR_DIRTY | | 794 | EXTENT_CLEAR_DIRTY | |
754 | EXTENT_SET_WRITEBACK | | 795 | EXTENT_SET_WRITEBACK | |
755 | EXTENT_END_WRITEBACK); | 796 | EXTENT_END_WRITEBACK); |
@@ -765,35 +806,13 @@ static noinline int cow_file_range(struct inode *inode, | |||
765 | BUG_ON(disk_num_bytes > | 806 | BUG_ON(disk_num_bytes > |
766 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 807 | btrfs_super_total_bytes(&root->fs_info->super_copy)); |
767 | 808 | ||
768 | 809 | alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); | |
769 | read_lock(&BTRFS_I(inode)->extent_tree.lock); | ||
770 | em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, | ||
771 | start, num_bytes); | ||
772 | if (em) { | ||
773 | /* | ||
774 | * if block start isn't an actual block number then find the | ||
775 | * first block in this inode and use that as a hint. If that | ||
776 | * block is also bogus then just don't worry about it. | ||
777 | */ | ||
778 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
779 | free_extent_map(em); | ||
780 | em = search_extent_mapping(em_tree, 0, 0); | ||
781 | if (em && em->block_start < EXTENT_MAP_LAST_BYTE) | ||
782 | alloc_hint = em->block_start; | ||
783 | if (em) | ||
784 | free_extent_map(em); | ||
785 | } else { | ||
786 | alloc_hint = em->block_start; | ||
787 | free_extent_map(em); | ||
788 | } | ||
789 | } | ||
790 | read_unlock(&BTRFS_I(inode)->extent_tree.lock); | ||
791 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 810 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
792 | 811 | ||
793 | while (disk_num_bytes > 0) { | 812 | while (disk_num_bytes > 0) { |
794 | unsigned long op; | 813 | unsigned long op; |
795 | 814 | ||
796 | cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); | 815 | cur_alloc_size = disk_num_bytes; |
797 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, | 816 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, |
798 | root->sectorsize, 0, alloc_hint, | 817 | root->sectorsize, 0, alloc_hint, |
799 | (u64)-1, &ins, 1); | 818 | (u64)-1, &ins, 1); |
@@ -1020,10 +1039,16 @@ static noinline int run_delalloc_nocow(struct inode *inode, | |||
1020 | int type; | 1039 | int type; |
1021 | int nocow; | 1040 | int nocow; |
1022 | int check_prev = 1; | 1041 | int check_prev = 1; |
1042 | bool nolock = false; | ||
1023 | 1043 | ||
1024 | path = btrfs_alloc_path(); | 1044 | path = btrfs_alloc_path(); |
1025 | BUG_ON(!path); | 1045 | BUG_ON(!path); |
1026 | trans = btrfs_join_transaction(root, 1); | 1046 | if (root == root->fs_info->tree_root) { |
1047 | nolock = true; | ||
1048 | trans = btrfs_join_transaction_nolock(root, 1); | ||
1049 | } else { | ||
1050 | trans = btrfs_join_transaction(root, 1); | ||
1051 | } | ||
1027 | BUG_ON(!trans); | 1052 | BUG_ON(!trans); |
1028 | 1053 | ||
1029 | cow_start = (u64)-1; | 1054 | cow_start = (u64)-1; |
@@ -1170,6 +1195,13 @@ out_check: | |||
1170 | num_bytes, num_bytes, type); | 1195 | num_bytes, num_bytes, type); |
1171 | BUG_ON(ret); | 1196 | BUG_ON(ret); |
1172 | 1197 | ||
1198 | if (root->root_key.objectid == | ||
1199 | BTRFS_DATA_RELOC_TREE_OBJECTID) { | ||
1200 | ret = btrfs_reloc_clone_csums(inode, cur_offset, | ||
1201 | num_bytes); | ||
1202 | BUG_ON(ret); | ||
1203 | } | ||
1204 | |||
1173 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 1205 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
1174 | cur_offset, cur_offset + num_bytes - 1, | 1206 | cur_offset, cur_offset + num_bytes - 1, |
1175 | locked_page, EXTENT_CLEAR_UNLOCK_PAGE | | 1207 | locked_page, EXTENT_CLEAR_UNLOCK_PAGE | |
@@ -1189,8 +1221,13 @@ out_check: | |||
1189 | BUG_ON(ret); | 1221 | BUG_ON(ret); |
1190 | } | 1222 | } |
1191 | 1223 | ||
1192 | ret = btrfs_end_transaction(trans, root); | 1224 | if (nolock) { |
1193 | BUG_ON(ret); | 1225 | ret = btrfs_end_transaction_nolock(trans, root); |
1226 | BUG_ON(ret); | ||
1227 | } else { | ||
1228 | ret = btrfs_end_transaction(trans, root); | ||
1229 | BUG_ON(ret); | ||
1230 | } | ||
1194 | btrfs_free_path(path); | 1231 | btrfs_free_path(path); |
1195 | return 0; | 1232 | return 0; |
1196 | } | 1233 | } |
@@ -1211,7 +1248,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1211 | else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) | 1248 | else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) |
1212 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1249 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
1213 | page_started, 0, nr_written); | 1250 | page_started, 0, nr_written); |
1214 | else if (!btrfs_test_opt(root, COMPRESS)) | 1251 | else if (!btrfs_test_opt(root, COMPRESS) && |
1252 | !(BTRFS_I(inode)->force_compress)) | ||
1215 | ret = cow_file_range(inode, locked_page, start, end, | 1253 | ret = cow_file_range(inode, locked_page, start, end, |
1216 | page_started, nr_written, 1); | 1254 | page_started, nr_written, 1); |
1217 | else | 1255 | else |
@@ -1221,36 +1259,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1221 | } | 1259 | } |
1222 | 1260 | ||
1223 | static int btrfs_split_extent_hook(struct inode *inode, | 1261 | static int btrfs_split_extent_hook(struct inode *inode, |
1224 | struct extent_state *orig, u64 split) | 1262 | struct extent_state *orig, u64 split) |
1225 | { | 1263 | { |
1226 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1264 | /* not delalloc, ignore it */ |
1227 | u64 size; | ||
1228 | |||
1229 | if (!(orig->state & EXTENT_DELALLOC)) | 1265 | if (!(orig->state & EXTENT_DELALLOC)) |
1230 | return 0; | 1266 | return 0; |
1231 | 1267 | ||
1232 | size = orig->end - orig->start + 1; | 1268 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
1233 | if (size > root->fs_info->max_extent) { | ||
1234 | u64 num_extents; | ||
1235 | u64 new_size; | ||
1236 | |||
1237 | new_size = orig->end - split + 1; | ||
1238 | num_extents = div64_u64(size + root->fs_info->max_extent - 1, | ||
1239 | root->fs_info->max_extent); | ||
1240 | |||
1241 | /* | ||
1242 | * if we break a large extent up then leave oustanding_extents | ||
1243 | * be, since we've already accounted for the large extent. | ||
1244 | */ | ||
1245 | if (div64_u64(new_size + root->fs_info->max_extent - 1, | ||
1246 | root->fs_info->max_extent) < num_extents) | ||
1247 | return 0; | ||
1248 | } | ||
1249 | |||
1250 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
1251 | BTRFS_I(inode)->outstanding_extents++; | ||
1252 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
1253 | |||
1254 | return 0; | 1269 | return 0; |
1255 | } | 1270 | } |
1256 | 1271 | ||
@@ -1264,42 +1279,11 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
1264 | struct extent_state *new, | 1279 | struct extent_state *new, |
1265 | struct extent_state *other) | 1280 | struct extent_state *other) |
1266 | { | 1281 | { |
1267 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1268 | u64 new_size, old_size; | ||
1269 | u64 num_extents; | ||
1270 | |||
1271 | /* not delalloc, ignore it */ | 1282 | /* not delalloc, ignore it */ |
1272 | if (!(other->state & EXTENT_DELALLOC)) | 1283 | if (!(other->state & EXTENT_DELALLOC)) |
1273 | return 0; | 1284 | return 0; |
1274 | 1285 | ||
1275 | old_size = other->end - other->start + 1; | 1286 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
1276 | if (new->start < other->start) | ||
1277 | new_size = other->end - new->start + 1; | ||
1278 | else | ||
1279 | new_size = new->end - other->start + 1; | ||
1280 | |||
1281 | /* we're not bigger than the max, unreserve the space and go */ | ||
1282 | if (new_size <= root->fs_info->max_extent) { | ||
1283 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
1284 | BTRFS_I(inode)->outstanding_extents--; | ||
1285 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
1286 | return 0; | ||
1287 | } | ||
1288 | |||
1289 | /* | ||
1290 | * If we grew by another max_extent, just return, we want to keep that | ||
1291 | * reserved amount. | ||
1292 | */ | ||
1293 | num_extents = div64_u64(old_size + root->fs_info->max_extent - 1, | ||
1294 | root->fs_info->max_extent); | ||
1295 | if (div64_u64(new_size + root->fs_info->max_extent - 1, | ||
1296 | root->fs_info->max_extent) > num_extents) | ||
1297 | return 0; | ||
1298 | |||
1299 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
1300 | BTRFS_I(inode)->outstanding_extents--; | ||
1301 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
1302 | |||
1303 | return 0; | 1287 | return 0; |
1304 | } | 1288 | } |
1305 | 1289 | ||
@@ -1308,8 +1292,8 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
1308 | * bytes in this file, and to maintain the list of inodes that | 1292 | * bytes in this file, and to maintain the list of inodes that |
1309 | * have pending delalloc work to be done. | 1293 | * have pending delalloc work to be done. |
1310 | */ | 1294 | */ |
1311 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | 1295 | static int btrfs_set_bit_hook(struct inode *inode, |
1312 | unsigned long old, unsigned long bits) | 1296 | struct extent_state *state, int *bits) |
1313 | { | 1297 | { |
1314 | 1298 | ||
1315 | /* | 1299 | /* |
@@ -1317,17 +1301,21 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
1317 | * but in this case, we are only testeing for the DELALLOC | 1301 | * but in this case, we are only testeing for the DELALLOC |
1318 | * bit, which is only set or cleared with irqs on | 1302 | * bit, which is only set or cleared with irqs on |
1319 | */ | 1303 | */ |
1320 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1304 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1321 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1305 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1306 | u64 len = state->end + 1 - state->start; | ||
1307 | int do_list = (root->root_key.objectid != | ||
1308 | BTRFS_ROOT_TREE_OBJECTID); | ||
1309 | |||
1310 | if (*bits & EXTENT_FIRST_DELALLOC) | ||
1311 | *bits &= ~EXTENT_FIRST_DELALLOC; | ||
1312 | else | ||
1313 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | ||
1322 | 1314 | ||
1323 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
1324 | BTRFS_I(inode)->outstanding_extents++; | ||
1325 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
1326 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); | ||
1327 | spin_lock(&root->fs_info->delalloc_lock); | 1315 | spin_lock(&root->fs_info->delalloc_lock); |
1328 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; | 1316 | BTRFS_I(inode)->delalloc_bytes += len; |
1329 | root->fs_info->delalloc_bytes += end - start + 1; | 1317 | root->fs_info->delalloc_bytes += len; |
1330 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1318 | if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
1331 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, | 1319 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, |
1332 | &root->fs_info->delalloc_inodes); | 1320 | &root->fs_info->delalloc_inodes); |
1333 | } | 1321 | } |
@@ -1340,45 +1328,36 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
1340 | * extent_io.c clear_bit_hook, see set_bit_hook for why | 1328 | * extent_io.c clear_bit_hook, see set_bit_hook for why |
1341 | */ | 1329 | */ |
1342 | static int btrfs_clear_bit_hook(struct inode *inode, | 1330 | static int btrfs_clear_bit_hook(struct inode *inode, |
1343 | struct extent_state *state, unsigned long bits) | 1331 | struct extent_state *state, int *bits) |
1344 | { | 1332 | { |
1345 | /* | 1333 | /* |
1346 | * set_bit and clear bit hooks normally require _irqsave/restore | 1334 | * set_bit and clear bit hooks normally require _irqsave/restore |
1347 | * but in this case, we are only testeing for the DELALLOC | 1335 | * but in this case, we are only testeing for the DELALLOC |
1348 | * bit, which is only set or cleared with irqs on | 1336 | * bit, which is only set or cleared with irqs on |
1349 | */ | 1337 | */ |
1350 | if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1338 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1351 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1339 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1340 | u64 len = state->end + 1 - state->start; | ||
1341 | int do_list = (root->root_key.objectid != | ||
1342 | BTRFS_ROOT_TREE_OBJECTID); | ||
1352 | 1343 | ||
1353 | if (bits & EXTENT_DO_ACCOUNTING) { | 1344 | if (*bits & EXTENT_FIRST_DELALLOC) |
1354 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1345 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1355 | BTRFS_I(inode)->outstanding_extents--; | 1346 | else if (!(*bits & EXTENT_DO_ACCOUNTING)) |
1356 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 1347 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
1357 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | 1348 | |
1358 | } | 1349 | if (*bits & EXTENT_DO_ACCOUNTING) |
1350 | btrfs_delalloc_release_metadata(inode, len); | ||
1351 | |||
1352 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID | ||
1353 | && do_list) | ||
1354 | btrfs_free_reserved_data_space(inode, len); | ||
1359 | 1355 | ||
1360 | spin_lock(&root->fs_info->delalloc_lock); | 1356 | spin_lock(&root->fs_info->delalloc_lock); |
1361 | if (state->end - state->start + 1 > | 1357 | root->fs_info->delalloc_bytes -= len; |
1362 | root->fs_info->delalloc_bytes) { | 1358 | BTRFS_I(inode)->delalloc_bytes -= len; |
1363 | printk(KERN_INFO "btrfs warning: delalloc account " | 1359 | |
1364 | "%llu %llu\n", | 1360 | if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && |
1365 | (unsigned long long) | ||
1366 | state->end - state->start + 1, | ||
1367 | (unsigned long long) | ||
1368 | root->fs_info->delalloc_bytes); | ||
1369 | btrfs_delalloc_free_space(root, inode, (u64)-1); | ||
1370 | root->fs_info->delalloc_bytes = 0; | ||
1371 | BTRFS_I(inode)->delalloc_bytes = 0; | ||
1372 | } else { | ||
1373 | btrfs_delalloc_free_space(root, inode, | ||
1374 | state->end - | ||
1375 | state->start + 1); | ||
1376 | root->fs_info->delalloc_bytes -= state->end - | ||
1377 | state->start + 1; | ||
1378 | BTRFS_I(inode)->delalloc_bytes -= state->end - | ||
1379 | state->start + 1; | ||
1380 | } | ||
1381 | if (BTRFS_I(inode)->delalloc_bytes == 0 && | ||
1382 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1361 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
1383 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); | 1362 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); |
1384 | } | 1363 | } |
@@ -1413,7 +1392,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
1413 | 1392 | ||
1414 | if (map_length < length + size) | 1393 | if (map_length < length + size) |
1415 | return 1; | 1394 | return 1; |
1416 | return 0; | 1395 | return ret; |
1417 | } | 1396 | } |
1418 | 1397 | ||
1419 | /* | 1398 | /* |
@@ -1426,7 +1405,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
1426 | */ | 1405 | */ |
1427 | static int __btrfs_submit_bio_start(struct inode *inode, int rw, | 1406 | static int __btrfs_submit_bio_start(struct inode *inode, int rw, |
1428 | struct bio *bio, int mirror_num, | 1407 | struct bio *bio, int mirror_num, |
1429 | unsigned long bio_flags) | 1408 | unsigned long bio_flags, |
1409 | u64 bio_offset) | ||
1430 | { | 1410 | { |
1431 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1411 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1432 | int ret = 0; | 1412 | int ret = 0; |
@@ -1445,7 +1425,8 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw, | |||
1445 | * are inserted into the btree | 1425 | * are inserted into the btree |
1446 | */ | 1426 | */ |
1447 | static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | 1427 | static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, |
1448 | int mirror_num, unsigned long bio_flags) | 1428 | int mirror_num, unsigned long bio_flags, |
1429 | u64 bio_offset) | ||
1449 | { | 1430 | { |
1450 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1431 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1451 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); | 1432 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); |
@@ -1456,7 +1437,8 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
1456 | * on write, or reading the csums from the tree before a read | 1437 | * on write, or reading the csums from the tree before a read |
1457 | */ | 1438 | */ |
1458 | static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 1439 | static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
1459 | int mirror_num, unsigned long bio_flags) | 1440 | int mirror_num, unsigned long bio_flags, |
1441 | u64 bio_offset) | ||
1460 | { | 1442 | { |
1461 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1443 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1462 | int ret = 0; | 1444 | int ret = 0; |
@@ -1464,10 +1446,13 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1464 | 1446 | ||
1465 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 1447 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
1466 | 1448 | ||
1467 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 1449 | if (root == root->fs_info->tree_root) |
1450 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); | ||
1451 | else | ||
1452 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
1468 | BUG_ON(ret); | 1453 | BUG_ON(ret); |
1469 | 1454 | ||
1470 | if (!(rw & (1 << BIO_RW))) { | 1455 | if (!(rw & REQ_WRITE)) { |
1471 | if (bio_flags & EXTENT_BIO_COMPRESSED) { | 1456 | if (bio_flags & EXTENT_BIO_COMPRESSED) { |
1472 | return btrfs_submit_compressed_read(inode, bio, | 1457 | return btrfs_submit_compressed_read(inode, bio, |
1473 | mirror_num, bio_flags); | 1458 | mirror_num, bio_flags); |
@@ -1481,7 +1466,8 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1481 | /* we're doing a write, do the async checksumming */ | 1466 | /* we're doing a write, do the async checksumming */ |
1482 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 1467 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
1483 | inode, rw, bio, mirror_num, | 1468 | inode, rw, bio, mirror_num, |
1484 | bio_flags, __btrfs_submit_bio_start, | 1469 | bio_flags, bio_offset, |
1470 | __btrfs_submit_bio_start, | ||
1485 | __btrfs_submit_bio_done); | 1471 | __btrfs_submit_bio_done); |
1486 | } | 1472 | } |
1487 | 1473 | ||
@@ -1508,12 +1494,13 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, | |||
1508 | return 0; | 1494 | return 0; |
1509 | } | 1495 | } |
1510 | 1496 | ||
1511 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) | 1497 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
1498 | struct extent_state **cached_state) | ||
1512 | { | 1499 | { |
1513 | if ((end & (PAGE_CACHE_SIZE - 1)) == 0) | 1500 | if ((end & (PAGE_CACHE_SIZE - 1)) == 0) |
1514 | WARN_ON(1); | 1501 | WARN_ON(1); |
1515 | return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, | 1502 | return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, |
1516 | GFP_NOFS); | 1503 | cached_state, GFP_NOFS); |
1517 | } | 1504 | } |
1518 | 1505 | ||
1519 | /* see btrfs_writepage_start_hook for details on why this is required */ | 1506 | /* see btrfs_writepage_start_hook for details on why this is required */ |
@@ -1526,6 +1513,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) | |||
1526 | { | 1513 | { |
1527 | struct btrfs_writepage_fixup *fixup; | 1514 | struct btrfs_writepage_fixup *fixup; |
1528 | struct btrfs_ordered_extent *ordered; | 1515 | struct btrfs_ordered_extent *ordered; |
1516 | struct extent_state *cached_state = NULL; | ||
1529 | struct page *page; | 1517 | struct page *page; |
1530 | struct inode *inode; | 1518 | struct inode *inode; |
1531 | u64 page_start; | 1519 | u64 page_start; |
@@ -1544,7 +1532,8 @@ again: | |||
1544 | page_start = page_offset(page); | 1532 | page_start = page_offset(page); |
1545 | page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; | 1533 | page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; |
1546 | 1534 | ||
1547 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); | 1535 | lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0, |
1536 | &cached_state, GFP_NOFS); | ||
1548 | 1537 | ||
1549 | /* already ordered? We're done */ | 1538 | /* already ordered? We're done */ |
1550 | if (PagePrivate2(page)) | 1539 | if (PagePrivate2(page)) |
@@ -1552,17 +1541,19 @@ again: | |||
1552 | 1541 | ||
1553 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 1542 | ordered = btrfs_lookup_ordered_extent(inode, page_start); |
1554 | if (ordered) { | 1543 | if (ordered) { |
1555 | unlock_extent(&BTRFS_I(inode)->io_tree, page_start, | 1544 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, |
1556 | page_end, GFP_NOFS); | 1545 | page_end, &cached_state, GFP_NOFS); |
1557 | unlock_page(page); | 1546 | unlock_page(page); |
1558 | btrfs_start_ordered_extent(inode, ordered, 1); | 1547 | btrfs_start_ordered_extent(inode, ordered, 1); |
1559 | goto again; | 1548 | goto again; |
1560 | } | 1549 | } |
1561 | 1550 | ||
1562 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 1551 | BUG(); |
1552 | btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); | ||
1563 | ClearPageChecked(page); | 1553 | ClearPageChecked(page); |
1564 | out: | 1554 | out: |
1565 | unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); | 1555 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, |
1556 | &cached_state, GFP_NOFS); | ||
1566 | out_page: | 1557 | out_page: |
1567 | unlock_page(page); | 1558 | unlock_page(page); |
1568 | page_cache_release(page); | 1559 | page_cache_release(page); |
@@ -1681,24 +1672,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1681 | * before we start the transaction. It limits the amount of btree | 1672 | * before we start the transaction. It limits the amount of btree |
1682 | * reads required while inside the transaction. | 1673 | * reads required while inside the transaction. |
1683 | */ | 1674 | */ |
1684 | static noinline void reada_csum(struct btrfs_root *root, | ||
1685 | struct btrfs_path *path, | ||
1686 | struct btrfs_ordered_extent *ordered_extent) | ||
1687 | { | ||
1688 | struct btrfs_ordered_sum *sum; | ||
1689 | u64 bytenr; | ||
1690 | |||
1691 | sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum, | ||
1692 | list); | ||
1693 | bytenr = sum->sums[0].bytenr; | ||
1694 | |||
1695 | /* | ||
1696 | * we don't care about the results, the point of this search is | ||
1697 | * just to get the btree leaves into ram | ||
1698 | */ | ||
1699 | btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0); | ||
1700 | } | ||
1701 | |||
1702 | /* as ordered data IO finishes, this gets called so we can finish | 1675 | /* as ordered data IO finishes, this gets called so we can finish |
1703 | * an ordered extent if the range of bytes in the file it covers are | 1676 | * an ordered extent if the range of bytes in the file it covers are |
1704 | * fully written. | 1677 | * fully written. |
@@ -1706,96 +1679,94 @@ static noinline void reada_csum(struct btrfs_root *root, | |||
1706 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | 1679 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) |
1707 | { | 1680 | { |
1708 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1681 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1709 | struct btrfs_trans_handle *trans; | 1682 | struct btrfs_trans_handle *trans = NULL; |
1710 | struct btrfs_ordered_extent *ordered_extent = NULL; | 1683 | struct btrfs_ordered_extent *ordered_extent = NULL; |
1711 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1684 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
1712 | struct btrfs_path *path; | 1685 | struct extent_state *cached_state = NULL; |
1713 | int compressed = 0; | 1686 | int compress_type = 0; |
1714 | int ret; | 1687 | int ret; |
1688 | bool nolock = false; | ||
1715 | 1689 | ||
1716 | ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); | 1690 | ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, |
1691 | end - start + 1); | ||
1717 | if (!ret) | 1692 | if (!ret) |
1718 | return 0; | 1693 | return 0; |
1694 | BUG_ON(!ordered_extent); | ||
1719 | 1695 | ||
1720 | /* | 1696 | nolock = (root == root->fs_info->tree_root); |
1721 | * before we join the transaction, try to do some of our IO. | ||
1722 | * This will limit the amount of IO that we have to do with | ||
1723 | * the transaction running. We're unlikely to need to do any | ||
1724 | * IO if the file extents are new, the disk_i_size checks | ||
1725 | * covers the most common case. | ||
1726 | */ | ||
1727 | if (start < BTRFS_I(inode)->disk_i_size) { | ||
1728 | path = btrfs_alloc_path(); | ||
1729 | if (path) { | ||
1730 | ret = btrfs_lookup_file_extent(NULL, root, path, | ||
1731 | inode->i_ino, | ||
1732 | start, 0); | ||
1733 | ordered_extent = btrfs_lookup_ordered_extent(inode, | ||
1734 | start); | ||
1735 | if (!list_empty(&ordered_extent->list)) { | ||
1736 | btrfs_release_path(root, path); | ||
1737 | reada_csum(root, path, ordered_extent); | ||
1738 | } | ||
1739 | btrfs_free_path(path); | ||
1740 | } | ||
1741 | } | ||
1742 | 1697 | ||
1743 | if (!ordered_extent) | ||
1744 | ordered_extent = btrfs_lookup_ordered_extent(inode, start); | ||
1745 | BUG_ON(!ordered_extent); | ||
1746 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { | 1698 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { |
1747 | BUG_ON(!list_empty(&ordered_extent->list)); | 1699 | BUG_ON(!list_empty(&ordered_extent->list)); |
1748 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1700 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1749 | if (!ret) { | 1701 | if (!ret) { |
1750 | trans = btrfs_join_transaction(root, 1); | 1702 | if (nolock) |
1703 | trans = btrfs_join_transaction_nolock(root, 1); | ||
1704 | else | ||
1705 | trans = btrfs_join_transaction(root, 1); | ||
1706 | BUG_ON(!trans); | ||
1707 | btrfs_set_trans_block_group(trans, inode); | ||
1708 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
1751 | ret = btrfs_update_inode(trans, root, inode); | 1709 | ret = btrfs_update_inode(trans, root, inode); |
1752 | BUG_ON(ret); | 1710 | BUG_ON(ret); |
1753 | btrfs_end_transaction(trans, root); | ||
1754 | } | 1711 | } |
1755 | goto out; | 1712 | goto out; |
1756 | } | 1713 | } |
1757 | 1714 | ||
1758 | lock_extent(io_tree, ordered_extent->file_offset, | 1715 | lock_extent_bits(io_tree, ordered_extent->file_offset, |
1759 | ordered_extent->file_offset + ordered_extent->len - 1, | 1716 | ordered_extent->file_offset + ordered_extent->len - 1, |
1760 | GFP_NOFS); | 1717 | 0, &cached_state, GFP_NOFS); |
1761 | 1718 | ||
1762 | trans = btrfs_join_transaction(root, 1); | 1719 | if (nolock) |
1720 | trans = btrfs_join_transaction_nolock(root, 1); | ||
1721 | else | ||
1722 | trans = btrfs_join_transaction(root, 1); | ||
1723 | btrfs_set_trans_block_group(trans, inode); | ||
1724 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
1763 | 1725 | ||
1764 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 1726 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
1765 | compressed = 1; | 1727 | compress_type = ordered_extent->compress_type; |
1766 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { | 1728 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { |
1767 | BUG_ON(compressed); | 1729 | BUG_ON(compress_type); |
1768 | ret = btrfs_mark_extent_written(trans, inode, | 1730 | ret = btrfs_mark_extent_written(trans, inode, |
1769 | ordered_extent->file_offset, | 1731 | ordered_extent->file_offset, |
1770 | ordered_extent->file_offset + | 1732 | ordered_extent->file_offset + |
1771 | ordered_extent->len); | 1733 | ordered_extent->len); |
1772 | BUG_ON(ret); | 1734 | BUG_ON(ret); |
1773 | } else { | 1735 | } else { |
1736 | BUG_ON(root == root->fs_info->tree_root); | ||
1774 | ret = insert_reserved_file_extent(trans, inode, | 1737 | ret = insert_reserved_file_extent(trans, inode, |
1775 | ordered_extent->file_offset, | 1738 | ordered_extent->file_offset, |
1776 | ordered_extent->start, | 1739 | ordered_extent->start, |
1777 | ordered_extent->disk_len, | 1740 | ordered_extent->disk_len, |
1778 | ordered_extent->len, | 1741 | ordered_extent->len, |
1779 | ordered_extent->len, | 1742 | ordered_extent->len, |
1780 | compressed, 0, 0, | 1743 | compress_type, 0, 0, |
1781 | BTRFS_FILE_EXTENT_REG); | 1744 | BTRFS_FILE_EXTENT_REG); |
1782 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | 1745 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, |
1783 | ordered_extent->file_offset, | 1746 | ordered_extent->file_offset, |
1784 | ordered_extent->len); | 1747 | ordered_extent->len); |
1785 | BUG_ON(ret); | 1748 | BUG_ON(ret); |
1786 | } | 1749 | } |
1787 | unlock_extent(io_tree, ordered_extent->file_offset, | 1750 | unlock_extent_cached(io_tree, ordered_extent->file_offset, |
1788 | ordered_extent->file_offset + ordered_extent->len - 1, | 1751 | ordered_extent->file_offset + |
1789 | GFP_NOFS); | 1752 | ordered_extent->len - 1, &cached_state, GFP_NOFS); |
1753 | |||
1790 | add_pending_csums(trans, inode, ordered_extent->file_offset, | 1754 | add_pending_csums(trans, inode, ordered_extent->file_offset, |
1791 | &ordered_extent->list); | 1755 | &ordered_extent->list); |
1792 | 1756 | ||
1793 | /* this also removes the ordered extent from the tree */ | ||
1794 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1757 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1795 | ret = btrfs_update_inode(trans, root, inode); | 1758 | ret = btrfs_update_inode(trans, root, inode); |
1796 | BUG_ON(ret); | 1759 | BUG_ON(ret); |
1797 | btrfs_end_transaction(trans, root); | ||
1798 | out: | 1760 | out: |
1761 | if (nolock) { | ||
1762 | if (trans) | ||
1763 | btrfs_end_transaction_nolock(trans, root); | ||
1764 | } else { | ||
1765 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | ||
1766 | if (trans) | ||
1767 | btrfs_end_transaction(trans, root); | ||
1768 | } | ||
1769 | |||
1799 | /* once for us */ | 1770 | /* once for us */ |
1800 | btrfs_put_ordered_extent(ordered_extent); | 1771 | btrfs_put_ordered_extent(ordered_extent); |
1801 | /* once for the tree */ | 1772 | /* once for the tree */ |
@@ -1871,6 +1842,8 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1871 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | 1842 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
1872 | logical = em->block_start; | 1843 | logical = em->block_start; |
1873 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; | 1844 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; |
1845 | extent_set_compress_type(&failrec->bio_flags, | ||
1846 | em->compress_type); | ||
1874 | } | 1847 | } |
1875 | failrec->logical = logical; | 1848 | failrec->logical = logical; |
1876 | free_extent_map(em); | 1849 | free_extent_map(em); |
@@ -1910,14 +1883,14 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1910 | bio->bi_size = 0; | 1883 | bio->bi_size = 0; |
1911 | 1884 | ||
1912 | bio_add_page(bio, page, failrec->len, start - page_offset(page)); | 1885 | bio_add_page(bio, page, failrec->len, start - page_offset(page)); |
1913 | if (failed_bio->bi_rw & (1 << BIO_RW)) | 1886 | if (failed_bio->bi_rw & REQ_WRITE) |
1914 | rw = WRITE; | 1887 | rw = WRITE; |
1915 | else | 1888 | else |
1916 | rw = READ; | 1889 | rw = READ; |
1917 | 1890 | ||
1918 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | 1891 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, |
1919 | failrec->last_mirror, | 1892 | failrec->last_mirror, |
1920 | failrec->bio_flags); | 1893 | failrec->bio_flags, 0); |
1921 | return 0; | 1894 | return 0; |
1922 | } | 1895 | } |
1923 | 1896 | ||
@@ -2072,32 +2045,196 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
2072 | } | 2045 | } |
2073 | 2046 | ||
2074 | /* | 2047 | /* |
2048 | * calculate extra metadata reservation when snapshotting a subvolume | ||
2049 | * contains orphan files. | ||
2050 | */ | ||
2051 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
2052 | struct btrfs_pending_snapshot *pending, | ||
2053 | u64 *bytes_to_reserve) | ||
2054 | { | ||
2055 | struct btrfs_root *root; | ||
2056 | struct btrfs_block_rsv *block_rsv; | ||
2057 | u64 num_bytes; | ||
2058 | int index; | ||
2059 | |||
2060 | root = pending->root; | ||
2061 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
2062 | return; | ||
2063 | |||
2064 | block_rsv = root->orphan_block_rsv; | ||
2065 | |||
2066 | /* orphan block reservation for the snapshot */ | ||
2067 | num_bytes = block_rsv->size; | ||
2068 | |||
2069 | /* | ||
2070 | * after the snapshot is created, COWing tree blocks may use more | ||
2071 | * space than it frees. So we should make sure there is enough | ||
2072 | * reserved space. | ||
2073 | */ | ||
2074 | index = trans->transid & 0x1; | ||
2075 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
2076 | num_bytes += block_rsv->size - | ||
2077 | (block_rsv->reserved + block_rsv->freed[index]); | ||
2078 | } | ||
2079 | |||
2080 | *bytes_to_reserve += num_bytes; | ||
2081 | } | ||
2082 | |||
2083 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
2084 | struct btrfs_pending_snapshot *pending) | ||
2085 | { | ||
2086 | struct btrfs_root *root = pending->root; | ||
2087 | struct btrfs_root *snap = pending->snap; | ||
2088 | struct btrfs_block_rsv *block_rsv; | ||
2089 | u64 num_bytes; | ||
2090 | int index; | ||
2091 | int ret; | ||
2092 | |||
2093 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
2094 | return; | ||
2095 | |||
2096 | /* refill source subvolume's orphan block reservation */ | ||
2097 | block_rsv = root->orphan_block_rsv; | ||
2098 | index = trans->transid & 0x1; | ||
2099 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
2100 | num_bytes = block_rsv->size - | ||
2101 | (block_rsv->reserved + block_rsv->freed[index]); | ||
2102 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
2103 | root->orphan_block_rsv, | ||
2104 | num_bytes); | ||
2105 | BUG_ON(ret); | ||
2106 | } | ||
2107 | |||
2108 | /* setup orphan block reservation for the snapshot */ | ||
2109 | block_rsv = btrfs_alloc_block_rsv(snap); | ||
2110 | BUG_ON(!block_rsv); | ||
2111 | |||
2112 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
2113 | snap->orphan_block_rsv = block_rsv; | ||
2114 | |||
2115 | num_bytes = root->orphan_block_rsv->size; | ||
2116 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
2117 | block_rsv, num_bytes); | ||
2118 | BUG_ON(ret); | ||
2119 | |||
2120 | #if 0 | ||
2121 | /* insert orphan item for the snapshot */ | ||
2122 | WARN_ON(!root->orphan_item_inserted); | ||
2123 | ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, | ||
2124 | snap->root_key.objectid); | ||
2125 | BUG_ON(ret); | ||
2126 | snap->orphan_item_inserted = 1; | ||
2127 | #endif | ||
2128 | } | ||
2129 | |||
2130 | enum btrfs_orphan_cleanup_state { | ||
2131 | ORPHAN_CLEANUP_STARTED = 1, | ||
2132 | ORPHAN_CLEANUP_DONE = 2, | ||
2133 | }; | ||
2134 | |||
2135 | /* | ||
2136 | * This is called in transaction commmit time. If there are no orphan | ||
2137 | * files in the subvolume, it removes orphan item and frees block_rsv | ||
2138 | * structure. | ||
2139 | */ | ||
2140 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | ||
2141 | struct btrfs_root *root) | ||
2142 | { | ||
2143 | int ret; | ||
2144 | |||
2145 | if (!list_empty(&root->orphan_list) || | ||
2146 | root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) | ||
2147 | return; | ||
2148 | |||
2149 | if (root->orphan_item_inserted && | ||
2150 | btrfs_root_refs(&root->root_item) > 0) { | ||
2151 | ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, | ||
2152 | root->root_key.objectid); | ||
2153 | BUG_ON(ret); | ||
2154 | root->orphan_item_inserted = 0; | ||
2155 | } | ||
2156 | |||
2157 | if (root->orphan_block_rsv) { | ||
2158 | WARN_ON(root->orphan_block_rsv->size > 0); | ||
2159 | btrfs_free_block_rsv(root, root->orphan_block_rsv); | ||
2160 | root->orphan_block_rsv = NULL; | ||
2161 | } | ||
2162 | } | ||
2163 | |||
2164 | /* | ||
2075 | * This creates an orphan entry for the given inode in case something goes | 2165 | * This creates an orphan entry for the given inode in case something goes |
2076 | * wrong in the middle of an unlink/truncate. | 2166 | * wrong in the middle of an unlink/truncate. |
2167 | * | ||
2168 | * NOTE: caller of this function should reserve 5 units of metadata for | ||
2169 | * this function. | ||
2077 | */ | 2170 | */ |
2078 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | 2171 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) |
2079 | { | 2172 | { |
2080 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2173 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2081 | int ret = 0; | 2174 | struct btrfs_block_rsv *block_rsv = NULL; |
2175 | int reserve = 0; | ||
2176 | int insert = 0; | ||
2177 | int ret; | ||
2082 | 2178 | ||
2083 | spin_lock(&root->list_lock); | 2179 | if (!root->orphan_block_rsv) { |
2180 | block_rsv = btrfs_alloc_block_rsv(root); | ||
2181 | BUG_ON(!block_rsv); | ||
2182 | } | ||
2084 | 2183 | ||
2085 | /* already on the orphan list, we're good */ | 2184 | spin_lock(&root->orphan_lock); |
2086 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 2185 | if (!root->orphan_block_rsv) { |
2087 | spin_unlock(&root->list_lock); | 2186 | root->orphan_block_rsv = block_rsv; |
2088 | return 0; | 2187 | } else if (block_rsv) { |
2188 | btrfs_free_block_rsv(root, block_rsv); | ||
2189 | block_rsv = NULL; | ||
2190 | } | ||
2191 | |||
2192 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { | ||
2193 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | ||
2194 | #if 0 | ||
2195 | /* | ||
2196 | * For proper ENOSPC handling, we should do orphan | ||
2197 | * cleanup when mounting. But this introduces backward | ||
2198 | * compatibility issue. | ||
2199 | */ | ||
2200 | if (!xchg(&root->orphan_item_inserted, 1)) | ||
2201 | insert = 2; | ||
2202 | else | ||
2203 | insert = 1; | ||
2204 | #endif | ||
2205 | insert = 1; | ||
2206 | } else { | ||
2207 | WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved); | ||
2089 | } | 2208 | } |
2090 | 2209 | ||
2091 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2210 | if (!BTRFS_I(inode)->orphan_meta_reserved) { |
2211 | BTRFS_I(inode)->orphan_meta_reserved = 1; | ||
2212 | reserve = 1; | ||
2213 | } | ||
2214 | spin_unlock(&root->orphan_lock); | ||
2092 | 2215 | ||
2093 | spin_unlock(&root->list_lock); | 2216 | if (block_rsv) |
2217 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
2094 | 2218 | ||
2095 | /* | 2219 | /* grab metadata reservation from transaction handle */ |
2096 | * insert an orphan item to track this unlinked/truncated file | 2220 | if (reserve) { |
2097 | */ | 2221 | ret = btrfs_orphan_reserve_metadata(trans, inode); |
2098 | ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); | 2222 | BUG_ON(ret); |
2223 | } | ||
2099 | 2224 | ||
2100 | return ret; | 2225 | /* insert an orphan item to track this unlinked/truncated file */ |
2226 | if (insert >= 1) { | ||
2227 | ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); | ||
2228 | BUG_ON(ret); | ||
2229 | } | ||
2230 | |||
2231 | /* insert an orphan item to track subvolume contains orphan files */ | ||
2232 | if (insert >= 2) { | ||
2233 | ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, | ||
2234 | root->root_key.objectid); | ||
2235 | BUG_ON(ret); | ||
2236 | } | ||
2237 | return 0; | ||
2101 | } | 2238 | } |
2102 | 2239 | ||
2103 | /* | 2240 | /* |
@@ -2107,26 +2244,31 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2107 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | 2244 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) |
2108 | { | 2245 | { |
2109 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2246 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2247 | int delete_item = 0; | ||
2248 | int release_rsv = 0; | ||
2110 | int ret = 0; | 2249 | int ret = 0; |
2111 | 2250 | ||
2112 | spin_lock(&root->list_lock); | 2251 | spin_lock(&root->orphan_lock); |
2113 | 2252 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | |
2114 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { | 2253 | list_del_init(&BTRFS_I(inode)->i_orphan); |
2115 | spin_unlock(&root->list_lock); | 2254 | delete_item = 1; |
2116 | return 0; | ||
2117 | } | 2255 | } |
2118 | 2256 | ||
2119 | list_del_init(&BTRFS_I(inode)->i_orphan); | 2257 | if (BTRFS_I(inode)->orphan_meta_reserved) { |
2120 | if (!trans) { | 2258 | BTRFS_I(inode)->orphan_meta_reserved = 0; |
2121 | spin_unlock(&root->list_lock); | 2259 | release_rsv = 1; |
2122 | return 0; | ||
2123 | } | 2260 | } |
2261 | spin_unlock(&root->orphan_lock); | ||
2124 | 2262 | ||
2125 | spin_unlock(&root->list_lock); | 2263 | if (trans && delete_item) { |
2264 | ret = btrfs_del_orphan_item(trans, root, inode->i_ino); | ||
2265 | BUG_ON(ret); | ||
2266 | } | ||
2126 | 2267 | ||
2127 | ret = btrfs_del_orphan_item(trans, root, inode->i_ino); | 2268 | if (release_rsv) |
2269 | btrfs_orphan_release_metadata(inode); | ||
2128 | 2270 | ||
2129 | return ret; | 2271 | return 0; |
2130 | } | 2272 | } |
2131 | 2273 | ||
2132 | /* | 2274 | /* |
@@ -2137,13 +2279,12 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2137 | { | 2279 | { |
2138 | struct btrfs_path *path; | 2280 | struct btrfs_path *path; |
2139 | struct extent_buffer *leaf; | 2281 | struct extent_buffer *leaf; |
2140 | struct btrfs_item *item; | ||
2141 | struct btrfs_key key, found_key; | 2282 | struct btrfs_key key, found_key; |
2142 | struct btrfs_trans_handle *trans; | 2283 | struct btrfs_trans_handle *trans; |
2143 | struct inode *inode; | 2284 | struct inode *inode; |
2144 | int ret = 0, nr_unlink = 0, nr_truncate = 0; | 2285 | int ret = 0, nr_unlink = 0, nr_truncate = 0; |
2145 | 2286 | ||
2146 | if (!xchg(&root->clean_orphans, 0)) | 2287 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) |
2147 | return; | 2288 | return; |
2148 | 2289 | ||
2149 | path = btrfs_alloc_path(); | 2290 | path = btrfs_alloc_path(); |
@@ -2175,7 +2316,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2175 | 2316 | ||
2176 | /* pull out the item */ | 2317 | /* pull out the item */ |
2177 | leaf = path->nodes[0]; | 2318 | leaf = path->nodes[0]; |
2178 | item = btrfs_item_nr(leaf, path->slots[0]); | ||
2179 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 2319 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
2180 | 2320 | ||
2181 | /* make sure the item matches what we want */ | 2321 | /* make sure the item matches what we want */ |
@@ -2195,17 +2335,16 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2195 | found_key.objectid = found_key.offset; | 2335 | found_key.objectid = found_key.offset; |
2196 | found_key.type = BTRFS_INODE_ITEM_KEY; | 2336 | found_key.type = BTRFS_INODE_ITEM_KEY; |
2197 | found_key.offset = 0; | 2337 | found_key.offset = 0; |
2198 | inode = btrfs_iget(root->fs_info->sb, &found_key, root); | 2338 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); |
2199 | if (IS_ERR(inode)) | 2339 | BUG_ON(IS_ERR(inode)); |
2200 | break; | ||
2201 | 2340 | ||
2202 | /* | 2341 | /* |
2203 | * add this inode to the orphan list so btrfs_orphan_del does | 2342 | * add this inode to the orphan list so btrfs_orphan_del does |
2204 | * the proper thing when we hit it | 2343 | * the proper thing when we hit it |
2205 | */ | 2344 | */ |
2206 | spin_lock(&root->list_lock); | 2345 | spin_lock(&root->orphan_lock); |
2207 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2346 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); |
2208 | spin_unlock(&root->list_lock); | 2347 | spin_unlock(&root->orphan_lock); |
2209 | 2348 | ||
2210 | /* | 2349 | /* |
2211 | * if this is a bad inode, means we actually succeeded in | 2350 | * if this is a bad inode, means we actually succeeded in |
@@ -2214,7 +2353,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2214 | * do a destroy_inode | 2353 | * do a destroy_inode |
2215 | */ | 2354 | */ |
2216 | if (is_bad_inode(inode)) { | 2355 | if (is_bad_inode(inode)) { |
2217 | trans = btrfs_start_transaction(root, 1); | 2356 | trans = btrfs_start_transaction(root, 0); |
2218 | btrfs_orphan_del(trans, inode); | 2357 | btrfs_orphan_del(trans, inode); |
2219 | btrfs_end_transaction(trans, root); | 2358 | btrfs_end_transaction(trans, root); |
2220 | iput(inode); | 2359 | iput(inode); |
@@ -2232,13 +2371,23 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2232 | /* this will do delete_inode and everything for us */ | 2371 | /* this will do delete_inode and everything for us */ |
2233 | iput(inode); | 2372 | iput(inode); |
2234 | } | 2373 | } |
2374 | btrfs_free_path(path); | ||
2375 | |||
2376 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; | ||
2377 | |||
2378 | if (root->orphan_block_rsv) | ||
2379 | btrfs_block_rsv_release(root, root->orphan_block_rsv, | ||
2380 | (u64)-1); | ||
2381 | |||
2382 | if (root->orphan_block_rsv || root->orphan_item_inserted) { | ||
2383 | trans = btrfs_join_transaction(root, 1); | ||
2384 | btrfs_end_transaction(trans, root); | ||
2385 | } | ||
2235 | 2386 | ||
2236 | if (nr_unlink) | 2387 | if (nr_unlink) |
2237 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); | 2388 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); |
2238 | if (nr_truncate) | 2389 | if (nr_truncate) |
2239 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); | 2390 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); |
2240 | |||
2241 | btrfs_free_path(path); | ||
2242 | } | 2391 | } |
2243 | 2392 | ||
2244 | /* | 2393 | /* |
@@ -2542,7 +2691,8 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
2542 | 2691 | ||
2543 | ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, | 2692 | ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, |
2544 | dir, index); | 2693 | dir, index); |
2545 | BUG_ON(ret); | 2694 | if (ret == -ENOENT) |
2695 | ret = 0; | ||
2546 | err: | 2696 | err: |
2547 | btrfs_free_path(path); | 2697 | btrfs_free_path(path); |
2548 | if (ret) | 2698 | if (ret) |
@@ -2557,29 +2707,201 @@ out: | |||
2557 | return ret; | 2707 | return ret; |
2558 | } | 2708 | } |
2559 | 2709 | ||
2560 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | 2710 | /* helper to check if there is any shared block in the path */ |
2711 | static int check_path_shared(struct btrfs_root *root, | ||
2712 | struct btrfs_path *path) | ||
2713 | { | ||
2714 | struct extent_buffer *eb; | ||
2715 | int level; | ||
2716 | u64 refs = 1; | ||
2717 | int uninitialized_var(ret); | ||
2718 | |||
2719 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
2720 | if (!path->nodes[level]) | ||
2721 | break; | ||
2722 | eb = path->nodes[level]; | ||
2723 | if (!btrfs_block_can_be_shared(root, eb)) | ||
2724 | continue; | ||
2725 | ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len, | ||
2726 | &refs, NULL); | ||
2727 | if (refs > 1) | ||
2728 | return 1; | ||
2729 | } | ||
2730 | return ret; /* XXX callers? */ | ||
2731 | } | ||
2732 | |||
2733 | /* | ||
2734 | * helper to start transaction for unlink and rmdir. | ||
2735 | * | ||
2736 | * unlink and rmdir are special in btrfs, they do not always free space. | ||
2737 | * so in enospc case, we should make sure they will free space before | ||
2738 | * allowing them to use the global metadata reservation. | ||
2739 | */ | ||
2740 | static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | ||
2741 | struct dentry *dentry) | ||
2561 | { | 2742 | { |
2562 | struct btrfs_root *root; | ||
2563 | struct btrfs_trans_handle *trans; | 2743 | struct btrfs_trans_handle *trans; |
2744 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
2745 | struct btrfs_path *path; | ||
2746 | struct btrfs_inode_ref *ref; | ||
2747 | struct btrfs_dir_item *di; | ||
2564 | struct inode *inode = dentry->d_inode; | 2748 | struct inode *inode = dentry->d_inode; |
2749 | u64 index; | ||
2750 | int check_link = 1; | ||
2751 | int err = -ENOSPC; | ||
2565 | int ret; | 2752 | int ret; |
2566 | unsigned long nr = 0; | ||
2567 | 2753 | ||
2568 | root = BTRFS_I(dir)->root; | 2754 | trans = btrfs_start_transaction(root, 10); |
2755 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) | ||
2756 | return trans; | ||
2569 | 2757 | ||
2570 | /* | 2758 | if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
2571 | * 5 items for unlink inode | 2759 | return ERR_PTR(-ENOSPC); |
2572 | * 1 for orphan | 2760 | |
2573 | */ | 2761 | /* check if there is someone else holds reference */ |
2574 | ret = btrfs_reserve_metadata_space(root, 6); | 2762 | if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1) |
2575 | if (ret) | 2763 | return ERR_PTR(-ENOSPC); |
2576 | return ret; | 2764 | |
2765 | if (atomic_read(&inode->i_count) > 2) | ||
2766 | return ERR_PTR(-ENOSPC); | ||
2577 | 2767 | ||
2578 | trans = btrfs_start_transaction(root, 1); | 2768 | if (xchg(&root->fs_info->enospc_unlink, 1)) |
2769 | return ERR_PTR(-ENOSPC); | ||
2770 | |||
2771 | path = btrfs_alloc_path(); | ||
2772 | if (!path) { | ||
2773 | root->fs_info->enospc_unlink = 0; | ||
2774 | return ERR_PTR(-ENOMEM); | ||
2775 | } | ||
2776 | |||
2777 | trans = btrfs_start_transaction(root, 0); | ||
2579 | if (IS_ERR(trans)) { | 2778 | if (IS_ERR(trans)) { |
2580 | btrfs_unreserve_metadata_space(root, 6); | 2779 | btrfs_free_path(path); |
2581 | return PTR_ERR(trans); | 2780 | root->fs_info->enospc_unlink = 0; |
2781 | return trans; | ||
2782 | } | ||
2783 | |||
2784 | path->skip_locking = 1; | ||
2785 | path->search_commit_root = 1; | ||
2786 | |||
2787 | ret = btrfs_lookup_inode(trans, root, path, | ||
2788 | &BTRFS_I(dir)->location, 0); | ||
2789 | if (ret < 0) { | ||
2790 | err = ret; | ||
2791 | goto out; | ||
2792 | } | ||
2793 | if (ret == 0) { | ||
2794 | if (check_path_shared(root, path)) | ||
2795 | goto out; | ||
2796 | } else { | ||
2797 | check_link = 0; | ||
2798 | } | ||
2799 | btrfs_release_path(root, path); | ||
2800 | |||
2801 | ret = btrfs_lookup_inode(trans, root, path, | ||
2802 | &BTRFS_I(inode)->location, 0); | ||
2803 | if (ret < 0) { | ||
2804 | err = ret; | ||
2805 | goto out; | ||
2806 | } | ||
2807 | if (ret == 0) { | ||
2808 | if (check_path_shared(root, path)) | ||
2809 | goto out; | ||
2810 | } else { | ||
2811 | check_link = 0; | ||
2812 | } | ||
2813 | btrfs_release_path(root, path); | ||
2814 | |||
2815 | if (ret == 0 && S_ISREG(inode->i_mode)) { | ||
2816 | ret = btrfs_lookup_file_extent(trans, root, path, | ||
2817 | inode->i_ino, (u64)-1, 0); | ||
2818 | if (ret < 0) { | ||
2819 | err = ret; | ||
2820 | goto out; | ||
2821 | } | ||
2822 | BUG_ON(ret == 0); | ||
2823 | if (check_path_shared(root, path)) | ||
2824 | goto out; | ||
2825 | btrfs_release_path(root, path); | ||
2826 | } | ||
2827 | |||
2828 | if (!check_link) { | ||
2829 | err = 0; | ||
2830 | goto out; | ||
2831 | } | ||
2832 | |||
2833 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | ||
2834 | dentry->d_name.name, dentry->d_name.len, 0); | ||
2835 | if (IS_ERR(di)) { | ||
2836 | err = PTR_ERR(di); | ||
2837 | goto out; | ||
2582 | } | 2838 | } |
2839 | if (di) { | ||
2840 | if (check_path_shared(root, path)) | ||
2841 | goto out; | ||
2842 | } else { | ||
2843 | err = 0; | ||
2844 | goto out; | ||
2845 | } | ||
2846 | btrfs_release_path(root, path); | ||
2847 | |||
2848 | ref = btrfs_lookup_inode_ref(trans, root, path, | ||
2849 | dentry->d_name.name, dentry->d_name.len, | ||
2850 | inode->i_ino, dir->i_ino, 0); | ||
2851 | if (IS_ERR(ref)) { | ||
2852 | err = PTR_ERR(ref); | ||
2853 | goto out; | ||
2854 | } | ||
2855 | BUG_ON(!ref); | ||
2856 | if (check_path_shared(root, path)) | ||
2857 | goto out; | ||
2858 | index = btrfs_inode_ref_index(path->nodes[0], ref); | ||
2859 | btrfs_release_path(root, path); | ||
2860 | |||
2861 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, | ||
2862 | dentry->d_name.name, dentry->d_name.len, 0); | ||
2863 | if (IS_ERR(di)) { | ||
2864 | err = PTR_ERR(di); | ||
2865 | goto out; | ||
2866 | } | ||
2867 | BUG_ON(ret == -ENOENT); | ||
2868 | if (check_path_shared(root, path)) | ||
2869 | goto out; | ||
2870 | |||
2871 | err = 0; | ||
2872 | out: | ||
2873 | btrfs_free_path(path); | ||
2874 | if (err) { | ||
2875 | btrfs_end_transaction(trans, root); | ||
2876 | root->fs_info->enospc_unlink = 0; | ||
2877 | return ERR_PTR(err); | ||
2878 | } | ||
2879 | |||
2880 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
2881 | return trans; | ||
2882 | } | ||
2883 | |||
2884 | static void __unlink_end_trans(struct btrfs_trans_handle *trans, | ||
2885 | struct btrfs_root *root) | ||
2886 | { | ||
2887 | if (trans->block_rsv == &root->fs_info->global_block_rsv) { | ||
2888 | BUG_ON(!root->fs_info->enospc_unlink); | ||
2889 | root->fs_info->enospc_unlink = 0; | ||
2890 | } | ||
2891 | btrfs_end_transaction_throttle(trans, root); | ||
2892 | } | ||
2893 | |||
2894 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | ||
2895 | { | ||
2896 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
2897 | struct btrfs_trans_handle *trans; | ||
2898 | struct inode *inode = dentry->d_inode; | ||
2899 | int ret; | ||
2900 | unsigned long nr = 0; | ||
2901 | |||
2902 | trans = __unlink_start_trans(dir, dentry); | ||
2903 | if (IS_ERR(trans)) | ||
2904 | return PTR_ERR(trans); | ||
2583 | 2905 | ||
2584 | btrfs_set_trans_block_group(trans, dir); | 2906 | btrfs_set_trans_block_group(trans, dir); |
2585 | 2907 | ||
@@ -2587,14 +2909,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
2587 | 2909 | ||
2588 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 2910 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
2589 | dentry->d_name.name, dentry->d_name.len); | 2911 | dentry->d_name.name, dentry->d_name.len); |
2912 | BUG_ON(ret); | ||
2590 | 2913 | ||
2591 | if (inode->i_nlink == 0) | 2914 | if (inode->i_nlink == 0) { |
2592 | ret = btrfs_orphan_add(trans, inode); | 2915 | ret = btrfs_orphan_add(trans, inode); |
2916 | BUG_ON(ret); | ||
2917 | } | ||
2593 | 2918 | ||
2594 | nr = trans->blocks_used; | 2919 | nr = trans->blocks_used; |
2595 | 2920 | __unlink_end_trans(trans, root); | |
2596 | btrfs_end_transaction_throttle(trans, root); | ||
2597 | btrfs_unreserve_metadata_space(root, 6); | ||
2598 | btrfs_btree_balance_dirty(root, nr); | 2921 | btrfs_btree_balance_dirty(root, nr); |
2599 | return ret; | 2922 | return ret; |
2600 | } | 2923 | } |
@@ -2656,7 +2979,6 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | |||
2656 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 2979 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
2657 | ret = btrfs_update_inode(trans, root, dir); | 2980 | ret = btrfs_update_inode(trans, root, dir); |
2658 | BUG_ON(ret); | 2981 | BUG_ON(ret); |
2659 | dir->i_sb->s_dirt = 1; | ||
2660 | 2982 | ||
2661 | btrfs_free_path(path); | 2983 | btrfs_free_path(path); |
2662 | return 0; | 2984 | return 0; |
@@ -2666,7 +2988,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2666 | { | 2988 | { |
2667 | struct inode *inode = dentry->d_inode; | 2989 | struct inode *inode = dentry->d_inode; |
2668 | int err = 0; | 2990 | int err = 0; |
2669 | int ret; | ||
2670 | struct btrfs_root *root = BTRFS_I(dir)->root; | 2991 | struct btrfs_root *root = BTRFS_I(dir)->root; |
2671 | struct btrfs_trans_handle *trans; | 2992 | struct btrfs_trans_handle *trans; |
2672 | unsigned long nr = 0; | 2993 | unsigned long nr = 0; |
@@ -2675,15 +2996,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2675 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 2996 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
2676 | return -ENOTEMPTY; | 2997 | return -ENOTEMPTY; |
2677 | 2998 | ||
2678 | ret = btrfs_reserve_metadata_space(root, 5); | 2999 | trans = __unlink_start_trans(dir, dentry); |
2679 | if (ret) | 3000 | if (IS_ERR(trans)) |
2680 | return ret; | ||
2681 | |||
2682 | trans = btrfs_start_transaction(root, 1); | ||
2683 | if (IS_ERR(trans)) { | ||
2684 | btrfs_unreserve_metadata_space(root, 5); | ||
2685 | return PTR_ERR(trans); | 3001 | return PTR_ERR(trans); |
2686 | } | ||
2687 | 3002 | ||
2688 | btrfs_set_trans_block_group(trans, dir); | 3003 | btrfs_set_trans_block_group(trans, dir); |
2689 | 3004 | ||
@@ -2706,12 +3021,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2706 | btrfs_i_size_write(inode, 0); | 3021 | btrfs_i_size_write(inode, 0); |
2707 | out: | 3022 | out: |
2708 | nr = trans->blocks_used; | 3023 | nr = trans->blocks_used; |
2709 | ret = btrfs_end_transaction_throttle(trans, root); | 3024 | __unlink_end_trans(trans, root); |
2710 | btrfs_unreserve_metadata_space(root, 5); | ||
2711 | btrfs_btree_balance_dirty(root, nr); | 3025 | btrfs_btree_balance_dirty(root, nr); |
2712 | 3026 | ||
2713 | if (ret && !err) | ||
2714 | err = ret; | ||
2715 | return err; | 3027 | return err; |
2716 | } | 3028 | } |
2717 | 3029 | ||
@@ -2925,7 +3237,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
2925 | 3237 | ||
2926 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); | 3238 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); |
2927 | 3239 | ||
2928 | if (root->ref_cows) | 3240 | if (root->ref_cows || root == root->fs_info->tree_root) |
2929 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); | 3241 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); |
2930 | 3242 | ||
2931 | path = btrfs_alloc_path(); | 3243 | path = btrfs_alloc_path(); |
@@ -3073,7 +3385,8 @@ delete: | |||
3073 | } else { | 3385 | } else { |
3074 | break; | 3386 | break; |
3075 | } | 3387 | } |
3076 | if (found_extent && root->ref_cows) { | 3388 | if (found_extent && (root->ref_cows || |
3389 | root == root->fs_info->tree_root)) { | ||
3077 | btrfs_set_path_blocking(path); | 3390 | btrfs_set_path_blocking(path); |
3078 | ret = btrfs_free_extent(trans, root, extent_start, | 3391 | ret = btrfs_free_extent(trans, root, extent_start, |
3079 | extent_num_bytes, 0, | 3392 | extent_num_bytes, 0, |
@@ -3108,6 +3421,7 @@ out: | |||
3108 | if (pending_del_nr) { | 3421 | if (pending_del_nr) { |
3109 | ret = btrfs_del_items(trans, root, path, pending_del_slot, | 3422 | ret = btrfs_del_items(trans, root, path, pending_del_slot, |
3110 | pending_del_nr); | 3423 | pending_del_nr); |
3424 | BUG_ON(ret); | ||
3111 | } | 3425 | } |
3112 | btrfs_free_path(path); | 3426 | btrfs_free_path(path); |
3113 | return err; | 3427 | return err; |
@@ -3123,6 +3437,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3123 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3437 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3124 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3438 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
3125 | struct btrfs_ordered_extent *ordered; | 3439 | struct btrfs_ordered_extent *ordered; |
3440 | struct extent_state *cached_state = NULL; | ||
3126 | char *kaddr; | 3441 | char *kaddr; |
3127 | u32 blocksize = root->sectorsize; | 3442 | u32 blocksize = root->sectorsize; |
3128 | pgoff_t index = from >> PAGE_CACHE_SHIFT; | 3443 | pgoff_t index = from >> PAGE_CACHE_SHIFT; |
@@ -3134,11 +3449,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3134 | 3449 | ||
3135 | if ((offset & (blocksize - 1)) == 0) | 3450 | if ((offset & (blocksize - 1)) == 0) |
3136 | goto out; | 3451 | goto out; |
3137 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 3452 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
3138 | if (ret) | ||
3139 | goto out; | ||
3140 | |||
3141 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
3142 | if (ret) | 3453 | if (ret) |
3143 | goto out; | 3454 | goto out; |
3144 | 3455 | ||
@@ -3146,8 +3457,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3146 | again: | 3457 | again: |
3147 | page = grab_cache_page(mapping, index); | 3458 | page = grab_cache_page(mapping, index); |
3148 | if (!page) { | 3459 | if (!page) { |
3149 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | 3460 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
3150 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
3151 | goto out; | 3461 | goto out; |
3152 | } | 3462 | } |
3153 | 3463 | ||
@@ -3169,12 +3479,14 @@ again: | |||
3169 | } | 3479 | } |
3170 | wait_on_page_writeback(page); | 3480 | wait_on_page_writeback(page); |
3171 | 3481 | ||
3172 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | 3482 | lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state, |
3483 | GFP_NOFS); | ||
3173 | set_page_extent_mapped(page); | 3484 | set_page_extent_mapped(page); |
3174 | 3485 | ||
3175 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 3486 | ordered = btrfs_lookup_ordered_extent(inode, page_start); |
3176 | if (ordered) { | 3487 | if (ordered) { |
3177 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 3488 | unlock_extent_cached(io_tree, page_start, page_end, |
3489 | &cached_state, GFP_NOFS); | ||
3178 | unlock_page(page); | 3490 | unlock_page(page); |
3179 | page_cache_release(page); | 3491 | page_cache_release(page); |
3180 | btrfs_start_ordered_extent(inode, ordered, 1); | 3492 | btrfs_start_ordered_extent(inode, ordered, 1); |
@@ -3182,13 +3494,15 @@ again: | |||
3182 | goto again; | 3494 | goto again; |
3183 | } | 3495 | } |
3184 | 3496 | ||
3185 | clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, | 3497 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, |
3186 | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, | 3498 | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, |
3187 | GFP_NOFS); | 3499 | 0, 0, &cached_state, GFP_NOFS); |
3188 | 3500 | ||
3189 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end); | 3501 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, |
3502 | &cached_state); | ||
3190 | if (ret) { | 3503 | if (ret) { |
3191 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 3504 | unlock_extent_cached(io_tree, page_start, page_end, |
3505 | &cached_state, GFP_NOFS); | ||
3192 | goto out_unlock; | 3506 | goto out_unlock; |
3193 | } | 3507 | } |
3194 | 3508 | ||
@@ -3201,12 +3515,12 @@ again: | |||
3201 | } | 3515 | } |
3202 | ClearPageChecked(page); | 3516 | ClearPageChecked(page); |
3203 | set_page_dirty(page); | 3517 | set_page_dirty(page); |
3204 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 3518 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, |
3519 | GFP_NOFS); | ||
3205 | 3520 | ||
3206 | out_unlock: | 3521 | out_unlock: |
3207 | if (ret) | 3522 | if (ret) |
3208 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | 3523 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
3209 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
3210 | unlock_page(page); | 3524 | unlock_page(page); |
3211 | page_cache_release(page); | 3525 | page_cache_release(page); |
3212 | out: | 3526 | out: |
@@ -3218,7 +3532,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3218 | struct btrfs_trans_handle *trans; | 3532 | struct btrfs_trans_handle *trans; |
3219 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3533 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3220 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3534 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
3221 | struct extent_map *em; | 3535 | struct extent_map *em = NULL; |
3536 | struct extent_state *cached_state = NULL; | ||
3222 | u64 mask = root->sectorsize - 1; | 3537 | u64 mask = root->sectorsize - 1; |
3223 | u64 hole_start = (inode->i_size + mask) & ~mask; | 3538 | u64 hole_start = (inode->i_size + mask) & ~mask; |
3224 | u64 block_end = (size + mask) & ~mask; | 3539 | u64 block_end = (size + mask) & ~mask; |
@@ -3234,11 +3549,13 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3234 | struct btrfs_ordered_extent *ordered; | 3549 | struct btrfs_ordered_extent *ordered; |
3235 | btrfs_wait_ordered_range(inode, hole_start, | 3550 | btrfs_wait_ordered_range(inode, hole_start, |
3236 | block_end - hole_start); | 3551 | block_end - hole_start); |
3237 | lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); | 3552 | lock_extent_bits(io_tree, hole_start, block_end - 1, 0, |
3553 | &cached_state, GFP_NOFS); | ||
3238 | ordered = btrfs_lookup_ordered_extent(inode, hole_start); | 3554 | ordered = btrfs_lookup_ordered_extent(inode, hole_start); |
3239 | if (!ordered) | 3555 | if (!ordered) |
3240 | break; | 3556 | break; |
3241 | unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); | 3557 | unlock_extent_cached(io_tree, hole_start, block_end - 1, |
3558 | &cached_state, GFP_NOFS); | ||
3242 | btrfs_put_ordered_extent(ordered); | 3559 | btrfs_put_ordered_extent(ordered); |
3243 | } | 3560 | } |
3244 | 3561 | ||
@@ -3253,11 +3570,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3253 | u64 hint_byte = 0; | 3570 | u64 hint_byte = 0; |
3254 | hole_size = last_byte - cur_offset; | 3571 | hole_size = last_byte - cur_offset; |
3255 | 3572 | ||
3256 | err = btrfs_reserve_metadata_space(root, 2); | 3573 | trans = btrfs_start_transaction(root, 2); |
3257 | if (err) | 3574 | if (IS_ERR(trans)) { |
3575 | err = PTR_ERR(trans); | ||
3258 | break; | 3576 | break; |
3259 | 3577 | } | |
3260 | trans = btrfs_start_transaction(root, 1); | ||
3261 | btrfs_set_trans_block_group(trans, inode); | 3578 | btrfs_set_trans_block_group(trans, inode); |
3262 | 3579 | ||
3263 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3580 | err = btrfs_drop_extents(trans, inode, cur_offset, |
@@ -3275,15 +3592,17 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3275 | last_byte - 1, 0); | 3592 | last_byte - 1, 0); |
3276 | 3593 | ||
3277 | btrfs_end_transaction(trans, root); | 3594 | btrfs_end_transaction(trans, root); |
3278 | btrfs_unreserve_metadata_space(root, 2); | ||
3279 | } | 3595 | } |
3280 | free_extent_map(em); | 3596 | free_extent_map(em); |
3597 | em = NULL; | ||
3281 | cur_offset = last_byte; | 3598 | cur_offset = last_byte; |
3282 | if (cur_offset >= block_end) | 3599 | if (cur_offset >= block_end) |
3283 | break; | 3600 | break; |
3284 | } | 3601 | } |
3285 | 3602 | ||
3286 | unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); | 3603 | free_extent_map(em); |
3604 | unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, | ||
3605 | GFP_NOFS); | ||
3287 | return err; | 3606 | return err; |
3288 | } | 3607 | } |
3289 | 3608 | ||
@@ -3308,11 +3627,10 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3308 | } | 3627 | } |
3309 | } | 3628 | } |
3310 | 3629 | ||
3311 | ret = btrfs_reserve_metadata_space(root, 1); | 3630 | trans = btrfs_start_transaction(root, 5); |
3312 | if (ret) | 3631 | if (IS_ERR(trans)) |
3313 | return ret; | 3632 | return PTR_ERR(trans); |
3314 | 3633 | ||
3315 | trans = btrfs_start_transaction(root, 1); | ||
3316 | btrfs_set_trans_block_group(trans, inode); | 3634 | btrfs_set_trans_block_group(trans, inode); |
3317 | 3635 | ||
3318 | ret = btrfs_orphan_add(trans, inode); | 3636 | ret = btrfs_orphan_add(trans, inode); |
@@ -3320,7 +3638,6 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3320 | 3638 | ||
3321 | nr = trans->blocks_used; | 3639 | nr = trans->blocks_used; |
3322 | btrfs_end_transaction(trans, root); | 3640 | btrfs_end_transaction(trans, root); |
3323 | btrfs_unreserve_metadata_space(root, 1); | ||
3324 | btrfs_btree_balance_dirty(root, nr); | 3641 | btrfs_btree_balance_dirty(root, nr); |
3325 | 3642 | ||
3326 | if (attr->ia_size > inode->i_size) { | 3643 | if (attr->ia_size > inode->i_size) { |
@@ -3333,8 +3650,11 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3333 | i_size_write(inode, attr->ia_size); | 3650 | i_size_write(inode, attr->ia_size); |
3334 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 3651 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
3335 | 3652 | ||
3336 | trans = btrfs_start_transaction(root, 1); | 3653 | trans = btrfs_start_transaction(root, 0); |
3654 | BUG_ON(IS_ERR(trans)); | ||
3337 | btrfs_set_trans_block_group(trans, inode); | 3655 | btrfs_set_trans_block_group(trans, inode); |
3656 | trans->block_rsv = root->orphan_block_rsv; | ||
3657 | BUG_ON(!trans->block_rsv); | ||
3338 | 3658 | ||
3339 | ret = btrfs_update_inode(trans, root, inode); | 3659 | ret = btrfs_update_inode(trans, root, inode); |
3340 | BUG_ON(ret); | 3660 | BUG_ON(ret); |
@@ -3366,8 +3686,12 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3366 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | 3686 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) |
3367 | { | 3687 | { |
3368 | struct inode *inode = dentry->d_inode; | 3688 | struct inode *inode = dentry->d_inode; |
3689 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3369 | int err; | 3690 | int err; |
3370 | 3691 | ||
3692 | if (btrfs_root_readonly(root)) | ||
3693 | return -EROFS; | ||
3694 | |||
3371 | err = inode_change_ok(inode, attr); | 3695 | err = inode_change_ok(inode, attr); |
3372 | if (err) | 3696 | if (err) |
3373 | return err; | 3697 | return err; |
@@ -3377,17 +3701,19 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3377 | if (err) | 3701 | if (err) |
3378 | return err; | 3702 | return err; |
3379 | } | 3703 | } |
3380 | attr->ia_valid &= ~ATTR_SIZE; | ||
3381 | 3704 | ||
3382 | if (attr->ia_valid) | 3705 | if (attr->ia_valid) { |
3383 | err = inode_setattr(inode, attr); | 3706 | setattr_copy(inode, attr); |
3707 | mark_inode_dirty(inode); | ||
3708 | |||
3709 | if (attr->ia_valid & ATTR_MODE) | ||
3710 | err = btrfs_acl_chmod(inode); | ||
3711 | } | ||
3384 | 3712 | ||
3385 | if (!err && ((attr->ia_valid & ATTR_MODE))) | ||
3386 | err = btrfs_acl_chmod(inode); | ||
3387 | return err; | 3713 | return err; |
3388 | } | 3714 | } |
3389 | 3715 | ||
3390 | void btrfs_delete_inode(struct inode *inode) | 3716 | void btrfs_evict_inode(struct inode *inode) |
3391 | { | 3717 | { |
3392 | struct btrfs_trans_handle *trans; | 3718 | struct btrfs_trans_handle *trans; |
3393 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3719 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -3395,10 +3721,15 @@ void btrfs_delete_inode(struct inode *inode) | |||
3395 | int ret; | 3721 | int ret; |
3396 | 3722 | ||
3397 | truncate_inode_pages(&inode->i_data, 0); | 3723 | truncate_inode_pages(&inode->i_data, 0); |
3724 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || | ||
3725 | root == root->fs_info->tree_root)) | ||
3726 | goto no_delete; | ||
3727 | |||
3398 | if (is_bad_inode(inode)) { | 3728 | if (is_bad_inode(inode)) { |
3399 | btrfs_orphan_del(NULL, inode); | 3729 | btrfs_orphan_del(NULL, inode); |
3400 | goto no_delete; | 3730 | goto no_delete; |
3401 | } | 3731 | } |
3732 | /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ | ||
3402 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 3733 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
3403 | 3734 | ||
3404 | if (root->fs_info->log_root_recovering) { | 3735 | if (root->fs_info->log_root_recovering) { |
@@ -3414,10 +3745,21 @@ void btrfs_delete_inode(struct inode *inode) | |||
3414 | btrfs_i_size_write(inode, 0); | 3745 | btrfs_i_size_write(inode, 0); |
3415 | 3746 | ||
3416 | while (1) { | 3747 | while (1) { |
3417 | trans = btrfs_start_transaction(root, 1); | 3748 | trans = btrfs_start_transaction(root, 0); |
3749 | BUG_ON(IS_ERR(trans)); | ||
3418 | btrfs_set_trans_block_group(trans, inode); | 3750 | btrfs_set_trans_block_group(trans, inode); |
3419 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | 3751 | trans->block_rsv = root->orphan_block_rsv; |
3752 | |||
3753 | ret = btrfs_block_rsv_check(trans, root, | ||
3754 | root->orphan_block_rsv, 0, 5); | ||
3755 | if (ret) { | ||
3756 | BUG_ON(ret != -EAGAIN); | ||
3757 | ret = btrfs_commit_transaction(trans, root); | ||
3758 | BUG_ON(ret); | ||
3759 | continue; | ||
3760 | } | ||
3420 | 3761 | ||
3762 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | ||
3421 | if (ret != -EAGAIN) | 3763 | if (ret != -EAGAIN) |
3422 | break; | 3764 | break; |
3423 | 3765 | ||
@@ -3425,6 +3767,7 @@ void btrfs_delete_inode(struct inode *inode) | |||
3425 | btrfs_end_transaction(trans, root); | 3767 | btrfs_end_transaction(trans, root); |
3426 | trans = NULL; | 3768 | trans = NULL; |
3427 | btrfs_btree_balance_dirty(root, nr); | 3769 | btrfs_btree_balance_dirty(root, nr); |
3770 | |||
3428 | } | 3771 | } |
3429 | 3772 | ||
3430 | if (ret == 0) { | 3773 | if (ret == 0) { |
@@ -3436,7 +3779,7 @@ void btrfs_delete_inode(struct inode *inode) | |||
3436 | btrfs_end_transaction(trans, root); | 3779 | btrfs_end_transaction(trans, root); |
3437 | btrfs_btree_balance_dirty(root, nr); | 3780 | btrfs_btree_balance_dirty(root, nr); |
3438 | no_delete: | 3781 | no_delete: |
3439 | clear_inode(inode); | 3782 | end_writeback(inode); |
3440 | return; | 3783 | return; |
3441 | } | 3784 | } |
3442 | 3785 | ||
@@ -3553,7 +3896,7 @@ again: | |||
3553 | p = &root->inode_tree.rb_node; | 3896 | p = &root->inode_tree.rb_node; |
3554 | parent = NULL; | 3897 | parent = NULL; |
3555 | 3898 | ||
3556 | if (hlist_unhashed(&inode->i_hash)) | 3899 | if (inode_unhashed(inode)) |
3557 | return; | 3900 | return; |
3558 | 3901 | ||
3559 | spin_lock(&root->inode_lock); | 3902 | spin_lock(&root->inode_lock); |
@@ -3567,7 +3910,7 @@ again: | |||
3567 | p = &parent->rb_right; | 3910 | p = &parent->rb_right; |
3568 | else { | 3911 | else { |
3569 | WARN_ON(!(entry->vfs_inode.i_state & | 3912 | WARN_ON(!(entry->vfs_inode.i_state & |
3570 | (I_WILL_FREE | I_FREEING | I_CLEAR))); | 3913 | (I_WILL_FREE | I_FREEING))); |
3571 | rb_erase(parent, &root->inode_tree); | 3914 | rb_erase(parent, &root->inode_tree); |
3572 | RB_CLEAR_NODE(parent); | 3915 | RB_CLEAR_NODE(parent); |
3573 | spin_unlock(&root->inode_lock); | 3916 | spin_unlock(&root->inode_lock); |
@@ -3592,7 +3935,14 @@ static void inode_tree_del(struct inode *inode) | |||
3592 | } | 3935 | } |
3593 | spin_unlock(&root->inode_lock); | 3936 | spin_unlock(&root->inode_lock); |
3594 | 3937 | ||
3595 | if (empty && btrfs_root_refs(&root->root_item) == 0) { | 3938 | /* |
3939 | * Free space cache has inodes in the tree root, but the tree root has a | ||
3940 | * root_refs of 0, so this could end up dropping the tree root as a | ||
3941 | * snapshot, so we need the extra !root->fs_info->tree_root check to | ||
3942 | * make sure we don't drop it. | ||
3943 | */ | ||
3944 | if (empty && btrfs_root_refs(&root->root_item) == 0 && | ||
3945 | root != root->fs_info->tree_root) { | ||
3596 | synchronize_srcu(&root->fs_info->subvol_srcu); | 3946 | synchronize_srcu(&root->fs_info->subvol_srcu); |
3597 | spin_lock(&root->inode_lock); | 3947 | spin_lock(&root->inode_lock); |
3598 | empty = RB_EMPTY_ROOT(&root->inode_tree); | 3948 | empty = RB_EMPTY_ROOT(&root->inode_tree); |
@@ -3646,7 +3996,7 @@ again: | |||
3646 | if (atomic_read(&inode->i_count) > 1) | 3996 | if (atomic_read(&inode->i_count) > 1) |
3647 | d_prune_aliases(inode); | 3997 | d_prune_aliases(inode); |
3648 | /* | 3998 | /* |
3649 | * btrfs_drop_inode will remove it from | 3999 | * btrfs_drop_inode will have it removed from |
3650 | * the inode cache when its usage count | 4000 | * the inode cache when its usage count |
3651 | * hits zero. | 4001 | * hits zero. |
3652 | */ | 4002 | */ |
@@ -3665,39 +4015,10 @@ again: | |||
3665 | return 0; | 4015 | return 0; |
3666 | } | 4016 | } |
3667 | 4017 | ||
3668 | static noinline void init_btrfs_i(struct inode *inode) | ||
3669 | { | ||
3670 | struct btrfs_inode *bi = BTRFS_I(inode); | ||
3671 | |||
3672 | bi->generation = 0; | ||
3673 | bi->sequence = 0; | ||
3674 | bi->last_trans = 0; | ||
3675 | bi->last_sub_trans = 0; | ||
3676 | bi->logged_trans = 0; | ||
3677 | bi->delalloc_bytes = 0; | ||
3678 | bi->reserved_bytes = 0; | ||
3679 | bi->disk_i_size = 0; | ||
3680 | bi->flags = 0; | ||
3681 | bi->index_cnt = (u64)-1; | ||
3682 | bi->last_unlink_trans = 0; | ||
3683 | bi->ordered_data_close = 0; | ||
3684 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); | ||
3685 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, | ||
3686 | inode->i_mapping, GFP_NOFS); | ||
3687 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, | ||
3688 | inode->i_mapping, GFP_NOFS); | ||
3689 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); | ||
3690 | INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); | ||
3691 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | ||
3692 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | ||
3693 | mutex_init(&BTRFS_I(inode)->log_mutex); | ||
3694 | } | ||
3695 | |||
3696 | static int btrfs_init_locked_inode(struct inode *inode, void *p) | 4018 | static int btrfs_init_locked_inode(struct inode *inode, void *p) |
3697 | { | 4019 | { |
3698 | struct btrfs_iget_args *args = p; | 4020 | struct btrfs_iget_args *args = p; |
3699 | inode->i_ino = args->ino; | 4021 | inode->i_ino = args->ino; |
3700 | init_btrfs_i(inode); | ||
3701 | BTRFS_I(inode)->root = args->root; | 4022 | BTRFS_I(inode)->root = args->root; |
3702 | btrfs_set_inode_space_info(args->root, inode); | 4023 | btrfs_set_inode_space_info(args->root, inode); |
3703 | return 0; | 4024 | return 0; |
@@ -3729,7 +4050,7 @@ static struct inode *btrfs_iget_locked(struct super_block *s, | |||
3729 | * Returns in *is_new if the inode was read from disk | 4050 | * Returns in *is_new if the inode was read from disk |
3730 | */ | 4051 | */ |
3731 | struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | 4052 | struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, |
3732 | struct btrfs_root *root) | 4053 | struct btrfs_root *root, int *new) |
3733 | { | 4054 | { |
3734 | struct inode *inode; | 4055 | struct inode *inode; |
3735 | 4056 | ||
@@ -3744,6 +4065,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
3744 | 4065 | ||
3745 | inode_tree_add(inode); | 4066 | inode_tree_add(inode); |
3746 | unlock_new_inode(inode); | 4067 | unlock_new_inode(inode); |
4068 | if (new) | ||
4069 | *new = 1; | ||
3747 | } | 4070 | } |
3748 | 4071 | ||
3749 | return inode; | 4072 | return inode; |
@@ -3758,8 +4081,6 @@ static struct inode *new_simple_dir(struct super_block *s, | |||
3758 | if (!inode) | 4081 | if (!inode) |
3759 | return ERR_PTR(-ENOMEM); | 4082 | return ERR_PTR(-ENOMEM); |
3760 | 4083 | ||
3761 | init_btrfs_i(inode); | ||
3762 | |||
3763 | BTRFS_I(inode)->root = root; | 4084 | BTRFS_I(inode)->root = root; |
3764 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); | 4085 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); |
3765 | BTRFS_I(inode)->dummy_inode = 1; | 4086 | BTRFS_I(inode)->dummy_inode = 1; |
@@ -3782,8 +4103,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
3782 | int index; | 4103 | int index; |
3783 | int ret; | 4104 | int ret; |
3784 | 4105 | ||
3785 | dentry->d_op = &btrfs_dentry_operations; | ||
3786 | |||
3787 | if (dentry->d_name.len > BTRFS_NAME_LEN) | 4106 | if (dentry->d_name.len > BTRFS_NAME_LEN) |
3788 | return ERR_PTR(-ENAMETOOLONG); | 4107 | return ERR_PTR(-ENAMETOOLONG); |
3789 | 4108 | ||
@@ -3796,7 +4115,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
3796 | return NULL; | 4115 | return NULL; |
3797 | 4116 | ||
3798 | if (location.type == BTRFS_INODE_ITEM_KEY) { | 4117 | if (location.type == BTRFS_INODE_ITEM_KEY) { |
3799 | inode = btrfs_iget(dir->i_sb, &location, root); | 4118 | inode = btrfs_iget(dir->i_sb, &location, root, NULL); |
3800 | return inode; | 4119 | return inode; |
3801 | } | 4120 | } |
3802 | 4121 | ||
@@ -3811,7 +4130,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
3811 | else | 4130 | else |
3812 | inode = new_simple_dir(dir->i_sb, &location, sub_root); | 4131 | inode = new_simple_dir(dir->i_sb, &location, sub_root); |
3813 | } else { | 4132 | } else { |
3814 | inode = btrfs_iget(dir->i_sb, &location, sub_root); | 4133 | inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL); |
3815 | } | 4134 | } |
3816 | srcu_read_unlock(&root->fs_info->subvol_srcu, index); | 4135 | srcu_read_unlock(&root->fs_info->subvol_srcu, index); |
3817 | 4136 | ||
@@ -3825,7 +4144,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
3825 | return inode; | 4144 | return inode; |
3826 | } | 4145 | } |
3827 | 4146 | ||
3828 | static int btrfs_dentry_delete(struct dentry *dentry) | 4147 | static int btrfs_dentry_delete(const struct dentry *dentry) |
3829 | { | 4148 | { |
3830 | struct btrfs_root *root; | 4149 | struct btrfs_root *root; |
3831 | 4150 | ||
@@ -4010,19 +4329,29 @@ err: | |||
4010 | return ret; | 4329 | return ret; |
4011 | } | 4330 | } |
4012 | 4331 | ||
4013 | int btrfs_write_inode(struct inode *inode, int wait) | 4332 | int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) |
4014 | { | 4333 | { |
4015 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4334 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4016 | struct btrfs_trans_handle *trans; | 4335 | struct btrfs_trans_handle *trans; |
4017 | int ret = 0; | 4336 | int ret = 0; |
4337 | bool nolock = false; | ||
4018 | 4338 | ||
4019 | if (root->fs_info->btree_inode == inode) | 4339 | if (BTRFS_I(inode)->dummy_inode) |
4020 | return 0; | 4340 | return 0; |
4021 | 4341 | ||
4022 | if (wait) { | 4342 | smp_mb(); |
4023 | trans = btrfs_join_transaction(root, 1); | 4343 | nolock = (root->fs_info->closing && root == root->fs_info->tree_root); |
4344 | |||
4345 | if (wbc->sync_mode == WB_SYNC_ALL) { | ||
4346 | if (nolock) | ||
4347 | trans = btrfs_join_transaction_nolock(root, 1); | ||
4348 | else | ||
4349 | trans = btrfs_join_transaction(root, 1); | ||
4024 | btrfs_set_trans_block_group(trans, inode); | 4350 | btrfs_set_trans_block_group(trans, inode); |
4025 | ret = btrfs_commit_transaction(trans, root); | 4351 | if (nolock) |
4352 | ret = btrfs_end_transaction_nolock(trans, root); | ||
4353 | else | ||
4354 | ret = btrfs_commit_transaction(trans, root); | ||
4026 | } | 4355 | } |
4027 | return ret; | 4356 | return ret; |
4028 | } | 4357 | } |
@@ -4037,10 +4366,38 @@ void btrfs_dirty_inode(struct inode *inode) | |||
4037 | { | 4366 | { |
4038 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4367 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4039 | struct btrfs_trans_handle *trans; | 4368 | struct btrfs_trans_handle *trans; |
4369 | int ret; | ||
4370 | |||
4371 | if (BTRFS_I(inode)->dummy_inode) | ||
4372 | return; | ||
4040 | 4373 | ||
4041 | trans = btrfs_join_transaction(root, 1); | 4374 | trans = btrfs_join_transaction(root, 1); |
4042 | btrfs_set_trans_block_group(trans, inode); | 4375 | btrfs_set_trans_block_group(trans, inode); |
4043 | btrfs_update_inode(trans, root, inode); | 4376 | |
4377 | ret = btrfs_update_inode(trans, root, inode); | ||
4378 | if (ret && ret == -ENOSPC) { | ||
4379 | /* whoops, lets try again with the full transaction */ | ||
4380 | btrfs_end_transaction(trans, root); | ||
4381 | trans = btrfs_start_transaction(root, 1); | ||
4382 | if (IS_ERR(trans)) { | ||
4383 | if (printk_ratelimit()) { | ||
4384 | printk(KERN_ERR "btrfs: fail to " | ||
4385 | "dirty inode %lu error %ld\n", | ||
4386 | inode->i_ino, PTR_ERR(trans)); | ||
4387 | } | ||
4388 | return; | ||
4389 | } | ||
4390 | btrfs_set_trans_block_group(trans, inode); | ||
4391 | |||
4392 | ret = btrfs_update_inode(trans, root, inode); | ||
4393 | if (ret) { | ||
4394 | if (printk_ratelimit()) { | ||
4395 | printk(KERN_ERR "btrfs: fail to " | ||
4396 | "dirty inode %lu error %d\n", | ||
4397 | inode->i_ino, ret); | ||
4398 | } | ||
4399 | } | ||
4400 | } | ||
4044 | btrfs_end_transaction(trans, root); | 4401 | btrfs_end_transaction(trans, root); |
4045 | } | 4402 | } |
4046 | 4403 | ||
@@ -4158,10 +4515,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4158 | * btrfs_get_inode_index_count has an explanation for the magic | 4515 | * btrfs_get_inode_index_count has an explanation for the magic |
4159 | * number | 4516 | * number |
4160 | */ | 4517 | */ |
4161 | init_btrfs_i(inode); | ||
4162 | BTRFS_I(inode)->index_cnt = 2; | 4518 | BTRFS_I(inode)->index_cnt = 2; |
4163 | BTRFS_I(inode)->root = root; | 4519 | BTRFS_I(inode)->root = root; |
4164 | BTRFS_I(inode)->generation = trans->transid; | 4520 | BTRFS_I(inode)->generation = trans->transid; |
4521 | inode->i_generation = BTRFS_I(inode)->generation; | ||
4165 | btrfs_set_inode_space_info(root, inode); | 4522 | btrfs_set_inode_space_info(root, inode); |
4166 | 4523 | ||
4167 | if (mode & S_IFDIR) | 4524 | if (mode & S_IFDIR) |
@@ -4187,16 +4544,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4187 | if (ret != 0) | 4544 | if (ret != 0) |
4188 | goto fail; | 4545 | goto fail; |
4189 | 4546 | ||
4190 | inode->i_uid = current_fsuid(); | 4547 | inode_init_owner(inode, dir, mode); |
4191 | |||
4192 | if (dir && (dir->i_mode & S_ISGID)) { | ||
4193 | inode->i_gid = dir->i_gid; | ||
4194 | if (S_ISDIR(mode)) | ||
4195 | mode |= S_ISGID; | ||
4196 | } else | ||
4197 | inode->i_gid = current_fsgid(); | ||
4198 | |||
4199 | inode->i_mode = mode; | ||
4200 | inode->i_ino = objectid; | 4548 | inode->i_ino = objectid; |
4201 | inode_set_bytes(inode, 0); | 4549 | inode_set_bytes(inode, 0); |
4202 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 4550 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
@@ -4292,12 +4640,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
4292 | } | 4640 | } |
4293 | 4641 | ||
4294 | static int btrfs_add_nondir(struct btrfs_trans_handle *trans, | 4642 | static int btrfs_add_nondir(struct btrfs_trans_handle *trans, |
4295 | struct dentry *dentry, struct inode *inode, | 4643 | struct inode *dir, struct dentry *dentry, |
4296 | int backref, u64 index) | 4644 | struct inode *inode, int backref, u64 index) |
4297 | { | 4645 | { |
4298 | int err = btrfs_add_link(trans, dentry->d_parent->d_inode, | 4646 | int err = btrfs_add_link(trans, dir, inode, |
4299 | inode, dentry->d_name.name, | 4647 | dentry->d_name.name, dentry->d_name.len, |
4300 | dentry->d_name.len, backref, index); | 4648 | backref, index); |
4301 | if (!err) { | 4649 | if (!err) { |
4302 | d_instantiate(dentry, inode); | 4650 | d_instantiate(dentry, inode); |
4303 | return 0; | 4651 | return 0; |
@@ -4322,29 +4670,23 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4322 | if (!new_valid_dev(rdev)) | 4670 | if (!new_valid_dev(rdev)) |
4323 | return -EINVAL; | 4671 | return -EINVAL; |
4324 | 4672 | ||
4673 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
4674 | if (err) | ||
4675 | return err; | ||
4676 | |||
4325 | /* | 4677 | /* |
4326 | * 2 for inode item and ref | 4678 | * 2 for inode item and ref |
4327 | * 2 for dir items | 4679 | * 2 for dir items |
4328 | * 1 for xattr if selinux is on | 4680 | * 1 for xattr if selinux is on |
4329 | */ | 4681 | */ |
4330 | err = btrfs_reserve_metadata_space(root, 5); | 4682 | trans = btrfs_start_transaction(root, 5); |
4331 | if (err) | 4683 | if (IS_ERR(trans)) |
4332 | return err; | 4684 | return PTR_ERR(trans); |
4333 | 4685 | ||
4334 | trans = btrfs_start_transaction(root, 1); | ||
4335 | if (!trans) | ||
4336 | goto fail; | ||
4337 | btrfs_set_trans_block_group(trans, dir); | 4686 | btrfs_set_trans_block_group(trans, dir); |
4338 | 4687 | ||
4339 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
4340 | if (err) { | ||
4341 | err = -ENOSPC; | ||
4342 | goto out_unlock; | ||
4343 | } | ||
4344 | |||
4345 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4688 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4346 | dentry->d_name.len, | 4689 | dentry->d_name.len, dir->i_ino, objectid, |
4347 | dentry->d_parent->d_inode->i_ino, objectid, | ||
4348 | BTRFS_I(dir)->block_group, mode, &index); | 4690 | BTRFS_I(dir)->block_group, mode, &index); |
4349 | err = PTR_ERR(inode); | 4691 | err = PTR_ERR(inode); |
4350 | if (IS_ERR(inode)) | 4692 | if (IS_ERR(inode)) |
@@ -4357,7 +4699,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4357 | } | 4699 | } |
4358 | 4700 | ||
4359 | btrfs_set_trans_block_group(trans, inode); | 4701 | btrfs_set_trans_block_group(trans, inode); |
4360 | err = btrfs_add_nondir(trans, dentry, inode, 0, index); | 4702 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
4361 | if (err) | 4703 | if (err) |
4362 | drop_inode = 1; | 4704 | drop_inode = 1; |
4363 | else { | 4705 | else { |
@@ -4370,13 +4712,11 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4370 | out_unlock: | 4712 | out_unlock: |
4371 | nr = trans->blocks_used; | 4713 | nr = trans->blocks_used; |
4372 | btrfs_end_transaction_throttle(trans, root); | 4714 | btrfs_end_transaction_throttle(trans, root); |
4373 | fail: | 4715 | btrfs_btree_balance_dirty(root, nr); |
4374 | btrfs_unreserve_metadata_space(root, 5); | ||
4375 | if (drop_inode) { | 4716 | if (drop_inode) { |
4376 | inode_dec_link_count(inode); | 4717 | inode_dec_link_count(inode); |
4377 | iput(inode); | 4718 | iput(inode); |
4378 | } | 4719 | } |
4379 | btrfs_btree_balance_dirty(root, nr); | ||
4380 | return err; | 4720 | return err; |
4381 | } | 4721 | } |
4382 | 4722 | ||
@@ -4386,37 +4726,29 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4386 | struct btrfs_trans_handle *trans; | 4726 | struct btrfs_trans_handle *trans; |
4387 | struct btrfs_root *root = BTRFS_I(dir)->root; | 4727 | struct btrfs_root *root = BTRFS_I(dir)->root; |
4388 | struct inode *inode = NULL; | 4728 | struct inode *inode = NULL; |
4389 | int err; | ||
4390 | int drop_inode = 0; | 4729 | int drop_inode = 0; |
4730 | int err; | ||
4391 | unsigned long nr = 0; | 4731 | unsigned long nr = 0; |
4392 | u64 objectid; | 4732 | u64 objectid; |
4393 | u64 index = 0; | 4733 | u64 index = 0; |
4394 | 4734 | ||
4735 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
4736 | if (err) | ||
4737 | return err; | ||
4395 | /* | 4738 | /* |
4396 | * 2 for inode item and ref | 4739 | * 2 for inode item and ref |
4397 | * 2 for dir items | 4740 | * 2 for dir items |
4398 | * 1 for xattr if selinux is on | 4741 | * 1 for xattr if selinux is on |
4399 | */ | 4742 | */ |
4400 | err = btrfs_reserve_metadata_space(root, 5); | 4743 | trans = btrfs_start_transaction(root, 5); |
4401 | if (err) | 4744 | if (IS_ERR(trans)) |
4402 | return err; | 4745 | return PTR_ERR(trans); |
4403 | 4746 | ||
4404 | trans = btrfs_start_transaction(root, 1); | ||
4405 | if (!trans) | ||
4406 | goto fail; | ||
4407 | btrfs_set_trans_block_group(trans, dir); | 4747 | btrfs_set_trans_block_group(trans, dir); |
4408 | 4748 | ||
4409 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
4410 | if (err) { | ||
4411 | err = -ENOSPC; | ||
4412 | goto out_unlock; | ||
4413 | } | ||
4414 | |||
4415 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4749 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4416 | dentry->d_name.len, | 4750 | dentry->d_name.len, dir->i_ino, objectid, |
4417 | dentry->d_parent->d_inode->i_ino, | 4751 | BTRFS_I(dir)->block_group, mode, &index); |
4418 | objectid, BTRFS_I(dir)->block_group, mode, | ||
4419 | &index); | ||
4420 | err = PTR_ERR(inode); | 4752 | err = PTR_ERR(inode); |
4421 | if (IS_ERR(inode)) | 4753 | if (IS_ERR(inode)) |
4422 | goto out_unlock; | 4754 | goto out_unlock; |
@@ -4428,7 +4760,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4428 | } | 4760 | } |
4429 | 4761 | ||
4430 | btrfs_set_trans_block_group(trans, inode); | 4762 | btrfs_set_trans_block_group(trans, inode); |
4431 | err = btrfs_add_nondir(trans, dentry, inode, 0, index); | 4763 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
4432 | if (err) | 4764 | if (err) |
4433 | drop_inode = 1; | 4765 | drop_inode = 1; |
4434 | else { | 4766 | else { |
@@ -4443,8 +4775,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4443 | out_unlock: | 4775 | out_unlock: |
4444 | nr = trans->blocks_used; | 4776 | nr = trans->blocks_used; |
4445 | btrfs_end_transaction_throttle(trans, root); | 4777 | btrfs_end_transaction_throttle(trans, root); |
4446 | fail: | ||
4447 | btrfs_unreserve_metadata_space(root, 5); | ||
4448 | if (drop_inode) { | 4778 | if (drop_inode) { |
4449 | inode_dec_link_count(inode); | 4779 | inode_dec_link_count(inode); |
4450 | iput(inode); | 4780 | iput(inode); |
@@ -4471,40 +4801,42 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4471 | if (root->objectid != BTRFS_I(inode)->root->objectid) | 4801 | if (root->objectid != BTRFS_I(inode)->root->objectid) |
4472 | return -EPERM; | 4802 | return -EPERM; |
4473 | 4803 | ||
4474 | /* | ||
4475 | * 1 item for inode ref | ||
4476 | * 2 items for dir items | ||
4477 | */ | ||
4478 | err = btrfs_reserve_metadata_space(root, 3); | ||
4479 | if (err) | ||
4480 | return err; | ||
4481 | |||
4482 | btrfs_inc_nlink(inode); | 4804 | btrfs_inc_nlink(inode); |
4805 | inode->i_ctime = CURRENT_TIME; | ||
4483 | 4806 | ||
4484 | err = btrfs_set_inode_index(dir, &index); | 4807 | err = btrfs_set_inode_index(dir, &index); |
4485 | if (err) | 4808 | if (err) |
4486 | goto fail; | 4809 | goto fail; |
4487 | 4810 | ||
4488 | trans = btrfs_start_transaction(root, 1); | 4811 | /* |
4812 | * 1 item for inode ref | ||
4813 | * 2 items for dir items | ||
4814 | */ | ||
4815 | trans = btrfs_start_transaction(root, 3); | ||
4816 | if (IS_ERR(trans)) { | ||
4817 | err = PTR_ERR(trans); | ||
4818 | goto fail; | ||
4819 | } | ||
4489 | 4820 | ||
4490 | btrfs_set_trans_block_group(trans, dir); | 4821 | btrfs_set_trans_block_group(trans, dir); |
4491 | atomic_inc(&inode->i_count); | 4822 | ihold(inode); |
4492 | 4823 | ||
4493 | err = btrfs_add_nondir(trans, dentry, inode, 1, index); | 4824 | err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); |
4494 | 4825 | ||
4495 | if (err) { | 4826 | if (err) { |
4496 | drop_inode = 1; | 4827 | drop_inode = 1; |
4497 | } else { | 4828 | } else { |
4829 | struct dentry *parent = dget_parent(dentry); | ||
4498 | btrfs_update_inode_block_group(trans, dir); | 4830 | btrfs_update_inode_block_group(trans, dir); |
4499 | err = btrfs_update_inode(trans, root, inode); | 4831 | err = btrfs_update_inode(trans, root, inode); |
4500 | BUG_ON(err); | 4832 | BUG_ON(err); |
4501 | btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); | 4833 | btrfs_log_new_name(trans, inode, NULL, parent); |
4834 | dput(parent); | ||
4502 | } | 4835 | } |
4503 | 4836 | ||
4504 | nr = trans->blocks_used; | 4837 | nr = trans->blocks_used; |
4505 | btrfs_end_transaction_throttle(trans, root); | 4838 | btrfs_end_transaction_throttle(trans, root); |
4506 | fail: | 4839 | fail: |
4507 | btrfs_unreserve_metadata_space(root, 3); | ||
4508 | if (drop_inode) { | 4840 | if (drop_inode) { |
4509 | inode_dec_link_count(inode); | 4841 | inode_dec_link_count(inode); |
4510 | iput(inode); | 4842 | iput(inode); |
@@ -4524,31 +4856,22 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4524 | u64 index = 0; | 4856 | u64 index = 0; |
4525 | unsigned long nr = 1; | 4857 | unsigned long nr = 1; |
4526 | 4858 | ||
4859 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
4860 | if (err) | ||
4861 | return err; | ||
4862 | |||
4527 | /* | 4863 | /* |
4528 | * 2 items for inode and ref | 4864 | * 2 items for inode and ref |
4529 | * 2 items for dir items | 4865 | * 2 items for dir items |
4530 | * 1 for xattr if selinux is on | 4866 | * 1 for xattr if selinux is on |
4531 | */ | 4867 | */ |
4532 | err = btrfs_reserve_metadata_space(root, 5); | 4868 | trans = btrfs_start_transaction(root, 5); |
4533 | if (err) | 4869 | if (IS_ERR(trans)) |
4534 | return err; | 4870 | return PTR_ERR(trans); |
4535 | |||
4536 | trans = btrfs_start_transaction(root, 1); | ||
4537 | if (!trans) { | ||
4538 | err = -ENOMEM; | ||
4539 | goto out_unlock; | ||
4540 | } | ||
4541 | btrfs_set_trans_block_group(trans, dir); | 4871 | btrfs_set_trans_block_group(trans, dir); |
4542 | 4872 | ||
4543 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
4544 | if (err) { | ||
4545 | err = -ENOSPC; | ||
4546 | goto out_unlock; | ||
4547 | } | ||
4548 | |||
4549 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4873 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4550 | dentry->d_name.len, | 4874 | dentry->d_name.len, dir->i_ino, objectid, |
4551 | dentry->d_parent->d_inode->i_ino, objectid, | ||
4552 | BTRFS_I(dir)->block_group, S_IFDIR | mode, | 4875 | BTRFS_I(dir)->block_group, S_IFDIR | mode, |
4553 | &index); | 4876 | &index); |
4554 | if (IS_ERR(inode)) { | 4877 | if (IS_ERR(inode)) { |
@@ -4571,9 +4894,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4571 | if (err) | 4894 | if (err) |
4572 | goto out_fail; | 4895 | goto out_fail; |
4573 | 4896 | ||
4574 | err = btrfs_add_link(trans, dentry->d_parent->d_inode, | 4897 | err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, |
4575 | inode, dentry->d_name.name, | 4898 | dentry->d_name.len, 0, index); |
4576 | dentry->d_name.len, 0, index); | ||
4577 | if (err) | 4899 | if (err) |
4578 | goto out_fail; | 4900 | goto out_fail; |
4579 | 4901 | ||
@@ -4585,9 +4907,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4585 | out_fail: | 4907 | out_fail: |
4586 | nr = trans->blocks_used; | 4908 | nr = trans->blocks_used; |
4587 | btrfs_end_transaction_throttle(trans, root); | 4909 | btrfs_end_transaction_throttle(trans, root); |
4588 | |||
4589 | out_unlock: | ||
4590 | btrfs_unreserve_metadata_space(root, 5); | ||
4591 | if (drop_on_err) | 4910 | if (drop_on_err) |
4592 | iput(inode); | 4911 | iput(inode); |
4593 | btrfs_btree_balance_dirty(root, nr); | 4912 | btrfs_btree_balance_dirty(root, nr); |
@@ -4628,8 +4947,10 @@ static noinline int uncompress_inline(struct btrfs_path *path, | |||
4628 | size_t max_size; | 4947 | size_t max_size; |
4629 | unsigned long inline_size; | 4948 | unsigned long inline_size; |
4630 | unsigned long ptr; | 4949 | unsigned long ptr; |
4950 | int compress_type; | ||
4631 | 4951 | ||
4632 | WARN_ON(pg_offset != 0); | 4952 | WARN_ON(pg_offset != 0); |
4953 | compress_type = btrfs_file_extent_compression(leaf, item); | ||
4633 | max_size = btrfs_file_extent_ram_bytes(leaf, item); | 4954 | max_size = btrfs_file_extent_ram_bytes(leaf, item); |
4634 | inline_size = btrfs_file_extent_inline_item_len(leaf, | 4955 | inline_size = btrfs_file_extent_inline_item_len(leaf, |
4635 | btrfs_item_nr(leaf, path->slots[0])); | 4956 | btrfs_item_nr(leaf, path->slots[0])); |
@@ -4639,8 +4960,8 @@ static noinline int uncompress_inline(struct btrfs_path *path, | |||
4639 | read_extent_buffer(leaf, tmp, ptr, inline_size); | 4960 | read_extent_buffer(leaf, tmp, ptr, inline_size); |
4640 | 4961 | ||
4641 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); | 4962 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); |
4642 | ret = btrfs_zlib_decompress(tmp, page, extent_offset, | 4963 | ret = btrfs_decompress(compress_type, tmp, page, |
4643 | inline_size, max_size); | 4964 | extent_offset, inline_size, max_size); |
4644 | if (ret) { | 4965 | if (ret) { |
4645 | char *kaddr = kmap_atomic(page, KM_USER0); | 4966 | char *kaddr = kmap_atomic(page, KM_USER0); |
4646 | unsigned long copy_size = min_t(u64, | 4967 | unsigned long copy_size = min_t(u64, |
@@ -4682,7 +5003,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
4682 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 5003 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
4683 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 5004 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
4684 | struct btrfs_trans_handle *trans = NULL; | 5005 | struct btrfs_trans_handle *trans = NULL; |
4685 | int compressed; | 5006 | int compress_type; |
4686 | 5007 | ||
4687 | again: | 5008 | again: |
4688 | read_lock(&em_tree->lock); | 5009 | read_lock(&em_tree->lock); |
@@ -4741,7 +5062,7 @@ again: | |||
4741 | 5062 | ||
4742 | found_type = btrfs_file_extent_type(leaf, item); | 5063 | found_type = btrfs_file_extent_type(leaf, item); |
4743 | extent_start = found_key.offset; | 5064 | extent_start = found_key.offset; |
4744 | compressed = btrfs_file_extent_compression(leaf, item); | 5065 | compress_type = btrfs_file_extent_compression(leaf, item); |
4745 | if (found_type == BTRFS_FILE_EXTENT_REG || | 5066 | if (found_type == BTRFS_FILE_EXTENT_REG || |
4746 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | 5067 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
4747 | extent_end = extent_start + | 5068 | extent_end = extent_start + |
@@ -4787,8 +5108,9 @@ again: | |||
4787 | em->block_start = EXTENT_MAP_HOLE; | 5108 | em->block_start = EXTENT_MAP_HOLE; |
4788 | goto insert; | 5109 | goto insert; |
4789 | } | 5110 | } |
4790 | if (compressed) { | 5111 | if (compress_type != BTRFS_COMPRESS_NONE) { |
4791 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5112 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
5113 | em->compress_type = compress_type; | ||
4792 | em->block_start = bytenr; | 5114 | em->block_start = bytenr; |
4793 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, | 5115 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, |
4794 | item); | 5116 | item); |
@@ -4822,12 +5144,14 @@ again: | |||
4822 | em->len = (copy_size + root->sectorsize - 1) & | 5144 | em->len = (copy_size + root->sectorsize - 1) & |
4823 | ~((u64)root->sectorsize - 1); | 5145 | ~((u64)root->sectorsize - 1); |
4824 | em->orig_start = EXTENT_MAP_INLINE; | 5146 | em->orig_start = EXTENT_MAP_INLINE; |
4825 | if (compressed) | 5147 | if (compress_type) { |
4826 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5148 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
5149 | em->compress_type = compress_type; | ||
5150 | } | ||
4827 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; | 5151 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; |
4828 | if (create == 0 && !PageUptodate(page)) { | 5152 | if (create == 0 && !PageUptodate(page)) { |
4829 | if (btrfs_file_extent_compression(leaf, item) == | 5153 | if (btrfs_file_extent_compression(leaf, item) != |
4830 | BTRFS_COMPRESS_ZLIB) { | 5154 | BTRFS_COMPRESS_NONE) { |
4831 | ret = uncompress_inline(path, inode, page, | 5155 | ret = uncompress_inline(path, inode, page, |
4832 | pg_offset, | 5156 | pg_offset, |
4833 | extent_offset, item); | 5157 | extent_offset, item); |
@@ -4845,6 +5169,7 @@ again: | |||
4845 | } | 5169 | } |
4846 | flush_dcache_page(page); | 5170 | flush_dcache_page(page); |
4847 | } else if (create && PageUptodate(page)) { | 5171 | } else if (create && PageUptodate(page)) { |
5172 | WARN_ON(1); | ||
4848 | if (!trans) { | 5173 | if (!trans) { |
4849 | kunmap(page); | 5174 | kunmap(page); |
4850 | free_extent_map(em); | 5175 | free_extent_map(em); |
@@ -4941,11 +5266,823 @@ out: | |||
4941 | return em; | 5266 | return em; |
4942 | } | 5267 | } |
4943 | 5268 | ||
5269 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | ||
5270 | u64 start, u64 len) | ||
5271 | { | ||
5272 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5273 | struct btrfs_trans_handle *trans; | ||
5274 | struct extent_map *em; | ||
5275 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
5276 | struct btrfs_key ins; | ||
5277 | u64 alloc_hint; | ||
5278 | int ret; | ||
5279 | |||
5280 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | ||
5281 | |||
5282 | trans = btrfs_join_transaction(root, 0); | ||
5283 | if (!trans) | ||
5284 | return ERR_PTR(-ENOMEM); | ||
5285 | |||
5286 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
5287 | |||
5288 | alloc_hint = get_extent_allocation_hint(inode, start, len); | ||
5289 | ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, | ||
5290 | alloc_hint, (u64)-1, &ins, 1); | ||
5291 | if (ret) { | ||
5292 | em = ERR_PTR(ret); | ||
5293 | goto out; | ||
5294 | } | ||
5295 | |||
5296 | em = alloc_extent_map(GFP_NOFS); | ||
5297 | if (!em) { | ||
5298 | em = ERR_PTR(-ENOMEM); | ||
5299 | goto out; | ||
5300 | } | ||
5301 | |||
5302 | em->start = start; | ||
5303 | em->orig_start = em->start; | ||
5304 | em->len = ins.offset; | ||
5305 | |||
5306 | em->block_start = ins.objectid; | ||
5307 | em->block_len = ins.offset; | ||
5308 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
5309 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
5310 | |||
5311 | while (1) { | ||
5312 | write_lock(&em_tree->lock); | ||
5313 | ret = add_extent_mapping(em_tree, em); | ||
5314 | write_unlock(&em_tree->lock); | ||
5315 | if (ret != -EEXIST) | ||
5316 | break; | ||
5317 | btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0); | ||
5318 | } | ||
5319 | |||
5320 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, | ||
5321 | ins.offset, ins.offset, 0); | ||
5322 | if (ret) { | ||
5323 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset); | ||
5324 | em = ERR_PTR(ret); | ||
5325 | } | ||
5326 | out: | ||
5327 | btrfs_end_transaction(trans, root); | ||
5328 | return em; | ||
5329 | } | ||
5330 | |||
5331 | /* | ||
5332 | * returns 1 when the nocow is safe, < 1 on error, 0 if the | ||
5333 | * block must be cow'd | ||
5334 | */ | ||
5335 | static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, | ||
5336 | struct inode *inode, u64 offset, u64 len) | ||
5337 | { | ||
5338 | struct btrfs_path *path; | ||
5339 | int ret; | ||
5340 | struct extent_buffer *leaf; | ||
5341 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5342 | struct btrfs_file_extent_item *fi; | ||
5343 | struct btrfs_key key; | ||
5344 | u64 disk_bytenr; | ||
5345 | u64 backref_offset; | ||
5346 | u64 extent_end; | ||
5347 | u64 num_bytes; | ||
5348 | int slot; | ||
5349 | int found_type; | ||
5350 | |||
5351 | path = btrfs_alloc_path(); | ||
5352 | if (!path) | ||
5353 | return -ENOMEM; | ||
5354 | |||
5355 | ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, | ||
5356 | offset, 0); | ||
5357 | if (ret < 0) | ||
5358 | goto out; | ||
5359 | |||
5360 | slot = path->slots[0]; | ||
5361 | if (ret == 1) { | ||
5362 | if (slot == 0) { | ||
5363 | /* can't find the item, must cow */ | ||
5364 | ret = 0; | ||
5365 | goto out; | ||
5366 | } | ||
5367 | slot--; | ||
5368 | } | ||
5369 | ret = 0; | ||
5370 | leaf = path->nodes[0]; | ||
5371 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
5372 | if (key.objectid != inode->i_ino || | ||
5373 | key.type != BTRFS_EXTENT_DATA_KEY) { | ||
5374 | /* not our file or wrong item type, must cow */ | ||
5375 | goto out; | ||
5376 | } | ||
5377 | |||
5378 | if (key.offset > offset) { | ||
5379 | /* Wrong offset, must cow */ | ||
5380 | goto out; | ||
5381 | } | ||
5382 | |||
5383 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | ||
5384 | found_type = btrfs_file_extent_type(leaf, fi); | ||
5385 | if (found_type != BTRFS_FILE_EXTENT_REG && | ||
5386 | found_type != BTRFS_FILE_EXTENT_PREALLOC) { | ||
5387 | /* not a regular extent, must cow */ | ||
5388 | goto out; | ||
5389 | } | ||
5390 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
5391 | backref_offset = btrfs_file_extent_offset(leaf, fi); | ||
5392 | |||
5393 | extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); | ||
5394 | if (extent_end < offset + len) { | ||
5395 | /* extent doesn't include our full range, must cow */ | ||
5396 | goto out; | ||
5397 | } | ||
5398 | |||
5399 | if (btrfs_extent_readonly(root, disk_bytenr)) | ||
5400 | goto out; | ||
5401 | |||
5402 | /* | ||
5403 | * look for other files referencing this extent, if we | ||
5404 | * find any we must cow | ||
5405 | */ | ||
5406 | if (btrfs_cross_ref_exist(trans, root, inode->i_ino, | ||
5407 | key.offset - backref_offset, disk_bytenr)) | ||
5408 | goto out; | ||
5409 | |||
5410 | /* | ||
5411 | * adjust disk_bytenr and num_bytes to cover just the bytes | ||
5412 | * in this extent we are about to write. If there | ||
5413 | * are any csums in that range we have to cow in order | ||
5414 | * to keep the csums correct | ||
5415 | */ | ||
5416 | disk_bytenr += backref_offset; | ||
5417 | disk_bytenr += offset - key.offset; | ||
5418 | num_bytes = min(offset + len, extent_end) - offset; | ||
5419 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) | ||
5420 | goto out; | ||
5421 | /* | ||
5422 | * all of the above have passed, it is safe to overwrite this extent | ||
5423 | * without cow | ||
5424 | */ | ||
5425 | ret = 1; | ||
5426 | out: | ||
5427 | btrfs_free_path(path); | ||
5428 | return ret; | ||
5429 | } | ||
5430 | |||
5431 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | ||
5432 | struct buffer_head *bh_result, int create) | ||
5433 | { | ||
5434 | struct extent_map *em; | ||
5435 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5436 | u64 start = iblock << inode->i_blkbits; | ||
5437 | u64 len = bh_result->b_size; | ||
5438 | struct btrfs_trans_handle *trans; | ||
5439 | |||
5440 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | ||
5441 | if (IS_ERR(em)) | ||
5442 | return PTR_ERR(em); | ||
5443 | |||
5444 | /* | ||
5445 | * Ok for INLINE and COMPRESSED extents we need to fallback on buffered | ||
5446 | * io. INLINE is special, and we could probably kludge it in here, but | ||
5447 | * it's still buffered so for safety lets just fall back to the generic | ||
5448 | * buffered path. | ||
5449 | * | ||
5450 | * For COMPRESSED we _have_ to read the entire extent in so we can | ||
5451 | * decompress it, so there will be buffering required no matter what we | ||
5452 | * do, so go ahead and fallback to buffered. | ||
5453 | * | ||
5454 | * We return -ENOTBLK because thats what makes DIO go ahead and go back | ||
5455 | * to buffered IO. Don't blame me, this is the price we pay for using | ||
5456 | * the generic code. | ||
5457 | */ | ||
5458 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || | ||
5459 | em->block_start == EXTENT_MAP_INLINE) { | ||
5460 | free_extent_map(em); | ||
5461 | return -ENOTBLK; | ||
5462 | } | ||
5463 | |||
5464 | /* Just a good old fashioned hole, return */ | ||
5465 | if (!create && (em->block_start == EXTENT_MAP_HOLE || | ||
5466 | test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
5467 | free_extent_map(em); | ||
5468 | /* DIO will do one hole at a time, so just unlock a sector */ | ||
5469 | unlock_extent(&BTRFS_I(inode)->io_tree, start, | ||
5470 | start + root->sectorsize - 1, GFP_NOFS); | ||
5471 | return 0; | ||
5472 | } | ||
5473 | |||
5474 | /* | ||
5475 | * We don't allocate a new extent in the following cases | ||
5476 | * | ||
5477 | * 1) The inode is marked as NODATACOW. In this case we'll just use the | ||
5478 | * existing extent. | ||
5479 | * 2) The extent is marked as PREALLOC. We're good to go here and can | ||
5480 | * just use the extent. | ||
5481 | * | ||
5482 | */ | ||
5483 | if (!create) { | ||
5484 | len = em->len - (start - em->start); | ||
5485 | goto map; | ||
5486 | } | ||
5487 | |||
5488 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || | ||
5489 | ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && | ||
5490 | em->block_start != EXTENT_MAP_HOLE)) { | ||
5491 | int type; | ||
5492 | int ret; | ||
5493 | u64 block_start; | ||
5494 | |||
5495 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | ||
5496 | type = BTRFS_ORDERED_PREALLOC; | ||
5497 | else | ||
5498 | type = BTRFS_ORDERED_NOCOW; | ||
5499 | len = min(len, em->len - (start - em->start)); | ||
5500 | block_start = em->block_start + (start - em->start); | ||
5501 | |||
5502 | /* | ||
5503 | * we're not going to log anything, but we do need | ||
5504 | * to make sure the current transaction stays open | ||
5505 | * while we look for nocow cross refs | ||
5506 | */ | ||
5507 | trans = btrfs_join_transaction(root, 0); | ||
5508 | if (!trans) | ||
5509 | goto must_cow; | ||
5510 | |||
5511 | if (can_nocow_odirect(trans, inode, start, len) == 1) { | ||
5512 | ret = btrfs_add_ordered_extent_dio(inode, start, | ||
5513 | block_start, len, len, type); | ||
5514 | btrfs_end_transaction(trans, root); | ||
5515 | if (ret) { | ||
5516 | free_extent_map(em); | ||
5517 | return ret; | ||
5518 | } | ||
5519 | goto unlock; | ||
5520 | } | ||
5521 | btrfs_end_transaction(trans, root); | ||
5522 | } | ||
5523 | must_cow: | ||
5524 | /* | ||
5525 | * this will cow the extent, reset the len in case we changed | ||
5526 | * it above | ||
5527 | */ | ||
5528 | len = bh_result->b_size; | ||
5529 | free_extent_map(em); | ||
5530 | em = btrfs_new_extent_direct(inode, start, len); | ||
5531 | if (IS_ERR(em)) | ||
5532 | return PTR_ERR(em); | ||
5533 | len = min(len, em->len - (start - em->start)); | ||
5534 | unlock: | ||
5535 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1, | ||
5536 | EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1, | ||
5537 | 0, NULL, GFP_NOFS); | ||
5538 | map: | ||
5539 | bh_result->b_blocknr = (em->block_start + (start - em->start)) >> | ||
5540 | inode->i_blkbits; | ||
5541 | bh_result->b_size = len; | ||
5542 | bh_result->b_bdev = em->bdev; | ||
5543 | set_buffer_mapped(bh_result); | ||
5544 | if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | ||
5545 | set_buffer_new(bh_result); | ||
5546 | |||
5547 | free_extent_map(em); | ||
5548 | |||
5549 | return 0; | ||
5550 | } | ||
5551 | |||
5552 | struct btrfs_dio_private { | ||
5553 | struct inode *inode; | ||
5554 | u64 logical_offset; | ||
5555 | u64 disk_bytenr; | ||
5556 | u64 bytes; | ||
5557 | u32 *csums; | ||
5558 | void *private; | ||
5559 | |||
5560 | /* number of bios pending for this dio */ | ||
5561 | atomic_t pending_bios; | ||
5562 | |||
5563 | /* IO errors */ | ||
5564 | int errors; | ||
5565 | |||
5566 | struct bio *orig_bio; | ||
5567 | }; | ||
5568 | |||
5569 | static void btrfs_endio_direct_read(struct bio *bio, int err) | ||
5570 | { | ||
5571 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5572 | struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
5573 | struct bio_vec *bvec = bio->bi_io_vec; | ||
5574 | struct inode *inode = dip->inode; | ||
5575 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5576 | u64 start; | ||
5577 | u32 *private = dip->csums; | ||
5578 | |||
5579 | start = dip->logical_offset; | ||
5580 | do { | ||
5581 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { | ||
5582 | struct page *page = bvec->bv_page; | ||
5583 | char *kaddr; | ||
5584 | u32 csum = ~(u32)0; | ||
5585 | unsigned long flags; | ||
5586 | |||
5587 | local_irq_save(flags); | ||
5588 | kaddr = kmap_atomic(page, KM_IRQ0); | ||
5589 | csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, | ||
5590 | csum, bvec->bv_len); | ||
5591 | btrfs_csum_final(csum, (char *)&csum); | ||
5592 | kunmap_atomic(kaddr, KM_IRQ0); | ||
5593 | local_irq_restore(flags); | ||
5594 | |||
5595 | flush_dcache_page(bvec->bv_page); | ||
5596 | if (csum != *private) { | ||
5597 | printk(KERN_ERR "btrfs csum failed ino %lu off" | ||
5598 | " %llu csum %u private %u\n", | ||
5599 | inode->i_ino, (unsigned long long)start, | ||
5600 | csum, *private); | ||
5601 | err = -EIO; | ||
5602 | } | ||
5603 | } | ||
5604 | |||
5605 | start += bvec->bv_len; | ||
5606 | private++; | ||
5607 | bvec++; | ||
5608 | } while (bvec <= bvec_end); | ||
5609 | |||
5610 | unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, | ||
5611 | dip->logical_offset + dip->bytes - 1, GFP_NOFS); | ||
5612 | bio->bi_private = dip->private; | ||
5613 | |||
5614 | kfree(dip->csums); | ||
5615 | kfree(dip); | ||
5616 | dio_end_io(bio, err); | ||
5617 | } | ||
5618 | |||
5619 | static void btrfs_endio_direct_write(struct bio *bio, int err) | ||
5620 | { | ||
5621 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5622 | struct inode *inode = dip->inode; | ||
5623 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5624 | struct btrfs_trans_handle *trans; | ||
5625 | struct btrfs_ordered_extent *ordered = NULL; | ||
5626 | struct extent_state *cached_state = NULL; | ||
5627 | u64 ordered_offset = dip->logical_offset; | ||
5628 | u64 ordered_bytes = dip->bytes; | ||
5629 | int ret; | ||
5630 | |||
5631 | if (err) | ||
5632 | goto out_done; | ||
5633 | again: | ||
5634 | ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, | ||
5635 | &ordered_offset, | ||
5636 | ordered_bytes); | ||
5637 | if (!ret) | ||
5638 | goto out_test; | ||
5639 | |||
5640 | BUG_ON(!ordered); | ||
5641 | |||
5642 | trans = btrfs_join_transaction(root, 1); | ||
5643 | if (!trans) { | ||
5644 | err = -ENOMEM; | ||
5645 | goto out; | ||
5646 | } | ||
5647 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
5648 | |||
5649 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { | ||
5650 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | ||
5651 | if (!ret) | ||
5652 | ret = btrfs_update_inode(trans, root, inode); | ||
5653 | err = ret; | ||
5654 | goto out; | ||
5655 | } | ||
5656 | |||
5657 | lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
5658 | ordered->file_offset + ordered->len - 1, 0, | ||
5659 | &cached_state, GFP_NOFS); | ||
5660 | |||
5661 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { | ||
5662 | ret = btrfs_mark_extent_written(trans, inode, | ||
5663 | ordered->file_offset, | ||
5664 | ordered->file_offset + | ||
5665 | ordered->len); | ||
5666 | if (ret) { | ||
5667 | err = ret; | ||
5668 | goto out_unlock; | ||
5669 | } | ||
5670 | } else { | ||
5671 | ret = insert_reserved_file_extent(trans, inode, | ||
5672 | ordered->file_offset, | ||
5673 | ordered->start, | ||
5674 | ordered->disk_len, | ||
5675 | ordered->len, | ||
5676 | ordered->len, | ||
5677 | 0, 0, 0, | ||
5678 | BTRFS_FILE_EXTENT_REG); | ||
5679 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
5680 | ordered->file_offset, ordered->len); | ||
5681 | if (ret) { | ||
5682 | err = ret; | ||
5683 | WARN_ON(1); | ||
5684 | goto out_unlock; | ||
5685 | } | ||
5686 | } | ||
5687 | |||
5688 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | ||
5689 | btrfs_ordered_update_i_size(inode, 0, ordered); | ||
5690 | btrfs_update_inode(trans, root, inode); | ||
5691 | out_unlock: | ||
5692 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
5693 | ordered->file_offset + ordered->len - 1, | ||
5694 | &cached_state, GFP_NOFS); | ||
5695 | out: | ||
5696 | btrfs_delalloc_release_metadata(inode, ordered->len); | ||
5697 | btrfs_end_transaction(trans, root); | ||
5698 | ordered_offset = ordered->file_offset + ordered->len; | ||
5699 | btrfs_put_ordered_extent(ordered); | ||
5700 | btrfs_put_ordered_extent(ordered); | ||
5701 | |||
5702 | out_test: | ||
5703 | /* | ||
5704 | * our bio might span multiple ordered extents. If we haven't | ||
5705 | * completed the accounting for the whole dio, go back and try again | ||
5706 | */ | ||
5707 | if (ordered_offset < dip->logical_offset + dip->bytes) { | ||
5708 | ordered_bytes = dip->logical_offset + dip->bytes - | ||
5709 | ordered_offset; | ||
5710 | goto again; | ||
5711 | } | ||
5712 | out_done: | ||
5713 | bio->bi_private = dip->private; | ||
5714 | |||
5715 | kfree(dip->csums); | ||
5716 | kfree(dip); | ||
5717 | dio_end_io(bio, err); | ||
5718 | } | ||
5719 | |||
5720 | static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, | ||
5721 | struct bio *bio, int mirror_num, | ||
5722 | unsigned long bio_flags, u64 offset) | ||
5723 | { | ||
5724 | int ret; | ||
5725 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5726 | ret = btrfs_csum_one_bio(root, inode, bio, offset, 1); | ||
5727 | BUG_ON(ret); | ||
5728 | return 0; | ||
5729 | } | ||
5730 | |||
5731 | static void btrfs_end_dio_bio(struct bio *bio, int err) | ||
5732 | { | ||
5733 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5734 | |||
5735 | if (err) { | ||
5736 | printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " | ||
5737 | "sector %#Lx len %u err no %d\n", | ||
5738 | dip->inode->i_ino, bio->bi_rw, | ||
5739 | (unsigned long long)bio->bi_sector, bio->bi_size, err); | ||
5740 | dip->errors = 1; | ||
5741 | |||
5742 | /* | ||
5743 | * before atomic variable goto zero, we must make sure | ||
5744 | * dip->errors is perceived to be set. | ||
5745 | */ | ||
5746 | smp_mb__before_atomic_dec(); | ||
5747 | } | ||
5748 | |||
5749 | /* if there are more bios still pending for this dio, just exit */ | ||
5750 | if (!atomic_dec_and_test(&dip->pending_bios)) | ||
5751 | goto out; | ||
5752 | |||
5753 | if (dip->errors) | ||
5754 | bio_io_error(dip->orig_bio); | ||
5755 | else { | ||
5756 | set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags); | ||
5757 | bio_endio(dip->orig_bio, 0); | ||
5758 | } | ||
5759 | out: | ||
5760 | bio_put(bio); | ||
5761 | } | ||
5762 | |||
5763 | static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, | ||
5764 | u64 first_sector, gfp_t gfp_flags) | ||
5765 | { | ||
5766 | int nr_vecs = bio_get_nr_vecs(bdev); | ||
5767 | return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags); | ||
5768 | } | ||
5769 | |||
5770 | static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | ||
5771 | int rw, u64 file_offset, int skip_sum, | ||
5772 | u32 *csums) | ||
5773 | { | ||
5774 | int write = rw & REQ_WRITE; | ||
5775 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5776 | int ret; | ||
5777 | |||
5778 | bio_get(bio); | ||
5779 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
5780 | if (ret) | ||
5781 | goto err; | ||
5782 | |||
5783 | if (write && !skip_sum) { | ||
5784 | ret = btrfs_wq_submit_bio(root->fs_info, | ||
5785 | inode, rw, bio, 0, 0, | ||
5786 | file_offset, | ||
5787 | __btrfs_submit_bio_start_direct_io, | ||
5788 | __btrfs_submit_bio_done); | ||
5789 | goto err; | ||
5790 | } else if (!skip_sum) | ||
5791 | btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
5792 | file_offset, csums); | ||
5793 | |||
5794 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | ||
5795 | err: | ||
5796 | bio_put(bio); | ||
5797 | return ret; | ||
5798 | } | ||
5799 | |||
5800 | static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | ||
5801 | int skip_sum) | ||
5802 | { | ||
5803 | struct inode *inode = dip->inode; | ||
5804 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5805 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
5806 | struct bio *bio; | ||
5807 | struct bio *orig_bio = dip->orig_bio; | ||
5808 | struct bio_vec *bvec = orig_bio->bi_io_vec; | ||
5809 | u64 start_sector = orig_bio->bi_sector; | ||
5810 | u64 file_offset = dip->logical_offset; | ||
5811 | u64 submit_len = 0; | ||
5812 | u64 map_length; | ||
5813 | int nr_pages = 0; | ||
5814 | u32 *csums = dip->csums; | ||
5815 | int ret = 0; | ||
5816 | |||
5817 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); | ||
5818 | if (!bio) | ||
5819 | return -ENOMEM; | ||
5820 | bio->bi_private = dip; | ||
5821 | bio->bi_end_io = btrfs_end_dio_bio; | ||
5822 | atomic_inc(&dip->pending_bios); | ||
5823 | |||
5824 | map_length = orig_bio->bi_size; | ||
5825 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | ||
5826 | &map_length, NULL, 0); | ||
5827 | if (ret) { | ||
5828 | bio_put(bio); | ||
5829 | return -EIO; | ||
5830 | } | ||
5831 | |||
5832 | while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { | ||
5833 | if (unlikely(map_length < submit_len + bvec->bv_len || | ||
5834 | bio_add_page(bio, bvec->bv_page, bvec->bv_len, | ||
5835 | bvec->bv_offset) < bvec->bv_len)) { | ||
5836 | /* | ||
5837 | * inc the count before we submit the bio so | ||
5838 | * we know the end IO handler won't happen before | ||
5839 | * we inc the count. Otherwise, the dip might get freed | ||
5840 | * before we're done setting it up | ||
5841 | */ | ||
5842 | atomic_inc(&dip->pending_bios); | ||
5843 | ret = __btrfs_submit_dio_bio(bio, inode, rw, | ||
5844 | file_offset, skip_sum, | ||
5845 | csums); | ||
5846 | if (ret) { | ||
5847 | bio_put(bio); | ||
5848 | atomic_dec(&dip->pending_bios); | ||
5849 | goto out_err; | ||
5850 | } | ||
5851 | |||
5852 | if (!skip_sum) | ||
5853 | csums = csums + nr_pages; | ||
5854 | start_sector += submit_len >> 9; | ||
5855 | file_offset += submit_len; | ||
5856 | |||
5857 | submit_len = 0; | ||
5858 | nr_pages = 0; | ||
5859 | |||
5860 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, | ||
5861 | start_sector, GFP_NOFS); | ||
5862 | if (!bio) | ||
5863 | goto out_err; | ||
5864 | bio->bi_private = dip; | ||
5865 | bio->bi_end_io = btrfs_end_dio_bio; | ||
5866 | |||
5867 | map_length = orig_bio->bi_size; | ||
5868 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | ||
5869 | &map_length, NULL, 0); | ||
5870 | if (ret) { | ||
5871 | bio_put(bio); | ||
5872 | goto out_err; | ||
5873 | } | ||
5874 | } else { | ||
5875 | submit_len += bvec->bv_len; | ||
5876 | nr_pages ++; | ||
5877 | bvec++; | ||
5878 | } | ||
5879 | } | ||
5880 | |||
5881 | ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, | ||
5882 | csums); | ||
5883 | if (!ret) | ||
5884 | return 0; | ||
5885 | |||
5886 | bio_put(bio); | ||
5887 | out_err: | ||
5888 | dip->errors = 1; | ||
5889 | /* | ||
5890 | * before atomic variable goto zero, we must | ||
5891 | * make sure dip->errors is perceived to be set. | ||
5892 | */ | ||
5893 | smp_mb__before_atomic_dec(); | ||
5894 | if (atomic_dec_and_test(&dip->pending_bios)) | ||
5895 | bio_io_error(dip->orig_bio); | ||
5896 | |||
5897 | /* bio_end_io() will handle error, so we needn't return it */ | ||
5898 | return 0; | ||
5899 | } | ||
5900 | |||
5901 | static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | ||
5902 | loff_t file_offset) | ||
5903 | { | ||
5904 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5905 | struct btrfs_dio_private *dip; | ||
5906 | struct bio_vec *bvec = bio->bi_io_vec; | ||
5907 | int skip_sum; | ||
5908 | int write = rw & REQ_WRITE; | ||
5909 | int ret = 0; | ||
5910 | |||
5911 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
5912 | |||
5913 | dip = kmalloc(sizeof(*dip), GFP_NOFS); | ||
5914 | if (!dip) { | ||
5915 | ret = -ENOMEM; | ||
5916 | goto free_ordered; | ||
5917 | } | ||
5918 | dip->csums = NULL; | ||
5919 | |||
5920 | if (!skip_sum) { | ||
5921 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); | ||
5922 | if (!dip->csums) { | ||
5923 | ret = -ENOMEM; | ||
5924 | goto free_ordered; | ||
5925 | } | ||
5926 | } | ||
5927 | |||
5928 | dip->private = bio->bi_private; | ||
5929 | dip->inode = inode; | ||
5930 | dip->logical_offset = file_offset; | ||
5931 | |||
5932 | dip->bytes = 0; | ||
5933 | do { | ||
5934 | dip->bytes += bvec->bv_len; | ||
5935 | bvec++; | ||
5936 | } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1)); | ||
5937 | |||
5938 | dip->disk_bytenr = (u64)bio->bi_sector << 9; | ||
5939 | bio->bi_private = dip; | ||
5940 | dip->errors = 0; | ||
5941 | dip->orig_bio = bio; | ||
5942 | atomic_set(&dip->pending_bios, 0); | ||
5943 | |||
5944 | if (write) | ||
5945 | bio->bi_end_io = btrfs_endio_direct_write; | ||
5946 | else | ||
5947 | bio->bi_end_io = btrfs_endio_direct_read; | ||
5948 | |||
5949 | ret = btrfs_submit_direct_hook(rw, dip, skip_sum); | ||
5950 | if (!ret) | ||
5951 | return; | ||
5952 | free_ordered: | ||
5953 | /* | ||
5954 | * If this is a write, we need to clean up the reserved space and kill | ||
5955 | * the ordered extent. | ||
5956 | */ | ||
5957 | if (write) { | ||
5958 | struct btrfs_ordered_extent *ordered; | ||
5959 | ordered = btrfs_lookup_ordered_extent(inode, file_offset); | ||
5960 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && | ||
5961 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) | ||
5962 | btrfs_free_reserved_extent(root, ordered->start, | ||
5963 | ordered->disk_len); | ||
5964 | btrfs_put_ordered_extent(ordered); | ||
5965 | btrfs_put_ordered_extent(ordered); | ||
5966 | } | ||
5967 | bio_endio(bio, ret); | ||
5968 | } | ||
5969 | |||
5970 | static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, | ||
5971 | const struct iovec *iov, loff_t offset, | ||
5972 | unsigned long nr_segs) | ||
5973 | { | ||
5974 | int seg; | ||
5975 | size_t size; | ||
5976 | unsigned long addr; | ||
5977 | unsigned blocksize_mask = root->sectorsize - 1; | ||
5978 | ssize_t retval = -EINVAL; | ||
5979 | loff_t end = offset; | ||
5980 | |||
5981 | if (offset & blocksize_mask) | ||
5982 | goto out; | ||
5983 | |||
5984 | /* Check the memory alignment. Blocks cannot straddle pages */ | ||
5985 | for (seg = 0; seg < nr_segs; seg++) { | ||
5986 | addr = (unsigned long)iov[seg].iov_base; | ||
5987 | size = iov[seg].iov_len; | ||
5988 | end += size; | ||
5989 | if ((addr & blocksize_mask) || (size & blocksize_mask)) | ||
5990 | goto out; | ||
5991 | } | ||
5992 | retval = 0; | ||
5993 | out: | ||
5994 | return retval; | ||
5995 | } | ||
4944 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | 5996 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, |
4945 | const struct iovec *iov, loff_t offset, | 5997 | const struct iovec *iov, loff_t offset, |
4946 | unsigned long nr_segs) | 5998 | unsigned long nr_segs) |
4947 | { | 5999 | { |
4948 | return -EINVAL; | 6000 | struct file *file = iocb->ki_filp; |
6001 | struct inode *inode = file->f_mapping->host; | ||
6002 | struct btrfs_ordered_extent *ordered; | ||
6003 | struct extent_state *cached_state = NULL; | ||
6004 | u64 lockstart, lockend; | ||
6005 | ssize_t ret; | ||
6006 | int writing = rw & WRITE; | ||
6007 | int write_bits = 0; | ||
6008 | size_t count = iov_length(iov, nr_segs); | ||
6009 | |||
6010 | if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, | ||
6011 | offset, nr_segs)) { | ||
6012 | return 0; | ||
6013 | } | ||
6014 | |||
6015 | lockstart = offset; | ||
6016 | lockend = offset + count - 1; | ||
6017 | |||
6018 | if (writing) { | ||
6019 | ret = btrfs_delalloc_reserve_space(inode, count); | ||
6020 | if (ret) | ||
6021 | goto out; | ||
6022 | } | ||
6023 | |||
6024 | while (1) { | ||
6025 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
6026 | 0, &cached_state, GFP_NOFS); | ||
6027 | /* | ||
6028 | * We're concerned with the entire range that we're going to be | ||
6029 | * doing DIO to, so we need to make sure theres no ordered | ||
6030 | * extents in this range. | ||
6031 | */ | ||
6032 | ordered = btrfs_lookup_ordered_range(inode, lockstart, | ||
6033 | lockend - lockstart + 1); | ||
6034 | if (!ordered) | ||
6035 | break; | ||
6036 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
6037 | &cached_state, GFP_NOFS); | ||
6038 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
6039 | btrfs_put_ordered_extent(ordered); | ||
6040 | cond_resched(); | ||
6041 | } | ||
6042 | |||
6043 | /* | ||
6044 | * we don't use btrfs_set_extent_delalloc because we don't want | ||
6045 | * the dirty or uptodate bits | ||
6046 | */ | ||
6047 | if (writing) { | ||
6048 | write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING; | ||
6049 | ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
6050 | EXTENT_DELALLOC, 0, NULL, &cached_state, | ||
6051 | GFP_NOFS); | ||
6052 | if (ret) { | ||
6053 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
6054 | lockend, EXTENT_LOCKED | write_bits, | ||
6055 | 1, 0, &cached_state, GFP_NOFS); | ||
6056 | goto out; | ||
6057 | } | ||
6058 | } | ||
6059 | |||
6060 | free_extent_state(cached_state); | ||
6061 | cached_state = NULL; | ||
6062 | |||
6063 | ret = __blockdev_direct_IO(rw, iocb, inode, | ||
6064 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, | ||
6065 | iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, | ||
6066 | btrfs_submit_direct, 0); | ||
6067 | |||
6068 | if (ret < 0 && ret != -EIOCBQUEUED) { | ||
6069 | clear_extent_bit(&BTRFS_I(inode)->io_tree, offset, | ||
6070 | offset + iov_length(iov, nr_segs) - 1, | ||
6071 | EXTENT_LOCKED | write_bits, 1, 0, | ||
6072 | &cached_state, GFP_NOFS); | ||
6073 | } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) { | ||
6074 | /* | ||
6075 | * We're falling back to buffered, unlock the section we didn't | ||
6076 | * do IO on. | ||
6077 | */ | ||
6078 | clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret, | ||
6079 | offset + iov_length(iov, nr_segs) - 1, | ||
6080 | EXTENT_LOCKED | write_bits, 1, 0, | ||
6081 | &cached_state, GFP_NOFS); | ||
6082 | } | ||
6083 | out: | ||
6084 | free_extent_state(cached_state); | ||
6085 | return ret; | ||
4949 | } | 6086 | } |
4950 | 6087 | ||
4951 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 6088 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
@@ -5021,6 +6158,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
5021 | { | 6158 | { |
5022 | struct extent_io_tree *tree; | 6159 | struct extent_io_tree *tree; |
5023 | struct btrfs_ordered_extent *ordered; | 6160 | struct btrfs_ordered_extent *ordered; |
6161 | struct extent_state *cached_state = NULL; | ||
5024 | u64 page_start = page_offset(page); | 6162 | u64 page_start = page_offset(page); |
5025 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; | 6163 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; |
5026 | 6164 | ||
@@ -5039,7 +6177,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
5039 | btrfs_releasepage(page, GFP_NOFS); | 6177 | btrfs_releasepage(page, GFP_NOFS); |
5040 | return; | 6178 | return; |
5041 | } | 6179 | } |
5042 | lock_extent(tree, page_start, page_end, GFP_NOFS); | 6180 | lock_extent_bits(tree, page_start, page_end, 0, &cached_state, |
6181 | GFP_NOFS); | ||
5043 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, | 6182 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, |
5044 | page_offset(page)); | 6183 | page_offset(page)); |
5045 | if (ordered) { | 6184 | if (ordered) { |
@@ -5050,7 +6189,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
5050 | clear_extent_bit(tree, page_start, page_end, | 6189 | clear_extent_bit(tree, page_start, page_end, |
5051 | EXTENT_DIRTY | EXTENT_DELALLOC | | 6190 | EXTENT_DIRTY | EXTENT_DELALLOC | |
5052 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, | 6191 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, |
5053 | NULL, GFP_NOFS); | 6192 | &cached_state, GFP_NOFS); |
5054 | /* | 6193 | /* |
5055 | * whoever cleared the private bit is responsible | 6194 | * whoever cleared the private bit is responsible |
5056 | * for the finish_ordered_io | 6195 | * for the finish_ordered_io |
@@ -5060,11 +6199,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
5060 | page_start, page_end); | 6199 | page_start, page_end); |
5061 | } | 6200 | } |
5062 | btrfs_put_ordered_extent(ordered); | 6201 | btrfs_put_ordered_extent(ordered); |
5063 | lock_extent(tree, page_start, page_end, GFP_NOFS); | 6202 | cached_state = NULL; |
6203 | lock_extent_bits(tree, page_start, page_end, 0, &cached_state, | ||
6204 | GFP_NOFS); | ||
5064 | } | 6205 | } |
5065 | clear_extent_bit(tree, page_start, page_end, | 6206 | clear_extent_bit(tree, page_start, page_end, |
5066 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | | 6207 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | |
5067 | EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS); | 6208 | EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS); |
5068 | __btrfs_releasepage(page, GFP_NOFS); | 6209 | __btrfs_releasepage(page, GFP_NOFS); |
5069 | 6210 | ||
5070 | ClearPageChecked(page); | 6211 | ClearPageChecked(page); |
@@ -5097,6 +6238,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5097 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6238 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5098 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 6239 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
5099 | struct btrfs_ordered_extent *ordered; | 6240 | struct btrfs_ordered_extent *ordered; |
6241 | struct extent_state *cached_state = NULL; | ||
5100 | char *kaddr; | 6242 | char *kaddr; |
5101 | unsigned long zero_start; | 6243 | unsigned long zero_start; |
5102 | loff_t size; | 6244 | loff_t size; |
@@ -5104,7 +6246,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5104 | u64 page_start; | 6246 | u64 page_start; |
5105 | u64 page_end; | 6247 | u64 page_end; |
5106 | 6248 | ||
5107 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 6249 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
5108 | if (ret) { | 6250 | if (ret) { |
5109 | if (ret == -ENOMEM) | 6251 | if (ret == -ENOMEM) |
5110 | ret = VM_FAULT_OOM; | 6252 | ret = VM_FAULT_OOM; |
@@ -5113,13 +6255,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5113 | goto out; | 6255 | goto out; |
5114 | } | 6256 | } |
5115 | 6257 | ||
5116 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
5117 | if (ret) { | ||
5118 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
5119 | ret = VM_FAULT_SIGBUS; | ||
5120 | goto out; | ||
5121 | } | ||
5122 | |||
5123 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ | 6258 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ |
5124 | again: | 6259 | again: |
5125 | lock_page(page); | 6260 | lock_page(page); |
@@ -5129,13 +6264,13 @@ again: | |||
5129 | 6264 | ||
5130 | if ((page->mapping != inode->i_mapping) || | 6265 | if ((page->mapping != inode->i_mapping) || |
5131 | (page_start >= size)) { | 6266 | (page_start >= size)) { |
5132 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
5133 | /* page got truncated out from underneath us */ | 6267 | /* page got truncated out from underneath us */ |
5134 | goto out_unlock; | 6268 | goto out_unlock; |
5135 | } | 6269 | } |
5136 | wait_on_page_writeback(page); | 6270 | wait_on_page_writeback(page); |
5137 | 6271 | ||
5138 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | 6272 | lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state, |
6273 | GFP_NOFS); | ||
5139 | set_page_extent_mapped(page); | 6274 | set_page_extent_mapped(page); |
5140 | 6275 | ||
5141 | /* | 6276 | /* |
@@ -5144,7 +6279,8 @@ again: | |||
5144 | */ | 6279 | */ |
5145 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 6280 | ordered = btrfs_lookup_ordered_extent(inode, page_start); |
5146 | if (ordered) { | 6281 | if (ordered) { |
5147 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 6282 | unlock_extent_cached(io_tree, page_start, page_end, |
6283 | &cached_state, GFP_NOFS); | ||
5148 | unlock_page(page); | 6284 | unlock_page(page); |
5149 | btrfs_start_ordered_extent(inode, ordered, 1); | 6285 | btrfs_start_ordered_extent(inode, ordered, 1); |
5150 | btrfs_put_ordered_extent(ordered); | 6286 | btrfs_put_ordered_extent(ordered); |
@@ -5158,15 +6294,16 @@ again: | |||
5158 | * is probably a better way to do this, but for now keep consistent with | 6294 | * is probably a better way to do this, but for now keep consistent with |
5159 | * prepare_pages in the normal write path. | 6295 | * prepare_pages in the normal write path. |
5160 | */ | 6296 | */ |
5161 | clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, | 6297 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, |
5162 | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, | 6298 | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, |
5163 | GFP_NOFS); | 6299 | 0, 0, &cached_state, GFP_NOFS); |
5164 | 6300 | ||
5165 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end); | 6301 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, |
6302 | &cached_state); | ||
5166 | if (ret) { | 6303 | if (ret) { |
5167 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 6304 | unlock_extent_cached(io_tree, page_start, page_end, |
6305 | &cached_state, GFP_NOFS); | ||
5168 | ret = VM_FAULT_SIGBUS; | 6306 | ret = VM_FAULT_SIGBUS; |
5169 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
5170 | goto out_unlock; | 6307 | goto out_unlock; |
5171 | } | 6308 | } |
5172 | ret = 0; | 6309 | ret = 0; |
@@ -5190,13 +6327,13 @@ again: | |||
5190 | BTRFS_I(inode)->last_trans = root->fs_info->generation; | 6327 | BTRFS_I(inode)->last_trans = root->fs_info->generation; |
5191 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; | 6328 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; |
5192 | 6329 | ||
5193 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 6330 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); |
5194 | 6331 | ||
5195 | out_unlock: | 6332 | out_unlock: |
5196 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
5197 | if (!ret) | 6333 | if (!ret) |
5198 | return VM_FAULT_LOCKED; | 6334 | return VM_FAULT_LOCKED; |
5199 | unlock_page(page); | 6335 | unlock_page(page); |
6336 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | ||
5200 | out: | 6337 | out: |
5201 | return ret; | 6338 | return ret; |
5202 | } | 6339 | } |
@@ -5221,8 +6358,10 @@ static void btrfs_truncate(struct inode *inode) | |||
5221 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 6358 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
5222 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 6359 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
5223 | 6360 | ||
5224 | trans = btrfs_start_transaction(root, 1); | 6361 | trans = btrfs_start_transaction(root, 0); |
6362 | BUG_ON(IS_ERR(trans)); | ||
5225 | btrfs_set_trans_block_group(trans, inode); | 6363 | btrfs_set_trans_block_group(trans, inode); |
6364 | trans->block_rsv = root->orphan_block_rsv; | ||
5226 | 6365 | ||
5227 | /* | 6366 | /* |
5228 | * setattr is responsible for setting the ordered_data_close flag, | 6367 | * setattr is responsible for setting the ordered_data_close flag, |
@@ -5245,6 +6384,23 @@ static void btrfs_truncate(struct inode *inode) | |||
5245 | btrfs_add_ordered_operation(trans, root, inode); | 6384 | btrfs_add_ordered_operation(trans, root, inode); |
5246 | 6385 | ||
5247 | while (1) { | 6386 | while (1) { |
6387 | if (!trans) { | ||
6388 | trans = btrfs_start_transaction(root, 0); | ||
6389 | BUG_ON(IS_ERR(trans)); | ||
6390 | btrfs_set_trans_block_group(trans, inode); | ||
6391 | trans->block_rsv = root->orphan_block_rsv; | ||
6392 | } | ||
6393 | |||
6394 | ret = btrfs_block_rsv_check(trans, root, | ||
6395 | root->orphan_block_rsv, 0, 5); | ||
6396 | if (ret) { | ||
6397 | BUG_ON(ret != -EAGAIN); | ||
6398 | ret = btrfs_commit_transaction(trans, root); | ||
6399 | BUG_ON(ret); | ||
6400 | trans = NULL; | ||
6401 | continue; | ||
6402 | } | ||
6403 | |||
5248 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6404 | ret = btrfs_truncate_inode_items(trans, root, inode, |
5249 | inode->i_size, | 6405 | inode->i_size, |
5250 | BTRFS_EXTENT_DATA_KEY); | 6406 | BTRFS_EXTENT_DATA_KEY); |
@@ -5256,10 +6412,8 @@ static void btrfs_truncate(struct inode *inode) | |||
5256 | 6412 | ||
5257 | nr = trans->blocks_used; | 6413 | nr = trans->blocks_used; |
5258 | btrfs_end_transaction(trans, root); | 6414 | btrfs_end_transaction(trans, root); |
6415 | trans = NULL; | ||
5259 | btrfs_btree_balance_dirty(root, nr); | 6416 | btrfs_btree_balance_dirty(root, nr); |
5260 | |||
5261 | trans = btrfs_start_transaction(root, 1); | ||
5262 | btrfs_set_trans_block_group(trans, inode); | ||
5263 | } | 6417 | } |
5264 | 6418 | ||
5265 | if (ret == 0 && inode->i_nlink > 0) { | 6419 | if (ret == 0 && inode->i_nlink > 0) { |
@@ -5320,21 +6474,54 @@ unsigned long btrfs_force_ra(struct address_space *mapping, | |||
5320 | struct inode *btrfs_alloc_inode(struct super_block *sb) | 6474 | struct inode *btrfs_alloc_inode(struct super_block *sb) |
5321 | { | 6475 | { |
5322 | struct btrfs_inode *ei; | 6476 | struct btrfs_inode *ei; |
6477 | struct inode *inode; | ||
5323 | 6478 | ||
5324 | ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); | 6479 | ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); |
5325 | if (!ei) | 6480 | if (!ei) |
5326 | return NULL; | 6481 | return NULL; |
6482 | |||
6483 | ei->root = NULL; | ||
6484 | ei->space_info = NULL; | ||
6485 | ei->generation = 0; | ||
6486 | ei->sequence = 0; | ||
5327 | ei->last_trans = 0; | 6487 | ei->last_trans = 0; |
5328 | ei->last_sub_trans = 0; | 6488 | ei->last_sub_trans = 0; |
5329 | ei->logged_trans = 0; | 6489 | ei->logged_trans = 0; |
5330 | ei->outstanding_extents = 0; | 6490 | ei->delalloc_bytes = 0; |
5331 | ei->reserved_extents = 0; | 6491 | ei->reserved_bytes = 0; |
5332 | ei->root = NULL; | 6492 | ei->disk_i_size = 0; |
6493 | ei->flags = 0; | ||
6494 | ei->index_cnt = (u64)-1; | ||
6495 | ei->last_unlink_trans = 0; | ||
6496 | |||
5333 | spin_lock_init(&ei->accounting_lock); | 6497 | spin_lock_init(&ei->accounting_lock); |
6498 | atomic_set(&ei->outstanding_extents, 0); | ||
6499 | ei->reserved_extents = 0; | ||
6500 | |||
6501 | ei->ordered_data_close = 0; | ||
6502 | ei->orphan_meta_reserved = 0; | ||
6503 | ei->dummy_inode = 0; | ||
6504 | ei->force_compress = BTRFS_COMPRESS_NONE; | ||
6505 | |||
6506 | inode = &ei->vfs_inode; | ||
6507 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); | ||
6508 | extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS); | ||
6509 | extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS); | ||
6510 | mutex_init(&ei->log_mutex); | ||
5334 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 6511 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
5335 | INIT_LIST_HEAD(&ei->i_orphan); | 6512 | INIT_LIST_HEAD(&ei->i_orphan); |
6513 | INIT_LIST_HEAD(&ei->delalloc_inodes); | ||
5336 | INIT_LIST_HEAD(&ei->ordered_operations); | 6514 | INIT_LIST_HEAD(&ei->ordered_operations); |
5337 | return &ei->vfs_inode; | 6515 | RB_CLEAR_NODE(&ei->rb_node); |
6516 | |||
6517 | return inode; | ||
6518 | } | ||
6519 | |||
6520 | static void btrfs_i_callback(struct rcu_head *head) | ||
6521 | { | ||
6522 | struct inode *inode = container_of(head, struct inode, i_rcu); | ||
6523 | INIT_LIST_HEAD(&inode->i_dentry); | ||
6524 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); | ||
5338 | } | 6525 | } |
5339 | 6526 | ||
5340 | void btrfs_destroy_inode(struct inode *inode) | 6527 | void btrfs_destroy_inode(struct inode *inode) |
@@ -5344,6 +6531,8 @@ void btrfs_destroy_inode(struct inode *inode) | |||
5344 | 6531 | ||
5345 | WARN_ON(!list_empty(&inode->i_dentry)); | 6532 | WARN_ON(!list_empty(&inode->i_dentry)); |
5346 | WARN_ON(inode->i_data.nrpages); | 6533 | WARN_ON(inode->i_data.nrpages); |
6534 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); | ||
6535 | WARN_ON(BTRFS_I(inode)->reserved_extents); | ||
5347 | 6536 | ||
5348 | /* | 6537 | /* |
5349 | * This can happen where we create an inode, but somebody else also | 6538 | * This can happen where we create an inode, but somebody else also |
@@ -5364,13 +6553,28 @@ void btrfs_destroy_inode(struct inode *inode) | |||
5364 | spin_unlock(&root->fs_info->ordered_extent_lock); | 6553 | spin_unlock(&root->fs_info->ordered_extent_lock); |
5365 | } | 6554 | } |
5366 | 6555 | ||
5367 | spin_lock(&root->list_lock); | 6556 | if (root == root->fs_info->tree_root) { |
6557 | struct btrfs_block_group_cache *block_group; | ||
6558 | |||
6559 | block_group = btrfs_lookup_block_group(root->fs_info, | ||
6560 | BTRFS_I(inode)->block_group); | ||
6561 | if (block_group && block_group->inode == inode) { | ||
6562 | spin_lock(&block_group->lock); | ||
6563 | block_group->inode = NULL; | ||
6564 | spin_unlock(&block_group->lock); | ||
6565 | btrfs_put_block_group(block_group); | ||
6566 | } else if (block_group) { | ||
6567 | btrfs_put_block_group(block_group); | ||
6568 | } | ||
6569 | } | ||
6570 | |||
6571 | spin_lock(&root->orphan_lock); | ||
5368 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 6572 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { |
5369 | printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", | 6573 | printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", |
5370 | inode->i_ino); | 6574 | inode->i_ino); |
5371 | list_del_init(&BTRFS_I(inode)->i_orphan); | 6575 | list_del_init(&BTRFS_I(inode)->i_orphan); |
5372 | } | 6576 | } |
5373 | spin_unlock(&root->list_lock); | 6577 | spin_unlock(&root->orphan_lock); |
5374 | 6578 | ||
5375 | while (1) { | 6579 | while (1) { |
5376 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); | 6580 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |
@@ -5389,17 +6593,18 @@ void btrfs_destroy_inode(struct inode *inode) | |||
5389 | inode_tree_del(inode); | 6593 | inode_tree_del(inode); |
5390 | btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); | 6594 | btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); |
5391 | free: | 6595 | free: |
5392 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); | 6596 | call_rcu(&inode->i_rcu, btrfs_i_callback); |
5393 | } | 6597 | } |
5394 | 6598 | ||
5395 | void btrfs_drop_inode(struct inode *inode) | 6599 | int btrfs_drop_inode(struct inode *inode) |
5396 | { | 6600 | { |
5397 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6601 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5398 | 6602 | ||
5399 | if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) | 6603 | if (btrfs_root_refs(&root->root_item) == 0 && |
5400 | generic_delete_inode(inode); | 6604 | root != root->fs_info->tree_root) |
6605 | return 1; | ||
5401 | else | 6606 | else |
5402 | generic_drop_inode(inode); | 6607 | return generic_drop_inode(inode); |
5403 | } | 6608 | } |
5404 | 6609 | ||
5405 | static void init_once(void *foo) | 6610 | static void init_once(void *foo) |
@@ -5492,19 +6697,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
5492 | if (S_ISDIR(old_inode->i_mode) && new_inode && | 6697 | if (S_ISDIR(old_inode->i_mode) && new_inode && |
5493 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) | 6698 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
5494 | return -ENOTEMPTY; | 6699 | return -ENOTEMPTY; |
5495 | |||
5496 | /* | ||
5497 | * We want to reserve the absolute worst case amount of items. So if | ||
5498 | * both inodes are subvols and we need to unlink them then that would | ||
5499 | * require 4 item modifications, but if they are both normal inodes it | ||
5500 | * would require 5 item modifications, so we'll assume their normal | ||
5501 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | ||
5502 | * should cover the worst case number of items we'll modify. | ||
5503 | */ | ||
5504 | ret = btrfs_reserve_metadata_space(root, 11); | ||
5505 | if (ret) | ||
5506 | return ret; | ||
5507 | |||
5508 | /* | 6700 | /* |
5509 | * we're using rename to replace one file with another. | 6701 | * we're using rename to replace one file with another. |
5510 | * and the replacement file is large. Start IO on it now so | 6702 | * and the replacement file is large. Start IO on it now so |
@@ -5517,8 +6709,18 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
5517 | /* close the racy window with snapshot create/destroy ioctl */ | 6709 | /* close the racy window with snapshot create/destroy ioctl */ |
5518 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 6710 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
5519 | down_read(&root->fs_info->subvol_sem); | 6711 | down_read(&root->fs_info->subvol_sem); |
6712 | /* | ||
6713 | * We want to reserve the absolute worst case amount of items. So if | ||
6714 | * both inodes are subvols and we need to unlink them then that would | ||
6715 | * require 4 item modifications, but if they are both normal inodes it | ||
6716 | * would require 5 item modifications, so we'll assume their normal | ||
6717 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | ||
6718 | * should cover the worst case number of items we'll modify. | ||
6719 | */ | ||
6720 | trans = btrfs_start_transaction(root, 20); | ||
6721 | if (IS_ERR(trans)) | ||
6722 | return PTR_ERR(trans); | ||
5520 | 6723 | ||
5521 | trans = btrfs_start_transaction(root, 1); | ||
5522 | btrfs_set_trans_block_group(trans, new_dir); | 6724 | btrfs_set_trans_block_group(trans, new_dir); |
5523 | 6725 | ||
5524 | if (dest != root) | 6726 | if (dest != root) |
@@ -5607,8 +6809,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
5607 | BUG_ON(ret); | 6809 | BUG_ON(ret); |
5608 | 6810 | ||
5609 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { | 6811 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { |
5610 | btrfs_log_new_name(trans, old_inode, old_dir, | 6812 | struct dentry *parent = dget_parent(new_dentry); |
5611 | new_dentry->d_parent); | 6813 | btrfs_log_new_name(trans, old_inode, old_dir, parent); |
6814 | dput(parent); | ||
5612 | btrfs_end_log_trans(root); | 6815 | btrfs_end_log_trans(root); |
5613 | } | 6816 | } |
5614 | out_fail: | 6817 | out_fail: |
@@ -5617,7 +6820,6 @@ out_fail: | |||
5617 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 6820 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
5618 | up_read(&root->fs_info->subvol_sem); | 6821 | up_read(&root->fs_info->subvol_sem); |
5619 | 6822 | ||
5620 | btrfs_unreserve_metadata_space(root, 11); | ||
5621 | return ret; | 6823 | return ret; |
5622 | } | 6824 | } |
5623 | 6825 | ||
@@ -5669,6 +6871,58 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
5669 | return 0; | 6871 | return 0; |
5670 | } | 6872 | } |
5671 | 6873 | ||
6874 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput, | ||
6875 | int sync) | ||
6876 | { | ||
6877 | struct btrfs_inode *binode; | ||
6878 | struct inode *inode = NULL; | ||
6879 | |||
6880 | spin_lock(&root->fs_info->delalloc_lock); | ||
6881 | while (!list_empty(&root->fs_info->delalloc_inodes)) { | ||
6882 | binode = list_entry(root->fs_info->delalloc_inodes.next, | ||
6883 | struct btrfs_inode, delalloc_inodes); | ||
6884 | inode = igrab(&binode->vfs_inode); | ||
6885 | if (inode) { | ||
6886 | list_move_tail(&binode->delalloc_inodes, | ||
6887 | &root->fs_info->delalloc_inodes); | ||
6888 | break; | ||
6889 | } | ||
6890 | |||
6891 | list_del_init(&binode->delalloc_inodes); | ||
6892 | cond_resched_lock(&root->fs_info->delalloc_lock); | ||
6893 | } | ||
6894 | spin_unlock(&root->fs_info->delalloc_lock); | ||
6895 | |||
6896 | if (inode) { | ||
6897 | if (sync) { | ||
6898 | filemap_write_and_wait(inode->i_mapping); | ||
6899 | /* | ||
6900 | * We have to do this because compression doesn't | ||
6901 | * actually set PG_writeback until it submits the pages | ||
6902 | * for IO, which happens in an async thread, so we could | ||
6903 | * race and not actually wait for any writeback pages | ||
6904 | * because they've not been submitted yet. Technically | ||
6905 | * this could still be the case for the ordered stuff | ||
6906 | * since the async thread may not have started to do its | ||
6907 | * work yet. If this becomes the case then we need to | ||
6908 | * figure out a way to make sure that in writepage we | ||
6909 | * wait for any async pages to be submitted before | ||
6910 | * returning so that fdatawait does what its supposed to | ||
6911 | * do. | ||
6912 | */ | ||
6913 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
6914 | } else { | ||
6915 | filemap_flush(inode->i_mapping); | ||
6916 | } | ||
6917 | if (delay_iput) | ||
6918 | btrfs_add_delayed_iput(inode); | ||
6919 | else | ||
6920 | iput(inode); | ||
6921 | return 1; | ||
6922 | } | ||
6923 | return 0; | ||
6924 | } | ||
6925 | |||
5672 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | 6926 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, |
5673 | const char *symname) | 6927 | const char *symname) |
5674 | { | 6928 | { |
@@ -5692,29 +6946,22 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
5692 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | 6946 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) |
5693 | return -ENAMETOOLONG; | 6947 | return -ENAMETOOLONG; |
5694 | 6948 | ||
6949 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
6950 | if (err) | ||
6951 | return err; | ||
5695 | /* | 6952 | /* |
5696 | * 2 items for inode item and ref | 6953 | * 2 items for inode item and ref |
5697 | * 2 items for dir items | 6954 | * 2 items for dir items |
5698 | * 1 item for xattr if selinux is on | 6955 | * 1 item for xattr if selinux is on |
5699 | */ | 6956 | */ |
5700 | err = btrfs_reserve_metadata_space(root, 5); | 6957 | trans = btrfs_start_transaction(root, 5); |
5701 | if (err) | 6958 | if (IS_ERR(trans)) |
5702 | return err; | 6959 | return PTR_ERR(trans); |
5703 | 6960 | ||
5704 | trans = btrfs_start_transaction(root, 1); | ||
5705 | if (!trans) | ||
5706 | goto out_fail; | ||
5707 | btrfs_set_trans_block_group(trans, dir); | 6961 | btrfs_set_trans_block_group(trans, dir); |
5708 | 6962 | ||
5709 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
5710 | if (err) { | ||
5711 | err = -ENOSPC; | ||
5712 | goto out_unlock; | ||
5713 | } | ||
5714 | |||
5715 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 6963 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
5716 | dentry->d_name.len, | 6964 | dentry->d_name.len, dir->i_ino, objectid, |
5717 | dentry->d_parent->d_inode->i_ino, objectid, | ||
5718 | BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, | 6965 | BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, |
5719 | &index); | 6966 | &index); |
5720 | err = PTR_ERR(inode); | 6967 | err = PTR_ERR(inode); |
@@ -5728,7 +6975,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
5728 | } | 6975 | } |
5729 | 6976 | ||
5730 | btrfs_set_trans_block_group(trans, inode); | 6977 | btrfs_set_trans_block_group(trans, inode); |
5731 | err = btrfs_add_nondir(trans, dentry, inode, 0, index); | 6978 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
5732 | if (err) | 6979 | if (err) |
5733 | drop_inode = 1; | 6980 | drop_inode = 1; |
5734 | else { | 6981 | else { |
@@ -5783,8 +7030,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
5783 | out_unlock: | 7030 | out_unlock: |
5784 | nr = trans->blocks_used; | 7031 | nr = trans->blocks_used; |
5785 | btrfs_end_transaction_throttle(trans, root); | 7032 | btrfs_end_transaction_throttle(trans, root); |
5786 | out_fail: | ||
5787 | btrfs_unreserve_metadata_space(root, 5); | ||
5788 | if (drop_inode) { | 7033 | if (drop_inode) { |
5789 | inode_dec_link_count(inode); | 7034 | inode_dec_link_count(inode); |
5790 | iput(inode); | 7035 | iput(inode); |
@@ -5793,36 +7038,35 @@ out_fail: | |||
5793 | return err; | 7038 | return err; |
5794 | } | 7039 | } |
5795 | 7040 | ||
5796 | static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | 7041 | static int __btrfs_prealloc_file_range(struct inode *inode, int mode, |
5797 | u64 alloc_hint, int mode, loff_t actual_len) | 7042 | u64 start, u64 num_bytes, u64 min_size, |
7043 | loff_t actual_len, u64 *alloc_hint, | ||
7044 | struct btrfs_trans_handle *trans) | ||
5798 | { | 7045 | { |
5799 | struct btrfs_trans_handle *trans; | ||
5800 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7046 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5801 | struct btrfs_key ins; | 7047 | struct btrfs_key ins; |
5802 | u64 alloc_size; | ||
5803 | u64 cur_offset = start; | 7048 | u64 cur_offset = start; |
5804 | u64 num_bytes = end - start; | ||
5805 | int ret = 0; | ||
5806 | u64 i_size; | 7049 | u64 i_size; |
7050 | int ret = 0; | ||
7051 | bool own_trans = true; | ||
5807 | 7052 | ||
7053 | if (trans) | ||
7054 | own_trans = false; | ||
5808 | while (num_bytes > 0) { | 7055 | while (num_bytes > 0) { |
5809 | alloc_size = min(num_bytes, root->fs_info->max_extent); | 7056 | if (own_trans) { |
5810 | 7057 | trans = btrfs_start_transaction(root, 3); | |
5811 | trans = btrfs_start_transaction(root, 1); | 7058 | if (IS_ERR(trans)) { |
5812 | 7059 | ret = PTR_ERR(trans); | |
5813 | ret = btrfs_reserve_extent(trans, root, alloc_size, | 7060 | break; |
5814 | root->sectorsize, 0, alloc_hint, | 7061 | } |
5815 | (u64)-1, &ins, 1); | ||
5816 | if (ret) { | ||
5817 | WARN_ON(1); | ||
5818 | goto stop_trans; | ||
5819 | } | 7062 | } |
5820 | 7063 | ||
5821 | ret = btrfs_reserve_metadata_space(root, 3); | 7064 | ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, |
7065 | 0, *alloc_hint, (u64)-1, &ins, 1); | ||
5822 | if (ret) { | 7066 | if (ret) { |
5823 | btrfs_free_reserved_extent(root, ins.objectid, | 7067 | if (own_trans) |
5824 | ins.offset); | 7068 | btrfs_end_transaction(trans, root); |
5825 | goto stop_trans; | 7069 | break; |
5826 | } | 7070 | } |
5827 | 7071 | ||
5828 | ret = insert_reserved_file_extent(trans, inode, | 7072 | ret = insert_reserved_file_extent(trans, inode, |
@@ -5836,14 +7080,15 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | |||
5836 | 7080 | ||
5837 | num_bytes -= ins.offset; | 7081 | num_bytes -= ins.offset; |
5838 | cur_offset += ins.offset; | 7082 | cur_offset += ins.offset; |
5839 | alloc_hint = ins.objectid + ins.offset; | 7083 | *alloc_hint = ins.objectid + ins.offset; |
5840 | 7084 | ||
5841 | inode->i_ctime = CURRENT_TIME; | 7085 | inode->i_ctime = CURRENT_TIME; |
5842 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; | 7086 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; |
5843 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 7087 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
5844 | cur_offset > inode->i_size) { | 7088 | (actual_len > inode->i_size) && |
7089 | (cur_offset > inode->i_size)) { | ||
5845 | if (cur_offset > actual_len) | 7090 | if (cur_offset > actual_len) |
5846 | i_size = actual_len; | 7091 | i_size = actual_len; |
5847 | else | 7092 | else |
5848 | i_size = cur_offset; | 7093 | i_size = cur_offset; |
5849 | i_size_write(inode, i_size); | 7094 | i_size_write(inode, i_size); |
@@ -5853,117 +7098,28 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | |||
5853 | ret = btrfs_update_inode(trans, root, inode); | 7098 | ret = btrfs_update_inode(trans, root, inode); |
5854 | BUG_ON(ret); | 7099 | BUG_ON(ret); |
5855 | 7100 | ||
5856 | btrfs_end_transaction(trans, root); | 7101 | if (own_trans) |
5857 | btrfs_unreserve_metadata_space(root, 3); | 7102 | btrfs_end_transaction(trans, root); |
5858 | } | 7103 | } |
5859 | return ret; | 7104 | return ret; |
5860 | |||
5861 | stop_trans: | ||
5862 | btrfs_end_transaction(trans, root); | ||
5863 | return ret; | ||
5864 | |||
5865 | } | 7105 | } |
5866 | 7106 | ||
5867 | static long btrfs_fallocate(struct inode *inode, int mode, | 7107 | int btrfs_prealloc_file_range(struct inode *inode, int mode, |
5868 | loff_t offset, loff_t len) | 7108 | u64 start, u64 num_bytes, u64 min_size, |
7109 | loff_t actual_len, u64 *alloc_hint) | ||
5869 | { | 7110 | { |
5870 | u64 cur_offset; | 7111 | return __btrfs_prealloc_file_range(inode, mode, start, num_bytes, |
5871 | u64 last_byte; | 7112 | min_size, actual_len, alloc_hint, |
5872 | u64 alloc_start; | 7113 | NULL); |
5873 | u64 alloc_end; | 7114 | } |
5874 | u64 alloc_hint = 0; | ||
5875 | u64 locked_end; | ||
5876 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
5877 | struct extent_map *em; | ||
5878 | int ret; | ||
5879 | |||
5880 | alloc_start = offset & ~mask; | ||
5881 | alloc_end = (offset + len + mask) & ~mask; | ||
5882 | |||
5883 | /* | ||
5884 | * wait for ordered IO before we have any locks. We'll loop again | ||
5885 | * below with the locks held. | ||
5886 | */ | ||
5887 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
5888 | |||
5889 | mutex_lock(&inode->i_mutex); | ||
5890 | if (alloc_start > inode->i_size) { | ||
5891 | ret = btrfs_cont_expand(inode, alloc_start); | ||
5892 | if (ret) | ||
5893 | goto out; | ||
5894 | } | ||
5895 | |||
5896 | ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode, | ||
5897 | alloc_end - alloc_start); | ||
5898 | if (ret) | ||
5899 | goto out; | ||
5900 | |||
5901 | locked_end = alloc_end - 1; | ||
5902 | while (1) { | ||
5903 | struct btrfs_ordered_extent *ordered; | ||
5904 | |||
5905 | /* the extent lock is ordered inside the running | ||
5906 | * transaction | ||
5907 | */ | ||
5908 | lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||
5909 | GFP_NOFS); | ||
5910 | ordered = btrfs_lookup_first_ordered_extent(inode, | ||
5911 | alloc_end - 1); | ||
5912 | if (ordered && | ||
5913 | ordered->file_offset + ordered->len > alloc_start && | ||
5914 | ordered->file_offset < alloc_end) { | ||
5915 | btrfs_put_ordered_extent(ordered); | ||
5916 | unlock_extent(&BTRFS_I(inode)->io_tree, | ||
5917 | alloc_start, locked_end, GFP_NOFS); | ||
5918 | /* | ||
5919 | * we can't wait on the range with the transaction | ||
5920 | * running or with the extent lock held | ||
5921 | */ | ||
5922 | btrfs_wait_ordered_range(inode, alloc_start, | ||
5923 | alloc_end - alloc_start); | ||
5924 | } else { | ||
5925 | if (ordered) | ||
5926 | btrfs_put_ordered_extent(ordered); | ||
5927 | break; | ||
5928 | } | ||
5929 | } | ||
5930 | |||
5931 | cur_offset = alloc_start; | ||
5932 | while (1) { | ||
5933 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | ||
5934 | alloc_end - cur_offset, 0); | ||
5935 | BUG_ON(IS_ERR(em) || !em); | ||
5936 | last_byte = min(extent_map_end(em), alloc_end); | ||
5937 | last_byte = (last_byte + mask) & ~mask; | ||
5938 | if (em->block_start == EXTENT_MAP_HOLE || | ||
5939 | (cur_offset >= inode->i_size && | ||
5940 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
5941 | ret = prealloc_file_range(inode, | ||
5942 | cur_offset, last_byte, | ||
5943 | alloc_hint, mode, offset+len); | ||
5944 | if (ret < 0) { | ||
5945 | free_extent_map(em); | ||
5946 | break; | ||
5947 | } | ||
5948 | } | ||
5949 | if (em->block_start <= EXTENT_MAP_LAST_BYTE) | ||
5950 | alloc_hint = em->block_start; | ||
5951 | free_extent_map(em); | ||
5952 | |||
5953 | cur_offset = last_byte; | ||
5954 | if (cur_offset >= alloc_end) { | ||
5955 | ret = 0; | ||
5956 | break; | ||
5957 | } | ||
5958 | } | ||
5959 | unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||
5960 | GFP_NOFS); | ||
5961 | 7115 | ||
5962 | btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, | 7116 | int btrfs_prealloc_file_range_trans(struct inode *inode, |
5963 | alloc_end - alloc_start); | 7117 | struct btrfs_trans_handle *trans, int mode, |
5964 | out: | 7118 | u64 start, u64 num_bytes, u64 min_size, |
5965 | mutex_unlock(&inode->i_mutex); | 7119 | loff_t actual_len, u64 *alloc_hint) |
5966 | return ret; | 7120 | { |
7121 | return __btrfs_prealloc_file_range(inode, mode, start, num_bytes, | ||
7122 | min_size, actual_len, alloc_hint, trans); | ||
5967 | } | 7123 | } |
5968 | 7124 | ||
5969 | static int btrfs_set_page_dirty(struct page *page) | 7125 | static int btrfs_set_page_dirty(struct page *page) |
@@ -5971,11 +7127,15 @@ static int btrfs_set_page_dirty(struct page *page) | |||
5971 | return __set_page_dirty_nobuffers(page); | 7127 | return __set_page_dirty_nobuffers(page); |
5972 | } | 7128 | } |
5973 | 7129 | ||
5974 | static int btrfs_permission(struct inode *inode, int mask) | 7130 | static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) |
5975 | { | 7131 | { |
7132 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
7133 | |||
7134 | if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) | ||
7135 | return -EROFS; | ||
5976 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) | 7136 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) |
5977 | return -EACCES; | 7137 | return -EACCES; |
5978 | return generic_permission(inode, mask, btrfs_check_acl); | 7138 | return generic_permission(inode, mask, flags, btrfs_check_acl); |
5979 | } | 7139 | } |
5980 | 7140 | ||
5981 | static const struct inode_operations btrfs_dir_inode_operations = { | 7141 | static const struct inode_operations btrfs_dir_inode_operations = { |
@@ -6068,7 +7228,6 @@ static const struct inode_operations btrfs_file_inode_operations = { | |||
6068 | .listxattr = btrfs_listxattr, | 7228 | .listxattr = btrfs_listxattr, |
6069 | .removexattr = btrfs_removexattr, | 7229 | .removexattr = btrfs_removexattr, |
6070 | .permission = btrfs_permission, | 7230 | .permission = btrfs_permission, |
6071 | .fallocate = btrfs_fallocate, | ||
6072 | .fiemap = btrfs_fiemap, | 7231 | .fiemap = btrfs_fiemap, |
6073 | }; | 7232 | }; |
6074 | static const struct inode_operations btrfs_special_inode_operations = { | 7233 | static const struct inode_operations btrfs_special_inode_operations = { |
@@ -6084,6 +7243,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { | |||
6084 | .readlink = generic_readlink, | 7243 | .readlink = generic_readlink, |
6085 | .follow_link = page_follow_link_light, | 7244 | .follow_link = page_follow_link_light, |
6086 | .put_link = page_put_link, | 7245 | .put_link = page_put_link, |
7246 | .getattr = btrfs_getattr, | ||
6087 | .permission = btrfs_permission, | 7247 | .permission = btrfs_permission, |
6088 | .setxattr = btrfs_setxattr, | 7248 | .setxattr = btrfs_setxattr, |
6089 | .getxattr = btrfs_getxattr, | 7249 | .getxattr = btrfs_getxattr, |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 645a17927a8f..a506a22b522a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/security.h> | 39 | #include <linux/security.h> |
40 | #include <linux/xattr.h> | 40 | #include <linux/xattr.h> |
41 | #include <linux/vmalloc.h> | 41 | #include <linux/vmalloc.h> |
42 | #include <linux/slab.h> | ||
42 | #include "compat.h" | 43 | #include "compat.h" |
43 | #include "ctree.h" | 44 | #include "ctree.h" |
44 | #include "disk-io.h" | 45 | #include "disk-io.h" |
@@ -146,6 +147,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
146 | unsigned int flags, oldflags; | 147 | unsigned int flags, oldflags; |
147 | int ret; | 148 | int ret; |
148 | 149 | ||
150 | if (btrfs_root_readonly(root)) | ||
151 | return -EROFS; | ||
152 | |||
149 | if (copy_from_user(&flags, arg, sizeof(flags))) | 153 | if (copy_from_user(&flags, arg, sizeof(flags))) |
150 | return -EFAULT; | 154 | return -EFAULT; |
151 | 155 | ||
@@ -223,7 +227,8 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg) | |||
223 | 227 | ||
224 | static noinline int create_subvol(struct btrfs_root *root, | 228 | static noinline int create_subvol(struct btrfs_root *root, |
225 | struct dentry *dentry, | 229 | struct dentry *dentry, |
226 | char *name, int namelen) | 230 | char *name, int namelen, |
231 | u64 *async_transid) | ||
227 | { | 232 | { |
228 | struct btrfs_trans_handle *trans; | 233 | struct btrfs_trans_handle *trans; |
229 | struct btrfs_key key; | 234 | struct btrfs_key key; |
@@ -231,30 +236,34 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
231 | struct btrfs_inode_item *inode_item; | 236 | struct btrfs_inode_item *inode_item; |
232 | struct extent_buffer *leaf; | 237 | struct extent_buffer *leaf; |
233 | struct btrfs_root *new_root; | 238 | struct btrfs_root *new_root; |
234 | struct inode *dir = dentry->d_parent->d_inode; | 239 | struct dentry *parent = dget_parent(dentry); |
240 | struct inode *dir; | ||
235 | int ret; | 241 | int ret; |
236 | int err; | 242 | int err; |
237 | u64 objectid; | 243 | u64 objectid; |
238 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; | 244 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; |
239 | u64 index = 0; | 245 | u64 index = 0; |
240 | 246 | ||
247 | ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, | ||
248 | 0, &objectid); | ||
249 | if (ret) { | ||
250 | dput(parent); | ||
251 | return ret; | ||
252 | } | ||
253 | |||
254 | dir = parent->d_inode; | ||
255 | |||
241 | /* | 256 | /* |
242 | * 1 - inode item | 257 | * 1 - inode item |
243 | * 2 - refs | 258 | * 2 - refs |
244 | * 1 - root item | 259 | * 1 - root item |
245 | * 2 - dir items | 260 | * 2 - dir items |
246 | */ | 261 | */ |
247 | ret = btrfs_reserve_metadata_space(root, 6); | 262 | trans = btrfs_start_transaction(root, 6); |
248 | if (ret) | 263 | if (IS_ERR(trans)) { |
249 | return ret; | 264 | dput(parent); |
250 | 265 | return PTR_ERR(trans); | |
251 | trans = btrfs_start_transaction(root, 1); | 266 | } |
252 | BUG_ON(!trans); | ||
253 | |||
254 | ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, | ||
255 | 0, &objectid); | ||
256 | if (ret) | ||
257 | goto fail; | ||
258 | 267 | ||
259 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 268 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
260 | 0, objectid, NULL, 0, 0, 0); | 269 | 0, objectid, NULL, 0, 0, 0); |
@@ -341,18 +350,24 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
341 | 350 | ||
342 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); | 351 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); |
343 | fail: | 352 | fail: |
344 | err = btrfs_commit_transaction(trans, root); | 353 | dput(parent); |
354 | if (async_transid) { | ||
355 | *async_transid = trans->transid; | ||
356 | err = btrfs_commit_transaction_async(trans, root, 1); | ||
357 | } else { | ||
358 | err = btrfs_commit_transaction(trans, root); | ||
359 | } | ||
345 | if (err && !ret) | 360 | if (err && !ret) |
346 | ret = err; | 361 | ret = err; |
347 | |||
348 | btrfs_unreserve_metadata_space(root, 6); | ||
349 | return ret; | 362 | return ret; |
350 | } | 363 | } |
351 | 364 | ||
352 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | 365 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, |
353 | char *name, int namelen) | 366 | char *name, int namelen, u64 *async_transid, |
367 | bool readonly) | ||
354 | { | 368 | { |
355 | struct inode *inode; | 369 | struct inode *inode; |
370 | struct dentry *parent; | ||
356 | struct btrfs_pending_snapshot *pending_snapshot; | 371 | struct btrfs_pending_snapshot *pending_snapshot; |
357 | struct btrfs_trans_handle *trans; | 372 | struct btrfs_trans_handle *trans; |
358 | int ret; | 373 | int ret; |
@@ -360,42 +375,45 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
360 | if (!root->ref_cows) | 375 | if (!root->ref_cows) |
361 | return -EINVAL; | 376 | return -EINVAL; |
362 | 377 | ||
363 | /* | ||
364 | * 1 - inode item | ||
365 | * 2 - refs | ||
366 | * 1 - root item | ||
367 | * 2 - dir items | ||
368 | */ | ||
369 | ret = btrfs_reserve_metadata_space(root, 6); | ||
370 | if (ret) | ||
371 | goto fail; | ||
372 | |||
373 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 378 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
374 | if (!pending_snapshot) { | 379 | if (!pending_snapshot) |
375 | ret = -ENOMEM; | 380 | return -ENOMEM; |
376 | btrfs_unreserve_metadata_space(root, 6); | 381 | |
377 | goto fail; | 382 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); |
378 | } | ||
379 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); | ||
380 | if (!pending_snapshot->name) { | ||
381 | ret = -ENOMEM; | ||
382 | kfree(pending_snapshot); | ||
383 | btrfs_unreserve_metadata_space(root, 6); | ||
384 | goto fail; | ||
385 | } | ||
386 | memcpy(pending_snapshot->name, name, namelen); | ||
387 | pending_snapshot->name[namelen] = '\0'; | ||
388 | pending_snapshot->dentry = dentry; | 383 | pending_snapshot->dentry = dentry; |
389 | trans = btrfs_start_transaction(root, 1); | ||
390 | BUG_ON(!trans); | ||
391 | pending_snapshot->root = root; | 384 | pending_snapshot->root = root; |
385 | pending_snapshot->readonly = readonly; | ||
386 | |||
387 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | ||
388 | if (IS_ERR(trans)) { | ||
389 | ret = PTR_ERR(trans); | ||
390 | goto fail; | ||
391 | } | ||
392 | |||
393 | ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); | ||
394 | BUG_ON(ret); | ||
395 | |||
392 | list_add(&pending_snapshot->list, | 396 | list_add(&pending_snapshot->list, |
393 | &trans->transaction->pending_snapshots); | 397 | &trans->transaction->pending_snapshots); |
394 | ret = btrfs_commit_transaction(trans, root); | 398 | if (async_transid) { |
399 | *async_transid = trans->transid; | ||
400 | ret = btrfs_commit_transaction_async(trans, | ||
401 | root->fs_info->extent_root, 1); | ||
402 | } else { | ||
403 | ret = btrfs_commit_transaction(trans, | ||
404 | root->fs_info->extent_root); | ||
405 | } | ||
395 | BUG_ON(ret); | 406 | BUG_ON(ret); |
396 | btrfs_unreserve_metadata_space(root, 6); | ||
397 | 407 | ||
398 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); | 408 | ret = pending_snapshot->error; |
409 | if (ret) | ||
410 | goto fail; | ||
411 | |||
412 | btrfs_orphan_cleanup(pending_snapshot->snap); | ||
413 | |||
414 | parent = dget_parent(dentry); | ||
415 | inode = btrfs_lookup_dentry(parent->d_inode, dentry); | ||
416 | dput(parent); | ||
399 | if (IS_ERR(inode)) { | 417 | if (IS_ERR(inode)) { |
400 | ret = PTR_ERR(inode); | 418 | ret = PTR_ERR(inode); |
401 | goto fail; | 419 | goto fail; |
@@ -404,9 +422,80 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
404 | d_instantiate(dentry, inode); | 422 | d_instantiate(dentry, inode); |
405 | ret = 0; | 423 | ret = 0; |
406 | fail: | 424 | fail: |
425 | kfree(pending_snapshot); | ||
407 | return ret; | 426 | return ret; |
408 | } | 427 | } |
409 | 428 | ||
429 | /* copy of check_sticky in fs/namei.c() | ||
430 | * It's inline, so penalty for filesystems that don't use sticky bit is | ||
431 | * minimal. | ||
432 | */ | ||
433 | static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) | ||
434 | { | ||
435 | uid_t fsuid = current_fsuid(); | ||
436 | |||
437 | if (!(dir->i_mode & S_ISVTX)) | ||
438 | return 0; | ||
439 | if (inode->i_uid == fsuid) | ||
440 | return 0; | ||
441 | if (dir->i_uid == fsuid) | ||
442 | return 0; | ||
443 | return !capable(CAP_FOWNER); | ||
444 | } | ||
445 | |||
446 | /* copy of may_delete in fs/namei.c() | ||
447 | * Check whether we can remove a link victim from directory dir, check | ||
448 | * whether the type of victim is right. | ||
449 | * 1. We can't do it if dir is read-only (done in permission()) | ||
450 | * 2. We should have write and exec permissions on dir | ||
451 | * 3. We can't remove anything from append-only dir | ||
452 | * 4. We can't do anything with immutable dir (done in permission()) | ||
453 | * 5. If the sticky bit on dir is set we should either | ||
454 | * a. be owner of dir, or | ||
455 | * b. be owner of victim, or | ||
456 | * c. have CAP_FOWNER capability | ||
457 | * 6. If the victim is append-only or immutable we can't do antyhing with | ||
458 | * links pointing to it. | ||
459 | * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. | ||
460 | * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. | ||
461 | * 9. We can't remove a root or mountpoint. | ||
462 | * 10. We don't allow removal of NFS sillyrenamed files; it's handled by | ||
463 | * nfs_async_unlink(). | ||
464 | */ | ||
465 | |||
466 | static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir) | ||
467 | { | ||
468 | int error; | ||
469 | |||
470 | if (!victim->d_inode) | ||
471 | return -ENOENT; | ||
472 | |||
473 | BUG_ON(victim->d_parent->d_inode != dir); | ||
474 | audit_inode_child(victim, dir); | ||
475 | |||
476 | error = inode_permission(dir, MAY_WRITE | MAY_EXEC); | ||
477 | if (error) | ||
478 | return error; | ||
479 | if (IS_APPEND(dir)) | ||
480 | return -EPERM; | ||
481 | if (btrfs_check_sticky(dir, victim->d_inode)|| | ||
482 | IS_APPEND(victim->d_inode)|| | ||
483 | IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) | ||
484 | return -EPERM; | ||
485 | if (isdir) { | ||
486 | if (!S_ISDIR(victim->d_inode->i_mode)) | ||
487 | return -ENOTDIR; | ||
488 | if (IS_ROOT(victim)) | ||
489 | return -EBUSY; | ||
490 | } else if (S_ISDIR(victim->d_inode->i_mode)) | ||
491 | return -EISDIR; | ||
492 | if (IS_DEADDIR(dir)) | ||
493 | return -ENOENT; | ||
494 | if (victim->d_flags & DCACHE_NFSFS_RENAMED) | ||
495 | return -EBUSY; | ||
496 | return 0; | ||
497 | } | ||
498 | |||
410 | /* copy of may_create in fs/namei.c() */ | 499 | /* copy of may_create in fs/namei.c() */ |
411 | static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | 500 | static inline int btrfs_may_create(struct inode *dir, struct dentry *child) |
412 | { | 501 | { |
@@ -424,7 +513,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | |||
424 | */ | 513 | */ |
425 | static noinline int btrfs_mksubvol(struct path *parent, | 514 | static noinline int btrfs_mksubvol(struct path *parent, |
426 | char *name, int namelen, | 515 | char *name, int namelen, |
427 | struct btrfs_root *snap_src) | 516 | struct btrfs_root *snap_src, |
517 | u64 *async_transid, bool readonly) | ||
428 | { | 518 | { |
429 | struct inode *dir = parent->dentry->d_inode; | 519 | struct inode *dir = parent->dentry->d_inode; |
430 | struct dentry *dentry; | 520 | struct dentry *dentry; |
@@ -456,10 +546,10 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
456 | 546 | ||
457 | if (snap_src) { | 547 | if (snap_src) { |
458 | error = create_snapshot(snap_src, dentry, | 548 | error = create_snapshot(snap_src, dentry, |
459 | name, namelen); | 549 | name, namelen, async_transid, readonly); |
460 | } else { | 550 | } else { |
461 | error = create_subvol(BTRFS_I(dir)->root, dentry, | 551 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
462 | name, namelen); | 552 | name, namelen, async_transid); |
463 | } | 553 | } |
464 | if (!error) | 554 | if (!error) |
465 | fsnotify_mkdir(dir, dentry); | 555 | fsnotify_mkdir(dir, dentry); |
@@ -474,49 +564,182 @@ out_unlock: | |||
474 | return error; | 564 | return error; |
475 | } | 565 | } |
476 | 566 | ||
477 | static int btrfs_defrag_file(struct file *file) | 567 | static int should_defrag_range(struct inode *inode, u64 start, u64 len, |
568 | int thresh, u64 *last_len, u64 *skip, | ||
569 | u64 *defrag_end) | ||
570 | { | ||
571 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
572 | struct extent_map *em = NULL; | ||
573 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
574 | int ret = 1; | ||
575 | |||
576 | |||
577 | if (thresh == 0) | ||
578 | thresh = 256 * 1024; | ||
579 | |||
580 | /* | ||
581 | * make sure that once we start defragging and extent, we keep on | ||
582 | * defragging it | ||
583 | */ | ||
584 | if (start < *defrag_end) | ||
585 | return 1; | ||
586 | |||
587 | *skip = 0; | ||
588 | |||
589 | /* | ||
590 | * hopefully we have this extent in the tree already, try without | ||
591 | * the full extent lock | ||
592 | */ | ||
593 | read_lock(&em_tree->lock); | ||
594 | em = lookup_extent_mapping(em_tree, start, len); | ||
595 | read_unlock(&em_tree->lock); | ||
596 | |||
597 | if (!em) { | ||
598 | /* get the big lock and read metadata off disk */ | ||
599 | lock_extent(io_tree, start, start + len - 1, GFP_NOFS); | ||
600 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | ||
601 | unlock_extent(io_tree, start, start + len - 1, GFP_NOFS); | ||
602 | |||
603 | if (IS_ERR(em)) | ||
604 | return 0; | ||
605 | } | ||
606 | |||
607 | /* this will cover holes, and inline extents */ | ||
608 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) | ||
609 | ret = 0; | ||
610 | |||
611 | /* | ||
612 | * we hit a real extent, if it is big don't bother defragging it again | ||
613 | */ | ||
614 | if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) | ||
615 | ret = 0; | ||
616 | |||
617 | /* | ||
618 | * last_len ends up being a counter of how many bytes we've defragged. | ||
619 | * every time we choose not to defrag an extent, we reset *last_len | ||
620 | * so that the next tiny extent will force a defrag. | ||
621 | * | ||
622 | * The end result of this is that tiny extents before a single big | ||
623 | * extent will force at least part of that big extent to be defragged. | ||
624 | */ | ||
625 | if (ret) { | ||
626 | *last_len += len; | ||
627 | *defrag_end = extent_map_end(em); | ||
628 | } else { | ||
629 | *last_len = 0; | ||
630 | *skip = extent_map_end(em); | ||
631 | *defrag_end = 0; | ||
632 | } | ||
633 | |||
634 | free_extent_map(em); | ||
635 | return ret; | ||
636 | } | ||
637 | |||
638 | static int btrfs_defrag_file(struct file *file, | ||
639 | struct btrfs_ioctl_defrag_range_args *range) | ||
478 | { | 640 | { |
479 | struct inode *inode = fdentry(file)->d_inode; | 641 | struct inode *inode = fdentry(file)->d_inode; |
480 | struct btrfs_root *root = BTRFS_I(inode)->root; | 642 | struct btrfs_root *root = BTRFS_I(inode)->root; |
481 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 643 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
482 | struct btrfs_ordered_extent *ordered; | 644 | struct btrfs_ordered_extent *ordered; |
483 | struct page *page; | 645 | struct page *page; |
646 | struct btrfs_super_block *disk_super; | ||
484 | unsigned long last_index; | 647 | unsigned long last_index; |
485 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; | 648 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; |
486 | unsigned long total_read = 0; | 649 | unsigned long total_read = 0; |
650 | u64 features; | ||
487 | u64 page_start; | 651 | u64 page_start; |
488 | u64 page_end; | 652 | u64 page_end; |
653 | u64 last_len = 0; | ||
654 | u64 skip = 0; | ||
655 | u64 defrag_end = 0; | ||
489 | unsigned long i; | 656 | unsigned long i; |
490 | int ret; | 657 | int ret; |
658 | int compress_type = BTRFS_COMPRESS_ZLIB; | ||
491 | 659 | ||
492 | ret = btrfs_check_data_free_space(root, inode, inode->i_size); | 660 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { |
493 | if (ret) | 661 | if (range->compress_type > BTRFS_COMPRESS_TYPES) |
494 | return -ENOSPC; | 662 | return -EINVAL; |
663 | if (range->compress_type) | ||
664 | compress_type = range->compress_type; | ||
665 | } | ||
666 | |||
667 | if (inode->i_size == 0) | ||
668 | return 0; | ||
669 | |||
670 | if (range->start + range->len > range->start) { | ||
671 | last_index = min_t(u64, inode->i_size - 1, | ||
672 | range->start + range->len - 1) >> PAGE_CACHE_SHIFT; | ||
673 | } else { | ||
674 | last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; | ||
675 | } | ||
676 | |||
677 | i = range->start >> PAGE_CACHE_SHIFT; | ||
678 | while (i <= last_index) { | ||
679 | if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, | ||
680 | PAGE_CACHE_SIZE, | ||
681 | range->extent_thresh, | ||
682 | &last_len, &skip, | ||
683 | &defrag_end)) { | ||
684 | unsigned long next; | ||
685 | /* | ||
686 | * the should_defrag function tells us how much to skip | ||
687 | * bump our counter by the suggested amount | ||
688 | */ | ||
689 | next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
690 | i = max(i + 1, next); | ||
691 | continue; | ||
692 | } | ||
495 | 693 | ||
496 | mutex_lock(&inode->i_mutex); | ||
497 | last_index = inode->i_size >> PAGE_CACHE_SHIFT; | ||
498 | for (i = 0; i <= last_index; i++) { | ||
499 | if (total_read % ra_pages == 0) { | 694 | if (total_read % ra_pages == 0) { |
500 | btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i, | 695 | btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i, |
501 | min(last_index, i + ra_pages - 1)); | 696 | min(last_index, i + ra_pages - 1)); |
502 | } | 697 | } |
503 | total_read++; | 698 | total_read++; |
699 | mutex_lock(&inode->i_mutex); | ||
700 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) | ||
701 | BTRFS_I(inode)->force_compress = compress_type; | ||
702 | |||
703 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | ||
704 | if (ret) | ||
705 | goto err_unlock; | ||
504 | again: | 706 | again: |
707 | if (inode->i_size == 0 || | ||
708 | i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { | ||
709 | ret = 0; | ||
710 | goto err_reservations; | ||
711 | } | ||
712 | |||
505 | page = grab_cache_page(inode->i_mapping, i); | 713 | page = grab_cache_page(inode->i_mapping, i); |
506 | if (!page) | 714 | if (!page) { |
507 | goto out_unlock; | 715 | ret = -ENOMEM; |
716 | goto err_reservations; | ||
717 | } | ||
718 | |||
508 | if (!PageUptodate(page)) { | 719 | if (!PageUptodate(page)) { |
509 | btrfs_readpage(NULL, page); | 720 | btrfs_readpage(NULL, page); |
510 | lock_page(page); | 721 | lock_page(page); |
511 | if (!PageUptodate(page)) { | 722 | if (!PageUptodate(page)) { |
512 | unlock_page(page); | 723 | unlock_page(page); |
513 | page_cache_release(page); | 724 | page_cache_release(page); |
514 | goto out_unlock; | 725 | ret = -EIO; |
726 | goto err_reservations; | ||
515 | } | 727 | } |
516 | } | 728 | } |
517 | 729 | ||
730 | if (page->mapping != inode->i_mapping) { | ||
731 | unlock_page(page); | ||
732 | page_cache_release(page); | ||
733 | goto again; | ||
734 | } | ||
735 | |||
518 | wait_on_page_writeback(page); | 736 | wait_on_page_writeback(page); |
519 | 737 | ||
738 | if (PageDirty(page)) { | ||
739 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | ||
740 | goto loop_unlock; | ||
741 | } | ||
742 | |||
520 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | 743 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; |
521 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 744 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
522 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | 745 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); |
@@ -537,18 +760,60 @@ again: | |||
537 | * page if it is dirtied again later | 760 | * page if it is dirtied again later |
538 | */ | 761 | */ |
539 | clear_page_dirty_for_io(page); | 762 | clear_page_dirty_for_io(page); |
763 | clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, | ||
764 | page_end, EXTENT_DIRTY | EXTENT_DELALLOC | | ||
765 | EXTENT_DO_ACCOUNTING, GFP_NOFS); | ||
540 | 766 | ||
541 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 767 | btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); |
768 | ClearPageChecked(page); | ||
542 | set_page_dirty(page); | 769 | set_page_dirty(page); |
543 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 770 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
771 | |||
772 | loop_unlock: | ||
544 | unlock_page(page); | 773 | unlock_page(page); |
545 | page_cache_release(page); | 774 | page_cache_release(page); |
775 | mutex_unlock(&inode->i_mutex); | ||
776 | |||
546 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | 777 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); |
778 | i++; | ||
779 | } | ||
780 | |||
781 | if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) | ||
782 | filemap_flush(inode->i_mapping); | ||
783 | |||
784 | if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { | ||
785 | /* the filemap_flush will queue IO into the worker threads, but | ||
786 | * we have to make sure the IO is actually started and that | ||
787 | * ordered extents get created before we return | ||
788 | */ | ||
789 | atomic_inc(&root->fs_info->async_submit_draining); | ||
790 | while (atomic_read(&root->fs_info->nr_async_submits) || | ||
791 | atomic_read(&root->fs_info->async_delalloc_pages)) { | ||
792 | wait_event(root->fs_info->async_submit_wait, | ||
793 | (atomic_read(&root->fs_info->nr_async_submits) == 0 && | ||
794 | atomic_read(&root->fs_info->async_delalloc_pages) == 0)); | ||
795 | } | ||
796 | atomic_dec(&root->fs_info->async_submit_draining); | ||
797 | |||
798 | mutex_lock(&inode->i_mutex); | ||
799 | BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; | ||
800 | mutex_unlock(&inode->i_mutex); | ||
801 | } | ||
802 | |||
803 | disk_super = &root->fs_info->super_copy; | ||
804 | features = btrfs_super_incompat_flags(disk_super); | ||
805 | if (range->compress_type == BTRFS_COMPRESS_LZO) { | ||
806 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; | ||
807 | btrfs_set_super_incompat_flags(disk_super, features); | ||
547 | } | 808 | } |
548 | 809 | ||
549 | out_unlock: | ||
550 | mutex_unlock(&inode->i_mutex); | ||
551 | return 0; | 810 | return 0; |
811 | |||
812 | err_reservations: | ||
813 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | ||
814 | err_unlock: | ||
815 | mutex_unlock(&inode->i_mutex); | ||
816 | return ret; | ||
552 | } | 817 | } |
553 | 818 | ||
554 | static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | 819 | static noinline int btrfs_ioctl_resize(struct btrfs_root *root, |
@@ -563,7 +828,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
563 | char *sizestr; | 828 | char *sizestr; |
564 | char *devstr = NULL; | 829 | char *devstr = NULL; |
565 | int ret = 0; | 830 | int ret = 0; |
566 | int namelen; | ||
567 | int mod = 0; | 831 | int mod = 0; |
568 | 832 | ||
569 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 833 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
@@ -577,7 +841,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
577 | return PTR_ERR(vol_args); | 841 | return PTR_ERR(vol_args); |
578 | 842 | ||
579 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 843 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
580 | namelen = strlen(vol_args->name); | ||
581 | 844 | ||
582 | mutex_lock(&root->fs_info->volume_mutex); | 845 | mutex_lock(&root->fs_info->volume_mutex); |
583 | sizestr = vol_args->name; | 846 | sizestr = vol_args->name; |
@@ -608,7 +871,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
608 | mod = 1; | 871 | mod = 1; |
609 | sizestr++; | 872 | sizestr++; |
610 | } | 873 | } |
611 | new_size = btrfs_parse_size(sizestr); | 874 | new_size = memparse(sizestr, NULL); |
612 | if (new_size == 0) { | 875 | if (new_size == 0) { |
613 | ret = -EINVAL; | 876 | ret = -EINVAL; |
614 | goto out_unlock; | 877 | goto out_unlock; |
@@ -643,7 +906,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
643 | device->name, (unsigned long long)new_size); | 906 | device->name, (unsigned long long)new_size); |
644 | 907 | ||
645 | if (new_size > old_size) { | 908 | if (new_size > old_size) { |
646 | trans = btrfs_start_transaction(root, 1); | 909 | trans = btrfs_start_transaction(root, 0); |
647 | ret = btrfs_grow_device(trans, device, new_size); | 910 | ret = btrfs_grow_device(trans, device, new_size); |
648 | btrfs_commit_transaction(trans, root); | 911 | btrfs_commit_transaction(trans, root); |
649 | } else { | 912 | } else { |
@@ -656,11 +919,14 @@ out_unlock: | |||
656 | return ret; | 919 | return ret; |
657 | } | 920 | } |
658 | 921 | ||
659 | static noinline int btrfs_ioctl_snap_create(struct file *file, | 922 | static noinline int btrfs_ioctl_snap_create_transid(struct file *file, |
660 | void __user *arg, int subvol) | 923 | char *name, |
924 | unsigned long fd, | ||
925 | int subvol, | ||
926 | u64 *transid, | ||
927 | bool readonly) | ||
661 | { | 928 | { |
662 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 929 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; |
663 | struct btrfs_ioctl_vol_args *vol_args; | ||
664 | struct file *src_file; | 930 | struct file *src_file; |
665 | int namelen; | 931 | int namelen; |
666 | int ret = 0; | 932 | int ret = 0; |
@@ -668,23 +934,18 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
668 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 934 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
669 | return -EROFS; | 935 | return -EROFS; |
670 | 936 | ||
671 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 937 | namelen = strlen(name); |
672 | if (IS_ERR(vol_args)) | 938 | if (strchr(name, '/')) { |
673 | return PTR_ERR(vol_args); | ||
674 | |||
675 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
676 | namelen = strlen(vol_args->name); | ||
677 | if (strchr(vol_args->name, '/')) { | ||
678 | ret = -EINVAL; | 939 | ret = -EINVAL; |
679 | goto out; | 940 | goto out; |
680 | } | 941 | } |
681 | 942 | ||
682 | if (subvol) { | 943 | if (subvol) { |
683 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, | 944 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
684 | NULL); | 945 | NULL, transid, readonly); |
685 | } else { | 946 | } else { |
686 | struct inode *src_inode; | 947 | struct inode *src_inode; |
687 | src_file = fget(vol_args->fd); | 948 | src_file = fget(fd); |
688 | if (!src_file) { | 949 | if (!src_file) { |
689 | ret = -EINVAL; | 950 | ret = -EINVAL; |
690 | goto out; | 951 | goto out; |
@@ -698,15 +959,152 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
698 | fput(src_file); | 959 | fput(src_file); |
699 | goto out; | 960 | goto out; |
700 | } | 961 | } |
701 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, | 962 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
702 | BTRFS_I(src_inode)->root); | 963 | BTRFS_I(src_inode)->root, |
964 | transid, readonly); | ||
703 | fput(src_file); | 965 | fput(src_file); |
704 | } | 966 | } |
705 | out: | 967 | out: |
968 | return ret; | ||
969 | } | ||
970 | |||
971 | static noinline int btrfs_ioctl_snap_create(struct file *file, | ||
972 | void __user *arg, int subvol) | ||
973 | { | ||
974 | struct btrfs_ioctl_vol_args *vol_args; | ||
975 | int ret; | ||
976 | |||
977 | vol_args = memdup_user(arg, sizeof(*vol_args)); | ||
978 | if (IS_ERR(vol_args)) | ||
979 | return PTR_ERR(vol_args); | ||
980 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
981 | |||
982 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, | ||
983 | vol_args->fd, subvol, | ||
984 | NULL, false); | ||
985 | |||
706 | kfree(vol_args); | 986 | kfree(vol_args); |
707 | return ret; | 987 | return ret; |
708 | } | 988 | } |
709 | 989 | ||
990 | static noinline int btrfs_ioctl_snap_create_v2(struct file *file, | ||
991 | void __user *arg, int subvol) | ||
992 | { | ||
993 | struct btrfs_ioctl_vol_args_v2 *vol_args; | ||
994 | int ret; | ||
995 | u64 transid = 0; | ||
996 | u64 *ptr = NULL; | ||
997 | bool readonly = false; | ||
998 | |||
999 | vol_args = memdup_user(arg, sizeof(*vol_args)); | ||
1000 | if (IS_ERR(vol_args)) | ||
1001 | return PTR_ERR(vol_args); | ||
1002 | vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; | ||
1003 | |||
1004 | if (vol_args->flags & | ||
1005 | ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { | ||
1006 | ret = -EOPNOTSUPP; | ||
1007 | goto out; | ||
1008 | } | ||
1009 | |||
1010 | if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) | ||
1011 | ptr = &transid; | ||
1012 | if (vol_args->flags & BTRFS_SUBVOL_RDONLY) | ||
1013 | readonly = true; | ||
1014 | |||
1015 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, | ||
1016 | vol_args->fd, subvol, | ||
1017 | ptr, readonly); | ||
1018 | |||
1019 | if (ret == 0 && ptr && | ||
1020 | copy_to_user(arg + | ||
1021 | offsetof(struct btrfs_ioctl_vol_args_v2, | ||
1022 | transid), ptr, sizeof(*ptr))) | ||
1023 | ret = -EFAULT; | ||
1024 | out: | ||
1025 | kfree(vol_args); | ||
1026 | return ret; | ||
1027 | } | ||
1028 | |||
1029 | static noinline int btrfs_ioctl_subvol_getflags(struct file *file, | ||
1030 | void __user *arg) | ||
1031 | { | ||
1032 | struct inode *inode = fdentry(file)->d_inode; | ||
1033 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1034 | int ret = 0; | ||
1035 | u64 flags = 0; | ||
1036 | |||
1037 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
1038 | return -EINVAL; | ||
1039 | |||
1040 | down_read(&root->fs_info->subvol_sem); | ||
1041 | if (btrfs_root_readonly(root)) | ||
1042 | flags |= BTRFS_SUBVOL_RDONLY; | ||
1043 | up_read(&root->fs_info->subvol_sem); | ||
1044 | |||
1045 | if (copy_to_user(arg, &flags, sizeof(flags))) | ||
1046 | ret = -EFAULT; | ||
1047 | |||
1048 | return ret; | ||
1049 | } | ||
1050 | |||
1051 | static noinline int btrfs_ioctl_subvol_setflags(struct file *file, | ||
1052 | void __user *arg) | ||
1053 | { | ||
1054 | struct inode *inode = fdentry(file)->d_inode; | ||
1055 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1056 | struct btrfs_trans_handle *trans; | ||
1057 | u64 root_flags; | ||
1058 | u64 flags; | ||
1059 | int ret = 0; | ||
1060 | |||
1061 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
1062 | return -EROFS; | ||
1063 | |||
1064 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
1065 | return -EINVAL; | ||
1066 | |||
1067 | if (copy_from_user(&flags, arg, sizeof(flags))) | ||
1068 | return -EFAULT; | ||
1069 | |||
1070 | if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC) | ||
1071 | return -EINVAL; | ||
1072 | |||
1073 | if (flags & ~BTRFS_SUBVOL_RDONLY) | ||
1074 | return -EOPNOTSUPP; | ||
1075 | |||
1076 | down_write(&root->fs_info->subvol_sem); | ||
1077 | |||
1078 | /* nothing to do */ | ||
1079 | if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) | ||
1080 | goto out; | ||
1081 | |||
1082 | root_flags = btrfs_root_flags(&root->root_item); | ||
1083 | if (flags & BTRFS_SUBVOL_RDONLY) | ||
1084 | btrfs_set_root_flags(&root->root_item, | ||
1085 | root_flags | BTRFS_ROOT_SUBVOL_RDONLY); | ||
1086 | else | ||
1087 | btrfs_set_root_flags(&root->root_item, | ||
1088 | root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); | ||
1089 | |||
1090 | trans = btrfs_start_transaction(root, 1); | ||
1091 | if (IS_ERR(trans)) { | ||
1092 | ret = PTR_ERR(trans); | ||
1093 | goto out_reset; | ||
1094 | } | ||
1095 | |||
1096 | ret = btrfs_update_root(trans, root, | ||
1097 | &root->root_key, &root->root_item); | ||
1098 | |||
1099 | btrfs_commit_transaction(trans, root); | ||
1100 | out_reset: | ||
1101 | if (ret) | ||
1102 | btrfs_set_root_flags(&root->root_item, root_flags); | ||
1103 | out: | ||
1104 | up_write(&root->fs_info->subvol_sem); | ||
1105 | return ret; | ||
1106 | } | ||
1107 | |||
710 | /* | 1108 | /* |
711 | * helper to check if the subvolume references other subvolumes | 1109 | * helper to check if the subvolume references other subvolumes |
712 | */ | 1110 | */ |
@@ -743,6 +1141,322 @@ out: | |||
743 | return ret; | 1141 | return ret; |
744 | } | 1142 | } |
745 | 1143 | ||
1144 | static noinline int key_in_sk(struct btrfs_key *key, | ||
1145 | struct btrfs_ioctl_search_key *sk) | ||
1146 | { | ||
1147 | struct btrfs_key test; | ||
1148 | int ret; | ||
1149 | |||
1150 | test.objectid = sk->min_objectid; | ||
1151 | test.type = sk->min_type; | ||
1152 | test.offset = sk->min_offset; | ||
1153 | |||
1154 | ret = btrfs_comp_cpu_keys(key, &test); | ||
1155 | if (ret < 0) | ||
1156 | return 0; | ||
1157 | |||
1158 | test.objectid = sk->max_objectid; | ||
1159 | test.type = sk->max_type; | ||
1160 | test.offset = sk->max_offset; | ||
1161 | |||
1162 | ret = btrfs_comp_cpu_keys(key, &test); | ||
1163 | if (ret > 0) | ||
1164 | return 0; | ||
1165 | return 1; | ||
1166 | } | ||
1167 | |||
1168 | static noinline int copy_to_sk(struct btrfs_root *root, | ||
1169 | struct btrfs_path *path, | ||
1170 | struct btrfs_key *key, | ||
1171 | struct btrfs_ioctl_search_key *sk, | ||
1172 | char *buf, | ||
1173 | unsigned long *sk_offset, | ||
1174 | int *num_found) | ||
1175 | { | ||
1176 | u64 found_transid; | ||
1177 | struct extent_buffer *leaf; | ||
1178 | struct btrfs_ioctl_search_header sh; | ||
1179 | unsigned long item_off; | ||
1180 | unsigned long item_len; | ||
1181 | int nritems; | ||
1182 | int i; | ||
1183 | int slot; | ||
1184 | int found = 0; | ||
1185 | int ret = 0; | ||
1186 | |||
1187 | leaf = path->nodes[0]; | ||
1188 | slot = path->slots[0]; | ||
1189 | nritems = btrfs_header_nritems(leaf); | ||
1190 | |||
1191 | if (btrfs_header_generation(leaf) > sk->max_transid) { | ||
1192 | i = nritems; | ||
1193 | goto advance_key; | ||
1194 | } | ||
1195 | found_transid = btrfs_header_generation(leaf); | ||
1196 | |||
1197 | for (i = slot; i < nritems; i++) { | ||
1198 | item_off = btrfs_item_ptr_offset(leaf, i); | ||
1199 | item_len = btrfs_item_size_nr(leaf, i); | ||
1200 | |||
1201 | if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE) | ||
1202 | item_len = 0; | ||
1203 | |||
1204 | if (sizeof(sh) + item_len + *sk_offset > | ||
1205 | BTRFS_SEARCH_ARGS_BUFSIZE) { | ||
1206 | ret = 1; | ||
1207 | goto overflow; | ||
1208 | } | ||
1209 | |||
1210 | btrfs_item_key_to_cpu(leaf, key, i); | ||
1211 | if (!key_in_sk(key, sk)) | ||
1212 | continue; | ||
1213 | |||
1214 | sh.objectid = key->objectid; | ||
1215 | sh.offset = key->offset; | ||
1216 | sh.type = key->type; | ||
1217 | sh.len = item_len; | ||
1218 | sh.transid = found_transid; | ||
1219 | |||
1220 | /* copy search result header */ | ||
1221 | memcpy(buf + *sk_offset, &sh, sizeof(sh)); | ||
1222 | *sk_offset += sizeof(sh); | ||
1223 | |||
1224 | if (item_len) { | ||
1225 | char *p = buf + *sk_offset; | ||
1226 | /* copy the item */ | ||
1227 | read_extent_buffer(leaf, p, | ||
1228 | item_off, item_len); | ||
1229 | *sk_offset += item_len; | ||
1230 | } | ||
1231 | found++; | ||
1232 | |||
1233 | if (*num_found >= sk->nr_items) | ||
1234 | break; | ||
1235 | } | ||
1236 | advance_key: | ||
1237 | ret = 0; | ||
1238 | if (key->offset < (u64)-1 && key->offset < sk->max_offset) | ||
1239 | key->offset++; | ||
1240 | else if (key->type < (u8)-1 && key->type < sk->max_type) { | ||
1241 | key->offset = 0; | ||
1242 | key->type++; | ||
1243 | } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) { | ||
1244 | key->offset = 0; | ||
1245 | key->type = 0; | ||
1246 | key->objectid++; | ||
1247 | } else | ||
1248 | ret = 1; | ||
1249 | overflow: | ||
1250 | *num_found += found; | ||
1251 | return ret; | ||
1252 | } | ||
1253 | |||
1254 | static noinline int search_ioctl(struct inode *inode, | ||
1255 | struct btrfs_ioctl_search_args *args) | ||
1256 | { | ||
1257 | struct btrfs_root *root; | ||
1258 | struct btrfs_key key; | ||
1259 | struct btrfs_key max_key; | ||
1260 | struct btrfs_path *path; | ||
1261 | struct btrfs_ioctl_search_key *sk = &args->key; | ||
1262 | struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; | ||
1263 | int ret; | ||
1264 | int num_found = 0; | ||
1265 | unsigned long sk_offset = 0; | ||
1266 | |||
1267 | path = btrfs_alloc_path(); | ||
1268 | if (!path) | ||
1269 | return -ENOMEM; | ||
1270 | |||
1271 | if (sk->tree_id == 0) { | ||
1272 | /* search the root of the inode that was passed */ | ||
1273 | root = BTRFS_I(inode)->root; | ||
1274 | } else { | ||
1275 | key.objectid = sk->tree_id; | ||
1276 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
1277 | key.offset = (u64)-1; | ||
1278 | root = btrfs_read_fs_root_no_name(info, &key); | ||
1279 | if (IS_ERR(root)) { | ||
1280 | printk(KERN_ERR "could not find root %llu\n", | ||
1281 | sk->tree_id); | ||
1282 | btrfs_free_path(path); | ||
1283 | return -ENOENT; | ||
1284 | } | ||
1285 | } | ||
1286 | |||
1287 | key.objectid = sk->min_objectid; | ||
1288 | key.type = sk->min_type; | ||
1289 | key.offset = sk->min_offset; | ||
1290 | |||
1291 | max_key.objectid = sk->max_objectid; | ||
1292 | max_key.type = sk->max_type; | ||
1293 | max_key.offset = sk->max_offset; | ||
1294 | |||
1295 | path->keep_locks = 1; | ||
1296 | |||
1297 | while(1) { | ||
1298 | ret = btrfs_search_forward(root, &key, &max_key, path, 0, | ||
1299 | sk->min_transid); | ||
1300 | if (ret != 0) { | ||
1301 | if (ret > 0) | ||
1302 | ret = 0; | ||
1303 | goto err; | ||
1304 | } | ||
1305 | ret = copy_to_sk(root, path, &key, sk, args->buf, | ||
1306 | &sk_offset, &num_found); | ||
1307 | btrfs_release_path(root, path); | ||
1308 | if (ret || num_found >= sk->nr_items) | ||
1309 | break; | ||
1310 | |||
1311 | } | ||
1312 | ret = 0; | ||
1313 | err: | ||
1314 | sk->nr_items = num_found; | ||
1315 | btrfs_free_path(path); | ||
1316 | return ret; | ||
1317 | } | ||
1318 | |||
1319 | static noinline int btrfs_ioctl_tree_search(struct file *file, | ||
1320 | void __user *argp) | ||
1321 | { | ||
1322 | struct btrfs_ioctl_search_args *args; | ||
1323 | struct inode *inode; | ||
1324 | int ret; | ||
1325 | |||
1326 | if (!capable(CAP_SYS_ADMIN)) | ||
1327 | return -EPERM; | ||
1328 | |||
1329 | args = memdup_user(argp, sizeof(*args)); | ||
1330 | if (IS_ERR(args)) | ||
1331 | return PTR_ERR(args); | ||
1332 | |||
1333 | inode = fdentry(file)->d_inode; | ||
1334 | ret = search_ioctl(inode, args); | ||
1335 | if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) | ||
1336 | ret = -EFAULT; | ||
1337 | kfree(args); | ||
1338 | return ret; | ||
1339 | } | ||
1340 | |||
1341 | /* | ||
1342 | * Search INODE_REFs to identify path name of 'dirid' directory | ||
1343 | * in a 'tree_id' tree. and sets path name to 'name'. | ||
1344 | */ | ||
1345 | static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, | ||
1346 | u64 tree_id, u64 dirid, char *name) | ||
1347 | { | ||
1348 | struct btrfs_root *root; | ||
1349 | struct btrfs_key key; | ||
1350 | char *ptr; | ||
1351 | int ret = -1; | ||
1352 | int slot; | ||
1353 | int len; | ||
1354 | int total_len = 0; | ||
1355 | struct btrfs_inode_ref *iref; | ||
1356 | struct extent_buffer *l; | ||
1357 | struct btrfs_path *path; | ||
1358 | |||
1359 | if (dirid == BTRFS_FIRST_FREE_OBJECTID) { | ||
1360 | name[0]='\0'; | ||
1361 | return 0; | ||
1362 | } | ||
1363 | |||
1364 | path = btrfs_alloc_path(); | ||
1365 | if (!path) | ||
1366 | return -ENOMEM; | ||
1367 | |||
1368 | ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX]; | ||
1369 | |||
1370 | key.objectid = tree_id; | ||
1371 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
1372 | key.offset = (u64)-1; | ||
1373 | root = btrfs_read_fs_root_no_name(info, &key); | ||
1374 | if (IS_ERR(root)) { | ||
1375 | printk(KERN_ERR "could not find root %llu\n", tree_id); | ||
1376 | ret = -ENOENT; | ||
1377 | goto out; | ||
1378 | } | ||
1379 | |||
1380 | key.objectid = dirid; | ||
1381 | key.type = BTRFS_INODE_REF_KEY; | ||
1382 | key.offset = (u64)-1; | ||
1383 | |||
1384 | while(1) { | ||
1385 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
1386 | if (ret < 0) | ||
1387 | goto out; | ||
1388 | |||
1389 | l = path->nodes[0]; | ||
1390 | slot = path->slots[0]; | ||
1391 | if (ret > 0 && slot > 0) | ||
1392 | slot--; | ||
1393 | btrfs_item_key_to_cpu(l, &key, slot); | ||
1394 | |||
1395 | if (ret > 0 && (key.objectid != dirid || | ||
1396 | key.type != BTRFS_INODE_REF_KEY)) { | ||
1397 | ret = -ENOENT; | ||
1398 | goto out; | ||
1399 | } | ||
1400 | |||
1401 | iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); | ||
1402 | len = btrfs_inode_ref_name_len(l, iref); | ||
1403 | ptr -= len + 1; | ||
1404 | total_len += len + 1; | ||
1405 | if (ptr < name) | ||
1406 | goto out; | ||
1407 | |||
1408 | *(ptr + len) = '/'; | ||
1409 | read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); | ||
1410 | |||
1411 | if (key.offset == BTRFS_FIRST_FREE_OBJECTID) | ||
1412 | break; | ||
1413 | |||
1414 | btrfs_release_path(root, path); | ||
1415 | key.objectid = key.offset; | ||
1416 | key.offset = (u64)-1; | ||
1417 | dirid = key.objectid; | ||
1418 | |||
1419 | } | ||
1420 | if (ptr < name) | ||
1421 | goto out; | ||
1422 | memcpy(name, ptr, total_len); | ||
1423 | name[total_len]='\0'; | ||
1424 | ret = 0; | ||
1425 | out: | ||
1426 | btrfs_free_path(path); | ||
1427 | return ret; | ||
1428 | } | ||
1429 | |||
1430 | static noinline int btrfs_ioctl_ino_lookup(struct file *file, | ||
1431 | void __user *argp) | ||
1432 | { | ||
1433 | struct btrfs_ioctl_ino_lookup_args *args; | ||
1434 | struct inode *inode; | ||
1435 | int ret; | ||
1436 | |||
1437 | if (!capable(CAP_SYS_ADMIN)) | ||
1438 | return -EPERM; | ||
1439 | |||
1440 | args = memdup_user(argp, sizeof(*args)); | ||
1441 | if (IS_ERR(args)) | ||
1442 | return PTR_ERR(args); | ||
1443 | |||
1444 | inode = fdentry(file)->d_inode; | ||
1445 | |||
1446 | if (args->treeid == 0) | ||
1447 | args->treeid = BTRFS_I(inode)->root->root_key.objectid; | ||
1448 | |||
1449 | ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info, | ||
1450 | args->treeid, args->objectid, | ||
1451 | args->name); | ||
1452 | |||
1453 | if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) | ||
1454 | ret = -EFAULT; | ||
1455 | |||
1456 | kfree(args); | ||
1457 | return ret; | ||
1458 | } | ||
1459 | |||
746 | static noinline int btrfs_ioctl_snap_destroy(struct file *file, | 1460 | static noinline int btrfs_ioctl_snap_destroy(struct file *file, |
747 | void __user *arg) | 1461 | void __user *arg) |
748 | { | 1462 | { |
@@ -758,9 +1472,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
758 | int ret; | 1472 | int ret; |
759 | int err = 0; | 1473 | int err = 0; |
760 | 1474 | ||
761 | if (!capable(CAP_SYS_ADMIN)) | ||
762 | return -EPERM; | ||
763 | |||
764 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 1475 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
765 | if (IS_ERR(vol_args)) | 1476 | if (IS_ERR(vol_args)) |
766 | return PTR_ERR(vol_args); | 1477 | return PTR_ERR(vol_args); |
@@ -790,13 +1501,51 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
790 | } | 1501 | } |
791 | 1502 | ||
792 | inode = dentry->d_inode; | 1503 | inode = dentry->d_inode; |
1504 | dest = BTRFS_I(inode)->root; | ||
1505 | if (!capable(CAP_SYS_ADMIN)){ | ||
1506 | /* | ||
1507 | * Regular user. Only allow this with a special mount | ||
1508 | * option, when the user has write+exec access to the | ||
1509 | * subvol root, and when rmdir(2) would have been | ||
1510 | * allowed. | ||
1511 | * | ||
1512 | * Note that this is _not_ check that the subvol is | ||
1513 | * empty or doesn't contain data that we wouldn't | ||
1514 | * otherwise be able to delete. | ||
1515 | * | ||
1516 | * Users who want to delete empty subvols should try | ||
1517 | * rmdir(2). | ||
1518 | */ | ||
1519 | err = -EPERM; | ||
1520 | if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) | ||
1521 | goto out_dput; | ||
1522 | |||
1523 | /* | ||
1524 | * Do not allow deletion if the parent dir is the same | ||
1525 | * as the dir to be deleted. That means the ioctl | ||
1526 | * must be called on the dentry referencing the root | ||
1527 | * of the subvol, not a random directory contained | ||
1528 | * within it. | ||
1529 | */ | ||
1530 | err = -EINVAL; | ||
1531 | if (root == dest) | ||
1532 | goto out_dput; | ||
1533 | |||
1534 | err = inode_permission(inode, MAY_WRITE | MAY_EXEC); | ||
1535 | if (err) | ||
1536 | goto out_dput; | ||
1537 | |||
1538 | /* check if subvolume may be deleted by a non-root user */ | ||
1539 | err = btrfs_may_delete(dir, dentry, 1); | ||
1540 | if (err) | ||
1541 | goto out_dput; | ||
1542 | } | ||
1543 | |||
793 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { | 1544 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { |
794 | err = -EINVAL; | 1545 | err = -EINVAL; |
795 | goto out_dput; | 1546 | goto out_dput; |
796 | } | 1547 | } |
797 | 1548 | ||
798 | dest = BTRFS_I(inode)->root; | ||
799 | |||
800 | mutex_lock(&inode->i_mutex); | 1549 | mutex_lock(&inode->i_mutex); |
801 | err = d_invalidate(dentry); | 1550 | err = d_invalidate(dentry); |
802 | if (err) | 1551 | if (err) |
@@ -808,7 +1557,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
808 | if (err) | 1557 | if (err) |
809 | goto out_up_write; | 1558 | goto out_up_write; |
810 | 1559 | ||
811 | trans = btrfs_start_transaction(root, 1); | 1560 | trans = btrfs_start_transaction(root, 0); |
1561 | if (IS_ERR(trans)) { | ||
1562 | err = PTR_ERR(trans); | ||
1563 | goto out_up_write; | ||
1564 | } | ||
1565 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
1566 | |||
812 | ret = btrfs_unlink_subvol(trans, root, dir, | 1567 | ret = btrfs_unlink_subvol(trans, root, dir, |
813 | dest->root_key.objectid, | 1568 | dest->root_key.objectid, |
814 | dentry->d_name.name, | 1569 | dentry->d_name.name, |
@@ -822,12 +1577,14 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
822 | dest->root_item.drop_level = 0; | 1577 | dest->root_item.drop_level = 0; |
823 | btrfs_set_root_refs(&dest->root_item, 0); | 1578 | btrfs_set_root_refs(&dest->root_item, 0); |
824 | 1579 | ||
825 | ret = btrfs_insert_orphan_item(trans, | 1580 | if (!xchg(&dest->orphan_item_inserted, 1)) { |
826 | root->fs_info->tree_root, | 1581 | ret = btrfs_insert_orphan_item(trans, |
827 | dest->root_key.objectid); | 1582 | root->fs_info->tree_root, |
828 | BUG_ON(ret); | 1583 | dest->root_key.objectid); |
1584 | BUG_ON(ret); | ||
1585 | } | ||
829 | 1586 | ||
830 | ret = btrfs_commit_transaction(trans, root); | 1587 | ret = btrfs_end_transaction(trans, root); |
831 | BUG_ON(ret); | 1588 | BUG_ON(ret); |
832 | inode->i_flags |= S_DEAD; | 1589 | inode->i_flags |= S_DEAD; |
833 | out_up_write: | 1590 | out_up_write: |
@@ -849,12 +1606,16 @@ out: | |||
849 | return err; | 1606 | return err; |
850 | } | 1607 | } |
851 | 1608 | ||
852 | static int btrfs_ioctl_defrag(struct file *file) | 1609 | static int btrfs_ioctl_defrag(struct file *file, void __user *argp) |
853 | { | 1610 | { |
854 | struct inode *inode = fdentry(file)->d_inode; | 1611 | struct inode *inode = fdentry(file)->d_inode; |
855 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1612 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1613 | struct btrfs_ioctl_defrag_range_args *range; | ||
856 | int ret; | 1614 | int ret; |
857 | 1615 | ||
1616 | if (btrfs_root_readonly(root)) | ||
1617 | return -EROFS; | ||
1618 | |||
858 | ret = mnt_want_write(file->f_path.mnt); | 1619 | ret = mnt_want_write(file->f_path.mnt); |
859 | if (ret) | 1620 | if (ret) |
860 | return ret; | 1621 | return ret; |
@@ -865,16 +1626,44 @@ static int btrfs_ioctl_defrag(struct file *file) | |||
865 | ret = -EPERM; | 1626 | ret = -EPERM; |
866 | goto out; | 1627 | goto out; |
867 | } | 1628 | } |
868 | btrfs_defrag_root(root, 0); | 1629 | ret = btrfs_defrag_root(root, 0); |
869 | btrfs_defrag_root(root->fs_info->extent_root, 0); | 1630 | if (ret) |
1631 | goto out; | ||
1632 | ret = btrfs_defrag_root(root->fs_info->extent_root, 0); | ||
870 | break; | 1633 | break; |
871 | case S_IFREG: | 1634 | case S_IFREG: |
872 | if (!(file->f_mode & FMODE_WRITE)) { | 1635 | if (!(file->f_mode & FMODE_WRITE)) { |
873 | ret = -EINVAL; | 1636 | ret = -EINVAL; |
874 | goto out; | 1637 | goto out; |
875 | } | 1638 | } |
876 | btrfs_defrag_file(file); | 1639 | |
1640 | range = kzalloc(sizeof(*range), GFP_KERNEL); | ||
1641 | if (!range) { | ||
1642 | ret = -ENOMEM; | ||
1643 | goto out; | ||
1644 | } | ||
1645 | |||
1646 | if (argp) { | ||
1647 | if (copy_from_user(range, argp, | ||
1648 | sizeof(*range))) { | ||
1649 | ret = -EFAULT; | ||
1650 | kfree(range); | ||
1651 | goto out; | ||
1652 | } | ||
1653 | /* compression requires us to start the IO */ | ||
1654 | if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { | ||
1655 | range->flags |= BTRFS_DEFRAG_RANGE_START_IO; | ||
1656 | range->extent_thresh = (u32)-1; | ||
1657 | } | ||
1658 | } else { | ||
1659 | /* the rest are all set to zero by kzalloc */ | ||
1660 | range->len = (u64)-1; | ||
1661 | } | ||
1662 | ret = btrfs_defrag_file(file, range); | ||
1663 | kfree(range); | ||
877 | break; | 1664 | break; |
1665 | default: | ||
1666 | ret = -EINVAL; | ||
878 | } | 1667 | } |
879 | out: | 1668 | out: |
880 | mnt_drop_write(file->f_path.mnt); | 1669 | mnt_drop_write(file->f_path.mnt); |
@@ -952,9 +1741,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
952 | */ | 1741 | */ |
953 | 1742 | ||
954 | /* the destination must be opened for writing */ | 1743 | /* the destination must be opened for writing */ |
955 | if (!(file->f_mode & FMODE_WRITE)) | 1744 | if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) |
956 | return -EINVAL; | 1745 | return -EINVAL; |
957 | 1746 | ||
1747 | if (btrfs_root_readonly(root)) | ||
1748 | return -EROFS; | ||
1749 | |||
958 | ret = mnt_want_write(file->f_path.mnt); | 1750 | ret = mnt_want_write(file->f_path.mnt); |
959 | if (ret) | 1751 | if (ret) |
960 | return ret; | 1752 | return ret; |
@@ -964,12 +1756,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
964 | ret = -EBADF; | 1756 | ret = -EBADF; |
965 | goto out_drop_write; | 1757 | goto out_drop_write; |
966 | } | 1758 | } |
1759 | |||
967 | src = src_file->f_dentry->d_inode; | 1760 | src = src_file->f_dentry->d_inode; |
968 | 1761 | ||
969 | ret = -EINVAL; | 1762 | ret = -EINVAL; |
970 | if (src == inode) | 1763 | if (src == inode) |
971 | goto out_fput; | 1764 | goto out_fput; |
972 | 1765 | ||
1766 | /* the src must be open for reading */ | ||
1767 | if (!(src_file->f_mode & FMODE_READ)) | ||
1768 | goto out_fput; | ||
1769 | |||
973 | ret = -EISDIR; | 1770 | ret = -EISDIR; |
974 | if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) | 1771 | if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) |
975 | goto out_fput; | 1772 | goto out_fput; |
@@ -991,27 +1788,26 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
991 | path->reada = 2; | 1788 | path->reada = 2; |
992 | 1789 | ||
993 | if (inode < src) { | 1790 | if (inode < src) { |
994 | mutex_lock(&inode->i_mutex); | 1791 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); |
995 | mutex_lock(&src->i_mutex); | 1792 | mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); |
996 | } else { | 1793 | } else { |
997 | mutex_lock(&src->i_mutex); | 1794 | mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); |
998 | mutex_lock(&inode->i_mutex); | 1795 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); |
999 | } | 1796 | } |
1000 | 1797 | ||
1001 | /* determine range to clone */ | 1798 | /* determine range to clone */ |
1002 | ret = -EINVAL; | 1799 | ret = -EINVAL; |
1003 | if (off >= src->i_size || off + len > src->i_size) | 1800 | if (off + len > src->i_size || off + len < off) |
1004 | goto out_unlock; | 1801 | goto out_unlock; |
1005 | if (len == 0) | 1802 | if (len == 0) |
1006 | olen = len = src->i_size - off; | 1803 | olen = len = src->i_size - off; |
1007 | /* if we extend to eof, continue to block boundary */ | 1804 | /* if we extend to eof, continue to block boundary */ |
1008 | if (off + len == src->i_size) | 1805 | if (off + len == src->i_size) |
1009 | len = ((src->i_size + bs-1) & ~(bs-1)) | 1806 | len = ALIGN(src->i_size, bs) - off; |
1010 | - off; | ||
1011 | 1807 | ||
1012 | /* verify the end result is block aligned */ | 1808 | /* verify the end result is block aligned */ |
1013 | if ((off & (bs-1)) || | 1809 | if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || |
1014 | ((off + len) & (bs-1))) | 1810 | !IS_ALIGNED(destoff, bs)) |
1015 | goto out_unlock; | 1811 | goto out_unlock; |
1016 | 1812 | ||
1017 | /* do any pending delalloc/csum calc on src, one way or | 1813 | /* do any pending delalloc/csum calc on src, one way or |
@@ -1019,21 +1815,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1019 | while (1) { | 1815 | while (1) { |
1020 | struct btrfs_ordered_extent *ordered; | 1816 | struct btrfs_ordered_extent *ordered; |
1021 | lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); | 1817 | lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); |
1022 | ordered = btrfs_lookup_first_ordered_extent(inode, off+len); | 1818 | ordered = btrfs_lookup_first_ordered_extent(src, off+len); |
1023 | if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered) | 1819 | if (!ordered && |
1820 | !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len, | ||
1821 | EXTENT_DELALLOC, 0, NULL)) | ||
1024 | break; | 1822 | break; |
1025 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); | 1823 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); |
1026 | if (ordered) | 1824 | if (ordered) |
1027 | btrfs_put_ordered_extent(ordered); | 1825 | btrfs_put_ordered_extent(ordered); |
1028 | btrfs_wait_ordered_range(src, off, off+len); | 1826 | btrfs_wait_ordered_range(src, off, len); |
1029 | } | 1827 | } |
1030 | 1828 | ||
1031 | trans = btrfs_start_transaction(root, 1); | ||
1032 | BUG_ON(!trans); | ||
1033 | |||
1034 | /* punch hole in destination first */ | ||
1035 | btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1); | ||
1036 | |||
1037 | /* clone data */ | 1829 | /* clone data */ |
1038 | key.objectid = src->i_ino; | 1830 | key.objectid = src->i_ino; |
1039 | key.type = BTRFS_EXTENT_DATA_KEY; | 1831 | key.type = BTRFS_EXTENT_DATA_KEY; |
@@ -1044,7 +1836,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1044 | * note the key will change type as we walk through the | 1836 | * note the key will change type as we walk through the |
1045 | * tree. | 1837 | * tree. |
1046 | */ | 1838 | */ |
1047 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 1839 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
1048 | if (ret < 0) | 1840 | if (ret < 0) |
1049 | goto out; | 1841 | goto out; |
1050 | 1842 | ||
@@ -1073,6 +1865,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1073 | u64 disko = 0, diskl = 0; | 1865 | u64 disko = 0, diskl = 0; |
1074 | u64 datao = 0, datal = 0; | 1866 | u64 datao = 0, datal = 0; |
1075 | u8 comp; | 1867 | u8 comp; |
1868 | u64 endoff; | ||
1076 | 1869 | ||
1077 | size = btrfs_item_size_nr(leaf, slot); | 1870 | size = btrfs_item_size_nr(leaf, slot); |
1078 | read_extent_buffer(leaf, buf, | 1871 | read_extent_buffer(leaf, buf, |
@@ -1099,7 +1892,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1099 | } | 1892 | } |
1100 | btrfs_release_path(root, path); | 1893 | btrfs_release_path(root, path); |
1101 | 1894 | ||
1102 | if (key.offset + datal < off || | 1895 | if (key.offset + datal <= off || |
1103 | key.offset >= off+len) | 1896 | key.offset >= off+len) |
1104 | goto next; | 1897 | goto next; |
1105 | 1898 | ||
@@ -1107,12 +1900,31 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1107 | new_key.objectid = inode->i_ino; | 1900 | new_key.objectid = inode->i_ino; |
1108 | new_key.offset = key.offset + destoff - off; | 1901 | new_key.offset = key.offset + destoff - off; |
1109 | 1902 | ||
1903 | trans = btrfs_start_transaction(root, 1); | ||
1904 | if (IS_ERR(trans)) { | ||
1905 | ret = PTR_ERR(trans); | ||
1906 | goto out; | ||
1907 | } | ||
1908 | |||
1110 | if (type == BTRFS_FILE_EXTENT_REG || | 1909 | if (type == BTRFS_FILE_EXTENT_REG || |
1111 | type == BTRFS_FILE_EXTENT_PREALLOC) { | 1910 | type == BTRFS_FILE_EXTENT_PREALLOC) { |
1911 | if (off > key.offset) { | ||
1912 | datao += off - key.offset; | ||
1913 | datal -= off - key.offset; | ||
1914 | } | ||
1915 | |||
1916 | if (key.offset + datal > off + len) | ||
1917 | datal = off + len - key.offset; | ||
1918 | |||
1919 | ret = btrfs_drop_extents(trans, inode, | ||
1920 | new_key.offset, | ||
1921 | new_key.offset + datal, | ||
1922 | &hint_byte, 1); | ||
1923 | BUG_ON(ret); | ||
1924 | |||
1112 | ret = btrfs_insert_empty_item(trans, root, path, | 1925 | ret = btrfs_insert_empty_item(trans, root, path, |
1113 | &new_key, size); | 1926 | &new_key, size); |
1114 | if (ret) | 1927 | BUG_ON(ret); |
1115 | goto out; | ||
1116 | 1928 | ||
1117 | leaf = path->nodes[0]; | 1929 | leaf = path->nodes[0]; |
1118 | slot = path->slots[0]; | 1930 | slot = path->slots[0]; |
@@ -1123,14 +1935,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1123 | extent = btrfs_item_ptr(leaf, slot, | 1935 | extent = btrfs_item_ptr(leaf, slot, |
1124 | struct btrfs_file_extent_item); | 1936 | struct btrfs_file_extent_item); |
1125 | 1937 | ||
1126 | if (off > key.offset) { | ||
1127 | datao += off - key.offset; | ||
1128 | datal -= off - key.offset; | ||
1129 | } | ||
1130 | |||
1131 | if (key.offset + datal > off + len) | ||
1132 | datal = off + len - key.offset; | ||
1133 | |||
1134 | /* disko == 0 means it's a hole */ | 1938 | /* disko == 0 means it's a hole */ |
1135 | if (!disko) | 1939 | if (!disko) |
1136 | datao = 0; | 1940 | datao = 0; |
@@ -1161,14 +1965,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1161 | 1965 | ||
1162 | if (comp && (skip || trim)) { | 1966 | if (comp && (skip || trim)) { |
1163 | ret = -EINVAL; | 1967 | ret = -EINVAL; |
1968 | btrfs_end_transaction(trans, root); | ||
1164 | goto out; | 1969 | goto out; |
1165 | } | 1970 | } |
1166 | size -= skip + trim; | 1971 | size -= skip + trim; |
1167 | datal -= skip + trim; | 1972 | datal -= skip + trim; |
1973 | |||
1974 | ret = btrfs_drop_extents(trans, inode, | ||
1975 | new_key.offset, | ||
1976 | new_key.offset + datal, | ||
1977 | &hint_byte, 1); | ||
1978 | BUG_ON(ret); | ||
1979 | |||
1168 | ret = btrfs_insert_empty_item(trans, root, path, | 1980 | ret = btrfs_insert_empty_item(trans, root, path, |
1169 | &new_key, size); | 1981 | &new_key, size); |
1170 | if (ret) | 1982 | BUG_ON(ret); |
1171 | goto out; | ||
1172 | 1983 | ||
1173 | if (skip) { | 1984 | if (skip) { |
1174 | u32 start = | 1985 | u32 start = |
@@ -1186,8 +1997,26 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1186 | } | 1997 | } |
1187 | 1998 | ||
1188 | btrfs_mark_buffer_dirty(leaf); | 1999 | btrfs_mark_buffer_dirty(leaf); |
1189 | } | 2000 | btrfs_release_path(root, path); |
1190 | 2001 | ||
2002 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
2003 | |||
2004 | /* | ||
2005 | * we round up to the block size at eof when | ||
2006 | * determining which extents to clone above, | ||
2007 | * but shouldn't round up the file size | ||
2008 | */ | ||
2009 | endoff = new_key.offset + datal; | ||
2010 | if (endoff > destoff+olen) | ||
2011 | endoff = destoff+olen; | ||
2012 | if (endoff > inode->i_size) | ||
2013 | btrfs_i_size_write(inode, endoff); | ||
2014 | |||
2015 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | ||
2016 | ret = btrfs_update_inode(trans, root, inode); | ||
2017 | BUG_ON(ret); | ||
2018 | btrfs_end_transaction(trans, root); | ||
2019 | } | ||
1191 | next: | 2020 | next: |
1192 | btrfs_release_path(root, path); | 2021 | btrfs_release_path(root, path); |
1193 | key.offset++; | 2022 | key.offset++; |
@@ -1195,17 +2024,7 @@ next: | |||
1195 | ret = 0; | 2024 | ret = 0; |
1196 | out: | 2025 | out: |
1197 | btrfs_release_path(root, path); | 2026 | btrfs_release_path(root, path); |
1198 | if (ret == 0) { | ||
1199 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
1200 | if (destoff + olen > inode->i_size) | ||
1201 | btrfs_i_size_write(inode, destoff + olen); | ||
1202 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | ||
1203 | ret = btrfs_update_inode(trans, root, inode); | ||
1204 | } | ||
1205 | btrfs_end_transaction(trans, root); | ||
1206 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); | 2027 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); |
1207 | if (ret) | ||
1208 | vmtruncate(inode, 0); | ||
1209 | out_unlock: | 2028 | out_unlock: |
1210 | mutex_unlock(&src->i_mutex); | 2029 | mutex_unlock(&src->i_mutex); |
1211 | mutex_unlock(&inode->i_mutex); | 2030 | mutex_unlock(&inode->i_mutex); |
@@ -1249,6 +2068,10 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
1249 | if (file->private_data) | 2068 | if (file->private_data) |
1250 | goto out; | 2069 | goto out; |
1251 | 2070 | ||
2071 | ret = -EROFS; | ||
2072 | if (btrfs_root_readonly(root)) | ||
2073 | goto out; | ||
2074 | |||
1252 | ret = mnt_want_write(file->f_path.mnt); | 2075 | ret = mnt_want_write(file->f_path.mnt); |
1253 | if (ret) | 2076 | if (ret) |
1254 | goto out; | 2077 | goto out; |
@@ -1274,6 +2097,209 @@ out: | |||
1274 | return ret; | 2097 | return ret; |
1275 | } | 2098 | } |
1276 | 2099 | ||
2100 | static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | ||
2101 | { | ||
2102 | struct inode *inode = fdentry(file)->d_inode; | ||
2103 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
2104 | struct btrfs_root *new_root; | ||
2105 | struct btrfs_dir_item *di; | ||
2106 | struct btrfs_trans_handle *trans; | ||
2107 | struct btrfs_path *path; | ||
2108 | struct btrfs_key location; | ||
2109 | struct btrfs_disk_key disk_key; | ||
2110 | struct btrfs_super_block *disk_super; | ||
2111 | u64 features; | ||
2112 | u64 objectid = 0; | ||
2113 | u64 dir_id; | ||
2114 | |||
2115 | if (!capable(CAP_SYS_ADMIN)) | ||
2116 | return -EPERM; | ||
2117 | |||
2118 | if (copy_from_user(&objectid, argp, sizeof(objectid))) | ||
2119 | return -EFAULT; | ||
2120 | |||
2121 | if (!objectid) | ||
2122 | objectid = root->root_key.objectid; | ||
2123 | |||
2124 | location.objectid = objectid; | ||
2125 | location.type = BTRFS_ROOT_ITEM_KEY; | ||
2126 | location.offset = (u64)-1; | ||
2127 | |||
2128 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); | ||
2129 | if (IS_ERR(new_root)) | ||
2130 | return PTR_ERR(new_root); | ||
2131 | |||
2132 | if (btrfs_root_refs(&new_root->root_item) == 0) | ||
2133 | return -ENOENT; | ||
2134 | |||
2135 | path = btrfs_alloc_path(); | ||
2136 | if (!path) | ||
2137 | return -ENOMEM; | ||
2138 | path->leave_spinning = 1; | ||
2139 | |||
2140 | trans = btrfs_start_transaction(root, 1); | ||
2141 | if (!trans) { | ||
2142 | btrfs_free_path(path); | ||
2143 | return -ENOMEM; | ||
2144 | } | ||
2145 | |||
2146 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); | ||
2147 | di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, | ||
2148 | dir_id, "default", 7, 1); | ||
2149 | if (IS_ERR_OR_NULL(di)) { | ||
2150 | btrfs_free_path(path); | ||
2151 | btrfs_end_transaction(trans, root); | ||
2152 | printk(KERN_ERR "Umm, you don't have the default dir item, " | ||
2153 | "this isn't going to work\n"); | ||
2154 | return -ENOENT; | ||
2155 | } | ||
2156 | |||
2157 | btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); | ||
2158 | btrfs_set_dir_item_key(path->nodes[0], di, &disk_key); | ||
2159 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
2160 | btrfs_free_path(path); | ||
2161 | |||
2162 | disk_super = &root->fs_info->super_copy; | ||
2163 | features = btrfs_super_incompat_flags(disk_super); | ||
2164 | if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) { | ||
2165 | features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL; | ||
2166 | btrfs_set_super_incompat_flags(disk_super, features); | ||
2167 | } | ||
2168 | btrfs_end_transaction(trans, root); | ||
2169 | |||
2170 | return 0; | ||
2171 | } | ||
2172 | |||
2173 | static void get_block_group_info(struct list_head *groups_list, | ||
2174 | struct btrfs_ioctl_space_info *space) | ||
2175 | { | ||
2176 | struct btrfs_block_group_cache *block_group; | ||
2177 | |||
2178 | space->total_bytes = 0; | ||
2179 | space->used_bytes = 0; | ||
2180 | space->flags = 0; | ||
2181 | list_for_each_entry(block_group, groups_list, list) { | ||
2182 | space->flags = block_group->flags; | ||
2183 | space->total_bytes += block_group->key.offset; | ||
2184 | space->used_bytes += | ||
2185 | btrfs_block_group_used(&block_group->item); | ||
2186 | } | ||
2187 | } | ||
2188 | |||
2189 | long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | ||
2190 | { | ||
2191 | struct btrfs_ioctl_space_args space_args; | ||
2192 | struct btrfs_ioctl_space_info space; | ||
2193 | struct btrfs_ioctl_space_info *dest; | ||
2194 | struct btrfs_ioctl_space_info *dest_orig; | ||
2195 | struct btrfs_ioctl_space_info *user_dest; | ||
2196 | struct btrfs_space_info *info; | ||
2197 | u64 types[] = {BTRFS_BLOCK_GROUP_DATA, | ||
2198 | BTRFS_BLOCK_GROUP_SYSTEM, | ||
2199 | BTRFS_BLOCK_GROUP_METADATA, | ||
2200 | BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; | ||
2201 | int num_types = 4; | ||
2202 | int alloc_size; | ||
2203 | int ret = 0; | ||
2204 | int slot_count = 0; | ||
2205 | int i, c; | ||
2206 | |||
2207 | if (copy_from_user(&space_args, | ||
2208 | (struct btrfs_ioctl_space_args __user *)arg, | ||
2209 | sizeof(space_args))) | ||
2210 | return -EFAULT; | ||
2211 | |||
2212 | for (i = 0; i < num_types; i++) { | ||
2213 | struct btrfs_space_info *tmp; | ||
2214 | |||
2215 | info = NULL; | ||
2216 | rcu_read_lock(); | ||
2217 | list_for_each_entry_rcu(tmp, &root->fs_info->space_info, | ||
2218 | list) { | ||
2219 | if (tmp->flags == types[i]) { | ||
2220 | info = tmp; | ||
2221 | break; | ||
2222 | } | ||
2223 | } | ||
2224 | rcu_read_unlock(); | ||
2225 | |||
2226 | if (!info) | ||
2227 | continue; | ||
2228 | |||
2229 | down_read(&info->groups_sem); | ||
2230 | for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { | ||
2231 | if (!list_empty(&info->block_groups[c])) | ||
2232 | slot_count++; | ||
2233 | } | ||
2234 | up_read(&info->groups_sem); | ||
2235 | } | ||
2236 | |||
2237 | /* space_slots == 0 means they are asking for a count */ | ||
2238 | if (space_args.space_slots == 0) { | ||
2239 | space_args.total_spaces = slot_count; | ||
2240 | goto out; | ||
2241 | } | ||
2242 | |||
2243 | slot_count = min_t(int, space_args.space_slots, slot_count); | ||
2244 | |||
2245 | alloc_size = sizeof(*dest) * slot_count; | ||
2246 | |||
2247 | /* we generally have at most 6 or so space infos, one for each raid | ||
2248 | * level. So, a whole page should be more than enough for everyone | ||
2249 | */ | ||
2250 | if (alloc_size > PAGE_CACHE_SIZE) | ||
2251 | return -ENOMEM; | ||
2252 | |||
2253 | space_args.total_spaces = 0; | ||
2254 | dest = kmalloc(alloc_size, GFP_NOFS); | ||
2255 | if (!dest) | ||
2256 | return -ENOMEM; | ||
2257 | dest_orig = dest; | ||
2258 | |||
2259 | /* now we have a buffer to copy into */ | ||
2260 | for (i = 0; i < num_types; i++) { | ||
2261 | struct btrfs_space_info *tmp; | ||
2262 | |||
2263 | info = NULL; | ||
2264 | rcu_read_lock(); | ||
2265 | list_for_each_entry_rcu(tmp, &root->fs_info->space_info, | ||
2266 | list) { | ||
2267 | if (tmp->flags == types[i]) { | ||
2268 | info = tmp; | ||
2269 | break; | ||
2270 | } | ||
2271 | } | ||
2272 | rcu_read_unlock(); | ||
2273 | |||
2274 | if (!info) | ||
2275 | continue; | ||
2276 | down_read(&info->groups_sem); | ||
2277 | for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { | ||
2278 | if (!list_empty(&info->block_groups[c])) { | ||
2279 | get_block_group_info(&info->block_groups[c], | ||
2280 | &space); | ||
2281 | memcpy(dest, &space, sizeof(space)); | ||
2282 | dest++; | ||
2283 | space_args.total_spaces++; | ||
2284 | } | ||
2285 | } | ||
2286 | up_read(&info->groups_sem); | ||
2287 | } | ||
2288 | |||
2289 | user_dest = (struct btrfs_ioctl_space_info *) | ||
2290 | (arg + sizeof(struct btrfs_ioctl_space_args)); | ||
2291 | |||
2292 | if (copy_to_user(user_dest, dest_orig, alloc_size)) | ||
2293 | ret = -EFAULT; | ||
2294 | |||
2295 | kfree(dest_orig); | ||
2296 | out: | ||
2297 | if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args))) | ||
2298 | ret = -EFAULT; | ||
2299 | |||
2300 | return ret; | ||
2301 | } | ||
2302 | |||
1277 | /* | 2303 | /* |
1278 | * there are many ways the trans_start and trans_end ioctls can lead | 2304 | * there are many ways the trans_start and trans_end ioctls can lead |
1279 | * to deadlocks. They should only be used by applications that | 2305 | * to deadlocks. They should only be used by applications that |
@@ -1301,6 +2327,36 @@ long btrfs_ioctl_trans_end(struct file *file) | |||
1301 | return 0; | 2327 | return 0; |
1302 | } | 2328 | } |
1303 | 2329 | ||
2330 | static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp) | ||
2331 | { | ||
2332 | struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; | ||
2333 | struct btrfs_trans_handle *trans; | ||
2334 | u64 transid; | ||
2335 | |||
2336 | trans = btrfs_start_transaction(root, 0); | ||
2337 | transid = trans->transid; | ||
2338 | btrfs_commit_transaction_async(trans, root, 0); | ||
2339 | |||
2340 | if (argp) | ||
2341 | if (copy_to_user(argp, &transid, sizeof(transid))) | ||
2342 | return -EFAULT; | ||
2343 | return 0; | ||
2344 | } | ||
2345 | |||
2346 | static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp) | ||
2347 | { | ||
2348 | struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; | ||
2349 | u64 transid; | ||
2350 | |||
2351 | if (argp) { | ||
2352 | if (copy_from_user(&transid, argp, sizeof(transid))) | ||
2353 | return -EFAULT; | ||
2354 | } else { | ||
2355 | transid = 0; /* current trans */ | ||
2356 | } | ||
2357 | return btrfs_wait_for_commit(root, transid); | ||
2358 | } | ||
2359 | |||
1304 | long btrfs_ioctl(struct file *file, unsigned int | 2360 | long btrfs_ioctl(struct file *file, unsigned int |
1305 | cmd, unsigned long arg) | 2361 | cmd, unsigned long arg) |
1306 | { | 2362 | { |
@@ -1316,12 +2372,22 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
1316 | return btrfs_ioctl_getversion(file, argp); | 2372 | return btrfs_ioctl_getversion(file, argp); |
1317 | case BTRFS_IOC_SNAP_CREATE: | 2373 | case BTRFS_IOC_SNAP_CREATE: |
1318 | return btrfs_ioctl_snap_create(file, argp, 0); | 2374 | return btrfs_ioctl_snap_create(file, argp, 0); |
2375 | case BTRFS_IOC_SNAP_CREATE_V2: | ||
2376 | return btrfs_ioctl_snap_create_v2(file, argp, 0); | ||
1319 | case BTRFS_IOC_SUBVOL_CREATE: | 2377 | case BTRFS_IOC_SUBVOL_CREATE: |
1320 | return btrfs_ioctl_snap_create(file, argp, 1); | 2378 | return btrfs_ioctl_snap_create(file, argp, 1); |
1321 | case BTRFS_IOC_SNAP_DESTROY: | 2379 | case BTRFS_IOC_SNAP_DESTROY: |
1322 | return btrfs_ioctl_snap_destroy(file, argp); | 2380 | return btrfs_ioctl_snap_destroy(file, argp); |
2381 | case BTRFS_IOC_SUBVOL_GETFLAGS: | ||
2382 | return btrfs_ioctl_subvol_getflags(file, argp); | ||
2383 | case BTRFS_IOC_SUBVOL_SETFLAGS: | ||
2384 | return btrfs_ioctl_subvol_setflags(file, argp); | ||
2385 | case BTRFS_IOC_DEFAULT_SUBVOL: | ||
2386 | return btrfs_ioctl_default_subvol(file, argp); | ||
1323 | case BTRFS_IOC_DEFRAG: | 2387 | case BTRFS_IOC_DEFRAG: |
1324 | return btrfs_ioctl_defrag(file); | 2388 | return btrfs_ioctl_defrag(file, NULL); |
2389 | case BTRFS_IOC_DEFRAG_RANGE: | ||
2390 | return btrfs_ioctl_defrag(file, argp); | ||
1325 | case BTRFS_IOC_RESIZE: | 2391 | case BTRFS_IOC_RESIZE: |
1326 | return btrfs_ioctl_resize(root, argp); | 2392 | return btrfs_ioctl_resize(root, argp); |
1327 | case BTRFS_IOC_ADD_DEV: | 2393 | case BTRFS_IOC_ADD_DEV: |
@@ -1338,9 +2404,19 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
1338 | return btrfs_ioctl_trans_start(file); | 2404 | return btrfs_ioctl_trans_start(file); |
1339 | case BTRFS_IOC_TRANS_END: | 2405 | case BTRFS_IOC_TRANS_END: |
1340 | return btrfs_ioctl_trans_end(file); | 2406 | return btrfs_ioctl_trans_end(file); |
2407 | case BTRFS_IOC_TREE_SEARCH: | ||
2408 | return btrfs_ioctl_tree_search(file, argp); | ||
2409 | case BTRFS_IOC_INO_LOOKUP: | ||
2410 | return btrfs_ioctl_ino_lookup(file, argp); | ||
2411 | case BTRFS_IOC_SPACE_INFO: | ||
2412 | return btrfs_ioctl_space_info(root, argp); | ||
1341 | case BTRFS_IOC_SYNC: | 2413 | case BTRFS_IOC_SYNC: |
1342 | btrfs_sync_fs(file->f_dentry->d_sb, 1); | 2414 | btrfs_sync_fs(file->f_dentry->d_sb, 1); |
1343 | return 0; | 2415 | return 0; |
2416 | case BTRFS_IOC_START_SYNC: | ||
2417 | return btrfs_ioctl_start_sync(file, argp); | ||
2418 | case BTRFS_IOC_WAIT_SYNC: | ||
2419 | return btrfs_ioctl_wait_sync(file, argp); | ||
1344 | } | 2420 | } |
1345 | 2421 | ||
1346 | return -ENOTTY; | 2422 | return -ENOTTY; |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index bc49914475eb..8fb382167b13 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
@@ -22,20 +22,141 @@ | |||
22 | 22 | ||
23 | #define BTRFS_IOCTL_MAGIC 0x94 | 23 | #define BTRFS_IOCTL_MAGIC 0x94 |
24 | #define BTRFS_VOL_NAME_MAX 255 | 24 | #define BTRFS_VOL_NAME_MAX 255 |
25 | #define BTRFS_PATH_NAME_MAX 4087 | ||
26 | 25 | ||
27 | /* this should be 4k */ | 26 | /* this should be 4k */ |
27 | #define BTRFS_PATH_NAME_MAX 4087 | ||
28 | struct btrfs_ioctl_vol_args { | 28 | struct btrfs_ioctl_vol_args { |
29 | __s64 fd; | 29 | __s64 fd; |
30 | char name[BTRFS_PATH_NAME_MAX + 1]; | 30 | char name[BTRFS_PATH_NAME_MAX + 1]; |
31 | }; | 31 | }; |
32 | 32 | ||
33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) | ||
34 | #define BTRFS_SUBVOL_RDONLY (1ULL << 1) | ||
35 | |||
36 | #define BTRFS_SUBVOL_NAME_MAX 4039 | ||
37 | struct btrfs_ioctl_vol_args_v2 { | ||
38 | __s64 fd; | ||
39 | __u64 transid; | ||
40 | __u64 flags; | ||
41 | __u64 unused[4]; | ||
42 | char name[BTRFS_SUBVOL_NAME_MAX + 1]; | ||
43 | }; | ||
44 | |||
45 | #define BTRFS_INO_LOOKUP_PATH_MAX 4080 | ||
46 | struct btrfs_ioctl_ino_lookup_args { | ||
47 | __u64 treeid; | ||
48 | __u64 objectid; | ||
49 | char name[BTRFS_INO_LOOKUP_PATH_MAX]; | ||
50 | }; | ||
51 | |||
52 | struct btrfs_ioctl_search_key { | ||
53 | /* which root are we searching. 0 is the tree of tree roots */ | ||
54 | __u64 tree_id; | ||
55 | |||
56 | /* keys returned will be >= min and <= max */ | ||
57 | __u64 min_objectid; | ||
58 | __u64 max_objectid; | ||
59 | |||
60 | /* keys returned will be >= min and <= max */ | ||
61 | __u64 min_offset; | ||
62 | __u64 max_offset; | ||
63 | |||
64 | /* max and min transids to search for */ | ||
65 | __u64 min_transid; | ||
66 | __u64 max_transid; | ||
67 | |||
68 | /* keys returned will be >= min and <= max */ | ||
69 | __u32 min_type; | ||
70 | __u32 max_type; | ||
71 | |||
72 | /* | ||
73 | * how many items did userland ask for, and how many are we | ||
74 | * returning | ||
75 | */ | ||
76 | __u32 nr_items; | ||
77 | |||
78 | /* align to 64 bits */ | ||
79 | __u32 unused; | ||
80 | |||
81 | /* some extra for later */ | ||
82 | __u64 unused1; | ||
83 | __u64 unused2; | ||
84 | __u64 unused3; | ||
85 | __u64 unused4; | ||
86 | }; | ||
87 | |||
88 | struct btrfs_ioctl_search_header { | ||
89 | __u64 transid; | ||
90 | __u64 objectid; | ||
91 | __u64 offset; | ||
92 | __u32 type; | ||
93 | __u32 len; | ||
94 | }; | ||
95 | |||
96 | #define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key)) | ||
97 | /* | ||
98 | * the buf is an array of search headers where | ||
99 | * each header is followed by the actual item | ||
100 | * the type field is expanded to 32 bits for alignment | ||
101 | */ | ||
102 | struct btrfs_ioctl_search_args { | ||
103 | struct btrfs_ioctl_search_key key; | ||
104 | char buf[BTRFS_SEARCH_ARGS_BUFSIZE]; | ||
105 | }; | ||
106 | |||
33 | struct btrfs_ioctl_clone_range_args { | 107 | struct btrfs_ioctl_clone_range_args { |
34 | __s64 src_fd; | 108 | __s64 src_fd; |
35 | __u64 src_offset, src_length; | 109 | __u64 src_offset, src_length; |
36 | __u64 dest_offset; | 110 | __u64 dest_offset; |
37 | }; | 111 | }; |
38 | 112 | ||
113 | /* flags for the defrag range ioctl */ | ||
114 | #define BTRFS_DEFRAG_RANGE_COMPRESS 1 | ||
115 | #define BTRFS_DEFRAG_RANGE_START_IO 2 | ||
116 | |||
117 | struct btrfs_ioctl_defrag_range_args { | ||
118 | /* start of the defrag operation */ | ||
119 | __u64 start; | ||
120 | |||
121 | /* number of bytes to defrag, use (u64)-1 to say all */ | ||
122 | __u64 len; | ||
123 | |||
124 | /* | ||
125 | * flags for the operation, which can include turning | ||
126 | * on compression for this one defrag | ||
127 | */ | ||
128 | __u64 flags; | ||
129 | |||
130 | /* | ||
131 | * any extent bigger than this will be considered | ||
132 | * already defragged. Use 0 to take the kernel default | ||
133 | * Use 1 to say every single extent must be rewritten | ||
134 | */ | ||
135 | __u32 extent_thresh; | ||
136 | |||
137 | /* | ||
138 | * which compression method to use if turning on compression | ||
139 | * for this defrag operation. If unspecified, zlib will | ||
140 | * be used | ||
141 | */ | ||
142 | __u32 compress_type; | ||
143 | |||
144 | /* spare for later */ | ||
145 | __u32 unused[4]; | ||
146 | }; | ||
147 | |||
148 | struct btrfs_ioctl_space_info { | ||
149 | __u64 flags; | ||
150 | __u64 total_bytes; | ||
151 | __u64 used_bytes; | ||
152 | }; | ||
153 | |||
154 | struct btrfs_ioctl_space_args { | ||
155 | __u64 space_slots; | ||
156 | __u64 total_spaces; | ||
157 | struct btrfs_ioctl_space_info spaces[0]; | ||
158 | }; | ||
159 | |||
39 | #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ | 160 | #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ |
40 | struct btrfs_ioctl_vol_args) | 161 | struct btrfs_ioctl_vol_args) |
41 | #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ | 162 | #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ |
@@ -67,4 +188,19 @@ struct btrfs_ioctl_clone_range_args { | |||
67 | struct btrfs_ioctl_vol_args) | 188 | struct btrfs_ioctl_vol_args) |
68 | #define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ | 189 | #define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ |
69 | struct btrfs_ioctl_vol_args) | 190 | struct btrfs_ioctl_vol_args) |
191 | #define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \ | ||
192 | struct btrfs_ioctl_defrag_range_args) | ||
193 | #define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \ | ||
194 | struct btrfs_ioctl_search_args) | ||
195 | #define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ | ||
196 | struct btrfs_ioctl_ino_lookup_args) | ||
197 | #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) | ||
198 | #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ | ||
199 | struct btrfs_ioctl_space_args) | ||
200 | #define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) | ||
201 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) | ||
202 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ | ||
203 | struct btrfs_ioctl_vol_args_v2) | ||
204 | #define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) | ||
205 | #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) | ||
70 | #endif | 206 | #endif |
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 1c36e5cd8f55..6151f2ea38bb 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
@@ -16,7 +16,6 @@ | |||
16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
17 | */ | 17 | */ |
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/gfp.h> | ||
20 | #include <linux/pagemap.h> | 19 | #include <linux/pagemap.h> |
21 | #include <linux/spinlock.h> | 20 | #include <linux/spinlock.h> |
22 | #include <linux/page-flags.h> | 21 | #include <linux/page-flags.h> |
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c new file mode 100644 index 000000000000..cc9b450399df --- /dev/null +++ b/fs/btrfs/lzo.c | |||
@@ -0,0 +1,420 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/slab.h> | ||
21 | #include <linux/vmalloc.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/err.h> | ||
24 | #include <linux/sched.h> | ||
25 | #include <linux/pagemap.h> | ||
26 | #include <linux/bio.h> | ||
27 | #include <linux/lzo.h> | ||
28 | #include "compression.h" | ||
29 | |||
30 | #define LZO_LEN 4 | ||
31 | |||
/* Per-CPU-ish scratch state handed out by the btrfs compression core. */
struct workspace {
	void *mem;	/* scratch memory for lzo1x_1_compress (LZO1X_MEM_COMPRESS bytes) */
	void *buf;	/* where decompressed data goes (lzo1x_decompress_safe output) */
	void *cbuf;	/* where compressed data goes (lzo1x_1_compress output / staging) */
	struct list_head list;	/* links this workspace into the shared idle list */
};
38 | |||
39 | static void lzo_free_workspace(struct list_head *ws) | ||
40 | { | ||
41 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
42 | |||
43 | vfree(workspace->buf); | ||
44 | vfree(workspace->cbuf); | ||
45 | vfree(workspace->mem); | ||
46 | kfree(workspace); | ||
47 | } | ||
48 | |||
49 | static struct list_head *lzo_alloc_workspace(void) | ||
50 | { | ||
51 | struct workspace *workspace; | ||
52 | |||
53 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | ||
54 | if (!workspace) | ||
55 | return ERR_PTR(-ENOMEM); | ||
56 | |||
57 | workspace->mem = vmalloc(LZO1X_MEM_COMPRESS); | ||
58 | workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); | ||
59 | workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); | ||
60 | if (!workspace->mem || !workspace->buf || !workspace->cbuf) | ||
61 | goto fail; | ||
62 | |||
63 | INIT_LIST_HEAD(&workspace->list); | ||
64 | |||
65 | return &workspace->list; | ||
66 | fail: | ||
67 | lzo_free_workspace(&workspace->list); | ||
68 | return ERR_PTR(-ENOMEM); | ||
69 | } | ||
70 | |||
71 | static inline void write_compress_length(char *buf, size_t len) | ||
72 | { | ||
73 | __le32 dlen; | ||
74 | |||
75 | dlen = cpu_to_le32(len); | ||
76 | memcpy(buf, &dlen, LZO_LEN); | ||
77 | } | ||
78 | |||
79 | static inline size_t read_compress_length(char *buf) | ||
80 | { | ||
81 | __le32 dlen; | ||
82 | |||
83 | memcpy(&dlen, buf, LZO_LEN); | ||
84 | return le32_to_cpu(dlen); | ||
85 | } | ||
86 | |||
/*
 * Compress [start, start + len) of @mapping into @pages with LZO1X,
 * one input page per chunk.
 *
 * On-disk layout produced here:
 *   - first LZO_LEN (4) bytes of pages[0]: total compressed size,
 *     including all headers (written last, once tot_out is known);
 *   - each chunk: a 4-byte length header followed by the compressed
 *     bytes. A header is never split across an output page: if fewer
 *     than 4 bytes remain in a page they are zero-padded and counted
 *     in tot_out, and the header starts on the next page.
 *
 * Outputs: *out_pages = pages used, *total_in / *total_out = byte
 * counts consumed/produced (the latter two only set on success).
 * Returns 0 on success, -ENOMEM on allocation failure, -1 when the
 * output would not fit in nr_dest_pages.
 */
static int lzo_compress_pages(struct list_head *ws,
			      struct address_space *mapping,
			      u64 start, unsigned long len,
			      struct page **pages,
			      unsigned long nr_dest_pages,
			      unsigned long *out_pages,
			      unsigned long *total_in,
			      unsigned long *total_out,
			      unsigned long max_out)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	int ret = 0;
	char *data_in;
	char *cpage_out;
	int nr_pages = 0;
	struct page *in_page = NULL;
	struct page *out_page = NULL;
	unsigned long bytes_left;

	size_t in_len;
	size_t out_len;
	char *buf;
	unsigned long tot_in = 0;
	unsigned long tot_out = 0;
	unsigned long pg_bytes_left;
	unsigned long out_offset;
	unsigned long bytes;

	*out_pages = 0;
	*total_out = 0;
	*total_in = 0;

	in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
	data_in = kmap(in_page);

	/*
	 * store the size of all chunks of compressed data in
	 * the first 4 bytes
	 */
	out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
	if (out_page == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	cpage_out = kmap(out_page);
	/* Skip the 4-byte total-size slot; it is filled in at the end. */
	out_offset = LZO_LEN;
	tot_out = LZO_LEN;
	pages[0] = out_page;
	nr_pages = 1;
	pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;

	/* compress at most one page of data each time */
	in_len = min(len, PAGE_CACHE_SIZE);
	while (tot_in < len) {
		/* compressed result lands in cbuf, sized for the worst case */
		ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
				       &out_len, workspace->mem);
		if (ret != LZO_E_OK) {
			printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
			       ret);
			ret = -1;
			goto out;
		}

		/* store the size of this chunk of compressed data */
		write_compress_length(cpage_out + out_offset, out_len);
		tot_out += LZO_LEN;
		out_offset += LZO_LEN;
		pg_bytes_left -= LZO_LEN;

		tot_in += in_len;
		tot_out += out_len;

		/* copy bytes from the working buffer into the pages */
		buf = workspace->cbuf;
		while (out_len) {
			bytes = min_t(unsigned long, pg_bytes_left, out_len);

			memcpy(cpage_out + out_offset, buf, bytes);

			out_len -= bytes;
			pg_bytes_left -= bytes;
			buf += bytes;
			out_offset += bytes;

			/*
			 * we need another page for writing out.
			 *
			 * Note if there's less than 4 bytes left, we just
			 * skip to a new page.
			 */
			if ((out_len == 0 && pg_bytes_left < LZO_LEN) ||
			    pg_bytes_left == 0) {
				if (pg_bytes_left) {
					/* pad the tail so the next chunk
					 * header starts on a fresh page */
					memset(cpage_out + out_offset, 0,
					       pg_bytes_left);
					tot_out += pg_bytes_left;
				}

				/* we're done, don't allocate new page */
				if (out_len == 0 && tot_in >= len)
					break;

				kunmap(out_page);
				if (nr_pages == nr_dest_pages) {
					/* out of destination pages; NULL so
					 * the out: path doesn't kunmap twice */
					out_page = NULL;
					ret = -1;
					goto out;
				}

				out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
				if (out_page == NULL) {
					ret = -ENOMEM;
					goto out;
				}
				cpage_out = kmap(out_page);
				pages[nr_pages++] = out_page;

				pg_bytes_left = PAGE_CACHE_SIZE;
				out_offset = 0;
			}
		}

		/* we're making it bigger, give up */
		/* NOTE(review): ret is LZO_E_OK (0) here, so this bail-out
		 * returns 0 with *total_out still 0 — confirm callers treat
		 * that as "not compressed". */
		if (tot_in > 8192 && tot_in < tot_out)
			goto out;

		/* we're all done */
		if (tot_in >= len)
			break;

		if (tot_out > max_out)
			break;

		/* advance to the next input page */
		bytes_left = len - tot_in;
		kunmap(in_page);
		page_cache_release(in_page);

		start += PAGE_CACHE_SIZE;
		in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
		data_in = kmap(in_page);
		in_len = min(bytes_left, PAGE_CACHE_SIZE);
	}

	/* no net savings: report failure-to-compress (ret is 0 here too) */
	if (tot_out > tot_in)
		goto out;

	/* store the size of all chunks of compressed data */
	cpage_out = kmap(pages[0]);
	write_compress_length(cpage_out, tot_out);

	kunmap(pages[0]);

	ret = 0;
	*total_out = tot_out;
	*total_in = tot_in;
out:
	/* always report pages handed back in @pages, even on error */
	*out_pages = nr_pages;
	if (out_page)
		kunmap(out_page);

	if (in_page) {
		kunmap(in_page);
		page_cache_release(in_page);
	}

	return ret;
}
254 | |||
/*
 * Decompress a multi-chunk LZO extent spread over @pages_in and copy
 * the result into the bio's pages.
 *
 * Input layout (see lzo_compress_pages): first 4 bytes of pages_in[0]
 * hold the total compressed size; each chunk is a 4-byte length header
 * plus payload, and a header never straddles an input page boundary
 * (tails shorter than 4 bytes are padding).
 *
 * Returns 0 on success, -1 on corrupt input or decompression failure.
 */
static int lzo_decompress_biovec(struct list_head *ws,
				 struct page **pages_in,
				 u64 disk_start,
				 struct bio_vec *bvec,
				 int vcnt,
				 size_t srclen)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	int ret = 0, ret2;
	char *data_in;
	unsigned long page_in_index = 0;
	unsigned long page_out_index = 0;
	unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
		PAGE_CACHE_SIZE;
	unsigned long buf_start;
	unsigned long buf_offset = 0;
	unsigned long bytes;
	unsigned long working_bytes;
	unsigned long pg_offset;

	size_t in_len;
	size_t out_len;
	unsigned long in_offset;
	unsigned long in_page_bytes_left;
	unsigned long tot_in;
	unsigned long tot_out;
	unsigned long tot_len;
	char *buf;

	data_in = kmap(pages_in[0]);
	tot_len = read_compress_length(data_in);

	tot_in = LZO_LEN;
	in_offset = LZO_LEN;
	/* never trust the stored total beyond what we were handed */
	tot_len = min_t(size_t, srclen, tot_len);
	in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;

	tot_out = 0;
	pg_offset = 0;

	while (tot_in < tot_len) {
		/* per-chunk length header */
		in_len = read_compress_length(data_in + in_offset);
		in_page_bytes_left -= LZO_LEN;
		in_offset += LZO_LEN;
		tot_in += LZO_LEN;

		tot_in += in_len;
		working_bytes = in_len;

		/* fast path: avoid using the working buffer */
		/* The whole chunk sits in the current page, so decompress
		 * straight from the mapped page. The goto enters the copy
		 * loop below with buf/bytes pre-set so the shared offset
		 * bookkeeping and page advancing still run. */
		if (in_page_bytes_left >= in_len) {
			buf = data_in + in_offset;
			bytes = in_len;
			goto cont;
		}

		/* copy bytes from the pages into the working buffer */
		/* chunk crosses a page boundary: stage it into cbuf */
		buf = workspace->cbuf;
		buf_offset = 0;
		while (working_bytes) {
			bytes = min(working_bytes, in_page_bytes_left);

			memcpy(buf + buf_offset, data_in + in_offset, bytes);
			buf_offset += bytes;
cont:
			working_bytes -= bytes;
			in_page_bytes_left -= bytes;
			in_offset += bytes;

			/* check if we need to pick another page */
			/* a tail shorter than a header is compressor padding;
			 * count it and move on */
			if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN)
			    || in_page_bytes_left == 0) {
				tot_in += in_page_bytes_left;

				if (working_bytes == 0 && tot_in >= tot_len)
					break;

				kunmap(pages_in[page_in_index]);
				page_in_index++;
				if (page_in_index >= total_pages_in) {
					/* ran past the input: corrupt data.
					 * data_in = NULL prevents a double
					 * kunmap at done: */
					ret = -1;
					data_in = NULL;
					goto done;
				}
				data_in = kmap(pages_in[page_in_index]);

				in_page_bytes_left = PAGE_CACHE_SIZE;
				in_offset = 0;
			}
		}

		out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
		ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
					    &out_len);
		if (ret != LZO_E_OK) {
			printk(KERN_WARNING "btrfs decompress failed\n");
			ret = -1;
			break;
		}

		buf_start = tot_out;
		tot_out += out_len;

		/* hand the decompressed bytes to the bio pages; a return of
		 * 0 presumably means all requested pages are filled — TODO
		 * confirm against btrfs_decompress_buf2page */
		ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
						 tot_out, disk_start,
						 bvec, vcnt,
						 &page_out_index, &pg_offset);
		if (ret2 == 0)
			break;
	}
done:
	if (data_in)
		kunmap(pages_in[page_in_index]);
	return ret;
}
370 | |||
371 | static int lzo_decompress(struct list_head *ws, unsigned char *data_in, | ||
372 | struct page *dest_page, | ||
373 | unsigned long start_byte, | ||
374 | size_t srclen, size_t destlen) | ||
375 | { | ||
376 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
377 | size_t in_len; | ||
378 | size_t out_len; | ||
379 | size_t tot_len; | ||
380 | int ret = 0; | ||
381 | char *kaddr; | ||
382 | unsigned long bytes; | ||
383 | |||
384 | BUG_ON(srclen < LZO_LEN); | ||
385 | |||
386 | tot_len = read_compress_length(data_in); | ||
387 | data_in += LZO_LEN; | ||
388 | |||
389 | in_len = read_compress_length(data_in); | ||
390 | data_in += LZO_LEN; | ||
391 | |||
392 | out_len = PAGE_CACHE_SIZE; | ||
393 | ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); | ||
394 | if (ret != LZO_E_OK) { | ||
395 | printk(KERN_WARNING "btrfs decompress failed!\n"); | ||
396 | ret = -1; | ||
397 | goto out; | ||
398 | } | ||
399 | |||
400 | if (out_len < start_byte) { | ||
401 | ret = -1; | ||
402 | goto out; | ||
403 | } | ||
404 | |||
405 | bytes = min_t(unsigned long, destlen, out_len - start_byte); | ||
406 | |||
407 | kaddr = kmap_atomic(dest_page, KM_USER0); | ||
408 | memcpy(kaddr, workspace->buf + start_byte, bytes); | ||
409 | kunmap_atomic(kaddr, KM_USER0); | ||
410 | out: | ||
411 | return ret; | ||
412 | } | ||
413 | |||
/* Ops table plugging the LZO routines into btrfs' generic compression
 * framework (declared in compression.h alongside the zlib variant). */
struct btrfs_compress_op btrfs_lzo_compress = {
	.alloc_workspace = lzo_alloc_workspace,
	.free_workspace = lzo_free_workspace,
	.compress_pages = lzo_compress_pages,
	.decompress_biovec = lzo_decompress_biovec,
	.decompress = lzo_decompress,
};
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 5c2a9e78a949..2b61e1ddcd99 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -16,7 +16,6 @@ | |||
16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/gfp.h> | ||
20 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
21 | #include <linux/blkdev.h> | 20 | #include <linux/blkdev.h> |
22 | #include <linux/writeback.h> | 21 | #include <linux/writeback.h> |
@@ -125,6 +124,15 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) | |||
125 | return 1; | 124 | return 1; |
126 | } | 125 | } |
127 | 126 | ||
127 | static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset, | ||
128 | u64 len) | ||
129 | { | ||
130 | if (file_offset + len <= entry->file_offset || | ||
131 | entry->file_offset + entry->len <= file_offset) | ||
132 | return 0; | ||
133 | return 1; | ||
134 | } | ||
135 | |||
128 | /* | 136 | /* |
129 | * look find the first ordered struct that has this offset, otherwise | 137 | * look find the first ordered struct that has this offset, otherwise |
130 | * the first one less than this offset | 138 | * the first one less than this offset |
@@ -162,8 +170,9 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
162 | * The tree is given a single reference on the ordered extent that was | 170 | * The tree is given a single reference on the ordered extent that was |
163 | * inserted. | 171 | * inserted. |
164 | */ | 172 | */ |
165 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
166 | u64 start, u64 len, u64 disk_len, int type) | 174 | u64 start, u64 len, u64 disk_len, |
175 | int type, int dio, int compress_type) | ||
167 | { | 176 | { |
168 | struct btrfs_ordered_inode_tree *tree; | 177 | struct btrfs_ordered_inode_tree *tree; |
169 | struct rb_node *node; | 178 | struct rb_node *node; |
@@ -174,36 +183,65 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
174 | if (!entry) | 183 | if (!entry) |
175 | return -ENOMEM; | 184 | return -ENOMEM; |
176 | 185 | ||
177 | mutex_lock(&tree->mutex); | ||
178 | entry->file_offset = file_offset; | 186 | entry->file_offset = file_offset; |
179 | entry->start = start; | 187 | entry->start = start; |
180 | entry->len = len; | 188 | entry->len = len; |
181 | entry->disk_len = disk_len; | 189 | entry->disk_len = disk_len; |
182 | entry->bytes_left = len; | 190 | entry->bytes_left = len; |
183 | entry->inode = inode; | 191 | entry->inode = inode; |
192 | entry->compress_type = compress_type; | ||
184 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 193 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
185 | set_bit(type, &entry->flags); | 194 | set_bit(type, &entry->flags); |
186 | 195 | ||
196 | if (dio) | ||
197 | set_bit(BTRFS_ORDERED_DIRECT, &entry->flags); | ||
198 | |||
187 | /* one ref for the tree */ | 199 | /* one ref for the tree */ |
188 | atomic_set(&entry->refs, 1); | 200 | atomic_set(&entry->refs, 1); |
189 | init_waitqueue_head(&entry->wait); | 201 | init_waitqueue_head(&entry->wait); |
190 | INIT_LIST_HEAD(&entry->list); | 202 | INIT_LIST_HEAD(&entry->list); |
191 | INIT_LIST_HEAD(&entry->root_extent_list); | 203 | INIT_LIST_HEAD(&entry->root_extent_list); |
192 | 204 | ||
205 | spin_lock(&tree->lock); | ||
193 | node = tree_insert(&tree->tree, file_offset, | 206 | node = tree_insert(&tree->tree, file_offset, |
194 | &entry->rb_node); | 207 | &entry->rb_node); |
195 | BUG_ON(node); | 208 | BUG_ON(node); |
209 | spin_unlock(&tree->lock); | ||
196 | 210 | ||
197 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 211 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
198 | list_add_tail(&entry->root_extent_list, | 212 | list_add_tail(&entry->root_extent_list, |
199 | &BTRFS_I(inode)->root->fs_info->ordered_extents); | 213 | &BTRFS_I(inode)->root->fs_info->ordered_extents); |
200 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 214 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
201 | 215 | ||
202 | mutex_unlock(&tree->mutex); | ||
203 | BUG_ON(node); | 216 | BUG_ON(node); |
204 | return 0; | 217 | return 0; |
205 | } | 218 | } |
206 | 219 | ||
220 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | ||
221 | u64 start, u64 len, u64 disk_len, int type) | ||
222 | { | ||
223 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
224 | disk_len, type, 0, | ||
225 | BTRFS_COMPRESS_NONE); | ||
226 | } | ||
227 | |||
228 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | ||
229 | u64 start, u64 len, u64 disk_len, int type) | ||
230 | { | ||
231 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
232 | disk_len, type, 1, | ||
233 | BTRFS_COMPRESS_NONE); | ||
234 | } | ||
235 | |||
236 | int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, | ||
237 | u64 start, u64 len, u64 disk_len, | ||
238 | int type, int compress_type) | ||
239 | { | ||
240 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
241 | disk_len, type, 0, | ||
242 | compress_type); | ||
243 | } | ||
244 | |||
207 | /* | 245 | /* |
208 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted | 246 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted |
209 | * when an ordered extent is finished. If the list covers more than one | 247 | * when an ordered extent is finished. If the list covers more than one |
@@ -216,14 +254,81 @@ int btrfs_add_ordered_sum(struct inode *inode, | |||
216 | struct btrfs_ordered_inode_tree *tree; | 254 | struct btrfs_ordered_inode_tree *tree; |
217 | 255 | ||
218 | tree = &BTRFS_I(inode)->ordered_tree; | 256 | tree = &BTRFS_I(inode)->ordered_tree; |
219 | mutex_lock(&tree->mutex); | 257 | spin_lock(&tree->lock); |
220 | list_add_tail(&sum->list, &entry->list); | 258 | list_add_tail(&sum->list, &entry->list); |
221 | mutex_unlock(&tree->mutex); | 259 | spin_unlock(&tree->lock); |
222 | return 0; | 260 | return 0; |
223 | } | 261 | } |
224 | 262 | ||
225 | /* | 263 | /* |
226 | * this is used to account for finished IO across a given range | 264 | * this is used to account for finished IO across a given range |
265 | * of the file. The IO may span ordered extents. If | ||
266 | * a given ordered_extent is completely done, 1 is returned, otherwise | ||
267 | * 0. | ||
268 | * | ||
269 | * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used | ||
270 | * to make sure this function only returns 1 once for a given ordered extent. | ||
271 | * | ||
272 | * file_offset is updated to one byte past the range that is recorded as | ||
273 | * complete. This allows you to walk forward in the file. | ||
274 | */ | ||
275 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, | ||
276 | struct btrfs_ordered_extent **cached, | ||
277 | u64 *file_offset, u64 io_size) | ||
278 | { | ||
279 | struct btrfs_ordered_inode_tree *tree; | ||
280 | struct rb_node *node; | ||
281 | struct btrfs_ordered_extent *entry = NULL; | ||
282 | int ret; | ||
283 | u64 dec_end; | ||
284 | u64 dec_start; | ||
285 | u64 to_dec; | ||
286 | |||
287 | tree = &BTRFS_I(inode)->ordered_tree; | ||
288 | spin_lock(&tree->lock); | ||
289 | node = tree_search(tree, *file_offset); | ||
290 | if (!node) { | ||
291 | ret = 1; | ||
292 | goto out; | ||
293 | } | ||
294 | |||
295 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
296 | if (!offset_in_entry(entry, *file_offset)) { | ||
297 | ret = 1; | ||
298 | goto out; | ||
299 | } | ||
300 | |||
301 | dec_start = max(*file_offset, entry->file_offset); | ||
302 | dec_end = min(*file_offset + io_size, entry->file_offset + | ||
303 | entry->len); | ||
304 | *file_offset = dec_end; | ||
305 | if (dec_start > dec_end) { | ||
306 | printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n", | ||
307 | (unsigned long long)dec_start, | ||
308 | (unsigned long long)dec_end); | ||
309 | } | ||
310 | to_dec = dec_end - dec_start; | ||
311 | if (to_dec > entry->bytes_left) { | ||
312 | printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", | ||
313 | (unsigned long long)entry->bytes_left, | ||
314 | (unsigned long long)to_dec); | ||
315 | } | ||
316 | entry->bytes_left -= to_dec; | ||
317 | if (entry->bytes_left == 0) | ||
318 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | ||
319 | else | ||
320 | ret = 1; | ||
321 | out: | ||
322 | if (!ret && cached && entry) { | ||
323 | *cached = entry; | ||
324 | atomic_inc(&entry->refs); | ||
325 | } | ||
326 | spin_unlock(&tree->lock); | ||
327 | return ret == 0; | ||
328 | } | ||
329 | |||
330 | /* | ||
331 | * this is used to account for finished IO across a given range | ||
227 | * of the file. The IO should not span ordered extents. If | 332 | * of the file. The IO should not span ordered extents. If |
228 | * a given ordered_extent is completely done, 1 is returned, otherwise | 333 | * a given ordered_extent is completely done, 1 is returned, otherwise |
229 | * 0. | 334 | * 0. |
@@ -232,15 +337,16 @@ int btrfs_add_ordered_sum(struct inode *inode, | |||
232 | * to make sure this function only returns 1 once for a given ordered extent. | 337 | * to make sure this function only returns 1 once for a given ordered extent. |
233 | */ | 338 | */ |
234 | int btrfs_dec_test_ordered_pending(struct inode *inode, | 339 | int btrfs_dec_test_ordered_pending(struct inode *inode, |
340 | struct btrfs_ordered_extent **cached, | ||
235 | u64 file_offset, u64 io_size) | 341 | u64 file_offset, u64 io_size) |
236 | { | 342 | { |
237 | struct btrfs_ordered_inode_tree *tree; | 343 | struct btrfs_ordered_inode_tree *tree; |
238 | struct rb_node *node; | 344 | struct rb_node *node; |
239 | struct btrfs_ordered_extent *entry; | 345 | struct btrfs_ordered_extent *entry = NULL; |
240 | int ret; | 346 | int ret; |
241 | 347 | ||
242 | tree = &BTRFS_I(inode)->ordered_tree; | 348 | tree = &BTRFS_I(inode)->ordered_tree; |
243 | mutex_lock(&tree->mutex); | 349 | spin_lock(&tree->lock); |
244 | node = tree_search(tree, file_offset); | 350 | node = tree_search(tree, file_offset); |
245 | if (!node) { | 351 | if (!node) { |
246 | ret = 1; | 352 | ret = 1; |
@@ -264,7 +370,11 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
264 | else | 370 | else |
265 | ret = 1; | 371 | ret = 1; |
266 | out: | 372 | out: |
267 | mutex_unlock(&tree->mutex); | 373 | if (!ret && cached && entry) { |
374 | *cached = entry; | ||
375 | atomic_inc(&entry->refs); | ||
376 | } | ||
377 | spin_unlock(&tree->lock); | ||
268 | return ret == 0; | 378 | return ret == 0; |
269 | } | 379 | } |
270 | 380 | ||
@@ -291,13 +401,14 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | |||
291 | 401 | ||
292 | /* | 402 | /* |
293 | * remove an ordered extent from the tree. No references are dropped | 403 | * remove an ordered extent from the tree. No references are dropped |
294 | * and you must wake_up entry->wait. You must hold the tree mutex | 404 | * and you must wake_up entry->wait. You must hold the tree lock |
295 | * while you call this function. | 405 | * while you call this function. |
296 | */ | 406 | */ |
297 | static int __btrfs_remove_ordered_extent(struct inode *inode, | 407 | static int __btrfs_remove_ordered_extent(struct inode *inode, |
298 | struct btrfs_ordered_extent *entry) | 408 | struct btrfs_ordered_extent *entry) |
299 | { | 409 | { |
300 | struct btrfs_ordered_inode_tree *tree; | 410 | struct btrfs_ordered_inode_tree *tree; |
411 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
301 | struct rb_node *node; | 412 | struct rb_node *node; |
302 | 413 | ||
303 | tree = &BTRFS_I(inode)->ordered_tree; | 414 | tree = &BTRFS_I(inode)->ordered_tree; |
@@ -306,13 +417,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, | |||
306 | tree->last = NULL; | 417 | tree->last = NULL; |
307 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 418 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
308 | 419 | ||
309 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 420 | spin_lock(&root->fs_info->ordered_extent_lock); |
310 | BTRFS_I(inode)->outstanding_extents--; | ||
311 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
312 | btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, | ||
313 | inode, 1); | ||
314 | |||
315 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | ||
316 | list_del_init(&entry->root_extent_list); | 421 | list_del_init(&entry->root_extent_list); |
317 | 422 | ||
318 | /* | 423 | /* |
@@ -324,7 +429,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, | |||
324 | !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { | 429 | !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { |
325 | list_del_init(&BTRFS_I(inode)->ordered_operations); | 430 | list_del_init(&BTRFS_I(inode)->ordered_operations); |
326 | } | 431 | } |
327 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 432 | spin_unlock(&root->fs_info->ordered_extent_lock); |
328 | 433 | ||
329 | return 0; | 434 | return 0; |
330 | } | 435 | } |
@@ -340,9 +445,9 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
340 | int ret; | 445 | int ret; |
341 | 446 | ||
342 | tree = &BTRFS_I(inode)->ordered_tree; | 447 | tree = &BTRFS_I(inode)->ordered_tree; |
343 | mutex_lock(&tree->mutex); | 448 | spin_lock(&tree->lock); |
344 | ret = __btrfs_remove_ordered_extent(inode, entry); | 449 | ret = __btrfs_remove_ordered_extent(inode, entry); |
345 | mutex_unlock(&tree->mutex); | 450 | spin_unlock(&tree->lock); |
346 | wake_up(&entry->wait); | 451 | wake_up(&entry->wait); |
347 | 452 | ||
348 | return ret; | 453 | return ret; |
@@ -485,7 +590,8 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
485 | * start IO on any dirty ones so the wait doesn't stall waiting | 590 | * start IO on any dirty ones so the wait doesn't stall waiting |
486 | * for pdflush to find them | 591 | * for pdflush to find them |
487 | */ | 592 | */ |
488 | filemap_fdatawrite_range(inode->i_mapping, start, end); | 593 | if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) |
594 | filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
489 | if (wait) { | 595 | if (wait) { |
490 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 596 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
491 | &entry->flags)); | 597 | &entry->flags)); |
@@ -499,7 +605,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
499 | { | 605 | { |
500 | u64 end; | 606 | u64 end; |
501 | u64 orig_end; | 607 | u64 orig_end; |
502 | u64 wait_end; | ||
503 | struct btrfs_ordered_extent *ordered; | 608 | struct btrfs_ordered_extent *ordered; |
504 | int found; | 609 | int found; |
505 | 610 | ||
@@ -510,7 +615,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
510 | if (orig_end > INT_LIMIT(loff_t)) | 615 | if (orig_end > INT_LIMIT(loff_t)) |
511 | orig_end = INT_LIMIT(loff_t); | 616 | orig_end = INT_LIMIT(loff_t); |
512 | } | 617 | } |
513 | wait_end = orig_end; | ||
514 | again: | 618 | again: |
515 | /* start IO across the range first to instantiate any delalloc | 619 | /* start IO across the range first to instantiate any delalloc |
516 | * extents | 620 | * extents |
@@ -567,7 +671,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, | |||
567 | struct btrfs_ordered_extent *entry = NULL; | 671 | struct btrfs_ordered_extent *entry = NULL; |
568 | 672 | ||
569 | tree = &BTRFS_I(inode)->ordered_tree; | 673 | tree = &BTRFS_I(inode)->ordered_tree; |
570 | mutex_lock(&tree->mutex); | 674 | spin_lock(&tree->lock); |
571 | node = tree_search(tree, file_offset); | 675 | node = tree_search(tree, file_offset); |
572 | if (!node) | 676 | if (!node) |
573 | goto out; | 677 | goto out; |
@@ -578,7 +682,48 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, | |||
578 | if (entry) | 682 | if (entry) |
579 | atomic_inc(&entry->refs); | 683 | atomic_inc(&entry->refs); |
580 | out: | 684 | out: |
581 | mutex_unlock(&tree->mutex); | 685 | spin_unlock(&tree->lock); |
686 | return entry; | ||
687 | } | ||
688 | |||
689 | /* Since the DIO code tries to lock a wide area we need to look for any ordered | ||
690 | * extents that exist in the range, rather than just the start of the range. | ||
691 | */ | ||
692 | struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | ||
693 | u64 file_offset, | ||
694 | u64 len) | ||
695 | { | ||
696 | struct btrfs_ordered_inode_tree *tree; | ||
697 | struct rb_node *node; | ||
698 | struct btrfs_ordered_extent *entry = NULL; | ||
699 | |||
700 | tree = &BTRFS_I(inode)->ordered_tree; | ||
701 | spin_lock(&tree->lock); | ||
702 | node = tree_search(tree, file_offset); | ||
703 | if (!node) { | ||
704 | node = tree_search(tree, file_offset + len); | ||
705 | if (!node) | ||
706 | goto out; | ||
707 | } | ||
708 | |||
709 | while (1) { | ||
710 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
711 | if (range_overlaps(entry, file_offset, len)) | ||
712 | break; | ||
713 | |||
714 | if (entry->file_offset >= file_offset + len) { | ||
715 | entry = NULL; | ||
716 | break; | ||
717 | } | ||
718 | entry = NULL; | ||
719 | node = rb_next(node); | ||
720 | if (!node) | ||
721 | break; | ||
722 | } | ||
723 | out: | ||
724 | if (entry) | ||
725 | atomic_inc(&entry->refs); | ||
726 | spin_unlock(&tree->lock); | ||
582 | return entry; | 727 | return entry; |
583 | } | 728 | } |
584 | 729 | ||
@@ -594,7 +739,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) | |||
594 | struct btrfs_ordered_extent *entry = NULL; | 739 | struct btrfs_ordered_extent *entry = NULL; |
595 | 740 | ||
596 | tree = &BTRFS_I(inode)->ordered_tree; | 741 | tree = &BTRFS_I(inode)->ordered_tree; |
597 | mutex_lock(&tree->mutex); | 742 | spin_lock(&tree->lock); |
598 | node = tree_search(tree, file_offset); | 743 | node = tree_search(tree, file_offset); |
599 | if (!node) | 744 | if (!node) |
600 | goto out; | 745 | goto out; |
@@ -602,7 +747,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) | |||
602 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | 747 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); |
603 | atomic_inc(&entry->refs); | 748 | atomic_inc(&entry->refs); |
604 | out: | 749 | out: |
605 | mutex_unlock(&tree->mutex); | 750 | spin_unlock(&tree->lock); |
606 | return entry; | 751 | return entry; |
607 | } | 752 | } |
608 | 753 | ||
@@ -629,7 +774,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
629 | else | 774 | else |
630 | offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); | 775 | offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); |
631 | 776 | ||
632 | mutex_lock(&tree->mutex); | 777 | spin_lock(&tree->lock); |
633 | disk_i_size = BTRFS_I(inode)->disk_i_size; | 778 | disk_i_size = BTRFS_I(inode)->disk_i_size; |
634 | 779 | ||
635 | /* truncate file */ | 780 | /* truncate file */ |
@@ -735,7 +880,7 @@ out: | |||
735 | */ | 880 | */ |
736 | if (ordered) | 881 | if (ordered) |
737 | __btrfs_remove_ordered_extent(inode, ordered); | 882 | __btrfs_remove_ordered_extent(inode, ordered); |
738 | mutex_unlock(&tree->mutex); | 883 | spin_unlock(&tree->lock); |
739 | if (ordered) | 884 | if (ordered) |
740 | wake_up(&ordered->wait); | 885 | wake_up(&ordered->wait); |
741 | return ret; | 886 | return ret; |
@@ -762,7 +907,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | |||
762 | if (!ordered) | 907 | if (!ordered) |
763 | return 1; | 908 | return 1; |
764 | 909 | ||
765 | mutex_lock(&tree->mutex); | 910 | spin_lock(&tree->lock); |
766 | list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { | 911 | list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { |
767 | if (disk_bytenr >= ordered_sum->bytenr) { | 912 | if (disk_bytenr >= ordered_sum->bytenr) { |
768 | num_sectors = ordered_sum->len / sectorsize; | 913 | num_sectors = ordered_sum->len / sectorsize; |
@@ -777,7 +922,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | |||
777 | } | 922 | } |
778 | } | 923 | } |
779 | out: | 924 | out: |
780 | mutex_unlock(&tree->mutex); | 925 | spin_unlock(&tree->lock); |
781 | btrfs_put_ordered_extent(ordered); | 926 | btrfs_put_ordered_extent(ordered); |
782 | return ret; | 927 | return ret; |
783 | } | 928 | } |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 1fe1282ef47c..ff1f69aa1883 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -21,7 +21,7 @@ | |||
21 | 21 | ||
22 | /* one of these per inode */ | 22 | /* one of these per inode */ |
23 | struct btrfs_ordered_inode_tree { | 23 | struct btrfs_ordered_inode_tree { |
24 | struct mutex mutex; | 24 | spinlock_t lock; |
25 | struct rb_root tree; | 25 | struct rb_root tree; |
26 | struct rb_node *last; | 26 | struct rb_node *last; |
27 | }; | 27 | }; |
@@ -68,10 +68,12 @@ struct btrfs_ordered_sum { | |||
68 | 68 | ||
69 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ | 69 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ |
70 | 70 | ||
71 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ | 71 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */ |
72 | 72 | ||
73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ | 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ |
74 | 74 | ||
75 | #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ | ||
76 | |||
75 | struct btrfs_ordered_extent { | 77 | struct btrfs_ordered_extent { |
76 | /* logical offset in the file */ | 78 | /* logical offset in the file */ |
77 | u64 file_offset; | 79 | u64 file_offset; |
@@ -91,6 +93,9 @@ struct btrfs_ordered_extent { | |||
91 | /* flags (described above) */ | 93 | /* flags (described above) */ |
92 | unsigned long flags; | 94 | unsigned long flags; |
93 | 95 | ||
96 | /* compression algorithm */ | ||
97 | int compress_type; | ||
98 | |||
94 | /* reference count */ | 99 | /* reference count */ |
95 | atomic_t refs; | 100 | atomic_t refs; |
96 | 101 | ||
@@ -128,8 +133,8 @@ static inline int btrfs_ordered_sum_size(struct btrfs_root *root, | |||
128 | static inline void | 133 | static inline void |
129 | btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) | 134 | btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) |
130 | { | 135 | { |
131 | mutex_init(&t->mutex); | 136 | spin_lock_init(&t->lock); |
132 | t->tree.rb_node = NULL; | 137 | t->tree = RB_ROOT; |
133 | t->last = NULL; | 138 | t->last = NULL; |
134 | } | 139 | } |
135 | 140 | ||
@@ -137,9 +142,18 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); | |||
137 | int btrfs_remove_ordered_extent(struct inode *inode, | 142 | int btrfs_remove_ordered_extent(struct inode *inode, |
138 | struct btrfs_ordered_extent *entry); | 143 | struct btrfs_ordered_extent *entry); |
139 | int btrfs_dec_test_ordered_pending(struct inode *inode, | 144 | int btrfs_dec_test_ordered_pending(struct inode *inode, |
140 | u64 file_offset, u64 io_size); | 145 | struct btrfs_ordered_extent **cached, |
146 | u64 file_offset, u64 io_size); | ||
147 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, | ||
148 | struct btrfs_ordered_extent **cached, | ||
149 | u64 *file_offset, u64 io_size); | ||
141 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 150 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
142 | u64 start, u64 len, u64 disk_len, int tyep); | 151 | u64 start, u64 len, u64 disk_len, int type); |
152 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | ||
153 | u64 start, u64 len, u64 disk_len, int type); | ||
154 | int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, | ||
155 | u64 start, u64 len, u64 disk_len, | ||
156 | int type, int compress_type); | ||
143 | int btrfs_add_ordered_sum(struct inode *inode, | 157 | int btrfs_add_ordered_sum(struct inode *inode, |
144 | struct btrfs_ordered_extent *entry, | 158 | struct btrfs_ordered_extent *entry, |
145 | struct btrfs_ordered_sum *sum); | 159 | struct btrfs_ordered_sum *sum); |
@@ -150,6 +164,9 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
150 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); | 164 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); |
151 | struct btrfs_ordered_extent * | 165 | struct btrfs_ordered_extent * |
152 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); | 166 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); |
167 | struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | ||
168 | u64 file_offset, | ||
169 | u64 len); | ||
153 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | 170 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, |
154 | struct btrfs_ordered_extent *ordered); | 171 | struct btrfs_ordered_extent *ordered); |
155 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); | 172 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c index 79cba5fbc28e..f8be250963a0 100644 --- a/fs/btrfs/orphan.c +++ b/fs/btrfs/orphan.c | |||
@@ -56,8 +56,12 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, | |||
56 | return -ENOMEM; | 56 | return -ENOMEM; |
57 | 57 | ||
58 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 58 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
59 | if (ret) | 59 | if (ret < 0) |
60 | goto out; | 60 | goto out; |
61 | if (ret) { | ||
62 | ret = -ENOENT; | ||
63 | goto out; | ||
64 | } | ||
61 | 65 | ||
62 | ret = btrfs_del_item(trans, root, path); | 66 | ret = btrfs_del_item(trans, root, path); |
63 | 67 | ||
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index d0cc62bccb94..a97314cf6bd6 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c | |||
@@ -17,6 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/slab.h> | ||
20 | #include <linux/sort.h> | 21 | #include <linux/sort.h> |
21 | #include "ctree.h" | 22 | #include "ctree.h" |
22 | #include "ref-cache.h" | 23 | #include "ref-cache.h" |
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h index bc283ad2db73..e2a55cb2072b 100644 --- a/fs/btrfs/ref-cache.h +++ b/fs/btrfs/ref-cache.h | |||
@@ -52,7 +52,7 @@ static inline size_t btrfs_leaf_ref_size(int nr_extents) | |||
52 | 52 | ||
53 | static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree) | 53 | static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree) |
54 | { | 54 | { |
55 | tree->root.rb_node = NULL; | 55 | tree->root = RB_ROOT; |
56 | INIT_LIST_HEAD(&tree->list); | 56 | INIT_LIST_HEAD(&tree->list); |
57 | spin_lock_init(&tree->lock); | 57 | spin_lock_init(&tree->lock); |
58 | } | 58 | } |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ed3e4a2ec2c8..045c9c2b2d7e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/writeback.h> | 21 | #include <linux/writeback.h> |
22 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
23 | #include <linux/rbtree.h> | 23 | #include <linux/rbtree.h> |
24 | #include <linux/slab.h> | ||
24 | #include "ctree.h" | 25 | #include "ctree.h" |
25 | #include "disk-io.h" | 26 | #include "disk-io.h" |
26 | #include "transaction.h" | 27 | #include "transaction.h" |
@@ -28,6 +29,7 @@ | |||
28 | #include "locking.h" | 29 | #include "locking.h" |
29 | #include "btrfs_inode.h" | 30 | #include "btrfs_inode.h" |
30 | #include "async-thread.h" | 31 | #include "async-thread.h" |
32 | #include "free-space-cache.h" | ||
31 | 33 | ||
32 | /* | 34 | /* |
33 | * backref_node, mapping_node and tree_block start with this | 35 | * backref_node, mapping_node and tree_block start with this |
@@ -43,8 +45,12 @@ struct tree_entry { | |||
43 | struct backref_node { | 45 | struct backref_node { |
44 | struct rb_node rb_node; | 46 | struct rb_node rb_node; |
45 | u64 bytenr; | 47 | u64 bytenr; |
46 | /* objectid tree block owner */ | 48 | |
49 | u64 new_bytenr; | ||
50 | /* objectid of tree block owner, can be not uptodate */ | ||
47 | u64 owner; | 51 | u64 owner; |
52 | /* link to pending, changed or detached list */ | ||
53 | struct list_head list; | ||
48 | /* list of upper level blocks reference this block */ | 54 | /* list of upper level blocks reference this block */ |
49 | struct list_head upper; | 55 | struct list_head upper; |
50 | /* list of child blocks in the cache */ | 56 | /* list of child blocks in the cache */ |
@@ -55,9 +61,9 @@ struct backref_node { | |||
55 | struct extent_buffer *eb; | 61 | struct extent_buffer *eb; |
56 | /* level of tree block */ | 62 | /* level of tree block */ |
57 | unsigned int level:8; | 63 | unsigned int level:8; |
58 | /* 1 if the block is root of old snapshot */ | 64 | /* is the block in non-reference counted tree */ |
59 | unsigned int old_root:1; | 65 | unsigned int cowonly:1; |
60 | /* 1 if no child blocks in the cache */ | 66 | /* 1 if no child node in the cache */ |
61 | unsigned int lowest:1; | 67 | unsigned int lowest:1; |
62 | /* is the extent buffer locked */ | 68 | /* is the extent buffer locked */ |
63 | unsigned int locked:1; | 69 | unsigned int locked:1; |
@@ -65,6 +71,16 @@ struct backref_node { | |||
65 | unsigned int processed:1; | 71 | unsigned int processed:1; |
66 | /* have backrefs of this block been checked */ | 72 | /* have backrefs of this block been checked */ |
67 | unsigned int checked:1; | 73 | unsigned int checked:1; |
74 | /* | ||
75 | * 1 if corresponding block has been cowed but some upper | ||
76 | * level block pointers may not point to the new location | ||
77 | */ | ||
78 | unsigned int pending:1; | ||
79 | /* | ||
80 | * 1 if the backref node isn't connected to any other | ||
81 | * backref node. | ||
82 | */ | ||
83 | unsigned int detached:1; | ||
68 | }; | 84 | }; |
69 | 85 | ||
70 | /* | 86 | /* |
@@ -73,7 +89,6 @@ struct backref_node { | |||
73 | struct backref_edge { | 89 | struct backref_edge { |
74 | struct list_head list[2]; | 90 | struct list_head list[2]; |
75 | struct backref_node *node[2]; | 91 | struct backref_node *node[2]; |
76 | u64 blockptr; | ||
77 | }; | 92 | }; |
78 | 93 | ||
79 | #define LOWER 0 | 94 | #define LOWER 0 |
@@ -82,9 +97,25 @@ struct backref_edge { | |||
82 | struct backref_cache { | 97 | struct backref_cache { |
83 | /* red black tree of all backref nodes in the cache */ | 98 | /* red black tree of all backref nodes in the cache */ |
84 | struct rb_root rb_root; | 99 | struct rb_root rb_root; |
85 | /* list of backref nodes with no child block in the cache */ | 100 | /* for passing backref nodes to btrfs_reloc_cow_block */ |
101 | struct backref_node *path[BTRFS_MAX_LEVEL]; | ||
102 | /* | ||
103 | * list of blocks that have been cowed but some block | ||
104 | * pointers in upper level blocks may not reflect the | ||
105 | * new location | ||
106 | */ | ||
86 | struct list_head pending[BTRFS_MAX_LEVEL]; | 107 | struct list_head pending[BTRFS_MAX_LEVEL]; |
87 | spinlock_t lock; | 108 | /* list of backref nodes with no child node */ |
109 | struct list_head leaves; | ||
110 | /* list of blocks that have been cowed in current transaction */ | ||
111 | struct list_head changed; | ||
112 | /* list of detached backref node. */ | ||
113 | struct list_head detached; | ||
114 | |||
115 | u64 last_trans; | ||
116 | |||
117 | int nr_nodes; | ||
118 | int nr_edges; | ||
88 | }; | 119 | }; |
89 | 120 | ||
90 | /* | 121 | /* |
@@ -112,15 +143,6 @@ struct tree_block { | |||
112 | unsigned int key_ready:1; | 143 | unsigned int key_ready:1; |
113 | }; | 144 | }; |
114 | 145 | ||
115 | /* inode vector */ | ||
116 | #define INODEVEC_SIZE 16 | ||
117 | |||
118 | struct inodevec { | ||
119 | struct list_head list; | ||
120 | struct inode *inode[INODEVEC_SIZE]; | ||
121 | int nr; | ||
122 | }; | ||
123 | |||
124 | #define MAX_EXTENTS 128 | 146 | #define MAX_EXTENTS 128 |
125 | 147 | ||
126 | struct file_extent_cluster { | 148 | struct file_extent_cluster { |
@@ -137,58 +159,128 @@ struct reloc_control { | |||
137 | struct btrfs_root *extent_root; | 159 | struct btrfs_root *extent_root; |
138 | /* inode for moving data */ | 160 | /* inode for moving data */ |
139 | struct inode *data_inode; | 161 | struct inode *data_inode; |
140 | struct btrfs_workers workers; | 162 | |
163 | struct btrfs_block_rsv *block_rsv; | ||
164 | |||
165 | struct backref_cache backref_cache; | ||
166 | |||
167 | struct file_extent_cluster cluster; | ||
141 | /* tree blocks have been processed */ | 168 | /* tree blocks have been processed */ |
142 | struct extent_io_tree processed_blocks; | 169 | struct extent_io_tree processed_blocks; |
143 | /* map start of tree root to corresponding reloc tree */ | 170 | /* map start of tree root to corresponding reloc tree */ |
144 | struct mapping_tree reloc_root_tree; | 171 | struct mapping_tree reloc_root_tree; |
145 | /* list of reloc trees */ | 172 | /* list of reloc trees */ |
146 | struct list_head reloc_roots; | 173 | struct list_head reloc_roots; |
174 | /* size of metadata reservation for merging reloc trees */ | ||
175 | u64 merging_rsv_size; | ||
176 | /* size of relocated tree nodes */ | ||
177 | u64 nodes_relocated; | ||
178 | |||
147 | u64 search_start; | 179 | u64 search_start; |
148 | u64 extents_found; | 180 | u64 extents_found; |
149 | u64 extents_skipped; | 181 | |
150 | int stage; | 182 | unsigned int stage:8; |
151 | int create_reloc_root; | 183 | unsigned int create_reloc_tree:1; |
184 | unsigned int merge_reloc_tree:1; | ||
152 | unsigned int found_file_extent:1; | 185 | unsigned int found_file_extent:1; |
153 | unsigned int found_old_snapshot:1; | 186 | unsigned int commit_transaction:1; |
154 | }; | 187 | }; |
155 | 188 | ||
156 | /* stages of data relocation */ | 189 | /* stages of data relocation */ |
157 | #define MOVE_DATA_EXTENTS 0 | 190 | #define MOVE_DATA_EXTENTS 0 |
158 | #define UPDATE_DATA_PTRS 1 | 191 | #define UPDATE_DATA_PTRS 1 |
159 | 192 | ||
160 | /* | 193 | static void remove_backref_node(struct backref_cache *cache, |
161 | * merge reloc tree to corresponding fs tree in worker threads | 194 | struct backref_node *node); |
162 | */ | 195 | static void __mark_block_processed(struct reloc_control *rc, |
163 | struct async_merge { | 196 | struct backref_node *node); |
164 | struct btrfs_work work; | ||
165 | struct reloc_control *rc; | ||
166 | struct btrfs_root *root; | ||
167 | struct completion *done; | ||
168 | atomic_t *num_pending; | ||
169 | }; | ||
170 | 197 | ||
171 | static void mapping_tree_init(struct mapping_tree *tree) | 198 | static void mapping_tree_init(struct mapping_tree *tree) |
172 | { | 199 | { |
173 | tree->rb_root.rb_node = NULL; | 200 | tree->rb_root = RB_ROOT; |
174 | spin_lock_init(&tree->lock); | 201 | spin_lock_init(&tree->lock); |
175 | } | 202 | } |
176 | 203 | ||
177 | static void backref_cache_init(struct backref_cache *cache) | 204 | static void backref_cache_init(struct backref_cache *cache) |
178 | { | 205 | { |
179 | int i; | 206 | int i; |
180 | cache->rb_root.rb_node = NULL; | 207 | cache->rb_root = RB_ROOT; |
181 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) | 208 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) |
182 | INIT_LIST_HEAD(&cache->pending[i]); | 209 | INIT_LIST_HEAD(&cache->pending[i]); |
183 | spin_lock_init(&cache->lock); | 210 | INIT_LIST_HEAD(&cache->changed); |
211 | INIT_LIST_HEAD(&cache->detached); | ||
212 | INIT_LIST_HEAD(&cache->leaves); | ||
184 | } | 213 | } |
185 | 214 | ||
186 | static void backref_node_init(struct backref_node *node) | 215 | static void backref_cache_cleanup(struct backref_cache *cache) |
187 | { | 216 | { |
188 | memset(node, 0, sizeof(*node)); | 217 | struct backref_node *node; |
189 | INIT_LIST_HEAD(&node->upper); | 218 | int i; |
190 | INIT_LIST_HEAD(&node->lower); | 219 | |
191 | RB_CLEAR_NODE(&node->rb_node); | 220 | while (!list_empty(&cache->detached)) { |
221 | node = list_entry(cache->detached.next, | ||
222 | struct backref_node, list); | ||
223 | remove_backref_node(cache, node); | ||
224 | } | ||
225 | |||
226 | while (!list_empty(&cache->leaves)) { | ||
227 | node = list_entry(cache->leaves.next, | ||
228 | struct backref_node, lower); | ||
229 | remove_backref_node(cache, node); | ||
230 | } | ||
231 | |||
232 | cache->last_trans = 0; | ||
233 | |||
234 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) | ||
235 | BUG_ON(!list_empty(&cache->pending[i])); | ||
236 | BUG_ON(!list_empty(&cache->changed)); | ||
237 | BUG_ON(!list_empty(&cache->detached)); | ||
238 | BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); | ||
239 | BUG_ON(cache->nr_nodes); | ||
240 | BUG_ON(cache->nr_edges); | ||
241 | } | ||
242 | |||
243 | static struct backref_node *alloc_backref_node(struct backref_cache *cache) | ||
244 | { | ||
245 | struct backref_node *node; | ||
246 | |||
247 | node = kzalloc(sizeof(*node), GFP_NOFS); | ||
248 | if (node) { | ||
249 | INIT_LIST_HEAD(&node->list); | ||
250 | INIT_LIST_HEAD(&node->upper); | ||
251 | INIT_LIST_HEAD(&node->lower); | ||
252 | RB_CLEAR_NODE(&node->rb_node); | ||
253 | cache->nr_nodes++; | ||
254 | } | ||
255 | return node; | ||
256 | } | ||
257 | |||
258 | static void free_backref_node(struct backref_cache *cache, | ||
259 | struct backref_node *node) | ||
260 | { | ||
261 | if (node) { | ||
262 | cache->nr_nodes--; | ||
263 | kfree(node); | ||
264 | } | ||
265 | } | ||
266 | |||
267 | static struct backref_edge *alloc_backref_edge(struct backref_cache *cache) | ||
268 | { | ||
269 | struct backref_edge *edge; | ||
270 | |||
271 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | ||
272 | if (edge) | ||
273 | cache->nr_edges++; | ||
274 | return edge; | ||
275 | } | ||
276 | |||
277 | static void free_backref_edge(struct backref_cache *cache, | ||
278 | struct backref_edge *edge) | ||
279 | { | ||
280 | if (edge) { | ||
281 | cache->nr_edges--; | ||
282 | kfree(edge); | ||
283 | } | ||
192 | } | 284 | } |
193 | 285 | ||
194 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, | 286 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, |
@@ -249,6 +341,7 @@ static struct backref_node *walk_up_backref(struct backref_node *node, | |||
249 | edges[idx++] = edge; | 341 | edges[idx++] = edge; |
250 | node = edge->node[UPPER]; | 342 | node = edge->node[UPPER]; |
251 | } | 343 | } |
344 | BUG_ON(node->detached); | ||
252 | *index = idx; | 345 | *index = idx; |
253 | return node; | 346 | return node; |
254 | } | 347 | } |
@@ -280,13 +373,18 @@ static struct backref_node *walk_down_backref(struct backref_edge *edges[], | |||
280 | return NULL; | 373 | return NULL; |
281 | } | 374 | } |
282 | 375 | ||
376 | static void unlock_node_buffer(struct backref_node *node) | ||
377 | { | ||
378 | if (node->locked) { | ||
379 | btrfs_tree_unlock(node->eb); | ||
380 | node->locked = 0; | ||
381 | } | ||
382 | } | ||
383 | |||
283 | static void drop_node_buffer(struct backref_node *node) | 384 | static void drop_node_buffer(struct backref_node *node) |
284 | { | 385 | { |
285 | if (node->eb) { | 386 | if (node->eb) { |
286 | if (node->locked) { | 387 | unlock_node_buffer(node); |
287 | btrfs_tree_unlock(node->eb); | ||
288 | node->locked = 0; | ||
289 | } | ||
290 | free_extent_buffer(node->eb); | 388 | free_extent_buffer(node->eb); |
291 | node->eb = NULL; | 389 | node->eb = NULL; |
292 | } | 390 | } |
@@ -295,14 +393,14 @@ static void drop_node_buffer(struct backref_node *node) | |||
295 | static void drop_backref_node(struct backref_cache *tree, | 393 | static void drop_backref_node(struct backref_cache *tree, |
296 | struct backref_node *node) | 394 | struct backref_node *node) |
297 | { | 395 | { |
298 | BUG_ON(!node->lowest); | ||
299 | BUG_ON(!list_empty(&node->upper)); | 396 | BUG_ON(!list_empty(&node->upper)); |
300 | 397 | ||
301 | drop_node_buffer(node); | 398 | drop_node_buffer(node); |
399 | list_del(&node->list); | ||
302 | list_del(&node->lower); | 400 | list_del(&node->lower); |
303 | 401 | if (!RB_EMPTY_NODE(&node->rb_node)) | |
304 | rb_erase(&node->rb_node, &tree->rb_root); | 402 | rb_erase(&node->rb_node, &tree->rb_root); |
305 | kfree(node); | 403 | free_backref_node(tree, node); |
306 | } | 404 | } |
307 | 405 | ||
308 | /* | 406 | /* |
@@ -317,27 +415,121 @@ static void remove_backref_node(struct backref_cache *cache, | |||
317 | if (!node) | 415 | if (!node) |
318 | return; | 416 | return; |
319 | 417 | ||
320 | BUG_ON(!node->lowest); | 418 | BUG_ON(!node->lowest && !node->detached); |
321 | while (!list_empty(&node->upper)) { | 419 | while (!list_empty(&node->upper)) { |
322 | edge = list_entry(node->upper.next, struct backref_edge, | 420 | edge = list_entry(node->upper.next, struct backref_edge, |
323 | list[LOWER]); | 421 | list[LOWER]); |
324 | upper = edge->node[UPPER]; | 422 | upper = edge->node[UPPER]; |
325 | list_del(&edge->list[LOWER]); | 423 | list_del(&edge->list[LOWER]); |
326 | list_del(&edge->list[UPPER]); | 424 | list_del(&edge->list[UPPER]); |
327 | kfree(edge); | 425 | free_backref_edge(cache, edge); |
426 | |||
427 | if (RB_EMPTY_NODE(&upper->rb_node)) { | ||
428 | BUG_ON(!list_empty(&node->upper)); | ||
429 | drop_backref_node(cache, node); | ||
430 | node = upper; | ||
431 | node->lowest = 1; | ||
432 | continue; | ||
433 | } | ||
328 | /* | 434 | /* |
329 | * add the node to pending list if no other | 435 | * add the node to leaf node list if no other |
330 | * child block cached. | 436 | * child block cached. |
331 | */ | 437 | */ |
332 | if (list_empty(&upper->lower)) { | 438 | if (list_empty(&upper->lower)) { |
333 | list_add_tail(&upper->lower, | 439 | list_add_tail(&upper->lower, &cache->leaves); |
334 | &cache->pending[upper->level]); | ||
335 | upper->lowest = 1; | 440 | upper->lowest = 1; |
336 | } | 441 | } |
337 | } | 442 | } |
443 | |||
338 | drop_backref_node(cache, node); | 444 | drop_backref_node(cache, node); |
339 | } | 445 | } |
340 | 446 | ||
447 | static void update_backref_node(struct backref_cache *cache, | ||
448 | struct backref_node *node, u64 bytenr) | ||
449 | { | ||
450 | struct rb_node *rb_node; | ||
451 | rb_erase(&node->rb_node, &cache->rb_root); | ||
452 | node->bytenr = bytenr; | ||
453 | rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); | ||
454 | BUG_ON(rb_node); | ||
455 | } | ||
456 | |||
457 | /* | ||
458 | * update backref cache after a transaction commit | ||
459 | */ | ||
460 | static int update_backref_cache(struct btrfs_trans_handle *trans, | ||
461 | struct backref_cache *cache) | ||
462 | { | ||
463 | struct backref_node *node; | ||
464 | int level = 0; | ||
465 | |||
466 | if (cache->last_trans == 0) { | ||
467 | cache->last_trans = trans->transid; | ||
468 | return 0; | ||
469 | } | ||
470 | |||
471 | if (cache->last_trans == trans->transid) | ||
472 | return 0; | ||
473 | |||
474 | /* | ||
475 | * detached nodes are used to avoid unnecessary backref | ||
476 | * lookup. transaction commit changes the extent tree. | ||
477 | * so the detached nodes are no longer useful. | ||
478 | */ | ||
479 | while (!list_empty(&cache->detached)) { | ||
480 | node = list_entry(cache->detached.next, | ||
481 | struct backref_node, list); | ||
482 | remove_backref_node(cache, node); | ||
483 | } | ||
484 | |||
485 | while (!list_empty(&cache->changed)) { | ||
486 | node = list_entry(cache->changed.next, | ||
487 | struct backref_node, list); | ||
488 | list_del_init(&node->list); | ||
489 | BUG_ON(node->pending); | ||
490 | update_backref_node(cache, node, node->new_bytenr); | ||
491 | } | ||
492 | |||
493 | /* | ||
494 | * some nodes can be left in the pending list if there were | ||
495 | * errors during processing the pending nodes. | ||
496 | */ | ||
497 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
498 | list_for_each_entry(node, &cache->pending[level], list) { | ||
499 | BUG_ON(!node->pending); | ||
500 | if (node->bytenr == node->new_bytenr) | ||
501 | continue; | ||
502 | update_backref_node(cache, node, node->new_bytenr); | ||
503 | } | ||
504 | } | ||
505 | |||
506 | cache->last_trans = 0; | ||
507 | return 1; | ||
508 | } | ||
509 | |||
510 | static int should_ignore_root(struct btrfs_root *root) | ||
511 | { | ||
512 | struct btrfs_root *reloc_root; | ||
513 | |||
514 | if (!root->ref_cows) | ||
515 | return 0; | ||
516 | |||
517 | reloc_root = root->reloc_root; | ||
518 | if (!reloc_root) | ||
519 | return 0; | ||
520 | |||
521 | if (btrfs_root_last_snapshot(&reloc_root->root_item) == | ||
522 | root->fs_info->running_transaction->transid - 1) | ||
523 | return 0; | ||
524 | /* | ||
525 | * if there is reloc tree and it was created in previous | ||
526 | * transaction backref lookup can find the reloc tree, | ||
527 | * so backref node for the fs tree root is useless for | ||
528 | * relocation. | ||
529 | */ | ||
530 | return 1; | ||
531 | } | ||
532 | |||
341 | /* | 533 | /* |
342 | * find reloc tree by address of tree root | 534 | * find reloc tree by address of tree root |
343 | */ | 535 | */ |
@@ -452,11 +644,12 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, | |||
452 | * for all upper level blocks that directly/indirectly reference the | 644 | * for all upper level blocks that directly/indirectly reference the |
453 | * block are also cached. | 645 | * block are also cached. |
454 | */ | 646 | */ |
455 | static struct backref_node *build_backref_tree(struct reloc_control *rc, | 647 | static noinline_for_stack |
456 | struct backref_cache *cache, | 648 | struct backref_node *build_backref_tree(struct reloc_control *rc, |
457 | struct btrfs_key *node_key, | 649 | struct btrfs_key *node_key, |
458 | int level, u64 bytenr) | 650 | int level, u64 bytenr) |
459 | { | 651 | { |
652 | struct backref_cache *cache = &rc->backref_cache; | ||
460 | struct btrfs_path *path1; | 653 | struct btrfs_path *path1; |
461 | struct btrfs_path *path2; | 654 | struct btrfs_path *path2; |
462 | struct extent_buffer *eb; | 655 | struct extent_buffer *eb; |
@@ -472,6 +665,8 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, | |||
472 | unsigned long end; | 665 | unsigned long end; |
473 | unsigned long ptr; | 666 | unsigned long ptr; |
474 | LIST_HEAD(list); | 667 | LIST_HEAD(list); |
668 | LIST_HEAD(useless); | ||
669 | int cowonly; | ||
475 | int ret; | 670 | int ret; |
476 | int err = 0; | 671 | int err = 0; |
477 | 672 | ||
@@ -482,15 +677,13 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, | |||
482 | goto out; | 677 | goto out; |
483 | } | 678 | } |
484 | 679 | ||
485 | node = kmalloc(sizeof(*node), GFP_NOFS); | 680 | node = alloc_backref_node(cache); |
486 | if (!node) { | 681 | if (!node) { |
487 | err = -ENOMEM; | 682 | err = -ENOMEM; |
488 | goto out; | 683 | goto out; |
489 | } | 684 | } |
490 | 685 | ||
491 | backref_node_init(node); | ||
492 | node->bytenr = bytenr; | 686 | node->bytenr = bytenr; |
493 | node->owner = 0; | ||
494 | node->level = level; | 687 | node->level = level; |
495 | node->lowest = 1; | 688 | node->lowest = 1; |
496 | cur = node; | 689 | cur = node; |
@@ -586,17 +779,21 @@ again: | |||
586 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 779 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
587 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || | 780 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || |
588 | key.type == BTRFS_EXTENT_REF_V0_KEY) { | 781 | key.type == BTRFS_EXTENT_REF_V0_KEY) { |
589 | if (key.objectid == key.offset && | 782 | if (key.type == BTRFS_EXTENT_REF_V0_KEY) { |
590 | key.type == BTRFS_EXTENT_REF_V0_KEY) { | ||
591 | struct btrfs_extent_ref_v0 *ref0; | 783 | struct btrfs_extent_ref_v0 *ref0; |
592 | ref0 = btrfs_item_ptr(eb, path1->slots[0], | 784 | ref0 = btrfs_item_ptr(eb, path1->slots[0], |
593 | struct btrfs_extent_ref_v0); | 785 | struct btrfs_extent_ref_v0); |
594 | root = find_tree_root(rc, eb, ref0); | 786 | if (key.objectid == key.offset) { |
595 | if (root) | 787 | root = find_tree_root(rc, eb, ref0); |
596 | cur->root = root; | 788 | if (root && !should_ignore_root(root)) |
597 | else | 789 | cur->root = root; |
598 | cur->old_root = 1; | 790 | else |
599 | break; | 791 | list_add(&cur->list, &useless); |
792 | break; | ||
793 | } | ||
794 | if (is_cowonly_root(btrfs_ref_root_v0(eb, | ||
795 | ref0))) | ||
796 | cur->cowonly = 1; | ||
600 | } | 797 | } |
601 | #else | 798 | #else |
602 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); | 799 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); |
@@ -613,22 +810,20 @@ again: | |||
613 | break; | 810 | break; |
614 | } | 811 | } |
615 | 812 | ||
616 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | 813 | edge = alloc_backref_edge(cache); |
617 | if (!edge) { | 814 | if (!edge) { |
618 | err = -ENOMEM; | 815 | err = -ENOMEM; |
619 | goto out; | 816 | goto out; |
620 | } | 817 | } |
621 | rb_node = tree_search(&cache->rb_root, key.offset); | 818 | rb_node = tree_search(&cache->rb_root, key.offset); |
622 | if (!rb_node) { | 819 | if (!rb_node) { |
623 | upper = kmalloc(sizeof(*upper), GFP_NOFS); | 820 | upper = alloc_backref_node(cache); |
624 | if (!upper) { | 821 | if (!upper) { |
625 | kfree(edge); | 822 | free_backref_edge(cache, edge); |
626 | err = -ENOMEM; | 823 | err = -ENOMEM; |
627 | goto out; | 824 | goto out; |
628 | } | 825 | } |
629 | backref_node_init(upper); | ||
630 | upper->bytenr = key.offset; | 826 | upper->bytenr = key.offset; |
631 | upper->owner = 0; | ||
632 | upper->level = cur->level + 1; | 827 | upper->level = cur->level + 1; |
633 | /* | 828 | /* |
634 | * backrefs for the upper level block isn't | 829 | * backrefs for the upper level block isn't |
@@ -638,11 +833,12 @@ again: | |||
638 | } else { | 833 | } else { |
639 | upper = rb_entry(rb_node, struct backref_node, | 834 | upper = rb_entry(rb_node, struct backref_node, |
640 | rb_node); | 835 | rb_node); |
836 | BUG_ON(!upper->checked); | ||
641 | INIT_LIST_HEAD(&edge->list[UPPER]); | 837 | INIT_LIST_HEAD(&edge->list[UPPER]); |
642 | } | 838 | } |
643 | list_add(&edge->list[LOWER], &cur->upper); | 839 | list_add_tail(&edge->list[LOWER], &cur->upper); |
644 | edge->node[UPPER] = upper; | ||
645 | edge->node[LOWER] = cur; | 840 | edge->node[LOWER] = cur; |
841 | edge->node[UPPER] = upper; | ||
646 | 842 | ||
647 | goto next; | 843 | goto next; |
648 | } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { | 844 | } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { |
@@ -656,11 +852,17 @@ again: | |||
656 | goto out; | 852 | goto out; |
657 | } | 853 | } |
658 | 854 | ||
855 | if (!root->ref_cows) | ||
856 | cur->cowonly = 1; | ||
857 | |||
659 | if (btrfs_root_level(&root->root_item) == cur->level) { | 858 | if (btrfs_root_level(&root->root_item) == cur->level) { |
660 | /* tree root */ | 859 | /* tree root */ |
661 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 860 | BUG_ON(btrfs_root_bytenr(&root->root_item) != |
662 | cur->bytenr); | 861 | cur->bytenr); |
663 | cur->root = root; | 862 | if (should_ignore_root(root)) |
863 | list_add(&cur->list, &useless); | ||
864 | else | ||
865 | cur->root = root; | ||
664 | break; | 866 | break; |
665 | } | 867 | } |
666 | 868 | ||
@@ -691,11 +893,14 @@ again: | |||
691 | if (!path2->nodes[level]) { | 893 | if (!path2->nodes[level]) { |
692 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 894 | BUG_ON(btrfs_root_bytenr(&root->root_item) != |
693 | lower->bytenr); | 895 | lower->bytenr); |
694 | lower->root = root; | 896 | if (should_ignore_root(root)) |
897 | list_add(&lower->list, &useless); | ||
898 | else | ||
899 | lower->root = root; | ||
695 | break; | 900 | break; |
696 | } | 901 | } |
697 | 902 | ||
698 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | 903 | edge = alloc_backref_edge(cache); |
699 | if (!edge) { | 904 | if (!edge) { |
700 | err = -ENOMEM; | 905 | err = -ENOMEM; |
701 | goto out; | 906 | goto out; |
@@ -704,16 +909,17 @@ again: | |||
704 | eb = path2->nodes[level]; | 909 | eb = path2->nodes[level]; |
705 | rb_node = tree_search(&cache->rb_root, eb->start); | 910 | rb_node = tree_search(&cache->rb_root, eb->start); |
706 | if (!rb_node) { | 911 | if (!rb_node) { |
707 | upper = kmalloc(sizeof(*upper), GFP_NOFS); | 912 | upper = alloc_backref_node(cache); |
708 | if (!upper) { | 913 | if (!upper) { |
709 | kfree(edge); | 914 | free_backref_edge(cache, edge); |
710 | err = -ENOMEM; | 915 | err = -ENOMEM; |
711 | goto out; | 916 | goto out; |
712 | } | 917 | } |
713 | backref_node_init(upper); | ||
714 | upper->bytenr = eb->start; | 918 | upper->bytenr = eb->start; |
715 | upper->owner = btrfs_header_owner(eb); | 919 | upper->owner = btrfs_header_owner(eb); |
716 | upper->level = lower->level + 1; | 920 | upper->level = lower->level + 1; |
921 | if (!root->ref_cows) | ||
922 | upper->cowonly = 1; | ||
717 | 923 | ||
718 | /* | 924 | /* |
719 | * if we know the block isn't shared | 925 | * if we know the block isn't shared |
@@ -743,10 +949,12 @@ again: | |||
743 | rb_node); | 949 | rb_node); |
744 | BUG_ON(!upper->checked); | 950 | BUG_ON(!upper->checked); |
745 | INIT_LIST_HEAD(&edge->list[UPPER]); | 951 | INIT_LIST_HEAD(&edge->list[UPPER]); |
952 | if (!upper->owner) | ||
953 | upper->owner = btrfs_header_owner(eb); | ||
746 | } | 954 | } |
747 | list_add_tail(&edge->list[LOWER], &lower->upper); | 955 | list_add_tail(&edge->list[LOWER], &lower->upper); |
748 | edge->node[UPPER] = upper; | ||
749 | edge->node[LOWER] = lower; | 956 | edge->node[LOWER] = lower; |
957 | edge->node[UPPER] = upper; | ||
750 | 958 | ||
751 | if (rb_node) | 959 | if (rb_node) |
752 | break; | 960 | break; |
@@ -784,8 +992,13 @@ next: | |||
784 | * into the cache. | 992 | * into the cache. |
785 | */ | 993 | */ |
786 | BUG_ON(!node->checked); | 994 | BUG_ON(!node->checked); |
787 | rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); | 995 | cowonly = node->cowonly; |
788 | BUG_ON(rb_node); | 996 | if (!cowonly) { |
997 | rb_node = tree_insert(&cache->rb_root, node->bytenr, | ||
998 | &node->rb_node); | ||
999 | BUG_ON(rb_node); | ||
1000 | list_add_tail(&node->lower, &cache->leaves); | ||
1001 | } | ||
789 | 1002 | ||
790 | list_for_each_entry(edge, &node->upper, list[LOWER]) | 1003 | list_for_each_entry(edge, &node->upper, list[LOWER]) |
791 | list_add_tail(&edge->list[UPPER], &list); | 1004 | list_add_tail(&edge->list[UPPER], &list); |
@@ -794,6 +1007,14 @@ next: | |||
794 | edge = list_entry(list.next, struct backref_edge, list[UPPER]); | 1007 | edge = list_entry(list.next, struct backref_edge, list[UPPER]); |
795 | list_del_init(&edge->list[UPPER]); | 1008 | list_del_init(&edge->list[UPPER]); |
796 | upper = edge->node[UPPER]; | 1009 | upper = edge->node[UPPER]; |
1010 | if (upper->detached) { | ||
1011 | list_del(&edge->list[LOWER]); | ||
1012 | lower = edge->node[LOWER]; | ||
1013 | free_backref_edge(cache, edge); | ||
1014 | if (list_empty(&lower->upper)) | ||
1015 | list_add(&lower->list, &useless); | ||
1016 | continue; | ||
1017 | } | ||
797 | 1018 | ||
798 | if (!RB_EMPTY_NODE(&upper->rb_node)) { | 1019 | if (!RB_EMPTY_NODE(&upper->rb_node)) { |
799 | if (upper->lowest) { | 1020 | if (upper->lowest) { |
@@ -806,25 +1027,69 @@ next: | |||
806 | } | 1027 | } |
807 | 1028 | ||
808 | BUG_ON(!upper->checked); | 1029 | BUG_ON(!upper->checked); |
809 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, | 1030 | BUG_ON(cowonly != upper->cowonly); |
810 | &upper->rb_node); | 1031 | if (!cowonly) { |
811 | BUG_ON(rb_node); | 1032 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, |
1033 | &upper->rb_node); | ||
1034 | BUG_ON(rb_node); | ||
1035 | } | ||
812 | 1036 | ||
813 | list_add_tail(&edge->list[UPPER], &upper->lower); | 1037 | list_add_tail(&edge->list[UPPER], &upper->lower); |
814 | 1038 | ||
815 | list_for_each_entry(edge, &upper->upper, list[LOWER]) | 1039 | list_for_each_entry(edge, &upper->upper, list[LOWER]) |
816 | list_add_tail(&edge->list[UPPER], &list); | 1040 | list_add_tail(&edge->list[UPPER], &list); |
817 | } | 1041 | } |
1042 | /* | ||
1043 | * process useless backref nodes. backref nodes for tree leaves | ||
1044 | * are deleted from the cache. backref nodes for upper level | ||
1045 | * tree blocks are left in the cache to avoid unnecessary backref | ||
1046 | * lookup. | ||
1047 | */ | ||
1048 | while (!list_empty(&useless)) { | ||
1049 | upper = list_entry(useless.next, struct backref_node, list); | ||
1050 | list_del_init(&upper->list); | ||
1051 | BUG_ON(!list_empty(&upper->upper)); | ||
1052 | if (upper == node) | ||
1053 | node = NULL; | ||
1054 | if (upper->lowest) { | ||
1055 | list_del_init(&upper->lower); | ||
1056 | upper->lowest = 0; | ||
1057 | } | ||
1058 | while (!list_empty(&upper->lower)) { | ||
1059 | edge = list_entry(upper->lower.next, | ||
1060 | struct backref_edge, list[UPPER]); | ||
1061 | list_del(&edge->list[UPPER]); | ||
1062 | list_del(&edge->list[LOWER]); | ||
1063 | lower = edge->node[LOWER]; | ||
1064 | free_backref_edge(cache, edge); | ||
1065 | |||
1066 | if (list_empty(&lower->upper)) | ||
1067 | list_add(&lower->list, &useless); | ||
1068 | } | ||
1069 | __mark_block_processed(rc, upper); | ||
1070 | if (upper->level > 0) { | ||
1071 | list_add(&upper->list, &cache->detached); | ||
1072 | upper->detached = 1; | ||
1073 | } else { | ||
1074 | rb_erase(&upper->rb_node, &cache->rb_root); | ||
1075 | free_backref_node(cache, upper); | ||
1076 | } | ||
1077 | } | ||
818 | out: | 1078 | out: |
819 | btrfs_free_path(path1); | 1079 | btrfs_free_path(path1); |
820 | btrfs_free_path(path2); | 1080 | btrfs_free_path(path2); |
821 | if (err) { | 1081 | if (err) { |
822 | INIT_LIST_HEAD(&list); | 1082 | while (!list_empty(&useless)) { |
1083 | lower = list_entry(useless.next, | ||
1084 | struct backref_node, upper); | ||
1085 | list_del_init(&lower->upper); | ||
1086 | } | ||
823 | upper = node; | 1087 | upper = node; |
1088 | INIT_LIST_HEAD(&list); | ||
824 | while (upper) { | 1089 | while (upper) { |
825 | if (RB_EMPTY_NODE(&upper->rb_node)) { | 1090 | if (RB_EMPTY_NODE(&upper->rb_node)) { |
826 | list_splice_tail(&upper->upper, &list); | 1091 | list_splice_tail(&upper->upper, &list); |
827 | kfree(upper); | 1092 | free_backref_node(cache, upper); |
828 | } | 1093 | } |
829 | 1094 | ||
830 | if (list_empty(&list)) | 1095 | if (list_empty(&list)) |
@@ -832,15 +1097,104 @@ out: | |||
832 | 1097 | ||
833 | edge = list_entry(list.next, struct backref_edge, | 1098 | edge = list_entry(list.next, struct backref_edge, |
834 | list[LOWER]); | 1099 | list[LOWER]); |
1100 | list_del(&edge->list[LOWER]); | ||
835 | upper = edge->node[UPPER]; | 1101 | upper = edge->node[UPPER]; |
836 | kfree(edge); | 1102 | free_backref_edge(cache, edge); |
837 | } | 1103 | } |
838 | return ERR_PTR(err); | 1104 | return ERR_PTR(err); |
839 | } | 1105 | } |
1106 | BUG_ON(node && node->detached); | ||
840 | return node; | 1107 | return node; |
841 | } | 1108 | } |
842 | 1109 | ||
843 | /* | 1110 | /* |
1111 | * helper to add backref node for the newly created snapshot. | ||
1112 | * the backref node is created by cloning backref node that | ||
1113 | * corresponds to root of source tree | ||
1114 | */ | ||
1115 | static int clone_backref_node(struct btrfs_trans_handle *trans, | ||
1116 | struct reloc_control *rc, | ||
1117 | struct btrfs_root *src, | ||
1118 | struct btrfs_root *dest) | ||
1119 | { | ||
1120 | struct btrfs_root *reloc_root = src->reloc_root; | ||
1121 | struct backref_cache *cache = &rc->backref_cache; | ||
1122 | struct backref_node *node = NULL; | ||
1123 | struct backref_node *new_node; | ||
1124 | struct backref_edge *edge; | ||
1125 | struct backref_edge *new_edge; | ||
1126 | struct rb_node *rb_node; | ||
1127 | |||
1128 | if (cache->last_trans > 0) | ||
1129 | update_backref_cache(trans, cache); | ||
1130 | |||
1131 | rb_node = tree_search(&cache->rb_root, src->commit_root->start); | ||
1132 | if (rb_node) { | ||
1133 | node = rb_entry(rb_node, struct backref_node, rb_node); | ||
1134 | if (node->detached) | ||
1135 | node = NULL; | ||
1136 | else | ||
1137 | BUG_ON(node->new_bytenr != reloc_root->node->start); | ||
1138 | } | ||
1139 | |||
1140 | if (!node) { | ||
1141 | rb_node = tree_search(&cache->rb_root, | ||
1142 | reloc_root->commit_root->start); | ||
1143 | if (rb_node) { | ||
1144 | node = rb_entry(rb_node, struct backref_node, | ||
1145 | rb_node); | ||
1146 | BUG_ON(node->detached); | ||
1147 | } | ||
1148 | } | ||
1149 | |||
1150 | if (!node) | ||
1151 | return 0; | ||
1152 | |||
1153 | new_node = alloc_backref_node(cache); | ||
1154 | if (!new_node) | ||
1155 | return -ENOMEM; | ||
1156 | |||
1157 | new_node->bytenr = dest->node->start; | ||
1158 | new_node->level = node->level; | ||
1159 | new_node->lowest = node->lowest; | ||
1160 | new_node->root = dest; | ||
1161 | |||
1162 | if (!node->lowest) { | ||
1163 | list_for_each_entry(edge, &node->lower, list[UPPER]) { | ||
1164 | new_edge = alloc_backref_edge(cache); | ||
1165 | if (!new_edge) | ||
1166 | goto fail; | ||
1167 | |||
1168 | new_edge->node[UPPER] = new_node; | ||
1169 | new_edge->node[LOWER] = edge->node[LOWER]; | ||
1170 | list_add_tail(&new_edge->list[UPPER], | ||
1171 | &new_node->lower); | ||
1172 | } | ||
1173 | } | ||
1174 | |||
1175 | rb_node = tree_insert(&cache->rb_root, new_node->bytenr, | ||
1176 | &new_node->rb_node); | ||
1177 | BUG_ON(rb_node); | ||
1178 | |||
1179 | if (!new_node->lowest) { | ||
1180 | list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) { | ||
1181 | list_add_tail(&new_edge->list[LOWER], | ||
1182 | &new_edge->node[LOWER]->upper); | ||
1183 | } | ||
1184 | } | ||
1185 | return 0; | ||
1186 | fail: | ||
1187 | while (!list_empty(&new_node->lower)) { | ||
1188 | new_edge = list_entry(new_node->lower.next, | ||
1189 | struct backref_edge, list[UPPER]); | ||
1190 | list_del(&new_edge->list[UPPER]); | ||
1191 | free_backref_edge(cache, new_edge); | ||
1192 | } | ||
1193 | free_backref_node(cache, new_node); | ||
1194 | return -ENOMEM; | ||
1195 | } | ||
1196 | |||
1197 | /* | ||
844 | * helper to add 'address of tree root -> reloc tree' mapping | 1198 | * helper to add 'address of tree root -> reloc tree' mapping |
845 | */ | 1199 | */ |
846 | static int __add_reloc_root(struct btrfs_root *root) | 1200 | static int __add_reloc_root(struct btrfs_root *root) |
@@ -900,12 +1254,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del) | |||
900 | return 0; | 1254 | return 0; |
901 | } | 1255 | } |
902 | 1256 | ||
903 | /* | 1257 | static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, |
904 | * create reloc tree for a given fs tree. reloc tree is just a | 1258 | struct btrfs_root *root, u64 objectid) |
905 | * snapshot of the fs tree with special root objectid. | ||
906 | */ | ||
907 | int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | ||
908 | struct btrfs_root *root) | ||
909 | { | 1259 | { |
910 | struct btrfs_root *reloc_root; | 1260 | struct btrfs_root *reloc_root; |
911 | struct extent_buffer *eb; | 1261 | struct extent_buffer *eb; |
@@ -913,36 +1263,45 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | |||
913 | struct btrfs_key root_key; | 1263 | struct btrfs_key root_key; |
914 | int ret; | 1264 | int ret; |
915 | 1265 | ||
916 | if (root->reloc_root) { | ||
917 | reloc_root = root->reloc_root; | ||
918 | reloc_root->last_trans = trans->transid; | ||
919 | return 0; | ||
920 | } | ||
921 | |||
922 | if (!root->fs_info->reloc_ctl || | ||
923 | !root->fs_info->reloc_ctl->create_reloc_root || | ||
924 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
925 | return 0; | ||
926 | |||
927 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | 1266 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); |
928 | BUG_ON(!root_item); | 1267 | BUG_ON(!root_item); |
929 | 1268 | ||
930 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; | 1269 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; |
931 | root_key.type = BTRFS_ROOT_ITEM_KEY; | 1270 | root_key.type = BTRFS_ROOT_ITEM_KEY; |
932 | root_key.offset = root->root_key.objectid; | 1271 | root_key.offset = objectid; |
933 | 1272 | ||
934 | ret = btrfs_copy_root(trans, root, root->commit_root, &eb, | 1273 | if (root->root_key.objectid == objectid) { |
935 | BTRFS_TREE_RELOC_OBJECTID); | 1274 | /* called by btrfs_init_reloc_root */ |
936 | BUG_ON(ret); | 1275 | ret = btrfs_copy_root(trans, root, root->commit_root, &eb, |
1276 | BTRFS_TREE_RELOC_OBJECTID); | ||
1277 | BUG_ON(ret); | ||
1278 | |||
1279 | btrfs_set_root_last_snapshot(&root->root_item, | ||
1280 | trans->transid - 1); | ||
1281 | } else { | ||
1282 | /* | ||
1283 | * called by btrfs_reloc_post_snapshot_hook. | ||
1284 | * the source tree is a reloc tree, all tree blocks | ||
1285 | * modified after it was created have RELOC flag | ||
1286 | * set in their headers. so it's OK to not update | ||
1287 | * the 'last_snapshot'. | ||
1288 | */ | ||
1289 | ret = btrfs_copy_root(trans, root, root->node, &eb, | ||
1290 | BTRFS_TREE_RELOC_OBJECTID); | ||
1291 | BUG_ON(ret); | ||
1292 | } | ||
937 | 1293 | ||
938 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1); | ||
939 | memcpy(root_item, &root->root_item, sizeof(*root_item)); | 1294 | memcpy(root_item, &root->root_item, sizeof(*root_item)); |
940 | btrfs_set_root_refs(root_item, 1); | ||
941 | btrfs_set_root_bytenr(root_item, eb->start); | 1295 | btrfs_set_root_bytenr(root_item, eb->start); |
942 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); | 1296 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); |
943 | btrfs_set_root_generation(root_item, trans->transid); | 1297 | btrfs_set_root_generation(root_item, trans->transid); |
944 | memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key)); | 1298 | |
945 | root_item->drop_level = 0; | 1299 | if (root->root_key.objectid == objectid) { |
1300 | btrfs_set_root_refs(root_item, 0); | ||
1301 | memset(&root_item->drop_progress, 0, | ||
1302 | sizeof(struct btrfs_disk_key)); | ||
1303 | root_item->drop_level = 0; | ||
1304 | } | ||
946 | 1305 | ||
947 | btrfs_tree_unlock(eb); | 1306 | btrfs_tree_unlock(eb); |
948 | free_extent_buffer(eb); | 1307 | free_extent_buffer(eb); |
@@ -956,6 +1315,37 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | |||
956 | &root_key); | 1315 | &root_key); |
957 | BUG_ON(IS_ERR(reloc_root)); | 1316 | BUG_ON(IS_ERR(reloc_root)); |
958 | reloc_root->last_trans = trans->transid; | 1317 | reloc_root->last_trans = trans->transid; |
1318 | return reloc_root; | ||
1319 | } | ||
1320 | |||
1321 | /* | ||
1322 | * create reloc tree for a given fs tree. reloc tree is just a | ||
1323 | * snapshot of the fs tree with special root objectid. | ||
1324 | */ | ||
1325 | int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | ||
1326 | struct btrfs_root *root) | ||
1327 | { | ||
1328 | struct btrfs_root *reloc_root; | ||
1329 | struct reloc_control *rc = root->fs_info->reloc_ctl; | ||
1330 | int clear_rsv = 0; | ||
1331 | |||
1332 | if (root->reloc_root) { | ||
1333 | reloc_root = root->reloc_root; | ||
1334 | reloc_root->last_trans = trans->transid; | ||
1335 | return 0; | ||
1336 | } | ||
1337 | |||
1338 | if (!rc || !rc->create_reloc_tree || | ||
1339 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
1340 | return 0; | ||
1341 | |||
1342 | if (!trans->block_rsv) { | ||
1343 | trans->block_rsv = rc->block_rsv; | ||
1344 | clear_rsv = 1; | ||
1345 | } | ||
1346 | reloc_root = create_reloc_root(trans, root, root->root_key.objectid); | ||
1347 | if (clear_rsv) | ||
1348 | trans->block_rsv = NULL; | ||
959 | 1349 | ||
960 | __add_reloc_root(reloc_root); | 1350 | __add_reloc_root(reloc_root); |
961 | root->reloc_root = reloc_root; | 1351 | root->reloc_root = reloc_root; |
@@ -979,7 +1369,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
979 | reloc_root = root->reloc_root; | 1369 | reloc_root = root->reloc_root; |
980 | root_item = &reloc_root->root_item; | 1370 | root_item = &reloc_root->root_item; |
981 | 1371 | ||
982 | if (btrfs_root_refs(root_item) == 0) { | 1372 | if (root->fs_info->reloc_ctl->merge_reloc_tree && |
1373 | btrfs_root_refs(root_item) == 0) { | ||
983 | root->reloc_root = NULL; | 1374 | root->reloc_root = NULL; |
984 | del = 1; | 1375 | del = 1; |
985 | } | 1376 | } |
@@ -1101,8 +1492,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, | |||
1101 | goto out; | 1492 | goto out; |
1102 | } | 1493 | } |
1103 | 1494 | ||
1104 | if (new_bytenr) | 1495 | *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); |
1105 | *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
1106 | ret = 0; | 1496 | ret = 0; |
1107 | out: | 1497 | out: |
1108 | btrfs_free_path(path); | 1498 | btrfs_free_path(path); |
@@ -1113,19 +1503,18 @@ out: | |||
1113 | * update file extent items in the tree leaf to point to | 1503 | * update file extent items in the tree leaf to point to |
1114 | * the new locations. | 1504 | * the new locations. |
1115 | */ | 1505 | */ |
1116 | static int replace_file_extents(struct btrfs_trans_handle *trans, | 1506 | static noinline_for_stack |
1117 | struct reloc_control *rc, | 1507 | int replace_file_extents(struct btrfs_trans_handle *trans, |
1118 | struct btrfs_root *root, | 1508 | struct reloc_control *rc, |
1119 | struct extent_buffer *leaf, | 1509 | struct btrfs_root *root, |
1120 | struct list_head *inode_list) | 1510 | struct extent_buffer *leaf) |
1121 | { | 1511 | { |
1122 | struct btrfs_key key; | 1512 | struct btrfs_key key; |
1123 | struct btrfs_file_extent_item *fi; | 1513 | struct btrfs_file_extent_item *fi; |
1124 | struct inode *inode = NULL; | 1514 | struct inode *inode = NULL; |
1125 | struct inodevec *ivec = NULL; | ||
1126 | u64 parent; | 1515 | u64 parent; |
1127 | u64 bytenr; | 1516 | u64 bytenr; |
1128 | u64 new_bytenr; | 1517 | u64 new_bytenr = 0; |
1129 | u64 num_bytes; | 1518 | u64 num_bytes; |
1130 | u64 end; | 1519 | u64 end; |
1131 | u32 nritems; | 1520 | u32 nritems; |
@@ -1165,21 +1554,12 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1165 | * to complete and drop the extent cache | 1554 | * to complete and drop the extent cache |
1166 | */ | 1555 | */ |
1167 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { | 1556 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { |
1168 | if (!ivec || ivec->nr == INODEVEC_SIZE) { | ||
1169 | ivec = kmalloc(sizeof(*ivec), GFP_NOFS); | ||
1170 | BUG_ON(!ivec); | ||
1171 | ivec->nr = 0; | ||
1172 | list_add_tail(&ivec->list, inode_list); | ||
1173 | } | ||
1174 | if (first) { | 1557 | if (first) { |
1175 | inode = find_next_inode(root, key.objectid); | 1558 | inode = find_next_inode(root, key.objectid); |
1176 | if (inode) | ||
1177 | ivec->inode[ivec->nr++] = inode; | ||
1178 | first = 0; | 1559 | first = 0; |
1179 | } else if (inode && inode->i_ino < key.objectid) { | 1560 | } else if (inode && inode->i_ino < key.objectid) { |
1561 | btrfs_add_delayed_iput(inode); | ||
1180 | inode = find_next_inode(root, key.objectid); | 1562 | inode = find_next_inode(root, key.objectid); |
1181 | if (inode) | ||
1182 | ivec->inode[ivec->nr++] = inode; | ||
1183 | } | 1563 | } |
1184 | if (inode && inode->i_ino == key.objectid) { | 1564 | if (inode && inode->i_ino == key.objectid) { |
1185 | end = key.offset + | 1565 | end = key.offset + |
@@ -1203,8 +1583,10 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1203 | 1583 | ||
1204 | ret = get_new_location(rc->data_inode, &new_bytenr, | 1584 | ret = get_new_location(rc->data_inode, &new_bytenr, |
1205 | bytenr, num_bytes); | 1585 | bytenr, num_bytes); |
1206 | if (ret > 0) | 1586 | if (ret > 0) { |
1587 | WARN_ON(1); | ||
1207 | continue; | 1588 | continue; |
1589 | } | ||
1208 | BUG_ON(ret < 0); | 1590 | BUG_ON(ret < 0); |
1209 | 1591 | ||
1210 | btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); | 1592 | btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); |
@@ -1224,6 +1606,8 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1224 | } | 1606 | } |
1225 | if (dirty) | 1607 | if (dirty) |
1226 | btrfs_mark_buffer_dirty(leaf); | 1608 | btrfs_mark_buffer_dirty(leaf); |
1609 | if (inode) | ||
1610 | btrfs_add_delayed_iput(inode); | ||
1227 | return 0; | 1611 | return 0; |
1228 | } | 1612 | } |
1229 | 1613 | ||
@@ -1247,11 +1631,11 @@ int memcmp_node_keys(struct extent_buffer *eb, int slot, | |||
1247 | * if no block got replaced, 0 is returned. if there are other | 1631 | * if no block got replaced, 0 is returned. if there are other |
1248 | * errors, a negative error number is returned. | 1632 | * errors, a negative error number is returned. |
1249 | */ | 1633 | */ |
1250 | static int replace_path(struct btrfs_trans_handle *trans, | 1634 | static noinline_for_stack |
1251 | struct btrfs_root *dest, struct btrfs_root *src, | 1635 | int replace_path(struct btrfs_trans_handle *trans, |
1252 | struct btrfs_path *path, struct btrfs_key *next_key, | 1636 | struct btrfs_root *dest, struct btrfs_root *src, |
1253 | struct extent_buffer **leaf, | 1637 | struct btrfs_path *path, struct btrfs_key *next_key, |
1254 | int lowest_level, int max_level) | 1638 | int lowest_level, int max_level) |
1255 | { | 1639 | { |
1256 | struct extent_buffer *eb; | 1640 | struct extent_buffer *eb; |
1257 | struct extent_buffer *parent; | 1641 | struct extent_buffer *parent; |
@@ -1262,16 +1646,16 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1262 | u64 new_ptr_gen; | 1646 | u64 new_ptr_gen; |
1263 | u64 last_snapshot; | 1647 | u64 last_snapshot; |
1264 | u32 blocksize; | 1648 | u32 blocksize; |
1649 | int cow = 0; | ||
1265 | int level; | 1650 | int level; |
1266 | int ret; | 1651 | int ret; |
1267 | int slot; | 1652 | int slot; |
1268 | 1653 | ||
1269 | BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | 1654 | BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); |
1270 | BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); | 1655 | BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); |
1271 | BUG_ON(lowest_level > 1 && leaf); | ||
1272 | 1656 | ||
1273 | last_snapshot = btrfs_root_last_snapshot(&src->root_item); | 1657 | last_snapshot = btrfs_root_last_snapshot(&src->root_item); |
1274 | 1658 | again: | |
1275 | slot = path->slots[lowest_level]; | 1659 | slot = path->slots[lowest_level]; |
1276 | btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); | 1660 | btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); |
1277 | 1661 | ||
@@ -1285,8 +1669,10 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1285 | return 0; | 1669 | return 0; |
1286 | } | 1670 | } |
1287 | 1671 | ||
1288 | ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); | 1672 | if (cow) { |
1289 | BUG_ON(ret); | 1673 | ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); |
1674 | BUG_ON(ret); | ||
1675 | } | ||
1290 | btrfs_set_lock_blocking(eb); | 1676 | btrfs_set_lock_blocking(eb); |
1291 | 1677 | ||
1292 | if (next_key) { | 1678 | if (next_key) { |
@@ -1330,7 +1716,7 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1330 | 1716 | ||
1331 | if (new_bytenr == 0 || old_ptr_gen > last_snapshot || | 1717 | if (new_bytenr == 0 || old_ptr_gen > last_snapshot || |
1332 | memcmp_node_keys(parent, slot, path, level)) { | 1718 | memcmp_node_keys(parent, slot, path, level)) { |
1333 | if (level <= lowest_level && !leaf) { | 1719 | if (level <= lowest_level) { |
1334 | ret = 0; | 1720 | ret = 0; |
1335 | break; | 1721 | break; |
1336 | } | 1722 | } |
@@ -1338,16 +1724,12 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1338 | eb = read_tree_block(dest, old_bytenr, blocksize, | 1724 | eb = read_tree_block(dest, old_bytenr, blocksize, |
1339 | old_ptr_gen); | 1725 | old_ptr_gen); |
1340 | btrfs_tree_lock(eb); | 1726 | btrfs_tree_lock(eb); |
1341 | ret = btrfs_cow_block(trans, dest, eb, parent, | 1727 | if (cow) { |
1342 | slot, &eb); | 1728 | ret = btrfs_cow_block(trans, dest, eb, parent, |
1343 | BUG_ON(ret); | 1729 | slot, &eb); |
1344 | btrfs_set_lock_blocking(eb); | 1730 | BUG_ON(ret); |
1345 | |||
1346 | if (level <= lowest_level) { | ||
1347 | *leaf = eb; | ||
1348 | ret = 0; | ||
1349 | break; | ||
1350 | } | 1731 | } |
1732 | btrfs_set_lock_blocking(eb); | ||
1351 | 1733 | ||
1352 | btrfs_tree_unlock(parent); | 1734 | btrfs_tree_unlock(parent); |
1353 | free_extent_buffer(parent); | 1735 | free_extent_buffer(parent); |
@@ -1356,6 +1738,13 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1356 | continue; | 1738 | continue; |
1357 | } | 1739 | } |
1358 | 1740 | ||
1741 | if (!cow) { | ||
1742 | btrfs_tree_unlock(parent); | ||
1743 | free_extent_buffer(parent); | ||
1744 | cow = 1; | ||
1745 | goto again; | ||
1746 | } | ||
1747 | |||
1359 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 1748 | btrfs_node_key_to_cpu(path->nodes[level], &key, |
1360 | path->slots[level]); | 1749 | path->slots[level]); |
1361 | btrfs_release_path(src, path); | 1750 | btrfs_release_path(src, path); |
@@ -1561,20 +1950,6 @@ static int invalidate_extent_cache(struct btrfs_root *root, | |||
1561 | return 0; | 1950 | return 0; |
1562 | } | 1951 | } |
1563 | 1952 | ||
1564 | static void put_inodes(struct list_head *list) | ||
1565 | { | ||
1566 | struct inodevec *ivec; | ||
1567 | while (!list_empty(list)) { | ||
1568 | ivec = list_entry(list->next, struct inodevec, list); | ||
1569 | list_del(&ivec->list); | ||
1570 | while (ivec->nr > 0) { | ||
1571 | ivec->nr--; | ||
1572 | iput(ivec->inode[ivec->nr]); | ||
1573 | } | ||
1574 | kfree(ivec); | ||
1575 | } | ||
1576 | } | ||
1577 | |||
1578 | static int find_next_key(struct btrfs_path *path, int level, | 1953 | static int find_next_key(struct btrfs_path *path, int level, |
1579 | struct btrfs_key *key) | 1954 | struct btrfs_key *key) |
1580 | 1955 | ||
@@ -1607,13 +1982,14 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1607 | struct btrfs_root *reloc_root; | 1982 | struct btrfs_root *reloc_root; |
1608 | struct btrfs_root_item *root_item; | 1983 | struct btrfs_root_item *root_item; |
1609 | struct btrfs_path *path; | 1984 | struct btrfs_path *path; |
1610 | struct extent_buffer *leaf = NULL; | 1985 | struct extent_buffer *leaf; |
1611 | unsigned long nr; | 1986 | unsigned long nr; |
1612 | int level; | 1987 | int level; |
1613 | int max_level; | 1988 | int max_level; |
1614 | int replaced = 0; | 1989 | int replaced = 0; |
1615 | int ret; | 1990 | int ret; |
1616 | int err = 0; | 1991 | int err = 0; |
1992 | u32 min_reserved; | ||
1617 | 1993 | ||
1618 | path = btrfs_alloc_path(); | 1994 | path = btrfs_alloc_path(); |
1619 | if (!path) | 1995 | if (!path) |
@@ -1647,34 +2023,23 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1647 | btrfs_unlock_up_safe(path, 0); | 2023 | btrfs_unlock_up_safe(path, 0); |
1648 | } | 2024 | } |
1649 | 2025 | ||
1650 | if (level == 0 && rc->stage == UPDATE_DATA_PTRS) { | 2026 | min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; |
1651 | trans = btrfs_start_transaction(root, 1); | 2027 | memset(&next_key, 0, sizeof(next_key)); |
1652 | 2028 | ||
1653 | leaf = path->nodes[0]; | 2029 | while (1) { |
1654 | btrfs_item_key_to_cpu(leaf, &key, 0); | 2030 | trans = btrfs_start_transaction(root, 0); |
1655 | btrfs_release_path(reloc_root, path); | 2031 | trans->block_rsv = rc->block_rsv; |
1656 | 2032 | ||
1657 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | 2033 | ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, |
1658 | if (ret < 0) { | 2034 | min_reserved, 0); |
1659 | err = ret; | 2035 | if (ret) { |
1660 | goto out; | 2036 | BUG_ON(ret != -EAGAIN); |
2037 | ret = btrfs_commit_transaction(trans, root); | ||
2038 | BUG_ON(ret); | ||
2039 | continue; | ||
1661 | } | 2040 | } |
1662 | 2041 | ||
1663 | leaf = path->nodes[0]; | ||
1664 | btrfs_unlock_up_safe(path, 1); | ||
1665 | ret = replace_file_extents(trans, rc, root, leaf, | ||
1666 | &inode_list); | ||
1667 | if (ret < 0) | ||
1668 | err = ret; | ||
1669 | goto out; | ||
1670 | } | ||
1671 | |||
1672 | memset(&next_key, 0, sizeof(next_key)); | ||
1673 | |||
1674 | while (1) { | ||
1675 | leaf = NULL; | ||
1676 | replaced = 0; | 2042 | replaced = 0; |
1677 | trans = btrfs_start_transaction(root, 1); | ||
1678 | max_level = level; | 2043 | max_level = level; |
1679 | 2044 | ||
1680 | ret = walk_down_reloc_tree(reloc_root, path, &level); | 2045 | ret = walk_down_reloc_tree(reloc_root, path, &level); |
@@ -1688,14 +2053,9 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1688 | if (!find_next_key(path, level, &key) && | 2053 | if (!find_next_key(path, level, &key) && |
1689 | btrfs_comp_cpu_keys(&next_key, &key) >= 0) { | 2054 | btrfs_comp_cpu_keys(&next_key, &key) >= 0) { |
1690 | ret = 0; | 2055 | ret = 0; |
1691 | } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) { | ||
1692 | ret = replace_path(trans, root, reloc_root, | ||
1693 | path, &next_key, &leaf, | ||
1694 | level, max_level); | ||
1695 | } else { | 2056 | } else { |
1696 | ret = replace_path(trans, root, reloc_root, | 2057 | ret = replace_path(trans, root, reloc_root, path, |
1697 | path, &next_key, NULL, | 2058 | &next_key, level, max_level); |
1698 | level, max_level); | ||
1699 | } | 2059 | } |
1700 | if (ret < 0) { | 2060 | if (ret < 0) { |
1701 | err = ret; | 2061 | err = ret; |
@@ -1707,16 +2067,6 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1707 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 2067 | btrfs_node_key_to_cpu(path->nodes[level], &key, |
1708 | path->slots[level]); | 2068 | path->slots[level]); |
1709 | replaced = 1; | 2069 | replaced = 1; |
1710 | } else if (leaf) { | ||
1711 | /* | ||
1712 | * no block got replaced, try replacing file extents | ||
1713 | */ | ||
1714 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
1715 | ret = replace_file_extents(trans, rc, root, leaf, | ||
1716 | &inode_list); | ||
1717 | btrfs_tree_unlock(leaf); | ||
1718 | free_extent_buffer(leaf); | ||
1719 | BUG_ON(ret < 0); | ||
1720 | } | 2070 | } |
1721 | 2071 | ||
1722 | ret = walk_up_reloc_tree(reloc_root, path, &level); | 2072 | ret = walk_up_reloc_tree(reloc_root, path, &level); |
@@ -1733,15 +2083,10 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1733 | root_item->drop_level = level; | 2083 | root_item->drop_level = level; |
1734 | 2084 | ||
1735 | nr = trans->blocks_used; | 2085 | nr = trans->blocks_used; |
1736 | btrfs_end_transaction(trans, root); | 2086 | btrfs_end_transaction_throttle(trans, root); |
1737 | 2087 | ||
1738 | btrfs_btree_balance_dirty(root, nr); | 2088 | btrfs_btree_balance_dirty(root, nr); |
1739 | 2089 | ||
1740 | /* | ||
1741 | * put inodes outside transaction, otherwise we may deadlock. | ||
1742 | */ | ||
1743 | put_inodes(&inode_list); | ||
1744 | |||
1745 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2090 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
1746 | invalidate_extent_cache(root, &key, &next_key); | 2091 | invalidate_extent_cache(root, &key, &next_key); |
1747 | } | 2092 | } |
@@ -1764,87 +2109,123 @@ out: | |||
1764 | sizeof(root_item->drop_progress)); | 2109 | sizeof(root_item->drop_progress)); |
1765 | root_item->drop_level = 0; | 2110 | root_item->drop_level = 0; |
1766 | btrfs_set_root_refs(root_item, 0); | 2111 | btrfs_set_root_refs(root_item, 0); |
2112 | btrfs_update_reloc_root(trans, root); | ||
1767 | } | 2113 | } |
1768 | 2114 | ||
1769 | nr = trans->blocks_used; | 2115 | nr = trans->blocks_used; |
1770 | btrfs_end_transaction(trans, root); | 2116 | btrfs_end_transaction_throttle(trans, root); |
1771 | 2117 | ||
1772 | btrfs_btree_balance_dirty(root, nr); | 2118 | btrfs_btree_balance_dirty(root, nr); |
1773 | 2119 | ||
1774 | put_inodes(&inode_list); | ||
1775 | |||
1776 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2120 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
1777 | invalidate_extent_cache(root, &key, &next_key); | 2121 | invalidate_extent_cache(root, &key, &next_key); |
1778 | 2122 | ||
1779 | return err; | 2123 | return err; |
1780 | } | 2124 | } |
1781 | 2125 | ||
1782 | /* | 2126 | static noinline_for_stack |
1783 | * callback for the work threads. | 2127 | int prepare_to_merge(struct reloc_control *rc, int err) |
1784 | * this function merges reloc tree with corresponding fs tree, | ||
1785 | * and then drops the reloc tree. | ||
1786 | */ | ||
1787 | static void merge_func(struct btrfs_work *work) | ||
1788 | { | 2128 | { |
1789 | struct btrfs_trans_handle *trans; | 2129 | struct btrfs_root *root = rc->extent_root; |
1790 | struct btrfs_root *root; | ||
1791 | struct btrfs_root *reloc_root; | 2130 | struct btrfs_root *reloc_root; |
1792 | struct async_merge *async; | 2131 | struct btrfs_trans_handle *trans; |
2132 | LIST_HEAD(reloc_roots); | ||
2133 | u64 num_bytes = 0; | ||
2134 | int ret; | ||
2135 | |||
2136 | mutex_lock(&root->fs_info->trans_mutex); | ||
2137 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; | ||
2138 | rc->merging_rsv_size += rc->nodes_relocated * 2; | ||
2139 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2140 | again: | ||
2141 | if (!err) { | ||
2142 | num_bytes = rc->merging_rsv_size; | ||
2143 | ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, | ||
2144 | num_bytes); | ||
2145 | if (ret) | ||
2146 | err = ret; | ||
2147 | } | ||
1793 | 2148 | ||
1794 | async = container_of(work, struct async_merge, work); | 2149 | trans = btrfs_join_transaction(rc->extent_root, 1); |
1795 | reloc_root = async->root; | 2150 | |
2151 | if (!err) { | ||
2152 | if (num_bytes != rc->merging_rsv_size) { | ||
2153 | btrfs_end_transaction(trans, rc->extent_root); | ||
2154 | btrfs_block_rsv_release(rc->extent_root, | ||
2155 | rc->block_rsv, num_bytes); | ||
2156 | goto again; | ||
2157 | } | ||
2158 | } | ||
2159 | |||
2160 | rc->merge_reloc_tree = 1; | ||
2161 | |||
2162 | while (!list_empty(&rc->reloc_roots)) { | ||
2163 | reloc_root = list_entry(rc->reloc_roots.next, | ||
2164 | struct btrfs_root, root_list); | ||
2165 | list_del_init(&reloc_root->root_list); | ||
1796 | 2166 | ||
1797 | if (btrfs_root_refs(&reloc_root->root_item) > 0) { | ||
1798 | root = read_fs_root(reloc_root->fs_info, | 2167 | root = read_fs_root(reloc_root->fs_info, |
1799 | reloc_root->root_key.offset); | 2168 | reloc_root->root_key.offset); |
1800 | BUG_ON(IS_ERR(root)); | 2169 | BUG_ON(IS_ERR(root)); |
1801 | BUG_ON(root->reloc_root != reloc_root); | 2170 | BUG_ON(root->reloc_root != reloc_root); |
1802 | 2171 | ||
1803 | merge_reloc_root(async->rc, root); | 2172 | /* |
1804 | 2173 | * set reference count to 1, so btrfs_recover_relocation | |
1805 | trans = btrfs_start_transaction(root, 1); | 2174 | * knows it should resumes merging |
2175 | */ | ||
2176 | if (!err) | ||
2177 | btrfs_set_root_refs(&reloc_root->root_item, 1); | ||
1806 | btrfs_update_reloc_root(trans, root); | 2178 | btrfs_update_reloc_root(trans, root); |
1807 | btrfs_end_transaction(trans, root); | ||
1808 | } | ||
1809 | 2179 | ||
1810 | btrfs_drop_snapshot(reloc_root, 0); | 2180 | list_add(&reloc_root->root_list, &reloc_roots); |
2181 | } | ||
1811 | 2182 | ||
1812 | if (atomic_dec_and_test(async->num_pending)) | 2183 | list_splice(&reloc_roots, &rc->reloc_roots); |
1813 | complete(async->done); | ||
1814 | 2184 | ||
1815 | kfree(async); | 2185 | if (!err) |
2186 | btrfs_commit_transaction(trans, rc->extent_root); | ||
2187 | else | ||
2188 | btrfs_end_transaction(trans, rc->extent_root); | ||
2189 | return err; | ||
1816 | } | 2190 | } |
1817 | 2191 | ||
1818 | static int merge_reloc_roots(struct reloc_control *rc) | 2192 | static noinline_for_stack |
2193 | int merge_reloc_roots(struct reloc_control *rc) | ||
1819 | { | 2194 | { |
1820 | struct async_merge *async; | ||
1821 | struct btrfs_root *root; | 2195 | struct btrfs_root *root; |
1822 | struct completion done; | 2196 | struct btrfs_root *reloc_root; |
1823 | atomic_t num_pending; | 2197 | LIST_HEAD(reloc_roots); |
2198 | int found = 0; | ||
2199 | int ret; | ||
2200 | again: | ||
2201 | root = rc->extent_root; | ||
2202 | mutex_lock(&root->fs_info->trans_mutex); | ||
2203 | list_splice_init(&rc->reloc_roots, &reloc_roots); | ||
2204 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1824 | 2205 | ||
1825 | init_completion(&done); | 2206 | while (!list_empty(&reloc_roots)) { |
1826 | atomic_set(&num_pending, 1); | 2207 | found = 1; |
2208 | reloc_root = list_entry(reloc_roots.next, | ||
2209 | struct btrfs_root, root_list); | ||
1827 | 2210 | ||
1828 | while (!list_empty(&rc->reloc_roots)) { | 2211 | if (btrfs_root_refs(&reloc_root->root_item) > 0) { |
1829 | root = list_entry(rc->reloc_roots.next, | 2212 | root = read_fs_root(reloc_root->fs_info, |
1830 | struct btrfs_root, root_list); | 2213 | reloc_root->root_key.offset); |
1831 | list_del_init(&root->root_list); | 2214 | BUG_ON(IS_ERR(root)); |
2215 | BUG_ON(root->reloc_root != reloc_root); | ||
1832 | 2216 | ||
1833 | async = kmalloc(sizeof(*async), GFP_NOFS); | 2217 | ret = merge_reloc_root(rc, root); |
1834 | BUG_ON(!async); | 2218 | BUG_ON(ret); |
1835 | async->work.func = merge_func; | 2219 | } else { |
1836 | async->work.flags = 0; | 2220 | list_del_init(&reloc_root->root_list); |
1837 | async->rc = rc; | 2221 | } |
1838 | async->root = root; | 2222 | btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0); |
1839 | async->done = &done; | ||
1840 | async->num_pending = &num_pending; | ||
1841 | atomic_inc(&num_pending); | ||
1842 | btrfs_queue_worker(&rc->workers, &async->work); | ||
1843 | } | 2223 | } |
1844 | 2224 | ||
1845 | if (!atomic_dec_and_test(&num_pending)) | 2225 | if (found) { |
1846 | wait_for_completion(&done); | 2226 | found = 0; |
1847 | 2227 | goto again; | |
2228 | } | ||
1848 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); | 2229 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); |
1849 | return 0; | 2230 | return 0; |
1850 | } | 2231 | } |
@@ -1875,119 +2256,167 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, | |||
1875 | return btrfs_record_root_in_trans(trans, root); | 2256 | return btrfs_record_root_in_trans(trans, root); |
1876 | } | 2257 | } |
1877 | 2258 | ||
1878 | /* | 2259 | static noinline_for_stack |
1879 | * select one tree from trees that references the block. | 2260 | struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, |
1880 | * for blocks in refernce counted trees, we preper reloc tree. | 2261 | struct reloc_control *rc, |
1881 | * if no reloc tree found and reloc_only is true, NULL is returned. | 2262 | struct backref_node *node, |
1882 | */ | 2263 | struct backref_edge *edges[], int *nr) |
1883 | static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans, | ||
1884 | struct backref_node *node, | ||
1885 | struct backref_edge *edges[], | ||
1886 | int *nr, int reloc_only) | ||
1887 | { | 2264 | { |
1888 | struct backref_node *next; | 2265 | struct backref_node *next; |
1889 | struct btrfs_root *root; | 2266 | struct btrfs_root *root; |
1890 | int index; | 2267 | int index = 0; |
1891 | int loop = 0; | 2268 | |
1892 | again: | ||
1893 | index = 0; | ||
1894 | next = node; | 2269 | next = node; |
1895 | while (1) { | 2270 | while (1) { |
1896 | cond_resched(); | 2271 | cond_resched(); |
1897 | next = walk_up_backref(next, edges, &index); | 2272 | next = walk_up_backref(next, edges, &index); |
1898 | root = next->root; | 2273 | root = next->root; |
1899 | if (!root) { | 2274 | BUG_ON(!root); |
1900 | BUG_ON(!node->old_root); | 2275 | BUG_ON(!root->ref_cows); |
1901 | goto skip; | ||
1902 | } | ||
1903 | |||
1904 | /* no other choice for non-refernce counted tree */ | ||
1905 | if (!root->ref_cows) { | ||
1906 | BUG_ON(reloc_only); | ||
1907 | break; | ||
1908 | } | ||
1909 | 2276 | ||
1910 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 2277 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { |
1911 | record_reloc_root_in_trans(trans, root); | 2278 | record_reloc_root_in_trans(trans, root); |
1912 | break; | 2279 | break; |
1913 | } | 2280 | } |
1914 | 2281 | ||
1915 | if (loop) { | 2282 | btrfs_record_root_in_trans(trans, root); |
1916 | btrfs_record_root_in_trans(trans, root); | 2283 | root = root->reloc_root; |
2284 | |||
2285 | if (next->new_bytenr != root->node->start) { | ||
2286 | BUG_ON(next->new_bytenr); | ||
2287 | BUG_ON(!list_empty(&next->list)); | ||
2288 | next->new_bytenr = root->node->start; | ||
2289 | next->root = root; | ||
2290 | list_add_tail(&next->list, | ||
2291 | &rc->backref_cache.changed); | ||
2292 | __mark_block_processed(rc, next); | ||
1917 | break; | 2293 | break; |
1918 | } | 2294 | } |
1919 | 2295 | ||
1920 | if (reloc_only || next != node) { | 2296 | WARN_ON(1); |
1921 | if (!root->reloc_root) | ||
1922 | btrfs_record_root_in_trans(trans, root); | ||
1923 | root = root->reloc_root; | ||
1924 | /* | ||
1925 | * if the reloc tree was created in current | ||
1926 | * transation, there is no node in backref tree | ||
1927 | * corresponds to the root of the reloc tree. | ||
1928 | */ | ||
1929 | if (btrfs_root_last_snapshot(&root->root_item) == | ||
1930 | trans->transid - 1) | ||
1931 | break; | ||
1932 | } | ||
1933 | skip: | ||
1934 | root = NULL; | 2297 | root = NULL; |
1935 | next = walk_down_backref(edges, &index); | 2298 | next = walk_down_backref(edges, &index); |
1936 | if (!next || next->level <= node->level) | 2299 | if (!next || next->level <= node->level) |
1937 | break; | 2300 | break; |
1938 | } | 2301 | } |
2302 | if (!root) | ||
2303 | return NULL; | ||
1939 | 2304 | ||
1940 | if (!root && !loop && !reloc_only) { | 2305 | *nr = index; |
1941 | loop = 1; | 2306 | next = node; |
1942 | goto again; | 2307 | /* setup backref node path for btrfs_reloc_cow_block */ |
2308 | while (1) { | ||
2309 | rc->backref_cache.path[next->level] = next; | ||
2310 | if (--index < 0) | ||
2311 | break; | ||
2312 | next = edges[index]->node[UPPER]; | ||
1943 | } | 2313 | } |
1944 | |||
1945 | if (root) | ||
1946 | *nr = index; | ||
1947 | else | ||
1948 | *nr = 0; | ||
1949 | |||
1950 | return root; | 2314 | return root; |
1951 | } | 2315 | } |
1952 | 2316 | ||
2317 | /* | ||
2318 | * select a tree root for relocation. return NULL if the block | ||
2319 | * is reference counted. we should use do_relocation() in this | ||
2320 | * case. return a tree root pointer if the block isn't reference | ||
2321 | * counted. return -ENOENT if the block is root of reloc tree. | ||
2322 | */ | ||
1953 | static noinline_for_stack | 2323 | static noinline_for_stack |
1954 | struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, | 2324 | struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, |
1955 | struct backref_node *node) | 2325 | struct backref_node *node) |
1956 | { | 2326 | { |
2327 | struct backref_node *next; | ||
2328 | struct btrfs_root *root; | ||
2329 | struct btrfs_root *fs_root = NULL; | ||
1957 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; | 2330 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; |
1958 | int nr; | 2331 | int index = 0; |
1959 | return __select_one_root(trans, node, edges, &nr, 0); | 2332 | |
2333 | next = node; | ||
2334 | while (1) { | ||
2335 | cond_resched(); | ||
2336 | next = walk_up_backref(next, edges, &index); | ||
2337 | root = next->root; | ||
2338 | BUG_ON(!root); | ||
2339 | |||
2340 | /* no other choice for non-refernce counted tree */ | ||
2341 | if (!root->ref_cows) | ||
2342 | return root; | ||
2343 | |||
2344 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) | ||
2345 | fs_root = root; | ||
2346 | |||
2347 | if (next != node) | ||
2348 | return NULL; | ||
2349 | |||
2350 | next = walk_down_backref(edges, &index); | ||
2351 | if (!next || next->level <= node->level) | ||
2352 | break; | ||
2353 | } | ||
2354 | |||
2355 | if (!fs_root) | ||
2356 | return ERR_PTR(-ENOENT); | ||
2357 | return fs_root; | ||
1960 | } | 2358 | } |
1961 | 2359 | ||
1962 | static noinline_for_stack | 2360 | static noinline_for_stack |
1963 | struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, | 2361 | u64 calcu_metadata_size(struct reloc_control *rc, |
1964 | struct backref_node *node, | 2362 | struct backref_node *node, int reserve) |
1965 | struct backref_edge *edges[], int *nr) | ||
1966 | { | 2363 | { |
1967 | return __select_one_root(trans, node, edges, nr, 1); | 2364 | struct backref_node *next = node; |
2365 | struct backref_edge *edge; | ||
2366 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; | ||
2367 | u64 num_bytes = 0; | ||
2368 | int index = 0; | ||
2369 | |||
2370 | BUG_ON(reserve && node->processed); | ||
2371 | |||
2372 | while (next) { | ||
2373 | cond_resched(); | ||
2374 | while (1) { | ||
2375 | if (next->processed && (reserve || next != node)) | ||
2376 | break; | ||
2377 | |||
2378 | num_bytes += btrfs_level_size(rc->extent_root, | ||
2379 | next->level); | ||
2380 | |||
2381 | if (list_empty(&next->upper)) | ||
2382 | break; | ||
2383 | |||
2384 | edge = list_entry(next->upper.next, | ||
2385 | struct backref_edge, list[LOWER]); | ||
2386 | edges[index++] = edge; | ||
2387 | next = edge->node[UPPER]; | ||
2388 | } | ||
2389 | next = walk_down_backref(edges, &index); | ||
2390 | } | ||
2391 | return num_bytes; | ||
1968 | } | 2392 | } |
1969 | 2393 | ||
1970 | static void grab_path_buffers(struct btrfs_path *path, | 2394 | static int reserve_metadata_space(struct btrfs_trans_handle *trans, |
1971 | struct backref_node *node, | 2395 | struct reloc_control *rc, |
1972 | struct backref_edge *edges[], int nr) | 2396 | struct backref_node *node) |
1973 | { | 2397 | { |
1974 | int i = 0; | 2398 | struct btrfs_root *root = rc->extent_root; |
1975 | while (1) { | 2399 | u64 num_bytes; |
1976 | drop_node_buffer(node); | 2400 | int ret; |
1977 | node->eb = path->nodes[node->level]; | ||
1978 | BUG_ON(!node->eb); | ||
1979 | if (path->locks[node->level]) | ||
1980 | node->locked = 1; | ||
1981 | path->nodes[node->level] = NULL; | ||
1982 | path->locks[node->level] = 0; | ||
1983 | |||
1984 | if (i >= nr) | ||
1985 | break; | ||
1986 | 2401 | ||
1987 | edges[i]->blockptr = node->eb->start; | 2402 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; |
1988 | node = edges[i]->node[UPPER]; | 2403 | |
1989 | i++; | 2404 | trans->block_rsv = rc->block_rsv; |
2405 | ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes); | ||
2406 | if (ret) { | ||
2407 | if (ret == -EAGAIN) | ||
2408 | rc->commit_transaction = 1; | ||
2409 | return ret; | ||
1990 | } | 2410 | } |
2411 | |||
2412 | return 0; | ||
2413 | } | ||
2414 | |||
2415 | static void release_metadata_space(struct reloc_control *rc, | ||
2416 | struct backref_node *node) | ||
2417 | { | ||
2418 | u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2; | ||
2419 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes); | ||
1991 | } | 2420 | } |
1992 | 2421 | ||
1993 | /* | 2422 | /* |
@@ -1998,6 +2427,7 @@ static void grab_path_buffers(struct btrfs_path *path, | |||
1998 | * in that case this function just updates pointers. | 2427 | * in that case this function just updates pointers. |
1999 | */ | 2428 | */ |
2000 | static int do_relocation(struct btrfs_trans_handle *trans, | 2429 | static int do_relocation(struct btrfs_trans_handle *trans, |
2430 | struct reloc_control *rc, | ||
2001 | struct backref_node *node, | 2431 | struct backref_node *node, |
2002 | struct btrfs_key *key, | 2432 | struct btrfs_key *key, |
2003 | struct btrfs_path *path, int lowest) | 2433 | struct btrfs_path *path, int lowest) |
@@ -2018,18 +2448,25 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2018 | BUG_ON(lowest && node->eb); | 2448 | BUG_ON(lowest && node->eb); |
2019 | 2449 | ||
2020 | path->lowest_level = node->level + 1; | 2450 | path->lowest_level = node->level + 1; |
2451 | rc->backref_cache.path[node->level] = node; | ||
2021 | list_for_each_entry(edge, &node->upper, list[LOWER]) { | 2452 | list_for_each_entry(edge, &node->upper, list[LOWER]) { |
2022 | cond_resched(); | 2453 | cond_resched(); |
2023 | if (node->eb && node->eb->start == edge->blockptr) | ||
2024 | continue; | ||
2025 | 2454 | ||
2026 | upper = edge->node[UPPER]; | 2455 | upper = edge->node[UPPER]; |
2027 | root = select_reloc_root(trans, upper, edges, &nr); | 2456 | root = select_reloc_root(trans, rc, upper, edges, &nr); |
2028 | if (!root) | 2457 | BUG_ON(!root); |
2029 | continue; | 2458 | |
2030 | 2459 | if (upper->eb && !upper->locked) { | |
2031 | if (upper->eb && !upper->locked) | 2460 | if (!lowest) { |
2461 | ret = btrfs_bin_search(upper->eb, key, | ||
2462 | upper->level, &slot); | ||
2463 | BUG_ON(ret); | ||
2464 | bytenr = btrfs_node_blockptr(upper->eb, slot); | ||
2465 | if (node->eb->start == bytenr) | ||
2466 | goto next; | ||
2467 | } | ||
2032 | drop_node_buffer(upper); | 2468 | drop_node_buffer(upper); |
2469 | } | ||
2033 | 2470 | ||
2034 | if (!upper->eb) { | 2471 | if (!upper->eb) { |
2035 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | 2472 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); |
@@ -2039,11 +2476,17 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2039 | } | 2476 | } |
2040 | BUG_ON(ret > 0); | 2477 | BUG_ON(ret > 0); |
2041 | 2478 | ||
2042 | slot = path->slots[upper->level]; | 2479 | if (!upper->eb) { |
2480 | upper->eb = path->nodes[upper->level]; | ||
2481 | path->nodes[upper->level] = NULL; | ||
2482 | } else { | ||
2483 | BUG_ON(upper->eb != path->nodes[upper->level]); | ||
2484 | } | ||
2043 | 2485 | ||
2044 | btrfs_unlock_up_safe(path, upper->level + 1); | 2486 | upper->locked = 1; |
2045 | grab_path_buffers(path, upper, edges, nr); | 2487 | path->locks[upper->level] = 0; |
2046 | 2488 | ||
2489 | slot = path->slots[upper->level]; | ||
2047 | btrfs_release_path(NULL, path); | 2490 | btrfs_release_path(NULL, path); |
2048 | } else { | 2491 | } else { |
2049 | ret = btrfs_bin_search(upper->eb, key, upper->level, | 2492 | ret = btrfs_bin_search(upper->eb, key, upper->level, |
@@ -2052,14 +2495,11 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2052 | } | 2495 | } |
2053 | 2496 | ||
2054 | bytenr = btrfs_node_blockptr(upper->eb, slot); | 2497 | bytenr = btrfs_node_blockptr(upper->eb, slot); |
2055 | if (!lowest) { | 2498 | if (lowest) { |
2056 | if (node->eb->start == bytenr) { | 2499 | BUG_ON(bytenr != node->bytenr); |
2057 | btrfs_tree_unlock(upper->eb); | ||
2058 | upper->locked = 0; | ||
2059 | continue; | ||
2060 | } | ||
2061 | } else { | 2500 | } else { |
2062 | BUG_ON(node->bytenr != bytenr); | 2501 | if (node->eb->start == bytenr) |
2502 | goto next; | ||
2063 | } | 2503 | } |
2064 | 2504 | ||
2065 | blocksize = btrfs_level_size(root, node->level); | 2505 | blocksize = btrfs_level_size(root, node->level); |
@@ -2071,13 +2511,13 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2071 | if (!node->eb) { | 2511 | if (!node->eb) { |
2072 | ret = btrfs_cow_block(trans, root, eb, upper->eb, | 2512 | ret = btrfs_cow_block(trans, root, eb, upper->eb, |
2073 | slot, &eb); | 2513 | slot, &eb); |
2514 | btrfs_tree_unlock(eb); | ||
2515 | free_extent_buffer(eb); | ||
2074 | if (ret < 0) { | 2516 | if (ret < 0) { |
2075 | err = ret; | 2517 | err = ret; |
2076 | break; | 2518 | goto next; |
2077 | } | 2519 | } |
2078 | btrfs_set_lock_blocking(eb); | 2520 | BUG_ON(node->eb != eb); |
2079 | node->eb = eb; | ||
2080 | node->locked = 1; | ||
2081 | } else { | 2521 | } else { |
2082 | btrfs_set_node_blockptr(upper->eb, slot, | 2522 | btrfs_set_node_blockptr(upper->eb, slot, |
2083 | node->eb->start); | 2523 | node->eb->start); |
@@ -2095,67 +2535,80 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2095 | ret = btrfs_drop_subtree(trans, root, eb, upper->eb); | 2535 | ret = btrfs_drop_subtree(trans, root, eb, upper->eb); |
2096 | BUG_ON(ret); | 2536 | BUG_ON(ret); |
2097 | } | 2537 | } |
2098 | if (!lowest) { | 2538 | next: |
2099 | btrfs_tree_unlock(upper->eb); | 2539 | if (!upper->pending) |
2100 | upper->locked = 0; | 2540 | drop_node_buffer(upper); |
2101 | } | 2541 | else |
2542 | unlock_node_buffer(upper); | ||
2543 | if (err) | ||
2544 | break; | ||
2102 | } | 2545 | } |
2546 | |||
2547 | if (!err && node->pending) { | ||
2548 | drop_node_buffer(node); | ||
2549 | list_move_tail(&node->list, &rc->backref_cache.changed); | ||
2550 | node->pending = 0; | ||
2551 | } | ||
2552 | |||
2103 | path->lowest_level = 0; | 2553 | path->lowest_level = 0; |
2554 | BUG_ON(err == -ENOSPC); | ||
2104 | return err; | 2555 | return err; |
2105 | } | 2556 | } |
2106 | 2557 | ||
2107 | static int link_to_upper(struct btrfs_trans_handle *trans, | 2558 | static int link_to_upper(struct btrfs_trans_handle *trans, |
2559 | struct reloc_control *rc, | ||
2108 | struct backref_node *node, | 2560 | struct backref_node *node, |
2109 | struct btrfs_path *path) | 2561 | struct btrfs_path *path) |
2110 | { | 2562 | { |
2111 | struct btrfs_key key; | 2563 | struct btrfs_key key; |
2112 | if (!node->eb || list_empty(&node->upper)) | ||
2113 | return 0; | ||
2114 | 2564 | ||
2115 | btrfs_node_key_to_cpu(node->eb, &key, 0); | 2565 | btrfs_node_key_to_cpu(node->eb, &key, 0); |
2116 | return do_relocation(trans, node, &key, path, 0); | 2566 | return do_relocation(trans, rc, node, &key, path, 0); |
2117 | } | 2567 | } |
2118 | 2568 | ||
2119 | static int finish_pending_nodes(struct btrfs_trans_handle *trans, | 2569 | static int finish_pending_nodes(struct btrfs_trans_handle *trans, |
2120 | struct backref_cache *cache, | 2570 | struct reloc_control *rc, |
2121 | struct btrfs_path *path) | 2571 | struct btrfs_path *path, int err) |
2122 | { | 2572 | { |
2573 | LIST_HEAD(list); | ||
2574 | struct backref_cache *cache = &rc->backref_cache; | ||
2123 | struct backref_node *node; | 2575 | struct backref_node *node; |
2124 | int level; | 2576 | int level; |
2125 | int ret; | 2577 | int ret; |
2126 | int err = 0; | ||
2127 | 2578 | ||
2128 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | 2579 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { |
2129 | while (!list_empty(&cache->pending[level])) { | 2580 | while (!list_empty(&cache->pending[level])) { |
2130 | node = list_entry(cache->pending[level].next, | 2581 | node = list_entry(cache->pending[level].next, |
2131 | struct backref_node, lower); | 2582 | struct backref_node, list); |
2132 | BUG_ON(node->level != level); | 2583 | list_move_tail(&node->list, &list); |
2584 | BUG_ON(!node->pending); | ||
2133 | 2585 | ||
2134 | ret = link_to_upper(trans, node, path); | 2586 | if (!err) { |
2135 | if (ret < 0) | 2587 | ret = link_to_upper(trans, rc, node, path); |
2136 | err = ret; | 2588 | if (ret < 0) |
2137 | /* | 2589 | err = ret; |
2138 | * this remove the node from the pending list and | 2590 | } |
2139 | * may add some other nodes to the level + 1 | ||
2140 | * pending list | ||
2141 | */ | ||
2142 | remove_backref_node(cache, node); | ||
2143 | } | 2591 | } |
2592 | list_splice_init(&list, &cache->pending[level]); | ||
2144 | } | 2593 | } |
2145 | BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); | ||
2146 | return err; | 2594 | return err; |
2147 | } | 2595 | } |
2148 | 2596 | ||
2149 | static void mark_block_processed(struct reloc_control *rc, | 2597 | static void mark_block_processed(struct reloc_control *rc, |
2150 | struct backref_node *node) | 2598 | u64 bytenr, u32 blocksize) |
2599 | { | ||
2600 | set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1, | ||
2601 | EXTENT_DIRTY, GFP_NOFS); | ||
2602 | } | ||
2603 | |||
2604 | static void __mark_block_processed(struct reloc_control *rc, | ||
2605 | struct backref_node *node) | ||
2151 | { | 2606 | { |
2152 | u32 blocksize; | 2607 | u32 blocksize; |
2153 | if (node->level == 0 || | 2608 | if (node->level == 0 || |
2154 | in_block_group(node->bytenr, rc->block_group)) { | 2609 | in_block_group(node->bytenr, rc->block_group)) { |
2155 | blocksize = btrfs_level_size(rc->extent_root, node->level); | 2610 | blocksize = btrfs_level_size(rc->extent_root, node->level); |
2156 | set_extent_bits(&rc->processed_blocks, node->bytenr, | 2611 | mark_block_processed(rc, node->bytenr, blocksize); |
2157 | node->bytenr + blocksize - 1, EXTENT_DIRTY, | ||
2158 | GFP_NOFS); | ||
2159 | } | 2612 | } |
2160 | node->processed = 1; | 2613 | node->processed = 1; |
2161 | } | 2614 | } |
@@ -2178,7 +2631,7 @@ static void update_processed_blocks(struct reloc_control *rc, | |||
2178 | if (next->processed) | 2631 | if (next->processed) |
2179 | break; | 2632 | break; |
2180 | 2633 | ||
2181 | mark_block_processed(rc, next); | 2634 | __mark_block_processed(rc, next); |
2182 | 2635 | ||
2183 | if (list_empty(&next->upper)) | 2636 | if (list_empty(&next->upper)) |
2184 | break; | 2637 | break; |
@@ -2201,138 +2654,6 @@ static int tree_block_processed(u64 bytenr, u32 blocksize, | |||
2201 | return 0; | 2654 | return 0; |
2202 | } | 2655 | } |
2203 | 2656 | ||
2204 | /* | ||
2205 | * check if there are any file extent pointers in the leaf point to | ||
2206 | * data require processing | ||
2207 | */ | ||
2208 | static int check_file_extents(struct reloc_control *rc, | ||
2209 | u64 bytenr, u32 blocksize, u64 ptr_gen) | ||
2210 | { | ||
2211 | struct btrfs_key found_key; | ||
2212 | struct btrfs_file_extent_item *fi; | ||
2213 | struct extent_buffer *leaf; | ||
2214 | u32 nritems; | ||
2215 | int i; | ||
2216 | int ret = 0; | ||
2217 | |||
2218 | leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen); | ||
2219 | |||
2220 | nritems = btrfs_header_nritems(leaf); | ||
2221 | for (i = 0; i < nritems; i++) { | ||
2222 | cond_resched(); | ||
2223 | btrfs_item_key_to_cpu(leaf, &found_key, i); | ||
2224 | if (found_key.type != BTRFS_EXTENT_DATA_KEY) | ||
2225 | continue; | ||
2226 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
2227 | if (btrfs_file_extent_type(leaf, fi) == | ||
2228 | BTRFS_FILE_EXTENT_INLINE) | ||
2229 | continue; | ||
2230 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
2231 | if (bytenr == 0) | ||
2232 | continue; | ||
2233 | if (in_block_group(bytenr, rc->block_group)) { | ||
2234 | ret = 1; | ||
2235 | break; | ||
2236 | } | ||
2237 | } | ||
2238 | free_extent_buffer(leaf); | ||
2239 | return ret; | ||
2240 | } | ||
2241 | |||
2242 | /* | ||
2243 | * scan child blocks of a given block to find blocks require processing | ||
2244 | */ | ||
2245 | static int add_child_blocks(struct btrfs_trans_handle *trans, | ||
2246 | struct reloc_control *rc, | ||
2247 | struct backref_node *node, | ||
2248 | struct rb_root *blocks) | ||
2249 | { | ||
2250 | struct tree_block *block; | ||
2251 | struct rb_node *rb_node; | ||
2252 | u64 bytenr; | ||
2253 | u64 ptr_gen; | ||
2254 | u32 blocksize; | ||
2255 | u32 nritems; | ||
2256 | int i; | ||
2257 | int err = 0; | ||
2258 | |||
2259 | nritems = btrfs_header_nritems(node->eb); | ||
2260 | blocksize = btrfs_level_size(rc->extent_root, node->level - 1); | ||
2261 | for (i = 0; i < nritems; i++) { | ||
2262 | cond_resched(); | ||
2263 | bytenr = btrfs_node_blockptr(node->eb, i); | ||
2264 | ptr_gen = btrfs_node_ptr_generation(node->eb, i); | ||
2265 | if (ptr_gen == trans->transid) | ||
2266 | continue; | ||
2267 | if (!in_block_group(bytenr, rc->block_group) && | ||
2268 | (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) | ||
2269 | continue; | ||
2270 | if (tree_block_processed(bytenr, blocksize, rc)) | ||
2271 | continue; | ||
2272 | |||
2273 | readahead_tree_block(rc->extent_root, | ||
2274 | bytenr, blocksize, ptr_gen); | ||
2275 | } | ||
2276 | |||
2277 | for (i = 0; i < nritems; i++) { | ||
2278 | cond_resched(); | ||
2279 | bytenr = btrfs_node_blockptr(node->eb, i); | ||
2280 | ptr_gen = btrfs_node_ptr_generation(node->eb, i); | ||
2281 | if (ptr_gen == trans->transid) | ||
2282 | continue; | ||
2283 | if (!in_block_group(bytenr, rc->block_group) && | ||
2284 | (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) | ||
2285 | continue; | ||
2286 | if (tree_block_processed(bytenr, blocksize, rc)) | ||
2287 | continue; | ||
2288 | if (!in_block_group(bytenr, rc->block_group) && | ||
2289 | !check_file_extents(rc, bytenr, blocksize, ptr_gen)) | ||
2290 | continue; | ||
2291 | |||
2292 | block = kmalloc(sizeof(*block), GFP_NOFS); | ||
2293 | if (!block) { | ||
2294 | err = -ENOMEM; | ||
2295 | break; | ||
2296 | } | ||
2297 | block->bytenr = bytenr; | ||
2298 | btrfs_node_key_to_cpu(node->eb, &block->key, i); | ||
2299 | block->level = node->level - 1; | ||
2300 | block->key_ready = 1; | ||
2301 | rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); | ||
2302 | BUG_ON(rb_node); | ||
2303 | } | ||
2304 | if (err) | ||
2305 | free_block_list(blocks); | ||
2306 | return err; | ||
2307 | } | ||
2308 | |||
2309 | /* | ||
2310 | * find adjacent blocks require processing | ||
2311 | */ | ||
2312 | static noinline_for_stack | ||
2313 | int add_adjacent_blocks(struct btrfs_trans_handle *trans, | ||
2314 | struct reloc_control *rc, | ||
2315 | struct backref_cache *cache, | ||
2316 | struct rb_root *blocks, int level, | ||
2317 | struct backref_node **upper) | ||
2318 | { | ||
2319 | struct backref_node *node; | ||
2320 | int ret = 0; | ||
2321 | |||
2322 | WARN_ON(!list_empty(&cache->pending[level])); | ||
2323 | |||
2324 | if (list_empty(&cache->pending[level + 1])) | ||
2325 | return 1; | ||
2326 | |||
2327 | node = list_entry(cache->pending[level + 1].next, | ||
2328 | struct backref_node, lower); | ||
2329 | if (node->eb) | ||
2330 | ret = add_child_blocks(trans, rc, node, blocks); | ||
2331 | |||
2332 | *upper = node; | ||
2333 | return ret; | ||
2334 | } | ||
2335 | |||
2336 | static int get_tree_block_key(struct reloc_control *rc, | 2657 | static int get_tree_block_key(struct reloc_control *rc, |
2337 | struct tree_block *block) | 2658 | struct tree_block *block) |
2338 | { | 2659 | { |
@@ -2370,40 +2691,53 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, | |||
2370 | struct btrfs_path *path) | 2691 | struct btrfs_path *path) |
2371 | { | 2692 | { |
2372 | struct btrfs_root *root; | 2693 | struct btrfs_root *root; |
2373 | int ret; | 2694 | int release = 0; |
2695 | int ret = 0; | ||
2374 | 2696 | ||
2697 | if (!node) | ||
2698 | return 0; | ||
2699 | |||
2700 | BUG_ON(node->processed); | ||
2375 | root = select_one_root(trans, node); | 2701 | root = select_one_root(trans, node); |
2376 | if (unlikely(!root)) { | 2702 | if (root == ERR_PTR(-ENOENT)) { |
2377 | rc->found_old_snapshot = 1; | ||
2378 | update_processed_blocks(rc, node); | 2703 | update_processed_blocks(rc, node); |
2379 | return 0; | 2704 | goto out; |
2380 | } | 2705 | } |
2381 | 2706 | ||
2382 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 2707 | if (!root || root->ref_cows) { |
2383 | ret = do_relocation(trans, node, key, path, 1); | 2708 | ret = reserve_metadata_space(trans, rc, node); |
2384 | if (ret < 0) | 2709 | if (ret) |
2385 | goto out; | ||
2386 | if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) { | ||
2387 | ret = replace_file_extents(trans, rc, root, | ||
2388 | node->eb, NULL); | ||
2389 | if (ret < 0) | ||
2390 | goto out; | ||
2391 | } | ||
2392 | drop_node_buffer(node); | ||
2393 | } else if (!root->ref_cows) { | ||
2394 | path->lowest_level = node->level; | ||
2395 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | ||
2396 | btrfs_release_path(root, path); | ||
2397 | if (ret < 0) | ||
2398 | goto out; | 2710 | goto out; |
2399 | } else if (root != node->root) { | 2711 | release = 1; |
2400 | WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS); | ||
2401 | } | 2712 | } |
2402 | 2713 | ||
2403 | update_processed_blocks(rc, node); | 2714 | if (root) { |
2404 | ret = 0; | 2715 | if (root->ref_cows) { |
2716 | BUG_ON(node->new_bytenr); | ||
2717 | BUG_ON(!list_empty(&node->list)); | ||
2718 | btrfs_record_root_in_trans(trans, root); | ||
2719 | root = root->reloc_root; | ||
2720 | node->new_bytenr = root->node->start; | ||
2721 | node->root = root; | ||
2722 | list_add_tail(&node->list, &rc->backref_cache.changed); | ||
2723 | } else { | ||
2724 | path->lowest_level = node->level; | ||
2725 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | ||
2726 | btrfs_release_path(root, path); | ||
2727 | if (ret > 0) | ||
2728 | ret = 0; | ||
2729 | } | ||
2730 | if (!ret) | ||
2731 | update_processed_blocks(rc, node); | ||
2732 | } else { | ||
2733 | ret = do_relocation(trans, rc, node, key, path, 1); | ||
2734 | } | ||
2405 | out: | 2735 | out: |
2406 | drop_node_buffer(node); | 2736 | if (ret || node->level == 0 || node->cowonly) { |
2737 | if (release) | ||
2738 | release_metadata_space(rc, node); | ||
2739 | remove_backref_node(&rc->backref_cache, node); | ||
2740 | } | ||
2407 | return ret; | 2741 | return ret; |
2408 | } | 2742 | } |
2409 | 2743 | ||
@@ -2414,12 +2748,10 @@ static noinline_for_stack | |||
2414 | int relocate_tree_blocks(struct btrfs_trans_handle *trans, | 2748 | int relocate_tree_blocks(struct btrfs_trans_handle *trans, |
2415 | struct reloc_control *rc, struct rb_root *blocks) | 2749 | struct reloc_control *rc, struct rb_root *blocks) |
2416 | { | 2750 | { |
2417 | struct backref_cache *cache; | ||
2418 | struct backref_node *node; | 2751 | struct backref_node *node; |
2419 | struct btrfs_path *path; | 2752 | struct btrfs_path *path; |
2420 | struct tree_block *block; | 2753 | struct tree_block *block; |
2421 | struct rb_node *rb_node; | 2754 | struct rb_node *rb_node; |
2422 | int level = -1; | ||
2423 | int ret; | 2755 | int ret; |
2424 | int err = 0; | 2756 | int err = 0; |
2425 | 2757 | ||
@@ -2427,21 +2759,9 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
2427 | if (!path) | 2759 | if (!path) |
2428 | return -ENOMEM; | 2760 | return -ENOMEM; |
2429 | 2761 | ||
2430 | cache = kmalloc(sizeof(*cache), GFP_NOFS); | ||
2431 | if (!cache) { | ||
2432 | btrfs_free_path(path); | ||
2433 | return -ENOMEM; | ||
2434 | } | ||
2435 | |||
2436 | backref_cache_init(cache); | ||
2437 | |||
2438 | rb_node = rb_first(blocks); | 2762 | rb_node = rb_first(blocks); |
2439 | while (rb_node) { | 2763 | while (rb_node) { |
2440 | block = rb_entry(rb_node, struct tree_block, rb_node); | 2764 | block = rb_entry(rb_node, struct tree_block, rb_node); |
2441 | if (level == -1) | ||
2442 | level = block->level; | ||
2443 | else | ||
2444 | BUG_ON(level != block->level); | ||
2445 | if (!block->key_ready) | 2765 | if (!block->key_ready) |
2446 | reada_tree_block(rc, block); | 2766 | reada_tree_block(rc, block); |
2447 | rb_node = rb_next(rb_node); | 2767 | rb_node = rb_next(rb_node); |
@@ -2459,7 +2779,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
2459 | while (rb_node) { | 2779 | while (rb_node) { |
2460 | block = rb_entry(rb_node, struct tree_block, rb_node); | 2780 | block = rb_entry(rb_node, struct tree_block, rb_node); |
2461 | 2781 | ||
2462 | node = build_backref_tree(rc, cache, &block->key, | 2782 | node = build_backref_tree(rc, &block->key, |
2463 | block->level, block->bytenr); | 2783 | block->level, block->bytenr); |
2464 | if (IS_ERR(node)) { | 2784 | if (IS_ERR(node)) { |
2465 | err = PTR_ERR(node); | 2785 | err = PTR_ERR(node); |
@@ -2469,79 +2789,62 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
2469 | ret = relocate_tree_block(trans, rc, node, &block->key, | 2789 | ret = relocate_tree_block(trans, rc, node, &block->key, |
2470 | path); | 2790 | path); |
2471 | if (ret < 0) { | 2791 | if (ret < 0) { |
2472 | err = ret; | 2792 | if (ret != -EAGAIN || rb_node == rb_first(blocks)) |
2793 | err = ret; | ||
2473 | goto out; | 2794 | goto out; |
2474 | } | 2795 | } |
2475 | remove_backref_node(cache, node); | ||
2476 | rb_node = rb_next(rb_node); | 2796 | rb_node = rb_next(rb_node); |
2477 | } | 2797 | } |
2478 | 2798 | out: | |
2479 | if (level > 0) | ||
2480 | goto out; | ||
2481 | |||
2482 | free_block_list(blocks); | 2799 | free_block_list(blocks); |
2800 | err = finish_pending_nodes(trans, rc, path, err); | ||
2483 | 2801 | ||
2484 | /* | 2802 | btrfs_free_path(path); |
2485 | * now backrefs of some upper level tree blocks have been cached, | 2803 | return err; |
2486 | * try relocating blocks referenced by these upper level blocks. | 2804 | } |
2487 | */ | ||
2488 | while (1) { | ||
2489 | struct backref_node *upper = NULL; | ||
2490 | if (trans->transaction->in_commit || | ||
2491 | trans->transaction->delayed_refs.flushing) | ||
2492 | break; | ||
2493 | 2805 | ||
2494 | ret = add_adjacent_blocks(trans, rc, cache, blocks, level, | 2806 | static noinline_for_stack |
2495 | &upper); | 2807 | int prealloc_file_extent_cluster(struct inode *inode, |
2496 | if (ret < 0) | 2808 | struct file_extent_cluster *cluster) |
2497 | err = ret; | 2809 | { |
2498 | if (ret != 0) | 2810 | u64 alloc_hint = 0; |
2499 | break; | 2811 | u64 start; |
2812 | u64 end; | ||
2813 | u64 offset = BTRFS_I(inode)->index_cnt; | ||
2814 | u64 num_bytes; | ||
2815 | int nr = 0; | ||
2816 | int ret = 0; | ||
2500 | 2817 | ||
2501 | rb_node = rb_first(blocks); | 2818 | BUG_ON(cluster->start != cluster->boundary[0]); |
2502 | while (rb_node) { | 2819 | mutex_lock(&inode->i_mutex); |
2503 | block = rb_entry(rb_node, struct tree_block, rb_node); | ||
2504 | if (trans->transaction->in_commit || | ||
2505 | trans->transaction->delayed_refs.flushing) | ||
2506 | goto out; | ||
2507 | BUG_ON(!block->key_ready); | ||
2508 | node = build_backref_tree(rc, cache, &block->key, | ||
2509 | level, block->bytenr); | ||
2510 | if (IS_ERR(node)) { | ||
2511 | err = PTR_ERR(node); | ||
2512 | goto out; | ||
2513 | } | ||
2514 | 2820 | ||
2515 | ret = relocate_tree_block(trans, rc, node, | 2821 | ret = btrfs_check_data_free_space(inode, cluster->end + |
2516 | &block->key, path); | 2822 | 1 - cluster->start); |
2517 | if (ret < 0) { | 2823 | if (ret) |
2518 | err = ret; | 2824 | goto out; |
2519 | goto out; | ||
2520 | } | ||
2521 | remove_backref_node(cache, node); | ||
2522 | rb_node = rb_next(rb_node); | ||
2523 | } | ||
2524 | free_block_list(blocks); | ||
2525 | 2825 | ||
2526 | if (upper) { | 2826 | while (nr < cluster->nr) { |
2527 | ret = link_to_upper(trans, upper, path); | 2827 | start = cluster->boundary[nr] - offset; |
2528 | if (ret < 0) { | 2828 | if (nr + 1 < cluster->nr) |
2529 | err = ret; | 2829 | end = cluster->boundary[nr + 1] - 1 - offset; |
2530 | break; | 2830 | else |
2531 | } | 2831 | end = cluster->end - offset; |
2532 | remove_backref_node(cache, upper); | 2832 | |
2533 | } | 2833 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); |
2834 | num_bytes = end + 1 - start; | ||
2835 | ret = btrfs_prealloc_file_range(inode, 0, start, | ||
2836 | num_bytes, num_bytes, | ||
2837 | end + 1, &alloc_hint); | ||
2838 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
2839 | if (ret) | ||
2840 | break; | ||
2841 | nr++; | ||
2534 | } | 2842 | } |
2843 | btrfs_free_reserved_data_space(inode, cluster->end + | ||
2844 | 1 - cluster->start); | ||
2535 | out: | 2845 | out: |
2536 | free_block_list(blocks); | 2846 | mutex_unlock(&inode->i_mutex); |
2537 | 2847 | return ret; | |
2538 | ret = finish_pending_nodes(trans, cache, path); | ||
2539 | if (ret < 0) | ||
2540 | err = ret; | ||
2541 | |||
2542 | kfree(cache); | ||
2543 | btrfs_free_path(path); | ||
2544 | return err; | ||
2545 | } | 2848 | } |
2546 | 2849 | ||
2547 | static noinline_for_stack | 2850 | static noinline_for_stack |
@@ -2587,7 +2890,6 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2587 | u64 offset = BTRFS_I(inode)->index_cnt; | 2890 | u64 offset = BTRFS_I(inode)->index_cnt; |
2588 | unsigned long index; | 2891 | unsigned long index; |
2589 | unsigned long last_index; | 2892 | unsigned long last_index; |
2590 | unsigned int dirty_page = 0; | ||
2591 | struct page *page; | 2893 | struct page *page; |
2592 | struct file_ra_state *ra; | 2894 | struct file_ra_state *ra; |
2593 | int nr = 0; | 2895 | int nr = 0; |
@@ -2600,21 +2902,24 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2600 | if (!ra) | 2902 | if (!ra) |
2601 | return -ENOMEM; | 2903 | return -ENOMEM; |
2602 | 2904 | ||
2603 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; | 2905 | ret = prealloc_file_extent_cluster(inode, cluster); |
2604 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | 2906 | if (ret) |
2907 | goto out; | ||
2605 | 2908 | ||
2606 | mutex_lock(&inode->i_mutex); | 2909 | file_ra_state_init(ra, inode->i_mapping); |
2607 | 2910 | ||
2608 | i_size_write(inode, cluster->end + 1 - offset); | ||
2609 | ret = setup_extent_mapping(inode, cluster->start - offset, | 2911 | ret = setup_extent_mapping(inode, cluster->start - offset, |
2610 | cluster->end - offset, cluster->start); | 2912 | cluster->end - offset, cluster->start); |
2611 | if (ret) | 2913 | if (ret) |
2612 | goto out_unlock; | 2914 | goto out; |
2613 | |||
2614 | file_ra_state_init(ra, inode->i_mapping); | ||
2615 | 2915 | ||
2616 | WARN_ON(cluster->start != cluster->boundary[0]); | 2916 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; |
2917 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | ||
2617 | while (index <= last_index) { | 2918 | while (index <= last_index) { |
2919 | ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); | ||
2920 | if (ret) | ||
2921 | goto out; | ||
2922 | |||
2618 | page = find_lock_page(inode->i_mapping, index); | 2923 | page = find_lock_page(inode->i_mapping, index); |
2619 | if (!page) { | 2924 | if (!page) { |
2620 | page_cache_sync_readahead(inode->i_mapping, | 2925 | page_cache_sync_readahead(inode->i_mapping, |
@@ -2622,8 +2927,10 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2622 | last_index + 1 - index); | 2927 | last_index + 1 - index); |
2623 | page = grab_cache_page(inode->i_mapping, index); | 2928 | page = grab_cache_page(inode->i_mapping, index); |
2624 | if (!page) { | 2929 | if (!page) { |
2930 | btrfs_delalloc_release_metadata(inode, | ||
2931 | PAGE_CACHE_SIZE); | ||
2625 | ret = -ENOMEM; | 2932 | ret = -ENOMEM; |
2626 | goto out_unlock; | 2933 | goto out; |
2627 | } | 2934 | } |
2628 | } | 2935 | } |
2629 | 2936 | ||
@@ -2639,8 +2946,10 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2639 | if (!PageUptodate(page)) { | 2946 | if (!PageUptodate(page)) { |
2640 | unlock_page(page); | 2947 | unlock_page(page); |
2641 | page_cache_release(page); | 2948 | page_cache_release(page); |
2949 | btrfs_delalloc_release_metadata(inode, | ||
2950 | PAGE_CACHE_SIZE); | ||
2642 | ret = -EIO; | 2951 | ret = -EIO; |
2643 | goto out_unlock; | 2952 | goto out; |
2644 | } | 2953 | } |
2645 | } | 2954 | } |
2646 | 2955 | ||
@@ -2659,10 +2968,9 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2659 | EXTENT_BOUNDARY, GFP_NOFS); | 2968 | EXTENT_BOUNDARY, GFP_NOFS); |
2660 | nr++; | 2969 | nr++; |
2661 | } | 2970 | } |
2662 | btrfs_set_extent_delalloc(inode, page_start, page_end); | ||
2663 | 2971 | ||
2972 | btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); | ||
2664 | set_page_dirty(page); | 2973 | set_page_dirty(page); |
2665 | dirty_page++; | ||
2666 | 2974 | ||
2667 | unlock_extent(&BTRFS_I(inode)->io_tree, | 2975 | unlock_extent(&BTRFS_I(inode)->io_tree, |
2668 | page_start, page_end, GFP_NOFS); | 2976 | page_start, page_end, GFP_NOFS); |
@@ -2670,20 +2978,11 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2670 | page_cache_release(page); | 2978 | page_cache_release(page); |
2671 | 2979 | ||
2672 | index++; | 2980 | index++; |
2673 | if (nr < cluster->nr && | 2981 | balance_dirty_pages_ratelimited(inode->i_mapping); |
2674 | page_end + 1 + offset == cluster->boundary[nr]) { | 2982 | btrfs_throttle(BTRFS_I(inode)->root); |
2675 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
2676 | dirty_page); | ||
2677 | dirty_page = 0; | ||
2678 | } | ||
2679 | } | ||
2680 | if (dirty_page) { | ||
2681 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
2682 | dirty_page); | ||
2683 | } | 2983 | } |
2684 | WARN_ON(nr != cluster->nr); | 2984 | WARN_ON(nr != cluster->nr); |
2685 | out_unlock: | 2985 | out: |
2686 | mutex_unlock(&inode->i_mutex); | ||
2687 | kfree(ra); | 2986 | kfree(ra); |
2688 | return ret; | 2987 | return ret; |
2689 | } | 2988 | } |
@@ -2795,6 +3094,8 @@ static int add_tree_block(struct reloc_control *rc, | |||
2795 | BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); | 3094 | BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); |
2796 | ret = get_ref_objectid_v0(rc, path, extent_key, | 3095 | ret = get_ref_objectid_v0(rc, path, extent_key, |
2797 | &ref_owner, NULL); | 3096 | &ref_owner, NULL); |
3097 | if (ret < 0) | ||
3098 | return ret; | ||
2798 | BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); | 3099 | BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); |
2799 | level = (int)ref_owner; | 3100 | level = (int)ref_owner; |
2800 | /* FIXME: get real generation */ | 3101 | /* FIXME: get real generation */ |
@@ -2869,9 +3170,6 @@ out: | |||
2869 | static int block_use_full_backref(struct reloc_control *rc, | 3170 | static int block_use_full_backref(struct reloc_control *rc, |
2870 | struct extent_buffer *eb) | 3171 | struct extent_buffer *eb) |
2871 | { | 3172 | { |
2872 | struct btrfs_path *path; | ||
2873 | struct btrfs_extent_item *ei; | ||
2874 | struct btrfs_key key; | ||
2875 | u64 flags; | 3173 | u64 flags; |
2876 | int ret; | 3174 | int ret; |
2877 | 3175 | ||
@@ -2879,28 +3177,62 @@ static int block_use_full_backref(struct reloc_control *rc, | |||
2879 | btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) | 3177 | btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) |
2880 | return 1; | 3178 | return 1; |
2881 | 3179 | ||
2882 | path = btrfs_alloc_path(); | 3180 | ret = btrfs_lookup_extent_info(NULL, rc->extent_root, |
2883 | BUG_ON(!path); | 3181 | eb->start, eb->len, NULL, &flags); |
2884 | |||
2885 | key.objectid = eb->start; | ||
2886 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
2887 | key.offset = eb->len; | ||
2888 | |||
2889 | path->search_commit_root = 1; | ||
2890 | path->skip_locking = 1; | ||
2891 | ret = btrfs_search_slot(NULL, rc->extent_root, | ||
2892 | &key, path, 0, 0); | ||
2893 | BUG_ON(ret); | 3182 | BUG_ON(ret); |
2894 | 3183 | ||
2895 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
2896 | struct btrfs_extent_item); | ||
2897 | flags = btrfs_extent_flags(path->nodes[0], ei); | ||
2898 | BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); | ||
2899 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) | 3184 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) |
2900 | ret = 1; | 3185 | ret = 1; |
2901 | else | 3186 | else |
2902 | ret = 0; | 3187 | ret = 0; |
3188 | return ret; | ||
3189 | } | ||
3190 | |||
3191 | static int delete_block_group_cache(struct btrfs_fs_info *fs_info, | ||
3192 | struct inode *inode, u64 ino) | ||
3193 | { | ||
3194 | struct btrfs_key key; | ||
3195 | struct btrfs_path *path; | ||
3196 | struct btrfs_root *root = fs_info->tree_root; | ||
3197 | struct btrfs_trans_handle *trans; | ||
3198 | unsigned long nr; | ||
3199 | int ret = 0; | ||
3200 | |||
3201 | if (inode) | ||
3202 | goto truncate; | ||
3203 | |||
3204 | key.objectid = ino; | ||
3205 | key.type = BTRFS_INODE_ITEM_KEY; | ||
3206 | key.offset = 0; | ||
3207 | |||
3208 | inode = btrfs_iget(fs_info->sb, &key, root, NULL); | ||
3209 | if (!inode || IS_ERR(inode) || is_bad_inode(inode)) { | ||
3210 | if (inode && !IS_ERR(inode)) | ||
3211 | iput(inode); | ||
3212 | return -ENOENT; | ||
3213 | } | ||
3214 | |||
3215 | truncate: | ||
3216 | path = btrfs_alloc_path(); | ||
3217 | if (!path) { | ||
3218 | ret = -ENOMEM; | ||
3219 | goto out; | ||
3220 | } | ||
3221 | |||
3222 | trans = btrfs_join_transaction(root, 0); | ||
3223 | if (IS_ERR(trans)) { | ||
3224 | btrfs_free_path(path); | ||
3225 | goto out; | ||
3226 | } | ||
3227 | |||
3228 | ret = btrfs_truncate_free_space_cache(root, trans, path, inode); | ||
3229 | |||
2903 | btrfs_free_path(path); | 3230 | btrfs_free_path(path); |
3231 | nr = trans->blocks_used; | ||
3232 | btrfs_end_transaction(trans, root); | ||
3233 | btrfs_btree_balance_dirty(root, nr); | ||
3234 | out: | ||
3235 | iput(inode); | ||
2904 | return ret; | 3236 | return ret; |
2905 | } | 3237 | } |
2906 | 3238 | ||
@@ -2930,15 +3262,27 @@ static int find_data_references(struct reloc_control *rc, | |||
2930 | int counted; | 3262 | int counted; |
2931 | int ret; | 3263 | int ret; |
2932 | 3264 | ||
2933 | path = btrfs_alloc_path(); | ||
2934 | if (!path) | ||
2935 | return -ENOMEM; | ||
2936 | |||
2937 | ref_root = btrfs_extent_data_ref_root(leaf, ref); | 3265 | ref_root = btrfs_extent_data_ref_root(leaf, ref); |
2938 | ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); | 3266 | ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); |
2939 | ref_offset = btrfs_extent_data_ref_offset(leaf, ref); | 3267 | ref_offset = btrfs_extent_data_ref_offset(leaf, ref); |
2940 | ref_count = btrfs_extent_data_ref_count(leaf, ref); | 3268 | ref_count = btrfs_extent_data_ref_count(leaf, ref); |
2941 | 3269 | ||
3270 | /* | ||
3271 | * This is an extent belonging to the free space cache, lets just delete | ||
3272 | * it and redo the search. | ||
3273 | */ | ||
3274 | if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { | ||
3275 | ret = delete_block_group_cache(rc->extent_root->fs_info, | ||
3276 | NULL, ref_objectid); | ||
3277 | if (ret != -ENOENT) | ||
3278 | return ret; | ||
3279 | ret = 0; | ||
3280 | } | ||
3281 | |||
3282 | path = btrfs_alloc_path(); | ||
3283 | if (!path) | ||
3284 | return -ENOMEM; | ||
3285 | |||
2942 | root = read_fs_root(rc->extent_root->fs_info, ref_root); | 3286 | root = read_fs_root(rc->extent_root->fs_info, ref_root); |
2943 | if (IS_ERR(root)) { | 3287 | if (IS_ERR(root)) { |
2944 | err = PTR_ERR(root); | 3288 | err = PTR_ERR(root); |
@@ -3073,22 +3417,10 @@ int add_data_references(struct reloc_control *rc, | |||
3073 | struct btrfs_extent_inline_ref *iref; | 3417 | struct btrfs_extent_inline_ref *iref; |
3074 | unsigned long ptr; | 3418 | unsigned long ptr; |
3075 | unsigned long end; | 3419 | unsigned long end; |
3076 | u32 blocksize; | 3420 | u32 blocksize = btrfs_level_size(rc->extent_root, 0); |
3077 | int ret; | 3421 | int ret; |
3078 | int err = 0; | 3422 | int err = 0; |
3079 | 3423 | ||
3080 | ret = get_new_location(rc->data_inode, NULL, extent_key->objectid, | ||
3081 | extent_key->offset); | ||
3082 | BUG_ON(ret < 0); | ||
3083 | if (ret > 0) { | ||
3084 | /* the relocated data is fragmented */ | ||
3085 | rc->extents_skipped++; | ||
3086 | btrfs_release_path(rc->extent_root, path); | ||
3087 | return 0; | ||
3088 | } | ||
3089 | |||
3090 | blocksize = btrfs_level_size(rc->extent_root, 0); | ||
3091 | |||
3092 | eb = path->nodes[0]; | 3424 | eb = path->nodes[0]; |
3093 | ptr = btrfs_item_ptr_offset(eb, path->slots[0]); | 3425 | ptr = btrfs_item_ptr_offset(eb, path->slots[0]); |
3094 | end = ptr + btrfs_item_size_nr(eb, path->slots[0]); | 3426 | end = ptr + btrfs_item_size_nr(eb, path->slots[0]); |
@@ -3169,7 +3501,8 @@ int add_data_references(struct reloc_control *rc, | |||
3169 | */ | 3501 | */ |
3170 | static noinline_for_stack | 3502 | static noinline_for_stack |
3171 | int find_next_extent(struct btrfs_trans_handle *trans, | 3503 | int find_next_extent(struct btrfs_trans_handle *trans, |
3172 | struct reloc_control *rc, struct btrfs_path *path) | 3504 | struct reloc_control *rc, struct btrfs_path *path, |
3505 | struct btrfs_key *extent_key) | ||
3173 | { | 3506 | { |
3174 | struct btrfs_key key; | 3507 | struct btrfs_key key; |
3175 | struct extent_buffer *leaf; | 3508 | struct extent_buffer *leaf; |
@@ -3224,6 +3557,7 @@ next: | |||
3224 | rc->search_start = end + 1; | 3557 | rc->search_start = end + 1; |
3225 | } else { | 3558 | } else { |
3226 | rc->search_start = key.objectid + key.offset; | 3559 | rc->search_start = key.objectid + key.offset; |
3560 | memcpy(extent_key, &key, sizeof(key)); | ||
3227 | return 0; | 3561 | return 0; |
3228 | } | 3562 | } |
3229 | } | 3563 | } |
@@ -3261,12 +3595,47 @@ static int check_extent_flags(u64 flags) | |||
3261 | return 0; | 3595 | return 0; |
3262 | } | 3596 | } |
3263 | 3597 | ||
3598 | static noinline_for_stack | ||
3599 | int prepare_to_relocate(struct reloc_control *rc) | ||
3600 | { | ||
3601 | struct btrfs_trans_handle *trans; | ||
3602 | int ret; | ||
3603 | |||
3604 | rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root); | ||
3605 | if (!rc->block_rsv) | ||
3606 | return -ENOMEM; | ||
3607 | |||
3608 | /* | ||
3609 | * reserve some space for creating reloc trees. | ||
3610 | * btrfs_init_reloc_root will use them when there | ||
3611 | * is no reservation in transaction handle. | ||
3612 | */ | ||
3613 | ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, | ||
3614 | rc->extent_root->nodesize * 256); | ||
3615 | if (ret) | ||
3616 | return ret; | ||
3617 | |||
3618 | rc->block_rsv->refill_used = 1; | ||
3619 | btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv); | ||
3620 | |||
3621 | memset(&rc->cluster, 0, sizeof(rc->cluster)); | ||
3622 | rc->search_start = rc->block_group->key.objectid; | ||
3623 | rc->extents_found = 0; | ||
3624 | rc->nodes_relocated = 0; | ||
3625 | rc->merging_rsv_size = 0; | ||
3626 | |||
3627 | rc->create_reloc_tree = 1; | ||
3628 | set_reloc_control(rc); | ||
3629 | |||
3630 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
3631 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3632 | return 0; | ||
3633 | } | ||
3264 | 3634 | ||
3265 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | 3635 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) |
3266 | { | 3636 | { |
3267 | struct rb_root blocks = RB_ROOT; | 3637 | struct rb_root blocks = RB_ROOT; |
3268 | struct btrfs_key key; | 3638 | struct btrfs_key key; |
3269 | struct file_extent_cluster *cluster; | ||
3270 | struct btrfs_trans_handle *trans = NULL; | 3639 | struct btrfs_trans_handle *trans = NULL; |
3271 | struct btrfs_path *path; | 3640 | struct btrfs_path *path; |
3272 | struct btrfs_extent_item *ei; | 3641 | struct btrfs_extent_item *ei; |
@@ -3276,33 +3645,25 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3276 | int ret; | 3645 | int ret; |
3277 | int err = 0; | 3646 | int err = 0; |
3278 | 3647 | ||
3279 | cluster = kzalloc(sizeof(*cluster), GFP_NOFS); | ||
3280 | if (!cluster) | ||
3281 | return -ENOMEM; | ||
3282 | |||
3283 | path = btrfs_alloc_path(); | 3648 | path = btrfs_alloc_path(); |
3284 | if (!path) { | 3649 | if (!path) |
3285 | kfree(cluster); | ||
3286 | return -ENOMEM; | 3650 | return -ENOMEM; |
3287 | } | ||
3288 | 3651 | ||
3289 | rc->extents_found = 0; | 3652 | ret = prepare_to_relocate(rc); |
3290 | rc->extents_skipped = 0; | 3653 | if (ret) { |
3291 | 3654 | err = ret; | |
3292 | rc->search_start = rc->block_group->key.objectid; | 3655 | goto out_free; |
3293 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | 3656 | } |
3294 | GFP_NOFS); | ||
3295 | |||
3296 | rc->create_reloc_root = 1; | ||
3297 | set_reloc_control(rc); | ||
3298 | |||
3299 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
3300 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3301 | 3657 | ||
3302 | while (1) { | 3658 | while (1) { |
3303 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3659 | trans = btrfs_start_transaction(rc->extent_root, 0); |
3660 | |||
3661 | if (update_backref_cache(trans, &rc->backref_cache)) { | ||
3662 | btrfs_end_transaction(trans, rc->extent_root); | ||
3663 | continue; | ||
3664 | } | ||
3304 | 3665 | ||
3305 | ret = find_next_extent(trans, rc, path); | 3666 | ret = find_next_extent(trans, rc, path, &key); |
3306 | if (ret < 0) | 3667 | if (ret < 0) |
3307 | err = ret; | 3668 | err = ret; |
3308 | if (ret != 0) | 3669 | if (ret != 0) |
@@ -3312,9 +3673,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3312 | 3673 | ||
3313 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | 3674 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], |
3314 | struct btrfs_extent_item); | 3675 | struct btrfs_extent_item); |
3315 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | 3676 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); |
3316 | item_size = btrfs_item_size_nr(path->nodes[0], | ||
3317 | path->slots[0]); | ||
3318 | if (item_size >= sizeof(*ei)) { | 3677 | if (item_size >= sizeof(*ei)) { |
3319 | flags = btrfs_extent_flags(path->nodes[0], ei); | 3678 | flags = btrfs_extent_flags(path->nodes[0], ei); |
3320 | ret = check_extent_flags(flags); | 3679 | ret = check_extent_flags(flags); |
@@ -3355,73 +3714,100 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3355 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 3714 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
3356 | ret = add_tree_block(rc, &key, path, &blocks); | 3715 | ret = add_tree_block(rc, &key, path, &blocks); |
3357 | } else if (rc->stage == UPDATE_DATA_PTRS && | 3716 | } else if (rc->stage == UPDATE_DATA_PTRS && |
3358 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3717 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
3359 | ret = add_data_references(rc, &key, path, &blocks); | 3718 | ret = add_data_references(rc, &key, path, &blocks); |
3360 | } else { | 3719 | } else { |
3361 | btrfs_release_path(rc->extent_root, path); | 3720 | btrfs_release_path(rc->extent_root, path); |
3362 | ret = 0; | 3721 | ret = 0; |
3363 | } | 3722 | } |
3364 | if (ret < 0) { | 3723 | if (ret < 0) { |
3365 | err = 0; | 3724 | err = ret; |
3366 | break; | 3725 | break; |
3367 | } | 3726 | } |
3368 | 3727 | ||
3369 | if (!RB_EMPTY_ROOT(&blocks)) { | 3728 | if (!RB_EMPTY_ROOT(&blocks)) { |
3370 | ret = relocate_tree_blocks(trans, rc, &blocks); | 3729 | ret = relocate_tree_blocks(trans, rc, &blocks); |
3371 | if (ret < 0) { | 3730 | if (ret < 0) { |
3731 | if (ret != -EAGAIN) { | ||
3732 | err = ret; | ||
3733 | break; | ||
3734 | } | ||
3735 | rc->extents_found--; | ||
3736 | rc->search_start = key.objectid; | ||
3737 | } | ||
3738 | } | ||
3739 | |||
3740 | ret = btrfs_block_rsv_check(trans, rc->extent_root, | ||
3741 | rc->block_rsv, 0, 5); | ||
3742 | if (ret < 0) { | ||
3743 | if (ret != -EAGAIN) { | ||
3372 | err = ret; | 3744 | err = ret; |
3745 | WARN_ON(1); | ||
3373 | break; | 3746 | break; |
3374 | } | 3747 | } |
3748 | rc->commit_transaction = 1; | ||
3375 | } | 3749 | } |
3376 | 3750 | ||
3377 | nr = trans->blocks_used; | 3751 | if (rc->commit_transaction) { |
3378 | btrfs_end_transaction(trans, rc->extent_root); | 3752 | rc->commit_transaction = 0; |
3753 | ret = btrfs_commit_transaction(trans, rc->extent_root); | ||
3754 | BUG_ON(ret); | ||
3755 | } else { | ||
3756 | nr = trans->blocks_used; | ||
3757 | btrfs_end_transaction_throttle(trans, rc->extent_root); | ||
3758 | btrfs_btree_balance_dirty(rc->extent_root, nr); | ||
3759 | } | ||
3379 | trans = NULL; | 3760 | trans = NULL; |
3380 | btrfs_btree_balance_dirty(rc->extent_root, nr); | ||
3381 | 3761 | ||
3382 | if (rc->stage == MOVE_DATA_EXTENTS && | 3762 | if (rc->stage == MOVE_DATA_EXTENTS && |
3383 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3763 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
3384 | rc->found_file_extent = 1; | 3764 | rc->found_file_extent = 1; |
3385 | ret = relocate_data_extent(rc->data_inode, | 3765 | ret = relocate_data_extent(rc->data_inode, |
3386 | &key, cluster); | 3766 | &key, &rc->cluster); |
3387 | if (ret < 0) { | 3767 | if (ret < 0) { |
3388 | err = ret; | 3768 | err = ret; |
3389 | break; | 3769 | break; |
3390 | } | 3770 | } |
3391 | } | 3771 | } |
3392 | } | 3772 | } |
3393 | btrfs_free_path(path); | 3773 | |
3774 | btrfs_release_path(rc->extent_root, path); | ||
3775 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | ||
3776 | GFP_NOFS); | ||
3394 | 3777 | ||
3395 | if (trans) { | 3778 | if (trans) { |
3396 | nr = trans->blocks_used; | 3779 | nr = trans->blocks_used; |
3397 | btrfs_end_transaction(trans, rc->extent_root); | 3780 | btrfs_end_transaction_throttle(trans, rc->extent_root); |
3398 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3781 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
3399 | } | 3782 | } |
3400 | 3783 | ||
3401 | if (!err) { | 3784 | if (!err) { |
3402 | ret = relocate_file_extent_cluster(rc->data_inode, cluster); | 3785 | ret = relocate_file_extent_cluster(rc->data_inode, |
3786 | &rc->cluster); | ||
3403 | if (ret < 0) | 3787 | if (ret < 0) |
3404 | err = ret; | 3788 | err = ret; |
3405 | } | 3789 | } |
3406 | 3790 | ||
3407 | kfree(cluster); | 3791 | rc->create_reloc_tree = 0; |
3792 | set_reloc_control(rc); | ||
3408 | 3793 | ||
3409 | rc->create_reloc_root = 0; | 3794 | backref_cache_cleanup(&rc->backref_cache); |
3410 | smp_mb(); | 3795 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); |
3411 | 3796 | ||
3412 | if (rc->extents_found > 0) { | 3797 | err = prepare_to_merge(rc, err); |
3413 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
3414 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3415 | } | ||
3416 | 3798 | ||
3417 | merge_reloc_roots(rc); | 3799 | merge_reloc_roots(rc); |
3418 | 3800 | ||
3801 | rc->merge_reloc_tree = 0; | ||
3419 | unset_reloc_control(rc); | 3802 | unset_reloc_control(rc); |
3803 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); | ||
3420 | 3804 | ||
3421 | /* get rid of pinned extents */ | 3805 | /* get rid of pinned extents */ |
3422 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3806 | trans = btrfs_join_transaction(rc->extent_root, 1); |
3423 | btrfs_commit_transaction(trans, rc->extent_root); | 3807 | btrfs_commit_transaction(trans, rc->extent_root); |
3424 | 3808 | out_free: | |
3809 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); | ||
3810 | btrfs_free_path(path); | ||
3425 | return err; | 3811 | return err; |
3426 | } | 3812 | } |
3427 | 3813 | ||
@@ -3447,7 +3833,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
3447 | btrfs_set_inode_generation(leaf, item, 1); | 3833 | btrfs_set_inode_generation(leaf, item, 1); |
3448 | btrfs_set_inode_size(leaf, item, 0); | 3834 | btrfs_set_inode_size(leaf, item, 0); |
3449 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | 3835 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); |
3450 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | 3836 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | |
3837 | BTRFS_INODE_PREALLOC); | ||
3451 | btrfs_mark_buffer_dirty(leaf); | 3838 | btrfs_mark_buffer_dirty(leaf); |
3452 | btrfs_release_path(root, path); | 3839 | btrfs_release_path(root, path); |
3453 | out: | 3840 | out: |
@@ -3459,8 +3846,9 @@ out: | |||
3459 | * helper to create inode for data relocation. | 3846 | * helper to create inode for data relocation. |
3460 | * the inode is in data relocation tree and its link count is 0 | 3847 | * the inode is in data relocation tree and its link count is 0 |
3461 | */ | 3848 | */ |
3462 | static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | 3849 | static noinline_for_stack |
3463 | struct btrfs_block_group_cache *group) | 3850 | struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, |
3851 | struct btrfs_block_group_cache *group) | ||
3464 | { | 3852 | { |
3465 | struct inode *inode = NULL; | 3853 | struct inode *inode = NULL; |
3466 | struct btrfs_trans_handle *trans; | 3854 | struct btrfs_trans_handle *trans; |
@@ -3474,8 +3862,9 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
3474 | if (IS_ERR(root)) | 3862 | if (IS_ERR(root)) |
3475 | return ERR_CAST(root); | 3863 | return ERR_CAST(root); |
3476 | 3864 | ||
3477 | trans = btrfs_start_transaction(root, 1); | 3865 | trans = btrfs_start_transaction(root, 6); |
3478 | BUG_ON(!trans); | 3866 | if (IS_ERR(trans)) |
3867 | return ERR_CAST(trans); | ||
3479 | 3868 | ||
3480 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | 3869 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); |
3481 | if (err) | 3870 | if (err) |
@@ -3487,7 +3876,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
3487 | key.objectid = objectid; | 3876 | key.objectid = objectid; |
3488 | key.type = BTRFS_INODE_ITEM_KEY; | 3877 | key.type = BTRFS_INODE_ITEM_KEY; |
3489 | key.offset = 0; | 3878 | key.offset = 0; |
3490 | inode = btrfs_iget(root->fs_info->sb, &key, root); | 3879 | inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); |
3491 | BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); | 3880 | BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); |
3492 | BTRFS_I(inode)->index_cnt = group->key.objectid; | 3881 | BTRFS_I(inode)->index_cnt = group->key.objectid; |
3493 | 3882 | ||
@@ -3495,7 +3884,6 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
3495 | out: | 3884 | out: |
3496 | nr = trans->blocks_used; | 3885 | nr = trans->blocks_used; |
3497 | btrfs_end_transaction(trans, root); | 3886 | btrfs_end_transaction(trans, root); |
3498 | |||
3499 | btrfs_btree_balance_dirty(root, nr); | 3887 | btrfs_btree_balance_dirty(root, nr); |
3500 | if (err) { | 3888 | if (err) { |
3501 | if (inode) | 3889 | if (inode) |
@@ -3505,6 +3893,21 @@ out: | |||
3505 | return inode; | 3893 | return inode; |
3506 | } | 3894 | } |
3507 | 3895 | ||
3896 | static struct reloc_control *alloc_reloc_control(void) | ||
3897 | { | ||
3898 | struct reloc_control *rc; | ||
3899 | |||
3900 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | ||
3901 | if (!rc) | ||
3902 | return NULL; | ||
3903 | |||
3904 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
3905 | backref_cache_init(&rc->backref_cache); | ||
3906 | mapping_tree_init(&rc->reloc_root_tree); | ||
3907 | extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); | ||
3908 | return rc; | ||
3909 | } | ||
3910 | |||
3508 | /* | 3911 | /* |
3509 | * function to relocate all extents in a block group. | 3912 | * function to relocate all extents in a block group. |
3510 | */ | 3913 | */ |
@@ -3512,25 +3915,49 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3512 | { | 3915 | { |
3513 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3916 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
3514 | struct reloc_control *rc; | 3917 | struct reloc_control *rc; |
3918 | struct inode *inode; | ||
3919 | struct btrfs_path *path; | ||
3515 | int ret; | 3920 | int ret; |
3921 | int rw = 0; | ||
3516 | int err = 0; | 3922 | int err = 0; |
3517 | 3923 | ||
3518 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | 3924 | rc = alloc_reloc_control(); |
3519 | if (!rc) | 3925 | if (!rc) |
3520 | return -ENOMEM; | 3926 | return -ENOMEM; |
3521 | 3927 | ||
3522 | mapping_tree_init(&rc->reloc_root_tree); | 3928 | rc->extent_root = extent_root; |
3523 | extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); | ||
3524 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
3525 | 3929 | ||
3526 | rc->block_group = btrfs_lookup_block_group(fs_info, group_start); | 3930 | rc->block_group = btrfs_lookup_block_group(fs_info, group_start); |
3527 | BUG_ON(!rc->block_group); | 3931 | BUG_ON(!rc->block_group); |
3528 | 3932 | ||
3529 | btrfs_init_workers(&rc->workers, "relocate", | 3933 | if (!rc->block_group->ro) { |
3530 | fs_info->thread_pool_size, NULL); | 3934 | ret = btrfs_set_block_group_ro(extent_root, rc->block_group); |
3935 | if (ret) { | ||
3936 | err = ret; | ||
3937 | goto out; | ||
3938 | } | ||
3939 | rw = 1; | ||
3940 | } | ||
3531 | 3941 | ||
3532 | rc->extent_root = extent_root; | 3942 | path = btrfs_alloc_path(); |
3533 | btrfs_prepare_block_group_relocation(extent_root, rc->block_group); | 3943 | if (!path) { |
3944 | err = -ENOMEM; | ||
3945 | goto out; | ||
3946 | } | ||
3947 | |||
3948 | inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group, | ||
3949 | path); | ||
3950 | btrfs_free_path(path); | ||
3951 | |||
3952 | if (!IS_ERR(inode)) | ||
3953 | ret = delete_block_group_cache(fs_info, inode, 0); | ||
3954 | else | ||
3955 | ret = PTR_ERR(inode); | ||
3956 | |||
3957 | if (ret && ret != -ENOENT) { | ||
3958 | err = ret; | ||
3959 | goto out; | ||
3960 | } | ||
3534 | 3961 | ||
3535 | rc->data_inode = create_reloc_inode(fs_info, rc->block_group); | 3962 | rc->data_inode = create_reloc_inode(fs_info, rc->block_group); |
3536 | if (IS_ERR(rc->data_inode)) { | 3963 | if (IS_ERR(rc->data_inode)) { |
@@ -3547,9 +3974,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3547 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); | 3974 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); |
3548 | 3975 | ||
3549 | while (1) { | 3976 | while (1) { |
3550 | rc->extents_found = 0; | ||
3551 | rc->extents_skipped = 0; | ||
3552 | |||
3553 | mutex_lock(&fs_info->cleaner_mutex); | 3977 | mutex_lock(&fs_info->cleaner_mutex); |
3554 | 3978 | ||
3555 | btrfs_clean_old_snapshots(fs_info->tree_root); | 3979 | btrfs_clean_old_snapshots(fs_info->tree_root); |
@@ -3558,7 +3982,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3558 | mutex_unlock(&fs_info->cleaner_mutex); | 3982 | mutex_unlock(&fs_info->cleaner_mutex); |
3559 | if (ret < 0) { | 3983 | if (ret < 0) { |
3560 | err = ret; | 3984 | err = ret; |
3561 | break; | 3985 | goto out; |
3562 | } | 3986 | } |
3563 | 3987 | ||
3564 | if (rc->extents_found == 0) | 3988 | if (rc->extents_found == 0) |
@@ -3572,18 +3996,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3572 | invalidate_mapping_pages(rc->data_inode->i_mapping, | 3996 | invalidate_mapping_pages(rc->data_inode->i_mapping, |
3573 | 0, -1); | 3997 | 0, -1); |
3574 | rc->stage = UPDATE_DATA_PTRS; | 3998 | rc->stage = UPDATE_DATA_PTRS; |
3575 | } else if (rc->stage == UPDATE_DATA_PTRS && | ||
3576 | rc->extents_skipped >= rc->extents_found) { | ||
3577 | iput(rc->data_inode); | ||
3578 | rc->data_inode = create_reloc_inode(fs_info, | ||
3579 | rc->block_group); | ||
3580 | if (IS_ERR(rc->data_inode)) { | ||
3581 | err = PTR_ERR(rc->data_inode); | ||
3582 | rc->data_inode = NULL; | ||
3583 | break; | ||
3584 | } | ||
3585 | rc->stage = MOVE_DATA_EXTENTS; | ||
3586 | rc->found_file_extent = 0; | ||
3587 | } | 3999 | } |
3588 | } | 4000 | } |
3589 | 4001 | ||
@@ -3596,8 +4008,9 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3596 | WARN_ON(rc->block_group->reserved > 0); | 4008 | WARN_ON(rc->block_group->reserved > 0); |
3597 | WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); | 4009 | WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); |
3598 | out: | 4010 | out: |
4011 | if (err && rw) | ||
4012 | btrfs_set_block_group_rw(extent_root, rc->block_group); | ||
3599 | iput(rc->data_inode); | 4013 | iput(rc->data_inode); |
3600 | btrfs_stop_workers(&rc->workers); | ||
3601 | btrfs_put_block_group(rc->block_group); | 4014 | btrfs_put_block_group(rc->block_group); |
3602 | kfree(rc); | 4015 | kfree(rc); |
3603 | return err; | 4016 | return err; |
@@ -3608,7 +4021,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) | |||
3608 | struct btrfs_trans_handle *trans; | 4021 | struct btrfs_trans_handle *trans; |
3609 | int ret; | 4022 | int ret; |
3610 | 4023 | ||
3611 | trans = btrfs_start_transaction(root->fs_info->tree_root, 1); | 4024 | trans = btrfs_start_transaction(root->fs_info->tree_root, 0); |
3612 | 4025 | ||
3613 | memset(&root->root_item.drop_progress, 0, | 4026 | memset(&root->root_item.drop_progress, 0, |
3614 | sizeof(root->root_item.drop_progress)); | 4027 | sizeof(root->root_item.drop_progress)); |
@@ -3701,20 +4114,20 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
3701 | if (list_empty(&reloc_roots)) | 4114 | if (list_empty(&reloc_roots)) |
3702 | goto out; | 4115 | goto out; |
3703 | 4116 | ||
3704 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | 4117 | rc = alloc_reloc_control(); |
3705 | if (!rc) { | 4118 | if (!rc) { |
3706 | err = -ENOMEM; | 4119 | err = -ENOMEM; |
3707 | goto out; | 4120 | goto out; |
3708 | } | 4121 | } |
3709 | 4122 | ||
3710 | mapping_tree_init(&rc->reloc_root_tree); | ||
3711 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
3712 | btrfs_init_workers(&rc->workers, "relocate", | ||
3713 | root->fs_info->thread_pool_size, NULL); | ||
3714 | rc->extent_root = root->fs_info->extent_root; | 4123 | rc->extent_root = root->fs_info->extent_root; |
3715 | 4124 | ||
3716 | set_reloc_control(rc); | 4125 | set_reloc_control(rc); |
3717 | 4126 | ||
4127 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
4128 | |||
4129 | rc->merge_reloc_tree = 1; | ||
4130 | |||
3718 | while (!list_empty(&reloc_roots)) { | 4131 | while (!list_empty(&reloc_roots)) { |
3719 | reloc_root = list_entry(reloc_roots.next, | 4132 | reloc_root = list_entry(reloc_roots.next, |
3720 | struct btrfs_root, root_list); | 4133 | struct btrfs_root, root_list); |
@@ -3734,20 +4147,16 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
3734 | fs_root->reloc_root = reloc_root; | 4147 | fs_root->reloc_root = reloc_root; |
3735 | } | 4148 | } |
3736 | 4149 | ||
3737 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
3738 | btrfs_commit_transaction(trans, rc->extent_root); | 4150 | btrfs_commit_transaction(trans, rc->extent_root); |
3739 | 4151 | ||
3740 | merge_reloc_roots(rc); | 4152 | merge_reloc_roots(rc); |
3741 | 4153 | ||
3742 | unset_reloc_control(rc); | 4154 | unset_reloc_control(rc); |
3743 | 4155 | ||
3744 | trans = btrfs_start_transaction(rc->extent_root, 1); | 4156 | trans = btrfs_join_transaction(rc->extent_root, 1); |
3745 | btrfs_commit_transaction(trans, rc->extent_root); | 4157 | btrfs_commit_transaction(trans, rc->extent_root); |
3746 | out: | 4158 | out: |
3747 | if (rc) { | 4159 | kfree(rc); |
3748 | btrfs_stop_workers(&rc->workers); | ||
3749 | kfree(rc); | ||
3750 | } | ||
3751 | while (!list_empty(&reloc_roots)) { | 4160 | while (!list_empty(&reloc_roots)) { |
3752 | reloc_root = list_entry(reloc_roots.next, | 4161 | reloc_root = list_entry(reloc_roots.next, |
3753 | struct btrfs_root, root_list); | 4162 | struct btrfs_root, root_list); |
@@ -3764,7 +4173,8 @@ out: | |||
3764 | BTRFS_DATA_RELOC_TREE_OBJECTID); | 4173 | BTRFS_DATA_RELOC_TREE_OBJECTID); |
3765 | if (IS_ERR(fs_root)) | 4174 | if (IS_ERR(fs_root)) |
3766 | err = PTR_ERR(fs_root); | 4175 | err = PTR_ERR(fs_root); |
3767 | btrfs_orphan_cleanup(fs_root); | 4176 | else |
4177 | btrfs_orphan_cleanup(fs_root); | ||
3768 | } | 4178 | } |
3769 | return err; | 4179 | return err; |
3770 | } | 4180 | } |
@@ -3810,5 +4220,132 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | |||
3810 | btrfs_add_ordered_sum(inode, ordered, sums); | 4220 | btrfs_add_ordered_sum(inode, ordered, sums); |
3811 | } | 4221 | } |
3812 | btrfs_put_ordered_extent(ordered); | 4222 | btrfs_put_ordered_extent(ordered); |
3813 | return 0; | 4223 | return ret; |
4224 | } | ||
4225 | |||
4226 | void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, | ||
4227 | struct btrfs_root *root, struct extent_buffer *buf, | ||
4228 | struct extent_buffer *cow) | ||
4229 | { | ||
4230 | struct reloc_control *rc; | ||
4231 | struct backref_node *node; | ||
4232 | int first_cow = 0; | ||
4233 | int level; | ||
4234 | int ret; | ||
4235 | |||
4236 | rc = root->fs_info->reloc_ctl; | ||
4237 | if (!rc) | ||
4238 | return; | ||
4239 | |||
4240 | BUG_ON(rc->stage == UPDATE_DATA_PTRS && | ||
4241 | root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); | ||
4242 | |||
4243 | level = btrfs_header_level(buf); | ||
4244 | if (btrfs_header_generation(buf) <= | ||
4245 | btrfs_root_last_snapshot(&root->root_item)) | ||
4246 | first_cow = 1; | ||
4247 | |||
4248 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID && | ||
4249 | rc->create_reloc_tree) { | ||
4250 | WARN_ON(!first_cow && level == 0); | ||
4251 | |||
4252 | node = rc->backref_cache.path[level]; | ||
4253 | BUG_ON(node->bytenr != buf->start && | ||
4254 | node->new_bytenr != buf->start); | ||
4255 | |||
4256 | drop_node_buffer(node); | ||
4257 | extent_buffer_get(cow); | ||
4258 | node->eb = cow; | ||
4259 | node->new_bytenr = cow->start; | ||
4260 | |||
4261 | if (!node->pending) { | ||
4262 | list_move_tail(&node->list, | ||
4263 | &rc->backref_cache.pending[level]); | ||
4264 | node->pending = 1; | ||
4265 | } | ||
4266 | |||
4267 | if (first_cow) | ||
4268 | __mark_block_processed(rc, node); | ||
4269 | |||
4270 | if (first_cow && level > 0) | ||
4271 | rc->nodes_relocated += buf->len; | ||
4272 | } | ||
4273 | |||
4274 | if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) { | ||
4275 | ret = replace_file_extents(trans, rc, root, cow); | ||
4276 | BUG_ON(ret); | ||
4277 | } | ||
4278 | } | ||
4279 | |||
4280 | /* | ||
4281 | * called before creating snapshot. it calculates metadata reservation | ||
4282 | * requried for relocating tree blocks in the snapshot | ||
4283 | */ | ||
4284 | void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, | ||
4285 | struct btrfs_pending_snapshot *pending, | ||
4286 | u64 *bytes_to_reserve) | ||
4287 | { | ||
4288 | struct btrfs_root *root; | ||
4289 | struct reloc_control *rc; | ||
4290 | |||
4291 | root = pending->root; | ||
4292 | if (!root->reloc_root) | ||
4293 | return; | ||
4294 | |||
4295 | rc = root->fs_info->reloc_ctl; | ||
4296 | if (!rc->merge_reloc_tree) | ||
4297 | return; | ||
4298 | |||
4299 | root = root->reloc_root; | ||
4300 | BUG_ON(btrfs_root_refs(&root->root_item) == 0); | ||
4301 | /* | ||
4302 | * relocation is in the stage of merging trees. the space | ||
4303 | * used by merging a reloc tree is twice the size of | ||
4304 | * relocated tree nodes in the worst case. half for cowing | ||
4305 | * the reloc tree, half for cowing the fs tree. the space | ||
4306 | * used by cowing the reloc tree will be freed after the | ||
4307 | * tree is dropped. if we create snapshot, cowing the fs | ||
4308 | * tree may use more space than it frees. so we need | ||
4309 | * reserve extra space. | ||
4310 | */ | ||
4311 | *bytes_to_reserve += rc->nodes_relocated; | ||
4312 | } | ||
4313 | |||
4314 | /* | ||
4315 | * called after snapshot is created. migrate block reservation | ||
4316 | * and create reloc root for the newly created snapshot | ||
4317 | */ | ||
4318 | void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | ||
4319 | struct btrfs_pending_snapshot *pending) | ||
4320 | { | ||
4321 | struct btrfs_root *root = pending->root; | ||
4322 | struct btrfs_root *reloc_root; | ||
4323 | struct btrfs_root *new_root; | ||
4324 | struct reloc_control *rc; | ||
4325 | int ret; | ||
4326 | |||
4327 | if (!root->reloc_root) | ||
4328 | return; | ||
4329 | |||
4330 | rc = root->fs_info->reloc_ctl; | ||
4331 | rc->merging_rsv_size += rc->nodes_relocated; | ||
4332 | |||
4333 | if (rc->merge_reloc_tree) { | ||
4334 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
4335 | rc->block_rsv, | ||
4336 | rc->nodes_relocated); | ||
4337 | BUG_ON(ret); | ||
4338 | } | ||
4339 | |||
4340 | new_root = pending->snap; | ||
4341 | reloc_root = create_reloc_root(trans, root->reloc_root, | ||
4342 | new_root->root_key.objectid); | ||
4343 | |||
4344 | __add_reloc_root(reloc_root); | ||
4345 | new_root->reloc_root = reloc_root; | ||
4346 | |||
4347 | if (rc->create_reloc_tree) { | ||
4348 | ret = clone_backref_node(trans, rc, root, reloc_root); | ||
4349 | BUG_ON(ret); | ||
4350 | } | ||
3814 | } | 4351 | } |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 67fa2d29d663..6a1086e83ffc 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -181,7 +181,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
181 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid) | 181 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid) |
182 | { | 182 | { |
183 | struct btrfs_root *dead_root; | 183 | struct btrfs_root *dead_root; |
184 | struct btrfs_item *item; | ||
185 | struct btrfs_root_item *ri; | 184 | struct btrfs_root_item *ri; |
186 | struct btrfs_key key; | 185 | struct btrfs_key key; |
187 | struct btrfs_key found_key; | 186 | struct btrfs_key found_key; |
@@ -214,7 +213,6 @@ again: | |||
214 | nritems = btrfs_header_nritems(leaf); | 213 | nritems = btrfs_header_nritems(leaf); |
215 | slot = path->slots[0]; | 214 | slot = path->slots[0]; |
216 | } | 215 | } |
217 | item = btrfs_item_nr(leaf, slot); | ||
218 | btrfs_item_key_to_cpu(leaf, &key, slot); | 216 | btrfs_item_key_to_cpu(leaf, &key, slot); |
219 | if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) | 217 | if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) |
220 | goto next; | 218 | goto next; |
@@ -259,6 +257,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
259 | struct extent_buffer *leaf; | 257 | struct extent_buffer *leaf; |
260 | struct btrfs_path *path; | 258 | struct btrfs_path *path; |
261 | struct btrfs_key key; | 259 | struct btrfs_key key; |
260 | struct btrfs_key root_key; | ||
261 | struct btrfs_root *root; | ||
262 | int err = 0; | 262 | int err = 0; |
263 | int ret; | 263 | int ret; |
264 | 264 | ||
@@ -270,6 +270,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
270 | key.type = BTRFS_ORPHAN_ITEM_KEY; | 270 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
271 | key.offset = 0; | 271 | key.offset = 0; |
272 | 272 | ||
273 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
274 | root_key.offset = (u64)-1; | ||
275 | |||
273 | while (1) { | 276 | while (1) { |
274 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); | 277 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); |
275 | if (ret < 0) { | 278 | if (ret < 0) { |
@@ -294,13 +297,25 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
294 | key.type != BTRFS_ORPHAN_ITEM_KEY) | 297 | key.type != BTRFS_ORPHAN_ITEM_KEY) |
295 | break; | 298 | break; |
296 | 299 | ||
297 | ret = btrfs_find_dead_roots(tree_root, key.offset); | 300 | root_key.objectid = key.offset; |
298 | if (ret) { | 301 | key.offset++; |
302 | |||
303 | root = btrfs_read_fs_root_no_name(tree_root->fs_info, | ||
304 | &root_key); | ||
305 | if (!IS_ERR(root)) | ||
306 | continue; | ||
307 | |||
308 | ret = PTR_ERR(root); | ||
309 | if (ret != -ENOENT) { | ||
299 | err = ret; | 310 | err = ret; |
300 | break; | 311 | break; |
301 | } | 312 | } |
302 | 313 | ||
303 | key.offset++; | 314 | ret = btrfs_find_dead_roots(tree_root, root_key.objectid); |
315 | if (ret) { | ||
316 | err = ret; | ||
317 | break; | ||
318 | } | ||
304 | } | 319 | } |
305 | 320 | ||
306 | btrfs_free_path(path); | 321 | btrfs_free_path(path); |
@@ -313,7 +328,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
313 | { | 328 | { |
314 | struct btrfs_path *path; | 329 | struct btrfs_path *path; |
315 | int ret; | 330 | int ret; |
316 | u32 refs; | ||
317 | struct btrfs_root_item *ri; | 331 | struct btrfs_root_item *ri; |
318 | struct extent_buffer *leaf; | 332 | struct extent_buffer *leaf; |
319 | 333 | ||
@@ -327,8 +341,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
327 | leaf = path->nodes[0]; | 341 | leaf = path->nodes[0]; |
328 | ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); | 342 | ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); |
329 | 343 | ||
330 | refs = btrfs_disk_root_refs(leaf, ri); | ||
331 | BUG_ON(refs != 0); | ||
332 | ret = btrfs_del_item(trans, root, path); | 344 | ret = btrfs_del_item(trans, root, path); |
333 | out: | 345 | out: |
334 | btrfs_free_path(path); | 346 | btrfs_free_path(path); |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8a1ea6e64575..b2130c46fdb5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/namei.h> | 38 | #include <linux/namei.h> |
39 | #include <linux/miscdevice.h> | 39 | #include <linux/miscdevice.h> |
40 | #include <linux/magic.h> | 40 | #include <linux/magic.h> |
41 | #include <linux/slab.h> | ||
41 | #include "compat.h" | 42 | #include "compat.h" |
42 | #include "ctree.h" | 43 | #include "ctree.h" |
43 | #include "disk-io.h" | 44 | #include "disk-io.h" |
@@ -53,6 +54,90 @@ | |||
53 | 54 | ||
54 | static const struct super_operations btrfs_super_ops; | 55 | static const struct super_operations btrfs_super_ops; |
55 | 56 | ||
57 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, | ||
58 | char nbuf[16]) | ||
59 | { | ||
60 | char *errstr = NULL; | ||
61 | |||
62 | switch (errno) { | ||
63 | case -EIO: | ||
64 | errstr = "IO failure"; | ||
65 | break; | ||
66 | case -ENOMEM: | ||
67 | errstr = "Out of memory"; | ||
68 | break; | ||
69 | case -EROFS: | ||
70 | errstr = "Readonly filesystem"; | ||
71 | break; | ||
72 | default: | ||
73 | if (nbuf) { | ||
74 | if (snprintf(nbuf, 16, "error %d", -errno) >= 0) | ||
75 | errstr = nbuf; | ||
76 | } | ||
77 | break; | ||
78 | } | ||
79 | |||
80 | return errstr; | ||
81 | } | ||
82 | |||
83 | static void __save_error_info(struct btrfs_fs_info *fs_info) | ||
84 | { | ||
85 | /* | ||
86 | * today we only save the error info into ram. Long term we'll | ||
87 | * also send it down to the disk | ||
88 | */ | ||
89 | fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; | ||
90 | } | ||
91 | |||
92 | /* NOTE: | ||
93 | * We move write_super stuff at umount in order to avoid deadlock | ||
94 | * for umount hold all lock. | ||
95 | */ | ||
96 | static void save_error_info(struct btrfs_fs_info *fs_info) | ||
97 | { | ||
98 | __save_error_info(fs_info); | ||
99 | } | ||
100 | |||
101 | /* btrfs handle error by forcing the filesystem readonly */ | ||
102 | static void btrfs_handle_error(struct btrfs_fs_info *fs_info) | ||
103 | { | ||
104 | struct super_block *sb = fs_info->sb; | ||
105 | |||
106 | if (sb->s_flags & MS_RDONLY) | ||
107 | return; | ||
108 | |||
109 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
110 | sb->s_flags |= MS_RDONLY; | ||
111 | printk(KERN_INFO "btrfs is forced readonly\n"); | ||
112 | } | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * __btrfs_std_error decodes expected errors from the caller and | ||
117 | * invokes the approciate error response. | ||
118 | */ | ||
119 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | ||
120 | unsigned int line, int errno) | ||
121 | { | ||
122 | struct super_block *sb = fs_info->sb; | ||
123 | char nbuf[16]; | ||
124 | const char *errstr; | ||
125 | |||
126 | /* | ||
127 | * Special case: if the error is EROFS, and we're already | ||
128 | * under MS_RDONLY, then it is safe here. | ||
129 | */ | ||
130 | if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) | ||
131 | return; | ||
132 | |||
133 | errstr = btrfs_decode_error(fs_info, errno, nbuf); | ||
134 | printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", | ||
135 | sb->s_id, function, line, errstr); | ||
136 | save_error_info(fs_info); | ||
137 | |||
138 | btrfs_handle_error(fs_info); | ||
139 | } | ||
140 | |||
56 | static void btrfs_put_super(struct super_block *sb) | 141 | static void btrfs_put_super(struct super_block *sb) |
57 | { | 142 | { |
58 | struct btrfs_root *root = btrfs_sb(sb); | 143 | struct btrfs_root *root = btrfs_sb(sb); |
@@ -60,30 +145,34 @@ static void btrfs_put_super(struct super_block *sb) | |||
60 | 145 | ||
61 | ret = close_ctree(root); | 146 | ret = close_ctree(root); |
62 | sb->s_fs_info = NULL; | 147 | sb->s_fs_info = NULL; |
148 | |||
149 | (void)ret; /* FIXME: need to fix VFS to return error? */ | ||
63 | } | 150 | } |
64 | 151 | ||
65 | enum { | 152 | enum { |
66 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, | 153 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, |
67 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, | 154 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, |
68 | Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, | 155 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, |
69 | Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio, | 156 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, |
70 | Opt_flushoncommit, | 157 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, |
71 | Opt_discard, Opt_err, | 158 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, |
72 | }; | 159 | }; |
73 | 160 | ||
74 | static match_table_t tokens = { | 161 | static match_table_t tokens = { |
75 | {Opt_degraded, "degraded"}, | 162 | {Opt_degraded, "degraded"}, |
76 | {Opt_subvol, "subvol=%s"}, | 163 | {Opt_subvol, "subvol=%s"}, |
164 | {Opt_subvolid, "subvolid=%d"}, | ||
77 | {Opt_device, "device=%s"}, | 165 | {Opt_device, "device=%s"}, |
78 | {Opt_nodatasum, "nodatasum"}, | 166 | {Opt_nodatasum, "nodatasum"}, |
79 | {Opt_nodatacow, "nodatacow"}, | 167 | {Opt_nodatacow, "nodatacow"}, |
80 | {Opt_nobarrier, "nobarrier"}, | 168 | {Opt_nobarrier, "nobarrier"}, |
81 | {Opt_max_extent, "max_extent=%s"}, | ||
82 | {Opt_max_inline, "max_inline=%s"}, | 169 | {Opt_max_inline, "max_inline=%s"}, |
83 | {Opt_alloc_start, "alloc_start=%s"}, | 170 | {Opt_alloc_start, "alloc_start=%s"}, |
84 | {Opt_thread_pool, "thread_pool=%d"}, | 171 | {Opt_thread_pool, "thread_pool=%d"}, |
85 | {Opt_compress, "compress"}, | 172 | {Opt_compress, "compress"}, |
173 | {Opt_compress_type, "compress=%s"}, | ||
86 | {Opt_compress_force, "compress-force"}, | 174 | {Opt_compress_force, "compress-force"}, |
175 | {Opt_compress_force_type, "compress-force=%s"}, | ||
87 | {Opt_ssd, "ssd"}, | 176 | {Opt_ssd, "ssd"}, |
88 | {Opt_ssd_spread, "ssd_spread"}, | 177 | {Opt_ssd_spread, "ssd_spread"}, |
89 | {Opt_nossd, "nossd"}, | 178 | {Opt_nossd, "nossd"}, |
@@ -92,34 +181,12 @@ static match_table_t tokens = { | |||
92 | {Opt_flushoncommit, "flushoncommit"}, | 181 | {Opt_flushoncommit, "flushoncommit"}, |
93 | {Opt_ratio, "metadata_ratio=%d"}, | 182 | {Opt_ratio, "metadata_ratio=%d"}, |
94 | {Opt_discard, "discard"}, | 183 | {Opt_discard, "discard"}, |
184 | {Opt_space_cache, "space_cache"}, | ||
185 | {Opt_clear_cache, "clear_cache"}, | ||
186 | {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, | ||
95 | {Opt_err, NULL}, | 187 | {Opt_err, NULL}, |
96 | }; | 188 | }; |
97 | 189 | ||
98 | u64 btrfs_parse_size(char *str) | ||
99 | { | ||
100 | u64 res; | ||
101 | int mult = 1; | ||
102 | char *end; | ||
103 | char last; | ||
104 | |||
105 | res = simple_strtoul(str, &end, 10); | ||
106 | |||
107 | last = end[0]; | ||
108 | if (isalpha(last)) { | ||
109 | last = tolower(last); | ||
110 | switch (last) { | ||
111 | case 'g': | ||
112 | mult *= 1024; | ||
113 | case 'm': | ||
114 | mult *= 1024; | ||
115 | case 'k': | ||
116 | mult *= 1024; | ||
117 | } | ||
118 | res = res * mult; | ||
119 | } | ||
120 | return res; | ||
121 | } | ||
122 | |||
123 | /* | 190 | /* |
124 | * Regular mount options parser. Everything that is needed only when | 191 | * Regular mount options parser. Everything that is needed only when |
125 | * reading in a new superblock is parsed here. | 192 | * reading in a new superblock is parsed here. |
@@ -128,9 +195,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
128 | { | 195 | { |
129 | struct btrfs_fs_info *info = root->fs_info; | 196 | struct btrfs_fs_info *info = root->fs_info; |
130 | substring_t args[MAX_OPT_ARGS]; | 197 | substring_t args[MAX_OPT_ARGS]; |
131 | char *p, *num; | 198 | char *p, *num, *orig; |
132 | int intarg; | 199 | int intarg; |
133 | int ret = 0; | 200 | int ret = 0; |
201 | char *compress_type; | ||
202 | bool compress_force = false; | ||
134 | 203 | ||
135 | if (!options) | 204 | if (!options) |
136 | return 0; | 205 | return 0; |
@@ -143,6 +212,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
143 | if (!options) | 212 | if (!options) |
144 | return -ENOMEM; | 213 | return -ENOMEM; |
145 | 214 | ||
215 | orig = options; | ||
146 | 216 | ||
147 | while ((p = strsep(&options, ",")) != NULL) { | 217 | while ((p = strsep(&options, ",")) != NULL) { |
148 | int token; | 218 | int token; |
@@ -156,6 +226,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
156 | btrfs_set_opt(info->mount_opt, DEGRADED); | 226 | btrfs_set_opt(info->mount_opt, DEGRADED); |
157 | break; | 227 | break; |
158 | case Opt_subvol: | 228 | case Opt_subvol: |
229 | case Opt_subvolid: | ||
159 | case Opt_device: | 230 | case Opt_device: |
160 | /* | 231 | /* |
161 | * These are parsed by btrfs_parse_early_options | 232 | * These are parsed by btrfs_parse_early_options |
@@ -171,14 +242,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
171 | btrfs_set_opt(info->mount_opt, NODATACOW); | 242 | btrfs_set_opt(info->mount_opt, NODATACOW); |
172 | btrfs_set_opt(info->mount_opt, NODATASUM); | 243 | btrfs_set_opt(info->mount_opt, NODATASUM); |
173 | break; | 244 | break; |
174 | case Opt_compress: | ||
175 | printk(KERN_INFO "btrfs: use compression\n"); | ||
176 | btrfs_set_opt(info->mount_opt, COMPRESS); | ||
177 | break; | ||
178 | case Opt_compress_force: | 245 | case Opt_compress_force: |
179 | printk(KERN_INFO "btrfs: forcing compression\n"); | 246 | case Opt_compress_force_type: |
180 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | 247 | compress_force = true; |
248 | case Opt_compress: | ||
249 | case Opt_compress_type: | ||
250 | if (token == Opt_compress || | ||
251 | token == Opt_compress_force || | ||
252 | strcmp(args[0].from, "zlib") == 0) { | ||
253 | compress_type = "zlib"; | ||
254 | info->compress_type = BTRFS_COMPRESS_ZLIB; | ||
255 | } else if (strcmp(args[0].from, "lzo") == 0) { | ||
256 | compress_type = "lzo"; | ||
257 | info->compress_type = BTRFS_COMPRESS_LZO; | ||
258 | } else { | ||
259 | ret = -EINVAL; | ||
260 | goto out; | ||
261 | } | ||
262 | |||
181 | btrfs_set_opt(info->mount_opt, COMPRESS); | 263 | btrfs_set_opt(info->mount_opt, COMPRESS); |
264 | if (compress_force) { | ||
265 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | ||
266 | pr_info("btrfs: force %s compression\n", | ||
267 | compress_type); | ||
268 | } else | ||
269 | pr_info("btrfs: use %s compression\n", | ||
270 | compress_type); | ||
182 | break; | 271 | break; |
183 | case Opt_ssd: | 272 | case Opt_ssd: |
184 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); | 273 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); |
@@ -210,22 +299,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
210 | info->thread_pool_size); | 299 | info->thread_pool_size); |
211 | } | 300 | } |
212 | break; | 301 | break; |
213 | case Opt_max_extent: | ||
214 | num = match_strdup(&args[0]); | ||
215 | if (num) { | ||
216 | info->max_extent = btrfs_parse_size(num); | ||
217 | kfree(num); | ||
218 | |||
219 | info->max_extent = max_t(u64, | ||
220 | info->max_extent, root->sectorsize); | ||
221 | printk(KERN_INFO "btrfs: max_extent at %llu\n", | ||
222 | (unsigned long long)info->max_extent); | ||
223 | } | ||
224 | break; | ||
225 | case Opt_max_inline: | 302 | case Opt_max_inline: |
226 | num = match_strdup(&args[0]); | 303 | num = match_strdup(&args[0]); |
227 | if (num) { | 304 | if (num) { |
228 | info->max_inline = btrfs_parse_size(num); | 305 | info->max_inline = memparse(num, NULL); |
229 | kfree(num); | 306 | kfree(num); |
230 | 307 | ||
231 | if (info->max_inline) { | 308 | if (info->max_inline) { |
@@ -240,7 +317,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
240 | case Opt_alloc_start: | 317 | case Opt_alloc_start: |
241 | num = match_strdup(&args[0]); | 318 | num = match_strdup(&args[0]); |
242 | if (num) { | 319 | if (num) { |
243 | info->alloc_start = btrfs_parse_size(num); | 320 | info->alloc_start = memparse(num, NULL); |
244 | kfree(num); | 321 | kfree(num); |
245 | printk(KERN_INFO | 322 | printk(KERN_INFO |
246 | "btrfs: allocations start at %llu\n", | 323 | "btrfs: allocations start at %llu\n", |
@@ -270,6 +347,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
270 | case Opt_discard: | 347 | case Opt_discard: |
271 | btrfs_set_opt(info->mount_opt, DISCARD); | 348 | btrfs_set_opt(info->mount_opt, DISCARD); |
272 | break; | 349 | break; |
350 | case Opt_space_cache: | ||
351 | printk(KERN_INFO "btrfs: enabling disk space caching\n"); | ||
352 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); | ||
353 | break; | ||
354 | case Opt_clear_cache: | ||
355 | printk(KERN_INFO "btrfs: force clearing of disk cache\n"); | ||
356 | btrfs_set_opt(info->mount_opt, CLEAR_CACHE); | ||
357 | break; | ||
358 | case Opt_user_subvol_rm_allowed: | ||
359 | btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); | ||
360 | break; | ||
273 | case Opt_err: | 361 | case Opt_err: |
274 | printk(KERN_INFO "btrfs: unrecognized mount option " | 362 | printk(KERN_INFO "btrfs: unrecognized mount option " |
275 | "'%s'\n", p); | 363 | "'%s'\n", p); |
@@ -280,7 +368,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
280 | } | 368 | } |
281 | } | 369 | } |
282 | out: | 370 | out: |
283 | kfree(options); | 371 | kfree(orig); |
284 | return ret; | 372 | return ret; |
285 | } | 373 | } |
286 | 374 | ||
@@ -291,12 +379,13 @@ out: | |||
291 | * only when we need to allocate a new super block. | 379 | * only when we need to allocate a new super block. |
292 | */ | 380 | */ |
293 | static int btrfs_parse_early_options(const char *options, fmode_t flags, | 381 | static int btrfs_parse_early_options(const char *options, fmode_t flags, |
294 | void *holder, char **subvol_name, | 382 | void *holder, char **subvol_name, u64 *subvol_objectid, |
295 | struct btrfs_fs_devices **fs_devices) | 383 | struct btrfs_fs_devices **fs_devices) |
296 | { | 384 | { |
297 | substring_t args[MAX_OPT_ARGS]; | 385 | substring_t args[MAX_OPT_ARGS]; |
298 | char *opts, *p; | 386 | char *opts, *p; |
299 | int error = 0; | 387 | int error = 0; |
388 | int intarg; | ||
300 | 389 | ||
301 | if (!options) | 390 | if (!options) |
302 | goto out; | 391 | goto out; |
@@ -319,6 +408,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
319 | case Opt_subvol: | 408 | case Opt_subvol: |
320 | *subvol_name = match_strdup(&args[0]); | 409 | *subvol_name = match_strdup(&args[0]); |
321 | break; | 410 | break; |
411 | case Opt_subvolid: | ||
412 | intarg = 0; | ||
413 | error = match_int(&args[0], &intarg); | ||
414 | if (!error) { | ||
415 | /* we want the original fs_tree */ | ||
416 | if (!intarg) | ||
417 | *subvol_objectid = | ||
418 | BTRFS_FS_TREE_OBJECTID; | ||
419 | else | ||
420 | *subvol_objectid = intarg; | ||
421 | } | ||
422 | break; | ||
322 | case Opt_device: | 423 | case Opt_device: |
323 | error = btrfs_scan_one_device(match_strdup(&args[0]), | 424 | error = btrfs_scan_one_device(match_strdup(&args[0]), |
324 | flags, holder, fs_devices); | 425 | flags, holder, fs_devices); |
@@ -346,13 +447,118 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
346 | return error; | 447 | return error; |
347 | } | 448 | } |
348 | 449 | ||
450 | static struct dentry *get_default_root(struct super_block *sb, | ||
451 | u64 subvol_objectid) | ||
452 | { | ||
453 | struct btrfs_root *root = sb->s_fs_info; | ||
454 | struct btrfs_root *new_root; | ||
455 | struct btrfs_dir_item *di; | ||
456 | struct btrfs_path *path; | ||
457 | struct btrfs_key location; | ||
458 | struct inode *inode; | ||
459 | struct dentry *dentry; | ||
460 | u64 dir_id; | ||
461 | int new = 0; | ||
462 | |||
463 | /* | ||
464 | * We have a specific subvol we want to mount, just setup location and | ||
465 | * go look up the root. | ||
466 | */ | ||
467 | if (subvol_objectid) { | ||
468 | location.objectid = subvol_objectid; | ||
469 | location.type = BTRFS_ROOT_ITEM_KEY; | ||
470 | location.offset = (u64)-1; | ||
471 | goto find_root; | ||
472 | } | ||
473 | |||
474 | path = btrfs_alloc_path(); | ||
475 | if (!path) | ||
476 | return ERR_PTR(-ENOMEM); | ||
477 | path->leave_spinning = 1; | ||
478 | |||
479 | /* | ||
480 | * Find the "default" dir item which points to the root item that we | ||
481 | * will mount by default if we haven't been given a specific subvolume | ||
482 | * to mount. | ||
483 | */ | ||
484 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); | ||
485 | di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); | ||
486 | if (IS_ERR(di)) | ||
487 | return ERR_CAST(di); | ||
488 | if (!di) { | ||
489 | /* | ||
490 | * Ok the default dir item isn't there. This is weird since | ||
491 | * it's always been there, but don't freak out, just try and | ||
492 | * mount to root most subvolume. | ||
493 | */ | ||
494 | btrfs_free_path(path); | ||
495 | dir_id = BTRFS_FIRST_FREE_OBJECTID; | ||
496 | new_root = root->fs_info->fs_root; | ||
497 | goto setup_root; | ||
498 | } | ||
499 | |||
500 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); | ||
501 | btrfs_free_path(path); | ||
502 | |||
503 | find_root: | ||
504 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); | ||
505 | if (IS_ERR(new_root)) | ||
506 | return ERR_CAST(new_root); | ||
507 | |||
508 | if (btrfs_root_refs(&new_root->root_item) == 0) | ||
509 | return ERR_PTR(-ENOENT); | ||
510 | |||
511 | dir_id = btrfs_root_dirid(&new_root->root_item); | ||
512 | setup_root: | ||
513 | location.objectid = dir_id; | ||
514 | location.type = BTRFS_INODE_ITEM_KEY; | ||
515 | location.offset = 0; | ||
516 | |||
517 | inode = btrfs_iget(sb, &location, new_root, &new); | ||
518 | if (IS_ERR(inode)) | ||
519 | return ERR_CAST(inode); | ||
520 | |||
521 | /* | ||
522 | * If we're just mounting the root most subvol put the inode and return | ||
523 | * a reference to the dentry. We will have already gotten a reference | ||
524 | * to the inode in btrfs_fill_super so we're good to go. | ||
525 | */ | ||
526 | if (!new && sb->s_root->d_inode == inode) { | ||
527 | iput(inode); | ||
528 | return dget(sb->s_root); | ||
529 | } | ||
530 | |||
531 | if (new) { | ||
532 | const struct qstr name = { .name = "/", .len = 1 }; | ||
533 | |||
534 | /* | ||
535 | * New inode, we need to make the dentry a sibling of s_root so | ||
536 | * everything gets cleaned up properly on unmount. | ||
537 | */ | ||
538 | dentry = d_alloc(sb->s_root, &name); | ||
539 | if (!dentry) { | ||
540 | iput(inode); | ||
541 | return ERR_PTR(-ENOMEM); | ||
542 | } | ||
543 | d_splice_alias(inode, dentry); | ||
544 | } else { | ||
545 | /* | ||
546 | * We found the inode in cache, just find a dentry for it and | ||
547 | * put the reference to the inode we just got. | ||
548 | */ | ||
549 | dentry = d_find_alias(inode); | ||
550 | iput(inode); | ||
551 | } | ||
552 | |||
553 | return dentry; | ||
554 | } | ||
555 | |||
349 | static int btrfs_fill_super(struct super_block *sb, | 556 | static int btrfs_fill_super(struct super_block *sb, |
350 | struct btrfs_fs_devices *fs_devices, | 557 | struct btrfs_fs_devices *fs_devices, |
351 | void *data, int silent) | 558 | void *data, int silent) |
352 | { | 559 | { |
353 | struct inode *inode; | 560 | struct inode *inode; |
354 | struct dentry *root_dentry; | 561 | struct dentry *root_dentry; |
355 | struct btrfs_super_block *disk_super; | ||
356 | struct btrfs_root *tree_root; | 562 | struct btrfs_root *tree_root; |
357 | struct btrfs_key key; | 563 | struct btrfs_key key; |
358 | int err; | 564 | int err; |
@@ -360,6 +566,7 @@ static int btrfs_fill_super(struct super_block *sb, | |||
360 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 566 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
361 | sb->s_magic = BTRFS_SUPER_MAGIC; | 567 | sb->s_magic = BTRFS_SUPER_MAGIC; |
362 | sb->s_op = &btrfs_super_ops; | 568 | sb->s_op = &btrfs_super_ops; |
569 | sb->s_d_op = &btrfs_dentry_operations; | ||
363 | sb->s_export_op = &btrfs_export_ops; | 570 | sb->s_export_op = &btrfs_export_ops; |
364 | sb->s_xattr = btrfs_xattr_handlers; | 571 | sb->s_xattr = btrfs_xattr_handlers; |
365 | sb->s_time_gran = 1; | 572 | sb->s_time_gran = 1; |
@@ -374,12 +581,11 @@ static int btrfs_fill_super(struct super_block *sb, | |||
374 | return PTR_ERR(tree_root); | 581 | return PTR_ERR(tree_root); |
375 | } | 582 | } |
376 | sb->s_fs_info = tree_root; | 583 | sb->s_fs_info = tree_root; |
377 | disk_super = &tree_root->fs_info->super_copy; | ||
378 | 584 | ||
379 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; | 585 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; |
380 | key.type = BTRFS_INODE_ITEM_KEY; | 586 | key.type = BTRFS_INODE_ITEM_KEY; |
381 | key.offset = 0; | 587 | key.offset = 0; |
382 | inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root); | 588 | inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root, NULL); |
383 | if (IS_ERR(inode)) { | 589 | if (IS_ERR(inode)) { |
384 | err = PTR_ERR(inode); | 590 | err = PTR_ERR(inode); |
385 | goto fail_close; | 591 | goto fail_close; |
@@ -391,12 +597,6 @@ static int btrfs_fill_super(struct super_block *sb, | |||
391 | err = -ENOMEM; | 597 | err = -ENOMEM; |
392 | goto fail_close; | 598 | goto fail_close; |
393 | } | 599 | } |
394 | #if 0 | ||
395 | /* this does the super kobj at the same time */ | ||
396 | err = btrfs_sysfs_add_super(tree_root->fs_info); | ||
397 | if (err) | ||
398 | goto fail_close; | ||
399 | #endif | ||
400 | 600 | ||
401 | sb->s_root = root_dentry; | 601 | sb->s_root = root_dentry; |
402 | 602 | ||
@@ -422,7 +622,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
422 | btrfs_start_delalloc_inodes(root, 0); | 622 | btrfs_start_delalloc_inodes(root, 0); |
423 | btrfs_wait_ordered_extents(root, 0, 0); | 623 | btrfs_wait_ordered_extents(root, 0, 0); |
424 | 624 | ||
425 | trans = btrfs_start_transaction(root, 1); | 625 | trans = btrfs_start_transaction(root, 0); |
426 | ret = btrfs_commit_transaction(trans, root); | 626 | ret = btrfs_commit_transaction(trans, root); |
427 | return ret; | 627 | return ret; |
428 | } | 628 | } |
@@ -440,9 +640,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
440 | seq_puts(seq, ",nodatacow"); | 640 | seq_puts(seq, ",nodatacow"); |
441 | if (btrfs_test_opt(root, NOBARRIER)) | 641 | if (btrfs_test_opt(root, NOBARRIER)) |
442 | seq_puts(seq, ",nobarrier"); | 642 | seq_puts(seq, ",nobarrier"); |
443 | if (info->max_extent != (u64)-1) | ||
444 | seq_printf(seq, ",max_extent=%llu", | ||
445 | (unsigned long long)info->max_extent); | ||
446 | if (info->max_inline != 8192 * 1024) | 643 | if (info->max_inline != 8192 * 1024) |
447 | seq_printf(seq, ",max_inline=%llu", | 644 | seq_printf(seq, ",max_inline=%llu", |
448 | (unsigned long long)info->max_inline); | 645 | (unsigned long long)info->max_inline); |
@@ -473,36 +670,54 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
473 | 670 | ||
474 | static int btrfs_test_super(struct super_block *s, void *data) | 671 | static int btrfs_test_super(struct super_block *s, void *data) |
475 | { | 672 | { |
476 | struct btrfs_fs_devices *test_fs_devices = data; | 673 | struct btrfs_root *test_root = data; |
477 | struct btrfs_root *root = btrfs_sb(s); | 674 | struct btrfs_root *root = btrfs_sb(s); |
478 | 675 | ||
479 | return root->fs_info->fs_devices == test_fs_devices; | 676 | /* |
677 | * If this super block is going away, return false as it | ||
678 | * can't match as an existing super block. | ||
679 | */ | ||
680 | if (!atomic_read(&s->s_active)) | ||
681 | return 0; | ||
682 | return root->fs_info->fs_devices == test_root->fs_info->fs_devices; | ||
683 | } | ||
684 | |||
685 | static int btrfs_set_super(struct super_block *s, void *data) | ||
686 | { | ||
687 | s->s_fs_info = data; | ||
688 | |||
689 | return set_anon_super(s, data); | ||
480 | } | 690 | } |
481 | 691 | ||
692 | |||
482 | /* | 693 | /* |
483 | * Find a superblock for the given device / mount point. | 694 | * Find a superblock for the given device / mount point. |
484 | * | 695 | * |
485 | * Note: This is based on get_sb_bdev from fs/super.c with a few additions | 696 | * Note: This is based on get_sb_bdev from fs/super.c with a few additions |
486 | * for multiple device setup. Make sure to keep it in sync. | 697 | * for multiple device setup. Make sure to keep it in sync. |
487 | */ | 698 | */ |
488 | static int btrfs_get_sb(struct file_system_type *fs_type, int flags, | 699 | static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, |
489 | const char *dev_name, void *data, struct vfsmount *mnt) | 700 | const char *dev_name, void *data) |
490 | { | 701 | { |
491 | char *subvol_name = NULL; | ||
492 | struct block_device *bdev = NULL; | 702 | struct block_device *bdev = NULL; |
493 | struct super_block *s; | 703 | struct super_block *s; |
494 | struct dentry *root; | 704 | struct dentry *root; |
495 | struct btrfs_fs_devices *fs_devices = NULL; | 705 | struct btrfs_fs_devices *fs_devices = NULL; |
706 | struct btrfs_root *tree_root = NULL; | ||
707 | struct btrfs_fs_info *fs_info = NULL; | ||
496 | fmode_t mode = FMODE_READ; | 708 | fmode_t mode = FMODE_READ; |
709 | char *subvol_name = NULL; | ||
710 | u64 subvol_objectid = 0; | ||
497 | int error = 0; | 711 | int error = 0; |
498 | 712 | ||
499 | if (!(flags & MS_RDONLY)) | 713 | if (!(flags & MS_RDONLY)) |
500 | mode |= FMODE_WRITE; | 714 | mode |= FMODE_WRITE; |
501 | 715 | ||
502 | error = btrfs_parse_early_options(data, mode, fs_type, | 716 | error = btrfs_parse_early_options(data, mode, fs_type, |
503 | &subvol_name, &fs_devices); | 717 | &subvol_name, &subvol_objectid, |
718 | &fs_devices); | ||
504 | if (error) | 719 | if (error) |
505 | return error; | 720 | return ERR_PTR(error); |
506 | 721 | ||
507 | error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices); | 722 | error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices); |
508 | if (error) | 723 | if (error) |
@@ -517,8 +732,24 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, | |||
517 | goto error_close_devices; | 732 | goto error_close_devices; |
518 | } | 733 | } |
519 | 734 | ||
735 | /* | ||
736 | * Setup a dummy root and fs_info for test/set super. This is because | ||
737 | * we don't actually fill this stuff out until open_ctree, but we need | ||
738 | * it for searching for existing supers, so this lets us do that and | ||
739 | * then open_ctree will properly initialize everything later. | ||
740 | */ | ||
741 | fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS); | ||
742 | tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | ||
743 | if (!fs_info || !tree_root) { | ||
744 | error = -ENOMEM; | ||
745 | goto error_close_devices; | ||
746 | } | ||
747 | fs_info->tree_root = tree_root; | ||
748 | fs_info->fs_devices = fs_devices; | ||
749 | tree_root->fs_info = fs_info; | ||
750 | |||
520 | bdev = fs_devices->latest_bdev; | 751 | bdev = fs_devices->latest_bdev; |
521 | s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); | 752 | s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root); |
522 | if (IS_ERR(s)) | 753 | if (IS_ERR(s)) |
523 | goto error_s; | 754 | goto error_s; |
524 | 755 | ||
@@ -546,40 +777,49 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, | |||
546 | s->s_flags |= MS_ACTIVE; | 777 | s->s_flags |= MS_ACTIVE; |
547 | } | 778 | } |
548 | 779 | ||
549 | if (!strcmp(subvol_name, ".")) | 780 | root = get_default_root(s, subvol_objectid); |
550 | root = dget(s->s_root); | 781 | if (IS_ERR(root)) { |
551 | else { | 782 | error = PTR_ERR(root); |
552 | mutex_lock(&s->s_root->d_inode->i_mutex); | 783 | deactivate_locked_super(s); |
553 | root = lookup_one_len(subvol_name, s->s_root, | 784 | goto error_free_subvol_name; |
785 | } | ||
786 | /* if they gave us a subvolume name bind mount into that */ | ||
787 | if (strcmp(subvol_name, ".")) { | ||
788 | struct dentry *new_root; | ||
789 | mutex_lock(&root->d_inode->i_mutex); | ||
790 | new_root = lookup_one_len(subvol_name, root, | ||
554 | strlen(subvol_name)); | 791 | strlen(subvol_name)); |
555 | mutex_unlock(&s->s_root->d_inode->i_mutex); | 792 | mutex_unlock(&root->d_inode->i_mutex); |
556 | 793 | ||
557 | if (IS_ERR(root)) { | 794 | if (IS_ERR(new_root)) { |
795 | dput(root); | ||
558 | deactivate_locked_super(s); | 796 | deactivate_locked_super(s); |
559 | error = PTR_ERR(root); | 797 | error = PTR_ERR(new_root); |
560 | goto error_free_subvol_name; | 798 | goto error_free_subvol_name; |
561 | } | 799 | } |
562 | if (!root->d_inode) { | 800 | if (!new_root->d_inode) { |
563 | dput(root); | 801 | dput(root); |
802 | dput(new_root); | ||
564 | deactivate_locked_super(s); | 803 | deactivate_locked_super(s); |
565 | error = -ENXIO; | 804 | error = -ENXIO; |
566 | goto error_free_subvol_name; | 805 | goto error_free_subvol_name; |
567 | } | 806 | } |
807 | dput(root); | ||
808 | root = new_root; | ||
568 | } | 809 | } |
569 | 810 | ||
570 | mnt->mnt_sb = s; | ||
571 | mnt->mnt_root = root; | ||
572 | |||
573 | kfree(subvol_name); | 811 | kfree(subvol_name); |
574 | return 0; | 812 | return root; |
575 | 813 | ||
576 | error_s: | 814 | error_s: |
577 | error = PTR_ERR(s); | 815 | error = PTR_ERR(s); |
578 | error_close_devices: | 816 | error_close_devices: |
579 | btrfs_close_devices(fs_devices); | 817 | btrfs_close_devices(fs_devices); |
818 | kfree(fs_info); | ||
819 | kfree(tree_root); | ||
580 | error_free_subvol_name: | 820 | error_free_subvol_name: |
581 | kfree(subvol_name); | 821 | kfree(subvol_name); |
582 | return error; | 822 | return ERR_PTR(error); |
583 | } | 823 | } |
584 | 824 | ||
585 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) | 825 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) |
@@ -606,11 +846,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
606 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) | 846 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) |
607 | return -EINVAL; | 847 | return -EINVAL; |
608 | 848 | ||
609 | /* recover relocation */ | 849 | ret = btrfs_cleanup_fs_roots(root->fs_info); |
610 | ret = btrfs_recover_relocation(root); | ||
611 | WARN_ON(ret); | 850 | WARN_ON(ret); |
612 | 851 | ||
613 | ret = btrfs_cleanup_fs_roots(root->fs_info); | 852 | /* recover relocation */ |
853 | ret = btrfs_recover_relocation(root); | ||
614 | WARN_ON(ret); | 854 | WARN_ON(ret); |
615 | 855 | ||
616 | sb->s_flags &= ~MS_RDONLY; | 856 | sb->s_flags &= ~MS_RDONLY; |
@@ -619,20 +859,167 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
619 | return 0; | 859 | return 0; |
620 | } | 860 | } |
621 | 861 | ||
862 | /* | ||
863 | * The helper to calc the free space on the devices that can be used to store | ||
864 | * file data. | ||
865 | */ | ||
866 | static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | ||
867 | { | ||
868 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
869 | struct btrfs_device_info *devices_info; | ||
870 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
871 | struct btrfs_device *device; | ||
872 | u64 skip_space; | ||
873 | u64 type; | ||
874 | u64 avail_space; | ||
875 | u64 used_space; | ||
876 | u64 min_stripe_size; | ||
877 | int min_stripes = 1; | ||
878 | int i = 0, nr_devices; | ||
879 | int ret; | ||
880 | |||
881 | nr_devices = fs_info->fs_devices->rw_devices; | ||
882 | BUG_ON(!nr_devices); | ||
883 | |||
884 | devices_info = kmalloc(sizeof(*devices_info) * nr_devices, | ||
885 | GFP_NOFS); | ||
886 | if (!devices_info) | ||
887 | return -ENOMEM; | ||
888 | |||
889 | /* calc min stripe number for data space alloction */ | ||
890 | type = btrfs_get_alloc_profile(root, 1); | ||
891 | if (type & BTRFS_BLOCK_GROUP_RAID0) | ||
892 | min_stripes = 2; | ||
893 | else if (type & BTRFS_BLOCK_GROUP_RAID1) | ||
894 | min_stripes = 2; | ||
895 | else if (type & BTRFS_BLOCK_GROUP_RAID10) | ||
896 | min_stripes = 4; | ||
897 | |||
898 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
899 | min_stripe_size = 2 * BTRFS_STRIPE_LEN; | ||
900 | else | ||
901 | min_stripe_size = BTRFS_STRIPE_LEN; | ||
902 | |||
903 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | ||
904 | if (!device->in_fs_metadata) | ||
905 | continue; | ||
906 | |||
907 | avail_space = device->total_bytes - device->bytes_used; | ||
908 | |||
909 | /* align with stripe_len */ | ||
910 | do_div(avail_space, BTRFS_STRIPE_LEN); | ||
911 | avail_space *= BTRFS_STRIPE_LEN; | ||
912 | |||
913 | /* | ||
914 | * In order to avoid overwritting the superblock on the drive, | ||
915 | * btrfs starts at an offset of at least 1MB when doing chunk | ||
916 | * allocation. | ||
917 | */ | ||
918 | skip_space = 1024 * 1024; | ||
919 | |||
920 | /* user can set the offset in fs_info->alloc_start. */ | ||
921 | if (fs_info->alloc_start + BTRFS_STRIPE_LEN <= | ||
922 | device->total_bytes) | ||
923 | skip_space = max(fs_info->alloc_start, skip_space); | ||
924 | |||
925 | /* | ||
926 | * btrfs can not use the free space in [0, skip_space - 1], | ||
927 | * we must subtract it from the total. In order to implement | ||
928 | * it, we account the used space in this range first. | ||
929 | */ | ||
930 | ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1, | ||
931 | &used_space); | ||
932 | if (ret) { | ||
933 | kfree(devices_info); | ||
934 | return ret; | ||
935 | } | ||
936 | |||
937 | /* calc the free space in [0, skip_space - 1] */ | ||
938 | skip_space -= used_space; | ||
939 | |||
940 | /* | ||
941 | * we can use the free space in [0, skip_space - 1], subtract | ||
942 | * it from the total. | ||
943 | */ | ||
944 | if (avail_space && avail_space >= skip_space) | ||
945 | avail_space -= skip_space; | ||
946 | else | ||
947 | avail_space = 0; | ||
948 | |||
949 | if (avail_space < min_stripe_size) | ||
950 | continue; | ||
951 | |||
952 | devices_info[i].dev = device; | ||
953 | devices_info[i].max_avail = avail_space; | ||
954 | |||
955 | i++; | ||
956 | } | ||
957 | |||
958 | nr_devices = i; | ||
959 | |||
960 | btrfs_descending_sort_devices(devices_info, nr_devices); | ||
961 | |||
962 | i = nr_devices - 1; | ||
963 | avail_space = 0; | ||
964 | while (nr_devices >= min_stripes) { | ||
965 | if (devices_info[i].max_avail >= min_stripe_size) { | ||
966 | int j; | ||
967 | u64 alloc_size; | ||
968 | |||
969 | avail_space += devices_info[i].max_avail * min_stripes; | ||
970 | alloc_size = devices_info[i].max_avail; | ||
971 | for (j = i + 1 - min_stripes; j <= i; j++) | ||
972 | devices_info[j].max_avail -= alloc_size; | ||
973 | } | ||
974 | i--; | ||
975 | nr_devices--; | ||
976 | } | ||
977 | |||
978 | kfree(devices_info); | ||
979 | *free_bytes = avail_space; | ||
980 | return 0; | ||
981 | } | ||
982 | |||
622 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 983 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
623 | { | 984 | { |
624 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); | 985 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); |
625 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | 986 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; |
987 | struct list_head *head = &root->fs_info->space_info; | ||
988 | struct btrfs_space_info *found; | ||
989 | u64 total_used = 0; | ||
990 | u64 total_free_data = 0; | ||
626 | int bits = dentry->d_sb->s_blocksize_bits; | 991 | int bits = dentry->d_sb->s_blocksize_bits; |
627 | __be32 *fsid = (__be32 *)root->fs_info->fsid; | 992 | __be32 *fsid = (__be32 *)root->fs_info->fsid; |
993 | int ret; | ||
994 | |||
995 | /* holding chunk_muext to avoid allocating new chunks */ | ||
996 | mutex_lock(&root->fs_info->chunk_mutex); | ||
997 | rcu_read_lock(); | ||
998 | list_for_each_entry_rcu(found, head, list) { | ||
999 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) { | ||
1000 | total_free_data += found->disk_total - found->disk_used; | ||
1001 | total_free_data -= | ||
1002 | btrfs_account_ro_block_groups_free_space(found); | ||
1003 | } | ||
1004 | |||
1005 | total_used += found->disk_used; | ||
1006 | } | ||
1007 | rcu_read_unlock(); | ||
628 | 1008 | ||
629 | buf->f_namelen = BTRFS_NAME_LEN; | 1009 | buf->f_namelen = BTRFS_NAME_LEN; |
630 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; | 1010 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; |
631 | buf->f_bfree = buf->f_blocks - | 1011 | buf->f_bfree = buf->f_blocks - (total_used >> bits); |
632 | (btrfs_super_bytes_used(disk_super) >> bits); | ||
633 | buf->f_bavail = buf->f_bfree; | ||
634 | buf->f_bsize = dentry->d_sb->s_blocksize; | 1012 | buf->f_bsize = dentry->d_sb->s_blocksize; |
635 | buf->f_type = BTRFS_SUPER_MAGIC; | 1013 | buf->f_type = BTRFS_SUPER_MAGIC; |
1014 | buf->f_bavail = total_free_data; | ||
1015 | ret = btrfs_calc_avail_data_space(root, &total_free_data); | ||
1016 | if (ret) { | ||
1017 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
1018 | return ret; | ||
1019 | } | ||
1020 | buf->f_bavail += total_free_data; | ||
1021 | buf->f_bavail = buf->f_bavail >> bits; | ||
1022 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
636 | 1023 | ||
637 | /* We treat it as constant endianness (it doesn't matter _which_) | 1024 | /* We treat it as constant endianness (it doesn't matter _which_) |
638 | because we want the fsid to come out the same whether mounted | 1025 | because we want the fsid to come out the same whether mounted |
@@ -649,7 +1036,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
649 | static struct file_system_type btrfs_fs_type = { | 1036 | static struct file_system_type btrfs_fs_type = { |
650 | .owner = THIS_MODULE, | 1037 | .owner = THIS_MODULE, |
651 | .name = "btrfs", | 1038 | .name = "btrfs", |
652 | .get_sb = btrfs_get_sb, | 1039 | .mount = btrfs_mount, |
653 | .kill_sb = kill_anon_super, | 1040 | .kill_sb = kill_anon_super, |
654 | .fs_flags = FS_REQUIRES_DEV, | 1041 | .fs_flags = FS_REQUIRES_DEV, |
655 | }; | 1042 | }; |
@@ -700,7 +1087,7 @@ static int btrfs_unfreeze(struct super_block *sb) | |||
700 | 1087 | ||
701 | static const struct super_operations btrfs_super_ops = { | 1088 | static const struct super_operations btrfs_super_ops = { |
702 | .drop_inode = btrfs_drop_inode, | 1089 | .drop_inode = btrfs_drop_inode, |
703 | .delete_inode = btrfs_delete_inode, | 1090 | .evict_inode = btrfs_evict_inode, |
704 | .put_super = btrfs_put_super, | 1091 | .put_super = btrfs_put_super, |
705 | .sync_fs = btrfs_sync_fs, | 1092 | .sync_fs = btrfs_sync_fs, |
706 | .show_options = btrfs_show_options, | 1093 | .show_options = btrfs_show_options, |
@@ -718,14 +1105,18 @@ static const struct file_operations btrfs_ctl_fops = { | |||
718 | .unlocked_ioctl = btrfs_control_ioctl, | 1105 | .unlocked_ioctl = btrfs_control_ioctl, |
719 | .compat_ioctl = btrfs_control_ioctl, | 1106 | .compat_ioctl = btrfs_control_ioctl, |
720 | .owner = THIS_MODULE, | 1107 | .owner = THIS_MODULE, |
1108 | .llseek = noop_llseek, | ||
721 | }; | 1109 | }; |
722 | 1110 | ||
723 | static struct miscdevice btrfs_misc = { | 1111 | static struct miscdevice btrfs_misc = { |
724 | .minor = MISC_DYNAMIC_MINOR, | 1112 | .minor = BTRFS_MINOR, |
725 | .name = "btrfs-control", | 1113 | .name = "btrfs-control", |
726 | .fops = &btrfs_ctl_fops | 1114 | .fops = &btrfs_ctl_fops |
727 | }; | 1115 | }; |
728 | 1116 | ||
1117 | MODULE_ALIAS_MISCDEV(BTRFS_MINOR); | ||
1118 | MODULE_ALIAS("devname:btrfs-control"); | ||
1119 | |||
729 | static int btrfs_interface_init(void) | 1120 | static int btrfs_interface_init(void) |
730 | { | 1121 | { |
731 | return misc_register(&btrfs_misc); | 1122 | return misc_register(&btrfs_misc); |
@@ -745,10 +1136,14 @@ static int __init init_btrfs_fs(void) | |||
745 | if (err) | 1136 | if (err) |
746 | return err; | 1137 | return err; |
747 | 1138 | ||
748 | err = btrfs_init_cachep(); | 1139 | err = btrfs_init_compress(); |
749 | if (err) | 1140 | if (err) |
750 | goto free_sysfs; | 1141 | goto free_sysfs; |
751 | 1142 | ||
1143 | err = btrfs_init_cachep(); | ||
1144 | if (err) | ||
1145 | goto free_compress; | ||
1146 | |||
752 | err = extent_io_init(); | 1147 | err = extent_io_init(); |
753 | if (err) | 1148 | if (err) |
754 | goto free_cachep; | 1149 | goto free_cachep; |
@@ -776,6 +1171,8 @@ free_extent_io: | |||
776 | extent_io_exit(); | 1171 | extent_io_exit(); |
777 | free_cachep: | 1172 | free_cachep: |
778 | btrfs_destroy_cachep(); | 1173 | btrfs_destroy_cachep(); |
1174 | free_compress: | ||
1175 | btrfs_exit_compress(); | ||
779 | free_sysfs: | 1176 | free_sysfs: |
780 | btrfs_exit_sysfs(); | 1177 | btrfs_exit_sysfs(); |
781 | return err; | 1178 | return err; |
@@ -790,7 +1187,7 @@ static void __exit exit_btrfs_fs(void) | |||
790 | unregister_filesystem(&btrfs_fs_type); | 1187 | unregister_filesystem(&btrfs_fs_type); |
791 | btrfs_exit_sysfs(); | 1188 | btrfs_exit_sysfs(); |
792 | btrfs_cleanup_fs_uuids(); | 1189 | btrfs_cleanup_fs_uuids(); |
793 | btrfs_zlib_exit(); | 1190 | btrfs_exit_compress(); |
794 | } | 1191 | } |
795 | 1192 | ||
796 | module_init(init_btrfs_fs) | 1193 | module_init(init_btrfs_fs) |
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index a240b6fa81df..4ce16ef702a3 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
@@ -164,12 +164,12 @@ static void btrfs_root_release(struct kobject *kobj) | |||
164 | complete(&root->kobj_unregister); | 164 | complete(&root->kobj_unregister); |
165 | } | 165 | } |
166 | 166 | ||
167 | static struct sysfs_ops btrfs_super_attr_ops = { | 167 | static const struct sysfs_ops btrfs_super_attr_ops = { |
168 | .show = btrfs_super_attr_show, | 168 | .show = btrfs_super_attr_show, |
169 | .store = btrfs_super_attr_store, | 169 | .store = btrfs_super_attr_store, |
170 | }; | 170 | }; |
171 | 171 | ||
172 | static struct sysfs_ops btrfs_root_attr_ops = { | 172 | static const struct sysfs_ops btrfs_root_attr_ops = { |
173 | .show = btrfs_root_attr_show, | 173 | .show = btrfs_root_attr_show, |
174 | .store = btrfs_root_attr_store, | 174 | .store = btrfs_root_attr_store, |
175 | }; | 175 | }; |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b2acc79f1b34..bae5c7b8bbe2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -17,6 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
20 | #include <linux/slab.h> | ||
20 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
21 | #include <linux/writeback.h> | 22 | #include <linux/writeback.h> |
22 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
@@ -69,7 +70,7 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
69 | cur_trans->commit_done = 0; | 70 | cur_trans->commit_done = 0; |
70 | cur_trans->start_time = get_seconds(); | 71 | cur_trans->start_time = get_seconds(); |
71 | 72 | ||
72 | cur_trans->delayed_refs.root.rb_node = NULL; | 73 | cur_trans->delayed_refs.root = RB_ROOT; |
73 | cur_trans->delayed_refs.num_entries = 0; | 74 | cur_trans->delayed_refs.num_entries = 0; |
74 | cur_trans->delayed_refs.num_heads_ready = 0; | 75 | cur_trans->delayed_refs.num_heads_ready = 0; |
75 | cur_trans->delayed_refs.num_heads = 0; | 76 | cur_trans->delayed_refs.num_heads = 0; |
@@ -147,18 +148,13 @@ static void wait_current_trans(struct btrfs_root *root) | |||
147 | while (1) { | 148 | while (1) { |
148 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | 149 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, |
149 | TASK_UNINTERRUPTIBLE); | 150 | TASK_UNINTERRUPTIBLE); |
150 | if (cur_trans->blocked) { | 151 | if (!cur_trans->blocked) |
151 | mutex_unlock(&root->fs_info->trans_mutex); | ||
152 | schedule(); | ||
153 | mutex_lock(&root->fs_info->trans_mutex); | ||
154 | finish_wait(&root->fs_info->transaction_wait, | ||
155 | &wait); | ||
156 | } else { | ||
157 | finish_wait(&root->fs_info->transaction_wait, | ||
158 | &wait); | ||
159 | break; | 152 | break; |
160 | } | 153 | mutex_unlock(&root->fs_info->trans_mutex); |
154 | schedule(); | ||
155 | mutex_lock(&root->fs_info->trans_mutex); | ||
161 | } | 156 | } |
157 | finish_wait(&root->fs_info->transaction_wait, &wait); | ||
162 | put_transaction(cur_trans); | 158 | put_transaction(cur_trans); |
163 | } | 159 | } |
164 | } | 160 | } |
@@ -167,56 +163,103 @@ enum btrfs_trans_type { | |||
167 | TRANS_START, | 163 | TRANS_START, |
168 | TRANS_JOIN, | 164 | TRANS_JOIN, |
169 | TRANS_USERSPACE, | 165 | TRANS_USERSPACE, |
166 | TRANS_JOIN_NOLOCK, | ||
170 | }; | 167 | }; |
171 | 168 | ||
172 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | 169 | static int may_wait_transaction(struct btrfs_root *root, int type) |
173 | int num_blocks, int type) | ||
174 | { | 170 | { |
175 | struct btrfs_trans_handle *h = | ||
176 | kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | ||
177 | int ret; | ||
178 | |||
179 | mutex_lock(&root->fs_info->trans_mutex); | ||
180 | if (!root->fs_info->log_root_recovering && | 171 | if (!root->fs_info->log_root_recovering && |
181 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || | 172 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || |
182 | type == TRANS_USERSPACE)) | 173 | type == TRANS_USERSPACE)) |
174 | return 1; | ||
175 | return 0; | ||
176 | } | ||
177 | |||
178 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | ||
179 | u64 num_items, int type) | ||
180 | { | ||
181 | struct btrfs_trans_handle *h; | ||
182 | struct btrfs_transaction *cur_trans; | ||
183 | int ret; | ||
184 | |||
185 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | ||
186 | return ERR_PTR(-EROFS); | ||
187 | again: | ||
188 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | ||
189 | if (!h) | ||
190 | return ERR_PTR(-ENOMEM); | ||
191 | |||
192 | if (type != TRANS_JOIN_NOLOCK) | ||
193 | mutex_lock(&root->fs_info->trans_mutex); | ||
194 | if (may_wait_transaction(root, type)) | ||
183 | wait_current_trans(root); | 195 | wait_current_trans(root); |
196 | |||
184 | ret = join_transaction(root); | 197 | ret = join_transaction(root); |
185 | BUG_ON(ret); | 198 | BUG_ON(ret); |
186 | 199 | ||
187 | h->transid = root->fs_info->running_transaction->transid; | 200 | cur_trans = root->fs_info->running_transaction; |
188 | h->transaction = root->fs_info->running_transaction; | 201 | cur_trans->use_count++; |
189 | h->blocks_reserved = num_blocks; | 202 | if (type != TRANS_JOIN_NOLOCK) |
203 | mutex_unlock(&root->fs_info->trans_mutex); | ||
204 | |||
205 | h->transid = cur_trans->transid; | ||
206 | h->transaction = cur_trans; | ||
190 | h->blocks_used = 0; | 207 | h->blocks_used = 0; |
191 | h->block_group = 0; | 208 | h->block_group = 0; |
192 | h->alloc_exclude_nr = 0; | 209 | h->bytes_reserved = 0; |
193 | h->alloc_exclude_start = 0; | ||
194 | h->delayed_ref_updates = 0; | 210 | h->delayed_ref_updates = 0; |
211 | h->block_rsv = NULL; | ||
195 | 212 | ||
196 | if (!current->journal_info && type != TRANS_USERSPACE) | 213 | smp_mb(); |
197 | current->journal_info = h; | 214 | if (cur_trans->blocked && may_wait_transaction(root, type)) { |
215 | btrfs_commit_transaction(h, root); | ||
216 | goto again; | ||
217 | } | ||
218 | |||
219 | if (num_items > 0) { | ||
220 | ret = btrfs_trans_reserve_metadata(h, root, num_items); | ||
221 | if (ret == -EAGAIN) { | ||
222 | btrfs_commit_transaction(h, root); | ||
223 | goto again; | ||
224 | } | ||
225 | if (ret < 0) { | ||
226 | btrfs_end_transaction(h, root); | ||
227 | return ERR_PTR(ret); | ||
228 | } | ||
229 | } | ||
198 | 230 | ||
199 | root->fs_info->running_transaction->use_count++; | 231 | if (type != TRANS_JOIN_NOLOCK) |
232 | mutex_lock(&root->fs_info->trans_mutex); | ||
200 | record_root_in_trans(h, root); | 233 | record_root_in_trans(h, root); |
201 | mutex_unlock(&root->fs_info->trans_mutex); | 234 | if (type != TRANS_JOIN_NOLOCK) |
235 | mutex_unlock(&root->fs_info->trans_mutex); | ||
236 | |||
237 | if (!current->journal_info && type != TRANS_USERSPACE) | ||
238 | current->journal_info = h; | ||
202 | return h; | 239 | return h; |
203 | } | 240 | } |
204 | 241 | ||
205 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 242 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
206 | int num_blocks) | 243 | int num_items) |
207 | { | 244 | { |
208 | return start_transaction(root, num_blocks, TRANS_START); | 245 | return start_transaction(root, num_items, TRANS_START); |
209 | } | 246 | } |
210 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 247 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, |
211 | int num_blocks) | 248 | int num_blocks) |
212 | { | 249 | { |
213 | return start_transaction(root, num_blocks, TRANS_JOIN); | 250 | return start_transaction(root, 0, TRANS_JOIN); |
251 | } | ||
252 | |||
253 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, | ||
254 | int num_blocks) | ||
255 | { | ||
256 | return start_transaction(root, 0, TRANS_JOIN_NOLOCK); | ||
214 | } | 257 | } |
215 | 258 | ||
216 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 259 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, |
217 | int num_blocks) | 260 | int num_blocks) |
218 | { | 261 | { |
219 | return start_transaction(r, num_blocks, TRANS_USERSPACE); | 262 | return start_transaction(r, 0, TRANS_USERSPACE); |
220 | } | 263 | } |
221 | 264 | ||
222 | /* wait for a transaction commit to be fully complete */ | 265 | /* wait for a transaction commit to be fully complete */ |
@@ -239,6 +282,58 @@ static noinline int wait_for_commit(struct btrfs_root *root, | |||
239 | return 0; | 282 | return 0; |
240 | } | 283 | } |
241 | 284 | ||
285 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | ||
286 | { | ||
287 | struct btrfs_transaction *cur_trans = NULL, *t; | ||
288 | int ret; | ||
289 | |||
290 | mutex_lock(&root->fs_info->trans_mutex); | ||
291 | |||
292 | ret = 0; | ||
293 | if (transid) { | ||
294 | if (transid <= root->fs_info->last_trans_committed) | ||
295 | goto out_unlock; | ||
296 | |||
297 | /* find specified transaction */ | ||
298 | list_for_each_entry(t, &root->fs_info->trans_list, list) { | ||
299 | if (t->transid == transid) { | ||
300 | cur_trans = t; | ||
301 | break; | ||
302 | } | ||
303 | if (t->transid > transid) | ||
304 | break; | ||
305 | } | ||
306 | ret = -EINVAL; | ||
307 | if (!cur_trans) | ||
308 | goto out_unlock; /* bad transid */ | ||
309 | } else { | ||
310 | /* find newest transaction that is committing | committed */ | ||
311 | list_for_each_entry_reverse(t, &root->fs_info->trans_list, | ||
312 | list) { | ||
313 | if (t->in_commit) { | ||
314 | if (t->commit_done) | ||
315 | goto out_unlock; | ||
316 | cur_trans = t; | ||
317 | break; | ||
318 | } | ||
319 | } | ||
320 | if (!cur_trans) | ||
321 | goto out_unlock; /* nothing committing|committed */ | ||
322 | } | ||
323 | |||
324 | cur_trans->use_count++; | ||
325 | mutex_unlock(&root->fs_info->trans_mutex); | ||
326 | |||
327 | wait_for_commit(root, cur_trans); | ||
328 | |||
329 | mutex_lock(&root->fs_info->trans_mutex); | ||
330 | put_transaction(cur_trans); | ||
331 | ret = 0; | ||
332 | out_unlock: | ||
333 | mutex_unlock(&root->fs_info->trans_mutex); | ||
334 | return ret; | ||
335 | } | ||
336 | |||
242 | #if 0 | 337 | #if 0 |
243 | /* | 338 | /* |
244 | * rate limit against the drop_snapshot code. This helps to slow down new | 339 | * rate limit against the drop_snapshot code. This helps to slow down new |
@@ -290,10 +385,36 @@ void btrfs_throttle(struct btrfs_root *root) | |||
290 | mutex_unlock(&root->fs_info->trans_mutex); | 385 | mutex_unlock(&root->fs_info->trans_mutex); |
291 | } | 386 | } |
292 | 387 | ||
388 | static int should_end_transaction(struct btrfs_trans_handle *trans, | ||
389 | struct btrfs_root *root) | ||
390 | { | ||
391 | int ret; | ||
392 | ret = btrfs_block_rsv_check(trans, root, | ||
393 | &root->fs_info->global_block_rsv, 0, 5); | ||
394 | return ret ? 1 : 0; | ||
395 | } | ||
396 | |||
397 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | ||
398 | struct btrfs_root *root) | ||
399 | { | ||
400 | struct btrfs_transaction *cur_trans = trans->transaction; | ||
401 | int updates; | ||
402 | |||
403 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) | ||
404 | return 1; | ||
405 | |||
406 | updates = trans->delayed_ref_updates; | ||
407 | trans->delayed_ref_updates = 0; | ||
408 | if (updates) | ||
409 | btrfs_run_delayed_refs(trans, root, updates); | ||
410 | |||
411 | return should_end_transaction(trans, root); | ||
412 | } | ||
413 | |||
293 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | 414 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, |
294 | struct btrfs_root *root, int throttle) | 415 | struct btrfs_root *root, int throttle, int lock) |
295 | { | 416 | { |
296 | struct btrfs_transaction *cur_trans; | 417 | struct btrfs_transaction *cur_trans = trans->transaction; |
297 | struct btrfs_fs_info *info = root->fs_info; | 418 | struct btrfs_fs_info *info = root->fs_info; |
298 | int count = 0; | 419 | int count = 0; |
299 | 420 | ||
@@ -317,16 +438,31 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
317 | count++; | 438 | count++; |
318 | } | 439 | } |
319 | 440 | ||
320 | mutex_lock(&info->trans_mutex); | 441 | btrfs_trans_release_metadata(trans, root); |
321 | cur_trans = info->running_transaction; | 442 | |
322 | WARN_ON(cur_trans != trans->transaction); | 443 | if (lock && !root->fs_info->open_ioctl_trans && |
444 | should_end_transaction(trans, root)) | ||
445 | trans->transaction->blocked = 1; | ||
446 | |||
447 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { | ||
448 | if (throttle) | ||
449 | return btrfs_commit_transaction(trans, root); | ||
450 | else | ||
451 | wake_up_process(info->transaction_kthread); | ||
452 | } | ||
453 | |||
454 | if (lock) | ||
455 | mutex_lock(&info->trans_mutex); | ||
456 | WARN_ON(cur_trans != info->running_transaction); | ||
323 | WARN_ON(cur_trans->num_writers < 1); | 457 | WARN_ON(cur_trans->num_writers < 1); |
324 | cur_trans->num_writers--; | 458 | cur_trans->num_writers--; |
325 | 459 | ||
460 | smp_mb(); | ||
326 | if (waitqueue_active(&cur_trans->writer_wait)) | 461 | if (waitqueue_active(&cur_trans->writer_wait)) |
327 | wake_up(&cur_trans->writer_wait); | 462 | wake_up(&cur_trans->writer_wait); |
328 | put_transaction(cur_trans); | 463 | put_transaction(cur_trans); |
329 | mutex_unlock(&info->trans_mutex); | 464 | if (lock) |
465 | mutex_unlock(&info->trans_mutex); | ||
330 | 466 | ||
331 | if (current->journal_info == trans) | 467 | if (current->journal_info == trans) |
332 | current->journal_info = NULL; | 468 | current->journal_info = NULL; |
@@ -342,13 +478,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
342 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | 478 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, |
343 | struct btrfs_root *root) | 479 | struct btrfs_root *root) |
344 | { | 480 | { |
345 | return __btrfs_end_transaction(trans, root, 0); | 481 | return __btrfs_end_transaction(trans, root, 0, 1); |
346 | } | 482 | } |
347 | 483 | ||
348 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | 484 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, |
349 | struct btrfs_root *root) | 485 | struct btrfs_root *root) |
350 | { | 486 | { |
351 | return __btrfs_end_transaction(trans, root, 1); | 487 | return __btrfs_end_transaction(trans, root, 1, 1); |
488 | } | ||
489 | |||
490 | int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, | ||
491 | struct btrfs_root *root) | ||
492 | { | ||
493 | return __btrfs_end_transaction(trans, root, 0, 0); | ||
352 | } | 494 | } |
353 | 495 | ||
354 | /* | 496 | /* |
@@ -607,6 +749,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
607 | 749 | ||
608 | btrfs_free_log(trans, root); | 750 | btrfs_free_log(trans, root); |
609 | btrfs_update_reloc_root(trans, root); | 751 | btrfs_update_reloc_root(trans, root); |
752 | btrfs_orphan_commit_root(trans, root); | ||
610 | 753 | ||
611 | if (root->commit_root != root->node) { | 754 | if (root->commit_root != root->node) { |
612 | switch_commit_root(root); | 755 | switch_commit_root(root); |
@@ -631,30 +774,30 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
631 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | 774 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) |
632 | { | 775 | { |
633 | struct btrfs_fs_info *info = root->fs_info; | 776 | struct btrfs_fs_info *info = root->fs_info; |
634 | int ret; | ||
635 | struct btrfs_trans_handle *trans; | 777 | struct btrfs_trans_handle *trans; |
778 | int ret; | ||
636 | unsigned long nr; | 779 | unsigned long nr; |
637 | 780 | ||
638 | smp_mb(); | 781 | if (xchg(&root->defrag_running, 1)) |
639 | if (root->defrag_running) | ||
640 | return 0; | 782 | return 0; |
641 | trans = btrfs_start_transaction(root, 1); | 783 | |
642 | while (1) { | 784 | while (1) { |
643 | root->defrag_running = 1; | 785 | trans = btrfs_start_transaction(root, 0); |
786 | if (IS_ERR(trans)) | ||
787 | return PTR_ERR(trans); | ||
788 | |||
644 | ret = btrfs_defrag_leaves(trans, root, cacheonly); | 789 | ret = btrfs_defrag_leaves(trans, root, cacheonly); |
790 | |||
645 | nr = trans->blocks_used; | 791 | nr = trans->blocks_used; |
646 | btrfs_end_transaction(trans, root); | 792 | btrfs_end_transaction(trans, root); |
647 | btrfs_btree_balance_dirty(info->tree_root, nr); | 793 | btrfs_btree_balance_dirty(info->tree_root, nr); |
648 | cond_resched(); | 794 | cond_resched(); |
649 | 795 | ||
650 | trans = btrfs_start_transaction(root, 1); | ||
651 | if (root->fs_info->closing || ret != -EAGAIN) | 796 | if (root->fs_info->closing || ret != -EAGAIN) |
652 | break; | 797 | break; |
653 | } | 798 | } |
654 | root->defrag_running = 0; | 799 | root->defrag_running = 0; |
655 | smp_mb(); | 800 | return ret; |
656 | btrfs_end_transaction(trans, root); | ||
657 | return 0; | ||
658 | } | 801 | } |
659 | 802 | ||
660 | #if 0 | 803 | #if 0 |
@@ -760,28 +903,80 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
760 | struct btrfs_root_item *new_root_item; | 903 | struct btrfs_root_item *new_root_item; |
761 | struct btrfs_root *tree_root = fs_info->tree_root; | 904 | struct btrfs_root *tree_root = fs_info->tree_root; |
762 | struct btrfs_root *root = pending->root; | 905 | struct btrfs_root *root = pending->root; |
906 | struct btrfs_root *parent_root; | ||
907 | struct inode *parent_inode; | ||
908 | struct dentry *parent; | ||
909 | struct dentry *dentry; | ||
763 | struct extent_buffer *tmp; | 910 | struct extent_buffer *tmp; |
764 | struct extent_buffer *old; | 911 | struct extent_buffer *old; |
765 | int ret; | 912 | int ret; |
913 | u64 to_reserve = 0; | ||
914 | u64 index = 0; | ||
766 | u64 objectid; | 915 | u64 objectid; |
916 | u64 root_flags; | ||
767 | 917 | ||
768 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 918 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
769 | if (!new_root_item) { | 919 | if (!new_root_item) { |
770 | ret = -ENOMEM; | 920 | pending->error = -ENOMEM; |
771 | goto fail; | 921 | goto fail; |
772 | } | 922 | } |
923 | |||
773 | ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); | 924 | ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); |
774 | if (ret) | 925 | if (ret) { |
926 | pending->error = ret; | ||
775 | goto fail; | 927 | goto fail; |
928 | } | ||
929 | |||
930 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); | ||
931 | btrfs_orphan_pre_snapshot(trans, pending, &to_reserve); | ||
932 | |||
933 | if (to_reserve > 0) { | ||
934 | ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, | ||
935 | to_reserve); | ||
936 | if (ret) { | ||
937 | pending->error = ret; | ||
938 | goto fail; | ||
939 | } | ||
940 | } | ||
941 | |||
942 | key.objectid = objectid; | ||
943 | key.offset = (u64)-1; | ||
944 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
945 | |||
946 | trans->block_rsv = &pending->block_rsv; | ||
947 | |||
948 | dentry = pending->dentry; | ||
949 | parent = dget_parent(dentry); | ||
950 | parent_inode = parent->d_inode; | ||
951 | parent_root = BTRFS_I(parent_inode)->root; | ||
952 | record_root_in_trans(trans, parent_root); | ||
953 | |||
954 | /* | ||
955 | * insert the directory item | ||
956 | */ | ||
957 | ret = btrfs_set_inode_index(parent_inode, &index); | ||
958 | BUG_ON(ret); | ||
959 | ret = btrfs_insert_dir_item(trans, parent_root, | ||
960 | dentry->d_name.name, dentry->d_name.len, | ||
961 | parent_inode->i_ino, &key, | ||
962 | BTRFS_FT_DIR, index); | ||
963 | BUG_ON(ret); | ||
964 | |||
965 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | ||
966 | dentry->d_name.len * 2); | ||
967 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | ||
968 | BUG_ON(ret); | ||
776 | 969 | ||
777 | record_root_in_trans(trans, root); | 970 | record_root_in_trans(trans, root); |
778 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 971 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
779 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 972 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
780 | 973 | ||
781 | key.objectid = objectid; | 974 | root_flags = btrfs_root_flags(new_root_item); |
782 | /* record when the snapshot was created in key.offset */ | 975 | if (pending->readonly) |
783 | key.offset = trans->transid; | 976 | root_flags |= BTRFS_ROOT_SUBVOL_RDONLY; |
784 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 977 | else |
978 | root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY; | ||
979 | btrfs_set_root_flags(new_root_item, root_flags); | ||
785 | 980 | ||
786 | old = btrfs_lock_root_node(root); | 981 | old = btrfs_lock_root_node(root); |
787 | btrfs_cow_block(trans, root, old, NULL, 0, &old); | 982 | btrfs_cow_block(trans, root, old, NULL, 0, &old); |
@@ -792,62 +987,33 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
792 | free_extent_buffer(old); | 987 | free_extent_buffer(old); |
793 | 988 | ||
794 | btrfs_set_root_node(new_root_item, tmp); | 989 | btrfs_set_root_node(new_root_item, tmp); |
795 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | 990 | /* record when the snapshot was created in key.offset */ |
796 | new_root_item); | 991 | key.offset = trans->transid; |
992 | ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); | ||
797 | btrfs_tree_unlock(tmp); | 993 | btrfs_tree_unlock(tmp); |
798 | free_extent_buffer(tmp); | 994 | free_extent_buffer(tmp); |
799 | if (ret) | 995 | BUG_ON(ret); |
800 | goto fail; | ||
801 | |||
802 | key.offset = (u64)-1; | ||
803 | memcpy(&pending->root_key, &key, sizeof(key)); | ||
804 | fail: | ||
805 | kfree(new_root_item); | ||
806 | return ret; | ||
807 | } | ||
808 | |||
809 | static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, | ||
810 | struct btrfs_pending_snapshot *pending) | ||
811 | { | ||
812 | int ret; | ||
813 | int namelen; | ||
814 | u64 index = 0; | ||
815 | struct btrfs_trans_handle *trans; | ||
816 | struct inode *parent_inode; | ||
817 | struct btrfs_root *parent_root; | ||
818 | |||
819 | parent_inode = pending->dentry->d_parent->d_inode; | ||
820 | parent_root = BTRFS_I(parent_inode)->root; | ||
821 | trans = btrfs_join_transaction(parent_root, 1); | ||
822 | 996 | ||
823 | /* | 997 | /* |
824 | * insert the directory item | 998 | * insert root back/forward references |
825 | */ | 999 | */ |
826 | namelen = strlen(pending->name); | 1000 | ret = btrfs_add_root_ref(trans, tree_root, objectid, |
827 | ret = btrfs_set_inode_index(parent_inode, &index); | ||
828 | ret = btrfs_insert_dir_item(trans, parent_root, | ||
829 | pending->name, namelen, | ||
830 | parent_inode->i_ino, | ||
831 | &pending->root_key, BTRFS_FT_DIR, index); | ||
832 | |||
833 | if (ret) | ||
834 | goto fail; | ||
835 | |||
836 | btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2); | ||
837 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | ||
838 | BUG_ON(ret); | ||
839 | |||
840 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, | ||
841 | pending->root_key.objectid, | ||
842 | parent_root->root_key.objectid, | 1001 | parent_root->root_key.objectid, |
843 | parent_inode->i_ino, index, pending->name, | 1002 | parent_inode->i_ino, index, |
844 | namelen); | 1003 | dentry->d_name.name, dentry->d_name.len); |
845 | |||
846 | BUG_ON(ret); | 1004 | BUG_ON(ret); |
1005 | dput(parent); | ||
847 | 1006 | ||
1007 | key.offset = (u64)-1; | ||
1008 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); | ||
1009 | BUG_ON(IS_ERR(pending->snap)); | ||
1010 | |||
1011 | btrfs_reloc_post_snapshot(trans, pending); | ||
1012 | btrfs_orphan_post_snapshot(trans, pending); | ||
848 | fail: | 1013 | fail: |
849 | btrfs_end_transaction(trans, fs_info->fs_root); | 1014 | kfree(new_root_item); |
850 | return ret; | 1015 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); |
1016 | return 0; | ||
851 | } | 1017 | } |
852 | 1018 | ||
853 | /* | 1019 | /* |
@@ -867,25 +1033,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, | |||
867 | return 0; | 1033 | return 0; |
868 | } | 1034 | } |
869 | 1035 | ||
870 | static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans, | ||
871 | struct btrfs_fs_info *fs_info) | ||
872 | { | ||
873 | struct btrfs_pending_snapshot *pending; | ||
874 | struct list_head *head = &trans->transaction->pending_snapshots; | ||
875 | int ret; | ||
876 | |||
877 | while (!list_empty(head)) { | ||
878 | pending = list_entry(head->next, | ||
879 | struct btrfs_pending_snapshot, list); | ||
880 | ret = finish_pending_snapshot(fs_info, pending); | ||
881 | BUG_ON(ret); | ||
882 | list_del(&pending->list); | ||
883 | kfree(pending->name); | ||
884 | kfree(pending); | ||
885 | } | ||
886 | return 0; | ||
887 | } | ||
888 | |||
889 | static void update_super_roots(struct btrfs_root *root) | 1036 | static void update_super_roots(struct btrfs_root *root) |
890 | { | 1037 | { |
891 | struct btrfs_root_item *root_item; | 1038 | struct btrfs_root_item *root_item; |
@@ -902,6 +1049,8 @@ static void update_super_roots(struct btrfs_root *root) | |||
902 | super->root = root_item->bytenr; | 1049 | super->root = root_item->bytenr; |
903 | super->generation = root_item->generation; | 1050 | super->generation = root_item->generation; |
904 | super->root_level = root_item->level; | 1051 | super->root_level = root_item->level; |
1052 | if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE)) | ||
1053 | super->cache_generation = root_item->generation; | ||
905 | } | 1054 | } |
906 | 1055 | ||
907 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) | 1056 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) |
@@ -914,11 +1063,137 @@ int btrfs_transaction_in_commit(struct btrfs_fs_info *info) | |||
914 | return ret; | 1063 | return ret; |
915 | } | 1064 | } |
916 | 1065 | ||
1066 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) | ||
1067 | { | ||
1068 | int ret = 0; | ||
1069 | spin_lock(&info->new_trans_lock); | ||
1070 | if (info->running_transaction) | ||
1071 | ret = info->running_transaction->blocked; | ||
1072 | spin_unlock(&info->new_trans_lock); | ||
1073 | return ret; | ||
1074 | } | ||
1075 | |||
1076 | /* | ||
1077 | * wait for the current transaction commit to start and block subsequent | ||
1078 | * transaction joins | ||
1079 | */ | ||
1080 | static void wait_current_trans_commit_start(struct btrfs_root *root, | ||
1081 | struct btrfs_transaction *trans) | ||
1082 | { | ||
1083 | DEFINE_WAIT(wait); | ||
1084 | |||
1085 | if (trans->in_commit) | ||
1086 | return; | ||
1087 | |||
1088 | while (1) { | ||
1089 | prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait, | ||
1090 | TASK_UNINTERRUPTIBLE); | ||
1091 | if (trans->in_commit) { | ||
1092 | finish_wait(&root->fs_info->transaction_blocked_wait, | ||
1093 | &wait); | ||
1094 | break; | ||
1095 | } | ||
1096 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1097 | schedule(); | ||
1098 | mutex_lock(&root->fs_info->trans_mutex); | ||
1099 | finish_wait(&root->fs_info->transaction_blocked_wait, &wait); | ||
1100 | } | ||
1101 | } | ||
1102 | |||
1103 | /* | ||
1104 | * wait for the current transaction to start and then become unblocked. | ||
1105 | * caller holds ref. | ||
1106 | */ | ||
1107 | static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, | ||
1108 | struct btrfs_transaction *trans) | ||
1109 | { | ||
1110 | DEFINE_WAIT(wait); | ||
1111 | |||
1112 | if (trans->commit_done || (trans->in_commit && !trans->blocked)) | ||
1113 | return; | ||
1114 | |||
1115 | while (1) { | ||
1116 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | ||
1117 | TASK_UNINTERRUPTIBLE); | ||
1118 | if (trans->commit_done || | ||
1119 | (trans->in_commit && !trans->blocked)) { | ||
1120 | finish_wait(&root->fs_info->transaction_wait, | ||
1121 | &wait); | ||
1122 | break; | ||
1123 | } | ||
1124 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1125 | schedule(); | ||
1126 | mutex_lock(&root->fs_info->trans_mutex); | ||
1127 | finish_wait(&root->fs_info->transaction_wait, | ||
1128 | &wait); | ||
1129 | } | ||
1130 | } | ||
1131 | |||
1132 | /* | ||
1133 | * commit transactions asynchronously. once btrfs_commit_transaction_async | ||
1134 | * returns, any subsequent transaction will not be allowed to join. | ||
1135 | */ | ||
1136 | struct btrfs_async_commit { | ||
1137 | struct btrfs_trans_handle *newtrans; | ||
1138 | struct btrfs_root *root; | ||
1139 | struct delayed_work work; | ||
1140 | }; | ||
1141 | |||
1142 | static void do_async_commit(struct work_struct *work) | ||
1143 | { | ||
1144 | struct btrfs_async_commit *ac = | ||
1145 | container_of(work, struct btrfs_async_commit, work.work); | ||
1146 | |||
1147 | btrfs_commit_transaction(ac->newtrans, ac->root); | ||
1148 | kfree(ac); | ||
1149 | } | ||
1150 | |||
1151 | int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | ||
1152 | struct btrfs_root *root, | ||
1153 | int wait_for_unblock) | ||
1154 | { | ||
1155 | struct btrfs_async_commit *ac; | ||
1156 | struct btrfs_transaction *cur_trans; | ||
1157 | |||
1158 | ac = kmalloc(sizeof(*ac), GFP_NOFS); | ||
1159 | BUG_ON(!ac); | ||
1160 | |||
1161 | INIT_DELAYED_WORK(&ac->work, do_async_commit); | ||
1162 | ac->root = root; | ||
1163 | ac->newtrans = btrfs_join_transaction(root, 0); | ||
1164 | |||
1165 | /* take transaction reference */ | ||
1166 | mutex_lock(&root->fs_info->trans_mutex); | ||
1167 | cur_trans = trans->transaction; | ||
1168 | cur_trans->use_count++; | ||
1169 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1170 | |||
1171 | btrfs_end_transaction(trans, root); | ||
1172 | schedule_delayed_work(&ac->work, 0); | ||
1173 | |||
1174 | /* wait for transaction to start and unblock */ | ||
1175 | mutex_lock(&root->fs_info->trans_mutex); | ||
1176 | if (wait_for_unblock) | ||
1177 | wait_current_trans_commit_start_and_unblock(root, cur_trans); | ||
1178 | else | ||
1179 | wait_current_trans_commit_start(root, cur_trans); | ||
1180 | put_transaction(cur_trans); | ||
1181 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1182 | |||
1183 | return 0; | ||
1184 | } | ||
1185 | |||
1186 | /* | ||
1187 | * btrfs_transaction state sequence: | ||
1188 | * in_commit = 0, blocked = 0 (initial) | ||
1189 | * in_commit = 1, blocked = 1 | ||
1190 | * blocked = 0 | ||
1191 | * commit_done = 1 | ||
1192 | */ | ||
917 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 1193 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
918 | struct btrfs_root *root) | 1194 | struct btrfs_root *root) |
919 | { | 1195 | { |
920 | unsigned long joined = 0; | 1196 | unsigned long joined = 0; |
921 | unsigned long timeout = 1; | ||
922 | struct btrfs_transaction *cur_trans; | 1197 | struct btrfs_transaction *cur_trans; |
923 | struct btrfs_transaction *prev_trans = NULL; | 1198 | struct btrfs_transaction *prev_trans = NULL; |
924 | DEFINE_WAIT(wait); | 1199 | DEFINE_WAIT(wait); |
@@ -935,6 +1210,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
935 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1210 | ret = btrfs_run_delayed_refs(trans, root, 0); |
936 | BUG_ON(ret); | 1211 | BUG_ON(ret); |
937 | 1212 | ||
1213 | btrfs_trans_release_metadata(trans, root); | ||
1214 | |||
938 | cur_trans = trans->transaction; | 1215 | cur_trans = trans->transaction; |
939 | /* | 1216 | /* |
940 | * set the flushing flag so procs in this transaction have to | 1217 | * set the flushing flag so procs in this transaction have to |
@@ -963,6 +1240,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
963 | 1240 | ||
964 | trans->transaction->in_commit = 1; | 1241 | trans->transaction->in_commit = 1; |
965 | trans->transaction->blocked = 1; | 1242 | trans->transaction->blocked = 1; |
1243 | wake_up(&root->fs_info->transaction_blocked_wait); | ||
1244 | |||
966 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 1245 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
967 | prev_trans = list_entry(cur_trans->list.prev, | 1246 | prev_trans = list_entry(cur_trans->list.prev, |
968 | struct btrfs_transaction, list); | 1247 | struct btrfs_transaction, list); |
@@ -987,23 +1266,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
987 | snap_pending = 1; | 1266 | snap_pending = 1; |
988 | 1267 | ||
989 | WARN_ON(cur_trans != trans->transaction); | 1268 | WARN_ON(cur_trans != trans->transaction); |
990 | prepare_to_wait(&cur_trans->writer_wait, &wait, | ||
991 | TASK_UNINTERRUPTIBLE); | ||
992 | |||
993 | if (cur_trans->num_writers > 1) | ||
994 | timeout = MAX_SCHEDULE_TIMEOUT; | ||
995 | else if (should_grow) | ||
996 | timeout = 1; | ||
997 | |||
998 | mutex_unlock(&root->fs_info->trans_mutex); | 1269 | mutex_unlock(&root->fs_info->trans_mutex); |
999 | 1270 | ||
1000 | if (flush_on_commit) { | 1271 | if (flush_on_commit || snap_pending) { |
1001 | btrfs_start_delalloc_inodes(root, 1); | 1272 | btrfs_start_delalloc_inodes(root, 1); |
1002 | ret = btrfs_wait_ordered_extents(root, 0, 1); | 1273 | ret = btrfs_wait_ordered_extents(root, 0, 1); |
1003 | BUG_ON(ret); | 1274 | BUG_ON(ret); |
1004 | } else if (snap_pending) { | ||
1005 | ret = btrfs_wait_ordered_extents(root, 0, 1); | ||
1006 | BUG_ON(ret); | ||
1007 | } | 1275 | } |
1008 | 1276 | ||
1009 | /* | 1277 | /* |
@@ -1015,9 +1283,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1015 | */ | 1283 | */ |
1016 | btrfs_run_ordered_operations(root, 1); | 1284 | btrfs_run_ordered_operations(root, 1); |
1017 | 1285 | ||
1286 | prepare_to_wait(&cur_trans->writer_wait, &wait, | ||
1287 | TASK_UNINTERRUPTIBLE); | ||
1288 | |||
1018 | smp_mb(); | 1289 | smp_mb(); |
1019 | if (cur_trans->num_writers > 1 || should_grow) | 1290 | if (cur_trans->num_writers > 1) |
1020 | schedule_timeout(timeout); | 1291 | schedule_timeout(MAX_SCHEDULE_TIMEOUT); |
1292 | else if (should_grow) | ||
1293 | schedule_timeout(1); | ||
1021 | 1294 | ||
1022 | mutex_lock(&root->fs_info->trans_mutex); | 1295 | mutex_lock(&root->fs_info->trans_mutex); |
1023 | finish_wait(&cur_trans->writer_wait, &wait); | 1296 | finish_wait(&cur_trans->writer_wait, &wait); |
@@ -1100,9 +1373,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1100 | 1373 | ||
1101 | btrfs_finish_extent_commit(trans, root); | 1374 | btrfs_finish_extent_commit(trans, root); |
1102 | 1375 | ||
1103 | /* do the directory inserts of any pending snapshot creations */ | ||
1104 | finish_pending_snapshots(trans, root->fs_info); | ||
1105 | |||
1106 | mutex_lock(&root->fs_info->trans_mutex); | 1376 | mutex_lock(&root->fs_info->trans_mutex); |
1107 | 1377 | ||
1108 | cur_trans->commit_done = 1; | 1378 | cur_trans->commit_done = 1; |
@@ -1145,9 +1415,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) | |||
1145 | 1415 | ||
1146 | if (btrfs_header_backref_rev(root->node) < | 1416 | if (btrfs_header_backref_rev(root->node) < |
1147 | BTRFS_MIXED_BACKREF_REV) | 1417 | BTRFS_MIXED_BACKREF_REV) |
1148 | btrfs_drop_snapshot(root, 0); | 1418 | btrfs_drop_snapshot(root, NULL, 0); |
1149 | else | 1419 | else |
1150 | btrfs_drop_snapshot(root, 1); | 1420 | btrfs_drop_snapshot(root, NULL, 1); |
1151 | } | 1421 | } |
1152 | return 0; | 1422 | return 0; |
1153 | } | 1423 | } |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 93c7ccb33118..229a594cacd5 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -45,20 +45,24 @@ struct btrfs_transaction { | |||
45 | 45 | ||
46 | struct btrfs_trans_handle { | 46 | struct btrfs_trans_handle { |
47 | u64 transid; | 47 | u64 transid; |
48 | u64 block_group; | ||
49 | u64 bytes_reserved; | ||
48 | unsigned long blocks_reserved; | 50 | unsigned long blocks_reserved; |
49 | unsigned long blocks_used; | 51 | unsigned long blocks_used; |
50 | struct btrfs_transaction *transaction; | ||
51 | u64 block_group; | ||
52 | u64 alloc_exclude_start; | ||
53 | u64 alloc_exclude_nr; | ||
54 | unsigned long delayed_ref_updates; | 52 | unsigned long delayed_ref_updates; |
53 | struct btrfs_transaction *transaction; | ||
54 | struct btrfs_block_rsv *block_rsv; | ||
55 | }; | 55 | }; |
56 | 56 | ||
57 | struct btrfs_pending_snapshot { | 57 | struct btrfs_pending_snapshot { |
58 | struct dentry *dentry; | 58 | struct dentry *dentry; |
59 | struct btrfs_root *root; | 59 | struct btrfs_root *root; |
60 | char *name; | 60 | struct btrfs_root *snap; |
61 | struct btrfs_key root_key; | 61 | /* block reservation for the operation */ |
62 | struct btrfs_block_rsv block_rsv; | ||
63 | /* extra metadata reseration for relocation */ | ||
64 | int error; | ||
65 | bool readonly; | ||
62 | struct list_head list; | 66 | struct list_head list; |
63 | }; | 67 | }; |
64 | 68 | ||
@@ -84,12 +88,17 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | |||
84 | 88 | ||
85 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | 89 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, |
86 | struct btrfs_root *root); | 90 | struct btrfs_root *root); |
91 | int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, | ||
92 | struct btrfs_root *root); | ||
87 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 93 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
88 | int num_blocks); | 94 | int num_items); |
89 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 95 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, |
90 | int num_blocks); | 96 | int num_blocks); |
97 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, | ||
98 | int num_blocks); | ||
91 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 99 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, |
92 | int num_blocks); | 100 | int num_blocks); |
101 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); | ||
93 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 102 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
94 | struct btrfs_root *root); | 103 | struct btrfs_root *root); |
95 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | 104 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, |
@@ -101,8 +110,13 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); | |||
101 | int btrfs_clean_old_snapshots(struct btrfs_root *root); | 110 | int btrfs_clean_old_snapshots(struct btrfs_root *root); |
102 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 111 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
103 | struct btrfs_root *root); | 112 | struct btrfs_root *root); |
113 | int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | ||
114 | struct btrfs_root *root, | ||
115 | int wait_for_unblock); | ||
104 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | 116 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, |
105 | struct btrfs_root *root); | 117 | struct btrfs_root *root); |
118 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | ||
119 | struct btrfs_root *root); | ||
106 | void btrfs_throttle(struct btrfs_root *root); | 120 | void btrfs_throttle(struct btrfs_root *root); |
107 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | 121 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, |
108 | struct btrfs_root *root); | 122 | struct btrfs_root *root); |
@@ -112,5 +126,6 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
112 | struct extent_io_tree *dirty_pages, int mark); | 126 | struct extent_io_tree *dirty_pages, int mark); |
113 | int btrfs_wait_marked_extents(struct btrfs_root *root, | 127 | int btrfs_wait_marked_extents(struct btrfs_root *root, |
114 | struct extent_io_tree *dirty_pages, int mark); | 128 | struct extent_io_tree *dirty_pages, int mark); |
129 | int btrfs_transaction_blocked(struct btrfs_fs_info *info); | ||
115 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); | 130 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); |
116 | #endif | 131 | #endif |
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b10eacdb1620..992ab425599d 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c | |||
@@ -36,7 +36,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
36 | int ret = 0; | 36 | int ret = 0; |
37 | int wret; | 37 | int wret; |
38 | int level; | 38 | int level; |
39 | int orig_level; | ||
40 | int is_extent = 0; | 39 | int is_extent = 0; |
41 | int next_key_ret = 0; | 40 | int next_key_ret = 0; |
42 | u64 last_ret = 0; | 41 | u64 last_ret = 0; |
@@ -64,7 +63,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
64 | return -ENOMEM; | 63 | return -ENOMEM; |
65 | 64 | ||
66 | level = btrfs_header_level(root->node); | 65 | level = btrfs_header_level(root->node); |
67 | orig_level = level; | ||
68 | 66 | ||
69 | if (level == 0) | 67 | if (level == 0) |
70 | goto out; | 68 | goto out; |
@@ -117,13 +115,14 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
117 | path->nodes[1], 0, | 115 | path->nodes[1], 0, |
118 | cache_only, &last_ret, | 116 | cache_only, &last_ret, |
119 | &root->defrag_progress); | 117 | &root->defrag_progress); |
120 | WARN_ON(ret && ret != -EAGAIN); | 118 | if (ret) { |
119 | WARN_ON(ret == -EAGAIN); | ||
120 | goto out; | ||
121 | } | ||
121 | if (next_key_ret == 0) { | 122 | if (next_key_ret == 0) { |
122 | memcpy(&root->defrag_progress, &key, sizeof(key)); | 123 | memcpy(&root->defrag_progress, &key, sizeof(key)); |
123 | ret = -EAGAIN; | 124 | ret = -EAGAIN; |
124 | } | 125 | } |
125 | |||
126 | btrfs_release_path(root, path); | ||
127 | out: | 126 | out: |
128 | if (path) | 127 | if (path) |
129 | btrfs_free_path(path); | 128 | btrfs_free_path(path); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 4a9434b622ec..054744ac5719 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -17,6 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/slab.h> | ||
20 | #include "ctree.h" | 21 | #include "ctree.h" |
21 | #include "transaction.h" | 22 | #include "transaction.h" |
22 | #include "disk-io.h" | 23 | #include "disk-io.h" |
@@ -134,6 +135,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
134 | struct btrfs_root *root) | 135 | struct btrfs_root *root) |
135 | { | 136 | { |
136 | int ret; | 137 | int ret; |
138 | int err = 0; | ||
137 | 139 | ||
138 | mutex_lock(&root->log_mutex); | 140 | mutex_lock(&root->log_mutex); |
139 | if (root->log_root) { | 141 | if (root->log_root) { |
@@ -154,17 +156,19 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
154 | mutex_lock(&root->fs_info->tree_log_mutex); | 156 | mutex_lock(&root->fs_info->tree_log_mutex); |
155 | if (!root->fs_info->log_root_tree) { | 157 | if (!root->fs_info->log_root_tree) { |
156 | ret = btrfs_init_log_root_tree(trans, root->fs_info); | 158 | ret = btrfs_init_log_root_tree(trans, root->fs_info); |
157 | BUG_ON(ret); | 159 | if (ret) |
160 | err = ret; | ||
158 | } | 161 | } |
159 | if (!root->log_root) { | 162 | if (err == 0 && !root->log_root) { |
160 | ret = btrfs_add_log_tree(trans, root); | 163 | ret = btrfs_add_log_tree(trans, root); |
161 | BUG_ON(ret); | 164 | if (ret) |
165 | err = ret; | ||
162 | } | 166 | } |
163 | mutex_unlock(&root->fs_info->tree_log_mutex); | 167 | mutex_unlock(&root->fs_info->tree_log_mutex); |
164 | root->log_batch++; | 168 | root->log_batch++; |
165 | atomic_inc(&root->log_writers); | 169 | atomic_inc(&root->log_writers); |
166 | mutex_unlock(&root->log_mutex); | 170 | mutex_unlock(&root->log_mutex); |
167 | return 0; | 171 | return err; |
168 | } | 172 | } |
169 | 173 | ||
170 | /* | 174 | /* |
@@ -375,7 +379,7 @@ insert: | |||
375 | BUG_ON(ret); | 379 | BUG_ON(ret); |
376 | } | 380 | } |
377 | } else if (ret) { | 381 | } else if (ret) { |
378 | BUG(); | 382 | return ret; |
379 | } | 383 | } |
380 | dst_ptr = btrfs_item_ptr_offset(path->nodes[0], | 384 | dst_ptr = btrfs_item_ptr_offset(path->nodes[0], |
381 | path->slots[0]); | 385 | path->slots[0]); |
@@ -445,7 +449,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root, | |||
445 | key.objectid = objectid; | 449 | key.objectid = objectid; |
446 | key.type = BTRFS_INODE_ITEM_KEY; | 450 | key.type = BTRFS_INODE_ITEM_KEY; |
447 | key.offset = 0; | 451 | key.offset = 0; |
448 | inode = btrfs_iget(root->fs_info->sb, &key, root); | 452 | inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); |
449 | if (IS_ERR(inode)) { | 453 | if (IS_ERR(inode)) { |
450 | inode = NULL; | 454 | inode = NULL; |
451 | } else if (is_bad_inode(inode)) { | 455 | } else if (is_bad_inode(inode)) { |
@@ -782,7 +786,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | |||
782 | { | 786 | { |
783 | struct inode *dir; | 787 | struct inode *dir; |
784 | int ret; | 788 | int ret; |
785 | struct btrfs_key location; | ||
786 | struct btrfs_inode_ref *ref; | 789 | struct btrfs_inode_ref *ref; |
787 | struct btrfs_dir_item *di; | 790 | struct btrfs_dir_item *di; |
788 | struct inode *inode; | 791 | struct inode *inode; |
@@ -791,10 +794,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | |||
791 | unsigned long ref_ptr; | 794 | unsigned long ref_ptr; |
792 | unsigned long ref_end; | 795 | unsigned long ref_end; |
793 | 796 | ||
794 | location.objectid = key->objectid; | ||
795 | location.type = BTRFS_INODE_ITEM_KEY; | ||
796 | location.offset = 0; | ||
797 | |||
798 | /* | 797 | /* |
799 | * it is possible that we didn't log all the parent directories | 798 | * it is possible that we didn't log all the parent directories |
800 | * for a given inode. If we don't find the dir, just don't | 799 | * for a given inode. If we don't find the dir, just don't |
@@ -1579,7 +1578,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1579 | struct btrfs_path *path; | 1578 | struct btrfs_path *path; |
1580 | struct btrfs_root *root = wc->replay_dest; | 1579 | struct btrfs_root *root = wc->replay_dest; |
1581 | struct btrfs_key key; | 1580 | struct btrfs_key key; |
1582 | u32 item_size; | ||
1583 | int level; | 1581 | int level; |
1584 | int i; | 1582 | int i; |
1585 | int ret; | 1583 | int ret; |
@@ -1597,7 +1595,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1597 | nritems = btrfs_header_nritems(eb); | 1595 | nritems = btrfs_header_nritems(eb); |
1598 | for (i = 0; i < nritems; i++) { | 1596 | for (i = 0; i < nritems; i++) { |
1599 | btrfs_item_key_to_cpu(eb, &key, i); | 1597 | btrfs_item_key_to_cpu(eb, &key, i); |
1600 | item_size = btrfs_item_size_nr(eb, i); | ||
1601 | 1598 | ||
1602 | /* inode keys are done during the first stage */ | 1599 | /* inode keys are done during the first stage */ |
1603 | if (key.type == BTRFS_INODE_ITEM_KEY && | 1600 | if (key.type == BTRFS_INODE_ITEM_KEY && |
@@ -1664,7 +1661,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1664 | struct walk_control *wc) | 1661 | struct walk_control *wc) |
1665 | { | 1662 | { |
1666 | u64 root_owner; | 1663 | u64 root_owner; |
1667 | u64 root_gen; | ||
1668 | u64 bytenr; | 1664 | u64 bytenr; |
1669 | u64 ptr_gen; | 1665 | u64 ptr_gen; |
1670 | struct extent_buffer *next; | 1666 | struct extent_buffer *next; |
@@ -1694,13 +1690,12 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1694 | 1690 | ||
1695 | parent = path->nodes[*level]; | 1691 | parent = path->nodes[*level]; |
1696 | root_owner = btrfs_header_owner(parent); | 1692 | root_owner = btrfs_header_owner(parent); |
1697 | root_gen = btrfs_header_generation(parent); | ||
1698 | 1693 | ||
1699 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 1694 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); |
1700 | 1695 | ||
1701 | wc->process_func(root, next, wc, ptr_gen); | ||
1702 | |||
1703 | if (*level == 1) { | 1696 | if (*level == 1) { |
1697 | wc->process_func(root, next, wc, ptr_gen); | ||
1698 | |||
1704 | path->slots[*level]++; | 1699 | path->slots[*level]++; |
1705 | if (wc->free) { | 1700 | if (wc->free) { |
1706 | btrfs_read_buffer(next, ptr_gen); | 1701 | btrfs_read_buffer(next, ptr_gen); |
@@ -1733,35 +1728,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1733 | WARN_ON(*level < 0); | 1728 | WARN_ON(*level < 0); |
1734 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | 1729 | WARN_ON(*level >= BTRFS_MAX_LEVEL); |
1735 | 1730 | ||
1736 | if (path->nodes[*level] == root->node) | 1731 | path->slots[*level] = btrfs_header_nritems(path->nodes[*level]); |
1737 | parent = path->nodes[*level]; | ||
1738 | else | ||
1739 | parent = path->nodes[*level + 1]; | ||
1740 | |||
1741 | bytenr = path->nodes[*level]->start; | ||
1742 | |||
1743 | blocksize = btrfs_level_size(root, *level); | ||
1744 | root_owner = btrfs_header_owner(parent); | ||
1745 | root_gen = btrfs_header_generation(parent); | ||
1746 | |||
1747 | wc->process_func(root, path->nodes[*level], wc, | ||
1748 | btrfs_header_generation(path->nodes[*level])); | ||
1749 | |||
1750 | if (wc->free) { | ||
1751 | next = path->nodes[*level]; | ||
1752 | btrfs_tree_lock(next); | ||
1753 | clean_tree_block(trans, root, next); | ||
1754 | btrfs_set_lock_blocking(next); | ||
1755 | btrfs_wait_tree_block_writeback(next); | ||
1756 | btrfs_tree_unlock(next); | ||
1757 | |||
1758 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); | ||
1759 | ret = btrfs_free_reserved_extent(root, bytenr, blocksize); | ||
1760 | BUG_ON(ret); | ||
1761 | } | ||
1762 | free_extent_buffer(path->nodes[*level]); | ||
1763 | path->nodes[*level] = NULL; | ||
1764 | *level += 1; | ||
1765 | 1732 | ||
1766 | cond_resched(); | 1733 | cond_resched(); |
1767 | return 0; | 1734 | return 0; |
@@ -1773,16 +1740,13 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
1773 | struct walk_control *wc) | 1740 | struct walk_control *wc) |
1774 | { | 1741 | { |
1775 | u64 root_owner; | 1742 | u64 root_owner; |
1776 | u64 root_gen; | ||
1777 | int i; | 1743 | int i; |
1778 | int slot; | 1744 | int slot; |
1779 | int ret; | 1745 | int ret; |
1780 | 1746 | ||
1781 | for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { | 1747 | for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { |
1782 | slot = path->slots[i]; | 1748 | slot = path->slots[i]; |
1783 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { | 1749 | if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { |
1784 | struct extent_buffer *node; | ||
1785 | node = path->nodes[i]; | ||
1786 | path->slots[i]++; | 1750 | path->slots[i]++; |
1787 | *level = i; | 1751 | *level = i; |
1788 | WARN_ON(*level == 0); | 1752 | WARN_ON(*level == 0); |
@@ -1795,7 +1759,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
1795 | parent = path->nodes[*level + 1]; | 1759 | parent = path->nodes[*level + 1]; |
1796 | 1760 | ||
1797 | root_owner = btrfs_header_owner(parent); | 1761 | root_owner = btrfs_header_owner(parent); |
1798 | root_gen = btrfs_header_generation(parent); | ||
1799 | wc->process_func(root, path->nodes[*level], wc, | 1762 | wc->process_func(root, path->nodes[*level], wc, |
1800 | btrfs_header_generation(path->nodes[*level])); | 1763 | btrfs_header_generation(path->nodes[*level])); |
1801 | if (wc->free) { | 1764 | if (wc->free) { |
@@ -2046,7 +2009,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2046 | mutex_unlock(&log_root_tree->log_mutex); | 2009 | mutex_unlock(&log_root_tree->log_mutex); |
2047 | 2010 | ||
2048 | ret = update_log_root(trans, log); | 2011 | ret = update_log_root(trans, log); |
2049 | BUG_ON(ret); | ||
2050 | 2012 | ||
2051 | mutex_lock(&log_root_tree->log_mutex); | 2013 | mutex_lock(&log_root_tree->log_mutex); |
2052 | if (atomic_dec_and_test(&log_root_tree->log_writers)) { | 2014 | if (atomic_dec_and_test(&log_root_tree->log_writers)) { |
@@ -2055,6 +2017,15 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2055 | wake_up(&log_root_tree->log_writer_wait); | 2017 | wake_up(&log_root_tree->log_writer_wait); |
2056 | } | 2018 | } |
2057 | 2019 | ||
2020 | if (ret) { | ||
2021 | BUG_ON(ret != -ENOSPC); | ||
2022 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
2023 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | ||
2024 | mutex_unlock(&log_root_tree->log_mutex); | ||
2025 | ret = -EAGAIN; | ||
2026 | goto out; | ||
2027 | } | ||
2028 | |||
2058 | index2 = log_root_tree->log_transid % 2; | 2029 | index2 = log_root_tree->log_transid % 2; |
2059 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2030 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
2060 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2031 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
@@ -2128,15 +2099,10 @@ out: | |||
2128 | return 0; | 2099 | return 0; |
2129 | } | 2100 | } |
2130 | 2101 | ||
2131 | /* | 2102 | static void free_log_tree(struct btrfs_trans_handle *trans, |
2132 | * free all the extents used by the tree log. This should be called | 2103 | struct btrfs_root *log) |
2133 | * at commit time of the full transaction | ||
2134 | */ | ||
2135 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | ||
2136 | { | 2104 | { |
2137 | int ret; | 2105 | int ret; |
2138 | struct btrfs_root *log; | ||
2139 | struct key; | ||
2140 | u64 start; | 2106 | u64 start; |
2141 | u64 end; | 2107 | u64 end; |
2142 | struct walk_control wc = { | 2108 | struct walk_control wc = { |
@@ -2144,10 +2110,6 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | |||
2144 | .process_func = process_one_buffer | 2110 | .process_func = process_one_buffer |
2145 | }; | 2111 | }; |
2146 | 2112 | ||
2147 | if (!root->log_root || root->fs_info->log_root_recovering) | ||
2148 | return 0; | ||
2149 | |||
2150 | log = root->log_root; | ||
2151 | ret = walk_log_tree(trans, log, &wc); | 2113 | ret = walk_log_tree(trans, log, &wc); |
2152 | BUG_ON(ret); | 2114 | BUG_ON(ret); |
2153 | 2115 | ||
@@ -2161,14 +2123,30 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | |||
2161 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); | 2123 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); |
2162 | } | 2124 | } |
2163 | 2125 | ||
2164 | if (log->log_transid > 0) { | ||
2165 | ret = btrfs_del_root(trans, root->fs_info->log_root_tree, | ||
2166 | &log->root_key); | ||
2167 | BUG_ON(ret); | ||
2168 | } | ||
2169 | root->log_root = NULL; | ||
2170 | free_extent_buffer(log->node); | 2126 | free_extent_buffer(log->node); |
2171 | kfree(log); | 2127 | kfree(log); |
2128 | } | ||
2129 | |||
2130 | /* | ||
2131 | * free all the extents used by the tree log. This should be called | ||
2132 | * at commit time of the full transaction | ||
2133 | */ | ||
2134 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | ||
2135 | { | ||
2136 | if (root->log_root) { | ||
2137 | free_log_tree(trans, root->log_root); | ||
2138 | root->log_root = NULL; | ||
2139 | } | ||
2140 | return 0; | ||
2141 | } | ||
2142 | |||
2143 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
2144 | struct btrfs_fs_info *fs_info) | ||
2145 | { | ||
2146 | if (fs_info->log_root_tree) { | ||
2147 | free_log_tree(trans, fs_info->log_root_tree); | ||
2148 | fs_info->log_root_tree = NULL; | ||
2149 | } | ||
2172 | return 0; | 2150 | return 0; |
2173 | } | 2151 | } |
2174 | 2152 | ||
@@ -2202,6 +2180,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2202 | struct btrfs_dir_item *di; | 2180 | struct btrfs_dir_item *di; |
2203 | struct btrfs_path *path; | 2181 | struct btrfs_path *path; |
2204 | int ret; | 2182 | int ret; |
2183 | int err = 0; | ||
2205 | int bytes_del = 0; | 2184 | int bytes_del = 0; |
2206 | 2185 | ||
2207 | if (BTRFS_I(dir)->logged_trans < trans->transid) | 2186 | if (BTRFS_I(dir)->logged_trans < trans->transid) |
@@ -2217,7 +2196,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2217 | path = btrfs_alloc_path(); | 2196 | path = btrfs_alloc_path(); |
2218 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, | 2197 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, |
2219 | name, name_len, -1); | 2198 | name, name_len, -1); |
2220 | if (di && !IS_ERR(di)) { | 2199 | if (IS_ERR(di)) { |
2200 | err = PTR_ERR(di); | ||
2201 | goto fail; | ||
2202 | } | ||
2203 | if (di) { | ||
2221 | ret = btrfs_delete_one_dir_name(trans, log, path, di); | 2204 | ret = btrfs_delete_one_dir_name(trans, log, path, di); |
2222 | bytes_del += name_len; | 2205 | bytes_del += name_len; |
2223 | BUG_ON(ret); | 2206 | BUG_ON(ret); |
@@ -2225,7 +2208,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2225 | btrfs_release_path(log, path); | 2208 | btrfs_release_path(log, path); |
2226 | di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, | 2209 | di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, |
2227 | index, name, name_len, -1); | 2210 | index, name, name_len, -1); |
2228 | if (di && !IS_ERR(di)) { | 2211 | if (IS_ERR(di)) { |
2212 | err = PTR_ERR(di); | ||
2213 | goto fail; | ||
2214 | } | ||
2215 | if (di) { | ||
2229 | ret = btrfs_delete_one_dir_name(trans, log, path, di); | 2216 | ret = btrfs_delete_one_dir_name(trans, log, path, di); |
2230 | bytes_del += name_len; | 2217 | bytes_del += name_len; |
2231 | BUG_ON(ret); | 2218 | BUG_ON(ret); |
@@ -2243,6 +2230,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2243 | btrfs_release_path(log, path); | 2230 | btrfs_release_path(log, path); |
2244 | 2231 | ||
2245 | ret = btrfs_search_slot(trans, log, &key, path, 0, 1); | 2232 | ret = btrfs_search_slot(trans, log, &key, path, 0, 1); |
2233 | if (ret < 0) { | ||
2234 | err = ret; | ||
2235 | goto fail; | ||
2236 | } | ||
2246 | if (ret == 0) { | 2237 | if (ret == 0) { |
2247 | struct btrfs_inode_item *item; | 2238 | struct btrfs_inode_item *item; |
2248 | u64 i_size; | 2239 | u64 i_size; |
@@ -2260,12 +2251,16 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2260 | ret = 0; | 2251 | ret = 0; |
2261 | btrfs_release_path(log, path); | 2252 | btrfs_release_path(log, path); |
2262 | } | 2253 | } |
2263 | 2254 | fail: | |
2264 | btrfs_free_path(path); | 2255 | btrfs_free_path(path); |
2265 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | 2256 | mutex_unlock(&BTRFS_I(dir)->log_mutex); |
2257 | if (ret == -ENOSPC) { | ||
2258 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
2259 | ret = 0; | ||
2260 | } | ||
2266 | btrfs_end_log_trans(root); | 2261 | btrfs_end_log_trans(root); |
2267 | 2262 | ||
2268 | return 0; | 2263 | return err; |
2269 | } | 2264 | } |
2270 | 2265 | ||
2271 | /* see comments for btrfs_del_dir_entries_in_log */ | 2266 | /* see comments for btrfs_del_dir_entries_in_log */ |
@@ -2290,6 +2285,10 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
2290 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, | 2285 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, |
2291 | dirid, &index); | 2286 | dirid, &index); |
2292 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2287 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
2288 | if (ret == -ENOSPC) { | ||
2289 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
2290 | ret = 0; | ||
2291 | } | ||
2293 | btrfs_end_log_trans(root); | 2292 | btrfs_end_log_trans(root); |
2294 | 2293 | ||
2295 | return ret; | 2294 | return ret; |
@@ -2317,7 +2316,8 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, | |||
2317 | else | 2316 | else |
2318 | key.type = BTRFS_DIR_LOG_INDEX_KEY; | 2317 | key.type = BTRFS_DIR_LOG_INDEX_KEY; |
2319 | ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); | 2318 | ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); |
2320 | BUG_ON(ret); | 2319 | if (ret) |
2320 | return ret; | ||
2321 | 2321 | ||
2322 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 2322 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
2323 | struct btrfs_dir_log_item); | 2323 | struct btrfs_dir_log_item); |
@@ -2342,6 +2342,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2342 | struct btrfs_key max_key; | 2342 | struct btrfs_key max_key; |
2343 | struct btrfs_root *log = root->log_root; | 2343 | struct btrfs_root *log = root->log_root; |
2344 | struct extent_buffer *src; | 2344 | struct extent_buffer *src; |
2345 | int err = 0; | ||
2345 | int ret; | 2346 | int ret; |
2346 | int i; | 2347 | int i; |
2347 | int nritems; | 2348 | int nritems; |
@@ -2404,6 +2405,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2404 | ret = overwrite_item(trans, log, dst_path, | 2405 | ret = overwrite_item(trans, log, dst_path, |
2405 | path->nodes[0], path->slots[0], | 2406 | path->nodes[0], path->slots[0], |
2406 | &tmp); | 2407 | &tmp); |
2408 | if (ret) { | ||
2409 | err = ret; | ||
2410 | goto done; | ||
2411 | } | ||
2407 | } | 2412 | } |
2408 | } | 2413 | } |
2409 | btrfs_release_path(root, path); | 2414 | btrfs_release_path(root, path); |
@@ -2431,7 +2436,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2431 | goto done; | 2436 | goto done; |
2432 | ret = overwrite_item(trans, log, dst_path, src, i, | 2437 | ret = overwrite_item(trans, log, dst_path, src, i, |
2433 | &min_key); | 2438 | &min_key); |
2434 | BUG_ON(ret); | 2439 | if (ret) { |
2440 | err = ret; | ||
2441 | goto done; | ||
2442 | } | ||
2435 | } | 2443 | } |
2436 | path->slots[0] = nritems; | 2444 | path->slots[0] = nritems; |
2437 | 2445 | ||
@@ -2453,22 +2461,30 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2453 | ret = overwrite_item(trans, log, dst_path, | 2461 | ret = overwrite_item(trans, log, dst_path, |
2454 | path->nodes[0], path->slots[0], | 2462 | path->nodes[0], path->slots[0], |
2455 | &tmp); | 2463 | &tmp); |
2456 | 2464 | if (ret) | |
2457 | BUG_ON(ret); | 2465 | err = ret; |
2458 | last_offset = tmp.offset; | 2466 | else |
2467 | last_offset = tmp.offset; | ||
2459 | goto done; | 2468 | goto done; |
2460 | } | 2469 | } |
2461 | } | 2470 | } |
2462 | done: | 2471 | done: |
2463 | *last_offset_ret = last_offset; | ||
2464 | btrfs_release_path(root, path); | 2472 | btrfs_release_path(root, path); |
2465 | btrfs_release_path(log, dst_path); | 2473 | btrfs_release_path(log, dst_path); |
2466 | 2474 | ||
2467 | /* insert the log range keys to indicate where the log is valid */ | 2475 | if (err == 0) { |
2468 | ret = insert_dir_log_key(trans, log, path, key_type, inode->i_ino, | 2476 | *last_offset_ret = last_offset; |
2469 | first_offset, last_offset); | 2477 | /* |
2470 | BUG_ON(ret); | 2478 | * insert the log range keys to indicate where the log |
2471 | return 0; | 2479 | * is valid |
2480 | */ | ||
2481 | ret = insert_dir_log_key(trans, log, path, key_type, | ||
2482 | inode->i_ino, first_offset, | ||
2483 | last_offset); | ||
2484 | if (ret) | ||
2485 | err = ret; | ||
2486 | } | ||
2487 | return err; | ||
2472 | } | 2488 | } |
2473 | 2489 | ||
2474 | /* | 2490 | /* |
@@ -2500,7 +2516,8 @@ again: | |||
2500 | ret = log_dir_items(trans, root, inode, path, | 2516 | ret = log_dir_items(trans, root, inode, path, |
2501 | dst_path, key_type, min_key, | 2517 | dst_path, key_type, min_key, |
2502 | &max_key); | 2518 | &max_key); |
2503 | BUG_ON(ret); | 2519 | if (ret) |
2520 | return ret; | ||
2504 | if (max_key == (u64)-1) | 2521 | if (max_key == (u64)-1) |
2505 | break; | 2522 | break; |
2506 | min_key = max_key + 1; | 2523 | min_key = max_key + 1; |
@@ -2534,8 +2551,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2534 | 2551 | ||
2535 | while (1) { | 2552 | while (1) { |
2536 | ret = btrfs_search_slot(trans, log, &key, path, -1, 1); | 2553 | ret = btrfs_search_slot(trans, log, &key, path, -1, 1); |
2537 | 2554 | BUG_ON(ret == 0); | |
2538 | if (ret != 1) | 2555 | if (ret < 0) |
2539 | break; | 2556 | break; |
2540 | 2557 | ||
2541 | if (path->slots[0] == 0) | 2558 | if (path->slots[0] == 0) |
@@ -2553,7 +2570,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2553 | btrfs_release_path(log, path); | 2570 | btrfs_release_path(log, path); |
2554 | } | 2571 | } |
2555 | btrfs_release_path(log, path); | 2572 | btrfs_release_path(log, path); |
2556 | return 0; | 2573 | return ret; |
2557 | } | 2574 | } |
2558 | 2575 | ||
2559 | static noinline int copy_items(struct btrfs_trans_handle *trans, | 2576 | static noinline int copy_items(struct btrfs_trans_handle *trans, |
@@ -2586,7 +2603,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2586 | } | 2603 | } |
2587 | ret = btrfs_insert_empty_items(trans, log, dst_path, | 2604 | ret = btrfs_insert_empty_items(trans, log, dst_path, |
2588 | ins_keys, ins_sizes, nr); | 2605 | ins_keys, ins_sizes, nr); |
2589 | BUG_ON(ret); | 2606 | if (ret) { |
2607 | kfree(ins_data); | ||
2608 | return ret; | ||
2609 | } | ||
2590 | 2610 | ||
2591 | for (i = 0; i < nr; i++, dst_path->slots[0]++) { | 2611 | for (i = 0; i < nr; i++, dst_path->slots[0]++) { |
2592 | dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], | 2612 | dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], |
@@ -2659,16 +2679,17 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2659 | * we have to do this after the loop above to avoid changing the | 2679 | * we have to do this after the loop above to avoid changing the |
2660 | * log tree while trying to change the log tree. | 2680 | * log tree while trying to change the log tree. |
2661 | */ | 2681 | */ |
2682 | ret = 0; | ||
2662 | while (!list_empty(&ordered_sums)) { | 2683 | while (!list_empty(&ordered_sums)) { |
2663 | struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, | 2684 | struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, |
2664 | struct btrfs_ordered_sum, | 2685 | struct btrfs_ordered_sum, |
2665 | list); | 2686 | list); |
2666 | ret = btrfs_csum_file_blocks(trans, log, sums); | 2687 | if (!ret) |
2667 | BUG_ON(ret); | 2688 | ret = btrfs_csum_file_blocks(trans, log, sums); |
2668 | list_del(&sums->list); | 2689 | list_del(&sums->list); |
2669 | kfree(sums); | 2690 | kfree(sums); |
2670 | } | 2691 | } |
2671 | return 0; | 2692 | return ret; |
2672 | } | 2693 | } |
2673 | 2694 | ||
2674 | /* log a single inode in the tree log. | 2695 | /* log a single inode in the tree log. |
@@ -2695,7 +2716,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2695 | struct btrfs_key max_key; | 2716 | struct btrfs_key max_key; |
2696 | struct btrfs_root *log = root->log_root; | 2717 | struct btrfs_root *log = root->log_root; |
2697 | struct extent_buffer *src = NULL; | 2718 | struct extent_buffer *src = NULL; |
2698 | u32 size; | 2719 | int err = 0; |
2699 | int ret; | 2720 | int ret; |
2700 | int nritems; | 2721 | int nritems; |
2701 | int ins_start_slot = 0; | 2722 | int ins_start_slot = 0; |
@@ -2738,7 +2759,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2738 | } else { | 2759 | } else { |
2739 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); | 2760 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); |
2740 | } | 2761 | } |
2741 | BUG_ON(ret); | 2762 | if (ret) { |
2763 | err = ret; | ||
2764 | goto out_unlock; | ||
2765 | } | ||
2742 | path->keep_locks = 1; | 2766 | path->keep_locks = 1; |
2743 | 2767 | ||
2744 | while (1) { | 2768 | while (1) { |
@@ -2755,7 +2779,6 @@ again: | |||
2755 | break; | 2779 | break; |
2756 | 2780 | ||
2757 | src = path->nodes[0]; | 2781 | src = path->nodes[0]; |
2758 | size = btrfs_item_size_nr(src, path->slots[0]); | ||
2759 | if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { | 2782 | if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { |
2760 | ins_nr++; | 2783 | ins_nr++; |
2761 | goto next_slot; | 2784 | goto next_slot; |
@@ -2767,7 +2790,10 @@ again: | |||
2767 | 2790 | ||
2768 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, | 2791 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, |
2769 | ins_nr, inode_only); | 2792 | ins_nr, inode_only); |
2770 | BUG_ON(ret); | 2793 | if (ret) { |
2794 | err = ret; | ||
2795 | goto out_unlock; | ||
2796 | } | ||
2771 | ins_nr = 1; | 2797 | ins_nr = 1; |
2772 | ins_start_slot = path->slots[0]; | 2798 | ins_start_slot = path->slots[0]; |
2773 | next_slot: | 2799 | next_slot: |
@@ -2783,7 +2809,10 @@ next_slot: | |||
2783 | ret = copy_items(trans, log, dst_path, src, | 2809 | ret = copy_items(trans, log, dst_path, src, |
2784 | ins_start_slot, | 2810 | ins_start_slot, |
2785 | ins_nr, inode_only); | 2811 | ins_nr, inode_only); |
2786 | BUG_ON(ret); | 2812 | if (ret) { |
2813 | err = ret; | ||
2814 | goto out_unlock; | ||
2815 | } | ||
2787 | ins_nr = 0; | 2816 | ins_nr = 0; |
2788 | } | 2817 | } |
2789 | btrfs_release_path(root, path); | 2818 | btrfs_release_path(root, path); |
@@ -2801,7 +2830,10 @@ next_slot: | |||
2801 | ret = copy_items(trans, log, dst_path, src, | 2830 | ret = copy_items(trans, log, dst_path, src, |
2802 | ins_start_slot, | 2831 | ins_start_slot, |
2803 | ins_nr, inode_only); | 2832 | ins_nr, inode_only); |
2804 | BUG_ON(ret); | 2833 | if (ret) { |
2834 | err = ret; | ||
2835 | goto out_unlock; | ||
2836 | } | ||
2805 | ins_nr = 0; | 2837 | ins_nr = 0; |
2806 | } | 2838 | } |
2807 | WARN_ON(ins_nr); | 2839 | WARN_ON(ins_nr); |
@@ -2809,14 +2841,18 @@ next_slot: | |||
2809 | btrfs_release_path(root, path); | 2841 | btrfs_release_path(root, path); |
2810 | btrfs_release_path(log, dst_path); | 2842 | btrfs_release_path(log, dst_path); |
2811 | ret = log_directory_changes(trans, root, inode, path, dst_path); | 2843 | ret = log_directory_changes(trans, root, inode, path, dst_path); |
2812 | BUG_ON(ret); | 2844 | if (ret) { |
2845 | err = ret; | ||
2846 | goto out_unlock; | ||
2847 | } | ||
2813 | } | 2848 | } |
2814 | BTRFS_I(inode)->logged_trans = trans->transid; | 2849 | BTRFS_I(inode)->logged_trans = trans->transid; |
2850 | out_unlock: | ||
2815 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2851 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
2816 | 2852 | ||
2817 | btrfs_free_path(path); | 2853 | btrfs_free_path(path); |
2818 | btrfs_free_path(dst_path); | 2854 | btrfs_free_path(dst_path); |
2819 | return 0; | 2855 | return err; |
2820 | } | 2856 | } |
2821 | 2857 | ||
2822 | /* | 2858 | /* |
@@ -2833,6 +2869,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
2833 | { | 2869 | { |
2834 | int ret = 0; | 2870 | int ret = 0; |
2835 | struct btrfs_root *root; | 2871 | struct btrfs_root *root; |
2872 | struct dentry *old_parent = NULL; | ||
2836 | 2873 | ||
2837 | /* | 2874 | /* |
2838 | * for regular files, if its inode is already on disk, we don't | 2875 | * for regular files, if its inode is already on disk, we don't |
@@ -2874,10 +2911,13 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
2874 | if (IS_ROOT(parent)) | 2911 | if (IS_ROOT(parent)) |
2875 | break; | 2912 | break; |
2876 | 2913 | ||
2877 | parent = parent->d_parent; | 2914 | parent = dget_parent(parent); |
2915 | dput(old_parent); | ||
2916 | old_parent = parent; | ||
2878 | inode = parent->d_inode; | 2917 | inode = parent->d_inode; |
2879 | 2918 | ||
2880 | } | 2919 | } |
2920 | dput(old_parent); | ||
2881 | out: | 2921 | out: |
2882 | return ret; | 2922 | return ret; |
2883 | } | 2923 | } |
@@ -2909,6 +2949,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2909 | { | 2949 | { |
2910 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; | 2950 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; |
2911 | struct super_block *sb; | 2951 | struct super_block *sb; |
2952 | struct dentry *old_parent = NULL; | ||
2912 | int ret = 0; | 2953 | int ret = 0; |
2913 | u64 last_committed = root->fs_info->last_trans_committed; | 2954 | u64 last_committed = root->fs_info->last_trans_committed; |
2914 | 2955 | ||
@@ -2941,10 +2982,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2941 | goto end_no_trans; | 2982 | goto end_no_trans; |
2942 | } | 2983 | } |
2943 | 2984 | ||
2944 | start_log_trans(trans, root); | 2985 | ret = start_log_trans(trans, root); |
2986 | if (ret) | ||
2987 | goto end_trans; | ||
2945 | 2988 | ||
2946 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 2989 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
2947 | BUG_ON(ret); | 2990 | if (ret) |
2991 | goto end_trans; | ||
2948 | 2992 | ||
2949 | /* | 2993 | /* |
2950 | * for regular files, if its inode is already on disk, we don't | 2994 | * for regular files, if its inode is already on disk, we don't |
@@ -2954,8 +2998,10 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2954 | */ | 2998 | */ |
2955 | if (S_ISREG(inode->i_mode) && | 2999 | if (S_ISREG(inode->i_mode) && |
2956 | BTRFS_I(inode)->generation <= last_committed && | 3000 | BTRFS_I(inode)->generation <= last_committed && |
2957 | BTRFS_I(inode)->last_unlink_trans <= last_committed) | 3001 | BTRFS_I(inode)->last_unlink_trans <= last_committed) { |
2958 | goto no_parent; | 3002 | ret = 0; |
3003 | goto end_trans; | ||
3004 | } | ||
2959 | 3005 | ||
2960 | inode_only = LOG_INODE_EXISTS; | 3006 | inode_only = LOG_INODE_EXISTS; |
2961 | while (1) { | 3007 | while (1) { |
@@ -2969,15 +3015,24 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2969 | if (BTRFS_I(inode)->generation > | 3015 | if (BTRFS_I(inode)->generation > |
2970 | root->fs_info->last_trans_committed) { | 3016 | root->fs_info->last_trans_committed) { |
2971 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 3017 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
2972 | BUG_ON(ret); | 3018 | if (ret) |
3019 | goto end_trans; | ||
2973 | } | 3020 | } |
2974 | if (IS_ROOT(parent)) | 3021 | if (IS_ROOT(parent)) |
2975 | break; | 3022 | break; |
2976 | 3023 | ||
2977 | parent = parent->d_parent; | 3024 | parent = dget_parent(parent); |
3025 | dput(old_parent); | ||
3026 | old_parent = parent; | ||
2978 | } | 3027 | } |
2979 | no_parent: | ||
2980 | ret = 0; | 3028 | ret = 0; |
3029 | end_trans: | ||
3030 | dput(old_parent); | ||
3031 | if (ret < 0) { | ||
3032 | BUG_ON(ret != -ENOSPC); | ||
3033 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
3034 | ret = 1; | ||
3035 | } | ||
2981 | btrfs_end_log_trans(root); | 3036 | btrfs_end_log_trans(root); |
2982 | end_no_trans: | 3037 | end_no_trans: |
2983 | return ret; | 3038 | return ret; |
@@ -2992,8 +3047,13 @@ end_no_trans: | |||
2992 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 3047 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
2993 | struct btrfs_root *root, struct dentry *dentry) | 3048 | struct btrfs_root *root, struct dentry *dentry) |
2994 | { | 3049 | { |
2995 | return btrfs_log_inode_parent(trans, root, dentry->d_inode, | 3050 | struct dentry *parent = dget_parent(dentry); |
2996 | dentry->d_parent, 0); | 3051 | int ret; |
3052 | |||
3053 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0); | ||
3054 | dput(parent); | ||
3055 | |||
3056 | return ret; | ||
2997 | } | 3057 | } |
2998 | 3058 | ||
2999 | /* | 3059 | /* |
@@ -3019,7 +3079,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
3019 | path = btrfs_alloc_path(); | 3079 | path = btrfs_alloc_path(); |
3020 | BUG_ON(!path); | 3080 | BUG_ON(!path); |
3021 | 3081 | ||
3022 | trans = btrfs_start_transaction(fs_info->tree_root, 1); | 3082 | trans = btrfs_start_transaction(fs_info->tree_root, 0); |
3023 | 3083 | ||
3024 | wc.trans = trans; | 3084 | wc.trans = trans; |
3025 | wc.pin = 1; | 3085 | wc.pin = 1; |
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 0776eacb5083..3dfae84c8cc8 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -25,6 +25,8 @@ | |||
25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
26 | struct btrfs_root *root); | 26 | struct btrfs_root *root); |
27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
28 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
29 | struct btrfs_fs_info *fs_info); | ||
28 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | 30 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); |
29 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 31 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
30 | struct btrfs_root *root, struct dentry *dentry); | 32 | struct btrfs_root *root, struct dentry *dentry); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 41ecbb2347f2..d158530233b7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -17,10 +17,12 @@ | |||
17 | */ | 17 | */ |
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/bio.h> | 19 | #include <linux/bio.h> |
20 | #include <linux/slab.h> | ||
20 | #include <linux/buffer_head.h> | 21 | #include <linux/buffer_head.h> |
21 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
22 | #include <linux/random.h> | 23 | #include <linux/random.h> |
23 | #include <linux/iocontext.h> | 24 | #include <linux/iocontext.h> |
25 | #include <linux/capability.h> | ||
24 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
25 | #include "compat.h" | 27 | #include "compat.h" |
26 | #include "ctree.h" | 28 | #include "ctree.h" |
@@ -256,13 +258,13 @@ loop_lock: | |||
256 | wake_up(&fs_info->async_submit_wait); | 258 | wake_up(&fs_info->async_submit_wait); |
257 | 259 | ||
258 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | 260 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); |
259 | submit_bio(cur->bi_rw, cur); | ||
260 | num_run++; | ||
261 | batch_run++; | ||
262 | 261 | ||
263 | if (bio_rw_flagged(cur, BIO_RW_SYNCIO)) | 262 | if (cur->bi_rw & REQ_SYNC) |
264 | num_sync_run++; | 263 | num_sync_run++; |
265 | 264 | ||
265 | submit_bio(cur->bi_rw, cur); | ||
266 | num_run++; | ||
267 | batch_run++; | ||
266 | if (need_resched()) { | 268 | if (need_resched()) { |
267 | if (num_sync_run) { | 269 | if (num_sync_run) { |
268 | blk_run_backing_dev(bdi, NULL); | 270 | blk_run_backing_dev(bdi, NULL); |
@@ -325,16 +327,6 @@ loop_lock: | |||
325 | num_sync_run = 0; | 327 | num_sync_run = 0; |
326 | blk_run_backing_dev(bdi, NULL); | 328 | blk_run_backing_dev(bdi, NULL); |
327 | } | 329 | } |
328 | |||
329 | cond_resched(); | ||
330 | if (again) | ||
331 | goto loop; | ||
332 | |||
333 | spin_lock(&device->io_lock); | ||
334 | if (device->pending_bios.head || device->pending_sync_bios.head) | ||
335 | goto loop_lock; | ||
336 | spin_unlock(&device->io_lock); | ||
337 | |||
338 | /* | 330 | /* |
339 | * IO has already been through a long path to get here. Checksumming, | 331 | * IO has already been through a long path to get here. Checksumming, |
340 | * async helper threads, perhaps compression. We've done a pretty | 332 | * async helper threads, perhaps compression. We've done a pretty |
@@ -346,6 +338,16 @@ loop_lock: | |||
346 | * cared about found its way down here. | 338 | * cared about found its way down here. |
347 | */ | 339 | */ |
348 | blk_run_backing_dev(bdi, NULL); | 340 | blk_run_backing_dev(bdi, NULL); |
341 | |||
342 | cond_resched(); | ||
343 | if (again) | ||
344 | goto loop; | ||
345 | |||
346 | spin_lock(&device->io_lock); | ||
347 | if (device->pending_bios.head || device->pending_sync_bios.head) | ||
348 | goto loop_lock; | ||
349 | spin_unlock(&device->io_lock); | ||
350 | |||
349 | done: | 351 | done: |
350 | return 0; | 352 | return 0; |
351 | } | 353 | } |
@@ -365,6 +367,7 @@ static noinline int device_list_add(const char *path, | |||
365 | struct btrfs_device *device; | 367 | struct btrfs_device *device; |
366 | struct btrfs_fs_devices *fs_devices; | 368 | struct btrfs_fs_devices *fs_devices; |
367 | u64 found_transid = btrfs_super_generation(disk_super); | 369 | u64 found_transid = btrfs_super_generation(disk_super); |
370 | char *name; | ||
368 | 371 | ||
369 | fs_devices = find_fsid(disk_super->fsid); | 372 | fs_devices = find_fsid(disk_super->fsid); |
370 | if (!fs_devices) { | 373 | if (!fs_devices) { |
@@ -396,7 +399,6 @@ static noinline int device_list_add(const char *path, | |||
396 | device->work.func = pending_bios_fn; | 399 | device->work.func = pending_bios_fn; |
397 | memcpy(device->uuid, disk_super->dev_item.uuid, | 400 | memcpy(device->uuid, disk_super->dev_item.uuid, |
398 | BTRFS_UUID_SIZE); | 401 | BTRFS_UUID_SIZE); |
399 | device->barriers = 1; | ||
400 | spin_lock_init(&device->io_lock); | 402 | spin_lock_init(&device->io_lock); |
401 | device->name = kstrdup(path, GFP_NOFS); | 403 | device->name = kstrdup(path, GFP_NOFS); |
402 | if (!device->name) { | 404 | if (!device->name) { |
@@ -411,6 +413,16 @@ static noinline int device_list_add(const char *path, | |||
411 | 413 | ||
412 | device->fs_devices = fs_devices; | 414 | device->fs_devices = fs_devices; |
413 | fs_devices->num_devices++; | 415 | fs_devices->num_devices++; |
416 | } else if (!device->name || strcmp(device->name, path)) { | ||
417 | name = kstrdup(path, GFP_NOFS); | ||
418 | if (!name) | ||
419 | return -ENOMEM; | ||
420 | kfree(device->name); | ||
421 | device->name = name; | ||
422 | if (device->missing) { | ||
423 | fs_devices->missing_devices--; | ||
424 | device->missing = 0; | ||
425 | } | ||
414 | } | 426 | } |
415 | 427 | ||
416 | if (found_transid > fs_devices->latest_trans) { | 428 | if (found_transid > fs_devices->latest_trans) { |
@@ -454,7 +466,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
454 | device->devid = orig_dev->devid; | 466 | device->devid = orig_dev->devid; |
455 | device->work.func = pending_bios_fn; | 467 | device->work.func = pending_bios_fn; |
456 | memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); | 468 | memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); |
457 | device->barriers = 1; | ||
458 | spin_lock_init(&device->io_lock); | 469 | spin_lock_init(&device->io_lock); |
459 | INIT_LIST_HEAD(&device->dev_list); | 470 | INIT_LIST_HEAD(&device->dev_list); |
460 | INIT_LIST_HEAD(&device->dev_alloc_list); | 471 | INIT_LIST_HEAD(&device->dev_alloc_list); |
@@ -483,7 +494,7 @@ again: | |||
483 | continue; | 494 | continue; |
484 | 495 | ||
485 | if (device->bdev) { | 496 | if (device->bdev) { |
486 | close_bdev_exclusive(device->bdev, device->mode); | 497 | blkdev_put(device->bdev, device->mode); |
487 | device->bdev = NULL; | 498 | device->bdev = NULL; |
488 | fs_devices->open_devices--; | 499 | fs_devices->open_devices--; |
489 | } | 500 | } |
@@ -517,7 +528,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
517 | 528 | ||
518 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | 529 | list_for_each_entry(device, &fs_devices->devices, dev_list) { |
519 | if (device->bdev) { | 530 | if (device->bdev) { |
520 | close_bdev_exclusive(device->bdev, device->mode); | 531 | blkdev_put(device->bdev, device->mode); |
521 | fs_devices->open_devices--; | 532 | fs_devices->open_devices--; |
522 | } | 533 | } |
523 | if (device->writeable) { | 534 | if (device->writeable) { |
@@ -574,13 +585,15 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
574 | int seeding = 1; | 585 | int seeding = 1; |
575 | int ret = 0; | 586 | int ret = 0; |
576 | 587 | ||
588 | flags |= FMODE_EXCL; | ||
589 | |||
577 | list_for_each_entry(device, head, dev_list) { | 590 | list_for_each_entry(device, head, dev_list) { |
578 | if (device->bdev) | 591 | if (device->bdev) |
579 | continue; | 592 | continue; |
580 | if (!device->name) | 593 | if (!device->name) |
581 | continue; | 594 | continue; |
582 | 595 | ||
583 | bdev = open_bdev_exclusive(device->name, flags, holder); | 596 | bdev = blkdev_get_by_path(device->name, flags, holder); |
584 | if (IS_ERR(bdev)) { | 597 | if (IS_ERR(bdev)) { |
585 | printk(KERN_INFO "open %s failed\n", device->name); | 598 | printk(KERN_INFO "open %s failed\n", device->name); |
586 | goto error; | 599 | goto error; |
@@ -588,11 +601,13 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
588 | set_blocksize(bdev, 4096); | 601 | set_blocksize(bdev, 4096); |
589 | 602 | ||
590 | bh = btrfs_read_dev_super(bdev); | 603 | bh = btrfs_read_dev_super(bdev); |
591 | if (!bh) | 604 | if (!bh) { |
605 | ret = -EINVAL; | ||
592 | goto error_close; | 606 | goto error_close; |
607 | } | ||
593 | 608 | ||
594 | disk_super = (struct btrfs_super_block *)bh->b_data; | 609 | disk_super = (struct btrfs_super_block *)bh->b_data; |
595 | devid = le64_to_cpu(disk_super->dev_item.devid); | 610 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
596 | if (devid != device->devid) | 611 | if (devid != device->devid) |
597 | goto error_brelse; | 612 | goto error_brelse; |
598 | 613 | ||
@@ -632,7 +647,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
632 | error_brelse: | 647 | error_brelse: |
633 | brelse(bh); | 648 | brelse(bh); |
634 | error_close: | 649 | error_close: |
635 | close_bdev_exclusive(bdev, FMODE_READ); | 650 | blkdev_put(bdev, flags); |
636 | error: | 651 | error: |
637 | continue; | 652 | continue; |
638 | } | 653 | } |
@@ -678,7 +693,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
678 | 693 | ||
679 | mutex_lock(&uuid_mutex); | 694 | mutex_lock(&uuid_mutex); |
680 | 695 | ||
681 | bdev = open_bdev_exclusive(path, flags, holder); | 696 | flags |= FMODE_EXCL; |
697 | bdev = blkdev_get_by_path(path, flags, holder); | ||
682 | 698 | ||
683 | if (IS_ERR(bdev)) { | 699 | if (IS_ERR(bdev)) { |
684 | ret = PTR_ERR(bdev); | 700 | ret = PTR_ERR(bdev); |
@@ -690,11 +706,11 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
690 | goto error_close; | 706 | goto error_close; |
691 | bh = btrfs_read_dev_super(bdev); | 707 | bh = btrfs_read_dev_super(bdev); |
692 | if (!bh) { | 708 | if (!bh) { |
693 | ret = -EIO; | 709 | ret = -EINVAL; |
694 | goto error_close; | 710 | goto error_close; |
695 | } | 711 | } |
696 | disk_super = (struct btrfs_super_block *)bh->b_data; | 712 | disk_super = (struct btrfs_super_block *)bh->b_data; |
697 | devid = le64_to_cpu(disk_super->dev_item.devid); | 713 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
698 | transid = btrfs_super_generation(disk_super); | 714 | transid = btrfs_super_generation(disk_super); |
699 | if (disk_super->label[0]) | 715 | if (disk_super->label[0]) |
700 | printk(KERN_INFO "device label %s ", disk_super->label); | 716 | printk(KERN_INFO "device label %s ", disk_super->label); |
@@ -710,65 +726,173 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
710 | 726 | ||
711 | brelse(bh); | 727 | brelse(bh); |
712 | error_close: | 728 | error_close: |
713 | close_bdev_exclusive(bdev, flags); | 729 | blkdev_put(bdev, flags); |
714 | error: | 730 | error: |
715 | mutex_unlock(&uuid_mutex); | 731 | mutex_unlock(&uuid_mutex); |
716 | return ret; | 732 | return ret; |
717 | } | 733 | } |
718 | 734 | ||
735 | /* helper to account the used device space in the range */ | ||
736 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | ||
737 | u64 end, u64 *length) | ||
738 | { | ||
739 | struct btrfs_key key; | ||
740 | struct btrfs_root *root = device->dev_root; | ||
741 | struct btrfs_dev_extent *dev_extent; | ||
742 | struct btrfs_path *path; | ||
743 | u64 extent_end; | ||
744 | int ret; | ||
745 | int slot; | ||
746 | struct extent_buffer *l; | ||
747 | |||
748 | *length = 0; | ||
749 | |||
750 | if (start >= device->total_bytes) | ||
751 | return 0; | ||
752 | |||
753 | path = btrfs_alloc_path(); | ||
754 | if (!path) | ||
755 | return -ENOMEM; | ||
756 | path->reada = 2; | ||
757 | |||
758 | key.objectid = device->devid; | ||
759 | key.offset = start; | ||
760 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
761 | |||
762 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
763 | if (ret < 0) | ||
764 | goto out; | ||
765 | if (ret > 0) { | ||
766 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | ||
767 | if (ret < 0) | ||
768 | goto out; | ||
769 | } | ||
770 | |||
771 | while (1) { | ||
772 | l = path->nodes[0]; | ||
773 | slot = path->slots[0]; | ||
774 | if (slot >= btrfs_header_nritems(l)) { | ||
775 | ret = btrfs_next_leaf(root, path); | ||
776 | if (ret == 0) | ||
777 | continue; | ||
778 | if (ret < 0) | ||
779 | goto out; | ||
780 | |||
781 | break; | ||
782 | } | ||
783 | btrfs_item_key_to_cpu(l, &key, slot); | ||
784 | |||
785 | if (key.objectid < device->devid) | ||
786 | goto next; | ||
787 | |||
788 | if (key.objectid > device->devid) | ||
789 | break; | ||
790 | |||
791 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
792 | goto next; | ||
793 | |||
794 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | ||
795 | extent_end = key.offset + btrfs_dev_extent_length(l, | ||
796 | dev_extent); | ||
797 | if (key.offset <= start && extent_end > end) { | ||
798 | *length = end - start + 1; | ||
799 | break; | ||
800 | } else if (key.offset <= start && extent_end > start) | ||
801 | *length += extent_end - start; | ||
802 | else if (key.offset > start && extent_end <= end) | ||
803 | *length += extent_end - key.offset; | ||
804 | else if (key.offset > start && key.offset <= end) { | ||
805 | *length += end - key.offset + 1; | ||
806 | break; | ||
807 | } else if (key.offset > end) | ||
808 | break; | ||
809 | |||
810 | next: | ||
811 | path->slots[0]++; | ||
812 | } | ||
813 | ret = 0; | ||
814 | out: | ||
815 | btrfs_free_path(path); | ||
816 | return ret; | ||
817 | } | ||
818 | |||
719 | /* | 819 | /* |
820 | * find_free_dev_extent - find free space in the specified device | ||
821 | * @trans: transaction handler | ||
822 | * @device: the device which we search the free space in | ||
823 | * @num_bytes: the size of the free space that we need | ||
824 | * @start: store the start of the free space. | ||
825 | * @len: the size of the free space. that we find, or the size of the max | ||
826 | * free space if we don't find suitable free space | ||
827 | * | ||
720 | * this uses a pretty simple search, the expectation is that it is | 828 | * this uses a pretty simple search, the expectation is that it is |
721 | * called very infrequently and that a given device has a small number | 829 | * called very infrequently and that a given device has a small number |
722 | * of extents | 830 | * of extents |
831 | * | ||
832 | * @start is used to store the start of the free space if we find. But if we | ||
833 | * don't find suitable free space, it will be used to store the start position | ||
834 | * of the max free space. | ||
835 | * | ||
836 | * @len is used to store the size of the free space that we find. | ||
837 | * But if we don't find suitable free space, it is used to store the size of | ||
838 | * the max free space. | ||
723 | */ | 839 | */ |
724 | int find_free_dev_extent(struct btrfs_trans_handle *trans, | 840 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
725 | struct btrfs_device *device, u64 num_bytes, | 841 | struct btrfs_device *device, u64 num_bytes, |
726 | u64 *start, u64 *max_avail) | 842 | u64 *start, u64 *len) |
727 | { | 843 | { |
728 | struct btrfs_key key; | 844 | struct btrfs_key key; |
729 | struct btrfs_root *root = device->dev_root; | 845 | struct btrfs_root *root = device->dev_root; |
730 | struct btrfs_dev_extent *dev_extent = NULL; | 846 | struct btrfs_dev_extent *dev_extent; |
731 | struct btrfs_path *path; | 847 | struct btrfs_path *path; |
732 | u64 hole_size = 0; | 848 | u64 hole_size; |
733 | u64 last_byte = 0; | 849 | u64 max_hole_start; |
734 | u64 search_start = 0; | 850 | u64 max_hole_size; |
851 | u64 extent_end; | ||
852 | u64 search_start; | ||
735 | u64 search_end = device->total_bytes; | 853 | u64 search_end = device->total_bytes; |
736 | int ret; | 854 | int ret; |
737 | int slot = 0; | 855 | int slot; |
738 | int start_found; | ||
739 | struct extent_buffer *l; | 856 | struct extent_buffer *l; |
740 | 857 | ||
741 | path = btrfs_alloc_path(); | ||
742 | if (!path) | ||
743 | return -ENOMEM; | ||
744 | path->reada = 2; | ||
745 | start_found = 0; | ||
746 | |||
747 | /* FIXME use last free of some kind */ | 858 | /* FIXME use last free of some kind */ |
748 | 859 | ||
749 | /* we don't want to overwrite the superblock on the drive, | 860 | /* we don't want to overwrite the superblock on the drive, |
750 | * so we make sure to start at an offset of at least 1MB | 861 | * so we make sure to start at an offset of at least 1MB |
751 | */ | 862 | */ |
752 | search_start = max((u64)1024 * 1024, search_start); | 863 | search_start = 1024 * 1024; |
753 | 864 | ||
754 | if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) | 865 | if (root->fs_info->alloc_start + num_bytes <= search_end) |
755 | search_start = max(root->fs_info->alloc_start, search_start); | 866 | search_start = max(root->fs_info->alloc_start, search_start); |
756 | 867 | ||
868 | max_hole_start = search_start; | ||
869 | max_hole_size = 0; | ||
870 | |||
871 | if (search_start >= search_end) { | ||
872 | ret = -ENOSPC; | ||
873 | goto error; | ||
874 | } | ||
875 | |||
876 | path = btrfs_alloc_path(); | ||
877 | if (!path) { | ||
878 | ret = -ENOMEM; | ||
879 | goto error; | ||
880 | } | ||
881 | path->reada = 2; | ||
882 | |||
757 | key.objectid = device->devid; | 883 | key.objectid = device->devid; |
758 | key.offset = search_start; | 884 | key.offset = search_start; |
759 | key.type = BTRFS_DEV_EXTENT_KEY; | 885 | key.type = BTRFS_DEV_EXTENT_KEY; |
886 | |||
760 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 887 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); |
761 | if (ret < 0) | 888 | if (ret < 0) |
762 | goto error; | 889 | goto out; |
763 | if (ret > 0) { | 890 | if (ret > 0) { |
764 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | 891 | ret = btrfs_previous_item(root, path, key.objectid, key.type); |
765 | if (ret < 0) | 892 | if (ret < 0) |
766 | goto error; | 893 | goto out; |
767 | if (ret > 0) | ||
768 | start_found = 1; | ||
769 | } | 894 | } |
770 | l = path->nodes[0]; | 895 | |
771 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | ||
772 | while (1) { | 896 | while (1) { |
773 | l = path->nodes[0]; | 897 | l = path->nodes[0]; |
774 | slot = path->slots[0]; | 898 | slot = path->slots[0]; |
@@ -777,24 +901,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
777 | if (ret == 0) | 901 | if (ret == 0) |
778 | continue; | 902 | continue; |
779 | if (ret < 0) | 903 | if (ret < 0) |
780 | goto error; | 904 | goto out; |
781 | no_more_items: | 905 | |
782 | if (!start_found) { | 906 | break; |
783 | if (search_start >= search_end) { | ||
784 | ret = -ENOSPC; | ||
785 | goto error; | ||
786 | } | ||
787 | *start = search_start; | ||
788 | start_found = 1; | ||
789 | goto check_pending; | ||
790 | } | ||
791 | *start = last_byte > search_start ? | ||
792 | last_byte : search_start; | ||
793 | if (search_end <= *start) { | ||
794 | ret = -ENOSPC; | ||
795 | goto error; | ||
796 | } | ||
797 | goto check_pending; | ||
798 | } | 907 | } |
799 | btrfs_item_key_to_cpu(l, &key, slot); | 908 | btrfs_item_key_to_cpu(l, &key, slot); |
800 | 909 | ||
@@ -802,48 +911,62 @@ no_more_items: | |||
802 | goto next; | 911 | goto next; |
803 | 912 | ||
804 | if (key.objectid > device->devid) | 913 | if (key.objectid > device->devid) |
805 | goto no_more_items; | 914 | break; |
806 | 915 | ||
807 | if (key.offset >= search_start && key.offset > last_byte && | 916 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) |
808 | start_found) { | 917 | goto next; |
809 | if (last_byte < search_start) | ||
810 | last_byte = search_start; | ||
811 | hole_size = key.offset - last_byte; | ||
812 | 918 | ||
813 | if (hole_size > *max_avail) | 919 | if (key.offset > search_start) { |
814 | *max_avail = hole_size; | 920 | hole_size = key.offset - search_start; |
815 | 921 | ||
816 | if (key.offset > last_byte && | 922 | if (hole_size > max_hole_size) { |
817 | hole_size >= num_bytes) { | 923 | max_hole_start = search_start; |
818 | *start = last_byte; | 924 | max_hole_size = hole_size; |
819 | goto check_pending; | 925 | } |
926 | |||
927 | /* | ||
928 | * If this free space is greater than which we need, | ||
929 | * it must be the max free space that we have found | ||
930 | * until now, so max_hole_start must point to the start | ||
931 | * of this free space and the length of this free space | ||
932 | * is stored in max_hole_size. Thus, we return | ||
933 | * max_hole_start and max_hole_size and go back to the | ||
934 | * caller. | ||
935 | */ | ||
936 | if (hole_size >= num_bytes) { | ||
937 | ret = 0; | ||
938 | goto out; | ||
820 | } | 939 | } |
821 | } | 940 | } |
822 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
823 | goto next; | ||
824 | 941 | ||
825 | start_found = 1; | ||
826 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 942 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
827 | last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); | 943 | extent_end = key.offset + btrfs_dev_extent_length(l, |
944 | dev_extent); | ||
945 | if (extent_end > search_start) | ||
946 | search_start = extent_end; | ||
828 | next: | 947 | next: |
829 | path->slots[0]++; | 948 | path->slots[0]++; |
830 | cond_resched(); | 949 | cond_resched(); |
831 | } | 950 | } |
832 | check_pending: | ||
833 | /* we have to make sure we didn't find an extent that has already | ||
834 | * been allocated by the map tree or the original allocation | ||
835 | */ | ||
836 | BUG_ON(*start < search_start); | ||
837 | 951 | ||
838 | if (*start + num_bytes > search_end) { | 952 | hole_size = search_end- search_start; |
839 | ret = -ENOSPC; | 953 | if (hole_size > max_hole_size) { |
840 | goto error; | 954 | max_hole_start = search_start; |
955 | max_hole_size = hole_size; | ||
841 | } | 956 | } |
842 | /* check for pending inserts here */ | ||
843 | ret = 0; | ||
844 | 957 | ||
845 | error: | 958 | /* See above. */ |
959 | if (hole_size < num_bytes) | ||
960 | ret = -ENOSPC; | ||
961 | else | ||
962 | ret = 0; | ||
963 | |||
964 | out: | ||
846 | btrfs_free_path(path); | 965 | btrfs_free_path(path); |
966 | error: | ||
967 | *start = max_hole_start; | ||
968 | if (len) | ||
969 | *len = max_hole_size; | ||
847 | return ret; | 970 | return ret; |
848 | } | 971 | } |
849 | 972 | ||
@@ -1089,7 +1212,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, | |||
1089 | if (!path) | 1212 | if (!path) |
1090 | return -ENOMEM; | 1213 | return -ENOMEM; |
1091 | 1214 | ||
1092 | trans = btrfs_start_transaction(root, 1); | 1215 | trans = btrfs_start_transaction(root, 0); |
1093 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | 1216 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; |
1094 | key.type = BTRFS_DEV_ITEM_KEY; | 1217 | key.type = BTRFS_DEV_ITEM_KEY; |
1095 | key.offset = device->devid; | 1218 | key.offset = device->devid; |
@@ -1173,8 +1296,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1173 | goto out; | 1296 | goto out; |
1174 | } | 1297 | } |
1175 | } else { | 1298 | } else { |
1176 | bdev = open_bdev_exclusive(device_path, FMODE_READ, | 1299 | bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL, |
1177 | root->fs_info->bdev_holder); | 1300 | root->fs_info->bdev_holder); |
1178 | if (IS_ERR(bdev)) { | 1301 | if (IS_ERR(bdev)) { |
1179 | ret = PTR_ERR(bdev); | 1302 | ret = PTR_ERR(bdev); |
1180 | goto out; | 1303 | goto out; |
@@ -1183,11 +1306,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1183 | set_blocksize(bdev, 4096); | 1306 | set_blocksize(bdev, 4096); |
1184 | bh = btrfs_read_dev_super(bdev); | 1307 | bh = btrfs_read_dev_super(bdev); |
1185 | if (!bh) { | 1308 | if (!bh) { |
1186 | ret = -EIO; | 1309 | ret = -EINVAL; |
1187 | goto error_close; | 1310 | goto error_close; |
1188 | } | 1311 | } |
1189 | disk_super = (struct btrfs_super_block *)bh->b_data; | 1312 | disk_super = (struct btrfs_super_block *)bh->b_data; |
1190 | devid = le64_to_cpu(disk_super->dev_item.devid); | 1313 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
1191 | dev_uuid = disk_super->dev_item.uuid; | 1314 | dev_uuid = disk_super->dev_item.uuid; |
1192 | device = btrfs_find_device(root, devid, dev_uuid, | 1315 | device = btrfs_find_device(root, devid, dev_uuid, |
1193 | disk_super->fsid); | 1316 | disk_super->fsid); |
@@ -1230,6 +1353,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1230 | 1353 | ||
1231 | device->fs_devices->num_devices--; | 1354 | device->fs_devices->num_devices--; |
1232 | 1355 | ||
1356 | if (device->missing) | ||
1357 | root->fs_info->fs_devices->missing_devices--; | ||
1358 | |||
1233 | next_device = list_entry(root->fs_info->fs_devices->devices.next, | 1359 | next_device = list_entry(root->fs_info->fs_devices->devices.next, |
1234 | struct btrfs_device, dev_list); | 1360 | struct btrfs_device, dev_list); |
1235 | if (device->bdev == root->fs_info->sb->s_bdev) | 1361 | if (device->bdev == root->fs_info->sb->s_bdev) |
@@ -1238,7 +1364,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1238 | root->fs_info->fs_devices->latest_bdev = next_device->bdev; | 1364 | root->fs_info->fs_devices->latest_bdev = next_device->bdev; |
1239 | 1365 | ||
1240 | if (device->bdev) { | 1366 | if (device->bdev) { |
1241 | close_bdev_exclusive(device->bdev, device->mode); | 1367 | blkdev_put(device->bdev, device->mode); |
1242 | device->bdev = NULL; | 1368 | device->bdev = NULL; |
1243 | device->fs_devices->open_devices--; | 1369 | device->fs_devices->open_devices--; |
1244 | } | 1370 | } |
@@ -1281,7 +1407,7 @@ error_brelse: | |||
1281 | brelse(bh); | 1407 | brelse(bh); |
1282 | error_close: | 1408 | error_close: |
1283 | if (bdev) | 1409 | if (bdev) |
1284 | close_bdev_exclusive(bdev, FMODE_READ); | 1410 | blkdev_put(bdev, FMODE_READ | FMODE_EXCL); |
1285 | out: | 1411 | out: |
1286 | mutex_unlock(&root->fs_info->volume_mutex); | 1412 | mutex_unlock(&root->fs_info->volume_mutex); |
1287 | mutex_unlock(&uuid_mutex); | 1413 | mutex_unlock(&uuid_mutex); |
@@ -1433,7 +1559,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1433 | if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) | 1559 | if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) |
1434 | return -EINVAL; | 1560 | return -EINVAL; |
1435 | 1561 | ||
1436 | bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); | 1562 | bdev = blkdev_get_by_path(device_path, FMODE_EXCL, |
1563 | root->fs_info->bdev_holder); | ||
1437 | if (IS_ERR(bdev)) | 1564 | if (IS_ERR(bdev)) |
1438 | return PTR_ERR(bdev); | 1565 | return PTR_ERR(bdev); |
1439 | 1566 | ||
@@ -1478,10 +1605,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1478 | goto error; | 1605 | goto error; |
1479 | } | 1606 | } |
1480 | 1607 | ||
1481 | trans = btrfs_start_transaction(root, 1); | 1608 | trans = btrfs_start_transaction(root, 0); |
1482 | lock_chunks(root); | 1609 | lock_chunks(root); |
1483 | 1610 | ||
1484 | device->barriers = 1; | ||
1485 | device->writeable = 1; | 1611 | device->writeable = 1; |
1486 | device->work.func = pending_bios_fn; | 1612 | device->work.func = pending_bios_fn; |
1487 | generate_random_uuid(device->uuid); | 1613 | generate_random_uuid(device->uuid); |
@@ -1560,7 +1686,7 @@ out: | |||
1560 | mutex_unlock(&root->fs_info->volume_mutex); | 1686 | mutex_unlock(&root->fs_info->volume_mutex); |
1561 | return ret; | 1687 | return ret; |
1562 | error: | 1688 | error: |
1563 | close_bdev_exclusive(bdev, 0); | 1689 | blkdev_put(bdev, FMODE_EXCL); |
1564 | if (seeding_dev) { | 1690 | if (seeding_dev) { |
1565 | mutex_unlock(&uuid_mutex); | 1691 | mutex_unlock(&uuid_mutex); |
1566 | up_write(&sb->s_umount); | 1692 | up_write(&sb->s_umount); |
@@ -1743,9 +1869,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1743 | 1869 | ||
1744 | /* step one, relocate all the extents inside this chunk */ | 1870 | /* step one, relocate all the extents inside this chunk */ |
1745 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); | 1871 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); |
1746 | BUG_ON(ret); | 1872 | if (ret) |
1873 | return ret; | ||
1747 | 1874 | ||
1748 | trans = btrfs_start_transaction(root, 1); | 1875 | trans = btrfs_start_transaction(root, 0); |
1749 | BUG_ON(!trans); | 1876 | BUG_ON(!trans); |
1750 | 1877 | ||
1751 | lock_chunks(root); | 1878 | lock_chunks(root); |
@@ -1892,7 +2019,6 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1892 | u64 size_to_free; | 2019 | u64 size_to_free; |
1893 | struct btrfs_path *path; | 2020 | struct btrfs_path *path; |
1894 | struct btrfs_key key; | 2021 | struct btrfs_key key; |
1895 | struct btrfs_chunk *chunk; | ||
1896 | struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; | 2022 | struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; |
1897 | struct btrfs_trans_handle *trans; | 2023 | struct btrfs_trans_handle *trans; |
1898 | struct btrfs_key found_key; | 2024 | struct btrfs_key found_key; |
@@ -1900,6 +2026,9 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1900 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) | 2026 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) |
1901 | return -EROFS; | 2027 | return -EROFS; |
1902 | 2028 | ||
2029 | if (!capable(CAP_SYS_ADMIN)) | ||
2030 | return -EPERM; | ||
2031 | |||
1903 | mutex_lock(&dev_root->fs_info->volume_mutex); | 2032 | mutex_lock(&dev_root->fs_info->volume_mutex); |
1904 | dev_root = dev_root->fs_info->dev_root; | 2033 | dev_root = dev_root->fs_info->dev_root; |
1905 | 2034 | ||
@@ -1917,7 +2046,7 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1917 | break; | 2046 | break; |
1918 | BUG_ON(ret); | 2047 | BUG_ON(ret); |
1919 | 2048 | ||
1920 | trans = btrfs_start_transaction(dev_root, 1); | 2049 | trans = btrfs_start_transaction(dev_root, 0); |
1921 | BUG_ON(!trans); | 2050 | BUG_ON(!trans); |
1922 | 2051 | ||
1923 | ret = btrfs_grow_device(trans, device, old_size); | 2052 | ret = btrfs_grow_device(trans, device, old_size); |
@@ -1956,9 +2085,6 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1956 | if (found_key.objectid != key.objectid) | 2085 | if (found_key.objectid != key.objectid) |
1957 | break; | 2086 | break; |
1958 | 2087 | ||
1959 | chunk = btrfs_item_ptr(path->nodes[0], | ||
1960 | path->slots[0], | ||
1961 | struct btrfs_chunk); | ||
1962 | /* chunk zero is special */ | 2088 | /* chunk zero is special */ |
1963 | if (found_key.offset == 0) | 2089 | if (found_key.offset == 0) |
1964 | break; | 2090 | break; |
@@ -2086,11 +2212,7 @@ again: | |||
2086 | } | 2212 | } |
2087 | 2213 | ||
2088 | /* Shrinking succeeded, else we would be at "done". */ | 2214 | /* Shrinking succeeded, else we would be at "done". */ |
2089 | trans = btrfs_start_transaction(root, 1); | 2215 | trans = btrfs_start_transaction(root, 0); |
2090 | if (!trans) { | ||
2091 | ret = -ENOMEM; | ||
2092 | goto done; | ||
2093 | } | ||
2094 | lock_chunks(root); | 2216 | lock_chunks(root); |
2095 | 2217 | ||
2096 | device->disk_total_bytes = new_size; | 2218 | device->disk_total_bytes = new_size; |
@@ -2145,66 +2267,67 @@ static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, | |||
2145 | return calc_size * num_stripes; | 2267 | return calc_size * num_stripes; |
2146 | } | 2268 | } |
2147 | 2269 | ||
2148 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 2270 | /* Used to sort the devices by max_avail(descending sort) */ |
2149 | struct btrfs_root *extent_root, | 2271 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) |
2150 | struct map_lookup **map_ret, | ||
2151 | u64 *num_bytes, u64 *stripe_size, | ||
2152 | u64 start, u64 type) | ||
2153 | { | 2272 | { |
2154 | struct btrfs_fs_info *info = extent_root->fs_info; | 2273 | if (((struct btrfs_device_info *)dev_info1)->max_avail > |
2155 | struct btrfs_device *device = NULL; | 2274 | ((struct btrfs_device_info *)dev_info2)->max_avail) |
2156 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | 2275 | return -1; |
2157 | struct list_head *cur; | 2276 | else if (((struct btrfs_device_info *)dev_info1)->max_avail < |
2158 | struct map_lookup *map = NULL; | 2277 | ((struct btrfs_device_info *)dev_info2)->max_avail) |
2159 | struct extent_map_tree *em_tree; | 2278 | return 1; |
2160 | struct extent_map *em; | 2279 | else |
2161 | struct list_head private_devs; | 2280 | return 0; |
2162 | int min_stripe_size = 1 * 1024 * 1024; | 2281 | } |
2163 | u64 calc_size = 1024 * 1024 * 1024; | ||
2164 | u64 max_chunk_size = calc_size; | ||
2165 | u64 min_free; | ||
2166 | u64 avail; | ||
2167 | u64 max_avail = 0; | ||
2168 | u64 dev_offset; | ||
2169 | int num_stripes = 1; | ||
2170 | int min_stripes = 1; | ||
2171 | int sub_stripes = 0; | ||
2172 | int looped = 0; | ||
2173 | int ret; | ||
2174 | int index; | ||
2175 | int stripe_len = 64 * 1024; | ||
2176 | 2282 | ||
2177 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | 2283 | static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, |
2178 | (type & BTRFS_BLOCK_GROUP_DUP)) { | 2284 | int *num_stripes, int *min_stripes, |
2179 | WARN_ON(1); | 2285 | int *sub_stripes) |
2180 | type &= ~BTRFS_BLOCK_GROUP_DUP; | 2286 | { |
2181 | } | 2287 | *num_stripes = 1; |
2182 | if (list_empty(&fs_devices->alloc_list)) | 2288 | *min_stripes = 1; |
2183 | return -ENOSPC; | 2289 | *sub_stripes = 0; |
2184 | 2290 | ||
2185 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { | 2291 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { |
2186 | num_stripes = fs_devices->rw_devices; | 2292 | *num_stripes = fs_devices->rw_devices; |
2187 | min_stripes = 2; | 2293 | *min_stripes = 2; |
2188 | } | 2294 | } |
2189 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { | 2295 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { |
2190 | num_stripes = 2; | 2296 | *num_stripes = 2; |
2191 | min_stripes = 2; | 2297 | *min_stripes = 2; |
2192 | } | 2298 | } |
2193 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { | 2299 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { |
2194 | num_stripes = min_t(u64, 2, fs_devices->rw_devices); | 2300 | if (fs_devices->rw_devices < 2) |
2195 | if (num_stripes < 2) | ||
2196 | return -ENOSPC; | 2301 | return -ENOSPC; |
2197 | min_stripes = 2; | 2302 | *num_stripes = 2; |
2303 | *min_stripes = 2; | ||
2198 | } | 2304 | } |
2199 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | 2305 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { |
2200 | num_stripes = fs_devices->rw_devices; | 2306 | *num_stripes = fs_devices->rw_devices; |
2201 | if (num_stripes < 4) | 2307 | if (*num_stripes < 4) |
2202 | return -ENOSPC; | 2308 | return -ENOSPC; |
2203 | num_stripes &= ~(u32)1; | 2309 | *num_stripes &= ~(u32)1; |
2204 | sub_stripes = 2; | 2310 | *sub_stripes = 2; |
2205 | min_stripes = 4; | 2311 | *min_stripes = 4; |
2206 | } | 2312 | } |
2207 | 2313 | ||
2314 | return 0; | ||
2315 | } | ||
2316 | |||
2317 | static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, | ||
2318 | u64 proposed_size, u64 type, | ||
2319 | int num_stripes, int small_stripe) | ||
2320 | { | ||
2321 | int min_stripe_size = 1 * 1024 * 1024; | ||
2322 | u64 calc_size = proposed_size; | ||
2323 | u64 max_chunk_size = calc_size; | ||
2324 | int ncopies = 1; | ||
2325 | |||
2326 | if (type & (BTRFS_BLOCK_GROUP_RAID1 | | ||
2327 | BTRFS_BLOCK_GROUP_DUP | | ||
2328 | BTRFS_BLOCK_GROUP_RAID10)) | ||
2329 | ncopies = 2; | ||
2330 | |||
2208 | if (type & BTRFS_BLOCK_GROUP_DATA) { | 2331 | if (type & BTRFS_BLOCK_GROUP_DATA) { |
2209 | max_chunk_size = 10 * calc_size; | 2332 | max_chunk_size = 10 * calc_size; |
2210 | min_stripe_size = 64 * 1024 * 1024; | 2333 | min_stripe_size = 64 * 1024 * 1024; |
@@ -2221,43 +2344,209 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2221 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), | 2344 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), |
2222 | max_chunk_size); | 2345 | max_chunk_size); |
2223 | 2346 | ||
2224 | again: | 2347 | if (calc_size * num_stripes > max_chunk_size * ncopies) { |
2225 | max_avail = 0; | 2348 | calc_size = max_chunk_size * ncopies; |
2226 | if (!map || map->num_stripes != num_stripes) { | 2349 | do_div(calc_size, num_stripes); |
2227 | kfree(map); | 2350 | do_div(calc_size, BTRFS_STRIPE_LEN); |
2228 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | 2351 | calc_size *= BTRFS_STRIPE_LEN; |
2229 | if (!map) | 2352 | } |
2230 | return -ENOMEM; | 2353 | |
2354 | /* we don't want tiny stripes */ | ||
2355 | if (!small_stripe) | ||
2356 | calc_size = max_t(u64, min_stripe_size, calc_size); | ||
2357 | |||
2358 | /* | ||
2359 | * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure | ||
2360 | * we end up with something bigger than a stripe | ||
2361 | */ | ||
2362 | calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN); | ||
2363 | |||
2364 | do_div(calc_size, BTRFS_STRIPE_LEN); | ||
2365 | calc_size *= BTRFS_STRIPE_LEN; | ||
2366 | |||
2367 | return calc_size; | ||
2368 | } | ||
2369 | |||
2370 | static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map, | ||
2371 | int num_stripes) | ||
2372 | { | ||
2373 | struct map_lookup *new; | ||
2374 | size_t len = map_lookup_size(num_stripes); | ||
2375 | |||
2376 | BUG_ON(map->num_stripes < num_stripes); | ||
2377 | |||
2378 | if (map->num_stripes == num_stripes) | ||
2379 | return map; | ||
2380 | |||
2381 | new = kmalloc(len, GFP_NOFS); | ||
2382 | if (!new) { | ||
2383 | /* just change map->num_stripes */ | ||
2231 | map->num_stripes = num_stripes; | 2384 | map->num_stripes = num_stripes; |
2385 | return map; | ||
2232 | } | 2386 | } |
2233 | 2387 | ||
2234 | if (calc_size * num_stripes > max_chunk_size) { | 2388 | memcpy(new, map, len); |
2235 | calc_size = max_chunk_size; | 2389 | new->num_stripes = num_stripes; |
2236 | do_div(calc_size, num_stripes); | 2390 | kfree(map); |
2237 | do_div(calc_size, stripe_len); | 2391 | return new; |
2238 | calc_size *= stripe_len; | 2392 | } |
2393 | |||
2394 | /* | ||
2395 | * helper to allocate device space from btrfs_device_info, in which we stored | ||
2396 | * max free space information of every device. It is used when we can not | ||
2397 | * allocate chunks by default size. | ||
2398 | * | ||
2399 | * By this helper, we can allocate a new chunk as larger as possible. | ||
2400 | */ | ||
2401 | static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans, | ||
2402 | struct btrfs_fs_devices *fs_devices, | ||
2403 | struct btrfs_device_info *devices, | ||
2404 | int nr_device, u64 type, | ||
2405 | struct map_lookup **map_lookup, | ||
2406 | int min_stripes, u64 *stripe_size) | ||
2407 | { | ||
2408 | int i, index, sort_again = 0; | ||
2409 | int min_devices = min_stripes; | ||
2410 | u64 max_avail, min_free; | ||
2411 | struct map_lookup *map = *map_lookup; | ||
2412 | int ret; | ||
2413 | |||
2414 | if (nr_device < min_stripes) | ||
2415 | return -ENOSPC; | ||
2416 | |||
2417 | btrfs_descending_sort_devices(devices, nr_device); | ||
2418 | |||
2419 | max_avail = devices[0].max_avail; | ||
2420 | if (!max_avail) | ||
2421 | return -ENOSPC; | ||
2422 | |||
2423 | for (i = 0; i < nr_device; i++) { | ||
2424 | /* | ||
2425 | * if dev_offset = 0, it means the free space of this device | ||
2426 | * is less than what we need, and we didn't search max avail | ||
2427 | * extent on this device, so do it now. | ||
2428 | */ | ||
2429 | if (!devices[i].dev_offset) { | ||
2430 | ret = find_free_dev_extent(trans, devices[i].dev, | ||
2431 | max_avail, | ||
2432 | &devices[i].dev_offset, | ||
2433 | &devices[i].max_avail); | ||
2434 | if (ret != 0 && ret != -ENOSPC) | ||
2435 | return ret; | ||
2436 | sort_again = 1; | ||
2437 | } | ||
2239 | } | 2438 | } |
2240 | /* we don't want tiny stripes */ | ||
2241 | calc_size = max_t(u64, min_stripe_size, calc_size); | ||
2242 | 2439 | ||
2243 | do_div(calc_size, stripe_len); | 2440 | /* we update the max avail free extent of each devices, sort again */ |
2244 | calc_size *= stripe_len; | 2441 | if (sort_again) |
2442 | btrfs_descending_sort_devices(devices, nr_device); | ||
2443 | |||
2444 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2445 | min_devices = 1; | ||
2446 | |||
2447 | if (!devices[min_devices - 1].max_avail) | ||
2448 | return -ENOSPC; | ||
2449 | |||
2450 | max_avail = devices[min_devices - 1].max_avail; | ||
2451 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2452 | do_div(max_avail, 2); | ||
2453 | |||
2454 | max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type, | ||
2455 | min_stripes, 1); | ||
2456 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2457 | min_free = max_avail * 2; | ||
2458 | else | ||
2459 | min_free = max_avail; | ||
2460 | |||
2461 | if (min_free > devices[min_devices - 1].max_avail) | ||
2462 | return -ENOSPC; | ||
2463 | |||
2464 | map = __shrink_map_lookup_stripes(map, min_stripes); | ||
2465 | *stripe_size = max_avail; | ||
2466 | |||
2467 | index = 0; | ||
2468 | for (i = 0; i < min_stripes; i++) { | ||
2469 | map->stripes[i].dev = devices[index].dev; | ||
2470 | map->stripes[i].physical = devices[index].dev_offset; | ||
2471 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
2472 | i++; | ||
2473 | map->stripes[i].dev = devices[index].dev; | ||
2474 | map->stripes[i].physical = devices[index].dev_offset + | ||
2475 | max_avail; | ||
2476 | } | ||
2477 | index++; | ||
2478 | } | ||
2479 | *map_lookup = map; | ||
2480 | |||
2481 | return 0; | ||
2482 | } | ||
2483 | |||
2484 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | ||
2485 | struct btrfs_root *extent_root, | ||
2486 | struct map_lookup **map_ret, | ||
2487 | u64 *num_bytes, u64 *stripe_size, | ||
2488 | u64 start, u64 type) | ||
2489 | { | ||
2490 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
2491 | struct btrfs_device *device = NULL; | ||
2492 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | ||
2493 | struct list_head *cur; | ||
2494 | struct map_lookup *map; | ||
2495 | struct extent_map_tree *em_tree; | ||
2496 | struct extent_map *em; | ||
2497 | struct btrfs_device_info *devices_info; | ||
2498 | struct list_head private_devs; | ||
2499 | u64 calc_size = 1024 * 1024 * 1024; | ||
2500 | u64 min_free; | ||
2501 | u64 avail; | ||
2502 | u64 dev_offset; | ||
2503 | int num_stripes; | ||
2504 | int min_stripes; | ||
2505 | int sub_stripes; | ||
2506 | int min_devices; /* the min number of devices we need */ | ||
2507 | int i; | ||
2508 | int ret; | ||
2509 | int index; | ||
2510 | |||
2511 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | ||
2512 | (type & BTRFS_BLOCK_GROUP_DUP)) { | ||
2513 | WARN_ON(1); | ||
2514 | type &= ~BTRFS_BLOCK_GROUP_DUP; | ||
2515 | } | ||
2516 | if (list_empty(&fs_devices->alloc_list)) | ||
2517 | return -ENOSPC; | ||
2518 | |||
2519 | ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes, | ||
2520 | &min_stripes, &sub_stripes); | ||
2521 | if (ret) | ||
2522 | return ret; | ||
2523 | |||
2524 | devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, | ||
2525 | GFP_NOFS); | ||
2526 | if (!devices_info) | ||
2527 | return -ENOMEM; | ||
2528 | |||
2529 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
2530 | if (!map) { | ||
2531 | ret = -ENOMEM; | ||
2532 | goto error; | ||
2533 | } | ||
2534 | map->num_stripes = num_stripes; | ||
2245 | 2535 | ||
2246 | cur = fs_devices->alloc_list.next; | 2536 | cur = fs_devices->alloc_list.next; |
2247 | index = 0; | 2537 | index = 0; |
2538 | i = 0; | ||
2248 | 2539 | ||
2249 | if (type & BTRFS_BLOCK_GROUP_DUP) | 2540 | calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type, |
2541 | num_stripes, 0); | ||
2542 | |||
2543 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
2250 | min_free = calc_size * 2; | 2544 | min_free = calc_size * 2; |
2251 | else | 2545 | min_devices = 1; |
2546 | } else { | ||
2252 | min_free = calc_size; | 2547 | min_free = calc_size; |
2253 | 2548 | min_devices = min_stripes; | |
2254 | /* | 2549 | } |
2255 | * we add 1MB because we never use the first 1MB of the device, unless | ||
2256 | * we've looped, then we are likely allocating the maximum amount of | ||
2257 | * space left already | ||
2258 | */ | ||
2259 | if (!looped) | ||
2260 | min_free += 1024 * 1024; | ||
2261 | 2550 | ||
2262 | INIT_LIST_HEAD(&private_devs); | 2551 | INIT_LIST_HEAD(&private_devs); |
2263 | while (index < num_stripes) { | 2552 | while (index < num_stripes) { |
@@ -2270,27 +2559,39 @@ again: | |||
2270 | cur = cur->next; | 2559 | cur = cur->next; |
2271 | 2560 | ||
2272 | if (device->in_fs_metadata && avail >= min_free) { | 2561 | if (device->in_fs_metadata && avail >= min_free) { |
2273 | ret = find_free_dev_extent(trans, device, | 2562 | ret = find_free_dev_extent(trans, device, min_free, |
2274 | min_free, &dev_offset, | 2563 | &devices_info[i].dev_offset, |
2275 | &max_avail); | 2564 | &devices_info[i].max_avail); |
2276 | if (ret == 0) { | 2565 | if (ret == 0) { |
2277 | list_move_tail(&device->dev_alloc_list, | 2566 | list_move_tail(&device->dev_alloc_list, |
2278 | &private_devs); | 2567 | &private_devs); |
2279 | map->stripes[index].dev = device; | 2568 | map->stripes[index].dev = device; |
2280 | map->stripes[index].physical = dev_offset; | 2569 | map->stripes[index].physical = |
2570 | devices_info[i].dev_offset; | ||
2281 | index++; | 2571 | index++; |
2282 | if (type & BTRFS_BLOCK_GROUP_DUP) { | 2572 | if (type & BTRFS_BLOCK_GROUP_DUP) { |
2283 | map->stripes[index].dev = device; | 2573 | map->stripes[index].dev = device; |
2284 | map->stripes[index].physical = | 2574 | map->stripes[index].physical = |
2285 | dev_offset + calc_size; | 2575 | devices_info[i].dev_offset + |
2576 | calc_size; | ||
2286 | index++; | 2577 | index++; |
2287 | } | 2578 | } |
2288 | } | 2579 | } else if (ret != -ENOSPC) |
2289 | } else if (device->in_fs_metadata && avail > max_avail) | 2580 | goto error; |
2290 | max_avail = avail; | 2581 | |
2582 | devices_info[i].dev = device; | ||
2583 | i++; | ||
2584 | } else if (device->in_fs_metadata && | ||
2585 | avail >= BTRFS_STRIPE_LEN) { | ||
2586 | devices_info[i].dev = device; | ||
2587 | devices_info[i].max_avail = avail; | ||
2588 | i++; | ||
2589 | } | ||
2590 | |||
2291 | if (cur == &fs_devices->alloc_list) | 2591 | if (cur == &fs_devices->alloc_list) |
2292 | break; | 2592 | break; |
2293 | } | 2593 | } |
2594 | |||
2294 | list_splice(&private_devs, &fs_devices->alloc_list); | 2595 | list_splice(&private_devs, &fs_devices->alloc_list); |
2295 | if (index < num_stripes) { | 2596 | if (index < num_stripes) { |
2296 | if (index >= min_stripes) { | 2597 | if (index >= min_stripes) { |
@@ -2299,34 +2600,36 @@ again: | |||
2299 | num_stripes /= sub_stripes; | 2600 | num_stripes /= sub_stripes; |
2300 | num_stripes *= sub_stripes; | 2601 | num_stripes *= sub_stripes; |
2301 | } | 2602 | } |
2302 | looped = 1; | 2603 | |
2303 | goto again; | 2604 | map = __shrink_map_lookup_stripes(map, num_stripes); |
2304 | } | 2605 | } else if (i >= min_devices) { |
2305 | if (!looped && max_avail > 0) { | 2606 | ret = __btrfs_alloc_tiny_space(trans, fs_devices, |
2306 | looped = 1; | 2607 | devices_info, i, type, |
2307 | calc_size = max_avail; | 2608 | &map, min_stripes, |
2308 | goto again; | 2609 | &calc_size); |
2610 | if (ret) | ||
2611 | goto error; | ||
2612 | } else { | ||
2613 | ret = -ENOSPC; | ||
2614 | goto error; | ||
2309 | } | 2615 | } |
2310 | kfree(map); | ||
2311 | return -ENOSPC; | ||
2312 | } | 2616 | } |
2313 | map->sector_size = extent_root->sectorsize; | 2617 | map->sector_size = extent_root->sectorsize; |
2314 | map->stripe_len = stripe_len; | 2618 | map->stripe_len = BTRFS_STRIPE_LEN; |
2315 | map->io_align = stripe_len; | 2619 | map->io_align = BTRFS_STRIPE_LEN; |
2316 | map->io_width = stripe_len; | 2620 | map->io_width = BTRFS_STRIPE_LEN; |
2317 | map->type = type; | 2621 | map->type = type; |
2318 | map->num_stripes = num_stripes; | ||
2319 | map->sub_stripes = sub_stripes; | 2622 | map->sub_stripes = sub_stripes; |
2320 | 2623 | ||
2321 | *map_ret = map; | 2624 | *map_ret = map; |
2322 | *stripe_size = calc_size; | 2625 | *stripe_size = calc_size; |
2323 | *num_bytes = chunk_bytes_by_type(type, calc_size, | 2626 | *num_bytes = chunk_bytes_by_type(type, calc_size, |
2324 | num_stripes, sub_stripes); | 2627 | map->num_stripes, sub_stripes); |
2325 | 2628 | ||
2326 | em = alloc_extent_map(GFP_NOFS); | 2629 | em = alloc_extent_map(GFP_NOFS); |
2327 | if (!em) { | 2630 | if (!em) { |
2328 | kfree(map); | 2631 | ret = -ENOMEM; |
2329 | return -ENOMEM; | 2632 | goto error; |
2330 | } | 2633 | } |
2331 | em->bdev = (struct block_device *)map; | 2634 | em->bdev = (struct block_device *)map; |
2332 | em->start = start; | 2635 | em->start = start; |
@@ -2359,7 +2662,13 @@ again: | |||
2359 | index++; | 2662 | index++; |
2360 | } | 2663 | } |
2361 | 2664 | ||
2665 | kfree(devices_info); | ||
2362 | return 0; | 2666 | return 0; |
2667 | |||
2668 | error: | ||
2669 | kfree(map); | ||
2670 | kfree(devices_info); | ||
2671 | return ret; | ||
2363 | } | 2672 | } |
2364 | 2673 | ||
2365 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | 2674 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, |
@@ -2638,7 +2947,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
2638 | int max_errors = 0; | 2947 | int max_errors = 0; |
2639 | struct btrfs_multi_bio *multi = NULL; | 2948 | struct btrfs_multi_bio *multi = NULL; |
2640 | 2949 | ||
2641 | if (multi_ret && !(rw & (1 << BIO_RW))) | 2950 | if (multi_ret && !(rw & REQ_WRITE)) |
2642 | stripes_allocated = 1; | 2951 | stripes_allocated = 1; |
2643 | again: | 2952 | again: |
2644 | if (multi_ret) { | 2953 | if (multi_ret) { |
@@ -2674,7 +2983,7 @@ again: | |||
2674 | mirror_num = 0; | 2983 | mirror_num = 0; |
2675 | 2984 | ||
2676 | /* if our multi bio struct is too small, back off and try again */ | 2985 | /* if our multi bio struct is too small, back off and try again */ |
2677 | if (rw & (1 << BIO_RW)) { | 2986 | if (rw & REQ_WRITE) { |
2678 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | | 2987 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | |
2679 | BTRFS_BLOCK_GROUP_DUP)) { | 2988 | BTRFS_BLOCK_GROUP_DUP)) { |
2680 | stripes_required = map->num_stripes; | 2989 | stripes_required = map->num_stripes; |
@@ -2684,7 +2993,7 @@ again: | |||
2684 | max_errors = 1; | 2993 | max_errors = 1; |
2685 | } | 2994 | } |
2686 | } | 2995 | } |
2687 | if (multi_ret && (rw & (1 << BIO_RW)) && | 2996 | if (multi_ret && (rw & REQ_WRITE) && |
2688 | stripes_allocated < stripes_required) { | 2997 | stripes_allocated < stripes_required) { |
2689 | stripes_allocated = map->num_stripes; | 2998 | stripes_allocated = map->num_stripes; |
2690 | free_extent_map(em); | 2999 | free_extent_map(em); |
@@ -2720,7 +3029,7 @@ again: | |||
2720 | num_stripes = 1; | 3029 | num_stripes = 1; |
2721 | stripe_index = 0; | 3030 | stripe_index = 0; |
2722 | if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | 3031 | if (map->type & BTRFS_BLOCK_GROUP_RAID1) { |
2723 | if (unplug_page || (rw & (1 << BIO_RW))) | 3032 | if (unplug_page || (rw & REQ_WRITE)) |
2724 | num_stripes = map->num_stripes; | 3033 | num_stripes = map->num_stripes; |
2725 | else if (mirror_num) | 3034 | else if (mirror_num) |
2726 | stripe_index = mirror_num - 1; | 3035 | stripe_index = mirror_num - 1; |
@@ -2731,7 +3040,7 @@ again: | |||
2731 | } | 3040 | } |
2732 | 3041 | ||
2733 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | 3042 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { |
2734 | if (rw & (1 << BIO_RW)) | 3043 | if (rw & REQ_WRITE) |
2735 | num_stripes = map->num_stripes; | 3044 | num_stripes = map->num_stripes; |
2736 | else if (mirror_num) | 3045 | else if (mirror_num) |
2737 | stripe_index = mirror_num - 1; | 3046 | stripe_index = mirror_num - 1; |
@@ -2742,7 +3051,7 @@ again: | |||
2742 | stripe_index = do_div(stripe_nr, factor); | 3051 | stripe_index = do_div(stripe_nr, factor); |
2743 | stripe_index *= map->sub_stripes; | 3052 | stripe_index *= map->sub_stripes; |
2744 | 3053 | ||
2745 | if (unplug_page || (rw & (1 << BIO_RW))) | 3054 | if (unplug_page || (rw & REQ_WRITE)) |
2746 | num_stripes = map->sub_stripes; | 3055 | num_stripes = map->sub_stripes; |
2747 | else if (mirror_num) | 3056 | else if (mirror_num) |
2748 | stripe_index += mirror_num - 1; | 3057 | stripe_index += mirror_num - 1; |
@@ -2932,7 +3241,7 @@ static noinline int schedule_bio(struct btrfs_root *root, | |||
2932 | struct btrfs_pending_bios *pending_bios; | 3241 | struct btrfs_pending_bios *pending_bios; |
2933 | 3242 | ||
2934 | /* don't bother with additional async steps for reads, right now */ | 3243 | /* don't bother with additional async steps for reads, right now */ |
2935 | if (!(rw & (1 << BIO_RW))) { | 3244 | if (!(rw & REQ_WRITE)) { |
2936 | bio_get(bio); | 3245 | bio_get(bio); |
2937 | submit_bio(rw, bio); | 3246 | submit_bio(rw, bio); |
2938 | bio_put(bio); | 3247 | bio_put(bio); |
@@ -2951,7 +3260,7 @@ static noinline int schedule_bio(struct btrfs_root *root, | |||
2951 | bio->bi_rw |= rw; | 3260 | bio->bi_rw |= rw; |
2952 | 3261 | ||
2953 | spin_lock(&device->io_lock); | 3262 | spin_lock(&device->io_lock); |
2954 | if (bio_rw_flagged(bio, BIO_RW_SYNCIO)) | 3263 | if (bio->bi_rw & REQ_SYNC) |
2955 | pending_bios = &device->pending_sync_bios; | 3264 | pending_bios = &device->pending_sync_bios; |
2956 | else | 3265 | else |
2957 | pending_bios = &device->pending_bios; | 3266 | pending_bios = &device->pending_bios; |
@@ -3021,8 +3330,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
3021 | } | 3330 | } |
3022 | bio->bi_sector = multi->stripes[dev_nr].physical >> 9; | 3331 | bio->bi_sector = multi->stripes[dev_nr].physical >> 9; |
3023 | dev = multi->stripes[dev_nr].dev; | 3332 | dev = multi->stripes[dev_nr].dev; |
3024 | BUG_ON(rw == WRITE && !dev->writeable); | 3333 | if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { |
3025 | if (dev && dev->bdev) { | ||
3026 | bio->bi_bdev = dev->bdev; | 3334 | bio->bi_bdev = dev->bdev; |
3027 | if (async_submit) | 3335 | if (async_submit) |
3028 | schedule_bio(root, dev, rw, bio); | 3336 | schedule_bio(root, dev, rw, bio); |
@@ -3071,12 +3379,13 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, | |||
3071 | return NULL; | 3379 | return NULL; |
3072 | list_add(&device->dev_list, | 3380 | list_add(&device->dev_list, |
3073 | &fs_devices->devices); | 3381 | &fs_devices->devices); |
3074 | device->barriers = 1; | ||
3075 | device->dev_root = root->fs_info->dev_root; | 3382 | device->dev_root = root->fs_info->dev_root; |
3076 | device->devid = devid; | 3383 | device->devid = devid; |
3077 | device->work.func = pending_bios_fn; | 3384 | device->work.func = pending_bios_fn; |
3078 | device->fs_devices = fs_devices; | 3385 | device->fs_devices = fs_devices; |
3386 | device->missing = 1; | ||
3079 | fs_devices->num_devices++; | 3387 | fs_devices->num_devices++; |
3388 | fs_devices->missing_devices++; | ||
3080 | spin_lock_init(&device->io_lock); | 3389 | spin_lock_init(&device->io_lock); |
3081 | INIT_LIST_HEAD(&device->dev_alloc_list); | 3390 | INIT_LIST_HEAD(&device->dev_alloc_list); |
3082 | memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); | 3391 | memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); |
@@ -3274,6 +3583,15 @@ static int read_one_dev(struct btrfs_root *root, | |||
3274 | device = add_missing_dev(root, devid, dev_uuid); | 3583 | device = add_missing_dev(root, devid, dev_uuid); |
3275 | if (!device) | 3584 | if (!device) |
3276 | return -ENOMEM; | 3585 | return -ENOMEM; |
3586 | } else if (!device->missing) { | ||
3587 | /* | ||
3588 | * this happens when a device that was properly setup | ||
3589 | * in the device info lists suddenly goes bad. | ||
3590 | * device->bdev is NULL, and so we have to set | ||
3591 | * device->missing to one here | ||
3592 | */ | ||
3593 | root->fs_info->fs_devices->missing_devices++; | ||
3594 | device->missing = 1; | ||
3277 | } | 3595 | } |
3278 | } | 3596 | } |
3279 | 3597 | ||
@@ -3382,6 +3700,8 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) | |||
3382 | key.type = 0; | 3700 | key.type = 0; |
3383 | again: | 3701 | again: |
3384 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 3702 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
3703 | if (ret < 0) | ||
3704 | goto error; | ||
3385 | while (1) { | 3705 | while (1) { |
3386 | leaf = path->nodes[0]; | 3706 | leaf = path->nodes[0]; |
3387 | slot = path->slots[0]; | 3707 | slot = path->slots[0]; |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 31b0fabdd2ea..7fb59d45fe8c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -20,8 +20,11 @@ | |||
20 | #define __BTRFS_VOLUMES_ | 20 | #define __BTRFS_VOLUMES_ |
21 | 21 | ||
22 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
23 | #include <linux/sort.h> | ||
23 | #include "async-thread.h" | 24 | #include "async-thread.h" |
24 | 25 | ||
26 | #define BTRFS_STRIPE_LEN (64 * 1024) | ||
27 | |||
25 | struct buffer_head; | 28 | struct buffer_head; |
26 | struct btrfs_pending_bios { | 29 | struct btrfs_pending_bios { |
27 | struct bio *head; | 30 | struct bio *head; |
@@ -42,15 +45,15 @@ struct btrfs_device { | |||
42 | int running_pending; | 45 | int running_pending; |
43 | u64 generation; | 46 | u64 generation; |
44 | 47 | ||
45 | int barriers; | ||
46 | int writeable; | 48 | int writeable; |
47 | int in_fs_metadata; | 49 | int in_fs_metadata; |
50 | int missing; | ||
48 | 51 | ||
49 | spinlock_t io_lock; | 52 | spinlock_t io_lock; |
50 | 53 | ||
51 | struct block_device *bdev; | 54 | struct block_device *bdev; |
52 | 55 | ||
53 | /* the mode sent to open_bdev_exclusive */ | 56 | /* the mode sent to blkdev_get */ |
54 | fmode_t mode; | 57 | fmode_t mode; |
55 | 58 | ||
56 | char *name; | 59 | char *name; |
@@ -94,6 +97,7 @@ struct btrfs_fs_devices { | |||
94 | u64 num_devices; | 97 | u64 num_devices; |
95 | u64 open_devices; | 98 | u64 open_devices; |
96 | u64 rw_devices; | 99 | u64 rw_devices; |
100 | u64 missing_devices; | ||
97 | u64 total_rw_bytes; | 101 | u64 total_rw_bytes; |
98 | struct block_device *latest_bdev; | 102 | struct block_device *latest_bdev; |
99 | 103 | ||
@@ -135,6 +139,30 @@ struct btrfs_multi_bio { | |||
135 | struct btrfs_bio_stripe stripes[]; | 139 | struct btrfs_bio_stripe stripes[]; |
136 | }; | 140 | }; |
137 | 141 | ||
142 | struct btrfs_device_info { | ||
143 | struct btrfs_device *dev; | ||
144 | u64 dev_offset; | ||
145 | u64 max_avail; | ||
146 | }; | ||
147 | |||
148 | /* Used to sort the devices by max_avail(descending sort) */ | ||
149 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); | ||
150 | |||
151 | /* | ||
152 | * sort the devices by max_avail, in which max free extent size of each device | ||
153 | * is stored.(Descending Sort) | ||
154 | */ | ||
155 | static inline void btrfs_descending_sort_devices( | ||
156 | struct btrfs_device_info *devices, | ||
157 | size_t nr_devices) | ||
158 | { | ||
159 | sort(devices, nr_devices, sizeof(struct btrfs_device_info), | ||
160 | btrfs_cmp_device_free_bytes, NULL); | ||
161 | } | ||
162 | |||
163 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | ||
164 | u64 end, u64 *length); | ||
165 | |||
138 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ | 166 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ |
139 | (sizeof(struct btrfs_bio_stripe) * (n))) | 167 | (sizeof(struct btrfs_bio_stripe) * (n))) |
140 | 168 | ||
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 193b58f7d3f3..a5776531dc2b 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -154,15 +154,10 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
154 | if (trans) | 154 | if (trans) |
155 | return do_setxattr(trans, inode, name, value, size, flags); | 155 | return do_setxattr(trans, inode, name, value, size, flags); |
156 | 156 | ||
157 | ret = btrfs_reserve_metadata_space(root, 2); | 157 | trans = btrfs_start_transaction(root, 2); |
158 | if (ret) | 158 | if (IS_ERR(trans)) |
159 | return ret; | 159 | return PTR_ERR(trans); |
160 | 160 | ||
161 | trans = btrfs_start_transaction(root, 1); | ||
162 | if (!trans) { | ||
163 | ret = -ENOMEM; | ||
164 | goto out; | ||
165 | } | ||
166 | btrfs_set_trans_block_group(trans, inode); | 161 | btrfs_set_trans_block_group(trans, inode); |
167 | 162 | ||
168 | ret = do_setxattr(trans, inode, name, value, size, flags); | 163 | ret = do_setxattr(trans, inode, name, value, size, flags); |
@@ -174,7 +169,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
174 | BUG_ON(ret); | 169 | BUG_ON(ret); |
175 | out: | 170 | out: |
176 | btrfs_end_transaction_throttle(trans, root); | 171 | btrfs_end_transaction_throttle(trans, root); |
177 | btrfs_unreserve_metadata_space(root, 2); | ||
178 | return ret; | 172 | return ret; |
179 | } | 173 | } |
180 | 174 | ||
@@ -184,7 +178,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
184 | struct inode *inode = dentry->d_inode; | 178 | struct inode *inode = dentry->d_inode; |
185 | struct btrfs_root *root = BTRFS_I(inode)->root; | 179 | struct btrfs_root *root = BTRFS_I(inode)->root; |
186 | struct btrfs_path *path; | 180 | struct btrfs_path *path; |
187 | struct btrfs_item *item; | ||
188 | struct extent_buffer *leaf; | 181 | struct extent_buffer *leaf; |
189 | struct btrfs_dir_item *di; | 182 | struct btrfs_dir_item *di; |
190 | int ret = 0, slot, advance; | 183 | int ret = 0, slot, advance; |
@@ -240,7 +233,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
240 | } | 233 | } |
241 | advance = 1; | 234 | advance = 1; |
242 | 235 | ||
243 | item = btrfs_item_nr(leaf, slot); | ||
244 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 236 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
245 | 237 | ||
246 | /* check to make sure this item is what we want */ | 238 | /* check to make sure this item is what we want */ |
@@ -282,7 +274,7 @@ err: | |||
282 | * List of handlers for synthetic system.* attributes. All real ondisk | 274 | * List of handlers for synthetic system.* attributes. All real ondisk |
283 | * attributes are handled directly. | 275 | * attributes are handled directly. |
284 | */ | 276 | */ |
285 | struct xattr_handler *btrfs_xattr_handlers[] = { | 277 | const struct xattr_handler *btrfs_xattr_handlers[] = { |
286 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 278 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
287 | &btrfs_xattr_acl_access_handler, | 279 | &btrfs_xattr_acl_access_handler, |
288 | &btrfs_xattr_acl_default_handler, | 280 | &btrfs_xattr_acl_default_handler, |
@@ -324,6 +316,15 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, | |||
324 | int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | 316 | int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, |
325 | size_t size, int flags) | 317 | size_t size, int flags) |
326 | { | 318 | { |
319 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | ||
320 | |||
321 | /* | ||
322 | * The permission on security.* and system.* is not checked | ||
323 | * in permission(). | ||
324 | */ | ||
325 | if (btrfs_root_readonly(root)) | ||
326 | return -EROFS; | ||
327 | |||
327 | /* | 328 | /* |
328 | * If this is a request for a synthetic attribute in the system.* | 329 | * If this is a request for a synthetic attribute in the system.* |
329 | * namespace use the generic infrastructure to resolve a handler | 330 | * namespace use the generic infrastructure to resolve a handler |
@@ -344,6 +345,15 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
344 | 345 | ||
345 | int btrfs_removexattr(struct dentry *dentry, const char *name) | 346 | int btrfs_removexattr(struct dentry *dentry, const char *name) |
346 | { | 347 | { |
348 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | ||
349 | |||
350 | /* | ||
351 | * The permission on security.* and system.* is not checked | ||
352 | * in permission(). | ||
353 | */ | ||
354 | if (btrfs_root_readonly(root)) | ||
355 | return -EROFS; | ||
356 | |||
347 | /* | 357 | /* |
348 | * If this is a request for a synthetic attribute in the system.* | 358 | * If this is a request for a synthetic attribute in the system.* |
349 | * namespace use the generic infrastructure to resolve a handler | 359 | * namespace use the generic infrastructure to resolve a handler |
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h index 721efa0346e0..7a43fd640bbb 100644 --- a/fs/btrfs/xattr.h +++ b/fs/btrfs/xattr.h | |||
@@ -21,9 +21,9 @@ | |||
21 | 21 | ||
22 | #include <linux/xattr.h> | 22 | #include <linux/xattr.h> |
23 | 23 | ||
24 | extern struct xattr_handler btrfs_xattr_acl_access_handler; | 24 | extern const struct xattr_handler btrfs_xattr_acl_access_handler; |
25 | extern struct xattr_handler btrfs_xattr_acl_default_handler; | 25 | extern const struct xattr_handler btrfs_xattr_acl_default_handler; |
26 | extern struct xattr_handler *btrfs_xattr_handlers[]; | 26 | extern const struct xattr_handler *btrfs_xattr_handlers[]; |
27 | 27 | ||
28 | extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, | 28 | extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, |
29 | void *buffer, size_t size); | 29 | void *buffer, size_t size); |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 3e2b90eaa239..f5ec2d44150d 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -32,15 +32,6 @@ | |||
32 | #include <linux/bio.h> | 32 | #include <linux/bio.h> |
33 | #include "compression.h" | 33 | #include "compression.h" |
34 | 34 | ||
35 | /* Plan: call deflate() with avail_in == *sourcelen, | ||
36 | avail_out = *dstlen - 12 and flush == Z_FINISH. | ||
37 | If it doesn't manage to finish, call it again with | ||
38 | avail_in == 0 and avail_out set to the remaining 12 | ||
39 | bytes for it to clean up. | ||
40 | Q: Is 12 bytes sufficient? | ||
41 | */ | ||
42 | #define STREAM_END_SPACE 12 | ||
43 | |||
44 | struct workspace { | 35 | struct workspace { |
45 | z_stream inf_strm; | 36 | z_stream inf_strm; |
46 | z_stream def_strm; | 37 | z_stream def_strm; |
@@ -48,169 +39,62 @@ struct workspace { | |||
48 | struct list_head list; | 39 | struct list_head list; |
49 | }; | 40 | }; |
50 | 41 | ||
51 | static LIST_HEAD(idle_workspace); | 42 | static void zlib_free_workspace(struct list_head *ws) |
52 | static DEFINE_SPINLOCK(workspace_lock); | 43 | { |
53 | static unsigned long num_workspace; | 44 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
54 | static atomic_t alloc_workspace = ATOMIC_INIT(0); | ||
55 | static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); | ||
56 | 45 | ||
57 | /* | 46 | vfree(workspace->def_strm.workspace); |
58 | * this finds an available zlib workspace or allocates a new one | 47 | vfree(workspace->inf_strm.workspace); |
59 | * NULL or an ERR_PTR is returned if things go bad. | 48 | kfree(workspace->buf); |
60 | */ | 49 | kfree(workspace); |
61 | static struct workspace *find_zlib_workspace(void) | 50 | } |
51 | |||
52 | static struct list_head *zlib_alloc_workspace(void) | ||
62 | { | 53 | { |
63 | struct workspace *workspace; | 54 | struct workspace *workspace; |
64 | int ret; | ||
65 | int cpus = num_online_cpus(); | ||
66 | |||
67 | again: | ||
68 | spin_lock(&workspace_lock); | ||
69 | if (!list_empty(&idle_workspace)) { | ||
70 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
71 | list); | ||
72 | list_del(&workspace->list); | ||
73 | num_workspace--; | ||
74 | spin_unlock(&workspace_lock); | ||
75 | return workspace; | ||
76 | 55 | ||
77 | } | ||
78 | spin_unlock(&workspace_lock); | ||
79 | if (atomic_read(&alloc_workspace) > cpus) { | ||
80 | DEFINE_WAIT(wait); | ||
81 | prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
82 | if (atomic_read(&alloc_workspace) > cpus) | ||
83 | schedule(); | ||
84 | finish_wait(&workspace_wait, &wait); | ||
85 | goto again; | ||
86 | } | ||
87 | atomic_inc(&alloc_workspace); | ||
88 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | 56 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); |
89 | if (!workspace) { | 57 | if (!workspace) |
90 | ret = -ENOMEM; | 58 | return ERR_PTR(-ENOMEM); |
91 | goto fail; | ||
92 | } | ||
93 | 59 | ||
94 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); | 60 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); |
95 | if (!workspace->def_strm.workspace) { | ||
96 | ret = -ENOMEM; | ||
97 | goto fail; | ||
98 | } | ||
99 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); | 61 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); |
100 | if (!workspace->inf_strm.workspace) { | ||
101 | ret = -ENOMEM; | ||
102 | goto fail_inflate; | ||
103 | } | ||
104 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); | 62 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
105 | if (!workspace->buf) { | 63 | if (!workspace->def_strm.workspace || |
106 | ret = -ENOMEM; | 64 | !workspace->inf_strm.workspace || !workspace->buf) |
107 | goto fail_kmalloc; | 65 | goto fail; |
108 | } | ||
109 | return workspace; | ||
110 | |||
111 | fail_kmalloc: | ||
112 | vfree(workspace->inf_strm.workspace); | ||
113 | fail_inflate: | ||
114 | vfree(workspace->def_strm.workspace); | ||
115 | fail: | ||
116 | kfree(workspace); | ||
117 | atomic_dec(&alloc_workspace); | ||
118 | wake_up(&workspace_wait); | ||
119 | return ERR_PTR(ret); | ||
120 | } | ||
121 | |||
122 | /* | ||
123 | * put a workspace struct back on the list or free it if we have enough | ||
124 | * idle ones sitting around | ||
125 | */ | ||
126 | static int free_workspace(struct workspace *workspace) | ||
127 | { | ||
128 | spin_lock(&workspace_lock); | ||
129 | if (num_workspace < num_online_cpus()) { | ||
130 | list_add_tail(&workspace->list, &idle_workspace); | ||
131 | num_workspace++; | ||
132 | spin_unlock(&workspace_lock); | ||
133 | if (waitqueue_active(&workspace_wait)) | ||
134 | wake_up(&workspace_wait); | ||
135 | return 0; | ||
136 | } | ||
137 | spin_unlock(&workspace_lock); | ||
138 | vfree(workspace->def_strm.workspace); | ||
139 | vfree(workspace->inf_strm.workspace); | ||
140 | kfree(workspace->buf); | ||
141 | kfree(workspace); | ||
142 | 66 | ||
143 | atomic_dec(&alloc_workspace); | 67 | INIT_LIST_HEAD(&workspace->list); |
144 | if (waitqueue_active(&workspace_wait)) | ||
145 | wake_up(&workspace_wait); | ||
146 | return 0; | ||
147 | } | ||
148 | 68 | ||
149 | /* | 69 | return &workspace->list; |
150 | * cleanup function for module exit | 70 | fail: |
151 | */ | 71 | zlib_free_workspace(&workspace->list); |
152 | static void free_workspaces(void) | 72 | return ERR_PTR(-ENOMEM); |
153 | { | ||
154 | struct workspace *workspace; | ||
155 | while (!list_empty(&idle_workspace)) { | ||
156 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
157 | list); | ||
158 | list_del(&workspace->list); | ||
159 | vfree(workspace->def_strm.workspace); | ||
160 | vfree(workspace->inf_strm.workspace); | ||
161 | kfree(workspace->buf); | ||
162 | kfree(workspace); | ||
163 | atomic_dec(&alloc_workspace); | ||
164 | } | ||
165 | } | 73 | } |
166 | 74 | ||
167 | /* | 75 | static int zlib_compress_pages(struct list_head *ws, |
168 | * given an address space and start/len, compress the bytes. | 76 | struct address_space *mapping, |
169 | * | 77 | u64 start, unsigned long len, |
170 | * pages are allocated to hold the compressed result and stored | 78 | struct page **pages, |
171 | * in 'pages' | 79 | unsigned long nr_dest_pages, |
172 | * | 80 | unsigned long *out_pages, |
173 | * out_pages is used to return the number of pages allocated. There | 81 | unsigned long *total_in, |
174 | * may be pages allocated even if we return an error | 82 | unsigned long *total_out, |
175 | * | 83 | unsigned long max_out) |
176 | * total_in is used to return the number of bytes actually read. It | ||
177 | * may be smaller then len if we had to exit early because we | ||
178 | * ran out of room in the pages array or because we cross the | ||
179 | * max_out threshold. | ||
180 | * | ||
181 | * total_out is used to return the total number of compressed bytes | ||
182 | * | ||
183 | * max_out tells us the max number of bytes that we're allowed to | ||
184 | * stuff into pages | ||
185 | */ | ||
186 | int btrfs_zlib_compress_pages(struct address_space *mapping, | ||
187 | u64 start, unsigned long len, | ||
188 | struct page **pages, | ||
189 | unsigned long nr_dest_pages, | ||
190 | unsigned long *out_pages, | ||
191 | unsigned long *total_in, | ||
192 | unsigned long *total_out, | ||
193 | unsigned long max_out) | ||
194 | { | 84 | { |
85 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
195 | int ret; | 86 | int ret; |
196 | struct workspace *workspace; | ||
197 | char *data_in; | 87 | char *data_in; |
198 | char *cpage_out; | 88 | char *cpage_out; |
199 | int nr_pages = 0; | 89 | int nr_pages = 0; |
200 | struct page *in_page = NULL; | 90 | struct page *in_page = NULL; |
201 | struct page *out_page = NULL; | 91 | struct page *out_page = NULL; |
202 | int out_written = 0; | ||
203 | int in_read = 0; | ||
204 | unsigned long bytes_left; | 92 | unsigned long bytes_left; |
205 | 93 | ||
206 | *out_pages = 0; | 94 | *out_pages = 0; |
207 | *total_out = 0; | 95 | *total_out = 0; |
208 | *total_in = 0; | 96 | *total_in = 0; |
209 | 97 | ||
210 | workspace = find_zlib_workspace(); | ||
211 | if (IS_ERR(workspace)) | ||
212 | return -1; | ||
213 | |||
214 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | 98 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { |
215 | printk(KERN_WARNING "deflateInit failed\n"); | 99 | printk(KERN_WARNING "deflateInit failed\n"); |
216 | ret = -1; | 100 | ret = -1; |
@@ -224,6 +108,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
224 | data_in = kmap(in_page); | 108 | data_in = kmap(in_page); |
225 | 109 | ||
226 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 110 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
111 | if (out_page == NULL) { | ||
112 | ret = -1; | ||
113 | goto out; | ||
114 | } | ||
227 | cpage_out = kmap(out_page); | 115 | cpage_out = kmap(out_page); |
228 | pages[0] = out_page; | 116 | pages[0] = out_page; |
229 | nr_pages = 1; | 117 | nr_pages = 1; |
@@ -233,9 +121,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
233 | workspace->def_strm.avail_out = PAGE_CACHE_SIZE; | 121 | workspace->def_strm.avail_out = PAGE_CACHE_SIZE; |
234 | workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); | 122 | workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); |
235 | 123 | ||
236 | out_written = 0; | ||
237 | in_read = 0; | ||
238 | |||
239 | while (workspace->def_strm.total_in < len) { | 124 | while (workspace->def_strm.total_in < len) { |
240 | ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); | 125 | ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); |
241 | if (ret != Z_OK) { | 126 | if (ret != Z_OK) { |
@@ -265,6 +150,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
265 | goto out; | 150 | goto out; |
266 | } | 151 | } |
267 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 152 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
153 | if (out_page == NULL) { | ||
154 | ret = -1; | ||
155 | goto out; | ||
156 | } | ||
268 | cpage_out = kmap(out_page); | 157 | cpage_out = kmap(out_page); |
269 | pages[nr_pages] = out_page; | 158 | pages[nr_pages] = out_page; |
270 | nr_pages++; | 159 | nr_pages++; |
@@ -319,55 +208,26 @@ out: | |||
319 | kunmap(in_page); | 208 | kunmap(in_page); |
320 | page_cache_release(in_page); | 209 | page_cache_release(in_page); |
321 | } | 210 | } |
322 | free_workspace(workspace); | ||
323 | return ret; | 211 | return ret; |
324 | } | 212 | } |
325 | 213 | ||
326 | /* | 214 | static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, |
327 | * pages_in is an array of pages with compressed data. | 215 | u64 disk_start, |
328 | * | 216 | struct bio_vec *bvec, |
329 | * disk_start is the starting logical offset of this array in the file | 217 | int vcnt, |
330 | * | 218 | size_t srclen) |
331 | * bvec is a bio_vec of pages from the file that we want to decompress into | ||
332 | * | ||
333 | * vcnt is the count of pages in the biovec | ||
334 | * | ||
335 | * srclen is the number of bytes in pages_in | ||
336 | * | ||
337 | * The basic idea is that we have a bio that was created by readpages. | ||
338 | * The pages in the bio are for the uncompressed data, and they may not | ||
339 | * be contiguous. They all correspond to the range of bytes covered by | ||
340 | * the compressed extent. | ||
341 | */ | ||
342 | int btrfs_zlib_decompress_biovec(struct page **pages_in, | ||
343 | u64 disk_start, | ||
344 | struct bio_vec *bvec, | ||
345 | int vcnt, | ||
346 | size_t srclen) | ||
347 | { | 219 | { |
348 | int ret = 0; | 220 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
221 | int ret = 0, ret2; | ||
349 | int wbits = MAX_WBITS; | 222 | int wbits = MAX_WBITS; |
350 | struct workspace *workspace; | ||
351 | char *data_in; | 223 | char *data_in; |
352 | size_t total_out = 0; | 224 | size_t total_out = 0; |
353 | unsigned long page_bytes_left; | ||
354 | unsigned long page_in_index = 0; | 225 | unsigned long page_in_index = 0; |
355 | unsigned long page_out_index = 0; | 226 | unsigned long page_out_index = 0; |
356 | struct page *page_out; | ||
357 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | 227 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / |
358 | PAGE_CACHE_SIZE; | 228 | PAGE_CACHE_SIZE; |
359 | unsigned long buf_start; | 229 | unsigned long buf_start; |
360 | unsigned long buf_offset; | ||
361 | unsigned long bytes; | ||
362 | unsigned long working_bytes; | ||
363 | unsigned long pg_offset; | 230 | unsigned long pg_offset; |
364 | unsigned long start_byte; | ||
365 | unsigned long current_buf_start; | ||
366 | char *kaddr; | ||
367 | |||
368 | workspace = find_zlib_workspace(); | ||
369 | if (IS_ERR(workspace)) | ||
370 | return -ENOMEM; | ||
371 | 231 | ||
372 | data_in = kmap(pages_in[page_in_index]); | 232 | data_in = kmap(pages_in[page_in_index]); |
373 | workspace->inf_strm.next_in = data_in; | 233 | workspace->inf_strm.next_in = data_in; |
@@ -377,8 +237,6 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
377 | workspace->inf_strm.total_out = 0; | 237 | workspace->inf_strm.total_out = 0; |
378 | workspace->inf_strm.next_out = workspace->buf; | 238 | workspace->inf_strm.next_out = workspace->buf; |
379 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 239 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
380 | page_out = bvec[page_out_index].bv_page; | ||
381 | page_bytes_left = PAGE_CACHE_SIZE; | ||
382 | pg_offset = 0; | 240 | pg_offset = 0; |
383 | 241 | ||
384 | /* If it's deflate, and it's got no preset dictionary, then | 242 | /* If it's deflate, and it's got no preset dictionary, then |
@@ -394,107 +252,29 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
394 | 252 | ||
395 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 253 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
396 | printk(KERN_WARNING "inflateInit failed\n"); | 254 | printk(KERN_WARNING "inflateInit failed\n"); |
397 | ret = -1; | 255 | return -1; |
398 | goto out; | ||
399 | } | 256 | } |
400 | while (workspace->inf_strm.total_in < srclen) { | 257 | while (workspace->inf_strm.total_in < srclen) { |
401 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | 258 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); |
402 | if (ret != Z_OK && ret != Z_STREAM_END) | 259 | if (ret != Z_OK && ret != Z_STREAM_END) |
403 | break; | 260 | break; |
404 | /* | ||
405 | * buf start is the byte offset we're of the start of | ||
406 | * our workspace buffer | ||
407 | */ | ||
408 | buf_start = total_out; | ||
409 | 261 | ||
410 | /* total_out is the last byte of the workspace buffer */ | 262 | buf_start = total_out; |
411 | total_out = workspace->inf_strm.total_out; | 263 | total_out = workspace->inf_strm.total_out; |
412 | 264 | ||
413 | working_bytes = total_out - buf_start; | 265 | /* we didn't make progress in this inflate call, we're done */ |
414 | 266 | if (buf_start == total_out) | |
415 | /* | ||
416 | * start byte is the first byte of the page we're currently | ||
417 | * copying into relative to the start of the compressed data. | ||
418 | */ | ||
419 | start_byte = page_offset(page_out) - disk_start; | ||
420 | |||
421 | if (working_bytes == 0) { | ||
422 | /* we didn't make progress in this inflate | ||
423 | * call, we're done | ||
424 | */ | ||
425 | if (ret != Z_STREAM_END) | ||
426 | ret = -1; | ||
427 | break; | 267 | break; |
428 | } | ||
429 | 268 | ||
430 | /* we haven't yet hit data corresponding to this page */ | 269 | ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, |
431 | if (total_out <= start_byte) | 270 | total_out, disk_start, |
432 | goto next; | 271 | bvec, vcnt, |
433 | 272 | &page_out_index, &pg_offset); | |
434 | /* | 273 | if (ret2 == 0) { |
435 | * the start of the data we care about is offset into | 274 | ret = 0; |
436 | * the middle of our working buffer | 275 | goto done; |
437 | */ | ||
438 | if (total_out > start_byte && buf_start < start_byte) { | ||
439 | buf_offset = start_byte - buf_start; | ||
440 | working_bytes -= buf_offset; | ||
441 | } else { | ||
442 | buf_offset = 0; | ||
443 | } | ||
444 | current_buf_start = buf_start; | ||
445 | |||
446 | /* copy bytes from the working buffer into the pages */ | ||
447 | while (working_bytes > 0) { | ||
448 | bytes = min(PAGE_CACHE_SIZE - pg_offset, | ||
449 | PAGE_CACHE_SIZE - buf_offset); | ||
450 | bytes = min(bytes, working_bytes); | ||
451 | kaddr = kmap_atomic(page_out, KM_USER0); | ||
452 | memcpy(kaddr + pg_offset, workspace->buf + buf_offset, | ||
453 | bytes); | ||
454 | kunmap_atomic(kaddr, KM_USER0); | ||
455 | flush_dcache_page(page_out); | ||
456 | |||
457 | pg_offset += bytes; | ||
458 | page_bytes_left -= bytes; | ||
459 | buf_offset += bytes; | ||
460 | working_bytes -= bytes; | ||
461 | current_buf_start += bytes; | ||
462 | |||
463 | /* check if we need to pick another page */ | ||
464 | if (page_bytes_left == 0) { | ||
465 | page_out_index++; | ||
466 | if (page_out_index >= vcnt) { | ||
467 | ret = 0; | ||
468 | goto done; | ||
469 | } | ||
470 | |||
471 | page_out = bvec[page_out_index].bv_page; | ||
472 | pg_offset = 0; | ||
473 | page_bytes_left = PAGE_CACHE_SIZE; | ||
474 | start_byte = page_offset(page_out) - disk_start; | ||
475 | |||
476 | /* | ||
477 | * make sure our new page is covered by this | ||
478 | * working buffer | ||
479 | */ | ||
480 | if (total_out <= start_byte) | ||
481 | goto next; | ||
482 | |||
483 | /* the next page in the biovec might not | ||
484 | * be adjacent to the last page, but it | ||
485 | * might still be found inside this working | ||
486 | * buffer. bump our offset pointer | ||
487 | */ | ||
488 | if (total_out > start_byte && | ||
489 | current_buf_start < start_byte) { | ||
490 | buf_offset = start_byte - buf_start; | ||
491 | working_bytes = total_out - start_byte; | ||
492 | current_buf_start = buf_start + | ||
493 | buf_offset; | ||
494 | } | ||
495 | } | ||
496 | } | 276 | } |
497 | next: | 277 | |
498 | workspace->inf_strm.next_out = workspace->buf; | 278 | workspace->inf_strm.next_out = workspace->buf; |
499 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 279 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
500 | 280 | ||
@@ -521,35 +301,21 @@ done: | |||
521 | zlib_inflateEnd(&workspace->inf_strm); | 301 | zlib_inflateEnd(&workspace->inf_strm); |
522 | if (data_in) | 302 | if (data_in) |
523 | kunmap(pages_in[page_in_index]); | 303 | kunmap(pages_in[page_in_index]); |
524 | out: | ||
525 | free_workspace(workspace); | ||
526 | return ret; | 304 | return ret; |
527 | } | 305 | } |
528 | 306 | ||
529 | /* | 307 | static int zlib_decompress(struct list_head *ws, unsigned char *data_in, |
530 | * a less complex decompression routine. Our compressed data fits in a | 308 | struct page *dest_page, |
531 | * single page, and we want to read a single page out of it. | 309 | unsigned long start_byte, |
532 | * start_byte tells us the offset into the compressed data we're interested in | 310 | size_t srclen, size_t destlen) |
533 | */ | ||
534 | int btrfs_zlib_decompress(unsigned char *data_in, | ||
535 | struct page *dest_page, | ||
536 | unsigned long start_byte, | ||
537 | size_t srclen, size_t destlen) | ||
538 | { | 311 | { |
312 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
539 | int ret = 0; | 313 | int ret = 0; |
540 | int wbits = MAX_WBITS; | 314 | int wbits = MAX_WBITS; |
541 | struct workspace *workspace; | ||
542 | unsigned long bytes_left = destlen; | 315 | unsigned long bytes_left = destlen; |
543 | unsigned long total_out = 0; | 316 | unsigned long total_out = 0; |
544 | char *kaddr; | 317 | char *kaddr; |
545 | 318 | ||
546 | if (destlen > PAGE_CACHE_SIZE) | ||
547 | return -ENOMEM; | ||
548 | |||
549 | workspace = find_zlib_workspace(); | ||
550 | if (IS_ERR(workspace)) | ||
551 | return -ENOMEM; | ||
552 | |||
553 | workspace->inf_strm.next_in = data_in; | 319 | workspace->inf_strm.next_in = data_in; |
554 | workspace->inf_strm.avail_in = srclen; | 320 | workspace->inf_strm.avail_in = srclen; |
555 | workspace->inf_strm.total_in = 0; | 321 | workspace->inf_strm.total_in = 0; |
@@ -570,8 +336,7 @@ int btrfs_zlib_decompress(unsigned char *data_in, | |||
570 | 336 | ||
571 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 337 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
572 | printk(KERN_WARNING "inflateInit failed\n"); | 338 | printk(KERN_WARNING "inflateInit failed\n"); |
573 | ret = -1; | 339 | return -1; |
574 | goto out; | ||
575 | } | 340 | } |
576 | 341 | ||
577 | while (bytes_left > 0) { | 342 | while (bytes_left > 0) { |
@@ -621,12 +386,13 @@ next: | |||
621 | ret = 0; | 386 | ret = 0; |
622 | 387 | ||
623 | zlib_inflateEnd(&workspace->inf_strm); | 388 | zlib_inflateEnd(&workspace->inf_strm); |
624 | out: | ||
625 | free_workspace(workspace); | ||
626 | return ret; | 389 | return ret; |
627 | } | 390 | } |
628 | 391 | ||
629 | void btrfs_zlib_exit(void) | 392 | struct btrfs_compress_op btrfs_zlib_compress = { |
630 | { | 393 | .alloc_workspace = zlib_alloc_workspace, |
631 | free_workspaces(); | 394 | .free_workspace = zlib_free_workspace, |
632 | } | 395 | .compress_pages = zlib_compress_pages, |
396 | .decompress_biovec = zlib_decompress_biovec, | ||
397 | .decompress = zlib_decompress, | ||
398 | }; | ||