Diffstat (limited to 'fs/btrfs')
41 files changed, 5444 insertions, 2135 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 7bb3c020e570..ecb9fd3be143 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,6 +4,8 @@ config BTRFS_FS
 	select LIBCRC32C
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
+	select LZO_COMPRESS
+	select LZO_DECOMPRESS
 	help
 	  Btrfs is a new filesystem with extents, writable snapshotting,
 	  support for multiple devices and many more features.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index a35eb36b32fd..31610ea73aec 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   transaction.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-	   export.o tree-log.o acl.o free-space-cache.o zlib.o \
+	   export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
 	   compression.o delayed-ref.o relocation.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 6ae2c8cac9d5..5d505aaa72fb 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -37,6 +37,9 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 	char *value = NULL;
 	struct posix_acl *acl;
 
+	if (!IS_POSIXACL(inode))
+		return NULL;
+
 	acl = get_cached_acl(inode, type);
 	if (acl != ACL_NOT_CACHED)
 		return acl;
@@ -60,8 +63,10 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 	size = __btrfs_getxattr(inode, name, value, size);
 	if (size > 0) {
 		acl = posix_acl_from_xattr(value, size);
-		if (IS_ERR(acl))
+		if (IS_ERR(acl)) {
+			kfree(value);
 			return acl;
+		}
 		set_cached_acl(inode, type, acl);
 	}
 	kfree(value);
@@ -82,6 +87,9 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
 	struct posix_acl *acl;
 	int ret = 0;
 
+	if (!IS_POSIXACL(dentry->d_inode))
+		return -EOPNOTSUPP;
+
 	acl = btrfs_get_acl(dentry->d_inode, type);
 
 	if (IS_ERR(acl))
@@ -162,7 +170,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 	int ret;
 	struct posix_acl *acl = NULL;
 
-	if (!is_owner_or_cap(dentry->d_inode))
+	if (!inode_owner_or_capable(dentry->d_inode))
 		return -EPERM;
 
 	if (!IS_POSIXACL(dentry->d_inode))
@@ -170,16 +178,17 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 
 	if (value) {
 		acl = posix_acl_from_xattr(value, size);
-		if (acl == NULL) {
-			value = NULL;
-			size = 0;
+		if (acl) {
+			ret = posix_acl_valid(acl);
+			if (ret)
+				goto out;
 		} else if (IS_ERR(acl)) {
 			return PTR_ERR(acl);
 		}
 	}
 
 	ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
-
+out:
 	posix_acl_release(acl);
 
 	return ret;
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 6ad63f17eca0..57c3bb2884ce 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -136,9 +136,8 @@ struct btrfs_inode {
 	 * items we think we'll end up using, and reserved_extents is the number
 	 * of extent items we've reserved metadata for.
 	 */
-	spinlock_t accounting_lock;
 	atomic_t outstanding_extents;
-	int reserved_extents;
+	atomic_t reserved_extents;
 
 	/*
 	 * ordered_data_close is set by truncate when a file that used
@@ -157,7 +156,7 @@ struct btrfs_inode {
 	/*
 	 * always compress this one file
 	 */
-	unsigned force_compress:1;
+	unsigned force_compress:4;
 
 	struct inode vfs_inode;
 };
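Two independent changes meet in this header: the extent accounting drops accounting_lock in favour of plain atomics, and force_compress grows from 1 bit to 4 so it can hold a btrfs_compression_type value rather than a yes/no flag, letting callers request a specific algorithm per inode. A hypothetical use (illustrative only; the enum itself appears in the ctree.h hunk below):

	/* illustrative: force LZO on one inode, e.g. from the defrag path */
	BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_LZO;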
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b50bc4bd5c56..41d1d7c70e29 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -62,6 +62,9 @@ struct compressed_bio {
 	/* number of bytes on disk */
 	unsigned long compressed_len;
 
+	/* the compression algorithm for this bio */
+	int compress_type;
+
 	/* number of compressed pages in the array */
 	unsigned long nr_pages;
 
@@ -173,11 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err)
 	/* ok, we're the last bio for this extent, lets start
 	 * the decompression.
 	 */
-	ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
-					cb->start,
-					cb->orig_bio->bi_io_vec,
-					cb->orig_bio->bi_vcnt,
-					cb->compressed_len);
+	ret = btrfs_decompress_biovec(cb->compress_type,
+				      cb->compressed_pages,
+				      cb->start,
+				      cb->orig_bio->bi_io_vec,
+				      cb->orig_bio->bi_vcnt,
+				      cb->compressed_len);
 csum_failed:
 	if (ret)
 		cb->errors = 1;
@@ -336,6 +340,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
 	WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
+	if (!cb)
+		return -ENOMEM;
 	atomic_set(&cb->pending_bios, 0);
 	cb->errors = 0;
 	cb->inode = inode;
@@ -350,6 +356,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
 	bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
+	if(!bio) {
+		kfree(cb);
+		return -ENOMEM;
+	}
 	bio->bi_private = cb;
 	bio->bi_end_io = end_compressed_bio_write;
 	atomic_inc(&cb->pending_bios);
@@ -558,7 +568,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	u64 em_len;
 	u64 em_start;
 	struct extent_map *em;
-	int ret;
+	int ret = -ENOMEM;
 	u32 *sums;
 
 	tree = &BTRFS_I(inode)->io_tree;
@@ -573,6 +583,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	compressed_len = em->block_len;
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
+	if (!cb)
+		goto out;
+
 	atomic_set(&cb->pending_bios, 0);
 	cb->errors = 0;
 	cb->inode = inode;
@@ -588,17 +601,23 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	cb->len = uncompressed_len;
 	cb->compressed_len = compressed_len;
+	cb->compress_type = extent_compress_type(bio_flags);
 	cb->orig_bio = bio;
 
 	nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
 		 PAGE_CACHE_SIZE;
-	cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages,
+	cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
 				       GFP_NOFS);
+	if (!cb->compressed_pages)
+		goto fail1;
+
 	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
 	for (page_index = 0; page_index < nr_pages; page_index++) {
 		cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
 							      __GFP_HIGHMEM);
+		if (!cb->compressed_pages[page_index])
+			goto fail2;
 	}
 	cb->nr_pages = nr_pages;
 
@@ -609,6 +628,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	cb->len = uncompressed_len;
 
 	comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
+	if (!comp_bio)
+		goto fail2;
 	comp_bio->bi_private = cb;
 	comp_bio->bi_end_io = end_compressed_bio_read;
 	atomic_inc(&cb->pending_bios);
@@ -642,8 +663,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 		atomic_inc(&cb->pending_bios);
 
 		if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
-			btrfs_lookup_bio_sums(root, inode, comp_bio,
-					      sums);
+			ret = btrfs_lookup_bio_sums(root, inode,
+						    comp_bio, sums);
+			BUG_ON(ret);
 		}
 		sums += (comp_bio->bi_size + root->sectorsize - 1) /
 			root->sectorsize;
@@ -668,12 +690,339 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
 	BUG_ON(ret);
 
-	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
-		btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+		ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+		BUG_ON(ret);
+	}
 
 	ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
 	BUG_ON(ret);
 
 	bio_put(comp_bio);
 	return 0;
+
+fail2:
+	for (page_index = 0; page_index < nr_pages; page_index++)
+		free_page((unsigned long)cb->compressed_pages[page_index]);
+
+	kfree(cb->compressed_pages);
+fail1:
+	kfree(cb);
+out:
+	free_extent_map(em);
+	return ret;
+}
+
+static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
+static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
+static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
+static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
+static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
+
+struct btrfs_compress_op *btrfs_compress_op[] = {
+	&btrfs_zlib_compress,
+	&btrfs_lzo_compress,
+};
+
+int __init btrfs_init_compress(void)
+{
+	int i;
+
+	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+		INIT_LIST_HEAD(&comp_idle_workspace[i]);
+		spin_lock_init(&comp_workspace_lock[i]);
+		atomic_set(&comp_alloc_workspace[i], 0);
+		init_waitqueue_head(&comp_workspace_wait[i]);
+	}
+	return 0;
+}
+
+/*
+ * this finds an available workspace or allocates a new one
+ * ERR_PTR is returned if things go bad.
+ */
+static struct list_head *find_workspace(int type)
+{
+	struct list_head *workspace;
+	int cpus = num_online_cpus();
+	int idx = type - 1;
+
+	struct list_head *idle_workspace	= &comp_idle_workspace[idx];
+	spinlock_t *workspace_lock		= &comp_workspace_lock[idx];
+	atomic_t *alloc_workspace		= &comp_alloc_workspace[idx];
+	wait_queue_head_t *workspace_wait	= &comp_workspace_wait[idx];
+	int *num_workspace			= &comp_num_workspace[idx];
+again:
+	spin_lock(workspace_lock);
+	if (!list_empty(idle_workspace)) {
+		workspace = idle_workspace->next;
+		list_del(workspace);
+		(*num_workspace)--;
+		spin_unlock(workspace_lock);
+		return workspace;
+
+	}
+	if (atomic_read(alloc_workspace) > cpus) {
+		DEFINE_WAIT(wait);
+
+		spin_unlock(workspace_lock);
+		prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
+		if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
+			schedule();
+		finish_wait(workspace_wait, &wait);
+		goto again;
+	}
+	atomic_inc(alloc_workspace);
+	spin_unlock(workspace_lock);
+
+	workspace = btrfs_compress_op[idx]->alloc_workspace();
+	if (IS_ERR(workspace)) {
+		atomic_dec(alloc_workspace);
+		wake_up(workspace_wait);
+	}
+	return workspace;
+}
+
+/*
+ * put a workspace struct back on the list or free it if we have enough
+ * idle ones sitting around
+ */
+static void free_workspace(int type, struct list_head *workspace)
+{
+	int idx = type - 1;
+	struct list_head *idle_workspace	= &comp_idle_workspace[idx];
+	spinlock_t *workspace_lock		= &comp_workspace_lock[idx];
+	atomic_t *alloc_workspace		= &comp_alloc_workspace[idx];
+	wait_queue_head_t *workspace_wait	= &comp_workspace_wait[idx];
+	int *num_workspace			= &comp_num_workspace[idx];
+
+	spin_lock(workspace_lock);
+	if (*num_workspace < num_online_cpus()) {
+		list_add_tail(workspace, idle_workspace);
+		(*num_workspace)++;
+		spin_unlock(workspace_lock);
+		goto wake;
+	}
+	spin_unlock(workspace_lock);
+
+	btrfs_compress_op[idx]->free_workspace(workspace);
+	atomic_dec(alloc_workspace);
+wake:
+	if (waitqueue_active(workspace_wait))
+		wake_up(workspace_wait);
+}
+
+/*
+ * cleanup function for module exit
+ */
+static void free_workspaces(void)
+{
+	struct list_head *workspace;
+	int i;
+
+	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+		while (!list_empty(&comp_idle_workspace[i])) {
+			workspace = comp_idle_workspace[i].next;
+			list_del(workspace);
+			btrfs_compress_op[i]->free_workspace(workspace);
+			atomic_dec(&comp_alloc_workspace[i]);
+		}
+	}
+}
+
+/*
+ * given an address space and start/len, compress the bytes.
+ *
+ * pages are allocated to hold the compressed result and stored
+ * in 'pages'
+ *
+ * out_pages is used to return the number of pages allocated.  There
+ * may be pages allocated even if we return an error
+ *
+ * total_in is used to return the number of bytes actually read.  It
+ * may be smaller then len if we had to exit early because we
+ * ran out of room in the pages array or because we cross the
+ * max_out threshold.
+ *
+ * total_out is used to return the total number of compressed bytes
+ *
+ * max_out tells us the max number of bytes that we're allowed to
+ * stuff into pages
+ */
+int btrfs_compress_pages(int type, struct address_space *mapping,
+			 u64 start, unsigned long len,
+			 struct page **pages,
+			 unsigned long nr_dest_pages,
+			 unsigned long *out_pages,
+			 unsigned long *total_in,
+			 unsigned long *total_out,
+			 unsigned long max_out)
+{
+	struct list_head *workspace;
+	int ret;
+
+	workspace = find_workspace(type);
+	if (IS_ERR(workspace))
+		return -1;
+
+	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
+						      start, len, pages,
+						      nr_dest_pages, out_pages,
+						      total_in, total_out,
+						      max_out);
+	free_workspace(type, workspace);
+	return ret;
+}
+
+/*
+ * pages_in is an array of pages with compressed data.
+ *
+ * disk_start is the starting logical offset of this array in the file
+ *
+ * bvec is a bio_vec of pages from the file that we want to decompress into
+ *
+ * vcnt is the count of pages in the biovec
+ *
+ * srclen is the number of bytes in pages_in
+ *
+ * The basic idea is that we have a bio that was created by readpages.
+ * The pages in the bio are for the uncompressed data, and they may not
+ * be contiguous.  They all correspond to the range of bytes covered by
+ * the compressed extent.
+ */
+int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
+			    struct bio_vec *bvec, int vcnt, size_t srclen)
+{
+	struct list_head *workspace;
+	int ret;
+
+	workspace = find_workspace(type);
+	if (IS_ERR(workspace))
+		return -ENOMEM;
+
+	ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
+							 disk_start,
+							 bvec, vcnt, srclen);
+	free_workspace(type, workspace);
+	return ret;
+}
+
+/*
+ * a less complex decompression routine.  Our compressed data fits in a
+ * single page, and we want to read a single page out of it.
+ * start_byte tells us the offset into the compressed data we're interested in
+ */
+int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
+		     unsigned long start_byte, size_t srclen, size_t destlen)
+{
+	struct list_head *workspace;
+	int ret;
+
+	workspace = find_workspace(type);
+	if (IS_ERR(workspace))
+		return -ENOMEM;
+
+	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
+						  dest_page, start_byte,
+						  srclen, destlen);
+
+	free_workspace(type, workspace);
+	return ret;
+}
+
+void btrfs_exit_compress(void)
+{
+	free_workspaces();
+}
+
+/*
+ * Copy uncompressed data from working buffer to pages.
+ *
+ * buf_start is the byte offset we're of the start of our workspace buffer.
+ *
+ * total_out is the last byte of the buffer
+ */
+int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+			      unsigned long total_out, u64 disk_start,
+			      struct bio_vec *bvec, int vcnt,
+			      unsigned long *page_index,
+			      unsigned long *pg_offset)
+{
+	unsigned long buf_offset;
+	unsigned long current_buf_start;
+	unsigned long start_byte;
+	unsigned long working_bytes = total_out - buf_start;
+	unsigned long bytes;
+	char *kaddr;
+	struct page *page_out = bvec[*page_index].bv_page;
+
+	/*
+	 * start byte is the first byte of the page we're currently
+	 * copying into relative to the start of the compressed data.
+	 */
+	start_byte = page_offset(page_out) - disk_start;
+
+	/* we haven't yet hit data corresponding to this page */
+	if (total_out <= start_byte)
+		return 1;
+
+	/*
+	 * the start of the data we care about is offset into
+	 * the middle of our working buffer
+	 */
+	if (total_out > start_byte && buf_start < start_byte) {
+		buf_offset = start_byte - buf_start;
+		working_bytes -= buf_offset;
+	} else {
+		buf_offset = 0;
+	}
+	current_buf_start = buf_start;
+
+	/* copy bytes from the working buffer into the pages */
+	while (working_bytes > 0) {
+		bytes = min(PAGE_CACHE_SIZE - *pg_offset,
+			    PAGE_CACHE_SIZE - buf_offset);
+		bytes = min(bytes, working_bytes);
+		kaddr = kmap_atomic(page_out, KM_USER0);
+		memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
+		kunmap_atomic(kaddr, KM_USER0);
+		flush_dcache_page(page_out);
+
+		*pg_offset += bytes;
+		buf_offset += bytes;
+		working_bytes -= bytes;
+		current_buf_start += bytes;
+
+		/* check if we need to pick another page */
+		if (*pg_offset == PAGE_CACHE_SIZE) {
+			(*page_index)++;
+			if (*page_index >= vcnt)
+				return 0;
+
+			page_out = bvec[*page_index].bv_page;
+			*pg_offset = 0;
+			start_byte = page_offset(page_out) - disk_start;
+
+			/*
+			 * make sure our new page is covered by this
+			 * working buffer
+			 */
+			if (total_out <= start_byte)
+				return 1;
+
+			/*
+			 * the next page in the biovec might not be adjacent
+			 * to the last page, but it might still be found
+			 * inside this working buffer. bump our offset pointer
+			 */
+			if (total_out > start_byte &&
+			    current_buf_start < start_byte) {
+				buf_offset = start_byte - buf_start;
+				working_bytes = total_out - start_byte;
+				current_buf_start = buf_start + buf_offset;
+			}
+		}
+	}
+
+	return 1;
 }
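The workspace pool added above is what lets zlib and LZO share one allocation policy: each compression type keeps a list of idle workspaces, total allocations are capped around num_online_cpus(), and a caller that hits the cap sleeps on a waitqueue until free_workspace() wakes it. A minimal user-space sketch of the same pattern, using pthreads (names here are illustrative, not btrfs symbols):

	#include <pthread.h>
	#include <stdlib.h>

	struct workspace { struct workspace *next; /* algorithm scratch here */ };

	static struct workspace *idle_list;	/* cached idle workspaces */
	static int num_idle;			/* length of idle_list */
	static int num_alloc;			/* total live workspaces */
	static int max_workspaces = 4;		/* stand-in for num_online_cpus() */
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t avail = PTHREAD_COND_INITIALIZER;

	static struct workspace *get_workspace(void)
	{
		struct workspace *ws;

		pthread_mutex_lock(&lock);
		for (;;) {
			if (idle_list) {		/* reuse a cached workspace */
				ws = idle_list;
				idle_list = ws->next;
				num_idle--;
				break;
			}
			if (num_alloc < max_workspaces) { /* allowed to grow the pool */
				ws = calloc(1, sizeof(*ws));
				if (ws)
					num_alloc++;
				break;			/* NULL here means ENOMEM */
			}
			/* pool exhausted: sleep until put_workspace() signals */
			pthread_cond_wait(&avail, &lock);
		}
		pthread_mutex_unlock(&lock);
		return ws;
	}

	static void put_workspace(struct workspace *ws)
	{
		pthread_mutex_lock(&lock);
		if (num_idle < max_workspaces) {	/* cache it for reuse */
			ws->next = idle_list;
			idle_list = ws;
			num_idle++;
			ws = NULL;
		} else {
			num_alloc--;			/* enough idle: really free */
		}
		pthread_cond_signal(&avail);		/* wake one sleeping getter */
		pthread_mutex_unlock(&lock);
		free(ws);				/* no-op when cached */
	}

The re-test in find_workspace() after prepare_to_wait() plays the same role as the condition-variable loop here: a workspace may have been put back between dropping the lock and going to sleep, and in that case the caller must not block.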
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 421f5b4aa715..51000174b9d7 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -19,24 +19,27 @@
 #ifndef __BTRFS_COMPRESSION_
 #define __BTRFS_COMPRESSION_
 
-int btrfs_zlib_decompress(unsigned char *data_in,
-			  struct page *dest_page,
-			  unsigned long start_byte,
-			  size_t srclen, size_t destlen);
-int btrfs_zlib_compress_pages(struct address_space *mapping,
-			      u64 start, unsigned long len,
-			      struct page **pages,
-			      unsigned long nr_dest_pages,
-			      unsigned long *out_pages,
-			      unsigned long *total_in,
-			      unsigned long *total_out,
-			      unsigned long max_out);
-int btrfs_zlib_decompress_biovec(struct page **pages_in,
-			      u64 disk_start,
-			      struct bio_vec *bvec,
-			      int vcnt,
-			      size_t srclen);
-void btrfs_zlib_exit(void);
+int btrfs_init_compress(void);
+void btrfs_exit_compress(void);
+
+int btrfs_compress_pages(int type, struct address_space *mapping,
+			 u64 start, unsigned long len,
+			 struct page **pages,
+			 unsigned long nr_dest_pages,
+			 unsigned long *out_pages,
+			 unsigned long *total_in,
+			 unsigned long *total_out,
+			 unsigned long max_out);
+int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
+			    struct bio_vec *bvec, int vcnt, size_t srclen);
+int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
+		     unsigned long start_byte, size_t srclen, size_t destlen);
+int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+			      unsigned long total_out, u64 disk_start,
+			      struct bio_vec *bvec, int vcnt,
+			      unsigned long *page_index,
+			      unsigned long *pg_offset);
+
 int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				  unsigned long len, u64 disk_start,
 				  unsigned long compressed_len,
@@ -44,4 +47,37 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				  unsigned long nr_pages);
 int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags);
+
+struct btrfs_compress_op {
+	struct list_head *(*alloc_workspace)(void);
+
+	void (*free_workspace)(struct list_head *workspace);
+
+	int (*compress_pages)(struct list_head *workspace,
+			      struct address_space *mapping,
+			      u64 start, unsigned long len,
+			      struct page **pages,
+			      unsigned long nr_dest_pages,
+			      unsigned long *out_pages,
+			      unsigned long *total_in,
+			      unsigned long *total_out,
+			      unsigned long max_out);
+
+	int (*decompress_biovec)(struct list_head *workspace,
+				 struct page **pages_in,
+				 u64 disk_start,
+				 struct bio_vec *bvec,
+				 int vcnt,
+				 size_t srclen);
+
+	int (*decompress)(struct list_head *workspace,
+			  unsigned char *data_in,
+			  struct page *dest_page,
+			  unsigned long start_byte,
+			  size_t srclen, size_t destlen);
+};
+
+extern struct btrfs_compress_op btrfs_zlib_compress;
+extern struct btrfs_compress_op btrfs_lzo_compress;
+
 #endif
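struct btrfs_compress_op is effectively a vtable: compression.c dispatches every call through btrfs_compress_op[type - 1], so a third algorithm only has to supply one more ops struct and bump BTRFS_COMPRESS_TYPES. A sketch of the backend side, using a hypothetical "dummy" algorithm (the real instances in this series are btrfs_zlib_compress in zlib.c and btrfs_lzo_compress in lzo.c):

	/* hypothetical backend; only the workspace hooks are fleshed out */
	struct dummy_workspace {
		void *buf;		/* scratch a real backend would size
					   for its algorithm */
		struct list_head list;	/* linked into comp_idle_workspace[] */
	};

	static struct list_head *dummy_alloc_workspace(void)
	{
		struct dummy_workspace *ws = kzalloc(sizeof(*ws), GFP_NOFS);

		if (!ws)
			return ERR_PTR(-ENOMEM);
		INIT_LIST_HEAD(&ws->list);
		return &ws->list;
	}

	static void dummy_free_workspace(struct list_head *head)
	{
		struct dummy_workspace *ws;

		ws = container_of(head, struct dummy_workspace, list);
		kfree(ws);
	}

	struct btrfs_compress_op btrfs_dummy_compress = {
		.alloc_workspace	= dummy_alloc_workspace,
		.free_workspace		= dummy_free_workspace,
		/* .compress_pages, .decompress_biovec and .decompress would
		 * wrap the algorithm's real (de)compression routines */
	};

Passing struct list_head around instead of a backend-specific type is the design choice that lets the generic pool code in compression.c stay ignorant of what a workspace actually contains.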
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9ac171599258..84d7ca1fe0ba 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -105,6 +105,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
 /* this also releases the path */
 void btrfs_free_path(struct btrfs_path *p)
 {
+	if (!p)
+		return;
 	btrfs_release_path(NULL, p);
 	kmem_cache_free(btrfs_path_cachep, p);
 }
@@ -145,10 +147,11 @@ noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
 struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
 {
 	struct extent_buffer *eb;
-	spin_lock(&root->node_lock);
-	eb = root->node;
+
+	rcu_read_lock();
+	eb = rcu_dereference(root->node);
 	extent_buffer_get(eb);
-	spin_unlock(&root->node_lock);
+	rcu_read_unlock();
 	return eb;
 }
 
@@ -163,14 +166,8 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
 	while (1) {
 		eb = btrfs_root_node(root);
 		btrfs_tree_lock(eb);
-
-		spin_lock(&root->node_lock);
-		if (eb == root->node) {
-			spin_unlock(&root->node_lock);
+		if (eb == root->node)
 			break;
-		}
-		spin_unlock(&root->node_lock);
-
 		btrfs_tree_unlock(eb);
 		free_extent_buffer(eb);
 	}
@@ -456,10 +453,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	else
 		parent_start = 0;
 
-	spin_lock(&root->node_lock);
-	root->node = cow;
 	extent_buffer_get(cow);
-	spin_unlock(&root->node_lock);
+	rcu_assign_pointer(root->node, cow);
 
 	btrfs_free_tree_block(trans, root, buf, parent_start,
 			      last_ref);
@@ -540,6 +535,9 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 
 	ret = __btrfs_cow_block(trans, root, buf, parent,
 				 parent_slot, cow_ret, search_start, 0);
+
+	trace_btrfs_cow_block(root, buf, *cow_ret);
+
 	return ret;
 }
 
@@ -684,6 +682,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		if (!cur) {
 			cur = read_tree_block(root, blocknr,
 					      blocksize, gen);
+			if (!cur)
+				return -EIO;
 		} else if (!uptodate) {
 			btrfs_read_buffer(cur, gen);
 		}
@@ -730,122 +730,6 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root,
 	return btrfs_item_offset_nr(leaf, nr - 1);
 }
 
-/*
- * extra debugging checks to make sure all the items in a key are
- * well formed and in the proper order
- */
-static int check_node(struct btrfs_root *root, struct btrfs_path *path,
-		      int level)
-{
-	struct extent_buffer *parent = NULL;
-	struct extent_buffer *node = path->nodes[level];
-	struct btrfs_disk_key parent_key;
-	struct btrfs_disk_key node_key;
-	int parent_slot;
-	int slot;
-	struct btrfs_key cpukey;
-	u32 nritems = btrfs_header_nritems(node);
-
-	if (path->nodes[level + 1])
-		parent = path->nodes[level + 1];
-
-	slot = path->slots[level];
-	BUG_ON(nritems == 0);
-	if (parent) {
-		parent_slot = path->slots[level + 1];
-		btrfs_node_key(parent, &parent_key, parent_slot);
-		btrfs_node_key(node, &node_key, 0);
-		BUG_ON(memcmp(&parent_key, &node_key,
-			      sizeof(struct btrfs_disk_key)));
-		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-		       btrfs_header_bytenr(node));
-	}
-	BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
-	if (slot != 0) {
-		btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
-		btrfs_node_key(node, &node_key, slot);
-		BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
-	}
-	if (slot < nritems - 1) {
-		btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
-		btrfs_node_key(node, &node_key, slot);
-		BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
-	}
-	return 0;
-}
-
-/*
- * extra checking to make sure all the items in a leaf are
- * well formed and in the proper order
- */
-static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
-		      int level)
-{
-	struct extent_buffer *leaf = path->nodes[level];
-	struct extent_buffer *parent = NULL;
-	int parent_slot;
-	struct btrfs_key cpukey;
-	struct btrfs_disk_key parent_key;
-	struct btrfs_disk_key leaf_key;
-	int slot = path->slots[0];
-
-	u32 nritems = btrfs_header_nritems(leaf);
-
-	if (path->nodes[level + 1])
-		parent = path->nodes[level + 1];
-
-	if (nritems == 0)
-		return 0;
-
-	if (parent) {
-		parent_slot = path->slots[level + 1];
-		btrfs_node_key(parent, &parent_key, parent_slot);
-		btrfs_item_key(leaf, &leaf_key, 0);
-
-		BUG_ON(memcmp(&parent_key, &leaf_key,
-		       sizeof(struct btrfs_disk_key)));
-		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-		       btrfs_header_bytenr(leaf));
-	}
-	if (slot != 0 && slot < nritems - 1) {
-		btrfs_item_key(leaf, &leaf_key, slot);
-		btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
-		if (comp_keys(&leaf_key, &cpukey) <= 0) {
-			btrfs_print_leaf(root, leaf);
-			printk(KERN_CRIT "slot %d offset bad key\n", slot);
-			BUG_ON(1);
-		}
-		if (btrfs_item_offset_nr(leaf, slot - 1) !=
-		       btrfs_item_end_nr(leaf, slot)) {
-			btrfs_print_leaf(root, leaf);
-			printk(KERN_CRIT "slot %d offset bad\n", slot);
-			BUG_ON(1);
-		}
-	}
-	if (slot < nritems - 1) {
-		btrfs_item_key(leaf, &leaf_key, slot);
-		btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
-		BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
-		if (btrfs_item_offset_nr(leaf, slot) !=
-			btrfs_item_end_nr(leaf, slot + 1)) {
-			btrfs_print_leaf(root, leaf);
-			printk(KERN_CRIT "slot %d offset bad\n", slot);
-			BUG_ON(1);
-		}
-	}
-	BUG_ON(btrfs_item_offset_nr(leaf, 0) +
-	       btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
-	return 0;
-}
-
-static noinline int check_block(struct btrfs_root *root,
-				struct btrfs_path *path, int level)
-{
-	return 0;
-	if (level == 0)
-		return check_leaf(root, path, level);
-	return check_node(root, path, level);
-}
 
 /*
  * search for key in the extent_buffer. The items start at offset p,
@@ -1044,9 +928,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		goto enospc;
 	}
 
-	spin_lock(&root->node_lock);
-	root->node = child;
-	spin_unlock(&root->node_lock);
+	rcu_assign_pointer(root->node, child);
 
 	add_root_to_dirty_list(root);
 	btrfs_tree_unlock(child);
@@ -1186,7 +1068,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		}
 	}
 	/* double check we haven't messed things up */
-	check_block(root, path, level);
 	if (orig_ptr !=
 	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
 		BUG();
@@ -1796,12 +1677,6 @@ cow_done:
 		if (!cow)
 			btrfs_unlock_up_safe(p, level + 1);
 
-		ret = check_block(root, p, level);
-		if (ret) {
-			ret = -1;
-			goto done;
-		}
-
 		ret = bin_search(b, key, level, &slot);
 
 		if (level != 0) {
@@ -2128,10 +2003,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 
 	btrfs_mark_buffer_dirty(c);
 
-	spin_lock(&root->node_lock);
 	old = root->node;
-	root->node = c;
-	spin_unlock(&root->node_lock);
+	rcu_assign_pointer(root->node, c);
 
 	/* the super has an extra ref to root->node */
 	free_extent_buffer(old);
@@ -2514,6 +2387,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
 	btrfs_assert_tree_locked(path->nodes[1]);
 
 	right = read_node_slot(root, upper, slot + 1);
+	if (right == NULL)
+		return 1;
+
 	btrfs_tree_lock(right);
 	btrfs_set_lock_blocking(right);
 
@@ -2764,6 +2640,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
 	btrfs_assert_tree_locked(path->nodes[1]);
 
 	left = read_node_slot(root, path->nodes[1], slot - 1);
+	if (left == NULL)
+		return 1;
+
 	btrfs_tree_lock(left);
 	btrfs_set_lock_blocking(left);
 
@@ -3832,7 +3711,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
 	unsigned long ptr;
 
 	path = btrfs_alloc_path();
-	BUG_ON(!path);
+	if (!path)
+		return -ENOMEM;
 	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
 	if (!ret) {
 		leaf = path->nodes[0];
@@ -4209,6 +4089,7 @@ find_next_key:
 		}
 		btrfs_set_path_blocking(path);
 		cur = read_node_slot(root, cur, slot);
+		BUG_ON(!cur);
 
 		btrfs_tree_lock(cur);
 
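The locking change running through ctree.c swaps root->node_lock for RCU: readers fetch the root node under rcu_read_lock() with rcu_dereference() and pin it with a reference before leaving the critical section, while writers publish a new node with rcu_assign_pointer(), whose barrier orders the node's initialization before the pointer update becomes visible. The pattern in isolation, with hypothetical types (not the btrfs ones):

	struct my_buffer { atomic_t refs; /* node contents */ };
	struct my_root   { struct my_buffer __rcu *node; };

	static struct my_buffer *reader_grab(struct my_root *root)
	{
		struct my_buffer *b;

		rcu_read_lock();
		b = rcu_dereference(root->node); /* pairs with rcu_assign_pointer() */
		atomic_inc(&b->refs);		 /* pin before leaving the section */
		rcu_read_unlock();
		return b;
	}

	static void writer_publish(struct my_root *root, struct my_buffer *new)
	{
		atomic_inc(&new->refs);		 /* reference owned by root->node */
		rcu_assign_pointer(root->node, new);
	}

Taking the reference inside the read-side section is the load-bearing detail: it guarantees the buffer cannot be freed between the dereference and the pin, which is exactly what btrfs_root_node() does with extent_buffer_get().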
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b875d445ea81..2e61fe1b6b8c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/wait.h> | 28 | #include <linux/wait.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/kobject.h> | 30 | #include <linux/kobject.h> |
31 | #include <trace/events/btrfs.h> | ||
31 | #include <asm/kmap_types.h> | 32 | #include <asm/kmap_types.h> |
32 | #include "extent_io.h" | 33 | #include "extent_io.h" |
33 | #include "extent_map.h" | 34 | #include "extent_map.h" |
@@ -40,6 +41,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep; | |||
40 | extern struct kmem_cache *btrfs_transaction_cachep; | 41 | extern struct kmem_cache *btrfs_transaction_cachep; |
41 | extern struct kmem_cache *btrfs_bit_radix_cachep; | 42 | extern struct kmem_cache *btrfs_bit_radix_cachep; |
42 | extern struct kmem_cache *btrfs_path_cachep; | 43 | extern struct kmem_cache *btrfs_path_cachep; |
44 | extern struct kmem_cache *btrfs_free_space_cachep; | ||
43 | struct btrfs_ordered_sum; | 45 | struct btrfs_ordered_sum; |
44 | 46 | ||
45 | #define BTRFS_MAGIC "_BHRfS_M" | 47 | #define BTRFS_MAGIC "_BHRfS_M" |
@@ -295,6 +297,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) | |||
295 | #define BTRFS_FSID_SIZE 16 | 297 | #define BTRFS_FSID_SIZE 16 |
296 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) | 298 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) |
297 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) | 299 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) |
300 | |||
301 | /* | ||
302 | * File system states | ||
303 | */ | ||
304 | |||
305 | /* Errors detected */ | ||
306 | #define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) | ||
307 | |||
298 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) | 308 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) |
299 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) | 309 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) |
300 | 310 | ||
@@ -399,13 +409,15 @@ struct btrfs_super_block { | |||
399 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) | 409 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) |
400 | #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) | 410 | #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) |
401 | #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) | 411 | #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) |
412 | #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) | ||
402 | 413 | ||
403 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL | 414 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL |
404 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL | 415 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL |
405 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ | 416 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ |
406 | (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ | 417 | (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ |
407 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ | 418 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ |
408 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) | 419 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ |
420 | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) | ||
409 | 421 | ||
410 | /* | 422 | /* |
411 | * A leaf is full of items. offset and size tell us where to find | 423 | * A leaf is full of items. offset and size tell us where to find |
@@ -552,9 +564,11 @@ struct btrfs_timespec { | |||
552 | } __attribute__ ((__packed__)); | 564 | } __attribute__ ((__packed__)); |
553 | 565 | ||
554 | enum btrfs_compression_type { | 566 | enum btrfs_compression_type { |
555 | BTRFS_COMPRESS_NONE = 0, | 567 | BTRFS_COMPRESS_NONE = 0, |
556 | BTRFS_COMPRESS_ZLIB = 1, | 568 | BTRFS_COMPRESS_ZLIB = 1, |
557 | BTRFS_COMPRESS_LAST = 2, | 569 | BTRFS_COMPRESS_LZO = 2, |
570 | BTRFS_COMPRESS_TYPES = 2, | ||
571 | BTRFS_COMPRESS_LAST = 3, | ||
558 | }; | 572 | }; |
559 | 573 | ||
560 | struct btrfs_inode_item { | 574 | struct btrfs_inode_item { |
@@ -598,6 +612,8 @@ struct btrfs_dir_item { | |||
598 | u8 type; | 612 | u8 type; |
599 | } __attribute__ ((__packed__)); | 613 | } __attribute__ ((__packed__)); |
600 | 614 | ||
615 | #define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) | ||
616 | |||
601 | struct btrfs_root_item { | 617 | struct btrfs_root_item { |
602 | struct btrfs_inode_item inode; | 618 | struct btrfs_inode_item inode; |
603 | __le64 generation; | 619 | __le64 generation; |
@@ -715,8 +731,19 @@ struct btrfs_space_info { | |||
715 | u64 disk_total; /* total bytes on disk, takes mirrors into | 731 | u64 disk_total; /* total bytes on disk, takes mirrors into |
716 | account */ | 732 | account */ |
717 | 733 | ||
718 | int full; /* indicates that we cannot allocate any more | 734 | /* |
735 | * we bump reservation progress every time we decrement | ||
736 | * bytes_reserved. This way people waiting for reservations | ||
737 | * know something good has happened and they can check | ||
738 | * for progress. The number here isn't to be trusted, it | ||
739 | * just shows reclaim activity | ||
740 | */ | ||
741 | unsigned long reservation_progress; | ||
742 | |||
743 | int full:1; /* indicates that we cannot allocate any more | ||
719 | chunks for this space */ | 744 | chunks for this space */ |
745 | int chunk_alloc:1; /* set if we are allocating a chunk */ | ||
746 | |||
720 | int force_alloc; /* set if we need to force a chunk alloc for | 747 | int force_alloc; /* set if we need to force a chunk alloc for |
721 | this space */ | 748 | this space */ |
722 | 749 | ||
@@ -759,9 +786,6 @@ struct btrfs_free_cluster { | |||
759 | /* first extent starting offset */ | 786 | /* first extent starting offset */ |
760 | u64 window_start; | 787 | u64 window_start; |
761 | 788 | ||
762 | /* if this cluster simply points at a bitmap in the block group */ | ||
763 | bool points_to_bitmap; | ||
764 | |||
765 | struct btrfs_block_group_cache *block_group; | 789 | struct btrfs_block_group_cache *block_group; |
766 | /* | 790 | /* |
767 | * when a cluster is allocated from a block group, we put the | 791 | * when a cluster is allocated from a block group, we put the |
@@ -896,7 +920,8 @@ struct btrfs_fs_info { | |||
896 | */ | 920 | */ |
897 | u64 last_trans_log_full_commit; | 921 | u64 last_trans_log_full_commit; |
898 | u64 open_ioctl_trans; | 922 | u64 open_ioctl_trans; |
899 | unsigned long mount_opt; | 923 | unsigned long mount_opt:20; |
924 | unsigned long compress_type:4; | ||
900 | u64 max_inline; | 925 | u64 max_inline; |
901 | u64 alloc_start; | 926 | u64 alloc_start; |
902 | struct btrfs_transaction *running_transaction; | 927 | struct btrfs_transaction *running_transaction; |
@@ -1051,6 +1076,9 @@ struct btrfs_fs_info { | |||
1051 | unsigned metadata_ratio; | 1076 | unsigned metadata_ratio; |
1052 | 1077 | ||
1053 | void *bdev_holder; | 1078 | void *bdev_holder; |
1079 | |||
1080 | /* filesystem state */ | ||
1081 | u64 fs_state; | ||
1054 | }; | 1082 | }; |
1055 | 1083 | ||
1056 | /* | 1084 | /* |
@@ -1236,6 +1264,7 @@ struct btrfs_root { | |||
1236 | #define BTRFS_MOUNT_SPACE_CACHE (1 << 12) | 1264 | #define BTRFS_MOUNT_SPACE_CACHE (1 << 12) |
1237 | #define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) | 1265 | #define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) |
1238 | #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) | 1266 | #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) |
1267 | #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) | ||
1239 | 1268 | ||
1240 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 1269 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
1241 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 1270 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
@@ -1255,6 +1284,9 @@ struct btrfs_root { | |||
1255 | #define BTRFS_INODE_NODUMP (1 << 8) | 1284 | #define BTRFS_INODE_NODUMP (1 << 8) |
1256 | #define BTRFS_INODE_NOATIME (1 << 9) | 1285 | #define BTRFS_INODE_NOATIME (1 << 9) |
1257 | #define BTRFS_INODE_DIRSYNC (1 << 10) | 1286 | #define BTRFS_INODE_DIRSYNC (1 << 10) |
1287 | #define BTRFS_INODE_COMPRESS (1 << 11) | ||
1288 | |||
1289 | #define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31) | ||
1258 | 1290 | ||
1259 | /* some macros to generate set/get funcs for the struct fields. This | 1291 | /* some macros to generate set/get funcs for the struct fields. This |
1260 | * assumes there is a lefoo_to_cpu for every type, so lets make a simple | 1292 | * assumes there is a lefoo_to_cpu for every type, so lets make a simple |
@@ -1894,6 +1926,11 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); | |||
1894 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, | 1926 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, |
1895 | last_snapshot, 64); | 1927 | last_snapshot, 64); |
1896 | 1928 | ||
1929 | static inline bool btrfs_root_readonly(struct btrfs_root *root) | ||
1930 | { | ||
1931 | return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; | ||
1932 | } | ||
1933 | |||
1897 | /* struct btrfs_super_block */ | 1934 | /* struct btrfs_super_block */ |
1898 | 1935 | ||
1899 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); | 1936 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); |
@@ -2124,6 +2161,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
2124 | u64 root_objectid, u64 owner, u64 offset); | 2161 | u64 root_objectid, u64 owner, u64 offset); |
2125 | 2162 | ||
2126 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); | 2163 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); |
2164 | int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
2165 | u64 num_bytes, int reserve, int sinfo); | ||
2127 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | 2166 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
2128 | struct btrfs_root *root); | 2167 | struct btrfs_root *root); |
2129 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | 2168 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, |
@@ -2146,6 +2185,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
2146 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 2185 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
2147 | struct btrfs_root *root, u64 group_start); | 2186 | struct btrfs_root *root, u64 group_start); |
2148 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 2187 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
2188 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); | ||
2149 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2189 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
2150 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2190 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
2151 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | 2191 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); |
@@ -2189,6 +2229,16 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
2189 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 2229 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
2190 | struct btrfs_block_group_cache *cache); | 2230 | struct btrfs_block_group_cache *cache); |
2191 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info); | 2231 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info); |
2232 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); | ||
2233 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, | ||
2234 | u64 start, u64 end); | ||
2235 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | ||
2236 | u64 num_bytes, u64 *actual_bytes); | ||
2237 | int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, | ||
2238 | struct btrfs_root *root, u64 type); | ||
2239 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); | ||
2240 | |||
2241 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info); | ||
2192 | /* ctree.c */ | 2242 | /* ctree.c */ |
2193 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2243 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
2194 | int level, int *slot); | 2244 | int level, int *slot); |
@@ -2313,6 +2363,8 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); | |||
2313 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); | 2363 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); |
2314 | int btrfs_set_root_node(struct btrfs_root_item *item, | 2364 | int btrfs_set_root_node(struct btrfs_root_item *item, |
2315 | struct extent_buffer *node); | 2365 | struct extent_buffer *node); |
2366 | void btrfs_check_and_init_root_item(struct btrfs_root_item *item); | ||
2367 | |||
2316 | /* dir-item.c */ | 2368 | /* dir-item.c */ |
2317 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, | 2369 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, |
2318 | struct btrfs_root *root, const char *name, | 2370 | struct btrfs_root *root, const char *name, |
@@ -2350,6 +2402,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | |||
2350 | struct btrfs_path *path, u64 dir, | 2402 | struct btrfs_path *path, u64 dir, |
2351 | const char *name, u16 name_len, | 2403 | const char *name, u16 name_len, |
2352 | int mod); | 2404 | int mod); |
2405 | int verify_dir_item(struct btrfs_root *root, | ||
2406 | struct extent_buffer *leaf, | ||
2407 | struct btrfs_dir_item *dir_item); | ||
2353 | 2408 | ||
2354 | /* orphan.c */ | 2409 | /* orphan.c */ |
2355 | int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, | 2410 | int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, |
@@ -2486,7 +2541,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2486 | struct inode *inode); | 2541 | struct inode *inode); |
2487 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | 2542 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); |
2488 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | 2543 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); |
2489 | void btrfs_orphan_cleanup(struct btrfs_root *root); | 2544 | int btrfs_orphan_cleanup(struct btrfs_root *root); |
2490 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | 2545 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, |
2491 | struct btrfs_pending_snapshot *pending, | 2546 | struct btrfs_pending_snapshot *pending, |
2492 | u64 *bytes_to_reserve); | 2547 | u64 *bytes_to_reserve); |
@@ -2494,7 +2549,7 @@ void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | |||
2494 | struct btrfs_pending_snapshot *pending); | 2549 | struct btrfs_pending_snapshot *pending); |
2495 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | 2550 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, |
2496 | struct btrfs_root *root); | 2551 | struct btrfs_root *root); |
2497 | int btrfs_cont_expand(struct inode *inode, loff_t size); | 2552 | int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size); |
2498 | int btrfs_invalidate_inodes(struct btrfs_root *root); | 2553 | int btrfs_invalidate_inodes(struct btrfs_root *root); |
2499 | void btrfs_add_delayed_iput(struct inode *inode); | 2554 | void btrfs_add_delayed_iput(struct inode *inode); |
2500 | void btrfs_run_delayed_iputs(struct btrfs_root *root); | 2555 | void btrfs_run_delayed_iputs(struct btrfs_root *root); |
@@ -2523,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, | |||
2523 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | 2578 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, |
2524 | struct inode *inode, u64 start, u64 end); | 2579 | struct inode *inode, u64 start, u64 end); |
2525 | int btrfs_release_file(struct inode *inode, struct file *file); | 2580 | int btrfs_release_file(struct inode *inode, struct file *file); |
2581 | void btrfs_drop_pages(struct page **pages, size_t num_pages); | ||
2582 | int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, | ||
2583 | struct page **pages, size_t num_pages, | ||
2584 | loff_t pos, size_t write_bytes, | ||
2585 | struct extent_state **cached); | ||
2526 | 2586 | ||
2527 | /* tree-defrag.c */ | 2587 | /* tree-defrag.c */ |
2528 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | 2588 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, |
@@ -2542,6 +2602,14 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); | |||
2542 | /* super.c */ | 2602 | /* super.c */ |
2543 | int btrfs_parse_options(struct btrfs_root *root, char *options); | 2603 | int btrfs_parse_options(struct btrfs_root *root, char *options); |
2544 | int btrfs_sync_fs(struct super_block *sb, int wait); | 2604 | int btrfs_sync_fs(struct super_block *sb, int wait); |
2605 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | ||
2606 | unsigned int line, int errno); | ||
2607 | |||
2608 | #define btrfs_std_error(fs_info, errno) \ | ||
2609 | do { \ | ||
2610 | if ((errno)) \ | ||
2611 | __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\ | ||
2612 | } while (0) | ||
2545 | 2613 | ||
2546 | /* acl.c */ | 2614 | /* acl.c */ |
2547 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 2615 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
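The btrfs_std_error() macro added in this hunk uses the classic do { } while (0) wrapper, so a call expands to a single statement and records the call site automatically. A minimal userspace sketch of the same pattern; the names here are hypothetical stand-ins, not btrfs code:

#include <stdio.h>

/* Hypothetical analogue of the btrfs_std_error() pattern: a
 * do { } while (0) macro that forwards __func__ and __LINE__ to one
 * reporting function and only fires on a nonzero errno value. */
static void do_report_error(const char *function, unsigned int line,
                            int errno_)
{
        fprintf(stderr, "error in %s:%u: %d\n", function, line, errno_);
}

#define report_error(errno_)                                           \
        do {                                                           \
                if ((errno_))                                          \
                        do_report_error(__func__, __LINE__, (errno_)); \
        } while (0)

int main(void)
{
        report_error(0);        /* zero errno: nothing is printed */
        report_error(-5);       /* reports the calling function and line */
        return 0;
}

The do { } while (0) wrapper is what keeps such a macro safe inside an unbraced if/else body.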
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index e807b143b857..bce28f653899 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -483,6 +483,8 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans, | |||
483 | INIT_LIST_HEAD(&head_ref->cluster); | 483 | INIT_LIST_HEAD(&head_ref->cluster); |
484 | mutex_init(&head_ref->mutex); | 484 | mutex_init(&head_ref->mutex); |
485 | 485 | ||
486 | trace_btrfs_delayed_ref_head(ref, head_ref, action); | ||
487 | |||
486 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); | 488 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); |
487 | 489 | ||
488 | if (existing) { | 490 | if (existing) { |
@@ -537,6 +539,8 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
537 | } | 539 | } |
538 | full_ref->level = level; | 540 | full_ref->level = level; |
539 | 541 | ||
542 | trace_btrfs_delayed_tree_ref(ref, full_ref, action); | ||
543 | |||
540 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); | 544 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); |
541 | 545 | ||
542 | if (existing) { | 546 | if (existing) { |
@@ -591,6 +595,8 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans, | |||
591 | full_ref->objectid = owner; | 595 | full_ref->objectid = owner; |
592 | full_ref->offset = offset; | 596 | full_ref->offset = offset; |
593 | 597 | ||
598 | trace_btrfs_delayed_data_ref(ref, full_ref, action); | ||
599 | |||
594 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); | 600 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); |
595 | 601 | ||
596 | if (existing) { | 602 | if (existing) { |
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index f0cad5ae5be7..c62f02f6ae69 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
@@ -151,7 +151,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
151 | ret = PTR_ERR(dir_item); | 151 | ret = PTR_ERR(dir_item); |
152 | if (ret == -EEXIST) | 152 | if (ret == -EEXIST) |
153 | goto second_insert; | 153 | goto second_insert; |
154 | goto out; | 154 | goto out_free; |
155 | } | 155 | } |
156 | 156 | ||
157 | leaf = path->nodes[0]; | 157 | leaf = path->nodes[0]; |
@@ -170,7 +170,7 @@ second_insert: | |||
170 | /* FIXME, use some real flag for selecting the extra index */ | 170 | /* FIXME, use some real flag for selecting the extra index */ |
171 | if (root == root->fs_info->tree_root) { | 171 | if (root == root->fs_info->tree_root) { |
172 | ret = 0; | 172 | ret = 0; |
173 | goto out; | 173 | goto out_free; |
174 | } | 174 | } |
175 | btrfs_release_path(root, path); | 175 | btrfs_release_path(root, path); |
176 | 176 | ||
@@ -180,7 +180,7 @@ second_insert: | |||
180 | name, name_len); | 180 | name, name_len); |
181 | if (IS_ERR(dir_item)) { | 181 | if (IS_ERR(dir_item)) { |
182 | ret2 = PTR_ERR(dir_item); | 182 | ret2 = PTR_ERR(dir_item); |
183 | goto out; | 183 | goto out_free; |
184 | } | 184 | } |
185 | leaf = path->nodes[0]; | 185 | leaf = path->nodes[0]; |
186 | btrfs_cpu_key_to_disk(&disk_key, location); | 186 | btrfs_cpu_key_to_disk(&disk_key, location); |
@@ -192,7 +192,9 @@ second_insert: | |||
192 | name_ptr = (unsigned long)(dir_item + 1); | 192 | name_ptr = (unsigned long)(dir_item + 1); |
193 | write_extent_buffer(leaf, name, name_ptr, name_len); | 193 | write_extent_buffer(leaf, name, name_ptr, name_len); |
194 | btrfs_mark_buffer_dirty(leaf); | 194 | btrfs_mark_buffer_dirty(leaf); |
195 | out: | 195 | |
196 | out_free: | ||
197 | |||
196 | btrfs_free_path(path); | 198 | btrfs_free_path(path); |
197 | if (ret) | 199 | if (ret) |
198 | return ret; | 200 | return ret; |
@@ -377,6 +379,9 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, | |||
377 | 379 | ||
378 | leaf = path->nodes[0]; | 380 | leaf = path->nodes[0]; |
379 | dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); | 381 | dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); |
382 | if (verify_dir_item(root, leaf, dir_item)) | ||
383 | return NULL; | ||
384 | |||
380 | total_len = btrfs_item_size_nr(leaf, path->slots[0]); | 385 | total_len = btrfs_item_size_nr(leaf, path->slots[0]); |
381 | while (cur < total_len) { | 386 | while (cur < total_len) { |
382 | this_len = sizeof(*dir_item) + | 387 | this_len = sizeof(*dir_item) + |
@@ -429,3 +434,35 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, | |||
429 | } | 434 | } |
430 | return ret; | 435 | return ret; |
431 | } | 436 | } |
437 | |||
438 | int verify_dir_item(struct btrfs_root *root, | ||
439 | struct extent_buffer *leaf, | ||
440 | struct btrfs_dir_item *dir_item) | ||
441 | { | ||
442 | u16 namelen = BTRFS_NAME_LEN; | ||
443 | u8 type = btrfs_dir_type(leaf, dir_item); | ||
444 | |||
445 | if (type >= BTRFS_FT_MAX) { | ||
446 | printk(KERN_CRIT "btrfs: invalid dir item type: %d\n", | ||
447 | (int)type); | ||
448 | return 1; | ||
449 | } | ||
450 | |||
451 | if (type == BTRFS_FT_XATTR) | ||
452 | namelen = XATTR_NAME_MAX; | ||
453 | |||
454 | if (btrfs_dir_name_len(leaf, dir_item) > namelen) { | ||
455 | printk(KERN_CRIT "btrfS: invalid dir item name len: %u\n", | ||
456 | (unsigned)btrfs_dir_data_len(leaf, dir_item)); | ||
457 | return 1; | ||
458 | } | ||
459 | |||
460 | /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */ | ||
461 | if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) { | ||
462 | printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n", | ||
463 | (unsigned)btrfs_dir_data_len(leaf, dir_item)); | ||
464 | return 1; | ||
465 | } | ||
466 | |||
467 | return 0; | ||
468 | } | ||
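verify_dir_item() above is a defensive bounds check: the type and length fields of an on-disk record are untrusted until proven sane. A hypothetical flattened model of the same idea; the limits are stand-ins, not the real btrfs constants:

#include <stdint.h>

/* Model of verify_dir_item(): reject a record whose self-described
 * lengths would run past the containing leaf. Stand-in limits. */
enum { FT_MAX = 9 };            /* one past the last valid file type */
enum { NAME_MAX_LEN = 255 };

struct dir_item_model {
        uint8_t  type;
        uint16_t name_len;
        uint16_t data_len;
};

static int dir_item_is_valid(const struct dir_item_model *di,
                             uint32_t max_data_len)
{
        if (di->type >= FT_MAX)
                return 0;               /* unknown file type */
        if (di->name_len > NAME_MAX_LEN)
                return 0;               /* name would overrun the item */
        if (di->data_len > max_data_len)
                return 0;               /* data would overrun the leaf */
        return 1;
}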
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 51d2e4de34eb..68c84c8c24bd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/crc32c.h> | 29 | #include <linux/crc32c.h> |
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/migrate.h> | 31 | #include <linux/migrate.h> |
32 | #include <asm/unaligned.h> | ||
32 | #include "compat.h" | 33 | #include "compat.h" |
33 | #include "ctree.h" | 34 | #include "ctree.h" |
34 | #include "disk-io.h" | 35 | #include "disk-io.h" |
@@ -44,6 +45,20 @@ | |||
44 | static struct extent_io_ops btree_extent_io_ops; | 45 | static struct extent_io_ops btree_extent_io_ops; |
45 | static void end_workqueue_fn(struct btrfs_work *work); | 46 | static void end_workqueue_fn(struct btrfs_work *work); |
46 | static void free_fs_root(struct btrfs_root *root); | 47 | static void free_fs_root(struct btrfs_root *root); |
48 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | ||
49 | int read_only); | ||
50 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root); | ||
51 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root); | ||
52 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
53 | struct btrfs_root *root); | ||
54 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); | ||
55 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); | ||
56 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
57 | struct extent_io_tree *dirty_pages, | ||
58 | int mark); | ||
59 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
60 | struct extent_io_tree *pinned_extents); | ||
61 | static int btrfs_cleanup_transaction(struct btrfs_root *root); | ||
47 | 62 | ||
48 | /* | 63 | /* |
49 | * end_io_wq structs are used to do processing in task context when an IO is | 64 | * end_io_wq structs are used to do processing in task context when an IO is |
@@ -184,7 +199,7 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) | |||
184 | 199 | ||
185 | void btrfs_csum_final(u32 crc, char *result) | 200 | void btrfs_csum_final(u32 crc, char *result) |
186 | { | 201 | { |
187 | *(__le32 *)result = ~cpu_to_le32(crc); | 202 | put_unaligned_le32(~crc, result); |
188 | } | 203 | } |
189 | 204 | ||
190 | /* | 205 | /* |
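The switch to put_unaligned_le32() matters because the old cast assumed result was 4-byte aligned. A portable userspace analogue of the guarantee, assuming nothing about the kernel helper beyond an unaligned little-endian store:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Serialize the value byte by byte, then memcpy it out; neither step
 * makes any alignment assumption about the destination. */
static void put_unaligned_le32_analogue(uint32_t val, unsigned char *buf)
{
        unsigned char bytes[4] = {
                val & 0xff, (val >> 8) & 0xff,
                (val >> 16) & 0xff, (val >> 24) & 0xff,
        };
        memcpy(buf, bytes, sizeof(bytes));
}

int main(void)
{
        unsigned char out[8] = { 0 };

        /* offset 1 is deliberately misaligned for a 4-byte store */
        put_unaligned_le32_analogue(0xdeadbeef, out + 1);
        printf("%02x %02x %02x %02x\n", out[1], out[2], out[3], out[4]);
        return 0;
}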
@@ -309,6 +324,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
309 | int num_copies = 0; | 324 | int num_copies = 0; |
310 | int mirror_num = 0; | 325 | int mirror_num = 0; |
311 | 326 | ||
327 | clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | ||
312 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | 328 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; |
313 | while (1) { | 329 | while (1) { |
314 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, | 330 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, |
@@ -317,6 +333,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
317 | !verify_parent_transid(io_tree, eb, parent_transid)) | 333 | !verify_parent_transid(io_tree, eb, parent_transid)) |
318 | return ret; | 334 | return ret; |
319 | 335 | ||
336 | /* | ||
337 | * This buffer's crc is fine, but its contents are corrupted, so | ||
338 | * there is no reason to read the other copies; they won't be | ||
339 | * any less wrong. | ||
340 | */ | ||
341 | if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) | ||
342 | return ret; | ||
343 | |||
320 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, | 344 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, |
321 | eb->start, eb->len); | 345 | eb->start, eb->len); |
322 | if (num_copies == 1) | 346 | if (num_copies == 1) |
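The EXTENT_BUFFER_CORRUPT test above short-circuits the mirror-retry loop. A sketch of that policy with hypothetical types and a stubbed read, not the real btrfs API:

#include <errno.h>

/* Mirror copies are worth trying only for I/O or checksum failures.
 * A content-level corruption flag (crc fine, structure bad) means
 * every copy would fail the same checks, so give up immediately. */
struct buffer_model {
        int corrupt;            /* stands in for EXTENT_BUFFER_CORRUPT */
};

static int read_one_copy(struct buffer_model *eb, int mirror)
{
        (void)eb; (void)mirror;
        return -EIO;            /* stub: pretend the read failed */
}

static int read_with_mirrors(struct buffer_model *eb, int num_copies)
{
        int mirror, ret = -EIO;

        for (mirror = 1; mirror <= num_copies; mirror++) {
                ret = read_one_copy(eb, mirror);
                if (ret == 0)
                        return 0;
                if (eb->corrupt)
                        return ret;     /* other mirrors won't be less wrong */
        }
        return ret;
}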
@@ -345,14 +369,22 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
345 | 369 | ||
346 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 370 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
347 | 371 | ||
348 | if (page->private == EXTENT_PAGE_PRIVATE) | 372 | if (page->private == EXTENT_PAGE_PRIVATE) { |
373 | WARN_ON(1); | ||
349 | goto out; | 374 | goto out; |
350 | if (!page->private) | 375 | } |
376 | if (!page->private) { | ||
377 | WARN_ON(1); | ||
351 | goto out; | 378 | goto out; |
379 | } | ||
352 | len = page->private >> 2; | 380 | len = page->private >> 2; |
353 | WARN_ON(len == 0); | 381 | WARN_ON(len == 0); |
354 | 382 | ||
355 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 383 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); |
384 | if (eb == NULL) { | ||
385 | WARN_ON(1); | ||
386 | goto out; | ||
387 | } | ||
356 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, | 388 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, |
357 | btrfs_header_generation(eb)); | 389 | btrfs_header_generation(eb)); |
358 | BUG_ON(ret); | 390 | BUG_ON(ret); |
@@ -397,6 +429,73 @@ static int check_tree_block_fsid(struct btrfs_root *root, | |||
397 | return ret; | 429 | return ret; |
398 | } | 430 | } |
399 | 431 | ||
432 | #define CORRUPT(reason, eb, root, slot) \ | ||
433 | printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \ | ||
434 | "root=%llu, slot=%d\n", reason, \ | ||
435 | (unsigned long long)btrfs_header_bytenr(eb), \ | ||
436 | (unsigned long long)root->objectid, slot) | ||
437 | |||
438 | static noinline int check_leaf(struct btrfs_root *root, | ||
439 | struct extent_buffer *leaf) | ||
440 | { | ||
441 | struct btrfs_key key; | ||
442 | struct btrfs_key leaf_key; | ||
443 | u32 nritems = btrfs_header_nritems(leaf); | ||
444 | int slot; | ||
445 | |||
446 | if (nritems == 0) | ||
447 | return 0; | ||
448 | |||
449 | /* Check the 0 item */ | ||
450 | if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != | ||
451 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
452 | CORRUPT("invalid item offset size pair", leaf, root, 0); | ||
453 | return -EIO; | ||
454 | } | ||
455 | |||
456 | /* | ||
457 | * Check to make sure each item's keys are in the correct order and their | ||
458 | * offsets make sense. We only have to loop through nritems-1 because | ||
459 | * we check the current slot against the next slot, which verifies that the | ||
460 | * next slot's offset+size makes sense and that the current slot's | ||
461 | * offset is correct. | ||
462 | */ | ||
463 | for (slot = 0; slot < nritems - 1; slot++) { | ||
464 | btrfs_item_key_to_cpu(leaf, &leaf_key, slot); | ||
465 | btrfs_item_key_to_cpu(leaf, &key, slot + 1); | ||
466 | |||
467 | /* Make sure the keys are in the right order */ | ||
468 | if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) { | ||
469 | CORRUPT("bad key order", leaf, root, slot); | ||
470 | return -EIO; | ||
471 | } | ||
472 | |||
473 | /* | ||
474 | * Make sure the offset and ends are right, remember that the | ||
475 | * item data starts at the end of the leaf and grows towards the | ||
476 | * front. | ||
477 | */ | ||
478 | if (btrfs_item_offset_nr(leaf, slot) != | ||
479 | btrfs_item_end_nr(leaf, slot + 1)) { | ||
480 | CORRUPT("slot offset bad", leaf, root, slot); | ||
481 | return -EIO; | ||
482 | } | ||
483 | |||
484 | /* | ||
485 | * Check to make sure that we don't point outside of the leaf, | ||
486 | * just in case all the items are consistent with each other, but | ||
487 | * all point outside of the leaf. | ||
488 | */ | ||
489 | if (btrfs_item_end_nr(leaf, slot) > | ||
490 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
491 | CORRUPT("slot end outside of leaf", leaf, root, slot); | ||
492 | return -EIO; | ||
493 | } | ||
494 | } | ||
495 | |||
496 | return 0; | ||
497 | } | ||
498 | |||
400 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 499 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
401 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) | 500 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) |
402 | { | 501 | { |
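check_leaf() above encodes the leaf layout invariant: item data grows backward from the end of the leaf, so each item's data must start exactly where the next item's data ends, and the first item must end exactly at the leaf data size. A hypothetical flattened model of the offset checks (the key-ordering check is elided):

#include <stddef.h>

/* Model of the offset invariants: items[i] corresponds to slot i,
 * with its data occupying [offset, offset + size) within the leaf. */
struct item_model { size_t offset, size; };

static int leaf_is_consistent(const struct item_model *items, size_t nritems,
                              size_t leaf_data_size)
{
        size_t i;

        if (nritems == 0)
                return 1;

        /* slot 0 must end exactly at the leaf data size */
        if (items[0].offset + items[0].size != leaf_data_size)
                return 0;

        for (i = 0; i + 1 < nritems; i++) {
                /* slot i must start where slot i+1 ends */
                if (items[i].offset != items[i + 1].offset + items[i + 1].size)
                        return 0;
                /* and no slot may point past the end of the leaf */
                if (items[i].offset + items[i].size > leaf_data_size)
                        return 0;
        }
        return 1;
}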
@@ -427,6 +526,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
427 | WARN_ON(len == 0); | 526 | WARN_ON(len == 0); |
428 | 527 | ||
429 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 528 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); |
529 | if (eb == NULL) { | ||
530 | ret = -EIO; | ||
531 | goto out; | ||
532 | } | ||
430 | 533 | ||
431 | found_start = btrfs_header_bytenr(eb); | 534 | found_start = btrfs_header_bytenr(eb); |
432 | if (found_start != start) { | 535 | if (found_start != start) { |
@@ -459,8 +562,20 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
459 | btrfs_set_buffer_lockdep_class(eb, found_level); | 562 | btrfs_set_buffer_lockdep_class(eb, found_level); |
460 | 563 | ||
461 | ret = csum_tree_block(root, eb, 1); | 564 | ret = csum_tree_block(root, eb, 1); |
462 | if (ret) | 565 | if (ret) { |
566 | ret = -EIO; | ||
567 | goto err; | ||
568 | } | ||
569 | |||
570 | /* | ||
571 | * If this is a leaf block and it is corrupt, set the corrupt bit so | ||
572 | * that we don't try to read the other copies of this block and just | ||
573 | * return -EIO. | ||
574 | */ | ||
575 | if (found_level == 0 && check_leaf(root, eb)) { | ||
576 | set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | ||
463 | ret = -EIO; | 577 | ret = -EIO; |
578 | } | ||
464 | 579 | ||
465 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); | 580 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); |
466 | end = eb->start + end - 1; | 581 | end = eb->start + end - 1; |
@@ -821,7 +936,6 @@ static const struct address_space_operations btree_aops = { | |||
821 | .writepages = btree_writepages, | 936 | .writepages = btree_writepages, |
822 | .releasepage = btree_releasepage, | 937 | .releasepage = btree_releasepage, |
823 | .invalidatepage = btree_invalidatepage, | 938 | .invalidatepage = btree_invalidatepage, |
824 | .sync_page = block_sync_page, | ||
825 | #ifdef CONFIG_MIGRATION | 939 | #ifdef CONFIG_MIGRATION |
826 | .migratepage = btree_migratepage, | 940 | .migratepage = btree_migratepage, |
827 | #endif | 941 | #endif |
@@ -1134,7 +1248,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1134 | root, fs_info, location->objectid); | 1248 | root, fs_info, location->objectid); |
1135 | 1249 | ||
1136 | path = btrfs_alloc_path(); | 1250 | path = btrfs_alloc_path(); |
1137 | BUG_ON(!path); | 1251 | if (!path) { |
1252 | kfree(root); | ||
1253 | return ERR_PTR(-ENOMEM); | ||
1254 | } | ||
1138 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | 1255 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); |
1139 | if (ret == 0) { | 1256 | if (ret == 0) { |
1140 | l = path->nodes[0]; | 1257 | l = path->nodes[0]; |
@@ -1145,6 +1262,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1145 | } | 1262 | } |
1146 | btrfs_free_path(path); | 1263 | btrfs_free_path(path); |
1147 | if (ret) { | 1264 | if (ret) { |
1265 | kfree(root); | ||
1148 | if (ret > 0) | 1266 | if (ret > 0) |
1149 | ret = -ENOENT; | 1267 | ret = -ENOENT; |
1150 | return ERR_PTR(ret); | 1268 | return ERR_PTR(ret); |
@@ -1157,8 +1275,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1157 | root->commit_root = btrfs_root_node(root); | 1275 | root->commit_root = btrfs_root_node(root); |
1158 | BUG_ON(!root->node); | 1276 | BUG_ON(!root->node); |
1159 | out: | 1277 | out: |
1160 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) | 1278 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { |
1161 | root->ref_cows = 1; | 1279 | root->ref_cows = 1; |
1280 | btrfs_check_and_init_root_item(&root->root_item); | ||
1281 | } | ||
1162 | 1282 | ||
1163 | return root; | 1283 | return root; |
1164 | } | 1284 | } |
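The two kfree(root) additions in this hunk close a leak: once the function owns the allocation, every early return must release it. A minimal userspace sketch of the pattern, with hypothetical names:

#include <stdlib.h>

struct thing { int x; };

static struct thing *make_thing(int fail_path)
{
        struct thing *t = malloc(sizeof(*t));

        if (!t)
                return NULL;
        if (fail_path) {        /* e.g. a nested allocation failed */
                free(t);        /* without this, t leaks on the error path */
                return NULL;
        }
        t->x = 0;
        return t;
}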
@@ -1304,82 +1424,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) | |||
1304 | } | 1424 | } |
1305 | 1425 | ||
1306 | /* | 1426 | /* |
1307 | * this unplugs every device on the box, and it is only used when page | ||
1308 | * is null | ||
1309 | */ | ||
1310 | static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | ||
1311 | { | ||
1312 | struct btrfs_device *device; | ||
1313 | struct btrfs_fs_info *info; | ||
1314 | |||
1315 | info = (struct btrfs_fs_info *)bdi->unplug_io_data; | ||
1316 | list_for_each_entry(device, &info->fs_devices->devices, dev_list) { | ||
1317 | if (!device->bdev) | ||
1318 | continue; | ||
1319 | |||
1320 | bdi = blk_get_backing_dev_info(device->bdev); | ||
1321 | if (bdi->unplug_io_fn) | ||
1322 | bdi->unplug_io_fn(bdi, page); | ||
1323 | } | ||
1324 | } | ||
1325 | |||
1326 | static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | ||
1327 | { | ||
1328 | struct inode *inode; | ||
1329 | struct extent_map_tree *em_tree; | ||
1330 | struct extent_map *em; | ||
1331 | struct address_space *mapping; | ||
1332 | u64 offset; | ||
1333 | |||
1334 | /* the generic O_DIRECT read code does this */ | ||
1335 | if (1 || !page) { | ||
1336 | __unplug_io_fn(bdi, page); | ||
1337 | return; | ||
1338 | } | ||
1339 | |||
1340 | /* | ||
1341 | * page->mapping may change at any time. Get a consistent copy | ||
1342 | * and use that for everything below | ||
1343 | */ | ||
1344 | smp_mb(); | ||
1345 | mapping = page->mapping; | ||
1346 | if (!mapping) | ||
1347 | return; | ||
1348 | |||
1349 | inode = mapping->host; | ||
1350 | |||
1351 | /* | ||
1352 | * don't do the expensive searching for a small number of | ||
1353 | * devices | ||
1354 | */ | ||
1355 | if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) { | ||
1356 | __unplug_io_fn(bdi, page); | ||
1357 | return; | ||
1358 | } | ||
1359 | |||
1360 | offset = page_offset(page); | ||
1361 | |||
1362 | em_tree = &BTRFS_I(inode)->extent_tree; | ||
1363 | read_lock(&em_tree->lock); | ||
1364 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); | ||
1365 | read_unlock(&em_tree->lock); | ||
1366 | if (!em) { | ||
1367 | __unplug_io_fn(bdi, page); | ||
1368 | return; | ||
1369 | } | ||
1370 | |||
1371 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
1372 | free_extent_map(em); | ||
1373 | __unplug_io_fn(bdi, page); | ||
1374 | return; | ||
1375 | } | ||
1376 | offset = offset - em->start; | ||
1377 | btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree, | ||
1378 | em->block_start + offset, page); | ||
1379 | free_extent_map(em); | ||
1380 | } | ||
1381 | |||
1382 | /* | ||
1383 | * If this fails, caller must call bdi_destroy() to get rid of the | 1427 | * If this fails, caller must call bdi_destroy() to get rid of the |
1384 | * bdi again. | 1428 | * bdi again. |
1385 | */ | 1429 | */ |
@@ -1393,8 +1437,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
1393 | return err; | 1437 | return err; |
1394 | 1438 | ||
1395 | bdi->ra_pages = default_backing_dev_info.ra_pages; | 1439 | bdi->ra_pages = default_backing_dev_info.ra_pages; |
1396 | bdi->unplug_io_fn = btrfs_unplug_io_fn; | ||
1397 | bdi->unplug_io_data = info; | ||
1398 | bdi->congested_fn = btrfs_congested_fn; | 1440 | bdi->congested_fn = btrfs_congested_fn; |
1399 | bdi->congested_data = info; | 1441 | bdi->congested_data = info; |
1400 | return 0; | 1442 | return 0; |
@@ -1527,6 +1569,7 @@ static int transaction_kthread(void *arg) | |||
1527 | spin_unlock(&root->fs_info->new_trans_lock); | 1569 | spin_unlock(&root->fs_info->new_trans_lock); |
1528 | 1570 | ||
1529 | trans = btrfs_join_transaction(root, 1); | 1571 | trans = btrfs_join_transaction(root, 1); |
1572 | BUG_ON(IS_ERR(trans)); | ||
1530 | if (transid == trans->transid) { | 1573 | if (transid == trans->transid) { |
1531 | ret = btrfs_commit_transaction(trans, root); | 1574 | ret = btrfs_commit_transaction(trans, root); |
1532 | BUG_ON(ret); | 1575 | BUG_ON(ret); |
@@ -1604,6 +1647,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1604 | goto fail_bdi; | 1647 | goto fail_bdi; |
1605 | } | 1648 | } |
1606 | 1649 | ||
1650 | fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS; | ||
1651 | |||
1607 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | 1652 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); |
1608 | INIT_LIST_HEAD(&fs_info->trans_list); | 1653 | INIT_LIST_HEAD(&fs_info->trans_list); |
1609 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1654 | INIT_LIST_HEAD(&fs_info->dead_roots); |
@@ -1713,8 +1758,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1713 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | 1758 | fs_info, BTRFS_ROOT_TREE_OBJECTID); |
1714 | 1759 | ||
1715 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); | 1760 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); |
1716 | if (!bh) | 1761 | if (!bh) { |
1762 | err = -EINVAL; | ||
1717 | goto fail_iput; | 1763 | goto fail_iput; |
1764 | } | ||
1718 | 1765 | ||
1719 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); | 1766 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); |
1720 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, | 1767 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, |
@@ -1727,6 +1774,17 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1727 | if (!btrfs_super_root(disk_super)) | 1774 | if (!btrfs_super_root(disk_super)) |
1728 | goto fail_iput; | 1775 | goto fail_iput; |
1729 | 1776 | ||
1777 | /* check the FS state to see whether the FS is broken */ | ||
1778 | fs_info->fs_state |= btrfs_super_flags(disk_super); | ||
1779 | |||
1780 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | ||
1781 | |||
1782 | /* | ||
1783 | * In the long term, we'll store the compression type in the super | ||
1784 | * block, and it'll be used for per-file compression control. | ||
1785 | */ | ||
1786 | fs_info->compress_type = BTRFS_COMPRESS_ZLIB; | ||
1787 | |||
1730 | ret = btrfs_parse_options(tree_root, options); | 1788 | ret = btrfs_parse_options(tree_root, options); |
1731 | if (ret) { | 1789 | if (ret) { |
1732 | err = ret; | 1790 | err = ret; |
@@ -1744,10 +1802,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1744 | } | 1802 | } |
1745 | 1803 | ||
1746 | features = btrfs_super_incompat_flags(disk_super); | 1804 | features = btrfs_super_incompat_flags(disk_super); |
1747 | if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { | 1805 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; |
1748 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; | 1806 | if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) |
1749 | btrfs_set_super_incompat_flags(disk_super, features); | 1807 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; |
1750 | } | 1808 | btrfs_set_super_incompat_flags(disk_super, features); |
1751 | 1809 | ||
1752 | features = btrfs_super_compat_ro_flags(disk_super) & | 1810 | features = btrfs_super_compat_ro_flags(disk_super) & |
1753 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; | 1811 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; |
@@ -1932,6 +1990,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1932 | fs_info->metadata_alloc_profile = (u64)-1; | 1990 | fs_info->metadata_alloc_profile = (u64)-1; |
1933 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | 1991 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; |
1934 | 1992 | ||
1993 | ret = btrfs_init_space_info(fs_info); | ||
1994 | if (ret) { | ||
1995 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); | ||
1996 | goto fail_block_groups; | ||
1997 | } | ||
1998 | |||
1935 | ret = btrfs_read_block_groups(extent_root); | 1999 | ret = btrfs_read_block_groups(extent_root); |
1936 | if (ret) { | 2000 | if (ret) { |
1937 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); | 2001 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); |
@@ -1957,7 +2021,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1957 | btrfs_set_opt(fs_info->mount_opt, SSD); | 2021 | btrfs_set_opt(fs_info->mount_opt, SSD); |
1958 | } | 2022 | } |
1959 | 2023 | ||
1960 | if (btrfs_super_log_root(disk_super) != 0) { | 2024 | /* do not make disk changes in broken FS */ |
2025 | if (btrfs_super_log_root(disk_super) != 0 && | ||
2026 | !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { | ||
1961 | u64 bytenr = btrfs_super_log_root(disk_super); | 2027 | u64 bytenr = btrfs_super_log_root(disk_super); |
1962 | 2028 | ||
1963 | if (fs_devices->rw_devices == 0) { | 2029 | if (fs_devices->rw_devices == 0) { |
@@ -2021,9 +2087,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
2021 | 2087 | ||
2022 | if (!(sb->s_flags & MS_RDONLY)) { | 2088 | if (!(sb->s_flags & MS_RDONLY)) { |
2023 | down_read(&fs_info->cleanup_work_sem); | 2089 | down_read(&fs_info->cleanup_work_sem); |
2024 | btrfs_orphan_cleanup(fs_info->fs_root); | 2090 | err = btrfs_orphan_cleanup(fs_info->fs_root); |
2025 | btrfs_orphan_cleanup(fs_info->tree_root); | 2091 | if (!err) |
2092 | err = btrfs_orphan_cleanup(fs_info->tree_root); | ||
2026 | up_read(&fs_info->cleanup_work_sem); | 2093 | up_read(&fs_info->cleanup_work_sem); |
2094 | if (err) { | ||
2095 | close_ctree(tree_root); | ||
2096 | return ERR_PTR(err); | ||
2097 | } | ||
2027 | } | 2098 | } |
2028 | 2099 | ||
2029 | return tree_root; | 2100 | return tree_root; |
@@ -2398,8 +2469,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | |||
2398 | 2469 | ||
2399 | root_objectid = gang[ret - 1]->root_key.objectid + 1; | 2470 | root_objectid = gang[ret - 1]->root_key.objectid + 1; |
2400 | for (i = 0; i < ret; i++) { | 2471 | for (i = 0; i < ret; i++) { |
2472 | int err; | ||
2473 | |||
2401 | root_objectid = gang[i]->root_key.objectid; | 2474 | root_objectid = gang[i]->root_key.objectid; |
2402 | btrfs_orphan_cleanup(gang[i]); | 2475 | err = btrfs_orphan_cleanup(gang[i]); |
2476 | if (err) | ||
2477 | return err; | ||
2403 | } | 2478 | } |
2404 | root_objectid++; | 2479 | root_objectid++; |
2405 | } | 2480 | } |
@@ -2421,10 +2496,14 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
2421 | up_write(&root->fs_info->cleanup_work_sem); | 2496 | up_write(&root->fs_info->cleanup_work_sem); |
2422 | 2497 | ||
2423 | trans = btrfs_join_transaction(root, 1); | 2498 | trans = btrfs_join_transaction(root, 1); |
2499 | if (IS_ERR(trans)) | ||
2500 | return PTR_ERR(trans); | ||
2424 | ret = btrfs_commit_transaction(trans, root); | 2501 | ret = btrfs_commit_transaction(trans, root); |
2425 | BUG_ON(ret); | 2502 | BUG_ON(ret); |
2426 | /* run commit again to drop the original snapshot */ | 2503 | /* run commit again to drop the original snapshot */ |
2427 | trans = btrfs_join_transaction(root, 1); | 2504 | trans = btrfs_join_transaction(root, 1); |
2505 | if (IS_ERR(trans)) | ||
2506 | return PTR_ERR(trans); | ||
2428 | btrfs_commit_transaction(trans, root); | 2507 | btrfs_commit_transaction(trans, root); |
2429 | ret = btrfs_write_and_wait_transaction(NULL, root); | 2508 | ret = btrfs_write_and_wait_transaction(NULL, root); |
2430 | BUG_ON(ret); | 2509 | BUG_ON(ret); |
@@ -2442,8 +2521,28 @@ int close_ctree(struct btrfs_root *root) | |||
2442 | smp_mb(); | 2521 | smp_mb(); |
2443 | 2522 | ||
2444 | btrfs_put_block_group_cache(fs_info); | 2523 | btrfs_put_block_group_cache(fs_info); |
2524 | |||
2525 | /* | ||
2526 | * Two situations arise when an error has flipped btrfs read-only: | ||
2527 | * | ||
2528 | * 1. btrfs flipped read-only somewhere else before | ||
2529 | * btrfs_commit_super; sb->s_flags already carries MS_RDONLY, | ||
2530 | * so btrfs skips writing the superblock here to keep the | ||
2531 | * ERROR state on disk. | ||
2532 | * | ||
2533 | * 2. btrfs flipped read-only inside btrfs_commit_super itself; | ||
2534 | * the superblock cannot be written there, so, since fs_state | ||
2535 | * carries BTRFS_SUPER_FLAG_ERROR, btrfs cleans up all FS | ||
2536 | * resources first and writes the superblock afterwards. | ||
2537 | */ | ||
2445 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 2538 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
2446 | ret = btrfs_commit_super(root); | 2539 | ret = btrfs_commit_super(root); |
2540 | if (ret) | ||
2541 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | ||
2542 | } | ||
2543 | |||
2544 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
2545 | ret = btrfs_error_commit_super(root); | ||
2447 | if (ret) | 2546 | if (ret) |
2448 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2547 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
2449 | } | 2548 | } |
@@ -2502,6 +2601,8 @@ int close_ctree(struct btrfs_root *root) | |||
2502 | kfree(fs_info->chunk_root); | 2601 | kfree(fs_info->chunk_root); |
2503 | kfree(fs_info->dev_root); | 2602 | kfree(fs_info->dev_root); |
2504 | kfree(fs_info->csum_root); | 2603 | kfree(fs_info->csum_root); |
2604 | kfree(fs_info); | ||
2605 | |||
2505 | return 0; | 2606 | return 0; |
2506 | } | 2607 | } |
2507 | 2608 | ||
@@ -2619,6 +2720,355 @@ out: | |||
2619 | return 0; | 2720 | return 0; |
2620 | } | 2721 | } |
2621 | 2722 | ||
2723 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | ||
2724 | int read_only) | ||
2725 | { | ||
2726 | if (read_only) | ||
2727 | return; | ||
2728 | |||
2729 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | ||
2730 | printk(KERN_WARNING "warning: mount fs with errors, " | ||
2731 | "running btrfsck is recommended\n"); | ||
2732 | } | ||
2733 | |||
2734 | int btrfs_error_commit_super(struct btrfs_root *root) | ||
2735 | { | ||
2736 | int ret; | ||
2737 | |||
2738 | mutex_lock(&root->fs_info->cleaner_mutex); | ||
2739 | btrfs_run_delayed_iputs(root); | ||
2740 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
2741 | |||
2742 | down_write(&root->fs_info->cleanup_work_sem); | ||
2743 | up_write(&root->fs_info->cleanup_work_sem); | ||
2744 | |||
2745 | /* cleanup FS via transaction */ | ||
2746 | btrfs_cleanup_transaction(root); | ||
2747 | |||
2748 | ret = write_ctree_super(NULL, root, 0); | ||
2749 | |||
2750 | return ret; | ||
2751 | } | ||
2752 | |||
2753 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root) | ||
2754 | { | ||
2755 | struct btrfs_inode *btrfs_inode; | ||
2756 | struct list_head splice; | ||
2757 | |||
2758 | INIT_LIST_HEAD(&splice); | ||
2759 | |||
2760 | mutex_lock(&root->fs_info->ordered_operations_mutex); | ||
2761 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2762 | |||
2763 | list_splice_init(&root->fs_info->ordered_operations, &splice); | ||
2764 | while (!list_empty(&splice)) { | ||
2765 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
2766 | ordered_operations); | ||
2767 | |||
2768 | list_del_init(&btrfs_inode->ordered_operations); | ||
2769 | |||
2770 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
2771 | } | ||
2772 | |||
2773 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2774 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | ||
2775 | |||
2776 | return 0; | ||
2777 | } | ||
2778 | |||
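The destroy helpers in this hunk all share one idiom: splice the live list onto a private head, then drain it entry by entry. A hypothetical userspace model with a tiny hand-rolled intrusive list (the real code uses the kernel's list_head API):

#include <stdio.h>

struct node { struct node *next, *prev; int id; };

static void list_init(struct node *h) { h->next = h->prev = h; }

static int list_empty(const struct node *h) { return h->next == h; }

/* move every entry of src onto dst, which must be an empty head */
static void splice_init(struct node *src, struct node *dst)
{
        if (list_empty(src))
                return;
        dst->next = src->next;
        dst->prev = src->prev;
        src->next->prev = dst;
        src->prev->next = dst;
        list_init(src);         /* src is now empty again */
}

static void drain(struct node *head)
{
        while (!list_empty(head)) {
                struct node *e = head->next;

                /* list_del_init(): unlink, then self-link the entry */
                e->prev->next = e->next;
                e->next->prev = e->prev;
                list_init(e);
                printf("destroying entry %d\n", e->id);
        }
}

Splicing under the lock and draining afterwards keeps the lock hold time short and lets other threads keep adding to the (now empty) live list.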
2779 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root) | ||
2780 | { | ||
2781 | struct list_head splice; | ||
2782 | struct btrfs_ordered_extent *ordered; | ||
2783 | struct inode *inode; | ||
2784 | |||
2785 | INIT_LIST_HEAD(&splice); | ||
2786 | |||
2787 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2788 | |||
2789 | list_splice_init(&root->fs_info->ordered_extents, &splice); | ||
2790 | while (!list_empty(&splice)) { | ||
2791 | ordered = list_entry(splice.next, struct btrfs_ordered_extent, | ||
2792 | root_extent_list); | ||
2793 | |||
2794 | list_del_init(&ordered->root_extent_list); | ||
2795 | atomic_inc(&ordered->refs); | ||
2796 | |||
2797 | /* the inode may be getting freed (in sys_unlink path). */ | ||
2798 | inode = igrab(ordered->inode); | ||
2799 | |||
2800 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2801 | if (inode) | ||
2802 | iput(inode); | ||
2803 | |||
2804 | atomic_set(&ordered->refs, 1); | ||
2805 | btrfs_put_ordered_extent(ordered); | ||
2806 | |||
2807 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2808 | } | ||
2809 | |||
2810 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2811 | |||
2812 | return 0; | ||
2813 | } | ||
2814 | |||
2815 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
2816 | struct btrfs_root *root) | ||
2817 | { | ||
2818 | struct rb_node *node; | ||
2819 | struct btrfs_delayed_ref_root *delayed_refs; | ||
2820 | struct btrfs_delayed_ref_node *ref; | ||
2821 | int ret = 0; | ||
2822 | |||
2823 | delayed_refs = &trans->delayed_refs; | ||
2824 | |||
2825 | spin_lock(&delayed_refs->lock); | ||
2826 | if (delayed_refs->num_entries == 0) { | ||
2827 | printk(KERN_INFO "delayed_refs has NO entry\n"); | ||
2828 | return ret; | ||
2829 | } | ||
2830 | |||
2831 | node = rb_first(&delayed_refs->root); | ||
2832 | while (node) { | ||
2833 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
2834 | node = rb_next(node); | ||
2835 | |||
2836 | ref->in_tree = 0; | ||
2837 | rb_erase(&ref->rb_node, &delayed_refs->root); | ||
2838 | delayed_refs->num_entries--; | ||
2839 | |||
2840 | atomic_set(&ref->refs, 1); | ||
2841 | if (btrfs_delayed_ref_is_head(ref)) { | ||
2842 | struct btrfs_delayed_ref_head *head; | ||
2843 | |||
2844 | head = btrfs_delayed_node_to_head(ref); | ||
2845 | mutex_lock(&head->mutex); | ||
2846 | kfree(head->extent_op); | ||
2847 | delayed_refs->num_heads--; | ||
2848 | if (list_empty(&head->cluster)) | ||
2849 | delayed_refs->num_heads_ready--; | ||
2850 | list_del_init(&head->cluster); | ||
2851 | mutex_unlock(&head->mutex); | ||
2852 | } | ||
2853 | |||
2854 | spin_unlock(&delayed_refs->lock); | ||
2855 | btrfs_put_delayed_ref(ref); | ||
2856 | |||
2857 | cond_resched(); | ||
2858 | spin_lock(&delayed_refs->lock); | ||
2859 | } | ||
2860 | |||
2861 | spin_unlock(&delayed_refs->lock); | ||
2862 | |||
2863 | return ret; | ||
2864 | } | ||
2865 | |||
2866 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) | ||
2867 | { | ||
2868 | struct btrfs_pending_snapshot *snapshot; | ||
2869 | struct list_head splice; | ||
2870 | |||
2871 | INIT_LIST_HEAD(&splice); | ||
2872 | |||
2873 | list_splice_init(&t->pending_snapshots, &splice); | ||
2874 | |||
2875 | while (!list_empty(&splice)) { | ||
2876 | snapshot = list_entry(splice.next, | ||
2877 | struct btrfs_pending_snapshot, | ||
2878 | list); | ||
2879 | |||
2880 | list_del_init(&snapshot->list); | ||
2881 | |||
2882 | kfree(snapshot); | ||
2883 | } | ||
2884 | |||
2885 | return 0; | ||
2886 | } | ||
2887 | |||
2888 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) | ||
2889 | { | ||
2890 | struct btrfs_inode *btrfs_inode; | ||
2891 | struct list_head splice; | ||
2892 | |||
2893 | INIT_LIST_HEAD(&splice); | ||
2894 | |||
2895 | spin_lock(&root->fs_info->delalloc_lock); | ||
2896 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); | ||
2897 | |||
2898 | |||
2899 | while (!list_empty(&splice)) { | ||
2900 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
2901 | delalloc_inodes); | ||
2902 | |||
2903 | list_del_init(&btrfs_inode->delalloc_inodes); | ||
2904 | |||
2905 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
2906 | } | ||
2907 | |||
2908 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2909 | |||
2910 | return 0; | ||
2911 | } | ||
2912 | |||
2913 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
2914 | struct extent_io_tree *dirty_pages, | ||
2915 | int mark) | ||
2916 | { | ||
2917 | int ret; | ||
2918 | struct page *page; | ||
2919 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
2920 | struct extent_buffer *eb; | ||
2921 | u64 start = 0; | ||
2922 | u64 end; | ||
2923 | u64 offset; | ||
2924 | unsigned long index; | ||
2925 | |||
2926 | while (1) { | ||
2927 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | ||
2928 | mark); | ||
2929 | if (ret) | ||
2930 | break; | ||
2931 | |||
2932 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); | ||
2933 | while (start <= end) { | ||
2934 | index = start >> PAGE_CACHE_SHIFT; | ||
2935 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | ||
2936 | page = find_get_page(btree_inode->i_mapping, index); | ||
2937 | if (!page) | ||
2938 | continue; | ||
2939 | offset = page_offset(page); | ||
2940 | |||
2941 | spin_lock(&dirty_pages->buffer_lock); | ||
2942 | eb = radix_tree_lookup( | ||
2943 | &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, | ||
2944 | offset >> PAGE_CACHE_SHIFT); | ||
2945 | spin_unlock(&dirty_pages->buffer_lock); | ||
2946 | if (eb) { | ||
2947 | ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, | ||
2948 | &eb->bflags); | ||
2949 | atomic_set(&eb->refs, 1); | ||
2950 | } | ||
2951 | if (PageWriteback(page)) | ||
2952 | end_page_writeback(page); | ||
2953 | |||
2954 | lock_page(page); | ||
2955 | if (PageDirty(page)) { | ||
2956 | clear_page_dirty_for_io(page); | ||
2957 | spin_lock_irq(&page->mapping->tree_lock); | ||
2958 | radix_tree_tag_clear(&page->mapping->page_tree, | ||
2959 | page_index(page), | ||
2960 | PAGECACHE_TAG_DIRTY); | ||
2961 | spin_unlock_irq(&page->mapping->tree_lock); | ||
2962 | } | ||
2963 | |||
2964 | page->mapping->a_ops->invalidatepage(page, 0); | ||
2965 | unlock_page(page); | ||
2966 | } | ||
2967 | } | ||
2968 | |||
2969 | return ret; | ||
2970 | } | ||
2971 | |||
2972 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
2973 | struct extent_io_tree *pinned_extents) | ||
2974 | { | ||
2975 | struct extent_io_tree *unpin; | ||
2976 | u64 start; | ||
2977 | u64 end; | ||
2978 | int ret; | ||
2979 | |||
2980 | unpin = pinned_extents; | ||
2981 | while (1) { | ||
2982 | ret = find_first_extent_bit(unpin, 0, &start, &end, | ||
2983 | EXTENT_DIRTY); | ||
2984 | if (ret) | ||
2985 | break; | ||
2986 | |||
2987 | /* opt_discard */ | ||
2988 | if (btrfs_test_opt(root, DISCARD)) | ||
2989 | ret = btrfs_error_discard_extent(root, start, | ||
2990 | end + 1 - start, | ||
2991 | NULL); | ||
2992 | |||
2993 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | ||
2994 | btrfs_error_unpin_extent_range(root, start, end); | ||
2995 | cond_resched(); | ||
2996 | } | ||
2997 | |||
2998 | return 0; | ||
2999 | } | ||
3000 | |||
3001 | static int btrfs_cleanup_transaction(struct btrfs_root *root) | ||
3002 | { | ||
3003 | struct btrfs_transaction *t; | ||
3004 | LIST_HEAD(list); | ||
3005 | |||
3006 | WARN_ON(1); | ||
3007 | |||
3008 | mutex_lock(&root->fs_info->trans_mutex); | ||
3009 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | ||
3010 | |||
3011 | list_splice_init(&root->fs_info->trans_list, &list); | ||
3012 | while (!list_empty(&list)) { | ||
3013 | t = list_entry(list.next, struct btrfs_transaction, list); | ||
3014 | if (!t) | ||
3015 | break; | ||
3016 | |||
3017 | btrfs_destroy_ordered_operations(root); | ||
3018 | |||
3019 | btrfs_destroy_ordered_extents(root); | ||
3020 | |||
3021 | btrfs_destroy_delayed_refs(t, root); | ||
3022 | |||
3023 | btrfs_block_rsv_release(root, | ||
3024 | &root->fs_info->trans_block_rsv, | ||
3025 | t->dirty_pages.dirty_bytes); | ||
3026 | |||
3027 | /* FIXME: cleanup wait for commit */ | ||
3028 | t->in_commit = 1; | ||
3029 | t->blocked = 1; | ||
3030 | if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) | ||
3031 | wake_up(&root->fs_info->transaction_blocked_wait); | ||
3032 | |||
3033 | t->blocked = 0; | ||
3034 | if (waitqueue_active(&root->fs_info->transaction_wait)) | ||
3035 | wake_up(&root->fs_info->transaction_wait); | ||
3036 | mutex_unlock(&root->fs_info->trans_mutex); | ||
3037 | |||
3038 | mutex_lock(&root->fs_info->trans_mutex); | ||
3039 | t->commit_done = 1; | ||
3040 | if (waitqueue_active(&t->commit_wait)) | ||
3041 | wake_up(&t->commit_wait); | ||
3042 | mutex_unlock(&root->fs_info->trans_mutex); | ||
3043 | |||
3044 | mutex_lock(&root->fs_info->trans_mutex); | ||
3045 | |||
3046 | btrfs_destroy_pending_snapshots(t); | ||
3047 | |||
3048 | btrfs_destroy_delalloc_inodes(root); | ||
3049 | |||
3050 | spin_lock(&root->fs_info->new_trans_lock); | ||
3051 | root->fs_info->running_transaction = NULL; | ||
3052 | spin_unlock(&root->fs_info->new_trans_lock); | ||
3053 | |||
3054 | btrfs_destroy_marked_extents(root, &t->dirty_pages, | ||
3055 | EXTENT_DIRTY); | ||
3056 | |||
3057 | btrfs_destroy_pinned_extent(root, | ||
3058 | root->fs_info->pinned_extents); | ||
3059 | |||
3060 | atomic_set(&t->use_count, 0); | ||
3061 | list_del_init(&t->list); | ||
3062 | memset(t, 0, sizeof(*t)); | ||
3063 | kmem_cache_free(btrfs_transaction_cachep, t); | ||
3064 | } | ||
3065 | |||
3066 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | ||
3067 | mutex_unlock(&root->fs_info->trans_mutex); | ||
3068 | |||
3069 | return 0; | ||
3070 | } | ||
3071 | |||
2622 | static struct extent_io_ops btree_extent_io_ops = { | 3072 | static struct extent_io_ops btree_extent_io_ops = { |
2623 | .write_cache_pages_lock_hook = btree_lock_page_hook, | 3073 | .write_cache_pages_lock_hook = btree_lock_page_hook, |
2624 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3074 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 88e825a0bf21..07b20dc2fd95 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -52,6 +52,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
52 | struct btrfs_root *root, int max_mirrors); | 52 | struct btrfs_root *root, int max_mirrors); |
53 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); | 53 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); |
54 | int btrfs_commit_super(struct btrfs_root *root); | 54 | int btrfs_commit_super(struct btrfs_root *root); |
55 | int btrfs_error_commit_super(struct btrfs_root *root); | ||
55 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 56 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
56 | u64 bytenr, u32 blocksize); | 57 | u64 bytenr, u32 blocksize); |
57 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | 58 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 9786963b07e5..b4ffad859adb 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
21 | int len = *max_len; | 21 | int len = *max_len; |
22 | int type; | 22 | int type; |
23 | 23 | ||
24 | if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || | 24 | if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) { |
25 | (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) | 25 | *max_len = BTRFS_FID_SIZE_CONNECTABLE; |
26 | return 255; | 26 | return 255; |
27 | } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) { | ||
28 | *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE; | ||
29 | return 255; | ||
30 | } | ||
27 | 31 | ||
28 | len = BTRFS_FID_SIZE_NON_CONNECTABLE; | 32 | len = BTRFS_FID_SIZE_NON_CONNECTABLE; |
29 | type = FILEID_BTRFS_WITHOUT_PARENT; | 33 | type = FILEID_BTRFS_WITHOUT_PARENT; |
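The reworked length check appears to follow the exportfs convention of writing the required size back through *max_len before returning 255, so the caller learns how big a buffer it needs. A sketch with stand-in sizes (the real BTRFS_FID_SIZE_* values are derived from struct layouts):

/* Stand-in fid sizes, in 4-byte words; not the real btrfs values. */
#define FID_SIZE_NON_CONNECTABLE 2
#define FID_SIZE_CONNECTABLE     4

static int encode_fh_sketch(int *max_len, int connectable)
{
        if (connectable && *max_len < FID_SIZE_CONNECTABLE) {
                *max_len = FID_SIZE_CONNECTABLE;   /* tell caller the need */
                return 255;
        }
        if (*max_len < FID_SIZE_NON_CONNECTABLE) {
                *max_len = FID_SIZE_NON_CONNECTABLE;
                return 255;
        }
        return 0;               /* enough room; encode as usual */
}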
@@ -171,6 +175,8 @@ static struct dentry *btrfs_get_parent(struct dentry *child) | |||
171 | int ret; | 175 | int ret; |
172 | 176 | ||
173 | path = btrfs_alloc_path(); | 177 | path = btrfs_alloc_path(); |
178 | if (!path) | ||
179 | return ERR_PTR(-ENOMEM); | ||
174 | 180 | ||
175 | if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | 181 | if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { |
176 | key.objectid = root->root_key.objectid; | 182 | key.objectid = root->root_key.objectid; |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 227e5815d838..31f33ba56fe8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -33,11 +33,28 @@ | |||
33 | #include "locking.h" | 33 | #include "locking.h" |
34 | #include "free-space-cache.h" | 34 | #include "free-space-cache.h" |
35 | 35 | ||
36 | /* control flags for do_chunk_alloc's force field | ||
37 | * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk | ||
38 | * if we really need one. | ||
39 | * | ||
40 | * CHUNK_ALLOC_FORCE means it must try to allocate one | ||
41 | * | ||
42 | * CHUNK_ALLOC_LIMITED means to try to allocate one only | ||
43 | * if we have very few chunks already allocated. This is | ||
44 | * used as part of the clustering code to help make sure | ||
45 | * we have a good pool of storage to cluster in, without | ||
46 | * filling the FS with empty chunks | ||
47 | * | ||
48 | */ | ||
49 | enum { | ||
50 | CHUNK_ALLOC_NO_FORCE = 0, | ||
51 | CHUNK_ALLOC_FORCE = 1, | ||
52 | CHUNK_ALLOC_LIMITED = 2, | ||
53 | }; | ||
54 | |||
36 | static int update_block_group(struct btrfs_trans_handle *trans, | 55 | static int update_block_group(struct btrfs_trans_handle *trans, |
37 | struct btrfs_root *root, | 56 | struct btrfs_root *root, |
38 | u64 bytenr, u64 num_bytes, int alloc); | 57 | u64 bytenr, u64 num_bytes, int alloc); |
39 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
40 | u64 num_bytes, int reserve, int sinfo); | ||
41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 58 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
42 | struct btrfs_root *root, | 59 | struct btrfs_root *root, |
43 | u64 bytenr, u64 num_bytes, u64 parent, | 60 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -320,11 +337,6 @@ static int caching_kthread(void *data) | |||
320 | if (!path) | 337 | if (!path) |
321 | return -ENOMEM; | 338 | return -ENOMEM; |
322 | 339 | ||
323 | exclude_super_stripes(extent_root, block_group); | ||
324 | spin_lock(&block_group->space_info->lock); | ||
325 | block_group->space_info->bytes_readonly += block_group->bytes_super; | ||
326 | spin_unlock(&block_group->space_info->lock); | ||
327 | |||
328 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 340 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
329 | 341 | ||
330 | /* | 342 | /* |
@@ -447,7 +459,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
447 | * allocate blocks for the tree root we can't do the fast caching since | 459 | * allocate blocks for the tree root we can't do the fast caching since |
448 | * we likely hold important locks. | 460 | * we likely hold important locks. |
449 | */ | 461 | */ |
450 | if (!trans->transaction->in_commit && | 462 | if (trans && (!trans->transaction->in_commit) && |
451 | (root && root != root->fs_info->tree_root)) { | 463 | (root && root != root->fs_info->tree_root)) { |
452 | spin_lock(&cache->lock); | 464 | spin_lock(&cache->lock); |
453 | if (cache->cached != BTRFS_CACHE_NO) { | 465 | if (cache->cached != BTRFS_CACHE_NO) { |
@@ -467,14 +479,16 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
467 | cache->cached = BTRFS_CACHE_NO; | 479 | cache->cached = BTRFS_CACHE_NO; |
468 | } | 480 | } |
469 | spin_unlock(&cache->lock); | 481 | spin_unlock(&cache->lock); |
470 | if (ret == 1) | 482 | if (ret == 1) { |
483 | free_excluded_extents(fs_info->extent_root, cache); | ||
471 | return 0; | 484 | return 0; |
485 | } | ||
472 | } | 486 | } |
473 | 487 | ||
474 | if (load_cache_only) | 488 | if (load_cache_only) |
475 | return 0; | 489 | return 0; |
476 | 490 | ||
477 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); | 491 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); |
478 | BUG_ON(!caching_ctl); | 492 | BUG_ON(!caching_ctl); |
479 | 493 | ||
480 | INIT_LIST_HEAD(&caching_ctl->list); | 494 | INIT_LIST_HEAD(&caching_ctl->list); |
@@ -1743,39 +1757,45 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
1743 | return ret; | 1757 | return ret; |
1744 | } | 1758 | } |
1745 | 1759 | ||
1746 | static void btrfs_issue_discard(struct block_device *bdev, | 1760 | static int btrfs_issue_discard(struct block_device *bdev, |
1747 | u64 start, u64 len) | 1761 | u64 start, u64 len) |
1748 | { | 1762 | { |
1749 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0); | 1763 | return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); |
1750 | } | 1764 | } |
1751 | 1765 | ||
1752 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1766 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
1753 | u64 num_bytes) | 1767 | u64 num_bytes, u64 *actual_bytes) |
1754 | { | 1768 | { |
1755 | int ret; | 1769 | int ret; |
1756 | u64 map_length = num_bytes; | 1770 | u64 discarded_bytes = 0; |
1757 | struct btrfs_multi_bio *multi = NULL; | 1771 | struct btrfs_multi_bio *multi = NULL; |
1758 | 1772 | ||
1759 | if (!btrfs_test_opt(root, DISCARD)) | ||
1760 | return 0; | ||
1761 | 1773 | ||
1762 | /* Tell the block device(s) that the sectors can be discarded */ | 1774 | /* Tell the block device(s) that the sectors can be discarded */ |
1763 | ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, | 1775 | ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, |
1764 | bytenr, &map_length, &multi, 0); | 1776 | bytenr, &num_bytes, &multi, 0); |
1765 | if (!ret) { | 1777 | if (!ret) { |
1766 | struct btrfs_bio_stripe *stripe = multi->stripes; | 1778 | struct btrfs_bio_stripe *stripe = multi->stripes; |
1767 | int i; | 1779 | int i; |
1768 | 1780 | ||
1769 | if (map_length > num_bytes) | ||
1770 | map_length = num_bytes; | ||
1771 | 1781 | ||
1772 | for (i = 0; i < multi->num_stripes; i++, stripe++) { | 1782 | for (i = 0; i < multi->num_stripes; i++, stripe++) { |
1773 | btrfs_issue_discard(stripe->dev->bdev, | 1783 | ret = btrfs_issue_discard(stripe->dev->bdev, |
1774 | stripe->physical, | 1784 | stripe->physical, |
1775 | map_length); | 1785 | stripe->length); |
1786 | if (!ret) | ||
1787 | discarded_bytes += stripe->length; | ||
1788 | else if (ret != -EOPNOTSUPP) | ||
1789 | break; | ||
1776 | } | 1790 | } |
1777 | kfree(multi); | 1791 | kfree(multi); |
1778 | } | 1792 | } |
1793 | if (discarded_bytes && ret == -EOPNOTSUPP) | ||
1794 | ret = 0; | ||
1795 | |||
1796 | if (actual_bytes) | ||
1797 | *actual_bytes = discarded_bytes; | ||
1798 | |||
1779 | 1799 | ||
1780 | return ret; | 1800 | return ret; |
1781 | } | 1801 | } |
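btrfs_discard_extent() now reports how much was actually discarded and tolerates devices without discard support. A model of the accounting with hypothetical types and a stubbed device call:

#include <errno.h>
#include <stdint.h>

struct stripe_model { uint64_t physical, length; };

static int issue_discard_stub(const struct stripe_model *s)
{
        (void)s;
        return 0;               /* pretend the device accepted it */
}

static int discard_extent_model(const struct stripe_model *stripes, int n,
                                uint64_t *actual_bytes)
{
        uint64_t discarded = 0;
        int i, ret = 0;

        for (i = 0; i < n; i++) {
                ret = issue_discard_stub(&stripes[i]);
                if (!ret)
                        discarded += stripes[i].length;
                else if (ret != -EOPNOTSUPP)
                        break;  /* real error: give up */
        }
        /* partial success beats reporting EOPNOTSUPP from one device */
        if (discarded && ret == -EOPNOTSUPP)
                ret = 0;
        if (actual_bytes)
                *actual_bytes = discarded;
        return ret;
}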
@@ -3018,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3018 | found->bytes_readonly = 0; | 3038 | found->bytes_readonly = 0; |
3019 | found->bytes_may_use = 0; | 3039 | found->bytes_may_use = 0; |
3020 | found->full = 0; | 3040 | found->full = 0; |
3021 | found->force_alloc = 0; | 3041 | found->force_alloc = CHUNK_ALLOC_NO_FORCE; |
3042 | found->chunk_alloc = 0; | ||
3022 | *space_info = found; | 3043 | *space_info = found; |
3023 | list_add_rcu(&found->list, &info->space_info); | 3044 | list_add_rcu(&found->list, &info->space_info); |
3024 | atomic_set(&found->caching_threads, 0); | 3045 | atomic_set(&found->caching_threads, 0); |
@@ -3089,7 +3110,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) | |||
3089 | return btrfs_reduce_alloc_profile(root, flags); | 3110 | return btrfs_reduce_alloc_profile(root, flags); |
3090 | } | 3111 | } |
3091 | 3112 | ||
3092 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | 3113 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) |
3093 | { | 3114 | { |
3094 | u64 flags; | 3115 | u64 flags; |
3095 | 3116 | ||
@@ -3149,7 +3170,7 @@ again: | |||
3149 | if (!data_sinfo->full && alloc_chunk) { | 3170 | if (!data_sinfo->full && alloc_chunk) { |
3150 | u64 alloc_target; | 3171 | u64 alloc_target; |
3151 | 3172 | ||
3152 | data_sinfo->force_alloc = 1; | 3173 | data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; |
3153 | spin_unlock(&data_sinfo->lock); | 3174 | spin_unlock(&data_sinfo->lock); |
3154 | alloc: | 3175 | alloc: |
3155 | alloc_target = btrfs_get_alloc_profile(root, 1); | 3176 | alloc_target = btrfs_get_alloc_profile(root, 1); |
@@ -3159,10 +3180,15 @@ alloc: | |||
3159 | 3180 | ||
3160 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 3181 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
3161 | bytes + 2 * 1024 * 1024, | 3182 | bytes + 2 * 1024 * 1024, |
3162 | alloc_target, 0); | 3183 | alloc_target, |
3184 | CHUNK_ALLOC_NO_FORCE); | ||
3163 | btrfs_end_transaction(trans, root); | 3185 | btrfs_end_transaction(trans, root); |
3164 | if (ret < 0) | 3186 | if (ret < 0) { |
3165 | return ret; | 3187 | if (ret != -ENOSPC) |
3188 | return ret; | ||
3189 | else | ||
3190 | goto commit_trans; | ||
3191 | } | ||
3166 | 3192 | ||
3167 | if (!data_sinfo) { | 3193 | if (!data_sinfo) { |
3168 | btrfs_set_inode_space_info(root, inode); | 3194 | btrfs_set_inode_space_info(root, inode); |
@@ -3173,6 +3199,7 @@ alloc: | |||
3173 | spin_unlock(&data_sinfo->lock); | 3199 | spin_unlock(&data_sinfo->lock); |
3174 | 3200 | ||
3175 | /* commit the current transaction and try again */ | 3201 | /* commit the current transaction and try again */ |
3202 | commit_trans: | ||
3176 | if (!committed && !root->fs_info->open_ioctl_trans) { | 3203 | if (!committed && !root->fs_info->open_ioctl_trans) { |
3177 | committed = 1; | 3204 | committed = 1; |
3178 | trans = btrfs_join_transaction(root, 1); | 3205 | trans = btrfs_join_transaction(root, 1); |
@@ -3233,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) | |||
3233 | rcu_read_lock(); | 3260 | rcu_read_lock(); |
3234 | list_for_each_entry_rcu(found, head, list) { | 3261 | list_for_each_entry_rcu(found, head, list) { |
3235 | if (found->flags & BTRFS_BLOCK_GROUP_METADATA) | 3262 | if (found->flags & BTRFS_BLOCK_GROUP_METADATA) |
3236 | found->force_alloc = 1; | 3263 | found->force_alloc = CHUNK_ALLOC_FORCE; |
3237 | } | 3264 | } |
3238 | rcu_read_unlock(); | 3265 | rcu_read_unlock(); |
3239 | } | 3266 | } |
3240 | 3267 | ||
3241 | static int should_alloc_chunk(struct btrfs_root *root, | 3268 | static int should_alloc_chunk(struct btrfs_root *root, |
3242 | struct btrfs_space_info *sinfo, u64 alloc_bytes) | 3269 | struct btrfs_space_info *sinfo, u64 alloc_bytes, |
3270 | int force) | ||
3243 | { | 3271 | { |
3244 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; | 3272 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; |
3273 | u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; | ||
3245 | u64 thresh; | 3274 | u64 thresh; |
3246 | 3275 | ||
3247 | if (sinfo->bytes_used + sinfo->bytes_reserved + | 3276 | if (force == CHUNK_ALLOC_FORCE) |
3248 | alloc_bytes + 256 * 1024 * 1024 < num_bytes) | 3277 | return 1; |
3278 | |||
3279 | /* | ||
3280 | * in limited mode, we want to have some free space up to | ||
3281 | * about 1% of the FS size. | ||
3282 | */ | ||
3283 | if (force == CHUNK_ALLOC_LIMITED) { | ||
3284 | thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); | ||
3285 | thresh = max_t(u64, 64 * 1024 * 1024, | ||
3286 | div_factor_fine(thresh, 1)); | ||
3287 | |||
3288 | if (num_bytes - num_allocated < thresh) | ||
3289 | return 1; | ||
3290 | } | ||
3291 | |||
3292 | /* | ||
3293 | * we have two similar checks here, one based on percentage | ||
3294 | * and one based on a hard number of 256MB. The idea | ||
3295 | * is that if we have a good amount of free | ||
3296 | * room, don't allocate a chunk. A good amount is | ||
3297 | * less than 80% utilization of the chunks we have allocated, | ||
3298 | * or more than 256MB free. | ||
3299 | */ | ||
3300 | if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes) | ||
3249 | return 0; | 3301 | return 0; |
3250 | 3302 | ||
3251 | if (sinfo->bytes_used + sinfo->bytes_reserved + | 3303 | if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) |
3252 | alloc_bytes < div_factor(num_bytes, 8)) | ||
3253 | return 0; | 3304 | return 0; |
3254 | 3305 | ||
3255 | thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); | 3306 | thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); |
3307 | |||
3308 | /* 256MB or 5% of the FS */ | ||
3256 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); | 3309 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); |
3257 | 3310 | ||
3258 | if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) | 3311 | if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) |
3259 | return 0; | 3312 | return 0; |
3260 | |||
3261 | return 1; | 3313 | return 1; |
3262 | } | 3314 | } |
3263 | 3315 | ||
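
should_alloc_chunk() now implements a three-tier policy: CHUNK_ALLOC_FORCE always allocates, CHUNK_ALLOC_LIMITED allocates only while less than roughly 1% of the filesystem (at least 64MB) remains unallocated, and the default path declines whenever existing chunks still offer 256MB of headroom or sit under 80% utilization. The standalone model below reproduces the arithmetic; the plain parameters stand in for the space_info counters and btrfs_super_total_bytes(), and in the kernel num_bytes additionally excludes read-only space.

    #include <stdint.h>
    #include <stdio.h>

    enum { CHUNK_ALLOC_NO_FORCE, CHUNK_ALLOC_LIMITED, CHUNK_ALLOC_FORCE };

    /* mirrors the kernel helpers: n * f / 10 and n * f / 100 */
    static uint64_t div_factor(uint64_t num, int f)      { return num * f / 10; }
    static uint64_t div_factor_fine(uint64_t num, int f) { return num * f / 100; }
    static uint64_t max_u64(uint64_t a, uint64_t b)      { return a > b ? a : b; }

    static int should_alloc_chunk(uint64_t total, uint64_t used,
                                  uint64_t reserved, uint64_t alloc_bytes,
                                  uint64_t fs_bytes, int force)
    {
        uint64_t num_bytes = total;     /* the kernel subtracts readonly */
        uint64_t num_allocated = used + reserved;
        uint64_t thresh;

        if (force == CHUNK_ALLOC_FORCE)
            return 1;

        /* limited mode: keep ~1% of the FS (at least 64MB) unallocated */
        if (force == CHUNK_ALLOC_LIMITED) {
            thresh = max_u64(64ULL << 20, div_factor_fine(fs_bytes, 1));
            if (num_bytes - num_allocated < thresh)
                return 1;
        }

        /* plenty of headroom: 256MB spare ... */
        if (num_allocated + alloc_bytes + (256ULL << 20) < num_bytes)
            return 0;
        /* ... or still under 80% utilization */
        if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
            return 0;

        /* big FS whose chunks are mostly idle: don't bother */
        thresh = max_u64(256ULL << 20, div_factor_fine(fs_bytes, 5));
        if (num_bytes > thresh && used < div_factor(num_bytes, 3))
            return 0;

        return 1;
    }

    int main(void)
    {
        /* only 100MB of slack left in the allocated chunks: allocate */
        printf("%d\n", should_alloc_chunk(10ULL << 30,
                                          (10ULL << 30) - (100ULL << 20),
                                          0, 1ULL << 20, 10ULL << 30,
                                          CHUNK_ALLOC_NO_FORCE));
        return 0;
    }
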
@@ -3267,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3267 | { | 3319 | { |
3268 | struct btrfs_space_info *space_info; | 3320 | struct btrfs_space_info *space_info; |
3269 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3321 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
3322 | int wait_for_alloc = 0; | ||
3270 | int ret = 0; | 3323 | int ret = 0; |
3271 | 3324 | ||
3272 | mutex_lock(&fs_info->chunk_mutex); | ||
3273 | |||
3274 | flags = btrfs_reduce_alloc_profile(extent_root, flags); | 3325 | flags = btrfs_reduce_alloc_profile(extent_root, flags); |
3275 | 3326 | ||
3276 | space_info = __find_space_info(extent_root->fs_info, flags); | 3327 | space_info = __find_space_info(extent_root->fs_info, flags); |
@@ -3281,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3281 | } | 3332 | } |
3282 | BUG_ON(!space_info); | 3333 | BUG_ON(!space_info); |
3283 | 3334 | ||
3335 | again: | ||
3284 | spin_lock(&space_info->lock); | 3336 | spin_lock(&space_info->lock); |
3285 | if (space_info->force_alloc) | 3337 | if (space_info->force_alloc) |
3286 | force = 1; | 3338 | force = space_info->force_alloc; |
3287 | if (space_info->full) { | 3339 | if (space_info->full) { |
3288 | spin_unlock(&space_info->lock); | 3340 | spin_unlock(&space_info->lock); |
3289 | goto out; | 3341 | return 0; |
3290 | } | 3342 | } |
3291 | 3343 | ||
3292 | if (!force && !should_alloc_chunk(extent_root, space_info, | 3344 | if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) { |
3293 | alloc_bytes)) { | ||
3294 | spin_unlock(&space_info->lock); | 3345 | spin_unlock(&space_info->lock); |
3295 | goto out; | 3346 | return 0; |
3347 | } else if (space_info->chunk_alloc) { | ||
3348 | wait_for_alloc = 1; | ||
3349 | } else { | ||
3350 | space_info->chunk_alloc = 1; | ||
3296 | } | 3351 | } |
3352 | |||
3297 | spin_unlock(&space_info->lock); | 3353 | spin_unlock(&space_info->lock); |
3298 | 3354 | ||
3355 | mutex_lock(&fs_info->chunk_mutex); | ||
3356 | |||
3357 | /* | ||
3358 | * The chunk_mutex is held throughout the entirety of a chunk | ||
3359 | * allocation, so once we've acquired the chunk_mutex we know that the | ||
3360 | * other guy is done and we need to recheck and see if we should | ||
3361 | * allocate. | ||
3362 | */ | ||
3363 | if (wait_for_alloc) { | ||
3364 | mutex_unlock(&fs_info->chunk_mutex); | ||
3365 | wait_for_alloc = 0; | ||
3366 | goto again; | ||
3367 | } | ||
3368 | |||
3299 | /* | 3369 | /* |
3300 | * If we have mixed data/metadata chunks we want to make sure we keep | 3370 | * If we have mixed data/metadata chunks we want to make sure we keep |
3301 | * allocating mixed chunks instead of individual chunks. | 3371 | * allocating mixed chunks instead of individual chunks. |
@@ -3321,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3321 | space_info->full = 1; | 3391 | space_info->full = 1; |
3322 | else | 3392 | else |
3323 | ret = 1; | 3393 | ret = 1; |
3324 | space_info->force_alloc = 0; | 3394 | |
3395 | space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; | ||
3396 | space_info->chunk_alloc = 0; | ||
3325 | spin_unlock(&space_info->lock); | 3397 | spin_unlock(&space_info->lock); |
3326 | out: | ||
3327 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3398 | mutex_unlock(&extent_root->fs_info->chunk_mutex); |
3328 | return ret; | 3399 | return ret; |
3329 | } | 3400 | } |
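
The reworked do_chunk_alloc() drops the coarse take-chunk_mutex-first pattern for an optimistic one: a short spinlock section either claims the allocation by setting space_info->chunk_alloc or notes that someone else holds it; since the winner keeps chunk_mutex for the whole allocation, a racer that later acquires the mutex knows the allocation finished and simply rechecks from the top. A pthread sketch of that handshake, with the policy checks reduced to the full flag:

    #include <pthread.h>
    #include <stdio.h>

    struct space_info {
        pthread_spinlock_t lock;
        pthread_mutex_t chunk_mutex;
        int chunk_alloc;    /* an allocation is in flight */
        int full;           /* no more chunks can be made */
    };

    static int do_chunk_alloc(struct space_info *si)
    {
        int wait_for_alloc = 0;

    again:
        pthread_spin_lock(&si->lock);
        if (si->full) {
            pthread_spin_unlock(&si->lock);
            return 0;
        }
        if (si->chunk_alloc)
            wait_for_alloc = 1;     /* someone else is allocating */
        else
            si->chunk_alloc = 1;    /* we claimed the allocation */
        pthread_spin_unlock(&si->lock);

        pthread_mutex_lock(&si->chunk_mutex);
        if (wait_for_alloc) {
            /* the owner held chunk_mutex until it finished, so the
             * state has changed; go back and re-evaluate from scratch */
            pthread_mutex_unlock(&si->chunk_mutex);
            wait_for_alloc = 0;
            goto again;
        }

        /* ... allocate the chunk here ... */

        pthread_spin_lock(&si->lock);
        si->chunk_alloc = 0;
        pthread_spin_unlock(&si->lock);
        pthread_mutex_unlock(&si->chunk_mutex);
        return 1;
    }

    int main(void)
    {
        struct space_info si = { .chunk_alloc = 0, .full = 0 };

        pthread_spin_init(&si.lock, PTHREAD_PROCESS_PRIVATE);
        pthread_mutex_init(&si.chunk_mutex, NULL);
        printf("allocated: %d\n", do_chunk_alloc(&si));
        return 0;
    }
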
@@ -3339,21 +3410,24 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3339 | u64 reserved; | 3410 | u64 reserved; |
3340 | u64 max_reclaim; | 3411 | u64 max_reclaim; |
3341 | u64 reclaimed = 0; | 3412 | u64 reclaimed = 0; |
3342 | int pause = 1; | 3413 | long time_left; |
3343 | int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; | 3414 | int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; |
3415 | int loops = 0; | ||
3416 | unsigned long progress; | ||
3344 | 3417 | ||
3345 | block_rsv = &root->fs_info->delalloc_block_rsv; | 3418 | block_rsv = &root->fs_info->delalloc_block_rsv; |
3346 | space_info = block_rsv->space_info; | 3419 | space_info = block_rsv->space_info; |
3347 | 3420 | ||
3348 | smp_mb(); | 3421 | smp_mb(); |
3349 | reserved = space_info->bytes_reserved; | 3422 | reserved = space_info->bytes_reserved; |
3423 | progress = space_info->reservation_progress; | ||
3350 | 3424 | ||
3351 | if (reserved == 0) | 3425 | if (reserved == 0) |
3352 | return 0; | 3426 | return 0; |
3353 | 3427 | ||
3354 | max_reclaim = min(reserved, to_reclaim); | 3428 | max_reclaim = min(reserved, to_reclaim); |
3355 | 3429 | ||
3356 | while (1) { | 3430 | while (loops < 1024) { |
3357 | /* have the flusher threads jump in and do some IO */ | 3431 | /* have the flusher threads jump in and do some IO */ |
3358 | smp_mb(); | 3432 | smp_mb(); |
3359 | nr_pages = min_t(unsigned long, nr_pages, | 3433 | nr_pages = min_t(unsigned long, nr_pages, |
@@ -3366,17 +3440,31 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3366 | reserved = space_info->bytes_reserved; | 3440 | reserved = space_info->bytes_reserved; |
3367 | spin_unlock(&space_info->lock); | 3441 | spin_unlock(&space_info->lock); |
3368 | 3442 | ||
3443 | loops++; | ||
3444 | |||
3369 | if (reserved == 0 || reclaimed >= max_reclaim) | 3445 | if (reserved == 0 || reclaimed >= max_reclaim) |
3370 | break; | 3446 | break; |
3371 | 3447 | ||
3372 | if (trans && trans->transaction->blocked) | 3448 | if (trans && trans->transaction->blocked) |
3373 | return -EAGAIN; | 3449 | return -EAGAIN; |
3374 | 3450 | ||
3375 | __set_current_state(TASK_INTERRUPTIBLE); | 3451 | time_left = schedule_timeout_interruptible(1); |
3376 | schedule_timeout(pause); | 3452 | |
3377 | pause <<= 1; | 3453 | /* We were interrupted, exit */ |
3378 | if (pause > HZ / 10) | 3454 | if (time_left) |
3379 | pause = HZ / 10; | 3455 | break; |
3456 | |||
3457 | /* we've kicked the IO a few times; if anything has been freed, | ||
3458 | * exit. There is no sense in looping here for a long time | ||
3459 | * when we really need to commit the transaction, or there are | ||
3460 | * just too many writers without enough free space | ||
3461 | */ | ||
3462 | |||
3463 | if (loops > 3) { | ||
3464 | smp_mb(); | ||
3465 | if (progress != space_info->reservation_progress) | ||
3466 | break; | ||
3467 | } | ||
3380 | 3468 | ||
3381 | } | 3469 | } |
3382 | return reclaimed >= to_reclaim; | 3470 | return reclaimed >= to_reclaim; |
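
shrink_delalloc() loses its unbounded exponential backoff in favor of a bounded loop: at most 1024 passes, a one-jiffy interruptible sleep between writeback kicks, and an early exit once more than three passes have seen space_info->reservation_progress move, meaning reservations are being released and retrying the reservation is more useful than spinning. A compressed userspace model of the loop's exit conditions; kick_writeback() fakes the flusher threads and bumps the progress counter itself, whereas in the kernel other tasks releasing reservations do that:

    #include <stdint.h>
    #include <stdio.h>

    struct space_info {
        uint64_t bytes_reserved;            /* outstanding reservations */
        unsigned long reservation_progress; /* bumped on every release */
    };

    /* fake flusher kick: pretend writeback released a little space */
    static uint64_t kick_writeback(struct space_info *si)
    {
        uint64_t freed = si->bytes_reserved >= 4096 ? 4096 : si->bytes_reserved;

        si->bytes_reserved -= freed;
        if (freed)
            si->reservation_progress++;
        return freed;
    }

    static int shrink(struct space_info *si, uint64_t to_reclaim)
    {
        uint64_t max_reclaim = to_reclaim < si->bytes_reserved ?
                               to_reclaim : si->bytes_reserved;
        uint64_t reclaimed = 0;
        unsigned long progress = si->reservation_progress;
        int loops = 0;

        while (loops < 1024) {              /* hard cap, never spin forever */
            reclaimed += kick_writeback(si);
            loops++;

            if (si->bytes_reserved == 0 || reclaimed >= max_reclaim)
                break;

            /* the kernel sleeps one jiffy here and exits on a signal */

            if (loops > 3 && progress != si->reservation_progress)
                break;  /* space is being released; stop and retry */
        }
        return reclaimed >= to_reclaim;
    }

    int main(void)
    {
        struct space_info si = { 1 << 20, 0 };

        printf("%d\n", shrink(&si, 64 * 1024));
        return 0;
    }
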
@@ -3583,10 +3671,23 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, | |||
3583 | 3671 | ||
3584 | if (num_bytes > 0) { | 3672 | if (num_bytes > 0) { |
3585 | if (dest) { | 3673 | if (dest) { |
3586 | block_rsv_add_bytes(dest, num_bytes, 0); | 3674 | spin_lock(&dest->lock); |
3587 | } else { | 3675 | if (!dest->full) { |
3676 | u64 bytes_to_add; | ||
3677 | |||
3678 | bytes_to_add = dest->size - dest->reserved; | ||
3679 | bytes_to_add = min(num_bytes, bytes_to_add); | ||
3680 | dest->reserved += bytes_to_add; | ||
3681 | if (dest->reserved >= dest->size) | ||
3682 | dest->full = 1; | ||
3683 | num_bytes -= bytes_to_add; | ||
3684 | } | ||
3685 | spin_unlock(&dest->lock); | ||
3686 | } | ||
3687 | if (num_bytes) { | ||
3588 | spin_lock(&space_info->lock); | 3688 | spin_lock(&space_info->lock); |
3589 | space_info->bytes_reserved -= num_bytes; | 3689 | space_info->bytes_reserved -= num_bytes; |
3690 | space_info->reservation_progress++; | ||
3590 | spin_unlock(&space_info->lock); | 3691 | spin_unlock(&space_info->lock); |
3591 | } | 3692 | } |
3592 | } | 3693 | } |
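
block_rsv_release_bytes() previously handed everything to the destination reserve; now it only tops the destination up to its target size and returns the overflow to the space_info, bumping reservation_progress so the shrink loop above can notice. A minimal model of that split, with the two structs reduced to the fields the function touches:

    #include <stdint.h>
    #include <stdio.h>

    struct block_rsv {
        uint64_t size;      /* target amount to keep reserved */
        uint64_t reserved;  /* currently held */
        int full;
    };

    struct space_info {
        uint64_t bytes_reserved;
        unsigned long reservation_progress;
    };

    static void release_bytes(struct space_info *si, struct block_rsv *dest,
                              uint64_t num_bytes)
    {
        if (dest && !dest->full) {
            uint64_t bytes_to_add = dest->size - dest->reserved;

            if (bytes_to_add > num_bytes)
                bytes_to_add = num_bytes;
            dest->reserved += bytes_to_add;
            if (dest->reserved >= dest->size)
                dest->full = 1;
            num_bytes -= bytes_to_add;
        }
        if (num_bytes) {
            /* overflow goes back to the pool; waiters watch this counter */
            si->bytes_reserved -= num_bytes;
            si->reservation_progress++;
        }
    }

    int main(void)
    {
        struct block_rsv dest = { 8192, 4096, 0 };
        struct space_info si = { 65536, 0 };

        release_bytes(&si, &dest, 16384);
        printf("dest=%llu full=%d pool=%llu\n",
               (unsigned long long)dest.reserved, dest.full,
               (unsigned long long)si.bytes_reserved);
        return 0;
    }
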
@@ -3721,11 +3822,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3721 | return 0; | 3822 | return 0; |
3722 | } | 3823 | } |
3723 | 3824 | ||
3724 | WARN_ON(1); | ||
3725 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
3726 | block_rsv->size, block_rsv->reserved, | ||
3727 | block_rsv->freed[0], block_rsv->freed[1]); | ||
3728 | |||
3729 | return -ENOSPC; | 3825 | return -ENOSPC; |
3730 | } | 3826 | } |
3731 | 3827 | ||
@@ -3824,6 +3920,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
3824 | if (block_rsv->reserved >= block_rsv->size) { | 3920 | if (block_rsv->reserved >= block_rsv->size) { |
3825 | num_bytes = block_rsv->reserved - block_rsv->size; | 3921 | num_bytes = block_rsv->reserved - block_rsv->size; |
3826 | sinfo->bytes_reserved -= num_bytes; | 3922 | sinfo->bytes_reserved -= num_bytes; |
3923 | sinfo->reservation_progress++; | ||
3827 | block_rsv->reserved = block_rsv->size; | 3924 | block_rsv->reserved = block_rsv->size; |
3828 | block_rsv->full = 1; | 3925 | block_rsv->full = 1; |
3829 | } | 3926 | } |
@@ -3968,6 +4065,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3968 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | 4065 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
3969 | u64 to_reserve; | 4066 | u64 to_reserve; |
3970 | int nr_extents; | 4067 | int nr_extents; |
4068 | int reserved_extents; | ||
3971 | int ret; | 4069 | int ret; |
3972 | 4070 | ||
3973 | if (btrfs_transaction_in_commit(root->fs_info)) | 4071 | if (btrfs_transaction_in_commit(root->fs_info)) |
@@ -3975,26 +4073,24 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3975 | 4073 | ||
3976 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4074 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
3977 | 4075 | ||
3978 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
3979 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; | 4076 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; |
3980 | if (nr_extents > BTRFS_I(inode)->reserved_extents) { | 4077 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); |
3981 | nr_extents -= BTRFS_I(inode)->reserved_extents; | 4078 | |
4079 | if (nr_extents > reserved_extents) { | ||
4080 | nr_extents -= reserved_extents; | ||
3982 | to_reserve = calc_trans_metadata_size(root, nr_extents); | 4081 | to_reserve = calc_trans_metadata_size(root, nr_extents); |
3983 | } else { | 4082 | } else { |
3984 | nr_extents = 0; | 4083 | nr_extents = 0; |
3985 | to_reserve = 0; | 4084 | to_reserve = 0; |
3986 | } | 4085 | } |
3987 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3988 | 4086 | ||
3989 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | 4087 | to_reserve += calc_csum_metadata_size(inode, num_bytes); |
3990 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); | 4088 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); |
3991 | if (ret) | 4089 | if (ret) |
3992 | return ret; | 4090 | return ret; |
3993 | 4091 | ||
3994 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 4092 | atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); |
3995 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
3996 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 4093 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
3997 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3998 | 4094 | ||
3999 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 4095 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
4000 | 4096 | ||
@@ -4009,19 +4105,30 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
4009 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4105 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4010 | u64 to_free; | 4106 | u64 to_free; |
4011 | int nr_extents; | 4107 | int nr_extents; |
4108 | int reserved_extents; | ||
4012 | 4109 | ||
4013 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4110 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
4014 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 4111 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
4112 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); | ||
4015 | 4113 | ||
4016 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 4114 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); |
4017 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | 4115 | do { |
4018 | if (nr_extents < BTRFS_I(inode)->reserved_extents) { | 4116 | int old, new; |
4019 | nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents; | 4117 | |
4020 | BTRFS_I(inode)->reserved_extents -= nr_extents; | 4118 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); |
4021 | } else { | 4119 | if (nr_extents >= reserved_extents) { |
4022 | nr_extents = 0; | 4120 | nr_extents = 0; |
4023 | } | 4121 | break; |
4024 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 4122 | } |
4123 | old = reserved_extents; | ||
4124 | nr_extents = reserved_extents - nr_extents; | ||
4125 | new = reserved_extents - nr_extents; | ||
4126 | old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, | ||
4127 | reserved_extents, new); | ||
4128 | if (likely(old == reserved_extents)) | ||
4129 | break; | ||
4130 | reserved_extents = old; | ||
4131 | } while (1); | ||
4025 | 4132 | ||
4026 | to_free = calc_csum_metadata_size(inode, num_bytes); | 4133 | to_free = calc_csum_metadata_size(inode, num_bytes); |
4027 | if (nr_extents > 0) | 4134 | if (nr_extents > 0) |
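
With accounting_lock gone, reserved_extents becomes an atomic and the release path shrinks it down to the current number of outstanding extents with a cmpxchg loop, retrying whenever another task changed the counter in between. The same loop in portable C11 atomics; release_extents() returns how many extent reservations were dropped:

    #include <stdatomic.h>
    #include <stdio.h>

    static int release_extents(atomic_int *outstanding, atomic_int *reserved)
    {
        int reserved_extents = atomic_load(reserved);
        int nr_extents;

        for (;;) {
            int old = reserved_extents;
            int new;

            nr_extents = atomic_load(outstanding);
            if (nr_extents >= reserved_extents)
                return 0;       /* nothing over-reserved */

            nr_extents = reserved_extents - nr_extents;
            new = reserved_extents - nr_extents;
            if (atomic_compare_exchange_strong(reserved, &old, new))
                return nr_extents;
            /* someone raced with us; old now holds the fresh value */
            reserved_extents = old;
        }
    }

    int main(void)
    {
        atomic_int outstanding = 3, reserved = 7;

        printf("freed %d extents, reserved now %d\n",
               release_extents(&outstanding, &reserved),
               atomic_load(&reserved));
        return 0;
    }
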
@@ -4112,6 +4219,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
4112 | btrfs_set_block_group_used(&cache->item, old_val); | 4219 | btrfs_set_block_group_used(&cache->item, old_val); |
4113 | cache->reserved -= num_bytes; | 4220 | cache->reserved -= num_bytes; |
4114 | cache->space_info->bytes_reserved -= num_bytes; | 4221 | cache->space_info->bytes_reserved -= num_bytes; |
4222 | cache->space_info->reservation_progress++; | ||
4115 | cache->space_info->bytes_used += num_bytes; | 4223 | cache->space_info->bytes_used += num_bytes; |
4116 | cache->space_info->disk_used += num_bytes * factor; | 4224 | cache->space_info->disk_used += num_bytes * factor; |
4117 | spin_unlock(&cache->lock); | 4225 | spin_unlock(&cache->lock); |
@@ -4163,6 +4271,7 @@ static int pin_down_extent(struct btrfs_root *root, | |||
4163 | if (reserved) { | 4271 | if (reserved) { |
4164 | cache->reserved -= num_bytes; | 4272 | cache->reserved -= num_bytes; |
4165 | cache->space_info->bytes_reserved -= num_bytes; | 4273 | cache->space_info->bytes_reserved -= num_bytes; |
4274 | cache->space_info->reservation_progress++; | ||
4166 | } | 4275 | } |
4167 | spin_unlock(&cache->lock); | 4276 | spin_unlock(&cache->lock); |
4168 | spin_unlock(&cache->space_info->lock); | 4277 | spin_unlock(&cache->space_info->lock); |
@@ -4193,8 +4302,8 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
4193 | * update size of reserved extents. this function may return -EAGAIN | 4302 | * update size of reserved extents. this function may return -EAGAIN |
4194 | * if 'reserve' is true or 'sinfo' is false. | 4303 | * if 'reserve' is true or 'sinfo' is false. |
4195 | */ | 4304 | */ |
4196 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | 4305 | int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, |
4197 | u64 num_bytes, int reserve, int sinfo) | 4306 | u64 num_bytes, int reserve, int sinfo) |
4198 | { | 4307 | { |
4199 | int ret = 0; | 4308 | int ret = 0; |
4200 | if (sinfo) { | 4309 | if (sinfo) { |
@@ -4213,6 +4322,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | |||
4213 | space_info->bytes_readonly += num_bytes; | 4322 | space_info->bytes_readonly += num_bytes; |
4214 | cache->reserved -= num_bytes; | 4323 | cache->reserved -= num_bytes; |
4215 | space_info->bytes_reserved -= num_bytes; | 4324 | space_info->bytes_reserved -= num_bytes; |
4325 | space_info->reservation_progress++; | ||
4216 | } | 4326 | } |
4217 | spin_unlock(&cache->lock); | 4327 | spin_unlock(&cache->lock); |
4218 | spin_unlock(&space_info->lock); | 4328 | spin_unlock(&space_info->lock); |
@@ -4332,7 +4442,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
4332 | if (ret) | 4442 | if (ret) |
4333 | break; | 4443 | break; |
4334 | 4444 | ||
4335 | ret = btrfs_discard_extent(root, start, end + 1 - start); | 4445 | if (btrfs_test_opt(root, DISCARD)) |
4446 | ret = btrfs_discard_extent(root, start, | ||
4447 | end + 1 - start, NULL); | ||
4336 | 4448 | ||
4337 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 4449 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
4338 | unpin_extent_range(root, start, end); | 4450 | unpin_extent_range(root, start, end); |
@@ -4673,10 +4785,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
4673 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | 4785 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); |
4674 | 4786 | ||
4675 | btrfs_add_free_space(cache, buf->start, buf->len); | 4787 | btrfs_add_free_space(cache, buf->start, buf->len); |
4676 | ret = update_reserved_bytes(cache, buf->len, 0, 0); | 4788 | ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); |
4677 | if (ret == -EAGAIN) { | 4789 | if (ret == -EAGAIN) { |
4678 | /* block group became read-only */ | 4790 | /* block group became read-only */ |
4679 | update_reserved_bytes(cache, buf->len, 0, 1); | 4791 | btrfs_update_reserved_bytes(cache, buf->len, 0, 1); |
4680 | goto out; | 4792 | goto out; |
4681 | } | 4793 | } |
4682 | 4794 | ||
@@ -4691,6 +4803,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
4691 | if (ret) { | 4803 | if (ret) { |
4692 | spin_lock(&cache->space_info->lock); | 4804 | spin_lock(&cache->space_info->lock); |
4693 | cache->space_info->bytes_reserved -= buf->len; | 4805 | cache->space_info->bytes_reserved -= buf->len; |
4806 | cache->space_info->reservation_progress++; | ||
4694 | spin_unlock(&cache->space_info->lock); | 4807 | spin_unlock(&cache->space_info->lock); |
4695 | } | 4808 | } |
4696 | goto out; | 4809 | goto out; |
@@ -4712,6 +4825,11 @@ pin: | |||
4712 | } | 4825 | } |
4713 | } | 4826 | } |
4714 | out: | 4827 | out: |
4828 | /* | ||
4829 | * Deleting the buffer, clear the corrupt flag since it doesn't matter | ||
4830 | * anymore. | ||
4831 | */ | ||
4832 | clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); | ||
4715 | btrfs_put_block_group(cache); | 4833 | btrfs_put_block_group(cache); |
4716 | } | 4834 | } |
4717 | 4835 | ||
@@ -5159,7 +5277,7 @@ checks: | |||
5159 | search_start - offset); | 5277 | search_start - offset); |
5160 | BUG_ON(offset > search_start); | 5278 | BUG_ON(offset > search_start); |
5161 | 5279 | ||
5162 | ret = update_reserved_bytes(block_group, num_bytes, 1, | 5280 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, |
5163 | (data & BTRFS_BLOCK_GROUP_DATA)); | 5281 | (data & BTRFS_BLOCK_GROUP_DATA)); |
5164 | if (ret == -EAGAIN) { | 5282 | if (ret == -EAGAIN) { |
5165 | btrfs_add_free_space(block_group, offset, num_bytes); | 5283 | btrfs_add_free_space(block_group, offset, num_bytes); |
@@ -5250,11 +5368,13 @@ loop: | |||
5250 | 5368 | ||
5251 | if (allowed_chunk_alloc) { | 5369 | if (allowed_chunk_alloc) { |
5252 | ret = do_chunk_alloc(trans, root, num_bytes + | 5370 | ret = do_chunk_alloc(trans, root, num_bytes + |
5253 | 2 * 1024 * 1024, data, 1); | 5371 | 2 * 1024 * 1024, data, |
5372 | CHUNK_ALLOC_LIMITED); | ||
5254 | allowed_chunk_alloc = 0; | 5373 | allowed_chunk_alloc = 0; |
5255 | done_chunk_alloc = 1; | 5374 | done_chunk_alloc = 1; |
5256 | } else if (!done_chunk_alloc) { | 5375 | } else if (!done_chunk_alloc && |
5257 | space_info->force_alloc = 1; | 5376 | space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) { |
5377 | space_info->force_alloc = CHUNK_ALLOC_LIMITED; | ||
5258 | } | 5378 | } |
5259 | 5379 | ||
5260 | if (loop < LOOP_NO_EMPTY_SIZE) { | 5380 | if (loop < LOOP_NO_EMPTY_SIZE) { |
@@ -5340,7 +5460,8 @@ again: | |||
5340 | */ | 5460 | */ |
5341 | if (empty_size || root->ref_cows) | 5461 | if (empty_size || root->ref_cows) |
5342 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 5462 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
5343 | num_bytes + 2 * 1024 * 1024, data, 0); | 5463 | num_bytes + 2 * 1024 * 1024, data, |
5464 | CHUNK_ALLOC_NO_FORCE); | ||
5344 | 5465 | ||
5345 | WARN_ON(num_bytes < root->sectorsize); | 5466 | WARN_ON(num_bytes < root->sectorsize); |
5346 | ret = find_free_extent(trans, root, num_bytes, empty_size, | 5467 | ret = find_free_extent(trans, root, num_bytes, empty_size, |
@@ -5352,10 +5473,10 @@ again: | |||
5352 | num_bytes = num_bytes & ~(root->sectorsize - 1); | 5473 | num_bytes = num_bytes & ~(root->sectorsize - 1); |
5353 | num_bytes = max(num_bytes, min_alloc_size); | 5474 | num_bytes = max(num_bytes, min_alloc_size); |
5354 | do_chunk_alloc(trans, root->fs_info->extent_root, | 5475 | do_chunk_alloc(trans, root->fs_info->extent_root, |
5355 | num_bytes, data, 1); | 5476 | num_bytes, data, CHUNK_ALLOC_FORCE); |
5356 | goto again; | 5477 | goto again; |
5357 | } | 5478 | } |
5358 | if (ret == -ENOSPC) { | 5479 | if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { |
5359 | struct btrfs_space_info *sinfo; | 5480 | struct btrfs_space_info *sinfo; |
5360 | 5481 | ||
5361 | sinfo = __find_space_info(root->fs_info, data); | 5482 | sinfo = __find_space_info(root->fs_info, data); |
@@ -5365,6 +5486,8 @@ again: | |||
5365 | dump_space_info(sinfo, num_bytes, 1); | 5486 | dump_space_info(sinfo, num_bytes, 1); |
5366 | } | 5487 | } |
5367 | 5488 | ||
5489 | trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); | ||
5490 | |||
5368 | return ret; | 5491 | return ret; |
5369 | } | 5492 | } |
5370 | 5493 | ||
@@ -5380,12 +5503,15 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
5380 | return -ENOSPC; | 5503 | return -ENOSPC; |
5381 | } | 5504 | } |
5382 | 5505 | ||
5383 | ret = btrfs_discard_extent(root, start, len); | 5506 | if (btrfs_test_opt(root, DISCARD)) |
5507 | ret = btrfs_discard_extent(root, start, len, NULL); | ||
5384 | 5508 | ||
5385 | btrfs_add_free_space(cache, start, len); | 5509 | btrfs_add_free_space(cache, start, len); |
5386 | update_reserved_bytes(cache, len, 0, 1); | 5510 | btrfs_update_reserved_bytes(cache, len, 0, 1); |
5387 | btrfs_put_block_group(cache); | 5511 | btrfs_put_block_group(cache); |
5388 | 5512 | ||
5513 | trace_btrfs_reserved_extent_free(root, start, len); | ||
5514 | |||
5389 | return ret; | 5515 | return ret; |
5390 | } | 5516 | } |
5391 | 5517 | ||
@@ -5412,7 +5538,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
5412 | size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); | 5538 | size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); |
5413 | 5539 | ||
5414 | path = btrfs_alloc_path(); | 5540 | path = btrfs_alloc_path(); |
5415 | BUG_ON(!path); | 5541 | if (!path) |
5542 | return -ENOMEM; | ||
5416 | 5543 | ||
5417 | path->leave_spinning = 1; | 5544 | path->leave_spinning = 1; |
5418 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, | 5545 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, |
@@ -5582,7 +5709,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
5582 | put_caching_control(caching_ctl); | 5709 | put_caching_control(caching_ctl); |
5583 | } | 5710 | } |
5584 | 5711 | ||
5585 | ret = update_reserved_bytes(block_group, ins->offset, 1, 1); | 5712 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); |
5586 | BUG_ON(ret); | 5713 | BUG_ON(ret); |
5587 | btrfs_put_block_group(block_group); | 5714 | btrfs_put_block_group(block_group); |
5588 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 5715 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
@@ -5633,6 +5760,7 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
5633 | struct btrfs_root *root, u32 blocksize) | 5760 | struct btrfs_root *root, u32 blocksize) |
5634 | { | 5761 | { |
5635 | struct btrfs_block_rsv *block_rsv; | 5762 | struct btrfs_block_rsv *block_rsv; |
5763 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | ||
5636 | int ret; | 5764 | int ret; |
5637 | 5765 | ||
5638 | block_rsv = get_block_rsv(trans, root); | 5766 | block_rsv = get_block_rsv(trans, root); |
@@ -5640,14 +5768,39 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
5640 | if (block_rsv->size == 0) { | 5768 | if (block_rsv->size == 0) { |
5641 | ret = reserve_metadata_bytes(trans, root, block_rsv, | 5769 | ret = reserve_metadata_bytes(trans, root, block_rsv, |
5642 | blocksize, 0); | 5770 | blocksize, 0); |
5643 | if (ret) | 5771 | /* |
5772 | * If we couldn't reserve metadata bytes try and use some from | ||
5773 | * the global reserve. | ||
5774 | */ | ||
5775 | if (ret && block_rsv != global_rsv) { | ||
5776 | ret = block_rsv_use_bytes(global_rsv, blocksize); | ||
5777 | if (!ret) | ||
5778 | return global_rsv; | ||
5779 | return ERR_PTR(ret); | ||
5780 | } else if (ret) { | ||
5644 | return ERR_PTR(ret); | 5781 | return ERR_PTR(ret); |
5782 | } | ||
5645 | return block_rsv; | 5783 | return block_rsv; |
5646 | } | 5784 | } |
5647 | 5785 | ||
5648 | ret = block_rsv_use_bytes(block_rsv, blocksize); | 5786 | ret = block_rsv_use_bytes(block_rsv, blocksize); |
5649 | if (!ret) | 5787 | if (!ret) |
5650 | return block_rsv; | 5788 | return block_rsv; |
5789 | if (ret) { | ||
5790 | WARN_ON(1); | ||
5791 | ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, | ||
5792 | 0); | ||
5793 | if (!ret) { | ||
5794 | spin_lock(&block_rsv->lock); | ||
5795 | block_rsv->size += blocksize; | ||
5796 | spin_unlock(&block_rsv->lock); | ||
5797 | return block_rsv; | ||
5798 | } else if (ret && block_rsv != global_rsv) { | ||
5799 | ret = block_rsv_use_bytes(global_rsv, blocksize); | ||
5800 | if (!ret) | ||
5801 | return global_rsv; | ||
5802 | } | ||
5803 | } | ||
5651 | 5804 | ||
5652 | return ERR_PTR(-ENOSPC); | 5805 | return ERR_PTR(-ENOSPC); |
5653 | } | 5806 | } |
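
use_block_rsv() gains two fallbacks: if the transaction's reserve cannot be refilled it borrows from the global reserve, and if consuming from an already-sized reserve fails it first retries the reservation (growing the reserve's target) before falling back again. The sketch below compresses that chain into one function; rsv_use_bytes() and reserve_bytes() are simplified counters standing in for block_rsv_use_bytes() and reserve_metadata_bytes():

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    struct block_rsv {
        uint64_t size;
        uint64_t reserved;
    };

    static int rsv_use_bytes(struct block_rsv *rsv, uint64_t n)
    {
        if (rsv->reserved < n)
            return -ENOSPC;
        rsv->reserved -= n;
        return 0;
    }

    /* stand-in for reserve_metadata_bytes(): succeeds only when
     * flushing is allowed to find space */
    static int reserve_bytes(struct block_rsv *rsv, uint64_t n, int can_flush)
    {
        if (!can_flush)
            return -ENOSPC;
        rsv->reserved += n;
        return 0;
    }

    static struct block_rsv *use_block_rsv(struct block_rsv *rsv,
                                           struct block_rsv *global,
                                           uint64_t blocksize, int can_flush)
    {
        if (!rsv_use_bytes(rsv, blocksize))
            return rsv;                     /* fast path */

        /* retry the reservation and grow the reserve's target */
        if (!reserve_bytes(rsv, blocksize, can_flush)) {
            rsv->size += blocksize;
            rsv_use_bytes(rsv, blocksize);
            return rsv;
        }

        /* last resort: borrow from the global reserve */
        if (rsv != global && !rsv_use_bytes(global, blocksize))
            return global;

        return NULL;                        /* ERR_PTR(-ENOSPC) in the kernel */
    }

    int main(void)
    {
        struct block_rsv rsv = { 4096, 0 }, global = { 1 << 20, 1 << 20 };
        struct block_rsv *got = use_block_rsv(&rsv, &global, 4096, 0);

        printf("%s\n", got == &global ? "borrowed from global reserve" :
               got ? "used own reserve" : "ENOSPC");
        return 0;
    }
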
@@ -5989,6 +6142,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
5989 | if (reada && level == 1) | 6142 | if (reada && level == 1) |
5990 | reada_walk_down(trans, root, wc, path); | 6143 | reada_walk_down(trans, root, wc, path); |
5991 | next = read_tree_block(root, bytenr, blocksize, generation); | 6144 | next = read_tree_block(root, bytenr, blocksize, generation); |
6145 | if (!next) | ||
6146 | return -EIO; | ||
5992 | btrfs_tree_lock(next); | 6147 | btrfs_tree_lock(next); |
5993 | btrfs_set_lock_blocking(next); | 6148 | btrfs_set_lock_blocking(next); |
5994 | } | 6149 | } |
@@ -6221,6 +6376,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6221 | BUG_ON(!wc); | 6376 | BUG_ON(!wc); |
6222 | 6377 | ||
6223 | trans = btrfs_start_transaction(tree_root, 0); | 6378 | trans = btrfs_start_transaction(tree_root, 0); |
6379 | BUG_ON(IS_ERR(trans)); | ||
6380 | |||
6224 | if (block_rsv) | 6381 | if (block_rsv) |
6225 | trans->block_rsv = block_rsv; | 6382 | trans->block_rsv = block_rsv; |
6226 | 6383 | ||
@@ -6318,6 +6475,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6318 | 6475 | ||
6319 | btrfs_end_transaction_throttle(trans, tree_root); | 6476 | btrfs_end_transaction_throttle(trans, tree_root); |
6320 | trans = btrfs_start_transaction(tree_root, 0); | 6477 | trans = btrfs_start_transaction(tree_root, 0); |
6478 | BUG_ON(IS_ERR(trans)); | ||
6321 | if (block_rsv) | 6479 | if (block_rsv) |
6322 | trans->block_rsv = block_rsv; | 6480 | trans->block_rsv = block_rsv; |
6323 | } | 6481 | } |
@@ -6377,10 +6535,14 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
6377 | BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | 6535 | BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); |
6378 | 6536 | ||
6379 | path = btrfs_alloc_path(); | 6537 | path = btrfs_alloc_path(); |
6380 | BUG_ON(!path); | 6538 | if (!path) |
6539 | return -ENOMEM; | ||
6381 | 6540 | ||
6382 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 6541 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
6383 | BUG_ON(!wc); | 6542 | if (!wc) { |
6543 | btrfs_free_path(path); | ||
6544 | return -ENOMEM; | ||
6545 | } | ||
6384 | 6546 | ||
6385 | btrfs_assert_tree_locked(parent); | 6547 | btrfs_assert_tree_locked(parent); |
6386 | parent_level = btrfs_header_level(parent); | 6548 | parent_level = btrfs_header_level(parent); |
@@ -6446,6 +6608,8 @@ static noinline int relocate_inode_pages(struct inode *inode, u64 start, | |||
6446 | int ret = 0; | 6608 | int ret = 0; |
6447 | 6609 | ||
6448 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | 6610 | ra = kzalloc(sizeof(*ra), GFP_NOFS); |
6611 | if (!ra) | ||
6612 | return -ENOMEM; | ||
6449 | 6613 | ||
6450 | mutex_lock(&inode->i_mutex); | 6614 | mutex_lock(&inode->i_mutex); |
6451 | first_index = start >> PAGE_CACHE_SHIFT; | 6615 | first_index = start >> PAGE_CACHE_SHIFT; |
@@ -6531,7 +6695,7 @@ static noinline int relocate_data_extent(struct inode *reloc_inode, | |||
6531 | u64 end = start + extent_key->offset - 1; | 6695 | u64 end = start + extent_key->offset - 1; |
6532 | 6696 | ||
6533 | em = alloc_extent_map(GFP_NOFS); | 6697 | em = alloc_extent_map(GFP_NOFS); |
6534 | BUG_ON(!em || IS_ERR(em)); | 6698 | BUG_ON(!em); |
6535 | 6699 | ||
6536 | em->start = start; | 6700 | em->start = start; |
6537 | em->len = extent_key->offset; | 6701 | em->len = extent_key->offset; |
@@ -6836,7 +7000,11 @@ static noinline int get_new_locations(struct inode *reloc_inode, | |||
6836 | } | 7000 | } |
6837 | 7001 | ||
6838 | path = btrfs_alloc_path(); | 7002 | path = btrfs_alloc_path(); |
6839 | BUG_ON(!path); | 7003 | if (!path) { |
7004 | if (exts != *extents) | ||
7005 | kfree(exts); | ||
7006 | return -ENOMEM; | ||
7007 | } | ||
6840 | 7008 | ||
6841 | cur_pos = extent_key->objectid - offset; | 7009 | cur_pos = extent_key->objectid - offset; |
6842 | last_byte = extent_key->objectid + extent_key->offset; | 7010 | last_byte = extent_key->objectid + extent_key->offset; |
@@ -6878,6 +7046,10 @@ static noinline int get_new_locations(struct inode *reloc_inode, | |||
6878 | struct disk_extent *old = exts; | 7046 | struct disk_extent *old = exts; |
6879 | max *= 2; | 7047 | max *= 2; |
6880 | exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); | 7048 | exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); |
7049 | if (!exts) { | ||
7050 | ret = -ENOMEM; | ||
7051 | goto out; | ||
7052 | } | ||
6881 | memcpy(exts, old, sizeof(*exts) * nr); | 7053 | memcpy(exts, old, sizeof(*exts) * nr); |
6882 | if (old != *extents) | 7054 | if (old != *extents) |
6883 | kfree(old); | 7055 | kfree(old); |
@@ -7360,7 +7532,8 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, | |||
7360 | int ret; | 7532 | int ret; |
7361 | 7533 | ||
7362 | new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); | 7534 | new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); |
7363 | BUG_ON(!new_extent); | 7535 | if (!new_extent) |
7536 | return -ENOMEM; | ||
7364 | 7537 | ||
7365 | ref = btrfs_lookup_leaf_ref(root, leaf->start); | 7538 | ref = btrfs_lookup_leaf_ref(root, leaf->start); |
7366 | BUG_ON(!ref); | 7539 | BUG_ON(!ref); |
@@ -7477,7 +7650,7 @@ int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) | |||
7477 | BUG_ON(reloc_root->commit_root != NULL); | 7650 | BUG_ON(reloc_root->commit_root != NULL); |
7478 | while (1) { | 7651 | while (1) { |
7479 | trans = btrfs_join_transaction(root, 1); | 7652 | trans = btrfs_join_transaction(root, 1); |
7480 | BUG_ON(!trans); | 7653 | BUG_ON(IS_ERR(trans)); |
7481 | 7654 | ||
7482 | mutex_lock(&root->fs_info->drop_mutex); | 7655 | mutex_lock(&root->fs_info->drop_mutex); |
7483 | ret = btrfs_drop_snapshot(trans, reloc_root); | 7656 | ret = btrfs_drop_snapshot(trans, reloc_root); |
@@ -7535,7 +7708,7 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root) | |||
7535 | 7708 | ||
7536 | if (found) { | 7709 | if (found) { |
7537 | trans = btrfs_start_transaction(root, 1); | 7710 | trans = btrfs_start_transaction(root, 1); |
7538 | BUG_ON(!trans); | 7711 | BUG_ON(IS_ERR(trans)); |
7539 | ret = btrfs_commit_transaction(trans, root); | 7712 | ret = btrfs_commit_transaction(trans, root); |
7540 | BUG_ON(ret); | 7713 | BUG_ON(ret); |
7541 | } | 7714 | } |
@@ -7546,7 +7719,8 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root) | |||
7546 | 7719 | ||
7547 | reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); | 7720 | reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); |
7548 | BUG_ON(!reloc_root); | 7721 | BUG_ON(!reloc_root); |
7549 | btrfs_orphan_cleanup(reloc_root); | 7722 | ret = btrfs_orphan_cleanup(reloc_root); |
7723 | BUG_ON(ret); | ||
7550 | return 0; | 7724 | return 0; |
7551 | } | 7725 | } |
7552 | 7726 | ||
@@ -7564,7 +7738,8 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, | |||
7564 | return 0; | 7738 | return 0; |
7565 | 7739 | ||
7566 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | 7740 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); |
7567 | BUG_ON(!root_item); | 7741 | if (!root_item) |
7742 | return -ENOMEM; | ||
7568 | 7743 | ||
7569 | ret = btrfs_copy_root(trans, root, root->commit_root, | 7744 | ret = btrfs_copy_root(trans, root, root->commit_root, |
7570 | &eb, BTRFS_TREE_RELOC_OBJECTID); | 7745 | &eb, BTRFS_TREE_RELOC_OBJECTID); |
@@ -7590,7 +7765,7 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, | |||
7590 | 7765 | ||
7591 | reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, | 7766 | reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, |
7592 | &root_key); | 7767 | &root_key); |
7593 | BUG_ON(!reloc_root); | 7768 | BUG_ON(IS_ERR(reloc_root)); |
7594 | reloc_root->last_trans = trans->transid; | 7769 | reloc_root->last_trans = trans->transid; |
7595 | reloc_root->commit_root = NULL; | 7770 | reloc_root->commit_root = NULL; |
7596 | reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; | 7771 | reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; |
@@ -7779,7 +7954,7 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root, | |||
7779 | 7954 | ||
7780 | 7955 | ||
7781 | trans = btrfs_start_transaction(extent_root, 1); | 7956 | trans = btrfs_start_transaction(extent_root, 1); |
7782 | BUG_ON(!trans); | 7957 | BUG_ON(IS_ERR(trans)); |
7783 | 7958 | ||
7784 | if (extent_key->objectid == 0) { | 7959 | if (extent_key->objectid == 0) { |
7785 | ret = del_extent_zero(trans, extent_root, path, extent_key); | 7960 | ret = del_extent_zero(trans, extent_root, path, extent_key); |
@@ -7843,6 +8018,10 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root, | |||
7843 | 8018 | ||
7844 | eb = read_tree_block(found_root, block_start, | 8019 | eb = read_tree_block(found_root, block_start, |
7845 | block_size, 0); | 8020 | block_size, 0); |
8021 | if (!eb) { | ||
8022 | ret = -EIO; | ||
8023 | goto out; | ||
8024 | } | ||
7846 | btrfs_tree_lock(eb); | 8025 | btrfs_tree_lock(eb); |
7847 | BUG_ON(level != btrfs_header_level(eb)); | 8026 | BUG_ON(level != btrfs_header_level(eb)); |
7848 | 8027 | ||
@@ -7970,13 +8149,14 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) | |||
7970 | 8149 | ||
7971 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | 8150 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + |
7972 | sinfo->bytes_may_use + sinfo->bytes_readonly + | 8151 | sinfo->bytes_may_use + sinfo->bytes_readonly + |
7973 | cache->reserved_pinned + num_bytes < sinfo->total_bytes) { | 8152 | cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { |
7974 | sinfo->bytes_readonly += num_bytes; | 8153 | sinfo->bytes_readonly += num_bytes; |
7975 | sinfo->bytes_reserved += cache->reserved_pinned; | 8154 | sinfo->bytes_reserved += cache->reserved_pinned; |
7976 | cache->reserved_pinned = 0; | 8155 | cache->reserved_pinned = 0; |
7977 | cache->ro = 1; | 8156 | cache->ro = 1; |
7978 | ret = 0; | 8157 | ret = 0; |
7979 | } | 8158 | } |
8159 | |||
7980 | spin_unlock(&cache->lock); | 8160 | spin_unlock(&cache->lock); |
7981 | spin_unlock(&sinfo->lock); | 8161 | spin_unlock(&sinfo->lock); |
7982 | return ret; | 8162 | return ret; |
@@ -7997,13 +8177,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
7997 | 8177 | ||
7998 | alloc_flags = update_block_group_flags(root, cache->flags); | 8178 | alloc_flags = update_block_group_flags(root, cache->flags); |
7999 | if (alloc_flags != cache->flags) | 8179 | if (alloc_flags != cache->flags) |
8000 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | 8180 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, |
8181 | CHUNK_ALLOC_FORCE); | ||
8001 | 8182 | ||
8002 | ret = set_block_group_ro(cache); | 8183 | ret = set_block_group_ro(cache); |
8003 | if (!ret) | 8184 | if (!ret) |
8004 | goto out; | 8185 | goto out; |
8005 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | 8186 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); |
8006 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | 8187 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, |
8188 | CHUNK_ALLOC_FORCE); | ||
8007 | if (ret < 0) | 8189 | if (ret < 0) |
8008 | goto out; | 8190 | goto out; |
8009 | ret = set_block_group_ro(cache); | 8191 | ret = set_block_group_ro(cache); |
@@ -8012,6 +8194,70 @@ out: | |||
8012 | return ret; | 8194 | return ret; |
8013 | } | 8195 | } |
8014 | 8196 | ||
8197 | int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, | ||
8198 | struct btrfs_root *root, u64 type) | ||
8199 | { | ||
8200 | u64 alloc_flags = get_alloc_profile(root, type); | ||
8201 | return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | ||
8202 | CHUNK_ALLOC_FORCE); | ||
8203 | } | ||
8204 | |||
8205 | /* | ||
8206 | * helper to account the unused space of all the readonly block groups in the | ||
8207 | * list. takes mirrors into account. | ||
8208 | */ | ||
8209 | static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list) | ||
8210 | { | ||
8211 | struct btrfs_block_group_cache *block_group; | ||
8212 | u64 free_bytes = 0; | ||
8213 | int factor; | ||
8214 | |||
8215 | list_for_each_entry(block_group, groups_list, list) { | ||
8216 | spin_lock(&block_group->lock); | ||
8217 | |||
8218 | if (!block_group->ro) { | ||
8219 | spin_unlock(&block_group->lock); | ||
8220 | continue; | ||
8221 | } | ||
8222 | |||
8223 | if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
8224 | BTRFS_BLOCK_GROUP_RAID10 | | ||
8225 | BTRFS_BLOCK_GROUP_DUP)) | ||
8226 | factor = 2; | ||
8227 | else | ||
8228 | factor = 1; | ||
8229 | |||
8230 | free_bytes += (block_group->key.offset - | ||
8231 | btrfs_block_group_used(&block_group->item)) * | ||
8232 | factor; | ||
8233 | |||
8234 | spin_unlock(&block_group->lock); | ||
8235 | } | ||
8236 | |||
8237 | return free_bytes; | ||
8238 | } | ||
8239 | |||
8240 | /* | ||
8241 | * helper to account the unused space of all the readonly block groups in the | ||
8242 | * space_info. takes mirrors into account. | ||
8243 | */ | ||
8244 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) | ||
8245 | { | ||
8246 | int i; | ||
8247 | u64 free_bytes = 0; | ||
8248 | |||
8249 | spin_lock(&sinfo->lock); | ||
8250 | |||
8251 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) | ||
8252 | if (!list_empty(&sinfo->block_groups[i])) | ||
8253 | free_bytes += __btrfs_get_ro_block_group_free_space( | ||
8254 | &sinfo->block_groups[i]); | ||
8255 | |||
8256 | spin_unlock(&sinfo->lock); | ||
8257 | |||
8258 | return free_bytes; | ||
8259 | } | ||
8260 | |||
8015 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 8261 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
8016 | struct btrfs_block_group_cache *cache) | 8262 | struct btrfs_block_group_cache *cache) |
8017 | { | 8263 | { |
@@ -8092,7 +8338,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
8092 | mutex_lock(&root->fs_info->chunk_mutex); | 8338 | mutex_lock(&root->fs_info->chunk_mutex); |
8093 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 8339 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
8094 | u64 min_free = btrfs_block_group_used(&block_group->item); | 8340 | u64 min_free = btrfs_block_group_used(&block_group->item); |
8095 | u64 dev_offset, max_avail; | 8341 | u64 dev_offset; |
8096 | 8342 | ||
8097 | /* | 8343 | /* |
8098 | * check to make sure we can actually find a chunk with enough | 8344 | * check to make sure we can actually find a chunk with enough |
@@ -8100,7 +8346,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
8100 | */ | 8346 | */ |
8101 | if (device->total_bytes > device->bytes_used + min_free) { | 8347 | if (device->total_bytes > device->bytes_used + min_free) { |
8102 | ret = find_free_dev_extent(NULL, device, min_free, | 8348 | ret = find_free_dev_extent(NULL, device, min_free, |
8103 | &dev_offset, &max_avail); | 8349 | &dev_offset, NULL); |
8104 | if (!ret) | 8350 | if (!ret) |
8105 | break; | 8351 | break; |
8106 | ret = -1; | 8352 | ret = -1; |
@@ -8213,6 +8459,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
8213 | if (block_group->cached == BTRFS_CACHE_STARTED) | 8459 | if (block_group->cached == BTRFS_CACHE_STARTED) |
8214 | wait_block_group_cache_done(block_group); | 8460 | wait_block_group_cache_done(block_group); |
8215 | 8461 | ||
8462 | /* | ||
8463 | * We haven't cached this block group, which means we could | ||
8464 | * possibly have excluded extents on this block group. | ||
8465 | */ | ||
8466 | if (block_group->cached == BTRFS_CACHE_NO) | ||
8467 | free_excluded_extents(info->extent_root, block_group); | ||
8468 | |||
8216 | btrfs_remove_free_space_cache(block_group); | 8469 | btrfs_remove_free_space_cache(block_group); |
8217 | btrfs_put_block_group(block_group); | 8470 | btrfs_put_block_group(block_group); |
8218 | 8471 | ||
@@ -8328,6 +8581,13 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
8328 | cache->sectorsize = root->sectorsize; | 8581 | cache->sectorsize = root->sectorsize; |
8329 | 8582 | ||
8330 | /* | 8583 | /* |
8584 | * We need to exclude the super stripes now so that the space | ||
8585 | * info has super bytes accounted for, otherwise we'll think | ||
8586 | * we have more space than we actually do. | ||
8587 | */ | ||
8588 | exclude_super_stripes(root, cache); | ||
8589 | |||
8590 | /* | ||
8331 | * check for two cases, either we are full, and therefore | 8591 | * check for two cases, either we are full, and therefore |
8332 | * don't need to bother with the caching work since we won't | 8592 | * don't need to bother with the caching work since we won't |
8333 | * find any space, or we are empty, and we can just add all | 8593 | * find any space, or we are empty, and we can just add all |
@@ -8335,12 +8595,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
8335 | * time, particularly in the full case. | 8595 | * time, particularly in the full case. |
8336 | */ | 8596 | */ |
8337 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { | 8597 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { |
8338 | exclude_super_stripes(root, cache); | ||
8339 | cache->last_byte_to_unpin = (u64)-1; | 8598 | cache->last_byte_to_unpin = (u64)-1; |
8340 | cache->cached = BTRFS_CACHE_FINISHED; | 8599 | cache->cached = BTRFS_CACHE_FINISHED; |
8341 | free_excluded_extents(root, cache); | 8600 | free_excluded_extents(root, cache); |
8342 | } else if (btrfs_block_group_used(&cache->item) == 0) { | 8601 | } else if (btrfs_block_group_used(&cache->item) == 0) { |
8343 | exclude_super_stripes(root, cache); | ||
8344 | cache->last_byte_to_unpin = (u64)-1; | 8602 | cache->last_byte_to_unpin = (u64)-1; |
8345 | cache->cached = BTRFS_CACHE_FINISHED; | 8603 | cache->cached = BTRFS_CACHE_FINISHED; |
8346 | add_new_free_space(cache, root->fs_info, | 8604 | add_new_free_space(cache, root->fs_info, |
@@ -8482,6 +8740,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8482 | BUG_ON(!block_group); | 8740 | BUG_ON(!block_group); |
8483 | BUG_ON(!block_group->ro); | 8741 | BUG_ON(!block_group->ro); |
8484 | 8742 | ||
8743 | /* | ||
8744 | * Free the reserved super bytes from this block group before | ||
8745 | * removing it. | ||
8746 | */ | ||
8747 | free_excluded_extents(root, block_group); | ||
8748 | |||
8485 | memcpy(&key, &block_group->key, sizeof(key)); | 8749 | memcpy(&key, &block_group->key, sizeof(key)); |
8486 | if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | | 8750 | if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | |
8487 | BTRFS_BLOCK_GROUP_RAID1 | | 8751 | BTRFS_BLOCK_GROUP_RAID1 | |
@@ -8584,3 +8848,85 @@ out: | |||
8584 | btrfs_free_path(path); | 8848 | btrfs_free_path(path); |
8585 | return ret; | 8849 | return ret; |
8586 | } | 8850 | } |
8851 | |||
8852 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info) | ||
8853 | { | ||
8854 | struct btrfs_space_info *space_info; | ||
8855 | int ret; | ||
8856 | |||
8857 | ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0, | ||
8858 | &space_info); | ||
8859 | if (ret) | ||
8860 | return ret; | ||
8861 | |||
8862 | ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0, | ||
8863 | &space_info); | ||
8864 | if (ret) | ||
8865 | return ret; | ||
8866 | |||
8867 | ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0, | ||
8868 | &space_info); | ||
8869 | if (ret) | ||
8870 | return ret; | ||
8871 | |||
8872 | return ret; | ||
8873 | } | ||
8874 | |||
8875 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | ||
8876 | { | ||
8877 | return unpin_extent_range(root, start, end); | ||
8878 | } | ||
8879 | |||
8880 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | ||
8881 | u64 num_bytes, u64 *actual_bytes) | ||
8882 | { | ||
8883 | return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); | ||
8884 | } | ||
8885 | |||
8886 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | ||
8887 | { | ||
8888 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
8889 | struct btrfs_block_group_cache *cache = NULL; | ||
8890 | u64 group_trimmed; | ||
8891 | u64 start; | ||
8892 | u64 end; | ||
8893 | u64 trimmed = 0; | ||
8894 | int ret = 0; | ||
8895 | |||
8896 | cache = btrfs_lookup_block_group(fs_info, range->start); | ||
8897 | |||
8898 | while (cache) { | ||
8899 | if (cache->key.objectid >= (range->start + range->len)) { | ||
8900 | btrfs_put_block_group(cache); | ||
8901 | break; | ||
8902 | } | ||
8903 | |||
8904 | start = max(range->start, cache->key.objectid); | ||
8905 | end = min(range->start + range->len, | ||
8906 | cache->key.objectid + cache->key.offset); | ||
8907 | |||
8908 | if (end - start >= range->minlen) { | ||
8909 | if (!block_group_cache_done(cache)) { | ||
8910 | ret = cache_block_group(cache, NULL, root, 0); | ||
8911 | if (!ret) | ||
8912 | wait_block_group_cache_done(cache); | ||
8913 | } | ||
8914 | ret = btrfs_trim_block_group(cache, | ||
8915 | &group_trimmed, | ||
8916 | start, | ||
8917 | end, | ||
8918 | range->minlen); | ||
8919 | |||
8920 | trimmed += group_trimmed; | ||
8921 | if (ret) { | ||
8922 | btrfs_put_block_group(cache); | ||
8923 | break; | ||
8924 | } | ||
8925 | } | ||
8926 | |||
8927 | cache = next_block_group(fs_info->tree_root, cache); | ||
8928 | } | ||
8929 | |||
8930 | range->len = trimmed; | ||
8931 | return ret; | ||
8932 | } | ||
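
The new btrfs_trim_fs() backs the FITRIM ioctl: it walks block groups in address order, intersects each with the requested range, skips slivers shorter than minlen, and sums what btrfs_trim_block_group() reports back into range->len. The clamping is easy to get wrong at the edges, so here it is in isolation; struct group collapses the block-group key to a start and a size:

    #include <stdint.h>
    #include <stdio.h>

    struct range { uint64_t start, len, minlen; };
    struct group { uint64_t objectid, offset; };    /* start and size */

    static uint64_t trim_groups(const struct group *g, int nr, struct range *r)
    {
        uint64_t trimmed = 0;
        int i;

        for (i = 0; i < nr; i++) {
            uint64_t start, end;

            if (g[i].objectid >= r->start + r->len)
                break;                      /* past the requested range */

            start = r->start > g[i].objectid ? r->start : g[i].objectid;
            end = g[i].objectid + g[i].offset;
            if (r->start + r->len < end)
                end = r->start + r->len;

            if (end - start >= r->minlen)
                trimmed += end - start;     /* btrfs_trim_block_group() here */
        }
        r->len = trimmed;                   /* report what was really trimmed */
        return trimmed;
    }

    int main(void)
    {
        struct group g[] = { { 0, 1 << 20 }, { 1 << 20, 1 << 20 } };
        struct range r = { 4096, 3 << 19, 4096 };   /* 1.5MB from 4K */

        printf("trimmed %llu bytes\n",
               (unsigned long long)trim_groups(g, 2, &r));
        return 0;
    }
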
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3e86b9f36507..315138605088 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -690,6 +690,15 @@ static void cache_state(struct extent_state *state, | |||
690 | } | 690 | } |
691 | } | 691 | } |
692 | 692 | ||
693 | static void uncache_state(struct extent_state **cached_ptr) | ||
694 | { | ||
695 | if (cached_ptr && (*cached_ptr)) { | ||
696 | struct extent_state *state = *cached_ptr; | ||
697 | *cached_ptr = NULL; | ||
698 | free_extent_state(state); | ||
699 | } | ||
700 | } | ||
701 | |||
693 | /* | 702 | /* |
694 | * set some bits on a range in the tree. This may require allocations or | 703 | * set some bits on a range in the tree. This may require allocations or |
695 | * sleeping, so the gfp mask is used to indicate what is allowed. | 704 | * sleeping, so the gfp mask is used to indicate what is allowed. |
@@ -940,10 +949,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | |||
940 | } | 949 | } |
941 | 950 | ||
942 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 951 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
943 | gfp_t mask) | 952 | struct extent_state **cached_state, gfp_t mask) |
944 | { | 953 | { |
945 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, | 954 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, |
946 | NULL, mask); | 955 | NULL, cached_state, mask); |
947 | } | 956 | } |
948 | 957 | ||
949 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, | 958 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, |
@@ -1012,8 +1021,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, | |||
1012 | mask); | 1021 | mask); |
1013 | } | 1022 | } |
1014 | 1023 | ||
1015 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 1024 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) |
1016 | gfp_t mask) | ||
1017 | { | 1025 | { |
1018 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, | 1026 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, |
1019 | mask); | 1027 | mask); |
@@ -1433,12 +1441,13 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
1433 | */ | 1441 | */ |
1434 | u64 count_range_bits(struct extent_io_tree *tree, | 1442 | u64 count_range_bits(struct extent_io_tree *tree, |
1435 | u64 *start, u64 search_end, u64 max_bytes, | 1443 | u64 *start, u64 search_end, u64 max_bytes, |
1436 | unsigned long bits) | 1444 | unsigned long bits, int contig) |
1437 | { | 1445 | { |
1438 | struct rb_node *node; | 1446 | struct rb_node *node; |
1439 | struct extent_state *state; | 1447 | struct extent_state *state; |
1440 | u64 cur_start = *start; | 1448 | u64 cur_start = *start; |
1441 | u64 total_bytes = 0; | 1449 | u64 total_bytes = 0; |
1450 | u64 last = 0; | ||
1442 | int found = 0; | 1451 | int found = 0; |
1443 | 1452 | ||
1444 | if (search_end <= cur_start) { | 1453 | if (search_end <= cur_start) { |
@@ -1463,7 +1472,9 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
1463 | state = rb_entry(node, struct extent_state, rb_node); | 1472 | state = rb_entry(node, struct extent_state, rb_node); |
1464 | if (state->start > search_end) | 1473 | if (state->start > search_end) |
1465 | break; | 1474 | break; |
1466 | if (state->end >= cur_start && (state->state & bits)) { | 1475 | if (contig && found && state->start > last + 1) |
1476 | break; | ||
1477 | if (state->end >= cur_start && (state->state & bits) == bits) { | ||
1467 | total_bytes += min(search_end, state->end) + 1 - | 1478 | total_bytes += min(search_end, state->end) + 1 - |
1468 | max(cur_start, state->start); | 1479 | max(cur_start, state->start); |
1469 | if (total_bytes >= max_bytes) | 1480 | if (total_bytes >= max_bytes) |
@@ -1472,6 +1483,9 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
1472 | *start = state->start; | 1483 | *start = state->start; |
1473 | found = 1; | 1484 | found = 1; |
1474 | } | 1485 | } |
1486 | last = state->end; | ||
1487 | } else if (contig && found) { | ||
1488 | break; | ||
1475 | } | 1489 | } |
1476 | node = rb_next(node); | 1490 | node = rb_next(node); |
1477 | if (!node) | 1491 | if (!node) |
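
count_range_bits() grows a contig mode: once a matching extent state has been found, the walk stops at the first gap (state->start > last + 1) or at the first state whose bits do not all match, so callers can measure one contiguous run rather than a scattered total. The same walk over a flat array, with each struct state mimicking one rb-tree node; the cur_start clamping and max_bytes cutoff of the real function are omitted for brevity:

    #include <stdint.h>
    #include <stdio.h>

    struct state { uint64_t start, end; unsigned bits; };

    static uint64_t count_bits(const struct state *s, int nr,
                               uint64_t search_end, unsigned bits, int contig)
    {
        uint64_t total = 0, last = 0;
        int found = 0, i;

        for (i = 0; i < nr; i++) {
            if (s[i].start > search_end)
                break;
            if (contig && found && s[i].start > last + 1)
                break;                      /* hole between states */
            if ((s[i].bits & bits) == bits) {
                uint64_t end = s[i].end < search_end ? s[i].end : search_end;

                total += end + 1 - s[i].start;
                found = 1;
                last = s[i].end;
            } else if (contig && found) {
                break;                      /* run interrupted */
            }
        }
        return total;
    }

    int main(void)
    {
        struct state s[] = {
            { 0, 4095, 1 }, { 4096, 8191, 1 }, { 10000, 12000, 1 },
        };

        /* the contiguous run stops before the hole at 8192..9999 */
        printf("%llu\n", (unsigned long long)count_bits(s, 3, 20000, 1, 1));
        return 0;
    }
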
@@ -1729,6 +1743,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
1729 | 1743 | ||
1730 | do { | 1744 | do { |
1731 | struct page *page = bvec->bv_page; | 1745 | struct page *page = bvec->bv_page; |
1746 | struct extent_state *cached = NULL; | ||
1747 | struct extent_state *state; | ||
1748 | |||
1732 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 1749 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
1733 | 1750 | ||
1734 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + | 1751 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + |
@@ -1743,9 +1760,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
1743 | if (++bvec <= bvec_end) | 1760 | if (++bvec <= bvec_end) |
1744 | prefetchw(&bvec->bv_page->flags); | 1761 | prefetchw(&bvec->bv_page->flags); |
1745 | 1762 | ||
1763 | spin_lock(&tree->lock); | ||
1764 | state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED); | ||
1765 | if (state && state->start == start) { | ||
1766 | /* | ||
1767 | * take a reference on the state, unlock will drop | ||
1768 | * the ref | ||
1769 | */ | ||
1770 | cache_state(state, &cached); | ||
1771 | } | ||
1772 | spin_unlock(&tree->lock); | ||
1773 | |||
1746 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { | 1774 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { |
1747 | ret = tree->ops->readpage_end_io_hook(page, start, end, | 1775 | ret = tree->ops->readpage_end_io_hook(page, start, end, |
1748 | NULL); | 1776 | state); |
1749 | if (ret) | 1777 | if (ret) |
1750 | uptodate = 0; | 1778 | uptodate = 0; |
1751 | } | 1779 | } |
@@ -1758,15 +1786,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
1758 | test_bit(BIO_UPTODATE, &bio->bi_flags); | 1786 | test_bit(BIO_UPTODATE, &bio->bi_flags); |
1759 | if (err) | 1787 | if (err) |
1760 | uptodate = 0; | 1788 | uptodate = 0; |
1789 | uncache_state(&cached); | ||
1761 | continue; | 1790 | continue; |
1762 | } | 1791 | } |
1763 | } | 1792 | } |
1764 | 1793 | ||
1765 | if (uptodate) { | 1794 | if (uptodate) { |
1766 | set_extent_uptodate(tree, start, end, | 1795 | set_extent_uptodate(tree, start, end, &cached, |
1767 | GFP_ATOMIC); | 1796 | GFP_ATOMIC); |
1768 | } | 1797 | } |
1769 | unlock_extent(tree, start, end, GFP_ATOMIC); | 1798 | unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); |
1770 | 1799 | ||
1771 | if (whole_page) { | 1800 | if (whole_page) { |
1772 | if (uptodate) { | 1801 | if (uptodate) { |
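
The cached-state dance above is a take-a-reference-under-the-lock pattern: look the state up once while holding tree->lock, pin it with a reference, and let the later set_extent_uptodate/unlock_extent_cached calls reuse the pointer instead of repeating the rb-tree search. A generic user-space sketch of that pattern, with a one-entry stand-in for the tree (all names illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* stand-in for struct extent_state: a refcounted object found in a tree */
struct state {
	atomic_int refs;
	long start;
};

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
static struct state *the_tree;	/* one-entry "tree" for the sketch */

/*
 * look the state up once and take a reference under the lock; later
 * callers reuse the cached pointer instead of searching the tree again
 */
static struct state *cache_state(void)
{
	struct state *s;

	pthread_mutex_lock(&tree_lock);
	s = the_tree;
	if (s)
		atomic_fetch_add(&s->refs, 1);
	pthread_mutex_unlock(&tree_lock);
	return s;
}

/* drop the cached reference; the last ref frees the object */
static void uncache_state(struct state **cached)
{
	struct state *s = *cached;

	*cached = NULL;
	if (s && atomic_fetch_sub(&s->refs, 1) == 1)
		free(s);
}

int main(void)
{
	struct state *cached, *t;

	the_tree = calloc(1, sizeof(*the_tree));
	atomic_store(&the_tree->refs, 1);
	the_tree->start = 4096;

	cached = cache_state();		/* ref held across the "I/O" */
	printf("cached start %ld\n", cached->start);
	uncache_state(&cached);		/* unlock path drops the ref */

	t = the_tree;			/* finally drop the tree's own ref */
	the_tree = NULL;
	uncache_state(&t);
	return 0;
}
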
@@ -1805,6 +1834,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) | |||
1805 | 1834 | ||
1806 | do { | 1835 | do { |
1807 | struct page *page = bvec->bv_page; | 1836 | struct page *page = bvec->bv_page; |
1837 | struct extent_state *cached = NULL; | ||
1808 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 1838 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
1809 | 1839 | ||
1810 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + | 1840 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + |
@@ -1815,13 +1845,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) | |||
1815 | prefetchw(&bvec->bv_page->flags); | 1845 | prefetchw(&bvec->bv_page->flags); |
1816 | 1846 | ||
1817 | if (uptodate) { | 1847 | if (uptodate) { |
1818 | set_extent_uptodate(tree, start, end, GFP_ATOMIC); | 1848 | set_extent_uptodate(tree, start, end, &cached, |
1849 | GFP_ATOMIC); | ||
1819 | } else { | 1850 | } else { |
1820 | ClearPageUptodate(page); | 1851 | ClearPageUptodate(page); |
1821 | SetPageError(page); | 1852 | SetPageError(page); |
1822 | } | 1853 | } |
1823 | 1854 | ||
1824 | unlock_extent(tree, start, end, GFP_ATOMIC); | 1855 | unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); |
1825 | 1856 | ||
1826 | } while (bvec >= bio->bi_io_vec); | 1857 | } while (bvec >= bio->bi_io_vec); |
1827 | 1858 | ||
@@ -1865,7 +1896,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, | |||
1865 | bio_get(bio); | 1896 | bio_get(bio); |
1866 | 1897 | ||
1867 | if (tree->ops && tree->ops->submit_bio_hook) | 1898 | if (tree->ops && tree->ops->submit_bio_hook) |
1868 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, | 1899 | ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio, |
1869 | mirror_num, bio_flags, start); | 1900 | mirror_num, bio_flags, start); |
1870 | else | 1901 | else |
1871 | submit_bio(rw, bio); | 1902 | submit_bio(rw, bio); |
@@ -1920,6 +1951,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, | |||
1920 | nr = bio_get_nr_vecs(bdev); | 1951 | nr = bio_get_nr_vecs(bdev); |
1921 | 1952 | ||
1922 | bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); | 1953 | bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); |
1954 | if (!bio) | ||
1955 | return -ENOMEM; | ||
1923 | 1956 | ||
1924 | bio_add_page(bio, page, page_size, offset); | 1957 | bio_add_page(bio, page, page_size, offset); |
1925 | bio->bi_end_io = end_io_func; | 1958 | bio->bi_end_io = end_io_func; |
@@ -1944,6 +1977,7 @@ void set_page_extent_mapped(struct page *page) | |||
1944 | 1977 | ||
1945 | static void set_page_extent_head(struct page *page, unsigned long len) | 1978 | static void set_page_extent_head(struct page *page, unsigned long len) |
1946 | { | 1979 | { |
1980 | WARN_ON(!PagePrivate(page)); | ||
1947 | set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); | 1981 | set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); |
1948 | } | 1982 | } |
1949 | 1983 | ||
@@ -2007,14 +2041,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2007 | while (cur <= end) { | 2041 | while (cur <= end) { |
2008 | if (cur >= last_byte) { | 2042 | if (cur >= last_byte) { |
2009 | char *userpage; | 2043 | char *userpage; |
2044 | struct extent_state *cached = NULL; | ||
2045 | |||
2010 | iosize = PAGE_CACHE_SIZE - page_offset; | 2046 | iosize = PAGE_CACHE_SIZE - page_offset; |
2011 | userpage = kmap_atomic(page, KM_USER0); | 2047 | userpage = kmap_atomic(page, KM_USER0); |
2012 | memset(userpage + page_offset, 0, iosize); | 2048 | memset(userpage + page_offset, 0, iosize); |
2013 | flush_dcache_page(page); | 2049 | flush_dcache_page(page); |
2014 | kunmap_atomic(userpage, KM_USER0); | 2050 | kunmap_atomic(userpage, KM_USER0); |
2015 | set_extent_uptodate(tree, cur, cur + iosize - 1, | 2051 | set_extent_uptodate(tree, cur, cur + iosize - 1, |
2016 | GFP_NOFS); | 2052 | &cached, GFP_NOFS); |
2017 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | 2053 | unlock_extent_cached(tree, cur, cur + iosize - 1, |
2054 | &cached, GFP_NOFS); | ||
2018 | break; | 2055 | break; |
2019 | } | 2056 | } |
2020 | em = get_extent(inode, page, page_offset, cur, | 2057 | em = get_extent(inode, page, page_offset, cur, |
@@ -2028,8 +2065,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2028 | BUG_ON(extent_map_end(em) <= cur); | 2065 | BUG_ON(extent_map_end(em) <= cur); |
2029 | BUG_ON(end < cur); | 2066 | BUG_ON(end < cur); |
2030 | 2067 | ||
2031 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 2068 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
2032 | this_bio_flag = EXTENT_BIO_COMPRESSED; | 2069 | this_bio_flag = EXTENT_BIO_COMPRESSED; |
2070 | extent_set_compress_type(&this_bio_flag, | ||
2071 | em->compress_type); | ||
2072 | } | ||
2033 | 2073 | ||
2034 | iosize = min(extent_map_end(em) - cur, end - cur + 1); | 2074 | iosize = min(extent_map_end(em) - cur, end - cur + 1); |
2035 | cur_end = min(extent_map_end(em) - 1, end); | 2075 | cur_end = min(extent_map_end(em) - 1, end); |
@@ -2051,14 +2091,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2051 | /* we've found a hole, just zero and go on */ | 2091 | /* we've found a hole, just zero and go on */ |
2052 | if (block_start == EXTENT_MAP_HOLE) { | 2092 | if (block_start == EXTENT_MAP_HOLE) { |
2053 | char *userpage; | 2093 | char *userpage; |
2094 | struct extent_state *cached = NULL; | ||
2095 | |||
2054 | userpage = kmap_atomic(page, KM_USER0); | 2096 | userpage = kmap_atomic(page, KM_USER0); |
2055 | memset(userpage + page_offset, 0, iosize); | 2097 | memset(userpage + page_offset, 0, iosize); |
2056 | flush_dcache_page(page); | 2098 | flush_dcache_page(page); |
2057 | kunmap_atomic(userpage, KM_USER0); | 2099 | kunmap_atomic(userpage, KM_USER0); |
2058 | 2100 | ||
2059 | set_extent_uptodate(tree, cur, cur + iosize - 1, | 2101 | set_extent_uptodate(tree, cur, cur + iosize - 1, |
2060 | GFP_NOFS); | 2102 | &cached, GFP_NOFS); |
2061 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | 2103 | unlock_extent_cached(tree, cur, cur + iosize - 1, |
2104 | &cached, GFP_NOFS); | ||
2062 | cur = cur + iosize; | 2105 | cur = cur + iosize; |
2063 | page_offset += iosize; | 2106 | page_offset += iosize; |
2064 | continue; | 2107 | continue; |
@@ -2123,7 +2166,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | |||
2123 | ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, | 2166 | ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, |
2124 | &bio_flags); | 2167 | &bio_flags); |
2125 | if (bio) | 2168 | if (bio) |
2126 | submit_one_bio(READ, bio, 0, bio_flags); | 2169 | ret = submit_one_bio(READ, bio, 0, bio_flags); |
2127 | return ret; | 2170 | return ret; |
2128 | } | 2171 | } |
2129 | 2172 | ||
@@ -2176,10 +2219,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2176 | unsigned long nr_written = 0; | 2219 | unsigned long nr_written = 0; |
2177 | 2220 | ||
2178 | if (wbc->sync_mode == WB_SYNC_ALL) | 2221 | if (wbc->sync_mode == WB_SYNC_ALL) |
2179 | write_flags = WRITE_SYNC_PLUG; | 2222 | write_flags = WRITE_SYNC; |
2180 | else | 2223 | else |
2181 | write_flags = WRITE; | 2224 | write_flags = WRITE; |
2182 | 2225 | ||
2226 | trace___extent_writepage(page, inode, wbc); | ||
2227 | |||
2183 | WARN_ON(!PageLocked(page)); | 2228 | WARN_ON(!PageLocked(page)); |
2184 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | 2229 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); |
2185 | if (page->index > end_index || | 2230 | if (page->index > end_index || |
@@ -2775,9 +2820,12 @@ int extent_prepare_write(struct extent_io_tree *tree, | |||
2775 | iocount++; | 2820 | iocount++; |
2776 | block_start = block_start + iosize; | 2821 | block_start = block_start + iosize; |
2777 | } else { | 2822 | } else { |
2778 | set_extent_uptodate(tree, block_start, cur_end, | 2823 | struct extent_state *cached = NULL; |
2824 | |||
2825 | set_extent_uptodate(tree, block_start, cur_end, &cached, | ||
2779 | GFP_NOFS); | 2826 | GFP_NOFS); |
2780 | unlock_extent(tree, block_start, cur_end, GFP_NOFS); | 2827 | unlock_extent_cached(tree, block_start, cur_end, |
2828 | &cached, GFP_NOFS); | ||
2781 | block_start = cur_end + 1; | 2829 | block_start = cur_end + 1; |
2782 | } | 2830 | } |
2783 | page_offset = block_start & (PAGE_CACHE_SIZE - 1); | 2831 | page_offset = block_start & (PAGE_CACHE_SIZE - 1); |
@@ -2816,9 +2864,17 @@ int try_release_extent_state(struct extent_map_tree *map, | |||
2816 | * at this point we can safely clear everything except the | 2864 | * at this point we can safely clear everything except the |
2817 | * locked bit and the nodatasum bit | 2865 | * locked bit and the nodatasum bit |
2818 | */ | 2866 | */ |
2819 | clear_extent_bit(tree, start, end, | 2867 | ret = clear_extent_bit(tree, start, end, |
2820 | ~(EXTENT_LOCKED | EXTENT_NODATASUM), | 2868 | ~(EXTENT_LOCKED | EXTENT_NODATASUM), |
2821 | 0, 0, NULL, mask); | 2869 | 0, 0, NULL, mask); |
2870 | |||
2871 | /* if clear_extent_bit failed for -ENOMEM reasons, | ||
2872 | * we can't allow the release to continue. | ||
2873 | */ | ||
2874 | if (ret < 0) | ||
2875 | ret = 0; | ||
2876 | else | ||
2877 | ret = 1; | ||
2822 | } | 2878 | } |
2823 | return ret; | 2879 | return ret; |
2824 | } | 2880 | } |
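
The hunk above turns clear_extent_bit's return into a releasepage-style verdict: the callers only understand a boolean, so any negative errno becomes "keep the page" (0) and success becomes "safe to release" (1). A tiny sketch of that error-to-verdict mapping, with hypothetical stand-ins for the kernel calls:

#include <stdio.h>

/* hypothetical stand-in for clear_extent_bit: negative errno on failure */
static int clear_bits(int fail)
{
	return fail ? -12 /* -ENOMEM */ : 0;
}

/* releasepage-style verdict: 1 = safe to release, 0 = keep the page */
static int try_release(int fail)
{
	int ret = clear_bits(fail);

	if (ret < 0)
		ret = 0;	/* allocation failed, don't release */
	else
		ret = 1;
	return ret;
}

int main(void)
{
	printf("ok -> %d, enomem -> %d\n", try_release(0), try_release(1));
	return 0;
}
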
@@ -2898,6 +2954,46 @@ out: | |||
2898 | return sector; | 2954 | return sector; |
2899 | } | 2955 | } |
2900 | 2956 | ||
2957 | /* | ||
2958 | * helper function for fiemap, which doesn't want to see any holes. | ||
2959 | * This maps until we find something past 'last' | ||
2960 | */ | ||
2961 | static struct extent_map *get_extent_skip_holes(struct inode *inode, | ||
2962 | u64 offset, | ||
2963 | u64 last, | ||
2964 | get_extent_t *get_extent) | ||
2965 | { | ||
2966 | u64 sectorsize = BTRFS_I(inode)->root->sectorsize; | ||
2967 | struct extent_map *em; | ||
2968 | u64 len; | ||
2969 | |||
2970 | if (offset >= last) | ||
2971 | return NULL; | ||
2972 | |||
2973 | while (1) { | ||
2974 | len = last - offset; | ||
2975 | if (len == 0) | ||
2976 | break; | ||
2977 | len = (len + sectorsize - 1) & ~(sectorsize - 1); | ||
2978 | em = get_extent(inode, NULL, 0, offset, len, 0); | ||
2979 | if (!em || IS_ERR(em)) | ||
2980 | return em; | ||
2981 | |||
2982 | /* if this isn't a hole return it */ | ||
2983 | if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) && | ||
2984 | em->block_start != EXTENT_MAP_HOLE) { | ||
2985 | return em; | ||
2986 | } | ||
2987 | |||
2988 | /* this is a hole, advance to the next extent */ | ||
2989 | offset = extent_map_end(em); | ||
2990 | free_extent_map(em); | ||
2991 | if (offset >= last) | ||
2992 | break; | ||
2993 | } | ||
2994 | return NULL; | ||
2995 | } | ||
2996 | |||
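
A user-space reduction of get_extent_skip_holes(): keep calling the mapping function and skipping hole results until something real turns up before 'last'. The mock extent table stands in for get_extent; names are illustrative:

#include <stdio.h>
#include <stdint.h>

struct em { uint64_t start, len; int hole; };

/* mock extent table: data, an 8K hole, then data again */
static struct em table[] = {
	{ 0,     4096, 0 },
	{ 4096,  8192, 1 },
	{ 12288, 4096, 0 },
};

static struct em *get_extent(uint64_t off)
{
	unsigned i;

	for (i = 0; i < 3; i++)
		if (off >= table[i].start &&
		    off < table[i].start + table[i].len)
			return &table[i];
	return NULL;
}

/*
 * map forward from 'offset', skipping holes, until something real shows
 * up before 'last' -- the same loop shape as the fiemap helper above
 */
static struct em *get_extent_skip_holes(uint64_t offset, uint64_t last)
{
	while (offset < last) {
		struct em *em = get_extent(offset);

		if (!em)
			return NULL;
		if (!em->hole)
			return em;	/* not a hole, return it */
		offset = em->start + em->len;	/* advance past the hole */
	}
	return NULL;
}

int main(void)
{
	struct em *em = get_extent_skip_holes(4096, 16384);

	if (em)	/* prints 12288: the hole at 4096 was skipped */
		printf("first real extent at %llu\n",
		       (unsigned long long)em->start);
	return 0;
}
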
2901 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 2997 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
2902 | __u64 start, __u64 len, get_extent_t *get_extent) | 2998 | __u64 start, __u64 len, get_extent_t *get_extent) |
2903 | { | 2999 | { |
@@ -2907,16 +3003,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2907 | u32 flags = 0; | 3003 | u32 flags = 0; |
2908 | u32 found_type; | 3004 | u32 found_type; |
2909 | u64 last; | 3005 | u64 last; |
3006 | u64 last_for_get_extent = 0; | ||
2910 | u64 disko = 0; | 3007 | u64 disko = 0; |
3008 | u64 isize = i_size_read(inode); | ||
2911 | struct btrfs_key found_key; | 3009 | struct btrfs_key found_key; |
2912 | struct extent_map *em = NULL; | 3010 | struct extent_map *em = NULL; |
2913 | struct extent_state *cached_state = NULL; | 3011 | struct extent_state *cached_state = NULL; |
2914 | struct btrfs_path *path; | 3012 | struct btrfs_path *path; |
2915 | struct btrfs_file_extent_item *item; | 3013 | struct btrfs_file_extent_item *item; |
2916 | int end = 0; | 3014 | int end = 0; |
2917 | u64 em_start = 0, em_len = 0; | 3015 | u64 em_start = 0; |
3016 | u64 em_len = 0; | ||
3017 | u64 em_end = 0; | ||
2918 | unsigned long emflags; | 3018 | unsigned long emflags; |
2919 | int hole = 0; | ||
2920 | 3019 | ||
2921 | if (len == 0) | 3020 | if (len == 0) |
2922 | return -EINVAL; | 3021 | return -EINVAL; |
@@ -2926,6 +3025,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2926 | return -ENOMEM; | 3025 | return -ENOMEM; |
2927 | path->leave_spinning = 1; | 3026 | path->leave_spinning = 1; |
2928 | 3027 | ||
3028 | /* | ||
3029 | * look up the last file extent. We're not using i_size here | ||
3030 | * because there might be preallocation past i_size | ||
3031 | */ | ||
2929 | ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, | 3032 | ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, |
2930 | path, inode->i_ino, -1, 0); | 3033 | path, inode->i_ino, -1, 0); |
2931 | if (ret < 0) { | 3034 | if (ret < 0) { |
@@ -2939,18 +3042,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2939 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); | 3042 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); |
2940 | found_type = btrfs_key_type(&found_key); | 3043 | found_type = btrfs_key_type(&found_key); |
2941 | 3044 | ||
2942 | /* No extents, just return */ | 3045 | /* No extents, but there might be delalloc bits */ |
2943 | if (found_key.objectid != inode->i_ino || | 3046 | if (found_key.objectid != inode->i_ino || |
2944 | found_type != BTRFS_EXTENT_DATA_KEY) { | 3047 | found_type != BTRFS_EXTENT_DATA_KEY) { |
2945 | btrfs_free_path(path); | 3048 | /* have to trust i_size as the end */ |
2946 | return 0; | 3049 | last = (u64)-1; |
3050 | last_for_get_extent = isize; | ||
3051 | } else { | ||
3052 | /* | ||
3053 | * remember the start of the last extent. There are a | ||
3054 | * bunch of different factors that go into the length of the | ||
3055 | * extent, so it's much less complex to remember where it started | ||
3056 | */ | ||
3057 | last = found_key.offset; | ||
3058 | last_for_get_extent = last + 1; | ||
2947 | } | 3059 | } |
2948 | last = found_key.offset; | ||
2949 | btrfs_free_path(path); | 3060 | btrfs_free_path(path); |
2950 | 3061 | ||
3062 | /* | ||
3063 | * we might have some extents allocated but more delalloc past those | ||
3064 | * extents. so, we trust isize unless the start of the last extent is | ||
3065 | * beyond isize | ||
3066 | */ | ||
3067 | if (last < isize) { | ||
3068 | last = (u64)-1; | ||
3069 | last_for_get_extent = isize; | ||
3070 | } | ||
3071 | |||
2951 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, | 3072 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, |
2952 | &cached_state, GFP_NOFS); | 3073 | &cached_state, GFP_NOFS); |
2953 | em = get_extent(inode, NULL, 0, off, max - off, 0); | 3074 | |
3075 | em = get_extent_skip_holes(inode, off, last_for_get_extent, | ||
3076 | get_extent); | ||
2954 | if (!em) | 3077 | if (!em) |
2955 | goto out; | 3078 | goto out; |
2956 | if (IS_ERR(em)) { | 3079 | if (IS_ERR(em)) { |
@@ -2959,22 +3082,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2959 | } | 3082 | } |
2960 | 3083 | ||
2961 | while (!end) { | 3084 | while (!end) { |
2962 | hole = 0; | 3085 | u64 offset_in_extent; |
2963 | off = em->start + em->len; | ||
2964 | if (off >= max) | ||
2965 | end = 1; | ||
2966 | 3086 | ||
2967 | if (em->block_start == EXTENT_MAP_HOLE) { | 3087 | /* break if the extent we found is outside the range */ |
2968 | hole = 1; | 3088 | if (em->start >= max || extent_map_end(em) < off) |
2969 | goto next; | 3089 | break; |
2970 | } | ||
2971 | 3090 | ||
2972 | em_start = em->start; | 3091 | /* |
2973 | em_len = em->len; | 3092 | * get_extent may return an extent that starts before our |
3093 | * requested range. We have to make sure the ranges | ||
3094 | * we return to fiemap always move forward and don't | ||
3095 | * overlap, so adjust the offsets here | ||
3096 | */ | ||
3097 | em_start = max(em->start, off); | ||
2974 | 3098 | ||
3099 | /* | ||
3100 | * record the offset from the start of the extent | ||
3101 | * for adjusting the disk offset below | ||
3102 | */ | ||
3103 | offset_in_extent = em_start - em->start; | ||
3104 | em_end = extent_map_end(em); | ||
3105 | em_len = em_end - em_start; | ||
3106 | emflags = em->flags; | ||
2975 | disko = 0; | 3107 | disko = 0; |
2976 | flags = 0; | 3108 | flags = 0; |
2977 | 3109 | ||
3110 | /* | ||
3111 | * bump off for our next call to get_extent | ||
3112 | */ | ||
3113 | off = extent_map_end(em); | ||
3114 | if (off >= max) | ||
3115 | end = 1; | ||
3116 | |||
2978 | if (em->block_start == EXTENT_MAP_LAST_BYTE) { | 3117 | if (em->block_start == EXTENT_MAP_LAST_BYTE) { |
2979 | end = 1; | 3118 | end = 1; |
2980 | flags |= FIEMAP_EXTENT_LAST; | 3119 | flags |= FIEMAP_EXTENT_LAST; |
@@ -2985,42 +3124,34 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2985 | flags |= (FIEMAP_EXTENT_DELALLOC | | 3124 | flags |= (FIEMAP_EXTENT_DELALLOC | |
2986 | FIEMAP_EXTENT_UNKNOWN); | 3125 | FIEMAP_EXTENT_UNKNOWN); |
2987 | } else { | 3126 | } else { |
2988 | disko = em->block_start; | 3127 | disko = em->block_start + offset_in_extent; |
2989 | } | 3128 | } |
2990 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 3129 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) |
2991 | flags |= FIEMAP_EXTENT_ENCODED; | 3130 | flags |= FIEMAP_EXTENT_ENCODED; |
2992 | 3131 | ||
2993 | next: | ||
2994 | emflags = em->flags; | ||
2995 | free_extent_map(em); | 3132 | free_extent_map(em); |
2996 | em = NULL; | 3133 | em = NULL; |
2997 | if (!end) { | 3134 | if ((em_start >= last) || em_len == (u64)-1 || |
2998 | em = get_extent(inode, NULL, 0, off, max - off, 0); | 3135 | (last == (u64)-1 && isize <= em_end)) { |
2999 | if (!em) | ||
3000 | goto out; | ||
3001 | if (IS_ERR(em)) { | ||
3002 | ret = PTR_ERR(em); | ||
3003 | goto out; | ||
3004 | } | ||
3005 | emflags = em->flags; | ||
3006 | } | ||
3007 | |||
3008 | if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { | ||
3009 | flags |= FIEMAP_EXTENT_LAST; | 3136 | flags |= FIEMAP_EXTENT_LAST; |
3010 | end = 1; | 3137 | end = 1; |
3011 | } | 3138 | } |
3012 | 3139 | ||
3013 | if (em_start == last) { | 3140 | /* now scan forward to see if this is really the last extent. */ |
3141 | em = get_extent_skip_holes(inode, off, last_for_get_extent, | ||
3142 | get_extent); | ||
3143 | if (IS_ERR(em)) { | ||
3144 | ret = PTR_ERR(em); | ||
3145 | goto out; | ||
3146 | } | ||
3147 | if (!em) { | ||
3014 | flags |= FIEMAP_EXTENT_LAST; | 3148 | flags |= FIEMAP_EXTENT_LAST; |
3015 | end = 1; | 3149 | end = 1; |
3016 | } | 3150 | } |
3017 | 3151 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, | |
3018 | if (!hole) { | 3152 | em_len, flags); |
3019 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, | 3153 | if (ret) |
3020 | em_len, flags); | 3154 | goto out_free; |
3021 | if (ret) | ||
3022 | goto out_free; | ||
3023 | } | ||
3024 | } | 3155 | } |
3025 | out_free: | 3156 | out_free: |
3026 | free_extent_map(em); | 3157 | free_extent_map(em); |
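
The arithmetic in the reworked loop is worth seeing with numbers: when get_extent returns an extent that starts before the requested offset, em_start is clamped forward and the physical address is bumped by the same offset_in_extent, so the logical and disk offsets reported to fiemap stay in step. A standalone worked example:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* extent: logical 0..131071 backed at physical 1048576 */
	uint64_t em_logical = 0, em_end = 131072, block_start = 1048576;

	/* caller asked for the mapping at logical offset 65536 */
	uint64_t off = 65536;

	/* clamp the reported start so ranges always move forward */
	uint64_t em_start = em_logical > off ? em_logical : off;

	/* shift the physical address by the same amount we clamped */
	uint64_t offset_in_extent = em_start - em_logical;
	uint64_t disko = block_start + offset_in_extent;
	uint64_t em_len = em_end - em_start;

	/* prints: logical 65536 physical 1114112 len 65536 */
	printf("logical %llu physical %llu len %llu\n",
	       (unsigned long long)em_start, (unsigned long long)disko,
	       (unsigned long long)em_len);
	return 0;
}
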
@@ -3072,6 +3203,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3072 | #endif | 3203 | #endif |
3073 | 3204 | ||
3074 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); | 3205 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); |
3206 | if (eb == NULL) | ||
3207 | return NULL; | ||
3075 | eb->start = start; | 3208 | eb->start = start; |
3076 | eb->len = len; | 3209 | eb->len = len; |
3077 | spin_lock_init(&eb->lock); | 3210 | spin_lock_init(&eb->lock); |
@@ -3187,7 +3320,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3187 | } | 3320 | } |
3188 | if (!PageUptodate(p)) | 3321 | if (!PageUptodate(p)) |
3189 | uptodate = 0; | 3322 | uptodate = 0; |
3190 | unlock_page(p); | 3323 | |
3324 | /* | ||
3325 | * see below about how we avoid a nasty race with release page | ||
3326 | * and why we unlock later | ||
3327 | */ | ||
3328 | if (i != 0) | ||
3329 | unlock_page(p); | ||
3191 | } | 3330 | } |
3192 | if (uptodate) | 3331 | if (uptodate) |
3193 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 3332 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
@@ -3211,9 +3350,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3211 | atomic_inc(&eb->refs); | 3350 | atomic_inc(&eb->refs); |
3212 | spin_unlock(&tree->buffer_lock); | 3351 | spin_unlock(&tree->buffer_lock); |
3213 | radix_tree_preload_end(); | 3352 | radix_tree_preload_end(); |
3353 | |||
3354 | /* | ||
3355 | * there is a race where release page may have | ||
3356 | * tried to find this extent buffer in the radix | ||
3357 | * tree but failed. It will tell the VM it is safe to | ||
3358 | * reclaim the page, and it will clear the page private bit. | ||
3359 | * We must make sure to set the page private bit properly | ||
3360 | * after the extent buffer is in the radix tree so | ||
3361 | * it doesn't get lost | ||
3362 | */ | ||
3363 | set_page_extent_mapped(eb->first_page); | ||
3364 | set_page_extent_head(eb->first_page, eb->len); | ||
3365 | if (!page0) | ||
3366 | unlock_page(eb->first_page); | ||
3214 | return eb; | 3367 | return eb; |
3215 | 3368 | ||
3216 | free_eb: | 3369 | free_eb: |
3370 | if (eb->first_page && !page0) | ||
3371 | unlock_page(eb->first_page); | ||
3372 | |||
3217 | if (!atomic_dec_and_test(&eb->refs)) | 3373 | if (!atomic_dec_and_test(&eb->refs)) |
3218 | return exists; | 3374 | return exists; |
3219 | btrfs_release_extent_buffer(eb); | 3375 | btrfs_release_extent_buffer(eb); |
@@ -3264,10 +3420,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3264 | continue; | 3420 | continue; |
3265 | 3421 | ||
3266 | lock_page(page); | 3422 | lock_page(page); |
3423 | WARN_ON(!PagePrivate(page)); | ||
3424 | |||
3425 | set_page_extent_mapped(page); | ||
3267 | if (i == 0) | 3426 | if (i == 0) |
3268 | set_page_extent_head(page, eb->len); | 3427 | set_page_extent_head(page, eb->len); |
3269 | else | ||
3270 | set_page_private(page, EXTENT_PAGE_PRIVATE); | ||
3271 | 3428 | ||
3272 | clear_page_dirty_for_io(page); | 3429 | clear_page_dirty_for_io(page); |
3273 | spin_lock_irq(&page->mapping->tree_lock); | 3430 | spin_lock_irq(&page->mapping->tree_lock); |
@@ -3334,7 +3491,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3334 | num_pages = num_extent_pages(eb->start, eb->len); | 3491 | num_pages = num_extent_pages(eb->start, eb->len); |
3335 | 3492 | ||
3336 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | 3493 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, |
3337 | GFP_NOFS); | 3494 | NULL, GFP_NOFS); |
3338 | for (i = 0; i < num_pages; i++) { | 3495 | for (i = 0; i < num_pages; i++) { |
3339 | page = extent_buffer_page(eb, i); | 3496 | page = extent_buffer_page(eb, i); |
3340 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || | 3497 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || |
@@ -3457,6 +3614,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
3457 | 3614 | ||
3458 | for (i = start_i; i < num_pages; i++) { | 3615 | for (i = start_i; i < num_pages; i++) { |
3459 | page = extent_buffer_page(eb, i); | 3616 | page = extent_buffer_page(eb, i); |
3617 | |||
3618 | WARN_ON(!PagePrivate(page)); | ||
3619 | |||
3620 | set_page_extent_mapped(page); | ||
3621 | if (i == 0) | ||
3622 | set_page_extent_head(page, eb->len); | ||
3623 | |||
3460 | if (inc_all_pages) | 3624 | if (inc_all_pages) |
3461 | page_cache_get(page); | 3625 | page_cache_get(page); |
3462 | if (!PageUptodate(page)) { | 3626 | if (!PageUptodate(page)) { |
@@ -3562,6 +3726,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | |||
3562 | "wanted %lu %lu\n", (unsigned long long)eb->start, | 3726 | "wanted %lu %lu\n", (unsigned long long)eb->start, |
3563 | eb->len, start, min_len); | 3727 | eb->len, start, min_len); |
3564 | WARN_ON(1); | 3728 | WARN_ON(1); |
3729 | return -EINVAL; | ||
3565 | } | 3730 | } |
3566 | 3731 | ||
3567 | p = extent_buffer_page(eb, i); | 3732 | p = extent_buffer_page(eb, i); |
@@ -3754,6 +3919,12 @@ static void move_pages(struct page *dst_page, struct page *src_page, | |||
3754 | kunmap_atomic(dst_kaddr, KM_USER0); | 3919 | kunmap_atomic(dst_kaddr, KM_USER0); |
3755 | } | 3920 | } |
3756 | 3921 | ||
3922 | static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) | ||
3923 | { | ||
3924 | unsigned long distance = (src > dst) ? src - dst : dst - src; | ||
3925 | return distance < len; | ||
3926 | } | ||
3927 | |||
3757 | static void copy_pages(struct page *dst_page, struct page *src_page, | 3928 | static void copy_pages(struct page *dst_page, struct page *src_page, |
3758 | unsigned long dst_off, unsigned long src_off, | 3929 | unsigned long dst_off, unsigned long src_off, |
3759 | unsigned long len) | 3930 | unsigned long len) |
@@ -3761,10 +3932,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page, | |||
3761 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); | 3932 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); |
3762 | char *src_kaddr; | 3933 | char *src_kaddr; |
3763 | 3934 | ||
3764 | if (dst_page != src_page) | 3935 | if (dst_page != src_page) { |
3765 | src_kaddr = kmap_atomic(src_page, KM_USER1); | 3936 | src_kaddr = kmap_atomic(src_page, KM_USER1); |
3766 | else | 3937 | } else { |
3767 | src_kaddr = dst_kaddr; | 3938 | src_kaddr = dst_kaddr; |
3939 | BUG_ON(areas_overlap(src_off, dst_off, len)); | ||
3940 | } | ||
3768 | 3941 | ||
3769 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); | 3942 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); |
3770 | kunmap_atomic(dst_kaddr, KM_USER0); | 3943 | kunmap_atomic(dst_kaddr, KM_USER0); |
@@ -3839,7 +4012,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | |||
3839 | "len %lu len %lu\n", dst_offset, len, dst->len); | 4012 | "len %lu len %lu\n", dst_offset, len, dst->len); |
3840 | BUG_ON(1); | 4013 | BUG_ON(1); |
3841 | } | 4014 | } |
3842 | if (dst_offset < src_offset) { | 4015 | if (!areas_overlap(src_offset, dst_offset, len)) { |
3843 | memcpy_extent_buffer(dst, dst_offset, src_offset, len); | 4016 | memcpy_extent_buffer(dst, dst_offset, src_offset, len); |
3844 | return; | 4017 | return; |
3845 | } | 4018 | } |
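
areas_overlap() is the classic |src - dst| < len test. If the two byte ranges within a single page share any bytes, a plain forward memcpy can read bytes it has already overwritten, which is why the overlapping case must go through the backwards-copying move_pages path, and why memmove_extent_buffer now only takes the memcpy shortcut when the ranges are disjoint. A quick user-space demonstration:

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

static bool areas_overlap(unsigned long src, unsigned long dst,
			  unsigned long len)
{
	unsigned long distance = (src > dst) ? src - dst : dst - src;

	return distance < len;
}

int main(void)
{
	char buf[16] = "abcdefgh";

	/* src 0, dst 2, len 6: ranges overlap, memcpy could corrupt */
	if (areas_overlap(0, 2, 6))
		memmove(buf + 2, buf, 6);	/* safe for overlap */
	else
		memcpy(buf + 2, buf, 6);

	printf("%s\n", buf);			/* prints: ababcdef */
	return 0;
}
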
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 4183c8178f01..af2d7179c372 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -20,13 +20,18 @@ | |||
20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | 21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) |
22 | 22 | ||
23 | /* flags for bio submission */ | 23 | /* |
24 | * flags for bio submission. The high bits indicate the compression | ||
25 | * type for this bio | ||
26 | */ | ||
24 | #define EXTENT_BIO_COMPRESSED 1 | 27 | #define EXTENT_BIO_COMPRESSED 1 |
28 | #define EXTENT_BIO_FLAG_SHIFT 16 | ||
25 | 29 | ||
26 | /* these are bit numbers for test/set bit */ | 30 | /* these are bit numbers for test/set bit */ |
27 | #define EXTENT_BUFFER_UPTODATE 0 | 31 | #define EXTENT_BUFFER_UPTODATE 0 |
28 | #define EXTENT_BUFFER_BLOCKING 1 | 32 | #define EXTENT_BUFFER_BLOCKING 1 |
29 | #define EXTENT_BUFFER_DIRTY 2 | 33 | #define EXTENT_BUFFER_DIRTY 2 |
34 | #define EXTENT_BUFFER_CORRUPT 3 | ||
30 | 35 | ||
31 | /* these are flags for extent_clear_unlock_delalloc */ | 36 | /* these are flags for extent_clear_unlock_delalloc */ |
32 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 | 37 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 |
@@ -135,6 +140,17 @@ struct extent_buffer { | |||
135 | wait_queue_head_t lock_wq; | 140 | wait_queue_head_t lock_wq; |
136 | }; | 141 | }; |
137 | 142 | ||
143 | static inline void extent_set_compress_type(unsigned long *bio_flags, | ||
144 | int compress_type) | ||
145 | { | ||
146 | *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT; | ||
147 | } | ||
148 | |||
149 | static inline int extent_compress_type(unsigned long bio_flags) | ||
150 | { | ||
151 | return bio_flags >> EXTENT_BIO_FLAG_SHIFT; | ||
152 | } | ||
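
These helpers turn the bio flags word into two fields: the low bits keep flags like EXTENT_BIO_COMPRESSED, and everything at or above EXTENT_BIO_FLAG_SHIFT stores the compression type, so a single unsigned long carries both through the bio path. A standalone sketch of the packing (the enum values mirror btrfs's none/zlib/lzo ordering, stated here as an assumption):

#include <stdio.h>

#define EXTENT_BIO_COMPRESSED	1
#define EXTENT_BIO_FLAG_SHIFT	16

/* assumed to mirror the btrfs compression types: 0 none, 1 zlib, 2 lzo */
enum { COMPRESS_NONE, COMPRESS_ZLIB, COMPRESS_LZO };

static void extent_set_compress_type(unsigned long *bio_flags, int type)
{
	*bio_flags |= (unsigned long)type << EXTENT_BIO_FLAG_SHIFT;
}

static int extent_compress_type(unsigned long bio_flags)
{
	return bio_flags >> EXTENT_BIO_FLAG_SHIFT;
}

int main(void)
{
	unsigned long flags = EXTENT_BIO_COMPRESSED;

	extent_set_compress_type(&flags, COMPRESS_LZO);
	/* prints: flags 0x20001 type 2 */
	printf("flags 0x%lx type %d\n", flags, extent_compress_type(flags));
	return 0;
}
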
153 | |||
138 | struct extent_map_tree; | 154 | struct extent_map_tree; |
139 | 155 | ||
140 | static inline struct extent_state *extent_state_next(struct extent_state *state) | 156 | static inline struct extent_state *extent_state_next(struct extent_state *state) |
@@ -176,7 +192,7 @@ void extent_io_exit(void); | |||
176 | 192 | ||
177 | u64 count_range_bits(struct extent_io_tree *tree, | 193 | u64 count_range_bits(struct extent_io_tree *tree, |
178 | u64 *start, u64 search_end, | 194 | u64 *start, u64 search_end, |
179 | u64 max_bytes, unsigned long bits); | 195 | u64 max_bytes, unsigned long bits, int contig); |
180 | 196 | ||
181 | void free_extent_state(struct extent_state *state); | 197 | void free_extent_state(struct extent_state *state); |
182 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 198 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -192,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
192 | int bits, int exclusive_bits, u64 *failed_start, | 208 | int bits, int exclusive_bits, u64 *failed_start, |
193 | struct extent_state **cached_state, gfp_t mask); | 209 | struct extent_state **cached_state, gfp_t mask); |
194 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 210 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
195 | gfp_t mask); | 211 | struct extent_state **cached_state, gfp_t mask); |
196 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 212 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
197 | gfp_t mask); | 213 | gfp_t mask); |
198 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 214 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 23cb8da3ff66..a24a3f2fa13e 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/spinlock.h> | 4 | #include <linux/spinlock.h> |
5 | #include <linux/hardirq.h> | 5 | #include <linux/hardirq.h> |
6 | #include "ctree.h" | ||
6 | #include "extent_map.h" | 7 | #include "extent_map.h" |
7 | 8 | ||
8 | 9 | ||
@@ -50,10 +51,11 @@ struct extent_map *alloc_extent_map(gfp_t mask) | |||
50 | { | 51 | { |
51 | struct extent_map *em; | 52 | struct extent_map *em; |
52 | em = kmem_cache_alloc(extent_map_cache, mask); | 53 | em = kmem_cache_alloc(extent_map_cache, mask); |
53 | if (!em || IS_ERR(em)) | 54 | if (!em) |
54 | return em; | 55 | return NULL; |
55 | em->in_tree = 0; | 56 | em->in_tree = 0; |
56 | em->flags = 0; | 57 | em->flags = 0; |
58 | em->compress_type = BTRFS_COMPRESS_NONE; | ||
57 | atomic_set(&em->refs, 1); | 59 | atomic_set(&em->refs, 1); |
58 | return em; | 60 | return em; |
59 | } | 61 | } |
@@ -241,7 +243,7 @@ out: | |||
241 | * Insert @em into @tree or perform a simple forward/backward merge with | 243 | * Insert @em into @tree or perform a simple forward/backward merge with |
242 | * existing mappings. The extent_map struct passed in will be inserted | 244 | * existing mappings. The extent_map struct passed in will be inserted |
243 | * into the tree directly, with an additional reference taken, or a | 245 | * into the tree directly, with an additional reference taken, or a |
244 | * reference dropped if the merge attempt was successfull. | 246 | * reference dropped if the merge attempt was successful. |
245 | */ | 247 | */ |
246 | int add_extent_mapping(struct extent_map_tree *tree, | 248 | int add_extent_mapping(struct extent_map_tree *tree, |
247 | struct extent_map *em) | 249 | struct extent_map *em) |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index ab6d74b6e647..28b44dbd1e35 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
@@ -26,7 +26,8 @@ struct extent_map { | |||
26 | unsigned long flags; | 26 | unsigned long flags; |
27 | struct block_device *bdev; | 27 | struct block_device *bdev; |
28 | atomic_t refs; | 28 | atomic_t refs; |
29 | int in_tree; | 29 | unsigned int in_tree:1; |
30 | unsigned int compress_type:4; | ||
30 | }; | 31 | }; |
31 | 32 | ||
32 | struct extent_map_tree { | 33 | struct extent_map_tree { |
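
The extent_map change relies on C bitfields: shrinking in_tree from a full int to one bit frees room for the 4-bit compress_type in the same storage unit, so the struct doesn't grow. A minimal check of that layout (bitfield packing is implementation-defined; both sizes are typically 4 on common ABIs):

#include <stdio.h>

struct before { int in_tree; };
struct after  { unsigned int in_tree:1; unsigned int compress_type:4; };

int main(void)
{
	/* both fields fit in the storage the old int used by itself */
	printf("before %zu after %zu\n",
	       sizeof(struct before), sizeof(struct after));
	return 0;
}
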
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a562a250ae77..a6a9d4e8b491 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -48,7 +48,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | |||
48 | struct extent_buffer *leaf; | 48 | struct extent_buffer *leaf; |
49 | 49 | ||
50 | path = btrfs_alloc_path(); | 50 | path = btrfs_alloc_path(); |
51 | BUG_ON(!path); | 51 | if (!path) |
52 | return -ENOMEM; | ||
52 | file_key.objectid = objectid; | 53 | file_key.objectid = objectid; |
53 | file_key.offset = pos; | 54 | file_key.offset = pos; |
54 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); | 55 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); |
@@ -169,6 +170,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
169 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 170 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
170 | 171 | ||
171 | path = btrfs_alloc_path(); | 172 | path = btrfs_alloc_path(); |
173 | if (!path) | ||
174 | return -ENOMEM; | ||
172 | if (bio->bi_size > PAGE_CACHE_SIZE * 8) | 175 | if (bio->bi_size > PAGE_CACHE_SIZE * 8) |
173 | path->reada = 2; | 176 | path->reada = 2; |
174 | 177 | ||
@@ -536,6 +539,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
536 | root = root->fs_info->csum_root; | 539 | root = root->fs_info->csum_root; |
537 | 540 | ||
538 | path = btrfs_alloc_path(); | 541 | path = btrfs_alloc_path(); |
542 | if (!path) | ||
543 | return -ENOMEM; | ||
539 | 544 | ||
540 | while (1) { | 545 | while (1) { |
541 | key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | 546 | key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
@@ -548,7 +553,10 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
548 | if (path->slots[0] == 0) | 553 | if (path->slots[0] == 0) |
549 | goto out; | 554 | goto out; |
550 | path->slots[0]--; | 555 | path->slots[0]--; |
556 | } else if (ret < 0) { | ||
557 | goto out; | ||
551 | } | 558 | } |
559 | |||
552 | leaf = path->nodes[0]; | 560 | leaf = path->nodes[0]; |
553 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | 561 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
554 | 562 | ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 66836d85763b..75899a01dded 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/string.h> | 24 | #include <linux/string.h> |
25 | #include <linux/backing-dev.h> | 25 | #include <linux/backing-dev.h> |
26 | #include <linux/mpage.h> | 26 | #include <linux/mpage.h> |
27 | #include <linux/falloc.h> | ||
27 | #include <linux/swap.h> | 28 | #include <linux/swap.h> |
28 | #include <linux/writeback.h> | 29 | #include <linux/writeback.h> |
29 | #include <linux/statfs.h> | 30 | #include <linux/statfs.h> |
@@ -44,14 +45,14 @@ | |||
44 | * and be replaced with calls into generic code. | 45 | * and be replaced with calls into generic code. |
45 | */ | 46 | */ |
46 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | 47 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, |
47 | int write_bytes, | 48 | size_t write_bytes, |
48 | struct page **prepared_pages, | 49 | struct page **prepared_pages, |
49 | struct iov_iter *i) | 50 | struct iov_iter *i) |
50 | { | 51 | { |
51 | size_t copied = 0; | 52 | size_t copied = 0; |
53 | size_t total_copied = 0; | ||
52 | int pg = 0; | 54 | int pg = 0; |
53 | int offset = pos & (PAGE_CACHE_SIZE - 1); | 55 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
54 | int total_copied = 0; | ||
55 | 56 | ||
56 | while (write_bytes > 0) { | 57 | while (write_bytes > 0) { |
57 | size_t count = min_t(size_t, | 58 | size_t count = min_t(size_t, |
@@ -69,14 +70,26 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | |||
69 | 70 | ||
70 | /* Flush processor's dcache for this page */ | 71 | /* Flush processor's dcache for this page */ |
71 | flush_dcache_page(page); | 72 | flush_dcache_page(page); |
73 | |||
74 | /* | ||
75 | * if we get a partial write, we can end up with | ||
76 | * partially up to date pages. These add | ||
77 | * a lot of complexity, so make sure they don't | ||
78 | * happen by forcing this copy to be retried. | ||
79 | * | ||
80 | * The rest of the btrfs_file_write code will fall | ||
81 | * back to page-at-a-time copies after we return 0. | ||
82 | */ | ||
83 | if (!PageUptodate(page) && copied < count) | ||
84 | copied = 0; | ||
85 | |||
72 | iov_iter_advance(i, copied); | 86 | iov_iter_advance(i, copied); |
73 | write_bytes -= copied; | 87 | write_bytes -= copied; |
74 | total_copied += copied; | 88 | total_copied += copied; |
75 | 89 | ||
76 | /* Return to btrfs_file_aio_write to fault page */ | 90 | /* Return to btrfs_file_aio_write to fault page */ |
77 | if (unlikely(copied == 0)) { | 91 | if (unlikely(copied == 0)) |
78 | break; | 92 | break; |
79 | } | ||
80 | 93 | ||
81 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | 94 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { |
82 | offset += copied; | 95 | offset += copied; |
@@ -91,12 +104,10 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | |||
91 | /* | 104 | /* |
92 | * unlocks pages after btrfs_file_write is done with them | 105 | * unlocks pages after btrfs_file_write is done with them |
93 | */ | 106 | */ |
94 | static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) | 107 | void btrfs_drop_pages(struct page **pages, size_t num_pages) |
95 | { | 108 | { |
96 | size_t i; | 109 | size_t i; |
97 | for (i = 0; i < num_pages; i++) { | 110 | for (i = 0; i < num_pages; i++) { |
98 | if (!pages[i]) | ||
99 | break; | ||
100 | /* page checked is some magic around finding pages that | 111 | /* page checked is some magic around finding pages that |
101 | * have been modified without going through btrfs_set_page_dirty | 112 | * have been modified without going through btrfs_set_page_dirty |
102 | * clear it here | 113 | * clear it here |
@@ -116,17 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) | |||
116 | * this also makes the decision about creating an inline extent vs | 127 | * this also makes the decision about creating an inline extent vs |
117 | * doing real data extents, marking pages dirty and delalloc as required. | 128 | * doing real data extents, marking pages dirty and delalloc as required. |
118 | */ | 129 | */ |
119 | static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | 130 | int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, |
120 | struct btrfs_root *root, | 131 | struct page **pages, size_t num_pages, |
121 | struct file *file, | 132 | loff_t pos, size_t write_bytes, |
122 | struct page **pages, | 133 | struct extent_state **cached) |
123 | size_t num_pages, | ||
124 | loff_t pos, | ||
125 | size_t write_bytes) | ||
126 | { | 134 | { |
127 | int err = 0; | 135 | int err = 0; |
128 | int i; | 136 | int i; |
129 | struct inode *inode = fdentry(file)->d_inode; | ||
130 | u64 num_bytes; | 137 | u64 num_bytes; |
131 | u64 start_pos; | 138 | u64 start_pos; |
132 | u64 end_of_last_block; | 139 | u64 end_of_last_block; |
@@ -139,8 +146,9 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
139 | 146 | ||
140 | end_of_last_block = start_pos + num_bytes - 1; | 147 | end_of_last_block = start_pos + num_bytes - 1; |
141 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, | 148 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
142 | NULL); | 149 | cached); |
143 | BUG_ON(err); | 150 | if (err) |
151 | return err; | ||
144 | 152 | ||
145 | for (i = 0; i < num_pages; i++) { | 153 | for (i = 0; i < num_pages; i++) { |
146 | struct page *p = pages[i]; | 154 | struct page *p = pages[i]; |
@@ -148,13 +156,14 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
148 | ClearPageChecked(p); | 156 | ClearPageChecked(p); |
149 | set_page_dirty(p); | 157 | set_page_dirty(p); |
150 | } | 158 | } |
151 | if (end_pos > isize) { | 159 | |
160 | /* | ||
161 | * we've only changed i_size in ram, and we haven't updated | ||
162 | * the disk i_size. There is no need to log the inode | ||
163 | * at this time. | ||
164 | */ | ||
165 | if (end_pos > isize) | ||
152 | i_size_write(inode, end_pos); | 166 | i_size_write(inode, end_pos); |
153 | /* we've only changed i_size in ram, and we haven't updated | ||
154 | * the disk i_size. There is no need to log the inode | ||
155 | * at this time. | ||
156 | */ | ||
157 | } | ||
158 | return 0; | 167 | return 0; |
159 | } | 168 | } |
160 | 169 | ||
@@ -185,6 +194,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
185 | split = alloc_extent_map(GFP_NOFS); | 194 | split = alloc_extent_map(GFP_NOFS); |
186 | if (!split2) | 195 | if (!split2) |
187 | split2 = alloc_extent_map(GFP_NOFS); | 196 | split2 = alloc_extent_map(GFP_NOFS); |
197 | BUG_ON(!split || !split2); | ||
188 | 198 | ||
189 | write_lock(&em_tree->lock); | 199 | write_lock(&em_tree->lock); |
190 | em = lookup_extent_mapping(em_tree, start, len); | 200 | em = lookup_extent_mapping(em_tree, start, len); |
@@ -224,6 +234,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
224 | 234 | ||
225 | split->bdev = em->bdev; | 235 | split->bdev = em->bdev; |
226 | split->flags = flags; | 236 | split->flags = flags; |
237 | split->compress_type = em->compress_type; | ||
227 | ret = add_extent_mapping(em_tree, split); | 238 | ret = add_extent_mapping(em_tree, split); |
228 | BUG_ON(ret); | 239 | BUG_ON(ret); |
229 | free_extent_map(split); | 240 | free_extent_map(split); |
@@ -238,6 +249,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
238 | split->len = em->start + em->len - (start + len); | 249 | split->len = em->start + em->len - (start + len); |
239 | split->bdev = em->bdev; | 250 | split->bdev = em->bdev; |
240 | split->flags = flags; | 251 | split->flags = flags; |
252 | split->compress_type = em->compress_type; | ||
241 | 253 | ||
242 | if (compressed) { | 254 | if (compressed) { |
243 | split->block_len = em->block_len; | 255 | split->block_len = em->block_len; |
@@ -593,6 +605,8 @@ again: | |||
593 | key.offset = split; | 605 | key.offset = split; |
594 | 606 | ||
595 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 607 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
608 | if (ret < 0) | ||
609 | goto out; | ||
596 | if (ret > 0 && path->slots[0] > 0) | 610 | if (ret > 0 && path->slots[0] > 0) |
597 | path->slots[0]--; | 611 | path->slots[0]--; |
598 | 612 | ||
@@ -759,6 +773,27 @@ out: | |||
759 | } | 773 | } |
760 | 774 | ||
761 | /* | 775 | /* |
776 | * on error we return an unlocked page and the error value | ||
777 | * on success we return a locked page and 0 | ||
778 | */ | ||
779 | static int prepare_uptodate_page(struct page *page, u64 pos) | ||
780 | { | ||
781 | int ret = 0; | ||
782 | |||
783 | if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) { | ||
784 | ret = btrfs_readpage(NULL, page); | ||
785 | if (ret) | ||
786 | return ret; | ||
787 | lock_page(page); | ||
788 | if (!PageUptodate(page)) { | ||
789 | unlock_page(page); | ||
790 | return -EIO; | ||
791 | } | ||
792 | } | ||
793 | return 0; | ||
794 | } | ||
795 | |||
796 | /* | ||
762 | * this gets pages into the page cache and locks them down, it also properly | 797 | * this gets pages into the page cache and locks them down, it also properly |
763 | * waits for data=ordered extents to finish before allowing the pages to be | 798 | * waits for data=ordered extents to finish before allowing the pages to be |
764 | * modified. | 799 | * modified. |
@@ -773,6 +808,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
773 | unsigned long index = pos >> PAGE_CACHE_SHIFT; | 808 | unsigned long index = pos >> PAGE_CACHE_SHIFT; |
774 | struct inode *inode = fdentry(file)->d_inode; | 809 | struct inode *inode = fdentry(file)->d_inode; |
775 | int err = 0; | 810 | int err = 0; |
811 | int faili = 0; | ||
776 | u64 start_pos; | 812 | u64 start_pos; |
777 | u64 last_pos; | 813 | u64 last_pos; |
778 | 814 | ||
@@ -780,21 +816,33 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
780 | last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; | 816 | last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; |
781 | 817 | ||
782 | if (start_pos > inode->i_size) { | 818 | if (start_pos > inode->i_size) { |
783 | err = btrfs_cont_expand(inode, start_pos); | 819 | err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); |
784 | if (err) | 820 | if (err) |
785 | return err; | 821 | return err; |
786 | } | 822 | } |
787 | 823 | ||
788 | memset(pages, 0, num_pages * sizeof(struct page *)); | ||
789 | again: | 824 | again: |
790 | for (i = 0; i < num_pages; i++) { | 825 | for (i = 0; i < num_pages; i++) { |
791 | pages[i] = grab_cache_page(inode->i_mapping, index + i); | 826 | pages[i] = grab_cache_page(inode->i_mapping, index + i); |
792 | if (!pages[i]) { | 827 | if (!pages[i]) { |
828 | faili = i - 1; | ||
793 | err = -ENOMEM; | 829 | err = -ENOMEM; |
794 | BUG_ON(1); | 830 | goto fail; |
831 | } | ||
832 | |||
833 | if (i == 0) | ||
834 | err = prepare_uptodate_page(pages[i], pos); | ||
835 | if (i == num_pages - 1) | ||
836 | err = prepare_uptodate_page(pages[i], | ||
837 | pos + write_bytes); | ||
838 | if (err) { | ||
839 | page_cache_release(pages[i]); | ||
840 | faili = i - 1; | ||
841 | goto fail; | ||
795 | } | 842 | } |
796 | wait_on_page_writeback(pages[i]); | 843 | wait_on_page_writeback(pages[i]); |
797 | } | 844 | } |
845 | err = 0; | ||
798 | if (start_pos < inode->i_size) { | 846 | if (start_pos < inode->i_size) { |
799 | struct btrfs_ordered_extent *ordered; | 847 | struct btrfs_ordered_extent *ordered; |
800 | lock_extent_bits(&BTRFS_I(inode)->io_tree, | 848 | lock_extent_bits(&BTRFS_I(inode)->io_tree, |
@@ -834,176 +882,103 @@ again: | |||
834 | WARN_ON(!PageLocked(pages[i])); | 882 | WARN_ON(!PageLocked(pages[i])); |
835 | } | 883 | } |
836 | return 0; | 884 | return 0; |
885 | fail: | ||
886 | while (faili >= 0) { | ||
887 | unlock_page(pages[faili]); | ||
888 | page_cache_release(pages[faili]); | ||
889 | faili--; | ||
890 | } | ||
891 | return err; | ||
892 | |||
837 | } | 893 | } |
838 | 894 | ||
839 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | 895 | static noinline ssize_t __btrfs_buffered_write(struct file *file, |
840 | const struct iovec *iov, | 896 | struct iov_iter *i, |
841 | unsigned long nr_segs, loff_t pos) | 897 | loff_t pos) |
842 | { | 898 | { |
843 | struct file *file = iocb->ki_filp; | ||
844 | struct inode *inode = fdentry(file)->d_inode; | 899 | struct inode *inode = fdentry(file)->d_inode; |
845 | struct btrfs_root *root = BTRFS_I(inode)->root; | 900 | struct btrfs_root *root = BTRFS_I(inode)->root; |
846 | struct page *pinned[2]; | ||
847 | struct page **pages = NULL; | 901 | struct page **pages = NULL; |
848 | struct iov_iter i; | ||
849 | loff_t *ppos = &iocb->ki_pos; | ||
850 | loff_t start_pos; | ||
851 | ssize_t num_written = 0; | ||
852 | ssize_t err = 0; | ||
853 | size_t count; | ||
854 | size_t ocount; | ||
855 | int ret = 0; | ||
856 | int nrptrs; | ||
857 | unsigned long first_index; | 902 | unsigned long first_index; |
858 | unsigned long last_index; | 903 | unsigned long last_index; |
859 | int will_write; | 904 | size_t num_written = 0; |
860 | int buffered = 0; | 905 | int nrptrs; |
861 | int copied = 0; | 906 | int ret = 0; |
862 | int dirty_pages = 0; | ||
863 | |||
864 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || | ||
865 | (file->f_flags & O_DIRECT)); | ||
866 | |||
867 | pinned[0] = NULL; | ||
868 | pinned[1] = NULL; | ||
869 | |||
870 | start_pos = pos; | ||
871 | |||
872 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | ||
873 | |||
874 | mutex_lock(&inode->i_mutex); | ||
875 | |||
876 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
877 | if (err) | ||
878 | goto out; | ||
879 | count = ocount; | ||
880 | |||
881 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | ||
882 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | ||
883 | if (err) | ||
884 | goto out; | ||
885 | |||
886 | if (count == 0) | ||
887 | goto out; | ||
888 | |||
889 | err = file_remove_suid(file); | ||
890 | if (err) | ||
891 | goto out; | ||
892 | |||
893 | file_update_time(file); | ||
894 | BTRFS_I(inode)->sequence++; | ||
895 | |||
896 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
897 | num_written = generic_file_direct_write(iocb, iov, &nr_segs, | ||
898 | pos, ppos, count, | ||
899 | ocount); | ||
900 | /* | ||
901 | * the generic O_DIRECT will update in-memory i_size after the | ||
902 | * DIOs are done. But our endio handlers that update the on | ||
903 | * disk i_size never update past the in memory i_size. So we | ||
904 | * need one more update here to catch any additions to the | ||
905 | * file | ||
906 | */ | ||
907 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | ||
908 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
909 | mark_inode_dirty(inode); | ||
910 | } | ||
911 | |||
912 | if (num_written < 0) { | ||
913 | ret = num_written; | ||
914 | num_written = 0; | ||
915 | goto out; | ||
916 | } else if (num_written == count) { | ||
917 | /* pick up pos changes done by the generic code */ | ||
918 | pos = *ppos; | ||
919 | goto out; | ||
920 | } | ||
921 | /* | ||
922 | * We are going to do buffered for the rest of the range, so we | ||
923 | * need to make sure to invalidate the buffered pages when we're | ||
924 | * done. | ||
925 | */ | ||
926 | buffered = 1; | ||
927 | pos += num_written; | ||
928 | } | ||
929 | 907 | ||
930 | iov_iter_init(&i, iov, nr_segs, count, num_written); | 908 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / |
931 | nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) / | ||
932 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | 909 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / |
933 | (sizeof(struct page *))); | 910 | (sizeof(struct page *))); |
934 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 911 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
935 | 912 | if (!pages) | |
936 | /* generic_write_checks can change our pos */ | 913 | return -ENOMEM; |
937 | start_pos = pos; | ||
938 | 914 | ||
939 | first_index = pos >> PAGE_CACHE_SHIFT; | 915 | first_index = pos >> PAGE_CACHE_SHIFT; |
940 | last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; | 916 | last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT; |
941 | |||
942 | /* | ||
943 | * there are lots of better ways to do this, but this code | ||
944 | * makes sure the first and last page in the file range are | ||
945 | * up to date and ready for cow | ||
946 | */ | ||
947 | if ((pos & (PAGE_CACHE_SIZE - 1))) { | ||
948 | pinned[0] = grab_cache_page(inode->i_mapping, first_index); | ||
949 | if (!PageUptodate(pinned[0])) { | ||
950 | ret = btrfs_readpage(NULL, pinned[0]); | ||
951 | BUG_ON(ret); | ||
952 | wait_on_page_locked(pinned[0]); | ||
953 | } else { | ||
954 | unlock_page(pinned[0]); | ||
955 | } | ||
956 | } | ||
957 | if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) { | ||
958 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); | ||
959 | if (!PageUptodate(pinned[1])) { | ||
960 | ret = btrfs_readpage(NULL, pinned[1]); | ||
961 | BUG_ON(ret); | ||
962 | wait_on_page_locked(pinned[1]); | ||
963 | } else { | ||
964 | unlock_page(pinned[1]); | ||
965 | } | ||
966 | } | ||
967 | 917 | ||
968 | while (iov_iter_count(&i) > 0) { | 918 | while (iov_iter_count(i) > 0) { |
969 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 919 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
970 | size_t write_bytes = min(iov_iter_count(&i), | 920 | size_t write_bytes = min(iov_iter_count(i), |
971 | nrptrs * (size_t)PAGE_CACHE_SIZE - | 921 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
972 | offset); | 922 | offset); |
973 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> | 923 | size_t num_pages = (write_bytes + offset + |
974 | PAGE_CACHE_SHIFT; | 924 | PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
925 | size_t dirty_pages; | ||
926 | size_t copied; | ||
975 | 927 | ||
976 | WARN_ON(num_pages > nrptrs); | 928 | WARN_ON(num_pages > nrptrs); |
977 | memset(pages, 0, sizeof(struct page *) * nrptrs); | ||
978 | 929 | ||
979 | /* | 930 | /* |
980 | * Fault pages before locking them in prepare_pages | 931 | * Fault pages before locking them in prepare_pages |
981 | * to avoid recursive lock | 932 | * to avoid recursive lock |
982 | */ | 933 | */ |
983 | if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) { | 934 | if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) { |
984 | ret = -EFAULT; | 935 | ret = -EFAULT; |
985 | goto out; | 936 | break; |
986 | } | 937 | } |
987 | 938 | ||
988 | ret = btrfs_delalloc_reserve_space(inode, | 939 | ret = btrfs_delalloc_reserve_space(inode, |
989 | num_pages << PAGE_CACHE_SHIFT); | 940 | num_pages << PAGE_CACHE_SHIFT); |
990 | if (ret) | 941 | if (ret) |
991 | goto out; | 942 | break; |
992 | 943 | ||
944 | /* | ||
945 | * This is going to set up the pages array with the number of | ||
946 | * pages we want, so we don't really need to worry about the | ||
947 | * contents of pages from loop to loop | ||
948 | */ | ||
993 | ret = prepare_pages(root, file, pages, num_pages, | 949 | ret = prepare_pages(root, file, pages, num_pages, |
994 | pos, first_index, last_index, | 950 | pos, first_index, last_index, |
995 | write_bytes); | 951 | write_bytes); |
996 | if (ret) { | 952 | if (ret) { |
997 | btrfs_delalloc_release_space(inode, | 953 | btrfs_delalloc_release_space(inode, |
998 | num_pages << PAGE_CACHE_SHIFT); | 954 | num_pages << PAGE_CACHE_SHIFT); |
999 | goto out; | 955 | break; |
1000 | } | 956 | } |
1001 | 957 | ||
1002 | copied = btrfs_copy_from_user(pos, num_pages, | 958 | copied = btrfs_copy_from_user(pos, num_pages, |
1003 | write_bytes, pages, &i); | 959 | write_bytes, pages, i); |
1004 | dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >> | ||
1005 | PAGE_CACHE_SHIFT; | ||
1006 | 960 | ||
961 | /* | ||
962 | * if we have trouble faulting in the pages, fall | ||
963 | * back to one page at a time | ||
964 | */ | ||
965 | if (copied < write_bytes) | ||
966 | nrptrs = 1; | ||
967 | |||
968 | if (copied == 0) | ||
969 | dirty_pages = 0; | ||
970 | else | ||
971 | dirty_pages = (copied + offset + | ||
972 | PAGE_CACHE_SIZE - 1) >> | ||
973 | PAGE_CACHE_SHIFT; | ||
974 | |||
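
Both page counts above fold the offset into the first page into the rounding: num_pages comes from write_bytes + offset and, after a short copy, dirty_pages from copied + offset. Without the offset term, a copy that straddles a page boundary is undercounted by one page. Worked numbers:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_SHIFT	12

int main(void)
{
	unsigned long pos = 4000;		/* near a page boundary */
	unsigned long offset = pos & (PAGE_SIZE - 1);
	unsigned long write_bytes = 200;	/* spans two pages */
	unsigned long copied = 120;		/* short copy, still 2 pages */
	unsigned long num_pages, dirty_pages;

	num_pages = (write_bytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
	dirty_pages = copied == 0 ? 0 :
		      (copied + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;

	/* without "+ offset" both counts would wrongly be 1; prints 2 2 */
	printf("num_pages %lu dirty_pages %lu\n", num_pages, dirty_pages);
	return 0;
}
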
975 | /* | ||
976 | * If we had a short copy, we need to release the excess delalloc | ||
977 | * bytes we reserved. We need to increment outstanding_extents | ||
978 | * because btrfs_delalloc_release_space will decrement it, but | ||
979 | * we still have an outstanding extent for the chunk we actually | ||
980 | * managed to copy. | ||
981 | */ | ||
1007 | if (num_pages > dirty_pages) { | 982 | if (num_pages > dirty_pages) { |
1008 | if (copied > 0) | 983 | if (copied > 0) |
1009 | atomic_inc( | 984 | atomic_inc( |
@@ -1014,43 +989,157 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1014 | } | 989 | } |
1015 | 990 | ||
1016 | if (copied > 0) { | 991 | if (copied > 0) { |
1017 | dirty_and_release_pages(NULL, root, file, pages, | 992 | ret = btrfs_dirty_pages(root, inode, pages, |
1018 | dirty_pages, pos, copied); | 993 | dirty_pages, pos, copied, |
994 | NULL); | ||
995 | if (ret) { | ||
996 | btrfs_delalloc_release_space(inode, | ||
997 | dirty_pages << PAGE_CACHE_SHIFT); | ||
998 | btrfs_drop_pages(pages, num_pages); | ||
999 | break; | ||
1000 | } | ||
1019 | } | 1001 | } |
1020 | 1002 | ||
1021 | btrfs_drop_pages(pages, num_pages); | 1003 | btrfs_drop_pages(pages, num_pages); |
1022 | 1004 | ||
1023 | if (copied > 0) { | 1005 | cond_resched(); |
1024 | if (will_write) { | 1006 | |
1025 | filemap_fdatawrite_range(inode->i_mapping, pos, | 1007 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, |
1026 | pos + copied - 1); | 1008 | dirty_pages); |
1027 | } else { | 1009 | if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) |
1028 | balance_dirty_pages_ratelimited_nr( | 1010 | btrfs_btree_balance_dirty(root, 1); |
1029 | inode->i_mapping, | 1011 | btrfs_throttle(root); |
1030 | dirty_pages); | ||
1031 | if (dirty_pages < | ||
1032 | (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | ||
1033 | btrfs_btree_balance_dirty(root, 1); | ||
1034 | btrfs_throttle(root); | ||
1035 | } | ||
1036 | } | ||
1037 | 1012 | ||
1038 | pos += copied; | 1013 | pos += copied; |
1039 | num_written += copied; | 1014 | num_written += copied; |
1015 | } | ||
1040 | 1016 | ||
1041 | cond_resched(); | 1017 | kfree(pages); |
1018 | |||
1019 | return num_written ? num_written : ret; | ||
1020 | } | ||
1021 | |||
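The loop's fallback policy is worth seeing in isolation: after any short copy the batch size drops to a single page, so a user buffer that keeps faulting still makes forward progress one page at a time instead of retrying a large batch forever. A toy userspace model of just that policy (copy_some() stands in for btrfs_copy_from_user(); everything here is illustrative):

    #include <stdio.h>
    #include <string.h>
    #include <stddef.h>

    #define PAGE_SIZE 4096UL

    /* toy stand-in for btrfs_copy_from_user(): copies up to 'want'
     * bytes but "faults" after 'faultable' bytes, returning short */
    static size_t copy_some(char *dst, const char *src, size_t want,
                            size_t faultable)
    {
            size_t n = want < faultable ? want : faultable;
            memcpy(dst, src, n);
            return n;
    }

    int main(void)
    {
            static char src[3 * PAGE_SIZE], dst[3 * PAGE_SIZE];
            size_t nrptrs = 8;                  /* starting batch, in pages */
            size_t pos = 0, left = sizeof(src);
            size_t faultable = PAGE_SIZE + 100; /* a "fault" lands here */

            while (left) {
                    size_t want = left < nrptrs * PAGE_SIZE ?
                                  left : nrptrs * PAGE_SIZE;
                    size_t copied = copy_some(dst + pos, src + pos,
                                              want, faultable);

                    /* mirror the loop above: short copy => one page at a time */
                    if (copied < want)
                            nrptrs = 1;
                    if (!copied)
                            break;  /* real code returns progress or -EFAULT */
                    pos += copied;
                    left -= copied;
                    printf("copied %zu, nrptrs now %zu\n", copied, nrptrs);
            }
            return 0;
    }

The real loop additionally hands back the over-reserved delalloc space for the pages a short copy never reached, which the sketch omits.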
1022 | static ssize_t __btrfs_direct_write(struct kiocb *iocb, | ||
1023 | const struct iovec *iov, | ||
1024 | unsigned long nr_segs, loff_t pos, | ||
1025 | loff_t *ppos, size_t count, size_t ocount) | ||
1026 | { | ||
1027 | struct file *file = iocb->ki_filp; | ||
1028 | struct inode *inode = fdentry(file)->d_inode; | ||
1029 | struct iov_iter i; | ||
1030 | ssize_t written; | ||
1031 | ssize_t written_buffered; | ||
1032 | loff_t endbyte; | ||
1033 | int err; | ||
1034 | |||
1035 | written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos, | ||
1036 | count, ocount); | ||
1037 | |||
1038 | /* | ||
1039 | * the generic O_DIRECT will update in-memory i_size after the | ||
1040 | * DIOs are done. But our endio handlers that update the on | ||
1041 | * disk i_size never update past the in memory i_size. So we | ||
1042 | * need one more update here to catch any additions to the | ||
1043 | * file | ||
1044 | */ | ||
1045 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | ||
1046 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
1047 | mark_inode_dirty(inode); | ||
1048 | } | ||
1049 | |||
1050 | if (written < 0 || written == count) | ||
1051 | return written; | ||
1052 | |||
1053 | pos += written; | ||
1054 | count -= written; | ||
1055 | iov_iter_init(&i, iov, nr_segs, count, written); | ||
1056 | written_buffered = __btrfs_buffered_write(file, &i, pos); | ||
1057 | if (written_buffered < 0) { | ||
1058 | err = written_buffered; | ||
1059 | goto out; | ||
1042 | } | 1060 | } |
1061 | endbyte = pos + written_buffered - 1; | ||
1062 | err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); | ||
1063 | if (err) | ||
1064 | goto out; | ||
1065 | written += written_buffered; | ||
1066 | *ppos = pos + written_buffered; | ||
1067 | invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT, | ||
1068 | endbyte >> PAGE_CACHE_SHIFT); | ||
1043 | out: | 1069 | out: |
1044 | mutex_unlock(&inode->i_mutex); | 1070 | return written ? written : err; |
1045 | if (ret) | 1071 | } |
1046 | err = ret; | ||
1047 | 1072 | ||
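When the O_DIRECT write stops short, only the tail is retried through the buffered path, and afterwards exactly that tail is flushed and dropped from the page cache so it cannot alias later direct IO. The offset bookkeeping as a standalone sketch (values illustrative; assumes the buffered part succeeds):

    #include <stdio.h>

    int main(void)
    {
            long long pos = 0, count = 1 << 20;  /* 1 MiB request */
            long long written = 640 * 1024;      /* O_DIRECT stopped short */

            /* retry only the remainder through the buffered path */
            pos += written;
            count -= written;
            long long written_buffered = count;  /* assume it succeeds */

            long long endbyte = pos + written_buffered - 1;
            printf("flush + invalidate [%lld, %lld]\n", pos, endbyte);
            printf("returned to caller: %lld\n", written + written_buffered);
            return 0;
    }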
1048 | kfree(pages); | 1073 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, |
1049 | if (pinned[0]) | 1074 | const struct iovec *iov, |
1050 | page_cache_release(pinned[0]); | 1075 | unsigned long nr_segs, loff_t pos) |
1051 | if (pinned[1]) | 1076 | { |
1052 | page_cache_release(pinned[1]); | 1077 | struct file *file = iocb->ki_filp; |
1053 | *ppos = pos; | 1078 | struct inode *inode = fdentry(file)->d_inode; |
1079 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1080 | loff_t *ppos = &iocb->ki_pos; | ||
1081 | ssize_t num_written = 0; | ||
1082 | ssize_t err = 0; | ||
1083 | size_t count, ocount; | ||
1084 | |||
1085 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | ||
1086 | |||
1087 | mutex_lock(&inode->i_mutex); | ||
1088 | |||
1089 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
1090 | if (err) { | ||
1091 | mutex_unlock(&inode->i_mutex); | ||
1092 | goto out; | ||
1093 | } | ||
1094 | count = ocount; | ||
1095 | |||
1096 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | ||
1097 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | ||
1098 | if (err) { | ||
1099 | mutex_unlock(&inode->i_mutex); | ||
1100 | goto out; | ||
1101 | } | ||
1102 | |||
1103 | if (count == 0) { | ||
1104 | mutex_unlock(&inode->i_mutex); | ||
1105 | goto out; | ||
1106 | } | ||
1107 | |||
1108 | err = file_remove_suid(file); | ||
1109 | if (err) { | ||
1110 | mutex_unlock(&inode->i_mutex); | ||
1111 | goto out; | ||
1112 | } | ||
1113 | |||
1114 | /* | ||
1115 | * If BTRFS flips readonly due to some impossible error | ||
1116 | * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), | ||
1117 | * although we have opened a file as writable, we have | ||
1118 | * to stop this write operation to ensure FS consistency. | ||
1119 | */ | ||
1120 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
1121 | mutex_unlock(&inode->i_mutex); | ||
1122 | err = -EROFS; | ||
1123 | goto out; | ||
1124 | } | ||
1125 | |||
1126 | file_update_time(file); | ||
1127 | BTRFS_I(inode)->sequence++; | ||
1128 | |||
1129 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
1130 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, | ||
1131 | pos, ppos, count, ocount); | ||
1132 | } else { | ||
1133 | struct iov_iter i; | ||
1134 | |||
1135 | iov_iter_init(&i, iov, nr_segs, count, num_written); | ||
1136 | |||
1137 | num_written = __btrfs_buffered_write(file, &i, pos); | ||
1138 | if (num_written > 0) | ||
1139 | *ppos = pos + num_written; | ||
1140 | } | ||
1141 | |||
1142 | mutex_unlock(&inode->i_mutex); | ||
1054 | 1143 | ||
1055 | /* | 1144 | /* |
1056 | * we want to make sure fsync finds this change | 1145 | * we want to make sure fsync finds this change |
@@ -1065,43 +1154,12 @@ out: | |||
1065 | * one running right now. | 1154 | * one running right now. |
1066 | */ | 1155 | */ |
1067 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | 1156 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; |
1068 | 1157 | if (num_written > 0 || num_written == -EIOCBQUEUED) { | |
1069 | if (num_written > 0 && will_write) { | 1158 | err = generic_write_sync(file, pos, num_written); |
1070 | struct btrfs_trans_handle *trans; | 1159 | if (err < 0 && num_written > 0) |
1071 | |||
1072 | err = btrfs_wait_ordered_range(inode, start_pos, num_written); | ||
1073 | if (err) | ||
1074 | num_written = err; | 1160 | num_written = err; |
1075 | |||
1076 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | ||
1077 | trans = btrfs_start_transaction(root, 0); | ||
1078 | if (IS_ERR(trans)) { | ||
1079 | num_written = PTR_ERR(trans); | ||
1080 | goto done; | ||
1081 | } | ||
1082 | mutex_lock(&inode->i_mutex); | ||
1083 | ret = btrfs_log_dentry_safe(trans, root, | ||
1084 | file->f_dentry); | ||
1085 | mutex_unlock(&inode->i_mutex); | ||
1086 | if (ret == 0) { | ||
1087 | ret = btrfs_sync_log(trans, root); | ||
1088 | if (ret == 0) | ||
1089 | btrfs_end_transaction(trans, root); | ||
1090 | else | ||
1091 | btrfs_commit_transaction(trans, root); | ||
1092 | } else if (ret != BTRFS_NO_LOG_SYNC) { | ||
1093 | btrfs_commit_transaction(trans, root); | ||
1094 | } else { | ||
1095 | btrfs_end_transaction(trans, root); | ||
1096 | } | ||
1097 | } | ||
1098 | if (file->f_flags & O_DIRECT && buffered) { | ||
1099 | invalidate_mapping_pages(inode->i_mapping, | ||
1100 | start_pos >> PAGE_CACHE_SHIFT, | ||
1101 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | ||
1102 | } | ||
1103 | } | 1161 | } |
1104 | done: | 1162 | out: |
1105 | current->backing_dev_info = NULL; | 1163 | current->backing_dev_info = NULL; |
1106 | return num_written ? num_written : err; | 1164 | return num_written ? num_written : err; |
1107 | } | 1165 | } |
@@ -1144,6 +1202,7 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1144 | int ret = 0; | 1202 | int ret = 0; |
1145 | struct btrfs_trans_handle *trans; | 1203 | struct btrfs_trans_handle *trans; |
1146 | 1204 | ||
1205 | trace_btrfs_sync_file(file, datasync); | ||
1147 | 1206 | ||
1148 | /* we wait first, since the writeback may change the inode */ | 1207 | /* we wait first, since the writeback may change the inode */ |
1149 | root->log_batch++; | 1208 | root->log_batch++; |
@@ -1237,6 +1296,118 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | |||
1237 | return 0; | 1296 | return 0; |
1238 | } | 1297 | } |
1239 | 1298 | ||
1299 | static long btrfs_fallocate(struct file *file, int mode, | ||
1300 | loff_t offset, loff_t len) | ||
1301 | { | ||
1302 | struct inode *inode = file->f_path.dentry->d_inode; | ||
1303 | struct extent_state *cached_state = NULL; | ||
1304 | u64 cur_offset; | ||
1305 | u64 last_byte; | ||
1306 | u64 alloc_start; | ||
1307 | u64 alloc_end; | ||
1308 | u64 alloc_hint = 0; | ||
1309 | u64 locked_end; | ||
1310 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
1311 | struct extent_map *em; | ||
1312 | int ret; | ||
1313 | |||
1314 | alloc_start = offset & ~mask; | ||
1315 | alloc_end = (offset + len + mask) & ~mask; | ||
1316 | |||
1317 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | ||
1318 | if (mode & ~FALLOC_FL_KEEP_SIZE) | ||
1319 | return -EOPNOTSUPP; | ||
1320 | |||
1321 | /* | ||
1322 | * wait for ordered IO before we have any locks. We'll loop again | ||
1323 | * below with the locks held. | ||
1324 | */ | ||
1325 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
1326 | |||
1327 | mutex_lock(&inode->i_mutex); | ||
1328 | ret = inode_newsize_ok(inode, alloc_end); | ||
1329 | if (ret) | ||
1330 | goto out; | ||
1331 | |||
1332 | if (alloc_start > inode->i_size) { | ||
1333 | ret = btrfs_cont_expand(inode, i_size_read(inode), | ||
1334 | alloc_start); | ||
1335 | if (ret) | ||
1336 | goto out; | ||
1337 | } | ||
1338 | |||
1339 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); | ||
1340 | if (ret) | ||
1341 | goto out; | ||
1342 | |||
1343 | locked_end = alloc_end - 1; | ||
1344 | while (1) { | ||
1345 | struct btrfs_ordered_extent *ordered; | ||
1346 | |||
1347 | /* the extent lock is ordered inside the running | ||
1348 | * transaction | ||
1349 | */ | ||
1350 | lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, | ||
1351 | locked_end, 0, &cached_state, GFP_NOFS); | ||
1352 | ordered = btrfs_lookup_first_ordered_extent(inode, | ||
1353 | alloc_end - 1); | ||
1354 | if (ordered && | ||
1355 | ordered->file_offset + ordered->len > alloc_start && | ||
1356 | ordered->file_offset < alloc_end) { | ||
1357 | btrfs_put_ordered_extent(ordered); | ||
1358 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | ||
1359 | alloc_start, locked_end, | ||
1360 | &cached_state, GFP_NOFS); | ||
1361 | /* | ||
1362 | * we can't wait on the range with the transaction | ||
1363 | * running or with the extent lock held | ||
1364 | */ | ||
1365 | btrfs_wait_ordered_range(inode, alloc_start, | ||
1366 | alloc_end - alloc_start); | ||
1367 | } else { | ||
1368 | if (ordered) | ||
1369 | btrfs_put_ordered_extent(ordered); | ||
1370 | break; | ||
1371 | } | ||
1372 | } | ||
1373 | |||
1374 | cur_offset = alloc_start; | ||
1375 | while (1) { | ||
1376 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | ||
1377 | alloc_end - cur_offset, 0); | ||
1378 | BUG_ON(IS_ERR(em) || !em); | ||
1379 | last_byte = min(extent_map_end(em), alloc_end); | ||
1380 | last_byte = (last_byte + mask) & ~mask; | ||
1381 | if (em->block_start == EXTENT_MAP_HOLE || | ||
1382 | (cur_offset >= inode->i_size && | ||
1383 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
1384 | ret = btrfs_prealloc_file_range(inode, mode, cur_offset, | ||
1385 | last_byte - cur_offset, | ||
1386 | 1 << inode->i_blkbits, | ||
1387 | offset + len, | ||
1388 | &alloc_hint); | ||
1389 | if (ret < 0) { | ||
1390 | free_extent_map(em); | ||
1391 | break; | ||
1392 | } | ||
1393 | } | ||
1394 | free_extent_map(em); | ||
1395 | |||
1396 | cur_offset = last_byte; | ||
1397 | if (cur_offset >= alloc_end) { | ||
1398 | ret = 0; | ||
1399 | break; | ||
1400 | } | ||
1401 | } | ||
1402 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||
1403 | &cached_state, GFP_NOFS); | ||
1404 | |||
1405 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); | ||
1406 | out: | ||
1407 | mutex_unlock(&inode->i_mutex); | ||
1408 | return ret; | ||
1409 | } | ||
1410 | |||
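btrfs_fallocate() widens the request to sector boundaries before doing anything else: the start is rounded down and the end rounded up with the sectorsize-1 mask. The same arithmetic in isolation, assuming a 4096-byte sectorsize:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t sectorsize = 4096, mask = sectorsize - 1;
            uint64_t offset = 5000, len = 3000;  /* unaligned request */

            uint64_t alloc_start = offset & ~mask;               /* 4096 */
            uint64_t alloc_end = (offset + len + mask) & ~mask;  /* 8192 */

            printf("preallocate [%llu, %llu)\n",
                   (unsigned long long)alloc_start,
                   (unsigned long long)alloc_end);
            return 0;
    }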
1240 | const struct file_operations btrfs_file_operations = { | 1411 | const struct file_operations btrfs_file_operations = { |
1241 | .llseek = generic_file_llseek, | 1412 | .llseek = generic_file_llseek, |
1242 | .read = do_sync_read, | 1413 | .read = do_sync_read, |
@@ -1248,6 +1419,7 @@ const struct file_operations btrfs_file_operations = { | |||
1248 | .open = generic_file_open, | 1419 | .open = generic_file_open, |
1249 | .release = btrfs_release_file, | 1420 | .release = btrfs_release_file, |
1250 | .fsync = btrfs_sync_file, | 1421 | .fsync = btrfs_sync_file, |
1422 | .fallocate = btrfs_fallocate, | ||
1251 | .unlocked_ioctl = btrfs_ioctl, | 1423 | .unlocked_ioctl = btrfs_ioctl, |
1252 | #ifdef CONFIG_COMPAT | 1424 | #ifdef CONFIG_COMPAT |
1253 | .compat_ioctl = btrfs_ioctl, | 1425 | .compat_ioctl = btrfs_ioctl, |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 60d684266959..11d2e9cea09e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "free-space-cache.h" | 24 | #include "free-space-cache.h" |
25 | #include "transaction.h" | 25 | #include "transaction.h" |
26 | #include "disk-io.h" | 26 | #include "disk-io.h" |
27 | #include "extent_io.h" | ||
27 | 28 | ||
28 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) | 29 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) |
29 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) | 30 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) |
@@ -81,6 +82,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, | |||
81 | return ERR_PTR(-ENOENT); | 82 | return ERR_PTR(-ENOENT); |
82 | } | 83 | } |
83 | 84 | ||
85 | inode->i_mapping->flags &= ~__GFP_FS; | ||
86 | |||
84 | spin_lock(&block_group->lock); | 87 | spin_lock(&block_group->lock); |
85 | if (!root->fs_info->closing) { | 88 | if (!root->fs_info->closing) { |
86 | block_group->inode = igrab(inode); | 89 | block_group->inode = igrab(inode); |
@@ -222,6 +225,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
222 | u64 num_entries; | 225 | u64 num_entries; |
223 | u64 num_bitmaps; | 226 | u64 num_bitmaps; |
224 | u64 generation; | 227 | u64 generation; |
228 | u64 used = btrfs_block_group_used(&block_group->item); | ||
225 | u32 cur_crc = ~(u32)0; | 229 | u32 cur_crc = ~(u32)0; |
226 | pgoff_t index = 0; | 230 | pgoff_t index = 0; |
227 | unsigned long first_page_offset; | 231 | unsigned long first_page_offset; |
@@ -393,7 +397,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
393 | break; | 397 | break; |
394 | 398 | ||
395 | need_loop = 1; | 399 | need_loop = 1; |
396 | e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); | 400 | e = kmem_cache_zalloc(btrfs_free_space_cachep, |
401 | GFP_NOFS); | ||
397 | if (!e) { | 402 | if (!e) { |
398 | kunmap(page); | 403 | kunmap(page); |
399 | unlock_page(page); | 404 | unlock_page(page); |
@@ -405,7 +410,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
405 | e->bytes = le64_to_cpu(entry->bytes); | 410 | e->bytes = le64_to_cpu(entry->bytes); |
406 | if (!e->bytes) { | 411 | if (!e->bytes) { |
407 | kunmap(page); | 412 | kunmap(page); |
408 | kfree(e); | 413 | kmem_cache_free(btrfs_free_space_cachep, e); |
409 | unlock_page(page); | 414 | unlock_page(page); |
410 | page_cache_release(page); | 415 | page_cache_release(page); |
411 | goto free_cache; | 416 | goto free_cache; |
@@ -420,7 +425,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
420 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); | 425 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
421 | if (!e->bitmap) { | 426 | if (!e->bitmap) { |
422 | kunmap(page); | 427 | kunmap(page); |
423 | kfree(e); | 428 | kmem_cache_free( |
429 | btrfs_free_space_cachep, e); | ||
424 | unlock_page(page); | 430 | unlock_page(page); |
425 | page_cache_release(page); | 431 | page_cache_release(page); |
426 | goto free_cache; | 432 | goto free_cache; |
@@ -465,6 +471,17 @@ next: | |||
465 | index++; | 471 | index++; |
466 | } | 472 | } |
467 | 473 | ||
474 | spin_lock(&block_group->tree_lock); | ||
475 | if (block_group->free_space != (block_group->key.offset - used - | ||
476 | block_group->bytes_super)) { | ||
477 | spin_unlock(&block_group->tree_lock); | ||
478 | printk(KERN_ERR "block group %llu has a wrong amount of free " | ||
479 | "space\n", block_group->key.objectid); | ||
480 | ret = 0; | ||
481 | goto free_cache; | ||
482 | } | ||
483 | spin_unlock(&block_group->tree_lock); | ||
484 | |||
468 | ret = 1; | 485 | ret = 1; |
469 | out: | 486 | out: |
470 | kfree(checksums); | 487 | kfree(checksums); |
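The new check above is a plain conservation law: the free space recorded in the cache must equal the block group's size minus its allocated bytes and superblock reservation, otherwise the cache is stale and is thrown away (ret = 0 forces a rebuild from the extent tree). As standalone arithmetic, with illustrative sizes:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t key_offset = 1024ULL << 20;  /* block group size */
            uint64_t used = 600ULL << 20;         /* allocated bytes */
            uint64_t bytes_super = 2ULL << 20;    /* superblock reservation */
            uint64_t free_space = 422ULL << 20;   /* what the cache claims */

            if (free_space != key_offset - used - bytes_super)
                    printf("stale cache, rebuild from the extent tree\n");
            else
                    printf("cache consistent: %llu bytes free\n",
                           (unsigned long long)free_space);
            return 0;
    }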
@@ -491,18 +508,23 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
491 | struct inode *inode; | 508 | struct inode *inode; |
492 | struct rb_node *node; | 509 | struct rb_node *node; |
493 | struct list_head *pos, *n; | 510 | struct list_head *pos, *n; |
511 | struct page **pages; | ||
494 | struct page *page; | 512 | struct page *page; |
495 | struct extent_state *cached_state = NULL; | 513 | struct extent_state *cached_state = NULL; |
514 | struct btrfs_free_cluster *cluster = NULL; | ||
515 | struct extent_io_tree *unpin = NULL; | ||
496 | struct list_head bitmap_list; | 516 | struct list_head bitmap_list; |
497 | struct btrfs_key key; | 517 | struct btrfs_key key; |
518 | u64 start, end, len; | ||
498 | u64 bytes = 0; | 519 | u64 bytes = 0; |
499 | u32 *crc, *checksums; | 520 | u32 *crc, *checksums; |
500 | pgoff_t index = 0, last_index = 0; | ||
501 | unsigned long first_page_offset; | 521 | unsigned long first_page_offset; |
502 | int num_checksums; | 522 | int index = 0, num_pages = 0; |
503 | int entries = 0; | 523 | int entries = 0; |
504 | int bitmaps = 0; | 524 | int bitmaps = 0; |
505 | int ret = 0; | 525 | int ret = 0; |
526 | bool next_page = false; | ||
527 | bool out_of_space = false; | ||
506 | 528 | ||
507 | root = root->fs_info->tree_root; | 529 | root = root->fs_info->tree_root; |
508 | 530 | ||
@@ -530,24 +552,43 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
530 | return 0; | 552 | return 0; |
531 | } | 553 | } |
532 | 554 | ||
533 | last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; | 555 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> |
556 | PAGE_CACHE_SHIFT; | ||
534 | filemap_write_and_wait(inode->i_mapping); | 557 | filemap_write_and_wait(inode->i_mapping); |
535 | btrfs_wait_ordered_range(inode, inode->i_size & | 558 | btrfs_wait_ordered_range(inode, inode->i_size & |
536 | ~(root->sectorsize - 1), (u64)-1); | 559 | ~(root->sectorsize - 1), (u64)-1); |
537 | 560 | ||
538 | /* We need a checksum per page. */ | 561 | /* We need a checksum per page. */ |
539 | num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; | 562 | crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); |
540 | crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); | ||
541 | if (!crc) { | 563 | if (!crc) { |
542 | iput(inode); | 564 | iput(inode); |
543 | return 0; | 565 | return 0; |
544 | } | 566 | } |
545 | 567 | ||
568 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); | ||
569 | if (!pages) { | ||
570 | kfree(crc); | ||
571 | iput(inode); | ||
572 | return 0; | ||
573 | } | ||
574 | |||
546 | /* Since the first page has all of our checksums and our generation we | 575 | /* Since the first page has all of our checksums and our generation we |
547 | * need to calculate the offset into the page at which we can start writing | 576 | * need to calculate the offset into the page at which we can start writing |
548 | * our entries. | 577 | * our entries. |
549 | */ | 578 | */ |
550 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); | 579 | first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); |
580 | |||
581 | /* Get the cluster for this block_group if it exists */ | ||
582 | if (!list_empty(&block_group->cluster_list)) | ||
583 | cluster = list_entry(block_group->cluster_list.next, | ||
584 | struct btrfs_free_cluster, | ||
585 | block_group_list); | ||
586 | |||
587 | /* | ||
588 | * We shouldn't have switched the pinned extents yet so this is the | ||
589 | * right one | ||
590 | */ | ||
591 | unpin = root->fs_info->pinned_extents; | ||
551 | 592 | ||
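first_page_offset encodes the cache file's header layout: page 0 begins with one u32 checksum per page followed by a u64 transaction generation, and free-space entries start immediately after. A sketch of that layout arithmetic, assuming 4096-byte pages (file size illustrative):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t isize = 5 * 4096;  /* illustrative cache file size */
            uint64_t num_pages = (isize + 4095) / 4096;

            /* one crc per page, then the transaction generation */
            uint64_t first_page_offset =
                    sizeof(uint32_t) * num_pages + sizeof(uint64_t);

            printf("entries start at byte %llu of page 0\n",
                   (unsigned long long)first_page_offset);  /* 28 */
            return 0;
    }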
552 | /* | 593 | /* |
553 | * Lock all pages first so we can lock the extent safely. | 594 | * Lock all pages first so we can lock the extent safely. |
@@ -557,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
557 | * after find_get_page at this point. Just putting this here so people | 598 | * after find_get_page at this point. Just putting this here so people |
558 | * know and don't freak out. | 599 | * know and don't freak out. |
559 | */ | 600 | */ |
560 | while (index <= last_index) { | 601 | while (index < num_pages) { |
561 | page = grab_cache_page(inode->i_mapping, index); | 602 | page = grab_cache_page(inode->i_mapping, index); |
562 | if (!page) { | 603 | if (!page) { |
563 | pgoff_t i = 0; | 604 | int i; |
564 | 605 | ||
565 | while (i < index) { | 606 | for (i = 0; i < num_pages; i++) { |
566 | page = find_get_page(inode->i_mapping, i); | 607 | unlock_page(pages[i]); |
567 | unlock_page(page); | 608 | page_cache_release(pages[i]); |
568 | page_cache_release(page); | ||
569 | page_cache_release(page); | ||
570 | i++; | ||
571 | } | 609 | } |
572 | goto out_free; | 610 | goto out_free; |
573 | } | 611 | } |
612 | pages[index] = page; | ||
574 | index++; | 613 | index++; |
575 | } | 614 | } |
576 | 615 | ||
@@ -578,6 +617,12 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
578 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, | 617 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, |
579 | 0, &cached_state, GFP_NOFS); | 618 | 0, &cached_state, GFP_NOFS); |
580 | 619 | ||
620 | /* | ||
621 | * When searching for pinned extents, we need to start at our start | ||
622 | * offset. | ||
623 | */ | ||
624 | start = block_group->key.objectid; | ||
625 | |||
581 | /* Write out the extent entries */ | 626 | /* Write out the extent entries */ |
582 | do { | 627 | do { |
583 | struct btrfs_free_space_entry *entry; | 628 | struct btrfs_free_space_entry *entry; |
@@ -585,18 +630,25 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
585 | unsigned long offset = 0; | 630 | unsigned long offset = 0; |
586 | unsigned long start_offset = 0; | 631 | unsigned long start_offset = 0; |
587 | 632 | ||
633 | next_page = false; | ||
634 | |||
588 | if (index == 0) { | 635 | if (index == 0) { |
589 | start_offset = first_page_offset; | 636 | start_offset = first_page_offset; |
590 | offset = start_offset; | 637 | offset = start_offset; |
591 | } | 638 | } |
592 | 639 | ||
593 | page = find_get_page(inode->i_mapping, index); | 640 | if (index >= num_pages) { |
641 | out_of_space = true; | ||
642 | break; | ||
643 | } | ||
644 | |||
645 | page = pages[index]; | ||
594 | 646 | ||
595 | addr = kmap(page); | 647 | addr = kmap(page); |
596 | entry = addr + start_offset; | 648 | entry = addr + start_offset; |
597 | 649 | ||
598 | memset(addr, 0, PAGE_CACHE_SIZE); | 650 | memset(addr, 0, PAGE_CACHE_SIZE); |
599 | while (1) { | 651 | while (node && !next_page) { |
600 | struct btrfs_free_space *e; | 652 | struct btrfs_free_space *e; |
601 | 653 | ||
602 | e = rb_entry(node, struct btrfs_free_space, offset_index); | 654 | e = rb_entry(node, struct btrfs_free_space, offset_index); |
@@ -612,12 +664,49 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
612 | entry->type = BTRFS_FREE_SPACE_EXTENT; | 664 | entry->type = BTRFS_FREE_SPACE_EXTENT; |
613 | } | 665 | } |
614 | node = rb_next(node); | 666 | node = rb_next(node); |
615 | if (!node) | 667 | if (!node && cluster) { |
616 | break; | 668 | node = rb_first(&cluster->root); |
669 | cluster = NULL; | ||
670 | } | ||
617 | offset += sizeof(struct btrfs_free_space_entry); | 671 | offset += sizeof(struct btrfs_free_space_entry); |
618 | if (offset + sizeof(struct btrfs_free_space_entry) >= | 672 | if (offset + sizeof(struct btrfs_free_space_entry) >= |
619 | PAGE_CACHE_SIZE) | 673 | PAGE_CACHE_SIZE) |
674 | next_page = true; | ||
675 | entry++; | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * We want to add any pinned extents to our free space cache | ||
680 | * so we don't leak the space | ||
681 | */ | ||
682 | while (!next_page && (start < block_group->key.objectid + | ||
683 | block_group->key.offset)) { | ||
684 | ret = find_first_extent_bit(unpin, start, &start, &end, | ||
685 | EXTENT_DIRTY); | ||
686 | if (ret) { | ||
687 | ret = 0; | ||
620 | break; | 688 | break; |
689 | } | ||
690 | |||
691 | /* This pinned extent is out of our range */ | ||
692 | if (start >= block_group->key.objectid + | ||
693 | block_group->key.offset) | ||
694 | break; | ||
695 | |||
696 | len = block_group->key.objectid + | ||
697 | block_group->key.offset - start; | ||
698 | len = min(len, end + 1 - start); | ||
699 | |||
700 | entries++; | ||
701 | entry->offset = cpu_to_le64(start); | ||
702 | entry->bytes = cpu_to_le64(len); | ||
703 | entry->type = BTRFS_FREE_SPACE_EXTENT; | ||
704 | |||
705 | start = end + 1; | ||
706 | offset += sizeof(struct btrfs_free_space_entry); | ||
707 | if (offset + sizeof(struct btrfs_free_space_entry) >= | ||
708 | PAGE_CACHE_SIZE) | ||
709 | next_page = true; | ||
621 | entry++; | 710 | entry++; |
622 | } | 711 | } |
623 | *crc = ~(u32)0; | 712 | *crc = ~(u32)0; |
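Both writers above (tree entries and pinned extents) advance through the page with the same rule: bump offset by one entry and set next_page once another entry would no longer fit before the page end. A sketch of that packing test; the 17-byte entry size is an assumption matching a packed le64 offset + le64 bytes + u8 type, and page 0's header is ignored here:

    #include <stdio.h>
    #include <stddef.h>

    #define PAGE_SIZE 4096UL
    #define ENTRY_SIZE 17UL  /* assumed: packed le64 + le64 + u8 */

    int main(void)
    {
            size_t offset = 0, entries = 0;

            while (1) {
                    entries++;
                    offset += ENTRY_SIZE;
                    /* same test as above: spill to the next page before
                     * another entry would reach past (or hit) its end */
                    if (offset + ENTRY_SIZE >= PAGE_SIZE)
                            break;
            }
            printf("%zu entries per full page\n", entries);  /* 240 */
            return 0;
    }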
@@ -630,25 +719,8 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
630 | 719 | ||
631 | bytes += PAGE_CACHE_SIZE; | 720 | bytes += PAGE_CACHE_SIZE; |
632 | 721 | ||
633 | ClearPageChecked(page); | ||
634 | set_page_extent_mapped(page); | ||
635 | SetPageUptodate(page); | ||
636 | set_page_dirty(page); | ||
637 | |||
638 | /* | ||
639 | * We need to release our reference we got for grab_cache_page, | ||
640 | * except for the first page which will hold our checksums, we | ||
641 | * do that below. | ||
642 | */ | ||
643 | if (index != 0) { | ||
644 | unlock_page(page); | ||
645 | page_cache_release(page); | ||
646 | } | ||
647 | |||
648 | page_cache_release(page); | ||
649 | |||
650 | index++; | 722 | index++; |
651 | } while (node); | 723 | } while (node || next_page); |
652 | 724 | ||
653 | /* Write out the bitmaps */ | 725 | /* Write out the bitmaps */ |
654 | list_for_each_safe(pos, n, &bitmap_list) { | 726 | list_for_each_safe(pos, n, &bitmap_list) { |
@@ -656,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
656 | struct btrfs_free_space *entry = | 728 | struct btrfs_free_space *entry = |
657 | list_entry(pos, struct btrfs_free_space, list); | 729 | list_entry(pos, struct btrfs_free_space, list); |
658 | 730 | ||
659 | page = find_get_page(inode->i_mapping, index); | 731 | if (index >= num_pages) { |
732 | out_of_space = true; | ||
733 | break; | ||
734 | } | ||
735 | page = pages[index]; | ||
660 | 736 | ||
661 | addr = kmap(page); | 737 | addr = kmap(page); |
662 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); | 738 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); |
@@ -667,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
667 | crc++; | 743 | crc++; |
668 | bytes += PAGE_CACHE_SIZE; | 744 | bytes += PAGE_CACHE_SIZE; |
669 | 745 | ||
670 | ClearPageChecked(page); | ||
671 | set_page_extent_mapped(page); | ||
672 | SetPageUptodate(page); | ||
673 | set_page_dirty(page); | ||
674 | unlock_page(page); | ||
675 | page_cache_release(page); | ||
676 | page_cache_release(page); | ||
677 | list_del_init(&entry->list); | 746 | list_del_init(&entry->list); |
678 | index++; | 747 | index++; |
679 | } | 748 | } |
680 | 749 | ||
750 | if (out_of_space) { | ||
751 | btrfs_drop_pages(pages, num_pages); | ||
752 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
753 | i_size_read(inode) - 1, &cached_state, | ||
754 | GFP_NOFS); | ||
755 | ret = 0; | ||
756 | goto out_free; | ||
757 | } | ||
758 | |||
681 | /* Zero out the rest of the pages just to make sure */ | 759 | /* Zero out the rest of the pages just to make sure */ |
682 | while (index <= last_index) { | 760 | while (index < num_pages) { |
683 | void *addr; | 761 | void *addr; |
684 | 762 | ||
685 | page = find_get_page(inode->i_mapping, index); | 763 | page = pages[index]; |
686 | |||
687 | addr = kmap(page); | 764 | addr = kmap(page); |
688 | memset(addr, 0, PAGE_CACHE_SIZE); | 765 | memset(addr, 0, PAGE_CACHE_SIZE); |
689 | kunmap(page); | 766 | kunmap(page); |
690 | ClearPageChecked(page); | ||
691 | set_page_extent_mapped(page); | ||
692 | SetPageUptodate(page); | ||
693 | set_page_dirty(page); | ||
694 | unlock_page(page); | ||
695 | page_cache_release(page); | ||
696 | page_cache_release(page); | ||
697 | bytes += PAGE_CACHE_SIZE; | 767 | bytes += PAGE_CACHE_SIZE; |
698 | index++; | 768 | index++; |
699 | } | 769 | } |
700 | 770 | ||
701 | btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state); | ||
702 | |||
703 | /* Write the checksums and trans id to the first page */ | 771 | /* Write the checksums and trans id to the first page */ |
704 | { | 772 | { |
705 | void *addr; | 773 | void *addr; |
706 | u64 *gen; | 774 | u64 *gen; |
707 | 775 | ||
708 | page = find_get_page(inode->i_mapping, 0); | 776 | page = pages[0]; |
709 | 777 | ||
710 | addr = kmap(page); | 778 | addr = kmap(page); |
711 | memcpy(addr, checksums, sizeof(u32) * num_checksums); | 779 | memcpy(addr, checksums, sizeof(u32) * num_pages); |
712 | gen = addr + (sizeof(u32) * num_checksums); | 780 | gen = addr + (sizeof(u32) * num_pages); |
713 | *gen = trans->transid; | 781 | *gen = trans->transid; |
714 | kunmap(page); | 782 | kunmap(page); |
715 | ClearPageChecked(page); | ||
716 | set_page_extent_mapped(page); | ||
717 | SetPageUptodate(page); | ||
718 | set_page_dirty(page); | ||
719 | unlock_page(page); | ||
720 | page_cache_release(page); | ||
721 | page_cache_release(page); | ||
722 | } | 783 | } |
723 | BTRFS_I(inode)->generation = trans->transid; | ||
724 | 784 | ||
785 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, | ||
786 | bytes, &cached_state); | ||
787 | btrfs_drop_pages(pages, num_pages); | ||
725 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | 788 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, |
726 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); | 789 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); |
727 | 790 | ||
791 | if (ret) { | ||
792 | ret = 0; | ||
793 | goto out_free; | ||
794 | } | ||
795 | |||
796 | BTRFS_I(inode)->generation = trans->transid; | ||
797 | |||
728 | filemap_write_and_wait(inode->i_mapping); | 798 | filemap_write_and_wait(inode->i_mapping); |
729 | 799 | ||
730 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 800 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
@@ -775,6 +845,7 @@ out_free: | |||
775 | BTRFS_I(inode)->generation = 0; | 845 | BTRFS_I(inode)->generation = 0; |
776 | } | 846 | } |
777 | kfree(checksums); | 847 | kfree(checksums); |
848 | kfree(pages); | ||
778 | btrfs_update_inode(trans, root, inode); | 849 | btrfs_update_inode(trans, root, inode); |
779 | iput(inode); | 850 | iput(inode); |
780 | return ret; | 851 | return ret; |
@@ -987,11 +1058,18 @@ tree_search_offset(struct btrfs_block_group_cache *block_group, | |||
987 | return entry; | 1058 | return entry; |
988 | } | 1059 | } |
989 | 1060 | ||
990 | static void unlink_free_space(struct btrfs_block_group_cache *block_group, | 1061 | static inline void |
991 | struct btrfs_free_space *info) | 1062 | __unlink_free_space(struct btrfs_block_group_cache *block_group, |
1063 | struct btrfs_free_space *info) | ||
992 | { | 1064 | { |
993 | rb_erase(&info->offset_index, &block_group->free_space_offset); | 1065 | rb_erase(&info->offset_index, &block_group->free_space_offset); |
994 | block_group->free_extents--; | 1066 | block_group->free_extents--; |
1067 | } | ||
1068 | |||
1069 | static void unlink_free_space(struct btrfs_block_group_cache *block_group, | ||
1070 | struct btrfs_free_space *info) | ||
1071 | { | ||
1072 | __unlink_free_space(block_group, info); | ||
995 | block_group->free_space -= info->bytes; | 1073 | block_group->free_space -= info->bytes; |
996 | } | 1074 | } |
997 | 1075 | ||
@@ -1016,14 +1094,18 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) | |||
1016 | u64 max_bytes; | 1094 | u64 max_bytes; |
1017 | u64 bitmap_bytes; | 1095 | u64 bitmap_bytes; |
1018 | u64 extent_bytes; | 1096 | u64 extent_bytes; |
1097 | u64 size = block_group->key.offset; | ||
1019 | 1098 | ||
1020 | /* | 1099 | /* |
1021 | * The goal is to keep the total amount of memory used per 1gb of space | 1100 | * The goal is to keep the total amount of memory used per 1gb of space |
1022 | * at or below 32k, so we need to adjust how much memory we allow to be | 1101 | * at or below 32k, so we need to adjust how much memory we allow to be |
1023 | * used by extent based free space tracking | 1102 | * used by extent based free space tracking |
1024 | */ | 1103 | */ |
1025 | max_bytes = MAX_CACHE_BYTES_PER_GIG * | 1104 | if (size < 1024 * 1024 * 1024) |
1026 | (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); | 1105 | max_bytes = MAX_CACHE_BYTES_PER_GIG; |
1106 | else | ||
1107 | max_bytes = MAX_CACHE_BYTES_PER_GIG * | ||
1108 | div64_u64(size, 1024 * 1024 * 1024); | ||
1027 | 1109 | ||
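The reworked max_bytes keeps the stated budget (32k of tracking memory per GiB) but floors it for sub-GiB block groups, which under the old formula computed a truncated multiplier of zero and got no budget at all. As plain arithmetic:

    #include <stdio.h>
    #include <stdint.h>

    #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
    #define GIG (1024ULL * 1024 * 1024)

    static uint64_t cache_budget(uint64_t size)
    {
            /* sub-GiB groups get the full 32k budget instead of the
             * old truncated multiple, which rounded down to zero */
            if (size < GIG)
                    return MAX_CACHE_BYTES_PER_GIG;
            return MAX_CACHE_BYTES_PER_GIG * (size / GIG);
    }

    int main(void)
    {
            printf("256 MiB group: %llu bytes\n",
                   (unsigned long long)cache_budget(256ULL << 20));  /* 32768 */
            printf("4 GiB group:   %llu bytes\n",
                   (unsigned long long)cache_budget(4 * GIG));       /* 131072 */
            return 0;
    }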
1028 | /* | 1110 | /* |
1029 | * we want to account for 1 more bitmap than what we have so we can make | 1111 | * we want to account for 1 more bitmap than what we have so we can make |
@@ -1171,6 +1253,16 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group, | |||
1171 | recalculate_thresholds(block_group); | 1253 | recalculate_thresholds(block_group); |
1172 | } | 1254 | } |
1173 | 1255 | ||
1256 | static void free_bitmap(struct btrfs_block_group_cache *block_group, | ||
1257 | struct btrfs_free_space *bitmap_info) | ||
1258 | { | ||
1259 | unlink_free_space(block_group, bitmap_info); | ||
1260 | kfree(bitmap_info->bitmap); | ||
1261 | kmem_cache_free(btrfs_free_space_cachep, bitmap_info); | ||
1262 | block_group->total_bitmaps--; | ||
1263 | recalculate_thresholds(block_group); | ||
1264 | } | ||
1265 | |||
1174 | static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, | 1266 | static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, |
1175 | struct btrfs_free_space *bitmap_info, | 1267 | struct btrfs_free_space *bitmap_info, |
1176 | u64 *offset, u64 *bytes) | 1268 | u64 *offset, u64 *bytes) |
@@ -1195,6 +1287,7 @@ again: | |||
1195 | */ | 1287 | */ |
1196 | search_start = *offset; | 1288 | search_start = *offset; |
1197 | search_bytes = *bytes; | 1289 | search_bytes = *bytes; |
1290 | search_bytes = min(search_bytes, end - search_start + 1); | ||
1198 | ret = search_bitmap(block_group, bitmap_info, &search_start, | 1291 | ret = search_bitmap(block_group, bitmap_info, &search_start, |
1199 | &search_bytes); | 1292 | &search_bytes); |
1200 | BUG_ON(ret < 0 || search_start != *offset); | 1293 | BUG_ON(ret < 0 || search_start != *offset); |
@@ -1211,13 +1304,8 @@ again: | |||
1211 | 1304 | ||
1212 | if (*bytes) { | 1305 | if (*bytes) { |
1213 | struct rb_node *next = rb_next(&bitmap_info->offset_index); | 1306 | struct rb_node *next = rb_next(&bitmap_info->offset_index); |
1214 | if (!bitmap_info->bytes) { | 1307 | if (!bitmap_info->bytes) |
1215 | unlink_free_space(block_group, bitmap_info); | 1308 | free_bitmap(block_group, bitmap_info); |
1216 | kfree(bitmap_info->bitmap); | ||
1217 | kfree(bitmap_info); | ||
1218 | block_group->total_bitmaps--; | ||
1219 | recalculate_thresholds(block_group); | ||
1220 | } | ||
1221 | 1309 | ||
1222 | /* | 1310 | /* |
1223 | * no entry after this bitmap, but we still have bytes to | 1311 | * no entry after this bitmap, but we still have bytes to |
@@ -1250,13 +1338,8 @@ again: | |||
1250 | return -EAGAIN; | 1338 | return -EAGAIN; |
1251 | 1339 | ||
1252 | goto again; | 1340 | goto again; |
1253 | } else if (!bitmap_info->bytes) { | 1341 | } else if (!bitmap_info->bytes) |
1254 | unlink_free_space(block_group, bitmap_info); | 1342 | free_bitmap(block_group, bitmap_info); |
1255 | kfree(bitmap_info->bitmap); | ||
1256 | kfree(bitmap_info); | ||
1257 | block_group->total_bitmaps--; | ||
1258 | recalculate_thresholds(block_group); | ||
1259 | } | ||
1260 | 1343 | ||
1261 | return 0; | 1344 | return 0; |
1262 | } | 1345 | } |
@@ -1273,9 +1356,22 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group, | |||
1273 | * If we are below the extents threshold then we can add this as an | 1356 | * If we are below the extents threshold then we can add this as an |
1274 | * extent, and don't have to deal with the bitmap | 1357 | * extent, and don't have to deal with the bitmap |
1275 | */ | 1358 | */ |
1276 | if (block_group->free_extents < block_group->extents_thresh && | 1359 | if (block_group->free_extents < block_group->extents_thresh) { |
1277 | info->bytes > block_group->sectorsize * 4) | 1360 | /* |
1278 | return 0; | 1361 | * If this block group has some small extents we don't want to |
1362 | * use up all of our free slots in the cache with them, we want | ||
1363 | * to reserve them for larger extents, however if we have plenty | ||
1364 | * of cache left then go ahead and add them, no sense in adding | ||
1365 | * the overhead of a bitmap if we don't have to. | ||
1366 | */ | ||
1367 | if (info->bytes <= block_group->sectorsize * 4) { | ||
1368 | if (block_group->free_extents * 2 <= | ||
1369 | block_group->extents_thresh) | ||
1370 | return 0; | ||
1371 | } else { | ||
1372 | return 0; | ||
1373 | } | ||
1374 | } | ||
1279 | 1375 | ||
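Isolated, the new policy reads: below the extent threshold, keep the range as an extent entry, except that small ranges (at most four sectors here) only qualify while no more than half the slots are in use; past either limit the range goes into a bitmap. A sketch of that predicate (names mirror the fields above; a true return corresponds to insert_into_bitmap() returning 0):

    #include <stdbool.h>
    #include <stdio.h>

    /* true = keep the range as an extent entry (insert_into_bitmap()
     * returns 0); false = fold it into a bitmap */
    static bool keep_as_extent(int free_extents, int extents_thresh,
                               unsigned long bytes, unsigned long sectorsize)
    {
            if (free_extents >= extents_thresh)
                    return false;           /* over budget: bitmap it */
            if (bytes <= sectorsize * 4)    /* small extent */
                    return free_extents * 2 <= extents_thresh;
            return true;
    }

    int main(void)
    {
            printf("%d\n", keep_as_extent(10, 100, 4096, 4096));     /* 1 */
            printf("%d\n", keep_as_extent(60, 100, 4096, 4096));     /* 0 */
            printf("%d\n", keep_as_extent(60, 100, 1 << 20, 4096));  /* 1 */
            return 0;
    }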
1280 | /* | 1376 | /* |
1281 | * some block groups are so tiny they can't be enveloped by a bitmap, so | 1377 | * some block groups are so tiny they can't be enveloped by a bitmap, so |
@@ -1330,8 +1426,8 @@ new_bitmap: | |||
1330 | 1426 | ||
1331 | /* no pre-allocated info, allocate a new one */ | 1427 | /* no pre-allocated info, allocate a new one */ |
1332 | if (!info) { | 1428 | if (!info) { |
1333 | info = kzalloc(sizeof(struct btrfs_free_space), | 1429 | info = kmem_cache_zalloc(btrfs_free_space_cachep, |
1334 | GFP_NOFS); | 1430 | GFP_NOFS); |
1335 | if (!info) { | 1431 | if (!info) { |
1336 | spin_lock(&block_group->tree_lock); | 1432 | spin_lock(&block_group->tree_lock); |
1337 | ret = -ENOMEM; | 1433 | ret = -ENOMEM; |
@@ -1353,28 +1449,20 @@ out: | |||
1353 | if (info) { | 1449 | if (info) { |
1354 | if (info->bitmap) | 1450 | if (info->bitmap) |
1355 | kfree(info->bitmap); | 1451 | kfree(info->bitmap); |
1356 | kfree(info); | 1452 | kmem_cache_free(btrfs_free_space_cachep, info); |
1357 | } | 1453 | } |
1358 | 1454 | ||
1359 | return ret; | 1455 | return ret; |
1360 | } | 1456 | } |
1361 | 1457 | ||
1362 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | 1458 | bool try_merge_free_space(struct btrfs_block_group_cache *block_group, |
1363 | u64 offset, u64 bytes) | 1459 | struct btrfs_free_space *info, bool update_stat) |
1364 | { | 1460 | { |
1365 | struct btrfs_free_space *right_info = NULL; | 1461 | struct btrfs_free_space *left_info; |
1366 | struct btrfs_free_space *left_info = NULL; | 1462 | struct btrfs_free_space *right_info; |
1367 | struct btrfs_free_space *info = NULL; | 1463 | bool merged = false; |
1368 | int ret = 0; | 1464 | u64 offset = info->offset; |
1369 | 1465 | u64 bytes = info->bytes; | |
1370 | info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); | ||
1371 | if (!info) | ||
1372 | return -ENOMEM; | ||
1373 | |||
1374 | info->offset = offset; | ||
1375 | info->bytes = bytes; | ||
1376 | |||
1377 | spin_lock(&block_group->tree_lock); | ||
1378 | 1466 | ||
1379 | /* | 1467 | /* |
1380 | * first we want to see if there is free space adjacent to the range we | 1468 | * first we want to see if there is free space adjacent to the range we |
@@ -1388,40 +1476,65 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | |||
1388 | else | 1476 | else |
1389 | left_info = tree_search_offset(block_group, offset - 1, 0, 0); | 1477 | left_info = tree_search_offset(block_group, offset - 1, 0, 0); |
1390 | 1478 | ||
1391 | /* | ||
1392 | * If there was no extent directly to the left or right of this new | ||
1393 | * extent then we know we're going to have to allocate a new extent, so | ||
1394 | * before we do that see if we need to drop this into a bitmap | ||
1395 | */ | ||
1396 | if ((!left_info || left_info->bitmap) && | ||
1397 | (!right_info || right_info->bitmap)) { | ||
1398 | ret = insert_into_bitmap(block_group, info); | ||
1399 | |||
1400 | if (ret < 0) { | ||
1401 | goto out; | ||
1402 | } else if (ret) { | ||
1403 | ret = 0; | ||
1404 | goto out; | ||
1405 | } | ||
1406 | } | ||
1407 | |||
1408 | if (right_info && !right_info->bitmap) { | 1479 | if (right_info && !right_info->bitmap) { |
1409 | unlink_free_space(block_group, right_info); | 1480 | if (update_stat) |
1481 | unlink_free_space(block_group, right_info); | ||
1482 | else | ||
1483 | __unlink_free_space(block_group, right_info); | ||
1410 | info->bytes += right_info->bytes; | 1484 | info->bytes += right_info->bytes; |
1411 | kfree(right_info); | 1485 | kmem_cache_free(btrfs_free_space_cachep, right_info); |
1486 | merged = true; | ||
1412 | } | 1487 | } |
1413 | 1488 | ||
1414 | if (left_info && !left_info->bitmap && | 1489 | if (left_info && !left_info->bitmap && |
1415 | left_info->offset + left_info->bytes == offset) { | 1490 | left_info->offset + left_info->bytes == offset) { |
1416 | unlink_free_space(block_group, left_info); | 1491 | if (update_stat) |
1492 | unlink_free_space(block_group, left_info); | ||
1493 | else | ||
1494 | __unlink_free_space(block_group, left_info); | ||
1417 | info->offset = left_info->offset; | 1495 | info->offset = left_info->offset; |
1418 | info->bytes += left_info->bytes; | 1496 | info->bytes += left_info->bytes; |
1419 | kfree(left_info); | 1497 | kmem_cache_free(btrfs_free_space_cachep, left_info); |
1498 | merged = true; | ||
1420 | } | 1499 | } |
1421 | 1500 | ||
1501 | return merged; | ||
1502 | } | ||
1503 | |||
1504 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | ||
1505 | u64 offset, u64 bytes) | ||
1506 | { | ||
1507 | struct btrfs_free_space *info; | ||
1508 | int ret = 0; | ||
1509 | |||
1510 | info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); | ||
1511 | if (!info) | ||
1512 | return -ENOMEM; | ||
1513 | |||
1514 | info->offset = offset; | ||
1515 | info->bytes = bytes; | ||
1516 | |||
1517 | spin_lock(&block_group->tree_lock); | ||
1518 | |||
1519 | if (try_merge_free_space(block_group, info, true)) | ||
1520 | goto link; | ||
1521 | |||
1522 | /* | ||
1523 | * If there was no extent directly to the left or right of this new | ||
1524 | * extent then we know we're going to have to allocate a new extent, so | ||
1525 | * before we do that see if we need to drop this into a bitmap | ||
1526 | */ | ||
1527 | ret = insert_into_bitmap(block_group, info); | ||
1528 | if (ret < 0) { | ||
1529 | goto out; | ||
1530 | } else if (ret) { | ||
1531 | ret = 0; | ||
1532 | goto out; | ||
1533 | } | ||
1534 | link: | ||
1422 | ret = link_free_space(block_group, info); | 1535 | ret = link_free_space(block_group, info); |
1423 | if (ret) | 1536 | if (ret) |
1424 | kfree(info); | 1537 | kmem_cache_free(btrfs_free_space_cachep, info); |
1425 | out: | 1538 | out: |
1426 | spin_unlock(&block_group->tree_lock); | 1539 | spin_unlock(&block_group->tree_lock); |
1427 | 1540 | ||
@@ -1491,7 +1604,7 @@ again: | |||
1491 | kfree(info->bitmap); | 1604 | kfree(info->bitmap); |
1492 | block_group->total_bitmaps--; | 1605 | block_group->total_bitmaps--; |
1493 | } | 1606 | } |
1494 | kfree(info); | 1607 | kmem_cache_free(btrfs_free_space_cachep, info); |
1495 | goto out_lock; | 1608 | goto out_lock; |
1496 | } | 1609 | } |
1497 | 1610 | ||
@@ -1527,7 +1640,7 @@ again: | |||
1527 | /* the hole we're creating ends at the end | 1640 | /* the hole we're creating ends at the end |
1528 | * of the info struct, just free the info | 1641 | * of the info struct, just free the info |
1529 | */ | 1642 | */ |
1530 | kfree(info); | 1643 | kmem_cache_free(btrfs_free_space_cachep, info); |
1531 | } | 1644 | } |
1532 | spin_unlock(&block_group->tree_lock); | 1645 | spin_unlock(&block_group->tree_lock); |
1533 | 1646 | ||
@@ -1600,29 +1713,28 @@ __btrfs_return_cluster_to_free_space( | |||
1600 | { | 1713 | { |
1601 | struct btrfs_free_space *entry; | 1714 | struct btrfs_free_space *entry; |
1602 | struct rb_node *node; | 1715 | struct rb_node *node; |
1603 | bool bitmap; | ||
1604 | 1716 | ||
1605 | spin_lock(&cluster->lock); | 1717 | spin_lock(&cluster->lock); |
1606 | if (cluster->block_group != block_group) | 1718 | if (cluster->block_group != block_group) |
1607 | goto out; | 1719 | goto out; |
1608 | 1720 | ||
1609 | bitmap = cluster->points_to_bitmap; | ||
1610 | cluster->block_group = NULL; | 1721 | cluster->block_group = NULL; |
1611 | cluster->window_start = 0; | 1722 | cluster->window_start = 0; |
1612 | list_del_init(&cluster->block_group_list); | 1723 | list_del_init(&cluster->block_group_list); |
1613 | cluster->points_to_bitmap = false; | ||
1614 | |||
1615 | if (bitmap) | ||
1616 | goto out; | ||
1617 | 1724 | ||
1618 | node = rb_first(&cluster->root); | 1725 | node = rb_first(&cluster->root); |
1619 | while (node) { | 1726 | while (node) { |
1727 | bool bitmap; | ||
1728 | |||
1620 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 1729 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
1621 | node = rb_next(&entry->offset_index); | 1730 | node = rb_next(&entry->offset_index); |
1622 | rb_erase(&entry->offset_index, &cluster->root); | 1731 | rb_erase(&entry->offset_index, &cluster->root); |
1623 | BUG_ON(entry->bitmap); | 1732 | |
1733 | bitmap = (entry->bitmap != NULL); | ||
1734 | if (!bitmap) | ||
1735 | try_merge_free_space(block_group, entry, false); | ||
1624 | tree_insert_offset(&block_group->free_space_offset, | 1736 | tree_insert_offset(&block_group->free_space_offset, |
1625 | entry->offset, &entry->offset_index, 0); | 1737 | entry->offset, &entry->offset_index, bitmap); |
1626 | } | 1738 | } |
1627 | cluster->root = RB_ROOT; | 1739 | cluster->root = RB_ROOT; |
1628 | 1740 | ||
@@ -1659,7 +1771,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | |||
1659 | unlink_free_space(block_group, info); | 1771 | unlink_free_space(block_group, info); |
1660 | if (info->bitmap) | 1772 | if (info->bitmap) |
1661 | kfree(info->bitmap); | 1773 | kfree(info->bitmap); |
1662 | kfree(info); | 1774 | kmem_cache_free(btrfs_free_space_cachep, info); |
1663 | if (need_resched()) { | 1775 | if (need_resched()) { |
1664 | spin_unlock(&block_group->tree_lock); | 1776 | spin_unlock(&block_group->tree_lock); |
1665 | cond_resched(); | 1777 | cond_resched(); |
@@ -1685,19 +1797,14 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, | |||
1685 | ret = offset; | 1797 | ret = offset; |
1686 | if (entry->bitmap) { | 1798 | if (entry->bitmap) { |
1687 | bitmap_clear_bits(block_group, entry, offset, bytes); | 1799 | bitmap_clear_bits(block_group, entry, offset, bytes); |
1688 | if (!entry->bytes) { | 1800 | if (!entry->bytes) |
1689 | unlink_free_space(block_group, entry); | 1801 | free_bitmap(block_group, entry); |
1690 | kfree(entry->bitmap); | ||
1691 | kfree(entry); | ||
1692 | block_group->total_bitmaps--; | ||
1693 | recalculate_thresholds(block_group); | ||
1694 | } | ||
1695 | } else { | 1802 | } else { |
1696 | unlink_free_space(block_group, entry); | 1803 | unlink_free_space(block_group, entry); |
1697 | entry->offset += bytes; | 1804 | entry->offset += bytes; |
1698 | entry->bytes -= bytes; | 1805 | entry->bytes -= bytes; |
1699 | if (!entry->bytes) | 1806 | if (!entry->bytes) |
1700 | kfree(entry); | 1807 | kmem_cache_free(btrfs_free_space_cachep, entry); |
1701 | else | 1808 | else |
1702 | link_free_space(block_group, entry); | 1809 | link_free_space(block_group, entry); |
1703 | } | 1810 | } |
@@ -1750,48 +1857,24 @@ int btrfs_return_cluster_to_free_space( | |||
1750 | 1857 | ||
1751 | static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, | 1858 | static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, |
1752 | struct btrfs_free_cluster *cluster, | 1859 | struct btrfs_free_cluster *cluster, |
1860 | struct btrfs_free_space *entry, | ||
1753 | u64 bytes, u64 min_start) | 1861 | u64 bytes, u64 min_start) |
1754 | { | 1862 | { |
1755 | struct btrfs_free_space *entry; | ||
1756 | int err; | 1863 | int err; |
1757 | u64 search_start = cluster->window_start; | 1864 | u64 search_start = cluster->window_start; |
1758 | u64 search_bytes = bytes; | 1865 | u64 search_bytes = bytes; |
1759 | u64 ret = 0; | 1866 | u64 ret = 0; |
1760 | 1867 | ||
1761 | spin_lock(&block_group->tree_lock); | ||
1762 | spin_lock(&cluster->lock); | ||
1763 | |||
1764 | if (!cluster->points_to_bitmap) | ||
1765 | goto out; | ||
1766 | |||
1767 | if (cluster->block_group != block_group) | ||
1768 | goto out; | ||
1769 | |||
1770 | /* | ||
1771 | * search_start is the beginning of the bitmap, but at some point it may | ||
1772 | * be a good idea to point to the actual start of the free area in the | ||
1773 | * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only | ||
1774 | * to 1 to make sure we get the bitmap entry | ||
1775 | */ | ||
1776 | entry = tree_search_offset(block_group, | ||
1777 | offset_to_bitmap(block_group, search_start), | ||
1778 | 1, 0); | ||
1779 | if (!entry || !entry->bitmap) | ||
1780 | goto out; | ||
1781 | |||
1782 | search_start = min_start; | 1868 | search_start = min_start; |
1783 | search_bytes = bytes; | 1869 | search_bytes = bytes; |
1784 | 1870 | ||
1785 | err = search_bitmap(block_group, entry, &search_start, | 1871 | err = search_bitmap(block_group, entry, &search_start, |
1786 | &search_bytes); | 1872 | &search_bytes); |
1787 | if (err) | 1873 | if (err) |
1788 | goto out; | 1874 | return 0; |
1789 | 1875 | ||
1790 | ret = search_start; | 1876 | ret = search_start; |
1791 | bitmap_clear_bits(block_group, entry, ret, bytes); | 1877 | bitmap_clear_bits(block_group, entry, ret, bytes); |
1792 | out: | ||
1793 | spin_unlock(&cluster->lock); | ||
1794 | spin_unlock(&block_group->tree_lock); | ||
1795 | 1878 | ||
1796 | return ret; | 1879 | return ret; |
1797 | } | 1880 | } |
@@ -1809,10 +1892,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
1809 | struct rb_node *node; | 1892 | struct rb_node *node; |
1810 | u64 ret = 0; | 1893 | u64 ret = 0; |
1811 | 1894 | ||
1812 | if (cluster->points_to_bitmap) | ||
1813 | return btrfs_alloc_from_bitmap(block_group, cluster, bytes, | ||
1814 | min_start); | ||
1815 | |||
1816 | spin_lock(&cluster->lock); | 1895 | spin_lock(&cluster->lock); |
1817 | if (bytes > cluster->max_size) | 1896 | if (bytes > cluster->max_size) |
1818 | goto out; | 1897 | goto out; |
@@ -1825,9 +1904,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
1825 | goto out; | 1904 | goto out; |
1826 | 1905 | ||
1827 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 1906 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
1828 | |||
1829 | while(1) { | 1907 | while(1) { |
1830 | if (entry->bytes < bytes || entry->offset < min_start) { | 1908 | if (entry->bytes < bytes || |
1909 | (!entry->bitmap && entry->offset < min_start)) { | ||
1831 | struct rb_node *node; | 1910 | struct rb_node *node; |
1832 | 1911 | ||
1833 | node = rb_next(&entry->offset_index); | 1912 | node = rb_next(&entry->offset_index); |
@@ -1837,20 +1916,53 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
1837 | offset_index); | 1916 | offset_index); |
1838 | continue; | 1917 | continue; |
1839 | } | 1918 | } |
1840 | ret = entry->offset; | ||
1841 | 1919 | ||
1842 | entry->offset += bytes; | 1920 | if (entry->bitmap) { |
1843 | entry->bytes -= bytes; | 1921 | ret = btrfs_alloc_from_bitmap(block_group, |
1922 | cluster, entry, bytes, | ||
1923 | min_start); | ||
1924 | if (ret == 0) { | ||
1925 | struct rb_node *node; | ||
1926 | node = rb_next(&entry->offset_index); | ||
1927 | if (!node) | ||
1928 | break; | ||
1929 | entry = rb_entry(node, struct btrfs_free_space, | ||
1930 | offset_index); | ||
1931 | continue; | ||
1932 | } | ||
1933 | } else { | ||
1844 | 1934 | ||
1845 | if (entry->bytes == 0) { | 1935 | ret = entry->offset; |
1846 | rb_erase(&entry->offset_index, &cluster->root); | 1936 | |
1847 | kfree(entry); | 1937 | entry->offset += bytes; |
1938 | entry->bytes -= bytes; | ||
1848 | } | 1939 | } |
1940 | |||
1941 | if (entry->bytes == 0) | ||
1942 | rb_erase(&entry->offset_index, &cluster->root); | ||
1849 | break; | 1943 | break; |
1850 | } | 1944 | } |
1851 | out: | 1945 | out: |
1852 | spin_unlock(&cluster->lock); | 1946 | spin_unlock(&cluster->lock); |
1853 | 1947 | ||
1948 | if (!ret) | ||
1949 | return 0; | ||
1950 | |||
1951 | spin_lock(&block_group->tree_lock); | ||
1952 | |||
1953 | block_group->free_space -= bytes; | ||
1954 | if (entry->bytes == 0) { | ||
1955 | block_group->free_extents--; | ||
1956 | if (entry->bitmap) { | ||
1957 | kfree(entry->bitmap); | ||
1958 | block_group->total_bitmaps--; | ||
1959 | recalculate_thresholds(block_group); | ||
1960 | } | ||
1961 | kmem_cache_free(btrfs_free_space_cachep, entry); | ||
1962 | } | ||
1963 | |||
1964 | spin_unlock(&block_group->tree_lock); | ||
1965 | |||
1854 | return ret; | 1966 | return ret; |
1855 | } | 1967 | } |
1856 | 1968 | ||
@@ -1866,12 +1978,13 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, | |||
1866 | unsigned long found_bits; | 1978 | unsigned long found_bits; |
1867 | unsigned long start = 0; | 1979 | unsigned long start = 0; |
1868 | unsigned long total_found = 0; | 1980 | unsigned long total_found = 0; |
1981 | int ret; | ||
1869 | bool found = false; | 1982 | bool found = false; |
1870 | 1983 | ||
1871 | i = offset_to_bit(entry->offset, block_group->sectorsize, | 1984 | i = offset_to_bit(entry->offset, block_group->sectorsize, |
1872 | max_t(u64, offset, entry->offset)); | 1985 | max_t(u64, offset, entry->offset)); |
1873 | search_bits = bytes_to_bits(min_bytes, block_group->sectorsize); | 1986 | search_bits = bytes_to_bits(bytes, block_group->sectorsize); |
1874 | total_bits = bytes_to_bits(bytes, block_group->sectorsize); | 1987 | total_bits = bytes_to_bits(min_bytes, block_group->sectorsize); |
1875 | 1988 | ||
1876 | again: | 1989 | again: |
1877 | found_bits = 0; | 1990 | found_bits = 0; |
@@ -1888,7 +2001,7 @@ again: | |||
1888 | } | 2001 | } |
1889 | 2002 | ||
1890 | if (!found_bits) | 2003 | if (!found_bits) |
1891 | return -1; | 2004 | return -ENOSPC; |
1892 | 2005 | ||
1893 | if (!found) { | 2006 | if (!found) { |
1894 | start = i; | 2007 | start = i; |
@@ -1912,189 +2025,208 @@ again: | |||
1912 | 2025 | ||
1913 | cluster->window_start = start * block_group->sectorsize + | 2026 | cluster->window_start = start * block_group->sectorsize + |
1914 | entry->offset; | 2027 | entry->offset; |
1915 | cluster->points_to_bitmap = true; | 2028 | rb_erase(&entry->offset_index, &block_group->free_space_offset); |
2029 | ret = tree_insert_offset(&cluster->root, entry->offset, | ||
2030 | &entry->offset_index, 1); | ||
2031 | BUG_ON(ret); | ||
1916 | 2032 | ||
1917 | return 0; | 2033 | return 0; |
1918 | } | 2034 | } |
1919 | 2035 | ||
1920 | /* | 2036 | /* |
1921 | * here we try to find a cluster of blocks in a block group. The goal | 2037 | * This searches the block group for just extents to fill the cluster with. |
1922 | * is to find at least bytes free and up to empty_size + bytes free. | ||
1923 | * We might not find them all in one contiguous area. | ||
1924 | * | ||
1925 | * returns zero and sets up cluster if things worked out, otherwise | ||
1926 | * it returns -enospc | ||
1927 | */ | 2038 | */ |
1928 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | 2039 | static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, |
1929 | struct btrfs_root *root, | 2040 | struct btrfs_free_cluster *cluster, |
1930 | struct btrfs_block_group_cache *block_group, | 2041 | u64 offset, u64 bytes, u64 min_bytes) |
1931 | struct btrfs_free_cluster *cluster, | ||
1932 | u64 offset, u64 bytes, u64 empty_size) | ||
1933 | { | 2042 | { |
2043 | struct btrfs_free_space *first = NULL; | ||
1934 | struct btrfs_free_space *entry = NULL; | 2044 | struct btrfs_free_space *entry = NULL; |
2045 | struct btrfs_free_space *prev = NULL; | ||
2046 | struct btrfs_free_space *last; | ||
1935 | struct rb_node *node; | 2047 | struct rb_node *node; |
1936 | struct btrfs_free_space *next; | ||
1937 | struct btrfs_free_space *last = NULL; | ||
1938 | u64 min_bytes; | ||
1939 | u64 window_start; | 2048 | u64 window_start; |
1940 | u64 window_free; | 2049 | u64 window_free; |
1941 | u64 max_extent = 0; | 2050 | u64 max_extent; |
1942 | bool found_bitmap = false; | 2051 | u64 max_gap = 128 * 1024; |
1943 | int ret; | ||
1944 | 2052 | ||
1945 | /* for metadata, allow allocates with more holes */ | 2053 | entry = tree_search_offset(block_group, offset, 0, 1); |
1946 | if (btrfs_test_opt(root, SSD_SPREAD)) { | 2054 | if (!entry) |
1947 | min_bytes = bytes + empty_size; | 2055 | return -ENOSPC; |
1948 | } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
1949 | /* | ||
1950 | * we want to do larger allocations when we are | ||
1951 | * flushing out the delayed refs, it helps prevent | ||
1952 | * making more work as we go along. | ||
1953 | */ | ||
1954 | if (trans->transaction->delayed_refs.flushing) | ||
1955 | min_bytes = max(bytes, (bytes + empty_size) >> 1); | ||
1956 | else | ||
1957 | min_bytes = max(bytes, (bytes + empty_size) >> 4); | ||
1958 | } else | ||
1959 | min_bytes = max(bytes, (bytes + empty_size) >> 2); | ||
1960 | |||
1961 | spin_lock(&block_group->tree_lock); | ||
1962 | spin_lock(&cluster->lock); | ||
1963 | |||
1964 | /* someone already found a cluster, hooray */ | ||
1965 | if (cluster->block_group) { | ||
1966 | ret = 0; | ||
1967 | goto out; | ||
1968 | } | ||
1969 | again: | ||
1970 | entry = tree_search_offset(block_group, offset, found_bitmap, 1); | ||
1971 | if (!entry) { | ||
1972 | ret = -ENOSPC; | ||
1973 | goto out; | ||
1974 | } | ||
1975 | 2056 | ||
1976 | /* | 2057 | /* |
1977 | * If found_bitmap is true, we exhausted our search for extent entries, | 2058 | * We don't want bitmaps, so just move along until we find a normal |
1978 | * and we just want to search all of the bitmaps that we can find, and | 2059 | * extent entry. |
1979 | * ignore any extent entries we find. | ||
1980 | */ | 2060 | */ |
1981 | while (entry->bitmap || found_bitmap || | 2061 | while (entry->bitmap) { |
1982 | (!entry->bitmap && entry->bytes < min_bytes)) { | 2062 | node = rb_next(&entry->offset_index); |
1983 | struct rb_node *node = rb_next(&entry->offset_index); | 2063 | if (!node) |
1984 | 2064 | return -ENOSPC; | |
1985 | if (entry->bitmap && entry->bytes > bytes + empty_size) { | ||
1986 | ret = btrfs_bitmap_cluster(block_group, entry, cluster, | ||
1987 | offset, bytes + empty_size, | ||
1988 | min_bytes); | ||
1989 | if (!ret) | ||
1990 | goto got_it; | ||
1991 | } | ||
1992 | |||
1993 | if (!node) { | ||
1994 | ret = -ENOSPC; | ||
1995 | goto out; | ||
1996 | } | ||
1997 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 2065 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
1998 | } | 2066 | } |
1999 | 2067 | ||
2000 | /* | ||
2001 | * We already searched all the extent entries from the passed in offset | ||
2002 | * to the end and didn't find enough space for the cluster, and we also | ||
2003 | * didn't find any bitmaps that met our criteria, just go ahead and exit | ||
2004 | */ | ||
2005 | if (found_bitmap) { | ||
2006 | ret = -ENOSPC; | ||
2007 | goto out; | ||
2008 | } | ||
2009 | |||
2010 | cluster->points_to_bitmap = false; | ||
2011 | window_start = entry->offset; | 2068 | window_start = entry->offset; |
2012 | window_free = entry->bytes; | 2069 | window_free = entry->bytes; |
2013 | last = entry; | ||
2014 | max_extent = entry->bytes; | 2070 | max_extent = entry->bytes; |
2071 | first = entry; | ||
2072 | last = entry; | ||
2073 | prev = entry; | ||
2015 | 2074 | ||
2016 | while (1) { | 2075 | while (window_free <= min_bytes) { |
2017 | /* out window is just right, lets fill it */ | 2076 | node = rb_next(&entry->offset_index); |
2018 | if (window_free >= bytes + empty_size) | 2077 | if (!node) |
2019 | break; | 2078 | return -ENOSPC; |
2020 | 2079 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | |
2021 | node = rb_next(&last->offset_index); | ||
2022 | if (!node) { | ||
2023 | if (found_bitmap) | ||
2024 | goto again; | ||
2025 | ret = -ENOSPC; | ||
2026 | goto out; | ||
2027 | } | ||
2028 | next = rb_entry(node, struct btrfs_free_space, offset_index); | ||
2029 | 2080 | ||
2030 | /* | 2081 | if (entry->bitmap) |
2031 | * we found a bitmap, so if this search doesn't result in a | ||
2032 | * cluster, we know to go and search again for the bitmaps and | ||
2033 | * start looking for space there | ||
2034 | */ | ||
2035 | if (next->bitmap) { | ||
2036 | if (!found_bitmap) | ||
2037 | offset = next->offset; | ||
2038 | found_bitmap = true; | ||
2039 | last = next; | ||
2040 | continue; | 2082 | continue; |
2041 | } | ||
2042 | |||
2043 | /* | 2083 | /* |
2044 | * we haven't filled the empty size and the window is | 2084 | * we haven't filled the empty size and the window is |
2045 | * very large. reset and try again | 2085 | * very large. reset and try again |
2046 | */ | 2086 | */ |
2047 | if (next->offset - (last->offset + last->bytes) > 128 * 1024 || | 2087 | if (entry->offset - (prev->offset + prev->bytes) > max_gap || |
2048 | next->offset - window_start > (bytes + empty_size) * 2) { | 2088 | entry->offset - window_start > (min_bytes * 2)) { |
2049 | entry = next; | 2089 | first = entry; |
2050 | window_start = entry->offset; | 2090 | window_start = entry->offset; |
2051 | window_free = entry->bytes; | 2091 | window_free = entry->bytes; |
2052 | last = entry; | 2092 | last = entry; |
2053 | max_extent = entry->bytes; | 2093 | max_extent = entry->bytes; |
2054 | } else { | 2094 | } else { |
2055 | last = next; | 2095 | last = entry; |
2056 | window_free += next->bytes; | 2096 | window_free += entry->bytes; |
2057 | if (entry->bytes > max_extent) | 2097 | if (entry->bytes > max_extent) |
2058 | max_extent = entry->bytes; | 2098 | max_extent = entry->bytes; |
2059 | } | 2099 | } |
2100 | prev = entry; | ||
2060 | } | 2101 | } |
2061 | 2102 | ||
2062 | cluster->window_start = entry->offset; | 2103 | cluster->window_start = first->offset; |
2104 | |||
2105 | node = &first->offset_index; | ||
2063 | 2106 | ||
2064 | /* | 2107 | /* |
2065 | * now we've found our entries, pull them out of the free space | 2108 | * now we've found our entries, pull them out of the free space |
2066 | * cache and put them into the cluster rbtree | 2109 | * cache and put them into the cluster rbtree |
2067 | * | ||
2068 | * The cluster includes an rbtree, but only uses the offset index | ||
2069 | * of each free space cache entry. | ||
2070 | */ | 2110 | */ |
2071 | while (1) { | 2111 | do { |
2112 | int ret; | ||
2113 | |||
2114 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
2072 | node = rb_next(&entry->offset_index); | 2115 | node = rb_next(&entry->offset_index); |
2073 | if (entry->bitmap && node) { | 2116 | if (entry->bitmap) |
2074 | entry = rb_entry(node, struct btrfs_free_space, | ||
2075 | offset_index); | ||
2076 | continue; | 2117 | continue; |
2077 | } else if (entry->bitmap && !node) { | ||
2078 | break; | ||
2079 | } | ||
2080 | 2118 | ||
2081 | rb_erase(&entry->offset_index, &block_group->free_space_offset); | 2119 | rb_erase(&entry->offset_index, &block_group->free_space_offset); |
2082 | ret = tree_insert_offset(&cluster->root, entry->offset, | 2120 | ret = tree_insert_offset(&cluster->root, entry->offset, |
2083 | &entry->offset_index, 0); | 2121 | &entry->offset_index, 0); |
2084 | BUG_ON(ret); | 2122 | BUG_ON(ret); |
2123 | } while (node && entry != last); | ||
2085 | 2124 | ||
2086 | if (!node || entry == last) | 2125 | cluster->max_size = max_extent; |
2087 | break; | 2126 | |
2127 | return 0; | ||
2128 | } | ||
2129 | |||
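The window scan above is easiest to see over a flat array. A self-contained sketch with the same 128 KiB gap limit and 2 * min_bytes span limit as the hunk (bitmap skipping and max_extent tracking omitted for brevity):

    #include <stddef.h>
    #include <stdint.h>

    struct extent {
        uint64_t offset;
        uint64_t bytes;
    };

    #define MAX_GAP (128 * 1024)

    /* returns the index of the first extent of a window holding more
     * than min_bytes of free space, or -1 if no such window exists */
    static int find_window(const struct extent *e, size_t n, uint64_t min_bytes)
    {
        size_t first = 0, i;
        uint64_t window_start, window_free;

        if (n == 0)
            return -1;

        window_start = e[0].offset;
        window_free = e[0].bytes;

        for (i = 1; window_free <= min_bytes; i++) {
            if (i == n)
                return -1;  /* ran out of extents: -ENOSPC */
            /* too big a hole, or the window grew too wide:
             * restart the window at the current extent */
            if (e[i].offset - (e[i - 1].offset + e[i - 1].bytes) > MAX_GAP ||
                e[i].offset - window_start > min_bytes * 2) {
                first = i;
                window_start = e[i].offset;
                window_free = e[i].bytes;
            } else {
                window_free += e[i].bytes;
            }
        }
        return (int)first;
    }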
2130 | /* | ||
2131 | * This specifically looks for bitmaps that may work in the cluster; we assume | ||
2132 | * we have already failed to find extents that will work. | ||
2133 | */ | ||
2134 | static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, | ||
2135 | struct btrfs_free_cluster *cluster, | ||
2136 | u64 offset, u64 bytes, u64 min_bytes) | ||
2137 | { | ||
2138 | struct btrfs_free_space *entry; | ||
2139 | struct rb_node *node; | ||
2140 | int ret = -ENOSPC; | ||
2141 | |||
2142 | if (block_group->total_bitmaps == 0) | ||
2143 | return -ENOSPC; | ||
2144 | |||
2145 | entry = tree_search_offset(block_group, | ||
2146 | offset_to_bitmap(block_group, offset), | ||
2147 | 0, 1); | ||
2148 | if (!entry) | ||
2149 | return -ENOSPC; | ||
2088 | 2150 | ||
2151 | node = &entry->offset_index; | ||
2152 | do { | ||
2089 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 2153 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
2154 | node = rb_next(&entry->offset_index); | ||
2155 | if (!entry->bitmap) | ||
2156 | continue; | ||
2157 | if (entry->bytes < min_bytes) | ||
2158 | continue; | ||
2159 | ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, | ||
2160 | bytes, min_bytes); | ||
2161 | } while (ret && node); | ||
2162 | |||
2163 | return ret; | ||
2164 | } | ||
2165 | |||
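The bitmap fallback is a plain retry loop: walk forward from the first candidate and keep calling btrfs_bitmap_cluster() until one bitmap yields a cluster or the nodes run out. In outline (array-based stand-in for the rbtree walk; try_bitmap() is a hypothetical placeholder for the per-entry attempt):

    #include <errno.h>
    #include <stdint.h>

    struct fs_entry {
        int is_bitmap;
        uint64_t bytes;
    };

    /* hypothetical stand-in for btrfs_bitmap_cluster() */
    extern int try_bitmap(const struct fs_entry *e);

    static int setup_from_bitmaps(const struct fs_entry *e, int n,
                                  uint64_t min_bytes)
    {
        int i, ret = -ENOSPC;

        for (i = 0; i < n && ret; i++) {
            if (!e[i].is_bitmap || e[i].bytes < min_bytes)
                continue;
            ret = try_bitmap(&e[i]);
        }
        return ret;
    }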
2166 | /* | ||
2167 | * here we try to find a cluster of blocks in a block group. The goal | ||
2168 | * is to find at least bytes free and up to empty_size + bytes free. | ||
2169 | * We might not find them all in one contiguous area. | ||
2170 | * | ||
2171 | * returns zero and sets up cluster if things worked out, otherwise | ||
2172 | * it returns -ENOSPC | ||
2173 | */ | ||
2174 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | ||
2175 | struct btrfs_root *root, | ||
2176 | struct btrfs_block_group_cache *block_group, | ||
2177 | struct btrfs_free_cluster *cluster, | ||
2178 | u64 offset, u64 bytes, u64 empty_size) | ||
2179 | { | ||
2180 | u64 min_bytes; | ||
2181 | int ret; | ||
2182 | |||
2183 | /* for metadata, allow allocations with more holes */ | ||
2184 | if (btrfs_test_opt(root, SSD_SPREAD)) { | ||
2185 | min_bytes = bytes + empty_size; | ||
2186 | } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
2187 | /* | ||
2188 | * we want to do larger allocations when we are | ||
2189 | * flushing out the delayed refs, it helps prevent | ||
2190 | * making more work as we go along. | ||
2191 | */ | ||
2192 | if (trans->transaction->delayed_refs.flushing) | ||
2193 | min_bytes = max(bytes, (bytes + empty_size) >> 1); | ||
2194 | else | ||
2195 | min_bytes = max(bytes, (bytes + empty_size) >> 4); | ||
2196 | } else | ||
2197 | min_bytes = max(bytes, (bytes + empty_size) >> 2); | ||
2198 | |||
2199 | spin_lock(&block_group->tree_lock); | ||
2200 | |||
2201 | /* | ||
2202 | * If we know we don't have enough space to make a cluster don't even | ||
2203 | * bother doing all the work to try and find one. | ||
2204 | */ | ||
2205 | if (block_group->free_space < min_bytes) { | ||
2206 | spin_unlock(&block_group->tree_lock); | ||
2207 | return -ENOSPC; | ||
2090 | } | 2208 | } |
2091 | 2209 | ||
2092 | cluster->max_size = max_extent; | 2210 | spin_lock(&cluster->lock); |
2093 | got_it: | 2211 | |
2094 | ret = 0; | 2212 | /* someone already found a cluster, hooray */ |
2095 | atomic_inc(&block_group->count); | 2213 | if (cluster->block_group) { |
2096 | list_add_tail(&cluster->block_group_list, &block_group->cluster_list); | 2214 | ret = 0; |
2097 | cluster->block_group = block_group; | 2215 | goto out; |
2216 | } | ||
2217 | |||
2218 | ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes, | ||
2219 | min_bytes); | ||
2220 | if (ret) | ||
2221 | ret = setup_cluster_bitmap(block_group, cluster, offset, | ||
2222 | bytes, min_bytes); | ||
2223 | |||
2224 | if (!ret) { | ||
2225 | atomic_inc(&block_group->count); | ||
2226 | list_add_tail(&cluster->block_group_list, | ||
2227 | &block_group->cluster_list); | ||
2228 | cluster->block_group = block_group; | ||
2229 | } | ||
2098 | out: | 2230 | out: |
2099 | spin_unlock(&cluster->lock); | 2231 | spin_unlock(&cluster->lock); |
2100 | spin_unlock(&block_group->tree_lock); | 2232 | spin_unlock(&block_group->tree_lock); |
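The sizing policy at the top of btrfs_find_space_cluster() reads more easily with numbers plugged in. Assuming bytes = 1 MiB and empty_size = 7 MiB (illustrative values only): ssd_spread demands the full 8 MiB, metadata demands 4 MiB while delayed refs are flushing but only 1 MiB otherwise, and data demands 2 MiB. A compact restatement:

    #include <stdint.h>

    static uint64_t max_u64(uint64_t a, uint64_t b)
    {
        return a > b ? a : b;
    }

    /* with bytes = 1 MiB, empty_size = 7 MiB:
     * ssd_spread -> 8 MiB, metadata flushing -> 4 MiB,
     * metadata otherwise -> 1 MiB, data -> 2 MiB */
    static uint64_t cluster_min_bytes(int ssd_spread, int metadata,
                                      int flushing, uint64_t bytes,
                                      uint64_t empty_size)
    {
        if (ssd_spread)
            return bytes + empty_size;
        if (metadata)
            return flushing ? max_u64(bytes, (bytes + empty_size) >> 1)
                            : max_u64(bytes, (bytes + empty_size) >> 4);
        return max_u64(bytes, (bytes + empty_size) >> 2);
    }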
@@ -2111,8 +2243,99 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) | |||
2111 | spin_lock_init(&cluster->refill_lock); | 2243 | spin_lock_init(&cluster->refill_lock); |
2112 | cluster->root = RB_ROOT; | 2244 | cluster->root = RB_ROOT; |
2113 | cluster->max_size = 0; | 2245 | cluster->max_size = 0; |
2114 | cluster->points_to_bitmap = false; | ||
2115 | INIT_LIST_HEAD(&cluster->block_group_list); | 2246 | INIT_LIST_HEAD(&cluster->block_group_list); |
2116 | cluster->block_group = NULL; | 2247 | cluster->block_group = NULL; |
2117 | } | 2248 | } |
2118 | 2249 | ||
2250 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | ||
2251 | u64 *trimmed, u64 start, u64 end, u64 minlen) | ||
2252 | { | ||
2253 | struct btrfs_free_space *entry = NULL; | ||
2254 | struct btrfs_fs_info *fs_info = block_group->fs_info; | ||
2255 | u64 bytes = 0; | ||
2256 | u64 actually_trimmed; | ||
2257 | int ret = 0; | ||
2258 | |||
2259 | *trimmed = 0; | ||
2260 | |||
2261 | while (start < end) { | ||
2262 | spin_lock(&block_group->tree_lock); | ||
2263 | |||
2264 | if (block_group->free_space < minlen) { | ||
2265 | spin_unlock(&block_group->tree_lock); | ||
2266 | break; | ||
2267 | } | ||
2268 | |||
2269 | entry = tree_search_offset(block_group, start, 0, 1); | ||
2270 | if (!entry) | ||
2271 | entry = tree_search_offset(block_group, | ||
2272 | offset_to_bitmap(block_group, | ||
2273 | start), | ||
2274 | 1, 1); | ||
2275 | |||
2276 | if (!entry || entry->offset >= end) { | ||
2277 | spin_unlock(&block_group->tree_lock); | ||
2278 | break; | ||
2279 | } | ||
2280 | |||
2281 | if (entry->bitmap) { | ||
2282 | ret = search_bitmap(block_group, entry, &start, &bytes); | ||
2283 | if (!ret) { | ||
2284 | if (start >= end) { | ||
2285 | spin_unlock(&block_group->tree_lock); | ||
2286 | break; | ||
2287 | } | ||
2288 | bytes = min(bytes, end - start); | ||
2289 | bitmap_clear_bits(block_group, entry, | ||
2290 | start, bytes); | ||
2291 | if (entry->bytes == 0) | ||
2292 | free_bitmap(block_group, entry); | ||
2293 | } else { | ||
2294 | start = entry->offset + BITS_PER_BITMAP * | ||
2295 | block_group->sectorsize; | ||
2296 | spin_unlock(&block_group->tree_lock); | ||
2297 | ret = 0; | ||
2298 | continue; | ||
2299 | } | ||
2300 | } else { | ||
2301 | start = entry->offset; | ||
2302 | bytes = min(entry->bytes, end - start); | ||
2303 | unlink_free_space(block_group, entry); | ||
2304 | kfree(entry); | ||
2305 | } | ||
2306 | |||
2307 | spin_unlock(&block_group->tree_lock); | ||
2308 | |||
2309 | if (bytes >= minlen) { | ||
2310 | int update_ret; | ||
2311 | update_ret = btrfs_update_reserved_bytes(block_group, | ||
2312 | bytes, 1, 1); | ||
2313 | |||
2314 | ret = btrfs_error_discard_extent(fs_info->extent_root, | ||
2315 | start, | ||
2316 | bytes, | ||
2317 | &actually_trimmed); | ||
2318 | |||
2319 | btrfs_add_free_space(block_group, | ||
2320 | start, bytes); | ||
2321 | if (!update_ret) | ||
2322 | btrfs_update_reserved_bytes(block_group, | ||
2323 | bytes, 0, 1); | ||
2324 | |||
2325 | if (ret) | ||
2326 | break; | ||
2327 | *trimmed += actually_trimmed; | ||
2328 | } | ||
2329 | start += bytes; | ||
2330 | bytes = 0; | ||
2331 | |||
2332 | if (fatal_signal_pending(current)) { | ||
2333 | ret = -ERESTARTSYS; | ||
2334 | break; | ||
2335 | } | ||
2336 | |||
2337 | cond_resched(); | ||
2338 | } | ||
2339 | |||
2340 | return ret; | ||
2341 | } | ||
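Semantically, btrfs_trim_block_group() walks the free ranges overlapping [start, end), skips anything shorter than minlen, discards the rest, and reports the total in *trimmed; it also re-checks locks and fatal signals between iterations. A toy model of that contract over a flat list (the clamping and minlen filter mirror the code above, everything else is simplified):

    #include <stdint.h>

    struct range {
        uint64_t start;
        uint64_t bytes;
    };

    static uint64_t trim_ranges(const struct range *r, int n,
                                uint64_t start, uint64_t end, uint64_t minlen)
    {
        uint64_t trimmed = 0;
        int i;

        for (i = 0; i < n; i++) {
            uint64_t s = r[i].start > start ? r[i].start : start;
            uint64_t e = r[i].start + r[i].bytes;

            if (e > end)
                e = end;      /* clamp to the requested range */
            if (s >= e || e - s < minlen)
                continue;     /* too small to be worth a discard */
            /* the real code issues the discard for [s, e) here */
            trimmed += e - s;
        }
        return trimmed;
    }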
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index e49ca5c321b5..65c3b935289f 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h | |||
@@ -68,4 +68,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
68 | int btrfs_return_cluster_to_free_space( | 68 | int btrfs_return_cluster_to_free_space( |
69 | struct btrfs_block_group_cache *block_group, | 69 | struct btrfs_block_group_cache *block_group, |
70 | struct btrfs_free_cluster *cluster); | 70 | struct btrfs_free_cluster *cluster); |
71 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | ||
72 | u64 *trimmed, u64 start, u64 end, u64 minlen); | ||
71 | #endif | 73 | #endif |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index c56eb5909172..c05a08f4c411 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -30,7 +30,8 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) | |||
30 | int slot; | 30 | int slot; |
31 | 31 | ||
32 | path = btrfs_alloc_path(); | 32 | path = btrfs_alloc_path(); |
33 | BUG_ON(!path); | 33 | if (!path) |
34 | return -ENOMEM; | ||
34 | 35 | ||
35 | search_key.objectid = BTRFS_LAST_FREE_OBJECTID; | 36 | search_key.objectid = BTRFS_LAST_FREE_OBJECTID; |
36 | search_key.type = -1; | 37 | search_key.type = -1; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a3798a3aa0d2..fcd66b6a8086 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include "tree-log.h" | 50 | #include "tree-log.h" |
51 | #include "compression.h" | 51 | #include "compression.h" |
52 | #include "locking.h" | 52 | #include "locking.h" |
53 | #include "free-space-cache.h" | ||
53 | 54 | ||
54 | struct btrfs_iget_args { | 55 | struct btrfs_iget_args { |
55 | u64 ino; | 56 | u64 ino; |
@@ -70,6 +71,7 @@ static struct kmem_cache *btrfs_inode_cachep; | |||
70 | struct kmem_cache *btrfs_trans_handle_cachep; | 71 | struct kmem_cache *btrfs_trans_handle_cachep; |
71 | struct kmem_cache *btrfs_transaction_cachep; | 72 | struct kmem_cache *btrfs_transaction_cachep; |
72 | struct kmem_cache *btrfs_path_cachep; | 73 | struct kmem_cache *btrfs_path_cachep; |
74 | struct kmem_cache *btrfs_free_space_cachep; | ||
73 | 75 | ||
74 | #define S_SHIFT 12 | 76 | #define S_SHIFT 12 |
75 | static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | 77 | static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { |
@@ -82,7 +84,8 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | |||
82 | [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, | 84 | [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, |
83 | }; | 85 | }; |
84 | 86 | ||
85 | static void btrfs_truncate(struct inode *inode); | 87 | static int btrfs_setsize(struct inode *inode, loff_t newsize); |
88 | static int btrfs_truncate(struct inode *inode); | ||
86 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); | 89 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); |
87 | static noinline int cow_file_range(struct inode *inode, | 90 | static noinline int cow_file_range(struct inode *inode, |
88 | struct page *locked_page, | 91 | struct page *locked_page, |
@@ -90,13 +93,14 @@ static noinline int cow_file_range(struct inode *inode, | |||
90 | unsigned long *nr_written, int unlock); | 93 | unsigned long *nr_written, int unlock); |
91 | 94 | ||
92 | static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, | 95 | static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, |
93 | struct inode *inode, struct inode *dir) | 96 | struct inode *inode, struct inode *dir, |
97 | const struct qstr *qstr) | ||
94 | { | 98 | { |
95 | int err; | 99 | int err; |
96 | 100 | ||
97 | err = btrfs_init_acl(trans, inode, dir); | 101 | err = btrfs_init_acl(trans, inode, dir); |
98 | if (!err) | 102 | if (!err) |
99 | err = btrfs_xattr_security_init(trans, inode, dir); | 103 | err = btrfs_xattr_security_init(trans, inode, dir, qstr); |
100 | return err; | 104 | return err; |
101 | } | 105 | } |
102 | 106 | ||
@@ -108,6 +112,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, | |||
108 | static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | 112 | static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, |
109 | struct btrfs_root *root, struct inode *inode, | 113 | struct btrfs_root *root, struct inode *inode, |
110 | u64 start, size_t size, size_t compressed_size, | 114 | u64 start, size_t size, size_t compressed_size, |
115 | int compress_type, | ||
111 | struct page **compressed_pages) | 116 | struct page **compressed_pages) |
112 | { | 117 | { |
113 | struct btrfs_key key; | 118 | struct btrfs_key key; |
@@ -122,12 +127,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
122 | size_t cur_size = size; | 127 | size_t cur_size = size; |
123 | size_t datasize; | 128 | size_t datasize; |
124 | unsigned long offset; | 129 | unsigned long offset; |
125 | int use_compress = 0; | ||
126 | 130 | ||
127 | if (compressed_size && compressed_pages) { | 131 | if (compressed_size && compressed_pages) |
128 | use_compress = 1; | ||
129 | cur_size = compressed_size; | 132 | cur_size = compressed_size; |
130 | } | ||
131 | 133 | ||
132 | path = btrfs_alloc_path(); | 134 | path = btrfs_alloc_path(); |
133 | if (!path) | 135 | if (!path) |
@@ -159,7 +161,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
159 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); | 161 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); |
160 | ptr = btrfs_file_extent_inline_start(ei); | 162 | ptr = btrfs_file_extent_inline_start(ei); |
161 | 163 | ||
162 | if (use_compress) { | 164 | if (compress_type != BTRFS_COMPRESS_NONE) { |
163 | struct page *cpage; | 165 | struct page *cpage; |
164 | int i = 0; | 166 | int i = 0; |
165 | while (compressed_size > 0) { | 167 | while (compressed_size > 0) { |
@@ -176,7 +178,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
176 | compressed_size -= cur_size; | 178 | compressed_size -= cur_size; |
177 | } | 179 | } |
178 | btrfs_set_file_extent_compression(leaf, ei, | 180 | btrfs_set_file_extent_compression(leaf, ei, |
179 | BTRFS_COMPRESS_ZLIB); | 181 | compress_type); |
180 | } else { | 182 | } else { |
181 | page = find_get_page(inode->i_mapping, | 183 | page = find_get_page(inode->i_mapping, |
182 | start >> PAGE_CACHE_SHIFT); | 184 | start >> PAGE_CACHE_SHIFT); |
@@ -217,7 +219,7 @@ fail: | |||
217 | static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | 219 | static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, |
218 | struct btrfs_root *root, | 220 | struct btrfs_root *root, |
219 | struct inode *inode, u64 start, u64 end, | 221 | struct inode *inode, u64 start, u64 end, |
220 | size_t compressed_size, | 222 | size_t compressed_size, int compress_type, |
221 | struct page **compressed_pages) | 223 | struct page **compressed_pages) |
222 | { | 224 | { |
223 | u64 isize = i_size_read(inode); | 225 | u64 isize = i_size_read(inode); |
@@ -250,7 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
250 | inline_len = min_t(u64, isize, actual_end); | 252 | inline_len = min_t(u64, isize, actual_end); |
251 | ret = insert_inline_extent(trans, root, inode, start, | 253 | ret = insert_inline_extent(trans, root, inode, start, |
252 | inline_len, compressed_size, | 254 | inline_len, compressed_size, |
253 | compressed_pages); | 255 | compress_type, compressed_pages); |
254 | BUG_ON(ret); | 256 | BUG_ON(ret); |
255 | btrfs_delalloc_release_metadata(inode, end + 1 - start); | 257 | btrfs_delalloc_release_metadata(inode, end + 1 - start); |
256 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); | 258 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
@@ -263,6 +265,7 @@ struct async_extent { | |||
263 | u64 compressed_size; | 265 | u64 compressed_size; |
264 | struct page **pages; | 266 | struct page **pages; |
265 | unsigned long nr_pages; | 267 | unsigned long nr_pages; |
268 | int compress_type; | ||
266 | struct list_head list; | 269 | struct list_head list; |
267 | }; | 270 | }; |
268 | 271 | ||
@@ -280,16 +283,19 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
280 | u64 start, u64 ram_size, | 283 | u64 start, u64 ram_size, |
281 | u64 compressed_size, | 284 | u64 compressed_size, |
282 | struct page **pages, | 285 | struct page **pages, |
283 | unsigned long nr_pages) | 286 | unsigned long nr_pages, |
287 | int compress_type) | ||
284 | { | 288 | { |
285 | struct async_extent *async_extent; | 289 | struct async_extent *async_extent; |
286 | 290 | ||
287 | async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); | 291 | async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); |
292 | BUG_ON(!async_extent); | ||
288 | async_extent->start = start; | 293 | async_extent->start = start; |
289 | async_extent->ram_size = ram_size; | 294 | async_extent->ram_size = ram_size; |
290 | async_extent->compressed_size = compressed_size; | 295 | async_extent->compressed_size = compressed_size; |
291 | async_extent->pages = pages; | 296 | async_extent->pages = pages; |
292 | async_extent->nr_pages = nr_pages; | 297 | async_extent->nr_pages = nr_pages; |
298 | async_extent->compress_type = compress_type; | ||
293 | list_add_tail(&async_extent->list, &cow->extents); | 299 | list_add_tail(&async_extent->list, &cow->extents); |
294 | return 0; | 300 | return 0; |
295 | } | 301 | } |
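The compress_type plumbing in these hunks is just a type tag threaded through the async work queue, so the submit path later knows which (de)compressor pairs with each extent instead of assuming zlib. In miniature (names are illustrative stand-ins, not the kernel structures):

    #include <stdlib.h>

    enum compress_type { COMPRESS_NONE, COMPRESS_ZLIB, COMPRESS_LZO };

    struct async_extent {
        unsigned long start, len;
        enum compress_type compress_type;  /* carried to the submit path */
    };

    static struct async_extent *make_extent(unsigned long start,
                                            unsigned long len,
                                            enum compress_type type)
    {
        struct async_extent *ae = malloc(sizeof(*ae));

        if (!ae)
            return NULL;
        ae->start = start;
        ae->len = len;
        ae->compress_type = type;  /* previously implicitly zlib */
        return ae;
    }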
@@ -332,6 +338,7 @@ static noinline int compress_file_range(struct inode *inode, | |||
332 | unsigned long max_uncompressed = 128 * 1024; | 338 | unsigned long max_uncompressed = 128 * 1024; |
333 | int i; | 339 | int i; |
334 | int will_compress; | 340 | int will_compress; |
341 | int compress_type = root->fs_info->compress_type; | ||
335 | 342 | ||
336 | actual_end = min_t(u64, isize, end + 1); | 343 | actual_end = min_t(u64, isize, end + 1); |
337 | again: | 344 | again: |
@@ -377,16 +384,22 @@ again: | |||
377 | */ | 384 | */ |
378 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && | 385 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && |
379 | (btrfs_test_opt(root, COMPRESS) || | 386 | (btrfs_test_opt(root, COMPRESS) || |
380 | (BTRFS_I(inode)->force_compress))) { | 387 | (BTRFS_I(inode)->force_compress) || |
388 | (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { | ||
381 | WARN_ON(pages); | 389 | WARN_ON(pages); |
382 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 390 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
391 | BUG_ON(!pages); | ||
392 | |||
393 | if (BTRFS_I(inode)->force_compress) | ||
394 | compress_type = BTRFS_I(inode)->force_compress; | ||
383 | 395 | ||
384 | ret = btrfs_zlib_compress_pages(inode->i_mapping, start, | 396 | ret = btrfs_compress_pages(compress_type, |
385 | total_compressed, pages, | 397 | inode->i_mapping, start, |
386 | nr_pages, &nr_pages_ret, | 398 | total_compressed, pages, |
387 | &total_in, | 399 | nr_pages, &nr_pages_ret, |
388 | &total_compressed, | 400 | &total_in, |
389 | max_compressed); | 401 | &total_compressed, |
402 | max_compressed); | ||
390 | 403 | ||
391 | if (!ret) { | 404 | if (!ret) { |
392 | unsigned long offset = total_compressed & | 405 | unsigned long offset = total_compressed & |
@@ -408,7 +421,7 @@ again: | |||
408 | } | 421 | } |
409 | if (start == 0) { | 422 | if (start == 0) { |
410 | trans = btrfs_join_transaction(root, 1); | 423 | trans = btrfs_join_transaction(root, 1); |
411 | BUG_ON(!trans); | 424 | BUG_ON(IS_ERR(trans)); |
412 | btrfs_set_trans_block_group(trans, inode); | 425 | btrfs_set_trans_block_group(trans, inode); |
413 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 426 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
414 | 427 | ||
@@ -418,12 +431,13 @@ again: | |||
418 | * to make an uncompressed inline extent. | 431 | * to make an uncompressed inline extent. |
419 | */ | 432 | */ |
420 | ret = cow_file_range_inline(trans, root, inode, | 433 | ret = cow_file_range_inline(trans, root, inode, |
421 | start, end, 0, NULL); | 434 | start, end, 0, 0, NULL); |
422 | } else { | 435 | } else { |
423 | /* try making a compressed inline extent */ | 436 | /* try making a compressed inline extent */ |
424 | ret = cow_file_range_inline(trans, root, inode, | 437 | ret = cow_file_range_inline(trans, root, inode, |
425 | start, end, | 438 | start, end, |
426 | total_compressed, pages); | 439 | total_compressed, |
440 | compress_type, pages); | ||
427 | } | 441 | } |
428 | if (ret == 0) { | 442 | if (ret == 0) { |
429 | /* | 443 | /* |
@@ -493,7 +507,8 @@ again: | |||
493 | * and will submit them to the elevator. | 507 | * and will submit them to the elevator. |
494 | */ | 508 | */ |
495 | add_async_extent(async_cow, start, num_bytes, | 509 | add_async_extent(async_cow, start, num_bytes, |
496 | total_compressed, pages, nr_pages_ret); | 510 | total_compressed, pages, nr_pages_ret, |
511 | compress_type); | ||
497 | 512 | ||
498 | if (start + num_bytes < end) { | 513 | if (start + num_bytes < end) { |
499 | start += num_bytes; | 514 | start += num_bytes; |
@@ -515,7 +530,8 @@ cleanup_and_bail_uncompressed: | |||
515 | __set_page_dirty_nobuffers(locked_page); | 530 | __set_page_dirty_nobuffers(locked_page); |
516 | /* unlocked later on in the async handlers */ | 531 | /* unlocked later on in the async handlers */ |
517 | } | 532 | } |
518 | add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0); | 533 | add_async_extent(async_cow, start, end - start + 1, |
534 | 0, NULL, 0, BTRFS_COMPRESS_NONE); | ||
519 | *num_added += 1; | 535 | *num_added += 1; |
520 | } | 536 | } |
521 | 537 | ||
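Pulling the conditions out of the hunks above: a range now qualifies for compression if the mount-wide COMPRESS option, the per-inode force_compress value, or the new per-inode BTRFS_INODE_COMPRESS flag is set, and NOCOMPRESS still vetoes all three; when force_compress is set it also names the algorithm, otherwise the fs-wide default applies. As a predicate:

    /* mirrors the checks in compress_file_range()/run_delalloc_range() */
    static int should_compress(int nocompress, int mount_compress,
                               int force_compress, int inode_compress_flag)
    {
        if (nocompress)
            return 0;
        return mount_compress || force_compress || inode_compress_flag;
    }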
@@ -602,6 +618,7 @@ retry: | |||
602 | GFP_NOFS); | 618 | GFP_NOFS); |
603 | 619 | ||
604 | trans = btrfs_join_transaction(root, 1); | 620 | trans = btrfs_join_transaction(root, 1); |
621 | BUG_ON(IS_ERR(trans)); | ||
605 | ret = btrfs_reserve_extent(trans, root, | 622 | ret = btrfs_reserve_extent(trans, root, |
606 | async_extent->compressed_size, | 623 | async_extent->compressed_size, |
607 | async_extent->compressed_size, | 624 | async_extent->compressed_size, |
@@ -633,6 +650,7 @@ retry: | |||
633 | async_extent->ram_size - 1, 0); | 650 | async_extent->ram_size - 1, 0); |
634 | 651 | ||
635 | em = alloc_extent_map(GFP_NOFS); | 652 | em = alloc_extent_map(GFP_NOFS); |
653 | BUG_ON(!em); | ||
636 | em->start = async_extent->start; | 654 | em->start = async_extent->start; |
637 | em->len = async_extent->ram_size; | 655 | em->len = async_extent->ram_size; |
638 | em->orig_start = em->start; | 656 | em->orig_start = em->start; |
@@ -640,6 +658,7 @@ retry: | |||
640 | em->block_start = ins.objectid; | 658 | em->block_start = ins.objectid; |
641 | em->block_len = ins.offset; | 659 | em->block_len = ins.offset; |
642 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 660 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
661 | em->compress_type = async_extent->compress_type; | ||
643 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 662 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
644 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 663 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
645 | 664 | ||
@@ -656,11 +675,13 @@ retry: | |||
656 | async_extent->ram_size - 1, 0); | 675 | async_extent->ram_size - 1, 0); |
657 | } | 676 | } |
658 | 677 | ||
659 | ret = btrfs_add_ordered_extent(inode, async_extent->start, | 678 | ret = btrfs_add_ordered_extent_compress(inode, |
660 | ins.objectid, | 679 | async_extent->start, |
661 | async_extent->ram_size, | 680 | ins.objectid, |
662 | ins.offset, | 681 | async_extent->ram_size, |
663 | BTRFS_ORDERED_COMPRESSED); | 682 | ins.offset, |
683 | BTRFS_ORDERED_COMPRESSED, | ||
684 | async_extent->compress_type); | ||
664 | BUG_ON(ret); | 685 | BUG_ON(ret); |
665 | 686 | ||
666 | /* | 687 | /* |
@@ -758,7 +779,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
758 | 779 | ||
759 | BUG_ON(root == root->fs_info->tree_root); | 780 | BUG_ON(root == root->fs_info->tree_root); |
760 | trans = btrfs_join_transaction(root, 1); | 781 | trans = btrfs_join_transaction(root, 1); |
761 | BUG_ON(!trans); | 782 | BUG_ON(IS_ERR(trans)); |
762 | btrfs_set_trans_block_group(trans, inode); | 783 | btrfs_set_trans_block_group(trans, inode); |
763 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 784 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
764 | 785 | ||
@@ -770,7 +791,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
770 | if (start == 0) { | 791 | if (start == 0) { |
771 | /* lets try to make an inline extent */ | 792 | /* lets try to make an inline extent */ |
772 | ret = cow_file_range_inline(trans, root, inode, | 793 | ret = cow_file_range_inline(trans, root, inode, |
773 | start, end, 0, NULL); | 794 | start, end, 0, 0, NULL); |
774 | if (ret == 0) { | 795 | if (ret == 0) { |
775 | extent_clear_unlock_delalloc(inode, | 796 | extent_clear_unlock_delalloc(inode, |
776 | &BTRFS_I(inode)->io_tree, | 797 | &BTRFS_I(inode)->io_tree, |
@@ -806,6 +827,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
806 | BUG_ON(ret); | 827 | BUG_ON(ret); |
807 | 828 | ||
808 | em = alloc_extent_map(GFP_NOFS); | 829 | em = alloc_extent_map(GFP_NOFS); |
830 | BUG_ON(!em); | ||
809 | em->start = start; | 831 | em->start = start; |
810 | em->orig_start = em->start; | 832 | em->orig_start = em->start; |
811 | ram_size = ins.offset; | 833 | ram_size = ins.offset; |
@@ -1036,7 +1058,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, | |||
1036 | } else { | 1058 | } else { |
1037 | trans = btrfs_join_transaction(root, 1); | 1059 | trans = btrfs_join_transaction(root, 1); |
1038 | } | 1060 | } |
1039 | BUG_ON(!trans); | 1061 | BUG_ON(IS_ERR(trans)); |
1040 | 1062 | ||
1041 | cow_start = (u64)-1; | 1063 | cow_start = (u64)-1; |
1042 | cur_offset = start; | 1064 | cur_offset = start; |
@@ -1155,6 +1177,7 @@ out_check: | |||
1155 | struct extent_map_tree *em_tree; | 1177 | struct extent_map_tree *em_tree; |
1156 | em_tree = &BTRFS_I(inode)->extent_tree; | 1178 | em_tree = &BTRFS_I(inode)->extent_tree; |
1157 | em = alloc_extent_map(GFP_NOFS); | 1179 | em = alloc_extent_map(GFP_NOFS); |
1180 | BUG_ON(!em); | ||
1158 | em->start = cur_offset; | 1181 | em->start = cur_offset; |
1159 | em->orig_start = em->start; | 1182 | em->orig_start = em->start; |
1160 | em->len = num_bytes; | 1183 | em->len = num_bytes; |
@@ -1236,7 +1259,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1236 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1259 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
1237 | page_started, 0, nr_written); | 1260 | page_started, 0, nr_written); |
1238 | else if (!btrfs_test_opt(root, COMPRESS) && | 1261 | else if (!btrfs_test_opt(root, COMPRESS) && |
1239 | !(BTRFS_I(inode)->force_compress)) | 1262 | !(BTRFS_I(inode)->force_compress) && |
1263 | !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) | ||
1240 | ret = cow_file_range(inode, locked_page, start, end, | 1264 | ret = cow_file_range(inode, locked_page, start, end, |
1241 | page_started, nr_written, 1); | 1265 | page_started, nr_written, 1); |
1242 | else | 1266 | else |
@@ -1443,8 +1467,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1443 | if (bio_flags & EXTENT_BIO_COMPRESSED) { | 1467 | if (bio_flags & EXTENT_BIO_COMPRESSED) { |
1444 | return btrfs_submit_compressed_read(inode, bio, | 1468 | return btrfs_submit_compressed_read(inode, bio, |
1445 | mirror_num, bio_flags); | 1469 | mirror_num, bio_flags); |
1446 | } else if (!skip_sum) | 1470 | } else if (!skip_sum) { |
1447 | btrfs_lookup_bio_sums(root, inode, bio, NULL); | 1471 | ret = btrfs_lookup_bio_sums(root, inode, bio, NULL); |
1472 | if (ret) | ||
1473 | return ret; | ||
1474 | } | ||
1448 | goto mapit; | 1475 | goto mapit; |
1449 | } else if (!skip_sum) { | 1476 | } else if (!skip_sum) { |
1450 | /* csum items have already been cloned */ | 1477 | /* csum items have already been cloned */ |
@@ -1544,6 +1571,7 @@ out: | |||
1544 | out_page: | 1571 | out_page: |
1545 | unlock_page(page); | 1572 | unlock_page(page); |
1546 | page_cache_release(page); | 1573 | page_cache_release(page); |
1574 | kfree(fixup); | ||
1547 | } | 1575 | } |
1548 | 1576 | ||
1549 | /* | 1577 | /* |
@@ -1670,7 +1698,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1670 | struct btrfs_ordered_extent *ordered_extent = NULL; | 1698 | struct btrfs_ordered_extent *ordered_extent = NULL; |
1671 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1699 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
1672 | struct extent_state *cached_state = NULL; | 1700 | struct extent_state *cached_state = NULL; |
1673 | int compressed = 0; | 1701 | int compress_type = 0; |
1674 | int ret; | 1702 | int ret; |
1675 | bool nolock = false; | 1703 | bool nolock = false; |
1676 | 1704 | ||
@@ -1690,7 +1718,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1690 | trans = btrfs_join_transaction_nolock(root, 1); | 1718 | trans = btrfs_join_transaction_nolock(root, 1); |
1691 | else | 1719 | else |
1692 | trans = btrfs_join_transaction(root, 1); | 1720 | trans = btrfs_join_transaction(root, 1); |
1693 | BUG_ON(!trans); | 1721 | BUG_ON(IS_ERR(trans)); |
1694 | btrfs_set_trans_block_group(trans, inode); | 1722 | btrfs_set_trans_block_group(trans, inode); |
1695 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 1723 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
1696 | ret = btrfs_update_inode(trans, root, inode); | 1724 | ret = btrfs_update_inode(trans, root, inode); |
@@ -1707,13 +1735,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1707 | trans = btrfs_join_transaction_nolock(root, 1); | 1735 | trans = btrfs_join_transaction_nolock(root, 1); |
1708 | else | 1736 | else |
1709 | trans = btrfs_join_transaction(root, 1); | 1737 | trans = btrfs_join_transaction(root, 1); |
1738 | BUG_ON(IS_ERR(trans)); | ||
1710 | btrfs_set_trans_block_group(trans, inode); | 1739 | btrfs_set_trans_block_group(trans, inode); |
1711 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 1740 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
1712 | 1741 | ||
1713 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 1742 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
1714 | compressed = 1; | 1743 | compress_type = ordered_extent->compress_type; |
1715 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { | 1744 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { |
1716 | BUG_ON(compressed); | 1745 | BUG_ON(compress_type); |
1717 | ret = btrfs_mark_extent_written(trans, inode, | 1746 | ret = btrfs_mark_extent_written(trans, inode, |
1718 | ordered_extent->file_offset, | 1747 | ordered_extent->file_offset, |
1719 | ordered_extent->file_offset + | 1748 | ordered_extent->file_offset + |
@@ -1727,7 +1756,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1727 | ordered_extent->disk_len, | 1756 | ordered_extent->disk_len, |
1728 | ordered_extent->len, | 1757 | ordered_extent->len, |
1729 | ordered_extent->len, | 1758 | ordered_extent->len, |
1730 | compressed, 0, 0, | 1759 | compress_type, 0, 0, |
1731 | BTRFS_FILE_EXTENT_REG); | 1760 | BTRFS_FILE_EXTENT_REG); |
1732 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | 1761 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, |
1733 | ordered_extent->file_offset, | 1762 | ordered_extent->file_offset, |
@@ -1741,9 +1770,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1741 | add_pending_csums(trans, inode, ordered_extent->file_offset, | 1770 | add_pending_csums(trans, inode, ordered_extent->file_offset, |
1742 | &ordered_extent->list); | 1771 | &ordered_extent->list); |
1743 | 1772 | ||
1744 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1773 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1745 | ret = btrfs_update_inode(trans, root, inode); | 1774 | if (!ret) { |
1746 | BUG_ON(ret); | 1775 | ret = btrfs_update_inode(trans, root, inode); |
1776 | BUG_ON(ret); | ||
1777 | } | ||
1778 | ret = 0; | ||
1747 | out: | 1779 | out: |
1748 | if (nolock) { | 1780 | if (nolock) { |
1749 | if (trans) | 1781 | if (trans) |
@@ -1765,6 +1797,8 @@ out: | |||
1765 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | 1797 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, |
1766 | struct extent_state *state, int uptodate) | 1798 | struct extent_state *state, int uptodate) |
1767 | { | 1799 | { |
1800 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); | ||
1801 | |||
1768 | ClearPagePrivate2(page); | 1802 | ClearPagePrivate2(page); |
1769 | return btrfs_finish_ordered_io(page->mapping->host, start, end); | 1803 | return btrfs_finish_ordered_io(page->mapping->host, start, end); |
1770 | } | 1804 | } |
@@ -1829,6 +1863,8 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1829 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | 1863 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
1830 | logical = em->block_start; | 1864 | logical = em->block_start; |
1831 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; | 1865 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; |
1866 | extent_set_compress_type(&failrec->bio_flags, | ||
1867 | em->compress_type); | ||
1832 | } | 1868 | } |
1833 | failrec->logical = logical; | 1869 | failrec->logical = logical; |
1834 | free_extent_map(em); | 1870 | free_extent_map(em); |
@@ -1873,10 +1909,10 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1873 | else | 1909 | else |
1874 | rw = READ; | 1910 | rw = READ; |
1875 | 1911 | ||
1876 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | 1912 | ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, |
1877 | failrec->last_mirror, | 1913 | failrec->last_mirror, |
1878 | failrec->bio_flags, 0); | 1914 | failrec->bio_flags, 0); |
1879 | return 0; | 1915 | return ret; |
1880 | } | 1916 | } |
1881 | 1917 | ||
1882 | /* | 1918 | /* |
@@ -1892,7 +1928,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start) | |||
1892 | 1928 | ||
1893 | private = 0; | 1929 | private = 0; |
1894 | if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, | 1930 | if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, |
1895 | (u64)-1, 1, EXTENT_DIRTY)) { | 1931 | (u64)-1, 1, EXTENT_DIRTY, 0)) { |
1896 | ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, | 1932 | ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, |
1897 | start, &private_failure); | 1933 | start, &private_failure); |
1898 | if (ret == 0) { | 1934 | if (ret == 0) { |
@@ -2188,8 +2224,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2188 | insert = 1; | 2224 | insert = 1; |
2189 | #endif | 2225 | #endif |
2190 | insert = 1; | 2226 | insert = 1; |
2191 | } else { | ||
2192 | WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved); | ||
2193 | } | 2227 | } |
2194 | 2228 | ||
2195 | if (!BTRFS_I(inode)->orphan_meta_reserved) { | 2229 | if (!BTRFS_I(inode)->orphan_meta_reserved) { |
@@ -2260,7 +2294,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2260 | * this cleans up any orphans that may be left on the list from the last use | 2294 | * this cleans up any orphans that may be left on the list from the last use |
2261 | * of this root. | 2295 | * of this root. |
2262 | */ | 2296 | */ |
2263 | void btrfs_orphan_cleanup(struct btrfs_root *root) | 2297 | int btrfs_orphan_cleanup(struct btrfs_root *root) |
2264 | { | 2298 | { |
2265 | struct btrfs_path *path; | 2299 | struct btrfs_path *path; |
2266 | struct extent_buffer *leaf; | 2300 | struct extent_buffer *leaf; |
@@ -2270,10 +2304,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2270 | int ret = 0, nr_unlink = 0, nr_truncate = 0; | 2304 | int ret = 0, nr_unlink = 0, nr_truncate = 0; |
2271 | 2305 | ||
2272 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) | 2306 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) |
2273 | return; | 2307 | return 0; |
2274 | 2308 | ||
2275 | path = btrfs_alloc_path(); | 2309 | path = btrfs_alloc_path(); |
2276 | BUG_ON(!path); | 2310 | if (!path) { |
2311 | ret = -ENOMEM; | ||
2312 | goto out; | ||
2313 | } | ||
2277 | path->reada = -1; | 2314 | path->reada = -1; |
2278 | 2315 | ||
2279 | key.objectid = BTRFS_ORPHAN_OBJECTID; | 2316 | key.objectid = BTRFS_ORPHAN_OBJECTID; |
@@ -2282,18 +2319,16 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2282 | 2319 | ||
2283 | while (1) { | 2320 | while (1) { |
2284 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 2321 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
2285 | if (ret < 0) { | 2322 | if (ret < 0) |
2286 | printk(KERN_ERR "Error searching slot for orphan: %d" | 2323 | goto out; |
2287 | "\n", ret); | ||
2288 | break; | ||
2289 | } | ||
2290 | 2324 | ||
2291 | /* | 2325 | /* |
2292 | * if ret == 0 means we found what we were searching for, which | 2326 | * if ret == 0 means we found what we were searching for, which |
2293 | * is weird, but possible, so only screw with path if we didnt | 2327 | * is weird, but possible, so only screw with path if we didn't |
2294 | * find the key and see if we have stuff that matches | 2328 | * find the key and see if we have stuff that matches |
2295 | */ | 2329 | */ |
2296 | if (ret > 0) { | 2330 | if (ret > 0) { |
2331 | ret = 0; | ||
2297 | if (path->slots[0] == 0) | 2332 | if (path->slots[0] == 0) |
2298 | break; | 2333 | break; |
2299 | path->slots[0]--; | 2334 | path->slots[0]--; |
@@ -2321,7 +2356,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2321 | found_key.type = BTRFS_INODE_ITEM_KEY; | 2356 | found_key.type = BTRFS_INODE_ITEM_KEY; |
2322 | found_key.offset = 0; | 2357 | found_key.offset = 0; |
2323 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); | 2358 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); |
2324 | BUG_ON(IS_ERR(inode)); | 2359 | if (IS_ERR(inode)) { |
2360 | ret = PTR_ERR(inode); | ||
2361 | goto out; | ||
2362 | } | ||
2325 | 2363 | ||
2326 | /* | 2364 | /* |
2327 | * add this inode to the orphan list so btrfs_orphan_del does | 2365 | * add this inode to the orphan list so btrfs_orphan_del does |
@@ -2339,6 +2377,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2339 | */ | 2377 | */ |
2340 | if (is_bad_inode(inode)) { | 2378 | if (is_bad_inode(inode)) { |
2341 | trans = btrfs_start_transaction(root, 0); | 2379 | trans = btrfs_start_transaction(root, 0); |
2380 | if (IS_ERR(trans)) { | ||
2381 | ret = PTR_ERR(trans); | ||
2382 | goto out; | ||
2383 | } | ||
2342 | btrfs_orphan_del(trans, inode); | 2384 | btrfs_orphan_del(trans, inode); |
2343 | btrfs_end_transaction(trans, root); | 2385 | btrfs_end_transaction(trans, root); |
2344 | iput(inode); | 2386 | iput(inode); |
@@ -2347,17 +2389,22 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2347 | 2389 | ||
2348 | /* if we have links, this was a truncate, lets do that */ | 2390 | /* if we have links, this was a truncate, lets do that */ |
2349 | if (inode->i_nlink) { | 2391 | if (inode->i_nlink) { |
2392 | if (!S_ISREG(inode->i_mode)) { | ||
2393 | WARN_ON(1); | ||
2394 | iput(inode); | ||
2395 | continue; | ||
2396 | } | ||
2350 | nr_truncate++; | 2397 | nr_truncate++; |
2351 | btrfs_truncate(inode); | 2398 | ret = btrfs_truncate(inode); |
2352 | } else { | 2399 | } else { |
2353 | nr_unlink++; | 2400 | nr_unlink++; |
2354 | } | 2401 | } |
2355 | 2402 | ||
2356 | /* this will do delete_inode and everything for us */ | 2403 | /* this will do delete_inode and everything for us */ |
2357 | iput(inode); | 2404 | iput(inode); |
2405 | if (ret) | ||
2406 | goto out; | ||
2358 | } | 2407 | } |
2359 | btrfs_free_path(path); | ||
2360 | |||
2361 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; | 2408 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; |
2362 | 2409 | ||
2363 | if (root->orphan_block_rsv) | 2410 | if (root->orphan_block_rsv) |
@@ -2366,13 +2413,20 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2366 | 2413 | ||
2367 | if (root->orphan_block_rsv || root->orphan_item_inserted) { | 2414 | if (root->orphan_block_rsv || root->orphan_item_inserted) { |
2368 | trans = btrfs_join_transaction(root, 1); | 2415 | trans = btrfs_join_transaction(root, 1); |
2369 | btrfs_end_transaction(trans, root); | 2416 | if (!IS_ERR(trans)) |
2417 | btrfs_end_transaction(trans, root); | ||
2370 | } | 2418 | } |
2371 | 2419 | ||
2372 | if (nr_unlink) | 2420 | if (nr_unlink) |
2373 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); | 2421 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); |
2374 | if (nr_truncate) | 2422 | if (nr_truncate) |
2375 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); | 2423 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); |
2424 | |||
2425 | out: | ||
2426 | if (ret) | ||
2427 | printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret); | ||
2428 | btrfs_free_path(path); | ||
2429 | return ret; | ||
2376 | } | 2430 | } |
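Converting btrfs_orphan_cleanup() from void to int funnels every failure through a single exit that logs the error and releases the path. The shape, in a self-contained sketch (names are stand-ins for the kernel helpers):

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    static int do_cleanup(void)
    {
        int ret = 0;
        char *path = malloc(64);  /* stands in for btrfs_alloc_path() */

        if (!path) {
            ret = -ENOMEM;
            goto out;
        }
        /* ... every later failure sets ret and jumps to out ... */
    out:
        if (ret)
            fprintf(stderr, "could not do cleanup %d\n", ret);
        free(path);  /* free(NULL) is a no-op, so the error path is safe */
        return ret;
    }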
2377 | 2431 | ||
2378 | /* | 2432 | /* |
@@ -2539,6 +2593,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2539 | struct btrfs_inode_item *item, | 2593 | struct btrfs_inode_item *item, |
2540 | struct inode *inode) | 2594 | struct inode *inode) |
2541 | { | 2595 | { |
2596 | if (!leaf->map_token) | ||
2597 | map_private_extent_buffer(leaf, (unsigned long)item, | ||
2598 | sizeof(struct btrfs_inode_item), | ||
2599 | &leaf->map_token, &leaf->kaddr, | ||
2600 | &leaf->map_start, &leaf->map_len, | ||
2601 | KM_USER1); | ||
2602 | |||
2542 | btrfs_set_inode_uid(leaf, item, inode->i_uid); | 2603 | btrfs_set_inode_uid(leaf, item, inode->i_uid); |
2543 | btrfs_set_inode_gid(leaf, item, inode->i_gid); | 2604 | btrfs_set_inode_gid(leaf, item, inode->i_gid); |
2544 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); | 2605 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); |
@@ -2567,6 +2628,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2567 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | 2628 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); |
2568 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | 2629 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); |
2569 | btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); | 2630 | btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); |
2631 | |||
2632 | if (leaf->map_token) { | ||
2633 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
2634 | leaf->map_token = NULL; | ||
2635 | } | ||
2570 | } | 2636 | } |
2571 | 2637 | ||
2572 | /* | 2638 | /* |
@@ -2611,10 +2677,10 @@ failed: | |||
2611 | * recovery code. It remove a link in a directory with a given name, and | 2677 | * recovery code. It remove a link in a directory with a given name, and |
2612 | * also drops the back refs in the inode to the directory | 2678 | * also drops the back refs in the inode to the directory |
2613 | */ | 2679 | */ |
2614 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | 2680 | static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, |
2615 | struct btrfs_root *root, | 2681 | struct btrfs_root *root, |
2616 | struct inode *dir, struct inode *inode, | 2682 | struct inode *dir, struct inode *inode, |
2617 | const char *name, int name_len) | 2683 | const char *name, int name_len) |
2618 | { | 2684 | { |
2619 | struct btrfs_path *path; | 2685 | struct btrfs_path *path; |
2620 | int ret = 0; | 2686 | int ret = 0; |
@@ -2626,7 +2692,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
2626 | path = btrfs_alloc_path(); | 2692 | path = btrfs_alloc_path(); |
2627 | if (!path) { | 2693 | if (!path) { |
2628 | ret = -ENOMEM; | 2694 | ret = -ENOMEM; |
2629 | goto err; | 2695 | goto out; |
2630 | } | 2696 | } |
2631 | 2697 | ||
2632 | path->leave_spinning = 1; | 2698 | path->leave_spinning = 1; |
@@ -2686,12 +2752,25 @@ err: | |||
2686 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | 2752 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); |
2687 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 2753 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
2688 | btrfs_update_inode(trans, root, dir); | 2754 | btrfs_update_inode(trans, root, dir); |
2689 | btrfs_drop_nlink(inode); | ||
2690 | ret = btrfs_update_inode(trans, root, inode); | ||
2691 | out: | 2755 | out: |
2692 | return ret; | 2756 | return ret; |
2693 | } | 2757 | } |
2694 | 2758 | ||
2759 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | ||
2760 | struct btrfs_root *root, | ||
2761 | struct inode *dir, struct inode *inode, | ||
2762 | const char *name, int name_len) | ||
2763 | { | ||
2764 | int ret; | ||
2765 | ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len); | ||
2766 | if (!ret) { | ||
2767 | btrfs_drop_nlink(inode); | ||
2768 | ret = btrfs_update_inode(trans, root, inode); | ||
2769 | } | ||
2770 | return ret; | ||
2771 | } | ||
2772 | |||
2773 | |||
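The split into __btrfs_unlink_inode() plus a thin wrapper follows a common refactoring shape: the double-underscore helper removes the directory entry and back references only, while the wrapper layers the link-count bookkeeping on top, presumably so other callers can reuse the raw removal without dropping a link. The shape, reduced to its essentials:

    /* helper: only the raw removal, no link-count side effects */
    static int __remove_entry(int *dir_entries)
    {
        if (*dir_entries == 0)
            return -1;  /* nothing to remove */
        (*dir_entries)--;
        return 0;
    }

    /* wrapper: adds the link-count bookkeeping on success */
    static int remove_entry(int *dir_entries, int *nlink)
    {
        int ret = __remove_entry(dir_entries);

        if (!ret)
            (*nlink)--;
        return ret;
    }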
2695 | /* helper to check if there is any shared block in the path */ | 2774 | /* helper to check if there is any shared block in the path */ |
2696 | static int check_path_shared(struct btrfs_root *root, | 2775 | static int check_path_shared(struct btrfs_root *root, |
2697 | struct btrfs_path *path) | 2776 | struct btrfs_path *path) |
@@ -2699,9 +2778,10 @@ static int check_path_shared(struct btrfs_root *root, | |||
2699 | struct extent_buffer *eb; | 2778 | struct extent_buffer *eb; |
2700 | int level; | 2779 | int level; |
2701 | u64 refs = 1; | 2780 | u64 refs = 1; |
2702 | int uninitialized_var(ret); | ||
2703 | 2781 | ||
2704 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | 2782 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { |
2783 | int ret; | ||
2784 | |||
2705 | if (!path->nodes[level]) | 2785 | if (!path->nodes[level]) |
2706 | break; | 2786 | break; |
2707 | eb = path->nodes[level]; | 2787 | eb = path->nodes[level]; |
@@ -2712,7 +2792,7 @@ static int check_path_shared(struct btrfs_root *root, | |||
2712 | if (refs > 1) | 2792 | if (refs > 1) |
2713 | return 1; | 2793 | return 1; |
2714 | } | 2794 | } |
2715 | return ret; /* XXX callers? */ | 2795 | return 0; |
2716 | } | 2796 | } |
2717 | 2797 | ||
2718 | /* | 2798 | /* |
@@ -3512,7 +3592,13 @@ out: | |||
3512 | return ret; | 3592 | return ret; |
3513 | } | 3593 | } |
3514 | 3594 | ||
3515 | int btrfs_cont_expand(struct inode *inode, loff_t size) | 3595 | /* |
3596 | * This function puts in dummy file extents for the area we're creating a hole | ||
3597 | * for. So if we are truncating this file to a larger size we need to insert | ||
3598 | * these file extents so that btrfs_get_extent will return a EXTENT_MAP_HOLE for | ||
3599 | * the range between oldsize and size | ||
3600 | */ | ||
3601 | int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | ||
3516 | { | 3602 | { |
3517 | struct btrfs_trans_handle *trans; | 3603 | struct btrfs_trans_handle *trans; |
3518 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3604 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -3520,7 +3606,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3520 | struct extent_map *em = NULL; | 3606 | struct extent_map *em = NULL; |
3521 | struct extent_state *cached_state = NULL; | 3607 | struct extent_state *cached_state = NULL; |
3522 | u64 mask = root->sectorsize - 1; | 3608 | u64 mask = root->sectorsize - 1; |
3523 | u64 hole_start = (inode->i_size + mask) & ~mask; | 3609 | u64 hole_start = (oldsize + mask) & ~mask; |
3524 | u64 block_end = (size + mask) & ~mask; | 3610 | u64 block_end = (size + mask) & ~mask; |
3525 | u64 last_byte; | 3611 | u64 last_byte; |
3526 | u64 cur_offset; | 3612 | u64 cur_offset; |
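The hole boundaries above come from the usual power-of-two round-up: with mask = sectorsize - 1, the expression (x + mask) & ~mask rounds x up to the next sector boundary, and switching from inode->i_size to the passed-in oldsize makes the hole start at the old EOF rather than whatever i_size happens to hold. A standalone demo of the arithmetic (the sizes are made up):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t sectorsize = 4096;	/* assume 4K sectors */
	uint64_t mask = sectorsize - 1;
	uint64_t oldsize = 5000, newsize = 20000;

	uint64_t hole_start = (oldsize + mask) & ~mask;	/* 8192 */
	uint64_t block_end  = (newsize + mask) & ~mask;	/* 20480 */

	printf("hole covers [%llu, %llu)\n",
	       (unsigned long long)hole_start,
	       (unsigned long long)block_end);
	return 0;
}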
@@ -3565,13 +3651,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3565 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3651 | err = btrfs_drop_extents(trans, inode, cur_offset, |
3566 | cur_offset + hole_size, | 3652 | cur_offset + hole_size, |
3567 | &hint_byte, 1); | 3653 | &hint_byte, 1); |
3568 | BUG_ON(err); | 3654 | if (err) |
3655 | break; | ||
3569 | 3656 | ||
3570 | err = btrfs_insert_file_extent(trans, root, | 3657 | err = btrfs_insert_file_extent(trans, root, |
3571 | inode->i_ino, cur_offset, 0, | 3658 | inode->i_ino, cur_offset, 0, |
3572 | 0, hole_size, 0, hole_size, | 3659 | 0, hole_size, 0, hole_size, |
3573 | 0, 0, 0); | 3660 | 0, 0, 0); |
3574 | BUG_ON(err); | 3661 | if (err) |
3662 | break; | ||
3575 | 3663 | ||
3576 | btrfs_drop_extent_cache(inode, hole_start, | 3664 | btrfs_drop_extent_cache(inode, hole_start, |
3577 | last_byte - 1, 0); | 3665 | last_byte - 1, 0); |
@@ -3591,94 +3679,58 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3591 | return err; | 3679 | return err; |
3592 | } | 3680 | } |
3593 | 3681 | ||
3594 | static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | 3682 | static int btrfs_setsize(struct inode *inode, loff_t newsize) |
3595 | { | 3683 | { |
3596 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3684 | loff_t oldsize = i_size_read(inode); |
3597 | struct btrfs_trans_handle *trans; | ||
3598 | unsigned long nr; | ||
3599 | int ret; | 3685 | int ret; |
3600 | 3686 | ||
3601 | if (attr->ia_size == inode->i_size) | 3687 | if (newsize == oldsize) |
3602 | return 0; | 3688 | return 0; |
3603 | 3689 | ||
3604 | if (attr->ia_size > inode->i_size) { | 3690 | if (newsize > oldsize) { |
3605 | unsigned long limit; | 3691 | i_size_write(inode, newsize); |
3606 | limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | 3692 | btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); |
3607 | if (attr->ia_size > inode->i_sb->s_maxbytes) | 3693 | truncate_pagecache(inode, oldsize, newsize); |
3608 | return -EFBIG; | 3694 | ret = btrfs_cont_expand(inode, oldsize, newsize); |
3609 | if (limit != RLIM_INFINITY && attr->ia_size > limit) { | ||
3610 | send_sig(SIGXFSZ, current, 0); | ||
3611 | return -EFBIG; | ||
3612 | } | ||
3613 | } | ||
3614 | |||
3615 | trans = btrfs_start_transaction(root, 5); | ||
3616 | if (IS_ERR(trans)) | ||
3617 | return PTR_ERR(trans); | ||
3618 | |||
3619 | btrfs_set_trans_block_group(trans, inode); | ||
3620 | |||
3621 | ret = btrfs_orphan_add(trans, inode); | ||
3622 | BUG_ON(ret); | ||
3623 | |||
3624 | nr = trans->blocks_used; | ||
3625 | btrfs_end_transaction(trans, root); | ||
3626 | btrfs_btree_balance_dirty(root, nr); | ||
3627 | |||
3628 | if (attr->ia_size > inode->i_size) { | ||
3629 | ret = btrfs_cont_expand(inode, attr->ia_size); | ||
3630 | if (ret) { | 3695 | if (ret) { |
3631 | btrfs_truncate(inode); | 3696 | btrfs_setsize(inode, oldsize); |
3632 | return ret; | 3697 | return ret; |
3633 | } | 3698 | } |
3634 | 3699 | ||
3635 | i_size_write(inode, attr->ia_size); | 3700 | mark_inode_dirty(inode); |
3636 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 3701 | } else { |
3637 | 3702 | ||
3638 | trans = btrfs_start_transaction(root, 0); | 3703 | /* |
3639 | BUG_ON(IS_ERR(trans)); | 3704 | * We're truncating a file that used to have good data down to |
3640 | btrfs_set_trans_block_group(trans, inode); | 3705 | * zero. Make sure it gets into the ordered flush list so that |
3641 | trans->block_rsv = root->orphan_block_rsv; | 3706 | * any new writes get down to disk quickly. |
3642 | BUG_ON(!trans->block_rsv); | 3707 | */ |
3708 | if (newsize == 0) | ||
3709 | BTRFS_I(inode)->ordered_data_close = 1; | ||
3643 | 3710 | ||
3644 | ret = btrfs_update_inode(trans, root, inode); | 3711 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ |
3645 | BUG_ON(ret); | 3712 | truncate_setsize(inode, newsize); |
3646 | if (inode->i_nlink > 0) { | 3713 | ret = btrfs_truncate(inode); |
3647 | ret = btrfs_orphan_del(trans, inode); | ||
3648 | BUG_ON(ret); | ||
3649 | } | ||
3650 | nr = trans->blocks_used; | ||
3651 | btrfs_end_transaction(trans, root); | ||
3652 | btrfs_btree_balance_dirty(root, nr); | ||
3653 | return 0; | ||
3654 | } | 3714 | } |
3655 | 3715 | ||
3656 | /* | 3716 | return ret; |
3657 | * We're truncating a file that used to have good data down to | ||
3658 | * zero. Make sure it gets into the ordered flush list so that | ||
3659 | * any new writes get down to disk quickly. | ||
3660 | */ | ||
3661 | if (attr->ia_size == 0) | ||
3662 | BTRFS_I(inode)->ordered_data_close = 1; | ||
3663 | |||
3664 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ | ||
3665 | ret = vmtruncate(inode, attr->ia_size); | ||
3666 | BUG_ON(ret); | ||
3667 | |||
3668 | return 0; | ||
3669 | } | 3717 | } |
3670 | 3718 | ||
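btrfs_setsize() now branches on the direction of the size change: growing writes the new i_size first, drops the page cache beyond the old EOF, and fills the gap with hole extents, undoing the size bump if that fails; shrinking flags a truncate-to-zero for ordered flushing and hands off to btrfs_truncate(). A hedged userspace sketch of that control flow, with the two stand-in helpers below replacing btrfs_cont_expand() and btrfs_truncate():

#include <stdio.h>

static int fill_hole(long long oldsize, long long newsize)
{
	(void)oldsize; (void)newsize;
	return 0;	/* stand-in for btrfs_cont_expand() */
}

static int shrink_on_disk(long long newsize)
{
	(void)newsize;
	return 0;	/* stand-in for btrfs_truncate() */
}

static int toy_setsize(long long *i_size, long long newsize)
{
	long long oldsize = *i_size;
	int ret = 0;

	if (newsize == oldsize)
		return 0;

	if (newsize > oldsize) {
		*i_size = newsize;		/* grow the logical size first */
		ret = fill_hole(oldsize, newsize);
		if (ret)
			*i_size = oldsize;	/* roll back, as the recursive
						 * btrfs_setsize() call does */
	} else {
		*i_size = newsize;		/* truncate_setsize() analogue */
		ret = shrink_on_disk(newsize);
	}
	return ret;
}

int main(void)
{
	long long size = 4096;

	toy_setsize(&size, 16384);	/* grow */
	toy_setsize(&size, 0);		/* shrink to zero */
	printf("final size %lld\n", size);
	return 0;
}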
3671 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | 3719 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) |
3672 | { | 3720 | { |
3673 | struct inode *inode = dentry->d_inode; | 3721 | struct inode *inode = dentry->d_inode; |
3722 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3674 | int err; | 3723 | int err; |
3675 | 3724 | ||
3725 | if (btrfs_root_readonly(root)) | ||
3726 | return -EROFS; | ||
3727 | |||
3676 | err = inode_change_ok(inode, attr); | 3728 | err = inode_change_ok(inode, attr); |
3677 | if (err) | 3729 | if (err) |
3678 | return err; | 3730 | return err; |
3679 | 3731 | ||
3680 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { | 3732 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { |
3681 | err = btrfs_setattr_size(inode, attr); | 3733 | err = btrfs_setsize(inode, attr->ia_size); |
3682 | if (err) | 3734 | if (err) |
3683 | return err; | 3735 | return err; |
3684 | } | 3736 | } |
@@ -3701,6 +3753,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
3701 | unsigned long nr; | 3753 | unsigned long nr; |
3702 | int ret; | 3754 | int ret; |
3703 | 3755 | ||
3756 | trace_btrfs_inode_evict(inode); | ||
3757 | |||
3704 | truncate_inode_pages(&inode->i_data, 0); | 3758 | truncate_inode_pages(&inode->i_data, 0); |
3705 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || | 3759 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || |
3706 | root == root->fs_info->tree_root)) | 3760 | root == root->fs_info->tree_root)) |
@@ -4043,7 +4097,6 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
4043 | BTRFS_I(inode)->root = root; | 4097 | BTRFS_I(inode)->root = root; |
4044 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); | 4098 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); |
4045 | btrfs_read_locked_inode(inode); | 4099 | btrfs_read_locked_inode(inode); |
4046 | |||
4047 | inode_tree_add(inode); | 4100 | inode_tree_add(inode); |
4048 | unlock_new_inode(inode); | 4101 | unlock_new_inode(inode); |
4049 | if (new) | 4102 | if (new) |
@@ -4115,11 +4168,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
4115 | } | 4168 | } |
4116 | srcu_read_unlock(&root->fs_info->subvol_srcu, index); | 4169 | srcu_read_unlock(&root->fs_info->subvol_srcu, index); |
4117 | 4170 | ||
4118 | if (root != sub_root) { | 4171 | if (!IS_ERR(inode) && root != sub_root) { |
4119 | down_read(&root->fs_info->cleanup_work_sem); | 4172 | down_read(&root->fs_info->cleanup_work_sem); |
4120 | if (!(inode->i_sb->s_flags & MS_RDONLY)) | 4173 | if (!(inode->i_sb->s_flags & MS_RDONLY)) |
4121 | btrfs_orphan_cleanup(sub_root); | 4174 | ret = btrfs_orphan_cleanup(sub_root); |
4122 | up_read(&root->fs_info->cleanup_work_sem); | 4175 | up_read(&root->fs_info->cleanup_work_sem); |
4176 | if (ret) | ||
4177 | inode = ERR_PTR(ret); | ||
4123 | } | 4178 | } |
4124 | 4179 | ||
4125 | return inode; | 4180 | return inode; |
@@ -4167,10 +4222,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4167 | struct btrfs_key found_key; | 4222 | struct btrfs_key found_key; |
4168 | struct btrfs_path *path; | 4223 | struct btrfs_path *path; |
4169 | int ret; | 4224 | int ret; |
4170 | u32 nritems; | ||
4171 | struct extent_buffer *leaf; | 4225 | struct extent_buffer *leaf; |
4172 | int slot; | 4226 | int slot; |
4173 | int advance; | ||
4174 | unsigned char d_type; | 4227 | unsigned char d_type; |
4175 | int over = 0; | 4228 | int over = 0; |
4176 | u32 di_cur; | 4229 | u32 di_cur; |
@@ -4213,27 +4266,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4213 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 4266 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
4214 | if (ret < 0) | 4267 | if (ret < 0) |
4215 | goto err; | 4268 | goto err; |
4216 | advance = 0; | ||
4217 | 4269 | ||
4218 | while (1) { | 4270 | while (1) { |
4219 | leaf = path->nodes[0]; | 4271 | leaf = path->nodes[0]; |
4220 | nritems = btrfs_header_nritems(leaf); | ||
4221 | slot = path->slots[0]; | 4272 | slot = path->slots[0]; |
4222 | if (advance || slot >= nritems) { | 4273 | if (slot >= btrfs_header_nritems(leaf)) { |
4223 | if (slot >= nritems - 1) { | 4274 | ret = btrfs_next_leaf(root, path); |
4224 | ret = btrfs_next_leaf(root, path); | 4275 | if (ret < 0) |
4225 | if (ret) | 4276 | goto err; |
4226 | break; | 4277 | else if (ret > 0) |
4227 | leaf = path->nodes[0]; | 4278 | break; |
4228 | nritems = btrfs_header_nritems(leaf); | 4279 | continue; |
4229 | slot = path->slots[0]; | ||
4230 | } else { | ||
4231 | slot++; | ||
4232 | path->slots[0]++; | ||
4233 | } | ||
4234 | } | 4280 | } |
4235 | 4281 | ||
4236 | advance = 1; | ||
4237 | item = btrfs_item_nr(leaf, slot); | 4282 | item = btrfs_item_nr(leaf, slot); |
4238 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 4283 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
4239 | 4284 | ||
@@ -4242,7 +4287,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4242 | if (btrfs_key_type(&found_key) != key_type) | 4287 | if (btrfs_key_type(&found_key) != key_type) |
4243 | break; | 4288 | break; |
4244 | if (found_key.offset < filp->f_pos) | 4289 | if (found_key.offset < filp->f_pos) |
4245 | continue; | 4290 | goto next; |
4246 | 4291 | ||
4247 | filp->f_pos = found_key.offset; | 4292 | filp->f_pos = found_key.offset; |
4248 | 4293 | ||
@@ -4253,6 +4298,9 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4253 | while (di_cur < di_total) { | 4298 | while (di_cur < di_total) { |
4254 | struct btrfs_key location; | 4299 | struct btrfs_key location; |
4255 | 4300 | ||
4301 | if (verify_dir_item(root, leaf, di)) | ||
4302 | break; | ||
4303 | |||
4256 | name_len = btrfs_dir_name_len(leaf, di); | 4304 | name_len = btrfs_dir_name_len(leaf, di); |
4257 | if (name_len <= sizeof(tmp_name)) { | 4305 | if (name_len <= sizeof(tmp_name)) { |
4258 | name_ptr = tmp_name; | 4306 | name_ptr = tmp_name; |
@@ -4292,6 +4340,8 @@ skip: | |||
4292 | di_cur += di_len; | 4340 | di_cur += di_len; |
4293 | di = (struct btrfs_dir_item *)((char *)di + di_len); | 4341 | di = (struct btrfs_dir_item *)((char *)di + di_len); |
4294 | } | 4342 | } |
4343 | next: | ||
4344 | path->slots[0]++; | ||
4295 | } | 4345 | } |
4296 | 4346 | ||
4297 | /* Reached end of directory/root. Bump pos past the last item. */ | 4347 | /* Reached end of directory/root. Bump pos past the last item. */ |
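The rewritten readdir loop drops the advance/nritems bookkeeping in favor of a single idiom: process the current slot, bump path->slots[0] at the bottom of the loop via the next: label, and call btrfs_next_leaf() only when the slot runs off the current leaf. A toy model of the same walk, with arrays standing in for leaves:

#include <stdio.h>

#define NR_LEAVES  3
#define LEAF_ITEMS 4

static int leaves[NR_LEAVES][LEAF_ITEMS] = {
	{ 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 10, 11, 12 },
};

int main(void)
{
	int leaf = 0, slot = 0;
	int f_pos = 6;		/* mimics "found_key.offset < filp->f_pos" */

	while (1) {
		if (slot >= LEAF_ITEMS) {	/* slot ran off this leaf */
			if (++leaf >= NR_LEAVES)
				break;		/* btrfs_next_leaf() returned > 0 */
			slot = 0;
			continue;
		}
		if (leaves[leaf][slot] < f_pos)
			goto next;		/* skip, but still advance */
		printf("emit %d\n", leaves[leaf][slot]);
next:
		slot++;				/* path->slots[0]++ */
	}
	return 0;
}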
@@ -4328,6 +4378,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4328 | trans = btrfs_join_transaction_nolock(root, 1); | 4378 | trans = btrfs_join_transaction_nolock(root, 1); |
4329 | else | 4379 | else |
4330 | trans = btrfs_join_transaction(root, 1); | 4380 | trans = btrfs_join_transaction(root, 1); |
4381 | if (IS_ERR(trans)) | ||
4382 | return PTR_ERR(trans); | ||
4331 | btrfs_set_trans_block_group(trans, inode); | 4383 | btrfs_set_trans_block_group(trans, inode); |
4332 | if (nolock) | 4384 | if (nolock) |
4333 | ret = btrfs_end_transaction_nolock(trans, root); | 4385 | ret = btrfs_end_transaction_nolock(trans, root); |
@@ -4353,6 +4405,7 @@ void btrfs_dirty_inode(struct inode *inode) | |||
4353 | return; | 4405 | return; |
4354 | 4406 | ||
4355 | trans = btrfs_join_transaction(root, 1); | 4407 | trans = btrfs_join_transaction(root, 1); |
4408 | BUG_ON(IS_ERR(trans)); | ||
4356 | btrfs_set_trans_block_group(trans, inode); | 4409 | btrfs_set_trans_block_group(trans, inode); |
4357 | 4410 | ||
4358 | ret = btrfs_update_inode(trans, root, inode); | 4411 | ret = btrfs_update_inode(trans, root, inode); |
@@ -4481,12 +4534,17 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4481 | BUG_ON(!path); | 4534 | BUG_ON(!path); |
4482 | 4535 | ||
4483 | inode = new_inode(root->fs_info->sb); | 4536 | inode = new_inode(root->fs_info->sb); |
4484 | if (!inode) | 4537 | if (!inode) { |
4538 | btrfs_free_path(path); | ||
4485 | return ERR_PTR(-ENOMEM); | 4539 | return ERR_PTR(-ENOMEM); |
4540 | } | ||
4486 | 4541 | ||
4487 | if (dir) { | 4542 | if (dir) { |
4543 | trace_btrfs_inode_request(dir); | ||
4544 | |||
4488 | ret = btrfs_set_inode_index(dir, index); | 4545 | ret = btrfs_set_inode_index(dir, index); |
4489 | if (ret) { | 4546 | if (ret) { |
4547 | btrfs_free_path(path); | ||
4490 | iput(inode); | 4548 | iput(inode); |
4491 | return ERR_PTR(ret); | 4549 | return ERR_PTR(ret); |
4492 | } | 4550 | } |
@@ -4553,12 +4611,16 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4553 | if ((mode & S_IFREG)) { | 4611 | if ((mode & S_IFREG)) { |
4554 | if (btrfs_test_opt(root, NODATASUM)) | 4612 | if (btrfs_test_opt(root, NODATASUM)) |
4555 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; | 4613 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; |
4556 | if (btrfs_test_opt(root, NODATACOW)) | 4614 | if (btrfs_test_opt(root, NODATACOW) || |
4615 | (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW)) | ||
4557 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; | 4616 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; |
4558 | } | 4617 | } |
4559 | 4618 | ||
4560 | insert_inode_hash(inode); | 4619 | insert_inode_hash(inode); |
4561 | inode_tree_add(inode); | 4620 | inode_tree_add(inode); |
4621 | |||
4622 | trace_btrfs_inode_new(inode); | ||
4623 | |||
4562 | return inode; | 4624 | return inode; |
4563 | fail: | 4625 | fail: |
4564 | if (dir) | 4626 | if (dir) |
@@ -4673,7 +4735,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4673 | if (IS_ERR(inode)) | 4735 | if (IS_ERR(inode)) |
4674 | goto out_unlock; | 4736 | goto out_unlock; |
4675 | 4737 | ||
4676 | err = btrfs_init_inode_security(trans, inode, dir); | 4738 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); |
4677 | if (err) { | 4739 | if (err) { |
4678 | drop_inode = 1; | 4740 | drop_inode = 1; |
4679 | goto out_unlock; | 4741 | goto out_unlock; |
@@ -4734,7 +4796,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4734 | if (IS_ERR(inode)) | 4796 | if (IS_ERR(inode)) |
4735 | goto out_unlock; | 4797 | goto out_unlock; |
4736 | 4798 | ||
4737 | err = btrfs_init_inode_security(trans, inode, dir); | 4799 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); |
4738 | if (err) { | 4800 | if (err) { |
4739 | drop_inode = 1; | 4801 | drop_inode = 1; |
4740 | goto out_unlock; | 4802 | goto out_unlock; |
@@ -4775,30 +4837,31 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4775 | int err; | 4837 | int err; |
4776 | int drop_inode = 0; | 4838 | int drop_inode = 0; |
4777 | 4839 | ||
4778 | if (inode->i_nlink == 0) | ||
4779 | return -ENOENT; | ||
4780 | |||
4781 | /* do not allow sys_link's with other subvols of the same device */ | 4840 | /* do not allow sys_link's with other subvols of the same device */ |
4782 | if (root->objectid != BTRFS_I(inode)->root->objectid) | 4841 | if (root->objectid != BTRFS_I(inode)->root->objectid) |
4783 | return -EPERM; | 4842 | return -EXDEV; |
4784 | 4843 | ||
4785 | btrfs_inc_nlink(inode); | 4844 | if (inode->i_nlink == ~0U) |
4786 | inode->i_ctime = CURRENT_TIME; | 4845 | return -EMLINK; |
4787 | 4846 | ||
4788 | err = btrfs_set_inode_index(dir, &index); | 4847 | err = btrfs_set_inode_index(dir, &index); |
4789 | if (err) | 4848 | if (err) |
4790 | goto fail; | 4849 | goto fail; |
4791 | 4850 | ||
4792 | /* | 4851 | /* |
4793 | * 1 item for inode ref | 4852 | * 2 items for inode and inode ref |
4794 | * 2 items for dir items | 4853 | * 2 items for dir items |
4854 | * 1 item for parent inode | ||
4795 | */ | 4855 | */ |
4796 | trans = btrfs_start_transaction(root, 3); | 4856 | trans = btrfs_start_transaction(root, 5); |
4797 | if (IS_ERR(trans)) { | 4857 | if (IS_ERR(trans)) { |
4798 | err = PTR_ERR(trans); | 4858 | err = PTR_ERR(trans); |
4799 | goto fail; | 4859 | goto fail; |
4800 | } | 4860 | } |
4801 | 4861 | ||
4862 | btrfs_inc_nlink(inode); | ||
4863 | inode->i_ctime = CURRENT_TIME; | ||
4864 | |||
4802 | btrfs_set_trans_block_group(trans, dir); | 4865 | btrfs_set_trans_block_group(trans, dir); |
4803 | ihold(inode); | 4866 | ihold(inode); |
4804 | 4867 | ||
@@ -4862,7 +4925,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4862 | 4925 | ||
4863 | drop_on_err = 1; | 4926 | drop_on_err = 1; |
4864 | 4927 | ||
4865 | err = btrfs_init_inode_security(trans, inode, dir); | 4928 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); |
4866 | if (err) | 4929 | if (err) |
4867 | goto out_fail; | 4930 | goto out_fail; |
4868 | 4931 | ||
@@ -4928,8 +4991,10 @@ static noinline int uncompress_inline(struct btrfs_path *path, | |||
4928 | size_t max_size; | 4991 | size_t max_size; |
4929 | unsigned long inline_size; | 4992 | unsigned long inline_size; |
4930 | unsigned long ptr; | 4993 | unsigned long ptr; |
4994 | int compress_type; | ||
4931 | 4995 | ||
4932 | WARN_ON(pg_offset != 0); | 4996 | WARN_ON(pg_offset != 0); |
4997 | compress_type = btrfs_file_extent_compression(leaf, item); | ||
4933 | max_size = btrfs_file_extent_ram_bytes(leaf, item); | 4998 | max_size = btrfs_file_extent_ram_bytes(leaf, item); |
4934 | inline_size = btrfs_file_extent_inline_item_len(leaf, | 4999 | inline_size = btrfs_file_extent_inline_item_len(leaf, |
4935 | btrfs_item_nr(leaf, path->slots[0])); | 5000 | btrfs_item_nr(leaf, path->slots[0])); |
@@ -4939,8 +5004,8 @@ static noinline int uncompress_inline(struct btrfs_path *path, | |||
4939 | read_extent_buffer(leaf, tmp, ptr, inline_size); | 5004 | read_extent_buffer(leaf, tmp, ptr, inline_size); |
4940 | 5005 | ||
4941 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); | 5006 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); |
4942 | ret = btrfs_zlib_decompress(tmp, page, extent_offset, | 5007 | ret = btrfs_decompress(compress_type, tmp, page, |
4943 | inline_size, max_size); | 5008 | extent_offset, inline_size, max_size); |
4944 | if (ret) { | 5009 | if (ret) { |
4945 | char *kaddr = kmap_atomic(page, KM_USER0); | 5010 | char *kaddr = kmap_atomic(page, KM_USER0); |
4946 | unsigned long copy_size = min_t(u64, | 5011 | unsigned long copy_size = min_t(u64, |
@@ -4982,7 +5047,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
4982 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 5047 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
4983 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 5048 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
4984 | struct btrfs_trans_handle *trans = NULL; | 5049 | struct btrfs_trans_handle *trans = NULL; |
4985 | int compressed; | 5050 | int compress_type; |
4986 | 5051 | ||
4987 | again: | 5052 | again: |
4988 | read_lock(&em_tree->lock); | 5053 | read_lock(&em_tree->lock); |
@@ -5041,7 +5106,7 @@ again: | |||
5041 | 5106 | ||
5042 | found_type = btrfs_file_extent_type(leaf, item); | 5107 | found_type = btrfs_file_extent_type(leaf, item); |
5043 | extent_start = found_key.offset; | 5108 | extent_start = found_key.offset; |
5044 | compressed = btrfs_file_extent_compression(leaf, item); | 5109 | compress_type = btrfs_file_extent_compression(leaf, item); |
5045 | if (found_type == BTRFS_FILE_EXTENT_REG || | 5110 | if (found_type == BTRFS_FILE_EXTENT_REG || |
5046 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | 5111 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
5047 | extent_end = extent_start + | 5112 | extent_end = extent_start + |
@@ -5087,8 +5152,9 @@ again: | |||
5087 | em->block_start = EXTENT_MAP_HOLE; | 5152 | em->block_start = EXTENT_MAP_HOLE; |
5088 | goto insert; | 5153 | goto insert; |
5089 | } | 5154 | } |
5090 | if (compressed) { | 5155 | if (compress_type != BTRFS_COMPRESS_NONE) { |
5091 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5156 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
5157 | em->compress_type = compress_type; | ||
5092 | em->block_start = bytenr; | 5158 | em->block_start = bytenr; |
5093 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, | 5159 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, |
5094 | item); | 5160 | item); |
@@ -5122,12 +5188,14 @@ again: | |||
5122 | em->len = (copy_size + root->sectorsize - 1) & | 5188 | em->len = (copy_size + root->sectorsize - 1) & |
5123 | ~((u64)root->sectorsize - 1); | 5189 | ~((u64)root->sectorsize - 1); |
5124 | em->orig_start = EXTENT_MAP_INLINE; | 5190 | em->orig_start = EXTENT_MAP_INLINE; |
5125 | if (compressed) | 5191 | if (compress_type) { |
5126 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5192 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
5193 | em->compress_type = compress_type; | ||
5194 | } | ||
5127 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; | 5195 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; |
5128 | if (create == 0 && !PageUptodate(page)) { | 5196 | if (create == 0 && !PageUptodate(page)) { |
5129 | if (btrfs_file_extent_compression(leaf, item) == | 5197 | if (btrfs_file_extent_compression(leaf, item) != |
5130 | BTRFS_COMPRESS_ZLIB) { | 5198 | BTRFS_COMPRESS_NONE) { |
5131 | ret = uncompress_inline(path, inode, page, | 5199 | ret = uncompress_inline(path, inode, page, |
5132 | pg_offset, | 5200 | pg_offset, |
5133 | extent_offset, item); | 5201 | extent_offset, item); |
@@ -5152,6 +5220,8 @@ again: | |||
5152 | em = NULL; | 5220 | em = NULL; |
5153 | btrfs_release_path(root, path); | 5221 | btrfs_release_path(root, path); |
5154 | trans = btrfs_join_transaction(root, 1); | 5222 | trans = btrfs_join_transaction(root, 1); |
5223 | if (IS_ERR(trans)) | ||
5224 | return ERR_CAST(trans); | ||
5155 | goto again; | 5225 | goto again; |
5156 | } | 5226 | } |
5157 | map = kmap(page); | 5227 | map = kmap(page); |
@@ -5161,7 +5231,7 @@ again: | |||
5161 | btrfs_mark_buffer_dirty(leaf); | 5231 | btrfs_mark_buffer_dirty(leaf); |
5162 | } | 5232 | } |
5163 | set_extent_uptodate(io_tree, em->start, | 5233 | set_extent_uptodate(io_tree, em->start, |
5164 | extent_map_end(em) - 1, GFP_NOFS); | 5234 | extent_map_end(em) - 1, NULL, GFP_NOFS); |
5165 | goto insert; | 5235 | goto insert; |
5166 | } else { | 5236 | } else { |
5167 | printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); | 5237 | printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); |
@@ -5228,6 +5298,9 @@ insert: | |||
5228 | } | 5298 | } |
5229 | write_unlock(&em_tree->lock); | 5299 | write_unlock(&em_tree->lock); |
5230 | out: | 5300 | out: |
5301 | |||
5302 | trace_btrfs_get_extent(root, em); | ||
5303 | |||
5231 | if (path) | 5304 | if (path) |
5232 | btrfs_free_path(path); | 5305 | btrfs_free_path(path); |
5233 | if (trans) { | 5306 | if (trans) { |
@@ -5242,22 +5315,157 @@ out: | |||
5242 | return em; | 5315 | return em; |
5243 | } | 5316 | } |
5244 | 5317 | ||
5318 | struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, | ||
5319 | size_t pg_offset, u64 start, u64 len, | ||
5320 | int create) | ||
5321 | { | ||
5322 | struct extent_map *em; | ||
5323 | struct extent_map *hole_em = NULL; | ||
5324 | u64 range_start = start; | ||
5325 | u64 end; | ||
5326 | u64 found; | ||
5327 | u64 found_end; | ||
5328 | int err = 0; | ||
5329 | |||
5330 | em = btrfs_get_extent(inode, page, pg_offset, start, len, create); | ||
5331 | if (IS_ERR(em)) | ||
5332 | return em; | ||
5333 | if (em) { | ||
5334 | /* | ||
5335 | * if our em maps to a hole, there might | ||
5336 | * actually be delalloc bytes behind it | ||
5337 | */ | ||
5338 | if (em->block_start != EXTENT_MAP_HOLE) | ||
5339 | return em; | ||
5340 | else | ||
5341 | hole_em = em; | ||
5342 | } | ||
5343 | |||
5344 | /* check to see if we've wrapped (len == -1 or similar) */ | ||
5345 | end = start + len; | ||
5346 | if (end < start) | ||
5347 | end = (u64)-1; | ||
5348 | else | ||
5349 | end -= 1; | ||
5350 | |||
5351 | em = NULL; | ||
5352 | |||
5353 | /* ok, we didn't find anything, let's look for delalloc */ | ||
5354 | found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start, | ||
5355 | end, len, EXTENT_DELALLOC, 1); | ||
5356 | found_end = range_start + found; | ||
5357 | if (found_end < range_start) | ||
5358 | found_end = (u64)-1; | ||
5359 | |||
5360 | /* | ||
5361 | * we didn't find anything useful, so return | ||
5362 | * the original results from get_extent() | ||
5363 | */ | ||
5364 | if (range_start > end || found_end <= start) { | ||
5365 | em = hole_em; | ||
5366 | hole_em = NULL; | ||
5367 | goto out; | ||
5368 | } | ||
5369 | |||
5370 | /* adjust the range_start to make sure it doesn't | ||
5371 | * go backwards from the start they passed in | ||
5372 | */ | ||
5373 | range_start = max(start, range_start); | ||
5374 | found = found_end - range_start; | ||
5375 | |||
5376 | if (found > 0) { | ||
5377 | u64 hole_start = start; | ||
5378 | u64 hole_len = len; | ||
5379 | |||
5380 | em = alloc_extent_map(GFP_NOFS); | ||
5381 | if (!em) { | ||
5382 | err = -ENOMEM; | ||
5383 | goto out; | ||
5384 | } | ||
5385 | /* | ||
5386 | * when btrfs_get_extent can't find anything it | ||
5387 | * returns one huge hole | ||
5388 | * | ||
5389 | * make sure what it found really fits our range, and | ||
5390 | * adjust to make sure it is based on the start from | ||
5391 | * the caller | ||
5392 | */ | ||
5393 | if (hole_em) { | ||
5394 | u64 calc_end = extent_map_end(hole_em); | ||
5395 | |||
5396 | if (calc_end <= start || (hole_em->start > end)) { | ||
5397 | free_extent_map(hole_em); | ||
5398 | hole_em = NULL; | ||
5399 | } else { | ||
5400 | hole_start = max(hole_em->start, start); | ||
5401 | hole_len = calc_end - hole_start; | ||
5402 | } | ||
5403 | } | ||
5404 | em->bdev = NULL; | ||
5405 | if (hole_em && range_start > hole_start) { | ||
5406 | /* our hole starts before our delalloc, so we | ||
5407 | * have to return just the parts of the hole | ||
5408 | * that go until the delalloc starts | ||
5409 | */ | ||
5410 | em->len = min(hole_len, | ||
5411 | range_start - hole_start); | ||
5412 | em->start = hole_start; | ||
5413 | em->orig_start = hole_start; | ||
5414 | /* | ||
5415 | * don't adjust block start at all, | ||
5416 | * it is fixed at EXTENT_MAP_HOLE | ||
5417 | */ | ||
5418 | em->block_start = hole_em->block_start; | ||
5419 | em->block_len = hole_len; | ||
5420 | } else { | ||
5421 | em->start = range_start; | ||
5422 | em->len = found; | ||
5423 | em->orig_start = range_start; | ||
5424 | em->block_start = EXTENT_MAP_DELALLOC; | ||
5425 | em->block_len = found; | ||
5426 | } | ||
5427 | } else if (hole_em) { | ||
5428 | return hole_em; | ||
5429 | } | ||
5430 | out: | ||
5431 | |||
5432 | free_extent_map(hole_em); | ||
5433 | if (err) { | ||
5434 | free_extent_map(em); | ||
5435 | return ERR_PTR(err); | ||
5436 | } | ||
5437 | return em; | ||
5438 | } | ||
5439 | |||
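btrfs_get_extent_fiemap() exists because a hole returned by btrfs_get_extent() may have dirty delalloc bytes behind it that fiemap should still report. When both a hole and delalloc are found, the hole is clipped so it only reaches up to where the delalloc begins. The clipping arithmetic in isolation, with made-up offsets:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* a hole at [0, 16384) with delalloc bytes found at [8192, 12288) */
	uint64_t hole_start = 0, hole_len = 16384;
	uint64_t range_start = 8192, found = 4096;

	if (range_start > hole_start) {
		/* report only the part of the hole that runs up to
		 * the delalloc, i.e. min(hole_len, range_start - hole_start) */
		uint64_t len = range_start - hole_start;

		if (len > hole_len)
			len = hole_len;
		printf("HOLE     [%llu, %llu)\n",
		       (unsigned long long)hole_start,
		       (unsigned long long)(hole_start + len));
	} else {
		printf("DELALLOC [%llu, %llu)\n",
		       (unsigned long long)range_start,
		       (unsigned long long)(range_start + found));
	}
	return 0;
}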
5245 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | 5440 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, |
5441 | struct extent_map *em, | ||
5246 | u64 start, u64 len) | 5442 | u64 start, u64 len) |
5247 | { | 5443 | { |
5248 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5444 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5249 | struct btrfs_trans_handle *trans; | 5445 | struct btrfs_trans_handle *trans; |
5250 | struct extent_map *em; | ||
5251 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 5446 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
5252 | struct btrfs_key ins; | 5447 | struct btrfs_key ins; |
5253 | u64 alloc_hint; | 5448 | u64 alloc_hint; |
5254 | int ret; | 5449 | int ret; |
5450 | bool insert = false; | ||
5255 | 5451 | ||
5256 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | 5452 | /* |
5453 | * Ok if the extent map we looked up is a hole and is for the exact | ||
5454 | * range we want, there is no reason to allocate a new one, however if | ||
5455 | * it is not right then we need to free this one and drop the cache for | ||
5456 | * our range. | ||
5457 | */ | ||
5458 | if (em->block_start != EXTENT_MAP_HOLE || em->start != start || | ||
5459 | em->len != len) { | ||
5460 | free_extent_map(em); | ||
5461 | em = NULL; | ||
5462 | insert = true; | ||
5463 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | ||
5464 | } | ||
5257 | 5465 | ||
5258 | trans = btrfs_join_transaction(root, 0); | 5466 | trans = btrfs_join_transaction(root, 0); |
5259 | if (!trans) | 5467 | if (IS_ERR(trans)) |
5260 | return ERR_PTR(-ENOMEM); | 5468 | return ERR_CAST(trans); |
5261 | 5469 | ||
5262 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 5470 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
5263 | 5471 | ||
@@ -5269,10 +5477,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
5269 | goto out; | 5477 | goto out; |
5270 | } | 5478 | } |
5271 | 5479 | ||
5272 | em = alloc_extent_map(GFP_NOFS); | ||
5273 | if (!em) { | 5480 | if (!em) { |
5274 | em = ERR_PTR(-ENOMEM); | 5481 | em = alloc_extent_map(GFP_NOFS); |
5275 | goto out; | 5482 | if (!em) { |
5483 | em = ERR_PTR(-ENOMEM); | ||
5484 | goto out; | ||
5485 | } | ||
5276 | } | 5486 | } |
5277 | 5487 | ||
5278 | em->start = start; | 5488 | em->start = start; |
@@ -5282,9 +5492,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
5282 | em->block_start = ins.objectid; | 5492 | em->block_start = ins.objectid; |
5283 | em->block_len = ins.offset; | 5493 | em->block_len = ins.offset; |
5284 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 5494 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
5495 | |||
5496 | /* | ||
5497 | * We need to do this because if we're using the original em we searched | ||
5498 | * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that. | ||
5499 | */ | ||
5500 | em->flags = 0; | ||
5285 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 5501 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
5286 | 5502 | ||
5287 | while (1) { | 5503 | while (insert) { |
5288 | write_lock(&em_tree->lock); | 5504 | write_lock(&em_tree->lock); |
5289 | ret = add_extent_mapping(em_tree, em); | 5505 | ret = add_extent_mapping(em_tree, em); |
5290 | write_unlock(&em_tree->lock); | 5506 | write_unlock(&em_tree->lock); |
@@ -5481,7 +5697,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
5481 | * while we look for nocow cross refs | 5697 | * while we look for nocow cross refs |
5482 | */ | 5698 | */ |
5483 | trans = btrfs_join_transaction(root, 0); | 5699 | trans = btrfs_join_transaction(root, 0); |
5484 | if (!trans) | 5700 | if (IS_ERR(trans)) |
5485 | goto must_cow; | 5701 | goto must_cow; |
5486 | 5702 | ||
5487 | if (can_nocow_odirect(trans, inode, start, len) == 1) { | 5703 | if (can_nocow_odirect(trans, inode, start, len) == 1) { |
@@ -5502,8 +5718,7 @@ must_cow: | |||
5502 | * it above | 5718 | * it above |
5503 | */ | 5719 | */ |
5504 | len = bh_result->b_size; | 5720 | len = bh_result->b_size; |
5505 | free_extent_map(em); | 5721 | em = btrfs_new_extent_direct(inode, em, start, len); |
5506 | em = btrfs_new_extent_direct(inode, start, len); | ||
5507 | if (IS_ERR(em)) | 5722 | if (IS_ERR(em)) |
5508 | return PTR_ERR(em); | 5723 | return PTR_ERR(em); |
5509 | len = min(len, em->len - (start - em->start)); | 5724 | len = min(len, em->len - (start - em->start)); |
@@ -5589,6 +5804,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) | |||
5589 | 5804 | ||
5590 | kfree(dip->csums); | 5805 | kfree(dip->csums); |
5591 | kfree(dip); | 5806 | kfree(dip); |
5807 | |||
5808 | /* If we had a csum failure make sure to clear the uptodate flag */ | ||
5809 | if (err) | ||
5810 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
5592 | dio_end_io(bio, err); | 5811 | dio_end_io(bio, err); |
5593 | } | 5812 | } |
5594 | 5813 | ||
@@ -5616,7 +5835,7 @@ again: | |||
5616 | BUG_ON(!ordered); | 5835 | BUG_ON(!ordered); |
5617 | 5836 | ||
5618 | trans = btrfs_join_transaction(root, 1); | 5837 | trans = btrfs_join_transaction(root, 1); |
5619 | if (!trans) { | 5838 | if (IS_ERR(trans)) { |
5620 | err = -ENOMEM; | 5839 | err = -ENOMEM; |
5621 | goto out; | 5840 | goto out; |
5622 | } | 5841 | } |
@@ -5662,8 +5881,10 @@ again: | |||
5662 | } | 5881 | } |
5663 | 5882 | ||
5664 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | 5883 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); |
5665 | btrfs_ordered_update_i_size(inode, 0, ordered); | 5884 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); |
5666 | btrfs_update_inode(trans, root, inode); | 5885 | if (!ret) |
5886 | btrfs_update_inode(trans, root, inode); | ||
5887 | ret = 0; | ||
5667 | out_unlock: | 5888 | out_unlock: |
5668 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, | 5889 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, |
5669 | ordered->file_offset + ordered->len - 1, | 5890 | ordered->file_offset + ordered->len - 1, |
@@ -5690,6 +5911,10 @@ out_done: | |||
5690 | 5911 | ||
5691 | kfree(dip->csums); | 5912 | kfree(dip->csums); |
5692 | kfree(dip); | 5913 | kfree(dip); |
5914 | |||
5915 | /* If we had an error make sure to clear the uptodate flag */ | ||
5916 | if (err) | ||
5917 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
5693 | dio_end_io(bio, err); | 5918 | dio_end_io(bio, err); |
5694 | } | 5919 | } |
5695 | 5920 | ||
@@ -5745,7 +5970,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, | |||
5745 | 5970 | ||
5746 | static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | 5971 | static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, |
5747 | int rw, u64 file_offset, int skip_sum, | 5972 | int rw, u64 file_offset, int skip_sum, |
5748 | u32 *csums) | 5973 | u32 *csums, int async_submit) |
5749 | { | 5974 | { |
5750 | int write = rw & REQ_WRITE; | 5975 | int write = rw & REQ_WRITE; |
5751 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5976 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -5756,18 +5981,33 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
5756 | if (ret) | 5981 | if (ret) |
5757 | goto err; | 5982 | goto err; |
5758 | 5983 | ||
5759 | if (write && !skip_sum) { | 5984 | if (skip_sum) |
5985 | goto map; | ||
5986 | |||
5987 | if (write && async_submit) { | ||
5760 | ret = btrfs_wq_submit_bio(root->fs_info, | 5988 | ret = btrfs_wq_submit_bio(root->fs_info, |
5761 | inode, rw, bio, 0, 0, | 5989 | inode, rw, bio, 0, 0, |
5762 | file_offset, | 5990 | file_offset, |
5763 | __btrfs_submit_bio_start_direct_io, | 5991 | __btrfs_submit_bio_start_direct_io, |
5764 | __btrfs_submit_bio_done); | 5992 | __btrfs_submit_bio_done); |
5765 | goto err; | 5993 | goto err; |
5766 | } else if (!skip_sum) | 5994 | } else if (write) { |
5767 | btrfs_lookup_bio_sums_dio(root, inode, bio, | 5995 | /* |
5996 | * If we aren't doing async submit, calculate the csum of the | ||
5997 | * bio now. | ||
5998 | */ | ||
5999 | ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1); | ||
6000 | if (ret) | ||
6001 | goto err; | ||
6002 | } else if (!skip_sum) { | ||
6003 | ret = btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
5768 | file_offset, csums); | 6004 | file_offset, csums); |
6005 | if (ret) | ||
6006 | goto err; | ||
6007 | } | ||
5769 | 6008 | ||
5770 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | 6009 | map: |
6010 | ret = btrfs_map_bio(root, rw, bio, 0, async_submit); | ||
5771 | err: | 6011 | err: |
5772 | bio_put(bio); | 6012 | bio_put(bio); |
5773 | return ret; | 6013 | return ret; |
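After the rework, __btrfs_submit_dio_bio() picks one of four checksum strategies before mapping the bio: none at all for skip_sum, the async workqueue for writes that were split, an inline btrfs_csum_one_bio() for unsplit writes, and a csum lookup for reads. The decision tree restated as a small standalone function (the enum and helper are illustrative, not kernel API):

#include <stdbool.h>
#include <stdio.h>

enum dio_csum_action {
	CSUM_NONE,	/* skip_sum: straight to btrfs_map_bio() */
	CSUM_ASYNC_WQ,	/* split write: btrfs_wq_submit_bio() */
	CSUM_INLINE,	/* unsplit write: btrfs_csum_one_bio() now */
	CSUM_LOOKUP,	/* read: btrfs_lookup_bio_sums_dio() */
};

static enum dio_csum_action pick_action(bool write, bool skip_sum,
					bool async_submit)
{
	if (skip_sum)
		return CSUM_NONE;
	if (write)
		return async_submit ? CSUM_ASYNC_WQ : CSUM_INLINE;
	return CSUM_LOOKUP;
}

int main(void)
{
	/* an unsplit write checksums inline */
	printf("%d\n", pick_action(true, false, false));	/* CSUM_INLINE */
	return 0;
}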
@@ -5789,13 +6029,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
5789 | int nr_pages = 0; | 6029 | int nr_pages = 0; |
5790 | u32 *csums = dip->csums; | 6030 | u32 *csums = dip->csums; |
5791 | int ret = 0; | 6031 | int ret = 0; |
5792 | 6032 | int async_submit = 0; | |
5793 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); | 6033 | int write = rw & REQ_WRITE; |
5794 | if (!bio) | ||
5795 | return -ENOMEM; | ||
5796 | bio->bi_private = dip; | ||
5797 | bio->bi_end_io = btrfs_end_dio_bio; | ||
5798 | atomic_inc(&dip->pending_bios); | ||
5799 | 6034 | ||
5800 | map_length = orig_bio->bi_size; | 6035 | map_length = orig_bio->bi_size; |
5801 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | 6036 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, |
@@ -5805,6 +6040,19 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
5805 | return -EIO; | 6040 | return -EIO; |
5806 | } | 6041 | } |
5807 | 6042 | ||
6043 | if (map_length >= orig_bio->bi_size) { | ||
6044 | bio = orig_bio; | ||
6045 | goto submit; | ||
6046 | } | ||
6047 | |||
6048 | async_submit = 1; | ||
6049 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); | ||
6050 | if (!bio) | ||
6051 | return -ENOMEM; | ||
6052 | bio->bi_private = dip; | ||
6053 | bio->bi_end_io = btrfs_end_dio_bio; | ||
6054 | atomic_inc(&dip->pending_bios); | ||
6055 | |||
5808 | while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { | 6056 | while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { |
5809 | if (unlikely(map_length < submit_len + bvec->bv_len || | 6057 | if (unlikely(map_length < submit_len + bvec->bv_len || |
5810 | bio_add_page(bio, bvec->bv_page, bvec->bv_len, | 6058 | bio_add_page(bio, bvec->bv_page, bvec->bv_len, |
@@ -5818,14 +6066,15 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
5818 | atomic_inc(&dip->pending_bios); | 6066 | atomic_inc(&dip->pending_bios); |
5819 | ret = __btrfs_submit_dio_bio(bio, inode, rw, | 6067 | ret = __btrfs_submit_dio_bio(bio, inode, rw, |
5820 | file_offset, skip_sum, | 6068 | file_offset, skip_sum, |
5821 | csums); | 6069 | csums, async_submit); |
5822 | if (ret) { | 6070 | if (ret) { |
5823 | bio_put(bio); | 6071 | bio_put(bio); |
5824 | atomic_dec(&dip->pending_bios); | 6072 | atomic_dec(&dip->pending_bios); |
5825 | goto out_err; | 6073 | goto out_err; |
5826 | } | 6074 | } |
5827 | 6075 | ||
5828 | if (!skip_sum) | 6076 | /* Writes use the ordered csums */
6077 | if (!write && !skip_sum) | ||
5829 | csums = csums + nr_pages; | 6078 | csums = csums + nr_pages; |
5830 | start_sector += submit_len >> 9; | 6079 | start_sector += submit_len >> 9; |
5831 | file_offset += submit_len; | 6080 | file_offset += submit_len; |
@@ -5854,8 +6103,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
5854 | } | 6103 | } |
5855 | } | 6104 | } |
5856 | 6105 | ||
6106 | submit: | ||
5857 | ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, | 6107 | ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, |
5858 | csums); | 6108 | csums, async_submit); |
5859 | if (!ret) | 6109 | if (!ret) |
5860 | return 0; | 6110 | return 0; |
5861 | 6111 | ||
@@ -5893,9 +6143,11 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | |||
5893 | } | 6143 | } |
5894 | dip->csums = NULL; | 6144 | dip->csums = NULL; |
5895 | 6145 | ||
5896 | if (!skip_sum) { | 6146 | /* Writes use the ordered csum stuff, so we don't need dip->csums */
6147 | if (!write && !skip_sum) { | ||
5897 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); | 6148 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); |
5898 | if (!dip->csums) { | 6149 | if (!dip->csums) { |
6150 | kfree(dip); | ||
5899 | ret = -ENOMEM; | 6151 | ret = -ENOMEM; |
5900 | goto free_ordered; | 6152 | goto free_ordered; |
5901 | } | 6153 | } |
@@ -5948,6 +6200,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io | |||
5948 | unsigned long nr_segs) | 6200 | unsigned long nr_segs) |
5949 | { | 6201 | { |
5950 | int seg; | 6202 | int seg; |
6203 | int i; | ||
5951 | size_t size; | 6204 | size_t size; |
5952 | unsigned long addr; | 6205 | unsigned long addr; |
5953 | unsigned blocksize_mask = root->sectorsize - 1; | 6206 | unsigned blocksize_mask = root->sectorsize - 1; |
@@ -5962,8 +6215,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io | |||
5962 | addr = (unsigned long)iov[seg].iov_base; | 6215 | addr = (unsigned long)iov[seg].iov_base; |
5963 | size = iov[seg].iov_len; | 6216 | size = iov[seg].iov_len; |
5964 | end += size; | 6217 | end += size; |
5965 | if ((addr & blocksize_mask) || (size & blocksize_mask)) | 6218 | if ((addr & blocksize_mask) || (size & blocksize_mask)) |
5966 | goto out; | 6219 | goto out; |
6220 | |||
6221 | /* If this is a write we don't need to check anymore */ | ||
6222 | if (rw & WRITE) | ||
6223 | continue; | ||
6224 | |||
6225 | /* | ||
6226 | * Check to make sure we don't have duplicate iov_base's in this | ||
6227 | * iovec; if so, return -EINVAL, otherwise we'd get csum errors | ||
6228 | * when reading back. | ||
6229 | */ | ||
6230 | for (i = seg + 1; i < nr_segs; i++) { | ||
6231 | if (iov[seg].iov_base == iov[i].iov_base) | ||
6232 | goto out; | ||
6233 | } | ||
5967 | } | 6234 | } |
5968 | retval = 0; | 6235 | retval = 0; |
5969 | out: | 6236 | out: |
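The new check rejects read iovecs that reuse a buffer: two segments sharing an iov_base land in the same user pages, so concurrent reads into them can stomp on each other's data around checksum verification, and the request is refused up front instead of surfacing csum errors later. The scan is quadratic in nr_segs, which is acceptable for the small segment counts direct I/O sees. Extracted as a standalone helper:

#include <stdio.h>
#include <sys/uio.h>

static int has_duplicate_bases(const struct iovec *iov, unsigned long nr_segs)
{
	unsigned long seg, i;

	for (seg = 0; seg < nr_segs; seg++)
		for (i = seg + 1; i < nr_segs; i++)
			if (iov[seg].iov_base == iov[i].iov_base)
				return 1;
	return 0;
}

int main(void)
{
	char buf[4096];
	struct iovec iov[2] = {
		{ .iov_base = buf, .iov_len = 2048 },
		{ .iov_base = buf, .iov_len = 2048 },	/* duplicate base */
	};

	printf("duplicate: %d\n", has_duplicate_bases(iov, 2));	/* prints 1 */
	return 0;
}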
@@ -6064,7 +6331,7 @@ out: | |||
6064 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 6331 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
6065 | __u64 start, __u64 len) | 6332 | __u64 start, __u64 len) |
6066 | { | 6333 | { |
6067 | return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent); | 6334 | return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap); |
6068 | } | 6335 | } |
6069 | 6336 | ||
6070 | int btrfs_readpage(struct file *file, struct page *page) | 6337 | int btrfs_readpage(struct file *file, struct page *page) |
@@ -6314,28 +6581,42 @@ out: | |||
6314 | return ret; | 6581 | return ret; |
6315 | } | 6582 | } |
6316 | 6583 | ||
6317 | static void btrfs_truncate(struct inode *inode) | 6584 | static int btrfs_truncate(struct inode *inode) |
6318 | { | 6585 | { |
6319 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6586 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6320 | int ret; | 6587 | int ret; |
6588 | int err = 0; | ||
6321 | struct btrfs_trans_handle *trans; | 6589 | struct btrfs_trans_handle *trans; |
6322 | unsigned long nr; | 6590 | unsigned long nr; |
6323 | u64 mask = root->sectorsize - 1; | 6591 | u64 mask = root->sectorsize - 1; |
6324 | 6592 | ||
6325 | if (!S_ISREG(inode->i_mode)) { | ||
6326 | WARN_ON(1); | ||
6327 | return; | ||
6328 | } | ||
6329 | |||
6330 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); | 6593 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); |
6331 | if (ret) | 6594 | if (ret) |
6332 | return; | 6595 | return ret; |
6333 | 6596 | ||
6334 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 6597 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
6335 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 6598 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
6336 | 6599 | ||
6600 | trans = btrfs_start_transaction(root, 5); | ||
6601 | if (IS_ERR(trans)) | ||
6602 | return PTR_ERR(trans); | ||
6603 | |||
6604 | btrfs_set_trans_block_group(trans, inode); | ||
6605 | |||
6606 | ret = btrfs_orphan_add(trans, inode); | ||
6607 | if (ret) { | ||
6608 | btrfs_end_transaction(trans, root); | ||
6609 | return ret; | ||
6610 | } | ||
6611 | |||
6612 | nr = trans->blocks_used; | ||
6613 | btrfs_end_transaction(trans, root); | ||
6614 | btrfs_btree_balance_dirty(root, nr); | ||
6615 | |||
6616 | /* Now start a transaction for the truncate */ | ||
6337 | trans = btrfs_start_transaction(root, 0); | 6617 | trans = btrfs_start_transaction(root, 0); |
6338 | BUG_ON(IS_ERR(trans)); | 6618 | if (IS_ERR(trans)) |
6619 | return PTR_ERR(trans); | ||
6339 | btrfs_set_trans_block_group(trans, inode); | 6620 | btrfs_set_trans_block_group(trans, inode); |
6340 | trans->block_rsv = root->orphan_block_rsv; | 6621 | trans->block_rsv = root->orphan_block_rsv; |
6341 | 6622 | ||
@@ -6362,29 +6643,38 @@ static void btrfs_truncate(struct inode *inode) | |||
6362 | while (1) { | 6643 | while (1) { |
6363 | if (!trans) { | 6644 | if (!trans) { |
6364 | trans = btrfs_start_transaction(root, 0); | 6645 | trans = btrfs_start_transaction(root, 0); |
6365 | BUG_ON(IS_ERR(trans)); | 6646 | if (IS_ERR(trans)) |
6647 | return PTR_ERR(trans); | ||
6366 | btrfs_set_trans_block_group(trans, inode); | 6648 | btrfs_set_trans_block_group(trans, inode); |
6367 | trans->block_rsv = root->orphan_block_rsv; | 6649 | trans->block_rsv = root->orphan_block_rsv; |
6368 | } | 6650 | } |
6369 | 6651 | ||
6370 | ret = btrfs_block_rsv_check(trans, root, | 6652 | ret = btrfs_block_rsv_check(trans, root, |
6371 | root->orphan_block_rsv, 0, 5); | 6653 | root->orphan_block_rsv, 0, 5); |
6372 | if (ret) { | 6654 | if (ret == -EAGAIN) { |
6373 | BUG_ON(ret != -EAGAIN); | ||
6374 | ret = btrfs_commit_transaction(trans, root); | 6655 | ret = btrfs_commit_transaction(trans, root); |
6375 | BUG_ON(ret); | 6656 | if (ret) |
6657 | return ret; | ||
6376 | trans = NULL; | 6658 | trans = NULL; |
6377 | continue; | 6659 | continue; |
6660 | } else if (ret) { | ||
6661 | err = ret; | ||
6662 | break; | ||
6378 | } | 6663 | } |
6379 | 6664 | ||
6380 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6665 | ret = btrfs_truncate_inode_items(trans, root, inode, |
6381 | inode->i_size, | 6666 | inode->i_size, |
6382 | BTRFS_EXTENT_DATA_KEY); | 6667 | BTRFS_EXTENT_DATA_KEY); |
6383 | if (ret != -EAGAIN) | 6668 | if (ret != -EAGAIN) { |
6669 | err = ret; | ||
6384 | break; | 6670 | break; |
6671 | } | ||
6385 | 6672 | ||
6386 | ret = btrfs_update_inode(trans, root, inode); | 6673 | ret = btrfs_update_inode(trans, root, inode); |
6387 | BUG_ON(ret); | 6674 | if (ret) { |
6675 | err = ret; | ||
6676 | break; | ||
6677 | } | ||
6388 | 6678 | ||
6389 | nr = trans->blocks_used; | 6679 | nr = trans->blocks_used; |
6390 | btrfs_end_transaction(trans, root); | 6680 | btrfs_end_transaction(trans, root); |
@@ -6394,16 +6684,27 @@ static void btrfs_truncate(struct inode *inode) | |||
6394 | 6684 | ||
6395 | if (ret == 0 && inode->i_nlink > 0) { | 6685 | if (ret == 0 && inode->i_nlink > 0) { |
6396 | ret = btrfs_orphan_del(trans, inode); | 6686 | ret = btrfs_orphan_del(trans, inode); |
6397 | BUG_ON(ret); | 6687 | if (ret) |
6688 | err = ret; | ||
6689 | } else if (ret && inode->i_nlink > 0) { | ||
6690 | /* | ||
6691 | * Failed to do the truncate, remove us from the in memory | ||
6692 | * orphan list. | ||
6693 | */ | ||
6694 | ret = btrfs_orphan_del(NULL, inode); | ||
6398 | } | 6695 | } |
6399 | 6696 | ||
6400 | ret = btrfs_update_inode(trans, root, inode); | 6697 | ret = btrfs_update_inode(trans, root, inode); |
6401 | BUG_ON(ret); | 6698 | if (ret && !err) |
6699 | err = ret; | ||
6402 | 6700 | ||
6403 | nr = trans->blocks_used; | 6701 | nr = trans->blocks_used; |
6404 | ret = btrfs_end_transaction_throttle(trans, root); | 6702 | ret = btrfs_end_transaction_throttle(trans, root); |
6405 | BUG_ON(ret); | 6703 | if (ret && !err) |
6704 | err = ret; | ||
6406 | btrfs_btree_balance_dirty(root, nr); | 6705 | btrfs_btree_balance_dirty(root, nr); |
6706 | |||
6707 | return err; | ||
6407 | } | 6708 | } |
6408 | 6709 | ||
6409 | /* | 6710 | /* |
@@ -6470,14 +6771,13 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6470 | ei->index_cnt = (u64)-1; | 6771 | ei->index_cnt = (u64)-1; |
6471 | ei->last_unlink_trans = 0; | 6772 | ei->last_unlink_trans = 0; |
6472 | 6773 | ||
6473 | spin_lock_init(&ei->accounting_lock); | ||
6474 | atomic_set(&ei->outstanding_extents, 0); | 6774 | atomic_set(&ei->outstanding_extents, 0); |
6475 | ei->reserved_extents = 0; | 6775 | atomic_set(&ei->reserved_extents, 0); |
6476 | 6776 | ||
6477 | ei->ordered_data_close = 0; | 6777 | ei->ordered_data_close = 0; |
6478 | ei->orphan_meta_reserved = 0; | 6778 | ei->orphan_meta_reserved = 0; |
6479 | ei->dummy_inode = 0; | 6779 | ei->dummy_inode = 0; |
6480 | ei->force_compress = 0; | 6780 | ei->force_compress = BTRFS_COMPRESS_NONE; |
6481 | 6781 | ||
6482 | inode = &ei->vfs_inode; | 6782 | inode = &ei->vfs_inode; |
6483 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); | 6783 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); |
@@ -6508,7 +6808,7 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6508 | WARN_ON(!list_empty(&inode->i_dentry)); | 6808 | WARN_ON(!list_empty(&inode->i_dentry)); |
6509 | WARN_ON(inode->i_data.nrpages); | 6809 | WARN_ON(inode->i_data.nrpages); |
6510 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); | 6810 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); |
6511 | WARN_ON(BTRFS_I(inode)->reserved_extents); | 6811 | WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); |
6512 | 6812 | ||
6513 | /* | 6813 | /* |
6514 | * This can happen where we create an inode, but somebody else also | 6814 | * This can happen where we create an inode, but somebody else also |
@@ -6600,6 +6900,8 @@ void btrfs_destroy_cachep(void) | |||
6600 | kmem_cache_destroy(btrfs_transaction_cachep); | 6900 | kmem_cache_destroy(btrfs_transaction_cachep); |
6601 | if (btrfs_path_cachep) | 6901 | if (btrfs_path_cachep) |
6602 | kmem_cache_destroy(btrfs_path_cachep); | 6902 | kmem_cache_destroy(btrfs_path_cachep); |
6903 | if (btrfs_free_space_cachep) | ||
6904 | kmem_cache_destroy(btrfs_free_space_cachep); | ||
6603 | } | 6905 | } |
6604 | 6906 | ||
6605 | int btrfs_init_cachep(void) | 6907 | int btrfs_init_cachep(void) |
@@ -6628,6 +6930,12 @@ int btrfs_init_cachep(void) | |||
6628 | if (!btrfs_path_cachep) | 6930 | if (!btrfs_path_cachep) |
6629 | goto fail; | 6931 | goto fail; |
6630 | 6932 | ||
6933 | btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache", | ||
6934 | sizeof(struct btrfs_free_space), 0, | ||
6935 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | ||
6936 | if (!btrfs_free_space_cachep) | ||
6937 | goto fail; | ||
6938 | |||
6631 | return 0; | 6939 | return 0; |
6632 | fail: | 6940 | fail: |
6633 | btrfs_destroy_cachep(); | 6941 | btrfs_destroy_cachep(); |
@@ -6646,6 +6954,26 @@ static int btrfs_getattr(struct vfsmount *mnt, | |||
6646 | return 0; | 6954 | return 0; |
6647 | } | 6955 | } |
6648 | 6956 | ||
6957 | /* | ||
6958 | * If a file is moved, it will inherit the COW and compression flags of the new | ||
6959 | * directory. | ||
6960 | */ | ||
6961 | static void fixup_inode_flags(struct inode *dir, struct inode *inode) | ||
6962 | { | ||
6963 | struct btrfs_inode *b_dir = BTRFS_I(dir); | ||
6964 | struct btrfs_inode *b_inode = BTRFS_I(inode); | ||
6965 | |||
6966 | if (b_dir->flags & BTRFS_INODE_NODATACOW) | ||
6967 | b_inode->flags |= BTRFS_INODE_NODATACOW; | ||
6968 | else | ||
6969 | b_inode->flags &= ~BTRFS_INODE_NODATACOW; | ||
6970 | |||
6971 | if (b_dir->flags & BTRFS_INODE_COMPRESS) | ||
6972 | b_inode->flags |= BTRFS_INODE_COMPRESS; | ||
6973 | else | ||
6974 | b_inode->flags &= ~BTRFS_INODE_COMPRESS; | ||
6975 | } | ||
6976 | |||
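fixup_inode_flags() forces the moved inode's NODATACOW and COMPRESS bits to track the destination directory in both directions: set when the directory has them, cleared when it does not. The same propagation with made-up flag values:

#include <stdio.h>

#define TOY_NODATACOW 0x1u
#define TOY_COMPRESS  0x2u

static unsigned int fixup_flags(unsigned int dir_flags, unsigned int inode_flags)
{
	if (dir_flags & TOY_NODATACOW)
		inode_flags |= TOY_NODATACOW;
	else
		inode_flags &= ~TOY_NODATACOW;

	if (dir_flags & TOY_COMPRESS)
		inode_flags |= TOY_COMPRESS;
	else
		inode_flags &= ~TOY_COMPRESS;

	return inode_flags;
}

int main(void)
{
	/* dir compresses, inode was NODATACOW: inode ends up 0x2 */
	printf("0x%x\n", fixup_flags(TOY_COMPRESS, TOY_NODATACOW));
	return 0;
}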
6649 | static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | 6977 | static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, |
6650 | struct inode *new_dir, struct dentry *new_dentry) | 6978 | struct inode *new_dir, struct dentry *new_dentry) |
6651 | { | 6979 | { |
@@ -6694,8 +7022,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6694 | * should cover the worst case number of items we'll modify. | 7022 | * should cover the worst case number of items we'll modify. |
6695 | */ | 7023 | */ |
6696 | trans = btrfs_start_transaction(root, 20); | 7024 | trans = btrfs_start_transaction(root, 20); |
6697 | if (IS_ERR(trans)) | 7025 | if (IS_ERR(trans)) { |
6698 | return PTR_ERR(trans); | 7026 | ret = PTR_ERR(trans); |
7027 | goto out_notrans; | ||
7028 | } | ||
6699 | 7029 | ||
6700 | btrfs_set_trans_block_group(trans, new_dir); | 7030 | btrfs_set_trans_block_group(trans, new_dir); |
6701 | 7031 | ||
@@ -6748,11 +7078,12 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6748 | old_dentry->d_name.name, | 7078 | old_dentry->d_name.name, |
6749 | old_dentry->d_name.len); | 7079 | old_dentry->d_name.len); |
6750 | } else { | 7080 | } else { |
6751 | btrfs_inc_nlink(old_dentry->d_inode); | 7081 | ret = __btrfs_unlink_inode(trans, root, old_dir, |
6752 | ret = btrfs_unlink_inode(trans, root, old_dir, | 7082 | old_dentry->d_inode, |
6753 | old_dentry->d_inode, | 7083 | old_dentry->d_name.name, |
6754 | old_dentry->d_name.name, | 7084 | old_dentry->d_name.len); |
6755 | old_dentry->d_name.len); | 7085 | if (!ret) |
7086 | ret = btrfs_update_inode(trans, root, old_inode); | ||
6756 | } | 7087 | } |
6757 | BUG_ON(ret); | 7088 | BUG_ON(ret); |
6758 | 7089 | ||
@@ -6779,6 +7110,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6779 | } | 7110 | } |
6780 | } | 7111 | } |
6781 | 7112 | ||
7113 | fixup_inode_flags(new_dir, old_inode); | ||
7114 | |||
6782 | ret = btrfs_add_link(trans, new_dir, old_inode, | 7115 | ret = btrfs_add_link(trans, new_dir, old_inode, |
6783 | new_dentry->d_name.name, | 7116 | new_dentry->d_name.name, |
6784 | new_dentry->d_name.len, 0, index); | 7117 | new_dentry->d_name.len, 0, index); |
@@ -6792,7 +7125,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6792 | } | 7125 | } |
6793 | out_fail: | 7126 | out_fail: |
6794 | btrfs_end_transaction_throttle(trans, root); | 7127 | btrfs_end_transaction_throttle(trans, root); |
6795 | 7128 | out_notrans: | |
6796 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 7129 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
6797 | up_read(&root->fs_info->subvol_sem); | 7130 | up_read(&root->fs_info->subvol_sem); |
6798 | 7131 | ||
@@ -6944,7 +7277,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
6944 | if (IS_ERR(inode)) | 7277 | if (IS_ERR(inode)) |
6945 | goto out_unlock; | 7278 | goto out_unlock; |
6946 | 7279 | ||
6947 | err = btrfs_init_inode_security(trans, inode, dir); | 7280 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); |
6948 | if (err) { | 7281 | if (err) { |
6949 | drop_inode = 1; | 7282 | drop_inode = 1; |
6950 | goto out_unlock; | 7283 | goto out_unlock; |
@@ -7098,116 +7431,6 @@ int btrfs_prealloc_file_range_trans(struct inode *inode, | |||
7098 | min_size, actual_len, alloc_hint, trans); | 7431 | min_size, actual_len, alloc_hint, trans); |
7099 | } | 7432 | } |
7100 | 7433 | ||
7101 | static long btrfs_fallocate(struct inode *inode, int mode, | ||
7102 | loff_t offset, loff_t len) | ||
7103 | { | ||
7104 | struct extent_state *cached_state = NULL; | ||
7105 | u64 cur_offset; | ||
7106 | u64 last_byte; | ||
7107 | u64 alloc_start; | ||
7108 | u64 alloc_end; | ||
7109 | u64 alloc_hint = 0; | ||
7110 | u64 locked_end; | ||
7111 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
7112 | struct extent_map *em; | ||
7113 | int ret; | ||
7114 | |||
7115 | alloc_start = offset & ~mask; | ||
7116 | alloc_end = (offset + len + mask) & ~mask; | ||
7117 | |||
7118 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | ||
7119 | if (mode && (mode != FALLOC_FL_KEEP_SIZE)) | ||
7120 | return -EOPNOTSUPP; | ||
7121 | |||
7122 | /* | ||
7123 | * wait for ordered IO before we have any locks. We'll loop again | ||
7124 | * below with the locks held. | ||
7125 | */ | ||
7126 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
7127 | |||
7128 | mutex_lock(&inode->i_mutex); | ||
7129 | ret = inode_newsize_ok(inode, alloc_end); | ||
7130 | if (ret) | ||
7131 | goto out; | ||
7132 | |||
7133 | if (alloc_start > inode->i_size) { | ||
7134 | ret = btrfs_cont_expand(inode, alloc_start); | ||
7135 | if (ret) | ||
7136 | goto out; | ||
7137 | } | ||
7138 | |||
7139 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); | ||
7140 | if (ret) | ||
7141 | goto out; | ||
7142 | |||
7143 | locked_end = alloc_end - 1; | ||
7144 | while (1) { | ||
7145 | struct btrfs_ordered_extent *ordered; | ||
7146 | |||
7147 | /* the extent lock is ordered inside the running | ||
7148 | * transaction | ||
7149 | */ | ||
7150 | lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, | ||
7151 | locked_end, 0, &cached_state, GFP_NOFS); | ||
7152 | ordered = btrfs_lookup_first_ordered_extent(inode, | ||
7153 | alloc_end - 1); | ||
7154 | if (ordered && | ||
7155 | ordered->file_offset + ordered->len > alloc_start && | ||
7156 | ordered->file_offset < alloc_end) { | ||
7157 | btrfs_put_ordered_extent(ordered); | ||
7158 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | ||
7159 | alloc_start, locked_end, | ||
7160 | &cached_state, GFP_NOFS); | ||
7161 | /* | ||
7162 | * we can't wait on the range with the transaction | ||
7163 | * running or with the extent lock held | ||
7164 | */ | ||
7165 | btrfs_wait_ordered_range(inode, alloc_start, | ||
7166 | alloc_end - alloc_start); | ||
7167 | } else { | ||
7168 | if (ordered) | ||
7169 | btrfs_put_ordered_extent(ordered); | ||
7170 | break; | ||
7171 | } | ||
7172 | } | ||
7173 | |||
7174 | cur_offset = alloc_start; | ||
7175 | while (1) { | ||
7176 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | ||
7177 | alloc_end - cur_offset, 0); | ||
7178 | BUG_ON(IS_ERR(em) || !em); | ||
7179 | last_byte = min(extent_map_end(em), alloc_end); | ||
7180 | last_byte = (last_byte + mask) & ~mask; | ||
7181 | if (em->block_start == EXTENT_MAP_HOLE || | ||
7182 | (cur_offset >= inode->i_size && | ||
7183 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
7184 | ret = btrfs_prealloc_file_range(inode, mode, cur_offset, | ||
7185 | last_byte - cur_offset, | ||
7186 | 1 << inode->i_blkbits, | ||
7187 | offset + len, | ||
7188 | &alloc_hint); | ||
7189 | if (ret < 0) { | ||
7190 | free_extent_map(em); | ||
7191 | break; | ||
7192 | } | ||
7193 | } | ||
7194 | free_extent_map(em); | ||
7195 | |||
7196 | cur_offset = last_byte; | ||
7197 | if (cur_offset >= alloc_end) { | ||
7198 | ret = 0; | ||
7199 | break; | ||
7200 | } | ||
7201 | } | ||
7202 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||
7203 | &cached_state, GFP_NOFS); | ||
7204 | |||
7205 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); | ||
7206 | out: | ||
7207 | mutex_unlock(&inode->i_mutex); | ||
7208 | return ret; | ||
7209 | } | ||
7210 | |||
7211 | static int btrfs_set_page_dirty(struct page *page) | 7434 | static int btrfs_set_page_dirty(struct page *page) |
7212 | { | 7435 | { |
7213 | return __set_page_dirty_nobuffers(page); | 7436 | return __set_page_dirty_nobuffers(page); |
@@ -7215,6 +7438,10 @@ static int btrfs_set_page_dirty(struct page *page) | |||
7215 | 7438 | ||
7216 | static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) | 7439 | static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) |
7217 | { | 7440 | { |
7441 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
7442 | |||
7443 | if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) | ||
7444 | return -EROFS; | ||
7218 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) | 7445 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) |
7219 | return -EACCES; | 7446 | return -EACCES; |
7220 | return generic_permission(inode, mask, flags, btrfs_check_acl); | 7447 | return generic_permission(inode, mask, flags, btrfs_check_acl); |
@@ -7286,7 +7513,6 @@ static const struct address_space_operations btrfs_aops = { | |||
7286 | .writepage = btrfs_writepage, | 7513 | .writepage = btrfs_writepage, |
7287 | .writepages = btrfs_writepages, | 7514 | .writepages = btrfs_writepages, |
7288 | .readpages = btrfs_readpages, | 7515 | .readpages = btrfs_readpages, |
7289 | .sync_page = block_sync_page, | ||
7290 | .direct_IO = btrfs_direct_IO, | 7516 | .direct_IO = btrfs_direct_IO, |
7291 | .invalidatepage = btrfs_invalidatepage, | 7517 | .invalidatepage = btrfs_invalidatepage, |
7292 | .releasepage = btrfs_releasepage, | 7518 | .releasepage = btrfs_releasepage, |
@@ -7302,7 +7528,6 @@ static const struct address_space_operations btrfs_symlink_aops = { | |||
7302 | }; | 7528 | }; |
7303 | 7529 | ||
7304 | static const struct inode_operations btrfs_file_inode_operations = { | 7530 | static const struct inode_operations btrfs_file_inode_operations = { |
7305 | .truncate = btrfs_truncate, | ||
7306 | .getattr = btrfs_getattr, | 7531 | .getattr = btrfs_getattr, |
7307 | .setattr = btrfs_setattr, | 7532 | .setattr = btrfs_setattr, |
7308 | .setxattr = btrfs_setxattr, | 7533 | .setxattr = btrfs_setxattr, |
@@ -7310,7 +7535,6 @@ static const struct inode_operations btrfs_file_inode_operations = { | |||
7310 | .listxattr = btrfs_listxattr, | 7535 | .listxattr = btrfs_listxattr, |
7311 | .removexattr = btrfs_removexattr, | 7536 | .removexattr = btrfs_removexattr, |
7312 | .permission = btrfs_permission, | 7537 | .permission = btrfs_permission, |
7313 | .fallocate = btrfs_fallocate, | ||
7314 | .fiemap = btrfs_fiemap, | 7538 | .fiemap = btrfs_fiemap, |
7315 | }; | 7539 | }; |
7316 | static const struct inode_operations btrfs_special_inode_operations = { | 7540 | static const struct inode_operations btrfs_special_inode_operations = { |
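
The inode.c hunks above drop btrfs_fallocate() and the .fallocate inode operation: with the VFS moving fallocate into file_operations in this era, the implementation lives on behind the file-based path, so the user-visible call is unchanged. A minimal user-space sketch of that interface, assuming a btrfs file under /mnt/btrfs (path and size are illustrative); per the removed helper above, only mode 0 and FALLOC_FL_KEEP_SIZE are accepted at this point:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/falloc.h>

int main(void)
{
        int fd = open("/mnt/btrfs/prealloc.dat", O_CREAT | O_WRONLY, 0644);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* reserve 16 MiB without changing i_size; any other mode => EOPNOTSUPP */
        if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 16 << 20) < 0)
                perror("fallocate");
        close(fd);
        return 0;
}
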
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f87552a1d7ea..ffb48d6c5433 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/xattr.h> | 40 | #include <linux/xattr.h> |
41 | #include <linux/vmalloc.h> | 41 | #include <linux/vmalloc.h> |
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | #include <linux/blkdev.h> | ||
43 | #include "compat.h" | 44 | #include "compat.h" |
44 | #include "ctree.h" | 45 | #include "ctree.h" |
45 | #include "disk-io.h" | 46 | #include "disk-io.h" |
@@ -138,6 +139,24 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg) | |||
138 | return 0; | 139 | return 0; |
139 | } | 140 | } |
140 | 141 | ||
142 | static int check_flags(unsigned int flags) | ||
143 | { | ||
144 | if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ | ||
145 | FS_NOATIME_FL | FS_NODUMP_FL | \ | ||
146 | FS_SYNC_FL | FS_DIRSYNC_FL | \ | ||
147 | FS_NOCOMP_FL | FS_COMPR_FL | \ | ||
148 | FS_NOCOW_FL | FS_COW_FL)) | ||
149 | return -EOPNOTSUPP; | ||
150 | |||
151 | if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) | ||
152 | return -EINVAL; | ||
153 | |||
154 | if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL)) | ||
155 | return -EINVAL; | ||
156 | |||
157 | return 0; | ||
158 | } | ||
159 | |||
141 | static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | 160 | static int btrfs_ioctl_setflags(struct file *file, void __user *arg) |
142 | { | 161 | { |
143 | struct inode *inode = file->f_path.dentry->d_inode; | 162 | struct inode *inode = file->f_path.dentry->d_inode; |
@@ -147,15 +166,17 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
147 | unsigned int flags, oldflags; | 166 | unsigned int flags, oldflags; |
148 | int ret; | 167 | int ret; |
149 | 168 | ||
169 | if (btrfs_root_readonly(root)) | ||
170 | return -EROFS; | ||
171 | |||
150 | if (copy_from_user(&flags, arg, sizeof(flags))) | 172 | if (copy_from_user(&flags, arg, sizeof(flags))) |
151 | return -EFAULT; | 173 | return -EFAULT; |
152 | 174 | ||
153 | if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ | 175 | ret = check_flags(flags); |
154 | FS_NOATIME_FL | FS_NODUMP_FL | \ | 176 | if (ret) |
155 | FS_SYNC_FL | FS_DIRSYNC_FL)) | 177 | return ret; |
156 | return -EOPNOTSUPP; | ||
157 | 178 | ||
158 | if (!is_owner_or_cap(inode)) | 179 | if (!inode_owner_or_capable(inode)) |
159 | return -EACCES; | 180 | return -EACCES; |
160 | 181 | ||
161 | mutex_lock(&inode->i_mutex); | 182 | mutex_lock(&inode->i_mutex); |
@@ -198,9 +219,25 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
198 | else | 219 | else |
199 | ip->flags &= ~BTRFS_INODE_DIRSYNC; | 220 | ip->flags &= ~BTRFS_INODE_DIRSYNC; |
200 | 221 | ||
222 | /* | ||
223 | * The COMPRESS flag can only be changed by users, while the NOCOMPRESS | ||
224 | * flag may be changed automatically if compression code won't make | ||
225 | * things smaller. | ||
226 | */ | ||
227 | if (flags & FS_NOCOMP_FL) { | ||
228 | ip->flags &= ~BTRFS_INODE_COMPRESS; | ||
229 | ip->flags |= BTRFS_INODE_NOCOMPRESS; | ||
230 | } else if (flags & FS_COMPR_FL) { | ||
231 | ip->flags |= BTRFS_INODE_COMPRESS; | ||
232 | ip->flags &= ~BTRFS_INODE_NOCOMPRESS; | ||
233 | } | ||
234 | if (flags & FS_NOCOW_FL) | ||
235 | ip->flags |= BTRFS_INODE_NODATACOW; | ||
236 | else if (flags & FS_COW_FL) | ||
237 | ip->flags &= ~BTRFS_INODE_NODATACOW; | ||
201 | 238 | ||
202 | trans = btrfs_join_transaction(root, 1); | 239 | trans = btrfs_join_transaction(root, 1); |
203 | BUG_ON(!trans); | 240 | BUG_ON(IS_ERR(trans)); |
204 | 241 | ||
205 | ret = btrfs_update_inode(trans, root, inode); | 242 | ret = btrfs_update_inode(trans, root, inode); |
206 | BUG_ON(ret); | 243 | BUG_ON(ret); |
@@ -210,9 +247,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
210 | btrfs_end_transaction(trans, root); | 247 | btrfs_end_transaction(trans, root); |
211 | 248 | ||
212 | mnt_drop_write(file->f_path.mnt); | 249 | mnt_drop_write(file->f_path.mnt); |
250 | |||
251 | ret = 0; | ||
213 | out_unlock: | 252 | out_unlock: |
214 | mutex_unlock(&inode->i_mutex); | 253 | mutex_unlock(&inode->i_mutex); |
215 | return 0; | 254 | return ret; |
216 | } | 255 | } |
217 | 256 | ||
218 | static int btrfs_ioctl_getversion(struct file *file, int __user *arg) | 257 | static int btrfs_ioctl_getversion(struct file *file, int __user *arg) |
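
The new check_flags() helper and the COMPRESS/NOCOW handling above sit behind the standard FS_IOC_GETFLAGS/FS_IOC_SETFLAGS interface. A minimal user-space sketch of turning per-file compression on, assuming a file on a writable btrfs subvolume (the path is illustrative):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>

int main(void)
{
        unsigned int flags;
        int fd = open("/mnt/btrfs/file", O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) {
                perror("FS_IOC_GETFLAGS");
                return 1;
        }
        flags |= FS_COMPR_FL;           /* ask for compression */
        flags &= ~FS_NOCOMP_FL;         /* check_flags() rejects both together */
        if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0)
                perror("FS_IOC_SETFLAGS");
        close(fd);
        return 0;
}

Note that the kernel side above only promises FS_COMPR_FL is user-controlled; FS_NOCOMP_FL may be set back automatically when compression fails to shrink the data.
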
@@ -222,6 +261,49 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg) | |||
222 | return put_user(inode->i_generation, arg); | 261 | return put_user(inode->i_generation, arg); |
223 | } | 262 | } |
224 | 263 | ||
264 | static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) | ||
265 | { | ||
266 | struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info; | ||
267 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
268 | struct btrfs_device *device; | ||
269 | struct request_queue *q; | ||
270 | struct fstrim_range range; | ||
271 | u64 minlen = ULLONG_MAX; | ||
272 | u64 num_devices = 0; | ||
273 | int ret; | ||
274 | |||
275 | if (!capable(CAP_SYS_ADMIN)) | ||
276 | return -EPERM; | ||
277 | |||
278 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | ||
279 | list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { | ||
280 | if (!device->bdev) | ||
281 | continue; | ||
282 | q = bdev_get_queue(device->bdev); | ||
283 | if (blk_queue_discard(q)) { | ||
284 | num_devices++; | ||
285 | minlen = min((u64)q->limits.discard_granularity, | ||
286 | minlen); | ||
287 | } | ||
288 | } | ||
289 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
290 | if (!num_devices) | ||
291 | return -EOPNOTSUPP; | ||
292 | |||
293 | if (copy_from_user(&range, arg, sizeof(range))) | ||
294 | return -EFAULT; | ||
295 | |||
296 | range.minlen = max(range.minlen, minlen); | ||
297 | ret = btrfs_trim_fs(root, &range); | ||
298 | if (ret < 0) | ||
299 | return ret; | ||
300 | |||
301 | if (copy_to_user(arg, &range, sizeof(range))) | ||
302 | return -EFAULT; | ||
303 | |||
304 | return 0; | ||
305 | } | ||
306 | |||
225 | static noinline int create_subvol(struct btrfs_root *root, | 307 | static noinline int create_subvol(struct btrfs_root *root, |
226 | struct dentry *dentry, | 308 | struct dentry *dentry, |
227 | char *name, int namelen, | 309 | char *name, int namelen, |
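
btrfs_ioctl_fitrim() above finds the smallest discard granularity among the discard-capable devices, raises range.minlen to at least that, and hands the range to btrfs_trim_fs(). A minimal sketch of driving it from user space, assuming a btrfs mount at /mnt/btrfs; it requires CAP_SYS_ADMIN and fails with EOPNOTSUPP when no device supports discard:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>

int main(void)
{
        struct fstrim_range range;
        int fd = open("/mnt/btrfs", O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        memset(&range, 0, sizeof(range));
        range.start = 0;
        range.len = (__u64)-1;  /* whole filesystem */
        range.minlen = 0;       /* kernel raises this to the discard granularity */
        if (ioctl(fd, FITRIM, &range) < 0)
                perror("FITRIM");
        else    /* btrfs_trim_fs() reports the trimmed byte count back in len */
                printf("trimmed %llu bytes\n", (unsigned long long)range.len);
        close(fd);
        return 0;
}
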
@@ -291,6 +373,10 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
291 | inode_item->nbytes = cpu_to_le64(root->leafsize); | 373 | inode_item->nbytes = cpu_to_le64(root->leafsize); |
292 | inode_item->mode = cpu_to_le32(S_IFDIR | 0755); | 374 | inode_item->mode = cpu_to_le32(S_IFDIR | 0755); |
293 | 375 | ||
376 | root_item.flags = 0; | ||
377 | root_item.byte_limit = 0; | ||
378 | inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT); | ||
379 | |||
294 | btrfs_set_root_bytenr(&root_item, leaf->start); | 380 | btrfs_set_root_bytenr(&root_item, leaf->start); |
295 | btrfs_set_root_generation(&root_item, trans->transid); | 381 | btrfs_set_root_generation(&root_item, trans->transid); |
296 | btrfs_set_root_level(&root_item, 0); | 382 | btrfs_set_root_level(&root_item, 0); |
@@ -360,7 +446,8 @@ fail: | |||
360 | } | 446 | } |
361 | 447 | ||
362 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | 448 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, |
363 | char *name, int namelen, u64 *async_transid) | 449 | char *name, int namelen, u64 *async_transid, |
450 | bool readonly) | ||
364 | { | 451 | { |
365 | struct inode *inode; | 452 | struct inode *inode; |
366 | struct dentry *parent; | 453 | struct dentry *parent; |
@@ -378,6 +465,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
378 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); | 465 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); |
379 | pending_snapshot->dentry = dentry; | 466 | pending_snapshot->dentry = dentry; |
380 | pending_snapshot->root = root; | 467 | pending_snapshot->root = root; |
468 | pending_snapshot->readonly = readonly; | ||
381 | 469 | ||
382 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | 470 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); |
383 | if (IS_ERR(trans)) { | 471 | if (IS_ERR(trans)) { |
@@ -404,7 +492,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
404 | if (ret) | 492 | if (ret) |
405 | goto fail; | 493 | goto fail; |
406 | 494 | ||
407 | btrfs_orphan_cleanup(pending_snapshot->snap); | 495 | ret = btrfs_orphan_cleanup(pending_snapshot->snap); |
496 | if (ret) | ||
497 | goto fail; | ||
408 | 498 | ||
409 | parent = dget_parent(dentry); | 499 | parent = dget_parent(dentry); |
410 | inode = btrfs_lookup_dentry(parent->d_inode, dentry); | 500 | inode = btrfs_lookup_dentry(parent->d_inode, dentry); |
@@ -509,7 +599,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | |||
509 | static noinline int btrfs_mksubvol(struct path *parent, | 599 | static noinline int btrfs_mksubvol(struct path *parent, |
510 | char *name, int namelen, | 600 | char *name, int namelen, |
511 | struct btrfs_root *snap_src, | 601 | struct btrfs_root *snap_src, |
512 | u64 *async_transid) | 602 | u64 *async_transid, bool readonly) |
513 | { | 603 | { |
514 | struct inode *dir = parent->dentry->d_inode; | 604 | struct inode *dir = parent->dentry->d_inode; |
515 | struct dentry *dentry; | 605 | struct dentry *dentry; |
@@ -541,7 +631,7 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
541 | 631 | ||
542 | if (snap_src) { | 632 | if (snap_src) { |
543 | error = create_snapshot(snap_src, dentry, | 633 | error = create_snapshot(snap_src, dentry, |
544 | name, namelen, async_transid); | 634 | name, namelen, async_transid, readonly); |
545 | } else { | 635 | } else { |
546 | error = create_subvol(BTRFS_I(dir)->root, dentry, | 636 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
547 | name, namelen, async_transid); | 637 | name, namelen, async_transid); |
@@ -638,9 +728,11 @@ static int btrfs_defrag_file(struct file *file, | |||
638 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 728 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
639 | struct btrfs_ordered_extent *ordered; | 729 | struct btrfs_ordered_extent *ordered; |
640 | struct page *page; | 730 | struct page *page; |
731 | struct btrfs_super_block *disk_super; | ||
641 | unsigned long last_index; | 732 | unsigned long last_index; |
642 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; | 733 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; |
643 | unsigned long total_read = 0; | 734 | unsigned long total_read = 0; |
735 | u64 features; | ||
644 | u64 page_start; | 736 | u64 page_start; |
645 | u64 page_end; | 737 | u64 page_end; |
646 | u64 last_len = 0; | 738 | u64 last_len = 0; |
@@ -648,6 +740,14 @@ static int btrfs_defrag_file(struct file *file, | |||
648 | u64 defrag_end = 0; | 740 | u64 defrag_end = 0; |
649 | unsigned long i; | 741 | unsigned long i; |
650 | int ret; | 742 | int ret; |
743 | int compress_type = BTRFS_COMPRESS_ZLIB; | ||
744 | |||
745 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { | ||
746 | if (range->compress_type > BTRFS_COMPRESS_TYPES) | ||
747 | return -EINVAL; | ||
748 | if (range->compress_type) | ||
749 | compress_type = range->compress_type; | ||
750 | } | ||
651 | 751 | ||
652 | if (inode->i_size == 0) | 752 | if (inode->i_size == 0) |
653 | return 0; | 753 | return 0; |
@@ -683,7 +783,7 @@ static int btrfs_defrag_file(struct file *file, | |||
683 | total_read++; | 783 | total_read++; |
684 | mutex_lock(&inode->i_mutex); | 784 | mutex_lock(&inode->i_mutex); |
685 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) | 785 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) |
686 | BTRFS_I(inode)->force_compress = 1; | 786 | BTRFS_I(inode)->force_compress = compress_type; |
687 | 787 | ||
688 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | 788 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
689 | if (ret) | 789 | if (ret) |
@@ -781,10 +881,17 @@ loop_unlock: | |||
781 | atomic_dec(&root->fs_info->async_submit_draining); | 881 | atomic_dec(&root->fs_info->async_submit_draining); |
782 | 882 | ||
783 | mutex_lock(&inode->i_mutex); | 883 | mutex_lock(&inode->i_mutex); |
784 | BTRFS_I(inode)->force_compress = 0; | 884 | BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; |
785 | mutex_unlock(&inode->i_mutex); | 885 | mutex_unlock(&inode->i_mutex); |
786 | } | 886 | } |
787 | 887 | ||
888 | disk_super = &root->fs_info->super_copy; | ||
889 | features = btrfs_super_incompat_flags(disk_super); | ||
890 | if (range->compress_type == BTRFS_COMPRESS_LZO) { | ||
891 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; | ||
892 | btrfs_set_super_incompat_flags(disk_super, features); | ||
893 | } | ||
894 | |||
788 | return 0; | 895 | return 0; |
789 | 896 | ||
790 | err_reservations: | 897 | err_reservations: |
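
The defrag path above now honours range->compress_type (zlib stays the default) and, once an LZO defrag has run, sets BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO in the superblock copy. A hedged user-space sketch of requesting LZO recompression; BTRFS_IOC_DEFRAG_RANGE predates this patch, and the <btrfs/ioctl.h> header path (btrfs-progs) plus the mount point are assumptions:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/types.h>
#include <btrfs/ioctl.h>        /* btrfs_ioctl_defrag_range_args (assumed path) */

int main(void)
{
        struct btrfs_ioctl_defrag_range_args range;
        int fd = open("/mnt/btrfs/file", O_RDWR);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        memset(&range, 0, sizeof(range));
        range.len = (__u64)-1;                          /* whole file */
        range.flags = BTRFS_DEFRAG_RANGE_COMPRESS;      /* recompress as we go */
        range.compress_type = BTRFS_COMPRESS_LZO;       /* 0 would mean zlib */
        if (ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, &range) < 0)
                perror("BTRFS_IOC_DEFRAG_RANGE");
        close(fd);
        return 0;
}
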
@@ -885,6 +992,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
885 | 992 | ||
886 | if (new_size > old_size) { | 993 | if (new_size > old_size) { |
887 | trans = btrfs_start_transaction(root, 0); | 994 | trans = btrfs_start_transaction(root, 0); |
995 | if (IS_ERR(trans)) { | ||
996 | ret = PTR_ERR(trans); | ||
997 | goto out_unlock; | ||
998 | } | ||
888 | ret = btrfs_grow_device(trans, device, new_size); | 999 | ret = btrfs_grow_device(trans, device, new_size); |
889 | btrfs_commit_transaction(trans, root); | 1000 | btrfs_commit_transaction(trans, root); |
890 | } else { | 1001 | } else { |
@@ -901,7 +1012,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
901 | char *name, | 1012 | char *name, |
902 | unsigned long fd, | 1013 | unsigned long fd, |
903 | int subvol, | 1014 | int subvol, |
904 | u64 *transid) | 1015 | u64 *transid, |
1016 | bool readonly) | ||
905 | { | 1017 | { |
906 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 1018 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; |
907 | struct file *src_file; | 1019 | struct file *src_file; |
@@ -919,7 +1031,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
919 | 1031 | ||
920 | if (subvol) { | 1032 | if (subvol) { |
921 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 1033 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
922 | NULL, transid); | 1034 | NULL, transid, readonly); |
923 | } else { | 1035 | } else { |
924 | struct inode *src_inode; | 1036 | struct inode *src_inode; |
925 | src_file = fget(fd); | 1037 | src_file = fget(fd); |
@@ -938,7 +1050,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
938 | } | 1050 | } |
939 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 1051 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
940 | BTRFS_I(src_inode)->root, | 1052 | BTRFS_I(src_inode)->root, |
941 | transid); | 1053 | transid, readonly); |
942 | fput(src_file); | 1054 | fput(src_file); |
943 | } | 1055 | } |
944 | out: | 1056 | out: |
@@ -946,61 +1058,145 @@ out: | |||
946 | } | 1058 | } |
947 | 1059 | ||
948 | static noinline int btrfs_ioctl_snap_create(struct file *file, | 1060 | static noinline int btrfs_ioctl_snap_create(struct file *file, |
949 | void __user *arg, int subvol, | 1061 | void __user *arg, int subvol) |
950 | int v2) | ||
951 | { | 1062 | { |
952 | struct btrfs_ioctl_vol_args *vol_args = NULL; | 1063 | struct btrfs_ioctl_vol_args *vol_args; |
953 | struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL; | ||
954 | char *name; | ||
955 | u64 fd; | ||
956 | int ret; | 1064 | int ret; |
957 | 1065 | ||
958 | if (v2) { | 1066 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
959 | u64 transid = 0; | 1067 | if (IS_ERR(vol_args)) |
960 | u64 *ptr = NULL; | 1068 | return PTR_ERR(vol_args); |
1069 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
961 | 1070 | ||
962 | vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2)); | 1071 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, |
963 | if (IS_ERR(vol_args_v2)) | 1072 | vol_args->fd, subvol, |
964 | return PTR_ERR(vol_args_v2); | 1073 | NULL, false); |
965 | 1074 | ||
966 | if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) { | 1075 | kfree(vol_args); |
967 | ret = -EINVAL; | 1076 | return ret; |
968 | goto out; | 1077 | } |
969 | } | ||
970 | |||
971 | name = vol_args_v2->name; | ||
972 | fd = vol_args_v2->fd; | ||
973 | vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; | ||
974 | 1078 | ||
975 | if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC) | 1079 | static noinline int btrfs_ioctl_snap_create_v2(struct file *file, |
976 | ptr = &transid; | 1080 | void __user *arg, int subvol) |
1081 | { | ||
1082 | struct btrfs_ioctl_vol_args_v2 *vol_args; | ||
1083 | int ret; | ||
1084 | u64 transid = 0; | ||
1085 | u64 *ptr = NULL; | ||
1086 | bool readonly = false; | ||
977 | 1087 | ||
978 | ret = btrfs_ioctl_snap_create_transid(file, name, fd, | 1088 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
979 | subvol, ptr); | 1089 | if (IS_ERR(vol_args)) |
1090 | return PTR_ERR(vol_args); | ||
1091 | vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; | ||
980 | 1092 | ||
981 | if (ret == 0 && ptr && | 1093 | if (vol_args->flags & |
982 | copy_to_user(arg + | 1094 | ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { |
983 | offsetof(struct btrfs_ioctl_vol_args_v2, | 1095 | ret = -EOPNOTSUPP; |
984 | transid), ptr, sizeof(*ptr))) | 1096 | goto out; |
985 | ret = -EFAULT; | ||
986 | } else { | ||
987 | vol_args = memdup_user(arg, sizeof(*vol_args)); | ||
988 | if (IS_ERR(vol_args)) | ||
989 | return PTR_ERR(vol_args); | ||
990 | name = vol_args->name; | ||
991 | fd = vol_args->fd; | ||
992 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
993 | |||
994 | ret = btrfs_ioctl_snap_create_transid(file, name, fd, | ||
995 | subvol, NULL); | ||
996 | } | 1097 | } |
1098 | |||
1099 | if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) | ||
1100 | ptr = &transid; | ||
1101 | if (vol_args->flags & BTRFS_SUBVOL_RDONLY) | ||
1102 | readonly = true; | ||
1103 | |||
1104 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, | ||
1105 | vol_args->fd, subvol, | ||
1106 | ptr, readonly); | ||
1107 | |||
1108 | if (ret == 0 && ptr && | ||
1109 | copy_to_user(arg + | ||
1110 | offsetof(struct btrfs_ioctl_vol_args_v2, | ||
1111 | transid), ptr, sizeof(*ptr))) | ||
1112 | ret = -EFAULT; | ||
997 | out: | 1113 | out: |
998 | kfree(vol_args); | 1114 | kfree(vol_args); |
999 | kfree(vol_args_v2); | 1115 | return ret; |
1116 | } | ||
1117 | |||
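
Splitting the v1 and v2 handlers above also isolates the flag handling: only btrfs_ioctl_snap_create_v2() accepts BTRFS_SUBVOL_RDONLY, threading it down to create_snapshot() as the new readonly argument. A user-space sketch with made-up paths and the btrfs-progs header assumed:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <btrfs/ioctl.h>        /* btrfs_ioctl_vol_args_v2 (assumed path) */

int main(void)
{
        struct btrfs_ioctl_vol_args_v2 args;
        int src = open("/mnt/btrfs/subvol", O_RDONLY);  /* snapshot source */
        int dst = open("/mnt/btrfs", O_RDONLY);         /* parent directory */

        if (src < 0 || dst < 0) {
                perror("open");
                return 1;
        }
        memset(&args, 0, sizeof(args));
        args.fd = src;
        args.flags = BTRFS_SUBVOL_RDONLY;       /* becomes the readonly argument */
        strncpy(args.name, "snap-ro", BTRFS_SUBVOL_NAME_MAX);
        if (ioctl(dst, BTRFS_IOC_SNAP_CREATE_V2, &args) < 0)
                perror("BTRFS_IOC_SNAP_CREATE_V2");
        close(src);
        close(dst);
        return 0;
}
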
1118 | static noinline int btrfs_ioctl_subvol_getflags(struct file *file, | ||
1119 | void __user *arg) | ||
1120 | { | ||
1121 | struct inode *inode = fdentry(file)->d_inode; | ||
1122 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1123 | int ret = 0; | ||
1124 | u64 flags = 0; | ||
1125 | |||
1126 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
1127 | return -EINVAL; | ||
1128 | |||
1129 | down_read(&root->fs_info->subvol_sem); | ||
1130 | if (btrfs_root_readonly(root)) | ||
1131 | flags |= BTRFS_SUBVOL_RDONLY; | ||
1132 | up_read(&root->fs_info->subvol_sem); | ||
1133 | |||
1134 | if (copy_to_user(arg, &flags, sizeof(flags))) | ||
1135 | ret = -EFAULT; | ||
1000 | 1136 | ||
1001 | return ret; | 1137 | return ret; |
1002 | } | 1138 | } |
1003 | 1139 | ||
1140 | static noinline int btrfs_ioctl_subvol_setflags(struct file *file, | ||
1141 | void __user *arg) | ||
1142 | { | ||
1143 | struct inode *inode = fdentry(file)->d_inode; | ||
1144 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1145 | struct btrfs_trans_handle *trans; | ||
1146 | u64 root_flags; | ||
1147 | u64 flags; | ||
1148 | int ret = 0; | ||
1149 | |||
1150 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
1151 | return -EROFS; | ||
1152 | |||
1153 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
1154 | return -EINVAL; | ||
1155 | |||
1156 | if (copy_from_user(&flags, arg, sizeof(flags))) | ||
1157 | return -EFAULT; | ||
1158 | |||
1159 | if (flags & BTRFS_SUBVOL_CREATE_ASYNC) | ||
1160 | return -EINVAL; | ||
1161 | |||
1162 | if (flags & ~BTRFS_SUBVOL_RDONLY) | ||
1163 | return -EOPNOTSUPP; | ||
1164 | |||
1165 | if (!inode_owner_or_capable(inode)) | ||
1166 | return -EACCES; | ||
1167 | |||
1168 | down_write(&root->fs_info->subvol_sem); | ||
1169 | |||
1170 | /* nothing to do */ | ||
1171 | if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) | ||
1172 | goto out; | ||
1173 | |||
1174 | root_flags = btrfs_root_flags(&root->root_item); | ||
1175 | if (flags & BTRFS_SUBVOL_RDONLY) | ||
1176 | btrfs_set_root_flags(&root->root_item, | ||
1177 | root_flags | BTRFS_ROOT_SUBVOL_RDONLY); | ||
1178 | else | ||
1179 | btrfs_set_root_flags(&root->root_item, | ||
1180 | root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); | ||
1181 | |||
1182 | trans = btrfs_start_transaction(root, 1); | ||
1183 | if (IS_ERR(trans)) { | ||
1184 | ret = PTR_ERR(trans); | ||
1185 | goto out_reset; | ||
1186 | } | ||
1187 | |||
1188 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | ||
1189 | &root->root_key, &root->root_item); | ||
1190 | |||
1191 | btrfs_commit_transaction(trans, root); | ||
1192 | out_reset: | ||
1193 | if (ret) | ||
1194 | btrfs_set_root_flags(&root->root_item, root_flags); | ||
1195 | out: | ||
1196 | up_write(&root->fs_info->subvol_sem); | ||
1197 | return ret; | ||
1198 | } | ||
1199 | |||
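
The GETFLAGS/SETFLAGS pair above operates on a bare __u64 and only honours BTRFS_SUBVOL_RDONLY; BTRFS_SUBVOL_CREATE_ASYNC is explicitly rejected, and the fd must be the root of a subvolume (the BTRFS_FIRST_FREE_OBJECTID check) or the ioctl fails with -EINVAL. A sketch of flipping a subvolume read-only (paths assumed):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/types.h>
#include <btrfs/ioctl.h>        /* assumed btrfs-progs header path */

int main(void)
{
        __u64 flags;
        int fd = open("/mnt/btrfs/subvol", O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0) {
                perror("BTRFS_IOC_SUBVOL_GETFLAGS");
                return 1;
        }
        flags |= BTRFS_SUBVOL_RDONLY;   /* any other bit => -EOPNOTSUPP */
        if (ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &flags) < 0)
                perror("BTRFS_IOC_SUBVOL_SETFLAGS");
        close(fd);
        return 0;
}

Once the root is marked read-only, writers fail early: btrfs_permission() above returns -EROFS before generic_permission() is even consulted, and the defrag, clone and trans_start ioctls all gained the same btrfs_root_readonly() gate.
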
1004 | /* | 1200 | /* |
1005 | * helper to check if the subvolume references other subvolumes | 1201 | * helper to check if the subvolume references other subvolumes |
1006 | */ | 1202 | */ |
@@ -1509,6 +1705,9 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
1509 | struct btrfs_ioctl_defrag_range_args *range; | 1705 | struct btrfs_ioctl_defrag_range_args *range; |
1510 | int ret; | 1706 | int ret; |
1511 | 1707 | ||
1708 | if (btrfs_root_readonly(root)) | ||
1709 | return -EROFS; | ||
1710 | |||
1512 | ret = mnt_want_write(file->f_path.mnt); | 1711 | ret = mnt_want_write(file->f_path.mnt); |
1513 | if (ret) | 1712 | if (ret) |
1514 | return ret; | 1713 | return ret; |
@@ -1637,6 +1836,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1637 | if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) | 1836 | if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) |
1638 | return -EINVAL; | 1837 | return -EINVAL; |
1639 | 1838 | ||
1839 | if (btrfs_root_readonly(root)) | ||
1840 | return -EROFS; | ||
1841 | |||
1640 | ret = mnt_want_write(file->f_path.mnt); | 1842 | ret = mnt_want_write(file->f_path.mnt); |
1641 | if (ret) | 1843 | if (ret) |
1642 | return ret; | 1844 | return ret; |
@@ -1788,7 +1990,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1788 | 1990 | ||
1789 | memcpy(&new_key, &key, sizeof(new_key)); | 1991 | memcpy(&new_key, &key, sizeof(new_key)); |
1790 | new_key.objectid = inode->i_ino; | 1992 | new_key.objectid = inode->i_ino; |
1791 | new_key.offset = key.offset + destoff - off; | 1993 | if (off <= key.offset) |
1994 | new_key.offset = key.offset + destoff - off; | ||
1995 | else | ||
1996 | new_key.offset = destoff; | ||
1792 | 1997 | ||
1793 | trans = btrfs_start_transaction(root, 1); | 1998 | trans = btrfs_start_transaction(root, 1); |
1794 | if (IS_ERR(trans)) { | 1999 | if (IS_ERR(trans)) { |
@@ -1958,6 +2163,10 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
1958 | if (file->private_data) | 2163 | if (file->private_data) |
1959 | goto out; | 2164 | goto out; |
1960 | 2165 | ||
2166 | ret = -EROFS; | ||
2167 | if (btrfs_root_readonly(root)) | ||
2168 | goto out; | ||
2169 | |||
1961 | ret = mnt_want_write(file->f_path.mnt); | 2170 | ret = mnt_want_write(file->f_path.mnt); |
1962 | if (ret) | 2171 | if (ret) |
1963 | goto out; | 2172 | goto out; |
@@ -1968,7 +2177,7 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
1968 | 2177 | ||
1969 | ret = -ENOMEM; | 2178 | ret = -ENOMEM; |
1970 | trans = btrfs_start_ioctl_transaction(root, 0); | 2179 | trans = btrfs_start_ioctl_transaction(root, 0); |
1971 | if (!trans) | 2180 | if (IS_ERR(trans)) |
1972 | goto out_drop; | 2181 | goto out_drop; |
1973 | 2182 | ||
1974 | file->private_data = trans; | 2183 | file->private_data = trans; |
@@ -2024,9 +2233,9 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
2024 | path->leave_spinning = 1; | 2233 | path->leave_spinning = 1; |
2025 | 2234 | ||
2026 | trans = btrfs_start_transaction(root, 1); | 2235 | trans = btrfs_start_transaction(root, 1); |
2027 | if (!trans) { | 2236 | if (IS_ERR(trans)) { |
2028 | btrfs_free_path(path); | 2237 | btrfs_free_path(path); |
2029 | return -ENOMEM; | 2238 | return PTR_ERR(trans); |
2030 | } | 2239 | } |
2031 | 2240 | ||
2032 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); | 2241 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); |
@@ -2078,7 +2287,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | |||
2078 | struct btrfs_ioctl_space_info space; | 2287 | struct btrfs_ioctl_space_info space; |
2079 | struct btrfs_ioctl_space_info *dest; | 2288 | struct btrfs_ioctl_space_info *dest; |
2080 | struct btrfs_ioctl_space_info *dest_orig; | 2289 | struct btrfs_ioctl_space_info *dest_orig; |
2081 | struct btrfs_ioctl_space_info *user_dest; | 2290 | struct btrfs_ioctl_space_info __user *user_dest; |
2082 | struct btrfs_space_info *info; | 2291 | struct btrfs_space_info *info; |
2083 | u64 types[] = {BTRFS_BLOCK_GROUP_DATA, | 2292 | u64 types[] = {BTRFS_BLOCK_GROUP_DATA, |
2084 | BTRFS_BLOCK_GROUP_SYSTEM, | 2293 | BTRFS_BLOCK_GROUP_SYSTEM, |
@@ -2087,7 +2296,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | |||
2087 | int num_types = 4; | 2296 | int num_types = 4; |
2088 | int alloc_size; | 2297 | int alloc_size; |
2089 | int ret = 0; | 2298 | int ret = 0; |
2090 | int slot_count = 0; | 2299 | u64 slot_count = 0; |
2091 | int i, c; | 2300 | int i, c; |
2092 | 2301 | ||
2093 | if (copy_from_user(&space_args, | 2302 | if (copy_from_user(&space_args, |
@@ -2126,7 +2335,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | |||
2126 | goto out; | 2335 | goto out; |
2127 | } | 2336 | } |
2128 | 2337 | ||
2129 | slot_count = min_t(int, space_args.space_slots, slot_count); | 2338 | slot_count = min_t(u64, space_args.space_slots, slot_count); |
2130 | 2339 | ||
2131 | alloc_size = sizeof(*dest) * slot_count; | 2340 | alloc_size = sizeof(*dest) * slot_count; |
2132 | 2341 | ||
@@ -2146,6 +2355,9 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | |||
2146 | for (i = 0; i < num_types; i++) { | 2355 | for (i = 0; i < num_types; i++) { |
2147 | struct btrfs_space_info *tmp; | 2356 | struct btrfs_space_info *tmp; |
2148 | 2357 | ||
2358 | if (!slot_count) | ||
2359 | break; | ||
2360 | |||
2149 | info = NULL; | 2361 | info = NULL; |
2150 | rcu_read_lock(); | 2362 | rcu_read_lock(); |
2151 | list_for_each_entry_rcu(tmp, &root->fs_info->space_info, | 2363 | list_for_each_entry_rcu(tmp, &root->fs_info->space_info, |
@@ -2167,7 +2379,10 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | |||
2167 | memcpy(dest, &space, sizeof(space)); | 2379 | memcpy(dest, &space, sizeof(space)); |
2168 | dest++; | 2380 | dest++; |
2169 | space_args.total_spaces++; | 2381 | space_args.total_spaces++; |
2382 | slot_count--; | ||
2170 | } | 2383 | } |
2384 | if (!slot_count) | ||
2385 | break; | ||
2171 | } | 2386 | } |
2172 | up_read(&info->groups_sem); | 2387 | up_read(&info->groups_sem); |
2173 | } | 2388 | } |
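
The slot_count type change above closes a truncation hole: space_args.space_slots is a __u64 supplied by user space, and clamping it through min_t(int, ...) first chops it to 32 bits, which can even go negative. A standalone illustration of the failure mode (not kernel code; it uses a local min_t look-alike):

#include <stdint.h>
#include <stdio.h>

#define min_t(type, x, y) ({            \
        type __x = (type)(x);           \
        type __y = (type)(y);           \
        __x < __y ? __x : __y; })

int main(void)
{
        uint64_t user_slots = 0x180000000ULL;   /* user-controlled value */
        uint64_t avail = 4;

        /* old code: low 32 bits are 0x80000000, i.e. a negative slot count */
        printf("min_t(int): %d\n", min_t(int, user_slots, avail));
        /* new code: comparison happens in 64 bits and yields 4 as intended */
        printf("min_t(u64): %llu\n",
               (unsigned long long)min_t(uint64_t, user_slots, avail));
        return 0;
}
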
@@ -2218,10 +2433,17 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp | |||
2218 | struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; | 2433 | struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; |
2219 | struct btrfs_trans_handle *trans; | 2434 | struct btrfs_trans_handle *trans; |
2220 | u64 transid; | 2435 | u64 transid; |
2436 | int ret; | ||
2221 | 2437 | ||
2222 | trans = btrfs_start_transaction(root, 0); | 2438 | trans = btrfs_start_transaction(root, 0); |
2439 | if (IS_ERR(trans)) | ||
2440 | return PTR_ERR(trans); | ||
2223 | transid = trans->transid; | 2441 | transid = trans->transid; |
2224 | btrfs_commit_transaction_async(trans, root, 0); | 2442 | ret = btrfs_commit_transaction_async(trans, root, 0); |
2443 | if (ret) { | ||
2444 | btrfs_end_transaction(trans, root); | ||
2445 | return ret; | ||
2446 | } | ||
2225 | 2447 | ||
2226 | if (argp) | 2448 | if (argp) |
2227 | if (copy_to_user(argp, &transid, sizeof(transid))) | 2449 | if (copy_to_user(argp, &transid, sizeof(transid))) |
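
btrfs_ioctl_start_sync() above now propagates failures from btrfs_start_transaction() and btrfs_commit_transaction_async() instead of dereferencing an error pointer; the transid it hands back pairs with BTRFS_IOC_WAIT_SYNC (defined in the ioctl.h hunk below). A sketch of the two-step sync, assuming BTRFS_IOC_START_SYNC is defined alongside WAIT_SYNC in the same header:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/types.h>
#include <btrfs/ioctl.h>        /* assumed btrfs-progs header path */

int main(void)
{
        __u64 transid;
        int fd = open("/mnt/btrfs", O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (ioctl(fd, BTRFS_IOC_START_SYNC, &transid) < 0) {    /* returns transid */
                perror("BTRFS_IOC_START_SYNC");
                return 1;
        }
        if (ioctl(fd, BTRFS_IOC_WAIT_SYNC, &transid) < 0)       /* blocks until committed */
                perror("BTRFS_IOC_WAIT_SYNC");
        close(fd);
        return 0;
}
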
@@ -2256,14 +2478,20 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
2256 | return btrfs_ioctl_setflags(file, argp); | 2478 | return btrfs_ioctl_setflags(file, argp); |
2257 | case FS_IOC_GETVERSION: | 2479 | case FS_IOC_GETVERSION: |
2258 | return btrfs_ioctl_getversion(file, argp); | 2480 | return btrfs_ioctl_getversion(file, argp); |
2481 | case FITRIM: | ||
2482 | return btrfs_ioctl_fitrim(file, argp); | ||
2259 | case BTRFS_IOC_SNAP_CREATE: | 2483 | case BTRFS_IOC_SNAP_CREATE: |
2260 | return btrfs_ioctl_snap_create(file, argp, 0, 0); | 2484 | return btrfs_ioctl_snap_create(file, argp, 0); |
2261 | case BTRFS_IOC_SNAP_CREATE_V2: | 2485 | case BTRFS_IOC_SNAP_CREATE_V2: |
2262 | return btrfs_ioctl_snap_create(file, argp, 0, 1); | 2486 | return btrfs_ioctl_snap_create_v2(file, argp, 0); |
2263 | case BTRFS_IOC_SUBVOL_CREATE: | 2487 | case BTRFS_IOC_SUBVOL_CREATE: |
2264 | return btrfs_ioctl_snap_create(file, argp, 1, 0); | 2488 | return btrfs_ioctl_snap_create(file, argp, 1); |
2265 | case BTRFS_IOC_SNAP_DESTROY: | 2489 | case BTRFS_IOC_SNAP_DESTROY: |
2266 | return btrfs_ioctl_snap_destroy(file, argp); | 2490 | return btrfs_ioctl_snap_destroy(file, argp); |
2491 | case BTRFS_IOC_SUBVOL_GETFLAGS: | ||
2492 | return btrfs_ioctl_subvol_getflags(file, argp); | ||
2493 | case BTRFS_IOC_SUBVOL_SETFLAGS: | ||
2494 | return btrfs_ioctl_subvol_setflags(file, argp); | ||
2267 | case BTRFS_IOC_DEFAULT_SUBVOL: | 2495 | case BTRFS_IOC_DEFAULT_SUBVOL: |
2268 | return btrfs_ioctl_default_subvol(file, argp); | 2496 | return btrfs_ioctl_default_subvol(file, argp); |
2269 | case BTRFS_IOC_DEFRAG: | 2497 | case BTRFS_IOC_DEFRAG: |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index c344d12c646b..8fb382167b13 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
@@ -31,6 +31,7 @@ struct btrfs_ioctl_vol_args { | |||
31 | }; | 31 | }; |
32 | 32 | ||
33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) | 33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) |
34 | #define BTRFS_SUBVOL_RDONLY (1ULL << 1) | ||
34 | 35 | ||
35 | #define BTRFS_SUBVOL_NAME_MAX 4039 | 36 | #define BTRFS_SUBVOL_NAME_MAX 4039 |
36 | struct btrfs_ioctl_vol_args_v2 { | 37 | struct btrfs_ioctl_vol_args_v2 { |
@@ -133,8 +134,15 @@ struct btrfs_ioctl_defrag_range_args { | |||
133 | */ | 134 | */ |
134 | __u32 extent_thresh; | 135 | __u32 extent_thresh; |
135 | 136 | ||
137 | /* | ||
138 | * which compression method to use if turning on compression | ||
139 | * for this defrag operation. If unspecified, zlib will | ||
140 | * be used | ||
141 | */ | ||
142 | __u32 compress_type; | ||
143 | |||
136 | /* spare for later */ | 144 | /* spare for later */ |
137 | __u32 unused[5]; | 145 | __u32 unused[4]; |
138 | }; | 146 | }; |
139 | 147 | ||
140 | struct btrfs_ioctl_space_info { | 148 | struct btrfs_ioctl_space_info { |
@@ -193,4 +201,6 @@ struct btrfs_ioctl_space_args { | |||
193 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) | 201 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) |
194 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ | 202 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ |
195 | struct btrfs_ioctl_vol_args_v2) | 203 | struct btrfs_ioctl_vol_args_v2) |
204 | #define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) | ||
205 | #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) | ||
196 | #endif | 206 | #endif |
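
Note how compress_type above is carved out of the unused[] spares, shrinking them from five words to four: the struct size is unchanged, so the BTRFS_IOC_DEFRAG_RANGE ioctl number (which encodes the argument size) stays valid for old binaries. A standalone illustration of the convention; both structs here are hypothetical stand-ins:

#include <stdio.h>
#include <linux/types.h>

struct args_v1 {                /* before: five spare words */
        __u32 extent_thresh;
        __u32 unused[5];
};

struct args_v2 {                /* after: one spare consumed by a new field */
        __u32 extent_thresh;
        __u32 compress_type;
        __u32 unused[4];
};

int main(void)
{
        /* identical sizes mean old and new binaries interoperate */
        printf("v1=%zu v2=%zu\n", sizeof(struct args_v1), sizeof(struct args_v2));
        return 0;
}
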
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c new file mode 100644 index 000000000000..a178f5ebea78 --- /dev/null +++ b/fs/btrfs/lzo.c | |||
@@ -0,0 +1,427 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/slab.h> | ||
21 | #include <linux/vmalloc.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/err.h> | ||
24 | #include <linux/sched.h> | ||
25 | #include <linux/pagemap.h> | ||
26 | #include <linux/bio.h> | ||
27 | #include <linux/lzo.h> | ||
28 | #include "compression.h" | ||
29 | |||
30 | #define LZO_LEN 4 | ||
31 | |||
32 | struct workspace { | ||
33 | void *mem; | ||
34 | void *buf; /* where decompressed data goes */ | ||
35 | void *cbuf; /* where compressed data goes */ | ||
36 | struct list_head list; | ||
37 | }; | ||
38 | |||
39 | static void lzo_free_workspace(struct list_head *ws) | ||
40 | { | ||
41 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
42 | |||
43 | vfree(workspace->buf); | ||
44 | vfree(workspace->cbuf); | ||
45 | vfree(workspace->mem); | ||
46 | kfree(workspace); | ||
47 | } | ||
48 | |||
49 | static struct list_head *lzo_alloc_workspace(void) | ||
50 | { | ||
51 | struct workspace *workspace; | ||
52 | |||
53 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | ||
54 | if (!workspace) | ||
55 | return ERR_PTR(-ENOMEM); | ||
56 | |||
57 | workspace->mem = vmalloc(LZO1X_MEM_COMPRESS); | ||
58 | workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); | ||
59 | workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); | ||
60 | if (!workspace->mem || !workspace->buf || !workspace->cbuf) | ||
61 | goto fail; | ||
62 | |||
63 | INIT_LIST_HEAD(&workspace->list); | ||
64 | |||
65 | return &workspace->list; | ||
66 | fail: | ||
67 | lzo_free_workspace(&workspace->list); | ||
68 | return ERR_PTR(-ENOMEM); | ||
69 | } | ||
70 | |||
71 | static inline void write_compress_length(char *buf, size_t len) | ||
72 | { | ||
73 | __le32 dlen; | ||
74 | |||
75 | dlen = cpu_to_le32(len); | ||
76 | memcpy(buf, &dlen, LZO_LEN); | ||
77 | } | ||
78 | |||
79 | static inline size_t read_compress_length(char *buf) | ||
80 | { | ||
81 | __le32 dlen; | ||
82 | |||
83 | memcpy(&dlen, buf, LZO_LEN); | ||
84 | return le32_to_cpu(dlen); | ||
85 | } | ||
86 | |||
87 | static int lzo_compress_pages(struct list_head *ws, | ||
88 | struct address_space *mapping, | ||
89 | u64 start, unsigned long len, | ||
90 | struct page **pages, | ||
91 | unsigned long nr_dest_pages, | ||
92 | unsigned long *out_pages, | ||
93 | unsigned long *total_in, | ||
94 | unsigned long *total_out, | ||
95 | unsigned long max_out) | ||
96 | { | ||
97 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
98 | int ret = 0; | ||
99 | char *data_in; | ||
100 | char *cpage_out; | ||
101 | int nr_pages = 0; | ||
102 | struct page *in_page = NULL; | ||
103 | struct page *out_page = NULL; | ||
104 | unsigned long bytes_left; | ||
105 | |||
106 | size_t in_len; | ||
107 | size_t out_len; | ||
108 | char *buf; | ||
109 | unsigned long tot_in = 0; | ||
110 | unsigned long tot_out = 0; | ||
111 | unsigned long pg_bytes_left; | ||
112 | unsigned long out_offset; | ||
113 | unsigned long bytes; | ||
114 | |||
115 | *out_pages = 0; | ||
116 | *total_out = 0; | ||
117 | *total_in = 0; | ||
118 | |||
119 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | ||
120 | data_in = kmap(in_page); | ||
121 | |||
122 | /* | ||
123 | * store the size of all chunks of compressed data in | ||
124 | * the first 4 bytes | ||
125 | */ | ||
126 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
127 | if (out_page == NULL) { | ||
128 | ret = -ENOMEM; | ||
129 | goto out; | ||
130 | } | ||
131 | cpage_out = kmap(out_page); | ||
132 | out_offset = LZO_LEN; | ||
133 | tot_out = LZO_LEN; | ||
134 | pages[0] = out_page; | ||
135 | nr_pages = 1; | ||
136 | pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; | ||
137 | |||
138 | /* compress at most one page of data each time */ | ||
139 | in_len = min(len, PAGE_CACHE_SIZE); | ||
140 | while (tot_in < len) { | ||
141 | ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, | ||
142 | &out_len, workspace->mem); | ||
143 | if (ret != LZO_E_OK) { | ||
144 | printk(KERN_DEBUG "btrfs lzo compress in loop returned %d\n", | ||
145 | ret); | ||
146 | ret = -1; | ||
147 | goto out; | ||
148 | } | ||
149 | |||
150 | /* store the size of this chunk of compressed data */ | ||
151 | write_compress_length(cpage_out + out_offset, out_len); | ||
152 | tot_out += LZO_LEN; | ||
153 | out_offset += LZO_LEN; | ||
154 | pg_bytes_left -= LZO_LEN; | ||
155 | |||
156 | tot_in += in_len; | ||
157 | tot_out += out_len; | ||
158 | |||
159 | /* copy bytes from the working buffer into the pages */ | ||
160 | buf = workspace->cbuf; | ||
161 | while (out_len) { | ||
162 | bytes = min_t(unsigned long, pg_bytes_left, out_len); | ||
163 | |||
164 | memcpy(cpage_out + out_offset, buf, bytes); | ||
165 | |||
166 | out_len -= bytes; | ||
167 | pg_bytes_left -= bytes; | ||
168 | buf += bytes; | ||
169 | out_offset += bytes; | ||
170 | |||
171 | /* | ||
172 | * we need another page for writing out. | ||
173 | * | ||
174 | * Note if there are fewer than 4 bytes left, we just | ||
175 | * skip to a new page. | ||
176 | */ | ||
177 | if ((out_len == 0 && pg_bytes_left < LZO_LEN) || | ||
178 | pg_bytes_left == 0) { | ||
179 | if (pg_bytes_left) { | ||
180 | memset(cpage_out + out_offset, 0, | ||
181 | pg_bytes_left); | ||
182 | tot_out += pg_bytes_left; | ||
183 | } | ||
184 | |||
185 | /* we're done, don't allocate new page */ | ||
186 | if (out_len == 0 && tot_in >= len) | ||
187 | break; | ||
188 | |||
189 | kunmap(out_page); | ||
190 | if (nr_pages == nr_dest_pages) { | ||
191 | out_page = NULL; | ||
192 | ret = -1; | ||
193 | goto out; | ||
194 | } | ||
195 | |||
196 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
197 | if (out_page == NULL) { | ||
198 | ret = -ENOMEM; | ||
199 | goto out; | ||
200 | } | ||
201 | cpage_out = kmap(out_page); | ||
202 | pages[nr_pages++] = out_page; | ||
203 | |||
204 | pg_bytes_left = PAGE_CACHE_SIZE; | ||
205 | out_offset = 0; | ||
206 | } | ||
207 | } | ||
208 | |||
209 | /* we're making it bigger, give up */ | ||
210 | if (tot_in > 8192 && tot_in < tot_out) | ||
211 | goto out; | ||
212 | |||
213 | /* we're all done */ | ||
214 | if (tot_in >= len) | ||
215 | break; | ||
216 | |||
217 | if (tot_out > max_out) | ||
218 | break; | ||
219 | |||
220 | bytes_left = len - tot_in; | ||
221 | kunmap(in_page); | ||
222 | page_cache_release(in_page); | ||
223 | |||
224 | start += PAGE_CACHE_SIZE; | ||
225 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | ||
226 | data_in = kmap(in_page); | ||
227 | in_len = min(bytes_left, PAGE_CACHE_SIZE); | ||
228 | } | ||
229 | |||
230 | if (tot_out > tot_in) | ||
231 | goto out; | ||
232 | |||
233 | /* store the size of all chunks of compressed data */ | ||
234 | cpage_out = kmap(pages[0]); | ||
235 | write_compress_length(cpage_out, tot_out); | ||
236 | |||
237 | kunmap(pages[0]); | ||
238 | |||
239 | ret = 0; | ||
240 | *total_out = tot_out; | ||
241 | *total_in = tot_in; | ||
242 | out: | ||
243 | *out_pages = nr_pages; | ||
244 | if (out_page) | ||
245 | kunmap(out_page); | ||
246 | |||
247 | if (in_page) { | ||
248 | kunmap(in_page); | ||
249 | page_cache_release(in_page); | ||
250 | } | ||
251 | |||
252 | return ret; | ||
253 | } | ||
254 | |||
255 | static int lzo_decompress_biovec(struct list_head *ws, | ||
256 | struct page **pages_in, | ||
257 | u64 disk_start, | ||
258 | struct bio_vec *bvec, | ||
259 | int vcnt, | ||
260 | size_t srclen) | ||
261 | { | ||
262 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
263 | int ret = 0, ret2; | ||
264 | char *data_in; | ||
265 | unsigned long page_in_index = 0; | ||
266 | unsigned long page_out_index = 0; | ||
267 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | ||
268 | PAGE_CACHE_SIZE; | ||
269 | unsigned long buf_start; | ||
270 | unsigned long buf_offset = 0; | ||
271 | unsigned long bytes; | ||
272 | unsigned long working_bytes; | ||
273 | unsigned long pg_offset; | ||
274 | |||
275 | size_t in_len; | ||
276 | size_t out_len; | ||
277 | unsigned long in_offset; | ||
278 | unsigned long in_page_bytes_left; | ||
279 | unsigned long tot_in; | ||
280 | unsigned long tot_out; | ||
281 | unsigned long tot_len; | ||
282 | char *buf; | ||
283 | bool may_late_unmap, need_unmap; | ||
284 | |||
285 | data_in = kmap(pages_in[0]); | ||
286 | tot_len = read_compress_length(data_in); | ||
287 | |||
288 | tot_in = LZO_LEN; | ||
289 | in_offset = LZO_LEN; | ||
290 | tot_len = min_t(size_t, srclen, tot_len); | ||
291 | in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; | ||
292 | |||
293 | tot_out = 0; | ||
294 | pg_offset = 0; | ||
295 | |||
296 | while (tot_in < tot_len) { | ||
297 | in_len = read_compress_length(data_in + in_offset); | ||
298 | in_page_bytes_left -= LZO_LEN; | ||
299 | in_offset += LZO_LEN; | ||
300 | tot_in += LZO_LEN; | ||
301 | |||
302 | tot_in += in_len; | ||
303 | working_bytes = in_len; | ||
304 | may_late_unmap = need_unmap = false; | ||
305 | |||
306 | /* fast path: avoid using the working buffer */ | ||
307 | if (in_page_bytes_left >= in_len) { | ||
308 | buf = data_in + in_offset; | ||
309 | bytes = in_len; | ||
310 | may_late_unmap = true; | ||
311 | goto cont; | ||
312 | } | ||
313 | |||
314 | /* copy bytes from the pages into the working buffer */ | ||
315 | buf = workspace->cbuf; | ||
316 | buf_offset = 0; | ||
317 | while (working_bytes) { | ||
318 | bytes = min(working_bytes, in_page_bytes_left); | ||
319 | |||
320 | memcpy(buf + buf_offset, data_in + in_offset, bytes); | ||
321 | buf_offset += bytes; | ||
322 | cont: | ||
323 | working_bytes -= bytes; | ||
324 | in_page_bytes_left -= bytes; | ||
325 | in_offset += bytes; | ||
326 | |||
327 | /* check if we need to pick another page */ | ||
328 | if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN) | ||
329 | || in_page_bytes_left == 0) { | ||
330 | tot_in += in_page_bytes_left; | ||
331 | |||
332 | if (working_bytes == 0 && tot_in >= tot_len) | ||
333 | break; | ||
334 | |||
335 | if (page_in_index + 1 >= total_pages_in) { | ||
336 | ret = -1; | ||
337 | goto done; | ||
338 | } | ||
339 | |||
340 | if (may_late_unmap) | ||
341 | need_unmap = true; | ||
342 | else | ||
343 | kunmap(pages_in[page_in_index]); | ||
344 | |||
345 | data_in = kmap(pages_in[++page_in_index]); | ||
346 | |||
347 | in_page_bytes_left = PAGE_CACHE_SIZE; | ||
348 | in_offset = 0; | ||
349 | } | ||
350 | } | ||
351 | |||
352 | out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); | ||
353 | ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, | ||
354 | &out_len); | ||
355 | if (need_unmap) | ||
356 | kunmap(pages_in[page_in_index - 1]); | ||
357 | if (ret != LZO_E_OK) { | ||
358 | printk(KERN_WARNING "btrfs decompress failed\n"); | ||
359 | ret = -1; | ||
360 | break; | ||
361 | } | ||
362 | |||
363 | buf_start = tot_out; | ||
364 | tot_out += out_len; | ||
365 | |||
366 | ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, | ||
367 | tot_out, disk_start, | ||
368 | bvec, vcnt, | ||
369 | &page_out_index, &pg_offset); | ||
370 | if (ret2 == 0) | ||
371 | break; | ||
372 | } | ||
373 | done: | ||
374 | kunmap(pages_in[page_in_index]); | ||
375 | return ret; | ||
376 | } | ||
377 | |||
378 | static int lzo_decompress(struct list_head *ws, unsigned char *data_in, | ||
379 | struct page *dest_page, | ||
380 | unsigned long start_byte, | ||
381 | size_t srclen, size_t destlen) | ||
382 | { | ||
383 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
384 | size_t in_len; | ||
385 | size_t out_len; | ||
386 | size_t tot_len; | ||
387 | int ret = 0; | ||
388 | char *kaddr; | ||
389 | unsigned long bytes; | ||
390 | |||
391 | BUG_ON(srclen < LZO_LEN); | ||
392 | |||
393 | tot_len = read_compress_length(data_in); | ||
394 | data_in += LZO_LEN; | ||
395 | |||
396 | in_len = read_compress_length(data_in); | ||
397 | data_in += LZO_LEN; | ||
398 | |||
399 | out_len = PAGE_CACHE_SIZE; | ||
400 | ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); | ||
401 | if (ret != LZO_E_OK) { | ||
402 | printk(KERN_WARNING "btrfs decompress failed!\n"); | ||
403 | ret = -1; | ||
404 | goto out; | ||
405 | } | ||
406 | |||
407 | if (out_len < start_byte) { | ||
408 | ret = -1; | ||
409 | goto out; | ||
410 | } | ||
411 | |||
412 | bytes = min_t(unsigned long, destlen, out_len - start_byte); | ||
413 | |||
414 | kaddr = kmap_atomic(dest_page, KM_USER0); | ||
415 | memcpy(kaddr, workspace->buf + start_byte, bytes); | ||
416 | kunmap_atomic(kaddr, KM_USER0); | ||
417 | out: | ||
418 | return ret; | ||
419 | } | ||
420 | |||
421 | struct btrfs_compress_op btrfs_lzo_compress = { | ||
422 | .alloc_workspace = lzo_alloc_workspace, | ||
423 | .free_workspace = lzo_free_workspace, | ||
424 | .compress_pages = lzo_compress_pages, | ||
425 | .decompress_biovec = lzo_decompress_biovec, | ||
426 | .decompress = lzo_decompress, | ||
427 | }; | ||
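
lzo_compress_pages() above lays the stream out as a 4-byte little-endian total length, followed by per-segment records of a 4-byte compressed length plus payload, padding to the next page whenever fewer than 4 bytes remain so a length field never straddles a page. A hedged user-space sketch that walks such a stream; unlike the kernel, it assumes the whole stream sits in one contiguous buffer on a little-endian host:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define LZO_LEN 4

/* 4-byte little-endian lengths, mirroring read_compress_length() */
static uint32_t read_len(const unsigned char *p)
{
        uint32_t v;

        memcpy(&v, p, LZO_LEN);
        return v;       /* little-endian host assumed for this sketch */
}

static void walk(const unsigned char *buf)
{
        uint32_t tot_len = read_len(buf);       /* total bytes incl. header */
        uint32_t off = LZO_LEN;
        int seg = 0;

        while (off + LZO_LEN <= tot_len) {
                uint32_t seg_len = read_len(buf + off);

                off += LZO_LEN;
                printf("segment %d: %u compressed bytes at offset %u\n",
                       seg++, seg_len, off);
                off += seg_len; /* payload would go to lzo1x_decompress_safe() */
        }
}

int main(void)
{
        /* toy stream: one 3-byte segment ("abc" standing in for compressed data) */
        unsigned char buf[16] = { 11, 0, 0, 0, 3, 0, 0, 0, 'a', 'b', 'c' };

        walk(buf);
        return 0;
}
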
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index ae7737e352c9..a1c940425307 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -141,7 +141,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
141 | u64 file_offset) | 141 | u64 file_offset) |
142 | { | 142 | { |
143 | struct rb_root *root = &tree->tree; | 143 | struct rb_root *root = &tree->tree; |
144 | struct rb_node *prev; | 144 | struct rb_node *prev = NULL; |
145 | struct rb_node *ret; | 145 | struct rb_node *ret; |
146 | struct btrfs_ordered_extent *entry; | 146 | struct btrfs_ordered_extent *entry; |
147 | 147 | ||
@@ -172,7 +172,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
172 | */ | 172 | */ |
173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
174 | u64 start, u64 len, u64 disk_len, | 174 | u64 start, u64 len, u64 disk_len, |
175 | int type, int dio) | 175 | int type, int dio, int compress_type) |
176 | { | 176 | { |
177 | struct btrfs_ordered_inode_tree *tree; | 177 | struct btrfs_ordered_inode_tree *tree; |
178 | struct rb_node *node; | 178 | struct rb_node *node; |
@@ -189,6 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
189 | entry->disk_len = disk_len; | 189 | entry->disk_len = disk_len; |
190 | entry->bytes_left = len; | 190 | entry->bytes_left = len; |
191 | entry->inode = inode; | 191 | entry->inode = inode; |
192 | entry->compress_type = compress_type; | ||
192 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 193 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
193 | set_bit(type, &entry->flags); | 194 | set_bit(type, &entry->flags); |
194 | 195 | ||
@@ -201,6 +202,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
201 | INIT_LIST_HEAD(&entry->list); | 202 | INIT_LIST_HEAD(&entry->list); |
202 | INIT_LIST_HEAD(&entry->root_extent_list); | 203 | INIT_LIST_HEAD(&entry->root_extent_list); |
203 | 204 | ||
205 | trace_btrfs_ordered_extent_add(inode, entry); | ||
206 | |||
204 | spin_lock(&tree->lock); | 207 | spin_lock(&tree->lock); |
205 | node = tree_insert(&tree->tree, file_offset, | 208 | node = tree_insert(&tree->tree, file_offset, |
206 | &entry->rb_node); | 209 | &entry->rb_node); |
@@ -220,14 +223,25 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
220 | u64 start, u64 len, u64 disk_len, int type) | 223 | u64 start, u64 len, u64 disk_len, int type) |
221 | { | 224 | { |
222 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | 225 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, |
223 | disk_len, type, 0); | 226 | disk_len, type, 0, |
227 | BTRFS_COMPRESS_NONE); | ||
224 | } | 228 | } |
225 | 229 | ||
226 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 230 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
227 | u64 start, u64 len, u64 disk_len, int type) | 231 | u64 start, u64 len, u64 disk_len, int type) |
228 | { | 232 | { |
229 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | 233 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, |
230 | disk_len, type, 1); | 234 | disk_len, type, 1, |
235 | BTRFS_COMPRESS_NONE); | ||
236 | } | ||
237 | |||
238 | int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, | ||
239 | u64 start, u64 len, u64 disk_len, | ||
240 | int type, int compress_type) | ||
241 | { | ||
242 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
243 | disk_len, type, 0, | ||
244 | compress_type); | ||
231 | } | 245 | } |
232 | 246 | ||
233 | /* | 247 | /* |
@@ -375,6 +389,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | |||
375 | struct list_head *cur; | 389 | struct list_head *cur; |
376 | struct btrfs_ordered_sum *sum; | 390 | struct btrfs_ordered_sum *sum; |
377 | 391 | ||
392 | trace_btrfs_ordered_extent_put(entry->inode, entry); | ||
393 | |||
378 | if (atomic_dec_and_test(&entry->refs)) { | 394 | if (atomic_dec_and_test(&entry->refs)) { |
379 | while (!list_empty(&entry->list)) { | 395 | while (!list_empty(&entry->list)) { |
380 | cur = entry->list.next; | 396 | cur = entry->list.next; |
@@ -408,6 +424,8 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, | |||
408 | spin_lock(&root->fs_info->ordered_extent_lock); | 424 | spin_lock(&root->fs_info->ordered_extent_lock); |
409 | list_del_init(&entry->root_extent_list); | 425 | list_del_init(&entry->root_extent_list); |
410 | 426 | ||
427 | trace_btrfs_ordered_extent_remove(inode, entry); | ||
428 | |||
411 | /* | 429 | /* |
412 | * we have no more ordered extents for this inode and | 430 | * we have no more ordered extents for this inode and |
413 | * no dirty pages. We can safely remove it from the | 431 | * no dirty pages. We can safely remove it from the |
@@ -573,6 +591,8 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
573 | u64 start = entry->file_offset; | 591 | u64 start = entry->file_offset; |
574 | u64 end = start + entry->len - 1; | 592 | u64 end = start + entry->len - 1; |
575 | 593 | ||
594 | trace_btrfs_ordered_extent_start(inode, entry); | ||
595 | |||
576 | /* | 596 | /* |
577 | * pages in the range can be dirty, clean or writeback. We | 597 | * pages in the range can be dirty, clean or writeback. We |
578 | * start IO on any dirty ones so the wait doesn't stall waiting | 598 | * start IO on any dirty ones so the wait doesn't stall waiting |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 61dca83119dd..ff1f69aa1883 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -68,7 +68,7 @@ struct btrfs_ordered_sum { | |||
68 | 68 | ||
69 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ | 69 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ |
70 | 70 | ||
71 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ | 71 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */ |
72 | 72 | ||
73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ | 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ |
74 | 74 | ||
@@ -93,6 +93,9 @@ struct btrfs_ordered_extent { | |||
93 | /* flags (described above) */ | 93 | /* flags (described above) */ |
94 | unsigned long flags; | 94 | unsigned long flags; |
95 | 95 | ||
96 | /* compression algorithm */ | ||
97 | int compress_type; | ||
98 | |||
96 | /* reference count */ | 99 | /* reference count */ |
97 | atomic_t refs; | 100 | atomic_t refs; |
98 | 101 | ||
@@ -148,6 +151,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
148 | u64 start, u64 len, u64 disk_len, int type); | 151 | u64 start, u64 len, u64 disk_len, int type); |
149 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 152 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
150 | u64 start, u64 len, u64 disk_len, int type); | 153 | u64 start, u64 len, u64 disk_len, int type); |
154 | int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, | ||
155 | u64 start, u64 len, u64 disk_len, | ||
156 | int type, int compress_type); | ||
151 | int btrfs_add_ordered_sum(struct inode *inode, | 157 | int btrfs_add_ordered_sum(struct inode *inode, |
152 | struct btrfs_ordered_extent *entry, | 158 | struct btrfs_ordered_extent *entry, |
153 | struct btrfs_ordered_sum *sum); | 159 | struct btrfs_ordered_sum *sum); |
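The ordered-extent changes above follow a thin-wrapper pattern: a single internal helper grows a compress_type parameter, and the pre-existing entry points become wrappers that pass BTRFS_COMPRESS_NONE, so only compressed writeback needs the new btrfs_add_ordered_extent_compress() entry point. Below is a minimal userspace sketch of the same pattern; the types, names and enum values are simplified stand-ins, not the kernel's definitions.

#include <stdio.h>

/* illustrative stand-ins, not the kernel's definitions */
enum { COMPRESS_NONE, COMPRESS_ZLIB, COMPRESS_LZO };

struct ordered_extent {
    unsigned long long file_offset;
    unsigned long long len;
    int compress_type;          /* the new field */
};

/* core helper: every entry point funnels through here */
static int __add_ordered_extent(struct ordered_extent *e,
                                unsigned long long off,
                                unsigned long long len, int compress_type)
{
    e->file_offset = off;
    e->len = len;
    e->compress_type = compress_type;
    return 0;
}

/* existing entry point keeps its signature: no compression */
static int add_ordered_extent(struct ordered_extent *e,
                              unsigned long long off, unsigned long long len)
{
    return __add_ordered_extent(e, off, len, COMPRESS_NONE);
}

/* new entry point used only by compressed writeback */
static int add_ordered_extent_compress(struct ordered_extent *e,
                                       unsigned long long off,
                                       unsigned long long len, int type)
{
    return __add_ordered_extent(e, off, len, type);
}

int main(void)
{
    struct ordered_extent e;

    add_ordered_extent(&e, 0, 4096);
    printf("plain: compress_type=%d\n", e.compress_type);
    add_ordered_extent_compress(&e, 4096, 4096, COMPRESS_LZO);
    printf("lzo: compress_type=%d\n", e.compress_type);
    return 0;
}

Keeping one core function means a future algorithm only has to touch the enum and the callers that care about compression.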
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 0d126be22b63..fb2605d998e9 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
@@ -260,6 +260,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
260 | #else | 260 | #else |
261 | BUG(); | 261 | BUG(); |
262 | #endif | 262 | #endif |
263 | break; | ||
263 | case BTRFS_BLOCK_GROUP_ITEM_KEY: | 264 | case BTRFS_BLOCK_GROUP_ITEM_KEY: |
264 | bi = btrfs_item_ptr(l, i, | 265 | bi = btrfs_item_ptr(l, i, |
265 | struct btrfs_block_group_item); | 266 | struct btrfs_block_group_item); |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 045c9c2b2d7e..199a80134312 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -1157,6 +1157,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans, | |||
1157 | new_node->bytenr = dest->node->start; | 1157 | new_node->bytenr = dest->node->start; |
1158 | new_node->level = node->level; | 1158 | new_node->level = node->level; |
1159 | new_node->lowest = node->lowest; | 1159 | new_node->lowest = node->lowest; |
1160 | new_node->checked = 1; | ||
1160 | new_node->root = dest; | 1161 | new_node->root = dest; |
1161 | 1162 | ||
1162 | if (!node->lowest) { | 1163 | if (!node->lowest) { |
@@ -1723,6 +1724,7 @@ again: | |||
1723 | 1724 | ||
1724 | eb = read_tree_block(dest, old_bytenr, blocksize, | 1725 | eb = read_tree_block(dest, old_bytenr, blocksize, |
1725 | old_ptr_gen); | 1726 | old_ptr_gen); |
1727 | BUG_ON(!eb); | ||
1726 | btrfs_tree_lock(eb); | 1728 | btrfs_tree_lock(eb); |
1727 | if (cow) { | 1729 | if (cow) { |
1728 | ret = btrfs_cow_block(trans, dest, eb, parent, | 1730 | ret = btrfs_cow_block(trans, dest, eb, parent, |
@@ -2028,6 +2030,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
2028 | 2030 | ||
2029 | while (1) { | 2031 | while (1) { |
2030 | trans = btrfs_start_transaction(root, 0); | 2032 | trans = btrfs_start_transaction(root, 0); |
2033 | BUG_ON(IS_ERR(trans)); | ||
2031 | trans->block_rsv = rc->block_rsv; | 2034 | trans->block_rsv = rc->block_rsv; |
2032 | 2035 | ||
2033 | ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, | 2036 | ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, |
@@ -2147,6 +2150,12 @@ again: | |||
2147 | } | 2150 | } |
2148 | 2151 | ||
2149 | trans = btrfs_join_transaction(rc->extent_root, 1); | 2152 | trans = btrfs_join_transaction(rc->extent_root, 1); |
2153 | if (IS_ERR(trans)) { | ||
2154 | if (!err) | ||
2155 | btrfs_block_rsv_release(rc->extent_root, | ||
2156 | rc->block_rsv, num_bytes); | ||
2157 | return PTR_ERR(trans); | ||
2158 | } | ||
2150 | 2159 | ||
2151 | if (!err) { | 2160 | if (!err) { |
2152 | if (num_bytes != rc->merging_rsv_size) { | 2161 | if (num_bytes != rc->merging_rsv_size) { |
@@ -2337,7 +2346,7 @@ struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, | |||
2337 | root = next->root; | 2346 | root = next->root; |
2338 | BUG_ON(!root); | 2347 | BUG_ON(!root); |
2339 | 2348 | ||
2340 | /* no other choice for non-refernce counted tree */ | 2349 | /* no other choice for non-reference counted tree */ |
2341 | if (!root->ref_cows) | 2350 | if (!root->ref_cows) |
2342 | return root; | 2351 | return root; |
2343 | 2352 | ||
@@ -2505,6 +2514,10 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2505 | blocksize = btrfs_level_size(root, node->level); | 2514 | blocksize = btrfs_level_size(root, node->level); |
2506 | generation = btrfs_node_ptr_generation(upper->eb, slot); | 2515 | generation = btrfs_node_ptr_generation(upper->eb, slot); |
2507 | eb = read_tree_block(root, bytenr, blocksize, generation); | 2516 | eb = read_tree_block(root, bytenr, blocksize, generation); |
2517 | if (!eb) { | ||
2518 | err = -EIO; | ||
2519 | goto next; | ||
2520 | } | ||
2508 | btrfs_tree_lock(eb); | 2521 | btrfs_tree_lock(eb); |
2509 | btrfs_set_lock_blocking(eb); | 2522 | btrfs_set_lock_blocking(eb); |
2510 | 2523 | ||
@@ -2662,6 +2675,7 @@ static int get_tree_block_key(struct reloc_control *rc, | |||
2662 | BUG_ON(block->key_ready); | 2675 | BUG_ON(block->key_ready); |
2663 | eb = read_tree_block(rc->extent_root, block->bytenr, | 2676 | eb = read_tree_block(rc->extent_root, block->bytenr, |
2664 | block->key.objectid, block->key.offset); | 2677 | block->key.objectid, block->key.offset); |
2678 | BUG_ON(!eb); | ||
2665 | WARN_ON(btrfs_header_level(eb) != block->level); | 2679 | WARN_ON(btrfs_header_level(eb) != block->level); |
2666 | if (block->level == 0) | 2680 | if (block->level == 0) |
2667 | btrfs_item_key_to_cpu(eb, &block->key, 0); | 2681 | btrfs_item_key_to_cpu(eb, &block->key, 0); |
@@ -3222,6 +3236,7 @@ truncate: | |||
3222 | trans = btrfs_join_transaction(root, 0); | 3236 | trans = btrfs_join_transaction(root, 0); |
3223 | if (IS_ERR(trans)) { | 3237 | if (IS_ERR(trans)) { |
3224 | btrfs_free_path(path); | 3238 | btrfs_free_path(path); |
3239 | ret = PTR_ERR(trans); | ||
3225 | goto out; | 3240 | goto out; |
3226 | } | 3241 | } |
3227 | 3242 | ||
@@ -3628,6 +3643,7 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
3628 | set_reloc_control(rc); | 3643 | set_reloc_control(rc); |
3629 | 3644 | ||
3630 | trans = btrfs_join_transaction(rc->extent_root, 1); | 3645 | trans = btrfs_join_transaction(rc->extent_root, 1); |
3646 | BUG_ON(IS_ERR(trans)); | ||
3631 | btrfs_commit_transaction(trans, rc->extent_root); | 3647 | btrfs_commit_transaction(trans, rc->extent_root); |
3632 | return 0; | 3648 | return 0; |
3633 | } | 3649 | } |
@@ -3644,6 +3660,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3644 | u32 item_size; | 3660 | u32 item_size; |
3645 | int ret; | 3661 | int ret; |
3646 | int err = 0; | 3662 | int err = 0; |
3663 | int progress = 0; | ||
3647 | 3664 | ||
3648 | path = btrfs_alloc_path(); | 3665 | path = btrfs_alloc_path(); |
3649 | if (!path) | 3666 | if (!path) |
@@ -3656,8 +3673,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3656 | } | 3673 | } |
3657 | 3674 | ||
3658 | while (1) { | 3675 | while (1) { |
3676 | progress++; | ||
3659 | trans = btrfs_start_transaction(rc->extent_root, 0); | 3677 | trans = btrfs_start_transaction(rc->extent_root, 0); |
3660 | | 3678 | BUG_ON(IS_ERR(trans)); |
3679 | restart: | ||
3661 | if (update_backref_cache(trans, &rc->backref_cache)) { | 3680 | if (update_backref_cache(trans, &rc->backref_cache)) { |
3662 | btrfs_end_transaction(trans, rc->extent_root); | 3681 | btrfs_end_transaction(trans, rc->extent_root); |
3663 | continue; | 3682 | continue; |
@@ -3770,6 +3789,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3770 | } | 3789 | } |
3771 | } | 3790 | } |
3772 | } | 3791 | } |
3792 | if (trans && progress && err == -ENOSPC) { | ||
3793 | ret = btrfs_force_chunk_alloc(trans, rc->extent_root, | ||
3794 | rc->block_group->flags); | ||
3795 | if (ret == 0) { | ||
3796 | err = 0; | ||
3797 | progress = 0; | ||
3798 | goto restart; | ||
3799 | } | ||
3800 | } | ||
3773 | 3801 | ||
3774 | btrfs_release_path(rc->extent_root, path); | 3802 | btrfs_release_path(rc->extent_root, path); |
3775 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | 3803 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, |
@@ -3804,7 +3832,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3804 | 3832 | ||
3805 | /* get rid of pinned extents */ | 3833 | /* get rid of pinned extents */ |
3806 | trans = btrfs_join_transaction(rc->extent_root, 1); | 3834 | trans = btrfs_join_transaction(rc->extent_root, 1); |
3807 | btrfs_commit_transaction(trans, rc->extent_root); | 3835 | if (IS_ERR(trans)) |
3836 | err = PTR_ERR(trans); | ||
3837 | else | ||
3838 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3808 | out_free: | 3839 | out_free: |
3809 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); | 3840 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); |
3810 | btrfs_free_path(path); | 3841 | btrfs_free_path(path); |
@@ -4022,6 +4053,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) | |||
4022 | int ret; | 4053 | int ret; |
4023 | 4054 | ||
4024 | trans = btrfs_start_transaction(root->fs_info->tree_root, 0); | 4055 | trans = btrfs_start_transaction(root->fs_info->tree_root, 0); |
4056 | BUG_ON(IS_ERR(trans)); | ||
4025 | 4057 | ||
4026 | memset(&root->root_item.drop_progress, 0, | 4058 | memset(&root->root_item.drop_progress, 0, |
4027 | sizeof(root->root_item.drop_progress)); | 4059 | sizeof(root->root_item.drop_progress)); |
@@ -4125,6 +4157,11 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4125 | set_reloc_control(rc); | 4157 | set_reloc_control(rc); |
4126 | 4158 | ||
4127 | trans = btrfs_join_transaction(rc->extent_root, 1); | 4159 | trans = btrfs_join_transaction(rc->extent_root, 1); |
4160 | if (IS_ERR(trans)) { | ||
4161 | unset_reloc_control(rc); | ||
4162 | err = PTR_ERR(trans); | ||
4163 | goto out_free; | ||
4164 | } | ||
4128 | 4165 | ||
4129 | rc->merge_reloc_tree = 1; | 4166 | rc->merge_reloc_tree = 1; |
4130 | 4167 | ||
@@ -4154,9 +4191,13 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4154 | unset_reloc_control(rc); | 4191 | unset_reloc_control(rc); |
4155 | 4192 | ||
4156 | trans = btrfs_join_transaction(rc->extent_root, 1); | 4193 | trans = btrfs_join_transaction(rc->extent_root, 1); |
4157 | btrfs_commit_transaction(trans, rc->extent_root); | 4194 | if (IS_ERR(trans)) |
4158 | out: | 4195 | err = PTR_ERR(trans); |
4196 | else | ||
4197 | btrfs_commit_transaction(trans, rc->extent_root); | ||
4198 | out_free: | ||
4159 | kfree(rc); | 4199 | kfree(rc); |
4200 | out: | ||
4160 | while (!list_empty(&reloc_roots)) { | 4201 | while (!list_empty(&reloc_roots)) { |
4161 | reloc_root = list_entry(reloc_roots.next, | 4202 | reloc_root = list_entry(reloc_roots.next, |
4162 | struct btrfs_root, root_list); | 4203 | struct btrfs_root, root_list); |
@@ -4174,7 +4215,7 @@ out: | |||
4174 | if (IS_ERR(fs_root)) | 4215 | if (IS_ERR(fs_root)) |
4175 | err = PTR_ERR(fs_root); | 4216 | err = PTR_ERR(fs_root); |
4176 | else | 4217 | else |
4177 | btrfs_orphan_cleanup(fs_root); | 4218 | err = btrfs_orphan_cleanup(fs_root); |
4178 | } | 4219 | } |
4179 | return err; | 4220 | return err; |
4180 | } | 4221 | } |
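Most of the relocation.c churn above is one hardening step applied repeatedly: btrfs_start_transaction(), btrfs_join_transaction() and read_tree_block() can fail, so their results are now checked with BUG_ON, IS_ERR()/PTR_ERR() or a NULL test before use instead of being dereferenced blindly. Below is a self-contained sketch of the kernel's ERR_PTR convention, reimplemented in userspace purely for illustration.

#include <stdio.h>
#include <errno.h>
#include <stdint.h>

/* userspace reimplementation of the kernel's ERR_PTR helpers */
#define MAX_ERRNO 4095
static inline void *ERR_PTR(long error) { return (void *)(intptr_t)error; }
static inline long PTR_ERR(const void *ptr) { return (long)(intptr_t)ptr; }
static inline int IS_ERR(const void *ptr)
{
    return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

struct trans { int id; };

/* stand-in for btrfs_start_transaction(); fails with -ENOMEM on demand */
static struct trans *start_transaction(int fail)
{
    static struct trans t = { 42 };

    if (fail)
        return ERR_PTR(-ENOMEM);
    return &t;
}

int main(void)
{
    struct trans *trans = start_transaction(1);

    /* the pattern the diff adds: check before dereferencing */
    if (IS_ERR(trans)) {
        printf("start failed: %ld\n", PTR_ERR(trans));
        return 1;
    }
    printf("trans id %d\n", trans->id);
    return 0;
}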
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 6a1086e83ffc..6928bff62daa 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -88,7 +88,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, | |||
88 | search_key.offset = (u64)-1; | 88 | search_key.offset = (u64)-1; |
89 | 89 | ||
90 | path = btrfs_alloc_path(); | 90 | path = btrfs_alloc_path(); |
91 | BUG_ON(!path); | 91 | if (!path) |
92 | return -ENOMEM; | ||
92 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); | 93 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); |
93 | if (ret < 0) | 94 | if (ret < 0) |
94 | goto out; | 95 | goto out; |
@@ -332,7 +333,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
332 | struct extent_buffer *leaf; | 333 | struct extent_buffer *leaf; |
333 | 334 | ||
334 | path = btrfs_alloc_path(); | 335 | path = btrfs_alloc_path(); |
335 | BUG_ON(!path); | 336 | if (!path) |
337 | return -ENOMEM; | ||
336 | ret = btrfs_search_slot(trans, root, key, path, -1, 1); | 338 | ret = btrfs_search_slot(trans, root, key, path, -1, 1); |
337 | if (ret < 0) | 339 | if (ret < 0) |
338 | goto out; | 340 | goto out; |
@@ -471,3 +473,21 @@ again: | |||
471 | btrfs_free_path(path); | 473 | btrfs_free_path(path); |
472 | return 0; | 474 | return 0; |
473 | } | 475 | } |
476 | |||
477 | /* | ||
478 | * Old btrfs forgets to init root_item->flags and root_item->byte_limit | ||
479 | * for subvolumes. To work around this problem, we steal a bit from | ||
480 | * root_item->inode_item->flags, and use it to indicate if those fields | ||
481 | * have been properly initialized. | ||
482 | */ | ||
483 | void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item) | ||
484 | { | ||
485 | u64 inode_flags = le64_to_cpu(root_item->inode.flags); | ||
486 | |||
487 | if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) { | ||
488 | inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT; | ||
489 | root_item->inode.flags = cpu_to_le64(inode_flags); | ||
490 | root_item->flags = 0; | ||
491 | root_item->byte_limit = 0; | ||
492 | } | ||
493 | } | ||
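btrfs_check_and_init_root_item() is a lazy-versioning trick: old images never set the stolen bit, so its absence proves that flags and byte_limit were never initialized, and they get sanitized exactly once per root. A userspace sketch of the idea follows; the bit value is made up here and the le64 conversions are elided, so treat it as an illustration rather than the on-disk format.

#include <stdio.h>
#include <stdint.h>

#define ROOT_ITEM_INIT (1ULL << 31)     /* illustrative spare bit */

struct root_item {
    uint64_t inode_flags;   /* stands in for inode_item->flags */
    uint64_t flags;
    uint64_t byte_limit;
};

static void check_and_init_root_item(struct root_item *ri)
{
    if (!(ri->inode_flags & ROOT_ITEM_INIT)) {
        /* first time we see this root: sanitize the stale fields */
        ri->inode_flags |= ROOT_ITEM_INIT;
        ri->flags = 0;
        ri->byte_limit = 0;
    }
}

int main(void)
{
    /* simulate a root item written by an old mkfs: garbage fields */
    struct root_item ri = { .inode_flags = 0, .flags = 0xdead,
                            .byte_limit = 0xbeef };

    check_and_init_root_item(&ri);
    printf("flags=%llu byte_limit=%llu\n",
           (unsigned long long)ri.flags,
           (unsigned long long)ri.byte_limit);
    return 0;
}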
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 22acdaa78ce1..0ac712efcdf2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -52,8 +52,95 @@ | |||
52 | #include "export.h" | 52 | #include "export.h" |
53 | #include "compression.h" | 53 | #include "compression.h" |
54 | 54 | ||
55 | #define CREATE_TRACE_POINTS | ||
56 | #include <trace/events/btrfs.h> | ||
57 | |||
55 | static const struct super_operations btrfs_super_ops; | 58 | static const struct super_operations btrfs_super_ops; |
56 | 59 | ||
60 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, | ||
61 | char nbuf[16]) | ||
62 | { | ||
63 | char *errstr = NULL; | ||
64 | |||
65 | switch (errno) { | ||
66 | case -EIO: | ||
67 | errstr = "IO failure"; | ||
68 | break; | ||
69 | case -ENOMEM: | ||
70 | errstr = "Out of memory"; | ||
71 | break; | ||
72 | case -EROFS: | ||
73 | errstr = "Readonly filesystem"; | ||
74 | break; | ||
75 | default: | ||
76 | if (nbuf) { | ||
77 | if (snprintf(nbuf, 16, "error %d", -errno) >= 0) | ||
78 | errstr = nbuf; | ||
79 | } | ||
80 | break; | ||
81 | } | ||
82 | |||
83 | return errstr; | ||
84 | } | ||
85 | |||
86 | static void __save_error_info(struct btrfs_fs_info *fs_info) | ||
87 | { | ||
88 | /* | ||
89 | * today we only save the error info into ram. Long term we'll | ||
90 | * also send it down to the disk | ||
91 | */ | ||
92 | fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; | ||
93 | } | ||
94 | |||
95 | /* NOTE: | ||
96 | * We defer the write_super work until umount in order to avoid a | ||
97 | * deadlock, because umount holds all the locks. | ||
98 | */ | ||
99 | static void save_error_info(struct btrfs_fs_info *fs_info) | ||
100 | { | ||
101 | __save_error_info(fs_info); | ||
102 | } | ||
103 | |||
104 | /* btrfs handle error by forcing the filesystem readonly */ | ||
105 | static void btrfs_handle_error(struct btrfs_fs_info *fs_info) | ||
106 | { | ||
107 | struct super_block *sb = fs_info->sb; | ||
108 | |||
109 | if (sb->s_flags & MS_RDONLY) | ||
110 | return; | ||
111 | |||
112 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
113 | sb->s_flags |= MS_RDONLY; | ||
114 | printk(KERN_INFO "btrfs is forced readonly\n"); | ||
115 | } | ||
116 | } | ||
117 | |||
118 | /* | ||
119 | * __btrfs_std_error decodes expected errors from the caller and | ||
120 | * invokes the appropriate error response. | ||
121 | */ | ||
122 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | ||
123 | unsigned int line, int errno) | ||
124 | { | ||
125 | struct super_block *sb = fs_info->sb; | ||
126 | char nbuf[16]; | ||
127 | const char *errstr; | ||
128 | |||
129 | /* | ||
130 | * Special case: if the error is EROFS, and we're already | ||
131 | * under MS_RDONLY, then it is safe here. | ||
132 | */ | ||
133 | if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) | ||
134 | return; | ||
135 | |||
136 | errstr = btrfs_decode_error(fs_info, errno, nbuf); | ||
137 | printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", | ||
138 | sb->s_id, function, line, errstr); | ||
139 | save_error_info(fs_info); | ||
140 | |||
141 | btrfs_handle_error(fs_info); | ||
142 | } | ||
143 | |||
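Since __btrfs_std_error() takes the reporting function and line explicitly, call sites are expected to go through a wrapper macro that captures them automatically. The sketch below shows the shape of such a wrapper; its name and exact form are assumptions for illustration, not quoted from this diff.

#include <stdio.h>

struct fs_info { const char *id; int fs_state; };

static void __std_error(struct fs_info *fs, const char *function,
                        unsigned int line, int errno_val)
{
    fprintf(stderr, "error (device %s) in %s:%u: error %d\n",
            fs->id, function, line, -errno_val);
    fs->fs_state = 1;   /* analogous to BTRFS_SUPER_FLAG_ERROR */
}

/* call-site macro: captures function name and line automatically */
#define std_error(fs, err)                                      \
    do {                                                        \
        if (err)                                                \
            __std_error((fs), __func__, __LINE__, (err));       \
    } while (0)

int main(void)
{
    struct fs_info fs = { "sda1", 0 };

    std_error(&fs, -5);     /* -EIO */
    return fs.fs_state ? 1 : 0;
}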
57 | static void btrfs_put_super(struct super_block *sb) | 144 | static void btrfs_put_super(struct super_block *sb) |
58 | { | 145 | { |
59 | struct btrfs_root *root = btrfs_sb(sb); | 146 | struct btrfs_root *root = btrfs_sb(sb); |
@@ -69,9 +156,10 @@ enum { | |||
69 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, | 156 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, |
70 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, | 157 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, |
71 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, | 158 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, |
72 | Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, | 159 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, |
73 | Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err, | 160 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, |
74 | Opt_user_subvol_rm_allowed, | 161 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, |
162 | Opt_enospc_debug, Opt_subvolrootid, Opt_err, | ||
75 | }; | 163 | }; |
76 | 164 | ||
77 | static match_table_t tokens = { | 165 | static match_table_t tokens = { |
@@ -86,7 +174,9 @@ static match_table_t tokens = { | |||
86 | {Opt_alloc_start, "alloc_start=%s"}, | 174 | {Opt_alloc_start, "alloc_start=%s"}, |
87 | {Opt_thread_pool, "thread_pool=%d"}, | 175 | {Opt_thread_pool, "thread_pool=%d"}, |
88 | {Opt_compress, "compress"}, | 176 | {Opt_compress, "compress"}, |
177 | {Opt_compress_type, "compress=%s"}, | ||
89 | {Opt_compress_force, "compress-force"}, | 178 | {Opt_compress_force, "compress-force"}, |
179 | {Opt_compress_force_type, "compress-force=%s"}, | ||
90 | {Opt_ssd, "ssd"}, | 180 | {Opt_ssd, "ssd"}, |
91 | {Opt_ssd_spread, "ssd_spread"}, | 181 | {Opt_ssd_spread, "ssd_spread"}, |
92 | {Opt_nossd, "nossd"}, | 182 | {Opt_nossd, "nossd"}, |
@@ -98,6 +188,8 @@ static match_table_t tokens = { | |||
98 | {Opt_space_cache, "space_cache"}, | 188 | {Opt_space_cache, "space_cache"}, |
99 | {Opt_clear_cache, "clear_cache"}, | 189 | {Opt_clear_cache, "clear_cache"}, |
100 | {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, | 190 | {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, |
191 | {Opt_enospc_debug, "enospc_debug"}, | ||
192 | {Opt_subvolrootid, "subvolrootid=%d"}, | ||
101 | {Opt_err, NULL}, | 193 | {Opt_err, NULL}, |
102 | }; | 194 | }; |
103 | 195 | ||
@@ -112,6 +204,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
112 | char *p, *num, *orig; | 204 | char *p, *num, *orig; |
113 | int intarg; | 205 | int intarg; |
114 | int ret = 0; | 206 | int ret = 0; |
207 | char *compress_type; | ||
208 | bool compress_force = false; | ||
115 | 209 | ||
116 | if (!options) | 210 | if (!options) |
117 | return 0; | 211 | return 0; |
@@ -139,6 +233,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
139 | break; | 233 | break; |
140 | case Opt_subvol: | 234 | case Opt_subvol: |
141 | case Opt_subvolid: | 235 | case Opt_subvolid: |
236 | case Opt_subvolrootid: | ||
142 | case Opt_device: | 237 | case Opt_device: |
143 | /* | 238 | /* |
144 | * These are parsed by btrfs_parse_early_options | 239 | * These are parsed by btrfs_parse_early_options |
@@ -154,14 +249,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
154 | btrfs_set_opt(info->mount_opt, NODATACOW); | 249 | btrfs_set_opt(info->mount_opt, NODATACOW); |
155 | btrfs_set_opt(info->mount_opt, NODATASUM); | 250 | btrfs_set_opt(info->mount_opt, NODATASUM); |
156 | break; | 251 | break; |
157 | case Opt_compress: | ||
158 | printk(KERN_INFO "btrfs: use compression\n"); | ||
159 | btrfs_set_opt(info->mount_opt, COMPRESS); | ||
160 | break; | ||
161 | case Opt_compress_force: | 252 | case Opt_compress_force: |
162 | printk(KERN_INFO "btrfs: forcing compression\n"); | 253 | case Opt_compress_force_type: |
163 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | 254 | compress_force = true; |
255 | case Opt_compress: | ||
256 | case Opt_compress_type: | ||
257 | if (token == Opt_compress || | ||
258 | token == Opt_compress_force || | ||
259 | strcmp(args[0].from, "zlib") == 0) { | ||
260 | compress_type = "zlib"; | ||
261 | info->compress_type = BTRFS_COMPRESS_ZLIB; | ||
262 | } else if (strcmp(args[0].from, "lzo") == 0) { | ||
263 | compress_type = "lzo"; | ||
264 | info->compress_type = BTRFS_COMPRESS_LZO; | ||
265 | } else { | ||
266 | ret = -EINVAL; | ||
267 | goto out; | ||
268 | } | ||
269 | |||
164 | btrfs_set_opt(info->mount_opt, COMPRESS); | 270 | btrfs_set_opt(info->mount_opt, COMPRESS); |
271 | if (compress_force) { | ||
272 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | ||
273 | pr_info("btrfs: force %s compression\n", | ||
274 | compress_type); | ||
275 | } else | ||
276 | pr_info("btrfs: use %s compression\n", | ||
277 | compress_type); | ||
165 | break; | 278 | break; |
166 | case Opt_ssd: | 279 | case Opt_ssd: |
167 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); | 280 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); |
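Note the control flow in the compress cases above: Opt_compress_force and Opt_compress_force_type set compress_force and then deliberately fall through into the shared type-selection code, so zlib stays the default whenever no explicit type is given. A compact userspace sketch of that fallthrough parsing, with illustrative token names:

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

enum token { T_COMPRESS, T_COMPRESS_TYPE, T_FORCE, T_FORCE_TYPE };

static int parse_compress(enum token token, const char *arg)
{
    bool force = false;
    const char *type;

    switch (token) {
    case T_FORCE:
    case T_FORCE_TYPE:
        force = true;
        /* fall through to the shared type selection */
    case T_COMPRESS:
    case T_COMPRESS_TYPE:
        if (token == T_COMPRESS || token == T_FORCE ||
            strcmp(arg, "zlib") == 0)
            type = "zlib";
        else if (strcmp(arg, "lzo") == 0)
            type = "lzo";
        else
            return -1;      /* -EINVAL in the kernel */
        printf("%s %s compression\n", force ? "force" : "use", type);
        break;
    }
    return 0;
}

int main(void)
{
    parse_compress(T_COMPRESS, "");         /* "use zlib compression" */
    parse_compress(T_FORCE_TYPE, "lzo");    /* "force lzo compression" */
    if (parse_compress(T_COMPRESS_TYPE, "bogus"))
        printf("rejected unknown type\n");
    return 0;
}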
@@ -252,6 +365,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
252 | case Opt_user_subvol_rm_allowed: | 365 | case Opt_user_subvol_rm_allowed: |
253 | btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); | 366 | btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); |
254 | break; | 367 | break; |
368 | case Opt_enospc_debug: | ||
369 | btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); | ||
370 | break; | ||
255 | case Opt_err: | 371 | case Opt_err: |
256 | printk(KERN_INFO "btrfs: unrecognized mount option " | 372 | printk(KERN_INFO "btrfs: unrecognized mount option " |
257 | "'%s'\n", p); | 373 | "'%s'\n", p); |
@@ -274,10 +390,10 @@ out: | |||
274 | */ | 390 | */ |
275 | static int btrfs_parse_early_options(const char *options, fmode_t flags, | 391 | static int btrfs_parse_early_options(const char *options, fmode_t flags, |
276 | void *holder, char **subvol_name, u64 *subvol_objectid, | 392 | void *holder, char **subvol_name, u64 *subvol_objectid, |
277 | struct btrfs_fs_devices **fs_devices) | 393 | u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices) |
278 | { | 394 | { |
279 | substring_t args[MAX_OPT_ARGS]; | 395 | substring_t args[MAX_OPT_ARGS]; |
280 | char *opts, *p; | 396 | char *opts, *orig, *p; |
281 | int error = 0; | 397 | int error = 0; |
282 | int intarg; | 398 | int intarg; |
283 | 399 | ||
@@ -291,6 +407,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
291 | opts = kstrdup(options, GFP_KERNEL); | 407 | opts = kstrdup(options, GFP_KERNEL); |
292 | if (!opts) | 408 | if (!opts) |
293 | return -ENOMEM; | 409 | return -ENOMEM; |
410 | orig = opts; | ||
294 | 411 | ||
295 | while ((p = strsep(&opts, ",")) != NULL) { | 412 | while ((p = strsep(&opts, ",")) != NULL) { |
296 | int token; | 413 | int token; |
@@ -314,6 +431,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
314 | *subvol_objectid = intarg; | 431 | *subvol_objectid = intarg; |
315 | } | 432 | } |
316 | break; | 433 | break; |
434 | case Opt_subvolrootid: | ||
435 | intarg = 0; | ||
436 | error = match_int(&args[0], &intarg); | ||
437 | if (!error) { | ||
438 | /* we want the original fs_tree */ | ||
439 | if (!intarg) | ||
440 | *subvol_rootid = | ||
441 | BTRFS_FS_TREE_OBJECTID; | ||
442 | else | ||
443 | *subvol_rootid = intarg; | ||
444 | } | ||
445 | break; | ||
317 | case Opt_device: | 446 | case Opt_device: |
318 | error = btrfs_scan_one_device(match_strdup(&args[0]), | 447 | error = btrfs_scan_one_device(match_strdup(&args[0]), |
319 | flags, holder, fs_devices); | 448 | flags, holder, fs_devices); |
@@ -326,7 +455,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
326 | } | 455 | } |
327 | 456 | ||
328 | out_free_opts: | 457 | out_free_opts: |
329 | kfree(opts); | 458 | kfree(orig); |
330 | out: | 459 | out: |
331 | /* | 460 | /* |
332 | * If no subvolume name is specified we use the default one. Allocate | 461 | * If no subvolume name is specified we use the default one. Allocate |
@@ -508,6 +637,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
508 | struct btrfs_root *root = btrfs_sb(sb); | 637 | struct btrfs_root *root = btrfs_sb(sb); |
509 | int ret; | 638 | int ret; |
510 | 639 | ||
640 | trace_btrfs_sync_fs(wait); | ||
641 | |||
511 | if (!wait) { | 642 | if (!wait) { |
512 | filemap_flush(root->fs_info->btree_inode->i_mapping); | 643 | filemap_flush(root->fs_info->btree_inode->i_mapping); |
513 | return 0; | 644 | return 0; |
@@ -517,6 +648,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
517 | btrfs_wait_ordered_extents(root, 0, 0); | 648 | btrfs_wait_ordered_extents(root, 0, 0); |
518 | 649 | ||
519 | trans = btrfs_start_transaction(root, 0); | 650 | trans = btrfs_start_transaction(root, 0); |
651 | if (IS_ERR(trans)) | ||
652 | return PTR_ERR(trans); | ||
520 | ret = btrfs_commit_transaction(trans, root); | 653 | ret = btrfs_commit_transaction(trans, root); |
521 | return ret; | 654 | return ret; |
522 | } | 655 | } |
@@ -525,6 +658,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
525 | { | 658 | { |
526 | struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); | 659 | struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); |
527 | struct btrfs_fs_info *info = root->fs_info; | 660 | struct btrfs_fs_info *info = root->fs_info; |
661 | char *compress_type; | ||
528 | 662 | ||
529 | if (btrfs_test_opt(root, DEGRADED)) | 663 | if (btrfs_test_opt(root, DEGRADED)) |
530 | seq_puts(seq, ",degraded"); | 664 | seq_puts(seq, ",degraded"); |
@@ -543,8 +677,16 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
543 | if (info->thread_pool_size != min_t(unsigned long, | 677 | if (info->thread_pool_size != min_t(unsigned long, |
544 | num_online_cpus() + 2, 8)) | 678 | num_online_cpus() + 2, 8)) |
545 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); | 679 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); |
546 | if (btrfs_test_opt(root, COMPRESS)) | 680 | if (btrfs_test_opt(root, COMPRESS)) { |
547 | seq_puts(seq, ",compress"); | 681 | if (info->compress_type == BTRFS_COMPRESS_ZLIB) |
682 | compress_type = "zlib"; | ||
683 | else | ||
684 | compress_type = "lzo"; | ||
685 | if (btrfs_test_opt(root, FORCE_COMPRESS)) | ||
686 | seq_printf(seq, ",compress-force=%s", compress_type); | ||
687 | else | ||
688 | seq_printf(seq, ",compress=%s", compress_type); | ||
689 | } | ||
548 | if (btrfs_test_opt(root, NOSSD)) | 690 | if (btrfs_test_opt(root, NOSSD)) |
549 | seq_puts(seq, ",nossd"); | 691 | seq_puts(seq, ",nossd"); |
550 | if (btrfs_test_opt(root, SSD_SPREAD)) | 692 | if (btrfs_test_opt(root, SSD_SPREAD)) |
@@ -559,6 +701,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
559 | seq_puts(seq, ",discard"); | 701 | seq_puts(seq, ",discard"); |
560 | if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) | 702 | if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) |
561 | seq_puts(seq, ",noacl"); | 703 | seq_puts(seq, ",noacl"); |
704 | if (btrfs_test_opt(root, SPACE_CACHE)) | ||
705 | seq_puts(seq, ",space_cache"); | ||
706 | if (btrfs_test_opt(root, CLEAR_CACHE)) | ||
707 | seq_puts(seq, ",clear_cache"); | ||
708 | if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) | ||
709 | seq_puts(seq, ",user_subvol_rm_allowed"); | ||
562 | return 0; | 710 | return 0; |
563 | } | 711 | } |
564 | 712 | ||
@@ -602,6 +750,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
602 | fmode_t mode = FMODE_READ; | 750 | fmode_t mode = FMODE_READ; |
603 | char *subvol_name = NULL; | 751 | char *subvol_name = NULL; |
604 | u64 subvol_objectid = 0; | 752 | u64 subvol_objectid = 0; |
753 | u64 subvol_rootid = 0; | ||
605 | int error = 0; | 754 | int error = 0; |
606 | 755 | ||
607 | if (!(flags & MS_RDONLY)) | 756 | if (!(flags & MS_RDONLY)) |
@@ -609,7 +758,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
609 | 758 | ||
610 | error = btrfs_parse_early_options(data, mode, fs_type, | 759 | error = btrfs_parse_early_options(data, mode, fs_type, |
611 | &subvol_name, &subvol_objectid, | 760 | &subvol_name, &subvol_objectid, |
612 | &fs_devices); | 761 | &subvol_rootid, &fs_devices); |
613 | if (error) | 762 | if (error) |
614 | return ERR_PTR(error); | 763 | return ERR_PTR(error); |
615 | 764 | ||
@@ -655,6 +804,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
655 | } | 804 | } |
656 | 805 | ||
657 | btrfs_close_devices(fs_devices); | 806 | btrfs_close_devices(fs_devices); |
807 | kfree(fs_info); | ||
808 | kfree(tree_root); | ||
658 | } else { | 809 | } else { |
659 | char b[BDEVNAME_SIZE]; | 810 | char b[BDEVNAME_SIZE]; |
660 | 811 | ||
@@ -671,15 +822,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
671 | s->s_flags |= MS_ACTIVE; | 822 | s->s_flags |= MS_ACTIVE; |
672 | } | 823 | } |
673 | 824 | ||
674 | root = get_default_root(s, subvol_objectid); | ||
675 | if (IS_ERR(root)) { | ||
676 | error = PTR_ERR(root); | ||
677 | deactivate_locked_super(s); | ||
678 | goto error_free_subvol_name; | ||
679 | } | ||
680 | /* if they gave us a subvolume name bind mount into that */ | 825 | /* if they gave us a subvolume name bind mount into that */ |
681 | if (strcmp(subvol_name, ".")) { | 826 | if (strcmp(subvol_name, ".")) { |
682 | struct dentry *new_root; | 827 | struct dentry *new_root; |
828 | |||
829 | root = get_default_root(s, subvol_rootid); | ||
830 | if (IS_ERR(root)) { | ||
831 | error = PTR_ERR(root); | ||
832 | deactivate_locked_super(s); | ||
833 | goto error_free_subvol_name; | ||
834 | } | ||
835 | |||
683 | mutex_lock(&root->d_inode->i_mutex); | 836 | mutex_lock(&root->d_inode->i_mutex); |
684 | new_root = lookup_one_len(subvol_name, root, | 837 | new_root = lookup_one_len(subvol_name, root, |
685 | strlen(subvol_name)); | 838 | strlen(subvol_name)); |
@@ -700,6 +853,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
700 | } | 853 | } |
701 | dput(root); | 854 | dput(root); |
702 | root = new_root; | 855 | root = new_root; |
856 | } else { | ||
857 | root = get_default_root(s, subvol_objectid); | ||
858 | if (IS_ERR(root)) { | ||
859 | error = PTR_ERR(root); | ||
860 | deactivate_locked_super(s); | ||
861 | goto error_free_subvol_name; | ||
862 | } | ||
703 | } | 863 | } |
704 | 864 | ||
705 | kfree(subvol_name); | 865 | kfree(subvol_name); |
@@ -753,6 +913,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
753 | return 0; | 913 | return 0; |
754 | } | 914 | } |
755 | 915 | ||
916 | /* | ||
917 | * Helper to calculate the free space on the devices that can be used | ||
918 | * to store file data. | ||
919 | */ | ||
920 | static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | ||
921 | { | ||
922 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
923 | struct btrfs_device_info *devices_info; | ||
924 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
925 | struct btrfs_device *device; | ||
926 | u64 skip_space; | ||
927 | u64 type; | ||
928 | u64 avail_space; | ||
929 | u64 used_space; | ||
930 | u64 min_stripe_size; | ||
931 | int min_stripes = 1; | ||
932 | int i = 0, nr_devices; | ||
933 | int ret; | ||
934 | |||
935 | nr_devices = fs_info->fs_devices->rw_devices; | ||
936 | BUG_ON(!nr_devices); | ||
937 | |||
938 | devices_info = kmalloc(sizeof(*devices_info) * nr_devices, | ||
939 | GFP_NOFS); | ||
940 | if (!devices_info) | ||
941 | return -ENOMEM; | ||
942 | |||
943 | /* calc min stripe number for data space allocation */ | ||
944 | type = btrfs_get_alloc_profile(root, 1); | ||
945 | if (type & BTRFS_BLOCK_GROUP_RAID0) | ||
946 | min_stripes = 2; | ||
947 | else if (type & BTRFS_BLOCK_GROUP_RAID1) | ||
948 | min_stripes = 2; | ||
949 | else if (type & BTRFS_BLOCK_GROUP_RAID10) | ||
950 | min_stripes = 4; | ||
951 | |||
952 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
953 | min_stripe_size = 2 * BTRFS_STRIPE_LEN; | ||
954 | else | ||
955 | min_stripe_size = BTRFS_STRIPE_LEN; | ||
956 | |||
957 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | ||
958 | if (!device->in_fs_metadata) | ||
959 | continue; | ||
960 | |||
961 | avail_space = device->total_bytes - device->bytes_used; | ||
962 | |||
963 | /* align with stripe_len */ | ||
964 | do_div(avail_space, BTRFS_STRIPE_LEN); | ||
965 | avail_space *= BTRFS_STRIPE_LEN; | ||
966 | |||
967 | /* | ||
968 | * In order to avoid overwriting the superblock on the drive, | ||
969 | * btrfs starts at an offset of at least 1MB when doing chunk | ||
970 | * allocation. | ||
971 | */ | ||
972 | skip_space = 1024 * 1024; | ||
973 | |||
974 | /* user can set the offset in fs_info->alloc_start. */ | ||
975 | if (fs_info->alloc_start + BTRFS_STRIPE_LEN <= | ||
976 | device->total_bytes) | ||
977 | skip_space = max(fs_info->alloc_start, skip_space); | ||
978 | |||
979 | /* | ||
980 | * btrfs can not use the free space in [0, skip_space - 1], | ||
981 | * we must subtract it from the total. In order to implement | ||
982 | * it, we account the used space in this range first. | ||
983 | */ | ||
984 | ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1, | ||
985 | &used_space); | ||
986 | if (ret) { | ||
987 | kfree(devices_info); | ||
988 | return ret; | ||
989 | } | ||
990 | |||
991 | /* calc the free space in [0, skip_space - 1] */ | ||
992 | skip_space -= used_space; | ||
993 | |||
994 | /* | ||
995 | * we can not use the free space in [0, skip_space - 1], so | ||
996 | * subtract it from the total. | ||
997 | */ | ||
998 | if (avail_space && avail_space >= skip_space) | ||
999 | avail_space -= skip_space; | ||
1000 | else | ||
1001 | avail_space = 0; | ||
1002 | |||
1003 | if (avail_space < min_stripe_size) | ||
1004 | continue; | ||
1005 | |||
1006 | devices_info[i].dev = device; | ||
1007 | devices_info[i].max_avail = avail_space; | ||
1008 | |||
1009 | i++; | ||
1010 | } | ||
1011 | |||
1012 | nr_devices = i; | ||
1013 | |||
1014 | btrfs_descending_sort_devices(devices_info, nr_devices); | ||
1015 | |||
1016 | i = nr_devices - 1; | ||
1017 | avail_space = 0; | ||
1018 | while (nr_devices >= min_stripes) { | ||
1019 | if (devices_info[i].max_avail >= min_stripe_size) { | ||
1020 | int j; | ||
1021 | u64 alloc_size; | ||
1022 | |||
1023 | avail_space += devices_info[i].max_avail * min_stripes; | ||
1024 | alloc_size = devices_info[i].max_avail; | ||
1025 | for (j = i + 1 - min_stripes; j <= i; j++) | ||
1026 | devices_info[j].max_avail -= alloc_size; | ||
1027 | } | ||
1028 | i--; | ||
1029 | nr_devices--; | ||
1030 | } | ||
1031 | |||
1032 | kfree(devices_info); | ||
1033 | *free_bytes = avail_space; | ||
1034 | return 0; | ||
1035 | } | ||
1036 | |||
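The estimate at the end of btrfs_calc_avail_data_space() is a greedy loop: with devices sorted by free space in descending order, it repeatedly takes the smallest remaining device, counts min_stripes times that device's space as allocatable, debits the same width from the min_stripes smallest devices, and drops the smallest from consideration. The standalone sketch below reproduces that loop in userspace with plain u64 arithmetic and made-up device sizes.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

static int cmp_desc(const void *a, const void *b)
{
    uint64_t x = *(const uint64_t *)a, y = *(const uint64_t *)b;

    return (x < y) - (x > y);   /* sort descending */
}

/* greedy estimate of allocatable bytes for a given stripe count */
static uint64_t estimate(uint64_t *avail, int nr, int min_stripes,
                         uint64_t min_stripe_size)
{
    uint64_t total = 0;
    int i = nr - 1;

    qsort(avail, nr, sizeof(*avail), cmp_desc);

    while (nr >= min_stripes) {
        if (avail[i] >= min_stripe_size) {
            uint64_t width = avail[i];
            int j;

            /* a chunk of this width spans the min_stripes
             * smallest devices still in play */
            total += width * min_stripes;
            for (j = i + 1 - min_stripes; j <= i; j++)
                avail[j] -= width;
        }
        i--;
        nr--;
    }
    return total;
}

int main(void)
{
    uint64_t devs[] = { 100, 60, 30 };  /* free bytes per device */

    /* RAID0-like data profile: at least two stripes per chunk */
    printf("usable: %llu\n", (unsigned long long)estimate(devs, 3, 2, 1));
    return 0;
}

With the sizes above the loop reports 120 of the 190 free bytes as usable, since the last 70 bytes live on a single device and cannot form a two-stripe chunk.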
756 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 1037 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
757 | { | 1038 | { |
758 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); | 1039 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); |
@@ -760,17 +1041,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
760 | struct list_head *head = &root->fs_info->space_info; | 1041 | struct list_head *head = &root->fs_info->space_info; |
761 | struct btrfs_space_info *found; | 1042 | struct btrfs_space_info *found; |
762 | u64 total_used = 0; | 1043 | u64 total_used = 0; |
763 | u64 total_used_data = 0; | 1044 | u64 total_free_data = 0; |
764 | int bits = dentry->d_sb->s_blocksize_bits; | 1045 | int bits = dentry->d_sb->s_blocksize_bits; |
765 | __be32 *fsid = (__be32 *)root->fs_info->fsid; | 1046 | __be32 *fsid = (__be32 *)root->fs_info->fsid; |
1047 | int ret; | ||
766 | 1048 | ||
1049 | /* holding chunk_mutex to avoid allocating new chunks */ | ||
1050 | mutex_lock(&root->fs_info->chunk_mutex); | ||
767 | rcu_read_lock(); | 1051 | rcu_read_lock(); |
768 | list_for_each_entry_rcu(found, head, list) { | 1052 | list_for_each_entry_rcu(found, head, list) { |
769 | if (found->flags & (BTRFS_BLOCK_GROUP_METADATA | | 1053 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) { |
770 | BTRFS_BLOCK_GROUP_SYSTEM)) | 1054 | total_free_data += found->disk_total - found->disk_used; |
771 | total_used_data += found->disk_total; | 1055 | total_free_data -= |
772 | else | 1056 | btrfs_account_ro_block_groups_free_space(found); |
773 | total_used_data += found->disk_used; | 1057 | } |
1058 | |||
774 | total_used += found->disk_used; | 1059 | total_used += found->disk_used; |
775 | } | 1060 | } |
776 | rcu_read_unlock(); | 1061 | rcu_read_unlock(); |
@@ -778,9 +1063,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
778 | buf->f_namelen = BTRFS_NAME_LEN; | 1063 | buf->f_namelen = BTRFS_NAME_LEN; |
779 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; | 1064 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; |
780 | buf->f_bfree = buf->f_blocks - (total_used >> bits); | 1065 | buf->f_bfree = buf->f_blocks - (total_used >> bits); |
781 | buf->f_bavail = buf->f_blocks - (total_used_data >> bits); | ||
782 | buf->f_bsize = dentry->d_sb->s_blocksize; | 1066 | buf->f_bsize = dentry->d_sb->s_blocksize; |
783 | buf->f_type = BTRFS_SUPER_MAGIC; | 1067 | buf->f_type = BTRFS_SUPER_MAGIC; |
1068 | buf->f_bavail = total_free_data; | ||
1069 | ret = btrfs_calc_avail_data_space(root, &total_free_data); | ||
1070 | if (ret) { | ||
1071 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
1072 | return ret; | ||
1073 | } | ||
1074 | buf->f_bavail += total_free_data; | ||
1075 | buf->f_bavail = buf->f_bavail >> bits; | ||
1076 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
784 | 1077 | ||
785 | /* We treat it as constant endianness (it doesn't matter _which_) | 1078 | /* We treat it as constant endianness (it doesn't matter _which_) |
786 | because we want the fsid to come out the same whether mounted | 1079 | because we want the fsid to come out the same whether mounted |
@@ -897,10 +1190,14 @@ static int __init init_btrfs_fs(void) | |||
897 | if (err) | 1190 | if (err) |
898 | return err; | 1191 | return err; |
899 | 1192 | ||
900 | err = btrfs_init_cachep(); | 1193 | err = btrfs_init_compress(); |
901 | if (err) | 1194 | if (err) |
902 | goto free_sysfs; | 1195 | goto free_sysfs; |
903 | 1196 | ||
1197 | err = btrfs_init_cachep(); | ||
1198 | if (err) | ||
1199 | goto free_compress; | ||
1200 | |||
904 | err = extent_io_init(); | 1201 | err = extent_io_init(); |
905 | if (err) | 1202 | if (err) |
906 | goto free_cachep; | 1203 | goto free_cachep; |
@@ -928,6 +1225,8 @@ free_extent_io: | |||
928 | extent_io_exit(); | 1225 | extent_io_exit(); |
929 | free_cachep: | 1226 | free_cachep: |
930 | btrfs_destroy_cachep(); | 1227 | btrfs_destroy_cachep(); |
1228 | free_compress: | ||
1229 | btrfs_exit_compress(); | ||
931 | free_sysfs: | 1230 | free_sysfs: |
932 | btrfs_exit_sysfs(); | 1231 | btrfs_exit_sysfs(); |
933 | return err; | 1232 | return err; |
@@ -942,7 +1241,7 @@ static void __exit exit_btrfs_fs(void) | |||
942 | unregister_filesystem(&btrfs_fs_type); | 1241 | unregister_filesystem(&btrfs_fs_type); |
943 | btrfs_exit_sysfs(); | 1242 | btrfs_exit_sysfs(); |
944 | btrfs_cleanup_fs_uuids(); | 1243 | btrfs_cleanup_fs_uuids(); |
945 | btrfs_zlib_exit(); | 1244 | btrfs_exit_compress(); |
946 | } | 1245 | } |
947 | 1246 | ||
948 | module_init(init_btrfs_fs) | 1247 | module_init(init_btrfs_fs) |
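The init_btrfs_fs() hunk shows how a new initialization step (btrfs_init_compress) slots into the goto-unwind idiom: each step gets a cleanup label, and a failure jumps to the label that tears down everything initialized so far, in reverse order. A minimal sketch of the idiom with placeholder subsystems:

#include <stdio.h>

static int init_a(void) { return 0; }
static void exit_a(void) { }
static int init_b(void) { return 0; }
static void exit_b(void) { }
static int init_c(void) { return -1; }  /* pretend this one fails */

/* each new init step adds a label that unwinds everything before it */
static int init_all(void)
{
    int err;

    err = init_a();
    if (err)
        return err;
    err = init_b();
    if (err)
        goto free_a;
    err = init_c();
    if (err)
        goto free_b;
    return 0;

free_b:
    exit_b();
free_a:
    exit_a();
    return err;
}

int main(void)
{
    printf("init_all: %d\n", init_all());
    return 0;
}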
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f50e931fc217..c571734d5e5a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -32,10 +32,8 @@ | |||
32 | 32 | ||
33 | static noinline void put_transaction(struct btrfs_transaction *transaction) | 33 | static noinline void put_transaction(struct btrfs_transaction *transaction) |
34 | { | 34 | { |
35 | WARN_ON(transaction->use_count == 0); | 35 | WARN_ON(atomic_read(&transaction->use_count) == 0); |
36 | transaction->use_count--; | 36 | if (atomic_dec_and_test(&transaction->use_count)) { |
37 | if (transaction->use_count == 0) { | ||
38 | list_del_init(&transaction->list); | ||
39 | memset(transaction, 0, sizeof(*transaction)); | 37 | memset(transaction, 0, sizeof(*transaction)); |
40 | kmem_cache_free(btrfs_transaction_cachep, transaction); | 38 | kmem_cache_free(btrfs_transaction_cachep, transaction); |
41 | } | 39 | } |
@@ -57,16 +55,17 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
57 | if (!cur_trans) { | 55 | if (!cur_trans) { |
58 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, | 56 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, |
59 | GFP_NOFS); | 57 | GFP_NOFS); |
60 | BUG_ON(!cur_trans); | 58 | if (!cur_trans) |
59 | return -ENOMEM; | ||
61 | root->fs_info->generation++; | 60 | root->fs_info->generation++; |
62 | cur_trans->num_writers = 1; | 61 | atomic_set(&cur_trans->num_writers, 1); |
63 | cur_trans->num_joined = 0; | 62 | cur_trans->num_joined = 0; |
64 | cur_trans->transid = root->fs_info->generation; | 63 | cur_trans->transid = root->fs_info->generation; |
65 | init_waitqueue_head(&cur_trans->writer_wait); | 64 | init_waitqueue_head(&cur_trans->writer_wait); |
66 | init_waitqueue_head(&cur_trans->commit_wait); | 65 | init_waitqueue_head(&cur_trans->commit_wait); |
67 | cur_trans->in_commit = 0; | 66 | cur_trans->in_commit = 0; |
68 | cur_trans->blocked = 0; | 67 | cur_trans->blocked = 0; |
69 | cur_trans->use_count = 1; | 68 | atomic_set(&cur_trans->use_count, 1); |
70 | cur_trans->commit_done = 0; | 69 | cur_trans->commit_done = 0; |
71 | cur_trans->start_time = get_seconds(); | 70 | cur_trans->start_time = get_seconds(); |
72 | 71 | ||
@@ -87,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
87 | root->fs_info->running_transaction = cur_trans; | 86 | root->fs_info->running_transaction = cur_trans; |
88 | spin_unlock(&root->fs_info->new_trans_lock); | 87 | spin_unlock(&root->fs_info->new_trans_lock); |
89 | } else { | 88 | } else { |
90 | cur_trans->num_writers++; | 89 | atomic_inc(&cur_trans->num_writers); |
91 | cur_trans->num_joined++; | 90 | cur_trans->num_joined++; |
92 | } | 91 | } |
93 | 92 | ||
@@ -144,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root) | |||
144 | cur_trans = root->fs_info->running_transaction; | 143 | cur_trans = root->fs_info->running_transaction; |
145 | if (cur_trans && cur_trans->blocked) { | 144 | if (cur_trans && cur_trans->blocked) { |
146 | DEFINE_WAIT(wait); | 145 | DEFINE_WAIT(wait); |
147 | cur_trans->use_count++; | 146 | atomic_inc(&cur_trans->use_count); |
148 | while (1) { | 147 | while (1) { |
149 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | 148 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, |
150 | TASK_UNINTERRUPTIBLE); | 149 | TASK_UNINTERRUPTIBLE); |
@@ -180,7 +179,11 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
180 | { | 179 | { |
181 | struct btrfs_trans_handle *h; | 180 | struct btrfs_trans_handle *h; |
182 | struct btrfs_transaction *cur_trans; | 181 | struct btrfs_transaction *cur_trans; |
182 | int retries = 0; | ||
183 | int ret; | 183 | int ret; |
184 | |||
185 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | ||
186 | return ERR_PTR(-EROFS); | ||
184 | again: | 187 | again: |
185 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 188 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); |
186 | if (!h) | 189 | if (!h) |
@@ -192,10 +195,15 @@ again: | |||
192 | wait_current_trans(root); | 195 | wait_current_trans(root); |
193 | 196 | ||
194 | ret = join_transaction(root); | 197 | ret = join_transaction(root); |
195 | BUG_ON(ret); | 198 | if (ret < 0) { |
199 | kmem_cache_free(btrfs_trans_handle_cachep, h); | ||
200 | if (type != TRANS_JOIN_NOLOCK) | ||
201 | mutex_unlock(&root->fs_info->trans_mutex); | ||
202 | return ERR_PTR(ret); | ||
203 | } | ||
196 | 204 | ||
197 | cur_trans = root->fs_info->running_transaction; | 205 | cur_trans = root->fs_info->running_transaction; |
198 | cur_trans->use_count++; | 206 | atomic_inc(&cur_trans->use_count); |
199 | if (type != TRANS_JOIN_NOLOCK) | 207 | if (type != TRANS_JOIN_NOLOCK) |
200 | mutex_unlock(&root->fs_info->trans_mutex); | 208 | mutex_unlock(&root->fs_info->trans_mutex); |
201 | 209 | ||
@@ -215,10 +223,18 @@ again: | |||
215 | 223 | ||
216 | if (num_items > 0) { | 224 | if (num_items > 0) { |
217 | ret = btrfs_trans_reserve_metadata(h, root, num_items); | 225 | ret = btrfs_trans_reserve_metadata(h, root, num_items); |
218 | if (ret == -EAGAIN) { | 226 | if (ret == -EAGAIN && !retries) { |
227 | retries++; | ||
219 | btrfs_commit_transaction(h, root); | 228 | btrfs_commit_transaction(h, root); |
220 | goto again; | 229 | goto again; |
230 | } else if (ret == -EAGAIN) { | ||
231 | /* | ||
232 | * We have already retried and got EAGAIN, so really we | ||
233 | * don't have space, so set ret to -ENOSPC. | ||
234 | */ | ||
235 | ret = -ENOSPC; | ||
221 | } | 236 | } |
237 | |||
222 | if (ret < 0) { | 238 | if (ret < 0) { |
223 | btrfs_end_transaction(h, root); | 239 | btrfs_end_transaction(h, root); |
224 | return ERR_PTR(ret); | 240 | return ERR_PTR(ret); |
@@ -318,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | |||
318 | goto out_unlock; /* nothing committing|committed */ | 334 | goto out_unlock; /* nothing committing|committed */ |
319 | } | 335 | } |
320 | 336 | ||
321 | cur_trans->use_count++; | 337 | atomic_inc(&cur_trans->use_count); |
322 | mutex_unlock(&root->fs_info->trans_mutex); | 338 | mutex_unlock(&root->fs_info->trans_mutex); |
323 | 339 | ||
324 | wait_for_commit(root, cur_trans); | 340 | wait_for_commit(root, cur_trans); |
@@ -448,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
448 | wake_up_process(info->transaction_kthread); | 464 | wake_up_process(info->transaction_kthread); |
449 | } | 465 | } |
450 | 466 | ||
451 | if (lock) | ||
452 | mutex_lock(&info->trans_mutex); | ||
453 | WARN_ON(cur_trans != info->running_transaction); | 467 | WARN_ON(cur_trans != info->running_transaction); |
454 | WARN_ON(cur_trans->num_writers < 1); | 468 | WARN_ON(atomic_read(&cur_trans->num_writers) < 1); |
455 | cur_trans->num_writers--; | 469 | atomic_dec(&cur_trans->num_writers); |
456 | 470 | ||
457 | smp_mb(); | 471 | smp_mb(); |
458 | if (waitqueue_active(&cur_trans->writer_wait)) | 472 | if (waitqueue_active(&cur_trans->writer_wait)) |
459 | wake_up(&cur_trans->writer_wait); | 473 | wake_up(&cur_trans->writer_wait); |
460 | put_transaction(cur_trans); | 474 | put_transaction(cur_trans); |
461 | if (lock) | ||
462 | mutex_unlock(&info->trans_mutex); | ||
463 | 475 | ||
464 | if (current->journal_info == trans) | 476 | if (current->journal_info == trans) |
465 | current->journal_info = NULL; | 477 | current->journal_info = NULL; |
@@ -910,6 +922,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
910 | u64 to_reserve = 0; | 922 | u64 to_reserve = 0; |
911 | u64 index = 0; | 923 | u64 index = 0; |
912 | u64 objectid; | 924 | u64 objectid; |
925 | u64 root_flags; | ||
913 | 926 | ||
914 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 927 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
915 | if (!new_root_item) { | 928 | if (!new_root_item) { |
@@ -966,6 +979,14 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
966 | record_root_in_trans(trans, root); | 979 | record_root_in_trans(trans, root); |
967 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 980 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
968 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 981 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
982 | btrfs_check_and_init_root_item(new_root_item); | ||
983 | |||
984 | root_flags = btrfs_root_flags(new_root_item); | ||
985 | if (pending->readonly) | ||
986 | root_flags |= BTRFS_ROOT_SUBVOL_RDONLY; | ||
987 | else | ||
988 | root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY; | ||
989 | btrfs_set_root_flags(new_root_item, root_flags); | ||
969 | 990 | ||
970 | old = btrfs_lock_root_node(root); | 991 | old = btrfs_lock_root_node(root); |
971 | btrfs_cow_block(trans, root, old, NULL, 0, &old); | 992 | btrfs_cow_block(trans, root, old, NULL, 0, &old); |
@@ -1145,16 +1166,22 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
1145 | struct btrfs_transaction *cur_trans; | 1166 | struct btrfs_transaction *cur_trans; |
1146 | 1167 | ||
1147 | ac = kmalloc(sizeof(*ac), GFP_NOFS); | 1168 | ac = kmalloc(sizeof(*ac), GFP_NOFS); |
1148 | BUG_ON(!ac); | 1169 | if (!ac) |
1170 | return -ENOMEM; | ||
1149 | 1171 | ||
1150 | INIT_DELAYED_WORK(&ac->work, do_async_commit); | 1172 | INIT_DELAYED_WORK(&ac->work, do_async_commit); |
1151 | ac->root = root; | 1173 | ac->root = root; |
1152 | ac->newtrans = btrfs_join_transaction(root, 0); | 1174 | ac->newtrans = btrfs_join_transaction(root, 0); |
1175 | if (IS_ERR(ac->newtrans)) { | ||
1176 | int err = PTR_ERR(ac->newtrans); | ||
1177 | kfree(ac); | ||
1178 | return err; | ||
1179 | } | ||
1153 | 1180 | ||
1154 | /* take transaction reference */ | 1181 | /* take transaction reference */ |
1155 | mutex_lock(&root->fs_info->trans_mutex); | 1182 | mutex_lock(&root->fs_info->trans_mutex); |
1156 | cur_trans = trans->transaction; | 1183 | cur_trans = trans->transaction; |
1157 | cur_trans->use_count++; | 1184 | atomic_inc(&cur_trans->use_count); |
1158 | mutex_unlock(&root->fs_info->trans_mutex); | 1185 | mutex_unlock(&root->fs_info->trans_mutex); |
1159 | 1186 | ||
1160 | btrfs_end_transaction(trans, root); | 1187 | btrfs_end_transaction(trans, root); |
@@ -1213,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1213 | 1240 | ||
1214 | mutex_lock(&root->fs_info->trans_mutex); | 1241 | mutex_lock(&root->fs_info->trans_mutex); |
1215 | if (cur_trans->in_commit) { | 1242 | if (cur_trans->in_commit) { |
1216 | cur_trans->use_count++; | 1243 | atomic_inc(&cur_trans->use_count); |
1217 | mutex_unlock(&root->fs_info->trans_mutex); | 1244 | mutex_unlock(&root->fs_info->trans_mutex); |
1218 | btrfs_end_transaction(trans, root); | 1245 | btrfs_end_transaction(trans, root); |
1219 | 1246 | ||
@@ -1235,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1235 | prev_trans = list_entry(cur_trans->list.prev, | 1262 | prev_trans = list_entry(cur_trans->list.prev, |
1236 | struct btrfs_transaction, list); | 1263 | struct btrfs_transaction, list); |
1237 | if (!prev_trans->commit_done) { | 1264 | if (!prev_trans->commit_done) { |
1238 | prev_trans->use_count++; | 1265 | atomic_inc(&prev_trans->use_count); |
1239 | mutex_unlock(&root->fs_info->trans_mutex); | 1266 | mutex_unlock(&root->fs_info->trans_mutex); |
1240 | 1267 | ||
1241 | wait_for_commit(root, prev_trans); | 1268 | wait_for_commit(root, prev_trans); |
@@ -1276,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1276 | TASK_UNINTERRUPTIBLE); | 1303 | TASK_UNINTERRUPTIBLE); |
1277 | 1304 | ||
1278 | smp_mb(); | 1305 | smp_mb(); |
1279 | if (cur_trans->num_writers > 1) | 1306 | if (atomic_read(&cur_trans->num_writers) > 1) |
1280 | schedule_timeout(MAX_SCHEDULE_TIMEOUT); | 1307 | schedule_timeout(MAX_SCHEDULE_TIMEOUT); |
1281 | else if (should_grow) | 1308 | else if (should_grow) |
1282 | schedule_timeout(1); | 1309 | schedule_timeout(1); |
1283 | 1310 | ||
1284 | mutex_lock(&root->fs_info->trans_mutex); | 1311 | mutex_lock(&root->fs_info->trans_mutex); |
1285 | finish_wait(&cur_trans->writer_wait, &wait); | 1312 | finish_wait(&cur_trans->writer_wait, &wait); |
1286 | } while (cur_trans->num_writers > 1 || | 1313 | } while (atomic_read(&cur_trans->num_writers) > 1 || |
1287 | (should_grow && cur_trans->num_joined != joined)); | 1314 | (should_grow && cur_trans->num_joined != joined)); |
1288 | 1315 | ||
1289 | ret = create_pending_snapshots(trans, root->fs_info); | 1316 | ret = create_pending_snapshots(trans, root->fs_info); |
@@ -1370,9 +1397,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1370 | 1397 | ||
1371 | wake_up(&cur_trans->commit_wait); | 1398 | wake_up(&cur_trans->commit_wait); |
1372 | 1399 | ||
1400 | list_del_init(&cur_trans->list); | ||
1373 | put_transaction(cur_trans); | 1401 | put_transaction(cur_trans); |
1374 | put_transaction(cur_trans); | 1402 | put_transaction(cur_trans); |
1375 | 1403 | ||
1404 | trace_btrfs_transaction_commit(root); | ||
1405 | |||
1376 | mutex_unlock(&root->fs_info->trans_mutex); | 1406 | mutex_unlock(&root->fs_info->trans_mutex); |
1377 | 1407 | ||
1378 | if (current->journal_info == trans) | 1408 | if (current->journal_info == trans) |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f104b57ad4ef..e441acc6c584 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -27,11 +27,11 @@ struct btrfs_transaction { | |||
27 | * total writers in this transaction, it must be zero before the | 27 | * total writers in this transaction, it must be zero before the |
28 | * transaction can end | 28 | * transaction can end |
29 | */ | 29 | */ |
30 | unsigned long num_writers; | 30 | atomic_t num_writers; |
31 | 31 | ||
32 | unsigned long num_joined; | 32 | unsigned long num_joined; |
33 | int in_commit; | 33 | int in_commit; |
34 | int use_count; | 34 | atomic_t use_count; |
35 | int commit_done; | 35 | int commit_done; |
36 | int blocked; | 36 | int blocked; |
37 | struct list_head list; | 37 | struct list_head list; |
@@ -62,6 +62,7 @@ struct btrfs_pending_snapshot { | |||
62 | struct btrfs_block_rsv block_rsv; | 62 | struct btrfs_block_rsv block_rsv; |
63 | /* extra metadata reservation for relocation */ | 63 | /* extra metadata reservation for relocation */ |
64 | int error; | 64 | int error; |
65 | bool readonly; | ||
65 | struct list_head list; | 66 | struct list_head list; |
66 | }; | 67 | }; |
67 | 68 | ||
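The counter conversions in transaction.c, matched by the atomic_t fields in transaction.h above, are what allow __btrfs_end_transaction() to drop the trans_mutex lock/unlock pair around the writer decrement: once use_count and num_writers are atomic_t, the last-reference check and the free are race-free without the mutex. The same pattern in userspace C11 atomics, as an analogue to the kernel's atomic_dec_and_test():

#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>

struct transaction {
    atomic_int use_count;
    int transid;
};

static struct transaction *get_transaction(struct transaction *t)
{
    atomic_fetch_add(&t->use_count, 1);
    return t;
}

/* analogue of atomic_dec_and_test(): free when we drop the last ref */
static void put_transaction(struct transaction *t)
{
    if (atomic_fetch_sub(&t->use_count, 1) == 1) {
        printf("freeing transaction %d\n", t->transid);
        free(t);
    }
}

int main(void)
{
    struct transaction *t = malloc(sizeof(*t));

    atomic_init(&t->use_count, 1);  /* creator holds one reference */
    t->transid = 7;

    get_transaction(t);     /* a second user takes a reference */
    put_transaction(t);     /* drops to 1: still alive */
    put_transaction(t);     /* drops to 0: freed here */
    return 0;
}

This is also why list_del_init() moves out of put_transaction() in the diff: list manipulation still needs the mutex, so it happens at commit time while trans_mutex is held.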
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 054744ac5719..c50271ad3157 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -338,6 +338,12 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, | |||
338 | } | 338 | } |
339 | dst_copy = kmalloc(item_size, GFP_NOFS); | 339 | dst_copy = kmalloc(item_size, GFP_NOFS); |
340 | src_copy = kmalloc(item_size, GFP_NOFS); | 340 | src_copy = kmalloc(item_size, GFP_NOFS); |
341 | if (!dst_copy || !src_copy) { | ||
342 | btrfs_release_path(root, path); | ||
343 | kfree(dst_copy); | ||
344 | kfree(src_copy); | ||
345 | return -ENOMEM; | ||
346 | } | ||
341 | 347 | ||
342 | read_extent_buffer(eb, src_copy, src_ptr, item_size); | 348 | read_extent_buffer(eb, src_copy, src_ptr, item_size); |
343 | 349 | ||
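
Editor's note: the -ENOMEM check added above frees both buffers even though only one allocation may have failed; that is safe because kfree(NULL) is a no-op. The same paired-allocation pattern, extracted into a hypothetical helper (alloc_item_pair is not a real btrfs function; it only uses the real kmalloc()/kfree() API, shown as a fragment with headers omitted):

static int alloc_item_pair(size_t item_size, void **dst_ret, void **src_ret)
{
	void *dst = kmalloc(item_size, GFP_NOFS);
	void *src = kmalloc(item_size, GFP_NOFS);

	if (!dst || !src) {
		kfree(dst);	/* safe: kfree(NULL) does nothing */
		kfree(src);
		return -ENOMEM;
	}
	*dst_ret = dst;
	*src_ret = src;
	return 0;
}
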
@@ -665,6 +671,9 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, | |||
665 | btrfs_dir_item_key_to_cpu(leaf, di, &location); | 671 | btrfs_dir_item_key_to_cpu(leaf, di, &location); |
666 | name_len = btrfs_dir_name_len(leaf, di); | 672 | name_len = btrfs_dir_name_len(leaf, di); |
667 | name = kmalloc(name_len, GFP_NOFS); | 673 | name = kmalloc(name_len, GFP_NOFS); |
674 | if (!name) | ||
675 | return -ENOMEM; | ||
676 | |||
668 | read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); | 677 | read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); |
669 | btrfs_release_path(root, path); | 678 | btrfs_release_path(root, path); |
670 | 679 | ||
@@ -744,6 +753,9 @@ static noinline int backref_in_log(struct btrfs_root *log, | |||
744 | int match = 0; | 753 | int match = 0; |
745 | 754 | ||
746 | path = btrfs_alloc_path(); | 755 | path = btrfs_alloc_path(); |
756 | if (!path) | ||
757 | return -ENOMEM; | ||
758 | |||
747 | ret = btrfs_search_slot(NULL, log, key, path, 0, 0); | 759 | ret = btrfs_search_slot(NULL, log, key, path, 0, 0); |
748 | if (ret != 0) | 760 | if (ret != 0) |
749 | goto out; | 761 | goto out; |
@@ -787,12 +799,12 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | |||
787 | struct inode *dir; | 799 | struct inode *dir; |
788 | int ret; | 800 | int ret; |
789 | struct btrfs_inode_ref *ref; | 801 | struct btrfs_inode_ref *ref; |
790 | struct btrfs_dir_item *di; | ||
791 | struct inode *inode; | 802 | struct inode *inode; |
792 | char *name; | 803 | char *name; |
793 | int namelen; | 804 | int namelen; |
794 | unsigned long ref_ptr; | 805 | unsigned long ref_ptr; |
795 | unsigned long ref_end; | 806 | unsigned long ref_end; |
807 | int search_done = 0; | ||
796 | 808 | ||
797 | /* | 809 | /* |
798 | * it is possible that we didn't log all the parent directories | 810 | * it is possible that we didn't log all the parent directories |
@@ -833,7 +845,10 @@ again: | |||
833 | * existing back reference, and we don't want to create | 845 | * existing back reference, and we don't want to create |
834 | * dangling pointers in the directory. | 846 | * dangling pointers in the directory. |
835 | */ | 847 | */ |
836 | conflict_again: | 848 | |
849 | if (search_done) | ||
850 | goto insert; | ||
851 | |||
837 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); | 852 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); |
838 | if (ret == 0) { | 853 | if (ret == 0) { |
839 | char *victim_name; | 854 | char *victim_name; |
@@ -874,37 +889,21 @@ conflict_again: | |||
874 | ret = btrfs_unlink_inode(trans, root, dir, | 889 | ret = btrfs_unlink_inode(trans, root, dir, |
875 | inode, victim_name, | 890 | inode, victim_name, |
876 | victim_name_len); | 891 | victim_name_len); |
877 | kfree(victim_name); | ||
878 | btrfs_release_path(root, path); | ||
879 | goto conflict_again; | ||
880 | } | 892 | } |
881 | kfree(victim_name); | 893 | kfree(victim_name); |
882 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; | 894 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; |
883 | } | 895 | } |
884 | BUG_ON(ret); | 896 | BUG_ON(ret); |
885 | } | ||
886 | btrfs_release_path(root, path); | ||
887 | 897 | ||
888 | /* look for a conflicting sequence number */ | 898 | /* |
889 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, | 899 | * NOTE: we have searched the root tree and checked the |
890 | btrfs_inode_ref_index(eb, ref), | 900 | * corresponding ref; it does not need to be checked again. |
891 | name, namelen, 0); | 901 | */ |
892 | if (di && !IS_ERR(di)) { | 902 | search_done = 1; |
893 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
894 | BUG_ON(ret); | ||
895 | } | ||
896 | btrfs_release_path(root, path); | ||
897 | |||
898 | |||
899 | /* look for a conflicting name */ | ||
900 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | ||
901 | name, namelen, 0); | ||
902 | if (di && !IS_ERR(di)) { | ||
903 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
904 | BUG_ON(ret); | ||
905 | } | 903 | } |
906 | btrfs_release_path(root, path); | 904 | btrfs_release_path(root, path); |
907 | 905 | ||
906 | insert: | ||
908 | /* insert our name */ | 907 | /* insert our name */ |
909 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, | 908 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, |
910 | btrfs_inode_ref_index(eb, ref)); | 909 | btrfs_inode_ref_index(eb, ref)); |
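
Editor's note: the search_done flag above replaces the old conflict_again loop: the root tree is walked once per call, and every later back reference jumps straight to the insert label. A runnable userspace model of that hoisting; all names here are hypothetical stand-ins.

#include <stdio.h>

static int drop_conflicting_names(void)	/* stands in for the tree walk */
{
	puts("searched the root tree once");
	return 0;
}

static int insert_name(int i)		/* stands in for btrfs_add_link() */
{
	printf("inserted ref %d\n", i);
	return 0;
}

static int replay_refs(int nr_refs)
{
	int search_done = 0;
	int ret;

	for (int i = 0; i < nr_refs; i++) {
		if (!search_done) {	/* expensive search runs at most once */
			ret = drop_conflicting_names();
			if (ret)
				return ret;
			search_done = 1;
		}
		ret = insert_name(i);	/* the "insert:" label above */
		if (ret)
			return ret;
	}
	return 0;
}

int main(void) { return replay_refs(3); }
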
@@ -967,6 +966,8 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
967 | key.offset = (u64)-1; | 966 | key.offset = (u64)-1; |
968 | 967 | ||
969 | path = btrfs_alloc_path(); | 968 | path = btrfs_alloc_path(); |
969 | if (!path) | ||
970 | return -ENOMEM; | ||
970 | 971 | ||
971 | while (1) { | 972 | while (1) { |
972 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 973 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
@@ -1178,6 +1179,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, | |||
1178 | 1179 | ||
1179 | name_len = btrfs_dir_name_len(eb, di); | 1180 | name_len = btrfs_dir_name_len(eb, di); |
1180 | name = kmalloc(name_len, GFP_NOFS); | 1181 | name = kmalloc(name_len, GFP_NOFS); |
1182 | if (!name) | ||
1183 | return -ENOMEM; | ||
1184 | |||
1181 | log_type = btrfs_dir_type(eb, di); | 1185 | log_type = btrfs_dir_type(eb, di); |
1182 | read_extent_buffer(eb, name, (unsigned long)(di + 1), | 1186 | read_extent_buffer(eb, name, (unsigned long)(di + 1), |
1183 | name_len); | 1187 | name_len); |
@@ -1269,6 +1273,8 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, | |||
1269 | ptr_end = ptr + item_size; | 1273 | ptr_end = ptr + item_size; |
1270 | while (ptr < ptr_end) { | 1274 | while (ptr < ptr_end) { |
1271 | di = (struct btrfs_dir_item *)ptr; | 1275 | di = (struct btrfs_dir_item *)ptr; |
1276 | if (verify_dir_item(root, eb, di)) | ||
1277 | return -EIO; | ||
1272 | name_len = btrfs_dir_name_len(eb, di); | 1278 | name_len = btrfs_dir_name_len(eb, di); |
1273 | ret = replay_one_name(trans, root, path, eb, di, key); | 1279 | ret = replay_one_name(trans, root, path, eb, di, key); |
1274 | BUG_ON(ret); | 1280 | BUG_ON(ret); |
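
Editor's note: verify_dir_item(), added above as a gate before btrfs_dir_name_len(), rejects a corrupted item with -EIO rather than letting replay read past it. A sketch of the kind of bounds check involved; this is illustrative, not the actual verify_dir_item() implementation:

#include <stddef.h>

struct dir_item_hdr {		/* simplified stand-in for btrfs_dir_item */
	unsigned short name_len;
	unsigned short data_len;
};

static int dir_item_ok(const void *item, size_t item_bytes)
{
	const struct dir_item_hdr *hdr = item;

	if (item_bytes < sizeof(*hdr))
		return 0;	/* truncated header */
	if (sizeof(*hdr) + (size_t)hdr->name_len + hdr->data_len > item_bytes)
		return 0;	/* on-disk lengths overflow the item */
	return 1;		/* safe to trust name_len */
}
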
@@ -1395,6 +1401,11 @@ again: | |||
1395 | ptr_end = ptr + item_size; | 1401 | ptr_end = ptr + item_size; |
1396 | while (ptr < ptr_end) { | 1402 | while (ptr < ptr_end) { |
1397 | di = (struct btrfs_dir_item *)ptr; | 1403 | di = (struct btrfs_dir_item *)ptr; |
1404 | if (verify_dir_item(root, eb, di)) { | ||
1405 | ret = -EIO; | ||
1406 | goto out; | ||
1407 | } | ||
1408 | |||
1398 | name_len = btrfs_dir_name_len(eb, di); | 1409 | name_len = btrfs_dir_name_len(eb, di); |
1399 | name = kmalloc(name_len, GFP_NOFS); | 1410 | name = kmalloc(name_len, GFP_NOFS); |
1400 | if (!name) { | 1411 | if (!name) { |
@@ -1692,6 +1703,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1692 | root_owner = btrfs_header_owner(parent); | 1703 | root_owner = btrfs_header_owner(parent); |
1693 | 1704 | ||
1694 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 1705 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); |
1706 | if (!next) | ||
1707 | return -ENOMEM; | ||
1695 | 1708 | ||
1696 | if (*level == 1) { | 1709 | if (*level == 1) { |
1697 | wc->process_func(root, next, wc, ptr_gen); | 1710 | wc->process_func(root, next, wc, ptr_gen); |
@@ -1802,7 +1815,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, | |||
1802 | int orig_level; | 1815 | int orig_level; |
1803 | 1816 | ||
1804 | path = btrfs_alloc_path(); | 1817 | path = btrfs_alloc_path(); |
1805 | BUG_ON(!path); | 1818 | if (!path) |
1819 | return -ENOMEM; | ||
1806 | 1820 | ||
1807 | level = btrfs_header_level(log->node); | 1821 | level = btrfs_header_level(log->node); |
1808 | orig_level = level; | 1822 | orig_level = level; |
@@ -2032,6 +2046,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2032 | wait_log_commit(trans, log_root_tree, | 2046 | wait_log_commit(trans, log_root_tree, |
2033 | log_root_tree->log_transid); | 2047 | log_root_tree->log_transid); |
2034 | mutex_unlock(&log_root_tree->log_mutex); | 2048 | mutex_unlock(&log_root_tree->log_mutex); |
2049 | ret = 0; | ||
2035 | goto out; | 2050 | goto out; |
2036 | } | 2051 | } |
2037 | atomic_set(&log_root_tree->log_commit[index2], 1); | 2052 | atomic_set(&log_root_tree->log_commit[index2], 1); |
@@ -2096,7 +2111,7 @@ out: | |||
2096 | smp_mb(); | 2111 | smp_mb(); |
2097 | if (waitqueue_active(&root->log_commit_wait[index1])) | 2112 | if (waitqueue_active(&root->log_commit_wait[index1])) |
2098 | wake_up(&root->log_commit_wait[index1]); | 2113 | wake_up(&root->log_commit_wait[index1]); |
2099 | return 0; | 2114 | return ret; |
2100 | } | 2115 | } |
2101 | 2116 | ||
2102 | static void free_log_tree(struct btrfs_trans_handle *trans, | 2117 | static void free_log_tree(struct btrfs_trans_handle *trans, |
@@ -2194,6 +2209,9 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2194 | 2209 | ||
2195 | log = root->log_root; | 2210 | log = root->log_root; |
2196 | path = btrfs_alloc_path(); | 2211 | path = btrfs_alloc_path(); |
2212 | if (!path) | ||
2213 | return -ENOMEM; | ||
2214 | |||
2197 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, | 2215 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, |
2198 | name, name_len, -1); | 2216 | name, name_len, -1); |
2199 | if (IS_ERR(di)) { | 2217 | if (IS_ERR(di)) { |
@@ -2594,6 +2612,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2594 | 2612 | ||
2595 | ins_data = kmalloc(nr * sizeof(struct btrfs_key) + | 2613 | ins_data = kmalloc(nr * sizeof(struct btrfs_key) + |
2596 | nr * sizeof(u32), GFP_NOFS); | 2614 | nr * sizeof(u32), GFP_NOFS); |
2615 | if (!ins_data) | ||
2616 | return -ENOMEM; | ||
2617 | |||
2597 | ins_sizes = (u32 *)ins_data; | 2618 | ins_sizes = (u32 *)ins_data; |
2598 | ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); | 2619 | ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); |
2599 | 2620 | ||
@@ -2725,7 +2746,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2725 | log = root->log_root; | 2746 | log = root->log_root; |
2726 | 2747 | ||
2727 | path = btrfs_alloc_path(); | 2748 | path = btrfs_alloc_path(); |
2749 | if (!path) | ||
2750 | return -ENOMEM; | ||
2728 | dst_path = btrfs_alloc_path(); | 2751 | dst_path = btrfs_alloc_path(); |
2752 | if (!dst_path) { | ||
2753 | btrfs_free_path(path); | ||
2754 | return -ENOMEM; | ||
2755 | } | ||
2729 | 2756 | ||
2730 | min_key.objectid = inode->i_ino; | 2757 | min_key.objectid = inode->i_ino; |
2731 | min_key.type = BTRFS_INODE_ITEM_KEY; | 2758 | min_key.type = BTRFS_INODE_ITEM_KEY; |
@@ -3075,16 +3102,20 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
3075 | .stage = 0, | 3102 | .stage = 0, |
3076 | }; | 3103 | }; |
3077 | 3104 | ||
3078 | fs_info->log_root_recovering = 1; | ||
3079 | path = btrfs_alloc_path(); | 3105 | path = btrfs_alloc_path(); |
3080 | BUG_ON(!path); | 3106 | if (!path) |
3107 | return -ENOMEM; | ||
3108 | |||
3109 | fs_info->log_root_recovering = 1; | ||
3081 | 3110 | ||
3082 | trans = btrfs_start_transaction(fs_info->tree_root, 0); | 3111 | trans = btrfs_start_transaction(fs_info->tree_root, 0); |
3112 | BUG_ON(IS_ERR(trans)); | ||
3083 | 3113 | ||
3084 | wc.trans = trans; | 3114 | wc.trans = trans; |
3085 | wc.pin = 1; | 3115 | wc.pin = 1; |
3086 | 3116 | ||
3087 | walk_log_tree(trans, log_root_tree, &wc); | 3117 | ret = walk_log_tree(trans, log_root_tree, &wc); |
3118 | BUG_ON(ret); | ||
3088 | 3119 | ||
3089 | again: | 3120 | again: |
3090 | key.objectid = BTRFS_TREE_LOG_OBJECTID; | 3121 | key.objectid = BTRFS_TREE_LOG_OBJECTID; |
@@ -3108,8 +3139,7 @@ again: | |||
3108 | 3139 | ||
3109 | log = btrfs_read_fs_root_no_radix(log_root_tree, | 3140 | log = btrfs_read_fs_root_no_radix(log_root_tree, |
3110 | &found_key); | 3141 | &found_key); |
3111 | BUG_ON(!log); | 3142 | BUG_ON(IS_ERR(log)); |
3112 | |||
3113 | 3143 | ||
3114 | tmp_key.objectid = found_key.offset; | 3144 | tmp_key.objectid = found_key.offset; |
3115 | tmp_key.type = BTRFS_ROOT_ITEM_KEY; | 3145 | tmp_key.type = BTRFS_ROOT_ITEM_KEY; |
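
Editor's note: several fixes in this file replace NULL tests with IS_ERR() checks, because btrfs_start_transaction() and btrfs_read_fs_root_no_radix() return ERR_PTR-encoded errnos and never NULL on failure, so a NULL check never fires. A userspace model of the <linux/err.h> convention these checks rely on:

#include <errno.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Hypothetical allocator in the ERR_PTR style: on failure it returns an
 * encoded errno, never NULL. */
static void *start_transaction(int fail)
{
	static int dummy;
	return fail ? ERR_PTR(-ENOMEM) : (void *)&dummy;
}

static long use_transaction(void)
{
	void *trans = start_transaction(1);

	if (IS_ERR(trans))	/* correct: catches the encoded -ENOMEM */
		return PTR_ERR(trans);
	/* "if (!trans)" would let the error pointer through, which is
	 * exactly the bug the BUG_ON(IS_ERR(...)) conversions address. */
	return 0;
}
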
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1718e1a5c320..309a57b9fc85 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
23 | #include <linux/random.h> | 23 | #include <linux/random.h> |
24 | #include <linux/iocontext.h> | 24 | #include <linux/iocontext.h> |
25 | #include <linux/capability.h> | ||
25 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
26 | #include "compat.h" | 27 | #include "compat.h" |
27 | #include "ctree.h" | 28 | #include "ctree.h" |
@@ -32,17 +33,6 @@ | |||
32 | #include "volumes.h" | 33 | #include "volumes.h" |
33 | #include "async-thread.h" | 34 | #include "async-thread.h" |
34 | 35 | ||
35 | struct map_lookup { | ||
36 | u64 type; | ||
37 | int io_align; | ||
38 | int io_width; | ||
39 | int stripe_len; | ||
40 | int sector_size; | ||
41 | int num_stripes; | ||
42 | int sub_stripes; | ||
43 | struct btrfs_bio_stripe stripes[]; | ||
44 | }; | ||
45 | |||
46 | static int init_first_rw_device(struct btrfs_trans_handle *trans, | 36 | static int init_first_rw_device(struct btrfs_trans_handle *trans, |
47 | struct btrfs_root *root, | 37 | struct btrfs_root *root, |
48 | struct btrfs_device *device); | 38 | struct btrfs_device *device); |
@@ -161,7 +151,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
161 | struct bio *cur; | 151 | struct bio *cur; |
162 | int again = 0; | 152 | int again = 0; |
163 | unsigned long num_run; | 153 | unsigned long num_run; |
164 | unsigned long num_sync_run; | ||
165 | unsigned long batch_run = 0; | 154 | unsigned long batch_run = 0; |
166 | unsigned long limit; | 155 | unsigned long limit; |
167 | unsigned long last_waited = 0; | 156 | unsigned long last_waited = 0; |
@@ -172,11 +161,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
172 | limit = btrfs_async_submit_limit(fs_info); | 161 | limit = btrfs_async_submit_limit(fs_info); |
173 | limit = limit * 2 / 3; | 162 | limit = limit * 2 / 3; |
174 | 163 | ||
175 | /* we want to make sure that every time we switch from the sync | ||
176 | * list to the normal list, we unplug | ||
177 | */ | ||
178 | num_sync_run = 0; | ||
179 | |||
180 | loop: | 164 | loop: |
181 | spin_lock(&device->io_lock); | 165 | spin_lock(&device->io_lock); |
182 | 166 | ||
@@ -222,15 +206,6 @@ loop_lock: | |||
222 | 206 | ||
223 | spin_unlock(&device->io_lock); | 207 | spin_unlock(&device->io_lock); |
224 | 208 | ||
225 | /* | ||
226 | * if we're doing the regular priority list, make sure we unplug | ||
227 | * for any high prio bios we've sent down | ||
228 | */ | ||
229 | if (pending_bios == &device->pending_bios && num_sync_run > 0) { | ||
230 | num_sync_run = 0; | ||
231 | blk_run_backing_dev(bdi, NULL); | ||
232 | } | ||
233 | |||
234 | while (pending) { | 209 | while (pending) { |
235 | 210 | ||
236 | rmb(); | 211 | rmb(); |
@@ -258,19 +233,11 @@ loop_lock: | |||
258 | 233 | ||
259 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | 234 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); |
260 | 235 | ||
261 | if (cur->bi_rw & REQ_SYNC) | ||
262 | num_sync_run++; | ||
263 | |||
264 | submit_bio(cur->bi_rw, cur); | 236 | submit_bio(cur->bi_rw, cur); |
265 | num_run++; | 237 | num_run++; |
266 | batch_run++; | 238 | batch_run++; |
267 | if (need_resched()) { | 239 | if (need_resched()) |
268 | if (num_sync_run) { | ||
269 | blk_run_backing_dev(bdi, NULL); | ||
270 | num_sync_run = 0; | ||
271 | } | ||
272 | cond_resched(); | 240 | cond_resched(); |
273 | } | ||
274 | 241 | ||
275 | /* | 242 | /* |
276 | * we made progress, there is more work to do and the bdi | 243 | * we made progress, there is more work to do and the bdi |
@@ -303,13 +270,8 @@ loop_lock: | |||
303 | * against it before looping | 270 | * against it before looping |
304 | */ | 271 | */ |
305 | last_waited = ioc->last_waited; | 272 | last_waited = ioc->last_waited; |
306 | if (need_resched()) { | 273 | if (need_resched()) |
307 | if (num_sync_run) { | ||
308 | blk_run_backing_dev(bdi, NULL); | ||
309 | num_sync_run = 0; | ||
310 | } | ||
311 | cond_resched(); | 274 | cond_resched(); |
312 | } | ||
313 | continue; | 275 | continue; |
314 | } | 276 | } |
315 | spin_lock(&device->io_lock); | 277 | spin_lock(&device->io_lock); |
@@ -322,22 +284,6 @@ loop_lock: | |||
322 | } | 284 | } |
323 | } | 285 | } |
324 | 286 | ||
325 | if (num_sync_run) { | ||
326 | num_sync_run = 0; | ||
327 | blk_run_backing_dev(bdi, NULL); | ||
328 | } | ||
329 | /* | ||
330 | * IO has already been through a long path to get here. Checksumming, | ||
331 | * async helper threads, perhaps compression. We've done a pretty | ||
332 | * good job of collecting a batch of IO and should just unplug | ||
333 | * the device right away. | ||
334 | * | ||
335 | * This will help anyone who is waiting on the IO, they might have | ||
336 | * already unplugged, but managed to do so before the bio they | ||
337 | * cared about found its way down here. | ||
338 | */ | ||
339 | blk_run_backing_dev(bdi, NULL); | ||
340 | |||
341 | cond_resched(); | 287 | cond_resched(); |
342 | if (again) | 288 | if (again) |
343 | goto loop; | 289 | goto loop; |
@@ -600,8 +546,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
600 | set_blocksize(bdev, 4096); | 546 | set_blocksize(bdev, 4096); |
601 | 547 | ||
602 | bh = btrfs_read_dev_super(bdev); | 548 | bh = btrfs_read_dev_super(bdev); |
603 | if (!bh) | 549 | if (!bh) { |
550 | ret = -EINVAL; | ||
604 | goto error_close; | 551 | goto error_close; |
552 | } | ||
605 | 553 | ||
606 | disk_super = (struct btrfs_super_block *)bh->b_data; | 554 | disk_super = (struct btrfs_super_block *)bh->b_data; |
607 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 555 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
@@ -703,7 +651,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
703 | goto error_close; | 651 | goto error_close; |
704 | bh = btrfs_read_dev_super(bdev); | 652 | bh = btrfs_read_dev_super(bdev); |
705 | if (!bh) { | 653 | if (!bh) { |
706 | ret = -EIO; | 654 | ret = -EINVAL; |
707 | goto error_close; | 655 | goto error_close; |
708 | } | 656 | } |
709 | disk_super = (struct btrfs_super_block *)bh->b_data; | 657 | disk_super = (struct btrfs_super_block *)bh->b_data; |
@@ -729,59 +677,167 @@ error: | |||
729 | return ret; | 677 | return ret; |
730 | } | 678 | } |
731 | 679 | ||
680 | /* helper to account the used device space in the range */ | ||
681 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | ||
682 | u64 end, u64 *length) | ||
683 | { | ||
684 | struct btrfs_key key; | ||
685 | struct btrfs_root *root = device->dev_root; | ||
686 | struct btrfs_dev_extent *dev_extent; | ||
687 | struct btrfs_path *path; | ||
688 | u64 extent_end; | ||
689 | int ret; | ||
690 | int slot; | ||
691 | struct extent_buffer *l; | ||
692 | |||
693 | *length = 0; | ||
694 | |||
695 | if (start >= device->total_bytes) | ||
696 | return 0; | ||
697 | |||
698 | path = btrfs_alloc_path(); | ||
699 | if (!path) | ||
700 | return -ENOMEM; | ||
701 | path->reada = 2; | ||
702 | |||
703 | key.objectid = device->devid; | ||
704 | key.offset = start; | ||
705 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
706 | |||
707 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
708 | if (ret < 0) | ||
709 | goto out; | ||
710 | if (ret > 0) { | ||
711 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | ||
712 | if (ret < 0) | ||
713 | goto out; | ||
714 | } | ||
715 | |||
716 | while (1) { | ||
717 | l = path->nodes[0]; | ||
718 | slot = path->slots[0]; | ||
719 | if (slot >= btrfs_header_nritems(l)) { | ||
720 | ret = btrfs_next_leaf(root, path); | ||
721 | if (ret == 0) | ||
722 | continue; | ||
723 | if (ret < 0) | ||
724 | goto out; | ||
725 | |||
726 | break; | ||
727 | } | ||
728 | btrfs_item_key_to_cpu(l, &key, slot); | ||
729 | |||
730 | if (key.objectid < device->devid) | ||
731 | goto next; | ||
732 | |||
733 | if (key.objectid > device->devid) | ||
734 | break; | ||
735 | |||
736 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
737 | goto next; | ||
738 | |||
739 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | ||
740 | extent_end = key.offset + btrfs_dev_extent_length(l, | ||
741 | dev_extent); | ||
742 | if (key.offset <= start && extent_end > end) { | ||
743 | *length = end - start + 1; | ||
744 | break; | ||
745 | } else if (key.offset <= start && extent_end > start) | ||
746 | *length += extent_end - start; | ||
747 | else if (key.offset > start && extent_end <= end) | ||
748 | *length += extent_end - key.offset; | ||
749 | else if (key.offset > start && key.offset <= end) { | ||
750 | *length += end - key.offset + 1; | ||
751 | break; | ||
752 | } else if (key.offset > end) | ||
753 | break; | ||
754 | |||
755 | next: | ||
756 | path->slots[0]++; | ||
757 | } | ||
758 | ret = 0; | ||
759 | out: | ||
760 | btrfs_free_path(path); | ||
761 | return ret; | ||
762 | } | ||
763 | |||
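
Editor's note: btrfs_account_dev_extents_size() above sums only the part of each dev extent that intersects [start, end]; its four if/else branches are the four ways an extent can straddle the (inclusive) range. The same arithmetic collapses to a single clamp, shown here as a runnable userspace sketch over a sorted extent array:

#include <stdint.h>

struct extent { uint64_t offset; uint64_t len; };

static uint64_t account_extents(const struct extent *e, int n,
				uint64_t start, uint64_t end)
{
	uint64_t total = 0;

	for (int i = 0; i < n; i++) {
		uint64_t lo = e[i].offset;
		uint64_t hi = e[i].offset + e[i].len;	/* exclusive end */

		if (hi <= start)
			continue;	/* entirely before the range */
		if (lo > end)
			break;		/* sorted, so nothing else overlaps */

		/* clamp to [start, end]; end is inclusive, as above */
		if (lo < start)
			lo = start;
		if (hi > end + 1)
			hi = end + 1;
		total += hi - lo;
	}
	return total;
}
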
732 | /* | 764 | /* |
765 | * find_free_dev_extent - find free space in the specified device | ||
766 | * @trans: transaction handler | ||
767 | * @device: the device which we search the free space in | ||
768 | * @num_bytes: the size of the free space that we need | ||
769 | * @start: store the start of the free space. | ||
770 | * @len: the size of the free space that we find, or the size of the max | ||
771 | * free space if we don't find suitable free space | ||
772 | * | ||
733 | * this uses a pretty simple search, the expectation is that it is | 773 | * this uses a pretty simple search, the expectation is that it is |
734 | * called very infrequently and that a given device has a small number | 774 | * called very infrequently and that a given device has a small number |
735 | * of extents | 775 | * of extents |
776 | * | ||
777 | * @start is used to store the start of the free space if we find it. But if we | ||
778 | * don't find suitable free space, it will be used to store the start position | ||
779 | * of the max free space. | ||
780 | * | ||
781 | * @len is used to store the size of the free space that we find. | ||
782 | * But if we don't find suitable free space, it is used to store the size of | ||
783 | * the max free space. | ||
736 | */ | 784 | */ |
737 | int find_free_dev_extent(struct btrfs_trans_handle *trans, | 785 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
738 | struct btrfs_device *device, u64 num_bytes, | 786 | struct btrfs_device *device, u64 num_bytes, |
739 | u64 *start, u64 *max_avail) | 787 | u64 *start, u64 *len) |
740 | { | 788 | { |
741 | struct btrfs_key key; | 789 | struct btrfs_key key; |
742 | struct btrfs_root *root = device->dev_root; | 790 | struct btrfs_root *root = device->dev_root; |
743 | struct btrfs_dev_extent *dev_extent = NULL; | 791 | struct btrfs_dev_extent *dev_extent; |
744 | struct btrfs_path *path; | 792 | struct btrfs_path *path; |
745 | u64 hole_size = 0; | 793 | u64 hole_size; |
746 | u64 last_byte = 0; | 794 | u64 max_hole_start; |
747 | u64 search_start = 0; | 795 | u64 max_hole_size; |
796 | u64 extent_end; | ||
797 | u64 search_start; | ||
748 | u64 search_end = device->total_bytes; | 798 | u64 search_end = device->total_bytes; |
749 | int ret; | 799 | int ret; |
750 | int slot = 0; | 800 | int slot; |
751 | int start_found; | ||
752 | struct extent_buffer *l; | 801 | struct extent_buffer *l; |
753 | 802 | ||
754 | path = btrfs_alloc_path(); | ||
755 | if (!path) | ||
756 | return -ENOMEM; | ||
757 | path->reada = 2; | ||
758 | start_found = 0; | ||
759 | |||
760 | /* FIXME use last free of some kind */ | 803 | /* FIXME use last free of some kind */ |
761 | 804 | ||
762 | /* we don't want to overwrite the superblock on the drive, | 805 | /* we don't want to overwrite the superblock on the drive, |
763 | * so we make sure to start at an offset of at least 1MB | 806 | * so we make sure to start at an offset of at least 1MB |
764 | */ | 807 | */ |
765 | search_start = max((u64)1024 * 1024, search_start); | 808 | search_start = 1024 * 1024; |
766 | 809 | ||
767 | if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) | 810 | if (root->fs_info->alloc_start + num_bytes <= search_end) |
768 | search_start = max(root->fs_info->alloc_start, search_start); | 811 | search_start = max(root->fs_info->alloc_start, search_start); |
769 | 812 | ||
813 | max_hole_start = search_start; | ||
814 | max_hole_size = 0; | ||
815 | |||
816 | if (search_start >= search_end) { | ||
817 | ret = -ENOSPC; | ||
818 | goto error; | ||
819 | } | ||
820 | |||
821 | path = btrfs_alloc_path(); | ||
822 | if (!path) { | ||
823 | ret = -ENOMEM; | ||
824 | goto error; | ||
825 | } | ||
826 | path->reada = 2; | ||
827 | |||
770 | key.objectid = device->devid; | 828 | key.objectid = device->devid; |
771 | key.offset = search_start; | 829 | key.offset = search_start; |
772 | key.type = BTRFS_DEV_EXTENT_KEY; | 830 | key.type = BTRFS_DEV_EXTENT_KEY; |
831 | |||
773 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 832 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); |
774 | if (ret < 0) | 833 | if (ret < 0) |
775 | goto error; | 834 | goto out; |
776 | if (ret > 0) { | 835 | if (ret > 0) { |
777 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | 836 | ret = btrfs_previous_item(root, path, key.objectid, key.type); |
778 | if (ret < 0) | 837 | if (ret < 0) |
779 | goto error; | 838 | goto out; |
780 | if (ret > 0) | ||
781 | start_found = 1; | ||
782 | } | 839 | } |
783 | l = path->nodes[0]; | 840 | |
784 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | ||
785 | while (1) { | 841 | while (1) { |
786 | l = path->nodes[0]; | 842 | l = path->nodes[0]; |
787 | slot = path->slots[0]; | 843 | slot = path->slots[0]; |
@@ -790,24 +846,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
790 | if (ret == 0) | 846 | if (ret == 0) |
791 | continue; | 847 | continue; |
792 | if (ret < 0) | 848 | if (ret < 0) |
793 | goto error; | 849 | goto out; |
794 | no_more_items: | 850 | |
795 | if (!start_found) { | 851 | break; |
796 | if (search_start >= search_end) { | ||
797 | ret = -ENOSPC; | ||
798 | goto error; | ||
799 | } | ||
800 | *start = search_start; | ||
801 | start_found = 1; | ||
802 | goto check_pending; | ||
803 | } | ||
804 | *start = last_byte > search_start ? | ||
805 | last_byte : search_start; | ||
806 | if (search_end <= *start) { | ||
807 | ret = -ENOSPC; | ||
808 | goto error; | ||
809 | } | ||
810 | goto check_pending; | ||
811 | } | 852 | } |
812 | btrfs_item_key_to_cpu(l, &key, slot); | 853 | btrfs_item_key_to_cpu(l, &key, slot); |
813 | 854 | ||
@@ -815,48 +856,62 @@ no_more_items: | |||
815 | goto next; | 856 | goto next; |
816 | 857 | ||
817 | if (key.objectid > device->devid) | 858 | if (key.objectid > device->devid) |
818 | goto no_more_items; | 859 | break; |
819 | 860 | ||
820 | if (key.offset >= search_start && key.offset > last_byte && | 861 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) |
821 | start_found) { | 862 | goto next; |
822 | if (last_byte < search_start) | ||
823 | last_byte = search_start; | ||
824 | hole_size = key.offset - last_byte; | ||
825 | 863 | ||
826 | if (hole_size > *max_avail) | 864 | if (key.offset > search_start) { |
827 | *max_avail = hole_size; | 865 | hole_size = key.offset - search_start; |
828 | 866 | ||
829 | if (key.offset > last_byte && | 867 | if (hole_size > max_hole_size) { |
830 | hole_size >= num_bytes) { | 868 | max_hole_start = search_start; |
831 | *start = last_byte; | 869 | max_hole_size = hole_size; |
832 | goto check_pending; | 870 | } |
871 | |||
872 | /* | ||
873 | * If this free space is greater than what we need, | ||
874 | * it must be the max free space that we have found | ||
875 | * until now, so max_hole_start must point to the start | ||
876 | * of this free space and the length of this free space | ||
877 | * is stored in max_hole_size. Thus, we return | ||
878 | * max_hole_start and max_hole_size and go back to the | ||
879 | * caller. | ||
880 | */ | ||
881 | if (hole_size >= num_bytes) { | ||
882 | ret = 0; | ||
883 | goto out; | ||
833 | } | 884 | } |
834 | } | 885 | } |
835 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
836 | goto next; | ||
837 | 886 | ||
838 | start_found = 1; | ||
839 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 887 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
840 | last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); | 888 | extent_end = key.offset + btrfs_dev_extent_length(l, |
889 | dev_extent); | ||
890 | if (extent_end > search_start) | ||
891 | search_start = extent_end; | ||
841 | next: | 892 | next: |
842 | path->slots[0]++; | 893 | path->slots[0]++; |
843 | cond_resched(); | 894 | cond_resched(); |
844 | } | 895 | } |
845 | check_pending: | ||
846 | /* we have to make sure we didn't find an extent that has already | ||
847 | * been allocated by the map tree or the original allocation | ||
848 | */ | ||
849 | BUG_ON(*start < search_start); | ||
850 | 896 | ||
851 | if (*start + num_bytes > search_end) { | 897 | hole_size = search_end - search_start; |
852 | ret = -ENOSPC; | 898 | if (hole_size > max_hole_size) { |
853 | goto error; | 899 | max_hole_start = search_start; |
900 | max_hole_size = hole_size; | ||
854 | } | 901 | } |
855 | /* check for pending inserts here */ | ||
856 | ret = 0; | ||
857 | 902 | ||
858 | error: | 903 | /* See above. */ |
904 | if (hole_size < num_bytes) | ||
905 | ret = -ENOSPC; | ||
906 | else | ||
907 | ret = 0; | ||
908 | |||
909 | out: | ||
859 | btrfs_free_path(path); | 910 | btrfs_free_path(path); |
911 | error: | ||
912 | *start = max_hole_start; | ||
913 | if (len) | ||
914 | *len = max_hole_size; | ||
860 | return ret; | 915 | return ret; |
861 | } | 916 | } |
862 | 917 | ||
@@ -1103,6 +1158,10 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, | |||
1103 | return -ENOMEM; | 1158 | return -ENOMEM; |
1104 | 1159 | ||
1105 | trans = btrfs_start_transaction(root, 0); | 1160 | trans = btrfs_start_transaction(root, 0); |
1161 | if (IS_ERR(trans)) { | ||
1162 | btrfs_free_path(path); | ||
1163 | return PTR_ERR(trans); | ||
1164 | } | ||
1106 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | 1165 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; |
1107 | key.type = BTRFS_DEV_ITEM_KEY; | 1166 | key.type = BTRFS_DEV_ITEM_KEY; |
1108 | key.offset = device->devid; | 1167 | key.offset = device->devid; |
@@ -1196,7 +1255,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1196 | set_blocksize(bdev, 4096); | 1255 | set_blocksize(bdev, 4096); |
1197 | bh = btrfs_read_dev_super(bdev); | 1256 | bh = btrfs_read_dev_super(bdev); |
1198 | if (!bh) { | 1257 | if (!bh) { |
1199 | ret = -EIO; | 1258 | ret = -EINVAL; |
1200 | goto error_close; | 1259 | goto error_close; |
1201 | } | 1260 | } |
1202 | disk_super = (struct btrfs_super_block *)bh->b_data; | 1261 | disk_super = (struct btrfs_super_block *)bh->b_data; |
@@ -1224,11 +1283,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1224 | 1283 | ||
1225 | ret = btrfs_shrink_device(device, 0); | 1284 | ret = btrfs_shrink_device(device, 0); |
1226 | if (ret) | 1285 | if (ret) |
1227 | goto error_brelse; | 1286 | goto error_undo; |
1228 | 1287 | ||
1229 | ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); | 1288 | ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); |
1230 | if (ret) | 1289 | if (ret) |
1231 | goto error_brelse; | 1290 | goto error_undo; |
1232 | 1291 | ||
1233 | device->in_fs_metadata = 0; | 1292 | device->in_fs_metadata = 0; |
1234 | 1293 | ||
@@ -1302,6 +1361,13 @@ out: | |||
1302 | mutex_unlock(&root->fs_info->volume_mutex); | 1361 | mutex_unlock(&root->fs_info->volume_mutex); |
1303 | mutex_unlock(&uuid_mutex); | 1362 | mutex_unlock(&uuid_mutex); |
1304 | return ret; | 1363 | return ret; |
1364 | error_undo: | ||
1365 | if (device->writeable) { | ||
1366 | list_add(&device->dev_alloc_list, | ||
1367 | &root->fs_info->fs_devices->alloc_list); | ||
1368 | root->fs_info->fs_devices->rw_devices++; | ||
1369 | } | ||
1370 | goto error_brelse; | ||
1305 | } | 1371 | } |
1306 | 1372 | ||
1307 | /* | 1373 | /* |
@@ -1491,11 +1557,19 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1491 | 1557 | ||
1492 | ret = find_next_devid(root, &device->devid); | 1558 | ret = find_next_devid(root, &device->devid); |
1493 | if (ret) { | 1559 | if (ret) { |
1560 | kfree(device->name); | ||
1494 | kfree(device); | 1561 | kfree(device); |
1495 | goto error; | 1562 | goto error; |
1496 | } | 1563 | } |
1497 | 1564 | ||
1498 | trans = btrfs_start_transaction(root, 0); | 1565 | trans = btrfs_start_transaction(root, 0); |
1566 | if (IS_ERR(trans)) { | ||
1567 | kfree(device->name); | ||
1568 | kfree(device); | ||
1569 | ret = PTR_ERR(trans); | ||
1570 | goto error; | ||
1571 | } | ||
1572 | |||
1499 | lock_chunks(root); | 1573 | lock_chunks(root); |
1500 | 1574 | ||
1501 | device->writeable = 1; | 1575 | device->writeable = 1; |
@@ -1511,7 +1585,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1511 | device->dev_root = root->fs_info->dev_root; | 1585 | device->dev_root = root->fs_info->dev_root; |
1512 | device->bdev = bdev; | 1586 | device->bdev = bdev; |
1513 | device->in_fs_metadata = 1; | 1587 | device->in_fs_metadata = 1; |
1514 | device->mode = 0; | 1588 | device->mode = FMODE_EXCL; |
1515 | set_blocksize(device->bdev, 4096); | 1589 | set_blocksize(device->bdev, 4096); |
1516 | 1590 | ||
1517 | if (seeding_dev) { | 1591 | if (seeding_dev) { |
@@ -1763,7 +1837,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1763 | return ret; | 1837 | return ret; |
1764 | 1838 | ||
1765 | trans = btrfs_start_transaction(root, 0); | 1839 | trans = btrfs_start_transaction(root, 0); |
1766 | BUG_ON(!trans); | 1840 | BUG_ON(IS_ERR(trans)); |
1767 | 1841 | ||
1768 | lock_chunks(root); | 1842 | lock_chunks(root); |
1769 | 1843 | ||
@@ -1794,6 +1868,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1794 | 1868 | ||
1795 | BUG_ON(ret); | 1869 | BUG_ON(ret); |
1796 | 1870 | ||
1871 | trace_btrfs_chunk_free(root, map, chunk_offset, em->len); | ||
1872 | |||
1797 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { | 1873 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { |
1798 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); | 1874 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); |
1799 | BUG_ON(ret); | 1875 | BUG_ON(ret); |
@@ -1916,6 +1992,9 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1916 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) | 1992 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) |
1917 | return -EROFS; | 1993 | return -EROFS; |
1918 | 1994 | ||
1995 | if (!capable(CAP_SYS_ADMIN)) | ||
1996 | return -EPERM; | ||
1997 | |||
1919 | mutex_lock(&dev_root->fs_info->volume_mutex); | 1998 | mutex_lock(&dev_root->fs_info->volume_mutex); |
1920 | dev_root = dev_root->fs_info->dev_root; | 1999 | dev_root = dev_root->fs_info->dev_root; |
1921 | 2000 | ||
@@ -1934,7 +2013,7 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1934 | BUG_ON(ret); | 2013 | BUG_ON(ret); |
1935 | 2014 | ||
1936 | trans = btrfs_start_transaction(dev_root, 0); | 2015 | trans = btrfs_start_transaction(dev_root, 0); |
1937 | BUG_ON(!trans); | 2016 | BUG_ON(IS_ERR(trans)); |
1938 | 2017 | ||
1939 | ret = btrfs_grow_device(trans, device, old_size); | 2018 | ret = btrfs_grow_device(trans, device, old_size); |
1940 | BUG_ON(ret); | 2019 | BUG_ON(ret); |
@@ -2100,6 +2179,11 @@ again: | |||
2100 | 2179 | ||
2101 | /* Shrinking succeeded, else we would be at "done". */ | 2180 | /* Shrinking succeeded, else we would be at "done". */ |
2102 | trans = btrfs_start_transaction(root, 0); | 2181 | trans = btrfs_start_transaction(root, 0); |
2182 | if (IS_ERR(trans)) { | ||
2183 | ret = PTR_ERR(trans); | ||
2184 | goto done; | ||
2185 | } | ||
2186 | |||
2103 | lock_chunks(root); | 2187 | lock_chunks(root); |
2104 | 2188 | ||
2105 | device->disk_total_bytes = new_size; | 2189 | device->disk_total_bytes = new_size; |
@@ -2154,66 +2238,67 @@ static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, | |||
2154 | return calc_size * num_stripes; | 2238 | return calc_size * num_stripes; |
2155 | } | 2239 | } |
2156 | 2240 | ||
2157 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 2241 | /* Used to sort the devices by max_avail (descending sort) */ |
2158 | struct btrfs_root *extent_root, | 2242 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) |
2159 | struct map_lookup **map_ret, | ||
2160 | u64 *num_bytes, u64 *stripe_size, | ||
2161 | u64 start, u64 type) | ||
2162 | { | 2243 | { |
2163 | struct btrfs_fs_info *info = extent_root->fs_info; | 2244 | if (((struct btrfs_device_info *)dev_info1)->max_avail > |
2164 | struct btrfs_device *device = NULL; | 2245 | ((struct btrfs_device_info *)dev_info2)->max_avail) |
2165 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | 2246 | return -1; |
2166 | struct list_head *cur; | 2247 | else if (((struct btrfs_device_info *)dev_info1)->max_avail < |
2167 | struct map_lookup *map = NULL; | 2248 | ((struct btrfs_device_info *)dev_info2)->max_avail) |
2168 | struct extent_map_tree *em_tree; | 2249 | return 1; |
2169 | struct extent_map *em; | 2250 | else |
2170 | struct list_head private_devs; | 2251 | return 0; |
2171 | int min_stripe_size = 1 * 1024 * 1024; | 2252 | } |
2172 | u64 calc_size = 1024 * 1024 * 1024; | ||
2173 | u64 max_chunk_size = calc_size; | ||
2174 | u64 min_free; | ||
2175 | u64 avail; | ||
2176 | u64 max_avail = 0; | ||
2177 | u64 dev_offset; | ||
2178 | int num_stripes = 1; | ||
2179 | int min_stripes = 1; | ||
2180 | int sub_stripes = 0; | ||
2181 | int looped = 0; | ||
2182 | int ret; | ||
2183 | int index; | ||
2184 | int stripe_len = 64 * 1024; | ||
2185 | 2253 | ||
2186 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | 2254 | static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, |
2187 | (type & BTRFS_BLOCK_GROUP_DUP)) { | 2255 | int *num_stripes, int *min_stripes, |
2188 | WARN_ON(1); | 2256 | int *sub_stripes) |
2189 | type &= ~BTRFS_BLOCK_GROUP_DUP; | 2257 | { |
2190 | } | 2258 | *num_stripes = 1; |
2191 | if (list_empty(&fs_devices->alloc_list)) | 2259 | *min_stripes = 1; |
2192 | return -ENOSPC; | 2260 | *sub_stripes = 0; |
2193 | 2261 | ||
2194 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { | 2262 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { |
2195 | num_stripes = fs_devices->rw_devices; | 2263 | *num_stripes = fs_devices->rw_devices; |
2196 | min_stripes = 2; | 2264 | *min_stripes = 2; |
2197 | } | 2265 | } |
2198 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { | 2266 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { |
2199 | num_stripes = 2; | 2267 | *num_stripes = 2; |
2200 | min_stripes = 2; | 2268 | *min_stripes = 2; |
2201 | } | 2269 | } |
2202 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { | 2270 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { |
2203 | if (fs_devices->rw_devices < 2) | 2271 | if (fs_devices->rw_devices < 2) |
2204 | return -ENOSPC; | 2272 | return -ENOSPC; |
2205 | num_stripes = 2; | 2273 | *num_stripes = 2; |
2206 | min_stripes = 2; | 2274 | *min_stripes = 2; |
2207 | } | 2275 | } |
2208 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | 2276 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { |
2209 | num_stripes = fs_devices->rw_devices; | 2277 | *num_stripes = fs_devices->rw_devices; |
2210 | if (num_stripes < 4) | 2278 | if (*num_stripes < 4) |
2211 | return -ENOSPC; | 2279 | return -ENOSPC; |
2212 | num_stripes &= ~(u32)1; | 2280 | *num_stripes &= ~(u32)1; |
2213 | sub_stripes = 2; | 2281 | *sub_stripes = 2; |
2214 | min_stripes = 4; | 2282 | *min_stripes = 4; |
2215 | } | 2283 | } |
2216 | 2284 | ||
2285 | return 0; | ||
2286 | } | ||
2287 | |||
2288 | static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, | ||
2289 | u64 proposed_size, u64 type, | ||
2290 | int num_stripes, int small_stripe) | ||
2291 | { | ||
2292 | int min_stripe_size = 1 * 1024 * 1024; | ||
2293 | u64 calc_size = proposed_size; | ||
2294 | u64 max_chunk_size = calc_size; | ||
2295 | int ncopies = 1; | ||
2296 | |||
2297 | if (type & (BTRFS_BLOCK_GROUP_RAID1 | | ||
2298 | BTRFS_BLOCK_GROUP_DUP | | ||
2299 | BTRFS_BLOCK_GROUP_RAID10)) | ||
2300 | ncopies = 2; | ||
2301 | |||
2217 | if (type & BTRFS_BLOCK_GROUP_DATA) { | 2302 | if (type & BTRFS_BLOCK_GROUP_DATA) { |
2218 | max_chunk_size = 10 * calc_size; | 2303 | max_chunk_size = 10 * calc_size; |
2219 | min_stripe_size = 64 * 1024 * 1024; | 2304 | min_stripe_size = 64 * 1024 * 1024; |
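
Editor's note: btrfs_cmp_device_free_bytes() above is a descending comparator over btrfs_device_info.max_avail, fed in via btrfs_descending_sort_devices() (called further below; presumably a wrapper around the kernel's sort()). A userspace analogue with qsort(); note that the usual "return b - a" shortcut is wrong for u64 fields, since the unsigned difference cannot express a signed result:

#include <stdint.h>
#include <stdlib.h>

struct dev_info { uint64_t max_avail; };

static int cmp_free_bytes_desc(const void *a, const void *b)
{
	uint64_t x = ((const struct dev_info *)a)->max_avail;
	uint64_t y = ((const struct dev_info *)b)->max_avail;

	/* bigger max_avail sorts first */
	return (x > y) ? -1 : (x < y) ? 1 : 0;
}

static void sort_devices(struct dev_info *devs, size_t n)
{
	qsort(devs, n, sizeof(*devs), cmp_free_bytes_desc);
}
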
@@ -2230,51 +2315,209 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2230 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), | 2315 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), |
2231 | max_chunk_size); | 2316 | max_chunk_size); |
2232 | 2317 | ||
2233 | again: | 2318 | if (calc_size * num_stripes > max_chunk_size * ncopies) { |
2234 | max_avail = 0; | 2319 | calc_size = max_chunk_size * ncopies; |
2235 | if (!map || map->num_stripes != num_stripes) { | ||
2236 | kfree(map); | ||
2237 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
2238 | if (!map) | ||
2239 | return -ENOMEM; | ||
2240 | map->num_stripes = num_stripes; | ||
2241 | } | ||
2242 | |||
2243 | if (calc_size * num_stripes > max_chunk_size) { | ||
2244 | calc_size = max_chunk_size; | ||
2245 | do_div(calc_size, num_stripes); | 2320 | do_div(calc_size, num_stripes); |
2246 | do_div(calc_size, stripe_len); | 2321 | do_div(calc_size, BTRFS_STRIPE_LEN); |
2247 | calc_size *= stripe_len; | 2322 | calc_size *= BTRFS_STRIPE_LEN; |
2248 | } | 2323 | } |
2249 | 2324 | ||
2250 | /* we don't want tiny stripes */ | 2325 | /* we don't want tiny stripes */ |
2251 | if (!looped) | 2326 | if (!small_stripe) |
2252 | calc_size = max_t(u64, min_stripe_size, calc_size); | 2327 | calc_size = max_t(u64, min_stripe_size, calc_size); |
2253 | 2328 | ||
2254 | /* | 2329 | /* |
2255 | * we're about to do_div by the stripe_len so let's make sure | 2330 | * we're about to do_div by the BTRFS_STRIPE_LEN so let's make sure |
2256 | * we end up with something bigger than a stripe | 2331 | * we end up with something bigger than a stripe |
2257 | */ | 2332 | */ |
2258 | calc_size = max_t(u64, calc_size, stripe_len * 4); | 2333 | calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN); |
2334 | |||
2335 | do_div(calc_size, BTRFS_STRIPE_LEN); | ||
2336 | calc_size *= BTRFS_STRIPE_LEN; | ||
2337 | |||
2338 | return calc_size; | ||
2339 | } | ||
2340 | |||
2341 | static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map, | ||
2342 | int num_stripes) | ||
2343 | { | ||
2344 | struct map_lookup *new; | ||
2345 | size_t len = map_lookup_size(num_stripes); | ||
2346 | |||
2347 | BUG_ON(map->num_stripes < num_stripes); | ||
2348 | |||
2349 | if (map->num_stripes == num_stripes) | ||
2350 | return map; | ||
2351 | |||
2352 | new = kmalloc(len, GFP_NOFS); | ||
2353 | if (!new) { | ||
2354 | /* just change map->num_stripes */ | ||
2355 | map->num_stripes = num_stripes; | ||
2356 | return map; | ||
2357 | } | ||
2358 | |||
2359 | memcpy(new, map, len); | ||
2360 | new->num_stripes = num_stripes; | ||
2361 | kfree(map); | ||
2362 | return new; | ||
2363 | } | ||
2364 | |||
2365 | /* | ||
2366 | * helper to allocate device space from btrfs_device_info, in which we have | ||
2367 | * stored the max free space information of every device. It is used when we | ||
2368 | * cannot allocate chunks of the default size. | ||
2369 | * | ||
2370 | * By this helper, we can allocate a new chunk as large as possible. | ||
2371 | */ | ||
2372 | static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans, | ||
2373 | struct btrfs_fs_devices *fs_devices, | ||
2374 | struct btrfs_device_info *devices, | ||
2375 | int nr_device, u64 type, | ||
2376 | struct map_lookup **map_lookup, | ||
2377 | int min_stripes, u64 *stripe_size) | ||
2378 | { | ||
2379 | int i, index, sort_again = 0; | ||
2380 | int min_devices = min_stripes; | ||
2381 | u64 max_avail, min_free; | ||
2382 | struct map_lookup *map = *map_lookup; | ||
2383 | int ret; | ||
2384 | |||
2385 | if (nr_device < min_stripes) | ||
2386 | return -ENOSPC; | ||
2387 | |||
2388 | btrfs_descending_sort_devices(devices, nr_device); | ||
2389 | |||
2390 | max_avail = devices[0].max_avail; | ||
2391 | if (!max_avail) | ||
2392 | return -ENOSPC; | ||
2393 | |||
2394 | for (i = 0; i < nr_device; i++) { | ||
2395 | /* | ||
2396 | * if dev_offset == 0, it means the free space of this device | ||
2397 | * is less than what we need, and we have not yet searched for | ||
2398 | * the max avail extent on this device, so do it now. | ||
2399 | */ | ||
2400 | if (!devices[i].dev_offset) { | ||
2401 | ret = find_free_dev_extent(trans, devices[i].dev, | ||
2402 | max_avail, | ||
2403 | &devices[i].dev_offset, | ||
2404 | &devices[i].max_avail); | ||
2405 | if (ret != 0 && ret != -ENOSPC) | ||
2406 | return ret; | ||
2407 | sort_again = 1; | ||
2408 | } | ||
2409 | } | ||
2410 | |||
2411 | /* we updated the max avail free extent of each device, so sort again */ | ||
2412 | if (sort_again) | ||
2413 | btrfs_descending_sort_devices(devices, nr_device); | ||
2414 | |||
2415 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2416 | min_devices = 1; | ||
2417 | |||
2418 | if (!devices[min_devices - 1].max_avail) | ||
2419 | return -ENOSPC; | ||
2420 | |||
2421 | max_avail = devices[min_devices - 1].max_avail; | ||
2422 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2423 | do_div(max_avail, 2); | ||
2424 | |||
2425 | max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type, | ||
2426 | min_stripes, 1); | ||
2427 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2428 | min_free = max_avail * 2; | ||
2429 | else | ||
2430 | min_free = max_avail; | ||
2431 | |||
2432 | if (min_free > devices[min_devices - 1].max_avail) | ||
2433 | return -ENOSPC; | ||
2434 | |||
2435 | map = __shrink_map_lookup_stripes(map, min_stripes); | ||
2436 | *stripe_size = max_avail; | ||
2437 | |||
2438 | index = 0; | ||
2439 | for (i = 0; i < min_stripes; i++) { | ||
2440 | map->stripes[i].dev = devices[index].dev; | ||
2441 | map->stripes[i].physical = devices[index].dev_offset; | ||
2442 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
2443 | i++; | ||
2444 | map->stripes[i].dev = devices[index].dev; | ||
2445 | map->stripes[i].physical = devices[index].dev_offset + | ||
2446 | max_avail; | ||
2447 | } | ||
2448 | index++; | ||
2449 | } | ||
2450 | *map_lookup = map; | ||
2451 | |||
2452 | return 0; | ||
2453 | } | ||
2454 | |||
2455 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | ||
2456 | struct btrfs_root *extent_root, | ||
2457 | struct map_lookup **map_ret, | ||
2458 | u64 *num_bytes, u64 *stripe_size, | ||
2459 | u64 start, u64 type) | ||
2460 | { | ||
2461 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
2462 | struct btrfs_device *device = NULL; | ||
2463 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | ||
2464 | struct list_head *cur; | ||
2465 | struct map_lookup *map; | ||
2466 | struct extent_map_tree *em_tree; | ||
2467 | struct extent_map *em; | ||
2468 | struct btrfs_device_info *devices_info; | ||
2469 | struct list_head private_devs; | ||
2470 | u64 calc_size = 1024 * 1024 * 1024; | ||
2471 | u64 min_free; | ||
2472 | u64 avail; | ||
2473 | u64 dev_offset; | ||
2474 | int num_stripes; | ||
2475 | int min_stripes; | ||
2476 | int sub_stripes; | ||
2477 | int min_devices; /* the min number of devices we need */ | ||
2478 | int i; | ||
2479 | int ret; | ||
2480 | int index; | ||
2259 | 2481 | ||
2260 | do_div(calc_size, stripe_len); | 2482 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && |
2261 | calc_size *= stripe_len; | 2483 | (type & BTRFS_BLOCK_GROUP_DUP)) { |
2484 | WARN_ON(1); | ||
2485 | type &= ~BTRFS_BLOCK_GROUP_DUP; | ||
2486 | } | ||
2487 | if (list_empty(&fs_devices->alloc_list)) | ||
2488 | return -ENOSPC; | ||
2489 | |||
2490 | ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes, | ||
2491 | &min_stripes, &sub_stripes); | ||
2492 | if (ret) | ||
2493 | return ret; | ||
2494 | |||
2495 | devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, | ||
2496 | GFP_NOFS); | ||
2497 | if (!devices_info) | ||
2498 | return -ENOMEM; | ||
2499 | |||
2500 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
2501 | if (!map) { | ||
2502 | ret = -ENOMEM; | ||
2503 | goto error; | ||
2504 | } | ||
2505 | map->num_stripes = num_stripes; | ||
2262 | 2506 | ||
2263 | cur = fs_devices->alloc_list.next; | 2507 | cur = fs_devices->alloc_list.next; |
2264 | index = 0; | 2508 | index = 0; |
2509 | i = 0; | ||
2265 | 2510 | ||
2266 | if (type & BTRFS_BLOCK_GROUP_DUP) | 2511 | calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type, |
2512 | num_stripes, 0); | ||
2513 | |||
2514 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
2267 | min_free = calc_size * 2; | 2515 | min_free = calc_size * 2; |
2268 | else | 2516 | min_devices = 1; |
2517 | } else { | ||
2269 | min_free = calc_size; | 2518 | min_free = calc_size; |
2270 | 2519 | min_devices = min_stripes; | |
2271 | /* | 2520 | } |
2272 | * we add 1MB because we never use the first 1MB of the device, unless | ||
2273 | * we've looped, then we are likely allocating the maximum amount of | ||
2274 | * space left already | ||
2275 | */ | ||
2276 | if (!looped) | ||
2277 | min_free += 1024 * 1024; | ||
2278 | 2521 | ||
2279 | INIT_LIST_HEAD(&private_devs); | 2522 | INIT_LIST_HEAD(&private_devs); |
2280 | while (index < num_stripes) { | 2523 | while (index < num_stripes) { |
@@ -2287,27 +2530,39 @@ again: | |||
2287 | cur = cur->next; | 2530 | cur = cur->next; |
2288 | 2531 | ||
2289 | if (device->in_fs_metadata && avail >= min_free) { | 2532 | if (device->in_fs_metadata && avail >= min_free) { |
2290 | ret = find_free_dev_extent(trans, device, | 2533 | ret = find_free_dev_extent(trans, device, min_free, |
2291 | min_free, &dev_offset, | 2534 | &devices_info[i].dev_offset, |
2292 | &max_avail); | 2535 | &devices_info[i].max_avail); |
2293 | if (ret == 0) { | 2536 | if (ret == 0) { |
2294 | list_move_tail(&device->dev_alloc_list, | 2537 | list_move_tail(&device->dev_alloc_list, |
2295 | &private_devs); | 2538 | &private_devs); |
2296 | map->stripes[index].dev = device; | 2539 | map->stripes[index].dev = device; |
2297 | map->stripes[index].physical = dev_offset; | 2540 | map->stripes[index].physical = |
2541 | devices_info[i].dev_offset; | ||
2298 | index++; | 2542 | index++; |
2299 | if (type & BTRFS_BLOCK_GROUP_DUP) { | 2543 | if (type & BTRFS_BLOCK_GROUP_DUP) { |
2300 | map->stripes[index].dev = device; | 2544 | map->stripes[index].dev = device; |
2301 | map->stripes[index].physical = | 2545 | map->stripes[index].physical = |
2302 | dev_offset + calc_size; | 2546 | devices_info[i].dev_offset + |
2547 | calc_size; | ||
2303 | index++; | 2548 | index++; |
2304 | } | 2549 | } |
2305 | } | 2550 | } else if (ret != -ENOSPC) |
2306 | } else if (device->in_fs_metadata && avail > max_avail) | 2551 | goto error; |
2307 | max_avail = avail; | 2552 | |
2553 | devices_info[i].dev = device; | ||
2554 | i++; | ||
2555 | } else if (device->in_fs_metadata && | ||
2556 | avail >= BTRFS_STRIPE_LEN) { | ||
2557 | devices_info[i].dev = device; | ||
2558 | devices_info[i].max_avail = avail; | ||
2559 | i++; | ||
2560 | } | ||
2561 | |||
2308 | if (cur == &fs_devices->alloc_list) | 2562 | if (cur == &fs_devices->alloc_list) |
2309 | break; | 2563 | break; |
2310 | } | 2564 | } |
2565 | |||
2311 | list_splice(&private_devs, &fs_devices->alloc_list); | 2566 | list_splice(&private_devs, &fs_devices->alloc_list); |
2312 | if (index < num_stripes) { | 2567 | if (index < num_stripes) { |
2313 | if (index >= min_stripes) { | 2568 | if (index >= min_stripes) { |
@@ -2316,34 +2571,38 @@ again: | |||
2316 | num_stripes /= sub_stripes; | 2571 | num_stripes /= sub_stripes; |
2317 | num_stripes *= sub_stripes; | 2572 | num_stripes *= sub_stripes; |
2318 | } | 2573 | } |
2319 | looped = 1; | 2574 | |
2320 | goto again; | 2575 | map = __shrink_map_lookup_stripes(map, num_stripes); |
2321 | } | 2576 | } else if (i >= min_devices) { |
2322 | if (!looped && max_avail > 0) { | 2577 | ret = __btrfs_alloc_tiny_space(trans, fs_devices, |
2323 | looped = 1; | 2578 | devices_info, i, type, |
2324 | calc_size = max_avail; | 2579 | &map, min_stripes, |
2325 | goto again; | 2580 | &calc_size); |
2581 | if (ret) | ||
2582 | goto error; | ||
2583 | } else { | ||
2584 | ret = -ENOSPC; | ||
2585 | goto error; | ||
2326 | } | 2586 | } |
2327 | kfree(map); | ||
2328 | return -ENOSPC; | ||
2329 | } | 2587 | } |
2330 | map->sector_size = extent_root->sectorsize; | 2588 | map->sector_size = extent_root->sectorsize; |
2331 | map->stripe_len = stripe_len; | 2589 | map->stripe_len = BTRFS_STRIPE_LEN; |
2332 | map->io_align = stripe_len; | 2590 | map->io_align = BTRFS_STRIPE_LEN; |
2333 | map->io_width = stripe_len; | 2591 | map->io_width = BTRFS_STRIPE_LEN; |
2334 | map->type = type; | 2592 | map->type = type; |
2335 | map->num_stripes = num_stripes; | ||
2336 | map->sub_stripes = sub_stripes; | 2593 | map->sub_stripes = sub_stripes; |
2337 | 2594 | ||
2338 | *map_ret = map; | 2595 | *map_ret = map; |
2339 | *stripe_size = calc_size; | 2596 | *stripe_size = calc_size; |
2340 | *num_bytes = chunk_bytes_by_type(type, calc_size, | 2597 | *num_bytes = chunk_bytes_by_type(type, calc_size, |
2341 | num_stripes, sub_stripes); | 2598 | map->num_stripes, sub_stripes); |
2599 | |||
2600 | trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes); | ||
2342 | 2601 | ||
2343 | em = alloc_extent_map(GFP_NOFS); | 2602 | em = alloc_extent_map(GFP_NOFS); |
2344 | if (!em) { | 2603 | if (!em) { |
2345 | kfree(map); | 2604 | ret = -ENOMEM; |
2346 | return -ENOMEM; | 2605 | goto error; |
2347 | } | 2606 | } |
2348 | em->bdev = (struct block_device *)map; | 2607 | em->bdev = (struct block_device *)map; |
2349 | em->start = start; | 2608 | em->start = start; |
@@ -2376,7 +2635,13 @@ again: | |||
2376 | index++; | 2635 | index++; |
2377 | } | 2636 | } |
2378 | 2637 | ||
2638 | kfree(devices_info); | ||
2379 | return 0; | 2639 | return 0; |
2640 | |||
2641 | error: | ||
2642 | kfree(map); | ||
2643 | kfree(devices_info); | ||
2644 | return ret; | ||
2380 | } | 2645 | } |
2381 | 2646 | ||
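
Editor's note: the restructured __btrfs_alloc_chunk() above no longer loops with looped/again; it records per-device free space in devices_info and falls through a fixed ladder: full-size stripes on every device, else fewer stripes via __shrink_map_lookup_stripes(), else the biggest chunk that fits via __btrfs_alloc_tiny_space(), else -ENOSPC. The decision order, modeled as a small standalone function with illustrative names:

enum alloc_path { FULL_SIZE, SHRINK_STRIPES, TINY_SPACE, NO_SPACE };

/* filled: stripes that found a full-size extent;
 * usable: devices with any usable free space recorded in devices_info. */
static enum alloc_path pick_alloc_path(int filled, int num_stripes,
				       int min_stripes, int usable,
				       int min_devices)
{
	if (filled == num_stripes)
		return FULL_SIZE;	/* every stripe got a full extent */
	if (filled >= min_stripes)
		return SHRINK_STRIPES;	/* fewer, but still enough, stripes */
	if (usable >= min_devices)
		return TINY_SPACE;	/* build the largest chunk possible */
	return NO_SPACE;		/* -ENOSPC */
}
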
2382 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | 2647 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, |
@@ -2442,6 +2707,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
2442 | item_size); | 2707 | item_size); |
2443 | BUG_ON(ret); | 2708 | BUG_ON(ret); |
2444 | } | 2709 | } |
2710 | |||
2445 | kfree(chunk); | 2711 | kfree(chunk); |
2446 | return 0; | 2712 | return 0; |
2447 | } | 2713 | } |
@@ -2639,14 +2905,17 @@ static int find_live_mirror(struct map_lookup *map, int first, int num, | |||
2639 | static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | 2905 | static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, |
2640 | u64 logical, u64 *length, | 2906 | u64 logical, u64 *length, |
2641 | struct btrfs_multi_bio **multi_ret, | 2907 | struct btrfs_multi_bio **multi_ret, |
2642 | int mirror_num, struct page *unplug_page) | 2908 | int mirror_num) |
2643 | { | 2909 | { |
2644 | struct extent_map *em; | 2910 | struct extent_map *em; |
2645 | struct map_lookup *map; | 2911 | struct map_lookup *map; |
2646 | struct extent_map_tree *em_tree = &map_tree->map_tree; | 2912 | struct extent_map_tree *em_tree = &map_tree->map_tree; |
2647 | u64 offset; | 2913 | u64 offset; |
2648 | u64 stripe_offset; | 2914 | u64 stripe_offset; |
2915 | u64 stripe_end_offset; | ||
2649 | u64 stripe_nr; | 2916 | u64 stripe_nr; |
2917 | u64 stripe_nr_orig; | ||
2918 | u64 stripe_nr_end; | ||
2650 | int stripes_allocated = 8; | 2919 | int stripes_allocated = 8; |
2651 | int stripes_required = 1; | 2920 | int stripes_required = 1; |
2652 | int stripe_index; | 2921 | int stripe_index; |
@@ -2655,7 +2924,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
2655 | int max_errors = 0; | 2924 | int max_errors = 0; |
2656 | struct btrfs_multi_bio *multi = NULL; | 2925 | struct btrfs_multi_bio *multi = NULL; |
2657 | 2926 | ||
2658 | if (multi_ret && !(rw & REQ_WRITE)) | 2927 | if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD))) |
2659 | stripes_allocated = 1; | 2928 | stripes_allocated = 1; |
2660 | again: | 2929 | again: |
2661 | if (multi_ret) { | 2930 | if (multi_ret) { |
@@ -2671,11 +2940,6 @@ again: | |||
2671 | em = lookup_extent_mapping(em_tree, logical, *length); | 2940 | em = lookup_extent_mapping(em_tree, logical, *length); |
2672 | read_unlock(&em_tree->lock); | 2941 | read_unlock(&em_tree->lock); |
2673 | 2942 | ||
2674 | if (!em && unplug_page) { | ||
2675 | kfree(multi); | ||
2676 | return 0; | ||
2677 | } | ||
2678 | |||
2679 | if (!em) { | 2943 | if (!em) { |
2680 | printk(KERN_CRIT "unable to find logical %llu len %llu\n", | 2944 | printk(KERN_CRIT "unable to find logical %llu len %llu\n", |
2681 | (unsigned long long)logical, | 2945 | (unsigned long long)logical, |
@@ -2701,7 +2965,15 @@ again: | |||
2701 | max_errors = 1; | 2965 | max_errors = 1; |
2702 | } | 2966 | } |
2703 | } | 2967 | } |
2704 | if (multi_ret && (rw & REQ_WRITE) && | 2968 | if (rw & REQ_DISCARD) { |
2969 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | | ||
2970 | BTRFS_BLOCK_GROUP_RAID1 | | ||
2971 | BTRFS_BLOCK_GROUP_DUP | | ||
2972 | BTRFS_BLOCK_GROUP_RAID10)) { | ||
2973 | stripes_required = map->num_stripes; | ||
2974 | } | ||
2975 | } | ||
2976 | if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) && | ||
2705 | stripes_allocated < stripes_required) { | 2977 | stripes_allocated < stripes_required) { |
2706 | stripes_allocated = map->num_stripes; | 2978 | stripes_allocated = map->num_stripes; |
2707 | free_extent_map(em); | 2979 | free_extent_map(em); |
@@ -2721,23 +2993,37 @@ again: | |||
2721 | /* stripe_offset is the offset of this block in its stripe */ | 2993 | /* stripe_offset is the offset of this block in its stripe */ |
2722 | stripe_offset = offset - stripe_offset; | 2994 | stripe_offset = offset - stripe_offset; |
2723 | 2995 | ||
2724 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | | 2996 | if (rw & REQ_DISCARD) |
2725 | BTRFS_BLOCK_GROUP_RAID10 | | 2997 | *length = min_t(u64, em->len - offset, *length); |
2726 | BTRFS_BLOCK_GROUP_DUP)) { | 2998 | else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | |
2999 | BTRFS_BLOCK_GROUP_RAID1 | | ||
3000 | BTRFS_BLOCK_GROUP_RAID10 | | ||
3001 | BTRFS_BLOCK_GROUP_DUP)) { | ||
2727 | /* we limit the length of each bio to what fits in a stripe */ | 3002 | /* we limit the length of each bio to what fits in a stripe */ |
2728 | *length = min_t(u64, em->len - offset, | 3003 | *length = min_t(u64, em->len - offset, |
2729 | map->stripe_len - stripe_offset); | 3004 | map->stripe_len - stripe_offset); |
2730 | } else { | 3005 | } else { |
2731 | *length = em->len - offset; | 3006 | *length = em->len - offset; |
2732 | } | 3007 | } |
2733 | 3008 | ||
2734 | if (!multi_ret && !unplug_page) | 3009 | if (!multi_ret) |
2735 | goto out; | 3010 | goto out; |
2736 | 3011 | ||
2737 | num_stripes = 1; | 3012 | num_stripes = 1; |
2738 | stripe_index = 0; | 3013 | stripe_index = 0; |
2739 | if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | 3014 | stripe_nr_orig = stripe_nr; |
2740 | if (unplug_page || (rw & REQ_WRITE)) | 3015 | stripe_nr_end = (offset + *length + map->stripe_len - 1) & |
3016 | (~(map->stripe_len - 1)); | ||
3017 | do_div(stripe_nr_end, map->stripe_len); | ||
3018 | stripe_end_offset = stripe_nr_end * map->stripe_len - | ||
3019 | (offset + *length); | ||
3020 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | ||
3021 | if (rw & REQ_DISCARD) | ||
3022 | num_stripes = min_t(u64, map->num_stripes, | ||
3023 | stripe_nr_end - stripe_nr_orig); | ||
3024 | stripe_index = do_div(stripe_nr, map->num_stripes); | ||
3025 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | ||
3026 | if (rw & (REQ_WRITE | REQ_DISCARD)) | ||
2741 | num_stripes = map->num_stripes; | 3027 | num_stripes = map->num_stripes; |
2742 | else if (mirror_num) | 3028 | else if (mirror_num) |
2743 | stripe_index = mirror_num - 1; | 3029 | stripe_index = mirror_num - 1; |
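
To see what the new stripe_nr_end/stripe_end_offset bookkeeping computes, here is a worked example with made-up numbers (illustration only; BTRFS_STRIPE_LEN is 64 KiB per the volumes.h hunk below):

    /* a discard at offset 96 KiB into the chunk, 150 KiB long */
    u64 offset = 96 << 10, length = 150 << 10, stripe_len = 64 << 10;

    u64 stripe_nr_orig = offset / stripe_len;            /* 1: first unit touched */
    u64 stripe_nr_end = (offset + length + stripe_len - 1)
                        & ~(stripe_len - 1);             /* end rounded up: 256 KiB */
    stripe_nr_end /= stripe_len;                         /* 4: first unit past the end */
    u64 stripe_end_offset = stripe_nr_end * stripe_len
                            - (offset + length);         /* 10 KiB unused in last unit */

So this discard spans stripe units 1..3, with the last unit only partially covered; the per-stripe lengths computed further down trim that tail off.
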
@@ -2748,7 +3034,7 @@ again: | |||
2748 | } | 3034 | } |
2749 | 3035 | ||
2750 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | 3036 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { |
2751 | if (rw & REQ_WRITE) | 3037 | if (rw & (REQ_WRITE | REQ_DISCARD)) |
2752 | num_stripes = map->num_stripes; | 3038 | num_stripes = map->num_stripes; |
2753 | else if (mirror_num) | 3039 | else if (mirror_num) |
2754 | stripe_index = mirror_num - 1; | 3040 | stripe_index = mirror_num - 1; |
@@ -2759,8 +3045,12 @@ again: | |||
2759 | stripe_index = do_div(stripe_nr, factor); | 3045 | stripe_index = do_div(stripe_nr, factor); |
2760 | stripe_index *= map->sub_stripes; | 3046 | stripe_index *= map->sub_stripes; |
2761 | 3047 | ||
2762 | if (unplug_page || (rw & REQ_WRITE)) | 3048 | if (rw & REQ_WRITE) |
2763 | num_stripes = map->sub_stripes; | 3049 | num_stripes = map->sub_stripes; |
3050 | else if (rw & REQ_DISCARD) | ||
3051 | num_stripes = min_t(u64, map->sub_stripes * | ||
3052 | (stripe_nr_end - stripe_nr_orig), | ||
3053 | map->num_stripes); | ||
2764 | else if (mirror_num) | 3054 | else if (mirror_num) |
2765 | stripe_index += mirror_num - 1; | 3055 | stripe_index += mirror_num - 1; |
2766 | else { | 3056 | else { |
@@ -2778,24 +3068,101 @@ again: | |||
2778 | } | 3068 | } |
2779 | BUG_ON(stripe_index >= map->num_stripes); | 3069 | BUG_ON(stripe_index >= map->num_stripes); |
2780 | 3070 | ||
2781 | for (i = 0; i < num_stripes; i++) { | 3071 | if (rw & REQ_DISCARD) { |
2782 | if (unplug_page) { | 3072 | for (i = 0; i < num_stripes; i++) { |
2783 | struct btrfs_device *device; | ||
2784 | struct backing_dev_info *bdi; | ||
2785 | |||
2786 | device = map->stripes[stripe_index].dev; | ||
2787 | if (device->bdev) { | ||
2788 | bdi = blk_get_backing_dev_info(device->bdev); | ||
2789 | if (bdi->unplug_io_fn) | ||
2790 | bdi->unplug_io_fn(bdi, unplug_page); | ||
2791 | } | ||
2792 | } else { | ||
2793 | multi->stripes[i].physical = | 3073 | multi->stripes[i].physical = |
2794 | map->stripes[stripe_index].physical + | 3074 | map->stripes[stripe_index].physical + |
2795 | stripe_offset + stripe_nr * map->stripe_len; | 3075 | stripe_offset + stripe_nr * map->stripe_len; |
2796 | multi->stripes[i].dev = map->stripes[stripe_index].dev; | 3076 | multi->stripes[i].dev = map->stripes[stripe_index].dev; |
3077 | |||
3078 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | ||
3079 | u64 stripes; | ||
3080 | u32 last_stripe = 0; | ||
3081 | int j; | ||
3082 | |||
3083 | div_u64_rem(stripe_nr_end - 1, | ||
3084 | map->num_stripes, | ||
3085 | &last_stripe); | ||
3086 | |||
3087 | for (j = 0; j < map->num_stripes; j++) { | ||
3088 | u32 test; | ||
3089 | |||
3090 | div_u64_rem(stripe_nr_end - 1 - j, | ||
3091 | map->num_stripes, &test); | ||
3092 | if (test == stripe_index) | ||
3093 | break; | ||
3094 | } | ||
3095 | stripes = stripe_nr_end - 1 - j; | ||
3096 | do_div(stripes, map->num_stripes); | ||
3097 | multi->stripes[i].length = map->stripe_len * | ||
3098 | (stripes - stripe_nr + 1); | ||
3099 | |||
3100 | if (i == 0) { | ||
3101 | multi->stripes[i].length -= | ||
3102 | stripe_offset; | ||
3103 | stripe_offset = 0; | ||
3104 | } | ||
3105 | if (stripe_index == last_stripe) | ||
3106 | multi->stripes[i].length -= | ||
3107 | stripe_end_offset; | ||
3108 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | ||
3109 | u64 stripes; | ||
3110 | int j; | ||
3111 | int factor = map->num_stripes / | ||
3112 | map->sub_stripes; | ||
3113 | u32 last_stripe = 0; | ||
3114 | |||
3115 | div_u64_rem(stripe_nr_end - 1, | ||
3116 | factor, &last_stripe); | ||
3117 | last_stripe *= map->sub_stripes; | ||
3118 | |||
3119 | for (j = 0; j < factor; j++) { | ||
3120 | u32 test; | ||
3121 | |||
3122 | div_u64_rem(stripe_nr_end - 1 - j, | ||
3123 | factor, &test); | ||
3124 | |||
3125 | if (test == | ||
3126 | stripe_index / map->sub_stripes) | ||
3127 | break; | ||
3128 | } | ||
3129 | stripes = stripe_nr_end - 1 - j; | ||
3130 | do_div(stripes, factor); | ||
3131 | multi->stripes[i].length = map->stripe_len * | ||
3132 | (stripes - stripe_nr + 1); | ||
3133 | |||
3134 | if (i < map->sub_stripes) { | ||
3135 | multi->stripes[i].length -= | ||
3136 | stripe_offset; | ||
3137 | if (i == map->sub_stripes - 1) | ||
3138 | stripe_offset = 0; | ||
3139 | } | ||
3140 | if (stripe_index >= last_stripe && | ||
3141 | stripe_index <= (last_stripe + | ||
3142 | map->sub_stripes - 1)) { | ||
3143 | multi->stripes[i].length -= | ||
3144 | stripe_end_offset; | ||
3145 | } | ||
3146 | } else | ||
3147 | multi->stripes[i].length = *length; | ||
3148 | |||
3149 | stripe_index++; | ||
3150 | if (stripe_index == map->num_stripes) { | ||
3151 | /* This could only happen for RAID0/10 */ | ||
3152 | stripe_index = 0; | ||
3153 | stripe_nr++; | ||
3154 | } | ||
3155 | } | ||
3156 | } else { | ||
3157 | for (i = 0; i < num_stripes; i++) { | ||
3158 | multi->stripes[i].physical = | ||
3159 | map->stripes[stripe_index].physical + | ||
3160 | stripe_offset + | ||
3161 | stripe_nr * map->stripe_len; | ||
3162 | multi->stripes[i].dev = | ||
3163 | map->stripes[stripe_index].dev; | ||
3164 | stripe_index++; | ||
2797 | } | 3165 | } |
2798 | stripe_index++; | ||
2799 | } | 3166 | } |
2800 | if (multi_ret) { | 3167 | if (multi_ret) { |
2801 | *multi_ret = multi; | 3168 | *multi_ret = multi; |
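
The RAID0 discard branch above distributes the byte count per device column. A worked illustration (numbers invented) of the div_u64_rem/do_div arithmetic, assuming num_stripes = 3 and a discard covering stripe units 1..5 (stripe_nr_orig = 1, stripe_nr_end = 6):

    /*
     * units:  0 1 2        row 0 (columns 0..2)
     *         3 4 5        row 1
     *
     * After do_div(stripe_nr, num_stripes) the walk starts at
     * column 1, row 0, and last_stripe = (6 - 1) % 3 = 2.
     *
     * column 1: last unit on it is 4 (j = 1), its row is 1
     *           -> length = stripe_len * (1 - 0 + 1) = 2 units
     * column 2: last unit on it is 5 (j = 0), row 1 -> 2 units
     * column 0: last unit on it is 3 (j = 2), row 1; by now
     *           stripe_nr has wrapped to row 1 -> 1 unit
     *
     * Then entry 0 is trimmed by stripe_offset, and the entry for
     * last_stripe (column 2) by stripe_end_offset.
     */
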
@@ -2812,7 +3179,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
2812 | struct btrfs_multi_bio **multi_ret, int mirror_num) | 3179 | struct btrfs_multi_bio **multi_ret, int mirror_num) |
2813 | { | 3180 | { |
2814 | return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, | 3181 | return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, |
2815 | mirror_num, NULL); | 3182 | mirror_num); |
2816 | } | 3183 | } |
2817 | 3184 | ||
2818 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | 3185 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, |
@@ -2880,14 +3247,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
2880 | return 0; | 3247 | return 0; |
2881 | } | 3248 | } |
2882 | 3249 | ||
2883 | int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, | ||
2884 | u64 logical, struct page *page) | ||
2885 | { | ||
2886 | u64 length = PAGE_CACHE_SIZE; | ||
2887 | return __btrfs_map_block(map_tree, READ, logical, &length, | ||
2888 | NULL, 0, page); | ||
2889 | } | ||
2890 | |||
2891 | static void end_bio_multi_stripe(struct bio *bio, int err) | 3250 | static void end_bio_multi_stripe(struct bio *bio, int err) |
2892 | { | 3251 | { |
2893 | struct btrfs_multi_bio *multi = bio->bi_private; | 3252 | struct btrfs_multi_bio *multi = bio->bi_private; |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 1be781079450..cc2eadaf7a27 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -20,8 +20,11 @@ | |||
20 | #define __BTRFS_VOLUMES_ | 20 | #define __BTRFS_VOLUMES_ |
21 | 21 | ||
22 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
23 | #include <linux/sort.h> | ||
23 | #include "async-thread.h" | 24 | #include "async-thread.h" |
24 | 25 | ||
26 | #define BTRFS_STRIPE_LEN (64 * 1024) | ||
27 | |||
25 | struct buffer_head; | 28 | struct buffer_head; |
26 | struct btrfs_pending_bios { | 29 | struct btrfs_pending_bios { |
27 | struct bio *head; | 30 | struct bio *head; |
@@ -123,6 +126,7 @@ struct btrfs_fs_devices { | |||
123 | struct btrfs_bio_stripe { | 126 | struct btrfs_bio_stripe { |
124 | struct btrfs_device *dev; | 127 | struct btrfs_device *dev; |
125 | u64 physical; | 128 | u64 physical; |
129 | u64 length; /* only used for discard mappings */ | ||
126 | }; | 130 | }; |
127 | 131 | ||
128 | struct btrfs_multi_bio { | 132 | struct btrfs_multi_bio { |
@@ -136,6 +140,41 @@ struct btrfs_multi_bio { | |||
136 | struct btrfs_bio_stripe stripes[]; | 140 | struct btrfs_bio_stripe stripes[]; |
137 | }; | 141 | }; |
138 | 142 | ||
143 | struct btrfs_device_info { | ||
144 | struct btrfs_device *dev; | ||
145 | u64 dev_offset; | ||
146 | u64 max_avail; | ||
147 | }; | ||
148 | |||
149 | struct map_lookup { | ||
150 | u64 type; | ||
151 | int io_align; | ||
152 | int io_width; | ||
153 | int stripe_len; | ||
154 | int sector_size; | ||
155 | int num_stripes; | ||
156 | int sub_stripes; | ||
157 | struct btrfs_bio_stripe stripes[]; | ||
158 | }; | ||
159 | |||
160 | /* Used to sort the devices by max_avail (descending sort) */ | ||
161 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); | ||
162 | |||
163 | /* | ||
164 | * sort the devices by max_avail, which holds the largest free extent | ||
165 | * size of each device (descending sort) | ||
166 | */ | ||
167 | static inline void btrfs_descending_sort_devices( | ||
168 | struct btrfs_device_info *devices, | ||
169 | size_t nr_devices) | ||
170 | { | ||
171 | sort(devices, nr_devices, sizeof(struct btrfs_device_info), | ||
172 | btrfs_cmp_device_free_bytes, NULL); | ||
173 | } | ||
174 | |||
175 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | ||
176 | u64 end, u64 *length); | ||
177 | |||
139 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ | 178 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ |
140 | (sizeof(struct btrfs_bio_stripe) * (n))) | 179 | (sizeof(struct btrfs_bio_stripe) * (n))) |
141 | 180 | ||
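
btrfs_cmp_device_free_bytes() is only declared here; its definition lives in volumes.c. A minimal sketch of the shape sort() expects, assuming the "larger max_avail first" contract stated in the comment:

    /* sketch: <0 sorts dev_info1 before dev_info2 (descending by max_avail) */
    int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2)
    {
            const struct btrfs_device_info *a = dev_info1;
            const struct btrfs_device_info *b = dev_info2;

            if (a->max_avail > b->max_avail)
                    return -1;
            if (a->max_avail < b->max_avail)
                    return 1;
            return 0;
    }
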
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 698fdd2c739c..cfd660550ded 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -180,11 +180,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
180 | struct btrfs_path *path; | 180 | struct btrfs_path *path; |
181 | struct extent_buffer *leaf; | 181 | struct extent_buffer *leaf; |
182 | struct btrfs_dir_item *di; | 182 | struct btrfs_dir_item *di; |
183 | int ret = 0, slot, advance; | 183 | int ret = 0, slot; |
184 | size_t total_size = 0, size_left = size; | 184 | size_t total_size = 0, size_left = size; |
185 | unsigned long name_ptr; | 185 | unsigned long name_ptr; |
186 | size_t name_len; | 186 | size_t name_len; |
187 | u32 nritems; | ||
188 | 187 | ||
189 | /* | 188 | /* |
190 | * ok we want all objects associated with this id. | 189 | * ok we want all objects associated with this id. |
@@ -204,34 +203,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
204 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 203 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
205 | if (ret < 0) | 204 | if (ret < 0) |
206 | goto err; | 205 | goto err; |
207 | advance = 0; | 206 | |
208 | while (1) { | 207 | while (1) { |
209 | leaf = path->nodes[0]; | 208 | leaf = path->nodes[0]; |
210 | nritems = btrfs_header_nritems(leaf); | ||
211 | slot = path->slots[0]; | 209 | slot = path->slots[0]; |
212 | 210 | ||
213 | /* this is where we start walking through the path */ | 211 | /* this is where we start walking through the path */ |
214 | if (advance || slot >= nritems) { | 212 | if (slot >= btrfs_header_nritems(leaf)) { |
215 | /* | 213 | /* |
216 | * if we've reached the last slot in this leaf we need | 214 | * if we've reached the last slot in this leaf we need |
217 | * to go to the next leaf and reset everything | 215 | * to go to the next leaf and reset everything |
218 | */ | 216 | */ |
219 | if (slot >= nritems-1) { | 217 | ret = btrfs_next_leaf(root, path); |
220 | ret = btrfs_next_leaf(root, path); | 218 | if (ret < 0) |
221 | if (ret) | 219 | goto err; |
222 | break; | 220 | else if (ret > 0) |
223 | leaf = path->nodes[0]; | 221 | break; |
224 | nritems = btrfs_header_nritems(leaf); | 222 | continue; |
225 | slot = path->slots[0]; | ||
226 | } else { | ||
227 | /* | ||
228 | * just walking through the slots on this leaf | ||
229 | */ | ||
230 | slot++; | ||
231 | path->slots[0]++; | ||
232 | } | ||
233 | } | 223 | } |
234 | advance = 1; | ||
235 | 224 | ||
236 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 225 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
237 | 226 | ||
@@ -242,13 +231,15 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
242 | break; | 231 | break; |
243 | 232 | ||
244 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | 233 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); |
234 | if (verify_dir_item(root, leaf, di)) | ||
235 | continue; | ||
245 | 236 | ||
246 | name_len = btrfs_dir_name_len(leaf, di); | 237 | name_len = btrfs_dir_name_len(leaf, di); |
247 | total_size += name_len + 1; | 238 | total_size += name_len + 1; |
248 | 239 | ||
249 | /* we are just looking for how big our buffer needs to be */ | 240 | /* we are just looking for how big our buffer needs to be */ |
250 | if (!size) | 241 | if (!size) |
251 | continue; | 242 | goto next; |
252 | 243 | ||
253 | if (!buffer || (name_len + 1) > size_left) { | 244 | if (!buffer || (name_len + 1) > size_left) { |
254 | ret = -ERANGE; | 245 | ret = -ERANGE; |
@@ -261,6 +252,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
261 | 252 | ||
262 | size_left -= name_len + 1; | 253 | size_left -= name_len + 1; |
263 | buffer += name_len + 1; | 254 | buffer += name_len + 1; |
255 | next: | ||
256 | path->slots[0]++; | ||
264 | } | 257 | } |
265 | ret = total_size; | 258 | ret = total_size; |
266 | 259 | ||
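
The rewritten loop is the standard btree iteration idiom: consume items by bumping path->slots[0] within the current leaf, and only call btrfs_next_leaf() when the slot runs off the end. Reduced to its skeleton (a fragment, with leaf/slot/path/root as declared in the function above):

    while (1) {
            leaf = path->nodes[0];
            slot = path->slots[0];

            if (slot >= btrfs_header_nritems(leaf)) {
                    ret = btrfs_next_leaf(root, path);
                    if (ret < 0)
                            goto err;       /* error */
                    else if (ret > 0)
                            break;          /* no more leaves */
                    continue;               /* reload leaf and slot */
            }

            /* ... handle the item at (leaf, slot) ... */

            path->slots[0]++;               /* advance within the leaf */
    }

Compared with the old advance/nritems bookkeeping, leaf transitions are now handled in one place and each item is visited exactly once.
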
@@ -316,6 +309,15 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, | |||
316 | int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | 309 | int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, |
317 | size_t size, int flags) | 310 | size_t size, int flags) |
318 | { | 311 | { |
312 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | ||
313 | |||
314 | /* | ||
315 | * The permission on security.* and system.* is not checked | ||
316 | * in permission(). | ||
317 | */ | ||
318 | if (btrfs_root_readonly(root)) | ||
319 | return -EROFS; | ||
320 | |||
319 | /* | 321 | /* |
320 | * If this is a request for a synthetic attribute in the system.* | 322 | * If this is a request for a synthetic attribute in the system.* |
321 | * namespace use the generic infrastructure to resolve a handler | 323 | * namespace use the generic infrastructure to resolve a handler |
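
Both entry points now refuse writes up front, because permission() does not cover the security.* and system.* namespaces on its own. btrfs_root_readonly() is defined elsewhere (ctree.h); a plausible sketch, assuming it simply tests the subvolume's read-only flag in the on-disk root item:

    /* sketch: BTRFS_ROOT_SUBVOL_RDONLY is assumed to be the per-subvolume
     * read-only bit kept (little-endian) in root_item.flags */
    static inline bool btrfs_root_readonly(struct btrfs_root *root)
    {
            return root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY);
    }
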
@@ -336,6 +338,15 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
336 | 338 | ||
337 | int btrfs_removexattr(struct dentry *dentry, const char *name) | 339 | int btrfs_removexattr(struct dentry *dentry, const char *name) |
338 | { | 340 | { |
341 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | ||
342 | |||
343 | /* | ||
344 | * The permission on security.* and system.* is not checked | ||
345 | * in permission(). | ||
346 | */ | ||
347 | if (btrfs_root_readonly(root)) | ||
348 | return -EROFS; | ||
349 | |||
339 | /* | 350 | /* |
340 | * If this is a request for a synthetic attribute in the system.* | 351 | * If this is a request for a synthetic attribute in the system.* |
341 | * namespace use the generic infrastructure to resolve a handler | 352 | * namespace use the generic infrastructure to resolve a handler |
@@ -352,7 +363,8 @@ int btrfs_removexattr(struct dentry *dentry, const char *name) | |||
352 | } | 363 | } |
353 | 364 | ||
354 | int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, | 365 | int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, |
355 | struct inode *inode, struct inode *dir) | 366 | struct inode *inode, struct inode *dir, |
367 | const struct qstr *qstr) | ||
356 | { | 368 | { |
357 | int err; | 369 | int err; |
358 | size_t len; | 370 | size_t len; |
@@ -360,7 +372,8 @@ int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, | |||
360 | char *suffix; | 372 | char *suffix; |
361 | char *name; | 373 | char *name; |
362 | 374 | ||
363 | err = security_inode_init_security(inode, dir, &suffix, &value, &len); | 375 | err = security_inode_init_security(inode, dir, qstr, &suffix, &value, |
376 | &len); | ||
364 | if (err) { | 377 | if (err) { |
365 | if (err == -EOPNOTSUPP) | 378 | if (err == -EOPNOTSUPP) |
366 | return 0; | 379 | return 0; |
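
The new qstr parameter lets the LSM see the name being created. A caller would typically forward the dentry's name; a hedged sketch of the expected call site (the real caller sits in inode.c and is not part of this hunk):

    /* sketch: during inode creation; "fail" is a hypothetical error label */
    err = btrfs_xattr_security_init(trans, inode, dir, &dentry->d_name);
    if (err)
            goto fail;
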
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h index 7a43fd640bbb..b3cc8039134b 100644 --- a/fs/btrfs/xattr.h +++ b/fs/btrfs/xattr.h | |||
@@ -37,6 +37,7 @@ extern int btrfs_setxattr(struct dentry *dentry, const char *name, | |||
37 | extern int btrfs_removexattr(struct dentry *dentry, const char *name); | 37 | extern int btrfs_removexattr(struct dentry *dentry, const char *name); |
38 | 38 | ||
39 | extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, | 39 | extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, |
40 | struct inode *inode, struct inode *dir); | 40 | struct inode *inode, struct inode *dir, |
41 | const struct qstr *qstr); | ||
41 | 42 | ||
42 | #endif /* __XATTR__ */ | 43 | #endif /* __XATTR__ */ |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index b9cd5445f71c..faccd47c6c46 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -32,15 +32,6 @@ | |||
32 | #include <linux/bio.h> | 32 | #include <linux/bio.h> |
33 | #include "compression.h" | 33 | #include "compression.h" |
34 | 34 | ||
35 | /* Plan: call deflate() with avail_in == *sourcelen, | ||
36 | avail_out = *dstlen - 12 and flush == Z_FINISH. | ||
37 | If it doesn't manage to finish, call it again with | ||
38 | avail_in == 0 and avail_out set to the remaining 12 | ||
39 | bytes for it to clean up. | ||
40 | Q: Is 12 bytes sufficient? | ||
41 | */ | ||
42 | #define STREAM_END_SPACE 12 | ||
43 | |||
44 | struct workspace { | 35 | struct workspace { |
45 | z_stream inf_strm; | 36 | z_stream inf_strm; |
46 | z_stream def_strm; | 37 | z_stream def_strm; |
@@ -48,152 +39,52 @@ struct workspace { | |||
48 | struct list_head list; | 39 | struct list_head list; |
49 | }; | 40 | }; |
50 | 41 | ||
51 | static LIST_HEAD(idle_workspace); | 42 | static void zlib_free_workspace(struct list_head *ws) |
52 | static DEFINE_SPINLOCK(workspace_lock); | ||
53 | static unsigned long num_workspace; | ||
54 | static atomic_t alloc_workspace = ATOMIC_INIT(0); | ||
55 | static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); | ||
56 | |||
57 | /* | ||
58 | * this finds an available zlib workspace or allocates a new one | ||
59 | * NULL or an ERR_PTR is returned if things go bad. | ||
60 | */ | ||
61 | static struct workspace *find_zlib_workspace(void) | ||
62 | { | 43 | { |
63 | struct workspace *workspace; | 44 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
64 | int ret; | ||
65 | int cpus = num_online_cpus(); | ||
66 | |||
67 | again: | ||
68 | spin_lock(&workspace_lock); | ||
69 | if (!list_empty(&idle_workspace)) { | ||
70 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
71 | list); | ||
72 | list_del(&workspace->list); | ||
73 | num_workspace--; | ||
74 | spin_unlock(&workspace_lock); | ||
75 | return workspace; | ||
76 | |||
77 | } | ||
78 | spin_unlock(&workspace_lock); | ||
79 | if (atomic_read(&alloc_workspace) > cpus) { | ||
80 | DEFINE_WAIT(wait); | ||
81 | prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
82 | if (atomic_read(&alloc_workspace) > cpus) | ||
83 | schedule(); | ||
84 | finish_wait(&workspace_wait, &wait); | ||
85 | goto again; | ||
86 | } | ||
87 | atomic_inc(&alloc_workspace); | ||
88 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | ||
89 | if (!workspace) { | ||
90 | ret = -ENOMEM; | ||
91 | goto fail; | ||
92 | } | ||
93 | |||
94 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); | ||
95 | if (!workspace->def_strm.workspace) { | ||
96 | ret = -ENOMEM; | ||
97 | goto fail; | ||
98 | } | ||
99 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); | ||
100 | if (!workspace->inf_strm.workspace) { | ||
101 | ret = -ENOMEM; | ||
102 | goto fail_inflate; | ||
103 | } | ||
104 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); | ||
105 | if (!workspace->buf) { | ||
106 | ret = -ENOMEM; | ||
107 | goto fail_kmalloc; | ||
108 | } | ||
109 | return workspace; | ||
110 | 45 | ||
111 | fail_kmalloc: | ||
112 | vfree(workspace->inf_strm.workspace); | ||
113 | fail_inflate: | ||
114 | vfree(workspace->def_strm.workspace); | ||
115 | fail: | ||
116 | kfree(workspace); | ||
117 | atomic_dec(&alloc_workspace); | ||
118 | wake_up(&workspace_wait); | ||
119 | return ERR_PTR(ret); | ||
120 | } | ||
121 | |||
122 | /* | ||
123 | * put a workspace struct back on the list or free it if we have enough | ||
124 | * idle ones sitting around | ||
125 | */ | ||
126 | static int free_workspace(struct workspace *workspace) | ||
127 | { | ||
128 | spin_lock(&workspace_lock); | ||
129 | if (num_workspace < num_online_cpus()) { | ||
130 | list_add_tail(&workspace->list, &idle_workspace); | ||
131 | num_workspace++; | ||
132 | spin_unlock(&workspace_lock); | ||
133 | if (waitqueue_active(&workspace_wait)) | ||
134 | wake_up(&workspace_wait); | ||
135 | return 0; | ||
136 | } | ||
137 | spin_unlock(&workspace_lock); | ||
138 | vfree(workspace->def_strm.workspace); | 46 | vfree(workspace->def_strm.workspace); |
139 | vfree(workspace->inf_strm.workspace); | 47 | vfree(workspace->inf_strm.workspace); |
140 | kfree(workspace->buf); | 48 | kfree(workspace->buf); |
141 | kfree(workspace); | 49 | kfree(workspace); |
142 | |||
143 | atomic_dec(&alloc_workspace); | ||
144 | if (waitqueue_active(&workspace_wait)) | ||
145 | wake_up(&workspace_wait); | ||
146 | return 0; | ||
147 | } | 50 | } |
148 | 51 | ||
149 | /* | 52 | static struct list_head *zlib_alloc_workspace(void) |
150 | * cleanup function for module exit | ||
151 | */ | ||
152 | static void free_workspaces(void) | ||
153 | { | 53 | { |
154 | struct workspace *workspace; | 54 | struct workspace *workspace; |
155 | while (!list_empty(&idle_workspace)) { | 55 | |
156 | workspace = list_entry(idle_workspace.next, struct workspace, | 56 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); |
157 | list); | 57 | if (!workspace) |
158 | list_del(&workspace->list); | 58 | return ERR_PTR(-ENOMEM); |
159 | vfree(workspace->def_strm.workspace); | 59 | |
160 | vfree(workspace->inf_strm.workspace); | 60 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize( |
161 | kfree(workspace->buf); | 61 | MAX_WBITS, MAX_MEM_LEVEL)); |
162 | kfree(workspace); | 62 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); |
163 | atomic_dec(&alloc_workspace); | 63 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
164 | } | 64 | if (!workspace->def_strm.workspace || |
65 | !workspace->inf_strm.workspace || !workspace->buf) | ||
66 | goto fail; | ||
67 | |||
68 | INIT_LIST_HEAD(&workspace->list); | ||
69 | |||
70 | return &workspace->list; | ||
71 | fail: | ||
72 | zlib_free_workspace(&workspace->list); | ||
73 | return ERR_PTR(-ENOMEM); | ||
165 | } | 74 | } |
166 | 75 | ||
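
With the waitqueue/spinlock machinery deleted, a workspace now arrives as a plain struct list_head handed in by the caller: the generic code in compression.c (shared with the new LZO support) caches idle workspaces and calls these hooks on demand. A sketch of that manager's usage, where find_workspace()/free_workspace() are assumed helper names:

    struct list_head *ws;

    ws = find_workspace(type);      /* assumed: pop an idle workspace, or
                                       call ->alloc_workspace() if none */
    if (IS_ERR(ws))
            return PTR_ERR(ws);

    ret = ops->compress_pages(ws, mapping, start, len, pages, nr_dest_pages,
                              &out_pages, &total_in, &total_out, max_out);

    free_workspace(type, ws);       /* assumed: park it on the idle list,
                                       or ->free_workspace() past the cap */
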
167 | /* | 76 | static int zlib_compress_pages(struct list_head *ws, |
168 | * given an address space and start/len, compress the bytes. | 77 | struct address_space *mapping, |
169 | * | 78 | u64 start, unsigned long len, |
170 | * pages are allocated to hold the compressed result and stored | 79 | struct page **pages, |
171 | * in 'pages' | 80 | unsigned long nr_dest_pages, |
172 | * | 81 | unsigned long *out_pages, |
173 | * out_pages is used to return the number of pages allocated. There | 82 | unsigned long *total_in, |
174 | * may be pages allocated even if we return an error | 83 | unsigned long *total_out, |
175 | * | 84 | unsigned long max_out) |
176 | * total_in is used to return the number of bytes actually read. It | ||
177 | * may be smaller than len if we had to exit early because we | ||
178 | * ran out of room in the pages array or because we cross the | ||
179 | * max_out threshold. | ||
180 | * | ||
181 | * total_out is used to return the total number of compressed bytes | ||
182 | * | ||
183 | * max_out tells us the max number of bytes that we're allowed to | ||
184 | * stuff into pages | ||
185 | */ | ||
186 | int btrfs_zlib_compress_pages(struct address_space *mapping, | ||
187 | u64 start, unsigned long len, | ||
188 | struct page **pages, | ||
189 | unsigned long nr_dest_pages, | ||
190 | unsigned long *out_pages, | ||
191 | unsigned long *total_in, | ||
192 | unsigned long *total_out, | ||
193 | unsigned long max_out) | ||
194 | { | 85 | { |
86 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
195 | int ret; | 87 | int ret; |
196 | struct workspace *workspace; | ||
197 | char *data_in; | 88 | char *data_in; |
198 | char *cpage_out; | 89 | char *cpage_out; |
199 | int nr_pages = 0; | 90 | int nr_pages = 0; |
@@ -205,10 +96,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
205 | *total_out = 0; | 96 | *total_out = 0; |
206 | *total_in = 0; | 97 | *total_in = 0; |
207 | 98 | ||
208 | workspace = find_zlib_workspace(); | ||
209 | if (IS_ERR(workspace)) | ||
210 | return -1; | ||
211 | |||
212 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | 99 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { |
213 | printk(KERN_WARNING "deflateInit failed\n"); | 100 | printk(KERN_WARNING "deflateInit failed\n"); |
214 | ret = -1; | 101 | ret = -1; |
@@ -222,6 +109,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
222 | data_in = kmap(in_page); | 109 | data_in = kmap(in_page); |
223 | 110 | ||
224 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 111 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
112 | if (out_page == NULL) { | ||
113 | ret = -1; | ||
114 | goto out; | ||
115 | } | ||
225 | cpage_out = kmap(out_page); | 116 | cpage_out = kmap(out_page); |
226 | pages[0] = out_page; | 117 | pages[0] = out_page; |
227 | nr_pages = 1; | 118 | nr_pages = 1; |
@@ -260,6 +151,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
260 | goto out; | 151 | goto out; |
261 | } | 152 | } |
262 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 153 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
154 | if (out_page == NULL) { | ||
155 | ret = -1; | ||
156 | goto out; | ||
157 | } | ||
263 | cpage_out = kmap(out_page); | 158 | cpage_out = kmap(out_page); |
264 | pages[nr_pages] = out_page; | 159 | pages[nr_pages] = out_page; |
265 | nr_pages++; | 160 | nr_pages++; |
@@ -314,55 +209,26 @@ out: | |||
314 | kunmap(in_page); | 209 | kunmap(in_page); |
315 | page_cache_release(in_page); | 210 | page_cache_release(in_page); |
316 | } | 211 | } |
317 | free_workspace(workspace); | ||
318 | return ret; | 212 | return ret; |
319 | } | 213 | } |
320 | 214 | ||
321 | /* | 215 | static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, |
322 | * pages_in is an array of pages with compressed data. | 216 | u64 disk_start, |
323 | * | 217 | struct bio_vec *bvec, |
324 | * disk_start is the starting logical offset of this array in the file | 218 | int vcnt, |
325 | * | 219 | size_t srclen) |
326 | * bvec is a bio_vec of pages from the file that we want to decompress into | ||
327 | * | ||
328 | * vcnt is the count of pages in the biovec | ||
329 | * | ||
330 | * srclen is the number of bytes in pages_in | ||
331 | * | ||
332 | * The basic idea is that we have a bio that was created by readpages. | ||
333 | * The pages in the bio are for the uncompressed data, and they may not | ||
334 | * be contiguous. They all correspond to the range of bytes covered by | ||
335 | * the compressed extent. | ||
336 | */ | ||
337 | int btrfs_zlib_decompress_biovec(struct page **pages_in, | ||
338 | u64 disk_start, | ||
339 | struct bio_vec *bvec, | ||
340 | int vcnt, | ||
341 | size_t srclen) | ||
342 | { | 220 | { |
343 | int ret = 0; | 221 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
222 | int ret = 0, ret2; | ||
344 | int wbits = MAX_WBITS; | 223 | int wbits = MAX_WBITS; |
345 | struct workspace *workspace; | ||
346 | char *data_in; | 224 | char *data_in; |
347 | size_t total_out = 0; | 225 | size_t total_out = 0; |
348 | unsigned long page_bytes_left; | ||
349 | unsigned long page_in_index = 0; | 226 | unsigned long page_in_index = 0; |
350 | unsigned long page_out_index = 0; | 227 | unsigned long page_out_index = 0; |
351 | struct page *page_out; | ||
352 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | 228 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / |
353 | PAGE_CACHE_SIZE; | 229 | PAGE_CACHE_SIZE; |
354 | unsigned long buf_start; | 230 | unsigned long buf_start; |
355 | unsigned long buf_offset; | ||
356 | unsigned long bytes; | ||
357 | unsigned long working_bytes; | ||
358 | unsigned long pg_offset; | 231 | unsigned long pg_offset; |
359 | unsigned long start_byte; | ||
360 | unsigned long current_buf_start; | ||
361 | char *kaddr; | ||
362 | |||
363 | workspace = find_zlib_workspace(); | ||
364 | if (IS_ERR(workspace)) | ||
365 | return -ENOMEM; | ||
366 | 232 | ||
367 | data_in = kmap(pages_in[page_in_index]); | 233 | data_in = kmap(pages_in[page_in_index]); |
368 | workspace->inf_strm.next_in = data_in; | 234 | workspace->inf_strm.next_in = data_in; |
@@ -372,8 +238,6 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
372 | workspace->inf_strm.total_out = 0; | 238 | workspace->inf_strm.total_out = 0; |
373 | workspace->inf_strm.next_out = workspace->buf; | 239 | workspace->inf_strm.next_out = workspace->buf; |
374 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 240 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
375 | page_out = bvec[page_out_index].bv_page; | ||
376 | page_bytes_left = PAGE_CACHE_SIZE; | ||
377 | pg_offset = 0; | 241 | pg_offset = 0; |
378 | 242 | ||
379 | /* If it's deflate, and it's got no preset dictionary, then | 243 | /* If it's deflate, and it's got no preset dictionary, then |
@@ -389,107 +253,29 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
389 | 253 | ||
390 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 254 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
391 | printk(KERN_WARNING "inflateInit failed\n"); | 255 | printk(KERN_WARNING "inflateInit failed\n"); |
392 | ret = -1; | 256 | return -1; |
393 | goto out; | ||
394 | } | 257 | } |
395 | while (workspace->inf_strm.total_in < srclen) { | 258 | while (workspace->inf_strm.total_in < srclen) { |
396 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | 259 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); |
397 | if (ret != Z_OK && ret != Z_STREAM_END) | 260 | if (ret != Z_OK && ret != Z_STREAM_END) |
398 | break; | 261 | break; |
399 | /* | ||
400 | * buf start is the byte offset, within the decompressed output, | ||
401 | * of the start of our workspace buffer | ||
402 | */ | ||
403 | buf_start = total_out; | ||
404 | 262 | ||
405 | /* total_out is the last byte of the workspace buffer */ | 263 | buf_start = total_out; |
406 | total_out = workspace->inf_strm.total_out; | 264 | total_out = workspace->inf_strm.total_out; |
407 | 265 | ||
408 | working_bytes = total_out - buf_start; | 266 | /* we didn't make progress in this inflate call, we're done */ |
409 | 267 | if (buf_start == total_out) | |
410 | /* | ||
411 | * start byte is the first byte of the page we're currently | ||
412 | * copying into relative to the start of the compressed data. | ||
413 | */ | ||
414 | start_byte = page_offset(page_out) - disk_start; | ||
415 | |||
416 | if (working_bytes == 0) { | ||
417 | /* we didn't make progress in this inflate | ||
418 | * call, we're done | ||
419 | */ | ||
420 | if (ret != Z_STREAM_END) | ||
421 | ret = -1; | ||
422 | break; | 268 | break; |
423 | } | ||
424 | 269 | ||
425 | /* we haven't yet hit data corresponding to this page */ | 270 | ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, |
426 | if (total_out <= start_byte) | 271 | total_out, disk_start, |
427 | goto next; | 272 | bvec, vcnt, |
428 | 273 | &page_out_index, &pg_offset); | |
429 | /* | 274 | if (ret2 == 0) { |
430 | * the start of the data we care about is offset into | 275 | ret = 0; |
431 | * the middle of our working buffer | 276 | goto done; |
432 | */ | ||
433 | if (total_out > start_byte && buf_start < start_byte) { | ||
434 | buf_offset = start_byte - buf_start; | ||
435 | working_bytes -= buf_offset; | ||
436 | } else { | ||
437 | buf_offset = 0; | ||
438 | } | ||
439 | current_buf_start = buf_start; | ||
440 | |||
441 | /* copy bytes from the working buffer into the pages */ | ||
442 | while (working_bytes > 0) { | ||
443 | bytes = min(PAGE_CACHE_SIZE - pg_offset, | ||
444 | PAGE_CACHE_SIZE - buf_offset); | ||
445 | bytes = min(bytes, working_bytes); | ||
446 | kaddr = kmap_atomic(page_out, KM_USER0); | ||
447 | memcpy(kaddr + pg_offset, workspace->buf + buf_offset, | ||
448 | bytes); | ||
449 | kunmap_atomic(kaddr, KM_USER0); | ||
450 | flush_dcache_page(page_out); | ||
451 | |||
452 | pg_offset += bytes; | ||
453 | page_bytes_left -= bytes; | ||
454 | buf_offset += bytes; | ||
455 | working_bytes -= bytes; | ||
456 | current_buf_start += bytes; | ||
457 | |||
458 | /* check if we need to pick another page */ | ||
459 | if (page_bytes_left == 0) { | ||
460 | page_out_index++; | ||
461 | if (page_out_index >= vcnt) { | ||
462 | ret = 0; | ||
463 | goto done; | ||
464 | } | ||
465 | |||
466 | page_out = bvec[page_out_index].bv_page; | ||
467 | pg_offset = 0; | ||
468 | page_bytes_left = PAGE_CACHE_SIZE; | ||
469 | start_byte = page_offset(page_out) - disk_start; | ||
470 | |||
471 | /* | ||
472 | * make sure our new page is covered by this | ||
473 | * working buffer | ||
474 | */ | ||
475 | if (total_out <= start_byte) | ||
476 | goto next; | ||
477 | |||
478 | /* the next page in the biovec might not | ||
479 | * be adjacent to the last page, but it | ||
480 | * might still be found inside this working | ||
481 | * buffer. bump our offset pointer | ||
482 | */ | ||
483 | if (total_out > start_byte && | ||
484 | current_buf_start < start_byte) { | ||
485 | buf_offset = start_byte - buf_start; | ||
486 | working_bytes = total_out - start_byte; | ||
487 | current_buf_start = buf_start + | ||
488 | buf_offset; | ||
489 | } | ||
490 | } | ||
491 | } | 277 | } |
492 | next: | 278 | |
493 | workspace->inf_strm.next_out = workspace->buf; | 279 | workspace->inf_strm.next_out = workspace->buf; |
494 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 280 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
495 | 281 | ||
@@ -516,35 +302,21 @@ done: | |||
516 | zlib_inflateEnd(&workspace->inf_strm); | 302 | zlib_inflateEnd(&workspace->inf_strm); |
517 | if (data_in) | 303 | if (data_in) |
518 | kunmap(pages_in[page_in_index]); | 304 | kunmap(pages_in[page_in_index]); |
519 | out: | ||
520 | free_workspace(workspace); | ||
521 | return ret; | 305 | return ret; |
522 | } | 306 | } |
523 | 307 | ||
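
All of the deleted page-copy logic moved into btrfs_decompress_buf2page(), a helper in compression.c shared with LZO. Its contract, as the call above uses it (a sketch of the interface, not the body):

    /*
     * Copy the bytes [buf_start, total_out) of buf into the bvec pages,
     * carrying the copy position across calls via *page_out_index and
     * *pg_offset. Returns 1 while destination pages remain, 0 once the
     * last page in the biovec is full (the caller stops inflating).
     */
    int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
                                  unsigned long total_out, u64 disk_start,
                                  struct bio_vec *bvec, int vcnt,
                                  unsigned long *page_out_index,
                                  unsigned long *pg_offset);
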
524 | /* | 308 | static int zlib_decompress(struct list_head *ws, unsigned char *data_in, |
525 | * a less complex decompression routine. Our compressed data fits in a | 309 | struct page *dest_page, |
526 | * single page, and we want to read a single page out of it. | 310 | unsigned long start_byte, |
527 | * start_byte tells us the offset into the compressed data we're interested in | 311 | size_t srclen, size_t destlen) |
528 | */ | ||
529 | int btrfs_zlib_decompress(unsigned char *data_in, | ||
530 | struct page *dest_page, | ||
531 | unsigned long start_byte, | ||
532 | size_t srclen, size_t destlen) | ||
533 | { | 312 | { |
313 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
534 | int ret = 0; | 314 | int ret = 0; |
535 | int wbits = MAX_WBITS; | 315 | int wbits = MAX_WBITS; |
536 | struct workspace *workspace; | ||
537 | unsigned long bytes_left = destlen; | 316 | unsigned long bytes_left = destlen; |
538 | unsigned long total_out = 0; | 317 | unsigned long total_out = 0; |
539 | char *kaddr; | 318 | char *kaddr; |
540 | 319 | ||
541 | if (destlen > PAGE_CACHE_SIZE) | ||
542 | return -ENOMEM; | ||
543 | |||
544 | workspace = find_zlib_workspace(); | ||
545 | if (IS_ERR(workspace)) | ||
546 | return -ENOMEM; | ||
547 | |||
548 | workspace->inf_strm.next_in = data_in; | 320 | workspace->inf_strm.next_in = data_in; |
549 | workspace->inf_strm.avail_in = srclen; | 321 | workspace->inf_strm.avail_in = srclen; |
550 | workspace->inf_strm.total_in = 0; | 322 | workspace->inf_strm.total_in = 0; |
@@ -565,8 +337,7 @@ int btrfs_zlib_decompress(unsigned char *data_in, | |||
565 | 337 | ||
566 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 338 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
567 | printk(KERN_WARNING "inflateInit failed\n"); | 339 | printk(KERN_WARNING "inflateInit failed\n"); |
568 | ret = -1; | 340 | return -1; |
569 | goto out; | ||
570 | } | 341 | } |
571 | 342 | ||
572 | while (bytes_left > 0) { | 343 | while (bytes_left > 0) { |
@@ -616,12 +387,13 @@ next: | |||
616 | ret = 0; | 387 | ret = 0; |
617 | 388 | ||
618 | zlib_inflateEnd(&workspace->inf_strm); | 389 | zlib_inflateEnd(&workspace->inf_strm); |
619 | out: | ||
620 | free_workspace(workspace); | ||
621 | return ret; | 390 | return ret; |
622 | } | 391 | } |
623 | 392 | ||
624 | void btrfs_zlib_exit(void) | 393 | struct btrfs_compress_op btrfs_zlib_compress = { |
625 | { | 394 | .alloc_workspace = zlib_alloc_workspace, |
626 | free_workspaces(); | 395 | .free_workspace = zlib_free_workspace, |
627 | } | 396 | .compress_pages = zlib_compress_pages, |
397 | .decompress_biovec = zlib_decompress_biovec, | ||
398 | .decompress = zlib_decompress, | ||
399 | }; | ||
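
With every zlib entry point behind this ops vtable, adding LZO is just a second btrfs_compress_op plus a lookup by compression type. A sketch of the dispatch table the generic code would keep (the actual array and its indexing live in compression.c; the layout here is an assumption):

    /* sketch: one entry per BTRFS_COMPRESS_* type */
    static struct btrfs_compress_op *btrfs_compress_op[] = {
            &btrfs_zlib_compress,
            &btrfs_lzo_compress,
    };
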