commit 8c27cb3566762613a23c080e3db7d0501af9a787
tree 32b2752e320b6cb3ecf289dd00b5145a6de947e6
parent 7114f51fcb979f167ab5f625ac74059dcb1afc28
parent 848c23b78fafdcd3270b06a30737f8dbd70c347f
author Linus Torvalds <torvalds@linux-foundation.org> 2017-07-05 19:41:23 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2017-07-05 19:41:23 -0400
Merge branch 'for-4.13-part1' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "The core updates improve error handling (mostly related to bios), with
  the usual incremental work on the GFP_NOFS (mis)use removal,
  refactoring or cleanups. Except for the two top patches, all have been
  in for-next for an extensive amount of time.

  User visible changes:
   - statx support
   - quota override tunable
   - improved compression thresholds
   - obsoleted mount option alloc_start

  Core updates:
   - bio-related updates:
      - faster bio cloning
      - no allocation failures
      - preallocated flush bios
   - more kvzalloc use, memalloc_nofs protections, GFP_NOFS updates
   - prep work for btree_inode removal
   - dir-item validation
   - qgroup fixes and updates
   - cleanups:
      - removed unused struct members, unused code, refactoring
      - argument refactoring (fs_info/root, caller -> callee sink)
      - SEARCH_TREE ioctl docs"
* 'for-4.13-part1' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (115 commits)
btrfs: Remove false alert when fiemap range is smaller than on-disk extent
btrfs: Don't clear SGID when inheriting ACLs
btrfs: fix integer overflow in calc_reclaim_items_nr
btrfs: scrub: fix target device initialization while setting up scrub context
btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges
btrfs: qgroup: Introduce extent changeset for qgroup reserve functions
btrfs: qgroup: Fix qgroup reserved space underflow caused by buffered write and quotas being enabled
btrfs: qgroup: Return actually freed bytes for qgroup release or free data
btrfs: qgroup: Cleanup btrfs_qgroup_prepare_account_extents function
btrfs: qgroup: Add quick exit for non-fs extents
Btrfs: rework delayed ref total_bytes_pinned accounting
Btrfs: return old and new total ref mods when adding delayed refs
Btrfs: always account pinned bytes when dropping a tree block ref
Btrfs: update total_bytes_pinned when pinning down extents
Btrfs: make BUG_ON() in add_pinned_bytes() an ASSERT()
Btrfs: make add_pinned_bytes() take an s64 num_bytes instead of u64
btrfs: fix validation of XATTR_ITEM dir items
btrfs: Verify dir_item in iterate_object_props
btrfs: Check name_len before in btrfs_del_root_ref
btrfs: Check name_len before reading btrfs_get_name
...
47 files changed, 1723 insertions(+), 1415 deletions(-)
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 247b8dfaf6e5..8d8370ddb6b2 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -78,12 +78,6 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = XATTR_NAME_POSIX_ACL_ACCESS;
-		if (acl) {
-			ret = posix_acl_update_mode(inode, &inode->i_mode, &acl);
-			if (ret)
-				return ret;
-		}
-		ret = 0;
 		break;
 	case ACL_TYPE_DEFAULT:
 		if (!S_ISDIR(inode->i_mode))
@@ -119,6 +113,13 @@ out:
 
 int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
+	int ret;
+
+	if (type == ACL_TYPE_ACCESS && acl) {
+		ret = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+		if (ret)
+			return ret;
+	}
 	return __btrfs_set_acl(NULL, inode, acl, type);
 }
 
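The acl.c change is the "Don't clear SGID when inheriting ACLs" fix from the shortlog: posix_acl_update_mode() moves out of __btrfs_set_acl(), which is shared with the ACL-inheritance path used when creating inodes, and into btrfs_set_acl(), which runs only for user-initiated changes. A minimal userspace sketch of that shape (every name below is an illustrative stand-in, not the kernel API):

/*
 * Sketch of the SGID fix: the mode fixup happens only on the
 * user-initiated entry point, never on the inheritance path.
 */
#include <stdio.h>

#define MODE_SGID 02000

static unsigned int mode; /* stands in for inode->i_mode */

/* shared worker: stores the ACL, never touches the mode */
static int set_acl_common(int acl)
{
	(void)acl;
	printf("ACL stored, mode is now %04o\n", mode);
	return 0;
}

/* user path (e.g. setfacl): may need to drop SGID per POSIX rules */
static int set_acl_from_user(int acl)
{
	if (acl)
		mode &= ~MODE_SGID; /* stands in for posix_acl_update_mode() */
	return set_acl_common(acl);
}

/* inheritance path (new inode in a setgid directory): mode must survive */
static int inherit_default_acl(int acl)
{
	return set_acl_common(acl);
}

int main(void)
{
	mode = 02755;
	inherit_default_acl(1); /* SGID kept */
	set_acl_from_user(1);   /* SGID may legitimately be cleared */
	return 0;
}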
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 24865da63d8f..f723c11bb763 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -16,7 +16,7 @@
  * Boston, MA 021110-1307, USA.
  */
 
-#include <linux/vmalloc.h>
+#include <linux/mm.h>
 #include <linux/rbtree.h>
 #include "ctree.h"
 #include "disk-io.h"
@@ -2305,7 +2305,7 @@ struct btrfs_data_container *init_data_container(u32 total_bytes)
 	size_t alloc_bytes;
 
 	alloc_bytes = max_t(size_t, total_bytes, sizeof(*data));
-	data = vmalloc(alloc_bytes);
+	data = kvmalloc(alloc_bytes, GFP_KERNEL);
 	if (!data)
 		return ERR_PTR(-ENOMEM);
 
@@ -2339,9 +2339,9 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
 	if (IS_ERR(fspath))
 		return (void *)fspath;
 
-	ifp = kmalloc(sizeof(*ifp), GFP_NOFS);
+	ifp = kmalloc(sizeof(*ifp), GFP_KERNEL);
 	if (!ifp) {
-		vfree(fspath);
+		kvfree(fspath);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -2356,6 +2356,6 @@ void free_ipath(struct inode_fs_paths *ipath)
 {
 	if (!ipath)
 		return;
-	vfree(ipath->fspath);
+	kvfree(ipath->fspath);
 	kfree(ipath);
 }
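The backref.c hunks are part of the kvmalloc conversion mentioned in the merge message: kvmalloc() first attempts a physically contiguous kmalloc() and falls back to vmalloc() for large or fragmented requests, and kvfree() releases memory obtained either way, so callers no longer track which allocator succeeded. A rough userspace sketch of that try-then-fallback shape; the threshold and the header word are invented for illustration (the kernel instead distinguishes the two cases by address via is_vmalloc_addr()):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

#define FALLBACK_THRESHOLD (128 * 1024) /* arbitrary stand-in */

struct kv_hdr { uint64_t mapped; uint64_t size; };

/* plays the role of kvmalloc(): cheap allocator first, then fallback */
static void *kv_alloc(size_t size)
{
	size_t total = sizeof(struct kv_hdr) + size;
	struct kv_hdr *h = NULL;

	if (total <= FALLBACK_THRESHOLD)
		h = malloc(total);              /* the "kmalloc" attempt */
	if (h) {
		h->mapped = 0;
	} else {                                /* the "vmalloc" fallback */
		h = mmap(NULL, total, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if ((void *)h == MAP_FAILED)
			return NULL;
		h->mapped = 1;
	}
	h->size = total;
	return h + 1;
}

/* plays the role of kvfree(): works for either allocation path */
static void kv_free(void *p)
{
	struct kv_hdr *h;

	if (!p)
		return;
	h = (struct kv_hdr *)p - 1;
	if (h->mapped)
		munmap(h, h->size);
	else
		free(h);
}

int main(void)
{
	void *small = kv_alloc(64);      /* lands in malloc */
	void *big = kv_alloc(1 << 20);   /* lands in mmap */
	kv_free(small);
	kv_free(big);
	return 0;
}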
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 4ded1c3f92b8..11d37c94ce05 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -94,7 +94,7 @@
 #include <linux/mutex.h>
 #include <linux/genhd.h>
 #include <linux/blkdev.h>
-#include <linux/vmalloc.h>
+#include <linux/mm.h>
 #include <linux/string.h>
 #include "ctree.h"
 #include "disk-io.h"
@@ -1638,12 +1638,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
 		struct bio *bio;
 		unsigned int j;
 
-		bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
-		if (!bio) {
-			pr_info("btrfsic: bio_alloc() for %u pages failed!\n",
-				num_pages - i);
-			return -1;
-		}
+		bio = btrfs_io_bio_alloc(num_pages - i);
 		bio->bi_bdev = block_ctx->dev->bdev;
 		bio->bi_iter.bi_sector = dev_bytenr >> 9;
 		bio_set_op_attrs(bio, REQ_OP_READ, 0);
@@ -1668,14 +1663,8 @@ static int btrfsic_read_block(struct btrfsic_state *state,
 		dev_bytenr += (j - i) * PAGE_SIZE;
 		i = j;
 	}
-	for (i = 0; i < num_pages; i++) {
+	for (i = 0; i < num_pages; i++)
 		block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
-		if (!block_ctx->datav[i]) {
-			pr_info("btrfsic: kmap() failed (dev %s)!\n",
-				block_ctx->dev->name);
-			return -1;
-		}
-	}
 
 	return block_ctx->len;
 }
@@ -2822,44 +2811,47 @@ static void __btrfsic_submit_bio(struct bio *bio)
 	dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
 	if (NULL != dev_state &&
 	    (bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) {
-		unsigned int i;
+		unsigned int i = 0;
 		u64 dev_bytenr;
 		u64 cur_bytenr;
-		struct bio_vec *bvec;
+		struct bio_vec bvec;
+		struct bvec_iter iter;
 		int bio_is_patched;
 		char **mapped_datav;
+		unsigned int segs = bio_segments(bio);
 
 		dev_bytenr = 512 * bio->bi_iter.bi_sector;
 		bio_is_patched = 0;
 		if (dev_state->state->print_mask &
 		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
 			pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
-			       bio_op(bio), bio->bi_opf, bio->bi_vcnt,
+			       bio_op(bio), bio->bi_opf, segs,
 			       (unsigned long long)bio->bi_iter.bi_sector,
 			       dev_bytenr, bio->bi_bdev);
 
-		mapped_datav = kmalloc_array(bio->bi_vcnt,
+		mapped_datav = kmalloc_array(segs,
 					     sizeof(*mapped_datav), GFP_NOFS);
 		if (!mapped_datav)
 			goto leave;
 		cur_bytenr = dev_bytenr;
 
-		bio_for_each_segment_all(bvec, bio, i) {
-			BUG_ON(bvec->bv_len != PAGE_SIZE);
-			mapped_datav[i] = kmap(bvec->bv_page);
+		bio_for_each_segment(bvec, bio, iter) {
+			BUG_ON(bvec.bv_len != PAGE_SIZE);
+			mapped_datav[i] = kmap(bvec.bv_page);
+			i++;
 
 			if (dev_state->state->print_mask &
 			    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
 				pr_info("#%u: bytenr=%llu, len=%u, offset=%u\n",
-				       i, cur_bytenr, bvec->bv_len, bvec->bv_offset);
-			cur_bytenr += bvec->bv_len;
+				       i, cur_bytenr, bvec.bv_len, bvec.bv_offset);
+			cur_bytenr += bvec.bv_len;
 		}
 		btrfsic_process_written_block(dev_state, dev_bytenr,
-					      mapped_datav, bio->bi_vcnt,
+					      mapped_datav, segs,
 					      bio, &bio_is_patched,
 					      NULL, bio->bi_opf);
-		bio_for_each_segment_all(bvec, bio, i)
-			kunmap(bvec->bv_page);
+		bio_for_each_segment(bvec, bio, iter)
+			kunmap(bvec.bv_page);
 		kfree(mapped_datav);
 	} else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) {
 		if (dev_state->state->print_mask &
@@ -2923,13 +2915,10 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
 		       fs_info->sectorsize, PAGE_SIZE);
 		return -1;
 	}
-	state = kzalloc(sizeof(*state), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+	state = kvzalloc(sizeof(*state), GFP_KERNEL);
 	if (!state) {
-		state = vzalloc(sizeof(*state));
-		if (!state) {
-			pr_info("btrfs check-integrity: vzalloc() failed!\n");
-			return -1;
-		}
+		pr_info("btrfs check-integrity: allocation failed!\n");
+		return -1;
 	}
 
 	if (!btrfsic_is_initialized) {
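Besides dropping failure handling made dead by the preallocating btrfs_io_bio_alloc() and the never-failing kmap(), the __btrfsic_submit_bio() hunk switches from bio_for_each_segment_all(), which walks the raw bi_io_vec array by pointer with a caller-maintained index, to bio_for_each_segment(), which walks a bvec_iter view of the bio and yields struct bio_vec copies — hence the explicit i++ and the bio_segments() count. A toy sketch of the two iteration styles, with invented types rather than the block-layer API:

/*
 * seg "all" iteration walks the whole raw vector; the view-based
 * iteration honors a position, which is what a cloned or partially
 * advanced I/O needs. All names invented for illustration.
 */
#include <stdio.h>

struct seg { int len; char tag; };
struct view { int idx; int count; }; /* stands in for bvec_iter */

int main(void)
{
	struct seg vec[4] = { {4096, 'a'}, {4096, 'b'}, {4096, 'c'}, {4096, 'd'} };

	/* "for_each_segment_all": always the full vector, index by hand */
	for (int i = 0; i < 4; i++)
		printf("all:  #%d %c\n", i, vec[i].tag);

	/* "for_each_segment": honors the view's current position, so a
	 * clone advanced past 'a' never sees it; elements come by value */
	struct view it = { .idx = 1, .count = 3 };
	for (; it.count > 0; it.idx++, it.count--) {
		struct seg cur = vec[it.idx]; /* a copy, like struct bio_vec */
		printf("iter: %c\n", cur.tag);
	}
	return 0;
}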
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index a2fad39f79ba..2c0b7b57fcd5 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -32,6 +32,7 @@
 #include <linux/writeback.h>
 #include <linux/bit_spinlock.h>
 #include <linux/slab.h>
+#include <linux/sched/mm.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -42,48 +43,7 @@
 #include "extent_io.h"
 #include "extent_map.h"
 
-struct compressed_bio {
-	/* number of bios pending for this compressed extent */
-	refcount_t pending_bios;
-
-	/* the pages with the compressed data on them */
-	struct page **compressed_pages;
-
-	/* inode that owns this data */
-	struct inode *inode;
-
-	/* starting offset in the inode for our pages */
-	u64 start;
-
-	/* number of bytes in the inode we're working on */
-	unsigned long len;
-
-	/* number of bytes on disk */
-	unsigned long compressed_len;
-
-	/* the compression algorithm for this bio */
-	int compress_type;
-
-	/* number of compressed pages in the array */
-	unsigned long nr_pages;
-
-	/* IO errors */
-	int errors;
-	int mirror_num;
-
-	/* for reads, this is the bio we are copying the data into */
-	struct bio *orig_bio;
-
-	/*
-	 * the start of a variable length array of checksums only
-	 * used by reads
-	 */
-	u32 sums;
-};
-
-static int btrfs_decompress_bio(int type, struct page **pages_in,
-				u64 disk_start, struct bio *orig_bio,
-				size_t srclen);
+static int btrfs_decompress_bio(struct compressed_bio *cb);
 
 static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
 				      unsigned long disk_size)
@@ -94,12 +54,6 @@ static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
 	       (DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * csum_size;
 }
 
-static struct bio *compressed_bio_alloc(struct block_device *bdev,
-					u64 first_byte, gfp_t gfp_flags)
-{
-	return btrfs_bio_alloc(bdev, first_byte >> 9, BIO_MAX_PAGES, gfp_flags);
-}
-
 static int check_compressed_csum(struct btrfs_inode *inode,
 				 struct compressed_bio *cb,
 				 u64 disk_start)
@@ -173,11 +127,8 @@ static void end_compressed_bio_read(struct bio *bio)
 	/* ok, we're the last bio for this extent, lets start
 	 * the decompression.
 	 */
-	ret = btrfs_decompress_bio(cb->compress_type,
-				   cb->compressed_pages,
-				   cb->start,
-				   cb->orig_bio,
-				   cb->compressed_len);
+	ret = btrfs_decompress_bio(cb);
+
 csum_failed:
 	if (ret)
 		cb->errors = 1;
@@ -355,11 +306,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
 	bdev = fs_info->fs_devices->latest_bdev;
 
-	bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
-	if (!bio) {
-		kfree(cb);
-		return BLK_STS_RESOURCE;
-	}
+	bio = btrfs_bio_alloc(bdev, first_byte);
 	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 	bio->bi_private = cb;
 	bio->bi_end_io = end_compressed_bio_write;
@@ -406,8 +353,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
 			bio_put(bio);
 
-			bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
-			BUG_ON(!bio);
+			bio = btrfs_bio_alloc(bdev, first_byte);
 			bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 			bio->bi_private = cb;
 			bio->bi_end_io = end_compressed_bio_write;
@@ -650,9 +596,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	/* include any pages we added in add_ra-bio_pages */
 	cb->len = bio->bi_iter.bi_size;
 
-	comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
-	if (!comp_bio)
-		goto fail2;
+	comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
 	bio_set_op_attrs (comp_bio, REQ_OP_READ, 0);
 	comp_bio->bi_private = cb;
 	comp_bio->bi_end_io = end_compressed_bio_read;
@@ -703,9 +647,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 			bio_put(comp_bio);
 
-			comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
-							GFP_NOFS);
-			BUG_ON(!comp_bio);
+			comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
 			bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);
 			comp_bio->bi_private = cb;
 			comp_bio->bi_end_io = end_compressed_bio_read;
@@ -801,6 +743,7 @@ static struct list_head *find_workspace(int type)
 	struct list_head *workspace;
 	int cpus = num_online_cpus();
 	int idx = type - 1;
+	unsigned nofs_flag;
 
 	struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
 	spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
@@ -830,7 +773,15 @@ again:
 	atomic_inc(total_ws);
 	spin_unlock(ws_lock);
 
+	/*
+	 * Allocation helpers call vmalloc that can't use GFP_NOFS, so we have
+	 * to turn it off here because we might get called from the restricted
+	 * context of btrfs_compress_bio/btrfs_compress_pages
+	 */
+	nofs_flag = memalloc_nofs_save();
 	workspace = btrfs_compress_op[idx]->alloc_workspace();
+	memalloc_nofs_restore(nofs_flag);
+
 	if (IS_ERR(workspace)) {
 		atomic_dec(total_ws);
 		wake_up(ws_wait);
@@ -961,19 +912,16 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
  * be contiguous. They all correspond to the range of bytes covered by
  * the compressed extent.
  */
-static int btrfs_decompress_bio(int type, struct page **pages_in,
-				u64 disk_start, struct bio *orig_bio,
-				size_t srclen)
+static int btrfs_decompress_bio(struct compressed_bio *cb)
 {
 	struct list_head *workspace;
 	int ret;
+	int type = cb->compress_type;
 
 	workspace = find_workspace(type);
-
-	ret = btrfs_compress_op[type-1]->decompress_bio(workspace, pages_in,
-							disk_start, orig_bio,
-							srclen);
+	ret = btrfs_compress_op[type - 1]->decompress_bio(workspace, cb);
 	free_workspace(type, workspace);
+
 	return ret;
 }
 
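The find_workspace() hunk is the memalloc_nofs pattern named in the merge message: rather than passing GFP_NOFS to every allocation (which vmalloc-based helpers cannot honor), the task marks itself as inside a NOFS section and any allocation underneath implicitly avoids filesystem reclaim. A userspace sketch of the scoped save/restore idea, with a thread-local flag standing in for the kernel's per-task flag (all names here are stand-ins):

/*
 * Sketch of the memalloc_nofs_save()/restore() scoping pattern: a
 * per-thread flag that deep callees consult, instead of a restriction
 * threaded through every gfp_t argument.
 */
#include <stdio.h>

static _Thread_local unsigned int task_flags;
#define PF_MEMALLOC_NOFS 0x1

static unsigned int nofs_save(void)
{
	unsigned int old = task_flags;
	task_flags |= PF_MEMALLOC_NOFS;
	return old;
}

static void nofs_restore(unsigned int old)
{
	task_flags = old;
}

/* deep allocation helper: takes no gfp argument, checks the flag */
static void deep_alloc(void)
{
	printf("allocating %s fs reclaim\n",
	       (task_flags & PF_MEMALLOC_NOFS) ? "without" : "with");
}

int main(void)
{
	deep_alloc();                       /* normal context */
	unsigned int old = nofs_save();
	deep_alloc();                       /* inside the NOFS section */
	nofs_restore(old);
	deep_alloc();                       /* back to normal */
	return 0;
}

Returning and restoring the old value, rather than blindly clearing the flag, is what lets these sections nest safely.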
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 680d4265d601..87f6d3332163 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -34,6 +34,45 @@
 /* Maximum size of data before compression */
 #define BTRFS_MAX_UNCOMPRESSED (SZ_128K)
 
+struct compressed_bio {
+	/* number of bios pending for this compressed extent */
+	refcount_t pending_bios;
+
+	/* the pages with the compressed data on them */
+	struct page **compressed_pages;
+
+	/* inode that owns this data */
+	struct inode *inode;
+
+	/* starting offset in the inode for our pages */
+	u64 start;
+
+	/* number of bytes in the inode we're working on */
+	unsigned long len;
+
+	/* number of bytes on disk */
+	unsigned long compressed_len;
+
+	/* the compression algorithm for this bio */
+	int compress_type;
+
+	/* number of compressed pages in the array */
+	unsigned long nr_pages;
+
+	/* IO errors */
+	int errors;
+	int mirror_num;
+
+	/* for reads, this is the bio we are copying the data into */
+	struct bio *orig_bio;
+
+	/*
+	 * the start of a variable length array of checksums only
+	 * used by reads
+	 */
+	u32 sums;
+};
+
 void btrfs_init_compress(void);
 void btrfs_exit_compress(void);
 
@@ -78,10 +117,7 @@ struct btrfs_compress_op {
 			      unsigned long *total_out);
 
 	int (*decompress_bio)(struct list_head *workspace,
-			      struct page **pages_in,
-			      u64 disk_start,
-			      struct bio *orig_bio,
-			      size_t srclen);
+			      struct compressed_bio *cb);
 
 	int (*decompress)(struct list_head *workspace,
 			  unsigned char *data_in,
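Moving struct compressed_bio into the header is what lets the decompress_bio callback take the struct itself instead of four values that were always read out of it — the "caller -> callee sink" argument refactoring from the merge message. A small sketch of the before/after shape, with invented types:

/*
 * Parameter-object sketch: the values were unpacked from one struct
 * at every call site, so pass the struct and let the callee unpack.
 */
#include <stddef.h>
#include <stdint.h>

struct job {
	const char *pages;
	uint64_t start;
	size_t src_len;
};

/* before: every caller unpacks the struct */
static int decompress_v1(const char *pages, uint64_t start, size_t src_len)
{
	(void)pages; (void)start; (void)src_len;
	return 0;
}

/* after: the callee pulls out what it needs */
static int decompress_v2(const struct job *job)
{
	return decompress_v1(job->pages, job->start, job->src_len);
}

int main(void)
{
	struct job j = { "data", 0, 4 };
	decompress_v1(j.pages, j.start, j.src_len); /* old shape */
	decompress_v2(&j);                          /* new shape */
	return 0;
}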
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a3a75f1de002..3f4daa9d6e2c 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -19,7 +19,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/rbtree.h>
-#include <linux/vmalloc.h>
+#include <linux/mm.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -3667,14 +3667,14 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
 	/* make room in the right data area */
 	data_end = leaf_data_end(fs_info, right);
 	memmove_extent_buffer(right,
-			      btrfs_leaf_data(right) + data_end - push_space,
-			      btrfs_leaf_data(right) + data_end,
+			      BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
+			      BTRFS_LEAF_DATA_OFFSET + data_end,
 			      BTRFS_LEAF_DATA_SIZE(fs_info) - data_end);
 
 	/* copy from the left data area */
-	copy_extent_buffer(right, left, btrfs_leaf_data(right) +
+	copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
 		     BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
-		     btrfs_leaf_data(left) + leaf_data_end(fs_info, left),
+		     BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, left),
 		     push_space);
 
 	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
@@ -3888,9 +3888,9 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
 	push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
 		     btrfs_item_offset_nr(right, push_items - 1);
 
-	copy_extent_buffer(left, right, btrfs_leaf_data(left) +
+	copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
 		     leaf_data_end(fs_info, left) - push_space,
-		     btrfs_leaf_data(right) +
+		     BTRFS_LEAF_DATA_OFFSET +
 		     btrfs_item_offset_nr(right, push_items - 1),
 		     push_space);
 	old_left_nritems = btrfs_header_nritems(left);
@@ -3917,9 +3917,9 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
 	if (push_items < right_nritems) {
 		push_space = btrfs_item_offset_nr(right, push_items - 1) -
 						  leaf_data_end(fs_info, right);
-		memmove_extent_buffer(right, btrfs_leaf_data(right) +
+		memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
 				      BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
-				      btrfs_leaf_data(right) +
+				      BTRFS_LEAF_DATA_OFFSET +
 				      leaf_data_end(fs_info, right), push_space);
 
 		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
@@ -4069,8 +4069,8 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
 		   nritems * sizeof(struct btrfs_item));
 
 	copy_extent_buffer(right, l,
-		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(fs_info) -
-		     data_copy_size, btrfs_leaf_data(l) +
+		     BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
+		     data_copy_size, BTRFS_LEAF_DATA_OFFSET +
 		     leaf_data_end(fs_info, l), data_copy_size);
 
 	rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid);
@@ -4607,8 +4607,8 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
 
 	/* shift the data */
 	if (from_end) {
-		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
-			      data_end + size_diff, btrfs_leaf_data(leaf) +
+		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
+			      data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
 			      data_end, old_data_start + new_size - data_end);
 	} else {
 		struct btrfs_disk_key disk_key;
@@ -4634,8 +4634,8 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
 		}
 	}
 
-	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
-		      data_end + size_diff, btrfs_leaf_data(leaf) +
+	memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
+		      data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
 		      data_end, old_data_start - data_end);
 
 	offset = btrfs_disk_key_offset(&disk_key);
@@ -4707,8 +4707,8 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 	}
 
 	/* shift the data */
-	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
-		      data_end - data_size, btrfs_leaf_data(leaf) +
+	memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
+		      data_end - data_size, BTRFS_LEAF_DATA_OFFSET +
 		      data_end, old_data - data_end);
 
 	data_end = old_data;
@@ -4790,8 +4790,8 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
 			      (nritems - slot) * sizeof(struct btrfs_item));
 
 		/* shift the data */
-		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
-			      data_end - total_data, btrfs_leaf_data(leaf) +
+		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
+			      data_end - total_data, BTRFS_LEAF_DATA_OFFSET +
 			      data_end, old_data - data_end);
 		data_end = old_data;
 	}
@@ -4983,9 +4983,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	if (slot + nr != nritems) {
 		int data_end = leaf_data_end(fs_info, leaf);
 
-		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
 			      data_end + dsize,
-			      btrfs_leaf_data(leaf) + data_end,
+			      BTRFS_LEAF_DATA_OFFSET + data_end,
 			      last_off - data_end);
 
 		for (i = slot + nr; i < nritems; i++) {
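All of the ctree.c hunks are mechanical: btrfs_leaf_data() took an extent_buffer it never used and always returned offsetof(struct btrfs_leaf, items), so these call sites switch to the compile-time constant BTRFS_LEAF_DATA_OFFSET defined in the ctree.h hunks that follow. A self-contained sketch of the offsetof() idiom, with a deliberately simplified leaf layout:

/*
 * Sketch of the BTRFS_LEAF_DATA_OFFSET idea: the byte offset of the
 * item area inside a leaf block is a compile-time constant, so no
 * helper function (and no unused parameter) is needed.
 */
#include <stddef.h>
#include <stdio.h>

struct header { unsigned char csum[32]; unsigned long long bytenr; };
struct item   { unsigned int offset, size; };

struct leaf {
	struct header hdr;
	struct item items[]; /* items grow forward, data grows backward */
};

#define LEAF_DATA_OFFSET offsetof(struct leaf, items)

int main(void)
{
	printf("leaf data starts %zu bytes into the block\n",
	       (size_t)LEAF_DATA_OFFSET);
	return 0;
}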
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a0d0c79d95ed..3f3eb7b17cac 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -48,7 +48,6 @@ struct btrfs_trans_handle;
 struct btrfs_transaction;
 struct btrfs_pending_snapshot;
 extern struct kmem_cache *btrfs_trans_handle_cachep;
-extern struct kmem_cache *btrfs_transaction_cachep;
 extern struct kmem_cache *btrfs_bit_radix_cachep;
 extern struct kmem_cache *btrfs_path_cachep;
 extern struct kmem_cache *btrfs_free_space_cachep;
@@ -716,6 +715,10 @@ struct btrfs_delayed_root;
 #define BTRFS_FS_BTREE_ERR 11
 #define BTRFS_FS_LOG1_ERR 12
 #define BTRFS_FS_LOG2_ERR 13
+#define BTRFS_FS_QUOTA_OVERRIDE 14
+/* Used to record internally whether fs has been frozen */
+#define BTRFS_FS_FROZEN 15
+
 /*
  * Indicate that a whole-filesystem exclusive operation is running
  * (device replace, resize, device add/delete, balance)
@@ -748,8 +751,7 @@ struct btrfs_fs_info {
 	struct rb_root block_group_cache_tree;
 
 	/* keep track of unallocated space */
-	spinlock_t free_chunk_lock;
-	u64 free_chunk_space;
+	atomic64_t free_chunk_space;
 
 	struct extent_io_tree freed_extents[2];
 	struct extent_io_tree *pinned_extents;
@@ -797,17 +799,7 @@ struct btrfs_fs_info {
 	 * so it is also safe.
 	 */
 	u64 max_inline;
-	/*
-	 * Protected by ->chunk_mutex and sb->s_umount.
-	 *
-	 * The reason that we use two lock to protect it is because only
-	 * remount and mount operations can change it and these two operations
-	 * are under sb->s_umount, but the read side (chunk allocation) can not
-	 * acquire sb->s_umount or the deadlock would happen. So we use two
-	 * locks to protect it. On the write side, we must acquire two locks,
-	 * and on the read side, we just need acquire one of them.
-	 */
-	u64 alloc_start;
+
 	struct btrfs_transaction *running_transaction;
 	wait_queue_head_t transaction_throttle;
 	wait_queue_head_t transaction_wait;
@@ -1107,9 +1099,6 @@ struct btrfs_fs_info {
 	 */
 	struct list_head pinned_chunks;
 
-	/* Used to record internally whether fs has been frozen */
-	int fs_frozen;
-
 	/* Cached block sizes */
 	u32 nodesize;
 	u32 sectorsize;
@@ -1277,21 +1266,20 @@ struct btrfs_root {
 	/* For qgroup metadata space reserve */
 	atomic64_t qgroup_meta_rsv;
 };
+
 static inline u32 btrfs_inode_sectorsize(const struct inode *inode)
 {
 	return btrfs_sb(inode->i_sb)->sectorsize;
 }
 
-static inline u32 __BTRFS_LEAF_DATA_SIZE(u32 blocksize)
-{
-	return blocksize - sizeof(struct btrfs_header);
-}
-
 static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info)
 {
-	return __BTRFS_LEAF_DATA_SIZE(info->nodesize);
+
+	return info->nodesize - sizeof(struct btrfs_header);
 }
 
+#define BTRFS_LEAF_DATA_OFFSET offsetof(struct btrfs_leaf, items)
+
 static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_fs_info *info)
 {
 	return BTRFS_LEAF_DATA_SIZE(info) - sizeof(struct btrfs_item);
@@ -1553,8 +1541,27 @@ static inline void btrfs_set_##name(type *s, u##bits val) \
 	s->member = cpu_to_le##bits(val); \
 }
 
+
+static inline u64 btrfs_device_total_bytes(struct extent_buffer *eb,
+					   struct btrfs_dev_item *s)
+{
+	BUILD_BUG_ON(sizeof(u64) !=
+		     sizeof(((struct btrfs_dev_item *)0))->total_bytes);
+	return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item,
+					    total_bytes));
+}
+static inline void btrfs_set_device_total_bytes(struct extent_buffer *eb,
+						struct btrfs_dev_item *s,
+						u64 val)
+{
+	BUILD_BUG_ON(sizeof(u64) !=
+		     sizeof(((struct btrfs_dev_item *)0))->total_bytes);
+	WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize));
+	btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val);
+}
+
+
 BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64);
-BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64);
 BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64);
 BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32);
 BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32);
@@ -2324,10 +2331,6 @@ static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
 	return btrfs_csum_sizes[t];
 }
 
-static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
-{
-	return offsetof(struct btrfs_leaf, items);
-}
 
 /*
  * The leaf data grows from end-to-front in the node.
@@ -2538,11 +2541,11 @@ BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right,
 
 /* helper function to cast into the data area of the leaf. */
 #define btrfs_item_ptr(leaf, slot, type) \
-	((type *)(btrfs_leaf_data(leaf) + \
+	((type *)(BTRFS_LEAF_DATA_OFFSET + \
 	btrfs_item_offset_nr(leaf, slot)))
 
 #define btrfs_item_ptr_offset(leaf, slot) \
-	((unsigned long)(btrfs_leaf_data(leaf) + \
+	((unsigned long)(BTRFS_LEAF_DATA_OFFSET + \
 	btrfs_item_offset_nr(leaf, slot)))
 
 static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
@@ -2680,7 +2683,9 @@ void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
 void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
 void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
 				       struct btrfs_fs_info *fs_info);
-u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
+u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info);
+u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info);
+u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
 
 enum btrfs_reserve_flush_enum {
@@ -2703,9 +2708,13 @@ enum btrfs_flush_state {
 	COMMIT_TRANS = 6,
 };
 
-int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len);
 int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
-void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len);
+int btrfs_check_data_free_space(struct inode *inode,
+			struct extent_changeset **reserved, u64 start, u64 len);
+void btrfs_free_reserved_data_space(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len);
+void btrfs_delalloc_release_space(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len);
 void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
 					    u64 len);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
@@ -2722,8 +2731,8 @@ void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
 				      struct btrfs_block_rsv *rsv);
 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
 void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes);
-int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len);
-void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len);
+int btrfs_delalloc_reserve_space(struct inode *inode,
+			struct extent_changeset **reserved, u64 start, u64 len);
 void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
 struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
 					      unsigned short type);
@@ -3031,12 +3040,14 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
 			  const char *name, u16 name_len,
 			  int mod);
 int verify_dir_item(struct btrfs_fs_info *fs_info,
-		    struct extent_buffer *leaf,
+		    struct extent_buffer *leaf, int slot,
		    struct btrfs_dir_item *dir_item);
 struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
			      struct btrfs_path *path,
			      const char *name,
			      int name_len);
+bool btrfs_is_name_len_valid(struct extent_buffer *leaf, int slot,
+			     unsigned long start, u16 name_len);
 
 /* orphan.c */
 int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
@@ -3171,6 +3182,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
			 size_t size, struct bio *bio,
			 unsigned long bio_flags);
+void btrfs_set_range_writeback(void *private_data, u64 start, u64 end);
 int btrfs_page_mkwrite(struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_evict_inode(struct inode *inode);
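Among the ctree.h changes, free_chunk_space turning from a spinlock-protected u64 into an atomic64_t is a classic simplification: a lone counter that is only ever added to and subtracted from does not need a lock held around every touch. A sketch of the same conversion using C11 atomics in place of the kernel's atomic64 API:

/*
 * Sketch of the spinlock+u64 -> atomic64_t conversion for a single
 * counter. Before (conceptually):
 *     spin_lock(&free_chunk_lock);
 *     free_chunk_space += delta;
 *     spin_unlock(&free_chunk_lock);
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_llong free_chunk_space;

int main(void)
{
	atomic_fetch_add(&free_chunk_space, 1 << 20); /* chunk freed */
	atomic_fetch_sub(&free_chunk_space, 4096);    /* space allocated */
	printf("free: %lld\n", (long long)atomic_load(&free_chunk_space));
	return 0;
}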
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index be70d90dfee5..93ffa898df6d 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -470,7 +470,8 @@ add_tail:
 static noinline void
 update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
			 struct btrfs_delayed_ref_node *existing,
-			 struct btrfs_delayed_ref_node *update)
+			 struct btrfs_delayed_ref_node *update,
+			 int *old_ref_mod_ret)
 {
	struct btrfs_delayed_ref_head *existing_ref;
	struct btrfs_delayed_ref_head *ref;
@@ -523,6 +524,8 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
	 * currently, for refs we just added we know we're a-ok.
	 */
	old_ref_mod = existing_ref->total_ref_mod;
+	if (old_ref_mod_ret)
+		*old_ref_mod_ret = old_ref_mod;
	existing->ref_mod += update->ref_mod;
	existing_ref->total_ref_mod += update->ref_mod;
 
@@ -550,7 +553,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
		     struct btrfs_delayed_ref_node *ref,
		     struct btrfs_qgroup_extent_record *qrecord,
		     u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
-		     int action, int is_data, int *qrecord_inserted_ret)
+		     int action, int is_data, int *qrecord_inserted_ret,
+		     int *old_ref_mod, int *new_ref_mod)
 {
	struct btrfs_delayed_ref_head *existing;
	struct btrfs_delayed_ref_head *head_ref = NULL;
@@ -638,7 +642,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
	if (existing) {
		WARN_ON(ref_root && reserved && existing->qgroup_ref_root
			&& existing->qgroup_reserved);
-		update_existing_head_ref(delayed_refs, &existing->node, ref);
+		update_existing_head_ref(delayed_refs, &existing->node, ref,
+					 old_ref_mod);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
@@ -646,6 +651,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
		head_ref = existing;
	} else {
+		if (old_ref_mod)
+			*old_ref_mod = 0;
		if (is_data && count_mod < 0)
			delayed_refs->pending_csums += num_bytes;
		delayed_refs->num_heads++;
@@ -655,6 +662,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
	}
	if (qrecord_inserted_ret)
		*qrecord_inserted_ret = qrecord_inserted;
+	if (new_ref_mod)
+		*new_ref_mod = head_ref->total_ref_mod;
	return head_ref;
 }
 
@@ -778,7 +787,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
			       struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 ref_root, int level, int action,
-			       struct btrfs_delayed_extent_op *extent_op)
+			       struct btrfs_delayed_extent_op *extent_op,
+			       int *old_ref_mod, int *new_ref_mod)
 {
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
@@ -813,7 +823,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
	 */
	head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
					bytenr, num_bytes, 0, 0, action, 0,
-					&qrecord_inserted);
+					&qrecord_inserted, old_ref_mod,
+					new_ref_mod);
 
	add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
			     num_bytes, parent, ref_root, level, action);
@@ -838,7 +849,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
			       struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes,
			       u64 parent, u64 ref_root,
-			       u64 owner, u64 offset, u64 reserved, int action)
+			       u64 owner, u64 offset, u64 reserved, int action,
+			       int *old_ref_mod, int *new_ref_mod)
 {
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
@@ -878,7 +890,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
	 */
	head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
					bytenr, num_bytes, ref_root, reserved,
-					action, 1, &qrecord_inserted);
+					action, 1, &qrecord_inserted,
+					old_ref_mod, new_ref_mod);
 
	add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
			     num_bytes, parent, ref_root, owner, offset,
@@ -909,7 +922,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 
	add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
			     num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
-			     extent_op->is_data, NULL);
+			     extent_op->is_data, NULL, NULL, NULL);
 
	spin_unlock(&delayed_refs->lock);
	return 0;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index c0264ff01b53..ce88e4ac5276 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -247,12 +247,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
			       struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 ref_root, int level, int action,
-			       struct btrfs_delayed_extent_op *extent_op);
+			       struct btrfs_delayed_extent_op *extent_op,
+			       int *old_ref_mod, int *new_ref_mod);
 int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
			       struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes,
			       u64 parent, u64 ref_root,
-			       u64 owner, u64 offset, u64 reserved, int action);
+			       u64 owner, u64 offset, u64 reserved, int action,
+			       int *old_ref_mod, int *new_ref_mod);
 int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
			       struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes,
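The old_ref_mod/new_ref_mod parameters added throughout delayed-ref.c and this header follow the nullable out-parameter convention: callers that do not care pass NULL and the callee checks before storing, which is how "return old and new total ref mods when adding delayed refs" exports the values without disturbing the return type. A minimal sketch with deliberately simplified counter logic:

/*
 * Nullable out-parameter sketch: optional result slots that the
 * callee fills in only when the caller supplied them.
 */
#include <stdio.h>

static int total_ref_mod;

static int add_ref(int delta, int *old_mod, int *new_mod)
{
	if (old_mod)
		*old_mod = total_ref_mod;
	total_ref_mod += delta;
	if (new_mod)
		*new_mod = total_ref_mod;
	return 0;
}

int main(void)
{
	int before, after;

	add_ref(1, NULL, NULL);       /* caller doesn't care */
	add_ref(-1, &before, &after); /* caller tracks the transition */
	printf("%d -> %d\n", before, after);
	return 0;
}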
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 5fe1ca8abc70..bee3edeea7a3 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -388,7 +388,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
	if (ret)
		btrfs_err(fs_info, "kobj add dev failed %d", ret);
 
-	btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
+	btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
 
	/* force writing the updated state information to disk */
	trans = btrfs_start_transaction(root, 0);
@@ -507,7 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
		return ret;
	}
-	btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
+	btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
 
	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
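The dev-replace.c change is purely cosmetic: btrfs_wait_ordered_roots() takes a u64 count, and the literal -1 was already being converted to 2^64 - 1, so spelling it U64_MAX states the intent. Unsigned conversion of -1 is well-defined modular arithmetic in C, as this two-liner demonstrates:

/* (u64)-1 wraps to UINT64_MAX, so -1 -> U64_MAX is a readability fix. */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t from_minus_one = (uint64_t)-1;
	printf("%" PRIu64 "\n%" PRIu64 "\n", from_minus_one, UINT64_MAX);
	return 0;
}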
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index c24d615e3d7f..41cb9196eaa8 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -395,8 +395,6 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
 
	leaf = path->nodes[0];
	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
-	if (verify_dir_item(fs_info, leaf, dir_item))
-		return NULL;
 
	total_len = btrfs_item_size_nr(leaf, path->slots[0]);
	while (cur < total_len) {
@@ -405,6 +403,8 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
			btrfs_dir_data_len(leaf, dir_item);
		name_ptr = (unsigned long)(dir_item + 1);
 
+		if (verify_dir_item(fs_info, leaf, path->slots[0], dir_item))
+			return NULL;
		if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
		    memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
			return dir_item;
@@ -453,9 +453,11 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
 
 int verify_dir_item(struct btrfs_fs_info *fs_info,
		    struct extent_buffer *leaf,
+		    int slot,
		    struct btrfs_dir_item *dir_item)
 {
	u16 namelen = BTRFS_NAME_LEN;
+	int ret;
	u8 type = btrfs_dir_type(leaf, dir_item);
 
	if (type >= BTRFS_FT_MAX) {
@@ -472,6 +474,12 @@ int verify_dir_item(struct btrfs_fs_info *fs_info,
		return 1;
	}
 
+	namelen = btrfs_dir_name_len(leaf, dir_item);
+	ret = btrfs_is_name_len_valid(leaf, slot,
+				      (unsigned long)(dir_item + 1), namelen);
+	if (!ret)
+		return 1;
+
	/* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
	if ((btrfs_dir_data_len(leaf, dir_item) +
	     btrfs_dir_name_len(leaf, dir_item)) >
@@ -484,3 +492,67 @@ int verify_dir_item(struct btrfs_fs_info *fs_info,
 
	return 0;
 }
+
+bool btrfs_is_name_len_valid(struct extent_buffer *leaf, int slot,
+			     unsigned long start, u16 name_len)
+{
+	struct btrfs_fs_info *fs_info = leaf->fs_info;
+	struct btrfs_key key;
+	u32 read_start;
+	u32 read_end;
+	u32 item_start;
+	u32 item_end;
+	u32 size;
+	bool ret = true;
+
+	ASSERT(start > BTRFS_LEAF_DATA_OFFSET);
+
+	read_start = start - BTRFS_LEAF_DATA_OFFSET;
+	read_end = read_start + name_len;
+	item_start = btrfs_item_offset_nr(leaf, slot);
+	item_end = btrfs_item_end_nr(leaf, slot);
+
+	btrfs_item_key_to_cpu(leaf, &key, slot);
+
+	switch (key.type) {
+	case BTRFS_DIR_ITEM_KEY:
+	case BTRFS_XATTR_ITEM_KEY:
+	case BTRFS_DIR_INDEX_KEY:
+		size = sizeof(struct btrfs_dir_item);
+		break;
+	case BTRFS_INODE_REF_KEY:
+		size = sizeof(struct btrfs_inode_ref);
+		break;
+	case BTRFS_INODE_EXTREF_KEY:
+		size = sizeof(struct btrfs_inode_extref);
+		break;
+	case BTRFS_ROOT_REF_KEY:
+	case BTRFS_ROOT_BACKREF_KEY:
+		size = sizeof(struct btrfs_root_ref);
+		break;
533 | default: | ||
534 | ret = false; | ||
535 | goto out; | ||
536 | } | ||
537 | |||
538 | if (read_start < item_start) { | ||
539 | ret = false; | ||
540 | goto out; | ||
541 | } | ||
542 | if (read_end > item_end) { | ||
543 | ret = false; | ||
544 | goto out; | ||
545 | } | ||
546 | |||
547 | /* there shall be item(s) before name */ | ||
548 | if (read_start - item_start < size) { | ||
549 | ret = false; | ||
550 | goto out; | ||
551 | } | ||
552 | |||
553 | out: | ||
554 | if (!ret) | ||
555 | btrfs_crit(fs_info, "invalid dir item name len: %u", | ||
556 | (unsigned int)name_len); | ||
557 | return ret; | ||
558 | } | ||
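
The new btrfs_is_name_len_valid boils down to interval arithmetic: the claimed name bytes [read_start, read_end) must fall inside the item's data [item_start, item_end), and at least one fixed-size header (size, chosen by key type) must precede the name. The same check, sketched as a standalone helper with plain integer offsets in place of extent-buffer offsets:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Model of the bounds check in btrfs_is_name_len_valid: the name must
 * fit inside the item, with room for the fixed header before it.
 */
static bool name_len_valid(uint32_t item_start, uint32_t item_end,
			   uint32_t read_start, uint16_t name_len,
			   uint32_t header_size)
{
	uint32_t read_end = read_start + name_len;

	if (read_start < item_start)		/* name starts before item */
		return false;
	if (read_end > item_end)		/* name runs past the item */
		return false;
	if (read_start - item_start < header_size)	/* no room for header */
		return false;
	return true;
}

int main(void)
{
	/* 30-byte header followed by a 12-byte name inside a 50-byte item */
	printf("%d\n", name_len_valid(100, 150, 130, 12, 30));	/* 1 */
	/* a lying name_len that would read 40 bytes past item_end */
	printf("%d\n", name_len_valid(100, 150, 130, 60, 30));	/* 0 */
	return 0;
}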
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6036d15b47b8..5abcbdc743fa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -89,7 +89,6 @@ struct btrfs_end_io_wq { | |||
89 | struct btrfs_fs_info *info; | 89 | struct btrfs_fs_info *info; |
90 | blk_status_t status; | 90 | blk_status_t status; |
91 | enum btrfs_wq_endio_type metadata; | 91 | enum btrfs_wq_endio_type metadata; |
92 | struct list_head list; | ||
93 | struct btrfs_work work; | 92 | struct btrfs_work work; |
94 | }; | 93 | }; |
95 | 94 | ||
@@ -118,9 +117,9 @@ void btrfs_end_io_wq_exit(void) | |||
118 | * just before they are sent down the IO stack. | 117 | * just before they are sent down the IO stack. |
119 | */ | 118 | */ |
120 | struct async_submit_bio { | 119 | struct async_submit_bio { |
121 | struct inode *inode; | 120 | void *private_data; |
121 | struct btrfs_fs_info *fs_info; | ||
122 | struct bio *bio; | 122 | struct bio *bio; |
123 | struct list_head list; | ||
124 | extent_submit_bio_hook_t *submit_bio_start; | 123 | extent_submit_bio_hook_t *submit_bio_start; |
125 | extent_submit_bio_hook_t *submit_bio_done; | 124 | extent_submit_bio_hook_t *submit_bio_done; |
126 | int mirror_num; | 125 | int mirror_num; |
@@ -871,7 +870,7 @@ static void run_one_async_start(struct btrfs_work *work) | |||
871 | blk_status_t ret; | 870 | blk_status_t ret; |
872 | 871 | ||
873 | async = container_of(work, struct async_submit_bio, work); | 872 | async = container_of(work, struct async_submit_bio, work); |
874 | ret = async->submit_bio_start(async->inode, async->bio, | 873 | ret = async->submit_bio_start(async->private_data, async->bio, |
875 | async->mirror_num, async->bio_flags, | 874 | async->mirror_num, async->bio_flags, |
876 | async->bio_offset); | 875 | async->bio_offset); |
877 | if (ret) | 876 | if (ret) |
@@ -885,7 +884,7 @@ static void run_one_async_done(struct btrfs_work *work) | |||
885 | int limit; | 884 | int limit; |
886 | 885 | ||
887 | async = container_of(work, struct async_submit_bio, work); | 886 | async = container_of(work, struct async_submit_bio, work); |
888 | fs_info = BTRFS_I(async->inode)->root->fs_info; | 887 | fs_info = async->fs_info; |
889 | 888 | ||
890 | limit = btrfs_async_submit_limit(fs_info); | 889 | limit = btrfs_async_submit_limit(fs_info); |
891 | limit = limit * 2 / 3; | 890 | limit = limit * 2 / 3; |
@@ -904,7 +903,7 @@ static void run_one_async_done(struct btrfs_work *work) | |||
904 | return; | 903 | return; |
905 | } | 904 | } |
906 | 905 | ||
907 | async->submit_bio_done(async->inode, async->bio, async->mirror_num, | 906 | async->submit_bio_done(async->private_data, async->bio, async->mirror_num, |
908 | async->bio_flags, async->bio_offset); | 907 | async->bio_flags, async->bio_offset); |
909 | } | 908 | } |
910 | 909 | ||
@@ -916,11 +915,11 @@ static void run_one_async_free(struct btrfs_work *work) | |||
916 | kfree(async); | 915 | kfree(async); |
917 | } | 916 | } |
918 | 917 | ||
919 | blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, | 918 | blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, |
920 | struct inode *inode, struct bio *bio, int mirror_num, | 919 | int mirror_num, unsigned long bio_flags, |
921 | unsigned long bio_flags, u64 bio_offset, | 920 | u64 bio_offset, void *private_data, |
922 | extent_submit_bio_hook_t *submit_bio_start, | 921 | extent_submit_bio_hook_t *submit_bio_start, |
923 | extent_submit_bio_hook_t *submit_bio_done) | 922 | extent_submit_bio_hook_t *submit_bio_done) |
924 | { | 923 | { |
925 | struct async_submit_bio *async; | 924 | struct async_submit_bio *async; |
926 | 925 | ||
@@ -928,7 +927,8 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, | |||
928 | if (!async) | 927 | if (!async) |
929 | return BLK_STS_RESOURCE; | 928 | return BLK_STS_RESOURCE; |
930 | 929 | ||
931 | async->inode = inode; | 930 | async->private_data = private_data; |
931 | async->fs_info = fs_info; | ||
932 | async->bio = bio; | 932 | async->bio = bio; |
933 | async->mirror_num = mirror_num; | 933 | async->mirror_num = mirror_num; |
934 | async->submit_bio_start = submit_bio_start; | 934 | async->submit_bio_start = submit_bio_start; |
@@ -974,9 +974,9 @@ static blk_status_t btree_csum_one_bio(struct bio *bio) | |||
974 | return errno_to_blk_status(ret); | 974 | return errno_to_blk_status(ret); |
975 | } | 975 | } |
976 | 976 | ||
977 | static blk_status_t __btree_submit_bio_start(struct inode *inode, | 977 | static blk_status_t __btree_submit_bio_start(void *private_data, struct bio *bio, |
978 | struct bio *bio, int mirror_num, unsigned long bio_flags, | 978 | int mirror_num, unsigned long bio_flags, |
979 | u64 bio_offset) | 979 | u64 bio_offset) |
980 | { | 980 | { |
981 | /* | 981 | /* |
982 | * when we're called for a write, we're already in the async | 982 | * when we're called for a write, we're already in the async |
@@ -985,10 +985,11 @@ static blk_status_t __btree_submit_bio_start(struct inode *inode, | |||
985 | return btree_csum_one_bio(bio); | 985 | return btree_csum_one_bio(bio); |
986 | } | 986 | } |
987 | 987 | ||
988 | static blk_status_t __btree_submit_bio_done(struct inode *inode, | 988 | static blk_status_t __btree_submit_bio_done(void *private_data, struct bio *bio, |
989 | struct bio *bio, int mirror_num, unsigned long bio_flags, | 989 | int mirror_num, unsigned long bio_flags, |
990 | u64 bio_offset) | 990 | u64 bio_offset) |
991 | { | 991 | { |
992 | struct inode *inode = private_data; | ||
992 | blk_status_t ret; | 993 | blk_status_t ret; |
993 | 994 | ||
994 | /* | 995 | /* |
@@ -1014,10 +1015,11 @@ static int check_async_write(unsigned long bio_flags) | |||
1014 | return 1; | 1015 | return 1; |
1015 | } | 1016 | } |
1016 | 1017 | ||
1017 | static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio, | 1018 | static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio, |
1018 | int mirror_num, unsigned long bio_flags, | 1019 | int mirror_num, unsigned long bio_flags, |
1019 | u64 bio_offset) | 1020 | u64 bio_offset) |
1020 | { | 1021 | { |
1022 | struct inode *inode = private_data; | ||
1021 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 1023 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
1022 | int async = check_async_write(bio_flags); | 1024 | int async = check_async_write(bio_flags); |
1023 | blk_status_t ret; | 1025 | blk_status_t ret; |
@@ -1042,8 +1044,8 @@ static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio, | |||
1042 | * kthread helpers are used to submit writes so that | 1044 | * kthread helpers are used to submit writes so that |
1043 | * checksumming can happen in parallel across all CPUs | 1045 | * checksumming can happen in parallel across all CPUs |
1044 | */ | 1046 | */ |
1045 | ret = btrfs_wq_submit_bio(fs_info, inode, bio, mirror_num, 0, | 1047 | ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0, |
1046 | bio_offset, | 1048 | bio_offset, private_data, |
1047 | __btree_submit_bio_start, | 1049 | __btree_submit_bio_start, |
1048 | __btree_submit_bio_done); | 1050 | __btree_submit_bio_done); |
1049 | } | 1051 | } |
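
The submit hooks above trade their struct inode * argument for an opaque void *private_data, so the async-submit machinery no longer assumes an inode exists — part of the btree_inode removal prep noted in the merge summary. The pattern, modeled in plain C with hypothetical names:

#include <stdio.h>

/* Hook type taking an opaque context, like extent_submit_bio_hook_t. */
typedef int (submit_hook_t)(void *private_data, int mirror_num);

struct my_inode {
	int ino;
};

/* The hook recovers its concrete type from the opaque pointer. */
static int inode_submit_hook(void *private_data, int mirror_num)
{
	struct my_inode *inode = private_data;

	printf("submitting for inode %d, mirror %d\n", inode->ino, mirror_num);
	return 0;
}

/* Generic dispatcher: it never needs to know what private_data is. */
static int wq_submit(void *private_data, int mirror_num, submit_hook_t *hook)
{
	return hook(private_data, mirror_num);
}

int main(void)
{
	struct my_inode inode = { .ino = 256 };

	return wq_submit(&inode, 0, inode_submit_hook);
}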
@@ -1221,10 +1223,10 @@ int btrfs_write_tree_block(struct extent_buffer *buf) | |||
1221 | buf->start + buf->len - 1); | 1223 | buf->start + buf->len - 1); |
1222 | } | 1224 | } |
1223 | 1225 | ||
1224 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) | 1226 | void btrfs_wait_tree_block_writeback(struct extent_buffer *buf) |
1225 | { | 1227 | { |
1226 | return filemap_fdatawait_range(buf->pages[0]->mapping, | 1228 | filemap_fdatawait_range(buf->pages[0]->mapping, |
1227 | buf->start, buf->start + buf->len - 1); | 1229 | buf->start, buf->start + buf->len - 1); |
1228 | } | 1230 | } |
1229 | 1231 | ||
1230 | struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, | 1232 | struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, |
@@ -1346,8 +1348,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, | |||
1346 | root->log_transid_committed = -1; | 1348 | root->log_transid_committed = -1; |
1347 | root->last_log_commit = 0; | 1349 | root->last_log_commit = 0; |
1348 | if (!dummy) | 1350 | if (!dummy) |
1349 | extent_io_tree_init(&root->dirty_log_pages, | 1351 | extent_io_tree_init(&root->dirty_log_pages, NULL); |
1350 | fs_info->btree_inode->i_mapping); | ||
1351 | 1352 | ||
1352 | memset(&root->root_key, 0, sizeof(root->root_key)); | 1353 | memset(&root->root_key, 0, sizeof(root->root_key)); |
1353 | memset(&root->root_item, 0, sizeof(root->root_item)); | 1354 | memset(&root->root_item, 0, sizeof(root->root_item)); |
@@ -2308,7 +2309,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info) | |||
2308 | inode->i_mapping->a_ops = &btree_aops; | 2309 | inode->i_mapping->a_ops = &btree_aops; |
2309 | 2310 | ||
2310 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | 2311 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); |
2311 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping); | 2312 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode); |
2312 | BTRFS_I(inode)->io_tree.track_uptodate = 0; | 2313 | BTRFS_I(inode)->io_tree.track_uptodate = 0; |
2313 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree); | 2314 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree); |
2314 | 2315 | ||
@@ -2625,7 +2626,6 @@ int open_ctree(struct super_block *sb, | |||
2625 | spin_lock_init(&fs_info->fs_roots_radix_lock); | 2626 | spin_lock_init(&fs_info->fs_roots_radix_lock); |
2626 | spin_lock_init(&fs_info->delayed_iput_lock); | 2627 | spin_lock_init(&fs_info->delayed_iput_lock); |
2627 | spin_lock_init(&fs_info->defrag_inodes_lock); | 2628 | spin_lock_init(&fs_info->defrag_inodes_lock); |
2628 | spin_lock_init(&fs_info->free_chunk_lock); | ||
2629 | spin_lock_init(&fs_info->tree_mod_seq_lock); | 2629 | spin_lock_init(&fs_info->tree_mod_seq_lock); |
2630 | spin_lock_init(&fs_info->super_lock); | 2630 | spin_lock_init(&fs_info->super_lock); |
2631 | spin_lock_init(&fs_info->qgroup_op_lock); | 2631 | spin_lock_init(&fs_info->qgroup_op_lock); |
@@ -2661,12 +2661,11 @@ int open_ctree(struct super_block *sb, | |||
2661 | atomic_set(&fs_info->qgroup_op_seq, 0); | 2661 | atomic_set(&fs_info->qgroup_op_seq, 0); |
2662 | atomic_set(&fs_info->reada_works_cnt, 0); | 2662 | atomic_set(&fs_info->reada_works_cnt, 0); |
2663 | atomic64_set(&fs_info->tree_mod_seq, 0); | 2663 | atomic64_set(&fs_info->tree_mod_seq, 0); |
2664 | fs_info->fs_frozen = 0; | ||
2665 | fs_info->sb = sb; | 2664 | fs_info->sb = sb; |
2666 | fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; | 2665 | fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; |
2667 | fs_info->metadata_ratio = 0; | 2666 | fs_info->metadata_ratio = 0; |
2668 | fs_info->defrag_inodes = RB_ROOT; | 2667 | fs_info->defrag_inodes = RB_ROOT; |
2669 | fs_info->free_chunk_space = 0; | 2668 | atomic64_set(&fs_info->free_chunk_space, 0); |
2670 | fs_info->tree_mod_log = RB_ROOT; | 2669 | fs_info->tree_mod_log = RB_ROOT; |
2671 | fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; | 2670 | fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; |
2672 | fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */ | 2671 | fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */ |
@@ -2703,10 +2702,8 @@ int open_ctree(struct super_block *sb, | |||
2703 | fs_info->block_group_cache_tree = RB_ROOT; | 2702 | fs_info->block_group_cache_tree = RB_ROOT; |
2704 | fs_info->first_logical_byte = (u64)-1; | 2703 | fs_info->first_logical_byte = (u64)-1; |
2705 | 2704 | ||
2706 | extent_io_tree_init(&fs_info->freed_extents[0], | 2705 | extent_io_tree_init(&fs_info->freed_extents[0], NULL); |
2707 | fs_info->btree_inode->i_mapping); | 2706 | extent_io_tree_init(&fs_info->freed_extents[1], NULL); |
2708 | extent_io_tree_init(&fs_info->freed_extents[1], | ||
2709 | fs_info->btree_inode->i_mapping); | ||
2710 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | 2707 | fs_info->pinned_extents = &fs_info->freed_extents[0]; |
2711 | set_bit(BTRFS_FS_BARRIER, &fs_info->flags); | 2708 | set_bit(BTRFS_FS_BARRIER, &fs_info->flags); |
2712 | 2709 | ||
@@ -3484,65 +3481,61 @@ static int write_dev_supers(struct btrfs_device *device, | |||
3484 | */ | 3481 | */ |
3485 | static void btrfs_end_empty_barrier(struct bio *bio) | 3482 | static void btrfs_end_empty_barrier(struct bio *bio) |
3486 | { | 3483 | { |
3487 | if (bio->bi_private) | 3484 | complete(bio->bi_private); |
3488 | complete(bio->bi_private); | ||
3489 | bio_put(bio); | ||
3490 | } | 3485 | } |
3491 | 3486 | ||
3492 | /* | 3487 | /* |
3493 | * trigger flushes for one the devices. If you pass wait == 0, the flushes are | 3488 | * Submit a flush request to the device if it supports it. Error handling is |
3494 | * sent down. With wait == 1, it waits for the previous flush. | 3489 | * done in the waiting counterpart. |
3495 | * | ||
3496 | * any device where the flush fails with eopnotsupp are flagged as not-barrier | ||
3497 | * capable | ||
3498 | */ | 3490 | */ |
3499 | static blk_status_t write_dev_flush(struct btrfs_device *device, int wait) | 3491 | static void write_dev_flush(struct btrfs_device *device) |
3500 | { | 3492 | { |
3501 | struct request_queue *q = bdev_get_queue(device->bdev); | 3493 | struct request_queue *q = bdev_get_queue(device->bdev); |
3502 | struct bio *bio; | 3494 | struct bio *bio = device->flush_bio; |
3503 | blk_status_t ret = 0; | ||
3504 | 3495 | ||
3505 | if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) | 3496 | if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) |
3506 | return 0; | 3497 | return; |
3507 | 3498 | ||
3508 | if (wait) { | 3499 | bio_reset(bio); |
3509 | bio = device->flush_bio; | 3500 | bio->bi_end_io = btrfs_end_empty_barrier; |
3510 | if (!bio) | 3501 | bio->bi_bdev = device->bdev; |
3511 | return 0; | 3502 | bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; |
3503 | init_completion(&device->flush_wait); | ||
3504 | bio->bi_private = &device->flush_wait; | ||
3512 | 3505 | ||
3513 | wait_for_completion(&device->flush_wait); | 3506 | submit_bio(bio); |
3507 | device->flush_bio_sent = 1; | ||
3508 | } | ||
3514 | 3509 | ||
3515 | if (bio->bi_status) { | 3510 | /* |
3516 | ret = bio->bi_status; | 3511 | * If the flush bio has been submitted by write_dev_flush, wait for it. |
3517 | btrfs_dev_stat_inc_and_print(device, | 3512 | */ |
3518 | BTRFS_DEV_STAT_FLUSH_ERRS); | 3513 | static blk_status_t wait_dev_flush(struct btrfs_device *device) |
3519 | } | 3514 | { |
3515 | struct bio *bio = device->flush_bio; | ||
3520 | 3516 | ||
3521 | /* drop the reference from the wait == 0 run */ | 3517 | if (!device->flush_bio_sent) |
3522 | bio_put(bio); | 3518 | return 0; |
3523 | device->flush_bio = NULL; | ||
3524 | 3519 | ||
3525 | return ret; | 3520 | device->flush_bio_sent = 0; |
3526 | } | 3521 | wait_for_completion_io(&device->flush_wait); |
3527 | 3522 | ||
3528 | /* | 3523 | return bio->bi_status; |
3529 | * one reference for us, and we leave it for the | 3524 | } |
3530 | * caller | ||
3531 | */ | ||
3532 | device->flush_bio = NULL; | ||
3533 | bio = btrfs_io_bio_alloc(GFP_NOFS, 0); | ||
3534 | if (!bio) | ||
3535 | return BLK_STS_RESOURCE; | ||
3536 | 3525 | ||
3537 | bio->bi_end_io = btrfs_end_empty_barrier; | 3526 | static int check_barrier_error(struct btrfs_fs_devices *fsdevs) |
3538 | bio->bi_bdev = device->bdev; | 3527 | { |
3539 | bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; | 3528 | int dev_flush_error = 0; |
3540 | init_completion(&device->flush_wait); | 3529 | struct btrfs_device *dev; |
3541 | bio->bi_private = &device->flush_wait; | ||
3542 | device->flush_bio = bio; | ||
3543 | 3530 | ||
3544 | bio_get(bio); | 3531 | list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) { |
3545 | btrfsic_submit_bio(bio); | 3532 | if (!dev->bdev || dev->last_flush_error) |
3533 | dev_flush_error++; | ||
3534 | } | ||
3535 | |||
3536 | if (dev_flush_error > | ||
3537 | fsdevs->fs_info->num_tolerated_disk_barrier_failures) | ||
3538 | return -EIO; | ||
3546 | 3539 | ||
3547 | return 0; | 3540 | return 0; |
3548 | } | 3541 | } |
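
The rewrite splits the old dual-mode write_dev_flush(dev, wait) into a submit half and a wait half built around the preallocated flush bio: submission resets the bio, points bi_private at a completion, and sets flush_bio_sent; the wait half blocks only if that flag is set and then reports bi_status. A userspace model of the two-phase protocol, where the completion is reduced to a flag and the device and bio fields are stand-ins:

#include <stdbool.h>
#include <stdio.h>

struct device {
	bool flush_bio_sent;	/* was a flush submitted this round? */
	bool flush_done;	/* stands in for struct completion */
	int flush_status;	/* stands in for bio->bi_status */
};

/* Phase 1: fire the flush and remember that we did. */
static void write_dev_flush(struct device *dev)
{
	dev->flush_done = false;
	/* A real submit_bio() is asynchronous; we complete inline here. */
	dev->flush_done = true;
	dev->flush_status = 0;
	dev->flush_bio_sent = true;
}

/* Phase 2: wait only if phase 1 actually submitted, then report status. */
static int wait_dev_flush(struct device *dev)
{
	if (!dev->flush_bio_sent)
		return 0;
	dev->flush_bio_sent = false;
	while (!dev->flush_done)
		;	/* wait_for_completion_io() in the kernel */
	return dev->flush_status;
}

int main(void)
{
	struct device devs[2] = { 0 };
	int i;

	for (i = 0; i < 2; i++)		/* submit to all devices first... */
		write_dev_flush(&devs[i]);
	for (i = 0; i < 2; i++)		/* ...then collect all results */
		printf("dev %d flush status %d\n", i, wait_dev_flush(&devs[i]));
	return 0;
}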
@@ -3555,7 +3548,6 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
3555 | { | 3548 | { |
3556 | struct list_head *head; | 3549 | struct list_head *head; |
3557 | struct btrfs_device *dev; | 3550 | struct btrfs_device *dev; |
3558 | int errors_send = 0; | ||
3559 | int errors_wait = 0; | 3551 | int errors_wait = 0; |
3560 | blk_status_t ret; | 3552 | blk_status_t ret; |
3561 | 3553 | ||
@@ -3564,16 +3556,13 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
3564 | list_for_each_entry_rcu(dev, head, dev_list) { | 3556 | list_for_each_entry_rcu(dev, head, dev_list) { |
3565 | if (dev->missing) | 3557 | if (dev->missing) |
3566 | continue; | 3558 | continue; |
3567 | if (!dev->bdev) { | 3559 | if (!dev->bdev) |
3568 | errors_send++; | ||
3569 | continue; | 3560 | continue; |
3570 | } | ||
3571 | if (!dev->in_fs_metadata || !dev->writeable) | 3561 | if (!dev->in_fs_metadata || !dev->writeable) |
3572 | continue; | 3562 | continue; |
3573 | 3563 | ||
3574 | ret = write_dev_flush(dev, 0); | 3564 | write_dev_flush(dev); |
3575 | if (ret) | 3565 | dev->last_flush_error = 0; |
3576 | errors_send++; | ||
3577 | } | 3566 | } |
3578 | 3567 | ||
3579 | /* wait for all the barriers */ | 3568 | /* wait for all the barriers */ |
@@ -3587,13 +3576,23 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
3587 | if (!dev->in_fs_metadata || !dev->writeable) | 3576 | if (!dev->in_fs_metadata || !dev->writeable) |
3588 | continue; | 3577 | continue; |
3589 | 3578 | ||
3590 | ret = write_dev_flush(dev, 1); | 3579 | ret = wait_dev_flush(dev); |
3591 | if (ret) | 3580 | if (ret) { |
3581 | dev->last_flush_error = ret; | ||
3582 | btrfs_dev_stat_inc_and_print(dev, | ||
3583 | BTRFS_DEV_STAT_FLUSH_ERRS); | ||
3592 | errors_wait++; | 3584 | errors_wait++; |
3585 | } | ||
3586 | } | ||
3587 | |||
3588 | if (errors_wait) { | ||
3589 | /* | ||
3590 | * At some point we need the status of all disks | ||
3591 | * to arrive at the volume status. So error checking | ||
3592 | * is being pushed to a separate loop. | ||
3593 | */ | ||
3594 | return check_barrier_error(info->fs_devices); | ||
3593 | } | 3595 | } |
3594 | if (errors_send > info->num_tolerated_disk_barrier_failures || | ||
3595 | errors_wait > info->num_tolerated_disk_barrier_failures) | ||
3596 | return -EIO; | ||
3597 | return 0; | 3596 | return 0; |
3598 | } | 3597 | } |
3599 | 3598 | ||
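
With each device's result parked in last_flush_error, the pass/fail decision moves into check_barrier_error, which counts devices that lack a bdev or recorded a flush error and compares the count against num_tolerated_disk_barrier_failures. A minimal model of that threshold check, with simplified types:

#include <stdbool.h>
#include <stdio.h>

struct dev_state {
	bool has_bdev;
	int last_flush_error;
};

/* Fail the barrier only when more devices failed than we can tolerate. */
static int check_barrier_error(const struct dev_state *devs, int ndevs,
			       int num_tolerated)
{
	int dev_flush_error = 0;
	int i;

	for (i = 0; i < ndevs; i++) {
		if (!devs[i].has_bdev || devs[i].last_flush_error)
			dev_flush_error++;
	}
	return dev_flush_error > num_tolerated ? -5 /* -EIO */ : 0;
}

int main(void)
{
	struct dev_state devs[3] = {
		{ true, 0 }, { true, 10 /* flush failed */ }, { true, 0 },
	};

	/* One failure is fine for a RAID1-like tolerance of 1... */
	printf("%d\n", check_barrier_error(devs, 3, 1));	/* 0 */
	/* ...but not when nothing may fail. */
	printf("%d\n", check_barrier_error(devs, 3, 0));	/* -5 */
	return 0;
}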
@@ -4577,11 +4576,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, | |||
4577 | 4576 | ||
4578 | cur_trans->state =TRANS_STATE_COMPLETED; | 4577 | cur_trans->state =TRANS_STATE_COMPLETED; |
4579 | wake_up(&cur_trans->commit_wait); | 4578 | wake_up(&cur_trans->commit_wait); |
4580 | |||
4581 | /* | ||
4582 | memset(cur_trans, 0, sizeof(*cur_trans)); | ||
4583 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | ||
4584 | */ | ||
4585 | } | 4579 | } |
4586 | 4580 | ||
4587 | static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) | 4581 | static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) |
@@ -4637,6 +4631,12 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) | |||
4637 | return 0; | 4631 | return 0; |
4638 | } | 4632 | } |
4639 | 4633 | ||
4634 | static struct btrfs_fs_info *btree_fs_info(void *private_data) | ||
4635 | { | ||
4636 | struct inode *inode = private_data; | ||
4637 | return btrfs_sb(inode->i_sb); | ||
4638 | } | ||
4639 | |||
4640 | static const struct extent_io_ops btree_extent_io_ops = { | 4640 | static const struct extent_io_ops btree_extent_io_ops = { |
4641 | /* mandatory callbacks */ | 4641 | /* mandatory callbacks */ |
4642 | .submit_bio_hook = btree_submit_bio_hook, | 4642 | .submit_bio_hook = btree_submit_bio_hook, |
@@ -4644,6 +4644,8 @@ static const struct extent_io_ops btree_extent_io_ops = { | |||
4644 | /* note we're sharing with inode.c for the merge bio hook */ | 4644 | /* note we're sharing with inode.c for the merge bio hook */ |
4645 | .merge_bio_hook = btrfs_merge_bio_hook, | 4645 | .merge_bio_hook = btrfs_merge_bio_hook, |
4646 | .readpage_io_failed_hook = btree_io_failed_hook, | 4646 | .readpage_io_failed_hook = btree_io_failed_hook, |
4647 | .set_range_writeback = btrfs_set_range_writeback, | ||
4648 | .tree_fs_info = btree_fs_info, | ||
4647 | 4649 | ||
4648 | /* optional callbacks */ | 4650 | /* optional callbacks */ |
4649 | }; | 4651 | }; |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c581927555f3..0a634d3ffc16 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -120,14 +120,14 @@ u32 btrfs_csum_data(const char *data, u32 seed, size_t len); | |||
120 | void btrfs_csum_final(u32 crc, u8 *result); | 120 | void btrfs_csum_final(u32 crc, u8 *result); |
121 | blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | 121 | blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, |
122 | enum btrfs_wq_endio_type metadata); | 122 | enum btrfs_wq_endio_type metadata); |
123 | blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, | 123 | blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, |
124 | struct inode *inode, struct bio *bio, int mirror_num, | 124 | int mirror_num, unsigned long bio_flags, |
125 | unsigned long bio_flags, u64 bio_offset, | 125 | u64 bio_offset, void *private_data, |
126 | extent_submit_bio_hook_t *submit_bio_start, | 126 | extent_submit_bio_hook_t *submit_bio_start, |
127 | extent_submit_bio_hook_t *submit_bio_done); | 127 | extent_submit_bio_hook_t *submit_bio_done); |
128 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); | 128 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); |
129 | int btrfs_write_tree_block(struct extent_buffer *buf); | 129 | int btrfs_write_tree_block(struct extent_buffer *buf); |
130 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); | 130 | void btrfs_wait_tree_block_writeback(struct extent_buffer *buf); |
131 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | 131 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, |
132 | struct btrfs_fs_info *fs_info); | 132 | struct btrfs_fs_info *fs_info); |
133 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | 133 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 87144c9f9593..fa66980726c9 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
@@ -282,6 +282,11 @@ static int btrfs_get_name(struct dentry *parent, char *name, | |||
282 | name_len = btrfs_inode_ref_name_len(leaf, iref); | 282 | name_len = btrfs_inode_ref_name_len(leaf, iref); |
283 | } | 283 | } |
284 | 284 | ||
285 | ret = btrfs_is_name_len_valid(leaf, path->slots[0], name_ptr, name_len); | ||
286 | if (!ret) { | ||
287 | btrfs_free_path(path); | ||
288 | return -EIO; | ||
289 | } | ||
285 | read_extent_buffer(leaf, name, name_ptr, name_len); | 290 | read_extent_buffer(leaf, name, name_ptr, name_len); |
286 | btrfs_free_path(path); | 291 | btrfs_free_path(path); |
287 | 292 | ||
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 33d979e9ea2a..375f8c728d91 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -97,10 +97,11 @@ static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, | |||
97 | u64 num_bytes, int delalloc); | 97 | u64 num_bytes, int delalloc); |
98 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | 98 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, |
99 | u64 num_bytes); | 99 | u64 num_bytes); |
100 | static int __reserve_metadata_bytes(struct btrfs_root *root, | 100 | static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, |
101 | struct btrfs_space_info *space_info, | 101 | struct btrfs_space_info *space_info, |
102 | u64 orig_bytes, | 102 | u64 orig_bytes, |
103 | enum btrfs_reserve_flush_enum flush); | 103 | enum btrfs_reserve_flush_enum flush, |
104 | bool system_chunk); | ||
104 | static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, | 105 | static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, |
105 | struct btrfs_space_info *space_info, | 106 | struct btrfs_space_info *space_info, |
106 | u64 num_bytes); | 107 | u64 num_bytes); |
@@ -766,6 +767,26 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, | |||
766 | return NULL; | 767 | return NULL; |
767 | } | 768 | } |
768 | 769 | ||
770 | static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes, | ||
771 | u64 owner, u64 root_objectid) | ||
772 | { | ||
773 | struct btrfs_space_info *space_info; | ||
774 | u64 flags; | ||
775 | |||
776 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { | ||
777 | if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID) | ||
778 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | ||
779 | else | ||
780 | flags = BTRFS_BLOCK_GROUP_METADATA; | ||
781 | } else { | ||
782 | flags = BTRFS_BLOCK_GROUP_DATA; | ||
783 | } | ||
784 | |||
785 | space_info = __find_space_info(fs_info, flags); | ||
786 | ASSERT(space_info); | ||
787 | percpu_counter_add(&space_info->total_bytes_pinned, num_bytes); | ||
788 | } | ||
789 | |||
769 | /* | 790 | /* |
770 | * after adding space to the filesystem, we need to clear the full flags | 791 | * after adding space to the filesystem, we need to clear the full flags |
771 | * on all the space infos. | 792 | * on all the space infos. |
@@ -2092,6 +2113,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
2092 | u64 bytenr, u64 num_bytes, u64 parent, | 2113 | u64 bytenr, u64 num_bytes, u64 parent, |
2093 | u64 root_objectid, u64 owner, u64 offset) | 2114 | u64 root_objectid, u64 owner, u64 offset) |
2094 | { | 2115 | { |
2116 | int old_ref_mod, new_ref_mod; | ||
2095 | int ret; | 2117 | int ret; |
2096 | 2118 | ||
2097 | BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && | 2119 | BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && |
@@ -2099,15 +2121,21 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
2099 | 2121 | ||
2100 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { | 2122 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
2101 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, | 2123 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, |
2102 | num_bytes, | 2124 | num_bytes, parent, |
2103 | parent, root_objectid, (int)owner, | 2125 | root_objectid, (int)owner, |
2104 | BTRFS_ADD_DELAYED_REF, NULL); | 2126 | BTRFS_ADD_DELAYED_REF, NULL, |
2127 | &old_ref_mod, &new_ref_mod); | ||
2105 | } else { | 2128 | } else { |
2106 | ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, | 2129 | ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, |
2107 | num_bytes, parent, root_objectid, | 2130 | num_bytes, parent, |
2108 | owner, offset, 0, | 2131 | root_objectid, owner, offset, |
2109 | BTRFS_ADD_DELAYED_REF); | 2132 | 0, BTRFS_ADD_DELAYED_REF, |
2133 | &old_ref_mod, &new_ref_mod); | ||
2110 | } | 2134 | } |
2135 | |||
2136 | if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) | ||
2137 | add_pinned_bytes(fs_info, -num_bytes, owner, root_objectid); | ||
2138 | |||
2111 | return ret; | 2139 | return ret; |
2112 | } | 2140 | } |
2113 | 2141 | ||
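
This is where the old_ref_mod/new_ref_mod out-parameters pay off: when adding a reference flips the head's net count from negative to non-negative, an extent that had been counted as pinned is being kept alive after all, so its bytes are subtracted back out of total_bytes_pinned. Just that rule, sketched with a plain integer in place of the percpu counter:

#include <stdio.h>

static long long total_bytes_pinned;

/*
 * Mirror of the check after btrfs_add_delayed_*_ref: a negative to
 * non-negative transition means the extent will survive after all, so
 * undo the pinned-bytes accounting done when the count went negative.
 */
static void account_ref_add(int old_ref_mod, int new_ref_mod,
			    long long num_bytes)
{
	if (old_ref_mod < 0 && new_ref_mod >= 0)
		total_bytes_pinned -= num_bytes;
}

int main(void)
{
	/* An earlier drop pinned 16K; re-adding a ref unpins it. */
	total_bytes_pinned = 16384;
	account_ref_add(-1, 0, 16384);
	printf("pinned after re-add: %lld\n", total_bytes_pinned);	/* 0 */
	return 0;
}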
@@ -2411,6 +2439,16 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
2411 | head = btrfs_delayed_node_to_head(node); | 2439 | head = btrfs_delayed_node_to_head(node); |
2412 | trace_run_delayed_ref_head(fs_info, node, head, node->action); | 2440 | trace_run_delayed_ref_head(fs_info, node, head, node->action); |
2413 | 2441 | ||
2442 | if (head->total_ref_mod < 0) { | ||
2443 | struct btrfs_block_group_cache *cache; | ||
2444 | |||
2445 | cache = btrfs_lookup_block_group(fs_info, node->bytenr); | ||
2446 | ASSERT(cache); | ||
2447 | percpu_counter_add(&cache->space_info->total_bytes_pinned, | ||
2448 | -node->num_bytes); | ||
2449 | btrfs_put_block_group(cache); | ||
2450 | } | ||
2451 | |||
2414 | if (insert_reserved) { | 2452 | if (insert_reserved) { |
2415 | btrfs_pin_extent(fs_info, node->bytenr, | 2453 | btrfs_pin_extent(fs_info, node->bytenr, |
2416 | node->num_bytes, 1); | 2454 | node->num_bytes, 1); |
@@ -3364,6 +3402,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, | |||
3364 | struct btrfs_fs_info *fs_info = block_group->fs_info; | 3402 | struct btrfs_fs_info *fs_info = block_group->fs_info; |
3365 | struct btrfs_root *root = fs_info->tree_root; | 3403 | struct btrfs_root *root = fs_info->tree_root; |
3366 | struct inode *inode = NULL; | 3404 | struct inode *inode = NULL; |
3405 | struct extent_changeset *data_reserved = NULL; | ||
3367 | u64 alloc_hint = 0; | 3406 | u64 alloc_hint = 0; |
3368 | int dcs = BTRFS_DC_ERROR; | 3407 | int dcs = BTRFS_DC_ERROR; |
3369 | u64 num_pages = 0; | 3408 | u64 num_pages = 0; |
@@ -3483,7 +3522,7 @@ again: | |||
3483 | num_pages *= 16; | 3522 | num_pages *= 16; |
3484 | num_pages *= PAGE_SIZE; | 3523 | num_pages *= PAGE_SIZE; |
3485 | 3524 | ||
3486 | ret = btrfs_check_data_free_space(inode, 0, num_pages); | 3525 | ret = btrfs_check_data_free_space(inode, &data_reserved, 0, num_pages); |
3487 | if (ret) | 3526 | if (ret) |
3488 | goto out_put; | 3527 | goto out_put; |
3489 | 3528 | ||
@@ -3514,6 +3553,7 @@ out: | |||
3514 | block_group->disk_cache_state = dcs; | 3553 | block_group->disk_cache_state = dcs; |
3515 | spin_unlock(&block_group->lock); | 3554 | spin_unlock(&block_group->lock); |
3516 | 3555 | ||
3556 | extent_changeset_free(data_reserved); | ||
3517 | return ret; | 3557 | return ret; |
3518 | } | 3558 | } |
3519 | 3559 | ||
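
cache_save_setup is one of the callers converted to the qgroup series' extent changeset API: the caller hands in a NULL-initialized struct extent_changeset * by reference, btrfs_check_data_free_space records the actually reserved ranges in it, and one unconditional extent_changeset_free at the end covers both success and error paths. The calling convention, sketched with hypothetical stand-in types:

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for struct extent_changeset: what actually got reserved. */
struct changeset {
	unsigned long long bytes_changed;
};

/* Allocate a changeset on first use and record the reservation in it. */
static int check_data_free_space(struct changeset **reserved,
				 unsigned long long len)
{
	if (!*reserved) {
		*reserved = calloc(1, sizeof(**reserved));
		if (!*reserved)
			return -12;	/* -ENOMEM */
	}
	(*reserved)->bytes_changed += len;
	return 0;
}

/* Safe on NULL, so error paths and success paths share one call. */
static void changeset_free(struct changeset *reserved)
{
	free(reserved);
}

int main(void)
{
	struct changeset *data_reserved = NULL;	/* as in cache_save_setup */
	int ret = check_data_free_space(&data_reserved, 65536);

	if (!ret)
		printf("reserved %llu bytes\n", data_reserved->bytes_changed);
	changeset_free(data_reserved);	/* one unconditional free at 'out' */
	return ret;
}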
@@ -3924,88 +3964,83 @@ static const char *alloc_name(u64 flags) | |||
3924 | }; | 3964 | }; |
3925 | } | 3965 | } |
3926 | 3966 | ||
3927 | static int update_space_info(struct btrfs_fs_info *info, u64 flags, | 3967 | static int create_space_info(struct btrfs_fs_info *info, u64 flags, |
3928 | u64 total_bytes, u64 bytes_used, | 3968 | struct btrfs_space_info **new) |
3929 | u64 bytes_readonly, | ||
3930 | struct btrfs_space_info **space_info) | ||
3931 | { | 3969 | { |
3932 | struct btrfs_space_info *found; | 3970 | |
3971 | struct btrfs_space_info *space_info; | ||
3933 | int i; | 3972 | int i; |
3934 | int factor; | ||
3935 | int ret; | 3973 | int ret; |
3936 | 3974 | ||
3937 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | | 3975 | space_info = kzalloc(sizeof(*space_info), GFP_NOFS); |
3938 | BTRFS_BLOCK_GROUP_RAID10)) | 3976 | if (!space_info) |
3939 | factor = 2; | ||
3940 | else | ||
3941 | factor = 1; | ||
3942 | |||
3943 | found = __find_space_info(info, flags); | ||
3944 | if (found) { | ||
3945 | spin_lock(&found->lock); | ||
3946 | found->total_bytes += total_bytes; | ||
3947 | found->disk_total += total_bytes * factor; | ||
3948 | found->bytes_used += bytes_used; | ||
3949 | found->disk_used += bytes_used * factor; | ||
3950 | found->bytes_readonly += bytes_readonly; | ||
3951 | if (total_bytes > 0) | ||
3952 | found->full = 0; | ||
3953 | space_info_add_new_bytes(info, found, total_bytes - | ||
3954 | bytes_used - bytes_readonly); | ||
3955 | spin_unlock(&found->lock); | ||
3956 | *space_info = found; | ||
3957 | return 0; | ||
3958 | } | ||
3959 | found = kzalloc(sizeof(*found), GFP_NOFS); | ||
3960 | if (!found) | ||
3961 | return -ENOMEM; | 3977 | return -ENOMEM; |
3962 | 3978 | ||
3963 | ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL); | 3979 | ret = percpu_counter_init(&space_info->total_bytes_pinned, 0, |
3980 | GFP_KERNEL); | ||
3964 | if (ret) { | 3981 | if (ret) { |
3965 | kfree(found); | 3982 | kfree(space_info); |
3966 | return ret; | 3983 | return ret; |
3967 | } | 3984 | } |
3968 | 3985 | ||
3969 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) | 3986 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
3970 | INIT_LIST_HEAD(&found->block_groups[i]); | 3987 | INIT_LIST_HEAD(&space_info->block_groups[i]); |
3971 | init_rwsem(&found->groups_sem); | 3988 | init_rwsem(&space_info->groups_sem); |
3972 | spin_lock_init(&found->lock); | 3989 | spin_lock_init(&space_info->lock); |
3973 | found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; | 3990 | space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; |
3974 | found->total_bytes = total_bytes; | 3991 | space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; |
3975 | found->disk_total = total_bytes * factor; | 3992 | init_waitqueue_head(&space_info->wait); |
3976 | found->bytes_used = bytes_used; | 3993 | INIT_LIST_HEAD(&space_info->ro_bgs); |
3977 | found->disk_used = bytes_used * factor; | 3994 | INIT_LIST_HEAD(&space_info->tickets); |
3978 | found->bytes_pinned = 0; | 3995 | INIT_LIST_HEAD(&space_info->priority_tickets); |
3979 | found->bytes_reserved = 0; | 3996 | |
3980 | found->bytes_readonly = bytes_readonly; | 3997 | ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype, |
3981 | found->bytes_may_use = 0; | ||
3982 | found->full = 0; | ||
3983 | found->max_extent_size = 0; | ||
3984 | found->force_alloc = CHUNK_ALLOC_NO_FORCE; | ||
3985 | found->chunk_alloc = 0; | ||
3986 | found->flush = 0; | ||
3987 | init_waitqueue_head(&found->wait); | ||
3988 | INIT_LIST_HEAD(&found->ro_bgs); | ||
3989 | INIT_LIST_HEAD(&found->tickets); | ||
3990 | INIT_LIST_HEAD(&found->priority_tickets); | ||
3991 | |||
3992 | ret = kobject_init_and_add(&found->kobj, &space_info_ktype, | ||
3993 | info->space_info_kobj, "%s", | 3998 | info->space_info_kobj, "%s", |
3994 | alloc_name(found->flags)); | 3999 | alloc_name(space_info->flags)); |
3995 | if (ret) { | 4000 | if (ret) { |
3996 | percpu_counter_destroy(&found->total_bytes_pinned); | 4001 | percpu_counter_destroy(&space_info->total_bytes_pinned); |
3997 | kfree(found); | 4002 | kfree(space_info); |
3998 | return ret; | 4003 | return ret; |
3999 | } | 4004 | } |
4000 | 4005 | ||
4001 | *space_info = found; | 4006 | *new = space_info; |
4002 | list_add_rcu(&found->list, &info->space_info); | 4007 | list_add_rcu(&space_info->list, &info->space_info); |
4003 | if (flags & BTRFS_BLOCK_GROUP_DATA) | 4008 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
4004 | info->data_sinfo = found; | 4009 | info->data_sinfo = space_info; |
4005 | 4010 | ||
4006 | return ret; | 4011 | return ret; |
4007 | } | 4012 | } |
4008 | 4013 | ||
4014 | static void update_space_info(struct btrfs_fs_info *info, u64 flags, | ||
4015 | u64 total_bytes, u64 bytes_used, | ||
4016 | u64 bytes_readonly, | ||
4017 | struct btrfs_space_info **space_info) | ||
4018 | { | ||
4019 | struct btrfs_space_info *found; | ||
4020 | int factor; | ||
4021 | |||
4022 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | | ||
4023 | BTRFS_BLOCK_GROUP_RAID10)) | ||
4024 | factor = 2; | ||
4025 | else | ||
4026 | factor = 1; | ||
4027 | |||
4028 | found = __find_space_info(info, flags); | ||
4029 | ASSERT(found); | ||
4030 | spin_lock(&found->lock); | ||
4031 | found->total_bytes += total_bytes; | ||
4032 | found->disk_total += total_bytes * factor; | ||
4033 | found->bytes_used += bytes_used; | ||
4034 | found->disk_used += bytes_used * factor; | ||
4035 | found->bytes_readonly += bytes_readonly; | ||
4036 | if (total_bytes > 0) | ||
4037 | found->full = 0; | ||
4038 | space_info_add_new_bytes(info, found, total_bytes - | ||
4039 | bytes_used - bytes_readonly); | ||
4040 | spin_unlock(&found->lock); | ||
4041 | *space_info = found; | ||
4042 | } | ||
4043 | |||
4009 | static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | 4044 | static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) |
4010 | { | 4045 | { |
4011 | u64 extra_flags = chunk_to_extended(flags) & | 4046 | u64 extra_flags = chunk_to_extended(flags) & |
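
update_space_info used to do double duty: allocate and register a new space_info, or fold fresh capacity into an existing one. Splitting it lets do_chunk_alloc return a real -ENOMEM from create_space_info instead of BUG_ON, while update_space_info can simply ASSERT the entry exists. The factor of 2 retained for DUP/RAID1/RAID10 accounts for usable bytes occupying twice their size on disk, as a small sketch shows:

#include <stdio.h>

/* Mirror of the factor logic kept in update_space_info. */
static int raid_factor(int is_dup_raid1_raid10)
{
	return is_dup_raid1_raid10 ? 2 : 1;
}

int main(void)
{
	unsigned long long total_bytes = 1ULL << 30;	/* 1 GiB usable */
	int factor = raid_factor(1);			/* e.g. RAID1 */

	/* disk_total += total_bytes * factor in the kernel code */
	printf("disk footprint: %llu bytes\n", total_bytes * factor);
	return 0;
}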
@@ -4121,7 +4156,7 @@ static u64 get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags) | |||
4121 | return btrfs_reduce_alloc_profile(fs_info, flags); | 4156 | return btrfs_reduce_alloc_profile(fs_info, flags); |
4122 | } | 4157 | } |
4123 | 4158 | ||
4124 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | 4159 | static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data) |
4125 | { | 4160 | { |
4126 | struct btrfs_fs_info *fs_info = root->fs_info; | 4161 | struct btrfs_fs_info *fs_info = root->fs_info; |
4127 | u64 flags; | 4162 | u64 flags; |
@@ -4138,6 +4173,21 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | |||
4138 | return ret; | 4173 | return ret; |
4139 | } | 4174 | } |
4140 | 4175 | ||
4176 | u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info) | ||
4177 | { | ||
4178 | return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_DATA); | ||
4179 | } | ||
4180 | |||
4181 | u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info) | ||
4182 | { | ||
4183 | return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
4184 | } | ||
4185 | |||
4186 | u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info) | ||
4187 | { | ||
4188 | return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); | ||
4189 | } | ||
4190 | |||
4141 | static u64 btrfs_space_info_used(struct btrfs_space_info *s_info, | 4191 | static u64 btrfs_space_info_used(struct btrfs_space_info *s_info, |
4142 | bool may_use_included) | 4192 | bool may_use_included) |
4143 | { | 4193 | { |
@@ -4187,7 +4237,7 @@ again: | |||
4187 | data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; | 4237 | data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; |
4188 | spin_unlock(&data_sinfo->lock); | 4238 | spin_unlock(&data_sinfo->lock); |
4189 | alloc: | 4239 | alloc: |
4190 | alloc_target = btrfs_get_alloc_profile(root, 1); | 4240 | alloc_target = btrfs_data_alloc_profile(fs_info); |
4191 | /* | 4241 | /* |
4192 | * It is ugly that we don't call nolock join | 4242 | * It is ugly that we don't call nolock join |
4193 | * transaction for the free space inode case here. | 4243 | * transaction for the free space inode case here. |
@@ -4238,7 +4288,7 @@ commit_trans: | |||
4238 | 4288 | ||
4239 | if (need_commit > 0) { | 4289 | if (need_commit > 0) { |
4240 | btrfs_start_delalloc_roots(fs_info, 0, -1); | 4290 | btrfs_start_delalloc_roots(fs_info, 0, -1); |
4241 | btrfs_wait_ordered_roots(fs_info, -1, 0, | 4291 | btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, |
4242 | (u64)-1); | 4292 | (u64)-1); |
4243 | } | 4293 | } |
4244 | 4294 | ||
@@ -4278,12 +4328,8 @@ commit_trans: | |||
4278 | return ret; | 4328 | return ret; |
4279 | } | 4329 | } |
4280 | 4330 | ||
4281 | /* | 4331 | int btrfs_check_data_free_space(struct inode *inode, |
4282 | * New check_data_free_space() with ability for precious data reservation | 4332 | struct extent_changeset **reserved, u64 start, u64 len) |
4283 | * Will replace old btrfs_check_data_free_space(), but for patch split, | ||
4284 | * add a new function first and then replace it. | ||
4285 | */ | ||
4286 | int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len) | ||
4287 | { | 4333 | { |
4288 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 4334 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
4289 | int ret; | 4335 | int ret; |
@@ -4298,9 +4344,11 @@ int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len) | |||
4298 | return ret; | 4344 | return ret; |
4299 | 4345 | ||
4300 | /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */ | 4346 | /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */ |
4301 | ret = btrfs_qgroup_reserve_data(inode, start, len); | 4347 | ret = btrfs_qgroup_reserve_data(inode, reserved, start, len); |
4302 | if (ret) | 4348 | if (ret < 0) |
4303 | btrfs_free_reserved_data_space_noquota(inode, start, len); | 4349 | btrfs_free_reserved_data_space_noquota(inode, start, len); |
4350 | else | ||
4351 | ret = 0; | ||
4304 | return ret; | 4352 | return ret; |
4305 | } | 4353 | } |
4306 | 4354 | ||
@@ -4341,7 +4389,8 @@ void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start, | |||
4341 | * This one will handle the per-inode data rsv map for accurate reserved | 4389 | * This one will handle the per-inode data rsv map for accurate reserved |
4342 | * space framework. | 4390 | * space framework. |
4343 | */ | 4391 | */ |
4344 | void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len) | 4392 | void btrfs_free_reserved_data_space(struct inode *inode, |
4393 | struct extent_changeset *reserved, u64 start, u64 len) | ||
4345 | { | 4394 | { |
4346 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4395 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4347 | 4396 | ||
@@ -4351,7 +4400,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len) | |||
4351 | start = round_down(start, root->fs_info->sectorsize); | 4400 | start = round_down(start, root->fs_info->sectorsize); |
4352 | 4401 | ||
4353 | btrfs_free_reserved_data_space_noquota(inode, start, len); | 4402 | btrfs_free_reserved_data_space_noquota(inode, start, len); |
4354 | btrfs_qgroup_free_data(inode, start, len); | 4403 | btrfs_qgroup_free_data(inode, reserved, start, len); |
4355 | } | 4404 | } |
4356 | 4405 | ||
4357 | static void force_metadata_allocation(struct btrfs_fs_info *info) | 4406 | static void force_metadata_allocation(struct btrfs_fs_info *info) |
@@ -4463,9 +4512,8 @@ void check_system_chunk(struct btrfs_trans_handle *trans, | |||
4463 | } | 4512 | } |
4464 | 4513 | ||
4465 | if (left < thresh) { | 4514 | if (left < thresh) { |
4466 | u64 flags; | 4515 | u64 flags = btrfs_system_alloc_profile(fs_info); |
4467 | 4516 | ||
4468 | flags = btrfs_get_alloc_profile(fs_info->chunk_root, 0); | ||
4469 | /* | 4517 | /* |
4470 | * Ignore failure to create system chunk. We might end up not | 4518 | * Ignore failure to create system chunk. We might end up not |
4471 | * needing it, as we might not need to COW all nodes/leafs from | 4519 | * needing it, as we might not need to COW all nodes/leafs from |
@@ -4506,10 +4554,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
4506 | 4554 | ||
4507 | space_info = __find_space_info(fs_info, flags); | 4555 | space_info = __find_space_info(fs_info, flags); |
4508 | if (!space_info) { | 4556 | if (!space_info) { |
4509 | ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info); | 4557 | ret = create_space_info(fs_info, flags, &space_info); |
4510 | BUG_ON(ret); /* -ENOMEM */ | 4558 | if (ret) |
4559 | return ret; | ||
4511 | } | 4560 | } |
4512 | BUG_ON(!space_info); /* Logic error */ | ||
4513 | 4561 | ||
4514 | again: | 4562 | again: |
4515 | spin_lock(&space_info->lock); | 4563 | spin_lock(&space_info->lock); |
@@ -4614,11 +4662,11 @@ out: | |||
4614 | return ret; | 4662 | return ret; |
4615 | } | 4663 | } |
4616 | 4664 | ||
4617 | static int can_overcommit(struct btrfs_root *root, | 4665 | static int can_overcommit(struct btrfs_fs_info *fs_info, |
4618 | struct btrfs_space_info *space_info, u64 bytes, | 4666 | struct btrfs_space_info *space_info, u64 bytes, |
4619 | enum btrfs_reserve_flush_enum flush) | 4667 | enum btrfs_reserve_flush_enum flush, |
4668 | bool system_chunk) | ||
4620 | { | 4669 | { |
4621 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
4622 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | 4670 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; |
4623 | u64 profile; | 4671 | u64 profile; |
4624 | u64 space_size; | 4672 | u64 space_size; |
@@ -4629,7 +4677,11 @@ static int can_overcommit(struct btrfs_root *root, | |||
4629 | if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) | 4677 | if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) |
4630 | return 0; | 4678 | return 0; |
4631 | 4679 | ||
4632 | profile = btrfs_get_alloc_profile(root, 0); | 4680 | if (system_chunk) |
4681 | profile = btrfs_system_alloc_profile(fs_info); | ||
4682 | else | ||
4683 | profile = btrfs_metadata_alloc_profile(fs_info); | ||
4684 | |||
4633 | used = btrfs_space_info_used(space_info, false); | 4685 | used = btrfs_space_info_used(space_info, false); |
4634 | 4686 | ||
4635 | /* | 4687 | /* |
@@ -4646,9 +4698,7 @@ static int can_overcommit(struct btrfs_root *root, | |||
4646 | 4698 | ||
4647 | used += space_info->bytes_may_use; | 4699 | used += space_info->bytes_may_use; |
4648 | 4700 | ||
4649 | spin_lock(&fs_info->free_chunk_lock); | 4701 | avail = atomic64_read(&fs_info->free_chunk_space); |
4650 | avail = fs_info->free_chunk_space; | ||
4651 | spin_unlock(&fs_info->free_chunk_lock); | ||
4652 | 4702 | ||
4653 | /* | 4703 | /* |
4654 | * If we have dup, raid1 or raid10 then only half of the free | 4704 | * If we have dup, raid1 or raid10 then only half of the free |
@@ -4698,14 +4748,14 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info, | |||
4698 | } | 4748 | } |
4699 | } | 4749 | } |
4700 | 4750 | ||
4701 | static inline int calc_reclaim_items_nr(struct btrfs_fs_info *fs_info, | 4751 | static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info, |
4702 | u64 to_reclaim) | 4752 | u64 to_reclaim) |
4703 | { | 4753 | { |
4704 | u64 bytes; | 4754 | u64 bytes; |
4705 | int nr; | 4755 | u64 nr; |
4706 | 4756 | ||
4707 | bytes = btrfs_calc_trans_metadata_size(fs_info, 1); | 4757 | bytes = btrfs_calc_trans_metadata_size(fs_info, 1); |
4708 | nr = (int)div64_u64(to_reclaim, bytes); | 4758 | nr = div64_u64(to_reclaim, bytes); |
4709 | if (!nr) | 4759 | if (!nr) |
4710 | nr = 1; | 4760 | nr = 1; |
4711 | return nr; | 4761 | return nr; |
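
This is the calc_reclaim_items_nr overflow fix from the merge summary: div64_u64 returns u64, and the old (int) cast could truncate a large quotient — on common ABIs wrapping to 0 — after which to_reclaim would be recomputed from a bogus item count. Keeping the value u64 end to end avoids that, as a standalone demo illustrates:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t bytes = 16384;		/* per-item metadata size */
	uint64_t huge = 1ULL << 60;	/* pathological to_reclaim */

	uint64_t nr64 = huge / bytes;	/* 2^46, correct */
	int nr32 = (int)nr64;		/* truncates; 0 on common ABIs */

	printf("u64 items=%llu, int items=%d\n",
	       (unsigned long long)nr64, nr32);
	/* With the old int, items * EXTENT_SIZE_PER_ITEM collapsed to ~0. */
	return 0;
}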
@@ -4716,24 +4766,23 @@ static inline int calc_reclaim_items_nr(struct btrfs_fs_info *fs_info, | |||
4716 | /* | 4766 | /* |
4717 | * shrink metadata reservation for delalloc | 4767 | * shrink metadata reservation for delalloc |
4718 | */ | 4768 | */ |
4719 | static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | 4769 | static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, |
4720 | bool wait_ordered) | 4770 | u64 orig, bool wait_ordered) |
4721 | { | 4771 | { |
4722 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
4723 | struct btrfs_block_rsv *block_rsv; | 4772 | struct btrfs_block_rsv *block_rsv; |
4724 | struct btrfs_space_info *space_info; | 4773 | struct btrfs_space_info *space_info; |
4725 | struct btrfs_trans_handle *trans; | 4774 | struct btrfs_trans_handle *trans; |
4726 | u64 delalloc_bytes; | 4775 | u64 delalloc_bytes; |
4727 | u64 max_reclaim; | 4776 | u64 max_reclaim; |
4777 | u64 items; | ||
4728 | long time_left; | 4778 | long time_left; |
4729 | unsigned long nr_pages; | 4779 | unsigned long nr_pages; |
4730 | int loops; | 4780 | int loops; |
4731 | int items; | ||
4732 | enum btrfs_reserve_flush_enum flush; | 4781 | enum btrfs_reserve_flush_enum flush; |
4733 | 4782 | ||
4734 | /* Calc the number of the pages we need flush for space reservation */ | 4783 | /* Calc the number of the pages we need flush for space reservation */ |
4735 | items = calc_reclaim_items_nr(fs_info, to_reclaim); | 4784 | items = calc_reclaim_items_nr(fs_info, to_reclaim); |
4736 | to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM; | 4785 | to_reclaim = items * EXTENT_SIZE_PER_ITEM; |
4737 | 4786 | ||
4738 | trans = (struct btrfs_trans_handle *)current->journal_info; | 4787 | trans = (struct btrfs_trans_handle *)current->journal_info; |
4739 | block_rsv = &fs_info->delalloc_block_rsv; | 4788 | block_rsv = &fs_info->delalloc_block_rsv; |
@@ -4776,7 +4825,7 @@ skip_async: | |||
4776 | else | 4825 | else |
4777 | flush = BTRFS_RESERVE_NO_FLUSH; | 4826 | flush = BTRFS_RESERVE_NO_FLUSH; |
4778 | spin_lock(&space_info->lock); | 4827 | spin_lock(&space_info->lock); |
4779 | if (can_overcommit(root, space_info, orig, flush)) { | 4828 | if (can_overcommit(fs_info, space_info, orig, flush, false)) { |
4780 | spin_unlock(&space_info->lock); | 4829 | spin_unlock(&space_info->lock); |
4781 | break; | 4830 | break; |
4782 | } | 4831 | } |
@@ -4838,7 +4887,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, | |||
4838 | 4887 | ||
4839 | spin_lock(&delayed_rsv->lock); | 4888 | spin_lock(&delayed_rsv->lock); |
4840 | if (percpu_counter_compare(&space_info->total_bytes_pinned, | 4889 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
4841 | bytes - delayed_rsv->size) >= 0) { | 4890 | bytes - delayed_rsv->size) < 0) { |
4842 | spin_unlock(&delayed_rsv->lock); | 4891 | spin_unlock(&delayed_rsv->lock); |
4843 | return -ENOSPC; | 4892 | return -ENOSPC; |
4844 | } | 4893 | } |
@@ -4886,7 +4935,7 @@ static int flush_space(struct btrfs_fs_info *fs_info, | |||
4886 | break; | 4935 | break; |
4887 | case FLUSH_DELALLOC: | 4936 | case FLUSH_DELALLOC: |
4888 | case FLUSH_DELALLOC_WAIT: | 4937 | case FLUSH_DELALLOC_WAIT: |
4889 | shrink_delalloc(root, num_bytes * 2, orig_bytes, | 4938 | shrink_delalloc(fs_info, num_bytes * 2, orig_bytes, |
4890 | state == FLUSH_DELALLOC_WAIT); | 4939 | state == FLUSH_DELALLOC_WAIT); |
4891 | break; | 4940 | break; |
4892 | case ALLOC_CHUNK: | 4941 | case ALLOC_CHUNK: |
@@ -4896,7 +4945,7 @@ static int flush_space(struct btrfs_fs_info *fs_info, | |||
4896 | break; | 4945 | break; |
4897 | } | 4946 | } |
4898 | ret = do_chunk_alloc(trans, fs_info, | 4947 | ret = do_chunk_alloc(trans, fs_info, |
4899 | btrfs_get_alloc_profile(root, 0), | 4948 | btrfs_metadata_alloc_profile(fs_info), |
4900 | CHUNK_ALLOC_NO_FORCE); | 4949 | CHUNK_ALLOC_NO_FORCE); |
4901 | btrfs_end_transaction(trans); | 4950 | btrfs_end_transaction(trans); |
4902 | if (ret > 0 || ret == -ENOSPC) | 4951 | if (ret > 0 || ret == -ENOSPC) |
@@ -4917,8 +4966,9 @@ static int flush_space(struct btrfs_fs_info *fs_info, | |||
4917 | } | 4966 | } |
4918 | 4967 | ||
4919 | static inline u64 | 4968 | static inline u64 |
4920 | btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, | 4969 | btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, |
4921 | struct btrfs_space_info *space_info) | 4970 | struct btrfs_space_info *space_info, |
4971 | bool system_chunk) | ||
4922 | { | 4972 | { |
4923 | struct reserve_ticket *ticket; | 4973 | struct reserve_ticket *ticket; |
4924 | u64 used; | 4974 | u64 used; |
@@ -4933,14 +4983,14 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, | |||
4933 | return to_reclaim; | 4983 | return to_reclaim; |
4934 | 4984 | ||
4935 | to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); | 4985 | to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); |
4936 | if (can_overcommit(root, space_info, to_reclaim, | 4986 | if (can_overcommit(fs_info, space_info, to_reclaim, |
4937 | BTRFS_RESERVE_FLUSH_ALL)) | 4987 | BTRFS_RESERVE_FLUSH_ALL, system_chunk)) |
4938 | return 0; | 4988 | return 0; |
4939 | 4989 | ||
4940 | used = space_info->bytes_used + space_info->bytes_reserved + | 4990 | used = btrfs_space_info_used(space_info, true); |
4941 | space_info->bytes_pinned + space_info->bytes_readonly + | 4991 | |
4942 | space_info->bytes_may_use; | 4992 | if (can_overcommit(fs_info, space_info, SZ_1M, |
4943 | if (can_overcommit(root, space_info, SZ_1M, BTRFS_RESERVE_FLUSH_ALL)) | 4993 | BTRFS_RESERVE_FLUSH_ALL, system_chunk)) |
4944 | expected = div_factor_fine(space_info->total_bytes, 95); | 4994 | expected = div_factor_fine(space_info->total_bytes, 95); |
4945 | else | 4995 | else |
4946 | expected = div_factor_fine(space_info->total_bytes, 90); | 4996 | expected = div_factor_fine(space_info->total_bytes, 90); |
@@ -4954,17 +5004,18 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, | |||
4954 | return to_reclaim; | 5004 | return to_reclaim; |
4955 | } | 5005 | } |
4956 | 5006 | ||
4957 | static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, | 5007 | static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info, |
4958 | struct btrfs_root *root, u64 used) | 5008 | struct btrfs_space_info *space_info, |
5009 | u64 used, bool system_chunk) | ||
4959 | { | 5010 | { |
4960 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
4961 | u64 thresh = div_factor_fine(space_info->total_bytes, 98); | 5011 | u64 thresh = div_factor_fine(space_info->total_bytes, 98); |
4962 | 5012 | ||
4963 | /* If we're just plain full then async reclaim just slows us down. */ | 5013 | /* If we're just plain full then async reclaim just slows us down. */ |
4964 | if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) | 5014 | if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) |
4965 | return 0; | 5015 | return 0; |
4966 | 5016 | ||
4967 | if (!btrfs_calc_reclaim_metadata_size(root, space_info)) | 5017 | if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info, |
5018 | system_chunk)) | ||
4968 | return 0; | 5019 | return 0; |
4969 | 5020 | ||
4970 | return (used >= thresh && !btrfs_fs_closing(fs_info) && | 5021 | return (used >= thresh && !btrfs_fs_closing(fs_info) && |
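Both thresholds above are simple percentages of total_bytes computed by div_factor_fine(). A sketch of that helper as its callers imply (div_u64 is from linux/math64.h):

static inline u64 div_factor_fine(u64 num, int factor)
{
	if (factor == 100)
		return num;
	num *= factor;			/* e.g. total_bytes * 98 */
	return div_u64(num, 100);	/* ... / 100 */
}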
@@ -5001,8 +5052,8 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work) | |||
5001 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | 5052 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
5002 | 5053 | ||
5003 | spin_lock(&space_info->lock); | 5054 | spin_lock(&space_info->lock); |
5004 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, | 5055 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info, |
5005 | space_info); | 5056 | false); |
5006 | if (!to_reclaim) { | 5057 | if (!to_reclaim) { |
5007 | space_info->flush = 0; | 5058 | space_info->flush = 0; |
5008 | spin_unlock(&space_info->lock); | 5059 | spin_unlock(&space_info->lock); |
@@ -5024,8 +5075,9 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work) | |||
5024 | spin_unlock(&space_info->lock); | 5075 | spin_unlock(&space_info->lock); |
5025 | return; | 5076 | return; |
5026 | } | 5077 | } |
5027 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, | 5078 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, |
5028 | space_info); | 5079 | space_info, |
5080 | false); | ||
5029 | ticket = list_first_entry(&space_info->tickets, | 5081 | ticket = list_first_entry(&space_info->tickets, |
5030 | struct reserve_ticket, list); | 5082 | struct reserve_ticket, list); |
5031 | if (last_tickets_id == space_info->tickets_id) { | 5083 | if (last_tickets_id == space_info->tickets_id) { |
@@ -5063,8 +5115,8 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, | |||
5063 | int flush_state = FLUSH_DELAYED_ITEMS_NR; | 5115 | int flush_state = FLUSH_DELAYED_ITEMS_NR; |
5064 | 5116 | ||
5065 | spin_lock(&space_info->lock); | 5117 | spin_lock(&space_info->lock); |
5066 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->extent_root, | 5118 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info, |
5067 | space_info); | 5119 | false); |
5068 | if (!to_reclaim) { | 5120 | if (!to_reclaim) { |
5069 | spin_unlock(&space_info->lock); | 5121 | spin_unlock(&space_info->lock); |
5070 | return; | 5122 | return; |
@@ -5143,12 +5195,12 @@ static int wait_reserve_ticket(struct btrfs_fs_info *fs_info, | |||
5143 | * regain reservations will be made and this will fail if there is not enough | 5195 | * regain reservations will be made and this will fail if there is not enough |
5144 | * space already. | 5196 | * space already. |
5145 | */ | 5197 | */ |
5146 | static int __reserve_metadata_bytes(struct btrfs_root *root, | 5198 | static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, |
5147 | struct btrfs_space_info *space_info, | 5199 | struct btrfs_space_info *space_info, |
5148 | u64 orig_bytes, | 5200 | u64 orig_bytes, |
5149 | enum btrfs_reserve_flush_enum flush) | 5201 | enum btrfs_reserve_flush_enum flush, |
5202 | bool system_chunk) | ||
5150 | { | 5203 | { |
5151 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
5152 | struct reserve_ticket ticket; | 5204 | struct reserve_ticket ticket; |
5153 | u64 used; | 5205 | u64 used; |
5154 | int ret = 0; | 5206 | int ret = 0; |
@@ -5170,7 +5222,8 @@ static int __reserve_metadata_bytes(struct btrfs_root *root, | |||
5170 | trace_btrfs_space_reservation(fs_info, "space_info", | 5222 | trace_btrfs_space_reservation(fs_info, "space_info", |
5171 | space_info->flags, orig_bytes, 1); | 5223 | space_info->flags, orig_bytes, 1); |
5172 | ret = 0; | 5224 | ret = 0; |
5173 | } else if (can_overcommit(root, space_info, orig_bytes, flush)) { | 5225 | } else if (can_overcommit(fs_info, space_info, orig_bytes, flush, |
5226 | system_chunk)) { | ||
5174 | space_info->bytes_may_use += orig_bytes; | 5227 | space_info->bytes_may_use += orig_bytes; |
5175 | trace_btrfs_space_reservation(fs_info, "space_info", | 5228 | trace_btrfs_space_reservation(fs_info, "space_info", |
5176 | space_info->flags, orig_bytes, 1); | 5229 | space_info->flags, orig_bytes, 1); |
@@ -5197,7 +5250,7 @@ static int __reserve_metadata_bytes(struct btrfs_root *root, | |||
5197 | orig_bytes, flush, | 5250 | orig_bytes, flush, |
5198 | "enospc"); | 5251 | "enospc"); |
5199 | queue_work(system_unbound_wq, | 5252 | queue_work(system_unbound_wq, |
5200 | &root->fs_info->async_reclaim_work); | 5253 | &fs_info->async_reclaim_work); |
5201 | } | 5254 | } |
5202 | } else { | 5255 | } else { |
5203 | list_add_tail(&ticket.list, | 5256 | list_add_tail(&ticket.list, |
@@ -5211,7 +5264,8 @@ static int __reserve_metadata_bytes(struct btrfs_root *root, | |||
5211 | * the async reclaim as we will panic. | 5264 | * the async reclaim as we will panic. |
5212 | */ | 5265 | */ |
5213 | if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) && | 5266 | if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) && |
5214 | need_do_async_reclaim(space_info, root, used) && | 5267 | need_do_async_reclaim(fs_info, space_info, |
5268 | used, system_chunk) && | ||
5215 | !work_busy(&fs_info->async_reclaim_work)) { | 5269 | !work_busy(&fs_info->async_reclaim_work)) { |
5216 | trace_btrfs_trigger_flush(fs_info, space_info->flags, | 5270 | trace_btrfs_trigger_flush(fs_info, space_info->flags, |
5217 | orig_bytes, flush, "preempt"); | 5271 | orig_bytes, flush, "preempt"); |
@@ -5269,9 +5323,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root, | |||
5269 | struct btrfs_fs_info *fs_info = root->fs_info; | 5323 | struct btrfs_fs_info *fs_info = root->fs_info; |
5270 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | 5324 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; |
5271 | int ret; | 5325 | int ret; |
5326 | bool system_chunk = (root == fs_info->chunk_root); | ||
5272 | 5327 | ||
5273 | ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes, | 5328 | ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info, |
5274 | flush); | 5329 | orig_bytes, flush, system_chunk); |
5275 | if (ret == -ENOSPC && | 5330 | if (ret == -ENOSPC && |
5276 | unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { | 5331 | unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { |
5277 | if (block_rsv != global_rsv && | 5332 | if (block_rsv != global_rsv && |
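With the btrfs_root argument dropped from the internal reservation helpers, the only root-specific fact still needed is whether the caller is the chunk root, hence the system_chunk bool computed once here and threaded down. Inside can_overcommit() the flag plausibly selects which allocation profile bounds the overcommit estimate; a hedged sketch (btrfs_system_alloc_profile is an assumed counterpart of the btrfs_metadata_alloc_profile helper used earlier in this diff):

	u64 profile;

	if (system_chunk)
		profile = btrfs_system_alloc_profile(fs_info);
	else
		profile = btrfs_metadata_alloc_profile(fs_info);
	/* profile then limits how much unallocated space counts as
	 * usable for overcommit. */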
@@ -5380,9 +5435,7 @@ static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info, | |||
5380 | * overcommit, and if we can't then we just need to free up our space | 5435 | * overcommit, and if we can't then we just need to free up our space |
5381 | * and not satisfy any requests. | 5436 | * and not satisfy any requests. |
5382 | */ | 5437 | */ |
5383 | used = space_info->bytes_used + space_info->bytes_reserved + | 5438 | used = btrfs_space_info_used(space_info, true); |
5384 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
5385 | space_info->bytes_may_use; | ||
5386 | if (used - num_bytes >= space_info->total_bytes) | 5439 | if (used - num_bytes >= space_info->total_bytes) |
5387 | check_overcommit = true; | 5440 | check_overcommit = true; |
5388 | again: | 5441 | again: |
@@ -5394,8 +5447,7 @@ again: | |||
5394 | * adding the ticket space would be a double count. | 5447 | * adding the ticket space would be a double count. |
5395 | */ | 5448 | */ |
5396 | if (check_overcommit && | 5449 | if (check_overcommit && |
5397 | !can_overcommit(fs_info->extent_root, space_info, 0, | 5450 | !can_overcommit(fs_info, space_info, 0, flush, false)) |
5398 | flush)) | ||
5399 | break; | 5451 | break; |
5400 | if (num_bytes >= ticket->bytes) { | 5452 | if (num_bytes >= ticket->bytes) { |
5401 | list_del_init(&ticket->list); | 5453 | list_del_init(&ticket->list); |
@@ -6124,6 +6176,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes) | |||
6124 | * @inode: inode we're writing to | 6176 | * @inode: inode we're writing to |
6125 | * @start: start range we are writing to | 6177 | * @start: start range we are writing to |
6126 | * @len: length of the range we are writing to | 6178 |
6179 | * @reserved: mandatory parameter, records the actually reserved qgroup | ||
6180 | * ranges of the current reservation. | ||
6127 | * | 6181 | * |
6128 | * This will do the following things | 6182 | * This will do the following things |
6129 | * | 6183 | * |
@@ -6141,16 +6195,17 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes) | |||
6141 | * Return 0 for success | 6195 | * Return 0 for success |
6142 | * Return <0 for error(-ENOSPC or -EQUOT) | 6196 | * Return <0 for error(-ENOSPC or -EQUOT) |
6143 | */ | 6197 | */ |
6144 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len) | 6198 | int btrfs_delalloc_reserve_space(struct inode *inode, |
6199 | struct extent_changeset **reserved, u64 start, u64 len) | ||
6145 | { | 6200 | { |
6146 | int ret; | 6201 | int ret; |
6147 | 6202 | ||
6148 | ret = btrfs_check_data_free_space(inode, start, len); | 6203 | ret = btrfs_check_data_free_space(inode, reserved, start, len); |
6149 | if (ret < 0) | 6204 | if (ret < 0) |
6150 | return ret; | 6205 | return ret; |
6151 | ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len); | 6206 | ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len); |
6152 | if (ret < 0) | 6207 | if (ret < 0) |
6153 | btrfs_free_reserved_data_space(inode, start, len); | 6208 | btrfs_free_reserved_data_space(inode, *reserved, start, len); |
6154 | return ret; | 6209 | return ret; |
6155 | } | 6210 | } |
6156 | 6211 | ||
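Callers now own an extent_changeset recording exactly which qgroup ranges this reservation touched, so a release frees only those ranges (this is the qgroup reserved-space underflow fix from the changelog). A hedged usage sketch, relying on the changeset helpers added in the extent_io.h hunk further down; pos, write_bytes and copied are illustrative caller-side names:

	struct extent_changeset *data_reserved = NULL;
	int ret;

	ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
					   pos, write_bytes);
	if (ret)
		return ret;

	/* ... perform the buffered write ... */

	if (copied == 0)	/* nothing landed, undo the reservation */
		btrfs_delalloc_release_space(inode, data_reserved,
					     pos, write_bytes);
	extent_changeset_free(data_reserved);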
@@ -6169,10 +6224,11 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len) | |||
6169 | * list if there are no delalloc bytes left. | 6224 | * list if there are no delalloc bytes left. |
6170 | * Also it will handle the qgroup reserved space. | 6225 | * Also it will handle the qgroup reserved space. |
6171 | */ | 6226 | */ |
6172 | void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len) | 6227 | void btrfs_delalloc_release_space(struct inode *inode, |
6228 | struct extent_changeset *reserved, u64 start, u64 len) | ||
6173 | { | 6229 | { |
6174 | btrfs_delalloc_release_metadata(BTRFS_I(inode), len); | 6230 | btrfs_delalloc_release_metadata(BTRFS_I(inode), len); |
6175 | btrfs_free_reserved_data_space(inode, start, len); | 6231 | btrfs_free_reserved_data_space(inode, reserved, start, len); |
6176 | } | 6232 | } |
6177 | 6233 | ||
6178 | static int update_block_group(struct btrfs_trans_handle *trans, | 6234 | static int update_block_group(struct btrfs_trans_handle *trans, |
@@ -6248,6 +6304,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
6248 | trace_btrfs_space_reservation(info, "pinned", | 6304 | trace_btrfs_space_reservation(info, "pinned", |
6249 | cache->space_info->flags, | 6305 | cache->space_info->flags, |
6250 | num_bytes, 1); | 6306 | num_bytes, 1); |
6307 | percpu_counter_add(&cache->space_info->total_bytes_pinned, | ||
6308 | num_bytes); | ||
6251 | set_extent_dirty(info->pinned_extents, | 6309 | set_extent_dirty(info->pinned_extents, |
6252 | bytenr, bytenr + num_bytes - 1, | 6310 | bytenr, bytenr + num_bytes - 1, |
6253 | GFP_NOFS | __GFP_NOFAIL); | 6311 | GFP_NOFS | __GFP_NOFAIL); |
@@ -6324,6 +6382,7 @@ static int pin_down_extent(struct btrfs_fs_info *fs_info, | |||
6324 | 6382 | ||
6325 | trace_btrfs_space_reservation(fs_info, "pinned", | 6383 | trace_btrfs_space_reservation(fs_info, "pinned", |
6326 | cache->space_info->flags, num_bytes, 1); | 6384 | cache->space_info->flags, num_bytes, 1); |
6385 | percpu_counter_add(&cache->space_info->total_bytes_pinned, num_bytes); | ||
6327 | set_extent_dirty(fs_info->pinned_extents, bytenr, | 6386 | set_extent_dirty(fs_info->pinned_extents, bytenr, |
6328 | bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); | 6387 | bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); |
6329 | return 0; | 6388 | return 0; |
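total_bytes_pinned is now charged at the two sites that actually pin extents (update_block_group() above and pin_down_extent() here) instead of speculatively when a free is queued. Being a percpu counter, writes are cheap and readers choose their precision; a sketch of the read side using the standard percpu_counter API (the commit-decision caller is an assumption):

	/* Cheap, possibly stale read. */
	s64 pinned = percpu_counter_read(&space_info->total_bytes_pinned);

	/* Exact sum when it matters, e.g. deciding whether a commit
	 * would free enough pinned space to satisfy a ticket. */
	if (percpu_counter_sum_positive(&space_info->total_bytes_pinned) >=
	    bytes_needed)
		/* ... commit the transaction ... */;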
@@ -6794,27 +6853,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
6794 | return 0; | 6853 | return 0; |
6795 | } | 6854 | } |
6796 | 6855 | ||
6797 | static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes, | ||
6798 | u64 owner, u64 root_objectid) | ||
6799 | { | ||
6800 | struct btrfs_space_info *space_info; | ||
6801 | u64 flags; | ||
6802 | |||
6803 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { | ||
6804 | if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID) | ||
6805 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | ||
6806 | else | ||
6807 | flags = BTRFS_BLOCK_GROUP_METADATA; | ||
6808 | } else { | ||
6809 | flags = BTRFS_BLOCK_GROUP_DATA; | ||
6810 | } | ||
6811 | |||
6812 | space_info = __find_space_info(fs_info, flags); | ||
6813 | BUG_ON(!space_info); /* Logic bug */ | ||
6814 | percpu_counter_add(&space_info->total_bytes_pinned, num_bytes); | ||
6815 | } | ||
6816 | |||
6817 | |||
6818 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 6856 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
6819 | struct btrfs_fs_info *info, | 6857 | struct btrfs_fs_info *info, |
6820 | struct btrfs_delayed_ref_node *node, u64 parent, | 6858 | struct btrfs_delayed_ref_node *node, u64 parent, |
@@ -7037,8 +7075,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
7037 | goto out; | 7075 | goto out; |
7038 | } | 7076 | } |
7039 | } | 7077 | } |
7040 | add_pinned_bytes(info, -num_bytes, owner_objectid, | ||
7041 | root_objectid); | ||
7042 | } else { | 7078 | } else { |
7043 | if (found_extent) { | 7079 | if (found_extent) { |
7044 | BUG_ON(is_data && refs_to_drop != | 7080 | BUG_ON(is_data && refs_to_drop != |
@@ -7170,19 +7206,19 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
7170 | int ret; | 7206 | int ret; |
7171 | 7207 | ||
7172 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | 7208 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
7173 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, | 7209 | int old_ref_mod, new_ref_mod; |
7174 | buf->start, buf->len, | 7210 | |
7175 | parent, | 7211 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start, |
7212 | buf->len, parent, | ||
7176 | root->root_key.objectid, | 7213 | root->root_key.objectid, |
7177 | btrfs_header_level(buf), | 7214 | btrfs_header_level(buf), |
7178 | BTRFS_DROP_DELAYED_REF, NULL); | 7215 | BTRFS_DROP_DELAYED_REF, NULL, |
7216 | &old_ref_mod, &new_ref_mod); | ||
7179 | BUG_ON(ret); /* -ENOMEM */ | 7217 | BUG_ON(ret); /* -ENOMEM */ |
7218 | pin = old_ref_mod >= 0 && new_ref_mod < 0; | ||
7180 | } | 7219 | } |
7181 | 7220 | ||
7182 | if (!last_ref) | 7221 | if (last_ref && btrfs_header_generation(buf) == trans->transid) { |
7183 | return; | ||
7184 | |||
7185 | if (btrfs_header_generation(buf) == trans->transid) { | ||
7186 | struct btrfs_block_group_cache *cache; | 7222 | struct btrfs_block_group_cache *cache; |
7187 | 7223 | ||
7188 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | 7224 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
@@ -7191,6 +7227,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
7191 | goto out; | 7227 | goto out; |
7192 | } | 7228 | } |
7193 | 7229 | ||
7230 | pin = 0; | ||
7194 | cache = btrfs_lookup_block_group(fs_info, buf->start); | 7231 | cache = btrfs_lookup_block_group(fs_info, buf->start); |
7195 | 7232 | ||
7196 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | 7233 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { |
@@ -7206,18 +7243,19 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
7206 | btrfs_free_reserved_bytes(cache, buf->len, 0); | 7243 | btrfs_free_reserved_bytes(cache, buf->len, 0); |
7207 | btrfs_put_block_group(cache); | 7244 | btrfs_put_block_group(cache); |
7208 | trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len); | 7245 | trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len); |
7209 | pin = 0; | ||
7210 | } | 7246 | } |
7211 | out: | 7247 | out: |
7212 | if (pin) | 7248 | if (pin) |
7213 | add_pinned_bytes(fs_info, buf->len, btrfs_header_level(buf), | 7249 | add_pinned_bytes(fs_info, buf->len, btrfs_header_level(buf), |
7214 | root->root_key.objectid); | 7250 | root->root_key.objectid); |
7215 | 7251 | ||
7216 | /* | 7252 | if (last_ref) { |
7217 | * Deleting the buffer, clear the corrupt flag since it doesn't matter | 7253 | /* |
7218 | * anymore. | 7254 | * Deleting the buffer, clear the corrupt flag since it doesn't |
7219 | */ | 7255 | * matter anymore. |
7220 | clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); | 7256 | */ |
7257 | clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); | ||
7258 | } | ||
7221 | } | 7259 | } |
7222 | 7260 | ||
7223 | /* Can return -ENOMEM */ | 7261 | /* Can return -ENOMEM */ |
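The pin decision is now derived from the delayed-ref machinery: btrfs_add_delayed_tree_ref() reports the extent's total ref modification before and after queueing the drop, and the same rule reappears in btrfs_free_extent() below. Condensed, with the reasoning spelled out:

	/* old_ref_mod >= 0 && new_ref_mod < 0: this drop flipped the
	 * in-transaction ref count from "still referenced" to "not
	 * referenced", which is precisely when the block's bytes must
	 * stay pinned until the transaction commits. */
	pin = old_ref_mod >= 0 && new_ref_mod < 0;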
@@ -7226,12 +7264,12 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
7226 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, | 7264 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, |
7227 | u64 owner, u64 offset) | 7265 | u64 owner, u64 offset) |
7228 | { | 7266 | { |
7267 | int old_ref_mod, new_ref_mod; | ||
7229 | int ret; | 7268 | int ret; |
7230 | 7269 | ||
7231 | if (btrfs_is_testing(fs_info)) | 7270 | if (btrfs_is_testing(fs_info)) |
7232 | return 0; | 7271 | return 0; |
7233 | 7272 | ||
7234 | add_pinned_bytes(fs_info, num_bytes, owner, root_objectid); | ||
7235 | 7273 | ||
7236 | /* | 7274 | /* |
7237 | * tree log blocks never actually go into the extent allocation | 7275 | * tree log blocks never actually go into the extent allocation |
@@ -7241,19 +7279,25 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
7241 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); | 7279 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); |
7242 | /* unlocks the pinned mutex */ | 7280 | /* unlocks the pinned mutex */ |
7243 | btrfs_pin_extent(fs_info, bytenr, num_bytes, 1); | 7281 | btrfs_pin_extent(fs_info, bytenr, num_bytes, 1); |
7282 | old_ref_mod = new_ref_mod = 0; | ||
7244 | ret = 0; | 7283 | ret = 0; |
7245 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { | 7284 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
7246 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, | 7285 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, |
7247 | num_bytes, | 7286 | num_bytes, parent, |
7248 | parent, root_objectid, (int)owner, | 7287 | root_objectid, (int)owner, |
7249 | BTRFS_DROP_DELAYED_REF, NULL); | 7288 | BTRFS_DROP_DELAYED_REF, NULL, |
7289 | &old_ref_mod, &new_ref_mod); | ||
7250 | } else { | 7290 | } else { |
7251 | ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, | 7291 | ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, |
7252 | num_bytes, | 7292 | num_bytes, parent, |
7253 | parent, root_objectid, owner, | 7293 | root_objectid, owner, offset, |
7254 | offset, 0, | 7294 | 0, BTRFS_DROP_DELAYED_REF, |
7255 | BTRFS_DROP_DELAYED_REF); | 7295 | &old_ref_mod, &new_ref_mod); |
7256 | } | 7296 | } |
7297 | |||
7298 | if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) | ||
7299 | add_pinned_bytes(fs_info, num_bytes, owner, root_objectid); | ||
7300 | |||
7257 | return ret; | 7301 | return ret; |
7258 | } | 7302 | } |
7259 | 7303 | ||
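add_pinned_bytes() is still called here; only the duplicate definition above was removed (the function now lives earlier in the file, per the "make add_pinned_bytes() take an s64" change in this series). The mapping it applies, as the removed copy shows:

	u64 flags;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		/* Tree blocks: owner is the level, so anything below
		 * the first free objectid is metadata. */
		if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
			flags = BTRFS_BLOCK_GROUP_SYSTEM;
		else
			flags = BTRFS_BLOCK_GROUP_METADATA;
	} else {
		flags = BTRFS_BLOCK_GROUP_DATA;
	}
	space_info = __find_space_info(fs_info, flags);
	percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);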
@@ -7956,7 +8000,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, | |||
7956 | u64 flags; | 8000 | u64 flags; |
7957 | int ret; | 8001 | int ret; |
7958 | 8002 | ||
7959 | flags = btrfs_get_alloc_profile(root, is_data); | 8003 | flags = get_alloc_profile_by_root(root, is_data); |
7960 | again: | 8004 | again: |
7961 | WARN_ON(num_bytes < fs_info->sectorsize); | 8005 | WARN_ON(num_bytes < fs_info->sectorsize); |
7962 | ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size, | 8006 | ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size, |
@@ -8200,9 +8244,9 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
8200 | BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); | 8244 | BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); |
8201 | 8245 | ||
8202 | ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid, | 8246 | ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid, |
8203 | ins->offset, 0, | 8247 | ins->offset, 0, root_objectid, owner, |
8204 | root_objectid, owner, offset, | 8248 | offset, ram_bytes, |
8205 | ram_bytes, BTRFS_ADD_DELAYED_EXTENT); | 8249 | BTRFS_ADD_DELAYED_EXTENT, NULL, NULL); |
8206 | return ret; | 8250 | return ret; |
8207 | } | 8251 | } |
8208 | 8252 | ||
@@ -8422,11 +8466,11 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, | |||
8422 | extent_op->is_data = false; | 8466 | extent_op->is_data = false; |
8423 | extent_op->level = level; | 8467 | extent_op->level = level; |
8424 | 8468 | ||
8425 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, | 8469 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid, |
8426 | ins.objectid, ins.offset, | 8470 | ins.offset, parent, |
8427 | parent, root_objectid, level, | 8471 | root_objectid, level, |
8428 | BTRFS_ADD_DELAYED_EXTENT, | 8472 | BTRFS_ADD_DELAYED_EXTENT, |
8429 | extent_op); | 8473 | extent_op, NULL, NULL); |
8430 | if (ret) | 8474 | if (ret) |
8431 | goto out_free_delayed; | 8475 | goto out_free_delayed; |
8432 | } | 8476 | } |
@@ -10059,19 +10103,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) | |||
10059 | } | 10103 | } |
10060 | 10104 | ||
10061 | trace_btrfs_add_block_group(info, cache, 0); | 10105 | trace_btrfs_add_block_group(info, cache, 0); |
10062 | ret = update_space_info(info, cache->flags, found_key.offset, | 10106 | update_space_info(info, cache->flags, found_key.offset, |
10063 | btrfs_block_group_used(&cache->item), | 10107 | btrfs_block_group_used(&cache->item), |
10064 | cache->bytes_super, &space_info); | 10108 | cache->bytes_super, &space_info); |
10065 | if (ret) { | ||
10066 | btrfs_remove_free_space_cache(cache); | ||
10067 | spin_lock(&info->block_group_cache_lock); | ||
10068 | rb_erase(&cache->cache_node, | ||
10069 | &info->block_group_cache_tree); | ||
10070 | RB_CLEAR_NODE(&cache->cache_node); | ||
10071 | spin_unlock(&info->block_group_cache_lock); | ||
10072 | btrfs_put_block_group(cache); | ||
10073 | goto error; | ||
10074 | } | ||
10075 | 10109 | ||
10076 | cache->space_info = space_info; | 10110 | cache->space_info = space_info; |
10077 | 10111 | ||
@@ -10203,16 +10237,19 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
10203 | } | 10237 | } |
10204 | #endif | 10238 | #endif |
10205 | /* | 10239 | /* |
10206 | * Call to ensure the corresponding space_info object is created and | 10240 | * Ensure the corresponding space_info object is created and |
10207 | * assigned to our block group, but don't update its counters just yet. | 10241 | * assigned to our block group. We want our bg to be added to the rbtree |
10208 | * We want our bg to be added to the rbtree with its ->space_info set. | 10242 | * with its ->space_info set. |
10209 | */ | 10243 | */ |
10210 | ret = update_space_info(fs_info, cache->flags, 0, 0, 0, | 10244 | cache->space_info = __find_space_info(fs_info, cache->flags); |
10211 | &cache->space_info); | 10245 | if (!cache->space_info) { |
10212 | if (ret) { | 10246 | ret = create_space_info(fs_info, cache->flags, |
10213 | btrfs_remove_free_space_cache(cache); | 10247 | &cache->space_info); |
10214 | btrfs_put_block_group(cache); | 10248 | if (ret) { |
10215 | return ret; | 10249 | btrfs_remove_free_space_cache(cache); |
10250 | btrfs_put_block_group(cache); | ||
10251 | return ret; | ||
10252 | } | ||
10216 | } | 10253 | } |
10217 | 10254 | ||
10218 | ret = btrfs_add_block_group_cache(fs_info, cache); | 10255 | ret = btrfs_add_block_group_cache(fs_info, cache); |
@@ -10227,18 +10264,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
10227 | * the rbtree, update the space info's counters. | 10264 | * the rbtree, update the space info's counters. |
10228 | */ | 10265 | */ |
10229 | trace_btrfs_add_block_group(fs_info, cache, 1); | 10266 | trace_btrfs_add_block_group(fs_info, cache, 1); |
10230 | ret = update_space_info(fs_info, cache->flags, size, bytes_used, | 10267 | update_space_info(fs_info, cache->flags, size, bytes_used, |
10231 | cache->bytes_super, &cache->space_info); | 10268 | cache->bytes_super, &cache->space_info); |
10232 | if (ret) { | ||
10233 | btrfs_remove_free_space_cache(cache); | ||
10234 | spin_lock(&fs_info->block_group_cache_lock); | ||
10235 | rb_erase(&cache->cache_node, | ||
10236 | &fs_info->block_group_cache_tree); | ||
10237 | RB_CLEAR_NODE(&cache->cache_node); | ||
10238 | spin_unlock(&fs_info->block_group_cache_lock); | ||
10239 | btrfs_put_block_group(cache); | ||
10240 | return ret; | ||
10241 | } | ||
10242 | update_global_block_rsv(fs_info); | 10269 | update_global_block_rsv(fs_info); |
10243 | 10270 | ||
10244 | __link_block_group(cache->space_info, cache); | 10271 | __link_block_group(cache->space_info, cache); |
@@ -10786,21 +10813,21 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info) | |||
10786 | mixed = 1; | 10813 | mixed = 1; |
10787 | 10814 | ||
10788 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | 10815 | flags = BTRFS_BLOCK_GROUP_SYSTEM; |
10789 | ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info); | 10816 | ret = create_space_info(fs_info, flags, &space_info); |
10790 | if (ret) | 10817 | if (ret) |
10791 | goto out; | 10818 | goto out; |
10792 | 10819 | ||
10793 | if (mixed) { | 10820 | if (mixed) { |
10794 | flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; | 10821 | flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; |
10795 | ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info); | 10822 | ret = create_space_info(fs_info, flags, &space_info); |
10796 | } else { | 10823 | } else { |
10797 | flags = BTRFS_BLOCK_GROUP_METADATA; | 10824 | flags = BTRFS_BLOCK_GROUP_METADATA; |
10798 | ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info); | 10825 | ret = create_space_info(fs_info, flags, &space_info); |
10799 | if (ret) | 10826 | if (ret) |
10800 | goto out; | 10827 | goto out; |
10801 | 10828 | ||
10802 | flags = BTRFS_BLOCK_GROUP_DATA; | 10829 | flags = BTRFS_BLOCK_GROUP_DATA; |
10803 | ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info); | 10830 | ret = create_space_info(fs_info, flags, &space_info); |
10804 | } | 10831 | } |
10805 | out: | 10832 | out: |
10806 | return ret; | 10833 | return ret; |
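update_space_info() previously doubled as the constructor; with creation split into create_space_info(), the update path can no longer fail, which is why the error-unwind blocks in the btrfs_read_block_groups() and btrfs_make_block_group() hunks above disappear. A simplified sketch of the constructor (an assumption; the upstream version also initializes ticket lists, waitqueues and the sysfs kobject):

static int create_space_info(struct btrfs_fs_info *info, u64 flags,
			     struct btrfs_space_info **new)
{
	struct btrfs_space_info *space_info;
	int ret;

	space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
	if (!space_info)
		return -ENOMEM;

	ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
				  GFP_KERNEL);
	if (ret) {
		kfree(space_info);
		return ret;
	}

	space_info->flags = flags;
	spin_lock_init(&space_info->lock);
	/* ... list/waitqueue/kobject init, link into fs_info ... */
	*new = space_info;
	return 0;
}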
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d1cd60140817..7a18b5762ac9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -87,19 +87,9 @@ void btrfs_leak_debug_check(void) | |||
87 | static inline void __btrfs_debug_check_extent_io_range(const char *caller, | 87 | static inline void __btrfs_debug_check_extent_io_range(const char *caller, |
88 | struct extent_io_tree *tree, u64 start, u64 end) | 88 | struct extent_io_tree *tree, u64 start, u64 end) |
89 | { | 89 | { |
90 | struct inode *inode; | 90 | if (tree->ops && tree->ops->check_extent_io_range) |
91 | u64 isize; | 91 | tree->ops->check_extent_io_range(tree->private_data, caller, |
92 | 92 | start, end); | |
93 | if (!tree->mapping) | ||
94 | return; | ||
95 | |||
96 | inode = tree->mapping->host; | ||
97 | isize = i_size_read(inode); | ||
98 | if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { | ||
99 | btrfs_debug_rl(BTRFS_I(inode)->root->fs_info, | ||
100 | "%s: ino %llu isize %llu odd range [%llu,%llu]", | ||
101 | caller, btrfs_ino(BTRFS_I(inode)), isize, start, end); | ||
102 | } | ||
103 | } | 93 | } |
104 | #else | 94 | #else |
105 | #define btrfs_leak_debug_add(new, head) do {} while (0) | 95 | #define btrfs_leak_debug_add(new, head) do {} while (0) |
@@ -154,9 +144,9 @@ static noinline void flush_write_bio(void *data); | |||
154 | static inline struct btrfs_fs_info * | 144 | static inline struct btrfs_fs_info * |
155 | tree_fs_info(struct extent_io_tree *tree) | 145 | tree_fs_info(struct extent_io_tree *tree) |
156 | { | 146 | { |
157 | if (!tree->mapping) | 147 | if (tree->ops) |
158 | return NULL; | 148 | return tree->ops->tree_fs_info(tree->private_data); |
159 | return btrfs_sb(tree->mapping->host->i_sb); | 149 | return NULL; |
160 | } | 150 | } |
161 | 151 | ||
162 | int __init extent_io_init(void) | 152 | int __init extent_io_init(void) |
@@ -214,13 +204,13 @@ void extent_io_exit(void) | |||
214 | } | 204 | } |
215 | 205 | ||
216 | void extent_io_tree_init(struct extent_io_tree *tree, | 206 | void extent_io_tree_init(struct extent_io_tree *tree, |
217 | struct address_space *mapping) | 207 | void *private_data) |
218 | { | 208 | { |
219 | tree->state = RB_ROOT; | 209 | tree->state = RB_ROOT; |
220 | tree->ops = NULL; | 210 | tree->ops = NULL; |
221 | tree->dirty_bytes = 0; | 211 | tree->dirty_bytes = 0; |
222 | spin_lock_init(&tree->lock); | 212 | spin_lock_init(&tree->lock); |
223 | tree->mapping = mapping; | 213 | tree->private_data = private_data; |
224 | } | 214 | } |
225 | 215 | ||
226 | static struct extent_state *alloc_extent_state(gfp_t mask) | 216 | static struct extent_state *alloc_extent_state(gfp_t mask) |
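extent_io_tree stops hard-coding an address_space: it carries an opaque private_data cookie, and every hook receives that cookie back (see the merge/set/clear/split hook hunks below). For inode-backed trees the cookie is the inode itself; decoupling the tree from a mapping is the prep work for btree_inode removal mentioned in the merge log. A hedged sketch of the inode-side init, plus a speculative btree-side one once the btree inode goes away:

	/* Inode case: hooks cast private_data back to struct inode. */
	extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode);

	/* Possible btree case after btree_inode removal (assumption,
	 * some_btree_tree is hypothetical): fs_info becomes the cookie. */
	extent_io_tree_init(&some_btree_tree, fs_info);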
@@ -370,8 +360,7 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, | |||
370 | struct extent_state *other) | 360 | struct extent_state *other) |
371 | { | 361 | { |
372 | if (tree->ops && tree->ops->merge_extent_hook) | 362 | if (tree->ops && tree->ops->merge_extent_hook) |
373 | tree->ops->merge_extent_hook(tree->mapping->host, new, | 363 | tree->ops->merge_extent_hook(tree->private_data, new, other); |
374 | other); | ||
375 | } | 364 | } |
376 | 365 | ||
377 | /* | 366 | /* |
@@ -422,15 +411,14 @@ static void set_state_cb(struct extent_io_tree *tree, | |||
422 | struct extent_state *state, unsigned *bits) | 411 | struct extent_state *state, unsigned *bits) |
423 | { | 412 | { |
424 | if (tree->ops && tree->ops->set_bit_hook) | 413 | if (tree->ops && tree->ops->set_bit_hook) |
425 | tree->ops->set_bit_hook(tree->mapping->host, state, bits); | 414 | tree->ops->set_bit_hook(tree->private_data, state, bits); |
426 | } | 415 | } |
427 | 416 | ||
428 | static void clear_state_cb(struct extent_io_tree *tree, | 417 | static void clear_state_cb(struct extent_io_tree *tree, |
429 | struct extent_state *state, unsigned *bits) | 418 | struct extent_state *state, unsigned *bits) |
430 | { | 419 | { |
431 | if (tree->ops && tree->ops->clear_bit_hook) | 420 | if (tree->ops && tree->ops->clear_bit_hook) |
432 | tree->ops->clear_bit_hook(BTRFS_I(tree->mapping->host), | 421 | tree->ops->clear_bit_hook(tree->private_data, state, bits); |
433 | state, bits); | ||
434 | } | 422 | } |
435 | 423 | ||
436 | static void set_state_bits(struct extent_io_tree *tree, | 424 | static void set_state_bits(struct extent_io_tree *tree, |
@@ -479,7 +467,7 @@ static void split_cb(struct extent_io_tree *tree, struct extent_state *orig, | |||
479 | u64 split) | 467 | u64 split) |
480 | { | 468 | { |
481 | if (tree->ops && tree->ops->split_extent_hook) | 469 | if (tree->ops && tree->ops->split_extent_hook) |
482 | tree->ops->split_extent_hook(tree->mapping->host, orig, split); | 470 | tree->ops->split_extent_hook(tree->private_data, orig, split); |
483 | } | 471 | } |
484 | 472 | ||
485 | /* | 473 | /* |
@@ -1403,17 +1391,7 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) | |||
1403 | */ | 1391 | */ |
1404 | static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) | 1392 | static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) |
1405 | { | 1393 | { |
1406 | unsigned long index = start >> PAGE_SHIFT; | 1394 | tree->ops->set_range_writeback(tree->private_data, start, end); |
1407 | unsigned long end_index = end >> PAGE_SHIFT; | ||
1408 | struct page *page; | ||
1409 | |||
1410 | while (index <= end_index) { | ||
1411 | page = find_get_page(tree->mapping, index); | ||
1412 | BUG_ON(!page); /* Pages should be in the extent_io_tree */ | ||
1413 | set_page_writeback(page); | ||
1414 | put_page(page); | ||
1415 | index++; | ||
1416 | } | ||
1417 | } | 1395 | } |
1418 | 1396 | ||
1419 | /* find the first state struct with 'bits' set after 'start', and | 1397 | /* find the first state struct with 'bits' set after 'start', and |
@@ -1962,11 +1940,12 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) | |||
1962 | SetPageUptodate(page); | 1940 | SetPageUptodate(page); |
1963 | } | 1941 | } |
1964 | 1942 | ||
1965 | int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec) | 1943 | int free_io_failure(struct extent_io_tree *failure_tree, |
1944 | struct extent_io_tree *io_tree, | ||
1945 | struct io_failure_record *rec) | ||
1966 | { | 1946 | { |
1967 | int ret; | 1947 | int ret; |
1968 | int err = 0; | 1948 | int err = 0; |
1969 | struct extent_io_tree *failure_tree = &inode->io_failure_tree; | ||
1970 | 1949 | ||
1971 | set_state_failrec(failure_tree, rec->start, NULL); | 1950 | set_state_failrec(failure_tree, rec->start, NULL); |
1972 | ret = clear_extent_bits(failure_tree, rec->start, | 1951 | ret = clear_extent_bits(failure_tree, rec->start, |
@@ -1975,7 +1954,7 @@ int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec) | |||
1975 | if (ret) | 1954 | if (ret) |
1976 | err = ret; | 1955 | err = ret; |
1977 | 1956 | ||
1978 | ret = clear_extent_bits(&inode->io_tree, rec->start, | 1957 | ret = clear_extent_bits(io_tree, rec->start, |
1979 | rec->start + rec->len - 1, | 1958 | rec->start + rec->len - 1, |
1980 | EXTENT_DAMAGED); | 1959 | EXTENT_DAMAGED); |
1981 | if (ret && !err) | 1960 | if (ret && !err) |
@@ -1995,11 +1974,10 @@ int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec) | |||
1995 | * currently, there can be no more than two copies of every data bit. thus, | 1974 | * currently, there can be no more than two copies of every data bit. thus, |
1996 | * exactly one rewrite is required. | 1975 | * exactly one rewrite is required. |
1997 | */ | 1976 | */ |
1998 | int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, | 1977 | int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, |
1999 | u64 logical, struct page *page, | 1978 | u64 length, u64 logical, struct page *page, |
2000 | unsigned int pg_offset, int mirror_num) | 1979 | unsigned int pg_offset, int mirror_num) |
2001 | { | 1980 | { |
2002 | struct btrfs_fs_info *fs_info = inode->root->fs_info; | ||
2003 | struct bio *bio; | 1981 | struct bio *bio; |
2004 | struct btrfs_device *dev; | 1982 | struct btrfs_device *dev; |
2005 | u64 map_length = 0; | 1983 | u64 map_length = 0; |
@@ -2010,9 +1988,7 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, | |||
2010 | ASSERT(!(fs_info->sb->s_flags & MS_RDONLY)); | 1988 | ASSERT(!(fs_info->sb->s_flags & MS_RDONLY)); |
2011 | BUG_ON(!mirror_num); | 1989 | BUG_ON(!mirror_num); |
2012 | 1990 | ||
2013 | bio = btrfs_io_bio_alloc(GFP_NOFS, 1); | 1991 | bio = btrfs_io_bio_alloc(1); |
2014 | if (!bio) | ||
2015 | return -EIO; | ||
2016 | bio->bi_iter.bi_size = 0; | 1992 | bio->bi_iter.bi_size = 0; |
2017 | map_length = length; | 1993 | map_length = length; |
2018 | 1994 | ||
@@ -2071,7 +2047,7 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, | |||
2071 | 2047 | ||
2072 | btrfs_info_rl_in_rcu(fs_info, | 2048 | btrfs_info_rl_in_rcu(fs_info, |
2073 | "read error corrected: ino %llu off %llu (dev %s sector %llu)", | 2049 | "read error corrected: ino %llu off %llu (dev %s sector %llu)", |
2074 | btrfs_ino(inode), start, | 2050 | ino, start, |
2075 | rcu_str_deref(dev->name), sector); | 2051 | rcu_str_deref(dev->name), sector); |
2076 | btrfs_bio_counter_dec(fs_info); | 2052 | btrfs_bio_counter_dec(fs_info); |
2077 | bio_put(bio); | 2053 | bio_put(bio); |
@@ -2091,8 +2067,7 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info, | |||
2091 | for (i = 0; i < num_pages; i++) { | 2067 | for (i = 0; i < num_pages; i++) { |
2092 | struct page *p = eb->pages[i]; | 2068 | struct page *p = eb->pages[i]; |
2093 | 2069 | ||
2094 | ret = repair_io_failure(BTRFS_I(fs_info->btree_inode), start, | 2070 | ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p, |
2095 | PAGE_SIZE, start, p, | ||
2096 | start - page_offset(p), mirror_num); | 2071 | start - page_offset(p), mirror_num); |
2097 | if (ret) | 2072 | if (ret) |
2098 | break; | 2073 | break; |
@@ -2106,24 +2081,24 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info, | |||
2106 | * each time an IO finishes, we do a fast check in the IO failure tree | 2081 | * each time an IO finishes, we do a fast check in the IO failure tree |
2107 | * to see if we need to process or clean up an io_failure_record | 2082 | * to see if we need to process or clean up an io_failure_record |
2108 | */ | 2083 | */ |
2109 | int clean_io_failure(struct btrfs_inode *inode, u64 start, struct page *page, | 2084 | int clean_io_failure(struct btrfs_fs_info *fs_info, |
2110 | unsigned int pg_offset) | 2085 | struct extent_io_tree *failure_tree, |
2086 | struct extent_io_tree *io_tree, u64 start, | ||
2087 | struct page *page, u64 ino, unsigned int pg_offset) | ||
2111 | { | 2088 | { |
2112 | u64 private; | 2089 | u64 private; |
2113 | struct io_failure_record *failrec; | 2090 | struct io_failure_record *failrec; |
2114 | struct btrfs_fs_info *fs_info = inode->root->fs_info; | ||
2115 | struct extent_state *state; | 2091 | struct extent_state *state; |
2116 | int num_copies; | 2092 | int num_copies; |
2117 | int ret; | 2093 | int ret; |
2118 | 2094 | ||
2119 | private = 0; | 2095 | private = 0; |
2120 | ret = count_range_bits(&inode->io_failure_tree, &private, | 2096 | ret = count_range_bits(failure_tree, &private, (u64)-1, 1, |
2121 | (u64)-1, 1, EXTENT_DIRTY, 0); | 2097 | EXTENT_DIRTY, 0); |
2122 | if (!ret) | 2098 | if (!ret) |
2123 | return 0; | 2099 | return 0; |
2124 | 2100 | ||
2125 | ret = get_state_failrec(&inode->io_failure_tree, start, | 2101 | ret = get_state_failrec(failure_tree, start, &failrec); |
2126 | &failrec); | ||
2127 | if (ret) | 2102 | if (ret) |
2128 | return 0; | 2103 | return 0; |
2129 | 2104 | ||
@@ -2139,25 +2114,25 @@ int clean_io_failure(struct btrfs_inode *inode, u64 start, struct page *page, | |||
2139 | if (fs_info->sb->s_flags & MS_RDONLY) | 2114 | if (fs_info->sb->s_flags & MS_RDONLY) |
2140 | goto out; | 2115 | goto out; |
2141 | 2116 | ||
2142 | spin_lock(&inode->io_tree.lock); | 2117 | spin_lock(&io_tree->lock); |
2143 | state = find_first_extent_bit_state(&inode->io_tree, | 2118 | state = find_first_extent_bit_state(io_tree, |
2144 | failrec->start, | 2119 | failrec->start, |
2145 | EXTENT_LOCKED); | 2120 | EXTENT_LOCKED); |
2146 | spin_unlock(&inode->io_tree.lock); | 2121 | spin_unlock(&io_tree->lock); |
2147 | 2122 | ||
2148 | if (state && state->start <= failrec->start && | 2123 | if (state && state->start <= failrec->start && |
2149 | state->end >= failrec->start + failrec->len - 1) { | 2124 | state->end >= failrec->start + failrec->len - 1) { |
2150 | num_copies = btrfs_num_copies(fs_info, failrec->logical, | 2125 | num_copies = btrfs_num_copies(fs_info, failrec->logical, |
2151 | failrec->len); | 2126 | failrec->len); |
2152 | if (num_copies > 1) { | 2127 | if (num_copies > 1) { |
2153 | repair_io_failure(inode, start, failrec->len, | 2128 | repair_io_failure(fs_info, ino, start, failrec->len, |
2154 | failrec->logical, page, | 2129 | failrec->logical, page, pg_offset, |
2155 | pg_offset, failrec->failed_mirror); | 2130 | failrec->failed_mirror); |
2156 | } | 2131 | } |
2157 | } | 2132 | } |
2158 | 2133 | ||
2159 | out: | 2134 | out: |
2160 | free_io_failure(inode, failrec); | 2135 | free_io_failure(failure_tree, io_tree, failrec); |
2161 | 2136 | ||
2162 | return 0; | 2137 | return 0; |
2163 | } | 2138 | } |
@@ -2357,10 +2332,7 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, | |||
2357 | struct btrfs_io_bio *btrfs_failed_bio; | 2332 | struct btrfs_io_bio *btrfs_failed_bio; |
2358 | struct btrfs_io_bio *btrfs_bio; | 2333 | struct btrfs_io_bio *btrfs_bio; |
2359 | 2334 | ||
2360 | bio = btrfs_io_bio_alloc(GFP_NOFS, 1); | 2335 | bio = btrfs_io_bio_alloc(1); |
2361 | if (!bio) | ||
2362 | return NULL; | ||
2363 | |||
2364 | bio->bi_end_io = endio_func; | 2336 | bio->bi_end_io = endio_func; |
2365 | bio->bi_iter.bi_sector = failrec->logical >> 9; | 2337 | bio->bi_iter.bi_sector = failrec->logical >> 9; |
2366 | bio->bi_bdev = fs_info->fs_devices->latest_bdev; | 2338 | bio->bi_bdev = fs_info->fs_devices->latest_bdev; |
@@ -2398,6 +2370,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2398 | struct io_failure_record *failrec; | 2370 | struct io_failure_record *failrec; |
2399 | struct inode *inode = page->mapping->host; | 2371 | struct inode *inode = page->mapping->host; |
2400 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; | 2372 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; |
2373 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | ||
2401 | struct bio *bio; | 2374 | struct bio *bio; |
2402 | int read_mode = 0; | 2375 | int read_mode = 0; |
2403 | blk_status_t status; | 2376 | blk_status_t status; |
@@ -2411,7 +2384,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2411 | 2384 | ||
2412 | ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror); | 2385 | ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror); |
2413 | if (!ret) { | 2386 | if (!ret) { |
2414 | free_io_failure(BTRFS_I(inode), failrec); | 2387 | free_io_failure(failure_tree, tree, failrec); |
2415 | return -EIO; | 2388 | return -EIO; |
2416 | } | 2389 | } |
2417 | 2390 | ||
@@ -2424,7 +2397,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2424 | (int)phy_offset, failed_bio->bi_end_io, | 2397 | (int)phy_offset, failed_bio->bi_end_io, |
2425 | NULL); | 2398 | NULL); |
2426 | if (!bio) { | 2399 | if (!bio) { |
2427 | free_io_failure(BTRFS_I(inode), failrec); | 2400 | free_io_failure(failure_tree, tree, failrec); |
2428 | return -EIO; | 2401 | return -EIO; |
2429 | } | 2402 | } |
2430 | bio_set_op_attrs(bio, REQ_OP_READ, read_mode); | 2403 | bio_set_op_attrs(bio, REQ_OP_READ, read_mode); |
@@ -2433,10 +2406,10 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2433 | "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d", | 2406 | "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d", |
2434 | read_mode, failrec->this_mirror, failrec->in_validation); | 2407 | read_mode, failrec->this_mirror, failrec->in_validation); |
2435 | 2408 | ||
2436 | status = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror, | 2409 | status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror, |
2437 | failrec->bio_flags, 0); | 2410 | failrec->bio_flags, 0); |
2438 | if (status) { | 2411 | if (status) { |
2439 | free_io_failure(BTRFS_I(inode), failrec); | 2412 | free_io_failure(failure_tree, tree, failrec); |
2440 | bio_put(bio); | 2413 | bio_put(bio); |
2441 | ret = blk_status_to_errno(status); | 2414 | ret = blk_status_to_errno(status); |
2442 | } | 2415 | } |
@@ -2542,7 +2515,7 @@ static void end_bio_extent_readpage(struct bio *bio) | |||
2542 | struct bio_vec *bvec; | 2515 | struct bio_vec *bvec; |
2543 | int uptodate = !bio->bi_status; | 2516 | int uptodate = !bio->bi_status; |
2544 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); | 2517 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); |
2545 | struct extent_io_tree *tree; | 2518 | struct extent_io_tree *tree, *failure_tree; |
2546 | u64 offset = 0; | 2519 | u64 offset = 0; |
2547 | u64 start; | 2520 | u64 start; |
2548 | u64 end; | 2521 | u64 end; |
@@ -2563,6 +2536,7 @@ static void end_bio_extent_readpage(struct bio *bio) | |||
2563 | (u64)bio->bi_iter.bi_sector, bio->bi_status, | 2536 | (u64)bio->bi_iter.bi_sector, bio->bi_status, |
2564 | io_bio->mirror_num); | 2537 | io_bio->mirror_num); |
2565 | tree = &BTRFS_I(inode)->io_tree; | 2538 | tree = &BTRFS_I(inode)->io_tree; |
2539 | failure_tree = &BTRFS_I(inode)->io_failure_tree; | ||
2566 | 2540 | ||
2567 | /* We always issue full-page reads, but if some block | 2541 | /* We always issue full-page reads, but if some block |
2568 | * in a page fails to read, blk_update_request() will | 2542 | * in a page fails to read, blk_update_request() will |
@@ -2592,8 +2566,10 @@ static void end_bio_extent_readpage(struct bio *bio) | |||
2592 | if (ret) | 2566 | if (ret) |
2593 | uptodate = 0; | 2567 | uptodate = 0; |
2594 | else | 2568 | else |
2595 | clean_io_failure(BTRFS_I(inode), start, | 2569 | clean_io_failure(BTRFS_I(inode)->root->fs_info, |
2596 | page, 0); | 2570 | failure_tree, tree, start, |
2571 | page, | ||
2572 | btrfs_ino(BTRFS_I(inode)), 0); | ||
2597 | } | 2573 | } |
2598 | 2574 | ||
2599 | if (likely(uptodate)) | 2575 | if (likely(uptodate)) |
@@ -2682,67 +2658,70 @@ readpage_ok: | |||
2682 | } | 2658 | } |
2683 | 2659 | ||
2684 | /* | 2660 | /* |
2685 | * this allocates from the btrfs_bioset. We're returning a bio right now | 2661 | * Initialize the members up to but not including 'bio'. Use after allocating a |
2686 | * but you can call btrfs_io_bio for the appropriate container_of magic | 2662 | * new bio by bio_alloc_bioset as it does not initialize the bytes outside of |
2663 | * 'bio' because use of __GFP_ZERO is not supported. | ||
2687 | */ | 2664 | */ |
2688 | struct bio * | 2665 | static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio) |
2689 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | ||
2690 | gfp_t gfp_flags) | ||
2691 | { | 2666 | { |
2692 | struct btrfs_io_bio *btrfs_bio; | 2667 | memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio)); |
2693 | struct bio *bio; | 2668 | } |
2694 | |||
2695 | bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset); | ||
2696 | 2669 | ||
2697 | if (bio == NULL && (current->flags & PF_MEMALLOC)) { | 2670 | /* |
2698 | while (!bio && (nr_vecs /= 2)) { | 2671 | * The following helpers allocate a bio. As it's backed by a bioset, it'll |
2699 | bio = bio_alloc_bioset(gfp_flags, | 2672 | * never fail. We're returning a bio right now but you can call btrfs_io_bio |
2700 | nr_vecs, btrfs_bioset); | 2673 | * for the appropriate container_of magic |
2701 | } | 2674 | */ |
2702 | } | 2675 | struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte) |
2676 | { | ||
2677 | struct bio *bio; | ||
2703 | 2678 | ||
2704 | if (bio) { | 2679 | bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset); |
2705 | bio->bi_bdev = bdev; | 2680 | bio->bi_bdev = bdev; |
2706 | bio->bi_iter.bi_sector = first_sector; | 2681 | bio->bi_iter.bi_sector = first_byte >> 9; |
2707 | btrfs_bio = btrfs_io_bio(bio); | 2682 | btrfs_io_bio_init(btrfs_io_bio(bio)); |
2708 | btrfs_bio->csum = NULL; | ||
2709 | btrfs_bio->csum_allocated = NULL; | ||
2710 | btrfs_bio->end_io = NULL; | ||
2711 | } | ||
2712 | return bio; | 2683 | return bio; |
2713 | } | 2684 | } |
2714 | 2685 | ||
2715 | struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) | 2686 | struct bio *btrfs_bio_clone(struct bio *bio) |
2716 | { | 2687 | { |
2717 | struct btrfs_io_bio *btrfs_bio; | 2688 | struct btrfs_io_bio *btrfs_bio; |
2718 | struct bio *new; | 2689 | struct bio *new; |
2719 | 2690 | ||
2720 | new = bio_clone_bioset(bio, gfp_mask, btrfs_bioset); | 2691 | /* Bio allocation backed by a bioset does not fail */ |
2721 | if (new) { | 2692 | new = bio_clone_fast(bio, GFP_NOFS, btrfs_bioset); |
2722 | btrfs_bio = btrfs_io_bio(new); | 2693 | btrfs_bio = btrfs_io_bio(new); |
2723 | btrfs_bio->csum = NULL; | 2694 | btrfs_io_bio_init(btrfs_bio); |
2724 | btrfs_bio->csum_allocated = NULL; | 2695 | btrfs_bio->iter = bio->bi_iter; |
2725 | btrfs_bio->end_io = NULL; | ||
2726 | } | ||
2727 | return new; | 2696 | return new; |
2728 | } | 2697 | } |
2729 | 2698 | ||
2730 | /* this also allocates from the btrfs_bioset */ | 2699 | struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs) |
2731 | struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) | ||
2732 | { | 2700 | { |
2733 | struct btrfs_io_bio *btrfs_bio; | ||
2734 | struct bio *bio; | 2701 | struct bio *bio; |
2735 | 2702 | ||
2736 | bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); | 2703 | /* Bio allocation backed by a bioset does not fail */ |
2737 | if (bio) { | 2704 | bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, btrfs_bioset); |
2738 | btrfs_bio = btrfs_io_bio(bio); | 2705 | btrfs_io_bio_init(btrfs_io_bio(bio)); |
2739 | btrfs_bio->csum = NULL; | ||
2740 | btrfs_bio->csum_allocated = NULL; | ||
2741 | btrfs_bio->end_io = NULL; | ||
2742 | } | ||
2743 | return bio; | 2706 | return bio; |
2744 | } | 2707 | } |
2745 | 2708 | ||
2709 | struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size) | ||
2710 | { | ||
2711 | struct bio *bio; | ||
2712 | struct btrfs_io_bio *btrfs_bio; | ||
2713 | |||
2714 | /* this will never fail when it's backed by a bioset */ | ||
2715 | bio = bio_clone_fast(orig, GFP_NOFS, btrfs_bioset); | ||
2716 | ASSERT(bio); | ||
2717 | |||
2718 | btrfs_bio = btrfs_io_bio(bio); | ||
2719 | btrfs_io_bio_init(btrfs_bio); | ||
2720 | |||
2721 | bio_trim(bio, offset >> 9, size >> 9); | ||
2722 | btrfs_bio->iter = bio->bi_iter; | ||
2723 | return bio; | ||
2724 | } | ||
2746 | 2725 | ||
2747 | static int __must_check submit_one_bio(struct bio *bio, int mirror_num, | 2726 | static int __must_check submit_one_bio(struct bio *bio, int mirror_num, |
2748 | unsigned long bio_flags) | 2727 | unsigned long bio_flags) |
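btrfs_io_bio_init() zeroes only the bytes before the embedded bio, so the layout must keep 'bio' as the last member; bio_alloc_bioset() then constructs the bio part in place, and the GFP_NOFS bioset allocation cannot fail, which is what lets the NULL checks above go away. Layout sketch (field list abbreviated and partly assumed):

struct btrfs_io_bio {
	unsigned int mirror_num;
	unsigned int stripe_index;
	u8 *csum;
	u8 *csum_allocated;
	btrfs_io_bio_end_io_t *end_io;
	struct bvec_iter iter;	/* saved by the clone helpers above */
	/*
	 * Must stay last: btrfs_io_bio_init() does
	 * memset(b, 0, offsetof(struct btrfs_io_bio, bio)).
	 */
	struct bio bio;
};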
@@ -2759,7 +2738,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num, | |||
2759 | bio_get(bio); | 2738 | bio_get(bio); |
2760 | 2739 | ||
2761 | if (tree->ops) | 2740 | if (tree->ops) |
2762 | ret = tree->ops->submit_bio_hook(page->mapping->host, bio, | 2741 | ret = tree->ops->submit_bio_hook(tree->private_data, bio, |
2763 | mirror_num, bio_flags, start); | 2742 | mirror_num, bio_flags, start); |
2764 | else | 2743 | else |
2765 | btrfsic_submit_bio(bio); | 2744 | btrfsic_submit_bio(bio); |
@@ -2822,11 +2801,7 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree, | |||
2822 | } | 2801 | } |
2823 | } | 2802 | } |
2824 | 2803 | ||
2825 | bio = btrfs_bio_alloc(bdev, sector, BIO_MAX_PAGES, | 2804 | bio = btrfs_bio_alloc(bdev, sector << 9); |
2826 | GFP_NOFS | __GFP_HIGH); | ||
2827 | if (!bio) | ||
2828 | return -ENOMEM; | ||
2829 | |||
2830 | bio_add_page(bio, page, page_size, offset); | 2805 | bio_add_page(bio, page, page_size, offset); |
2831 | bio->bi_end_io = end_io_func; | 2806 | bio->bi_end_io = end_io_func; |
2832 | bio->bi_private = tree; | 2807 | bio->bi_private = tree; |
@@ -3762,7 +3737,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, | |||
3762 | * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0 | 3737 | * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0 |
3763 | */ | 3738 | */ |
3764 | start = btrfs_item_nr_offset(nritems); | 3739 | start = btrfs_item_nr_offset(nritems); |
3765 | end = btrfs_leaf_data(eb) + leaf_data_end(fs_info, eb); | 3740 | end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, eb); |
3766 | memzero_extent_buffer(eb, start, end - start); | 3741 | memzero_extent_buffer(eb, start, end - start); |
3767 | } | 3742 | } |
3768 | 3743 | ||
@@ -4468,29 +4443,25 @@ try_submit_last: | |||
4468 | } | 4443 | } |
4469 | 4444 | ||
4470 | /* | 4445 | /* |
4471 | * Sanity check for fiemap cache | 4446 | * Emit last fiemap cache |
4472 | * | 4447 | * |
4473 | * All fiemap cache should be submitted by emit_fiemap_extent() | 4448 | * The last fiemap cache may still be cached in the following case: |
4474 | * Iteration should be terminated either by last fiemap extent or | 4449 | * 0 4k 8k |
4475 | * fieinfo->fi_extents_max. | 4450 | * |<- Fiemap range ->| |
4476 | * So no cached fiemap should exist. | 4451 | * |<------------ First extent ----------->| |
4452 | * | ||
4453 | * In this case, the first extent range will be cached but not emitted. | ||
4454 | * So we must emit it before ending extent_fiemap(). | ||
4477 | */ | 4455 | */ |
4478 | static int check_fiemap_cache(struct btrfs_fs_info *fs_info, | 4456 | static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info, |
4479 | struct fiemap_extent_info *fieinfo, | 4457 | struct fiemap_extent_info *fieinfo, |
4480 | struct fiemap_cache *cache) | 4458 | struct fiemap_cache *cache) |
4481 | { | 4459 | { |
4482 | int ret; | 4460 | int ret; |
4483 | 4461 | ||
4484 | if (!cache->cached) | 4462 | if (!cache->cached) |
4485 | return 0; | 4463 | return 0; |
4486 | 4464 | ||
4487 | /* Small and recoverable problem, only to inform developers */ | ||
4488 | #ifdef CONFIG_BTRFS_DEBUG | ||
4489 | WARN_ON(1); | ||
4490 | #endif | ||
4491 | btrfs_warn(fs_info, | ||
4492 | "unhandled fiemap cache detected: offset=%llu phys=%llu len=%llu flags=0x%x", | ||
4493 | cache->offset, cache->phys, cache->len, cache->flags); | ||
4494 | ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, | 4465 | ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, |
4495 | cache->len, cache->flags); | 4466 | cache->len, cache->flags); |
4496 | cache->cached = false; | 4467 | cache->cached = false; |
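The rename reflects a behavior change: a cached entry at loop exit is now expected, not a developer bug. It occurs whenever the first on-disk extent starts before the requested range, as the new diagram shows, because emit_fiemap_extent() (not part of this excerpt) caches an extent and only flushes it when the next one cannot be merged. Assumed merge rule inside that helper:

	/* Merge only when both logical and physical ranges are
	 * contiguous and the flags match; otherwise flush the cached
	 * entry via fiemap_fill_next_extent() and cache the new one. */
	if (cache->cached &&
	    cache->offset + cache->len == offset &&
	    cache->phys + cache->len == phys &&
	    cache->flags == flags)
		cache->len += len;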
@@ -4706,7 +4677,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4706 | } | 4677 | } |
4707 | out_free: | 4678 | out_free: |
4708 | if (!ret) | 4679 | if (!ret) |
4709 | ret = check_fiemap_cache(root->fs_info, fieinfo, &cache); | 4680 | ret = emit_last_fiemap_cache(root->fs_info, fieinfo, &cache); |
4710 | free_extent_map(em); | 4681 | free_extent_map(em); |
4711 | out: | 4682 | out: |
4712 | btrfs_free_path(path); | 4683 | btrfs_free_path(path); |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 487ca0207cb6..3fb8513bf02e 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -92,9 +92,9 @@ struct btrfs_inode; | |||
92 | struct btrfs_io_bio; | 92 | struct btrfs_io_bio; |
93 | struct io_failure_record; | 93 | struct io_failure_record; |
94 | 94 | ||
95 | typedef blk_status_t (extent_submit_bio_hook_t)(struct inode *inode, | 95 | typedef blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *bio, |
96 | struct bio *bio, int mirror_num, unsigned long bio_flags, | 96 | int mirror_num, unsigned long bio_flags, |
97 | u64 bio_offset); | 97 | u64 bio_offset); |
98 | struct extent_io_ops { | 98 | struct extent_io_ops { |
99 | /* | 99 | /* |
100 | * The following callbacks must always be defined, the function | 100 |
@@ -108,32 +108,36 @@ struct extent_io_ops { | |||
108 | size_t size, struct bio *bio, | 108 | size_t size, struct bio *bio, |
109 | unsigned long bio_flags); | 109 | unsigned long bio_flags); |
110 | int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); | 110 | int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); |
111 | struct btrfs_fs_info *(*tree_fs_info)(void *private_data); | ||
112 | void (*set_range_writeback)(void *private_data, u64 start, u64 end); | ||
111 | 113 | ||
112 | /* | 114 | /* |
113 | * Optional hooks, called if the pointer is not NULL | 115 | * Optional hooks, called if the pointer is not NULL |
114 | */ | 116 | */ |
115 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, | 117 | int (*fill_delalloc)(void *private_data, struct page *locked_page, |
116 | u64 start, u64 end, int *page_started, | 118 | u64 start, u64 end, int *page_started, |
117 | unsigned long *nr_written); | 119 | unsigned long *nr_written); |
118 | 120 | ||
119 | int (*writepage_start_hook)(struct page *page, u64 start, u64 end); | 121 | int (*writepage_start_hook)(struct page *page, u64 start, u64 end); |
120 | void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, | 122 | void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, |
121 | struct extent_state *state, int uptodate); | 123 | struct extent_state *state, int uptodate); |
122 | void (*set_bit_hook)(struct inode *inode, struct extent_state *state, | 124 | void (*set_bit_hook)(void *private_data, struct extent_state *state, |
123 | unsigned *bits); | 125 | unsigned *bits); |
124 | void (*clear_bit_hook)(struct btrfs_inode *inode, | 126 | void (*clear_bit_hook)(void *private_data, |
125 | struct extent_state *state, | 127 | struct extent_state *state, |
126 | unsigned *bits); | 128 | unsigned *bits); |
127 | void (*merge_extent_hook)(struct inode *inode, | 129 | void (*merge_extent_hook)(void *private_data, |
128 | struct extent_state *new, | 130 | struct extent_state *new, |
129 | struct extent_state *other); | 131 | struct extent_state *other); |
130 | void (*split_extent_hook)(struct inode *inode, | 132 | void (*split_extent_hook)(void *private_data, |
131 | struct extent_state *orig, u64 split); | 133 | struct extent_state *orig, u64 split); |
134 | void (*check_extent_io_range)(void *private_data, const char *caller, | ||
135 | u64 start, u64 end); | ||
132 | }; | 136 | }; |
133 | 137 | ||
134 | struct extent_io_tree { | 138 | struct extent_io_tree { |
135 | struct rb_root state; | 139 | struct rb_root state; |
136 | struct address_space *mapping; | 140 | void *private_data; |
137 | u64 dirty_bytes; | 141 | u64 dirty_bytes; |
138 | int track_uptodate; | 142 | int track_uptodate; |
139 | spinlock_t lock; | 143 | spinlock_t lock; |
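
The hunk above swaps the typed struct inode * argument in every hook for an opaque void *private_data, and the extent_io_tree itself now carries that pointer instead of an address_space mapping. A minimal sketch of the resulting convention, with illustrative names that are not part of this patch: the tree owner registers itself at init time, and each hook casts the pointer back to whatever it knows it stored.

    static void example_set_bit_hook(void *private_data,
                                     struct extent_state *state,
                                     unsigned *bits)
    {
            /* this tree was initialized with an inode, so the cast is safe */
            struct inode *inode = private_data;

            /* ... per-inode delalloc accounting driven by state/bits ... */
    }

    /* the owner becomes the tree's private_data */
    extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode);

Since the tree no longer hard-codes an address_space, owners that are not backed by an inode can attach to it as well; the new tree_fs_info and set_range_writeback hooks exist to recover what the mapping used to provide.
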
@@ -205,12 +209,46 @@ struct extent_buffer { | |||
205 | */ | 209 | */ |
206 | struct extent_changeset { | 210 | struct extent_changeset { |
207 | /* How many bytes are set/cleared in this operation */ | 211 | /* How many bytes are set/cleared in this operation */ |
208 | u64 bytes_changed; | 212 | unsigned int bytes_changed; |
209 | 213 | ||
210 | /* Changed ranges */ | 214 | /* Changed ranges */ |
211 | struct ulist range_changed; | 215 | struct ulist range_changed; |
212 | }; | 216 | }; |
213 | 217 | ||
218 | static inline void extent_changeset_init(struct extent_changeset *changeset) | ||
219 | { | ||
220 | changeset->bytes_changed = 0; | ||
221 | ulist_init(&changeset->range_changed); | ||
222 | } | ||
223 | |||
224 | static inline struct extent_changeset *extent_changeset_alloc(void) | ||
225 | { | ||
226 | struct extent_changeset *ret; | ||
227 | |||
228 | ret = kmalloc(sizeof(*ret), GFP_KERNEL); | ||
229 | if (!ret) | ||
230 | return NULL; | ||
231 | |||
232 | extent_changeset_init(ret); | ||
233 | return ret; | ||
234 | } | ||
235 | |||
236 | static inline void extent_changeset_release(struct extent_changeset *changeset) | ||
237 | { | ||
238 | if (!changeset) | ||
239 | return; | ||
240 | changeset->bytes_changed = 0; | ||
241 | ulist_release(&changeset->range_changed); | ||
242 | } | ||
243 | |||
244 | static inline void extent_changeset_free(struct extent_changeset *changeset) | ||
245 | { | ||
246 | if (!changeset) | ||
247 | return; | ||
248 | extent_changeset_release(changeset); | ||
249 | kfree(changeset); | ||
250 | } | ||
251 | |||
214 | static inline void extent_set_compress_type(unsigned long *bio_flags, | 252 | static inline void extent_set_compress_type(unsigned long *bio_flags, |
215 | int compress_type) | 253 | int compress_type) |
216 | { | 254 | { |
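
These four inline helpers pin down the extent_changeset lifecycle: init and alloc set up an empty set, release empties it without freeing the allocation, and free does both. A short usage sketch, assuming a single-owner changeset (cs is an illustrative name):

    struct extent_changeset *cs;

    cs = extent_changeset_alloc();        /* kmalloc + init, may return NULL */
    if (!cs)
            return -ENOMEM;

    /* ... record changed/reserved ranges into cs ... */

    extent_changeset_release(cs);         /* drop the ranges, keep cs around */

    /* ... a second pass can reuse the same changeset ... */

    extent_changeset_free(cs);            /* release + kfree; NULL-safe */

The release/free split is what lets the callers further down in this series reuse one changeset across loop iterations and free it exactly once on exit.
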
@@ -230,8 +268,7 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode, | |||
230 | u64 start, u64 len, | 268 | u64 start, u64 len, |
231 | int create); | 269 | int create); |
232 | 270 | ||
233 | void extent_io_tree_init(struct extent_io_tree *tree, | 271 | void extent_io_tree_init(struct extent_io_tree *tree, void *private_data); |
234 | struct address_space *mapping); | ||
235 | int try_release_extent_mapping(struct extent_map_tree *map, | 272 | int try_release_extent_mapping(struct extent_map_tree *map, |
236 | struct extent_io_tree *tree, struct page *page, | 273 | struct extent_io_tree *tree, struct page *page, |
237 | gfp_t mask); | 274 | gfp_t mask); |
@@ -459,20 +496,21 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, | |||
459 | u64 delalloc_end, struct page *locked_page, | 496 | u64 delalloc_end, struct page *locked_page, |
460 | unsigned bits_to_clear, | 497 | unsigned bits_to_clear, |
461 | unsigned long page_ops); | 498 | unsigned long page_ops); |
462 | struct bio * | 499 | struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte); |
463 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | 500 | struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs); |
464 | gfp_t gfp_flags); | 501 | struct bio *btrfs_bio_clone(struct bio *bio); |
465 | struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs); | 502 | struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size); |
466 | struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask); | ||
467 | 503 | ||
468 | struct btrfs_fs_info; | 504 | struct btrfs_fs_info; |
469 | struct btrfs_inode; | 505 | struct btrfs_inode; |
470 | 506 | ||
471 | int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, | 507 | int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, |
472 | u64 logical, struct page *page, | 508 | u64 length, u64 logical, struct page *page, |
473 | unsigned int pg_offset, int mirror_num); | 509 | unsigned int pg_offset, int mirror_num); |
474 | int clean_io_failure(struct btrfs_inode *inode, u64 start, | 510 | int clean_io_failure(struct btrfs_fs_info *fs_info, |
475 | struct page *page, unsigned int pg_offset); | 511 | struct extent_io_tree *failure_tree, |
512 | struct extent_io_tree *io_tree, u64 start, | ||
513 | struct page *page, u64 ino, unsigned int pg_offset); | ||
476 | void end_extent_writepage(struct page *page, int err, u64 start, u64 end); | 514 | void end_extent_writepage(struct page *page, int err, u64 start, u64 end); |
477 | int repair_eb_io_failure(struct btrfs_fs_info *fs_info, | 515 | int repair_eb_io_failure(struct btrfs_fs_info *fs_info, |
478 | struct extent_buffer *eb, int mirror_num); | 516 | struct extent_buffer *eb, int mirror_num); |
@@ -507,7 +545,9 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, | |||
507 | struct io_failure_record *failrec, | 545 | struct io_failure_record *failrec, |
508 | struct page *page, int pg_offset, int icsum, | 546 | struct page *page, int pg_offset, int icsum, |
509 | bio_end_io_t *endio_func, void *data); | 547 | bio_end_io_t *endio_func, void *data); |
510 | int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec); | 548 | int free_io_failure(struct extent_io_tree *failure_tree, |
549 | struct extent_io_tree *io_tree, | ||
550 | struct io_failure_record *rec); | ||
511 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 551 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
512 | noinline u64 find_lock_delalloc_range(struct inode *inode, | 552 | noinline u64 find_lock_delalloc_range(struct inode *inode, |
513 | struct extent_io_tree *tree, | 553 | struct extent_io_tree *tree, |
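
Note the slimmed-down bio allocator prototypes above: the gfp_t and vector-count parameters are gone, and first_sector becomes first_byte. Dropping the gfp argument strongly suggests these allocations now come from a mempool-backed bioset and are not allowed to fail, so the NULL checks at call sites can go. A sketch of the before/after shape, not a verbatim call site:

    /* before: allocation could fail and had to be checked */
    bio = btrfs_bio_alloc(bdev, first_sector, BIO_MAX_PAGES, GFP_NOFS);
    if (!bio)
            return -ENOMEM;

    /* after: bioset-backed, never returns NULL; takes a byte offset */
    bio = btrfs_bio_alloc(bdev, first_byte);
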
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 5b1c7090e546..fdcb41002623 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -164,7 +164,8 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio | |||
164 | u64 logical_offset, u32 *dst, int dio) | 164 | u64 logical_offset, u32 *dst, int dio) |
165 | { | 165 | { |
166 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 166 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
167 | struct bio_vec *bvec; | 167 | struct bio_vec bvec; |
168 | struct bvec_iter iter; | ||
168 | struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio); | 169 | struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio); |
169 | struct btrfs_csum_item *item = NULL; | 170 | struct btrfs_csum_item *item = NULL; |
170 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 171 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
@@ -177,7 +178,7 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio | |||
177 | u64 page_bytes_left; | 178 | u64 page_bytes_left; |
178 | u32 diff; | 179 | u32 diff; |
179 | int nblocks; | 180 | int nblocks; |
180 | int count = 0, i; | 181 | int count = 0; |
181 | u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); | 182 | u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); |
182 | 183 | ||
183 | path = btrfs_alloc_path(); | 184 | path = btrfs_alloc_path(); |
@@ -206,8 +207,6 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio | |||
206 | if (bio->bi_iter.bi_size > PAGE_SIZE * 8) | 207 | if (bio->bi_iter.bi_size > PAGE_SIZE * 8) |
207 | path->reada = READA_FORWARD; | 208 | path->reada = READA_FORWARD; |
208 | 209 | ||
209 | WARN_ON(bio->bi_vcnt <= 0); | ||
210 | |||
211 | /* | 210 | /* |
212 | * the free space stuff is only read when it hasn't been | 211 | * the free space stuff is only read when it hasn't been |
213 | * updated in the current transaction. So, we can safely | 212 | * updated in the current transaction. So, we can safely |
@@ -223,13 +222,13 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio | |||
223 | if (dio) | 222 | if (dio) |
224 | offset = logical_offset; | 223 | offset = logical_offset; |
225 | 224 | ||
226 | bio_for_each_segment_all(bvec, bio, i) { | 225 | bio_for_each_segment(bvec, bio, iter) { |
227 | page_bytes_left = bvec->bv_len; | 226 | page_bytes_left = bvec.bv_len; |
228 | if (count) | 227 | if (count) |
229 | goto next; | 228 | goto next; |
230 | 229 | ||
231 | if (!dio) | 230 | if (!dio) |
232 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | 231 | offset = page_offset(bvec.bv_page) + bvec.bv_offset; |
233 | count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, | 232 | count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, |
234 | (u32 *)csum, nblocks); | 233 | (u32 *)csum, nblocks); |
235 | if (count) | 234 | if (count) |
@@ -440,15 +439,15 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, | |||
440 | struct btrfs_ordered_sum *sums; | 439 | struct btrfs_ordered_sum *sums; |
441 | struct btrfs_ordered_extent *ordered = NULL; | 440 | struct btrfs_ordered_extent *ordered = NULL; |
442 | char *data; | 441 | char *data; |
443 | struct bio_vec *bvec; | 442 | struct bvec_iter iter; |
443 | struct bio_vec bvec; | ||
444 | int index; | 444 | int index; |
445 | int nr_sectors; | 445 | int nr_sectors; |
446 | int i, j; | ||
447 | unsigned long total_bytes = 0; | 446 | unsigned long total_bytes = 0; |
448 | unsigned long this_sum_bytes = 0; | 447 | unsigned long this_sum_bytes = 0; |
448 | int i; | ||
449 | u64 offset; | 449 | u64 offset; |
450 | 450 | ||
451 | WARN_ON(bio->bi_vcnt <= 0); | ||
452 | sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), | 451 | sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), |
453 | GFP_NOFS); | 452 | GFP_NOFS); |
454 | if (!sums) | 453 | if (!sums) |
@@ -465,19 +464,19 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, | |||
465 | sums->bytenr = (u64)bio->bi_iter.bi_sector << 9; | 464 | sums->bytenr = (u64)bio->bi_iter.bi_sector << 9; |
466 | index = 0; | 465 | index = 0; |
467 | 466 | ||
468 | bio_for_each_segment_all(bvec, bio, j) { | 467 | bio_for_each_segment(bvec, bio, iter) { |
469 | if (!contig) | 468 | if (!contig) |
470 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | 469 | offset = page_offset(bvec.bv_page) + bvec.bv_offset; |
471 | 470 | ||
472 | if (!ordered) { | 471 | if (!ordered) { |
473 | ordered = btrfs_lookup_ordered_extent(inode, offset); | 472 | ordered = btrfs_lookup_ordered_extent(inode, offset); |
474 | BUG_ON(!ordered); /* Logic error */ | 473 | BUG_ON(!ordered); /* Logic error */ |
475 | } | 474 | } |
476 | 475 | ||
477 | data = kmap_atomic(bvec->bv_page); | 476 | data = kmap_atomic(bvec.bv_page); |
478 | 477 | ||
479 | nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, | 478 | nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, |
480 | bvec->bv_len + fs_info->sectorsize | 479 | bvec.bv_len + fs_info->sectorsize |
481 | - 1); | 480 | - 1); |
482 | 481 | ||
483 | for (i = 0; i < nr_sectors; i++) { | 482 | for (i = 0; i < nr_sectors; i++) { |
@@ -504,12 +503,12 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, | |||
504 | + total_bytes; | 503 | + total_bytes; |
505 | index = 0; | 504 | index = 0; |
506 | 505 | ||
507 | data = kmap_atomic(bvec->bv_page); | 506 | data = kmap_atomic(bvec.bv_page); |
508 | } | 507 | } |
509 | 508 | ||
510 | sums->sums[index] = ~(u32)0; | 509 | sums->sums[index] = ~(u32)0; |
511 | sums->sums[index] | 510 | sums->sums[index] |
512 | = btrfs_csum_data(data + bvec->bv_offset | 511 | = btrfs_csum_data(data + bvec.bv_offset |
513 | + (i * fs_info->sectorsize), | 512 | + (i * fs_info->sectorsize), |
514 | sums->sums[index], | 513 | sums->sums[index], |
515 | fs_info->sectorsize); | 514 | fs_info->sectorsize); |
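
The checksum loops above move from bio_for_each_segment_all() to bio_for_each_segment(). The distinction is easy to get wrong: the _all variant walks the raw bi_io_vec array by pointer and is only valid on a bio its owner built, while the iter variant honours bio->bi_iter and hands out struct bio_vec by value, which is what makes cloned and partially-advanced bios safe to walk. The new idiom, with process() as a stand-in for the per-segment work:

    struct bio_vec bvec;            /* filled in by value each step */
    struct bvec_iter iter;

    bio_for_each_segment(bvec, bio, iter) {
            /* respects bio->bi_iter, so clones and partial bios work */
            process(bvec.bv_page, bvec.bv_offset, bvec.bv_len);
    }

This is also why the WARN_ON(bio->bi_vcnt <= 0) checks disappear: bi_vcnt describes the owner's vector array, which an iterator-based walk no longer relies on.
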
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 59e2dccdf75b..24338702ea5b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -1581,6 +1581,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1581 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1581 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1582 | struct page **pages = NULL; | 1582 | struct page **pages = NULL; |
1583 | struct extent_state *cached_state = NULL; | 1583 | struct extent_state *cached_state = NULL; |
1584 | struct extent_changeset *data_reserved = NULL; | ||
1584 | u64 release_bytes = 0; | 1585 | u64 release_bytes = 0; |
1585 | u64 lockstart; | 1586 | u64 lockstart; |
1586 | u64 lockend; | 1587 | u64 lockend; |
@@ -1628,7 +1629,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1628 | reserve_bytes = round_up(write_bytes + sector_offset, | 1629 | reserve_bytes = round_up(write_bytes + sector_offset, |
1629 | fs_info->sectorsize); | 1630 | fs_info->sectorsize); |
1630 | 1631 | ||
1631 | ret = btrfs_check_data_free_space(inode, pos, write_bytes); | 1632 | extent_changeset_release(data_reserved); |
1633 | ret = btrfs_check_data_free_space(inode, &data_reserved, pos, | ||
1634 | write_bytes); | ||
1632 | if (ret < 0) { | 1635 | if (ret < 0) { |
1633 | if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | | 1636 | if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | |
1634 | BTRFS_INODE_PREALLOC)) && | 1637 | BTRFS_INODE_PREALLOC)) && |
@@ -1657,8 +1660,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1657 | reserve_bytes); | 1660 | reserve_bytes); |
1658 | if (ret) { | 1661 | if (ret) { |
1659 | if (!only_release_metadata) | 1662 | if (!only_release_metadata) |
1660 | btrfs_free_reserved_data_space(inode, pos, | 1663 | btrfs_free_reserved_data_space(inode, |
1661 | write_bytes); | 1664 | data_reserved, pos, |
1665 | write_bytes); | ||
1662 | else | 1666 | else |
1663 | btrfs_end_write_no_snapshoting(root); | 1667 | btrfs_end_write_no_snapshoting(root); |
1664 | break; | 1668 | break; |
@@ -1740,8 +1744,9 @@ again: | |||
1740 | __pos = round_down(pos, | 1744 | __pos = round_down(pos, |
1741 | fs_info->sectorsize) + | 1745 | fs_info->sectorsize) + |
1742 | (dirty_pages << PAGE_SHIFT); | 1746 | (dirty_pages << PAGE_SHIFT); |
1743 | btrfs_delalloc_release_space(inode, __pos, | 1747 | btrfs_delalloc_release_space(inode, |
1744 | release_bytes); | 1748 | data_reserved, __pos, |
1749 | release_bytes); | ||
1745 | } | 1750 | } |
1746 | } | 1751 | } |
1747 | 1752 | ||
@@ -1796,12 +1801,13 @@ again: | |||
1796 | btrfs_delalloc_release_metadata(BTRFS_I(inode), | 1801 | btrfs_delalloc_release_metadata(BTRFS_I(inode), |
1797 | release_bytes); | 1802 | release_bytes); |
1798 | } else { | 1803 | } else { |
1799 | btrfs_delalloc_release_space(inode, | 1804 | btrfs_delalloc_release_space(inode, data_reserved, |
1800 | round_down(pos, fs_info->sectorsize), | 1805 | round_down(pos, fs_info->sectorsize), |
1801 | release_bytes); | 1806 | release_bytes); |
1802 | } | 1807 | } |
1803 | } | 1808 | } |
1804 | 1809 | ||
1810 | extent_changeset_free(data_reserved); | ||
1805 | return num_written ? num_written : ret; | 1811 | return num_written ? num_written : ret; |
1806 | } | 1812 | } |
1807 | 1813 | ||
@@ -2405,10 +2411,13 @@ out: | |||
2405 | */ | 2411 | */ |
2406 | static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len) | 2412 | static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len) |
2407 | { | 2413 | { |
2414 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | ||
2408 | struct extent_map *em; | 2415 | struct extent_map *em; |
2409 | int ret = 0; | 2416 | int ret = 0; |
2410 | 2417 | ||
2411 | em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, *start, *len, 0); | 2418 | em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, |
2419 | round_down(*start, fs_info->sectorsize), | ||
2420 | round_up(*len, fs_info->sectorsize), 0); | ||
2412 | if (IS_ERR(em)) | 2421 | if (IS_ERR(em)) |
2413 | return PTR_ERR(em); | 2422 | return PTR_ERR(em); |
2414 | 2423 | ||
@@ -2784,6 +2793,7 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2784 | { | 2793 | { |
2785 | struct inode *inode = file_inode(file); | 2794 | struct inode *inode = file_inode(file); |
2786 | struct extent_state *cached_state = NULL; | 2795 | struct extent_state *cached_state = NULL; |
2796 | struct extent_changeset *data_reserved = NULL; | ||
2787 | struct falloc_range *range; | 2797 | struct falloc_range *range; |
2788 | struct falloc_range *tmp; | 2798 | struct falloc_range *tmp; |
2789 | struct list_head reserve_list; | 2799 | struct list_head reserve_list; |
@@ -2913,8 +2923,8 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2913 | free_extent_map(em); | 2923 | free_extent_map(em); |
2914 | break; | 2924 | break; |
2915 | } | 2925 | } |
2916 | ret = btrfs_qgroup_reserve_data(inode, cur_offset, | 2926 | ret = btrfs_qgroup_reserve_data(inode, &data_reserved, |
2917 | last_byte - cur_offset); | 2927 | cur_offset, last_byte - cur_offset); |
2918 | if (ret < 0) { | 2928 | if (ret < 0) { |
2919 | free_extent_map(em); | 2929 | free_extent_map(em); |
2920 | break; | 2930 | break; |
@@ -2925,8 +2935,8 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2925 | * range, free reserved data space first, otherwise | 2935 | * range, free reserved data space first, otherwise |
2926 | * it'll result in false ENOSPC error. | 2936 | * it'll result in false ENOSPC error. |
2927 | */ | 2937 | */ |
2928 | btrfs_free_reserved_data_space(inode, cur_offset, | 2938 | btrfs_free_reserved_data_space(inode, data_reserved, |
2929 | last_byte - cur_offset); | 2939 | cur_offset, last_byte - cur_offset); |
2930 | } | 2940 | } |
2931 | free_extent_map(em); | 2941 | free_extent_map(em); |
2932 | cur_offset = last_byte; | 2942 | cur_offset = last_byte; |
@@ -2945,8 +2955,9 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2945 | range->len, i_blocksize(inode), | 2955 | range->len, i_blocksize(inode), |
2946 | offset + len, &alloc_hint); | 2956 | offset + len, &alloc_hint); |
2947 | else | 2957 | else |
2948 | btrfs_free_reserved_data_space(inode, range->start, | 2958 | btrfs_free_reserved_data_space(inode, |
2949 | range->len); | 2959 | data_reserved, range->start, |
2960 | range->len); | ||
2950 | list_del(&range->list); | 2961 | list_del(&range->list); |
2951 | kfree(range); | 2962 | kfree(range); |
2952 | } | 2963 | } |
@@ -2984,8 +2995,9 @@ out: | |||
2984 | inode_unlock(inode); | 2995 | inode_unlock(inode); |
2985 | /* Let go of our reservation. */ | 2996 | /* Let go of our reservation. */ |
2986 | if (ret != 0) | 2997 | if (ret != 0) |
2987 | btrfs_free_reserved_data_space(inode, alloc_start, | 2998 | btrfs_free_reserved_data_space(inode, data_reserved, |
2988 | alloc_end - cur_offset); | 2999 | alloc_start, alloc_end - cur_offset); |
3000 | extent_changeset_free(data_reserved); | ||
2989 | return ret; | 3001 | return ret; |
2990 | } | 3002 | } |
2991 | 3003 | ||
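
Across file.c every data-space call now threads a struct extent_changeset through, and __btrfs_buffered_write() shows the intended shape: release the changeset at the top of each loop iteration so the same allocation is reused, hand it to the matching free calls on error paths, and free it once on the way out. Condensed sketch (more_to_write and metadata_reservation_failed are placeholders, not real variables):

    struct extent_changeset *data_reserved = NULL;

    while (more_to_write) {
            extent_changeset_release(data_reserved);   /* reuse between rounds */
            ret = btrfs_check_data_free_space(inode, &data_reserved,
                                              pos, write_bytes);
            if (ret < 0)
                    break;

            if (metadata_reservation_failed) {
                    btrfs_free_reserved_data_space(inode, data_reserved,
                                                   pos, write_bytes);
                    break;
            }
            /* ... copy pages, dirty extents ... */
    }
    extent_changeset_free(data_reserved);              /* once, NULL-safe */

Passing the changeset back to the free side presumably lets qgroups free exactly the ranges that were reserved, rather than recomputing them from offsets.
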
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index fc0bd8406758..a5e34de06c2f 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c | |||
@@ -17,7 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/vmalloc.h> | 20 | #include <linux/sched/mm.h> |
21 | #include "ctree.h" | 21 | #include "ctree.h" |
22 | #include "disk-io.h" | 22 | #include "disk-io.h" |
23 | #include "locking.h" | 23 | #include "locking.h" |
@@ -153,21 +153,21 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize) | |||
153 | 153 | ||
154 | static u8 *alloc_bitmap(u32 bitmap_size) | 154 | static u8 *alloc_bitmap(u32 bitmap_size) |
155 | { | 155 | { |
156 | void *mem; | 156 | u8 *ret; |
157 | unsigned int nofs_flag; | ||
157 | 158 | ||
158 | /* | 159 | /* |
159 | * The allocation size varies, observed numbers were < 4K up to 16K. | 160 | * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse |
160 | * Using vmalloc unconditionally would be too heavy, we'll try | 161 | * into the filesystem as the free space bitmap can be modified in the |
161 | * contiguous allocations first. | 162 | * critical section of a transaction commit. |
163 | * | ||
164 | * TODO: push the memalloc_nofs_{save,restore}() to the caller where we | ||
165 | * know that recursion is unsafe. | ||
162 | */ | 166 | */ |
163 | if (bitmap_size <= PAGE_SIZE) | 167 | nofs_flag = memalloc_nofs_save(); |
164 | return kzalloc(bitmap_size, GFP_NOFS); | 168 | ret = kvzalloc(bitmap_size, GFP_KERNEL); |
165 | 169 | memalloc_nofs_restore(nofs_flag); | |
166 | mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN); | 170 | return ret; |
167 | if (mem) | ||
168 | return mem; | ||
169 | |||
170 | return __vmalloc(bitmap_size, GFP_NOFS | __GFP_ZERO, PAGE_KERNEL); | ||
171 | } | 171 | } |
172 | 172 | ||
173 | int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, | 173 | int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, |
@@ -1188,11 +1188,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) | |||
1188 | btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); | 1188 | btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); |
1189 | clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); | 1189 | clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); |
1190 | 1190 | ||
1191 | ret = btrfs_commit_transaction(trans); | 1191 | return btrfs_commit_transaction(trans); |
1192 | if (ret) | ||
1193 | return ret; | ||
1194 | |||
1195 | return 0; | ||
1196 | 1192 | ||
1197 | abort: | 1193 | abort: |
1198 | clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); | 1194 | clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); |
@@ -1277,11 +1273,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) | |||
1277 | free_extent_buffer(free_space_root->commit_root); | 1273 | free_extent_buffer(free_space_root->commit_root); |
1278 | kfree(free_space_root); | 1274 | kfree(free_space_root); |
1279 | 1275 | ||
1280 | ret = btrfs_commit_transaction(trans); | 1276 | return btrfs_commit_transaction(trans); |
1281 | if (ret) | ||
1282 | return ret; | ||
1283 | |||
1284 | return 0; | ||
1285 | 1277 | ||
1286 | abort: | 1278 | abort: |
1287 | btrfs_abort_transaction(trans, ret); | 1279 | btrfs_abort_transaction(trans, ret); |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 5c6c20ec64d8..d02019747d00 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -400,6 +400,7 @@ int btrfs_save_ino_cache(struct btrfs_root *root, | |||
400 | struct btrfs_path *path; | 400 | struct btrfs_path *path; |
401 | struct inode *inode; | 401 | struct inode *inode; |
402 | struct btrfs_block_rsv *rsv; | 402 | struct btrfs_block_rsv *rsv; |
403 | struct extent_changeset *data_reserved = NULL; | ||
403 | u64 num_bytes; | 404 | u64 num_bytes; |
404 | u64 alloc_hint = 0; | 405 | u64 alloc_hint = 0; |
405 | int ret; | 406 | int ret; |
@@ -492,7 +493,7 @@ again: | |||
492 | /* Just to make sure we have enough space */ | 493 | /* Just to make sure we have enough space */ |
493 | prealloc += 8 * PAGE_SIZE; | 494 | prealloc += 8 * PAGE_SIZE; |
494 | 495 | ||
495 | ret = btrfs_delalloc_reserve_space(inode, 0, prealloc); | 496 | ret = btrfs_delalloc_reserve_space(inode, &data_reserved, 0, prealloc); |
496 | if (ret) | 497 | if (ret) |
497 | goto out_put; | 498 | goto out_put; |
498 | 499 | ||
@@ -516,6 +517,7 @@ out: | |||
516 | trans->bytes_reserved = num_bytes; | 517 | trans->bytes_reserved = num_bytes; |
517 | 518 | ||
518 | btrfs_free_path(path); | 519 | btrfs_free_path(path); |
520 | extent_changeset_free(data_reserved); | ||
519 | return ret; | 521 | return ret; |
520 | } | 522 | } |
521 | 523 | ||
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 556c93060606..8d050314591c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -86,7 +86,6 @@ static const struct extent_io_ops btrfs_extent_io_ops; | |||
86 | 86 | ||
87 | static struct kmem_cache *btrfs_inode_cachep; | 87 | static struct kmem_cache *btrfs_inode_cachep; |
88 | struct kmem_cache *btrfs_trans_handle_cachep; | 88 | struct kmem_cache *btrfs_trans_handle_cachep; |
89 | struct kmem_cache *btrfs_transaction_cachep; | ||
90 | struct kmem_cache *btrfs_path_cachep; | 89 | struct kmem_cache *btrfs_path_cachep; |
91 | struct kmem_cache *btrfs_free_space_cachep; | 90 | struct kmem_cache *btrfs_free_space_cachep; |
92 | 91 | ||
@@ -178,7 +177,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
178 | char *kaddr; | 177 | char *kaddr; |
179 | unsigned long ptr; | 178 | unsigned long ptr; |
180 | struct btrfs_file_extent_item *ei; | 179 | struct btrfs_file_extent_item *ei; |
181 | int err = 0; | ||
182 | int ret; | 180 | int ret; |
183 | size_t cur_size = size; | 181 | size_t cur_size = size; |
184 | unsigned long offset; | 182 | unsigned long offset; |
@@ -200,10 +198,8 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
200 | path->leave_spinning = 1; | 198 | path->leave_spinning = 1; |
201 | ret = btrfs_insert_empty_item(trans, root, path, &key, | 199 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
202 | datasize); | 200 | datasize); |
203 | if (ret) { | 201 | if (ret) |
204 | err = ret; | ||
205 | goto fail; | 202 | goto fail; |
206 | } | ||
207 | } | 203 | } |
208 | leaf = path->nodes[0]; | 204 | leaf = path->nodes[0]; |
209 | ei = btrfs_item_ptr(leaf, path->slots[0], | 205 | ei = btrfs_item_ptr(leaf, path->slots[0], |
@@ -258,9 +254,8 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
258 | BTRFS_I(inode)->disk_i_size = inode->i_size; | 254 | BTRFS_I(inode)->disk_i_size = inode->i_size; |
259 | ret = btrfs_update_inode(trans, root, inode); | 255 | ret = btrfs_update_inode(trans, root, inode); |
260 | 256 | ||
261 | return ret; | ||
262 | fail: | 257 | fail: |
263 | return err; | 258 | return ret; |
264 | } | 259 | } |
265 | 260 | ||
266 | 261 | ||
@@ -350,7 +345,7 @@ out: | |||
350 | * And at reserve time, it's always aligned to page size, so | 345 | * And at reserve time, it's always aligned to page size, so |
351 | * just free one page here. | 346 | * just free one page here. |
352 | */ | 347 | */ |
353 | btrfs_qgroup_free_data(inode, 0, PAGE_SIZE); | 348 | btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE); |
354 | btrfs_free_path(path); | 349 | btrfs_free_path(path); |
355 | btrfs_end_transaction(trans); | 350 | btrfs_end_transaction(trans); |
356 | return ret; | 351 | return ret; |
@@ -608,12 +603,11 @@ cont: | |||
608 | 603 | ||
609 | /* | 604 | /* |
610 | * one last check to make sure the compression is really a | 605 | * one last check to make sure the compression is really a |
611 | * win, compare the page count read with the blocks on disk | 606 | * win, compare the page count read with the blocks on disk, |
607 | * compression must free at least one sector size | ||
612 | */ | 608 | */ |
613 | total_in = ALIGN(total_in, PAGE_SIZE); | 609 | total_in = ALIGN(total_in, PAGE_SIZE); |
614 | if (total_compressed >= total_in) { | 610 | if (total_compressed + blocksize <= total_in) { |
615 | will_compress = 0; | ||
616 | } else { | ||
617 | num_bytes = total_in; | 611 | num_bytes = total_in; |
618 | *num_added += 1; | 612 | *num_added += 1; |
619 | 613 | ||
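
The compression cutoff above flips from "any saving at all" to "must free at least one sector". With a 4K sector size, illustrative numbers:

    /* total_in is page-aligned input, blocksize == sectorsize == 4096 */
    total_in         = 16384;    /* four pages read                     */
    total_compressed = 12288;    /* 12288 + 4096 <= 16384: keep it      */
    total_compressed = 13000;    /* 13000 + 4096 >  16384: throw away,  */
                                 /* even though 13000 < 16384           */

Under the old test the 13000-byte result would have been kept, despite still occupying the same four sectors on disk.
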
@@ -1568,10 +1562,11 @@ static inline int need_force_cow(struct inode *inode, u64 start, u64 end) | |||
1568 | /* | 1562 | /* |
1569 | * extent_io.c call back to do delayed allocation processing | 1563 | * extent_io.c call back to do delayed allocation processing |
1570 | */ | 1564 | */ |
1571 | static int run_delalloc_range(struct inode *inode, struct page *locked_page, | 1565 | static int run_delalloc_range(void *private_data, struct page *locked_page, |
1572 | u64 start, u64 end, int *page_started, | 1566 | u64 start, u64 end, int *page_started, |
1573 | unsigned long *nr_written) | 1567 | unsigned long *nr_written) |
1574 | { | 1568 | { |
1569 | struct inode *inode = private_data; | ||
1575 | int ret; | 1570 | int ret; |
1576 | int force_cow = need_force_cow(inode, start, end); | 1571 | int force_cow = need_force_cow(inode, start, end); |
1577 | 1572 | ||
@@ -1595,9 +1590,10 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1595 | return ret; | 1590 | return ret; |
1596 | } | 1591 | } |
1597 | 1592 | ||
1598 | static void btrfs_split_extent_hook(struct inode *inode, | 1593 | static void btrfs_split_extent_hook(void *private_data, |
1599 | struct extent_state *orig, u64 split) | 1594 | struct extent_state *orig, u64 split) |
1600 | { | 1595 | { |
1596 | struct inode *inode = private_data; | ||
1601 | u64 size; | 1597 | u64 size; |
1602 | 1598 | ||
1603 | /* not delalloc, ignore it */ | 1599 | /* not delalloc, ignore it */ |
@@ -1632,10 +1628,11 @@ static void btrfs_split_extent_hook(struct inode *inode, | |||
1632 | * extents, such as when we are doing sequential writes, so we can properly | 1628 | * extents, such as when we are doing sequential writes, so we can properly |
1633 | * account for the metadata space we'll need. | 1629 | * account for the metadata space we'll need. |
1634 | */ | 1630 | */ |
1635 | static void btrfs_merge_extent_hook(struct inode *inode, | 1631 | static void btrfs_merge_extent_hook(void *private_data, |
1636 | struct extent_state *new, | 1632 | struct extent_state *new, |
1637 | struct extent_state *other) | 1633 | struct extent_state *other) |
1638 | { | 1634 | { |
1635 | struct inode *inode = private_data; | ||
1639 | u64 new_size, old_size; | 1636 | u64 new_size, old_size; |
1640 | u32 num_extents; | 1637 | u32 num_extents; |
1641 | 1638 | ||
@@ -1735,9 +1732,10 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root, | |||
1735 | * bytes in this file, and to maintain the list of inodes that | 1732 | * bytes in this file, and to maintain the list of inodes that |
1736 | * have pending delalloc work to be done. | 1733 | * have pending delalloc work to be done. |
1737 | */ | 1734 | */ |
1738 | static void btrfs_set_bit_hook(struct inode *inode, | 1735 | static void btrfs_set_bit_hook(void *private_data, |
1739 | struct extent_state *state, unsigned *bits) | 1736 | struct extent_state *state, unsigned *bits) |
1740 | { | 1737 | { |
1738 | struct inode *inode = private_data; | ||
1741 | 1739 | ||
1742 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 1740 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
1743 | 1741 | ||
@@ -1789,10 +1787,11 @@ static void btrfs_set_bit_hook(struct inode *inode, | |||
1789 | /* | 1787 | /* |
1790 | * extent_io.c clear_bit_hook, see set_bit_hook for why | 1788 | * extent_io.c clear_bit_hook, see set_bit_hook for why |
1791 | */ | 1789 | */ |
1792 | static void btrfs_clear_bit_hook(struct btrfs_inode *inode, | 1790 | static void btrfs_clear_bit_hook(void *private_data, |
1793 | struct extent_state *state, | 1791 | struct extent_state *state, |
1794 | unsigned *bits) | 1792 | unsigned *bits) |
1795 | { | 1793 | { |
1794 | struct btrfs_inode *inode = BTRFS_I((struct inode *)private_data); | ||
1796 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); | 1795 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); |
1797 | u64 len = state->end + 1 - state->start; | 1796 | u64 len = state->end + 1 - state->start; |
1798 | u32 num_extents = count_max_extents(len); | 1797 | u32 num_extents = count_max_extents(len); |
@@ -1900,10 +1899,11 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
1900 | * At IO completion time the csums attached on the ordered extent record | 1899 | * At IO completion time the csums attached on the ordered extent record |
1901 | * are inserted into the btree | 1900 | * are inserted into the btree |
1902 | */ | 1901 | */ |
1903 | static blk_status_t __btrfs_submit_bio_start(struct inode *inode, | 1902 | static blk_status_t __btrfs_submit_bio_start(void *private_data, struct bio *bio, |
1904 | struct bio *bio, int mirror_num, unsigned long bio_flags, | 1903 | int mirror_num, unsigned long bio_flags, |
1905 | u64 bio_offset) | 1904 | u64 bio_offset) |
1906 | { | 1905 | { |
1906 | struct inode *inode = private_data; | ||
1907 | blk_status_t ret = 0; | 1907 | blk_status_t ret = 0; |
1908 | 1908 | ||
1909 | ret = btrfs_csum_one_bio(inode, bio, 0, 0); | 1909 | ret = btrfs_csum_one_bio(inode, bio, 0, 0); |
@@ -1919,10 +1919,11 @@ static blk_status_t __btrfs_submit_bio_start(struct inode *inode, | |||
1919 | * At IO completion time the cums attached on the ordered extent record | 1919 | * At IO completion time the cums attached on the ordered extent record |
1920 | * are inserted into the btree | 1920 | * are inserted into the btree |
1921 | */ | 1921 | */ |
1922 | static blk_status_t __btrfs_submit_bio_done(struct inode *inode, | 1922 | static blk_status_t __btrfs_submit_bio_done(void *private_data, struct bio *bio, |
1923 | struct bio *bio, int mirror_num, unsigned long bio_flags, | 1923 | int mirror_num, unsigned long bio_flags, |
1924 | u64 bio_offset) | 1924 | u64 bio_offset) |
1925 | { | 1925 | { |
1926 | struct inode *inode = private_data; | ||
1926 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 1927 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
1927 | blk_status_t ret; | 1928 | blk_status_t ret; |
1928 | 1929 | ||
@@ -1938,10 +1939,11 @@ static blk_status_t __btrfs_submit_bio_done(struct inode *inode, | |||
1938 | * extent_io.c submission hook. This does the right thing for csum calculation | 1939 | * extent_io.c submission hook. This does the right thing for csum calculation |
1939 | * on write, or reading the csums from the tree before a read | 1940 | * on write, or reading the csums from the tree before a read |
1940 | */ | 1941 | */ |
1941 | static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, | 1942 | static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio, |
1942 | int mirror_num, unsigned long bio_flags, | 1943 | int mirror_num, unsigned long bio_flags, |
1943 | u64 bio_offset) | 1944 | u64 bio_offset) |
1944 | { | 1945 | { |
1946 | struct inode *inode = private_data; | ||
1945 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 1947 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
1946 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1948 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1947 | enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA; | 1949 | enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA; |
@@ -1975,8 +1977,8 @@ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, | |||
1975 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) | 1977 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) |
1976 | goto mapit; | 1978 | goto mapit; |
1977 | /* we're doing a write, do the async checksumming */ | 1979 | /* we're doing a write, do the async checksumming */ |
1978 | ret = btrfs_wq_submit_bio(fs_info, inode, bio, mirror_num, | 1980 | ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags, |
1979 | bio_flags, bio_offset, | 1981 | bio_offset, inode, |
1980 | __btrfs_submit_bio_start, | 1982 | __btrfs_submit_bio_start, |
1981 | __btrfs_submit_bio_done); | 1983 | __btrfs_submit_bio_done); |
1982 | goto out; | 1984 | goto out; |
@@ -2034,6 +2036,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) | |||
2034 | struct btrfs_writepage_fixup *fixup; | 2036 | struct btrfs_writepage_fixup *fixup; |
2035 | struct btrfs_ordered_extent *ordered; | 2037 | struct btrfs_ordered_extent *ordered; |
2036 | struct extent_state *cached_state = NULL; | 2038 | struct extent_state *cached_state = NULL; |
2039 | struct extent_changeset *data_reserved = NULL; | ||
2037 | struct page *page; | 2040 | struct page *page; |
2038 | struct inode *inode; | 2041 | struct inode *inode; |
2039 | u64 page_start; | 2042 | u64 page_start; |
@@ -2071,7 +2074,7 @@ again: | |||
2071 | goto again; | 2074 | goto again; |
2072 | } | 2075 | } |
2073 | 2076 | ||
2074 | ret = btrfs_delalloc_reserve_space(inode, page_start, | 2077 | ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start, |
2075 | PAGE_SIZE); | 2078 | PAGE_SIZE); |
2076 | if (ret) { | 2079 | if (ret) { |
2077 | mapping_set_error(page->mapping, ret); | 2080 | mapping_set_error(page->mapping, ret); |
@@ -2091,6 +2094,7 @@ out_page: | |||
2091 | unlock_page(page); | 2094 | unlock_page(page); |
2092 | put_page(page); | 2095 | put_page(page); |
2093 | kfree(fixup); | 2096 | kfree(fixup); |
2097 | extent_changeset_free(data_reserved); | ||
2094 | } | 2098 | } |
2095 | 2099 | ||
2096 | /* | 2100 | /* |
@@ -2142,6 +2146,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
2142 | struct btrfs_path *path; | 2146 | struct btrfs_path *path; |
2143 | struct extent_buffer *leaf; | 2147 | struct extent_buffer *leaf; |
2144 | struct btrfs_key ins; | 2148 | struct btrfs_key ins; |
2149 | u64 qg_released; | ||
2145 | int extent_inserted = 0; | 2150 | int extent_inserted = 0; |
2146 | int ret; | 2151 | int ret; |
2147 | 2152 | ||
@@ -2197,13 +2202,17 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
2197 | ins.objectid = disk_bytenr; | 2202 | ins.objectid = disk_bytenr; |
2198 | ins.offset = disk_num_bytes; | 2203 | ins.offset = disk_num_bytes; |
2199 | ins.type = BTRFS_EXTENT_ITEM_KEY; | 2204 | ins.type = BTRFS_EXTENT_ITEM_KEY; |
2200 | ret = btrfs_alloc_reserved_file_extent(trans, root->root_key.objectid, | 2205 | |
2201 | btrfs_ino(BTRFS_I(inode)), file_pos, ram_bytes, &ins); | ||
2202 | /* | 2206 | /* |
2203 | * Release the reserved range from inode dirty range map, as it is | 2207 | * Release the reserved range from inode dirty range map, as it is |
2204 | * already moved into delayed_ref_head | 2208 | * already moved into delayed_ref_head |
2205 | */ | 2209 | */ |
2206 | btrfs_qgroup_release_data(inode, file_pos, ram_bytes); | 2210 | ret = btrfs_qgroup_release_data(inode, file_pos, ram_bytes); |
2211 | if (ret < 0) | ||
2212 | goto out; | ||
2213 | qg_released = ret; | ||
2214 | ret = btrfs_alloc_reserved_file_extent(trans, root->root_key.objectid, | ||
2215 | btrfs_ino(BTRFS_I(inode)), file_pos, qg_released, &ins); | ||
2207 | out: | 2216 | out: |
2208 | btrfs_free_path(path); | 2217 | btrfs_free_path(path); |
2209 | 2218 | ||
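
The reordering in insert_reserved_file_extent() is subtle: btrfs_qgroup_release_data() now returns how many bytes it actually released, and that value, not the caller-supplied ram_bytes, is what gets accounted to the newly allocated file extent. Reduced to its skeleton:

    ret = btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
    if (ret < 0)
            goto out;
    qg_released = ret;           /* may be less than ram_bytes */
    ret = btrfs_alloc_reserved_file_extent(trans, root->root_key.objectid,
                                           btrfs_ino(BTRFS_I(inode)),
                                           file_pos, qg_released, &ins);

Accounting only what was really released keeps the qgroup reserved-space counters from underflowing when part of the range was never reserved.
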
@@ -2925,7 +2934,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
2925 | * space for NOCOW range. | 2934 | * space for NOCOW range. |
2926 | * As NOCOW won't cause a new delayed ref, just free the space | 2935 | * As NOCOW won't cause a new delayed ref, just free the space |
2927 | */ | 2936 | */ |
2928 | btrfs_qgroup_free_data(inode, ordered_extent->file_offset, | 2937 | btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset, |
2929 | ordered_extent->len); | 2938 | ordered_extent->len); |
2930 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 2939 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
2931 | if (nolock) | 2940 | if (nolock) |
@@ -4761,6 +4770,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, | |||
4761 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 4770 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
4762 | struct btrfs_ordered_extent *ordered; | 4771 | struct btrfs_ordered_extent *ordered; |
4763 | struct extent_state *cached_state = NULL; | 4772 | struct extent_state *cached_state = NULL; |
4773 | struct extent_changeset *data_reserved = NULL; | ||
4764 | char *kaddr; | 4774 | char *kaddr; |
4765 | u32 blocksize = fs_info->sectorsize; | 4775 | u32 blocksize = fs_info->sectorsize; |
4766 | pgoff_t index = from >> PAGE_SHIFT; | 4776 | pgoff_t index = from >> PAGE_SHIFT; |
@@ -4775,7 +4785,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, | |||
4775 | (!len || ((len & (blocksize - 1)) == 0))) | 4785 | (!len || ((len & (blocksize - 1)) == 0))) |
4776 | goto out; | 4786 | goto out; |
4777 | 4787 | ||
4778 | ret = btrfs_delalloc_reserve_space(inode, | 4788 | ret = btrfs_delalloc_reserve_space(inode, &data_reserved, |
4779 | round_down(from, blocksize), blocksize); | 4789 | round_down(from, blocksize), blocksize); |
4780 | if (ret) | 4790 | if (ret) |
4781 | goto out; | 4791 | goto out; |
@@ -4783,7 +4793,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, | |||
4783 | again: | 4793 | again: |
4784 | page = find_or_create_page(mapping, index, mask); | 4794 | page = find_or_create_page(mapping, index, mask); |
4785 | if (!page) { | 4795 | if (!page) { |
4786 | btrfs_delalloc_release_space(inode, | 4796 | btrfs_delalloc_release_space(inode, data_reserved, |
4787 | round_down(from, blocksize), | 4797 | round_down(from, blocksize), |
4788 | blocksize); | 4798 | blocksize); |
4789 | ret = -ENOMEM; | 4799 | ret = -ENOMEM; |
@@ -4855,11 +4865,12 @@ again: | |||
4855 | 4865 | ||
4856 | out_unlock: | 4866 | out_unlock: |
4857 | if (ret) | 4867 | if (ret) |
4858 | btrfs_delalloc_release_space(inode, block_start, | 4868 | btrfs_delalloc_release_space(inode, data_reserved, block_start, |
4859 | blocksize); | 4869 | blocksize); |
4860 | unlock_page(page); | 4870 | unlock_page(page); |
4861 | put_page(page); | 4871 | put_page(page); |
4862 | out: | 4872 | out: |
4873 | extent_changeset_free(data_reserved); | ||
4863 | return ret; | 4874 | return ret; |
4864 | } | 4875 | } |
4865 | 4876 | ||
@@ -5254,7 +5265,7 @@ static void evict_inode_truncate_pages(struct inode *inode) | |||
5254 | * Note, end is the bytenr of last byte, so we need + 1 here. | 5265 | * Note, end is the bytenr of last byte, so we need + 1 here. |
5255 | */ | 5266 | */ |
5256 | if (state->state & EXTENT_DELALLOC) | 5267 | if (state->state & EXTENT_DELALLOC) |
5257 | btrfs_qgroup_free_data(inode, start, end - start + 1); | 5268 | btrfs_qgroup_free_data(inode, NULL, start, end - start + 1); |
5258 | 5269 | ||
5259 | clear_extent_bit(io_tree, start, end, | 5270 | clear_extent_bit(io_tree, start, end, |
5260 | EXTENT_LOCKED | EXTENT_DIRTY | | 5271 | EXTENT_LOCKED | EXTENT_DIRTY | |
@@ -5867,7 +5878,6 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | |||
5867 | struct inode *inode = file_inode(file); | 5878 | struct inode *inode = file_inode(file); |
5868 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 5879 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
5869 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5880 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5870 | struct btrfs_item *item; | ||
5871 | struct btrfs_dir_item *di; | 5881 | struct btrfs_dir_item *di; |
5872 | struct btrfs_key key; | 5882 | struct btrfs_key key; |
5873 | struct btrfs_key found_key; | 5883 | struct btrfs_key found_key; |
@@ -5918,7 +5928,6 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | |||
5918 | continue; | 5928 | continue; |
5919 | } | 5929 | } |
5920 | 5930 | ||
5921 | item = btrfs_item_nr(slot); | ||
5922 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 5931 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
5923 | 5932 | ||
5924 | if (found_key.objectid != key.objectid) | 5933 | if (found_key.objectid != key.objectid) |
@@ -5933,7 +5942,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | |||
5933 | ctx->pos = found_key.offset; | 5942 | ctx->pos = found_key.offset; |
5934 | 5943 | ||
5935 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | 5944 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); |
5936 | if (verify_dir_item(fs_info, leaf, di)) | 5945 | if (verify_dir_item(fs_info, leaf, slot, di)) |
5937 | goto next; | 5946 | goto next; |
5938 | 5947 | ||
5939 | name_len = btrfs_dir_name_len(leaf, di); | 5948 | name_len = btrfs_dir_name_len(leaf, di); |
@@ -7479,7 +7488,7 @@ out: | |||
7479 | bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end) | 7488 | bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end) |
7480 | { | 7489 | { |
7481 | struct radix_tree_root *root = &inode->i_mapping->page_tree; | 7490 | struct radix_tree_root *root = &inode->i_mapping->page_tree; |
7482 | int found = false; | 7491 | bool found = false; |
7483 | void **pagep = NULL; | 7492 | void **pagep = NULL; |
7484 | struct page *page = NULL; | 7493 | struct page *page = NULL; |
7485 | unsigned long start_idx; | 7494 | unsigned long start_idx; |
@@ -7977,9 +7986,12 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, | |||
7977 | bio_end_io_t *repair_endio, void *repair_arg) | 7986 | bio_end_io_t *repair_endio, void *repair_arg) |
7978 | { | 7987 | { |
7979 | struct io_failure_record *failrec; | 7988 | struct io_failure_record *failrec; |
7989 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
7990 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | ||
7980 | struct bio *bio; | 7991 | struct bio *bio; |
7981 | int isector; | 7992 | int isector; |
7982 | int read_mode = 0; | 7993 | int read_mode = 0; |
7994 | int segs; | ||
7983 | int ret; | 7995 | int ret; |
7984 | 7996 | ||
7985 | BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); | 7997 | BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); |
@@ -7991,13 +8003,13 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, | |||
7991 | ret = btrfs_check_dio_repairable(inode, failed_bio, failrec, | 8003 | ret = btrfs_check_dio_repairable(inode, failed_bio, failrec, |
7992 | failed_mirror); | 8004 | failed_mirror); |
7993 | if (!ret) { | 8005 | if (!ret) { |
7994 | free_io_failure(BTRFS_I(inode), failrec); | 8006 | free_io_failure(failure_tree, io_tree, failrec); |
7995 | return -EIO; | 8007 | return -EIO; |
7996 | } | 8008 | } |
7997 | 8009 | ||
7998 | if ((failed_bio->bi_vcnt > 1) | 8010 | segs = bio_segments(failed_bio); |
7999 | || (failed_bio->bi_io_vec->bv_len | 8011 | if (segs > 1 || |
8000 | > btrfs_inode_sectorsize(inode))) | 8012 | (failed_bio->bi_io_vec->bv_len > btrfs_inode_sectorsize(inode))) |
8001 | read_mode |= REQ_FAILFAST_DEV; | 8013 | read_mode |= REQ_FAILFAST_DEV; |
8002 | 8014 | ||
8003 | isector = start - btrfs_io_bio(failed_bio)->logical; | 8015 | isector = start - btrfs_io_bio(failed_bio)->logical; |
@@ -8005,7 +8017,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, | |||
8005 | bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, | 8017 | bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, |
8006 | pgoff, isector, repair_endio, repair_arg); | 8018 | pgoff, isector, repair_endio, repair_arg); |
8007 | if (!bio) { | 8019 | if (!bio) { |
8008 | free_io_failure(BTRFS_I(inode), failrec); | 8020 | free_io_failure(failure_tree, io_tree, failrec); |
8009 | return -EIO; | 8021 | return -EIO; |
8010 | } | 8022 | } |
8011 | bio_set_op_attrs(bio, REQ_OP_READ, read_mode); | 8023 | bio_set_op_attrs(bio, REQ_OP_READ, read_mode); |
@@ -8016,7 +8028,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, | |||
8016 | 8028 | ||
8017 | ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror); | 8029 | ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror); |
8018 | if (ret) { | 8030 | if (ret) { |
8019 | free_io_failure(BTRFS_I(inode), failrec); | 8031 | free_io_failure(failure_tree, io_tree, failrec); |
8020 | bio_put(bio); | 8032 | bio_put(bio); |
8021 | } | 8033 | } |
8022 | 8034 | ||
@@ -8033,19 +8045,24 @@ struct btrfs_retry_complete { | |||
8033 | static void btrfs_retry_endio_nocsum(struct bio *bio) | 8045 | static void btrfs_retry_endio_nocsum(struct bio *bio) |
8034 | { | 8046 | { |
8035 | struct btrfs_retry_complete *done = bio->bi_private; | 8047 | struct btrfs_retry_complete *done = bio->bi_private; |
8048 | struct inode *inode = done->inode; | ||
8036 | struct bio_vec *bvec; | 8049 | struct bio_vec *bvec; |
8050 | struct extent_io_tree *io_tree, *failure_tree; | ||
8037 | int i; | 8051 | int i; |
8038 | 8052 | ||
8039 | if (bio->bi_status) | 8053 | if (bio->bi_status) |
8040 | goto end; | 8054 | goto end; |
8041 | 8055 | ||
8042 | ASSERT(bio->bi_vcnt == 1); | 8056 | ASSERT(bio->bi_vcnt == 1); |
8043 | ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(done->inode)); | 8057 | io_tree = &BTRFS_I(inode)->io_tree; |
8058 | failure_tree = &BTRFS_I(inode)->io_failure_tree; | ||
8059 | ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(inode)); | ||
8044 | 8060 | ||
8045 | done->uptodate = 1; | 8061 | done->uptodate = 1; |
8046 | bio_for_each_segment_all(bvec, bio, i) | 8062 | bio_for_each_segment_all(bvec, bio, i) |
8047 | clean_io_failure(BTRFS_I(done->inode), done->start, | 8063 | clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree, |
8048 | bvec->bv_page, 0); | 8064 | io_tree, done->start, bvec->bv_page, |
8065 | btrfs_ino(BTRFS_I(inode)), 0); | ||
8049 | end: | 8066 | end: |
8050 | complete(&done->done); | 8067 | complete(&done->done); |
8051 | bio_put(bio); | 8068 | bio_put(bio); |
@@ -8055,36 +8072,40 @@ static int __btrfs_correct_data_nocsum(struct inode *inode, | |||
8055 | struct btrfs_io_bio *io_bio) | 8072 | struct btrfs_io_bio *io_bio) |
8056 | { | 8073 | { |
8057 | struct btrfs_fs_info *fs_info; | 8074 | struct btrfs_fs_info *fs_info; |
8058 | struct bio_vec *bvec; | 8075 | struct bio_vec bvec; |
8076 | struct bvec_iter iter; | ||
8059 | struct btrfs_retry_complete done; | 8077 | struct btrfs_retry_complete done; |
8060 | u64 start; | 8078 | u64 start; |
8061 | unsigned int pgoff; | 8079 | unsigned int pgoff; |
8062 | u32 sectorsize; | 8080 | u32 sectorsize; |
8063 | int nr_sectors; | 8081 | int nr_sectors; |
8064 | int i; | ||
8065 | int ret; | 8082 | int ret; |
8083 | int err = 0; | ||
8066 | 8084 | ||
8067 | fs_info = BTRFS_I(inode)->root->fs_info; | 8085 | fs_info = BTRFS_I(inode)->root->fs_info; |
8068 | sectorsize = fs_info->sectorsize; | 8086 | sectorsize = fs_info->sectorsize; |
8069 | 8087 | ||
8070 | start = io_bio->logical; | 8088 | start = io_bio->logical; |
8071 | done.inode = inode; | 8089 | done.inode = inode; |
8090 | io_bio->bio.bi_iter = io_bio->iter; | ||
8072 | 8091 | ||
8073 | bio_for_each_segment_all(bvec, &io_bio->bio, i) { | 8092 | bio_for_each_segment(bvec, &io_bio->bio, iter) { |
8074 | nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len); | 8093 | nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len); |
8075 | pgoff = bvec->bv_offset; | 8094 | pgoff = bvec.bv_offset; |
8076 | 8095 | ||
8077 | next_block_or_try_again: | 8096 | next_block_or_try_again: |
8078 | done.uptodate = 0; | 8097 | done.uptodate = 0; |
8079 | done.start = start; | 8098 | done.start = start; |
8080 | init_completion(&done.done); | 8099 | init_completion(&done.done); |
8081 | 8100 | ||
8082 | ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, | 8101 | ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page, |
8083 | pgoff, start, start + sectorsize - 1, | 8102 | pgoff, start, start + sectorsize - 1, |
8084 | io_bio->mirror_num, | 8103 | io_bio->mirror_num, |
8085 | btrfs_retry_endio_nocsum, &done); | 8104 | btrfs_retry_endio_nocsum, &done); |
8086 | if (ret) | 8105 | if (ret) { |
8087 | return ret; | 8106 | err = ret; |
8107 | goto next; | ||
8108 | } | ||
8088 | 8109 | ||
8089 | wait_for_completion(&done.done); | 8110 | wait_for_completion(&done.done); |
8090 | 8111 | ||
@@ -8093,6 +8114,7 @@ next_block_or_try_again: | |||
8093 | goto next_block_or_try_again; | 8114 | goto next_block_or_try_again; |
8094 | } | 8115 | } |
8095 | 8116 | ||
8117 | next: | ||
8096 | start += sectorsize; | 8118 | start += sectorsize; |
8097 | 8119 | ||
8098 | nr_sectors--; | 8120 | nr_sectors--; |
@@ -8103,13 +8125,15 @@ next_block_or_try_again: | |||
8103 | } | 8125 | } |
8104 | } | 8126 | } |
8105 | 8127 | ||
8106 | return 0; | 8128 | return err; |
8107 | } | 8129 | } |
8108 | 8130 | ||
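
A quieter behavioural fix hides in the loop above: a failed dio_read_error() used to return immediately, abandoning the remaining blocks. Now the error is recorded and the walk continues, so every block still gets a retry attempt and an error is still reported at the end. Schematic only, arguments elided:

    int err = 0;

    bio_for_each_segment(bvec, &io_bio->bio, iter) {
            ret = dio_read_error(/* ... */);
            if (ret) {
                    err = ret;              /* remember, but keep going */
                    goto next;
            }
            /* ... wait for completion, maybe retry this block ... */
    next:
            ;                               /* advance to the next block */
    }
    return err;
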
8109 | static void btrfs_retry_endio(struct bio *bio) | 8131 | static void btrfs_retry_endio(struct bio *bio) |
8110 | { | 8132 | { |
8111 | struct btrfs_retry_complete *done = bio->bi_private; | 8133 | struct btrfs_retry_complete *done = bio->bi_private; |
8112 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); | 8134 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); |
8135 | struct extent_io_tree *io_tree, *failure_tree; | ||
8136 | struct inode *inode = done->inode; | ||
8113 | struct bio_vec *bvec; | 8137 | struct bio_vec *bvec; |
8114 | int uptodate; | 8138 | int uptodate; |
8115 | int ret; | 8139 | int ret; |
@@ -8123,13 +8147,19 @@ static void btrfs_retry_endio(struct bio *bio) | |||
8123 | ASSERT(bio->bi_vcnt == 1); | 8147 | ASSERT(bio->bi_vcnt == 1); |
8124 | ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(done->inode)); | 8148 | ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(done->inode)); |
8125 | 8149 | ||
8150 | io_tree = &BTRFS_I(inode)->io_tree; | ||
8151 | failure_tree = &BTRFS_I(inode)->io_failure_tree; | ||
8152 | |||
8126 | bio_for_each_segment_all(bvec, bio, i) { | 8153 | bio_for_each_segment_all(bvec, bio, i) { |
8127 | ret = __readpage_endio_check(done->inode, io_bio, i, | 8154 | ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page, |
8128 | bvec->bv_page, bvec->bv_offset, | 8155 | bvec->bv_offset, done->start, |
8129 | done->start, bvec->bv_len); | 8156 | bvec->bv_len); |
8130 | if (!ret) | 8157 | if (!ret) |
8131 | clean_io_failure(BTRFS_I(done->inode), done->start, | 8158 | clean_io_failure(BTRFS_I(inode)->root->fs_info, |
8132 | bvec->bv_page, bvec->bv_offset); | 8159 | failure_tree, io_tree, done->start, |
8160 | bvec->bv_page, | ||
8161 | btrfs_ino(BTRFS_I(inode)), | ||
8162 | bvec->bv_offset); | ||
8133 | else | 8163 | else |
8134 | uptodate = 0; | 8164 | uptodate = 0; |
8135 | } | 8165 | } |
@@ -8144,7 +8174,8 @@ static blk_status_t __btrfs_subio_endio_read(struct inode *inode, | |||
8144 | struct btrfs_io_bio *io_bio, blk_status_t err) | 8174 | struct btrfs_io_bio *io_bio, blk_status_t err) |
8145 | { | 8175 | { |
8146 | struct btrfs_fs_info *fs_info; | 8176 | struct btrfs_fs_info *fs_info; |
8147 | struct bio_vec *bvec; | 8177 | struct bio_vec bvec; |
8178 | struct bvec_iter iter; | ||
8148 | struct btrfs_retry_complete done; | 8179 | struct btrfs_retry_complete done; |
8149 | u64 start; | 8180 | u64 start; |
8150 | u64 offset = 0; | 8181 | u64 offset = 0; |
@@ -8152,7 +8183,7 @@ static blk_status_t __btrfs_subio_endio_read(struct inode *inode, | |||
8152 | int nr_sectors; | 8183 | int nr_sectors; |
8153 | unsigned int pgoff; | 8184 | unsigned int pgoff; |
8154 | int csum_pos; | 8185 | int csum_pos; |
8155 | int i; | 8186 | bool uptodate = (err == 0); |
8156 | int ret; | 8187 | int ret; |
8157 | 8188 | ||
8158 | fs_info = BTRFS_I(inode)->root->fs_info; | 8189 | fs_info = BTRFS_I(inode)->root->fs_info; |
@@ -8161,24 +8192,26 @@ static blk_status_t __btrfs_subio_endio_read(struct inode *inode, | |||
8161 | err = 0; | 8192 | err = 0; |
8162 | start = io_bio->logical; | 8193 | start = io_bio->logical; |
8163 | done.inode = inode; | 8194 | done.inode = inode; |
8195 | io_bio->bio.bi_iter = io_bio->iter; | ||
8164 | 8196 | ||
8165 | bio_for_each_segment_all(bvec, &io_bio->bio, i) { | 8197 | bio_for_each_segment(bvec, &io_bio->bio, iter) { |
8166 | nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len); | 8198 | nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len); |
8167 | 8199 | ||
8168 | pgoff = bvec->bv_offset; | 8200 | pgoff = bvec.bv_offset; |
8169 | next_block: | 8201 | next_block: |
8170 | csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset); | 8202 | if (uptodate) { |
8171 | ret = __readpage_endio_check(inode, io_bio, csum_pos, | 8203 | csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset); |
8172 | bvec->bv_page, pgoff, start, | 8204 | ret = __readpage_endio_check(inode, io_bio, csum_pos, |
8173 | sectorsize); | 8205 | bvec.bv_page, pgoff, start, sectorsize); |
8174 | if (likely(!ret)) | 8206 | if (likely(!ret)) |
8175 | goto next; | 8207 | goto next; |
8208 | } | ||
8176 | try_again: | 8209 | try_again: |
8177 | done.uptodate = 0; | 8210 | done.uptodate = 0; |
8178 | done.start = start; | 8211 | done.start = start; |
8179 | init_completion(&done.done); | 8212 | init_completion(&done.done); |
8180 | 8213 | ||
8181 | ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, | 8214 | ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page, |
8182 | pgoff, start, start + sectorsize - 1, | 8215 | pgoff, start, start + sectorsize - 1, |
8183 | io_bio->mirror_num, | 8216 | io_bio->mirror_num, |
8184 | btrfs_retry_endio, &done); | 8217 | btrfs_retry_endio, &done); |
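The hunk above swaps bio_for_each_segment_all() for bio_for_each_segment(). The retried bio here is a clone that shares the original's bi_io_vec, so walking the raw vector array would visit segments outside the clone's range; the iterator form honours bi_iter, which is also why io_bio->bio.bi_iter is first restored from the saved io_bio->iter. A minimal sketch of the two iteration styles, assuming the 4.13-era block-layer macros:

    #include <linux/bio.h>

    /* Safe on cloned bios: visits only the range described by bi_iter. */
    static unsigned int count_bytes(struct bio *bio)
    {
            struct bio_vec bvec;            /* filled by value, one copy per segment */
            struct bvec_iter iter;
            unsigned int bytes = 0;

            bio_for_each_segment(bvec, bio, iter)
                    bytes += bvec.bv_len;
            return bytes;
    }

    /* Only valid for a bio you built yourself: ignores bi_iter and walks
     * every vector in bi_io_vec, so a clone would be over-read. */
    static unsigned int count_bytes_all(struct bio *bio)
    {
            struct bio_vec *bvec;           /* points into bi_io_vec */
            unsigned int bytes = 0;
            int i;

            bio_for_each_segment_all(bvec, bio, i)
                    bytes += bvec->bv_len;
            return bytes;
    }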
@@ -8233,8 +8266,11 @@ static void btrfs_endio_direct_read(struct bio *bio) | |||
8233 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); | 8266 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); |
8234 | blk_status_t err = bio->bi_status; | 8267 | blk_status_t err = bio->bi_status; |
8235 | 8268 | ||
8236 | if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) | 8269 | if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) { |
8237 | err = btrfs_subio_endio_read(inode, io_bio, err); | 8270 | err = btrfs_subio_endio_read(inode, io_bio, err); |
8271 | if (!err) | ||
8272 | bio->bi_status = 0; | ||
8273 | } | ||
8238 | 8274 | ||
8239 | unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, | 8275 | unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, |
8240 | dip->logical_offset + dip->bytes - 1); | 8276 | dip->logical_offset + dip->bytes - 1); |
@@ -8307,10 +8343,11 @@ static void btrfs_endio_direct_write(struct bio *bio) | |||
8307 | bio_put(bio); | 8343 | bio_put(bio); |
8308 | } | 8344 | } |
8309 | 8345 | ||
8310 | static blk_status_t __btrfs_submit_bio_start_direct_io(struct inode *inode, | 8346 | static blk_status_t __btrfs_submit_bio_start_direct_io(void *private_data, |
8311 | struct bio *bio, int mirror_num, | 8347 | struct bio *bio, int mirror_num, |
8312 | unsigned long bio_flags, u64 offset) | 8348 | unsigned long bio_flags, u64 offset) |
8313 | { | 8349 | { |
8350 | struct inode *inode = private_data; | ||
8314 | blk_status_t ret; | 8351 | blk_status_t ret; |
8315 | ret = btrfs_csum_one_bio(inode, bio, offset, 1); | 8352 | ret = btrfs_csum_one_bio(inode, bio, offset, 1); |
8316 | BUG_ON(ret); /* -ENOMEM */ | 8353 | BUG_ON(ret); /* -ENOMEM */ |
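This hunk belongs to a wider change in the series that hands the submit hooks an opaque private_data pointer instead of a struct inode, so the async-submit machinery no longer assumes an inode-backed caller; this caller simply passes the inode through that slot. A sketch of the resulting callback shape; the typedef name below is illustrative, not taken from this diff:

    /* Assumed shape of the generalized submit hook after this change. */
    typedef blk_status_t (*submit_bio_start_fn)(void *private_data,
                                                struct bio *bio, int mirror_num,
                                                unsigned long bio_flags,
                                                u64 offset);

    static blk_status_t example_start_hook(void *private_data, struct bio *bio,
                                           int mirror_num,
                                           unsigned long bio_flags, u64 offset)
    {
            struct inode *inode = private_data; /* this caller passes an inode */

            /* per-inode work, e.g. checksumming the bio, would go here */
            return BLK_STS_OK;
    }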
@@ -8357,16 +8394,6 @@ out: | |||
8357 | bio_put(bio); | 8394 | bio_put(bio); |
8358 | } | 8395 | } |
8359 | 8396 | ||
8360 | static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, | ||
8361 | u64 first_sector, gfp_t gfp_flags) | ||
8362 | { | ||
8363 | struct bio *bio; | ||
8364 | bio = btrfs_bio_alloc(bdev, first_sector, BIO_MAX_PAGES, gfp_flags); | ||
8365 | if (bio) | ||
8366 | bio_associate_current(bio); | ||
8367 | return bio; | ||
8368 | } | ||
8369 | |||
8370 | static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, | 8397 | static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, |
8371 | struct btrfs_dio_private *dip, | 8398 | struct btrfs_dio_private *dip, |
8372 | struct bio *bio, | 8399 | struct bio *bio, |
@@ -8422,8 +8449,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
8422 | goto map; | 8449 | goto map; |
8423 | 8450 | ||
8424 | if (write && async_submit) { | 8451 | if (write && async_submit) { |
8425 | ret = btrfs_wq_submit_bio(fs_info, inode, bio, 0, 0, | 8452 | ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0, |
8426 | file_offset, | 8453 | file_offset, inode, |
8427 | __btrfs_submit_bio_start_direct_io, | 8454 | __btrfs_submit_bio_start_direct_io, |
8428 | __btrfs_submit_bio_done); | 8455 | __btrfs_submit_bio_done); |
8429 | goto err; | 8456 | goto err; |
@@ -8453,103 +8480,83 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, | |||
8453 | { | 8480 | { |
8454 | struct inode *inode = dip->inode; | 8481 | struct inode *inode = dip->inode; |
8455 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 8482 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
8456 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
8457 | struct bio *bio; | 8483 | struct bio *bio; |
8458 | struct bio *orig_bio = dip->orig_bio; | 8484 | struct bio *orig_bio = dip->orig_bio; |
8459 | struct bio_vec *bvec; | ||
8460 | u64 start_sector = orig_bio->bi_iter.bi_sector; | 8485 | u64 start_sector = orig_bio->bi_iter.bi_sector; |
8461 | u64 file_offset = dip->logical_offset; | 8486 | u64 file_offset = dip->logical_offset; |
8462 | u64 submit_len = 0; | ||
8463 | u64 map_length; | 8487 | u64 map_length; |
8464 | u32 blocksize = fs_info->sectorsize; | ||
8465 | int async_submit = 0; | 8488 | int async_submit = 0; |
8466 | int nr_sectors; | 8489 | u64 submit_len; |
8490 | int clone_offset = 0; | ||
8491 | int clone_len; | ||
8467 | int ret; | 8492 | int ret; |
8468 | int i, j; | ||
8469 | 8493 | ||
8470 | map_length = orig_bio->bi_iter.bi_size; | 8494 | map_length = orig_bio->bi_iter.bi_size; |
8495 | submit_len = map_length; | ||
8471 | ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), start_sector << 9, | 8496 | ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), start_sector << 9, |
8472 | &map_length, NULL, 0); | 8497 | &map_length, NULL, 0); |
8473 | if (ret) | 8498 | if (ret) |
8474 | return -EIO; | 8499 | return -EIO; |
8475 | 8500 | ||
8476 | if (map_length >= orig_bio->bi_iter.bi_size) { | 8501 | if (map_length >= submit_len) { |
8477 | bio = orig_bio; | 8502 | bio = orig_bio; |
8478 | dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED; | 8503 | dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED; |
8479 | goto submit; | 8504 | goto submit; |
8480 | } | 8505 | } |
8481 | 8506 | ||
8482 | /* async crcs make it difficult to collect full stripe writes. */ | 8507 | /* async crcs make it difficult to collect full stripe writes. */ |
8483 | if (btrfs_get_alloc_profile(root, 1) & BTRFS_BLOCK_GROUP_RAID56_MASK) | 8508 | if (btrfs_data_alloc_profile(fs_info) & BTRFS_BLOCK_GROUP_RAID56_MASK) |
8484 | async_submit = 0; | 8509 | async_submit = 0; |
8485 | else | 8510 | else |
8486 | async_submit = 1; | 8511 | async_submit = 1; |
8487 | 8512 | ||
8488 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); | 8513 | /* bio split */ |
8489 | if (!bio) | 8514 | ASSERT(map_length <= INT_MAX); |
8490 | return -ENOMEM; | ||
8491 | |||
8492 | bio->bi_opf = orig_bio->bi_opf; | ||
8493 | bio->bi_private = dip; | ||
8494 | bio->bi_end_io = btrfs_end_dio_bio; | ||
8495 | btrfs_io_bio(bio)->logical = file_offset; | ||
8496 | atomic_inc(&dip->pending_bios); | 8515 | atomic_inc(&dip->pending_bios); |
8516 | do { | ||
8517 | clone_len = min_t(int, submit_len, map_length); | ||
8497 | 8518 | ||
8498 | bio_for_each_segment_all(bvec, orig_bio, j) { | 8519 | /* |
8499 | nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len); | 8520 | * This will never fail as it's passing GFP_NOFS and |
8500 | i = 0; | 8521 | * the allocation is backed by btrfs_bioset. |
8501 | next_block: | 8522 | */ |
8502 | if (unlikely(map_length < submit_len + blocksize || | 8523 | bio = btrfs_bio_clone_partial(orig_bio, clone_offset, |
8503 | bio_add_page(bio, bvec->bv_page, blocksize, | 8524 | clone_len); |
8504 | bvec->bv_offset + (i * blocksize)) < blocksize)) { | 8525 | bio->bi_private = dip; |
8505 | /* | 8526 | bio->bi_end_io = btrfs_end_dio_bio; |
8506 | * inc the count before we submit the bio so | 8527 | btrfs_io_bio(bio)->logical = file_offset; |
8507 | * we know the end IO handler won't happen before | 8528 | |
8508 | * we inc the count. Otherwise, the dip might get freed | 8529 | ASSERT(submit_len >= clone_len); |
8509 | * before we're done setting it up | 8530 | submit_len -= clone_len; |
8510 | */ | 8531 | if (submit_len == 0) |
8511 | atomic_inc(&dip->pending_bios); | 8532 | break; |
8512 | ret = __btrfs_submit_dio_bio(bio, inode, | ||
8513 | file_offset, skip_sum, | ||
8514 | async_submit); | ||
8515 | if (ret) { | ||
8516 | bio_put(bio); | ||
8517 | atomic_dec(&dip->pending_bios); | ||
8518 | goto out_err; | ||
8519 | } | ||
8520 | |||
8521 | start_sector += submit_len >> 9; | ||
8522 | file_offset += submit_len; | ||
8523 | 8533 | ||
8524 | submit_len = 0; | 8534 | /* |
8535 | * Increase the count before we submit the bio so we know | ||
8536 | * the end IO handler won't happen before we increase the | ||
8537 | * count. Otherwise, the dip might get freed before we're | ||
8538 | * done setting it up. | ||
8539 | */ | ||
8540 | atomic_inc(&dip->pending_bios); | ||
8525 | 8541 | ||
8526 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, | 8542 | ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, |
8527 | start_sector, GFP_NOFS); | 8543 | async_submit); |
8528 | if (!bio) | 8544 | if (ret) { |
8529 | goto out_err; | 8545 | bio_put(bio); |
8530 | bio->bi_opf = orig_bio->bi_opf; | 8546 | atomic_dec(&dip->pending_bios); |
8531 | bio->bi_private = dip; | 8547 | goto out_err; |
8532 | bio->bi_end_io = btrfs_end_dio_bio; | 8548 | } |
8533 | btrfs_io_bio(bio)->logical = file_offset; | ||
8534 | 8549 | ||
8535 | map_length = orig_bio->bi_iter.bi_size; | 8550 | clone_offset += clone_len; |
8536 | ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), | 8551 | start_sector += clone_len >> 9; |
8537 | start_sector << 9, | 8552 | file_offset += clone_len; |
8538 | &map_length, NULL, 0); | ||
8539 | if (ret) { | ||
8540 | bio_put(bio); | ||
8541 | goto out_err; | ||
8542 | } | ||
8543 | 8553 | ||
8544 | goto next_block; | 8554 | map_length = submit_len; |
8545 | } else { | 8555 | ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), |
8546 | submit_len += blocksize; | 8556 | start_sector << 9, &map_length, NULL, 0); |
8547 | if (--nr_sectors) { | 8557 | if (ret) |
8548 | i++; | 8558 | goto out_err; |
8549 | goto next_block; | 8559 | } while (submit_len > 0); |
8550 | } | ||
8551 | } | ||
8552 | } | ||
8553 | 8560 | ||
8554 | submit: | 8561 | submit: |
8555 | ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, | 8562 | ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, |
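The rewritten loop above replaces the old page-by-page bio_add_page() construction with btrfs_bio_clone_partial(): each pass clones at most map_length bytes of the original bio (the ASSERT guarantees that fits in an int), submits the clone, then asks btrfs_map_block() how far the next piece may extend. A condensed sketch of that control flow; submit_one_clone() is a stand-in for __btrfs_submit_dio_bio(), and error paths plus the pending_bios accounting are trimmed:

    static void split_and_submit(struct btrfs_fs_info *fs_info,
                                 struct bio *orig_bio, u64 map_length)
    {
            u64 submit_len = orig_bio->bi_iter.bi_size;
            u64 start_sector = orig_bio->bi_iter.bi_sector;
            int clone_offset = 0;

            while (submit_len > 0) {
                    int clone_len = min_t(int, submit_len, map_length);
                    struct bio *bio;

                    /* Backed by btrfs_bioset, so the clone cannot fail. */
                    bio = btrfs_bio_clone_partial(orig_bio, clone_offset,
                                                  clone_len);
                    submit_one_clone(bio);

                    submit_len -= clone_len;
                    clone_offset += clone_len;
                    start_sector += clone_len >> 9;
                    if (submit_len == 0)
                            break;

                    /* Re-map from the new position for the next piece. */
                    map_length = submit_len;
                    btrfs_map_block(fs_info, btrfs_op(orig_bio),
                                    start_sector << 9, &map_length, NULL, 0);
            }
    }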
@@ -8576,19 +8583,15 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode, | |||
8576 | loff_t file_offset) | 8583 | loff_t file_offset) |
8577 | { | 8584 | { |
8578 | struct btrfs_dio_private *dip = NULL; | 8585 | struct btrfs_dio_private *dip = NULL; |
8579 | struct bio *io_bio = NULL; | 8586 | struct bio *bio = NULL; |
8580 | struct btrfs_io_bio *btrfs_bio; | 8587 | struct btrfs_io_bio *io_bio; |
8581 | int skip_sum; | 8588 | int skip_sum; |
8582 | bool write = (bio_op(dio_bio) == REQ_OP_WRITE); | 8589 | bool write = (bio_op(dio_bio) == REQ_OP_WRITE); |
8583 | int ret = 0; | 8590 | int ret = 0; |
8584 | 8591 | ||
8585 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 8592 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
8586 | 8593 | ||
8587 | io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS); | 8594 | bio = btrfs_bio_clone(dio_bio); |
8588 | if (!io_bio) { | ||
8589 | ret = -ENOMEM; | ||
8590 | goto free_ordered; | ||
8591 | } | ||
8592 | 8595 | ||
8593 | dip = kzalloc(sizeof(*dip), GFP_NOFS); | 8596 | dip = kzalloc(sizeof(*dip), GFP_NOFS); |
8594 | if (!dip) { | 8597 | if (!dip) { |
@@ -8601,17 +8604,17 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode, | |||
8601 | dip->logical_offset = file_offset; | 8604 | dip->logical_offset = file_offset; |
8602 | dip->bytes = dio_bio->bi_iter.bi_size; | 8605 | dip->bytes = dio_bio->bi_iter.bi_size; |
8603 | dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9; | 8606 | dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9; |
8604 | io_bio->bi_private = dip; | 8607 | bio->bi_private = dip; |
8605 | dip->orig_bio = io_bio; | 8608 | dip->orig_bio = bio; |
8606 | dip->dio_bio = dio_bio; | 8609 | dip->dio_bio = dio_bio; |
8607 | atomic_set(&dip->pending_bios, 0); | 8610 | atomic_set(&dip->pending_bios, 0); |
8608 | btrfs_bio = btrfs_io_bio(io_bio); | 8611 | io_bio = btrfs_io_bio(bio); |
8609 | btrfs_bio->logical = file_offset; | 8612 | io_bio->logical = file_offset; |
8610 | 8613 | ||
8611 | if (write) { | 8614 | if (write) { |
8612 | io_bio->bi_end_io = btrfs_endio_direct_write; | 8615 | bio->bi_end_io = btrfs_endio_direct_write; |
8613 | } else { | 8616 | } else { |
8614 | io_bio->bi_end_io = btrfs_endio_direct_read; | 8617 | bio->bi_end_io = btrfs_endio_direct_read; |
8615 | dip->subio_endio = btrfs_subio_endio_read; | 8618 | dip->subio_endio = btrfs_subio_endio_read; |
8616 | } | 8619 | } |
8617 | 8620 | ||
@@ -8634,8 +8637,8 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode, | |||
8634 | if (!ret) | 8637 | if (!ret) |
8635 | return; | 8638 | return; |
8636 | 8639 | ||
8637 | if (btrfs_bio->end_io) | 8640 | if (io_bio->end_io) |
8638 | btrfs_bio->end_io(btrfs_bio, ret); | 8641 | io_bio->end_io(io_bio, ret); |
8639 | 8642 | ||
8640 | free_ordered: | 8643 | free_ordered: |
8641 | /* | 8644 | /* |
@@ -8647,16 +8650,15 @@ free_ordered: | |||
8647 | * same as btrfs_endio_direct_[write|read] because we can't call these | 8650 | * same as btrfs_endio_direct_[write|read] because we can't call these |
8648 | * callbacks - they require an allocated dip and a clone of dio_bio. | 8651 | * callbacks - they require an allocated dip and a clone of dio_bio. |
8649 | */ | 8652 | */ |
8650 | if (io_bio && dip) { | 8653 | if (bio && dip) { |
8651 | io_bio->bi_status = BLK_STS_IOERR; | 8654 | bio_io_error(bio); |
8652 | bio_endio(io_bio); | ||
8653 | /* | 8655 | /* |
8654 | * The end io callbacks free our dip, do the final put on io_bio | 8656 | * The end io callbacks free our dip, do the final put on bio |
8655 | * and all the cleanup and final put for dio_bio (through | 8657 | * and all the cleanup and final put for dio_bio (through |
8656 | * dio_end_io()). | 8658 | * dio_end_io()). |
8657 | */ | 8659 | */ |
8658 | dip = NULL; | 8660 | dip = NULL; |
8659 | io_bio = NULL; | 8661 | bio = NULL; |
8660 | } else { | 8662 | } else { |
8661 | if (write) | 8663 | if (write) |
8662 | __endio_write_update_ordered(inode, | 8664 | __endio_write_update_ordered(inode, |
@@ -8674,8 +8676,8 @@ free_ordered: | |||
8674 | */ | 8676 | */ |
8675 | dio_end_io(dio_bio); | 8677 | dio_end_io(dio_bio); |
8676 | } | 8678 | } |
8677 | if (io_bio) | 8679 | if (bio) |
8678 | bio_put(io_bio); | 8680 | bio_put(bio); |
8679 | kfree(dip); | 8681 | kfree(dip); |
8680 | } | 8682 | } |
8681 | 8683 | ||
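The failure path now uses bio_io_error() rather than setting bi_status and calling bio_endio() by hand; the helper, as defined in include/linux/bio.h of this era, is equivalent to the two lines it replaces:

    static inline void bio_io_error(struct bio *bio)
    {
            bio->bi_status = BLK_STS_IOERR;
            bio_endio(bio);
    }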
@@ -8719,6 +8721,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
8719 | struct inode *inode = file->f_mapping->host; | 8721 | struct inode *inode = file->f_mapping->host; |
8720 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 8722 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
8721 | struct btrfs_dio_data dio_data = { 0 }; | 8723 | struct btrfs_dio_data dio_data = { 0 }; |
8724 | struct extent_changeset *data_reserved = NULL; | ||
8722 | loff_t offset = iocb->ki_pos; | 8725 | loff_t offset = iocb->ki_pos; |
8723 | size_t count = 0; | 8726 | size_t count = 0; |
8724 | int flags = 0; | 8727 | int flags = 0; |
@@ -8758,7 +8761,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
8758 | ret = -EAGAIN; | 8761 | ret = -EAGAIN; |
8759 | goto out; | 8762 | goto out; |
8760 | } | 8763 | } |
8761 | ret = btrfs_delalloc_reserve_space(inode, offset, count); | 8764 | ret = btrfs_delalloc_reserve_space(inode, &data_reserved, |
8765 | offset, count); | ||
8762 | if (ret) | 8766 | if (ret) |
8763 | goto out; | 8767 | goto out; |
8764 | dio_data.outstanding_extents = count_max_extents(count); | 8768 | dio_data.outstanding_extents = count_max_extents(count); |
@@ -8790,8 +8794,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
8790 | current->journal_info = NULL; | 8794 | current->journal_info = NULL; |
8791 | if (ret < 0 && ret != -EIOCBQUEUED) { | 8795 | if (ret < 0 && ret != -EIOCBQUEUED) { |
8792 | if (dio_data.reserve) | 8796 | if (dio_data.reserve) |
8793 | btrfs_delalloc_release_space(inode, offset, | 8797 | btrfs_delalloc_release_space(inode, data_reserved, |
8794 | dio_data.reserve); | 8798 | offset, dio_data.reserve); |
8795 | /* | 8799 | /* |
8796 | * On error we might have left some ordered extents | 8800 | * On error we might have left some ordered extents |
8797 | * without submitting corresponding bios for them, so | 8801 | * without submitting corresponding bios for them, so |
@@ -8806,8 +8810,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
8806 | dio_data.unsubmitted_oe_range_start, | 8810 | dio_data.unsubmitted_oe_range_start, |
8807 | false); | 8811 | false); |
8808 | } else if (ret >= 0 && (size_t)ret < count) | 8812 | } else if (ret >= 0 && (size_t)ret < count) |
8809 | btrfs_delalloc_release_space(inode, offset, | 8813 | btrfs_delalloc_release_space(inode, data_reserved, |
8810 | count - (size_t)ret); | 8814 | offset, count - (size_t)ret); |
8811 | } | 8815 | } |
8812 | out: | 8816 | out: |
8813 | if (wakeup) | 8817 | if (wakeup) |
@@ -8815,6 +8819,7 @@ out: | |||
8815 | if (relock) | 8819 | if (relock) |
8816 | inode_lock(inode); | 8820 | inode_lock(inode); |
8817 | 8821 | ||
8822 | extent_changeset_free(data_reserved); | ||
8818 | return ret; | 8823 | return ret; |
8819 | } | 8824 | } |
8820 | 8825 | ||
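btrfs_delalloc_reserve_space() now takes an extent_changeset out-parameter that records exactly which ranges picked up a qgroup reservation; the matching release calls hand it back so only those ranges are freed, and extent_changeset_free() runs once at the end on every path. A minimal caller sketch under those assumptions:

    /* Minimal sketch of the caller pattern used above. */
    static int reserve_then_release(struct inode *inode, u64 off, u64 len)
    {
            struct extent_changeset *data_reserved = NULL;
            int ret;

            /* Allocates *data_reserved on first use and records the
             * ranges that actually acquired a reservation. */
            ret = btrfs_delalloc_reserve_space(inode, &data_reserved, off, len);
            if (ret)
                    goto out;
            /* ... I/O work; on failure, release what was recorded ... */
            btrfs_delalloc_release_space(inode, data_reserved, off, len);
    out:
            extent_changeset_free(data_reserved);   /* accepts NULL */
            return ret;
    }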
@@ -9005,7 +9010,7 @@ again: | |||
9005 | * free the entire extent. | 9010 | * free the entire extent. |
9006 | */ | 9011 | */ |
9007 | if (PageDirty(page)) | 9012 | if (PageDirty(page)) |
9008 | btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE); | 9013 | btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE); |
9009 | if (!inode_evicting) { | 9014 | if (!inode_evicting) { |
9010 | clear_extent_bit(tree, page_start, page_end, | 9015 | clear_extent_bit(tree, page_start, page_end, |
9011 | EXTENT_LOCKED | EXTENT_DIRTY | | 9016 | EXTENT_LOCKED | EXTENT_DIRTY | |
@@ -9047,6 +9052,7 @@ int btrfs_page_mkwrite(struct vm_fault *vmf) | |||
9047 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 9052 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
9048 | struct btrfs_ordered_extent *ordered; | 9053 | struct btrfs_ordered_extent *ordered; |
9049 | struct extent_state *cached_state = NULL; | 9054 | struct extent_state *cached_state = NULL; |
9055 | struct extent_changeset *data_reserved = NULL; | ||
9050 | char *kaddr; | 9056 | char *kaddr; |
9051 | unsigned long zero_start; | 9057 | unsigned long zero_start; |
9052 | loff_t size; | 9058 | loff_t size; |
@@ -9072,7 +9078,7 @@ int btrfs_page_mkwrite(struct vm_fault *vmf) | |||
9072 | * end up waiting indefinitely to get a lock on the page currently | 9078 | * end up waiting indefinitely to get a lock on the page currently |
9073 | * being processed by btrfs_page_mkwrite() function. | 9079 | * being processed by btrfs_page_mkwrite() function. |
9074 | */ | 9080 | */ |
9075 | ret = btrfs_delalloc_reserve_space(inode, page_start, | 9081 | ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start, |
9076 | reserved_space); | 9082 | reserved_space); |
9077 | if (!ret) { | 9083 | if (!ret) { |
9078 | ret = file_update_time(vmf->vma->vm_file); | 9084 | ret = file_update_time(vmf->vma->vm_file); |
@@ -9126,8 +9132,8 @@ again: | |||
9126 | spin_lock(&BTRFS_I(inode)->lock); | 9132 | spin_lock(&BTRFS_I(inode)->lock); |
9127 | BTRFS_I(inode)->outstanding_extents++; | 9133 | BTRFS_I(inode)->outstanding_extents++; |
9128 | spin_unlock(&BTRFS_I(inode)->lock); | 9134 | spin_unlock(&BTRFS_I(inode)->lock); |
9129 | btrfs_delalloc_release_space(inode, page_start, | 9135 | btrfs_delalloc_release_space(inode, data_reserved, |
9130 | PAGE_SIZE - reserved_space); | 9136 | page_start, PAGE_SIZE - reserved_space); |
9131 | } | 9137 | } |
9132 | } | 9138 | } |
9133 | 9139 | ||
@@ -9178,13 +9184,16 @@ again: | |||
9178 | out_unlock: | 9184 | out_unlock: |
9179 | if (!ret) { | 9185 | if (!ret) { |
9180 | sb_end_pagefault(inode->i_sb); | 9186 | sb_end_pagefault(inode->i_sb); |
9187 | extent_changeset_free(data_reserved); | ||
9181 | return VM_FAULT_LOCKED; | 9188 | return VM_FAULT_LOCKED; |
9182 | } | 9189 | } |
9183 | unlock_page(page); | 9190 | unlock_page(page); |
9184 | out: | 9191 | out: |
9185 | btrfs_delalloc_release_space(inode, page_start, reserved_space); | 9192 | btrfs_delalloc_release_space(inode, data_reserved, page_start, |
9193 | reserved_space); | ||
9186 | out_noreserve: | 9194 | out_noreserve: |
9187 | sb_end_pagefault(inode->i_sb); | 9195 | sb_end_pagefault(inode->i_sb); |
9196 | extent_changeset_free(data_reserved); | ||
9188 | return ret; | 9197 | return ret; |
9189 | } | 9198 | } |
9190 | 9199 | ||
@@ -9406,8 +9415,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
9406 | 9415 | ||
9407 | inode = &ei->vfs_inode; | 9416 | inode = &ei->vfs_inode; |
9408 | extent_map_tree_init(&ei->extent_tree); | 9417 | extent_map_tree_init(&ei->extent_tree); |
9409 | extent_io_tree_init(&ei->io_tree, &inode->i_data); | 9418 | extent_io_tree_init(&ei->io_tree, inode); |
9410 | extent_io_tree_init(&ei->io_failure_tree, &inode->i_data); | 9419 | extent_io_tree_init(&ei->io_failure_tree, inode); |
9411 | ei->io_tree.track_uptodate = 1; | 9420 | ei->io_tree.track_uptodate = 1; |
9412 | ei->io_failure_tree.track_uptodate = 1; | 9421 | ei->io_failure_tree.track_uptodate = 1; |
9413 | atomic_set(&ei->sync_writers, 0); | 9422 | atomic_set(&ei->sync_writers, 0); |
@@ -9516,7 +9525,6 @@ void btrfs_destroy_cachep(void) | |||
9516 | rcu_barrier(); | 9525 | rcu_barrier(); |
9517 | kmem_cache_destroy(btrfs_inode_cachep); | 9526 | kmem_cache_destroy(btrfs_inode_cachep); |
9518 | kmem_cache_destroy(btrfs_trans_handle_cachep); | 9527 | kmem_cache_destroy(btrfs_trans_handle_cachep); |
9519 | kmem_cache_destroy(btrfs_transaction_cachep); | ||
9520 | kmem_cache_destroy(btrfs_path_cachep); | 9528 | kmem_cache_destroy(btrfs_path_cachep); |
9521 | kmem_cache_destroy(btrfs_free_space_cachep); | 9529 | kmem_cache_destroy(btrfs_free_space_cachep); |
9522 | } | 9530 | } |
@@ -9536,12 +9544,6 @@ int btrfs_init_cachep(void) | |||
9536 | if (!btrfs_trans_handle_cachep) | 9544 | if (!btrfs_trans_handle_cachep) |
9537 | goto fail; | 9545 | goto fail; |
9538 | 9546 | ||
9539 | btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction", | ||
9540 | sizeof(struct btrfs_transaction), 0, | ||
9541 | SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL); | ||
9542 | if (!btrfs_transaction_cachep) | ||
9543 | goto fail; | ||
9544 | |||
9545 | btrfs_path_cachep = kmem_cache_create("btrfs_path", | 9547 | btrfs_path_cachep = kmem_cache_create("btrfs_path", |
9546 | sizeof(struct btrfs_path), 0, | 9548 | sizeof(struct btrfs_path), 0, |
9547 | SLAB_MEM_SPREAD, NULL); | 9549 | SLAB_MEM_SPREAD, NULL); |
@@ -9566,6 +9568,24 @@ static int btrfs_getattr(const struct path *path, struct kstat *stat, | |||
9566 | u64 delalloc_bytes; | 9568 | u64 delalloc_bytes; |
9567 | struct inode *inode = d_inode(path->dentry); | 9569 | struct inode *inode = d_inode(path->dentry); |
9568 | u32 blocksize = inode->i_sb->s_blocksize; | 9570 | u32 blocksize = inode->i_sb->s_blocksize; |
9571 | u32 bi_flags = BTRFS_I(inode)->flags; | ||
9572 | |||
9573 | stat->result_mask |= STATX_BTIME; | ||
9574 | stat->btime.tv_sec = BTRFS_I(inode)->i_otime.tv_sec; | ||
9575 | stat->btime.tv_nsec = BTRFS_I(inode)->i_otime.tv_nsec; | ||
9576 | if (bi_flags & BTRFS_INODE_APPEND) | ||
9577 | stat->attributes |= STATX_ATTR_APPEND; | ||
9578 | if (bi_flags & BTRFS_INODE_COMPRESS) | ||
9579 | stat->attributes |= STATX_ATTR_COMPRESSED; | ||
9580 | if (bi_flags & BTRFS_INODE_IMMUTABLE) | ||
9581 | stat->attributes |= STATX_ATTR_IMMUTABLE; | ||
9582 | if (bi_flags & BTRFS_INODE_NODUMP) | ||
9583 | stat->attributes |= STATX_ATTR_NODUMP; | ||
9584 | |||
9585 | stat->attributes_mask |= (STATX_ATTR_APPEND | | ||
9586 | STATX_ATTR_COMPRESSED | | ||
9587 | STATX_ATTR_IMMUTABLE | | ||
9588 | STATX_ATTR_NODUMP); | ||
9569 | 9589 | ||
9570 | generic_fillattr(inode, stat); | 9590 | generic_fillattr(inode, stat); |
9571 | stat->dev = BTRFS_I(inode)->root->anon_dev; | 9591 | stat->dev = BTRFS_I(inode)->root->anon_dev; |
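With this hunk btrfs_getattr() reports the inode birth time plus the append, compressed, immutable and nodump flags through statx(). A small userspace sketch querying them, assuming the glibc 2.28+ statx() wrapper (older systems would need the raw syscall):

    #include <stdio.h>
    #include <fcntl.h>
    #include <sys/stat.h>

    int main(int argc, char **argv)
    {
            struct statx stx;

            if (argc < 2)
                    return 1;
            if (statx(AT_FDCWD, argv[1], 0, STATX_BTIME, &stx) != 0)
                    return 1;
            if (stx.stx_mask & STATX_BTIME)
                    printf("btime: %lld\n", (long long)stx.stx_btime.tv_sec);
            if (stx.stx_attributes_mask & STATX_ATTR_COMPRESSED)
                    printf("compressed: %s\n",
                           (stx.stx_attributes & STATX_ATTR_COMPRESSED)
                           ? "yes" : "no");
            return 0;
    }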
@@ -10540,7 +10560,7 @@ next: | |||
10540 | btrfs_end_transaction(trans); | 10560 | btrfs_end_transaction(trans); |
10541 | } | 10561 | } |
10542 | if (cur_offset < end) | 10562 | if (cur_offset < end) |
10543 | btrfs_free_reserved_data_space(inode, cur_offset, | 10563 | btrfs_free_reserved_data_space(inode, NULL, cur_offset, |
10544 | end - cur_offset + 1); | 10564 | end - cur_offset + 1); |
10545 | return ret; | 10565 | return ret; |
10546 | } | 10566 | } |
@@ -10661,6 +10681,42 @@ static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror) | |||
10661 | return -EAGAIN; | 10681 | return -EAGAIN; |
10662 | } | 10682 | } |
10663 | 10683 | ||
10684 | static struct btrfs_fs_info *iotree_fs_info(void *private_data) | ||
10685 | { | ||
10686 | struct inode *inode = private_data; | ||
10687 | return btrfs_sb(inode->i_sb); | ||
10688 | } | ||
10689 | |||
10690 | static void btrfs_check_extent_io_range(void *private_data, const char *caller, | ||
10691 | u64 start, u64 end) | ||
10692 | { | ||
10693 | struct inode *inode = private_data; | ||
10694 | u64 isize; | ||
10695 | |||
10696 | isize = i_size_read(inode); | ||
10697 | if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { | ||
10698 | btrfs_debug_rl(BTRFS_I(inode)->root->fs_info, | ||
10699 | "%s: ino %llu isize %llu odd range [%llu,%llu]", | ||
10700 | caller, btrfs_ino(BTRFS_I(inode)), isize, start, end); | ||
10701 | } | ||
10702 | } | ||
10703 | |||
10704 | void btrfs_set_range_writeback(void *private_data, u64 start, u64 end) | ||
10705 | { | ||
10706 | struct inode *inode = private_data; | ||
10707 | unsigned long index = start >> PAGE_SHIFT; | ||
10708 | unsigned long end_index = end >> PAGE_SHIFT; | ||
10709 | struct page *page; | ||
10710 | |||
10711 | while (index <= end_index) { | ||
10712 | page = find_get_page(inode->i_mapping, index); | ||
10713 | ASSERT(page); /* Pages should be in the extent_io_tree */ | ||
10714 | set_page_writeback(page); | ||
10715 | put_page(page); | ||
10716 | index++; | ||
10717 | } | ||
10718 | } | ||
10719 | |||
10664 | static const struct inode_operations btrfs_dir_inode_operations = { | 10720 | static const struct inode_operations btrfs_dir_inode_operations = { |
10665 | .getattr = btrfs_getattr, | 10721 | .getattr = btrfs_getattr, |
10666 | .lookup = btrfs_lookup, | 10722 | .lookup = btrfs_lookup, |
@@ -10704,6 +10760,8 @@ static const struct extent_io_ops btrfs_extent_io_ops = { | |||
10704 | .readpage_end_io_hook = btrfs_readpage_end_io_hook, | 10760 | .readpage_end_io_hook = btrfs_readpage_end_io_hook, |
10705 | .merge_bio_hook = btrfs_merge_bio_hook, | 10761 | .merge_bio_hook = btrfs_merge_bio_hook, |
10706 | .readpage_io_failed_hook = btrfs_readpage_io_failed_hook, | 10762 | .readpage_io_failed_hook = btrfs_readpage_io_failed_hook, |
10763 | .tree_fs_info = iotree_fs_info, | ||
10764 | .set_range_writeback = btrfs_set_range_writeback, | ||
10707 | 10765 | ||
10708 | /* optional callbacks */ | 10766 | /* optional callbacks */ |
10709 | .fill_delalloc = run_delalloc_range, | 10767 | .fill_delalloc = run_delalloc_range, |
@@ -10713,6 +10771,7 @@ static const struct extent_io_ops btrfs_extent_io_ops = { | |||
10713 | .clear_bit_hook = btrfs_clear_bit_hook, | 10771 | .clear_bit_hook = btrfs_clear_bit_hook, |
10714 | .merge_extent_hook = btrfs_merge_extent_hook, | 10772 | .merge_extent_hook = btrfs_merge_extent_hook, |
10715 | .split_extent_hook = btrfs_split_extent_hook, | 10773 | .split_extent_hook = btrfs_split_extent_hook, |
10774 | .check_extent_io_range = btrfs_check_extent_io_range, | ||
10716 | }; | 10775 | }; |
10717 | 10776 | ||
10718 | /* | 10777 | /* |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e176375f374f..fa1b78cf25f6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -37,7 +37,7 @@ | |||
37 | #include <linux/bit_spinlock.h> | 37 | #include <linux/bit_spinlock.h> |
38 | #include <linux/security.h> | 38 | #include <linux/security.h> |
39 | #include <linux/xattr.h> | 39 | #include <linux/xattr.h> |
40 | #include <linux/vmalloc.h> | 40 | #include <linux/mm.h> |
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #include <linux/blkdev.h> | 42 | #include <linux/blkdev.h> |
43 | #include <linux/uuid.h> | 43 | #include <linux/uuid.h> |
@@ -689,7 +689,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
689 | if (ret) | 689 | if (ret) |
690 | goto dec_and_free; | 690 | goto dec_and_free; |
691 | 691 | ||
692 | btrfs_wait_ordered_extents(root, -1, 0, (u64)-1); | 692 | btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); |
693 | 693 | ||
694 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, | 694 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, |
695 | BTRFS_BLOCK_RSV_TEMP); | 695 | BTRFS_BLOCK_RSV_TEMP); |
@@ -1127,6 +1127,7 @@ static int cluster_pages_for_defrag(struct inode *inode, | |||
1127 | struct btrfs_ordered_extent *ordered; | 1127 | struct btrfs_ordered_extent *ordered; |
1128 | struct extent_state *cached_state = NULL; | 1128 | struct extent_state *cached_state = NULL; |
1129 | struct extent_io_tree *tree; | 1129 | struct extent_io_tree *tree; |
1130 | struct extent_changeset *data_reserved = NULL; | ||
1130 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); | 1131 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); |
1131 | 1132 | ||
1132 | file_end = (isize - 1) >> PAGE_SHIFT; | 1133 | file_end = (isize - 1) >> PAGE_SHIFT; |
@@ -1135,7 +1136,7 @@ static int cluster_pages_for_defrag(struct inode *inode, | |||
1135 | 1136 | ||
1136 | page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); | 1137 | page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); |
1137 | 1138 | ||
1138 | ret = btrfs_delalloc_reserve_space(inode, | 1139 | ret = btrfs_delalloc_reserve_space(inode, &data_reserved, |
1139 | start_index << PAGE_SHIFT, | 1140 | start_index << PAGE_SHIFT, |
1140 | page_cnt << PAGE_SHIFT); | 1141 | page_cnt << PAGE_SHIFT); |
1141 | if (ret) | 1142 | if (ret) |
@@ -1226,7 +1227,7 @@ again: | |||
1226 | spin_lock(&BTRFS_I(inode)->lock); | 1227 | spin_lock(&BTRFS_I(inode)->lock); |
1227 | BTRFS_I(inode)->outstanding_extents++; | 1228 | BTRFS_I(inode)->outstanding_extents++; |
1228 | spin_unlock(&BTRFS_I(inode)->lock); | 1229 | spin_unlock(&BTRFS_I(inode)->lock); |
1229 | btrfs_delalloc_release_space(inode, | 1230 | btrfs_delalloc_release_space(inode, data_reserved, |
1230 | start_index << PAGE_SHIFT, | 1231 | start_index << PAGE_SHIFT, |
1231 | (page_cnt - i_done) << PAGE_SHIFT); | 1232 | (page_cnt - i_done) << PAGE_SHIFT); |
1232 | } | 1233 | } |
@@ -1247,15 +1248,17 @@ again: | |||
1247 | unlock_page(pages[i]); | 1248 | unlock_page(pages[i]); |
1248 | put_page(pages[i]); | 1249 | put_page(pages[i]); |
1249 | } | 1250 | } |
1251 | extent_changeset_free(data_reserved); | ||
1250 | return i_done; | 1252 | return i_done; |
1251 | out: | 1253 | out: |
1252 | for (i = 0; i < i_done; i++) { | 1254 | for (i = 0; i < i_done; i++) { |
1253 | unlock_page(pages[i]); | 1255 | unlock_page(pages[i]); |
1254 | put_page(pages[i]); | 1256 | put_page(pages[i]); |
1255 | } | 1257 | } |
1256 | btrfs_delalloc_release_space(inode, | 1258 | btrfs_delalloc_release_space(inode, data_reserved, |
1257 | start_index << PAGE_SHIFT, | 1259 | start_index << PAGE_SHIFT, |
1258 | page_cnt << PAGE_SHIFT); | 1260 | page_cnt << PAGE_SHIFT); |
1261 | extent_changeset_free(data_reserved); | ||
1259 | return ret; | 1262 | return ret; |
1260 | 1263 | ||
1261 | } | 1264 | } |
@@ -4588,7 +4591,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, | |||
4588 | 4591 | ||
4589 | out: | 4592 | out: |
4590 | btrfs_free_path(path); | 4593 | btrfs_free_path(path); |
4591 | vfree(inodes); | 4594 | kvfree(inodes); |
4592 | kfree(loi); | 4595 | kfree(loi); |
4593 | 4596 | ||
4594 | return ret; | 4597 | return ret; |
@@ -4897,7 +4900,6 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) | |||
4897 | goto out; | 4900 | goto out; |
4898 | } | 4901 | } |
4899 | 4902 | ||
4900 | /* FIXME: check if the IDs really exist */ | ||
4901 | if (sa->assign) { | 4903 | if (sa->assign) { |
4902 | ret = btrfs_add_qgroup_relation(trans, fs_info, | 4904 | ret = btrfs_add_qgroup_relation(trans, fs_info, |
4903 | sa->src, sa->dst); | 4905 | sa->src, sa->dst); |
@@ -4956,7 +4958,6 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) | |||
4956 | goto out; | 4958 | goto out; |
4957 | } | 4959 | } |
4958 | 4960 | ||
4959 | /* FIXME: check if the IDs really exist */ | ||
4960 | if (sa->create) { | 4961 | if (sa->create) { |
4961 | ret = btrfs_create_qgroup(trans, fs_info, sa->qgroupid); | 4962 | ret = btrfs_create_qgroup(trans, fs_info, sa->qgroupid); |
4962 | } else { | 4963 | } else { |
@@ -5010,7 +5011,6 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) | |||
5010 | qgroupid = root->root_key.objectid; | 5011 | qgroupid = root->root_key.objectid; |
5011 | } | 5012 | } |
5012 | 5013 | ||
5013 | /* FIXME: check if the IDs really exist */ | ||
5014 | ret = btrfs_limit_qgroup(trans, fs_info, qgroupid, &sa->lim); | 5014 | ret = btrfs_limit_qgroup(trans, fs_info, qgroupid, &sa->lim); |
5015 | 5015 | ||
5016 | err = btrfs_end_transaction(trans); | 5016 | err = btrfs_end_transaction(trans); |
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index f48c8c14dc14..d433e75d489a 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c | |||
@@ -18,13 +18,14 @@ | |||
18 | 18 | ||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/vmalloc.h> | 21 | #include <linux/mm.h> |
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
23 | #include <linux/err.h> | 23 | #include <linux/err.h> |
24 | #include <linux/sched.h> | 24 | #include <linux/sched.h> |
25 | #include <linux/pagemap.h> | 25 | #include <linux/pagemap.h> |
26 | #include <linux/bio.h> | 26 | #include <linux/bio.h> |
27 | #include <linux/lzo.h> | 27 | #include <linux/lzo.h> |
28 | #include <linux/refcount.h> | ||
28 | #include "compression.h" | 29 | #include "compression.h" |
29 | 30 | ||
30 | #define LZO_LEN 4 | 31 | #define LZO_LEN 4 |
@@ -40,9 +41,9 @@ static void lzo_free_workspace(struct list_head *ws) | |||
40 | { | 41 | { |
41 | struct workspace *workspace = list_entry(ws, struct workspace, list); | 42 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
42 | 43 | ||
43 | vfree(workspace->buf); | 44 | kvfree(workspace->buf); |
44 | vfree(workspace->cbuf); | 45 | kvfree(workspace->cbuf); |
45 | vfree(workspace->mem); | 46 | kvfree(workspace->mem); |
46 | kfree(workspace); | 47 | kfree(workspace); |
47 | } | 48 | } |
48 | 49 | ||
@@ -50,13 +51,13 @@ static struct list_head *lzo_alloc_workspace(void) | |||
50 | { | 51 | { |
51 | struct workspace *workspace; | 52 | struct workspace *workspace; |
52 | 53 | ||
53 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | 54 | workspace = kzalloc(sizeof(*workspace), GFP_KERNEL); |
54 | if (!workspace) | 55 | if (!workspace) |
55 | return ERR_PTR(-ENOMEM); | 56 | return ERR_PTR(-ENOMEM); |
56 | 57 | ||
57 | workspace->mem = vmalloc(LZO1X_MEM_COMPRESS); | 58 | workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); |
58 | workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_SIZE)); | 59 | workspace->buf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL); |
59 | workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_SIZE)); | 60 | workspace->cbuf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL); |
60 | if (!workspace->mem || !workspace->buf || !workspace->cbuf) | 61 | if (!workspace->mem || !workspace->buf || !workspace->cbuf) |
61 | goto fail; | 62 | goto fail; |
62 | 63 | ||
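The workspace buffers move from vmalloc() to kvmalloc() (hence linux/vmalloc.h giving way to linux/mm.h): kvmalloc() first tries kmalloc() and only falls back to vmalloc(), and kvfree() releases either kind correctly. A minimal sketch of the pairing:

    #include <linux/mm.h>

    static void *alloc_buf(size_t size)
    {
            /* Physically contiguous when the page allocator can satisfy
             * it cheaply, vmalloc-backed otherwise. */
            return kvmalloc(size, GFP_KERNEL);
    }

    static void free_buf(void *buf)
    {
            kvfree(buf);    /* correct for both kmalloc and vmalloc memory */
    }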
@@ -141,7 +142,7 @@ static int lzo_compress_pages(struct list_head *ws, | |||
141 | ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, | 142 | ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, |
142 | &out_len, workspace->mem); | 143 | &out_len, workspace->mem); |
143 | if (ret != LZO_E_OK) { | 144 | if (ret != LZO_E_OK) { |
144 | pr_debug("BTRFS: deflate in loop returned %d\n", | 145 | pr_debug("BTRFS: lzo in loop returned %d\n", |
145 | ret); | 146 | ret); |
146 | ret = -EIO; | 147 | ret = -EIO; |
147 | goto out; | 148 | goto out; |
@@ -229,8 +230,10 @@ static int lzo_compress_pages(struct list_head *ws, | |||
229 | in_len = min(bytes_left, PAGE_SIZE); | 230 | in_len = min(bytes_left, PAGE_SIZE); |
230 | } | 231 | } |
231 | 232 | ||
232 | if (tot_out > tot_in) | 233 | if (tot_out >= tot_in) { |
234 | ret = -E2BIG; | ||
233 | goto out; | 235 | goto out; |
236 | } | ||
234 | 237 | ||
235 | /* store the size of all chunks of compressed data */ | 238 | /* store the size of all chunks of compressed data */ |
236 | cpage_out = kmap(pages[0]); | 239 | cpage_out = kmap(pages[0]); |
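The bail-out condition above tightens from tot_out > tot_in to tot_out >= tot_in and now returns -E2BIG so callers can treat the data as incompressible: output merely equal to the input is still a loss once compressed-extent metadata and decompression cost are counted. The rule as a one-line predicate (the helper name is illustrative):

    /* Illustrative: a compressed result is kept only if it strictly
     * shrinks the data. */
    static inline bool compression_worth_it(unsigned long tot_in,
                                            unsigned long tot_out)
    {
            return tot_out < tot_in;
    }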
@@ -254,16 +257,13 @@ out: | |||
254 | return ret; | 257 | return ret; |
255 | } | 258 | } |
256 | 259 | ||
257 | static int lzo_decompress_bio(struct list_head *ws, | 260 | static int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) |
258 | struct page **pages_in, | ||
259 | u64 disk_start, | ||
260 | struct bio *orig_bio, | ||
261 | size_t srclen) | ||
262 | { | 261 | { |
263 | struct workspace *workspace = list_entry(ws, struct workspace, list); | 262 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
264 | int ret = 0, ret2; | 263 | int ret = 0, ret2; |
265 | char *data_in; | 264 | char *data_in; |
266 | unsigned long page_in_index = 0; | 265 | unsigned long page_in_index = 0; |
266 | size_t srclen = cb->compressed_len; | ||
267 | unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); | 267 | unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); |
268 | unsigned long buf_start; | 268 | unsigned long buf_start; |
269 | unsigned long buf_offset = 0; | 269 | unsigned long buf_offset = 0; |
@@ -278,6 +278,9 @@ static int lzo_decompress_bio(struct list_head *ws, | |||
278 | unsigned long tot_len; | 278 | unsigned long tot_len; |
279 | char *buf; | 279 | char *buf; |
280 | bool may_late_unmap, need_unmap; | 280 | bool may_late_unmap, need_unmap; |
281 | struct page **pages_in = cb->compressed_pages; | ||
282 | u64 disk_start = cb->start; | ||
283 | struct bio *orig_bio = cb->orig_bio; | ||
281 | 284 | ||
282 | data_in = kmap(pages_in[0]); | 285 | data_in = kmap(pages_in[0]); |
283 | tot_len = read_compress_length(data_in); | 286 | tot_len = read_compress_length(data_in); |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 7b40e2e7292a..a3aca495e33e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -663,7 +663,7 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work) | |||
663 | * wait for all the ordered extents in a root. This is done when balancing | 663 | * wait for all the ordered extents in a root. This is done when balancing |
664 | * space between drives. | 664 | * space between drives. |
665 | */ | 665 | */ |
666 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, | 666 | u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr, |
667 | const u64 range_start, const u64 range_len) | 667 | const u64 range_start, const u64 range_len) |
668 | { | 668 | { |
669 | struct btrfs_fs_info *fs_info = root->fs_info; | 669 | struct btrfs_fs_info *fs_info = root->fs_info; |
@@ -671,7 +671,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, | |||
671 | LIST_HEAD(skipped); | 671 | LIST_HEAD(skipped); |
672 | LIST_HEAD(works); | 672 | LIST_HEAD(works); |
673 | struct btrfs_ordered_extent *ordered, *next; | 673 | struct btrfs_ordered_extent *ordered, *next; |
674 | int count = 0; | 674 | u64 count = 0; |
675 | const u64 range_end = range_start + range_len; | 675 | const u64 range_end = range_start + range_len; |
676 | 676 | ||
677 | mutex_lock(&root->ordered_extent_mutex); | 677 | mutex_lock(&root->ordered_extent_mutex); |
@@ -701,7 +701,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, | |||
701 | 701 | ||
702 | cond_resched(); | 702 | cond_resched(); |
703 | spin_lock(&root->ordered_extent_lock); | 703 | spin_lock(&root->ordered_extent_lock); |
704 | if (nr != -1) | 704 | if (nr != U64_MAX) |
705 | nr--; | 705 | nr--; |
706 | count++; | 706 | count++; |
707 | } | 707 | } |
@@ -720,13 +720,13 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, | |||
720 | return count; | 720 | return count; |
721 | } | 721 | } |
722 | 722 | ||
723 | int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, | 723 | u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr, |
724 | const u64 range_start, const u64 range_len) | 724 | const u64 range_start, const u64 range_len) |
725 | { | 725 | { |
726 | struct btrfs_root *root; | 726 | struct btrfs_root *root; |
727 | struct list_head splice; | 727 | struct list_head splice; |
728 | int done; | 728 | u64 total_done = 0; |
729 | int total_done = 0; | 729 | u64 done; |
730 | 730 | ||
731 | INIT_LIST_HEAD(&splice); | 731 | INIT_LIST_HEAD(&splice); |
732 | 732 | ||
@@ -748,9 +748,8 @@ int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, | |||
748 | total_done += done; | 748 | total_done += done; |
749 | 749 | ||
750 | spin_lock(&fs_info->ordered_root_lock); | 750 | spin_lock(&fs_info->ordered_root_lock); |
751 | if (nr != -1) { | 751 | if (nr != U64_MAX) { |
752 | nr -= done; | 752 | nr -= done; |
753 | WARN_ON(nr < 0); | ||
754 | } | 753 | } |
755 | } | 754 | } |
756 | list_splice_tail(&splice, &fs_info->ordered_roots); | 755 | list_splice_tail(&splice, &fs_info->ordered_roots); |
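Widening nr from int to u64 removes the mixed-sign arithmetic in these two functions: the "no limit" sentinel becomes U64_MAX, i.e. (u64)-1, and the old WARN_ON(nr < 0) is dropped because an unsigned count cannot go negative. A self-contained sketch of the sentinel convention:

    #include <linux/kernel.h>       /* U64_MAX */

    /* Sketch: consume work against a u64 budget where U64_MAX means
     * "no limit"; have_work() and do_one() are placeholder callbacks. */
    static u64 drain(u64 nr, bool (*have_work)(void), void (*do_one)(void))
    {
            u64 done = 0;

            while (have_work() && nr > 0) {
                    do_one();
                    if (nr != U64_MAX)  /* an unlimited budget never shrinks */
                            nr--;
                    done++;
            }
            return done;
    }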
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index e0c1d5b8d859..56c4c0ee6381 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -200,9 +200,9 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
200 | struct btrfs_ordered_extent *ordered); | 200 | struct btrfs_ordered_extent *ordered); |
201 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | 201 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, |
202 | u32 *sum, int len); | 202 | u32 *sum, int len); |
203 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, | 203 | u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr, |
204 | const u64 range_start, const u64 range_len); | 204 | const u64 range_start, const u64 range_len); |
205 | int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, | 205 | u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr, |
206 | const u64 range_start, const u64 range_len); | 206 | const u64 range_start, const u64 range_len); |
207 | void btrfs_get_logged_extents(struct btrfs_inode *inode, | 207 | void btrfs_get_logged_extents(struct btrfs_inode *inode, |
208 | struct list_head *logged_list, | 208 | struct list_head *logged_list, |
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index cdafbf92ef0c..fcae61e175f3 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
@@ -261,8 +261,11 @@ void btrfs_print_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *l) | |||
261 | case BTRFS_BLOCK_GROUP_ITEM_KEY: | 261 | case BTRFS_BLOCK_GROUP_ITEM_KEY: |
262 | bi = btrfs_item_ptr(l, i, | 262 | bi = btrfs_item_ptr(l, i, |
263 | struct btrfs_block_group_item); | 263 | struct btrfs_block_group_item); |
264 | pr_info("\t\tblock group used %llu\n", | 264 | pr_info( |
265 | btrfs_disk_block_group_used(l, bi)); | 265 | "\t\tblock group used %llu chunk_objectid %llu flags %llu\n", |
266 | btrfs_disk_block_group_used(l, bi), | ||
267 | btrfs_disk_block_group_chunk_objectid(l, bi), | ||
268 | btrfs_disk_block_group_flags(l, bi)); | ||
266 | break; | 269 | break; |
267 | case BTRFS_CHUNK_ITEM_KEY: | 270 | case BTRFS_CHUNK_ITEM_KEY: |
268 | print_chunk(l, btrfs_item_ptr(l, i, | 271 | print_chunk(l, btrfs_item_ptr(l, i, |
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index d6cb155ef7a1..4b23ae5d0e5c 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c | |||
@@ -164,6 +164,7 @@ static int iterate_object_props(struct btrfs_root *root, | |||
164 | size_t), | 164 | size_t), |
165 | void *ctx) | 165 | void *ctx) |
166 | { | 166 | { |
167 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
167 | int ret; | 168 | int ret; |
168 | char *name_buf = NULL; | 169 | char *name_buf = NULL; |
169 | char *value_buf = NULL; | 170 | char *value_buf = NULL; |
@@ -214,6 +215,12 @@ static int iterate_object_props(struct btrfs_root *root, | |||
214 | name_ptr = (unsigned long)(di + 1); | 215 | name_ptr = (unsigned long)(di + 1); |
215 | data_ptr = name_ptr + name_len; | 216 | data_ptr = name_ptr + name_len; |
216 | 217 | ||
218 | if (verify_dir_item(fs_info, leaf, | ||
219 | path->slots[0], di)) { | ||
220 | ret = -EIO; | ||
221 | goto out; | ||
222 | } | ||
223 | |||
217 | if (name_len <= XATTR_BTRFS_PREFIX_LEN || | 224 | if (name_len <= XATTR_BTRFS_PREFIX_LEN || |
218 | memcmp_extent_buffer(leaf, XATTR_BTRFS_PREFIX, | 225 | memcmp_extent_buffer(leaf, XATTR_BTRFS_PREFIX, |
219 | name_ptr, | 226 | name_ptr, |
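iterate_object_props() now validates each dir item before dereferencing the name and data bytes that follow it in the leaf, returning -EIO on corruption; the checks live in verify_dir_item() in fs/btrfs/dir-item.c. The helper below is an illustrative sketch of the kind of bounds check such validation must perform, not the kernel function itself:

    /* Illustrative only: the dir-item header, its name and its data must
     * all fit inside the leaf item that claims to contain them. */
    static bool dir_item_fits(u32 item_size, u32 header_size,
                              u16 name_len, u16 data_len)
    {
            if (header_size > item_size)
                    return false;
            return (u32)name_len + data_len <= item_size - header_size;
    }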
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index deffbeb74a0b..4ce351efe281 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -1406,38 +1406,6 @@ out: | |||
1406 | return ret; | 1406 | return ret; |
1407 | } | 1407 | } |
1408 | 1408 | ||
1409 | int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, | ||
1410 | struct btrfs_fs_info *fs_info) | ||
1411 | { | ||
1412 | struct btrfs_qgroup_extent_record *record; | ||
1413 | struct btrfs_delayed_ref_root *delayed_refs; | ||
1414 | struct rb_node *node; | ||
1415 | u64 qgroup_to_skip; | ||
1416 | int ret = 0; | ||
1417 | |||
1418 | delayed_refs = &trans->transaction->delayed_refs; | ||
1419 | qgroup_to_skip = delayed_refs->qgroup_to_skip; | ||
1420 | |||
1421 | /* | ||
1422 | * No need to do lock, since this function will only be called in | ||
1423 | * btrfs_commit_transaction(). | ||
1424 | */ | ||
1425 | node = rb_first(&delayed_refs->dirty_extent_root); | ||
1426 | while (node) { | ||
1427 | record = rb_entry(node, struct btrfs_qgroup_extent_record, | ||
1428 | node); | ||
1429 | if (WARN_ON(!record->old_roots)) | ||
1430 | ret = btrfs_find_all_roots(NULL, fs_info, | ||
1431 | record->bytenr, 0, &record->old_roots); | ||
1432 | if (ret < 0) | ||
1433 | break; | ||
1434 | if (qgroup_to_skip) | ||
1435 | ulist_del(record->old_roots, qgroup_to_skip, 0); | ||
1436 | node = rb_next(node); | ||
1437 | } | ||
1438 | return ret; | ||
1439 | } | ||
1440 | |||
1441 | int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, | 1409 | int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, |
1442 | struct btrfs_delayed_ref_root *delayed_refs, | 1410 | struct btrfs_delayed_ref_root *delayed_refs, |
1443 | struct btrfs_qgroup_extent_record *record) | 1411 | struct btrfs_qgroup_extent_record *record) |
@@ -1559,6 +1527,7 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, | |||
1559 | if (ret) | 1527 | if (ret) |
1560 | return ret; | 1528 | return ret; |
1561 | } | 1529 | } |
1530 | cond_resched(); | ||
1562 | return 0; | 1531 | return 0; |
1563 | } | 1532 | } |
1564 | 1533 | ||
@@ -1918,6 +1887,35 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info, | |||
1918 | return 0; | 1887 | return 0; |
1919 | } | 1888 | } |
1920 | 1889 | ||
1890 | /* | ||
1891 | * Check if @roots could be a list of fs tree roots ||
1892 | * ||
1893 | * Return 0 if the ulist definitely contains no fs/subvol tree roots ||
1894 | * Return 1 if the list may contain fs/subvol tree roots (an empty ||
1895 | * list counts as possible too) ||
1896 | */ | ||
1897 | static int maybe_fs_roots(struct ulist *roots) | ||
1898 | { | ||
1899 | struct ulist_node *unode; | ||
1900 | struct ulist_iterator uiter; | ||
1901 | |||
1902 | /* Empty one, still possible for fs roots */ | ||
1903 | if (!roots || roots->nnodes == 0) | ||
1904 | return 1; | ||
1905 | |||
1906 | ULIST_ITER_INIT(&uiter); | ||
1907 | unode = ulist_next(roots, &uiter); | ||
1908 | if (!unode) | ||
1909 | return 1; | ||
1910 | |||
1911 | /* | ||
1912 | * If it contains fs tree roots, then it must belong to fs/subvol | ||
1913 | * trees. | ||
1914 | * If it contains a non-fs tree, it won't be shared with fs/subvol trees. | ||
1915 | */ | ||
1916 | return is_fstree(unode->val); | ||
1917 | } | ||
1918 | |||
1921 | int | 1919 | int |
1922 | btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, | 1920 | btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, |
1923 | struct btrfs_fs_info *fs_info, | 1921 | struct btrfs_fs_info *fs_info, |
@@ -1934,10 +1932,20 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, | |||
1934 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) | 1932 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) |
1935 | return 0; | 1933 | return 0; |
1936 | 1934 | ||
1937 | if (new_roots) | 1935 | if (new_roots) { |
1936 | if (!maybe_fs_roots(new_roots)) | ||
1937 | goto out_free; | ||
1938 | nr_new_roots = new_roots->nnodes; | 1938 | nr_new_roots = new_roots->nnodes; |
1939 | if (old_roots) | 1939 | } |
1940 | if (old_roots) { | ||
1941 | if (!maybe_fs_roots(old_roots)) | ||
1942 | goto out_free; | ||
1940 | nr_old_roots = old_roots->nnodes; | 1943 | nr_old_roots = old_roots->nnodes; |
1944 | } | ||
1945 | |||
1946 | /* Quick exit, either not fs tree roots, or won't affect any qgroup */ | ||
1947 | if (nr_old_roots == 0 && nr_new_roots == 0) | ||
1948 | goto out_free; | ||
1941 | 1949 | ||
1942 | BUG_ON(!fs_info->quota_root); | 1950 | BUG_ON(!fs_info->quota_root); |
1943 | 1951 | ||
@@ -2017,6 +2025,19 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, | |||
2017 | 2025 | ||
2018 | if (!ret) { | 2026 | if (!ret) { |
2019 | /* | 2027 | /* |
2028 | * Old roots should have been populated when the qgroup ||
2029 | * extent record was inserted ||
2030 | */ | ||
2031 | if (WARN_ON(!record->old_roots)) { | ||
2032 | /* Search commit root to find old_roots */ | ||
2033 | ret = btrfs_find_all_roots(NULL, fs_info, | ||
2034 | record->bytenr, 0, | ||
2035 | &record->old_roots); | ||
2036 | if (ret < 0) | ||
2037 | goto cleanup; | ||
2038 | } | ||
2039 | |||
2040 | /* | ||
2020 | * Use SEQ_LAST as time_seq to do special search, which | 2041 | * Use SEQ_LAST as time_seq to do special search, which |
2021 | * doesn't lock tree or delayed_refs and search current | 2042 | * doesn't lock tree or delayed_refs and search current |
2022 | * root. It's safe inside commit_transaction(). | 2043 | * root. It's safe inside commit_transaction(). |
@@ -2025,8 +2046,11 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, | |||
2025 | record->bytenr, SEQ_LAST, &new_roots); | 2046 | record->bytenr, SEQ_LAST, &new_roots); |
2026 | if (ret < 0) | 2047 | if (ret < 0) |
2027 | goto cleanup; | 2048 | goto cleanup; |
2028 | if (qgroup_to_skip) | 2049 | if (qgroup_to_skip) { |
2029 | ulist_del(new_roots, qgroup_to_skip, 0); | 2050 | ulist_del(new_roots, qgroup_to_skip, 0); |
2051 | ulist_del(record->old_roots, qgroup_to_skip, | ||
2052 | 0); | ||
2053 | } | ||
2030 | ret = btrfs_qgroup_account_extent(trans, fs_info, | 2054 | ret = btrfs_qgroup_account_extent(trans, fs_info, |
2031 | record->bytenr, record->num_bytes, | 2055 | record->bytenr, record->num_bytes, |
2032 | record->old_roots, new_roots); | 2056 | record->old_roots, new_roots); |
@@ -2338,6 +2362,11 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce) | |||
2338 | 2362 | ||
2339 | if (num_bytes == 0) | 2363 | if (num_bytes == 0) |
2340 | return 0; | 2364 | return 0; |
2365 | |||
2366 | if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) && | ||
2367 | capable(CAP_SYS_RESOURCE)) | ||
2368 | enforce = false; | ||
2369 | |||
2341 | retry: | 2370 | retry: |
2342 | spin_lock(&fs_info->qgroup_lock); | 2371 | spin_lock(&fs_info->qgroup_lock); |
2343 | quota_root = fs_info->quota_root; | 2372 | quota_root = fs_info->quota_root; |
@@ -2376,7 +2405,7 @@ retry: | |||
2376 | ret = btrfs_start_delalloc_inodes(root, 0); | 2405 | ret = btrfs_start_delalloc_inodes(root, 0); |
2377 | if (ret) | 2406 | if (ret) |
2378 | return ret; | 2407 | return ret; |
2379 | btrfs_wait_ordered_extents(root, -1, 0, (u64)-1); | 2408 | btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); |
2380 | trans = btrfs_join_transaction(root); | 2409 | trans = btrfs_join_transaction(root); |
2381 | if (IS_ERR(trans)) | 2410 | if (IS_ERR(trans)) |
2382 | return PTR_ERR(trans); | 2411 | return PTR_ERR(trans); |
@@ -2806,55 +2835,130 @@ btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) | |||
2806 | * Return <0 for error (including -EQUOT) | 2835 | * Return <0 for error (including -EQUOT) |
2807 | * | 2836 | * |
2808 | * NOTE: this function may sleep for memory allocation. | 2837 | * NOTE: this function may sleep for memory allocation. |
2838 | * If btrfs_qgroup_reserve_data() is called multiple times with ||
2839 | * the same @reserved, the caller must ensure it is safe to free ||
2840 | * *ALL* reserved space if an error happens. ||
2809 | */ | 2841 | */ |
2810 | int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len) | 2842 | int btrfs_qgroup_reserve_data(struct inode *inode, |
2843 | struct extent_changeset **reserved_ret, u64 start, | ||
2844 | u64 len) | ||
2811 | { | 2845 | { |
2812 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2846 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2813 | struct extent_changeset changeset; | ||
2814 | struct ulist_node *unode; | 2847 | struct ulist_node *unode; |
2815 | struct ulist_iterator uiter; | 2848 | struct ulist_iterator uiter; |
2849 | struct extent_changeset *reserved; | ||
2850 | u64 orig_reserved; | ||
2851 | u64 to_reserve; | ||
2816 | int ret; | 2852 | int ret; |
2817 | 2853 | ||
2818 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) || | 2854 | if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) || |
2819 | !is_fstree(root->objectid) || len == 0) | 2855 | !is_fstree(root->objectid) || len == 0) |
2820 | return 0; | 2856 | return 0; |
2821 | 2857 | ||
2822 | changeset.bytes_changed = 0; | 2858 | /* @reserved parameter is mandatory for qgroup */ |
2823 | ulist_init(&changeset.range_changed); | 2859 | if (WARN_ON(!reserved_ret)) |
2860 | return -EINVAL; | ||
2861 | if (!*reserved_ret) { | ||
2862 | *reserved_ret = extent_changeset_alloc(); | ||
2863 | if (!*reserved_ret) | ||
2864 | return -ENOMEM; | ||
2865 | } | ||
2866 | reserved = *reserved_ret; | ||
2867 | /* Record already reserved space */ | ||
2868 | orig_reserved = reserved->bytes_changed; | ||
2824 | ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start, | 2869 | ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start, |
2825 | start + len -1, EXTENT_QGROUP_RESERVED, &changeset); | 2870 | start + len -1, EXTENT_QGROUP_RESERVED, reserved); |
2871 | |||
2872 | /* Newly reserved space */ | ||
2873 | to_reserve = reserved->bytes_changed - orig_reserved; | ||
2826 | trace_btrfs_qgroup_reserve_data(inode, start, len, | 2874 | trace_btrfs_qgroup_reserve_data(inode, start, len, |
2827 | changeset.bytes_changed, | 2875 | to_reserve, QGROUP_RESERVE); |
2828 | QGROUP_RESERVE); | ||
2829 | if (ret < 0) | 2876 | if (ret < 0) |
2830 | goto cleanup; | 2877 | goto cleanup; |
2831 | ret = qgroup_reserve(root, changeset.bytes_changed, true); | 2878 | ret = qgroup_reserve(root, to_reserve, true); |
2832 | if (ret < 0) | 2879 | if (ret < 0) |
2833 | goto cleanup; | 2880 | goto cleanup; |
2834 | 2881 | ||
2835 | ulist_release(&changeset.range_changed); | ||
2836 | return ret; | 2882 | return ret; |
2837 | 2883 | ||
2838 | cleanup: | 2884 | cleanup: |
2839 | /* cleanup already reserved ranges */ | 2885 | /* cleanup *ALL* already reserved ranges */ |
2840 | ULIST_ITER_INIT(&uiter); | 2886 | ULIST_ITER_INIT(&uiter); |
2841 | while ((unode = ulist_next(&changeset.range_changed, &uiter))) | 2887 | while ((unode = ulist_next(&reserved->range_changed, &uiter))) |
2842 | clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, | 2888 | clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, |
2843 | unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL, | 2889 | unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL, |
2844 | GFP_NOFS); | 2890 | GFP_NOFS); |
2845 | ulist_release(&changeset.range_changed); | 2891 | extent_changeset_release(reserved); |
2892 | return ret; | ||
2893 | } | ||
2894 | |||
2895 | /* Free ranges specified by @reserved, normally in error path */ | ||
2896 | static int qgroup_free_reserved_data(struct inode *inode, | ||
2897 | struct extent_changeset *reserved, u64 start, u64 len) | ||
2898 | { | ||
2899 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
2900 | struct ulist_node *unode; | ||
2901 | struct ulist_iterator uiter; | ||
2902 | struct extent_changeset changeset; | ||
2903 | int freed = 0; | ||
2904 | int ret; | ||
2905 | |||
2906 | extent_changeset_init(&changeset); | ||
2907 | len = round_up(start + len, root->fs_info->sectorsize); | ||
2908 | start = round_down(start, root->fs_info->sectorsize); | ||
2909 | len -= start; /* convert the rounded-up end back into a length */ | ||
2910 | ULIST_ITER_INIT(&uiter); | ||
2911 | while ((unode = ulist_next(&reserved->range_changed, &uiter))) { | ||
2912 | u64 range_start = unode->val; | ||
2913 | /* unode->aux is the inclusive end */ | ||
2914 | u64 range_len = unode->aux - range_start + 1; | ||
2915 | u64 free_start; | ||
2916 | u64 free_len; | ||
2917 | |||
2918 | extent_changeset_release(&changeset); | ||
2919 | |||
2920 | /* Only free the part of each range inside [start, start + len) */ | ||
2921 | if (range_start >= start + len || | ||
2922 | range_start + range_len <= start) | ||
2923 | continue; | ||
2924 | free_start = max(range_start, start); | ||
2925 | free_len = min(start + len, range_start + range_len) - | ||
2926 | free_start; | ||
2927 | /* | ||
2928 | * TODO: Also modify reserved->ranges_reserved to reflect | ||
2929 | * the modification. | ||
2930 | * | ||
2931 | * However, as long as we free qgroup reserved space according to | ||
2932 | * EXTENT_QGROUP_RESERVED, we won't double free. | ||
2933 | * So there is no need to rush. | ||
2934 | */ | ||
2935 | ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, | ||
2936 | free_start, free_start + free_len - 1, | ||
2937 | EXTENT_QGROUP_RESERVED, &changeset); | ||
2938 | if (ret < 0) | ||
2939 | goto out; | ||
2940 | freed += changeset.bytes_changed; | ||
2941 | } | ||
2942 | btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed); | ||
2943 | ret = freed; | ||
2944 | out: | ||
2945 | extent_changeset_release(&changeset); | ||
2846 | return ret; | 2946 | return ret; |
2847 | } | 2947 | } |
2848 | 2948 | ||
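qgroup_free_reserved_data() walks the recorded ranges and frees only the part of each one that overlaps the window [start, start + len). A compilable userspace model of the skip test and the max()/min() clamp used above (names illustrative):

    #include <stdio.h>

    /* Clamp [range_start, range_start + range_len) to the window
     * [start, start + len); returns 0 when there is no overlap. */
    static unsigned long long overlap(unsigned long long range_start,
                                      unsigned long long range_len,
                                      unsigned long long start,
                                      unsigned long long len,
                                      unsigned long long *free_start)
    {
        if (range_start >= start + len || range_start + range_len <= start)
            return 0;   /* disjoint, nothing to free */
        *free_start = range_start > start ? range_start : start;  /* max() */
        return (start + len < range_start + range_len ?
                start + len : range_start + range_len) - *free_start; /* min() */
    }

    int main(void)
    {
        unsigned long long fs, flen;

        /* range [0, 16K) clipped by window [4K, 12K) frees [4K, 12K) */
        flen = overlap(0, 16384, 4096, 8192, &fs);
        printf("%llu bytes at %llu\n", flen, fs);   /* 8192 bytes at 4096 */
        return 0;
    }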
2849 | static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len, | 2949 | static int __btrfs_qgroup_release_data(struct inode *inode, |
2850 | int free) | 2950 | struct extent_changeset *reserved, u64 start, u64 len, |
2951 | int free) | ||
2851 | { | 2952 | { |
2852 | struct extent_changeset changeset; | 2953 | struct extent_changeset changeset; |
2853 | int trace_op = QGROUP_RELEASE; | 2954 | int trace_op = QGROUP_RELEASE; |
2854 | int ret; | 2955 | int ret; |
2855 | 2956 | ||
2856 | changeset.bytes_changed = 0; | 2957 | /* In the release case, we shouldn't have @reserved */ |
2857 | ulist_init(&changeset.range_changed); | 2958 | WARN_ON(!free && reserved); |
2959 | if (free && reserved) | ||
2960 | return qgroup_free_reserved_data(inode, reserved, start, len); | ||
2961 | extent_changeset_init(&changeset); | ||
2858 | ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, | 2962 | ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, |
2859 | start + len - 1, EXTENT_QGROUP_RESERVED, &changeset); | 2963 | start + len - 1, EXTENT_QGROUP_RESERVED, &changeset); |
2860 | if (ret < 0) | 2964 | if (ret < 0) |
@@ -2868,8 +2972,9 @@ static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len, | |||
2868 | btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, | 2972 | btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, |
2869 | BTRFS_I(inode)->root->objectid, | 2973 | BTRFS_I(inode)->root->objectid, |
2870 | changeset.bytes_changed); | 2974 | changeset.bytes_changed); |
2975 | ret = changeset.bytes_changed; | ||
2871 | out: | 2976 | out: |
2872 | ulist_release(&changeset.range_changed); | 2977 | extent_changeset_release(&changeset); |
2873 | return ret; | 2978 | return ret; |
2874 | } | 2979 | } |
2875 | 2980 | ||
@@ -2878,14 +2983,17 @@ out: | |||
2878 | * | 2983 | * |
2879 | * Should be called when a range of pages gets invalidated before reaching disk. | 2984 | * Should be called when a range of pages gets invalidated before reaching disk. |
2880 | * Or for the error cleanup case. | 2985 | * Or for the error cleanup case. |
2986 | * If @reserved is given, only the reserved ranges in [@start, @start + @len) | ||
2987 | * will be freed. | ||
2881 | * | 2988 | * |
2882 | * For data written to disk, use btrfs_qgroup_release_data(). | 2989 | * For data written to disk, use btrfs_qgroup_release_data(). |
2883 | * | 2990 | * |
2884 | * NOTE: This function may sleep for memory allocation. | 2991 | * NOTE: This function may sleep for memory allocation. |
2885 | */ | 2992 | */ |
2886 | int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len) | 2993 | int btrfs_qgroup_free_data(struct inode *inode, |
2994 | struct extent_changeset *reserved, u64 start, u64 len) | ||
2887 | { | 2995 | { |
2888 | return __btrfs_qgroup_release_data(inode, start, len, 1); | 2996 | return __btrfs_qgroup_release_data(inode, reserved, start, len, 1); |
2889 | } | 2997 | } |
2890 | 2998 | ||
2891 | /* | 2999 | /* |
@@ -2905,7 +3013,7 @@ int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len) | |||
2905 | */ | 3013 | */ |
2906 | int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len) | 3014 | int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len) |
2907 | { | 3015 | { |
2908 | return __btrfs_qgroup_release_data(inode, start, len, 0); | 3016 | return __btrfs_qgroup_release_data(inode, NULL, start, len, 0); |
2909 | } | 3017 | } |
2910 | 3018 | ||
2911 | int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, | 3019 | int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, |
@@ -2969,8 +3077,7 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode) | |||
2969 | struct ulist_iterator iter; | 3077 | struct ulist_iterator iter; |
2970 | int ret; | 3078 | int ret; |
2971 | 3079 | ||
2972 | changeset.bytes_changed = 0; | 3080 | extent_changeset_init(&changeset); |
2973 | ulist_init(&changeset.range_changed); | ||
2974 | ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1, | 3081 | ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1, |
2975 | EXTENT_QGROUP_RESERVED, &changeset); | 3082 | EXTENT_QGROUP_RESERVED, &changeset); |
2976 | 3083 | ||
@@ -2987,5 +3094,5 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode) | |||
2987 | changeset.bytes_changed); | 3094 | changeset.bytes_changed); |
2988 | 3095 | ||
2989 | } | 3096 | } |
2990 | ulist_release(&changeset.range_changed); | 3097 | extent_changeset_release(&changeset); |
2991 | } | 3098 | } |
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index fe04d3f295c6..d9984e87cddf 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h | |||
@@ -134,8 +134,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, | |||
134 | int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); | 134 | int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); |
135 | void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); | 135 | void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); |
136 | struct btrfs_delayed_extent_op; | 136 | struct btrfs_delayed_extent_op; |
137 | int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, | 137 | |
138 | struct btrfs_fs_info *fs_info); | ||
139 | /* | 138 | /* |
140 | * Inform qgroup to trace one dirty extent, its info is recorded in @record. | 139 | * Inform qgroup to trace one dirty extent, its info is recorded in @record. |
141 | * So qgroup can account it at transaction committing time. | 140 | * So qgroup can account it at transaction committing time. |
@@ -243,9 +242,11 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, | |||
243 | #endif | 242 | #endif |
244 | 243 | ||
245 | /* New io_tree based accurate qgroup reserve API */ | 244 | /* New io_tree based accurate qgroup reserve API */ |
246 | int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len); | 245 | int btrfs_qgroup_reserve_data(struct inode *inode, |
246 | struct extent_changeset **reserved, u64 start, u64 len); | ||
247 | int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len); | 247 | int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len); |
248 | int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len); | 248 | int btrfs_qgroup_free_data(struct inode *inode, |
249 | struct extent_changeset *reserved, u64 start, u64 len); | ||
249 | 250 | ||
250 | int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, | 251 | int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, |
251 | bool enforce); | 252 | bool enforce); |
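Taken together, the reworked header API is used in a reserve / release-or-free pattern: the caller owns one extent_changeset for the whole operation, and the free side consults it so that only ranges this caller actually reserved are given back. A hedged sketch of the call sequence, not taken from this patch (error handling trimmed; write_ok is a hypothetical condition):

    struct extent_changeset *data_reserved = NULL;
    int ret;

    ret = btrfs_qgroup_reserve_data(inode, &data_reserved, start, len);
    if (ret < 0)
        goto out;
    if (write_ok)
        /* data reached disk: hand the reservation over to accounting */
        btrfs_qgroup_release_data(inode, start, len);
    else
        /* error/invalidation: give back only what we reserved */
        btrfs_qgroup_free_data(inode, data_reserved, start, len);
    out:
    extent_changeset_free(data_reserved);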
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index f3d30d9ea8f9..6f845d219cd6 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
@@ -31,7 +31,7 @@ | |||
31 | #include <linux/hash.h> | 31 | #include <linux/hash.h> |
32 | #include <linux/list_sort.h> | 32 | #include <linux/list_sort.h> |
33 | #include <linux/raid/xor.h> | 33 | #include <linux/raid/xor.h> |
34 | #include <linux/vmalloc.h> | 34 | #include <linux/mm.h> |
35 | #include <asm/div64.h> | 35 | #include <asm/div64.h> |
36 | #include "ctree.h" | 36 | #include "ctree.h" |
37 | #include "extent_map.h" | 37 | #include "extent_map.h" |
@@ -218,12 +218,9 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info) | |||
218 | * of a failing mount. | 218 | * of a failing mount. |
219 | */ | 219 | */ |
220 | table_size = sizeof(*table) + sizeof(*h) * num_entries; | 220 | table_size = sizeof(*table) + sizeof(*h) * num_entries; |
221 | table = kzalloc(table_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); | 221 | table = kvzalloc(table_size, GFP_KERNEL); |
222 | if (!table) { | 222 | if (!table) |
223 | table = vzalloc(table_size); | 223 | return -ENOMEM; |
224 | if (!table) | ||
225 | return -ENOMEM; | ||
226 | } | ||
227 | 224 | ||
228 | spin_lock_init(&table->cache_lock); | 225 | spin_lock_init(&table->cache_lock); |
229 | INIT_LIST_HEAD(&table->stripe_cache); | 226 | INIT_LIST_HEAD(&table->stripe_cache); |
@@ -1101,10 +1098,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, | |||
1101 | } | 1098 | } |
1102 | 1099 | ||
1103 | /* put a new bio on the list */ | 1100 | /* put a new bio on the list */ |
1104 | bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1); | 1101 | bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1); |
1105 | if (!bio) | ||
1106 | return -ENOMEM; | ||
1107 | |||
1108 | bio->bi_iter.bi_size = 0; | 1102 | bio->bi_iter.bi_size = 0; |
1109 | bio->bi_bdev = stripe->dev->bdev; | 1103 | bio->bi_bdev = stripe->dev->bdev; |
1110 | bio->bi_iter.bi_sector = disk_start >> 9; | 1104 | bio->bi_iter.bi_sector = disk_start >> 9; |
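Several call sites in this merge (the stripe hash table here, clone_roots in send.c below) replace the open-coded kzalloc-then-vzalloc fallback with kvzalloc(), which tries the slab allocator first and falls back to vmalloc internally. The before/after shapes, side by side; kvfree() releases either backing store:

    /* Before: open-coded fallback, two failure checks. */
    table = kzalloc(table_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
    if (!table) {
        table = vzalloc(table_size);
        if (!table)
            return -ENOMEM;
    }

    /* After: one call, one check; free with kvfree() in both cases. */
    table = kvzalloc(table_size, GFP_KERNEL);
    if (!table)
        return -ENOMEM;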
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index a17e775a4a89..ab852b8e3e37 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
@@ -66,7 +66,6 @@ struct reada_extctl { | |||
66 | struct reada_extent { | 66 | struct reada_extent { |
67 | u64 logical; | 67 | u64 logical; |
68 | struct btrfs_key top; | 68 | struct btrfs_key top; |
69 | int err; | ||
70 | struct list_head extctl; | 69 | struct list_head extctl; |
71 | int refcnt; | 70 | int refcnt; |
72 | spinlock_t lock; | 71 | spinlock_t lock; |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d60df51959f7..65661d1aae4e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -3093,11 +3093,12 @@ int prealloc_file_extent_cluster(struct inode *inode, | |||
3093 | u64 prealloc_start = cluster->start - offset; | 3093 | u64 prealloc_start = cluster->start - offset; |
3094 | u64 prealloc_end = cluster->end - offset; | 3094 | u64 prealloc_end = cluster->end - offset; |
3095 | u64 cur_offset; | 3095 | u64 cur_offset; |
3096 | struct extent_changeset *data_reserved = NULL; | ||
3096 | 3097 | ||
3097 | BUG_ON(cluster->start != cluster->boundary[0]); | 3098 | BUG_ON(cluster->start != cluster->boundary[0]); |
3098 | inode_lock(inode); | 3099 | inode_lock(inode); |
3099 | 3100 | ||
3100 | ret = btrfs_check_data_free_space(inode, prealloc_start, | 3101 | ret = btrfs_check_data_free_space(inode, &data_reserved, prealloc_start, |
3101 | prealloc_end + 1 - prealloc_start); | 3102 | prealloc_end + 1 - prealloc_start); |
3102 | if (ret) | 3103 | if (ret) |
3103 | goto out; | 3104 | goto out; |
@@ -3113,8 +3114,8 @@ int prealloc_file_extent_cluster(struct inode *inode, | |||
3113 | lock_extent(&BTRFS_I(inode)->io_tree, start, end); | 3114 | lock_extent(&BTRFS_I(inode)->io_tree, start, end); |
3114 | num_bytes = end + 1 - start; | 3115 | num_bytes = end + 1 - start; |
3115 | if (cur_offset < start) | 3116 | if (cur_offset < start) |
3116 | btrfs_free_reserved_data_space(inode, cur_offset, | 3117 | btrfs_free_reserved_data_space(inode, data_reserved, |
3117 | start - cur_offset); | 3118 | cur_offset, start - cur_offset); |
3118 | ret = btrfs_prealloc_file_range(inode, 0, start, | 3119 | ret = btrfs_prealloc_file_range(inode, 0, start, |
3119 | num_bytes, num_bytes, | 3120 | num_bytes, num_bytes, |
3120 | end + 1, &alloc_hint); | 3121 | end + 1, &alloc_hint); |
@@ -3125,10 +3126,11 @@ int prealloc_file_extent_cluster(struct inode *inode, | |||
3125 | nr++; | 3126 | nr++; |
3126 | } | 3127 | } |
3127 | if (cur_offset < prealloc_end) | 3128 | if (cur_offset < prealloc_end) |
3128 | btrfs_free_reserved_data_space(inode, cur_offset, | 3129 | btrfs_free_reserved_data_space(inode, data_reserved, |
3129 | prealloc_end + 1 - cur_offset); | 3130 | cur_offset, prealloc_end + 1 - cur_offset); |
3130 | out: | 3131 | out: |
3131 | inode_unlock(inode); | 3132 | inode_unlock(inode); |
3133 | extent_changeset_free(data_reserved); | ||
3132 | return ret; | 3134 | return ret; |
3133 | } | 3135 | } |
3134 | 3136 | ||
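prealloc_file_extent_cluster() now threads a single data_reserved changeset through the whole cluster: space is reserved once up front, the extents that get preallocated keep their share, and only the gaps between them (plus the tail) are freed back. A compilable model of that cur_offset walk, with illustrative numbers:

    #include <stdio.h>

    /* Model: space was reserved for [0, prealloc_end); preallocated
     * extents keep their reservation, gaps and the tail are returned. */
    int main(void)
    {
        unsigned long long starts[] = { 0, 8192 };      /* extent starts */
        unsigned long long ends[]   = { 4096, 12288 };  /* exclusive ends */
        unsigned long long prealloc_end = 16384;
        unsigned long long cur = 0, freed = 0;
        int i;

        for (i = 0; i < 2; i++) {
            if (cur < starts[i])
                freed += starts[i] - cur;   /* free the gap before extent */
            cur = ends[i];
        }
        if (cur < prealloc_end)
            freed += prealloc_end - cur;    /* free the unused tail */
        printf("returned %llu of 16384 reserved bytes\n", freed); /* 8192 */
        return 0;
    }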
@@ -4269,8 +4271,7 @@ static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info) | |||
4269 | INIT_LIST_HEAD(&rc->reloc_roots); | 4271 | INIT_LIST_HEAD(&rc->reloc_roots); |
4270 | backref_cache_init(&rc->backref_cache); | 4272 | backref_cache_init(&rc->backref_cache); |
4271 | mapping_tree_init(&rc->reloc_root_tree); | 4273 | mapping_tree_init(&rc->reloc_root_tree); |
4272 | extent_io_tree_init(&rc->processed_blocks, | 4274 | extent_io_tree_init(&rc->processed_blocks, NULL); |
4273 | fs_info->btree_inode->i_mapping); | ||
4274 | return rc; | 4275 | return rc; |
4275 | } | 4276 | } |
4276 | 4277 | ||
@@ -4372,7 +4373,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start) | |||
4372 | 4373 | ||
4373 | btrfs_wait_block_group_reservations(rc->block_group); | 4374 | btrfs_wait_block_group_reservations(rc->block_group); |
4374 | btrfs_wait_nocow_writers(rc->block_group); | 4375 | btrfs_wait_nocow_writers(rc->block_group); |
4375 | btrfs_wait_ordered_roots(fs_info, -1, | 4376 | btrfs_wait_ordered_roots(fs_info, U64_MAX, |
4376 | rc->block_group->key.objectid, | 4377 | rc->block_group->key.objectid, |
4377 | rc->block_group->key.offset); | 4378 | rc->block_group->key.offset); |
4378 | 4379 | ||
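The -1 to U64_MAX conversions here (and in scrub.c and super.c below) change no behavior: passing -1 for the u64 nr argument of btrfs_wait_ordered_roots() relied on the implicit conversion of a signed literal, while U64_MAX states "all of them" explicitly. The two spellings are the same value:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t from_literal = -1;     /* wraps to 2^64 - 1 */

        printf("%d\n", from_literal == UINT64_MAX);   /* prints 1 */
        return 0;
    }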
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 7d6bc308bf43..460db0cb2d07 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -390,6 +390,13 @@ again: | |||
390 | WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid); | 390 | WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid); |
391 | WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len); | 391 | WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len); |
392 | ptr = (unsigned long)(ref + 1); | 392 | ptr = (unsigned long)(ref + 1); |
393 | ret = btrfs_is_name_len_valid(leaf, path->slots[0], ptr, | ||
394 | name_len); | ||
395 | if (!ret) { | ||
396 | err = -EIO; | ||
397 | goto out; | ||
398 | } | ||
399 | |||
393 | WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len)); | 400 | WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len)); |
394 | *sequence = btrfs_root_ref_sequence(leaf, ref); | 401 | *sequence = btrfs_root_ref_sequence(leaf, ref); |
395 | 402 | ||
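btrfs_del_root_ref() now validates name_len against the item boundaries before memcmp_extent_buffer() touches the name, so a corrupted root ref cannot cause a read past the end of the item; iterate_dir_item() in send.c gains the same check. A compilable model of the bounds test (name_len_valid() is an illustrative simplification, not the kernel helper):

    #include <stdio.h>

    /* Model: a name is valid only if it lies entirely inside its item. */
    static int name_len_valid(unsigned long item_start, unsigned long item_size,
                              unsigned long name_ptr, unsigned long name_len)
    {
        return name_ptr >= item_start &&
               name_ptr + name_len <= item_start + item_size;
    }

    int main(void)
    {
        /* name claims 300 bytes but only 54 remain in the item: reject */
        printf("%d\n", name_len_valid(1000, 64, 1010, 300));   /* 0 */
        printf("%d\n", name_len_valid(1000, 64, 1010, 32));    /* 1 */
        return 0;
    }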
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ba5595d19de1..6f1e4c984b94 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -18,6 +18,7 @@ | |||
18 | 18 | ||
19 | #include <linux/blkdev.h> | 19 | #include <linux/blkdev.h> |
20 | #include <linux/ratelimit.h> | 20 | #include <linux/ratelimit.h> |
21 | #include <linux/sched/mm.h> | ||
21 | #include "ctree.h" | 22 | #include "ctree.h" |
22 | #include "volumes.h" | 23 | #include "volumes.h" |
23 | #include "disk-io.h" | 24 | #include "disk-io.h" |
@@ -161,14 +162,6 @@ struct scrub_parity { | |||
161 | unsigned long bitmap[0]; | 162 | unsigned long bitmap[0]; |
162 | }; | 163 | }; |
163 | 164 | ||
164 | struct scrub_wr_ctx { | ||
165 | struct scrub_bio *wr_curr_bio; | ||
166 | struct btrfs_device *tgtdev; | ||
167 | int pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */ | ||
168 | atomic_t flush_all_writes; | ||
169 | struct mutex wr_lock; | ||
170 | }; | ||
171 | |||
172 | struct scrub_ctx { | 165 | struct scrub_ctx { |
173 | struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX]; | 166 | struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX]; |
174 | struct btrfs_fs_info *fs_info; | 167 | struct btrfs_fs_info *fs_info; |
@@ -183,11 +176,14 @@ struct scrub_ctx { | |||
183 | atomic_t cancel_req; | 176 | atomic_t cancel_req; |
184 | int readonly; | 177 | int readonly; |
185 | int pages_per_rd_bio; | 178 | int pages_per_rd_bio; |
186 | u32 sectorsize; | ||
187 | u32 nodesize; | ||
188 | 179 | ||
189 | int is_dev_replace; | 180 | int is_dev_replace; |
190 | struct scrub_wr_ctx wr_ctx; | 181 | |
182 | struct scrub_bio *wr_curr_bio; | ||
183 | struct mutex wr_lock; | ||
184 | int pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */ | ||
185 | atomic_t flush_all_writes; | ||
186 | struct btrfs_device *wr_tgtdev; | ||
191 | 187 | ||
192 | /* | 188 | /* |
193 | * statistics | 189 | * statistics |
@@ -289,10 +285,6 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info, | |||
289 | u64 *extent_physical, | 285 | u64 *extent_physical, |
290 | struct btrfs_device **extent_dev, | 286 | struct btrfs_device **extent_dev, |
291 | int *extent_mirror_num); | 287 | int *extent_mirror_num); |
292 | static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx, | ||
293 | struct btrfs_device *dev, | ||
294 | int is_dev_replace); | ||
295 | static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx); | ||
296 | static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, | 288 | static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, |
297 | struct scrub_page *spage); | 289 | struct scrub_page *spage); |
298 | static void scrub_wr_submit(struct scrub_ctx *sctx); | 290 | static void scrub_wr_submit(struct scrub_ctx *sctx); |
@@ -643,8 +635,6 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx) | |||
643 | if (!sctx) | 635 | if (!sctx) |
644 | return; | 636 | return; |
645 | 637 | ||
646 | scrub_free_wr_ctx(&sctx->wr_ctx); | ||
647 | |||
648 | /* this can happen when scrub is cancelled */ | 638 | /* this can happen when scrub is cancelled */ |
649 | if (sctx->curr != -1) { | 639 | if (sctx->curr != -1) { |
650 | struct scrub_bio *sbio = sctx->bios[sctx->curr]; | 640 | struct scrub_bio *sbio = sctx->bios[sctx->curr]; |
@@ -664,6 +654,7 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx) | |||
664 | kfree(sbio); | 654 | kfree(sbio); |
665 | } | 655 | } |
666 | 656 | ||
657 | kfree(sctx->wr_curr_bio); | ||
667 | scrub_free_csums(sctx); | 658 | scrub_free_csums(sctx); |
668 | kfree(sctx); | 659 | kfree(sctx); |
669 | } | 660 | } |
@@ -680,7 +671,6 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
680 | struct scrub_ctx *sctx; | 671 | struct scrub_ctx *sctx; |
681 | int i; | 672 | int i; |
682 | struct btrfs_fs_info *fs_info = dev->fs_info; | 673 | struct btrfs_fs_info *fs_info = dev->fs_info; |
683 | int ret; | ||
684 | 674 | ||
685 | sctx = kzalloc(sizeof(*sctx), GFP_KERNEL); | 675 | sctx = kzalloc(sizeof(*sctx), GFP_KERNEL); |
686 | if (!sctx) | 676 | if (!sctx) |
@@ -710,8 +700,6 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
710 | sctx->bios[i]->next_free = -1; | 700 | sctx->bios[i]->next_free = -1; |
711 | } | 701 | } |
712 | sctx->first_free = 0; | 702 | sctx->first_free = 0; |
713 | sctx->nodesize = fs_info->nodesize; | ||
714 | sctx->sectorsize = fs_info->sectorsize; | ||
715 | atomic_set(&sctx->bios_in_flight, 0); | 703 | atomic_set(&sctx->bios_in_flight, 0); |
716 | atomic_set(&sctx->workers_pending, 0); | 704 | atomic_set(&sctx->workers_pending, 0); |
717 | atomic_set(&sctx->cancel_req, 0); | 705 | atomic_set(&sctx->cancel_req, 0); |
@@ -722,12 +710,16 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
722 | spin_lock_init(&sctx->stat_lock); | 710 | spin_lock_init(&sctx->stat_lock); |
723 | init_waitqueue_head(&sctx->list_wait); | 711 | init_waitqueue_head(&sctx->list_wait); |
724 | 712 | ||
725 | ret = scrub_setup_wr_ctx(&sctx->wr_ctx, | 713 | WARN_ON(sctx->wr_curr_bio != NULL); |
726 | fs_info->dev_replace.tgtdev, is_dev_replace); | 714 | mutex_init(&sctx->wr_lock); |
727 | if (ret) { | 715 | sctx->wr_curr_bio = NULL; |
728 | scrub_free_ctx(sctx); | 716 | if (is_dev_replace) { |
729 | return ERR_PTR(ret); | 717 | WARN_ON(!fs_info->dev_replace.tgtdev); |
718 | sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO; | ||
719 | sctx->wr_tgtdev = fs_info->dev_replace.tgtdev; | ||
720 | atomic_set(&sctx->flush_all_writes, 0); | ||
730 | } | 721 | } |
722 | |||
731 | return sctx; | 723 | return sctx; |
732 | 724 | ||
733 | nomem: | 725 | nomem: |
@@ -742,6 +734,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, | |||
742 | u32 nlink; | 734 | u32 nlink; |
743 | int ret; | 735 | int ret; |
744 | int i; | 736 | int i; |
737 | unsigned nofs_flag; | ||
745 | struct extent_buffer *eb; | 738 | struct extent_buffer *eb; |
746 | struct btrfs_inode_item *inode_item; | 739 | struct btrfs_inode_item *inode_item; |
747 | struct scrub_warning *swarn = warn_ctx; | 740 | struct scrub_warning *swarn = warn_ctx; |
@@ -780,7 +773,14 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, | |||
780 | nlink = btrfs_inode_nlink(eb, inode_item); | 773 | nlink = btrfs_inode_nlink(eb, inode_item); |
781 | btrfs_release_path(swarn->path); | 774 | btrfs_release_path(swarn->path); |
782 | 775 | ||
776 | /* | ||
777 | * init_path might indirectly call vmalloc, or use GFP_KERNEL. Scrub | ||
778 | * uses GFP_NOFS in this context, so we keep it consistent but it does | ||
779 | * not seem to be strictly necessary. | ||
780 | */ | ||
781 | nofs_flag = memalloc_nofs_save(); | ||
783 | ipath = init_ipath(4096, local_root, swarn->path); | 782 | ipath = init_ipath(4096, local_root, swarn->path); |
783 | memalloc_nofs_restore(nofs_flag); | ||
784 | if (IS_ERR(ipath)) { | 784 | if (IS_ERR(ipath)) { |
785 | ret = PTR_ERR(ipath); | 785 | ret = PTR_ERR(ipath); |
786 | ipath = NULL; | 786 | ipath = NULL; |
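The memalloc_nofs_save()/memalloc_nofs_restore() pair used above marks a scoped region in which every allocation, however deep in the call chain, is implicitly degraded to GFP_NOFS, which is the preferred replacement for threading GFP_NOFS flags through callees. A hedged sketch of the pattern (kernel context assumed; some_allocating_helper() is hypothetical):

    unsigned int nofs_flag;
    void *ptr;

    /*
     * Callees may allocate with GFP_KERNEL; the scoped flag strips
     * __GFP_FS so reclaim cannot recurse into the filesystem while
     * we hold fs locks.
     */
    nofs_flag = memalloc_nofs_save();
    ptr = some_allocating_helper();   /* hypothetical; may use GFP_KERNEL */
    memalloc_nofs_restore(nofs_flag);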
@@ -954,7 +954,7 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) | |||
954 | ret = -EIO; | 954 | ret = -EIO; |
955 | goto out; | 955 | goto out; |
956 | } | 956 | } |
957 | ret = repair_io_failure(BTRFS_I(inode), offset, PAGE_SIZE, | 957 | ret = repair_io_failure(fs_info, inum, offset, PAGE_SIZE, |
958 | fixup->logical, page, | 958 | fixup->logical, page, |
959 | offset - page_offset(page), | 959 | offset - page_offset(page), |
960 | fixup->mirror_num); | 960 | fixup->mirror_num); |
@@ -1737,12 +1737,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
1737 | } | 1737 | } |
1738 | 1738 | ||
1739 | WARN_ON(!page->page); | 1739 | WARN_ON(!page->page); |
1740 | bio = btrfs_io_bio_alloc(GFP_NOFS, 1); | 1740 | bio = btrfs_io_bio_alloc(1); |
1741 | if (!bio) { | ||
1742 | page->io_error = 1; | ||
1743 | sblock->no_io_error_seen = 0; | ||
1744 | continue; | ||
1745 | } | ||
1746 | bio->bi_bdev = page->dev->bdev; | 1741 | bio->bi_bdev = page->dev->bdev; |
1747 | 1742 | ||
1748 | bio_add_page(bio, page->page, PAGE_SIZE, 0); | 1743 | bio_add_page(bio, page->page, PAGE_SIZE, 0); |
@@ -1830,9 +1825,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
1830 | return -EIO; | 1825 | return -EIO; |
1831 | } | 1826 | } |
1832 | 1827 | ||
1833 | bio = btrfs_io_bio_alloc(GFP_NOFS, 1); | 1828 | bio = btrfs_io_bio_alloc(1); |
1834 | if (!bio) | ||
1835 | return -EIO; | ||
1836 | bio->bi_bdev = page_bad->dev->bdev; | 1829 | bio->bi_bdev = page_bad->dev->bdev; |
1837 | bio->bi_iter.bi_sector = page_bad->physical >> 9; | 1830 | bio->bi_iter.bi_sector = page_bad->physical >> 9; |
1838 | bio_set_op_attrs(bio, REQ_OP_WRITE, 0); | 1831 | bio_set_op_attrs(bio, REQ_OP_WRITE, 0); |
@@ -1898,37 +1891,31 @@ static int scrub_write_page_to_dev_replace(struct scrub_block *sblock, | |||
1898 | static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, | 1891 | static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, |
1899 | struct scrub_page *spage) | 1892 | struct scrub_page *spage) |
1900 | { | 1893 | { |
1901 | struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx; | ||
1902 | struct scrub_bio *sbio; | 1894 | struct scrub_bio *sbio; |
1903 | int ret; | 1895 | int ret; |
1904 | 1896 | ||
1905 | mutex_lock(&wr_ctx->wr_lock); | 1897 | mutex_lock(&sctx->wr_lock); |
1906 | again: | 1898 | again: |
1907 | if (!wr_ctx->wr_curr_bio) { | 1899 | if (!sctx->wr_curr_bio) { |
1908 | wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio), | 1900 | sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio), |
1909 | GFP_KERNEL); | 1901 | GFP_KERNEL); |
1910 | if (!wr_ctx->wr_curr_bio) { | 1902 | if (!sctx->wr_curr_bio) { |
1911 | mutex_unlock(&wr_ctx->wr_lock); | 1903 | mutex_unlock(&sctx->wr_lock); |
1912 | return -ENOMEM; | 1904 | return -ENOMEM; |
1913 | } | 1905 | } |
1914 | wr_ctx->wr_curr_bio->sctx = sctx; | 1906 | sctx->wr_curr_bio->sctx = sctx; |
1915 | wr_ctx->wr_curr_bio->page_count = 0; | 1907 | sctx->wr_curr_bio->page_count = 0; |
1916 | } | 1908 | } |
1917 | sbio = wr_ctx->wr_curr_bio; | 1909 | sbio = sctx->wr_curr_bio; |
1918 | if (sbio->page_count == 0) { | 1910 | if (sbio->page_count == 0) { |
1919 | struct bio *bio; | 1911 | struct bio *bio; |
1920 | 1912 | ||
1921 | sbio->physical = spage->physical_for_dev_replace; | 1913 | sbio->physical = spage->physical_for_dev_replace; |
1922 | sbio->logical = spage->logical; | 1914 | sbio->logical = spage->logical; |
1923 | sbio->dev = wr_ctx->tgtdev; | 1915 | sbio->dev = sctx->wr_tgtdev; |
1924 | bio = sbio->bio; | 1916 | bio = sbio->bio; |
1925 | if (!bio) { | 1917 | if (!bio) { |
1926 | bio = btrfs_io_bio_alloc(GFP_KERNEL, | 1918 | bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio); |
1927 | wr_ctx->pages_per_wr_bio); | ||
1928 | if (!bio) { | ||
1929 | mutex_unlock(&wr_ctx->wr_lock); | ||
1930 | return -ENOMEM; | ||
1931 | } | ||
1932 | sbio->bio = bio; | 1919 | sbio->bio = bio; |
1933 | } | 1920 | } |
1934 | 1921 | ||
@@ -1951,7 +1938,7 @@ again: | |||
1951 | if (sbio->page_count < 1) { | 1938 | if (sbio->page_count < 1) { |
1952 | bio_put(sbio->bio); | 1939 | bio_put(sbio->bio); |
1953 | sbio->bio = NULL; | 1940 | sbio->bio = NULL; |
1954 | mutex_unlock(&wr_ctx->wr_lock); | 1941 | mutex_unlock(&sctx->wr_lock); |
1955 | return -EIO; | 1942 | return -EIO; |
1956 | } | 1943 | } |
1957 | scrub_wr_submit(sctx); | 1944 | scrub_wr_submit(sctx); |
@@ -1961,23 +1948,22 @@ again: | |||
1961 | sbio->pagev[sbio->page_count] = spage; | 1948 | sbio->pagev[sbio->page_count] = spage; |
1962 | scrub_page_get(spage); | 1949 | scrub_page_get(spage); |
1963 | sbio->page_count++; | 1950 | sbio->page_count++; |
1964 | if (sbio->page_count == wr_ctx->pages_per_wr_bio) | 1951 | if (sbio->page_count == sctx->pages_per_wr_bio) |
1965 | scrub_wr_submit(sctx); | 1952 | scrub_wr_submit(sctx); |
1966 | mutex_unlock(&wr_ctx->wr_lock); | 1953 | mutex_unlock(&sctx->wr_lock); |
1967 | 1954 | ||
1968 | return 0; | 1955 | return 0; |
1969 | } | 1956 | } |
1970 | 1957 | ||
1971 | static void scrub_wr_submit(struct scrub_ctx *sctx) | 1958 | static void scrub_wr_submit(struct scrub_ctx *sctx) |
1972 | { | 1959 | { |
1973 | struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx; | ||
1974 | struct scrub_bio *sbio; | 1960 | struct scrub_bio *sbio; |
1975 | 1961 | ||
1976 | if (!wr_ctx->wr_curr_bio) | 1962 | if (!sctx->wr_curr_bio) |
1977 | return; | 1963 | return; |
1978 | 1964 | ||
1979 | sbio = wr_ctx->wr_curr_bio; | 1965 | sbio = sctx->wr_curr_bio; |
1980 | wr_ctx->wr_curr_bio = NULL; | 1966 | sctx->wr_curr_bio = NULL; |
1981 | WARN_ON(!sbio->bio->bi_bdev); | 1967 | WARN_ON(!sbio->bio->bi_bdev); |
1982 | scrub_pending_bio_inc(sctx); | 1968 | scrub_pending_bio_inc(sctx); |
1983 | /* process all writes in a single worker thread. Then the block layer | 1969 | /* process all writes in a single worker thread. Then the block layer |
@@ -2081,7 +2067,7 @@ static int scrub_checksum_data(struct scrub_block *sblock) | |||
2081 | page = sblock->pagev[0]->page; | 2067 | page = sblock->pagev[0]->page; |
2082 | buffer = kmap_atomic(page); | 2068 | buffer = kmap_atomic(page); |
2083 | 2069 | ||
2084 | len = sctx->sectorsize; | 2070 | len = sctx->fs_info->sectorsize; |
2085 | index = 0; | 2071 | index = 0; |
2086 | for (;;) { | 2072 | for (;;) { |
2087 | u64 l = min_t(u64, len, PAGE_SIZE); | 2073 | u64 l = min_t(u64, len, PAGE_SIZE); |
@@ -2146,7 +2132,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) | |||
2146 | BTRFS_UUID_SIZE)) | 2132 | BTRFS_UUID_SIZE)) |
2147 | sblock->header_error = 1; | 2133 | sblock->header_error = 1; |
2148 | 2134 | ||
2149 | len = sctx->nodesize - BTRFS_CSUM_SIZE; | 2135 | len = sctx->fs_info->nodesize - BTRFS_CSUM_SIZE; |
2150 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; | 2136 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; |
2151 | p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; | 2137 | p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; |
2152 | index = 0; | 2138 | index = 0; |
@@ -2329,10 +2315,7 @@ again: | |||
2329 | sbio->dev = spage->dev; | 2315 | sbio->dev = spage->dev; |
2330 | bio = sbio->bio; | 2316 | bio = sbio->bio; |
2331 | if (!bio) { | 2317 | if (!bio) { |
2332 | bio = btrfs_io_bio_alloc(GFP_KERNEL, | 2318 | bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio); |
2333 | sctx->pages_per_rd_bio); | ||
2334 | if (!bio) | ||
2335 | return -ENOMEM; | ||
2336 | sbio->bio = bio; | 2319 | sbio->bio = bio; |
2337 | } | 2320 | } |
2338 | 2321 | ||
@@ -2420,10 +2403,10 @@ static void scrub_missing_raid56_worker(struct btrfs_work *work) | |||
2420 | scrub_block_put(sblock); | 2403 | scrub_block_put(sblock); |
2421 | 2404 | ||
2422 | if (sctx->is_dev_replace && | 2405 | if (sctx->is_dev_replace && |
2423 | atomic_read(&sctx->wr_ctx.flush_all_writes)) { | 2406 | atomic_read(&sctx->flush_all_writes)) { |
2424 | mutex_lock(&sctx->wr_ctx.wr_lock); | 2407 | mutex_lock(&sctx->wr_lock); |
2425 | scrub_wr_submit(sctx); | 2408 | scrub_wr_submit(sctx); |
2426 | mutex_unlock(&sctx->wr_ctx.wr_lock); | 2409 | mutex_unlock(&sctx->wr_lock); |
2427 | } | 2410 | } |
2428 | 2411 | ||
2429 | scrub_pending_bio_dec(sctx); | 2412 | scrub_pending_bio_dec(sctx); |
@@ -2458,10 +2441,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) | |||
2458 | goto bbio_out; | 2441 | goto bbio_out; |
2459 | } | 2442 | } |
2460 | 2443 | ||
2461 | bio = btrfs_io_bio_alloc(GFP_NOFS, 0); | 2444 | bio = btrfs_io_bio_alloc(0); |
2462 | if (!bio) | ||
2463 | goto bbio_out; | ||
2464 | |||
2465 | bio->bi_iter.bi_sector = logical >> 9; | 2445 | bio->bi_iter.bi_sector = logical >> 9; |
2466 | bio->bi_private = sblock; | 2446 | bio->bi_private = sblock; |
2467 | bio->bi_end_io = scrub_missing_raid56_end_io; | 2447 | bio->bi_end_io = scrub_missing_raid56_end_io; |
@@ -2628,10 +2608,10 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work) | |||
2628 | spin_unlock(&sctx->list_lock); | 2608 | spin_unlock(&sctx->list_lock); |
2629 | 2609 | ||
2630 | if (sctx->is_dev_replace && | 2610 | if (sctx->is_dev_replace && |
2631 | atomic_read(&sctx->wr_ctx.flush_all_writes)) { | 2611 | atomic_read(&sctx->flush_all_writes)) { |
2632 | mutex_lock(&sctx->wr_ctx.wr_lock); | 2612 | mutex_lock(&sctx->wr_lock); |
2633 | scrub_wr_submit(sctx); | 2613 | scrub_wr_submit(sctx); |
2634 | mutex_unlock(&sctx->wr_ctx.wr_lock); | 2614 | mutex_unlock(&sctx->wr_lock); |
2635 | } | 2615 | } |
2636 | 2616 | ||
2637 | scrub_pending_bio_dec(sctx); | 2617 | scrub_pending_bio_dec(sctx); |
@@ -2726,8 +2706,8 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum) | |||
2726 | if (!sum) | 2706 | if (!sum) |
2727 | return 0; | 2707 | return 0; |
2728 | 2708 | ||
2729 | index = ((u32)(logical - sum->bytenr)) / sctx->sectorsize; | 2709 | index = ((u32)(logical - sum->bytenr)) / sctx->fs_info->sectorsize; |
2730 | num_sectors = sum->len / sctx->sectorsize; | 2710 | num_sectors = sum->len / sctx->fs_info->sectorsize; |
2731 | memcpy(csum, sum->sums + index, sctx->csum_size); | 2711 | memcpy(csum, sum->sums + index, sctx->csum_size); |
2732 | if (index == num_sectors - 1) { | 2712 | if (index == num_sectors - 1) { |
2733 | list_del(&sum->list); | 2713 | list_del(&sum->list); |
@@ -2746,19 +2726,19 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
2746 | u32 blocksize; | 2726 | u32 blocksize; |
2747 | 2727 | ||
2748 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | 2728 | if (flags & BTRFS_EXTENT_FLAG_DATA) { |
2749 | blocksize = sctx->sectorsize; | 2729 | blocksize = sctx->fs_info->sectorsize; |
2750 | spin_lock(&sctx->stat_lock); | 2730 | spin_lock(&sctx->stat_lock); |
2751 | sctx->stat.data_extents_scrubbed++; | 2731 | sctx->stat.data_extents_scrubbed++; |
2752 | sctx->stat.data_bytes_scrubbed += len; | 2732 | sctx->stat.data_bytes_scrubbed += len; |
2753 | spin_unlock(&sctx->stat_lock); | 2733 | spin_unlock(&sctx->stat_lock); |
2754 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 2734 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
2755 | blocksize = sctx->nodesize; | 2735 | blocksize = sctx->fs_info->nodesize; |
2756 | spin_lock(&sctx->stat_lock); | 2736 | spin_lock(&sctx->stat_lock); |
2757 | sctx->stat.tree_extents_scrubbed++; | 2737 | sctx->stat.tree_extents_scrubbed++; |
2758 | sctx->stat.tree_bytes_scrubbed += len; | 2738 | sctx->stat.tree_bytes_scrubbed += len; |
2759 | spin_unlock(&sctx->stat_lock); | 2739 | spin_unlock(&sctx->stat_lock); |
2760 | } else { | 2740 | } else { |
2761 | blocksize = sctx->sectorsize; | 2741 | blocksize = sctx->fs_info->sectorsize; |
2762 | WARN_ON(1); | 2742 | WARN_ON(1); |
2763 | } | 2743 | } |
2764 | 2744 | ||
@@ -2892,11 +2872,11 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity, | |||
2892 | } | 2872 | } |
2893 | 2873 | ||
2894 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | 2874 | if (flags & BTRFS_EXTENT_FLAG_DATA) { |
2895 | blocksize = sctx->sectorsize; | 2875 | blocksize = sctx->fs_info->sectorsize; |
2896 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 2876 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
2897 | blocksize = sctx->nodesize; | 2877 | blocksize = sctx->fs_info->nodesize; |
2898 | } else { | 2878 | } else { |
2899 | blocksize = sctx->sectorsize; | 2879 | blocksize = sctx->fs_info->sectorsize; |
2900 | WARN_ON(1); | 2880 | WARN_ON(1); |
2901 | } | 2881 | } |
2902 | 2882 | ||
@@ -3037,10 +3017,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) | |||
3037 | if (ret || !bbio || !bbio->raid_map) | 3017 | if (ret || !bbio || !bbio->raid_map) |
3038 | goto bbio_out; | 3018 | goto bbio_out; |
3039 | 3019 | ||
3040 | bio = btrfs_io_bio_alloc(GFP_NOFS, 0); | 3020 | bio = btrfs_io_bio_alloc(0); |
3041 | if (!bio) | ||
3042 | goto bbio_out; | ||
3043 | |||
3044 | bio->bi_iter.bi_sector = sparity->logic_start >> 9; | 3021 | bio->bi_iter.bi_sector = sparity->logic_start >> 9; |
3045 | bio->bi_private = sparity; | 3022 | bio->bi_private = sparity; |
3046 | bio->bi_end_io = scrub_parity_bio_endio; | 3023 | bio->bi_end_io = scrub_parity_bio_endio; |
@@ -3305,9 +3282,9 @@ out: | |||
3305 | logic_end - logic_start); | 3282 | logic_end - logic_start); |
3306 | scrub_parity_put(sparity); | 3283 | scrub_parity_put(sparity); |
3307 | scrub_submit(sctx); | 3284 | scrub_submit(sctx); |
3308 | mutex_lock(&sctx->wr_ctx.wr_lock); | 3285 | mutex_lock(&sctx->wr_lock); |
3309 | scrub_wr_submit(sctx); | 3286 | scrub_wr_submit(sctx); |
3310 | mutex_unlock(&sctx->wr_ctx.wr_lock); | 3287 | mutex_unlock(&sctx->wr_lock); |
3311 | 3288 | ||
3312 | btrfs_release_path(path); | 3289 | btrfs_release_path(path); |
3313 | return ret < 0 ? ret : 0; | 3290 | return ret < 0 ? ret : 0; |
@@ -3463,14 +3440,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
3463 | */ | 3440 | */ |
3464 | if (atomic_read(&fs_info->scrub_pause_req)) { | 3441 | if (atomic_read(&fs_info->scrub_pause_req)) { |
3465 | /* push queued extents */ | 3442 | /* push queued extents */ |
3466 | atomic_set(&sctx->wr_ctx.flush_all_writes, 1); | 3443 | atomic_set(&sctx->flush_all_writes, 1); |
3467 | scrub_submit(sctx); | 3444 | scrub_submit(sctx); |
3468 | mutex_lock(&sctx->wr_ctx.wr_lock); | 3445 | mutex_lock(&sctx->wr_lock); |
3469 | scrub_wr_submit(sctx); | 3446 | scrub_wr_submit(sctx); |
3470 | mutex_unlock(&sctx->wr_ctx.wr_lock); | 3447 | mutex_unlock(&sctx->wr_lock); |
3471 | wait_event(sctx->list_wait, | 3448 | wait_event(sctx->list_wait, |
3472 | atomic_read(&sctx->bios_in_flight) == 0); | 3449 | atomic_read(&sctx->bios_in_flight) == 0); |
3473 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); | 3450 | atomic_set(&sctx->flush_all_writes, 0); |
3474 | scrub_blocked_if_needed(fs_info); | 3451 | scrub_blocked_if_needed(fs_info); |
3475 | } | 3452 | } |
3476 | 3453 | ||
@@ -3677,9 +3654,9 @@ skip: | |||
3677 | out: | 3654 | out: |
3678 | /* push queued extents */ | 3655 | /* push queued extents */ |
3679 | scrub_submit(sctx); | 3656 | scrub_submit(sctx); |
3680 | mutex_lock(&sctx->wr_ctx.wr_lock); | 3657 | mutex_lock(&sctx->wr_lock); |
3681 | scrub_wr_submit(sctx); | 3658 | scrub_wr_submit(sctx); |
3682 | mutex_unlock(&sctx->wr_ctx.wr_lock); | 3659 | mutex_unlock(&sctx->wr_lock); |
3683 | 3660 | ||
3684 | blk_finish_plug(&plug); | 3661 | blk_finish_plug(&plug); |
3685 | btrfs_free_path(path); | 3662 | btrfs_free_path(path); |
@@ -3859,7 +3836,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
3859 | */ | 3836 | */ |
3860 | btrfs_wait_block_group_reservations(cache); | 3837 | btrfs_wait_block_group_reservations(cache); |
3861 | btrfs_wait_nocow_writers(cache); | 3838 | btrfs_wait_nocow_writers(cache); |
3862 | ret = btrfs_wait_ordered_roots(fs_info, -1, | 3839 | ret = btrfs_wait_ordered_roots(fs_info, U64_MAX, |
3863 | cache->key.objectid, | 3840 | cache->key.objectid, |
3864 | cache->key.offset); | 3841 | cache->key.offset); |
3865 | if (ret > 0) { | 3842 | if (ret > 0) { |
@@ -3916,11 +3893,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
3916 | * write requests are really completed when bios_in_flight | 3893 | * write requests are really completed when bios_in_flight |
3917 | * changes to 0. | 3894 | * changes to 0. |
3918 | */ | 3895 | */ |
3919 | atomic_set(&sctx->wr_ctx.flush_all_writes, 1); | 3896 | atomic_set(&sctx->flush_all_writes, 1); |
3920 | scrub_submit(sctx); | 3897 | scrub_submit(sctx); |
3921 | mutex_lock(&sctx->wr_ctx.wr_lock); | 3898 | mutex_lock(&sctx->wr_lock); |
3922 | scrub_wr_submit(sctx); | 3899 | scrub_wr_submit(sctx); |
3923 | mutex_unlock(&sctx->wr_ctx.wr_lock); | 3900 | mutex_unlock(&sctx->wr_lock); |
3924 | 3901 | ||
3925 | wait_event(sctx->list_wait, | 3902 | wait_event(sctx->list_wait, |
3926 | atomic_read(&sctx->bios_in_flight) == 0); | 3903 | atomic_read(&sctx->bios_in_flight) == 0); |
@@ -3934,7 +3911,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
3934 | */ | 3911 | */ |
3935 | wait_event(sctx->list_wait, | 3912 | wait_event(sctx->list_wait, |
3936 | atomic_read(&sctx->workers_pending) == 0); | 3913 | atomic_read(&sctx->workers_pending) == 0); |
3937 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); | 3914 | atomic_set(&sctx->flush_all_writes, 0); |
3938 | 3915 | ||
3939 | scrub_pause_off(fs_info); | 3916 | scrub_pause_off(fs_info); |
3940 | 3917 | ||
@@ -4337,32 +4314,6 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info, | |||
4337 | btrfs_put_bbio(bbio); | 4314 | btrfs_put_bbio(bbio); |
4338 | } | 4315 | } |
4339 | 4316 | ||
4340 | static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx, | ||
4341 | struct btrfs_device *dev, | ||
4342 | int is_dev_replace) | ||
4343 | { | ||
4344 | WARN_ON(wr_ctx->wr_curr_bio != NULL); | ||
4345 | |||
4346 | mutex_init(&wr_ctx->wr_lock); | ||
4347 | wr_ctx->wr_curr_bio = NULL; | ||
4348 | if (!is_dev_replace) | ||
4349 | return 0; | ||
4350 | |||
4351 | WARN_ON(!dev->bdev); | ||
4352 | wr_ctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO; | ||
4353 | wr_ctx->tgtdev = dev; | ||
4354 | atomic_set(&wr_ctx->flush_all_writes, 0); | ||
4355 | return 0; | ||
4356 | } | ||
4357 | |||
4358 | static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx) | ||
4359 | { | ||
4360 | mutex_lock(&wr_ctx->wr_lock); | ||
4361 | kfree(wr_ctx->wr_curr_bio); | ||
4362 | wr_ctx->wr_curr_bio = NULL; | ||
4363 | mutex_unlock(&wr_ctx->wr_lock); | ||
4364 | } | ||
4365 | |||
4366 | static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | 4317 | static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, |
4367 | int mirror_num, u64 physical_for_dev_replace) | 4318 | int mirror_num, u64 physical_for_dev_replace) |
4368 | { | 4319 | { |
@@ -4665,7 +4616,7 @@ static int write_page_nocow(struct scrub_ctx *sctx, | |||
4665 | struct btrfs_device *dev; | 4616 | struct btrfs_device *dev; |
4666 | int ret; | 4617 | int ret; |
4667 | 4618 | ||
4668 | dev = sctx->wr_ctx.tgtdev; | 4619 | dev = sctx->wr_tgtdev; |
4669 | if (!dev) | 4620 | if (!dev) |
4670 | return -EIO; | 4621 | return -EIO; |
4671 | if (!dev->bdev) { | 4622 | if (!dev->bdev) { |
@@ -4673,13 +4624,7 @@ static int write_page_nocow(struct scrub_ctx *sctx, | |||
4673 | "scrub write_page_nocow(bdev == NULL) is unexpected"); | 4624 | "scrub write_page_nocow(bdev == NULL) is unexpected"); |
4674 | return -EIO; | 4625 | return -EIO; |
4675 | } | 4626 | } |
4676 | bio = btrfs_io_bio_alloc(GFP_NOFS, 1); | 4627 | bio = btrfs_io_bio_alloc(1); |
4677 | if (!bio) { | ||
4678 | spin_lock(&sctx->stat_lock); | ||
4679 | sctx->stat.malloc_errors++; | ||
4680 | spin_unlock(&sctx->stat_lock); | ||
4681 | return -ENOMEM; | ||
4682 | } | ||
4683 | bio->bi_iter.bi_size = 0; | 4628 | bio->bi_iter.bi_size = 0; |
4684 | bio->bi_iter.bi_sector = physical_for_dev_replace >> 9; | 4629 | bio->bi_iter.bi_sector = physical_for_dev_replace >> 9; |
4685 | bio->bi_bdev = dev->bdev; | 4630 | bio->bi_bdev = dev->bdev; |
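The btrfs_io_bio_alloc() conversions throughout scrub drop every NULL check because, per this series, the helper now allocates from a bioset; mempool-backed allocation with a gfp mask that allows blocking waits for a free element rather than failing. The shape of the change as it appears in these hunks:

    /* Before: allocation could fail, every caller checked for NULL. */
    bio = btrfs_io_bio_alloc(GFP_NOFS, nr_pages);
    if (!bio)
        return -ENOMEM;

    /* After: backed by a bioset/mempool, the call sleeps until an
     * element is available instead of returning NULL, so callers
     * may use the result unconditionally. */
    bio = btrfs_io_bio_alloc(nr_pages);
    bio->bi_bdev = dev->bdev;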
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fc496a6f842a..e937c10b8287 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -1069,6 +1069,12 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
1069 | } | 1069 | } |
1070 | } | 1070 | } |
1071 | 1071 | ||
1072 | ret = btrfs_is_name_len_valid(eb, path->slots[0], | ||
1073 | (unsigned long)(di + 1), name_len + data_len); | ||
1074 | if (!ret) { | ||
1075 | ret = -EIO; | ||
1076 | goto out; | ||
1077 | } | ||
1072 | if (name_len + data_len > buf_len) { | 1078 | if (name_len + data_len > buf_len) { |
1073 | buf_len = name_len + data_len; | 1079 | buf_len = name_len + data_len; |
1074 | if (is_vmalloc_addr(buf)) { | 1080 | if (is_vmalloc_addr(buf)) { |
@@ -1083,7 +1089,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
1083 | buf = tmp; | 1089 | buf = tmp; |
1084 | } | 1090 | } |
1085 | if (!buf) { | 1091 | if (!buf) { |
1086 | buf = vmalloc(buf_len); | 1092 | buf = kvmalloc(buf_len, GFP_KERNEL); |
1087 | if (!buf) { | 1093 | if (!buf) { |
1088 | ret = -ENOMEM; | 1094 | ret = -ENOMEM; |
1089 | goto out; | 1095 | goto out; |
@@ -2769,15 +2775,20 @@ out: | |||
2769 | 2775 | ||
2770 | struct recorded_ref { | 2776 | struct recorded_ref { |
2771 | struct list_head list; | 2777 | struct list_head list; |
2772 | char *dir_path; | ||
2773 | char *name; | 2778 | char *name; |
2774 | struct fs_path *full_path; | 2779 | struct fs_path *full_path; |
2775 | u64 dir; | 2780 | u64 dir; |
2776 | u64 dir_gen; | 2781 | u64 dir_gen; |
2777 | int dir_path_len; | ||
2778 | int name_len; | 2782 | int name_len; |
2779 | }; | 2783 | }; |
2780 | 2784 | ||
2785 | static void set_ref_path(struct recorded_ref *ref, struct fs_path *path) | ||
2786 | { | ||
2787 | ref->full_path = path; | ||
2788 | ref->name = (char *)kbasename(ref->full_path->start); | ||
2789 | ref->name_len = ref->full_path->end - ref->name; | ||
2790 | } | ||
2791 | |||
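set_ref_path() derives name and name_len directly from the full path, which is what makes the dir_path/dir_path_len members removable. kbasename() returns a pointer to the component after the last '/'. A compilable userspace model (basename_of() is a stand-in for the kernel's kbasename()):

    #include <stdio.h>
    #include <string.h>

    /* Userspace stand-in for the kernel's kbasename(). */
    static const char *basename_of(const char *path)
    {
        const char *tail = strrchr(path, '/');

        return tail ? tail + 1 : path;
    }

    int main(void)
    {
        const char *full = "dir1/dir2/file";
        const char *name = basename_of(full);
        /* name_len = full_path->end - ref->name in the kernel code */
        size_t name_len = full + strlen(full) - name;

        printf("%s (%zu bytes)\n", name, name_len);   /* file (4 bytes) */
        return 0;
    }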
2781 | /* | 2792 | /* |
2782 | * We need to process new refs before deleted refs, but compare_tree gives us | 2793 | * We need to process new refs before deleted refs, but compare_tree gives us |
2783 | * everything mixed. So we first record all refs and later process them. | 2794 | * everything mixed. So we first record all refs and later process them. |
@@ -2794,17 +2805,7 @@ static int __record_ref(struct list_head *head, u64 dir, | |||
2794 | 2805 | ||
2795 | ref->dir = dir; | 2806 | ref->dir = dir; |
2796 | ref->dir_gen = dir_gen; | 2807 | ref->dir_gen = dir_gen; |
2797 | ref->full_path = path; | 2808 | set_ref_path(ref, path); |
2798 | |||
2799 | ref->name = (char *)kbasename(ref->full_path->start); | ||
2800 | ref->name_len = ref->full_path->end - ref->name; | ||
2801 | ref->dir_path = ref->full_path->start; | ||
2802 | if (ref->name == ref->full_path->start) | ||
2803 | ref->dir_path_len = 0; | ||
2804 | else | ||
2805 | ref->dir_path_len = ref->full_path->end - | ||
2806 | ref->full_path->start - 1 - ref->name_len; | ||
2807 | |||
2808 | list_add_tail(&ref->list, head); | 2809 | list_add_tail(&ref->list, head); |
2809 | return 0; | 2810 | return 0; |
2810 | } | 2811 | } |
@@ -3546,9 +3547,17 @@ static int is_ancestor(struct btrfs_root *root, | |||
3546 | struct fs_path *fs_path) | 3547 | struct fs_path *fs_path) |
3547 | { | 3548 | { |
3548 | u64 ino = ino2; | 3549 | u64 ino = ino2; |
3550 | bool free_path = false; | ||
3551 | int ret = 0; | ||
3552 | |||
3553 | if (!fs_path) { | ||
3554 | fs_path = fs_path_alloc(); | ||
3555 | if (!fs_path) | ||
3556 | return -ENOMEM; | ||
3557 | free_path = true; | ||
3558 | } | ||
3549 | 3559 | ||
3550 | while (ino > BTRFS_FIRST_FREE_OBJECTID) { | 3560 | while (ino > BTRFS_FIRST_FREE_OBJECTID) { |
3551 | int ret; | ||
3552 | u64 parent; | 3561 | u64 parent; |
3553 | u64 parent_gen; | 3562 | u64 parent_gen; |
3554 | 3563 | ||
@@ -3557,13 +3566,18 @@ static int is_ancestor(struct btrfs_root *root, | |||
3557 | if (ret < 0) { | 3566 | if (ret < 0) { |
3558 | if (ret == -ENOENT && ino == ino2) | 3567 | if (ret == -ENOENT && ino == ino2) |
3559 | ret = 0; | 3568 | ret = 0; |
3560 | return ret; | 3569 | goto out; |
3570 | } | ||
3571 | if (parent == ino1) { | ||
3572 | ret = parent_gen == ino1_gen ? 1 : 0; | ||
3573 | goto out; | ||
3561 | } | 3574 | } |
3562 | if (parent == ino1) | ||
3563 | return parent_gen == ino1_gen ? 1 : 0; | ||
3564 | ino = parent; | 3575 | ino = parent; |
3565 | } | 3576 | } |
3566 | return 0; | 3577 | out: |
3578 | if (free_path) | ||
3579 | fs_path_free(fs_path); | ||
3580 | return ret; | ||
3567 | } | 3581 | } |
3568 | 3582 | ||
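is_ancestor() can now be called without a scratch fs_path: it allocates one on demand and remembers, via free_path, whether the cleanup at out: is its responsibility. The same allocate-if-NULL pattern in a hedged, generic form (alloc_scratch()/free_scratch() are hypothetical):

    bool free_buf = false;
    int ret = 0;

    if (!buf) {
        buf = alloc_scratch();      /* hypothetical allocator */
        if (!buf)
            return -ENOMEM;
        free_buf = true;            /* we own it, so we free it */
    }

    /* ... work that may 'goto out' on error ... */
    out:
    if (free_buf)
        free_scratch(buf);          /* hypothetical */
    return ret;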
3569 | static int wait_for_parent_move(struct send_ctx *sctx, | 3583 | static int wait_for_parent_move(struct send_ctx *sctx, |
@@ -3686,6 +3700,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) | |||
3686 | int is_orphan = 0; | 3700 | int is_orphan = 0; |
3687 | u64 last_dir_ino_rm = 0; | 3701 | u64 last_dir_ino_rm = 0; |
3688 | bool can_rename = true; | 3702 | bool can_rename = true; |
3703 | bool orphanized_ancestor = false; | ||
3689 | 3704 | ||
3690 | btrfs_debug(fs_info, "process_recorded_refs %llu", sctx->cur_ino); | 3705 | btrfs_debug(fs_info, "process_recorded_refs %llu", sctx->cur_ino); |
3691 | 3706 | ||
@@ -3837,9 +3852,16 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) | |||
3837 | * might contain the pre-orphanization name of | 3852 | * might contain the pre-orphanization name of |
3838 | * ow_inode, which is no longer valid. | 3853 | * ow_inode, which is no longer valid. |
3839 | */ | 3854 | */ |
3840 | fs_path_reset(valid_path); | 3855 | ret = is_ancestor(sctx->parent_root, |
3841 | ret = get_cur_path(sctx, sctx->cur_ino, | 3856 | ow_inode, ow_gen, |
3842 | sctx->cur_inode_gen, valid_path); | 3857 | sctx->cur_ino, NULL); |
3858 | if (ret > 0) { | ||
3859 | orphanized_ancestor = true; | ||
3860 | fs_path_reset(valid_path); | ||
3861 | ret = get_cur_path(sctx, sctx->cur_ino, | ||
3862 | sctx->cur_inode_gen, | ||
3863 | valid_path); | ||
3864 | } | ||
3843 | if (ret < 0) | 3865 | if (ret < 0) |
3844 | goto out; | 3866 | goto out; |
3845 | } else { | 3867 | } else { |
@@ -3960,6 +3982,43 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) | |||
3960 | if (ret < 0) | 3982 | if (ret < 0) |
3961 | goto out; | 3983 | goto out; |
3962 | if (!ret) { | 3984 | if (!ret) { |
3985 | /* | ||
3986 | * If we orphanized any ancestor before, we need | ||
3987 | * to recompute the full path for deleted names, | ||
3988 | * since any such path was computed before we | ||
3989 | * processed any references and orphanized any | ||
3990 | * ancestor inode. | ||
3991 | */ | ||
3992 | if (orphanized_ancestor) { | ||
3993 | struct fs_path *new_path; | ||
3994 | |||
3995 | /* | ||
3996 | * Our reference's name member points to | ||
3997 | * its full_path member string, so we | ||
3998 | * use here a new path. | ||
3999 | */ | ||
4000 | new_path = fs_path_alloc(); | ||
4001 | if (!new_path) { | ||
4002 | ret = -ENOMEM; | ||
4003 | goto out; | ||
4004 | } | ||
4005 | ret = get_cur_path(sctx, cur->dir, | ||
4006 | cur->dir_gen, | ||
4007 | new_path); | ||
4008 | if (ret < 0) { | ||
4009 | fs_path_free(new_path); | ||
4010 | goto out; | ||
4011 | } | ||
4012 | ret = fs_path_add(new_path, | ||
4013 | cur->name, | ||
4014 | cur->name_len); | ||
4015 | if (ret < 0) { | ||
4016 | fs_path_free(new_path); | ||
4017 | goto out; | ||
4018 | } | ||
4019 | fs_path_free(cur->full_path); | ||
4020 | set_ref_path(cur, new_path); | ||
4021 | } | ||
3963 | ret = send_unlink(sctx, cur->full_path); | 4022 | ret = send_unlink(sctx, cur->full_path); |
3964 | if (ret < 0) | 4023 | if (ret < 0) |
3965 | goto out; | 4024 | goto out; |
@@ -6397,13 +6456,10 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
6397 | 6456 | ||
6398 | alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1); | 6457 | alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1); |
6399 | 6458 | ||
6400 | sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN); | 6459 | sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL); |
6401 | if (!sctx->clone_roots) { | 6460 | if (!sctx->clone_roots) { |
6402 | sctx->clone_roots = vzalloc(alloc_size); | 6461 | ret = -ENOMEM; |
6403 | if (!sctx->clone_roots) { | 6462 | goto out; |
6404 | ret = -ENOMEM; | ||
6405 | goto out; | ||
6406 | } | ||
6407 | } | 6463 | } |
6408 | 6464 | ||
6409 | alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources); | 6465 | alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources); |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4f1cdd5058f1..74e47794e63f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -601,18 +601,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, | |||
601 | } | 601 | } |
602 | break; | 602 | break; |
603 | case Opt_alloc_start: | 603 | case Opt_alloc_start: |
604 | num = match_strdup(&args[0]); | 604 | btrfs_info(info, |
605 | if (num) { | 605 | "option alloc_start is obsolete, ignored"); |
606 | mutex_lock(&info->chunk_mutex); | ||
607 | info->alloc_start = memparse(num, NULL); | ||
608 | mutex_unlock(&info->chunk_mutex); | ||
609 | kfree(num); | ||
610 | btrfs_info(info, "allocations start at %llu", | ||
611 | info->alloc_start); | ||
612 | } else { | ||
613 | ret = -ENOMEM; | ||
614 | goto out; | ||
615 | } | ||
616 | break; | 606 | break; |
617 | case Opt_acl: | 607 | case Opt_acl: |
618 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 608 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
@@ -1187,7 +1177,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
1187 | return 0; | 1177 | return 0; |
1188 | } | 1178 | } |
1189 | 1179 | ||
1190 | btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1); | 1180 | btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); |
1191 | 1181 | ||
1192 | trans = btrfs_attach_transaction_barrier(root); | 1182 | trans = btrfs_attach_transaction_barrier(root); |
1193 | if (IS_ERR(trans)) { | 1183 | if (IS_ERR(trans)) { |
@@ -1232,8 +1222,6 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) | |||
1232 | seq_puts(seq, ",nobarrier"); | 1222 | seq_puts(seq, ",nobarrier"); |
1233 | if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE) | 1223 | if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE) |
1234 | seq_printf(seq, ",max_inline=%llu", info->max_inline); | 1224 | seq_printf(seq, ",max_inline=%llu", info->max_inline); |
1235 | if (info->alloc_start != 0) | ||
1236 | seq_printf(seq, ",alloc_start=%llu", info->alloc_start); | ||
1237 | if (info->thread_pool_size != min_t(unsigned long, | 1225 | if (info->thread_pool_size != min_t(unsigned long, |
1238 | num_online_cpus() + 2, 8)) | 1226 | num_online_cpus() + 2, 8)) |
1239 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); | 1227 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); |
@@ -1716,7 +1704,6 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
1716 | unsigned long old_opts = fs_info->mount_opt; | 1704 | unsigned long old_opts = fs_info->mount_opt; |
1717 | unsigned long old_compress_type = fs_info->compress_type; | 1705 | unsigned long old_compress_type = fs_info->compress_type; |
1718 | u64 old_max_inline = fs_info->max_inline; | 1706 | u64 old_max_inline = fs_info->max_inline; |
1719 | u64 old_alloc_start = fs_info->alloc_start; | ||
1720 | int old_thread_pool_size = fs_info->thread_pool_size; | 1707 | int old_thread_pool_size = fs_info->thread_pool_size; |
1721 | unsigned int old_metadata_ratio = fs_info->metadata_ratio; | 1708 | unsigned int old_metadata_ratio = fs_info->metadata_ratio; |
1722 | int ret; | 1709 | int ret; |
@@ -1855,9 +1842,6 @@ restore: | |||
1855 | fs_info->mount_opt = old_opts; | 1842 | fs_info->mount_opt = old_opts; |
1856 | fs_info->compress_type = old_compress_type; | 1843 | fs_info->compress_type = old_compress_type; |
1857 | fs_info->max_inline = old_max_inline; | 1844 | fs_info->max_inline = old_max_inline; |
1858 | mutex_lock(&fs_info->chunk_mutex); | ||
1859 | fs_info->alloc_start = old_alloc_start; | ||
1860 | mutex_unlock(&fs_info->chunk_mutex); | ||
1861 | btrfs_resize_thread_pool(fs_info, | 1845 | btrfs_resize_thread_pool(fs_info, |
1862 | old_thread_pool_size, fs_info->thread_pool_size); | 1846 | old_thread_pool_size, fs_info->thread_pool_size); |
1863 | fs_info->metadata_ratio = old_metadata_ratio; | 1847 | fs_info->metadata_ratio = old_metadata_ratio; |
@@ -1898,18 +1882,15 @@ static inline void btrfs_descending_sort_devices( | |||
1898 | static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, | 1882 | static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, |
1899 | u64 *free_bytes) | 1883 | u64 *free_bytes) |
1900 | { | 1884 | { |
1901 | struct btrfs_root *root = fs_info->tree_root; | ||
1902 | struct btrfs_device_info *devices_info; | 1885 | struct btrfs_device_info *devices_info; |
1903 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | 1886 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
1904 | struct btrfs_device *device; | 1887 | struct btrfs_device *device; |
1905 | u64 skip_space; | 1888 | u64 skip_space; |
1906 | u64 type; | 1889 | u64 type; |
1907 | u64 avail_space; | 1890 | u64 avail_space; |
1908 | u64 used_space; | ||
1909 | u64 min_stripe_size; | 1891 | u64 min_stripe_size; |
1910 | int min_stripes = 1, num_stripes = 1; | 1892 | int min_stripes = 1, num_stripes = 1; |
1911 | int i = 0, nr_devices; | 1893 | int i = 0, nr_devices; |
1912 | int ret; | ||
1913 | 1894 | ||
1914 | /* | 1895 | /* |
1915 | * We aren't under the device list lock, so this is racy-ish, but good | 1896 | * We aren't under the device list lock, so this is racy-ish, but good |
@@ -1927,12 +1908,12 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, | |||
1927 | } | 1908 | } |
1928 | 1909 | ||
1929 | devices_info = kmalloc_array(nr_devices, sizeof(*devices_info), | 1910 | devices_info = kmalloc_array(nr_devices, sizeof(*devices_info), |
1930 | GFP_NOFS); | 1911 | GFP_KERNEL); |
1931 | if (!devices_info) | 1912 | if (!devices_info) |
1932 | return -ENOMEM; | 1913 | return -ENOMEM; |
1933 | 1914 | ||
1934 | /* calc min stripe number for data space allocation */ | 1915 | /* calc min stripe number for data space allocation */ |
1935 | type = btrfs_get_alloc_profile(root, 1); | 1916 | type = btrfs_data_alloc_profile(fs_info); |
1936 | if (type & BTRFS_BLOCK_GROUP_RAID0) { | 1917 | if (type & BTRFS_BLOCK_GROUP_RAID0) { |
1937 | min_stripes = 2; | 1918 | min_stripes = 2; |
1938 | num_stripes = nr_devices; | 1919 | num_stripes = nr_devices; |
@@ -1949,8 +1930,6 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, | |||
1949 | else | 1930 | else |
1950 | min_stripe_size = BTRFS_STRIPE_LEN; | 1931 | min_stripe_size = BTRFS_STRIPE_LEN; |
1951 | 1932 | ||
1952 | if (fs_info->alloc_start) | ||
1953 | mutex_lock(&fs_devices->device_list_mutex); | ||
1954 | rcu_read_lock(); | 1933 | rcu_read_lock(); |
1955 | list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { | 1934 | list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { |
1956 | if (!device->in_fs_metadata || !device->bdev || | 1935 | if (!device->in_fs_metadata || !device->bdev || |
@@ -1973,34 +1952,6 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, | |||
1973 | */ | 1952 | */ |
1974 | skip_space = SZ_1M; | 1953 | skip_space = SZ_1M; |
1975 | 1954 | ||
1976 | /* user can set the offset in fs_info->alloc_start. */ | ||
1977 | if (fs_info->alloc_start && | ||
1978 | fs_info->alloc_start + BTRFS_STRIPE_LEN <= | ||
1979 | device->total_bytes) { | ||
1980 | rcu_read_unlock(); | ||
1981 | skip_space = max(fs_info->alloc_start, skip_space); | ||
1982 | |||
1983 | /* | ||
1984 | * btrfs can not use the free space in | ||
1985 | * [0, skip_space - 1], we must subtract it from the | ||
1986 | * total. In order to implement it, we account the used | ||
1987 | * space in this range first. | ||
1988 | */ | ||
1989 | ret = btrfs_account_dev_extents_size(device, 0, | ||
1990 | skip_space - 1, | ||
1991 | &used_space); | ||
1992 | if (ret) { | ||
1993 | kfree(devices_info); | ||
1994 | mutex_unlock(&fs_devices->device_list_mutex); | ||
1995 | return ret; | ||
1996 | } | ||
1997 | |||
1998 | rcu_read_lock(); | ||
1999 | |||
2000 | /* calc the free space in [0, skip_space - 1] */ | ||
2001 | skip_space -= used_space; | ||
2002 | } | ||
2003 | |||
2004 | /* | 1955 | /* |
2005 | * we can use the free space in [0, skip_space - 1], subtract | 1956 | * we can use the free space in [0, skip_space - 1], subtract |
2006 | * it from the total. | 1957 | * it from the total. |
@@ -2019,8 +1970,6 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, | |||
2019 | i++; | 1970 | i++; |
2020 | } | 1971 | } |
2021 | rcu_read_unlock(); | 1972 | rcu_read_unlock(); |
2022 | if (fs_info->alloc_start) | ||
2023 | mutex_unlock(&fs_devices->device_list_mutex); | ||
2024 | 1973 | ||
2025 | nr_devices = i; | 1974 | nr_devices = i; |
2026 | 1975 | ||
@@ -2057,10 +2006,9 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, | |||
2057 | * multiplier to scale the sizes. | 2006 | * multiplier to scale the sizes. |
2058 | * | 2007 | * |
2059 | * Unused device space usage is based on simulating the chunk allocator | 2008 | * Unused device space usage is based on simulating the chunk allocator |
2060 | * algorithm that respects the device sizes, order of allocations and the | 2009 | * algorithm that respects the device sizes and order of allocations. This is |
2061 | * 'alloc_start' value, this is a close approximation of the actual use but | 2010 | * a close approximation of the actual use but there are other factors that may |
2062 | * there are other factors that may change the result (like a new metadata | 2011 | * change the result (like a new metadata chunk). |
2063 | * chunk). | ||
2064 | * | 2012 | * |
2065 | * If metadata is exhausted, f_bavail will be 0. | 2013 | * If metadata is exhausted, f_bavail will be 0. |
2066 | */ | 2014 | */ |
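The statfs estimate derives a minimum stripe count from the data allocation profile before simulating the chunk allocator across the devices. Only the RAID0 branch is visible in the hunk above; the sketch below fills in the neighbouring branches from the 4.13-era code as an assumption, to show the shape of the mapping:

    /* sketch: stripes required per data profile */
    type = btrfs_data_alloc_profile(fs_info);
    if (type & BTRFS_BLOCK_GROUP_RAID0) {
            min_stripes = 2;
            num_stripes = nr_devices;       /* stripe across everything */
    } else if (type & BTRFS_BLOCK_GROUP_RAID1) {
            min_stripes = 2;
            num_stripes = 2;                /* two mirrored copies */
    } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
            min_stripes = 4;
            num_stripes = 4;
    }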
@@ -2243,7 +2191,7 @@ static int btrfs_freeze(struct super_block *sb) | |||
2243 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | 2191 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); |
2244 | struct btrfs_root *root = fs_info->tree_root; | 2192 | struct btrfs_root *root = fs_info->tree_root; |
2245 | 2193 | ||
2246 | fs_info->fs_frozen = 1; | 2194 | set_bit(BTRFS_FS_FROZEN, &fs_info->flags); |
2247 | /* | 2195 | /* |
2248 | * We don't need a barrier here, we'll wait for any transaction that | 2196 | * We don't need a barrier here, we'll wait for any transaction that |
2249 | * could be in progress on other threads (and do delayed iputs that | 2197 | * could be in progress on other threads (and do delayed iputs that |
@@ -2262,7 +2210,9 @@ static int btrfs_freeze(struct super_block *sb) | |||
2262 | 2210 | ||
2263 | static int btrfs_unfreeze(struct super_block *sb) | 2211 | static int btrfs_unfreeze(struct super_block *sb) |
2264 | { | 2212 | { |
2265 | btrfs_sb(sb)->fs_frozen = 0; | 2213 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); |
2214 | |||
2215 | clear_bit(BTRFS_FS_FROZEN, &fs_info->flags); | ||
2266 | return 0; | 2216 | return 0; |
2267 | } | 2217 | } |
2268 | 2218 | ||
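Replacing the plain int fs_frozen with a bit in fs_info->flags folds the state into the existing flags word and gets atomic updates for free: set_bit() and clear_bit() are atomic read-modify-write operations, and test_bit() is an ordinary read, so no lock is needed for an independent flag like this. The usage pattern, condensed from the hunks above:

    set_bit(BTRFS_FS_FROZEN, &fs_info->flags);      /* btrfs_freeze() */

    if (!test_bit(BTRFS_FS_FROZEN, &fs_info->flags))
            btrfs_run_delayed_iputs(fs_info);       /* skip while frozen */

    clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);    /* btrfs_unfreeze() */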
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 1f157fba8940..c2d5f3580b4c 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
@@ -447,11 +447,52 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, | |||
447 | 447 | ||
448 | BTRFS_ATTR(clone_alignment, btrfs_clone_alignment_show); | 448 | BTRFS_ATTR(clone_alignment, btrfs_clone_alignment_show); |
449 | 449 | ||
450 | static ssize_t quota_override_show(struct kobject *kobj, | ||
451 | struct kobj_attribute *a, char *buf) | ||
452 | { | ||
453 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); | ||
454 | int quota_override; | ||
455 | |||
456 | quota_override = test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags); | ||
457 | return snprintf(buf, PAGE_SIZE, "%d\n", quota_override); | ||
458 | } | ||
459 | |||
460 | static ssize_t quota_override_store(struct kobject *kobj, | ||
461 | struct kobj_attribute *a, | ||
462 | const char *buf, size_t len) | ||
463 | { | ||
464 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); | ||
465 | unsigned long knob; | ||
466 | int err; | ||
467 | |||
468 | if (!fs_info) | ||
469 | return -EPERM; | ||
470 | |||
471 | if (!capable(CAP_SYS_RESOURCE)) | ||
472 | return -EPERM; | ||
473 | |||
474 | err = kstrtoul(buf, 10, &knob); | ||
475 | if (err) | ||
476 | return err; | ||
477 | if (knob > 1) | ||
478 | return -EINVAL; | ||
479 | |||
480 | if (knob) | ||
481 | set_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags); | ||
482 | else | ||
483 | clear_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags); | ||
484 | |||
485 | return len; | ||
486 | } | ||
487 | |||
488 | BTRFS_ATTR_RW(quota_override, quota_override_show, quota_override_store); | ||
489 | |||
450 | static const struct attribute *btrfs_attrs[] = { | 490 | static const struct attribute *btrfs_attrs[] = { |
451 | BTRFS_ATTR_PTR(label), | 491 | BTRFS_ATTR_PTR(label), |
452 | BTRFS_ATTR_PTR(nodesize), | 492 | BTRFS_ATTR_PTR(nodesize), |
453 | BTRFS_ATTR_PTR(sectorsize), | 493 | BTRFS_ATTR_PTR(sectorsize), |
454 | BTRFS_ATTR_PTR(clone_alignment), | 494 | BTRFS_ATTR_PTR(clone_alignment), |
495 | BTRFS_ATTR_PTR(quota_override), | ||
455 | NULL, | 496 | NULL, |
456 | }; | 497 | }; |
457 | 498 | ||
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 133753232a94..d06b1c931d05 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c | |||
@@ -87,7 +87,7 @@ static int test_find_delalloc(u32 sectorsize) | |||
87 | return -ENOMEM; | 87 | return -ENOMEM; |
88 | } | 88 | } |
89 | 89 | ||
90 | extent_io_tree_init(&tmp, &inode->i_data); | 90 | extent_io_tree_init(&tmp, inode); |
91 | 91 | ||
92 | /* | 92 | /* |
93 | * First go through and create and mark all of our pages dirty, we pin | 93 | * First go through and create and mark all of our pages dirty, we pin |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2168654c90a1..f615d59b0489 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -93,7 +93,7 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) | |||
93 | btrfs_put_block_group_trimming(cache); | 93 | btrfs_put_block_group_trimming(cache); |
94 | btrfs_put_block_group(cache); | 94 | btrfs_put_block_group(cache); |
95 | } | 95 | } |
96 | kmem_cache_free(btrfs_transaction_cachep, transaction); | 96 | kfree(transaction); |
97 | } | 97 | } |
98 | } | 98 | } |
99 | 99 | ||
@@ -228,7 +228,7 @@ loop: | |||
228 | */ | 228 | */ |
229 | BUG_ON(type == TRANS_JOIN_NOLOCK); | 229 | BUG_ON(type == TRANS_JOIN_NOLOCK); |
230 | 230 | ||
231 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); | 231 | cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS); |
232 | if (!cur_trans) | 232 | if (!cur_trans) |
233 | return -ENOMEM; | 233 | return -ENOMEM; |
234 | 234 | ||
@@ -238,11 +238,11 @@ loop: | |||
238 | * someone started a transaction after we unlocked. Make sure | 238 | * someone started a transaction after we unlocked. Make sure |
239 | * to redo the checks above | 239 | * to redo the checks above |
240 | */ | 240 | */ |
241 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | 241 | kfree(cur_trans); |
242 | goto loop; | 242 | goto loop; |
243 | } else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { | 243 | } else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { |
244 | spin_unlock(&fs_info->trans_lock); | 244 | spin_unlock(&fs_info->trans_lock); |
245 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | 245 | kfree(cur_trans); |
246 | return -EROFS; | 246 | return -EROFS; |
247 | } | 247 | } |
248 | 248 | ||
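A dedicated slab cache pays off for small objects allocated and freed at high rates; transactions are created rarely enough that the generic allocator is a better fit, and dropping btrfs_transaction_cachep removes one more global cache to set up and tear down. The substitution is mechanical:

    struct btrfs_transaction *t;

    /* before: per-type slab cache */
    t = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
    kmem_cache_free(btrfs_transaction_cachep, t);

    /* after: generic allocator, same GFP semantics */
    t = kmalloc(sizeof(*t), GFP_NOFS);
    kfree(t);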
@@ -294,7 +294,7 @@ loop: | |||
294 | spin_lock_init(&cur_trans->dropped_roots_lock); | 294 | spin_lock_init(&cur_trans->dropped_roots_lock); |
295 | list_add_tail(&cur_trans->list, &fs_info->trans_list); | 295 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
296 | extent_io_tree_init(&cur_trans->dirty_pages, | 296 | extent_io_tree_init(&cur_trans->dirty_pages, |
297 | fs_info->btree_inode->i_mapping); | 297 | fs_info->btree_inode); |
298 | fs_info->generation++; | 298 | fs_info->generation++; |
299 | cur_trans->transid = fs_info->generation; | 299 | cur_trans->transid = fs_info->generation; |
300 | fs_info->running_transaction = cur_trans; | 300 | fs_info->running_transaction = cur_trans; |
@@ -1374,9 +1374,6 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, | |||
1374 | ret = commit_fs_roots(trans, fs_info); | 1374 | ret = commit_fs_roots(trans, fs_info); |
1375 | if (ret) | 1375 | if (ret) |
1376 | goto out; | 1376 | goto out; |
1377 | ret = btrfs_qgroup_prepare_account_extents(trans, fs_info); | ||
1378 | if (ret < 0) | ||
1379 | goto out; | ||
1380 | ret = btrfs_qgroup_account_extents(trans, fs_info); | 1377 | ret = btrfs_qgroup_account_extents(trans, fs_info); |
1381 | if (ret < 0) | 1378 | if (ret < 0) |
1382 | goto out; | 1379 | goto out; |
@@ -1926,7 +1923,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) | |||
1926 | static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) | 1923 | static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) |
1927 | { | 1924 | { |
1928 | if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) | 1925 | if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) |
1929 | btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1); | 1926 | btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); |
1930 | } | 1927 | } |
1931 | 1928 | ||
1932 | static inline void | 1929 | static inline void |
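Passing -1 for a u64 parameter already wraps to the all-ones value, so the behaviour is unchanged; spelling it U64_MAX simply states the "no limit" intent. (The trailing (u64)-1 range argument in the same call is left as-is by this hunk.)

    u64 nr  = -1;       /* wraps to 0xffffffffffffffff */
    u64 max = U64_MAX;  /* same value, explicit intent */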
@@ -2180,13 +2177,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) | |||
2180 | goto scrub_continue; | 2177 | goto scrub_continue; |
2181 | } | 2178 | } |
2182 | 2179 | ||
2183 | ret = btrfs_qgroup_prepare_account_extents(trans, fs_info); | ||
2184 | if (ret) { | ||
2185 | mutex_unlock(&fs_info->tree_log_mutex); | ||
2186 | mutex_unlock(&fs_info->reloc_mutex); | ||
2187 | goto scrub_continue; | ||
2188 | } | ||
2189 | |||
2190 | /* | 2180 | /* |
2191 | * Since fs roots are all committed, we can get a quite accurate | 2181 | * Since fs roots are all committed, we can get a quite accurate |
2192 | * new_roots. So let's do quota accounting. | 2182 | * new_roots. So let's do quota accounting. |
@@ -2314,7 +2304,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) | |||
2314 | * it'll result in deadlock about SB_FREEZE_FS. | 2304 | * it'll result in deadlock about SB_FREEZE_FS. |
2315 | */ | 2305 | */ |
2316 | if (current != fs_info->transaction_kthread && | 2306 | if (current != fs_info->transaction_kthread && |
2317 | current != fs_info->cleaner_kthread && !fs_info->fs_frozen) | 2307 | current != fs_info->cleaner_kthread && |
2308 | !test_bit(BTRFS_FS_FROZEN, &fs_info->flags)) | ||
2318 | btrfs_run_delayed_iputs(fs_info); | 2309 | btrfs_run_delayed_iputs(fs_info); |
2319 | 2310 | ||
2320 | return ret; | 2311 | return ret; |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ccfe9fe7754a..f20ef211a73d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -1175,15 +1175,19 @@ next: | |||
1175 | return 0; | 1175 | return 0; |
1176 | } | 1176 | } |
1177 | 1177 | ||
1178 | static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, | 1178 | static int extref_get_fields(struct extent_buffer *eb, int slot, |
1179 | u32 *namelen, char **name, u64 *index, | 1179 | unsigned long ref_ptr, u32 *namelen, char **name, |
1180 | u64 *parent_objectid) | 1180 | u64 *index, u64 *parent_objectid) |
1181 | { | 1181 | { |
1182 | struct btrfs_inode_extref *extref; | 1182 | struct btrfs_inode_extref *extref; |
1183 | 1183 | ||
1184 | extref = (struct btrfs_inode_extref *)ref_ptr; | 1184 | extref = (struct btrfs_inode_extref *)ref_ptr; |
1185 | 1185 | ||
1186 | *namelen = btrfs_inode_extref_name_len(eb, extref); | 1186 | *namelen = btrfs_inode_extref_name_len(eb, extref); |
1187 | if (!btrfs_is_name_len_valid(eb, slot, (unsigned long)&extref->name, | ||
1188 | *namelen)) | ||
1189 | return -EIO; | ||
1190 | |||
1187 | *name = kmalloc(*namelen, GFP_NOFS); | 1191 | *name = kmalloc(*namelen, GFP_NOFS); |
1188 | if (*name == NULL) | 1192 | if (*name == NULL) |
1189 | return -ENOMEM; | 1193 | return -ENOMEM; |
@@ -1198,14 +1202,19 @@ static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, | |||
1198 | return 0; | 1202 | return 0; |
1199 | } | 1203 | } |
1200 | 1204 | ||
1201 | static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, | 1205 | static int ref_get_fields(struct extent_buffer *eb, int slot, |
1202 | u32 *namelen, char **name, u64 *index) | 1206 | unsigned long ref_ptr, u32 *namelen, char **name, |
1207 | u64 *index) | ||
1203 | { | 1208 | { |
1204 | struct btrfs_inode_ref *ref; | 1209 | struct btrfs_inode_ref *ref; |
1205 | 1210 | ||
1206 | ref = (struct btrfs_inode_ref *)ref_ptr; | 1211 | ref = (struct btrfs_inode_ref *)ref_ptr; |
1207 | 1212 | ||
1208 | *namelen = btrfs_inode_ref_name_len(eb, ref); | 1213 | *namelen = btrfs_inode_ref_name_len(eb, ref); |
1214 | if (!btrfs_is_name_len_valid(eb, slot, (unsigned long)(ref + 1), | ||
1215 | *namelen)) | ||
1216 | return -EIO; | ||
1217 | |||
1209 | *name = kmalloc(*namelen, GFP_NOFS); | 1218 | *name = kmalloc(*namelen, GFP_NOFS); |
1210 | if (*name == NULL) | 1219 | if (*name == NULL) |
1211 | return -ENOMEM; | 1220 | return -ENOMEM; |
@@ -1280,8 +1289,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | |||
1280 | 1289 | ||
1281 | while (ref_ptr < ref_end) { | 1290 | while (ref_ptr < ref_end) { |
1282 | if (log_ref_ver) { | 1291 | if (log_ref_ver) { |
1283 | ret = extref_get_fields(eb, ref_ptr, &namelen, &name, | 1292 | ret = extref_get_fields(eb, slot, ref_ptr, &namelen, |
1284 | &ref_index, &parent_objectid); | 1293 | &name, &ref_index, &parent_objectid); |
1285 | /* | 1294 | /* |
1286 | * parent object can change from one array | 1295 | * parent object can change from one array |
1287 | * item to another. | 1296 | * item to another. |
@@ -1293,8 +1302,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | |||
1293 | goto out; | 1302 | goto out; |
1294 | } | 1303 | } |
1295 | } else { | 1304 | } else { |
1296 | ret = ref_get_fields(eb, ref_ptr, &namelen, &name, | 1305 | ret = ref_get_fields(eb, slot, ref_ptr, &namelen, |
1297 | &ref_index); | 1306 | &name, &ref_index); |
1298 | } | 1307 | } |
1299 | if (ret) | 1308 | if (ret) |
1300 | goto out; | 1309 | goto out; |
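Both ref parsers now validate name_len before trusting it for the kmalloc() and the copy out of the leaf, turning a corrupted length into -EIO instead of an out-of-bounds read. btrfs_is_name_len_valid() is added earlier in this series; conceptually it checks that the name bytes fall entirely inside the item that holds them, roughly along these lines (a conceptual sketch with a hypothetical helper name, not the series' implementation):

    static bool name_fits_in_item(struct extent_buffer *eb, int slot,
                                  unsigned long name_ptr, u32 name_len)
    {
            unsigned long start = btrfs_item_ptr_offset(eb, slot);
            u32 size = btrfs_item_size_nr(eb, slot);

            return name_ptr >= start &&
                   name_ptr + name_len <= start + size;
    }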
@@ -1841,7 +1850,7 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, | |||
1841 | ptr_end = ptr + item_size; | 1850 | ptr_end = ptr + item_size; |
1842 | while (ptr < ptr_end) { | 1851 | while (ptr < ptr_end) { |
1843 | di = (struct btrfs_dir_item *)ptr; | 1852 | di = (struct btrfs_dir_item *)ptr; |
1844 | if (verify_dir_item(fs_info, eb, di)) | 1853 | if (verify_dir_item(fs_info, eb, slot, di)) |
1845 | return -EIO; | 1854 | return -EIO; |
1846 | name_len = btrfs_dir_name_len(eb, di); | 1855 | name_len = btrfs_dir_name_len(eb, di); |
1847 | ret = replay_one_name(trans, root, path, eb, di, key); | 1856 | ret = replay_one_name(trans, root, path, eb, di, key); |
@@ -2017,7 +2026,7 @@ again: | |||
2017 | ptr_end = ptr + item_size; | 2026 | ptr_end = ptr + item_size; |
2018 | while (ptr < ptr_end) { | 2027 | while (ptr < ptr_end) { |
2019 | di = (struct btrfs_dir_item *)ptr; | 2028 | di = (struct btrfs_dir_item *)ptr; |
2020 | if (verify_dir_item(fs_info, eb, di)) { | 2029 | if (verify_dir_item(fs_info, eb, slot, di)) { |
2021 | ret = -EIO; | 2030 | ret = -EIO; |
2022 | goto out; | 2031 | goto out; |
2023 | } | 2032 | } |
@@ -2102,6 +2111,7 @@ static int replay_xattr_deletes(struct btrfs_trans_handle *trans, | |||
2102 | struct btrfs_path *path, | 2111 | struct btrfs_path *path, |
2103 | const u64 ino) | 2112 | const u64 ino) |
2104 | { | 2113 | { |
2114 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
2105 | struct btrfs_key search_key; | 2115 | struct btrfs_key search_key; |
2106 | struct btrfs_path *log_path; | 2116 | struct btrfs_path *log_path; |
2107 | int i; | 2117 | int i; |
@@ -2143,6 +2153,12 @@ process_leaf: | |||
2143 | u32 this_len = sizeof(*di) + name_len + data_len; | 2153 | u32 this_len = sizeof(*di) + name_len + data_len; |
2144 | char *name; | 2154 | char *name; |
2145 | 2155 | ||
2156 | ret = verify_dir_item(fs_info, path->nodes[0], | ||
2157 | path->slots[0], di); | ||
2158 | if (ret) { | ||
2159 | ret = -EIO; | ||
2160 | goto out; | ||
2161 | } | ||
2146 | name = kmalloc(name_len, GFP_NOFS); | 2162 | name = kmalloc(name_len, GFP_NOFS); |
2147 | if (!name) { | 2163 | if (!name) { |
2148 | ret = -ENOMEM; | 2164 | ret = -ENOMEM; |
@@ -4546,6 +4562,12 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb, | |||
4546 | this_len = sizeof(*extref) + this_name_len; | 4562 | this_len = sizeof(*extref) + this_name_len; |
4547 | } | 4563 | } |
4548 | 4564 | ||
4565 | ret = btrfs_is_name_len_valid(eb, slot, name_ptr, | ||
4566 | this_name_len); | ||
4567 | if (!ret) { | ||
4568 | ret = -EIO; | ||
4569 | goto out; | ||
4570 | } | ||
4549 | if (this_name_len > name_len) { | 4571 | if (this_name_len > name_len) { |
4550 | char *new_name; | 4572 | char *new_name; |
4551 | 4573 | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 84a495967e0a..5eb7217738ed 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -242,6 +242,17 @@ static struct btrfs_device *__alloc_device(void) | |||
242 | if (!dev) | 242 | if (!dev) |
243 | return ERR_PTR(-ENOMEM); | 243 | return ERR_PTR(-ENOMEM); |
244 | 244 | ||
245 | /* | ||
246 | * Preallocate a bio that's always going to be used for flushing device | ||
247 | * barriers and matches the device lifespan | ||
248 | */ | ||
249 | dev->flush_bio = bio_alloc_bioset(GFP_KERNEL, 0, NULL); | ||
250 | if (!dev->flush_bio) { | ||
251 | kfree(dev); | ||
252 | return ERR_PTR(-ENOMEM); | ||
253 | } | ||
254 | bio_get(dev->flush_bio); | ||
255 | |||
245 | INIT_LIST_HEAD(&dev->dev_list); | 256 | INIT_LIST_HEAD(&dev->dev_list); |
246 | INIT_LIST_HEAD(&dev->dev_alloc_list); | 257 | INIT_LIST_HEAD(&dev->dev_alloc_list); |
247 | INIT_LIST_HEAD(&dev->resized_list); | 258 | INIT_LIST_HEAD(&dev->resized_list); |
@@ -838,6 +849,7 @@ static void __free_device(struct work_struct *work) | |||
838 | 849 | ||
839 | device = container_of(work, struct btrfs_device, rcu_work); | 850 | device = container_of(work, struct btrfs_device, rcu_work); |
840 | rcu_string_free(device->name); | 851 | rcu_string_free(device->name); |
852 | bio_put(device->flush_bio); | ||
841 | kfree(device); | 853 | kfree(device); |
842 | } | 854 | } |
843 | 855 | ||
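Allocating the flush bio once per device removes a bio allocation, and therefore a failure path, from every barrier submission; the bio_get() keeps it alive for the device's whole lifetime and the matching bio_put() in __free_device() releases it. The flush path presumably recycles it with bio_reset(); a sketch of the assumed reuse (device setup such as bi_bdev and the end_io handler is omitted, and flush_bio_sent is the field added to struct btrfs_device later in this diff):

    struct bio *bio = device->flush_bio;

    bio_reset(bio);                         /* back to a pristine state */
    bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
    submit_bio(bio);
    device->flush_bio_sent = 1;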
@@ -1353,15 +1365,13 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction, | |||
1353 | int ret; | 1365 | int ret; |
1354 | int slot; | 1366 | int slot; |
1355 | struct extent_buffer *l; | 1367 | struct extent_buffer *l; |
1356 | u64 min_search_start; | ||
1357 | 1368 | ||
1358 | /* | 1369 | /* |
1359 | * We don't want to overwrite the superblock on the drive nor any area | 1370 | * We don't want to overwrite the superblock on the drive nor any area |
1360 | * used by the boot loader (grub for example), so we make sure to start | 1371 | * used by the boot loader (grub for example), so we make sure to start |
1361 | * at an offset of at least 1MB. | 1372 | * at an offset of at least 1MB. |
1362 | */ | 1373 | */ |
1363 | min_search_start = max(fs_info->alloc_start, 1024ull * 1024); | 1374 | search_start = max_t(u64, search_start, SZ_1M); |
1364 | search_start = max(search_start, min_search_start); | ||
1365 | 1375 | ||
1366 | path = btrfs_alloc_path(); | 1376 | path = btrfs_alloc_path(); |
1367 | if (!path) | 1377 | if (!path) |
@@ -2387,7 +2397,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path | |||
2387 | device->io_width = fs_info->sectorsize; | 2397 | device->io_width = fs_info->sectorsize; |
2388 | device->io_align = fs_info->sectorsize; | 2398 | device->io_align = fs_info->sectorsize; |
2389 | device->sector_size = fs_info->sectorsize; | 2399 | device->sector_size = fs_info->sectorsize; |
2390 | device->total_bytes = i_size_read(bdev->bd_inode); | 2400 | device->total_bytes = round_down(i_size_read(bdev->bd_inode), |
2401 | fs_info->sectorsize); | ||
2391 | device->disk_total_bytes = device->total_bytes; | 2402 | device->disk_total_bytes = device->total_bytes; |
2392 | device->commit_total_bytes = device->total_bytes; | 2403 | device->commit_total_bytes = device->total_bytes; |
2393 | device->fs_info = fs_info; | 2404 | device->fs_info = fs_info; |
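Rounding the device size down to a sector-size multiple at the source keeps every total derived from it aligned, instead of letting a ragged final partial sector leak into the superblock's byte counts. For the power-of-two alignments used here, round_down() is just a mask:

    /* round_down(x, a) == x & ~(a - 1) for power-of-two a */
    u64 bytes   = 10000000000ULL;               /* ~10 GB block device */
    u64 aligned = round_down(bytes, 4096);      /* 9999998976 */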
@@ -2417,16 +2428,14 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path | |||
2417 | fs_info->fs_devices->total_devices++; | 2428 | fs_info->fs_devices->total_devices++; |
2418 | fs_info->fs_devices->total_rw_bytes += device->total_bytes; | 2429 | fs_info->fs_devices->total_rw_bytes += device->total_bytes; |
2419 | 2430 | ||
2420 | spin_lock(&fs_info->free_chunk_lock); | 2431 | atomic64_add(device->total_bytes, &fs_info->free_chunk_space); |
2421 | fs_info->free_chunk_space += device->total_bytes; | ||
2422 | spin_unlock(&fs_info->free_chunk_lock); | ||
2423 | 2432 | ||
2424 | if (!blk_queue_nonrot(q)) | 2433 | if (!blk_queue_nonrot(q)) |
2425 | fs_info->fs_devices->rotating = 1; | 2434 | fs_info->fs_devices->rotating = 1; |
2426 | 2435 | ||
2427 | tmp = btrfs_super_total_bytes(fs_info->super_copy); | 2436 | tmp = btrfs_super_total_bytes(fs_info->super_copy); |
2428 | btrfs_set_super_total_bytes(fs_info->super_copy, | 2437 | btrfs_set_super_total_bytes(fs_info->super_copy, |
2429 | tmp + device->total_bytes); | 2438 | round_down(tmp + device->total_bytes, fs_info->sectorsize)); |
2430 | 2439 | ||
2431 | tmp = btrfs_super_num_devices(fs_info->super_copy); | 2440 | tmp = btrfs_super_num_devices(fs_info->super_copy); |
2432 | btrfs_set_super_num_devices(fs_info->super_copy, tmp + 1); | 2441 | btrfs_set_super_num_devices(fs_info->super_copy, tmp + 1); |
@@ -2574,7 +2583,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | |||
2574 | goto error; | 2583 | goto error; |
2575 | } | 2584 | } |
2576 | 2585 | ||
2577 | name = rcu_string_strdup(device_path, GFP_NOFS); | 2586 | name = rcu_string_strdup(device_path, GFP_KERNEL); |
2578 | if (!name) { | 2587 | if (!name) { |
2579 | kfree(device); | 2588 | kfree(device); |
2580 | ret = -ENOMEM; | 2589 | ret = -ENOMEM; |
@@ -2689,6 +2698,8 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, | |||
2689 | if (!device->writeable) | 2698 | if (!device->writeable) |
2690 | return -EACCES; | 2699 | return -EACCES; |
2691 | 2700 | ||
2701 | new_size = round_down(new_size, fs_info->sectorsize); | ||
2702 | |||
2692 | mutex_lock(&fs_info->chunk_mutex); | 2703 | mutex_lock(&fs_info->chunk_mutex); |
2693 | old_total = btrfs_super_total_bytes(super_copy); | 2704 | old_total = btrfs_super_total_bytes(super_copy); |
2694 | diff = new_size - device->total_bytes; | 2705 | diff = new_size - device->total_bytes; |
@@ -2701,7 +2712,8 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, | |||
2701 | 2712 | ||
2702 | fs_devices = fs_info->fs_devices; | 2713 | fs_devices = fs_info->fs_devices; |
2703 | 2714 | ||
2704 | btrfs_set_super_total_bytes(super_copy, old_total + diff); | 2715 | btrfs_set_super_total_bytes(super_copy, |
2716 | round_down(old_total + diff, fs_info->sectorsize)); | ||
2705 | device->fs_devices->total_rw_bytes += diff; | 2717 | device->fs_devices->total_rw_bytes += diff; |
2706 | 2718 | ||
2707 | btrfs_device_set_total_bytes(device, new_size); | 2719 | btrfs_device_set_total_bytes(device, new_size); |
@@ -2874,9 +2886,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, | |||
2874 | mutex_lock(&fs_info->chunk_mutex); | 2886 | mutex_lock(&fs_info->chunk_mutex); |
2875 | btrfs_device_set_bytes_used(device, | 2887 | btrfs_device_set_bytes_used(device, |
2876 | device->bytes_used - dev_extent_len); | 2888 | device->bytes_used - dev_extent_len); |
2877 | spin_lock(&fs_info->free_chunk_lock); | 2889 | atomic64_add(dev_extent_len, &fs_info->free_chunk_space); |
2878 | fs_info->free_chunk_space += dev_extent_len; | ||
2879 | spin_unlock(&fs_info->free_chunk_lock); | ||
2880 | btrfs_clear_space_info_full(fs_info); | 2890 | btrfs_clear_space_info_full(fs_info); |
2881 | mutex_unlock(&fs_info->chunk_mutex); | 2891 | mutex_unlock(&fs_info->chunk_mutex); |
2882 | } | 2892 | } |
@@ -4393,7 +4403,10 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
4393 | struct btrfs_super_block *super_copy = fs_info->super_copy; | 4403 | struct btrfs_super_block *super_copy = fs_info->super_copy; |
4394 | u64 old_total = btrfs_super_total_bytes(super_copy); | 4404 | u64 old_total = btrfs_super_total_bytes(super_copy); |
4395 | u64 old_size = btrfs_device_get_total_bytes(device); | 4405 | u64 old_size = btrfs_device_get_total_bytes(device); |
4396 | u64 diff = old_size - new_size; | 4406 | u64 diff; |
4407 | |||
4408 | new_size = round_down(new_size, fs_info->sectorsize); | ||
4409 | diff = old_size - new_size; | ||
4397 | 4410 | ||
4398 | if (device->is_tgtdev_for_dev_replace) | 4411 | if (device->is_tgtdev_for_dev_replace) |
4399 | return -EINVAL; | 4412 | return -EINVAL; |
@@ -4409,9 +4422,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
4409 | btrfs_device_set_total_bytes(device, new_size); | 4422 | btrfs_device_set_total_bytes(device, new_size); |
4410 | if (device->writeable) { | 4423 | if (device->writeable) { |
4411 | device->fs_devices->total_rw_bytes -= diff; | 4424 | device->fs_devices->total_rw_bytes -= diff; |
4412 | spin_lock(&fs_info->free_chunk_lock); | 4425 | atomic64_sub(diff, &fs_info->free_chunk_space); |
4413 | fs_info->free_chunk_space -= diff; | ||
4414 | spin_unlock(&fs_info->free_chunk_lock); | ||
4415 | } | 4426 | } |
4416 | mutex_unlock(&fs_info->chunk_mutex); | 4427 | mutex_unlock(&fs_info->chunk_mutex); |
4417 | 4428 | ||
@@ -4522,7 +4533,8 @@ again: | |||
4522 | &fs_info->fs_devices->resized_devices); | 4533 | &fs_info->fs_devices->resized_devices); |
4523 | 4534 | ||
4524 | WARN_ON(diff > old_total); | 4535 | WARN_ON(diff > old_total); |
4525 | btrfs_set_super_total_bytes(super_copy, old_total - diff); | 4536 | btrfs_set_super_total_bytes(super_copy, |
4537 | round_down(old_total - diff, fs_info->sectorsize)); | ||
4526 | mutex_unlock(&fs_info->chunk_mutex); | 4538 | mutex_unlock(&fs_info->chunk_mutex); |
4527 | 4539 | ||
4528 | /* Now btrfs_update_device() will change the on-disk size. */ | 4540 | /* Now btrfs_update_device() will change the on-disk size. */ |
@@ -4535,9 +4547,7 @@ done: | |||
4535 | btrfs_device_set_total_bytes(device, old_size); | 4547 | btrfs_device_set_total_bytes(device, old_size); |
4536 | if (device->writeable) | 4548 | if (device->writeable) |
4537 | device->fs_devices->total_rw_bytes += diff; | 4549 | device->fs_devices->total_rw_bytes += diff; |
4538 | spin_lock(&fs_info->free_chunk_lock); | 4550 | atomic64_add(diff, &fs_info->free_chunk_space); |
4539 | fs_info->free_chunk_space += diff; | ||
4540 | spin_unlock(&fs_info->free_chunk_lock); | ||
4541 | mutex_unlock(&fs_info->chunk_mutex); | 4551 | mutex_unlock(&fs_info->chunk_mutex); |
4542 | } | 4552 | } |
4543 | return ret; | 4553 | return ret; |
@@ -4882,9 +4892,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
4882 | btrfs_device_set_bytes_used(map->stripes[i].dev, num_bytes); | 4892 | btrfs_device_set_bytes_used(map->stripes[i].dev, num_bytes); |
4883 | } | 4893 | } |
4884 | 4894 | ||
4885 | spin_lock(&info->free_chunk_lock); | 4895 | atomic64_sub(stripe_size * map->num_stripes, &info->free_chunk_space); |
4886 | info->free_chunk_space -= (stripe_size * map->num_stripes); | ||
4887 | spin_unlock(&info->free_chunk_lock); | ||
4888 | 4896 | ||
4889 | free_extent_map(em); | 4897 | free_extent_map(em); |
4890 | check_raid56_incompat_flag(info, type); | 4898 | check_raid56_incompat_flag(info, type); |
@@ -5029,20 +5037,19 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
5029 | static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | 5037 | static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, |
5030 | struct btrfs_fs_info *fs_info) | 5038 | struct btrfs_fs_info *fs_info) |
5031 | { | 5039 | { |
5032 | struct btrfs_root *extent_root = fs_info->extent_root; | ||
5033 | u64 chunk_offset; | 5040 | u64 chunk_offset; |
5034 | u64 sys_chunk_offset; | 5041 | u64 sys_chunk_offset; |
5035 | u64 alloc_profile; | 5042 | u64 alloc_profile; |
5036 | int ret; | 5043 | int ret; |
5037 | 5044 | ||
5038 | chunk_offset = find_next_chunk(fs_info); | 5045 | chunk_offset = find_next_chunk(fs_info); |
5039 | alloc_profile = btrfs_get_alloc_profile(extent_root, 0); | 5046 | alloc_profile = btrfs_metadata_alloc_profile(fs_info); |
5040 | ret = __btrfs_alloc_chunk(trans, chunk_offset, alloc_profile); | 5047 | ret = __btrfs_alloc_chunk(trans, chunk_offset, alloc_profile); |
5041 | if (ret) | 5048 | if (ret) |
5042 | return ret; | 5049 | return ret; |
5043 | 5050 | ||
5044 | sys_chunk_offset = find_next_chunk(fs_info); | 5051 | sys_chunk_offset = find_next_chunk(fs_info); |
5045 | alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0); | 5052 | alloc_profile = btrfs_system_alloc_profile(fs_info); |
5046 | ret = __btrfs_alloc_chunk(trans, sys_chunk_offset, alloc_profile); | 5053 | ret = __btrfs_alloc_chunk(trans, sys_chunk_offset, alloc_profile); |
5047 | return ret; | 5054 | return ret; |
5048 | } | 5055 | } |
@@ -6267,10 +6274,9 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, | |||
6267 | continue; | 6274 | continue; |
6268 | } | 6275 | } |
6269 | 6276 | ||
6270 | if (dev_nr < total_devs - 1) { | 6277 | if (dev_nr < total_devs - 1) |
6271 | bio = btrfs_bio_clone(first_bio, GFP_NOFS); | 6278 | bio = btrfs_bio_clone(first_bio); |
6272 | BUG_ON(!bio); /* -ENOMEM */ | 6279 | else |
6273 | } else | ||
6274 | bio = first_bio; | 6280 | bio = first_bio; |
6275 | 6281 | ||
6276 | submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical, | 6282 | submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical, |
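With bio cloning moved onto btrfs' own bioset earlier in this series, the clone is treated as unable to fail, so both the GFP argument and the BUG_ON() disappear from the caller. A mempool-backed bioset allocation may block until memory is available but does not return NULL; the helper presumably reduces to something like this sketch:

    struct bio *btrfs_bio_clone_sketch(struct bio *orig)
    {
            /* bioset-backed, may sleep, does not fail */
            return bio_clone_fast(orig, GFP_NOFS, btrfs_bioset);
    }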
@@ -6685,10 +6691,8 @@ static int read_one_dev(struct btrfs_fs_info *fs_info, | |||
6685 | device->in_fs_metadata = 1; | 6691 | device->in_fs_metadata = 1; |
6686 | if (device->writeable && !device->is_tgtdev_for_dev_replace) { | 6692 | if (device->writeable && !device->is_tgtdev_for_dev_replace) { |
6687 | device->fs_devices->total_rw_bytes += device->total_bytes; | 6693 | device->fs_devices->total_rw_bytes += device->total_bytes; |
6688 | spin_lock(&fs_info->free_chunk_lock); | 6694 | atomic64_add(device->total_bytes - device->bytes_used, |
6689 | fs_info->free_chunk_space += device->total_bytes - | 6695 | &fs_info->free_chunk_space); |
6690 | device->bytes_used; | ||
6691 | spin_unlock(&fs_info->free_chunk_lock); | ||
6692 | } | 6696 | } |
6693 | ret = 0; | 6697 | ret = 0; |
6694 | return ret; | 6698 | return ret; |
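Every free_chunk_space update in this file follows the same conversion: a spinlock protecting a lone u64 counter becomes a single atomic64 operation, which is cheaper and removes free_chunk_lock entirely. Side by side:

    /* before: lock, modify, unlock */
    spin_lock(&fs_info->free_chunk_lock);
    fs_info->free_chunk_space += delta;
    spin_unlock(&fs_info->free_chunk_lock);

    /* after: one atomic read-modify-write */
    atomic64_add(delta, &fs_info->free_chunk_space);

Readers use atomic64_read() and need no lock either.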
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index c7d0fbc915ca..6f45fd60d15a 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -74,6 +74,8 @@ struct btrfs_device { | |||
74 | int missing; | 74 | int missing; |
75 | int can_discard; | 75 | int can_discard; |
76 | int is_tgtdev_for_dev_replace; | 76 | int is_tgtdev_for_dev_replace; |
77 | int last_flush_error; | ||
78 | int flush_bio_sent; | ||
77 | 79 | ||
78 | #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED | 80 | #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED |
79 | seqcount_t data_seqcount; | 81 | seqcount_t data_seqcount; |
@@ -279,6 +281,11 @@ struct btrfs_io_bio { | |||
279 | u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; | 281 | u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; |
280 | u8 *csum_allocated; | 282 | u8 *csum_allocated; |
281 | btrfs_io_bio_end_io_t *end_io; | 283 | btrfs_io_bio_end_io_t *end_io; |
284 | struct bvec_iter iter; | ||
285 | /* | ||
286 | * This member must come last, bio_alloc_bioset will allocate enough | ||
287 | * bytes for entire btrfs_io_bio but relies on bio being last. | ||
288 | */ | ||
282 | struct bio bio; | 289 | struct bio bio; |
283 | }; | 290 | }; |
284 | 291 | ||
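The comment codifies why struct bio must stay the last member: bio_alloc_bioset() allocates the bioset's front_pad bytes ahead of the bio it returns, so the embedding structure's other members live in that padding and the owner is recovered with container_of(). The accessor pattern looks like this (matching the btrfs_io_bio() helper already in this header):

    static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
    {
            return container_of(bio, struct btrfs_io_bio, bio);
    }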
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index b3cbf80c5acf..2c7e53f9ff1b 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -336,7 +336,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
336 | u32 this_len = sizeof(*di) + name_len + data_len; | 336 | u32 this_len = sizeof(*di) + name_len + data_len; |
337 | unsigned long name_ptr = (unsigned long)(di + 1); | 337 | unsigned long name_ptr = (unsigned long)(di + 1); |
338 | 338 | ||
339 | if (verify_dir_item(fs_info, leaf, di)) { | 339 | if (verify_dir_item(fs_info, leaf, slot, di)) { |
340 | ret = -EIO; | 340 | ret = -EIO; |
341 | goto err; | 341 | goto err; |
342 | } | 342 | } |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 135b10823c6d..c248f9286366 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -24,12 +24,13 @@ | |||
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/zlib.h> | 25 | #include <linux/zlib.h> |
26 | #include <linux/zutil.h> | 26 | #include <linux/zutil.h> |
27 | #include <linux/vmalloc.h> | 27 | #include <linux/mm.h> |
28 | #include <linux/init.h> | 28 | #include <linux/init.h> |
29 | #include <linux/err.h> | 29 | #include <linux/err.h> |
30 | #include <linux/sched.h> | 30 | #include <linux/sched.h> |
31 | #include <linux/pagemap.h> | 31 | #include <linux/pagemap.h> |
32 | #include <linux/bio.h> | 32 | #include <linux/bio.h> |
33 | #include <linux/refcount.h> | ||
33 | #include "compression.h" | 34 | #include "compression.h" |
34 | 35 | ||
35 | struct workspace { | 36 | struct workspace { |
@@ -42,7 +43,7 @@ static void zlib_free_workspace(struct list_head *ws) | |||
42 | { | 43 | { |
43 | struct workspace *workspace = list_entry(ws, struct workspace, list); | 44 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
44 | 45 | ||
45 | vfree(workspace->strm.workspace); | 46 | kvfree(workspace->strm.workspace); |
46 | kfree(workspace->buf); | 47 | kfree(workspace->buf); |
47 | kfree(workspace); | 48 | kfree(workspace); |
48 | } | 49 | } |
@@ -52,14 +53,14 @@ static struct list_head *zlib_alloc_workspace(void) | |||
52 | struct workspace *workspace; | 53 | struct workspace *workspace; |
53 | int workspacesize; | 54 | int workspacesize; |
54 | 55 | ||
55 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | 56 | workspace = kzalloc(sizeof(*workspace), GFP_KERNEL); |
56 | if (!workspace) | 57 | if (!workspace) |
57 | return ERR_PTR(-ENOMEM); | 58 | return ERR_PTR(-ENOMEM); |
58 | 59 | ||
59 | workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), | 60 | workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), |
60 | zlib_inflate_workspacesize()); | 61 | zlib_inflate_workspacesize()); |
61 | workspace->strm.workspace = vmalloc(workspacesize); | 62 | workspace->strm.workspace = kvmalloc(workspacesize, GFP_KERNEL); |
62 | workspace->buf = kmalloc(PAGE_SIZE, GFP_NOFS); | 63 | workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
63 | if (!workspace->strm.workspace || !workspace->buf) | 64 | if (!workspace->strm.workspace || !workspace->buf) |
64 | goto fail; | 65 | goto fail; |
65 | 66 | ||
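kvmalloc() tries the slab allocator first and falls back to vmalloc() for requests too large or too fragmented for contiguous pages, which suits the multi-hundred-kilobyte zlib workspace; kvfree() releases either kind, which is why the free side switches in the same patch. The idiom:

    void *ws = kvmalloc(workspacesize, GFP_KERNEL);  /* kmalloc, then vmalloc */
    if (!ws)
            return ERR_PTR(-ENOMEM);
    kvfree(ws);                                      /* frees either kind */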
@@ -211,10 +212,7 @@ out: | |||
211 | return ret; | 212 | return ret; |
212 | } | 213 | } |
213 | 214 | ||
214 | static int zlib_decompress_bio(struct list_head *ws, struct page **pages_in, | 215 | static int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb) |
215 | u64 disk_start, | ||
216 | struct bio *orig_bio, | ||
217 | size_t srclen) | ||
218 | { | 216 | { |
219 | struct workspace *workspace = list_entry(ws, struct workspace, list); | 217 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
220 | int ret = 0, ret2; | 218 | int ret = 0, ret2; |
@@ -222,8 +220,12 @@ static int zlib_decompress_bio(struct list_head *ws, struct page **pages_in, | |||
222 | char *data_in; | 220 | char *data_in; |
223 | size_t total_out = 0; | 221 | size_t total_out = 0; |
224 | unsigned long page_in_index = 0; | 222 | unsigned long page_in_index = 0; |
223 | size_t srclen = cb->compressed_len; | ||
225 | unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); | 224 | unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); |
226 | unsigned long buf_start; | 225 | unsigned long buf_start; |
226 | struct page **pages_in = cb->compressed_pages; | ||
227 | u64 disk_start = cb->start; | ||
228 | struct bio *orig_bio = cb->orig_bio; | ||
227 | 229 | ||
228 | data_in = kmap(pages_in[page_in_index]); | 230 | data_in = kmap(pages_in[page_in_index]); |
229 | workspace->strm.next_in = data_in; | 231 | workspace->strm.next_in = data_in; |
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index e37973526153..cd99a3658156 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h | |||
@@ -1410,42 +1410,6 @@ DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy, | |||
1410 | TP_ARGS(wq) | 1410 | TP_ARGS(wq) |
1411 | ); | 1411 | ); |
1412 | 1412 | ||
1413 | DECLARE_EVENT_CLASS(btrfs__qgroup_data_map, | ||
1414 | |||
1415 | TP_PROTO(struct inode *inode, u64 free_reserved), | ||
1416 | |||
1417 | TP_ARGS(inode, free_reserved), | ||
1418 | |||
1419 | TP_STRUCT__entry_btrfs( | ||
1420 | __field( u64, rootid ) | ||
1421 | __field( unsigned long, ino ) | ||
1422 | __field( u64, free_reserved ) | ||
1423 | ), | ||
1424 | |||
1425 | TP_fast_assign_btrfs(btrfs_sb(inode->i_sb), | ||
1426 | __entry->rootid = BTRFS_I(inode)->root->objectid; | ||
1427 | __entry->ino = inode->i_ino; | ||
1428 | __entry->free_reserved = free_reserved; | ||
1429 | ), | ||
1430 | |||
1431 | TP_printk_btrfs("rootid=%llu ino=%lu free_reserved=%llu", | ||
1432 | __entry->rootid, __entry->ino, __entry->free_reserved) | ||
1433 | ); | ||
1434 | |||
1435 | DEFINE_EVENT(btrfs__qgroup_data_map, btrfs_qgroup_init_data_rsv_map, | ||
1436 | |||
1437 | TP_PROTO(struct inode *inode, u64 free_reserved), | ||
1438 | |||
1439 | TP_ARGS(inode, free_reserved) | ||
1440 | ); | ||
1441 | |||
1442 | DEFINE_EVENT(btrfs__qgroup_data_map, btrfs_qgroup_free_data_rsv_map, | ||
1443 | |||
1444 | TP_PROTO(struct inode *inode, u64 free_reserved), | ||
1445 | |||
1446 | TP_ARGS(inode, free_reserved) | ||
1447 | ); | ||
1448 | |||
1449 | #define BTRFS_QGROUP_OPERATIONS \ | 1413 | #define BTRFS_QGROUP_OPERATIONS \ |
1450 | { QGROUP_RESERVE, "reserve" }, \ | 1414 | { QGROUP_RESERVE, "reserve" }, \ |
1451 | { QGROUP_RELEASE, "release" }, \ | 1415 | { QGROUP_RELEASE, "release" }, \ |
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index a456e5309238..9aa74f317747 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h | |||
@@ -426,31 +426,54 @@ struct btrfs_ioctl_ino_lookup_args { | |||
426 | char name[BTRFS_INO_LOOKUP_PATH_MAX]; | 426 | char name[BTRFS_INO_LOOKUP_PATH_MAX]; |
427 | }; | 427 | }; |
428 | 428 | ||
429 | /* Search criteria for the btrfs SEARCH ioctl family. */ | ||
429 | struct btrfs_ioctl_search_key { | 430 | struct btrfs_ioctl_search_key { |
430 | /* which root are we searching. 0 is the tree of tree roots */ | 431 | /* |
431 | __u64 tree_id; | 432 | * The tree we're searching in. 1 is the tree of tree roots, 2 is the |
432 | 433 | * extent tree, etc... | |
433 | /* keys returned will be >= min and <= max */ | 434 | * |
434 | __u64 min_objectid; | 435 | * A special tree_id value of 0 will cause a search in the subvolume |
435 | __u64 max_objectid; | 436 | * tree that the inode which is passed to the ioctl is part of. |
436 | 437 | */ | |
437 | /* keys returned will be >= min and <= max */ | 438 | __u64 tree_id; /* in */ |
438 | __u64 min_offset; | ||
439 | __u64 max_offset; | ||
440 | |||
441 | /* max and min transids to search for */ | ||
442 | __u64 min_transid; | ||
443 | __u64 max_transid; | ||
444 | 439 | ||
445 | /* keys returned will be >= min and <= max */ | 440 | /* |
446 | __u32 min_type; | 441 | * When doing a tree search, we're actually taking a slice from a |
447 | __u32 max_type; | 442 | * linear search space of 136-bit keys. |
443 | * | ||
444 | * A full 136-bit tree key is composed as: | ||
445 | * (objectid << 72) + (type << 64) + offset | ||
446 | * | ||
447 | * The individual min and max values for objectid, type and offset | ||
448 | * define the min_key and max_key values for the search range. All | ||
449 | * metadata items with a key in the interval [min_key, max_key] will be | ||
450 | * returned. | ||
451 | * | ||
452 | * Additionally, we can filter the items returned on transaction id of | ||
453 | * the metadata block they're stored in by specifying a transid range. | ||
454 | * Be aware that this transaction id only denotes when the metadata | ||
455 | * page that currently contains the item got written the last time as | ||
456 | * result of a COW operation. The number does not have any meaning | ||
457 | * related to the transaction in which an individual item that is being | ||
458 | * returned was created or changed. | ||
459 | */ | ||
460 | __u64 min_objectid; /* in */ | ||
461 | __u64 max_objectid; /* in */ | ||
462 | __u64 min_offset; /* in */ | ||
463 | __u64 max_offset; /* in */ | ||
464 | __u64 min_transid; /* in */ | ||
465 | __u64 max_transid; /* in */ | ||
466 | __u32 min_type; /* in */ | ||
467 | __u32 max_type; /* in */ | ||
448 | 468 | ||
449 | /* | 469 | /* |
450 | * how many items did userland ask for, and how many are we | 470 | * input: The maximum amount of results desired. |
451 | * returning | 471 | * output: The actual amount of items returned, restricted by any of: |
472 | * - reaching the upper bound of the search range | ||
473 | * - reaching the input nr_items amount of items | ||
474 | * - completely filling the supplied memory buffer | ||
452 | */ | 475 | */ |
453 | __u32 nr_items; | 476 | __u32 nr_items; /* in/out */ |
454 | 477 | ||
455 | /* align to 64 bits */ | 478 | /* align to 64 bits */ |
456 | __u32 unused; | 479 | __u32 unused; |