author	Chris Mason <clm@fb.com>	2014-12-02 21:42:03 -0500
committer	Chris Mason <clm@fb.com>	2014-12-02 21:42:03 -0500
commit	9627aeee3e203e30679549e4962633698a6bf87f (patch)
tree	30ee313a7049bf3fcc17e346df5737e967fd9a95 /fs/btrfs
parent	cb83b7b81698a4abe531e0ba18b9e288b06947ce (diff)
parent	5d3edd8f44aac94de7b16f4c54290e24f5e8c532 (diff)
Merge branch 'raid56-scrub-replace' of git://github.com/miaoxie/linux-btrfs into for-linus
Diffstat (limited to 'fs/btrfs')
-rw-r--r--	fs/btrfs/ctree.c	14
-rw-r--r--	fs/btrfs/ctree.h	7
-rw-r--r--	fs/btrfs/dev-replace.c	9
-rw-r--r--	fs/btrfs/locking.c	24
-rw-r--r--	fs/btrfs/locking.h	2
-rw-r--r--	fs/btrfs/raid56.c	763
-rw-r--r--	fs/btrfs/raid56.h	16
-rw-r--r--	fs/btrfs/scrub.c	803
-rw-r--r--	fs/btrfs/volumes.c	52
-rw-r--r--	fs/btrfs/volumes.h	14
10 files changed, 1556 insertions, 148 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 817234168a7f..14a72ed14ef7 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -80,13 +80,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, | |||
80 | { | 80 | { |
81 | int i; | 81 | int i; |
82 | 82 | ||
83 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
84 | /* lockdep really cares that we take all of these spinlocks | ||
85 | * in the right order. If any of the locks in the path are not | ||
86 | * currently blocking, it is going to complain. So, make really | ||
87 | * really sure by forcing the path to blocking before we clear | ||
88 | * the path blocking. | ||
89 | */ | ||
90 | if (held) { | 83 | if (held) { |
91 | btrfs_set_lock_blocking_rw(held, held_rw); | 84 | btrfs_set_lock_blocking_rw(held, held_rw); |
92 | if (held_rw == BTRFS_WRITE_LOCK) | 85 | if (held_rw == BTRFS_WRITE_LOCK) |
@@ -95,7 +88,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, | |||
95 | held_rw = BTRFS_READ_LOCK_BLOCKING; | 88 | held_rw = BTRFS_READ_LOCK_BLOCKING; |
96 | } | 89 | } |
97 | btrfs_set_path_blocking(p); | 90 | btrfs_set_path_blocking(p); |
98 | #endif | ||
99 | 91 | ||
100 | for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { | 92 | for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { |
101 | if (p->nodes[i] && p->locks[i]) { | 93 | if (p->nodes[i] && p->locks[i]) { |
@@ -107,10 +99,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, | |||
107 | } | 99 | } |
108 | } | 100 | } |
109 | 101 | ||
110 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
111 | if (held) | 102 | if (held) |
112 | btrfs_clear_lock_blocking_rw(held, held_rw); | 103 | btrfs_clear_lock_blocking_rw(held, held_rw); |
113 | #endif | ||
114 | } | 104 | } |
115 | 105 | ||
116 | /* this also releases the path */ | 106 | /* this also releases the path */ |
@@ -2893,7 +2883,7 @@ cow_done: | |||
2893 | } | 2883 | } |
2894 | p->locks[level] = BTRFS_WRITE_LOCK; | 2884 | p->locks[level] = BTRFS_WRITE_LOCK; |
2895 | } else { | 2885 | } else { |
2896 | err = btrfs_try_tree_read_lock(b); | 2886 | err = btrfs_tree_read_lock_atomic(b); |
2897 | if (!err) { | 2887 | if (!err) { |
2898 | btrfs_set_path_blocking(p); | 2888 | btrfs_set_path_blocking(p); |
2899 | btrfs_tree_read_lock(b); | 2889 | btrfs_tree_read_lock(b); |
@@ -3025,7 +3015,7 @@ again: | |||
3025 | } | 3015 | } |
3026 | 3016 | ||
3027 | level = btrfs_header_level(b); | 3017 | level = btrfs_header_level(b); |
3028 | err = btrfs_try_tree_read_lock(b); | 3018 | err = btrfs_tree_read_lock_atomic(b); |
3029 | if (!err) { | 3019 | if (!err) { |
3030 | btrfs_set_path_blocking(p); | 3020 | btrfs_set_path_blocking(p); |
3031 | btrfs_tree_read_lock(b); | 3021 | btrfs_tree_read_lock(b); |
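The two read-lock sites in the tree search paths now call btrfs_tree_read_lock_atomic() instead of btrfs_try_tree_read_lock(): try the non-waiting spinning read lock first, and only when that fails mark the path blocking and take the sleeping read lock. A minimal sketch of that caller-side shape, with stand-in types and extern declarations rather than the real kernel API:

    struct node;
    struct path;

    extern int  tree_read_lock_atomic(struct node *n);  /* 1 on success, won't wait for blocking writers */
    extern void tree_read_lock(struct node *n);         /* may block */
    extern void set_path_blocking(struct path *p);

    /* Fast path first, blocking read lock only as a fallback. */
    void read_lock_level(struct path *p, struct node *n)
    {
            if (tree_read_lock_atomic(n))
                    return;                 /* spinning read lock held */

            set_path_blocking(p);           /* convert held locks to blocking */
            tree_read_lock(n);              /* then take the sleeping read lock */
    }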
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d71915e04e92..e6fbbd74b716 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -4167,7 +4167,12 @@ int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, | |||
4167 | /* dev-replace.c */ | 4167 | /* dev-replace.c */ |
4168 | void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info); | 4168 | void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info); |
4169 | void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info); | 4169 | void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info); |
4170 | void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info); | 4170 | void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount); |
4171 | |||
4172 | static inline void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) | ||
4173 | { | ||
4174 | btrfs_bio_counter_sub(fs_info, 1); | ||
4175 | } | ||
4171 | 4176 | ||
4172 | /* reada.c */ | 4177 | /* reada.c */ |
4173 | struct reada_control { | 4178 | struct reada_control { |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 3fbd0628620b..ca6a3a3b6b6c 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -316,11 +316,6 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
316 | struct btrfs_device *tgt_device = NULL; | 316 | struct btrfs_device *tgt_device = NULL; |
317 | struct btrfs_device *src_device = NULL; | 317 | struct btrfs_device *src_device = NULL; |
318 | 318 | ||
319 | if (btrfs_fs_incompat(fs_info, RAID56)) { | ||
320 | btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6"); | ||
321 | return -EOPNOTSUPP; | ||
322 | } | ||
323 | |||
324 | switch (args->start.cont_reading_from_srcdev_mode) { | 319 | switch (args->start.cont_reading_from_srcdev_mode) { |
325 | case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: | 320 | case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: |
326 | case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: | 321 | case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: |
@@ -927,9 +922,9 @@ void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info) | |||
927 | percpu_counter_inc(&fs_info->bio_counter); | 922 | percpu_counter_inc(&fs_info->bio_counter); |
928 | } | 923 | } |
929 | 924 | ||
930 | void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) | 925 | void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount) |
931 | { | 926 | { |
932 | percpu_counter_dec(&fs_info->bio_counter); | 927 | percpu_counter_sub(&fs_info->bio_counter, amount); |
933 | 928 | ||
934 | if (waitqueue_active(&fs_info->replace_wait)) | 929 | if (waitqueue_active(&fs_info->replace_wait)) |
935 | wake_up(&fs_info->replace_wait); | 930 | wake_up(&fs_info->replace_wait); |
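With this change btrfs_bio_counter_sub() becomes the exported primitive and btrfs_bio_counter_dec() is just the amount == 1 case (see the ctree.h hunk above), so raid56 code holding several references against fs_info->bio_counter can release them in one call. A userspace sketch of the same shape using a plain C11 atomic (the kernel uses a percpu_counter and also wakes fs_info->replace_wait, which this does not model):

    #include <stdatomic.h>

    struct bio_counter {
            atomic_long count;
    };

    void bio_counter_sub(struct bio_counter *c, long amount)
    {
            atomic_fetch_sub(&c->count, amount);
            /* the kernel version wakes waiters on replace_wait here */
    }

    static inline void bio_counter_dec(struct bio_counter *c)
    {
            bio_counter_sub(c, 1);          /* dec is just sub(1) */
    }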
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 5665d2149249..f8229ef1b46d 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -128,6 +128,26 @@ again: | |||
128 | } | 128 | } |
129 | 129 | ||
130 | /* | 130 | /* |
131 | * take a spinning read lock. | ||
132 | * returns 1 if we get the read lock and 0 if we don't | ||
133 | * this won't wait for blocking writers | ||
134 | */ | ||
135 | int btrfs_tree_read_lock_atomic(struct extent_buffer *eb) | ||
136 | { | ||
137 | if (atomic_read(&eb->blocking_writers)) | ||
138 | return 0; | ||
139 | |||
140 | read_lock(&eb->lock); | ||
141 | if (atomic_read(&eb->blocking_writers)) { | ||
142 | read_unlock(&eb->lock); | ||
143 | return 0; | ||
144 | } | ||
145 | atomic_inc(&eb->read_locks); | ||
146 | atomic_inc(&eb->spinning_readers); | ||
147 | return 1; | ||
148 | } | ||
149 | |||
150 | /* | ||
131 | * returns 1 if we get the read lock and 0 if we don't | 151 | * returns 1 if we get the read lock and 0 if we don't |
132 | * this won't wait for blocking writers | 152 | * this won't wait for blocking writers |
133 | */ | 153 | */ |
@@ -158,9 +178,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) | |||
158 | atomic_read(&eb->blocking_readers)) | 178 | atomic_read(&eb->blocking_readers)) |
159 | return 0; | 179 | return 0; |
160 | 180 | ||
161 | if (!write_trylock(&eb->lock)) | 181 | write_lock(&eb->lock); |
162 | return 0; | ||
163 | |||
164 | if (atomic_read(&eb->blocking_writers) || | 182 | if (atomic_read(&eb->blocking_writers) || |
165 | atomic_read(&eb->blocking_readers)) { | 183 | atomic_read(&eb->blocking_readers)) { |
166 | write_unlock(&eb->lock); | 184 | write_unlock(&eb->lock); |
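btrfs_tree_read_lock_atomic() sits between btrfs_tree_read_lock(), which waits for blocking writers, and btrfs_try_tree_read_lock(), which also gives up when the rwlock itself is contended: the new helper is willing to wait on the rwlock but still bails out on blocking writers, and it re-checks after acquisition to close the race. The same hunk turns btrfs_try_tree_write_lock()'s write_trylock() into write_lock(), presumably because the spinning lock is only held for short, non-sleeping sections, so waiting for it is cheaper than failing and retrying. A userspace sketch of the check / lock / re-check pattern, with pthreads standing in for the kernel rwlock:

    #include <pthread.h>
    #include <stdatomic.h>

    struct eb_lock {
            pthread_rwlock_t lock;
            atomic_int blocking_writers;
    };

    /* Returns 1 with the read lock held, 0 if a blocking writer is in the way. */
    int read_lock_atomic(struct eb_lock *eb)
    {
            if (atomic_load(&eb->blocking_writers))
                    return 0;                        /* cheap check before touching the lock */

            pthread_rwlock_rdlock(&eb->lock);        /* may wait on the rwlock itself */
            if (atomic_load(&eb->blocking_writers)) {
                    pthread_rwlock_unlock(&eb->lock);
                    return 0;                        /* a writer went blocking meanwhile */
            }
            return 1;
    }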
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index b81e0e9a4894..c44a9d5f5362 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -35,6 +35,8 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw); | |||
35 | void btrfs_assert_tree_locked(struct extent_buffer *eb); | 35 | void btrfs_assert_tree_locked(struct extent_buffer *eb); |
36 | int btrfs_try_tree_read_lock(struct extent_buffer *eb); | 36 | int btrfs_try_tree_read_lock(struct extent_buffer *eb); |
37 | int btrfs_try_tree_write_lock(struct extent_buffer *eb); | 37 | int btrfs_try_tree_write_lock(struct extent_buffer *eb); |
38 | int btrfs_tree_read_lock_atomic(struct extent_buffer *eb); | ||
39 | |||
38 | 40 | ||
39 | static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) | 41 | static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) |
40 | { | 42 | { |
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 6a41631cb959..8ab2a17bbba8 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -58,9 +58,23 @@ | |||
58 | */ | 58 | */ |
59 | #define RBIO_CACHE_READY_BIT 3 | 59 | #define RBIO_CACHE_READY_BIT 3 |
60 | 60 | ||
61 | /* | ||
62 | * bbio and raid_map are managed by the caller, so we shouldn't free | ||
63 | * them here. Also, rbios with this flag must not be cached, because | ||
64 | * we need the raid_map to check whether two rbios cover the same | ||
65 | * stripe, and the caller has very likely already freed the raid_map, | ||
66 | * so don't cache those rbios. | ||
67 | */ | ||
68 | #define RBIO_HOLD_BBIO_MAP_BIT 4 | ||
61 | 69 | ||
62 | #define RBIO_CACHE_SIZE 1024 | 70 | #define RBIO_CACHE_SIZE 1024 |
63 | 71 | ||
72 | enum btrfs_rbio_ops { | ||
73 | BTRFS_RBIO_WRITE = 0, | ||
74 | BTRFS_RBIO_READ_REBUILD = 1, | ||
75 | BTRFS_RBIO_PARITY_SCRUB = 2, | ||
76 | }; | ||
77 | |||
64 | struct btrfs_raid_bio { | 78 | struct btrfs_raid_bio { |
65 | struct btrfs_fs_info *fs_info; | 79 | struct btrfs_fs_info *fs_info; |
66 | struct btrfs_bio *bbio; | 80 | struct btrfs_bio *bbio; |
@@ -117,13 +131,16 @@ struct btrfs_raid_bio { | |||
117 | /* number of data stripes (no p/q) */ | 131 | /* number of data stripes (no p/q) */ |
118 | int nr_data; | 132 | int nr_data; |
119 | 133 | ||
134 | int real_stripes; | ||
135 | |||
136 | int stripe_npages; | ||
120 | /* | 137 | /* |
121 | * set if we're doing a parity rebuild | 138 | * set if we're doing a parity rebuild |
122 | * for a read from higher up, which is handled | 139 | * for a read from higher up, which is handled |
123 | * differently from a parity rebuild as part of | 140 | * differently from a parity rebuild as part of |
124 | * rmw | 141 | * rmw |
125 | */ | 142 | */ |
126 | int read_rebuild; | 143 | enum btrfs_rbio_ops operation; |
127 | 144 | ||
128 | /* first bad stripe */ | 145 | /* first bad stripe */ |
129 | int faila; | 146 | int faila; |
@@ -131,6 +148,7 @@ struct btrfs_raid_bio { | |||
131 | /* second bad stripe (for raid6 use) */ | 148 | /* second bad stripe (for raid6 use) */ |
132 | int failb; | 149 | int failb; |
133 | 150 | ||
151 | int scrubp; | ||
134 | /* | 152 | /* |
135 | * number of pages needed to represent the full | 153 | * number of pages needed to represent the full |
136 | * stripe | 154 | * stripe |
@@ -144,8 +162,13 @@ struct btrfs_raid_bio { | |||
144 | */ | 162 | */ |
145 | int bio_list_bytes; | 163 | int bio_list_bytes; |
146 | 164 | ||
165 | int generic_bio_cnt; | ||
166 | |||
147 | atomic_t refs; | 167 | atomic_t refs; |
148 | 168 | ||
169 | atomic_t stripes_pending; | ||
170 | |||
171 | atomic_t error; | ||
149 | /* | 172 | /* |
150 | * these are two arrays of pointers. We allocate the | 173 | * these are two arrays of pointers. We allocate the |
151 | * rbio big enough to hold them both and setup their | 174 | * rbio big enough to hold them both and setup their |
@@ -162,6 +185,11 @@ struct btrfs_raid_bio { | |||
162 | * here for faster lookup | 185 | * here for faster lookup |
163 | */ | 186 | */ |
164 | struct page **bio_pages; | 187 | struct page **bio_pages; |
188 | |||
189 | /* | ||
190 | * bitmap to record which horizontal stripe has data | ||
191 | */ | ||
192 | unsigned long *dbitmap; | ||
165 | }; | 193 | }; |
166 | 194 | ||
167 | static int __raid56_parity_recover(struct btrfs_raid_bio *rbio); | 195 | static int __raid56_parity_recover(struct btrfs_raid_bio *rbio); |
@@ -176,6 +204,10 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio); | |||
176 | static void index_rbio_pages(struct btrfs_raid_bio *rbio); | 204 | static void index_rbio_pages(struct btrfs_raid_bio *rbio); |
177 | static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); | 205 | static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); |
178 | 206 | ||
207 | static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, | ||
208 | int need_check); | ||
209 | static void async_scrub_parity(struct btrfs_raid_bio *rbio); | ||
210 | |||
179 | /* | 211 | /* |
180 | * the stripe hash table is used for locking, and to collect | 212 | * the stripe hash table is used for locking, and to collect |
181 | * bios in hopes of making a full stripe | 213 | * bios in hopes of making a full stripe |
@@ -324,6 +356,7 @@ static void merge_rbio(struct btrfs_raid_bio *dest, | |||
324 | { | 356 | { |
325 | bio_list_merge(&dest->bio_list, &victim->bio_list); | 357 | bio_list_merge(&dest->bio_list, &victim->bio_list); |
326 | dest->bio_list_bytes += victim->bio_list_bytes; | 358 | dest->bio_list_bytes += victim->bio_list_bytes; |
359 | dest->generic_bio_cnt += victim->generic_bio_cnt; | ||
327 | bio_list_init(&victim->bio_list); | 360 | bio_list_init(&victim->bio_list); |
328 | } | 361 | } |
329 | 362 | ||
@@ -577,11 +610,20 @@ static int rbio_can_merge(struct btrfs_raid_bio *last, | |||
577 | cur->raid_map[0]) | 610 | cur->raid_map[0]) |
578 | return 0; | 611 | return 0; |
579 | 612 | ||
580 | /* reads can't merge with writes */ | 613 | /* we can't merge with different operations */ |
581 | if (last->read_rebuild != | 614 | if (last->operation != cur->operation) |
582 | cur->read_rebuild) { | 615 | return 0; |
616 | /* | ||
617 | * We need to read the full stripe from the drive, then check | ||
618 | * and repair the parity and write the new results back. | ||
619 | * | ||
620 | * We're not allowed to add any new bios to the | ||
621 | * bio list here, anyone else that wants to | ||
622 | * change this stripe needs to do their own rmw. | ||
623 | */ | ||
624 | if (last->operation == BTRFS_RBIO_PARITY_SCRUB || | ||
625 | cur->operation == BTRFS_RBIO_PARITY_SCRUB) | ||
583 | return 0; | 626 | return 0; |
584 | } | ||
585 | 627 | ||
586 | return 1; | 628 | return 1; |
587 | } | 629 | } |
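Condensed to just the checks visible in this hunk, the merge rule after the change is: same full stripe, same operation, and parity-scrub rbios never merge, because the scrub has to read, check and rewrite the stripe itself and nobody else may add bios to it. A self-contained sketch of that rule (enum values local to the sketch):

    typedef unsigned long long u64;
    enum rbio_ops { RBIO_WRITE, RBIO_READ_REBUILD, RBIO_PARITY_SCRUB };

    int can_merge(u64 stripe_start_a, u64 stripe_start_b,
                  enum rbio_ops op_a, enum rbio_ops op_b)
    {
            if (stripe_start_a != stripe_start_b)
                    return 0;               /* different full stripes */
            if (op_a != op_b)
                    return 0;               /* mixed operations never merge */
            if (op_a == RBIO_PARITY_SCRUB)
                    return 0;               /* scrub rbios never merge at all */
            return 1;
    }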
@@ -601,7 +643,7 @@ static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index) | |||
601 | */ | 643 | */ |
602 | static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index) | 644 | static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index) |
603 | { | 645 | { |
604 | if (rbio->nr_data + 1 == rbio->bbio->num_stripes) | 646 | if (rbio->nr_data + 1 == rbio->real_stripes) |
605 | return NULL; | 647 | return NULL; |
606 | 648 | ||
607 | index += ((rbio->nr_data + 1) * rbio->stripe_len) >> | 649 | index += ((rbio->nr_data + 1) * rbio->stripe_len) >> |
@@ -772,11 +814,14 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) | |||
772 | spin_unlock(&rbio->bio_list_lock); | 814 | spin_unlock(&rbio->bio_list_lock); |
773 | spin_unlock_irqrestore(&h->lock, flags); | 815 | spin_unlock_irqrestore(&h->lock, flags); |
774 | 816 | ||
775 | if (next->read_rebuild) | 817 | if (next->operation == BTRFS_RBIO_READ_REBUILD) |
776 | async_read_rebuild(next); | 818 | async_read_rebuild(next); |
777 | else { | 819 | else if (next->operation == BTRFS_RBIO_WRITE) { |
778 | steal_rbio(rbio, next); | 820 | steal_rbio(rbio, next); |
779 | async_rmw_stripe(next); | 821 | async_rmw_stripe(next); |
822 | } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) { | ||
823 | steal_rbio(rbio, next); | ||
824 | async_scrub_parity(next); | ||
780 | } | 825 | } |
781 | 826 | ||
782 | goto done_nolock; | 827 | goto done_nolock; |
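unlock_stripe() now routes the next queued rbio to one of three async paths based on its operation. The if/else chain above is equivalent to the following switch (declarations are only here to make the sketch self-contained; the function names are the ones used in the patch):

    enum rbio_ops { RBIO_WRITE, RBIO_READ_REBUILD, RBIO_PARITY_SCRUB };
    struct rbio { enum rbio_ops operation; };

    extern void async_read_rebuild(struct rbio *r);
    extern void async_rmw_stripe(struct rbio *r);
    extern void async_scrub_parity(struct rbio *r);
    extern void steal_rbio(struct rbio *src, struct rbio *dest);   /* hand over up-to-date pages */

    void hand_off(struct rbio *cur, struct rbio *next)
    {
            switch (next->operation) {
            case RBIO_READ_REBUILD:
                    async_read_rebuild(next);
                    break;
            case RBIO_WRITE:
                    steal_rbio(cur, next);
                    async_rmw_stripe(next);
                    break;
            case RBIO_PARITY_SCRUB:
                    steal_rbio(cur, next);
                    async_scrub_parity(next);
                    break;
            }
    }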
@@ -796,6 +841,21 @@ done_nolock: | |||
796 | remove_rbio_from_cache(rbio); | 841 | remove_rbio_from_cache(rbio); |
797 | } | 842 | } |
798 | 843 | ||
844 | static inline void | ||
845 | __free_bbio_and_raid_map(struct btrfs_bio *bbio, u64 *raid_map, int need) | ||
846 | { | ||
847 | if (need) { | ||
848 | kfree(raid_map); | ||
849 | kfree(bbio); | ||
850 | } | ||
851 | } | ||
852 | |||
853 | static inline void free_bbio_and_raid_map(struct btrfs_raid_bio *rbio) | ||
854 | { | ||
855 | __free_bbio_and_raid_map(rbio->bbio, rbio->raid_map, | ||
856 | !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags)); | ||
857 | } | ||
858 | |||
799 | static void __free_raid_bio(struct btrfs_raid_bio *rbio) | 859 | static void __free_raid_bio(struct btrfs_raid_bio *rbio) |
800 | { | 860 | { |
801 | int i; | 861 | int i; |
@@ -814,8 +874,9 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio) | |||
814 | rbio->stripe_pages[i] = NULL; | 874 | rbio->stripe_pages[i] = NULL; |
815 | } | 875 | } |
816 | } | 876 | } |
817 | kfree(rbio->raid_map); | 877 | |
818 | kfree(rbio->bbio); | 878 | free_bbio_and_raid_map(rbio); |
879 | |||
819 | kfree(rbio); | 880 | kfree(rbio); |
820 | } | 881 | } |
821 | 882 | ||
@@ -833,6 +894,10 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate) | |||
833 | { | 894 | { |
834 | struct bio *cur = bio_list_get(&rbio->bio_list); | 895 | struct bio *cur = bio_list_get(&rbio->bio_list); |
835 | struct bio *next; | 896 | struct bio *next; |
897 | |||
898 | if (rbio->generic_bio_cnt) | ||
899 | btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt); | ||
900 | |||
836 | free_raid_bio(rbio); | 901 | free_raid_bio(rbio); |
837 | 902 | ||
838 | while (cur) { | 903 | while (cur) { |
@@ -858,13 +923,13 @@ static void raid_write_end_io(struct bio *bio, int err) | |||
858 | 923 | ||
859 | bio_put(bio); | 924 | bio_put(bio); |
860 | 925 | ||
861 | if (!atomic_dec_and_test(&rbio->bbio->stripes_pending)) | 926 | if (!atomic_dec_and_test(&rbio->stripes_pending)) |
862 | return; | 927 | return; |
863 | 928 | ||
864 | err = 0; | 929 | err = 0; |
865 | 930 | ||
866 | /* OK, we have read all the stripes we need to. */ | 931 | /* OK, we have read all the stripes we need to. */ |
867 | if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors) | 932 | if (atomic_read(&rbio->error) > rbio->bbio->max_errors) |
868 | err = -EIO; | 933 | err = -EIO; |
869 | 934 | ||
870 | rbio_orig_end_io(rbio, err, 0); | 935 | rbio_orig_end_io(rbio, err, 0); |
@@ -925,16 +990,16 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, | |||
925 | { | 990 | { |
926 | struct btrfs_raid_bio *rbio; | 991 | struct btrfs_raid_bio *rbio; |
927 | int nr_data = 0; | 992 | int nr_data = 0; |
928 | int num_pages = rbio_nr_pages(stripe_len, bbio->num_stripes); | 993 | int real_stripes = bbio->num_stripes - bbio->num_tgtdevs; |
994 | int num_pages = rbio_nr_pages(stripe_len, real_stripes); | ||
995 | int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE); | ||
929 | void *p; | 996 | void *p; |
930 | 997 | ||
931 | rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2, | 998 | rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 + |
999 | DIV_ROUND_UP(stripe_npages, BITS_PER_LONG / 8), | ||
932 | GFP_NOFS); | 1000 | GFP_NOFS); |
933 | if (!rbio) { | 1001 | if (!rbio) |
934 | kfree(raid_map); | ||
935 | kfree(bbio); | ||
936 | return ERR_PTR(-ENOMEM); | 1002 | return ERR_PTR(-ENOMEM); |
937 | } | ||
938 | 1003 | ||
939 | bio_list_init(&rbio->bio_list); | 1004 | bio_list_init(&rbio->bio_list); |
940 | INIT_LIST_HEAD(&rbio->plug_list); | 1005 | INIT_LIST_HEAD(&rbio->plug_list); |
@@ -946,9 +1011,13 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, | |||
946 | rbio->fs_info = root->fs_info; | 1011 | rbio->fs_info = root->fs_info; |
947 | rbio->stripe_len = stripe_len; | 1012 | rbio->stripe_len = stripe_len; |
948 | rbio->nr_pages = num_pages; | 1013 | rbio->nr_pages = num_pages; |
1014 | rbio->real_stripes = real_stripes; | ||
1015 | rbio->stripe_npages = stripe_npages; | ||
949 | rbio->faila = -1; | 1016 | rbio->faila = -1; |
950 | rbio->failb = -1; | 1017 | rbio->failb = -1; |
951 | atomic_set(&rbio->refs, 1); | 1018 | atomic_set(&rbio->refs, 1); |
1019 | atomic_set(&rbio->error, 0); | ||
1020 | atomic_set(&rbio->stripes_pending, 0); | ||
952 | 1021 | ||
953 | /* | 1022 | /* |
954 | * the stripe_pages and bio_pages array point to the extra | 1023 | * the stripe_pages and bio_pages array point to the extra |
@@ -957,11 +1026,12 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, | |||
957 | p = rbio + 1; | 1026 | p = rbio + 1; |
958 | rbio->stripe_pages = p; | 1027 | rbio->stripe_pages = p; |
959 | rbio->bio_pages = p + sizeof(struct page *) * num_pages; | 1028 | rbio->bio_pages = p + sizeof(struct page *) * num_pages; |
1029 | rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2; | ||
960 | 1030 | ||
961 | if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE) | 1031 | if (raid_map[real_stripes - 1] == RAID6_Q_STRIPE) |
962 | nr_data = bbio->num_stripes - 2; | 1032 | nr_data = real_stripes - 2; |
963 | else | 1033 | else |
964 | nr_data = bbio->num_stripes - 1; | 1034 | nr_data = real_stripes - 1; |
965 | 1035 | ||
966 | rbio->nr_data = nr_data; | 1036 | rbio->nr_data = nr_data; |
967 | return rbio; | 1037 | return rbio; |
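alloc_rbio() keeps its single-allocation layout and simply grows it for the new dbitmap: the struct, both page-pointer arrays and the bitmap are carved out of one kzalloc()'d block, with the pointers fixed up right after the struct. A self-contained userspace sketch of the same layout trick (types and sizes simplified):

    #include <stdlib.h>

    struct rbio_sketch {
            int nr_pages;
            void **stripe_pages;     /* nr_pages entries */
            void **bio_pages;        /* nr_pages entries */
            unsigned long *dbitmap;  /* one bit per page of a stripe */
    };

    struct rbio_sketch *alloc_rbio_sketch(int num_pages, size_t bitmap_bytes)
    {
            struct rbio_sketch *rbio;
            char *p;

            rbio = calloc(1, sizeof(*rbio) +
                             2 * num_pages * sizeof(void *) + bitmap_bytes);
            if (!rbio)
                    return NULL;

            p = (char *)(rbio + 1);          /* memory right after the struct */
            rbio->stripe_pages = (void **)p;
            rbio->bio_pages    = (void **)(p + num_pages * sizeof(void *));
            rbio->dbitmap      = (unsigned long *)(p + 2 * num_pages * sizeof(void *));
            rbio->nr_pages     = num_pages;
            return rbio;
    }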
@@ -1073,7 +1143,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, | |||
1073 | static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio) | 1143 | static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio) |
1074 | { | 1144 | { |
1075 | if (rbio->faila >= 0 || rbio->failb >= 0) { | 1145 | if (rbio->faila >= 0 || rbio->failb >= 0) { |
1076 | BUG_ON(rbio->faila == rbio->bbio->num_stripes - 1); | 1146 | BUG_ON(rbio->faila == rbio->real_stripes - 1); |
1077 | __raid56_parity_recover(rbio); | 1147 | __raid56_parity_recover(rbio); |
1078 | } else { | 1148 | } else { |
1079 | finish_rmw(rbio); | 1149 | finish_rmw(rbio); |
@@ -1134,7 +1204,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio) | |||
1134 | static noinline void finish_rmw(struct btrfs_raid_bio *rbio) | 1204 | static noinline void finish_rmw(struct btrfs_raid_bio *rbio) |
1135 | { | 1205 | { |
1136 | struct btrfs_bio *bbio = rbio->bbio; | 1206 | struct btrfs_bio *bbio = rbio->bbio; |
1137 | void *pointers[bbio->num_stripes]; | 1207 | void *pointers[rbio->real_stripes]; |
1138 | int stripe_len = rbio->stripe_len; | 1208 | int stripe_len = rbio->stripe_len; |
1139 | int nr_data = rbio->nr_data; | 1209 | int nr_data = rbio->nr_data; |
1140 | int stripe; | 1210 | int stripe; |
@@ -1148,11 +1218,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) | |||
1148 | 1218 | ||
1149 | bio_list_init(&bio_list); | 1219 | bio_list_init(&bio_list); |
1150 | 1220 | ||
1151 | if (bbio->num_stripes - rbio->nr_data == 1) { | 1221 | if (rbio->real_stripes - rbio->nr_data == 1) { |
1152 | p_stripe = bbio->num_stripes - 1; | 1222 | p_stripe = rbio->real_stripes - 1; |
1153 | } else if (bbio->num_stripes - rbio->nr_data == 2) { | 1223 | } else if (rbio->real_stripes - rbio->nr_data == 2) { |
1154 | p_stripe = bbio->num_stripes - 2; | 1224 | p_stripe = rbio->real_stripes - 2; |
1155 | q_stripe = bbio->num_stripes - 1; | 1225 | q_stripe = rbio->real_stripes - 1; |
1156 | } else { | 1226 | } else { |
1157 | BUG(); | 1227 | BUG(); |
1158 | } | 1228 | } |
@@ -1169,7 +1239,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) | |||
1169 | set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); | 1239 | set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); |
1170 | spin_unlock_irq(&rbio->bio_list_lock); | 1240 | spin_unlock_irq(&rbio->bio_list_lock); |
1171 | 1241 | ||
1172 | atomic_set(&rbio->bbio->error, 0); | 1242 | atomic_set(&rbio->error, 0); |
1173 | 1243 | ||
1174 | /* | 1244 | /* |
1175 | * now that we've set rmw_locked, run through the | 1245 | * now that we've set rmw_locked, run through the |
@@ -1209,7 +1279,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) | |||
1209 | SetPageUptodate(p); | 1279 | SetPageUptodate(p); |
1210 | pointers[stripe++] = kmap(p); | 1280 | pointers[stripe++] = kmap(p); |
1211 | 1281 | ||
1212 | raid6_call.gen_syndrome(bbio->num_stripes, PAGE_SIZE, | 1282 | raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, |
1213 | pointers); | 1283 | pointers); |
1214 | } else { | 1284 | } else { |
1215 | /* raid5 */ | 1285 | /* raid5 */ |
@@ -1218,7 +1288,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) | |||
1218 | } | 1288 | } |
1219 | 1289 | ||
1220 | 1290 | ||
1221 | for (stripe = 0; stripe < bbio->num_stripes; stripe++) | 1291 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) |
1222 | kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); | 1292 | kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); |
1223 | } | 1293 | } |
1224 | 1294 | ||
@@ -1227,7 +1297,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) | |||
1227 | * higher layers (the bio_list in our rbio) and our p/q. Ignore | 1297 | * higher layers (the bio_list in our rbio) and our p/q. Ignore |
1228 | * everything else. | 1298 | * everything else. |
1229 | */ | 1299 | */ |
1230 | for (stripe = 0; stripe < bbio->num_stripes; stripe++) { | 1300 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) { |
1231 | for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) { | 1301 | for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) { |
1232 | struct page *page; | 1302 | struct page *page; |
1233 | if (stripe < rbio->nr_data) { | 1303 | if (stripe < rbio->nr_data) { |
@@ -1245,8 +1315,34 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) | |||
1245 | } | 1315 | } |
1246 | } | 1316 | } |
1247 | 1317 | ||
1248 | atomic_set(&bbio->stripes_pending, bio_list_size(&bio_list)); | 1318 | if (likely(!bbio->num_tgtdevs)) |
1249 | BUG_ON(atomic_read(&bbio->stripes_pending) == 0); | 1319 | goto write_data; |
1320 | |||
1321 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) { | ||
1322 | if (!bbio->tgtdev_map[stripe]) | ||
1323 | continue; | ||
1324 | |||
1325 | for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) { | ||
1326 | struct page *page; | ||
1327 | if (stripe < rbio->nr_data) { | ||
1328 | page = page_in_rbio(rbio, stripe, pagenr, 1); | ||
1329 | if (!page) | ||
1330 | continue; | ||
1331 | } else { | ||
1332 | page = rbio_stripe_page(rbio, stripe, pagenr); | ||
1333 | } | ||
1334 | |||
1335 | ret = rbio_add_io_page(rbio, &bio_list, page, | ||
1336 | rbio->bbio->tgtdev_map[stripe], | ||
1337 | pagenr, rbio->stripe_len); | ||
1338 | if (ret) | ||
1339 | goto cleanup; | ||
1340 | } | ||
1341 | } | ||
1342 | |||
1343 | write_data: | ||
1344 | atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list)); | ||
1345 | BUG_ON(atomic_read(&rbio->stripes_pending) == 0); | ||
1250 | 1346 | ||
1251 | while (1) { | 1347 | while (1) { |
1252 | bio = bio_list_pop(&bio_list); | 1348 | bio = bio_list_pop(&bio_list); |
@@ -1283,7 +1379,8 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio, | |||
1283 | stripe = &rbio->bbio->stripes[i]; | 1379 | stripe = &rbio->bbio->stripes[i]; |
1284 | stripe_start = stripe->physical; | 1380 | stripe_start = stripe->physical; |
1285 | if (physical >= stripe_start && | 1381 | if (physical >= stripe_start && |
1286 | physical < stripe_start + rbio->stripe_len) { | 1382 | physical < stripe_start + rbio->stripe_len && |
1383 | bio->bi_bdev == stripe->dev->bdev) { | ||
1287 | return i; | 1384 | return i; |
1288 | } | 1385 | } |
1289 | } | 1386 | } |
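find_bio_stripe() now also compares the bio's block device: once writes are duplicated onto the dev-replace target, a physical byte range alone no longer identifies the stripe a bio belongs to. A reduced, self-contained version of the lookup (types cut down to what the comparison needs):

    typedef unsigned long long u64;

    struct stripe_info {
            u64 physical;           /* start of this stripe on its device */
            const void *bdev;       /* identity of the backing block device */
    };

    int find_stripe(const struct stripe_info *stripes, int nr_stripes,
                    u64 physical, const void *bdev, u64 stripe_len)
    {
            for (int i = 0; i < nr_stripes; i++) {
                    if (physical >= stripes[i].physical &&
                        physical <  stripes[i].physical + stripe_len &&
                        bdev == stripes[i].bdev)
                            return i;
            }
            return -1;              /* no matching stripe */
    }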
@@ -1331,11 +1428,11 @@ static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed) | |||
1331 | if (rbio->faila == -1) { | 1428 | if (rbio->faila == -1) { |
1332 | /* first failure on this rbio */ | 1429 | /* first failure on this rbio */ |
1333 | rbio->faila = failed; | 1430 | rbio->faila = failed; |
1334 | atomic_inc(&rbio->bbio->error); | 1431 | atomic_inc(&rbio->error); |
1335 | } else if (rbio->failb == -1) { | 1432 | } else if (rbio->failb == -1) { |
1336 | /* second failure on this rbio */ | 1433 | /* second failure on this rbio */ |
1337 | rbio->failb = failed; | 1434 | rbio->failb = failed; |
1338 | atomic_inc(&rbio->bbio->error); | 1435 | atomic_inc(&rbio->error); |
1339 | } else { | 1436 | } else { |
1340 | ret = -EIO; | 1437 | ret = -EIO; |
1341 | } | 1438 | } |
@@ -1394,11 +1491,11 @@ static void raid_rmw_end_io(struct bio *bio, int err) | |||
1394 | 1491 | ||
1395 | bio_put(bio); | 1492 | bio_put(bio); |
1396 | 1493 | ||
1397 | if (!atomic_dec_and_test(&rbio->bbio->stripes_pending)) | 1494 | if (!atomic_dec_and_test(&rbio->stripes_pending)) |
1398 | return; | 1495 | return; |
1399 | 1496 | ||
1400 | err = 0; | 1497 | err = 0; |
1401 | if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors) | 1498 | if (atomic_read(&rbio->error) > rbio->bbio->max_errors) |
1402 | goto cleanup; | 1499 | goto cleanup; |
1403 | 1500 | ||
1404 | /* | 1501 | /* |
@@ -1439,7 +1536,6 @@ static void async_read_rebuild(struct btrfs_raid_bio *rbio) | |||
1439 | static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) | 1536 | static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) |
1440 | { | 1537 | { |
1441 | int bios_to_read = 0; | 1538 | int bios_to_read = 0; |
1442 | struct btrfs_bio *bbio = rbio->bbio; | ||
1443 | struct bio_list bio_list; | 1539 | struct bio_list bio_list; |
1444 | int ret; | 1540 | int ret; |
1445 | int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); | 1541 | int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); |
@@ -1455,7 +1551,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) | |||
1455 | 1551 | ||
1456 | index_rbio_pages(rbio); | 1552 | index_rbio_pages(rbio); |
1457 | 1553 | ||
1458 | atomic_set(&rbio->bbio->error, 0); | 1554 | atomic_set(&rbio->error, 0); |
1459 | /* | 1555 | /* |
1460 | * build a list of bios to read all the missing parts of this | 1556 | * build a list of bios to read all the missing parts of this |
1461 | * stripe | 1557 | * stripe |
@@ -1503,7 +1599,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) | |||
1503 | * the bbio may be freed once we submit the last bio. Make sure | 1599 | * the bbio may be freed once we submit the last bio. Make sure |
1504 | * not to touch it after that | 1600 | * not to touch it after that |
1505 | */ | 1601 | */ |
1506 | atomic_set(&bbio->stripes_pending, bios_to_read); | 1602 | atomic_set(&rbio->stripes_pending, bios_to_read); |
1507 | while (1) { | 1603 | while (1) { |
1508 | bio = bio_list_pop(&bio_list); | 1604 | bio = bio_list_pop(&bio_list); |
1509 | if (!bio) | 1605 | if (!bio) |
@@ -1686,19 +1782,30 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, | |||
1686 | struct btrfs_raid_bio *rbio; | 1782 | struct btrfs_raid_bio *rbio; |
1687 | struct btrfs_plug_cb *plug = NULL; | 1783 | struct btrfs_plug_cb *plug = NULL; |
1688 | struct blk_plug_cb *cb; | 1784 | struct blk_plug_cb *cb; |
1785 | int ret; | ||
1689 | 1786 | ||
1690 | rbio = alloc_rbio(root, bbio, raid_map, stripe_len); | 1787 | rbio = alloc_rbio(root, bbio, raid_map, stripe_len); |
1691 | if (IS_ERR(rbio)) | 1788 | if (IS_ERR(rbio)) { |
1789 | __free_bbio_and_raid_map(bbio, raid_map, 1); | ||
1692 | return PTR_ERR(rbio); | 1790 | return PTR_ERR(rbio); |
1791 | } | ||
1693 | bio_list_add(&rbio->bio_list, bio); | 1792 | bio_list_add(&rbio->bio_list, bio); |
1694 | rbio->bio_list_bytes = bio->bi_iter.bi_size; | 1793 | rbio->bio_list_bytes = bio->bi_iter.bi_size; |
1794 | rbio->operation = BTRFS_RBIO_WRITE; | ||
1795 | |||
1796 | btrfs_bio_counter_inc_noblocked(root->fs_info); | ||
1797 | rbio->generic_bio_cnt = 1; | ||
1695 | 1798 | ||
1696 | /* | 1799 | /* |
1697 | * don't plug on full rbios, just get them out the door | 1800 | * don't plug on full rbios, just get them out the door |
1698 | * as quickly as we can | 1801 | * as quickly as we can |
1699 | */ | 1802 | */ |
1700 | if (rbio_is_full(rbio)) | 1803 | if (rbio_is_full(rbio)) { |
1701 | return full_stripe_write(rbio); | 1804 | ret = full_stripe_write(rbio); |
1805 | if (ret) | ||
1806 | btrfs_bio_counter_dec(root->fs_info); | ||
1807 | return ret; | ||
1808 | } | ||
1702 | 1809 | ||
1703 | cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info, | 1810 | cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info, |
1704 | sizeof(*plug)); | 1811 | sizeof(*plug)); |
@@ -1709,10 +1816,13 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, | |||
1709 | INIT_LIST_HEAD(&plug->rbio_list); | 1816 | INIT_LIST_HEAD(&plug->rbio_list); |
1710 | } | 1817 | } |
1711 | list_add_tail(&rbio->plug_list, &plug->rbio_list); | 1818 | list_add_tail(&rbio->plug_list, &plug->rbio_list); |
1819 | ret = 0; | ||
1712 | } else { | 1820 | } else { |
1713 | return __raid56_parity_write(rbio); | 1821 | ret = __raid56_parity_write(rbio); |
1822 | if (ret) | ||
1823 | btrfs_bio_counter_dec(root->fs_info); | ||
1714 | } | 1824 | } |
1715 | return 0; | 1825 | return ret; |
1716 | } | 1826 | } |
1717 | 1827 | ||
1718 | /* | 1828 | /* |
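raid56_parity_write() now charges one reference to fs_info's bio counter when the rbio is created (btrfs_bio_counter_inc_noblocked() plus generic_bio_cnt = 1); rbio_orig_end_io() subtracts generic_bio_cnt on completion, and the synchronous error returns above drop the reference directly. A userspace sketch of that charge/release pairing (illustrative only, not the kernel API):

    #include <stdatomic.h>

    static atomic_long inflight;             /* stands in for fs_info->bio_counter */

    struct request {
            long counted;                    /* stands in for rbio->generic_bio_cnt */
    };

    void request_end_io(struct request *req)
    {
            atomic_fetch_sub(&inflight, req->counted);       /* normal release point */
    }

    int submit(struct request *req, int (*start)(struct request *))
    {
            int ret;

            atomic_fetch_add(&inflight, 1);
            req->counted = 1;

            ret = start(req);
            if (ret)
                    atomic_fetch_sub(&inflight, 1);          /* error path gives the count back */
            return ret;
    }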
@@ -1730,7 +1840,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1730 | int err; | 1840 | int err; |
1731 | int i; | 1841 | int i; |
1732 | 1842 | ||
1733 | pointers = kzalloc(rbio->bbio->num_stripes * sizeof(void *), | 1843 | pointers = kzalloc(rbio->real_stripes * sizeof(void *), |
1734 | GFP_NOFS); | 1844 | GFP_NOFS); |
1735 | if (!pointers) { | 1845 | if (!pointers) { |
1736 | err = -ENOMEM; | 1846 | err = -ENOMEM; |
@@ -1740,7 +1850,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1740 | faila = rbio->faila; | 1850 | faila = rbio->faila; |
1741 | failb = rbio->failb; | 1851 | failb = rbio->failb; |
1742 | 1852 | ||
1743 | if (rbio->read_rebuild) { | 1853 | if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { |
1744 | spin_lock_irq(&rbio->bio_list_lock); | 1854 | spin_lock_irq(&rbio->bio_list_lock); |
1745 | set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); | 1855 | set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); |
1746 | spin_unlock_irq(&rbio->bio_list_lock); | 1856 | spin_unlock_irq(&rbio->bio_list_lock); |
@@ -1749,15 +1859,23 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1749 | index_rbio_pages(rbio); | 1859 | index_rbio_pages(rbio); |
1750 | 1860 | ||
1751 | for (pagenr = 0; pagenr < nr_pages; pagenr++) { | 1861 | for (pagenr = 0; pagenr < nr_pages; pagenr++) { |
1862 | /* | ||
1863 | * For parity scrub we currently just use the bitmap to mark the | ||
1864 | * horizontal stripes in which we have data. | ||
1865 | */ | ||
1866 | if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB && | ||
1867 | !test_bit(pagenr, rbio->dbitmap)) | ||
1868 | continue; | ||
1869 | |||
1752 | /* setup our array of pointers with pages | 1870 | /* setup our array of pointers with pages |
1753 | * from each stripe | 1871 | * from each stripe |
1754 | */ | 1872 | */ |
1755 | for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) { | 1873 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) { |
1756 | /* | 1874 | /* |
1757 | * if we're rebuilding a read, we have to use | 1875 | * if we're rebuilding a read, we have to use |
1758 | * pages from the bio list | 1876 | * pages from the bio list |
1759 | */ | 1877 | */ |
1760 | if (rbio->read_rebuild && | 1878 | if (rbio->operation == BTRFS_RBIO_READ_REBUILD && |
1761 | (stripe == faila || stripe == failb)) { | 1879 | (stripe == faila || stripe == failb)) { |
1762 | page = page_in_rbio(rbio, stripe, pagenr, 0); | 1880 | page = page_in_rbio(rbio, stripe, pagenr, 0); |
1763 | } else { | 1881 | } else { |
@@ -1767,7 +1885,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1767 | } | 1885 | } |
1768 | 1886 | ||
1769 | /* all raid6 handling here */ | 1887 | /* all raid6 handling here */ |
1770 | if (rbio->raid_map[rbio->bbio->num_stripes - 1] == | 1888 | if (rbio->raid_map[rbio->real_stripes - 1] == |
1771 | RAID6_Q_STRIPE) { | 1889 | RAID6_Q_STRIPE) { |
1772 | 1890 | ||
1773 | /* | 1891 | /* |
@@ -1817,10 +1935,10 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1817 | } | 1935 | } |
1818 | 1936 | ||
1819 | if (rbio->raid_map[failb] == RAID5_P_STRIPE) { | 1937 | if (rbio->raid_map[failb] == RAID5_P_STRIPE) { |
1820 | raid6_datap_recov(rbio->bbio->num_stripes, | 1938 | raid6_datap_recov(rbio->real_stripes, |
1821 | PAGE_SIZE, faila, pointers); | 1939 | PAGE_SIZE, faila, pointers); |
1822 | } else { | 1940 | } else { |
1823 | raid6_2data_recov(rbio->bbio->num_stripes, | 1941 | raid6_2data_recov(rbio->real_stripes, |
1824 | PAGE_SIZE, faila, failb, | 1942 | PAGE_SIZE, faila, failb, |
1825 | pointers); | 1943 | pointers); |
1826 | } | 1944 | } |
@@ -1850,7 +1968,7 @@ pstripe: | |||
1850 | * know they can be trusted. If this was a read reconstruction, | 1968 | * know they can be trusted. If this was a read reconstruction, |
1851 | * other endio functions will fiddle the uptodate bits | 1969 | * other endio functions will fiddle the uptodate bits |
1852 | */ | 1970 | */ |
1853 | if (!rbio->read_rebuild) { | 1971 | if (rbio->operation == BTRFS_RBIO_WRITE) { |
1854 | for (i = 0; i < nr_pages; i++) { | 1972 | for (i = 0; i < nr_pages; i++) { |
1855 | if (faila != -1) { | 1973 | if (faila != -1) { |
1856 | page = rbio_stripe_page(rbio, faila, i); | 1974 | page = rbio_stripe_page(rbio, faila, i); |
@@ -1862,12 +1980,12 @@ pstripe: | |||
1862 | } | 1980 | } |
1863 | } | 1981 | } |
1864 | } | 1982 | } |
1865 | for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) { | 1983 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) { |
1866 | /* | 1984 | /* |
1867 | * if we're rebuilding a read, we have to use | 1985 | * if we're rebuilding a read, we have to use |
1868 | * pages from the bio list | 1986 | * pages from the bio list |
1869 | */ | 1987 | */ |
1870 | if (rbio->read_rebuild && | 1988 | if (rbio->operation == BTRFS_RBIO_READ_REBUILD && |
1871 | (stripe == faila || stripe == failb)) { | 1989 | (stripe == faila || stripe == failb)) { |
1872 | page = page_in_rbio(rbio, stripe, pagenr, 0); | 1990 | page = page_in_rbio(rbio, stripe, pagenr, 0); |
1873 | } else { | 1991 | } else { |
@@ -1882,9 +2000,9 @@ cleanup: | |||
1882 | kfree(pointers); | 2000 | kfree(pointers); |
1883 | 2001 | ||
1884 | cleanup_io: | 2002 | cleanup_io: |
1885 | 2003 | if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { | |
1886 | if (rbio->read_rebuild) { | 2004 | if (err == 0 && |
1887 | if (err == 0) | 2005 | !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags)) |
1888 | cache_rbio_pages(rbio); | 2006 | cache_rbio_pages(rbio); |
1889 | else | 2007 | else |
1890 | clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); | 2008 | clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); |
@@ -1893,7 +2011,13 @@ cleanup_io: | |||
1893 | } else if (err == 0) { | 2011 | } else if (err == 0) { |
1894 | rbio->faila = -1; | 2012 | rbio->faila = -1; |
1895 | rbio->failb = -1; | 2013 | rbio->failb = -1; |
1896 | finish_rmw(rbio); | 2014 | |
2015 | if (rbio->operation == BTRFS_RBIO_WRITE) | ||
2016 | finish_rmw(rbio); | ||
2017 | else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) | ||
2018 | finish_parity_scrub(rbio, 0); | ||
2019 | else | ||
2020 | BUG(); | ||
1897 | } else { | 2021 | } else { |
1898 | rbio_orig_end_io(rbio, err, 0); | 2022 | rbio_orig_end_io(rbio, err, 0); |
1899 | } | 2023 | } |
@@ -1917,10 +2041,10 @@ static void raid_recover_end_io(struct bio *bio, int err) | |||
1917 | set_bio_pages_uptodate(bio); | 2041 | set_bio_pages_uptodate(bio); |
1918 | bio_put(bio); | 2042 | bio_put(bio); |
1919 | 2043 | ||
1920 | if (!atomic_dec_and_test(&rbio->bbio->stripes_pending)) | 2044 | if (!atomic_dec_and_test(&rbio->stripes_pending)) |
1921 | return; | 2045 | return; |
1922 | 2046 | ||
1923 | if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors) | 2047 | if (atomic_read(&rbio->error) > rbio->bbio->max_errors) |
1924 | rbio_orig_end_io(rbio, -EIO, 0); | 2048 | rbio_orig_end_io(rbio, -EIO, 0); |
1925 | else | 2049 | else |
1926 | __raid_recover_end_io(rbio); | 2050 | __raid_recover_end_io(rbio); |
@@ -1937,7 +2061,6 @@ static void raid_recover_end_io(struct bio *bio, int err) | |||
1937 | static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) | 2061 | static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) |
1938 | { | 2062 | { |
1939 | int bios_to_read = 0; | 2063 | int bios_to_read = 0; |
1940 | struct btrfs_bio *bbio = rbio->bbio; | ||
1941 | struct bio_list bio_list; | 2064 | struct bio_list bio_list; |
1942 | int ret; | 2065 | int ret; |
1943 | int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); | 2066 | int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); |
@@ -1951,16 +2074,16 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) | |||
1951 | if (ret) | 2074 | if (ret) |
1952 | goto cleanup; | 2075 | goto cleanup; |
1953 | 2076 | ||
1954 | atomic_set(&rbio->bbio->error, 0); | 2077 | atomic_set(&rbio->error, 0); |
1955 | 2078 | ||
1956 | /* | 2079 | /* |
1957 | * read everything that hasn't failed. Thanks to the | 2080 | * read everything that hasn't failed. Thanks to the |
1958 | * stripe cache, it is possible that some or all of these | 2081 | * stripe cache, it is possible that some or all of these |
1959 | * pages are going to be uptodate. | 2082 | * pages are going to be uptodate. |
1960 | */ | 2083 | */ |
1961 | for (stripe = 0; stripe < bbio->num_stripes; stripe++) { | 2084 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) { |
1962 | if (rbio->faila == stripe || rbio->failb == stripe) { | 2085 | if (rbio->faila == stripe || rbio->failb == stripe) { |
1963 | atomic_inc(&rbio->bbio->error); | 2086 | atomic_inc(&rbio->error); |
1964 | continue; | 2087 | continue; |
1965 | } | 2088 | } |
1966 | 2089 | ||
@@ -1990,7 +2113,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) | |||
1990 | * were up to date, or we might have no bios to read because | 2113 | * were up to date, or we might have no bios to read because |
1991 | * the devices were gone. | 2114 | * the devices were gone. |
1992 | */ | 2115 | */ |
1993 | if (atomic_read(&rbio->bbio->error) <= rbio->bbio->max_errors) { | 2116 | if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) { |
1994 | __raid_recover_end_io(rbio); | 2117 | __raid_recover_end_io(rbio); |
1995 | goto out; | 2118 | goto out; |
1996 | } else { | 2119 | } else { |
@@ -2002,7 +2125,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) | |||
2002 | * the bbio may be freed once we submit the last bio. Make sure | 2125 | * the bbio may be freed once we submit the last bio. Make sure |
2003 | * not to touch it after that | 2126 | * not to touch it after that |
2004 | */ | 2127 | */ |
2005 | atomic_set(&bbio->stripes_pending, bios_to_read); | 2128 | atomic_set(&rbio->stripes_pending, bios_to_read); |
2006 | while (1) { | 2129 | while (1) { |
2007 | bio = bio_list_pop(&bio_list); | 2130 | bio = bio_list_pop(&bio_list); |
2008 | if (!bio) | 2131 | if (!bio) |
@@ -2021,7 +2144,7 @@ out: | |||
2021 | return 0; | 2144 | return 0; |
2022 | 2145 | ||
2023 | cleanup: | 2146 | cleanup: |
2024 | if (rbio->read_rebuild) | 2147 | if (rbio->operation == BTRFS_RBIO_READ_REBUILD) |
2025 | rbio_orig_end_io(rbio, -EIO, 0); | 2148 | rbio_orig_end_io(rbio, -EIO, 0); |
2026 | return -EIO; | 2149 | return -EIO; |
2027 | } | 2150 | } |
@@ -2034,34 +2157,42 @@ cleanup: | |||
2034 | */ | 2157 | */ |
2035 | int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, | 2158 | int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, |
2036 | struct btrfs_bio *bbio, u64 *raid_map, | 2159 | struct btrfs_bio *bbio, u64 *raid_map, |
2037 | u64 stripe_len, int mirror_num) | 2160 | u64 stripe_len, int mirror_num, int generic_io) |
2038 | { | 2161 | { |
2039 | struct btrfs_raid_bio *rbio; | 2162 | struct btrfs_raid_bio *rbio; |
2040 | int ret; | 2163 | int ret; |
2041 | 2164 | ||
2042 | rbio = alloc_rbio(root, bbio, raid_map, stripe_len); | 2165 | rbio = alloc_rbio(root, bbio, raid_map, stripe_len); |
2043 | if (IS_ERR(rbio)) | 2166 | if (IS_ERR(rbio)) { |
2167 | __free_bbio_and_raid_map(bbio, raid_map, generic_io); | ||
2044 | return PTR_ERR(rbio); | 2168 | return PTR_ERR(rbio); |
2169 | } | ||
2045 | 2170 | ||
2046 | rbio->read_rebuild = 1; | 2171 | rbio->operation = BTRFS_RBIO_READ_REBUILD; |
2047 | bio_list_add(&rbio->bio_list, bio); | 2172 | bio_list_add(&rbio->bio_list, bio); |
2048 | rbio->bio_list_bytes = bio->bi_iter.bi_size; | 2173 | rbio->bio_list_bytes = bio->bi_iter.bi_size; |
2049 | 2174 | ||
2050 | rbio->faila = find_logical_bio_stripe(rbio, bio); | 2175 | rbio->faila = find_logical_bio_stripe(rbio, bio); |
2051 | if (rbio->faila == -1) { | 2176 | if (rbio->faila == -1) { |
2052 | BUG(); | 2177 | BUG(); |
2053 | kfree(raid_map); | 2178 | __free_bbio_and_raid_map(bbio, raid_map, generic_io); |
2054 | kfree(bbio); | ||
2055 | kfree(rbio); | 2179 | kfree(rbio); |
2056 | return -EIO; | 2180 | return -EIO; |
2057 | } | 2181 | } |
2058 | 2182 | ||
2183 | if (generic_io) { | ||
2184 | btrfs_bio_counter_inc_noblocked(root->fs_info); | ||
2185 | rbio->generic_bio_cnt = 1; | ||
2186 | } else { | ||
2187 | set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags); | ||
2188 | } | ||
2189 | |||
2059 | /* | 2190 | /* |
2060 | * reconstruct from the q stripe if they are | 2191 | * reconstruct from the q stripe if they are |
2061 | * asking for mirror 3 | 2192 | * asking for mirror 3 |
2062 | */ | 2193 | */ |
2063 | if (mirror_num == 3) | 2194 | if (mirror_num == 3) |
2064 | rbio->failb = bbio->num_stripes - 2; | 2195 | rbio->failb = rbio->real_stripes - 2; |
2065 | 2196 | ||
2066 | ret = lock_stripe_add(rbio); | 2197 | ret = lock_stripe_add(rbio); |
2067 | 2198 | ||
@@ -2098,3 +2229,483 @@ static void read_rebuild_work(struct btrfs_work *work) | |||
2098 | rbio = container_of(work, struct btrfs_raid_bio, work); | 2229 | rbio = container_of(work, struct btrfs_raid_bio, work); |
2099 | __raid56_parity_recover(rbio); | 2230 | __raid56_parity_recover(rbio); |
2100 | } | 2231 | } |
2232 | |||
2233 | /* | ||
2234 | * The following code is used to scrub/replace the parity stripe | ||
2235 | * | ||
2236 | * Note: We must make sure that all pages added to the scrub/replace | ||
2237 | * raid bio are correct and are not changed during the scrub/replace; | ||
2238 | * that is, those pages hold only metadata or file data with checksums. | ||
2239 | */ | ||
2240 | |||
2241 | struct btrfs_raid_bio * | ||
2242 | raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, | ||
2243 | struct btrfs_bio *bbio, u64 *raid_map, | ||
2244 | u64 stripe_len, struct btrfs_device *scrub_dev, | ||
2245 | unsigned long *dbitmap, int stripe_nsectors) | ||
2246 | { | ||
2247 | struct btrfs_raid_bio *rbio; | ||
2248 | int i; | ||
2249 | |||
2250 | rbio = alloc_rbio(root, bbio, raid_map, stripe_len); | ||
2251 | if (IS_ERR(rbio)) | ||
2252 | return NULL; | ||
2253 | bio_list_add(&rbio->bio_list, bio); | ||
2254 | /* | ||
2255 | * This is a special bio which is used to hold the completion handler | ||
2256 | * and make the scrub rbio similar to the other types. | ||
2257 | */ | ||
2258 | ASSERT(!bio->bi_iter.bi_size); | ||
2259 | rbio->operation = BTRFS_RBIO_PARITY_SCRUB; | ||
2260 | |||
2261 | for (i = 0; i < rbio->real_stripes; i++) { | ||
2262 | if (bbio->stripes[i].dev == scrub_dev) { | ||
2263 | rbio->scrubp = i; | ||
2264 | break; | ||
2265 | } | ||
2266 | } | ||
2267 | |||
2268 | /* For now we only support the case where sectorsize equals page size */ | ||
2269 | ASSERT(root->sectorsize == PAGE_SIZE); | ||
2270 | ASSERT(rbio->stripe_npages == stripe_nsectors); | ||
2271 | bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors); | ||
2272 | |||
2273 | return rbio; | ||
2274 | } | ||
2275 | |||
2276 | void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio, | ||
2277 | struct page *page, u64 logical) | ||
2278 | { | ||
2279 | int stripe_offset; | ||
2280 | int index; | ||
2281 | |||
2282 | ASSERT(logical >= rbio->raid_map[0]); | ||
2283 | ASSERT(logical + PAGE_SIZE <= rbio->raid_map[0] + | ||
2284 | rbio->stripe_len * rbio->nr_data); | ||
2285 | stripe_offset = (int)(logical - rbio->raid_map[0]); | ||
2286 | index = stripe_offset >> PAGE_CACHE_SHIFT; | ||
2287 | rbio->bio_pages[index] = page; | ||
2288 | } | ||
2289 | |||
2290 | /* | ||
2291 | * We only scrub the parity for horizontal stripes where we have | ||
2292 | * correct data, so we needn't allocate pages for all the stripes. | ||
2293 | */ | ||
2294 | static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) | ||
2295 | { | ||
2296 | int i; | ||
2297 | int bit; | ||
2298 | int index; | ||
2299 | struct page *page; | ||
2300 | |||
2301 | for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) { | ||
2302 | for (i = 0; i < rbio->real_stripes; i++) { | ||
2303 | index = i * rbio->stripe_npages + bit; | ||
2304 | if (rbio->stripe_pages[index]) | ||
2305 | continue; | ||
2306 | |||
2307 | page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
2308 | if (!page) | ||
2309 | return -ENOMEM; | ||
2310 | rbio->stripe_pages[index] = page; | ||
2311 | ClearPageUptodate(page); | ||
2312 | } | ||
2313 | } | ||
2314 | return 0; | ||
2315 | } | ||
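alloc_rbio_essential_pages() allocates stripe pages only for the horizontal rows marked in dbitmap rather than for the whole stripe, since parity is only checked where the scrub actually has data. A self-contained sketch of the same bitmap-driven allocation, with plain calloc() and an explicit bit test in place of alloc_page() and for_each_set_bit():

    #include <stdlib.h>

    #define BITS_PER_LONG (8 * sizeof(unsigned long))

    static int test_bit(int nr, const unsigned long *map)
    {
            return (map[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1;
    }

    /* pages[] has real_stripes * stripe_npages slots, stripe-major like the rbio. */
    int alloc_essential_pages(void **pages, const unsigned long *dbitmap,
                              int real_stripes, int stripe_npages, size_t page_size)
    {
            for (int bit = 0; bit < stripe_npages; bit++) {
                    if (!test_bit(bit, dbitmap))
                            continue;               /* no data on this row */
                    for (int i = 0; i < real_stripes; i++) {
                            int index = i * stripe_npages + bit;
                            if (pages[index])
                                    continue;       /* page already present */
                            pages[index] = calloc(1, page_size);
                            if (!pages[index])
                                    return -1;
                    }
            }
            return 0;
    }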
2316 | |||
2317 | /* | ||
2318 | * end io function used by finish_parity_scrub. When we finally | ||
2319 | * get here, we've written out the scrubbed parity pages. | ||
2320 | */ | ||
2321 | static void raid_write_parity_end_io(struct bio *bio, int err) | ||
2322 | { | ||
2323 | struct btrfs_raid_bio *rbio = bio->bi_private; | ||
2324 | |||
2325 | if (err) | ||
2326 | fail_bio_stripe(rbio, bio); | ||
2327 | |||
2328 | bio_put(bio); | ||
2329 | |||
2330 | if (!atomic_dec_and_test(&rbio->stripes_pending)) | ||
2331 | return; | ||
2332 | |||
2333 | err = 0; | ||
2334 | |||
2335 | if (atomic_read(&rbio->error)) | ||
2336 | err = -EIO; | ||
2337 | |||
2338 | rbio_orig_end_io(rbio, err, 0); | ||
2339 | } | ||
2340 | |||
2341 | static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, | ||
2342 | int need_check) | ||
2343 | { | ||
2344 | struct btrfs_bio *bbio = rbio->bbio; | ||
2345 | void *pointers[rbio->real_stripes]; | ||
2346 | DECLARE_BITMAP(pbitmap, rbio->stripe_npages); | ||
2347 | int nr_data = rbio->nr_data; | ||
2348 | int stripe; | ||
2349 | int pagenr; | ||
2350 | int p_stripe = -1; | ||
2351 | int q_stripe = -1; | ||
2352 | struct page *p_page = NULL; | ||
2353 | struct page *q_page = NULL; | ||
2354 | struct bio_list bio_list; | ||
2355 | struct bio *bio; | ||
2356 | int is_replace = 0; | ||
2357 | int ret; | ||
2358 | |||
2359 | bio_list_init(&bio_list); | ||
2360 | |||
2361 | if (rbio->real_stripes - rbio->nr_data == 1) { | ||
2362 | p_stripe = rbio->real_stripes - 1; | ||
2363 | } else if (rbio->real_stripes - rbio->nr_data == 2) { | ||
2364 | p_stripe = rbio->real_stripes - 2; | ||
2365 | q_stripe = rbio->real_stripes - 1; | ||
2366 | } else { | ||
2367 | BUG(); | ||
2368 | } | ||
2369 | |||
2370 | if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) { | ||
2371 | is_replace = 1; | ||
2372 | bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages); | ||
2373 | } | ||
2374 | |||
2375 | /* | ||
2376 | * The higher layers (the scrubber) are unlikely to use | ||
2377 | * this area of the disk again soon, so don't cache | ||
2378 | * it. | ||
2379 | */ | ||
2380 | clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); | ||
2381 | |||
2382 | if (!need_check) | ||
2383 | goto writeback; | ||
2384 | |||
2385 | p_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
2386 | if (!p_page) | ||
2387 | goto cleanup; | ||
2388 | SetPageUptodate(p_page); | ||
2389 | |||
2390 | if (q_stripe != -1) { | ||
2391 | q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
2392 | if (!q_page) { | ||
2393 | __free_page(p_page); | ||
2394 | goto cleanup; | ||
2395 | } | ||
2396 | SetPageUptodate(q_page); | ||
2397 | } | ||
2398 | |||
2399 | atomic_set(&rbio->error, 0); | ||
2400 | |||
2401 | for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { | ||
2402 | struct page *p; | ||
2403 | void *parity; | ||
2404 | /* first collect one page from each data stripe */ | ||
2405 | for (stripe = 0; stripe < nr_data; stripe++) { | ||
2406 | p = page_in_rbio(rbio, stripe, pagenr, 0); | ||
2407 | pointers[stripe] = kmap(p); | ||
2408 | } | ||
2409 | |||
2410 | /* then add the parity stripe */ | ||
2411 | pointers[stripe++] = kmap(p_page); | ||
2412 | |||
2413 | if (q_stripe != -1) { | ||
2414 | |||
2415 | /* | ||
2416 | * raid6, add the qstripe and call the | ||
2417 | * library function to fill in our p/q | ||
2418 | */ | ||
2419 | pointers[stripe++] = kmap(q_page); | ||
2420 | |||
2421 | raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, | ||
2422 | pointers); | ||
2423 | } else { | ||
2424 | /* raid5 */ | ||
2425 | memcpy(pointers[nr_data], pointers[0], PAGE_SIZE); | ||
2426 | run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE); | ||
2427 | } | ||
2428 | |||
2429 | /* Check the scrubbed parity and repair it */ | ||
2430 | p = rbio_stripe_page(rbio, rbio->scrubp, pagenr); | ||
2431 | parity = kmap(p); | ||
2432 | if (memcmp(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE)) | ||
2433 | memcpy(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE); | ||
2434 | else | ||
2435 | /* The parity is correct, no need to write it back */ | ||
2436 | bitmap_clear(rbio->dbitmap, pagenr, 1); | ||
2437 | kunmap(p); | ||
2438 | |||
2439 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) | ||
2440 | kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); | ||
2441 | } | ||
2442 | |||
2443 | __free_page(p_page); | ||
2444 | if (q_page) | ||
2445 | __free_page(q_page); | ||
2446 | |||
2447 | writeback: | ||
2448 | /* | ||
2449 | * time to start writing. Make bios for everything from the | ||
2450 | * higher layers (the bio_list in our rbio) and our p/q. Ignore | ||
2451 | * everything else. | ||
2452 | */ | ||
2453 | for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { | ||
2454 | struct page *page; | ||
2455 | |||
2456 | page = rbio_stripe_page(rbio, rbio->scrubp, pagenr); | ||
2457 | ret = rbio_add_io_page(rbio, &bio_list, | ||
2458 | page, rbio->scrubp, pagenr, rbio->stripe_len); | ||
2459 | if (ret) | ||
2460 | goto cleanup; | ||
2461 | } | ||
2462 | |||
2463 | if (!is_replace) | ||
2464 | goto submit_write; | ||
2465 | |||
2466 | for_each_set_bit(pagenr, pbitmap, rbio->stripe_npages) { | ||
2467 | struct page *page; | ||
2468 | |||
2469 | page = rbio_stripe_page(rbio, rbio->scrubp, pagenr); | ||
2470 | ret = rbio_add_io_page(rbio, &bio_list, page, | ||
2471 | bbio->tgtdev_map[rbio->scrubp], | ||
2472 | pagenr, rbio->stripe_len); | ||
2473 | if (ret) | ||
2474 | goto cleanup; | ||
2475 | } | ||
2476 | |||
2477 | submit_write: | ||
2478 | nr_data = bio_list_size(&bio_list); | ||
2479 | if (!nr_data) { | ||
2480 | /* Every parity is right */ | ||
2481 | rbio_orig_end_io(rbio, 0, 0); | ||
2482 | return; | ||
2483 | } | ||
2484 | |||
2485 | atomic_set(&rbio->stripes_pending, nr_data); | ||
2486 | |||
2487 | while (1) { | ||
2488 | bio = bio_list_pop(&bio_list); | ||
2489 | if (!bio) | ||
2490 | break; | ||
2491 | |||
2492 | bio->bi_private = rbio; | ||
2493 | bio->bi_end_io = raid_write_parity_end_io; | ||
2494 | BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags)); | ||
2495 | submit_bio(WRITE, bio); | ||
2496 | } | ||
2497 | return; | ||
2498 | |||
2499 | cleanup: | ||
2500 | rbio_orig_end_io(rbio, -EIO, 0); | ||
2501 | } | ||
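The heart of finish_parity_scrub() is a per-row compare: recompute P (and Q on RAID6) into scratch pages, memcmp the recomputed parity against the page read from the scrub device, overwrite it when they differ, and clear the row's dbitmap bit when they already match so nothing is written back for that row. A reduced sketch of that per-row decision (single parity buffer, no Q handling):

    #include <string.h>

    /* Returns 1 if the on-disk parity had to be repaired and should be
     * written back, 0 if it already matched the recomputed parity. */
    int scrub_one_row(unsigned char *ondisk_parity,
                      const unsigned char *recomputed_parity, size_t len)
    {
            if (memcmp(ondisk_parity, recomputed_parity, len) == 0)
                    return 0;               /* parity is good: skip the write */

            memcpy(ondisk_parity, recomputed_parity, len);
            return 1;                       /* repaired in memory, queue a write-back */
    }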
2502 | |||
2503 | static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) | ||
2504 | { | ||
2505 | if (stripe >= 0 && stripe < rbio->nr_data) | ||
2506 | return 1; | ||
2507 | return 0; | ||
2508 | } | ||
2509 | |||
2510 | /* | ||
2511 | * While we're doing the parity check and repair, we could have errors | ||
2512 | * in reading pages off the disk. This checks for errors and if we're | ||
2513 | * not able to read the page it'll trigger parity reconstruction. The | ||
2514 | * parity scrub will be finished after we've reconstructed the failed | ||
2515 | * stripes | ||
2516 | */ | ||
2517 | static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio) | ||
2518 | { | ||
2519 | if (atomic_read(&rbio->error) > rbio->bbio->max_errors) | ||
2520 | goto cleanup; | ||
2521 | |||
2522 | if (rbio->faila >= 0 || rbio->failb >= 0) { | ||
2523 | int dfail = 0, failp = -1; | ||
2524 | |||
2525 | if (is_data_stripe(rbio, rbio->faila)) | ||
2526 | dfail++; | ||
2527 | else if (is_parity_stripe(rbio->faila)) | ||
2528 | failp = rbio->faila; | ||
2529 | |||
2530 | if (is_data_stripe(rbio, rbio->failb)) | ||
2531 | dfail++; | ||
2532 | else if (is_parity_stripe(rbio->failb)) | ||
2533 | failp = rbio->failb; | ||
2534 | |||
2535 | /* | ||
2536 | * Because the parity that is being scrubbed can not be used to | ||
2537 | * repair data, our ability to repair is reduced by one. | ||
2538 | * (In the case of RAID5, we can not repair anything.) | ||
2539 | */ | ||
2540 | if (dfail > rbio->bbio->max_errors - 1) | ||
2541 | goto cleanup; | ||
2542 | |||
2543 | /* | ||
2544 | * If all the data stripes are good, only the parity can be | ||
2545 | * wrong, so just repair the parity. | ||
2546 | */ | ||
2547 | if (dfail == 0) { | ||
2548 | finish_parity_scrub(rbio, 0); | ||
2549 | return; | ||
2550 | } | ||
2551 | |||
2552 | /* | ||
2553 | * At this point we have one corrupted data stripe and one | ||
2554 | * corrupted parity on RAID6. If the corrupted parity is the | ||
2555 | * one being scrubbed, we can luckily use the other parity to | ||
2556 | * repair the data; otherwise the data stripe can not be repaired. | ||
2557 | */ | ||
2558 | if (failp != rbio->scrubp) | ||
2559 | goto cleanup; | ||
2560 | |||
2561 | __raid_recover_end_io(rbio); | ||
2562 | } else { | ||
2563 | finish_parity_scrub(rbio, 1); | ||
2564 | } | ||
2565 | return; | ||
2566 | |||
2567 | cleanup: | ||
2568 | rbio_orig_end_io(rbio, -EIO, 0); | ||
2569 | } | ||
2570 | |||
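validate_rbio_for_parity_scrub above boils down to a small decision table: count the failed data stripes (dfail), remember a failed parity stripe (failp), and account for the fact that the parity being scrubbed can never take part in a repair. A hedged stand-alone sketch of that classification follows; the stripe numbering, nr_data, max_errors and scrubp values are illustrative and do not use the kernel's types.

#include <stdio.h>

/* faila/failb: failed stripe index, or -1 for no failure.
 * Stripes [0, nr_data) are data, the rest are parity.
 * Returns: 0 = just repair the parity, 1 = reconstruct data, -1 = give up. */
static int classify(int faila, int failb, int nr_data, int max_errors, int scrubp)
{
        int dfail = 0, failp = -1;

        if (faila >= 0) {
                if (faila < nr_data)
                        dfail++;
                else
                        failp = faila;          /* a parity stripe failed */
        }
        if (failb >= 0) {
                if (failb < nr_data)
                        dfail++;
                else
                        failp = failb;
        }

        /* the parity being scrubbed can not help, so tolerance drops by one */
        if (dfail > max_errors - 1)
                return -1;
        if (dfail == 0)
                return 0;                       /* data is fine, rewrite parity only */
        /* one bad data stripe plus one bad parity: repair only works if the
         * bad parity is the one we are scrubbing anyway */
        return failp == scrubp ? 1 : -1;
}

int main(void)
{
        /* RAID6: 4 data stripes, P = 4, Q = 5, max_errors = 2, scrubbing P */
        printf("%d\n", classify(1, 4, 4, 2, 4));   /* 1: reconstruct via Q  */
        printf("%d\n", classify(1, 5, 4, 2, 4));   /* -1: can not repair    */
        printf("%d\n", classify(4, -1, 4, 2, 4));  /* 0: only P needs fixing */
        return 0;
}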
2571 | /* | ||
2572 | * end io for the read phase of the parity scrub. All the bios here are physical | ||
2573 | * stripe bios we've read from the disk so we can recalculate the parity of the | ||
2574 | * stripe. | ||
2575 | * | ||
2576 | * This will usually kick off finish_parity_scrub once all the bios are read in, | ||
2577 | * but it may trigger parity reconstruction if we had any errors along the way. | ||
2578 | */ | ||
2579 | static void raid56_parity_scrub_end_io(struct bio *bio, int err) | ||
2580 | { | ||
2581 | struct btrfs_raid_bio *rbio = bio->bi_private; | ||
2582 | |||
2583 | if (err) | ||
2584 | fail_bio_stripe(rbio, bio); | ||
2585 | else | ||
2586 | set_bio_pages_uptodate(bio); | ||
2587 | |||
2588 | bio_put(bio); | ||
2589 | |||
2590 | if (!atomic_dec_and_test(&rbio->stripes_pending)) | ||
2591 | return; | ||
2592 | |||
2593 | /* | ||
2594 | * this will normally call finish_parity_scrub to start our | ||
2595 | * write, but if there are any failed stripes we'll reconstruct | ||
2596 | * from parity first | ||
2597 | */ | ||
2598 | validate_rbio_for_parity_scrub(rbio); | ||
2599 | } | ||
2600 | |||
2601 | static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) | ||
2602 | { | ||
2603 | int bios_to_read = 0; | ||
2604 | struct bio_list bio_list; | ||
2605 | int ret; | ||
2606 | int pagenr; | ||
2607 | int stripe; | ||
2608 | struct bio *bio; | ||
2609 | |||
2610 | ret = alloc_rbio_essential_pages(rbio); | ||
2611 | if (ret) | ||
2612 | goto cleanup; | ||
2613 | |||
2614 | bio_list_init(&bio_list); | ||
2615 | |||
2616 | atomic_set(&rbio->error, 0); | ||
2617 | /* | ||
2618 | * build a list of bios to read all the missing parts of this | ||
2619 | * stripe | ||
2620 | */ | ||
2621 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) { | ||
2622 | for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { | ||
2623 | struct page *page; | ||
2624 | /* | ||
2625 | * we want to find all the pages missing from | ||
2626 | * the rbio and read them from the disk. If | ||
2627 | * page_in_rbio finds a page in the bio list | ||
2628 | * we don't need to read it off the stripe. | ||
2629 | */ | ||
2630 | page = page_in_rbio(rbio, stripe, pagenr, 1); | ||
2631 | if (page) | ||
2632 | continue; | ||
2633 | |||
2634 | page = rbio_stripe_page(rbio, stripe, pagenr); | ||
2635 | /* | ||
2636 | * the bio cache may have handed us an uptodate | ||
2637 | * page. If so, be happy and use it | ||
2638 | */ | ||
2639 | if (PageUptodate(page)) | ||
2640 | continue; | ||
2641 | |||
2642 | ret = rbio_add_io_page(rbio, &bio_list, page, | ||
2643 | stripe, pagenr, rbio->stripe_len); | ||
2644 | if (ret) | ||
2645 | goto cleanup; | ||
2646 | } | ||
2647 | } | ||
2648 | |||
2649 | bios_to_read = bio_list_size(&bio_list); | ||
2650 | if (!bios_to_read) { | ||
2651 | /* | ||
2652 | * this can happen if others have merged with | ||
2653 | * us; it means there is nothing left to read. | ||
2654 | * But if there are missing devices it may not be | ||
2655 | * safe to do the full stripe write yet. | ||
2656 | */ | ||
2657 | goto finish; | ||
2658 | } | ||
2659 | |||
2660 | /* | ||
2661 | * the bbio may be freed once we submit the last bio. Make sure | ||
2662 | * not to touch it after that | ||
2663 | */ | ||
2664 | atomic_set(&rbio->stripes_pending, bios_to_read); | ||
2665 | while (1) { | ||
2666 | bio = bio_list_pop(&bio_list); | ||
2667 | if (!bio) | ||
2668 | break; | ||
2669 | |||
2670 | bio->bi_private = rbio; | ||
2671 | bio->bi_end_io = raid56_parity_scrub_end_io; | ||
2672 | |||
2673 | btrfs_bio_wq_end_io(rbio->fs_info, bio, | ||
2674 | BTRFS_WQ_ENDIO_RAID56); | ||
2675 | |||
2676 | BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags)); | ||
2677 | submit_bio(READ, bio); | ||
2678 | } | ||
2679 | /* the actual write will happen once the reads are done */ | ||
2680 | return; | ||
2681 | |||
2682 | cleanup: | ||
2683 | rbio_orig_end_io(rbio, -EIO, 0); | ||
2684 | return; | ||
2685 | |||
2686 | finish: | ||
2687 | validate_rbio_for_parity_scrub(rbio); | ||
2688 | } | ||
2689 | |||
2690 | static void scrub_parity_work(struct btrfs_work *work) | ||
2691 | { | ||
2692 | struct btrfs_raid_bio *rbio; | ||
2693 | |||
2694 | rbio = container_of(work, struct btrfs_raid_bio, work); | ||
2695 | raid56_parity_scrub_stripe(rbio); | ||
2696 | } | ||
2697 | |||
2698 | static void async_scrub_parity(struct btrfs_raid_bio *rbio) | ||
2699 | { | ||
2700 | btrfs_init_work(&rbio->work, btrfs_rmw_helper, | ||
2701 | scrub_parity_work, NULL, NULL); | ||
2702 | |||
2703 | btrfs_queue_work(rbio->fs_info->rmw_workers, | ||
2704 | &rbio->work); | ||
2705 | } | ||
2706 | |||
2707 | void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) | ||
2708 | { | ||
2709 | if (!lock_stripe_add(rbio)) | ||
2710 | async_scrub_parity(rbio); | ||
2711 | } | ||
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index ea5d73bfdfbe..31d4a157b5e3 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h | |||
@@ -39,13 +39,25 @@ static inline int nr_data_stripes(struct map_lookup *map) | |||
39 | #define is_parity_stripe(x) (((x) == RAID5_P_STRIPE) || \ | 39 | #define is_parity_stripe(x) (((x) == RAID5_P_STRIPE) || \ |
40 | ((x) == RAID6_Q_STRIPE)) | 40 | ((x) == RAID6_Q_STRIPE)) |
41 | 41 | ||
42 | struct btrfs_raid_bio; | ||
43 | struct btrfs_device; | ||
44 | |||
42 | int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, | 45 | int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, |
43 | struct btrfs_bio *bbio, u64 *raid_map, | 46 | struct btrfs_bio *bbio, u64 *raid_map, |
44 | u64 stripe_len, int mirror_num); | 47 | u64 stripe_len, int mirror_num, int generic_io); |
45 | int raid56_parity_write(struct btrfs_root *root, struct bio *bio, | 48 | int raid56_parity_write(struct btrfs_root *root, struct bio *bio, |
46 | struct btrfs_bio *bbio, u64 *raid_map, | 49 | struct btrfs_bio *bbio, u64 *raid_map, |
47 | u64 stripe_len); | 50 | u64 stripe_len); |
48 | 51 | ||
52 | struct btrfs_raid_bio * | ||
53 | raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, | ||
54 | struct btrfs_bio *bbio, u64 *raid_map, | ||
55 | u64 stripe_len, struct btrfs_device *scrub_dev, | ||
56 | unsigned long *dbitmap, int stripe_nsectors); | ||
57 | void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio, | ||
58 | struct page *page, u64 logical); | ||
59 | void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio); | ||
60 | |||
49 | int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); | 61 | int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); |
50 | void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info); | 62 | void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info); |
51 | #endif | 63 | #endif |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 4325bb0111d9..f2bb13a23f86 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -63,10 +63,18 @@ struct scrub_ctx; | |||
63 | */ | 63 | */ |
64 | #define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */ | 64 | #define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */ |
65 | 65 | ||
66 | struct scrub_recover { | ||
67 | atomic_t refs; | ||
68 | struct btrfs_bio *bbio; | ||
69 | u64 *raid_map; | ||
70 | u64 map_length; | ||
71 | }; | ||
72 | |||
66 | struct scrub_page { | 73 | struct scrub_page { |
67 | struct scrub_block *sblock; | 74 | struct scrub_block *sblock; |
68 | struct page *page; | 75 | struct page *page; |
69 | struct btrfs_device *dev; | 76 | struct btrfs_device *dev; |
77 | struct list_head list; | ||
70 | u64 flags; /* extent flags */ | 78 | u64 flags; /* extent flags */ |
71 | u64 generation; | 79 | u64 generation; |
72 | u64 logical; | 80 | u64 logical; |
@@ -79,6 +87,8 @@ struct scrub_page { | |||
79 | unsigned int io_error:1; | 87 | unsigned int io_error:1; |
80 | }; | 88 | }; |
81 | u8 csum[BTRFS_CSUM_SIZE]; | 89 | u8 csum[BTRFS_CSUM_SIZE]; |
90 | |||
91 | struct scrub_recover *recover; | ||
82 | }; | 92 | }; |
83 | 93 | ||
84 | struct scrub_bio { | 94 | struct scrub_bio { |
@@ -105,14 +115,52 @@ struct scrub_block { | |||
105 | atomic_t outstanding_pages; | 115 | atomic_t outstanding_pages; |
106 | atomic_t ref_count; /* free mem on transition to zero */ | 116 | atomic_t ref_count; /* free mem on transition to zero */ |
107 | struct scrub_ctx *sctx; | 117 | struct scrub_ctx *sctx; |
118 | struct scrub_parity *sparity; | ||
108 | struct { | 119 | struct { |
109 | unsigned int header_error:1; | 120 | unsigned int header_error:1; |
110 | unsigned int checksum_error:1; | 121 | unsigned int checksum_error:1; |
111 | unsigned int no_io_error_seen:1; | 122 | unsigned int no_io_error_seen:1; |
112 | unsigned int generation_error:1; /* also sets header_error */ | 123 | unsigned int generation_error:1; /* also sets header_error */ |
124 | |||
125 | /* The following is for the data used to check parity, */ | ||
126 | /* i.e. the data that has a checksum */ | ||
127 | unsigned int data_corrected:1; | ||
113 | }; | 128 | }; |
114 | }; | 129 | }; |
115 | 130 | ||
131 | /* Used for the chunks with parity stripes, such as RAID5/6 */ | ||
132 | struct scrub_parity { | ||
133 | struct scrub_ctx *sctx; | ||
134 | |||
135 | struct btrfs_device *scrub_dev; | ||
136 | |||
137 | u64 logic_start; | ||
138 | |||
139 | u64 logic_end; | ||
140 | |||
141 | int nsectors; | ||
142 | |||
143 | int stripe_len; | ||
144 | |||
145 | atomic_t ref_count; | ||
146 | |||
147 | struct list_head spages; | ||
148 | |||
149 | /* Work of parity check and repair */ | ||
150 | struct btrfs_work work; | ||
151 | |||
152 | /* Mark the parity blocks which have data */ | ||
153 | unsigned long *dbitmap; | ||
154 | |||
155 | /* | ||
156 | * Mark the parity blocks which have data, but where errors happened | ||
157 | * while reading or checking that data | ||
158 | */ | ||
159 | unsigned long *ebitmap; | ||
160 | |||
161 | unsigned long bitmap[0]; | ||
162 | }; | ||
163 | |||
116 | struct scrub_wr_ctx { | 164 | struct scrub_wr_ctx { |
117 | struct scrub_bio *wr_curr_bio; | 165 | struct scrub_bio *wr_curr_bio; |
118 | struct btrfs_device *tgtdev; | 166 | struct btrfs_device *tgtdev; |
@@ -196,7 +244,7 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx, | |||
196 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, | 244 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, |
197 | struct scrub_block *sblock, int is_metadata, | 245 | struct scrub_block *sblock, int is_metadata, |
198 | int have_csum, u8 *csum, u64 generation, | 246 | int have_csum, u8 *csum, u64 generation, |
199 | u16 csum_size); | 247 | u16 csum_size, int retry_failed_mirror); |
200 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | 248 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, |
201 | struct scrub_block *sblock, | 249 | struct scrub_block *sblock, |
202 | int is_metadata, int have_csum, | 250 | int is_metadata, int have_csum, |
@@ -218,6 +266,8 @@ static void scrub_block_get(struct scrub_block *sblock); | |||
218 | static void scrub_block_put(struct scrub_block *sblock); | 266 | static void scrub_block_put(struct scrub_block *sblock); |
219 | static void scrub_page_get(struct scrub_page *spage); | 267 | static void scrub_page_get(struct scrub_page *spage); |
220 | static void scrub_page_put(struct scrub_page *spage); | 268 | static void scrub_page_put(struct scrub_page *spage); |
269 | static void scrub_parity_get(struct scrub_parity *sparity); | ||
270 | static void scrub_parity_put(struct scrub_parity *sparity); | ||
221 | static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, | 271 | static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, |
222 | struct scrub_page *spage); | 272 | struct scrub_page *spage); |
223 | static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | 273 | static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, |
@@ -790,6 +840,20 @@ out: | |||
790 | scrub_pending_trans_workers_dec(sctx); | 840 | scrub_pending_trans_workers_dec(sctx); |
791 | } | 841 | } |
792 | 842 | ||
843 | static inline void scrub_get_recover(struct scrub_recover *recover) | ||
844 | { | ||
845 | atomic_inc(&recover->refs); | ||
846 | } | ||
847 | |||
848 | static inline void scrub_put_recover(struct scrub_recover *recover) | ||
849 | { | ||
850 | if (atomic_dec_and_test(&recover->refs)) { | ||
851 | kfree(recover->bbio); | ||
852 | kfree(recover->raid_map); | ||
853 | kfree(recover); | ||
854 | } | ||
855 | } | ||
856 | |||
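scrub_get_recover/scrub_put_recover above is a plain reference count: each page that points at the shared scrub_recover takes a reference, and the last put frees the bbio and raid_map. For readers less familiar with the pattern, here is a minimal user-space analogue using C11 atomics; the struct layout and names are invented for the example and are not the btrfs types.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct recover {
        atomic_int refs;
        void *payload;                  /* stands in for bbio + raid_map */
};

static struct recover *recover_alloc(void)
{
        struct recover *r = calloc(1, sizeof(*r));
        if (!r)
                return NULL;
        atomic_init(&r->refs, 1);       /* creator holds the first reference */
        r->payload = malloc(64);
        return r;
}

static void recover_get(struct recover *r)
{
        atomic_fetch_add(&r->refs, 1);
}

static void recover_put(struct recover *r)
{
        /* fetch_sub returns the old value; whoever drops it to zero frees */
        if (atomic_fetch_sub(&r->refs, 1) == 1) {
                free(r->payload);
                free(r);
                puts("freed");
        }
}

int main(void)
{
        struct recover *r = recover_alloc();
        recover_get(r);   /* e.g. one extra reference per page that uses it */
        recover_put(r);   /* page released */
        recover_put(r);   /* creator's reference dropped last -> "freed" once */
        return 0;
}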
793 | /* | 857 | /* |
794 | * scrub_handle_errored_block gets called when either verification of the | 858 | * scrub_handle_errored_block gets called when either verification of the |
795 | * pages failed or the bio failed to read, e.g. with EIO. In the latter | 859 | * pages failed or the bio failed to read, e.g. with EIO. In the latter |
@@ -906,7 +970,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
906 | 970 | ||
907 | /* build and submit the bios for the failed mirror, check checksums */ | 971 | /* build and submit the bios for the failed mirror, check checksums */ |
908 | scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, | 972 | scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, |
909 | csum, generation, sctx->csum_size); | 973 | csum, generation, sctx->csum_size, 1); |
910 | 974 | ||
911 | if (!sblock_bad->header_error && !sblock_bad->checksum_error && | 975 | if (!sblock_bad->header_error && !sblock_bad->checksum_error && |
912 | sblock_bad->no_io_error_seen) { | 976 | sblock_bad->no_io_error_seen) { |
@@ -920,6 +984,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
920 | */ | 984 | */ |
921 | spin_lock(&sctx->stat_lock); | 985 | spin_lock(&sctx->stat_lock); |
922 | sctx->stat.unverified_errors++; | 986 | sctx->stat.unverified_errors++; |
987 | sblock_to_check->data_corrected = 1; | ||
923 | spin_unlock(&sctx->stat_lock); | 988 | spin_unlock(&sctx->stat_lock); |
924 | 989 | ||
925 | if (sctx->is_dev_replace) | 990 | if (sctx->is_dev_replace) |
@@ -1019,7 +1084,7 @@ nodatasum_case: | |||
1019 | /* build and submit the bios, check checksums */ | 1084 | /* build and submit the bios, check checksums */ |
1020 | scrub_recheck_block(fs_info, sblock_other, is_metadata, | 1085 | scrub_recheck_block(fs_info, sblock_other, is_metadata, |
1021 | have_csum, csum, generation, | 1086 | have_csum, csum, generation, |
1022 | sctx->csum_size); | 1087 | sctx->csum_size, 0); |
1023 | 1088 | ||
1024 | if (!sblock_other->header_error && | 1089 | if (!sblock_other->header_error && |
1025 | !sblock_other->checksum_error && | 1090 | !sblock_other->checksum_error && |
@@ -1169,7 +1234,7 @@ nodatasum_case: | |||
1169 | */ | 1234 | */ |
1170 | scrub_recheck_block(fs_info, sblock_bad, | 1235 | scrub_recheck_block(fs_info, sblock_bad, |
1171 | is_metadata, have_csum, csum, | 1236 | is_metadata, have_csum, csum, |
1172 | generation, sctx->csum_size); | 1237 | generation, sctx->csum_size, 1); |
1173 | if (!sblock_bad->header_error && | 1238 | if (!sblock_bad->header_error && |
1174 | !sblock_bad->checksum_error && | 1239 | !sblock_bad->checksum_error && |
1175 | sblock_bad->no_io_error_seen) | 1240 | sblock_bad->no_io_error_seen) |
@@ -1180,6 +1245,7 @@ nodatasum_case: | |||
1180 | corrected_error: | 1245 | corrected_error: |
1181 | spin_lock(&sctx->stat_lock); | 1246 | spin_lock(&sctx->stat_lock); |
1182 | sctx->stat.corrected_errors++; | 1247 | sctx->stat.corrected_errors++; |
1248 | sblock_to_check->data_corrected = 1; | ||
1183 | spin_unlock(&sctx->stat_lock); | 1249 | spin_unlock(&sctx->stat_lock); |
1184 | printk_ratelimited_in_rcu(KERN_ERR | 1250 | printk_ratelimited_in_rcu(KERN_ERR |
1185 | "BTRFS: fixed up error at logical %llu on dev %s\n", | 1251 | "BTRFS: fixed up error at logical %llu on dev %s\n", |
@@ -1201,11 +1267,18 @@ out: | |||
1201 | mirror_index++) { | 1267 | mirror_index++) { |
1202 | struct scrub_block *sblock = sblocks_for_recheck + | 1268 | struct scrub_block *sblock = sblocks_for_recheck + |
1203 | mirror_index; | 1269 | mirror_index; |
1270 | struct scrub_recover *recover; | ||
1204 | int page_index; | 1271 | int page_index; |
1205 | 1272 | ||
1206 | for (page_index = 0; page_index < sblock->page_count; | 1273 | for (page_index = 0; page_index < sblock->page_count; |
1207 | page_index++) { | 1274 | page_index++) { |
1208 | sblock->pagev[page_index]->sblock = NULL; | 1275 | sblock->pagev[page_index]->sblock = NULL; |
1276 | recover = sblock->pagev[page_index]->recover; | ||
1277 | if (recover) { | ||
1278 | scrub_put_recover(recover); | ||
1279 | sblock->pagev[page_index]->recover = | ||
1280 | NULL; | ||
1281 | } | ||
1209 | scrub_page_put(sblock->pagev[page_index]); | 1282 | scrub_page_put(sblock->pagev[page_index]); |
1210 | } | 1283 | } |
1211 | } | 1284 | } |
@@ -1215,14 +1288,63 @@ out: | |||
1215 | return 0; | 1288 | return 0; |
1216 | } | 1289 | } |
1217 | 1290 | ||
1291 | static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map) | ||
1292 | { | ||
1293 | if (raid_map) { | ||
1294 | if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE) | ||
1295 | return 3; | ||
1296 | else | ||
1297 | return 2; | ||
1298 | } else { | ||
1299 | return (int)bbio->num_stripes; | ||
1300 | } | ||
1301 | } | ||
1302 | |||
1303 | static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map, | ||
1304 | u64 mapped_length, | ||
1305 | int nstripes, int mirror, | ||
1306 | int *stripe_index, | ||
1307 | u64 *stripe_offset) | ||
1308 | { | ||
1309 | int i; | ||
1310 | |||
1311 | if (raid_map) { | ||
1312 | /* RAID5/6 */ | ||
1313 | for (i = 0; i < nstripes; i++) { | ||
1314 | if (raid_map[i] == RAID6_Q_STRIPE || | ||
1315 | raid_map[i] == RAID5_P_STRIPE) | ||
1316 | continue; | ||
1317 | |||
1318 | if (logical >= raid_map[i] && | ||
1319 | logical < raid_map[i] + mapped_length) | ||
1320 | break; | ||
1321 | } | ||
1322 | |||
1323 | *stripe_index = i; | ||
1324 | *stripe_offset = logical - raid_map[i]; | ||
1325 | } else { | ||
1326 | /* The other RAID type */ | ||
1327 | *stripe_index = mirror; | ||
1328 | *stripe_offset = 0; | ||
1329 | } | ||
1330 | } | ||
1331 | |||
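scrub_stripe_index_and_offset above maps a logical address back to the stripe that actually holds it by scanning raid_map and skipping the P/Q slots. The idea is easier to see with concrete numbers; the sketch below uses an invented raid_map, sentinel values and a per-stripe range (the kernel passes the mapped length instead), so treat it as illustration rather than the real helper.

#include <stdio.h>
#include <stdint.h>

#define P_STRIPE UINT64_MAX             /* stand-ins for RAID5_P_STRIPE / RAID6_Q_STRIPE */
#define Q_STRIPE (UINT64_MAX - 1)

/* assumes the logical address falls inside one of the data stripes,
 * as it does for the kernel caller */
static void index_and_offset(uint64_t logical, const uint64_t *raid_map,
                             uint64_t stripe_len, int nstripes,
                             int *stripe_index, uint64_t *stripe_offset)
{
        int i;

        for (i = 0; i < nstripes; i++) {
                if (raid_map[i] == P_STRIPE || raid_map[i] == Q_STRIPE)
                        continue;
                if (logical >= raid_map[i] && logical < raid_map[i] + stripe_len)
                        break;
        }
        *stripe_index = i;
        *stripe_offset = logical - raid_map[i];
}

int main(void)
{
        /* 3 data stripes of 64K each plus P and Q; the full stripe starts at 1M */
        uint64_t raid_map[5] = { 1048576, 1114112, 1179648, P_STRIPE, Q_STRIPE };
        int idx;
        uint64_t off;

        index_and_offset(1130496, raid_map, 65536, 5, &idx, &off);
        printf("stripe %d, offset %llu\n", idx, (unsigned long long)off);
        /* prints "stripe 1, offset 16384": 1130496 = 1114112 + 16384 */
        return 0;
}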
1218 | static int scrub_setup_recheck_block(struct scrub_ctx *sctx, | 1332 | static int scrub_setup_recheck_block(struct scrub_ctx *sctx, |
1219 | struct btrfs_fs_info *fs_info, | 1333 | struct btrfs_fs_info *fs_info, |
1220 | struct scrub_block *original_sblock, | 1334 | struct scrub_block *original_sblock, |
1221 | u64 length, u64 logical, | 1335 | u64 length, u64 logical, |
1222 | struct scrub_block *sblocks_for_recheck) | 1336 | struct scrub_block *sblocks_for_recheck) |
1223 | { | 1337 | { |
1338 | struct scrub_recover *recover; | ||
1339 | struct btrfs_bio *bbio; | ||
1340 | u64 *raid_map; | ||
1341 | u64 sublen; | ||
1342 | u64 mapped_length; | ||
1343 | u64 stripe_offset; | ||
1344 | int stripe_index; | ||
1224 | int page_index; | 1345 | int page_index; |
1225 | int mirror_index; | 1346 | int mirror_index; |
1347 | int nmirrors; | ||
1226 | int ret; | 1348 | int ret; |
1227 | 1349 | ||
1228 | /* | 1350 | /* |
@@ -1233,23 +1355,39 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx, | |||
1233 | 1355 | ||
1234 | page_index = 0; | 1356 | page_index = 0; |
1235 | while (length > 0) { | 1357 | while (length > 0) { |
1236 | u64 sublen = min_t(u64, length, PAGE_SIZE); | 1358 | sublen = min_t(u64, length, PAGE_SIZE); |
1237 | u64 mapped_length = sublen; | 1359 | mapped_length = sublen; |
1238 | struct btrfs_bio *bbio = NULL; | 1360 | bbio = NULL; |
1361 | raid_map = NULL; | ||
1239 | 1362 | ||
1240 | /* | 1363 | /* |
1241 | * with a length of PAGE_SIZE, each returned stripe | 1364 | * with a length of PAGE_SIZE, each returned stripe |
1242 | * represents one mirror | 1365 | * represents one mirror |
1243 | */ | 1366 | */ |
1244 | ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical, | 1367 | ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, |
1245 | &mapped_length, &bbio, 0); | 1368 | &mapped_length, &bbio, 0, &raid_map); |
1246 | if (ret || !bbio || mapped_length < sublen) { | 1369 | if (ret || !bbio || mapped_length < sublen) { |
1247 | kfree(bbio); | 1370 | kfree(bbio); |
1371 | kfree(raid_map); | ||
1248 | return -EIO; | 1372 | return -EIO; |
1249 | } | 1373 | } |
1250 | 1374 | ||
1375 | recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS); | ||
1376 | if (!recover) { | ||
1377 | kfree(bbio); | ||
1378 | kfree(raid_map); | ||
1379 | return -ENOMEM; | ||
1380 | } | ||
1381 | |||
1382 | atomic_set(&recover->refs, 1); | ||
1383 | recover->bbio = bbio; | ||
1384 | recover->raid_map = raid_map; | ||
1385 | recover->map_length = mapped_length; | ||
1386 | |||
1251 | BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); | 1387 | BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); |
1252 | for (mirror_index = 0; mirror_index < (int)bbio->num_stripes; | 1388 | |
1389 | nmirrors = scrub_nr_raid_mirrors(bbio, raid_map); | ||
1390 | for (mirror_index = 0; mirror_index < nmirrors; | ||
1253 | mirror_index++) { | 1391 | mirror_index++) { |
1254 | struct scrub_block *sblock; | 1392 | struct scrub_block *sblock; |
1255 | struct scrub_page *page; | 1393 | struct scrub_page *page; |
@@ -1265,26 +1403,38 @@ leave_nomem: | |||
1265 | spin_lock(&sctx->stat_lock); | 1403 | spin_lock(&sctx->stat_lock); |
1266 | sctx->stat.malloc_errors++; | 1404 | sctx->stat.malloc_errors++; |
1267 | spin_unlock(&sctx->stat_lock); | 1405 | spin_unlock(&sctx->stat_lock); |
1268 | kfree(bbio); | 1406 | scrub_put_recover(recover); |
1269 | return -ENOMEM; | 1407 | return -ENOMEM; |
1270 | } | 1408 | } |
1271 | scrub_page_get(page); | 1409 | scrub_page_get(page); |
1272 | sblock->pagev[page_index] = page; | 1410 | sblock->pagev[page_index] = page; |
1273 | page->logical = logical; | 1411 | page->logical = logical; |
1274 | page->physical = bbio->stripes[mirror_index].physical; | 1412 | |
1413 | scrub_stripe_index_and_offset(logical, raid_map, | ||
1414 | mapped_length, | ||
1415 | bbio->num_stripes, | ||
1416 | mirror_index, | ||
1417 | &stripe_index, | ||
1418 | &stripe_offset); | ||
1419 | page->physical = bbio->stripes[stripe_index].physical + | ||
1420 | stripe_offset; | ||
1421 | page->dev = bbio->stripes[stripe_index].dev; | ||
1422 | |||
1275 | BUG_ON(page_index >= original_sblock->page_count); | 1423 | BUG_ON(page_index >= original_sblock->page_count); |
1276 | page->physical_for_dev_replace = | 1424 | page->physical_for_dev_replace = |
1277 | original_sblock->pagev[page_index]-> | 1425 | original_sblock->pagev[page_index]-> |
1278 | physical_for_dev_replace; | 1426 | physical_for_dev_replace; |
1279 | /* for missing devices, dev->bdev is NULL */ | 1427 | /* for missing devices, dev->bdev is NULL */ |
1280 | page->dev = bbio->stripes[mirror_index].dev; | ||
1281 | page->mirror_num = mirror_index + 1; | 1428 | page->mirror_num = mirror_index + 1; |
1282 | sblock->page_count++; | 1429 | sblock->page_count++; |
1283 | page->page = alloc_page(GFP_NOFS); | 1430 | page->page = alloc_page(GFP_NOFS); |
1284 | if (!page->page) | 1431 | if (!page->page) |
1285 | goto leave_nomem; | 1432 | goto leave_nomem; |
1433 | |||
1434 | scrub_get_recover(recover); | ||
1435 | page->recover = recover; | ||
1286 | } | 1436 | } |
1287 | kfree(bbio); | 1437 | scrub_put_recover(recover); |
1288 | length -= sublen; | 1438 | length -= sublen; |
1289 | logical += sublen; | 1439 | logical += sublen; |
1290 | page_index++; | 1440 | page_index++; |
@@ -1293,6 +1443,51 @@ leave_nomem: | |||
1293 | return 0; | 1443 | return 0; |
1294 | } | 1444 | } |
1295 | 1445 | ||
1446 | struct scrub_bio_ret { | ||
1447 | struct completion event; | ||
1448 | int error; | ||
1449 | }; | ||
1450 | |||
1451 | static void scrub_bio_wait_endio(struct bio *bio, int error) | ||
1452 | { | ||
1453 | struct scrub_bio_ret *ret = bio->bi_private; | ||
1454 | |||
1455 | ret->error = error; | ||
1456 | complete(&ret->event); | ||
1457 | } | ||
1458 | |||
1459 | static inline int scrub_is_page_on_raid56(struct scrub_page *page) | ||
1460 | { | ||
1461 | return page->recover && page->recover->raid_map; | ||
1462 | } | ||
1463 | |||
1464 | static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, | ||
1465 | struct bio *bio, | ||
1466 | struct scrub_page *page) | ||
1467 | { | ||
1468 | struct scrub_bio_ret done; | ||
1469 | int ret; | ||
1470 | |||
1471 | init_completion(&done.event); | ||
1472 | done.error = 0; | ||
1473 | bio->bi_iter.bi_sector = page->logical >> 9; | ||
1474 | bio->bi_private = &done; | ||
1475 | bio->bi_end_io = scrub_bio_wait_endio; | ||
1476 | |||
1477 | ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio, | ||
1478 | page->recover->raid_map, | ||
1479 | page->recover->map_length, | ||
1480 | page->mirror_num, 0); | ||
1481 | if (ret) | ||
1482 | return ret; | ||
1483 | |||
1484 | wait_for_completion(&done.event); | ||
1485 | if (done.error) | ||
1486 | return -EIO; | ||
1487 | |||
1488 | return 0; | ||
1489 | } | ||
1490 | |||
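scrub_submit_raid56_bio_wait turns the asynchronous raid56 read into a synchronous one: the endio callback records the error and fires a completion that the submitter sleeps on. A rough user-space equivalent of that pattern with a pthread condition variable is sketched below; the "submission" is faked with a thread and none of this is the kernel API.

#include <pthread.h>
#include <stdio.h>

struct bio_ret {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int done;
        int error;
};

/* plays the role of the endio callback */
static void endio(struct bio_ret *ret, int error)
{
        pthread_mutex_lock(&ret->lock);
        ret->error = error;
        ret->done = 1;
        pthread_cond_signal(&ret->cond);
        pthread_mutex_unlock(&ret->lock);
}

/* fake asynchronous I/O: completes "later" from another thread */
static void *fake_io(void *arg)
{
        endio(arg, 0);                          /* report success */
        return NULL;
}

int main(void)
{
        struct bio_ret ret = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .cond = PTHREAD_COND_INITIALIZER,
        };
        pthread_t t;

        pthread_create(&t, NULL, fake_io, &ret);        /* "submit" the bio */

        pthread_mutex_lock(&ret.lock);                  /* wait_for_completion() */
        while (!ret.done)
                pthread_cond_wait(&ret.cond, &ret.lock);
        pthread_mutex_unlock(&ret.lock);

        printf("I/O finished, error=%d\n", ret.error);
        pthread_join(t, NULL);
        return 0;
}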
1296 | /* | 1491 | /* |
1297 | * this function will check the on disk data for checksum errors, header | 1492 | * this function will check the on disk data for checksum errors, header |
1298 | * errors and read I/O errors. If any I/O errors happen, the exact pages | 1493 | * errors and read I/O errors. If any I/O errors happen, the exact pages |
@@ -1303,7 +1498,7 @@ leave_nomem: | |||
1303 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, | 1498 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, |
1304 | struct scrub_block *sblock, int is_metadata, | 1499 | struct scrub_block *sblock, int is_metadata, |
1305 | int have_csum, u8 *csum, u64 generation, | 1500 | int have_csum, u8 *csum, u64 generation, |
1306 | u16 csum_size) | 1501 | u16 csum_size, int retry_failed_mirror) |
1307 | { | 1502 | { |
1308 | int page_num; | 1503 | int page_num; |
1309 | 1504 | ||
@@ -1329,11 +1524,17 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
1329 | continue; | 1524 | continue; |
1330 | } | 1525 | } |
1331 | bio->bi_bdev = page->dev->bdev; | 1526 | bio->bi_bdev = page->dev->bdev; |
1332 | bio->bi_iter.bi_sector = page->physical >> 9; | ||
1333 | 1527 | ||
1334 | bio_add_page(bio, page->page, PAGE_SIZE, 0); | 1528 | bio_add_page(bio, page->page, PAGE_SIZE, 0); |
1335 | if (btrfsic_submit_bio_wait(READ, bio)) | 1529 | if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) { |
1336 | sblock->no_io_error_seen = 0; | 1530 | if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) |
1531 | sblock->no_io_error_seen = 0; | ||
1532 | } else { | ||
1533 | bio->bi_iter.bi_sector = page->physical >> 9; | ||
1534 | |||
1535 | if (btrfsic_submit_bio_wait(READ, bio)) | ||
1536 | sblock->no_io_error_seen = 0; | ||
1537 | } | ||
1337 | 1538 | ||
1338 | bio_put(bio); | 1539 | bio_put(bio); |
1339 | } | 1540 | } |
@@ -1486,6 +1687,13 @@ static void scrub_write_block_to_dev_replace(struct scrub_block *sblock) | |||
1486 | { | 1687 | { |
1487 | int page_num; | 1688 | int page_num; |
1488 | 1689 | ||
1690 | /* | ||
1691 | * This block is used to check the parity on the source device, | ||
1692 | * so the data does not need to be written to the destination device. | ||
1693 | */ | ||
1694 | if (sblock->sparity) | ||
1695 | return; | ||
1696 | |||
1489 | for (page_num = 0; page_num < sblock->page_count; page_num++) { | 1697 | for (page_num = 0; page_num < sblock->page_count; page_num++) { |
1490 | int ret; | 1698 | int ret; |
1491 | 1699 | ||
@@ -1867,6 +2075,9 @@ static void scrub_block_put(struct scrub_block *sblock) | |||
1867 | if (atomic_dec_and_test(&sblock->ref_count)) { | 2075 | if (atomic_dec_and_test(&sblock->ref_count)) { |
1868 | int i; | 2076 | int i; |
1869 | 2077 | ||
2078 | if (sblock->sparity) | ||
2079 | scrub_parity_put(sblock->sparity); | ||
2080 | |||
1870 | for (i = 0; i < sblock->page_count; i++) | 2081 | for (i = 0; i < sblock->page_count; i++) |
1871 | scrub_page_put(sblock->pagev[i]); | 2082 | scrub_page_put(sblock->pagev[i]); |
1872 | kfree(sblock); | 2083 | kfree(sblock); |
@@ -2124,9 +2335,51 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work) | |||
2124 | scrub_pending_bio_dec(sctx); | 2335 | scrub_pending_bio_dec(sctx); |
2125 | } | 2336 | } |
2126 | 2337 | ||
2338 | static inline void __scrub_mark_bitmap(struct scrub_parity *sparity, | ||
2339 | unsigned long *bitmap, | ||
2340 | u64 start, u64 len) | ||
2341 | { | ||
2342 | int offset; | ||
2343 | int nsectors; | ||
2344 | int sectorsize = sparity->sctx->dev_root->sectorsize; | ||
2345 | |||
2346 | if (len >= sparity->stripe_len) { | ||
2347 | bitmap_set(bitmap, 0, sparity->nsectors); | ||
2348 | return; | ||
2349 | } | ||
2350 | |||
2351 | start -= sparity->logic_start; | ||
2352 | offset = (int)do_div(start, sparity->stripe_len); | ||
2353 | offset /= sectorsize; | ||
2354 | nsectors = (int)len / sectorsize; | ||
2355 | |||
2356 | if (offset + nsectors <= sparity->nsectors) { | ||
2357 | bitmap_set(bitmap, offset, nsectors); | ||
2358 | return; | ||
2359 | } | ||
2360 | |||
2361 | bitmap_set(bitmap, offset, sparity->nsectors - offset); | ||
2362 | bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset)); | ||
2363 | } | ||
2364 | |||
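__scrub_mark_bitmap above converts a byte range inside the full stripe into sector bits, wrapping around when the range crosses the end of the per-device stripe. The arithmetic is easier to check with concrete numbers; in the sketch below the stripe length, sector size and the values in main are invented for illustration.

#include <stdio.h>

#define STRIPE_LEN 65536
#define SECTORSIZE 4096
#define NSECTORS   (STRIPE_LEN / SECTORSIZE)            /* 16 */

static void mark(unsigned long *bitmap, unsigned long long logic_start,
                 unsigned long long start, unsigned long long len)
{
        int offset, nsectors, i;

        if (len >= STRIPE_LEN) {
                *bitmap = (1UL << NSECTORS) - 1;        /* whole stripe */
                return;
        }

        start -= logic_start;
        offset = (int)(start % STRIPE_LEN) / SECTORSIZE;
        nsectors = (int)(len / SECTORSIZE);

        for (i = 0; i < nsectors; i++)
                *bitmap |= 1UL << ((offset + i) % NSECTORS);    /* wraps like the kernel code */
}

int main(void)
{
        unsigned long bitmap = 0;

        /* 5 sectors starting 14 sectors into the stripe: bits 14, 15, then 0, 1, 2 */
        mark(&bitmap, 0, 14 * SECTORSIZE, 5 * SECTORSIZE);
        printf("bitmap = 0x%lx\n", bitmap);             /* prints 0xc007 */
        return 0;
}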
2365 | static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity, | ||
2366 | u64 start, u64 len) | ||
2367 | { | ||
2368 | __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len); | ||
2369 | } | ||
2370 | |||
2371 | static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity, | ||
2372 | u64 start, u64 len) | ||
2373 | { | ||
2374 | __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len); | ||
2375 | } | ||
2376 | |||
2127 | static void scrub_block_complete(struct scrub_block *sblock) | 2377 | static void scrub_block_complete(struct scrub_block *sblock) |
2128 | { | 2378 | { |
2379 | int corrupted = 0; | ||
2380 | |||
2129 | if (!sblock->no_io_error_seen) { | 2381 | if (!sblock->no_io_error_seen) { |
2382 | corrupted = 1; | ||
2130 | scrub_handle_errored_block(sblock); | 2383 | scrub_handle_errored_block(sblock); |
2131 | } else { | 2384 | } else { |
2132 | /* | 2385 | /* |
@@ -2134,9 +2387,19 @@ static void scrub_block_complete(struct scrub_block *sblock) | |||
2134 | * dev replace case, otherwise write here in dev replace | 2387 | * dev replace case, otherwise write here in dev replace |
2135 | * case. | 2388 | * case. |
2136 | */ | 2389 | */ |
2137 | if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace) | 2390 | corrupted = scrub_checksum(sblock); |
2391 | if (!corrupted && sblock->sctx->is_dev_replace) | ||
2138 | scrub_write_block_to_dev_replace(sblock); | 2392 | scrub_write_block_to_dev_replace(sblock); |
2139 | } | 2393 | } |
2394 | |||
2395 | if (sblock->sparity && corrupted && !sblock->data_corrected) { | ||
2396 | u64 start = sblock->pagev[0]->logical; | ||
2397 | u64 end = sblock->pagev[sblock->page_count - 1]->logical + | ||
2398 | PAGE_SIZE; | ||
2399 | |||
2400 | scrub_parity_mark_sectors_error(sblock->sparity, | ||
2401 | start, end - start); | ||
2402 | } | ||
2140 | } | 2403 | } |
2141 | 2404 | ||
2142 | static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, | 2405 | static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, |
@@ -2228,6 +2491,132 @@ behind_scrub_pages: | |||
2228 | return 0; | 2491 | return 0; |
2229 | } | 2492 | } |
2230 | 2493 | ||
2494 | static int scrub_pages_for_parity(struct scrub_parity *sparity, | ||
2495 | u64 logical, u64 len, | ||
2496 | u64 physical, struct btrfs_device *dev, | ||
2497 | u64 flags, u64 gen, int mirror_num, u8 *csum) | ||
2498 | { | ||
2499 | struct scrub_ctx *sctx = sparity->sctx; | ||
2500 | struct scrub_block *sblock; | ||
2501 | int index; | ||
2502 | |||
2503 | sblock = kzalloc(sizeof(*sblock), GFP_NOFS); | ||
2504 | if (!sblock) { | ||
2505 | spin_lock(&sctx->stat_lock); | ||
2506 | sctx->stat.malloc_errors++; | ||
2507 | spin_unlock(&sctx->stat_lock); | ||
2508 | return -ENOMEM; | ||
2509 | } | ||
2510 | |||
2511 | /* one ref inside this function, plus one for each page added to | ||
2512 | * a bio later on */ | ||
2513 | atomic_set(&sblock->ref_count, 1); | ||
2514 | sblock->sctx = sctx; | ||
2515 | sblock->no_io_error_seen = 1; | ||
2516 | sblock->sparity = sparity; | ||
2517 | scrub_parity_get(sparity); | ||
2518 | |||
2519 | for (index = 0; len > 0; index++) { | ||
2520 | struct scrub_page *spage; | ||
2521 | u64 l = min_t(u64, len, PAGE_SIZE); | ||
2522 | |||
2523 | spage = kzalloc(sizeof(*spage), GFP_NOFS); | ||
2524 | if (!spage) { | ||
2525 | leave_nomem: | ||
2526 | spin_lock(&sctx->stat_lock); | ||
2527 | sctx->stat.malloc_errors++; | ||
2528 | spin_unlock(&sctx->stat_lock); | ||
2529 | scrub_block_put(sblock); | ||
2530 | return -ENOMEM; | ||
2531 | } | ||
2532 | BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK); | ||
2533 | /* For scrub block */ | ||
2534 | scrub_page_get(spage); | ||
2535 | sblock->pagev[index] = spage; | ||
2536 | /* For scrub parity */ | ||
2537 | scrub_page_get(spage); | ||
2538 | list_add_tail(&spage->list, &sparity->spages); | ||
2539 | spage->sblock = sblock; | ||
2540 | spage->dev = dev; | ||
2541 | spage->flags = flags; | ||
2542 | spage->generation = gen; | ||
2543 | spage->logical = logical; | ||
2544 | spage->physical = physical; | ||
2545 | spage->mirror_num = mirror_num; | ||
2546 | if (csum) { | ||
2547 | spage->have_csum = 1; | ||
2548 | memcpy(spage->csum, csum, sctx->csum_size); | ||
2549 | } else { | ||
2550 | spage->have_csum = 0; | ||
2551 | } | ||
2552 | sblock->page_count++; | ||
2553 | spage->page = alloc_page(GFP_NOFS); | ||
2554 | if (!spage->page) | ||
2555 | goto leave_nomem; | ||
2556 | len -= l; | ||
2557 | logical += l; | ||
2558 | physical += l; | ||
2559 | } | ||
2560 | |||
2561 | WARN_ON(sblock->page_count == 0); | ||
2562 | for (index = 0; index < sblock->page_count; index++) { | ||
2563 | struct scrub_page *spage = sblock->pagev[index]; | ||
2564 | int ret; | ||
2565 | |||
2566 | ret = scrub_add_page_to_rd_bio(sctx, spage); | ||
2567 | if (ret) { | ||
2568 | scrub_block_put(sblock); | ||
2569 | return ret; | ||
2570 | } | ||
2571 | } | ||
2572 | |||
2573 | /* last one frees, either here or in bio completion for last page */ | ||
2574 | scrub_block_put(sblock); | ||
2575 | return 0; | ||
2576 | } | ||
2577 | |||
2578 | static int scrub_extent_for_parity(struct scrub_parity *sparity, | ||
2579 | u64 logical, u64 len, | ||
2580 | u64 physical, struct btrfs_device *dev, | ||
2581 | u64 flags, u64 gen, int mirror_num) | ||
2582 | { | ||
2583 | struct scrub_ctx *sctx = sparity->sctx; | ||
2584 | int ret; | ||
2585 | u8 csum[BTRFS_CSUM_SIZE]; | ||
2586 | u32 blocksize; | ||
2587 | |||
2588 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | ||
2589 | blocksize = sctx->sectorsize; | ||
2590 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
2591 | blocksize = sctx->nodesize; | ||
2592 | } else { | ||
2593 | blocksize = sctx->sectorsize; | ||
2594 | WARN_ON(1); | ||
2595 | } | ||
2596 | |||
2597 | while (len) { | ||
2598 | u64 l = min_t(u64, len, blocksize); | ||
2599 | int have_csum = 0; | ||
2600 | |||
2601 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | ||
2602 | /* push csums to sbio */ | ||
2603 | have_csum = scrub_find_csum(sctx, logical, l, csum); | ||
2604 | if (have_csum == 0) | ||
2605 | goto skip; | ||
2606 | } | ||
2607 | ret = scrub_pages_for_parity(sparity, logical, l, physical, dev, | ||
2608 | flags, gen, mirror_num, | ||
2609 | have_csum ? csum : NULL); | ||
2610 | skip: | ||
2611 | if (ret) | ||
2612 | return ret; | ||
2613 | len -= l; | ||
2614 | logical += l; | ||
2615 | physical += l; | ||
2616 | } | ||
2617 | return 0; | ||
2618 | } | ||
2619 | |||
2231 | /* | 2620 | /* |
2232 | * Given a physical address, this will calculate its | 2621 | * Given a physical address, this will calculate its
2233 | * logical offset. If this is a parity stripe, it will return | 2622 | * logical offset. If this is a parity stripe, it will return
@@ -2236,7 +2625,8 @@ behind_scrub_pages: | |||
2236 | * return 0 if it is a data stripe, 1 means parity stripe. | 2625 | * return 0 if it is a data stripe, 1 means parity stripe. |
2237 | */ | 2626 | */ |
2238 | static int get_raid56_logic_offset(u64 physical, int num, | 2627 | static int get_raid56_logic_offset(u64 physical, int num, |
2239 | struct map_lookup *map, u64 *offset) | 2628 | struct map_lookup *map, u64 *offset, |
2629 | u64 *stripe_start) | ||
2240 | { | 2630 | { |
2241 | int i; | 2631 | int i; |
2242 | int j = 0; | 2632 | int j = 0; |
@@ -2247,6 +2637,9 @@ static int get_raid56_logic_offset(u64 physical, int num, | |||
2247 | 2637 | ||
2248 | last_offset = (physical - map->stripes[num].physical) * | 2638 | last_offset = (physical - map->stripes[num].physical) * |
2249 | nr_data_stripes(map); | 2639 | nr_data_stripes(map); |
2640 | if (stripe_start) | ||
2641 | *stripe_start = last_offset; | ||
2642 | |||
2250 | *offset = last_offset; | 2643 | *offset = last_offset; |
2251 | for (i = 0; i < nr_data_stripes(map); i++) { | 2644 | for (i = 0; i < nr_data_stripes(map); i++) { |
2252 | *offset = last_offset + i * map->stripe_len; | 2645 | *offset = last_offset + i * map->stripe_len; |
@@ -2269,13 +2662,330 @@ static int get_raid56_logic_offset(u64 physical, int num, | |||
2269 | return 1; | 2662 | return 1; |
2270 | } | 2663 | } |
2271 | 2664 | ||
2665 | static void scrub_free_parity(struct scrub_parity *sparity) | ||
2666 | { | ||
2667 | struct scrub_ctx *sctx = sparity->sctx; | ||
2668 | struct scrub_page *curr, *next; | ||
2669 | int nbits; | ||
2670 | |||
2671 | nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors); | ||
2672 | if (nbits) { | ||
2673 | spin_lock(&sctx->stat_lock); | ||
2674 | sctx->stat.read_errors += nbits; | ||
2675 | sctx->stat.uncorrectable_errors += nbits; | ||
2676 | spin_unlock(&sctx->stat_lock); | ||
2677 | } | ||
2678 | |||
2679 | list_for_each_entry_safe(curr, next, &sparity->spages, list) { | ||
2680 | list_del_init(&curr->list); | ||
2681 | scrub_page_put(curr); | ||
2682 | } | ||
2683 | |||
2684 | kfree(sparity); | ||
2685 | } | ||
2686 | |||
2687 | static void scrub_parity_bio_endio(struct bio *bio, int error) | ||
2688 | { | ||
2689 | struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private; | ||
2690 | struct scrub_ctx *sctx = sparity->sctx; | ||
2691 | |||
2692 | if (error) | ||
2693 | bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, | ||
2694 | sparity->nsectors); | ||
2695 | |||
2696 | scrub_free_parity(sparity); | ||
2697 | scrub_pending_bio_dec(sctx); | ||
2698 | bio_put(bio); | ||
2699 | } | ||
2700 | |||
2701 | static void scrub_parity_check_and_repair(struct scrub_parity *sparity) | ||
2702 | { | ||
2703 | struct scrub_ctx *sctx = sparity->sctx; | ||
2704 | struct bio *bio; | ||
2705 | struct btrfs_raid_bio *rbio; | ||
2706 | struct scrub_page *spage; | ||
2707 | struct btrfs_bio *bbio = NULL; | ||
2708 | u64 *raid_map = NULL; | ||
2709 | u64 length; | ||
2710 | int ret; | ||
2711 | |||
2712 | if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap, | ||
2713 | sparity->nsectors)) | ||
2714 | goto out; | ||
2715 | |||
2716 | length = sparity->logic_end - sparity->logic_start + 1; | ||
2717 | ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE, | ||
2718 | sparity->logic_start, | ||
2719 | &length, &bbio, 0, &raid_map); | ||
2720 | if (ret || !bbio || !raid_map) | ||
2721 | goto bbio_out; | ||
2722 | |||
2723 | bio = btrfs_io_bio_alloc(GFP_NOFS, 0); | ||
2724 | if (!bio) | ||
2725 | goto bbio_out; | ||
2726 | |||
2727 | bio->bi_iter.bi_sector = sparity->logic_start >> 9; | ||
2728 | bio->bi_private = sparity; | ||
2729 | bio->bi_end_io = scrub_parity_bio_endio; | ||
2730 | |||
2731 | rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio, | ||
2732 | raid_map, length, | ||
2733 | sparity->scrub_dev, | ||
2734 | sparity->dbitmap, | ||
2735 | sparity->nsectors); | ||
2736 | if (!rbio) | ||
2737 | goto rbio_out; | ||
2738 | |||
2739 | list_for_each_entry(spage, &sparity->spages, list) | ||
2740 | raid56_parity_add_scrub_pages(rbio, spage->page, | ||
2741 | spage->logical); | ||
2742 | |||
2743 | scrub_pending_bio_inc(sctx); | ||
2744 | raid56_parity_submit_scrub_rbio(rbio); | ||
2745 | return; | ||
2746 | |||
2747 | rbio_out: | ||
2748 | bio_put(bio); | ||
2749 | bbio_out: | ||
2750 | kfree(bbio); | ||
2751 | kfree(raid_map); | ||
2752 | bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, | ||
2753 | sparity->nsectors); | ||
2754 | spin_lock(&sctx->stat_lock); | ||
2755 | sctx->stat.malloc_errors++; | ||
2756 | spin_unlock(&sctx->stat_lock); | ||
2757 | out: | ||
2758 | scrub_free_parity(sparity); | ||
2759 | } | ||
2760 | |||
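The first thing scrub_parity_check_and_repair does is drop every sector that already failed (ebitmap) from the set of sectors known to carry data (dbitmap); if nothing survives, there is no parity worth checking and the rbio is never built. That bitmap_andnot gate, modelled with plain bitwise operations and example values:

#include <stdio.h>

int main(void)
{
        unsigned long dbitmap = 0x0f;   /* sectors that carry data */
        unsigned long ebitmap = 0x05;   /* sectors whose data read/check failed */
        unsigned long remaining;

        /* bitmap_andnot(dst, src1, src2) is dst = src1 & ~src2 and reports
         * whether anything is left; a single word is enough here */
        remaining = dbitmap & ~ebitmap;
        dbitmap = remaining;

        if (!remaining)
                printf("every data sector errored out, skip the parity check\n");
        else
                printf("check parity for sectors 0x%lx\n", remaining);  /* 0xa */
        return 0;
}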
2761 | static inline int scrub_calc_parity_bitmap_len(int nsectors) | ||
2762 | { | ||
2763 | return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8); | ||
2764 | } | ||
2765 | |||
2766 | static void scrub_parity_get(struct scrub_parity *sparity) | ||
2767 | { | ||
2768 | atomic_inc(&sparity->ref_count); | ||
2769 | } | ||
2770 | |||
2771 | static void scrub_parity_put(struct scrub_parity *sparity) | ||
2772 | { | ||
2773 | if (!atomic_dec_and_test(&sparity->ref_count)) | ||
2774 | return; | ||
2775 | |||
2776 | scrub_parity_check_and_repair(sparity); | ||
2777 | } | ||
2778 | |||
2779 | static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, | ||
2780 | struct map_lookup *map, | ||
2781 | struct btrfs_device *sdev, | ||
2782 | struct btrfs_path *path, | ||
2783 | u64 logic_start, | ||
2784 | u64 logic_end) | ||
2785 | { | ||
2786 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; | ||
2787 | struct btrfs_root *root = fs_info->extent_root; | ||
2788 | struct btrfs_root *csum_root = fs_info->csum_root; | ||
2789 | struct btrfs_extent_item *extent; | ||
2790 | u64 flags; | ||
2791 | int ret; | ||
2792 | int slot; | ||
2793 | struct extent_buffer *l; | ||
2794 | struct btrfs_key key; | ||
2795 | u64 generation; | ||
2796 | u64 extent_logical; | ||
2797 | u64 extent_physical; | ||
2798 | u64 extent_len; | ||
2799 | struct btrfs_device *extent_dev; | ||
2800 | struct scrub_parity *sparity; | ||
2801 | int nsectors; | ||
2802 | int bitmap_len; | ||
2803 | int extent_mirror_num; | ||
2804 | int stop_loop = 0; | ||
2805 | |||
2806 | nsectors = map->stripe_len / root->sectorsize; | ||
2807 | bitmap_len = scrub_calc_parity_bitmap_len(nsectors); | ||
2808 | sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len, | ||
2809 | GFP_NOFS); | ||
2810 | if (!sparity) { | ||
2811 | spin_lock(&sctx->stat_lock); | ||
2812 | sctx->stat.malloc_errors++; | ||
2813 | spin_unlock(&sctx->stat_lock); | ||
2814 | return -ENOMEM; | ||
2815 | } | ||
2816 | |||
2817 | sparity->stripe_len = map->stripe_len; | ||
2818 | sparity->nsectors = nsectors; | ||
2819 | sparity->sctx = sctx; | ||
2820 | sparity->scrub_dev = sdev; | ||
2821 | sparity->logic_start = logic_start; | ||
2822 | sparity->logic_end = logic_end; | ||
2823 | atomic_set(&sparity->ref_count, 1); | ||
2824 | INIT_LIST_HEAD(&sparity->spages); | ||
2825 | sparity->dbitmap = sparity->bitmap; | ||
2826 | sparity->ebitmap = (void *)sparity->bitmap + bitmap_len; | ||
2827 | |||
2828 | ret = 0; | ||
2829 | while (logic_start < logic_end) { | ||
2830 | if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) | ||
2831 | key.type = BTRFS_METADATA_ITEM_KEY; | ||
2832 | else | ||
2833 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
2834 | key.objectid = logic_start; | ||
2835 | key.offset = (u64)-1; | ||
2836 | |||
2837 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
2838 | if (ret < 0) | ||
2839 | goto out; | ||
2840 | |||
2841 | if (ret > 0) { | ||
2842 | ret = btrfs_previous_extent_item(root, path, 0); | ||
2843 | if (ret < 0) | ||
2844 | goto out; | ||
2845 | if (ret > 0) { | ||
2846 | btrfs_release_path(path); | ||
2847 | ret = btrfs_search_slot(NULL, root, &key, | ||
2848 | path, 0, 0); | ||
2849 | if (ret < 0) | ||
2850 | goto out; | ||
2851 | } | ||
2852 | } | ||
2853 | |||
2854 | stop_loop = 0; | ||
2855 | while (1) { | ||
2856 | u64 bytes; | ||
2857 | |||
2858 | l = path->nodes[0]; | ||
2859 | slot = path->slots[0]; | ||
2860 | if (slot >= btrfs_header_nritems(l)) { | ||
2861 | ret = btrfs_next_leaf(root, path); | ||
2862 | if (ret == 0) | ||
2863 | continue; | ||
2864 | if (ret < 0) | ||
2865 | goto out; | ||
2866 | |||
2867 | stop_loop = 1; | ||
2868 | break; | ||
2869 | } | ||
2870 | btrfs_item_key_to_cpu(l, &key, slot); | ||
2871 | |||
2872 | if (key.type == BTRFS_METADATA_ITEM_KEY) | ||
2873 | bytes = root->nodesize; | ||
2874 | else | ||
2875 | bytes = key.offset; | ||
2876 | |||
2877 | if (key.objectid + bytes <= logic_start) | ||
2878 | goto next; | ||
2879 | |||
2880 | if (key.type != BTRFS_EXTENT_ITEM_KEY && | ||
2881 | key.type != BTRFS_METADATA_ITEM_KEY) | ||
2882 | goto next; | ||
2883 | |||
2884 | if (key.objectid > logic_end) { | ||
2885 | stop_loop = 1; | ||
2886 | break; | ||
2887 | } | ||
2888 | |||
2889 | while (key.objectid >= logic_start + map->stripe_len) | ||
2890 | logic_start += map->stripe_len; | ||
2891 | |||
2892 | extent = btrfs_item_ptr(l, slot, | ||
2893 | struct btrfs_extent_item); | ||
2894 | flags = btrfs_extent_flags(l, extent); | ||
2895 | generation = btrfs_extent_generation(l, extent); | ||
2896 | |||
2897 | if (key.objectid < logic_start && | ||
2898 | (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { | ||
2899 | btrfs_err(fs_info, | ||
2900 | "scrub: tree block %llu spanning stripes, ignored. logical=%llu", | ||
2901 | key.objectid, logic_start); | ||
2902 | goto next; | ||
2903 | } | ||
2904 | again: | ||
2905 | extent_logical = key.objectid; | ||
2906 | extent_len = bytes; | ||
2907 | |||
2908 | if (extent_logical < logic_start) { | ||
2909 | extent_len -= logic_start - extent_logical; | ||
2910 | extent_logical = logic_start; | ||
2911 | } | ||
2912 | |||
2913 | if (extent_logical + extent_len > | ||
2914 | logic_start + map->stripe_len) | ||
2915 | extent_len = logic_start + map->stripe_len - | ||
2916 | extent_logical; | ||
2917 | |||
2918 | scrub_parity_mark_sectors_data(sparity, extent_logical, | ||
2919 | extent_len); | ||
2920 | |||
2921 | scrub_remap_extent(fs_info, extent_logical, | ||
2922 | extent_len, &extent_physical, | ||
2923 | &extent_dev, | ||
2924 | &extent_mirror_num); | ||
2925 | |||
2926 | ret = btrfs_lookup_csums_range(csum_root, | ||
2927 | extent_logical, | ||
2928 | extent_logical + extent_len - 1, | ||
2929 | &sctx->csum_list, 1); | ||
2930 | if (ret) | ||
2931 | goto out; | ||
2932 | |||
2933 | ret = scrub_extent_for_parity(sparity, extent_logical, | ||
2934 | extent_len, | ||
2935 | extent_physical, | ||
2936 | extent_dev, flags, | ||
2937 | generation, | ||
2938 | extent_mirror_num); | ||
2939 | if (ret) | ||
2940 | goto out; | ||
2941 | |||
2942 | scrub_free_csums(sctx); | ||
2943 | if (extent_logical + extent_len < | ||
2944 | key.objectid + bytes) { | ||
2945 | logic_start += map->stripe_len; | ||
2946 | |||
2947 | if (logic_start >= logic_end) { | ||
2948 | stop_loop = 1; | ||
2949 | break; | ||
2950 | } | ||
2951 | |||
2952 | if (logic_start < key.objectid + bytes) { | ||
2953 | cond_resched(); | ||
2954 | goto again; | ||
2955 | } | ||
2956 | } | ||
2957 | next: | ||
2958 | path->slots[0]++; | ||
2959 | } | ||
2960 | |||
2961 | btrfs_release_path(path); | ||
2962 | |||
2963 | if (stop_loop) | ||
2964 | break; | ||
2965 | |||
2966 | logic_start += map->stripe_len; | ||
2967 | } | ||
2968 | out: | ||
2969 | if (ret < 0) | ||
2970 | scrub_parity_mark_sectors_error(sparity, logic_start, | ||
2971 | logic_end - logic_start + 1); | ||
2972 | scrub_parity_put(sparity); | ||
2973 | scrub_submit(sctx); | ||
2974 | mutex_lock(&sctx->wr_ctx.wr_lock); | ||
2975 | scrub_wr_submit(sctx); | ||
2976 | mutex_unlock(&sctx->wr_ctx.wr_lock); | ||
2977 | |||
2978 | btrfs_release_path(path); | ||
2979 | return ret < 0 ? ret : 0; | ||
2980 | } | ||
2981 | |||
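Inside the loop above, each extent found in the extent tree is clamped to the data stripe currently being scrubbed, i.e. to [logic_start, logic_start + stripe_len), before its sectors are marked in dbitmap; whatever spills past the stripe is handled on a later pass. A short stand-alone illustration of that clamping, with invented addresses and a 64K stripe:

#include <stdio.h>
#include <stdint.h>

/* clamp [extent_logical, extent_logical + extent_len) to one stripe */
static void clamp_to_stripe(uint64_t logic_start, uint64_t stripe_len,
                            uint64_t *extent_logical, uint64_t *extent_len)
{
        if (*extent_logical < logic_start) {
                *extent_len -= logic_start - *extent_logical;
                *extent_logical = logic_start;
        }
        if (*extent_logical + *extent_len > logic_start + stripe_len)
                *extent_len = logic_start + stripe_len - *extent_logical;
}

int main(void)
{
        uint64_t logical = 90112, len = 131072;         /* extent crosses the stripe */
        uint64_t logic_start = 65536, stripe_len = 65536;

        clamp_to_stripe(logic_start, stripe_len, &logical, &len);
        printf("scrub %llu..%llu in this pass\n",
               (unsigned long long)logical,
               (unsigned long long)(logical + len));
        /* prints 90112..131072: the tail beyond 131072 waits for the next stripe */
        return 0;
}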
2272 | static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | 2982 | static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, |
2273 | struct map_lookup *map, | 2983 | struct map_lookup *map, |
2274 | struct btrfs_device *scrub_dev, | 2984 | struct btrfs_device *scrub_dev, |
2275 | int num, u64 base, u64 length, | 2985 | int num, u64 base, u64 length, |
2276 | int is_dev_replace) | 2986 | int is_dev_replace) |
2277 | { | 2987 | { |
2278 | struct btrfs_path *path; | 2988 | struct btrfs_path *path, *ppath; |
2279 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; | 2989 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; |
2280 | struct btrfs_root *root = fs_info->extent_root; | 2990 | struct btrfs_root *root = fs_info->extent_root; |
2281 | struct btrfs_root *csum_root = fs_info->csum_root; | 2991 | struct btrfs_root *csum_root = fs_info->csum_root; |
@@ -2302,6 +3012,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
2302 | u64 extent_logical; | 3012 | u64 extent_logical; |
2303 | u64 extent_physical; | 3013 | u64 extent_physical; |
2304 | u64 extent_len; | 3014 | u64 extent_len; |
3015 | u64 stripe_logical; | ||
3016 | u64 stripe_end; | ||
2305 | struct btrfs_device *extent_dev; | 3017 | struct btrfs_device *extent_dev; |
2306 | int extent_mirror_num; | 3018 | int extent_mirror_num; |
2307 | int stop_loop = 0; | 3019 | int stop_loop = 0; |
@@ -2327,7 +3039,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
2327 | mirror_num = num % map->num_stripes + 1; | 3039 | mirror_num = num % map->num_stripes + 1; |
2328 | } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | 3040 | } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | |
2329 | BTRFS_BLOCK_GROUP_RAID6)) { | 3041 | BTRFS_BLOCK_GROUP_RAID6)) { |
2330 | get_raid56_logic_offset(physical, num, map, &offset); | 3042 | get_raid56_logic_offset(physical, num, map, &offset, NULL); |
2331 | increment = map->stripe_len * nr_data_stripes(map); | 3043 | increment = map->stripe_len * nr_data_stripes(map); |
2332 | mirror_num = 1; | 3044 | mirror_num = 1; |
2333 | } else { | 3045 | } else { |
@@ -2339,6 +3051,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
2339 | if (!path) | 3051 | if (!path) |
2340 | return -ENOMEM; | 3052 | return -ENOMEM; |
2341 | 3053 | ||
3054 | ppath = btrfs_alloc_path(); | ||
3055 | if (!ppath) { | ||
3056 | btrfs_free_path(ppath); | ||
3057 | return -ENOMEM; | ||
3058 | } | ||
3059 | |||
2342 | /* | 3060 | /* |
2343 | * work on commit root. The related disk blocks are static as | 3061 | * work on commit root. The related disk blocks are static as |
2344 | * long as COW is applied. This means it is safe to rewrite | 3062 | * long as COW is applied. This means it is safe to rewrite
@@ -2357,7 +3075,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
2357 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | 3075 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | |
2358 | BTRFS_BLOCK_GROUP_RAID6)) { | 3076 | BTRFS_BLOCK_GROUP_RAID6)) { |
2359 | get_raid56_logic_offset(physical_end, num, | 3077 | get_raid56_logic_offset(physical_end, num, |
2360 | map, &logic_end); | 3078 | map, &logic_end, NULL); |
2361 | logic_end += base; | 3079 | logic_end += base; |
2362 | } else { | 3080 | } else { |
2363 | logic_end = logical + increment * nstripes; | 3081 | logic_end = logical + increment * nstripes; |
@@ -2404,10 +3122,18 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
2404 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | 3122 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | |
2405 | BTRFS_BLOCK_GROUP_RAID6)) { | 3123 | BTRFS_BLOCK_GROUP_RAID6)) { |
2406 | ret = get_raid56_logic_offset(physical, num, | 3124 | ret = get_raid56_logic_offset(physical, num, |
2407 | map, &logical); | 3125 | map, &logical, &stripe_logical); |
2408 | logical += base; | 3126 | logical += base; |
2409 | if (ret) | 3127 | if (ret) { |
3128 | stripe_logical += base; | ||
3129 | stripe_end = stripe_logical + increment - 1; | ||
3130 | ret = scrub_raid56_parity(sctx, map, scrub_dev, | ||
3131 | ppath, stripe_logical, | ||
3132 | stripe_end); | ||
3133 | if (ret) | ||
3134 | goto out; | ||
2410 | goto skip; | 3135 | goto skip; |
3136 | } | ||
2411 | } | 3137 | } |
2412 | /* | 3138 | /* |
2413 | * canceled? | 3139 | * canceled? |
@@ -2558,13 +3284,25 @@ again: | |||
2558 | * loop until we find next data stripe | 3284 | * loop until we find next data stripe |
2559 | * or we have finished all stripes. | 3285 | * or we have finished all stripes. |
2560 | */ | 3286 | */ |
2561 | do { | 3287 | loop: |
2562 | physical += map->stripe_len; | 3288 | physical += map->stripe_len; |
2563 | ret = get_raid56_logic_offset( | 3289 | ret = get_raid56_logic_offset(physical, |
2564 | physical, num, | 3290 | num, map, &logical, |
2565 | map, &logical); | 3291 | &stripe_logical); |
2566 | logical += base; | 3292 | logical += base; |
2567 | } while (physical < physical_end && ret); | 3293 | |
3294 | if (ret && physical < physical_end) { | ||
3295 | stripe_logical += base; | ||
3296 | stripe_end = stripe_logical + | ||
3297 | increment - 1; | ||
3298 | ret = scrub_raid56_parity(sctx, | ||
3299 | map, scrub_dev, ppath, | ||
3300 | stripe_logical, | ||
3301 | stripe_end); | ||
3302 | if (ret) | ||
3303 | goto out; | ||
3304 | goto loop; | ||
3305 | } | ||
2568 | } else { | 3306 | } else { |
2569 | physical += map->stripe_len; | 3307 | physical += map->stripe_len; |
2570 | logical += increment; | 3308 | logical += increment; |
@@ -2605,6 +3343,7 @@ out: | |||
2605 | 3343 | ||
2606 | blk_finish_plug(&plug); | 3344 | blk_finish_plug(&plug); |
2607 | btrfs_free_path(path); | 3345 | btrfs_free_path(path); |
3346 | btrfs_free_path(ppath); | ||
2608 | return ret < 0 ? ret : 0; | 3347 | return ret < 0 ? ret : 0; |
2609 | } | 3348 | } |
2610 | 3349 | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ff2b35114972..0144790e296e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -4879,13 +4879,15 @@ static inline int parity_smaller(u64 a, u64 b) | |||
4879 | static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map) | 4879 | static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map) |
4880 | { | 4880 | { |
4881 | struct btrfs_bio_stripe s; | 4881 | struct btrfs_bio_stripe s; |
4882 | int real_stripes = bbio->num_stripes - bbio->num_tgtdevs; | ||
4882 | int i; | 4883 | int i; |
4883 | u64 l; | 4884 | u64 l; |
4884 | int again = 1; | 4885 | int again = 1; |
4886 | int m; | ||
4885 | 4887 | ||
4886 | while (again) { | 4888 | while (again) { |
4887 | again = 0; | 4889 | again = 0; |
4888 | for (i = 0; i < bbio->num_stripes - 1; i++) { | 4890 | for (i = 0; i < real_stripes - 1; i++) { |
4889 | if (parity_smaller(raid_map[i], raid_map[i+1])) { | 4891 | if (parity_smaller(raid_map[i], raid_map[i+1])) { |
4890 | s = bbio->stripes[i]; | 4892 | s = bbio->stripes[i]; |
4891 | l = raid_map[i]; | 4893 | l = raid_map[i]; |
@@ -4893,6 +4895,14 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map) | |||
4893 | raid_map[i] = raid_map[i+1]; | 4895 | raid_map[i] = raid_map[i+1]; |
4894 | bbio->stripes[i+1] = s; | 4896 | bbio->stripes[i+1] = s; |
4895 | raid_map[i+1] = l; | 4897 | raid_map[i+1] = l; |
4898 | |||
4899 | if (bbio->tgtdev_map) { | ||
4900 | m = bbio->tgtdev_map[i]; | ||
4901 | bbio->tgtdev_map[i] = | ||
4902 | bbio->tgtdev_map[i + 1]; | ||
4903 | bbio->tgtdev_map[i + 1] = m; | ||
4904 | } | ||
4905 | |||
4896 | again = 1; | 4906 | again = 1; |
4897 | } | 4907 | } |
4898 | } | 4908 | } |
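sort_parity_stripes() bubble-sorts the stripes into raid_map order so the data stripes come first and P (and Q) trail them. Two things change above: only the first real_stripes entries are sorted, because any target-device copies appended for dev-replace must stay where they are, and tgtdev_map is permuted in lockstep so tgtdev_map[i] keeps naming the duplicate of stripes[i] after the sort. A standalone illustration of the co-sorting idea (userspace demo, not btrfs code; names and values are made up):

	#include <stdio.h>

	/* Bubble-sort key[] while carrying companion[] along, so the
	 * pairing (key[i], companion[i]) survives the sort. */
	static void co_sort(unsigned long long *key, int *companion, int n)
	{
		int again = 1;

		while (again) {
			again = 0;
			for (int i = 0; i < n - 1; i++) {
				if (key[i] > key[i + 1]) {
					unsigned long long k = key[i];
					int c = companion[i];

					key[i] = key[i + 1];
					companion[i] = companion[i + 1];
					key[i + 1] = k;
					companion[i + 1] = c;
					again = 1;
				}
			}
		}
	}

	int main(void)
	{
		unsigned long long raid_map[] = { 131072, 0, 65536 };
		int tgtdev_map[] = { 3, 4, 5 };

		co_sort(raid_map, tgtdev_map, 3);
		for (int i = 0; i < 3; i++)
			printf("%llu -> %d\n", raid_map[i], tgtdev_map[i]);
		return 0;
	}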
@@ -4921,6 +4931,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
4921 | int ret = 0; | 4931 | int ret = 0; |
4922 | int num_stripes; | 4932 | int num_stripes; |
4923 | int max_errors = 0; | 4933 | int max_errors = 0; |
4934 | int tgtdev_indexes = 0; | ||
4924 | struct btrfs_bio *bbio = NULL; | 4935 | struct btrfs_bio *bbio = NULL; |
4925 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | 4936 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; |
4926 | int dev_replace_is_ongoing = 0; | 4937 | int dev_replace_is_ongoing = 0; |
@@ -5159,15 +5170,14 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5159 | BTRFS_BLOCK_GROUP_RAID6)) { | 5170 | BTRFS_BLOCK_GROUP_RAID6)) { |
5160 | u64 tmp; | 5171 | u64 tmp; |
5161 | 5172 | ||
5162 | if (bbio_ret && ((rw & REQ_WRITE) || mirror_num > 1) | 5173 | if (raid_map_ret && |
5163 | && raid_map_ret) { | 5174 | ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || |
5175 | mirror_num > 1)) { | ||
5164 | int i, rot; | 5176 | int i, rot; |
5165 | 5177 | ||
5166 | /* push stripe_nr back to the start of the full stripe */ | 5178 | /* push stripe_nr back to the start of the full stripe */ |
5167 | stripe_nr = raid56_full_stripe_start; | 5179 | stripe_nr = raid56_full_stripe_start; |
5168 | do_div(stripe_nr, stripe_len); | 5180 | do_div(stripe_nr, stripe_len * nr_data_stripes(map)); |
5169 | |||
5170 | stripe_index = do_div(stripe_nr, nr_data_stripes(map)); | ||
5171 | 5181 | ||
5172 | /* RAID[56] write or recovery. Return all stripes */ | 5182 | /* RAID[56] write or recovery. Return all stripes */ |
5173 | num_stripes = map->num_stripes; | 5183 | num_stripes = map->num_stripes; |
@@ -5233,14 +5243,19 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5233 | num_alloc_stripes <<= 1; | 5243 | num_alloc_stripes <<= 1; |
5234 | if (rw & REQ_GET_READ_MIRRORS) | 5244 | if (rw & REQ_GET_READ_MIRRORS) |
5235 | num_alloc_stripes++; | 5245 | num_alloc_stripes++; |
5246 | tgtdev_indexes = num_stripes; | ||
5236 | } | 5247 | } |
5237 | bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS); | 5248 | |
5249 | bbio = kzalloc(btrfs_bio_size(num_alloc_stripes, tgtdev_indexes), | ||
5250 | GFP_NOFS); | ||
5238 | if (!bbio) { | 5251 | if (!bbio) { |
5239 | kfree(raid_map); | 5252 | kfree(raid_map); |
5240 | ret = -ENOMEM; | 5253 | ret = -ENOMEM; |
5241 | goto out; | 5254 | goto out; |
5242 | } | 5255 | } |
5243 | atomic_set(&bbio->error, 0); | 5256 | atomic_set(&bbio->error, 0); |
5257 | if (dev_replace_is_ongoing) | ||
5258 | bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes); | ||
5244 | 5259 | ||
5245 | if (rw & REQ_DISCARD) { | 5260 | if (rw & REQ_DISCARD) { |
5246 | int factor = 0; | 5261 | int factor = 0; |
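The target-device index map lives in the same allocation as the bbio itself: btrfs_bio_size() now reserves room for tgtdev_indexes ints behind the flexible stripes array, and bbio->tgtdev_map is pointed at that tail only while a replace is running (otherwise tgtdev_indexes is 0, no extra space is reserved, and the pointer stays NULL from kzalloc). The resulting layout, sketched from the lines above (actual sizes are whatever the structs happen to be; see the volumes.h hunk further down):

	/*
	 *  +------------------------------+  <- bbio (one kzalloc)
	 *  | struct btrfs_bio             |
	 *  +------------------------------+  <- bbio->stripes
	 *  | num_alloc_stripes *          |
	 *  |   struct btrfs_bio_stripe    |
	 *  +------------------------------+  <- (int *)(bbio->stripes +
	 *  | tgtdev_indexes * int         |            num_alloc_stripes)
	 *  | (== bbio->tgtdev_map)        |
	 *  +------------------------------+
	 */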
@@ -5325,6 +5340,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5325 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) | 5340 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) |
5326 | max_errors = btrfs_chunk_max_errors(map); | 5341 | max_errors = btrfs_chunk_max_errors(map); |
5327 | 5342 | ||
5343 | tgtdev_indexes = 0; | ||
5328 | if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && | 5344 | if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && |
5329 | dev_replace->tgtdev != NULL) { | 5345 | dev_replace->tgtdev != NULL) { |
5330 | int index_where_to_add; | 5346 | int index_where_to_add; |
@@ -5353,8 +5369,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5353 | new->physical = old->physical; | 5369 | new->physical = old->physical; |
5354 | new->length = old->length; | 5370 | new->length = old->length; |
5355 | new->dev = dev_replace->tgtdev; | 5371 | new->dev = dev_replace->tgtdev; |
5372 | bbio->tgtdev_map[i] = index_where_to_add; | ||
5356 | index_where_to_add++; | 5373 | index_where_to_add++; |
5357 | max_errors++; | 5374 | max_errors++; |
5375 | tgtdev_indexes++; | ||
5358 | } | 5376 | } |
5359 | } | 5377 | } |
5360 | num_stripes = index_where_to_add; | 5378 | num_stripes = index_where_to_add; |
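For a write or discard during replace, every stripe that targets the source device gets a duplicate appended that targets the replacement device, and the new bbio->tgtdev_map[i] records where stripe i's duplicate landed. A hedged reconstruction of the surrounding loop, stitched together from the context above (only the added lines are patch text; the loop skeleton and the device test are my paraphrase):

	index_where_to_add = num_stripes;
	for (i = 0; i < num_stripes; i++) {
		/* assumed test: this stripe sits on the device being replaced */
		if (bbio->stripes[i].dev->devid == srcdev_devid) {
			struct btrfs_bio_stripe *new =
				bbio->stripes + index_where_to_add;
			struct btrfs_bio_stripe *old = bbio->stripes + i;

			new->physical = old->physical;
			new->length = old->length;
			new->dev = dev_replace->tgtdev;
			bbio->tgtdev_map[i] = index_where_to_add;
			index_where_to_add++;
			max_errors++;
			tgtdev_indexes++;
		}
	}
	num_stripes = index_where_to_add;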
@@ -5400,7 +5418,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5400 | tgtdev_stripe->length = | 5418 | tgtdev_stripe->length = |
5401 | bbio->stripes[index_srcdev].length; | 5419 | bbio->stripes[index_srcdev].length; |
5402 | tgtdev_stripe->dev = dev_replace->tgtdev; | 5420 | tgtdev_stripe->dev = dev_replace->tgtdev; |
5421 | bbio->tgtdev_map[index_srcdev] = num_stripes; | ||
5403 | 5422 | ||
5423 | tgtdev_indexes++; | ||
5404 | num_stripes++; | 5424 | num_stripes++; |
5405 | } | 5425 | } |
5406 | } | 5426 | } |
@@ -5410,6 +5430,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5410 | bbio->num_stripes = num_stripes; | 5430 | bbio->num_stripes = num_stripes; |
5411 | bbio->max_errors = max_errors; | 5431 | bbio->max_errors = max_errors; |
5412 | bbio->mirror_num = mirror_num; | 5432 | bbio->mirror_num = mirror_num; |
5433 | bbio->num_tgtdevs = tgtdev_indexes; | ||
5413 | 5434 | ||
5414 | /* | 5435 | /* |
5415 | * this is the case that REQ_READ && dev_replace_is_ongoing && | 5436 | * this is the case that REQ_READ && dev_replace_is_ongoing && |
@@ -5441,6 +5462,16 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5441 | mirror_num, NULL); | 5462 | mirror_num, NULL); |
5442 | } | 5463 | } |
5443 | 5464 | ||
5465 | /* For Scrub/replace */ | ||
5466 | int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, | ||
5467 | u64 logical, u64 *length, | ||
5468 | struct btrfs_bio **bbio_ret, int mirror_num, | ||
5469 | u64 **raid_map_ret) | ||
5470 | { | ||
5471 | return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, | ||
5472 | mirror_num, raid_map_ret); | ||
5473 | } | ||
5474 | |||
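btrfs_map_sblock() is a thin wrapper that forwards to __btrfs_map_block() but exposes the raid_map out-parameter, so scrub and dev-replace can learn the full-stripe geometry (which logical address each stripe holds, and where P/Q sit) instead of the bare stripe list that btrfs_map_block() returns. A hypothetical caller sketch (variable names are mine; error handling abbreviated):

	struct btrfs_bio *bbio = NULL;
	u64 *raid_map = NULL;
	u64 length = map->stripe_len;	/* assumed mapping length */
	int ret;

	ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
			       &length, &bbio, 0, &raid_map);
	if (ret || !bbio || !raid_map)
		goto cleanup;

	/* ... walk bbio->stripes[] / raid_map[] and queue the scrub I/O ... */

cleanup:
	kfree(raid_map);
	kfree(bbio);	/* in this era the bbio is a plain kzalloc'ed buffer */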
5444 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | 5475 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, |
5445 | u64 chunk_start, u64 physical, u64 devid, | 5476 | u64 chunk_start, u64 physical, u64 devid, |
5446 | u64 **logical, int *naddrs, int *stripe_len) | 5477 | u64 **logical, int *naddrs, int *stripe_len) |
@@ -5810,12 +5841,9 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
5810 | } else { | 5841 | } else { |
5811 | ret = raid56_parity_recover(root, bio, bbio, | 5842 | ret = raid56_parity_recover(root, bio, bbio, |
5812 | raid_map, map_length, | 5843 | raid_map, map_length, |
5813 | mirror_num); | 5844 | mirror_num, 1); |
5814 | } | 5845 | } |
5815 | /* | 5846 | |
5816 | * FIXME, replace dosen't support raid56 yet, please fix | ||
5817 | * it in the future. | ||
5818 | */ | ||
5819 | btrfs_bio_counter_dec(root->fs_info); | 5847 | btrfs_bio_counter_dec(root->fs_info); |
5820 | return ret; | 5848 | return ret; |
5821 | } | 5849 | } |
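Dropping the FIXME is the point of this hunk: device replace now copes with RAID5/6, so the read-repair path no longer needs the caveat. The recovery call also grows a trailing flag (passed as 1 here); judging from the raid56 changes elsewhere in this series (the raid56.h hunk is not part of this excerpt), the prototype becomes something like the following, where generic_io is my assumed name for the new parameter that distinguishes ordinary read-repair from scrub/replace-driven rebuilds:

	int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
				  struct btrfs_bio *bbio, u64 *raid_map,
				  u64 stripe_len, int mirror_num,
				  int generic_io);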
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 637bcfadadb2..d6fe73c0f4a2 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -292,7 +292,7 @@ struct btrfs_bio_stripe { | |||
292 | struct btrfs_bio; | 292 | struct btrfs_bio; |
293 | typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); | 293 | typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); |
294 | 294 | ||
295 | #define BTRFS_BIO_ORIG_BIO_SUBMITTED 0x1 | 295 | #define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0) |
296 | 296 | ||
297 | struct btrfs_bio { | 297 | struct btrfs_bio { |
298 | atomic_t stripes_pending; | 298 | atomic_t stripes_pending; |
@@ -305,6 +305,8 @@ struct btrfs_bio { | |||
305 | int max_errors; | 305 | int max_errors; |
306 | int num_stripes; | 306 | int num_stripes; |
307 | int mirror_num; | 307 | int mirror_num; |
308 | int num_tgtdevs; | ||
309 | int *tgtdev_map; | ||
308 | struct btrfs_bio_stripe stripes[]; | 310 | struct btrfs_bio_stripe stripes[]; |
309 | }; | 311 | }; |
310 | 312 | ||
@@ -387,12 +389,18 @@ struct btrfs_balance_control { | |||
387 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | 389 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, |
388 | u64 end, u64 *length); | 390 | u64 end, u64 *length); |
389 | 391 | ||
390 | #define btrfs_bio_size(n) (sizeof(struct btrfs_bio) + \ | 392 | #define btrfs_bio_size(total_stripes, real_stripes) \ |
391 | (sizeof(struct btrfs_bio_stripe) * (n))) | 393 | (sizeof(struct btrfs_bio) + \ |
394 | (sizeof(struct btrfs_bio_stripe) * (total_stripes)) + \ | ||
395 | (sizeof(int) * (real_stripes))) | ||
392 | 396 | ||
393 | int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | 397 | int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, |
394 | u64 logical, u64 *length, | 398 | u64 logical, u64 *length, |
395 | struct btrfs_bio **bbio_ret, int mirror_num); | 399 | struct btrfs_bio **bbio_ret, int mirror_num); |
400 | int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, | ||
401 | u64 logical, u64 *length, | ||
402 | struct btrfs_bio **bbio_ret, int mirror_num, | ||
403 | u64 **raid_map_ret); | ||
396 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | 404 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, |
397 | u64 chunk_start, u64 physical, u64 devid, | 405 | u64 chunk_start, u64 physical, u64 devid, |
398 | u64 **logical, int *naddrs, int *stripe_len); | 406 | u64 **logical, int *naddrs, int *stripe_len); |
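btrfs_bio_size() now sizes the allocation for both flexible tails: total_stripes entries of struct btrfs_bio_stripe plus real_stripes ints for tgtdev_map. Tying this back to the volumes.c hunks above with illustrative numbers: a full-stripe write to a six-device RAID6 chunk while a replace is running uses num_alloc_stripes = 12 (doubled to leave room for a target copy of every stripe) and tgtdev_indexes = 6, so the request works out to

	/* sizeof() values depend on the build; the breakdown is the point */
	btrfs_bio_size(12, 6) == sizeof(struct btrfs_bio)
			       + 12 * sizeof(struct btrfs_bio_stripe)
			       +  6 * sizeof(int);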