author     Arne Jansen <sensille@gmx.net>   2011-03-08 08:14:00 -0500
committer  Arne Jansen <sensille@gmx.net>   2011-05-12 08:45:20 -0400
commit     a2de733c78fa7af51ba9670482fa7d392aa67c57 (patch)
tree       d88817bc7aba94e7efea530efac5ef190c1b799c /fs/btrfs
parent     7cf96da3ec7ca225acf4f284b0e904a1f5f98821 (diff)
btrfs: scrub
This adds an initial implementation for scrub. It works quite straightforwardly: the usermode issues an ioctl for each device in the fs. For each device, it enumerates the allocated device chunks. For each chunk, the contained extents are enumerated and the data checksums fetched. The extents are read sequentially and the checksums verified. If an error occurs (checksum or EIO), a good copy is searched for. If one is found, the bad copy is rewritten.

All enumerations happen from the commit roots. During a transaction commit, the scrubs get paused and afterwards continue from the new roots.

This commit is based on the series originally posted to linux-btrfs, with some improvements that resulted from comments by David Sterba, Ilya Dryomov and Jan Schmidt.

Signed-off-by: Arne Jansen <sensille@gmx.net>
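For illustration only, here is a rough user-space sketch of the intended control flow: open the filesystem, issue one scrub ioctl per device, and read the results from the returned progress structure. The ioctl number BTRFS_IOC_SCRUB and the argument struct shown are assumptions (the ioctl plumbing is not part of this patch; only struct btrfs_scrub_progress in ioctl.h below is):

  #include <stdio.h>
  #include <string.h>
  #include <stdint.h>
  #include <sys/ioctl.h>

  struct scrub_progress {                  /* trimmed stand-in for the real */
          uint64_t data_bytes_scrubbed;    /* struct btrfs_scrub_progress  */
          uint64_t csum_errors;
          uint64_t corrected_errors;
          uint64_t uncorrectable_errors;
  };

  struct scrub_args {                      /* assumed ioctl argument layout */
          uint64_t devid;                  /* device to scrub */
          uint64_t start;                  /* byte range on that device */
          uint64_t end;
          struct scrub_progress progress;  /* filled in by the kernel */
  };

  /* the ioctl number is an assumption; it is not defined in this patch */
  #define BTRFS_IOC_SCRUB _IOWR(0x94, 27, struct scrub_args)

  static int scrub_one_device(int fs_fd, uint64_t devid)
  {
          struct scrub_args sa;

          memset(&sa, 0, sizeof(sa));
          sa.devid = devid;
          sa.end = (uint64_t)-1;           /* scrub the whole device */

          /* blocks until the scrub finishes, fails or is canceled */
          if (ioctl(fs_fd, BTRFS_IOC_SCRUB, &sa) < 0)
                  return -1;

          printf("devid %llu: %llu bytes scrubbed, %llu csum errors, "
                 "%llu corrected, %llu uncorrectable\n",
                 (unsigned long long)devid,
                 (unsigned long long)sa.progress.data_bytes_scrubbed,
                 (unsigned long long)sa.progress.csum_errors,
                 (unsigned long long)sa.progress.corrected_errors,
                 (unsigned long long)sa.progress.uncorrectable_errors);
          return 0;
  }

A caller would obtain fs_fd from open() on any path inside the mounted filesystem and invoke scrub_one_device() once per devid, mirroring the per-device enumeration described above.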
Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/Makefile       |    2
-rw-r--r--  fs/btrfs/ctree.h        |   37
-rw-r--r--  fs/btrfs/disk-io.c      |   12
-rw-r--r--  fs/btrfs/file-item.c    |    8
-rw-r--r--  fs/btrfs/inode.c        |    2
-rw-r--r--  fs/btrfs/ioctl.h        |   37
-rw-r--r--  fs/btrfs/relocation.c   |    2
-rw-r--r--  fs/btrfs/scrub.c        | 1492
-rw-r--r--  fs/btrfs/transaction.c  |    3
-rw-r--r--  fs/btrfs/tree-log.c     |    6
-rw-r--r--  fs/btrfs/volumes.c      |    4
-rw-r--r--  fs/btrfs/volumes.h      |    6
12 files changed, 1600 insertions, 11 deletions
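The pause/resume handshake mentioned in the commit message is spread across transaction.c and scrub_stripe() in the diff below; distilled here into a simplified sketch (locking via scrub_lock and the super-block rwsem omitted; the names are the ones this patch adds to btrfs_fs_info):

  /* commit side: stop all scrubs before the roots change */
  static void pause_all_scrubs(struct btrfs_fs_info *fs_info)
  {
          atomic_inc(&fs_info->scrub_pause_req);
          /* wait until every running scrub has parked itself */
          wait_event(fs_info->scrub_pause_wait,
                     atomic_read(&fs_info->scrubs_paused) ==
                     atomic_read(&fs_info->scrubs_running));
  }

  /* scrub side: between stripes, park while a commit is in progress */
  static void scrub_maybe_park(struct btrfs_fs_info *fs_info,
                               struct scrub_dev *sdev)
  {
          if (!atomic_read(&fs_info->scrub_pause_req))
                  return;
          scrub_submit(sdev);              /* flush queued bios first */
          wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
          atomic_inc(&fs_info->scrubs_paused);
          wake_up(&fs_info->scrub_pause_wait);
          wait_event(fs_info->scrub_pause_wait,
                     atomic_read(&fs_info->scrub_pause_req) == 0);
          atomic_dec(&fs_info->scrubs_paused);
          /* csums were collected from the old commit root; drop them */
          scrub_free_csums(sdev);
  }

After resuming, scrub_stripe() restarts its csum collection from the new commit roots, which is what keeps the enumerations safe against concurrent commits.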
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 31610ea73aec..8fda3133c1b8 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -7,4 +7,4 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
 	   export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
-	   compression.o delayed-ref.o relocation.o
+	   compression.o delayed-ref.o relocation.o scrub.o
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2e61fe1b6b8c..31141ba6072d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -23,6 +23,7 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/fs.h>
+#include <linux/rwsem.h>
 #include <linux/completion.h>
 #include <linux/backing-dev.h>
 #include <linux/wait.h>
@@ -33,6 +34,7 @@
 #include "extent_io.h"
 #include "extent_map.h"
 #include "async-thread.h"
+#include "ioctl.h"
 
 struct btrfs_trans_handle;
 struct btrfs_transaction;
@@ -510,6 +512,12 @@ struct btrfs_extent_item_v0 {
 /* use full backrefs for extent pointers in the block */
 #define BTRFS_BLOCK_FLAG_FULL_BACKREF	(1ULL << 8)
 
+/*
+ * this flag is only used internally by scrub and may be changed at any time
+ * it is only declared here to avoid collisions
+ */
+#define BTRFS_EXTENT_FLAG_SUPER		(1ULL << 48)
+
 struct btrfs_tree_block_info {
 	struct btrfs_disk_key key;
 	u8 level;
@@ -1077,6 +1085,17 @@ struct btrfs_fs_info {
 
 	void *bdev_holder;
 
+	/* private scrub information */
+	struct mutex scrub_lock;
+	atomic_t scrubs_running;
+	atomic_t scrub_pause_req;
+	atomic_t scrubs_paused;
+	atomic_t scrub_cancel_req;
+	wait_queue_head_t scrub_pause_wait;
+	struct rw_semaphore scrub_super_lock;
+	int scrub_workers_refcnt;
+	struct btrfs_workers scrub_workers;
+
 	/* filesystem state */
 	u64 fs_state;
 };
@@ -2472,8 +2491,8 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
 int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct btrfs_path *path,
 			u64 isize);
-int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start,
-			     u64 end, struct list_head *list);
+int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
+			     struct list_head *list, int search_commit);
 /* inode.c */
 
 /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
@@ -2637,4 +2656,18 @@ void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
 			      u64 *bytes_to_reserve);
 void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
 			       struct btrfs_pending_snapshot *pending);
+
+/* scrub.c */
+int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
+		    struct btrfs_scrub_progress *progress);
+int btrfs_scrub_pause(struct btrfs_root *root);
+int btrfs_scrub_pause_super(struct btrfs_root *root);
+int btrfs_scrub_continue(struct btrfs_root *root);
+int btrfs_scrub_continue_super(struct btrfs_root *root);
+int btrfs_scrub_cancel(struct btrfs_root *root);
+int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev);
+int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid);
+int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
+			 struct btrfs_scrub_progress *progress);
+
 #endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index fe5aec9b3924..e48e8095c61f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1773,6 +1773,17 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	INIT_LIST_HEAD(&fs_info->ordered_extents);
 	spin_lock_init(&fs_info->ordered_extent_lock);
 
+	mutex_init(&fs_info->scrub_lock);
+	atomic_set(&fs_info->scrubs_running, 0);
+	atomic_set(&fs_info->scrub_pause_req, 0);
+	atomic_set(&fs_info->scrubs_paused, 0);
+	atomic_set(&fs_info->scrub_cancel_req, 0);
+	init_waitqueue_head(&fs_info->scrub_pause_wait);
+	init_rwsem(&fs_info->scrub_super_lock);
+	fs_info->scrub_workers_refcnt = 0;
+	btrfs_init_workers(&fs_info->scrub_workers, "scrub",
+			   fs_info->thread_pool_size, &fs_info->generic_worker);
+
 	sb->s_blocksize = 4096;
 	sb->s_blocksize_bits = blksize_bits(4096);
 	sb->s_bdi = &fs_info->bdi;
@@ -2599,6 +2610,7 @@ int close_ctree(struct btrfs_root *root)
 	fs_info->closing = 1;
 	smp_mb();
 
+	btrfs_scrub_cancel(root);
 	btrfs_put_block_group_cache(fs_info);
 
 	/*
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a6a9d4e8b491..39ca7c1250e7 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -266,7 +266,7 @@ int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
 }
 
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
-			     struct list_head *list)
+			     struct list_head *list, int search_commit)
 {
 	struct btrfs_key key;
 	struct btrfs_path *path;
@@ -283,6 +283,12 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
+	if (search_commit) {
+		path->skip_locking = 1;
+		path->reada = 2;
+		path->search_commit_root = 1;
+	}
+
 	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
 	key.offset = start;
 	key.type = BTRFS_EXTENT_CSUM_KEY;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 870869aab0b8..27142446b30a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1007,7 +1007,7 @@ static noinline int csum_exist_in_range(struct btrfs_root *root,
 	LIST_HEAD(list);
 
 	ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr,
-				       bytenr + num_bytes - 1, &list);
+				       bytenr + num_bytes - 1, &list, 0);
 	if (ret == 0 && list_empty(&list))
 		return 0;
 
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 8fb382167b13..37ac030d64b4 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -42,6 +42,43 @@ struct btrfs_ioctl_vol_args_v2 {
 	char name[BTRFS_SUBVOL_NAME_MAX + 1];
 };
 
+/*
+ * structure to report errors and progress to userspace, either as a
+ * result of a finished scrub, a canceled scrub or a progress inquiry
+ */
+struct btrfs_scrub_progress {
+	__u64 data_extents_scrubbed;	/* # of data extents scrubbed */
+	__u64 tree_extents_scrubbed;	/* # of tree extents scrubbed */
+	__u64 data_bytes_scrubbed;	/* # of data bytes scrubbed */
+	__u64 tree_bytes_scrubbed;	/* # of tree bytes scrubbed */
+	__u64 read_errors;		/* # of read errors encountered (EIO) */
+	__u64 csum_errors;		/* # of failed csum checks */
+	__u64 verify_errors;		/* # of occurrences where the metadata
+					 * of a tree block did not match the
+					 * expected values, like generation or
+					 * logical */
+	__u64 no_csum;			/* # of 4k data blocks for which no
+					 * csum is present, probably the result
+					 * of data written with nodatasum */
+	__u64 csum_discards;		/* # of csums for which no data was
+					 * found in the extent tree. */
+	__u64 super_errors;		/* # of bad super blocks encountered */
+	__u64 malloc_errors;		/* # of internal kmalloc errors. These
+					 * will likely cause an incomplete
+					 * scrub */
+	__u64 uncorrectable_errors;	/* # of errors where either no intact
+					 * copy was found or the writeback
+					 * failed */
+	__u64 corrected_errors;		/* # of errors corrected */
+	__u64 last_physical;		/* last physical address scrubbed. In
+					 * case a scrub was aborted, this can
+					 * be used to restart the scrub */
+	__u64 unverified_errors;	/* # of occurrences where a read for a
+					 * full (64k) bio failed, but the re-
+					 * check succeeded for each 4k piece.
+					 * Intermittent error. */
+};
+
 #define BTRFS_INO_LOOKUP_PATH_MAX 4080
 struct btrfs_ioctl_ino_lookup_args {
 	__u64 treeid;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 58250e09eb05..db1dffa9952b 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4242,7 +4242,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
 
 	disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
 	ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
-				       disk_bytenr + len - 1, &list);
+				       disk_bytenr + len - 1, &list, 0);
 
 	while (!list_empty(&list)) {
 		sums = list_entry(list.next, struct btrfs_ordered_sum, list);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
new file mode 100644
index 000000000000..70f9fa772ee9
--- /dev/null
+++ b/fs/btrfs/scrub.c
@@ -0,0 +1,1492 @@
+/*
+ * Copyright (C) 2011 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/sched.h>
+#include <linux/pagemap.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include "ctree.h"
+#include "volumes.h"
+#include "disk-io.h"
+#include "ordered-data.h"
+
+/*
+ * This is only the first step towards a full-featured scrub. It reads all
+ * extents and super blocks and verifies the checksums. In case a bad checksum
+ * is found or the extent cannot be read, good data will be written back if
+ * any can be found.
+ *
+ * Future enhancements:
+ *  - To enhance the performance, better read-ahead strategies for the
+ *    extent-tree can be employed.
+ *  - In case an unrepairable extent is encountered, track which files are
+ *    affected and report them
+ *  - In case of a read error on files with nodatasum, map the file and read
+ *    the extent to trigger a writeback of the good copy
+ *  - track and record media errors, throw out bad devices
+ *  - add a readonly mode
+ *  - add a mode to also read unallocated space
+ *  - make the prefetch cancellable
+ */
+
+struct scrub_bio;
+struct scrub_page;
+struct scrub_dev;
+struct scrub_fixup;
+static void scrub_bio_end_io(struct bio *bio, int err);
+static void scrub_checksum(struct btrfs_work *work);
+static int scrub_checksum_data(struct scrub_dev *sdev,
+                               struct scrub_page *spag, void *buffer);
+static int scrub_checksum_tree_block(struct scrub_dev *sdev,
+                                     struct scrub_page *spag, u64 logical,
+                                     void *buffer);
+static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
+static void scrub_recheck_end_io(struct bio *bio, int err);
+static void scrub_fixup_worker(struct btrfs_work *work);
+static void scrub_fixup(struct scrub_fixup *fixup);
+
+#define SCRUB_PAGES_PER_BIO	16	/* 64k per bio */
+#define SCRUB_BIOS_PER_DEV	16	/* 1 MB per device in flight */
+
+struct scrub_page {
+        u64 flags;  /* extent flags */
+        u64 generation;
+        u64 mirror_num;
+        int have_csum;
+        u8 csum[BTRFS_CSUM_SIZE];
+};
+
+struct scrub_bio {
+        int index;
+        struct scrub_dev *sdev;
+        struct bio *bio;
+        int err;
+        u64 logical;
+        u64 physical;
+        struct scrub_page spag[SCRUB_PAGES_PER_BIO];
+        u64 count;
+        int next_free;
+        struct btrfs_work work;
+};
+
+struct scrub_dev {
+        struct scrub_bio *bios[SCRUB_BIOS_PER_DEV];
+        struct btrfs_device *dev;
+        int first_free;
+        int curr;
+        atomic_t in_flight;
+        spinlock_t list_lock;
+        wait_queue_head_t list_wait;
+        u16 csum_size;
+        struct list_head csum_list;
+        atomic_t cancel_req;
+        /*
+         * statistics
+         */
+        struct btrfs_scrub_progress stat;
+        spinlock_t stat_lock;
+};
+
+struct scrub_fixup {
+        struct scrub_dev *sdev;
+        struct bio *bio;
+        u64 logical;
+        u64 physical;
+        struct scrub_page spag;
+        struct btrfs_work work;
+        int err;
+        int recheck;
+};
+
+static void scrub_free_csums(struct scrub_dev *sdev)
+{
+        while (!list_empty(&sdev->csum_list)) {
+                struct btrfs_ordered_sum *sum;
+                sum = list_first_entry(&sdev->csum_list,
+                                       struct btrfs_ordered_sum, list);
+                list_del(&sum->list);
+                kfree(sum);
+        }
+}
+
+static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
+{
+        int i;
+        int j;
+        struct page *last_page;
+
+        if (!sdev)
+                return;
+
+        for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
+                struct scrub_bio *sbio = sdev->bios[i];
+                struct bio *bio;
+
+                if (!sbio)
+                        break;
+
+                bio = sbio->bio;
+                if (bio) {
+                        last_page = NULL;
+                        for (j = 0; j < bio->bi_vcnt; ++j) {
+                                if (bio->bi_io_vec[j].bv_page == last_page)
+                                        continue;
+                                last_page = bio->bi_io_vec[j].bv_page;
+                                __free_page(last_page);
+                        }
+                        bio_put(bio);
+                }
+                kfree(sbio);
+        }
+
+        scrub_free_csums(sdev);
+        kfree(sdev);
+}
+
+static noinline_for_stack
+struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
+{
+        struct scrub_dev *sdev;
+        int i;
+        int j;
+        int ret;
+        struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
+
+        sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
+        if (!sdev)
+                goto nomem;
+        sdev->dev = dev;
+        for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
+                struct bio *bio;
+                struct scrub_bio *sbio;
+
+                sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
+                if (!sbio)
+                        goto nomem;
+                sdev->bios[i] = sbio;
+
+                bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
+                if (!bio)
+                        goto nomem;
+
+                sbio->index = i;
+                sbio->sdev = sdev;
+                sbio->bio = bio;
+                sbio->count = 0;
+                sbio->work.func = scrub_checksum;
+                bio->bi_private = sdev->bios[i];
+                bio->bi_end_io = scrub_bio_end_io;
+                bio->bi_sector = 0;
+                bio->bi_bdev = dev->bdev;
+                bio->bi_size = 0;
+
+                for (j = 0; j < SCRUB_PAGES_PER_BIO; ++j) {
+                        struct page *page;
+                        page = alloc_page(GFP_NOFS);
+                        if (!page)
+                                goto nomem;
+
+                        ret = bio_add_page(bio, page, PAGE_SIZE, 0);
+                        if (!ret)
+                                goto nomem;
+                }
+                WARN_ON(bio->bi_vcnt != SCRUB_PAGES_PER_BIO);
+
+                if (i != SCRUB_BIOS_PER_DEV-1)
+                        sdev->bios[i]->next_free = i + 1;
+                else
+                        sdev->bios[i]->next_free = -1;
+        }
+        sdev->first_free = 0;
+        sdev->curr = -1;
+        atomic_set(&sdev->in_flight, 0);
+        atomic_set(&sdev->cancel_req, 0);
+        sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy);
+        INIT_LIST_HEAD(&sdev->csum_list);
+
+        spin_lock_init(&sdev->list_lock);
+        spin_lock_init(&sdev->stat_lock);
+        init_waitqueue_head(&sdev->list_wait);
+        return sdev;
+
+nomem:
+        scrub_free_dev(sdev);
+        return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * scrub_recheck_error gets called when either verification of the page
+ * failed or the bio failed to read, e.g. with EIO. In the latter case,
+ * recheck_error gets called for every page in the bio, even though only
+ * one may be bad
+ */
+static void scrub_recheck_error(struct scrub_bio *sbio, int ix)
+{
+        struct scrub_dev *sdev = sbio->sdev;
+        struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
+        struct bio *bio = NULL;
+        struct page *page = NULL;
+        struct scrub_fixup *fixup = NULL;
+        int ret;
+
+        /*
+         * while we're in here we do not want the transaction to commit.
+         * To prevent it, we increment scrubs_running. scrub_pause will
+         * have to wait until we're finished
+         * we can safely increment scrubs_running here, because we're
+         * in the context of the original bio which is still marked in_flight
+         */
+        atomic_inc(&fs_info->scrubs_running);
+
+        fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
+        if (!fixup)
+                goto malloc_error;
+
+        fixup->logical = sbio->logical + ix * PAGE_SIZE;
+        fixup->physical = sbio->physical + ix * PAGE_SIZE;
+        fixup->spag = sbio->spag[ix];
+        fixup->sdev = sdev;
+
+        bio = bio_alloc(GFP_NOFS, 1);
+        if (!bio)
+                goto malloc_error;
+        bio->bi_private = fixup;
+        bio->bi_size = 0;
+        bio->bi_bdev = sdev->dev->bdev;
+        fixup->bio = bio;
+        fixup->recheck = 0;
+
+        page = alloc_page(GFP_NOFS);
+        if (!page)
+                goto malloc_error;
+
+        ret = bio_add_page(bio, page, PAGE_SIZE, 0);
+        if (!ret)
+                goto malloc_error;
+
+        if (!sbio->err) {
+                /*
+                 * shorter path: just a checksum error, go ahead and correct it
+                 */
+                scrub_fixup_worker(&fixup->work);
+                return;
+        }
+
+        /*
+         * an I/O-error occurred for one of the blocks in the bio, not
+         * necessarily for this one, so first try to read it separately
+         */
+        fixup->work.func = scrub_fixup_worker;
+        fixup->recheck = 1;
+        bio->bi_end_io = scrub_recheck_end_io;
+        bio->bi_sector = fixup->physical >> 9;
+        bio->bi_bdev = sdev->dev->bdev;
+        submit_bio(0, bio);
+
+        return;
+
+malloc_error:
+        if (bio)
+                bio_put(bio);
+        if (page)
+                __free_page(page);
+        kfree(fixup);
+        spin_lock(&sdev->stat_lock);
+        ++sdev->stat.malloc_errors;
+        spin_unlock(&sdev->stat_lock);
+        atomic_dec(&fs_info->scrubs_running);
+        wake_up(&fs_info->scrub_pause_wait);
+}
+
+static void scrub_recheck_end_io(struct bio *bio, int err)
+{
+        struct scrub_fixup *fixup = bio->bi_private;
+        struct btrfs_fs_info *fs_info = fixup->sdev->dev->dev_root->fs_info;
+
+        fixup->err = err;
+        btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work);
+}
+
+static int scrub_fixup_check(struct scrub_fixup *fixup)
+{
+        int ret = 1;
+        struct page *page;
+        void *buffer;
+        u64 flags = fixup->spag.flags;
+
+        page = fixup->bio->bi_io_vec[0].bv_page;
+        buffer = kmap_atomic(page, KM_USER0);
+        if (flags & BTRFS_EXTENT_FLAG_DATA) {
+                ret = scrub_checksum_data(fixup->sdev,
+                                          &fixup->spag, buffer);
+        } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+                ret = scrub_checksum_tree_block(fixup->sdev,
+                                                &fixup->spag,
+                                                fixup->logical,
+                                                buffer);
+        } else {
+                WARN_ON(1);
+        }
+        kunmap_atomic(buffer, KM_USER0);
+
+        return ret;
+}
+
+static void scrub_fixup_worker(struct btrfs_work *work)
+{
+        struct scrub_fixup *fixup;
+        struct btrfs_fs_info *fs_info;
+        u64 flags;
+        int ret = 1;
+
+        fixup = container_of(work, struct scrub_fixup, work);
+        fs_info = fixup->sdev->dev->dev_root->fs_info;
+        flags = fixup->spag.flags;
+
+        if (fixup->recheck && fixup->err == 0)
+                ret = scrub_fixup_check(fixup);
+
+        if (ret || fixup->err)
+                scrub_fixup(fixup);
+
+        __free_page(fixup->bio->bi_io_vec[0].bv_page);
+        bio_put(fixup->bio);
+
+        atomic_dec(&fs_info->scrubs_running);
+        wake_up(&fs_info->scrub_pause_wait);
+
+        kfree(fixup);
+}
+
+static void scrub_fixup_end_io(struct bio *bio, int err)
+{
+        complete((struct completion *)bio->bi_private);
+}
+
+static void scrub_fixup(struct scrub_fixup *fixup)
+{
+        struct scrub_dev *sdev = fixup->sdev;
+        struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
+        struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+        struct btrfs_multi_bio *multi = NULL;
+        struct bio *bio = fixup->bio;
+        u64 length;
+        int i;
+        int ret;
+        DECLARE_COMPLETION_ONSTACK(complete);
+
+        if ((fixup->spag.flags & BTRFS_EXTENT_FLAG_DATA) &&
+            (fixup->spag.have_csum == 0)) {
+                /*
+                 * nodatasum, don't try to fix anything
+                 * FIXME: we can do better, open the inode and trigger a
+                 * writeback
+                 */
+                goto uncorrectable;
+        }
+
+        length = PAGE_SIZE;
+        ret = btrfs_map_block(map_tree, REQ_WRITE, fixup->logical, &length,
+                              &multi, 0);
+        if (ret || !multi || length < PAGE_SIZE) {
+                printk(KERN_ERR
+                       "scrub_fixup: btrfs_map_block failed us for %llu\n",
+                       (unsigned long long)fixup->logical);
+                WARN_ON(1);
+                return;
+        }
+
+        if (multi->num_stripes == 1) {
+                /* there aren't any replicas */
+                goto uncorrectable;
+        }
+
+        /*
+         * first find a good copy
+         */
+        for (i = 0; i < multi->num_stripes; ++i) {
+                if (i == fixup->spag.mirror_num)
+                        continue;
+
+                bio->bi_sector = multi->stripes[i].physical >> 9;
+                bio->bi_bdev = multi->stripes[i].dev->bdev;
+                bio->bi_size = PAGE_SIZE;
+                bio->bi_next = NULL;
+                bio->bi_flags |= 1 << BIO_UPTODATE;
+                bio->bi_comp_cpu = -1;
+                bio->bi_end_io = scrub_fixup_end_io;
+                bio->bi_private = &complete;
+
+                submit_bio(0, bio);
+
+                wait_for_completion(&complete);
+
+                if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+                        /* I/O-error, this is not a good copy */
+                        continue;
+
+                ret = scrub_fixup_check(fixup);
+                if (ret == 0)
+                        break;
+        }
+        if (i == multi->num_stripes)
+                goto uncorrectable;
+
+        /*
+         * the bio now contains good data, write it back
+         */
+        bio->bi_sector = fixup->physical >> 9;
+        bio->bi_bdev = sdev->dev->bdev;
+        bio->bi_size = PAGE_SIZE;
+        bio->bi_next = NULL;
+        bio->bi_flags |= 1 << BIO_UPTODATE;
+        bio->bi_comp_cpu = -1;
+        bio->bi_end_io = scrub_fixup_end_io;
+        bio->bi_private = &complete;
+
+        submit_bio(REQ_WRITE, bio);
+
+        wait_for_completion(&complete);
+
+        if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+                /* I/O-error, writeback failed, give up */
+                goto uncorrectable;
+
+        kfree(multi);
+        spin_lock(&sdev->stat_lock);
+        ++sdev->stat.corrected_errors;
+        spin_unlock(&sdev->stat_lock);
+
+        if (printk_ratelimit())
+                printk(KERN_ERR "btrfs: fixed up at %llu\n",
+                       (unsigned long long)fixup->logical);
+        return;
+
+uncorrectable:
+        kfree(multi);
+        spin_lock(&sdev->stat_lock);
+        ++sdev->stat.uncorrectable_errors;
+        spin_unlock(&sdev->stat_lock);
+
+        if (printk_ratelimit())
+                printk(KERN_ERR "btrfs: unable to fixup at %llu\n",
+                       (unsigned long long)fixup->logical);
+}
+
+static void scrub_bio_end_io(struct bio *bio, int err)
+{
+        struct scrub_bio *sbio = bio->bi_private;
+        struct scrub_dev *sdev = sbio->sdev;
+        struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
+
+        sbio->err = err;
+
+        btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
+}
+
+static void scrub_checksum(struct btrfs_work *work)
+{
+        struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
+        struct scrub_dev *sdev = sbio->sdev;
+        struct page *page;
+        void *buffer;
+        int i;
+        u64 flags;
+        u64 logical;
+        int ret;
+
+        if (sbio->err) {
+                struct bio *bio;
+                struct bio *old_bio;
+
+                for (i = 0; i < sbio->count; ++i)
+                        scrub_recheck_error(sbio, i);
+                spin_lock(&sdev->stat_lock);
+                ++sdev->stat.read_errors;
+                spin_unlock(&sdev->stat_lock);
+
+                /*
+                 * FIXME: allocate a new bio after a media error. I haven't
+                 * figured out how to reuse this one
+                 */
+                old_bio = sbio->bio;
+                bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
+                if (!bio) {
+                        /*
+                         * alloc failed. cancel the scrub and don't requeue
+                         * this sbio
+                         */
+                        printk(KERN_ERR "btrfs scrub: allocation failure, "
+                               "cancelling scrub\n");
+                        atomic_inc(&sdev->dev->dev_root->fs_info->
+                                   scrub_cancel_req);
+                        goto out_no_enqueue;
+                }
+                sbio->bio = bio;
+                bio->bi_private = sbio;
+                bio->bi_end_io = scrub_bio_end_io;
+                bio->bi_sector = 0;
+                bio->bi_bdev = sbio->sdev->dev->bdev;
+                bio->bi_size = 0;
+                for (i = 0; i < SCRUB_PAGES_PER_BIO; ++i) {
+                        struct page *page;
+                        page = old_bio->bi_io_vec[i].bv_page;
+                        bio_add_page(bio, page, PAGE_SIZE, 0);
+                }
+                bio_put(old_bio);
+                goto out;
+        }
+        for (i = 0; i < sbio->count; ++i) {
+                page = sbio->bio->bi_io_vec[i].bv_page;
+                buffer = kmap_atomic(page, KM_USER0);
+                flags = sbio->spag[i].flags;
+                logical = sbio->logical + i * PAGE_SIZE;
+                ret = 0;
+                if (flags & BTRFS_EXTENT_FLAG_DATA) {
+                        ret = scrub_checksum_data(sdev, sbio->spag + i, buffer);
+                } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+                        ret = scrub_checksum_tree_block(sdev, sbio->spag + i,
+                                                        logical, buffer);
+                } else if (flags & BTRFS_EXTENT_FLAG_SUPER) {
+                        BUG_ON(i);
+                        (void)scrub_checksum_super(sbio, buffer);
+                } else {
+                        WARN_ON(1);
+                }
+                kunmap_atomic(buffer, KM_USER0);
+                if (ret)
+                        scrub_recheck_error(sbio, i);
+        }
+
+out:
+        spin_lock(&sdev->list_lock);
+        sbio->next_free = sdev->first_free;
+        sdev->first_free = sbio->index;
+        spin_unlock(&sdev->list_lock);
+out_no_enqueue:
+        atomic_dec(&sdev->in_flight);
+        wake_up(&sdev->list_wait);
+}
+
+static int scrub_checksum_data(struct scrub_dev *sdev,
+                               struct scrub_page *spag, void *buffer)
+{
+        u8 csum[BTRFS_CSUM_SIZE];
+        u32 crc = ~(u32)0;
+        int fail = 0;
+        struct btrfs_root *root = sdev->dev->dev_root;
+
+        if (!spag->have_csum)
+                return 0;
+
+        crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE);
+        btrfs_csum_final(crc, csum);
+        if (memcmp(csum, spag->csum, sdev->csum_size))
+                fail = 1;
+
+        spin_lock(&sdev->stat_lock);
+        ++sdev->stat.data_extents_scrubbed;
+        sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
+        if (fail)
+                ++sdev->stat.csum_errors;
+        spin_unlock(&sdev->stat_lock);
+
+        return fail;
+}
+
+static int scrub_checksum_tree_block(struct scrub_dev *sdev,
+                                     struct scrub_page *spag, u64 logical,
+                                     void *buffer)
+{
+        struct btrfs_header *h;
+        struct btrfs_root *root = sdev->dev->dev_root;
+        struct btrfs_fs_info *fs_info = root->fs_info;
+        u8 csum[BTRFS_CSUM_SIZE];
+        u32 crc = ~(u32)0;
+        int fail = 0;
+        int crc_fail = 0;
+
+        /*
+         * we don't use the getter functions here, as we
+         * a) don't have an extent buffer and
+         * b) the page is already kmapped
+         */
+        h = (struct btrfs_header *)buffer;
+
+        if (logical != le64_to_cpu(h->bytenr))
+                ++fail;
+
+        if (spag->generation != le64_to_cpu(h->generation))
+                ++fail;
+
+        if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
+                ++fail;
+
+        if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
+                   BTRFS_UUID_SIZE))
+                ++fail;
+
+        crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
+                              PAGE_SIZE - BTRFS_CSUM_SIZE);
+        btrfs_csum_final(crc, csum);
+        if (memcmp(csum, h->csum, sdev->csum_size))
+                ++crc_fail;
+
+        spin_lock(&sdev->stat_lock);
+        ++sdev->stat.tree_extents_scrubbed;
+        sdev->stat.tree_bytes_scrubbed += PAGE_SIZE;
+        if (crc_fail)
+                ++sdev->stat.csum_errors;
+        if (fail)
+                ++sdev->stat.verify_errors;
+        spin_unlock(&sdev->stat_lock);
+
+        return fail || crc_fail;
+}
+
+static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
+{
+        struct btrfs_super_block *s;
+        u64 logical;
+        struct scrub_dev *sdev = sbio->sdev;
+        struct btrfs_root *root = sdev->dev->dev_root;
+        struct btrfs_fs_info *fs_info = root->fs_info;
+        u8 csum[BTRFS_CSUM_SIZE];
+        u32 crc = ~(u32)0;
+        int fail = 0;
+
+        s = (struct btrfs_super_block *)buffer;
+        logical = sbio->logical;
+
+        if (logical != le64_to_cpu(s->bytenr))
+                ++fail;
+
+        if (sbio->spag[0].generation != le64_to_cpu(s->generation))
+                ++fail;
+
+        if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
+                ++fail;
+
+        crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
+                              PAGE_SIZE - BTRFS_CSUM_SIZE);
+        btrfs_csum_final(crc, csum);
+        if (memcmp(csum, s->csum, sbio->sdev->csum_size))
+                ++fail;
+
+        if (fail) {
+                /*
+                 * if we find an error in a super block, we just report it.
+                 * They will get written with the next transaction commit
+                 * anyway
+                 */
+                spin_lock(&sdev->stat_lock);
+                ++sdev->stat.super_errors;
+                spin_unlock(&sdev->stat_lock);
+        }
+
+        return fail;
+}
+
+static int scrub_submit(struct scrub_dev *sdev)
+{
+        struct scrub_bio *sbio;
+
+        if (sdev->curr == -1)
+                return 0;
+
+        sbio = sdev->bios[sdev->curr];
+
+        sbio->bio->bi_sector = sbio->physical >> 9;
+        sbio->bio->bi_size = sbio->count * PAGE_SIZE;
+        sbio->bio->bi_next = NULL;
+        sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
+        sbio->bio->bi_comp_cpu = -1;
+        sbio->bio->bi_bdev = sdev->dev->bdev;
+        sbio->err = 0;
+        sdev->curr = -1;
+        atomic_inc(&sdev->in_flight);
+
+        submit_bio(0, sbio->bio);
+
+        return 0;
+}
+
+static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
+                      u64 physical, u64 flags, u64 gen, u64 mirror_num,
+                      u8 *csum, int force)
+{
+        struct scrub_bio *sbio;
+
+again:
+        /*
+         * grab a fresh bio or wait for one to become available
+         */
+        while (sdev->curr == -1) {
+                spin_lock(&sdev->list_lock);
+                sdev->curr = sdev->first_free;
+                if (sdev->curr != -1) {
+                        sdev->first_free = sdev->bios[sdev->curr]->next_free;
+                        sdev->bios[sdev->curr]->next_free = -1;
+                        sdev->bios[sdev->curr]->count = 0;
+                        spin_unlock(&sdev->list_lock);
+                } else {
+                        spin_unlock(&sdev->list_lock);
+                        wait_event(sdev->list_wait, sdev->first_free != -1);
+                }
+        }
+        sbio = sdev->bios[sdev->curr];
+        if (sbio->count == 0) {
+                sbio->physical = physical;
+                sbio->logical = logical;
+        } else if (sbio->physical + sbio->count * PAGE_SIZE != physical) {
+                scrub_submit(sdev);
+                goto again;
+        }
+        sbio->spag[sbio->count].flags = flags;
+        sbio->spag[sbio->count].generation = gen;
+        sbio->spag[sbio->count].have_csum = 0;
+        sbio->spag[sbio->count].mirror_num = mirror_num;
+        if (csum) {
+                sbio->spag[sbio->count].have_csum = 1;
+                memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
+        }
+        ++sbio->count;
+        if (sbio->count == SCRUB_PAGES_PER_BIO || force)
+                scrub_submit(sdev);
+
+        return 0;
+}
+
+static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
+                           u8 *csum)
+{
+        struct btrfs_ordered_sum *sum = NULL;
+        int ret = 0;
+        unsigned long i;
+        unsigned long num_sectors;
+        u32 sectorsize = sdev->dev->dev_root->sectorsize;
+
+        while (!list_empty(&sdev->csum_list)) {
+                sum = list_first_entry(&sdev->csum_list,
+                                       struct btrfs_ordered_sum, list);
+                if (sum->bytenr > logical)
+                        return 0;
+                if (sum->bytenr + sum->len > logical)
+                        break;
+
+                ++sdev->stat.csum_discards;
+                list_del(&sum->list);
+                kfree(sum);
+                sum = NULL;
+        }
+        if (!sum)
+                return 0;
+
+        num_sectors = sum->len / sectorsize;
+        for (i = 0; i < num_sectors; ++i) {
+                if (sum->sums[i].bytenr == logical) {
+                        memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
+                        ret = 1;
+                        break;
+                }
+        }
+        if (ret && i == num_sectors - 1) {
+                list_del(&sum->list);
+                kfree(sum);
+        }
+        return ret;
+}
+
+/* scrub extent tries to collect up to 64 kB for each bio */
+static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
+                        u64 physical, u64 flags, u64 gen, u64 mirror_num)
+{
+        int ret;
+        u8 csum[BTRFS_CSUM_SIZE];
+
+        while (len) {
+                u64 l = min_t(u64, len, PAGE_SIZE);
+                int have_csum = 0;
+
+                if (flags & BTRFS_EXTENT_FLAG_DATA) {
+                        /* push csums to sbio */
+                        have_csum = scrub_find_csum(sdev, logical, l, csum);
+                        if (have_csum == 0)
+                                ++sdev->stat.no_csum;
+                }
+                ret = scrub_page(sdev, logical, l, physical, flags, gen,
+                                 mirror_num, have_csum ? csum : NULL, 0);
+                if (ret)
+                        return ret;
+                len -= l;
+                logical += l;
+                physical += l;
+        }
+        return 0;
+}
+
+static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
+        struct map_lookup *map, int num, u64 base, u64 length)
+{
+        struct btrfs_path *path;
+        struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
+        struct btrfs_root *root = fs_info->extent_root;
+        struct btrfs_root *csum_root = fs_info->csum_root;
+        struct btrfs_extent_item *extent;
+        u64 flags;
+        int ret;
+        int slot;
+        int i;
+        u64 nstripes;
+        int start_stripe;
+        struct extent_buffer *l;
+        struct btrfs_key key;
+        u64 physical;
+        u64 logical;
+        u64 generation;
+        u64 mirror_num;
+
+        u64 increment = map->stripe_len;
+        u64 offset;
+
+        nstripes = length;
+        offset = 0;
+        do_div(nstripes, map->stripe_len);
+        if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+                offset = map->stripe_len * num;
+                increment = map->stripe_len * map->num_stripes;
+                mirror_num = 0;
+        } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+                int factor = map->num_stripes / map->sub_stripes;
+                offset = map->stripe_len * (num / map->sub_stripes);
+                increment = map->stripe_len * factor;
+                mirror_num = num % map->sub_stripes;
+        } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+                increment = map->stripe_len;
+                mirror_num = num % map->num_stripes;
+        } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
+                increment = map->stripe_len;
+                mirror_num = num % map->num_stripes;
+        } else {
+                increment = map->stripe_len;
+                mirror_num = 0;
+        }
+
+        path = btrfs_alloc_path();
+        if (!path)
+                return -ENOMEM;
+
+        path->reada = 2;
+        path->search_commit_root = 1;
+        path->skip_locking = 1;
+
+        /*
+         * find all extents for each stripe and just read them to get
+         * them into the page cache
+         * FIXME: we can do better. build a more intelligent prefetching
+         */
+        logical = base + offset;
+        physical = map->stripes[num].physical;
+        ret = 0;
+        for (i = 0; i < nstripes; ++i) {
+                key.objectid = logical;
+                key.type = BTRFS_EXTENT_ITEM_KEY;
+                key.offset = (u64)0;
+
+                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+                if (ret < 0)
+                        goto out;
+
+                l = path->nodes[0];
+                slot = path->slots[0];
+                btrfs_item_key_to_cpu(l, &key, slot);
+                if (key.objectid != logical) {
+                        ret = btrfs_previous_item(root, path, 0,
+                                                  BTRFS_EXTENT_ITEM_KEY);
+                        if (ret < 0)
+                                goto out;
+                }
+
+                while (1) {
+                        l = path->nodes[0];
+                        slot = path->slots[0];
+                        if (slot >= btrfs_header_nritems(l)) {
+                                ret = btrfs_next_leaf(root, path);
+                                if (ret == 0)
+                                        continue;
+                                if (ret < 0)
+                                        goto out;
+
+                                break;
+                        }
+                        btrfs_item_key_to_cpu(l, &key, slot);
+
+                        if (key.objectid >= logical + map->stripe_len)
+                                break;
+
+                        path->slots[0]++;
+                }
+                btrfs_release_path(root, path);
+                logical += increment;
+                physical += map->stripe_len;
+                cond_resched();
+        }
+
+        /*
+         * collect all data csums for the stripe to avoid seeking during
+         * the scrub. This might currently (crc32) end up being about 1MB
+         */
+        start_stripe = 0;
+again:
+        logical = base + offset + start_stripe * increment;
+        for (i = start_stripe; i < nstripes; ++i) {
+                ret = btrfs_lookup_csums_range(csum_root, logical,
+                                               logical + map->stripe_len - 1,
+                                               &sdev->csum_list, 1);
+                if (ret)
+                        goto out;
+
+                logical += increment;
+                cond_resched();
+        }
+        /*
+         * now find all extents for each stripe and scrub them
+         */
+        logical = base + offset + start_stripe * increment;
+        physical = map->stripes[num].physical + start_stripe * map->stripe_len;
+        ret = 0;
+        for (i = start_stripe; i < nstripes; ++i) {
+                /*
+                 * canceled?
+                 */
+                if (atomic_read(&fs_info->scrub_cancel_req) ||
+                    atomic_read(&sdev->cancel_req)) {
+                        ret = -ECANCELED;
+                        goto out;
+                }
+                /*
+                 * check to see if we have to pause
+                 */
+                if (atomic_read(&fs_info->scrub_pause_req)) {
+                        /* push queued extents */
+                        scrub_submit(sdev);
+                        wait_event(sdev->list_wait,
+                                   atomic_read(&sdev->in_flight) == 0);
+                        atomic_inc(&fs_info->scrubs_paused);
+                        wake_up(&fs_info->scrub_pause_wait);
+                        mutex_lock(&fs_info->scrub_lock);
+                        while (atomic_read(&fs_info->scrub_pause_req)) {
+                                mutex_unlock(&fs_info->scrub_lock);
+                                wait_event(fs_info->scrub_pause_wait,
+                                   atomic_read(&fs_info->scrub_pause_req) == 0);
+                                mutex_lock(&fs_info->scrub_lock);
+                        }
+                        atomic_dec(&fs_info->scrubs_paused);
+                        mutex_unlock(&fs_info->scrub_lock);
+                        wake_up(&fs_info->scrub_pause_wait);
+                        scrub_free_csums(sdev);
+                        start_stripe = i;
+                        goto again;
+                }
+
+                key.objectid = logical;
+                key.type = BTRFS_EXTENT_ITEM_KEY;
+                key.offset = (u64)0;
+
+                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+                if (ret < 0)
+                        goto out;
+
+                l = path->nodes[0];
+                slot = path->slots[0];
+                btrfs_item_key_to_cpu(l, &key, slot);
+                if (key.objectid != logical) {
+                        ret = btrfs_previous_item(root, path, 0,
+                                                  BTRFS_EXTENT_ITEM_KEY);
+                        if (ret < 0)
+                                goto out;
+                }
+
+                while (1) {
+                        l = path->nodes[0];
+                        slot = path->slots[0];
+                        if (slot >= btrfs_header_nritems(l)) {
+                                ret = btrfs_next_leaf(root, path);
+                                if (ret == 0)
+                                        continue;
+                                if (ret < 0)
+                                        goto out;
+
+                                break;
+                        }
+                        btrfs_item_key_to_cpu(l, &key, slot);
+
+                        if (key.objectid + key.offset <= logical)
+                                goto next;
+
+                        if (key.objectid >= logical + map->stripe_len)
+                                break;
+
+                        if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
+                                goto next;
+
+                        extent = btrfs_item_ptr(l, slot,
+                                                struct btrfs_extent_item);
+                        flags = btrfs_extent_flags(l, extent);
+                        generation = btrfs_extent_generation(l, extent);
+
+                        if (key.objectid < logical &&
+                            (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
+                                printk(KERN_ERR
+                                       "btrfs scrub: tree block %llu spanning "
+                                       "stripes, ignored. logical=%llu\n",
+                                       (unsigned long long)key.objectid,
+                                       (unsigned long long)logical);
+                                goto next;
+                        }
+
+                        /*
+                         * trim extent to this stripe
+                         */
+                        if (key.objectid < logical) {
+                                key.offset -= logical - key.objectid;
+                                key.objectid = logical;
+                        }
+                        if (key.objectid + key.offset >
+                            logical + map->stripe_len) {
+                                key.offset = logical + map->stripe_len -
+                                             key.objectid;
+                        }
+
+                        ret = scrub_extent(sdev, key.objectid, key.offset,
+                                           key.objectid - logical + physical,
+                                           flags, generation, mirror_num);
+                        if (ret)
+                                goto out;
+
+next:
+                        path->slots[0]++;
+                }
+                btrfs_release_path(root, path);
+                logical += increment;
+                physical += map->stripe_len;
+                spin_lock(&sdev->stat_lock);
+                sdev->stat.last_physical = physical;
+                spin_unlock(&sdev->stat_lock);
+        }
+        /* push queued extents */
+        scrub_submit(sdev);
+
+out:
+        btrfs_free_path(path);
+        return ret < 0 ? ret : 0;
+}
+
+static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
+        u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length)
+{
+        struct btrfs_mapping_tree *map_tree =
+                &sdev->dev->dev_root->fs_info->mapping_tree;
+        struct map_lookup *map;
+        struct extent_map *em;
+        int i;
+        int ret = -EINVAL;
+
+        read_lock(&map_tree->map_tree.lock);
+        em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
+        read_unlock(&map_tree->map_tree.lock);
+
+        if (!em)
+                return -EINVAL;
+
+        map = (struct map_lookup *)em->bdev;
+        if (em->start != chunk_offset)
+                goto out;
+
+        if (em->len < length)
+                goto out;
+
+        for (i = 0; i < map->num_stripes; ++i) {
+                if (map->stripes[i].dev == sdev->dev) {
+                        ret = scrub_stripe(sdev, map, i, chunk_offset, length);
+                        if (ret)
+                                goto out;
+                }
+        }
+out:
+        free_extent_map(em);
+
+        return ret;
+}
+
+static noinline_for_stack
+int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
+{
+        struct btrfs_dev_extent *dev_extent = NULL;
+        struct btrfs_path *path;
+        struct btrfs_root *root = sdev->dev->dev_root;
+        struct btrfs_fs_info *fs_info = root->fs_info;
+        u64 length;
+        u64 chunk_tree;
+        u64 chunk_objectid;
+        u64 chunk_offset;
+        int ret;
+        int slot;
+        struct extent_buffer *l;
+        struct btrfs_key key;
+        struct btrfs_key found_key;
+        struct btrfs_block_group_cache *cache;
+
+        path = btrfs_alloc_path();
+        if (!path)
+                return -ENOMEM;
+
+        path->reada = 2;
+        path->search_commit_root = 1;
+        path->skip_locking = 1;
+
+        key.objectid = sdev->dev->devid;
+        key.offset = 0ull;
+        key.type = BTRFS_DEV_EXTENT_KEY;
+
+
+        while (1) {
+                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+                if (ret < 0)
+                        goto out;
+                ret = 0;
+
+                l = path->nodes[0];
+                slot = path->slots[0];
+
+                btrfs_item_key_to_cpu(l, &found_key, slot);
+
+                if (found_key.objectid != sdev->dev->devid)
+                        break;
+
+                if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
+                        break;
+
+                if (found_key.offset >= end)
+                        break;
+
+                if (found_key.offset < key.offset)
+                        break;
+
+                dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
+                length = btrfs_dev_extent_length(l, dev_extent);
+
+                if (found_key.offset + length <= start) {
+                        key.offset = found_key.offset + length;
+                        btrfs_release_path(root, path);
+                        continue;
+                }
+
+                chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
+                chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
+                chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
+
+                /*
+                 * get a reference on the corresponding block group to prevent
+                 * the chunk from going away while we scrub it
+                 */
+                cache = btrfs_lookup_block_group(fs_info, chunk_offset);
+                if (!cache) {
+                        ret = -ENOENT;
+                        goto out;
+                }
+                ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
+                                  chunk_offset, length);
+                btrfs_put_block_group(cache);
+                if (ret)
+                        break;
+
+                key.offset = found_key.offset + length;
+                btrfs_release_path(root, path);
+        }
+
+out:
+        btrfs_free_path(path);
+        return ret;
+}
+
+static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
+{
+        int i;
+        u64 bytenr;
+        u64 gen;
+        int ret;
+        struct btrfs_device *device = sdev->dev;
+        struct btrfs_root *root = device->dev_root;
+
+        gen = root->fs_info->last_trans_committed;
+
+        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+                bytenr = btrfs_sb_offset(i);
+                if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
+                        break;
+
+                ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr,
+                                 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
+                if (ret)
+                        return ret;
+        }
+        wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
+
+        return 0;
+}
+
+/*
+ * get a reference count on fs_info->scrub_workers. start worker if necessary
+ */
+static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
+{
+        struct btrfs_fs_info *fs_info = root->fs_info;
+
+        mutex_lock(&fs_info->scrub_lock);
+        if (fs_info->scrub_workers_refcnt == 0)
+                btrfs_start_workers(&fs_info->scrub_workers, 1);
+        ++fs_info->scrub_workers_refcnt;
+        mutex_unlock(&fs_info->scrub_lock);
+
+        return 0;
+}
+
+static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
+{
+        struct btrfs_fs_info *fs_info = root->fs_info;
+
+        mutex_lock(&fs_info->scrub_lock);
+        if (--fs_info->scrub_workers_refcnt == 0)
+                btrfs_stop_workers(&fs_info->scrub_workers);
+        WARN_ON(fs_info->scrub_workers_refcnt < 0);
+        mutex_unlock(&fs_info->scrub_lock);
+}
+
+
+int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
+                    struct btrfs_scrub_progress *progress)
+{
+        struct scrub_dev *sdev;
+        struct btrfs_fs_info *fs_info = root->fs_info;
+        int ret;
+        struct btrfs_device *dev;
+
+        if (root->fs_info->closing)
+                return -EINVAL;
+
+        /*
+         * check some assumptions
+         */
+        if (root->sectorsize != PAGE_SIZE ||
+            root->sectorsize != root->leafsize ||
+            root->sectorsize != root->nodesize) {
+                printk(KERN_ERR "btrfs_scrub: size assumptions fail\n");
+                return -EINVAL;
+        }
+
+        ret = scrub_workers_get(root);
+        if (ret)
+                return ret;
+
+        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
+        dev = btrfs_find_device(root, devid, NULL, NULL);
+        if (!dev || dev->missing) {
+                mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+                scrub_workers_put(root);
+                return -ENODEV;
+        }
+        mutex_lock(&fs_info->scrub_lock);
+
+        if (!dev->in_fs_metadata) {
+                mutex_unlock(&fs_info->scrub_lock);
+                mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+                scrub_workers_put(root);
+                return -ENODEV;
+        }
+
+        if (dev->scrub_device) {
+                mutex_unlock(&fs_info->scrub_lock);
+                mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+                scrub_workers_put(root);
+                return -EINPROGRESS;
+        }
+        sdev = scrub_setup_dev(dev);
+        if (IS_ERR(sdev)) {
+                mutex_unlock(&fs_info->scrub_lock);
+                mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+                scrub_workers_put(root);
+                return PTR_ERR(sdev);
+        }
+        dev->scrub_device = sdev;
+
+        atomic_inc(&fs_info->scrubs_running);
+        mutex_unlock(&fs_info->scrub_lock);
+        mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+
+        down_read(&fs_info->scrub_super_lock);
+        ret = scrub_supers(sdev);
+        up_read(&fs_info->scrub_super_lock);
+
+        if (!ret)
+                ret = scrub_enumerate_chunks(sdev, start, end);
+
+        wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
+
+        atomic_dec(&fs_info->scrubs_running);
+        wake_up(&fs_info->scrub_pause_wait);
+
+        if (progress)
+                memcpy(progress, &sdev->stat, sizeof(*progress));
+
+        mutex_lock(&fs_info->scrub_lock);
+        dev->scrub_device = NULL;
+        mutex_unlock(&fs_info->scrub_lock);
+
+        scrub_free_dev(sdev);
+        scrub_workers_put(root);
+
+        return ret;
+}
+
+int btrfs_scrub_pause(struct btrfs_root *root)
+{
+        struct btrfs_fs_info *fs_info = root->fs_info;
+
+        mutex_lock(&fs_info->scrub_lock);
+        atomic_inc(&fs_info->scrub_pause_req);
+        while (atomic_read(&fs_info->scrubs_paused) !=
+               atomic_read(&fs_info->scrubs_running)) {
+                mutex_unlock(&fs_info->scrub_lock);
+                wait_event(fs_info->scrub_pause_wait,
+                           atomic_read(&fs_info->scrubs_paused) ==
+                           atomic_read(&fs_info->scrubs_running));
+                mutex_lock(&fs_info->scrub_lock);
+        }
+        mutex_unlock(&fs_info->scrub_lock);
+
+        return 0;
+}
+
+int btrfs_scrub_continue(struct btrfs_root *root)
+{
+        struct btrfs_fs_info *fs_info = root->fs_info;
+
+        atomic_dec(&fs_info->scrub_pause_req);
+        wake_up(&fs_info->scrub_pause_wait);
+        return 0;
+}
+
+int btrfs_scrub_pause_super(struct btrfs_root *root)
+{
+        down_write(&root->fs_info->scrub_super_lock);
+        return 0;
+}
+
+int btrfs_scrub_continue_super(struct btrfs_root *root)
+{
+        up_write(&root->fs_info->scrub_super_lock);
+        return 0;
+}
+
+int btrfs_scrub_cancel(struct btrfs_root *root)
+{
+        struct btrfs_fs_info *fs_info = root->fs_info;
+
+        mutex_lock(&fs_info->scrub_lock);
+        if (!atomic_read(&fs_info->scrubs_running)) {
+                mutex_unlock(&fs_info->scrub_lock);
+                return -ENOTCONN;
+        }
+
+        atomic_inc(&fs_info->scrub_cancel_req);
+        while (atomic_read(&fs_info->scrubs_running)) {
+                mutex_unlock(&fs_info->scrub_lock);
+                wait_event(fs_info->scrub_pause_wait,
+                           atomic_read(&fs_info->scrubs_running) == 0);
+                mutex_lock(&fs_info->scrub_lock);
+        }
+        atomic_dec(&fs_info->scrub_cancel_req);
+        mutex_unlock(&fs_info->scrub_lock);
+
+        return 0;
+}
+
+int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
+{
+        struct btrfs_fs_info *fs_info = root->fs_info;
+        struct scrub_dev *sdev;
+
+        mutex_lock(&fs_info->scrub_lock);
+        sdev = dev->scrub_device;
+        if (!sdev) {
+                mutex_unlock(&fs_info->scrub_lock);
+                return -ENOTCONN;
+        }
+        atomic_inc(&sdev->cancel_req);
+        while (dev->scrub_device) {
+                mutex_unlock(&fs_info->scrub_lock);
+                wait_event(fs_info->scrub_pause_wait,
+                           dev->scrub_device == NULL);
+                mutex_lock(&fs_info->scrub_lock);
+        }
+        mutex_unlock(&fs_info->scrub_lock);
+
+        return 0;
+}
+int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
+{
+        struct btrfs_fs_info *fs_info = root->fs_info;
+        struct btrfs_device *dev;
+        int ret;
+
+        /*
+         * we have to hold the device_list_mutex here so the device
+         * does not go away in cancel_dev. FIXME: find a better solution
+         */
+        mutex_lock(&fs_info->fs_devices->device_list_mutex);
+        dev = btrfs_find_device(root, devid, NULL, NULL);
+        if (!dev) {
+                mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+                return -ENODEV;
+        }
+        ret = btrfs_scrub_cancel_dev(root, dev);
+        mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+
+        return ret;
+}
+
+int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
+                         struct btrfs_scrub_progress *progress)
+{
+        struct btrfs_device *dev;
+        struct scrub_dev *sdev = NULL;
+
+        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
+        dev = btrfs_find_device(root, devid, NULL, NULL);
+        if (dev)
+                sdev = dev->scrub_device;
+        if (sdev)
+                memcpy(progress, &sdev->stat, sizeof(*progress));
+        mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+
+        return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV;
+}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c571734d5e5a..37c2302a08d4 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1321,6 +1321,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
 	WARN_ON(cur_trans != trans->transaction);
 
+	btrfs_scrub_pause(root);
 	/* btrfs_commit_tree_roots is responsible for getting the
 	 * various roots consistent with each other. Every pointer
 	 * in the tree of tree roots has to point to the most up to date
@@ -1405,6 +1406,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
 	mutex_unlock(&root->fs_info->trans_mutex);
 
+	btrfs_scrub_continue(root);
+
 	if (current->journal_info == trans)
 		current->journal_info = NULL;
 
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f997ec0c1ba4..f1a0726da5f5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -614,7 +614,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 
 	ret = btrfs_lookup_csums_range(root->log_root,
 				       csum_start, csum_end - 1,
-				       &ordered_sums);
+				       &ordered_sums, 0);
 	BUG_ON(ret);
 	while (!list_empty(&ordered_sums)) {
 		struct btrfs_ordered_sum *sums;
@@ -2093,7 +2093,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	 * the running transaction open, so a full commit can't hop
 	 * in and cause problems either.
 	 */
+	btrfs_scrub_pause_super(root);
 	write_ctree_super(trans, root->fs_info->tree_root, 1);
+	btrfs_scrub_continue_super(root);
 	ret = 0;
 
 	mutex_lock(&root->log_mutex);
@@ -2689,7 +2691,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 			ret = btrfs_lookup_csums_range(
 					log->fs_info->csum_root,
 					ds + cs, ds + cs + cl - 1,
-					&ordered_sums);
+					&ordered_sums, 0);
 			BUG_ON(ret);
 		}
 	}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8b9fb8c7683d..89ca8f110b6e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -38,9 +38,6 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans,
 			     struct btrfs_device *device);
 static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
 
-#define map_lookup_size(n) (sizeof(struct map_lookup) + \
-			    (sizeof(struct btrfs_bio_stripe) * (n)))
-
 static DEFINE_MUTEX(uuid_mutex);
 static LIST_HEAD(fs_uuids);
 
@@ -1334,6 +1331,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 		goto error_undo;
 
 	device->in_fs_metadata = 0;
+	btrfs_scrub_cancel_dev(root, device);
 
 	/*
 	 * the device list mutex makes sure that we don't change
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index cc2eadaf7a27..f7c20123a1fe 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -85,6 +85,9 @@ struct btrfs_device {
 	/* physical drive uuid (or lvm uuid) */
 	u8 uuid[BTRFS_UUID_SIZE];
 
+	/* per-device scrub information */
+	struct scrub_dev *scrub_device;
+
 	struct btrfs_work work;
 };
 
@@ -157,6 +160,9 @@ struct map_lookup {
 	struct btrfs_bio_stripe stripes[];
 };
 
+#define map_lookup_size(n) (sizeof(struct map_lookup) + \
+			    (sizeof(struct btrfs_bio_stripe) * (n)))
+
 /* Used to sort the devices by max_avail(descending sort) */
 int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2);
 
162 168