diff options
Diffstat (limited to 'drivers/md/md.h')
-rw-r--r-- | drivers/md/md.h | 110 |
1 files changed, 98 insertions, 12 deletions
diff --git a/drivers/md/md.h b/drivers/md/md.h index 1c26c7a08ae6..1e586bb4452e 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -29,6 +29,13 @@ | |||
29 | typedef struct mddev_s mddev_t; | 29 | typedef struct mddev_s mddev_t; |
30 | typedef struct mdk_rdev_s mdk_rdev_t; | 30 | typedef struct mdk_rdev_s mdk_rdev_t; |
31 | 31 | ||
32 | /* Bad block numbers are stored sorted in a single page. | ||
33 | * 64 bits are used for each block or extent. | |
34 | * 54 bits are sector number, 9 bits are extent size, | ||
35 | * 1 bit is an 'acknowledged' flag. | ||
36 | */ | ||
37 | #define MD_MAX_BADBLOCKS (PAGE_SIZE/8) | ||
38 | |||
32 | /* | 39 | /* |
33 | * MD's 'extended' device | 40 | * MD's 'extended' device |
34 | */ | 41 | */ |
@@ -48,7 +55,7 @@ struct mdk_rdev_s | |||
48 | struct block_device *meta_bdev; | 55 | struct block_device *meta_bdev; |
49 | struct block_device *bdev; /* block device handle */ | 56 | struct block_device *bdev; /* block device handle */ |
50 | 57 | ||
51 | struct page *sb_page; | 58 | struct page *sb_page, *bb_page; |
52 | int sb_loaded; | 59 | int sb_loaded; |
53 | __u64 sb_events; | 60 | __u64 sb_events; |
54 | sector_t data_offset; /* start of data in array */ | 61 | sector_t data_offset; /* start of data in array */ |
@@ -74,9 +81,29 @@ struct mdk_rdev_s | |||
74 | #define In_sync 2 /* device is in_sync with rest of array */ | 81 | #define In_sync 2 /* device is in_sync with rest of array */ |
75 | #define WriteMostly 4 /* Avoid reading if at all possible */ | 82 | #define WriteMostly 4 /* Avoid reading if at all possible */ |
76 | #define AutoDetected 7 /* added by auto-detect */ | 83 | #define AutoDetected 7 /* added by auto-detect */ |
77 | #define Blocked 8 /* An error occurred on an externally | 84 | #define Blocked 8 /* An error occurred but has not yet |
78 | * managed array, don't allow writes | 85 | * been acknowledged by the metadata |
86 | * handler, so don't allow writes | ||
79 | * until it is cleared */ | 87 | * until it is cleared */ |
88 | #define WriteErrorSeen 9 /* A write error has been seen on this | ||
89 | * device | ||
90 | */ | ||
91 | #define FaultRecorded 10 /* Intermediate state for clearing | ||
92 | * Blocked. The Fault is/will-be | ||
93 | * recorded in the metadata, but that | ||
94 | * metadata hasn't been stored safely | ||
95 | * on disk yet. | ||
96 | */ | ||
97 | #define BlockedBadBlocks 11 /* A writer is blocked because they | ||
98 | * found an unacknowledged bad-block. | ||
99 | * This can safely be cleared at any | ||
100 | * time, and the writer will re-check. | ||
101 | * It may be set at any time, and at | ||
102 | * worst the writer will timeout and | ||
103 | * re-check. So setting it as | ||
104 | * accurately as possible is good, but | ||
105 | * not absolutely critical. | ||
106 | */ | ||
80 | wait_queue_head_t blocked_wait; | 107 | wait_queue_head_t blocked_wait; |
81 | 108 | ||
82 | int desc_nr; /* descriptor index in the superblock */ | 109 | int desc_nr; /* descriptor index in the superblock */ |
@@ -111,8 +138,54 @@ struct mdk_rdev_s | |||
111 | 138 | ||
112 | struct sysfs_dirent *sysfs_state; /* handle for 'state' | 139 | struct sysfs_dirent *sysfs_state; /* handle for 'state' |
113 | * sysfs entry */ | 140 | * sysfs entry */ |
141 | |||
142 | struct badblocks { | ||
143 | int count; /* count of bad blocks */ | ||
144 | int unacked_exist; /* there probably are unacknowledged | ||
145 | * bad blocks. This is only cleared | ||
146 | * when a read discovers none | ||
147 | */ | ||
148 | int shift; /* shift from sectors to block size | ||
149 | * a -ve shift means badblocks are | ||
150 | * disabled.*/ | ||
151 | u64 *page; /* badblock list */ | ||
152 | int changed; | ||
153 | seqlock_t lock; | ||
154 | |||
155 | sector_t sector; | ||
156 | sector_t size; /* in sectors */ | ||
157 | } badblocks; | ||
114 | }; | 158 | }; |
115 | 159 | ||
/*
 * Encoding of one 64-bit bad-block table entry:
 *   bit  63    - 'acknowledged' flag
 *   bits 62..9 - sector number of the start of the extent (54 bits)
 *   bits  8..0 - extent size minus one (9 bits, so sizes 1..BB_MAX_LEN)
 */
#define BB_LEN_MASK	(0x00000000000001FFULL)
#define BB_OFFSET_MASK	(0x7FFFFFFFFFFFFE00ULL)
#define BB_ACK_MASK	(0x8000000000000000ULL)
#define BB_MAX_LEN	512
/* Extract the start sector, the extent size and the ack flag of an entry. */
#define BB_OFFSET(x)	(((x) & BB_OFFSET_MASK) >> 9)
#define BB_LEN(x)	(((x) & BB_LEN_MASK) + 1)
#define BB_ACK(x)	(!!((x) & BB_ACK_MASK))
/*
 * Build an entry from start sector 'a', size 'l' (1..BB_MAX_LEN) and an
 * 'ack' truth value.  'a' is widened to u64 *before* the shift so that a
 * caller passing a 32-bit sector value does not lose its high bits.
 */
#define BB_MAKE(a, l, ack) ((((u64)(a)) << 9) | ((l)-1) | ((u64)(!!(ack)) << 63))
168 | |||
169 | extern int md_is_badblock(struct badblocks *bb, sector_t s, int sectors, | ||
170 | sector_t *first_bad, int *bad_sectors); | ||
171 | static inline int is_badblock(mdk_rdev_t *rdev, sector_t s, int sectors, | ||
172 | sector_t *first_bad, int *bad_sectors) | ||
173 | { | ||
174 | if (unlikely(rdev->badblocks.count)) { | ||
175 | int rv = md_is_badblock(&rdev->badblocks, rdev->data_offset + s, | ||
176 | sectors, | ||
177 | first_bad, bad_sectors); | ||
178 | if (rv) | ||
179 | *first_bad -= rdev->data_offset; | ||
180 | return rv; | ||
181 | } | ||
182 | return 0; | ||
183 | } | ||
184 | extern int rdev_set_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors, | ||
185 | int acknowledged); | ||
186 | extern int rdev_clear_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors); | ||
187 | extern void md_ack_all_badblocks(struct badblocks *bb); | ||
188 | |||
116 | struct mddev_s | 189 | struct mddev_s |
117 | { | 190 | { |
118 | void *private; | 191 | void *private; |
@@ -239,9 +312,12 @@ struct mddev_s | |||
239 | #define MD_RECOVERY_FROZEN 9 | 312 | #define MD_RECOVERY_FROZEN 9 |
240 | 313 | ||
241 | unsigned long recovery; | 314 | unsigned long recovery; |
242 | int recovery_disabled; /* if we detect that recovery | 315 | /* If a RAID personality determines that recovery (of a particular |
243 | * will always fail, set this | 316 | * device) will fail due to a read error on the source device, it |
244 | * so we don't loop trying */ | 317 | * takes a copy of this number and does not attempt recovery again |
318 | * until this number changes. | ||
319 | */ | ||
320 | int recovery_disabled; | ||
245 | 321 | ||
246 | int in_sync; /* know to not need resync */ | 322 | int in_sync; /* know to not need resync */ |
247 | /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so | 323 | /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so |
@@ -304,11 +380,6 @@ struct mddev_s | |||
304 | * hot-adding a bitmap. It should | 380 | * hot-adding a bitmap. It should |
305 | * eventually be settable by sysfs. | 381 | * eventually be settable by sysfs. |
306 | */ | 382 | */ |
307 | /* When md is serving under dm, it might use a | ||
308 | * dirty_log to store the bits. | ||
309 | */ | ||
310 | struct dm_dirty_log *log; | ||
311 | |||
312 | struct mutex mutex; | 383 | struct mutex mutex; |
313 | unsigned long chunksize; | 384 | unsigned long chunksize; |
314 | unsigned long daemon_sleep; /* how many jiffies between updates? */ | 385 | unsigned long daemon_sleep; /* how many jiffies between updates? */ |
@@ -413,6 +484,20 @@ static inline char * mdname (mddev_t * mddev) | |||
413 | return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; | 484 | return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; |
414 | } | 485 | } |
415 | 486 | ||
487 | static inline int sysfs_link_rdev(mddev_t *mddev, mdk_rdev_t *rdev) | ||
488 | { | ||
489 | char nm[20]; | ||
490 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
491 | return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); | ||
492 | } | ||
493 | |||
494 | static inline void sysfs_unlink_rdev(mddev_t *mddev, mdk_rdev_t *rdev) | ||
495 | { | ||
496 | char nm[20]; | ||
497 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
498 | sysfs_remove_link(&mddev->kobj, nm); | ||
499 | } | ||
500 | |||
416 | /* | 501 | /* |
417 | * iterates through some rdev ringlist. It's safe to remove the | 502 | * iterates through some rdev ringlist. It's safe to remove the |
418 | * current 'rdev'. Dont touch 'tmp' though. | 503 | * current 'rdev'. Dont touch 'tmp' though. |
@@ -505,7 +590,7 @@ extern void mddev_init(mddev_t *mddev); | |||
505 | extern int md_run(mddev_t *mddev); | 590 | extern int md_run(mddev_t *mddev); |
506 | extern void md_stop(mddev_t *mddev); | 591 | extern void md_stop(mddev_t *mddev); |
507 | extern void md_stop_writes(mddev_t *mddev); | 592 | extern void md_stop_writes(mddev_t *mddev); |
508 | extern void md_rdev_init(mdk_rdev_t *rdev); | 593 | extern int md_rdev_init(mdk_rdev_t *rdev); |
509 | 594 | ||
510 | extern void mddev_suspend(mddev_t *mddev); | 595 | extern void mddev_suspend(mddev_t *mddev); |
511 | extern void mddev_resume(mddev_t *mddev); | 596 | extern void mddev_resume(mddev_t *mddev); |
@@ -514,4 +599,5 @@ extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, | |||
514 | extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, | 599 | extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, |
515 | mddev_t *mddev); | 600 | mddev_t *mddev); |
516 | extern int mddev_check_plugged(mddev_t *mddev); | 601 | extern int mddev_check_plugged(mddev_t *mddev); |
602 | extern void md_trim_bio(struct bio *bio, int offset, int size); | ||
517 | #endif /* _MD_MD_H */ | 603 | #endif /* _MD_MD_H */ |