aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
author Andrea Bastoni <bastoni@cs.unc.edu> 2010-05-30 19:16:45 -0400
committer Andrea Bastoni <bastoni@cs.unc.edu> 2010-05-30 19:16:45 -0400
commit ada47b5fe13d89735805b566185f4885f5a3f750 (patch)
tree 644b88f8a71896307d71438e9b3af49126ffb22b /drivers/md/md.c
parent 43e98717ad40a4ae64545b5ba047c7b86aa44f4f (diff)
parent 3280f21d43ee541f97f8cda5792150d2dbec20d5 (diff)
Merge branch 'wip-2.6.34' into old-private-masterarchived-private-master
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- drivers/md/md.c 498
1 files changed, 354 insertions, 144 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b182f86a19dd..cefd63daff31 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -39,14 +39,17 @@
39#include <linux/buffer_head.h> /* for invalidate_bdev */ 39#include <linux/buffer_head.h> /* for invalidate_bdev */
40#include <linux/poll.h> 40#include <linux/poll.h>
41#include <linux/ctype.h> 41#include <linux/ctype.h>
42#include <linux/string.h>
42#include <linux/hdreg.h> 43#include <linux/hdreg.h>
43#include <linux/proc_fs.h> 44#include <linux/proc_fs.h>
44#include <linux/random.h> 45#include <linux/random.h>
45#include <linux/reboot.h> 46#include <linux/reboot.h>
46#include <linux/file.h> 47#include <linux/file.h>
48#include <linux/compat.h>
47#include <linux/delay.h> 49#include <linux/delay.h>
48#include <linux/raid/md_p.h> 50#include <linux/raid/md_p.h>
49#include <linux/raid/md_u.h> 51#include <linux/raid/md_u.h>
52#include <linux/slab.h>
50#include "md.h" 53#include "md.h"
51#include "bitmap.h" 54#include "bitmap.h"
52 55
@@ -68,6 +71,12 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
68#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } 71#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
69 72
70/* 73/*
74 * Default number of read corrections we'll attempt on an rdev
75 * before ejecting it from the array. We divide the read error
76 * count by 2 for every hour elapsed between read errors.
77 */
78#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
79/*
71 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' 80 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
72 * is 1000 KB/sec, so the extra system load does not show up that much. 81 * is 1000 KB/sec, so the extra system load does not show up that much.
73 * Increase it if you want to have more _guaranteed_ speed. Note that 82 * Increase it if you want to have more _guaranteed_ speed. Note that
@@ -98,44 +107,40 @@ static struct ctl_table_header *raid_table_header;
98 107
99static ctl_table raid_table[] = { 108static ctl_table raid_table[] = {
100 { 109 {
101 .ctl_name = DEV_RAID_SPEED_LIMIT_MIN,
102 .procname = "speed_limit_min", 110 .procname = "speed_limit_min",
103 .data = &sysctl_speed_limit_min, 111 .data = &sysctl_speed_limit_min,
104 .maxlen = sizeof(int), 112 .maxlen = sizeof(int),
105 .mode = S_IRUGO|S_IWUSR, 113 .mode = S_IRUGO|S_IWUSR,
106 .proc_handler = &proc_dointvec, 114 .proc_handler = proc_dointvec,
107 }, 115 },
108 { 116 {
109 .ctl_name = DEV_RAID_SPEED_LIMIT_MAX,
110 .procname = "speed_limit_max", 117 .procname = "speed_limit_max",
111 .data = &sysctl_speed_limit_max, 118 .data = &sysctl_speed_limit_max,
112 .maxlen = sizeof(int), 119 .maxlen = sizeof(int),
113 .mode = S_IRUGO|S_IWUSR, 120 .mode = S_IRUGO|S_IWUSR,
114 .proc_handler = &proc_dointvec, 121 .proc_handler = proc_dointvec,
115 }, 122 },
116 { .ctl_name = 0 } 123 { }
117}; 124};
118 125
119static ctl_table raid_dir_table[] = { 126static ctl_table raid_dir_table[] = {
120 { 127 {
121 .ctl_name = DEV_RAID,
122 .procname = "raid", 128 .procname = "raid",
123 .maxlen = 0, 129 .maxlen = 0,
124 .mode = S_IRUGO|S_IXUGO, 130 .mode = S_IRUGO|S_IXUGO,
125 .child = raid_table, 131 .child = raid_table,
126 }, 132 },
127 { .ctl_name = 0 } 133 { }
128}; 134};
129 135
130static ctl_table raid_root_table[] = { 136static ctl_table raid_root_table[] = {
131 { 137 {
132 .ctl_name = CTL_DEV,
133 .procname = "dev", 138 .procname = "dev",
134 .maxlen = 0, 139 .maxlen = 0,
135 .mode = 0555, 140 .mode = 0555,
136 .child = raid_dir_table, 141 .child = raid_dir_table,
137 }, 142 },
138 { .ctl_name = 0 } 143 { }
139}; 144};
140 145
141static const struct block_device_operations md_fops; 146static const struct block_device_operations md_fops;
@@ -217,12 +222,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
217 return 0; 222 return 0;
218 } 223 }
219 rcu_read_lock(); 224 rcu_read_lock();
220 if (mddev->suspended) { 225 if (mddev->suspended || mddev->barrier) {
221 DEFINE_WAIT(__wait); 226 DEFINE_WAIT(__wait);
222 for (;;) { 227 for (;;) {
223 prepare_to_wait(&mddev->sb_wait, &__wait, 228 prepare_to_wait(&mddev->sb_wait, &__wait,
224 TASK_UNINTERRUPTIBLE); 229 TASK_UNINTERRUPTIBLE);
225 if (!mddev->suspended) 230 if (!mddev->suspended && !mddev->barrier)
226 break; 231 break;
227 rcu_read_unlock(); 232 rcu_read_unlock();
228 schedule(); 233 schedule();
@@ -264,10 +269,110 @@ static void mddev_resume(mddev_t *mddev)
264 269
265int mddev_congested(mddev_t *mddev, int bits) 270int mddev_congested(mddev_t *mddev, int bits)
266{ 271{
272 if (mddev->barrier)
273 return 1;
267 return mddev->suspended; 274 return mddev->suspended;
268} 275}
269EXPORT_SYMBOL(mddev_congested); 276EXPORT_SYMBOL(mddev_congested);
270 277
278/*
279 * Generic barrier handling for md
280 */
281
282#define POST_REQUEST_BARRIER ((void*)1)
283
284static void md_end_barrier(struct bio *bio, int err)
285{
286 mdk_rdev_t *rdev = bio->bi_private;
287 mddev_t *mddev = rdev->mddev;
288 if (err == -EOPNOTSUPP && mddev->barrier != POST_REQUEST_BARRIER)
289 set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags);
290
291 rdev_dec_pending(rdev, mddev);
292
293 if (atomic_dec_and_test(&mddev->flush_pending)) {
294 if (mddev->barrier == POST_REQUEST_BARRIER) {
295 /* This was a post-request barrier */
296 mddev->barrier = NULL;
297 wake_up(&mddev->sb_wait);
298 } else
299 /* The pre-request barrier has finished */
300 schedule_work(&mddev->barrier_work);
301 }
302 bio_put(bio);
303}
304
305static void submit_barriers(mddev_t *mddev)
306{
307 mdk_rdev_t *rdev;
308
309 rcu_read_lock();
310 list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
311 if (rdev->raid_disk >= 0 &&
312 !test_bit(Faulty, &rdev->flags)) {
313 /* Take two references, one is dropped
314 * when request finishes, one after
315 * we reclaim rcu_read_lock
316 */
317 struct bio *bi;
318 atomic_inc(&rdev->nr_pending);
319 atomic_inc(&rdev->nr_pending);
320 rcu_read_unlock();
321 bi = bio_alloc(GFP_KERNEL, 0);
322 bi->bi_end_io = md_end_barrier;
323 bi->bi_private = rdev;
324 bi->bi_bdev = rdev->bdev;
325 atomic_inc(&mddev->flush_pending);
326 submit_bio(WRITE_BARRIER, bi);
327 rcu_read_lock();
328 rdev_dec_pending(rdev, mddev);
329 }
330 rcu_read_unlock();
331}
332
333static void md_submit_barrier(struct work_struct *ws)
334{
335 mddev_t *mddev = container_of(ws, mddev_t, barrier_work);
336 struct bio *bio = mddev->barrier;
337
338 atomic_set(&mddev->flush_pending, 1);
339
340 if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
341 bio_endio(bio, -EOPNOTSUPP);
342 else if (bio->bi_size == 0)
343 /* an empty barrier - all done */
344 bio_endio(bio, 0);
345 else {
346 bio->bi_rw &= ~(1<<BIO_RW_BARRIER);
347 if (mddev->pers->make_request(mddev->queue, bio))
348 generic_make_request(bio);
349 mddev->barrier = POST_REQUEST_BARRIER;
350 submit_barriers(mddev);
351 }
352 if (atomic_dec_and_test(&mddev->flush_pending)) {
353 mddev->barrier = NULL;
354 wake_up(&mddev->sb_wait);
355 }
356}
357
358void md_barrier_request(mddev_t *mddev, struct bio *bio)
359{
360 spin_lock_irq(&mddev->write_lock);
361 wait_event_lock_irq(mddev->sb_wait,
362 !mddev->barrier,
363 mddev->write_lock, /*nothing*/);
364 mddev->barrier = bio;
365 spin_unlock_irq(&mddev->write_lock);
366
367 atomic_set(&mddev->flush_pending, 1);
368 INIT_WORK(&mddev->barrier_work, md_submit_barrier);
369
370 submit_barriers(mddev);
371
372 if (atomic_dec_and_test(&mddev->flush_pending))
373 schedule_work(&mddev->barrier_work);
374}
375EXPORT_SYMBOL(md_barrier_request);
271 376
272static inline mddev_t *mddev_get(mddev_t *mddev) 377static inline mddev_t *mddev_get(mddev_t *mddev)
273{ 378{
@@ -282,7 +387,9 @@ static void mddev_put(mddev_t *mddev)
282 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) 387 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
283 return; 388 return;
284 if (!mddev->raid_disks && list_empty(&mddev->disks) && 389 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
285 !mddev->hold_active) { 390 mddev->ctime == 0 && !mddev->hold_active) {
391 /* Array is not configured at all, and not held active,
392 * so destroy it */
286 list_del(&mddev->all_mddevs); 393 list_del(&mddev->all_mddevs);
287 if (mddev->gendisk) { 394 if (mddev->gendisk) {
288 /* we did a probe so need to clean up. 395 /* we did a probe so need to clean up.
@@ -367,6 +474,7 @@ static mddev_t * mddev_find(dev_t unit)
367 474
368 mutex_init(&new->open_mutex); 475 mutex_init(&new->open_mutex);
369 mutex_init(&new->reconfig_mutex); 476 mutex_init(&new->reconfig_mutex);
477 mutex_init(&new->bitmap_info.mutex);
370 INIT_LIST_HEAD(&new->disks); 478 INIT_LIST_HEAD(&new->disks);
371 INIT_LIST_HEAD(&new->all_mddevs); 479 INIT_LIST_HEAD(&new->all_mddevs);
372 init_timer(&new->safemode_timer); 480 init_timer(&new->safemode_timer);
@@ -374,6 +482,7 @@ static mddev_t * mddev_find(dev_t unit)
374 atomic_set(&new->openers, 0); 482 atomic_set(&new->openers, 0);
375 atomic_set(&new->active_io, 0); 483 atomic_set(&new->active_io, 0);
376 spin_lock_init(&new->write_lock); 484 spin_lock_init(&new->write_lock);
485 atomic_set(&new->flush_pending, 0);
377 init_waitqueue_head(&new->sb_wait); 486 init_waitqueue_head(&new->sb_wait);
378 init_waitqueue_head(&new->recovery_wait); 487 init_waitqueue_head(&new->recovery_wait);
379 new->reshape_position = MaxSector; 488 new->reshape_position = MaxSector;
@@ -752,7 +861,7 @@ struct super_type {
752 */ 861 */
753int md_check_no_bitmap(mddev_t *mddev) 862int md_check_no_bitmap(mddev_t *mddev)
754{ 863{
755 if (!mddev->bitmap_file && !mddev->bitmap_offset) 864 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
756 return 0; 865 return 0;
757 printk(KERN_ERR "%s: bitmaps are not supported for %s\n", 866 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
758 mdname(mddev), mddev->pers->name); 867 mdname(mddev), mddev->pers->name);
@@ -880,8 +989,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
880 mddev->raid_disks = sb->raid_disks; 989 mddev->raid_disks = sb->raid_disks;
881 mddev->dev_sectors = sb->size * 2; 990 mddev->dev_sectors = sb->size * 2;
882 mddev->events = ev1; 991 mddev->events = ev1;
883 mddev->bitmap_offset = 0; 992 mddev->bitmap_info.offset = 0;
884 mddev->default_bitmap_offset = MD_SB_BYTES >> 9; 993 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
885 994
886 if (mddev->minor_version >= 91) { 995 if (mddev->minor_version >= 91) {
887 mddev->reshape_position = sb->reshape_position; 996 mddev->reshape_position = sb->reshape_position;
@@ -915,8 +1024,9 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
915 mddev->max_disks = MD_SB_DISKS; 1024 mddev->max_disks = MD_SB_DISKS;
916 1025
917 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && 1026 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
918 mddev->bitmap_file == NULL) 1027 mddev->bitmap_info.file == NULL)
919 mddev->bitmap_offset = mddev->default_bitmap_offset; 1028 mddev->bitmap_info.offset =
1029 mddev->bitmap_info.default_offset;
920 1030
921 } else if (mddev->pers == NULL) { 1031 } else if (mddev->pers == NULL) {
922 /* Insist on good event counter while assembling */ 1032 /* Insist on good event counter while assembling */
@@ -1033,7 +1143,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1033 sb->layout = mddev->layout; 1143 sb->layout = mddev->layout;
1034 sb->chunk_size = mddev->chunk_sectors << 9; 1144 sb->chunk_size = mddev->chunk_sectors << 9;
1035 1145
1036 if (mddev->bitmap && mddev->bitmap_file == NULL) 1146 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1037 sb->state |= (1<<MD_SB_BITMAP_PRESENT); 1147 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1038 1148
1039 sb->disks[0].state = (1<<MD_DISK_REMOVED); 1149 sb->disks[0].state = (1<<MD_DISK_REMOVED);
@@ -1111,7 +1221,7 @@ super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1111{ 1221{
1112 if (num_sectors && num_sectors < rdev->mddev->dev_sectors) 1222 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1113 return 0; /* component must fit device */ 1223 return 0; /* component must fit device */
1114 if (rdev->mddev->bitmap_offset) 1224 if (rdev->mddev->bitmap_info.offset)
1115 return 0; /* can't move bitmap */ 1225 return 0; /* can't move bitmap */
1116 rdev->sb_start = calc_dev_sboffset(rdev->bdev); 1226 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
1117 if (!num_sectors || num_sectors > rdev->sb_start) 1227 if (!num_sectors || num_sectors > rdev->sb_start)
@@ -1290,8 +1400,8 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1290 mddev->raid_disks = le32_to_cpu(sb->raid_disks); 1400 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1291 mddev->dev_sectors = le64_to_cpu(sb->size); 1401 mddev->dev_sectors = le64_to_cpu(sb->size);
1292 mddev->events = ev1; 1402 mddev->events = ev1;
1293 mddev->bitmap_offset = 0; 1403 mddev->bitmap_info.offset = 0;
1294 mddev->default_bitmap_offset = 1024 >> 9; 1404 mddev->bitmap_info.default_offset = 1024 >> 9;
1295 1405
1296 mddev->recovery_cp = le64_to_cpu(sb->resync_offset); 1406 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1297 memcpy(mddev->uuid, sb->set_uuid, 16); 1407 memcpy(mddev->uuid, sb->set_uuid, 16);
@@ -1299,8 +1409,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1299 mddev->max_disks = (4096-256)/2; 1409 mddev->max_disks = (4096-256)/2;
1300 1410
1301 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) && 1411 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1302 mddev->bitmap_file == NULL ) 1412 mddev->bitmap_info.file == NULL )
1303 mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); 1413 mddev->bitmap_info.offset =
1414 (__s32)le32_to_cpu(sb->bitmap_offset);
1304 1415
1305 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) { 1416 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1306 mddev->reshape_position = le64_to_cpu(sb->reshape_position); 1417 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
@@ -1394,19 +1505,17 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1394 sb->level = cpu_to_le32(mddev->level); 1505 sb->level = cpu_to_le32(mddev->level);
1395 sb->layout = cpu_to_le32(mddev->layout); 1506 sb->layout = cpu_to_le32(mddev->layout);
1396 1507
1397 if (mddev->bitmap && mddev->bitmap_file == NULL) { 1508 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1398 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); 1509 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
1399 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); 1510 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1400 } 1511 }
1401 1512
1402 if (rdev->raid_disk >= 0 && 1513 if (rdev->raid_disk >= 0 &&
1403 !test_bit(In_sync, &rdev->flags)) { 1514 !test_bit(In_sync, &rdev->flags)) {
1404 if (rdev->recovery_offset > 0) { 1515 sb->feature_map |=
1405 sb->feature_map |= 1516 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1406 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); 1517 sb->recovery_offset =
1407 sb->recovery_offset = 1518 cpu_to_le64(rdev->recovery_offset);
1408 cpu_to_le64(rdev->recovery_offset);
1409 }
1410 } 1519 }
1411 1520
1412 if (mddev->reshape_position != MaxSector) { 1521 if (mddev->reshape_position != MaxSector) {
@@ -1440,7 +1549,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1440 sb->dev_roles[i] = cpu_to_le16(0xfffe); 1549 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1441 else if (test_bit(In_sync, &rdev2->flags)) 1550 else if (test_bit(In_sync, &rdev2->flags))
1442 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); 1551 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1443 else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0) 1552 else if (rdev2->raid_disk >= 0)
1444 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); 1553 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1445 else 1554 else
1446 sb->dev_roles[i] = cpu_to_le16(0xffff); 1555 sb->dev_roles[i] = cpu_to_le16(0xffff);
@@ -1462,7 +1571,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1462 max_sectors -= rdev->data_offset; 1571 max_sectors -= rdev->data_offset;
1463 if (!num_sectors || num_sectors > max_sectors) 1572 if (!num_sectors || num_sectors > max_sectors)
1464 num_sectors = max_sectors; 1573 num_sectors = max_sectors;
1465 } else if (rdev->mddev->bitmap_offset) { 1574 } else if (rdev->mddev->bitmap_info.offset) {
1466 /* minor version 0 with bitmap we can't move */ 1575 /* minor version 0 with bitmap we can't move */
1467 return 0; 1576 return 0;
1468 } else { 1577 } else {
@@ -1830,15 +1939,11 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
1830 1939
1831 uuid = sb->set_uuid; 1940 uuid = sb->set_uuid;
1832 printk(KERN_INFO 1941 printk(KERN_INFO
1833 "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x" 1942 "md: SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n"
1834 ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
1835 "md: Name: \"%s\" CT:%llu\n", 1943 "md: Name: \"%s\" CT:%llu\n",
1836 le32_to_cpu(sb->major_version), 1944 le32_to_cpu(sb->major_version),
1837 le32_to_cpu(sb->feature_map), 1945 le32_to_cpu(sb->feature_map),
1838 uuid[0], uuid[1], uuid[2], uuid[3], 1946 uuid,
1839 uuid[4], uuid[5], uuid[6], uuid[7],
1840 uuid[8], uuid[9], uuid[10], uuid[11],
1841 uuid[12], uuid[13], uuid[14], uuid[15],
1842 sb->set_name, 1947 sb->set_name,
1843 (unsigned long long)le64_to_cpu(sb->ctime) 1948 (unsigned long long)le64_to_cpu(sb->ctime)
1844 & MD_SUPERBLOCK_1_TIME_SEC_MASK); 1949 & MD_SUPERBLOCK_1_TIME_SEC_MASK);
@@ -1847,8 +1952,7 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
1847 printk(KERN_INFO 1952 printk(KERN_INFO
1848 "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu" 1953 "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
1849 " RO:%llu\n" 1954 " RO:%llu\n"
1850 "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x" 1955 "md: Dev:%08x UUID: %pU\n"
1851 ":%02x%02x%02x%02x%02x%02x\n"
1852 "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" 1956 "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
1853 "md: (MaxDev:%u) \n", 1957 "md: (MaxDev:%u) \n",
1854 le32_to_cpu(sb->level), 1958 le32_to_cpu(sb->level),
@@ -1861,10 +1965,7 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
1861 (unsigned long long)le64_to_cpu(sb->super_offset), 1965 (unsigned long long)le64_to_cpu(sb->super_offset),
1862 (unsigned long long)le64_to_cpu(sb->recovery_offset), 1966 (unsigned long long)le64_to_cpu(sb->recovery_offset),
1863 le32_to_cpu(sb->dev_number), 1967 le32_to_cpu(sb->dev_number),
1864 uuid[0], uuid[1], uuid[2], uuid[3], 1968 uuid,
1865 uuid[4], uuid[5], uuid[6], uuid[7],
1866 uuid[8], uuid[9], uuid[10], uuid[11],
1867 uuid[12], uuid[13], uuid[14], uuid[15],
1868 sb->devflags, 1969 sb->devflags,
1869 (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK, 1970 (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
1870 (unsigned long long)le64_to_cpu(sb->events), 1971 (unsigned long long)le64_to_cpu(sb->events),
@@ -2008,12 +2109,18 @@ repeat:
2008 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ 2109 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
2009 /* .. if the array isn't clean, an 'even' event must also go 2110 /* .. if the array isn't clean, an 'even' event must also go
2010 * to spares. */ 2111 * to spares. */
2011 if ((mddev->events&1)==0) 2112 if ((mddev->events&1)==0) {
2012 nospares = 0; 2113 nospares = 0;
2114 sync_req = 2; /* force a second update to get the
2115 * even/odd in sync */
2116 }
2013 } else { 2117 } else {
2014 /* otherwise an 'odd' event must go to spares */ 2118 /* otherwise an 'odd' event must go to spares */
2015 if ((mddev->events&1)) 2119 if ((mddev->events&1)) {
2016 nospares = 0; 2120 nospares = 0;
2121 sync_req = 2; /* force a second update to get the
2122 * even/odd in sync */
2123 }
2017 } 2124 }
2018 } 2125 }
2019 2126
@@ -2446,12 +2553,49 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2446static struct rdev_sysfs_entry rdev_size = 2553static struct rdev_sysfs_entry rdev_size =
2447__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store); 2554__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
2448 2555
2556
2557static ssize_t recovery_start_show(mdk_rdev_t *rdev, char *page)
2558{
2559 unsigned long long recovery_start = rdev->recovery_offset;
2560
2561 if (test_bit(In_sync, &rdev->flags) ||
2562 recovery_start == MaxSector)
2563 return sprintf(page, "none\n");
2564
2565 return sprintf(page, "%llu\n", recovery_start);
2566}
2567
2568static ssize_t recovery_start_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2569{
2570 unsigned long long recovery_start;
2571
2572 if (cmd_match(buf, "none"))
2573 recovery_start = MaxSector;
2574 else if (strict_strtoull(buf, 10, &recovery_start))
2575 return -EINVAL;
2576
2577 if (rdev->mddev->pers &&
2578 rdev->raid_disk >= 0)
2579 return -EBUSY;
2580
2581 rdev->recovery_offset = recovery_start;
2582 if (recovery_start == MaxSector)
2583 set_bit(In_sync, &rdev->flags);
2584 else
2585 clear_bit(In_sync, &rdev->flags);
2586 return len;
2587}
2588
2589static struct rdev_sysfs_entry rdev_recovery_start =
2590__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
2591
2449static struct attribute *rdev_default_attrs[] = { 2592static struct attribute *rdev_default_attrs[] = {
2450 &rdev_state.attr, 2593 &rdev_state.attr,
2451 &rdev_errors.attr, 2594 &rdev_errors.attr,
2452 &rdev_slot.attr, 2595 &rdev_slot.attr,
2453 &rdev_offset.attr, 2596 &rdev_offset.attr,
2454 &rdev_size.attr, 2597 &rdev_size.attr,
2598 &rdev_recovery_start.attr,
2455 NULL, 2599 NULL,
2456}; 2600};
2457static ssize_t 2601static ssize_t
@@ -2505,7 +2649,7 @@ static void rdev_free(struct kobject *ko)
2505 mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj); 2649 mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj);
2506 kfree(rdev); 2650 kfree(rdev);
2507} 2651}
2508static struct sysfs_ops rdev_sysfs_ops = { 2652static const struct sysfs_ops rdev_sysfs_ops = {
2509 .show = rdev_attr_show, 2653 .show = rdev_attr_show,
2510 .store = rdev_attr_store, 2654 .store = rdev_attr_store,
2511}; 2655};
@@ -2553,6 +2697,8 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
2553 rdev->flags = 0; 2697 rdev->flags = 0;
2554 rdev->data_offset = 0; 2698 rdev->data_offset = 0;
2555 rdev->sb_events = 0; 2699 rdev->sb_events = 0;
2700 rdev->last_read_error.tv_sec = 0;
2701 rdev->last_read_error.tv_nsec = 0;
2556 atomic_set(&rdev->nr_pending, 0); 2702 atomic_set(&rdev->nr_pending, 0);
2557 atomic_set(&rdev->read_errors, 0); 2703 atomic_set(&rdev->read_errors, 0);
2558 atomic_set(&rdev->corrected_errors, 0); 2704 atomic_set(&rdev->corrected_errors, 0);
@@ -2663,6 +2809,47 @@ static void analyze_sbs(mddev_t * mddev)
2663 } 2809 }
2664} 2810}
2665 2811
2812/* Read a fixed-point number.
2813 * Numbers in sysfs attributes should be in "standard" units where
2814 * possible, so time should be in seconds.
2815 * However we internally use a a much smaller unit such as
2816 * milliseconds or jiffies.
2817 * This function takes a decimal number with a possible fractional
2818 * component, and produces an integer which is the result of
2819 * multiplying that number by 10^'scale',
2820 * all without any floating-point arithmetic.
2821 */
2822int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
2823{
2824 unsigned long result = 0;
2825 long decimals = -1;
2826 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
2827 if (*cp == '.')
2828 decimals = 0;
2829 else if (decimals < scale) {
2830 unsigned int value;
2831 value = *cp - '0';
2832 result = result * 10 + value;
2833 if (decimals >= 0)
2834 decimals++;
2835 }
2836 cp++;
2837 }
2838 if (*cp == '\n')
2839 cp++;
2840 if (*cp)
2841 return -EINVAL;
2842 if (decimals < 0)
2843 decimals = 0;
2844 while (decimals < scale) {
2845 result *= 10;
2846 decimals ++;
2847 }
2848 *res = result;
2849 return 0;
2850}
2851
2852
2666static void md_safemode_timeout(unsigned long data); 2853static void md_safemode_timeout(unsigned long data);
2667 2854
2668static ssize_t 2855static ssize_t
@@ -2674,31 +2861,10 @@ safe_delay_show(mddev_t *mddev, char *page)
2674static ssize_t 2861static ssize_t
2675safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) 2862safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
2676{ 2863{
2677 int scale=1;
2678 int dot=0;
2679 int i;
2680 unsigned long msec; 2864 unsigned long msec;
2681 char buf[30];
2682 2865
2683 /* remove a period, and count digits after it */ 2866 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
2684 if (len >= sizeof(buf))
2685 return -EINVAL;
2686 strlcpy(buf, cbuf, sizeof(buf));
2687 for (i=0; i<len; i++) {
2688 if (dot) {
2689 if (isdigit(buf[i])) {
2690 buf[i-1] = buf[i];
2691 scale *= 10;
2692 }
2693 buf[i] = 0;
2694 } else if (buf[i] == '.') {
2695 dot=1;
2696 buf[i] = 0;
2697 }
2698 }
2699 if (strict_strtoul(buf, 10, &msec) < 0)
2700 return -EINVAL; 2867 return -EINVAL;
2701 msec = (msec * 1000) / scale;
2702 if (msec == 0) 2868 if (msec == 0)
2703 mddev->safemode_delay = 0; 2869 mddev->safemode_delay = 0;
2704 else { 2870 else {
@@ -2974,7 +3140,9 @@ resync_start_store(mddev_t *mddev, const char *buf, size_t len)
2974 3140
2975 if (mddev->pers) 3141 if (mddev->pers)
2976 return -EBUSY; 3142 return -EBUSY;
2977 if (!*buf || (*e && *e != '\n')) 3143 if (cmd_match(buf, "none"))
3144 n = MaxSector;
3145 else if (!*buf || (*e && *e != '\n'))
2978 return -EINVAL; 3146 return -EINVAL;
2979 3147
2980 mddev->recovery_cp = n; 3148 mddev->recovery_cp = n;
@@ -3170,6 +3338,29 @@ static struct md_sysfs_entry md_array_state =
3170__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); 3338__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3171 3339
3172static ssize_t 3340static ssize_t
3341max_corrected_read_errors_show(mddev_t *mddev, char *page) {
3342 return sprintf(page, "%d\n",
3343 atomic_read(&mddev->max_corr_read_errors));
3344}
3345
3346static ssize_t
3347max_corrected_read_errors_store(mddev_t *mddev, const char *buf, size_t len)
3348{
3349 char *e;
3350 unsigned long n = simple_strtoul(buf, &e, 10);
3351
3352 if (*buf && (*e == 0 || *e == '\n')) {
3353 atomic_set(&mddev->max_corr_read_errors, n);
3354 return len;
3355 }
3356 return -EINVAL;
3357}
3358
3359static struct md_sysfs_entry max_corr_read_errors =
3360__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
3361 max_corrected_read_errors_store);
3362
3363static ssize_t
3173null_show(mddev_t *mddev, char *page) 3364null_show(mddev_t *mddev, char *page)
3174{ 3365{
3175 return -EINVAL; 3366 return -EINVAL;
@@ -3250,8 +3441,7 @@ bitmap_store(mddev_t *mddev, const char *buf, size_t len)
3250 } 3441 }
3251 if (*end && !isspace(*end)) break; 3442 if (*end && !isspace(*end)) break;
3252 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk); 3443 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
3253 buf = end; 3444 buf = skip_spaces(end);
3254 while (isspace(*buf)) buf++;
3255 } 3445 }
3256 bitmap_unplug(mddev->bitmap); /* flush the bits to disk */ 3446 bitmap_unplug(mddev->bitmap); /* flush the bits to disk */
3257out: 3447out:
@@ -3794,6 +3984,7 @@ static struct attribute *md_default_attrs[] = {
3794 &md_array_state.attr, 3984 &md_array_state.attr,
3795 &md_reshape_position.attr, 3985 &md_reshape_position.attr,
3796 &md_array_size.attr, 3986 &md_array_size.attr,
3987 &max_corr_read_errors.attr,
3797 NULL, 3988 NULL,
3798}; 3989};
3799 3990
@@ -3875,7 +4066,7 @@ static void md_free(struct kobject *ko)
3875 kfree(mddev); 4066 kfree(mddev);
3876} 4067}
3877 4068
3878static struct sysfs_ops md_sysfs_ops = { 4069static const struct sysfs_ops md_sysfs_ops = {
3879 .show = md_attr_show, 4070 .show = md_attr_show,
3880 .store = md_attr_store, 4071 .store = md_attr_store,
3881}; 4072};
@@ -3891,13 +4082,16 @@ static void mddev_delayed_delete(struct work_struct *ws)
3891{ 4082{
3892 mddev_t *mddev = container_of(ws, mddev_t, del_work); 4083 mddev_t *mddev = container_of(ws, mddev_t, del_work);
3893 4084
3894 if (mddev->private == &md_redundancy_group) { 4085 if (mddev->private) {
3895 sysfs_remove_group(&mddev->kobj, &md_redundancy_group); 4086 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
4087 if (mddev->private != (void*)1)
4088 sysfs_remove_group(&mddev->kobj, mddev->private);
3896 if (mddev->sysfs_action) 4089 if (mddev->sysfs_action)
3897 sysfs_put(mddev->sysfs_action); 4090 sysfs_put(mddev->sysfs_action);
3898 mddev->sysfs_action = NULL; 4091 mddev->sysfs_action = NULL;
3899 mddev->private = NULL; 4092 mddev->private = NULL;
3900 } 4093 }
4094 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
3901 kobject_del(&mddev->kobj); 4095 kobject_del(&mddev->kobj);
3902 kobject_put(&mddev->kobj); 4096 kobject_put(&mddev->kobj);
3903} 4097}
@@ -3989,6 +4183,8 @@ static int md_alloc(dev_t dev, char *name)
3989 disk->disk_name); 4183 disk->disk_name);
3990 error = 0; 4184 error = 0;
3991 } 4185 }
4186 if (sysfs_create_group(&mddev->kobj, &md_bitmap_group))
4187 printk(KERN_DEBUG "pointless warning\n");
3992 abort: 4188 abort:
3993 mutex_unlock(&disks_mutex); 4189 mutex_unlock(&disks_mutex);
3994 if (!error) { 4190 if (!error) {
@@ -4100,10 +4296,7 @@ static int do_md_run(mddev_t * mddev)
4100 sysfs_notify_dirent(rdev->sysfs_state); 4296 sysfs_notify_dirent(rdev->sysfs_state);
4101 } 4297 }
4102 4298
4103 md_probe(mddev->unit, NULL, NULL);
4104 disk = mddev->gendisk; 4299 disk = mddev->gendisk;
4105 if (!disk)
4106 return -ENOMEM;
4107 4300
4108 spin_lock(&pers_lock); 4301 spin_lock(&pers_lock);
4109 pers = find_pers(mddev->level, mddev->clevel); 4302 pers = find_pers(mddev->level, mddev->clevel);
@@ -4170,7 +4363,7 @@ static int do_md_run(mddev_t * mddev)
4170 mddev->barriers_work = 1; 4363 mddev->barriers_work = 1;
4171 mddev->ok_start_degraded = start_dirty_degraded; 4364 mddev->ok_start_degraded = start_dirty_degraded;
4172 4365
4173 if (start_readonly) 4366 if (start_readonly && mddev->ro == 0)
4174 mddev->ro = 2; /* read-only, but switch on first write */ 4367 mddev->ro = 2; /* read-only, but switch on first write */
4175 4368
4176 err = mddev->pers->run(mddev); 4369 err = mddev->pers->run(mddev);
@@ -4210,6 +4403,8 @@ static int do_md_run(mddev_t * mddev)
4210 mddev->ro = 0; 4403 mddev->ro = 0;
4211 4404
4212 atomic_set(&mddev->writes_pending,0); 4405 atomic_set(&mddev->writes_pending,0);
4406 atomic_set(&mddev->max_corr_read_errors,
4407 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
4213 mddev->safemode = 0; 4408 mddev->safemode = 0;
4214 mddev->safemode_timer.function = md_safemode_timeout; 4409 mddev->safemode_timer.function = md_safemode_timeout;
4215 mddev->safemode_timer.data = (unsigned long) mddev; 4410 mddev->safemode_timer.data = (unsigned long) mddev;
@@ -4232,33 +4427,6 @@ static int do_md_run(mddev_t * mddev)
4232 4427
4233 set_capacity(disk, mddev->array_sectors); 4428 set_capacity(disk, mddev->array_sectors);
4234 4429
4235 /* If there is a partially-recovered drive we need to
4236 * start recovery here. If we leave it to md_check_recovery,
4237 * it will remove the drives and not do the right thing
4238 */
4239 if (mddev->degraded && !mddev->sync_thread) {
4240 int spares = 0;
4241 list_for_each_entry(rdev, &mddev->disks, same_set)
4242 if (rdev->raid_disk >= 0 &&
4243 !test_bit(In_sync, &rdev->flags) &&
4244 !test_bit(Faulty, &rdev->flags))
4245 /* complete an interrupted recovery */
4246 spares++;
4247 if (spares && mddev->pers->sync_request) {
4248 mddev->recovery = 0;
4249 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
4250 mddev->sync_thread = md_register_thread(md_do_sync,
4251 mddev,
4252 "resync");
4253 if (!mddev->sync_thread) {
4254 printk(KERN_ERR "%s: could not start resync"
4255 " thread...\n",
4256 mdname(mddev));
4257 /* leave the spares where they are, it shouldn't hurt */
4258 mddev->recovery = 0;
4259 }
4260 }
4261 }
4262 md_wakeup_thread(mddev->thread); 4430 md_wakeup_thread(mddev->thread);
4263 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ 4431 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
4264 4432
@@ -4314,7 +4482,7 @@ static int deny_bitmap_write_access(struct file * file)
4314 return 0; 4482 return 0;
4315} 4483}
4316 4484
4317static void restore_bitmap_write_access(struct file *file) 4485void restore_bitmap_write_access(struct file *file)
4318{ 4486{
4319 struct inode *inode = file->f_mapping->host; 4487 struct inode *inode = file->f_mapping->host;
4320 4488
@@ -4368,8 +4536,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4368 mddev->queue->unplug_fn = NULL; 4536 mddev->queue->unplug_fn = NULL;
4369 mddev->queue->backing_dev_info.congested_fn = NULL; 4537 mddev->queue->backing_dev_info.congested_fn = NULL;
4370 module_put(mddev->pers->owner); 4538 module_put(mddev->pers->owner);
4371 if (mddev->pers->sync_request) 4539 if (mddev->pers->sync_request && mddev->private == NULL)
4372 mddev->private = &md_redundancy_group; 4540 mddev->private = (void*)1;
4373 mddev->pers = NULL; 4541 mddev->pers = NULL;
4374 /* tell userspace to handle 'inactive' */ 4542 /* tell userspace to handle 'inactive' */
4375 sysfs_notify_dirent(mddev->sysfs_state); 4543 sysfs_notify_dirent(mddev->sysfs_state);
@@ -4409,15 +4577,12 @@ out:
4409 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); 4577 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
4410 4578
4411 bitmap_destroy(mddev); 4579 bitmap_destroy(mddev);
4412 if (mddev->bitmap_file) { 4580 if (mddev->bitmap_info.file) {
4413 restore_bitmap_write_access(mddev->bitmap_file); 4581 restore_bitmap_write_access(mddev->bitmap_info.file);
4414 fput(mddev->bitmap_file); 4582 fput(mddev->bitmap_info.file);
4415 mddev->bitmap_file = NULL; 4583 mddev->bitmap_info.file = NULL;
4416 } 4584 }
4417 mddev->bitmap_offset = 0; 4585 mddev->bitmap_info.offset = 0;
4418
4419 /* make sure all md_delayed_delete calls have finished */
4420 flush_scheduled_work();
4421 4586
4422 export_array(mddev); 4587 export_array(mddev);
4423 4588
@@ -4455,6 +4620,11 @@ out:
4455 mddev->degraded = 0; 4620 mddev->degraded = 0;
4456 mddev->barriers_work = 0; 4621 mddev->barriers_work = 0;
4457 mddev->safemode = 0; 4622 mddev->safemode = 0;
4623 mddev->bitmap_info.offset = 0;
4624 mddev->bitmap_info.default_offset = 0;
4625 mddev->bitmap_info.chunksize = 0;
4626 mddev->bitmap_info.daemon_sleep = 0;
4627 mddev->bitmap_info.max_write_behind = 0;
4458 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); 4628 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
4459 if (mddev->hold_active == UNTIL_STOP) 4629 if (mddev->hold_active == UNTIL_STOP)
4460 mddev->hold_active = 0; 4630 mddev->hold_active = 0;
@@ -4640,7 +4810,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
4640 info.state = 0; 4810 info.state = 0;
4641 if (mddev->in_sync) 4811 if (mddev->in_sync)
4642 info.state = (1<<MD_SB_CLEAN); 4812 info.state = (1<<MD_SB_CLEAN);
4643 if (mddev->bitmap && mddev->bitmap_offset) 4813 if (mddev->bitmap && mddev->bitmap_info.offset)
4644 info.state = (1<<MD_SB_BITMAP_PRESENT); 4814 info.state = (1<<MD_SB_BITMAP_PRESENT);
4645 info.active_disks = insync; 4815 info.active_disks = insync;
4646 info.working_disks = working; 4816 info.working_disks = working;
@@ -4998,23 +5168,23 @@ static int set_bitmap_file(mddev_t *mddev, int fd)
4998 if (fd >= 0) { 5168 if (fd >= 0) {
4999 if (mddev->bitmap) 5169 if (mddev->bitmap)
5000 return -EEXIST; /* cannot add when bitmap is present */ 5170 return -EEXIST; /* cannot add when bitmap is present */
5001 mddev->bitmap_file = fget(fd); 5171 mddev->bitmap_info.file = fget(fd);
5002 5172
5003 if (mddev->bitmap_file == NULL) { 5173 if (mddev->bitmap_info.file == NULL) {
5004 printk(KERN_ERR "%s: error: failed to get bitmap file\n", 5174 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
5005 mdname(mddev)); 5175 mdname(mddev));
5006 return -EBADF; 5176 return -EBADF;
5007 } 5177 }
5008 5178
5009 err = deny_bitmap_write_access(mddev->bitmap_file); 5179 err = deny_bitmap_write_access(mddev->bitmap_info.file);
5010 if (err) { 5180 if (err) {
5011 printk(KERN_ERR "%s: error: bitmap file is already in use\n", 5181 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
5012 mdname(mddev)); 5182 mdname(mddev));
5013 fput(mddev->bitmap_file); 5183 fput(mddev->bitmap_info.file);
5014 mddev->bitmap_file = NULL; 5184 mddev->bitmap_info.file = NULL;
5015 return err; 5185 return err;
5016 } 5186 }
5017 mddev->bitmap_offset = 0; /* file overrides offset */ 5187 mddev->bitmap_info.offset = 0; /* file overrides offset */
5018 } else if (mddev->bitmap == NULL) 5188 } else if (mddev->bitmap == NULL)
5019 return -ENOENT; /* cannot remove what isn't there */ 5189 return -ENOENT; /* cannot remove what isn't there */
5020 err = 0; 5190 err = 0;
@@ -5029,11 +5199,11 @@ static int set_bitmap_file(mddev_t *mddev, int fd)
5029 mddev->pers->quiesce(mddev, 0); 5199 mddev->pers->quiesce(mddev, 0);
5030 } 5200 }
5031 if (fd < 0) { 5201 if (fd < 0) {
5032 if (mddev->bitmap_file) { 5202 if (mddev->bitmap_info.file) {
5033 restore_bitmap_write_access(mddev->bitmap_file); 5203 restore_bitmap_write_access(mddev->bitmap_info.file);
5034 fput(mddev->bitmap_file); 5204 fput(mddev->bitmap_info.file);
5035 } 5205 }
5036 mddev->bitmap_file = NULL; 5206 mddev->bitmap_info.file = NULL;
5037 } 5207 }
5038 5208
5039 return err; 5209 return err;
@@ -5070,6 +5240,10 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
5070 mddev->minor_version = info->minor_version; 5240 mddev->minor_version = info->minor_version;
5071 mddev->patch_version = info->patch_version; 5241 mddev->patch_version = info->patch_version;
5072 mddev->persistent = !info->not_persistent; 5242 mddev->persistent = !info->not_persistent;
5243 /* ensure mddev_put doesn't delete this now that there
5244 * is some minimal configuration.
5245 */
5246 mddev->ctime = get_seconds();
5073 return 0; 5247 return 0;
5074 } 5248 }
5075 mddev->major_version = MD_MAJOR_VERSION; 5249 mddev->major_version = MD_MAJOR_VERSION;
@@ -5100,8 +5274,8 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
5100 mddev->flags = 0; 5274 mddev->flags = 0;
5101 set_bit(MD_CHANGE_DEVS, &mddev->flags); 5275 set_bit(MD_CHANGE_DEVS, &mddev->flags);
5102 5276
5103 mddev->default_bitmap_offset = MD_SB_BYTES >> 9; 5277 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
5104 mddev->bitmap_offset = 0; 5278 mddev->bitmap_info.offset = 0;
5105 5279
5106 mddev->reshape_position = MaxSector; 5280 mddev->reshape_position = MaxSector;
5107 5281
@@ -5201,7 +5375,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
5201 int state = 0; 5375 int state = 0;
5202 5376
5203 /* calculate expected state,ignoring low bits */ 5377 /* calculate expected state,ignoring low bits */
5204 if (mddev->bitmap && mddev->bitmap_offset) 5378 if (mddev->bitmap && mddev->bitmap_info.offset)
5205 state |= (1 << MD_SB_BITMAP_PRESENT); 5379 state |= (1 << MD_SB_BITMAP_PRESENT);
5206 5380
5207 if (mddev->major_version != info->major_version || 5381 if (mddev->major_version != info->major_version ||
@@ -5260,9 +5434,10 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
5260 /* add the bitmap */ 5434 /* add the bitmap */
5261 if (mddev->bitmap) 5435 if (mddev->bitmap)
5262 return -EEXIST; 5436 return -EEXIST;
5263 if (mddev->default_bitmap_offset == 0) 5437 if (mddev->bitmap_info.default_offset == 0)
5264 return -EINVAL; 5438 return -EINVAL;
5265 mddev->bitmap_offset = mddev->default_bitmap_offset; 5439 mddev->bitmap_info.offset =
5440 mddev->bitmap_info.default_offset;
5266 mddev->pers->quiesce(mddev, 1); 5441 mddev->pers->quiesce(mddev, 1);
5267 rv = bitmap_create(mddev); 5442 rv = bitmap_create(mddev);
5268 if (rv) 5443 if (rv)
@@ -5277,7 +5452,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
5277 mddev->pers->quiesce(mddev, 1); 5452 mddev->pers->quiesce(mddev, 1);
5278 bitmap_destroy(mddev); 5453 bitmap_destroy(mddev);
5279 mddev->pers->quiesce(mddev, 0); 5454 mddev->pers->quiesce(mddev, 0);
5280 mddev->bitmap_offset = 0; 5455 mddev->bitmap_info.offset = 0;
5281 } 5456 }
5282 } 5457 }
5283 md_update_sb(mddev, 1); 5458 md_update_sb(mddev, 1);
@@ -5528,6 +5703,25 @@ done:
5528abort: 5703abort:
5529 return err; 5704 return err;
5530} 5705}
5706#ifdef CONFIG_COMPAT
/*
 * md_compat_ioctl - compat ioctl entry point; built only under CONFIG_COMPAT.
 *
 * Adjusts @arg according to @cmd and then forwards the call, with @bdev,
 * @mode and @cmd unchanged, to md_ioctl():
 *   - HOT_REMOVE_DISK, HOT_ADD_DISK, SET_DISK_FAULTY and SET_BITMAP_FILE
 *     pass @arg through untouched;
 *   - every other command has @arg run through compat_ptr() and cast back
 *     to unsigned long first.
 *
 * Returns whatever md_ioctl() returns.
 */
5707static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
5708 unsigned int cmd, unsigned long arg)
5709{
5710 switch (cmd) {
5711 case HOT_REMOVE_DISK:
5712 case HOT_ADD_DISK:
5713 case SET_DISK_FAULTY:
5714 case SET_BITMAP_FILE:
5715 /* These commands take an integer arg, not a pointer: do not convert */
5716 break;
5717 default:
5718 arg = (unsigned long)compat_ptr(arg); /* all remaining commands get arg converted */
5719 break;
5720 }
5721
5722 return md_ioctl(bdev, mode, cmd, arg);
5723}
5724#endif /* CONFIG_COMPAT */
5531 5725
5532static int md_open(struct block_device *bdev, fmode_t mode) 5726static int md_open(struct block_device *bdev, fmode_t mode)
5533{ 5727{
@@ -5593,6 +5787,9 @@ static const struct block_device_operations md_fops =
5593 .open = md_open, 5787 .open = md_open,
5594 .release = md_release, 5788 .release = md_release,
5595 .ioctl = md_ioctl, 5789 .ioctl = md_ioctl,
5790#ifdef CONFIG_COMPAT
5791 .compat_ioctl = md_compat_ioctl,
5792#endif
5596 .getgeo = md_getgeo, 5793 .getgeo = md_getgeo,
5597 .media_changed = md_media_changed, 5794 .media_changed = md_media_changed,
5598 .revalidate_disk= md_revalidate, 5795 .revalidate_disk= md_revalidate,
@@ -5986,14 +6183,14 @@ static int md_seq_show(struct seq_file *seq, void *v)
5986 unsigned long chunk_kb; 6183 unsigned long chunk_kb;
5987 unsigned long flags; 6184 unsigned long flags;
5988 spin_lock_irqsave(&bitmap->lock, flags); 6185 spin_lock_irqsave(&bitmap->lock, flags);
5989 chunk_kb = bitmap->chunksize >> 10; 6186 chunk_kb = mddev->bitmap_info.chunksize >> 10;
5990 seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], " 6187 seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
5991 "%lu%s chunk", 6188 "%lu%s chunk",
5992 bitmap->pages - bitmap->missing_pages, 6189 bitmap->pages - bitmap->missing_pages,
5993 bitmap->pages, 6190 bitmap->pages,
5994 (bitmap->pages - bitmap->missing_pages) 6191 (bitmap->pages - bitmap->missing_pages)
5995 << (PAGE_SHIFT - 10), 6192 << (PAGE_SHIFT - 10),
5996 chunk_kb ? chunk_kb : bitmap->chunksize, 6193 chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize,
5997 chunk_kb ? "KB" : "B"); 6194 chunk_kb ? "KB" : "B");
5998 if (bitmap->file) { 6195 if (bitmap->file) {
5999 seq_printf(seq, ", file: "); 6196 seq_printf(seq, ", file: ");
@@ -6279,10 +6476,11 @@ void md_do_sync(mddev_t *mddev)
6279 mddev->curr_resync = 2; 6476 mddev->curr_resync = 2;
6280 6477
6281 try_again: 6478 try_again:
6282 if (kthread_should_stop()) { 6479 if (kthread_should_stop())
6283 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 6480 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6481
6482 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
6284 goto skip; 6483 goto skip;
6285 }
6286 for_each_mddev(mddev2, tmp) { 6484 for_each_mddev(mddev2, tmp) {
6287 if (mddev2 == mddev) 6485 if (mddev2 == mddev)
6288 continue; 6486 continue;
@@ -6342,12 +6540,14 @@ void md_do_sync(mddev_t *mddev)
6342 /* recovery follows the physical size of devices */ 6540 /* recovery follows the physical size of devices */
6343 max_sectors = mddev->dev_sectors; 6541 max_sectors = mddev->dev_sectors;
6344 j = MaxSector; 6542 j = MaxSector;
6345 list_for_each_entry(rdev, &mddev->disks, same_set) 6543 rcu_read_lock();
6544 list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
6346 if (rdev->raid_disk >= 0 && 6545 if (rdev->raid_disk >= 0 &&
6347 !test_bit(Faulty, &rdev->flags) && 6546 !test_bit(Faulty, &rdev->flags) &&
6348 !test_bit(In_sync, &rdev->flags) && 6547 !test_bit(In_sync, &rdev->flags) &&
6349 rdev->recovery_offset < j) 6548 rdev->recovery_offset < j)
6350 j = rdev->recovery_offset; 6549 j = rdev->recovery_offset;
6550 rcu_read_unlock();
6351 } 6551 }
6352 6552
6353 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev)); 6553 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
@@ -6384,6 +6584,7 @@ void md_do_sync(mddev_t *mddev)
6384 desc, mdname(mddev)); 6584 desc, mdname(mddev));
6385 mddev->curr_resync = j; 6585 mddev->curr_resync = j;
6386 } 6586 }
6587 mddev->curr_resync_completed = mddev->curr_resync;
6387 6588
6388 while (j < max_sectors) { 6589 while (j < max_sectors) {
6389 sector_t sectors; 6590 sector_t sectors;
@@ -6516,22 +6717,29 @@ void md_do_sync(mddev_t *mddev)
6516 } else { 6717 } else {
6517 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) 6718 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
6518 mddev->curr_resync = MaxSector; 6719 mddev->curr_resync = MaxSector;
6519 list_for_each_entry(rdev, &mddev->disks, same_set) 6720 rcu_read_lock();
6721 list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
6520 if (rdev->raid_disk >= 0 && 6722 if (rdev->raid_disk >= 0 &&
6521 !test_bit(Faulty, &rdev->flags) && 6723 !test_bit(Faulty, &rdev->flags) &&
6522 !test_bit(In_sync, &rdev->flags) && 6724 !test_bit(In_sync, &rdev->flags) &&
6523 rdev->recovery_offset < mddev->curr_resync) 6725 rdev->recovery_offset < mddev->curr_resync)
6524 rdev->recovery_offset = mddev->curr_resync; 6726 rdev->recovery_offset = mddev->curr_resync;
6727 rcu_read_unlock();
6525 } 6728 }
6526 } 6729 }
6527 set_bit(MD_CHANGE_DEVS, &mddev->flags); 6730 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6528 6731
6529 skip: 6732 skip:
6733 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
6734 /* We completed so min/max setting can be forgotten if used. */
6735 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
6736 mddev->resync_min = 0;
6737 mddev->resync_max = MaxSector;
6738 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
6739 mddev->resync_min = mddev->curr_resync_completed;
6530 mddev->curr_resync = 0; 6740 mddev->curr_resync = 0;
6531 mddev->curr_resync_completed = 0;
6532 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) 6741 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
6533 /* We completed so max setting can be forgotten. */ 6742 mddev->curr_resync_completed = 0;
6534 mddev->resync_max = MaxSector;
6535 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 6743 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
6536 wake_up(&resync_wait); 6744 wake_up(&resync_wait);
6537 set_bit(MD_RECOVERY_DONE, &mddev->recovery); 6745 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
@@ -6594,6 +6802,7 @@ static int remove_and_add_spares(mddev_t *mddev)
6594 nm, mdname(mddev)); 6802 nm, mdname(mddev));
6595 spares++; 6803 spares++;
6596 md_new_event(mddev); 6804 md_new_event(mddev);
6805 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6597 } else 6806 } else
6598 break; 6807 break;
6599 } 6808 }
@@ -6629,7 +6838,7 @@ void md_check_recovery(mddev_t *mddev)
6629 6838
6630 6839
6631 if (mddev->bitmap) 6840 if (mddev->bitmap)
6632 bitmap_daemon_work(mddev->bitmap); 6841 bitmap_daemon_work(mddev);
6633 6842
6634 if (mddev->ro) 6843 if (mddev->ro)
6635 return; 6844 return;
@@ -6999,5 +7208,6 @@ EXPORT_SYMBOL(md_unregister_thread);
6999EXPORT_SYMBOL(md_wakeup_thread); 7208EXPORT_SYMBOL(md_wakeup_thread);
7000EXPORT_SYMBOL(md_check_recovery); 7209EXPORT_SYMBOL(md_check_recovery);
7001MODULE_LICENSE("GPL"); 7210MODULE_LICENSE("GPL");
7211MODULE_DESCRIPTION("MD RAID framework");
7002MODULE_ALIAS("md"); 7212MODULE_ALIAS("md");
7003MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR); 7213MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);