 Documentation/md.txt                                      |   37
 crypto/xor.c                                              |    2
 drivers/md/Kconfig                                        |   31
 drivers/md/Makefile                                       |   16
 drivers/md/bitmap.c                                       |   49
 include/linux/raid/bitmap.h => drivers/md/bitmap.h        |    0
 drivers/md/faulty.c                                       |   19
 drivers/md/linear.c                                       |   25
 include/linux/raid/linear.h => drivers/md/linear.h        |    2
 drivers/md/md.c                                           |  615
 include/linux/raid/md_k.h => drivers/md/md.h              |   66
 drivers/md/mktables.c                                     |   14
 drivers/md/multipath.c                                    |   17
 include/linux/raid/multipath.h => drivers/md/multipath.h  |    2
 drivers/md/raid0.c                                        |   66
 include/linux/raid/raid0.h => drivers/md/raid0.h          |    2
 drivers/md/raid1.c                                        |   35
 include/linux/raid/raid1.h => drivers/md/raid1.h          |    2
 drivers/md/raid10.c                                       |   42
 include/linux/raid/raid10.h => drivers/md/raid10.h        |    2
 drivers/md/raid5.c                                        | 1494
 include/linux/raid/raid5.h => drivers/md/raid5.h          |  110
 drivers/md/raid6algos.c                                   |   21
 drivers/md/raid6altivec.uc                                |    4
 drivers/md/raid6int.uc                                    |    4
 drivers/md/raid6mmx.c                                     |    4
 drivers/md/raid6recov.c                                   |   13
 drivers/md/raid6sse1.c                                    |    4
 drivers/md/raid6sse2.c                                    |    4
 drivers/md/raid6test/Makefile                             |    2
 drivers/md/raid6test/test.c                               |    2
 drivers/md/raid6x86.h                                     |    2
 fs/compat_ioctl.c                                         |    2
 include/linux/raid/md.h                                   |   81
 include/linux/raid/md_u.h                                 |   35
 drivers/md/raid6.h => include/linux/raid/pq.h             |   28
 include/linux/raid/xor.h                                  |    2
 init/do_mounts.h                                          |    1
 init/do_mounts_md.c                                       |    5
 39 files changed, 2002 insertions(+), 860 deletions(-)
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 1da9d1b1793f..4edd39ec7db9 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -164,15 +164,19 @@ All md devices contain:
   raid_disks
      a text file with a simple number indicating the number of devices
      in a fully functional array.  If this is not yet known, the file
-     will be empty.  If an array is being resized (not currently
-     possible) this will contain the larger of the old and new sizes.
-     Some raid level (RAID1) allow this value to be set while the
-     array is active.  This will reconfigure the array.  Otherwise
-     it can only be set while assembling an array.
+     will be empty.  If an array is being resized this will contain
+     the new number of devices.
+     Some raid levels allow this value to be set while the array is
+     active.  This will reconfigure the array.  Otherwise it can only
+     be set while assembling an array.
+     A change to this attribute will not be permitted if it would
+     reduce the size of the array.  To reduce the number of drives
+     in an e.g. raid5, the array size must first be reduced by
+     setting the 'array_size' attribute.
 
   chunk_size
-     This is the size if bytes for 'chunks' and is only relevant to
-     raid levels that involve striping (1,4,5,6,10). The address space
+     This is the size in bytes for 'chunks' and is only relevant to
+     raid levels that involve striping (0,4,5,6,10). The address space
      of the array is conceptually divided into chunks and consecutive
      chunks are striped onto neighbouring devices.
      The size should be at least PAGE_SIZE (4k) and should be a power
@@ -183,6 +187,20 @@ All md devices contain:
      simply a number that is interpretted differently by different
      levels.  It can be written while assembling an array.
 
+  array_size
+     This can be used to artificially constrain the available space in
+     the array to be less than is actually available on the combined
+     devices.  Writing a number (in Kilobytes) which is less than
+     the available size will set the size.  Any reconfiguration of the
+     array (e.g. adding devices) will not cause the size to change.
+     Writing the word 'default' will cause the effective size of the
+     array to be whatever size is actually available based on
+     'level', 'chunk_size' and 'component_size'.
+
+     This can be used to reduce the size of the array before reducing
+     the number of devices in a raid4/5/6, or to support external
+     metadata formats which mandate such clipping.
+
   reshape_position
      This is either "none" or a sector number within the devices of
      the array where "reshape" is up to.  If this is set, the three
@@ -207,6 +225,11 @@ All md devices contain:
      about the array.  It can be 0.90 (traditional format), 1.0, 1.1,
      1.2 (newer format in varying locations) or "none" indicating that
      the kernel isn't managing metadata at all.
+     Alternately it can be "external:" followed by a string which
+     is set by user-space.  This indicates that metadata is managed
+     by a user-space program.  Any device failure or other event that
+     requires a metadata update will cause array activity to be
+     suspended until the event is acknowledged.
 
   resync_start
      The point at which resync should start.  If no resync is needed,
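For illustration, the new 'array_size' attribute is driven with an ordinary
sysfs write.  A minimal user-space sketch in C, assuming an array at /dev/md0
(the device path and the 1 GiB figure are illustrative, not part of the patch):

	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		/* clip the array to 1 GiB; the attribute is in kilobytes */
		int fd = open("/sys/block/md0/md/array_size", O_WRONLY);

		if (fd < 0)
			return 1;
		write(fd, "1048576", 7);
		/* or restore the natural size computed from 'level',
		 * 'chunk_size' and 'component_size' */
		write(fd, "default", 7);
		close(fd);
		return 0;
	}
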
diff --git a/crypto/xor.c b/crypto/xor.c
index b2e6db075e49..996b6ee57d9e 100644
--- a/crypto/xor.c
+++ b/crypto/xor.c
@@ -18,8 +18,8 @@
 
 #define BH_TRACE 0
 #include <linux/module.h>
-#include <linux/raid/md.h>
 #include <linux/raid/xor.h>
+#include <linux/jiffies.h>
 #include <asm/xor.h>
 
 /* The xor routines to use.  */
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 2281b5098e95..36e0675be9f7 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -121,6 +121,7 @@ config MD_RAID10
 config MD_RAID456
 	tristate "RAID-4/RAID-5/RAID-6 mode"
 	depends on BLK_DEV_MD
+	select MD_RAID6_PQ
 	select ASYNC_MEMCPY
 	select ASYNC_XOR
 	---help---
@@ -151,34 +152,8 @@ config MD_RAID456
 
 	  If unsure, say Y.
 
-config MD_RAID5_RESHAPE
-	bool "Support adding drives to a raid-5 array"
-	depends on MD_RAID456
-	default y
-	---help---
-	  A RAID-5 set can be expanded by adding extra drives. This
-	  requires "restriping" the array which means (almost) every
-	  block must be written to a different place.
-
-	  This option allows such restriping to be done while the array
-	  is online.
-
-	  You will need mdadm version 2.4.1 or later to use this
-	  feature safely.  During the early stage of reshape there is
-	  a critical section where live data is being over-written.  A
-	  crash during this time needs extra care for recovery.  The
-	  newer mdadm takes a copy of the data in the critical section
-	  and will restore it, if necessary, after a crash.
-
-	  The mdadm usage is e.g.
-	       mdadm --grow /dev/md1 --raid-disks=6
-	  to grow '/dev/md1' to having 6 disks.
-
-	  Note: The array can only be expanded, not contracted.
-	  There should be enough spares already present to make the new
-	  array workable.
-
-	  If unsure, say Y.
+config MD_RAID6_PQ
+	tristate
 
 config MD_MULTIPATH
 	tristate "Multipath I/O support"
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 72880b7e28d9..45cc5951d928 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -2,20 +2,21 @@
 # Makefile for the kernel software RAID and LVM drivers.
 #
 
-dm-mod-objs	:= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
+dm-mod-y	+= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
 		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
-dm-multipath-objs := dm-path-selector.o dm-mpath.o
-dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \
+dm-multipath-y	+= dm-path-selector.o dm-mpath.o
+dm-snapshot-y	+= dm-snap.o dm-exception-store.o dm-snap-transient.o \
 		    dm-snap-persistent.o
-dm-mirror-objs	:= dm-raid1.o
-md-mod-objs	:= md.o bitmap.o
-raid456-objs	:= raid5.o raid6algos.o raid6recov.o raid6tables.o \
+dm-mirror-y	+= dm-raid1.o
+md-mod-y	+= md.o bitmap.o
+raid456-y	+= raid5.o
+raid6_pq-y	+= raid6algos.o raid6recov.o raid6tables.o \
 		   raid6int1.o raid6int2.o raid6int4.o \
 		   raid6int8.o raid6int16.o raid6int32.o \
 		   raid6altivec1.o raid6altivec2.o raid6altivec4.o \
 		   raid6altivec8.o \
 		   raid6mmx.o raid6sse1.o raid6sse2.o
-hostprogs-y	:= mktables
+hostprogs-y	+= mktables
 
 # Note: link order is important.  All raid personalities
 # and must come before md.o, as they each initialise
@@ -26,6 +27,7 @@ obj-$(CONFIG_MD_LINEAR)	+= linear.o
 obj-$(CONFIG_MD_RAID0)		+= raid0.o
 obj-$(CONFIG_MD_RAID1)		+= raid1.o
 obj-$(CONFIG_MD_RAID10)		+= raid10.o
+obj-$(CONFIG_MD_RAID6_PQ)	+= raid6_pq.o
 obj-$(CONFIG_MD_RAID456)	+= raid456.o
 obj-$(CONFIG_MD_MULTIPATH)	+= multipath.o
 obj-$(CONFIG_MD_FAULTY)		+= faulty.o
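Splitting the RAID-6 P/Q arithmetic into its own raid6_pq module means other
kernel code can reuse the syndrome generator without pulling in raid456.  A
hedged sketch of a hypothetical in-kernel consumer, assuming the raid6_call
dispatch table that raid6algos.c selects at init and exports through the
relocated <linux/raid/pq.h> (the stripe geometry is made up for illustration):

	#include <linux/raid/pq.h>

	/* Generate P and Q for a 4+2 stripe: ptrs[0..3] point at the data
	 * pages, ptrs[4] receives P and ptrs[5] receives Q. */
	static void demo_gen_pq(void **ptrs)
	{
		raid6_call.gen_syndrome(6, PAGE_SIZE, ptrs);
	}
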
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 719943763391..f8a9f7ab2cb8 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -16,6 +16,7 @@
  * wait if count gets too high, wake when it drops to half.
  */
 
+#include <linux/blkdev.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
@@ -26,8 +27,8 @@
 #include <linux/file.h>
 #include <linux/mount.h>
 #include <linux/buffer_head.h>
-#include <linux/raid/md.h>
-#include <linux/raid/bitmap.h>
+#include "md.h"
+#include "bitmap.h"
 
 /* debug macros */
 
@@ -111,9 +112,10 @@ static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int creat
 	unsigned char *mappage;
 
 	if (page >= bitmap->pages) {
-		printk(KERN_ALERT
-			"%s: invalid bitmap page request: %lu (> %lu)\n",
-			bmname(bitmap), page, bitmap->pages-1);
+		/* This can happen if bitmap_start_sync goes beyond
+		 * End-of-device while looking for a whole page.
+		 * It is harmless.
+		 */
 		return -EINVAL;
 	}
 
@@ -265,7 +267,6 @@ static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
 	list_for_each_continue_rcu(pos, &mddev->disks) {
 		rdev = list_entry(pos, mdk_rdev_t, same_set);
 		if (rdev->raid_disk >= 0 &&
-		    test_bit(In_sync, &rdev->flags) &&
 		    !test_bit(Faulty, &rdev->flags)) {
 			/* this is a usable devices */
 			atomic_inc(&rdev->nr_pending);
@@ -297,7 +298,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
 			    + size/512 > 0)
 				/* bitmap runs in to metadata */
 				goto bad_alignment;
-			if (rdev->data_offset + mddev->size*2
+			if (rdev->data_offset + mddev->dev_sectors
 			    > rdev->sb_start + bitmap->offset)
 				/* data runs in to bitmap */
 				goto bad_alignment;
@@ -570,7 +571,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
 	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
 		 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
 		reason = "unrecognized superblock version";
-	else if (chunksize < PAGE_SIZE)
+	else if (chunksize < 512)
 		reason = "bitmap chunksize too small";
 	else if ((1 << ffz(~chunksize)) != chunksize)
 		reason = "bitmap chunksize not a power of 2";
@@ -1306,6 +1307,9 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
 		PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n",
 		       atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
 	}
+	if (bitmap->mddev->degraded)
+		/* Never clear bits or update events_cleared when degraded */
+		success = 0;
 
 	while (sectors) {
 		int blocks;
@@ -1345,8 +1349,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
 		}
 	}
 
-int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
-		      int degraded)
+static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
+			       int degraded)
 {
 	bitmap_counter_t *bmc;
 	int rv;
@@ -1374,6 +1378,29 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
 	return rv;
 }
 
+int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
+		      int degraded)
+{
+	/* bitmap_start_sync must always report on multiples of whole
+	 * pages, otherwise resync (which is very PAGE_SIZE based) will
+	 * get confused.
+	 * So call __bitmap_start_sync repeatedly (if needed) until
+	 * At least PAGE_SIZE>>9 blocks are covered.
+	 * Return the 'or' of the result.
+	 */
+	int rv = 0;
+	int blocks1;
+
+	*blocks = 0;
+	while (*blocks < (PAGE_SIZE>>9)) {
+		rv |= __bitmap_start_sync(bitmap, offset,
+					  &blocks1, degraded);
+		offset += blocks1;
+		*blocks += blocks1;
+	}
+	return rv;
+}
+
 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted)
 {
 	bitmap_counter_t *bmc;
@@ -1443,6 +1470,8 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
 	wait_event(bitmap->mddev->recovery_wait,
 		   atomic_read(&bitmap->mddev->recovery_active) == 0);
 
+	bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync;
+	set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
 	sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
 	s = 0;
 	while (s < sector && s < bitmap->mddev->resync_max_sectors) {
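The new wrapper exists only to round what resync sees up to whole pages.  A
user-space model of that accumulation loop, assuming 4k pages (PAGE_SIZE >> 9
== 8 sectors) and a hypothetical two-sector step standing in for what one
__bitmap_start_sync call reports:

	#include <stdio.h>

	#define PAGE_SECTORS (4096 >> 9)	/* 8 sectors per 4k page */

	int main(void)
	{
		int blocks = 0;
		int step = 2;	/* pretend each chunk lookup covers 2 sectors */

		while (blocks < PAGE_SECTORS)
			blocks += step;	/* keep extending the range */
		/* always a whole-page multiple: prints 8 */
		printf("reported %d sectors\n", blocks);
		return 0;
	}
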
diff --git a/include/linux/raid/bitmap.h b/drivers/md/bitmap.h
index e98900671ca9..e98900671ca9 100644
--- a/include/linux/raid/bitmap.h
+++ b/drivers/md/bitmap.h
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 86d9adf90e79..8695809b24b0 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -62,7 +62,10 @@
 #define ModeShift	5
 
 #define MaxFault	50
-#include <linux/raid/md.h>
+#include <linux/blkdev.h>
+#include <linux/raid/md_u.h>
+#include "md.h"
+#include <linux/seq_file.h>
 
 
 static void faulty_fail(struct bio *bio, int error)
@@ -280,6 +283,17 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size)
 	return 0;
 }
 
+static sector_t faulty_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+	WARN_ONCE(raid_disks,
+		  "%s does not support generic reshape\n", __func__);
+
+	if (sectors == 0)
+		return mddev->dev_sectors;
+
+	return sectors;
+}
+
 static int run(mddev_t *mddev)
 {
 	mdk_rdev_t *rdev;
@@ -298,7 +312,7 @@ static int run(mddev_t *mddev)
 	list_for_each_entry(rdev, &mddev->disks, same_set)
 		conf->rdev = rdev;
 
-	mddev->array_sectors = mddev->size * 2;
+	md_set_array_sectors(mddev, faulty_size(mddev, 0, 0));
 	mddev->private = conf;
 
 	reconfig(mddev, mddev->layout, -1);
@@ -325,6 +339,7 @@ static struct mdk_personality faulty_personality =
 	.stop		= stop,
 	.status		= status,
 	.reconfig	= reconfig,
+	.size		= faulty_size,
 };
 
 static int __init raid_init(void)
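faulty is one of several personalities gaining a ->size() method in this
series; by convention an argument of 0 means "keep the current value", so
size(mddev, 0, 0) simply reports the present array size.  A sketch of the
core-side calling pattern (types and helpers are the kernel's; this fragment
is illustrative rather than a buildable unit):

	/* ask the personality for the array size implied by the current
	 * component size and disk count, then publish it */
	static void publish_array_size(mddev_t *mddev)
	{
		sector_t sectors = mddev->pers->size(mddev, 0, 0);

		md_set_array_sectors(mddev, sectors);
	}
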
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 09658b218474..7a36e38393a1 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -16,7 +16,11 @@
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
 
-#include <linux/raid/linear.h>
+#include <linux/blkdev.h>
+#include <linux/raid/md_u.h>
+#include <linux/seq_file.h>
+#include "md.h"
+#include "linear.h"
 
 /*
  * find which device holds a particular offset
@@ -97,6 +101,16 @@ static int linear_congested(void *data, int bits)
 	return ret;
 }
 
+static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+	linear_conf_t *conf = mddev_to_conf(mddev);
+
+	WARN_ONCE(sectors || raid_disks,
+		  "%s does not support generic reshape\n", __func__);
+
+	return conf->array_sectors;
+}
+
 static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 {
 	linear_conf_t *conf;
@@ -135,8 +149,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
-		disk->num_sectors = rdev->size * 2;
-		conf->array_sectors += rdev->size * 2;
+		disk->num_sectors = rdev->sectors;
+		conf->array_sectors += rdev->sectors;
 
 		cnt++;
 	}
@@ -249,7 +263,7 @@ static int linear_run (mddev_t *mddev)
 	if (!conf)
 		return 1;
 	mddev->private = conf;
-	mddev->array_sectors = conf->array_sectors;
+	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
 
 	blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
 	mddev->queue->unplug_fn = linear_unplug;
@@ -283,7 +297,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
 	newconf->prev = mddev_to_conf(mddev);
 	mddev->private = newconf;
 	mddev->raid_disks++;
-	mddev->array_sectors = newconf->array_sectors;
+	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
 	set_capacity(mddev->gendisk, mddev->array_sectors);
 	return 0;
 }
@@ -381,6 +395,7 @@ static struct mdk_personality linear_personality =
 	.stop		= linear_stop,
 	.status		= linear_status,
 	.hot_add_disk	= linear_add,
+	.size		= linear_size,
 };
 
 static int __init linear_init (void)
diff --git a/include/linux/raid/linear.h b/drivers/md/linear.h
index f38b9c586afb..bf8179587f95 100644
--- a/include/linux/raid/linear.h
+++ b/drivers/md/linear.h
@@ -1,8 +1,6 @@
 #ifndef _LINEAR_H
 #define _LINEAR_H
 
-#include <linux/raid/md.h>
-
 struct dev_info {
 	mdk_rdev_t	*rdev;
 	sector_t	num_sectors;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a307f87eb90e..ed5727c089a9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -33,9 +33,9 @@
 */
 
 #include <linux/kthread.h>
-#include <linux/raid/md.h>
-#include <linux/raid/bitmap.h>
+#include <linux/blkdev.h>
 #include <linux/sysctl.h>
+#include <linux/seq_file.h>
 #include <linux/buffer_head.h> /* for invalidate_bdev */
 #include <linux/poll.h>
 #include <linux/ctype.h>
@@ -45,11 +45,10 @@
 #include <linux/reboot.h>
 #include <linux/file.h>
 #include <linux/delay.h>
-
-#define MAJOR_NR MD_MAJOR
-
-/* 63 partitions with the alternate major number (mdp) */
-#define MdpMinorShift 6
+#include <linux/raid/md_p.h>
+#include <linux/raid/md_u.h>
+#include "md.h"
+#include "bitmap.h"
 
 #define DEBUG 0
 #define dprintk(x...) ((void)(DEBUG && printk(x)))
@@ -202,12 +201,68 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
 		)
 
 
-static int md_fail_request(struct request_queue *q, struct bio *bio)
+/* Rather than calling directly into the personality make_request function,
+ * IO requests come here first so that we can check if the device is
+ * being suspended pending a reconfiguration.
+ * We hold a refcount over the call to ->make_request.  By the time that
+ * call has finished, the bio has been linked into some internal structure
+ * and so is visible to ->quiesce(), so we don't need the refcount any more.
+ */
+static int md_make_request(struct request_queue *q, struct bio *bio)
 {
-	bio_io_error(bio);
-	return 0;
+	mddev_t *mddev = q->queuedata;
+	int rv;
+	if (mddev == NULL || mddev->pers == NULL) {
+		bio_io_error(bio);
+		return 0;
+	}
+	rcu_read_lock();
+	if (mddev->suspended) {
+		DEFINE_WAIT(__wait);
+		for (;;) {
+			prepare_to_wait(&mddev->sb_wait, &__wait,
+					TASK_UNINTERRUPTIBLE);
+			if (!mddev->suspended)
+				break;
+			rcu_read_unlock();
+			schedule();
+			rcu_read_lock();
+		}
+		finish_wait(&mddev->sb_wait, &__wait);
+	}
+	atomic_inc(&mddev->active_io);
+	rcu_read_unlock();
+	rv = mddev->pers->make_request(q, bio);
+	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
+		wake_up(&mddev->sb_wait);
+
+	return rv;
+}
+
+static void mddev_suspend(mddev_t *mddev)
+{
+	BUG_ON(mddev->suspended);
+	mddev->suspended = 1;
+	synchronize_rcu();
+	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
+	mddev->pers->quiesce(mddev, 1);
+	md_unregister_thread(mddev->thread);
+	mddev->thread = NULL;
+	/* we now know that no code is executing in the personality module,
+	 * except possibly the tail end of a ->bi_end_io function, but that
+	 * is certain to complete before the module has a chance to get
+	 * unloaded
+	 */
+}
+
+static void mddev_resume(mddev_t *mddev)
+{
+	mddev->suspended = 0;
+	wake_up(&mddev->sb_wait);
+	mddev->pers->quiesce(mddev, 0);
 }
 
+
 static inline mddev_t *mddev_get(mddev_t *mddev)
 {
 	atomic_inc(&mddev->active);
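The long comment above md_make_request spells out the protocol: each request
takes an active_io reference under rcu_read_lock, while mddev_suspend flips
->suspended, waits out RCU readers, then waits for active_io to drain before
quiescing the personality.  A sketch of the intended caller-side pattern
(level_store, later in this patch, is the first real user):

	static void reconfigure_under_suspend(mddev_t *mddev)
	{
		mddev_suspend(mddev);	/* new I/O blocks, in-flight I/O drains */
		/* ... safely swap mddev->pers / mddev->private here ... */
		mddev_resume(mddev);	/* wake writers parked in md_make_request */
	}
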
@@ -310,6 +365,7 @@ static mddev_t * mddev_find(dev_t unit)
 	init_timer(&new->safemode_timer);
 	atomic_set(&new->active, 1);
 	atomic_set(&new->openers, 0);
+	atomic_set(&new->active_io, 0);
 	spin_lock_init(&new->write_lock);
 	init_waitqueue_head(&new->sb_wait);
 	init_waitqueue_head(&new->recovery_wait);
@@ -326,6 +382,11 @@ static inline int mddev_lock(mddev_t * mddev)
 	return mutex_lock_interruptible(&mddev->reconfig_mutex);
 }
 
+static inline int mddev_is_locked(mddev_t *mddev)
+{
+	return mutex_is_locked(&mddev->reconfig_mutex);
+}
+
 static inline int mddev_trylock(mddev_t * mddev)
 {
 	return mutex_trylock(&mddev->reconfig_mutex);
@@ -409,7 +470,7 @@ static void free_disk_sb(mdk_rdev_t * rdev)
 		rdev->sb_loaded = 0;
 		rdev->sb_page = NULL;
 		rdev->sb_start = 0;
-		rdev->size = 0;
+		rdev->sectors = 0;
 	}
 }
 
@@ -775,9 +836,9 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
 		else
 			ret = 0;
 	}
-	rdev->size = calc_num_sectors(rdev, sb->chunk_size) / 2;
+	rdev->sectors = calc_num_sectors(rdev, sb->chunk_size);
 
-	if (rdev->size < sb->size && sb->level > 1)
+	if (rdev->sectors < sb->size * 2 && sb->level > 1)
 		/* "this cannot possibly happen" ... */
 		ret = -EINVAL;
 
@@ -812,7 +873,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->clevel[0] = 0;
 		mddev->layout = sb->layout;
 		mddev->raid_disks = sb->raid_disks;
-		mddev->size = sb->size;
+		mddev->dev_sectors = sb->size * 2;
 		mddev->events = ev1;
 		mddev->bitmap_offset = 0;
 		mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
@@ -926,7 +987,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
 	sb->ctime = mddev->ctime;
 	sb->level = mddev->level;
-	sb->size = mddev->size;
+	sb->size = mddev->dev_sectors / 2;
 	sb->raid_disks = mddev->raid_disks;
 	sb->md_minor = mddev->md_minor;
 	sb->not_persistent = 0;
@@ -1024,7 +1085,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 static unsigned long long
 super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
 {
-	if (num_sectors && num_sectors < rdev->mddev->size * 2)
+	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
 		return 0; /* component must fit device */
 	if (rdev->mddev->bitmap_offset)
 		return 0; /* can't move bitmap */
@@ -1180,16 +1241,17 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 			ret = 0;
 	}
 	if (minor_version)
-		rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2;
+		rdev->sectors = (rdev->bdev->bd_inode->i_size >> 9) -
+			le64_to_cpu(sb->data_offset);
 	else
-		rdev->size = rdev->sb_start / 2;
-	if (rdev->size < le64_to_cpu(sb->data_size)/2)
+		rdev->sectors = rdev->sb_start;
+	if (rdev->sectors < le64_to_cpu(sb->data_size))
 		return -EINVAL;
-	rdev->size = le64_to_cpu(sb->data_size)/2;
+	rdev->sectors = le64_to_cpu(sb->data_size);
 	if (le32_to_cpu(sb->chunksize))
-		rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1);
+		rdev->sectors &= ~((sector_t)le32_to_cpu(sb->chunksize) - 1);
 
-	if (le64_to_cpu(sb->size) > rdev->size*2)
+	if (le64_to_cpu(sb->size) > rdev->sectors)
 		return -EINVAL;
 	return ret;
 }
@@ -1216,7 +1278,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->clevel[0] = 0;
 		mddev->layout = le32_to_cpu(sb->layout);
 		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
-		mddev->size = le64_to_cpu(sb->size)/2;
+		mddev->dev_sectors = le64_to_cpu(sb->size);
 		mddev->events = ev1;
 		mddev->bitmap_offset = 0;
 		mddev->default_bitmap_offset = 1024 >> 9;
@@ -1312,7 +1374,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
 
 	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
-	sb->size = cpu_to_le64(mddev->size<<1);
+	sb->size = cpu_to_le64(mddev->dev_sectors);
 
 	if (mddev->bitmap && mddev->bitmap_file == NULL) {
 		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
@@ -1320,10 +1382,15 @@
 	}
 
 	if (rdev->raid_disk >= 0 &&
-	    !test_bit(In_sync, &rdev->flags) &&
-	    rdev->recovery_offset > 0) {
-		sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
-		sb->recovery_offset = cpu_to_le64(rdev->recovery_offset);
+	    !test_bit(In_sync, &rdev->flags)) {
+		if (mddev->curr_resync_completed > rdev->recovery_offset)
+			rdev->recovery_offset = mddev->curr_resync_completed;
+		if (rdev->recovery_offset > 0) {
+			sb->feature_map |=
+				cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
+			sb->recovery_offset =
+				cpu_to_le64(rdev->recovery_offset);
+		}
 	}
 
 	if (mddev->reshape_position != MaxSector) {
@@ -1365,7 +1432,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
 {
 	struct mdp_superblock_1 *sb;
 	sector_t max_sectors;
-	if (num_sectors && num_sectors < rdev->mddev->size * 2)
+	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
 		return 0; /* component must fit device */
 	if (rdev->sb_start < rdev->data_offset) {
 		/* minor versions 1 and 2; superblock before data */
@@ -1381,7 +1448,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
 		sector_t sb_start;
 		sb_start = (rdev->bdev->bd_inode->i_size >> 9) - 8*2;
 		sb_start &= ~(sector_t)(4*2 - 1);
-		max_sectors = rdev->size * 2 + sb_start - rdev->sb_start;
+		max_sectors = rdev->sectors + sb_start - rdev->sb_start;
 		if (!num_sectors || num_sectors > max_sectors)
 			num_sectors = max_sectors;
 		rdev->sb_start = sb_start;
@@ -1433,6 +1500,38 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
 
 static LIST_HEAD(pending_raid_disks);
 
+static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev)
+{
+	struct mdk_personality *pers = mddev->pers;
+	struct gendisk *disk = mddev->gendisk;
+	struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
+	struct blk_integrity *bi_mddev = blk_get_integrity(disk);
+
+	/* Data integrity passthrough not supported on RAID 4, 5 and 6 */
+	if (pers && pers->level >= 4 && pers->level <= 6)
+		return;
+
+	/* If rdev is integrity capable, register profile for mddev */
+	if (!bi_mddev && bi_rdev) {
+		if (blk_integrity_register(disk, bi_rdev))
+			printk(KERN_ERR "%s: %s Could not register integrity!\n",
+			       __func__, disk->disk_name);
+		else
+			printk(KERN_NOTICE "Enabling data integrity on %s\n",
+			       disk->disk_name);
+		return;
+	}
+
+	/* Check that mddev and rdev have matching profiles */
+	if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) {
+		printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__,
+		       disk->disk_name, rdev->bdev->bd_disk->disk_name);
+		printk(KERN_NOTICE "Disabling data integrity on %s\n",
+		       disk->disk_name);
+		blk_integrity_unregister(disk);
+	}
+}
+
 static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 {
 	char b[BDEVNAME_SIZE];
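md_integrity_check implements a simple policy: the first integrity-capable
rdev donates its profile to the array, any later rdev with a mismatching
profile disables integrity array-wide, and parity levels 4-6 skip passthrough
entirely.  The decision table, modelled in plain C with hypothetical names
(the blk_integrity plumbing itself stays in the kernel code above):

	enum action { IGNORE, REGISTER, KEEP, UNREGISTER };

	static enum action integrity_policy(int level, int mddev_has,
					    int rdev_has, int profiles_match)
	{
		if (level >= 4 && level <= 6)
			return IGNORE;		/* no passthrough on parity raid */
		if (!mddev_has && rdev_has)
			return REGISTER;	/* first capable device donates */
		if (!profiles_match)
			return UNREGISTER;	/* mismatch disables it array-wide */
		return KEEP;
	}
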
@@ -1449,8 +1548,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 	if (find_rdev(mddev, rdev->bdev->bd_dev))
 		return -EEXIST;
 
-	/* make sure rdev->size exceeds mddev->size */
-	if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {
+	/* make sure rdev->sectors exceeds mddev->dev_sectors */
+	if (rdev->sectors && (mddev->dev_sectors == 0 ||
+			rdev->sectors < mddev->dev_sectors)) {
 		if (mddev->pers) {
 			/* Cannot change size, so fail
 			 * If mddev->level <= 0, then we don't care
@@ -1459,7 +1559,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 			if (mddev->level > 0)
 				return -ENOSPC;
 		} else
-			mddev->size = rdev->size;
+			mddev->dev_sectors = rdev->sectors;
 	}
 
 	/* Verify rdev->desc_nr is unique.
@@ -1503,6 +1603,8 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 
 	/* May as well allow recovery to be retried once */
 	mddev->recovery_disabled = 0;
+
+	md_integrity_check(rdev, mddev);
 	return 0;
 
  fail:
@@ -1713,8 +1815,8 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
 static void print_rdev(mdk_rdev_t *rdev, int major_version)
 {
 	char b[BDEVNAME_SIZE];
-	printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n",
-		bdevname(rdev->bdev,b), (unsigned long long)rdev->size,
+	printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
+		bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
 		test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
 		rdev->desc_nr);
 	if (rdev->sb_loaded) {
@@ -2153,7 +2255,7 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 		return -EINVAL;
 	if (rdev->mddev->pers && rdev->raid_disk >= 0)
 		return -EBUSY;
-	if (rdev->size && rdev->mddev->external)
+	if (rdev->sectors && rdev->mddev->external)
 		/* Must set offset before size, so overlap checks
 		 * can be sane */
 		return -EBUSY;
@@ -2167,7 +2269,7 @@ __ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
 static ssize_t
 rdev_size_show(mdk_rdev_t *rdev, char *page)
 {
-	return sprintf(page, "%llu\n", (unsigned long long)rdev->size);
+	return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
 }
 
 static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
@@ -2180,34 +2282,52 @@ static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
 	return 1;
 }
 
+static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
+{
+	unsigned long long blocks;
+	sector_t new;
+
+	if (strict_strtoull(buf, 10, &blocks) < 0)
+		return -EINVAL;
+
+	if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
+		return -EINVAL; /* sector conversion overflow */
+
+	new = blocks * 2;
+	if (new != blocks * 2)
+		return -EINVAL; /* unsigned long long to sector_t overflow */
+
+	*sectors = new;
+	return 0;
+}
+
 static ssize_t
 rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 {
-	unsigned long long size;
-	unsigned long long oldsize = rdev->size;
 	mddev_t *my_mddev = rdev->mddev;
+	sector_t oldsectors = rdev->sectors;
+	sector_t sectors;
 
-	if (strict_strtoull(buf, 10, &size) < 0)
+	if (strict_blocks_to_sectors(buf, &sectors) < 0)
 		return -EINVAL;
 	if (my_mddev->pers && rdev->raid_disk >= 0) {
 		if (my_mddev->persistent) {
-			size = super_types[my_mddev->major_version].
-				rdev_size_change(rdev, size * 2);
-			if (!size)
+			sectors = super_types[my_mddev->major_version].
+				rdev_size_change(rdev, sectors);
+			if (!sectors)
 				return -EBUSY;
-		} else if (!size) {
-			size = (rdev->bdev->bd_inode->i_size >> 10);
-			size -= rdev->data_offset/2;
-		}
+		} else if (!sectors)
+			sectors = (rdev->bdev->bd_inode->i_size >> 9) -
+				rdev->data_offset;
 	}
-	if (size < my_mddev->size)
+	if (sectors < my_mddev->dev_sectors)
 		return -EINVAL; /* component must fit device */
 
-	rdev->size = size;
-	if (size > oldsize && my_mddev->external) {
+	rdev->sectors = sectors;
+	if (sectors > oldsectors && my_mddev->external) {
 		/* need to check that all other rdevs with the same ->bdev
 		 * do not overlap.  We need to unlock the mddev to avoid
-		 * a deadlock.  We have already changed rdev->size, and if
+		 * a deadlock.  We have already changed rdev->sectors, and if
 		 * we have to change it back, we will have the lock again.
 		 */
 		mddev_t *mddev;
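strict_blocks_to_sectors guards two separate overflows: a block count with its
top bit set would go negative when doubled within 64 bits, and where sector_t
is narrower than unsigned long long the assignment `new = blocks * 2` can
silently truncate, which the `new != blocks * 2` comparison catches.  A
user-space sketch of the truncation case, modelling sector_t as 32 bits:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t blocks = 0x80000000ULL;	/* 2 TiB in 1k blocks */
		uint32_t sectors = blocks * 2;	/* truncates in a "32-bit sector_t" */

		/* prints 1: the doubled value no longer round-trips */
		printf("%d\n", (uint64_t)sectors != blocks * 2);
		return 0;
	}
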
@@ -2223,9 +2343,9 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 			if (test_bit(AllReserved, &rdev2->flags) ||
 			    (rdev->bdev == rdev2->bdev &&
 			     rdev != rdev2 &&
-			     overlaps(rdev->data_offset, rdev->size * 2,
+			     overlaps(rdev->data_offset, rdev->sectors,
 				      rdev2->data_offset,
-				      rdev2->size * 2))) {
+				      rdev2->sectors))) {
 				overlap = 1;
 				break;
 			}
@@ -2239,11 +2359,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 		if (overlap) {
 			/* Someone else could have slipped in a size
 			 * change here, but doing so is just silly.
-			 * We put oldsize back because we *know* it is
+			 * We put oldsectors back because we *know* it is
 			 * safe, and trust userspace not to race with
 			 * itself
 			 */
-			rdev->size = oldsize;
+			rdev->sectors = oldsectors;
 			return -EBUSY;
 		}
 	}
@@ -2547,18 +2667,101 @@ level_show(mddev_t *mddev, char *page)
 static ssize_t
 level_store(mddev_t *mddev, const char *buf, size_t len)
 {
+	char level[16];
 	ssize_t rv = len;
-	if (mddev->pers)
+	struct mdk_personality *pers;
+	void *priv;
+
+	if (mddev->pers == NULL) {
+		if (len == 0)
+			return 0;
+		if (len >= sizeof(mddev->clevel))
+			return -ENOSPC;
+		strncpy(mddev->clevel, buf, len);
+		if (mddev->clevel[len-1] == '\n')
+			len--;
+		mddev->clevel[len] = 0;
+		mddev->level = LEVEL_NONE;
+		return rv;
+	}
+
+	/* request to change the personality.  Need to ensure:
+	 *  - array is not engaged in resync/recovery/reshape
+	 *  - old personality can be suspended
+	 *  - new personality will access other array.
+	 */
+
+	if (mddev->sync_thread || mddev->reshape_position != MaxSector)
 		return -EBUSY;
-	if (len == 0)
-		return 0;
-	if (len >= sizeof(mddev->clevel))
-		return -ENOSPC;
-	strncpy(mddev->clevel, buf, len);
-	if (mddev->clevel[len-1] == '\n')
+
+	if (!mddev->pers->quiesce) {
+		printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
+		       mdname(mddev), mddev->pers->name);
+		return -EINVAL;
+	}
+
+	/* Now find the new personality */
+	if (len == 0 || len >= sizeof(level))
+		return -EINVAL;
+	strncpy(level, buf, len);
+	if (level[len-1] == '\n')
 		len--;
-	mddev->clevel[len] = 0;
-	mddev->level = LEVEL_NONE;
+	level[len] = 0;
+
+	request_module("md-%s", level);
+	spin_lock(&pers_lock);
+	pers = find_pers(LEVEL_NONE, level);
+	if (!pers || !try_module_get(pers->owner)) {
+		spin_unlock(&pers_lock);
+		printk(KERN_WARNING "md: personality %s not loaded\n", level);
+		return -EINVAL;
+	}
+	spin_unlock(&pers_lock);
+
+	if (pers == mddev->pers) {
+		/* Nothing to do! */
+		module_put(pers->owner);
+		return rv;
+	}
+	if (!pers->takeover) {
+		module_put(pers->owner);
+		printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
+		       mdname(mddev), level);
+		return -EINVAL;
+	}
+
+	/* ->takeover must set new_* and/or delta_disks
+	 * if it succeeds, and may set them when it fails.
+	 */
+	priv = pers->takeover(mddev);
+	if (IS_ERR(priv)) {
+		mddev->new_level = mddev->level;
+		mddev->new_layout = mddev->layout;
+		mddev->new_chunk = mddev->chunk_size;
+		mddev->raid_disks -= mddev->delta_disks;
+		mddev->delta_disks = 0;
+		module_put(pers->owner);
+		printk(KERN_WARNING "md: %s: %s would not accept array\n",
+		       mdname(mddev), level);
+		return PTR_ERR(priv);
+	}
+
+	/* Looks like we have a winner */
+	mddev_suspend(mddev);
+	mddev->pers->stop(mddev);
+	module_put(mddev->pers->owner);
+	mddev->pers = pers;
+	mddev->private = priv;
+	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+	mddev->level = mddev->new_level;
+	mddev->layout = mddev->new_layout;
+	mddev->chunk_size = mddev->new_chunk;
+	mddev->delta_disks = 0;
+	pers->run(mddev);
+	mddev_resume(mddev);
+	set_bit(MD_CHANGE_DEVS, &mddev->flags);
+	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	md_wakeup_thread(mddev->thread);
 	return rv;
 }
 
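With the takeover machinery in place, a level change is just a sysfs write.
A minimal user-space sketch, assuming an array at /dev/md0 and a target
personality whose module provides ->takeover for the current layout (both
assumptions, not guarantees of this patch alone):

	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/sys/block/md0/md/level", O_WRONLY);

		if (fd < 0)
			return 1;
		/* kicks off request_module("md-raid5"), find_pers(),
		 * ->takeover(), then the suspend/swap/resume sequence */
		write(fd, "raid5", 5);
		close(fd);
		return 0;
	}
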
@@ -2586,12 +2789,18 @@ layout_store(mddev_t *mddev, const char *buf, size_t len)
 	if (!*buf || (*e && *e != '\n'))
 		return -EINVAL;
 
-	if (mddev->pers)
-		return -EBUSY;
-	if (mddev->reshape_position != MaxSector)
-		mddev->new_layout = n;
-	else
-		mddev->layout = n;
+	if (mddev->pers) {
+		int err;
+		if (mddev->pers->reconfig == NULL)
+			return -EBUSY;
+		err = mddev->pers->reconfig(mddev, n, -1);
+		if (err)
+			return err;
+	} else {
+		mddev->new_layout = n;
+		if (mddev->reshape_position == MaxSector)
+			mddev->layout = n;
+	}
 	return len;
 }
 static struct md_sysfs_entry md_layout =
@@ -2648,19 +2857,24 @@ chunk_size_show(mddev_t *mddev, char *page)
 static ssize_t
 chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
 {
-	/* can only set chunk_size if array is not yet active */
 	char *e;
 	unsigned long n = simple_strtoul(buf, &e, 10);
 
 	if (!*buf || (*e && *e != '\n'))
 		return -EINVAL;
 
-	if (mddev->pers)
-		return -EBUSY;
-	else if (mddev->reshape_position != MaxSector)
-		mddev->new_chunk = n;
-	else
-		mddev->chunk_size = n;
+	if (mddev->pers) {
+		int err;
+		if (mddev->pers->reconfig == NULL)
+			return -EBUSY;
+		err = mddev->pers->reconfig(mddev, -1, n);
+		if (err)
+			return err;
+	} else {
+		mddev->new_chunk = n;
+		if (mddev->reshape_position == MaxSector)
+			mddev->chunk_size = n;
+	}
 	return len;
 }
 static struct md_sysfs_entry md_chunk_size =
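layout_store and chunk_size_store now funnel runtime changes through a single
personality hook, ->reconfig(mddev, layout, chunk_size), where -1 means "leave
this parameter unchanged".  A sketch of how a personality might honour that
convention (a hypothetical handler, not code from this patch; a real handler
must validate the values and restripe as needed):

	/* -1 selects "no change" for the corresponding parameter */
	static int demo_reconfig(mddev_t *mddev, int layout, int chunk_size)
	{
		if (layout != -1)
			mddev->layout = layout;
		if (chunk_size != -1)
			mddev->chunk_size = chunk_size;
		return 0;
	}
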
@@ -2669,6 +2883,8 @@ __ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
 static ssize_t
 resync_start_show(mddev_t *mddev, char *page)
 {
+	if (mddev->recovery_cp == MaxSector)
+		return sprintf(page, "none\n");
 	return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
 }
 
@@ -2766,7 +2982,7 @@ array_state_show(mddev_t *mddev, char *page)
 	else {
 		if (list_empty(&mddev->disks) &&
 		    mddev->raid_disks == 0 &&
-		    mddev->size == 0)
+		    mddev->dev_sectors == 0)
 			st = clear;
 		else
 			st = inactive;
2973 | static ssize_t | 3189 | static ssize_t |
2974 | size_show(mddev_t *mddev, char *page) | 3190 | size_show(mddev_t *mddev, char *page) |
2975 | { | 3191 | { |
2976 | return sprintf(page, "%llu\n", (unsigned long long)mddev->size); | 3192 | return sprintf(page, "%llu\n", |
3193 | (unsigned long long)mddev->dev_sectors / 2); | ||
2977 | } | 3194 | } |
2978 | 3195 | ||
2979 | static int update_size(mddev_t *mddev, sector_t num_sectors); | 3196 | static int update_size(mddev_t *mddev, sector_t num_sectors); |
@@ -2985,20 +3202,18 @@ size_store(mddev_t *mddev, const char *buf, size_t len) | |||
2985 | * not increase it (except from 0). | 3202 | * not increase it (except from 0). |
2986 | * If array is active, we can try an on-line resize | 3203 | * If array is active, we can try an on-line resize |
2987 | */ | 3204 | */ |
2988 | char *e; | 3205 | sector_t sectors; |
2989 | int err = 0; | 3206 | int err = strict_blocks_to_sectors(buf, §ors); |
2990 | unsigned long long size = simple_strtoull(buf, &e, 10); | ||
2991 | if (!*buf || *buf == '\n' || | ||
2992 | (*e && *e != '\n')) | ||
2993 | return -EINVAL; | ||
2994 | 3207 | ||
3208 | if (err < 0) | ||
3209 | return err; | ||
2995 | if (mddev->pers) { | 3210 | if (mddev->pers) { |
2996 | err = update_size(mddev, size * 2); | 3211 | err = update_size(mddev, sectors); |
2997 | md_update_sb(mddev, 1); | 3212 | md_update_sb(mddev, 1); |
2998 | } else { | 3213 | } else { |
2999 | if (mddev->size == 0 || | 3214 | if (mddev->dev_sectors == 0 || |
3000 | mddev->size > size) | 3215 | mddev->dev_sectors > sectors) |
3001 | mddev->size = size; | 3216 | mddev->dev_sectors = sectors; |
3002 | else | 3217 | else |
3003 | err = -ENOSPC; | 3218 | err = -ENOSPC; |
3004 | } | 3219 | } |
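
size_store now leans on strict_blocks_to_sectors() instead of open-coded parsing. A userspace sketch of what such a conversion has to guarantee -- accept only a clean decimal count of 1K blocks and double it into 512-byte sectors without overflowing (helper name and exact checks here are illustrative, not the kernel implementation):

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	static int blocks_to_sectors(const char *buf, unsigned long long *sectors)
	{
		char *e;
		unsigned long long blocks = strtoull(buf, &e, 10);

		if (e == buf || (*e && *e != '\n'))
			return -EINVAL;		/* not a plain decimal number */
		if (blocks & (1ULL << 63))
			return -EINVAL;		/* 2*blocks would overflow */
		*sectors = blocks * 2;		/* 1K blocks -> 512B sectors */
		return 0;
	}

	int main(void)
	{
		unsigned long long s;

		if (blocks_to_sectors("1048576\n", &s) == 0)
			printf("%llu sectors\n", s);	/* 2097152: a 1GiB array */
		return 0;
	}
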
@@ -3251,6 +3466,8 @@ static ssize_t | |||
3251 | sync_speed_show(mddev_t *mddev, char *page) | 3466 | sync_speed_show(mddev_t *mddev, char *page) |
3252 | { | 3467 | { |
3253 | unsigned long resync, dt, db; | 3468 | unsigned long resync, dt, db; |
3469 | if (mddev->curr_resync == 0) | ||
3470 | return sprintf(page, "none\n"); | ||
3254 | resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active); | 3471 | resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active); |
3255 | dt = (jiffies - mddev->resync_mark) / HZ; | 3472 | dt = (jiffies - mddev->resync_mark) / HZ; |
3256 | if (!dt) dt++; | 3473 | if (!dt) dt++; |
@@ -3263,15 +3480,15 @@ static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed); | |||
3263 | static ssize_t | 3480 | static ssize_t |
3264 | sync_completed_show(mddev_t *mddev, char *page) | 3481 | sync_completed_show(mddev_t *mddev, char *page) |
3265 | { | 3482 | { |
3266 | unsigned long max_blocks, resync; | 3483 | unsigned long max_sectors, resync; |
3267 | 3484 | ||
3268 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | 3485 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) |
3269 | max_blocks = mddev->resync_max_sectors; | 3486 | max_sectors = mddev->resync_max_sectors; |
3270 | else | 3487 | else |
3271 | max_blocks = mddev->size << 1; | 3488 | max_sectors = mddev->dev_sectors; |
3272 | 3489 | ||
3273 | resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active)); | 3490 | resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active)); |
3274 | return sprintf(page, "%lu / %lu\n", resync, max_blocks); | 3491 | return sprintf(page, "%lu / %lu\n", resync, max_sectors); |
3275 | } | 3492 | } |
3276 | 3493 | ||
3277 | static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); | 3494 | static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); |
@@ -3431,6 +3648,57 @@ static struct md_sysfs_entry md_reshape_position = | |||
3431 | __ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show, | 3648 | __ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show, |
3432 | reshape_position_store); | 3649 | reshape_position_store); |
3433 | 3650 | ||
3651 | static ssize_t | ||
3652 | array_size_show(mddev_t *mddev, char *page) | ||
3653 | { | ||
3654 | if (mddev->external_size) | ||
3655 | return sprintf(page, "%llu\n", | ||
3656 | (unsigned long long)mddev->array_sectors/2); | ||
3657 | else | ||
3658 | return sprintf(page, "default\n"); | ||
3659 | } | ||
3660 | |||
3661 | static ssize_t | ||
3662 | array_size_store(mddev_t *mddev, const char *buf, size_t len) | ||
3663 | { | ||
3664 | sector_t sectors; | ||
3665 | |||
3666 | if (strncmp(buf, "default", 7) == 0) { | ||
3667 | if (mddev->pers) | ||
3668 | sectors = mddev->pers->size(mddev, 0, 0); | ||
3669 | else | ||
3670 | sectors = mddev->array_sectors; | ||
3671 | |||
3672 | mddev->external_size = 0; | ||
3673 | } else { | ||
3674 | if (strict_blocks_to_sectors(buf, §ors) < 0) | ||
3675 | return -EINVAL; | ||
3676 | if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors) | ||
3677 | return -EINVAL; | ||
3678 | |||
3679 | mddev->external_size = 1; | ||
3680 | } | ||
3681 | |||
3682 | mddev->array_sectors = sectors; | ||
3683 | set_capacity(mddev->gendisk, mddev->array_sectors); | ||
3684 | if (mddev->pers) { | ||
3685 | struct block_device *bdev = bdget_disk(mddev->gendisk, 0); | ||
3686 | |||
3687 | if (bdev) { | ||
3688 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
3689 | i_size_write(bdev->bd_inode, | ||
3690 | (loff_t)mddev->array_sectors << 9); | ||
3691 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
3692 | bdput(bdev); | ||
3693 | } | ||
3694 | } | ||
3695 | |||
3696 | return len; | ||
3697 | } | ||
3698 | |||
3699 | static struct md_sysfs_entry md_array_size = | ||
3700 | __ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show, | ||
3701 | array_size_store); | ||
3434 | 3702 | ||
3435 | static struct attribute *md_default_attrs[] = { | 3703 | static struct attribute *md_default_attrs[] = { |
3436 | &md_level.attr, | 3704 | &md_level.attr, |
@@ -3444,6 +3712,7 @@ static struct attribute *md_default_attrs[] = { | |||
3444 | &md_safe_delay.attr, | 3712 | &md_safe_delay.attr, |
3445 | &md_array_state.attr, | 3713 | &md_array_state.attr, |
3446 | &md_reshape_position.attr, | 3714 | &md_reshape_position.attr, |
3715 | &md_array_size.attr, | ||
3447 | NULL, | 3716 | NULL, |
3448 | }; | 3717 | }; |
3449 | 3718 | ||
@@ -3602,10 +3871,12 @@ static int md_alloc(dev_t dev, char *name) | |||
3602 | mddev_put(mddev); | 3871 | mddev_put(mddev); |
3603 | return -ENOMEM; | 3872 | return -ENOMEM; |
3604 | } | 3873 | } |
3874 | mddev->queue->queuedata = mddev; | ||
3875 | |||
3605 | /* Can be unlocked because the queue is new: no concurrency */ | 3876 | /* Can be unlocked because the queue is new: no concurrency */ |
3606 | queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue); | 3877 | queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue); |
3607 | 3878 | ||
3608 | blk_queue_make_request(mddev->queue, md_fail_request); | 3879 | blk_queue_make_request(mddev->queue, md_make_request); |
3609 | 3880 | ||
3610 | disk = alloc_disk(1 << shift); | 3881 | disk = alloc_disk(1 << shift); |
3611 | if (!disk) { | 3882 | if (!disk) { |
@@ -3731,13 +4002,13 @@ static int do_md_run(mddev_t * mddev) | |||
3731 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 4002 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
3732 | if (test_bit(Faulty, &rdev->flags)) | 4003 | if (test_bit(Faulty, &rdev->flags)) |
3733 | continue; | 4004 | continue; |
3734 | if (rdev->size < chunk_size / 1024) { | 4005 | if (rdev->sectors < chunk_size / 512) { |
3735 | printk(KERN_WARNING | 4006 | printk(KERN_WARNING |
3736 | "md: Dev %s smaller than chunk_size:" | 4007 | "md: Dev %s smaller than chunk_size:" |
3737 | " %lluk < %dk\n", | 4008 | " %llu < %d\n", |
3738 | bdevname(rdev->bdev,b), | 4009 | bdevname(rdev->bdev,b), |
3739 | (unsigned long long)rdev->size, | 4010 | (unsigned long long)rdev->sectors, |
3740 | chunk_size / 1024); | 4011 | chunk_size / 512); |
3741 | return -EINVAL; | 4012 | return -EINVAL; |
3742 | } | 4013 | } |
3743 | } | 4014 | } |
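
The rewritten check compares like with like: chunk_size is held in bytes, and the new rdev->sectors field counts 512-byte sectors, so the minimum-size test divides by 512 rather than 1024. A throwaway userspace check of the arithmetic:

	#include <assert.h>

	int main(void)
	{
		unsigned long chunk_size = 64 * 1024;	/* 64KiB chunk, in bytes */
		unsigned long long dev_sectors = 100;	/* a 50KiB component device */

		assert(chunk_size / 512 == 128);	/* chunk expressed in sectors */
		assert(dev_sectors < chunk_size / 512);	/* device too small: -EINVAL */
		return 0;
	}
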
@@ -3761,11 +4032,11 @@ static int do_md_run(mddev_t * mddev) | |||
3761 | 4032 | ||
3762 | /* perform some consistency tests on the device. | 4033 | /* perform some consistency tests on the device. |
3763 | * We don't want the data to overlap the metadata, | 4034 | * We don't want the data to overlap the metadata, |
3764 | * Internal Bitmap issues has handled elsewhere. | 4035 | * Internal Bitmap issues have been handled elsewhere. |
3765 | */ | 4036 | */ |
3766 | if (rdev->data_offset < rdev->sb_start) { | 4037 | if (rdev->data_offset < rdev->sb_start) { |
3767 | if (mddev->size && | 4038 | if (mddev->dev_sectors && |
3768 | rdev->data_offset + mddev->size*2 | 4039 | rdev->data_offset + mddev->dev_sectors |
3769 | > rdev->sb_start) { | 4040 | > rdev->sb_start) { |
3770 | printk("md: %s: data overlaps metadata\n", | 4041 | printk("md: %s: data overlaps metadata\n", |
3771 | mdname(mddev)); | 4042 | mdname(mddev)); |
@@ -3801,9 +4072,16 @@ static int do_md_run(mddev_t * mddev) | |||
3801 | } | 4072 | } |
3802 | mddev->pers = pers; | 4073 | mddev->pers = pers; |
3803 | spin_unlock(&pers_lock); | 4074 | spin_unlock(&pers_lock); |
3804 | mddev->level = pers->level; | 4075 | if (mddev->level != pers->level) { |
4076 | mddev->level = pers->level; | ||
4077 | mddev->new_level = pers->level; | ||
4078 | } | ||
3805 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); | 4079 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); |
3806 | 4080 | ||
4081 | if (pers->level >= 4 && pers->level <= 6) | ||
4082 | /* Cannot support integrity (yet) */ | ||
4083 | blk_integrity_unregister(mddev->gendisk); | ||
4084 | |||
3807 | if (mddev->reshape_position != MaxSector && | 4085 | if (mddev->reshape_position != MaxSector && |
3808 | pers->start_reshape == NULL) { | 4086 | pers->start_reshape == NULL) { |
3809 | /* This personality cannot handle reshaping... */ | 4087 | /* This personality cannot handle reshaping... */ |
@@ -3843,7 +4121,9 @@ static int do_md_run(mddev_t * mddev) | |||
3843 | } | 4121 | } |
3844 | 4122 | ||
3845 | mddev->recovery = 0; | 4123 | mddev->recovery = 0; |
3846 | mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ | 4124 | /* may be over-ridden by personality */ |
4125 | mddev->resync_max_sectors = mddev->dev_sectors; | ||
4126 | |||
3847 | mddev->barriers_work = 1; | 4127 | mddev->barriers_work = 1; |
3848 | mddev->ok_start_degraded = start_dirty_degraded; | 4128 | mddev->ok_start_degraded = start_dirty_degraded; |
3849 | 4129 | ||
@@ -3853,7 +4133,17 @@ static int do_md_run(mddev_t * mddev) | |||
3853 | err = mddev->pers->run(mddev); | 4133 | err = mddev->pers->run(mddev); |
3854 | if (err) | 4134 | if (err) |
3855 | printk(KERN_ERR "md: pers->run() failed ...\n"); | 4135 | printk(KERN_ERR "md: pers->run() failed ...\n"); |
3856 | else if (mddev->pers->sync_request) { | 4136 | else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) { |
4137 | WARN_ONCE(!mddev->external_size, "%s: default size too small," | ||
4138 | " but 'external_size' not in effect?\n", __func__); | ||
4139 | printk(KERN_ERR | ||
4140 | "md: invalid array_size %llu > default size %llu\n", | ||
4141 | (unsigned long long)mddev->array_sectors / 2, | ||
4142 | (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2); | ||
4143 | err = -EINVAL; | ||
4144 | mddev->pers->stop(mddev); | ||
4145 | } | ||
4146 | if (err == 0 && mddev->pers->sync_request) { | ||
3857 | err = bitmap_create(mddev); | 4147 | err = bitmap_create(mddev); |
3858 | if (err) { | 4148 | if (err) { |
3859 | printk(KERN_ERR "%s: failed to create bitmap (%d)\n", | 4149 | printk(KERN_ERR "%s: failed to create bitmap (%d)\n", |
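
The new post-run check enforces the invariant behind external_size: a size pinned from user space may shrink the array below the personality's default capacity, but never grow it past what pers->size() can back. Restated as a sketch against the structures in this patch (not kernel code):

	static int array_size_is_sane(mddev_t *mddev)
	{
		sector_t def = mddev->pers->size(mddev, 0, 0);	/* default capacity */

		if (!mddev->external_size)
			return 1;			/* md tracks the default itself */
		return mddev->array_sectors <= def;	/* pinned size must fit */
	}
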
@@ -3899,16 +4189,6 @@ static int do_md_run(mddev_t * mddev) | |||
3899 | 4189 | ||
3900 | set_capacity(disk, mddev->array_sectors); | 4190 | set_capacity(disk, mddev->array_sectors); |
3901 | 4191 | ||
3902 | /* If we call blk_queue_make_request here, it will | ||
3903 | * re-initialise max_sectors etc which may have been | ||
3904 | * refined inside -> run. So just set the bits we need to set. | ||
3905 | * Most initialisation happended when we called | ||
3906 | * blk_queue_make_request(..., md_fail_request) | ||
3907 | * earlier. | ||
3908 | */ | ||
3909 | mddev->queue->queuedata = mddev; | ||
3910 | mddev->queue->make_request_fn = mddev->pers->make_request; | ||
3911 | |||
3912 | /* If there is a partially-recovered drive we need to | 4192 | /* If there is a partially-recovered drive we need to |
3913 | * start recovery here. If we leave it to md_check_recovery, | 4193 | * start recovery here. If we leave it to md_check_recovery, |
3914 | * it will remove the drives and not do the right thing | 4194 | * it will remove the drives and not do the right thing |
@@ -4038,7 +4318,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4038 | md_super_wait(mddev); | 4318 | md_super_wait(mddev); |
4039 | if (mddev->ro) | 4319 | if (mddev->ro) |
4040 | set_disk_ro(disk, 0); | 4320 | set_disk_ro(disk, 0); |
4041 | blk_queue_make_request(mddev->queue, md_fail_request); | 4321 | |
4042 | mddev->pers->stop(mddev); | 4322 | mddev->pers->stop(mddev); |
4043 | mddev->queue->merge_bvec_fn = NULL; | 4323 | mddev->queue->merge_bvec_fn = NULL; |
4044 | mddev->queue->unplug_fn = NULL; | 4324 | mddev->queue->unplug_fn = NULL; |
@@ -4095,7 +4375,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4095 | export_array(mddev); | 4375 | export_array(mddev); |
4096 | 4376 | ||
4097 | mddev->array_sectors = 0; | 4377 | mddev->array_sectors = 0; |
4098 | mddev->size = 0; | 4378 | mddev->external_size = 0; |
4379 | mddev->dev_sectors = 0; | ||
4099 | mddev->raid_disks = 0; | 4380 | mddev->raid_disks = 0; |
4100 | mddev->recovery_cp = 0; | 4381 | mddev->recovery_cp = 0; |
4101 | mddev->resync_min = 0; | 4382 | mddev->resync_min = 0; |
@@ -4135,6 +4416,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4135 | printk(KERN_INFO "md: %s switched to read-only mode.\n", | 4416 | printk(KERN_INFO "md: %s switched to read-only mode.\n", |
4136 | mdname(mddev)); | 4417 | mdname(mddev)); |
4137 | err = 0; | 4418 | err = 0; |
4419 | blk_integrity_unregister(disk); | ||
4138 | md_new_event(mddev); | 4420 | md_new_event(mddev); |
4139 | sysfs_notify_dirent(mddev->sysfs_state); | 4421 | sysfs_notify_dirent(mddev->sysfs_state); |
4140 | out: | 4422 | out: |
@@ -4300,8 +4582,8 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
4300 | info.patch_version = MD_PATCHLEVEL_VERSION; | 4582 | info.patch_version = MD_PATCHLEVEL_VERSION; |
4301 | info.ctime = mddev->ctime; | 4583 | info.ctime = mddev->ctime; |
4302 | info.level = mddev->level; | 4584 | info.level = mddev->level; |
4303 | info.size = mddev->size; | 4585 | info.size = mddev->dev_sectors / 2; |
4304 | if (info.size != mddev->size) /* overflow */ | 4586 | if (info.size != mddev->dev_sectors / 2) /* overflow */ |
4305 | info.size = -1; | 4587 | info.size = -1; |
4306 | info.nr_disks = nr; | 4588 | info.nr_disks = nr; |
4307 | info.raid_disks = mddev->raid_disks; | 4589 | info.raid_disks = mddev->raid_disks; |
@@ -4480,6 +4762,8 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
4480 | clear_bit(In_sync, &rdev->flags); /* just to be sure */ | 4762 | clear_bit(In_sync, &rdev->flags); /* just to be sure */ |
4481 | if (info->state & (1<<MD_DISK_WRITEMOSTLY)) | 4763 | if (info->state & (1<<MD_DISK_WRITEMOSTLY)) |
4482 | set_bit(WriteMostly, &rdev->flags); | 4764 | set_bit(WriteMostly, &rdev->flags); |
4765 | else | ||
4766 | clear_bit(WriteMostly, &rdev->flags); | ||
4483 | 4767 | ||
4484 | rdev->raid_disk = -1; | 4768 | rdev->raid_disk = -1; |
4485 | err = bind_rdev_to_array(rdev, mddev); | 4769 | err = bind_rdev_to_array(rdev, mddev); |
@@ -4543,7 +4827,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
4543 | rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; | 4827 | rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; |
4544 | } else | 4828 | } else |
4545 | rdev->sb_start = calc_dev_sboffset(rdev->bdev); | 4829 | rdev->sb_start = calc_dev_sboffset(rdev->bdev); |
4546 | rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2; | 4830 | rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size); |
4547 | 4831 | ||
4548 | err = bind_rdev_to_array(rdev, mddev); | 4832 | err = bind_rdev_to_array(rdev, mddev); |
4549 | if (err) { | 4833 | if (err) { |
@@ -4613,7 +4897,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) | |||
4613 | else | 4897 | else |
4614 | rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; | 4898 | rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; |
4615 | 4899 | ||
4616 | rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2; | 4900 | rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size); |
4617 | 4901 | ||
4618 | if (test_bit(Faulty, &rdev->flags)) { | 4902 | if (test_bit(Faulty, &rdev->flags)) { |
4619 | printk(KERN_WARNING | 4903 | printk(KERN_WARNING |
@@ -4749,7 +5033,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) | |||
4749 | 5033 | ||
4750 | mddev->level = info->level; | 5034 | mddev->level = info->level; |
4751 | mddev->clevel[0] = 0; | 5035 | mddev->clevel[0] = 0; |
4752 | mddev->size = info->size; | 5036 | mddev->dev_sectors = 2 * (sector_t)info->size; |
4753 | mddev->raid_disks = info->raid_disks; | 5037 | mddev->raid_disks = info->raid_disks; |
4754 | /* don't set md_minor, it is determined by which /dev/md* was | 5038 | /* don't set md_minor, it is determined by which /dev/md* was |
4755 | * opened | 5039 | * opened |
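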
@@ -4788,6 +5072,17 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) | |||
4788 | return 0; | 5072 | return 0; |
4789 | } | 5073 | } |
4790 | 5074 | ||
5075 | void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors) | ||
5076 | { | ||
5077 | WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__); | ||
5078 | |||
5079 | if (mddev->external_size) | ||
5080 | return; | ||
5081 | |||
5082 | mddev->array_sectors = array_sectors; | ||
5083 | } | ||
5084 | EXPORT_SYMBOL(md_set_array_sectors); | ||
5085 | |||
4791 | static int update_size(mddev_t *mddev, sector_t num_sectors) | 5086 | static int update_size(mddev_t *mddev, sector_t num_sectors) |
4792 | { | 5087 | { |
4793 | mdk_rdev_t *rdev; | 5088 | mdk_rdev_t *rdev; |
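
Personalities are expected to report capacity through this helper rather than assigning array_sectors directly, so a value pinned via the new array_size attribute survives; the multipath, raid0 and raid1 hunks below all switch to this pattern. Illustrative call shape, where example_size() stands in for a level's ->size() hook:

	static int example_run(mddev_t *mddev)
	{
		/* old style: mddev->array_sectors = mddev->dev_sectors;   */
		/* new style: respect an externally pinned size, if any:   */
		md_set_array_sectors(mddev, example_size(mddev, 0, 0));
		return 0;
	}
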
@@ -4814,8 +5109,7 @@ static int update_size(mddev_t *mddev, sector_t num_sectors) | |||
4814 | */ | 5109 | */ |
4815 | return -EBUSY; | 5110 | return -EBUSY; |
4816 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 5111 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
4817 | sector_t avail; | 5112 | sector_t avail = rdev->sectors; |
4818 | avail = rdev->size * 2; | ||
4819 | 5113 | ||
4820 | if (fit && (num_sectors == 0 || num_sectors > avail)) | 5114 | if (fit && (num_sectors == 0 || num_sectors > avail)) |
4821 | num_sectors = avail; | 5115 | num_sectors = avail; |
@@ -4887,12 +5181,18 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
4887 | ) | 5181 | ) |
4888 | return -EINVAL; | 5182 | return -EINVAL; |
4889 | /* Check there is only one change */ | 5183 | /* Check there is only one change */ |
4890 | if (info->size >= 0 && mddev->size != info->size) cnt++; | 5184 | if (info->size >= 0 && mddev->dev_sectors / 2 != info->size) |
4891 | if (mddev->raid_disks != info->raid_disks) cnt++; | 5185 | cnt++; |
4892 | if (mddev->layout != info->layout) cnt++; | 5186 | if (mddev->raid_disks != info->raid_disks) |
4893 | if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++; | 5187 | cnt++; |
4894 | if (cnt == 0) return 0; | 5188 | if (mddev->layout != info->layout) |
4895 | if (cnt > 1) return -EINVAL; | 5189 | cnt++; |
5190 | if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) | ||
5191 | cnt++; | ||
5192 | if (cnt == 0) | ||
5193 | return 0; | ||
5194 | if (cnt > 1) | ||
5195 | return -EINVAL; | ||
4896 | 5196 | ||
4897 | if (mddev->layout != info->layout) { | 5197 | if (mddev->layout != info->layout) { |
4898 | /* Change layout | 5198 | /* Change layout |
@@ -4904,7 +5204,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
4904 | else | 5204 | else |
4905 | return mddev->pers->reconfig(mddev, info->layout, -1); | 5205 | return mddev->pers->reconfig(mddev, info->layout, -1); |
4906 | } | 5206 | } |
4907 | if (info->size >= 0 && mddev->size != info->size) | 5207 | if (info->size >= 0 && mddev->dev_sectors / 2 != info->size) |
4908 | rv = update_size(mddev, (sector_t)info->size * 2); | 5208 | rv = update_size(mddev, (sector_t)info->size * 2); |
4909 | 5209 | ||
4910 | if (mddev->raid_disks != info->raid_disks) | 5210 | if (mddev->raid_disks != info->raid_disks) |
@@ -5331,6 +5631,8 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, | |||
5331 | 5631 | ||
5332 | void md_unregister_thread(mdk_thread_t *thread) | 5632 | void md_unregister_thread(mdk_thread_t *thread) |
5333 | { | 5633 | { |
5634 | if (!thread) | ||
5635 | return; | ||
5334 | dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk)); | 5636 | dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk)); |
5335 | 5637 | ||
5336 | kthread_stop(thread->tsk); | 5638 | kthread_stop(thread->tsk); |
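
The NULL check gives md_unregister_thread() free()-like semantics, so error and teardown paths can unregister unconditionally without testing the pointer first. A runnable userspace analogue of the idiom:

	#include <stdlib.h>

	struct worker { int dummy; };

	static void worker_destroy(struct worker *w)
	{
		if (!w)
			return;		/* tolerate "nothing to destroy" */
		free(w);
	}

	int main(void)
	{
		struct worker *w = NULL;	/* e.g. thread never started */
		worker_destroy(w);		/* safe, no special-casing at call site */
		return 0;
	}
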
@@ -5404,7 +5706,7 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) | |||
5404 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | 5706 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) |
5405 | max_blocks = mddev->resync_max_sectors >> 1; | 5707 | max_blocks = mddev->resync_max_sectors >> 1; |
5406 | else | 5708 | else |
5407 | max_blocks = mddev->size; | 5709 | max_blocks = mddev->dev_sectors / 2; |
5408 | 5710 | ||
5409 | /* | 5711 | /* |
5410 | * Should not happen. | 5712 | * Should not happen. |
@@ -5537,7 +5839,7 @@ struct mdstat_info { | |||
5537 | static int md_seq_show(struct seq_file *seq, void *v) | 5839 | static int md_seq_show(struct seq_file *seq, void *v) |
5538 | { | 5840 | { |
5539 | mddev_t *mddev = v; | 5841 | mddev_t *mddev = v; |
5540 | sector_t size; | 5842 | sector_t sectors; |
5541 | mdk_rdev_t *rdev; | 5843 | mdk_rdev_t *rdev; |
5542 | struct mdstat_info *mi = seq->private; | 5844 | struct mdstat_info *mi = seq->private; |
5543 | struct bitmap *bitmap; | 5845 | struct bitmap *bitmap; |
@@ -5573,7 +5875,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
5573 | seq_printf(seq, " %s", mddev->pers->name); | 5875 | seq_printf(seq, " %s", mddev->pers->name); |
5574 | } | 5876 | } |
5575 | 5877 | ||
5576 | size = 0; | 5878 | sectors = 0; |
5577 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 5879 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
5578 | char b[BDEVNAME_SIZE]; | 5880 | char b[BDEVNAME_SIZE]; |
5579 | seq_printf(seq, " %s[%d]", | 5881 | seq_printf(seq, " %s[%d]", |
@@ -5585,7 +5887,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
5585 | continue; | 5887 | continue; |
5586 | } else if (rdev->raid_disk < 0) | 5888 | } else if (rdev->raid_disk < 0) |
5587 | seq_printf(seq, "(S)"); /* spare */ | 5889 | seq_printf(seq, "(S)"); /* spare */ |
5588 | size += rdev->size; | 5890 | sectors += rdev->sectors; |
5589 | } | 5891 | } |
5590 | 5892 | ||
5591 | if (!list_empty(&mddev->disks)) { | 5893 | if (!list_empty(&mddev->disks)) { |
@@ -5595,7 +5897,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
5595 | mddev->array_sectors / 2); | 5897 | mddev->array_sectors / 2); |
5596 | else | 5898 | else |
5597 | seq_printf(seq, "\n %llu blocks", | 5899 | seq_printf(seq, "\n %llu blocks", |
5598 | (unsigned long long)size); | 5900 | (unsigned long long)sectors / 2); |
5599 | } | 5901 | } |
5600 | if (mddev->persistent) { | 5902 | if (mddev->persistent) { |
5601 | if (mddev->major_version != 0 || | 5903 | if (mddev->major_version != 0 || |
@@ -5722,19 +6024,19 @@ int unregister_md_personality(struct mdk_personality *p) | |||
5722 | return 0; | 6024 | return 0; |
5723 | } | 6025 | } |
5724 | 6026 | ||
5725 | static int is_mddev_idle(mddev_t *mddev) | 6027 | static int is_mddev_idle(mddev_t *mddev, int init) |
5726 | { | 6028 | { |
5727 | mdk_rdev_t * rdev; | 6029 | mdk_rdev_t * rdev; |
5728 | int idle; | 6030 | int idle; |
5729 | long curr_events; | 6031 | int curr_events; |
5730 | 6032 | ||
5731 | idle = 1; | 6033 | idle = 1; |
5732 | rcu_read_lock(); | 6034 | rcu_read_lock(); |
5733 | rdev_for_each_rcu(rdev, mddev) { | 6035 | rdev_for_each_rcu(rdev, mddev) { |
5734 | struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; | 6036 | struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; |
5735 | curr_events = part_stat_read(&disk->part0, sectors[0]) + | 6037 | curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + |
5736 | part_stat_read(&disk->part0, sectors[1]) - | 6038 | (int)part_stat_read(&disk->part0, sectors[1]) - |
5737 | atomic_read(&disk->sync_io); | 6039 | atomic_read(&disk->sync_io); |
5738 | /* sync IO will cause sync_io to increase before the disk_stats | 6040 | /* sync IO will cause sync_io to increase before the disk_stats |
5739 | * as sync_io is counted when a request starts, and | 6041 | * as sync_io is counted when a request starts, and |
5740 | * disk_stats is counted when it completes. | 6042 | * disk_stats is counted when it completes. |
@@ -5757,7 +6059,7 @@ static int is_mddev_idle(mddev_t *mddev) | |||
5757 | * always make curr_events less than last_events. | 6059 | * always make curr_events less than last_events. |
5758 | * | 6060 | * |
5759 | */ | 6061 | */ |
5760 | if (curr_events - rdev->last_events > 4096) { | 6062 | if (init || curr_events - rdev->last_events > 64) { |
5761 | rdev->last_events = curr_events; | 6063 | rdev->last_events = curr_events; |
5762 | idle = 0; | 6064 | idle = 0; |
5763 | } | 6065 | } |
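
Shrinking last_events/curr_events from long to int is deliberate: the per-disk sector counters can wrap, and once both sides are truncated to int, plain signed subtraction still yields a small, meaningful delta across a wrap. A runnable demonstration (relying on the usual two's-complement conversion behaviour):

	#include <stdio.h>

	int main(void)
	{
		unsigned int raw_last = 0xffffffc0u;	/* counter just before wrap */
		unsigned int raw_curr = 0x00000010u;	/* 80 sectors later, wrapped */
		int last = (int)raw_last;
		int curr = (int)raw_curr;

		printf("delta = %d\n", curr - last);	/* prints 80, not ~4 billion */
		return 0;
	}
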
@@ -5980,10 +6282,10 @@ void md_do_sync(mddev_t *mddev) | |||
5980 | j = mddev->recovery_cp; | 6282 | j = mddev->recovery_cp; |
5981 | 6283 | ||
5982 | } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 6284 | } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
5983 | max_sectors = mddev->size << 1; | 6285 | max_sectors = mddev->dev_sectors; |
5984 | else { | 6286 | else { |
5985 | /* recovery follows the physical size of devices */ | 6287 | /* recovery follows the physical size of devices */ |
5986 | max_sectors = mddev->size << 1; | 6288 | max_sectors = mddev->dev_sectors; |
5987 | j = MaxSector; | 6289 | j = MaxSector; |
5988 | list_for_each_entry(rdev, &mddev->disks, same_set) | 6290 | list_for_each_entry(rdev, &mddev->disks, same_set) |
5989 | if (rdev->raid_disk >= 0 && | 6291 | if (rdev->raid_disk >= 0 && |
@@ -6000,7 +6302,7 @@ void md_do_sync(mddev_t *mddev) | |||
6000 | "(but not more than %d KB/sec) for %s.\n", | 6302 | "(but not more than %d KB/sec) for %s.\n", |
6001 | speed_max(mddev), desc); | 6303 | speed_max(mddev), desc); |
6002 | 6304 | ||
6003 | is_mddev_idle(mddev); /* this also initializes IO event counters */ | 6305 | is_mddev_idle(mddev, 1); /* this initializes IO event counters */ |
6004 | 6306 | ||
6005 | io_sectors = 0; | 6307 | io_sectors = 0; |
6006 | for (m = 0; m < SYNC_MARKS; m++) { | 6308 | for (m = 0; m < SYNC_MARKS; m++) { |
@@ -6040,6 +6342,18 @@ void md_do_sync(mddev_t *mddev) | |||
6040 | } | 6342 | } |
6041 | if (kthread_should_stop()) | 6343 | if (kthread_should_stop()) |
6042 | goto interrupted; | 6344 | goto interrupted; |
6345 | |||
6346 | if (mddev->curr_resync > mddev->curr_resync_completed && | ||
6347 | (mddev->curr_resync - mddev->curr_resync_completed) | ||
6348 | > (max_sectors >> 4)) { | ||
6349 | /* time to update curr_resync_completed */ | ||
6350 | blk_unplug(mddev->queue); | ||
6351 | wait_event(mddev->recovery_wait, | ||
6352 | atomic_read(&mddev->recovery_active) == 0); | ||
6353 | mddev->curr_resync_completed = | ||
6354 | mddev->curr_resync; | ||
6355 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | ||
6356 | } | ||
6043 | sectors = mddev->pers->sync_request(mddev, j, &skipped, | 6357 | sectors = mddev->pers->sync_request(mddev, j, &skipped, |
6044 | currspeed < speed_min(mddev)); | 6358 | currspeed < speed_min(mddev)); |
6045 | if (sectors == 0) { | 6359 | if (sectors == 0) { |
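
The block inserted above checkpoints resync progress: once the uncheckpointed span grows past 1/16th of the total, md drains outstanding I/O and records curr_resync_completed, a value it can be certain of even though resync requests complete out of order (see the comment added to md.h further down). The trigger condition, isolated as a sketch:

	static int should_checkpoint(sector_t resync, sector_t completed,
				     sector_t max_sectors)
	{
		/* checkpoint when the span since the last recorded
		 * position exceeds max_sectors/16 */
		return resync > completed &&
		       (resync - completed) > (max_sectors >> 4);
	}
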
@@ -6102,7 +6416,7 @@ void md_do_sync(mddev_t *mddev) | |||
6102 | 6416 | ||
6103 | if (currspeed > speed_min(mddev)) { | 6417 | if (currspeed > speed_min(mddev)) { |
6104 | if ((currspeed > speed_max(mddev)) || | 6418 | if ((currspeed > speed_max(mddev)) || |
6105 | !is_mddev_idle(mddev)) { | 6419 | !is_mddev_idle(mddev, 0)) { |
6106 | msleep(500); | 6420 | msleep(500); |
6107 | goto repeat; | 6421 | goto repeat; |
6108 | } | 6422 | } |
@@ -6173,6 +6487,8 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
6173 | mdk_rdev_t *rdev; | 6487 | mdk_rdev_t *rdev; |
6174 | int spares = 0; | 6488 | int spares = 0; |
6175 | 6489 | ||
6490 | mddev->curr_resync_completed = 0; | ||
6491 | |||
6176 | list_for_each_entry(rdev, &mddev->disks, same_set) | 6492 | list_for_each_entry(rdev, &mddev->disks, same_set) |
6177 | if (rdev->raid_disk >= 0 && | 6493 | if (rdev->raid_disk >= 0 && |
6178 | !test_bit(Blocked, &rdev->flags) && | 6494 | !test_bit(Blocked, &rdev->flags) && |
@@ -6327,6 +6643,9 @@ void md_check_recovery(mddev_t *mddev) | |||
6327 | sysfs_notify(&mddev->kobj, NULL, | 6643 | sysfs_notify(&mddev->kobj, NULL, |
6328 | "degraded"); | 6644 | "degraded"); |
6329 | } | 6645 | } |
6646 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && | ||
6647 | mddev->pers->finish_reshape) | ||
6648 | mddev->pers->finish_reshape(mddev); | ||
6330 | md_update_sb(mddev, 1); | 6649 | md_update_sb(mddev, 1); |
6331 | 6650 | ||
6332 | /* if array is no-longer degraded, then any saved_raid_disk | 6651 | /* if array is no-longer degraded, then any saved_raid_disk |
@@ -6470,13 +6789,13 @@ static void md_geninit(void) | |||
6470 | 6789 | ||
6471 | static int __init md_init(void) | 6790 | static int __init md_init(void) |
6472 | { | 6791 | { |
6473 | if (register_blkdev(MAJOR_NR, "md")) | 6792 | if (register_blkdev(MD_MAJOR, "md")) |
6474 | return -1; | 6793 | return -1; |
6475 | if ((mdp_major=register_blkdev(0, "mdp"))<=0) { | 6794 | if ((mdp_major=register_blkdev(0, "mdp"))<=0) { |
6476 | unregister_blkdev(MAJOR_NR, "md"); | 6795 | unregister_blkdev(MD_MAJOR, "md"); |
6477 | return -1; | 6796 | return -1; |
6478 | } | 6797 | } |
6479 | blk_register_region(MKDEV(MAJOR_NR, 0), 1UL<<MINORBITS, THIS_MODULE, | 6798 | blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE, |
6480 | md_probe, NULL, NULL); | 6799 | md_probe, NULL, NULL); |
6481 | blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE, | 6800 | blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE, |
6482 | md_probe, NULL, NULL); | 6801 | md_probe, NULL, NULL); |
@@ -6562,10 +6881,10 @@ static __exit void md_exit(void) | |||
6562 | mddev_t *mddev; | 6881 | mddev_t *mddev; |
6563 | struct list_head *tmp; | 6882 | struct list_head *tmp; |
6564 | 6883 | ||
6565 | blk_unregister_region(MKDEV(MAJOR_NR,0), 1U << MINORBITS); | 6884 | blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS); |
6566 | blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS); | 6885 | blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS); |
6567 | 6886 | ||
6568 | unregister_blkdev(MAJOR_NR,"md"); | 6887 | unregister_blkdev(MD_MAJOR,"md"); |
6569 | unregister_blkdev(mdp_major, "mdp"); | 6888 | unregister_blkdev(mdp_major, "mdp"); |
6570 | unregister_reboot_notifier(&md_notifier); | 6889 | unregister_reboot_notifier(&md_notifier); |
6571 | unregister_sysctl_table(raid_table_header); | 6890 | unregister_sysctl_table(raid_table_header); |
diff --git a/include/linux/raid/md_k.h b/drivers/md/md.h index 9743e4dbc918..e9b7f54c24d6 100644 --- a/include/linux/raid/md_k.h +++ b/drivers/md/md.h | |||
@@ -15,21 +15,8 @@ | |||
15 | #ifndef _MD_K_H | 15 | #ifndef _MD_K_H |
16 | #define _MD_K_H | 16 | #define _MD_K_H |
17 | 17 | ||
18 | /* and dm-bio-list.h is not under include/linux because.... ??? */ | ||
19 | #include "../../../drivers/md/dm-bio-list.h" | ||
20 | |||
21 | #ifdef CONFIG_BLOCK | 18 | #ifdef CONFIG_BLOCK |
22 | 19 | ||
23 | #define LEVEL_MULTIPATH (-4) | ||
24 | #define LEVEL_LINEAR (-1) | ||
25 | #define LEVEL_FAULTY (-5) | ||
26 | |||
27 | /* we need a value for 'no level specified' and 0 | ||
28 | * means 'raid0', so we need something else. This is | ||
29 | * for internal use only | ||
30 | */ | ||
31 | #define LEVEL_NONE (-1000000) | ||
32 | |||
33 | #define MaxSector (~(sector_t)0) | 20 | #define MaxSector (~(sector_t)0) |
34 | 21 | ||
35 | typedef struct mddev_s mddev_t; | 22 | typedef struct mddev_s mddev_t; |
@@ -49,9 +36,9 @@ struct mdk_rdev_s | |||
49 | { | 36 | { |
50 | struct list_head same_set; /* RAID devices within the same set */ | 37 | struct list_head same_set; /* RAID devices within the same set */ |
51 | 38 | ||
52 | sector_t size; /* Device size (in blocks) */ | 39 | sector_t sectors; /* Device size (in 512bytes sectors) */ |
53 | mddev_t *mddev; /* RAID array if running */ | 40 | mddev_t *mddev; /* RAID array if running */ |
54 | long last_events; /* IO event timestamp */ | 41 | int last_events; /* IO event timestamp */ |
55 | 42 | ||
56 | struct block_device *bdev; /* block device handle */ | 43 | struct block_device *bdev; /* block device handle */ |
57 | 44 | ||
@@ -132,6 +119,8 @@ struct mddev_s | |||
132 | #define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */ | 119 | #define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */ |
133 | #define MD_CHANGE_PENDING 2 /* superblock update in progress */ | 120 | #define MD_CHANGE_PENDING 2 /* superblock update in progress */ |
134 | 121 | ||
122 | int suspended; | ||
123 | atomic_t active_io; | ||
135 | int ro; | 124 | int ro; |
136 | 125 | ||
137 | struct gendisk *gendisk; | 126 | struct gendisk *gendisk; |
@@ -155,8 +144,11 @@ struct mddev_s | |||
155 | char clevel[16]; | 144 | char clevel[16]; |
156 | int raid_disks; | 145 | int raid_disks; |
157 | int max_disks; | 146 | int max_disks; |
158 | sector_t size; /* used size of component devices */ | 147 | sector_t dev_sectors; /* used size of |
148 | * component devices */ | ||
159 | sector_t array_sectors; /* exported array size */ | 149 | sector_t array_sectors; /* exported array size */ |
150 | int external_size; /* size managed | ||
151 | * externally */ | ||
160 | __u64 events; | 152 | __u64 events; |
161 | 153 | ||
162 | char uuid[16]; | 154 | char uuid[16]; |
@@ -172,6 +164,13 @@ struct mddev_s | |||
172 | struct mdk_thread_s *thread; /* management thread */ | 164 | struct mdk_thread_s *thread; /* management thread */ |
173 | struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ | 165 | struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ |
174 | sector_t curr_resync; /* last block scheduled */ | 166 | sector_t curr_resync; /* last block scheduled */ |
167 | /* As resync requests can complete out of order, we cannot easily track | ||
168 | * how much resync has been completed. So we occasionally pause until | ||
169 | * everything completes, then set curr_resync_completed to curr_resync. | ||
170 | * As such it may be well behind the real resync mark, but it is a value | ||
171 | * we are certain of. | ||
172 | */ | ||
173 | sector_t curr_resync_completed; | ||
175 | unsigned long resync_mark; /* a recent timestamp */ | 174 | unsigned long resync_mark; /* a recent timestamp */ |
176 | sector_t resync_mark_cnt;/* blocks written at resync_mark */ | 175 | sector_t resync_mark_cnt;/* blocks written at resync_mark */ |
177 | sector_t curr_mark_cnt; /* blocks scheduled now */ | 176 | sector_t curr_mark_cnt; /* blocks scheduled now */ |
@@ -315,8 +314,10 @@ struct mdk_personality | |||
315 | int (*spare_active) (mddev_t *mddev); | 314 | int (*spare_active) (mddev_t *mddev); |
316 | sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster); | 315 | sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster); |
317 | int (*resize) (mddev_t *mddev, sector_t sectors); | 316 | int (*resize) (mddev_t *mddev, sector_t sectors); |
317 | sector_t (*size) (mddev_t *mddev, sector_t sectors, int raid_disks); | ||
318 | int (*check_reshape) (mddev_t *mddev); | 318 | int (*check_reshape) (mddev_t *mddev); |
319 | int (*start_reshape) (mddev_t *mddev); | 319 | int (*start_reshape) (mddev_t *mddev); |
320 | void (*finish_reshape) (mddev_t *mddev); | ||
320 | int (*reconfig) (mddev_t *mddev, int layout, int chunk_size); | 321 | int (*reconfig) (mddev_t *mddev, int layout, int chunk_size); |
321 | /* quiesce moves between quiescence states | 322 | /* quiesce moves between quiescence states |
322 | * 0 - fully active | 323 | * 0 - fully active |
@@ -324,6 +325,16 @@ struct mdk_personality | |||
324 | * others - reserved | 325 | * others - reserved |
325 | */ | 326 | */ |
326 | void (*quiesce) (mddev_t *mddev, int state); | 327 | void (*quiesce) (mddev_t *mddev, int state); |
328 | /* takeover is used to transition an array from one | ||
329 | * personality to another. The new personality must be able | ||
330 | * to handle the data in the current layout. | ||
331 | * e.g. 2drive raid1 -> 2drive raid5 | ||
332 | * ndrive raid5 -> degraded n+1drive raid6 with special layout | ||
333 | * If the takeover succeeds, a new 'private' structure is returned. | ||
334 | * This needs to be installed and then ->run used to activate the | ||
335 | * array. | ||
336 | */ | ||
337 | void *(*takeover) (mddev_t *mddev); | ||
327 | }; | 338 | }; |
328 | 339 | ||
329 | 340 | ||
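
A hypothetical takeover implementation following the contract documented above -- validate the current geometry, set the target level, and hand back the new personality's private data (setup_conf() here is a placeholder for whatever builds that structure, not a real md symbol):

	static void *my_takeover(mddev_t *mddev)
	{
		if (mddev->raid_disks != 2)
			return ERR_PTR(-EINVAL);	/* e.g. only 2-drive raid1 */

		mddev->new_level = 5;			/* target personality */
		return setup_conf(mddev);		/* new private structure,
							 * installed by the caller */
	}
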
@@ -400,3 +411,26 @@ static inline void safe_put_page(struct page *p) | |||
400 | #endif /* CONFIG_BLOCK */ | 411 | #endif /* CONFIG_BLOCK */ |
401 | #endif | 412 | #endif |
402 | 413 | ||
414 | |||
415 | extern int register_md_personality(struct mdk_personality *p); | ||
416 | extern int unregister_md_personality(struct mdk_personality *p); | ||
417 | extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev), | ||
418 | mddev_t *mddev, const char *name); | ||
419 | extern void md_unregister_thread(mdk_thread_t *thread); | ||
420 | extern void md_wakeup_thread(mdk_thread_t *thread); | ||
421 | extern void md_check_recovery(mddev_t *mddev); | ||
422 | extern void md_write_start(mddev_t *mddev, struct bio *bi); | ||
423 | extern void md_write_end(mddev_t *mddev); | ||
424 | extern void md_done_sync(mddev_t *mddev, int blocks, int ok); | ||
425 | extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); | ||
426 | |||
427 | extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, | ||
428 | sector_t sector, int size, struct page *page); | ||
429 | extern void md_super_wait(mddev_t *mddev); | ||
430 | extern int sync_page_io(struct block_device *bdev, sector_t sector, int size, | ||
431 | struct page *page, int rw); | ||
432 | extern void md_do_sync(mddev_t *mddev); | ||
433 | extern void md_new_event(mddev_t *mddev); | ||
434 | extern int md_allow_write(mddev_t *mddev); | ||
435 | extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | ||
436 | extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); | ||
diff --git a/drivers/md/mktables.c b/drivers/md/mktables.c index b61d5767aae7..3b1500843bba 100644 --- a/drivers/md/mktables.c +++ b/drivers/md/mktables.c | |||
@@ -59,7 +59,7 @@ int main(int argc, char *argv[]) | |||
59 | uint8_t v; | 59 | uint8_t v; |
60 | uint8_t exptbl[256], invtbl[256]; | 60 | uint8_t exptbl[256], invtbl[256]; |
61 | 61 | ||
62 | printf("#include \"raid6.h\"\n"); | 62 | printf("#include <linux/raid/pq.h>\n"); |
63 | 63 | ||
64 | /* Compute multiplication table */ | 64 | /* Compute multiplication table */ |
65 | printf("\nconst u8 __attribute__((aligned(256)))\n" | 65 | printf("\nconst u8 __attribute__((aligned(256)))\n" |
@@ -76,6 +76,9 @@ int main(int argc, char *argv[]) | |||
76 | printf("\t},\n"); | 76 | printf("\t},\n"); |
77 | } | 77 | } |
78 | printf("};\n"); | 78 | printf("};\n"); |
79 | printf("#ifdef __KERNEL__\n"); | ||
80 | printf("EXPORT_SYMBOL(raid6_gfmul);\n"); | ||
81 | printf("#endif\n"); | ||
79 | 82 | ||
80 | /* Compute power-of-2 table (exponent) */ | 83 | /* Compute power-of-2 table (exponent) */ |
81 | v = 1; | 84 | v = 1; |
@@ -92,6 +95,9 @@ int main(int argc, char *argv[]) | |||
92 | } | 95 | } |
93 | } | 96 | } |
94 | printf("};\n"); | 97 | printf("};\n"); |
98 | printf("#ifdef __KERNEL__\n"); | ||
99 | printf("EXPORT_SYMBOL(raid6_gfexp);\n"); | ||
100 | printf("#endif\n"); | ||
95 | 101 | ||
96 | /* Compute inverse table x^-1 == x^254 */ | 102 | /* Compute inverse table x^-1 == x^254 */ |
97 | printf("\nconst u8 __attribute__((aligned(256)))\n" | 103 | printf("\nconst u8 __attribute__((aligned(256)))\n" |
@@ -104,6 +110,9 @@ int main(int argc, char *argv[]) | |||
104 | } | 110 | } |
105 | } | 111 | } |
106 | printf("};\n"); | 112 | printf("};\n"); |
113 | printf("#ifdef __KERNEL__\n"); | ||
114 | printf("EXPORT_SYMBOL(raid6_gfinv);\n"); | ||
115 | printf("#endif\n"); | ||
107 | 116 | ||
108 | /* Compute inv(2^x + 1) (exponent-xor-inverse) table */ | 117 | /* Compute inv(2^x + 1) (exponent-xor-inverse) table */ |
109 | printf("\nconst u8 __attribute__((aligned(256)))\n" | 118 | printf("\nconst u8 __attribute__((aligned(256)))\n" |
@@ -115,6 +124,9 @@ int main(int argc, char *argv[]) | |||
115 | (j == 7) ? '\n' : ' '); | 124 | (j == 7) ? '\n' : ' '); |
116 | } | 125 | } |
117 | printf("};\n"); | 126 | printf("};\n"); |
127 | printf("#ifdef __KERNEL__\n"); | ||
128 | printf("EXPORT_SYMBOL(raid6_gfexi);\n"); | ||
129 | printf("#endif\n"); | ||
118 | 130 | ||
119 | return 0; | 131 | return 0; |
120 | } | 132 | } |
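
Net effect on the generated tables file: each table is now followed by a guarded export, so the same generated source still compiles in the userspace raid6test harness, where EXPORT_SYMBOL does not exist. Abridged illustration of the output:

	const u8 __attribute__((aligned(256))) raid6_gfmul[256][256] = {
		/* ... generated entries ... */
	};
	#ifdef __KERNEL__
	EXPORT_SYMBOL(raid6_gfmul);
	#endif
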
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index f6d08f241671..41ced0cbe823 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -19,7 +19,11 @@ | |||
19 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 19 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/raid/multipath.h> | 22 | #include <linux/blkdev.h> |
23 | #include <linux/raid/md_u.h> | ||
24 | #include <linux/seq_file.h> | ||
25 | #include "md.h" | ||
26 | #include "multipath.h" | ||
23 | 27 | ||
24 | #define MAX_WORK_PER_DISK 128 | 28 | #define MAX_WORK_PER_DISK 128 |
25 | 29 | ||
@@ -402,6 +406,14 @@ static void multipathd (mddev_t *mddev) | |||
402 | spin_unlock_irqrestore(&conf->device_lock, flags); | 406 | spin_unlock_irqrestore(&conf->device_lock, flags); |
403 | } | 407 | } |
404 | 408 | ||
409 | static sector_t multipath_size(mddev_t *mddev, sector_t sectors, int raid_disks) | ||
410 | { | ||
411 | WARN_ONCE(sectors || raid_disks, | ||
412 | "%s does not support generic reshape\n", __func__); | ||
413 | |||
414 | return mddev->dev_sectors; | ||
415 | } | ||
416 | |||
405 | static int multipath_run (mddev_t *mddev) | 417 | static int multipath_run (mddev_t *mddev) |
406 | { | 418 | { |
407 | multipath_conf_t *conf; | 419 | multipath_conf_t *conf; |
@@ -498,7 +510,7 @@ static int multipath_run (mddev_t *mddev) | |||
498 | /* | 510 | /* |
499 | * Ok, everything is just fine now | 511 | * Ok, everything is just fine now |
500 | */ | 512 | */ |
501 | mddev->array_sectors = mddev->size * 2; | 513 | md_set_array_sectors(mddev, multipath_size(mddev, 0, 0)); |
502 | 514 | ||
503 | mddev->queue->unplug_fn = multipath_unplug; | 515 | mddev->queue->unplug_fn = multipath_unplug; |
504 | mddev->queue->backing_dev_info.congested_fn = multipath_congested; | 516 | mddev->queue->backing_dev_info.congested_fn = multipath_congested; |
@@ -543,6 +555,7 @@ static struct mdk_personality multipath_personality = | |||
543 | .error_handler = multipath_error, | 555 | .error_handler = multipath_error, |
544 | .hot_add_disk = multipath_add_disk, | 556 | .hot_add_disk = multipath_add_disk, |
545 | .hot_remove_disk= multipath_remove_disk, | 557 | .hot_remove_disk= multipath_remove_disk, |
558 | .size = multipath_size, | ||
546 | }; | 559 | }; |
547 | 560 | ||
548 | static int __init multipath_init (void) | 561 | static int __init multipath_init (void) |
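
multipath_size() is the simplest instance of the new ->size() contract: arguments of (0, 0) mean "the capacity implied by the current geometry", while non-zero values ask what the capacity would be under a hypothetical reshape geometry -- hence the WARN_ONCE in levels that cannot reshape. For a level where capacity does scale with both parameters, the hook might look like this (hypothetical striped level, not md code):

	static sector_t striped_size(mddev_t *mddev, sector_t sectors,
				     int raid_disks)
	{
		if (!sectors)
			sectors = mddev->dev_sectors;	/* current per-device size */
		if (!raid_disks)
			raid_disks = mddev->raid_disks;	/* current disk count */
		return sectors * raid_disks;		/* every sector contributes */
	}
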
diff --git a/include/linux/raid/multipath.h b/drivers/md/multipath.h index 6f53fc177a47..6fa70b400cda 100644 --- a/include/linux/raid/multipath.h +++ b/drivers/md/multipath.h | |||
@@ -1,8 +1,6 @@ | |||
1 | #ifndef _MULTIPATH_H | 1 | #ifndef _MULTIPATH_H |
2 | #define _MULTIPATH_H | 2 | #define _MULTIPATH_H |
3 | 3 | ||
4 | #include <linux/raid/md.h> | ||
5 | |||
6 | struct multipath_info { | 4 | struct multipath_info { |
7 | mdk_rdev_t *rdev; | 5 | mdk_rdev_t *rdev; |
8 | }; | 6 | }; |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index c605ba805586..c08d7559be55 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -18,7 +18,10 @@ | |||
18 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 18 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include <linux/raid/raid0.h> | 21 | #include <linux/blkdev.h> |
22 | #include <linux/seq_file.h> | ||
23 | #include "md.h" | ||
24 | #include "raid0.h" | ||
22 | 25 | ||
23 | static void raid0_unplug(struct request_queue *q) | 26 | static void raid0_unplug(struct request_queue *q) |
24 | { | 27 | { |
@@ -73,16 +76,15 @@ static int create_strip_zones (mddev_t *mddev) | |||
73 | list_for_each_entry(rdev2, &mddev->disks, same_set) { | 76 | list_for_each_entry(rdev2, &mddev->disks, same_set) { |
74 | printk(KERN_INFO "raid0: comparing %s(%llu)", | 77 | printk(KERN_INFO "raid0: comparing %s(%llu)", |
75 | bdevname(rdev1->bdev,b), | 78 | bdevname(rdev1->bdev,b), |
76 | (unsigned long long)rdev1->size); | 79 | (unsigned long long)rdev1->sectors); |
77 | printk(KERN_INFO " with %s(%llu)\n", | 80 | printk(KERN_INFO " with %s(%llu)\n", |
78 | bdevname(rdev2->bdev,b), | 81 | bdevname(rdev2->bdev,b), |
79 | (unsigned long long)rdev2->size); | 82 | (unsigned long long)rdev2->sectors); |
80 | if (rdev2 == rdev1) { | 83 | if (rdev2 == rdev1) { |
81 | printk(KERN_INFO "raid0: END\n"); | 84 | printk(KERN_INFO "raid0: END\n"); |
82 | break; | 85 | break; |
83 | } | 86 | } |
84 | if (rdev2->size == rdev1->size) | 87 | if (rdev2->sectors == rdev1->sectors) { |
85 | { | ||
86 | /* | 88 | /* |
87 | * Not unique, don't count it as a new | 89 | * Not unique, don't count it as a new |
88 | * group | 90 | * group |
@@ -145,7 +147,7 @@ static int create_strip_zones (mddev_t *mddev) | |||
145 | mddev->queue->max_sectors > (PAGE_SIZE>>9)) | 147 | mddev->queue->max_sectors > (PAGE_SIZE>>9)) |
146 | blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); | 148 | blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); |
147 | 149 | ||
148 | if (!smallest || (rdev1->size <smallest->size)) | 150 | if (!smallest || (rdev1->sectors < smallest->sectors)) |
149 | smallest = rdev1; | 151 | smallest = rdev1; |
150 | cnt++; | 152 | cnt++; |
151 | } | 153 | } |
@@ -155,10 +157,10 @@ static int create_strip_zones (mddev_t *mddev) | |||
155 | goto abort; | 157 | goto abort; |
156 | } | 158 | } |
157 | zone->nb_dev = cnt; | 159 | zone->nb_dev = cnt; |
158 | zone->sectors = smallest->size * cnt * 2; | 160 | zone->sectors = smallest->sectors * cnt; |
159 | zone->zone_start = 0; | 161 | zone->zone_start = 0; |
160 | 162 | ||
161 | current_start = smallest->size * 2; | 163 | current_start = smallest->sectors; |
162 | curr_zone_start = zone->sectors; | 164 | curr_zone_start = zone->sectors; |
163 | 165 | ||
164 | /* now do the other zones */ | 166 | /* now do the other zones */ |
@@ -177,29 +179,29 @@ static int create_strip_zones (mddev_t *mddev) | |||
177 | rdev = conf->strip_zone[0].dev[j]; | 179 | rdev = conf->strip_zone[0].dev[j]; |
178 | printk(KERN_INFO "raid0: checking %s ...", | 180 | printk(KERN_INFO "raid0: checking %s ...", |
179 | bdevname(rdev->bdev, b)); | 181 | bdevname(rdev->bdev, b)); |
180 | if (rdev->size > current_start / 2) { | 182 | if (rdev->sectors <= current_start) { |
181 | printk(KERN_INFO " contained as device %d\n", | ||
182 | c); | ||
183 | zone->dev[c] = rdev; | ||
184 | c++; | ||
185 | if (!smallest || (rdev->size <smallest->size)) { | ||
186 | smallest = rdev; | ||
187 | printk(KERN_INFO " (%llu) is smallest!.\n", | ||
188 | (unsigned long long)rdev->size); | ||
189 | } | ||
190 | } else | ||
191 | printk(KERN_INFO " nope.\n"); | 183 | printk(KERN_INFO " nope.\n"); |
184 | continue; | ||
185 | } | ||
186 | printk(KERN_INFO " contained as device %d\n", c); | ||
187 | zone->dev[c] = rdev; | ||
188 | c++; | ||
189 | if (!smallest || rdev->sectors < smallest->sectors) { | ||
190 | smallest = rdev; | ||
191 | printk(KERN_INFO " (%llu) is smallest!.\n", | ||
192 | (unsigned long long)rdev->sectors); | ||
193 | } | ||
192 | } | 194 | } |
193 | 195 | ||
194 | zone->nb_dev = c; | 196 | zone->nb_dev = c; |
195 | zone->sectors = (smallest->size * 2 - current_start) * c; | 197 | zone->sectors = (smallest->sectors - current_start) * c; |
196 | printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", | 198 | printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", |
197 | zone->nb_dev, (unsigned long long)zone->sectors); | 199 | zone->nb_dev, (unsigned long long)zone->sectors); |
198 | 200 | ||
199 | zone->zone_start = curr_zone_start; | 201 | zone->zone_start = curr_zone_start; |
200 | curr_zone_start += zone->sectors; | 202 | curr_zone_start += zone->sectors; |
201 | 203 | ||
202 | current_start = smallest->size * 2; | 204 | current_start = smallest->sectors; |
203 | printk(KERN_INFO "raid0: current zone start: %llu\n", | 205 | printk(KERN_INFO "raid0: current zone start: %llu\n", |
204 | (unsigned long long)current_start); | 206 | (unsigned long long)current_start); |
205 | } | 207 | } |
@@ -261,12 +263,25 @@ static int raid0_mergeable_bvec(struct request_queue *q, | |||
261 | return max; | 263 | return max; |
262 | } | 264 | } |
263 | 265 | ||
266 | static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks) | ||
267 | { | ||
268 | sector_t array_sectors = 0; | ||
269 | mdk_rdev_t *rdev; | ||
270 | |||
271 | WARN_ONCE(sectors || raid_disks, | ||
272 | "%s does not support generic reshape\n", __func__); | ||
273 | |||
274 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
275 | array_sectors += rdev->sectors; | ||
276 | |||
277 | return array_sectors; | ||
278 | } | ||
279 | |||
264 | static int raid0_run (mddev_t *mddev) | 280 | static int raid0_run (mddev_t *mddev) |
265 | { | 281 | { |
266 | unsigned cur=0, i=0, nb_zone; | 282 | unsigned cur=0, i=0, nb_zone; |
267 | s64 sectors; | 283 | s64 sectors; |
268 | raid0_conf_t *conf; | 284 | raid0_conf_t *conf; |
269 | mdk_rdev_t *rdev; | ||
270 | 285 | ||
271 | if (mddev->chunk_size == 0) { | 286 | if (mddev->chunk_size == 0) { |
272 | printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); | 287 | printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); |
@@ -291,16 +306,14 @@ static int raid0_run (mddev_t *mddev) | |||
291 | goto out_free_conf; | 306 | goto out_free_conf; |
292 | 307 | ||
293 | /* calculate array device size */ | 308 | /* calculate array device size */ |
294 | mddev->array_sectors = 0; | 309 | md_set_array_sectors(mddev, raid0_size(mddev, 0, 0)); |
295 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
296 | mddev->array_sectors += rdev->size * 2; | ||
297 | 310 | ||
298 | printk(KERN_INFO "raid0 : md_size is %llu sectors.\n", | 311 | printk(KERN_INFO "raid0 : md_size is %llu sectors.\n", |
299 | (unsigned long long)mddev->array_sectors); | 312 | (unsigned long long)mddev->array_sectors); |
300 | printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n", | 313 | printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n", |
301 | (unsigned long long)conf->spacing); | 314 | (unsigned long long)conf->spacing); |
302 | { | 315 | { |
303 | sector_t s = mddev->array_sectors; | 316 | sector_t s = raid0_size(mddev, 0, 0); |
304 | sector_t space = conf->spacing; | 317 | sector_t space = conf->spacing; |
305 | int round; | 318 | int round; |
306 | conf->sector_shift = 0; | 319 | conf->sector_shift = 0; |
@@ -509,6 +522,7 @@ static struct mdk_personality raid0_personality= | |||
509 | .run = raid0_run, | 522 | .run = raid0_run, |
510 | .stop = raid0_stop, | 523 | .stop = raid0_stop, |
511 | .status = raid0_status, | 524 | .status = raid0_status, |
525 | .size = raid0_size, | ||
512 | }; | 526 | }; |
513 | 527 | ||
514 | static int __init raid0_init (void) | 528 | static int __init raid0_init (void) |
diff --git a/include/linux/raid/raid0.h b/drivers/md/raid0.h index fd42aa87c391..824b12eb1d4f 100644 --- a/include/linux/raid/raid0.h +++ b/drivers/md/raid0.h | |||
@@ -1,8 +1,6 @@ | |||
1 | #ifndef _RAID0_H | 1 | #ifndef _RAID0_H |
2 | #define _RAID0_H | 2 | #define _RAID0_H |
3 | 3 | ||
4 | #include <linux/raid/md.h> | ||
5 | |||
6 | struct strip_zone | 4 | struct strip_zone |
7 | { | 5 | { |
8 | sector_t zone_start; /* Zone offset in md_dev (in sectors) */ | 6 | sector_t zone_start; /* Zone offset in md_dev (in sectors) */ |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e2466425d9ca..b4f4badc0068 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -31,10 +31,13 @@ | |||
31 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 31 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
32 | */ | 32 | */ |
33 | 33 | ||
34 | #include "dm-bio-list.h" | ||
35 | #include <linux/delay.h> | 34 | #include <linux/delay.h> |
36 | #include <linux/raid/raid1.h> | 35 | #include <linux/blkdev.h> |
37 | #include <linux/raid/bitmap.h> | 36 | #include <linux/seq_file.h> |
37 | #include "md.h" | ||
38 | #include "dm-bio-list.h" | ||
39 | #include "raid1.h" | ||
40 | #include "bitmap.h" | ||
38 | 41 | ||
39 | #define DEBUG 0 | 42 | #define DEBUG 0 |
40 | #if DEBUG | 43 | #if DEBUG |
@@ -1723,7 +1726,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1723 | return 0; | 1726 | return 0; |
1724 | } | 1727 | } |
1725 | 1728 | ||
1726 | max_sector = mddev->size << 1; | 1729 | max_sector = mddev->dev_sectors; |
1727 | if (sector_nr >= max_sector) { | 1730 | if (sector_nr >= max_sector) { |
1728 | /* If we aborted, we need to abort the | 1731 | /* If we aborted, we need to abort the |
1729 | * sync on the 'current' bitmap chunk (there will | 1732 | * sync on the 'current' bitmap chunk (there will |
@@ -1919,6 +1922,14 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1919 | return nr_sectors; | 1922 | return nr_sectors; |
1920 | } | 1923 | } |
1921 | 1924 | ||
1925 | static sector_t raid1_size(mddev_t *mddev, sector_t sectors, int raid_disks) | ||
1926 | { | ||
1927 | if (sectors) | ||
1928 | return sectors; | ||
1929 | |||
1930 | return mddev->dev_sectors; | ||
1931 | } | ||
1932 | |||
1922 | static int run(mddev_t *mddev) | 1933 | static int run(mddev_t *mddev) |
1923 | { | 1934 | { |
1924 | conf_t *conf; | 1935 | conf_t *conf; |
@@ -2048,7 +2059,7 @@ static int run(mddev_t *mddev) | |||
2048 | /* | 2059 | /* |
2049 | * Ok, everything is just fine now | 2060 | * Ok, everything is just fine now |
2050 | */ | 2061 | */ |
2051 | mddev->array_sectors = mddev->size * 2; | 2062 | md_set_array_sectors(mddev, raid1_size(mddev, 0, 0)); |
2052 | 2063 | ||
2053 | mddev->queue->unplug_fn = raid1_unplug; | 2064 | mddev->queue->unplug_fn = raid1_unplug; |
2054 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; | 2065 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; |
@@ -2089,6 +2100,9 @@ static int stop(mddev_t *mddev) | |||
2089 | /* need to kick something here to make sure I/O goes? */ | 2100 | /* need to kick something here to make sure I/O goes? */ |
2090 | } | 2101 | } |
2091 | 2102 | ||
2103 | raise_barrier(conf); | ||
2104 | lower_barrier(conf); | ||
2105 | |||
2092 | md_unregister_thread(mddev->thread); | 2106 | md_unregister_thread(mddev->thread); |
2093 | mddev->thread = NULL; | 2107 | mddev->thread = NULL; |
2094 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 2108 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ |
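
The raise/lower pair added to stop() is a drain: raise_barrier() does not return until every in-flight request has retired, so by the time the barrier is lowered nothing can still be referencing conf. Expressed as its own helper (a sketch over this file's existing barrier primitives):

	static void drain_pending_io(conf_t *conf)
	{
		raise_barrier(conf);	/* blocks new I/O, waits out in-flight I/O */
		lower_barrier(conf);	/* nothing was queued meanwhile; conf is idle */
	}
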
@@ -2110,15 +2124,17 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors) | |||
2110 | * any io in the removed space completes, but it hardly seems | 2124 | * any io in the removed space completes, but it hardly seems |
2111 | * worth it. | 2125 | * worth it. |
2112 | */ | 2126 | */ |
2113 | mddev->array_sectors = sectors; | 2127 | md_set_array_sectors(mddev, raid1_size(mddev, sectors, 0)); |
2128 | if (mddev->array_sectors > raid1_size(mddev, sectors, 0)) | ||
2129 | return -EINVAL; | ||
2114 | set_capacity(mddev->gendisk, mddev->array_sectors); | 2130 | set_capacity(mddev->gendisk, mddev->array_sectors); |
2115 | mddev->changed = 1; | 2131 | mddev->changed = 1; |
2116 | if (mddev->array_sectors / 2 > mddev->size && | 2132 | if (sectors > mddev->dev_sectors && |
2117 | mddev->recovery_cp == MaxSector) { | 2133 | mddev->recovery_cp == MaxSector) { |
2118 | mddev->recovery_cp = mddev->size << 1; | 2134 | mddev->recovery_cp = mddev->dev_sectors; |
2119 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 2135 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
2120 | } | 2136 | } |
2121 | mddev->size = mddev->array_sectors / 2; | 2137 | mddev->dev_sectors = sectors; |
2122 | mddev->resync_max_sectors = sectors; | 2138 | mddev->resync_max_sectors = sectors; |
2123 | return 0; | 2139 | return 0; |
2124 | } | 2140 | } |
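[The reworked raid1_resize() now refuses to shrink below the capacity the personality reports, and on a grow it resyncs only the newly exposed tail rather than the whole array. A standalone restatement of the recovery-checkpoint choice; the types and harness are invented, and the driver additionally skips this step when a resync is already in progress.]

#include <assert.h>
#include <stdint.h>

typedef uint64_t sector_t;
#define MaxSector ((sector_t)~0ULL)       /* "nothing left to sync" */

static sector_t resync_start_after_grow(sector_t old_dev_sectors,
                                        sector_t new_dev_sectors)
{
        if (new_dev_sectors > old_dev_sectors)
                return old_dev_sectors;   /* sync only [old, new) */
        return MaxSector;                 /* same size or smaller: nothing */
}

int main(void)
{
        /* growing 1 GiB -> 2 GiB: resync restarts at the old end */
        assert(resync_start_after_grow(2097152, 4194304) == 2097152);
        assert(resync_start_after_grow(2097152, 2097152) == MaxSector);
        return 0;
}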
@@ -2264,6 +2280,7 @@ static struct mdk_personality raid1_personality = | |||
2264 | .spare_active = raid1_spare_active, | 2280 | .spare_active = raid1_spare_active, |
2265 | .sync_request = sync_request, | 2281 | .sync_request = sync_request, |
2266 | .resize = raid1_resize, | 2282 | .resize = raid1_resize, |
2283 | .size = raid1_size, | ||
2267 | .check_reshape = raid1_reshape, | 2284 | .check_reshape = raid1_reshape, |
2268 | .quiesce = raid1_quiesce, | 2285 | .quiesce = raid1_quiesce, |
2269 | }; | 2286 | }; |
diff --git a/include/linux/raid/raid1.h b/drivers/md/raid1.h index 0a9ba7c3302e..1620eea3d57c 100644 --- a/include/linux/raid/raid1.h +++ b/drivers/md/raid1.h | |||
@@ -1,8 +1,6 @@ | |||
1 | #ifndef _RAID1_H | 1 | #ifndef _RAID1_H |
2 | #define _RAID1_H | 2 | #define _RAID1_H |
3 | 3 | ||
4 | #include <linux/raid/md.h> | ||
5 | |||
6 | typedef struct mirror_info mirror_info_t; | 4 | typedef struct mirror_info mirror_info_t; |
7 | 5 | ||
8 | struct mirror_info { | 6 | struct mirror_info { |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7301631abe04..e293d92641ac 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -18,10 +18,13 @@ | |||
18 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 18 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include "dm-bio-list.h" | ||
22 | #include <linux/delay.h> | 21 | #include <linux/delay.h> |
23 | #include <linux/raid/raid10.h> | 22 | #include <linux/blkdev.h> |
24 | #include <linux/raid/bitmap.h> | 23 | #include <linux/seq_file.h> |
24 | #include "md.h" | ||
25 | #include "dm-bio-list.h" | ||
26 | #include "raid10.h" | ||
27 | #include "bitmap.h" | ||
25 | 28 | ||
26 | /* | 29 | /* |
27 | * RAID10 provides a combination of RAID0 and RAID1 functionality. | 30 | * RAID10 provides a combination of RAID0 and RAID1 functionality. |
@@ -1695,7 +1698,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1695 | return 0; | 1698 | return 0; |
1696 | 1699 | ||
1697 | skipped: | 1700 | skipped: |
1698 | max_sector = mddev->size << 1; | 1701 | max_sector = mddev->dev_sectors; |
1699 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | 1702 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) |
1700 | max_sector = mddev->resync_max_sectors; | 1703 | max_sector = mddev->resync_max_sectors; |
1701 | if (sector_nr >= max_sector) { | 1704 | if (sector_nr >= max_sector) { |
@@ -2020,6 +2023,25 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
2020 | goto skipped; | 2023 | goto skipped; |
2021 | } | 2024 | } |
2022 | 2025 | ||
2026 | static sector_t | ||
2027 | raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) | ||
2028 | { | ||
2029 | sector_t size; | ||
2030 | conf_t *conf = mddev_to_conf(mddev); | ||
2031 | |||
2032 | if (!raid_disks) | ||
2033 | raid_disks = mddev->raid_disks; | ||
2034 | if (!sectors) | ||
2035 | sectors = mddev->dev_sectors; | ||
2036 | |||
2037 | size = sectors >> conf->chunk_shift; | ||
2038 | sector_div(size, conf->far_copies); | ||
2039 | size = size * raid_disks; | ||
2040 | sector_div(size, conf->near_copies); | ||
2041 | |||
2042 | return size << conf->chunk_shift; | ||
2043 | } | ||
2044 | |||
2023 | static int run(mddev_t *mddev) | 2045 | static int run(mddev_t *mddev) |
2024 | { | 2046 | { |
2025 | conf_t *conf; | 2047 | conf_t *conf; |
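[raid10_size() folds the capacity arithmetic that run() previously did inline into one reusable helper: whole chunks per device, divided among far copies, multiplied across the disks, divided among near copies. A user-space restatement with a worked example; the standalone harness is invented, and the driver uses sector_div() for the 64-bit divisions.]

#include <assert.h>
#include <stdint.h>

typedef uint64_t sector_t;

static sector_t raid10_capacity(sector_t dev_sectors, int raid_disks,
                                int near_copies, int far_copies,
                                int chunk_shift)
{
        sector_t size = dev_sectors >> chunk_shift; /* whole chunks/device */
        size /= far_copies;   /* far copies consume whole devices' worth */
        size *= raid_disks;   /* chunks across the array */
        size /= near_copies;  /* near copies share each stripe */
        return size << chunk_shift;
}

int main(void)
{
        /* 4 x 1 GiB devices, 64 KiB chunks (128 sectors, shift 7),
         * classic "n2" layout (near=2, far=1): 2 GiB usable. */
        assert(raid10_capacity(2097152, 4, 2, 1, 7) == 4194304);
        return 0;
}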
@@ -2076,7 +2098,7 @@ static int run(mddev_t *mddev) | |||
2076 | conf->far_offset = fo; | 2098 | conf->far_offset = fo; |
2077 | conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; | 2099 | conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; |
2078 | conf->chunk_shift = ffz(~mddev->chunk_size) - 9; | 2100 | conf->chunk_shift = ffz(~mddev->chunk_size) - 9; |
2079 | size = mddev->size >> (conf->chunk_shift-1); | 2101 | size = mddev->dev_sectors >> conf->chunk_shift; |
2080 | sector_div(size, fc); | 2102 | sector_div(size, fc); |
2081 | size = size * conf->raid_disks; | 2103 | size = size * conf->raid_disks; |
2082 | sector_div(size, nc); | 2104 | sector_div(size, nc); |
@@ -2089,7 +2111,7 @@ static int run(mddev_t *mddev) | |||
2089 | */ | 2111 | */ |
2090 | stride += conf->raid_disks - 1; | 2112 | stride += conf->raid_disks - 1; |
2091 | sector_div(stride, conf->raid_disks); | 2113 | sector_div(stride, conf->raid_disks); |
2092 | mddev->size = stride << (conf->chunk_shift-1); | 2114 | mddev->dev_sectors = stride << conf->chunk_shift; |
2093 | 2115 | ||
2094 | if (fo) | 2116 | if (fo) |
2095 | stride = 1; | 2117 | stride = 1; |
@@ -2171,8 +2193,8 @@ static int run(mddev_t *mddev) | |||
2171 | /* | 2193 | /* |
2172 | * Ok, everything is just fine now | 2194 | * Ok, everything is just fine now |
2173 | */ | 2195 | */ |
2174 | mddev->array_sectors = size << conf->chunk_shift; | 2196 | md_set_array_sectors(mddev, raid10_size(mddev, 0, 0)); |
2175 | mddev->resync_max_sectors = size << conf->chunk_shift; | 2197 | mddev->resync_max_sectors = raid10_size(mddev, 0, 0); |
2176 | 2198 | ||
2177 | mddev->queue->unplug_fn = raid10_unplug; | 2199 | mddev->queue->unplug_fn = raid10_unplug; |
2178 | mddev->queue->backing_dev_info.congested_fn = raid10_congested; | 2200 | mddev->queue->backing_dev_info.congested_fn = raid10_congested; |
@@ -2208,6 +2230,9 @@ static int stop(mddev_t *mddev) | |||
2208 | { | 2230 | { |
2209 | conf_t *conf = mddev_to_conf(mddev); | 2231 | conf_t *conf = mddev_to_conf(mddev); |
2210 | 2232 | ||
2233 | raise_barrier(conf, 0); | ||
2234 | lower_barrier(conf); | ||
2235 | |||
2211 | md_unregister_thread(mddev->thread); | 2236 | md_unregister_thread(mddev->thread); |
2212 | mddev->thread = NULL; | 2237 | mddev->thread = NULL; |
2213 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 2238 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ |
@@ -2255,6 +2280,7 @@ static struct mdk_personality raid10_personality = | |||
2255 | .spare_active = raid10_spare_active, | 2280 | .spare_active = raid10_spare_active, |
2256 | .sync_request = sync_request, | 2281 | .sync_request = sync_request, |
2257 | .quiesce = raid10_quiesce, | 2282 | .quiesce = raid10_quiesce, |
2283 | .size = raid10_size, | ||
2258 | }; | 2284 | }; |
2259 | 2285 | ||
2260 | static int __init raid_init(void) | 2286 | static int __init raid_init(void) |
diff --git a/include/linux/raid/raid10.h b/drivers/md/raid10.h index e9091cfeb286..244dbe507a54 100644 --- a/include/linux/raid/raid10.h +++ b/drivers/md/raid10.h | |||
@@ -1,8 +1,6 @@ | |||
1 | #ifndef _RAID10_H | 1 | #ifndef _RAID10_H |
2 | #define _RAID10_H | 2 | #define _RAID10_H |
3 | 3 | ||
4 | #include <linux/raid/md.h> | ||
5 | |||
6 | typedef struct mirror_info mirror_info_t; | 4 | typedef struct mirror_info mirror_info_t; |
7 | 5 | ||
8 | struct mirror_info { | 6 | struct mirror_info { |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a5ba080d303b..3bbc6d647044 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -43,11 +43,14 @@ | |||
43 | * miss any bits. | 43 | * miss any bits. |
44 | */ | 44 | */ |
45 | 45 | ||
46 | #include <linux/blkdev.h> | ||
46 | #include <linux/kthread.h> | 47 | #include <linux/kthread.h> |
47 | #include "raid6.h" | 48 | #include <linux/raid/pq.h> |
48 | |||
49 | #include <linux/raid/bitmap.h> | ||
50 | #include <linux/async_tx.h> | 49 | #include <linux/async_tx.h> |
50 | #include <linux/seq_file.h> | ||
51 | #include "md.h" | ||
52 | #include "raid5.h" | ||
53 | #include "bitmap.h" | ||
51 | 54 | ||
52 | /* | 55 | /* |
53 | * Stripe cache | 56 | * Stripe cache |
@@ -91,11 +94,6 @@ | |||
91 | 94 | ||
92 | #define printk_rl(args...) ((void) (printk_ratelimit() && printk(args))) | 95 | #define printk_rl(args...) ((void) (printk_ratelimit() && printk(args))) |
93 | 96 | ||
94 | #if !RAID6_USE_EMPTY_ZERO_PAGE | ||
95 | /* In .bss so it's zeroed */ | ||
96 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | ||
97 | #endif | ||
98 | |||
99 | /* | 97 | /* |
100 | * We maintain a biased count of active stripes in the bottom 16 bits of | 98 | * We maintain a biased count of active stripes in the bottom 16 bits of |
101 | * bi_phys_segments, and a count of processed stripes in the upper 16 bits | 99 | * bi_phys_segments, and a count of processed stripes in the upper 16 bits |
@@ -130,12 +128,42 @@ static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) | |||
130 | bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16); | 128 | bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16); |
131 | } | 129 | } |
132 | 130 | ||
131 | /* Find first data disk in a raid6 stripe */ | ||
132 | static inline int raid6_d0(struct stripe_head *sh) | ||
133 | { | ||
134 | if (sh->ddf_layout) | ||
135 | /* ddf always starts from the first device */ | ||
136 | return 0; | ||
137 | /* md starts just after Q block */ | ||
138 | if (sh->qd_idx == sh->disks - 1) | ||
139 | return 0; | ||
140 | else | ||
141 | return sh->qd_idx + 1; | ||
142 | } | ||
133 | static inline int raid6_next_disk(int disk, int raid_disks) | 143 | static inline int raid6_next_disk(int disk, int raid_disks) |
134 | { | 144 | { |
135 | disk++; | 145 | disk++; |
136 | return (disk < raid_disks) ? disk : 0; | 146 | return (disk < raid_disks) ? disk : 0; |
137 | } | 147 | } |
138 | 148 | ||
149 | /* When walking through the disks in a raid5, starting at raid6_d0, | ||
150 | * we need to map each disk to a 'slot', where the data disks are slots | ||
151 | * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk | ||
152 | * is raid_disks-1. This helper does that mapping. | ||
153 | */ | ||
154 | static int raid6_idx_to_slot(int idx, struct stripe_head *sh, | ||
155 | int *count, int syndrome_disks) | ||
156 | { | ||
157 | int slot; | ||
158 | |||
159 | if (idx == sh->pd_idx) | ||
160 | return syndrome_disks; | ||
161 | if (idx == sh->qd_idx) | ||
162 | return syndrome_disks + 1; | ||
163 | slot = (*count)++; | ||
164 | return slot; | ||
165 | } | ||
166 | |||
139 | static void return_io(struct bio *return_bi) | 167 | static void return_io(struct bio *return_bi) |
140 | { | 168 | { |
141 | struct bio *bi = return_bi; | 169 | struct bio *bi = return_bi; |
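[raid6_d0() and raid6_idx_to_slot() decouple a stripe's physical device numbering from the slot order the syndrome code expects: data slots first, then P, then Q. A standalone model with a worked example; the struct and harness are invented, and the slot semantics follow the comment in the hunk above.]

#include <assert.h>

struct stripe { int disks, pd_idx, qd_idx, ddf_layout; };

static int d0(const struct stripe *sh)
{
        if (sh->ddf_layout)
                return 0;                    /* DDF walks from device 0 */
        if (sh->qd_idx == sh->disks - 1)
                return 0;                    /* md: data follows Q */
        return sh->qd_idx + 1;
}

static int idx_to_slot(int idx, const struct stripe *sh,
                       int *count, int syndrome_disks)
{
        if (idx == sh->pd_idx)
                return syndrome_disks;       /* P gets the next-to-last slot */
        if (idx == sh->qd_idx)
                return syndrome_disks + 1;   /* Q gets the last slot */
        return (*count)++;                   /* data disks in walk order */
}

int main(void)
{
        /* 5 disks, P on disk 0, Q on disk 1 (the PARITY_0 layout):
         * the walk starts at disk 2 and maps disks 2,3,4 to data
         * slots 0,1,2 while P lands in slot 3 and Q in slot 4. */
        struct stripe sh = { .disks = 5, .pd_idx = 0, .qd_idx = 1 };
        int expect[5] = { 3, 4, 0, 1, 2 };   /* slot, indexed by disk */
        int count = 0, i = d0(&sh);
        do {
                assert(idx_to_slot(i, &sh, &count, sh.disks - 2) == expect[i]);
                i = (i + 1) % sh.disks;      /* raid6_next_disk() */
        } while (i != d0(&sh));
        assert(count == sh.disks - 2);       /* every data disk got a slot */
        return 0;
}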
@@ -193,6 +221,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |||
193 | } | 221 | } |
194 | } | 222 | } |
195 | } | 223 | } |
224 | |||
196 | static void release_stripe(struct stripe_head *sh) | 225 | static void release_stripe(struct stripe_head *sh) |
197 | { | 226 | { |
198 | raid5_conf_t *conf = sh->raid_conf; | 227 | raid5_conf_t *conf = sh->raid_conf; |
@@ -270,9 +299,11 @@ static int grow_buffers(struct stripe_head *sh, int num) | |||
270 | return 0; | 299 | return 0; |
271 | } | 300 | } |
272 | 301 | ||
273 | static void raid5_build_block(struct stripe_head *sh, int i); | 302 | static void raid5_build_block(struct stripe_head *sh, int i, int previous); |
303 | static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, | ||
304 | struct stripe_head *sh); | ||
274 | 305 | ||
275 | static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int disks) | 306 | static void init_stripe(struct stripe_head *sh, sector_t sector, int previous) |
276 | { | 307 | { |
277 | raid5_conf_t *conf = sh->raid_conf; | 308 | raid5_conf_t *conf = sh->raid_conf; |
278 | int i; | 309 | int i; |
@@ -287,11 +318,12 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int | |||
287 | 318 | ||
288 | remove_hash(sh); | 319 | remove_hash(sh); |
289 | 320 | ||
321 | sh->generation = conf->generation - previous; | ||
322 | sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks; | ||
290 | sh->sector = sector; | 323 | sh->sector = sector; |
291 | sh->pd_idx = pd_idx; | 324 | stripe_set_idx(sector, conf, previous, sh); |
292 | sh->state = 0; | 325 | sh->state = 0; |
293 | 326 | ||
294 | sh->disks = disks; | ||
295 | 327 | ||
296 | for (i = sh->disks; i--; ) { | 328 | for (i = sh->disks; i--; ) { |
297 | struct r5dev *dev = &sh->dev[i]; | 329 | struct r5dev *dev = &sh->dev[i]; |
@@ -305,12 +337,13 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int | |||
305 | BUG(); | 337 | BUG(); |
306 | } | 338 | } |
307 | dev->flags = 0; | 339 | dev->flags = 0; |
308 | raid5_build_block(sh, i); | 340 | raid5_build_block(sh, i, previous); |
309 | } | 341 | } |
310 | insert_hash(conf, sh); | 342 | insert_hash(conf, sh); |
311 | } | 343 | } |
312 | 344 | ||
313 | static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, int disks) | 345 | static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, |
346 | short generation) | ||
314 | { | 347 | { |
315 | struct stripe_head *sh; | 348 | struct stripe_head *sh; |
316 | struct hlist_node *hn; | 349 | struct hlist_node *hn; |
@@ -318,7 +351,7 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, in | |||
318 | CHECK_DEVLOCK(); | 351 | CHECK_DEVLOCK(); |
319 | pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector); | 352 | pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector); |
320 | hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash) | 353 | hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash) |
321 | if (sh->sector == sector && sh->disks == disks) | 354 | if (sh->sector == sector && sh->generation == generation) |
322 | return sh; | 355 | return sh; |
323 | pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector); | 356 | pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector); |
324 | return NULL; | 357 | return NULL; |
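[Keying the stripe cache on (sector, generation) rather than (sector, disks) matters once a reshape can change layout or chunk size without changing the disk count: two geometries may then have identical disk counts yet incompatible stripes at the same sector. A toy model of the key change, with the struct invented for illustration.]

#include <assert.h>

struct key { unsigned long long sector; short generation; };

static int key_eq(struct key a, struct key b)
{
        return a.sector == b.sector && a.generation == b.generation;
}

int main(void)
{
        /* same sector, different reshape generations: no longer alias,
         * even if both geometries happen to use the same disk count */
        struct key old_geo = { 1024, 41 };   /* previous geometry */
        struct key new_geo = { 1024, 42 };   /* after conf->generation++ */
        assert(!key_eq(old_geo, new_geo));
        return 0;
}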
@@ -327,8 +360,9 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, in | |||
327 | static void unplug_slaves(mddev_t *mddev); | 360 | static void unplug_slaves(mddev_t *mddev); |
328 | static void raid5_unplug_device(struct request_queue *q); | 361 | static void raid5_unplug_device(struct request_queue *q); |
329 | 362 | ||
330 | static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector, int disks, | 363 | static struct stripe_head * |
331 | int pd_idx, int noblock) | 364 | get_active_stripe(raid5_conf_t *conf, sector_t sector, |
365 | int previous, int noblock) | ||
332 | { | 366 | { |
333 | struct stripe_head *sh; | 367 | struct stripe_head *sh; |
334 | 368 | ||
@@ -340,7 +374,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector | |||
340 | wait_event_lock_irq(conf->wait_for_stripe, | 374 | wait_event_lock_irq(conf->wait_for_stripe, |
341 | conf->quiesce == 0, | 375 | conf->quiesce == 0, |
342 | conf->device_lock, /* nothing */); | 376 | conf->device_lock, /* nothing */); |
343 | sh = __find_stripe(conf, sector, disks); | 377 | sh = __find_stripe(conf, sector, conf->generation - previous); |
344 | if (!sh) { | 378 | if (!sh) { |
345 | if (!conf->inactive_blocked) | 379 | if (!conf->inactive_blocked) |
346 | sh = get_free_stripe(conf); | 380 | sh = get_free_stripe(conf); |
@@ -358,10 +392,11 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector | |||
358 | ); | 392 | ); |
359 | conf->inactive_blocked = 0; | 393 | conf->inactive_blocked = 0; |
360 | } else | 394 | } else |
361 | init_stripe(sh, sector, pd_idx, disks); | 395 | init_stripe(sh, sector, previous); |
362 | } else { | 396 | } else { |
363 | if (atomic_read(&sh->count)) { | 397 | if (atomic_read(&sh->count)) { |
364 | BUG_ON(!list_empty(&sh->lru)); | 398 | BUG_ON(!list_empty(&sh->lru) |
399 | && !test_bit(STRIPE_EXPANDING, &sh->state)); | ||
365 | } else { | 400 | } else { |
366 | if (!test_bit(STRIPE_HANDLE, &sh->state)) | 401 | if (!test_bit(STRIPE_HANDLE, &sh->state)) |
367 | atomic_inc(&conf->active_stripes); | 402 | atomic_inc(&conf->active_stripes); |
@@ -895,8 +930,10 @@ static int grow_stripes(raid5_conf_t *conf, int num) | |||
895 | struct kmem_cache *sc; | 930 | struct kmem_cache *sc; |
896 | int devs = conf->raid_disks; | 931 | int devs = conf->raid_disks; |
897 | 932 | ||
898 | sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev)); | 933 | sprintf(conf->cache_name[0], |
899 | sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev)); | 934 | "raid%d-%s", conf->level, mdname(conf->mddev)); |
935 | sprintf(conf->cache_name[1], | ||
936 | "raid%d-%s-alt", conf->level, mdname(conf->mddev)); | ||
900 | conf->active_name = 0; | 937 | conf->active_name = 0; |
901 | sc = kmem_cache_create(conf->cache_name[conf->active_name], | 938 | sc = kmem_cache_create(conf->cache_name[conf->active_name], |
902 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), | 939 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), |
@@ -911,7 +948,6 @@ static int grow_stripes(raid5_conf_t *conf, int num) | |||
911 | return 0; | 948 | return 0; |
912 | } | 949 | } |
913 | 950 | ||
914 | #ifdef CONFIG_MD_RAID5_RESHAPE | ||
915 | static int resize_stripes(raid5_conf_t *conf, int newsize) | 951 | static int resize_stripes(raid5_conf_t *conf, int newsize) |
916 | { | 952 | { |
917 | /* Make all the stripes able to hold 'newsize' devices. | 953 | /* Make all the stripes able to hold 'newsize' devices. |
@@ -1036,7 +1072,6 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) | |||
1036 | conf->pool_size = newsize; | 1072 | conf->pool_size = newsize; |
1037 | return err; | 1073 | return err; |
1038 | } | 1074 | } |
1039 | #endif | ||
1040 | 1075 | ||
1041 | static int drop_one_stripe(raid5_conf_t *conf) | 1076 | static int drop_one_stripe(raid5_conf_t *conf) |
1042 | { | 1077 | { |
@@ -1066,7 +1101,7 @@ static void shrink_stripes(raid5_conf_t *conf) | |||
1066 | 1101 | ||
1067 | static void raid5_end_read_request(struct bio * bi, int error) | 1102 | static void raid5_end_read_request(struct bio * bi, int error) |
1068 | { | 1103 | { |
1069 | struct stripe_head *sh = bi->bi_private; | 1104 | struct stripe_head *sh = bi->bi_private; |
1070 | raid5_conf_t *conf = sh->raid_conf; | 1105 | raid5_conf_t *conf = sh->raid_conf; |
1071 | int disks = sh->disks, i; | 1106 | int disks = sh->disks, i; |
1072 | int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); | 1107 | int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); |
@@ -1148,7 +1183,7 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1148 | 1183 | ||
1149 | static void raid5_end_write_request(struct bio *bi, int error) | 1184 | static void raid5_end_write_request(struct bio *bi, int error) |
1150 | { | 1185 | { |
1151 | struct stripe_head *sh = bi->bi_private; | 1186 | struct stripe_head *sh = bi->bi_private; |
1152 | raid5_conf_t *conf = sh->raid_conf; | 1187 | raid5_conf_t *conf = sh->raid_conf; |
1153 | int disks = sh->disks, i; | 1188 | int disks = sh->disks, i; |
1154 | int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); | 1189 | int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); |
@@ -1176,9 +1211,9 @@ static void raid5_end_write_request(struct bio *bi, int error) | |||
1176 | } | 1211 | } |
1177 | 1212 | ||
1178 | 1213 | ||
1179 | static sector_t compute_blocknr(struct stripe_head *sh, int i); | 1214 | static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous); |
1180 | 1215 | ||
1181 | static void raid5_build_block(struct stripe_head *sh, int i) | 1216 | static void raid5_build_block(struct stripe_head *sh, int i, int previous) |
1182 | { | 1217 | { |
1183 | struct r5dev *dev = &sh->dev[i]; | 1218 | struct r5dev *dev = &sh->dev[i]; |
1184 | 1219 | ||
@@ -1194,7 +1229,7 @@ static void raid5_build_block(struct stripe_head *sh, int i) | |||
1194 | dev->req.bi_private = sh; | 1229 | dev->req.bi_private = sh; |
1195 | 1230 | ||
1196 | dev->flags = 0; | 1231 | dev->flags = 0; |
1197 | dev->sector = compute_blocknr(sh, i); | 1232 | dev->sector = compute_blocknr(sh, i, previous); |
1198 | } | 1233 | } |
1199 | 1234 | ||
1200 | static void error(mddev_t *mddev, mdk_rdev_t *rdev) | 1235 | static void error(mddev_t *mddev, mdk_rdev_t *rdev) |
@@ -1227,15 +1262,23 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1227 | * Input: a 'big' sector number, | 1262 | * Input: a 'big' sector number, |
1228 | * Output: index of the data and parity disk, and the sector # in them. | 1263 | * Output: index of the data and parity disk, and the sector # in them. |
1229 | */ | 1264 | */ |
1230 | static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks, | 1265 | static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, |
1231 | unsigned int data_disks, unsigned int * dd_idx, | 1266 | int previous, int *dd_idx, |
1232 | unsigned int * pd_idx, raid5_conf_t *conf) | 1267 | struct stripe_head *sh) |
1233 | { | 1268 | { |
1234 | long stripe; | 1269 | long stripe; |
1235 | unsigned long chunk_number; | 1270 | unsigned long chunk_number; |
1236 | unsigned int chunk_offset; | 1271 | unsigned int chunk_offset; |
1272 | int pd_idx, qd_idx; | ||
1273 | int ddf_layout = 0; | ||
1237 | sector_t new_sector; | 1274 | sector_t new_sector; |
1238 | int sectors_per_chunk = conf->chunk_size >> 9; | 1275 | int algorithm = previous ? conf->prev_algo |
1276 | : conf->algorithm; | ||
1277 | int sectors_per_chunk = previous ? (conf->prev_chunk >> 9) | ||
1278 | : (conf->chunk_size >> 9); | ||
1279 | int raid_disks = previous ? conf->previous_raid_disks | ||
1280 | : conf->raid_disks; | ||
1281 | int data_disks = raid_disks - conf->max_degraded; | ||
1239 | 1282 | ||
1240 | /* First compute the information on this sector */ | 1283 | /* First compute the information on this sector */ |
1241 | 1284 | ||
@@ -1259,68 +1302,170 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks, | |||
1259 | /* | 1302 | /* |
1260 | * Select the parity disk based on the user selected algorithm. | 1303 | * Select the parity disk based on the user selected algorithm. |
1261 | */ | 1304 | */ |
1305 | pd_idx = qd_idx = ~0; | ||
1262 | switch(conf->level) { | 1306 | switch(conf->level) { |
1263 | case 4: | 1307 | case 4: |
1264 | *pd_idx = data_disks; | 1308 | pd_idx = data_disks; |
1265 | break; | 1309 | break; |
1266 | case 5: | 1310 | case 5: |
1267 | switch (conf->algorithm) { | 1311 | switch (algorithm) { |
1268 | case ALGORITHM_LEFT_ASYMMETRIC: | 1312 | case ALGORITHM_LEFT_ASYMMETRIC: |
1269 | *pd_idx = data_disks - stripe % raid_disks; | 1313 | pd_idx = data_disks - stripe % raid_disks; |
1270 | if (*dd_idx >= *pd_idx) | 1314 | if (*dd_idx >= pd_idx) |
1271 | (*dd_idx)++; | 1315 | (*dd_idx)++; |
1272 | break; | 1316 | break; |
1273 | case ALGORITHM_RIGHT_ASYMMETRIC: | 1317 | case ALGORITHM_RIGHT_ASYMMETRIC: |
1274 | *pd_idx = stripe % raid_disks; | 1318 | pd_idx = stripe % raid_disks; |
1275 | if (*dd_idx >= *pd_idx) | 1319 | if (*dd_idx >= pd_idx) |
1276 | (*dd_idx)++; | 1320 | (*dd_idx)++; |
1277 | break; | 1321 | break; |
1278 | case ALGORITHM_LEFT_SYMMETRIC: | 1322 | case ALGORITHM_LEFT_SYMMETRIC: |
1279 | *pd_idx = data_disks - stripe % raid_disks; | 1323 | pd_idx = data_disks - stripe % raid_disks; |
1280 | *dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks; | 1324 | *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; |
1281 | break; | 1325 | break; |
1282 | case ALGORITHM_RIGHT_SYMMETRIC: | 1326 | case ALGORITHM_RIGHT_SYMMETRIC: |
1283 | *pd_idx = stripe % raid_disks; | 1327 | pd_idx = stripe % raid_disks; |
1284 | *dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks; | 1328 | *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; |
1329 | break; | ||
1330 | case ALGORITHM_PARITY_0: | ||
1331 | pd_idx = 0; | ||
1332 | (*dd_idx)++; | ||
1333 | break; | ||
1334 | case ALGORITHM_PARITY_N: | ||
1335 | pd_idx = data_disks; | ||
1285 | break; | 1336 | break; |
1286 | default: | 1337 | default: |
1287 | printk(KERN_ERR "raid5: unsupported algorithm %d\n", | 1338 | printk(KERN_ERR "raid5: unsupported algorithm %d\n", |
1288 | conf->algorithm); | 1339 | algorithm); |
1340 | BUG(); | ||
1289 | } | 1341 | } |
1290 | break; | 1342 | break; |
1291 | case 6: | 1343 | case 6: |
1292 | 1344 | ||
1293 | /**** FIX THIS ****/ | 1345 | switch (algorithm) { |
1294 | switch (conf->algorithm) { | ||
1295 | case ALGORITHM_LEFT_ASYMMETRIC: | 1346 | case ALGORITHM_LEFT_ASYMMETRIC: |
1296 | *pd_idx = raid_disks - 1 - (stripe % raid_disks); | 1347 | pd_idx = raid_disks - 1 - (stripe % raid_disks); |
1297 | if (*pd_idx == raid_disks-1) | 1348 | qd_idx = pd_idx + 1; |
1298 | (*dd_idx)++; /* Q D D D P */ | 1349 | if (pd_idx == raid_disks-1) { |
1299 | else if (*dd_idx >= *pd_idx) | 1350 | (*dd_idx)++; /* Q D D D P */ |
1351 | qd_idx = 0; | ||
1352 | } else if (*dd_idx >= pd_idx) | ||
1300 | (*dd_idx) += 2; /* D D P Q D */ | 1353 | (*dd_idx) += 2; /* D D P Q D */ |
1301 | break; | 1354 | break; |
1302 | case ALGORITHM_RIGHT_ASYMMETRIC: | 1355 | case ALGORITHM_RIGHT_ASYMMETRIC: |
1303 | *pd_idx = stripe % raid_disks; | 1356 | pd_idx = stripe % raid_disks; |
1304 | if (*pd_idx == raid_disks-1) | 1357 | qd_idx = pd_idx + 1; |
1305 | (*dd_idx)++; /* Q D D D P */ | 1358 | if (pd_idx == raid_disks-1) { |
1306 | else if (*dd_idx >= *pd_idx) | 1359 | (*dd_idx)++; /* Q D D D P */ |
1360 | qd_idx = 0; | ||
1361 | } else if (*dd_idx >= pd_idx) | ||
1307 | (*dd_idx) += 2; /* D D P Q D */ | 1362 | (*dd_idx) += 2; /* D D P Q D */ |
1308 | break; | 1363 | break; |
1309 | case ALGORITHM_LEFT_SYMMETRIC: | 1364 | case ALGORITHM_LEFT_SYMMETRIC: |
1310 | *pd_idx = raid_disks - 1 - (stripe % raid_disks); | 1365 | pd_idx = raid_disks - 1 - (stripe % raid_disks); |
1311 | *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks; | 1366 | qd_idx = (pd_idx + 1) % raid_disks; |
1367 | *dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks; | ||
1312 | break; | 1368 | break; |
1313 | case ALGORITHM_RIGHT_SYMMETRIC: | 1369 | case ALGORITHM_RIGHT_SYMMETRIC: |
1314 | *pd_idx = stripe % raid_disks; | 1370 | pd_idx = stripe % raid_disks; |
1315 | *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks; | 1371 | qd_idx = (pd_idx + 1) % raid_disks; |
1372 | *dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks; | ||
1373 | break; | ||
1374 | |||
1375 | case ALGORITHM_PARITY_0: | ||
1376 | pd_idx = 0; | ||
1377 | qd_idx = 1; | ||
1378 | (*dd_idx) += 2; | ||
1379 | break; | ||
1380 | case ALGORITHM_PARITY_N: | ||
1381 | pd_idx = data_disks; | ||
1382 | qd_idx = data_disks + 1; | ||
1316 | break; | 1383 | break; |
1384 | |||
1385 | case ALGORITHM_ROTATING_ZERO_RESTART: | ||
1386 | /* Exactly the same as RIGHT_ASYMMETRIC, but the order | ||
1387 | * of blocks for computing Q is different. | ||
1388 | */ | ||
1389 | pd_idx = stripe % raid_disks; | ||
1390 | qd_idx = pd_idx + 1; | ||
1391 | if (pd_idx == raid_disks-1) { | ||
1392 | (*dd_idx)++; /* Q D D D P */ | ||
1393 | qd_idx = 0; | ||
1394 | } else if (*dd_idx >= pd_idx) | ||
1395 | (*dd_idx) += 2; /* D D P Q D */ | ||
1396 | ddf_layout = 1; | ||
1397 | break; | ||
1398 | |||
1399 | case ALGORITHM_ROTATING_N_RESTART: | ||
1400 | /* Same as left_asymmetric, but the first stripe is | ||
1401 | * D D D P Q rather than | ||
1402 | * Q D D D P | ||
1403 | */ | ||
1404 | pd_idx = raid_disks - 1 - ((stripe + 1) % raid_disks); | ||
1405 | qd_idx = pd_idx + 1; | ||
1406 | if (pd_idx == raid_disks-1) { | ||
1407 | (*dd_idx)++; /* Q D D D P */ | ||
1408 | qd_idx = 0; | ||
1409 | } else if (*dd_idx >= pd_idx) | ||
1410 | (*dd_idx) += 2; /* D D P Q D */ | ||
1411 | ddf_layout = 1; | ||
1412 | break; | ||
1413 | |||
1414 | case ALGORITHM_ROTATING_N_CONTINUE: | ||
1415 | /* Same as left_symmetric but Q is before P */ | ||
1416 | pd_idx = raid_disks - 1 - (stripe % raid_disks); | ||
1417 | qd_idx = (pd_idx + raid_disks - 1) % raid_disks; | ||
1418 | *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; | ||
1419 | ddf_layout = 1; | ||
1420 | break; | ||
1421 | |||
1422 | case ALGORITHM_LEFT_ASYMMETRIC_6: | ||
1423 | /* RAID5 left_asymmetric, with Q on last device */ | ||
1424 | pd_idx = data_disks - stripe % (raid_disks-1); | ||
1425 | if (*dd_idx >= pd_idx) | ||
1426 | (*dd_idx)++; | ||
1427 | qd_idx = raid_disks - 1; | ||
1428 | break; | ||
1429 | |||
1430 | case ALGORITHM_RIGHT_ASYMMETRIC_6: | ||
1431 | pd_idx = stripe % (raid_disks-1); | ||
1432 | if (*dd_idx >= pd_idx) | ||
1433 | (*dd_idx)++; | ||
1434 | qd_idx = raid_disks - 1; | ||
1435 | break; | ||
1436 | |||
1437 | case ALGORITHM_LEFT_SYMMETRIC_6: | ||
1438 | pd_idx = data_disks - stripe % (raid_disks-1); | ||
1439 | *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1); | ||
1440 | qd_idx = raid_disks - 1; | ||
1441 | break; | ||
1442 | |||
1443 | case ALGORITHM_RIGHT_SYMMETRIC_6: | ||
1444 | pd_idx = stripe % (raid_disks-1); | ||
1445 | *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1); | ||
1446 | qd_idx = raid_disks - 1; | ||
1447 | break; | ||
1448 | |||
1449 | case ALGORITHM_PARITY_0_6: | ||
1450 | pd_idx = 0; | ||
1451 | (*dd_idx)++; | ||
1452 | qd_idx = raid_disks - 1; | ||
1453 | break; | ||
1454 | |||
1455 | |||
1317 | default: | 1456 | default: |
1318 | printk(KERN_CRIT "raid6: unsupported algorithm %d\n", | 1457 | printk(KERN_CRIT "raid6: unsupported algorithm %d\n", |
1319 | conf->algorithm); | 1458 | algorithm); |
1459 | BUG(); | ||
1320 | } | 1460 | } |
1321 | break; | 1461 | break; |
1322 | } | 1462 | } |
1323 | 1463 | ||
1464 | if (sh) { | ||
1465 | sh->pd_idx = pd_idx; | ||
1466 | sh->qd_idx = qd_idx; | ||
1467 | sh->ddf_layout = ddf_layout; | ||
1468 | } | ||
1324 | /* | 1469 | /* |
1325 | * Finally, compute the new sector number | 1470 | * Finally, compute the new sector number |
1326 | */ | 1471 | */ |
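[With pd_idx and qd_idx now computed here and stored in the stripe_head, every layout case must place parity and data without collisions. A standalone sanity check of one representative case, ALGORITHM_LEFT_SYMMETRIC on a 5-disk RAID5; the harness is invented, and the two expressions are copied from the case above.]

#include <assert.h>

int main(void)
{
        const int raid_disks = 5, data_disks = 4;
        long stripe;

        for (stripe = 0; stripe < 10; stripe++) {
                /* parity rotates from the last disk downwards */
                int pd_idx = data_disks - stripe % raid_disks;
                int k, seen = 1 << pd_idx;

                assert(pd_idx >= 0 && pd_idx < raid_disks);
                for (k = 0; k < data_disks; k++) {
                        /* data slot k lives on disk (pd+1+k) % disks */
                        int dd_idx = (pd_idx + 1 + k) % raid_disks;
                        assert(dd_idx != pd_idx);      /* no collision */
                        seen |= 1 << dd_idx;
                }
                assert(seen == (1 << raid_disks) - 1); /* all disks used */
        }
        return 0;
}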
@@ -1329,17 +1474,21 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks, | |||
1329 | } | 1474 | } |
1330 | 1475 | ||
1331 | 1476 | ||
1332 | static sector_t compute_blocknr(struct stripe_head *sh, int i) | 1477 | static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) |
1333 | { | 1478 | { |
1334 | raid5_conf_t *conf = sh->raid_conf; | 1479 | raid5_conf_t *conf = sh->raid_conf; |
1335 | int raid_disks = sh->disks; | 1480 | int raid_disks = sh->disks; |
1336 | int data_disks = raid_disks - conf->max_degraded; | 1481 | int data_disks = raid_disks - conf->max_degraded; |
1337 | sector_t new_sector = sh->sector, check; | 1482 | sector_t new_sector = sh->sector, check; |
1338 | int sectors_per_chunk = conf->chunk_size >> 9; | 1483 | int sectors_per_chunk = previous ? (conf->prev_chunk >> 9) |
1484 | : (conf->chunk_size >> 9); | ||
1485 | int algorithm = previous ? conf->prev_algo | ||
1486 | : conf->algorithm; | ||
1339 | sector_t stripe; | 1487 | sector_t stripe; |
1340 | int chunk_offset; | 1488 | int chunk_offset; |
1341 | int chunk_number, dummy1, dummy2, dd_idx = i; | 1489 | int chunk_number, dummy1, dd_idx = i; |
1342 | sector_t r_sector; | 1490 | sector_t r_sector; |
1491 | struct stripe_head sh2; | ||
1343 | 1492 | ||
1344 | 1493 | ||
1345 | chunk_offset = sector_div(new_sector, sectors_per_chunk); | 1494 | chunk_offset = sector_div(new_sector, sectors_per_chunk); |
@@ -1351,7 +1500,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i) | |||
1351 | switch(conf->level) { | 1500 | switch(conf->level) { |
1352 | case 4: break; | 1501 | case 4: break; |
1353 | case 5: | 1502 | case 5: |
1354 | switch (conf->algorithm) { | 1503 | switch (algorithm) { |
1355 | case ALGORITHM_LEFT_ASYMMETRIC: | 1504 | case ALGORITHM_LEFT_ASYMMETRIC: |
1356 | case ALGORITHM_RIGHT_ASYMMETRIC: | 1505 | case ALGORITHM_RIGHT_ASYMMETRIC: |
1357 | if (i > sh->pd_idx) | 1506 | if (i > sh->pd_idx) |
@@ -1363,19 +1512,27 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i) | |||
1363 | i += raid_disks; | 1512 | i += raid_disks; |
1364 | i -= (sh->pd_idx + 1); | 1513 | i -= (sh->pd_idx + 1); |
1365 | break; | 1514 | break; |
1515 | case ALGORITHM_PARITY_0: | ||
1516 | i -= 1; | ||
1517 | break; | ||
1518 | case ALGORITHM_PARITY_N: | ||
1519 | break; | ||
1366 | default: | 1520 | default: |
1367 | printk(KERN_ERR "raid5: unsupported algorithm %d\n", | 1521 | printk(KERN_ERR "raid5: unsupported algorithm %d\n", |
1368 | conf->algorithm); | 1522 | algorithm); |
1523 | BUG(); | ||
1369 | } | 1524 | } |
1370 | break; | 1525 | break; |
1371 | case 6: | 1526 | case 6: |
1372 | if (i == raid6_next_disk(sh->pd_idx, raid_disks)) | 1527 | if (i == sh->qd_idx) |
1373 | return 0; /* It is the Q disk */ | 1528 | return 0; /* It is the Q disk */ |
1374 | switch (conf->algorithm) { | 1529 | switch (algorithm) { |
1375 | case ALGORITHM_LEFT_ASYMMETRIC: | 1530 | case ALGORITHM_LEFT_ASYMMETRIC: |
1376 | case ALGORITHM_RIGHT_ASYMMETRIC: | 1531 | case ALGORITHM_RIGHT_ASYMMETRIC: |
1377 | if (sh->pd_idx == raid_disks-1) | 1532 | case ALGORITHM_ROTATING_ZERO_RESTART: |
1378 | i--; /* Q D D D P */ | 1533 | case ALGORITHM_ROTATING_N_RESTART: |
1534 | if (sh->pd_idx == raid_disks-1) | ||
1535 | i--; /* Q D D D P */ | ||
1379 | else if (i > sh->pd_idx) | 1536 | else if (i > sh->pd_idx) |
1380 | i -= 2; /* D D P Q D */ | 1537 | i -= 2; /* D D P Q D */ |
1381 | break; | 1538 | break; |
@@ -1390,9 +1547,35 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i) | |||
1390 | i -= (sh->pd_idx + 2); | 1547 | i -= (sh->pd_idx + 2); |
1391 | } | 1548 | } |
1392 | break; | 1549 | break; |
1550 | case ALGORITHM_PARITY_0: | ||
1551 | i -= 2; | ||
1552 | break; | ||
1553 | case ALGORITHM_PARITY_N: | ||
1554 | break; | ||
1555 | case ALGORITHM_ROTATING_N_CONTINUE: | ||
1556 | if (sh->pd_idx == 0) | ||
1557 | i--; /* P D D D Q */ | ||
1558 | else if (i > sh->pd_idx) | ||
1559 | i -= 2; /* D D Q P D */ | ||
1560 | break; | ||
1561 | case ALGORITHM_LEFT_ASYMMETRIC_6: | ||
1562 | case ALGORITHM_RIGHT_ASYMMETRIC_6: | ||
1563 | if (i > sh->pd_idx) | ||
1564 | i--; | ||
1565 | break; | ||
1566 | case ALGORITHM_LEFT_SYMMETRIC_6: | ||
1567 | case ALGORITHM_RIGHT_SYMMETRIC_6: | ||
1568 | if (i < sh->pd_idx) | ||
1569 | i += data_disks + 1; | ||
1570 | i -= (sh->pd_idx + 1); | ||
1571 | break; | ||
1572 | case ALGORITHM_PARITY_0_6: | ||
1573 | i -= 1; | ||
1574 | break; | ||
1393 | default: | 1575 | default: |
1394 | printk(KERN_CRIT "raid6: unsupported algorithm %d\n", | 1576 | printk(KERN_CRIT "raid6: unsupported algorithm %d\n", |
1395 | conf->algorithm); | 1577 | algorithm); |
1578 | BUG(); | ||
1396 | } | 1579 | } |
1397 | break; | 1580 | break; |
1398 | } | 1581 | } |
@@ -1400,8 +1583,10 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i) | |||
1400 | chunk_number = stripe * data_disks + i; | 1583 | chunk_number = stripe * data_disks + i; |
1401 | r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset; | 1584 | r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset; |
1402 | 1585 | ||
1403 | check = raid5_compute_sector(r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf); | 1586 | check = raid5_compute_sector(conf, r_sector, |
1404 | if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) { | 1587 | previous, &dummy1, &sh2); |
1588 | if (check != sh->sector || dummy1 != dd_idx || sh2.pd_idx != sh->pd_idx | ||
1589 | || sh2.qd_idx != sh->qd_idx) { | ||
1405 | printk(KERN_ERR "compute_blocknr: map not correct\n"); | 1590 | printk(KERN_ERR "compute_blocknr: map not correct\n"); |
1406 | return 0; | 1591 | return 0; |
1407 | } | 1592 | } |
@@ -1468,14 +1653,16 @@ static void copy_data(int frombio, struct bio *bio, | |||
1468 | 1653 | ||
1469 | static void compute_parity6(struct stripe_head *sh, int method) | 1654 | static void compute_parity6(struct stripe_head *sh, int method) |
1470 | { | 1655 | { |
1471 | raid6_conf_t *conf = sh->raid_conf; | 1656 | raid5_conf_t *conf = sh->raid_conf; |
1472 | int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = sh->disks, count; | 1657 | int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count; |
1658 | int syndrome_disks = sh->ddf_layout ? disks : (disks - 2); | ||
1473 | struct bio *chosen; | 1659 | struct bio *chosen; |
1474 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ | 1660 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ |
1475 | void *ptrs[disks]; | 1661 | void *ptrs[syndrome_disks+2]; |
1476 | 1662 | ||
1477 | qd_idx = raid6_next_disk(pd_idx, disks); | 1663 | pd_idx = sh->pd_idx; |
1478 | d0_idx = raid6_next_disk(qd_idx, disks); | 1664 | qd_idx = sh->qd_idx; |
1665 | d0_idx = raid6_d0(sh); | ||
1479 | 1666 | ||
1480 | pr_debug("compute_parity, stripe %llu, method %d\n", | 1667 | pr_debug("compute_parity, stripe %llu, method %d\n", |
1481 | (unsigned long long)sh->sector, method); | 1668 | (unsigned long long)sh->sector, method); |
@@ -1513,24 +1700,29 @@ static void compute_parity6(struct stripe_head *sh, int method) | |||
1513 | set_bit(R5_UPTODATE, &sh->dev[i].flags); | 1700 | set_bit(R5_UPTODATE, &sh->dev[i].flags); |
1514 | } | 1701 | } |
1515 | 1702 | ||
1516 | // switch(method) { | 1703 | /* Note that unlike RAID-5, the ordering of the disks matters greatly. */ |
1517 | // case RECONSTRUCT_WRITE: | 1704 | |
1518 | // case CHECK_PARITY: | 1705 | for (i = 0; i < disks; i++) |
1519 | // case UPDATE_PARITY: | 1706 | ptrs[i] = (void *)raid6_empty_zero_page; |
1520 | /* Note that unlike RAID-5, the ordering of the disks matters greatly. */ | 1707 | |
1521 | /* FIX: Is this ordering of drives even remotely optimal? */ | 1708 | count = 0; |
1522 | count = 0; | 1709 | i = d0_idx; |
1523 | i = d0_idx; | 1710 | do { |
1524 | do { | 1711 | int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); |
1525 | ptrs[count++] = page_address(sh->dev[i].page); | 1712 | |
1526 | if (count <= disks-2 && !test_bit(R5_UPTODATE, &sh->dev[i].flags)) | 1713 | ptrs[slot] = page_address(sh->dev[i].page); |
1527 | printk("block %d/%d not uptodate on parity calc\n", i,count); | 1714 | if (slot < syndrome_disks && |
1528 | i = raid6_next_disk(i, disks); | 1715 | !test_bit(R5_UPTODATE, &sh->dev[i].flags)) { |
1529 | } while ( i != d0_idx ); | 1716 | printk(KERN_ERR "block %d/%d not uptodate " |
1530 | // break; | 1717 | "on parity calc\n", i, count); |
1531 | // } | 1718 | BUG(); |
1532 | 1719 | } | |
1533 | raid6_call.gen_syndrome(disks, STRIPE_SIZE, ptrs); | 1720 | |
1721 | i = raid6_next_disk(i, disks); | ||
1722 | } while (i != d0_idx); | ||
1723 | BUG_ON(count != syndrome_disks); | ||
1724 | |||
1725 | raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs); | ||
1534 | 1726 | ||
1535 | switch(method) { | 1727 | switch(method) { |
1536 | case RECONSTRUCT_WRITE: | 1728 | case RECONSTRUCT_WRITE: |
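[For DDF layouts the syndrome is computed over all disk positions rather than a compacted array, so compute_parity6() now seeds every slot with raid6_empty_zero_page and lets real devices overwrite theirs; unused slots then contribute nothing. A toy demonstration of the zero-padding idea using XOR (P) parity only; Q's Galois-field math is omitted and all names here are invented.]

#include <assert.h>
#include <string.h>

#define BLK 16

static const unsigned char zero_page[BLK]; /* static storage: all zeroes */

static void gen_p(int slots, unsigned char *p, const unsigned char **ptrs)
{
        int s, i;
        memset(p, 0, BLK);
        for (s = 0; s < slots; s++)
                for (i = 0; i < BLK; i++)
                        p[i] ^= ptrs[s][i];    /* zero slots drop out */
}

int main(void)
{
        unsigned char d0[BLK], d1[BLK], p4[BLK], p2[BLK];
        const unsigned char *four[4] = { zero_page, zero_page, d0, d1 };
        const unsigned char *two[2] = { d0, d1 };

        memset(d0, 0xa5, BLK);
        memset(d1, 0x3c, BLK);
        gen_p(4, p4, four);    /* padded with zero slots */
        gen_p(2, p2, two);     /* just the real data */
        assert(memcmp(p4, p2, BLK) == 0);
        return 0;
}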
@@ -1552,8 +1744,7 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) | |||
1552 | { | 1744 | { |
1553 | int i, count, disks = sh->disks; | 1745 | int i, count, disks = sh->disks; |
1554 | void *ptr[MAX_XOR_BLOCKS], *dest, *p; | 1746 | void *ptr[MAX_XOR_BLOCKS], *dest, *p; |
1555 | int pd_idx = sh->pd_idx; | 1747 | int qd_idx = sh->qd_idx; |
1556 | int qd_idx = raid6_next_disk(pd_idx, disks); | ||
1557 | 1748 | ||
1558 | pr_debug("compute_block_1, stripe %llu, idx %d\n", | 1749 | pr_debug("compute_block_1, stripe %llu, idx %d\n", |
1559 | (unsigned long long)sh->sector, dd_idx); | 1750 | (unsigned long long)sh->sector, dd_idx); |
@@ -1589,63 +1780,65 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) | |||
1589 | static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) | 1780 | static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) |
1590 | { | 1781 | { |
1591 | int i, count, disks = sh->disks; | 1782 | int i, count, disks = sh->disks; |
1592 | int pd_idx = sh->pd_idx; | 1783 | int syndrome_disks = sh->ddf_layout ? disks : disks-2; |
1593 | int qd_idx = raid6_next_disk(pd_idx, disks); | 1784 | int d0_idx = raid6_d0(sh); |
1594 | int d0_idx = raid6_next_disk(qd_idx, disks); | 1785 | int faila = -1, failb = -1; |
1595 | int faila, failb; | 1786 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ |
1787 | void *ptrs[syndrome_disks+2]; | ||
1596 | 1788 | ||
1597 | /* faila and failb are disk numbers relative to d0_idx */ | 1789 | for (i = 0; i < disks ; i++) |
1598 | /* pd_idx become disks-2 and qd_idx become disks-1 */ | 1790 | ptrs[i] = (void *)raid6_empty_zero_page; |
1599 | faila = (dd_idx1 < d0_idx) ? dd_idx1+(disks-d0_idx) : dd_idx1-d0_idx; | 1791 | count = 0; |
1600 | failb = (dd_idx2 < d0_idx) ? dd_idx2+(disks-d0_idx) : dd_idx2-d0_idx; | 1792 | i = d0_idx; |
1793 | do { | ||
1794 | int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); | ||
1795 | |||
1796 | ptrs[slot] = page_address(sh->dev[i].page); | ||
1797 | |||
1798 | if (i == dd_idx1) | ||
1799 | faila = slot; | ||
1800 | if (i == dd_idx2) | ||
1801 | failb = slot; | ||
1802 | i = raid6_next_disk(i, disks); | ||
1803 | } while (i != d0_idx); | ||
1804 | BUG_ON(count != syndrome_disks); | ||
1601 | 1805 | ||
1602 | BUG_ON(faila == failb); | 1806 | BUG_ON(faila == failb); |
1603 | if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } | 1807 | if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } |
1604 | 1808 | ||
1605 | pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", | 1809 | pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", |
1606 | (unsigned long long)sh->sector, dd_idx1, dd_idx2, faila, failb); | 1810 | (unsigned long long)sh->sector, dd_idx1, dd_idx2, |
1811 | faila, failb); | ||
1607 | 1812 | ||
1608 | if ( failb == disks-1 ) { | 1813 | if (failb == syndrome_disks+1) { |
1609 | /* Q disk is one of the missing disks */ | 1814 | /* Q disk is one of the missing disks */ |
1610 | if ( faila == disks-2 ) { | 1815 | if (faila == syndrome_disks) { |
1611 | /* Missing P+Q, just recompute */ | 1816 | /* Missing P+Q, just recompute */ |
1612 | compute_parity6(sh, UPDATE_PARITY); | 1817 | compute_parity6(sh, UPDATE_PARITY); |
1613 | return; | 1818 | return; |
1614 | } else { | 1819 | } else { |
1615 | /* We're missing D+Q; recompute D from P */ | 1820 | /* We're missing D+Q; recompute D from P */ |
1616 | compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1, 0); | 1821 | compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ? |
1822 | dd_idx2 : dd_idx1), | ||
1823 | 0); | ||
1617 | compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */ | 1824 | compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */ |
1618 | return; | 1825 | return; |
1619 | } | 1826 | } |
1620 | } | 1827 | } |
1621 | 1828 | ||
1622 | /* We're missing D+P or D+D; build pointer table */ | 1829 | /* We're missing D+P or D+D; */ |
1623 | { | 1830 | if (failb == syndrome_disks) { |
1624 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ | 1831 | /* We're missing D+P. */ |
1625 | void *ptrs[disks]; | 1832 | raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs); |
1626 | 1833 | } else { | |
1627 | count = 0; | 1834 | /* We're missing D+D. */ |
1628 | i = d0_idx; | 1835 | raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb, |
1629 | do { | 1836 | ptrs); |
1630 | ptrs[count++] = page_address(sh->dev[i].page); | ||
1631 | i = raid6_next_disk(i, disks); | ||
1632 | if (i != dd_idx1 && i != dd_idx2 && | ||
1633 | !test_bit(R5_UPTODATE, &sh->dev[i].flags)) | ||
1634 | printk("compute_2 with missing block %d/%d\n", count, i); | ||
1635 | } while ( i != d0_idx ); | ||
1636 | |||
1637 | if ( failb == disks-2 ) { | ||
1638 | /* We're missing D+P. */ | ||
1639 | raid6_datap_recov(disks, STRIPE_SIZE, faila, ptrs); | ||
1640 | } else { | ||
1641 | /* We're missing D+D. */ | ||
1642 | raid6_2data_recov(disks, STRIPE_SIZE, faila, failb, ptrs); | ||
1643 | } | ||
1644 | |||
1645 | /* Both the above update both missing blocks */ | ||
1646 | set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags); | ||
1647 | set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags); | ||
1648 | } | 1837 | } |
1838 | |||
1839 | /* Both the above update both missing blocks */ | ||
1840 | set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags); | ||
1841 | set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags); | ||
1649 | } | 1842 | } |
1650 | 1843 | ||
1651 | static void | 1844 | static void |
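[After the slot mapping, the recovery dispatch in compute_block_2() works purely in slot space: P is slot syndrome_disks, Q is slot syndrome_disks+1, and faila < failb after the swap above. A standalone restatement of the case split; the enum and harness are invented, and the real paths call compute_parity6(), compute_block_1(), raid6_datap_recov() and raid6_2data_recov().]

#include <assert.h>

enum fix { RECOMPUTE_PQ, D_FROM_P_THEN_Q, DATAP_RECOV, TWO_DATA_RECOV };

static enum fix classify(int faila, int failb, int syndrome_disks)
{
        assert(faila < failb);
        if (failb == syndrome_disks + 1)         /* Q failed... */
                return faila == syndrome_disks   /* ...and P too? */
                        ? RECOMPUTE_PQ           /* regenerate P+Q */
                        : D_FROM_P_THEN_Q;       /* D via XOR, then redo Q */
        if (failb == syndrome_disks)             /* P failed, Q intact */
                return DATAP_RECOV;              /* raid6_datap_recov */
        return TWO_DATA_RECOV;                   /* raid6_2data_recov */
}

int main(void)
{
        int n = 4;                               /* data slots in syndrome */
        assert(classify(n, n + 1, n) == RECOMPUTE_PQ);
        assert(classify(1, n + 1, n) == D_FROM_P_THEN_Q);
        assert(classify(2, n, n) == DATAP_RECOV);
        assert(classify(0, 3, n) == TWO_DATA_RECOV);
        return 0;
}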
@@ -1800,17 +1993,21 @@ static int page_is_zero(struct page *p) | |||
1800 | memcmp(a, a+4, STRIPE_SIZE-4)==0); | 1993 | memcmp(a, a+4, STRIPE_SIZE-4)==0); |
1801 | } | 1994 | } |
1802 | 1995 | ||
1803 | static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks) | 1996 | static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, |
1997 | struct stripe_head *sh) | ||
1804 | { | 1998 | { |
1805 | int sectors_per_chunk = conf->chunk_size >> 9; | 1999 | int sectors_per_chunk = |
1806 | int pd_idx, dd_idx; | 2000 | previous ? (conf->prev_chunk >> 9) |
2001 | : (conf->chunk_size >> 9); | ||
2002 | int dd_idx; | ||
1807 | int chunk_offset = sector_div(stripe, sectors_per_chunk); | 2003 | int chunk_offset = sector_div(stripe, sectors_per_chunk); |
2004 | int disks = previous ? conf->previous_raid_disks : conf->raid_disks; | ||
1808 | 2005 | ||
1809 | raid5_compute_sector(stripe * (disks - conf->max_degraded) | 2006 | raid5_compute_sector(conf, |
2007 | stripe * (disks - conf->max_degraded) | ||
1810 | *sectors_per_chunk + chunk_offset, | 2008 | *sectors_per_chunk + chunk_offset, |
1811 | disks, disks - conf->max_degraded, | 2009 | previous, |
1812 | &dd_idx, &pd_idx, conf); | 2010 | &dd_idx, sh); |
1813 | return pd_idx; | ||
1814 | } | 2011 | } |
1815 | 2012 | ||
1816 | static void | 2013 | static void |
@@ -2181,7 +2378,7 @@ static void handle_stripe_dirtying6(raid5_conf_t *conf, | |||
2181 | struct r6_state *r6s, int disks) | 2378 | struct r6_state *r6s, int disks) |
2182 | { | 2379 | { |
2183 | int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i; | 2380 | int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i; |
2184 | int qd_idx = r6s->qd_idx; | 2381 | int qd_idx = sh->qd_idx; |
2185 | for (i = disks; i--; ) { | 2382 | for (i = disks; i--; ) { |
2186 | struct r5dev *dev = &sh->dev[i]; | 2383 | struct r5dev *dev = &sh->dev[i]; |
2187 | /* Would I have to read this buffer for reconstruct_write */ | 2384 | /* Would I have to read this buffer for reconstruct_write */ |
@@ -2371,7 +2568,7 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, | |||
2371 | int update_p = 0, update_q = 0; | 2568 | int update_p = 0, update_q = 0; |
2372 | struct r5dev *dev; | 2569 | struct r5dev *dev; |
2373 | int pd_idx = sh->pd_idx; | 2570 | int pd_idx = sh->pd_idx; |
2374 | int qd_idx = r6s->qd_idx; | 2571 | int qd_idx = sh->qd_idx; |
2375 | 2572 | ||
2376 | set_bit(STRIPE_HANDLE, &sh->state); | 2573 | set_bit(STRIPE_HANDLE, &sh->state); |
2377 | 2574 | ||
@@ -2467,17 +2664,14 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, | |||
2467 | struct dma_async_tx_descriptor *tx = NULL; | 2664 | struct dma_async_tx_descriptor *tx = NULL; |
2468 | clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); | 2665 | clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); |
2469 | for (i = 0; i < sh->disks; i++) | 2666 | for (i = 0; i < sh->disks; i++) |
2470 | if (i != sh->pd_idx && (!r6s || i != r6s->qd_idx)) { | 2667 | if (i != sh->pd_idx && i != sh->qd_idx) { |
2471 | int dd_idx, pd_idx, j; | 2668 | int dd_idx, j; |
2472 | struct stripe_head *sh2; | 2669 | struct stripe_head *sh2; |
2473 | 2670 | ||
2474 | sector_t bn = compute_blocknr(sh, i); | 2671 | sector_t bn = compute_blocknr(sh, i, 1); |
2475 | sector_t s = raid5_compute_sector(bn, conf->raid_disks, | 2672 | sector_t s = raid5_compute_sector(conf, bn, 0, |
2476 | conf->raid_disks - | 2673 | &dd_idx, NULL); |
2477 | conf->max_degraded, &dd_idx, | 2674 | sh2 = get_active_stripe(conf, s, 0, 1); |
2478 | &pd_idx, conf); | ||
2479 | sh2 = get_active_stripe(conf, s, conf->raid_disks, | ||
2480 | pd_idx, 1); | ||
2481 | if (sh2 == NULL) | 2675 | if (sh2 == NULL) |
2482 | /* so far only the early blocks of this stripe | 2676 | /* so far only the early blocks of this stripe |
2483 | * have been requested. When later blocks | 2677 | * have been requested. When later blocks |
@@ -2500,8 +2694,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, | |||
2500 | set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); | 2694 | set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); |
2501 | for (j = 0; j < conf->raid_disks; j++) | 2695 | for (j = 0; j < conf->raid_disks; j++) |
2502 | if (j != sh2->pd_idx && | 2696 | if (j != sh2->pd_idx && |
2503 | (!r6s || j != raid6_next_disk(sh2->pd_idx, | 2697 | (!r6s || j != sh2->qd_idx) && |
2504 | sh2->disks)) && | ||
2505 | !test_bit(R5_Expanded, &sh2->dev[j].flags)) | 2698 | !test_bit(R5_Expanded, &sh2->dev[j].flags)) |
2506 | break; | 2699 | break; |
2507 | if (j == conf->raid_disks) { | 2700 | if (j == conf->raid_disks) { |
@@ -2750,6 +2943,23 @@ static bool handle_stripe5(struct stripe_head *sh) | |||
2750 | 2943 | ||
2751 | /* Finish reconstruct operations initiated by the expansion process */ | 2944 | /* Finish reconstruct operations initiated by the expansion process */ |
2752 | if (sh->reconstruct_state == reconstruct_state_result) { | 2945 | if (sh->reconstruct_state == reconstruct_state_result) { |
2946 | struct stripe_head *sh2 | ||
2947 | = get_active_stripe(conf, sh->sector, 1, 1); | ||
2948 | if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { | ||
2949 | /* sh cannot be written until sh2 has been read. | ||
2950 | * so arrange for sh to be delayed a little | ||
2951 | */ | ||
2952 | set_bit(STRIPE_DELAYED, &sh->state); | ||
2953 | set_bit(STRIPE_HANDLE, &sh->state); | ||
2954 | if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, | ||
2955 | &sh2->state)) | ||
2956 | atomic_inc(&conf->preread_active_stripes); | ||
2957 | release_stripe(sh2); | ||
2958 | goto unlock; | ||
2959 | } | ||
2960 | if (sh2) | ||
2961 | release_stripe(sh2); | ||
2962 | |||
2753 | sh->reconstruct_state = reconstruct_state_idle; | 2963 | sh->reconstruct_state = reconstruct_state_idle; |
2754 | clear_bit(STRIPE_EXPANDING, &sh->state); | 2964 | clear_bit(STRIPE_EXPANDING, &sh->state); |
2755 | for (i = conf->raid_disks; i--; ) { | 2965 | for (i = conf->raid_disks; i--; ) { |
@@ -2763,8 +2973,7 @@ static bool handle_stripe5(struct stripe_head *sh) | |||
2763 | !sh->reconstruct_state) { | 2973 | !sh->reconstruct_state) { |
2764 | /* Need to write out all blocks after computing parity */ | 2974 | /* Need to write out all blocks after computing parity */ |
2765 | sh->disks = conf->raid_disks; | 2975 | sh->disks = conf->raid_disks; |
2766 | sh->pd_idx = stripe_to_pdidx(sh->sector, conf, | 2976 | stripe_set_idx(sh->sector, conf, 0, sh); |
2767 | conf->raid_disks); | ||
2768 | schedule_reconstruction5(sh, &s, 1, 1); | 2977 | schedule_reconstruction5(sh, &s, 1, 1); |
2769 | } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { | 2978 | } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { |
2770 | clear_bit(STRIPE_EXPAND_READY, &sh->state); | 2979 | clear_bit(STRIPE_EXPAND_READY, &sh->state); |
@@ -2796,20 +3005,19 @@ static bool handle_stripe5(struct stripe_head *sh) | |||
2796 | 3005 | ||
2797 | static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | 3006 | static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) |
2798 | { | 3007 | { |
2799 | raid6_conf_t *conf = sh->raid_conf; | 3008 | raid5_conf_t *conf = sh->raid_conf; |
2800 | int disks = sh->disks; | 3009 | int disks = sh->disks; |
2801 | struct bio *return_bi = NULL; | 3010 | struct bio *return_bi = NULL; |
2802 | int i, pd_idx = sh->pd_idx; | 3011 | int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx; |
2803 | struct stripe_head_state s; | 3012 | struct stripe_head_state s; |
2804 | struct r6_state r6s; | 3013 | struct r6_state r6s; |
2805 | struct r5dev *dev, *pdev, *qdev; | 3014 | struct r5dev *dev, *pdev, *qdev; |
2806 | mdk_rdev_t *blocked_rdev = NULL; | 3015 | mdk_rdev_t *blocked_rdev = NULL; |
2807 | 3016 | ||
2808 | r6s.qd_idx = raid6_next_disk(pd_idx, disks); | ||
2809 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, " | 3017 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, " |
2810 | "pd_idx=%d, qd_idx=%d\n", | 3018 | "pd_idx=%d, qd_idx=%d\n", |
2811 | (unsigned long long)sh->sector, sh->state, | 3019 | (unsigned long long)sh->sector, sh->state, |
2812 | atomic_read(&sh->count), pd_idx, r6s.qd_idx); | 3020 | atomic_read(&sh->count), pd_idx, qd_idx); |
2813 | memset(&s, 0, sizeof(s)); | 3021 | memset(&s, 0, sizeof(s)); |
2814 | 3022 | ||
2815 | spin_lock(&sh->lock); | 3023 | spin_lock(&sh->lock); |
@@ -2920,9 +3128,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
2920 | pdev = &sh->dev[pd_idx]; | 3128 | pdev = &sh->dev[pd_idx]; |
2921 | r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx) | 3129 | r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx) |
2922 | || (s.failed >= 2 && r6s.failed_num[1] == pd_idx); | 3130 | || (s.failed >= 2 && r6s.failed_num[1] == pd_idx); |
2923 | qdev = &sh->dev[r6s.qd_idx]; | 3131 | qdev = &sh->dev[qd_idx]; |
2924 | r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == r6s.qd_idx) | 3132 | r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == qd_idx) |
2925 | || (s.failed >= 2 && r6s.failed_num[1] == r6s.qd_idx); | 3133 | || (s.failed >= 2 && r6s.failed_num[1] == qd_idx); |
2926 | 3134 | ||
2927 | if ( s.written && | 3135 | if ( s.written && |
2928 | ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags) | 3136 | ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags) |
@@ -2980,10 +3188,26 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
2980 | } | 3188 | } |
2981 | 3189 | ||
2982 | if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { | 3190 | if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { |
3191 | struct stripe_head *sh2 | ||
3192 | = get_active_stripe(conf, sh->sector, 1, 1); | ||
3193 | if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { | ||
3194 | /* sh cannot be written until sh2 has been read. | ||
3195 | * so arrange for sh to be delayed a little | ||
3196 | */ | ||
3197 | set_bit(STRIPE_DELAYED, &sh->state); | ||
3198 | set_bit(STRIPE_HANDLE, &sh->state); | ||
3199 | if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, | ||
3200 | &sh2->state)) | ||
3201 | atomic_inc(&conf->preread_active_stripes); | ||
3202 | release_stripe(sh2); | ||
3203 | goto unlock; | ||
3204 | } | ||
3205 | if (sh2) | ||
3206 | release_stripe(sh2); | ||
3207 | |||
2983 | /* Need to write out all blocks after computing P&Q */ | 3208 | /* Need to write out all blocks after computing P&Q */ |
2984 | sh->disks = conf->raid_disks; | 3209 | sh->disks = conf->raid_disks; |
2985 | sh->pd_idx = stripe_to_pdidx(sh->sector, conf, | 3210 | stripe_set_idx(sh->sector, conf, 0, sh); |
2986 | conf->raid_disks); | ||
2987 | compute_parity6(sh, RECONSTRUCT_WRITE); | 3211 | compute_parity6(sh, RECONSTRUCT_WRITE); |
2988 | for (i = conf->raid_disks ; i-- ; ) { | 3212 | for (i = conf->raid_disks ; i-- ; ) { |
2989 | set_bit(R5_LOCKED, &sh->dev[i].flags); | 3213 | set_bit(R5_LOCKED, &sh->dev[i].flags); |
@@ -3134,6 +3358,8 @@ static int raid5_mergeable_bvec(struct request_queue *q, | |||
3134 | if ((bvm->bi_rw & 1) == WRITE) | 3358 | if ((bvm->bi_rw & 1) == WRITE) |
3135 | return biovec->bv_len; /* always allow writes to be mergeable */ | 3359 | return biovec->bv_len; /* always allow writes to be mergeable */ |
3136 | 3360 | ||
3361 | if (mddev->new_chunk < mddev->chunk_size) | ||
3362 | chunk_sectors = mddev->new_chunk >> 9; | ||
3137 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; | 3363 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; |
3138 | if (max < 0) max = 0; | 3364 | if (max < 0) max = 0; |
3139 | if (max <= biovec->bv_len && bio_sectors == 0) | 3365 | if (max <= biovec->bv_len && bio_sectors == 0) |
@@ -3149,6 +3375,8 @@ static int in_chunk_boundary(mddev_t *mddev, struct bio *bio) | |||
3149 | unsigned int chunk_sectors = mddev->chunk_size >> 9; | 3375 | unsigned int chunk_sectors = mddev->chunk_size >> 9; |
3150 | unsigned int bio_sectors = bio->bi_size >> 9; | 3376 | unsigned int bio_sectors = bio->bi_size >> 9; |
3151 | 3377 | ||
3378 | if (mddev->new_chunk < mddev->chunk_size) | ||
3379 | chunk_sectors = mddev->new_chunk >> 9; | ||
3152 | return chunk_sectors >= | 3380 | return chunk_sectors >= |
3153 | ((sector & (chunk_sectors - 1)) + bio_sectors); | 3381 | ((sector & (chunk_sectors - 1)) + bio_sectors); |
3154 | } | 3382 | } |
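[Both hunks above apply the same rule: while a reshape is reducing the chunk size, merge and boundary decisions must be made against the smaller new chunk, or a bio accepted under the old geometry could straddle a new-layout chunk. A standalone check with a worked example; the helper names are invented, the sector arithmetic follows the hunks.]

#include <assert.h>

static unsigned int effective_chunk_sectors(unsigned int chunk_size,
                                            unsigned int new_chunk)
{
        unsigned int chunk_sectors = chunk_size >> 9;
        if (new_chunk < chunk_size)
                chunk_sectors = new_chunk >> 9;  /* use the smaller chunk */
        return chunk_sectors;
}

static int in_chunk(unsigned long long sector, unsigned int bio_sectors,
                    unsigned int chunk_sectors)
{
        return chunk_sectors >= (sector & (chunk_sectors - 1)) + bio_sectors;
}

int main(void)
{
        /* 64 KiB -> 32 KiB reshape: a 16 KiB bio starting 24 KiB into a
         * stripe fits a 64 KiB chunk but crosses a 32 KiB boundary. */
        unsigned int eff = effective_chunk_sectors(65536, 32768);
        assert(eff == 64);
        assert(in_chunk(48, 32, 128));   /* fits the old chunk */
        assert(!in_chunk(48, 32, eff));  /* crosses the new chunk */
        return 0;
}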
@@ -3255,9 +3483,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio) | |||
3255 | { | 3483 | { |
3256 | mddev_t *mddev = q->queuedata; | 3484 | mddev_t *mddev = q->queuedata; |
3257 | raid5_conf_t *conf = mddev_to_conf(mddev); | 3485 | raid5_conf_t *conf = mddev_to_conf(mddev); |
3258 | const unsigned int raid_disks = conf->raid_disks; | 3486 | unsigned int dd_idx; |
3259 | const unsigned int data_disks = raid_disks - conf->max_degraded; | ||
3260 | unsigned int dd_idx, pd_idx; | ||
3261 | struct bio* align_bi; | 3487 | struct bio* align_bi; |
3262 | mdk_rdev_t *rdev; | 3488 | mdk_rdev_t *rdev; |
3263 | 3489 | ||
@@ -3266,7 +3492,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio) | |||
3266 | return 0; | 3492 | return 0; |
3267 | } | 3493 | } |
3268 | /* | 3494 | /* |
3269 | * use bio_clone to make a copy of the bio | 3495 | * use bio_clone to make a copy of the bio |
3270 | */ | 3496 | */ |
3271 | align_bi = bio_clone(raid_bio, GFP_NOIO); | 3497 | align_bi = bio_clone(raid_bio, GFP_NOIO); |
3272 | if (!align_bi) | 3498 | if (!align_bi) |
@@ -3280,12 +3506,9 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio) | |||
3280 | /* | 3506 | /* |
3281 | * compute position | 3507 | * compute position |
3282 | */ | 3508 | */ |
3283 | align_bi->bi_sector = raid5_compute_sector(raid_bio->bi_sector, | 3509 | align_bi->bi_sector = raid5_compute_sector(conf, raid_bio->bi_sector, |
3284 | raid_disks, | 3510 | 0, |
3285 | data_disks, | 3511 | &dd_idx, NULL); |
3286 | &dd_idx, | ||
3287 | &pd_idx, | ||
3288 | conf); | ||
3289 | 3512 | ||
3290 | rcu_read_lock(); | 3513 | rcu_read_lock(); |
3291 | rdev = rcu_dereference(conf->disks[dd_idx].rdev); | 3514 | rdev = rcu_dereference(conf->disks[dd_idx].rdev); |
@@ -3377,7 +3600,7 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3377 | { | 3600 | { |
3378 | mddev_t *mddev = q->queuedata; | 3601 | mddev_t *mddev = q->queuedata; |
3379 | raid5_conf_t *conf = mddev_to_conf(mddev); | 3602 | raid5_conf_t *conf = mddev_to_conf(mddev); |
3380 | unsigned int dd_idx, pd_idx; | 3603 | int dd_idx; |
3381 | sector_t new_sector; | 3604 | sector_t new_sector; |
3382 | sector_t logical_sector, last_sector; | 3605 | sector_t logical_sector, last_sector; |
3383 | struct stripe_head *sh; | 3606 | struct stripe_head *sh; |
@@ -3400,7 +3623,7 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3400 | if (rw == READ && | 3623 | if (rw == READ && |
3401 | mddev->reshape_position == MaxSector && | 3624 | mddev->reshape_position == MaxSector && |
3402 | chunk_aligned_read(q,bi)) | 3625 | chunk_aligned_read(q,bi)) |
3403 | return 0; | 3626 | return 0; |
3404 | 3627 | ||
3405 | logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); | 3628 | logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); |
3406 | last_sector = bi->bi_sector + (bi->bi_size>>9); | 3629 | last_sector = bi->bi_sector + (bi->bi_size>>9); |
@@ -3410,26 +3633,31 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3410 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { | 3633 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { |
3411 | DEFINE_WAIT(w); | 3634 | DEFINE_WAIT(w); |
3412 | int disks, data_disks; | 3635 | int disks, data_disks; |
3636 | int previous; | ||
3413 | 3637 | ||
3414 | retry: | 3638 | retry: |
3639 | previous = 0; | ||
3640 | disks = conf->raid_disks; | ||
3415 | prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); | 3641 | prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); |
3416 | if (likely(conf->expand_progress == MaxSector)) | 3642 | if (unlikely(conf->reshape_progress != MaxSector)) { |
3417 | disks = conf->raid_disks; | 3643 | /* spinlock is needed as reshape_progress may be |
3418 | else { | ||
3419 | /* spinlock is needed as expand_progress may be | ||
3420 | * 64bit on a 32bit platform, and so it might be | 3644 | * 64bit on a 32bit platform, and so it might be |
3421 | * possible to see a half-updated value | 3645 | * possible to see a half-updated value |
3422 | * Of course expand_progress could change after | 3646 | * Of course reshape_progress could change after |
3423 | * the lock is dropped, so once we get a reference | 3647 | * the lock is dropped, so once we get a reference |
3424 | * to the stripe that we think it is, we will have | 3648 | * to the stripe that we think it is, we will have |
3425 | * to check again. | 3649 | * to check again. |
3426 | */ | 3650 | */ |
3427 | spin_lock_irq(&conf->device_lock); | 3651 | spin_lock_irq(&conf->device_lock); |
3428 | disks = conf->raid_disks; | 3652 | if (mddev->delta_disks < 0 |
3429 | if (logical_sector >= conf->expand_progress) | 3653 | ? logical_sector < conf->reshape_progress |
3654 | : logical_sector >= conf->reshape_progress) { | ||
3430 | disks = conf->previous_raid_disks; | 3655 | disks = conf->previous_raid_disks; |
3431 | else { | 3656 | previous = 1; |
3432 | if (logical_sector >= conf->expand_lo) { | 3657 | } else { |
3658 | if (mddev->delta_disks < 0 | ||
3659 | ? logical_sector < conf->reshape_safe | ||
3660 | : logical_sector >= conf->reshape_safe) { | ||
3433 | spin_unlock_irq(&conf->device_lock); | 3661 | spin_unlock_irq(&conf->device_lock); |
3434 | schedule(); | 3662 | schedule(); |
3435 | goto retry; | 3663 | goto retry; |
@@ -3439,15 +3667,17 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3439 | } | 3667 | } |
3440 | data_disks = disks - conf->max_degraded; | 3668 | data_disks = disks - conf->max_degraded; |
3441 | 3669 | ||
3442 | new_sector = raid5_compute_sector(logical_sector, disks, data_disks, | 3670 | new_sector = raid5_compute_sector(conf, logical_sector, |
3443 | &dd_idx, &pd_idx, conf); | 3671 | previous, |
3672 | &dd_idx, NULL); | ||
3444 | pr_debug("raid5: make_request, sector %llu logical %llu\n", | 3673 | pr_debug("raid5: make_request, sector %llu logical %llu\n", |
3445 | (unsigned long long)new_sector, | 3674 | (unsigned long long)new_sector, |
3446 | (unsigned long long)logical_sector); | 3675 | (unsigned long long)logical_sector); |
3447 | 3676 | ||
3448 | sh = get_active_stripe(conf, new_sector, disks, pd_idx, (bi->bi_rw&RWA_MASK)); | 3677 | sh = get_active_stripe(conf, new_sector, previous, |
3678 | (bi->bi_rw&RWA_MASK)); | ||
3449 | if (sh) { | 3679 | if (sh) { |
3450 | if (unlikely(conf->expand_progress != MaxSector)) { | 3680 | if (unlikely(previous)) { |
3451 | /* expansion might have moved on while waiting for a | 3681 | /* expansion might have moved on while waiting for a |
3452 | * stripe, so we must do the range check again. | 3682 | * stripe, so we must do the range check again. |
3453 | * Expansion could still move past after this | 3683 | * Expansion could still move past after this |
@@ -3458,8 +3688,9 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3458 | */ | 3688 | */ |
3459 | int must_retry = 0; | 3689 | int must_retry = 0; |
3460 | spin_lock_irq(&conf->device_lock); | 3690 | spin_lock_irq(&conf->device_lock); |
3461 | if (logical_sector < conf->expand_progress && | 3691 | if (mddev->delta_disks < 0 |
3462 | disks == conf->previous_raid_disks) | 3692 | ? logical_sector >= conf->reshape_progress |
3693 | : logical_sector < conf->reshape_progress) | ||
3463 | /* mismatch, need to try again */ | 3694 | /* mismatch, need to try again */ |
3464 | must_retry = 1; | 3695 | must_retry = 1; |
3465 | spin_unlock_irq(&conf->device_lock); | 3696 | spin_unlock_irq(&conf->device_lock); |
@@ -3514,6 +3745,8 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3514 | return 0; | 3745 | return 0; |
3515 | } | 3746 | } |
3516 | 3747 | ||
3748 | static sector_t raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks); | ||
3749 | |||
3517 | static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped) | 3750 | static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped) |
3518 | { | 3751 | { |
3519 | /* reshaping is quite different to recovery/resync so it is | 3752 | /* reshaping is quite different to recovery/resync so it is |
@@ -3527,61 +3760,118 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3527 | */ | 3760 | */ |
3528 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | 3761 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; |
3529 | struct stripe_head *sh; | 3762 | struct stripe_head *sh; |
3530 | int pd_idx; | ||
3531 | sector_t first_sector, last_sector; | 3763 | sector_t first_sector, last_sector; |
3532 | int raid_disks = conf->previous_raid_disks; | 3764 | int raid_disks = conf->previous_raid_disks; |
3533 | int data_disks = raid_disks - conf->max_degraded; | 3765 | int data_disks = raid_disks - conf->max_degraded; |
3534 | int new_data_disks = conf->raid_disks - conf->max_degraded; | 3766 | int new_data_disks = conf->raid_disks - conf->max_degraded; |
3535 | int i; | 3767 | int i; |
3536 | int dd_idx; | 3768 | int dd_idx; |
3537 | sector_t writepos, safepos, gap; | 3769 | sector_t writepos, readpos, safepos; |
3538 | 3770 | sector_t stripe_addr; | |
3539 | if (sector_nr == 0 && | 3771 | int reshape_sectors; |
3540 | conf->expand_progress != 0) { | 3772 | struct list_head stripes; |
3541 | /* restarting in the middle, skip the initial sectors */ | 3773 | |
3542 | sector_nr = conf->expand_progress; | 3774 | if (sector_nr == 0) { |
3775 | /* If restarting in the middle, skip the initial sectors */ | ||
3776 | if (mddev->delta_disks < 0 && | ||
3777 | conf->reshape_progress < raid5_size(mddev, 0, 0)) { | ||
3778 | sector_nr = raid5_size(mddev, 0, 0) | ||
3779 | - conf->reshape_progress; | ||
3780 | } else if (mddev->delta_disks > 0 && | ||
3781 | conf->reshape_progress > 0) | ||
3782 | sector_nr = conf->reshape_progress; | ||
3543 | sector_div(sector_nr, new_data_disks); | 3783 | sector_div(sector_nr, new_data_disks); |
3544 | *skipped = 1; | 3784 | if (sector_nr) { |
3545 | return sector_nr; | 3785 | *skipped = 1; |
3786 | return sector_nr; | ||
3787 | } | ||
3546 | } | 3788 | } |
3547 | 3789 | ||
3790 | /* We need to process a full chunk at a time. | ||
3791 | * If old and new chunk sizes differ, we need to process the | ||
3792 | * largest of these | ||
3793 | */ | ||
3794 | if (mddev->new_chunk > mddev->chunk_size) | ||
3795 | reshape_sectors = mddev->new_chunk / 512; | ||
3796 | else | ||
3797 | reshape_sectors = mddev->chunk_size / 512; | ||
3798 | |||
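When the chunk size itself is being changed, one pass of reshape_request() must cover a whole chunk of both the old and the new geometry, hence the larger of the two above. A trivial standalone sketch of the computation, with illustrative names:

    #include <stdio.h>

    /* one reshape pass, in 512-byte sectors per device */
    static int reshape_unit(int chunk_size, int new_chunk)
    {
            return (new_chunk > chunk_size ? new_chunk : chunk_size) / 512;
    }

    int main(void)
    {
            printf("%d\n", reshape_unit(65536, 131072)); /* 64K -> 128K: 256 */
            return 0;
    }
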
3548 | /* we update the metadata when there is more than 3Meg | 3799 | /* we update the metadata when there is more than 3Meg |
3549 | * in the block range (that is rather arbitrary, should | 3800 | * in the block range (that is rather arbitrary, should |
3550 | * probably be time based) or when the data about to be | 3801 | * probably be time based) or when the data about to be |
3551 | * copied would over-write the source of the data at | 3802 | * copied would over-write the source of the data at |
3552 | * the front of the range. | 3803 | * the front of the range. |
3553 | * i.e. the stripe one new_stripe beyond expand_progress maps (in the | 3804 | * i.e. the stripe one new_stripe beyond reshape_progress maps (in the |
3554 | * new layout) to after where expand_lo maps in the old layout | 3805 | * new layout) to after where reshape_safe maps in the old layout |
3555 | */ | 3806 | */ |
3556 | writepos = conf->expand_progress + | 3807 | writepos = conf->reshape_progress; |
3557 | conf->chunk_size/512*(new_data_disks); | ||
3558 | sector_div(writepos, new_data_disks); | 3808 | sector_div(writepos, new_data_disks); |
3559 | safepos = conf->expand_lo; | 3809 | readpos = conf->reshape_progress; |
3810 | sector_div(readpos, data_disks); | ||
3811 | safepos = conf->reshape_safe; | ||
3560 | sector_div(safepos, data_disks); | 3812 | sector_div(safepos, data_disks); |
3561 | gap = conf->expand_progress - conf->expand_lo; | 3813 | if (mddev->delta_disks < 0) { |
3814 | writepos -= reshape_sectors; | ||
3815 | readpos += reshape_sectors; | ||
3816 | safepos += reshape_sectors; | ||
3817 | } else { | ||
3818 | writepos += reshape_sectors; | ||
3819 | readpos -= reshape_sectors; | ||
3820 | safepos -= reshape_sectors; | ||
3821 | } | ||
3562 | 3822 | ||
3563 | if (writepos >= safepos || | 3823 | /* 'writepos' is the most advanced device address we might write. |
3564 | gap > (new_data_disks)*3000*2 /*3Meg*/) { | 3824 | * 'readpos' is the least advanced device address we might read. |
3825 | * 'safepos' is the least address recorded in the metadata as having | ||
3826 | * been reshaped. | ||
3827 | * If 'readpos' is behind 'writepos', then there is no way that we can | ||
3828 | * ensure safety in the face of a crash - that must be done by userspace | ||
3829 | * making a backup of the data. So in that case there is no particular | ||
3830 | * rush to update metadata. | ||
3831 | * Otherwise if 'safepos' is behind 'writepos', then we really need to | ||
3832 | * update the metadata to advance 'safepos' to match 'readpos' so that | ||
3833 | * we can be safe in the event of a crash. | ||
3834 | * So we insist on updating metadata if safepos is behind writepos and | ||
3835 | * readpos is beyond writepos. | ||
3836 | * In any case, update the metadata every 10 seconds. | ||
3837 | * Maybe that number should be configurable, but I'm not sure it is | ||
3838 | * worth it.... maybe it could be a multiple of safemode_delay??? | ||
3839 | */ | ||
3840 | if ((mddev->delta_disks < 0 | ||
3841 | ? (safepos > writepos && readpos < writepos) | ||
3842 | : (safepos < writepos && readpos > writepos)) || | ||
3843 | time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { | ||
3565 | /* Cannot proceed until we've updated the superblock... */ | 3844 | /* Cannot proceed until we've updated the superblock... */ |
3566 | wait_event(conf->wait_for_overlap, | 3845 | wait_event(conf->wait_for_overlap, |
3567 | atomic_read(&conf->reshape_stripes)==0); | 3846 | atomic_read(&conf->reshape_stripes)==0); |
3568 | mddev->reshape_position = conf->expand_progress; | 3847 | mddev->reshape_position = conf->reshape_progress; |
3848 | conf->reshape_checkpoint = jiffies; | ||
3569 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 3849 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
3570 | md_wakeup_thread(mddev->thread); | 3850 | md_wakeup_thread(mddev->thread); |
3571 | wait_event(mddev->sb_wait, mddev->flags == 0 || | 3851 | wait_event(mddev->sb_wait, mddev->flags == 0 || |
3572 | kthread_should_stop()); | 3852 | kthread_should_stop()); |
3573 | spin_lock_irq(&conf->device_lock); | 3853 | spin_lock_irq(&conf->device_lock); |
3574 | conf->expand_lo = mddev->reshape_position; | 3854 | conf->reshape_safe = mddev->reshape_position; |
3575 | spin_unlock_irq(&conf->device_lock); | 3855 | spin_unlock_irq(&conf->device_lock); |
3576 | wake_up(&conf->wait_for_overlap); | 3856 | wake_up(&conf->wait_for_overlap); |
3577 | } | 3857 | } |
3578 | 3858 | ||
3579 | for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) { | 3859 | if (mddev->delta_disks < 0) { |
3860 | BUG_ON(conf->reshape_progress == 0); | ||
3861 | stripe_addr = writepos; | ||
3862 | BUG_ON((mddev->dev_sectors & | ||
3863 | ~((sector_t)reshape_sectors - 1)) | ||
3864 | - reshape_sectors - stripe_addr | ||
3865 | != sector_nr); | ||
3866 | } else { | ||
3867 | BUG_ON(writepos != sector_nr + reshape_sectors); | ||
3868 | stripe_addr = sector_nr; | ||
3869 | } | ||
3870 | INIT_LIST_HEAD(&stripes); | ||
3871 | for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) { | ||
3580 | int j; | 3872 | int j; |
3581 | int skipped = 0; | 3873 | int skipped = 0; |
3582 | pd_idx = stripe_to_pdidx(sector_nr+i, conf, conf->raid_disks); | 3874 | sh = get_active_stripe(conf, stripe_addr+i, 0, 0); |
3583 | sh = get_active_stripe(conf, sector_nr+i, | ||
3584 | conf->raid_disks, pd_idx, 0); | ||
3585 | set_bit(STRIPE_EXPANDING, &sh->state); | 3875 | set_bit(STRIPE_EXPANDING, &sh->state); |
3586 | atomic_inc(&conf->reshape_stripes); | 3876 | atomic_inc(&conf->reshape_stripes); |
3587 | /* If any of this stripe is beyond the end of the old | 3877 | /* If any of this stripe is beyond the end of the old |
@@ -3592,10 +3882,10 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3592 | if (j == sh->pd_idx) | 3882 | if (j == sh->pd_idx) |
3593 | continue; | 3883 | continue; |
3594 | if (conf->level == 6 && | 3884 | if (conf->level == 6 && |
3595 | j == raid6_next_disk(sh->pd_idx, sh->disks)) | 3885 | j == sh->qd_idx) |
3596 | continue; | 3886 | continue; |
3597 | s = compute_blocknr(sh, j); | 3887 | s = compute_blocknr(sh, j, 0); |
3598 | if (s < mddev->array_sectors) { | 3888 | if (s < raid5_size(mddev, 0, 0)) { |
3599 | skipped = 1; | 3889 | skipped = 1; |
3600 | continue; | 3890 | continue; |
3601 | } | 3891 | } |
@@ -3607,10 +3897,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3607 | set_bit(STRIPE_EXPAND_READY, &sh->state); | 3897 | set_bit(STRIPE_EXPAND_READY, &sh->state); |
3608 | set_bit(STRIPE_HANDLE, &sh->state); | 3898 | set_bit(STRIPE_HANDLE, &sh->state); |
3609 | } | 3899 | } |
3610 | release_stripe(sh); | 3900 | list_add(&sh->lru, &stripes); |
3611 | } | 3901 | } |
3612 | spin_lock_irq(&conf->device_lock); | 3902 | spin_lock_irq(&conf->device_lock); |
3613 | conf->expand_progress = (sector_nr + i) * new_data_disks; | 3903 | if (mddev->delta_disks < 0) |
3904 | conf->reshape_progress -= reshape_sectors * new_data_disks; | ||
3905 | else | ||
3906 | conf->reshape_progress += reshape_sectors * new_data_disks; | ||
3614 | spin_unlock_irq(&conf->device_lock); | 3907 | spin_unlock_irq(&conf->device_lock); |
3615 | /* Ok, those stripes are ready. We can start scheduling | 3908 | /* Ok, those stripes are ready. We can start scheduling |
3616 | * reads on the source stripes. | 3909 | * reads on the source stripes. |
@@ -3618,46 +3911,50 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3618 | * block on the destination stripes. | 3911 | * block on the destination stripes. |
3619 | */ | 3912 | */ |
3620 | first_sector = | 3913 | first_sector = |
3621 | raid5_compute_sector(sector_nr*(new_data_disks), | 3914 | raid5_compute_sector(conf, stripe_addr*(new_data_disks), |
3622 | raid_disks, data_disks, | 3915 | 1, &dd_idx, NULL); |
3623 | &dd_idx, &pd_idx, conf); | ||
3624 | last_sector = | 3916 | last_sector = |
3625 | raid5_compute_sector((sector_nr+conf->chunk_size/512) | 3917 | raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512) |
3626 | *(new_data_disks) -1, | 3918 | *(new_data_disks) - 1), |
3627 | raid_disks, data_disks, | 3919 | 1, &dd_idx, NULL); |
3628 | &dd_idx, &pd_idx, conf); | 3920 | if (last_sector >= mddev->dev_sectors) |
3629 | if (last_sector >= (mddev->size<<1)) | 3921 | last_sector = mddev->dev_sectors - 1; |
3630 | last_sector = (mddev->size<<1)-1; | ||
3631 | while (first_sector <= last_sector) { | 3922 | while (first_sector <= last_sector) { |
3632 | pd_idx = stripe_to_pdidx(first_sector, conf, | 3923 | sh = get_active_stripe(conf, first_sector, 1, 0); |
3633 | conf->previous_raid_disks); | ||
3634 | sh = get_active_stripe(conf, first_sector, | ||
3635 | conf->previous_raid_disks, pd_idx, 0); | ||
3636 | set_bit(STRIPE_EXPAND_SOURCE, &sh->state); | 3924 | set_bit(STRIPE_EXPAND_SOURCE, &sh->state); |
3637 | set_bit(STRIPE_HANDLE, &sh->state); | 3925 | set_bit(STRIPE_HANDLE, &sh->state); |
3638 | release_stripe(sh); | 3926 | release_stripe(sh); |
3639 | first_sector += STRIPE_SECTORS; | 3927 | first_sector += STRIPE_SECTORS; |
3640 | } | 3928 | } |
3929 | /* Now that the sources are clearly marked, we can release | ||
3930 | * the destination stripes | ||
3931 | */ | ||
3932 | while (!list_empty(&stripes)) { | ||
3933 | sh = list_entry(stripes.next, struct stripe_head, lru); | ||
3934 | list_del_init(&sh->lru); | ||
3935 | release_stripe(sh); | ||
3936 | } | ||
3641 | /* If this takes us to the resync_max point where we have to pause, | 3937 | /* If this takes us to the resync_max point where we have to pause, |
3642 | * then we need to write out the superblock. | 3938 | * then we need to write out the superblock. |
3643 | */ | 3939 | */ |
3644 | sector_nr += conf->chunk_size>>9; | 3940 | sector_nr += reshape_sectors; |
3645 | if (sector_nr >= mddev->resync_max) { | 3941 | if (sector_nr >= mddev->resync_max) { |
3646 | /* Cannot proceed until we've updated the superblock... */ | 3942 | /* Cannot proceed until we've updated the superblock... */ |
3647 | wait_event(conf->wait_for_overlap, | 3943 | wait_event(conf->wait_for_overlap, |
3648 | atomic_read(&conf->reshape_stripes) == 0); | 3944 | atomic_read(&conf->reshape_stripes) == 0); |
3649 | mddev->reshape_position = conf->expand_progress; | 3945 | mddev->reshape_position = conf->reshape_progress; |
3946 | conf->reshape_checkpoint = jiffies; | ||
3650 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 3947 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
3651 | md_wakeup_thread(mddev->thread); | 3948 | md_wakeup_thread(mddev->thread); |
3652 | wait_event(mddev->sb_wait, | 3949 | wait_event(mddev->sb_wait, |
3653 | !test_bit(MD_CHANGE_DEVS, &mddev->flags) | 3950 | !test_bit(MD_CHANGE_DEVS, &mddev->flags) |
3654 | || kthread_should_stop()); | 3951 | || kthread_should_stop()); |
3655 | spin_lock_irq(&conf->device_lock); | 3952 | spin_lock_irq(&conf->device_lock); |
3656 | conf->expand_lo = mddev->reshape_position; | 3953 | conf->reshape_safe = mddev->reshape_position; |
3657 | spin_unlock_irq(&conf->device_lock); | 3954 | spin_unlock_irq(&conf->device_lock); |
3658 | wake_up(&conf->wait_for_overlap); | 3955 | wake_up(&conf->wait_for_overlap); |
3659 | } | 3956 | } |
3660 | return conf->chunk_size>>9; | 3957 | return reshape_sectors; |
3661 | } | 3958 | } |
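
reshape_request() mixes two address spaces: its argument and return value (sector_nr) are per-device addresses, while reshape_progress is an array data address, so each pass advances the latter by reshape_sectors * new_data_disks. A small standalone illustration, assuming a 4 -> 5 disk grow with 128-sector chunks:

    #include <stdio.h>

    int main(void)
    {
            int new_data_disks = 4, reshape_sectors = 128;
            unsigned long long sector_nr = 0, reshape_progress = 0;
            int pass;

            for (pass = 0; pass < 3; pass++) {
                    /* what the loop above adds under device_lock */
                    reshape_progress += (unsigned long long)reshape_sectors
                                        * new_data_disks;
                    /* what reshape_request() returns to the resync thread */
                    sector_nr += reshape_sectors;
                    printf("pass %d: sector_nr=%llu reshape_progress=%llu\n",
                           pass, sector_nr, reshape_progress);
            }
            return 0;
    }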
3662 | 3959 | ||
3663 | /* FIXME go_faster isn't used */ | 3960 | /* FIXME go_faster isn't used */ |
@@ -3665,9 +3962,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
3665 | { | 3962 | { |
3666 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | 3963 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; |
3667 | struct stripe_head *sh; | 3964 | struct stripe_head *sh; |
3668 | int pd_idx; | 3965 | sector_t max_sector = mddev->dev_sectors; |
3669 | int raid_disks = conf->raid_disks; | ||
3670 | sector_t max_sector = mddev->size << 1; | ||
3671 | int sync_blocks; | 3966 | int sync_blocks; |
3672 | int still_degraded = 0; | 3967 | int still_degraded = 0; |
3673 | int i; | 3968 | int i; |
@@ -3675,6 +3970,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
3675 | if (sector_nr >= max_sector) { | 3970 | if (sector_nr >= max_sector) { |
3676 | /* just being told to finish up .. nothing much to do */ | 3971 | /* just being told to finish up .. nothing much to do */ |
3677 | unplug_slaves(mddev); | 3972 | unplug_slaves(mddev); |
3973 | |||
3678 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) { | 3974 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) { |
3679 | end_reshape(conf); | 3975 | end_reshape(conf); |
3680 | return 0; | 3976 | return 0; |
@@ -3705,7 +4001,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
3705 | */ | 4001 | */ |
3706 | if (mddev->degraded >= conf->max_degraded && | 4002 | if (mddev->degraded >= conf->max_degraded && |
3707 | test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | 4003 | test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { |
3708 | sector_t rv = (mddev->size << 1) - sector_nr; | 4004 | sector_t rv = mddev->dev_sectors - sector_nr; |
3709 | *skipped = 1; | 4005 | *skipped = 1; |
3710 | return rv; | 4006 | return rv; |
3711 | } | 4007 | } |
@@ -3721,10 +4017,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
3721 | 4017 | ||
3722 | bitmap_cond_end_sync(mddev->bitmap, sector_nr); | 4018 | bitmap_cond_end_sync(mddev->bitmap, sector_nr); |
3723 | 4019 | ||
3724 | pd_idx = stripe_to_pdidx(sector_nr, conf, raid_disks); | 4020 | sh = get_active_stripe(conf, sector_nr, 0, 1); |
3725 | sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 1); | ||
3726 | if (sh == NULL) { | 4021 | if (sh == NULL) { |
3727 | sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 0); | 4022 | sh = get_active_stripe(conf, sector_nr, 0, 0); |
3728 | /* make sure we don't swamp the stripe cache if someone else | 4023 | /* make sure we don't swamp the stripe cache if someone else |
3729 | * is trying to get access | 4024 | * is trying to get access |
3730 | */ | 4025 | */ |
@@ -3766,19 +4061,15 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
3766 | * there will be only one 'dd_idx' and only one call to raid5_compute_sector is needed. | 4061 | * there will be only one 'dd_idx' and only one call to raid5_compute_sector is needed. |
3767 | */ | 4062 | */ |
3768 | struct stripe_head *sh; | 4063 | struct stripe_head *sh; |
3769 | int dd_idx, pd_idx; | 4064 | int dd_idx; |
3770 | sector_t sector, logical_sector, last_sector; | 4065 | sector_t sector, logical_sector, last_sector; |
3771 | int scnt = 0; | 4066 | int scnt = 0; |
3772 | int remaining; | 4067 | int remaining; |
3773 | int handled = 0; | 4068 | int handled = 0; |
3774 | 4069 | ||
3775 | logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1); | 4070 | logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1); |
3776 | sector = raid5_compute_sector( logical_sector, | 4071 | sector = raid5_compute_sector(conf, logical_sector, |
3777 | conf->raid_disks, | 4072 | 0, &dd_idx, NULL); |
3778 | conf->raid_disks - conf->max_degraded, | ||
3779 | &dd_idx, | ||
3780 | &pd_idx, | ||
3781 | conf); | ||
3782 | last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9); | 4073 | last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9); |
3783 | 4074 | ||
3784 | for (; logical_sector < last_sector; | 4075 | for (; logical_sector < last_sector; |
@@ -3790,7 +4081,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
3790 | /* already done this stripe */ | 4081 | /* already done this stripe */ |
3791 | continue; | 4082 | continue; |
3792 | 4083 | ||
3793 | sh = get_active_stripe(conf, sector, conf->raid_disks, pd_idx, 1); | 4084 | sh = get_active_stripe(conf, sector, 0, 1); |
3794 | 4085 | ||
3795 | if (!sh) { | 4086 | if (!sh) { |
3796 | /* failed to get a stripe - must wait */ | 4087 | /* failed to get a stripe - must wait */ |
@@ -3992,89 +4283,69 @@ static struct attribute_group raid5_attrs_group = { | |||
3992 | .attrs = raid5_attrs, | 4283 | .attrs = raid5_attrs, |
3993 | }; | 4284 | }; |
3994 | 4285 | ||
3995 | static int run(mddev_t *mddev) | 4286 | static sector_t |
4287 | raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) | ||
4288 | { | ||
4289 | raid5_conf_t *conf = mddev_to_conf(mddev); | ||
4290 | |||
4291 | if (!sectors) | ||
4292 | sectors = mddev->dev_sectors; | ||
4293 | if (!raid_disks) { | ||
4294 | /* size is defined by the smaller of the previous and new size */ | ||
4295 | if (conf->raid_disks < conf->previous_raid_disks) | ||
4296 | raid_disks = conf->raid_disks; | ||
4297 | else | ||
4298 | raid_disks = conf->previous_raid_disks; | ||
4299 | } | ||
4300 | |||
4301 | sectors &= ~((sector_t)mddev->chunk_size/512 - 1); | ||
4302 | sectors &= ~((sector_t)mddev->new_chunk/512 - 1); | ||
4303 | return sectors * (raid_disks - conf->max_degraded); | ||
4304 | } | ||
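
Worked example for the new raid5_size() helper above: the per-device size is rounded down to a multiple of both the old and new chunk size, then multiplied by the data-disk count. A plain-C restatement with hypothetical numbers:

    #include <stdio.h>

    typedef unsigned long long sector_t;

    static sector_t array_size(sector_t dev_sectors, int raid_disks,
                               int max_degraded, int chunk, int new_chunk)
    {
            /* round down to a multiple of each chunk size (in sectors) */
            dev_sectors &= ~((sector_t)chunk / 512 - 1);
            dev_sectors &= ~((sector_t)new_chunk / 512 - 1);
            return dev_sectors * (raid_disks - max_degraded);
    }

    int main(void)
    {
            /* 5-disk RAID6 (max_degraded = 2), 64K chunks, 1000000-sector
             * members: 999936 usable sectors per device, 3 data disks */
            printf("%llu\n", array_size(1000000, 5, 2, 65536, 65536));
            /* prints 2999808 */
            return 0;
    }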
4305 | |||
4306 | static raid5_conf_t *setup_conf(mddev_t *mddev) | ||
3996 | { | 4307 | { |
3997 | raid5_conf_t *conf; | 4308 | raid5_conf_t *conf; |
3998 | int raid_disk, memory; | 4309 | int raid_disk, memory; |
3999 | mdk_rdev_t *rdev; | 4310 | mdk_rdev_t *rdev; |
4000 | struct disk_info *disk; | 4311 | struct disk_info *disk; |
4001 | int working_disks = 0; | ||
4002 | 4312 | ||
4003 | if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) { | 4313 | if (mddev->new_level != 5 |
4314 | && mddev->new_level != 4 | ||
4315 | && mddev->new_level != 6) { | ||
4004 | printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n", | 4316 | printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n", |
4005 | mdname(mddev), mddev->level); | 4317 | mdname(mddev), mddev->new_level); |
4006 | return -EIO; | 4318 | return ERR_PTR(-EIO); |
4007 | } | 4319 | } |
4008 | 4320 | if ((mddev->new_level == 5 | |
4009 | if (mddev->chunk_size < PAGE_SIZE) { | 4321 | && !algorithm_valid_raid5(mddev->new_layout)) || |
4010 | printk(KERN_ERR "md/raid5: chunk_size must be at least " | 4322 | (mddev->new_level == 6 |
4011 | "PAGE_SIZE but %d < %ld\n", | 4323 | && !algorithm_valid_raid6(mddev->new_layout))) { |
4012 | mddev->chunk_size, PAGE_SIZE); | 4324 | printk(KERN_ERR "raid5: %s: layout %d not supported\n", |
4013 | return -EINVAL; | 4325 | mdname(mddev), mddev->new_layout); |
4326 | return ERR_PTR(-EIO); | ||
4014 | } | 4327 | } |
4015 | 4328 | if (mddev->new_level == 6 && mddev->raid_disks < 4) { | |
4016 | if (mddev->reshape_position != MaxSector) { | 4329 | printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n", |
4017 | /* Check that we can continue the reshape. | 4330 | mdname(mddev), mddev->raid_disks); |
4018 | * Currently only disks can change, it must | 4331 | return ERR_PTR(-EINVAL); |
4019 | * increase, and we must be past the point where | ||
4020 | * a stripe over-writes itself | ||
4021 | */ | ||
4022 | sector_t here_new, here_old; | ||
4023 | int old_disks; | ||
4024 | int max_degraded = (mddev->level == 5 ? 1 : 2); | ||
4025 | |||
4026 | if (mddev->new_level != mddev->level || | ||
4027 | mddev->new_layout != mddev->layout || | ||
4028 | mddev->new_chunk != mddev->chunk_size) { | ||
4029 | printk(KERN_ERR "raid5: %s: unsupported reshape " | ||
4030 | "required - aborting.\n", | ||
4031 | mdname(mddev)); | ||
4032 | return -EINVAL; | ||
4033 | } | ||
4034 | if (mddev->delta_disks <= 0) { | ||
4035 | printk(KERN_ERR "raid5: %s: unsupported reshape " | ||
4036 | "(reduce disks) required - aborting.\n", | ||
4037 | mdname(mddev)); | ||
4038 | return -EINVAL; | ||
4039 | } | ||
4040 | old_disks = mddev->raid_disks - mddev->delta_disks; | ||
4041 | /* reshape_position must be on a new-stripe boundary, and one | ||
4042 | * further up in new geometry must map after here in old | ||
4043 | * geometry. | ||
4044 | */ | ||
4045 | here_new = mddev->reshape_position; | ||
4046 | if (sector_div(here_new, (mddev->chunk_size>>9)* | ||
4047 | (mddev->raid_disks - max_degraded))) { | ||
4048 | printk(KERN_ERR "raid5: reshape_position not " | ||
4049 | "on a stripe boundary\n"); | ||
4050 | return -EINVAL; | ||
4051 | } | ||
4052 | /* here_new is the stripe we will write to */ | ||
4053 | here_old = mddev->reshape_position; | ||
4054 | sector_div(here_old, (mddev->chunk_size>>9)* | ||
4055 | (old_disks-max_degraded)); | ||
4056 | /* here_old is the first stripe that we might need to read | ||
4057 | * from */ | ||
4058 | if (here_new >= here_old) { | ||
4059 | /* Reading from the same stripe as writing to - bad */ | ||
4060 | printk(KERN_ERR "raid5: reshape_position too early for " | ||
4061 | "auto-recovery - aborting.\n"); | ||
4062 | return -EINVAL; | ||
4063 | } | ||
4064 | printk(KERN_INFO "raid5: reshape will continue\n"); | ||
4065 | /* OK, we should be able to continue; */ | ||
4066 | } | 4332 | } |
4067 | 4333 | ||
4334 | if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) { | ||
4335 | printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", | ||
4336 | mddev->new_chunk, mdname(mddev)); | ||
4337 | return ERR_PTR(-EINVAL); | ||
4338 | } | ||
4068 | 4339 | ||
4069 | mddev->private = kzalloc(sizeof (raid5_conf_t), GFP_KERNEL); | 4340 | conf = kzalloc(sizeof(raid5_conf_t), GFP_KERNEL); |
4070 | if ((conf = mddev->private) == NULL) | 4341 | if (conf == NULL) |
4071 | goto abort; | 4342 | goto abort; |
4072 | if (mddev->reshape_position == MaxSector) { | 4343 | |
4073 | conf->previous_raid_disks = conf->raid_disks = mddev->raid_disks; | 4344 | conf->raid_disks = mddev->raid_disks; |
4074 | } else { | 4345 | if (mddev->reshape_position == MaxSector) |
4075 | conf->raid_disks = mddev->raid_disks; | 4346 | conf->previous_raid_disks = mddev->raid_disks; |
4347 | else | ||
4076 | conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; | 4348 | conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; |
4077 | } | ||
4078 | 4349 | ||
4079 | conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info), | 4350 | conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info), |
4080 | GFP_KERNEL); | 4351 | GFP_KERNEL); |
@@ -4086,13 +4357,12 @@ static int run(mddev_t *mddev) | |||
4086 | if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) | 4357 | if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) |
4087 | goto abort; | 4358 | goto abort; |
4088 | 4359 | ||
4089 | if (mddev->level == 6) { | 4360 | if (mddev->new_level == 6) { |
4090 | conf->spare_page = alloc_page(GFP_KERNEL); | 4361 | conf->spare_page = alloc_page(GFP_KERNEL); |
4091 | if (!conf->spare_page) | 4362 | if (!conf->spare_page) |
4092 | goto abort; | 4363 | goto abort; |
4093 | } | 4364 | } |
4094 | spin_lock_init(&conf->device_lock); | 4365 | spin_lock_init(&conf->device_lock); |
4095 | mddev->queue->queue_lock = &conf->device_lock; | ||
4096 | init_waitqueue_head(&conf->wait_for_stripe); | 4366 | init_waitqueue_head(&conf->wait_for_stripe); |
4097 | init_waitqueue_head(&conf->wait_for_overlap); | 4367 | init_waitqueue_head(&conf->wait_for_overlap); |
4098 | INIT_LIST_HEAD(&conf->handle_list); | 4368 | INIT_LIST_HEAD(&conf->handle_list); |
@@ -4121,47 +4391,134 @@ static int run(mddev_t *mddev) | |||
4121 | printk(KERN_INFO "raid5: device %s operational as raid" | 4391 | printk(KERN_INFO "raid5: device %s operational as raid" |
4122 | " disk %d\n", bdevname(rdev->bdev,b), | 4392 | " disk %d\n", bdevname(rdev->bdev,b), |
4123 | raid_disk); | 4393 | raid_disk); |
4124 | working_disks++; | ||
4125 | } else | 4394 | } else |
4126 | /* Cannot rely on bitmap to complete recovery */ | 4395 | /* Cannot rely on bitmap to complete recovery */ |
4127 | conf->fullsync = 1; | 4396 | conf->fullsync = 1; |
4128 | } | 4397 | } |
4129 | 4398 | ||
4130 | /* | 4399 | conf->chunk_size = mddev->new_chunk; |
4131 | * 0 for a fully functional array, 1 or 2 for a degraded array. | 4400 | conf->level = mddev->new_level; |
4132 | */ | ||
4133 | mddev->degraded = conf->raid_disks - working_disks; | ||
4134 | conf->mddev = mddev; | ||
4135 | conf->chunk_size = mddev->chunk_size; | ||
4136 | conf->level = mddev->level; | ||
4137 | if (conf->level == 6) | 4401 | if (conf->level == 6) |
4138 | conf->max_degraded = 2; | 4402 | conf->max_degraded = 2; |
4139 | else | 4403 | else |
4140 | conf->max_degraded = 1; | 4404 | conf->max_degraded = 1; |
4141 | conf->algorithm = mddev->layout; | 4405 | conf->algorithm = mddev->new_layout; |
4142 | conf->max_nr_stripes = NR_STRIPES; | 4406 | conf->max_nr_stripes = NR_STRIPES; |
4143 | conf->expand_progress = mddev->reshape_position; | 4407 | conf->reshape_progress = mddev->reshape_position; |
4144 | 4408 | if (conf->reshape_progress != MaxSector) { | |
4145 | /* device size must be a multiple of chunk size */ | 4409 | conf->prev_chunk = mddev->chunk_size; |
4146 | mddev->size &= ~(mddev->chunk_size/1024 -1); | 4410 | conf->prev_algo = mddev->layout; |
4147 | mddev->resync_max_sectors = mddev->size << 1; | 4411 | } |
4148 | 4412 | ||
4149 | if (conf->level == 6 && conf->raid_disks < 4) { | 4413 | memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + |
4150 | printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n", | 4414 | conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; |
4151 | mdname(mddev), conf->raid_disks); | 4415 | if (grow_stripes(conf, conf->max_nr_stripes)) { |
4416 | printk(KERN_ERR | ||
4417 | "raid5: couldn't allocate %dkB for buffers\n", memory); | ||
4152 | goto abort; | 4418 | goto abort; |
4153 | } | 4419 | } else |
4154 | if (!conf->chunk_size || conf->chunk_size % 4) { | 4420 | printk(KERN_INFO "raid5: allocated %dkB for %s\n", |
4155 | printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", | 4421 | memory, mdname(mddev)); |
4156 | conf->chunk_size, mdname(mddev)); | 4422 | |
4423 | conf->thread = md_register_thread(raid5d, mddev, "%s_raid5"); | ||
4424 | if (!conf->thread) { | ||
4425 | printk(KERN_ERR | ||
4426 | "raid5: couldn't allocate thread for %s\n", | ||
4427 | mdname(mddev)); | ||
4157 | goto abort; | 4428 | goto abort; |
4158 | } | 4429 | } |
4159 | if (conf->algorithm > ALGORITHM_RIGHT_SYMMETRIC) { | 4430 | |
4160 | printk(KERN_ERR | 4431 | return conf; |
4161 | "raid5: unsupported parity algorithm %d for %s\n", | 4432 | |
4162 | conf->algorithm, mdname(mddev)); | 4433 | abort: |
4163 | goto abort; | 4434 | if (conf) { |
4435 | shrink_stripes(conf); | ||
4436 | safe_put_page(conf->spare_page); | ||
4437 | kfree(conf->disks); | ||
4438 | kfree(conf->stripe_hashtbl); | ||
4439 | kfree(conf); | ||
4440 | return ERR_PTR(-EIO); | ||
4441 | } else | ||
4442 | return ERR_PTR(-ENOMEM); | ||
4443 | } | ||
4444 | |||
4445 | static int run(mddev_t *mddev) | ||
4446 | { | ||
4447 | raid5_conf_t *conf; | ||
4448 | int working_disks = 0; | ||
4449 | mdk_rdev_t *rdev; | ||
4450 | |||
4451 | if (mddev->reshape_position != MaxSector) { | ||
4452 | /* Check that we can continue the reshape. | ||
4453 | * Currently only disks can change, it must | ||
4454 | * increase, and we must be past the point where | ||
4455 | * a stripe over-writes itself | ||
4456 | */ | ||
4457 | sector_t here_new, here_old; | ||
4458 | int old_disks; | ||
4459 | int max_degraded = (mddev->level == 6 ? 2 : 1); | ||
4460 | |||
4461 | if (mddev->new_level != mddev->level) { | ||
4462 | printk(KERN_ERR "raid5: %s: unsupported reshape " | ||
4463 | "required - aborting.\n", | ||
4464 | mdname(mddev)); | ||
4465 | return -EINVAL; | ||
4466 | } | ||
4467 | old_disks = mddev->raid_disks - mddev->delta_disks; | ||
4468 | /* reshape_position must be on a new-stripe boundary, and one | ||
4469 | * further up in new geometry must map after here in old | ||
4470 | * geometry. | ||
4471 | */ | ||
4472 | here_new = mddev->reshape_position; | ||
4473 | if (sector_div(here_new, (mddev->new_chunk>>9)* | ||
4474 | (mddev->raid_disks - max_degraded))) { | ||
4475 | printk(KERN_ERR "raid5: reshape_position not " | ||
4476 | "on a stripe boundary\n"); | ||
4477 | return -EINVAL; | ||
4478 | } | ||
4479 | /* here_new is the stripe we will write to */ | ||
4480 | here_old = mddev->reshape_position; | ||
4481 | sector_div(here_old, (mddev->chunk_size>>9)* | ||
4482 | (old_disks-max_degraded)); | ||
4483 | /* here_old is the first stripe that we might need to read | ||
4484 | * from */ | ||
4485 | if (here_new >= here_old) { | ||
4486 | /* Reading from the same stripe as writing to - bad */ | ||
4487 | printk(KERN_ERR "raid5: reshape_position too early for " | ||
4488 | "auto-recovery - aborting.\n"); | ||
4489 | return -EINVAL; | ||
4490 | } | ||
4491 | printk(KERN_INFO "raid5: reshape will continue\n"); | ||
4492 | /* OK, we should be able to continue; */ | ||
4493 | } else { | ||
4494 | BUG_ON(mddev->level != mddev->new_level); | ||
4495 | BUG_ON(mddev->layout != mddev->new_layout); | ||
4496 | BUG_ON(mddev->chunk_size != mddev->new_chunk); | ||
4497 | BUG_ON(mddev->delta_disks != 0); | ||
4164 | } | 4498 | } |
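
The here_new/here_old test that moved into run() above divides reshape_position by the data span of one stripe in each geometry; a restart is only safe when the next stripe to write (new layout) lies strictly before the first stripe that might still be read (old layout). A standalone sketch with assumed figures:

    #include <stdio.h>

    int main(void)
    {
            int chunk_sectors = 128;   /* 64K chunks in both layouts */
            int old_disks = 4, new_disks = 5, max_degraded = 1;
            unsigned long long pos = 131072;   /* saved reshape_position */

            unsigned long long new_span = chunk_sectors * (new_disks - max_degraded);
            unsigned long long old_span = chunk_sectors * (old_disks - max_degraded);

            if (pos % new_span)
                    printf("reshape_position not on a stripe boundary\n");
            else if (pos / new_span >= pos / old_span)
                    printf("reshape_position too early for auto-recovery\n");
            else
                    printf("reshape will continue (write stripe %llu, read stripe %llu)\n",
                           pos / new_span, pos / old_span);
            return 0;
    }

Here write stripe 256 is safely before read stripe 341, so the reshape continues.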
4499 | |||
4500 | if (mddev->private == NULL) | ||
4501 | conf = setup_conf(mddev); | ||
4502 | else | ||
4503 | conf = mddev->private; | ||
4504 | |||
4505 | if (IS_ERR(conf)) | ||
4506 | return PTR_ERR(conf); | ||
4507 | |||
4508 | mddev->thread = conf->thread; | ||
4509 | conf->thread = NULL; | ||
4510 | mddev->private = conf; | ||
4511 | |||
4512 | /* | ||
4513 | * 0 for a fully functional array, 1 or 2 for a degraded array. | ||
4514 | */ | ||
4515 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
4516 | if (rdev->raid_disk >= 0 && | ||
4517 | test_bit(In_sync, &rdev->flags)) | ||
4518 | working_disks++; | ||
4519 | |||
4520 | mddev->degraded = conf->raid_disks - working_disks; | ||
4521 | |||
4165 | if (mddev->degraded > conf->max_degraded) { | 4522 | if (mddev->degraded > conf->max_degraded) { |
4166 | printk(KERN_ERR "raid5: not enough operational devices for %s" | 4523 | printk(KERN_ERR "raid5: not enough operational devices for %s" |
4167 | " (%d/%d failed)\n", | 4524 | " (%d/%d failed)\n", |
@@ -4169,6 +4526,10 @@ static int run(mddev_t *mddev) | |||
4169 | goto abort; | 4526 | goto abort; |
4170 | } | 4527 | } |
4171 | 4528 | ||
4529 | /* device size must be a multiple of chunk size */ | ||
4530 | mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1); | ||
4531 | mddev->resync_max_sectors = mddev->dev_sectors; | ||
4532 | |||
4172 | if (mddev->degraded > 0 && | 4533 | if (mddev->degraded > 0 && |
4173 | mddev->recovery_cp != MaxSector) { | 4534 | mddev->recovery_cp != MaxSector) { |
4174 | if (mddev->ok_start_degraded) | 4535 | if (mddev->ok_start_degraded) |
@@ -4184,43 +4545,22 @@ static int run(mddev_t *mddev) | |||
4184 | } | 4545 | } |
4185 | } | 4546 | } |
4186 | 4547 | ||
4187 | { | ||
4188 | mddev->thread = md_register_thread(raid5d, mddev, "%s_raid5"); | ||
4189 | if (!mddev->thread) { | ||
4190 | printk(KERN_ERR | ||
4191 | "raid5: couldn't allocate thread for %s\n", | ||
4192 | mdname(mddev)); | ||
4193 | goto abort; | ||
4194 | } | ||
4195 | } | ||
4196 | memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + | ||
4197 | conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; | ||
4198 | if (grow_stripes(conf, conf->max_nr_stripes)) { | ||
4199 | printk(KERN_ERR | ||
4200 | "raid5: couldn't allocate %dkB for buffers\n", memory); | ||
4201 | shrink_stripes(conf); | ||
4202 | md_unregister_thread(mddev->thread); | ||
4203 | goto abort; | ||
4204 | } else | ||
4205 | printk(KERN_INFO "raid5: allocated %dkB for %s\n", | ||
4206 | memory, mdname(mddev)); | ||
4207 | |||
4208 | if (mddev->degraded == 0) | 4548 | if (mddev->degraded == 0) |
4209 | printk("raid5: raid level %d set %s active with %d out of %d" | 4549 | printk("raid5: raid level %d set %s active with %d out of %d" |
4210 | " devices, algorithm %d\n", conf->level, mdname(mddev), | 4550 | " devices, algorithm %d\n", conf->level, mdname(mddev), |
4211 | mddev->raid_disks-mddev->degraded, mddev->raid_disks, | 4551 | mddev->raid_disks-mddev->degraded, mddev->raid_disks, |
4212 | conf->algorithm); | 4552 | mddev->new_layout); |
4213 | else | 4553 | else |
4214 | printk(KERN_ALERT "raid5: raid level %d set %s active with %d" | 4554 | printk(KERN_ALERT "raid5: raid level %d set %s active with %d" |
4215 | " out of %d devices, algorithm %d\n", conf->level, | 4555 | " out of %d devices, algorithm %d\n", conf->level, |
4216 | mdname(mddev), mddev->raid_disks - mddev->degraded, | 4556 | mdname(mddev), mddev->raid_disks - mddev->degraded, |
4217 | mddev->raid_disks, conf->algorithm); | 4557 | mddev->raid_disks, mddev->new_layout); |
4218 | 4558 | ||
4219 | print_raid5_conf(conf); | 4559 | print_raid5_conf(conf); |
4220 | 4560 | ||
4221 | if (conf->expand_progress != MaxSector) { | 4561 | if (conf->reshape_progress != MaxSector) { |
4222 | printk("...ok start reshape thread\n"); | 4562 | printk("...ok start reshape thread\n"); |
4223 | conf->expand_lo = conf->expand_progress; | 4563 | conf->reshape_safe = conf->reshape_progress; |
4224 | atomic_set(&conf->reshape_stripes, 0); | 4564 | atomic_set(&conf->reshape_stripes, 0); |
4225 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 4565 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
4226 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); | 4566 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); |
@@ -4247,18 +4587,22 @@ static int run(mddev_t *mddev) | |||
4247 | "raid5: failed to create sysfs attributes for %s\n", | 4587 | "raid5: failed to create sysfs attributes for %s\n", |
4248 | mdname(mddev)); | 4588 | mdname(mddev)); |
4249 | 4589 | ||
4590 | mddev->queue->queue_lock = &conf->device_lock; | ||
4591 | |||
4250 | mddev->queue->unplug_fn = raid5_unplug_device; | 4592 | mddev->queue->unplug_fn = raid5_unplug_device; |
4251 | mddev->queue->backing_dev_info.congested_data = mddev; | 4593 | mddev->queue->backing_dev_info.congested_data = mddev; |
4252 | mddev->queue->backing_dev_info.congested_fn = raid5_congested; | 4594 | mddev->queue->backing_dev_info.congested_fn = raid5_congested; |
4253 | 4595 | ||
4254 | mddev->array_sectors = 2 * mddev->size * (conf->previous_raid_disks - | 4596 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); |
4255 | conf->max_degraded); | ||
4256 | 4597 | ||
4257 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); | 4598 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); |
4258 | 4599 | ||
4259 | return 0; | 4600 | return 0; |
4260 | abort: | 4601 | abort: |
4602 | md_unregister_thread(mddev->thread); | ||
4603 | mddev->thread = NULL; | ||
4261 | if (conf) { | 4604 | if (conf) { |
4605 | shrink_stripes(conf); | ||
4262 | print_raid5_conf(conf); | 4606 | print_raid5_conf(conf); |
4263 | safe_put_page(conf->spare_page); | 4607 | safe_put_page(conf->spare_page); |
4264 | kfree(conf->disks); | 4608 | kfree(conf->disks); |
@@ -4396,6 +4740,10 @@ static int raid5_remove_disk(mddev_t *mddev, int number) | |||
4396 | print_raid5_conf(conf); | 4740 | print_raid5_conf(conf); |
4397 | rdev = p->rdev; | 4741 | rdev = p->rdev; |
4398 | if (rdev) { | 4742 | if (rdev) { |
4743 | if (number >= conf->raid_disks && | ||
4744 | conf->reshape_progress == MaxSector) | ||
4745 | clear_bit(In_sync, &rdev->flags); | ||
4746 | |||
4399 | if (test_bit(In_sync, &rdev->flags) || | 4747 | if (test_bit(In_sync, &rdev->flags) || |
4400 | atomic_read(&rdev->nr_pending)) { | 4748 | atomic_read(&rdev->nr_pending)) { |
4401 | err = -EBUSY; | 4749 | err = -EBUSY; |
@@ -4405,7 +4753,8 @@ static int raid5_remove_disk(mddev_t *mddev, int number) | |||
4405 | * isn't possible. | 4753 | * isn't possible. |
4406 | */ | 4754 | */ |
4407 | if (!test_bit(Faulty, &rdev->flags) && | 4755 | if (!test_bit(Faulty, &rdev->flags) && |
4408 | mddev->degraded <= conf->max_degraded) { | 4756 | mddev->degraded <= conf->max_degraded && |
4757 | number < conf->raid_disks) { | ||
4409 | err = -EBUSY; | 4758 | err = -EBUSY; |
4410 | goto abort; | 4759 | goto abort; |
4411 | } | 4760 | } |
@@ -4472,36 +4821,48 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) | |||
4472 | * any io in the removed space completes, but it hardly seems | 4821 | * any io in the removed space completes, but it hardly seems |
4473 | * worth it. | 4822 | * worth it. |
4474 | */ | 4823 | */ |
4475 | raid5_conf_t *conf = mddev_to_conf(mddev); | ||
4476 | |||
4477 | sectors &= ~((sector_t)mddev->chunk_size/512 - 1); | 4824 | sectors &= ~((sector_t)mddev->chunk_size/512 - 1); |
4478 | mddev->array_sectors = sectors * (mddev->raid_disks | 4825 | md_set_array_sectors(mddev, raid5_size(mddev, sectors, |
4479 | - conf->max_degraded); | 4826 | mddev->raid_disks)); |
4827 | if (mddev->array_sectors > | ||
4828 | raid5_size(mddev, sectors, mddev->raid_disks)) | ||
4829 | return -EINVAL; | ||
4480 | set_capacity(mddev->gendisk, mddev->array_sectors); | 4830 | set_capacity(mddev->gendisk, mddev->array_sectors); |
4481 | mddev->changed = 1; | 4831 | mddev->changed = 1; |
4482 | if (sectors/2 > mddev->size && mddev->recovery_cp == MaxSector) { | 4832 | if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { |
4483 | mddev->recovery_cp = mddev->size << 1; | 4833 | mddev->recovery_cp = mddev->dev_sectors; |
4484 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 4834 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
4485 | } | 4835 | } |
4486 | mddev->size = sectors /2; | 4836 | mddev->dev_sectors = sectors; |
4487 | mddev->resync_max_sectors = sectors; | 4837 | mddev->resync_max_sectors = sectors; |
4488 | return 0; | 4838 | return 0; |
4489 | } | 4839 | } |
4490 | 4840 | ||
4491 | #ifdef CONFIG_MD_RAID5_RESHAPE | ||
4492 | static int raid5_check_reshape(mddev_t *mddev) | 4841 | static int raid5_check_reshape(mddev_t *mddev) |
4493 | { | 4842 | { |
4494 | raid5_conf_t *conf = mddev_to_conf(mddev); | 4843 | raid5_conf_t *conf = mddev_to_conf(mddev); |
4495 | int err; | ||
4496 | 4844 | ||
4497 | if (mddev->delta_disks < 0 || | 4845 | if (mddev->delta_disks == 0 && |
4498 | mddev->new_level != mddev->level) | 4846 | mddev->new_layout == mddev->layout && |
4499 | return -EINVAL; /* Cannot shrink array or change level yet */ | 4847 | mddev->new_chunk == mddev->chunk_size) |
4500 | if (mddev->delta_disks == 0) | 4848 | return -EINVAL; /* nothing to do */ |
4501 | return 0; /* nothing to do */ | ||
4502 | if (mddev->bitmap) | 4849 | if (mddev->bitmap) |
4503 | /* Cannot grow a bitmap yet */ | 4850 | /* Cannot grow a bitmap yet */ |
4504 | return -EBUSY; | 4851 | return -EBUSY; |
4852 | if (mddev->degraded > conf->max_degraded) | ||
4853 | return -EINVAL; | ||
4854 | if (mddev->delta_disks < 0) { | ||
4855 | /* We might be able to shrink, but the devices must | ||
4856 | * be made bigger first. | ||
4857 | * For raid6, 4 is the minimum number of devices. | ||
4858 | * Otherwise 2 is the minimum. | ||
4859 | */ | ||
4860 | int min = 2; | ||
4861 | if (mddev->level == 6) | ||
4862 | min = 4; | ||
4863 | if (mddev->raid_disks + mddev->delta_disks < min) | ||
4864 | return -EINVAL; | ||
4865 | } | ||
4505 | 4866 | ||
4506 | /* Can only proceed if there are plenty of stripe_heads. | 4867 | /* Can only proceed if there are plenty of stripe_heads. |
4507 | * We need a minimum of one full stripe, and for sensible progress | 4868 | * We need a minimum of one full stripe, and for sensible progress |
@@ -4514,18 +4875,12 @@ static int raid5_check_reshape(mddev_t *mddev) | |||
4514 | if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes || | 4875 | if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes || |
4515 | (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) { | 4876 | (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) { |
4516 | printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n", | 4877 | printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n", |
4517 | (mddev->chunk_size / STRIPE_SIZE)*4); | 4878 | (max(mddev->chunk_size, mddev->new_chunk) |
4879 | / STRIPE_SIZE)*4); | ||
4518 | return -ENOSPC; | 4880 | return -ENOSPC; |
4519 | } | 4881 | } |
4520 | 4882 | ||
4521 | err = resize_stripes(conf, conf->raid_disks + mddev->delta_disks); | 4883 | return resize_stripes(conf, conf->raid_disks + mddev->delta_disks); |
4522 | if (err) | ||
4523 | return err; | ||
4524 | |||
4525 | if (mddev->degraded > conf->max_degraded) | ||
4526 | return -EINVAL; | ||
4527 | /* looks like we might be able to manage this */ | ||
4528 | return 0; | ||
4529 | } | 4884 | } |
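
The stripe-cache test above requires room for four full chunks of stripe_heads in whichever geometry is larger. A quick sketch of the arithmetic, assuming the usual 4K STRIPE_SIZE and a default cache of 256 stripe_heads (illustrative constants only):

    #include <stdio.h>

    #define STRIPE_SIZE 4096

    int main(void)
    {
            int chunk_size = 65536, new_chunk = 131072; /* reshaping 64K -> 128K */
            int max_nr_stripes = 256;
            int larger = new_chunk > chunk_size ? new_chunk : chunk_size;
            int needed = (larger / STRIPE_SIZE) * 4;

            if (needed > max_nr_stripes)
                    printf("reshape: not enough stripes. Needed %d\n", needed);
            else
                    printf("ok: need %d of %d stripe_heads\n", needed,
                           max_nr_stripes);
            return 0;
    }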
4530 | 4885 | ||
4531 | static int raid5_start_reshape(mddev_t *mddev) | 4886 | static int raid5_start_reshape(mddev_t *mddev) |
@@ -4550,12 +4905,31 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
4550 | */ | 4905 | */ |
4551 | return -EINVAL; | 4906 | return -EINVAL; |
4552 | 4907 | ||
4908 | /* Refuse to reduce size of the array. Any reductions in | ||
4909 | * array size must be through explicit setting of array_size | ||
4910 | * attribute. | ||
4911 | */ | ||
4912 | if (raid5_size(mddev, 0, conf->raid_disks + mddev->delta_disks) | ||
4913 | < mddev->array_sectors) { | ||
4914 | printk(KERN_ERR "md: %s: array size must be reduced " | ||
4915 | "before number of disks\n", mdname(mddev)); | ||
4916 | return -EINVAL; | ||
4917 | } | ||
4918 | |||
4553 | atomic_set(&conf->reshape_stripes, 0); | 4919 | atomic_set(&conf->reshape_stripes, 0); |
4554 | spin_lock_irq(&conf->device_lock); | 4920 | spin_lock_irq(&conf->device_lock); |
4555 | conf->previous_raid_disks = conf->raid_disks; | 4921 | conf->previous_raid_disks = conf->raid_disks; |
4556 | conf->raid_disks += mddev->delta_disks; | 4922 | conf->raid_disks += mddev->delta_disks; |
4557 | conf->expand_progress = 0; | 4923 | conf->prev_chunk = conf->chunk_size; |
4558 | conf->expand_lo = 0; | 4924 | conf->chunk_size = mddev->new_chunk; |
4925 | conf->prev_algo = conf->algorithm; | ||
4926 | conf->algorithm = mddev->new_layout; | ||
4927 | if (mddev->delta_disks < 0) | ||
4928 | conf->reshape_progress = raid5_size(mddev, 0, 0); | ||
4929 | else | ||
4930 | conf->reshape_progress = 0; | ||
4931 | conf->reshape_safe = conf->reshape_progress; | ||
4932 | conf->generation++; | ||
4559 | spin_unlock_irq(&conf->device_lock); | 4933 | spin_unlock_irq(&conf->device_lock); |
4560 | 4934 | ||
4561 | /* Add some new drives, as many as will fit. | 4935 | /* Add some new drives, as many as will fit. |
@@ -4580,9 +4954,12 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
4580 | break; | 4954 | break; |
4581 | } | 4955 | } |
4582 | 4956 | ||
4583 | spin_lock_irqsave(&conf->device_lock, flags); | 4957 | if (mddev->delta_disks > 0) { |
4584 | mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices; | 4958 | spin_lock_irqsave(&conf->device_lock, flags); |
4585 | spin_unlock_irqrestore(&conf->device_lock, flags); | 4959 | mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) |
4960 | - added_devices; | ||
4961 | spin_unlock_irqrestore(&conf->device_lock, flags); | ||
4962 | } | ||
4586 | mddev->raid_disks = conf->raid_disks; | 4963 | mddev->raid_disks = conf->raid_disks; |
4587 | mddev->reshape_position = 0; | 4964 | mddev->reshape_position = 0; |
4588 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 4965 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
@@ -4597,52 +4974,86 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
4597 | mddev->recovery = 0; | 4974 | mddev->recovery = 0; |
4598 | spin_lock_irq(&conf->device_lock); | 4975 | spin_lock_irq(&conf->device_lock); |
4599 | mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; | 4976 | mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; |
4600 | conf->expand_progress = MaxSector; | 4977 | conf->reshape_progress = MaxSector; |
4601 | spin_unlock_irq(&conf->device_lock); | 4978 | spin_unlock_irq(&conf->device_lock); |
4602 | return -EAGAIN; | 4979 | return -EAGAIN; |
4603 | } | 4980 | } |
4981 | conf->reshape_checkpoint = jiffies; | ||
4604 | md_wakeup_thread(mddev->sync_thread); | 4982 | md_wakeup_thread(mddev->sync_thread); |
4605 | md_new_event(mddev); | 4983 | md_new_event(mddev); |
4606 | return 0; | 4984 | return 0; |
4607 | } | 4985 | } |
4608 | #endif | ||
4609 | 4986 | ||
4987 | /* This is called from the reshape thread and should make any | ||
4988 | * changes needed in 'conf' | ||
4989 | */ | ||
4610 | static void end_reshape(raid5_conf_t *conf) | 4990 | static void end_reshape(raid5_conf_t *conf) |
4611 | { | 4991 | { |
4612 | struct block_device *bdev; | ||
4613 | 4992 | ||
4614 | if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { | 4993 | if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { |
4615 | conf->mddev->array_sectors = 2 * conf->mddev->size * | 4994 | |
4616 | (conf->raid_disks - conf->max_degraded); | ||
4617 | set_capacity(conf->mddev->gendisk, conf->mddev->array_sectors); | ||
4618 | conf->mddev->changed = 1; | ||
4619 | |||
4620 | bdev = bdget_disk(conf->mddev->gendisk, 0); | ||
4621 | if (bdev) { | ||
4622 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
4623 | i_size_write(bdev->bd_inode, | ||
4624 | (loff_t)conf->mddev->array_sectors << 9); | ||
4625 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
4626 | bdput(bdev); | ||
4627 | } | ||
4628 | spin_lock_irq(&conf->device_lock); | 4995 | spin_lock_irq(&conf->device_lock); |
4629 | conf->expand_progress = MaxSector; | 4996 | conf->previous_raid_disks = conf->raid_disks; |
4997 | conf->reshape_progress = MaxSector; | ||
4630 | spin_unlock_irq(&conf->device_lock); | 4998 | spin_unlock_irq(&conf->device_lock); |
4631 | conf->mddev->reshape_position = MaxSector; | 4999 | wake_up(&conf->wait_for_overlap); |
4632 | 5000 | ||
4633 | /* read-ahead size must cover two whole stripes, which is | 5001 | /* read-ahead size must cover two whole stripes, which is |
4634 | * 2 * (datadisks) * chunksize, where 'datadisks' is the number of data disks | 5002 | * 2 * (datadisks) * chunksize, where 'datadisks' is the number of data disks |
4635 | */ | 5003 | */ |
4636 | { | 5004 | { |
4637 | int data_disks = conf->previous_raid_disks - conf->max_degraded; | 5005 | int data_disks = conf->raid_disks - conf->max_degraded; |
4638 | int stripe = data_disks * | 5006 | int stripe = data_disks * (conf->chunk_size |
4639 | (conf->mddev->chunk_size / PAGE_SIZE); | 5007 | / PAGE_SIZE); |
4640 | if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) | 5008 | if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) |
4641 | conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; | 5009 | conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; |
4642 | } | 5010 | } |
4643 | } | 5011 | } |
4644 | } | 5012 | } |
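
The read-ahead sizing above is easiest to check with concrete numbers. A minimal userspace sketch (the device count, chunk size and page size are illustrative assumptions, not values from the patch):

	#include <stdio.h>

	int main(void)
	{
		int raid_disks = 6, max_degraded = 1;	/* assumed 6-device raid5 */
		long chunk_size = 64 * 1024;		/* assumed 64KiB chunk */
		long page_size = 4096;			/* assumed 4KiB pages */

		int data_disks = raid_disks - max_degraded;		/* 5 */
		long stripe = data_disks * (chunk_size / page_size);	/* 5 * 16 = 80 pages */

		/* two whole stripes: 160 pages, i.e. 640KiB of read-ahead */
		printf("min ra_pages = %ld\n", 2 * stripe);
		return 0;
	}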
4645 | 5013 | ||
5014 | /* This is called from the raid5d thread with mddev_lock held. | ||
5015 | * It makes config changes to the device. | ||
5016 | */ | ||
5017 | static void raid5_finish_reshape(mddev_t *mddev) | ||
5018 | { | ||
5019 | struct block_device *bdev; | ||
5020 | raid5_conf_t *conf = mddev_to_conf(mddev); | ||
5021 | |||
5022 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | ||
5023 | |||
5024 | if (mddev->delta_disks > 0) { | ||
5025 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | ||
5026 | set_capacity(mddev->gendisk, mddev->array_sectors); | ||
5027 | mddev->changed = 1; | ||
5028 | |||
5029 | bdev = bdget_disk(mddev->gendisk, 0); | ||
5030 | if (bdev) { | ||
5031 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
5032 | i_size_write(bdev->bd_inode, | ||
5033 | (loff_t)mddev->array_sectors << 9); | ||
5034 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
5035 | bdput(bdev); | ||
5036 | } | ||
5037 | } else { | ||
5038 | int d; | ||
5039 | mddev->degraded = conf->raid_disks; | ||
5040 | for (d = 0; d < conf->raid_disks ; d++) | ||
5041 | if (conf->disks[d].rdev && | ||
5042 | test_bit(In_sync, | ||
5043 | &conf->disks[d].rdev->flags)) | ||
5044 | mddev->degraded--; | ||
5045 | for (d = conf->raid_disks ; | ||
5046 | d < conf->raid_disks - mddev->delta_disks; | ||
5047 | d++) | ||
5048 | raid5_remove_disk(mddev, d); | ||
5049 | } | ||
5050 | mddev->layout = conf->algorithm; | ||
5051 | mddev->chunk_size = conf->chunk_size; | ||
5052 | mddev->reshape_position = MaxSector; | ||
5053 | mddev->delta_disks = 0; | ||
5054 | } | ||
5055 | } | ||
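
A concrete shrink case (illustrative numbers, not from the patch): reducing a five-device raid5 by one disk leaves conf->raid_disks = 4 and mddev->delta_disks = -1 once the reshape completes, so the removal loop above runs for d = 4 only (4 < 4 - (-1) = 5) and detaches the now-unused fifth slot; degraded is recounted first, so a failed member among the surviving four is still reported correctly.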
5056 | |||
4646 | static void raid5_quiesce(mddev_t *mddev, int state) | 5057 | static void raid5_quiesce(mddev_t *mddev, int state) |
4647 | { | 5058 | { |
4648 | raid5_conf_t *conf = mddev_to_conf(mddev); | 5059 | raid5_conf_t *conf = mddev_to_conf(mddev); |
@@ -4672,6 +5083,212 @@ static void raid5_quiesce(mddev_t *mddev, int state) | |||
4672 | } | 5083 | } |
4673 | } | 5084 | } |
4674 | 5085 | ||
5086 | |||
5087 | static void *raid5_takeover_raid1(mddev_t *mddev) | ||
5088 | { | ||
5089 | int chunksect; | ||
5090 | |||
5091 | if (mddev->raid_disks != 2 || | ||
5092 | mddev->degraded > 1) | ||
5093 | return ERR_PTR(-EINVAL); | ||
5094 | |||
5095 | /* Should check if there are write-behind devices? */ | ||
5096 | |||
5097 | chunksect = 64*2; /* 64K by default */ | ||
5098 | |||
5099 | /* The array must be an exact multiple of chunksize */ | ||
5100 | while (chunksect && (mddev->array_sectors & (chunksect-1))) | ||
5101 | chunksect >>= 1; | ||
5102 | |||
5103 | if ((chunksect<<9) < STRIPE_SIZE) | ||
5104 | /* array size does not allow a suitable chunk size */ | ||
5105 | return ERR_PTR(-EINVAL); | ||
5106 | |||
5107 | mddev->new_level = 5; | ||
5108 | mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC; | ||
5109 | mddev->new_chunk = chunksect << 9; | ||
5110 | |||
5111 | return setup_conf(mddev); | ||
5112 | } | ||
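
raid5_takeover_raid1 picks the largest power-of-two chunk, starting from 64KiB, that exactly divides the array. The same search restated as a standalone sketch, with an assumed array size:

	#include <stdio.h>

	#define STRIPE_SIZE 4096	/* assumed page-sized stripe unit */

	int main(void)
	{
		unsigned long long array_sectors = 1000ULL * 2048;	/* hypothetical 1000MiB array */
		int chunksect = 64 * 2;		/* 64KiB in 512-byte sectors */

		/* halve until the array is an exact multiple of the chunk */
		while (chunksect && (array_sectors & (chunksect - 1)))
			chunksect >>= 1;

		if ((chunksect << 9) < STRIPE_SIZE)
			printf("array size does not allow a suitable chunk\n");
		else
			printf("chunk = %dKiB\n", (chunksect << 9) / 1024);
		return 0;
	}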
5113 | |||
5114 | static void *raid5_takeover_raid6(mddev_t *mddev) | ||
5115 | { | ||
5116 | int new_layout; | ||
5117 | |||
5118 | switch (mddev->layout) { | ||
5119 | case ALGORITHM_LEFT_ASYMMETRIC_6: | ||
5120 | new_layout = ALGORITHM_LEFT_ASYMMETRIC; | ||
5121 | break; | ||
5122 | case ALGORITHM_RIGHT_ASYMMETRIC_6: | ||
5123 | new_layout = ALGORITHM_RIGHT_ASYMMETRIC; | ||
5124 | break; | ||
5125 | case ALGORITHM_LEFT_SYMMETRIC_6: | ||
5126 | new_layout = ALGORITHM_LEFT_SYMMETRIC; | ||
5127 | break; | ||
5128 | case ALGORITHM_RIGHT_SYMMETRIC_6: | ||
5129 | new_layout = ALGORITHM_RIGHT_SYMMETRIC; | ||
5130 | break; | ||
5131 | case ALGORITHM_PARITY_0_6: | ||
5132 | new_layout = ALGORITHM_PARITY_0; | ||
5133 | break; | ||
5134 | case ALGORITHM_PARITY_N: | ||
5135 | new_layout = ALGORITHM_PARITY_N; | ||
5136 | break; | ||
5137 | default: | ||
5138 | return ERR_PTR(-EINVAL); | ||
5139 | } | ||
5140 | mddev->new_level = 5; | ||
5141 | mddev->new_layout = new_layout; | ||
5142 | mddev->delta_disks = -1; | ||
5143 | mddev->raid_disks -= 1; | ||
5144 | return setup_conf(mddev); | ||
5145 | } | ||
5146 | |||
5147 | |||
5148 | static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) | ||
5149 | { | ||
5150 | /* For a 2-drive array, the layout and chunk size can be changed | ||
5151 | * immediately, as no restriping is needed. | ||
5152 | * For larger arrays we record the new value - after validation - | ||
5153 | * to be used by a later reshape pass. | ||
5154 | */ | ||
5155 | raid5_conf_t *conf = mddev_to_conf(mddev); | ||
5156 | |||
5157 | if (new_layout >= 0 && !algorithm_valid_raid5(new_layout)) | ||
5158 | return -EINVAL; | ||
5159 | if (new_chunk > 0) { | ||
5160 | if (new_chunk & (new_chunk-1)) | ||
5161 | /* not a power of 2 */ | ||
5162 | return -EINVAL; | ||
5163 | if (new_chunk < PAGE_SIZE) | ||
5164 | return -EINVAL; | ||
5165 | if (mddev->array_sectors & ((new_chunk>>9)-1)) | ||
5166 | /* not a factor of the array size */ | ||
5167 | return -EINVAL; | ||
5168 | } | ||
5169 | |||
5170 | /* They look valid */ | ||
5171 | |||
5172 | if (mddev->raid_disks == 2) { | ||
5173 | |||
5174 | if (new_layout >= 0) { | ||
5175 | conf->algorithm = new_layout; | ||
5176 | mddev->layout = mddev->new_layout = new_layout; | ||
5177 | } | ||
5178 | if (new_chunk > 0) { | ||
5179 | conf->chunk_size = new_chunk; | ||
5180 | mddev->chunk_size = mddev->new_chunk = new_chunk; | ||
5181 | } | ||
5182 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | ||
5183 | md_wakeup_thread(mddev->thread); | ||
5184 | } else { | ||
5185 | if (new_layout >= 0) | ||
5186 | mddev->new_layout = new_layout; | ||
5187 | if (new_chunk > 0) | ||
5188 | mddev->new_chunk = new_chunk; | ||
5189 | } | ||
5190 | return 0; | ||
5191 | } | ||
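
The chunk checks here (and the identical ones in raid6_reconfig below) require a power of two, at least PAGE_SIZE, and a factor of the array size; the first test is the usual x & (x-1) trick. A hedged standalone restatement, with assumed sizes:

	#include <stdio.h>

	#define PAGE_SIZE 4096	/* assumed */

	/* mirrors the validation in raid5_reconfig()/raid6_reconfig() */
	static int chunk_ok(int new_chunk, unsigned long long array_sectors)
	{
		if (new_chunk & (new_chunk - 1))
			return 0;	/* not a power of 2 */
		if (new_chunk < PAGE_SIZE)
			return 0;	/* smaller than a page */
		if (array_sectors & ((new_chunk >> 9) - 1))
			return 0;	/* not a factor of the array size */
		return 1;
	}

	int main(void)
	{
		printf("%d\n", chunk_ok(65536, 2048000));	/* 64KiB, 1000MiB array -> 1 */
		printf("%d\n", chunk_ok(65536, 2048001));	/* odd sector count -> 0 */
		printf("%d\n", chunk_ok(3 * 4096, 2048000));	/* 12KiB, not a power of 2 -> 0 */
		return 0;
	}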
5192 | |||
5193 | static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk) | ||
5194 | { | ||
5195 | if (new_layout >= 0 && !algorithm_valid_raid6(new_layout)) | ||
5196 | return -EINVAL; | ||
5197 | if (new_chunk > 0) { | ||
5198 | if (new_chunk & (new_chunk-1)) | ||
5199 | /* not a power of 2 */ | ||
5200 | return -EINVAL; | ||
5201 | if (new_chunk < PAGE_SIZE) | ||
5202 | return -EINVAL; | ||
5203 | if (mddev->array_sectors & ((new_chunk>>9)-1)) | ||
5204 | /* not a factor of the array size */ | ||
5205 | return -EINVAL; | ||
5206 | } | ||
5207 | |||
5208 | /* They look valid */ | ||
5209 | |||
5210 | if (new_layout >= 0) | ||
5211 | mddev->new_layout = new_layout; | ||
5212 | if (new_chunk > 0) | ||
5213 | mddev->new_chunk = new_chunk; | ||
5214 | |||
5215 | return 0; | ||
5216 | } | ||
5217 | |||
5218 | static void *raid5_takeover(mddev_t *mddev) | ||
5219 | { | ||
5220 | /* raid5 can take over: | ||
5221 | * raid0 - if all devices are the same - make it a raid4 layout | ||
5222 | * raid1 - if there are two drives. We need to know the chunk size | ||
5223 | * raid4 - trivial - just use a raid4 layout. | ||
5224 | * raid6 - provided it is a *_6 layout | ||
5225 | * | ||
5226 | * (raid0 takeover is not yet implemented.) | ||
5227 | */ | ||
5228 | |||
5229 | if (mddev->level == 1) | ||
5230 | return raid5_takeover_raid1(mddev); | ||
5231 | if (mddev->level == 4) { | ||
5232 | mddev->new_layout = ALGORITHM_PARITY_N; | ||
5233 | mddev->new_level = 5; | ||
5234 | return setup_conf(mddev); | ||
5235 | } | ||
5236 | if (mddev->level == 6) | ||
5237 | return raid5_takeover_raid6(mddev); | ||
5238 | |||
5239 | return ERR_PTR(-EINVAL); | ||
5240 | } | ||
5241 | |||
5242 | |||
5243 | static struct mdk_personality raid5_personality; | ||
5244 | |||
5245 | static void *raid6_takeover(mddev_t *mddev) | ||
5246 | { | ||
5247 | /* Currently can only take over a raid5. We map the | ||
5248 | * personality to an equivalent raid6 personality | ||
5249 | * with the Q block at the end. | ||
5250 | */ | ||
5251 | int new_layout; | ||
5252 | |||
5253 | if (mddev->pers != &raid5_personality) | ||
5254 | return ERR_PTR(-EINVAL); | ||
5255 | if (mddev->degraded > 1) | ||
5256 | return ERR_PTR(-EINVAL); | ||
5257 | if (mddev->raid_disks > 253) | ||
5258 | return ERR_PTR(-EINVAL); | ||
5259 | if (mddev->raid_disks < 3) | ||
5260 | return ERR_PTR(-EINVAL); | ||
5261 | |||
5262 | switch (mddev->layout) { | ||
5263 | case ALGORITHM_LEFT_ASYMMETRIC: | ||
5264 | new_layout = ALGORITHM_LEFT_ASYMMETRIC_6; | ||
5265 | break; | ||
5266 | case ALGORITHM_RIGHT_ASYMMETRIC: | ||
5267 | new_layout = ALGORITHM_RIGHT_ASYMMETRIC_6; | ||
5268 | break; | ||
5269 | case ALGORITHM_LEFT_SYMMETRIC: | ||
5270 | new_layout = ALGORITHM_LEFT_SYMMETRIC_6; | ||
5271 | break; | ||
5272 | case ALGORITHM_RIGHT_SYMMETRIC: | ||
5273 | new_layout = ALGORITHM_RIGHT_SYMMETRIC_6; | ||
5274 | break; | ||
5275 | case ALGORITHM_PARITY_0: | ||
5276 | new_layout = ALGORITHM_PARITY_0_6; | ||
5277 | break; | ||
5278 | case ALGORITHM_PARITY_N: | ||
5279 | new_layout = ALGORITHM_PARITY_N; | ||
5280 | break; | ||
5281 | default: | ||
5282 | return ERR_PTR(-EINVAL); | ||
5283 | } | ||
5284 | mddev->new_level = 6; | ||
5285 | mddev->new_layout = new_layout; | ||
5286 | mddev->delta_disks = 1; | ||
5287 | mddev->raid_disks += 1; | ||
5288 | return setup_conf(mddev); | ||
5289 | } | ||
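
Because every *_6 layout keeps data and P exactly where the raid5 layout put them and pins Q to the last device, the grown array is consistent everywhere except the new Q blocks. A userspace restatement of the mapping (layout numbers as defined in raid5.h below):

	#include <stdio.h>

	enum {
		LEFT_ASYMMETRIC = 0,	RIGHT_ASYMMETRIC = 1,
		LEFT_SYMMETRIC = 2,	RIGHT_SYMMETRIC = 3,
		PARITY_0 = 4,		PARITY_N = 5,
		LEFT_ASYMMETRIC_6 = 16,	RIGHT_ASYMMETRIC_6 = 17,
		LEFT_SYMMETRIC_6 = 18,	RIGHT_SYMMETRIC_6 = 19,
		PARITY_0_6 = 20
	};

	/* same mapping as the switch in raid6_takeover(); -1 if no equivalent */
	static int raid5_to_raid6_layout(int layout)
	{
		switch (layout) {
		case LEFT_ASYMMETRIC:	return LEFT_ASYMMETRIC_6;
		case RIGHT_ASYMMETRIC:	return RIGHT_ASYMMETRIC_6;
		case LEFT_SYMMETRIC:	return LEFT_SYMMETRIC_6;
		case RIGHT_SYMMETRIC:	return RIGHT_SYMMETRIC_6;
		case PARITY_0:		return PARITY_0_6;
		case PARITY_N:		return PARITY_N;	/* Q-at-end already */
		default:		return -1;
		}
	}

	int main(void)
	{
		printf("%d\n", raid5_to_raid6_layout(LEFT_SYMMETRIC));	/* 18 */
		return 0;
	}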
5290 | |||
5291 | |||
4675 | static struct mdk_personality raid6_personality = | 5292 | static struct mdk_personality raid6_personality = |
4676 | { | 5293 | { |
4677 | .name = "raid6", | 5294 | .name = "raid6", |
@@ -4687,11 +5304,13 @@ static struct mdk_personality raid6_personality = | |||
4687 | .spare_active = raid5_spare_active, | 5304 | .spare_active = raid5_spare_active, |
4688 | .sync_request = sync_request, | 5305 | .sync_request = sync_request, |
4689 | .resize = raid5_resize, | 5306 | .resize = raid5_resize, |
4690 | #ifdef CONFIG_MD_RAID5_RESHAPE | 5307 | .size = raid5_size, |
4691 | .check_reshape = raid5_check_reshape, | 5308 | .check_reshape = raid5_check_reshape, |
4692 | .start_reshape = raid5_start_reshape, | 5309 | .start_reshape = raid5_start_reshape, |
4693 | #endif | 5310 | .finish_reshape = raid5_finish_reshape, |
4694 | .quiesce = raid5_quiesce, | 5311 | .quiesce = raid5_quiesce, |
5312 | .takeover = raid6_takeover, | ||
5313 | .reconfig = raid6_reconfig, | ||
4695 | }; | 5314 | }; |
4696 | static struct mdk_personality raid5_personality = | 5315 | static struct mdk_personality raid5_personality = |
4697 | { | 5316 | { |
@@ -4708,11 +5327,13 @@ static struct mdk_personality raid5_personality = | |||
4708 | .spare_active = raid5_spare_active, | 5327 | .spare_active = raid5_spare_active, |
4709 | .sync_request = sync_request, | 5328 | .sync_request = sync_request, |
4710 | .resize = raid5_resize, | 5329 | .resize = raid5_resize, |
4711 | #ifdef CONFIG_MD_RAID5_RESHAPE | 5330 | .size = raid5_size, |
4712 | .check_reshape = raid5_check_reshape, | 5331 | .check_reshape = raid5_check_reshape, |
4713 | .start_reshape = raid5_start_reshape, | 5332 | .start_reshape = raid5_start_reshape, |
4714 | #endif | 5333 | .finish_reshape = raid5_finish_reshape, |
4715 | .quiesce = raid5_quiesce, | 5334 | .quiesce = raid5_quiesce, |
5335 | .takeover = raid5_takeover, | ||
5336 | .reconfig = raid5_reconfig, | ||
4716 | }; | 5337 | }; |
4717 | 5338 | ||
4718 | static struct mdk_personality raid4_personality = | 5339 | static struct mdk_personality raid4_personality = |
@@ -4730,20 +5351,15 @@ static struct mdk_personality raid4_personality = | |||
4730 | .spare_active = raid5_spare_active, | 5351 | .spare_active = raid5_spare_active, |
4731 | .sync_request = sync_request, | 5352 | .sync_request = sync_request, |
4732 | .resize = raid5_resize, | 5353 | .resize = raid5_resize, |
4733 | #ifdef CONFIG_MD_RAID5_RESHAPE | 5354 | .size = raid5_size, |
4734 | .check_reshape = raid5_check_reshape, | 5355 | .check_reshape = raid5_check_reshape, |
4735 | .start_reshape = raid5_start_reshape, | 5356 | .start_reshape = raid5_start_reshape, |
4736 | #endif | 5357 | .finish_reshape = raid5_finish_reshape, |
4737 | .quiesce = raid5_quiesce, | 5358 | .quiesce = raid5_quiesce, |
4738 | }; | 5359 | }; |
4739 | 5360 | ||
4740 | static int __init raid5_init(void) | 5361 | static int __init raid5_init(void) |
4741 | { | 5362 | { |
4742 | int e; | ||
4743 | |||
4744 | e = raid6_select_algo(); | ||
4745 | if ( e ) | ||
4746 | return e; | ||
4747 | register_md_personality(&raid6_personality); | 5363 | register_md_personality(&raid6_personality); |
4748 | register_md_personality(&raid5_personality); | 5364 | register_md_personality(&raid5_personality); |
4749 | register_md_personality(&raid4_personality); | 5365 | register_md_personality(&raid4_personality); |
diff --git a/include/linux/raid/raid5.h b/drivers/md/raid5.h index 3b2672792457..52ba99954dec 100644 --- a/include/linux/raid/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -1,7 +1,6 @@ | |||
1 | #ifndef _RAID5_H | 1 | #ifndef _RAID5_H |
2 | #define _RAID5_H | 2 | #define _RAID5_H |
3 | 3 | ||
4 | #include <linux/raid/md.h> | ||
5 | #include <linux/raid/xor.h> | 4 | #include <linux/raid/xor.h> |
6 | 5 | ||
7 | /* | 6 | /* |
@@ -197,15 +196,19 @@ enum reconstruct_states { | |||
197 | 196 | ||
198 | struct stripe_head { | 197 | struct stripe_head { |
199 | struct hlist_node hash; | 198 | struct hlist_node hash; |
200 | struct list_head lru; /* inactive_list or handle_list */ | 199 | struct list_head lru; /* inactive_list or handle_list */ |
201 | struct raid5_private_data *raid_conf; | 200 | struct raid5_private_data *raid_conf; |
202 | sector_t sector; /* sector of this row */ | 201 | short generation; /* increments with every |
203 | int pd_idx; /* parity disk index */ | 202 | * reshape */ |
204 | unsigned long state; /* state flags */ | 203 | sector_t sector; /* sector of this row */ |
205 | atomic_t count; /* nr of active thread/requests */ | 204 | short pd_idx; /* parity disk index */ |
205 | short qd_idx; /* 'Q' disk index for raid6 */ | ||
206 | short ddf_layout;/* use DDF ordering to calculate Q */ | ||
207 | unsigned long state; /* state flags */ | ||
208 | atomic_t count; /* nr of active thread/requests */ | ||
206 | spinlock_t lock; | 209 | spinlock_t lock; |
207 | int bm_seq; /* sequence number for bitmap flushes */ | 210 | int bm_seq; /* sequence number for bitmap flushes */ |
208 | int disks; /* disks in stripe */ | 211 | int disks; /* disks in stripe */ |
209 | enum check_states check_state; | 212 | enum check_states check_state; |
210 | enum reconstruct_states reconstruct_state; | 213 | enum reconstruct_states reconstruct_state; |
211 | /* stripe_operations | 214 | /* stripe_operations |
@@ -238,7 +241,7 @@ struct stripe_head_state { | |||
238 | 241 | ||
239 | /* r6_state - extra state data only relevant to r6 */ | 242 | /* r6_state - extra state data only relevant to r6 */ |
240 | struct r6_state { | 243 | struct r6_state { |
241 | int p_failed, q_failed, qd_idx, failed_num[2]; | 244 | int p_failed, q_failed, failed_num[2]; |
242 | }; | 245 | }; |
243 | 246 | ||
244 | /* Flags */ | 247 | /* Flags */ |
@@ -268,6 +271,8 @@ struct r6_state { | |||
268 | #define READ_MODIFY_WRITE 2 | 271 | #define READ_MODIFY_WRITE 2 |
269 | /* not a write method, but a compute_parity mode */ | 272 | /* not a write method, but a compute_parity mode */ |
270 | #define CHECK_PARITY 3 | 273 | #define CHECK_PARITY 3 |
274 | /* Additional compute_parity mode -- updates the parity w/o LOCKING */ | ||
275 | #define UPDATE_PARITY 4 | ||
271 | 276 | ||
272 | /* | 277 | /* |
273 | * Stripe state | 278 | * Stripe state |
@@ -319,7 +324,7 @@ struct r6_state { | |||
319 | * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue. | 324 | * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue. |
320 | * HANDLE gets cleared if stripe_handle leaves nothing locked. | 325 | * HANDLE gets cleared if stripe_handle leaves nothing locked. |
321 | */ | 326 | */ |
322 | 327 | ||
323 | 328 | ||
324 | struct disk_info { | 329 | struct disk_info { |
325 | mdk_rdev_t *rdev; | 330 | mdk_rdev_t *rdev; |
@@ -334,12 +339,21 @@ struct raid5_private_data { | |||
334 | int raid_disks; | 339 | int raid_disks; |
335 | int max_nr_stripes; | 340 | int max_nr_stripes; |
336 | 341 | ||
337 | /* used during an expand */ | 342 | /* reshape_progress is the leading edge of a 'reshape' |
338 | sector_t expand_progress; /* MaxSector when no expand happening */ | 343 | * It has value MaxSector when no reshape is happening |
339 | sector_t expand_lo; /* from here up to expand_progress it out-of-bounds | 344 | * If delta_disks < 0, it is the last sector we started work on, |
340 | * as we haven't flushed the metadata yet | 345 | * else it is the next sector to work on. |
341 | */ | 346 | */ |
347 | sector_t reshape_progress; | ||
348 | /* reshape_safe is the trailing edge of a reshape. We know that | ||
349 | * before (or after) this address, all reshape has completed. | ||
350 | */ | ||
351 | sector_t reshape_safe; | ||
342 | int previous_raid_disks; | 352 | int previous_raid_disks; |
353 | int prev_chunk, prev_algo; | ||
354 | short generation; /* increments with every reshape */ | ||
355 | unsigned long reshape_checkpoint; /* Time we last updated | ||
356 | * metadata */ | ||
343 | 357 | ||
344 | struct list_head handle_list; /* stripes needing handling */ | 358 | struct list_head handle_list; /* stripes needing handling */ |
345 | struct list_head hold_list; /* preread ready stripes */ | 359 | struct list_head hold_list; /* preread ready stripes */ |
@@ -385,6 +399,11 @@ struct raid5_private_data { | |||
385 | int pool_size; /* number of disks in stripeheads in pool */ | 399 | int pool_size; /* number of disks in stripeheads in pool */ |
386 | spinlock_t device_lock; | 400 | spinlock_t device_lock; |
387 | struct disk_info *disks; | 401 | struct disk_info *disks; |
402 | |||
403 | /* When taking over an array from a different personality, we store | ||
404 | * the new thread here until we fully activate the array. | ||
405 | */ | ||
406 | struct mdk_thread_s *thread; | ||
388 | }; | 407 | }; |
389 | 408 | ||
390 | typedef struct raid5_private_data raid5_conf_t; | 409 | typedef struct raid5_private_data raid5_conf_t; |
@@ -394,9 +413,62 @@ typedef struct raid5_private_data raid5_conf_t; | |||
394 | /* | 413 | /* |
395 | * Our supported algorithms | 414 | * Our supported algorithms |
396 | */ | 415 | */ |
397 | #define ALGORITHM_LEFT_ASYMMETRIC 0 | 416 | #define ALGORITHM_LEFT_ASYMMETRIC 0 /* Rotating Parity N with Data Restart */ |
398 | #define ALGORITHM_RIGHT_ASYMMETRIC 1 | 417 | #define ALGORITHM_RIGHT_ASYMMETRIC 1 /* Rotating Parity 0 with Data Restart */ |
399 | #define ALGORITHM_LEFT_SYMMETRIC 2 | 418 | #define ALGORITHM_LEFT_SYMMETRIC 2 /* Rotating Parity N with Data Continuation */ |
400 | #define ALGORITHM_RIGHT_SYMMETRIC 3 | 419 | #define ALGORITHM_RIGHT_SYMMETRIC 3 /* Rotating Parity 0 with Data Continuation */ |
420 | |||
421 | /* Define non-rotating (raid4) algorithms. These allow | ||
422 | * conversion of raid4 to raid5. | ||
423 | */ | ||
424 | #define ALGORITHM_PARITY_0 4 /* P or P,Q are initial devices */ | ||
425 | #define ALGORITHM_PARITY_N 5 /* P or P,Q are final devices. */ | ||
426 | |||
427 | /* DDF RAID6 layouts differ from md/raid6 layouts in two ways. | ||
428 | * Firstly, the exact positioning of the parity block is slightly | ||
429 | * different between the 'LEFT_*' modes of md and the "_N_*" modes | ||
430 | * of DDF. | ||
431 | * Secondly, or order of datablocks over which the Q syndrome is computed | ||
432 | * is different. | ||
433 | * Consequently we have different layouts for DDF/raid6 than md/raid6. | ||
434 | * These layouts are from the DDFv1.2 spec. | ||
435 | * Interestingly DDFv1.2-Errata-A does not specify N_CONTINUE but | ||
436 | * leaves RLQ=3 as 'Vendor Specific' | ||
437 | */ | ||
438 | |||
439 | #define ALGORITHM_ROTATING_ZERO_RESTART 8 /* DDF PRL=6 RLQ=1 */ | ||
440 | #define ALGORITHM_ROTATING_N_RESTART 9 /* DDF PRL=6 RLQ=2 */ | ||
441 | #define ALGORITHM_ROTATING_N_CONTINUE 10 /* DDF PRL=6 RLQ=3 */ | ||
442 | |||
443 | |||
444 | /* For every RAID5 algorithm we define a RAID6 algorithm | ||
445 | * with exactly the same layout for data and parity, and | ||
446 | * with the Q block always on the last device (N-1). | ||
447 | * This allows trivial conversion from RAID5 to RAID6 | ||
448 | */ | ||
449 | #define ALGORITHM_LEFT_ASYMMETRIC_6 16 | ||
450 | #define ALGORITHM_RIGHT_ASYMMETRIC_6 17 | ||
451 | #define ALGORITHM_LEFT_SYMMETRIC_6 18 | ||
452 | #define ALGORITHM_RIGHT_SYMMETRIC_6 19 | ||
453 | #define ALGORITHM_PARITY_0_6 20 | ||
454 | #define ALGORITHM_PARITY_N_6 ALGORITHM_PARITY_N | ||
455 | |||
456 | static inline int algorithm_valid_raid5(int layout) | ||
457 | { | ||
458 | return (layout >= 0) && | ||
459 | (layout <= 5); | ||
460 | } | ||
461 | static inline int algorithm_valid_raid6(int layout) | ||
462 | { | ||
463 | return (layout >= 0 && layout <= 5) | ||
464 | || | ||
465 | (layout == 8 || layout == 10) | ||
466 | || | ||
467 | (layout >= 16 && layout <= 20); | ||
468 | } | ||
401 | 469 | ||
470 | static inline int algorithm_is_DDF(int layout) | ||
471 | { | ||
472 | return layout >= 8 && layout <= 10; | ||
473 | } | ||
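
A quick sanity check of the two validity helpers, compiled standalone with the bodies copied from above:

	#include <stdio.h>

	static int algorithm_valid_raid5(int layout)
	{
		return (layout >= 0) && (layout <= 5);
	}

	static int algorithm_valid_raid6(int layout)
	{
		return (layout >= 0 && layout <= 5)
			|| (layout == 8 || layout == 10)
			|| (layout >= 16 && layout <= 20);
	}

	int main(void)
	{
		/* ALGORITHM_LEFT_SYMMETRIC (2) is valid at both levels */
		printf("%d %d\n", algorithm_valid_raid5(2), algorithm_valid_raid6(2));
		/* ALGORITHM_LEFT_SYMMETRIC_6 (18) is raid6-only */
		printf("%d %d\n", algorithm_valid_raid5(18), algorithm_valid_raid6(18));
		return 0;
	}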
402 | #endif | 474 | #endif |
diff --git a/drivers/md/raid6algos.c b/drivers/md/raid6algos.c index 21987e3dbe6c..866215ac7f25 100644 --- a/drivers/md/raid6algos.c +++ b/drivers/md/raid6algos.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | 7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, |
8 | * Bostom MA 02111-1307, USA; either version 2 of the License, or | 8 | * Boston MA 02111-1307, USA; either version 2 of the License, or |
9 | * (at your option) any later version; incorporated herein by reference. | 9 | * (at your option) any later version; incorporated herein by reference. |
10 | * | 10 | * |
11 | * ----------------------------------------------------------------------- */ | 11 | * ----------------------------------------------------------------------- */ |
@@ -16,13 +16,20 @@ | |||
16 | * Algorithm list and algorithm selection for RAID-6 | 16 | * Algorithm list and algorithm selection for RAID-6 |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include "raid6.h" | 19 | #include <linux/raid/pq.h> |
20 | #ifndef __KERNEL__ | 20 | #ifndef __KERNEL__ |
21 | #include <sys/mman.h> | 21 | #include <sys/mman.h> |
22 | #include <stdio.h> | 22 | #include <stdio.h> |
23 | #else | ||
24 | #if !RAID6_USE_EMPTY_ZERO_PAGE | ||
25 | /* In .bss so it's zeroed */ | ||
26 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | ||
27 | EXPORT_SYMBOL(raid6_empty_zero_page); | ||
28 | #endif | ||
23 | #endif | 29 | #endif |
24 | 30 | ||
25 | struct raid6_calls raid6_call; | 31 | struct raid6_calls raid6_call; |
32 | EXPORT_SYMBOL_GPL(raid6_call); | ||
26 | 33 | ||
27 | /* Various routine sets */ | 34 | /* Various routine sets */ |
28 | extern const struct raid6_calls raid6_intx1; | 35 | extern const struct raid6_calls raid6_intx1; |
@@ -79,6 +86,7 @@ const struct raid6_calls * const raid6_algos[] = { | |||
79 | #else | 86 | #else |
80 | /* Need more time to be stable in userspace */ | 87 | /* Need more time to be stable in userspace */ |
81 | #define RAID6_TIME_JIFFIES_LG2 9 | 88 | #define RAID6_TIME_JIFFIES_LG2 9 |
89 | #define time_before(x, y) ((x) < (y)) | ||
82 | #endif | 90 | #endif |
83 | 91 | ||
84 | /* Try to pick the best algorithm */ | 92 | /* Try to pick the best algorithm */ |
@@ -152,3 +160,12 @@ int __init raid6_select_algo(void) | |||
152 | 160 | ||
153 | return best ? 0 : -EINVAL; | 161 | return best ? 0 : -EINVAL; |
154 | } | 162 | } |
163 | |||
164 | static void raid6_exit(void) | ||
165 | { | ||
166 | do { } while (0); | ||
167 | } | ||
168 | |||
169 | subsys_initcall(raid6_select_algo); | ||
170 | module_exit(raid6_exit); | ||
171 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/md/raid6altivec.uc b/drivers/md/raid6altivec.uc index b9afd35b8812..699dfeee4944 100644 --- a/drivers/md/raid6altivec.uc +++ b/drivers/md/raid6altivec.uc | |||
@@ -5,7 +5,7 @@ | |||
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | 7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, |
8 | * Bostom MA 02111-1307, USA; either version 2 of the License, or | 8 | * Boston MA 02111-1307, USA; either version 2 of the License, or |
9 | * (at your option) any later version; incorporated herein by reference. | 9 | * (at your option) any later version; incorporated herein by reference. |
10 | * | 10 | * |
11 | * ----------------------------------------------------------------------- */ | 11 | * ----------------------------------------------------------------------- */ |
@@ -22,7 +22,7 @@ | |||
22 | * bracketed this with preempt_disable/enable or in a lock) | 22 | * bracketed this with preempt_disable/enable or in a lock) |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include "raid6.h" | 25 | #include <linux/raid/pq.h> |
26 | 26 | ||
27 | #ifdef CONFIG_ALTIVEC | 27 | #ifdef CONFIG_ALTIVEC |
28 | 28 | ||
diff --git a/drivers/md/raid6int.uc b/drivers/md/raid6int.uc index ad004cee0e26..f9bf9cba357f 100644 --- a/drivers/md/raid6int.uc +++ b/drivers/md/raid6int.uc | |||
@@ -5,7 +5,7 @@ | |||
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | 7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, |
8 | * Bostom MA 02111-1307, USA; either version 2 of the License, or | 8 | * Boston MA 02111-1307, USA; either version 2 of the License, or |
9 | * (at your option) any later version; incorporated herein by reference. | 9 | * (at your option) any later version; incorporated herein by reference. |
10 | * | 10 | * |
11 | * ----------------------------------------------------------------------- */ | 11 | * ----------------------------------------------------------------------- */ |
@@ -18,7 +18,7 @@ | |||
18 | * This file is postprocessed using unroll.pl | 18 | * This file is postprocessed using unroll.pl |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include "raid6.h" | 21 | #include <linux/raid/pq.h> |
22 | 22 | ||
23 | /* | 23 | /* |
24 | * This is the C data type to use | 24 | * This is the C data type to use |
diff --git a/drivers/md/raid6mmx.c b/drivers/md/raid6mmx.c index d4e4a1bd70ad..e7f6c13132bf 100644 --- a/drivers/md/raid6mmx.c +++ b/drivers/md/raid6mmx.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | 7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, |
8 | * Bostom MA 02111-1307, USA; either version 2 of the License, or | 8 | * Boston MA 02111-1307, USA; either version 2 of the License, or |
9 | * (at your option) any later version; incorporated herein by reference. | 9 | * (at your option) any later version; incorporated herein by reference. |
10 | * | 10 | * |
11 | * ----------------------------------------------------------------------- */ | 11 | * ----------------------------------------------------------------------- */ |
@@ -18,7 +18,7 @@ | |||
18 | 18 | ||
19 | #if defined(__i386__) && !defined(__arch_um__) | 19 | #if defined(__i386__) && !defined(__arch_um__) |
20 | 20 | ||
21 | #include "raid6.h" | 21 | #include <linux/raid/pq.h> |
22 | #include "raid6x86.h" | 22 | #include "raid6x86.h" |
23 | 23 | ||
24 | /* Shared with raid6sse1.c */ | 24 | /* Shared with raid6sse1.c */ |
diff --git a/drivers/md/raid6recov.c b/drivers/md/raid6recov.c index a8c4d9451bd9..2609f00e0d61 100644 --- a/drivers/md/raid6recov.c +++ b/drivers/md/raid6recov.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | 7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, |
8 | * Bostom MA 02111-1307, USA; either version 2 of the License, or | 8 | * Boston MA 02111-1307, USA; either version 2 of the License, or |
9 | * (at your option) any later version; incorporated herein by reference. | 9 | * (at your option) any later version; incorporated herein by reference. |
10 | * | 10 | * |
11 | * ----------------------------------------------------------------------- */ | 11 | * ----------------------------------------------------------------------- */ |
@@ -18,7 +18,7 @@ | |||
18 | * the syndrome.) | 18 | * the syndrome.) |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include "raid6.h" | 21 | #include <linux/raid/pq.h> |
22 | 22 | ||
23 | /* Recover two failed data blocks. */ | 23 | /* Recover two failed data blocks. */ |
24 | void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, | 24 | void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, |
@@ -63,9 +63,7 @@ void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, | |||
63 | p++; q++; | 63 | p++; q++; |
64 | } | 64 | } |
65 | } | 65 | } |
66 | 66 | EXPORT_SYMBOL_GPL(raid6_2data_recov); | |
67 | |||
68 | |||
69 | 67 | ||
70 | /* Recover failure of one data block plus the P block */ | 68 | /* Recover failure of one data block plus the P block */ |
71 | void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) | 69 | void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) |
@@ -97,9 +95,10 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) | |||
97 | q++; dq++; | 95 | q++; dq++; |
98 | } | 96 | } |
99 | } | 97 | } |
98 | EXPORT_SYMBOL_GPL(raid6_datap_recov); | ||
100 | 99 | ||
101 | 100 | #ifndef __KERNEL__ | |
102 | #ifndef __KERNEL__ /* Testing only */ | 101 | /* Testing only */ |
103 | 102 | ||
104 | /* Recover two failed blocks. */ | 103 | /* Recover two failed blocks. */ |
105 | void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs) | 104 | void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs) |
diff --git a/drivers/md/raid6sse1.c b/drivers/md/raid6sse1.c index 0666237276ff..b274dd5eab8f 100644 --- a/drivers/md/raid6sse1.c +++ b/drivers/md/raid6sse1.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | 7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, |
8 | * Bostom MA 02111-1307, USA; either version 2 of the License, or | 8 | * Boston MA 02111-1307, USA; either version 2 of the License, or |
9 | * (at your option) any later version; incorporated herein by reference. | 9 | * (at your option) any later version; incorporated herein by reference. |
10 | * | 10 | * |
11 | * ----------------------------------------------------------------------- */ | 11 | * ----------------------------------------------------------------------- */ |
@@ -23,7 +23,7 @@ | |||
23 | 23 | ||
24 | #if defined(__i386__) && !defined(__arch_um__) | 24 | #if defined(__i386__) && !defined(__arch_um__) |
25 | 25 | ||
26 | #include "raid6.h" | 26 | #include <linux/raid/pq.h> |
27 | #include "raid6x86.h" | 27 | #include "raid6x86.h" |
28 | 28 | ||
29 | /* Defined in raid6mmx.c */ | 29 | /* Defined in raid6mmx.c */ |
diff --git a/drivers/md/raid6sse2.c b/drivers/md/raid6sse2.c index b034ad868039..6ed6c6c0389f 100644 --- a/drivers/md/raid6sse2.c +++ b/drivers/md/raid6sse2.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | 7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, |
8 | * Bostom MA 02111-1307, USA; either version 2 of the License, or | 8 | * Boston MA 02111-1307, USA; either version 2 of the License, or |
9 | * (at your option) any later version; incorporated herein by reference. | 9 | * (at your option) any later version; incorporated herein by reference. |
10 | * | 10 | * |
11 | * ----------------------------------------------------------------------- */ | 11 | * ----------------------------------------------------------------------- */ |
@@ -19,7 +19,7 @@ | |||
19 | 19 | ||
20 | #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) | 20 | #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) |
21 | 21 | ||
22 | #include "raid6.h" | 22 | #include <linux/raid/pq.h> |
23 | #include "raid6x86.h" | 23 | #include "raid6x86.h" |
24 | 24 | ||
25 | static const struct raid6_sse_constants { | 25 | static const struct raid6_sse_constants { |
diff --git a/drivers/md/raid6test/Makefile b/drivers/md/raid6test/Makefile index 78e0396adf2a..58ffdf4f5161 100644 --- a/drivers/md/raid6test/Makefile +++ b/drivers/md/raid6test/Makefile | |||
@@ -5,7 +5,7 @@ | |||
5 | 5 | ||
6 | CC = gcc | 6 | CC = gcc |
7 | OPTFLAGS = -O2 # Adjust as desired | 7 | OPTFLAGS = -O2 # Adjust as desired |
8 | CFLAGS = -I.. -g $(OPTFLAGS) | 8 | CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS) |
9 | LD = ld | 9 | LD = ld |
10 | PERL = perl | 10 | PERL = perl |
11 | AR = ar | 11 | AR = ar |
diff --git a/drivers/md/raid6test/test.c b/drivers/md/raid6test/test.c index 559cc41b2585..7a930318b17d 100644 --- a/drivers/md/raid6test/test.c +++ b/drivers/md/raid6test/test.c | |||
@@ -17,7 +17,7 @@ | |||
17 | #include <stdlib.h> | 17 | #include <stdlib.h> |
18 | #include <stdio.h> | 18 | #include <stdio.h> |
19 | #include <string.h> | 19 | #include <string.h> |
20 | #include "raid6.h" | 20 | #include <linux/raid/pq.h> |
21 | 21 | ||
22 | #define NDISKS 16 /* Including P and Q */ | 22 | #define NDISKS 16 /* Including P and Q */ |
23 | 23 | ||
diff --git a/drivers/md/raid6x86.h b/drivers/md/raid6x86.h index 99fea7a70ca7..4c22c1568558 100644 --- a/drivers/md/raid6x86.h +++ b/drivers/md/raid6x86.h | |||
@@ -5,7 +5,7 @@ | |||
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | 7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, |
8 | * Bostom MA 02111-1307, USA; either version 2 of the License, or | 8 | * Boston MA 02111-1307, USA; either version 2 of the License, or |
9 | * (at your option) any later version; incorporated herein by reference. | 9 | * (at your option) any later version; incorporated herein by reference. |
10 | * | 10 | * |
11 | * ----------------------------------------------------------------------- */ | 11 | * ----------------------------------------------------------------------- */ |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index ff786687e93b..3e87ce443ea2 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -23,7 +23,7 @@ | |||
23 | #include <linux/if.h> | 23 | #include <linux/if.h> |
24 | #include <linux/if_bridge.h> | 24 | #include <linux/if_bridge.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/raid/md.h> | 26 | #include <linux/raid/md_u.h> |
27 | #include <linux/kd.h> | 27 | #include <linux/kd.h> |
28 | #include <linux/route.h> | 28 | #include <linux/route.h> |
29 | #include <linux/in6.h> | 29 | #include <linux/in6.h> |
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h deleted file mode 100644 index 82bea14cae1a..000000000000 --- a/include/linux/raid/md.h +++ /dev/null | |||
@@ -1,81 +0,0 @@ | |||
1 | /* | ||
2 | md.h : Multiple Devices driver for Linux | ||
3 | Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman | ||
4 | Copyright (C) 1994-96 Marc ZYNGIER | ||
5 | <zyngier@ufr-info-p7.ibp.fr> or | ||
6 | <maz@gloups.fdn.fr> | ||
7 | |||
8 | This program is free software; you can redistribute it and/or modify | ||
9 | it under the terms of the GNU General Public License as published by | ||
10 | the Free Software Foundation; either version 2, or (at your option) | ||
11 | any later version. | ||
12 | |||
13 | You should have received a copy of the GNU General Public License | ||
14 | (for example /usr/src/linux/COPYING); if not, write to the Free | ||
15 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
16 | */ | ||
17 | |||
18 | #ifndef _MD_H | ||
19 | #define _MD_H | ||
20 | |||
21 | #include <linux/blkdev.h> | ||
22 | #include <linux/seq_file.h> | ||
23 | |||
24 | /* | ||
25 | * 'md_p.h' holds the 'physical' layout of RAID devices | ||
26 | * 'md_u.h' holds the user <=> kernel API | ||
27 | * | ||
28 | * 'md_k.h' holds kernel internal definitions | ||
29 | */ | ||
30 | |||
31 | #include <linux/raid/md_p.h> | ||
32 | #include <linux/raid/md_u.h> | ||
33 | #include <linux/raid/md_k.h> | ||
34 | |||
35 | #ifdef CONFIG_MD | ||
36 | |||
37 | /* | ||
38 | * Different major versions are not compatible. | ||
39 | * Different minor versions are only downward compatible. | ||
40 | * Different patchlevel versions are downward and upward compatible. | ||
41 | */ | ||
42 | #define MD_MAJOR_VERSION 0 | ||
43 | #define MD_MINOR_VERSION 90 | ||
44 | /* | ||
45 | * MD_PATCHLEVEL_VERSION indicates kernel functionality. | ||
46 | * >=1 means different superblock formats are selectable using SET_ARRAY_INFO | ||
47 | * and major_version/minor_version accordingly | ||
48 | * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT | ||
49 | * in the super status byte | ||
50 | * >=3 means that bitmap superblock version 4 is supported, which uses | ||
51 | * little-endian representation rather than host-endian | ||
52 | */ | ||
53 | #define MD_PATCHLEVEL_VERSION 3 | ||
54 | |||
55 | extern int mdp_major; | ||
56 | |||
57 | extern int register_md_personality(struct mdk_personality *p); | ||
58 | extern int unregister_md_personality(struct mdk_personality *p); | ||
59 | extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev), | ||
60 | mddev_t *mddev, const char *name); | ||
61 | extern void md_unregister_thread(mdk_thread_t *thread); | ||
62 | extern void md_wakeup_thread(mdk_thread_t *thread); | ||
63 | extern void md_check_recovery(mddev_t *mddev); | ||
64 | extern void md_write_start(mddev_t *mddev, struct bio *bi); | ||
65 | extern void md_write_end(mddev_t *mddev); | ||
66 | extern void md_done_sync(mddev_t *mddev, int blocks, int ok); | ||
67 | extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); | ||
68 | |||
69 | extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, | ||
70 | sector_t sector, int size, struct page *page); | ||
71 | extern void md_super_wait(mddev_t *mddev); | ||
72 | extern int sync_page_io(struct block_device *bdev, sector_t sector, int size, | ||
73 | struct page *page, int rw); | ||
74 | extern void md_do_sync(mddev_t *mddev); | ||
75 | extern void md_new_event(mddev_t *mddev); | ||
76 | extern int md_allow_write(mddev_t *mddev); | ||
77 | extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | ||
78 | |||
79 | #endif /* CONFIG_MD */ | ||
80 | #endif | ||
81 | |||
diff --git a/include/linux/raid/md_u.h b/include/linux/raid/md_u.h index 7192035fc4b0..fb1abb3367e9 100644 --- a/include/linux/raid/md_u.h +++ b/include/linux/raid/md_u.h | |||
@@ -15,6 +15,24 @@ | |||
15 | #ifndef _MD_U_H | 15 | #ifndef _MD_U_H |
16 | #define _MD_U_H | 16 | #define _MD_U_H |
17 | 17 | ||
18 | /* | ||
19 | * Different major versions are not compatible. | ||
20 | * Different minor versions are only downward compatible. | ||
21 | * Different patchlevel versions are downward and upward compatible. | ||
22 | */ | ||
23 | #define MD_MAJOR_VERSION 0 | ||
24 | #define MD_MINOR_VERSION 90 | ||
25 | /* | ||
26 | * MD_PATCHLEVEL_VERSION indicates kernel functionality. | ||
27 | * >=1 means different superblock formats are selectable using SET_ARRAY_INFO | ||
28 | * and major_version/minor_version accordingly | ||
29 | * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT | ||
30 | * in the super status byte | ||
31 | * >=3 means that bitmap superblock version 4 is supported, which uses | ||
32 | * little-endian representation rather than host-endian | ||
33 | */ | ||
34 | #define MD_PATCHLEVEL_VERSION 3 | ||
35 | |||
18 | /* ioctls */ | 36 | /* ioctls */ |
19 | 37 | ||
20 | /* status */ | 38 | /* status */ |
@@ -46,6 +64,12 @@ | |||
46 | #define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) | 64 | #define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) |
47 | #define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) | 65 | #define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) |
48 | 66 | ||
67 | /* 63 partitions with the alternate major number (mdp) */ | ||
68 | #define MdpMinorShift 6 | ||
69 | #ifdef __KERNEL__ | ||
70 | extern int mdp_major; | ||
71 | #endif | ||
72 | |||
49 | typedef struct mdu_version_s { | 73 | typedef struct mdu_version_s { |
50 | int major; | 74 | int major; |
51 | int minor; | 75 | int minor; |
@@ -85,6 +109,17 @@ typedef struct mdu_array_info_s { | |||
85 | 109 | ||
86 | } mdu_array_info_t; | 110 | } mdu_array_info_t; |
87 | 111 | ||
112 | /* non-obvious values for 'level' */ | ||
113 | #define LEVEL_MULTIPATH (-4) | ||
114 | #define LEVEL_LINEAR (-1) | ||
115 | #define LEVEL_FAULTY (-5) | ||
116 | |||
117 | /* we need a value for 'no level specified' and 0 | ||
118 | * means 'raid0', so we need something else. This is | ||
119 | * for internal use only | ||
120 | */ | ||
121 | #define LEVEL_NONE (-1000000) | ||
122 | |||
88 | typedef struct mdu_disk_info_s { | 123 | typedef struct mdu_disk_info_s { |
89 | /* | 124 | /* |
90 | * configuration/status of one particular disk | 125 | * configuration/status of one particular disk |
diff --git a/drivers/md/raid6.h b/include/linux/raid/pq.h index 98dcde88470e..d92480f8285c 100644 --- a/drivers/md/raid6.h +++ b/include/linux/raid/pq.h | |||
@@ -5,7 +5,7 @@ | |||
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | 7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, |
8 | * Bostom MA 02111-1307, USA; either version 2 of the License, or | 8 | * Boston MA 02111-1307, USA; either version 2 of the License, or |
9 | * (at your option) any later version; incorporated herein by reference. | 9 | * (at your option) any later version; incorporated herein by reference. |
10 | * | 10 | * |
11 | * ----------------------------------------------------------------------- */ | 11 | * ----------------------------------------------------------------------- */ |
@@ -17,14 +17,7 @@ | |||
17 | 17 | ||
18 | /* Set to 1 to use kernel-wide empty_zero_page */ | 18 | /* Set to 1 to use kernel-wide empty_zero_page */ |
19 | #define RAID6_USE_EMPTY_ZERO_PAGE 0 | 19 | #define RAID6_USE_EMPTY_ZERO_PAGE 0 |
20 | 20 | #include <linux/blkdev.h> | |
21 | #include <linux/raid/md.h> | ||
22 | #include <linux/raid/raid5.h> | ||
23 | |||
24 | typedef raid5_conf_t raid6_conf_t; /* Same configuration */ | ||
25 | |||
26 | /* Additional compute_parity mode -- updates the parity w/o LOCKING */ | ||
27 | #define UPDATE_PARITY 4 | ||
28 | 21 | ||
29 | /* We need a pre-zeroed page... if we don't want to use the kernel-provided | 22 | /* We need a pre-zeroed page... if we don't want to use the kernel-provided |
30 | one define it here */ | 23 | one define it here */ |
@@ -68,6 +61,10 @@ extern const char raid6_empty_zero_page[PAGE_SIZE]; | |||
68 | #define enable_kernel_altivec() | 61 | #define enable_kernel_altivec() |
69 | #define disable_kernel_altivec() | 62 | #define disable_kernel_altivec() |
70 | 63 | ||
64 | #define EXPORT_SYMBOL(sym) | ||
65 | #define MODULE_LICENSE(licence) | ||
66 | #define subsys_initcall(x) | ||
67 | #define module_exit(x) | ||
71 | #endif /* __KERNEL__ */ | 68 | #endif /* __KERNEL__ */ |
72 | 69 | ||
73 | /* Routine choices */ | 70 | /* Routine choices */ |
@@ -98,9 +95,11 @@ extern const u8 raid6_gfinv[256] __attribute__((aligned(256))); | |||
98 | extern const u8 raid6_gfexi[256] __attribute__((aligned(256))); | 95 | extern const u8 raid6_gfexi[256] __attribute__((aligned(256))); |
99 | 96 | ||
100 | /* Recovery routines */ | 97 | /* Recovery routines */ |
101 | void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, void **ptrs); | 98 | void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, |
99 | void **ptrs); | ||
102 | void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs); | 100 | void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs); |
103 | void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs); | 101 | void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, |
102 | void **ptrs); | ||
104 | 103 | ||
105 | /* Some definitions to allow code to be compiled for testing in userspace */ | 104 | /* Some definitions to allow code to be compiled for testing in userspace */ |
106 | #ifndef __KERNEL__ | 105 | #ifndef __KERNEL__ |
@@ -108,8 +107,11 @@ void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs | |||
108 | # define jiffies raid6_jiffies() | 107 | # define jiffies raid6_jiffies() |
109 | # define printk printf | 108 | # define printk printf |
110 | # define GFP_KERNEL 0 | 109 | # define GFP_KERNEL 0 |
111 | # define __get_free_pages(x,y) ((unsigned long)mmap(NULL, PAGE_SIZE << (y), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0)) | 110 | # define __get_free_pages(x, y) ((unsigned long)mmap(NULL, PAGE_SIZE << (y), \ |
112 | # define free_pages(x,y) munmap((void *)(x), (y)*PAGE_SIZE) | 111 | PROT_READ|PROT_WRITE, \ |
112 | MAP_PRIVATE|MAP_ANONYMOUS,\ | ||
113 | 0, 0)) | ||
114 | # define free_pages(x, y) munmap((void *)(x), (y)*PAGE_SIZE) | ||
113 | 115 | ||
114 | static inline void cpu_relax(void) | 116 | static inline void cpu_relax(void) |
115 | { | 117 | { |
diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h index 3e120587eada..5a210959e3f8 100644 --- a/include/linux/raid/xor.h +++ b/include/linux/raid/xor.h | |||
@@ -1,8 +1,6 @@ | |||
1 | #ifndef _XOR_H | 1 | #ifndef _XOR_H |
2 | #define _XOR_H | 2 | #define _XOR_H |
3 | 3 | ||
4 | #include <linux/raid/md.h> | ||
5 | |||
6 | #define MAX_XOR_BLOCKS 4 | 4 | #define MAX_XOR_BLOCKS 4 |
7 | 5 | ||
8 | extern void xor_blocks(unsigned int count, unsigned int bytes, | 6 | extern void xor_blocks(unsigned int count, unsigned int bytes, |
diff --git a/init/do_mounts.h b/init/do_mounts.h index 9aa968d54329..f5b978a9bb92 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h | |||
@@ -1,4 +1,5 @@ | |||
1 | #include <linux/kernel.h> | 1 | #include <linux/kernel.h> |
2 | #include <linux/blkdev.h> | ||
2 | #include <linux/init.h> | 3 | #include <linux/init.h> |
3 | #include <linux/syscalls.h> | 4 | #include <linux/syscalls.h> |
4 | #include <linux/unistd.h> | 5 | #include <linux/unistd.h> |
diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c index 9bdddbcb3d6a..69aebbf8fd2d 100644 --- a/init/do_mounts_md.c +++ b/init/do_mounts_md.c | |||
@@ -1,5 +1,6 @@ | |||
1 | #include <linux/delay.h> | 1 | #include <linux/delay.h> |
2 | #include <linux/raid/md.h> | 2 | #include <linux/raid/md_u.h> |
3 | #include <linux/raid/md_p.h> | ||
3 | 4 | ||
4 | #include "do_mounts.h" | 5 | #include "do_mounts.h" |
5 | 6 | ||
@@ -112,8 +113,6 @@ static int __init md_setup(char *str) | |||
112 | return 1; | 113 | return 1; |
113 | } | 114 | } |
114 | 115 | ||
115 | #define MdpMinorShift 6 | ||
116 | |||
117 | static void __init md_setup_drive(void) | 116 | static void __init md_setup_drive(void) |
118 | { | 117 | { |
119 | int minor, i, ent, partitioned; | 118 | int minor, i, ent, partitioned; |