diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-08-03 02:49:21 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-08-03 02:49:21 -0400 |
commit | f3406816bb2486fc44558bec77179cd9bcbd4450 (patch) | |
tree | 718db1ef45e55314b5e7290f77e70e6328d855a4 | |
parent | 4400478ba3d939b680810aa004f1e954b4f8ba16 (diff) | |
parent | ed8b752bccf2560e305e25125721d2f0ac759e88 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm
* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: (34 commits)
dm table: set flush capability based on underlying devices
dm crypt: optionally support discard requests
dm raid: add md raid1 support
dm raid: support metadata devices
dm raid: add write_mostly parameter
dm raid: add region_size parameter
dm raid: improve table parameters documentation
dm ioctl: forbid multiple device specifiers
dm ioctl: introduce __get_dev_cell
dm ioctl: fill in device parameters in more ioctls
dm flakey: add corrupt_bio_byte feature
dm flakey: add drop_writes
dm flakey: support feature args
dm flakey: use dm_target_offset and support discards
dm table: share target argument parsing functions
dm snapshot: skip reading origin when overwriting complete chunk
dm: ignore merge_bvec for snapshots when safe
dm table: clean dm_get_device and move exports
dm raid: tidy includes
dm ioctl: prevent empty message
...
-rw-r--r-- | Documentation/device-mapper/dm-crypt.txt | 21 | ||||
-rw-r--r-- | Documentation/device-mapper/dm-flakey.txt | 48 | ||||
-rw-r--r-- | Documentation/device-mapper/dm-raid.txt | 138 | ||||
-rw-r--r-- | drivers/md/Kconfig | 5 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 62 | ||||
-rw-r--r-- | drivers/md/dm-flakey.c | 270 | ||||
-rw-r--r-- | drivers/md/dm-io.c | 29 | ||||
-rw-r--r-- | drivers/md/dm-ioctl.c | 89 | ||||
-rw-r--r-- | drivers/md/dm-kcopyd.c | 42 | ||||
-rw-r--r-- | drivers/md/dm-log-userspace-base.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-log.c | 32 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 147 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 621 | ||||
-rw-r--r-- | drivers/md/dm-snap-persistent.c | 80 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 84 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 155 | ||||
-rw-r--r-- | drivers/md/dm.c | 75 | ||||
-rw-r--r-- | drivers/md/dm.h | 2 | ||||
-rw-r--r-- | include/linux/device-mapper.h | 43 | ||||
-rw-r--r-- | include/linux/dm-ioctl.h | 4 | ||||
-rw-r--r-- | include/linux/dm-kcopyd.h | 15 |
21 files changed, 1561 insertions, 404 deletions
diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt index 6b5c42dbbe8..2c656ae43ba 100644 --- a/Documentation/device-mapper/dm-crypt.txt +++ b/Documentation/device-mapper/dm-crypt.txt | |||
@@ -4,7 +4,8 @@ dm-crypt | |||
4 | Device-Mapper's "crypt" target provides transparent encryption of block devices | 4 | Device-Mapper's "crypt" target provides transparent encryption of block devices |
5 | using the kernel crypto API. | 5 | using the kernel crypto API. |
6 | 6 | ||
7 | Parameters: <cipher> <key> <iv_offset> <device path> <offset> | 7 | Parameters: <cipher> <key> <iv_offset> <device path> \ |
8 | <offset> [<#opt_params> <opt_params>] | ||
8 | 9 | ||
9 | <cipher> | 10 | <cipher> |
10 | Encryption cipher and an optional IV generation mode. | 11 | Encryption cipher and an optional IV generation mode. |
@@ -37,6 +38,24 @@ Parameters: <cipher> <key> <iv_offset> <device path> <offset> | |||
37 | <offset> | 38 | <offset> |
38 | Starting sector within the device where the encrypted data begins. | 39 | Starting sector within the device where the encrypted data begins. |
39 | 40 | ||
41 | <#opt_params> | ||
42 | Number of optional parameters. If there are no optional parameters, | ||
43 | the optional parameters section can be skipped or #opt_params can be zero. | ||
44 | Otherwise #opt_params is the number of following arguments. | ||
45 | |||
46 | Example of optional parameters section: | ||
47 | 1 allow_discards | ||
48 | |||
49 | allow_discards | ||
50 | Block discard requests (a.k.a. TRIM) are passed through the crypt device. | ||
51 | The default is to ignore discard requests. | ||
52 | |||
53 | WARNING: Assess the specific security risks carefully before enabling this | ||
54 | option. For example, allowing discards on encrypted devices may lead to | ||
55 | the leak of information about the ciphertext device (filesystem type, | ||
56 | used space etc.) if the discarded blocks can be located easily on the | ||
57 | device later. | ||
58 | |||
40 | Example scripts | 59 | Example scripts |
41 | =============== | 60 | =============== |
42 | LUKS (Linux Unified Key Setup) is now the preferred way to set up disk | 61 | LUKS (Linux Unified Key Setup) is now the preferred way to set up disk |
diff --git a/Documentation/device-mapper/dm-flakey.txt b/Documentation/device-mapper/dm-flakey.txt index c8efdfd19a6..6ff5c232722 100644 --- a/Documentation/device-mapper/dm-flakey.txt +++ b/Documentation/device-mapper/dm-flakey.txt | |||
@@ -1,17 +1,53 @@ | |||
1 | dm-flakey | 1 | dm-flakey |
2 | ========= | 2 | ========= |
3 | 3 | ||
4 | This target is the same as the linear target except that it returns I/O | 4 | This target is the same as the linear target except that it exhibits |
5 | errors periodically. It's been found useful in simulating failing | 5 | unreliable behaviour periodically. It's been found useful in simulating |
6 | devices for testing purposes. | 6 | failing devices for testing purposes. |
7 | 7 | ||
8 | Starting from the time the table is loaded, the device is available for | 8 | Starting from the time the table is loaded, the device is available for |
9 | <up interval> seconds, then returns errors for <down interval> seconds, | 9 | <up interval> seconds, then exhibits unreliable behaviour for <down |
10 | and then this cycle repeats. | 10 | interval> seconds, and then this cycle repeats. |
11 | 11 | ||
12 | Parameters: <dev path> <offset> <up interval> <down interval> | 12 | Also, consider using this in combination with the dm-delay target too, |
13 | which can delay reads and writes and/or send them to different | ||
14 | underlying devices. | ||
15 | |||
16 | Table parameters | ||
17 | ---------------- | ||
18 | <dev path> <offset> <up interval> <down interval> \ | ||
19 | [<num_features> [<feature arguments>]] | ||
20 | |||
21 | Mandatory parameters: | ||
13 | <dev path>: Full pathname to the underlying block-device, or a | 22 | <dev path>: Full pathname to the underlying block-device, or a |
14 | "major:minor" device-number. | 23 | "major:minor" device-number. |
15 | <offset>: Starting sector within the device. | 24 | <offset>: Starting sector within the device. |
16 | <up interval>: Number of seconds device is available. | 25 | <up interval>: Number of seconds device is available. |
17 | <down interval>: Number of seconds device returns errors. | 26 | <down interval>: Number of seconds device returns errors. |
27 | |||
28 | Optional feature parameters: | ||
29 | If no feature parameters are present, during the periods of | ||
30 | unreliability, all I/O returns errors. | ||
31 | |||
32 | drop_writes: | ||
33 | All write I/O is silently ignored. | ||
34 | Read I/O is handled correctly. | ||
35 | |||
36 | corrupt_bio_byte <Nth_byte> <direction> <value> <flags>: | ||
37 | During <down interval>, replace <Nth_byte> of the data of | ||
38 | each matching bio with <value>. | ||
39 | |||
40 | <Nth_byte>: The offset of the byte to replace. | ||
41 | Counting starts at 1, to replace the first byte. | ||
42 | <direction>: Either 'r' to corrupt reads or 'w' to corrupt writes. | ||
43 | 'w' is incompatible with drop_writes. | ||
44 | <value>: The value (from 0-255) to write. | ||
45 | <flags>: Perform the replacement only if bio->bi_rw has all the | ||
46 | selected flags set. | ||
47 | |||
48 | Examples: | ||
49 | corrupt_bio_byte 32 r 1 0 | ||
50 | - replaces the 32nd byte of READ bios with the value 1 | ||
51 | |||
52 | corrupt_bio_byte 224 w 0 32 | ||
53 | - replaces the 224th byte of REQ_META (=32) bios with the value 0 | ||
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index 33b6b7071ac..2a8c11331d2 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt | |||
@@ -1,70 +1,108 @@ | |||
1 | Device-mapper RAID (dm-raid) is a bridge from DM to MD. It | 1 | dm-raid |
2 | provides a way to use device-mapper interfaces to access the MD RAID | 2 | ------- |
3 | drivers. | ||
4 | 3 | ||
5 | As with all device-mapper targets, the nominal public interfaces are the | 4 | The device-mapper RAID (dm-raid) target provides a bridge from DM to MD. |
6 | constructor (CTR) tables and the status outputs (both STATUSTYPE_INFO | 5 | It allows the MD RAID drivers to be accessed using a device-mapper |
7 | and STATUSTYPE_TABLE). The CTR table looks like the following: | 6 | interface. |
8 | 7 | ||
9 | 1: <s> <l> raid \ | 8 | The target is named "raid" and it accepts the following parameters: |
10 | 2: <raid_type> <#raid_params> <raid_params> \ | 9 | |
11 | 3: <#raid_devs> <meta_dev1> <dev1> .. <meta_devN> <devN> | 10 | <raid_type> <#raid_params> <raid_params> \ |
12 | 11 | <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>] | |
13 | Line 1 contains the standard first three arguments to any device-mapper | 12 | |
14 | target - the start, length, and target type fields. The target type in | 13 | <raid_type>: |
15 | this case is "raid". | 14 | raid1 RAID1 mirroring |
16 | 15 | raid4 RAID4 dedicated parity disk | |
17 | Line 2 contains the arguments that define the particular raid | 16 | raid5_la RAID5 left asymmetric |
18 | type/personality/level, the required arguments for that raid type, and | 17 | - rotating parity 0 with data continuation |
19 | any optional arguments. Possible raid types include: raid4, raid5_la, | 18 | raid5_ra RAID5 right asymmetric |
20 | raid5_ls, raid5_rs, raid6_zr, raid6_nr, and raid6_nc. (raid1 is | 19 | - rotating parity N with data continuation |
21 | planned for the future.) The list of required and optional parameters | 20 | raid5_ls RAID5 left symmetric |
22 | is the same for all the current raid types. The required parameters are | 21 | - rotating parity 0 with data restart |
23 | positional, while the optional parameters are given as key/value pairs. | 22 | raid5_rs RAID5 right symmetric |
24 | The possible parameters are as follows: | 23 | - rotating parity N with data restart |
25 | <chunk_size> Chunk size in sectors. | 24 | raid6_zr RAID6 zero restart |
26 | [[no]sync] Force/Prevent RAID initialization | 25 | - rotating parity zero (left-to-right) with data restart |
27 | [rebuild <idx>] Rebuild the drive indicated by the index | 26 | raid6_nr RAID6 N restart |
28 | [daemon_sleep <ms>] Time between bitmap daemon work to clear bits | 27 | - rotating parity N (right-to-left) with data restart |
29 | [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization | 28 | raid6_nc RAID6 N continue |
30 | [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization | 29 | - rotating parity N (right-to-left) with data continuation |
31 | [max_write_behind <sectors>] See '-write-behind=' (man mdadm) | 30 | |
32 | [stripe_cache <sectors>] Stripe cache size for higher RAIDs | 31 | Reference: Chapter 4 of |
33 | 32 | http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf | |
34 | Line 3 contains the list of devices that compose the array in | 33 | |
35 | metadata/data device pairs. If the metadata is stored separately, a '-' | 34 | <#raid_params>: The number of parameters that follow. |
36 | is given for the metadata device position. If a drive has failed or is | 35 | |
37 | missing at creation time, a '-' can be given for both the metadata and | 36 | <raid_params> consists of |
38 | data drives for a given position. | 37 | Mandatory parameters: |
39 | 38 | <chunk_size>: Chunk size in sectors. This parameter is often known as | |
40 | NB. Currently all metadata devices must be specified as '-'. | 39 | "stripe size". It is the only mandatory parameter and |
41 | 40 | is placed first. | |
42 | Examples: | 41 | |
43 | # RAID4 - 4 data drives, 1 parity | 42 | followed by optional parameters (in any order): |
43 | [sync|nosync] Force or prevent RAID initialization. | ||
44 | |||
45 | [rebuild <idx>] Rebuild drive number idx (first drive is 0). | ||
46 | |||
47 | [daemon_sleep <ms>] | ||
48 | Interval between runs of the bitmap daemon that | ||
49 | clear bits. A longer interval means less bitmap I/O but | ||
50 | resyncing after a failure is likely to take longer. | ||
51 | |||
52 | [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization | ||
53 | [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization | ||
54 | [write_mostly <idx>] Drive index is write-mostly | ||
55 | [max_write_behind <sectors>] See '-write-behind=' (man mdadm) | ||
56 | [stripe_cache <sectors>] Stripe cache size (higher RAIDs only) | ||
57 | [region_size <sectors>] | ||
58 | The region_size multiplied by the number of regions is the | ||
59 | logical size of the array. The bitmap records the device | ||
60 | synchronisation state for each region. | ||
61 | |||
62 | <#raid_devs>: The number of devices composing the array. | ||
63 | Each device consists of two entries. The first is the device | ||
64 | containing the metadata (if any); the second is the one containing the | ||
65 | data. | ||
66 | |||
67 | If a drive has failed or is missing at creation time, a '-' can be | ||
68 | given for both the metadata and data drives for a given position. | ||
69 | |||
70 | |||
71 | Example tables | ||
72 | -------------- | ||
73 | # RAID4 - 4 data drives, 1 parity (no metadata devices) | ||
44 | # No metadata devices specified to hold superblock/bitmap info | 74 | # No metadata devices specified to hold superblock/bitmap info |
45 | # Chunk size of 1MiB | 75 | # Chunk size of 1MiB |
46 | # (Lines separated for easy reading) | 76 | # (Lines separated for easy reading) |
77 | |||
47 | 0 1960893648 raid \ | 78 | 0 1960893648 raid \ |
48 | raid4 1 2048 \ | 79 | raid4 1 2048 \ |
49 | 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81 | 80 | 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81 |
50 | 81 | ||
51 | # RAID4 - 4 data drives, 1 parity (no metadata devices) | 82 | # RAID4 - 4 data drives, 1 parity (with metadata devices) |
52 | # Chunk size of 1MiB, force RAID initialization, | 83 | # Chunk size of 1MiB, force RAID initialization, |
53 | # min recovery rate at 20 kiB/sec/disk | 84 | # min recovery rate at 20 kiB/sec/disk |
85 | |||
54 | 0 1960893648 raid \ | 86 | 0 1960893648 raid \ |
55 | raid4 4 2048 min_recovery_rate 20 sync\ | 87 | raid4 4 2048 sync min_recovery_rate 20 \ |
56 | 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81 | 88 | 5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82 |
57 | 89 | ||
58 | Performing a 'dmsetup table' should display the CTR table used to | 90 | 'dmsetup table' displays the table used to construct the mapping. |
59 | construct the mapping (with possible reordering of optional | 91 | The optional parameters are always printed in the order listed |
60 | parameters). | 92 | above with "sync" or "nosync" always output ahead of the other |
93 | arguments, regardless of the order used when originally loading the table. | ||
94 | Arguments that can be repeated are ordered by value. | ||
61 | 95 | ||
62 | Performing a 'dmsetup status' will yield information on the state and | 96 | 'dmsetup status' yields information on the state and health of the |
63 | health of the array. The output is as follows: | 97 | array. |
98 | The output is as follows: | ||
64 | 1: <s> <l> raid \ | 99 | 1: <s> <l> raid \ |
65 | 2: <raid_type> <#devices> <1 health char for each dev> <resync_ratio> | 100 | 2: <raid_type> <#devices> <1 health char for each dev> <resync_ratio> |
66 | 101 | ||
67 | Line 1 is standard DM output. Line 2 is best shown by example: | 102 | Line 1 is the standard output produced by device-mapper. |
103 | Line 2 is produced by the raid target, and best explained by example: | ||
68 | 0 1960893648 raid raid4 5 AAAAA 2/490221568 | 104 | 0 1960893648 raid raid4 5 AAAAA 2/490221568 |
69 | Here we can see the RAID type is raid4, there are 5 devices - all of | 105 | Here we can see the RAID type is raid4, there are 5 devices - all of |
70 | which are 'A'live, and the array is 2/490221568 complete with recovery. | 106 | which are 'A'live, and the array is 2/490221568 complete with recovery. |
107 | Faulty or missing devices are marked 'D'. Devices that are out-of-sync | ||
108 | are marked 'a'. | ||
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 8420129fc5e..f75a66e7d31 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig | |||
@@ -241,12 +241,13 @@ config DM_MIRROR | |||
241 | needed for live data migration tools such as 'pvmove'. | 241 | needed for live data migration tools such as 'pvmove'. |
242 | 242 | ||
243 | config DM_RAID | 243 | config DM_RAID |
244 | tristate "RAID 4/5/6 target (EXPERIMENTAL)" | 244 | tristate "RAID 1/4/5/6 target (EXPERIMENTAL)" |
245 | depends on BLK_DEV_DM && EXPERIMENTAL | 245 | depends on BLK_DEV_DM && EXPERIMENTAL |
246 | select MD_RAID1 | ||
246 | select MD_RAID456 | 247 | select MD_RAID456 |
247 | select BLK_DEV_MD | 248 | select BLK_DEV_MD |
248 | ---help--- | 249 | ---help--- |
249 | A dm target that supports RAID4, RAID5 and RAID6 mappings | 250 | A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings |
250 | 251 | ||
251 | A RAID-5 set of N drives with a capacity of C MB per drive provides | 252 | A RAID-5 set of N drives with a capacity of C MB per drive provides |
252 | the capacity of C * (N - 1) MB, and protects against a failure | 253 | the capacity of C * (N - 1) MB, and protects against a failure |
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index bae6c4e23d3..49da55c1528 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/device-mapper.h> | 30 | #include <linux/device-mapper.h> |
31 | 31 | ||
32 | #define DM_MSG_PREFIX "crypt" | 32 | #define DM_MSG_PREFIX "crypt" |
33 | #define MESG_STR(x) x, sizeof(x) | ||
34 | 33 | ||
35 | /* | 34 | /* |
36 | * context holding the current state of a multi-part conversion | 35 | * context holding the current state of a multi-part conversion |
@@ -239,7 +238,7 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, | |||
239 | struct dm_crypt_request *dmreq) | 238 | struct dm_crypt_request *dmreq) |
240 | { | 239 | { |
241 | memset(iv, 0, cc->iv_size); | 240 | memset(iv, 0, cc->iv_size); |
242 | *(u32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff); | 241 | *(__le32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff); |
243 | 242 | ||
244 | return 0; | 243 | return 0; |
245 | } | 244 | } |
@@ -248,7 +247,7 @@ static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv, | |||
248 | struct dm_crypt_request *dmreq) | 247 | struct dm_crypt_request *dmreq) |
249 | { | 248 | { |
250 | memset(iv, 0, cc->iv_size); | 249 | memset(iv, 0, cc->iv_size); |
251 | *(u64 *)iv = cpu_to_le64(dmreq->iv_sector); | 250 | *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); |
252 | 251 | ||
253 | return 0; | 252 | return 0; |
254 | } | 253 | } |
@@ -415,7 +414,7 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, | |||
415 | struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private; | 414 | struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private; |
416 | 415 | ||
417 | memset(iv, 0, cc->iv_size); | 416 | memset(iv, 0, cc->iv_size); |
418 | *(u64 *)iv = cpu_to_le64(dmreq->iv_sector); | 417 | *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); |
419 | crypto_cipher_encrypt_one(essiv_tfm, iv, iv); | 418 | crypto_cipher_encrypt_one(essiv_tfm, iv, iv); |
420 | 419 | ||
421 | return 0; | 420 | return 0; |
@@ -1575,11 +1574,17 @@ bad_mem: | |||
1575 | static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | 1574 | static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) |
1576 | { | 1575 | { |
1577 | struct crypt_config *cc; | 1576 | struct crypt_config *cc; |
1578 | unsigned int key_size; | 1577 | unsigned int key_size, opt_params; |
1579 | unsigned long long tmpll; | 1578 | unsigned long long tmpll; |
1580 | int ret; | 1579 | int ret; |
1580 | struct dm_arg_set as; | ||
1581 | const char *opt_string; | ||
1582 | |||
1583 | static struct dm_arg _args[] = { | ||
1584 | {0, 1, "Invalid number of feature args"}, | ||
1585 | }; | ||
1581 | 1586 | ||
1582 | if (argc != 5) { | 1587 | if (argc < 5) { |
1583 | ti->error = "Not enough arguments"; | 1588 | ti->error = "Not enough arguments"; |
1584 | return -EINVAL; | 1589 | return -EINVAL; |
1585 | } | 1590 | } |
@@ -1648,6 +1653,30 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1648 | } | 1653 | } |
1649 | cc->start = tmpll; | 1654 | cc->start = tmpll; |
1650 | 1655 | ||
1656 | argv += 5; | ||
1657 | argc -= 5; | ||
1658 | |||
1659 | /* Optional parameters */ | ||
1660 | if (argc) { | ||
1661 | as.argc = argc; | ||
1662 | as.argv = argv; | ||
1663 | |||
1664 | ret = dm_read_arg_group(_args, &as, &opt_params, &ti->error); | ||
1665 | if (ret) | ||
1666 | goto bad; | ||
1667 | |||
1668 | opt_string = dm_shift_arg(&as); | ||
1669 | |||
1670 | if (opt_params == 1 && opt_string && | ||
1671 | !strcasecmp(opt_string, "allow_discards")) | ||
1672 | ti->num_discard_requests = 1; | ||
1673 | else if (opt_params) { | ||
1674 | ret = -EINVAL; | ||
1675 | ti->error = "Invalid feature arguments"; | ||
1676 | goto bad; | ||
1677 | } | ||
1678 | } | ||
1679 | |||
1651 | ret = -ENOMEM; | 1680 | ret = -ENOMEM; |
1652 | cc->io_queue = alloc_workqueue("kcryptd_io", | 1681 | cc->io_queue = alloc_workqueue("kcryptd_io", |
1653 | WQ_NON_REENTRANT| | 1682 | WQ_NON_REENTRANT| |
@@ -1682,9 +1711,16 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, | |||
1682 | struct dm_crypt_io *io; | 1711 | struct dm_crypt_io *io; |
1683 | struct crypt_config *cc; | 1712 | struct crypt_config *cc; |
1684 | 1713 | ||
1685 | if (bio->bi_rw & REQ_FLUSH) { | 1714 | /* |
1715 | * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues. | ||
1716 | * - for REQ_FLUSH device-mapper core ensures that no IO is in-flight | ||
1717 | * - for REQ_DISCARD caller must use flush if IO ordering matters | ||
1718 | */ | ||
1719 | if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) { | ||
1686 | cc = ti->private; | 1720 | cc = ti->private; |
1687 | bio->bi_bdev = cc->dev->bdev; | 1721 | bio->bi_bdev = cc->dev->bdev; |
1722 | if (bio_sectors(bio)) | ||
1723 | bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector); | ||
1688 | return DM_MAPIO_REMAPPED; | 1724 | return DM_MAPIO_REMAPPED; |
1689 | } | 1725 | } |
1690 | 1726 | ||
@@ -1727,6 +1763,10 @@ static int crypt_status(struct dm_target *ti, status_type_t type, | |||
1727 | 1763 | ||
1728 | DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, | 1764 | DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, |
1729 | cc->dev->name, (unsigned long long)cc->start); | 1765 | cc->dev->name, (unsigned long long)cc->start); |
1766 | |||
1767 | if (ti->num_discard_requests) | ||
1768 | DMEMIT(" 1 allow_discards"); | ||
1769 | |||
1730 | break; | 1770 | break; |
1731 | } | 1771 | } |
1732 | return 0; | 1772 | return 0; |
@@ -1770,12 +1810,12 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) | |||
1770 | if (argc < 2) | 1810 | if (argc < 2) |
1771 | goto error; | 1811 | goto error; |
1772 | 1812 | ||
1773 | if (!strnicmp(argv[0], MESG_STR("key"))) { | 1813 | if (!strcasecmp(argv[0], "key")) { |
1774 | if (!test_bit(DM_CRYPT_SUSPENDED, &cc->flags)) { | 1814 | if (!test_bit(DM_CRYPT_SUSPENDED, &cc->flags)) { |
1775 | DMWARN("not suspended during key manipulation."); | 1815 | DMWARN("not suspended during key manipulation."); |
1776 | return -EINVAL; | 1816 | return -EINVAL; |
1777 | } | 1817 | } |
1778 | if (argc == 3 && !strnicmp(argv[1], MESG_STR("set"))) { | 1818 | if (argc == 3 && !strcasecmp(argv[1], "set")) { |
1779 | ret = crypt_set_key(cc, argv[2]); | 1819 | ret = crypt_set_key(cc, argv[2]); |
1780 | if (ret) | 1820 | if (ret) |
1781 | return ret; | 1821 | return ret; |
@@ -1783,7 +1823,7 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) | |||
1783 | ret = cc->iv_gen_ops->init(cc); | 1823 | ret = cc->iv_gen_ops->init(cc); |
1784 | return ret; | 1824 | return ret; |
1785 | } | 1825 | } |
1786 | if (argc == 2 && !strnicmp(argv[1], MESG_STR("wipe"))) { | 1826 | if (argc == 2 && !strcasecmp(argv[1], "wipe")) { |
1787 | if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) { | 1827 | if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) { |
1788 | ret = cc->iv_gen_ops->wipe(cc); | 1828 | ret = cc->iv_gen_ops->wipe(cc); |
1789 | if (ret) | 1829 | if (ret) |
@@ -1823,7 +1863,7 @@ static int crypt_iterate_devices(struct dm_target *ti, | |||
1823 | 1863 | ||
1824 | static struct target_type crypt_target = { | 1864 | static struct target_type crypt_target = { |
1825 | .name = "crypt", | 1865 | .name = "crypt", |
1826 | .version = {1, 10, 0}, | 1866 | .version = {1, 11, 0}, |
1827 | .module = THIS_MODULE, | 1867 | .module = THIS_MODULE, |
1828 | .ctr = crypt_ctr, | 1868 | .ctr = crypt_ctr, |
1829 | .dtr = crypt_dtr, | 1869 | .dtr = crypt_dtr, |
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index ea790623c30..89f73ca22cf 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2003 Sistina Software (UK) Limited. | 2 | * Copyright (C) 2003 Sistina Software (UK) Limited. |
3 | * Copyright (C) 2004, 2010 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004, 2010-2011 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This file is released under the GPL. | 5 | * This file is released under the GPL. |
6 | */ | 6 | */ |
@@ -15,6 +15,9 @@ | |||
15 | 15 | ||
16 | #define DM_MSG_PREFIX "flakey" | 16 | #define DM_MSG_PREFIX "flakey" |
17 | 17 | ||
18 | #define all_corrupt_bio_flags_match(bio, fc) \ | ||
19 | (((bio)->bi_rw & (fc)->corrupt_bio_flags) == (fc)->corrupt_bio_flags) | ||
20 | |||
18 | /* | 21 | /* |
19 | * Flakey: Used for testing only, simulates intermittent, | 22 | * Flakey: Used for testing only, simulates intermittent, |
20 | * catastrophic device failure. | 23 | * catastrophic device failure. |
@@ -25,60 +28,189 @@ struct flakey_c { | |||
25 | sector_t start; | 28 | sector_t start; |
26 | unsigned up_interval; | 29 | unsigned up_interval; |
27 | unsigned down_interval; | 30 | unsigned down_interval; |
31 | unsigned long flags; | ||
32 | unsigned corrupt_bio_byte; | ||
33 | unsigned corrupt_bio_rw; | ||
34 | unsigned corrupt_bio_value; | ||
35 | unsigned corrupt_bio_flags; | ||
36 | }; | ||
37 | |||
38 | enum feature_flag_bits { | ||
39 | DROP_WRITES | ||
28 | }; | 40 | }; |
29 | 41 | ||
42 | static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, | ||
43 | struct dm_target *ti) | ||
44 | { | ||
45 | int r; | ||
46 | unsigned argc; | ||
47 | const char *arg_name; | ||
48 | |||
49 | static struct dm_arg _args[] = { | ||
50 | {0, 6, "Invalid number of feature args"}, | ||
51 | {1, UINT_MAX, "Invalid corrupt bio byte"}, | ||
52 | {0, 255, "Invalid corrupt value to write into bio byte (0-255)"}, | ||
53 | {0, UINT_MAX, "Invalid corrupt bio flags mask"}, | ||
54 | }; | ||
55 | |||
56 | /* No feature arguments supplied. */ | ||
57 | if (!as->argc) | ||
58 | return 0; | ||
59 | |||
60 | r = dm_read_arg_group(_args, as, &argc, &ti->error); | ||
61 | if (r) | ||
62 | return r; | ||
63 | |||
64 | while (argc) { | ||
65 | arg_name = dm_shift_arg(as); | ||
66 | argc--; | ||
67 | |||
68 | /* | ||
69 | * drop_writes | ||
70 | */ | ||
71 | if (!strcasecmp(arg_name, "drop_writes")) { | ||
72 | if (test_and_set_bit(DROP_WRITES, &fc->flags)) { | ||
73 | ti->error = "Feature drop_writes duplicated"; | ||
74 | return -EINVAL; | ||
75 | } | ||
76 | |||
77 | continue; | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags> | ||
82 | */ | ||
83 | if (!strcasecmp(arg_name, "corrupt_bio_byte")) { | ||
84 | if (!argc) | ||
85 | ti->error = "Feature corrupt_bio_byte requires parameters"; | ||
86 | |||
87 | r = dm_read_arg(_args + 1, as, &fc->corrupt_bio_byte, &ti->error); | ||
88 | if (r) | ||
89 | return r; | ||
90 | argc--; | ||
91 | |||
92 | /* | ||
93 | * Direction r or w? | ||
94 | */ | ||
95 | arg_name = dm_shift_arg(as); | ||
96 | if (!strcasecmp(arg_name, "w")) | ||
97 | fc->corrupt_bio_rw = WRITE; | ||
98 | else if (!strcasecmp(arg_name, "r")) | ||
99 | fc->corrupt_bio_rw = READ; | ||
100 | else { | ||
101 | ti->error = "Invalid corrupt bio direction (r or w)"; | ||
102 | return -EINVAL; | ||
103 | } | ||
104 | argc--; | ||
105 | |||
106 | /* | ||
107 | * Value of byte (0-255) to write in place of correct one. | ||
108 | */ | ||
109 | r = dm_read_arg(_args + 2, as, &fc->corrupt_bio_value, &ti->error); | ||
110 | if (r) | ||
111 | return r; | ||
112 | argc--; | ||
113 | |||
114 | /* | ||
115 | * Only corrupt bios with these flags set. | ||
116 | */ | ||
117 | r = dm_read_arg(_args + 3, as, &fc->corrupt_bio_flags, &ti->error); | ||
118 | if (r) | ||
119 | return r; | ||
120 | argc--; | ||
121 | |||
122 | continue; | ||
123 | } | ||
124 | |||
125 | ti->error = "Unrecognised flakey feature requested"; | ||
126 | return -EINVAL; | ||
127 | } | ||
128 | |||
129 | if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) { | ||
130 | ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set"; | ||
131 | return -EINVAL; | ||
132 | } | ||
133 | |||
134 | return 0; | ||
135 | } | ||
136 | |||
30 | /* | 137 | /* |
31 | * Construct a flakey mapping: <dev_path> <offset> <up interval> <down interval> | 138 | * Construct a flakey mapping: |
139 | * <dev_path> <offset> <up interval> <down interval> [<#feature args> [<arg>]*] | ||
140 | * | ||
141 | * Feature args: | ||
142 | * [drop_writes] | ||
143 | * [corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags>] | ||
144 | * | ||
145 | * Nth_byte starts from 1 for the first byte. | ||
146 | * Direction is r for READ or w for WRITE. | ||
147 | * bio_flags is ignored if 0. | ||
32 | */ | 148 | */ |
33 | static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) | 149 | static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) |
34 | { | 150 | { |
151 | static struct dm_arg _args[] = { | ||
152 | {0, UINT_MAX, "Invalid up interval"}, | ||
153 | {0, UINT_MAX, "Invalid down interval"}, | ||
154 | }; | ||
155 | |||
156 | int r; | ||
35 | struct flakey_c *fc; | 157 | struct flakey_c *fc; |
36 | unsigned long long tmp; | 158 | unsigned long long tmpll; |
159 | struct dm_arg_set as; | ||
160 | const char *devname; | ||
37 | 161 | ||
38 | if (argc != 4) { | 162 | as.argc = argc; |
39 | ti->error = "dm-flakey: Invalid argument count"; | 163 | as.argv = argv; |
164 | |||
165 | if (argc < 4) { | ||
166 | ti->error = "Invalid argument count"; | ||
40 | return -EINVAL; | 167 | return -EINVAL; |
41 | } | 168 | } |
42 | 169 | ||
43 | fc = kmalloc(sizeof(*fc), GFP_KERNEL); | 170 | fc = kzalloc(sizeof(*fc), GFP_KERNEL); |
44 | if (!fc) { | 171 | if (!fc) { |
45 | ti->error = "dm-flakey: Cannot allocate linear context"; | 172 | ti->error = "Cannot allocate linear context"; |
46 | return -ENOMEM; | 173 | return -ENOMEM; |
47 | } | 174 | } |
48 | fc->start_time = jiffies; | 175 | fc->start_time = jiffies; |
49 | 176 | ||
50 | if (sscanf(argv[1], "%llu", &tmp) != 1) { | 177 | devname = dm_shift_arg(&as); |
51 | ti->error = "dm-flakey: Invalid device sector"; | 178 | |
179 | if (sscanf(dm_shift_arg(&as), "%llu", &tmpll) != 1) { | ||
180 | ti->error = "Invalid device sector"; | ||
52 | goto bad; | 181 | goto bad; |
53 | } | 182 | } |
54 | fc->start = tmp; | 183 | fc->start = tmpll; |
55 | 184 | ||
56 | if (sscanf(argv[2], "%u", &fc->up_interval) != 1) { | 185 | r = dm_read_arg(_args, &as, &fc->up_interval, &ti->error); |
57 | ti->error = "dm-flakey: Invalid up interval"; | 186 | if (r) |
58 | goto bad; | 187 | goto bad; |
59 | } | ||
60 | 188 | ||
61 | if (sscanf(argv[3], "%u", &fc->down_interval) != 1) { | 189 | r = dm_read_arg(_args, &as, &fc->down_interval, &ti->error); |
62 | ti->error = "dm-flakey: Invalid down interval"; | 190 | if (r) |
63 | goto bad; | 191 | goto bad; |
64 | } | ||
65 | 192 | ||
66 | if (!(fc->up_interval + fc->down_interval)) { | 193 | if (!(fc->up_interval + fc->down_interval)) { |
67 | ti->error = "dm-flakey: Total (up + down) interval is zero"; | 194 | ti->error = "Total (up + down) interval is zero"; |
68 | goto bad; | 195 | goto bad; |
69 | } | 196 | } |
70 | 197 | ||
71 | if (fc->up_interval + fc->down_interval < fc->up_interval) { | 198 | if (fc->up_interval + fc->down_interval < fc->up_interval) { |
72 | ti->error = "dm-flakey: Interval overflow"; | 199 | ti->error = "Interval overflow"; |
73 | goto bad; | 200 | goto bad; |
74 | } | 201 | } |
75 | 202 | ||
76 | if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &fc->dev)) { | 203 | r = parse_features(&as, fc, ti); |
77 | ti->error = "dm-flakey: Device lookup failed"; | 204 | if (r) |
205 | goto bad; | ||
206 | |||
207 | if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &fc->dev)) { | ||
208 | ti->error = "Device lookup failed"; | ||
78 | goto bad; | 209 | goto bad; |
79 | } | 210 | } |
80 | 211 | ||
81 | ti->num_flush_requests = 1; | 212 | ti->num_flush_requests = 1; |
213 | ti->num_discard_requests = 1; | ||
82 | ti->private = fc; | 214 | ti->private = fc; |
83 | return 0; | 215 | return 0; |
84 | 216 | ||
@@ -99,7 +231,7 @@ static sector_t flakey_map_sector(struct dm_target *ti, sector_t bi_sector) | |||
99 | { | 231 | { |
100 | struct flakey_c *fc = ti->private; | 232 | struct flakey_c *fc = ti->private; |
101 | 233 | ||
102 | return fc->start + (bi_sector - ti->begin); | 234 | return fc->start + dm_target_offset(ti, bi_sector); |
103 | } | 235 | } |
104 | 236 | ||
105 | static void flakey_map_bio(struct dm_target *ti, struct bio *bio) | 237 | static void flakey_map_bio(struct dm_target *ti, struct bio *bio) |
@@ -111,6 +243,25 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) | |||
111 | bio->bi_sector = flakey_map_sector(ti, bio->bi_sector); | 243 | bio->bi_sector = flakey_map_sector(ti, bio->bi_sector); |
112 | } | 244 | } |
113 | 245 | ||
246 | static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) | ||
247 | { | ||
248 | unsigned bio_bytes = bio_cur_bytes(bio); | ||
249 | char *data = bio_data(bio); | ||
250 | |||
251 | /* | ||
252 | * Overwrite the Nth byte of the data returned. | ||
253 | */ | ||
254 | if (data && bio_bytes >= fc->corrupt_bio_byte) { | ||
255 | data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value; | ||
256 | |||
257 | DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " | ||
258 | "(rw=%c bi_rw=%lu bi_sector=%llu cur_bytes=%u)\n", | ||
259 | bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, | ||
260 | (bio_data_dir(bio) == WRITE) ? 'w' : 'r', | ||
261 | bio->bi_rw, (unsigned long long)bio->bi_sector, bio_bytes); | ||
262 | } | ||
263 | } | ||
264 | |||
114 | static int flakey_map(struct dm_target *ti, struct bio *bio, | 265 | static int flakey_map(struct dm_target *ti, struct bio *bio, |
115 | union map_info *map_context) | 266 | union map_info *map_context) |
116 | { | 267 | { |
@@ -119,18 +270,71 @@ static int flakey_map(struct dm_target *ti, struct bio *bio, | |||
119 | 270 | ||
120 | /* Are we alive ? */ | 271 | /* Are we alive ? */ |
121 | elapsed = (jiffies - fc->start_time) / HZ; | 272 | elapsed = (jiffies - fc->start_time) / HZ; |
122 | if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) | 273 | if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) { |
274 | /* | ||
275 | * Flag this bio as submitted while down. | ||
276 | */ | ||
277 | map_context->ll = 1; | ||
278 | |||
279 | /* | ||
280 | * Map reads as normal. | ||
281 | */ | ||
282 | if (bio_data_dir(bio) == READ) | ||
283 | goto map_bio; | ||
284 | |||
285 | /* | ||
286 | * Drop writes? | ||
287 | */ | ||
288 | if (test_bit(DROP_WRITES, &fc->flags)) { | ||
289 | bio_endio(bio, 0); | ||
290 | return DM_MAPIO_SUBMITTED; | ||
291 | } | ||
292 | |||
293 | /* | ||
294 | * Corrupt matching writes. | ||
295 | */ | ||
296 | if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == WRITE)) { | ||
297 | if (all_corrupt_bio_flags_match(bio, fc)) | ||
298 | corrupt_bio_data(bio, fc); | ||
299 | goto map_bio; | ||
300 | } | ||
301 | |||
302 | /* | ||
303 | * By default, error all I/O. | ||
304 | */ | ||
123 | return -EIO; | 305 | return -EIO; |
306 | } | ||
124 | 307 | ||
308 | map_bio: | ||
125 | flakey_map_bio(ti, bio); | 309 | flakey_map_bio(ti, bio); |
126 | 310 | ||
127 | return DM_MAPIO_REMAPPED; | 311 | return DM_MAPIO_REMAPPED; |
128 | } | 312 | } |
129 | 313 | ||
314 | static int flakey_end_io(struct dm_target *ti, struct bio *bio, | ||
315 | int error, union map_info *map_context) | ||
316 | { | ||
317 | struct flakey_c *fc = ti->private; | ||
318 | unsigned bio_submitted_while_down = map_context->ll; | ||
319 | |||
320 | /* | ||
321 | * Corrupt successful READs while in down state. | ||
322 | * If flags were specified, only corrupt those that match. | ||
323 | */ | ||
324 | if (!error && bio_submitted_while_down && | ||
325 | (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && | ||
326 | all_corrupt_bio_flags_match(bio, fc)) | ||
327 | corrupt_bio_data(bio, fc); | ||
328 | |||
329 | return error; | ||
330 | } | ||
331 | |||
130 | static int flakey_status(struct dm_target *ti, status_type_t type, | 332 | static int flakey_status(struct dm_target *ti, status_type_t type, |
131 | char *result, unsigned int maxlen) | 333 | char *result, unsigned int maxlen) |
132 | { | 334 | { |
335 | unsigned sz = 0; | ||
133 | struct flakey_c *fc = ti->private; | 336 | struct flakey_c *fc = ti->private; |
337 | unsigned drop_writes; | ||
134 | 338 | ||
135 | switch (type) { | 339 | switch (type) { |
136 | case STATUSTYPE_INFO: | 340 | case STATUSTYPE_INFO: |
@@ -138,9 +342,22 @@ static int flakey_status(struct dm_target *ti, status_type_t type, | |||
138 | break; | 342 | break; |
139 | 343 | ||
140 | case STATUSTYPE_TABLE: | 344 | case STATUSTYPE_TABLE: |
141 | snprintf(result, maxlen, "%s %llu %u %u", fc->dev->name, | 345 | DMEMIT("%s %llu %u %u ", fc->dev->name, |
142 | (unsigned long long)fc->start, fc->up_interval, | 346 | (unsigned long long)fc->start, fc->up_interval, |
143 | fc->down_interval); | 347 | fc->down_interval); |
348 | |||
349 | drop_writes = test_bit(DROP_WRITES, &fc->flags); | ||
350 | DMEMIT("%u ", drop_writes + (fc->corrupt_bio_byte > 0) * 5); | ||
351 | |||
352 | if (drop_writes) | ||
353 | DMEMIT("drop_writes "); | ||
354 | |||
355 | if (fc->corrupt_bio_byte) | ||
356 | DMEMIT("corrupt_bio_byte %u %c %u %u ", | ||
357 | fc->corrupt_bio_byte, | ||
358 | (fc->corrupt_bio_rw == WRITE) ? 'w' : 'r', | ||
359 | fc->corrupt_bio_value, fc->corrupt_bio_flags); | ||
360 | |||
144 | break; | 361 | break; |
145 | } | 362 | } |
146 | return 0; | 363 | return 0; |
@@ -177,11 +394,12 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ | |||
177 | 394 | ||
178 | static struct target_type flakey_target = { | 395 | static struct target_type flakey_target = { |
179 | .name = "flakey", | 396 | .name = "flakey", |
180 | .version = {1, 1, 0}, | 397 | .version = {1, 2, 0}, |
181 | .module = THIS_MODULE, | 398 | .module = THIS_MODULE, |
182 | .ctr = flakey_ctr, | 399 | .ctr = flakey_ctr, |
183 | .dtr = flakey_dtr, | 400 | .dtr = flakey_dtr, |
184 | .map = flakey_map, | 401 | .map = flakey_map, |
402 | .end_io = flakey_end_io, | ||
185 | .status = flakey_status, | 403 | .status = flakey_status, |
186 | .ioctl = flakey_ioctl, | 404 | .ioctl = flakey_ioctl, |
187 | .merge = flakey_merge, | 405 | .merge = flakey_merge, |
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 2067288f61f..ad2eba40e31 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c | |||
@@ -38,6 +38,8 @@ struct io { | |||
38 | struct dm_io_client *client; | 38 | struct dm_io_client *client; |
39 | io_notify_fn callback; | 39 | io_notify_fn callback; |
40 | void *context; | 40 | void *context; |
41 | void *vma_invalidate_address; | ||
42 | unsigned long vma_invalidate_size; | ||
41 | } __attribute__((aligned(DM_IO_MAX_REGIONS))); | 43 | } __attribute__((aligned(DM_IO_MAX_REGIONS))); |
42 | 44 | ||
43 | static struct kmem_cache *_dm_io_cache; | 45 | static struct kmem_cache *_dm_io_cache; |
@@ -116,6 +118,10 @@ static void dec_count(struct io *io, unsigned int region, int error) | |||
116 | set_bit(region, &io->error_bits); | 118 | set_bit(region, &io->error_bits); |
117 | 119 | ||
118 | if (atomic_dec_and_test(&io->count)) { | 120 | if (atomic_dec_and_test(&io->count)) { |
121 | if (io->vma_invalidate_size) | ||
122 | invalidate_kernel_vmap_range(io->vma_invalidate_address, | ||
123 | io->vma_invalidate_size); | ||
124 | |||
119 | if (io->sleeper) | 125 | if (io->sleeper) |
120 | wake_up_process(io->sleeper); | 126 | wake_up_process(io->sleeper); |
121 | 127 | ||
@@ -159,6 +165,9 @@ struct dpages { | |||
159 | 165 | ||
160 | unsigned context_u; | 166 | unsigned context_u; |
161 | void *context_ptr; | 167 | void *context_ptr; |
168 | |||
169 | void *vma_invalidate_address; | ||
170 | unsigned long vma_invalidate_size; | ||
162 | }; | 171 | }; |
163 | 172 | ||
164 | /* | 173 | /* |
@@ -377,6 +386,9 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, | |||
377 | io->sleeper = current; | 386 | io->sleeper = current; |
378 | io->client = client; | 387 | io->client = client; |
379 | 388 | ||
389 | io->vma_invalidate_address = dp->vma_invalidate_address; | ||
390 | io->vma_invalidate_size = dp->vma_invalidate_size; | ||
391 | |||
380 | dispatch_io(rw, num_regions, where, dp, io, 1); | 392 | dispatch_io(rw, num_regions, where, dp, io, 1); |
381 | 393 | ||
382 | while (1) { | 394 | while (1) { |
@@ -415,13 +427,21 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, | |||
415 | io->callback = fn; | 427 | io->callback = fn; |
416 | io->context = context; | 428 | io->context = context; |
417 | 429 | ||
430 | io->vma_invalidate_address = dp->vma_invalidate_address; | ||
431 | io->vma_invalidate_size = dp->vma_invalidate_size; | ||
432 | |||
418 | dispatch_io(rw, num_regions, where, dp, io, 0); | 433 | dispatch_io(rw, num_regions, where, dp, io, 0); |
419 | return 0; | 434 | return 0; |
420 | } | 435 | } |
421 | 436 | ||
422 | static int dp_init(struct dm_io_request *io_req, struct dpages *dp) | 437 | static int dp_init(struct dm_io_request *io_req, struct dpages *dp, |
438 | unsigned long size) | ||
423 | { | 439 | { |
424 | /* Set up dpages based on memory type */ | 440 | /* Set up dpages based on memory type */ |
441 | |||
442 | dp->vma_invalidate_address = NULL; | ||
443 | dp->vma_invalidate_size = 0; | ||
444 | |||
425 | switch (io_req->mem.type) { | 445 | switch (io_req->mem.type) { |
426 | case DM_IO_PAGE_LIST: | 446 | case DM_IO_PAGE_LIST: |
427 | list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset); | 447 | list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset); |
@@ -432,6 +452,11 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp) | |||
432 | break; | 452 | break; |
433 | 453 | ||
434 | case DM_IO_VMA: | 454 | case DM_IO_VMA: |
455 | flush_kernel_vmap_range(io_req->mem.ptr.vma, size); | ||
456 | if ((io_req->bi_rw & RW_MASK) == READ) { | ||
457 | dp->vma_invalidate_address = io_req->mem.ptr.vma; | ||
458 | dp->vma_invalidate_size = size; | ||
459 | } | ||
435 | vm_dp_init(dp, io_req->mem.ptr.vma); | 460 | vm_dp_init(dp, io_req->mem.ptr.vma); |
436 | break; | 461 | break; |
437 | 462 | ||
@@ -460,7 +485,7 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions, | |||
460 | int r; | 485 | int r; |
461 | struct dpages dp; | 486 | struct dpages dp; |
462 | 487 | ||
463 | r = dp_init(io_req, &dp); | 488 | r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT); |
464 | if (r) | 489 | if (r) |
465 | return r; | 490 | return r; |
466 | 491 | ||
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 4cacdad2270..2e9a3ca37bd 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c | |||
@@ -128,6 +128,24 @@ static struct hash_cell *__get_uuid_cell(const char *str) | |||
128 | return NULL; | 128 | return NULL; |
129 | } | 129 | } |
130 | 130 | ||
131 | static struct hash_cell *__get_dev_cell(uint64_t dev) | ||
132 | { | ||
133 | struct mapped_device *md; | ||
134 | struct hash_cell *hc; | ||
135 | |||
136 | md = dm_get_md(huge_decode_dev(dev)); | ||
137 | if (!md) | ||
138 | return NULL; | ||
139 | |||
140 | hc = dm_get_mdptr(md); | ||
141 | if (!hc) { | ||
142 | dm_put(md); | ||
143 | return NULL; | ||
144 | } | ||
145 | |||
146 | return hc; | ||
147 | } | ||
148 | |||
131 | /*----------------------------------------------------------------- | 149 | /*----------------------------------------------------------------- |
132 | * Inserting, removing and renaming a device. | 150 | * Inserting, removing and renaming a device. |
133 | *---------------------------------------------------------------*/ | 151 | *---------------------------------------------------------------*/ |
@@ -718,25 +736,45 @@ static int dev_create(struct dm_ioctl *param, size_t param_size) | |||
718 | */ | 736 | */ |
719 | static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param) | 737 | static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param) |
720 | { | 738 | { |
721 | struct mapped_device *md; | 739 | struct hash_cell *hc = NULL; |
722 | void *mdptr = NULL; | ||
723 | 740 | ||
724 | if (*param->uuid) | 741 | if (*param->uuid) { |
725 | return __get_uuid_cell(param->uuid); | 742 | if (*param->name || param->dev) |
743 | return NULL; | ||
726 | 744 | ||
727 | if (*param->name) | 745 | hc = __get_uuid_cell(param->uuid); |
728 | return __get_name_cell(param->name); | 746 | if (!hc) |
747 | return NULL; | ||
748 | } else if (*param->name) { | ||
749 | if (param->dev) | ||
750 | return NULL; | ||
729 | 751 | ||
730 | md = dm_get_md(huge_decode_dev(param->dev)); | 752 | hc = __get_name_cell(param->name); |
731 | if (!md) | 753 | if (!hc) |
732 | goto out; | 754 | return NULL; |
755 | } else if (param->dev) { | ||
756 | hc = __get_dev_cell(param->dev); | ||
757 | if (!hc) | ||
758 | return NULL; | ||
759 | } else | ||
760 | return NULL; | ||
733 | 761 | ||
734 | mdptr = dm_get_mdptr(md); | 762 | /* |
735 | if (!mdptr) | 763 | * Sneakily write in both the name and the uuid |
736 | dm_put(md); | 764 | * while we have the cell. |
765 | */ | ||
766 | strlcpy(param->name, hc->name, sizeof(param->name)); | ||
767 | if (hc->uuid) | ||
768 | strlcpy(param->uuid, hc->uuid, sizeof(param->uuid)); | ||
769 | else | ||
770 | param->uuid[0] = '\0'; | ||
737 | 771 | ||
738 | out: | 772 | if (hc->new_map) |
739 | return mdptr; | 773 | param->flags |= DM_INACTIVE_PRESENT_FLAG; |
774 | else | ||
775 | param->flags &= ~DM_INACTIVE_PRESENT_FLAG; | ||
776 | |||
777 | return hc; | ||
740 | } | 778 | } |
741 | 779 | ||
742 | static struct mapped_device *find_device(struct dm_ioctl *param) | 780 | static struct mapped_device *find_device(struct dm_ioctl *param) |
@@ -746,24 +784,8 @@ static struct mapped_device *find_device(struct dm_ioctl *param) | |||
746 | 784 | ||
747 | down_read(&_hash_lock); | 785 | down_read(&_hash_lock); |
748 | hc = __find_device_hash_cell(param); | 786 | hc = __find_device_hash_cell(param); |
749 | if (hc) { | 787 | if (hc) |
750 | md = hc->md; | 788 | md = hc->md; |
751 | |||
752 | /* | ||
753 | * Sneakily write in both the name and the uuid | ||
754 | * while we have the cell. | ||
755 | */ | ||
756 | strlcpy(param->name, hc->name, sizeof(param->name)); | ||
757 | if (hc->uuid) | ||
758 | strlcpy(param->uuid, hc->uuid, sizeof(param->uuid)); | ||
759 | else | ||
760 | param->uuid[0] = '\0'; | ||
761 | |||
762 | if (hc->new_map) | ||
763 | param->flags |= DM_INACTIVE_PRESENT_FLAG; | ||
764 | else | ||
765 | param->flags &= ~DM_INACTIVE_PRESENT_FLAG; | ||
766 | } | ||
767 | up_read(&_hash_lock); | 789 | up_read(&_hash_lock); |
768 | 790 | ||
769 | return md; | 791 | return md; |
@@ -1402,6 +1424,11 @@ static int target_message(struct dm_ioctl *param, size_t param_size) | |||
1402 | goto out; | 1424 | goto out; |
1403 | } | 1425 | } |
1404 | 1426 | ||
1427 | if (!argc) { | ||
1428 | DMWARN("Empty message received."); | ||
1429 | goto out; | ||
1430 | } | ||
1431 | |||
1405 | table = dm_get_live_table(md); | 1432 | table = dm_get_live_table(md); |
1406 | if (!table) | 1433 | if (!table) |
1407 | goto out_argv; | 1434 | goto out_argv; |
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 320401dec10..f8214702963 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c | |||
@@ -224,8 +224,6 @@ struct kcopyd_job { | |||
224 | unsigned int num_dests; | 224 | unsigned int num_dests; |
225 | struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS]; | 225 | struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS]; |
226 | 226 | ||
227 | sector_t offset; | ||
228 | unsigned int nr_pages; | ||
229 | struct page_list *pages; | 227 | struct page_list *pages; |
230 | 228 | ||
231 | /* | 229 | /* |
@@ -380,7 +378,7 @@ static int run_io_job(struct kcopyd_job *job) | |||
380 | .bi_rw = job->rw, | 378 | .bi_rw = job->rw, |
381 | .mem.type = DM_IO_PAGE_LIST, | 379 | .mem.type = DM_IO_PAGE_LIST, |
382 | .mem.ptr.pl = job->pages, | 380 | .mem.ptr.pl = job->pages, |
383 | .mem.offset = job->offset, | 381 | .mem.offset = 0, |
384 | .notify.fn = complete_io, | 382 | .notify.fn = complete_io, |
385 | .notify.context = job, | 383 | .notify.context = job, |
386 | .client = job->kc->io_client, | 384 | .client = job->kc->io_client, |
@@ -397,10 +395,9 @@ static int run_io_job(struct kcopyd_job *job) | |||
397 | static int run_pages_job(struct kcopyd_job *job) | 395 | static int run_pages_job(struct kcopyd_job *job) |
398 | { | 396 | { |
399 | int r; | 397 | int r; |
398 | unsigned nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9); | ||
400 | 399 | ||
401 | job->nr_pages = dm_div_up(job->dests[0].count + job->offset, | 400 | r = kcopyd_get_pages(job->kc, nr_pages, &job->pages); |
402 | PAGE_SIZE >> 9); | ||
403 | r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages); | ||
404 | if (!r) { | 401 | if (!r) { |
405 | /* this job is ready for io */ | 402 | /* this job is ready for io */ |
406 | push(&job->kc->io_jobs, job); | 403 | push(&job->kc->io_jobs, job); |
@@ -602,8 +599,6 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, | |||
602 | job->num_dests = num_dests; | 599 | job->num_dests = num_dests; |
603 | memcpy(&job->dests, dests, sizeof(*dests) * num_dests); | 600 | memcpy(&job->dests, dests, sizeof(*dests) * num_dests); |
604 | 601 | ||
605 | job->offset = 0; | ||
606 | job->nr_pages = 0; | ||
607 | job->pages = NULL; | 602 | job->pages = NULL; |
608 | 603 | ||
609 | job->fn = fn; | 604 | job->fn = fn; |
@@ -622,6 +617,37 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, | |||
622 | } | 617 | } |
623 | EXPORT_SYMBOL(dm_kcopyd_copy); | 618 | EXPORT_SYMBOL(dm_kcopyd_copy); |
624 | 619 | ||
620 | void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, | ||
621 | dm_kcopyd_notify_fn fn, void *context) | ||
622 | { | ||
623 | struct kcopyd_job *job; | ||
624 | |||
625 | job = mempool_alloc(kc->job_pool, GFP_NOIO); | ||
626 | |||
627 | memset(job, 0, sizeof(struct kcopyd_job)); | ||
628 | job->kc = kc; | ||
629 | job->fn = fn; | ||
630 | job->context = context; | ||
631 | |||
632 | atomic_inc(&kc->nr_jobs); | ||
633 | |||
634 | return job; | ||
635 | } | ||
636 | EXPORT_SYMBOL(dm_kcopyd_prepare_callback); | ||
637 | |||
638 | void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) | ||
639 | { | ||
640 | struct kcopyd_job *job = j; | ||
641 | struct dm_kcopyd_client *kc = job->kc; | ||
642 | |||
643 | job->read_err = read_err; | ||
644 | job->write_err = write_err; | ||
645 | |||
646 | push(&kc->complete_jobs, job); | ||
647 | wake(kc); | ||
648 | } | ||
649 | EXPORT_SYMBOL(dm_kcopyd_do_callback); | ||
650 | |||
625 | /* | 651 | /* |
626 | * Cancels a kcopyd job, eg. someone might be deactivating a | 652 | * Cancels a kcopyd job, eg. someone might be deactivating a |
627 | * mirror. | 653 | * mirror. |
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index aa2e0c374ab..1021c898601 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c | |||
@@ -394,8 +394,7 @@ static int flush_by_group(struct log_c *lc, struct list_head *flush_list) | |||
394 | group[count] = fe->region; | 394 | group[count] = fe->region; |
395 | count++; | 395 | count++; |
396 | 396 | ||
397 | list_del(&fe->list); | 397 | list_move(&fe->list, &tmp_list); |
398 | list_add(&fe->list, &tmp_list); | ||
399 | 398 | ||
400 | type = fe->type; | 399 | type = fe->type; |
401 | if (count >= MAX_FLUSH_GROUP_COUNT) | 400 | if (count >= MAX_FLUSH_GROUP_COUNT) |
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 948e3f4925b..3b52bb72bd1 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c | |||
@@ -197,15 +197,21 @@ EXPORT_SYMBOL(dm_dirty_log_destroy); | |||
197 | #define MIRROR_DISK_VERSION 2 | 197 | #define MIRROR_DISK_VERSION 2 |
198 | #define LOG_OFFSET 2 | 198 | #define LOG_OFFSET 2 |
199 | 199 | ||
200 | struct log_header { | 200 | struct log_header_disk { |
201 | uint32_t magic; | 201 | __le32 magic; |
202 | 202 | ||
203 | /* | 203 | /* |
204 | * Simple, incrementing version. no backward | 204 | * Simple, incrementing version. no backward |
205 | * compatibility. | 205 | * compatibility. |
206 | */ | 206 | */ |
207 | __le32 version; | ||
208 | __le64 nr_regions; | ||
209 | } __packed; | ||
210 | |||
211 | struct log_header_core { | ||
212 | uint32_t magic; | ||
207 | uint32_t version; | 213 | uint32_t version; |
208 | sector_t nr_regions; | 214 | uint64_t nr_regions; |
209 | }; | 215 | }; |
210 | 216 | ||
211 | struct log_c { | 217 | struct log_c { |
@@ -239,10 +245,10 @@ struct log_c { | |||
239 | int log_dev_failed; | 245 | int log_dev_failed; |
240 | int log_dev_flush_failed; | 246 | int log_dev_flush_failed; |
241 | struct dm_dev *log_dev; | 247 | struct dm_dev *log_dev; |
242 | struct log_header header; | 248 | struct log_header_core header; |
243 | 249 | ||
244 | struct dm_io_region header_location; | 250 | struct dm_io_region header_location; |
245 | struct log_header *disk_header; | 251 | struct log_header_disk *disk_header; |
246 | }; | 252 | }; |
247 | 253 | ||
248 | /* | 254 | /* |
@@ -251,34 +257,34 @@ struct log_c { | |||
251 | */ | 257 | */ |
252 | static inline int log_test_bit(uint32_t *bs, unsigned bit) | 258 | static inline int log_test_bit(uint32_t *bs, unsigned bit) |
253 | { | 259 | { |
254 | return test_bit_le(bit, (unsigned long *) bs) ? 1 : 0; | 260 | return test_bit_le(bit, bs) ? 1 : 0; |
255 | } | 261 | } |
256 | 262 | ||
257 | static inline void log_set_bit(struct log_c *l, | 263 | static inline void log_set_bit(struct log_c *l, |
258 | uint32_t *bs, unsigned bit) | 264 | uint32_t *bs, unsigned bit) |
259 | { | 265 | { |
260 | __test_and_set_bit_le(bit, (unsigned long *) bs); | 266 | __set_bit_le(bit, bs); |
261 | l->touched_cleaned = 1; | 267 | l->touched_cleaned = 1; |
262 | } | 268 | } |
263 | 269 | ||
264 | static inline void log_clear_bit(struct log_c *l, | 270 | static inline void log_clear_bit(struct log_c *l, |
265 | uint32_t *bs, unsigned bit) | 271 | uint32_t *bs, unsigned bit) |
266 | { | 272 | { |
267 | __test_and_clear_bit_le(bit, (unsigned long *) bs); | 273 | __clear_bit_le(bit, bs); |
268 | l->touched_dirtied = 1; | 274 | l->touched_dirtied = 1; |
269 | } | 275 | } |
270 | 276 | ||
271 | /*---------------------------------------------------------------- | 277 | /*---------------------------------------------------------------- |
272 | * Header IO | 278 | * Header IO |
273 | *--------------------------------------------------------------*/ | 279 | *--------------------------------------------------------------*/ |
274 | static void header_to_disk(struct log_header *core, struct log_header *disk) | 280 | static void header_to_disk(struct log_header_core *core, struct log_header_disk *disk) |
275 | { | 281 | { |
276 | disk->magic = cpu_to_le32(core->magic); | 282 | disk->magic = cpu_to_le32(core->magic); |
277 | disk->version = cpu_to_le32(core->version); | 283 | disk->version = cpu_to_le32(core->version); |
278 | disk->nr_regions = cpu_to_le64(core->nr_regions); | 284 | disk->nr_regions = cpu_to_le64(core->nr_regions); |
279 | } | 285 | } |
280 | 286 | ||
281 | static void header_from_disk(struct log_header *core, struct log_header *disk) | 287 | static void header_from_disk(struct log_header_core *core, struct log_header_disk *disk) |
282 | { | 288 | { |
283 | core->magic = le32_to_cpu(disk->magic); | 289 | core->magic = le32_to_cpu(disk->magic); |
284 | core->version = le32_to_cpu(disk->version); | 290 | core->version = le32_to_cpu(disk->version); |
@@ -486,7 +492,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, | |||
486 | memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size); | 492 | memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size); |
487 | lc->sync_count = (sync == NOSYNC) ? region_count : 0; | 493 | lc->sync_count = (sync == NOSYNC) ? region_count : 0; |
488 | 494 | ||
489 | lc->recovering_bits = vmalloc(bitset_size); | 495 | lc->recovering_bits = vzalloc(bitset_size); |
490 | if (!lc->recovering_bits) { | 496 | if (!lc->recovering_bits) { |
491 | DMWARN("couldn't allocate sync bitset"); | 497 | DMWARN("couldn't allocate sync bitset"); |
492 | vfree(lc->sync_bits); | 498 | vfree(lc->sync_bits); |
@@ -498,7 +504,6 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, | |||
498 | kfree(lc); | 504 | kfree(lc); |
499 | return -ENOMEM; | 505 | return -ENOMEM; |
500 | } | 506 | } |
501 | memset(lc->recovering_bits, 0, bitset_size); | ||
502 | lc->sync_search = 0; | 507 | lc->sync_search = 0; |
503 | log->context = lc; | 508 | log->context = lc; |
504 | 509 | ||
@@ -739,8 +744,7 @@ static int core_get_resync_work(struct dm_dirty_log *log, region_t *region) | |||
739 | return 0; | 744 | return 0; |
740 | 745 | ||
741 | do { | 746 | do { |
742 | *region = find_next_zero_bit_le( | 747 | *region = find_next_zero_bit_le(lc->sync_bits, |
743 | (unsigned long *) lc->sync_bits, | ||
744 | lc->region_count, | 748 | lc->region_count, |
745 | lc->sync_search); | 749 | lc->sync_search); |
746 | lc->sync_search = *region + 1; | 750 | lc->sync_search = *region + 1; |
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c3547016f0f..5e0090ef418 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/atomic.h> | 22 | #include <linux/atomic.h> |
23 | 23 | ||
24 | #define DM_MSG_PREFIX "multipath" | 24 | #define DM_MSG_PREFIX "multipath" |
25 | #define MESG_STR(x) x, sizeof(x) | ||
26 | #define DM_PG_INIT_DELAY_MSECS 2000 | 25 | #define DM_PG_INIT_DELAY_MSECS 2000 |
27 | #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1) | 26 | #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1) |
28 | 27 | ||
@@ -505,80 +504,29 @@ static void trigger_event(struct work_struct *work) | |||
505 | * <#paths> <#per-path selector args> | 504 | * <#paths> <#per-path selector args> |
506 | * [<path> [<arg>]* ]+ ]+ | 505 | * [<path> [<arg>]* ]+ ]+ |
507 | *---------------------------------------------------------------*/ | 506 | *---------------------------------------------------------------*/ |
508 | struct param { | 507 | static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, |
509 | unsigned min; | ||
510 | unsigned max; | ||
511 | char *error; | ||
512 | }; | ||
513 | |||
514 | static int read_param(struct param *param, char *str, unsigned *v, char **error) | ||
515 | { | ||
516 | if (!str || | ||
517 | (sscanf(str, "%u", v) != 1) || | ||
518 | (*v < param->min) || | ||
519 | (*v > param->max)) { | ||
520 | *error = param->error; | ||
521 | return -EINVAL; | ||
522 | } | ||
523 | |||
524 | return 0; | ||
525 | } | ||
526 | |||
527 | struct arg_set { | ||
528 | unsigned argc; | ||
529 | char **argv; | ||
530 | }; | ||
531 | |||
532 | static char *shift(struct arg_set *as) | ||
533 | { | ||
534 | char *r; | ||
535 | |||
536 | if (as->argc) { | ||
537 | as->argc--; | ||
538 | r = *as->argv; | ||
539 | as->argv++; | ||
540 | return r; | ||
541 | } | ||
542 | |||
543 | return NULL; | ||
544 | } | ||
545 | |||
546 | static void consume(struct arg_set *as, unsigned n) | ||
547 | { | ||
548 | BUG_ON (as->argc < n); | ||
549 | as->argc -= n; | ||
550 | as->argv += n; | ||
551 | } | ||
552 | |||
553 | static int parse_path_selector(struct arg_set *as, struct priority_group *pg, | ||
554 | struct dm_target *ti) | 508 | struct dm_target *ti) |
555 | { | 509 | { |
556 | int r; | 510 | int r; |
557 | struct path_selector_type *pst; | 511 | struct path_selector_type *pst; |
558 | unsigned ps_argc; | 512 | unsigned ps_argc; |
559 | 513 | ||
560 | static struct param _params[] = { | 514 | static struct dm_arg _args[] = { |
561 | {0, 1024, "invalid number of path selector args"}, | 515 | {0, 1024, "invalid number of path selector args"}, |
562 | }; | 516 | }; |
563 | 517 | ||
564 | pst = dm_get_path_selector(shift(as)); | 518 | pst = dm_get_path_selector(dm_shift_arg(as)); |
565 | if (!pst) { | 519 | if (!pst) { |
566 | ti->error = "unknown path selector type"; | 520 | ti->error = "unknown path selector type"; |
567 | return -EINVAL; | 521 | return -EINVAL; |
568 | } | 522 | } |
569 | 523 | ||
570 | r = read_param(_params, shift(as), &ps_argc, &ti->error); | 524 | r = dm_read_arg_group(_args, as, &ps_argc, &ti->error); |
571 | if (r) { | 525 | if (r) { |
572 | dm_put_path_selector(pst); | 526 | dm_put_path_selector(pst); |
573 | return -EINVAL; | 527 | return -EINVAL; |
574 | } | 528 | } |
575 | 529 | ||
576 | if (ps_argc > as->argc) { | ||
577 | dm_put_path_selector(pst); | ||
578 | ti->error = "not enough arguments for path selector"; | ||
579 | return -EINVAL; | ||
580 | } | ||
581 | |||
582 | r = pst->create(&pg->ps, ps_argc, as->argv); | 530 | r = pst->create(&pg->ps, ps_argc, as->argv); |
583 | if (r) { | 531 | if (r) { |
584 | dm_put_path_selector(pst); | 532 | dm_put_path_selector(pst); |
@@ -587,12 +535,12 @@ static int parse_path_selector(struct arg_set *as, struct priority_group *pg, | |||
587 | } | 535 | } |
588 | 536 | ||
589 | pg->ps.type = pst; | 537 | pg->ps.type = pst; |
590 | consume(as, ps_argc); | 538 | dm_consume_args(as, ps_argc); |
591 | 539 | ||
592 | return 0; | 540 | return 0; |
593 | } | 541 | } |
594 | 542 | ||
595 | static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, | 543 | static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps, |
596 | struct dm_target *ti) | 544 | struct dm_target *ti) |
597 | { | 545 | { |
598 | int r; | 546 | int r; |
@@ -609,7 +557,7 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, | |||
609 | if (!p) | 557 | if (!p) |
610 | return ERR_PTR(-ENOMEM); | 558 | return ERR_PTR(-ENOMEM); |
611 | 559 | ||
612 | r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table), | 560 | r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table), |
613 | &p->path.dev); | 561 | &p->path.dev); |
614 | if (r) { | 562 | if (r) { |
615 | ti->error = "error getting device"; | 563 | ti->error = "error getting device"; |
@@ -660,16 +608,16 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, | |||
660 | return ERR_PTR(r); | 608 | return ERR_PTR(r); |
661 | } | 609 | } |
662 | 610 | ||
663 | static struct priority_group *parse_priority_group(struct arg_set *as, | 611 | static struct priority_group *parse_priority_group(struct dm_arg_set *as, |
664 | struct multipath *m) | 612 | struct multipath *m) |
665 | { | 613 | { |
666 | static struct param _params[] = { | 614 | static struct dm_arg _args[] = { |
667 | {1, 1024, "invalid number of paths"}, | 615 | {1, 1024, "invalid number of paths"}, |
668 | {0, 1024, "invalid number of selector args"} | 616 | {0, 1024, "invalid number of selector args"} |
669 | }; | 617 | }; |
670 | 618 | ||
671 | int r; | 619 | int r; |
672 | unsigned i, nr_selector_args, nr_params; | 620 | unsigned i, nr_selector_args, nr_args; |
673 | struct priority_group *pg; | 621 | struct priority_group *pg; |
674 | struct dm_target *ti = m->ti; | 622 | struct dm_target *ti = m->ti; |
675 | 623 | ||
@@ -693,26 +641,26 @@ static struct priority_group *parse_priority_group(struct arg_set *as, | |||
693 | /* | 641 | /* |
694 | * read the paths | 642 | * read the paths |
695 | */ | 643 | */ |
696 | r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error); | 644 | r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error); |
697 | if (r) | 645 | if (r) |
698 | goto bad; | 646 | goto bad; |
699 | 647 | ||
700 | r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error); | 648 | r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error); |
701 | if (r) | 649 | if (r) |
702 | goto bad; | 650 | goto bad; |
703 | 651 | ||
704 | nr_params = 1 + nr_selector_args; | 652 | nr_args = 1 + nr_selector_args; |
705 | for (i = 0; i < pg->nr_pgpaths; i++) { | 653 | for (i = 0; i < pg->nr_pgpaths; i++) { |
706 | struct pgpath *pgpath; | 654 | struct pgpath *pgpath; |
707 | struct arg_set path_args; | 655 | struct dm_arg_set path_args; |
708 | 656 | ||
709 | if (as->argc < nr_params) { | 657 | if (as->argc < nr_args) { |
710 | ti->error = "not enough path parameters"; | 658 | ti->error = "not enough path parameters"; |
711 | r = -EINVAL; | 659 | r = -EINVAL; |
712 | goto bad; | 660 | goto bad; |
713 | } | 661 | } |
714 | 662 | ||
715 | path_args.argc = nr_params; | 663 | path_args.argc = nr_args; |
716 | path_args.argv = as->argv; | 664 | path_args.argv = as->argv; |
717 | 665 | ||
718 | pgpath = parse_path(&path_args, &pg->ps, ti); | 666 | pgpath = parse_path(&path_args, &pg->ps, ti); |
@@ -723,7 +671,7 @@ static struct priority_group *parse_priority_group(struct arg_set *as, | |||
723 | 671 | ||
724 | pgpath->pg = pg; | 672 | pgpath->pg = pg; |
725 | list_add_tail(&pgpath->list, &pg->pgpaths); | 673 | list_add_tail(&pgpath->list, &pg->pgpaths); |
726 | consume(as, nr_params); | 674 | dm_consume_args(as, nr_args); |
727 | } | 675 | } |
728 | 676 | ||
729 | return pg; | 677 | return pg; |
@@ -733,28 +681,23 @@ static struct priority_group *parse_priority_group(struct arg_set *as, | |||
733 | return ERR_PTR(r); | 681 | return ERR_PTR(r); |
734 | } | 682 | } |
735 | 683 | ||
736 | static int parse_hw_handler(struct arg_set *as, struct multipath *m) | 684 | static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) |
737 | { | 685 | { |
738 | unsigned hw_argc; | 686 | unsigned hw_argc; |
739 | int ret; | 687 | int ret; |
740 | struct dm_target *ti = m->ti; | 688 | struct dm_target *ti = m->ti; |
741 | 689 | ||
742 | static struct param _params[] = { | 690 | static struct dm_arg _args[] = { |
743 | {0, 1024, "invalid number of hardware handler args"}, | 691 | {0, 1024, "invalid number of hardware handler args"}, |
744 | }; | 692 | }; |
745 | 693 | ||
746 | if (read_param(_params, shift(as), &hw_argc, &ti->error)) | 694 | if (dm_read_arg_group(_args, as, &hw_argc, &ti->error)) |
747 | return -EINVAL; | 695 | return -EINVAL; |
748 | 696 | ||
749 | if (!hw_argc) | 697 | if (!hw_argc) |
750 | return 0; | 698 | return 0; |
751 | 699 | ||
752 | if (hw_argc > as->argc) { | 700 | m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL); |
753 | ti->error = "not enough arguments for hardware handler"; | ||
754 | return -EINVAL; | ||
755 | } | ||
756 | |||
757 | m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL); | ||
758 | request_module("scsi_dh_%s", m->hw_handler_name); | 701 | request_module("scsi_dh_%s", m->hw_handler_name); |
759 | if (scsi_dh_handler_exist(m->hw_handler_name) == 0) { | 702 | if (scsi_dh_handler_exist(m->hw_handler_name) == 0) { |
760 | ti->error = "unknown hardware handler type"; | 703 | ti->error = "unknown hardware handler type"; |
@@ -778,7 +721,7 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m) | |||
778 | for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1) | 721 | for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1) |
779 | j = sprintf(p, "%s", as->argv[i]); | 722 | j = sprintf(p, "%s", as->argv[i]); |
780 | } | 723 | } |
781 | consume(as, hw_argc - 1); | 724 | dm_consume_args(as, hw_argc - 1); |
782 | 725 | ||
783 | return 0; | 726 | return 0; |
784 | fail: | 727 | fail: |
@@ -787,20 +730,20 @@ fail: | |||
787 | return ret; | 730 | return ret; |
788 | } | 731 | } |
789 | 732 | ||
790 | static int parse_features(struct arg_set *as, struct multipath *m) | 733 | static int parse_features(struct dm_arg_set *as, struct multipath *m) |
791 | { | 734 | { |
792 | int r; | 735 | int r; |
793 | unsigned argc; | 736 | unsigned argc; |
794 | struct dm_target *ti = m->ti; | 737 | struct dm_target *ti = m->ti; |
795 | const char *param_name; | 738 | const char *arg_name; |
796 | 739 | ||
797 | static struct param _params[] = { | 740 | static struct dm_arg _args[] = { |
798 | {0, 5, "invalid number of feature args"}, | 741 | {0, 5, "invalid number of feature args"}, |
799 | {1, 50, "pg_init_retries must be between 1 and 50"}, | 742 | {1, 50, "pg_init_retries must be between 1 and 50"}, |
800 | {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, | 743 | {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, |
801 | }; | 744 | }; |
802 | 745 | ||
803 | r = read_param(_params, shift(as), &argc, &ti->error); | 746 | r = dm_read_arg_group(_args, as, &argc, &ti->error); |
804 | if (r) | 747 | if (r) |
805 | return -EINVAL; | 748 | return -EINVAL; |
806 | 749 | ||
@@ -808,26 +751,24 @@ static int parse_features(struct arg_set *as, struct multipath *m) | |||
808 | return 0; | 751 | return 0; |
809 | 752 | ||
810 | do { | 753 | do { |
811 | param_name = shift(as); | 754 | arg_name = dm_shift_arg(as); |
812 | argc--; | 755 | argc--; |
813 | 756 | ||
814 | if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) { | 757 | if (!strcasecmp(arg_name, "queue_if_no_path")) { |
815 | r = queue_if_no_path(m, 1, 0); | 758 | r = queue_if_no_path(m, 1, 0); |
816 | continue; | 759 | continue; |
817 | } | 760 | } |
818 | 761 | ||
819 | if (!strnicmp(param_name, MESG_STR("pg_init_retries")) && | 762 | if (!strcasecmp(arg_name, "pg_init_retries") && |
820 | (argc >= 1)) { | 763 | (argc >= 1)) { |
821 | r = read_param(_params + 1, shift(as), | 764 | r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error); |
822 | &m->pg_init_retries, &ti->error); | ||
823 | argc--; | 765 | argc--; |
824 | continue; | 766 | continue; |
825 | } | 767 | } |
826 | 768 | ||
827 | if (!strnicmp(param_name, MESG_STR("pg_init_delay_msecs")) && | 769 | if (!strcasecmp(arg_name, "pg_init_delay_msecs") && |
828 | (argc >= 1)) { | 770 | (argc >= 1)) { |
829 | r = read_param(_params + 2, shift(as), | 771 | r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error); |
830 | &m->pg_init_delay_msecs, &ti->error); | ||
831 | argc--; | 772 | argc--; |
832 | continue; | 773 | continue; |
833 | } | 774 | } |
@@ -842,15 +783,15 @@ static int parse_features(struct arg_set *as, struct multipath *m) | |||
842 | static int multipath_ctr(struct dm_target *ti, unsigned int argc, | 783 | static int multipath_ctr(struct dm_target *ti, unsigned int argc, |
843 | char **argv) | 784 | char **argv) |
844 | { | 785 | { |
845 | /* target parameters */ | 786 | /* target arguments */ |
846 | static struct param _params[] = { | 787 | static struct dm_arg _args[] = { |
847 | {0, 1024, "invalid number of priority groups"}, | 788 | {0, 1024, "invalid number of priority groups"}, |
848 | {0, 1024, "invalid initial priority group number"}, | 789 | {0, 1024, "invalid initial priority group number"}, |
849 | }; | 790 | }; |
850 | 791 | ||
851 | int r; | 792 | int r; |
852 | struct multipath *m; | 793 | struct multipath *m; |
853 | struct arg_set as; | 794 | struct dm_arg_set as; |
854 | unsigned pg_count = 0; | 795 | unsigned pg_count = 0; |
855 | unsigned next_pg_num; | 796 | unsigned next_pg_num; |
856 | 797 | ||
@@ -871,11 +812,11 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, | |||
871 | if (r) | 812 | if (r) |
872 | goto bad; | 813 | goto bad; |
873 | 814 | ||
874 | r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error); | 815 | r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error); |
875 | if (r) | 816 | if (r) |
876 | goto bad; | 817 | goto bad; |
877 | 818 | ||
878 | r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error); | 819 | r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error); |
879 | if (r) | 820 | if (r) |
880 | goto bad; | 821 | goto bad; |
881 | 822 | ||
@@ -1505,10 +1446,10 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) | |||
1505 | } | 1446 | } |
1506 | 1447 | ||
1507 | if (argc == 1) { | 1448 | if (argc == 1) { |
1508 | if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) { | 1449 | if (!strcasecmp(argv[0], "queue_if_no_path")) { |
1509 | r = queue_if_no_path(m, 1, 0); | 1450 | r = queue_if_no_path(m, 1, 0); |
1510 | goto out; | 1451 | goto out; |
1511 | } else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) { | 1452 | } else if (!strcasecmp(argv[0], "fail_if_no_path")) { |
1512 | r = queue_if_no_path(m, 0, 0); | 1453 | r = queue_if_no_path(m, 0, 0); |
1513 | goto out; | 1454 | goto out; |
1514 | } | 1455 | } |
@@ -1519,18 +1460,18 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) | |||
1519 | goto out; | 1460 | goto out; |
1520 | } | 1461 | } |
1521 | 1462 | ||
1522 | if (!strnicmp(argv[0], MESG_STR("disable_group"))) { | 1463 | if (!strcasecmp(argv[0], "disable_group")) { |
1523 | r = bypass_pg_num(m, argv[1], 1); | 1464 | r = bypass_pg_num(m, argv[1], 1); |
1524 | goto out; | 1465 | goto out; |
1525 | } else if (!strnicmp(argv[0], MESG_STR("enable_group"))) { | 1466 | } else if (!strcasecmp(argv[0], "enable_group")) { |
1526 | r = bypass_pg_num(m, argv[1], 0); | 1467 | r = bypass_pg_num(m, argv[1], 0); |
1527 | goto out; | 1468 | goto out; |
1528 | } else if (!strnicmp(argv[0], MESG_STR("switch_group"))) { | 1469 | } else if (!strcasecmp(argv[0], "switch_group")) { |
1529 | r = switch_pg_num(m, argv[1]); | 1470 | r = switch_pg_num(m, argv[1]); |
1530 | goto out; | 1471 | goto out; |
1531 | } else if (!strnicmp(argv[0], MESG_STR("reinstate_path"))) | 1472 | } else if (!strcasecmp(argv[0], "reinstate_path")) |
1532 | action = reinstate_path; | 1473 | action = reinstate_path; |
1533 | else if (!strnicmp(argv[0], MESG_STR("fail_path"))) | 1474 | else if (!strcasecmp(argv[0], "fail_path")) |
1534 | action = fail_path; | 1475 | action = fail_path; |
1535 | else { | 1476 | else { |
1536 | DMWARN("Unrecognised multipath message received."); | 1477 | DMWARN("Unrecognised multipath message received."); |
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index e5d8904fc8f..a002dd85db1 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c | |||
@@ -8,19 +8,19 @@ | |||
8 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
9 | 9 | ||
10 | #include "md.h" | 10 | #include "md.h" |
11 | #include "raid1.h" | ||
11 | #include "raid5.h" | 12 | #include "raid5.h" |
12 | #include "dm.h" | ||
13 | #include "bitmap.h" | 13 | #include "bitmap.h" |
14 | 14 | ||
15 | #include <linux/device-mapper.h> | ||
16 | |||
15 | #define DM_MSG_PREFIX "raid" | 17 | #define DM_MSG_PREFIX "raid" |
16 | 18 | ||
17 | /* | 19 | /* |
18 | * If the MD doesn't support MD_SYNC_STATE_FORCED yet, then | 20 | * The following flags are used by dm-raid.c to set up the array state. |
19 | * make it so the flag doesn't set anything. | 21 | * They must be cleared before md_run is called. |
20 | */ | 22 | */ |
21 | #ifndef MD_SYNC_STATE_FORCED | 23 | #define FirstUse 10 /* rdev flag */ |
22 | #define MD_SYNC_STATE_FORCED 0 | ||
23 | #endif | ||
24 | 24 | ||
25 | struct raid_dev { | 25 | struct raid_dev { |
26 | /* | 26 | /* |
@@ -43,14 +43,15 @@ struct raid_dev { | |||
43 | /* | 43 | /* |
44 | * Flags for rs->print_flags field. | 44 | * Flags for rs->print_flags field. |
45 | */ | 45 | */ |
46 | #define DMPF_DAEMON_SLEEP 0x1 | 46 | #define DMPF_SYNC 0x1 |
47 | #define DMPF_MAX_WRITE_BEHIND 0x2 | 47 | #define DMPF_NOSYNC 0x2 |
48 | #define DMPF_SYNC 0x4 | 48 | #define DMPF_REBUILD 0x4 |
49 | #define DMPF_NOSYNC 0x8 | 49 | #define DMPF_DAEMON_SLEEP 0x8 |
50 | #define DMPF_STRIPE_CACHE 0x10 | 50 | #define DMPF_MIN_RECOVERY_RATE 0x10 |
51 | #define DMPF_MIN_RECOVERY_RATE 0x20 | 51 | #define DMPF_MAX_RECOVERY_RATE 0x20 |
52 | #define DMPF_MAX_RECOVERY_RATE 0x40 | 52 | #define DMPF_MAX_WRITE_BEHIND 0x40 |
53 | 53 | #define DMPF_STRIPE_CACHE 0x80 | |
54 | #define DMPF_REGION_SIZE 0X100 | ||
54 | struct raid_set { | 55 | struct raid_set { |
55 | struct dm_target *ti; | 56 | struct dm_target *ti; |
56 | 57 | ||
@@ -72,6 +73,7 @@ static struct raid_type { | |||
72 | const unsigned level; /* RAID level. */ | 73 | const unsigned level; /* RAID level. */ |
73 | const unsigned algorithm; /* RAID algorithm. */ | 74 | const unsigned algorithm; /* RAID algorithm. */ |
74 | } raid_types[] = { | 75 | } raid_types[] = { |
76 | {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, | ||
75 | {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, | 77 | {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, |
76 | {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, | 78 | {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, |
77 | {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, | 79 | {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, |
@@ -105,7 +107,8 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra | |||
105 | } | 107 | } |
106 | 108 | ||
107 | sectors_per_dev = ti->len; | 109 | sectors_per_dev = ti->len; |
108 | if (sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { | 110 | if ((raid_type->level > 1) && |
111 | sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { | ||
109 | ti->error = "Target length not divisible by number of data devices"; | 112 | ti->error = "Target length not divisible by number of data devices"; |
110 | return ERR_PTR(-EINVAL); | 113 | return ERR_PTR(-EINVAL); |
111 | } | 114 | } |
@@ -147,9 +150,16 @@ static void context_free(struct raid_set *rs) | |||
147 | { | 150 | { |
148 | int i; | 151 | int i; |
149 | 152 | ||
150 | for (i = 0; i < rs->md.raid_disks; i++) | 153 | for (i = 0; i < rs->md.raid_disks; i++) { |
154 | if (rs->dev[i].meta_dev) | ||
155 | dm_put_device(rs->ti, rs->dev[i].meta_dev); | ||
156 | if (rs->dev[i].rdev.sb_page) | ||
157 | put_page(rs->dev[i].rdev.sb_page); | ||
158 | rs->dev[i].rdev.sb_page = NULL; | ||
159 | rs->dev[i].rdev.sb_loaded = 0; | ||
151 | if (rs->dev[i].data_dev) | 160 | if (rs->dev[i].data_dev) |
152 | dm_put_device(rs->ti, rs->dev[i].data_dev); | 161 | dm_put_device(rs->ti, rs->dev[i].data_dev); |
162 | } | ||
153 | 163 | ||
154 | kfree(rs); | 164 | kfree(rs); |
155 | } | 165 | } |
@@ -159,7 +169,16 @@ static void context_free(struct raid_set *rs) | |||
159 | * <meta_dev>: meta device name or '-' if missing | 169 | * <meta_dev>: meta device name or '-' if missing |
160 | * <data_dev>: data device name or '-' if missing | 170 | * <data_dev>: data device name or '-' if missing |
161 | * | 171 | * |
162 | * This code parses those words. | 172 | * The following are permitted: |
173 | * - - | ||
174 | * - <data_dev> | ||
175 | * <meta_dev> <data_dev> | ||
176 | * | ||
177 | * The following is not allowed: | ||
178 | * <meta_dev> - | ||
179 | * | ||
180 | * This code parses those words. If there is a failure, | ||
181 | * the caller must use context_free to unwind the operations. | ||
163 | */ | 182 | */ |
164 | static int dev_parms(struct raid_set *rs, char **argv) | 183 | static int dev_parms(struct raid_set *rs, char **argv) |
165 | { | 184 | { |
@@ -182,8 +201,16 @@ static int dev_parms(struct raid_set *rs, char **argv) | |||
182 | rs->dev[i].rdev.mddev = &rs->md; | 201 | rs->dev[i].rdev.mddev = &rs->md; |
183 | 202 | ||
184 | if (strcmp(argv[0], "-")) { | 203 | if (strcmp(argv[0], "-")) { |
185 | rs->ti->error = "Metadata devices not supported"; | 204 | ret = dm_get_device(rs->ti, argv[0], |
186 | return -EINVAL; | 205 | dm_table_get_mode(rs->ti->table), |
206 | &rs->dev[i].meta_dev); | ||
207 | rs->ti->error = "RAID metadata device lookup failure"; | ||
208 | if (ret) | ||
209 | return ret; | ||
210 | |||
211 | rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL); | ||
212 | if (!rs->dev[i].rdev.sb_page) | ||
213 | return -ENOMEM; | ||
187 | } | 214 | } |
188 | 215 | ||
189 | if (!strcmp(argv[1], "-")) { | 216 | if (!strcmp(argv[1], "-")) { |
@@ -193,6 +220,10 @@ static int dev_parms(struct raid_set *rs, char **argv) | |||
193 | return -EINVAL; | 220 | return -EINVAL; |
194 | } | 221 | } |
195 | 222 | ||
223 | rs->ti->error = "No data device supplied with metadata device"; | ||
224 | if (rs->dev[i].meta_dev) | ||
225 | return -EINVAL; | ||
226 | |||
196 | continue; | 227 | continue; |
197 | } | 228 | } |
198 | 229 | ||
@@ -204,6 +235,10 @@ static int dev_parms(struct raid_set *rs, char **argv) | |||
204 | return ret; | 235 | return ret; |
205 | } | 236 | } |
206 | 237 | ||
238 | if (rs->dev[i].meta_dev) { | ||
239 | metadata_available = 1; | ||
240 | rs->dev[i].rdev.meta_bdev = rs->dev[i].meta_dev->bdev; | ||
241 | } | ||
207 | rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev; | 242 | rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev; |
208 | list_add(&rs->dev[i].rdev.same_set, &rs->md.disks); | 243 | list_add(&rs->dev[i].rdev.same_set, &rs->md.disks); |
209 | if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) | 244 | if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) |
@@ -235,33 +270,109 @@ static int dev_parms(struct raid_set *rs, char **argv) | |||
235 | } | 270 | } |
236 | 271 | ||
237 | /* | 272 | /* |
273 | * validate_region_size | ||
274 | * @rs | ||
275 | * @region_size: region size in sectors. If 0, pick a size (4MiB default). | ||
276 | * | ||
277 | * Set rs->md.bitmap_info.chunksize (which really refers to 'region size'). | ||
278 | * Ensure that (ti->len/region_size < 2^21) - required by MD bitmap. | ||
279 | * | ||
280 | * Returns: 0 on success, -EINVAL on failure. | ||
281 | */ | ||
282 | static int validate_region_size(struct raid_set *rs, unsigned long region_size) | ||
283 | { | ||
284 | unsigned long min_region_size = rs->ti->len / (1 << 21); | ||
285 | |||
286 | if (!region_size) { | ||
287 | /* | ||
288 | * Choose a reasonable default. All figures in sectors. | ||
289 | */ | ||
290 | if (min_region_size > (1 << 13)) { | ||
291 | DMINFO("Choosing default region size of %lu sectors", | ||
292 | region_size); | ||
293 | region_size = min_region_size; | ||
294 | } else { | ||
295 | DMINFO("Choosing default region size of 4MiB"); | ||
296 | region_size = 1 << 13; /* sectors */ | ||
297 | } | ||
298 | } else { | ||
299 | /* | ||
300 | * Validate user-supplied value. | ||
301 | */ | ||
302 | if (region_size > rs->ti->len) { | ||
303 | rs->ti->error = "Supplied region size is too large"; | ||
304 | return -EINVAL; | ||
305 | } | ||
306 | |||
307 | if (region_size < min_region_size) { | ||
308 | DMERR("Supplied region_size (%lu sectors) below minimum (%lu)", | ||
309 | region_size, min_region_size); | ||
310 | rs->ti->error = "Supplied region size is too small"; | ||
311 | return -EINVAL; | ||
312 | } | ||
313 | |||
314 | if (!is_power_of_2(region_size)) { | ||
315 | rs->ti->error = "Region size is not a power of 2"; | ||
316 | return -EINVAL; | ||
317 | } | ||
318 | |||
319 | if (region_size < rs->md.chunk_sectors) { | ||
320 | rs->ti->error = "Region size is smaller than the chunk size"; | ||
321 | return -EINVAL; | ||
322 | } | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * Convert sectors to bytes. | ||
327 | */ | ||
328 | rs->md.bitmap_info.chunksize = (region_size << 9); | ||
329 | |||
330 | return 0; | ||
331 | } | ||
332 | |||
333 | /* | ||
238 | * Possible arguments are... | 334 | * Possible arguments are... |
239 | * RAID456: | ||
240 | * <chunk_size> [optional_args] | 335 | * <chunk_size> [optional_args] |
241 | * | 336 | * |
242 | * Optional args: | 337 | * Argument definitions |
243 | * [[no]sync] Force or prevent recovery of the entire array | 338 | * <chunk_size> The number of sectors per disk that |
339 | * will form the "stripe" | ||
340 | * [[no]sync] Force or prevent recovery of the | ||
341 | * entire array | ||
244 | * [rebuild <idx>] Rebuild the drive indicated by the index | 342 | * [rebuild <idx>] Rebuild the drive indicated by the index |
245 | * [daemon_sleep <ms>] Time between bitmap daemon work to clear bits | 343 | * [daemon_sleep <ms>] Time between bitmap daemon work to |
344 | * clear bits | ||
246 | * [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization | 345 | * [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization |
247 | * [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization | 346 | * [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization |
347 | * [write_mostly <idx>] Indicate a write mostly drive via index | ||
248 | * [max_write_behind <sectors>] See '-write-behind=' (man mdadm) | 348 | * [max_write_behind <sectors>] See '-write-behind=' (man mdadm) |
249 | * [stripe_cache <sectors>] Stripe cache size for higher RAIDs | 349 | * [stripe_cache <sectors>] Stripe cache size for higher RAIDs |
350 | * [region_size <sectors>] Defines granularity of bitmap | ||
250 | */ | 351 | */ |
251 | static int parse_raid_params(struct raid_set *rs, char **argv, | 352 | static int parse_raid_params(struct raid_set *rs, char **argv, |
252 | unsigned num_raid_params) | 353 | unsigned num_raid_params) |
253 | { | 354 | { |
254 | unsigned i, rebuild_cnt = 0; | 355 | unsigned i, rebuild_cnt = 0; |
255 | unsigned long value; | 356 | unsigned long value, region_size = 0; |
256 | char *key; | 357 | char *key; |
257 | 358 | ||
258 | /* | 359 | /* |
259 | * First, parse the in-order required arguments | 360 | * First, parse the in-order required arguments |
361 | * "chunk_size" is the only argument of this type. | ||
260 | */ | 362 | */ |
261 | if ((strict_strtoul(argv[0], 10, &value) < 0) || | 363 | if ((strict_strtoul(argv[0], 10, &value) < 0)) { |
262 | !is_power_of_2(value) || (value < 8)) { | ||
263 | rs->ti->error = "Bad chunk size"; | 364 | rs->ti->error = "Bad chunk size"; |
264 | return -EINVAL; | 365 | return -EINVAL; |
366 | } else if (rs->raid_type->level == 1) { | ||
367 | if (value) | ||
368 | DMERR("Ignoring chunk size parameter for RAID 1"); | ||
369 | value = 0; | ||
370 | } else if (!is_power_of_2(value)) { | ||
371 | rs->ti->error = "Chunk size must be a power of 2"; | ||
372 | return -EINVAL; | ||
373 | } else if (value < 8) { | ||
374 | rs->ti->error = "Chunk size value is too small"; | ||
375 | return -EINVAL; | ||
265 | } | 376 | } |
266 | 377 | ||
267 | rs->md.new_chunk_sectors = rs->md.chunk_sectors = value; | 378 | rs->md.new_chunk_sectors = rs->md.chunk_sectors = value; |
@@ -269,22 +380,39 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
269 | num_raid_params--; | 380 | num_raid_params--; |
270 | 381 | ||
271 | /* | 382 | /* |
272 | * Second, parse the unordered optional arguments | 383 | * We set each individual device as In_sync with a completed |
384 | * 'recovery_offset'. If there has been a device failure or | ||
385 | * replacement then one of the following cases applies: | ||
386 | * | ||
387 | * 1) User specifies 'rebuild'. | ||
388 | * - Device is reset when param is read. | ||
389 | * 2) A new device is supplied. | ||
390 | * - No matching superblock found, resets device. | ||
391 | * 3) Device failure was transient and returns on reload. | ||
392 | * - Failure noticed, resets device for bitmap replay. | ||
393 | * 4) Device hadn't completed recovery after previous failure. | ||
394 | * - Superblock is read and overrides recovery_offset. | ||
395 | * | ||
396 | * What is found in the superblocks of the devices is always | ||
397 | * authoritative, unless 'rebuild' or '[no]sync' was specified. | ||
273 | */ | 398 | */ |
274 | for (i = 0; i < rs->md.raid_disks; i++) | 399 | for (i = 0; i < rs->md.raid_disks; i++) { |
275 | set_bit(In_sync, &rs->dev[i].rdev.flags); | 400 | set_bit(In_sync, &rs->dev[i].rdev.flags); |
401 | rs->dev[i].rdev.recovery_offset = MaxSector; | ||
402 | } | ||
276 | 403 | ||
404 | /* | ||
405 | * Second, parse the unordered optional arguments | ||
406 | */ | ||
277 | for (i = 0; i < num_raid_params; i++) { | 407 | for (i = 0; i < num_raid_params; i++) { |
278 | if (!strcmp(argv[i], "nosync")) { | 408 | if (!strcasecmp(argv[i], "nosync")) { |
279 | rs->md.recovery_cp = MaxSector; | 409 | rs->md.recovery_cp = MaxSector; |
280 | rs->print_flags |= DMPF_NOSYNC; | 410 | rs->print_flags |= DMPF_NOSYNC; |
281 | rs->md.flags |= MD_SYNC_STATE_FORCED; | ||
282 | continue; | 411 | continue; |
283 | } | 412 | } |
284 | if (!strcmp(argv[i], "sync")) { | 413 | if (!strcasecmp(argv[i], "sync")) { |
285 | rs->md.recovery_cp = 0; | 414 | rs->md.recovery_cp = 0; |
286 | rs->print_flags |= DMPF_SYNC; | 415 | rs->print_flags |= DMPF_SYNC; |
287 | rs->md.flags |= MD_SYNC_STATE_FORCED; | ||
288 | continue; | 416 | continue; |
289 | } | 417 | } |
290 | 418 | ||
@@ -300,9 +428,13 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
300 | return -EINVAL; | 428 | return -EINVAL; |
301 | } | 429 | } |
302 | 430 | ||
303 | if (!strcmp(key, "rebuild")) { | 431 | if (!strcasecmp(key, "rebuild")) { |
304 | if (++rebuild_cnt > rs->raid_type->parity_devs) { | 432 | rebuild_cnt++; |
305 | rs->ti->error = "Too many rebuild drives given"; | 433 | if (((rs->raid_type->level != 1) && |
434 | (rebuild_cnt > rs->raid_type->parity_devs)) || | ||
435 | ((rs->raid_type->level == 1) && | ||
436 | (rebuild_cnt > (rs->md.raid_disks - 1)))) { | ||
437 | rs->ti->error = "Too many rebuild devices specified for given RAID type"; | ||
306 | return -EINVAL; | 438 | return -EINVAL; |
307 | } | 439 | } |
308 | if (value > rs->md.raid_disks) { | 440 | if (value > rs->md.raid_disks) { |
@@ -311,7 +443,22 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
311 | } | 443 | } |
312 | clear_bit(In_sync, &rs->dev[value].rdev.flags); | 444 | clear_bit(In_sync, &rs->dev[value].rdev.flags); |
313 | rs->dev[value].rdev.recovery_offset = 0; | 445 | rs->dev[value].rdev.recovery_offset = 0; |
314 | } else if (!strcmp(key, "max_write_behind")) { | 446 | rs->print_flags |= DMPF_REBUILD; |
447 | } else if (!strcasecmp(key, "write_mostly")) { | ||
448 | if (rs->raid_type->level != 1) { | ||
449 | rs->ti->error = "write_mostly option is only valid for RAID1"; | ||
450 | return -EINVAL; | ||
451 | } | ||
452 | if (value > rs->md.raid_disks) { | ||
453 | rs->ti->error = "Invalid write_mostly drive index given"; | ||
454 | return -EINVAL; | ||
455 | } | ||
456 | set_bit(WriteMostly, &rs->dev[value].rdev.flags); | ||
457 | } else if (!strcasecmp(key, "max_write_behind")) { | ||
458 | if (rs->raid_type->level != 1) { | ||
459 | rs->ti->error = "max_write_behind option is only valid for RAID1"; | ||
460 | return -EINVAL; | ||
461 | } | ||
315 | rs->print_flags |= DMPF_MAX_WRITE_BEHIND; | 462 | rs->print_flags |= DMPF_MAX_WRITE_BEHIND; |
316 | 463 | ||
317 | /* | 464 | /* |
@@ -324,14 +471,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
324 | return -EINVAL; | 471 | return -EINVAL; |
325 | } | 472 | } |
326 | rs->md.bitmap_info.max_write_behind = value; | 473 | rs->md.bitmap_info.max_write_behind = value; |
327 | } else if (!strcmp(key, "daemon_sleep")) { | 474 | } else if (!strcasecmp(key, "daemon_sleep")) { |
328 | rs->print_flags |= DMPF_DAEMON_SLEEP; | 475 | rs->print_flags |= DMPF_DAEMON_SLEEP; |
329 | if (!value || (value > MAX_SCHEDULE_TIMEOUT)) { | 476 | if (!value || (value > MAX_SCHEDULE_TIMEOUT)) { |
330 | rs->ti->error = "daemon sleep period out of range"; | 477 | rs->ti->error = "daemon sleep period out of range"; |
331 | return -EINVAL; | 478 | return -EINVAL; |
332 | } | 479 | } |
333 | rs->md.bitmap_info.daemon_sleep = value; | 480 | rs->md.bitmap_info.daemon_sleep = value; |
334 | } else if (!strcmp(key, "stripe_cache")) { | 481 | } else if (!strcasecmp(key, "stripe_cache")) { |
335 | rs->print_flags |= DMPF_STRIPE_CACHE; | 482 | rs->print_flags |= DMPF_STRIPE_CACHE; |
336 | 483 | ||
337 | /* | 484 | /* |
@@ -348,20 +495,23 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
348 | rs->ti->error = "Bad stripe_cache size"; | 495 | rs->ti->error = "Bad stripe_cache size"; |
349 | return -EINVAL; | 496 | return -EINVAL; |
350 | } | 497 | } |
351 | } else if (!strcmp(key, "min_recovery_rate")) { | 498 | } else if (!strcasecmp(key, "min_recovery_rate")) { |
352 | rs->print_flags |= DMPF_MIN_RECOVERY_RATE; | 499 | rs->print_flags |= DMPF_MIN_RECOVERY_RATE; |
353 | if (value > INT_MAX) { | 500 | if (value > INT_MAX) { |
354 | rs->ti->error = "min_recovery_rate out of range"; | 501 | rs->ti->error = "min_recovery_rate out of range"; |
355 | return -EINVAL; | 502 | return -EINVAL; |
356 | } | 503 | } |
357 | rs->md.sync_speed_min = (int)value; | 504 | rs->md.sync_speed_min = (int)value; |
358 | } else if (!strcmp(key, "max_recovery_rate")) { | 505 | } else if (!strcasecmp(key, "max_recovery_rate")) { |
359 | rs->print_flags |= DMPF_MAX_RECOVERY_RATE; | 506 | rs->print_flags |= DMPF_MAX_RECOVERY_RATE; |
360 | if (value > INT_MAX) { | 507 | if (value > INT_MAX) { |
361 | rs->ti->error = "max_recovery_rate out of range"; | 508 | rs->ti->error = "max_recovery_rate out of range"; |
362 | return -EINVAL; | 509 | return -EINVAL; |
363 | } | 510 | } |
364 | rs->md.sync_speed_max = (int)value; | 511 | rs->md.sync_speed_max = (int)value; |
512 | } else if (!strcasecmp(key, "region_size")) { | ||
513 | rs->print_flags |= DMPF_REGION_SIZE; | ||
514 | region_size = value; | ||
365 | } else { | 515 | } else { |
366 | DMERR("Unable to parse RAID parameter: %s", key); | 516 | DMERR("Unable to parse RAID parameter: %s", key); |
367 | rs->ti->error = "Unable to parse RAID parameters"; | 517 | rs->ti->error = "Unable to parse RAID parameters"; |
@@ -369,6 +519,19 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
369 | } | 519 | } |
370 | } | 520 | } |
371 | 521 | ||
522 | if (validate_region_size(rs, region_size)) | ||
523 | return -EINVAL; | ||
524 | |||
525 | if (rs->md.chunk_sectors) | ||
526 | rs->ti->split_io = rs->md.chunk_sectors; | ||
527 | else | ||
528 | rs->ti->split_io = region_size; | ||
529 | |||
530 | if (rs->md.chunk_sectors) | ||
531 | rs->ti->split_io = rs->md.chunk_sectors; | ||
532 | else | ||
533 | rs->ti->split_io = region_size; | ||
534 | |||
372 | /* Assume there are no metadata devices until the drives are parsed */ | 535 | /* Assume there are no metadata devices until the drives are parsed */ |
373 | rs->md.persistent = 0; | 536 | rs->md.persistent = 0; |
374 | rs->md.external = 1; | 537 | rs->md.external = 1; |
@@ -387,17 +550,351 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) | |||
387 | { | 550 | { |
388 | struct raid_set *rs = container_of(cb, struct raid_set, callbacks); | 551 | struct raid_set *rs = container_of(cb, struct raid_set, callbacks); |
389 | 552 | ||
553 | if (rs->raid_type->level == 1) | ||
554 | return md_raid1_congested(&rs->md, bits); | ||
555 | |||
390 | return md_raid5_congested(&rs->md, bits); | 556 | return md_raid5_congested(&rs->md, bits); |
391 | } | 557 | } |
392 | 558 | ||
393 | /* | 559 | /* |
560 | * This structure is never routinely used by userspace, unlike md superblocks. | ||
561 | * Devices with this superblock should only ever be accessed via device-mapper. | ||
562 | */ | ||
563 | #define DM_RAID_MAGIC 0x64526D44 | ||
564 | struct dm_raid_superblock { | ||
565 | __le32 magic; /* "DmRd" */ | ||
566 | __le32 features; /* Used to indicate possible future changes */ | ||
567 | |||
568 | __le32 num_devices; /* Number of devices in this array. (Max 64) */ | ||
569 | __le32 array_position; /* The position of this drive in the array */ | ||
570 | |||
571 | __le64 events; /* Incremented by md when superblock updated */ | ||
572 | __le64 failed_devices; /* Bit field of devices to indicate failures */ | ||
573 | |||
574 | /* | ||
575 | * This offset tracks the progress of the repair or replacement of | ||
576 | * an individual drive. | ||
577 | */ | ||
578 | __le64 disk_recovery_offset; | ||
579 | |||
580 | /* | ||
581 | * This offset tracks the progress of the initial array | ||
582 | * synchronisation/parity calculation. | ||
583 | */ | ||
584 | __le64 array_resync_offset; | ||
585 | |||
586 | /* | ||
587 | * RAID characteristics | ||
588 | */ | ||
589 | __le32 level; | ||
590 | __le32 layout; | ||
591 | __le32 stripe_sectors; | ||
592 | |||
593 | __u8 pad[452]; /* Round struct to 512 bytes. */ | ||
594 | /* Always set to 0 when writing. */ | ||
595 | } __packed; | ||
596 | |||
597 | static int read_disk_sb(mdk_rdev_t *rdev, int size) | ||
598 | { | ||
599 | BUG_ON(!rdev->sb_page); | ||
600 | |||
601 | if (rdev->sb_loaded) | ||
602 | return 0; | ||
603 | |||
604 | if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) { | ||
605 | DMERR("Failed to read device superblock"); | ||
606 | return -EINVAL; | ||
607 | } | ||
608 | |||
609 | rdev->sb_loaded = 1; | ||
610 | |||
611 | return 0; | ||
612 | } | ||
613 | |||
614 | static void super_sync(mddev_t *mddev, mdk_rdev_t *rdev) | ||
615 | { | ||
616 | mdk_rdev_t *r, *t; | ||
617 | uint64_t failed_devices; | ||
618 | struct dm_raid_superblock *sb; | ||
619 | |||
620 | sb = page_address(rdev->sb_page); | ||
621 | failed_devices = le64_to_cpu(sb->failed_devices); | ||
622 | |||
623 | rdev_for_each(r, t, mddev) | ||
624 | if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags)) | ||
625 | failed_devices |= (1ULL << r->raid_disk); | ||
626 | |||
627 | memset(sb, 0, sizeof(*sb)); | ||
628 | |||
629 | sb->magic = cpu_to_le32(DM_RAID_MAGIC); | ||
630 | sb->features = cpu_to_le32(0); /* No features yet */ | ||
631 | |||
632 | sb->num_devices = cpu_to_le32(mddev->raid_disks); | ||
633 | sb->array_position = cpu_to_le32(rdev->raid_disk); | ||
634 | |||
635 | sb->events = cpu_to_le64(mddev->events); | ||
636 | sb->failed_devices = cpu_to_le64(failed_devices); | ||
637 | |||
638 | sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset); | ||
639 | sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp); | ||
640 | |||
641 | sb->level = cpu_to_le32(mddev->level); | ||
642 | sb->layout = cpu_to_le32(mddev->layout); | ||
643 | sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors); | ||
644 | } | ||
645 | |||
646 | /* | ||
647 | * super_load | ||
648 | * | ||
649 | * This function creates a superblock if one is not found on the device | ||
650 | * and will decide which superblock to use if there's a choice. | ||
651 | * | ||
652 | * Return: 1 if use rdev, 0 if use refdev, -Exxx otherwise | ||
653 | */ | ||
654 | static int super_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev) | ||
655 | { | ||
656 | int ret; | ||
657 | struct dm_raid_superblock *sb; | ||
658 | struct dm_raid_superblock *refsb; | ||
659 | uint64_t events_sb, events_refsb; | ||
660 | |||
661 | rdev->sb_start = 0; | ||
662 | rdev->sb_size = sizeof(*sb); | ||
663 | |||
664 | ret = read_disk_sb(rdev, rdev->sb_size); | ||
665 | if (ret) | ||
666 | return ret; | ||
667 | |||
668 | sb = page_address(rdev->sb_page); | ||
669 | if (sb->magic != cpu_to_le32(DM_RAID_MAGIC)) { | ||
670 | super_sync(rdev->mddev, rdev); | ||
671 | |||
672 | set_bit(FirstUse, &rdev->flags); | ||
673 | |||
674 | /* Force writing of superblocks to disk */ | ||
675 | set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags); | ||
676 | |||
677 | /* Any superblock is better than none, choose that if given */ | ||
678 | return refdev ? 0 : 1; | ||
679 | } | ||
680 | |||
681 | if (!refdev) | ||
682 | return 1; | ||
683 | |||
684 | events_sb = le64_to_cpu(sb->events); | ||
685 | |||
686 | refsb = page_address(refdev->sb_page); | ||
687 | events_refsb = le64_to_cpu(refsb->events); | ||
688 | |||
689 | return (events_sb > events_refsb) ? 1 : 0; | ||
690 | } | ||
691 | |||
692 | static int super_init_validation(mddev_t *mddev, mdk_rdev_t *rdev) | ||
693 | { | ||
694 | int role; | ||
695 | struct raid_set *rs = container_of(mddev, struct raid_set, md); | ||
696 | uint64_t events_sb; | ||
697 | uint64_t failed_devices; | ||
698 | struct dm_raid_superblock *sb; | ||
699 | uint32_t new_devs = 0; | ||
700 | uint32_t rebuilds = 0; | ||
701 | mdk_rdev_t *r, *t; | ||
702 | struct dm_raid_superblock *sb2; | ||
703 | |||
704 | sb = page_address(rdev->sb_page); | ||
705 | events_sb = le64_to_cpu(sb->events); | ||
706 | failed_devices = le64_to_cpu(sb->failed_devices); | ||
707 | |||
708 | /* | ||
709 | * Initialise to 1 if this is a new superblock. | ||
710 | */ | ||
711 | mddev->events = events_sb ? : 1; | ||
712 | |||
713 | /* | ||
714 | * Reshaping is not currently allowed | ||
715 | */ | ||
716 | if ((le32_to_cpu(sb->level) != mddev->level) || | ||
717 | (le32_to_cpu(sb->layout) != mddev->layout) || | ||
718 | (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors)) { | ||
719 | DMERR("Reshaping arrays not yet supported."); | ||
720 | return -EINVAL; | ||
721 | } | ||
722 | |||
723 | /* We can only change the number of devices in RAID1 right now */ | ||
724 | if ((rs->raid_type->level != 1) && | ||
725 | (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) { | ||
726 | DMERR("Reshaping arrays not yet supported."); | ||
727 | return -EINVAL; | ||
728 | } | ||
729 | |||
730 | if (!(rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC))) | ||
731 | mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset); | ||
732 | |||
733 | /* | ||
734 | * During load, we set FirstUse if a new superblock was written. | ||
735 | * There are two reasons we might not have a superblock: | ||
736 | * 1) The array is brand new - in which case, all of the | ||
737 | * devices must have their In_sync bit set. Also, | ||
738 | * recovery_cp must be 0, unless forced. | ||
739 | * 2) This is a new device being added to an old array | ||
740 | * and the new device needs to be rebuilt - in which | ||
741 | * case the In_sync bit will /not/ be set and | ||
742 | * recovery_cp must be MaxSector. | ||
743 | */ | ||
744 | rdev_for_each(r, t, mddev) { | ||
745 | if (!test_bit(In_sync, &r->flags)) { | ||
746 | if (!test_bit(FirstUse, &r->flags)) | ||
747 | DMERR("Superblock area of " | ||
748 | "rebuild device %d should have been " | ||
749 | "cleared.", r->raid_disk); | ||
750 | set_bit(FirstUse, &r->flags); | ||
751 | rebuilds++; | ||
752 | } else if (test_bit(FirstUse, &r->flags)) | ||
753 | new_devs++; | ||
754 | } | ||
755 | |||
756 | if (!rebuilds) { | ||
757 | if (new_devs == mddev->raid_disks) { | ||
758 | DMINFO("Superblocks created for new array"); | ||
759 | set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); | ||
760 | } else if (new_devs) { | ||
761 | DMERR("New device injected " | ||
762 | "into existing array without 'rebuild' " | ||
763 | "parameter specified"); | ||
764 | return -EINVAL; | ||
765 | } | ||
766 | } else if (new_devs) { | ||
767 | DMERR("'rebuild' devices cannot be " | ||
768 | "injected into an array with other first-time devices"); | ||
769 | return -EINVAL; | ||
770 | } else if (mddev->recovery_cp != MaxSector) { | ||
771 | DMERR("'rebuild' specified while array is not in-sync"); | ||
772 | return -EINVAL; | ||
773 | } | ||
774 | |||
775 | /* | ||
776 | * Now we set the Faulty bit for those devices that are | ||
777 | * recorded in the superblock as failed. | ||
778 | */ | ||
779 | rdev_for_each(r, t, mddev) { | ||
780 | if (!r->sb_page) | ||
781 | continue; | ||
782 | sb2 = page_address(r->sb_page); | ||
783 | sb2->failed_devices = 0; | ||
784 | |||
785 | /* | ||
786 | * Check for any device re-ordering. | ||
787 | */ | ||
788 | if (!test_bit(FirstUse, &r->flags) && (r->raid_disk >= 0)) { | ||
789 | role = le32_to_cpu(sb2->array_position); | ||
790 | if (role != r->raid_disk) { | ||
791 | if (rs->raid_type->level != 1) { | ||
792 | rs->ti->error = "Cannot change device " | ||
793 | "positions in RAID array"; | ||
794 | return -EINVAL; | ||
795 | } | ||
796 | DMINFO("RAID1 device #%d now at position #%d", | ||
797 | role, r->raid_disk); | ||
798 | } | ||
799 | |||
800 | /* | ||
801 | * Partial recovery is performed on | ||
802 | * returning failed devices. | ||
803 | */ | ||
804 | if (failed_devices & (1 << role)) | ||
805 | set_bit(Faulty, &r->flags); | ||
806 | } | ||
807 | } | ||
808 | |||
809 | return 0; | ||
810 | } | ||
811 | |||
812 | static int super_validate(mddev_t *mddev, mdk_rdev_t *rdev) | ||
813 | { | ||
814 | struct dm_raid_superblock *sb = page_address(rdev->sb_page); | ||
815 | |||
816 | /* | ||
817 | * If mddev->events is not set, we know we have not yet initialized | ||
818 | * the array. | ||
819 | */ | ||
820 | if (!mddev->events && super_init_validation(mddev, rdev)) | ||
821 | return -EINVAL; | ||
822 | |||
823 | mddev->bitmap_info.offset = 4096 >> 9; /* Enable bitmap creation */ | ||
824 | rdev->mddev->bitmap_info.default_offset = 4096 >> 9; | ||
825 | if (!test_bit(FirstUse, &rdev->flags)) { | ||
826 | rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset); | ||
827 | if (rdev->recovery_offset != MaxSector) | ||
828 | clear_bit(In_sync, &rdev->flags); | ||
829 | } | ||
830 | |||
831 | /* | ||
832 | * If a device comes back, set it as not In_sync and no longer faulty. | ||
833 | */ | ||
834 | if (test_bit(Faulty, &rdev->flags)) { | ||
835 | clear_bit(Faulty, &rdev->flags); | ||
836 | clear_bit(In_sync, &rdev->flags); | ||
837 | rdev->saved_raid_disk = rdev->raid_disk; | ||
838 | rdev->recovery_offset = 0; | ||
839 | } | ||
840 | |||
841 | clear_bit(FirstUse, &rdev->flags); | ||
842 | |||
843 | return 0; | ||
844 | } | ||
845 | |||
846 | /* | ||
847 | * Analyse superblocks and select the freshest. | ||
848 | */ | ||
849 | static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) | ||
850 | { | ||
851 | int ret; | ||
852 | mdk_rdev_t *rdev, *freshest, *tmp; | ||
853 | mddev_t *mddev = &rs->md; | ||
854 | |||
855 | freshest = NULL; | ||
856 | rdev_for_each(rdev, tmp, mddev) { | ||
857 | if (!rdev->meta_bdev) | ||
858 | continue; | ||
859 | |||
860 | ret = super_load(rdev, freshest); | ||
861 | |||
862 | switch (ret) { | ||
863 | case 1: | ||
864 | freshest = rdev; | ||
865 | break; | ||
866 | case 0: | ||
867 | break; | ||
868 | default: | ||
869 | ti->error = "Failed to load superblock"; | ||
870 | return ret; | ||
871 | } | ||
872 | } | ||
873 | |||
874 | if (!freshest) | ||
875 | return 0; | ||
876 | |||
877 | /* | ||
878 | * Validation of the freshest device provides the source of | ||
879 | * validation for the remaining devices. | ||
880 | */ | ||
881 | ti->error = "Unable to assemble array: Invalid superblocks"; | ||
882 | if (super_validate(mddev, freshest)) | ||
883 | return -EINVAL; | ||
884 | |||
885 | rdev_for_each(rdev, tmp, mddev) | ||
886 | if ((rdev != freshest) && super_validate(mddev, rdev)) | ||
887 | return -EINVAL; | ||
888 | |||
889 | return 0; | ||
890 | } | ||
891 | |||
892 | /* | ||
394 | * Construct a RAID4/5/6 mapping: | 893 | * Construct a RAID4/5/6 mapping: |
395 | * Args: | 894 | * Args: |
396 | * <raid_type> <#raid_params> <raid_params> \ | 895 | * <raid_type> <#raid_params> <raid_params> \ |
397 | * <#raid_devs> { <meta_dev1> <dev1> .. <meta_devN> <devN> } | 896 | * <#raid_devs> { <meta_dev1> <dev1> .. <meta_devN> <devN> } |
398 | * | 897 | * |
399 | * ** metadata devices are not supported yet, use '-' instead ** | ||
400 | * | ||
401 | * <raid_params> varies by <raid_type>. See 'parse_raid_params' for | 898 | * <raid_params> varies by <raid_type>. See 'parse_raid_params' for |
402 | * details on possible <raid_params>. | 899 | * details on possible <raid_params>. |
403 | */ | 900 | */ |
@@ -465,8 +962,12 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
465 | if (ret) | 962 | if (ret) |
466 | goto bad; | 963 | goto bad; |
467 | 964 | ||
965 | rs->md.sync_super = super_sync; | ||
966 | ret = analyse_superblocks(ti, rs); | ||
967 | if (ret) | ||
968 | goto bad; | ||
969 | |||
468 | INIT_WORK(&rs->md.event_work, do_table_event); | 970 | INIT_WORK(&rs->md.event_work, do_table_event); |
469 | ti->split_io = rs->md.chunk_sectors; | ||
470 | ti->private = rs; | 971 | ti->private = rs; |
471 | 972 | ||
472 | mutex_lock(&rs->md.reconfig_mutex); | 973 | mutex_lock(&rs->md.reconfig_mutex); |
@@ -482,6 +983,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
482 | rs->callbacks.congested_fn = raid_is_congested; | 983 | rs->callbacks.congested_fn = raid_is_congested; |
483 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); | 984 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); |
484 | 985 | ||
986 | mddev_suspend(&rs->md); | ||
485 | return 0; | 987 | return 0; |
486 | 988 | ||
487 | bad: | 989 | bad: |
@@ -546,12 +1048,17 @@ static int raid_status(struct dm_target *ti, status_type_t type, | |||
546 | break; | 1048 | break; |
547 | case STATUSTYPE_TABLE: | 1049 | case STATUSTYPE_TABLE: |
548 | /* The string you would use to construct this array */ | 1050 | /* The string you would use to construct this array */ |
549 | for (i = 0; i < rs->md.raid_disks; i++) | 1051 | for (i = 0; i < rs->md.raid_disks; i++) { |
550 | if (rs->dev[i].data_dev && | 1052 | if ((rs->print_flags & DMPF_REBUILD) && |
1053 | rs->dev[i].data_dev && | ||
551 | !test_bit(In_sync, &rs->dev[i].rdev.flags)) | 1054 | !test_bit(In_sync, &rs->dev[i].rdev.flags)) |
552 | raid_param_cnt++; /* for rebuilds */ | 1055 | raid_param_cnt += 2; /* for rebuilds */ |
1056 | if (rs->dev[i].data_dev && | ||
1057 | test_bit(WriteMostly, &rs->dev[i].rdev.flags)) | ||
1058 | raid_param_cnt += 2; | ||
1059 | } | ||
553 | 1060 | ||
554 | raid_param_cnt += (hweight64(rs->print_flags) * 2); | 1061 | raid_param_cnt += (hweight64(rs->print_flags & ~DMPF_REBUILD) * 2); |
555 | if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)) | 1062 | if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)) |
556 | raid_param_cnt--; | 1063 | raid_param_cnt--; |
557 | 1064 | ||
@@ -565,7 +1072,8 @@ static int raid_status(struct dm_target *ti, status_type_t type, | |||
565 | DMEMIT(" nosync"); | 1072 | DMEMIT(" nosync"); |
566 | 1073 | ||
567 | for (i = 0; i < rs->md.raid_disks; i++) | 1074 | for (i = 0; i < rs->md.raid_disks; i++) |
568 | if (rs->dev[i].data_dev && | 1075 | if ((rs->print_flags & DMPF_REBUILD) && |
1076 | rs->dev[i].data_dev && | ||
569 | !test_bit(In_sync, &rs->dev[i].rdev.flags)) | 1077 | !test_bit(In_sync, &rs->dev[i].rdev.flags)) |
570 | DMEMIT(" rebuild %u", i); | 1078 | DMEMIT(" rebuild %u", i); |
571 | 1079 | ||
@@ -579,6 +1087,11 @@ static int raid_status(struct dm_target *ti, status_type_t type, | |||
579 | if (rs->print_flags & DMPF_MAX_RECOVERY_RATE) | 1087 | if (rs->print_flags & DMPF_MAX_RECOVERY_RATE) |
580 | DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max); | 1088 | DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max); |
581 | 1089 | ||
1090 | for (i = 0; i < rs->md.raid_disks; i++) | ||
1091 | if (rs->dev[i].data_dev && | ||
1092 | test_bit(WriteMostly, &rs->dev[i].rdev.flags)) | ||
1093 | DMEMIT(" write_mostly %u", i); | ||
1094 | |||
582 | if (rs->print_flags & DMPF_MAX_WRITE_BEHIND) | 1095 | if (rs->print_flags & DMPF_MAX_WRITE_BEHIND) |
583 | DMEMIT(" max_write_behind %lu", | 1096 | DMEMIT(" max_write_behind %lu", |
584 | rs->md.bitmap_info.max_write_behind); | 1097 | rs->md.bitmap_info.max_write_behind); |
@@ -591,9 +1104,16 @@ static int raid_status(struct dm_target *ti, status_type_t type, | |||
591 | conf ? conf->max_nr_stripes * 2 : 0); | 1104 | conf ? conf->max_nr_stripes * 2 : 0); |
592 | } | 1105 | } |
593 | 1106 | ||
1107 | if (rs->print_flags & DMPF_REGION_SIZE) | ||
1108 | DMEMIT(" region_size %lu", | ||
1109 | rs->md.bitmap_info.chunksize >> 9); | ||
1110 | |||
594 | DMEMIT(" %d", rs->md.raid_disks); | 1111 | DMEMIT(" %d", rs->md.raid_disks); |
595 | for (i = 0; i < rs->md.raid_disks; i++) { | 1112 | for (i = 0; i < rs->md.raid_disks; i++) { |
596 | DMEMIT(" -"); /* metadata device */ | 1113 | if (rs->dev[i].meta_dev) |
1114 | DMEMIT(" %s", rs->dev[i].meta_dev->name); | ||
1115 | else | ||
1116 | DMEMIT(" -"); | ||
597 | 1117 | ||
598 | if (rs->dev[i].data_dev) | 1118 | if (rs->dev[i].data_dev) |
599 | DMEMIT(" %s", rs->dev[i].data_dev->name); | 1119 | DMEMIT(" %s", rs->dev[i].data_dev->name); |
@@ -650,12 +1170,13 @@ static void raid_resume(struct dm_target *ti) | |||
650 | { | 1170 | { |
651 | struct raid_set *rs = ti->private; | 1171 | struct raid_set *rs = ti->private; |
652 | 1172 | ||
1173 | bitmap_load(&rs->md); | ||
653 | mddev_resume(&rs->md); | 1174 | mddev_resume(&rs->md); |
654 | } | 1175 | } |
655 | 1176 | ||
656 | static struct target_type raid_target = { | 1177 | static struct target_type raid_target = { |
657 | .name = "raid", | 1178 | .name = "raid", |
658 | .version = {1, 0, 0}, | 1179 | .version = {1, 1, 0}, |
659 | .module = THIS_MODULE, | 1180 | .module = THIS_MODULE, |
660 | .ctr = raid_ctr, | 1181 | .ctr = raid_ctr, |
661 | .dtr = raid_dtr, | 1182 | .dtr = raid_dtr, |
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 135c2f1fdbf..d1f1d701710 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c | |||
@@ -58,25 +58,30 @@ | |||
58 | #define NUM_SNAPSHOT_HDR_CHUNKS 1 | 58 | #define NUM_SNAPSHOT_HDR_CHUNKS 1 |
59 | 59 | ||
60 | struct disk_header { | 60 | struct disk_header { |
61 | uint32_t magic; | 61 | __le32 magic; |
62 | 62 | ||
63 | /* | 63 | /* |
64 | * Is this snapshot valid. There is no way of recovering | 64 | * Is this snapshot valid. There is no way of recovering |
65 | * an invalid snapshot. | 65 | * an invalid snapshot. |
66 | */ | 66 | */ |
67 | uint32_t valid; | 67 | __le32 valid; |
68 | 68 | ||
69 | /* | 69 | /* |
70 | * Simple, incrementing version. no backward | 70 | * Simple, incrementing version. no backward |
71 | * compatibility. | 71 | * compatibility. |
72 | */ | 72 | */ |
73 | uint32_t version; | 73 | __le32 version; |
74 | 74 | ||
75 | /* In sectors */ | 75 | /* In sectors */ |
76 | uint32_t chunk_size; | 76 | __le32 chunk_size; |
77 | }; | 77 | } __packed; |
78 | 78 | ||
79 | struct disk_exception { | 79 | struct disk_exception { |
80 | __le64 old_chunk; | ||
81 | __le64 new_chunk; | ||
82 | } __packed; | ||
83 | |||
84 | struct core_exception { | ||
80 | uint64_t old_chunk; | 85 | uint64_t old_chunk; |
81 | uint64_t new_chunk; | 86 | uint64_t new_chunk; |
82 | }; | 87 | }; |
@@ -169,10 +174,9 @@ static int alloc_area(struct pstore *ps) | |||
169 | if (!ps->area) | 174 | if (!ps->area) |
170 | goto err_area; | 175 | goto err_area; |
171 | 176 | ||
172 | ps->zero_area = vmalloc(len); | 177 | ps->zero_area = vzalloc(len); |
173 | if (!ps->zero_area) | 178 | if (!ps->zero_area) |
174 | goto err_zero_area; | 179 | goto err_zero_area; |
175 | memset(ps->zero_area, 0, len); | ||
176 | 180 | ||
177 | ps->header_area = vmalloc(len); | 181 | ps->header_area = vmalloc(len); |
178 | if (!ps->header_area) | 182 | if (!ps->header_area) |
@@ -396,32 +400,32 @@ static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) | |||
396 | } | 400 | } |
397 | 401 | ||
398 | static void read_exception(struct pstore *ps, | 402 | static void read_exception(struct pstore *ps, |
399 | uint32_t index, struct disk_exception *result) | 403 | uint32_t index, struct core_exception *result) |
400 | { | 404 | { |
401 | struct disk_exception *e = get_exception(ps, index); | 405 | struct disk_exception *de = get_exception(ps, index); |
402 | 406 | ||
403 | /* copy it */ | 407 | /* copy it */ |
404 | result->old_chunk = le64_to_cpu(e->old_chunk); | 408 | result->old_chunk = le64_to_cpu(de->old_chunk); |
405 | result->new_chunk = le64_to_cpu(e->new_chunk); | 409 | result->new_chunk = le64_to_cpu(de->new_chunk); |
406 | } | 410 | } |
407 | 411 | ||
408 | static void write_exception(struct pstore *ps, | 412 | static void write_exception(struct pstore *ps, |
409 | uint32_t index, struct disk_exception *de) | 413 | uint32_t index, struct core_exception *e) |
410 | { | 414 | { |
411 | struct disk_exception *e = get_exception(ps, index); | 415 | struct disk_exception *de = get_exception(ps, index); |
412 | 416 | ||
413 | /* copy it */ | 417 | /* copy it */ |
414 | e->old_chunk = cpu_to_le64(de->old_chunk); | 418 | de->old_chunk = cpu_to_le64(e->old_chunk); |
415 | e->new_chunk = cpu_to_le64(de->new_chunk); | 419 | de->new_chunk = cpu_to_le64(e->new_chunk); |
416 | } | 420 | } |
417 | 421 | ||
418 | static void clear_exception(struct pstore *ps, uint32_t index) | 422 | static void clear_exception(struct pstore *ps, uint32_t index) |
419 | { | 423 | { |
420 | struct disk_exception *e = get_exception(ps, index); | 424 | struct disk_exception *de = get_exception(ps, index); |
421 | 425 | ||
422 | /* clear it */ | 426 | /* clear it */ |
423 | e->old_chunk = 0; | 427 | de->old_chunk = 0; |
424 | e->new_chunk = 0; | 428 | de->new_chunk = 0; |
425 | } | 429 | } |
426 | 430 | ||
427 | /* | 431 | /* |
@@ -437,13 +441,13 @@ static int insert_exceptions(struct pstore *ps, | |||
437 | { | 441 | { |
438 | int r; | 442 | int r; |
439 | unsigned int i; | 443 | unsigned int i; |
440 | struct disk_exception de; | 444 | struct core_exception e; |
441 | 445 | ||
442 | /* presume the area is full */ | 446 | /* presume the area is full */ |
443 | *full = 1; | 447 | *full = 1; |
444 | 448 | ||
445 | for (i = 0; i < ps->exceptions_per_area; i++) { | 449 | for (i = 0; i < ps->exceptions_per_area; i++) { |
446 | read_exception(ps, i, &de); | 450 | read_exception(ps, i, &e); |
447 | 451 | ||
448 | /* | 452 | /* |
449 | * If the new_chunk is pointing at the start of | 453 | * If the new_chunk is pointing at the start of |
@@ -451,7 +455,7 @@ static int insert_exceptions(struct pstore *ps, | |||
451 | * is we know that we've hit the end of the | 455 | * is we know that we've hit the end of the |
452 | * exceptions. Therefore the area is not full. | 456 | * exceptions. Therefore the area is not full. |
453 | */ | 457 | */ |
454 | if (de.new_chunk == 0LL) { | 458 | if (e.new_chunk == 0LL) { |
455 | ps->current_committed = i; | 459 | ps->current_committed = i; |
456 | *full = 0; | 460 | *full = 0; |
457 | break; | 461 | break; |
@@ -460,13 +464,13 @@ static int insert_exceptions(struct pstore *ps, | |||
460 | /* | 464 | /* |
461 | * Keep track of the start of the free chunks. | 465 | * Keep track of the start of the free chunks. |
462 | */ | 466 | */ |
463 | if (ps->next_free <= de.new_chunk) | 467 | if (ps->next_free <= e.new_chunk) |
464 | ps->next_free = de.new_chunk + 1; | 468 | ps->next_free = e.new_chunk + 1; |
465 | 469 | ||
466 | /* | 470 | /* |
467 | * Otherwise we add the exception to the snapshot. | 471 | * Otherwise we add the exception to the snapshot. |
468 | */ | 472 | */ |
469 | r = callback(callback_context, de.old_chunk, de.new_chunk); | 473 | r = callback(callback_context, e.old_chunk, e.new_chunk); |
470 | if (r) | 474 | if (r) |
471 | return r; | 475 | return r; |
472 | } | 476 | } |
@@ -563,7 +567,7 @@ static int persistent_read_metadata(struct dm_exception_store *store, | |||
563 | ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) / | 567 | ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) / |
564 | sizeof(struct disk_exception); | 568 | sizeof(struct disk_exception); |
565 | ps->callbacks = dm_vcalloc(ps->exceptions_per_area, | 569 | ps->callbacks = dm_vcalloc(ps->exceptions_per_area, |
566 | sizeof(*ps->callbacks)); | 570 | sizeof(*ps->callbacks)); |
567 | if (!ps->callbacks) | 571 | if (!ps->callbacks) |
568 | return -ENOMEM; | 572 | return -ENOMEM; |
569 | 573 | ||
@@ -641,12 +645,12 @@ static void persistent_commit_exception(struct dm_exception_store *store, | |||
641 | { | 645 | { |
642 | unsigned int i; | 646 | unsigned int i; |
643 | struct pstore *ps = get_info(store); | 647 | struct pstore *ps = get_info(store); |
644 | struct disk_exception de; | 648 | struct core_exception ce; |
645 | struct commit_callback *cb; | 649 | struct commit_callback *cb; |
646 | 650 | ||
647 | de.old_chunk = e->old_chunk; | 651 | ce.old_chunk = e->old_chunk; |
648 | de.new_chunk = e->new_chunk; | 652 | ce.new_chunk = e->new_chunk; |
649 | write_exception(ps, ps->current_committed++, &de); | 653 | write_exception(ps, ps->current_committed++, &ce); |
650 | 654 | ||
651 | /* | 655 | /* |
652 | * Add the callback to the back of the array. This code | 656 | * Add the callback to the back of the array. This code |
@@ -670,7 +674,7 @@ static void persistent_commit_exception(struct dm_exception_store *store, | |||
670 | * If we completely filled the current area, then wipe the next one. | 674 | * If we completely filled the current area, then wipe the next one. |
671 | */ | 675 | */ |
672 | if ((ps->current_committed == ps->exceptions_per_area) && | 676 | if ((ps->current_committed == ps->exceptions_per_area) && |
673 | zero_disk_area(ps, ps->current_area + 1)) | 677 | zero_disk_area(ps, ps->current_area + 1)) |
674 | ps->valid = 0; | 678 | ps->valid = 0; |
675 | 679 | ||
676 | /* | 680 | /* |
@@ -701,7 +705,7 @@ static int persistent_prepare_merge(struct dm_exception_store *store, | |||
701 | chunk_t *last_new_chunk) | 705 | chunk_t *last_new_chunk) |
702 | { | 706 | { |
703 | struct pstore *ps = get_info(store); | 707 | struct pstore *ps = get_info(store); |
704 | struct disk_exception de; | 708 | struct core_exception ce; |
705 | int nr_consecutive; | 709 | int nr_consecutive; |
706 | int r; | 710 | int r; |
707 | 711 | ||
@@ -722,9 +726,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store, | |||
722 | ps->current_committed = ps->exceptions_per_area; | 726 | ps->current_committed = ps->exceptions_per_area; |
723 | } | 727 | } |
724 | 728 | ||
725 | read_exception(ps, ps->current_committed - 1, &de); | 729 | read_exception(ps, ps->current_committed - 1, &ce); |
726 | *last_old_chunk = de.old_chunk; | 730 | *last_old_chunk = ce.old_chunk; |
727 | *last_new_chunk = de.new_chunk; | 731 | *last_new_chunk = ce.new_chunk; |
728 | 732 | ||
729 | /* | 733 | /* |
730 | * Find number of consecutive chunks within the current area, | 734 | * Find number of consecutive chunks within the current area, |
@@ -733,9 +737,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store, | |||
733 | for (nr_consecutive = 1; nr_consecutive < ps->current_committed; | 737 | for (nr_consecutive = 1; nr_consecutive < ps->current_committed; |
734 | nr_consecutive++) { | 738 | nr_consecutive++) { |
735 | read_exception(ps, ps->current_committed - 1 - nr_consecutive, | 739 | read_exception(ps, ps->current_committed - 1 - nr_consecutive, |
736 | &de); | 740 | &ce); |
737 | if (de.old_chunk != *last_old_chunk - nr_consecutive || | 741 | if (ce.old_chunk != *last_old_chunk - nr_consecutive || |
738 | de.new_chunk != *last_new_chunk - nr_consecutive) | 742 | ce.new_chunk != *last_new_chunk - nr_consecutive) |
739 | break; | 743 | break; |
740 | } | 744 | } |
741 | 745 | ||
@@ -753,7 +757,7 @@ static int persistent_commit_merge(struct dm_exception_store *store, | |||
753 | for (i = 0; i < nr_merged; i++) | 757 | for (i = 0; i < nr_merged; i++) |
754 | clear_exception(ps, ps->current_committed - 1 - i); | 758 | clear_exception(ps, ps->current_committed - 1 - i); |
755 | 759 | ||
756 | r = area_io(ps, WRITE); | 760 | r = area_io(ps, WRITE_FLUSH_FUA); |
757 | if (r < 0) | 761 | if (r < 0) |
758 | return r; | 762 | return r; |
759 | 763 | ||
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 9ecff5f3023..6f758870fc1 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c | |||
@@ -30,16 +30,6 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; | |||
30 | ((ti)->type->name == dm_snapshot_merge_target_name) | 30 | ((ti)->type->name == dm_snapshot_merge_target_name) |
31 | 31 | ||
32 | /* | 32 | /* |
33 | * The percentage increment we will wake up users at | ||
34 | */ | ||
35 | #define WAKE_UP_PERCENT 5 | ||
36 | |||
37 | /* | ||
38 | * kcopyd priority of snapshot operations | ||
39 | */ | ||
40 | #define SNAPSHOT_COPY_PRIORITY 2 | ||
41 | |||
42 | /* | ||
43 | * The size of the mempool used to track chunks in use. | 33 | * The size of the mempool used to track chunks in use. |
44 | */ | 34 | */ |
45 | #define MIN_IOS 256 | 35 | #define MIN_IOS 256 |
@@ -180,6 +170,13 @@ struct dm_snap_pending_exception { | |||
180 | * kcopyd. | 170 | * kcopyd. |
181 | */ | 171 | */ |
182 | int started; | 172 | int started; |
173 | |||
174 | /* | ||
175 | * For writing a complete chunk, bypassing the copy. | ||
176 | */ | ||
177 | struct bio *full_bio; | ||
178 | bio_end_io_t *full_bio_end_io; | ||
179 | void *full_bio_private; | ||
183 | }; | 180 | }; |
184 | 181 | ||
185 | /* | 182 | /* |
@@ -1055,8 +1052,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1055 | 1052 | ||
1056 | s = kmalloc(sizeof(*s), GFP_KERNEL); | 1053 | s = kmalloc(sizeof(*s), GFP_KERNEL); |
1057 | if (!s) { | 1054 | if (!s) { |
1058 | ti->error = "Cannot allocate snapshot context private " | 1055 | ti->error = "Cannot allocate private snapshot structure"; |
1059 | "structure"; | ||
1060 | r = -ENOMEM; | 1056 | r = -ENOMEM; |
1061 | goto bad; | 1057 | goto bad; |
1062 | } | 1058 | } |
@@ -1380,6 +1376,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) | |||
1380 | struct dm_snapshot *s = pe->snap; | 1376 | struct dm_snapshot *s = pe->snap; |
1381 | struct bio *origin_bios = NULL; | 1377 | struct bio *origin_bios = NULL; |
1382 | struct bio *snapshot_bios = NULL; | 1378 | struct bio *snapshot_bios = NULL; |
1379 | struct bio *full_bio = NULL; | ||
1383 | int error = 0; | 1380 | int error = 0; |
1384 | 1381 | ||
1385 | if (!success) { | 1382 | if (!success) { |
@@ -1415,10 +1412,15 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) | |||
1415 | */ | 1412 | */ |
1416 | dm_insert_exception(&s->complete, e); | 1413 | dm_insert_exception(&s->complete, e); |
1417 | 1414 | ||
1418 | out: | 1415 | out: |
1419 | dm_remove_exception(&pe->e); | 1416 | dm_remove_exception(&pe->e); |
1420 | snapshot_bios = bio_list_get(&pe->snapshot_bios); | 1417 | snapshot_bios = bio_list_get(&pe->snapshot_bios); |
1421 | origin_bios = bio_list_get(&pe->origin_bios); | 1418 | origin_bios = bio_list_get(&pe->origin_bios); |
1419 | full_bio = pe->full_bio; | ||
1420 | if (full_bio) { | ||
1421 | full_bio->bi_end_io = pe->full_bio_end_io; | ||
1422 | full_bio->bi_private = pe->full_bio_private; | ||
1423 | } | ||
1422 | free_pending_exception(pe); | 1424 | free_pending_exception(pe); |
1423 | 1425 | ||
1424 | increment_pending_exceptions_done_count(); | 1426 | increment_pending_exceptions_done_count(); |
@@ -1426,10 +1428,15 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) | |||
1426 | up_write(&s->lock); | 1428 | up_write(&s->lock); |
1427 | 1429 | ||
1428 | /* Submit any pending write bios */ | 1430 | /* Submit any pending write bios */ |
1429 | if (error) | 1431 | if (error) { |
1432 | if (full_bio) | ||
1433 | bio_io_error(full_bio); | ||
1430 | error_bios(snapshot_bios); | 1434 | error_bios(snapshot_bios); |
1431 | else | 1435 | } else { |
1436 | if (full_bio) | ||
1437 | bio_endio(full_bio, 0); | ||
1432 | flush_bios(snapshot_bios); | 1438 | flush_bios(snapshot_bios); |
1439 | } | ||
1433 | 1440 | ||
1434 | retry_origin_bios(s, origin_bios); | 1441 | retry_origin_bios(s, origin_bios); |
1435 | } | 1442 | } |
@@ -1480,8 +1487,33 @@ static void start_copy(struct dm_snap_pending_exception *pe) | |||
1480 | dest.count = src.count; | 1487 | dest.count = src.count; |
1481 | 1488 | ||
1482 | /* Hand over to kcopyd */ | 1489 | /* Hand over to kcopyd */ |
1483 | dm_kcopyd_copy(s->kcopyd_client, | 1490 | dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); |
1484 | &src, 1, &dest, 0, copy_callback, pe); | 1491 | } |
1492 | |||
1493 | static void full_bio_end_io(struct bio *bio, int error) | ||
1494 | { | ||
1495 | void *callback_data = bio->bi_private; | ||
1496 | |||
1497 | dm_kcopyd_do_callback(callback_data, 0, error ? 1 : 0); | ||
1498 | } | ||
1499 | |||
1500 | static void start_full_bio(struct dm_snap_pending_exception *pe, | ||
1501 | struct bio *bio) | ||
1502 | { | ||
1503 | struct dm_snapshot *s = pe->snap; | ||
1504 | void *callback_data; | ||
1505 | |||
1506 | pe->full_bio = bio; | ||
1507 | pe->full_bio_end_io = bio->bi_end_io; | ||
1508 | pe->full_bio_private = bio->bi_private; | ||
1509 | |||
1510 | callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, | ||
1511 | copy_callback, pe); | ||
1512 | |||
1513 | bio->bi_end_io = full_bio_end_io; | ||
1514 | bio->bi_private = callback_data; | ||
1515 | |||
1516 | generic_make_request(bio); | ||
1485 | } | 1517 | } |
1486 | 1518 | ||
1487 | static struct dm_snap_pending_exception * | 1519 | static struct dm_snap_pending_exception * |
@@ -1519,6 +1551,7 @@ __find_pending_exception(struct dm_snapshot *s, | |||
1519 | bio_list_init(&pe->origin_bios); | 1551 | bio_list_init(&pe->origin_bios); |
1520 | bio_list_init(&pe->snapshot_bios); | 1552 | bio_list_init(&pe->snapshot_bios); |
1521 | pe->started = 0; | 1553 | pe->started = 0; |
1554 | pe->full_bio = NULL; | ||
1522 | 1555 | ||
1523 | if (s->store->type->prepare_exception(s->store, &pe->e)) { | 1556 | if (s->store->type->prepare_exception(s->store, &pe->e)) { |
1524 | free_pending_exception(pe); | 1557 | free_pending_exception(pe); |
@@ -1612,10 +1645,19 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, | |||
1612 | } | 1645 | } |
1613 | 1646 | ||
1614 | remap_exception(s, &pe->e, bio, chunk); | 1647 | remap_exception(s, &pe->e, bio, chunk); |
1615 | bio_list_add(&pe->snapshot_bios, bio); | ||
1616 | 1648 | ||
1617 | r = DM_MAPIO_SUBMITTED; | 1649 | r = DM_MAPIO_SUBMITTED; |
1618 | 1650 | ||
1651 | if (!pe->started && | ||
1652 | bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) { | ||
1653 | pe->started = 1; | ||
1654 | up_write(&s->lock); | ||
1655 | start_full_bio(pe, bio); | ||
1656 | goto out; | ||
1657 | } | ||
1658 | |||
1659 | bio_list_add(&pe->snapshot_bios, bio); | ||
1660 | |||
1619 | if (!pe->started) { | 1661 | if (!pe->started) { |
1620 | /* this is protected by snap->lock */ | 1662 | /* this is protected by snap->lock */ |
1621 | pe->started = 1; | 1663 | pe->started = 1; |
@@ -1628,9 +1670,9 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, | |||
1628 | map_context->ptr = track_chunk(s, chunk); | 1670 | map_context->ptr = track_chunk(s, chunk); |
1629 | } | 1671 | } |
1630 | 1672 | ||
1631 | out_unlock: | 1673 | out_unlock: |
1632 | up_write(&s->lock); | 1674 | up_write(&s->lock); |
1633 | out: | 1675 | out: |
1634 | return r; | 1676 | return r; |
1635 | } | 1677 | } |
1636 | 1678 | ||
@@ -1974,7 +2016,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector, | |||
1974 | pe_to_start_now = pe; | 2016 | pe_to_start_now = pe; |
1975 | } | 2017 | } |
1976 | 2018 | ||
1977 | next_snapshot: | 2019 | next_snapshot: |
1978 | up_write(&snap->lock); | 2020 | up_write(&snap->lock); |
1979 | 2021 | ||
1980 | if (pe_to_start_now) { | 2022 | if (pe_to_start_now) { |
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index bfe9c2333ce..986b8754bb0 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c | |||
@@ -54,7 +54,6 @@ struct dm_table { | |||
54 | sector_t *highs; | 54 | sector_t *highs; |
55 | struct dm_target *targets; | 55 | struct dm_target *targets; |
56 | 56 | ||
57 | unsigned discards_supported:1; | ||
58 | unsigned integrity_supported:1; | 57 | unsigned integrity_supported:1; |
59 | 58 | ||
60 | /* | 59 | /* |
@@ -154,12 +153,11 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size) | |||
154 | return NULL; | 153 | return NULL; |
155 | 154 | ||
156 | size = nmemb * elem_size; | 155 | size = nmemb * elem_size; |
157 | addr = vmalloc(size); | 156 | addr = vzalloc(size); |
158 | if (addr) | ||
159 | memset(addr, 0, size); | ||
160 | 157 | ||
161 | return addr; | 158 | return addr; |
162 | } | 159 | } |
160 | EXPORT_SYMBOL(dm_vcalloc); | ||
163 | 161 | ||
164 | /* | 162 | /* |
165 | * highs, and targets are managed as dynamic arrays during a | 163 | * highs, and targets are managed as dynamic arrays during a |
@@ -209,7 +207,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode, | |||
209 | INIT_LIST_HEAD(&t->devices); | 207 | INIT_LIST_HEAD(&t->devices); |
210 | INIT_LIST_HEAD(&t->target_callbacks); | 208 | INIT_LIST_HEAD(&t->target_callbacks); |
211 | atomic_set(&t->holders, 0); | 209 | atomic_set(&t->holders, 0); |
212 | t->discards_supported = 1; | ||
213 | 210 | ||
214 | if (!num_targets) | 211 | if (!num_targets) |
215 | num_targets = KEYS_PER_NODE; | 212 | num_targets = KEYS_PER_NODE; |
@@ -281,6 +278,7 @@ void dm_table_get(struct dm_table *t) | |||
281 | { | 278 | { |
282 | atomic_inc(&t->holders); | 279 | atomic_inc(&t->holders); |
283 | } | 280 | } |
281 | EXPORT_SYMBOL(dm_table_get); | ||
284 | 282 | ||
285 | void dm_table_put(struct dm_table *t) | 283 | void dm_table_put(struct dm_table *t) |
286 | { | 284 | { |
@@ -290,6 +288,7 @@ void dm_table_put(struct dm_table *t) | |||
290 | smp_mb__before_atomic_dec(); | 288 | smp_mb__before_atomic_dec(); |
291 | atomic_dec(&t->holders); | 289 | atomic_dec(&t->holders); |
292 | } | 290 | } |
291 | EXPORT_SYMBOL(dm_table_put); | ||
293 | 292 | ||
294 | /* | 293 | /* |
295 | * Checks to see if we need to extend highs or targets. | 294 | * Checks to see if we need to extend highs or targets. |
@@ -455,13 +454,14 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, | |||
455 | * Add a device to the list, or just increment the usage count if | 454 | * Add a device to the list, or just increment the usage count if |
456 | * it's already present. | 455 | * it's already present. |
457 | */ | 456 | */ |
458 | static int __table_get_device(struct dm_table *t, struct dm_target *ti, | 457 | int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, |
459 | const char *path, fmode_t mode, struct dm_dev **result) | 458 | struct dm_dev **result) |
460 | { | 459 | { |
461 | int r; | 460 | int r; |
462 | dev_t uninitialized_var(dev); | 461 | dev_t uninitialized_var(dev); |
463 | struct dm_dev_internal *dd; | 462 | struct dm_dev_internal *dd; |
464 | unsigned int major, minor; | 463 | unsigned int major, minor; |
464 | struct dm_table *t = ti->table; | ||
465 | 465 | ||
466 | BUG_ON(!t); | 466 | BUG_ON(!t); |
467 | 467 | ||
@@ -509,6 +509,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, | |||
509 | *result = &dd->dm_dev; | 509 | *result = &dd->dm_dev; |
510 | return 0; | 510 | return 0; |
511 | } | 511 | } |
512 | EXPORT_SYMBOL(dm_get_device); | ||
512 | 513 | ||
513 | int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, | 514 | int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, |
514 | sector_t start, sector_t len, void *data) | 515 | sector_t start, sector_t len, void *data) |
@@ -539,23 +540,15 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, | |||
539 | * If not we'll force DM to use PAGE_SIZE or | 540 | * If not we'll force DM to use PAGE_SIZE or |
540 | * smaller I/O, just to be safe. | 541 | * smaller I/O, just to be safe. |
541 | */ | 542 | */ |
542 | 543 | if (dm_queue_merge_is_compulsory(q) && !ti->type->merge) | |
543 | if (q->merge_bvec_fn && !ti->type->merge) | ||
544 | blk_limits_max_hw_sectors(limits, | 544 | blk_limits_max_hw_sectors(limits, |
545 | (unsigned int) (PAGE_SIZE >> 9)); | 545 | (unsigned int) (PAGE_SIZE >> 9)); |
546 | return 0; | 546 | return 0; |
547 | } | 547 | } |
548 | EXPORT_SYMBOL_GPL(dm_set_device_limits); | 548 | EXPORT_SYMBOL_GPL(dm_set_device_limits); |
549 | 549 | ||
550 | int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, | ||
551 | struct dm_dev **result) | ||
552 | { | ||
553 | return __table_get_device(ti->table, ti, path, mode, result); | ||
554 | } | ||
555 | |||
556 | |||
557 | /* | 550 | /* |
558 | * Decrement a devices use count and remove it if necessary. | 551 | * Decrement a device's use count and remove it if necessary. |
559 | */ | 552 | */ |
560 | void dm_put_device(struct dm_target *ti, struct dm_dev *d) | 553 | void dm_put_device(struct dm_target *ti, struct dm_dev *d) |
561 | { | 554 | { |
@@ -568,6 +561,7 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d) | |||
568 | kfree(dd); | 561 | kfree(dd); |
569 | } | 562 | } |
570 | } | 563 | } |
564 | EXPORT_SYMBOL(dm_put_device); | ||
571 | 565 | ||
572 | /* | 566 | /* |
573 | * Checks to see if the target joins onto the end of the table. | 567 | * Checks to see if the target joins onto the end of the table. |
@@ -791,8 +785,9 @@ int dm_table_add_target(struct dm_table *t, const char *type, | |||
791 | 785 | ||
792 | t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; | 786 | t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; |
793 | 787 | ||
794 | if (!tgt->num_discard_requests) | 788 | if (!tgt->num_discard_requests && tgt->discards_supported) |
795 | t->discards_supported = 0; | 789 | DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.", |
790 | dm_device_name(t->md), type); | ||
796 | 791 | ||
797 | return 0; | 792 | return 0; |
798 | 793 | ||
@@ -802,6 +797,63 @@ int dm_table_add_target(struct dm_table *t, const char *type, | |||
802 | return r; | 797 | return r; |
803 | } | 798 | } |
804 | 799 | ||
800 | /* | ||
801 | * Target argument parsing helpers. | ||
802 | */ | ||
803 | static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, | ||
804 | unsigned *value, char **error, unsigned grouped) | ||
805 | { | ||
806 | const char *arg_str = dm_shift_arg(arg_set); | ||
807 | |||
808 | if (!arg_str || | ||
809 | (sscanf(arg_str, "%u", value) != 1) || | ||
810 | (*value < arg->min) || | ||
811 | (*value > arg->max) || | ||
812 | (grouped && arg_set->argc < *value)) { | ||
813 | *error = arg->error; | ||
814 | return -EINVAL; | ||
815 | } | ||
816 | |||
817 | return 0; | ||
818 | } | ||
819 | |||
820 | int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, | ||
821 | unsigned *value, char **error) | ||
822 | { | ||
823 | return validate_next_arg(arg, arg_set, value, error, 0); | ||
824 | } | ||
825 | EXPORT_SYMBOL(dm_read_arg); | ||
826 | |||
827 | int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, | ||
828 | unsigned *value, char **error) | ||
829 | { | ||
830 | return validate_next_arg(arg, arg_set, value, error, 1); | ||
831 | } | ||
832 | EXPORT_SYMBOL(dm_read_arg_group); | ||
833 | |||
834 | const char *dm_shift_arg(struct dm_arg_set *as) | ||
835 | { | ||
836 | char *r; | ||
837 | |||
838 | if (as->argc) { | ||
839 | as->argc--; | ||
840 | r = *as->argv; | ||
841 | as->argv++; | ||
842 | return r; | ||
843 | } | ||
844 | |||
845 | return NULL; | ||
846 | } | ||
847 | EXPORT_SYMBOL(dm_shift_arg); | ||
848 | |||
849 | void dm_consume_args(struct dm_arg_set *as, unsigned num_args) | ||
850 | { | ||
851 | BUG_ON(as->argc < num_args); | ||
852 | as->argc -= num_args; | ||
853 | as->argv += num_args; | ||
854 | } | ||
855 | EXPORT_SYMBOL(dm_consume_args); | ||
856 | |||
805 | static int dm_table_set_type(struct dm_table *t) | 857 | static int dm_table_set_type(struct dm_table *t) |
806 | { | 858 | { |
807 | unsigned i; | 859 | unsigned i; |
@@ -1077,11 +1129,13 @@ void dm_table_event(struct dm_table *t) | |||
1077 | t->event_fn(t->event_context); | 1129 | t->event_fn(t->event_context); |
1078 | mutex_unlock(&_event_lock); | 1130 | mutex_unlock(&_event_lock); |
1079 | } | 1131 | } |
1132 | EXPORT_SYMBOL(dm_table_event); | ||
1080 | 1133 | ||
1081 | sector_t dm_table_get_size(struct dm_table *t) | 1134 | sector_t dm_table_get_size(struct dm_table *t) |
1082 | { | 1135 | { |
1083 | return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; | 1136 | return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; |
1084 | } | 1137 | } |
1138 | EXPORT_SYMBOL(dm_table_get_size); | ||
1085 | 1139 | ||
1086 | struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index) | 1140 | struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index) |
1087 | { | 1141 | { |
@@ -1194,9 +1248,45 @@ static void dm_table_set_integrity(struct dm_table *t) | |||
1194 | blk_get_integrity(template_disk)); | 1248 | blk_get_integrity(template_disk)); |
1195 | } | 1249 | } |
1196 | 1250 | ||
1251 | static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, | ||
1252 | sector_t start, sector_t len, void *data) | ||
1253 | { | ||
1254 | unsigned flush = (*(unsigned *)data); | ||
1255 | struct request_queue *q = bdev_get_queue(dev->bdev); | ||
1256 | |||
1257 | return q && (q->flush_flags & flush); | ||
1258 | } | ||
1259 | |||
1260 | static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) | ||
1261 | { | ||
1262 | struct dm_target *ti; | ||
1263 | unsigned i = 0; | ||
1264 | |||
1265 | /* | ||
1266 | * Require at least one underlying device to support flushes. | ||
1267 | * t->devices includes internal dm devices such as mirror logs | ||
1268 | * so we need to use iterate_devices here, which targets | ||
1269 | * supporting flushes must provide. | ||
1270 | */ | ||
1271 | while (i < dm_table_get_num_targets(t)) { | ||
1272 | ti = dm_table_get_target(t, i++); | ||
1273 | |||
1274 | if (!ti->num_flush_requests) | ||
1275 | continue; | ||
1276 | |||
1277 | if (ti->type->iterate_devices && | ||
1278 | ti->type->iterate_devices(ti, device_flush_capable, &flush)) | ||
1279 | return 1; | ||
1280 | } | ||
1281 | |||
1282 | return 0; | ||
1283 | } | ||
1284 | |||
1197 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, | 1285 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, |
1198 | struct queue_limits *limits) | 1286 | struct queue_limits *limits) |
1199 | { | 1287 | { |
1288 | unsigned flush = 0; | ||
1289 | |||
1200 | /* | 1290 | /* |
1201 | * Copy table's limits to the DM device's request_queue | 1291 | * Copy table's limits to the DM device's request_queue |
1202 | */ | 1292 | */ |
@@ -1207,6 +1297,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, | |||
1207 | else | 1297 | else |
1208 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); | 1298 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); |
1209 | 1299 | ||
1300 | if (dm_table_supports_flush(t, REQ_FLUSH)) { | ||
1301 | flush |= REQ_FLUSH; | ||
1302 | if (dm_table_supports_flush(t, REQ_FUA)) | ||
1303 | flush |= REQ_FUA; | ||
1304 | } | ||
1305 | blk_queue_flush(q, flush); | ||
1306 | |||
1210 | dm_table_set_integrity(t); | 1307 | dm_table_set_integrity(t); |
1211 | 1308 | ||
1212 | /* | 1309 | /* |
@@ -1237,6 +1334,7 @@ fmode_t dm_table_get_mode(struct dm_table *t) | |||
1237 | { | 1334 | { |
1238 | return t->mode; | 1335 | return t->mode; |
1239 | } | 1336 | } |
1337 | EXPORT_SYMBOL(dm_table_get_mode); | ||
1240 | 1338 | ||
1241 | static void suspend_targets(struct dm_table *t, unsigned postsuspend) | 1339 | static void suspend_targets(struct dm_table *t, unsigned postsuspend) |
1242 | { | 1340 | { |
@@ -1345,6 +1443,7 @@ struct mapped_device *dm_table_get_md(struct dm_table *t) | |||
1345 | { | 1443 | { |
1346 | return t->md; | 1444 | return t->md; |
1347 | } | 1445 | } |
1446 | EXPORT_SYMBOL(dm_table_get_md); | ||
1348 | 1447 | ||
1349 | static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, | 1448 | static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, |
1350 | sector_t start, sector_t len, void *data) | 1449 | sector_t start, sector_t len, void *data) |
@@ -1359,19 +1458,19 @@ bool dm_table_supports_discards(struct dm_table *t) | |||
1359 | struct dm_target *ti; | 1458 | struct dm_target *ti; |
1360 | unsigned i = 0; | 1459 | unsigned i = 0; |
1361 | 1460 | ||
1362 | if (!t->discards_supported) | ||
1363 | return 0; | ||
1364 | |||
1365 | /* | 1461 | /* |
1366 | * Unless any target used by the table set discards_supported, | 1462 | * Unless any target used by the table set discards_supported, |
1367 | * require at least one underlying device to support discards. | 1463 | * require at least one underlying device to support discards. |
1368 | * t->devices includes internal dm devices such as mirror logs | 1464 | * t->devices includes internal dm devices such as mirror logs |
1369 | * so we need to use iterate_devices here, which targets | 1465 | * so we need to use iterate_devices here, which targets |
1370 | * supporting discard must provide. | 1466 | * supporting discard selectively must provide. |
1371 | */ | 1467 | */ |
1372 | while (i < dm_table_get_num_targets(t)) { | 1468 | while (i < dm_table_get_num_targets(t)) { |
1373 | ti = dm_table_get_target(t, i++); | 1469 | ti = dm_table_get_target(t, i++); |
1374 | 1470 | ||
1471 | if (!ti->num_discard_requests) | ||
1472 | continue; | ||
1473 | |||
1375 | if (ti->discards_supported) | 1474 | if (ti->discards_supported) |
1376 | return 1; | 1475 | return 1; |
1377 | 1476 | ||
@@ -1382,13 +1481,3 @@ bool dm_table_supports_discards(struct dm_table *t) | |||
1382 | 1481 | ||
1383 | return 0; | 1482 | return 0; |
1384 | } | 1483 | } |
1385 | |||
1386 | EXPORT_SYMBOL(dm_vcalloc); | ||
1387 | EXPORT_SYMBOL(dm_get_device); | ||
1388 | EXPORT_SYMBOL(dm_put_device); | ||
1389 | EXPORT_SYMBOL(dm_table_event); | ||
1390 | EXPORT_SYMBOL(dm_table_get_size); | ||
1391 | EXPORT_SYMBOL(dm_table_get_mode); | ||
1392 | EXPORT_SYMBOL(dm_table_get_md); | ||
1393 | EXPORT_SYMBOL(dm_table_put); | ||
1394 | EXPORT_SYMBOL(dm_table_get); | ||
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0cf68b47887..52b39f335bb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -37,6 +37,8 @@ static const char *_name = DM_NAME; | |||
37 | static unsigned int major = 0; | 37 | static unsigned int major = 0; |
38 | static unsigned int _major = 0; | 38 | static unsigned int _major = 0; |
39 | 39 | ||
40 | static DEFINE_IDR(_minor_idr); | ||
41 | |||
40 | static DEFINE_SPINLOCK(_minor_lock); | 42 | static DEFINE_SPINLOCK(_minor_lock); |
41 | /* | 43 | /* |
42 | * For bio-based dm. | 44 | * For bio-based dm. |
@@ -109,6 +111,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); | |||
109 | #define DMF_FREEING 3 | 111 | #define DMF_FREEING 3 |
110 | #define DMF_DELETING 4 | 112 | #define DMF_DELETING 4 |
111 | #define DMF_NOFLUSH_SUSPENDING 5 | 113 | #define DMF_NOFLUSH_SUSPENDING 5 |
114 | #define DMF_MERGE_IS_OPTIONAL 6 | ||
112 | 115 | ||
113 | /* | 116 | /* |
114 | * Work processed by per-device workqueue. | 117 | * Work processed by per-device workqueue. |
@@ -313,6 +316,12 @@ static void __exit dm_exit(void) | |||
313 | 316 | ||
314 | while (i--) | 317 | while (i--) |
315 | _exits[i](); | 318 | _exits[i](); |
319 | |||
320 | /* | ||
321 | * Should be empty by this point. | ||
322 | */ | ||
323 | idr_remove_all(&_minor_idr); | ||
324 | idr_destroy(&_minor_idr); | ||
316 | } | 325 | } |
317 | 326 | ||
318 | /* | 327 | /* |
@@ -1171,7 +1180,8 @@ static int __clone_and_map_discard(struct clone_info *ci) | |||
1171 | 1180 | ||
1172 | /* | 1181 | /* |
1173 | * Even though the device advertised discard support, | 1182 | * Even though the device advertised discard support, |
1174 | * reconfiguration might have changed that since the | 1183 | * that does not mean every target supports it, and |
1184 | * reconfiguration might also have changed that since the | ||
1175 | * check was performed. | 1185 | * check was performed. |
1176 | */ | 1186 | */ |
1177 | if (!ti->num_discard_requests) | 1187 | if (!ti->num_discard_requests) |
@@ -1705,8 +1715,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits) | |||
1705 | /*----------------------------------------------------------------- | 1715 | /*----------------------------------------------------------------- |
1706 | * An IDR is used to keep track of allocated minor numbers. | 1716 | * An IDR is used to keep track of allocated minor numbers. |
1707 | *---------------------------------------------------------------*/ | 1717 | *---------------------------------------------------------------*/ |
1708 | static DEFINE_IDR(_minor_idr); | ||
1709 | |||
1710 | static void free_minor(int minor) | 1718 | static void free_minor(int minor) |
1711 | { | 1719 | { |
1712 | spin_lock(&_minor_lock); | 1720 | spin_lock(&_minor_lock); |
@@ -1800,7 +1808,6 @@ static void dm_init_md_queue(struct mapped_device *md) | |||
1800 | blk_queue_make_request(md->queue, dm_request); | 1808 | blk_queue_make_request(md->queue, dm_request); |
1801 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); | 1809 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); |
1802 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); | 1810 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); |
1803 | blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA); | ||
1804 | } | 1811 | } |
1805 | 1812 | ||
1806 | /* | 1813 | /* |
@@ -1986,6 +1993,59 @@ static void __set_size(struct mapped_device *md, sector_t size) | |||
1986 | } | 1993 | } |
1987 | 1994 | ||
1988 | /* | 1995 | /* |
1996 | * Return 1 if the queue has a compulsory merge_bvec_fn function. | ||
1997 | * | ||
1998 | * If this function returns 0, then the device is either a non-dm | ||
1999 | * device without a merge_bvec_fn, or it is a dm device that is | ||
2000 | * able to split any bios it receives that are too big. | ||
2001 | */ | ||
2002 | int dm_queue_merge_is_compulsory(struct request_queue *q) | ||
2003 | { | ||
2004 | struct mapped_device *dev_md; | ||
2005 | |||
2006 | if (!q->merge_bvec_fn) | ||
2007 | return 0; | ||
2008 | |||
2009 | if (q->make_request_fn == dm_request) { | ||
2010 | dev_md = q->queuedata; | ||
2011 | if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags)) | ||
2012 | return 0; | ||
2013 | } | ||
2014 | |||
2015 | return 1; | ||
2016 | } | ||
2017 | |||
2018 | static int dm_device_merge_is_compulsory(struct dm_target *ti, | ||
2019 | struct dm_dev *dev, sector_t start, | ||
2020 | sector_t len, void *data) | ||
2021 | { | ||
2022 | struct block_device *bdev = dev->bdev; | ||
2023 | struct request_queue *q = bdev_get_queue(bdev); | ||
2024 | |||
2025 | return dm_queue_merge_is_compulsory(q); | ||
2026 | } | ||
2027 | |||
2028 | /* | ||
2029 | * Return 1 if it is acceptable to ignore merge_bvec_fn based | ||
2030 | * on the properties of the underlying devices. | ||
2031 | */ | ||
2032 | static int dm_table_merge_is_optional(struct dm_table *table) | ||
2033 | { | ||
2034 | unsigned i = 0; | ||
2035 | struct dm_target *ti; | ||
2036 | |||
2037 | while (i < dm_table_get_num_targets(table)) { | ||
2038 | ti = dm_table_get_target(table, i++); | ||
2039 | |||
2040 | if (ti->type->iterate_devices && | ||
2041 | ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL)) | ||
2042 | return 0; | ||
2043 | } | ||
2044 | |||
2045 | return 1; | ||
2046 | } | ||
2047 | |||
2048 | /* | ||
1989 | * Returns old map, which caller must destroy. | 2049 | * Returns old map, which caller must destroy. |
1990 | */ | 2050 | */ |
1991 | static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, | 2051 | static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, |
@@ -1995,6 +2055,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, | |||
1995 | struct request_queue *q = md->queue; | 2055 | struct request_queue *q = md->queue; |
1996 | sector_t size; | 2056 | sector_t size; |
1997 | unsigned long flags; | 2057 | unsigned long flags; |
2058 | int merge_is_optional; | ||
1998 | 2059 | ||
1999 | size = dm_table_get_size(t); | 2060 | size = dm_table_get_size(t); |
2000 | 2061 | ||
@@ -2020,10 +2081,16 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, | |||
2020 | 2081 | ||
2021 | __bind_mempools(md, t); | 2082 | __bind_mempools(md, t); |
2022 | 2083 | ||
2084 | merge_is_optional = dm_table_merge_is_optional(t); | ||
2085 | |||
2023 | write_lock_irqsave(&md->map_lock, flags); | 2086 | write_lock_irqsave(&md->map_lock, flags); |
2024 | old_map = md->map; | 2087 | old_map = md->map; |
2025 | md->map = t; | 2088 | md->map = t; |
2026 | dm_table_set_restrictions(t, q, limits); | 2089 | dm_table_set_restrictions(t, q, limits); |
2090 | if (merge_is_optional) | ||
2091 | set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); | ||
2092 | else | ||
2093 | clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); | ||
2027 | write_unlock_irqrestore(&md->map_lock, flags); | 2094 | write_unlock_irqrestore(&md->map_lock, flags); |
2028 | 2095 | ||
2029 | return old_map; | 2096 | return old_map; |
diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 1aaf16746da..6745dbd278a 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h | |||
@@ -66,6 +66,8 @@ int dm_table_alloc_md_mempools(struct dm_table *t); | |||
66 | void dm_table_free_md_mempools(struct dm_table *t); | 66 | void dm_table_free_md_mempools(struct dm_table *t); |
67 | struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); | 67 | struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); |
68 | 68 | ||
69 | int dm_queue_merge_is_compulsory(struct request_queue *q); | ||
70 | |||
69 | void dm_lock_md_type(struct mapped_device *md); | 71 | void dm_lock_md_type(struct mapped_device *md); |
70 | void dm_unlock_md_type(struct mapped_device *md); | 72 | void dm_unlock_md_type(struct mapped_device *md); |
71 | void dm_set_md_type(struct mapped_device *md, unsigned type); | 73 | void dm_set_md_type(struct mapped_device *md, unsigned type); |
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 4427e045405..3fa1f3d90ce 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h | |||
@@ -208,6 +208,49 @@ struct dm_target_callbacks { | |||
208 | int dm_register_target(struct target_type *t); | 208 | int dm_register_target(struct target_type *t); |
209 | void dm_unregister_target(struct target_type *t); | 209 | void dm_unregister_target(struct target_type *t); |
210 | 210 | ||
211 | /* | ||
212 | * Target argument parsing. | ||
213 | */ | ||
214 | struct dm_arg_set { | ||
215 | unsigned argc; | ||
216 | char **argv; | ||
217 | }; | ||
218 | |||
219 | /* | ||
220 | * The minimum and maximum value of a numeric argument, together with | ||
221 | * the error message to use if the number is found to be outside that range. | ||
222 | */ | ||
223 | struct dm_arg { | ||
224 | unsigned min; | ||
225 | unsigned max; | ||
226 | char *error; | ||
227 | }; | ||
228 | |||
229 | /* | ||
230 | * Validate the next argument, either returning it as *value or, if invalid, | ||
231 | * returning -EINVAL and setting *error. | ||
232 | */ | ||
233 | int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, | ||
234 | unsigned *value, char **error); | ||
235 | |||
236 | /* | ||
237 | * Process the next argument as the start of a group containing between | ||
238 | * arg->min and arg->max further arguments. Either return the size as | ||
239 | * *num_args or, if invalid, return -EINVAL and set *error. | ||
240 | */ | ||
241 | int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, | ||
242 | unsigned *num_args, char **error); | ||
243 | |||
244 | /* | ||
245 | * Return the current argument and shift to the next. | ||
246 | */ | ||
247 | const char *dm_shift_arg(struct dm_arg_set *as); | ||
248 | |||
249 | /* | ||
250 | * Move through num_args arguments. | ||
251 | */ | ||
252 | void dm_consume_args(struct dm_arg_set *as, unsigned num_args); | ||
253 | |||
211 | /*----------------------------------------------------------------- | 254 | /*----------------------------------------------------------------- |
212 | * Functions for creating and manipulating mapped devices. | 255 | * Functions for creating and manipulating mapped devices. |
213 | * Drop the reference with dm_put when you finish with the object. | 256 | * Drop the reference with dm_put when you finish with the object. |
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index 3708455ee6c..0cb8eff76bd 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h | |||
@@ -267,9 +267,9 @@ enum { | |||
267 | #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) | 267 | #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) |
268 | 268 | ||
269 | #define DM_VERSION_MAJOR 4 | 269 | #define DM_VERSION_MAJOR 4 |
270 | #define DM_VERSION_MINOR 20 | 270 | #define DM_VERSION_MINOR 21 |
271 | #define DM_VERSION_PATCHLEVEL 0 | 271 | #define DM_VERSION_PATCHLEVEL 0 |
272 | #define DM_VERSION_EXTRA "-ioctl (2011-02-02)" | 272 | #define DM_VERSION_EXTRA "-ioctl (2011-07-06)" |
273 | 273 | ||
274 | /* Status bits */ | 274 | /* Status bits */ |
275 | #define DM_READONLY_FLAG (1 << 0) /* In/Out */ | 275 | #define DM_READONLY_FLAG (1 << 0) /* In/Out */ |
diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 298d587e349..5e54458e920 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h | |||
@@ -42,5 +42,20 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, | |||
42 | unsigned num_dests, struct dm_io_region *dests, | 42 | unsigned num_dests, struct dm_io_region *dests, |
43 | unsigned flags, dm_kcopyd_notify_fn fn, void *context); | 43 | unsigned flags, dm_kcopyd_notify_fn fn, void *context); |
44 | 44 | ||
45 | /* | ||
46 | * Prepare a callback and submit it via the kcopyd thread. | ||
47 | * | ||
48 | * dm_kcopyd_prepare_callback allocates a callback structure and returns it. | ||
49 | * It must not be called from interrupt context. | ||
50 | * The returned value should be passed into dm_kcopyd_do_callback. | ||
51 | * | ||
52 | * dm_kcopyd_do_callback submits the callback. | ||
53 | * It may be called from interrupt context. | ||
54 | * The callback is issued from the kcopyd thread. | ||
55 | */ | ||
56 | void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, | ||
57 | dm_kcopyd_notify_fn fn, void *context); | ||
58 | void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err); | ||
59 | |||
45 | #endif /* __KERNEL__ */ | 60 | #endif /* __KERNEL__ */ |
46 | #endif /* _LINUX_DM_KCOPYD_H */ | 61 | #endif /* _LINUX_DM_KCOPYD_H */ |