diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-05 00:12:47 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-05 00:12:47 -0500 |
| commit | ac322de6bf5416cb145b58599297b8be73cd86ac (patch) | |
| tree | 1a1be9f8b9241159fb4cde14a548eba9a4155b28 /include/uapi/linux | |
| parent | ccf21b69a83afaee4d5499e0d03eacf23946e08c (diff) | |
| parent | 339421def582abb14c2217aa8c8f28bb2e299174 (diff) | |
Merge tag 'md/4.4' of git://neil.brown.name/md
Pull md updates from Neil Brown:
"Two major components to this update.
1) The clustered-raid1 support from SUSE is nearly complete. There
are a few outstanding issues being worked on. Maybe half a dozen
patches will bring this to a usable state.
2) The first stage of journalled-raid5 support from Facebook makes an
appearance. With a journal device configured (typically NVRAM or
SSD), the "RAID5 write hole" should be closed - a crash during
degraded operations cannot result in data corruption.
The next stage will be to use the journal as a write-behind cache
so that latency can be reduced and in some cases throughput
increased by performing more full-stripe writes."
* tag 'md/4.4' of git://neil.brown.name/md: (66 commits)
MD: when RAID journal is missing/faulty, block RESTART_ARRAY_RW
MD: set journal disk ->raid_disk
MD: kick out journal disk if it's not fresh
raid5-cache: start raid5 readonly if journal is missing
MD: add new bit to indicate raid array with journal
raid5-cache: IO error handling
raid5: journal disk can't be removed
raid5-cache: add trim support for log
MD: fix info output for journal disk
raid5-cache: use bio chaining
raid5-cache: small log->seq cleanup
raid5-cache: new helper: r5_reserve_log_entry
raid5-cache: inline r5l_alloc_io_unit into r5l_new_meta
raid5-cache: take rdev->data_offset into account early on
raid5-cache: refactor bio allocation
raid5-cache: clean up r5l_get_meta
raid5-cache: simplify state machine when caches flushes are not needed
raid5-cache: factor out a helper to run all stripes for an I/O unit
raid5-cache: rename flushed_ios to finished_ios
raid5-cache: free I/O units earlier
...
Diffstat (limited to 'include/uapi/linux')
| -rw-r--r-- | include/uapi/linux/raid/md_p.h | 73 |
1 file changed, 72 insertions, 1 deletion
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index 2ae6131e69a5..c3e654c6d518 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h | |||
| @@ -89,6 +89,12 @@ | |||
| 89 | * read requests will only be sent here in | 89 | * read requests will only be sent here in |
| 90 | * dire need | 90 | * dire need |
| 91 | */ | 91 | */ |
| 92 | #define MD_DISK_JOURNAL 18 /* disk is used as the write journal in RAID-5/6 */ | ||
| 93 | |||
| 94 | #define MD_DISK_ROLE_SPARE 0xffff | ||
| 95 | #define MD_DISK_ROLE_FAULTY 0xfffe | ||
| 96 | #define MD_DISK_ROLE_JOURNAL 0xfffd | ||
| 97 | #define MD_DISK_ROLE_MAX 0xff00 /* max value of regular disk role */ | ||
| 92 | 98 | ||
| 93 | typedef struct mdp_device_descriptor_s { | 99 | typedef struct mdp_device_descriptor_s { |
| 94 | __u32 number; /* 0 Device number in the entire set */ | 100 | __u32 number; /* 0 Device number in the entire set */ |
| @@ -252,7 +258,10 @@ struct mdp_superblock_1 { | |||
| 252 | __le64 data_offset; /* sector start of data, often 0 */ | 258 | __le64 data_offset; /* sector start of data, often 0 */ |
| 253 | __le64 data_size; /* sectors in this device that can be used for data */ | 259 | __le64 data_size; /* sectors in this device that can be used for data */ |
| 254 | __le64 super_offset; /* sector start of this superblock */ | 260 | __le64 super_offset; /* sector start of this superblock */ |
| 255 | __le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ | 261 | union { |
| 262 | __le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ | ||
| 263 | __le64 journal_tail;/* journal tail of journal device (from data_offset) */ | ||
| 264 | }; | ||
| 256 | __le32 dev_number; /* permanent identifier of this device - not role in raid */ | 265 | __le32 dev_number; /* permanent identifier of this device - not role in raid */ |
| 257 | __le32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */ | 266 | __le32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */ |
| 258 | __u8 device_uuid[16]; /* user-space setable, ignored by kernel */ | 267 | __u8 device_uuid[16]; /* user-space setable, ignored by kernel */ |
| @@ -302,6 +311,8 @@ struct mdp_superblock_1 { | |||
| 302 | #define MD_FEATURE_RECOVERY_BITMAP 128 /* recovery that is happening | 311 | #define MD_FEATURE_RECOVERY_BITMAP 128 /* recovery that is happening |
| 303 | * is guided by bitmap. | 312 | * is guided by bitmap. |
| 304 | */ | 313 | */ |
| 314 | #define MD_FEATURE_CLUSTERED 256 /* clustered MD */ | ||
| 315 | #define MD_FEATURE_JOURNAL 512 /* support write cache */ | ||
| 305 | #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ | 316 | #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ |
| 306 | |MD_FEATURE_RECOVERY_OFFSET \ | 317 | |MD_FEATURE_RECOVERY_OFFSET \ |
| 307 | |MD_FEATURE_RESHAPE_ACTIVE \ | 318 | |MD_FEATURE_RESHAPE_ACTIVE \ |
| @@ -310,6 +321,66 @@ struct mdp_superblock_1 { | |||
| 310 | |MD_FEATURE_RESHAPE_BACKWARDS \ | 321 | |MD_FEATURE_RESHAPE_BACKWARDS \ |
| 311 | |MD_FEATURE_NEW_OFFSET \ | 322 | |MD_FEATURE_NEW_OFFSET \ |
| 312 | |MD_FEATURE_RECOVERY_BITMAP \ | 323 | |MD_FEATURE_RECOVERY_BITMAP \ |
| 324 | |MD_FEATURE_CLUSTERED \ | ||
| 325 | |MD_FEATURE_JOURNAL \ | ||
| 313 | ) | 326 | ) |
| 314 | 327 | ||
| 328 | struct r5l_payload_header { | ||
| 329 | __le16 type; | ||
| 330 | __le16 flags; | ||
| 331 | } __attribute__ ((__packed__)); | ||
| 332 | |||
| 333 | enum r5l_payload_type { | ||
| 334 | R5LOG_PAYLOAD_DATA = 0, | ||
| 335 | R5LOG_PAYLOAD_PARITY = 1, | ||
| 336 | R5LOG_PAYLOAD_FLUSH = 2, | ||
| 337 | }; | ||
| 338 | |||
| 339 | struct r5l_payload_data_parity { | ||
| 340 | struct r5l_payload_header header; | ||
| 341 | __le32 size; /* sector. data/parity size. each 4k | ||
| 342 | * has a checksum */ | ||
| 343 | __le64 location; /* sector. For data, it's raid sector. For | ||
| 344 | * parity, it's stripe sector */ | ||
| 345 | __le32 checksum[]; | ||
| 346 | } __attribute__ ((__packed__)); | ||
| 347 | |||
| 348 | enum r5l_payload_data_parity_flag { | ||
| 349 | R5LOG_PAYLOAD_FLAG_DISCARD = 1, /* payload is discard */ | ||
| 350 | /* | ||
| 351 | * RESHAPED/RESHAPING is only set when there is reshape activity. Note, | ||
| 352 | * both data/parity of a stripe should have the same flag set | ||
| 353 | * | ||
| 354 | * RESHAPED: reshape is running, and this stripe finished reshape | ||
| 355 | * RESHAPING: reshape is running, and this stripe isn't reshaped | ||
| 356 | */ | ||
| 357 | R5LOG_PAYLOAD_FLAG_RESHAPED = 2, | ||
| 358 | R5LOG_PAYLOAD_FLAG_RESHAPING = 3, | ||
| 359 | }; | ||
| 360 | |||
| 361 | struct r5l_payload_flush { | ||
| 362 | struct r5l_payload_header header; | ||
| 363 | __le32 size; /* flush_stripes size, bytes */ | ||
| 364 | __le64 flush_stripes[]; | ||
| 365 | } __attribute__ ((__packed__)); | ||
| 366 | |||
| 367 | enum r5l_payload_flush_flag { | ||
| 368 | R5LOG_PAYLOAD_FLAG_FLUSH_STRIPE = 1, /* data represents whole stripe */ | ||
| 369 | }; | ||
| 370 | |||
| 371 | struct r5l_meta_block { | ||
| 372 | __le32 magic; | ||
| 373 | __le32 checksum; | ||
| 374 | __u8 version; | ||
| 375 | __u8 __zero_pading_1; | ||
| 376 | __le16 __zero_pading_2; | ||
| 377 | __le32 meta_size; /* whole size of the block */ | ||
| 378 | |||
| 379 | __le64 seq; | ||
| 380 | __le64 position; /* sector, start from rdev->data_offset, current position */ | ||
| 381 | struct r5l_payload_header payloads[]; | ||
| 382 | } __attribute__ ((__packed__)); | ||
| 383 | |||
| 384 | #define R5LOG_VERSION 0x1 | ||
| 385 | #define R5LOG_MAGIC 0x6433c509 | ||
| 315 | #endif | 386 | #endif |
