author:    Dan Williams <dan.j.williams@intel.com>  2008-04-28 05:15:53 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org>  2008-04-28 11:58:42 -0400
commit:    8b3e6cdc53b7f29f7026955d6cb6902a49322a15
tree:      69d9f22a526e687fe2892d327caa3fa6ddd65cf6 /include/linux
parent:    e46b272b6608783ed7aa7b0594871550ce20b849
md: introduce get_priority_stripe() to improve raid456 write performance
Improve write performance by preventing the delayed_list from dumping all its
stripes onto the handle_list in one shot. Delayed stripes are now further
delayed by being held on the 'hold_list'. The 'hold_list' is bypassed when:
* a STRIPE_IO_STARTED stripe is found at the head of 'handle_list'
* 'handle_list' is empty and i/o is being done to satisfy full stripe-width
write requests
* 'bypass_count' is less than 'bypass_threshold'. By default the threshold
  is 1, i.e. every other stripe handled is a preread stripe, provided the
  top two conditions are false (a sketch of this selection logic follows
  the list).
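In code terms, the get_priority_stripe() selection described above reduces to
roughly the following simplified sketch. Field names match the new
raid5_private_data members added in the header below; the merged code in
drivers/md/raid5.c also holds the device lock, emits debug output, and
carries a BUG_ON reference-count check, all omitted here:

	static struct stripe_head *__get_priority_stripe(raid5_conf_t *conf)
	{
		struct stripe_head *sh;

		if (!list_empty(&conf->handle_list)) {
			/* preferred path: service handle_list first */
			sh = list_entry(conf->handle_list.next, typeof(*sh), lru);

			if (list_empty(&conf->hold_list))
				conf->bypass_count = 0; /* nothing is waiting */
			else if (!test_bit(STRIPE_IO_STARTED, &sh->state))
				conf->bypass_count++; /* hold_list was bypassed
						       * (refined by 'last_hold'
						       * below, see v3 notes) */
		} else if (!list_empty(&conf->hold_list) &&
			   (conf->bypass_count > conf->bypass_threshold ||
			    atomic_read(&conf->pending_full_writes) == 0)) {
			/* promote a preread stripe off of hold_list */
			sh = list_entry(conf->hold_list.next, typeof(*sh), lru);
			conf->bypass_count -= conf->bypass_threshold;
			if (conf->bypass_count < 0)
				conf->bypass_count = 0;
		} else
			return NULL; /* nothing eligible to handle */

		list_del_init(&sh->lru);
		atomic_inc(&sh->count);
		return sh;
	}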
Benchmark data:
System: 2x Xeon 5150, 4x SATA, mem=1GB
Baseline: 2.6.24-rc7
Configuration: mdadm --create /dev/md0 /dev/sd[b-e] -n 4 -l 5 --assume-clean
Test1: dd if=/dev/zero of=/dev/md0 bs=1024k count=2048
* patched: +33% (stripe_cache_size = 256), +25% (stripe_cache_size = 512)
Test2: tiobench --size 2048 --numruns 5 --block 4096 --block 131072 (XFS)
* patched: +13%
* patched + preread_bypass_threshold = 0: +37%
Changes since v1:
* reduce bypass_threshold from (chunk_size / sectors_per_chunk) to (1) and
  make it configurable at runtime (see the sysfs example below). This
  defaults to fairness and modest performance gains out of the box.
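The threshold is exposed through the md sysfs directory, so the Test2 tuning
above can be reproduced at runtime; assuming the array is md0:

	# trade preread fairness for throughput, as in Test2 above
	echo 0 > /sys/block/md0/md/preread_bypass_threshold
	cat /sys/block/md0/md/preread_bypass_threshold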
Changes since v2:
* [neilb@suse.de]: kill STRIPE_PRIO_HI and preread_needed as they are not
necessary, the important change was clearing STRIPE_DELAYED in
add_stripe_bio and this has been moved out to make_request for the hang
fix.
* [neilb@suse.de]: simplify get_priority_stripe
* [dan.j.williams@intel.com]: reset the bypass_count when ->hold_list is
sampled empty (+11%)
* [dan.j.williams@intel.com]: decrement the bypass_count at the detection
  of stripes being naturally promoted off of hold_list (+2%); a sketch of
  this 'last_hold' accounting follows below. Note, resetting bypass_count
  instead of decrementing on these events yields +4%, but that is probably
  too aggressive.
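The promotion detection remembers the head of hold_list between calls in the
new 'last_hold' pointer; if the head has changed without a bypass, a stripe
must have left hold_list on its own. Roughly, this refines the simple
increment in the sketch above (same assumptions as before):

	else if (!test_bit(STRIPE_IO_STARTED, &sh->state)) {
		if (conf->hold_list.next == conf->last_hold)
			conf->bypass_count++; /* same stripe still waiting */
		else {
			/* head changed: a stripe was naturally promoted,
			 * so hand back one threshold's worth of credit */
			conf->last_hold = conf->hold_list.next;
			conf->bypass_count -= conf->bypass_threshold;
			if (conf->bypass_count < 0)
				conf->bypass_count = 0;
		}
	}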
Changes since v3:
* cosmetic fixups
Tested-by: James W. Laferriere <babydr@baby-dragons.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/raid/raid5.h | 7 +++++++
1 file changed, 7 insertions(+), 0 deletions(-)
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 93678f57ccbe..f0827d31ae6f 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -252,6 +252,8 @@ struct r6_state {
 #define	STRIPE_EXPANDING	9
 #define	STRIPE_EXPAND_SOURCE	10
 #define	STRIPE_EXPAND_READY	11
+#define	STRIPE_IO_STARTED	12 /* do not count towards 'bypass_count' */
+#define	STRIPE_FULL_WRITE	13 /* all blocks are set to be overwritten */
 /*
  * Operations flags (in issue order)
  */
@@ -316,12 +318,17 @@ struct raid5_private_data {
 	int			previous_raid_disks;
 
 	struct list_head	handle_list; /* stripes needing handling */
+	struct list_head	hold_list; /* preread ready stripes */
 	struct list_head	delayed_list; /* stripes that have plugged requests */
 	struct list_head	bitmap_list; /* stripes delaying awaiting bitmap update */
 	struct bio		*retry_read_aligned; /* currently retrying aligned bios */
 	struct bio		*retry_read_aligned_list; /* aligned bios retry list */
 	atomic_t		preread_active_stripes; /* stripes with scheduled io */
 	atomic_t		active_aligned_reads;
+	atomic_t		pending_full_writes; /* full write backlog */
+	int			bypass_count; /* bypassed prereads */
+	int			bypass_threshold; /* preread nice */
+	struct list_head	*last_hold; /* detect hold_list promotions */
 
 	atomic_t		reshape_stripes; /* stripes with pending writes for reshape */
 	/* unfortunately we need two cache names as we temporarily have