author     Linus Torvalds <torvalds@linux-foundation.org>  2013-09-10 16:03:41 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-09-10 16:03:41 -0400
commit     4d7696f1b05f4aeb586c74868fe3da2731daca4b (patch)
tree       dd6cf4d41df2c0a1f52a85a3f8b8af5a9ebdeb5d
parent     b05430fc9341fea7a6228a3611c850a476809596 (diff)
parent     bfc90cb0936f5b972706625f38f72c7cb726c20a (diff)
Merge tag 'md/3.12' of git://neil.brown.name/md
Pull md update from Neil Brown:
 "Headline item is multithreading for RAID5 so that more IO/sec can be
  supported on fast (SSD) devices.  Also TILE-Gx SIMD support for RAID6
  calculations and an assortment of bug fixes"
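The headline feature is driven from userspace through a new sysfs attribute, group_thread_cnt, added by the raid5.c changes below ("raid5: sysfs entry to control worker thread number"). A minimal sketch of enabling it, assuming a hypothetical array named md0 and an arbitrary count of 4 (neither value comes from this merge):

#include <stdio.h>

int main(void)
{
        /* group_thread_cnt is the attribute registered below via
         * raid5_group_thread_cnt; "md0" is a made-up array name. */
        FILE *f = fopen("/sys/block/md0/md/group_thread_cnt", "w");

        if (!f) {
                perror("group_thread_cnt");
                return 1;
        }
        fprintf(f, "4\n");      /* parsed by raid5_store_group_thread_cnt() */
        return fclose(f) ? 1 : 0;
}

Writing 0 again disables the worker threads; multi-threading stays off by default, since setup_conf() calls alloc_thread_groups(conf, 0).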
* tag 'md/3.12' of git://neil.brown.name/md:
raid5: only wakeup necessary threads
md/raid5: flush out all pending requests before proceeding with reshape.
md/raid5: use seqcount to protect access to shape in make_request.
raid5: sysfs entry to control worker thread number
raid5: offload stripe handle to workqueue
raid5: fix stripe release order
raid5: make release_stripe lockless
md: avoid deadlock when dirty buffers during md_stop.
md: Don't test all of mddev->flags at once.
md: Fix apparent cut-and-paste error in super_90_validate
raid6/test: replace echo -e with printf
RAID: add tilegx SIMD implementation of raid6
md: fix safe_mode buglet.
md: don't call md_allow_write in get_bitmap_file.
-rw-r--r--  drivers/md/md.c          |  54
-rw-r--r--  drivers/md/md.h          |   8
-rw-r--r--  drivers/md/raid5.c       | 362
-rw-r--r--  drivers/md/raid5.h       |  22
-rw-r--r--  include/linux/raid/pq.h  |   1
-rw-r--r--  lib/raid6/Makefile       |   6
-rw-r--r--  lib/raid6/algos.c        |   3
-rw-r--r--  lib/raid6/test/Makefile  |   9
-rw-r--r--  lib/raid6/tilegx.uc      |  86
9 files changed, 510 insertions, 41 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9f13e13506ef..adf4d7e1d5e1 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1180,7 +1180,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
 			mddev->bitmap_info.offset =
 				mddev->bitmap_info.default_offset;
 			mddev->bitmap_info.space =
-				mddev->bitmap_info.space;
+				mddev->bitmap_info.default_space;
 		}

 	} else if (mddev->pers == NULL) {
@@ -3429,7 +3429,7 @@ safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
 		mddev->safemode_delay = (msec*HZ)/1000;
 		if (mddev->safemode_delay == 0)
 			mddev->safemode_delay = 1;
-		if (mddev->safemode_delay < old_delay)
+		if (mddev->safemode_delay < old_delay || old_delay == 0)
 			md_safemode_timeout((unsigned long)mddev);
 	}
 	return len;
@@ -5144,7 +5144,7 @@ int md_run(struct mddev *mddev)

 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);

-	if (mddev->flags)
+	if (mddev->flags & MD_UPDATE_SB_FLAGS)
 		md_update_sb(mddev, 0);

 	md_new_event(mddev);
@@ -5289,7 +5289,7 @@ static void __md_stop_writes(struct mddev *mddev)
 	md_super_wait(mddev);

 	if (mddev->ro == 0 &&
-	    (!mddev->in_sync || mddev->flags)) {
+	    (!mddev->in_sync || (mddev->flags & MD_UPDATE_SB_FLAGS))) {
 		/* mark array as shutdown cleanly */
 		mddev->in_sync = 1;
 		md_update_sb(mddev, 1);
@@ -5337,8 +5337,14 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 		err = -EBUSY;
 		goto out;
 	}
-	if (bdev)
-		sync_blockdev(bdev);
+	if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
+		/* Someone opened the device since we flushed it
+		 * so page cache could be dirty and it is too late
+		 * to flush.  So abort
+		 */
+		mutex_unlock(&mddev->open_mutex);
+		return -EBUSY;
+	}
 	if (mddev->pers) {
 		__md_stop_writes(mddev);

@@ -5373,14 +5379,14 @@ static int do_md_stop(struct mddev * mddev, int mode,
 		mutex_unlock(&mddev->open_mutex);
 		return -EBUSY;
 	}
-	if (bdev)
-		/* It is possible IO was issued on some other
-		 * open file which was closed before we took ->open_mutex.
-		 * As that was not the last close __blkdev_put will not
-		 * have called sync_blockdev, so we must.
+	if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
+		/* Someone opened the device since we flushed it
+		 * so page cache could be dirty and it is too late
+		 * to flush.  So abort
 		 */
-		sync_blockdev(bdev);
-
+		mutex_unlock(&mddev->open_mutex);
+		return -EBUSY;
+	}
 	if (mddev->pers) {
 		if (mddev->ro)
 			set_disk_ro(disk, 0);
@@ -5628,10 +5634,7 @@ static int get_bitmap_file(struct mddev * mddev, void __user * arg)
 	char *ptr, *buf = NULL;
 	int err = -ENOMEM;

-	if (md_allow_write(mddev))
-		file = kmalloc(sizeof(*file), GFP_NOIO);
-	else
-		file = kmalloc(sizeof(*file), GFP_KERNEL);
+	file = kmalloc(sizeof(*file), GFP_NOIO);

 	if (!file)
 		goto out;
@@ -6420,6 +6423,20 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 					   !test_bit(MD_RECOVERY_NEEDED,
 						     &mddev->flags),
 					   msecs_to_jiffies(5000));
+	if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
+		/* Need to flush page cache, and ensure no-one else opens
+		 * and writes
+		 */
+		mutex_lock(&mddev->open_mutex);
+		if (atomic_read(&mddev->openers) > 1) {
+			mutex_unlock(&mddev->open_mutex);
+			err = -EBUSY;
+			goto abort;
+		}
+		set_bit(MD_STILL_CLOSED, &mddev->flags);
+		mutex_unlock(&mddev->open_mutex);
+		sync_blockdev(bdev);
+	}
 	err = mddev_lock(mddev);
 	if (err) {
 		printk(KERN_INFO
@@ -6673,6 +6690,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)

 	err = 0;
 	atomic_inc(&mddev->openers);
+	clear_bit(MD_STILL_CLOSED, &mddev->flags);
 	mutex_unlock(&mddev->open_mutex);

 	check_disk_change(bdev);
@@ -7817,7 +7835,7 @@ void md_check_recovery(struct mddev *mddev)
 			sysfs_notify_dirent_safe(mddev->sysfs_state);
 		}

-		if (mddev->flags)
+		if (mddev->flags & MD_UPDATE_SB_FLAGS)
 			md_update_sb(mddev, 0);

 		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 20f02c0b5f2d..608050c43f17 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -204,12 +204,16 @@ struct mddev {
 	struct md_personality		*pers;
 	dev_t				unit;
 	int				md_minor;
 	struct list_head		disks;
 	unsigned long			flags;
 #define MD_CHANGE_DEVS	0	/* Some device status has changed */
 #define MD_CHANGE_CLEAN 1	/* transition to or from 'clean' */
 #define MD_CHANGE_PENDING 2	/* switch from 'clean' to 'active' in progress */
+#define MD_UPDATE_SB_FLAGS (1 | 2 | 4)	/* If these are set, md_update_sb needed */
 #define MD_ARRAY_FIRST_USE 3    /* First use of array, needs initialization */
+#define MD_STILL_CLOSED	4	/* If set, then array has not been opened since
+				 * md_ioctl checked on it.
+				 */

 	int				suspended;
 	atomic_t			active_io;
@@ -218,7 +222,7 @@ struct mddev {
 						       * are happening, so run/
 						       * takeover/stop are not safe
 						       */
 	int				ready; /* See when safe to pass
 						* IO requests down */
 	struct gendisk			*gendisk;

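A detail worth spelling out in the md.h hunk above: the MD_CHANGE_* and MD_STILL_CLOSED macros are bit numbers passed to set_bit()/test_bit(), while MD_UPDATE_SB_FLAGS is already a mask, (1 | 2 | 4), covering bits 0-2 only. That is exactly why the bare "if (mddev->flags)" tests in md.c had to become masked tests once bit 4 started living in the same word. A standalone sketch of the failure mode (userspace stand-ins, not kernel code):

#include <stdio.h>

#define MD_CHANGE_DEVS		0	/* bit numbers, as in md.h */
#define MD_CHANGE_CLEAN		1
#define MD_CHANGE_PENDING	2
#define MD_STILL_CLOSED		4	/* a bit number, not a mask */
#define MD_UPDATE_SB_FLAGS	(1 | 2 | 4)	/* a mask: bits 0..2 == 7 */

int main(void)
{
        unsigned long flags = 0;

        flags |= 1UL << MD_STILL_CLOSED;  /* set_bit(MD_STILL_CLOSED, &flags) */

        /* The old "if (mddev->flags)" fires on MD_STILL_CLOSED alone and
         * would trigger a spurious md_update_sb(); the masked test does not. */
        printf("old test: %d, new test: %d\n",
               flags != 0, (flags & MD_UPDATE_SB_FLAGS) != 0);
        return 0;	/* prints: old test: 1, new test: 0 */
}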
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 78ea44336e75..7ff4f252ca1a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -53,6 +53,7 @@
 #include <linux/cpu.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
+#include <linux/nodemask.h>
 #include <trace/events/block.h>

 #include "md.h"
@@ -60,6 +61,10 @@
 #include "raid0.h"
 #include "bitmap.h"

+#define cpu_to_group(cpu) cpu_to_node(cpu)
+#define ANY_GROUP NUMA_NO_NODE
+
+static struct workqueue_struct *raid5_wq;
 /*
  * Stripe cache
  */
@@ -72,6 +77,7 @@
 #define BYPASS_THRESHOLD	1
 #define NR_HASH			(PAGE_SIZE / sizeof(struct hlist_head))
 #define HASH_MASK		(NR_HASH - 1)
+#define MAX_STRIPE_BATCH	8

 static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
 {
@@ -200,6 +206,49 @@ static int stripe_operations_active(struct stripe_head *sh)
 	       test_bit(STRIPE_COMPUTE_RUN, &sh->state);
 }

+static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
+{
+	struct r5conf *conf = sh->raid_conf;
+	struct r5worker_group *group;
+	int thread_cnt;
+	int i, cpu = sh->cpu;
+
+	if (!cpu_online(cpu)) {
+		cpu = cpumask_any(cpu_online_mask);
+		sh->cpu = cpu;
+	}
+
+	if (list_empty(&sh->lru)) {
+		struct r5worker_group *group;
+		group = conf->worker_groups + cpu_to_group(cpu);
+		list_add_tail(&sh->lru, &group->handle_list);
+		group->stripes_cnt++;
+		sh->group = group;
+	}
+
+	if (conf->worker_cnt_per_group == 0) {
+		md_wakeup_thread(conf->mddev->thread);
+		return;
+	}
+
+	group = conf->worker_groups + cpu_to_group(sh->cpu);
+
+	group->workers[0].working = true;
+	/* at least one worker should run to avoid race */
+	queue_work_on(sh->cpu, raid5_wq, &group->workers[0].work);
+
+	thread_cnt = group->stripes_cnt / MAX_STRIPE_BATCH - 1;
+	/* wakeup more workers */
+	for (i = 1; i < conf->worker_cnt_per_group && thread_cnt > 0; i++) {
+		if (group->workers[i].working == false) {
+			group->workers[i].working = true;
+			queue_work_on(sh->cpu, raid5_wq,
+				      &group->workers[i].work);
+			thread_cnt--;
+		}
+	}
+}
+
 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
 {
 	BUG_ON(!list_empty(&sh->lru));
@@ -214,7 +263,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
 		else {
 			clear_bit(STRIPE_DELAYED, &sh->state);
 			clear_bit(STRIPE_BIT_DELAY, &sh->state);
-			list_add_tail(&sh->lru, &conf->handle_list);
+			if (conf->worker_cnt_per_group == 0) {
+				list_add_tail(&sh->lru, &conf->handle_list);
+			} else {
+				raid5_wakeup_stripe_thread(sh);
+				return;
+			}
 		}
 		md_wakeup_thread(conf->mddev->thread);
 	} else {
@@ -239,12 +293,62 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
 		do_release_stripe(conf, sh);
 }

+static struct llist_node *llist_reverse_order(struct llist_node *head)
+{
+	struct llist_node *new_head = NULL;
+
+	while (head) {
+		struct llist_node *tmp = head;
+		head = head->next;
+		tmp->next = new_head;
+		new_head = tmp;
+	}
+
+	return new_head;
+}
+
+/* should hold conf->device_lock already */
+static int release_stripe_list(struct r5conf *conf)
+{
+	struct stripe_head *sh;
+	int count = 0;
+	struct llist_node *head;
+
+	head = llist_del_all(&conf->released_stripes);
+	head = llist_reverse_order(head);
+	while (head) {
+		sh = llist_entry(head, struct stripe_head, release_list);
+		head = llist_next(head);
+		/* sh could be readded after STRIPE_ON_RELEASE_LIST is cleard */
+		smp_mb();
+		clear_bit(STRIPE_ON_RELEASE_LIST, &sh->state);
+		/*
+		 * Don't worry the bit is set here, because if the bit is set
+		 * again, the count is always > 1. This is true for
+		 * STRIPE_ON_UNPLUG_LIST bit too.
+		 */
+		__release_stripe(conf, sh);
+		count++;
+	}
+
+	return count;
+}
+
 static void release_stripe(struct stripe_head *sh)
 {
 	struct r5conf *conf = sh->raid_conf;
 	unsigned long flags;
+	bool wakeup;

+	if (test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
+		goto slow_path;
+	wakeup = llist_add(&sh->release_list, &conf->released_stripes);
+	if (wakeup)
+		md_wakeup_thread(conf->mddev->thread);
+	return;
+slow_path:
 	local_irq_save(flags);
+	/* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
 	if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
 		do_release_stripe(conf, sh);
 		spin_unlock(&conf->device_lock);
@@ -359,6 +463,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
 		raid5_build_block(sh, i, previous);
 	}
 	insert_hash(conf, sh);
+	sh->cpu = smp_processor_id();
 }

 static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
@@ -491,7 +596,8 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 			if (atomic_read(&sh->count)) {
 				BUG_ON(!list_empty(&sh->lru)
 				       && !test_bit(STRIPE_EXPANDING, &sh->state)
-				       && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state));
+				       && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)
+				       && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state));
 			} else {
 				if (!test_bit(STRIPE_HANDLE, &sh->state))
 					atomic_inc(&conf->active_stripes);
@@ -499,6 +605,10 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 				    !test_bit(STRIPE_EXPANDING, &sh->state))
 					BUG();
 				list_del_init(&sh->lru);
+				if (sh->group) {
+					sh->group->stripes_cnt--;
+					sh->group = NULL;
+				}
 			}
 		}
 	} while (sh == NULL);
@@ -3779,6 +3889,7 @@ static void raid5_activate_delayed(struct r5conf *conf)
 			if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
 				atomic_inc(&conf->preread_active_stripes);
 			list_add_tail(&sh->lru, &conf->hold_list);
+			raid5_wakeup_stripe_thread(sh);
 		}
 	}
 }
@@ -4058,18 +4169,35 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
  * head of the hold_list has changed, i.e. the head was promoted to the
  * handle_list.
  */
-static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
+static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
 {
-	struct stripe_head *sh;
+	struct stripe_head *sh = NULL, *tmp;
+	struct list_head *handle_list = NULL;
+	struct r5worker_group *wg = NULL;
+
+	if (conf->worker_cnt_per_group == 0) {
+		handle_list = &conf->handle_list;
+	} else if (group != ANY_GROUP) {
+		handle_list = &conf->worker_groups[group].handle_list;
+		wg = &conf->worker_groups[group];
+	} else {
+		int i;
+		for (i = 0; i < conf->group_cnt; i++) {
+			handle_list = &conf->worker_groups[i].handle_list;
+			wg = &conf->worker_groups[i];
+			if (!list_empty(handle_list))
+				break;
+		}
+	}

 	pr_debug("%s: handle: %s hold: %s full_writes: %d bypass_count: %d\n",
 		 __func__,
-		 list_empty(&conf->handle_list) ? "empty" : "busy",
+		 list_empty(handle_list) ? "empty" : "busy",
 		 list_empty(&conf->hold_list) ? "empty" : "busy",
 		 atomic_read(&conf->pending_full_writes), conf->bypass_count);

-	if (!list_empty(&conf->handle_list)) {
-		sh = list_entry(conf->handle_list.next, typeof(*sh), lru);
+	if (!list_empty(handle_list)) {
+		sh = list_entry(handle_list->next, typeof(*sh), lru);

 		if (list_empty(&conf->hold_list))
 			conf->bypass_count = 0;
@@ -4087,14 +4215,32 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
 		   ((conf->bypass_threshold &&
 		     conf->bypass_count > conf->bypass_threshold) ||
 		    atomic_read(&conf->pending_full_writes) == 0)) {
-		sh = list_entry(conf->hold_list.next,
-				typeof(*sh), lru);
-		conf->bypass_count -= conf->bypass_threshold;
-		if (conf->bypass_count < 0)
-			conf->bypass_count = 0;
-	} else
+
+		list_for_each_entry(tmp, &conf->hold_list, lru) {
+			if (conf->worker_cnt_per_group == 0 ||
+			    group == ANY_GROUP ||
+			    !cpu_online(tmp->cpu) ||
+			    cpu_to_group(tmp->cpu) == group) {
+				sh = tmp;
+				break;
+			}
+		}
+
+		if (sh) {
+			conf->bypass_count -= conf->bypass_threshold;
+			if (conf->bypass_count < 0)
+				conf->bypass_count = 0;
+		}
+		wg = NULL;
+	}
+
+	if (!sh)
 		return NULL;

+	if (wg) {
+		wg->stripes_cnt--;
+		sh->group = NULL;
+	}
 	list_del_init(&sh->lru);
 	atomic_inc(&sh->count);
 	BUG_ON(atomic_read(&sh->count) != 1);
@@ -4127,6 +4273,10 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
 			 */
 			smp_mb__before_clear_bit();
 			clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
+			/*
+			 * STRIPE_ON_RELEASE_LIST could be set here. In that
+			 * case, the count is always > 1 here
+			 */
 			__release_stripe(conf, sh);
 			cnt++;
 		}
@@ -4286,8 +4436,10 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
 		DEFINE_WAIT(w);
 		int previous;
+		int seq;

 	retry:
+		seq = read_seqcount_begin(&conf->gen_lock);
 		previous = 0;
 		prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
 		if (unlikely(conf->reshape_progress != MaxSector)) {
@@ -4320,7 +4472,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 						  previous,
 						  &dd_idx, NULL);
 		pr_debug("raid456: make_request, sector %llu logical %llu\n",
 			(unsigned long long)new_sector,
 			(unsigned long long)logical_sector);

 		sh = get_active_stripe(conf, new_sector, previous,
@@ -4349,6 +4501,13 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 				goto retry;
 			}
 		}
+		if (read_seqcount_retry(&conf->gen_lock, seq)) {
+			/* Might have got the wrong stripe_head
+			 * by accident
+			 */
+			release_stripe(sh);
+			goto retry;
+		}

 		if (rw == WRITE &&
 		    logical_sector >= mddev->suspend_lo &&
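The read_seqcount_begin()/read_seqcount_retry() pair above is the usual optimistic-read protocol: sample the generation, compute the stripe mapping against a possibly changing geometry, and redo the work if a reshape bumped the generation in between. A single-threaded sketch of just that control flow (the names are illustrative; the kernel's seqcount_t additionally supplies the memory barriers that make this safe between CPUs):

#include <stdio.h>

static unsigned gen;		/* stands in for conf->gen_lock */
static int raid_disks = 4;	/* geometry a reshape may change */

/* stand-in for raid5_compute_sector() and friends */
static int compute_target(int sector)
{
        return sector % raid_disks;
}

static int map_sector(int sector)
{
        unsigned seq;
        int target;

        do {
                seq = gen;			/* read_seqcount_begin() */
                target = compute_target(sector);
        } while (seq != gen);			/* read_seqcount_retry() */

        return target;
}

int main(void)
{
        printf("sector 10 -> disk %d\n", map_sector(10));
        return 0;
}

The writer side appears later in raid5_start_reshape(): write_seqcount_begin()/write_seqcount_end() bracket the geometry update under conf->device_lock.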
@@ -4788,14 +4947,14 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 	return handled;
 }

-#define MAX_STRIPE_BATCH 8
-static int handle_active_stripes(struct r5conf *conf)
+static int handle_active_stripes(struct r5conf *conf, int group,
+				 struct r5worker *worker)
 {
 	struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
 	int i, batch_size = 0;

 	while (batch_size < MAX_STRIPE_BATCH &&
-			(sh = __get_priority_stripe(conf)) != NULL)
+			(sh = __get_priority_stripe(conf, group)) != NULL)
 		batch[batch_size++] = sh;

 	if (batch_size == 0)
@@ -4813,6 +4972,39 @@ static int handle_active_stripes(struct r5conf *conf)
 	return batch_size;
 }

+static void raid5_do_work(struct work_struct *work)
+{
+	struct r5worker *worker = container_of(work, struct r5worker, work);
+	struct r5worker_group *group = worker->group;
+	struct r5conf *conf = group->conf;
+	int group_id = group - conf->worker_groups;
+	int handled;
+	struct blk_plug plug;
+
+	pr_debug("+++ raid5worker active\n");
+
+	blk_start_plug(&plug);
+	handled = 0;
+	spin_lock_irq(&conf->device_lock);
+	while (1) {
+		int batch_size, released;
+
+		released = release_stripe_list(conf);
+
+		batch_size = handle_active_stripes(conf, group_id, worker);
+		worker->working = false;
+		if (!batch_size && !released)
+			break;
+		handled += batch_size;
+	}
+	pr_debug("%d stripes handled\n", handled);
+
+	spin_unlock_irq(&conf->device_lock);
+	blk_finish_plug(&plug);
+
+	pr_debug("--- raid5worker inactive\n");
+}
+
 /*
  * This is our raid5 kernel thread.
  *
@@ -4836,7 +5028,9 @@ static void raid5d(struct md_thread *thread)
 	spin_lock_irq(&conf->device_lock);
 	while (1) {
 		struct bio *bio;
-		int batch_size;
+		int batch_size, released;
+
+		released = release_stripe_list(conf);

 		if (
 		    !list_empty(&conf->bitmap_list)) {
@@ -4860,8 +5054,8 @@ static void raid5d(struct md_thread *thread)
 			handled++;
 		}

-		batch_size = handle_active_stripes(conf);
-		if (!batch_size)
+		batch_size = handle_active_stripes(conf, ANY_GROUP, NULL);
+		if (!batch_size && !released)
 			break;
 		handled += batch_size;

@@ -4989,10 +5183,70 @@ stripe_cache_active_show(struct mddev *mddev, char *page)
 static struct md_sysfs_entry
 raid5_stripecache_active = __ATTR_RO(stripe_cache_active);

+static ssize_t
+raid5_show_group_thread_cnt(struct mddev *mddev, char *page)
+{
+	struct r5conf *conf = mddev->private;
+	if (conf)
+		return sprintf(page, "%d\n", conf->worker_cnt_per_group);
+	else
+		return 0;
+}
+
+static int alloc_thread_groups(struct r5conf *conf, int cnt);
+static ssize_t
+raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
+{
+	struct r5conf *conf = mddev->private;
+	unsigned long new;
+	int err;
+	struct r5worker_group *old_groups;
+	int old_group_cnt;
+
+	if (len >= PAGE_SIZE)
+		return -EINVAL;
+	if (!conf)
+		return -ENODEV;
+
+	if (kstrtoul(page, 10, &new))
+		return -EINVAL;
+
+	if (new == conf->worker_cnt_per_group)
+		return len;
+
+	mddev_suspend(mddev);
+
+	old_groups = conf->worker_groups;
+	old_group_cnt = conf->worker_cnt_per_group;
+
+	conf->worker_groups = NULL;
+	err = alloc_thread_groups(conf, new);
+	if (err) {
+		conf->worker_groups = old_groups;
+		conf->worker_cnt_per_group = old_group_cnt;
+	} else {
+		if (old_groups)
+			kfree(old_groups[0].workers);
+		kfree(old_groups);
+	}
+
+	mddev_resume(mddev);
+
+	if (err)
+		return err;
+	return len;
+}
+
+static struct md_sysfs_entry
+raid5_group_thread_cnt = __ATTR(group_thread_cnt, S_IRUGO | S_IWUSR,
+				raid5_show_group_thread_cnt,
+				raid5_store_group_thread_cnt);
+
 static struct attribute *raid5_attrs[] = {
 	&raid5_stripecache_size.attr,
 	&raid5_stripecache_active.attr,
 	&raid5_preread_bypass_threshold.attr,
+	&raid5_group_thread_cnt.attr,
 	NULL,
 };
 static struct attribute_group raid5_attrs_group = {
@@ -5000,6 +5254,54 @@ static struct attribute_group raid5_attrs_group = {
 	.attrs = raid5_attrs,
 };

+static int alloc_thread_groups(struct r5conf *conf, int cnt)
+{
+	int i, j;
+	ssize_t size;
+	struct r5worker *workers;
+
+	conf->worker_cnt_per_group = cnt;
+	if (cnt == 0) {
+		conf->worker_groups = NULL;
+		return 0;
+	}
+	conf->group_cnt = num_possible_nodes();
+	size = sizeof(struct r5worker) * cnt;
+	workers = kzalloc(size * conf->group_cnt, GFP_NOIO);
+	conf->worker_groups = kzalloc(sizeof(struct r5worker_group) *
+				conf->group_cnt, GFP_NOIO);
+	if (!conf->worker_groups || !workers) {
+		kfree(workers);
+		kfree(conf->worker_groups);
+		conf->worker_groups = NULL;
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < conf->group_cnt; i++) {
+		struct r5worker_group *group;
+
+		group = &conf->worker_groups[i];
+		INIT_LIST_HEAD(&group->handle_list);
+		group->conf = conf;
+		group->workers = workers + i * cnt;
+
+		for (j = 0; j < cnt; j++) {
+			group->workers[j].group = group;
+			INIT_WORK(&group->workers[j].work, raid5_do_work);
+		}
+	}
+
+	return 0;
+}
+
+static void free_thread_groups(struct r5conf *conf)
+{
+	if (conf->worker_groups)
+		kfree(conf->worker_groups[0].workers);
+	kfree(conf->worker_groups);
+	conf->worker_groups = NULL;
+}
+
 static sector_t
 raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks)
 {
@@ -5040,6 +5342,7 @@ static void raid5_free_percpu(struct r5conf *conf)

 static void free_conf(struct r5conf *conf)
 {
+	free_thread_groups(conf);
 	shrink_stripes(conf);
 	raid5_free_percpu(conf);
 	kfree(conf->disks);
@@ -5168,7 +5471,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL);
 	if (conf == NULL)
 		goto abort;
+	/* Don't enable multi-threading by default*/
+	if (alloc_thread_groups(conf, 0))
+		goto abort;
 	spin_lock_init(&conf->device_lock);
+	seqcount_init(&conf->gen_lock);
 	init_waitqueue_head(&conf->wait_for_stripe);
 	init_waitqueue_head(&conf->wait_for_overlap);
 	INIT_LIST_HEAD(&conf->handle_list);
@@ -5176,6 +5483,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	INIT_LIST_HEAD(&conf->delayed_list);
 	INIT_LIST_HEAD(&conf->bitmap_list);
 	INIT_LIST_HEAD(&conf->inactive_list);
+	init_llist_head(&conf->released_stripes);
 	atomic_set(&conf->active_stripes, 0);
 	atomic_set(&conf->preread_active_stripes, 0);
 	atomic_set(&conf->active_aligned_reads, 0);
@@ -5980,6 +6288,7 @@ static int raid5_start_reshape(struct mddev *mddev)

 	atomic_set(&conf->reshape_stripes, 0);
 	spin_lock_irq(&conf->device_lock);
+	write_seqcount_begin(&conf->gen_lock);
 	conf->previous_raid_disks = conf->raid_disks;
 	conf->raid_disks += mddev->delta_disks;
 	conf->prev_chunk_sectors = conf->chunk_sectors;
@@ -5996,8 +6305,16 @@ static int raid5_start_reshape(struct mddev *mddev)
 	else
 		conf->reshape_progress = 0;
 	conf->reshape_safe = conf->reshape_progress;
+	write_seqcount_end(&conf->gen_lock);
 	spin_unlock_irq(&conf->device_lock);

+	/* Now make sure any requests that proceeded on the assumption
+	 * the reshape wasn't running - like Discard or Read - have
+	 * completed.
+	 */
+	mddev_suspend(mddev);
+	mddev_resume(mddev);
+
 	/* Add some new drives, as many as will fit.
 	 * We know there are enough to make the newly sized array work.
 	 * Don't add devices if we are reducing the number of
@@ -6472,6 +6789,10 @@ static struct md_personality raid4_personality =

 static int __init raid5_init(void)
 {
+	raid5_wq = alloc_workqueue("raid5wq",
+		WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_SYSFS, 0);
+	if (!raid5_wq)
+		return -ENOMEM;
 	register_md_personality(&raid6_personality);
 	register_md_personality(&raid5_personality);
 	register_md_personality(&raid4_personality);
@@ -6483,6 +6804,7 @@ static void raid5_exit(void)
 	unregister_md_personality(&raid6_personality);
 	unregister_md_personality(&raid5_personality);
 	unregister_md_personality(&raid4_personality);
+	destroy_workqueue(raid5_wq);
 }

 module_init(raid5_init);
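For the "make release_stripe lockless" part of this series, the moving pieces are llist_add() on the fast path (returning true when the list was empty, which is the cue to wake the md thread), llist_del_all() to grab the whole batch at once, and a reversal to restore FIFO order, since a lock-free push list comes back LIFO. A self-contained userspace sketch of that hand-off with C11 atomics (node, push and del_all_fifo are illustrative names, not the kernel API):

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct node {
        struct node *next;
        int id;			/* stand-in for the stripe_head payload */
};

static _Atomic(struct node *) released;	/* conf->released_stripes analogue */

/* llist_add() analogue: returns nonzero if the list was empty, in which
 * case the caller wakes the consumer (md_wakeup_thread() in the patch) */
static int push(struct node *n)
{
        struct node *old = atomic_load(&released);

        do {
                n->next = old;
        } while (!atomic_compare_exchange_weak(&released, &old, n));
        return old == NULL;
}

/* consumer side: llist_del_all() followed by llist_reverse_order() */
static struct node *del_all_fifo(void)
{
        struct node *head = atomic_exchange(&released, NULL);
        struct node *fifo = NULL;

        while (head) {
                struct node *tmp = head;

                head = head->next;
                tmp->next = fifo;
                fifo = tmp;
        }
        return fifo;
}

int main(void)
{
        struct node a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };

        push(&a);
        push(&b);
        push(&c);
        for (struct node *n = del_all_fifo(); n; n = n->next)
                printf("stripe %d\n", n->id);	/* 1, 2, 3: FIFO restored */
        return 0;
}

The STRIPE_ON_RELEASE_LIST bit in the patch guards against double-insertion, a concern this sketch sidesteps by pushing each node only once.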
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 70c49329ca9a..2113ffa82c7a 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -197,6 +197,7 @@ enum reconstruct_states {
 struct stripe_head {
 	struct hlist_node	hash;
 	struct list_head	lru;	/* inactive_list or handle_list */
+	struct llist_node	release_list;
 	struct r5conf		*raid_conf;
 	short			generation;	/* increments with every
 						 * reshape */
@@ -211,6 +212,8 @@ struct stripe_head {
 	enum check_states	check_state;
 	enum reconstruct_states reconstruct_state;
 	spinlock_t		stripe_lock;
+	int			cpu;
+	struct r5worker_group	*group;
 	/**
 	 * struct stripe_operations
 	 * @target - STRIPE_OP_COMPUTE_BLK target
@@ -321,6 +324,7 @@ enum {
 	STRIPE_OPS_REQ_PENDING,
 	STRIPE_ON_UNPLUG_LIST,
 	STRIPE_DISCARD,
+	STRIPE_ON_RELEASE_LIST,
 };

 /*
@@ -363,6 +367,19 @@ struct disk_info {
 	struct md_rdev	*rdev, *replacement;
 };

+struct r5worker {
+	struct work_struct work;
+	struct r5worker_group *group;
+	bool working;
+};
+
+struct r5worker_group {
+	struct list_head handle_list;
+	struct r5conf *conf;
+	struct r5worker *workers;
+	int stripes_cnt;
+};
+
 struct r5conf {
 	struct hlist_head	*stripe_hashtbl;
 	struct mddev		*mddev;
@@ -386,6 +403,7 @@ struct r5conf {
 	int			prev_chunk_sectors;
 	int			prev_algo;
 	short			generation; /* increments with every reshape */
+	seqcount_t		gen_lock;   /* lock against generation changes */
 	unsigned long		reshape_checkpoint; /* Time we last updated
 						     * metadata */
 	long long		min_offset_diff; /* minimum difference between
@@ -445,6 +463,7 @@ struct r5conf {
 					    */
 	atomic_t		active_stripes;
 	struct list_head	inactive_list;
+	struct llist_head	released_stripes;
 	wait_queue_head_t	wait_for_stripe;
 	wait_queue_head_t	wait_for_overlap;
 	int			inactive_blocked;	/* release of inactive stripes blocked,
@@ -458,6 +477,9 @@ struct r5conf {
 	 * the new thread here until we fully activate the array.
 	 */
 	struct md_thread	*thread;
+	struct r5worker_group	*worker_groups;
+	int			group_cnt;
+	int			worker_cnt_per_group;
 };

 /*
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 0f424698064f..73069cb6c54a 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -101,6 +101,7 @@ extern const struct raid6_calls raid6_altivec8;
 extern const struct raid6_calls raid6_avx2x1;
 extern const struct raid6_calls raid6_avx2x2;
 extern const struct raid6_calls raid6_avx2x4;
+extern const struct raid6_calls raid6_tilegx8;

 struct raid6_recov_calls {
 	void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index b4625787c7ee..c7dab0645554 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -6,6 +6,7 @@ raid6_pq-y	+= algos.o recov.o tables.o int1.o int2.o int4.o \
 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o
 raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
+raid6_pq-$(CONFIG_TILEGX) += tilegx8.o

 hostprogs-y	+= mktables

@@ -110,6 +111,11 @@ $(obj)/neon8.c:   UNROLL := 8
 $(obj)/neon8.c:   $(src)/neon.uc $(src)/unroll.awk FORCE
 	$(call if_changed,unroll)

+targets += tilegx8.c
+$(obj)/tilegx8.c:   UNROLL := 8
+$(obj)/tilegx8.c:   $(src)/tilegx.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
 quiet_cmd_mktable = TABLE   $@
       cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )

diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 74e6f5629dbc..f0b1aa3586d1 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -66,6 +66,9 @@ const struct raid6_calls * const raid6_algos[] = {
 	&raid6_altivec4,
 	&raid6_altivec8,
 #endif
+#if defined(CONFIG_TILEGX)
+	&raid6_tilegx8,
+#endif
 	&raid6_intx1,
 	&raid6_intx2,
 	&raid6_intx4,
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 28afa1a06e03..29090f3db677 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -40,13 +40,16 @@ else ifeq ($(HAS_NEON),yes)
         OBJS   += neon.o neon1.o neon2.o neon4.o neon8.o
         CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
-        HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\
+        HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
                          gcc -c -x c - >&/dev/null && \
                          rm ./-.o && echo yes)
         ifeq ($(HAS_ALTIVEC),yes)
                 OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
         endif
 endif
+ifeq ($(ARCH),tilegx)
+OBJS += tilegx8.o
+endif

 .c.o:
 	$(CC) $(CFLAGS) -c -o $@ $<
@@ -109,11 +112,15 @@ int16.c: int.uc ../unroll.awk
 int32.c: int.uc ../unroll.awk
 	$(AWK) ../unroll.awk -vN=32 < int.uc > $@

+tilegx8.c: tilegx.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=8 < tilegx.uc > $@
+
 tables.c: mktables
 	./mktables > tables.c

 clean:
 	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
+	rm -f tilegx*.c

 spotless: clean
 	rm -f *~
diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc
new file mode 100644
index 000000000000..e7c29459cbcd
--- /dev/null
+++ b/lib/raid6/tilegx.uc
@@ -0,0 +1,86 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *   Copyright 2012 Tilera Corporation - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * tilegx$#.c
+ *
+ * $#-way unrolled TILE-Gx SIMD for RAID-6 math.
+ *
+ * This file is postprocessed using unroll.awk.
+ *
+ */
+
+#include <linux/raid/pq.h>
+
+/* Create 8 byte copies of constant byte */
+# define NBYTES(x) (__insn_v1addi(0, x))
+# define NSIZE  8
+
+/*
+ * The SHLBYTE() operation shifts each byte left by 1, *not*
+ * rolling over into the next byte
+ */
+static inline __attribute_const__ u64 SHLBYTE(u64 v)
+{
+	/* Vector One Byte Shift Left Immediate. */
+	return __insn_v1shli(v, 1);
+}
+
+/*
+ * The MASK() operation returns 0xFF in any byte for which the high
+ * bit is 1, 0x00 for any byte for which the high bit is 0.
+ */
+static inline __attribute_const__ u64 MASK(u64 v)
+{
+	/* Vector One Byte Shift Right Signed Immediate. */
+	return __insn_v1shrsi(v, 7);
+}
+
+
+void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u64 *p, *q;
+	int d, z, z0;
+
+	u64 wd$$, wq$$, wp$$, w1$$, w2$$;
+	u64 x1d = NBYTES(0x1d);
+	u64 * z0ptr;
+
+	z0 = disks - 3;			/* Highest data disk */
+	p = (u64 *)dptr[z0+1];	/* XOR parity */
+	q = (u64 *)dptr[z0+2];	/* RS syndrome */
+
+	z0ptr = (u64 *)&dptr[z0][0];
+	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+		wq$$ = wp$$ = *z0ptr++;
+		for ( z = z0-1 ; z >= 0 ; z-- ) {
+			wd$$ = *(u64 *)&dptr[z][d+$$*NSIZE];
+			wp$$ = wp$$ ^ wd$$;
+			w2$$ = MASK(wq$$);
+			w1$$ = SHLBYTE(wq$$);
+			w2$$ = w2$$ & x1d;
+			w1$$ = w1$$ ^ w2$$;
+			wq$$ = w1$$ ^ wd$$;
+		}
+		*p++ = wp$$;
+		*q++ = wq$$;
+	}
+}
+
+const struct raid6_calls raid6_tilegx$# = {
+	raid6_tilegx$#_gen_syndrome,
+	NULL,
+	"tilegx$#",
+	0
+};
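The inner loop of tilegx.uc is a multiply-by-2 in GF(2^8) applied to eight bytes packed in a u64: SHLBYTE() doubles every byte, MASK() picks out the bytes whose high bit was set, and the 0x1d constant folds the overflow back in (the low byte of the 0x11d generator polynomial). The same math in portable C, mirroring how lib/raid6/int.uc implements these helpers without SIMD intrinsics (a sketch for illustration):

#include <stdint.h>
#include <stdio.h>

/* shift each byte left by one, without carrying into its neighbour */
static uint64_t SHLBYTE(uint64_t v)
{
        return (v << 1) & 0xfefefefefefefefeULL;
}

/* 0xff for every byte whose high bit is set, 0x00 otherwise */
static uint64_t MASK(uint64_t v)
{
        v &= 0x8080808080808080ULL;
        return (v << 1) - (v >> 7);
}

/* one syndrome step: wq * 2 in GF(2^8), eight bytes at a time */
static uint64_t gf2_mul2(uint64_t wq)
{
        const uint64_t x1d = 0x1d1d1d1d1d1d1d1dULL;

        return SHLBYTE(wq) ^ (MASK(wq) & x1d);
}

int main(void)
{
        /* byte 0x02 doubles to 0x04; byte 0x80 overflows and reduces to 0x1d */
        printf("%016llx\n",
               (unsigned long long)gf2_mul2(0x0000000000800002ULL));
        return 0;	/* prints 00000000001d0004 */
}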