diff options
author | majianpeng <majianpeng@gmail.com> | 2013-11-13 23:16:20 -0500 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2013-11-18 23:19:18 -0500 |
commit | 60aaf933854511630e16be4efe0f96485e132de4 (patch) | |
tree | 123612bc22106a77a6af562b41f0d1163af3f6b2 /drivers/md | |
parent | d206dcfa9809ec3409483e93b5e362f801fa0c27 (diff) |
md/raid5: Use conf->device_lock to protect changing of multi-thread resources.
When we change group_thread_cnt from sysfs entry, it can OOPS.
The kernel messages are:
[ 135.299021] BUG: unable to handle kernel NULL pointer dereference at (null)
[ 135.299073] IP: [<ffffffff815188ab>] handle_active_stripes+0x32b/0x440
[ 135.299107] PGD 0
[ 135.299122] Oops: 0000 [#1] SMP
[ 135.299144] Modules linked in: netconsole e1000e ptp pps_core
[ 135.299188] CPU: 3 PID: 2225 Comm: md0_raid5 Not tainted 3.12.0+ #24
[ 135.299214] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080015 11/09/2011
[ 135.299255] task: ffff8800b9638f80 ti: ffff8800b77a4000 task.ti: ffff8800b77a4000
[ 135.299283] RIP: 0010:[<ffffffff815188ab>] [<ffffffff815188ab>] handle_active_stripes+0x32b/0x440
[ 135.299323] RSP: 0018:ffff8800b77a5c48 EFLAGS: 00010002
[ 135.299344] RAX: ffff880037bb5c70 RBX: 0000000000000000 RCX: 0000000000000008
[ 135.299371] RDX: ffff880037bb5cb8 RSI: 0000000000000001 RDI: ffff880037bb5c00
[ 135.299398] RBP: ffff8800b77a5d08 R08: 0000000000000001 R09: 0000000000000000
[ 135.299425] R10: ffff8800b77a5c98 R11: 00000000ffffffff R12: ffff880037bb5c00
[ 135.299452] R13: 0000000000000000 R14: 0000000000000000 R15: ffff880037bb5c70
[ 135.299479] FS: 0000000000000000(0000) GS:ffff88013fd80000(0000) knlGS:0000000000000000
[ 135.299510] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 135.299532] CR2: 0000000000000000 CR3: 0000000001c0b000 CR4: 00000000000407e0
[ 135.299559] Stack:
[ 135.299570] ffff8800b77a5c88 ffffffff8107383e ffff8800b77a5c88 ffff880037a64300
[ 135.299611] 000000000000ec08 ffff880037bb5cb8 ffff8800b77a5c98 ffffffffffffffd8
[ 135.299654] 000000000000ec08 ffff880037bb5c60 ffff8800b77a5c98 ffff8800b77a5c98
[ 135.299696] Call Trace:
[ 135.299711] [<ffffffff8107383e>] ? __wake_up+0x4e/0x70
[ 135.299733] [<ffffffff81518f88>] raid5d+0x4c8/0x680
[ 135.299756] [<ffffffff817174ed>] ? schedule_timeout+0x15d/0x1f0
[ 135.299781] [<ffffffff81524c9f>] md_thread+0x11f/0x170
[ 135.299804] [<ffffffff81069cd0>] ? wake_up_bit+0x40/0x40
[ 135.299826] [<ffffffff81524b80>] ? md_rdev_init+0x110/0x110
[ 135.299850] [<ffffffff81069656>] kthread+0xc6/0xd0
[ 135.299871] [<ffffffff81069590>] ? kthread_freezable_should_stop+0x70/0x70
[ 135.299899] [<ffffffff81722ffc>] ret_from_fork+0x7c/0xb0
[ 135.299923] [<ffffffff81069590>] ? kthread_freezable_should_stop+0x70/0x70
[ 135.299951] Code: ff ff ff 0f 84 d7 fe ff ff e9 5c fe ff ff 66 90 41 8b b4 24 d8 01 00 00 45 31 ed 85 f6 0f 8e 7b fd ff ff 49 8b 9c 24 d0 01 00 00 <48> 3b 1b 49 89 dd 0f 85 67 fd ff ff 48 8d 43 28 31 d2 eb 17 90
[ 135.300005] RIP [<ffffffff815188ab>] handle_active_stripes+0x32b/0x440
[ 135.300005] RSP <ffff8800b77a5c48>
[ 135.300005] CR2: 0000000000000000
[ 135.300005] ---[ end trace 504854e5bb7562ed ]---
[ 135.300005] Kernel panic - not syncing: Fatal exception
This is because raid5d() can be running when the multi-thread
resources are changed via sysfs. So we need to provide locking.
mddev->device_lock is suitable, but we cannot simply call
alloc_thread_groups under this lock as we cannot allocate memory
while holding a spinlock.
So change alloc_thread_groups() to allocate and return the data
structures, then raid5_store_group_thread_cnt() can take the lock
while updating the pointers to the data structures.
This fixes a bug introduced in 3.12 and so is suitable for the 3.12.x
stable series.
Fixes: b721420e8719131896b009b11edbbd27
Cc: stable@vger.kernel.org (3.12)
Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Reviewed-by: Shaohua Li <shli@kernel.org>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/raid5.c | 63 |
1 file changed, 39 insertions(+), 24 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index fea67727f807..c7db410190c4 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -5387,15 +5387,18 @@ raid5_show_group_thread_cnt(struct mddev *mddev, char *page) | |||
5387 | return 0; | 5387 | return 0; |
5388 | } | 5388 | } |
5389 | 5389 | ||
5390 | static int alloc_thread_groups(struct r5conf *conf, int cnt); | 5390 | static int alloc_thread_groups(struct r5conf *conf, int cnt, |
5391 | int *group_cnt, | ||
5392 | int *worker_cnt_per_group, | ||
5393 | struct r5worker_group **worker_groups); | ||
5391 | static ssize_t | 5394 | static ssize_t |
5392 | raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len) | 5395 | raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len) |
5393 | { | 5396 | { |
5394 | struct r5conf *conf = mddev->private; | 5397 | struct r5conf *conf = mddev->private; |
5395 | unsigned long new; | 5398 | unsigned long new; |
5396 | int err; | 5399 | int err; |
5397 | struct r5worker_group *old_groups; | 5400 | struct r5worker_group *new_groups, *old_groups; |
5398 | int old_group_cnt; | 5401 | int group_cnt, worker_cnt_per_group; |
5399 | 5402 | ||
5400 | if (len >= PAGE_SIZE) | 5403 | if (len >= PAGE_SIZE) |
5401 | return -EINVAL; | 5404 | return -EINVAL; |
@@ -5411,17 +5414,19 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len) | |||
5411 | mddev_suspend(mddev); | 5414 | mddev_suspend(mddev); |
5412 | 5415 | ||
5413 | old_groups = conf->worker_groups; | 5416 | old_groups = conf->worker_groups; |
5414 | old_group_cnt = conf->worker_cnt_per_group; | ||
5415 | |||
5416 | if (old_groups) | 5417 | if (old_groups) |
5417 | flush_workqueue(raid5_wq); | 5418 | flush_workqueue(raid5_wq); |
5418 | 5419 | ||
5419 | conf->worker_groups = NULL; | 5420 | err = alloc_thread_groups(conf, new, |
5420 | err = alloc_thread_groups(conf, new); | 5421 | &group_cnt, &worker_cnt_per_group, |
5421 | if (err) { | 5422 | &new_groups); |
5422 | conf->worker_groups = old_groups; | 5423 | if (!err) { |
5423 | conf->worker_cnt_per_group = old_group_cnt; | 5424 | spin_lock_irq(&conf->device_lock); |
5424 | } else { | 5425 | conf->group_cnt = group_cnt; |
5426 | conf->worker_cnt_per_group = worker_cnt_per_group; | ||
5427 | conf->worker_groups = new_groups; | ||
5428 | spin_unlock_irq(&conf->device_lock); | ||
5429 | |||
5425 | if (old_groups) | 5430 | if (old_groups) |
5426 | kfree(old_groups[0].workers); | 5431 | kfree(old_groups[0].workers); |
5427 | kfree(old_groups); | 5432 | kfree(old_groups); |
@@ -5451,33 +5456,36 @@ static struct attribute_group raid5_attrs_group = { | |||
5451 | .attrs = raid5_attrs, | 5456 | .attrs = raid5_attrs, |
5452 | }; | 5457 | }; |
5453 | 5458 | ||
5454 | static int alloc_thread_groups(struct r5conf *conf, int cnt) | 5459 | static int alloc_thread_groups(struct r5conf *conf, int cnt, |
5460 | int *group_cnt, | ||
5461 | int *worker_cnt_per_group, | ||
5462 | struct r5worker_group **worker_groups) | ||
5455 | { | 5463 | { |
5456 | int i, j, k; | 5464 | int i, j, k; |
5457 | ssize_t size; | 5465 | ssize_t size; |
5458 | struct r5worker *workers; | 5466 | struct r5worker *workers; |
5459 | 5467 | ||
5460 | conf->worker_cnt_per_group = cnt; | 5468 | *worker_cnt_per_group = cnt; |
5461 | if (cnt == 0) { | 5469 | if (cnt == 0) { |
5462 | conf->worker_groups = NULL; | 5470 | *group_cnt = 0; |
5471 | *worker_groups = NULL; | ||
5463 | return 0; | 5472 | return 0; |
5464 | } | 5473 | } |
5465 | conf->group_cnt = num_possible_nodes(); | 5474 | *group_cnt = num_possible_nodes(); |
5466 | size = sizeof(struct r5worker) * cnt; | 5475 | size = sizeof(struct r5worker) * cnt; |
5467 | workers = kzalloc(size * conf->group_cnt, GFP_NOIO); | 5476 | workers = kzalloc(size * *group_cnt, GFP_NOIO); |
5468 | conf->worker_groups = kzalloc(sizeof(struct r5worker_group) * | 5477 | *worker_groups = kzalloc(sizeof(struct r5worker_group) * |
5469 | conf->group_cnt, GFP_NOIO); | 5478 | *group_cnt, GFP_NOIO); |
5470 | if (!conf->worker_groups || !workers) { | 5479 | if (!*worker_groups || !workers) { |
5471 | kfree(workers); | 5480 | kfree(workers); |
5472 | kfree(conf->worker_groups); | 5481 | kfree(*worker_groups); |
5473 | conf->worker_groups = NULL; | ||
5474 | return -ENOMEM; | 5482 | return -ENOMEM; |
5475 | } | 5483 | } |
5476 | 5484 | ||
5477 | for (i = 0; i < conf->group_cnt; i++) { | 5485 | for (i = 0; i < *group_cnt; i++) { |
5478 | struct r5worker_group *group; | 5486 | struct r5worker_group *group; |
5479 | 5487 | ||
5480 | group = &conf->worker_groups[i]; | 5488 | group = &(*worker_groups)[i];
5481 | INIT_LIST_HEAD(&group->handle_list); | 5489 | INIT_LIST_HEAD(&group->handle_list); |
5482 | group->conf = conf; | 5490 | group->conf = conf; |
5483 | group->workers = workers + i * cnt; | 5491 | group->workers = workers + i * cnt; |
@@ -5640,6 +5648,8 @@ static struct r5conf *setup_conf(struct mddev *mddev) | |||
5640 | struct disk_info *disk; | 5648 | struct disk_info *disk; |
5641 | char pers_name[6]; | 5649 | char pers_name[6]; |
5642 | int i; | 5650 | int i; |
5651 | int group_cnt, worker_cnt_per_group; | ||
5652 | struct r5worker_group *new_group; | ||
5643 | 5653 | ||
5644 | if (mddev->new_level != 5 | 5654 | if (mddev->new_level != 5 |
5645 | && mddev->new_level != 4 | 5655 | && mddev->new_level != 4 |
@@ -5674,7 +5684,12 @@ static struct r5conf *setup_conf(struct mddev *mddev) | |||
5674 | if (conf == NULL) | 5684 | if (conf == NULL) |
5675 | goto abort; | 5685 | goto abort; |
5676 | /* Don't enable multi-threading by default*/ | 5686 | /* Don't enable multi-threading by default*/ |
5677 | if (alloc_thread_groups(conf, 0)) | 5687 | if (!alloc_thread_groups(conf, 0, &group_cnt, &worker_cnt_per_group, |
5688 | &new_group)) { | ||
5689 | conf->group_cnt = group_cnt; | ||
5690 | conf->worker_cnt_per_group = worker_cnt_per_group; | ||
5691 | conf->worker_groups = new_group; | ||
5692 | } else | ||
5678 | goto abort; | 5693 | goto abort; |
5679 | spin_lock_init(&conf->device_lock); | 5694 | spin_lock_init(&conf->device_lock); |
5680 | seqcount_init(&conf->gen_lock); | 5695 | seqcount_init(&conf->gen_lock); |