 drivers/md/raid5.c | 186 +++++++++++++++++++++++++++++++++++++++++++++++++---
 drivers/md/raid5.h |  15 ++++
 2 files changed, 186 insertions(+), 15 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d87a2de667ea..32fa1131cafc 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -53,6 +53,7 @@
 #include <linux/cpu.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
+#include <linux/nodemask.h>
 #include <trace/events/block.h>
 
 #include "md.h"
@@ -60,6 +61,10 @@
 #include "raid0.h"
 #include "bitmap.h"
 
+#define cpu_to_group(cpu) cpu_to_node(cpu)
+#define ANY_GROUP NUMA_NO_NODE
+
+static struct workqueue_struct *raid5_wq;
 /*
  * Stripe cache
  */
@@ -200,6 +205,34 @@ static int stripe_operations_active(struct stripe_head *sh)
                test_bit(STRIPE_COMPUTE_RUN, &sh->state);
 }
 
+static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
+{
+        struct r5conf *conf = sh->raid_conf;
+        struct r5worker_group *group;
+        int i, cpu = sh->cpu;
+
+        if (!cpu_online(cpu)) {
+                cpu = cpumask_any(cpu_online_mask);
+                sh->cpu = cpu;
+        }
+
+        if (list_empty(&sh->lru)) {
+                struct r5worker_group *group;
+                group = conf->worker_groups + cpu_to_group(cpu);
+                list_add_tail(&sh->lru, &group->handle_list);
+        }
+
+        if (conf->worker_cnt_per_group == 0) {
+                md_wakeup_thread(conf->mddev->thread);
+                return;
+        }
+
+        group = conf->worker_groups + cpu_to_group(sh->cpu);
+
+        for (i = 0; i < conf->worker_cnt_per_group; i++)
+                queue_work_on(sh->cpu, raid5_wq, &group->workers[i].work);
+}
+
 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
 {
        BUG_ON(!list_empty(&sh->lru));
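The wakeup helper added above routes each stripe to the worker group of the CPU recorded in sh->cpu, falls back to an arbitrary online CPU if that one has gone away, and then kicks every worker in the group. A minimal userspace model of the routing decision — the CPU count, group mapping, and online mask are hypothetical stand-ins for cpu_to_node() and cpu_online_mask:

#include <stdio.h>

#define NCPUS   8
#define NGROUPS 2

/* stand-in for cpu_to_node(): CPUs 0-3 -> group 0, CPUs 4-7 -> group 1 */
static int cpu_to_group(int cpu)
{
        return cpu / (NCPUS / NGROUPS);
}

int main(void)
{
        int online[NCPUS] = { 1, 1, 1, 1, 0, 1, 1, 1 };
        int cpu = 4;    /* CPU recorded in sh->cpu, now offline */

        if (!online[cpu]) {             /* mirrors the !cpu_online(cpu) check */
                int c;
                for (c = 0; c < NCPUS; c++)     /* cpumask_any(cpu_online_mask) */
                        if (online[c]) {
                                cpu = c;
                                break;
                        }
        }
        printf("stripe queued on group %d (cpu %d)\n", cpu_to_group(cpu), cpu);
        return 0;
}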
@@ -214,7 +247,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
                else {
                        clear_bit(STRIPE_DELAYED, &sh->state);
                        clear_bit(STRIPE_BIT_DELAY, &sh->state);
-                       list_add_tail(&sh->lru, &conf->handle_list);
+                       if (conf->worker_cnt_per_group == 0) {
+                               list_add_tail(&sh->lru, &conf->handle_list);
+                       } else {
+                               raid5_wakeup_stripe_thread(sh);
+                               return;
+                       }
                }
                md_wakeup_thread(conf->mddev->thread);
        } else {
@@ -409,6 +447,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
                raid5_build_block(sh, i, previous);
        }
        insert_hash(conf, sh);
+       sh->cpu = smp_processor_id();
 }
 
 static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
@@ -3830,6 +3869,7 @@ static void raid5_activate_delayed(struct r5conf *conf)
                        if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
                                atomic_inc(&conf->preread_active_stripes);
                        list_add_tail(&sh->lru, &conf->hold_list);
+                       raid5_wakeup_stripe_thread(sh);
                }
        }
 }
@@ -4109,18 +4149,32 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
  * head of the hold_list has changed, i.e. the head was promoted to the
  * handle_list.
  */
-static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
+static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
 {
-       struct stripe_head *sh;
+       struct stripe_head *sh = NULL, *tmp;
+       struct list_head *handle_list = NULL;
+
+       if (conf->worker_cnt_per_group == 0) {
+               handle_list = &conf->handle_list;
+       } else if (group != ANY_GROUP) {
+               handle_list = &conf->worker_groups[group].handle_list;
+       } else {
+               int i;
+               for (i = 0; i < conf->group_cnt; i++) {
+                       handle_list = &conf->worker_groups[i].handle_list;
+                       if (!list_empty(handle_list))
+                               break;
+               }
+       }
 
        pr_debug("%s: handle: %s hold: %s full_writes: %d bypass_count: %d\n",
                  __func__,
-                 list_empty(&conf->handle_list) ? "empty" : "busy",
+                 list_empty(handle_list) ? "empty" : "busy",
                  list_empty(&conf->hold_list) ? "empty" : "busy",
                  atomic_read(&conf->pending_full_writes), conf->bypass_count);
 
-       if (!list_empty(&conf->handle_list)) {
-               sh = list_entry(conf->handle_list.next, typeof(*sh), lru);
+       if (!list_empty(handle_list)) {
+               sh = list_entry(handle_list->next, typeof(*sh), lru);
 
                if (list_empty(&conf->hold_list))
                        conf->bypass_count = 0;
@@ -4138,12 +4192,25 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
                   ((conf->bypass_threshold &&
                     conf->bypass_count > conf->bypass_threshold) ||
                    atomic_read(&conf->pending_full_writes) == 0)) {
-               sh = list_entry(conf->hold_list.next,
-                               typeof(*sh), lru);
-               conf->bypass_count -= conf->bypass_threshold;
-               if (conf->bypass_count < 0)
-                       conf->bypass_count = 0;
-       } else
+
+               list_for_each_entry(tmp, &conf->hold_list, lru) {
+                       if (conf->worker_cnt_per_group == 0 ||
+                           group == ANY_GROUP ||
+                           !cpu_online(tmp->cpu) ||
+                           cpu_to_group(tmp->cpu) == group) {
+                               sh = tmp;
+                               break;
+                       }
+               }
+
+               if (sh) {
+                       conf->bypass_count -= conf->bypass_threshold;
+                       if (conf->bypass_count < 0)
+                               conf->bypass_count = 0;
+               }
+       }
+
+       if (!sh)
                return NULL;
 
        list_del_init(&sh->lru);
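With the change above, a caller no longer takes the head of hold_list unconditionally: it scans for the first stripe whose home CPU maps to its own group (or whose CPU went offline), while raid5d passes ANY_GROUP and matches everything. A rough userspace rendering of that filter, with a plain array standing in for the kernel list and a hypothetical cpu_to_group():

#include <stdio.h>

#define ANY_GROUP (-1)  /* mirrors the ANY_GROUP/NUMA_NO_NODE sentinel */

static int cpu_to_group(int cpu)
{
        return cpu / 4; /* hypothetical 4-CPUs-per-node mapping */
}

struct stripe {
        int cpu;
};

/* first hold_list entry this group may handle, else NULL; the kernel
 * version also accepts stripes whose recorded CPU went offline */
static struct stripe *pick_for_group(struct stripe *hold, int n, int group)
{
        int i;

        for (i = 0; i < n; i++)
                if (group == ANY_GROUP || cpu_to_group(hold[i].cpu) == group)
                        return &hold[i];
        return NULL;
}

int main(void)
{
        struct stripe hold[] = { { .cpu = 6 }, { .cpu = 1 }, { .cpu = 5 } };
        struct stripe *sh = pick_for_group(hold, 3, 0);

        printf("group 0 picked stripe on cpu %d\n", sh ? sh->cpu : -1);
        return 0;
}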
@@ -4844,13 +4911,13 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 }
 
 #define MAX_STRIPE_BATCH 8
-static int handle_active_stripes(struct r5conf *conf)
+static int handle_active_stripes(struct r5conf *conf, int group)
 {
        struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
        int i, batch_size = 0;
 
        while (batch_size < MAX_STRIPE_BATCH &&
-                       (sh = __get_priority_stripe(conf)) != NULL)
+                       (sh = __get_priority_stripe(conf, group)) != NULL)
                batch[batch_size++] = sh;
 
        if (batch_size == 0)
@@ -4868,6 +4935,38 @@ static int handle_active_stripes(struct r5conf *conf)
        return batch_size;
 }
 
+static void raid5_do_work(struct work_struct *work)
+{
+        struct r5worker *worker = container_of(work, struct r5worker, work);
+        struct r5worker_group *group = worker->group;
+        struct r5conf *conf = group->conf;
+        int group_id = group - conf->worker_groups;
+        int handled;
+        struct blk_plug plug;
+
+        pr_debug("+++ raid5worker active\n");
+
+        blk_start_plug(&plug);
+        handled = 0;
+        spin_lock_irq(&conf->device_lock);
+        while (1) {
+                int batch_size, released;
+
+                released = release_stripe_list(conf);
+
+                batch_size = handle_active_stripes(conf, group_id);
+                if (!batch_size && !released)
+                        break;
+                handled += batch_size;
+        }
+        pr_debug("%d stripes handled\n", handled);
+
+        spin_unlock_irq(&conf->device_lock);
+        blk_finish_plug(&plug);
+
+        pr_debug("--- raid5worker inactive\n");
+}
+
 /*
  * This is our raid5 kernel thread.
  *
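raid5_do_work() recovers its r5worker from the embedded work_struct via container_of(), then derives the group index by pointer subtraction against conf->worker_groups. A self-contained sketch of the container_of() pattern, with struct bodies trimmed to what the demo needs:

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct { int pending; };

struct r5worker {
        struct work_struct work;
        int group_id_for_demo;  /* hypothetical field, for the printout only */
};

static void do_work(struct work_struct *work)
{
        /* the workqueue core only hands us &worker->work */
        struct r5worker *worker = container_of(work, struct r5worker, work);

        printf("worker for group %d running\n", worker->group_id_for_demo);
}

int main(void)
{
        struct r5worker w = { .group_id_for_demo = 1 };

        do_work(&w.work);
        return 0;
}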
@@ -4917,7 +5016,7 @@ static void raid5d(struct md_thread *thread)
                        handled++;
                }
 
-               batch_size = handle_active_stripes(conf);
+               batch_size = handle_active_stripes(conf, ANY_GROUP);
                if (!batch_size && !released)
                        break;
                handled += batch_size;
@@ -5057,6 +5156,54 @@ static struct attribute_group raid5_attrs_group = {
        .attrs = raid5_attrs,
 };
 
+static int alloc_thread_groups(struct r5conf *conf, int cnt)
+{
+        int i, j;
+        ssize_t size;
+        struct r5worker *workers;
+
+        conf->worker_cnt_per_group = cnt;
+        if (cnt == 0) {
+                conf->worker_groups = NULL;
+                return 0;
+        }
+        conf->group_cnt = num_possible_nodes();
+        size = sizeof(struct r5worker) * cnt;
+        workers = kzalloc(size * conf->group_cnt, GFP_NOIO);
+        conf->worker_groups = kzalloc(sizeof(struct r5worker_group) *
+                                conf->group_cnt, GFP_NOIO);
+        if (!conf->worker_groups || !workers) {
+                kfree(workers);
+                kfree(conf->worker_groups);
+                conf->worker_groups = NULL;
+                return -ENOMEM;
+        }
+
+        for (i = 0; i < conf->group_cnt; i++) {
+                struct r5worker_group *group;
+
+                group = &conf->worker_groups[i];
+                INIT_LIST_HEAD(&group->handle_list);
+                group->conf = conf;
+                group->workers = workers + i * cnt;
+
+                for (j = 0; j < cnt; j++) {
+                        group->workers[j].group = group;
+                        INIT_WORK(&group->workers[j].work, raid5_do_work);
+                }
+        }
+
+        return 0;
+}
+
+static void free_thread_groups(struct r5conf *conf)
+{
+        if (conf->worker_groups)
+                kfree(conf->worker_groups[0].workers);
+        kfree(conf->worker_groups);
+        conf->worker_groups = NULL;
+}
+
 static sector_t
 raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks)
 {
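Note the memory layout: alloc_thread_groups() makes one flat allocation covering all group_cnt * cnt workers and hands each group a slice of it, which is why free_thread_groups() can release every worker through worker_groups[0].workers alone. A runnable userspace model of that layout:

#include <stdio.h>
#include <stdlib.h>

struct r5worker { int group_id; };
struct r5worker_group { struct r5worker *workers; };

int main(void)
{
        int group_cnt = 2, cnt = 4, i, j;
        /* one flat array for every worker, as in alloc_thread_groups() */
        struct r5worker *workers = calloc(group_cnt * cnt, sizeof(*workers));
        struct r5worker_group *groups = calloc(group_cnt, sizeof(*groups));

        if (!workers || !groups)
                return 1;

        for (i = 0; i < group_cnt; i++) {
                groups[i].workers = workers + i * cnt;  /* slice, not a new alloc */
                for (j = 0; j < cnt; j++)
                        groups[i].workers[j].group_id = i;
        }
        printf("group 1, worker 2 -> group_id %d\n",
               groups[1].workers[2].group_id);

        free(groups[0].workers);        /* frees the whole flat worker array */
        free(groups);
        return 0;
}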
@@ -5097,6 +5244,7 @@ static void raid5_free_percpu(struct r5conf *conf)
 
 static void free_conf(struct r5conf *conf)
 {
+       free_thread_groups(conf);
        shrink_stripes(conf);
        raid5_free_percpu(conf);
        kfree(conf->disks);
@@ -5225,6 +5373,9 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL);
        if (conf == NULL)
                goto abort;
+       /* Don't enable multi-threading by default */
+       if (alloc_thread_groups(conf, 0))
+               goto abort;
        spin_lock_init(&conf->device_lock);
        init_waitqueue_head(&conf->wait_for_stripe);
        init_waitqueue_head(&conf->wait_for_overlap);
@@ -6530,6 +6681,10 @@ static struct md_personality raid4_personality =
 
 static int __init raid5_init(void)
 {
+       raid5_wq = alloc_workqueue("raid5wq",
+               WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_SYSFS, 0);
+       if (!raid5_wq)
+               return -ENOMEM;
        register_md_personality(&raid6_personality);
        register_md_personality(&raid5_personality);
        register_md_personality(&raid4_personality);
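For reference, a hypothetical, untested module skeleton showing the same alloc_workqueue()/queue_work_on()/destroy_workqueue() lifecycle wired into raid5_init()/raid5_exit() above. WQ_UNBOUND leaves worker placement to the scheduler while queue_work_on() still expresses a CPU preference, and WQ_MEM_RECLAIM keeps a rescuer thread so the queue can make progress under memory pressure, which a RAID5 writeback path needs:

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;        /* hypothetical names throughout */

static void demo_fn(struct work_struct *work)
{
        pr_info("demo work ran\n");
}

static DECLARE_WORK(demo_work, demo_fn);

static int __init demo_init(void)
{
        demo_wq = alloc_workqueue("demo_wq",
                WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE | WQ_SYSFS, 0);
        if (!demo_wq)
                return -ENOMEM;
        /* prefer CPU 0's pool, as raid5_wakeup_stripe_thread() does per stripe */
        queue_work_on(0, demo_wq, &demo_work);
        return 0;
}

static void __exit demo_exit(void)
{
        destroy_workqueue(demo_wq);     /* drains queued work before freeing */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");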
@@ -6541,6 +6696,7 @@ static void raid5_exit(void)
        unregister_md_personality(&raid6_personality);
        unregister_md_personality(&raid5_personality);
        unregister_md_personality(&raid4_personality);
+       destroy_workqueue(raid5_wq);
 }
 
 module_init(raid5_init);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index a98f99d2a58f..105366371fbf 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -212,6 +212,7 @@ struct stripe_head {
        enum check_states       check_state;
        enum reconstruct_states reconstruct_state;
        spinlock_t              stripe_lock;
+       int                     cpu;
        /**
         * struct stripe_operations
         * @target - STRIPE_OP_COMPUTE_BLK target
@@ -365,6 +366,17 @@ struct disk_info {
        struct md_rdev  *rdev, *replacement;
 };
 
+struct r5worker {
+       struct work_struct work;
+       struct r5worker_group *group;
+};
+
+struct r5worker_group {
+       struct list_head handle_list;
+       struct r5conf *conf;
+       struct r5worker *workers;
+};
+
 struct r5conf {
        struct hlist_head       *stripe_hashtbl;
        struct mddev            *mddev;
@@ -461,6 +473,9 @@ struct r5conf {
         * the new thread here until we fully activate the array.
         */
        struct md_thread        *thread;
+       struct r5worker_group   *worker_groups;
+       int                     group_cnt;
+       int                     worker_cnt_per_group;
 };
 
 /*