 drivers/md/raid5.c | 186 +++++++++++++++++++++++++++++++++++++++++++++++-----
 drivers/md/raid5.h |  15 +++++
 2 files changed, 186 insertions(+), 15 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d87a2de667ea..32fa1131cafc 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -53,6 +53,7 @@
 #include <linux/cpu.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
+#include <linux/nodemask.h>
 #include <trace/events/block.h>
 
 #include "md.h"
@@ -60,6 +61,10 @@
 #include "raid0.h"
 #include "bitmap.h"
 
+#define cpu_to_group(cpu) cpu_to_node(cpu)
+#define ANY_GROUP NUMA_NO_NODE
+
+static struct workqueue_struct *raid5_wq;
 /*
  * Stripe cache
  */
@@ -200,6 +205,34 @@ static int stripe_operations_active(struct stripe_head *sh)
                test_bit(STRIPE_COMPUTE_RUN, &sh->state);
 }
 
+static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
+{
+        struct r5conf *conf = sh->raid_conf;
+        struct r5worker_group *group;
+        int i, cpu = sh->cpu;
+
+        if (!cpu_online(cpu)) {
+                cpu = cpumask_any(cpu_online_mask);
+                sh->cpu = cpu;
+        }
+
+        if (list_empty(&sh->lru)) {
+                struct r5worker_group *group;
+                group = conf->worker_groups + cpu_to_group(cpu);
+                list_add_tail(&sh->lru, &group->handle_list);
+        }
+
+        if (conf->worker_cnt_per_group == 0) {
+                md_wakeup_thread(conf->mddev->thread);
+                return;
+        }
+
+        group = conf->worker_groups + cpu_to_group(sh->cpu);
+
+        for (i = 0; i < conf->worker_cnt_per_group; i++)
+                queue_work_on(sh->cpu, raid5_wq, &group->workers[i].work);
+}
+
 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
 {
         BUG_ON(!list_empty(&sh->lru));
@@ -214,7 +247,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
                 else {
                         clear_bit(STRIPE_DELAYED, &sh->state);
                         clear_bit(STRIPE_BIT_DELAY, &sh->state);
-                        list_add_tail(&sh->lru, &conf->handle_list);
+                        if (conf->worker_cnt_per_group == 0) {
+                                list_add_tail(&sh->lru, &conf->handle_list);
+                        } else {
+                                raid5_wakeup_stripe_thread(sh);
+                                return;
+                        }
                 }
                 md_wakeup_thread(conf->mddev->thread);
         } else {
@@ -409,6 +447,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
                 raid5_build_block(sh, i, previous);
         }
         insert_hash(conf, sh);
+        sh->cpu = smp_processor_id();
 }
 
 static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
@@ -3830,6 +3869,7 @@ static void raid5_activate_delayed(struct r5conf *conf)
                         if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
                                 atomic_inc(&conf->preread_active_stripes);
                         list_add_tail(&sh->lru, &conf->hold_list);
+                        raid5_wakeup_stripe_thread(sh);
                 }
         }
 }
@@ -4109,18 +4149,32 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
  * head of the hold_list has changed, i.e. the head was promoted to the
  * handle_list.
  */
-static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
+static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
 {
-        struct stripe_head *sh;
+        struct stripe_head *sh = NULL, *tmp;
+        struct list_head *handle_list = NULL;
+
+        if (conf->worker_cnt_per_group == 0) {
+                handle_list = &conf->handle_list;
+        } else if (group != ANY_GROUP) {
+                handle_list = &conf->worker_groups[group].handle_list;
+        } else {
+                int i;
+                for (i = 0; i < conf->group_cnt; i++) {
+                        handle_list = &conf->worker_groups[i].handle_list;
+                        if (!list_empty(handle_list))
+                                break;
+                }
+        }
 
         pr_debug("%s: handle: %s hold: %s full_writes: %d bypass_count: %d\n",
                 __func__,
-                list_empty(&conf->handle_list) ? "empty" : "busy",
+                list_empty(handle_list) ? "empty" : "busy",
                 list_empty(&conf->hold_list) ? "empty" : "busy",
                 atomic_read(&conf->pending_full_writes), conf->bypass_count);
 
-        if (!list_empty(&conf->handle_list)) {
-                sh = list_entry(conf->handle_list.next, typeof(*sh), lru);
+        if (!list_empty(handle_list)) {
+                sh = list_entry(handle_list->next, typeof(*sh), lru);
 
                 if (list_empty(&conf->hold_list))
                         conf->bypass_count = 0;
@@ -4138,12 +4192,25 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
                   ((conf->bypass_threshold &&
                     conf->bypass_count > conf->bypass_threshold) ||
                    atomic_read(&conf->pending_full_writes) == 0)) {
-                sh = list_entry(conf->hold_list.next,
-                                typeof(*sh), lru);
-                conf->bypass_count -= conf->bypass_threshold;
-                if (conf->bypass_count < 0)
-                        conf->bypass_count = 0;
-        } else
+
+                list_for_each_entry(tmp, &conf->hold_list, lru) {
+                        if (conf->worker_cnt_per_group == 0 ||
+                            group == ANY_GROUP ||
+                            !cpu_online(tmp->cpu) ||
+                            cpu_to_group(tmp->cpu) == group) {
+                                sh = tmp;
+                                break;
+                        }
+                }
+
+                if (sh) {
+                        conf->bypass_count -= conf->bypass_threshold;
+                        if (conf->bypass_count < 0)
+                                conf->bypass_count = 0;
+                }
+        }
+
+        if (!sh)
                 return NULL;
 
         list_del_init(&sh->lru);
@@ -4844,13 +4911,13 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 }
 
 #define MAX_STRIPE_BATCH 8
-static int handle_active_stripes(struct r5conf *conf)
+static int handle_active_stripes(struct r5conf *conf, int group)
 {
         struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
         int i, batch_size = 0;
 
         while (batch_size < MAX_STRIPE_BATCH &&
-                        (sh = __get_priority_stripe(conf)) != NULL)
+                        (sh = __get_priority_stripe(conf, group)) != NULL)
                 batch[batch_size++] = sh;
 
         if (batch_size == 0)
@@ -4868,6 +4935,38 @@ static int handle_active_stripes(struct r5conf *conf)
         return batch_size;
 }
 
+static void raid5_do_work(struct work_struct *work)
+{
+        struct r5worker *worker = container_of(work, struct r5worker, work);
+        struct r5worker_group *group = worker->group;
+        struct r5conf *conf = group->conf;
+        int group_id = group - conf->worker_groups;
+        int handled;
+        struct blk_plug plug;
+
+        pr_debug("+++ raid5worker active\n");
+
+        blk_start_plug(&plug);
+        handled = 0;
+        spin_lock_irq(&conf->device_lock);
+        while (1) {
+                int batch_size, released;
+
+                released = release_stripe_list(conf);
+
+                batch_size = handle_active_stripes(conf, group_id);
+                if (!batch_size && !released)
+                        break;
+                handled += batch_size;
+        }
+        pr_debug("%d stripes handled\n", handled);
+
+        spin_unlock_irq(&conf->device_lock);
+        blk_finish_plug(&plug);
+
+        pr_debug("--- raid5worker inactive\n");
+}
+
 /*
  * This is our raid5 kernel thread.
  *
@@ -4917,7 +5016,7 @@ static void raid5d(struct md_thread *thread)
                         handled++;
                 }
 
-                batch_size = handle_active_stripes(conf);
+                batch_size = handle_active_stripes(conf, ANY_GROUP);
                 if (!batch_size && !released)
                         break;
                 handled += batch_size;
@@ -5057,6 +5156,54 @@ static struct attribute_group raid5_attrs_group = {
         .attrs = raid5_attrs,
 };
 
+static int alloc_thread_groups(struct r5conf *conf, int cnt)
+{
+        int i, j;
+        ssize_t size;
+        struct r5worker *workers;
+
+        conf->worker_cnt_per_group = cnt;
+        if (cnt == 0) {
+                conf->worker_groups = NULL;
+                return 0;
+        }
+        conf->group_cnt = num_possible_nodes();
+        size = sizeof(struct r5worker) * cnt;
+        workers = kzalloc(size * conf->group_cnt, GFP_NOIO);
+        conf->worker_groups = kzalloc(sizeof(struct r5worker_group) *
+                                conf->group_cnt, GFP_NOIO);
+        if (!conf->worker_groups || !workers) {
+                kfree(workers);
+                kfree(conf->worker_groups);
+                conf->worker_groups = NULL;
+                return -ENOMEM;
+        }
+
+        for (i = 0; i < conf->group_cnt; i++) {
+                struct r5worker_group *group;
+
+                group = &conf->worker_groups[i];
+                INIT_LIST_HEAD(&group->handle_list);
+                group->conf = conf;
+                group->workers = workers + i * cnt;
+
+                for (j = 0; j < cnt; j++) {
+                        group->workers[j].group = group;
+                        INIT_WORK(&group->workers[j].work, raid5_do_work);
+                }
+        }
+
+        return 0;
+}
+
+static void free_thread_groups(struct r5conf *conf)
+{
+        if (conf->worker_groups)
+                kfree(conf->worker_groups[0].workers);
+        kfree(conf->worker_groups);
+        conf->worker_groups = NULL;
+}
+
 static sector_t
 raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks)
 {
@@ -5097,6 +5244,7 @@ static void raid5_free_percpu(struct r5conf *conf)
 
 static void free_conf(struct r5conf *conf)
 {
+        free_thread_groups(conf);
         shrink_stripes(conf);
         raid5_free_percpu(conf);
         kfree(conf->disks);
@@ -5225,6 +5373,9 @@ static struct r5conf *setup_conf(struct mddev *mddev)
         conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL);
         if (conf == NULL)
                 goto abort;
+        /* Don't enable multi-threading by default */
+        if (alloc_thread_groups(conf, 0))
+                goto abort;
         spin_lock_init(&conf->device_lock);
         init_waitqueue_head(&conf->wait_for_stripe);
         init_waitqueue_head(&conf->wait_for_overlap);
@@ -6530,6 +6681,10 @@ static struct md_personality raid4_personality =
 
 static int __init raid5_init(void)
 {
+        raid5_wq = alloc_workqueue("raid5wq",
+                WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_SYSFS, 0);
+        if (!raid5_wq)
+                return -ENOMEM;
         register_md_personality(&raid6_personality);
         register_md_personality(&raid5_personality);
         register_md_personality(&raid4_personality);
@@ -6541,6 +6696,7 @@ static void raid5_exit(void)
         unregister_md_personality(&raid6_personality);
         unregister_md_personality(&raid5_personality);
         unregister_md_personality(&raid4_personality);
+        destroy_workqueue(raid5_wq);
 }
 
 module_init(raid5_init);
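
[Editor's note, not part of the patch] The raid5.c changes above combine three workqueue idioms: an unbound workqueue created with alloc_workqueue(), queue_work_on() passing a CPU whose node (via cpu_to_group()) selects the worker group, and container_of() in the work function to recover the enclosing r5worker, as raid5_do_work() does. The minimal module below is an illustrative sketch of that pattern in isolation; all demo_* names are hypothetical and nothing here is the patch's code.

/* demo_raid5wq.c - illustrative sketch only; demo_* names are hypothetical */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/cpumask.h>
#include <linux/topology.h>
#include <linux/smp.h>

static struct workqueue_struct *demo_wq;

struct demo_worker {
        struct work_struct work;
        int group_id;                /* per-NUMA-node group, as in the patch */
};

static struct demo_worker demo_worker0;

static void demo_work_fn(struct work_struct *work)
{
        /* recover the enclosing worker, as raid5_do_work() does */
        struct demo_worker *w = container_of(work, struct demo_worker, work);

        pr_info("group %d worker ran on CPU %d\n",
                w->group_id, raw_smp_processor_id());
}

static int __init demo_init(void)
{
        int cpu = cpumask_any(cpu_online_mask);

        /* WQ_UNBOUND workers are not pinned; the cpu passed to
         * queue_work_on() still serves as a locality hint, which is what
         * raid5_wakeup_stripe_thread() relies on. */
        demo_wq = alloc_workqueue("demo_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
        if (!demo_wq)
                return -ENOMEM;

        demo_worker0.group_id = cpu_to_node(cpu);
        INIT_WORK(&demo_worker0.work, demo_work_fn);
        queue_work_on(cpu, demo_wq, &demo_worker0.work);
        return 0;
}

static void __exit demo_exit(void)
{
        destroy_workqueue(demo_wq);        /* flushes pending work first */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
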
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index a98f99d2a58f..105366371fbf 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -212,6 +212,7 @@ struct stripe_head {
         enum check_states       check_state;
         enum reconstruct_states reconstruct_state;
         spinlock_t              stripe_lock;
+        int                     cpu;
         /**
          * struct stripe_operations
          * @target - STRIPE_OP_COMPUTE_BLK target
@@ -365,6 +366,17 @@ struct disk_info {
         struct md_rdev  *rdev, *replacement;
 };
 
+struct r5worker {
+        struct work_struct work;
+        struct r5worker_group *group;
+};
+
+struct r5worker_group {
+        struct list_head handle_list;
+        struct r5conf *conf;
+        struct r5worker *workers;
+};
+
 struct r5conf {
         struct hlist_head       *stripe_hashtbl;
         struct mddev            *mddev;
@@ -461,6 +473,9 @@ struct r5conf {
          * the new thread here until we fully activate the array.
          */
         struct md_thread        *thread;
+        struct r5worker_group   *worker_groups;
+        int                     group_cnt;
+        int                     worker_cnt_per_group;
 };
 
 /*
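
[Editor's note, not part of the patch] alloc_thread_groups() above makes a single allocation covering all group_cnt * cnt workers and hands each r5worker_group a slice of it (group->workers = workers + i * cnt). That layout is why free_thread_groups() can release every worker through conf->worker_groups[0].workers: group 0's slice starts at the base of the slab. A small user-space sketch of the same layout, with hypothetical names:

/* layout_demo.c - illustrative sketch of the one-slab worker layout */
#include <stdio.h>
#include <stdlib.h>

struct worker { int group_id; };

struct group { struct worker *workers; };

int main(void)
{
        int group_cnt = 2;        /* e.g. num_possible_nodes() */
        int cnt = 4;              /* workers per group */
        int i, j;

        /* one allocation covering every worker, as in alloc_thread_groups() */
        struct worker *workers = calloc((size_t)group_cnt * cnt, sizeof(*workers));
        struct group *groups = calloc(group_cnt, sizeof(*groups));

        if (!workers || !groups)
                return 1;

        for (i = 0; i < group_cnt; i++) {
                groups[i].workers = workers + i * cnt;  /* carve a per-group slice */
                for (j = 0; j < cnt; j++)
                        groups[i].workers[j].group_id = i;
        }

        printf("group 1, worker 2 belongs to group %d\n",
               groups[1].workers[2].group_id);

        /* mirrors free_thread_groups(): group 0's slice is the slab base */
        free(groups[0].workers);
        free(groups);
        return 0;
}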