author     Jeff Moyer <jmoyer@redhat.com>    2012-12-05 14:17:21 -0500
committer  Jens Axboe <axboe@kernel.dk>      2012-12-05 14:17:21 -0500
commit     8fa72d234da9b6b473bbb1f74d533663e4996e6b
tree       536b8d2cf5aa3f0baee3aa0d89e602733be3df56  /mm/backing-dev.c
parent     c304a51bf42a7ce48f430a371e1213472c89e13d
bdi: add a user-tunable cpu_list for the bdi flusher threads
In realtime environments, it may be desirable to keep the per-bdi
flusher threads from running on certain cpus. This patch adds a
cpu_list file to /sys/class/bdi/* to enable this. The default is to tie
the flusher threads to the same numa node as the backing device (though
I could be convinced to make it a mask of all cpus to avoid a change in
behaviour).
Thanks to Jeremy Eder for the original idea.
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
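For context, the knob is exercised by writing a cpu list to /sys/class/bdi/<bdi>/cpu_list and reading it back the same way. A minimal userspace sketch, assuming a bdi named "8:0" and cpus 0-3 (both placeholders that will differ per system):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* "8:0" is a placeholder bdi name; list /sys/class/bdi/ for real ones */
	const char *path = "/sys/class/bdi/8:0/cpu_list";
	const char *list = "0-3";
	char buf[128];
	ssize_t n;
	int fd;

	/* write the new cpu list; the kernel parses it with cpulist_parse() */
	fd = open(path, O_WRONLY);
	if (fd < 0 || write(fd, list, strlen(list)) < 0) {
		perror(path);
		return 1;
	}
	close(fd);

	/* read the mask back to confirm it stuck */
	fd = open(path, O_RDONLY);
	if (fd < 0 || (n = read(fd, buf, sizeof(buf) - 1)) < 0) {
		perror(path);
		return 1;
	}
	buf[n] = '\0';
	printf("cpu_list: %s\n", buf);
	close(fd);
	return 0;
}

Per cpu_list_store() in the diff below, a store that parses cleanly rebinds an already-running flusher thread immediately via set_cpus_allowed_ptr() and records the mask for threads forked later.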
Diffstat (limited to 'mm/backing-dev.c')
 mm/backing-dev.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+), 0 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d3ca2b3ee176..bd6a6cabef71 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
+#include <linux/slab.h>
 #include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -221,12 +222,63 @@ static ssize_t max_ratio_store(struct device *dev,
 }
 BDI_SHOW(max_ratio, bdi->max_ratio)
 
+static ssize_t cpu_list_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	struct bdi_writeback *wb = &bdi->wb;
+	cpumask_var_t newmask;
+	ssize_t ret;
+	struct task_struct *task;
+
+	if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
+		return -ENOMEM;
+
+	ret = cpulist_parse(buf, newmask);
+	if (!ret) {
+		spin_lock_bh(&bdi->wb_lock);
+		task = wb->task;
+		if (task)
+			get_task_struct(task);
+		spin_unlock_bh(&bdi->wb_lock);
+
+		mutex_lock(&bdi->flusher_cpumask_lock);
+		if (task) {
+			ret = set_cpus_allowed_ptr(task, newmask);
+			put_task_struct(task);
+		}
+		if (ret == 0) {
+			cpumask_copy(bdi->flusher_cpumask, newmask);
+			ret = count;
+		}
+		mutex_unlock(&bdi->flusher_cpumask_lock);
+
+	}
+	free_cpumask_var(newmask);
+
+	return ret;
+}
+
+static ssize_t cpu_list_show(struct device *dev,
+		struct device_attribute *attr, char *page)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	ssize_t ret;
+
+	mutex_lock(&bdi->flusher_cpumask_lock);
+	ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask);
+	mutex_unlock(&bdi->flusher_cpumask_lock);
+
+	return ret;
+}
+
 #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
 
 static struct device_attribute bdi_dev_attrs[] = {
 	__ATTR_RW(read_ahead_kb),
 	__ATTR_RW(min_ratio),
 	__ATTR_RW(max_ratio),
+	__ATTR_RW(cpu_list),
 	__ATTR_NULL,
 };
 
@@ -428,6 +480,7 @@ static int bdi_forker_thread(void *ptr)
 			writeback_inodes_wb(&bdi->wb, 1024,
 					    WB_REASON_FORKER_THREAD);
 		} else {
+			int ret;
 			/*
 			 * The spinlock makes sure we do not lose
 			 * wake-ups when racing with 'bdi_queue_work()'.
@@ -437,6 +490,14 @@ static int bdi_forker_thread(void *ptr)
 			spin_lock_bh(&bdi->wb_lock);
 			bdi->wb.task = task;
 			spin_unlock_bh(&bdi->wb_lock);
+			mutex_lock(&bdi->flusher_cpumask_lock);
+			ret = set_cpus_allowed_ptr(task,
+						   bdi->flusher_cpumask);
+			mutex_unlock(&bdi->flusher_cpumask_lock);
+			if (ret)
+				printk_once("%s: failed to bind flusher"
+					    " thread %s, error %d\n",
+					    __func__, task->comm, ret);
 			wake_up_process(task);
 		}
 		bdi_clear_pending(bdi);
@@ -509,6 +570,17 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 				       dev_name(dev));
 		if (IS_ERR(wb->task))
 			return PTR_ERR(wb->task);
+	} else {
+		int node;
+		/*
+		 * Set up a default cpumask for the flusher threads that
+		 * includes all cpus on the same numa node as the device.
+		 * The mask may be overridden via sysfs.
+		 */
+		node = dev_to_node(bdi->dev);
+		if (node != NUMA_NO_NODE)
+			cpumask_copy(bdi->flusher_cpumask,
+				     cpumask_of_node(node));
 	}
 
 	bdi_debug_register(bdi, dev_name(dev));
@@ -634,6 +706,15 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	bdi_wb_init(&bdi->wb, bdi);
 
+	if (!bdi_cap_flush_forker(bdi)) {
+		bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+		if (!bdi->flusher_cpumask)
+			return -ENOMEM;
+		cpumask_setall(bdi->flusher_cpumask);
+		mutex_init(&bdi->flusher_cpumask_lock);
+	} else
+		bdi->flusher_cpumask = NULL;
+
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
 		if (err)
@@ -656,6 +737,7 @@ int bdi_init(struct backing_dev_info *bdi)
 err:
 		while (i--)
 			percpu_counter_destroy(&bdi->bdi_stat[i]);
+		kfree(bdi->flusher_cpumask);
 	}
 
 	return err;
@@ -683,6 +765,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
 
 	bdi_unregister(bdi);
 
+	kfree(bdi->flusher_cpumask);
+
 	/*
 	 * If bdi_unregister() had already been called earlier, the
 	 * wakeup_timer could still be armed because bdi_prune_sb()
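A note on the accepted format: cpu_list takes the kernel's standard cpulist syntax, i.e. comma-separated cpu numbers and ranges such as "0-3,8", which cpulist_parse() turns into a cpumask. A rough userspace illustration of that syntax (a sketch only, not the kernel's implementation, and capped at 64 cpus for brevity):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse a cpulist like "0-3,8" into a bitmask of cpus 0..63.
 * Illustrative only; the kernel's cpulist_parse() handles
 * arbitrary mask sizes and does stricter validation. */
static unsigned long long parse_cpulist(const char *s)
{
	unsigned long long mask = 0;
	char *copy = strdup(s), *tok, *save = NULL;

	for (tok = strtok_r(copy, ",", &save); tok;
	     tok = strtok_r(NULL, ",", &save)) {
		int lo, hi;

		/* a token is either a "lo-hi" range or a single cpu */
		if (sscanf(tok, "%d-%d", &lo, &hi) != 2)
			hi = lo = atoi(tok);
		for (; lo <= hi && lo < 64; lo++)
			mask |= 1ULL << lo;
	}
	free(copy);
	return mask;
}

int main(void)
{
	printf("0x%llx\n", parse_cpulist("0-3,8"));	/* prints 0x10f */
	return 0;
}

An invalid list makes cpulist_parse() return an error, so cpu_list_store() above rejects it before touching the thread's affinity or the saved mask.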