author:    Jeff Moyer <jmoyer@redhat.com>    2012-12-05 14:17:21 -0500
committer: Jens Axboe <axboe@kernel.dk>      2012-12-05 14:17:21 -0500
commit:    8fa72d234da9b6b473bbb1f74d533663e4996e6b
tree:      536b8d2cf5aa3f0baee3aa0d89e602733be3df56 /mm/backing-dev.c
parent:    c304a51bf42a7ce48f430a371e1213472c89e13d
bdi: add a user-tunable cpu_list for the bdi flusher threads
In realtime environments, it may be desirable to keep the per-bdi
flusher threads from running on certain cpus.  This patch adds a
cpu_list file to /sys/class/bdi/* to enable this.  The default is to
tie the flusher threads to the same numa node as the backing device
(though I could be convinced to make it a mask of all cpus to avoid
a change in behaviour).

Thanks to Jeremy Eder for the original idea.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
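As a quick illustration of the new knob (a sketch, not part of the patch
itself): the program below writes a cpu list to one bdi's cpu_list file
and reads it back.  The bdi name "8:0" and the range "0-3" are invented
example values; the store side parses the input with cpulist_parse(), so
the usual list syntax such as "0,2" or "0-3" applies.

/* Sketch: pin a bdi's flusher thread via the new sysfs file.
 * The bdi name "8:0" and the cpu range "0-3" are example values. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/class/bdi/8:0/cpu_list";
	char buf[256];
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	/* cpu_list_store() feeds this string to cpulist_parse(), which
	 * accepts comma-separated lists and ranges. */
	if (fputs("0-3", f) == EOF || fclose(f) == EOF) {
		perror(path);
		return 1;
	}

	/* Read the list back to confirm what the kernel accepted. */
	f = fopen(path, "r");
	if (f && fgets(buf, sizeof(buf), f))
		printf("flusher cpu_list: %s", buf);
	if (f)
		fclose(f);
	return 0;
}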
Diffstat (limited to 'mm/backing-dev.c')
-rw-r--r--  mm/backing-dev.c | 84
1 file changed, 84 insertions(+), 0 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d3ca2b3ee176..bd6a6cabef71 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
+#include <linux/slab.h>
 #include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -221,12 +222,63 @@ static ssize_t max_ratio_store(struct device *dev,
 }
 BDI_SHOW(max_ratio, bdi->max_ratio)
 
+static ssize_t cpu_list_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	struct bdi_writeback *wb = &bdi->wb;
+	cpumask_var_t newmask;
+	ssize_t ret;
+	struct task_struct *task;
+
+	if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
+		return -ENOMEM;
+
+	ret = cpulist_parse(buf, newmask);
+	if (!ret) {
+		spin_lock_bh(&bdi->wb_lock);
+		task = wb->task;
+		if (task)
+			get_task_struct(task);
+		spin_unlock_bh(&bdi->wb_lock);
+
+		mutex_lock(&bdi->flusher_cpumask_lock);
+		if (task) {
+			ret = set_cpus_allowed_ptr(task, newmask);
+			put_task_struct(task);
+		}
+		if (ret == 0) {
+			cpumask_copy(bdi->flusher_cpumask, newmask);
+			ret = count;
+		}
+		mutex_unlock(&bdi->flusher_cpumask_lock);
+
+	}
+	free_cpumask_var(newmask);
+
+	return ret;
+}
+
+static ssize_t cpu_list_show(struct device *dev,
+		struct device_attribute *attr, char *page)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	ssize_t ret;
+
+	mutex_lock(&bdi->flusher_cpumask_lock);
+	ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask);
+	mutex_unlock(&bdi->flusher_cpumask_lock);
+
+	return ret;
+}
+
 #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
 
 static struct device_attribute bdi_dev_attrs[] = {
 	__ATTR_RW(read_ahead_kb),
 	__ATTR_RW(min_ratio),
 	__ATTR_RW(max_ratio),
+	__ATTR_RW(cpu_list),
 	__ATTR_NULL,
 };
 
@@ -428,6 +480,7 @@ static int bdi_forker_thread(void *ptr)
 				writeback_inodes_wb(&bdi->wb, 1024,
 						    WB_REASON_FORKER_THREAD);
 			} else {
+				int ret;
 				/*
 				 * The spinlock makes sure we do not lose
 				 * wake-ups when racing with 'bdi_queue_work()'.
@@ -437,6 +490,14 @@ static int bdi_forker_thread(void *ptr)
 				spin_lock_bh(&bdi->wb_lock);
 				bdi->wb.task = task;
 				spin_unlock_bh(&bdi->wb_lock);
+				mutex_lock(&bdi->flusher_cpumask_lock);
+				ret = set_cpus_allowed_ptr(task,
+							bdi->flusher_cpumask);
+				mutex_unlock(&bdi->flusher_cpumask_lock);
+				if (ret)
+					printk_once("%s: failed to bind flusher"
+						    " thread %s, error %d\n",
+						    __func__, task->comm, ret);
 				wake_up_process(task);
 			}
 			bdi_clear_pending(bdi);
@@ -509,6 +570,17 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 					dev_name(dev));
 		if (IS_ERR(wb->task))
 			return PTR_ERR(wb->task);
+	} else {
+		int node;
+		/*
+		 * Set up a default cpumask for the flusher threads that
+		 * includes all cpus on the same numa node as the device.
+		 * The mask may be overridden via sysfs.
+		 */
+		node = dev_to_node(bdi->dev);
+		if (node != NUMA_NO_NODE)
+			cpumask_copy(bdi->flusher_cpumask,
+				     cpumask_of_node(node));
 	}
 
 	bdi_debug_register(bdi, dev_name(dev));
@@ -634,6 +706,15 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	bdi_wb_init(&bdi->wb, bdi);
 
+	if (!bdi_cap_flush_forker(bdi)) {
+		bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+		if (!bdi->flusher_cpumask)
+			return -ENOMEM;
+		cpumask_setall(bdi->flusher_cpumask);
+		mutex_init(&bdi->flusher_cpumask_lock);
+	} else
+		bdi->flusher_cpumask = NULL;
+
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
 		if (err)
@@ -656,6 +737,7 @@ int bdi_init(struct backing_dev_info *bdi)
 err:
 		while (i--)
 			percpu_counter_destroy(&bdi->bdi_stat[i]);
+		kfree(bdi->flusher_cpumask);
 	}
 
 	return err;
@@ -683,6 +765,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
 
 	bdi_unregister(bdi);
 
+	kfree(bdi->flusher_cpumask);
+
 	/*
 	 * If bdi_unregister() had already been called earlier, the
 	 * wakeup_timer could still be armed because bdi_prune_sb()