author	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-17 11:27:23 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-17 11:27:23 -0500
commit	60da5bf47dd3d301a1d3bd4f0a4b9e29a184515c (patch)
tree	30de83370440aae5350d9ab3fbe6583abd439ee8 /mm
parent	3c2e81ef344a90bb0a39d84af6878b4aeff568a2 (diff)
parent	cbae8d45d61f3a8c155caf267d01e5e0f0b2f4b7 (diff)
Merge branch 'for-3.8/core' of git://git.kernel.dk/linux-block
Pull block layer core updates from Jens Axboe:
 "Here are the core block IO bits for 3.8.  The branch contains:

  - The final version of the surprise device removal fixups from Bart.

  - Don't hide EFI partitions under advanced partition types.  It's
    fairly widespread these days.  This is especially dangerous for
    systems that have both msdos and efi partition tables, where you
    want to keep them in sync.

  - Cleanup of using -1 instead of the proper NUMA_NO_NODE

  - Export control of bdi flusher thread CPU mask and default to using
    the home node (if known) from Jeff.

  - Export unplug tracepoint for MD.

  - Core improvements from Shaohua.  Reinstate the recursive merge, as
    the original bug has been fixed.  Add plugging for discard and also
    fix a problem handling non pow-of-2 discard limits.

  There's a trivial merge in block/blk-exec.c due to a fix that went
  into 3.7-rc at a later point than -rc4 where this is based."

* 'for-3.8/core' of git://git.kernel.dk/linux-block:
  block: export block_unplug tracepoint
  block: add plug for blkdev_issue_discard
  block: discard granularity might not be power of 2
  deadline: Allow 0ms deadline latency, increase the read speed
  partitions: enable EFI/GPT support by default
  bsg: Remove unused function bsg_goose_queue()
  block: Make blk_cleanup_queue() wait until request_fn finished
  block: Avoid scheduling delayed work on a dead queue
  block: Avoid that request_fn is invoked on a dead queue
  block: Let blk_drain_queue() caller obtain the queue lock
  block: Rename queue dead flag
  bdi: add a user-tunable cpu_list for the bdi flusher threads
  block: use NUMA_NO_NODE instead of -1
  block: recursive merge requests
  block CFQ: avoid moving request to different queue
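[Editorial note, not part of the pull request: to make the new bdi knob concrete, here is a minimal userspace sketch.  The bdi name "8:0" is a hypothetical example; substitute a device that actually exists under /sys/class/bdi/ on your system.  The cpu_list attribute itself is the one added by the diff below.]

/*
 * Illustration only: exercise the new per-bdi cpu_list sysfs attribute
 * from userspace.  "8:0" is a hypothetical bdi name.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        const char *path = "/sys/class/bdi/8:0/cpu_list";
        char buf[256];
        FILE *f;

        f = fopen(path, "w");
        if (!f) {
                perror(path);
                return EXIT_FAILURE;
        }
        /* Same cpulist syntax the kernel parses with cpulist_parse(). */
        fputs("0-3\n", f);
        if (fclose(f)) {        /* sysfs write errors surface on close */
                perror(path);
                return EXIT_FAILURE;
        }

        f = fopen(path, "r");
        if (f && fgets(buf, sizeof(buf), f))
                printf("flusher cpu_list: %s", buf);
        if (f)
                fclose(f);
        return EXIT_SUCCESS;
}

From a shell the equivalent is simply: echo 0-3 > /sys/class/bdi/8:0/cpu_list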
Diffstat (limited to 'mm')
-rw-r--r--  mm/backing-dev.c  84
1 file changed, 84 insertions, 0 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d3ca2b3ee176..bd6a6cabef71 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
+#include <linux/slab.h>
 #include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -221,12 +222,63 @@ static ssize_t max_ratio_store(struct device *dev,
 }
 BDI_SHOW(max_ratio, bdi->max_ratio)
 
+static ssize_t cpu_list_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	struct bdi_writeback *wb = &bdi->wb;
+	cpumask_var_t newmask;
+	ssize_t ret;
+	struct task_struct *task;
+
+	if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
+		return -ENOMEM;
+
+	ret = cpulist_parse(buf, newmask);
+	if (!ret) {
+		spin_lock_bh(&bdi->wb_lock);
+		task = wb->task;
+		if (task)
+			get_task_struct(task);
+		spin_unlock_bh(&bdi->wb_lock);
+
+		mutex_lock(&bdi->flusher_cpumask_lock);
+		if (task) {
+			ret = set_cpus_allowed_ptr(task, newmask);
+			put_task_struct(task);
+		}
+		if (ret == 0) {
+			cpumask_copy(bdi->flusher_cpumask, newmask);
+			ret = count;
+		}
+		mutex_unlock(&bdi->flusher_cpumask_lock);
+
+	}
+	free_cpumask_var(newmask);
+
+	return ret;
+}
+
+static ssize_t cpu_list_show(struct device *dev,
+		struct device_attribute *attr, char *page)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	ssize_t ret;
+
+	mutex_lock(&bdi->flusher_cpumask_lock);
+	ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask);
+	mutex_unlock(&bdi->flusher_cpumask_lock);
+
+	return ret;
+}
+
 #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
 
 static struct device_attribute bdi_dev_attrs[] = {
 	__ATTR_RW(read_ahead_kb),
 	__ATTR_RW(min_ratio),
 	__ATTR_RW(max_ratio),
+	__ATTR_RW(cpu_list),
 	__ATTR_NULL,
 };
 
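[Editorial note on the store path above: the writer takes a reference on the flusher task under wb_lock (a spinlock), drops the lock, and only then performs the sleeping set_cpus_allowed_ptr() call under flusher_cpumask_lock (a mutex), so the task cannot exit underneath the affinity change.  To make the cpulist syntax and the affinity call concrete, here is a small userspace analogue; it is only an illustration, with sched_setaffinity() and a hand-rolled parser standing in for the kernel's set_cpus_allowed_ptr() and cpulist_parse().]

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse "0-3,5"-style cpulists, roughly what cpulist_parse() accepts. */
static int parse_cpulist(const char *s, cpu_set_t *set)
{
        char *dup = strdup(s), *tok, *save = NULL;

        if (!dup)
                return -1;
        CPU_ZERO(set);
        for (tok = strtok_r(dup, ",", &save); tok;
             tok = strtok_r(NULL, ",", &save)) {
                int lo, hi;

                if (sscanf(tok, "%d-%d", &lo, &hi) != 2) {
                        if (sscanf(tok, "%d", &lo) != 1)
                                goto bad;
                        hi = lo;        /* single cpu, not a range */
                }
                if (lo < 0 || hi < lo || hi >= CPU_SETSIZE)
                        goto bad;
                for (; lo <= hi; lo++)
                        CPU_SET(lo, set);
        }
        free(dup);
        return 0;
bad:
        free(dup);
        return -1;
}

int main(int argc, char **argv)
{
        cpu_set_t set;

        if (argc < 2 || parse_cpulist(argv[1], &set)) {
                fprintf(stderr, "usage: %s <cpulist, e.g. 0-3,5>\n", argv[0]);
                return EXIT_FAILURE;
        }
        /* Userspace stand-in for set_cpus_allowed_ptr() on the flusher. */
        if (sched_setaffinity(0, sizeof(set), &set)) {
                perror("sched_setaffinity");
                return EXIT_FAILURE;
        }
        printf("current thread now bound to %s\n", argv[1]);
        return EXIT_SUCCESS;
}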
@@ -428,6 +480,7 @@ static int bdi_forker_thread(void *ptr)
 				writeback_inodes_wb(&bdi->wb, 1024,
 						    WB_REASON_FORKER_THREAD);
 			} else {
+				int ret;
 				/*
 				 * The spinlock makes sure we do not lose
 				 * wake-ups when racing with 'bdi_queue_work()'.
@@ -437,6 +490,14 @@ static int bdi_forker_thread(void *ptr)
 				spin_lock_bh(&bdi->wb_lock);
 				bdi->wb.task = task;
 				spin_unlock_bh(&bdi->wb_lock);
+				mutex_lock(&bdi->flusher_cpumask_lock);
+				ret = set_cpus_allowed_ptr(task,
+							   bdi->flusher_cpumask);
+				mutex_unlock(&bdi->flusher_cpumask_lock);
+				if (ret)
+					printk_once("%s: failed to bind flusher"
+						    " thread %s, error %d\n",
+						    __func__, task->comm, ret);
 				wake_up_process(task);
 			}
 			bdi_clear_pending(bdi);
@@ -509,6 +570,17 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 				dev_name(dev));
 		if (IS_ERR(wb->task))
 			return PTR_ERR(wb->task);
+	} else {
+		int node;
+		/*
+		 * Set up a default cpumask for the flusher threads that
+		 * includes all cpus on the same numa node as the device.
+		 * The mask may be overridden via sysfs.
+		 */
+		node = dev_to_node(bdi->dev);
+		if (node != NUMA_NO_NODE)
+			cpumask_copy(bdi->flusher_cpumask,
+				     cpumask_of_node(node));
 	}
 
 	bdi_debug_register(bdi, dev_name(dev));
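[Editorial note: the hunk above defaults the mask to the device's home NUMA node via dev_to_node()/cpumask_of_node().  As a hedged illustration of what that default amounts to, the sketch below derives the same information from sysfs in userspace; the paths assume a PCI-backed disk, and "sda" is a hypothetical device name.]

/* Illustration only: recover a device's home node and its cpulist. */
#include <stdio.h>

int main(void)
{
        char path[128], cpulist[256];
        int node = -1;          /* -1 mirrors NUMA_NO_NODE */
        FILE *f;

        f = fopen("/sys/block/sda/device/numa_node", "r");
        if (f) {
                if (fscanf(f, "%d", &node) != 1)
                        node = -1;
                fclose(f);
        }
        if (node < 0) {         /* no home node known: mask stays as-is */
                puts("device has no home node");
                return 0;
        }
        snprintf(path, sizeof(path),
                 "/sys/devices/system/node/node%d/cpulist", node);
        f = fopen(path, "r");
        if (f && fgets(cpulist, sizeof(cpulist), f))
                printf("default flusher cpu_list for node %d: %s",
                       node, cpulist);
        if (f)
                fclose(f);
        return 0;
}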
@@ -634,6 +706,15 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	bdi_wb_init(&bdi->wb, bdi);
 
+	if (!bdi_cap_flush_forker(bdi)) {
+		bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+		if (!bdi->flusher_cpumask)
+			return -ENOMEM;
+		cpumask_setall(bdi->flusher_cpumask);
+		mutex_init(&bdi->flusher_cpumask_lock);
+	} else
+		bdi->flusher_cpumask = NULL;
+
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
 		if (err)
@@ -656,6 +737,7 @@ int bdi_init(struct backing_dev_info *bdi)
 err:
 		while (i--)
 			percpu_counter_destroy(&bdi->bdi_stat[i]);
+		kfree(bdi->flusher_cpumask);
 	}
 
 	return err;
@@ -683,6 +765,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
 
 	bdi_unregister(bdi);
 
+	kfree(bdi->flusher_cpumask);
+
 	/*
 	 * If bdi_unregister() had already been called earlier, the
 	 * wakeup_timer could still be armed because bdi_prune_sb()