Merge branch 'for-3.8/core' of git://git.kernel.dk/linux-block

Pull block layer core updates from Jens Axboe: "Here are the core block IO bits for 3.8. The branch contains: - The final version of the surprise device removal fixups from Bart. - Don't hide EFI partitions under advanced partition types. It's fairly widespread these days. This is especially dangerous for systems that have both msdos and efi partition tables, where you want to keep them in sync. - Cleanup of using -1 instead of the proper NUMA_NO_NODE - Export control of bdi flusher thread CPU mask and default to using the home node (if known) from Jeff. - Export unplug tracepoint for MD. - Core improvements from Shaohua. Reinstate the recursive merge, as the original bug has been fixed. Add plugging for discard and also fix a problem handling non pow-of-2 discard limits. There's a trivial merge in block/blk-exec.c due to a fix that went into 3.7-rc at a later point than -rc4 where this is based." * 'for-3.8/core' of git://git.kernel.dk/linux-block: block: export block_unplug tracepoint block: add plug for blkdev_issue_discard block: discard granularity might not be power of 2 deadline: Allow 0ms deadline latency, increase the read speed partitions: enable EFI/GPT support by default bsg: Remove unused function bsg_goose_queue() block: Make blk_cleanup_queue() wait until request_fn finished block: Avoid scheduling delayed work on a dead queue block: Avoid that request_fn is invoked on a dead queue block: Let blk_drain_queue() caller obtain the queue lock block: Rename queue dead flag bdi: add a user-tunable cpu_list for the bdi flusher threads block: use NUMA_NO_NODE instead of -1 block: recursive merge requests block CFQ: avoid moving request to different queue
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-17 11:27:23 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-17 11:27:23 -0500
commit: 60da5bf47dd3d301a1d3bd4f0a4b9e29a184515c (patch)
tree: 30de83370440aae5350d9ab3fbe6583abd439ee8 /mm
parent: 3c2e81ef344a90bb0a39d84af6878b4aeff568a2 (diff)
parent: cbae8d45d61f3a8c155caf267d01e5e0f0b2f4b7 (diff)
1 files changed, 84 insertions, 0 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d3ca2b3ee176..bd6a6cabef71 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
+#include <linux/slab.h>
 #include <trace/events/writeback.h>
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -221,12 +222,63 @@ static ssize_t max_ratio_store(struct device *dev,
 }
 BDI_SHOW(max_ratio, bdi->max_ratio)
+static ssize_t cpu_list_store(struct device *dev,
+                struct device_attribute *attr, const char *buf, size_t count)
+{
+        struct backing_dev_info *bdi = dev_get_drvdata(dev);
+        struct bdi_writeback *wb = &bdi->wb;
+        cpumask_var_t newmask;
+        ssize_t ret;
+        struct task_struct *task;
+        if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
+                return -ENOMEM;
+        ret = cpulist_parse(buf, newmask);
+        if (!ret) {
+                spin_lock_bh(&bdi->wb_lock);
+                task = wb->task;
+                if (task)
+                        get_task_struct(task);
+                spin_unlock_bh(&bdi->wb_lock);
+                mutex_lock(&bdi->flusher_cpumask_lock);
+                if (task) {
+                        ret = set_cpus_allowed_ptr(task, newmask);
+                        put_task_struct(task);
+                }
+                if (ret == 0) {
+                        cpumask_copy(bdi->flusher_cpumask, newmask);
+                        ret = count;
+                }
+                mutex_unlock(&bdi->flusher_cpumask_lock);
+        }
+        free_cpumask_var(newmask);
+        return ret;
+}
+static ssize_t cpu_list_show(struct device *dev,
+                struct device_attribute *attr, char *page)
+{
+        struct backing_dev_info *bdi = dev_get_drvdata(dev);
+        ssize_t ret;
+        mutex_lock(&bdi->flusher_cpumask_lock);
+        ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask);
+        mutex_unlock(&bdi->flusher_cpumask_lock);
+        return ret;
+}
 #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
 static struct device_attribute bdi_dev_attrs[] = {
        __ATTR_RW(read_ahead_kb),
        __ATTR_RW(min_ratio),
        __ATTR_RW(max_ratio),
+        __ATTR_RW(cpu_list),
        __ATTR_NULL,
 };
@@ -428,6 +480,7 @@ static int bdi_forker_thread(void *ptr)
                                writeback_inodes_wb(&bdi->wb, 1024,
                                                    WB_REASON_FORKER_THREAD);
                        } else {
+                                int ret;
                                /*
                                 * The spinlock makes sure we do not lose
                                 * wake-ups when racing with 'bdi_queue_work()'.
@@ -437,6 +490,14 @@ static int bdi_forker_thread(void *ptr)
                                spin_lock_bh(&bdi->wb_lock);
                                bdi->wb.task = task;
                                spin_unlock_bh(&bdi->wb_lock);
+                                mutex_lock(&bdi->flusher_cpumask_lock);
+                                ret = set_cpus_allowed_ptr(task,
+                                                        bdi->flusher_cpumask);
+                                mutex_unlock(&bdi->flusher_cpumask_lock);
+                                if (ret)
+                                        printk_once("%s: failed to bind flusher"
+                                                    " thread %s, error %d\n",
+                                                    __func__, task->comm, ret);
                                wake_up_process(task);
                        }
                        bdi_clear_pending(bdi);
@@ -509,6 +570,17 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
                                                dev_name(dev));
                if (IS_ERR(wb->task))
                        return PTR_ERR(wb->task);
+        } else {
+                int node;
+                /*
+                 * Set up a default cpumask for the flusher threads that
+                 * includes all cpus on the same numa node as the device.
+                 * The mask may be overridden via sysfs.
+                 */
+                node = dev_to_node(bdi->dev);
+                if (node != NUMA_NO_NODE)
+                        cpumask_copy(bdi->flusher_cpumask,
+                                     cpumask_of_node(node));
        }
        bdi_debug_register(bdi, dev_name(dev));
@@ -634,6 +706,15 @@ int bdi_init(struct backing_dev_info *bdi)
        bdi_wb_init(&bdi->wb, bdi);
+        if (!bdi_cap_flush_forker(bdi)) {
+                bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+                if (!bdi->flusher_cpumask)
+                        return -ENOMEM;
+                cpumask_setall(bdi->flusher_cpumask);
+                mutex_init(&bdi->flusher_cpumask_lock);
+        } else
+                bdi->flusher_cpumask = NULL;
        for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
                err = percpu_counter_init(&bdi->bdi_stat[i], 0);
                if (err)
@@ -656,6 +737,7 @@ int bdi_init(struct backing_dev_info *bdi)
 err:
                while (i--)
                        percpu_counter_destroy(&bdi->bdi_stat[i]);
+                kfree(bdi->flusher_cpumask);
        }
        return err;
@@ -683,6 +765,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
        bdi_unregister(bdi);
+        kfree(bdi->flusher_cpumask);
        /*
         * If bdi_unregister() had already been called earlier, the
         * wakeup_timer could still be armed because bdi_prune_sb()
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-17 11:27:23 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-17 11:27:23 -0500
commit	60da5bf47dd3d301a1d3bd4f0a4b9e29a184515c (patch)
tree	30de83370440aae5350d9ab3fbe6583abd439ee8 /mm
parent	3c2e81ef344a90bb0a39d84af6878b4aeff568a2 (diff)
parent	cbae8d45d61f3a8c155caf267d01e5e0f0b2f4b7 (diff)

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d3ca2b3ee176..bd6a6cabef71 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
10	#include <linux/module.h>	10	#include <linux/module.h>
11	#include <linux/writeback.h>	11	#include <linux/writeback.h>
12	#include <linux/device.h>	12	#include <linux/device.h>
		13	#include <linux/slab.h>
13	#include <trace/events/writeback.h>	14	#include <trace/events/writeback.h>
14		15
15	static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);	16	static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -221,12 +222,63 @@ static ssize_t max_ratio_store(struct device *dev,
221	}	222	}
222	BDI_SHOW(max_ratio, bdi->max_ratio)	223	BDI_SHOW(max_ratio, bdi->max_ratio)
223		224
		225	static ssize_t cpu_list_store(struct device *dev,
		226	struct device_attribute *attr, const char *buf, size_t count)
		227	{
		228	struct backing_dev_info *bdi = dev_get_drvdata(dev);
		229	struct bdi_writeback *wb = &bdi->wb;
		230	cpumask_var_t newmask;
		231	ssize_t ret;
		232	struct task_struct *task;
		233
		234	if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
		235	return -ENOMEM;
		236
		237	ret = cpulist_parse(buf, newmask);
		238	if (!ret) {
		239	spin_lock_bh(&bdi->wb_lock);
		240	task = wb->task;
		241	if (task)
		242	get_task_struct(task);
		243	spin_unlock_bh(&bdi->wb_lock);
		244
		245	mutex_lock(&bdi->flusher_cpumask_lock);
		246	if (task) {
		247	ret = set_cpus_allowed_ptr(task, newmask);
		248	put_task_struct(task);
		249	}
		250	if (ret == 0) {
		251	cpumask_copy(bdi->flusher_cpumask, newmask);
		252	ret = count;
		253	}
		254	mutex_unlock(&bdi->flusher_cpumask_lock);
		255
		256	}
		257	free_cpumask_var(newmask);
		258
		259	return ret;
		260	}
		261
		262	static ssize_t cpu_list_show(struct device *dev,
		263	struct device_attribute *attr, char *page)
		264	{
		265	struct backing_dev_info *bdi = dev_get_drvdata(dev);
		266	ssize_t ret;
		267
		268	mutex_lock(&bdi->flusher_cpumask_lock);
		269	ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask);
		270	mutex_unlock(&bdi->flusher_cpumask_lock);
		271
		272	return ret;
		273	}
		274
224	#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)	275	#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
225		276
226	static struct device_attribute bdi_dev_attrs[] = {	277	static struct device_attribute bdi_dev_attrs[] = {
227	__ATTR_RW(read_ahead_kb),	278	__ATTR_RW(read_ahead_kb),
228	__ATTR_RW(min_ratio),	279	__ATTR_RW(min_ratio),
229	__ATTR_RW(max_ratio),	280	__ATTR_RW(max_ratio),
		281	__ATTR_RW(cpu_list),
230	__ATTR_NULL,	282	__ATTR_NULL,
231	};	283	};
232		284
@@ -428,6 +480,7 @@ static int bdi_forker_thread(void *ptr)
428	writeback_inodes_wb(&bdi->wb, 1024,	480	writeback_inodes_wb(&bdi->wb, 1024,
429	WB_REASON_FORKER_THREAD);	481	WB_REASON_FORKER_THREAD);
430	} else {	482	} else {
		483	int ret;
431	/*	484	/*
432	* The spinlock makes sure we do not lose	485	* The spinlock makes sure we do not lose
433	* wake-ups when racing with 'bdi_queue_work()'.	486	* wake-ups when racing with 'bdi_queue_work()'.
@@ -437,6 +490,14 @@ static int bdi_forker_thread(void *ptr)
437	spin_lock_bh(&bdi->wb_lock);	490	spin_lock_bh(&bdi->wb_lock);
438	bdi->wb.task = task;	491	bdi->wb.task = task;
439	spin_unlock_bh(&bdi->wb_lock);	492	spin_unlock_bh(&bdi->wb_lock);
		493	mutex_lock(&bdi->flusher_cpumask_lock);
		494	ret = set_cpus_allowed_ptr(task,
		495	bdi->flusher_cpumask);
		496	mutex_unlock(&bdi->flusher_cpumask_lock);
		497	if (ret)
		498	printk_once("%s: failed to bind flusher"
		499	" thread %s, error %d\n",
		500	__func__, task->comm, ret);
440	wake_up_process(task);	501	wake_up_process(task);
441	}	502	}
442	bdi_clear_pending(bdi);	503	bdi_clear_pending(bdi);
@@ -509,6 +570,17 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
509	dev_name(dev));	570	dev_name(dev));
510	if (IS_ERR(wb->task))	571	if (IS_ERR(wb->task))
511	return PTR_ERR(wb->task);	572	return PTR_ERR(wb->task);
		573	} else {
		574	int node;
		575	/*
		576	* Set up a default cpumask for the flusher threads that
		577	* includes all cpus on the same numa node as the device.
		578	* The mask may be overridden via sysfs.
		579	*/
		580	node = dev_to_node(bdi->dev);
		581	if (node != NUMA_NO_NODE)
		582	cpumask_copy(bdi->flusher_cpumask,
		583	cpumask_of_node(node));
512	}	584	}
513		585
514	bdi_debug_register(bdi, dev_name(dev));	586	bdi_debug_register(bdi, dev_name(dev));
@@ -634,6 +706,15 @@ int bdi_init(struct backing_dev_info *bdi)
634		706
635	bdi_wb_init(&bdi->wb, bdi);	707	bdi_wb_init(&bdi->wb, bdi);
636		708
		709	if (!bdi_cap_flush_forker(bdi)) {
		710	bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
		711	if (!bdi->flusher_cpumask)
		712	return -ENOMEM;
		713	cpumask_setall(bdi->flusher_cpumask);
		714	mutex_init(&bdi->flusher_cpumask_lock);
		715	} else
		716	bdi->flusher_cpumask = NULL;
		717
637	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {	718	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
638	err = percpu_counter_init(&bdi->bdi_stat[i], 0);	719	err = percpu_counter_init(&bdi->bdi_stat[i], 0);
639	if (err)	720	if (err)
@@ -656,6 +737,7 @@ int bdi_init(struct backing_dev_info *bdi)
656	err:	737	err:
657	while (i--)	738	while (i--)
658	percpu_counter_destroy(&bdi->bdi_stat[i]);	739	percpu_counter_destroy(&bdi->bdi_stat[i]);
		740	kfree(bdi->flusher_cpumask);
659	}	741	}
660		742
661	return err;	743	return err;
@@ -683,6 +765,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
683		765
684	bdi_unregister(bdi);	766	bdi_unregister(bdi);
685		767
		768	kfree(bdi->flusher_cpumask);
		769
686	/*	770	/*
687	* If bdi_unregister() had already been called earlier, the	771	* If bdi_unregister() had already been called earlier, the
688	* wakeup_timer could still be armed because bdi_prune_sb()	772	* wakeup_timer could still be armed because bdi_prune_sb()