aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author Jens Axboe <jaxboe@fusionio.com> 2011-03-08 07:19:51 -0500
committer Jens Axboe <jaxboe@fusionio.com> 2011-03-10 02:45:54 -0500
commit 73c101011926c5832e6e141682180c4debe2cf45 (patch)
tree b8eeb521a7833cb198d6f39d5a931d820e2a663f /kernel
parent a488e74976bf0a9bccecdd094378394942dacef1 (diff)
block: initial patch for on-stack per-task plugging
This patch adds support for creating a queuing context outside of the queue itself. This enables us to batch up pieces of IO before grabbing the block device queue lock and submitting them to the IO scheduler.

The context is created on the stack of the process and assigned in the task structure, so that we can auto-unplug it if we hit a schedule event.

The current queue plugging happens implicitly if IO is submitted to an empty device, yet callers have to remember to unplug that IO when they are going to wait for it. This is an ugly API and has caused bugs in the past. Additionally, it requires hacks in the vm (->sync_page() callback) to handle that logic. By switching to an explicit plugging scheme we make the API a lot nicer and can get rid of the ->sync_page() hack in the vm.

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/exit.c 1
-rw-r--r-- kernel/fork.c 3
-rw-r--r-- kernel/sched.c 12
3 files changed, 16 insertions, 0 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index f9a45ebcc7b..6a488ad2dce 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code)
908 profile_task_exit(tsk); 908 profile_task_exit(tsk);
909 909
910 WARN_ON(atomic_read(&tsk->fs_excl)); 910 WARN_ON(atomic_read(&tsk->fs_excl));
911 WARN_ON(blk_needs_flush_plug(tsk));
911 912
912 if (unlikely(in_interrupt())) 913 if (unlikely(in_interrupt()))
913 panic("Aiee, killing interrupt handler!"); 914 panic("Aiee, killing interrupt handler!");
diff --git a/kernel/fork.c b/kernel/fork.c
index 25e429152dd..027c80e5162 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1204,6 +1204,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1204 * Clear TID on mm_release()? 1204 * Clear TID on mm_release()?
1205 */ 1205 */
1206 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; 1206 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
1207#ifdef CONFIG_BLOCK
1208 p->plug = NULL;
1209#endif
1207#ifdef CONFIG_FUTEX 1210#ifdef CONFIG_FUTEX
1208 p->robust_list = NULL; 1211 p->robust_list = NULL;
1209#ifdef CONFIG_COMPAT 1212#ifdef CONFIG_COMPAT
diff --git a/kernel/sched.c b/kernel/sched.c
index 18d38e4ec7b..ca098bf4cc6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3978,6 +3978,16 @@ need_resched_nonpreemptible:
3978 switch_count = &prev->nvcsw; 3978 switch_count = &prev->nvcsw;
3979 } 3979 }
3980 3980
3981 /*
3982 * If we are going to sleep and we have plugged IO queued, make
3983 * sure to submit it to avoid deadlocks.
3984 */
3985 if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
3986 raw_spin_unlock(&rq->lock);
3987 blk_flush_plug(prev);
3988 raw_spin_lock(&rq->lock);
3989 }
3990
3981 pre_schedule(rq, prev); 3991 pre_schedule(rq, prev);
3982 3992
3983 if (unlikely(!rq->nr_running)) 3993 if (unlikely(!rq->nr_running))
@@ -5333,6 +5343,7 @@ void __sched io_schedule(void)
5333 5343
5334 delayacct_blkio_start(); 5344 delayacct_blkio_start();
5335 atomic_inc(&rq->nr_iowait); 5345 atomic_inc(&rq->nr_iowait);
5346 blk_flush_plug(current);
5336 current->in_iowait = 1; 5347 current->in_iowait = 1;
5337 schedule(); 5348 schedule();
5338 current->in_iowait = 0; 5349 current->in_iowait = 0;
@@ -5348,6 +5359,7 @@ long __sched io_schedule_timeout(long timeout)
5348 5359
5349 delayacct_blkio_start(); 5360 delayacct_blkio_start();
5350 atomic_inc(&rq->nr_iowait); 5361 atomic_inc(&rq->nr_iowait);
5362 blk_flush_plug(current);
5351 current->in_iowait = 1; 5363 current->in_iowait = 1;
5352 ret = schedule_timeout(timeout); 5364 ret = schedule_timeout(timeout);
5353 current->in_iowait = 0; 5365 current->in_iowait = 0;