diff options
author | Jens Axboe <jaxboe@fusionio.com> | 2011-03-08 07:19:51 -0500 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2011-03-10 02:45:54 -0500 |
commit | 73c101011926c5832e6e141682180c4debe2cf45 (patch) | |
tree | b8eeb521a7833cb198d6f39d5a931d820e2a663f /kernel | |
parent | a488e74976bf0a9bccecdd094378394942dacef1 (diff) |
block: initial patch for on-stack per-task plugging
This patch adds support for creating a queuing context outside
of the queue itself. This enables us to batch up pieces of IO
before grabbing the block device queue lock and submitting them to
the IO scheduler.
The context is created on the stack of the process and a pointer to it
is stored in the task structure, so that we can automatically unplug it
if the task hits a schedule event.
The current queue plugging happens implicitly if IO is submitted to
an empty device, yet callers have to remember to unplug that IO when
they are going to wait for it. This is an ugly API and has caused bugs
in the past. Additionally, it requires hacks in the VM (the ->sync_page()
callback) to handle that logic. By switching to an explicit plugging
scheme, we make the API a lot nicer and can get rid of the ->sync_page()
hack in the VM.
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/exit.c | 1 |
-rw-r--r-- | kernel/fork.c | 3 |
-rw-r--r-- | kernel/sched.c | 12 |
3 files changed, 16 insertions, 0 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index f9a45ebcc7b..6a488ad2dce 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code) | |||
908 | profile_task_exit(tsk); | 908 | profile_task_exit(tsk); |
909 | 909 | ||
910 | WARN_ON(atomic_read(&tsk->fs_excl)); | 910 | WARN_ON(atomic_read(&tsk->fs_excl)); |
911 | WARN_ON(blk_needs_flush_plug(tsk)); | ||
911 | 912 | ||
912 | if (unlikely(in_interrupt())) | 913 | if (unlikely(in_interrupt())) |
913 | panic("Aiee, killing interrupt handler!"); | 914 | panic("Aiee, killing interrupt handler!"); |
diff --git a/kernel/fork.c b/kernel/fork.c index 25e429152dd..027c80e5162 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1204,6 +1204,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1204 | * Clear TID on mm_release()? | 1204 | * Clear TID on mm_release()? |
1205 | */ | 1205 | */ |
1206 | p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; | 1206 | p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; |
1207 | #ifdef CONFIG_BLOCK | ||
1208 | p->plug = NULL; | ||
1209 | #endif | ||
1207 | #ifdef CONFIG_FUTEX | 1210 | #ifdef CONFIG_FUTEX |
1208 | p->robust_list = NULL; | 1211 | p->robust_list = NULL; |
1209 | #ifdef CONFIG_COMPAT | 1212 | #ifdef CONFIG_COMPAT |
diff --git a/kernel/sched.c b/kernel/sched.c index 18d38e4ec7b..ca098bf4cc6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -3978,6 +3978,16 @@ need_resched_nonpreemptible: | |||
3978 | switch_count = &prev->nvcsw; | 3978 | switch_count = &prev->nvcsw; |
3979 | } | 3979 | } |
3980 | 3980 | ||
3981 | /* | ||
3982 | * If we are going to sleep and we have plugged IO queued, make | ||
3983 | * sure to submit it to avoid deadlocks. | ||
3984 | */ | ||
3985 | if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) { | ||
3986 | raw_spin_unlock(&rq->lock); | ||
3987 | blk_flush_plug(prev); | ||
3988 | raw_spin_lock(&rq->lock); | ||
3989 | } | ||
3990 | |||
3981 | pre_schedule(rq, prev); | 3991 | pre_schedule(rq, prev); |
3982 | 3992 | ||
3983 | if (unlikely(!rq->nr_running)) | 3993 | if (unlikely(!rq->nr_running)) |
@@ -5333,6 +5343,7 @@ void __sched io_schedule(void) | |||
5333 | 5343 | ||
5334 | delayacct_blkio_start(); | 5344 | delayacct_blkio_start(); |
5335 | atomic_inc(&rq->nr_iowait); | 5345 | atomic_inc(&rq->nr_iowait); |
5346 | blk_flush_plug(current); | ||
5336 | current->in_iowait = 1; | 5347 | current->in_iowait = 1; |
5337 | schedule(); | 5348 | schedule(); |
5338 | current->in_iowait = 0; | 5349 | current->in_iowait = 0; |
@@ -5348,6 +5359,7 @@ long __sched io_schedule_timeout(long timeout) | |||
5348 | 5359 | ||
5349 | delayacct_blkio_start(); | 5360 | delayacct_blkio_start(); |
5350 | atomic_inc(&rq->nr_iowait); | 5361 | atomic_inc(&rq->nr_iowait); |
5362 | blk_flush_plug(current); | ||
5351 | current->in_iowait = 1; | 5363 | current->in_iowait = 1; |
5352 | ret = schedule_timeout(timeout); | 5364 | ret = schedule_timeout(timeout); |
5353 | current->in_iowait = 0; | 5365 | current->in_iowait = 0; |