author     Tejun Heo <tj@kernel.org>        2012-02-15 03:45:53 -0500
committer  Jens Axboe <axboe@kernel.dk>     2012-02-15 03:45:53 -0500
commit     621032ad6eaabf2fe771c4fa0d8f58e1fcfcdba6 (patch)
tree       903f4b1ae7dc9aa4f0538b606b1c7deb1542e704
parent     2274b029f640cd652ab59c363e5beebf5f50e609 (diff)
block: exit_io_context() should call elevator_exit_icq_fn()
While updating locking, b2efa05265 "block, cfq: unlink cfq_io_context's immediately" moved the elevator_exit_icq_fn() invocation from exit_io_context() to the final ioc put. While this doesn't cause catastrophic failure, it effectively removes task exit notification to the elevator and causes noticeable IO performance degradation with CFQ.

On task exit, CFQ used to immediately expire the slice if it was being used by the exiting task, as no more IO would be issued by that task; after b2efa05265, the notification is lost and the disk can sit idle needlessly, leading to noticeable IO performance degradation for certain workloads.

This patch renames ioc_exit_icq() to ioc_destroy_icq(), separates the elevator_exit_icq_fn() invocation into a new ioc_exit_icq(), and invokes it from exit_io_context(). An ICQ_EXITED flag is added to avoid invoking the callback more than once for the same icq.

Walking icq_list from the ioc side and invoking the elevator callback requires reverse double locking. This might be better implemented using RCU, but that isn't trivial: RCU protection would need to cover the request_queue, and the queue_lock switch on cleanup makes grabbing queue_lock from an RCU read section unsafe. Reverse double locking should do, at least for now.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-bisected-by: Shaohua Li <shli@kernel.org>
LKML-Reference: <CANejiEVzs=pUhQSTvUppkDcc2TNZyfohBRLygW5zFmXyk5A-xQ@mail.gmail.com>
Tested-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--  block/blk-ioc.c            | 55
-rw-r--r--  include/linux/iocontext.h  |  1
2 files changed, 48 insertions(+), 8 deletions(-)
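As an aside, the reverse double locking described in the commit message can be illustrated outside the kernel. The following is a minimal, hypothetical user-space sketch of the same trylock-or-restart idea, with pthread mutexes standing in for the ioc and queue_lock spinlocks; the names (fake_ioc, fake_icq, exit_all_icqs) are invented for illustration and are not kernel API.

#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_queue {
        pthread_mutex_t queue_lock;     /* stands in for q->queue_lock */
};

struct fake_icq {
        struct fake_queue *q;
        bool exited;                    /* plays the role of ICQ_EXITED */
        struct fake_icq *next;          /* plays the role of ioc_node */
};

struct fake_ioc {
        pthread_mutex_t lock;           /* outer (ioc) lock */
        struct fake_icq *icq_list;
};

/* Called with both the ioc lock and the icq's queue lock held. */
static void exit_one_icq(struct fake_icq *icq)
{
        icq->exited = true;
        printf("elevator notified for icq %p\n", (void *)icq);
}

/* Walk icq_list under the ioc lock, trylocking each inner queue lock. */
static void exit_all_icqs(struct fake_ioc *ioc)
{
retry:
        pthread_mutex_lock(&ioc->lock);
        for (struct fake_icq *icq = ioc->icq_list; icq; icq = icq->next) {
                if (icq->exited)
                        continue;       /* already handled in an earlier pass */
                if (pthread_mutex_trylock(&icq->q->queue_lock) == 0) {
                        exit_one_icq(icq);
                        pthread_mutex_unlock(&icq->q->queue_lock);
                } else {
                        /* inner lock busy: drop the outer lock and restart */
                        pthread_mutex_unlock(&ioc->lock);
                        sched_yield();
                        goto retry;
                }
        }
        pthread_mutex_unlock(&ioc->lock);
}

int main(void)
{
        struct fake_queue q;
        struct fake_icq icq = { .q = &q, .exited = false, .next = NULL };
        struct fake_ioc ioc = { .icq_list = &icq };

        pthread_mutex_init(&q.queue_lock, NULL);
        pthread_mutex_init(&ioc.lock, NULL);

        exit_all_icqs(&ioc);
        return 0;
}

Build with something like "cc -pthread sketch.c". Trylocking the inner lock and restarting the whole walk avoids taking the locks in the deadlock-prone order, at the cost of possibly revisiting entries; the exited flag keeps restarts from notifying the same icq twice, which is exactly the role ICQ_EXITED plays in the patch below.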
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index f53c80ecaf07..92bf55540d87 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -36,11 +36,23 @@ static void icq_free_icq_rcu(struct rcu_head *head)
 	kmem_cache_free(icq->__rcu_icq_cache, icq);
 }
 
-/*
- * Exit and free an icq. Called with both ioc and q locked.
- */
+/* Exit an icq. Called with both ioc and q locked. */
 static void ioc_exit_icq(struct io_cq *icq)
 {
+	struct elevator_type *et = icq->q->elevator->type;
+
+	if (icq->flags & ICQ_EXITED)
+		return;
+
+	if (et->ops.elevator_exit_icq_fn)
+		et->ops.elevator_exit_icq_fn(icq);
+
+	icq->flags |= ICQ_EXITED;
+}
+
+/* Release an icq. Called with both ioc and q locked. */
+static void ioc_destroy_icq(struct io_cq *icq)
+{
 	struct io_context *ioc = icq->ioc;
 	struct request_queue *q = icq->q;
 	struct elevator_type *et = q->elevator->type;
@@ -60,8 +72,7 @@ static void ioc_exit_icq(struct io_cq *icq)
 	if (rcu_dereference_raw(ioc->icq_hint) == icq)
 		rcu_assign_pointer(ioc->icq_hint, NULL);
 
-	if (et->ops.elevator_exit_icq_fn)
-		et->ops.elevator_exit_icq_fn(icq);
+	ioc_exit_icq(icq);
 
 	/*
 	 * @icq->q might have gone away by the time RCU callback runs
@@ -95,7 +106,7 @@ static void ioc_release_fn(struct work_struct *work)
 		struct request_queue *q = icq->q;
 
 		if (spin_trylock(q->queue_lock)) {
-			ioc_exit_icq(icq);
+			ioc_destroy_icq(icq);
 			spin_unlock(q->queue_lock);
 		} else {
 			spin_unlock_irqrestore(&ioc->lock, flags);
@@ -142,13 +153,41 @@ EXPORT_SYMBOL(put_io_context);
 void exit_io_context(struct task_struct *task)
 {
 	struct io_context *ioc;
+	struct io_cq *icq;
+	struct hlist_node *n;
+	unsigned long flags;
 
 	task_lock(task);
 	ioc = task->io_context;
 	task->io_context = NULL;
 	task_unlock(task);
 
-	atomic_dec(&ioc->nr_tasks);
+	if (!atomic_dec_and_test(&ioc->nr_tasks)) {
+		put_io_context(ioc);
+		return;
+	}
+
+	/*
+	 * Need ioc lock to walk icq_list and q lock to exit icq. Perform
+	 * reverse double locking. Read comment in ioc_release_fn() for
+	 * explanation on the nested locking annotation.
+	 */
+retry:
+	spin_lock_irqsave_nested(&ioc->lock, flags, 1);
+	hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node) {
+		if (icq->flags & ICQ_EXITED)
+			continue;
+		if (spin_trylock(icq->q->queue_lock)) {
+			ioc_exit_icq(icq);
+			spin_unlock(icq->q->queue_lock);
+		} else {
+			spin_unlock_irqrestore(&ioc->lock, flags);
+			cpu_relax();
+			goto retry;
+		}
+	}
+	spin_unlock_irqrestore(&ioc->lock, flags);
+
 	put_io_context(ioc);
 }
 
@@ -168,7 +207,7 @@ void ioc_clear_queue(struct request_queue *q)
 		struct io_context *ioc = icq->ioc;
 
 		spin_lock(&ioc->lock);
-		ioc_exit_icq(icq);
+		ioc_destroy_icq(icq);
 		spin_unlock(&ioc->lock);
 	}
 }
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 17839c7b9614..1a3018063034 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -8,6 +8,7 @@
 enum {
 	ICQ_IOPRIO_CHANGED	= 1 << 0,
 	ICQ_CGROUP_CHANGED	= 1 << 1,
+	ICQ_EXITED		= 1 << 2,
 
 	ICQ_CHANGED_MASK	= ICQ_IOPRIO_CHANGED | ICQ_CGROUP_CHANGED,
 };