Diffstat (limited to 'block/blk-ioc.c')
 block/blk-ioc.c | 111 +++++++++++----------------------------------------
 1 file changed, 25 insertions(+), 86 deletions(-)
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 27a06e00eaec..8b782a63c297 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -29,21 +29,6 @@ void get_io_context(struct io_context *ioc)
 }
 EXPORT_SYMBOL(get_io_context);
 
-/*
- * Releasing ioc may nest into another put_io_context() leading to nested
- * fast path release.  As the ioc's can't be the same, this is okay but
- * makes lockdep whine.  Keep track of nesting and use it as subclass.
- */
-#ifdef CONFIG_LOCKDEP
-#define ioc_release_depth(q)		((q) ? (q)->ioc_release_depth : 0)
-#define ioc_release_depth_inc(q)	(q)->ioc_release_depth++
-#define ioc_release_depth_dec(q)	(q)->ioc_release_depth--
-#else
-#define ioc_release_depth(q)		0
-#define ioc_release_depth_inc(q)	do { } while (0)
-#define ioc_release_depth_dec(q)	do { } while (0)
-#endif
-
 static void icq_free_icq_rcu(struct rcu_head *head)
 {
 	struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);
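The macros removed above existed only to feed a hand-maintained recursion depth into spin_lock_irqsave_nested() as a lockdep subclass. The underlying lockdep idiom, in a minimal hedged sketch with hypothetical names: every lock initialized from the same spin_lock_init() call site shares one lockdep class, so acquiring a second instance while already holding one needs an explicit subclass annotation.

#include <linux/lockdep.h>
#include <linux/spinlock.h>

/* Hypothetical example, not from this patch. */
struct item {
	spinlock_t lock;
};

static void item_init(struct item *it)
{
	spin_lock_init(&it->lock);	/* one init site => one lockdep class */
}

static void lock_two(struct item *a, struct item *b)
{
	spin_lock(&a->lock);
	/*
	 * Same class, different instance: subclass 1 (SINGLE_DEPTH_NESTING)
	 * tells lockdep this is not a self-deadlock.
	 */
	spin_lock_nested(&b->lock, SINGLE_DEPTH_NESTING);
	/* ... */
	spin_unlock(&b->lock);
	spin_unlock(&a->lock);
}

The patch can drop the per-queue depth counter because, after the rewrite below, release only ever happens from the workqueue, where a constant subclass of 1 suffices.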
@@ -75,11 +60,8 @@ static void ioc_exit_icq(struct io_cq *icq)
 	if (rcu_dereference_raw(ioc->icq_hint) == icq)
 		rcu_assign_pointer(ioc->icq_hint, NULL);
 
-	if (et->ops.elevator_exit_icq_fn) {
-		ioc_release_depth_inc(q);
+	if (et->ops.elevator_exit_icq_fn)
 		et->ops.elevator_exit_icq_fn(icq);
-		ioc_release_depth_dec(q);
-	}
 
 	/*
 	 * @icq->q might have gone away by the time RCU callback runs
@@ -98,8 +80,15 @@ static void ioc_release_fn(struct work_struct *work)
 	struct io_context *ioc = container_of(work, struct io_context,
 					      release_work);
 	struct request_queue *last_q = NULL;
+	unsigned long flags;
 
-	spin_lock_irq(&ioc->lock);
+	/*
+	 * Exiting icq may call into put_io_context() through elevator
+	 * which will trigger lockdep warning.  The ioc's are guaranteed to
+	 * be different, use a different locking subclass here.  Use
+	 * irqsave variant as there's no spin_lock_irq_nested().
+	 */
+	spin_lock_irqsave_nested(&ioc->lock, flags, 1);
 
 	while (!hlist_empty(&ioc->icq_list)) {
 		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
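Two details in this hunk are worth spelling out. The new comment's last line points at a real API gap: the kernel provides spin_lock_irqsave_nested() but no spin_lock_irq_nested(), so passing a subclass forces the flags-saving form even though the worker runs with interrupts enabled. And the subclass exists because exiting an icq can re-enter put_io_context() on a different io_context whose lock is in the same lockdep class. A hedged sketch of both points, with a simplified call chain and a hypothetical helper name:

/*
 * Simplified chain behind the annotation:
 *
 *   ioc_release_fn()                        holds ioc_A->lock, subclass 1
 *     ioc_exit_icq()
 *       et->ops.elevator_exit_icq_fn()
 *         put_io_context(ioc_B)             takes ioc_B->lock, subclass 0
 *
 * ioc_A != ioc_B, so there is no real deadlock, only a same-class report.
 */
static void lock_ioc_nested(struct io_context *ioc)
{
	unsigned long flags;

	/* what a hypothetical spin_lock_irq_nested() would have to do */
	spin_lock_irqsave_nested(&ioc->lock, flags, 1);
	/* ... */
	spin_unlock_irqrestore(&ioc->lock, flags);
}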
@@ -121,15 +110,15 @@ static void ioc_release_fn(struct work_struct *work)
 			 */
 			if (last_q) {
 				spin_unlock(last_q->queue_lock);
-				spin_unlock_irq(&ioc->lock);
+				spin_unlock_irqrestore(&ioc->lock, flags);
 				blk_put_queue(last_q);
 			} else {
-				spin_unlock_irq(&ioc->lock);
+				spin_unlock_irqrestore(&ioc->lock, flags);
 			}
 
 			last_q = this_q;
-			spin_lock_irq(this_q->queue_lock);
-			spin_lock(&ioc->lock);
+			spin_lock_irqsave(this_q->queue_lock, flags);
+			spin_lock_nested(&ioc->lock, 1);
 			continue;
 		}
 		ioc_exit_icq(icq);
@@ -137,10 +126,10 @@ static void ioc_release_fn(struct work_struct *work)
 
 	if (last_q) {
 		spin_unlock(last_q->queue_lock);
-		spin_unlock_irq(&ioc->lock);
+		spin_unlock_irqrestore(&ioc->lock, flags);
 		blk_put_queue(last_q);
 	} else {
-		spin_unlock_irq(&ioc->lock);
+		spin_unlock_irqrestore(&ioc->lock, flags);
 	}
 
 	kmem_cache_free(iocontext_cachep, ioc);
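These two hunks are mechanical: every unlock of ioc->lock now restores the saved flags to pair with the irqsave acquisitions above. The structure they sit in is the reverse-order double-locking walk described in the (soon to be removed) put_io_context() comment: ioc->lock nests inside queue_lock, yet the walk has to start from ioc->lock to find each icq, and putting a queue may sleep, so both locks are dropped before the put and the walk restarts. A generic, hedged sketch of that shape with hypothetical types ("inner" plays ioc->lock, "outer" plays queue_lock; the real code also pins the outer object with a reference before dropping the locks):

#include <linux/list.h>
#include <linux/spinlock.h>

struct outer { spinlock_t lock; };
struct item  { struct list_head node; struct outer *outer; };
struct inner { spinlock_t lock; struct list_head items; };

static void release_all(struct inner *in)
{
	struct outer *last = NULL;
	unsigned long flags;

	spin_lock_irqsave_nested(&in->lock, flags, 1);
	while (!list_empty(&in->items)) {
		struct item *it = list_first_entry(&in->items,
						   struct item, node);
		struct outer *out = it->outer;

		if (out != last) {
			/*
			 * Switching outer locks; a sleeping put must run
			 * with no spinlocks held, so drop both and restart.
			 */
			if (last) {
				spin_unlock(&last->lock);
				spin_unlock_irqrestore(&in->lock, flags);
				/* sleeping release of 'last' goes here */
			} else {
				spin_unlock_irqrestore(&in->lock, flags);
			}
			last = out;
			spin_lock_irqsave(&out->lock, flags);	/* outer first */
			spin_lock_nested(&in->lock, 1);		/* then inner */
			continue;
		}
		list_del(&it->node);	/* exit item with both locks held */
	}
	if (last)
		spin_unlock(&last->lock);
	spin_unlock_irqrestore(&in->lock, flags);
}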
@@ -149,79 +138,29 @@ static void ioc_release_fn(struct work_struct *work)
 /**
  * put_io_context - put a reference of io_context
  * @ioc: io_context to put
- * @locked_q: request_queue the caller is holding queue_lock of (hint)
  *
  * Decrement reference count of @ioc and release it if the count reaches
- * zero.  If the caller is holding queue_lock of a queue, it can indicate
- * that with @locked_q.  This is an optimization hint and the caller is
- * allowed to pass in %NULL even when it's holding a queue_lock.
+ * zero.
  */
-void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
+void put_io_context(struct io_context *ioc)
 {
-	struct request_queue *last_q = locked_q;
 	unsigned long flags;
 
 	if (ioc == NULL)
 		return;
 
 	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
-	if (locked_q)
-		lockdep_assert_held(locked_q->queue_lock);
-
-	if (!atomic_long_dec_and_test(&ioc->refcount))
-		return;
 
 	/*
-	 * Destroy @ioc.  This is a bit messy because icq's are chained
-	 * from both ioc and queue, and ioc->lock nests inside queue_lock.
-	 * The inner ioc->lock should be held to walk our icq_list and then
-	 * for each icq the outer matching queue_lock should be grabbed.
-	 * ie. We need to do reverse-order double lock dancing.
-	 *
-	 * Another twist is that we are often called with one of the
-	 * matching queue_locks held as indicated by @locked_q, which
-	 * prevents performing double-lock dance for other queues.
-	 *
-	 * So, we do it in two stages.  The fast path uses the queue_lock
-	 * the caller is holding and, if other queues need to be accessed,
-	 * uses trylock to avoid introducing locking dependency.  This can
-	 * handle most cases, especially if @ioc was performing IO on only
-	 * single device.
-	 *
-	 * If trylock doesn't cut it, we defer to @ioc->release_work which
-	 * can do all the double-locking dancing.
+	 * Releasing ioc requires reverse order double locking and we may
+	 * already be holding a queue_lock.  Do it asynchronously from wq.
 	 */
-	spin_lock_irqsave_nested(&ioc->lock, flags,
-				 ioc_release_depth(locked_q));
-
-	while (!hlist_empty(&ioc->icq_list)) {
-		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
-						struct io_cq, ioc_node);
-		struct request_queue *this_q = icq->q;
-
-		if (this_q != last_q) {
-			if (last_q && last_q != locked_q)
-				spin_unlock(last_q->queue_lock);
-			last_q = NULL;
-
-			if (!spin_trylock(this_q->queue_lock))
-				break;
-			last_q = this_q;
-			continue;
-		}
-		ioc_exit_icq(icq);
-	}
-
-	if (last_q && last_q != locked_q)
-		spin_unlock(last_q->queue_lock);
-
-	spin_unlock_irqrestore(&ioc->lock, flags);
-
-	/* if no icq is left, we're done; otherwise, kick release_work */
-	if (hlist_empty(&ioc->icq_list))
-		kmem_cache_free(iocontext_cachep, ioc);
-	else
-		schedule_work(&ioc->release_work);
+	if (atomic_long_dec_and_test(&ioc->refcount)) {
+		spin_lock_irqsave(&ioc->lock, flags);
+		if (!hlist_empty(&ioc->icq_list))
+			schedule_work(&ioc->release_work);
+		spin_unlock_irqrestore(&ioc->lock, flags);
+	}
 }
 EXPORT_SYMBOL(put_io_context);
 
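After the rewrite, put_io_context() keeps only the atomic drop on the fast path; the reverse-order double locking lives solely in release_work, which runs from a workqueue where no caller locks are held. schedule_work() is callable from atomic context, which is what makes the new function safe under any queue_lock. The same defer-to-workqueue shape, reduced to a self-contained hedged sketch with hypothetical names:

#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

/* Hypothetical refcounted object using the same defer-to-wq shape. */
struct obj {
	atomic_long_t refcount;
	spinlock_t lock;
	struct work_struct release_work;
};

static void obj_release_fn(struct work_struct *work)
{
	struct obj *o = container_of(work, struct obj, release_work);

	/*
	 * Process context, no caller-held locks: free to take locks in
	 * any order, or to sleep.
	 */
	kfree(o);
}

static void obj_put(struct obj *o)
{
	/* fast path: just the atomic drop; heavy teardown is async */
	if (atomic_long_dec_and_test(&o->refcount))
		schedule_work(&o->release_work);
}

The creator of such an object would INIT_WORK(&o->release_work, obj_release_fn) at allocation time, mirroring how ioc->release_work is initialized when the io_context is allocated.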
@@ -236,7 +175,7 @@ void exit_io_context(struct task_struct *task)
 	task_unlock(task);
 
 	atomic_dec(&ioc->nr_tasks);
-	put_io_context(ioc, NULL);
+	put_io_context(ioc);
 }
 
 /**
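The exit_io_context() hunk above is representative of the whole-tree fallout: callers simply drop the second argument. An illustrative before/after for a call site that used to pass a queue hint (hypothetical, not a hunk from this diff):

-	put_io_context(ioc, q);		/* old: optional queue_lock hint */
+	put_io_context(ioc);		/* new: no hint; slow path is async */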