Diffstat (limited to 'block/cfq-iosched.c')
-rw-r--r-- | block/cfq-iosched.c | 2428 |
1 files changed, 2428 insertions, 0 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
new file mode 100644
index 000000000000..ecacca9c877e
--- /dev/null
+++ b/block/cfq-iosched.c
@@ -0,0 +1,2428 @@
1 | /* | ||
2 | * linux/block/cfq-iosched.c | ||
3 | * | ||
4 | * CFQ, or complete fairness queueing, disk scheduler. | ||
5 | * | ||
6 | * Based on ideas from a previously unfinished io | ||
7 | * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. | ||
8 | * | ||
9 | * Copyright (C) 2003 Jens Axboe <axboe@suse.de> | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/fs.h> | ||
13 | #include <linux/blkdev.h> | ||
14 | #include <linux/elevator.h> | ||
15 | #include <linux/bio.h> | ||
16 | #include <linux/config.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <linux/compiler.h> | ||
21 | #include <linux/hash.h> | ||
22 | #include <linux/rbtree.h> | ||
23 | #include <linux/mempool.h> | ||
24 | #include <linux/ioprio.h> | ||
25 | #include <linux/writeback.h> | ||
26 | |||
27 | /* | ||
28 | * tunables | ||
29 | */ | ||
30 | static int cfq_quantum = 4; /* max queue in one round of service */ | ||
31 | static int cfq_queued = 8; /* minimum rq allocate limit per-queue */ | ||
32 | static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; | ||
33 | static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ | ||
34 | static int cfq_back_penalty = 2; /* penalty of a backwards seek */ | ||
35 | |||
36 | static int cfq_slice_sync = HZ / 10; | ||
37 | static int cfq_slice_async = HZ / 25; | ||
38 | static int cfq_slice_async_rq = 2; | ||
39 | static int cfq_slice_idle = HZ / 100; | ||
40 | |||
41 | #define CFQ_IDLE_GRACE (HZ / 10) | ||
42 | #define CFQ_SLICE_SCALE (5) | ||
43 | |||
44 | #define CFQ_KEY_ASYNC (0) | ||
45 | #define CFQ_KEY_ANY (0xffff) | ||
46 | |||
47 | /* | ||
48 | * disable queueing at the driver/hardware level | ||
49 | */ | ||
50 | static int cfq_max_depth = 2; | ||
51 | |||
52 | /* | ||
53 | * for the hash of cfqq inside the cfqd | ||
54 | */ | ||
55 | #define CFQ_QHASH_SHIFT 6 | ||
56 | #define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT) | ||
57 | #define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash) | ||
58 | |||
59 | /* | ||
60 | * for the hash of crq inside the cfqq | ||
61 | */ | ||
62 | #define CFQ_MHASH_SHIFT 6 | ||
63 | #define CFQ_MHASH_BLOCK(sec) ((sec) >> 3) | ||
64 | #define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT) | ||
65 | #define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT) | ||
66 | #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) | ||
67 | #define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash) | ||
68 | |||
69 | #define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) | ||
70 | #define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) | ||
71 | |||
72 | #define RQ_DATA(rq) (rq)->elevator_private | ||
73 | |||
74 | /* | ||
75 | * rb-tree defines | ||
76 | */ | ||
77 | #define RB_NONE (2) | ||
78 | #define RB_EMPTY(node) ((node)->rb_node == NULL) | ||
79 | #define RB_CLEAR_COLOR(node) (node)->rb_color = RB_NONE | ||
80 | #define RB_CLEAR(node) do { \ | ||
81 | (node)->rb_parent = NULL; \ | ||
82 | RB_CLEAR_COLOR((node)); \ | ||
83 | (node)->rb_right = NULL; \ | ||
84 | (node)->rb_left = NULL; \ | ||
85 | } while (0) | ||
86 | #define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL) | ||
87 | #define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) | ||
88 | #define rq_rb_key(rq) (rq)->sector | ||
89 | |||
90 | static kmem_cache_t *crq_pool; | ||
91 | static kmem_cache_t *cfq_pool; | ||
92 | static kmem_cache_t *cfq_ioc_pool; | ||
93 | |||
94 | #define CFQ_PRIO_LISTS IOPRIO_BE_NR | ||
95 | #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) | ||
96 | #define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE) | ||
97 | #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) | ||
98 | |||
99 | #define ASYNC (0) | ||
100 | #define SYNC (1) | ||
101 | |||
102 | #define cfq_cfqq_dispatched(cfqq) \ | ||
103 | ((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC]) | ||
104 | |||
105 | #define cfq_cfqq_class_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC) | ||
106 | |||
107 | #define cfq_cfqq_sync(cfqq) \ | ||
108 | (cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC]) | ||
109 | |||
110 | /* | ||
111 | * Per block device queue structure | ||
112 | */ | ||
113 | struct cfq_data { | ||
114 | atomic_t ref; | ||
115 | request_queue_t *queue; | ||
116 | |||
117 | /* | ||
118 | * rr list of queues with requests and the count of them | ||
119 | */ | ||
120 | struct list_head rr_list[CFQ_PRIO_LISTS]; | ||
121 | struct list_head busy_rr; | ||
122 | struct list_head cur_rr; | ||
123 | struct list_head idle_rr; | ||
124 | unsigned int busy_queues; | ||
125 | |||
126 | /* | ||
127 | * non-ordered list of empty cfqq's | ||
128 | */ | ||
129 | struct list_head empty_list; | ||
130 | |||
131 | /* | ||
132 | * cfqq lookup hash | ||
133 | */ | ||
134 | struct hlist_head *cfq_hash; | ||
135 | |||
136 | /* | ||
137 | * global crq hash for all queues | ||
138 | */ | ||
139 | struct hlist_head *crq_hash; | ||
140 | |||
141 | unsigned int max_queued; | ||
142 | |||
143 | mempool_t *crq_pool; | ||
144 | |||
145 | int rq_in_driver; | ||
146 | |||
147 | /* | ||
148 | * schedule slice state info | ||
149 | */ | ||
150 | /* | ||
151 | * idle window management | ||
152 | */ | ||
153 | struct timer_list idle_slice_timer; | ||
154 | struct work_struct unplug_work; | ||
155 | |||
156 | struct cfq_queue *active_queue; | ||
157 | struct cfq_io_context *active_cic; | ||
158 | int cur_prio, cur_end_prio; | ||
159 | unsigned int dispatch_slice; | ||
160 | |||
161 | struct timer_list idle_class_timer; | ||
162 | |||
163 | sector_t last_sector; | ||
164 | unsigned long last_end_request; | ||
165 | |||
166 | unsigned int rq_starved; | ||
167 | |||
168 | /* | ||
169 | * tunables, see top of file | ||
170 | */ | ||
171 | unsigned int cfq_quantum; | ||
172 | unsigned int cfq_queued; | ||
173 | unsigned int cfq_fifo_expire[2]; | ||
174 | unsigned int cfq_back_penalty; | ||
175 | unsigned int cfq_back_max; | ||
176 | unsigned int cfq_slice[2]; | ||
177 | unsigned int cfq_slice_async_rq; | ||
178 | unsigned int cfq_slice_idle; | ||
179 | unsigned int cfq_max_depth; | ||
180 | }; | ||
181 | |||
182 | /* | ||
183 | * Per process-grouping structure | ||
184 | */ | ||
185 | struct cfq_queue { | ||
186 | /* reference count */ | ||
187 | atomic_t ref; | ||
188 | /* parent cfq_data */ | ||
189 | struct cfq_data *cfqd; | ||
190 | /* cfqq lookup hash */ | ||
191 | struct hlist_node cfq_hash; | ||
192 | /* hash key */ | ||
193 | unsigned int key; | ||
194 | /* on either rr or empty list of cfqd */ | ||
195 | struct list_head cfq_list; | ||
196 | /* sorted list of pending requests */ | ||
197 | struct rb_root sort_list; | ||
198 | /* if fifo isn't expired, next request to serve */ | ||
199 | struct cfq_rq *next_crq; | ||
200 | /* requests queued in sort_list */ | ||
201 | int queued[2]; | ||
202 | /* currently allocated requests */ | ||
203 | int allocated[2]; | ||
204 | /* fifo list of requests in sort_list */ | ||
205 | struct list_head fifo; | ||
206 | |||
207 | unsigned long slice_start; | ||
208 | unsigned long slice_end; | ||
209 | unsigned long slice_left; | ||
210 | unsigned long service_last; | ||
211 | |||
212 | /* number of requests that are on the dispatch list */ | ||
213 | int on_dispatch[2]; | ||
214 | |||
215 | /* io prio of this group */ | ||
216 | unsigned short ioprio, org_ioprio; | ||
217 | unsigned short ioprio_class, org_ioprio_class; | ||
218 | |||
219 | /* various state flags, see below */ | ||
220 | unsigned int flags; | ||
221 | }; | ||
222 | |||
223 | struct cfq_rq { | ||
224 | struct rb_node rb_node; | ||
225 | sector_t rb_key; | ||
226 | struct request *request; | ||
227 | struct hlist_node hash; | ||
228 | |||
229 | struct cfq_queue *cfq_queue; | ||
230 | struct cfq_io_context *io_context; | ||
231 | |||
232 | unsigned int crq_flags; | ||
233 | }; | ||
234 | |||
235 | enum cfqq_state_flags { | ||
236 | CFQ_CFQQ_FLAG_on_rr = 0, | ||
237 | CFQ_CFQQ_FLAG_wait_request, | ||
238 | CFQ_CFQQ_FLAG_must_alloc, | ||
239 | CFQ_CFQQ_FLAG_must_alloc_slice, | ||
240 | CFQ_CFQQ_FLAG_must_dispatch, | ||
241 | CFQ_CFQQ_FLAG_fifo_expire, | ||
242 | CFQ_CFQQ_FLAG_idle_window, | ||
243 | CFQ_CFQQ_FLAG_prio_changed, | ||
244 | CFQ_CFQQ_FLAG_expired, | ||
245 | }; | ||
246 | |||
247 | #define CFQ_CFQQ_FNS(name) \ | ||
248 | static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \ | ||
249 | { \ | ||
250 | cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name); \ | ||
251 | } \ | ||
252 | static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \ | ||
253 | { \ | ||
254 | cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \ | ||
255 | } \ | ||
256 | static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ | ||
257 | { \ | ||
258 | return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \ | ||
259 | } | ||
260 | |||
261 | CFQ_CFQQ_FNS(on_rr); | ||
262 | CFQ_CFQQ_FNS(wait_request); | ||
263 | CFQ_CFQQ_FNS(must_alloc); | ||
264 | CFQ_CFQQ_FNS(must_alloc_slice); | ||
265 | CFQ_CFQQ_FNS(must_dispatch); | ||
266 | CFQ_CFQQ_FNS(fifo_expire); | ||
267 | CFQ_CFQQ_FNS(idle_window); | ||
268 | CFQ_CFQQ_FNS(prio_changed); | ||
269 | CFQ_CFQQ_FNS(expired); | ||
270 | #undef CFQ_CFQQ_FNS | ||
271 | |||
272 | enum cfq_rq_state_flags { | ||
273 | CFQ_CRQ_FLAG_is_sync = 0, | ||
274 | }; | ||
275 | |||
276 | #define CFQ_CRQ_FNS(name) \ | ||
277 | static inline void cfq_mark_crq_##name(struct cfq_rq *crq) \ | ||
278 | { \ | ||
279 | crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name); \ | ||
280 | } \ | ||
281 | static inline void cfq_clear_crq_##name(struct cfq_rq *crq) \ | ||
282 | { \ | ||
283 | crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name); \ | ||
284 | } \ | ||
285 | static inline int cfq_crq_##name(const struct cfq_rq *crq) \ | ||
286 | { \ | ||
287 | return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \ | ||
288 | } | ||
289 | |||
290 | CFQ_CRQ_FNS(is_sync); | ||
291 | #undef CFQ_CRQ_FNS | ||
292 | |||
293 | static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short); | ||
294 | static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); | ||
295 | static void cfq_put_cfqd(struct cfq_data *cfqd); | ||
296 | |||
297 | #define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE) | ||
298 | |||
299 | /* | ||
300 | * lots of deadline iosched dupes, can be abstracted later... | ||
301 | */ | ||
302 | static inline void cfq_del_crq_hash(struct cfq_rq *crq) | ||
303 | { | ||
304 | hlist_del_init(&crq->hash); | ||
305 | } | ||
306 | |||
307 | static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) | ||
308 | { | ||
309 | const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request)); | ||
310 | |||
311 | hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]); | ||
312 | } | ||
313 | |||
314 | static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) | ||
315 | { | ||
316 | struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)]; | ||
317 | struct hlist_node *entry, *next; | ||
318 | |||
319 | hlist_for_each_safe(entry, next, hash_list) { | ||
320 | struct cfq_rq *crq = list_entry_hash(entry); | ||
321 | struct request *__rq = crq->request; | ||
322 | |||
323 | if (!rq_mergeable(__rq)) { | ||
324 | cfq_del_crq_hash(crq); | ||
325 | continue; | ||
326 | } | ||
327 | |||
328 | if (rq_hash_key(__rq) == offset) | ||
329 | return __rq; | ||
330 | } | ||
331 | |||
332 | return NULL; | ||
333 | } | ||
334 | |||
335 | /* | ||
336 | * scheduler run of queue, if there are requests pending and no one in the | ||
337 | * driver that will restart queueing | ||
338 | */ | ||
339 | static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) | ||
340 | { | ||
341 | if (!cfqd->rq_in_driver && cfqd->busy_queues) | ||
342 | kblockd_schedule_work(&cfqd->unplug_work); | ||
343 | } | ||
344 | |||
345 | static int cfq_queue_empty(request_queue_t *q) | ||
346 | { | ||
347 | struct cfq_data *cfqd = q->elevator->elevator_data; | ||
348 | |||
349 | return !cfqd->busy_queues; | ||
350 | } | ||
351 | |||
352 | /* | ||
353 | * Lifted from AS - choose which of crq1 and crq2 is best served now. | ||
354 | * We choose the request that is closest to the head right now. Distance | ||
355 | * behind the head is penalized and only allowed to a certain extent. | ||
356 | */ | ||
357 | static struct cfq_rq * | ||
358 | cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) | ||
359 | { | ||
360 | sector_t last, s1, s2, d1 = 0, d2 = 0; | ||
361 | int r1_wrap = 0, r2_wrap = 0; /* requests are behind the disk head */ | ||
362 | unsigned long back_max; | ||
363 | |||
364 | if (crq1 == NULL || crq1 == crq2) | ||
365 | return crq2; | ||
366 | if (crq2 == NULL) | ||
367 | return crq1; | ||
368 | |||
369 | if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2)) | ||
370 | return crq1; | ||
371 | else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1)) | ||
372 | return crq2; | ||
373 | |||
374 | s1 = crq1->request->sector; | ||
375 | s2 = crq2->request->sector; | ||
376 | |||
377 | last = cfqd->last_sector; | ||
378 | |||
379 | /* | ||
380 | * by definition, 1KiB is 2 sectors | ||
381 | */ | ||
382 | back_max = cfqd->cfq_back_max * 2; | ||
383 | |||
384 | /* | ||
385 | * Strict one way elevator _except_ in the case where we allow | ||
386 | * short backward seeks which are biased as twice the cost of a | ||
387 | * similar forward seek. | ||
388 | */ | ||
389 | if (s1 >= last) | ||
390 | d1 = s1 - last; | ||
391 | else if (s1 + back_max >= last) | ||
392 | d1 = (last - s1) * cfqd->cfq_back_penalty; | ||
393 | else | ||
394 | r1_wrap = 1; | ||
395 | |||
396 | if (s2 >= last) | ||
397 | d2 = s2 - last; | ||
398 | else if (s2 + back_max >= last) | ||
399 | d2 = (last - s2) * cfqd->cfq_back_penalty; | ||
400 | else | ||
401 | r2_wrap = 1; | ||
402 | |||
403 | /* Found required data */ | ||
404 | if (!r1_wrap && r2_wrap) | ||
405 | return crq1; | ||
406 | else if (!r2_wrap && r1_wrap) | ||
407 | return crq2; | ||
408 | else if (r1_wrap && r2_wrap) { | ||
409 | /* both behind the head */ | ||
410 | if (s1 <= s2) | ||
411 | return crq1; | ||
412 | else | ||
413 | return crq2; | ||
414 | } | ||
415 | |||
416 | /* Both requests in front of the head */ | ||
417 | if (d1 < d2) | ||
418 | return crq1; | ||
419 | else if (d2 < d1) | ||
420 | return crq2; | ||
421 | else { | ||
422 | if (s1 >= s2) | ||
423 | return crq1; | ||
424 | else | ||
425 | return crq2; | ||
426 | } | ||
427 | } | ||
428 | |||
429 | /* | ||
430 | * would be nice to take fifo expire time into account as well | ||
431 | */ | ||
432 | static struct cfq_rq * | ||
433 | cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq, | ||
434 | struct cfq_rq *last) | ||
435 | { | ||
436 | struct cfq_rq *crq_next = NULL, *crq_prev = NULL; | ||
437 | struct rb_node *rbnext, *rbprev; | ||
438 | |||
439 | if (!(rbnext = rb_next(&last->rb_node))) { | ||
440 | rbnext = rb_first(&cfqq->sort_list); | ||
441 | if (rbnext == &last->rb_node) | ||
442 | rbnext = NULL; | ||
443 | } | ||
444 | |||
445 | rbprev = rb_prev(&last->rb_node); | ||
446 | |||
447 | if (rbprev) | ||
448 | crq_prev = rb_entry_crq(rbprev); | ||
449 | if (rbnext) | ||
450 | crq_next = rb_entry_crq(rbnext); | ||
451 | |||
452 | return cfq_choose_req(cfqd, crq_next, crq_prev); | ||
453 | } | ||
454 | |||
455 | static void cfq_update_next_crq(struct cfq_rq *crq) | ||
456 | { | ||
457 | struct cfq_queue *cfqq = crq->cfq_queue; | ||
458 | |||
459 | if (cfqq->next_crq == crq) | ||
460 | cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq); | ||
461 | } | ||
462 | |||
463 | static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) | ||
464 | { | ||
465 | struct cfq_data *cfqd = cfqq->cfqd; | ||
466 | struct list_head *list, *entry; | ||
467 | |||
468 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); | ||
469 | |||
470 | list_del(&cfqq->cfq_list); | ||
471 | |||
472 | if (cfq_class_rt(cfqq)) | ||
473 | list = &cfqd->cur_rr; | ||
474 | else if (cfq_class_idle(cfqq)) | ||
475 | list = &cfqd->idle_rr; | ||
476 | else { | ||
477 | /* | ||
478 | * if cfqq has requests in flight, don't allow it to be | ||
479 | * found in cfq_set_active_queue before it has finished them. | ||
480 | * this is done to increase fairness between a process that | ||
481 | * has lots of io pending vs one that only generates one | ||
482 | * sporadically or synchronously | ||
483 | */ | ||
484 | if (cfq_cfqq_dispatched(cfqq)) | ||
485 | list = &cfqd->busy_rr; | ||
486 | else | ||
487 | list = &cfqd->rr_list[cfqq->ioprio]; | ||
488 | } | ||
489 | |||
490 | /* | ||
491 | * if queue was preempted, just add to front to be fair. busy_rr | ||
492 | * isn't sorted. | ||
493 | */ | ||
494 | if (preempted || list == &cfqd->busy_rr) { | ||
495 | list_add(&cfqq->cfq_list, list); | ||
496 | return; | ||
497 | } | ||
498 | |||
499 | /* | ||
500 | * sort by when queue was last serviced | ||
501 | */ | ||
502 | entry = list; | ||
503 | while ((entry = entry->prev) != list) { | ||
504 | struct cfq_queue *__cfqq = list_entry_cfqq(entry); | ||
505 | |||
506 | if (!__cfqq->service_last) | ||
507 | break; | ||
508 | if (time_before(__cfqq->service_last, cfqq->service_last)) | ||
509 | break; | ||
510 | } | ||
511 | |||
512 | list_add(&cfqq->cfq_list, entry); | ||
513 | } | ||
514 | |||
515 | /* | ||
516 | * add to busy list of queues for service, trying to be fair in ordering | ||
517 | * the pending list according to last request service | ||
518 | */ | ||
519 | static inline void | ||
520 | cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
521 | { | ||
522 | BUG_ON(cfq_cfqq_on_rr(cfqq)); | ||
523 | cfq_mark_cfqq_on_rr(cfqq); | ||
524 | cfqd->busy_queues++; | ||
525 | |||
526 | cfq_resort_rr_list(cfqq, 0); | ||
527 | } | ||
528 | |||
529 | static inline void | ||
530 | cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
531 | { | ||
532 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); | ||
533 | cfq_clear_cfqq_on_rr(cfqq); | ||
534 | list_move(&cfqq->cfq_list, &cfqd->empty_list); | ||
535 | |||
536 | BUG_ON(!cfqd->busy_queues); | ||
537 | cfqd->busy_queues--; | ||
538 | } | ||
539 | |||
540 | /* | ||
541 | * rb tree support functions | ||
542 | */ | ||
543 | static inline void cfq_del_crq_rb(struct cfq_rq *crq) | ||
544 | { | ||
545 | struct cfq_queue *cfqq = crq->cfq_queue; | ||
546 | struct cfq_data *cfqd = cfqq->cfqd; | ||
547 | const int sync = cfq_crq_is_sync(crq); | ||
548 | |||
549 | BUG_ON(!cfqq->queued[sync]); | ||
550 | cfqq->queued[sync]--; | ||
551 | |||
552 | cfq_update_next_crq(crq); | ||
553 | |||
554 | rb_erase(&crq->rb_node, &cfqq->sort_list); | ||
555 | RB_CLEAR_COLOR(&crq->rb_node); | ||
556 | |||
557 | if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list)) | ||
558 | cfq_del_cfqq_rr(cfqd, cfqq); | ||
559 | } | ||
560 | |||
561 | static struct cfq_rq * | ||
562 | __cfq_add_crq_rb(struct cfq_rq *crq) | ||
563 | { | ||
564 | struct rb_node **p = &crq->cfq_queue->sort_list.rb_node; | ||
565 | struct rb_node *parent = NULL; | ||
566 | struct cfq_rq *__crq; | ||
567 | |||
568 | while (*p) { | ||
569 | parent = *p; | ||
570 | __crq = rb_entry_crq(parent); | ||
571 | |||
572 | if (crq->rb_key < __crq->rb_key) | ||
573 | p = &(*p)->rb_left; | ||
574 | else if (crq->rb_key > __crq->rb_key) | ||
575 | p = &(*p)->rb_right; | ||
576 | else | ||
577 | return __crq; | ||
578 | } | ||
579 | |||
580 | rb_link_node(&crq->rb_node, parent, p); | ||
581 | return NULL; | ||
582 | } | ||
583 | |||
584 | static void cfq_add_crq_rb(struct cfq_rq *crq) | ||
585 | { | ||
586 | struct cfq_queue *cfqq = crq->cfq_queue; | ||
587 | struct cfq_data *cfqd = cfqq->cfqd; | ||
588 | struct request *rq = crq->request; | ||
589 | struct cfq_rq *__alias; | ||
590 | |||
591 | crq->rb_key = rq_rb_key(rq); | ||
592 | cfqq->queued[cfq_crq_is_sync(crq)]++; | ||
593 | |||
594 | /* | ||
595 | * looks a little odd, but the first insert might return an alias. | ||
596 | * if that happens, put the alias on the dispatch list | ||
597 | */ | ||
598 | while ((__alias = __cfq_add_crq_rb(crq)) != NULL) | ||
599 | cfq_dispatch_insert(cfqd->queue, __alias); | ||
600 | |||
601 | rb_insert_color(&crq->rb_node, &cfqq->sort_list); | ||
602 | |||
603 | if (!cfq_cfqq_on_rr(cfqq)) | ||
604 | cfq_add_cfqq_rr(cfqd, cfqq); | ||
605 | |||
606 | /* | ||
607 | * check if this request is a better next-serve candidate | ||
608 | */ | ||
609 | cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); | ||
610 | } | ||
611 | |||
612 | static inline void | ||
613 | cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) | ||
614 | { | ||
615 | rb_erase(&crq->rb_node, &cfqq->sort_list); | ||
616 | cfqq->queued[cfq_crq_is_sync(crq)]--; | ||
617 | |||
618 | cfq_add_crq_rb(crq); | ||
619 | } | ||
620 | |||
621 | static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector) | ||
622 | |||
623 | { | ||
624 | struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid, CFQ_KEY_ANY); | ||
625 | struct rb_node *n; | ||
626 | |||
627 | if (!cfqq) | ||
628 | goto out; | ||
629 | |||
630 | n = cfqq->sort_list.rb_node; | ||
631 | while (n) { | ||
632 | struct cfq_rq *crq = rb_entry_crq(n); | ||
633 | |||
634 | if (sector < crq->rb_key) | ||
635 | n = n->rb_left; | ||
636 | else if (sector > crq->rb_key) | ||
637 | n = n->rb_right; | ||
638 | else | ||
639 | return crq->request; | ||
640 | } | ||
641 | |||
642 | out: | ||
643 | return NULL; | ||
644 | } | ||
645 | |||
646 | static void cfq_activate_request(request_queue_t *q, struct request *rq) | ||
647 | { | ||
648 | struct cfq_data *cfqd = q->elevator->elevator_data; | ||
649 | |||
650 | cfqd->rq_in_driver++; | ||
651 | } | ||
652 | |||
653 | static void cfq_deactivate_request(request_queue_t *q, struct request *rq) | ||
654 | { | ||
655 | struct cfq_data *cfqd = q->elevator->elevator_data; | ||
656 | |||
657 | WARN_ON(!cfqd->rq_in_driver); | ||
658 | cfqd->rq_in_driver--; | ||
659 | } | ||
660 | |||
661 | static void cfq_remove_request(struct request *rq) | ||
662 | { | ||
663 | struct cfq_rq *crq = RQ_DATA(rq); | ||
664 | |||
665 | list_del_init(&rq->queuelist); | ||
666 | cfq_del_crq_rb(crq); | ||
667 | cfq_del_crq_hash(crq); | ||
668 | } | ||
669 | |||
670 | static int | ||
671 | cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) | ||
672 | { | ||
673 | struct cfq_data *cfqd = q->elevator->elevator_data; | ||
674 | struct request *__rq; | ||
675 | int ret; | ||
676 | |||
677 | __rq = cfq_find_rq_hash(cfqd, bio->bi_sector); | ||
678 | if (__rq && elv_rq_merge_ok(__rq, bio)) { | ||
679 | ret = ELEVATOR_BACK_MERGE; | ||
680 | goto out; | ||
681 | } | ||
682 | |||
683 | __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio)); | ||
684 | if (__rq && elv_rq_merge_ok(__rq, bio)) { | ||
685 | ret = ELEVATOR_FRONT_MERGE; | ||
686 | goto out; | ||
687 | } | ||
688 | |||
689 | return ELEVATOR_NO_MERGE; | ||
690 | out: | ||
691 | *req = __rq; | ||
692 | return ret; | ||
693 | } | ||
694 | |||
695 | static void cfq_merged_request(request_queue_t *q, struct request *req) | ||
696 | { | ||
697 | struct cfq_data *cfqd = q->elevator->elevator_data; | ||
698 | struct cfq_rq *crq = RQ_DATA(req); | ||
699 | |||
700 | cfq_del_crq_hash(crq); | ||
701 | cfq_add_crq_hash(cfqd, crq); | ||
702 | |||
703 | if (rq_rb_key(req) != crq->rb_key) { | ||
704 | struct cfq_queue *cfqq = crq->cfq_queue; | ||
705 | |||
706 | cfq_update_next_crq(crq); | ||
707 | cfq_reposition_crq_rb(cfqq, crq); | ||
708 | } | ||
709 | } | ||
710 | |||
711 | static void | ||
712 | cfq_merged_requests(request_queue_t *q, struct request *rq, | ||
713 | struct request *next) | ||
714 | { | ||
715 | cfq_merged_request(q, rq); | ||
716 | |||
717 | /* | ||
718 | * reposition in fifo if next is older than rq | ||
719 | */ | ||
720 | if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && | ||
721 | time_before(next->start_time, rq->start_time)) | ||
722 | list_move(&rq->queuelist, &next->queuelist); | ||
723 | |||
724 | cfq_remove_request(next); | ||
725 | } | ||
726 | |||
727 | static inline void | ||
728 | __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
729 | { | ||
730 | if (cfqq) { | ||
731 | /* | ||
732 | * stop potential idle class queues waiting service | ||
733 | */ | ||
734 | del_timer(&cfqd->idle_class_timer); | ||
735 | |||
736 | cfqq->slice_start = jiffies; | ||
737 | cfqq->slice_end = 0; | ||
738 | cfqq->slice_left = 0; | ||
739 | cfq_clear_cfqq_must_alloc_slice(cfqq); | ||
740 | cfq_clear_cfqq_fifo_expire(cfqq); | ||
741 | cfq_clear_cfqq_expired(cfqq); | ||
742 | } | ||
743 | |||
744 | cfqd->active_queue = cfqq; | ||
745 | } | ||
746 | |||
747 | /* | ||
748 | * 0 | ||
749 | * 0,1 | ||
750 | * 0,1,2 | ||
751 | * 0,1,2,3 | ||
752 | * 0,1,2,3,4 | ||
753 | * 0,1,2,3,4,5 | ||
754 | * 0,1,2,3,4,5,6 | ||
755 | * 0,1,2,3,4,5,6,7 | ||
756 | */ | ||
757 | static int cfq_get_next_prio_level(struct cfq_data *cfqd) | ||
758 | { | ||
759 | int prio, wrap; | ||
760 | |||
761 | prio = -1; | ||
762 | wrap = 0; | ||
763 | do { | ||
764 | int p; | ||
765 | |||
766 | for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) { | ||
767 | if (!list_empty(&cfqd->rr_list[p])) { | ||
768 | prio = p; | ||
769 | break; | ||
770 | } | ||
771 | } | ||
772 | |||
773 | if (prio != -1) | ||
774 | break; | ||
775 | cfqd->cur_prio = 0; | ||
776 | if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) { | ||
777 | cfqd->cur_end_prio = 0; | ||
778 | if (wrap) | ||
779 | break; | ||
780 | wrap = 1; | ||
781 | } | ||
782 | } while (1); | ||
783 | |||
784 | if (unlikely(prio == -1)) | ||
785 | return -1; | ||
786 | |||
787 | BUG_ON(prio >= CFQ_PRIO_LISTS); | ||
788 | |||
789 | list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr); | ||
790 | |||
791 | cfqd->cur_prio = prio + 1; | ||
792 | if (cfqd->cur_prio > cfqd->cur_end_prio) { | ||
793 | cfqd->cur_end_prio = cfqd->cur_prio; | ||
794 | cfqd->cur_prio = 0; | ||
795 | } | ||
796 | if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) { | ||
797 | cfqd->cur_prio = 0; | ||
798 | cfqd->cur_end_prio = 0; | ||
799 | } | ||
800 | |||
801 | return prio; | ||
802 | } | ||
803 | |||
804 | static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) | ||
805 | { | ||
806 | struct cfq_queue *cfqq; | ||
807 | |||
808 | /* | ||
809 | * if current queue is expired but not done with its requests yet, | ||
810 | * wait for that to happen | ||
811 | */ | ||
812 | if ((cfqq = cfqd->active_queue) != NULL) { | ||
813 | if (cfq_cfqq_expired(cfqq) && cfq_cfqq_dispatched(cfqq)) | ||
814 | return NULL; | ||
815 | } | ||
816 | |||
817 | /* | ||
818 | * if current list is non-empty, grab first entry. if it is empty, | ||
819 | * get next prio level and grab first entry then if any are spliced | ||
820 | */ | ||
821 | if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) | ||
822 | cfqq = list_entry_cfqq(cfqd->cur_rr.next); | ||
823 | |||
824 | /* | ||
825 | * if we have idle queues and no rt or be queues had pending | ||
826 | * requests, either allow immediate service if the grace period | ||
827 | * has passed or arm the idle grace timer | ||
828 | */ | ||
829 | if (!cfqq && !list_empty(&cfqd->idle_rr)) { | ||
830 | unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; | ||
831 | |||
832 | if (time_after_eq(jiffies, end)) | ||
833 | cfqq = list_entry_cfqq(cfqd->idle_rr.next); | ||
834 | else | ||
835 | mod_timer(&cfqd->idle_class_timer, end); | ||
836 | } | ||
837 | |||
838 | __cfq_set_active_queue(cfqd, cfqq); | ||
839 | return cfqq; | ||
840 | } | ||
841 | |||
842 | /* | ||
843 | * current cfqq expired its slice (or was too idle), select new one | ||
844 | */ | ||
845 | static void | ||
846 | __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, | ||
847 | int preempted) | ||
848 | { | ||
849 | unsigned long now = jiffies; | ||
850 | |||
851 | if (cfq_cfqq_wait_request(cfqq)) | ||
852 | del_timer(&cfqd->idle_slice_timer); | ||
853 | |||
854 | if (!preempted && !cfq_cfqq_dispatched(cfqq)) | ||
855 | cfqq->service_last = now; | ||
856 | |||
857 | cfq_clear_cfqq_must_dispatch(cfqq); | ||
858 | cfq_clear_cfqq_wait_request(cfqq); | ||
859 | |||
860 | /* | ||
861 | * store what was left of this slice, if the queue idled out | ||
862 | * or was preempted | ||
863 | */ | ||
864 | if (time_after(cfqq->slice_end, now)) | ||
865 | cfqq->slice_left = cfqq->slice_end - now; | ||
866 | else | ||
867 | cfqq->slice_left = 0; | ||
868 | |||
869 | if (cfq_cfqq_on_rr(cfqq)) | ||
870 | cfq_resort_rr_list(cfqq, preempted); | ||
871 | |||
872 | if (cfqq == cfqd->active_queue) | ||
873 | cfqd->active_queue = NULL; | ||
874 | |||
875 | if (cfqd->active_cic) { | ||
876 | put_io_context(cfqd->active_cic->ioc); | ||
877 | cfqd->active_cic = NULL; | ||
878 | } | ||
879 | |||
880 | cfqd->dispatch_slice = 0; | ||
881 | } | ||
882 | |||
883 | static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted) | ||
884 | { | ||
885 | struct cfq_queue *cfqq = cfqd->active_queue; | ||
886 | |||
887 | if (cfqq) { | ||
888 | /* | ||
889 | * use deferred expiry if there are requests in progress, so as | ||
890 | * not to disturb the slice of the next queue | ||
891 | */ | ||
892 | if (cfq_cfqq_dispatched(cfqq)) | ||
893 | cfq_mark_cfqq_expired(cfqq); | ||
894 | else | ||
895 | __cfq_slice_expired(cfqd, cfqq, preempted); | ||
896 | } | ||
897 | } | ||
898 | |||
899 | static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
900 | |||
901 | { | ||
902 | WARN_ON(!RB_EMPTY(&cfqq->sort_list)); | ||
903 | WARN_ON(cfqq != cfqd->active_queue); | ||
904 | |||
905 | /* | ||
906 | * idle is disabled, either manually or by past process history | ||
907 | */ | ||
908 | if (!cfqd->cfq_slice_idle) | ||
909 | return 0; | ||
910 | if (!cfq_cfqq_idle_window(cfqq)) | ||
911 | return 0; | ||
912 | /* | ||
913 | * task has exited, don't wait | ||
914 | */ | ||
915 | if (cfqd->active_cic && !cfqd->active_cic->ioc->task) | ||
916 | return 0; | ||
917 | |||
918 | cfq_mark_cfqq_must_dispatch(cfqq); | ||
919 | cfq_mark_cfqq_wait_request(cfqq); | ||
920 | |||
921 | if (!timer_pending(&cfqd->idle_slice_timer)) { | ||
922 | unsigned long slice_left = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle); | ||
923 | |||
924 | cfqd->idle_slice_timer.expires = jiffies + slice_left; | ||
925 | add_timer(&cfqd->idle_slice_timer); | ||
926 | } | ||
927 | |||
928 | return 1; | ||
929 | } | ||
930 | |||
931 | static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq) | ||
932 | { | ||
933 | struct cfq_data *cfqd = q->elevator->elevator_data; | ||
934 | struct cfq_queue *cfqq = crq->cfq_queue; | ||
935 | |||
936 | cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq); | ||
937 | cfq_remove_request(crq->request); | ||
938 | cfqq->on_dispatch[cfq_crq_is_sync(crq)]++; | ||
939 | elv_dispatch_sort(q, crq->request); | ||
940 | } | ||
941 | |||
942 | /* | ||
943 | * return expired entry, or NULL to just start from scratch in rbtree | ||
944 | */ | ||
945 | static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) | ||
946 | { | ||
947 | struct cfq_data *cfqd = cfqq->cfqd; | ||
948 | struct request *rq; | ||
949 | struct cfq_rq *crq; | ||
950 | |||
951 | if (cfq_cfqq_fifo_expire(cfqq)) | ||
952 | return NULL; | ||
953 | |||
954 | if (!list_empty(&cfqq->fifo)) { | ||
955 | int fifo = cfq_cfqq_class_sync(cfqq); | ||
956 | |||
957 | crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next)); | ||
958 | rq = crq->request; | ||
959 | if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { | ||
960 | cfq_mark_cfqq_fifo_expire(cfqq); | ||
961 | return crq; | ||
962 | } | ||
963 | } | ||
964 | |||
965 | return NULL; | ||
966 | } | ||
967 | |||
968 | /* | ||
969 | * Scale schedule slice based on io priority. Use the sync time slice only | ||
970 | * if a queue is marked sync and has sync io queued. A sync queue with async | ||
971 | * io only should not get the full sync slice length. | ||
972 | */ | ||
973 | static inline int | ||
974 | cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
975 | { | ||
976 | const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)]; | ||
977 | |||
978 | WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); | ||
979 | |||
980 | return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio)); | ||
981 | } | ||
982 | |||
983 | static inline void | ||
984 | cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
985 | { | ||
986 | cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; | ||
987 | } | ||
988 | |||
989 | static inline int | ||
990 | cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
991 | { | ||
992 | const int base_rq = cfqd->cfq_slice_async_rq; | ||
993 | |||
994 | WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); | ||
995 | |||
996 | return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio)); | ||
997 | } | ||
998 | |||
999 | /* | ||
1000 | * get next queue for service | ||
1001 | */ | ||
1002 | static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd, int force) | ||
1003 | { | ||
1004 | unsigned long now = jiffies; | ||
1005 | struct cfq_queue *cfqq; | ||
1006 | |||
1007 | cfqq = cfqd->active_queue; | ||
1008 | if (!cfqq) | ||
1009 | goto new_queue; | ||
1010 | |||
1011 | if (cfq_cfqq_expired(cfqq)) | ||
1012 | goto new_queue; | ||
1013 | |||
1014 | /* | ||
1015 | * slice has expired | ||
1016 | */ | ||
1017 | if (!cfq_cfqq_must_dispatch(cfqq) && time_after(now, cfqq->slice_end)) | ||
1018 | goto expire; | ||
1019 | |||
1020 | /* | ||
1021 | * if queue has requests, dispatch one. if not, check if | ||
1022 | * enough slice is left to wait for one | ||
1023 | */ | ||
1024 | if (!RB_EMPTY(&cfqq->sort_list)) | ||
1025 | goto keep_queue; | ||
1026 | else if (!force && cfq_cfqq_class_sync(cfqq) && | ||
1027 | time_before(now, cfqq->slice_end)) { | ||
1028 | if (cfq_arm_slice_timer(cfqd, cfqq)) | ||
1029 | return NULL; | ||
1030 | } | ||
1031 | |||
1032 | expire: | ||
1033 | cfq_slice_expired(cfqd, 0); | ||
1034 | new_queue: | ||
1035 | cfqq = cfq_set_active_queue(cfqd); | ||
1036 | keep_queue: | ||
1037 | return cfqq; | ||
1038 | } | ||
1039 | |||
1040 | static int | ||
1041 | __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, | ||
1042 | int max_dispatch) | ||
1043 | { | ||
1044 | int dispatched = 0; | ||
1045 | |||
1046 | BUG_ON(RB_EMPTY(&cfqq->sort_list)); | ||
1047 | |||
1048 | do { | ||
1049 | struct cfq_rq *crq; | ||
1050 | |||
1051 | /* | ||
1052 | * follow expired path, else get first next available | ||
1053 | */ | ||
1054 | if ((crq = cfq_check_fifo(cfqq)) == NULL) | ||
1055 | crq = cfqq->next_crq; | ||
1056 | |||
1057 | /* | ||
1058 | * finally, insert request into driver dispatch list | ||
1059 | */ | ||
1060 | cfq_dispatch_insert(cfqd->queue, crq); | ||
1061 | |||
1062 | cfqd->dispatch_slice++; | ||
1063 | dispatched++; | ||
1064 | |||
1065 | if (!cfqd->active_cic) { | ||
1066 | atomic_inc(&crq->io_context->ioc->refcount); | ||
1067 | cfqd->active_cic = crq->io_context; | ||
1068 | } | ||
1069 | |||
1070 | if (RB_EMPTY(&cfqq->sort_list)) | ||
1071 | break; | ||
1072 | |||
1073 | } while (dispatched < max_dispatch); | ||
1074 | |||
1075 | /* | ||
1076 | * if slice end isn't set yet, set it. if at least one request was | ||
1077 | * sync, use the sync time slice value | ||
1078 | */ | ||
1079 | if (!cfqq->slice_end) | ||
1080 | cfq_set_prio_slice(cfqd, cfqq); | ||
1081 | |||
1082 | /* | ||
1083 | * expire an async queue immediately if it has used up its slice. idle | ||
1084 | * queues always expire after 1 dispatch round. | ||
1085 | */ | ||
1086 | if ((!cfq_cfqq_sync(cfqq) && | ||
1087 | cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) || | ||
1088 | cfq_class_idle(cfqq)) | ||
1089 | cfq_slice_expired(cfqd, 0); | ||
1090 | |||
1091 | return dispatched; | ||
1092 | } | ||
1093 | |||
1094 | static int | ||
1095 | cfq_dispatch_requests(request_queue_t *q, int force) | ||
1096 | { | ||
1097 | struct cfq_data *cfqd = q->elevator->elevator_data; | ||
1098 | struct cfq_queue *cfqq; | ||
1099 | |||
1100 | if (!cfqd->busy_queues) | ||
1101 | return 0; | ||
1102 | |||
1103 | cfqq = cfq_select_queue(cfqd, force); | ||
1104 | if (cfqq) { | ||
1105 | int max_dispatch; | ||
1106 | |||
1107 | /* | ||
1108 | * if idle window is disabled, allow queue buildup | ||
1109 | */ | ||
1110 | if (!cfq_cfqq_idle_window(cfqq) && | ||
1111 | cfqd->rq_in_driver >= cfqd->cfq_max_depth) | ||
1112 | return 0; | ||
1113 | |||
1114 | cfq_clear_cfqq_must_dispatch(cfqq); | ||
1115 | cfq_clear_cfqq_wait_request(cfqq); | ||
1116 | del_timer(&cfqd->idle_slice_timer); | ||
1117 | |||
1118 | if (!force) { | ||
1119 | max_dispatch = cfqd->cfq_quantum; | ||
1120 | if (cfq_class_idle(cfqq)) | ||
1121 | max_dispatch = 1; | ||
1122 | } else | ||
1123 | max_dispatch = INT_MAX; | ||
1124 | |||
1125 | return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); | ||
1126 | } | ||
1127 | |||
1128 | return 0; | ||
1129 | } | ||
1130 | |||
1131 | /* | ||
1132 | * task holds one reference to the queue, dropped when task exits. each crq | ||
1133 | * in-flight on this queue also holds a reference, dropped when crq is freed. | ||
1134 | * | ||
1135 | * queue lock must be held here. | ||
1136 | */ | ||
1137 | static void cfq_put_queue(struct cfq_queue *cfqq) | ||
1138 | { | ||
1139 | struct cfq_data *cfqd = cfqq->cfqd; | ||
1140 | |||
1141 | BUG_ON(atomic_read(&cfqq->ref) <= 0); | ||
1142 | |||
1143 | if (!atomic_dec_and_test(&cfqq->ref)) | ||
1144 | return; | ||
1145 | |||
1146 | BUG_ON(rb_first(&cfqq->sort_list)); | ||
1147 | BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); | ||
1148 | BUG_ON(cfq_cfqq_on_rr(cfqq)); | ||
1149 | |||
1150 | if (unlikely(cfqd->active_queue == cfqq)) { | ||
1151 | __cfq_slice_expired(cfqd, cfqq, 0); | ||
1152 | cfq_schedule_dispatch(cfqd); | ||
1153 | } | ||
1154 | |||
1155 | cfq_put_cfqd(cfqq->cfqd); | ||
1156 | |||
1157 | /* | ||
1158 | * it's on the empty list and still hashed | ||
1159 | */ | ||
1160 | list_del(&cfqq->cfq_list); | ||
1161 | hlist_del(&cfqq->cfq_hash); | ||
1162 | kmem_cache_free(cfq_pool, cfqq); | ||
1163 | } | ||
1164 | |||
1165 | static inline struct cfq_queue * | ||
1166 | __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio, | ||
1167 | const int hashval) | ||
1168 | { | ||
1169 | struct hlist_head *hash_list = &cfqd->cfq_hash[hashval]; | ||
1170 | struct hlist_node *entry, *next; | ||
1171 | |||
1172 | hlist_for_each_safe(entry, next, hash_list) { | ||
1173 | struct cfq_queue *__cfqq = list_entry_qhash(entry); | ||
1174 | const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->ioprio_class, __cfqq->ioprio); | ||
1175 | |||
1176 | if (__cfqq->key == key && (__p == prio || prio == CFQ_KEY_ANY)) | ||
1177 | return __cfqq; | ||
1178 | } | ||
1179 | |||
1180 | return NULL; | ||
1181 | } | ||
1182 | |||
1183 | static struct cfq_queue * | ||
1184 | cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio) | ||
1185 | { | ||
1186 | return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT)); | ||
1187 | } | ||
1188 | |||
1189 | static void cfq_free_io_context(struct cfq_io_context *cic) | ||
1190 | { | ||
1191 | struct cfq_io_context *__cic; | ||
1192 | struct list_head *entry, *next; | ||
1193 | |||
1194 | list_for_each_safe(entry, next, &cic->list) { | ||
1195 | __cic = list_entry(entry, struct cfq_io_context, list); | ||
1196 | kmem_cache_free(cfq_ioc_pool, __cic); | ||
1197 | } | ||
1198 | |||
1199 | kmem_cache_free(cfq_ioc_pool, cic); | ||
1200 | } | ||
1201 | |||
1202 | /* | ||
1203 | * Called with interrupts disabled | ||
1204 | */ | ||
1205 | static void cfq_exit_single_io_context(struct cfq_io_context *cic) | ||
1206 | { | ||
1207 | struct cfq_data *cfqd = cic->cfqq->cfqd; | ||
1208 | request_queue_t *q = cfqd->queue; | ||
1209 | |||
1210 | WARN_ON(!irqs_disabled()); | ||
1211 | |||
1212 | spin_lock(q->queue_lock); | ||
1213 | |||
1214 | if (unlikely(cic->cfqq == cfqd->active_queue)) { | ||
1215 | __cfq_slice_expired(cfqd, cic->cfqq, 0); | ||
1216 | cfq_schedule_dispatch(cfqd); | ||
1217 | } | ||
1218 | |||
1219 | cfq_put_queue(cic->cfqq); | ||
1220 | cic->cfqq = NULL; | ||
1221 | spin_unlock(q->queue_lock); | ||
1222 | } | ||
1223 | |||
1224 | /* | ||
1225 | * Another task may update the task cic list, if it is doing a queue lookup | ||
1226 | * on its behalf. cfq_cic_lock excludes such concurrent updates | ||
1227 | */ | ||
1228 | static void cfq_exit_io_context(struct cfq_io_context *cic) | ||
1229 | { | ||
1230 | struct cfq_io_context *__cic; | ||
1231 | struct list_head *entry; | ||
1232 | unsigned long flags; | ||
1233 | |||
1234 | local_irq_save(flags); | ||
1235 | |||
1236 | /* | ||
1237 | * put the reference this task is holding to the various queues | ||
1238 | */ | ||
1239 | list_for_each(entry, &cic->list) { | ||
1240 | __cic = list_entry(entry, struct cfq_io_context, list); | ||
1241 | cfq_exit_single_io_context(__cic); | ||
1242 | } | ||
1243 | |||
1244 | cfq_exit_single_io_context(cic); | ||
1245 | local_irq_restore(flags); | ||
1246 | } | ||
1247 | |||
1248 | static struct cfq_io_context * | ||
1249 | cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | ||
1250 | { | ||
1251 | struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); | ||
1252 | |||
1253 | if (cic) { | ||
1254 | INIT_LIST_HEAD(&cic->list); | ||
1255 | cic->cfqq = NULL; | ||
1256 | cic->key = NULL; | ||
1257 | cic->last_end_request = jiffies; | ||
1258 | cic->ttime_total = 0; | ||
1259 | cic->ttime_samples = 0; | ||
1260 | cic->ttime_mean = 0; | ||
1261 | cic->dtor = cfq_free_io_context; | ||
1262 | cic->exit = cfq_exit_io_context; | ||
1263 | } | ||
1264 | |||
1265 | return cic; | ||
1266 | } | ||
1267 | |||
1268 | static void cfq_init_prio_data(struct cfq_queue *cfqq) | ||
1269 | { | ||
1270 | struct task_struct *tsk = current; | ||
1271 | int ioprio_class; | ||
1272 | |||
1273 | if (!cfq_cfqq_prio_changed(cfqq)) | ||
1274 | return; | ||
1275 | |||
1276 | ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio); | ||
1277 | switch (ioprio_class) { | ||
1278 | default: | ||
1279 | printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); | ||
1280 | case IOPRIO_CLASS_NONE: | ||
1281 | /* | ||
1282 | * no prio set, place us in the middle of the BE classes | ||
1283 | */ | ||
1284 | cfqq->ioprio = task_nice_ioprio(tsk); | ||
1285 | cfqq->ioprio_class = IOPRIO_CLASS_BE; | ||
1286 | break; | ||
1287 | case IOPRIO_CLASS_RT: | ||
1288 | cfqq->ioprio = task_ioprio(tsk); | ||
1289 | cfqq->ioprio_class = IOPRIO_CLASS_RT; | ||
1290 | break; | ||
1291 | case IOPRIO_CLASS_BE: | ||
1292 | cfqq->ioprio = task_ioprio(tsk); | ||
1293 | cfqq->ioprio_class = IOPRIO_CLASS_BE; | ||
1294 | break; | ||
1295 | case IOPRIO_CLASS_IDLE: | ||
1296 | cfqq->ioprio_class = IOPRIO_CLASS_IDLE; | ||
1297 | cfqq->ioprio = 7; | ||
1298 | cfq_clear_cfqq_idle_window(cfqq); | ||
1299 | break; | ||
1300 | } | ||
1301 | |||
1302 | /* | ||
1303 | * keep track of original prio settings in case we have to temporarily | ||
1304 | * elevate the priority of this queue | ||
1305 | */ | ||
1306 | cfqq->org_ioprio = cfqq->ioprio; | ||
1307 | cfqq->org_ioprio_class = cfqq->ioprio_class; | ||
1308 | |||
1309 | if (cfq_cfqq_on_rr(cfqq)) | ||
1310 | cfq_resort_rr_list(cfqq, 0); | ||
1311 | |||
1312 | cfq_clear_cfqq_prio_changed(cfqq); | ||
1313 | } | ||
1314 | |||
1315 | static inline void changed_ioprio(struct cfq_queue *cfqq) | ||
1316 | { | ||
1317 | if (cfqq) { | ||
1318 | struct cfq_data *cfqd = cfqq->cfqd; | ||
1319 | |||
1320 | spin_lock(cfqd->queue->queue_lock); | ||
1321 | cfq_mark_cfqq_prio_changed(cfqq); | ||
1322 | cfq_init_prio_data(cfqq); | ||
1323 | spin_unlock(cfqd->queue->queue_lock); | ||
1324 | } | ||
1325 | } | ||
1326 | |||
1327 | /* | ||
1328 | * callback from sys_ioprio_set, irqs are disabled | ||
1329 | */ | ||
1330 | static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) | ||
1331 | { | ||
1332 | struct cfq_io_context *cic = ioc->cic; | ||
1333 | |||
1334 | changed_ioprio(cic->cfqq); | ||
1335 | |||
1336 | list_for_each_entry(cic, &cic->list, list) | ||
1337 | changed_ioprio(cic->cfqq); | ||
1338 | |||
1339 | return 0; | ||
1340 | } | ||
1341 | |||
1342 | static struct cfq_queue * | ||
1343 | cfq_get_queue(struct cfq_data *cfqd, unsigned int key, unsigned short ioprio, | ||
1344 | gfp_t gfp_mask) | ||
1345 | { | ||
1346 | const int hashval = hash_long(key, CFQ_QHASH_SHIFT); | ||
1347 | struct cfq_queue *cfqq, *new_cfqq = NULL; | ||
1348 | |||
1349 | retry: | ||
1350 | cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval); | ||
1351 | |||
1352 | if (!cfqq) { | ||
1353 | if (new_cfqq) { | ||
1354 | cfqq = new_cfqq; | ||
1355 | new_cfqq = NULL; | ||
1356 | } else if (gfp_mask & __GFP_WAIT) { | ||
1357 | spin_unlock_irq(cfqd->queue->queue_lock); | ||
1358 | new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); | ||
1359 | spin_lock_irq(cfqd->queue->queue_lock); | ||
1360 | goto retry; | ||
1361 | } else { | ||
1362 | cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); | ||
1363 | if (!cfqq) | ||
1364 | goto out; | ||
1365 | } | ||
1366 | |||
1367 | memset(cfqq, 0, sizeof(*cfqq)); | ||
1368 | |||
1369 | INIT_HLIST_NODE(&cfqq->cfq_hash); | ||
1370 | INIT_LIST_HEAD(&cfqq->cfq_list); | ||
1371 | RB_CLEAR_ROOT(&cfqq->sort_list); | ||
1372 | INIT_LIST_HEAD(&cfqq->fifo); | ||
1373 | |||
1374 | cfqq->key = key; | ||
1375 | hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); | ||
1376 | atomic_set(&cfqq->ref, 0); | ||
1377 | cfqq->cfqd = cfqd; | ||
1378 | atomic_inc(&cfqd->ref); | ||
1379 | cfqq->service_last = 0; | ||
1380 | /* | ||
1381 | * set ->slice_left to allow preemption for a new process | ||
1382 | */ | ||
1383 | cfqq->slice_left = 2 * cfqd->cfq_slice_idle; | ||
1384 | cfq_mark_cfqq_idle_window(cfqq); | ||
1385 | cfq_mark_cfqq_prio_changed(cfqq); | ||
1386 | cfq_init_prio_data(cfqq); | ||
1387 | } | ||
1388 | |||
1389 | if (new_cfqq) | ||
1390 | kmem_cache_free(cfq_pool, new_cfqq); | ||
1391 | |||
1392 | atomic_inc(&cfqq->ref); | ||
1393 | out: | ||
1394 | WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq); | ||
1395 | return cfqq; | ||
1396 | } | ||
1397 | |||
1398 | /* | ||
1399 | * Setup general io context and cfq io context. There can be several cfq | ||
1400 | * io contexts per general io context, if this process is doing io to more | ||
1401 | * than one device managed by cfq. Note that caller is holding a reference to | ||
1402 | * cfqq, so we don't need to worry about it disappearing | ||
1403 | */ | ||
1404 | static struct cfq_io_context * | ||
1405 | cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) | ||
1406 | { | ||
1407 | struct io_context *ioc = NULL; | ||
1408 | struct cfq_io_context *cic; | ||
1409 | |||
1410 | might_sleep_if(gfp_mask & __GFP_WAIT); | ||
1411 | |||
1412 | ioc = get_io_context(gfp_mask); | ||
1413 | if (!ioc) | ||
1414 | return NULL; | ||
1415 | |||
1416 | if ((cic = ioc->cic) == NULL) { | ||
1417 | cic = cfq_alloc_io_context(cfqd, gfp_mask); | ||
1418 | |||
1419 | if (cic == NULL) | ||
1420 | goto err; | ||
1421 | |||
1422 | /* | ||
1423 | * manually increment generic io_context usage count, it | ||
1424 | * cannot go away since we are already holding one ref to it | ||
1425 | */ | ||
1426 | ioc->cic = cic; | ||
1427 | ioc->set_ioprio = cfq_ioc_set_ioprio; | ||
1428 | cic->ioc = ioc; | ||
1429 | cic->key = cfqd; | ||
1430 | atomic_inc(&cfqd->ref); | ||
1431 | } else { | ||
1432 | struct cfq_io_context *__cic; | ||
1433 | |||
1434 | /* | ||
1435 | * the first cic on the list is actually the head itself | ||
1436 | */ | ||
1437 | if (cic->key == cfqd) | ||
1438 | goto out; | ||
1439 | |||
1440 | /* | ||
1441 | * cic exists, check if we already are there. linear search | ||
1442 | * should be ok here, the list will usually not be more than | ||
1443 | * 1 or a few entries long | ||
1444 | */ | ||
1445 | list_for_each_entry(__cic, &cic->list, list) { | ||
1446 | /* | ||
1447 | * this process is already holding a reference to | ||
1448 | * this queue, so no need to get one more | ||
1449 | */ | ||
1450 | if (__cic->key == cfqd) { | ||
1451 | cic = __cic; | ||
1452 | goto out; | ||
1453 | } | ||
1454 | } | ||
1455 | |||
1456 | /* | ||
1457 | * nope, process doesn't have a cic associated with this | ||
1458 | * cfqq yet. get a new one and add to list | ||
1459 | */ | ||
1460 | __cic = cfq_alloc_io_context(cfqd, gfp_mask); | ||
1461 | if (__cic == NULL) | ||
1462 | goto err; | ||
1463 | |||
1464 | __cic->ioc = ioc; | ||
1465 | __cic->key = cfqd; | ||
1466 | atomic_inc(&cfqd->ref); | ||
1467 | list_add(&__cic->list, &cic->list); | ||
1468 | cic = __cic; | ||
1469 | } | ||
1470 | |||
1471 | out: | ||
1472 | return cic; | ||
1473 | err: | ||
1474 | put_io_context(ioc); | ||
1475 | return NULL; | ||
1476 | } | ||
1477 | |||
1478 | static void | ||
1479 | cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) | ||
1480 | { | ||
1481 | unsigned long elapsed, ttime; | ||
1482 | |||
1483 | /* | ||
1484 | * if this context already has stuff queued, thinktime is from | ||
1485 | * last queue not last end | ||
1486 | */ | ||
1487 | #if 0 | ||
1488 | if (time_after(cic->last_end_request, cic->last_queue)) | ||
1489 | elapsed = jiffies - cic->last_end_request; | ||
1490 | else | ||
1491 | elapsed = jiffies - cic->last_queue; | ||
1492 | #else | ||
1493 | elapsed = jiffies - cic->last_end_request; | ||
1494 | #endif | ||
1495 | |||
1496 | ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle); | ||
1497 | |||
1498 | cic->ttime_samples = (7*cic->ttime_samples + 256) / 8; | ||
1499 | cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8; | ||
1500 | cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; | ||
1501 | } | ||
1502 | |||
1503 | #define sample_valid(samples) ((samples) > 80) | ||
1504 | |||
1505 | /* | ||
1506 | * Disable idle window if the process thinks too long or seeks so much that | ||
1507 | * it doesn't matter | ||
1508 | */ | ||
1509 | static void | ||
1510 | cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, | ||
1511 | struct cfq_io_context *cic) | ||
1512 | { | ||
1513 | int enable_idle = cfq_cfqq_idle_window(cfqq); | ||
1514 | |||
1515 | if (!cic->ioc->task || !cfqd->cfq_slice_idle) | ||
1516 | enable_idle = 0; | ||
1517 | else if (sample_valid(cic->ttime_samples)) { | ||
1518 | if (cic->ttime_mean > cfqd->cfq_slice_idle) | ||
1519 | enable_idle = 0; | ||
1520 | else | ||
1521 | enable_idle = 1; | ||
1522 | } | ||
1523 | |||
1524 | if (enable_idle) | ||
1525 | cfq_mark_cfqq_idle_window(cfqq); | ||
1526 | else | ||
1527 | cfq_clear_cfqq_idle_window(cfqq); | ||
1528 | } | ||
1529 | |||
1530 | |||
1531 | /* | ||
1532 | * Check if new_cfqq should preempt the currently active queue. Return 0 for | ||
1533 | * no (or if we aren't sure), a 1 will cause a preempt. | ||
1534 | */ | ||
1535 | static int | ||
1536 | cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, | ||
1537 | struct cfq_rq *crq) | ||
1538 | { | ||
1539 | struct cfq_queue *cfqq = cfqd->active_queue; | ||
1540 | |||
1541 | if (cfq_class_idle(new_cfqq)) | ||
1542 | return 0; | ||
1543 | |||
1544 | if (!cfqq) | ||
1545 | return 1; | ||
1546 | |||
1547 | if (cfq_class_idle(cfqq)) | ||
1548 | return 1; | ||
1549 | if (!cfq_cfqq_wait_request(new_cfqq)) | ||
1550 | return 0; | ||
1551 | /* | ||
1552 | * if it doesn't have slice left, forget it | ||
1553 | */ | ||
1554 | if (new_cfqq->slice_left < cfqd->cfq_slice_idle) | ||
1555 | return 0; | ||
1556 | if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq)) | ||
1557 | return 1; | ||
1558 | |||
1559 | return 0; | ||
1560 | } | ||
1561 | |||
1562 | /* | ||
1563 | * cfqq preempts the active queue. if we allowed preempt with no slice left, | ||
1564 | * let it have half of its nominal slice. | ||
1565 | */ | ||
1566 | static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
1567 | { | ||
1568 | struct cfq_queue *__cfqq, *next; | ||
1569 | |||
1570 | list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list) | ||
1571 | cfq_resort_rr_list(__cfqq, 1); | ||
1572 | |||
1573 | if (!cfqq->slice_left) | ||
1574 | cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2; | ||
1575 | |||
1576 | cfqq->slice_end = cfqq->slice_left + jiffies; | ||
1577 | __cfq_slice_expired(cfqd, cfqq, 1); | ||
1578 | __cfq_set_active_queue(cfqd, cfqq); | ||
1579 | } | ||
1580 | |||
1581 | /* | ||
1582 | * should really be a ll_rw_blk.c helper | ||
1583 | */ | ||
1584 | static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
1585 | { | ||
1586 | request_queue_t *q = cfqd->queue; | ||
1587 | |||
1588 | if (!blk_queue_plugged(q)) | ||
1589 | q->request_fn(q); | ||
1590 | else | ||
1591 | __generic_unplug_device(q); | ||
1592 | } | ||
1593 | |||
1594 | /* | ||
1595 | * Called when a new fs request (crq) is added (to cfqq). Check if there's | ||
1596 | * something we should do about it | ||
1597 | */ | ||
1598 | static void | ||
1599 | cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, | ||
1600 | struct cfq_rq *crq) | ||
1601 | { | ||
1602 | struct cfq_io_context *cic; | ||
1603 | |||
1604 | cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); | ||
1605 | |||
1606 | /* | ||
1607 | * we never wait for an async request and we don't allow preemption | ||
1608 | * of an async request. so just return early | ||
1609 | */ | ||
1610 | if (!cfq_crq_is_sync(crq)) | ||
1611 | return; | ||
1612 | |||
1613 | cic = crq->io_context; | ||
1614 | |||
1615 | cfq_update_io_thinktime(cfqd, cic); | ||
1616 | cfq_update_idle_window(cfqd, cfqq, cic); | ||
1617 | |||
1618 | cic->last_queue = jiffies; | ||
1619 | |||
1620 | if (cfqq == cfqd->active_queue) { | ||
1621 | /* | ||
1622 | * if we are waiting for a request for this queue, let it rip | ||
1623 | * immediately and flag that we must not expire this queue | ||
1624 | * just now | ||
1625 | */ | ||
1626 | if (cfq_cfqq_wait_request(cfqq)) { | ||
1627 | cfq_mark_cfqq_must_dispatch(cfqq); | ||
1628 | del_timer(&cfqd->idle_slice_timer); | ||
1629 | cfq_start_queueing(cfqd, cfqq); | ||
1630 | } | ||
1631 | } else if (cfq_should_preempt(cfqd, cfqq, crq)) { | ||
1632 | /* | ||
1633 | * not the active queue - expire current slice if it is | ||
1634 | * idle and has expired its mean thinktime or this new queue | ||
1635 | * has some old slice time left and is of higher priority | ||
1636 | */ | ||
1637 | cfq_preempt_queue(cfqd, cfqq); | ||
1638 | cfq_mark_cfqq_must_dispatch(cfqq); | ||
1639 | cfq_start_queueing(cfqd, cfqq); | ||
1640 | } | ||
1641 | } | ||
1642 | |||
1643 | static void cfq_insert_request(request_queue_t *q, struct request *rq) | ||
1644 | { | ||
1645 | struct cfq_data *cfqd = q->elevator->elevator_data; | ||
1646 | struct cfq_rq *crq = RQ_DATA(rq); | ||
1647 | struct cfq_queue *cfqq = crq->cfq_queue; | ||
1648 | |||
1649 | cfq_init_prio_data(cfqq); | ||
1650 | |||
1651 | cfq_add_crq_rb(crq); | ||
1652 | |||
1653 | list_add_tail(&rq->queuelist, &cfqq->fifo); | ||
1654 | |||
1655 | if (rq_mergeable(rq)) | ||
1656 | cfq_add_crq_hash(cfqd, crq); | ||
1657 | |||
1658 | cfq_crq_enqueued(cfqd, cfqq, crq); | ||
1659 | } | ||
1660 | |||
1661 | static void cfq_completed_request(request_queue_t *q, struct request *rq) | ||
1662 | { | ||
1663 | struct cfq_rq *crq = RQ_DATA(rq); | ||
1664 | struct cfq_queue *cfqq = crq->cfq_queue; | ||
1665 | struct cfq_data *cfqd = cfqq->cfqd; | ||
1666 | const int sync = cfq_crq_is_sync(crq); | ||
1667 | unsigned long now; | ||
1668 | |||
1669 | now = jiffies; | ||
1670 | |||
1671 | WARN_ON(!cfqd->rq_in_driver); | ||
1672 | WARN_ON(!cfqq->on_dispatch[sync]); | ||
1673 | cfqd->rq_in_driver--; | ||
1674 | cfqq->on_dispatch[sync]--; | ||
1675 | |||
1676 | if (!cfq_class_idle(cfqq)) | ||
1677 | cfqd->last_end_request = now; | ||
1678 | |||
1679 | if (!cfq_cfqq_dispatched(cfqq)) { | ||
1680 | if (cfq_cfqq_on_rr(cfqq)) { | ||
1681 | cfqq->service_last = now; | ||
1682 | cfq_resort_rr_list(cfqq, 0); | ||
1683 | } | ||
1684 | if (cfq_cfqq_expired(cfqq)) { | ||
1685 | __cfq_slice_expired(cfqd, cfqq, 0); | ||
1686 | cfq_schedule_dispatch(cfqd); | ||
1687 | } | ||
1688 | } | ||
1689 | |||
1690 | if (cfq_crq_is_sync(crq)) | ||
1691 | crq->io_context->last_end_request = now; | ||
1692 | } | ||
1693 | |||
1694 | static struct request * | ||
1695 | cfq_former_request(request_queue_t *q, struct request *rq) | ||
1696 | { | ||
1697 | struct cfq_rq *crq = RQ_DATA(rq); | ||
1698 | struct rb_node *rbprev = rb_prev(&crq->rb_node); | ||
1699 | |||
1700 | if (rbprev) | ||
1701 | return rb_entry_crq(rbprev)->request; | ||
1702 | |||
1703 | return NULL; | ||
1704 | } | ||
1705 | |||
1706 | static struct request * | ||
1707 | cfq_latter_request(request_queue_t *q, struct request *rq) | ||
1708 | { | ||
1709 | struct cfq_rq *crq = RQ_DATA(rq); | ||
1710 | struct rb_node *rbnext = rb_next(&crq->rb_node); | ||
1711 | |||
1712 | if (rbnext) | ||
1713 | return rb_entry_crq(rbnext)->request; | ||
1714 | |||
1715 | return NULL; | ||
1716 | } | ||
1717 | |||
1718 | /* | ||
1719 | * we temporarily boost lower priority queues if they are holding fs exclusive | ||
1720 | * resources. they are boosted to normal prio (CLASS_BE/4) | ||
1721 | */ | ||
1722 | static void cfq_prio_boost(struct cfq_queue *cfqq) | ||
1723 | { | ||
1724 | const int ioprio_class = cfqq->ioprio_class; | ||
1725 | const int ioprio = cfqq->ioprio; | ||
1726 | |||
1727 | if (has_fs_excl()) { | ||
1728 | /* | ||
1729 | * boost idle prio on transactions that would lock out other | ||
1730 | * users of the filesystem | ||
1731 | */ | ||
1732 | if (cfq_class_idle(cfqq)) | ||
1733 | cfqq->ioprio_class = IOPRIO_CLASS_BE; | ||
1734 | if (cfqq->ioprio > IOPRIO_NORM) | ||
1735 | cfqq->ioprio = IOPRIO_NORM; | ||
1736 | } else { | ||
1737 | /* | ||
1738 | * check if we need to unboost the queue | ||
1739 | */ | ||
1740 | if (cfqq->ioprio_class != cfqq->org_ioprio_class) | ||
1741 | cfqq->ioprio_class = cfqq->org_ioprio_class; | ||
1742 | if (cfqq->ioprio != cfqq->org_ioprio) | ||
1743 | cfqq->ioprio = cfqq->org_ioprio; | ||
1744 | } | ||
1745 | |||
1746 | /* | ||
1747 | * refile between round-robin lists if we moved the priority class | ||
1748 | */ | ||
1749 | if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) && | ||
1750 | cfq_cfqq_on_rr(cfqq)) | ||
1751 | cfq_resort_rr_list(cfqq, 0); | ||
1752 | } | ||
1753 | |||
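[Editor's note] The boost rule above reduces to a small pure decision on a (class, prio) pair. Below is a minimal user-space sketch of that rule; the IOPRIO_CLASS_* and IOPRIO_NORM values are illustrative local definitions rather than the kernel headers, and boost()/has_fs_excl are hypothetical names.

#include <stdio.h>

/* illustrative stand-ins for the ioprio classes referenced above */
enum { IOPRIO_CLASS_BE = 2, IOPRIO_CLASS_IDLE = 3 };
#define IOPRIO_NORM 4

struct prio { int klass, level; };

/* sketch: while a task holds fs-exclusive resources, idle-class queues are
 * lifted to best-effort and capped at normal priority; otherwise the
 * original (class, prio) pair is restored */
static struct prio boost(struct prio cur, struct prio org, int has_fs_excl)
{
	if (has_fs_excl) {
		if (cur.klass == IOPRIO_CLASS_IDLE)
			cur.klass = IOPRIO_CLASS_BE;
		if (cur.level > IOPRIO_NORM)
			cur.level = IOPRIO_NORM;
	} else {
		cur = org;	/* unboost back to the original ioprio */
	}
	return cur;
}

int main(void)
{
	struct prio org = { IOPRIO_CLASS_IDLE, 7 };
	struct prio b = boost(org, org, 1);

	printf("boosted: class %d prio %d\n", b.klass, b.level);
	return 0;
}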
1754 | static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) | ||
1755 | { | ||
1756 | if (rw == READ || process_sync(task)) | ||
1757 | return task->pid; | ||
1758 | |||
1759 | return CFQ_KEY_ASYNC; | ||
1760 | } | ||
1761 | |||
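[Editor's note] cfq_queue_pid() is what gives reads and synchronous writers per-process queues while buffered writes from every task collapse into one shared async queue per priority level. A standalone sketch of that keying, with hypothetical names (queue_key, is_read, is_sync_process) and a local KEY_ASYNC stand-in for CFQ_KEY_ASYNC:

#include <stdio.h>

#define KEY_ASYNC 0	/* local stand-in for CFQ_KEY_ASYNC */

/* sketch: sync I/O (reads, or writes from a process doing synchronous I/O)
 * is keyed by pid and so gets a per-process queue; everything else shares
 * the async key */
static int queue_key(int pid, int is_read, int is_sync_process)
{
	if (is_read || is_sync_process)
		return pid;
	return KEY_ASYNC;
}

int main(void)
{
	printf("reader pid 1234      -> key %d\n", queue_key(1234, 1, 0));
	printf("buffered writer 1234 -> key %d\n", queue_key(1234, 0, 0));
	return 0;
}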
1762 | static inline int | ||
1763 | __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, | ||
1764 | struct task_struct *task, int rw) | ||
1765 | { | ||
1766 | #if 1 | ||
1767 | if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && | ||
1768 | !cfq_cfqq_must_alloc_slice(cfqq)) { | ||
1769 | cfq_mark_cfqq_must_alloc_slice(cfqq); | ||
1770 | return ELV_MQUEUE_MUST; | ||
1771 | } | ||
1772 | |||
1773 | return ELV_MQUEUE_MAY; | ||
1774 | #else | ||
1775 | if (!cfqq || task->flags & PF_MEMALLOC) | ||
1776 | return ELV_MQUEUE_MAY; | ||
1777 | if (!cfqq->allocated[rw] || cfq_cfqq_must_alloc(cfqq)) { | ||
1778 | if (cfq_cfqq_wait_request(cfqq)) | ||
1779 | return ELV_MQUEUE_MUST; | ||
1780 | |||
1781 | /* | ||
1782 | * only allow 1 ELV_MQUEUE_MUST per slice, otherwise we | ||
1783 | * can quickly flood the queue with writes from a single task | ||
1784 | */ | ||
1785 | if (rw == READ || !cfq_cfqq_must_alloc_slice(cfqq)) { | ||
1786 | cfq_mark_cfqq_must_alloc_slice(cfqq); | ||
1787 | return ELV_MQUEUE_MUST; | ||
1788 | } | ||
1789 | |||
1790 | return ELV_MQUEUE_MAY; | ||
1791 | } | ||
1792 | if (cfq_class_idle(cfqq)) | ||
1793 | return ELV_MQUEUE_NO; | ||
1794 | if (cfqq->allocated[rw] >= cfqd->max_queued) { | ||
1795 | struct io_context *ioc = get_io_context(GFP_ATOMIC); | ||
1796 | int ret = ELV_MQUEUE_NO; | ||
1797 | |||
1798 | if (ioc && ioc->nr_batch_requests) | ||
1799 | ret = ELV_MQUEUE_MAY; | ||
1800 | |||
1801 | put_io_context(ioc); | ||
1802 | return ret; | ||
1803 | } | ||
1804 | |||
1805 | return ELV_MQUEUE_MAY; | ||
1806 | #endif | ||
1807 | } | ||
1808 | |||
1809 | static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) | ||
1810 | { | ||
1811 | struct cfq_data *cfqd = q->elevator->elevator_data; | ||
1812 | struct task_struct *tsk = current; | ||
1813 | struct cfq_queue *cfqq; | ||
1814 | |||
1815 | /* | ||
1816 | * don't force setup of a queue from here, as a call to may_queue | ||
1817 | * does not necessarily imply that a request actually will be queued. | ||
1818 | * so just look up a possibly existing queue, or return 'may queue' | ||
1819 | * if that fails | ||
1820 | */ | ||
1821 | cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw), tsk->ioprio); | ||
1822 | if (cfqq) { | ||
1823 | cfq_init_prio_data(cfqq); | ||
1824 | cfq_prio_boost(cfqq); | ||
1825 | |||
1826 | return __cfq_may_queue(cfqd, cfqq, tsk, rw); | ||
1827 | } | ||
1828 | |||
1829 | return ELV_MQUEUE_MAY; | ||
1830 | } | ||
1831 | |||
1832 | static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq) | ||
1833 | { | ||
1834 | struct cfq_data *cfqd = q->elevator->elevator_data; | ||
1835 | struct request_list *rl = &q->rq; | ||
1836 | |||
1837 | if (cfqq->allocated[READ] <= cfqd->max_queued || cfqd->rq_starved) { | ||
1838 | smp_mb(); | ||
1839 | if (waitqueue_active(&rl->wait[READ])) | ||
1840 | wake_up(&rl->wait[READ]); | ||
1841 | } | ||
1842 | |||
1843 | if (cfqq->allocated[WRITE] <= cfqd->max_queued || cfqd->rq_starved) { | ||
1844 | smp_mb(); | ||
1845 | if (waitqueue_active(&rl->wait[WRITE])) | ||
1846 | wake_up(&rl->wait[WRITE]); | ||
1847 | } | ||
1848 | } | ||
1849 | |||
1850 | /* | ||
1851 | * queue lock held here | ||
1852 | */ | ||
1853 | static void cfq_put_request(request_queue_t *q, struct request *rq) | ||
1854 | { | ||
1855 | struct cfq_data *cfqd = q->elevator->elevator_data; | ||
1856 | struct cfq_rq *crq = RQ_DATA(rq); | ||
1857 | |||
1858 | if (crq) { | ||
1859 | struct cfq_queue *cfqq = crq->cfq_queue; | ||
1860 | const int rw = rq_data_dir(rq); | ||
1861 | |||
1862 | BUG_ON(!cfqq->allocated[rw]); | ||
1863 | cfqq->allocated[rw]--; | ||
1864 | |||
1865 | put_io_context(crq->io_context->ioc); | ||
1866 | |||
1867 | mempool_free(crq, cfqd->crq_pool); | ||
1868 | rq->elevator_private = NULL; | ||
1869 | |||
1870 | cfq_check_waiters(q, cfqq); | ||
1871 | cfq_put_queue(cfqq); | ||
1872 | } | ||
1873 | } | ||
1874 | |||
1875 | /* | ||
1876 | * Allocate cfq data structures associated with this request. | ||
1877 | */ | ||
1878 | static int | ||
1879 | cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, | ||
1880 | gfp_t gfp_mask) | ||
1881 | { | ||
1882 | struct cfq_data *cfqd = q->elevator->elevator_data; | ||
1883 | struct task_struct *tsk = current; | ||
1884 | struct cfq_io_context *cic; | ||
1885 | const int rw = rq_data_dir(rq); | ||
1886 | pid_t key = cfq_queue_pid(tsk, rw); | ||
1887 | struct cfq_queue *cfqq; | ||
1888 | struct cfq_rq *crq; | ||
1889 | unsigned long flags; | ||
1890 | |||
1891 | might_sleep_if(gfp_mask & __GFP_WAIT); | ||
1892 | |||
1893 | cic = cfq_get_io_context(cfqd, key, gfp_mask); | ||
1894 | |||
1895 | spin_lock_irqsave(q->queue_lock, flags); | ||
1896 | |||
1897 | if (!cic) | ||
1898 | goto queue_fail; | ||
1899 | |||
1900 | if (!cic->cfqq) { | ||
1901 | cfqq = cfq_get_queue(cfqd, key, tsk->ioprio, gfp_mask); | ||
1902 | if (!cfqq) | ||
1903 | goto queue_fail; | ||
1904 | |||
1905 | cic->cfqq = cfqq; | ||
1906 | } else | ||
1907 | cfqq = cic->cfqq; | ||
1908 | |||
1909 | cfqq->allocated[rw]++; | ||
1910 | cfq_clear_cfqq_must_alloc(cfqq); | ||
1911 | cfqd->rq_starved = 0; | ||
1912 | atomic_inc(&cfqq->ref); | ||
1913 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
1914 | |||
1915 | crq = mempool_alloc(cfqd->crq_pool, gfp_mask); | ||
1916 | if (crq) { | ||
1917 | RB_CLEAR(&crq->rb_node); | ||
1918 | crq->rb_key = 0; | ||
1919 | crq->request = rq; | ||
1920 | INIT_HLIST_NODE(&crq->hash); | ||
1921 | crq->cfq_queue = cfqq; | ||
1922 | crq->io_context = cic; | ||
1923 | |||
1924 | if (rw == READ || process_sync(tsk)) | ||
1925 | cfq_mark_crq_is_sync(crq); | ||
1926 | else | ||
1927 | cfq_clear_crq_is_sync(crq); | ||
1928 | |||
1929 | rq->elevator_private = crq; | ||
1930 | return 0; | ||
1931 | } | ||
1932 | |||
1933 | spin_lock_irqsave(q->queue_lock, flags); | ||
1934 | cfqq->allocated[rw]--; | ||
1935 | if (!(cfqq->allocated[0] + cfqq->allocated[1])) | ||
1936 | cfq_mark_cfqq_must_alloc(cfqq); | ||
1937 | cfq_put_queue(cfqq); | ||
1938 | queue_fail: | ||
1939 | if (cic) | ||
1940 | put_io_context(cic->ioc); | ||
1941 | /* | ||
1942 | * mark us as rq-allocation starved. we need to kickstart the process | ||
1943 | * ourselves if there are no pending requests that can do it for us. | ||
1944 | * that would be an extremely rare OOM situation | ||
1945 | */ | ||
1946 | cfqd->rq_starved = 1; | ||
1947 | cfq_schedule_dispatch(cfqd); | ||
1948 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
1949 | return 1; | ||
1950 | } | ||
1951 | |||
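[Editor's note] cfq_set_request() accounts for the request under the queue lock, drops the lock for the mempool allocation (which may sleep), and re-takes it to unwind the accounting if the allocation fails. A minimal user-space sketch of that ordering, using a pthread mutex and malloc purely as stand-ins (struct queue_state and alloc_tracked are hypothetical names):

#include <pthread.h>
#include <stdlib.h>

/* hypothetical per-queue state, loosely mirroring the counters the real
 * code updates under q->queue_lock */
struct queue_state {
	pthread_mutex_t lock;
	int allocated;
};

/* sketch of the ordering only: account under the lock, drop it for the
 * possibly-sleeping allocation, re-take it to undo on failure */
static void *alloc_tracked(struct queue_state *qs, size_t size)
{
	void *p;

	pthread_mutex_lock(&qs->lock);
	qs->allocated++;
	pthread_mutex_unlock(&qs->lock);

	p = malloc(size);		/* may block or fail outside the lock */
	if (p)
		return p;

	pthread_mutex_lock(&qs->lock);
	qs->allocated--;		/* roll back the accounting */
	pthread_mutex_unlock(&qs->lock);
	return NULL;
}

int main(void)
{
	struct queue_state qs = { PTHREAD_MUTEX_INITIALIZER, 0 };

	free(alloc_tracked(&qs, 64));
	return 0;
}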
1952 | static void cfq_kick_queue(void *data) | ||
1953 | { | ||
1954 | request_queue_t *q = data; | ||
1955 | struct cfq_data *cfqd = q->elevator->elevator_data; | ||
1956 | unsigned long flags; | ||
1957 | |||
1958 | spin_lock_irqsave(q->queue_lock, flags); | ||
1959 | |||
1960 | if (cfqd->rq_starved) { | ||
1961 | struct request_list *rl = &q->rq; | ||
1962 | |||
1963 | /* | ||
1964 | * we aren't guaranteed to get a request after this, but we | ||
1965 | * have to be opportunistic | ||
1966 | */ | ||
1967 | smp_mb(); | ||
1968 | if (waitqueue_active(&rl->wait[READ])) | ||
1969 | wake_up(&rl->wait[READ]); | ||
1970 | if (waitqueue_active(&rl->wait[WRITE])) | ||
1971 | wake_up(&rl->wait[WRITE]); | ||
1972 | } | ||
1973 | |||
1974 | blk_remove_plug(q); | ||
1975 | q->request_fn(q); | ||
1976 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
1977 | } | ||
1978 | |||
1979 | /* | ||
1980 | * Timer running if the active_queue is currently idling inside its time slice | ||
1981 | */ | ||
1982 | static void cfq_idle_slice_timer(unsigned long data) | ||
1983 | { | ||
1984 | struct cfq_data *cfqd = (struct cfq_data *) data; | ||
1985 | struct cfq_queue *cfqq; | ||
1986 | unsigned long flags; | ||
1987 | |||
1988 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); | ||
1989 | |||
1990 | if ((cfqq = cfqd->active_queue) != NULL) { | ||
1991 | unsigned long now = jiffies; | ||
1992 | |||
1993 | /* | ||
1994 | * expired | ||
1995 | */ | ||
1996 | if (time_after(now, cfqq->slice_end)) | ||
1997 | goto expire; | ||
1998 | |||
1999 | /* | ||
2000 | * only expire and reinvoke the request handler if there are | ||
2001 | * other queues with pending requests | ||
2002 | */ | ||
2003 | if (!cfqd->busy_queues) { | ||
2004 | cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end); | ||
2005 | add_timer(&cfqd->idle_slice_timer); | ||
2006 | goto out_cont; | ||
2007 | } | ||
2008 | |||
2009 | /* | ||
2010 | * not expired and it has a request pending, let it dispatch | ||
2011 | */ | ||
2012 | if (!RB_EMPTY(&cfqq->sort_list)) { | ||
2013 | cfq_mark_cfqq_must_dispatch(cfqq); | ||
2014 | goto out_kick; | ||
2015 | } | ||
2016 | } | ||
2017 | expire: | ||
2018 | cfq_slice_expired(cfqd, 0); | ||
2019 | out_kick: | ||
2020 | cfq_schedule_dispatch(cfqd); | ||
2021 | out_cont: | ||
2022 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | ||
2023 | } | ||
2024 | |||
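[Editor's note] When the slice has not yet expired and no other queue is busy, the timer above re-arms at the earlier of "idle a little longer" and "end of the current slice", and expiry itself is checked with a wraparound-safe jiffies comparison (time_after()). A small self-contained sketch of both decisions, with an assumed tick rate:

#include <stdio.h>

/* wraparound-safe "a is later than b", the same idea as time_after() */
static int after(unsigned long a, unsigned long b)
{
	return (long)(b - a) < 0;
}

static unsigned long earlier(unsigned long a, unsigned long b)
{
	return after(a, b) ? b : a;
}

int main(void)
{
	unsigned long hz = 1000;		/* assumed tick rate */
	unsigned long now = 5000;
	unsigned long slice_end = 5004;
	unsigned long slice_idle = hz / 100;	/* 10 ticks at HZ=1000 */

	if (after(now, slice_end))
		printf("slice already expired\n");
	else
		/* re-arm at min(now + slice_idle, slice_end) */
		printf("re-arm at tick %lu\n", earlier(now + slice_idle, slice_end));
	return 0;
}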
2025 | /* | ||
2026 | * Timer running if an idle class queue is waiting for service | ||
2027 | */ | ||
2028 | static void cfq_idle_class_timer(unsigned long data) | ||
2029 | { | ||
2030 | struct cfq_data *cfqd = (struct cfq_data *) data; | ||
2031 | unsigned long flags, end; | ||
2032 | |||
2033 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); | ||
2034 | |||
2035 | /* | ||
2036 | * race with a non-idle queue, reset timer | ||
2037 | */ | ||
2038 | end = cfqd->last_end_request + CFQ_IDLE_GRACE; | ||
2039 | if (!time_after_eq(jiffies, end)) { | ||
2040 | cfqd->idle_class_timer.expires = end; | ||
2041 | add_timer(&cfqd->idle_class_timer); | ||
2042 | } else | ||
2043 | cfq_schedule_dispatch(cfqd); | ||
2044 | |||
2045 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | ||
2046 | } | ||
2047 | |||
2048 | static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) | ||
2049 | { | ||
2050 | del_timer_sync(&cfqd->idle_slice_timer); | ||
2051 | del_timer_sync(&cfqd->idle_class_timer); | ||
2052 | blk_sync_queue(cfqd->queue); | ||
2053 | } | ||
2054 | |||
2055 | static void cfq_put_cfqd(struct cfq_data *cfqd) | ||
2056 | { | ||
2057 | request_queue_t *q = cfqd->queue; | ||
2058 | |||
2059 | if (!atomic_dec_and_test(&cfqd->ref)) | ||
2060 | return; | ||
2061 | |||
2062 | cfq_shutdown_timer_wq(cfqd); | ||
2063 | blk_put_queue(q); | ||
2064 | |||
2065 | mempool_destroy(cfqd->crq_pool); | ||
2066 | kfree(cfqd->crq_hash); | ||
2067 | kfree(cfqd->cfq_hash); | ||
2068 | kfree(cfqd); | ||
2069 | } | ||
2070 | |||
2071 | static void cfq_exit_queue(elevator_t *e) | ||
2072 | { | ||
2073 | struct cfq_data *cfqd = e->elevator_data; | ||
2074 | |||
2075 | cfq_shutdown_timer_wq(cfqd); | ||
2076 | cfq_put_cfqd(cfqd); | ||
2077 | } | ||
2078 | |||
2079 | static int cfq_init_queue(request_queue_t *q, elevator_t *e) | ||
2080 | { | ||
2081 | struct cfq_data *cfqd; | ||
2082 | int i; | ||
2083 | |||
2084 | cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); | ||
2085 | if (!cfqd) | ||
2086 | return -ENOMEM; | ||
2087 | |||
2088 | memset(cfqd, 0, sizeof(*cfqd)); | ||
2089 | |||
2090 | for (i = 0; i < CFQ_PRIO_LISTS; i++) | ||
2091 | INIT_LIST_HEAD(&cfqd->rr_list[i]); | ||
2092 | |||
2093 | INIT_LIST_HEAD(&cfqd->busy_rr); | ||
2094 | INIT_LIST_HEAD(&cfqd->cur_rr); | ||
2095 | INIT_LIST_HEAD(&cfqd->idle_rr); | ||
2096 | INIT_LIST_HEAD(&cfqd->empty_list); | ||
2097 | |||
2098 | cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); | ||
2099 | if (!cfqd->crq_hash) | ||
2100 | goto out_crqhash; | ||
2101 | |||
2102 | cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL); | ||
2103 | if (!cfqd->cfq_hash) | ||
2104 | goto out_cfqhash; | ||
2105 | |||
2106 | cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool); | ||
2107 | if (!cfqd->crq_pool) | ||
2108 | goto out_crqpool; | ||
2109 | |||
2110 | for (i = 0; i < CFQ_MHASH_ENTRIES; i++) | ||
2111 | INIT_HLIST_HEAD(&cfqd->crq_hash[i]); | ||
2112 | for (i = 0; i < CFQ_QHASH_ENTRIES; i++) | ||
2113 | INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); | ||
2114 | |||
2115 | e->elevator_data = cfqd; | ||
2116 | |||
2117 | cfqd->queue = q; | ||
2118 | atomic_inc(&q->refcnt); | ||
2119 | |||
2120 | cfqd->max_queued = q->nr_requests / 4; | ||
2121 | q->nr_batching = cfq_queued; | ||
2122 | |||
2123 | init_timer(&cfqd->idle_slice_timer); | ||
2124 | cfqd->idle_slice_timer.function = cfq_idle_slice_timer; | ||
2125 | cfqd->idle_slice_timer.data = (unsigned long) cfqd; | ||
2126 | |||
2127 | init_timer(&cfqd->idle_class_timer); | ||
2128 | cfqd->idle_class_timer.function = cfq_idle_class_timer; | ||
2129 | cfqd->idle_class_timer.data = (unsigned long) cfqd; | ||
2130 | |||
2131 | INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q); | ||
2132 | |||
2133 | atomic_set(&cfqd->ref, 1); | ||
2134 | |||
2135 | cfqd->cfq_queued = cfq_queued; | ||
2136 | cfqd->cfq_quantum = cfq_quantum; | ||
2137 | cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; | ||
2138 | cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; | ||
2139 | cfqd->cfq_back_max = cfq_back_max; | ||
2140 | cfqd->cfq_back_penalty = cfq_back_penalty; | ||
2141 | cfqd->cfq_slice[0] = cfq_slice_async; | ||
2142 | cfqd->cfq_slice[1] = cfq_slice_sync; | ||
2143 | cfqd->cfq_slice_async_rq = cfq_slice_async_rq; | ||
2144 | cfqd->cfq_slice_idle = cfq_slice_idle; | ||
2145 | cfqd->cfq_max_depth = cfq_max_depth; | ||
2146 | |||
2147 | return 0; | ||
2148 | out_crqpool: | ||
2149 | kfree(cfqd->cfq_hash); | ||
2150 | out_cfqhash: | ||
2151 | kfree(cfqd->crq_hash); | ||
2152 | out_crqhash: | ||
2153 | kfree(cfqd); | ||
2154 | return -ENOMEM; | ||
2155 | } | ||
2156 | |||
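[Editor's note] cfq_init_queue() uses the usual goto-unwind idiom: each allocation that fails jumps to a label that frees only what was set up before it, in reverse order. A minimal standalone sketch of the idiom (ctx_create and its members are hypothetical):

#include <stdlib.h>

struct ctx { int *a, *b, *c; };

/* sketch of the goto-unwind idiom: later failure labels free earlier
 * allocations, in reverse order of setup */
static struct ctx *ctx_create(void)
{
	struct ctx *ctx = calloc(1, sizeof(*ctx));

	if (!ctx)
		return NULL;
	ctx->a = malloc(sizeof(int));
	if (!ctx->a)
		goto out_ctx;
	ctx->b = malloc(sizeof(int));
	if (!ctx->b)
		goto out_a;
	ctx->c = malloc(sizeof(int));
	if (!ctx->c)
		goto out_b;
	return ctx;
out_b:
	free(ctx->b);
out_a:
	free(ctx->a);
out_ctx:
	free(ctx);
	return NULL;
}

int main(void)
{
	struct ctx *ctx = ctx_create();

	if (ctx) {
		free(ctx->c);
		free(ctx->b);
		free(ctx->a);
		free(ctx);
	}
	return 0;
}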
2157 | static void cfq_slab_kill(void) | ||
2158 | { | ||
2159 | if (crq_pool) | ||
2160 | kmem_cache_destroy(crq_pool); | ||
2161 | if (cfq_pool) | ||
2162 | kmem_cache_destroy(cfq_pool); | ||
2163 | if (cfq_ioc_pool) | ||
2164 | kmem_cache_destroy(cfq_ioc_pool); | ||
2165 | } | ||
2166 | |||
2167 | static int __init cfq_slab_setup(void) | ||
2168 | { | ||
2169 | crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0, | ||
2170 | NULL, NULL); | ||
2171 | if (!crq_pool) | ||
2172 | goto fail; | ||
2173 | |||
2174 | cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0, | ||
2175 | NULL, NULL); | ||
2176 | if (!cfq_pool) | ||
2177 | goto fail; | ||
2178 | |||
2179 | cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool", | ||
2180 | sizeof(struct cfq_io_context), 0, 0, NULL, NULL); | ||
2181 | if (!cfq_ioc_pool) | ||
2182 | goto fail; | ||
2183 | |||
2184 | return 0; | ||
2185 | fail: | ||
2186 | cfq_slab_kill(); | ||
2187 | return -ENOMEM; | ||
2188 | } | ||
2189 | |||
2190 | /* | ||
2191 | * sysfs parts below --> | ||
2192 | */ | ||
2193 | struct cfq_fs_entry { | ||
2194 | struct attribute attr; | ||
2195 | ssize_t (*show)(struct cfq_data *, char *); | ||
2196 | ssize_t (*store)(struct cfq_data *, const char *, size_t); | ||
2197 | }; | ||
2198 | |||
2199 | static ssize_t | ||
2200 | cfq_var_show(unsigned int var, char *page) | ||
2201 | { | ||
2202 | return sprintf(page, "%d\n", var); | ||
2203 | } | ||
2204 | |||
2205 | static ssize_t | ||
2206 | cfq_var_store(unsigned int *var, const char *page, size_t count) | ||
2207 | { | ||
2208 | char *p = (char *) page; | ||
2209 | |||
2210 | *var = simple_strtoul(p, &p, 10); | ||
2211 | return count; | ||
2212 | } | ||
2213 | |||
2214 | #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ | ||
2215 | static ssize_t __FUNC(struct cfq_data *cfqd, char *page) \ | ||
2216 | { \ | ||
2217 | unsigned int __data = __VAR; \ | ||
2218 | if (__CONV) \ | ||
2219 | __data = jiffies_to_msecs(__data); \ | ||
2220 | return cfq_var_show(__data, (page)); \ | ||
2221 | } | ||
2222 | SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); | ||
2223 | SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0); | ||
2224 | SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); | ||
2225 | SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); | ||
2226 | SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0); | ||
2227 | SHOW_FUNCTION(cfq_back_penalty_show, cfqd->cfq_back_penalty, 0); | ||
2228 | SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); | ||
2229 | SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); | ||
2230 | SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); | ||
2231 | SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); | ||
2232 | SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0); | ||
2233 | #undef SHOW_FUNCTION | ||
2234 | |||
2235 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ | ||
2236 | static ssize_t __FUNC(struct cfq_data *cfqd, const char *page, size_t count) \ | ||
2237 | { \ | ||
2238 | unsigned int __data; \ | ||
2239 | int ret = cfq_var_store(&__data, (page), count); \ | ||
2240 | if (__data < (MIN)) \ | ||
2241 | __data = (MIN); \ | ||
2242 | else if (__data > (MAX)) \ | ||
2243 | __data = (MAX); \ | ||
2244 | if (__CONV) \ | ||
2245 | *(__PTR) = msecs_to_jiffies(__data); \ | ||
2246 | else \ | ||
2247 | *(__PTR) = __data; \ | ||
2248 | return ret; \ | ||
2249 | } | ||
2250 | STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); | ||
2251 | STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0); | ||
2252 | STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1); | ||
2253 | STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1); | ||
2254 | STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); | ||
2255 | STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0); | ||
2256 | STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); | ||
2257 | STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); | ||
2258 | STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); | ||
2259 | STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); | ||
2260 | STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 1, UINT_MAX, 0); | ||
2261 | #undef STORE_FUNCTION | ||
2262 | |||
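[Editor's note] Each STORE_FUNCTION clamps the parsed value into [MIN, MAX] and, for time-valued tunables, converts milliseconds into jiffies; the matching SHOW_FUNCTION converts back the other way. A simplified sketch of that clamp-and-convert, assuming HZ = 1000 and plain scaling (the real msecs_to_jiffies()/jiffies_to_msecs() also handle rounding and overflow):

#include <limits.h>
#include <stdio.h>

#define HZ 1000U	/* assumed tick rate for the example */

static unsigned int clamp_uint(unsigned int v, unsigned int lo, unsigned int hi)
{
	if (v < lo)
		return lo;
	if (v > hi)
		return hi;
	return v;
}

/* simplified conversions; the kernel helpers round and guard overflow */
static unsigned int msecs_to_ticks(unsigned int ms) { return ms * HZ / 1000; }
static unsigned int ticks_to_msecs(unsigned int t)  { return t * 1000 / HZ; }

int main(void)
{
	/* e.g. writing "50" to a time-valued tunable such as slice_idle */
	unsigned int ms = clamp_uint(50, 1, UINT_MAX);
	unsigned int ticks = msecs_to_ticks(ms);

	printf("%u ms -> %u ticks -> %u ms\n", ms, ticks, ticks_to_msecs(ticks));
	return 0;
}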
2263 | static struct cfq_fs_entry cfq_quantum_entry = { | ||
2264 | .attr = {.name = "quantum", .mode = S_IRUGO | S_IWUSR }, | ||
2265 | .show = cfq_quantum_show, | ||
2266 | .store = cfq_quantum_store, | ||
2267 | }; | ||
2268 | static struct cfq_fs_entry cfq_queued_entry = { | ||
2269 | .attr = {.name = "queued", .mode = S_IRUGO | S_IWUSR }, | ||
2270 | .show = cfq_queued_show, | ||
2271 | .store = cfq_queued_store, | ||
2272 | }; | ||
2273 | static struct cfq_fs_entry cfq_fifo_expire_sync_entry = { | ||
2274 | .attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR }, | ||
2275 | .show = cfq_fifo_expire_sync_show, | ||
2276 | .store = cfq_fifo_expire_sync_store, | ||
2277 | }; | ||
2278 | static struct cfq_fs_entry cfq_fifo_expire_async_entry = { | ||
2279 | .attr = {.name = "fifo_expire_async", .mode = S_IRUGO | S_IWUSR }, | ||
2280 | .show = cfq_fifo_expire_async_show, | ||
2281 | .store = cfq_fifo_expire_async_store, | ||
2282 | }; | ||
2283 | static struct cfq_fs_entry cfq_back_max_entry = { | ||
2284 | .attr = {.name = "back_seek_max", .mode = S_IRUGO | S_IWUSR }, | ||
2285 | .show = cfq_back_max_show, | ||
2286 | .store = cfq_back_max_store, | ||
2287 | }; | ||
2288 | static struct cfq_fs_entry cfq_back_penalty_entry = { | ||
2289 | .attr = {.name = "back_seek_penalty", .mode = S_IRUGO | S_IWUSR }, | ||
2290 | .show = cfq_back_penalty_show, | ||
2291 | .store = cfq_back_penalty_store, | ||
2292 | }; | ||
2293 | static struct cfq_fs_entry cfq_slice_sync_entry = { | ||
2294 | .attr = {.name = "slice_sync", .mode = S_IRUGO | S_IWUSR }, | ||
2295 | .show = cfq_slice_sync_show, | ||
2296 | .store = cfq_slice_sync_store, | ||
2297 | }; | ||
2298 | static struct cfq_fs_entry cfq_slice_async_entry = { | ||
2299 | .attr = {.name = "slice_async", .mode = S_IRUGO | S_IWUSR }, | ||
2300 | .show = cfq_slice_async_show, | ||
2301 | .store = cfq_slice_async_store, | ||
2302 | }; | ||
2303 | static struct cfq_fs_entry cfq_slice_async_rq_entry = { | ||
2304 | .attr = {.name = "slice_async_rq", .mode = S_IRUGO | S_IWUSR }, | ||
2305 | .show = cfq_slice_async_rq_show, | ||
2306 | .store = cfq_slice_async_rq_store, | ||
2307 | }; | ||
2308 | static struct cfq_fs_entry cfq_slice_idle_entry = { | ||
2309 | .attr = {.name = "slice_idle", .mode = S_IRUGO | S_IWUSR }, | ||
2310 | .show = cfq_slice_idle_show, | ||
2311 | .store = cfq_slice_idle_store, | ||
2312 | }; | ||
2313 | static struct cfq_fs_entry cfq_max_depth_entry = { | ||
2314 | .attr = {.name = "max_depth", .mode = S_IRUGO | S_IWUSR }, | ||
2315 | .show = cfq_max_depth_show, | ||
2316 | .store = cfq_max_depth_store, | ||
2317 | }; | ||
2318 | |||
2319 | static struct attribute *default_attrs[] = { | ||
2320 | &cfq_quantum_entry.attr, | ||
2321 | &cfq_queued_entry.attr, | ||
2322 | &cfq_fifo_expire_sync_entry.attr, | ||
2323 | &cfq_fifo_expire_async_entry.attr, | ||
2324 | &cfq_back_max_entry.attr, | ||
2325 | &cfq_back_penalty_entry.attr, | ||
2326 | &cfq_slice_sync_entry.attr, | ||
2327 | &cfq_slice_async_entry.attr, | ||
2328 | &cfq_slice_async_rq_entry.attr, | ||
2329 | &cfq_slice_idle_entry.attr, | ||
2330 | &cfq_max_depth_entry.attr, | ||
2331 | NULL, | ||
2332 | }; | ||
2333 | |||
2334 | #define to_cfq(atr) container_of((atr), struct cfq_fs_entry, attr) | ||
2335 | |||
2336 | static ssize_t | ||
2337 | cfq_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | ||
2338 | { | ||
2339 | elevator_t *e = container_of(kobj, elevator_t, kobj); | ||
2340 | struct cfq_fs_entry *entry = to_cfq(attr); | ||
2341 | |||
2342 | if (!entry->show) | ||
2343 | return -EIO; | ||
2344 | |||
2345 | return entry->show(e->elevator_data, page); | ||
2346 | } | ||
2347 | |||
2348 | static ssize_t | ||
2349 | cfq_attr_store(struct kobject *kobj, struct attribute *attr, | ||
2350 | const char *page, size_t length) | ||
2351 | { | ||
2352 | elevator_t *e = container_of(kobj, elevator_t, kobj); | ||
2353 | struct cfq_fs_entry *entry = to_cfq(attr); | ||
2354 | |||
2355 | if (!entry->store) | ||
2356 | return -EIO; | ||
2357 | |||
2358 | return entry->store(e->elevator_data, page, length); | ||
2359 | } | ||
2360 | |||
2361 | static struct sysfs_ops cfq_sysfs_ops = { | ||
2362 | .show = cfq_attr_show, | ||
2363 | .store = cfq_attr_store, | ||
2364 | }; | ||
2365 | |||
2366 | static struct kobj_type cfq_ktype = { | ||
2367 | .sysfs_ops = &cfq_sysfs_ops, | ||
2368 | .default_attrs = default_attrs, | ||
2369 | }; | ||
2370 | |||
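[Editor's note] cfq_attr_show()/cfq_attr_store() above recover their containing structures with container_of(): the attribute pointer is embedded in a cfq_fs_entry and the kobject in the elevator, so subtracting the member offset gives the parent back. A minimal standalone sketch of that idiom (struct attr and struct entry are hypothetical):

#include <stddef.h>
#include <stdio.h>

/* the same idea as the kernel's container_of(): recover the containing
 * structure from a pointer to one of its members */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct attr { const char *name; };

struct entry {
	struct attr attr;
	int value;
};

int main(void)
{
	struct entry e = { { "quantum" }, 4 };
	struct attr *a = &e.attr;	/* what a show/store callback receives */
	struct entry *ent = container_of(a, struct entry, attr);

	printf("%s = %d\n", ent->attr.name, ent->value);
	return 0;
}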
2371 | static struct elevator_type iosched_cfq = { | ||
2372 | .ops = { | ||
2373 | .elevator_merge_fn = cfq_merge, | ||
2374 | .elevator_merged_fn = cfq_merged_request, | ||
2375 | .elevator_merge_req_fn = cfq_merged_requests, | ||
2376 | .elevator_dispatch_fn = cfq_dispatch_requests, | ||
2377 | .elevator_add_req_fn = cfq_insert_request, | ||
2378 | .elevator_activate_req_fn = cfq_activate_request, | ||
2379 | .elevator_deactivate_req_fn = cfq_deactivate_request, | ||
2380 | .elevator_queue_empty_fn = cfq_queue_empty, | ||
2381 | .elevator_completed_req_fn = cfq_completed_request, | ||
2382 | .elevator_former_req_fn = cfq_former_request, | ||
2383 | .elevator_latter_req_fn = cfq_latter_request, | ||
2384 | .elevator_set_req_fn = cfq_set_request, | ||
2385 | .elevator_put_req_fn = cfq_put_request, | ||
2386 | .elevator_may_queue_fn = cfq_may_queue, | ||
2387 | .elevator_init_fn = cfq_init_queue, | ||
2388 | .elevator_exit_fn = cfq_exit_queue, | ||
2389 | }, | ||
2390 | .elevator_ktype = &cfq_ktype, | ||
2391 | .elevator_name = "cfq", | ||
2392 | .elevator_owner = THIS_MODULE, | ||
2393 | }; | ||
2394 | |||
2395 | static int __init cfq_init(void) | ||
2396 | { | ||
2397 | int ret; | ||
2398 | |||
2399 | /* | ||
2400 | * could be 0 on HZ < 1000 setups | ||
2401 | */ | ||
2402 | if (!cfq_slice_async) | ||
2403 | cfq_slice_async = 1; | ||
2404 | if (!cfq_slice_idle) | ||
2405 | cfq_slice_idle = 1; | ||
2406 | |||
2407 | if (cfq_slab_setup()) | ||
2408 | return -ENOMEM; | ||
2409 | |||
2410 | ret = elv_register(&iosched_cfq); | ||
2411 | if (ret) | ||
2412 | cfq_slab_kill(); | ||
2413 | |||
2414 | return ret; | ||
2415 | } | ||
2416 | |||
2417 | static void __exit cfq_exit(void) | ||
2418 | { | ||
2419 | elv_unregister(&iosched_cfq); | ||
2420 | cfq_slab_kill(); | ||
2421 | } | ||
2422 | |||
2423 | module_init(cfq_init); | ||
2424 | module_exit(cfq_exit); | ||
2425 | |||
2426 | MODULE_AUTHOR("Jens Axboe"); | ||
2427 | MODULE_LICENSE("GPL"); | ||
2428 | MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler"); | ||