Diffstat (limited to 'block')
-rw-r--r--  block/cfq-iosched.c  871
-rw-r--r--  block/elevator.c      24
-rw-r--r--  block/genhd.c         14
-rw-r--r--  block/ll_rw_blk.c      7
4 files changed, 471 insertions(+), 445 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index b6491c020f26..64df3fa303b0 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -9,7 +9,6 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/blkdev.h> 10#include <linux/blkdev.h>
11#include <linux/elevator.h> 11#include <linux/elevator.h>
12#include <linux/hash.h>
13#include <linux/rbtree.h> 12#include <linux/rbtree.h>
14#include <linux/ioprio.h> 13#include <linux/ioprio.h>
15 14
@@ -26,19 +25,17 @@ static int cfq_slice_async = HZ / 25;
26static const int cfq_slice_async_rq = 2; 25static const int cfq_slice_async_rq = 2;
27static int cfq_slice_idle = HZ / 125; 26static int cfq_slice_idle = HZ / 125;
28 27
28/*
29 * grace period before allowing idle class to get disk access
30 */
29#define CFQ_IDLE_GRACE (HZ / 10) 31#define CFQ_IDLE_GRACE (HZ / 10)
30#define CFQ_SLICE_SCALE (5)
31
32#define CFQ_KEY_ASYNC (0)
33 32
34/* 33/*
35 * for the hash of cfqq inside the cfqd 34 * below this threshold, we consider thinktime immediate
36 */ 35 */
37#define CFQ_QHASH_SHIFT 6 36#define CFQ_MIN_TT (2)
38#define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT)
39#define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash)
40 37
41#define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) 38#define CFQ_SLICE_SCALE (5)
42 39
43#define RQ_CIC(rq) ((struct cfq_io_context*)(rq)->elevator_private) 40#define RQ_CIC(rq) ((struct cfq_io_context*)(rq)->elevator_private)
44#define RQ_CFQQ(rq) ((rq)->elevator_private2) 41#define RQ_CFQQ(rq) ((rq)->elevator_private2)
@@ -56,17 +53,21 @@ static struct completion *ioc_gone;
56#define ASYNC (0) 53#define ASYNC (0)
57#define SYNC (1) 54#define SYNC (1)
58 55
59#define cfq_cfqq_dispatched(cfqq) \
60 ((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC])
61
62#define cfq_cfqq_class_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC)
63
64#define cfq_cfqq_sync(cfqq) \
65 (cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC])
66
67#define sample_valid(samples) ((samples) > 80) 56#define sample_valid(samples) ((samples) > 80)
68 57
69/* 58/*
59 * Most of our rbtree usage is for sorting with min extraction, so
60 * if we cache the leftmost node we don't have to walk down the tree
61 * to find it. Idea borrowed from Ingo Molnars CFS scheduler. We should
62 * move this into the elevator for the rq sorting as well.
63 */
64struct cfq_rb_root {
65 struct rb_root rb;
66 struct rb_node *left;
67};
68#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, }
69
70/*
70 * Per block device queue structure 71 * Per block device queue structure
71 */ 72 */
72struct cfq_data { 73struct cfq_data {
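The comment in this hunk describes the idea the rest of the patch builds on: keep a pointer to the leftmost (minimum) node of the service tree so picking the next queue does not require walking down the rbtree each time. A minimal userspace sketch of that caching rule follows; it uses a plain unbalanced binary search tree instead of the kernel's rbtree, and all names (cached_root, tree_first, tree_erase_min) are illustrative rather than kernel API. The cache is set when an insertion only ever descends left (the new node is the new minimum), consulted on lookup, and dropped when the cached node is erased, mirroring what cfq_rb_first() and cfq_rb_erase() do later in the diff.

#include <stdio.h>
#include <stdlib.h>

struct node {
	unsigned long key;
	struct node *left, *right;
};

struct cached_root {
	struct node *root;      /* the tree itself */
	struct node *leftmost;  /* cached minimum, NULL when unknown */
};

static void tree_insert(struct cached_root *t, struct node *n)
{
	struct node **p = &t->root;
	int is_leftmost = 1;    /* did we only ever descend left? */

	while (*p) {
		if (n->key < (*p)->key)
			p = &(*p)->left;
		else {
			p = &(*p)->right;
			is_leftmost = 0;
		}
	}
	*p = n;
	if (is_leftmost)        /* same rule as the 'left' flag in cfq_service_tree_add() */
		t->leftmost = n;
}

static struct node *tree_first(struct cached_root *t)
{
	struct node *n = t->root;

	if (t->leftmost)
		return t->leftmost;     /* O(1) fast path, like cfq_rb_first() */
	while (n && n->left)
		n = n->left;            /* walk down only on a cache miss */
	t->leftmost = n;
	return n;
}

static void tree_erase_min(struct cached_root *t)
{
	struct node *min = tree_first(t), **p = &t->root;

	if (!min)
		return;
	while (*p != min)
		p = &(*p)->left;        /* the minimum sits at the end of the left spine */
	*p = min->right;                /* splice out: the minimum has no left child */
	t->leftmost = NULL;             /* cache pointed at the erased node, like cfq_rb_erase() */
	free(min);
}

int main(void)
{
	struct cached_root t = { NULL, NULL };
	unsigned long keys[] = { 30, 10, 20 };

	for (int i = 0; i < 3; i++) {
		struct node *n = calloc(1, sizeof(*n));
		n->key = keys[i];
		tree_insert(&t, n);
	}
	while (tree_first(&t)) {
		printf("%lu\n", tree_first(&t)->key);   /* prints 10, 20, 30 */
		tree_erase_min(&t);
	}
	return 0;
}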
@@ -75,18 +76,11 @@ struct cfq_data {
75 /* 76 /*
76 * rr list of queues with requests and the count of them 77 * rr list of queues with requests and the count of them
77 */ 78 */
78 struct list_head rr_list[CFQ_PRIO_LISTS]; 79 struct cfq_rb_root service_tree;
79 struct list_head busy_rr;
80 struct list_head cur_rr;
81 struct list_head idle_rr;
82 unsigned int busy_queues; 80 unsigned int busy_queues;
83 81
84 /*
85 * cfqq lookup hash
86 */
87 struct hlist_head *cfq_hash;
88
89 int rq_in_driver; 82 int rq_in_driver;
83 int sync_flight;
90 int hw_tag; 84 int hw_tag;
91 85
92 /* 86 /*
@@ -97,12 +91,10 @@ struct cfq_data {
97 91
98 struct cfq_queue *active_queue; 92 struct cfq_queue *active_queue;
99 struct cfq_io_context *active_cic; 93 struct cfq_io_context *active_cic;
100 int cur_prio, cur_end_prio;
101 unsigned int dispatch_slice;
102 94
103 struct timer_list idle_class_timer; 95 struct timer_list idle_class_timer;
104 96
105 sector_t last_sector; 97 sector_t last_position;
106 unsigned long last_end_request; 98 unsigned long last_end_request;
107 99
108 /* 100 /*
@@ -117,6 +109,9 @@ struct cfq_data {
117 unsigned int cfq_slice_idle; 109 unsigned int cfq_slice_idle;
118 110
119 struct list_head cic_list; 111 struct list_head cic_list;
112
113 sector_t new_seek_mean;
114 u64 new_seek_total;
120}; 115};
121 116
122/* 117/*
@@ -127,12 +122,10 @@ struct cfq_queue {
127 atomic_t ref; 122 atomic_t ref;
128 /* parent cfq_data */ 123 /* parent cfq_data */
129 struct cfq_data *cfqd; 124 struct cfq_data *cfqd;
130 /* cfqq lookup hash */ 125 /* service_tree member */
131 struct hlist_node cfq_hash; 126 struct rb_node rb_node;
132 /* hash key */ 127 /* service_tree key */
133 unsigned int key; 128 unsigned long rb_key;
134 /* member of the rr/busy/cur/idle cfqd list */
135 struct list_head cfq_list;
136 /* sorted list of pending requests */ 129 /* sorted list of pending requests */
137 struct rb_root sort_list; 130 struct rb_root sort_list;
138 /* if fifo isn't expired, next request to serve */ 131 /* if fifo isn't expired, next request to serve */
@@ -147,11 +140,10 @@ struct cfq_queue {
147 struct list_head fifo; 140 struct list_head fifo;
148 141
149 unsigned long slice_end; 142 unsigned long slice_end;
150 unsigned long service_last;
151 long slice_resid; 143 long slice_resid;
152 144
153 /* number of requests that are on the dispatch list */ 145 /* number of requests that are on the dispatch list or inside driver */
154 int on_dispatch[2]; 146 int dispatched;
155 147
156 /* io prio of this group */ 148 /* io prio of this group */
157 unsigned short ioprio, org_ioprio; 149 unsigned short ioprio, org_ioprio;
@@ -159,6 +151,8 @@ struct cfq_queue {
159 151
160 /* various state flags, see below */ 152 /* various state flags, see below */
161 unsigned int flags; 153 unsigned int flags;
154
155 sector_t last_request_pos;
162}; 156};
163 157
164enum cfqq_state_flags { 158enum cfqq_state_flags {
@@ -172,6 +166,7 @@ enum cfqq_state_flags {
172 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ 166 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */
173 CFQ_CFQQ_FLAG_queue_new, /* queue never been serviced */ 167 CFQ_CFQQ_FLAG_queue_new, /* queue never been serviced */
174 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ 168 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */
169 CFQ_CFQQ_FLAG_sync, /* synchronous queue */
175}; 170};
176 171
177#define CFQ_CFQQ_FNS(name) \ 172#define CFQ_CFQQ_FNS(name) \
@@ -198,11 +193,38 @@ CFQ_CFQQ_FNS(idle_window);
198CFQ_CFQQ_FNS(prio_changed); 193CFQ_CFQQ_FNS(prio_changed);
199CFQ_CFQQ_FNS(queue_new); 194CFQ_CFQQ_FNS(queue_new);
200CFQ_CFQQ_FNS(slice_new); 195CFQ_CFQQ_FNS(slice_new);
196CFQ_CFQQ_FNS(sync);
201#undef CFQ_CFQQ_FNS 197#undef CFQ_CFQQ_FNS
202 198
203static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
204static void cfq_dispatch_insert(request_queue_t *, struct request *); 199static void cfq_dispatch_insert(request_queue_t *, struct request *);
205static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask); 200static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
201 struct task_struct *, gfp_t);
202static struct cfq_io_context *cfq_cic_rb_lookup(struct cfq_data *,
203 struct io_context *);
204
205static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
206 int is_sync)
207{
208 return cic->cfqq[!!is_sync];
209}
210
211static inline void cic_set_cfqq(struct cfq_io_context *cic,
212 struct cfq_queue *cfqq, int is_sync)
213{
214 cic->cfqq[!!is_sync] = cfqq;
215}
216
217/*
218 * We regard a request as SYNC, if it's either a read or has the SYNC bit
219 * set (in which case it could also be direct WRITE).
220 */
221static inline int cfq_bio_sync(struct bio *bio)
222{
223 if (bio_data_dir(bio) == READ || bio_sync(bio))
224 return 1;
225
226 return 0;
227}
206 228
207/* 229/*
208 * scheduler run of queue, if there are requests pending and no one in the 230 * scheduler run of queue, if there are requests pending and no one in the
@@ -221,44 +243,31 @@ static int cfq_queue_empty(request_queue_t *q)
221 return !cfqd->busy_queues; 243 return !cfqd->busy_queues;
222} 244}
223 245
224static inline pid_t cfq_queue_pid(struct task_struct *task, int rw, int is_sync)
225{
226 /*
227 * Use the per-process queue, for read requests and syncronous writes
228 */
229 if (!(rw & REQ_RW) || is_sync)
230 return task->pid;
231
232 return CFQ_KEY_ASYNC;
233}
234
235/* 246/*
236 * Scale schedule slice based on io priority. Use the sync time slice only 247 * Scale schedule slice based on io priority. Use the sync time slice only
237 * if a queue is marked sync and has sync io queued. A sync queue with async 248 * if a queue is marked sync and has sync io queued. A sync queue with async
238 * io only, should not get full sync slice length. 249 * io only, should not get full sync slice length.
239 */ 250 */
240static inline int 251static inline int cfq_prio_slice(struct cfq_data *cfqd, int sync,
241cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 252 unsigned short prio)
242{ 253{
243 const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)]; 254 const int base_slice = cfqd->cfq_slice[sync];
244 255
245 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); 256 WARN_ON(prio >= IOPRIO_BE_NR);
257
258 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio));
259}
246 260
247 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio)); 261static inline int
262cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
263{
264 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
248} 265}
249 266
250static inline void 267static inline void
251cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 268cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
252{ 269{
253 cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; 270 cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
254 cfqq->slice_end += cfqq->slice_resid;
255
256 /*
257 * Don't carry over residual for more than one slice, we only want
258 * to slightly correct the fairness. Carrying over forever would
259 * easily introduce oscillations.
260 */
261 cfqq->slice_resid = 0;
262} 271}
263 272
264/* 273/*
@@ -307,7 +316,7 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2)
307 s1 = rq1->sector; 316 s1 = rq1->sector;
308 s2 = rq2->sector; 317 s2 = rq2->sector;
309 318
310 last = cfqd->last_sector; 319 last = cfqd->last_position;
311 320
312 /* 321 /*
313 * by definition, 1KiB is 2 sectors 322 * by definition, 1KiB is 2 sectors
@@ -372,6 +381,26 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2)
372} 381}
373 382
374/* 383/*
384 * The below is leftmost cache rbtree addon
385 */
386static struct rb_node *cfq_rb_first(struct cfq_rb_root *root)
387{
388 if (!root->left)
389 root->left = rb_first(&root->rb);
390
391 return root->left;
392}
393
394static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
395{
396 if (root->left == n)
397 root->left = NULL;
398
399 rb_erase(n, &root->rb);
400 RB_CLEAR_NODE(n);
401}
402
403/*
375 * would be nice to take fifo expire time into account as well 404 * would be nice to take fifo expire time into account as well
376 */ 405 */
377static struct request * 406static struct request *
@@ -398,78 +427,96 @@ cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
398 return cfq_choose_req(cfqd, next, prev); 427 return cfq_choose_req(cfqd, next, prev);
399} 428}
400 429
401static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) 430static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
431 struct cfq_queue *cfqq)
402{ 432{
403 struct cfq_data *cfqd = cfqq->cfqd;
404 struct list_head *list, *n;
405 struct cfq_queue *__cfqq;
406
407 /* 433 /*
408 * Resorting requires the cfqq to be on the RR list already. 434 * just an approximation, should be ok.
409 */ 435 */
410 if (!cfq_cfqq_on_rr(cfqq)) 436 return (cfqd->busy_queues - 1) * (cfq_prio_slice(cfqd, 1, 0) -
411 return; 437 cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
438}
412 439
413 list_del(&cfqq->cfq_list); 440/*
441 * The cfqd->service_tree holds all pending cfq_queue's that have
442 * requests waiting to be processed. It is sorted in the order that
443 * we will service the queues.
444 */
445static void cfq_service_tree_add(struct cfq_data *cfqd,
446 struct cfq_queue *cfqq, int add_front)
447{
448 struct rb_node **p = &cfqd->service_tree.rb.rb_node;
449 struct rb_node *parent = NULL;
450 unsigned long rb_key;
451 int left;
452
453 if (!add_front) {
454 rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
455 rb_key += cfqq->slice_resid;
456 cfqq->slice_resid = 0;
457 } else
458 rb_key = 0;
414 459
415 if (cfq_class_rt(cfqq)) 460 if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
416 list = &cfqd->cur_rr;
417 else if (cfq_class_idle(cfqq))
418 list = &cfqd->idle_rr;
419 else {
420 /* 461 /*
421 * if cfqq has requests in flight, don't allow it to be 462 * same position, nothing more to do
422 * found in cfq_set_active_queue before it has finished them.
423 * this is done to increase fairness between a process that
424 * has lots of io pending vs one that only generates one
425 * sporadically or synchronously
426 */ 463 */
427 if (cfq_cfqq_dispatched(cfqq)) 464 if (rb_key == cfqq->rb_key)
428 list = &cfqd->busy_rr; 465 return;
429 else 466
430 list = &cfqd->rr_list[cfqq->ioprio]; 467 cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
431 } 468 }
432 469
433 if (preempted || cfq_cfqq_queue_new(cfqq)) { 470 left = 1;
434 /* 471 while (*p) {
435 * If this queue was preempted or is new (never been serviced), 472 struct cfq_queue *__cfqq;
436 * let it be added first for fairness but beind other new 473 struct rb_node **n;
437 * queues. 474
438 */ 475 parent = *p;
439 n = list; 476 __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
440 while (n->next != list) {
441 __cfqq = list_entry_cfqq(n->next);
442 if (!cfq_cfqq_queue_new(__cfqq))
443 break;
444 477
445 n = n->next;
446 }
447 list_add_tail(&cfqq->cfq_list, n);
448 } else if (!cfq_cfqq_class_sync(cfqq)) {
449 /*
450 * async queue always goes to the end. this wont be overly
451 * unfair to writes, as the sort of the sync queue wont be
452 * allowed to pass the async queue again.
453 */
454 list_add_tail(&cfqq->cfq_list, list);
455 } else {
456 /* 478 /*
457 * sort by last service, but don't cross a new or async 479 * sort RT queues first, we always want to give
458 * queue. we don't cross a new queue because it hasn't been 480 * preference to them. IDLE queues goes to the back.
459 * service before, and we don't cross an async queue because 481 * after that, sort on the next service time.
460 * it gets added to the end on expire.
461 */ 482 */
462 n = list; 483 if (cfq_class_rt(cfqq) > cfq_class_rt(__cfqq))
463 while ((n = n->prev) != list) { 484 n = &(*p)->rb_left;
464 struct cfq_queue *__cfqq = list_entry_cfqq(n); 485 else if (cfq_class_rt(cfqq) < cfq_class_rt(__cfqq))
486 n = &(*p)->rb_right;
487 else if (cfq_class_idle(cfqq) < cfq_class_idle(__cfqq))
488 n = &(*p)->rb_left;
489 else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq))
490 n = &(*p)->rb_right;
491 else if (rb_key < __cfqq->rb_key)
492 n = &(*p)->rb_left;
493 else
494 n = &(*p)->rb_right;
465 495
466 if (!cfq_cfqq_class_sync(cfqq) || !__cfqq->service_last) 496 if (n == &(*p)->rb_right)
467 break; 497 left = 0;
468 if (time_before(__cfqq->service_last, cfqq->service_last)) 498
469 break; 499 p = n;
470 }
471 list_add(&cfqq->cfq_list, n);
472 } 500 }
501
502 if (left)
503 cfqd->service_tree.left = &cfqq->rb_node;
504
505 cfqq->rb_key = rb_key;
506 rb_link_node(&cfqq->rb_node, parent, p);
507 rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb);
508}
509
510/*
511 * Update cfqq's position in the service tree.
512 */
513static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
514{
515 /*
516 * Resorting requires the cfqq to be on the RR list already.
517 */
518 if (cfq_cfqq_on_rr(cfqq))
519 cfq_service_tree_add(cfqd, cfqq, 0);
473} 520}
474 521
475/* 522/*
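cfq_service_tree_add() above orders queues by scheduling class first (RT ahead of best-effort, idle last) and then by rb_key, an approximate next-service time built from a per-queue slice offset plus jiffies and any leftover slice_resid (or 0 when add_front puts a preempting queue at the head of its class). The comparison can be read as a single predicate; the sketch below uses made-up names (cfqq_key, cfqq_before) and is only an illustration of the ordering, not kernel code:

#include <stdbool.h>
#include <stdio.h>

/* illustrative stand-in for the fields cfq_service_tree_add() compares */
struct cfqq_key {
	bool class_rt;          /* cfq_class_rt(cfqq)   */
	bool class_idle;        /* cfq_class_idle(cfqq) */
	unsigned long rb_key;   /* approximate next service time, in jiffies */
};

/* true if 'a' should be served before 'b', i.e. inserted to its left */
static bool cfqq_before(const struct cfqq_key *a, const struct cfqq_key *b)
{
	if (a->class_rt != b->class_rt)
		return a->class_rt;         /* RT queues always sort first */
	if (a->class_idle != b->class_idle)
		return b->class_idle;       /* idle queues always sort last */
	return a->rb_key < b->rb_key;       /* otherwise: earlier service time first */
}

int main(void)
{
	struct cfqq_key rt   = { true,  false, 500 };
	struct cfqq_key be   = { false, false, 100 };
	struct cfqq_key idle = { false, true,   10 };

	/* RT beats BE despite a later rb_key; idle loses even with the earliest key */
	printf("%d %d\n", cfqq_before(&rt, &be), cfqq_before(&idle, &be));  /* 1 0 */
	return 0;
}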
@@ -483,15 +530,21 @@ cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
483 cfq_mark_cfqq_on_rr(cfqq); 530 cfq_mark_cfqq_on_rr(cfqq);
484 cfqd->busy_queues++; 531 cfqd->busy_queues++;
485 532
486 cfq_resort_rr_list(cfqq, 0); 533 cfq_resort_rr_list(cfqd, cfqq);
487} 534}
488 535
536/*
537 * Called when the cfqq no longer has requests pending, remove it from
538 * the service tree.
539 */
489static inline void 540static inline void
490cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 541cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
491{ 542{
492 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 543 BUG_ON(!cfq_cfqq_on_rr(cfqq));
493 cfq_clear_cfqq_on_rr(cfqq); 544 cfq_clear_cfqq_on_rr(cfqq);
494 list_del_init(&cfqq->cfq_list); 545
546 if (!RB_EMPTY_NODE(&cfqq->rb_node))
547 cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
495 548
496 BUG_ON(!cfqd->busy_queues); 549 BUG_ON(!cfqd->busy_queues);
497 cfqd->busy_queues--; 550 cfqd->busy_queues--;
@@ -532,6 +585,12 @@ static void cfq_add_rq_rb(struct request *rq)
532 585
533 if (!cfq_cfqq_on_rr(cfqq)) 586 if (!cfq_cfqq_on_rr(cfqq))
534 cfq_add_cfqq_rr(cfqd, cfqq); 587 cfq_add_cfqq_rr(cfqd, cfqq);
588
589 /*
590 * check if this request is a better next-serve candidate
591 */
592 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
593 BUG_ON(!cfqq->next_rq);
535} 594}
536 595
537static inline void 596static inline void
@@ -546,10 +605,14 @@ static struct request *
546cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) 605cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
547{ 606{
548 struct task_struct *tsk = current; 607 struct task_struct *tsk = current;
549 pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio), bio_sync(bio)); 608 struct cfq_io_context *cic;
550 struct cfq_queue *cfqq; 609 struct cfq_queue *cfqq;
551 610
552 cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio); 611 cic = cfq_cic_rb_lookup(cfqd, tsk->io_context);
612 if (!cic)
613 return NULL;
614
615 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
553 if (cfqq) { 616 if (cfqq) {
554 sector_t sector = bio->bi_sector + bio_sectors(bio); 617 sector_t sector = bio->bi_sector + bio_sectors(bio);
555 618
@@ -573,6 +636,8 @@ static void cfq_activate_request(request_queue_t *q, struct request *rq)
573 */ 636 */
574 if (!cfqd->hw_tag && cfqd->rq_in_driver > 4) 637 if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
575 cfqd->hw_tag = 1; 638 cfqd->hw_tag = 1;
639
640 cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors;
576} 641}
577 642
578static void cfq_deactivate_request(request_queue_t *q, struct request *rq) 643static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
@@ -599,8 +664,7 @@ static void cfq_remove_request(struct request *rq)
599 } 664 }
600} 665}
601 666
602static int 667static int cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
603cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
604{ 668{
605 struct cfq_data *cfqd = q->elevator->elevator_data; 669 struct cfq_data *cfqd = q->elevator->elevator_data;
606 struct request *__rq; 670 struct request *__rq;
@@ -642,23 +706,24 @@ static int cfq_allow_merge(request_queue_t *q, struct request *rq,
642 struct bio *bio) 706 struct bio *bio)
643{ 707{
644 struct cfq_data *cfqd = q->elevator->elevator_data; 708 struct cfq_data *cfqd = q->elevator->elevator_data;
645 const int rw = bio_data_dir(bio); 709 struct cfq_io_context *cic;
646 struct cfq_queue *cfqq; 710 struct cfq_queue *cfqq;
647 pid_t key;
648 711
649 /* 712 /*
650 * Disallow merge of a sync bio into an async request. 713 * Disallow merge of a sync bio into an async request.
651 */ 714 */
652 if ((bio_data_dir(bio) == READ || bio_sync(bio)) && !rq_is_sync(rq)) 715 if (cfq_bio_sync(bio) && !rq_is_sync(rq))
653 return 0; 716 return 0;
654 717
655 /* 718 /*
656 * Lookup the cfqq that this bio will be queued with. Allow 719 * Lookup the cfqq that this bio will be queued with. Allow
657 * merge only if rq is queued there. 720 * merge only if rq is queued there.
658 */ 721 */
659 key = cfq_queue_pid(current, rw, bio_sync(bio)); 722 cic = cfq_cic_rb_lookup(cfqd, current->io_context);
660 cfqq = cfq_find_cfq_hash(cfqd, key, current->ioprio); 723 if (!cic)
724 return 0;
661 725
726 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
662 if (cfqq == RQ_CFQQ(rq)) 727 if (cfqq == RQ_CFQQ(rq))
663 return 1; 728 return 1;
664 729
@@ -678,6 +743,7 @@ __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
678 cfq_clear_cfqq_must_alloc_slice(cfqq); 743 cfq_clear_cfqq_must_alloc_slice(cfqq);
679 cfq_clear_cfqq_fifo_expire(cfqq); 744 cfq_clear_cfqq_fifo_expire(cfqq);
680 cfq_mark_cfqq_slice_new(cfqq); 745 cfq_mark_cfqq_slice_new(cfqq);
746 cfq_clear_cfqq_queue_new(cfqq);
681 } 747 }
682 748
683 cfqd->active_queue = cfqq; 749 cfqd->active_queue = cfqq;
@@ -688,23 +754,21 @@ __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
688 */ 754 */
689static void 755static void
690__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, 756__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
691 int preempted, int timed_out) 757 int timed_out)
692{ 758{
693 if (cfq_cfqq_wait_request(cfqq)) 759 if (cfq_cfqq_wait_request(cfqq))
694 del_timer(&cfqd->idle_slice_timer); 760 del_timer(&cfqd->idle_slice_timer);
695 761
696 cfq_clear_cfqq_must_dispatch(cfqq); 762 cfq_clear_cfqq_must_dispatch(cfqq);
697 cfq_clear_cfqq_wait_request(cfqq); 763 cfq_clear_cfqq_wait_request(cfqq);
698 cfq_clear_cfqq_queue_new(cfqq);
699 764
700 /* 765 /*
701 * store what was left of this slice, if the queue idled out 766 * store what was left of this slice, if the queue idled/timed out
702 * or was preempted
703 */ 767 */
704 if (timed_out && !cfq_cfqq_slice_new(cfqq)) 768 if (timed_out && !cfq_cfqq_slice_new(cfqq))
705 cfqq->slice_resid = cfqq->slice_end - jiffies; 769 cfqq->slice_resid = cfqq->slice_end - jiffies;
706 770
707 cfq_resort_rr_list(cfqq, preempted); 771 cfq_resort_rr_list(cfqd, cfqq);
708 772
709 if (cfqq == cfqd->active_queue) 773 if (cfqq == cfqd->active_queue)
710 cfqd->active_queue = NULL; 774 cfqd->active_queue = NULL;
@@ -713,163 +777,152 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
713 put_io_context(cfqd->active_cic->ioc); 777 put_io_context(cfqd->active_cic->ioc);
714 cfqd->active_cic = NULL; 778 cfqd->active_cic = NULL;
715 } 779 }
716
717 cfqd->dispatch_slice = 0;
718} 780}
719 781
720static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted, 782static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out)
721 int timed_out)
722{ 783{
723 struct cfq_queue *cfqq = cfqd->active_queue; 784 struct cfq_queue *cfqq = cfqd->active_queue;
724 785
725 if (cfqq) 786 if (cfqq)
726 __cfq_slice_expired(cfqd, cfqq, preempted, timed_out); 787 __cfq_slice_expired(cfqd, cfqq, timed_out);
727} 788}
728 789
729/* 790/*
730 * 0 791 * Get next queue for service. Unless we have a queue preemption,
731 * 0,1 792 * we'll simply select the first cfqq in the service tree.
732 * 0,1,2
733 * 0,1,2,3
734 * 0,1,2,3,4
735 * 0,1,2,3,4,5
736 * 0,1,2,3,4,5,6
737 * 0,1,2,3,4,5,6,7
738 */ 793 */
739static int cfq_get_next_prio_level(struct cfq_data *cfqd) 794static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
740{ 795{
741 int prio, wrap; 796 struct cfq_queue *cfqq;
797 struct rb_node *n;
742 798
743 prio = -1; 799 if (RB_EMPTY_ROOT(&cfqd->service_tree.rb))
744 wrap = 0; 800 return NULL;
745 do {
746 int p;
747 801
748 for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) { 802 n = cfq_rb_first(&cfqd->service_tree);
749 if (!list_empty(&cfqd->rr_list[p])) { 803 cfqq = rb_entry(n, struct cfq_queue, rb_node);
750 prio = p;
751 break;
752 }
753 }
754 804
755 if (prio != -1) 805 if (cfq_class_idle(cfqq)) {
756 break; 806 unsigned long end;
757 cfqd->cur_prio = 0;
758 if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
759 cfqd->cur_end_prio = 0;
760 if (wrap)
761 break;
762 wrap = 1;
763 }
764 } while (1);
765 807
766 if (unlikely(prio == -1)) 808 /*
767 return -1; 809 * if we have idle queues and no rt or be queues had
810 * pending requests, either allow immediate service if
811 * the grace period has passed or arm the idle grace
812 * timer
813 */
814 end = cfqd->last_end_request + CFQ_IDLE_GRACE;
815 if (time_before(jiffies, end)) {
816 mod_timer(&cfqd->idle_class_timer, end);
817 cfqq = NULL;
818 }
819 }
768 820
769 BUG_ON(prio >= CFQ_PRIO_LISTS); 821 return cfqq;
822}
770 823
771 list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr); 824/*
825 * Get and set a new active queue for service.
826 */
827static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
828{
829 struct cfq_queue *cfqq;
772 830
773 cfqd->cur_prio = prio + 1; 831 cfqq = cfq_get_next_queue(cfqd);
774 if (cfqd->cur_prio > cfqd->cur_end_prio) { 832 __cfq_set_active_queue(cfqd, cfqq);
775 cfqd->cur_end_prio = cfqd->cur_prio; 833 return cfqq;
776 cfqd->cur_prio = 0; 834}
777 }
778 if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
779 cfqd->cur_prio = 0;
780 cfqd->cur_end_prio = 0;
781 }
782 835
783 return prio; 836static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
837 struct request *rq)
838{
839 if (rq->sector >= cfqd->last_position)
840 return rq->sector - cfqd->last_position;
841 else
842 return cfqd->last_position - rq->sector;
784} 843}
785 844
786static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) 845static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
787{ 846{
788 struct cfq_queue *cfqq = NULL; 847 struct cfq_io_context *cic = cfqd->active_cic;
789 848
790 if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) { 849 if (!sample_valid(cic->seek_samples))
791 /* 850 return 0;
792 * if current list is non-empty, grab first entry. if it is
793 * empty, get next prio level and grab first entry then if any
794 * are spliced
795 */
796 cfqq = list_entry_cfqq(cfqd->cur_rr.next);
797 } else if (!list_empty(&cfqd->busy_rr)) {
798 /*
799 * If no new queues are available, check if the busy list has
800 * some before falling back to idle io.
801 */
802 cfqq = list_entry_cfqq(cfqd->busy_rr.next);
803 } else if (!list_empty(&cfqd->idle_rr)) {
804 /*
805 * if we have idle queues and no rt or be queues had pending
806 * requests, either allow immediate service if the grace period
807 * has passed or arm the idle grace timer
808 */
809 unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;
810 851
811 if (time_after_eq(jiffies, end)) 852 return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean;
812 cfqq = list_entry_cfqq(cfqd->idle_rr.next); 853}
813 else
814 mod_timer(&cfqd->idle_class_timer, end);
815 }
816 854
817 __cfq_set_active_queue(cfqd, cfqq); 855static int cfq_close_cooperator(struct cfq_data *cfq_data,
818 return cfqq; 856 struct cfq_queue *cfqq)
857{
858 /*
859 * We should notice if some of the queues are cooperating, eg
860 * working closely on the same area of the disk. In that case,
861 * we can group them together and don't waste time idling.
862 */
863 return 0;
819} 864}
820 865
821#define CIC_SEEKY(cic) ((cic)->seek_mean > (128 * 1024)) 866#define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024))
822 867
823static int cfq_arm_slice_timer(struct cfq_data *cfqd) 868static void cfq_arm_slice_timer(struct cfq_data *cfqd)
824{ 869{
825 struct cfq_queue *cfqq = cfqd->active_queue; 870 struct cfq_queue *cfqq = cfqd->active_queue;
826 struct cfq_io_context *cic; 871 struct cfq_io_context *cic;
827 unsigned long sl; 872 unsigned long sl;
828 873
829 WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list)); 874 WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
875 WARN_ON(cfq_cfqq_slice_new(cfqq));
830 876
831 /* 877 /*
832 * idle is disabled, either manually or by past process history 878 * idle is disabled, either manually or by past process history
833 */ 879 */
834 if (!cfqd->cfq_slice_idle) 880 if (!cfqd->cfq_slice_idle || !cfq_cfqq_idle_window(cfqq))
835 return 0; 881 return;
836 if (!cfq_cfqq_idle_window(cfqq)) 882
837 return 0;
838 /* 883 /*
839 * task has exited, don't wait 884 * task has exited, don't wait
840 */ 885 */
841 cic = cfqd->active_cic; 886 cic = cfqd->active_cic;
842 if (!cic || !cic->ioc->task) 887 if (!cic || !cic->ioc->task)
843 return 0; 888 return;
889
890 /*
891 * See if this prio level has a good candidate
892 */
893 if (cfq_close_cooperator(cfqd, cfqq) &&
894 (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2))
895 return;
844 896
845 cfq_mark_cfqq_must_dispatch(cfqq); 897 cfq_mark_cfqq_must_dispatch(cfqq);
846 cfq_mark_cfqq_wait_request(cfqq); 898 cfq_mark_cfqq_wait_request(cfqq);
847 899
848 sl = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle);
849
850 /* 900 /*
851 * we don't want to idle for seeks, but we do want to allow 901 * we don't want to idle for seeks, but we do want to allow
852 * fair distribution of slice time for a process doing back-to-back 902 * fair distribution of slice time for a process doing back-to-back
853 * seeks. so allow a little bit of time for him to submit a new rq 903 * seeks. so allow a little bit of time for him to submit a new rq
854 */ 904 */
905 sl = cfqd->cfq_slice_idle;
855 if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic)) 906 if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
856 sl = min(sl, msecs_to_jiffies(2)); 907 sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
857 908
858 mod_timer(&cfqd->idle_slice_timer, jiffies + sl); 909 mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
859 return 1;
860} 910}
861 911
912/*
913 * Move request from internal lists to the request queue dispatch list.
914 */
862static void cfq_dispatch_insert(request_queue_t *q, struct request *rq) 915static void cfq_dispatch_insert(request_queue_t *q, struct request *rq)
863{ 916{
864 struct cfq_data *cfqd = q->elevator->elevator_data; 917 struct cfq_data *cfqd = q->elevator->elevator_data;
865 struct cfq_queue *cfqq = RQ_CFQQ(rq); 918 struct cfq_queue *cfqq = RQ_CFQQ(rq);
866 919
867 cfq_remove_request(rq); 920 cfq_remove_request(rq);
868 cfqq->on_dispatch[rq_is_sync(rq)]++; 921 cfqq->dispatched++;
869 elv_dispatch_sort(q, rq); 922 elv_dispatch_sort(q, rq);
870 923
871 rq = list_entry(q->queue_head.prev, struct request, queuelist); 924 if (cfq_cfqq_sync(cfqq))
872 cfqd->last_sector = rq->sector + rq->nr_sectors; 925 cfqd->sync_flight++;
873} 926}
874 927
875/* 928/*
@@ -889,13 +942,13 @@ static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq)
889 if (list_empty(&cfqq->fifo)) 942 if (list_empty(&cfqq->fifo))
890 return NULL; 943 return NULL;
891 944
892 fifo = cfq_cfqq_class_sync(cfqq); 945 fifo = cfq_cfqq_sync(cfqq);
893 rq = rq_entry_fifo(cfqq->fifo.next); 946 rq = rq_entry_fifo(cfqq->fifo.next);
894 947
895 if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) 948 if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo]))
896 return rq; 949 return NULL;
897 950
898 return NULL; 951 return rq;
899} 952}
900 953
901static inline int 954static inline int
@@ -909,7 +962,8 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
909} 962}
910 963
911/* 964/*
912 * get next queue for service 965 * Select a queue for service. If we have a current active queue,
966 * check whether to continue servicing it, or retrieve and set a new one.
913 */ 967 */
914static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) 968static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
915{ 969{
@@ -920,33 +974,41 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
920 goto new_queue; 974 goto new_queue;
921 975
922 /* 976 /*
923 * slice has expired 977 * The active queue has run out of time, expire it and select new.
924 */ 978 */
925 if (!cfq_cfqq_must_dispatch(cfqq) && cfq_slice_used(cfqq)) 979 if (cfq_slice_used(cfqq))
926 goto expire; 980 goto expire;
927 981
928 /* 982 /*
929 * if queue has requests, dispatch one. if not, check if 983 * The active queue has requests and isn't expired, allow it to
930 * enough slice is left to wait for one 984 * dispatch.
931 */ 985 */
932 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) 986 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
933 goto keep_queue; 987 goto keep_queue;
934 else if (cfq_cfqq_slice_new(cfqq) || cfq_cfqq_dispatched(cfqq)) { 988
989 /*
990 * No requests pending. If the active queue still has requests in
991 * flight or is idling for a new request, allow either of these
992 * conditions to happen (or time out) before selecting a new queue.
993 */
994 if (timer_pending(&cfqd->idle_slice_timer) ||
995 (cfqq->dispatched && cfq_cfqq_idle_window(cfqq))) {
935 cfqq = NULL; 996 cfqq = NULL;
936 goto keep_queue; 997 goto keep_queue;
937 } else if (cfq_cfqq_class_sync(cfqq)) {
938 if (cfq_arm_slice_timer(cfqd))
939 return NULL;
940 } 998 }
941 999
942expire: 1000expire:
943 cfq_slice_expired(cfqd, 0, 0); 1001 cfq_slice_expired(cfqd, 0);
944new_queue: 1002new_queue:
945 cfqq = cfq_set_active_queue(cfqd); 1003 cfqq = cfq_set_active_queue(cfqd);
946keep_queue: 1004keep_queue:
947 return cfqq; 1005 return cfqq;
948} 1006}
949 1007
1008/*
1009 * Dispatch some requests from cfqq, moving them to the request queue
1010 * dispatch list.
1011 */
950static int 1012static int
951__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1013__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
952 int max_dispatch) 1014 int max_dispatch)
@@ -969,7 +1031,6 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
969 */ 1031 */
970 cfq_dispatch_insert(cfqd->queue, rq); 1032 cfq_dispatch_insert(cfqd->queue, rq);
971 1033
972 cfqd->dispatch_slice++;
973 dispatched++; 1034 dispatched++;
974 1035
975 if (!cfqd->active_cic) { 1036 if (!cfqd->active_cic) {
@@ -986,58 +1047,55 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
986 * expire an async queue immediately if it has used up its slice. idle 1047 * expire an async queue immediately if it has used up its slice. idle
987 * queue always expire after 1 dispatch round. 1048 * queue always expire after 1 dispatch round.
988 */ 1049 */
989 if ((!cfq_cfqq_sync(cfqq) && 1050 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
990 cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) || 1051 dispatched >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
991 cfq_class_idle(cfqq)) { 1052 cfq_class_idle(cfqq))) {
992 cfqq->slice_end = jiffies + 1; 1053 cfqq->slice_end = jiffies + 1;
993 cfq_slice_expired(cfqd, 0, 0); 1054 cfq_slice_expired(cfqd, 0);
994 } 1055 }
995 1056
996 return dispatched; 1057 return dispatched;
997} 1058}
998 1059
999static int 1060static inline int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
1000cfq_forced_dispatch_cfqqs(struct list_head *list)
1001{ 1061{
1002 struct cfq_queue *cfqq, *next; 1062 int dispatched = 0;
1003 int dispatched;
1004 1063
1005 dispatched = 0; 1064 while (cfqq->next_rq) {
1006 list_for_each_entry_safe(cfqq, next, list, cfq_list) { 1065 cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
1007 while (cfqq->next_rq) { 1066 dispatched++;
1008 cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
1009 dispatched++;
1010 }
1011 BUG_ON(!list_empty(&cfqq->fifo));
1012 } 1067 }
1013 1068
1069 BUG_ON(!list_empty(&cfqq->fifo));
1014 return dispatched; 1070 return dispatched;
1015} 1071}
1016 1072
1017static int 1073/*
1018cfq_forced_dispatch(struct cfq_data *cfqd) 1074 * Drain our current requests. Used for barriers and when switching
1075 * io schedulers on-the-fly.
1076 */
1077static int cfq_forced_dispatch(struct cfq_data *cfqd)
1019{ 1078{
1020 int i, dispatched = 0; 1079 int dispatched = 0;
1080 struct rb_node *n;
1021 1081
1022 for (i = 0; i < CFQ_PRIO_LISTS; i++) 1082 while ((n = cfq_rb_first(&cfqd->service_tree)) != NULL) {
1023 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->rr_list[i]); 1083 struct cfq_queue *cfqq = rb_entry(n, struct cfq_queue, rb_node);
1024 1084
1025 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->busy_rr); 1085 dispatched += __cfq_forced_dispatch_cfqq(cfqq);
1026 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr); 1086 }
1027 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr);
1028 1087
1029 cfq_slice_expired(cfqd, 0, 0); 1088 cfq_slice_expired(cfqd, 0);
1030 1089
1031 BUG_ON(cfqd->busy_queues); 1090 BUG_ON(cfqd->busy_queues);
1032 1091
1033 return dispatched; 1092 return dispatched;
1034} 1093}
1035 1094
1036static int 1095static int cfq_dispatch_requests(request_queue_t *q, int force)
1037cfq_dispatch_requests(request_queue_t *q, int force)
1038{ 1096{
1039 struct cfq_data *cfqd = q->elevator->elevator_data; 1097 struct cfq_data *cfqd = q->elevator->elevator_data;
1040 struct cfq_queue *cfqq, *prev_cfqq; 1098 struct cfq_queue *cfqq;
1041 int dispatched; 1099 int dispatched;
1042 1100
1043 if (!cfqd->busy_queues) 1101 if (!cfqd->busy_queues)
@@ -1047,34 +1105,28 @@ cfq_dispatch_requests(request_queue_t *q, int force)
1047 return cfq_forced_dispatch(cfqd); 1105 return cfq_forced_dispatch(cfqd);
1048 1106
1049 dispatched = 0; 1107 dispatched = 0;
1050 prev_cfqq = NULL;
1051 while ((cfqq = cfq_select_queue(cfqd)) != NULL) { 1108 while ((cfqq = cfq_select_queue(cfqd)) != NULL) {
1052 int max_dispatch; 1109 int max_dispatch;
1053 1110
1054 /* 1111 max_dispatch = cfqd->cfq_quantum;
1055 * Don't repeat dispatch from the previous queue. 1112 if (cfq_class_idle(cfqq))
1056 */ 1113 max_dispatch = 1;
1057 if (prev_cfqq == cfqq)
1058 break;
1059 1114
1060 /* 1115 if (cfqq->dispatched >= max_dispatch) {
1061 * So we have dispatched before in this round, if the 1116 if (cfqd->busy_queues > 1)
1062 * next queue has idling enabled (must be sync), don't 1117 break;
1063 * allow it service until the previous have continued. 1118 if (cfqq->dispatched >= 4 * max_dispatch)
1064 */ 1119 break;
1065 if (cfqd->rq_in_driver && cfq_cfqq_idle_window(cfqq)) 1120 }
1121
1122 if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
1066 break; 1123 break;
1067 1124
1068 cfq_clear_cfqq_must_dispatch(cfqq); 1125 cfq_clear_cfqq_must_dispatch(cfqq);
1069 cfq_clear_cfqq_wait_request(cfqq); 1126 cfq_clear_cfqq_wait_request(cfqq);
1070 del_timer(&cfqd->idle_slice_timer); 1127 del_timer(&cfqd->idle_slice_timer);
1071 1128
1072 max_dispatch = cfqd->cfq_quantum;
1073 if (cfq_class_idle(cfqq))
1074 max_dispatch = 1;
1075
1076 dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); 1129 dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
1077 prev_cfqq = cfqq;
1078 } 1130 }
1079 1131
1080 return dispatched; 1132 return dispatched;
@@ -1100,48 +1152,21 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
1100 BUG_ON(cfq_cfqq_on_rr(cfqq)); 1152 BUG_ON(cfq_cfqq_on_rr(cfqq));
1101 1153
1102 if (unlikely(cfqd->active_queue == cfqq)) { 1154 if (unlikely(cfqd->active_queue == cfqq)) {
1103 __cfq_slice_expired(cfqd, cfqq, 0, 0); 1155 __cfq_slice_expired(cfqd, cfqq, 0);
1104 cfq_schedule_dispatch(cfqd); 1156 cfq_schedule_dispatch(cfqd);
1105 } 1157 }
1106 1158
1107 /*
1108 * it's on the empty list and still hashed
1109 */
1110 list_del(&cfqq->cfq_list);
1111 hlist_del(&cfqq->cfq_hash);
1112 kmem_cache_free(cfq_pool, cfqq); 1159 kmem_cache_free(cfq_pool, cfqq);
1113} 1160}
1114 1161
1115static struct cfq_queue *
1116__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio,
1117 const int hashval)
1118{
1119 struct hlist_head *hash_list = &cfqd->cfq_hash[hashval];
1120 struct hlist_node *entry;
1121 struct cfq_queue *__cfqq;
1122
1123 hlist_for_each_entry(__cfqq, entry, hash_list, cfq_hash) {
1124 const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->org_ioprio_class, __cfqq->org_ioprio);
1125
1126 if (__cfqq->key == key && (__p == prio || !prio))
1127 return __cfqq;
1128 }
1129
1130 return NULL;
1131}
1132
1133static struct cfq_queue *
1134cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio)
1135{
1136 return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT));
1137}
1138
1139static void cfq_free_io_context(struct io_context *ioc) 1162static void cfq_free_io_context(struct io_context *ioc)
1140{ 1163{
1141 struct cfq_io_context *__cic; 1164 struct cfq_io_context *__cic;
1142 struct rb_node *n; 1165 struct rb_node *n;
1143 int freed = 0; 1166 int freed = 0;
1144 1167
1168 ioc->ioc_data = NULL;
1169
1145 while ((n = rb_first(&ioc->cic_root)) != NULL) { 1170 while ((n = rb_first(&ioc->cic_root)) != NULL) {
1146 __cic = rb_entry(n, struct cfq_io_context, rb_node); 1171 __cic = rb_entry(n, struct cfq_io_context, rb_node);
1147 rb_erase(&__cic->rb_node, &ioc->cic_root); 1172 rb_erase(&__cic->rb_node, &ioc->cic_root);
@@ -1158,7 +1183,7 @@ static void cfq_free_io_context(struct io_context *ioc)
1158static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1183static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1159{ 1184{
1160 if (unlikely(cfqq == cfqd->active_queue)) { 1185 if (unlikely(cfqq == cfqd->active_queue)) {
1161 __cfq_slice_expired(cfqd, cfqq, 0, 0); 1186 __cfq_slice_expired(cfqd, cfqq, 0);
1162 cfq_schedule_dispatch(cfqd); 1187 cfq_schedule_dispatch(cfqd);
1163 } 1188 }
1164 1189
@@ -1183,10 +1208,6 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
1183 } 1208 }
1184} 1209}
1185 1210
1186
1187/*
1188 * Called with interrupts disabled
1189 */
1190static void cfq_exit_single_io_context(struct cfq_io_context *cic) 1211static void cfq_exit_single_io_context(struct cfq_io_context *cic)
1191{ 1212{
1192 struct cfq_data *cfqd = cic->key; 1213 struct cfq_data *cfqd = cic->key;
@@ -1200,15 +1221,20 @@ static void cfq_exit_single_io_context(struct cfq_io_context *cic)
1200 } 1221 }
1201} 1222}
1202 1223
1224/*
1225 * The process that ioc belongs to has exited, we need to clean up
1226 * and put the internal structures we have that belongs to that process.
1227 */
1203static void cfq_exit_io_context(struct io_context *ioc) 1228static void cfq_exit_io_context(struct io_context *ioc)
1204{ 1229{
1205 struct cfq_io_context *__cic; 1230 struct cfq_io_context *__cic;
1206 struct rb_node *n; 1231 struct rb_node *n;
1207 1232
1233 ioc->ioc_data = NULL;
1234
1208 /* 1235 /*
1209 * put the reference this task is holding to the various queues 1236 * put the reference this task is holding to the various queues
1210 */ 1237 */
1211
1212 n = rb_first(&ioc->cic_root); 1238 n = rb_first(&ioc->cic_root);
1213 while (n != NULL) { 1239 while (n != NULL) {
1214 __cic = rb_entry(n, struct cfq_io_context, rb_node); 1240 __cic = rb_entry(n, struct cfq_io_context, rb_node);
@@ -1276,8 +1302,6 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq)
1276 */ 1302 */
1277 cfqq->org_ioprio = cfqq->ioprio; 1303 cfqq->org_ioprio = cfqq->ioprio;
1278 cfqq->org_ioprio_class = cfqq->ioprio_class; 1304 cfqq->org_ioprio_class = cfqq->ioprio_class;
1279
1280 cfq_resort_rr_list(cfqq, 0);
1281 cfq_clear_cfqq_prio_changed(cfqq); 1305 cfq_clear_cfqq_prio_changed(cfqq);
1282} 1306}
1283 1307
@@ -1295,7 +1319,7 @@ static inline void changed_ioprio(struct cfq_io_context *cic)
1295 cfqq = cic->cfqq[ASYNC]; 1319 cfqq = cic->cfqq[ASYNC];
1296 if (cfqq) { 1320 if (cfqq) {
1297 struct cfq_queue *new_cfqq; 1321 struct cfq_queue *new_cfqq;
1298 new_cfqq = cfq_get_queue(cfqd, CFQ_KEY_ASYNC, cic->ioc->task, 1322 new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc->task,
1299 GFP_ATOMIC); 1323 GFP_ATOMIC);
1300 if (new_cfqq) { 1324 if (new_cfqq) {
1301 cic->cfqq[ASYNC] = new_cfqq; 1325 cic->cfqq[ASYNC] = new_cfqq;
@@ -1327,16 +1351,16 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc)
1327} 1351}
1328 1352
1329static struct cfq_queue * 1353static struct cfq_queue *
1330cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, 1354cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct task_struct *tsk,
1331 gfp_t gfp_mask) 1355 gfp_t gfp_mask)
1332{ 1356{
1333 const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
1334 struct cfq_queue *cfqq, *new_cfqq = NULL; 1357 struct cfq_queue *cfqq, *new_cfqq = NULL;
1335 unsigned short ioprio; 1358 struct cfq_io_context *cic;
1336 1359
1337retry: 1360retry:
1338 ioprio = tsk->ioprio; 1361 cic = cfq_cic_rb_lookup(cfqd, tsk->io_context);
1339 cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval); 1362 /* cic always exists here */
1363 cfqq = cic_to_cfqq(cic, is_sync);
1340 1364
1341 if (!cfqq) { 1365 if (!cfqq) {
1342 if (new_cfqq) { 1366 if (new_cfqq) {
@@ -1361,18 +1385,20 @@ retry:
1361 1385
1362 memset(cfqq, 0, sizeof(*cfqq)); 1386 memset(cfqq, 0, sizeof(*cfqq));
1363 1387
1364 INIT_HLIST_NODE(&cfqq->cfq_hash); 1388 RB_CLEAR_NODE(&cfqq->rb_node);
1365 INIT_LIST_HEAD(&cfqq->cfq_list);
1366 INIT_LIST_HEAD(&cfqq->fifo); 1389 INIT_LIST_HEAD(&cfqq->fifo);
1367 1390
1368 cfqq->key = key;
1369 hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
1370 atomic_set(&cfqq->ref, 0); 1391 atomic_set(&cfqq->ref, 0);
1371 cfqq->cfqd = cfqd; 1392 cfqq->cfqd = cfqd;
1372 1393
1373 cfq_mark_cfqq_idle_window(cfqq); 1394 if (is_sync) {
1395 cfq_mark_cfqq_idle_window(cfqq);
1396 cfq_mark_cfqq_sync(cfqq);
1397 }
1398
1374 cfq_mark_cfqq_prio_changed(cfqq); 1399 cfq_mark_cfqq_prio_changed(cfqq);
1375 cfq_mark_cfqq_queue_new(cfqq); 1400 cfq_mark_cfqq_queue_new(cfqq);
1401
1376 cfq_init_prio_data(cfqq); 1402 cfq_init_prio_data(cfqq);
1377 } 1403 }
1378 1404
@@ -1385,10 +1411,17 @@ out:
1385 return cfqq; 1411 return cfqq;
1386} 1412}
1387 1413
1414/*
1415 * We drop cfq io contexts lazily, so we may find a dead one.
1416 */
1388static void 1417static void
1389cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) 1418cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic)
1390{ 1419{
1391 WARN_ON(!list_empty(&cic->queue_list)); 1420 WARN_ON(!list_empty(&cic->queue_list));
1421
1422 if (ioc->ioc_data == cic)
1423 ioc->ioc_data = NULL;
1424
1392 rb_erase(&cic->rb_node, &ioc->cic_root); 1425 rb_erase(&cic->rb_node, &ioc->cic_root);
1393 kmem_cache_free(cfq_ioc_pool, cic); 1426 kmem_cache_free(cfq_ioc_pool, cic);
1394 elv_ioc_count_dec(ioc_count); 1427 elv_ioc_count_dec(ioc_count);
@@ -1401,6 +1434,16 @@ cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc)
1401 struct cfq_io_context *cic; 1434 struct cfq_io_context *cic;
1402 void *k, *key = cfqd; 1435 void *k, *key = cfqd;
1403 1436
1437 if (unlikely(!ioc))
1438 return NULL;
1439
1440 /*
1441 * we maintain a last-hit cache, to avoid browsing over the tree
1442 */
1443 cic = ioc->ioc_data;
1444 if (cic && cic->key == cfqd)
1445 return cic;
1446
1404restart: 1447restart:
1405 n = ioc->cic_root.rb_node; 1448 n = ioc->cic_root.rb_node;
1406 while (n) { 1449 while (n) {
@@ -1416,8 +1459,10 @@ restart:
1416 n = n->rb_left; 1459 n = n->rb_left;
1417 else if (key > k) 1460 else if (key > k)
1418 n = n->rb_right; 1461 n = n->rb_right;
1419 else 1462 else {
1463 ioc->ioc_data = cic;
1420 return cic; 1464 return cic;
1465 }
1421 } 1466 }
1422 1467
1423 return NULL; 1468 return NULL;
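The new ioc->ioc_data field acts as a one-entry last-hit cache in front of the per-ioc rbtree of cfq_io_contexts: lookups check the cached pointer first and only walk the tree on a miss, and the exit/drop paths earlier in the diff clear the cache when the cached cic goes away. A minimal sketch of the same pattern over a trivial table, with made-up names:

#include <stdio.h>
#include <string.h>

struct entry {
	const char *key;
	int value;
};

struct table {
	struct entry *items;
	int nr;
	struct entry *last_hit;   /* one-entry cache, NULL when invalid */
};

static struct entry *lookup(struct table *t, const char *key)
{
	/* fast path: the same key is usually looked up repeatedly */
	if (t->last_hit && strcmp(t->last_hit->key, key) == 0)
		return t->last_hit;

	for (int i = 0; i < t->nr; i++) {        /* slow path: full scan */
		if (strcmp(t->items[i].key, key) == 0) {
			t->last_hit = &t->items[i];  /* remember the hit */
			return t->last_hit;
		}
	}
	return NULL;
}

int main(void)
{
	struct entry items[] = { { "sync", 1 }, { "async", 0 } };
	struct table t = { items, 2, NULL };

	printf("%d %d\n", lookup(&t, "async")->value, lookup(&t, "async")->value); /* 0 0 */
	return 0;
}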
@@ -1514,7 +1559,8 @@ cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
1514} 1559}
1515 1560
1516static void 1561static void
1517cfq_update_io_seektime(struct cfq_io_context *cic, struct request *rq) 1562cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
1563 struct request *rq)
1518{ 1564{
1519 sector_t sdist; 1565 sector_t sdist;
1520 u64 total; 1566 u64 total;
@@ -1524,6 +1570,11 @@ cfq_update_io_seektime(struct cfq_io_context *cic, struct request *rq)
1524 else 1570 else
1525 sdist = cic->last_request_pos - rq->sector; 1571 sdist = cic->last_request_pos - rq->sector;
1526 1572
1573 if (!cic->seek_samples) {
1574 cfqd->new_seek_total = (7*cic->seek_total + (u64)256*sdist) / 8;
1575 cfqd->new_seek_mean = cfqd->new_seek_total / 256;
1576 }
1577
1527 /* 1578 /*
1528 * Don't allow the seek distance to get too large from the 1579 * Don't allow the seek distance to get too large from the
1529 * odd fragment, pagein, etc 1580 * odd fragment, pagein, etc
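The added new_seek_total/new_seek_mean bookkeeping uses the same fixed-point blend as the per-cic seek statistics in this hunk: each new distance sample is scaled by 256 and mixed as 7 parts history to 1 part sample, so the running mean stays in integer arithmetic (mean = total / 256). A standalone sketch of that update rule with made-up sample values:

#include <stdio.h>

int main(void)
{
	unsigned long long seek_total = 0;   /* fixed point: 256 * running mean */
	unsigned long long samples[] = { 8, 8, 2048, 8 };   /* seek distances in sectors */

	for (int i = 0; i < 4; i++) {
		/* blend 7 parts history with 1 part new sample, as in the patch */
		seek_total = (7 * seek_total + 256 * samples[i]) / 8;
		printf("sample %llu -> mean %llu sectors\n",
		       samples[i], seek_total / 256);
	}
	return 0;
}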
@@ -1548,7 +1599,12 @@ static void
1548cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1599cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1549 struct cfq_io_context *cic) 1600 struct cfq_io_context *cic)
1550{ 1601{
1551 int enable_idle = cfq_cfqq_idle_window(cfqq); 1602 int enable_idle;
1603
1604 if (!cfq_cfqq_sync(cfqq))
1605 return;
1606
1607 enable_idle = cfq_cfqq_idle_window(cfqq);
1552 1608
1553 if (!cic->ioc->task || !cfqd->cfq_slice_idle || 1609 if (!cic->ioc->task || !cfqd->cfq_slice_idle ||
1554 (cfqd->hw_tag && CIC_SEEKY(cic))) 1610 (cfqd->hw_tag && CIC_SEEKY(cic)))
@@ -1574,24 +1630,28 @@ static int
1574cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, 1630cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
1575 struct request *rq) 1631 struct request *rq)
1576{ 1632{
1577 struct cfq_queue *cfqq = cfqd->active_queue; 1633 struct cfq_queue *cfqq;
1578 1634
1579 if (cfq_class_idle(new_cfqq)) 1635 cfqq = cfqd->active_queue;
1636 if (!cfqq)
1580 return 0; 1637 return 0;
1581 1638
1582 if (!cfqq) 1639 if (cfq_slice_used(cfqq))
1640 return 1;
1641
1642 if (cfq_class_idle(new_cfqq))
1583 return 0; 1643 return 0;
1584 1644
1585 if (cfq_class_idle(cfqq)) 1645 if (cfq_class_idle(cfqq))
1586 return 1; 1646 return 1;
1587 if (!cfq_cfqq_wait_request(new_cfqq)) 1647
1588 return 0;
1589 /* 1648 /*
1590 * if the new request is sync, but the currently running queue is 1649 * if the new request is sync, but the currently running queue is
1591 * not, let the sync request have priority. 1650 * not, let the sync request have priority.
1592 */ 1651 */
1593 if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq)) 1652 if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
1594 return 1; 1653 return 1;
1654
1595 /* 1655 /*
1596 * So both queues are sync. Let the new request get disk time if 1656 * So both queues are sync. Let the new request get disk time if
1597 * it's a metadata request and the current queue is doing regular IO. 1657 * it's a metadata request and the current queue is doing regular IO.
@@ -1599,6 +1659,16 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
1599 if (rq_is_meta(rq) && !cfqq->meta_pending) 1659 if (rq_is_meta(rq) && !cfqq->meta_pending)
1600 return 1; 1660 return 1;
1601 1661
1662 if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
1663 return 0;
1664
1665 /*
1666 * if this request is as-good as one we would expect from the
1667 * current cfqq, let it preempt
1668 */
1669 if (cfq_rq_close(cfqd, rq))
1670 return 1;
1671
1602 return 0; 1672 return 0;
1603} 1673}
1604 1674
@@ -1608,14 +1678,15 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
1608 */ 1678 */
1609static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1679static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1610{ 1680{
1611 cfq_slice_expired(cfqd, 1, 1); 1681 cfq_slice_expired(cfqd, 1);
1612 1682
1613 /* 1683 /*
1614 * Put the new queue at the front of the of the current list, 1684 * Put the new queue at the front of the of the current list,
1615 * so we know that it will be selected next. 1685 * so we know that it will be selected next.
1616 */ 1686 */
1617 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 1687 BUG_ON(!cfq_cfqq_on_rr(cfqq));
1618 list_move(&cfqq->cfq_list, &cfqd->cur_rr); 1688
1689 cfq_service_tree_add(cfqd, cfqq, 1);
1619 1690
1620 cfqq->slice_end = 0; 1691 cfqq->slice_end = 0;
1621 cfq_mark_cfqq_slice_new(cfqq); 1692 cfq_mark_cfqq_slice_new(cfqq);
@@ -1634,34 +1705,12 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1634 if (rq_is_meta(rq)) 1705 if (rq_is_meta(rq))
1635 cfqq->meta_pending++; 1706 cfqq->meta_pending++;
1636 1707
1637 /*
1638 * check if this request is a better next-serve candidate)) {
1639 */
1640 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
1641 BUG_ON(!cfqq->next_rq);
1642
1643 /*
1644 * we never wait for an async request and we don't allow preemption
1645 * of an async request. so just return early
1646 */
1647 if (!rq_is_sync(rq)) {
1648 /*
1649 * sync process issued an async request, if it's waiting
1650 * then expire it and kick rq handling.
1651 */
1652 if (cic == cfqd->active_cic &&
1653 del_timer(&cfqd->idle_slice_timer)) {
1654 cfq_slice_expired(cfqd, 0, 0);
1655 blk_start_queueing(cfqd->queue);
1656 }
1657 return;
1658 }
1659
1660 cfq_update_io_thinktime(cfqd, cic); 1708 cfq_update_io_thinktime(cfqd, cic);
1661 cfq_update_io_seektime(cic, rq); 1709 cfq_update_io_seektime(cfqd, cic, rq);
1662 cfq_update_idle_window(cfqd, cfqq, cic); 1710 cfq_update_idle_window(cfqd, cfqq, cic);
1663 1711
1664 cic->last_request_pos = rq->sector + rq->nr_sectors; 1712 cic->last_request_pos = rq->sector + rq->nr_sectors;
1713 cfqq->last_request_pos = cic->last_request_pos;
1665 1714
1666 if (cfqq == cfqd->active_queue) { 1715 if (cfqq == cfqd->active_queue) {
1667 /* 1716 /*
@@ -1710,16 +1759,16 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
1710 now = jiffies; 1759 now = jiffies;
1711 1760
1712 WARN_ON(!cfqd->rq_in_driver); 1761 WARN_ON(!cfqd->rq_in_driver);
1713 WARN_ON(!cfqq->on_dispatch[sync]); 1762 WARN_ON(!cfqq->dispatched);
1714 cfqd->rq_in_driver--; 1763 cfqd->rq_in_driver--;
1715 cfqq->on_dispatch[sync]--; 1764 cfqq->dispatched--;
1716 cfqq->service_last = now; 1765
1766 if (cfq_cfqq_sync(cfqq))
1767 cfqd->sync_flight--;
1717 1768
1718 if (!cfq_class_idle(cfqq)) 1769 if (!cfq_class_idle(cfqq))
1719 cfqd->last_end_request = now; 1770 cfqd->last_end_request = now;
1720 1771
1721 cfq_resort_rr_list(cfqq, 0);
1722
1723 if (sync) 1772 if (sync)
1724 RQ_CIC(rq)->last_end_request = now; 1773 RQ_CIC(rq)->last_end_request = now;
1725 1774
@@ -1733,12 +1782,13 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
1733 cfq_clear_cfqq_slice_new(cfqq); 1782 cfq_clear_cfqq_slice_new(cfqq);
1734 } 1783 }
1735 if (cfq_slice_used(cfqq)) 1784 if (cfq_slice_used(cfqq))
1736 cfq_slice_expired(cfqd, 0, 1); 1785 cfq_slice_expired(cfqd, 1);
1737 else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) { 1786 else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list))
1738 if (!cfq_arm_slice_timer(cfqd)) 1787 cfq_arm_slice_timer(cfqd);
1739 cfq_schedule_dispatch(cfqd);
1740 }
1741 } 1788 }
1789
1790 if (!cfqd->rq_in_driver)
1791 cfq_schedule_dispatch(cfqd);
1742} 1792}
1743 1793
1744/* 1794/*
@@ -1747,9 +1797,6 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
1747 */ 1797 */
1748static void cfq_prio_boost(struct cfq_queue *cfqq) 1798static void cfq_prio_boost(struct cfq_queue *cfqq)
1749{ 1799{
1750 const int ioprio_class = cfqq->ioprio_class;
1751 const int ioprio = cfqq->ioprio;
1752
1753 if (has_fs_excl()) { 1800 if (has_fs_excl()) {
1754 /* 1801 /*
1755 * boost idle prio on transactions that would lock out other 1802 * boost idle prio on transactions that would lock out other
@@ -1768,12 +1815,6 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
1768 if (cfqq->ioprio != cfqq->org_ioprio) 1815 if (cfqq->ioprio != cfqq->org_ioprio)
1769 cfqq->ioprio = cfqq->org_ioprio; 1816 cfqq->ioprio = cfqq->org_ioprio;
1770 } 1817 }
1771
1772 /*
1773 * refile between round-robin lists if we moved the priority class
1774 */
1775 if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio))
1776 cfq_resort_rr_list(cfqq, 0);
1777} 1818}
1778 1819
1779static inline int __cfq_may_queue(struct cfq_queue *cfqq) 1820static inline int __cfq_may_queue(struct cfq_queue *cfqq)
@@ -1791,10 +1832,8 @@ static int cfq_may_queue(request_queue_t *q, int rw)
1791{ 1832{
1792 struct cfq_data *cfqd = q->elevator->elevator_data; 1833 struct cfq_data *cfqd = q->elevator->elevator_data;
1793 struct task_struct *tsk = current; 1834 struct task_struct *tsk = current;
1835 struct cfq_io_context *cic;
1794 struct cfq_queue *cfqq; 1836 struct cfq_queue *cfqq;
1795 unsigned int key;
1796
1797 key = cfq_queue_pid(tsk, rw, rw & REQ_RW_SYNC);
1798 1837
1799 /* 1838 /*
1800 * don't force setup of a queue from here, as a call to may_queue 1839 * don't force setup of a queue from here, as a call to may_queue
@@ -1802,7 +1841,11 @@ static int cfq_may_queue(request_queue_t *q, int rw)
1802 * so just lookup a possibly existing queue, or return 'may queue' 1841 * so just lookup a possibly existing queue, or return 'may queue'
1803 * if that fails 1842 * if that fails
1804 */ 1843 */
1805 cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio); 1844 cic = cfq_cic_rb_lookup(cfqd, tsk->io_context);
1845 if (!cic)
1846 return ELV_MQUEUE_MAY;
1847
1848 cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC);
1806 if (cfqq) { 1849 if (cfqq) {
1807 cfq_init_prio_data(cfqq); 1850 cfq_init_prio_data(cfqq);
1808 cfq_prio_boost(cfqq); 1851 cfq_prio_boost(cfqq);
@@ -1846,7 +1889,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
1846 struct cfq_io_context *cic; 1889 struct cfq_io_context *cic;
1847 const int rw = rq_data_dir(rq); 1890 const int rw = rq_data_dir(rq);
1848 const int is_sync = rq_is_sync(rq); 1891 const int is_sync = rq_is_sync(rq);
1849 pid_t key = cfq_queue_pid(tsk, rw, is_sync);
1850 struct cfq_queue *cfqq; 1892 struct cfq_queue *cfqq;
1851 unsigned long flags; 1893 unsigned long flags;
1852 1894
@@ -1859,14 +1901,15 @@ cfq_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
1859 if (!cic) 1901 if (!cic)
1860 goto queue_fail; 1902 goto queue_fail;
1861 1903
1862 if (!cic->cfqq[is_sync]) { 1904 cfqq = cic_to_cfqq(cic, is_sync);
1863 cfqq = cfq_get_queue(cfqd, key, tsk, gfp_mask); 1905 if (!cfqq) {
1906 cfqq = cfq_get_queue(cfqd, is_sync, tsk, gfp_mask);
1907
1864 if (!cfqq) 1908 if (!cfqq)
1865 goto queue_fail; 1909 goto queue_fail;
1866 1910
1867 cic->cfqq[is_sync] = cfqq; 1911 cic_set_cfqq(cic, cfqq, is_sync);
1868 } else 1912 }
1869 cfqq = cic->cfqq[is_sync];
1870 1913
1871 cfqq->allocated[rw]++; 1914 cfqq->allocated[rw]++;
1872 cfq_clear_cfqq_must_alloc(cfqq); 1915 cfq_clear_cfqq_must_alloc(cfqq);
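Both hunks above drop the pid-keyed hash lookup in favour of the cfq_io_context hanging off the task's io_context, indexed by request direction. The cic_to_cfqq()/cic_set_cfqq() helpers are not shown in this part of the diff; given the cic->cfqq[is_sync] accesses they replace, they are presumably thin wrappers along the lines below. The !! normalisation matters because callers such as cfq_may_queue() pass a raw flag value (rw & REQ_RW_SYNC) rather than 0 or 1. Treat this as a sketch, not the patch's definition.

/* presumed shape of the helpers used above; not shown in this diff */
static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, int is_sync)
{
	return cic->cfqq[!!is_sync];
}

static inline void cic_set_cfqq(struct cfq_io_context *cic,
				struct cfq_queue *cfqq, int is_sync)
{
	cic->cfqq[!!is_sync] = cfqq;
}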
@@ -1936,7 +1979,7 @@ static void cfq_idle_slice_timer(unsigned long data)
1936 } 1979 }
1937 } 1980 }
1938expire: 1981expire:
1939 cfq_slice_expired(cfqd, 0, timed_out); 1982 cfq_slice_expired(cfqd, timed_out);
1940out_kick: 1983out_kick:
1941 cfq_schedule_dispatch(cfqd); 1984 cfq_schedule_dispatch(cfqd);
1942out_cont: 1985out_cont:
@@ -1982,7 +2025,7 @@ static void cfq_exit_queue(elevator_t *e)
1982 spin_lock_irq(q->queue_lock); 2025 spin_lock_irq(q->queue_lock);
1983 2026
1984 if (cfqd->active_queue) 2027 if (cfqd->active_queue)
1985 __cfq_slice_expired(cfqd, cfqd->active_queue, 0, 0); 2028 __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
1986 2029
1987 while (!list_empty(&cfqd->cic_list)) { 2030 while (!list_empty(&cfqd->cic_list)) {
1988 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, 2031 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
@@ -1996,14 +2039,12 @@ static void cfq_exit_queue(elevator_t *e)
1996 2039
1997 cfq_shutdown_timer_wq(cfqd); 2040 cfq_shutdown_timer_wq(cfqd);
1998 2041
1999 kfree(cfqd->cfq_hash);
2000 kfree(cfqd); 2042 kfree(cfqd);
2001} 2043}
2002 2044
2003static void *cfq_init_queue(request_queue_t *q) 2045static void *cfq_init_queue(request_queue_t *q)
2004{ 2046{
2005 struct cfq_data *cfqd; 2047 struct cfq_data *cfqd;
2006 int i;
2007 2048
2008 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node); 2049 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node);
2009 if (!cfqd) 2050 if (!cfqd)
@@ -2011,21 +2052,9 @@ static void *cfq_init_queue(request_queue_t *q)
2011 2052
2012 memset(cfqd, 0, sizeof(*cfqd)); 2053 memset(cfqd, 0, sizeof(*cfqd));
2013 2054
2014 for (i = 0; i < CFQ_PRIO_LISTS; i++) 2055 cfqd->service_tree = CFQ_RB_ROOT;
2015 INIT_LIST_HEAD(&cfqd->rr_list[i]);
2016
2017 INIT_LIST_HEAD(&cfqd->busy_rr);
2018 INIT_LIST_HEAD(&cfqd->cur_rr);
2019 INIT_LIST_HEAD(&cfqd->idle_rr);
2020 INIT_LIST_HEAD(&cfqd->cic_list); 2056 INIT_LIST_HEAD(&cfqd->cic_list);
2021 2057
2022 cfqd->cfq_hash = kmalloc_node(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL, q->node);
2023 if (!cfqd->cfq_hash)
2024 goto out_free;
2025
2026 for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
2027 INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
2028
2029 cfqd->queue = q; 2058 cfqd->queue = q;
2030 2059
2031 init_timer(&cfqd->idle_slice_timer); 2060 init_timer(&cfqd->idle_slice_timer);
@@ -2049,9 +2078,6 @@ static void *cfq_init_queue(request_queue_t *q)
2049 cfqd->cfq_slice_idle = cfq_slice_idle; 2078 cfqd->cfq_slice_idle = cfq_slice_idle;
2050 2079
2051 return cfqd; 2080 return cfqd;
2052out_free:
2053 kfree(cfqd);
2054 return NULL;
2055} 2081}
2056 2082
2057static void cfq_slab_kill(void) 2083static void cfq_slab_kill(void)
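With the service tree, cfq_init_queue() only has to assign CFQ_RB_ROOT instead of allocating and initialising a hash table, which is also why the out_free error path disappears. For reference, inserting into such a root while keeping the cached leftmost node current follows the usual kernel rbtree pattern, roughly as sketched below; the rb_node and rb_key members of struct cfq_queue are assumptions here, since this part of the diff does not show them.

/* sketch: rbtree insert that maintains the cached leftmost node */
static void cfq_rb_add(struct cfq_rb_root *root, struct cfq_queue *cfqq)
{
	struct rb_node **p = &root->rb.rb_node;
	struct rb_node *parent = NULL;
	int leftmost = 1;

	while (*p) {
		struct cfq_queue *__cfqq;

		parent = *p;
		__cfqq = rb_entry(parent, struct cfq_queue, rb_node);
		if (cfqq->rb_key < __cfqq->rb_key)
			p = &(*p)->rb_left;
		else {
			p = &(*p)->rb_right;
			leftmost = 0;	/* went right at least once */
		}
	}

	if (leftmost)
		root->left = &cfqq->rb_node;	/* new minimum, no walk needed later */

	rb_link_node(&cfqq->rb_node, parent, p);
	rb_insert_color(&cfqq->rb_node, &root->rb);
}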
@@ -2083,7 +2109,6 @@ fail:
2083/* 2109/*
2084 * sysfs parts below --> 2110 * sysfs parts below -->
2085 */ 2111 */
2086
2087static ssize_t 2112static ssize_t
2088cfq_var_show(unsigned int var, char *page) 2113cfq_var_show(unsigned int var, char *page)
2089{ 2114{
diff --git a/block/elevator.c b/block/elevator.c
index 25f6ef28e3bb..ce866eb75f6a 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -134,13 +134,13 @@ static struct elevator_type *elevator_get(const char *name)
134{ 134{
135 struct elevator_type *e; 135 struct elevator_type *e;
136 136
137 spin_lock_irq(&elv_list_lock); 137 spin_lock(&elv_list_lock);
138 138
139 e = elevator_find(name); 139 e = elevator_find(name);
140 if (e && !try_module_get(e->elevator_owner)) 140 if (e && !try_module_get(e->elevator_owner))
141 e = NULL; 141 e = NULL;
142 142
143 spin_unlock_irq(&elv_list_lock); 143 spin_unlock(&elv_list_lock);
144 144
145 return e; 145 return e;
146} 146}
@@ -964,17 +964,19 @@ void elv_unregister_queue(struct request_queue *q)
964 964
965int elv_register(struct elevator_type *e) 965int elv_register(struct elevator_type *e)
966{ 966{
967 spin_lock_irq(&elv_list_lock); 967 char *def = "";
968
969 spin_lock(&elv_list_lock);
968 BUG_ON(elevator_find(e->elevator_name)); 970 BUG_ON(elevator_find(e->elevator_name));
969 list_add_tail(&e->list, &elv_list); 971 list_add_tail(&e->list, &elv_list);
970 spin_unlock_irq(&elv_list_lock); 972 spin_unlock(&elv_list_lock);
971 973
972 printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
973 if (!strcmp(e->elevator_name, chosen_elevator) || 974 if (!strcmp(e->elevator_name, chosen_elevator) ||
974 (!*chosen_elevator && 975 (!*chosen_elevator &&
975 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) 976 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
976 printk(" (default)"); 977 def = " (default)";
977 printk("\n"); 978
979 printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, def);
978 return 0; 980 return 0;
979} 981}
980EXPORT_SYMBOL_GPL(elv_register); 982EXPORT_SYMBOL_GPL(elv_register);
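The registration message is now built with the optional " (default)" suffix chosen up front and emitted by a single printk() call, presumably so the line cannot be split by output from other CPUs the way the old continuation printk() calls could be. The same hunks also switch elv_list_lock from spin_lock_irq() to plain spin_lock(), which is only safe if the elevator list is never touched from interrupt context. The pattern, with stand-in names:

/* stand-in names: 'name' and 'is_default' are not symbols from the patch */
const char *def = is_default ? " (default)" : "";

printk(KERN_INFO "io scheduler %s registered%s\n", name, def);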
@@ -997,9 +999,9 @@ void elv_unregister(struct elevator_type *e)
997 read_unlock(&tasklist_lock); 999 read_unlock(&tasklist_lock);
998 } 1000 }
999 1001
1000 spin_lock_irq(&elv_list_lock); 1002 spin_lock(&elv_list_lock);
1001 list_del_init(&e->list); 1003 list_del_init(&e->list);
1002 spin_unlock_irq(&elv_list_lock); 1004 spin_unlock(&elv_list_lock);
1003} 1005}
1004EXPORT_SYMBOL_GPL(elv_unregister); 1006EXPORT_SYMBOL_GPL(elv_unregister);
1005 1007
@@ -1117,7 +1119,7 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name)
1117 struct list_head *entry; 1119 struct list_head *entry;
1118 int len = 0; 1120 int len = 0;
1119 1121
1120 spin_lock_irq(&elv_list_lock); 1122 spin_lock(&elv_list_lock);
1121 list_for_each(entry, &elv_list) { 1123 list_for_each(entry, &elv_list) {
1122 struct elevator_type *__e; 1124 struct elevator_type *__e;
1123 1125
@@ -1127,7 +1129,7 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name)
1127 else 1129 else
1128 len += sprintf(name+len, "%s ", __e->elevator_name); 1130 len += sprintf(name+len, "%s ", __e->elevator_name);
1129 } 1131 }
1130 spin_unlock_irq(&elv_list_lock); 1132 spin_unlock(&elv_list_lock);
1131 1133
1132 len += sprintf(len+name, "\n"); 1134 len += sprintf(len+name, "\n");
1133 return len; 1135 return len;
diff --git a/block/genhd.c b/block/genhd.c
index 050a1f0f3a86..b5664440896c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -17,7 +17,7 @@
17#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
18#include <linux/mutex.h> 18#include <linux/mutex.h>
19 19
20struct subsystem block_subsys; 20struct kset block_subsys;
21static DEFINE_MUTEX(block_subsys_lock); 21static DEFINE_MUTEX(block_subsys_lock);
22 22
23/* 23/*
@@ -62,8 +62,6 @@ int register_blkdev(unsigned int major, const char *name)
62 /* temporary */ 62 /* temporary */
63 if (major == 0) { 63 if (major == 0) {
64 for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) { 64 for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
65 if (is_lanana_major(index))
66 continue;
67 if (major_names[index] == NULL) 65 if (major_names[index] == NULL)
68 break; 66 break;
69 } 67 }
@@ -223,7 +221,7 @@ static void *part_start(struct seq_file *part, loff_t *pos)
223 loff_t l = *pos; 221 loff_t l = *pos;
224 222
225 mutex_lock(&block_subsys_lock); 223 mutex_lock(&block_subsys_lock);
226 list_for_each(p, &block_subsys.kset.list) 224 list_for_each(p, &block_subsys.list)
227 if (!l--) 225 if (!l--)
228 return list_entry(p, struct gendisk, kobj.entry); 226 return list_entry(p, struct gendisk, kobj.entry);
229 return NULL; 227 return NULL;
@@ -233,7 +231,7 @@ static void *part_next(struct seq_file *part, void *v, loff_t *pos)
233{ 231{
234 struct list_head *p = ((struct gendisk *)v)->kobj.entry.next; 232 struct list_head *p = ((struct gendisk *)v)->kobj.entry.next;
235 ++*pos; 233 ++*pos;
236 return p==&block_subsys.kset.list ? NULL : 234 return p==&block_subsys.list ? NULL :
237 list_entry(p, struct gendisk, kobj.entry); 235 list_entry(p, struct gendisk, kobj.entry);
238} 236}
239 237
@@ -248,7 +246,7 @@ static int show_partition(struct seq_file *part, void *v)
248 int n; 246 int n;
249 char buf[BDEVNAME_SIZE]; 247 char buf[BDEVNAME_SIZE];
250 248
251 if (&sgp->kobj.entry == block_subsys.kset.list.next) 249 if (&sgp->kobj.entry == block_subsys.list.next)
252 seq_puts(part, "major minor #blocks name\n\n"); 250 seq_puts(part, "major minor #blocks name\n\n");
253 251
254 /* Don't show non-partitionable removeable devices or empty devices */ 252 /* Don't show non-partitionable removeable devices or empty devices */
@@ -567,7 +565,7 @@ static void *diskstats_start(struct seq_file *part, loff_t *pos)
567 struct list_head *p; 565 struct list_head *p;
568 566
569 mutex_lock(&block_subsys_lock); 567 mutex_lock(&block_subsys_lock);
570 list_for_each(p, &block_subsys.kset.list) 568 list_for_each(p, &block_subsys.list)
571 if (!k--) 569 if (!k--)
572 return list_entry(p, struct gendisk, kobj.entry); 570 return list_entry(p, struct gendisk, kobj.entry);
573 return NULL; 571 return NULL;
@@ -577,7 +575,7 @@ static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos)
577{ 575{
578 struct list_head *p = ((struct gendisk *)v)->kobj.entry.next; 576 struct list_head *p = ((struct gendisk *)v)->kobj.entry.next;
579 ++*pos; 577 ++*pos;
580 return p==&block_subsys.kset.list ? NULL : 578 return p==&block_subsys.list ? NULL :
581 list_entry(p, struct gendisk, kobj.entry); 579 list_entry(p, struct gendisk, kobj.entry);
582} 580}
583 581
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index cf8752abd61a..5873861e1dbb 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -1221,7 +1221,7 @@ void blk_recount_segments(request_queue_t *q, struct bio *bio)
1221 * considered part of another segment, since that might 1221 * considered part of another segment, since that might
1222 * change with the bounce page. 1222 * change with the bounce page.
1223 */ 1223 */
1224 high = page_to_pfn(bv->bv_page) >= q->bounce_pfn; 1224 high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
1225 if (high || highprv) 1225 if (high || highprv)
1226 goto new_hw_segment; 1226 goto new_hw_segment;
1227 if (cluster) { 1227 if (cluster) {
@@ -3660,8 +3660,8 @@ int __init blk_dev_init(void)
3660 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); 3660 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
3661 register_hotcpu_notifier(&blk_cpu_notifier); 3661 register_hotcpu_notifier(&blk_cpu_notifier);
3662 3662
3663 blk_max_low_pfn = max_low_pfn; 3663 blk_max_low_pfn = max_low_pfn - 1;
3664 blk_max_pfn = max_pfn; 3664 blk_max_pfn = max_pfn - 1;
3665 3665
3666 return 0; 3666 return 0;
3667} 3667}
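The two ll_rw_blk.c hunks above change the same convention: bounce_pfn, blk_max_low_pfn and blk_max_pfn are now treated as the highest usable pfn rather than the first unusable one, so the bounce test becomes a strict ">". A worked example with an illustrative value follows; the helper is hypothetical and merely mirrors the test in blk_recount_segments().

/*
 * Illustrative numbers only: if max_low_pfn == 0x38000, low memory
 * covers pfns 0 .. 0x37fff, so after this patch
 *
 *	blk_max_low_pfn = 0x38000 - 1 = 0x37fff;
 *
 * is the highest reachable pfn and a page at exactly that pfn must
 * not be bounced.
 */
static inline int bio_page_needs_bounce(struct request_queue *q, struct bio_vec *bv)
{
	return page_to_pfn(bv->bv_page) > q->bounce_pfn;	/* strictly above the limit */
}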
@@ -3743,6 +3743,7 @@ static struct io_context *current_io_context(gfp_t gfp_flags, int node)
3743 ret->nr_batch_requests = 0; /* because this is 0 */ 3743 ret->nr_batch_requests = 0; /* because this is 0 */
3744 ret->aic = NULL; 3744 ret->aic = NULL;
3745 ret->cic_root.rb_node = NULL; 3745 ret->cic_root.rb_node = NULL;
3746 ret->ioc_data = NULL;
3746 /* make sure set_task_ioprio() sees the settings above */ 3747 /* make sure set_task_ioprio() sees the settings above */
3747 smp_wmb(); 3748 smp_wmb();
3748 tsk->io_context = ret; 3749 tsk->io_context = ret;
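Finally, current_io_context() clears a new ioc_data pointer alongside cic_root. The name suggests a one-slot cache in front of the per-io_context rbtree of cfq_io_contexts, so that repeated lookups for the same device skip the tree walk. The sketch below shows that idea as a wrapper; in the actual code the fast path may well live inside cfq_cic_rb_lookup() itself, and the cic->key == cfqd convention is an assumption, since only the initialisation appears in this diff.

/* sketch of a one-slot lookup cache in front of the cic rbtree */
static struct cfq_io_context *
cic_lookup_cached(struct cfq_data *cfqd, struct io_context *ioc)
{
	struct cfq_io_context *cic = ioc->ioc_data;

	if (cic && cic->key == cfqd)		/* fast path: last hit matches */
		return cic;

	cic = cfq_cic_rb_lookup(cfqd, ioc);	/* slow path: walk cic_root */
	if (cic)
		ioc->ioc_data = cic;		/* remember for next time */

	return cic;
}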