aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorSteven Whitehouse <swhiteho@redhat.com>2006-10-02 08:45:08 -0400
committerSteven Whitehouse <swhiteho@redhat.com>2006-10-02 08:45:08 -0400
commit59458f40e25915a355d8b1d701425fe9f4f9ea23 (patch)
treef1c9a2934df686e36d75f759ab7313b6f0e0e5f9 /block
parent825f9075d74028d11d7f5932f04e1b5db3022b51 (diff)
parentd834c16516d1ebec4766fc58c059bf01311e6045 (diff)
Merge branch 'master' into gfs2
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig20
-rw-r--r--block/Kconfig.iosched3
-rw-r--r--block/Makefile2
-rw-r--r--block/as-iosched.c674
-rw-r--r--block/blktrace.c32
-rw-r--r--block/cfq-iosched.c867
-rw-r--r--block/deadline-iosched.c464
-rw-r--r--block/elevator.c315
-rw-r--r--block/genhd.c9
-rw-r--r--block/ll_rw_blk.c239
-rw-r--r--block/noop-iosched.c2
-rw-r--r--block/scsi_ioctl.c6
12 files changed, 933 insertions, 1700 deletions
diff --git a/block/Kconfig b/block/Kconfig
index b6f5f0a79655..83766a6bdee2 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -1,6 +1,24 @@
1# 1#
2# Block layer core configuration 2# Block layer core configuration
3# 3#
4config BLOCK
5 bool "Enable the block layer" if EMBEDDED
6 default y
7 help
8 This permits the block layer to be removed from the kernel if it's not
9 needed (on some embedded devices for example). If this option is
10 disabled, then blockdev files will become unusable and some
11 filesystems (such as ext3) will become unavailable.
12
13 This option will also disable SCSI character devices and USB storage
14 since they make use of various block layer definitions and
15 facilities.
16
17 Say Y here unless you know you really don't want to mount disks and
18 suchlike.
19
20if BLOCK
21
4#XXX - it makes sense to enable this only for 32-bit subarch's, not for x86_64 22#XXX - it makes sense to enable this only for 32-bit subarch's, not for x86_64
5#for instance. 23#for instance.
6config LBD 24config LBD
@@ -33,4 +51,6 @@ config LSF
33 51
34 If unsure, say Y. 52 If unsure, say Y.
35 53
54endif
55
36source block/Kconfig.iosched 56source block/Kconfig.iosched
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 48d090e266fc..903f0d3b6852 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -1,3 +1,4 @@
1if BLOCK
1 2
2menu "IO Schedulers" 3menu "IO Schedulers"
3 4
@@ -67,3 +68,5 @@ config DEFAULT_IOSCHED
67 default "noop" if DEFAULT_NOOP 68 default "noop" if DEFAULT_NOOP
68 69
69endmenu 70endmenu
71
72endif
diff --git a/block/Makefile b/block/Makefile
index c05de0e0037f..4b84d0d5947b 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the kernel block layer 2# Makefile for the kernel block layer
3# 3#
4 4
5obj-y := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o 5obj-$(CONFIG_BLOCK) := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o
6 6
7obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o 7obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
8obj-$(CONFIG_IOSCHED_AS) += as-iosched.o 8obj-$(CONFIG_IOSCHED_AS) += as-iosched.o
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 5da56d48fbd3..50b95e4c1425 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Anticipatory & deadline i/o scheduler. 2 * Anticipatory & deadline i/o scheduler.
3 * 3 *
4 * Copyright (C) 2002 Jens Axboe <axboe@suse.de> 4 * Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
5 * Nick Piggin <nickpiggin@yahoo.com.au> 5 * Nick Piggin <nickpiggin@yahoo.com.au>
6 * 6 *
7 */ 7 */
@@ -14,7 +14,6 @@
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/compiler.h> 16#include <linux/compiler.h>
17#include <linux/hash.h>
18#include <linux/rbtree.h> 17#include <linux/rbtree.h>
19#include <linux/interrupt.h> 18#include <linux/interrupt.h>
20 19
@@ -93,9 +92,8 @@ struct as_data {
93 struct rb_root sort_list[2]; 92 struct rb_root sort_list[2];
94 struct list_head fifo_list[2]; 93 struct list_head fifo_list[2];
95 94
96 struct as_rq *next_arq[2]; /* next in sort order */ 95 struct request *next_rq[2]; /* next in sort order */
97 sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */ 96 sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */
98 struct hlist_head *hash; /* request hash */
99 97
100 unsigned long exit_prob; /* probability a task will exit while 98 unsigned long exit_prob; /* probability a task will exit while
101 being waited on */ 99 being waited on */
@@ -115,7 +113,6 @@ struct as_data {
115 int write_batch_count; /* max # of reqs in a write batch */ 113 int write_batch_count; /* max # of reqs in a write batch */
116 int current_write_count; /* how many requests left this batch */ 114 int current_write_count; /* how many requests left this batch */
117 int write_batch_idled; /* has the write batch gone idle? */ 115 int write_batch_idled; /* has the write batch gone idle? */
118 mempool_t *arq_pool;
119 116
120 enum anticipation_status antic_status; 117 enum anticipation_status antic_status;
121 unsigned long antic_start; /* jiffies: when it started */ 118 unsigned long antic_start; /* jiffies: when it started */
@@ -133,8 +130,6 @@ struct as_data {
133 unsigned long antic_expire; 130 unsigned long antic_expire;
134}; 131};
135 132
136#define list_entry_fifo(ptr) list_entry((ptr), struct as_rq, fifo)
137
138/* 133/*
139 * per-request data. 134 * per-request data.
140 */ 135 */
@@ -150,40 +145,14 @@ enum arq_state {
150 AS_RQ_POSTSCHED, /* when they shouldn't be */ 145 AS_RQ_POSTSCHED, /* when they shouldn't be */
151}; 146};
152 147
153struct as_rq { 148#define RQ_IOC(rq) ((struct io_context *) (rq)->elevator_private)
154 /* 149#define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2)
155 * rbtree index, key is the starting offset 150#define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state)
156 */
157 struct rb_node rb_node;
158 sector_t rb_key;
159
160 struct request *request;
161
162 struct io_context *io_context; /* The submitting task */
163
164 /*
165 * request hash, key is the ending offset (for back merge lookup)
166 */
167 struct hlist_node hash;
168
169 /*
170 * expire fifo
171 */
172 struct list_head fifo;
173 unsigned long expires;
174 151
175 unsigned int is_sync; 152static DEFINE_PER_CPU(unsigned long, ioc_count);
176 enum arq_state state;
177};
178
179#define RQ_DATA(rq) ((struct as_rq *) (rq)->elevator_private)
180
181static kmem_cache_t *arq_pool;
182
183static atomic_t ioc_count = ATOMIC_INIT(0);
184static struct completion *ioc_gone; 153static struct completion *ioc_gone;
185 154
186static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq); 155static void as_move_to_dispatch(struct as_data *ad, struct request *rq);
187static void as_antic_stop(struct as_data *ad); 156static void as_antic_stop(struct as_data *ad);
188 157
189/* 158/*
@@ -194,7 +163,8 @@ static void as_antic_stop(struct as_data *ad);
194static void free_as_io_context(struct as_io_context *aic) 163static void free_as_io_context(struct as_io_context *aic)
195{ 164{
196 kfree(aic); 165 kfree(aic);
197 if (atomic_dec_and_test(&ioc_count) && ioc_gone) 166 elv_ioc_count_dec(ioc_count);
167 if (ioc_gone && !elv_ioc_count_read(ioc_count))
198 complete(ioc_gone); 168 complete(ioc_gone);
199} 169}
200 170
@@ -230,7 +200,7 @@ static struct as_io_context *alloc_as_io_context(void)
230 ret->seek_total = 0; 200 ret->seek_total = 0;
231 ret->seek_samples = 0; 201 ret->seek_samples = 0;
232 ret->seek_mean = 0; 202 ret->seek_mean = 0;
233 atomic_inc(&ioc_count); 203 elv_ioc_count_inc(ioc_count);
234 } 204 }
235 205
236 return ret; 206 return ret;
@@ -240,9 +210,9 @@ static struct as_io_context *alloc_as_io_context(void)
240 * If the current task has no AS IO context then create one and initialise it. 210 * If the current task has no AS IO context then create one and initialise it.
241 * Then take a ref on the task's io context and return it. 211 * Then take a ref on the task's io context and return it.
242 */ 212 */
243static struct io_context *as_get_io_context(void) 213static struct io_context *as_get_io_context(int node)
244{ 214{
245 struct io_context *ioc = get_io_context(GFP_ATOMIC); 215 struct io_context *ioc = get_io_context(GFP_ATOMIC, node);
246 if (ioc && !ioc->aic) { 216 if (ioc && !ioc->aic) {
247 ioc->aic = alloc_as_io_context(); 217 ioc->aic = alloc_as_io_context();
248 if (!ioc->aic) { 218 if (!ioc->aic) {
@@ -253,194 +223,43 @@ static struct io_context *as_get_io_context(void)
253 return ioc; 223 return ioc;
254} 224}
255 225
256static void as_put_io_context(struct as_rq *arq) 226static void as_put_io_context(struct request *rq)
257{ 227{
258 struct as_io_context *aic; 228 struct as_io_context *aic;
259 229
260 if (unlikely(!arq->io_context)) 230 if (unlikely(!RQ_IOC(rq)))
261 return; 231 return;
262 232
263 aic = arq->io_context->aic; 233 aic = RQ_IOC(rq)->aic;
264 234
265 if (arq->is_sync == REQ_SYNC && aic) { 235 if (rq_is_sync(rq) && aic) {
266 spin_lock(&aic->lock); 236 spin_lock(&aic->lock);
267 set_bit(AS_TASK_IORUNNING, &aic->state); 237 set_bit(AS_TASK_IORUNNING, &aic->state);
268 aic->last_end_request = jiffies; 238 aic->last_end_request = jiffies;
269 spin_unlock(&aic->lock); 239 spin_unlock(&aic->lock);
270 } 240 }
271 241
272 put_io_context(arq->io_context); 242 put_io_context(RQ_IOC(rq));
273}
274
275/*
276 * the back merge hash support functions
277 */
278static const int as_hash_shift = 6;
279#define AS_HASH_BLOCK(sec) ((sec) >> 3)
280#define AS_HASH_FN(sec) (hash_long(AS_HASH_BLOCK((sec)), as_hash_shift))
281#define AS_HASH_ENTRIES (1 << as_hash_shift)
282#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
283
284static inline void __as_del_arq_hash(struct as_rq *arq)
285{
286 hlist_del_init(&arq->hash);
287}
288
289static inline void as_del_arq_hash(struct as_rq *arq)
290{
291 if (!hlist_unhashed(&arq->hash))
292 __as_del_arq_hash(arq);
293}
294
295static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
296{
297 struct request *rq = arq->request;
298
299 BUG_ON(!hlist_unhashed(&arq->hash));
300
301 hlist_add_head(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]);
302}
303
304/*
305 * move hot entry to front of chain
306 */
307static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq)
308{
309 struct request *rq = arq->request;
310 struct hlist_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))];
311
312 if (hlist_unhashed(&arq->hash)) {
313 WARN_ON(1);
314 return;
315 }
316
317 if (&arq->hash != head->first) {
318 hlist_del(&arq->hash);
319 hlist_add_head(&arq->hash, head);
320 }
321}
322
323static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
324{
325 struct hlist_head *hash_list = &ad->hash[AS_HASH_FN(offset)];
326 struct hlist_node *entry, *next;
327 struct as_rq *arq;
328
329 hlist_for_each_entry_safe(arq, entry, next, hash_list, hash) {
330 struct request *__rq = arq->request;
331
332 BUG_ON(hlist_unhashed(&arq->hash));
333
334 if (!rq_mergeable(__rq)) {
335 as_del_arq_hash(arq);
336 continue;
337 }
338
339 if (rq_hash_key(__rq) == offset)
340 return __rq;
341 }
342
343 return NULL;
344} 243}
345 244
346/* 245/*
347 * rb tree support functions 246 * rb tree support functions
348 */ 247 */
349#define rb_entry_arq(node) rb_entry((node), struct as_rq, rb_node) 248#define RQ_RB_ROOT(ad, rq) (&(ad)->sort_list[rq_is_sync((rq))])
350#define ARQ_RB_ROOT(ad, arq) (&(ad)->sort_list[(arq)->is_sync])
351#define rq_rb_key(rq) (rq)->sector
352
353/*
354 * as_find_first_arq finds the first (lowest sector numbered) request
355 * for the specified data_dir. Used to sweep back to the start of the disk
356 * (1-way elevator) after we process the last (highest sector) request.
357 */
358static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir)
359{
360 struct rb_node *n = ad->sort_list[data_dir].rb_node;
361
362 if (n == NULL)
363 return NULL;
364
365 for (;;) {
366 if (n->rb_left == NULL)
367 return rb_entry_arq(n);
368
369 n = n->rb_left;
370 }
371}
372
373/*
374 * Add the request to the rb tree if it is unique. If there is an alias (an
375 * existing request against the same sector), which can happen when using
376 * direct IO, then return the alias.
377 */
378static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
379{
380 struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
381 struct rb_node *parent = NULL;
382 struct as_rq *__arq;
383 struct request *rq = arq->request;
384
385 arq->rb_key = rq_rb_key(rq);
386
387 while (*p) {
388 parent = *p;
389 __arq = rb_entry_arq(parent);
390
391 if (arq->rb_key < __arq->rb_key)
392 p = &(*p)->rb_left;
393 else if (arq->rb_key > __arq->rb_key)
394 p = &(*p)->rb_right;
395 else
396 return __arq;
397 }
398
399 rb_link_node(&arq->rb_node, parent, p);
400 rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
401
402 return NULL;
403}
404 249
405static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq) 250static void as_add_rq_rb(struct as_data *ad, struct request *rq)
406{ 251{
407 struct as_rq *alias; 252 struct request *alias;
408 253
409 while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) { 254 while ((unlikely(alias = elv_rb_add(RQ_RB_ROOT(ad, rq), rq)))) {
410 as_move_to_dispatch(ad, alias); 255 as_move_to_dispatch(ad, alias);
411 as_antic_stop(ad); 256 as_antic_stop(ad);
412 } 257 }
413} 258}
414 259
415static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) 260static inline void as_del_rq_rb(struct as_data *ad, struct request *rq)
416{
417 if (!RB_EMPTY_NODE(&arq->rb_node)) {
418 WARN_ON(1);
419 return;
420 }
421
422 rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
423 RB_CLEAR_NODE(&arq->rb_node);
424}
425
426static struct request *
427as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
428{ 261{
429 struct rb_node *n = ad->sort_list[data_dir].rb_node; 262 elv_rb_del(RQ_RB_ROOT(ad, rq), rq);
430 struct as_rq *arq;
431
432 while (n) {
433 arq = rb_entry_arq(n);
434
435 if (sector < arq->rb_key)
436 n = n->rb_left;
437 else if (sector > arq->rb_key)
438 n = n->rb_right;
439 else
440 return arq->request;
441 }
442
443 return NULL;
444} 263}
445 264
446/* 265/*
@@ -458,26 +277,26 @@ as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
458 * as_choose_req selects the preferred one of two requests of the same data_dir 277 * as_choose_req selects the preferred one of two requests of the same data_dir
459 * ignoring time - eg. timeouts, which is the job of as_dispatch_request 278 * ignoring time - eg. timeouts, which is the job of as_dispatch_request
460 */ 279 */
461static struct as_rq * 280static struct request *
462as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2) 281as_choose_req(struct as_data *ad, struct request *rq1, struct request *rq2)
463{ 282{
464 int data_dir; 283 int data_dir;
465 sector_t last, s1, s2, d1, d2; 284 sector_t last, s1, s2, d1, d2;
466 int r1_wrap=0, r2_wrap=0; /* requests are behind the disk head */ 285 int r1_wrap=0, r2_wrap=0; /* requests are behind the disk head */
467 const sector_t maxback = MAXBACK; 286 const sector_t maxback = MAXBACK;
468 287
469 if (arq1 == NULL || arq1 == arq2) 288 if (rq1 == NULL || rq1 == rq2)
470 return arq2; 289 return rq2;
471 if (arq2 == NULL) 290 if (rq2 == NULL)
472 return arq1; 291 return rq1;
473 292
474 data_dir = arq1->is_sync; 293 data_dir = rq_is_sync(rq1);
475 294
476 last = ad->last_sector[data_dir]; 295 last = ad->last_sector[data_dir];
477 s1 = arq1->request->sector; 296 s1 = rq1->sector;
478 s2 = arq2->request->sector; 297 s2 = rq2->sector;
479 298
480 BUG_ON(data_dir != arq2->is_sync); 299 BUG_ON(data_dir != rq_is_sync(rq2));
481 300
482 /* 301 /*
483 * Strict one way elevator _except_ in the case where we allow 302 * Strict one way elevator _except_ in the case where we allow
@@ -504,61 +323,58 @@ as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2)
504 323
505 /* Found required data */ 324 /* Found required data */
506 if (!r1_wrap && r2_wrap) 325 if (!r1_wrap && r2_wrap)
507 return arq1; 326 return rq1;
508 else if (!r2_wrap && r1_wrap) 327 else if (!r2_wrap && r1_wrap)
509 return arq2; 328 return rq2;
510 else if (r1_wrap && r2_wrap) { 329 else if (r1_wrap && r2_wrap) {
511 /* both behind the head */ 330 /* both behind the head */
512 if (s1 <= s2) 331 if (s1 <= s2)
513 return arq1; 332 return rq1;
514 else 333 else
515 return arq2; 334 return rq2;
516 } 335 }
517 336
518 /* Both requests in front of the head */ 337 /* Both requests in front of the head */
519 if (d1 < d2) 338 if (d1 < d2)
520 return arq1; 339 return rq1;
521 else if (d2 < d1) 340 else if (d2 < d1)
522 return arq2; 341 return rq2;
523 else { 342 else {
524 if (s1 >= s2) 343 if (s1 >= s2)
525 return arq1; 344 return rq1;
526 else 345 else
527 return arq2; 346 return rq2;
528 } 347 }
529} 348}
530 349
531/* 350/*
532 * as_find_next_arq finds the next request after @prev in elevator order. 351 * as_find_next_rq finds the next request after @prev in elevator order.
533 * this with as_choose_req form the basis for how the scheduler chooses 352 * this with as_choose_req form the basis for how the scheduler chooses
534 * what request to process next. Anticipation works on top of this. 353 * what request to process next. Anticipation works on top of this.
535 */ 354 */
536static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *last) 355static struct request *
356as_find_next_rq(struct as_data *ad, struct request *last)
537{ 357{
538 const int data_dir = last->is_sync;
539 struct as_rq *ret;
540 struct rb_node *rbnext = rb_next(&last->rb_node); 358 struct rb_node *rbnext = rb_next(&last->rb_node);
541 struct rb_node *rbprev = rb_prev(&last->rb_node); 359 struct rb_node *rbprev = rb_prev(&last->rb_node);
542 struct as_rq *arq_next, *arq_prev; 360 struct request *next = NULL, *prev = NULL;
543 361
544 BUG_ON(!RB_EMPTY_NODE(&last->rb_node)); 362 BUG_ON(RB_EMPTY_NODE(&last->rb_node));
545 363
546 if (rbprev) 364 if (rbprev)
547 arq_prev = rb_entry_arq(rbprev); 365 prev = rb_entry_rq(rbprev);
548 else
549 arq_prev = NULL;
550 366
551 if (rbnext) 367 if (rbnext)
552 arq_next = rb_entry_arq(rbnext); 368 next = rb_entry_rq(rbnext);
553 else { 369 else {
554 arq_next = as_find_first_arq(ad, data_dir); 370 const int data_dir = rq_is_sync(last);
555 if (arq_next == last)
556 arq_next = NULL;
557 }
558 371
559 ret = as_choose_req(ad, arq_next, arq_prev); 372 rbnext = rb_first(&ad->sort_list[data_dir]);
373 if (rbnext && rbnext != &last->rb_node)
374 next = rb_entry_rq(rbnext);
375 }
560 376
561 return ret; 377 return as_choose_req(ad, next, prev);
562} 378}
563 379
564/* 380/*
@@ -712,8 +528,7 @@ static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic,
712static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, 528static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
713 struct request *rq) 529 struct request *rq)
714{ 530{
715 struct as_rq *arq = RQ_DATA(rq); 531 int data_dir = rq_is_sync(rq);
716 int data_dir = arq->is_sync;
717 unsigned long thinktime = 0; 532 unsigned long thinktime = 0;
718 sector_t seek_dist; 533 sector_t seek_dist;
719 534
@@ -752,11 +567,11 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
752 * previous one issued. 567 * previous one issued.
753 */ 568 */
754static int as_close_req(struct as_data *ad, struct as_io_context *aic, 569static int as_close_req(struct as_data *ad, struct as_io_context *aic,
755 struct as_rq *arq) 570 struct request *rq)
756{ 571{
757 unsigned long delay; /* milliseconds */ 572 unsigned long delay; /* milliseconds */
758 sector_t last = ad->last_sector[ad->batch_data_dir]; 573 sector_t last = ad->last_sector[ad->batch_data_dir];
759 sector_t next = arq->request->sector; 574 sector_t next = rq->sector;
760 sector_t delta; /* acceptable close offset (in sectors) */ 575 sector_t delta; /* acceptable close offset (in sectors) */
761 sector_t s; 576 sector_t s;
762 577
@@ -813,7 +628,7 @@ static int as_close_req(struct as_data *ad, struct as_io_context *aic,
813 * 628 *
814 * If this task has queued some other IO, do not enter enticipation. 629 * If this task has queued some other IO, do not enter enticipation.
815 */ 630 */
816static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) 631static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
817{ 632{
818 struct io_context *ioc; 633 struct io_context *ioc;
819 struct as_io_context *aic; 634 struct as_io_context *aic;
@@ -821,7 +636,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
821 ioc = ad->io_context; 636 ioc = ad->io_context;
822 BUG_ON(!ioc); 637 BUG_ON(!ioc);
823 638
824 if (arq && ioc == arq->io_context) { 639 if (rq && ioc == RQ_IOC(rq)) {
825 /* request from same process */ 640 /* request from same process */
826 return 1; 641 return 1;
827 } 642 }
@@ -848,7 +663,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
848 return 1; 663 return 1;
849 } 664 }
850 665
851 if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) { 666 if (rq && rq_is_sync(rq) && as_close_req(ad, aic, rq)) {
852 /* 667 /*
853 * Found a close request that is not one of ours. 668 * Found a close request that is not one of ours.
854 * 669 *
@@ -864,7 +679,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
864 ad->exit_no_coop = (7*ad->exit_no_coop)/8; 679 ad->exit_no_coop = (7*ad->exit_no_coop)/8;
865 } 680 }
866 681
867 as_update_iohist(ad, aic, arq->request); 682 as_update_iohist(ad, aic, rq);
868 return 1; 683 return 1;
869 } 684 }
870 685
@@ -891,10 +706,10 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
891} 706}
892 707
893/* 708/*
894 * as_can_anticipate indicates whether we should either run arq 709 * as_can_anticipate indicates whether we should either run rq
895 * or keep anticipating a better request. 710 * or keep anticipating a better request.
896 */ 711 */
897static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) 712static int as_can_anticipate(struct as_data *ad, struct request *rq)
898{ 713{
899 if (!ad->io_context) 714 if (!ad->io_context)
900 /* 715 /*
@@ -908,7 +723,7 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
908 */ 723 */
909 return 0; 724 return 0;
910 725
911 if (as_can_break_anticipation(ad, arq)) 726 if (as_can_break_anticipation(ad, rq))
912 /* 727 /*
913 * This request is a good candidate. Don't keep anticipating, 728 * This request is a good candidate. Don't keep anticipating,
914 * run it. 729 * run it.
@@ -926,16 +741,16 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
926} 741}
927 742
928/* 743/*
929 * as_update_arq must be called whenever a request (arq) is added to 744 * as_update_rq must be called whenever a request (rq) is added to
930 * the sort_list. This function keeps caches up to date, and checks if the 745 * the sort_list. This function keeps caches up to date, and checks if the
931 * request might be one we are "anticipating" 746 * request might be one we are "anticipating"
932 */ 747 */
933static void as_update_arq(struct as_data *ad, struct as_rq *arq) 748static void as_update_rq(struct as_data *ad, struct request *rq)
934{ 749{
935 const int data_dir = arq->is_sync; 750 const int data_dir = rq_is_sync(rq);
936 751
937 /* keep the next_arq cache up to date */ 752 /* keep the next_rq cache up to date */
938 ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]); 753 ad->next_rq[data_dir] = as_choose_req(ad, rq, ad->next_rq[data_dir]);
939 754
940 /* 755 /*
941 * have we been anticipating this request? 756 * have we been anticipating this request?
@@ -944,7 +759,7 @@ static void as_update_arq(struct as_data *ad, struct as_rq *arq)
944 */ 759 */
945 if (ad->antic_status == ANTIC_WAIT_REQ 760 if (ad->antic_status == ANTIC_WAIT_REQ
946 || ad->antic_status == ANTIC_WAIT_NEXT) { 761 || ad->antic_status == ANTIC_WAIT_NEXT) {
947 if (as_can_break_anticipation(ad, arq)) 762 if (as_can_break_anticipation(ad, rq))
948 as_antic_stop(ad); 763 as_antic_stop(ad);
949 } 764 }
950} 765}
@@ -984,12 +799,11 @@ static void update_write_batch(struct as_data *ad)
984static void as_completed_request(request_queue_t *q, struct request *rq) 799static void as_completed_request(request_queue_t *q, struct request *rq)
985{ 800{
986 struct as_data *ad = q->elevator->elevator_data; 801 struct as_data *ad = q->elevator->elevator_data;
987 struct as_rq *arq = RQ_DATA(rq);
988 802
989 WARN_ON(!list_empty(&rq->queuelist)); 803 WARN_ON(!list_empty(&rq->queuelist));
990 804
991 if (arq->state != AS_RQ_REMOVED) { 805 if (RQ_STATE(rq) != AS_RQ_REMOVED) {
992 printk("arq->state %d\n", arq->state); 806 printk("rq->state %d\n", RQ_STATE(rq));
993 WARN_ON(1); 807 WARN_ON(1);
994 goto out; 808 goto out;
995 } 809 }
@@ -1009,14 +823,14 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
1009 * actually serviced. This should help devices with big TCQ windows 823 * actually serviced. This should help devices with big TCQ windows
1010 * and writeback caches 824 * and writeback caches
1011 */ 825 */
1012 if (ad->new_batch && ad->batch_data_dir == arq->is_sync) { 826 if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) {
1013 update_write_batch(ad); 827 update_write_batch(ad);
1014 ad->current_batch_expires = jiffies + 828 ad->current_batch_expires = jiffies +
1015 ad->batch_expire[REQ_SYNC]; 829 ad->batch_expire[REQ_SYNC];
1016 ad->new_batch = 0; 830 ad->new_batch = 0;
1017 } 831 }
1018 832
1019 if (ad->io_context == arq->io_context && ad->io_context) { 833 if (ad->io_context == RQ_IOC(rq) && ad->io_context) {
1020 ad->antic_start = jiffies; 834 ad->antic_start = jiffies;
1021 ad->ioc_finished = 1; 835 ad->ioc_finished = 1;
1022 if (ad->antic_status == ANTIC_WAIT_REQ) { 836 if (ad->antic_status == ANTIC_WAIT_REQ) {
@@ -1028,9 +842,9 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
1028 } 842 }
1029 } 843 }
1030 844
1031 as_put_io_context(arq); 845 as_put_io_context(rq);
1032out: 846out:
1033 arq->state = AS_RQ_POSTSCHED; 847 RQ_SET_STATE(rq, AS_RQ_POSTSCHED);
1034} 848}
1035 849
1036/* 850/*
@@ -1041,27 +855,27 @@ out:
1041 */ 855 */
1042static void as_remove_queued_request(request_queue_t *q, struct request *rq) 856static void as_remove_queued_request(request_queue_t *q, struct request *rq)
1043{ 857{
1044 struct as_rq *arq = RQ_DATA(rq); 858 const int data_dir = rq_is_sync(rq);
1045 const int data_dir = arq->is_sync;
1046 struct as_data *ad = q->elevator->elevator_data; 859 struct as_data *ad = q->elevator->elevator_data;
860 struct io_context *ioc;
1047 861
1048 WARN_ON(arq->state != AS_RQ_QUEUED); 862 WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED);
1049 863
1050 if (arq->io_context && arq->io_context->aic) { 864 ioc = RQ_IOC(rq);
1051 BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued)); 865 if (ioc && ioc->aic) {
1052 atomic_dec(&arq->io_context->aic->nr_queued); 866 BUG_ON(!atomic_read(&ioc->aic->nr_queued));
867 atomic_dec(&ioc->aic->nr_queued);
1053 } 868 }
1054 869
1055 /* 870 /*
1056 * Update the "next_arq" cache if we are about to remove its 871 * Update the "next_rq" cache if we are about to remove its
1057 * entry 872 * entry
1058 */ 873 */
1059 if (ad->next_arq[data_dir] == arq) 874 if (ad->next_rq[data_dir] == rq)
1060 ad->next_arq[data_dir] = as_find_next_arq(ad, arq); 875 ad->next_rq[data_dir] = as_find_next_rq(ad, rq);
1061 876
1062 list_del_init(&arq->fifo); 877 rq_fifo_clear(rq);
1063 as_del_arq_hash(arq); 878 as_del_rq_rb(ad, rq);
1064 as_del_arq_rb(ad, arq);
1065} 879}
1066 880
1067/* 881/*
@@ -1074,7 +888,7 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq)
1074 */ 888 */
1075static int as_fifo_expired(struct as_data *ad, int adir) 889static int as_fifo_expired(struct as_data *ad, int adir)
1076{ 890{
1077 struct as_rq *arq; 891 struct request *rq;
1078 long delta_jif; 892 long delta_jif;
1079 893
1080 delta_jif = jiffies - ad->last_check_fifo[adir]; 894 delta_jif = jiffies - ad->last_check_fifo[adir];
@@ -1088,9 +902,9 @@ static int as_fifo_expired(struct as_data *ad, int adir)
1088 if (list_empty(&ad->fifo_list[adir])) 902 if (list_empty(&ad->fifo_list[adir]))
1089 return 0; 903 return 0;
1090 904
1091 arq = list_entry_fifo(ad->fifo_list[adir].next); 905 rq = rq_entry_fifo(ad->fifo_list[adir].next);
1092 906
1093 return time_after(jiffies, arq->expires); 907 return time_after(jiffies, rq_fifo_time(rq));
1094} 908}
1095 909
1096/* 910/*
@@ -1113,25 +927,25 @@ static inline int as_batch_expired(struct as_data *ad)
1113/* 927/*
1114 * move an entry to dispatch queue 928 * move an entry to dispatch queue
1115 */ 929 */
1116static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) 930static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
1117{ 931{
1118 struct request *rq = arq->request; 932 const int data_dir = rq_is_sync(rq);
1119 const int data_dir = arq->is_sync;
1120 933
1121 BUG_ON(!RB_EMPTY_NODE(&arq->rb_node)); 934 BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
1122 935
1123 as_antic_stop(ad); 936 as_antic_stop(ad);
1124 ad->antic_status = ANTIC_OFF; 937 ad->antic_status = ANTIC_OFF;
1125 938
1126 /* 939 /*
1127 * This has to be set in order to be correctly updated by 940 * This has to be set in order to be correctly updated by
1128 * as_find_next_arq 941 * as_find_next_rq
1129 */ 942 */
1130 ad->last_sector[data_dir] = rq->sector + rq->nr_sectors; 943 ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
1131 944
1132 if (data_dir == REQ_SYNC) { 945 if (data_dir == REQ_SYNC) {
946 struct io_context *ioc = RQ_IOC(rq);
1133 /* In case we have to anticipate after this */ 947 /* In case we have to anticipate after this */
1134 copy_io_context(&ad->io_context, &arq->io_context); 948 copy_io_context(&ad->io_context, &ioc);
1135 } else { 949 } else {
1136 if (ad->io_context) { 950 if (ad->io_context) {
1137 put_io_context(ad->io_context); 951 put_io_context(ad->io_context);
@@ -1143,19 +957,19 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
1143 } 957 }
1144 ad->ioc_finished = 0; 958 ad->ioc_finished = 0;
1145 959
1146 ad->next_arq[data_dir] = as_find_next_arq(ad, arq); 960 ad->next_rq[data_dir] = as_find_next_rq(ad, rq);
1147 961
1148 /* 962 /*
1149 * take it off the sort and fifo list, add to dispatch queue 963 * take it off the sort and fifo list, add to dispatch queue
1150 */ 964 */
1151 as_remove_queued_request(ad->q, rq); 965 as_remove_queued_request(ad->q, rq);
1152 WARN_ON(arq->state != AS_RQ_QUEUED); 966 WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED);
1153 967
1154 elv_dispatch_sort(ad->q, rq); 968 elv_dispatch_sort(ad->q, rq);
1155 969
1156 arq->state = AS_RQ_DISPATCHED; 970 RQ_SET_STATE(rq, AS_RQ_DISPATCHED);
1157 if (arq->io_context && arq->io_context->aic) 971 if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
1158 atomic_inc(&arq->io_context->aic->nr_dispatched); 972 atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched);
1159 ad->nr_dispatched++; 973 ad->nr_dispatched++;
1160} 974}
1161 975
@@ -1167,9 +981,9 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
1167static int as_dispatch_request(request_queue_t *q, int force) 981static int as_dispatch_request(request_queue_t *q, int force)
1168{ 982{
1169 struct as_data *ad = q->elevator->elevator_data; 983 struct as_data *ad = q->elevator->elevator_data;
1170 struct as_rq *arq;
1171 const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]); 984 const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
1172 const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]); 985 const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
986 struct request *rq;
1173 987
1174 if (unlikely(force)) { 988 if (unlikely(force)) {
1175 /* 989 /*
@@ -1185,14 +999,14 @@ static int as_dispatch_request(request_queue_t *q, int force)
1185 ad->changed_batch = 0; 999 ad->changed_batch = 0;
1186 ad->new_batch = 0; 1000 ad->new_batch = 0;
1187 1001
1188 while (ad->next_arq[REQ_SYNC]) { 1002 while (ad->next_rq[REQ_SYNC]) {
1189 as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]); 1003 as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]);
1190 dispatched++; 1004 dispatched++;
1191 } 1005 }
1192 ad->last_check_fifo[REQ_SYNC] = jiffies; 1006 ad->last_check_fifo[REQ_SYNC] = jiffies;
1193 1007
1194 while (ad->next_arq[REQ_ASYNC]) { 1008 while (ad->next_rq[REQ_ASYNC]) {
1195 as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]); 1009 as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]);
1196 dispatched++; 1010 dispatched++;
1197 } 1011 }
1198 ad->last_check_fifo[REQ_ASYNC] = jiffies; 1012 ad->last_check_fifo[REQ_ASYNC] = jiffies;
@@ -1216,19 +1030,19 @@ static int as_dispatch_request(request_queue_t *q, int force)
1216 /* 1030 /*
1217 * batch is still running or no reads or no writes 1031 * batch is still running or no reads or no writes
1218 */ 1032 */
1219 arq = ad->next_arq[ad->batch_data_dir]; 1033 rq = ad->next_rq[ad->batch_data_dir];
1220 1034
1221 if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) { 1035 if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) {
1222 if (as_fifo_expired(ad, REQ_SYNC)) 1036 if (as_fifo_expired(ad, REQ_SYNC))
1223 goto fifo_expired; 1037 goto fifo_expired;
1224 1038
1225 if (as_can_anticipate(ad, arq)) { 1039 if (as_can_anticipate(ad, rq)) {
1226 as_antic_waitreq(ad); 1040 as_antic_waitreq(ad);
1227 return 0; 1041 return 0;
1228 } 1042 }
1229 } 1043 }
1230 1044
1231 if (arq) { 1045 if (rq) {
1232 /* we have a "next request" */ 1046 /* we have a "next request" */
1233 if (reads && !writes) 1047 if (reads && !writes)
1234 ad->current_batch_expires = 1048 ad->current_batch_expires =
@@ -1256,7 +1070,7 @@ static int as_dispatch_request(request_queue_t *q, int force)
1256 ad->changed_batch = 1; 1070 ad->changed_batch = 1;
1257 } 1071 }
1258 ad->batch_data_dir = REQ_SYNC; 1072 ad->batch_data_dir = REQ_SYNC;
1259 arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); 1073 rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next);
1260 ad->last_check_fifo[ad->batch_data_dir] = jiffies; 1074 ad->last_check_fifo[ad->batch_data_dir] = jiffies;
1261 goto dispatch_request; 1075 goto dispatch_request;
1262 } 1076 }
@@ -1282,7 +1096,7 @@ dispatch_writes:
1282 ad->batch_data_dir = REQ_ASYNC; 1096 ad->batch_data_dir = REQ_ASYNC;
1283 ad->current_write_count = ad->write_batch_count; 1097 ad->current_write_count = ad->write_batch_count;
1284 ad->write_batch_idled = 0; 1098 ad->write_batch_idled = 0;
1285 arq = ad->next_arq[ad->batch_data_dir]; 1099 rq = ad->next_rq[ad->batch_data_dir];
1286 goto dispatch_request; 1100 goto dispatch_request;
1287 } 1101 }
1288 1102
@@ -1296,8 +1110,7 @@ dispatch_request:
1296 1110
1297 if (as_fifo_expired(ad, ad->batch_data_dir)) { 1111 if (as_fifo_expired(ad, ad->batch_data_dir)) {
1298fifo_expired: 1112fifo_expired:
1299 arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); 1113 rq = rq_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
1300 BUG_ON(arq == NULL);
1301 } 1114 }
1302 1115
1303 if (ad->changed_batch) { 1116 if (ad->changed_batch) {
@@ -1316,70 +1129,58 @@ fifo_expired:
1316 } 1129 }
1317 1130
1318 /* 1131 /*
1319 * arq is the selected appropriate request. 1132 * rq is the selected appropriate request.
1320 */ 1133 */
1321 as_move_to_dispatch(ad, arq); 1134 as_move_to_dispatch(ad, rq);
1322 1135
1323 return 1; 1136 return 1;
1324} 1137}
1325 1138
1326/* 1139/*
1327 * add arq to rbtree and fifo 1140 * add rq to rbtree and fifo
1328 */ 1141 */
1329static void as_add_request(request_queue_t *q, struct request *rq) 1142static void as_add_request(request_queue_t *q, struct request *rq)
1330{ 1143{
1331 struct as_data *ad = q->elevator->elevator_data; 1144 struct as_data *ad = q->elevator->elevator_data;
1332 struct as_rq *arq = RQ_DATA(rq);
1333 int data_dir; 1145 int data_dir;
1334 1146
1335 arq->state = AS_RQ_NEW; 1147 RQ_SET_STATE(rq, AS_RQ_NEW);
1336 1148
1337 if (rq_data_dir(arq->request) == READ 1149 data_dir = rq_is_sync(rq);
1338 || (arq->request->flags & REQ_RW_SYNC))
1339 arq->is_sync = 1;
1340 else
1341 arq->is_sync = 0;
1342 data_dir = arq->is_sync;
1343 1150
1344 arq->io_context = as_get_io_context(); 1151 rq->elevator_private = as_get_io_context(q->node);
1345 1152
1346 if (arq->io_context) { 1153 if (RQ_IOC(rq)) {
1347 as_update_iohist(ad, arq->io_context->aic, arq->request); 1154 as_update_iohist(ad, RQ_IOC(rq)->aic, rq);
1348 atomic_inc(&arq->io_context->aic->nr_queued); 1155 atomic_inc(&RQ_IOC(rq)->aic->nr_queued);
1349 } 1156 }
1350 1157
1351 as_add_arq_rb(ad, arq); 1158 as_add_rq_rb(ad, rq);
1352 if (rq_mergeable(arq->request))
1353 as_add_arq_hash(ad, arq);
1354 1159
1355 /* 1160 /*
1356 * set expire time (only used for reads) and add to fifo list 1161 * set expire time (only used for reads) and add to fifo list
1357 */ 1162 */
1358 arq->expires = jiffies + ad->fifo_expire[data_dir]; 1163 rq_set_fifo_time(rq, jiffies + ad->fifo_expire[data_dir]);
1359 list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); 1164 list_add_tail(&rq->queuelist, &ad->fifo_list[data_dir]);
1360 1165
1361 as_update_arq(ad, arq); /* keep state machine up to date */ 1166 as_update_rq(ad, rq); /* keep state machine up to date */
1362 arq->state = AS_RQ_QUEUED; 1167 RQ_SET_STATE(rq, AS_RQ_QUEUED);
1363} 1168}
1364 1169
1365static void as_activate_request(request_queue_t *q, struct request *rq) 1170static void as_activate_request(request_queue_t *q, struct request *rq)
1366{ 1171{
1367 struct as_rq *arq = RQ_DATA(rq); 1172 WARN_ON(RQ_STATE(rq) != AS_RQ_DISPATCHED);
1368 1173 RQ_SET_STATE(rq, AS_RQ_REMOVED);
1369 WARN_ON(arq->state != AS_RQ_DISPATCHED); 1174 if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
1370 arq->state = AS_RQ_REMOVED; 1175 atomic_dec(&RQ_IOC(rq)->aic->nr_dispatched);
1371 if (arq->io_context && arq->io_context->aic)
1372 atomic_dec(&arq->io_context->aic->nr_dispatched);
1373} 1176}
1374 1177
1375static void as_deactivate_request(request_queue_t *q, struct request *rq) 1178static void as_deactivate_request(request_queue_t *q, struct request *rq)
1376{ 1179{
1377 struct as_rq *arq = RQ_DATA(rq); 1180 WARN_ON(RQ_STATE(rq) != AS_RQ_REMOVED);
1378 1181 RQ_SET_STATE(rq, AS_RQ_DISPATCHED);
1379 WARN_ON(arq->state != AS_RQ_REMOVED); 1182 if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
1380 arq->state = AS_RQ_DISPATCHED; 1183 atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched);
1381 if (arq->io_context && arq->io_context->aic)
1382 atomic_inc(&arq->io_context->aic->nr_dispatched);
1383} 1184}
1384 1185
1385/* 1186/*
@@ -1396,93 +1197,35 @@ static int as_queue_empty(request_queue_t *q)
1396 && list_empty(&ad->fifo_list[REQ_SYNC]); 1197 && list_empty(&ad->fifo_list[REQ_SYNC]);
1397} 1198}
1398 1199
1399static struct request *as_former_request(request_queue_t *q,
1400 struct request *rq)
1401{
1402 struct as_rq *arq = RQ_DATA(rq);
1403 struct rb_node *rbprev = rb_prev(&arq->rb_node);
1404 struct request *ret = NULL;
1405
1406 if (rbprev)
1407 ret = rb_entry_arq(rbprev)->request;
1408
1409 return ret;
1410}
1411
1412static struct request *as_latter_request(request_queue_t *q,
1413 struct request *rq)
1414{
1415 struct as_rq *arq = RQ_DATA(rq);
1416 struct rb_node *rbnext = rb_next(&arq->rb_node);
1417 struct request *ret = NULL;
1418
1419 if (rbnext)
1420 ret = rb_entry_arq(rbnext)->request;
1421
1422 return ret;
1423}
1424
1425static int 1200static int
1426as_merge(request_queue_t *q, struct request **req, struct bio *bio) 1201as_merge(request_queue_t *q, struct request **req, struct bio *bio)
1427{ 1202{
1428 struct as_data *ad = q->elevator->elevator_data; 1203 struct as_data *ad = q->elevator->elevator_data;
1429 sector_t rb_key = bio->bi_sector + bio_sectors(bio); 1204 sector_t rb_key = bio->bi_sector + bio_sectors(bio);
1430 struct request *__rq; 1205 struct request *__rq;
1431 int ret;
1432
1433 /*
1434 * see if the merge hash can satisfy a back merge
1435 */
1436 __rq = as_find_arq_hash(ad, bio->bi_sector);
1437 if (__rq) {
1438 BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
1439
1440 if (elv_rq_merge_ok(__rq, bio)) {
1441 ret = ELEVATOR_BACK_MERGE;
1442 goto out;
1443 }
1444 }
1445 1206
1446 /* 1207 /*
1447 * check for front merge 1208 * check for front merge
1448 */ 1209 */
1449 __rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio)); 1210 __rq = elv_rb_find(&ad->sort_list[bio_data_dir(bio)], rb_key);
1450 if (__rq) { 1211 if (__rq && elv_rq_merge_ok(__rq, bio)) {
1451 BUG_ON(rb_key != rq_rb_key(__rq)); 1212 *req = __rq;
1452 1213 return ELEVATOR_FRONT_MERGE;
1453 if (elv_rq_merge_ok(__rq, bio)) {
1454 ret = ELEVATOR_FRONT_MERGE;
1455 goto out;
1456 }
1457 } 1214 }
1458 1215
1459 return ELEVATOR_NO_MERGE; 1216 return ELEVATOR_NO_MERGE;
1460out:
1461 if (ret) {
1462 if (rq_mergeable(__rq))
1463 as_hot_arq_hash(ad, RQ_DATA(__rq));
1464 }
1465 *req = __rq;
1466 return ret;
1467} 1217}
1468 1218
1469static void as_merged_request(request_queue_t *q, struct request *req) 1219static void as_merged_request(request_queue_t *q, struct request *req, int type)
1470{ 1220{
1471 struct as_data *ad = q->elevator->elevator_data; 1221 struct as_data *ad = q->elevator->elevator_data;
1472 struct as_rq *arq = RQ_DATA(req);
1473
1474 /*
1475 * hash always needs to be repositioned, key is end sector
1476 */
1477 as_del_arq_hash(arq);
1478 as_add_arq_hash(ad, arq);
1479 1222
1480 /* 1223 /*
1481 * if the merge was a front merge, we need to reposition request 1224 * if the merge was a front merge, we need to reposition request
1482 */ 1225 */
1483 if (rq_rb_key(req) != arq->rb_key) { 1226 if (type == ELEVATOR_FRONT_MERGE) {
1484 as_del_arq_rb(ad, arq); 1227 as_del_rq_rb(ad, req);
1485 as_add_arq_rb(ad, arq); 1228 as_add_rq_rb(ad, req);
1486 /* 1229 /*
1487 * Note! At this stage of this and the next function, our next 1230 * Note! At this stage of this and the next function, our next
1488 * request may not be optimal - eg the request may have "grown" 1231 * request may not be optimal - eg the request may have "grown"
@@ -1494,38 +1237,22 @@ static void as_merged_request(request_queue_t *q, struct request *req)
1494static void as_merged_requests(request_queue_t *q, struct request *req, 1237static void as_merged_requests(request_queue_t *q, struct request *req,
1495 struct request *next) 1238 struct request *next)
1496{ 1239{
1497 struct as_data *ad = q->elevator->elevator_data;
1498 struct as_rq *arq = RQ_DATA(req);
1499 struct as_rq *anext = RQ_DATA(next);
1500
1501 BUG_ON(!arq);
1502 BUG_ON(!anext);
1503
1504 /* 1240 /*
1505 * reposition arq (this is the merged request) in hash, and in rbtree 1241 * if next expires before rq, assign its expire time to arq
1506 * in case of a front merge 1242 * and move into next position (next will be deleted) in fifo
1507 */ 1243 */
1508 as_del_arq_hash(arq); 1244 if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
1509 as_add_arq_hash(ad, arq); 1245 if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
1510 1246 struct io_context *rioc = RQ_IOC(req);
1511 if (rq_rb_key(req) != arq->rb_key) { 1247 struct io_context *nioc = RQ_IOC(next);
1512 as_del_arq_rb(ad, arq);
1513 as_add_arq_rb(ad, arq);
1514 }
1515 1248
1516 /* 1249 list_move(&req->queuelist, &next->queuelist);
1517 * if anext expires before arq, assign its expire time to arq 1250 rq_set_fifo_time(req, rq_fifo_time(next));
1518 * and move into anext position (anext will be deleted) in fifo
1519 */
1520 if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) {
1521 if (time_before(anext->expires, arq->expires)) {
1522 list_move(&arq->fifo, &anext->fifo);
1523 arq->expires = anext->expires;
1524 /* 1251 /*
1525 * Don't copy here but swap, because when anext is 1252 * Don't copy here but swap, because when anext is
1526 * removed below, it must contain the unused context 1253 * removed below, it must contain the unused context
1527 */ 1254 */
1528 swap_io_context(&arq->io_context, &anext->io_context); 1255 swap_io_context(&rioc, &nioc);
1529 } 1256 }
1530 } 1257 }
1531 1258
@@ -1533,9 +1260,9 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
1533 * kill knowledge of next, this one is a goner 1260 * kill knowledge of next, this one is a goner
1534 */ 1261 */
1535 as_remove_queued_request(q, next); 1262 as_remove_queued_request(q, next);
1536 as_put_io_context(anext); 1263 as_put_io_context(next);
1537 1264
1538 anext->state = AS_RQ_MERGED; 1265 RQ_SET_STATE(next, AS_RQ_MERGED);
1539} 1266}
1540 1267
1541/* 1268/*
@@ -1553,61 +1280,18 @@ static void as_work_handler(void *data)
1553 unsigned long flags; 1280 unsigned long flags;
1554 1281
1555 spin_lock_irqsave(q->queue_lock, flags); 1282 spin_lock_irqsave(q->queue_lock, flags);
1556 if (!as_queue_empty(q)) 1283 blk_start_queueing(q);
1557 q->request_fn(q);
1558 spin_unlock_irqrestore(q->queue_lock, flags); 1284 spin_unlock_irqrestore(q->queue_lock, flags);
1559} 1285}
1560 1286
1561static void as_put_request(request_queue_t *q, struct request *rq) 1287static int as_may_queue(request_queue_t *q, int rw)
1562{
1563 struct as_data *ad = q->elevator->elevator_data;
1564 struct as_rq *arq = RQ_DATA(rq);
1565
1566 if (!arq) {
1567 WARN_ON(1);
1568 return;
1569 }
1570
1571 if (unlikely(arq->state != AS_RQ_POSTSCHED &&
1572 arq->state != AS_RQ_PRESCHED &&
1573 arq->state != AS_RQ_MERGED)) {
1574 printk("arq->state %d\n", arq->state);
1575 WARN_ON(1);
1576 }
1577
1578 mempool_free(arq, ad->arq_pool);
1579 rq->elevator_private = NULL;
1580}
1581
1582static int as_set_request(request_queue_t *q, struct request *rq,
1583 struct bio *bio, gfp_t gfp_mask)
1584{
1585 struct as_data *ad = q->elevator->elevator_data;
1586 struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
1587
1588 if (arq) {
1589 memset(arq, 0, sizeof(*arq));
1590 RB_CLEAR_NODE(&arq->rb_node);
1591 arq->request = rq;
1592 arq->state = AS_RQ_PRESCHED;
1593 arq->io_context = NULL;
1594 INIT_HLIST_NODE(&arq->hash);
1595 INIT_LIST_HEAD(&arq->fifo);
1596 rq->elevator_private = arq;
1597 return 0;
1598 }
1599
1600 return 1;
1601}
1602
1603static int as_may_queue(request_queue_t *q, int rw, struct bio *bio)
1604{ 1288{
1605 int ret = ELV_MQUEUE_MAY; 1289 int ret = ELV_MQUEUE_MAY;
1606 struct as_data *ad = q->elevator->elevator_data; 1290 struct as_data *ad = q->elevator->elevator_data;
1607 struct io_context *ioc; 1291 struct io_context *ioc;
1608 if (ad->antic_status == ANTIC_WAIT_REQ || 1292 if (ad->antic_status == ANTIC_WAIT_REQ ||
1609 ad->antic_status == ANTIC_WAIT_NEXT) { 1293 ad->antic_status == ANTIC_WAIT_NEXT) {
1610 ioc = as_get_io_context(); 1294 ioc = as_get_io_context(q->node);
1611 if (ad->io_context == ioc) 1295 if (ad->io_context == ioc)
1612 ret = ELV_MQUEUE_MUST; 1296 ret = ELV_MQUEUE_MUST;
1613 put_io_context(ioc); 1297 put_io_context(ioc);
@@ -1626,23 +1310,16 @@ static void as_exit_queue(elevator_t *e)
1626 BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); 1310 BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
1627 BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); 1311 BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
1628 1312
1629 mempool_destroy(ad->arq_pool);
1630 put_io_context(ad->io_context); 1313 put_io_context(ad->io_context);
1631 kfree(ad->hash);
1632 kfree(ad); 1314 kfree(ad);
1633} 1315}
1634 1316
1635/* 1317/*
1636 * initialize elevator private data (as_data), and alloc a arq for 1318 * initialize elevator private data (as_data).
1637 * each request on the free lists
1638 */ 1319 */
1639static void *as_init_queue(request_queue_t *q, elevator_t *e) 1320static void *as_init_queue(request_queue_t *q, elevator_t *e)
1640{ 1321{
1641 struct as_data *ad; 1322 struct as_data *ad;
1642 int i;
1643
1644 if (!arq_pool)
1645 return NULL;
1646 1323
1647 ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node); 1324 ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node);
1648 if (!ad) 1325 if (!ad)
@@ -1651,30 +1328,12 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e)
1651 1328
1652 ad->q = q; /* Identify what queue the data belongs to */ 1329 ad->q = q; /* Identify what queue the data belongs to */
1653 1330
1654 ad->hash = kmalloc_node(sizeof(struct hlist_head)*AS_HASH_ENTRIES,
1655 GFP_KERNEL, q->node);
1656 if (!ad->hash) {
1657 kfree(ad);
1658 return NULL;
1659 }
1660
1661 ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
1662 mempool_free_slab, arq_pool, q->node);
1663 if (!ad->arq_pool) {
1664 kfree(ad->hash);
1665 kfree(ad);
1666 return NULL;
1667 }
1668
1669 /* anticipatory scheduling helpers */ 1331 /* anticipatory scheduling helpers */
1670 ad->antic_timer.function = as_antic_timeout; 1332 ad->antic_timer.function = as_antic_timeout;
1671 ad->antic_timer.data = (unsigned long)q; 1333 ad->antic_timer.data = (unsigned long)q;
1672 init_timer(&ad->antic_timer); 1334 init_timer(&ad->antic_timer);
1673 INIT_WORK(&ad->antic_work, as_work_handler, q); 1335 INIT_WORK(&ad->antic_work, as_work_handler, q);
1674 1336
1675 for (i = 0; i < AS_HASH_ENTRIES; i++)
1676 INIT_HLIST_HEAD(&ad->hash[i]);
1677
1678 INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]); 1337 INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
1679 INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); 1338 INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
1680 ad->sort_list[REQ_SYNC] = RB_ROOT; 1339 ad->sort_list[REQ_SYNC] = RB_ROOT;
@@ -1787,10 +1446,8 @@ static struct elevator_type iosched_as = {
1787 .elevator_deactivate_req_fn = as_deactivate_request, 1446 .elevator_deactivate_req_fn = as_deactivate_request,
1788 .elevator_queue_empty_fn = as_queue_empty, 1447 .elevator_queue_empty_fn = as_queue_empty,
1789 .elevator_completed_req_fn = as_completed_request, 1448 .elevator_completed_req_fn = as_completed_request,
1790 .elevator_former_req_fn = as_former_request, 1449 .elevator_former_req_fn = elv_rb_former_request,
1791 .elevator_latter_req_fn = as_latter_request, 1450 .elevator_latter_req_fn = elv_rb_latter_request,
1792 .elevator_set_req_fn = as_set_request,
1793 .elevator_put_req_fn = as_put_request,
1794 .elevator_may_queue_fn = as_may_queue, 1451 .elevator_may_queue_fn = as_may_queue,
1795 .elevator_init_fn = as_init_queue, 1452 .elevator_init_fn = as_init_queue,
1796 .elevator_exit_fn = as_exit_queue, 1453 .elevator_exit_fn = as_exit_queue,
@@ -1806,11 +1463,6 @@ static int __init as_init(void)
1806{ 1463{
1807 int ret; 1464 int ret;
1808 1465
1809 arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq),
1810 0, 0, NULL, NULL);
1811 if (!arq_pool)
1812 return -ENOMEM;
1813
1814 ret = elv_register(&iosched_as); 1466 ret = elv_register(&iosched_as);
1815 if (!ret) { 1467 if (!ret) {
1816 /* 1468 /*
@@ -1822,21 +1474,19 @@ static int __init as_init(void)
1822 return 0; 1474 return 0;
1823 } 1475 }
1824 1476
1825 kmem_cache_destroy(arq_pool);
1826 return ret; 1477 return ret;
1827} 1478}
1828 1479
1829static void __exit as_exit(void) 1480static void __exit as_exit(void)
1830{ 1481{
1831 DECLARE_COMPLETION(all_gone); 1482 DECLARE_COMPLETION_ONSTACK(all_gone);
1832 elv_unregister(&iosched_as); 1483 elv_unregister(&iosched_as);
1833 ioc_gone = &all_gone; 1484 ioc_gone = &all_gone;
1834 /* ioc_gone's update must be visible before reading ioc_count */ 1485 /* ioc_gone's update must be visible before reading ioc_count */
1835 smp_wmb(); 1486 smp_wmb();
1836 if (atomic_read(&ioc_count)) 1487 if (elv_ioc_count_read(ioc_count))
1837 wait_for_completion(ioc_gone); 1488 wait_for_completion(ioc_gone);
1838 synchronize_rcu(); 1489 synchronize_rcu();
1839 kmem_cache_destroy(arq_pool);
1840} 1490}
1841 1491
1842module_init(as_init); 1492module_init(as_init);
diff --git a/block/blktrace.c b/block/blktrace.c
index 2b4ef2b89b8d..135593c8e45b 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2006 Jens Axboe <axboe@suse.de> 2 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
@@ -69,7 +69,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK
69/* 69/*
70 * Bio action bits of interest 70 * Bio action bits of interest
71 */ 71 */
72static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD) }; 72static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) };
73 73
74/* 74/*
75 * More could be added as needed, taking care to increment the decrementer 75 * More could be added as needed, taking care to increment the decrementer
@@ -81,6 +81,8 @@ static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_AC
81 (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1)) 81 (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
82#define trace_ahead_bit(rw) \ 82#define trace_ahead_bit(rw) \
83 (((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD)) 83 (((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
84#define trace_meta_bit(rw) \
85 (((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3))
84 86
85/* 87/*
86 * The worker for the various blk_add_trace*() types. Fills out a 88 * The worker for the various blk_add_trace*() types. Fills out a
@@ -103,6 +105,7 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
103 what |= bio_act[trace_barrier_bit(rw)]; 105 what |= bio_act[trace_barrier_bit(rw)];
104 what |= bio_act[trace_sync_bit(rw)]; 106 what |= bio_act[trace_sync_bit(rw)];
105 what |= bio_act[trace_ahead_bit(rw)]; 107 what |= bio_act[trace_ahead_bit(rw)];
108 what |= bio_act[trace_meta_bit(rw)];
106 109
107 pid = tsk->pid; 110 pid = tsk->pid;
108 if (unlikely(act_log_check(bt, what, sector, pid))) 111 if (unlikely(act_log_check(bt, what, sector, pid)))
@@ -450,8 +453,10 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
450 **/ 453 **/
451void blk_trace_shutdown(request_queue_t *q) 454void blk_trace_shutdown(request_queue_t *q)
452{ 455{
453 blk_trace_startstop(q, 0); 456 if (q->blk_trace) {
454 blk_trace_remove(q); 457 blk_trace_startstop(q, 0);
458 blk_trace_remove(q);
459 }
455} 460}
456 461
457/* 462/*
@@ -471,6 +476,9 @@ static void blk_check_time(unsigned long long *t)
471 *t -= (a + b) / 2; 476 *t -= (a + b) / 2;
472} 477}
473 478
479/*
480 * calibrate our inter-CPU timings
481 */
474static void blk_trace_check_cpu_time(void *data) 482static void blk_trace_check_cpu_time(void *data)
475{ 483{
476 unsigned long long *t; 484 unsigned long long *t;
@@ -488,20 +496,6 @@ static void blk_trace_check_cpu_time(void *data)
488 put_cpu(); 496 put_cpu();
489} 497}
490 498
491/*
492 * Call blk_trace_check_cpu_time() on each CPU to calibrate our inter-CPU
493 * timings
494 */
495static void blk_trace_calibrate_offsets(void)
496{
497 unsigned long flags;
498
499 smp_call_function(blk_trace_check_cpu_time, NULL, 1, 1);
500 local_irq_save(flags);
501 blk_trace_check_cpu_time(NULL);
502 local_irq_restore(flags);
503}
504
505static void blk_trace_set_ht_offsets(void) 499static void blk_trace_set_ht_offsets(void)
506{ 500{
507#if defined(CONFIG_SCHED_SMT) 501#if defined(CONFIG_SCHED_SMT)
@@ -530,7 +524,7 @@ static void blk_trace_set_ht_offsets(void)
530static __init int blk_trace_init(void) 524static __init int blk_trace_init(void)
531{ 525{
532 mutex_init(&blk_tree_mutex); 526 mutex_init(&blk_tree_mutex);
533 blk_trace_calibrate_offsets(); 527 on_each_cpu(blk_trace_check_cpu_time, NULL, 1, 1);
534 blk_trace_set_ht_offsets(); 528 blk_trace_set_ht_offsets();
535 529
536 return 0; 530 return 0;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3a3aee08ec5f..d3d76136f53a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -4,7 +4,7 @@
4 * Based on ideas from a previously unfinished io 4 * Based on ideas from a previously unfinished io
5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. 5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
6 * 6 *
7 * Copyright (C) 2003 Jens Axboe <axboe@suse.de> 7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
8 */ 8 */
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/blkdev.h> 10#include <linux/blkdev.h>
@@ -17,7 +17,6 @@
17 * tunables 17 * tunables
18 */ 18 */
19static const int cfq_quantum = 4; /* max queue in one round of service */ 19static const int cfq_quantum = 4; /* max queue in one round of service */
20static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/
21static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 20static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
22static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ 21static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */
23static const int cfq_back_penalty = 2; /* penalty of a backwards seek */ 22static const int cfq_back_penalty = 2; /* penalty of a backwards seek */
@@ -32,8 +31,6 @@ static int cfq_slice_idle = HZ / 125;
32 31
33#define CFQ_KEY_ASYNC (0) 32#define CFQ_KEY_ASYNC (0)
34 33
35static DEFINE_SPINLOCK(cfq_exit_lock);
36
37/* 34/*
38 * for the hash of cfqq inside the cfqd 35 * for the hash of cfqq inside the cfqd
39 */ 36 */
@@ -41,37 +38,19 @@ static DEFINE_SPINLOCK(cfq_exit_lock);
41#define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT) 38#define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT)
42#define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash) 39#define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash)
43 40
44/*
45 * for the hash of crq inside the cfqq
46 */
47#define CFQ_MHASH_SHIFT 6
48#define CFQ_MHASH_BLOCK(sec) ((sec) >> 3)
49#define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT)
50#define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT)
51#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
52#define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash)
53
54#define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) 41#define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list)
55#define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
56 42
57#define RQ_DATA(rq) (rq)->elevator_private 43#define RQ_CIC(rq) ((struct cfq_io_context*)(rq)->elevator_private)
44#define RQ_CFQQ(rq) ((rq)->elevator_private2)
58 45
59/*
60 * rb-tree defines
61 */
62#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node)
63#define rq_rb_key(rq) (rq)->sector
64
65static kmem_cache_t *crq_pool;
66static kmem_cache_t *cfq_pool; 46static kmem_cache_t *cfq_pool;
67static kmem_cache_t *cfq_ioc_pool; 47static kmem_cache_t *cfq_ioc_pool;
68 48
69static atomic_t ioc_count = ATOMIC_INIT(0); 49static DEFINE_PER_CPU(unsigned long, ioc_count);
70static struct completion *ioc_gone; 50static struct completion *ioc_gone;
71 51
72#define CFQ_PRIO_LISTS IOPRIO_BE_NR 52#define CFQ_PRIO_LISTS IOPRIO_BE_NR
73#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) 53#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
74#define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE)
75#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) 54#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
76 55
77#define ASYNC (0) 56#define ASYNC (0)
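One change buried in the hunk above: ioc_count goes from a global atomic_t to a per-CPU counter. Each CPU bumps its own slot without bouncing a shared cacheline, and the rare reader (module teardown) sums all slots. A userspace sketch of that trade-off, with NCPUS and the explicit cpu argument standing in for the kernel's per-CPU machinery:

#define NCPUS 4

static unsigned long ioc_count[NCPUS];      /* one slot per CPU, no shared atomic */

static void ioc_count_mod(int cpu, long delta)
{
        ioc_count[cpu] += delta;            /* fast path touches only the local slot */
}

static unsigned long ioc_count_read(void)
{
        unsigned long sum = 0;

        for (int i = 0; i < NCPUS; i++)     /* slow path sums every slot */
                sum += ioc_count[i];
        return sum;
}

int main(void)
{
        ioc_count_mod(0, +1);
        ioc_count_mod(3, +1);
        return ioc_count_read() == 2 ? 0 : 1;
}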
@@ -103,29 +82,14 @@ struct cfq_data {
103 unsigned int busy_queues; 82 unsigned int busy_queues;
104 83
105 /* 84 /*
106 * non-ordered list of empty cfqq's
107 */
108 struct list_head empty_list;
109
110 /*
111 * cfqq lookup hash 85 * cfqq lookup hash
112 */ 86 */
113 struct hlist_head *cfq_hash; 87 struct hlist_head *cfq_hash;
114 88
115 /*
116 * global crq hash for all queues
117 */
118 struct hlist_head *crq_hash;
119
120 mempool_t *crq_pool;
121
122 int rq_in_driver; 89 int rq_in_driver;
123 int hw_tag; 90 int hw_tag;
124 91
125 /* 92 /*
126 * schedule slice state info
127 */
128 /*
129 * idle window management 93 * idle window management
130 */ 94 */
131 struct timer_list idle_slice_timer; 95 struct timer_list idle_slice_timer;
@@ -141,13 +105,10 @@ struct cfq_data {
141 sector_t last_sector; 105 sector_t last_sector;
142 unsigned long last_end_request; 106 unsigned long last_end_request;
143 107
144 unsigned int rq_starved;
145
146 /* 108 /*
147 * tunables, see top of file 109 * tunables, see top of file
148 */ 110 */
149 unsigned int cfq_quantum; 111 unsigned int cfq_quantum;
150 unsigned int cfq_queued;
151 unsigned int cfq_fifo_expire[2]; 112 unsigned int cfq_fifo_expire[2];
152 unsigned int cfq_back_penalty; 113 unsigned int cfq_back_penalty;
153 unsigned int cfq_back_max; 114 unsigned int cfq_back_max;
@@ -170,23 +131,24 @@ struct cfq_queue {
170 struct hlist_node cfq_hash; 131 struct hlist_node cfq_hash;
171 /* hash key */ 132 /* hash key */
172 unsigned int key; 133 unsigned int key;
173 /* on either rr or empty list of cfqd */ 134 /* member of the rr/busy/cur/idle cfqd list */
174 struct list_head cfq_list; 135 struct list_head cfq_list;
175 /* sorted list of pending requests */ 136 /* sorted list of pending requests */
176 struct rb_root sort_list; 137 struct rb_root sort_list;
177 /* if fifo isn't expired, next request to serve */ 138 /* if fifo isn't expired, next request to serve */
178 struct cfq_rq *next_crq; 139 struct request *next_rq;
179 /* requests queued in sort_list */ 140 /* requests queued in sort_list */
180 int queued[2]; 141 int queued[2];
181 /* currently allocated requests */ 142 /* currently allocated requests */
182 int allocated[2]; 143 int allocated[2];
144 /* pending metadata requests */
145 int meta_pending;
183 /* fifo list of requests in sort_list */ 146 /* fifo list of requests in sort_list */
184 struct list_head fifo; 147 struct list_head fifo;
185 148
186 unsigned long slice_start; 149 unsigned long slice_start;
187 unsigned long slice_end; 150 unsigned long slice_end;
188 unsigned long slice_left; 151 unsigned long slice_left;
189 unsigned long service_last;
190 152
191 /* number of requests that are on the dispatch list */ 153 /* number of requests that are on the dispatch list */
192 int on_dispatch[2]; 154 int on_dispatch[2];
@@ -199,18 +161,6 @@ struct cfq_queue {
199 unsigned int flags; 161 unsigned int flags;
200}; 162};
201 163
202struct cfq_rq {
203 struct rb_node rb_node;
204 sector_t rb_key;
205 struct request *request;
206 struct hlist_node hash;
207
208 struct cfq_queue *cfq_queue;
209 struct cfq_io_context *io_context;
210
211 unsigned int crq_flags;
212};
213
214enum cfqq_state_flags { 164enum cfqq_state_flags {
215 CFQ_CFQQ_FLAG_on_rr = 0, 165 CFQ_CFQQ_FLAG_on_rr = 0,
216 CFQ_CFQQ_FLAG_wait_request, 166 CFQ_CFQQ_FLAG_wait_request,
@@ -220,6 +170,7 @@ enum cfqq_state_flags {
220 CFQ_CFQQ_FLAG_fifo_expire, 170 CFQ_CFQQ_FLAG_fifo_expire,
221 CFQ_CFQQ_FLAG_idle_window, 171 CFQ_CFQQ_FLAG_idle_window,
222 CFQ_CFQQ_FLAG_prio_changed, 172 CFQ_CFQQ_FLAG_prio_changed,
173 CFQ_CFQQ_FLAG_queue_new,
223}; 174};
224 175
225#define CFQ_CFQQ_FNS(name) \ 176#define CFQ_CFQQ_FNS(name) \
@@ -244,70 +195,14 @@ CFQ_CFQQ_FNS(must_dispatch);
244CFQ_CFQQ_FNS(fifo_expire); 195CFQ_CFQQ_FNS(fifo_expire);
245CFQ_CFQQ_FNS(idle_window); 196CFQ_CFQQ_FNS(idle_window);
246CFQ_CFQQ_FNS(prio_changed); 197CFQ_CFQQ_FNS(prio_changed);
198CFQ_CFQQ_FNS(queue_new);
247#undef CFQ_CFQQ_FNS 199#undef CFQ_CFQQ_FNS
248 200
249enum cfq_rq_state_flags {
250 CFQ_CRQ_FLAG_is_sync = 0,
251};
252
253#define CFQ_CRQ_FNS(name) \
254static inline void cfq_mark_crq_##name(struct cfq_rq *crq) \
255{ \
256 crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name); \
257} \
258static inline void cfq_clear_crq_##name(struct cfq_rq *crq) \
259{ \
260 crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name); \
261} \
262static inline int cfq_crq_##name(const struct cfq_rq *crq) \
263{ \
264 return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \
265}
266
267CFQ_CRQ_FNS(is_sync);
268#undef CFQ_CRQ_FNS
269
270static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short); 201static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
271static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); 202static void cfq_dispatch_insert(request_queue_t *, struct request *);
272static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask); 203static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask);
273 204
274/* 205/*
275 * lots of deadline iosched dupes, can be abstracted later...
276 */
277static inline void cfq_del_crq_hash(struct cfq_rq *crq)
278{
279 hlist_del_init(&crq->hash);
280}
281
282static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
283{
284 const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
285
286 hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
287}
288
289static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
290{
291 struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
292 struct hlist_node *entry, *next;
293
294 hlist_for_each_safe(entry, next, hash_list) {
295 struct cfq_rq *crq = list_entry_hash(entry);
296 struct request *__rq = crq->request;
297
298 if (!rq_mergeable(__rq)) {
299 cfq_del_crq_hash(crq);
300 continue;
301 }
302
303 if (rq_hash_key(__rq) == offset)
304 return __rq;
305 }
306
307 return NULL;
308}
309
310/*
311 * scheduler run of queue, if there are requests pending and no one in the 206 * scheduler run of queue, if there are requests pending and no one in the
312 * driver that will restart queueing 207 * driver that will restart queueing
313 */ 208 */
@@ -333,12 +228,12 @@ static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
333} 228}
334 229
335/* 230/*
336 * Lifted from AS - choose which of crq1 and crq2 that is best served now. 231 * Lifted from AS - choose which of rq1 and rq2 that is best served now.
337 * We choose the request that is closest to the head right now. Distance 232 * We choose the request that is closest to the head right now. Distance
338 * behind the head is penalized and only allowed to a certain extent. 233 * behind the head is penalized and only allowed to a certain extent.
339 */ 234 */
340static struct cfq_rq * 235static struct request *
341cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) 236cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2)
342{ 237{
343 sector_t last, s1, s2, d1 = 0, d2 = 0; 238 sector_t last, s1, s2, d1 = 0, d2 = 0;
344 unsigned long back_max; 239 unsigned long back_max;
@@ -346,18 +241,22 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
346#define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */ 241#define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */
347 unsigned wrap = 0; /* bit mask: requests behind the disk head? */ 242 unsigned wrap = 0; /* bit mask: requests behind the disk head? */
348 243
349 if (crq1 == NULL || crq1 == crq2) 244 if (rq1 == NULL || rq1 == rq2)
350 return crq2; 245 return rq2;
351 if (crq2 == NULL) 246 if (rq2 == NULL)
352 return crq1; 247 return rq1;
353 248
354 if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2)) 249 if (rq_is_sync(rq1) && !rq_is_sync(rq2))
355 return crq1; 250 return rq1;
356 else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1)) 251 else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
357 return crq2; 252 return rq2;
253 if (rq_is_meta(rq1) && !rq_is_meta(rq2))
254 return rq1;
255 else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
256 return rq2;
358 257
359 s1 = crq1->request->sector; 258 s1 = rq1->sector;
360 s2 = crq2->request->sector; 259 s2 = rq2->sector;
361 260
362 last = cfqd->last_sector; 261 last = cfqd->last_sector;
363 262
@@ -392,23 +291,23 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
392 * check two variables for all permutations: --> faster! 291 * check two variables for all permutations: --> faster!
393 */ 292 */
394 switch (wrap) { 293 switch (wrap) {
395 case 0: /* common case for CFQ: crq1 and crq2 not wrapped */ 294 case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
396 if (d1 < d2) 295 if (d1 < d2)
397 return crq1; 296 return rq1;
398 else if (d2 < d1) 297 else if (d2 < d1)
399 return crq2; 298 return rq2;
400 else { 299 else {
401 if (s1 >= s2) 300 if (s1 >= s2)
402 return crq1; 301 return rq1;
403 else 302 else
404 return crq2; 303 return rq2;
405 } 304 }
406 305
407 case CFQ_RQ2_WRAP: 306 case CFQ_RQ2_WRAP:
408 return crq1; 307 return rq1;
409 case CFQ_RQ1_WRAP: 308 case CFQ_RQ1_WRAP:
410 return crq2; 309 return rq2;
411 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both crqs wrapped */ 310 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
412 default: 311 default:
413 /* 312 /*
414 * Since both rqs are wrapped, 313 * Since both rqs are wrapped,
@@ -417,50 +316,43 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
417 * since back seek takes more time than forward. 316 * since back seek takes more time than forward.
418 */ 317 */
419 if (s1 <= s2) 318 if (s1 <= s2)
420 return crq1; 319 return rq1;
421 else 320 else
422 return crq2; 321 return rq2;
423 } 322 }
424} 323}
425 324
426/* 325/*
427 * would be nice to take fifo expire time into account as well 326 * would be nice to take fifo expire time into account as well
428 */ 327 */
429static struct cfq_rq * 328static struct request *
430cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq, 329cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
431 struct cfq_rq *last) 330 struct request *last)
432{ 331{
433 struct cfq_rq *crq_next = NULL, *crq_prev = NULL; 332 struct rb_node *rbnext = rb_next(&last->rb_node);
434 struct rb_node *rbnext, *rbprev; 333 struct rb_node *rbprev = rb_prev(&last->rb_node);
435 334 struct request *next = NULL, *prev = NULL;
436 if (!(rbnext = rb_next(&last->rb_node))) {
437 rbnext = rb_first(&cfqq->sort_list);
438 if (rbnext == &last->rb_node)
439 rbnext = NULL;
440 }
441 335
442 rbprev = rb_prev(&last->rb_node); 336 BUG_ON(RB_EMPTY_NODE(&last->rb_node));
443 337
444 if (rbprev) 338 if (rbprev)
445 crq_prev = rb_entry_crq(rbprev); 339 prev = rb_entry_rq(rbprev);
446 if (rbnext)
447 crq_next = rb_entry_crq(rbnext);
448
449 return cfq_choose_req(cfqd, crq_next, crq_prev);
450}
451 340
452static void cfq_update_next_crq(struct cfq_rq *crq) 341 if (rbnext)
453{ 342 next = rb_entry_rq(rbnext);
454 struct cfq_queue *cfqq = crq->cfq_queue; 343 else {
344 rbnext = rb_first(&cfqq->sort_list);
345 if (rbnext && rbnext != &last->rb_node)
346 next = rb_entry_rq(rbnext);
347 }
455 348
456 if (cfqq->next_crq == crq) 349 return cfq_choose_req(cfqd, next, prev);
457 cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
458} 350}
459 351
460static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) 352static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
461{ 353{
462 struct cfq_data *cfqd = cfqq->cfqd; 354 struct cfq_data *cfqd = cfqq->cfqd;
463 struct list_head *list, *entry; 355 struct list_head *list;
464 356
465 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 357 BUG_ON(!cfq_cfqq_on_rr(cfqq));
466 358
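cfq_choose_req() and cfq_find_next_rq(), rewritten above to work on struct request directly, keep the same head-distance heuristic: forward distance counts as-is, distance behind the head is penalized, and anything too far behind is treated as wrapped. A compressed userspace sketch of just that scoring step; back_max and the penalty factor below are illustrative numbers, not the scheduler's tunables.

#include <stdint.h>
#include <stdio.h>

static uint64_t seek_cost(uint64_t head, uint64_t pos,
                          uint64_t back_max, unsigned penalty)
{
        if (pos >= head)
                return pos - head;          /* forward seek: plain distance        */
        if (head - pos > back_max)
                return UINT64_MAX;          /* too far behind: effectively wrapped */
        return (head - pos) * penalty;      /* behind the head is penalized        */
}

int main(void)
{
        uint64_t head = 1000, a = 1200, b = 950;    /* b sits just behind the head */
        uint64_t ca = seek_cost(head, a, 512, 2);
        uint64_t cb = seek_cost(head, b, 512, 2);

        printf("pick %s\n", ca <= cb ? "a" : "b");  /* prints "pick b" (100 < 200) */
        return 0;
}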
@@ -485,31 +377,26 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
485 } 377 }
486 378
487 /* 379 /*
488 * if queue was preempted, just add to front to be fair. busy_rr 380 * If this queue was preempted or is new (never been serviced), let
489 * isn't sorted, but insert at the back for fairness. 381 * it be added first for fairness but behind other new queues.
382 * Otherwise, just add to the back of the list.
490 */ 383 */
491 if (preempted || list == &cfqd->busy_rr) { 384 if (preempted || cfq_cfqq_queue_new(cfqq)) {
492 if (preempted) 385 struct list_head *n = list;
493 list = list->prev; 386 struct cfq_queue *__cfqq;
494 387
495 list_add_tail(&cfqq->cfq_list, list); 388 while (n->next != list) {
496 return; 389 __cfqq = list_entry_cfqq(n->next);
497 } 390 if (!cfq_cfqq_queue_new(__cfqq))
391 break;
498 392
499 /* 393 n = n->next;
500 * sort by when queue was last serviced 394 }
501 */
502 entry = list;
503 while ((entry = entry->prev) != list) {
504 struct cfq_queue *__cfqq = list_entry_cfqq(entry);
505 395
506 if (!__cfqq->service_last) 396 list = n;
507 break;
508 if (time_before(__cfqq->service_last, cfqq->service_last))
509 break;
510 } 397 }
511 398
512 list_add(&cfqq->cfq_list, entry); 399 list_add_tail(&cfqq->cfq_list, list);
513} 400}
514 401
515/* 402/*
@@ -531,7 +418,7 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
531{ 418{
532 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 419 BUG_ON(!cfq_cfqq_on_rr(cfqq));
533 cfq_clear_cfqq_on_rr(cfqq); 420 cfq_clear_cfqq_on_rr(cfqq);
534 list_move(&cfqq->cfq_list, &cfqd->empty_list); 421 list_del_init(&cfqq->cfq_list);
535 422
536 BUG_ON(!cfqd->busy_queues); 423 BUG_ON(!cfqd->busy_queues);
537 cfqd->busy_queues--; 424 cfqd->busy_queues--;
@@ -540,81 +427,43 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
540/* 427/*
541 * rb tree support functions 428 * rb tree support functions
542 */ 429 */
543static inline void cfq_del_crq_rb(struct cfq_rq *crq) 430static inline void cfq_del_rq_rb(struct request *rq)
544{ 431{
545 struct cfq_queue *cfqq = crq->cfq_queue; 432 struct cfq_queue *cfqq = RQ_CFQQ(rq);
546 struct cfq_data *cfqd = cfqq->cfqd; 433 struct cfq_data *cfqd = cfqq->cfqd;
547 const int sync = cfq_crq_is_sync(crq); 434 const int sync = rq_is_sync(rq);
548 435
549 BUG_ON(!cfqq->queued[sync]); 436 BUG_ON(!cfqq->queued[sync]);
550 cfqq->queued[sync]--; 437 cfqq->queued[sync]--;
551 438
552 cfq_update_next_crq(crq); 439 elv_rb_del(&cfqq->sort_list, rq);
553
554 rb_erase(&crq->rb_node, &cfqq->sort_list);
555 440
556 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) 441 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
557 cfq_del_cfqq_rr(cfqd, cfqq); 442 cfq_del_cfqq_rr(cfqd, cfqq);
558} 443}
559 444
560static struct cfq_rq * 445static void cfq_add_rq_rb(struct request *rq)
561__cfq_add_crq_rb(struct cfq_rq *crq)
562{ 446{
563 struct rb_node **p = &crq->cfq_queue->sort_list.rb_node; 447 struct cfq_queue *cfqq = RQ_CFQQ(rq);
564 struct rb_node *parent = NULL;
565 struct cfq_rq *__crq;
566
567 while (*p) {
568 parent = *p;
569 __crq = rb_entry_crq(parent);
570
571 if (crq->rb_key < __crq->rb_key)
572 p = &(*p)->rb_left;
573 else if (crq->rb_key > __crq->rb_key)
574 p = &(*p)->rb_right;
575 else
576 return __crq;
577 }
578
579 rb_link_node(&crq->rb_node, parent, p);
580 return NULL;
581}
582
583static void cfq_add_crq_rb(struct cfq_rq *crq)
584{
585 struct cfq_queue *cfqq = crq->cfq_queue;
586 struct cfq_data *cfqd = cfqq->cfqd; 448 struct cfq_data *cfqd = cfqq->cfqd;
587 struct request *rq = crq->request; 449 struct request *__alias;
588 struct cfq_rq *__alias;
589 450
590 crq->rb_key = rq_rb_key(rq); 451 cfqq->queued[rq_is_sync(rq)]++;
591 cfqq->queued[cfq_crq_is_sync(crq)]++;
592 452
593 /* 453 /*
594 * looks a little odd, but the first insert might return an alias. 454 * looks a little odd, but the first insert might return an alias.
595 * if that happens, put the alias on the dispatch list 455 * if that happens, put the alias on the dispatch list
596 */ 456 */
597 while ((__alias = __cfq_add_crq_rb(crq)) != NULL) 457 while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
598 cfq_dispatch_insert(cfqd->queue, __alias); 458 cfq_dispatch_insert(cfqd->queue, __alias);
599
600 rb_insert_color(&crq->rb_node, &cfqq->sort_list);
601
602 if (!cfq_cfqq_on_rr(cfqq))
603 cfq_add_cfqq_rr(cfqd, cfqq);
604
605 /*
606 * check if this request is a better next-serve candidate
607 */
608 cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
609} 459}
610 460
611static inline void 461static inline void
612cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) 462cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
613{ 463{
614 rb_erase(&crq->rb_node, &cfqq->sort_list); 464 elv_rb_del(&cfqq->sort_list, rq);
615 cfqq->queued[cfq_crq_is_sync(crq)]--; 465 cfqq->queued[rq_is_sync(rq)]--;
616 466 cfq_add_rq_rb(rq);
617 cfq_add_crq_rb(crq);
618} 467}
619 468
620static struct request * 469static struct request *
@@ -623,27 +472,14 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
623 struct task_struct *tsk = current; 472 struct task_struct *tsk = current;
624 pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio)); 473 pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio));
625 struct cfq_queue *cfqq; 474 struct cfq_queue *cfqq;
626 struct rb_node *n;
627 sector_t sector;
628 475
629 cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio); 476 cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio);
630 if (!cfqq) 477 if (cfqq) {
631 goto out; 478 sector_t sector = bio->bi_sector + bio_sectors(bio);
632
633 sector = bio->bi_sector + bio_sectors(bio);
634 n = cfqq->sort_list.rb_node;
635 while (n) {
636 struct cfq_rq *crq = rb_entry_crq(n);
637 479
638 if (sector < crq->rb_key) 480 return elv_rb_find(&cfqq->sort_list, sector);
639 n = n->rb_left;
640 else if (sector > crq->rb_key)
641 n = n->rb_right;
642 else
643 return crq->request;
644 } 481 }
645 482
646out:
647 return NULL; 483 return NULL;
648} 484}
649 485
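The slimmed-down cfq_find_rq_fmerge() above is a single lookup: compute where the new bio would end and search the queue's tree for a request that starts exactly there, the candidate for a front merge. The same lookup over a sorted array instead of an rbtree (find_front_merge is a made-up helper):

#include <stdio.h>

/* Return the index of a queued request whose start sector equals bio_end, or -1. */
static int find_front_merge(const long *starts, int n, long bio_end)
{
        int lo = 0, hi = n - 1;

        while (lo <= hi) {
                int mid = lo + (hi - lo) / 2;

                if (starts[mid] == bio_end)
                        return mid;
                if (starts[mid] < bio_end)
                        lo = mid + 1;
                else
                        hi = mid - 1;
        }
        return -1;
}

int main(void)
{
        long starts[] = { 64, 128, 256, 512 };      /* sorted request start sectors */

        printf("%d\n", find_front_merge(starts, 4, 256));   /* prints 2 */
        return 0;
}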
@@ -673,11 +509,18 @@ static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
673 509
674static void cfq_remove_request(struct request *rq) 510static void cfq_remove_request(struct request *rq)
675{ 511{
676 struct cfq_rq *crq = RQ_DATA(rq); 512 struct cfq_queue *cfqq = RQ_CFQQ(rq);
513
514 if (cfqq->next_rq == rq)
515 cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);
677 516
678 list_del_init(&rq->queuelist); 517 list_del_init(&rq->queuelist);
679 cfq_del_crq_rb(crq); 518 cfq_del_rq_rb(rq);
680 cfq_del_crq_hash(crq); 519
520 if (rq_is_meta(rq)) {
521 WARN_ON(!cfqq->meta_pending);
522 cfqq->meta_pending--;
523 }
681} 524}
682 525
683static int 526static int
@@ -685,39 +528,23 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
685{ 528{
686 struct cfq_data *cfqd = q->elevator->elevator_data; 529 struct cfq_data *cfqd = q->elevator->elevator_data;
687 struct request *__rq; 530 struct request *__rq;
688 int ret;
689
690 __rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
691 if (__rq && elv_rq_merge_ok(__rq, bio)) {
692 ret = ELEVATOR_BACK_MERGE;
693 goto out;
694 }
695 531
696 __rq = cfq_find_rq_fmerge(cfqd, bio); 532 __rq = cfq_find_rq_fmerge(cfqd, bio);
697 if (__rq && elv_rq_merge_ok(__rq, bio)) { 533 if (__rq && elv_rq_merge_ok(__rq, bio)) {
698 ret = ELEVATOR_FRONT_MERGE; 534 *req = __rq;
699 goto out; 535 return ELEVATOR_FRONT_MERGE;
700 } 536 }
701 537
702 return ELEVATOR_NO_MERGE; 538 return ELEVATOR_NO_MERGE;
703out:
704 *req = __rq;
705 return ret;
706} 539}
707 540
708static void cfq_merged_request(request_queue_t *q, struct request *req) 541static void cfq_merged_request(request_queue_t *q, struct request *req,
542 int type)
709{ 543{
710 struct cfq_data *cfqd = q->elevator->elevator_data; 544 if (type == ELEVATOR_FRONT_MERGE) {
711 struct cfq_rq *crq = RQ_DATA(req); 545 struct cfq_queue *cfqq = RQ_CFQQ(req);
712
713 cfq_del_crq_hash(crq);
714 cfq_add_crq_hash(cfqd, crq);
715
716 if (rq_rb_key(req) != crq->rb_key) {
717 struct cfq_queue *cfqq = crq->cfq_queue;
718 546
719 cfq_update_next_crq(crq); 547 cfq_reposition_rq_rb(cfqq, req);
720 cfq_reposition_crq_rb(cfqq, crq);
721 } 548 }
722} 549}
723 550
@@ -725,8 +552,6 @@ static void
725cfq_merged_requests(request_queue_t *q, struct request *rq, 552cfq_merged_requests(request_queue_t *q, struct request *rq,
726 struct request *next) 553 struct request *next)
727{ 554{
728 cfq_merged_request(q, rq);
729
730 /* 555 /*
731 * reposition in fifo if next is older than rq 556 * reposition in fifo if next is older than rq
732 */ 557 */
@@ -768,13 +593,12 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
768 if (cfq_cfqq_wait_request(cfqq)) 593 if (cfq_cfqq_wait_request(cfqq))
769 del_timer(&cfqd->idle_slice_timer); 594 del_timer(&cfqd->idle_slice_timer);
770 595
771 if (!preempted && !cfq_cfqq_dispatched(cfqq)) { 596 if (!preempted && !cfq_cfqq_dispatched(cfqq))
772 cfqq->service_last = now;
773 cfq_schedule_dispatch(cfqd); 597 cfq_schedule_dispatch(cfqd);
774 }
775 598
776 cfq_clear_cfqq_must_dispatch(cfqq); 599 cfq_clear_cfqq_must_dispatch(cfqq);
777 cfq_clear_cfqq_wait_request(cfqq); 600 cfq_clear_cfqq_wait_request(cfqq);
601 cfq_clear_cfqq_queue_new(cfqq);
778 602
779 /* 603 /*
780 * store what was left of this slice, if the queue idled out 604 * store what was left of this slice, if the queue idled out
@@ -868,26 +692,25 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
868{ 692{
869 struct cfq_queue *cfqq = NULL; 693 struct cfq_queue *cfqq = NULL;
870 694
871 /* 695 if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) {
872 * if current list is non-empty, grab first entry. if it is empty, 696 /*
873 * get next prio level and grab first entry then if any are spliced 697 * if current list is non-empty, grab first entry. if it is
874 */ 698 * empty, get next prio level and grab first entry then if any
875 if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) 699 * are spliced
700 */
876 cfqq = list_entry_cfqq(cfqd->cur_rr.next); 701 cfqq = list_entry_cfqq(cfqd->cur_rr.next);
877 702 } else if (!list_empty(&cfqd->busy_rr)) {
878 /* 703 /*
879 * If no new queues are available, check if the busy list has some 704 * If no new queues are available, check if the busy list has
880 * before falling back to idle io. 705 * some before falling back to idle io.
881 */ 706 */
882 if (!cfqq && !list_empty(&cfqd->busy_rr))
883 cfqq = list_entry_cfqq(cfqd->busy_rr.next); 707 cfqq = list_entry_cfqq(cfqd->busy_rr.next);
884 708 } else if (!list_empty(&cfqd->idle_rr)) {
885 /* 709 /*
886 * if we have idle queues and no rt or be queues had pending 710 * if we have idle queues and no rt or be queues had pending
887 * requests, either allow immediate service if the grace period 711 * requests, either allow immediate service if the grace period
888 * has passed or arm the idle grace timer 712 * has passed or arm the idle grace timer
889 */ 713 */
890 if (!cfqq && !list_empty(&cfqd->idle_rr)) {
891 unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; 714 unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;
892 715
893 if (time_after_eq(jiffies, end)) 716 if (time_after_eq(jiffies, end))
@@ -942,16 +765,14 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
942 return 1; 765 return 1;
943} 766}
944 767
945static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq) 768static void cfq_dispatch_insert(request_queue_t *q, struct request *rq)
946{ 769{
947 struct cfq_data *cfqd = q->elevator->elevator_data; 770 struct cfq_data *cfqd = q->elevator->elevator_data;
948 struct cfq_queue *cfqq = crq->cfq_queue; 771 struct cfq_queue *cfqq = RQ_CFQQ(rq);
949 struct request *rq;
950 772
951 cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq); 773 cfq_remove_request(rq);
952 cfq_remove_request(crq->request); 774 cfqq->on_dispatch[rq_is_sync(rq)]++;
953 cfqq->on_dispatch[cfq_crq_is_sync(crq)]++; 775 elv_dispatch_sort(q, rq);
954 elv_dispatch_sort(q, crq->request);
955 776
956 rq = list_entry(q->queue_head.prev, struct request, queuelist); 777 rq = list_entry(q->queue_head.prev, struct request, queuelist);
957 cfqd->last_sector = rq->sector + rq->nr_sectors; 778 cfqd->last_sector = rq->sector + rq->nr_sectors;
@@ -960,24 +781,23 @@ static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq)
960/* 781/*
961 * return expired entry, or NULL to just start from scratch in rbtree 782 * return expired entry, or NULL to just start from scratch in rbtree
962 */ 783 */
963static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) 784static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq)
964{ 785{
965 struct cfq_data *cfqd = cfqq->cfqd; 786 struct cfq_data *cfqd = cfqq->cfqd;
966 struct request *rq; 787 struct request *rq;
967 struct cfq_rq *crq; 788 int fifo;
968 789
969 if (cfq_cfqq_fifo_expire(cfqq)) 790 if (cfq_cfqq_fifo_expire(cfqq))
970 return NULL; 791 return NULL;
792 if (list_empty(&cfqq->fifo))
793 return NULL;
971 794
972 if (!list_empty(&cfqq->fifo)) { 795 fifo = cfq_cfqq_class_sync(cfqq);
973 int fifo = cfq_cfqq_class_sync(cfqq); 796 rq = rq_entry_fifo(cfqq->fifo.next);
974 797
975 crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next)); 798 if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) {
976 rq = crq->request; 799 cfq_mark_cfqq_fifo_expire(cfqq);
977 if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { 800 return rq;
978 cfq_mark_cfqq_fifo_expire(cfqq);
979 return crq;
980 }
981 } 801 }
982 802
983 return NULL; 803 return NULL;
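The rewritten cfq_check_fifo() above bails out early when the fifo is empty and otherwise looks only at the oldest request, expiring it once its age passes the per-class limit. A standalone sketch of that check; the signed-difference compare mimics the kernel's time_after() so counter wrap-around stays harmless.

#include <stdio.h>

struct req { unsigned long start_time; };

static int time_after(unsigned long a, unsigned long b)
{
        return (long)(b - a) < 0;           /* wrap-safe "a is later than b" */
}

/* Return the oldest request if it has sat in the fifo longer than 'expire'. */
static struct req *check_fifo(struct req *oldest, unsigned long now,
                              unsigned long expire)
{
        if (oldest == NULL)
                return NULL;
        if (time_after(now, oldest->start_time + expire))
                return oldest;
        return NULL;
}

int main(void)
{
        struct req r = { 100 };

        printf("%s\n", check_fifo(&r, 160, 50) ? "expired" : "still fresh");
        return 0;
}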
@@ -1063,25 +883,25 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1063 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); 883 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
1064 884
1065 do { 885 do {
1066 struct cfq_rq *crq; 886 struct request *rq;
1067 887
1068 /* 888 /*
1069 * follow expired path, else get first next available 889 * follow expired path, else get first next available
1070 */ 890 */
1071 if ((crq = cfq_check_fifo(cfqq)) == NULL) 891 if ((rq = cfq_check_fifo(cfqq)) == NULL)
1072 crq = cfqq->next_crq; 892 rq = cfqq->next_rq;
1073 893
1074 /* 894 /*
1075 * finally, insert request into driver dispatch list 895 * finally, insert request into driver dispatch list
1076 */ 896 */
1077 cfq_dispatch_insert(cfqd->queue, crq); 897 cfq_dispatch_insert(cfqd->queue, rq);
1078 898
1079 cfqd->dispatch_slice++; 899 cfqd->dispatch_slice++;
1080 dispatched++; 900 dispatched++;
1081 901
1082 if (!cfqd->active_cic) { 902 if (!cfqd->active_cic) {
1083 atomic_inc(&crq->io_context->ioc->refcount); 903 atomic_inc(&RQ_CIC(rq)->ioc->refcount);
1084 cfqd->active_cic = crq->io_context; 904 cfqd->active_cic = RQ_CIC(rq);
1085 } 905 }
1086 906
1087 if (RB_EMPTY_ROOT(&cfqq->sort_list)) 907 if (RB_EMPTY_ROOT(&cfqq->sort_list))
@@ -1112,13 +932,12 @@ static int
1112cfq_forced_dispatch_cfqqs(struct list_head *list) 932cfq_forced_dispatch_cfqqs(struct list_head *list)
1113{ 933{
1114 struct cfq_queue *cfqq, *next; 934 struct cfq_queue *cfqq, *next;
1115 struct cfq_rq *crq;
1116 int dispatched; 935 int dispatched;
1117 936
1118 dispatched = 0; 937 dispatched = 0;
1119 list_for_each_entry_safe(cfqq, next, list, cfq_list) { 938 list_for_each_entry_safe(cfqq, next, list, cfq_list) {
1120 while ((crq = cfqq->next_crq)) { 939 while (cfqq->next_rq) {
1121 cfq_dispatch_insert(cfqq->cfqd->queue, crq); 940 cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
1122 dispatched++; 941 dispatched++;
1123 } 942 }
1124 BUG_ON(!list_empty(&cfqq->fifo)); 943 BUG_ON(!list_empty(&cfqq->fifo));
@@ -1194,8 +1013,8 @@ cfq_dispatch_requests(request_queue_t *q, int force)
1194} 1013}
1195 1014
1196/* 1015/*
1197 * task holds one reference to the queue, dropped when task exits. each crq 1016 * task holds one reference to the queue, dropped when task exits. each rq
1198 * in-flight on this queue also holds a reference, dropped when crq is freed. 1017 * in-flight on this queue also holds a reference, dropped when rq is freed.
1199 * 1018 *
1200 * queue lock must be held here. 1019 * queue lock must be held here.
1201 */ 1020 */
@@ -1223,7 +1042,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
1223 kmem_cache_free(cfq_pool, cfqq); 1042 kmem_cache_free(cfq_pool, cfqq);
1224} 1043}
1225 1044
1226static inline struct cfq_queue * 1045static struct cfq_queue *
1227__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio, 1046__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio,
1228 const int hashval) 1047 const int hashval)
1229{ 1048{
@@ -1260,62 +1079,63 @@ static void cfq_free_io_context(struct io_context *ioc)
1260 freed++; 1079 freed++;
1261 } 1080 }
1262 1081
1263 if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone) 1082 elv_ioc_count_mod(ioc_count, -freed);
1083
1084 if (ioc_gone && !elv_ioc_count_read(ioc_count))
1264 complete(ioc_gone); 1085 complete(ioc_gone);
1265} 1086}
1266 1087
1267static void cfq_trim(struct io_context *ioc) 1088static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1268{ 1089{
1269 ioc->set_ioprio = NULL; 1090 if (unlikely(cfqq == cfqd->active_queue))
1270 cfq_free_io_context(ioc); 1091 __cfq_slice_expired(cfqd, cfqq, 0);
1092
1093 cfq_put_queue(cfqq);
1271} 1094}
1272 1095
1273/* 1096static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
1274 * Called with interrupts disabled 1097 struct cfq_io_context *cic)
1275 */
1276static void cfq_exit_single_io_context(struct cfq_io_context *cic)
1277{ 1098{
1278 struct cfq_data *cfqd = cic->key; 1099 list_del_init(&cic->queue_list);
1279 request_queue_t *q; 1100 smp_wmb();
1280 1101 cic->key = NULL;
1281 if (!cfqd)
1282 return;
1283
1284 q = cfqd->queue;
1285
1286 WARN_ON(!irqs_disabled());
1287
1288 spin_lock(q->queue_lock);
1289 1102
1290 if (cic->cfqq[ASYNC]) { 1103 if (cic->cfqq[ASYNC]) {
1291 if (unlikely(cic->cfqq[ASYNC] == cfqd->active_queue)) 1104 cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]);
1292 __cfq_slice_expired(cfqd, cic->cfqq[ASYNC], 0);
1293 cfq_put_queue(cic->cfqq[ASYNC]);
1294 cic->cfqq[ASYNC] = NULL; 1105 cic->cfqq[ASYNC] = NULL;
1295 } 1106 }
1296 1107
1297 if (cic->cfqq[SYNC]) { 1108 if (cic->cfqq[SYNC]) {
1298 if (unlikely(cic->cfqq[SYNC] == cfqd->active_queue)) 1109 cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]);
1299 __cfq_slice_expired(cfqd, cic->cfqq[SYNC], 0);
1300 cfq_put_queue(cic->cfqq[SYNC]);
1301 cic->cfqq[SYNC] = NULL; 1110 cic->cfqq[SYNC] = NULL;
1302 } 1111 }
1112}
1303 1113
1304 cic->key = NULL; 1114
1305 list_del_init(&cic->queue_list); 1115/*
1306 spin_unlock(q->queue_lock); 1116 * Called with interrupts disabled
1117 */
1118static void cfq_exit_single_io_context(struct cfq_io_context *cic)
1119{
1120 struct cfq_data *cfqd = cic->key;
1121
1122 if (cfqd) {
1123 request_queue_t *q = cfqd->queue;
1124
1125 spin_lock_irq(q->queue_lock);
1126 __cfq_exit_single_io_context(cfqd, cic);
1127 spin_unlock_irq(q->queue_lock);
1128 }
1307} 1129}
1308 1130
1309static void cfq_exit_io_context(struct io_context *ioc) 1131static void cfq_exit_io_context(struct io_context *ioc)
1310{ 1132{
1311 struct cfq_io_context *__cic; 1133 struct cfq_io_context *__cic;
1312 unsigned long flags;
1313 struct rb_node *n; 1134 struct rb_node *n;
1314 1135
1315 /* 1136 /*
1316 * put the reference this task is holding to the various queues 1137 * put the reference this task is holding to the various queues
1317 */ 1138 */
1318 spin_lock_irqsave(&cfq_exit_lock, flags);
1319 1139
1320 n = rb_first(&ioc->cic_root); 1140 n = rb_first(&ioc->cic_root);
1321 while (n != NULL) { 1141 while (n != NULL) {
@@ -1324,22 +1144,21 @@ static void cfq_exit_io_context(struct io_context *ioc)
1324 cfq_exit_single_io_context(__cic); 1144 cfq_exit_single_io_context(__cic);
1325 n = rb_next(n); 1145 n = rb_next(n);
1326 } 1146 }
1327
1328 spin_unlock_irqrestore(&cfq_exit_lock, flags);
1329} 1147}
1330 1148
1331static struct cfq_io_context * 1149static struct cfq_io_context *
1332cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 1150cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
1333{ 1151{
1334 struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); 1152 struct cfq_io_context *cic;
1335 1153
1154 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask, cfqd->queue->node);
1336 if (cic) { 1155 if (cic) {
1337 memset(cic, 0, sizeof(*cic)); 1156 memset(cic, 0, sizeof(*cic));
1338 cic->last_end_request = jiffies; 1157 cic->last_end_request = jiffies;
1339 INIT_LIST_HEAD(&cic->queue_list); 1158 INIT_LIST_HEAD(&cic->queue_list);
1340 cic->dtor = cfq_free_io_context; 1159 cic->dtor = cfq_free_io_context;
1341 cic->exit = cfq_exit_io_context; 1160 cic->exit = cfq_exit_io_context;
1342 atomic_inc(&ioc_count); 1161 elv_ioc_count_inc(ioc_count);
1343 } 1162 }
1344 1163
1345 return cic; 1164 return cic;
@@ -1420,15 +1239,12 @@ static inline void changed_ioprio(struct cfq_io_context *cic)
1420 spin_unlock(cfqd->queue->queue_lock); 1239 spin_unlock(cfqd->queue->queue_lock);
1421} 1240}
1422 1241
1423/* 1242static void cfq_ioc_set_ioprio(struct io_context *ioc)
1424 * callback from sys_ioprio_set, irqs are disabled
1425 */
1426static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
1427{ 1243{
1428 struct cfq_io_context *cic; 1244 struct cfq_io_context *cic;
1429 struct rb_node *n; 1245 struct rb_node *n;
1430 1246
1431 spin_lock(&cfq_exit_lock); 1247 ioc->ioprio_changed = 0;
1432 1248
1433 n = rb_first(&ioc->cic_root); 1249 n = rb_first(&ioc->cic_root);
1434 while (n != NULL) { 1250 while (n != NULL) {
@@ -1437,10 +1253,6 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
1437 changed_ioprio(cic); 1253 changed_ioprio(cic);
1438 n = rb_next(n); 1254 n = rb_next(n);
1439 } 1255 }
1440
1441 spin_unlock(&cfq_exit_lock);
1442
1443 return 0;
1444} 1256}
1445 1257
1446static struct cfq_queue * 1258static struct cfq_queue *
@@ -1460,12 +1272,18 @@ retry:
1460 cfqq = new_cfqq; 1272 cfqq = new_cfqq;
1461 new_cfqq = NULL; 1273 new_cfqq = NULL;
1462 } else if (gfp_mask & __GFP_WAIT) { 1274 } else if (gfp_mask & __GFP_WAIT) {
1275 /*
1276 * Inform the allocator of the fact that we will
1277 * just repeat this allocation if it fails, to allow
1278 * the allocator to do whatever it needs to attempt to
1279 * free memory.
1280 */
1463 spin_unlock_irq(cfqd->queue->queue_lock); 1281 spin_unlock_irq(cfqd->queue->queue_lock);
1464 new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); 1282 new_cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask|__GFP_NOFAIL, cfqd->queue->node);
1465 spin_lock_irq(cfqd->queue->queue_lock); 1283 spin_lock_irq(cfqd->queue->queue_lock);
1466 goto retry; 1284 goto retry;
1467 } else { 1285 } else {
1468 cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); 1286 cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask, cfqd->queue->node);
1469 if (!cfqq) 1287 if (!cfqq)
1470 goto out; 1288 goto out;
1471 } 1289 }
@@ -1480,13 +1298,13 @@ retry:
1480 hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); 1298 hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
1481 atomic_set(&cfqq->ref, 0); 1299 atomic_set(&cfqq->ref, 0);
1482 cfqq->cfqd = cfqd; 1300 cfqq->cfqd = cfqd;
1483 cfqq->service_last = 0;
1484 /* 1301 /*
1485 * set ->slice_left to allow preemption for a new process 1302 * set ->slice_left to allow preemption for a new process
1486 */ 1303 */
1487 cfqq->slice_left = 2 * cfqd->cfq_slice_idle; 1304 cfqq->slice_left = 2 * cfqd->cfq_slice_idle;
1488 cfq_mark_cfqq_idle_window(cfqq); 1305 cfq_mark_cfqq_idle_window(cfqq);
1489 cfq_mark_cfqq_prio_changed(cfqq); 1306 cfq_mark_cfqq_prio_changed(cfqq);
1307 cfq_mark_cfqq_queue_new(cfqq);
1490 cfq_init_prio_data(cfqq); 1308 cfq_init_prio_data(cfqq);
1491 } 1309 }
1492 1310
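cfq_get_queue() above keeps its retry loop but now tells the allocator, via __GFP_NOFAIL, that it will simply loop until the allocation succeeds, dropping the queue lock while it may sleep and re-checking once it relocks. The shape of that pattern in userspace terms, with a pthread mutex standing in for the queue lock and malloc for the slab allocator:

#include <pthread.h>
#include <stdlib.h>

struct queue { pthread_mutex_t lock; void *cfqq; };

/* Caller holds q->lock.  Returns the existing object or installs a new one. */
static void *get_or_alloc(struct queue *q, int may_sleep)
{
        void *newp = NULL;

retry:
        if (q->cfqq != NULL) {              /* found (maybe raced in while unlocked) */
                free(newp);
                return q->cfqq;
        }
        if (newp != NULL)
                return q->cfqq = newp;      /* install the object we pre-allocated   */
        if (may_sleep) {
                pthread_mutex_unlock(&q->lock);
                newp = malloc(64);          /* blocking side, assumed not to fail    */
                pthread_mutex_lock(&q->lock);
                goto retry;                 /* re-check everything under the lock    */
        }
        return q->cfqq = malloc(64);        /* non-sleeping side may return NULL     */
}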
@@ -1502,12 +1320,10 @@ out:
1502static void 1320static void
1503cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) 1321cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic)
1504{ 1322{
1505 spin_lock(&cfq_exit_lock); 1323 WARN_ON(!list_empty(&cic->queue_list));
1506 rb_erase(&cic->rb_node, &ioc->cic_root); 1324 rb_erase(&cic->rb_node, &ioc->cic_root);
1507 list_del_init(&cic->queue_list);
1508 spin_unlock(&cfq_exit_lock);
1509 kmem_cache_free(cfq_ioc_pool, cic); 1325 kmem_cache_free(cfq_ioc_pool, cic);
1510 atomic_dec(&ioc_count); 1326 elv_ioc_count_dec(ioc_count);
1511} 1327}
1512 1328
1513static struct cfq_io_context * 1329static struct cfq_io_context *
@@ -1551,7 +1367,6 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
1551 cic->ioc = ioc; 1367 cic->ioc = ioc;
1552 cic->key = cfqd; 1368 cic->key = cfqd;
1553 1369
1554 ioc->set_ioprio = cfq_ioc_set_ioprio;
1555restart: 1370restart:
1556 parent = NULL; 1371 parent = NULL;
1557 p = &ioc->cic_root.rb_node; 1372 p = &ioc->cic_root.rb_node;
@@ -1573,11 +1388,12 @@ restart:
1573 BUG(); 1388 BUG();
1574 } 1389 }
1575 1390
1576 spin_lock(&cfq_exit_lock);
1577 rb_link_node(&cic->rb_node, parent, p); 1391 rb_link_node(&cic->rb_node, parent, p);
1578 rb_insert_color(&cic->rb_node, &ioc->cic_root); 1392 rb_insert_color(&cic->rb_node, &ioc->cic_root);
1393
1394 spin_lock_irq(cfqd->queue->queue_lock);
1579 list_add(&cic->queue_list, &cfqd->cic_list); 1395 list_add(&cic->queue_list, &cfqd->cic_list);
1580 spin_unlock(&cfq_exit_lock); 1396 spin_unlock_irq(cfqd->queue->queue_lock);
1581} 1397}
1582 1398
1583/* 1399/*
@@ -1593,7 +1409,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
1593 1409
1594 might_sleep_if(gfp_mask & __GFP_WAIT); 1410 might_sleep_if(gfp_mask & __GFP_WAIT);
1595 1411
1596 ioc = get_io_context(gfp_mask); 1412 ioc = get_io_context(gfp_mask, cfqd->queue->node);
1597 if (!ioc) 1413 if (!ioc)
1598 return NULL; 1414 return NULL;
1599 1415
@@ -1607,6 +1423,10 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
1607 1423
1608 cfq_cic_link(cfqd, ioc, cic); 1424 cfq_cic_link(cfqd, ioc, cic);
1609out: 1425out:
1426 smp_read_barrier_depends();
1427 if (unlikely(ioc->ioprio_changed))
1428 cfq_ioc_set_ioprio(ioc);
1429
1610 return cic; 1430 return cic;
1611err: 1431err:
1612 put_io_context(ioc); 1432 put_io_context(ioc);
@@ -1640,15 +1460,15 @@ cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
1640 1460
1641static void 1461static void
1642cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic, 1462cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
1643 struct cfq_rq *crq) 1463 struct request *rq)
1644{ 1464{
1645 sector_t sdist; 1465 sector_t sdist;
1646 u64 total; 1466 u64 total;
1647 1467
1648 if (cic->last_request_pos < crq->request->sector) 1468 if (cic->last_request_pos < rq->sector)
1649 sdist = crq->request->sector - cic->last_request_pos; 1469 sdist = rq->sector - cic->last_request_pos;
1650 else 1470 else
1651 sdist = cic->last_request_pos - crq->request->sector; 1471 sdist = cic->last_request_pos - rq->sector;
1652 1472
1653 /* 1473 /*
1654 * Don't allow the seek distance to get too large from the 1474 * Don't allow the seek distance to get too large from the
@@ -1699,7 +1519,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1699 */ 1519 */
1700static int 1520static int
1701cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, 1521cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
1702 struct cfq_rq *crq) 1522 struct request *rq)
1703{ 1523{
1704 struct cfq_queue *cfqq = cfqd->active_queue; 1524 struct cfq_queue *cfqq = cfqd->active_queue;
1705 1525
@@ -1718,7 +1538,17 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
1718 */ 1538 */
1719 if (new_cfqq->slice_left < cfqd->cfq_slice_idle) 1539 if (new_cfqq->slice_left < cfqd->cfq_slice_idle)
1720 return 0; 1540 return 0;
1721 if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq)) 1541 /*
1542 * if the new request is sync, but the currently running queue is
1543 * not, let the sync request have priority.
1544 */
1545 if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
1546 return 1;
1547 /*
1548 * So both queues are sync. Let the new request get disk time if
1549 * it's a metadata request and the current queue is doing regular IO.
1550 */
1551 if (rq_is_meta(rq) && !cfqq->meta_pending)
1722 return 1; 1552 return 1;
1723 1553
1724 return 0; 1554 return 0;
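The two checks added above form a small preemption ladder: a sync request preempts a queue doing async IO, and a metadata request preempts a queue with no metadata pending. Condensed into a standalone predicate (the struct and the full function's surrounding early-exit checks are omitted or invented here):

struct queue_state { int is_sync; int meta_pending; };

/* 1 = the new request's queue should preempt the currently active queue. */
static int should_preempt(const struct queue_state *cur, int rq_sync, int rq_meta)
{
        if (rq_sync && !cur->is_sync)
                return 1;                   /* sync request vs. a non-sync queue  */
        if (rq_meta && !cur->meta_pending)
                return 1;                   /* metadata request vs. plain IO      */
        return 0;
}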
@@ -1730,47 +1560,45 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
1730 */ 1560 */
1731static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1561static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1732{ 1562{
1733 struct cfq_queue *__cfqq, *next; 1563 cfq_slice_expired(cfqd, 1);
1734
1735 list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list)
1736 cfq_resort_rr_list(__cfqq, 1);
1737 1564
1738 if (!cfqq->slice_left) 1565 if (!cfqq->slice_left)
1739 cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2; 1566 cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2;
1740 1567
1741 cfqq->slice_end = cfqq->slice_left + jiffies; 1568 /*
1742 cfq_slice_expired(cfqd, 1); 1569 * Put the new queue at the front of the current list,
1743 __cfq_set_active_queue(cfqd, cfqq); 1570 * so we know that it will be selected next.
1744} 1571 */
1745 1572 BUG_ON(!cfq_cfqq_on_rr(cfqq));
1746/* 1573 list_move(&cfqq->cfq_list, &cfqd->cur_rr);
1747 * should really be a ll_rw_blk.c helper
1748 */
1749static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1750{
1751 request_queue_t *q = cfqd->queue;
1752 1574
1753 if (!blk_queue_plugged(q)) 1575 cfqq->slice_end = cfqq->slice_left + jiffies;
1754 q->request_fn(q);
1755 else
1756 __generic_unplug_device(q);
1757} 1576}
1758 1577
1759/* 1578/*
1760 * Called when a new fs request (crq) is added (to cfqq). Check if there's 1579 * Called when a new fs request (rq) is added (to cfqq). Check if there's
1761 * something we should do about it 1580 * something we should do about it
1762 */ 1581 */
1763static void 1582static void
1764cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1583cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1765 struct cfq_rq *crq) 1584 struct request *rq)
1766{ 1585{
1767 struct cfq_io_context *cic = crq->io_context; 1586 struct cfq_io_context *cic = RQ_CIC(rq);
1587
1588 if (rq_is_meta(rq))
1589 cfqq->meta_pending++;
1590
1591 /*
1592 * check if this request is a better next-serve candidate)) {
1593 */
1594 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
1595 BUG_ON(!cfqq->next_rq);
1768 1596
1769 /* 1597 /*
1770 * we never wait for an async request and we don't allow preemption 1598 * we never wait for an async request and we don't allow preemption
1771 * of an async request. so just return early 1599 * of an async request. so just return early
1772 */ 1600 */
1773 if (!cfq_crq_is_sync(crq)) { 1601 if (!rq_is_sync(rq)) {
1774 /* 1602 /*
1775 * sync process issued an async request, if it's waiting 1603 * sync process issued an async request, if it's waiting
1776 * then expire it and kick rq handling. 1604 * then expire it and kick rq handling.
@@ -1778,17 +1606,17 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1778 if (cic == cfqd->active_cic && 1606 if (cic == cfqd->active_cic &&
1779 del_timer(&cfqd->idle_slice_timer)) { 1607 del_timer(&cfqd->idle_slice_timer)) {
1780 cfq_slice_expired(cfqd, 0); 1608 cfq_slice_expired(cfqd, 0);
1781 cfq_start_queueing(cfqd, cfqq); 1609 blk_start_queueing(cfqd->queue);
1782 } 1610 }
1783 return; 1611 return;
1784 } 1612 }
1785 1613
1786 cfq_update_io_thinktime(cfqd, cic); 1614 cfq_update_io_thinktime(cfqd, cic);
1787 cfq_update_io_seektime(cfqd, cic, crq); 1615 cfq_update_io_seektime(cfqd, cic, rq);
1788 cfq_update_idle_window(cfqd, cfqq, cic); 1616 cfq_update_idle_window(cfqd, cfqq, cic);
1789 1617
1790 cic->last_queue = jiffies; 1618 cic->last_queue = jiffies;
1791 cic->last_request_pos = crq->request->sector + crq->request->nr_sectors; 1619 cic->last_request_pos = rq->sector + rq->nr_sectors;
1792 1620
1793 if (cfqq == cfqd->active_queue) { 1621 if (cfqq == cfqd->active_queue) {
1794 /* 1622 /*
@@ -1799,9 +1627,9 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1799 if (cfq_cfqq_wait_request(cfqq)) { 1627 if (cfq_cfqq_wait_request(cfqq)) {
1800 cfq_mark_cfqq_must_dispatch(cfqq); 1628 cfq_mark_cfqq_must_dispatch(cfqq);
1801 del_timer(&cfqd->idle_slice_timer); 1629 del_timer(&cfqd->idle_slice_timer);
1802 cfq_start_queueing(cfqd, cfqq); 1630 blk_start_queueing(cfqd->queue);
1803 } 1631 }
1804 } else if (cfq_should_preempt(cfqd, cfqq, crq)) { 1632 } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
1805 /* 1633 /*
1806 * not the active queue - expire current slice if it is 1634 * not the active queue - expire current slice if it is
1807 * idle and has expired its mean thinktime or this new queue 1635
@@ -1809,34 +1637,32 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1809 */ 1637 */
1810 cfq_preempt_queue(cfqd, cfqq); 1638 cfq_preempt_queue(cfqd, cfqq);
1811 cfq_mark_cfqq_must_dispatch(cfqq); 1639 cfq_mark_cfqq_must_dispatch(cfqq);
1812 cfq_start_queueing(cfqd, cfqq); 1640 blk_start_queueing(cfqd->queue);
1813 } 1641 }
1814} 1642}
1815 1643
1816static void cfq_insert_request(request_queue_t *q, struct request *rq) 1644static void cfq_insert_request(request_queue_t *q, struct request *rq)
1817{ 1645{
1818 struct cfq_data *cfqd = q->elevator->elevator_data; 1646 struct cfq_data *cfqd = q->elevator->elevator_data;
1819 struct cfq_rq *crq = RQ_DATA(rq); 1647 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1820 struct cfq_queue *cfqq = crq->cfq_queue;
1821 1648
1822 cfq_init_prio_data(cfqq); 1649 cfq_init_prio_data(cfqq);
1823 1650
1824 cfq_add_crq_rb(crq); 1651 cfq_add_rq_rb(rq);
1825 1652
1826 list_add_tail(&rq->queuelist, &cfqq->fifo); 1653 if (!cfq_cfqq_on_rr(cfqq))
1654 cfq_add_cfqq_rr(cfqd, cfqq);
1827 1655
1828 if (rq_mergeable(rq)) 1656 list_add_tail(&rq->queuelist, &cfqq->fifo);
1829 cfq_add_crq_hash(cfqd, crq);
1830 1657
1831 cfq_crq_enqueued(cfqd, cfqq, crq); 1658 cfq_rq_enqueued(cfqd, cfqq, rq);
1832} 1659}
1833 1660
1834static void cfq_completed_request(request_queue_t *q, struct request *rq) 1661static void cfq_completed_request(request_queue_t *q, struct request *rq)
1835{ 1662{
1836 struct cfq_rq *crq = RQ_DATA(rq); 1663 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1837 struct cfq_queue *cfqq = crq->cfq_queue;
1838 struct cfq_data *cfqd = cfqq->cfqd; 1664 struct cfq_data *cfqd = cfqq->cfqd;
1839 const int sync = cfq_crq_is_sync(crq); 1665 const int sync = rq_is_sync(rq);
1840 unsigned long now; 1666 unsigned long now;
1841 1667
1842 now = jiffies; 1668 now = jiffies;
@@ -1849,15 +1675,11 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
1849 if (!cfq_class_idle(cfqq)) 1675 if (!cfq_class_idle(cfqq))
1850 cfqd->last_end_request = now; 1676 cfqd->last_end_request = now;
1851 1677
1852 if (!cfq_cfqq_dispatched(cfqq)) { 1678 if (!cfq_cfqq_dispatched(cfqq) && cfq_cfqq_on_rr(cfqq))
1853 if (cfq_cfqq_on_rr(cfqq)) { 1679 cfq_resort_rr_list(cfqq, 0);
1854 cfqq->service_last = now;
1855 cfq_resort_rr_list(cfqq, 0);
1856 }
1857 }
1858 1680
1859 if (sync) 1681 if (sync)
1860 crq->io_context->last_end_request = now; 1682 RQ_CIC(rq)->last_end_request = now;
1861 1683
1862 /* 1684 /*
1863 * If this is the active queue, check if it needs to be expired, 1685 * If this is the active queue, check if it needs to be expired,
@@ -1873,30 +1695,6 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
1873 } 1695 }
1874} 1696}
1875 1697
1876static struct request *
1877cfq_former_request(request_queue_t *q, struct request *rq)
1878{
1879 struct cfq_rq *crq = RQ_DATA(rq);
1880 struct rb_node *rbprev = rb_prev(&crq->rb_node);
1881
1882 if (rbprev)
1883 return rb_entry_crq(rbprev)->request;
1884
1885 return NULL;
1886}
1887
1888static struct request *
1889cfq_latter_request(request_queue_t *q, struct request *rq)
1890{
1891 struct cfq_rq *crq = RQ_DATA(rq);
1892 struct rb_node *rbnext = rb_next(&crq->rb_node);
1893
1894 if (rbnext)
1895 return rb_entry_crq(rbnext)->request;
1896
1897 return NULL;
1898}
1899
1900/* 1698/*
1901 * we temporarily boost lower priority queues if they are holding fs exclusive 1699 * we temporarily boost lower priority queues if they are holding fs exclusive
1902 * resources. they are boosted to normal prio (CLASS_BE/4) 1700 * resources. they are boosted to normal prio (CLASS_BE/4)
@@ -1933,9 +1731,7 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
1933 cfq_resort_rr_list(cfqq, 0); 1731 cfq_resort_rr_list(cfqq, 0);
1934} 1732}
1935 1733
1936static inline int 1734static inline int __cfq_may_queue(struct cfq_queue *cfqq)
1937__cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1938 struct task_struct *task, int rw)
1939{ 1735{
1940 if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && 1736 if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
1941 !cfq_cfqq_must_alloc_slice(cfqq)) { 1737 !cfq_cfqq_must_alloc_slice(cfqq)) {
@@ -1946,7 +1742,7 @@ __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1946 return ELV_MQUEUE_MAY; 1742 return ELV_MQUEUE_MAY;
1947} 1743}
1948 1744
1949static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) 1745static int cfq_may_queue(request_queue_t *q, int rw)
1950{ 1746{
1951 struct cfq_data *cfqd = q->elevator->elevator_data; 1747 struct cfq_data *cfqd = q->elevator->elevator_data;
1952 struct task_struct *tsk = current; 1748 struct task_struct *tsk = current;
@@ -1963,48 +1759,30 @@ static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio)
1963 cfq_init_prio_data(cfqq); 1759 cfq_init_prio_data(cfqq);
1964 cfq_prio_boost(cfqq); 1760 cfq_prio_boost(cfqq);
1965 1761
1966 return __cfq_may_queue(cfqd, cfqq, tsk, rw); 1762 return __cfq_may_queue(cfqq);
1967 } 1763 }
1968 1764
1969 return ELV_MQUEUE_MAY; 1765 return ELV_MQUEUE_MAY;
1970} 1766}
1971 1767
1972static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq)
1973{
1974 struct cfq_data *cfqd = q->elevator->elevator_data;
1975
1976 if (unlikely(cfqd->rq_starved)) {
1977 struct request_list *rl = &q->rq;
1978
1979 smp_mb();
1980 if (waitqueue_active(&rl->wait[READ]))
1981 wake_up(&rl->wait[READ]);
1982 if (waitqueue_active(&rl->wait[WRITE]))
1983 wake_up(&rl->wait[WRITE]);
1984 }
1985}
1986
1987/* 1768/*
1988 * queue lock held here 1769 * queue lock held here
1989 */ 1770 */
1990static void cfq_put_request(request_queue_t *q, struct request *rq) 1771static void cfq_put_request(request_queue_t *q, struct request *rq)
1991{ 1772{
1992 struct cfq_data *cfqd = q->elevator->elevator_data; 1773 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1993 struct cfq_rq *crq = RQ_DATA(rq);
1994 1774
1995 if (crq) { 1775 if (cfqq) {
1996 struct cfq_queue *cfqq = crq->cfq_queue;
1997 const int rw = rq_data_dir(rq); 1776 const int rw = rq_data_dir(rq);
1998 1777
1999 BUG_ON(!cfqq->allocated[rw]); 1778 BUG_ON(!cfqq->allocated[rw]);
2000 cfqq->allocated[rw]--; 1779 cfqq->allocated[rw]--;
2001 1780
2002 put_io_context(crq->io_context->ioc); 1781 put_io_context(RQ_CIC(rq)->ioc);
2003 1782
2004 mempool_free(crq, cfqd->crq_pool);
2005 rq->elevator_private = NULL; 1783 rq->elevator_private = NULL;
1784 rq->elevator_private2 = NULL;
2006 1785
2007 cfq_check_waiters(q, cfqq);
2008 cfq_put_queue(cfqq); 1786 cfq_put_queue(cfqq);
2009 } 1787 }
2010} 1788}
@@ -2013,8 +1791,7 @@ static void cfq_put_request(request_queue_t *q, struct request *rq)
2013 * Allocate cfq data structures associated with this request. 1791 * Allocate cfq data structures associated with this request.
2014 */ 1792 */
2015static int 1793static int
2016cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, 1794cfq_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
2017 gfp_t gfp_mask)
2018{ 1795{
2019 struct cfq_data *cfqd = q->elevator->elevator_data; 1796 struct cfq_data *cfqd = q->elevator->elevator_data;
2020 struct task_struct *tsk = current; 1797 struct task_struct *tsk = current;
@@ -2022,7 +1799,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
2022 const int rw = rq_data_dir(rq); 1799 const int rw = rq_data_dir(rq);
2023 pid_t key = cfq_queue_pid(tsk, rw); 1800 pid_t key = cfq_queue_pid(tsk, rw);
2024 struct cfq_queue *cfqq; 1801 struct cfq_queue *cfqq;
2025 struct cfq_rq *crq;
2026 unsigned long flags; 1802 unsigned long flags;
2027 int is_sync = key != CFQ_KEY_ASYNC; 1803 int is_sync = key != CFQ_KEY_ASYNC;
2028 1804
@@ -2046,42 +1822,18 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
2046 1822
2047 cfqq->allocated[rw]++; 1823 cfqq->allocated[rw]++;
2048 cfq_clear_cfqq_must_alloc(cfqq); 1824 cfq_clear_cfqq_must_alloc(cfqq);
2049 cfqd->rq_starved = 0;
2050 atomic_inc(&cfqq->ref); 1825 atomic_inc(&cfqq->ref);
2051 spin_unlock_irqrestore(q->queue_lock, flags);
2052 1826
2053 crq = mempool_alloc(cfqd->crq_pool, gfp_mask); 1827 spin_unlock_irqrestore(q->queue_lock, flags);
2054 if (crq) {
2055 RB_CLEAR_NODE(&crq->rb_node);
2056 crq->rb_key = 0;
2057 crq->request = rq;
2058 INIT_HLIST_NODE(&crq->hash);
2059 crq->cfq_queue = cfqq;
2060 crq->io_context = cic;
2061
2062 if (is_sync)
2063 cfq_mark_crq_is_sync(crq);
2064 else
2065 cfq_clear_crq_is_sync(crq);
2066 1828
2067 rq->elevator_private = crq; 1829 rq->elevator_private = cic;
2068 return 0; 1830 rq->elevator_private2 = cfqq;
2069 } 1831 return 0;
2070 1832
2071 spin_lock_irqsave(q->queue_lock, flags);
2072 cfqq->allocated[rw]--;
2073 if (!(cfqq->allocated[0] + cfqq->allocated[1]))
2074 cfq_mark_cfqq_must_alloc(cfqq);
2075 cfq_put_queue(cfqq);
2076queue_fail: 1833queue_fail:
2077 if (cic) 1834 if (cic)
2078 put_io_context(cic->ioc); 1835 put_io_context(cic->ioc);
2079 /* 1836
2080 * mark us rq allocation starved. we need to kickstart the process
2081 * ourselves if there are no pending requests that can do it for us.
2082 * that would be an extremely rare OOM situation
2083 */
2084 cfqd->rq_starved = 1;
2085 cfq_schedule_dispatch(cfqd); 1837 cfq_schedule_dispatch(cfqd);
2086 spin_unlock_irqrestore(q->queue_lock, flags); 1838 spin_unlock_irqrestore(q->queue_lock, flags);
2087 return 1; 1839 return 1;
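With struct cfq_rq gone, cfq_set_request() above simply parks the two per-request pointers it still needs, the cfq_io_context and the cfq_queue, in the request's opaque elevator fields instead of mempool-allocating a side structure, and cfq_put_request() clears them again. A toy illustration of that layout; struct request_toy is obviously not the kernel's struct request.

#include <stddef.h>

struct request_toy {
        void *elevator_private;             /* CFQ stores the cic here */
        void *elevator_private2;            /* ... and the cfqq here   */
};

#define RQ_CIC(rq)   ((rq)->elevator_private)
#define RQ_CFQQ(rq)  ((rq)->elevator_private2)

static void set_request(struct request_toy *rq, void *cic, void *cfqq)
{
        rq->elevator_private  = cic;
        rq->elevator_private2 = cfqq;
}

static void put_request(struct request_toy *rq)
{
        rq->elevator_private  = NULL;       /* mirrors clearing both slots on free */
        rq->elevator_private2 = NULL;
}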
@@ -2090,27 +1842,10 @@ queue_fail:
2090static void cfq_kick_queue(void *data) 1842static void cfq_kick_queue(void *data)
2091{ 1843{
2092 request_queue_t *q = data; 1844 request_queue_t *q = data;
2093 struct cfq_data *cfqd = q->elevator->elevator_data;
2094 unsigned long flags; 1845 unsigned long flags;
2095 1846
2096 spin_lock_irqsave(q->queue_lock, flags); 1847 spin_lock_irqsave(q->queue_lock, flags);
2097 1848 blk_start_queueing(q);
2098 if (cfqd->rq_starved) {
2099 struct request_list *rl = &q->rq;
2100
2101 /*
2102 * we aren't guaranteed to get a request after this, but we
2103 * have to be opportunistic
2104 */
2105 smp_mb();
2106 if (waitqueue_active(&rl->wait[READ]))
2107 wake_up(&rl->wait[READ]);
2108 if (waitqueue_active(&rl->wait[WRITE]))
2109 wake_up(&rl->wait[WRITE]);
2110 }
2111
2112 blk_remove_plug(q);
2113 q->request_fn(q);
2114 spin_unlock_irqrestore(q->queue_lock, flags); 1849 spin_unlock_irqrestore(q->queue_lock, flags);
2115} 1850}
2116 1851
@@ -2193,7 +1928,6 @@ static void cfq_exit_queue(elevator_t *e)
2193 1928
2194 cfq_shutdown_timer_wq(cfqd); 1929 cfq_shutdown_timer_wq(cfqd);
2195 1930
2196 spin_lock(&cfq_exit_lock);
2197 spin_lock_irq(q->queue_lock); 1931 spin_lock_irq(q->queue_lock);
2198 1932
2199 if (cfqd->active_queue) 1933 if (cfqd->active_queue)
@@ -2203,25 +1937,14 @@ static void cfq_exit_queue(elevator_t *e)
2203 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, 1937 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
2204 struct cfq_io_context, 1938 struct cfq_io_context,
2205 queue_list); 1939 queue_list);
2206 if (cic->cfqq[ASYNC]) { 1940
2207 cfq_put_queue(cic->cfqq[ASYNC]); 1941 __cfq_exit_single_io_context(cfqd, cic);
2208 cic->cfqq[ASYNC] = NULL;
2209 }
2210 if (cic->cfqq[SYNC]) {
2211 cfq_put_queue(cic->cfqq[SYNC]);
2212 cic->cfqq[SYNC] = NULL;
2213 }
2214 cic->key = NULL;
2215 list_del_init(&cic->queue_list);
2216 } 1942 }
2217 1943
2218 spin_unlock_irq(q->queue_lock); 1944 spin_unlock_irq(q->queue_lock);
2219 spin_unlock(&cfq_exit_lock);
2220 1945
2221 cfq_shutdown_timer_wq(cfqd); 1946 cfq_shutdown_timer_wq(cfqd);
2222 1947
2223 mempool_destroy(cfqd->crq_pool);
2224 kfree(cfqd->crq_hash);
2225 kfree(cfqd->cfq_hash); 1948 kfree(cfqd->cfq_hash);
2226 kfree(cfqd); 1949 kfree(cfqd);
2227} 1950}
@@ -2231,7 +1954,7 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
2231 struct cfq_data *cfqd; 1954 struct cfq_data *cfqd;
2232 int i; 1955 int i;
2233 1956
2234 cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); 1957 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node);
2235 if (!cfqd) 1958 if (!cfqd)
2236 return NULL; 1959 return NULL;
2237 1960
@@ -2243,23 +1966,12 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
2243 INIT_LIST_HEAD(&cfqd->busy_rr); 1966 INIT_LIST_HEAD(&cfqd->busy_rr);
2244 INIT_LIST_HEAD(&cfqd->cur_rr); 1967 INIT_LIST_HEAD(&cfqd->cur_rr);
2245 INIT_LIST_HEAD(&cfqd->idle_rr); 1968 INIT_LIST_HEAD(&cfqd->idle_rr);
2246 INIT_LIST_HEAD(&cfqd->empty_list);
2247 INIT_LIST_HEAD(&cfqd->cic_list); 1969 INIT_LIST_HEAD(&cfqd->cic_list);
2248 1970
2249 cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); 1971 cfqd->cfq_hash = kmalloc_node(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL, q->node);
2250 if (!cfqd->crq_hash)
2251 goto out_crqhash;
2252
2253 cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
2254 if (!cfqd->cfq_hash) 1972 if (!cfqd->cfq_hash)
2255 goto out_cfqhash; 1973 goto out_free;
2256
2257 cfqd->crq_pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, crq_pool);
2258 if (!cfqd->crq_pool)
2259 goto out_crqpool;
2260 1974
2261 for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
2262 INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
2263 for (i = 0; i < CFQ_QHASH_ENTRIES; i++) 1975 for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
2264 INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); 1976 INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
2265 1977
@@ -2275,7 +1987,6 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
2275 1987
2276 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q); 1988 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);
2277 1989
2278 cfqd->cfq_queued = cfq_queued;
2279 cfqd->cfq_quantum = cfq_quantum; 1990 cfqd->cfq_quantum = cfq_quantum;
2280 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; 1991 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
2281 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; 1992 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
@@ -2287,19 +1998,13 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
2287 cfqd->cfq_slice_idle = cfq_slice_idle; 1998 cfqd->cfq_slice_idle = cfq_slice_idle;
2288 1999
2289 return cfqd; 2000 return cfqd;
2290out_crqpool: 2001out_free:
2291 kfree(cfqd->cfq_hash);
2292out_cfqhash:
2293 kfree(cfqd->crq_hash);
2294out_crqhash:
2295 kfree(cfqd); 2002 kfree(cfqd);
2296 return NULL; 2003 return NULL;
2297} 2004}
2298 2005
2299static void cfq_slab_kill(void) 2006static void cfq_slab_kill(void)
2300{ 2007{
2301 if (crq_pool)
2302 kmem_cache_destroy(crq_pool);
2303 if (cfq_pool) 2008 if (cfq_pool)
2304 kmem_cache_destroy(cfq_pool); 2009 kmem_cache_destroy(cfq_pool);
2305 if (cfq_ioc_pool) 2010 if (cfq_ioc_pool)
@@ -2308,11 +2013,6 @@ static void cfq_slab_kill(void)
2308 2013
2309static int __init cfq_slab_setup(void) 2014static int __init cfq_slab_setup(void)
2310{ 2015{
2311 crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
2312 NULL, NULL);
2313 if (!crq_pool)
2314 goto fail;
2315
2316 cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0, 2016 cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
2317 NULL, NULL); 2017 NULL, NULL);
2318 if (!cfq_pool) 2018 if (!cfq_pool)
@@ -2358,7 +2058,6 @@ static ssize_t __FUNC(elevator_t *e, char *page) \
2358 return cfq_var_show(__data, (page)); \ 2058 return cfq_var_show(__data, (page)); \
2359} 2059}
2360SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); 2060SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
2361SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0);
2362SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); 2061SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
2363SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); 2062SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
2364SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0); 2063SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
@@ -2386,7 +2085,6 @@ static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
2386 return ret; \ 2085 return ret; \
2387} 2086}
2388STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); 2087STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
2389STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0);
2390STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1); 2088STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1);
2391STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1); 2089STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1);
2392STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); 2090STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
@@ -2402,7 +2100,6 @@ STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX,
2402 2100
2403static struct elv_fs_entry cfq_attrs[] = { 2101static struct elv_fs_entry cfq_attrs[] = {
2404 CFQ_ATTR(quantum), 2102 CFQ_ATTR(quantum),
2405 CFQ_ATTR(queued),
2406 CFQ_ATTR(fifo_expire_sync), 2103 CFQ_ATTR(fifo_expire_sync),
2407 CFQ_ATTR(fifo_expire_async), 2104 CFQ_ATTR(fifo_expire_async),
2408 CFQ_ATTR(back_seek_max), 2105 CFQ_ATTR(back_seek_max),
@@ -2425,14 +2122,14 @@ static struct elevator_type iosched_cfq = {
2425 .elevator_deactivate_req_fn = cfq_deactivate_request, 2122 .elevator_deactivate_req_fn = cfq_deactivate_request,
2426 .elevator_queue_empty_fn = cfq_queue_empty, 2123 .elevator_queue_empty_fn = cfq_queue_empty,
2427 .elevator_completed_req_fn = cfq_completed_request, 2124 .elevator_completed_req_fn = cfq_completed_request,
2428 .elevator_former_req_fn = cfq_former_request, 2125 .elevator_former_req_fn = elv_rb_former_request,
2429 .elevator_latter_req_fn = cfq_latter_request, 2126 .elevator_latter_req_fn = elv_rb_latter_request,
2430 .elevator_set_req_fn = cfq_set_request, 2127 .elevator_set_req_fn = cfq_set_request,
2431 .elevator_put_req_fn = cfq_put_request, 2128 .elevator_put_req_fn = cfq_put_request,
2432 .elevator_may_queue_fn = cfq_may_queue, 2129 .elevator_may_queue_fn = cfq_may_queue,
2433 .elevator_init_fn = cfq_init_queue, 2130 .elevator_init_fn = cfq_init_queue,
2434 .elevator_exit_fn = cfq_exit_queue, 2131 .elevator_exit_fn = cfq_exit_queue,
2435 .trim = cfq_trim, 2132 .trim = cfq_free_io_context,
2436 }, 2133 },
2437 .elevator_attrs = cfq_attrs, 2134 .elevator_attrs = cfq_attrs,
2438 .elevator_name = "cfq", 2135 .elevator_name = "cfq",
@@ -2463,12 +2160,12 @@ static int __init cfq_init(void)
2463 2160
2464static void __exit cfq_exit(void) 2161static void __exit cfq_exit(void)
2465{ 2162{
2466 DECLARE_COMPLETION(all_gone); 2163 DECLARE_COMPLETION_ONSTACK(all_gone);
2467 elv_unregister(&iosched_cfq); 2164 elv_unregister(&iosched_cfq);
2468 ioc_gone = &all_gone; 2165 ioc_gone = &all_gone;
2469 /* ioc_gone's update must be visible before reading ioc_count */ 2166 /* ioc_gone's update must be visible before reading ioc_count */
2470 smp_wmb(); 2167 smp_wmb();
2471 if (atomic_read(&ioc_count)) 2168 if (elv_ioc_count_read(ioc_count))
2472 wait_for_completion(ioc_gone); 2169 wait_for_completion(ioc_gone);
2473 synchronize_rcu(); 2170 synchronize_rcu();
2474 cfq_slab_kill(); 2171 cfq_slab_kill();
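A note on the cfq hunks above: the per-request "struct cfq_rq" wrapper, its slab and mempool, the crq hash and the rq_starved kick logic are all removed; the scheduler now parks its per-request context directly in rq->elevator_private (the cfq_io_context) and rq->elevator_private2 (the cfq_queue). The sketch below shows the bare elevator_set_req_fn/elevator_put_req_fn pattern for a hypothetical scheduler ("foo") that still allocates a per-request blob; the foo_rq type and the kmalloc are illustrative assumptions, not what cfq itself does.

/*
 * Minimal sketch of per-request scheduler data kept in the request itself.
 * "foo" is hypothetical; cfq stores existing objects (cic and cfqq) in the
 * two private slots instead of allocating anything here.
 */
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/jiffies.h>
#include <linux/slab.h>

struct foo_rq {
	unsigned long queued_at;	/* example per-request state */
};

static int foo_set_request(request_queue_t *q, struct request *rq,
			   gfp_t gfp_mask)
{
	struct foo_rq *frq = kmalloc(sizeof(*frq), gfp_mask);

	if (!frq)
		return 1;		/* non-zero tells the block layer the allocation failed */

	frq->queued_at = jiffies;
	rq->elevator_private = frq;	/* first private slot */
	rq->elevator_private2 = NULL;	/* second slot free for more state */
	return 0;
}

static void foo_put_request(request_queue_t *q, struct request *rq)
{
	kfree(rq->elevator_private);
	rq->elevator_private = NULL;
	rq->elevator_private2 = NULL;
}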
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index c7ca9f0b6498..b7c5b34cb7b4 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Deadline i/o scheduler. 2 * Deadline i/o scheduler.
3 * 3 *
4 * Copyright (C) 2002 Jens Axboe <axboe@suse.de> 4 * Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
5 */ 5 */
6#include <linux/kernel.h> 6#include <linux/kernel.h>
7#include <linux/fs.h> 7#include <linux/fs.h>
@@ -12,7 +12,6 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/compiler.h> 14#include <linux/compiler.h>
15#include <linux/hash.h>
16#include <linux/rbtree.h> 15#include <linux/rbtree.h>
17 16
18/* 17/*
@@ -24,13 +23,6 @@ static const int writes_starved = 2; /* max times reads can starve a write */
24static const int fifo_batch = 16; /* # of sequential requests treated as one 23static const int fifo_batch = 16; /* # of sequential requests treated as one
25 by the above parameters. For throughput. */ 24 by the above parameters. For throughput. */
26 25
27static const int deadline_hash_shift = 5;
28#define DL_HASH_BLOCK(sec) ((sec) >> 3)
29#define DL_HASH_FN(sec) (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift))
30#define DL_HASH_ENTRIES (1 << deadline_hash_shift)
31#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
32#define ON_HASH(drq) (!hlist_unhashed(&(drq)->hash))
33
34struct deadline_data { 26struct deadline_data {
35 /* 27 /*
36 * run time data 28 * run time data
@@ -45,8 +37,7 @@ struct deadline_data {
45 /* 37 /*
46 * next in sort order. read, write or both are NULL 38 * next in sort order. read, write or both are NULL
47 */ 39 */
48 struct deadline_rq *next_drq[2]; 40 struct request *next_rq[2];
49 struct hlist_head *hash; /* request hash */
50 unsigned int batching; /* number of sequential requests made */ 41 unsigned int batching; /* number of sequential requests made */
51 sector_t last_sector; /* head position */ 42 sector_t last_sector; /* head position */
52 unsigned int starved; /* times reads have starved writes */ 43 unsigned int starved; /* times reads have starved writes */
@@ -58,240 +49,69 @@ struct deadline_data {
58 int fifo_batch; 49 int fifo_batch;
59 int writes_starved; 50 int writes_starved;
60 int front_merges; 51 int front_merges;
61
62 mempool_t *drq_pool;
63}; 52};
64 53
65/* 54static void deadline_move_request(struct deadline_data *, struct request *);
66 * pre-request data.
67 */
68struct deadline_rq {
69 /*
70 * rbtree index, key is the starting offset
71 */
72 struct rb_node rb_node;
73 sector_t rb_key;
74
75 struct request *request;
76
77 /*
78 * request hash, key is the ending offset (for back merge lookup)
79 */
80 struct hlist_node hash;
81
82 /*
83 * expire fifo
84 */
85 struct list_head fifo;
86 unsigned long expires;
87};
88
89static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq);
90
91static kmem_cache_t *drq_pool;
92
93#define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private)
94 55
95/* 56#define RQ_RB_ROOT(dd, rq) (&(dd)->sort_list[rq_data_dir((rq))])
96 * the back merge hash support functions
97 */
98static inline void __deadline_del_drq_hash(struct deadline_rq *drq)
99{
100 hlist_del_init(&drq->hash);
101}
102
103static inline void deadline_del_drq_hash(struct deadline_rq *drq)
104{
105 if (ON_HASH(drq))
106 __deadline_del_drq_hash(drq);
107}
108
109static inline void
110deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
111{
112 struct request *rq = drq->request;
113
114 BUG_ON(ON_HASH(drq));
115
116 hlist_add_head(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]);
117}
118
119/*
120 * move hot entry to front of chain
121 */
122static inline void
123deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
124{
125 struct request *rq = drq->request;
126 struct hlist_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))];
127
128 if (ON_HASH(drq) && &drq->hash != head->first) {
129 hlist_del(&drq->hash);
130 hlist_add_head(&drq->hash, head);
131 }
132}
133
134static struct request *
135deadline_find_drq_hash(struct deadline_data *dd, sector_t offset)
136{
137 struct hlist_head *hash_list = &dd->hash[DL_HASH_FN(offset)];
138 struct hlist_node *entry, *next;
139 struct deadline_rq *drq;
140
141 hlist_for_each_entry_safe(drq, entry, next, hash_list, hash) {
142 struct request *__rq = drq->request;
143
144 BUG_ON(!ON_HASH(drq));
145
146 if (!rq_mergeable(__rq)) {
147 __deadline_del_drq_hash(drq);
148 continue;
149 }
150
151 if (rq_hash_key(__rq) == offset)
152 return __rq;
153 }
154
155 return NULL;
156}
157
158/*
159 * rb tree support functions
160 */
161#define rb_entry_drq(node) rb_entry((node), struct deadline_rq, rb_node)
162#define DRQ_RB_ROOT(dd, drq) (&(dd)->sort_list[rq_data_dir((drq)->request)])
163#define rq_rb_key(rq) (rq)->sector
164
165static struct deadline_rq *
166__deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
167{
168 struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node;
169 struct rb_node *parent = NULL;
170 struct deadline_rq *__drq;
171
172 while (*p) {
173 parent = *p;
174 __drq = rb_entry_drq(parent);
175
176 if (drq->rb_key < __drq->rb_key)
177 p = &(*p)->rb_left;
178 else if (drq->rb_key > __drq->rb_key)
179 p = &(*p)->rb_right;
180 else
181 return __drq;
182 }
183
184 rb_link_node(&drq->rb_node, parent, p);
185 return NULL;
186}
187 57
188static void 58static void
189deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) 59deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
190{ 60{
191 struct deadline_rq *__alias; 61 struct rb_root *root = RQ_RB_ROOT(dd, rq);
192 62 struct request *__alias;
193 drq->rb_key = rq_rb_key(drq->request);
194 63
195retry: 64retry:
196 __alias = __deadline_add_drq_rb(dd, drq); 65 __alias = elv_rb_add(root, rq);
197 if (!__alias) { 66 if (unlikely(__alias)) {
198 rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); 67 deadline_move_request(dd, __alias);
199 return; 68 goto retry;
200 } 69 }
201
202 deadline_move_request(dd, __alias);
203 goto retry;
204} 70}
205 71
206static inline void 72static inline void
207deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) 73deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
208{ 74{
209 const int data_dir = rq_data_dir(drq->request); 75 const int data_dir = rq_data_dir(rq);
210 76
211 if (dd->next_drq[data_dir] == drq) { 77 if (dd->next_rq[data_dir] == rq) {
212 struct rb_node *rbnext = rb_next(&drq->rb_node); 78 struct rb_node *rbnext = rb_next(&rq->rb_node);
213 79
214 dd->next_drq[data_dir] = NULL; 80 dd->next_rq[data_dir] = NULL;
215 if (rbnext) 81 if (rbnext)
216 dd->next_drq[data_dir] = rb_entry_drq(rbnext); 82 dd->next_rq[data_dir] = rb_entry_rq(rbnext);
217 }
218
219 BUG_ON(!RB_EMPTY_NODE(&drq->rb_node));
220 rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
221 RB_CLEAR_NODE(&drq->rb_node);
222}
223
224static struct request *
225deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir)
226{
227 struct rb_node *n = dd->sort_list[data_dir].rb_node;
228 struct deadline_rq *drq;
229
230 while (n) {
231 drq = rb_entry_drq(n);
232
233 if (sector < drq->rb_key)
234 n = n->rb_left;
235 else if (sector > drq->rb_key)
236 n = n->rb_right;
237 else
238 return drq->request;
239 } 83 }
240 84
241 return NULL; 85 elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
242} 86}
243 87
244/* 88/*
245 * deadline_find_first_drq finds the first (lowest sector numbered) request 89 * add rq to rbtree and fifo
246 * for the specified data_dir. Used to sweep back to the start of the disk
247 * (1-way elevator) after we process the last (highest sector) request.
248 */
249static struct deadline_rq *
250deadline_find_first_drq(struct deadline_data *dd, int data_dir)
251{
252 struct rb_node *n = dd->sort_list[data_dir].rb_node;
253
254 for (;;) {
255 if (n->rb_left == NULL)
256 return rb_entry_drq(n);
257
258 n = n->rb_left;
259 }
260}
261
262/*
263 * add drq to rbtree and fifo
264 */ 90 */
265static void 91static void
266deadline_add_request(struct request_queue *q, struct request *rq) 92deadline_add_request(struct request_queue *q, struct request *rq)
267{ 93{
268 struct deadline_data *dd = q->elevator->elevator_data; 94 struct deadline_data *dd = q->elevator->elevator_data;
269 struct deadline_rq *drq = RQ_DATA(rq); 95 const int data_dir = rq_data_dir(rq);
270 96
271 const int data_dir = rq_data_dir(drq->request); 97 deadline_add_rq_rb(dd, rq);
272 98
273 deadline_add_drq_rb(dd, drq);
274 /* 99 /*
275 * set expire time (only used for reads) and add to fifo list 100 * set expire time (only used for reads) and add to fifo list
276 */ 101 */
277 drq->expires = jiffies + dd->fifo_expire[data_dir]; 102 rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
278 list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]); 103 list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
279
280 if (rq_mergeable(rq))
281 deadline_add_drq_hash(dd, drq);
282} 104}
283 105
284/* 106/*
285 * remove rq from rbtree, fifo, and hash 107 * remove rq from rbtree and fifo.
286 */ 108 */
287static void deadline_remove_request(request_queue_t *q, struct request *rq) 109static void deadline_remove_request(request_queue_t *q, struct request *rq)
288{ 110{
289 struct deadline_rq *drq = RQ_DATA(rq);
290 struct deadline_data *dd = q->elevator->elevator_data; 111 struct deadline_data *dd = q->elevator->elevator_data;
291 112
292 list_del_init(&drq->fifo); 113 rq_fifo_clear(rq);
293 deadline_del_drq_rb(dd, drq); 114 deadline_del_rq_rb(dd, rq);
294 deadline_del_drq_hash(drq);
295} 115}
296 116
297static int 117static int
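The retry loop in deadline_add_rq_rb() above exists because the generic tree is keyed on the starting sector only: elv_rb_add() refuses to insert a second request with the same key and hands back the one already queued (an "alias") instead. Deadline resolves the collision by moving the alias to the dispatch queue and retrying. A condensed sketch of that pattern for a hypothetical scheduler that keeps nothing but a sector-sorted tree (no FIFO or hash of its own); the helper name is made up.

#include <linux/blkdev.h>
#include <linux/elevator.h>

/*
 * Sketch only: insert rq, resolving same-sector aliases by dispatching the
 * request that already occupies the slot.  Caller holds the queue lock.
 */
static void insert_resolving_aliases(request_queue_t *q, struct rb_root *root,
				     struct request *rq)
{
	struct request *alias;

	while ((alias = elv_rb_add(root, rq)) != NULL) {
		/* same starting sector already queued: take it out of the
		 * tree and hand it to the dispatch list, then try again */
		elv_rb_del(root, alias);
		elv_dispatch_add_tail(q, alias);
	}
}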
@@ -302,27 +122,14 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
302 int ret; 122 int ret;
303 123
304 /* 124 /*
305 * see if the merge hash can satisfy a back merge
306 */
307 __rq = deadline_find_drq_hash(dd, bio->bi_sector);
308 if (__rq) {
309 BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
310
311 if (elv_rq_merge_ok(__rq, bio)) {
312 ret = ELEVATOR_BACK_MERGE;
313 goto out;
314 }
315 }
316
317 /*
318 * check for front merge 125 * check for front merge
319 */ 126 */
320 if (dd->front_merges) { 127 if (dd->front_merges) {
321 sector_t rb_key = bio->bi_sector + bio_sectors(bio); 128 sector_t sector = bio->bi_sector + bio_sectors(bio);
322 129
323 __rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio)); 130 __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
324 if (__rq) { 131 if (__rq) {
325 BUG_ON(rb_key != rq_rb_key(__rq)); 132 BUG_ON(sector != __rq->sector);
326 133
327 if (elv_rq_merge_ok(__rq, bio)) { 134 if (elv_rq_merge_ok(__rq, bio)) {
328 ret = ELEVATOR_FRONT_MERGE; 135 ret = ELEVATOR_FRONT_MERGE;
@@ -333,29 +140,21 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
333 140
334 return ELEVATOR_NO_MERGE; 141 return ELEVATOR_NO_MERGE;
335out: 142out:
336 if (ret)
337 deadline_hot_drq_hash(dd, RQ_DATA(__rq));
338 *req = __rq; 143 *req = __rq;
339 return ret; 144 return ret;
340} 145}
341 146
342static void deadline_merged_request(request_queue_t *q, struct request *req) 147static void deadline_merged_request(request_queue_t *q, struct request *req,
148 int type)
343{ 149{
344 struct deadline_data *dd = q->elevator->elevator_data; 150 struct deadline_data *dd = q->elevator->elevator_data;
345 struct deadline_rq *drq = RQ_DATA(req);
346
347 /*
348 * hash always needs to be repositioned, key is end sector
349 */
350 deadline_del_drq_hash(drq);
351 deadline_add_drq_hash(dd, drq);
352 151
353 /* 152 /*
354 * if the merge was a front merge, we need to reposition request 153 * if the merge was a front merge, we need to reposition request
355 */ 154 */
356 if (rq_rb_key(req) != drq->rb_key) { 155 if (type == ELEVATOR_FRONT_MERGE) {
357 deadline_del_drq_rb(dd, drq); 156 elv_rb_del(RQ_RB_ROOT(dd, req), req);
358 deadline_add_drq_rb(dd, drq); 157 deadline_add_rq_rb(dd, req);
359 } 158 }
360} 159}
361 160
@@ -363,33 +162,14 @@ static void
363deadline_merged_requests(request_queue_t *q, struct request *req, 162deadline_merged_requests(request_queue_t *q, struct request *req,
364 struct request *next) 163 struct request *next)
365{ 164{
366 struct deadline_data *dd = q->elevator->elevator_data;
367 struct deadline_rq *drq = RQ_DATA(req);
368 struct deadline_rq *dnext = RQ_DATA(next);
369
370 BUG_ON(!drq);
371 BUG_ON(!dnext);
372
373 /* 165 /*
374 * reposition drq (this is the merged request) in hash, and in rbtree 166 * if next expires before rq, assign its expire time to rq
375 * in case of a front merge 167 * and move into next position (next will be deleted) in fifo
376 */ 168 */
377 deadline_del_drq_hash(drq); 169 if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
378 deadline_add_drq_hash(dd, drq); 170 if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
379 171 list_move(&req->queuelist, &next->queuelist);
380 if (rq_rb_key(req) != drq->rb_key) { 172 rq_set_fifo_time(req, rq_fifo_time(next));
381 deadline_del_drq_rb(dd, drq);
382 deadline_add_drq_rb(dd, drq);
383 }
384
385 /*
386 * if dnext expires before drq, assign its expire time to drq
387 * and move into dnext position (dnext will be deleted) in fifo
388 */
389 if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) {
390 if (time_before(dnext->expires, drq->expires)) {
391 list_move(&drq->fifo, &dnext->fifo);
392 drq->expires = dnext->expires;
393 } 173 }
394 } 174 }
395 175
@@ -403,52 +183,50 @@ deadline_merged_requests(request_queue_t *q, struct request *req,
403 * move request from sort list to dispatch queue. 183 * move request from sort list to dispatch queue.
404 */ 184 */
405static inline void 185static inline void
406deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq) 186deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
407{ 187{
408 request_queue_t *q = drq->request->q; 188 request_queue_t *q = rq->q;
409 189
410 deadline_remove_request(q, drq->request); 190 deadline_remove_request(q, rq);
411 elv_dispatch_add_tail(q, drq->request); 191 elv_dispatch_add_tail(q, rq);
412} 192}
413 193
414/* 194/*
415 * move an entry to dispatch queue 195 * move an entry to dispatch queue
416 */ 196 */
417static void 197static void
418deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq) 198deadline_move_request(struct deadline_data *dd, struct request *rq)
419{ 199{
420 const int data_dir = rq_data_dir(drq->request); 200 const int data_dir = rq_data_dir(rq);
421 struct rb_node *rbnext = rb_next(&drq->rb_node); 201 struct rb_node *rbnext = rb_next(&rq->rb_node);
422 202
423 dd->next_drq[READ] = NULL; 203 dd->next_rq[READ] = NULL;
424 dd->next_drq[WRITE] = NULL; 204 dd->next_rq[WRITE] = NULL;
425 205
426 if (rbnext) 206 if (rbnext)
427 dd->next_drq[data_dir] = rb_entry_drq(rbnext); 207 dd->next_rq[data_dir] = rb_entry_rq(rbnext);
428 208
429 dd->last_sector = drq->request->sector + drq->request->nr_sectors; 209 dd->last_sector = rq->sector + rq->nr_sectors;
430 210
431 /* 211 /*
432 * take it off the sort and fifo list, move 212 * take it off the sort and fifo list, move
433 * to dispatch queue 213 * to dispatch queue
434 */ 214 */
435 deadline_move_to_dispatch(dd, drq); 215 deadline_move_to_dispatch(dd, rq);
436} 216}
437 217
438#define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo)
439
440/* 218/*
441 * deadline_check_fifo returns 0 if there are no expired reads on the fifo, 219 * deadline_check_fifo returns 0 if there are no expired reads on the fifo,
442 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) 220 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
443 */ 221 */
444static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) 222static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
445{ 223{
446 struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next); 224 struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);
447 225
448 /* 226 /*
449 * drq is expired! 227 * rq is expired!
450 */ 228 */
451 if (time_after(jiffies, drq->expires)) 229 if (time_after(jiffies, rq_fifo_time(rq)))
452 return 1; 230 return 1;
453 231
454 return 0; 232 return 0;
@@ -463,21 +241,21 @@ static int deadline_dispatch_requests(request_queue_t *q, int force)
463 struct deadline_data *dd = q->elevator->elevator_data; 241 struct deadline_data *dd = q->elevator->elevator_data;
464 const int reads = !list_empty(&dd->fifo_list[READ]); 242 const int reads = !list_empty(&dd->fifo_list[READ]);
465 const int writes = !list_empty(&dd->fifo_list[WRITE]); 243 const int writes = !list_empty(&dd->fifo_list[WRITE]);
466 struct deadline_rq *drq; 244 struct request *rq;
467 int data_dir; 245 int data_dir;
468 246
469 /* 247 /*
470 * batches are currently reads XOR writes 248 * batches are currently reads XOR writes
471 */ 249 */
472 if (dd->next_drq[WRITE]) 250 if (dd->next_rq[WRITE])
473 drq = dd->next_drq[WRITE]; 251 rq = dd->next_rq[WRITE];
474 else 252 else
475 drq = dd->next_drq[READ]; 253 rq = dd->next_rq[READ];
476 254
477 if (drq) { 255 if (rq) {
478 /* we have a "next request" */ 256 /* we have a "next request" */
479 257
480 if (dd->last_sector != drq->request->sector) 258 if (dd->last_sector != rq->sector)
481 /* end the batch on a non sequential request */ 259 /* end the batch on a non sequential request */
482 dd->batching += dd->fifo_batch; 260 dd->batching += dd->fifo_batch;
483 261
@@ -526,30 +304,33 @@ dispatch_find_request:
526 if (deadline_check_fifo(dd, data_dir)) { 304 if (deadline_check_fifo(dd, data_dir)) {
527 /* An expired request exists - satisfy it */ 305 /* An expired request exists - satisfy it */
528 dd->batching = 0; 306 dd->batching = 0;
529 drq = list_entry_fifo(dd->fifo_list[data_dir].next); 307 rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
530 308
531 } else if (dd->next_drq[data_dir]) { 309 } else if (dd->next_rq[data_dir]) {
532 /* 310 /*
533 * The last req was the same dir and we have a next request in 311 * The last req was the same dir and we have a next request in
534 * sort order. No expired requests so continue on from here. 312 * sort order. No expired requests so continue on from here.
535 */ 313 */
536 drq = dd->next_drq[data_dir]; 314 rq = dd->next_rq[data_dir];
537 } else { 315 } else {
316 struct rb_node *node;
538 /* 317 /*
539 * The last req was the other direction or we have run out of 318 * The last req was the other direction or we have run out of
540 * higher-sectored requests. Go back to the lowest sectored 319 * higher-sectored requests. Go back to the lowest sectored
541 * request (1 way elevator) and start a new batch. 320 * request (1 way elevator) and start a new batch.
542 */ 321 */
543 dd->batching = 0; 322 dd->batching = 0;
544 drq = deadline_find_first_drq(dd, data_dir); 323 node = rb_first(&dd->sort_list[data_dir]);
324 if (node)
325 rq = rb_entry_rq(node);
545 } 326 }
546 327
547dispatch_request: 328dispatch_request:
548 /* 329 /*
549 * drq is the selected appropriate request. 330 * rq is the selected appropriate request.
550 */ 331 */
551 dd->batching++; 332 dd->batching++;
552 deadline_move_request(dd, drq); 333 deadline_move_request(dd, rq);
553 334
554 return 1; 335 return 1;
555} 336}
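With struct deadline_rq gone, both per-direction containers above are built from fields of struct request itself: the FIFO chains requests on rq->queuelist and stamps the expiry with rq_set_fifo_time(), and the sort list is a plain rbtree of rq->rb_node walked with the usual rbtree primitives. A small illustrative sketch of the two traversal idioms; the helper names are made up, the accessors are the ones used in the hunks above.

#include <linux/blkdev.h>
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/rbtree.h>

/* lowest-sector request in a per-direction sort list (the one-way sweep) */
static struct request *lowest_sector_rq(struct rb_root *root)
{
	struct rb_node *node = rb_first(root);

	/* rb_entry_rq() maps the embedded rq->rb_node back to its request */
	return node ? rb_entry_rq(node) : NULL;
}

/* count FIFO entries whose deadline has passed */
static int count_expired(struct list_head *fifo)
{
	struct request *rq;
	int expired = 0;

	list_for_each_entry(rq, fifo, queuelist)
		if (time_after(jiffies, rq_fifo_time(rq)))
			expired++;

	return expired;
}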
@@ -562,30 +343,6 @@ static int deadline_queue_empty(request_queue_t *q)
562 && list_empty(&dd->fifo_list[READ]); 343 && list_empty(&dd->fifo_list[READ]);
563} 344}
564 345
565static struct request *
566deadline_former_request(request_queue_t *q, struct request *rq)
567{
568 struct deadline_rq *drq = RQ_DATA(rq);
569 struct rb_node *rbprev = rb_prev(&drq->rb_node);
570
571 if (rbprev)
572 return rb_entry_drq(rbprev)->request;
573
574 return NULL;
575}
576
577static struct request *
578deadline_latter_request(request_queue_t *q, struct request *rq)
579{
580 struct deadline_rq *drq = RQ_DATA(rq);
581 struct rb_node *rbnext = rb_next(&drq->rb_node);
582
583 if (rbnext)
584 return rb_entry_drq(rbnext)->request;
585
586 return NULL;
587}
588
589static void deadline_exit_queue(elevator_t *e) 346static void deadline_exit_queue(elevator_t *e)
590{ 347{
591 struct deadline_data *dd = e->elevator_data; 348 struct deadline_data *dd = e->elevator_data;
@@ -593,46 +350,21 @@ static void deadline_exit_queue(elevator_t *e)
593 BUG_ON(!list_empty(&dd->fifo_list[READ])); 350 BUG_ON(!list_empty(&dd->fifo_list[READ]));
594 BUG_ON(!list_empty(&dd->fifo_list[WRITE])); 351 BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
595 352
596 mempool_destroy(dd->drq_pool);
597 kfree(dd->hash);
598 kfree(dd); 353 kfree(dd);
599} 354}
600 355
601/* 356/*
602 * initialize elevator private data (deadline_data), and alloc a drq for 357 * initialize elevator private data (deadline_data).
603 * each request on the free lists
604 */ 358 */
605static void *deadline_init_queue(request_queue_t *q, elevator_t *e) 359static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
606{ 360{
607 struct deadline_data *dd; 361 struct deadline_data *dd;
608 int i;
609
610 if (!drq_pool)
611 return NULL;
612 362
613 dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node); 363 dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
614 if (!dd) 364 if (!dd)
615 return NULL; 365 return NULL;
616 memset(dd, 0, sizeof(*dd)); 366 memset(dd, 0, sizeof(*dd));
617 367
618 dd->hash = kmalloc_node(sizeof(struct hlist_head)*DL_HASH_ENTRIES,
619 GFP_KERNEL, q->node);
620 if (!dd->hash) {
621 kfree(dd);
622 return NULL;
623 }
624
625 dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
626 mempool_free_slab, drq_pool, q->node);
627 if (!dd->drq_pool) {
628 kfree(dd->hash);
629 kfree(dd);
630 return NULL;
631 }
632
633 for (i = 0; i < DL_HASH_ENTRIES; i++)
634 INIT_HLIST_HEAD(&dd->hash[i]);
635
636 INIT_LIST_HEAD(&dd->fifo_list[READ]); 368 INIT_LIST_HEAD(&dd->fifo_list[READ]);
637 INIT_LIST_HEAD(&dd->fifo_list[WRITE]); 369 INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
638 dd->sort_list[READ] = RB_ROOT; 370 dd->sort_list[READ] = RB_ROOT;
@@ -645,39 +377,6 @@ static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
645 return dd; 377 return dd;
646} 378}
647 379
648static void deadline_put_request(request_queue_t *q, struct request *rq)
649{
650 struct deadline_data *dd = q->elevator->elevator_data;
651 struct deadline_rq *drq = RQ_DATA(rq);
652
653 mempool_free(drq, dd->drq_pool);
654 rq->elevator_private = NULL;
655}
656
657static int
658deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
659 gfp_t gfp_mask)
660{
661 struct deadline_data *dd = q->elevator->elevator_data;
662 struct deadline_rq *drq;
663
664 drq = mempool_alloc(dd->drq_pool, gfp_mask);
665 if (drq) {
666 memset(drq, 0, sizeof(*drq));
667 RB_CLEAR_NODE(&drq->rb_node);
668 drq->request = rq;
669
670 INIT_HLIST_NODE(&drq->hash);
671
672 INIT_LIST_HEAD(&drq->fifo);
673
674 rq->elevator_private = drq;
675 return 0;
676 }
677
678 return 1;
679}
680
681/* 380/*
682 * sysfs parts below 381 * sysfs parts below
683 */ 382 */
@@ -757,10 +456,8 @@ static struct elevator_type iosched_deadline = {
757 .elevator_dispatch_fn = deadline_dispatch_requests, 456 .elevator_dispatch_fn = deadline_dispatch_requests,
758 .elevator_add_req_fn = deadline_add_request, 457 .elevator_add_req_fn = deadline_add_request,
759 .elevator_queue_empty_fn = deadline_queue_empty, 458 .elevator_queue_empty_fn = deadline_queue_empty,
760 .elevator_former_req_fn = deadline_former_request, 459 .elevator_former_req_fn = elv_rb_former_request,
761 .elevator_latter_req_fn = deadline_latter_request, 460 .elevator_latter_req_fn = elv_rb_latter_request,
762 .elevator_set_req_fn = deadline_set_request,
763 .elevator_put_req_fn = deadline_put_request,
764 .elevator_init_fn = deadline_init_queue, 461 .elevator_init_fn = deadline_init_queue,
765 .elevator_exit_fn = deadline_exit_queue, 462 .elevator_exit_fn = deadline_exit_queue,
766 }, 463 },
@@ -772,24 +469,11 @@ static struct elevator_type iosched_deadline = {
772 469
773static int __init deadline_init(void) 470static int __init deadline_init(void)
774{ 471{
775 int ret; 472 return elv_register(&iosched_deadline);
776
777 drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
778 0, 0, NULL, NULL);
779
780 if (!drq_pool)
781 return -ENOMEM;
782
783 ret = elv_register(&iosched_deadline);
784 if (ret)
785 kmem_cache_destroy(drq_pool);
786
787 return ret;
788} 473}
789 474
790static void __exit deadline_exit(void) 475static void __exit deadline_exit(void)
791{ 476{
792 kmem_cache_destroy(drq_pool);
793 elv_unregister(&iosched_deadline); 477 elv_unregister(&iosched_deadline);
794} 478}
795 479
diff --git a/block/elevator.c b/block/elevator.c
index 9b72dc7c8a5c..487dd3da8853 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5 * 5 *
6 * 30042000 Jens Axboe <axboe@suse.de> : 6 * 30042000 Jens Axboe <axboe@kernel.dk> :
7 * 7 *
8 * Split the elevator a bit so that it is possible to choose a different 8 * Split the elevator a bit so that it is possible to choose a different
9 * one or even write a new "plug in". There are three pieces: 9 * one or even write a new "plug in". There are three pieces:
@@ -33,6 +33,7 @@
33#include <linux/compiler.h> 33#include <linux/compiler.h>
34#include <linux/delay.h> 34#include <linux/delay.h>
35#include <linux/blktrace_api.h> 35#include <linux/blktrace_api.h>
36#include <linux/hash.h>
36 37
37#include <asm/uaccess.h> 38#include <asm/uaccess.h>
38 39
@@ -40,6 +41,16 @@ static DEFINE_SPINLOCK(elv_list_lock);
40static LIST_HEAD(elv_list); 41static LIST_HEAD(elv_list);
41 42
42/* 43/*
44 * Merge hash stuff.
45 */
46static const int elv_hash_shift = 6;
47#define ELV_HASH_BLOCK(sec) ((sec) >> 3)
48#define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
49#define ELV_HASH_ENTRIES (1 << elv_hash_shift)
50#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
51#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
52
53/*
43 * can we safely merge with this request? 54 * can we safely merge with this request?
44 */ 55 */
45inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) 56inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
@@ -56,8 +67,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
56 /* 67 /*
57 * same device and no special stuff set, merge is ok 68 * same device and no special stuff set, merge is ok
58 */ 69 */
59 if (rq->rq_disk == bio->bi_bdev->bd_disk && 70 if (rq->rq_disk == bio->bi_bdev->bd_disk && !rq->special)
60 !rq->waiting && !rq->special)
61 return 1; 71 return 1;
62 72
63 return 0; 73 return 0;
@@ -151,27 +161,44 @@ __setup("elevator=", elevator_setup);
151 161
152static struct kobj_type elv_ktype; 162static struct kobj_type elv_ktype;
153 163
154static elevator_t *elevator_alloc(struct elevator_type *e) 164static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)
155{ 165{
156 elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL); 166 elevator_t *eq;
157 if (eq) { 167 int i;
158 memset(eq, 0, sizeof(*eq)); 168
159 eq->ops = &e->ops; 169 eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node);
160 eq->elevator_type = e; 170 if (unlikely(!eq))
161 kobject_init(&eq->kobj); 171 goto err;
162 snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); 172
163 eq->kobj.ktype = &elv_ktype; 173 memset(eq, 0, sizeof(*eq));
164 mutex_init(&eq->sysfs_lock); 174 eq->ops = &e->ops;
165 } else { 175 eq->elevator_type = e;
166 elevator_put(e); 176 kobject_init(&eq->kobj);
167 } 177 snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
178 eq->kobj.ktype = &elv_ktype;
179 mutex_init(&eq->sysfs_lock);
180
181 eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
182 GFP_KERNEL, q->node);
183 if (!eq->hash)
184 goto err;
185
186 for (i = 0; i < ELV_HASH_ENTRIES; i++)
187 INIT_HLIST_HEAD(&eq->hash[i]);
188
168 return eq; 189 return eq;
190err:
191 kfree(eq);
192 elevator_put(e);
193 return NULL;
169} 194}
170 195
171static void elevator_release(struct kobject *kobj) 196static void elevator_release(struct kobject *kobj)
172{ 197{
173 elevator_t *e = container_of(kobj, elevator_t, kobj); 198 elevator_t *e = container_of(kobj, elevator_t, kobj);
199
174 elevator_put(e->elevator_type); 200 elevator_put(e->elevator_type);
201 kfree(e->hash);
175 kfree(e); 202 kfree(e);
176} 203}
177 204
@@ -198,7 +225,7 @@ int elevator_init(request_queue_t *q, char *name)
198 e = elevator_get("noop"); 225 e = elevator_get("noop");
199 } 226 }
200 227
201 eq = elevator_alloc(e); 228 eq = elevator_alloc(q, e);
202 if (!eq) 229 if (!eq)
203 return -ENOMEM; 230 return -ENOMEM;
204 231
@@ -212,6 +239,8 @@ int elevator_init(request_queue_t *q, char *name)
212 return ret; 239 return ret;
213} 240}
214 241
242EXPORT_SYMBOL(elevator_init);
243
215void elevator_exit(elevator_t *e) 244void elevator_exit(elevator_t *e)
216{ 245{
217 mutex_lock(&e->sysfs_lock); 246 mutex_lock(&e->sysfs_lock);
@@ -223,10 +252,118 @@ void elevator_exit(elevator_t *e)
223 kobject_put(&e->kobj); 252 kobject_put(&e->kobj);
224} 253}
225 254
255EXPORT_SYMBOL(elevator_exit);
256
257static inline void __elv_rqhash_del(struct request *rq)
258{
259 hlist_del_init(&rq->hash);
260}
261
262static void elv_rqhash_del(request_queue_t *q, struct request *rq)
263{
264 if (ELV_ON_HASH(rq))
265 __elv_rqhash_del(rq);
266}
267
268static void elv_rqhash_add(request_queue_t *q, struct request *rq)
269{
270 elevator_t *e = q->elevator;
271
272 BUG_ON(ELV_ON_HASH(rq));
273 hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
274}
275
276static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
277{
278 __elv_rqhash_del(rq);
279 elv_rqhash_add(q, rq);
280}
281
282static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
283{
284 elevator_t *e = q->elevator;
285 struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
286 struct hlist_node *entry, *next;
287 struct request *rq;
288
289 hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
290 BUG_ON(!ELV_ON_HASH(rq));
291
292 if (unlikely(!rq_mergeable(rq))) {
293 __elv_rqhash_del(rq);
294 continue;
295 }
296
297 if (rq_hash_key(rq) == offset)
298 return rq;
299 }
300
301 return NULL;
302}
303
304/*
305 * RB-tree support functions for inserting/lookup/removal of requests
306 * in a sorted RB tree.
307 */
308struct request *elv_rb_add(struct rb_root *root, struct request *rq)
309{
310 struct rb_node **p = &root->rb_node;
311 struct rb_node *parent = NULL;
312 struct request *__rq;
313
314 while (*p) {
315 parent = *p;
316 __rq = rb_entry(parent, struct request, rb_node);
317
318 if (rq->sector < __rq->sector)
319 p = &(*p)->rb_left;
320 else if (rq->sector > __rq->sector)
321 p = &(*p)->rb_right;
322 else
323 return __rq;
324 }
325
326 rb_link_node(&rq->rb_node, parent, p);
327 rb_insert_color(&rq->rb_node, root);
328 return NULL;
329}
330
331EXPORT_SYMBOL(elv_rb_add);
332
333void elv_rb_del(struct rb_root *root, struct request *rq)
334{
335 BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
336 rb_erase(&rq->rb_node, root);
337 RB_CLEAR_NODE(&rq->rb_node);
338}
339
340EXPORT_SYMBOL(elv_rb_del);
341
342struct request *elv_rb_find(struct rb_root *root, sector_t sector)
343{
344 struct rb_node *n = root->rb_node;
345 struct request *rq;
346
347 while (n) {
348 rq = rb_entry(n, struct request, rb_node);
349
350 if (sector < rq->sector)
351 n = n->rb_left;
352 else if (sector > rq->sector)
353 n = n->rb_right;
354 else
355 return rq;
356 }
357
358 return NULL;
359}
360
361EXPORT_SYMBOL(elv_rb_find);
362
226/* 363/*
227 * Insert rq into dispatch queue of q. Queue lock must be held on 364 * Insert rq into dispatch queue of q. Queue lock must be held on
228 * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be 365 * entry. rq is sort inserted into the dispatch queue. To be used by
229 * appended to the dispatch queue. To be used by specific elevators. 366 * specific elevators.
230 */ 367 */
231void elv_dispatch_sort(request_queue_t *q, struct request *rq) 368void elv_dispatch_sort(request_queue_t *q, struct request *rq)
232{ 369{
@@ -235,6 +372,9 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
235 372
236 if (q->last_merge == rq) 373 if (q->last_merge == rq)
237 q->last_merge = NULL; 374 q->last_merge = NULL;
375
376 elv_rqhash_del(q, rq);
377
238 q->nr_sorted--; 378 q->nr_sorted--;
239 379
240 boundary = q->end_sector; 380 boundary = q->end_sector;
@@ -242,7 +382,7 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
242 list_for_each_prev(entry, &q->queue_head) { 382 list_for_each_prev(entry, &q->queue_head) {
243 struct request *pos = list_entry_rq(entry); 383 struct request *pos = list_entry_rq(entry);
244 384
245 if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) 385 if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
246 break; 386 break;
247 if (rq->sector >= boundary) { 387 if (rq->sector >= boundary) {
248 if (pos->sector < boundary) 388 if (pos->sector < boundary)
@@ -258,11 +398,38 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
258 list_add(&rq->queuelist, entry); 398 list_add(&rq->queuelist, entry);
259} 399}
260 400
401EXPORT_SYMBOL(elv_dispatch_sort);
402
403/*
404 * Insert rq into dispatch queue of q. Queue lock must be held on
405 * entry. rq is added to the back of the dispatch queue. To be used by
406 * specific elevators.
407 */
408void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
409{
410 if (q->last_merge == rq)
411 q->last_merge = NULL;
412
413 elv_rqhash_del(q, rq);
414
415 q->nr_sorted--;
416
417 q->end_sector = rq_end_sector(rq);
418 q->boundary_rq = rq;
419 list_add_tail(&rq->queuelist, &q->queue_head);
420}
421
422EXPORT_SYMBOL(elv_dispatch_add_tail);
423
261int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) 424int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
262{ 425{
263 elevator_t *e = q->elevator; 426 elevator_t *e = q->elevator;
427 struct request *__rq;
264 int ret; 428 int ret;
265 429
430 /*
431 * First try one-hit cache.
432 */
266 if (q->last_merge) { 433 if (q->last_merge) {
267 ret = elv_try_merge(q->last_merge, bio); 434 ret = elv_try_merge(q->last_merge, bio);
268 if (ret != ELEVATOR_NO_MERGE) { 435 if (ret != ELEVATOR_NO_MERGE) {
@@ -271,18 +438,30 @@ int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
271 } 438 }
272 } 439 }
273 440
441 /*
442 * See if our hash lookup can find a potential backmerge.
443 */
444 __rq = elv_rqhash_find(q, bio->bi_sector);
445 if (__rq && elv_rq_merge_ok(__rq, bio)) {
446 *req = __rq;
447 return ELEVATOR_BACK_MERGE;
448 }
449
274 if (e->ops->elevator_merge_fn) 450 if (e->ops->elevator_merge_fn)
275 return e->ops->elevator_merge_fn(q, req, bio); 451 return e->ops->elevator_merge_fn(q, req, bio);
276 452
277 return ELEVATOR_NO_MERGE; 453 return ELEVATOR_NO_MERGE;
278} 454}
279 455
280void elv_merged_request(request_queue_t *q, struct request *rq) 456void elv_merged_request(request_queue_t *q, struct request *rq, int type)
281{ 457{
282 elevator_t *e = q->elevator; 458 elevator_t *e = q->elevator;
283 459
284 if (e->ops->elevator_merged_fn) 460 if (e->ops->elevator_merged_fn)
285 e->ops->elevator_merged_fn(q, rq); 461 e->ops->elevator_merged_fn(q, rq, type);
462
463 if (type == ELEVATOR_BACK_MERGE)
464 elv_rqhash_reposition(q, rq);
286 465
287 q->last_merge = rq; 466 q->last_merge = rq;
288} 467}
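The merge path in elv_merge() above now has three stages: the one-hit q->last_merge cache, a back-merge lookup in the new elevator-wide hash, and only then the scheduler's own elevator_merge_fn (for deadline above, reduced to a front-merge check against its rbtree). The two lookups use different keys because the two merge types extend the request at different ends; the sketch below spells the keys out, with illustrative helper names.

#include <linux/bio.h>
#include <linux/blkdev.h>

/* back merge: bio starts where rq ends, so the hash is keyed on the end
 * sector (rq_hash_key() above is rq->sector + rq->nr_sectors) */
static int bio_back_merges_rq(struct request *rq, struct bio *bio)
{
	return rq->sector + rq->nr_sectors == bio->bi_sector;
}

/* front merge: bio ends where rq starts, so schedulers look the request up
 * by starting sector in their rbtree (the key deadline passes to elv_rb_find()) */
static int bio_front_merges_rq(struct request *rq, struct bio *bio)
{
	return bio->bi_sector + bio_sectors(bio) == rq->sector;
}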
@@ -294,8 +473,11 @@ void elv_merge_requests(request_queue_t *q, struct request *rq,
294 473
295 if (e->ops->elevator_merge_req_fn) 474 if (e->ops->elevator_merge_req_fn)
296 e->ops->elevator_merge_req_fn(q, rq, next); 475 e->ops->elevator_merge_req_fn(q, rq, next);
297 q->nr_sorted--;
298 476
477 elv_rqhash_reposition(q, rq);
478 elv_rqhash_del(q, next);
479
480 q->nr_sorted--;
299 q->last_merge = rq; 481 q->last_merge = rq;
300} 482}
301 483
@@ -313,7 +495,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
313 e->ops->elevator_deactivate_req_fn(q, rq); 495 e->ops->elevator_deactivate_req_fn(q, rq);
314 } 496 }
315 497
316 rq->flags &= ~REQ_STARTED; 498 rq->cmd_flags &= ~REQ_STARTED;
317 499
318 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); 500 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
319} 501}
@@ -344,13 +526,13 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
344 526
345 switch (where) { 527 switch (where) {
346 case ELEVATOR_INSERT_FRONT: 528 case ELEVATOR_INSERT_FRONT:
347 rq->flags |= REQ_SOFTBARRIER; 529 rq->cmd_flags |= REQ_SOFTBARRIER;
348 530
349 list_add(&rq->queuelist, &q->queue_head); 531 list_add(&rq->queuelist, &q->queue_head);
350 break; 532 break;
351 533
352 case ELEVATOR_INSERT_BACK: 534 case ELEVATOR_INSERT_BACK:
353 rq->flags |= REQ_SOFTBARRIER; 535 rq->cmd_flags |= REQ_SOFTBARRIER;
354 elv_drain_elevator(q); 536 elv_drain_elevator(q);
355 list_add_tail(&rq->queuelist, &q->queue_head); 537 list_add_tail(&rq->queuelist, &q->queue_head);
356 /* 538 /*
@@ -369,10 +551,14 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
369 551
370 case ELEVATOR_INSERT_SORT: 552 case ELEVATOR_INSERT_SORT:
371 BUG_ON(!blk_fs_request(rq)); 553 BUG_ON(!blk_fs_request(rq));
372 rq->flags |= REQ_SORTED; 554 rq->cmd_flags |= REQ_SORTED;
373 q->nr_sorted++; 555 q->nr_sorted++;
374 if (q->last_merge == NULL && rq_mergeable(rq)) 556 if (rq_mergeable(rq)) {
375 q->last_merge = rq; 557 elv_rqhash_add(q, rq);
558 if (!q->last_merge)
559 q->last_merge = rq;
560 }
561
376 /* 562 /*
377 * Some ioscheds (cfq) run q->request_fn directly, so 563 * Some ioscheds (cfq) run q->request_fn directly, so
378 * rq cannot be accessed after calling 564 * rq cannot be accessed after calling
@@ -387,7 +573,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
387 * insertion; otherwise, requests should be requeued 573 * insertion; otherwise, requests should be requeued
388 * in ordseq order. 574 * in ordseq order.
389 */ 575 */
390 rq->flags |= REQ_SOFTBARRIER; 576 rq->cmd_flags |= REQ_SOFTBARRIER;
391 577
392 if (q->ordseq == 0) { 578 if (q->ordseq == 0) {
393 list_add(&rq->queuelist, &q->queue_head); 579 list_add(&rq->queuelist, &q->queue_head);
@@ -429,9 +615,9 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
429 int plug) 615 int plug)
430{ 616{
431 if (q->ordcolor) 617 if (q->ordcolor)
432 rq->flags |= REQ_ORDERED_COLOR; 618 rq->cmd_flags |= REQ_ORDERED_COLOR;
433 619
434 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { 620 if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
435 /* 621 /*
436 * toggle ordered color 622 * toggle ordered color
437 */ 623 */
@@ -452,7 +638,7 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
452 q->end_sector = rq_end_sector(rq); 638 q->end_sector = rq_end_sector(rq);
453 q->boundary_rq = rq; 639 q->boundary_rq = rq;
454 } 640 }
455 } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) 641 } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
456 where = ELEVATOR_INSERT_BACK; 642 where = ELEVATOR_INSERT_BACK;
457 643
458 if (plug) 644 if (plug)
@@ -461,6 +647,8 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
461 elv_insert(q, rq, where); 647 elv_insert(q, rq, where);
462} 648}
463 649
650EXPORT_SYMBOL(__elv_add_request);
651
464void elv_add_request(request_queue_t *q, struct request *rq, int where, 652void elv_add_request(request_queue_t *q, struct request *rq, int where,
465 int plug) 653 int plug)
466{ 654{
@@ -471,6 +659,8 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where,
471 spin_unlock_irqrestore(q->queue_lock, flags); 659 spin_unlock_irqrestore(q->queue_lock, flags);
472} 660}
473 661
662EXPORT_SYMBOL(elv_add_request);
663
474static inline struct request *__elv_next_request(request_queue_t *q) 664static inline struct request *__elv_next_request(request_queue_t *q)
475{ 665{
476 struct request *rq; 666 struct request *rq;
@@ -493,7 +683,7 @@ struct request *elv_next_request(request_queue_t *q)
493 int ret; 683 int ret;
494 684
495 while ((rq = __elv_next_request(q)) != NULL) { 685 while ((rq = __elv_next_request(q)) != NULL) {
496 if (!(rq->flags & REQ_STARTED)) { 686 if (!(rq->cmd_flags & REQ_STARTED)) {
497 elevator_t *e = q->elevator; 687 elevator_t *e = q->elevator;
498 688
499 /* 689 /*
@@ -510,7 +700,7 @@ struct request *elv_next_request(request_queue_t *q)
510 * it, a request that has been delayed should 700 * it, a request that has been delayed should
511 * not be passed by new incoming requests 701 * not be passed by new incoming requests
512 */ 702 */
513 rq->flags |= REQ_STARTED; 703 rq->cmd_flags |= REQ_STARTED;
514 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 704 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
515 } 705 }
516 706
@@ -519,7 +709,7 @@ struct request *elv_next_request(request_queue_t *q)
519 q->boundary_rq = NULL; 709 q->boundary_rq = NULL;
520 } 710 }
521 711
522 if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn) 712 if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
523 break; 713 break;
524 714
525 ret = q->prep_rq_fn(q, rq); 715 ret = q->prep_rq_fn(q, rq);
@@ -541,7 +731,7 @@ struct request *elv_next_request(request_queue_t *q)
541 nr_bytes = rq->data_len; 731 nr_bytes = rq->data_len;
542 732
543 blkdev_dequeue_request(rq); 733 blkdev_dequeue_request(rq);
544 rq->flags |= REQ_QUIET; 734 rq->cmd_flags |= REQ_QUIET;
545 end_that_request_chunk(rq, 0, nr_bytes); 735 end_that_request_chunk(rq, 0, nr_bytes);
546 end_that_request_last(rq, 0); 736 end_that_request_last(rq, 0);
547 } else { 737 } else {
@@ -554,9 +744,12 @@ struct request *elv_next_request(request_queue_t *q)
554 return rq; 744 return rq;
555} 745}
556 746
747EXPORT_SYMBOL(elv_next_request);
748
557void elv_dequeue_request(request_queue_t *q, struct request *rq) 749void elv_dequeue_request(request_queue_t *q, struct request *rq)
558{ 750{
559 BUG_ON(list_empty(&rq->queuelist)); 751 BUG_ON(list_empty(&rq->queuelist));
752 BUG_ON(ELV_ON_HASH(rq));
560 753
561 list_del_init(&rq->queuelist); 754 list_del_init(&rq->queuelist);
562 755
@@ -569,6 +762,8 @@ void elv_dequeue_request(request_queue_t *q, struct request *rq)
569 q->in_flight++; 762 q->in_flight++;
570} 763}
571 764
765EXPORT_SYMBOL(elv_dequeue_request);
766
572int elv_queue_empty(request_queue_t *q) 767int elv_queue_empty(request_queue_t *q)
573{ 768{
574 elevator_t *e = q->elevator; 769 elevator_t *e = q->elevator;
@@ -582,6 +777,8 @@ int elv_queue_empty(request_queue_t *q)
582 return 1; 777 return 1;
583} 778}
584 779
780EXPORT_SYMBOL(elv_queue_empty);
781
585struct request *elv_latter_request(request_queue_t *q, struct request *rq) 782struct request *elv_latter_request(request_queue_t *q, struct request *rq)
586{ 783{
587 elevator_t *e = q->elevator; 784 elevator_t *e = q->elevator;
@@ -600,13 +797,12 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq)
600 return NULL; 797 return NULL;
601} 798}
602 799
603int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio, 800int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
604 gfp_t gfp_mask)
605{ 801{
606 elevator_t *e = q->elevator; 802 elevator_t *e = q->elevator;
607 803
608 if (e->ops->elevator_set_req_fn) 804 if (e->ops->elevator_set_req_fn)
609 return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask); 805 return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
610 806
611 rq->elevator_private = NULL; 807 rq->elevator_private = NULL;
612 return 0; 808 return 0;
@@ -620,12 +816,12 @@ void elv_put_request(request_queue_t *q, struct request *rq)
620 e->ops->elevator_put_req_fn(q, rq); 816 e->ops->elevator_put_req_fn(q, rq);
621} 817}
622 818
623int elv_may_queue(request_queue_t *q, int rw, struct bio *bio) 819int elv_may_queue(request_queue_t *q, int rw)
624{ 820{
625 elevator_t *e = q->elevator; 821 elevator_t *e = q->elevator;
626 822
627 if (e->ops->elevator_may_queue_fn) 823 if (e->ops->elevator_may_queue_fn)
628 return e->ops->elevator_may_queue_fn(q, rw, bio); 824 return e->ops->elevator_may_queue_fn(q, rw);
629 825
630 return ELV_MQUEUE_MAY; 826 return ELV_MQUEUE_MAY;
631} 827}
@@ -792,7 +988,7 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
792 /* 988 /*
793 * Allocate new elevator 989 * Allocate new elevator
794 */ 990 */
795 e = elevator_alloc(new_e); 991 e = elevator_alloc(q, new_e);
796 if (!e) 992 if (!e)
797 return 0; 993 return 0;
798 994
@@ -908,11 +1104,26 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name)
908 return len; 1104 return len;
909} 1105}
910 1106
911EXPORT_SYMBOL(elv_dispatch_sort); 1107struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
912EXPORT_SYMBOL(elv_add_request); 1108{
913EXPORT_SYMBOL(__elv_add_request); 1109 struct rb_node *rbprev = rb_prev(&rq->rb_node);
914EXPORT_SYMBOL(elv_next_request); 1110
915EXPORT_SYMBOL(elv_dequeue_request); 1111 if (rbprev)
916EXPORT_SYMBOL(elv_queue_empty); 1112 return rb_entry_rq(rbprev);
917EXPORT_SYMBOL(elevator_exit); 1113
918EXPORT_SYMBOL(elevator_init); 1114 return NULL;
1115}
1116
1117EXPORT_SYMBOL(elv_rb_former_request);
1118
1119struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
1120{
1121 struct rb_node *rbnext = rb_next(&rq->rb_node);
1122
1123 if (rbnext)
1124 return rb_entry_rq(rbnext);
1125
1126 return NULL;
1127}
1128
1129EXPORT_SYMBOL(elv_rb_latter_request);
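The elevator.c changes above also give schedulers a small public rbtree API (elv_rb_add, elv_rb_find, elv_rb_del plus the former/latter helpers) operating on the rb_node now embedded in struct request and keyed on rq->sector. A brief usage sketch, assuming the caller holds the queue lock; the function and variable names are illustrative only.

#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/kernel.h>
#include <linux/rbtree.h>

static void example_rb_usage(struct rb_root *sort_list, struct request *rq)
{
	struct request *found;

	/*
	 * Insert: a non-NULL return means another request with the same
	 * starting sector is already queued and nothing was inserted.
	 */
	if (elv_rb_add(sort_list, rq) != NULL)
		return;

	/* O(log n) lookup by starting sector, e.g. for front merges */
	found = elv_rb_find(sort_list, rq->sector);
	BUG_ON(found != rq);

	/* remove again; elv_rb_del() also clears rq->rb_node */
	elv_rb_del(sort_list, rq);
}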
diff --git a/block/genhd.c b/block/genhd.c
index 25d1f42568cc..653919d50cd4 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -295,10 +295,15 @@ static struct kobject *base_probe(dev_t dev, int *part, void *data)
295 295
296static int __init genhd_device_init(void) 296static int __init genhd_device_init(void)
297{ 297{
298 int err;
299
298 bdev_map = kobj_map_init(base_probe, &block_subsys_lock); 300 bdev_map = kobj_map_init(base_probe, &block_subsys_lock);
299 blk_dev_init(); 301 blk_dev_init();
300 subsystem_register(&block_subsys); 302 err = subsystem_register(&block_subsys);
301 return 0; 303 if (err < 0)
304 printk(KERN_WARNING "%s: subsystem_register error: %d\n",
305 __FUNCTION__, err);
306 return err;
302} 307}
303 308
304subsys_initcall(genhd_device_init); 309subsys_initcall(genhd_device_init);
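genhd_device_init() above now propagates a subsystem_register() failure instead of returning 0 unconditionally. The same check, log and return shape applies to any initcall that registers with a subsystem; a sketch with a hypothetical subsystem follows.

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kobject.h>

extern struct subsystem example_subsys;	/* hypothetical, defined elsewhere */

static int __init example_device_init(void)
{
	int err = subsystem_register(&example_subsys);

	if (err < 0)
		printk(KERN_WARNING "%s: subsystem_register error: %d\n",
		       __FUNCTION__, err);
	return err;
}
subsys_initcall(example_device_init);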
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 9c3a06bcb7ba..83425fb3c8db 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -39,6 +39,7 @@ static void blk_unplug_timeout(unsigned long data);
39static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); 39static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
40static void init_request_from_bio(struct request *req, struct bio *bio); 40static void init_request_from_bio(struct request *req, struct bio *bio);
41static int __make_request(request_queue_t *q, struct bio *bio); 41static int __make_request(request_queue_t *q, struct bio *bio);
42static struct io_context *current_io_context(gfp_t gfp_flags, int node);
42 43
43/* 44/*
44 * For the allocated request tables 45 * For the allocated request tables
@@ -277,19 +278,19 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
277 278
278EXPORT_SYMBOL(blk_queue_make_request); 279EXPORT_SYMBOL(blk_queue_make_request);
279 280
280static inline void rq_init(request_queue_t *q, struct request *rq) 281static void rq_init(request_queue_t *q, struct request *rq)
281{ 282{
282 INIT_LIST_HEAD(&rq->queuelist); 283 INIT_LIST_HEAD(&rq->queuelist);
283 INIT_LIST_HEAD(&rq->donelist); 284 INIT_LIST_HEAD(&rq->donelist);
284 285
285 rq->errors = 0; 286 rq->errors = 0;
286 rq->rq_status = RQ_ACTIVE;
287 rq->bio = rq->biotail = NULL; 287 rq->bio = rq->biotail = NULL;
288 INIT_HLIST_NODE(&rq->hash);
289 RB_CLEAR_NODE(&rq->rb_node);
288 rq->ioprio = 0; 290 rq->ioprio = 0;
289 rq->buffer = NULL; 291 rq->buffer = NULL;
290 rq->ref_count = 1; 292 rq->ref_count = 1;
291 rq->q = q; 293 rq->q = q;
292 rq->waiting = NULL;
293 rq->special = NULL; 294 rq->special = NULL;
294 rq->data_len = 0; 295 rq->data_len = 0;
295 rq->data = NULL; 296 rq->data = NULL;
@@ -382,8 +383,8 @@ unsigned blk_ordered_req_seq(struct request *rq)
382 if (rq == &q->post_flush_rq) 383 if (rq == &q->post_flush_rq)
383 return QUEUE_ORDSEQ_POSTFLUSH; 384 return QUEUE_ORDSEQ_POSTFLUSH;
384 385
385 if ((rq->flags & REQ_ORDERED_COLOR) == 386 if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
386 (q->orig_bar_rq->flags & REQ_ORDERED_COLOR)) 387 (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
387 return QUEUE_ORDSEQ_DRAIN; 388 return QUEUE_ORDSEQ_DRAIN;
388 else 389 else
389 return QUEUE_ORDSEQ_DONE; 390 return QUEUE_ORDSEQ_DONE;
@@ -446,11 +447,11 @@ static void queue_flush(request_queue_t *q, unsigned which)
446 end_io = post_flush_end_io; 447 end_io = post_flush_end_io;
447 } 448 }
448 449
450 rq->cmd_flags = REQ_HARDBARRIER;
449 rq_init(q, rq); 451 rq_init(q, rq);
450 rq->flags = REQ_HARDBARRIER;
451 rq->elevator_private = NULL; 452 rq->elevator_private = NULL;
453 rq->elevator_private2 = NULL;
452 rq->rq_disk = q->bar_rq.rq_disk; 454 rq->rq_disk = q->bar_rq.rq_disk;
453 rq->rl = NULL;
454 rq->end_io = end_io; 455 rq->end_io = end_io;
455 q->prepare_flush_fn(q, rq); 456 q->prepare_flush_fn(q, rq);
456 457
@@ -471,11 +472,13 @@ static inline struct request *start_ordered(request_queue_t *q,
471 blkdev_dequeue_request(rq); 472 blkdev_dequeue_request(rq);
472 q->orig_bar_rq = rq; 473 q->orig_bar_rq = rq;
473 rq = &q->bar_rq; 474 rq = &q->bar_rq;
475 rq->cmd_flags = 0;
474 rq_init(q, rq); 476 rq_init(q, rq);
475 rq->flags = bio_data_dir(q->orig_bar_rq->bio); 477 if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
476 rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; 478 rq->cmd_flags |= REQ_RW;
479 rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
477 rq->elevator_private = NULL; 480 rq->elevator_private = NULL;
478 rq->rl = NULL; 481 rq->elevator_private2 = NULL;
479 init_request_from_bio(rq, q->orig_bar_rq->bio); 482 init_request_from_bio(rq, q->orig_bar_rq->bio);
480 rq->end_io = bar_end_io; 483 rq->end_io = bar_end_io;
481 484
@@ -587,8 +590,8 @@ static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
587 return 0; 590 return 0;
588} 591}
589 592
590static inline int ordered_bio_endio(struct request *rq, struct bio *bio, 593static int ordered_bio_endio(struct request *rq, struct bio *bio,
591 unsigned int nbytes, int error) 594 unsigned int nbytes, int error)
592{ 595{
593 request_queue_t *q = rq->q; 596 request_queue_t *q = rq->q;
594 bio_end_io_t *endio; 597 bio_end_io_t *endio;
@@ -1124,7 +1127,7 @@ void blk_queue_end_tag(request_queue_t *q, struct request *rq)
1124 } 1127 }
1125 1128
1126 list_del_init(&rq->queuelist); 1129 list_del_init(&rq->queuelist);
1127 rq->flags &= ~REQ_QUEUED; 1130 rq->cmd_flags &= ~REQ_QUEUED;
1128 rq->tag = -1; 1131 rq->tag = -1;
1129 1132
1130 if (unlikely(bqt->tag_index[tag] == NULL)) 1133 if (unlikely(bqt->tag_index[tag] == NULL))
@@ -1160,7 +1163,7 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq)
1160 struct blk_queue_tag *bqt = q->queue_tags; 1163 struct blk_queue_tag *bqt = q->queue_tags;
1161 int tag; 1164 int tag;
1162 1165
1163 if (unlikely((rq->flags & REQ_QUEUED))) { 1166 if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
1164 printk(KERN_ERR 1167 printk(KERN_ERR
1165 "%s: request %p for device [%s] already tagged %d", 1168 "%s: request %p for device [%s] already tagged %d",
1166 __FUNCTION__, rq, 1169 __FUNCTION__, rq,
@@ -1168,13 +1171,18 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq)
1168 BUG(); 1171 BUG();
1169 } 1172 }
1170 1173
1171 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); 1174 /*
1172 if (tag >= bqt->max_depth) 1175 * Protect against shared tag maps, as we may not have exclusive
1173 return 1; 1176 * access to the tag map.
1177 */
1178 do {
1179 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
1180 if (tag >= bqt->max_depth)
1181 return 1;
1174 1182
1175 __set_bit(tag, bqt->tag_map); 1183 } while (test_and_set_bit(tag, bqt->tag_map));
1176 1184
1177 rq->flags |= REQ_QUEUED; 1185 rq->cmd_flags |= REQ_QUEUED;
1178 rq->tag = tag; 1186 rq->tag = tag;
1179 bqt->tag_index[tag] = rq; 1187 bqt->tag_index[tag] = rq;
1180 blkdev_dequeue_request(rq); 1188 blkdev_dequeue_request(rq);
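
The comment and loop above close a race on shared tag maps: between the
find_first_zero_bit() scan and claiming the bit, another queue sharing the
map may grab the same tag, so the claim is redone with an atomic
test_and_set_bit(). The same pattern as a standalone sketch (helper name
and the -1 return convention are ours):

#include <linux/bitops.h>

static int example_claim_tag(unsigned long *tag_map, int max_depth)
{
        int tag;

        do {
                tag = find_first_zero_bit(tag_map, max_depth);
                if (tag >= max_depth)
                        return -1;      /* map exhausted */
                /* retry if someone set the bit after our scan */
        } while (test_and_set_bit(tag, tag_map));

        return tag;
}
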
@@ -1210,65 +1218,31 @@ void blk_queue_invalidate_tags(request_queue_t *q)
1210 printk(KERN_ERR 1218 printk(KERN_ERR
1211 "%s: bad tag found on list\n", __FUNCTION__); 1219 "%s: bad tag found on list\n", __FUNCTION__);
1212 list_del_init(&rq->queuelist); 1220 list_del_init(&rq->queuelist);
1213 rq->flags &= ~REQ_QUEUED; 1221 rq->cmd_flags &= ~REQ_QUEUED;
1214 } else 1222 } else
1215 blk_queue_end_tag(q, rq); 1223 blk_queue_end_tag(q, rq);
1216 1224
1217 rq->flags &= ~REQ_STARTED; 1225 rq->cmd_flags &= ~REQ_STARTED;
1218 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); 1226 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
1219 } 1227 }
1220} 1228}
1221 1229
1222EXPORT_SYMBOL(blk_queue_invalidate_tags); 1230EXPORT_SYMBOL(blk_queue_invalidate_tags);
1223 1231
1224static const char * const rq_flags[] = {
1225 "REQ_RW",
1226 "REQ_FAILFAST",
1227 "REQ_SORTED",
1228 "REQ_SOFTBARRIER",
1229 "REQ_HARDBARRIER",
1230 "REQ_FUA",
1231 "REQ_CMD",
1232 "REQ_NOMERGE",
1233 "REQ_STARTED",
1234 "REQ_DONTPREP",
1235 "REQ_QUEUED",
1236 "REQ_ELVPRIV",
1237 "REQ_PC",
1238 "REQ_BLOCK_PC",
1239 "REQ_SENSE",
1240 "REQ_FAILED",
1241 "REQ_QUIET",
1242 "REQ_SPECIAL",
1243 "REQ_DRIVE_CMD",
1244 "REQ_DRIVE_TASK",
1245 "REQ_DRIVE_TASKFILE",
1246 "REQ_PREEMPT",
1247 "REQ_PM_SUSPEND",
1248 "REQ_PM_RESUME",
1249 "REQ_PM_SHUTDOWN",
1250 "REQ_ORDERED_COLOR",
1251};
1252
1253void blk_dump_rq_flags(struct request *rq, char *msg) 1232void blk_dump_rq_flags(struct request *rq, char *msg)
1254{ 1233{
1255 int bit; 1234 int bit;
1256 1235
1257 printk("%s: dev %s: flags = ", msg, 1236 printk("%s: dev %s: type=%x, flags=%x\n", msg,
1258 rq->rq_disk ? rq->rq_disk->disk_name : "?"); 1237 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
1259 bit = 0; 1238 rq->cmd_flags);
1260 do {
1261 if (rq->flags & (1 << bit))
1262 printk("%s ", rq_flags[bit]);
1263 bit++;
1264 } while (bit < __REQ_NR_BITS);
1265 1239
1266 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, 1240 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
1267 rq->nr_sectors, 1241 rq->nr_sectors,
1268 rq->current_nr_sectors); 1242 rq->current_nr_sectors);
1269 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); 1243 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
1270 1244
1271 if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) { 1245 if (blk_pc_request(rq)) {
1272 printk("cdb: "); 1246 printk("cdb: ");
1273 for (bit = 0; bit < sizeof(rq->cmd); bit++) 1247 for (bit = 0; bit < sizeof(rq->cmd); bit++)
1274 printk("%02x ", rq->cmd[bit]); 1248 printk("%02x ", rq->cmd[bit]);
@@ -1441,7 +1415,7 @@ static inline int ll_new_mergeable(request_queue_t *q,
1441 int nr_phys_segs = bio_phys_segments(q, bio); 1415 int nr_phys_segs = bio_phys_segments(q, bio);
1442 1416
1443 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { 1417 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1444 req->flags |= REQ_NOMERGE; 1418 req->cmd_flags |= REQ_NOMERGE;
1445 if (req == q->last_merge) 1419 if (req == q->last_merge)
1446 q->last_merge = NULL; 1420 q->last_merge = NULL;
1447 return 0; 1421 return 0;
@@ -1464,7 +1438,7 @@ static inline int ll_new_hw_segment(request_queue_t *q,
1464 1438
1465 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments 1439 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
1466 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { 1440 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1467 req->flags |= REQ_NOMERGE; 1441 req->cmd_flags |= REQ_NOMERGE;
1468 if (req == q->last_merge) 1442 if (req == q->last_merge)
1469 q->last_merge = NULL; 1443 q->last_merge = NULL;
1470 return 0; 1444 return 0;
@@ -1491,7 +1465,7 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req,
1491 max_sectors = q->max_sectors; 1465 max_sectors = q->max_sectors;
1492 1466
1493 if (req->nr_sectors + bio_sectors(bio) > max_sectors) { 1467 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1494 req->flags |= REQ_NOMERGE; 1468 req->cmd_flags |= REQ_NOMERGE;
1495 if (req == q->last_merge) 1469 if (req == q->last_merge)
1496 q->last_merge = NULL; 1470 q->last_merge = NULL;
1497 return 0; 1471 return 0;
@@ -1530,7 +1504,7 @@ static int ll_front_merge_fn(request_queue_t *q, struct request *req,
1530 1504
1531 1505
1532 if (req->nr_sectors + bio_sectors(bio) > max_sectors) { 1506 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1533 req->flags |= REQ_NOMERGE; 1507 req->cmd_flags |= REQ_NOMERGE;
1534 if (req == q->last_merge) 1508 if (req == q->last_merge)
1535 q->last_merge = NULL; 1509 q->last_merge = NULL;
1536 return 0; 1510 return 0;
@@ -1847,8 +1821,7 @@ static void blk_release_queue(struct kobject *kobj)
1847 if (q->queue_tags) 1821 if (q->queue_tags)
1848 __blk_queue_free_tags(q); 1822 __blk_queue_free_tags(q);
1849 1823
1850 if (q->blk_trace) 1824 blk_trace_shutdown(q);
1851 blk_trace_shutdown(q);
1852 1825
1853 kmem_cache_free(requestq_cachep, q); 1826 kmem_cache_free(requestq_cachep, q);
1854} 1827}
@@ -2030,14 +2003,13 @@ EXPORT_SYMBOL(blk_get_queue);
2030 2003
2031static inline void blk_free_request(request_queue_t *q, struct request *rq) 2004static inline void blk_free_request(request_queue_t *q, struct request *rq)
2032{ 2005{
2033 if (rq->flags & REQ_ELVPRIV) 2006 if (rq->cmd_flags & REQ_ELVPRIV)
2034 elv_put_request(q, rq); 2007 elv_put_request(q, rq);
2035 mempool_free(rq, q->rq.rq_pool); 2008 mempool_free(rq, q->rq.rq_pool);
2036} 2009}
2037 2010
2038static inline struct request * 2011static struct request *
2039blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, 2012blk_alloc_request(request_queue_t *q, int rw, int priv, gfp_t gfp_mask)
2040 int priv, gfp_t gfp_mask)
2041{ 2013{
2042 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); 2014 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
2043 2015
@@ -2045,17 +2017,17 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
2045 return NULL; 2017 return NULL;
2046 2018
2047 /* 2019 /*
2048 * first three bits are identical in rq->flags and bio->bi_rw, 2020 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
2049 * see bio.h and blkdev.h 2021 * see bio.h and blkdev.h
2050 */ 2022 */
2051 rq->flags = rw; 2023 rq->cmd_flags = rw | REQ_ALLOCED;
2052 2024
2053 if (priv) { 2025 if (priv) {
2054 if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { 2026 if (unlikely(elv_set_request(q, rq, gfp_mask))) {
2055 mempool_free(rq, q->rq.rq_pool); 2027 mempool_free(rq, q->rq.rq_pool);
2056 return NULL; 2028 return NULL;
2057 } 2029 }
2058 rq->flags |= REQ_ELVPRIV; 2030 rq->cmd_flags |= REQ_ELVPRIV;
2059 } 2031 }
2060 2032
2061 return rq; 2033 return rq;
@@ -2142,13 +2114,13 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
2142 struct io_context *ioc = NULL; 2114 struct io_context *ioc = NULL;
2143 int may_queue, priv; 2115 int may_queue, priv;
2144 2116
2145 may_queue = elv_may_queue(q, rw, bio); 2117 may_queue = elv_may_queue(q, rw);
2146 if (may_queue == ELV_MQUEUE_NO) 2118 if (may_queue == ELV_MQUEUE_NO)
2147 goto rq_starved; 2119 goto rq_starved;
2148 2120
2149 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { 2121 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
2150 if (rl->count[rw]+1 >= q->nr_requests) { 2122 if (rl->count[rw]+1 >= q->nr_requests) {
2151 ioc = current_io_context(GFP_ATOMIC); 2123 ioc = current_io_context(GFP_ATOMIC, q->node);
2152 /* 2124 /*
2153 * The queue will fill after this allocation, so set 2125 * The queue will fill after this allocation, so set
2154 * it as full, and mark this process as "batching". 2126 * it as full, and mark this process as "batching".
@@ -2190,7 +2162,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
2190 2162
2191 spin_unlock_irq(q->queue_lock); 2163 spin_unlock_irq(q->queue_lock);
2192 2164
2193 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); 2165 rq = blk_alloc_request(q, rw, priv, gfp_mask);
2194 if (unlikely(!rq)) { 2166 if (unlikely(!rq)) {
2195 /* 2167 /*
2196 * Allocation failed presumably due to memory. Undo anything 2168 * Allocation failed presumably due to memory. Undo anything
@@ -2226,7 +2198,6 @@ rq_starved:
2226 ioc->nr_batch_requests--; 2198 ioc->nr_batch_requests--;
2227 2199
2228 rq_init(q, rq); 2200 rq_init(q, rq);
2229 rq->rl = rl;
2230 2201
2231 blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); 2202 blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
2232out: 2203out:
@@ -2269,7 +2240,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw,
2269 * up to a big batch of them for a small period of time. 2240 * up to a big batch of them for a small period of time.
2270 * See ioc_batching, ioc_set_batching 2241 * See ioc_batching, ioc_set_batching
2271 */ 2242 */
2272 ioc = current_io_context(GFP_NOIO); 2243 ioc = current_io_context(GFP_NOIO, q->node);
2273 ioc_set_batching(q, ioc); 2244 ioc_set_batching(q, ioc);
2274 2245
2275 spin_lock_irq(q->queue_lock); 2246 spin_lock_irq(q->queue_lock);
@@ -2301,6 +2272,25 @@ struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
2301EXPORT_SYMBOL(blk_get_request); 2272EXPORT_SYMBOL(blk_get_request);
2302 2273
2303/** 2274/**
2275 * blk_start_queueing - initiate dispatch of requests to device
2276 * @q: request queue to kick into gear
2277 *
2278 * This is basically a helper to remove the need to know whether a queue
2279 * is plugged or not if someone just wants to initiate dispatch of requests
2280 * for this queue.
2281 *
2282 * The queue lock must be held with interrupts disabled.
2283 */
2284void blk_start_queueing(request_queue_t *q)
2285{
2286 if (!blk_queue_plugged(q))
2287 q->request_fn(q);
2288 else
2289 __generic_unplug_device(q);
2290}
2291EXPORT_SYMBOL(blk_start_queueing);
2292
2293/**
2304 * blk_requeue_request - put a request back on queue 2294 * blk_requeue_request - put a request back on queue
2305 * @q: request queue where request should be inserted 2295 * @q: request queue where request should be inserted
2306 * @rq: request to be inserted 2296 * @rq: request to be inserted
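
blk_start_queueing() folds the plugged/unplugged distinction into one call.
A sketch of a caller, following the locking rule spelled out in the comment
above (queue lock held, interrupts disabled); the function name is ours:

#include <linux/blkdev.h>

static void example_insert_and_run(request_queue_t *q, struct request *rq)
{
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
        blk_start_queueing(q);  /* runs ->request_fn, unplugging first if needed */
        spin_unlock_irqrestore(q->queue_lock, flags);
}

blk_insert_request() below is converted to exactly this shape.
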
@@ -2352,7 +2342,8 @@ void blk_insert_request(request_queue_t *q, struct request *rq,
2352 * must not attempt merges on this) and that it acts as a soft 2342 * must not attempt merges on this) and that it acts as a soft
2353 * barrier 2343 * barrier
2354 */ 2344 */
2355 rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER; 2345 rq->cmd_type = REQ_TYPE_SPECIAL;
2346 rq->cmd_flags |= REQ_SOFTBARRIER;
2356 2347
2357 rq->special = data; 2348 rq->special = data;
2358 2349
@@ -2366,11 +2357,7 @@ void blk_insert_request(request_queue_t *q, struct request *rq,
2366 2357
2367 drive_stat_acct(rq, rq->nr_sectors, 1); 2358 drive_stat_acct(rq, rq->nr_sectors, 1);
2368 __elv_add_request(q, rq, where, 0); 2359 __elv_add_request(q, rq, where, 0);
2369 2360 blk_start_queueing(q);
2370 if (blk_queue_plugged(q))
2371 __generic_unplug_device(q);
2372 else
2373 q->request_fn(q);
2374 spin_unlock_irqrestore(q->queue_lock, flags); 2361 spin_unlock_irqrestore(q->queue_lock, flags);
2375} 2362}
2376 2363
@@ -2559,7 +2546,7 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
2559 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 2546 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2560 2547
2561 rq->rq_disk = bd_disk; 2548 rq->rq_disk = bd_disk;
2562 rq->flags |= REQ_NOMERGE; 2549 rq->cmd_flags |= REQ_NOMERGE;
2563 rq->end_io = done; 2550 rq->end_io = done;
2564 WARN_ON(irqs_disabled()); 2551 WARN_ON(irqs_disabled());
2565 spin_lock_irq(q->queue_lock); 2552 spin_lock_irq(q->queue_lock);
@@ -2599,10 +2586,9 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
2599 rq->sense_len = 0; 2586 rq->sense_len = 0;
2600 } 2587 }
2601 2588
2602 rq->waiting = &wait; 2589 rq->end_io_data = &wait;
2603 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); 2590 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
2604 wait_for_completion(&wait); 2591 wait_for_completion(&wait);
2605 rq->waiting = NULL;
2606 2592
2607 if (rq->errors) 2593 if (rq->errors)
2608 err = -EIO; 2594 err = -EIO;
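
With rq->waiting gone, synchronous execution parks its completion in
rq->end_io_data and lets blk_end_sync_rq() (shown further down) complete it.
The pattern reduced to its core, assuming the request is otherwise fully set
up; the helper name is ours:

#include <linux/blkdev.h>
#include <linux/completion.h>

static int example_execute_sync(request_queue_t *q, struct gendisk *disk,
                                struct request *rq)
{
        DECLARE_COMPLETION(wait);

        rq->ref_count++;        /* keep rq around so rq->errors can be read */
        rq->end_io_data = &wait;
        blk_execute_rq_nowait(q, disk, rq, 0, blk_end_sync_rq);
        wait_for_completion(&wait);

        return rq->errors ? -EIO : 0;
}

The caller still drops its own reference with blk_put_request() afterwards,
exactly as when using blk_execute_rq() itself.
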
@@ -2711,8 +2697,6 @@ EXPORT_SYMBOL_GPL(disk_round_stats);
2711 */ 2697 */
2712void __blk_put_request(request_queue_t *q, struct request *req) 2698void __blk_put_request(request_queue_t *q, struct request *req)
2713{ 2699{
2714 struct request_list *rl = req->rl;
2715
2716 if (unlikely(!q)) 2700 if (unlikely(!q))
2717 return; 2701 return;
2718 if (unlikely(--req->ref_count)) 2702 if (unlikely(--req->ref_count))
@@ -2720,18 +2704,16 @@ void __blk_put_request(request_queue_t *q, struct request *req)
2720 2704
2721 elv_completed_request(q, req); 2705 elv_completed_request(q, req);
2722 2706
2723 req->rq_status = RQ_INACTIVE;
2724 req->rl = NULL;
2725
2726 /* 2707 /*
2727 * Request may not have originated from ll_rw_blk. if not, 2708 * Request may not have originated from ll_rw_blk. if not,
2728 * it didn't come out of our reserved rq pools 2709 * it didn't come out of our reserved rq pools
2729 */ 2710 */
2730 if (rl) { 2711 if (req->cmd_flags & REQ_ALLOCED) {
2731 int rw = rq_data_dir(req); 2712 int rw = rq_data_dir(req);
2732 int priv = req->flags & REQ_ELVPRIV; 2713 int priv = req->cmd_flags & REQ_ELVPRIV;
2733 2714
2734 BUG_ON(!list_empty(&req->queuelist)); 2715 BUG_ON(!list_empty(&req->queuelist));
2716 BUG_ON(!hlist_unhashed(&req->hash));
2735 2717
2736 blk_free_request(q, req); 2718 blk_free_request(q, req);
2737 freed_request(q, rw, priv); 2719 freed_request(q, rw, priv);
@@ -2765,9 +2747,9 @@ EXPORT_SYMBOL(blk_put_request);
2765 */ 2747 */
2766void blk_end_sync_rq(struct request *rq, int error) 2748void blk_end_sync_rq(struct request *rq, int error)
2767{ 2749{
2768 struct completion *waiting = rq->waiting; 2750 struct completion *waiting = rq->end_io_data;
2769 2751
2770 rq->waiting = NULL; 2752 rq->end_io_data = NULL;
2771 __blk_put_request(rq->q, rq); 2753 __blk_put_request(rq->q, rq);
2772 2754
2773 /* 2755 /*
@@ -2830,7 +2812,7 @@ static int attempt_merge(request_queue_t *q, struct request *req,
2830 2812
2831 if (rq_data_dir(req) != rq_data_dir(next) 2813 if (rq_data_dir(req) != rq_data_dir(next)
2832 || req->rq_disk != next->rq_disk 2814 || req->rq_disk != next->rq_disk
2833 || next->waiting || next->special) 2815 || next->special)
2834 return 0; 2816 return 0;
2835 2817
2836 /* 2818 /*
@@ -2891,22 +2873,24 @@ static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
2891 2873
2892static void init_request_from_bio(struct request *req, struct bio *bio) 2874static void init_request_from_bio(struct request *req, struct bio *bio)
2893{ 2875{
2894 req->flags |= REQ_CMD; 2876 req->cmd_type = REQ_TYPE_FS;
2895 2877
2896 /* 2878 /*
2897 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) 2879 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
2898 */ 2880 */
2899 if (bio_rw_ahead(bio) || bio_failfast(bio)) 2881 if (bio_rw_ahead(bio) || bio_failfast(bio))
2900 req->flags |= REQ_FAILFAST; 2882 req->cmd_flags |= REQ_FAILFAST;
2901 2883
2902 /* 2884 /*
2903 * REQ_BARRIER implies no merging, but let's make it explicit 2885 * REQ_BARRIER implies no merging, but let's make it explicit
2904 */ 2886 */
2905 if (unlikely(bio_barrier(bio))) 2887 if (unlikely(bio_barrier(bio)))
2906 req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); 2888 req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
2907 2889
2908 if (bio_sync(bio)) 2890 if (bio_sync(bio))
2909 req->flags |= REQ_RW_SYNC; 2891 req->cmd_flags |= REQ_RW_SYNC;
2892 if (bio_rw_meta(bio))
2893 req->cmd_flags |= REQ_RW_META;
2910 2894
2911 req->errors = 0; 2895 req->errors = 0;
2912 req->hard_sector = req->sector = bio->bi_sector; 2896 req->hard_sector = req->sector = bio->bi_sector;
@@ -2915,7 +2899,6 @@ static void init_request_from_bio(struct request *req, struct bio *bio)
2915 req->nr_phys_segments = bio_phys_segments(req->q, bio); 2899 req->nr_phys_segments = bio_phys_segments(req->q, bio);
2916 req->nr_hw_segments = bio_hw_segments(req->q, bio); 2900 req->nr_hw_segments = bio_hw_segments(req->q, bio);
2917 req->buffer = bio_data(bio); /* see ->buffer comment above */ 2901 req->buffer = bio_data(bio); /* see ->buffer comment above */
2918 req->waiting = NULL;
2919 req->bio = req->biotail = bio; 2902 req->bio = req->biotail = bio;
2920 req->ioprio = bio_prio(bio); 2903 req->ioprio = bio_prio(bio);
2921 req->rq_disk = bio->bi_bdev->bd_disk; 2904 req->rq_disk = bio->bi_bdev->bd_disk;
@@ -2925,17 +2908,11 @@ static void init_request_from_bio(struct request *req, struct bio *bio)
2925static int __make_request(request_queue_t *q, struct bio *bio) 2908static int __make_request(request_queue_t *q, struct bio *bio)
2926{ 2909{
2927 struct request *req; 2910 struct request *req;
2928 int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync; 2911 int el_ret, nr_sectors, barrier, err;
2929 unsigned short prio; 2912 const unsigned short prio = bio_prio(bio);
2930 sector_t sector; 2913 const int sync = bio_sync(bio);
2931 2914
2932 sector = bio->bi_sector;
2933 nr_sectors = bio_sectors(bio); 2915 nr_sectors = bio_sectors(bio);
2934 cur_nr_sectors = bio_cur_sectors(bio);
2935 prio = bio_prio(bio);
2936
2937 rw = bio_data_dir(bio);
2938 sync = bio_sync(bio);
2939 2916
2940 /* 2917 /*
2941 * low level driver can indicate that it wants pages above a 2918 * low level driver can indicate that it wants pages above a
@@ -2944,8 +2921,6 @@ static int __make_request(request_queue_t *q, struct bio *bio)
2944 */ 2921 */
2945 blk_queue_bounce(q, &bio); 2922 blk_queue_bounce(q, &bio);
2946 2923
2947 spin_lock_prefetch(q->queue_lock);
2948
2949 barrier = bio_barrier(bio); 2924 barrier = bio_barrier(bio);
2950 if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { 2925 if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
2951 err = -EOPNOTSUPP; 2926 err = -EOPNOTSUPP;
@@ -2973,7 +2948,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
2973 req->ioprio = ioprio_best(req->ioprio, prio); 2948 req->ioprio = ioprio_best(req->ioprio, prio);
2974 drive_stat_acct(req, nr_sectors, 0); 2949 drive_stat_acct(req, nr_sectors, 0);
2975 if (!attempt_back_merge(q, req)) 2950 if (!attempt_back_merge(q, req))
2976 elv_merged_request(q, req); 2951 elv_merged_request(q, req, el_ret);
2977 goto out; 2952 goto out;
2978 2953
2979 case ELEVATOR_FRONT_MERGE: 2954 case ELEVATOR_FRONT_MERGE:
@@ -2993,14 +2968,14 @@ static int __make_request(request_queue_t *q, struct bio *bio)
2993 * not touch req->buffer either... 2968 * not touch req->buffer either...
2994 */ 2969 */
2995 req->buffer = bio_data(bio); 2970 req->buffer = bio_data(bio);
2996 req->current_nr_sectors = cur_nr_sectors; 2971 req->current_nr_sectors = bio_cur_sectors(bio);
2997 req->hard_cur_sectors = cur_nr_sectors; 2972 req->hard_cur_sectors = req->current_nr_sectors;
2998 req->sector = req->hard_sector = sector; 2973 req->sector = req->hard_sector = bio->bi_sector;
2999 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 2974 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
3000 req->ioprio = ioprio_best(req->ioprio, prio); 2975 req->ioprio = ioprio_best(req->ioprio, prio);
3001 drive_stat_acct(req, nr_sectors, 0); 2976 drive_stat_acct(req, nr_sectors, 0);
3002 if (!attempt_front_merge(q, req)) 2977 if (!attempt_front_merge(q, req))
3003 elv_merged_request(q, req); 2978 elv_merged_request(q, req, el_ret);
3004 goto out; 2979 goto out;
3005 2980
3006 /* ELV_NO_MERGE: elevator says don't/can't merge. */ 2981 /* ELV_NO_MERGE: elevator says don't/can't merge. */
@@ -3013,7 +2988,7 @@ get_rq:
3013 * Grab a free request. This might sleep but cannot fail. 2988 * Grab a free request. This might sleep but cannot fail.
3014 * Returns with the queue unlocked. 2989 * Returns with the queue unlocked.
3015 */ 2990 */
3016 req = get_request_wait(q, rw, bio); 2991 req = get_request_wait(q, bio_data_dir(bio), bio);
3017 2992
3018 /* 2993 /*
3019 * After dropping the lock and possibly sleeping here, our request 2994 * After dropping the lock and possibly sleeping here, our request
@@ -3307,7 +3282,7 @@ static int __end_that_request_first(struct request *req, int uptodate,
3307 req->errors = 0; 3282 req->errors = 0;
3308 3283
3309 if (!uptodate) { 3284 if (!uptodate) {
3310 if (blk_fs_request(req) && !(req->flags & REQ_QUIET)) 3285 if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))
3311 printk("end_request: I/O error, dev %s, sector %llu\n", 3286 printk("end_request: I/O error, dev %s, sector %llu\n",
3312 req->rq_disk ? req->rq_disk->disk_name : "?", 3287 req->rq_disk ? req->rq_disk->disk_name : "?",
3313 (unsigned long long)req->sector); 3288 (unsigned long long)req->sector);
@@ -3570,8 +3545,8 @@ EXPORT_SYMBOL(end_request);
3570 3545
3571void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio) 3546void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
3572{ 3547{
3573 /* first two bits are identical in rq->flags and bio->bi_rw */ 3548 /* first two bits are identical in rq->cmd_flags and bio->bi_rw */
3574 rq->flags |= (bio->bi_rw & 3); 3549 rq->cmd_flags |= (bio->bi_rw & 3);
3575 3550
3576 rq->nr_phys_segments = bio_phys_segments(q, bio); 3551 rq->nr_phys_segments = bio_phys_segments(q, bio);
3577 rq->nr_hw_segments = bio_hw_segments(q, bio); 3552 rq->nr_hw_segments = bio_hw_segments(q, bio);
@@ -3659,25 +3634,22 @@ EXPORT_SYMBOL(put_io_context);
3659/* Called by the exiting task */ 3634/* Called by the exiting task */
3660void exit_io_context(void) 3635void exit_io_context(void)
3661{ 3636{
3662 unsigned long flags;
3663 struct io_context *ioc; 3637 struct io_context *ioc;
3664 struct cfq_io_context *cic; 3638 struct cfq_io_context *cic;
3665 3639
3666 local_irq_save(flags);
3667 task_lock(current); 3640 task_lock(current);
3668 ioc = current->io_context; 3641 ioc = current->io_context;
3669 current->io_context = NULL; 3642 current->io_context = NULL;
3670 ioc->task = NULL;
3671 task_unlock(current); 3643 task_unlock(current);
3672 local_irq_restore(flags);
3673 3644
3645 ioc->task = NULL;
3674 if (ioc->aic && ioc->aic->exit) 3646 if (ioc->aic && ioc->aic->exit)
3675 ioc->aic->exit(ioc->aic); 3647 ioc->aic->exit(ioc->aic);
3676 if (ioc->cic_root.rb_node != NULL) { 3648 if (ioc->cic_root.rb_node != NULL) {
3677 cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); 3649 cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
3678 cic->exit(ioc); 3650 cic->exit(ioc);
3679 } 3651 }
3680 3652
3681 put_io_context(ioc); 3653 put_io_context(ioc);
3682} 3654}
3683 3655
@@ -3689,7 +3661,7 @@ void exit_io_context(void)
3689 * but since the current task itself holds a reference, the context can be 3661 * but since the current task itself holds a reference, the context can be
3690 * used in general code, so long as it stays within `current` context. 3662 * used in general code, so long as it stays within `current` context.
3691 */ 3663 */
3692struct io_context *current_io_context(gfp_t gfp_flags) 3664static struct io_context *current_io_context(gfp_t gfp_flags, int node)
3693{ 3665{
3694 struct task_struct *tsk = current; 3666 struct task_struct *tsk = current;
3695 struct io_context *ret; 3667 struct io_context *ret;
@@ -3698,11 +3670,11 @@ struct io_context *current_io_context(gfp_t gfp_flags)
3698 if (likely(ret)) 3670 if (likely(ret))
3699 return ret; 3671 return ret;
3700 3672
3701 ret = kmem_cache_alloc(iocontext_cachep, gfp_flags); 3673 ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
3702 if (ret) { 3674 if (ret) {
3703 atomic_set(&ret->refcount, 1); 3675 atomic_set(&ret->refcount, 1);
3704 ret->task = current; 3676 ret->task = current;
3705 ret->set_ioprio = NULL; 3677 ret->ioprio_changed = 0;
3706 ret->last_waited = jiffies; /* doesn't matter... */ 3678 ret->last_waited = jiffies; /* doesn't matter... */
3707 ret->nr_batch_requests = 0; /* because this is 0 */ 3679 ret->nr_batch_requests = 0; /* because this is 0 */
3708 ret->aic = NULL; 3680 ret->aic = NULL;
@@ -3722,10 +3694,10 @@ EXPORT_SYMBOL(current_io_context);
3722 * 3694 *
3723 * This is always called in the context of the task which submitted the I/O. 3695 * This is always called in the context of the task which submitted the I/O.
3724 */ 3696 */
3725struct io_context *get_io_context(gfp_t gfp_flags) 3697struct io_context *get_io_context(gfp_t gfp_flags, int node)
3726{ 3698{
3727 struct io_context *ret; 3699 struct io_context *ret;
3728 ret = current_io_context(gfp_flags); 3700 ret = current_io_context(gfp_flags, node);
3729 if (likely(ret)) 3701 if (likely(ret))
3730 atomic_inc(&ret->refcount); 3702 atomic_inc(&ret->refcount);
3731 return ret; 3703 return ret;
@@ -3838,9 +3810,6 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
3838 ssize_t ret = queue_var_store(&ra_kb, page, count); 3810 ssize_t ret = queue_var_store(&ra_kb, page, count);
3839 3811
3840 spin_lock_irq(q->queue_lock); 3812 spin_lock_irq(q->queue_lock);
3841 if (ra_kb > (q->max_sectors >> 1))
3842 ra_kb = (q->max_sectors >> 1);
3843
3844 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); 3813 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
3845 spin_unlock_irq(q->queue_lock); 3814 spin_unlock_irq(q->queue_lock);
3846 3815
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index 56a7c620574f..79af43179421 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -69,7 +69,7 @@ static void *noop_init_queue(request_queue_t *q, elevator_t *e)
69{ 69{
70 struct noop_data *nd; 70 struct noop_data *nd;
71 71
72 nd = kmalloc(sizeof(*nd), GFP_KERNEL); 72 nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node);
73 if (!nd) 73 if (!nd)
74 return NULL; 74 return NULL;
75 INIT_LIST_HEAD(&nd->queue); 75 INIT_LIST_HEAD(&nd->queue);
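
Together with the node argument that current_io_context()/get_io_context()
grew above, this makes the scheduler's per-queue allocations NUMA-aware: the
bookkeeping lands on the memory node the queue belongs to. The same pattern
for any per-queue structure, with struct example_data and the helper name
standing in for a real scheduler's:

#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/slab.h>

struct example_data {
        struct list_head queue;
};

static void *example_init_queue(request_queue_t *q, elevator_t *e)
{
        struct example_data *ed;

        /* allocate on the queue's home node, not the caller's */
        ed = kmalloc_node(sizeof(*ed), GFP_KERNEL, q->node);
        if (!ed)
                return NULL;
        INIT_LIST_HEAD(&ed->queue);
        return ed;
}
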
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index b33eda26e205..2dc326421a24 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -294,7 +294,7 @@ static int sg_io(struct file *file, request_queue_t *q,
294 rq->sense = sense; 294 rq->sense = sense;
295 rq->sense_len = 0; 295 rq->sense_len = 0;
296 296
297 rq->flags |= REQ_BLOCK_PC; 297 rq->cmd_type = REQ_TYPE_BLOCK_PC;
298 bio = rq->bio; 298 bio = rq->bio;
299 299
300 /* 300 /*
@@ -470,7 +470,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q,
470 memset(sense, 0, sizeof(sense)); 470 memset(sense, 0, sizeof(sense));
471 rq->sense = sense; 471 rq->sense = sense;
472 rq->sense_len = 0; 472 rq->sense_len = 0;
473 rq->flags |= REQ_BLOCK_PC; 473 rq->cmd_type = REQ_TYPE_BLOCK_PC;
474 474
475 blk_execute_rq(q, disk, rq, 0); 475 blk_execute_rq(q, disk, rq, 0);
476 476
@@ -502,7 +502,7 @@ static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int c
502 int err; 502 int err;
503 503
504 rq = blk_get_request(q, WRITE, __GFP_WAIT); 504 rq = blk_get_request(q, WRITE, __GFP_WAIT);
505 rq->flags |= REQ_BLOCK_PC; 505 rq->cmd_type = REQ_TYPE_BLOCK_PC;
506 rq->data = NULL; 506 rq->data = NULL;
507 rq->data_len = 0; 507 rq->data_len = 0;
508 rq->timeout = BLK_DEFAULT_TIMEOUT; 508 rq->timeout = BLK_DEFAULT_TIMEOUT;
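
All three scsi_ioctl.c sites now mark packet commands through rq->cmd_type
instead of OR-ing REQ_BLOCK_PC into the old flags word. A condensed sketch
of issuing a command this way, modelled on __blk_send_generic() above; the
helper name and the cdb parameters are illustrative, and cdb_len must not
exceed sizeof(rq->cmd):

#include <linux/blkdev.h>
#include <linux/string.h>

static int example_send_command(request_queue_t *q, struct gendisk *disk,
                                unsigned char *cdb, unsigned int cdb_len)
{
        struct request *rq;
        int err;

        rq = blk_get_request(q, WRITE, __GFP_WAIT);
        rq->cmd_type = REQ_TYPE_BLOCK_PC;       /* packet command, not a fs request */
        rq->data = NULL;
        rq->data_len = 0;
        rq->timeout = BLK_DEFAULT_TIMEOUT;

        memset(rq->cmd, 0, sizeof(rq->cmd));
        memcpy(rq->cmd, cdb, cdb_len);
        rq->cmd_len = cdb_len;

        err = blk_execute_rq(q, disk, rq, 0);
        blk_put_request(rq);
        return err;
}
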