aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig1
-rw-r--r--block/cfq-iosched.c357
-rw-r--r--block/genhd.c6
-rw-r--r--block/ll_rw_blk.c39
4 files changed, 221 insertions, 182 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 43ca070dc0f8..5536839886ff 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -13,6 +13,7 @@ config LBD
13 13
14config BLK_DEV_IO_TRACE 14config BLK_DEV_IO_TRACE
15 bool "Support for tracing block io actions" 15 bool "Support for tracing block io actions"
16 depends on SYSFS
16 select RELAY 17 select RELAY
17 select DEBUG_FS 18 select DEBUG_FS
18 help 19 help
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index bde40a6ae665..67d446de0227 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -26,18 +26,12 @@ static const int cfq_back_penalty = 2; /* penalty of a backwards seek */
26static const int cfq_slice_sync = HZ / 10; 26static const int cfq_slice_sync = HZ / 10;
27static int cfq_slice_async = HZ / 25; 27static int cfq_slice_async = HZ / 25;
28static const int cfq_slice_async_rq = 2; 28static const int cfq_slice_async_rq = 2;
29static int cfq_slice_idle = HZ / 100; 29static int cfq_slice_idle = HZ / 70;
30 30
31#define CFQ_IDLE_GRACE (HZ / 10) 31#define CFQ_IDLE_GRACE (HZ / 10)
32#define CFQ_SLICE_SCALE (5) 32#define CFQ_SLICE_SCALE (5)
33 33
34#define CFQ_KEY_ASYNC (0) 34#define CFQ_KEY_ASYNC (0)
35#define CFQ_KEY_ANY (0xffff)
36
37/*
38 * disable queueing at the driver/hardware level
39 */
40static const int cfq_max_depth = 2;
41 35
42static DEFINE_RWLOCK(cfq_exit_lock); 36static DEFINE_RWLOCK(cfq_exit_lock);
43 37
@@ -102,6 +96,8 @@ static struct completion *ioc_gone;
102#define cfq_cfqq_sync(cfqq) \ 96#define cfq_cfqq_sync(cfqq) \
103 (cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC]) 97 (cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC])
104 98
99#define sample_valid(samples) ((samples) > 80)
100
105/* 101/*
106 * Per block device queue structure 102 * Per block device queue structure
107 */ 103 */
@@ -170,7 +166,6 @@ struct cfq_data {
170 unsigned int cfq_slice[2]; 166 unsigned int cfq_slice[2];
171 unsigned int cfq_slice_async_rq; 167 unsigned int cfq_slice_async_rq;
172 unsigned int cfq_slice_idle; 168 unsigned int cfq_slice_idle;
173 unsigned int cfq_max_depth;
174 169
175 struct list_head cic_list; 170 struct list_head cic_list;
176}; 171};
@@ -343,17 +338,27 @@ static int cfq_queue_empty(request_queue_t *q)
343 return !cfqd->busy_queues; 338 return !cfqd->busy_queues;
344} 339}
345 340
341static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
342{
343 if (rw == READ || process_sync(task))
344 return task->pid;
345
346 return CFQ_KEY_ASYNC;
347}
348
346/* 349/*
347 * Lifted from AS - choose which of crq1 and crq2 that is best served now. 350 * Lifted from AS - choose which of crq1 and crq2 that is best served now.
348 * We choose the request that is closest to the head right now. Distance 351 * We choose the request that is closest to the head right now. Distance
349 * behind the head are penalized and only allowed to a certain extent. 352 * behind the head is penalized and only allowed to a certain extent.
350 */ 353 */
351static struct cfq_rq * 354static struct cfq_rq *
352cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) 355cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
353{ 356{
354 sector_t last, s1, s2, d1 = 0, d2 = 0; 357 sector_t last, s1, s2, d1 = 0, d2 = 0;
355 int r1_wrap = 0, r2_wrap = 0; /* requests are behind the disk head */
356 unsigned long back_max; 358 unsigned long back_max;
359#define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */
360#define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */
361 unsigned wrap = 0; /* bit mask: requests behind the disk head? */
357 362
358 if (crq1 == NULL || crq1 == crq2) 363 if (crq1 == NULL || crq1 == crq2)
359 return crq2; 364 return crq2;
@@ -385,35 +390,47 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
385 else if (s1 + back_max >= last) 390 else if (s1 + back_max >= last)
386 d1 = (last - s1) * cfqd->cfq_back_penalty; 391 d1 = (last - s1) * cfqd->cfq_back_penalty;
387 else 392 else
388 r1_wrap = 1; 393 wrap |= CFQ_RQ1_WRAP;
389 394
390 if (s2 >= last) 395 if (s2 >= last)
391 d2 = s2 - last; 396 d2 = s2 - last;
392 else if (s2 + back_max >= last) 397 else if (s2 + back_max >= last)
393 d2 = (last - s2) * cfqd->cfq_back_penalty; 398 d2 = (last - s2) * cfqd->cfq_back_penalty;
394 else 399 else
395 r2_wrap = 1; 400 wrap |= CFQ_RQ2_WRAP;
396 401
397 /* Found required data */ 402 /* Found required data */
398 if (!r1_wrap && r2_wrap) 403
399 return crq1; 404 /*
400 else if (!r2_wrap && r1_wrap) 405 * By doing switch() on the bit mask "wrap" we avoid having to
401 return crq2; 406 * check two variables for all permutations: --> faster!
402 else if (r1_wrap && r2_wrap) { 407 */
403 /* both behind the head */ 408 switch (wrap) {
404 if (s1 <= s2) 409 case 0: /* common case for CFQ: crq1 and crq2 not wrapped */
410 if (d1 < d2)
405 return crq1; 411 return crq1;
406 else 412 else if (d2 < d1)
407 return crq2; 413 return crq2;
408 } 414 else {
415 if (s1 >= s2)
416 return crq1;
417 else
418 return crq2;
419 }
409 420
410 /* Both requests in front of the head */ 421 case CFQ_RQ2_WRAP:
411 if (d1 < d2)
412 return crq1; 422 return crq1;
413 else if (d2 < d1) 423 case CFQ_RQ1_WRAP:
414 return crq2; 424 return crq2;
415 else { 425 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both crqs wrapped */
416 if (s1 >= s2) 426 default:
427 /*
428 * Since both rqs are wrapped,
429 * start with the one that's further behind head
430 * (--> only *one* back seek required),
431 * since back seek takes more time than forward.
432 */
433 if (s1 <= s2)
417 return crq1; 434 return crq1;
418 else 435 else
419 return crq2; 436 return crq2;
@@ -612,15 +629,20 @@ cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
612 cfq_add_crq_rb(crq); 629 cfq_add_crq_rb(crq);
613} 630}
614 631
615static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector) 632static struct request *
616 633cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
617{ 634{
618 struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid, CFQ_KEY_ANY); 635 struct task_struct *tsk = current;
636 pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio));
637 struct cfq_queue *cfqq;
619 struct rb_node *n; 638 struct rb_node *n;
639 sector_t sector;
620 640
641 cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio);
621 if (!cfqq) 642 if (!cfqq)
622 goto out; 643 goto out;
623 644
645 sector = bio->bi_sector + bio_sectors(bio);
624 n = cfqq->sort_list.rb_node; 646 n = cfqq->sort_list.rb_node;
625 while (n) { 647 while (n) {
626 struct cfq_rq *crq = rb_entry_crq(n); 648 struct cfq_rq *crq = rb_entry_crq(n);
@@ -674,7 +696,7 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
674 goto out; 696 goto out;
675 } 697 }
676 698
677 __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio)); 699 __rq = cfq_find_rq_fmerge(cfqd, bio);
678 if (__rq && elv_rq_merge_ok(__rq, bio)) { 700 if (__rq && elv_rq_merge_ok(__rq, bio)) {
679 ret = ELEVATOR_FRONT_MERGE; 701 ret = ELEVATOR_FRONT_MERGE;
680 goto out; 702 goto out;
@@ -877,6 +899,7 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
877static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) 899static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
878 900
879{ 901{
902 struct cfq_io_context *cic;
880 unsigned long sl; 903 unsigned long sl;
881 904
882 WARN_ON(!RB_EMPTY(&cfqq->sort_list)); 905 WARN_ON(!RB_EMPTY(&cfqq->sort_list));
@@ -892,13 +915,23 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
892 /* 915 /*
893 * task has exited, don't wait 916 * task has exited, don't wait
894 */ 917 */
895 if (cfqd->active_cic && !cfqd->active_cic->ioc->task) 918 cic = cfqd->active_cic;
919 if (!cic || !cic->ioc->task)
896 return 0; 920 return 0;
897 921
898 cfq_mark_cfqq_must_dispatch(cfqq); 922 cfq_mark_cfqq_must_dispatch(cfqq);
899 cfq_mark_cfqq_wait_request(cfqq); 923 cfq_mark_cfqq_wait_request(cfqq);
900 924
901 sl = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle); 925 sl = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle);
926
927 /*
928 * we don't want to idle for seeks, but we do want to allow
929 * fair distribution of slice time for a process doing back-to-back
930 * seeks. so allow a little bit of time for him to submit a new rq
931 */
932 if (sample_valid(cic->seek_samples) && cic->seek_mean > 131072)
933 sl = 2;
934
902 mod_timer(&cfqd->idle_slice_timer, jiffies + sl); 935 mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
903 return 1; 936 return 1;
904} 937}
@@ -1115,13 +1148,6 @@ cfq_dispatch_requests(request_queue_t *q, int force)
1115 if (cfqq) { 1148 if (cfqq) {
1116 int max_dispatch; 1149 int max_dispatch;
1117 1150
1118 /*
1119 * if idle window is disabled, allow queue buildup
1120 */
1121 if (!cfq_cfqq_idle_window(cfqq) &&
1122 cfqd->rq_in_driver >= cfqd->cfq_max_depth)
1123 return 0;
1124
1125 cfq_clear_cfqq_must_dispatch(cfqq); 1151 cfq_clear_cfqq_must_dispatch(cfqq);
1126 cfq_clear_cfqq_wait_request(cfqq); 1152 cfq_clear_cfqq_wait_request(cfqq);
1127 del_timer(&cfqd->idle_slice_timer); 1153 del_timer(&cfqd->idle_slice_timer);
@@ -1171,13 +1197,13 @@ __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio,
1171 const int hashval) 1197 const int hashval)
1172{ 1198{
1173 struct hlist_head *hash_list = &cfqd->cfq_hash[hashval]; 1199 struct hlist_head *hash_list = &cfqd->cfq_hash[hashval];
1174 struct hlist_node *entry, *next; 1200 struct hlist_node *entry;
1201 struct cfq_queue *__cfqq;
1175 1202
1176 hlist_for_each_safe(entry, next, hash_list) { 1203 hlist_for_each_entry(__cfqq, entry, hash_list, cfq_hash) {
1177 struct cfq_queue *__cfqq = list_entry_qhash(entry);
1178 const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->org_ioprio_class, __cfqq->org_ioprio); 1204 const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->org_ioprio_class, __cfqq->org_ioprio);
1179 1205
1180 if (__cfqq->key == key && (__p == prio || prio == CFQ_KEY_ANY)) 1206 if (__cfqq->key == key && (__p == prio || !prio))
1181 return __cfqq; 1207 return __cfqq;
1182 } 1208 }
1183 1209
@@ -1190,19 +1216,19 @@ cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio)
1190 return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT)); 1216 return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT));
1191} 1217}
1192 1218
1193static void cfq_free_io_context(struct cfq_io_context *cic) 1219static void cfq_free_io_context(struct io_context *ioc)
1194{ 1220{
1195 struct cfq_io_context *__cic; 1221 struct cfq_io_context *__cic;
1196 struct list_head *entry, *next; 1222 struct rb_node *n;
1197 int freed = 1; 1223 int freed = 0;
1198 1224
1199 list_for_each_safe(entry, next, &cic->list) { 1225 while ((n = rb_first(&ioc->cic_root)) != NULL) {
1200 __cic = list_entry(entry, struct cfq_io_context, list); 1226 __cic = rb_entry(n, struct cfq_io_context, rb_node);
1227 rb_erase(&__cic->rb_node, &ioc->cic_root);
1201 kmem_cache_free(cfq_ioc_pool, __cic); 1228 kmem_cache_free(cfq_ioc_pool, __cic);
1202 freed++; 1229 freed++;
1203 } 1230 }
1204 1231
1205 kmem_cache_free(cfq_ioc_pool, cic);
1206 if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone) 1232 if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone)
1207 complete(ioc_gone); 1233 complete(ioc_gone);
1208} 1234}
@@ -1210,8 +1236,7 @@ static void cfq_free_io_context(struct cfq_io_context *cic)
1210static void cfq_trim(struct io_context *ioc) 1236static void cfq_trim(struct io_context *ioc)
1211{ 1237{
1212 ioc->set_ioprio = NULL; 1238 ioc->set_ioprio = NULL;
1213 if (ioc->cic) 1239 cfq_free_io_context(ioc);
1214 cfq_free_io_context(ioc->cic);
1215} 1240}
1216 1241
1217/* 1242/*
@@ -1250,26 +1275,26 @@ static void cfq_exit_single_io_context(struct cfq_io_context *cic)
1250 spin_unlock(q->queue_lock); 1275 spin_unlock(q->queue_lock);
1251} 1276}
1252 1277
1253static void cfq_exit_io_context(struct cfq_io_context *cic) 1278static void cfq_exit_io_context(struct io_context *ioc)
1254{ 1279{
1255 struct cfq_io_context *__cic; 1280 struct cfq_io_context *__cic;
1256 struct list_head *entry;
1257 unsigned long flags; 1281 unsigned long flags;
1258 1282 struct rb_node *n;
1259 local_irq_save(flags);
1260 1283
1261 /* 1284 /*
1262 * put the reference this task is holding to the various queues 1285 * put the reference this task is holding to the various queues
1263 */ 1286 */
1264 read_lock(&cfq_exit_lock); 1287 read_lock_irqsave(&cfq_exit_lock, flags);
1265 list_for_each(entry, &cic->list) { 1288
1266 __cic = list_entry(entry, struct cfq_io_context, list); 1289 n = rb_first(&ioc->cic_root);
1290 while (n != NULL) {
1291 __cic = rb_entry(n, struct cfq_io_context, rb_node);
1292
1267 cfq_exit_single_io_context(__cic); 1293 cfq_exit_single_io_context(__cic);
1294 n = rb_next(n);
1268 } 1295 }
1269 1296
1270 cfq_exit_single_io_context(cic); 1297 read_unlock_irqrestore(&cfq_exit_lock, flags);
1271 read_unlock(&cfq_exit_lock);
1272 local_irq_restore(flags);
1273} 1298}
1274 1299
1275static struct cfq_io_context * 1300static struct cfq_io_context *
@@ -1278,10 +1303,10 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
1278 struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); 1303 struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
1279 1304
1280 if (cic) { 1305 if (cic) {
1281 INIT_LIST_HEAD(&cic->list); 1306 RB_CLEAR(&cic->rb_node);
1307 cic->key = NULL;
1282 cic->cfqq[ASYNC] = NULL; 1308 cic->cfqq[ASYNC] = NULL;
1283 cic->cfqq[SYNC] = NULL; 1309 cic->cfqq[SYNC] = NULL;
1284 cic->key = NULL;
1285 cic->last_end_request = jiffies; 1310 cic->last_end_request = jiffies;
1286 cic->ttime_total = 0; 1311 cic->ttime_total = 0;
1287 cic->ttime_samples = 0; 1312 cic->ttime_samples = 0;
@@ -1373,15 +1398,17 @@ static inline void changed_ioprio(struct cfq_io_context *cic)
1373static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) 1398static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
1374{ 1399{
1375 struct cfq_io_context *cic; 1400 struct cfq_io_context *cic;
1401 struct rb_node *n;
1376 1402
1377 write_lock(&cfq_exit_lock); 1403 write_lock(&cfq_exit_lock);
1378 1404
1379 cic = ioc->cic; 1405 n = rb_first(&ioc->cic_root);
1380 1406 while (n != NULL) {
1381 changed_ioprio(cic); 1407 cic = rb_entry(n, struct cfq_io_context, rb_node);
1382 1408
1383 list_for_each_entry(cic, &cic->list, list)
1384 changed_ioprio(cic); 1409 changed_ioprio(cic);
1410 n = rb_next(n);
1411 }
1385 1412
1386 write_unlock(&cfq_exit_lock); 1413 write_unlock(&cfq_exit_lock);
1387 1414
@@ -1445,14 +1472,67 @@ out:
1445 return cfqq; 1472 return cfqq;
1446} 1473}
1447 1474
1475static struct cfq_io_context *
1476cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc)
1477{
1478 struct rb_node *n = ioc->cic_root.rb_node;
1479 struct cfq_io_context *cic;
1480 void *key = cfqd;
1481
1482 while (n) {
1483 cic = rb_entry(n, struct cfq_io_context, rb_node);
1484
1485 if (key < cic->key)
1486 n = n->rb_left;
1487 else if (key > cic->key)
1488 n = n->rb_right;
1489 else
1490 return cic;
1491 }
1492
1493 return NULL;
1494}
1495
1496static inline void
1497cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
1498 struct cfq_io_context *cic)
1499{
1500 struct rb_node **p = &ioc->cic_root.rb_node;
1501 struct rb_node *parent = NULL;
1502 struct cfq_io_context *__cic;
1503
1504 read_lock(&cfq_exit_lock);
1505
1506 cic->ioc = ioc;
1507 cic->key = cfqd;
1508
1509 ioc->set_ioprio = cfq_ioc_set_ioprio;
1510
1511 while (*p) {
1512 parent = *p;
1513 __cic = rb_entry(parent, struct cfq_io_context, rb_node);
1514
1515 if (cic->key < __cic->key)
1516 p = &(*p)->rb_left;
1517 else if (cic->key > __cic->key)
1518 p = &(*p)->rb_right;
1519 else
1520 BUG();
1521 }
1522
1523 rb_link_node(&cic->rb_node, parent, p);
1524 rb_insert_color(&cic->rb_node, &ioc->cic_root);
1525 list_add(&cic->queue_list, &cfqd->cic_list);
1526 read_unlock(&cfq_exit_lock);
1527}
1528
1448/* 1529/*
1449 * Setup general io context and cfq io context. There can be several cfq 1530 * Setup general io context and cfq io context. There can be several cfq
1450 * io contexts per general io context, if this process is doing io to more 1531 * io contexts per general io context, if this process is doing io to more
1451 * than one device managed by cfq. Note that caller is holding a reference to 1532 * than one device managed by cfq.
1452 * cfqq, so we don't need to worry about it disappearing
1453 */ 1533 */
1454static struct cfq_io_context * 1534static struct cfq_io_context *
1455cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) 1535cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
1456{ 1536{
1457 struct io_context *ioc = NULL; 1537 struct io_context *ioc = NULL;
1458 struct cfq_io_context *cic; 1538 struct cfq_io_context *cic;
@@ -1463,88 +1543,15 @@ cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask)
1463 if (!ioc) 1543 if (!ioc)
1464 return NULL; 1544 return NULL;
1465 1545
1466restart: 1546 cic = cfq_cic_rb_lookup(cfqd, ioc);
1467 if ((cic = ioc->cic) == NULL) { 1547 if (cic)
1468 cic = cfq_alloc_io_context(cfqd, gfp_mask); 1548 goto out;
1469
1470 if (cic == NULL)
1471 goto err;
1472
1473 /*
1474 * manually increment generic io_context usage count, it
1475 * cannot go away since we are already holding one ref to it
1476 */
1477 cic->ioc = ioc;
1478 cic->key = cfqd;
1479 read_lock(&cfq_exit_lock);
1480 ioc->set_ioprio = cfq_ioc_set_ioprio;
1481 ioc->cic = cic;
1482 list_add(&cic->queue_list, &cfqd->cic_list);
1483 read_unlock(&cfq_exit_lock);
1484 } else {
1485 struct cfq_io_context *__cic;
1486
1487 /*
1488 * the first cic on the list is actually the head itself
1489 */
1490 if (cic->key == cfqd)
1491 goto out;
1492
1493 if (unlikely(!cic->key)) {
1494 read_lock(&cfq_exit_lock);
1495 if (list_empty(&cic->list))
1496 ioc->cic = NULL;
1497 else
1498 ioc->cic = list_entry(cic->list.next,
1499 struct cfq_io_context,
1500 list);
1501 read_unlock(&cfq_exit_lock);
1502 kmem_cache_free(cfq_ioc_pool, cic);
1503 atomic_dec(&ioc_count);
1504 goto restart;
1505 }
1506
1507 /*
1508 * cic exists, check if we already are there. linear search
1509 * should be ok here, the list will usually not be more than
1510 * 1 or a few entries long
1511 */
1512 list_for_each_entry(__cic, &cic->list, list) {
1513 /*
1514 * this process is already holding a reference to
1515 * this queue, so no need to get one more
1516 */
1517 if (__cic->key == cfqd) {
1518 cic = __cic;
1519 goto out;
1520 }
1521 if (unlikely(!__cic->key)) {
1522 read_lock(&cfq_exit_lock);
1523 list_del(&__cic->list);
1524 read_unlock(&cfq_exit_lock);
1525 kmem_cache_free(cfq_ioc_pool, __cic);
1526 atomic_dec(&ioc_count);
1527 goto restart;
1528 }
1529 }
1530 1549
1531 /* 1550 cic = cfq_alloc_io_context(cfqd, gfp_mask);
1532 * nope, process doesn't have a cic assoicated with this 1551 if (cic == NULL)
1533 * cfqq yet. get a new one and add to list 1552 goto err;
1534 */
1535 __cic = cfq_alloc_io_context(cfqd, gfp_mask);
1536 if (__cic == NULL)
1537 goto err;
1538
1539 __cic->ioc = ioc;
1540 __cic->key = cfqd;
1541 read_lock(&cfq_exit_lock);
1542 list_add(&__cic->list, &cic->list);
1543 list_add(&__cic->queue_list, &cfqd->cic_list);
1544 read_unlock(&cfq_exit_lock);
1545 cic = __cic;
1546 }
1547 1553
1554 cfq_cic_link(cfqd, ioc, cic);
1548out: 1555out:
1549 return cic; 1556 return cic;
1550err: 1557err:
@@ -1577,7 +1584,33 @@ cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
1577 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; 1584 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
1578} 1585}
1579 1586
1580#define sample_valid(samples) ((samples) > 80) 1587static void
1588cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
1589 struct cfq_rq *crq)
1590{
1591 sector_t sdist;
1592 u64 total;
1593
1594 if (cic->last_request_pos < crq->request->sector)
1595 sdist = crq->request->sector - cic->last_request_pos;
1596 else
1597 sdist = cic->last_request_pos - crq->request->sector;
1598
1599 /*
1600 * Don't allow the seek distance to get too large from the
1601 * odd fragment, pagein, etc
1602 */
1603 if (cic->seek_samples <= 60) /* second&third seek */
1604 sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*1024);
1605 else
1606 sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*64);
1607
1608 cic->seek_samples = (7*cic->seek_samples + 256) / 8;
1609 cic->seek_total = (7*cic->seek_total + (u64)256*sdist) / 8;
1610 total = cic->seek_total + (cic->seek_samples/2);
1611 do_div(total, cic->seek_samples);
1612 cic->seek_mean = (sector_t)total;
1613}
1581 1614
1582/* 1615/*
1583 * Disable idle window if the process thinks too long or seeks so much that 1616 * Disable idle window if the process thinks too long or seeks so much that
@@ -1690,9 +1723,11 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1690 cic = crq->io_context; 1723 cic = crq->io_context;
1691 1724
1692 cfq_update_io_thinktime(cfqd, cic); 1725 cfq_update_io_thinktime(cfqd, cic);
1726 cfq_update_io_seektime(cfqd, cic, crq);
1693 cfq_update_idle_window(cfqd, cfqq, cic); 1727 cfq_update_idle_window(cfqd, cfqq, cic);
1694 1728
1695 cic->last_queue = jiffies; 1729 cic->last_queue = jiffies;
1730 cic->last_request_pos = crq->request->sector + crq->request->nr_sectors;
1696 1731
1697 if (cfqq == cfqd->active_queue) { 1732 if (cfqq == cfqd->active_queue) {
1698 /* 1733 /*
@@ -1825,14 +1860,6 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
1825 cfq_resort_rr_list(cfqq, 0); 1860 cfq_resort_rr_list(cfqq, 0);
1826} 1861}
1827 1862
1828static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
1829{
1830 if (rw == READ || process_sync(task))
1831 return task->pid;
1832
1833 return CFQ_KEY_ASYNC;
1834}
1835
1836static inline int 1863static inline int
1837__cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1864__cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1838 struct task_struct *task, int rw) 1865 struct task_struct *task, int rw)
@@ -1965,7 +1992,7 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
1965 1992
1966 might_sleep_if(gfp_mask & __GFP_WAIT); 1993 might_sleep_if(gfp_mask & __GFP_WAIT);
1967 1994
1968 cic = cfq_get_io_context(cfqd, key, gfp_mask); 1995 cic = cfq_get_io_context(cfqd, gfp_mask);
1969 1996
1970 spin_lock_irqsave(q->queue_lock, flags); 1997 spin_lock_irqsave(q->queue_lock, flags);
1971 1998
@@ -2133,11 +2160,14 @@ static void cfq_exit_queue(elevator_t *e)
2133 request_queue_t *q = cfqd->queue; 2160 request_queue_t *q = cfqd->queue;
2134 2161
2135 cfq_shutdown_timer_wq(cfqd); 2162 cfq_shutdown_timer_wq(cfqd);
2163
2136 write_lock(&cfq_exit_lock); 2164 write_lock(&cfq_exit_lock);
2137 spin_lock_irq(q->queue_lock); 2165 spin_lock_irq(q->queue_lock);
2166
2138 if (cfqd->active_queue) 2167 if (cfqd->active_queue)
2139 __cfq_slice_expired(cfqd, cfqd->active_queue, 0); 2168 __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
2140 while(!list_empty(&cfqd->cic_list)) { 2169
2170 while (!list_empty(&cfqd->cic_list)) {
2141 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, 2171 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
2142 struct cfq_io_context, 2172 struct cfq_io_context,
2143 queue_list); 2173 queue_list);
@@ -2152,6 +2182,7 @@ static void cfq_exit_queue(elevator_t *e)
2152 cic->key = NULL; 2182 cic->key = NULL;
2153 list_del_init(&cic->queue_list); 2183 list_del_init(&cic->queue_list);
2154 } 2184 }
2185
2155 spin_unlock_irq(q->queue_lock); 2186 spin_unlock_irq(q->queue_lock);
2156 write_unlock(&cfq_exit_lock); 2187 write_unlock(&cfq_exit_lock);
2157 2188
@@ -2227,7 +2258,6 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
2227 cfqd->cfq_slice[1] = cfq_slice_sync; 2258 cfqd->cfq_slice[1] = cfq_slice_sync;
2228 cfqd->cfq_slice_async_rq = cfq_slice_async_rq; 2259 cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
2229 cfqd->cfq_slice_idle = cfq_slice_idle; 2260 cfqd->cfq_slice_idle = cfq_slice_idle;
2230 cfqd->cfq_max_depth = cfq_max_depth;
2231 2261
2232 return 0; 2262 return 0;
2233out_crqpool: 2263out_crqpool:
@@ -2310,7 +2340,6 @@ SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
2310SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); 2340SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
2311SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); 2341SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
2312SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); 2342SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
2313SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0);
2314#undef SHOW_FUNCTION 2343#undef SHOW_FUNCTION
2315 2344
2316#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ 2345#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
@@ -2339,7 +2368,6 @@ STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
2339STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); 2368STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
2340STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); 2369STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
2341STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); 2370STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0);
2342STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 1, UINT_MAX, 0);
2343#undef STORE_FUNCTION 2371#undef STORE_FUNCTION
2344 2372
2345#define CFQ_ATTR(name) \ 2373#define CFQ_ATTR(name) \
@@ -2356,7 +2384,6 @@ static struct elv_fs_entry cfq_attrs[] = {
2356 CFQ_ATTR(slice_async), 2384 CFQ_ATTR(slice_async),
2357 CFQ_ATTR(slice_async_rq), 2385 CFQ_ATTR(slice_async_rq),
2358 CFQ_ATTR(slice_idle), 2386 CFQ_ATTR(slice_idle),
2359 CFQ_ATTR(max_depth),
2360 __ATTR_NULL 2387 __ATTR_NULL
2361}; 2388};
2362 2389
diff --git a/block/genhd.c b/block/genhd.c
index 64510fd88621..db4c60c802d6 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -454,8 +454,8 @@ static ssize_t disk_stats_read(struct gendisk * disk, char *page)
454 disk_round_stats(disk); 454 disk_round_stats(disk);
455 preempt_enable(); 455 preempt_enable();
456 return sprintf(page, 456 return sprintf(page,
457 "%8u %8u %8llu %8u " 457 "%8lu %8lu %8llu %8u "
458 "%8u %8u %8llu %8u " 458 "%8lu %8lu %8llu %8u "
459 "%8u %8u %8u" 459 "%8u %8u %8u"
460 "\n", 460 "\n",
461 disk_stat_read(disk, ios[READ]), 461 disk_stat_read(disk, ios[READ]),
@@ -649,7 +649,7 @@ static int diskstats_show(struct seq_file *s, void *v)
649 preempt_disable(); 649 preempt_disable();
650 disk_round_stats(gp); 650 disk_round_stats(gp);
651 preempt_enable(); 651 preempt_enable();
652 seq_printf(s, "%4d %4d %s %u %u %llu %u %u %u %llu %u %u %u %u\n", 652 seq_printf(s, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n",
653 gp->major, n + gp->first_minor, disk_name(gp, n, buf), 653 gp->major, n + gp->first_minor, disk_name(gp, n, buf),
654 disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), 654 disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]),
655 (unsigned long long)disk_stat_read(gp, sectors[0]), 655 (unsigned long long)disk_stat_read(gp, sectors[0]),
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 062067fa7ead..5b26af8597f3 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -785,6 +785,8 @@ void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)
785 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); 785 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
786 t->max_segment_size = min(t->max_segment_size,b->max_segment_size); 786 t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
787 t->hardsect_size = max(t->hardsect_size,b->hardsect_size); 787 t->hardsect_size = max(t->hardsect_size,b->hardsect_size);
788 if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
789 clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags);
788} 790}
789 791
790EXPORT_SYMBOL(blk_queue_stack_limits); 792EXPORT_SYMBOL(blk_queue_stack_limits);
@@ -906,17 +908,15 @@ init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)
906 __FUNCTION__, depth); 908 __FUNCTION__, depth);
907 } 909 }
908 910
909 tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC); 911 tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC);
910 if (!tag_index) 912 if (!tag_index)
911 goto fail; 913 goto fail;
912 914
913 nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; 915 nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;
914 tag_map = kmalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); 916 tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC);
915 if (!tag_map) 917 if (!tag_map)
916 goto fail; 918 goto fail;
917 919
918 memset(tag_index, 0, depth * sizeof(struct request *));
919 memset(tag_map, 0, nr_ulongs * sizeof(unsigned long));
920 tags->real_max_depth = depth; 920 tags->real_max_depth = depth;
921 tags->max_depth = depth; 921 tags->max_depth = depth;
922 tags->tag_index = tag_index; 922 tags->tag_index = tag_index;
@@ -2479,10 +2479,12 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
2479 rq->rq_disk = bd_disk; 2479 rq->rq_disk = bd_disk;
2480 rq->flags |= REQ_NOMERGE; 2480 rq->flags |= REQ_NOMERGE;
2481 rq->end_io = done; 2481 rq->end_io = done;
2482 elv_add_request(q, rq, where, 1); 2482 WARN_ON(irqs_disabled());
2483 generic_unplug_device(q); 2483 spin_lock_irq(q->queue_lock);
2484 __elv_add_request(q, rq, where, 1);
2485 __generic_unplug_device(q);
2486 spin_unlock_irq(q->queue_lock);
2484} 2487}
2485
2486EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); 2488EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
2487 2489
2488/** 2490/**
@@ -3512,7 +3514,7 @@ int __init blk_dev_init(void)
3512 iocontext_cachep = kmem_cache_create("blkdev_ioc", 3514 iocontext_cachep = kmem_cache_create("blkdev_ioc",
3513 sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); 3515 sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);
3514 3516
3515 for_each_cpu(i) 3517 for_each_possible_cpu(i)
3516 INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); 3518 INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
3517 3519
3518 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); 3520 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
@@ -3537,11 +3539,17 @@ void put_io_context(struct io_context *ioc)
3537 BUG_ON(atomic_read(&ioc->refcount) == 0); 3539 BUG_ON(atomic_read(&ioc->refcount) == 0);
3538 3540
3539 if (atomic_dec_and_test(&ioc->refcount)) { 3541 if (atomic_dec_and_test(&ioc->refcount)) {
3542 struct cfq_io_context *cic;
3543
3540 rcu_read_lock(); 3544 rcu_read_lock();
3541 if (ioc->aic && ioc->aic->dtor) 3545 if (ioc->aic && ioc->aic->dtor)
3542 ioc->aic->dtor(ioc->aic); 3546 ioc->aic->dtor(ioc->aic);
3543 if (ioc->cic && ioc->cic->dtor) 3547 if (ioc->cic_root.rb_node != NULL) {
3544 ioc->cic->dtor(ioc->cic); 3548 struct rb_node *n = rb_first(&ioc->cic_root);
3549
3550 cic = rb_entry(n, struct cfq_io_context, rb_node);
3551 cic->dtor(ioc);
3552 }
3545 rcu_read_unlock(); 3553 rcu_read_unlock();
3546 3554
3547 kmem_cache_free(iocontext_cachep, ioc); 3555 kmem_cache_free(iocontext_cachep, ioc);
@@ -3554,6 +3562,7 @@ void exit_io_context(void)
3554{ 3562{
3555 unsigned long flags; 3563 unsigned long flags;
3556 struct io_context *ioc; 3564 struct io_context *ioc;
3565 struct cfq_io_context *cic;
3557 3566
3558 local_irq_save(flags); 3567 local_irq_save(flags);
3559 task_lock(current); 3568 task_lock(current);
@@ -3565,9 +3574,11 @@ void exit_io_context(void)
3565 3574
3566 if (ioc->aic && ioc->aic->exit) 3575 if (ioc->aic && ioc->aic->exit)
3567 ioc->aic->exit(ioc->aic); 3576 ioc->aic->exit(ioc->aic);
3568 if (ioc->cic && ioc->cic->exit) 3577 if (ioc->cic_root.rb_node != NULL) {
3569 ioc->cic->exit(ioc->cic); 3578 cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
3570 3579 cic->exit(ioc);
3580 }
3581
3571 put_io_context(ioc); 3582 put_io_context(ioc);
3572} 3583}
3573 3584
@@ -3596,7 +3607,7 @@ struct io_context *current_io_context(gfp_t gfp_flags)
3596 ret->last_waited = jiffies; /* doesn't matter... */ 3607 ret->last_waited = jiffies; /* doesn't matter... */
3597 ret->nr_batch_requests = 0; /* because this is 0 */ 3608 ret->nr_batch_requests = 0; /* because this is 0 */
3598 ret->aic = NULL; 3609 ret->aic = NULL;
3599 ret->cic = NULL; 3610 ret->cic_root.rb_node = NULL;
3600 tsk->io_context = ret; 3611 tsk->io_context = ret;
3601 } 3612 }
3602 3613