author    James Bottomley <jejb@mulgrave.il.steeleye.com>  2006-06-10 14:47:26 -0400
committer James Bottomley <jejb@mulgrave.il.steeleye.com>  2006-06-10 14:47:26 -0400
commit    f0cd91a68acdc9b49d7f6738b514a426da627649 (patch)
tree      8ad73564015794197583b094217ae0a71e71e753 /block
parent    60eef25701d25e99c991dd0f4a9f3832a0c3ad3e (diff)
parent    128e6ced247cda88f96fa9f2e4ba8b2c4a681560 (diff)
Merge ../linux-2.6
Diffstat (limited to 'block')
-rw-r--r--  block/as-iosched.c        18
-rw-r--r--  block/cfq-iosched.c      134
-rw-r--r--  block/deadline-iosched.c  13
-rw-r--r--  block/elevator.c          65
-rw-r--r--  block/ll_rw_blk.c         30
-rw-r--r--  block/noop-iosched.c       7
6 files changed, 178 insertions, 89 deletions
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 296708ceceb2..a7caf35ca0c2 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1648,17 +1648,17 @@ static void as_exit_queue(elevator_t *e)
  * initialize elevator private data (as_data), and alloc a arq for
  * each request on the free lists
  */
-static int as_init_queue(request_queue_t *q, elevator_t *e)
+static void *as_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct as_data *ad;
 	int i;

 	if (!arq_pool)
-		return -ENOMEM;
+		return NULL;

 	ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node);
 	if (!ad)
-		return -ENOMEM;
+		return NULL;
 	memset(ad, 0, sizeof(*ad));

 	ad->q = q; /* Identify what queue the data belongs to */
@@ -1667,7 +1667,7 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
 					GFP_KERNEL, q->node);
 	if (!ad->hash) {
 		kfree(ad);
-		return -ENOMEM;
+		return NULL;
 	}

 	ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
@@ -1675,7 +1675,7 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
 	if (!ad->arq_pool) {
 		kfree(ad->hash);
 		kfree(ad);
-		return -ENOMEM;
+		return NULL;
 	}

 	/* anticipatory scheduling helpers */
@@ -1696,14 +1696,13 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
 	ad->antic_expire = default_antic_expire;
 	ad->batch_expire[REQ_SYNC] = default_read_batch_expire;
 	ad->batch_expire[REQ_ASYNC] = default_write_batch_expire;
-	e->elevator_data = ad;

 	ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC];
 	ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10;
 	if (ad->write_batch_count < 2)
 		ad->write_batch_count = 2;

-	return 0;
+	return ad;
 }

 /*
@@ -1844,9 +1843,10 @@ static void __exit as_exit(void)
 	DECLARE_COMPLETION(all_gone);
 	elv_unregister(&iosched_as);
 	ioc_gone = &all_gone;
-	barrier();
+	/* ioc_gone's update must be visible before reading ioc_count */
+	smp_wmb();
 	if (atomic_read(&ioc_count))
-		complete(ioc_gone);
+		wait_for_completion(ioc_gone);
 	synchronize_rcu();
 	kmem_cache_destroy(arq_pool);
 }
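
The as_init_queue() hunks above (and the matching CFQ, deadline and noop hunks further down) all make the same interface change: an elevator's init function now hands its private data back to the caller, returning NULL on failure, instead of returning 0/-ENOMEM and storing the pointer in e->elevator_data itself. A minimal user-space sketch of the new convention, with invented names and plain malloc standing in for the kernel allocators, looks roughly like this:

#include <stdlib.h>

struct sched_data {
	int setting;
};

/* new style: return the private data (or NULL) and let the caller attach it */
static void *sched_init_queue(void)
{
	struct sched_data *d = malloc(sizeof(*d));

	if (!d)
		return NULL;		/* old style returned -ENOMEM here */
	d->setting = 0;
	return d;			/* old style: e->elevator_data = d; return 0; */
}

int main(void)
{
	void *data = sched_init_queue();

	if (!data)
		return 1;		/* caller now checks for NULL, not an errno */
	free(data);
	return 0;
}

The caller-side half of this split shows up in the block/elevator.c hunks below, where elevator_init_queue() and elevator_attach() take over the two halves of what the old elevator_attach() used to do.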
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 67d446de0227..a46d030e092a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -33,7 +33,7 @@ static int cfq_slice_idle = HZ / 70;

 #define CFQ_KEY_ASYNC		(0)

-static DEFINE_RWLOCK(cfq_exit_lock);
+static DEFINE_SPINLOCK(cfq_exit_lock);

 /*
  * for the hash of cfqq inside the cfqd
@@ -133,6 +133,7 @@ struct cfq_data {
 	mempool_t *crq_pool;

 	int rq_in_driver;
+	int hw_tag;

 	/*
 	 * schedule slice state info
@@ -500,10 +501,13 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)

 	/*
 	 * if queue was preempted, just add to front to be fair. busy_rr
-	 * isn't sorted.
+	 * isn't sorted, but insert at the back for fairness.
 	 */
 	if (preempted || list == &cfqd->busy_rr) {
-		list_add(&cfqq->cfq_list, list);
+		if (preempted)
+			list = list->prev;
+
+		list_add_tail(&cfqq->cfq_list, list);
 		return;
 	}

@@ -664,6 +668,15 @@ static void cfq_activate_request(request_queue_t *q, struct request *rq)
 	struct cfq_data *cfqd = q->elevator->elevator_data;

 	cfqd->rq_in_driver++;
+
+	/*
+	 * If the depth is larger 1, it really could be queueing. But lets
+	 * make the mark a little higher - idling could still be good for
+	 * low queueing, and a low queueing number could also just indicate
+	 * a SCSI mid layer like behaviour where limit+1 is often seen.
+	 */
+	if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
+		cfqd->hw_tag = 1;
 }

 static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
@@ -879,6 +892,13 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
 		cfqq = list_entry_cfqq(cfqd->cur_rr.next);

 	/*
+	 * If no new queues are available, check if the busy list has some
+	 * before falling back to idle io.
+	 */
+	if (!cfqq && !list_empty(&cfqd->busy_rr))
+		cfqq = list_entry_cfqq(cfqd->busy_rr.next);
+
+	/*
 	 * if we have idle queues and no rt or be queues had pending
 	 * requests, either allow immediate service if the grace period
 	 * has passed or arm the idle grace timer
@@ -1284,7 +1304,7 @@ static void cfq_exit_io_context(struct io_context *ioc)
 	/*
 	 * put the reference this task is holding to the various queues
 	 */
-	read_lock_irqsave(&cfq_exit_lock, flags);
+	spin_lock_irqsave(&cfq_exit_lock, flags);

 	n = rb_first(&ioc->cic_root);
 	while (n != NULL) {
@@ -1294,7 +1314,7 @@ static void cfq_exit_io_context(struct io_context *ioc)
 		n = rb_next(n);
 	}

-	read_unlock_irqrestore(&cfq_exit_lock, flags);
+	spin_unlock_irqrestore(&cfq_exit_lock, flags);
 }

 static struct cfq_io_context *
@@ -1400,17 +1420,17 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
 	struct cfq_io_context *cic;
 	struct rb_node *n;

-	write_lock(&cfq_exit_lock);
+	spin_lock(&cfq_exit_lock);

 	n = rb_first(&ioc->cic_root);
 	while (n != NULL) {
 		cic = rb_entry(n, struct cfq_io_context, rb_node);

 		changed_ioprio(cic);
 		n = rb_next(n);
 	}

-	write_unlock(&cfq_exit_lock);
+	spin_unlock(&cfq_exit_lock);

 	return 0;
 }
@@ -1458,7 +1478,8 @@ retry:
 		 * set ->slice_left to allow preemption for a new process
 		 */
 		cfqq->slice_left = 2 * cfqd->cfq_slice_idle;
-		cfq_mark_cfqq_idle_window(cfqq);
+		if (!cfqd->hw_tag)
+			cfq_mark_cfqq_idle_window(cfqq);
 		cfq_mark_cfqq_prio_changed(cfqq);
 		cfq_init_prio_data(cfqq);
 	}
@@ -1472,19 +1493,38 @@ out:
 	return cfqq;
 }

+static void
+cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic)
+{
+	spin_lock(&cfq_exit_lock);
+	rb_erase(&cic->rb_node, &ioc->cic_root);
+	list_del_init(&cic->queue_list);
+	spin_unlock(&cfq_exit_lock);
+	kmem_cache_free(cfq_ioc_pool, cic);
+	atomic_dec(&ioc_count);
+}
+
 static struct cfq_io_context *
 cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 {
-	struct rb_node *n = ioc->cic_root.rb_node;
+	struct rb_node *n;
 	struct cfq_io_context *cic;
-	void *key = cfqd;
+	void *k, *key = cfqd;

+restart:
+	n = ioc->cic_root.rb_node;
 	while (n) {
 		cic = rb_entry(n, struct cfq_io_context, rb_node);
+		/* ->key must be copied to avoid race with cfq_exit_queue() */
+		k = cic->key;
+		if (unlikely(!k)) {
+			cfq_drop_dead_cic(ioc, cic);
+			goto restart;
+		}

-		if (key < cic->key)
+		if (key < k)
 			n = n->rb_left;
-		else if (key > cic->key)
+		else if (key > k)
 			n = n->rb_right;
 		else
 			return cic;
@@ -1497,33 +1537,41 @@ static inline void
 cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
 	     struct cfq_io_context *cic)
 {
-	struct rb_node **p = &ioc->cic_root.rb_node;
-	struct rb_node *parent = NULL;
+	struct rb_node **p;
+	struct rb_node *parent;
 	struct cfq_io_context *__cic;
-
-	read_lock(&cfq_exit_lock);
+	void *k;

 	cic->ioc = ioc;
 	cic->key = cfqd;

 	ioc->set_ioprio = cfq_ioc_set_ioprio;
-
+restart:
+	parent = NULL;
+	p = &ioc->cic_root.rb_node;
 	while (*p) {
 		parent = *p;
 		__cic = rb_entry(parent, struct cfq_io_context, rb_node);
+		/* ->key must be copied to avoid race with cfq_exit_queue() */
+		k = __cic->key;
+		if (unlikely(!k)) {
+			cfq_drop_dead_cic(ioc, cic);
+			goto restart;
+		}

-		if (cic->key < __cic->key)
+		if (cic->key < k)
 			p = &(*p)->rb_left;
-		else if (cic->key > __cic->key)
+		else if (cic->key > k)
 			p = &(*p)->rb_right;
 		else
 			BUG();
 	}

+	spin_lock(&cfq_exit_lock);
 	rb_link_node(&cic->rb_node, parent, p);
 	rb_insert_color(&cic->rb_node, &ioc->cic_root);
 	list_add(&cic->queue_list, &cfqd->cic_list);
-	read_unlock(&cfq_exit_lock);
+	spin_unlock(&cfq_exit_lock);
 }

 /*
@@ -1622,7 +1670,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 {
 	int enable_idle = cfq_cfqq_idle_window(cfqq);

-	if (!cic->ioc->task || !cfqd->cfq_slice_idle)
+	if (!cic->ioc->task || !cfqd->cfq_slice_idle || cfqd->hw_tag)
 		enable_idle = 0;
 	else if (sample_valid(cic->ttime_samples)) {
 		if (cic->ttime_mean > cfqd->cfq_slice_idle)
@@ -1713,14 +1761,24 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,

 	cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);

+	cic = crq->io_context;
+
 	/*
 	 * we never wait for an async request and we don't allow preemption
 	 * of an async request. so just return early
 	 */
-	if (!cfq_crq_is_sync(crq))
+	if (!cfq_crq_is_sync(crq)) {
+		/*
+		 * sync process issued an async request, if it's waiting
+		 * then expire it and kick rq handling.
+		 */
+		if (cic == cfqd->active_cic &&
+		    del_timer(&cfqd->idle_slice_timer)) {
+			cfq_slice_expired(cfqd, 0);
+			cfq_start_queueing(cfqd, cfqq);
+		}
 		return;
-
-	cic = crq->io_context;
+	}

 	cfq_update_io_thinktime(cfqd, cic);
 	cfq_update_io_seektime(cfqd, cic, crq);
@@ -2138,10 +2196,9 @@ static void cfq_idle_class_timer(unsigned long data)
 	 * race with a non-idle queue, reset timer
 	 */
 	end = cfqd->last_end_request + CFQ_IDLE_GRACE;
-	if (!time_after_eq(jiffies, end)) {
-		cfqd->idle_class_timer.expires = end;
-		add_timer(&cfqd->idle_class_timer);
-	} else
+	if (!time_after_eq(jiffies, end))
+		mod_timer(&cfqd->idle_class_timer, end);
+	else
 		cfq_schedule_dispatch(cfqd);

 	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
@@ -2161,7 +2218,7 @@ static void cfq_exit_queue(elevator_t *e)

 	cfq_shutdown_timer_wq(cfqd);

-	write_lock(&cfq_exit_lock);
+	spin_lock(&cfq_exit_lock);
 	spin_lock_irq(q->queue_lock);

 	if (cfqd->active_queue)
@@ -2184,7 +2241,7 @@ static void cfq_exit_queue(elevator_t *e)
 	}

 	spin_unlock_irq(q->queue_lock);
-	write_unlock(&cfq_exit_lock);
+	spin_unlock(&cfq_exit_lock);

 	cfq_shutdown_timer_wq(cfqd);

@@ -2194,14 +2251,14 @@ static void cfq_exit_queue(elevator_t *e)
 	kfree(cfqd);
 }

-static int cfq_init_queue(request_queue_t *q, elevator_t *e)
+static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct cfq_data *cfqd;
 	int i;

 	cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
 	if (!cfqd)
-		return -ENOMEM;
+		return NULL;

 	memset(cfqd, 0, sizeof(*cfqd));

@@ -2231,8 +2288,6 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
 	for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
 		INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);

-	e->elevator_data = cfqd;
-
 	cfqd->queue = q;

 	cfqd->max_queued = q->nr_requests / 4;
@@ -2259,14 +2314,14 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;

-	return 0;
+	return cfqd;
 out_crqpool:
 	kfree(cfqd->cfq_hash);
 out_cfqhash:
 	kfree(cfqd->crq_hash);
 out_crqhash:
 	kfree(cfqd);
-	return -ENOMEM;
+	return NULL;
 }

 static void cfq_slab_kill(void)
@@ -2439,9 +2494,10 @@ static void __exit cfq_exit(void)
 	DECLARE_COMPLETION(all_gone);
 	elv_unregister(&iosched_cfq);
 	ioc_gone = &all_gone;
-	barrier();
+	/* ioc_gone's update must be visible before reading ioc_count */
+	smp_wmb();
 	if (atomic_read(&ioc_count))
-		complete(ioc_gone);
+		wait_for_completion(ioc_gone);
 	synchronize_rcu();
 	cfq_slab_kill();
 }
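
Several of the CFQ hunks above hinge on the new hw_tag flag: cfq_activate_request() sets it once more than four requests have been observed in the driver at the same time, and queues then skip the idle window, since a device that does its own command queueing is better kept busy than idled. A stand-alone user-space sketch of just that heuristic, with invented names and the threshold of 4 taken from the hunk above, is:

#include <stdio.h>

struct sched_state {
	int rq_in_driver;	/* requests currently dispatched to the driver */
	int hw_tag;		/* set once the device appears to queue on its own */
};

static void activate_request(struct sched_state *s)
{
	s->rq_in_driver++;

	/* same shape as the cfq_activate_request() hunk: depth > 4 => tagged */
	if (!s->hw_tag && s->rq_in_driver > 4)
		s->hw_tag = 1;
}

static void complete_request(struct sched_state *s)
{
	s->rq_in_driver--;
}

int main(void)
{
	struct sched_state s = { 0, 0 };
	int i;

	for (i = 0; i < 6; i++)
		activate_request(&s);
	for (i = 0; i < 6; i++)
		complete_request(&s);

	printf("hw_tag = %d\n", s.hw_tag);	/* prints 1: depth exceeded 4 */
	return 0;
}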
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 399fa1e60e1f..3bd0415a9828 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -613,24 +613,24 @@ static void deadline_exit_queue(elevator_t *e)
  * initialize elevator private data (deadline_data), and alloc a drq for
  * each request on the free lists
  */
-static int deadline_init_queue(request_queue_t *q, elevator_t *e)
+static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct deadline_data *dd;
 	int i;

 	if (!drq_pool)
-		return -ENOMEM;
+		return NULL;

 	dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
 	if (!dd)
-		return -ENOMEM;
+		return NULL;
 	memset(dd, 0, sizeof(*dd));

 	dd->hash = kmalloc_node(sizeof(struct list_head)*DL_HASH_ENTRIES,
 				GFP_KERNEL, q->node);
 	if (!dd->hash) {
 		kfree(dd);
-		return -ENOMEM;
+		return NULL;
 	}

 	dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
@@ -638,7 +638,7 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e)
 	if (!dd->drq_pool) {
 		kfree(dd->hash);
 		kfree(dd);
-		return -ENOMEM;
+		return NULL;
 	}

 	for (i = 0; i < DL_HASH_ENTRIES; i++)
@@ -653,8 +653,7 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e)
 	dd->writes_starved = writes_starved;
 	dd->front_merges = 1;
 	dd->fifo_batch = fifo_batch;
-	e->elevator_data = dd;
-	return 0;
+	return dd;
 }

 static void deadline_put_request(request_queue_t *q, struct request *rq)
diff --git a/block/elevator.c b/block/elevator.c
index 0d6be03d929e..a0afdd317cef 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -121,16 +121,16 @@ static struct elevator_type *elevator_get(const char *name)
 	return e;
 }

-static int elevator_attach(request_queue_t *q, struct elevator_queue *eq)
+static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
 {
-	int ret = 0;
+	return eq->ops->elevator_init_fn(q, eq);
+}

+static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
+			   void *data)
+{
 	q->elevator = eq;
-
-	if (eq->ops->elevator_init_fn)
-		ret = eq->ops->elevator_init_fn(q, eq);
-
-	return ret;
+	eq->elevator_data = data;
 }

 static char chosen_elevator[16];
@@ -181,6 +181,7 @@ int elevator_init(request_queue_t *q, char *name)
 	struct elevator_type *e = NULL;
 	struct elevator_queue *eq;
 	int ret = 0;
+	void *data;

 	INIT_LIST_HEAD(&q->queue_head);
 	q->last_merge = NULL;
@@ -202,10 +203,13 @@ int elevator_init(request_queue_t *q, char *name)
 	if (!eq)
 		return -ENOMEM;

-	ret = elevator_attach(q, eq);
-	if (ret)
+	data = elevator_init_queue(q, eq);
+	if (!data) {
 		kobject_put(&eq->kobj);
+		return -ENOMEM;
+	}

+	elevator_attach(q, eq, data);
 	return ret;
 }

@@ -333,6 +337,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 {
 	struct list_head *pos;
 	unsigned ordseq;
+	int unplug_it = 1;

 	blk_add_trace_rq(q, rq, BLK_TA_INSERT);

@@ -399,6 +404,11 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 		}

 		list_add_tail(&rq->queuelist, pos);
+		/*
+		 * most requeues happen because of a busy condition, don't
+		 * force unplug of the queue for that case.
+		 */
+		unplug_it = 0;
 		break;

 	default:
@@ -407,7 +417,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 		BUG();
 	}

-	if (blk_queue_plugged(q)) {
+	if (unplug_it && blk_queue_plugged(q)) {
 		int nrq = q->rq.count[READ] + q->rq.count[WRITE]
 			- q->in_flight;

@@ -716,13 +726,16 @@ int elv_register_queue(struct request_queue *q)
 	return error;
 }

+static void __elv_unregister_queue(elevator_t *e)
+{
+	kobject_uevent(&e->kobj, KOBJ_REMOVE);
+	kobject_del(&e->kobj);
+}
+
 void elv_unregister_queue(struct request_queue *q)
 {
-	if (q) {
-		elevator_t *e = q->elevator;
-		kobject_uevent(&e->kobj, KOBJ_REMOVE);
-		kobject_del(&e->kobj);
-	}
+	if (q)
+		__elv_unregister_queue(q->elevator);
 }

 int elv_register(struct elevator_type *e)
@@ -774,6 +787,7 @@ EXPORT_SYMBOL_GPL(elv_unregister);
 static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 {
 	elevator_t *old_elevator, *e;
+	void *data;

 	/*
 	 * Allocate new elevator
@@ -782,6 +796,12 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 	if (!e)
 		return 0;

+	data = elevator_init_queue(q, e);
+	if (!data) {
+		kobject_put(&e->kobj);
+		return 0;
+	}
+
 	/*
 	 * Turn on BYPASS and drain all requests w/ elevator private data
 	 */
@@ -800,19 +820,19 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 		elv_drain_elevator(q);
 	}

-	spin_unlock_irq(q->queue_lock);
-
 	/*
-	 * unregister old elevator data
+	 * Remember old elevator.
 	 */
-	elv_unregister_queue(q);
 	old_elevator = q->elevator;

 	/*
 	 * attach and start new elevator
 	 */
-	if (elevator_attach(q, e))
-		goto fail;
+	elevator_attach(q, e, data);
+
+	spin_unlock_irq(q->queue_lock);
+
+	__elv_unregister_queue(old_elevator);

 	if (elv_register_queue(q))
 		goto fail_register;
@@ -831,7 +851,6 @@ fail_register:
 	 */
 	elevator_exit(e);
 	e = NULL;
-fail:
 	q->elevator = old_elevator;
 	elv_register_queue(q);
 	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
@@ -895,10 +914,8 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name)
 EXPORT_SYMBOL(elv_dispatch_sort);
 EXPORT_SYMBOL(elv_add_request);
 EXPORT_SYMBOL(__elv_add_request);
-EXPORT_SYMBOL(elv_requeue_request);
 EXPORT_SYMBOL(elv_next_request);
 EXPORT_SYMBOL(elv_dequeue_request);
 EXPORT_SYMBOL(elv_queue_empty);
-EXPORT_SYMBOL(elv_completed_request);
 EXPORT_SYMBOL(elevator_exit);
 EXPORT_SYMBOL(elevator_init);
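
The elevator_switch() hunks above reorder the swap: the new scheduler's private data is allocated before the queue is touched, the switch itself happens via elevator_attach() while still holding the queue lock, and only afterwards is the old elevator's sysfs entry torn down. Because nothing that can fail remains in the middle, the old mid-switch unwind path (the fail: label) goes away. A rough user-space sketch of that ordering, with invented names, no locking, and malloc standing in for the real allocations, might look like:

#include <stdio.h>
#include <stdlib.h>

struct elevator { const char *name; void *private; };

static void *init_private(const char *name)
{
	(void)name;
	return malloc(16);	/* stand-in for elevator_init_queue() */
}

static int switch_elevator(struct elevator **cur, const char *new_name)
{
	struct elevator *e = malloc(sizeof(*e));
	void *data;

	if (!e)
		return -1;

	data = init_private(new_name);	/* may fail: nothing to undo yet */
	if (!data) {
		free(e);
		return -1;
	}

	e->name = new_name;
	e->private = data;		/* the attach step itself cannot fail */

	free((*cur)->private);		/* old elevator torn down only now */
	free(*cur);
	*cur = e;
	return 0;
}

int main(void)
{
	struct elevator *cur = malloc(sizeof(*cur));

	cur->name = "noop";
	cur->private = malloc(16);
	printf("switch: %d, now %s\n", switch_elevator(&cur, "cfq"), cur->name);
	free(cur->private);
	free(cur);
	return 0;
}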
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index e112d1a5dab6..7eb36c53f4b7 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -1554,7 +1554,7 @@ void blk_plug_device(request_queue_t *q)
 	 * don't plug a stopped queue, it must be paired with blk_start_queue()
 	 * which will restart the queueing
 	 */
-	if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
+	if (blk_queue_stopped(q))
 		return;

 	if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
@@ -1587,7 +1587,7 @@ EXPORT_SYMBOL(blk_remove_plug);
  */
 void __generic_unplug_device(request_queue_t *q)
 {
-	if (unlikely(test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)))
+	if (unlikely(blk_queue_stopped(q)))
 		return;

 	if (!blk_remove_plug(q))
@@ -1732,8 +1732,21 @@ void blk_run_queue(struct request_queue *q)

 	spin_lock_irqsave(q->queue_lock, flags);
 	blk_remove_plug(q);
-	if (!elv_queue_empty(q))
-		q->request_fn(q);
+
+	/*
+	 * Only recurse once to avoid overrunning the stack, let the unplug
+	 * handling reinvoke the handler shortly if we already got there.
+	 */
+	if (!elv_queue_empty(q)) {
+		if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
+			q->request_fn(q);
+			clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
+		} else {
+			blk_plug_device(q);
+			kblockd_schedule_work(&q->unplug_work);
+		}
+	}
+
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_run_queue);
@@ -3385,7 +3398,7 @@ static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
 }


-static struct notifier_block __devinitdata blk_cpu_notifier = {
+static struct notifier_block blk_cpu_notifier = {
 	.notifier_call	= blk_cpu_notify,
 };

@@ -3439,7 +3452,12 @@ void end_that_request_last(struct request *req, int uptodate)
 	if (unlikely(laptop_mode) && blk_fs_request(req))
 		laptop_io_completion();

-	if (disk && blk_fs_request(req)) {
+	/*
+	 * Account IO completion. bar_rq isn't accounted as a normal
+	 * IO on queueing nor completion. Accounting the containing
+	 * request is enough.
+	 */
+	if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
 		unsigned long duration = jiffies - req->start_time;
 		const int rw = rq_data_dir(req);

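
The blk_run_queue() hunk above guards against unbounded recursion: the request function is only invoked directly when we are not already inside it (tracked with a queue flag), otherwise the queue is re-plugged and the work punted to kblockd. A small single-threaded sketch of that guard, with invented callback names standing in for q->request_fn and the kblockd deferral (the kernel uses an atomic test_and_set_bit, not a plain flag), is:

#include <stdio.h>
#include <stdbool.h>

static bool in_request_fn;	/* stands in for QUEUE_FLAG_REENTER */
static int deferred;		/* counts work punted to the "unplug" path */

static void run_queue(void (*request_fn)(void))
{
	if (!in_request_fn) {
		in_request_fn = true;
		request_fn();
		in_request_fn = false;
	} else {
		/* already inside request_fn: defer instead of recursing */
		deferred++;
	}
}

static void request_fn(void)
{
	/* a driver that re-runs the queue from within its request function */
	run_queue(request_fn);
}

int main(void)
{
	run_queue(request_fn);
	printf("deferred %d nested run(s) instead of recursing\n", deferred);
	return 0;
}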
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index f370e4a7fe6d..56a7c620574f 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -65,16 +65,15 @@ noop_latter_request(request_queue_t *q, struct request *rq)
 	return list_entry(rq->queuelist.next, struct request, queuelist);
 }

-static int noop_init_queue(request_queue_t *q, elevator_t *e)
+static void *noop_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct noop_data *nd;

 	nd = kmalloc(sizeof(*nd), GFP_KERNEL);
 	if (!nd)
-		return -ENOMEM;
+		return NULL;
 	INIT_LIST_HEAD(&nd->queue);
-	e->elevator_data = nd;
-	return 0;
+	return nd;
 }

 static void noop_exit_queue(elevator_t *e)