Diffstat (limited to 'block/cfq-iosched.c')
-rw-r--r--  block/cfq-iosched.c  134
1 file changed, 95 insertions(+), 39 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 67d446de0227..a46d030e092a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -33,7 +33,7 @@ static int cfq_slice_idle = HZ / 70;
 
 #define CFQ_KEY_ASYNC		(0)
 
-static DEFINE_RWLOCK(cfq_exit_lock);
+static DEFINE_SPINLOCK(cfq_exit_lock);
 
 /*
  * for the hash of cfqq inside the cfqd
@@ -133,6 +133,7 @@ struct cfq_data {
 	mempool_t *crq_pool;
 
 	int rq_in_driver;
+	int hw_tag;
 
 	/*
 	 * schedule slice state info
@@ -500,10 +501,13 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
 
 	/*
 	 * if queue was preempted, just add to front to be fair. busy_rr
-	 * isn't sorted.
+	 * isn't sorted, but insert at the back for fairness.
 	 */
 	if (preempted || list == &cfqd->busy_rr) {
-		list_add(&cfqq->cfq_list, list);
+		if (preempted)
+			list = list->prev;
+
+		list_add_tail(&cfqq->cfq_list, list);
 		return;
 	}
 
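A note on the insertion positions used above: list_add() splices a node in right after the head (the front), list_add_tail() right before it (the back), and because the lists are circular, passing list->prev as the head inserts one slot ahead of the current tail, which is what the preempted case now does. A minimal userspace sketch of the kernel's list primitives, assuming nothing beyond what list.h itself provides:

#include <stdio.h>

/* stripped-down userspace copy of the kernel's circular list */
struct list_head { struct list_head *next, *prev; };

static void __list_add(struct list_head *n,
		       struct list_head *prev, struct list_head *next)
{
	next->prev = n;
	n->next = next;
	n->prev = prev;
	prev->next = n;
}

/* front insert: new node follows the head */
static void list_add(struct list_head *n, struct list_head *head)
{
	__list_add(n, head, head->next);
}

/* back insert: new node precedes the head */
static void list_add_tail(struct list_head *n, struct list_head *head)
{
	__list_add(n, head->prev, head);
}

int main(void)
{
	struct list_head rr = { &rr, &rr }, a, b, q;

	list_add_tail(&a, &rr);		/* rr: a */
	list_add_tail(&b, &rr);		/* rr: a b */

	/* the preempted case above: the head is stepped back one node
	 * first, so the queue lands just before the current tail */
	list_add_tail(&q, rr.prev);	/* rr: a q b */

	printf("%s\n", rr.next->next == &q ? "a q b" : "unexpected");
	return 0;
}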
@@ -664,6 +668,15 @@ static void cfq_activate_request(request_queue_t *q, struct request *rq)
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 
 	cfqd->rq_in_driver++;
+
+	/*
+	 * If the depth is larger 1, it really could be queueing. But lets
+	 * make the mark a little higher - idling could still be good for
+	 * low queueing, and a low queueing number could also just indicate
+	 * a SCSI mid layer like behaviour where limit+1 is often seen.
+	 */
+	if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
+		cfqd->hw_tag = 1;
 }
 
 static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
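The hunk above is the entire detection mechanism: cfq never asks the driver whether it does tagged command queueing, it just watches how many requests are outstanding in the driver at once. A hedged stand-alone model of the heuristic (names are illustrative; in the patch the state lives in struct cfq_data and is fed from cfq_activate_request() and cfq_deactivate_request()):

/* illustrative model of the depth probe, not kernel API */
struct depth_probe {
	int rq_in_driver;	/* requests dispatched but not yet completed */
	int hw_tag;		/* latches to 1 once queueing is observed */
};

static void probe_dispatch(struct depth_probe *p)
{
	p->rq_in_driver++;
	/* threshold of 4 rather than 1: a small backlog may just be the
	 * SCSI midlayer's limit+1 pattern, not real hardware queueing */
	if (!p->hw_tag && p->rq_in_driver > 4)
		p->hw_tag = 1;
}

static void probe_complete(struct depth_probe *p)
{
	p->rq_in_driver--;
}

Once hw_tag latches it is never cleared, and the later hunks use it to stop idling: holding the disk for one process makes little sense when the device can profitably service several queues' requests at once.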
@@ -879,6 +892,13 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
 		cfqq = list_entry_cfqq(cfqd->cur_rr.next);
 
 	/*
+	 * If no new queues are available, check if the busy list has some
+	 * before falling back to idle io.
+	 */
+	if (!cfqq && !list_empty(&cfqd->busy_rr))
+		cfqq = list_entry_cfqq(cfqd->busy_rr.next);
+
+	/*
 	 * if we have idle queues and no rt or be queues had pending
 	 * requests, either allow immediate service if the grace period
 	 * has passed or arm the idle grace timer
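The effect is a three-stage selection order: cur_rr first, then busy_rr, and only then idle-class io (subject to the grace period handled just below). A simplified sketch of that order, assuming toy lists rather than the real cfq_data and omitting the grace-period logic:

#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

static int list_empty(const struct list_head *h)
{
	return h->next == h;
}

struct rr_lists {
	struct list_head cur_rr;	/* queues chosen for this round */
	struct list_head busy_rr;	/* queues that never emptied */
	struct list_head idle_rr;	/* idle-class io, lowest priority */
};

/* returns the first node to service, NULL if everything is empty */
static struct list_head *pick_next(struct rr_lists *d)
{
	if (!list_empty(&d->cur_rr))
		return d->cur_rr.next;
	/* the new fallback from the hunk above */
	if (!list_empty(&d->busy_rr))
		return d->busy_rr.next;
	if (!list_empty(&d->idle_rr))
		return d->idle_rr.next;
	return NULL;
}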
@@ -1284,7 +1304,7 @@ static void cfq_exit_io_context(struct io_context *ioc)
 	/*
 	 * put the reference this task is holding to the various queues
 	 */
-	read_lock_irqsave(&cfq_exit_lock, flags);
+	spin_lock_irqsave(&cfq_exit_lock, flags);
 
 	n = rb_first(&ioc->cic_root);
 	while (n != NULL) {
@@ -1294,7 +1314,7 @@ static void cfq_exit_io_context(struct io_context *ioc)
 		n = rb_next(n);
 	}
 
-	read_unlock_irqrestore(&cfq_exit_lock, flags);
+	spin_unlock_irqrestore(&cfq_exit_lock, flags);
 }
 
 static struct cfq_io_context *
@@ -1400,17 +1420,17 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
 	struct cfq_io_context *cic;
 	struct rb_node *n;
 
-	write_lock(&cfq_exit_lock);
+	spin_lock(&cfq_exit_lock);
 
 	n = rb_first(&ioc->cic_root);
 	while (n != NULL) {
 		cic = rb_entry(n, struct cfq_io_context, rb_node);
 
 		changed_ioprio(cic);
 		n = rb_next(n);
 	}
 
-	write_unlock(&cfq_exit_lock);
+	spin_unlock(&cfq_exit_lock);
 
 	return 0;
 }
@@ -1458,7 +1478,8 @@ retry:
 		 * set ->slice_left to allow preemption for a new process
 		 */
 		cfqq->slice_left = 2 * cfqd->cfq_slice_idle;
-		cfq_mark_cfqq_idle_window(cfqq);
+		if (!cfqd->hw_tag)
+			cfq_mark_cfqq_idle_window(cfqq);
 		cfq_mark_cfqq_prio_changed(cfqq);
 		cfq_init_prio_data(cfqq);
 	}
@@ -1472,19 +1493,38 @@ out:
 	return cfqq;
 }
 
+static void
+cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic)
+{
+	spin_lock(&cfq_exit_lock);
+	rb_erase(&cic->rb_node, &ioc->cic_root);
+	list_del_init(&cic->queue_list);
+	spin_unlock(&cfq_exit_lock);
+	kmem_cache_free(cfq_ioc_pool, cic);
+	atomic_dec(&ioc_count);
+}
+
 static struct cfq_io_context *
 cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 {
-	struct rb_node *n = ioc->cic_root.rb_node;
+	struct rb_node *n;
 	struct cfq_io_context *cic;
-	void *key = cfqd;
+	void *k, *key = cfqd;
 
+restart:
+	n = ioc->cic_root.rb_node;
 	while (n) {
 		cic = rb_entry(n, struct cfq_io_context, rb_node);
+		/* ->key must be copied to avoid race with cfq_exit_queue() */
+		k = cic->key;
+		if (unlikely(!k)) {
+			cfq_drop_dead_cic(ioc, cic);
+			goto restart;
+		}
 
-		if (key < cic->key)
+		if (key < k)
 			n = n->rb_left;
-		else if (key > cic->key)
+		else if (key > k)
 			n = n->rb_right;
 		else
 			return cic;
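The race the new comment refers to: cic->key holds the cfqd pointer, and cfq_exit_queue() can clear it at any moment, so the lookup must read the field exactly once and treat a NULL snapshot as a dead node to unlink before restarting. A userspace sketch of that discipline, with a singly linked list standing in for the rbtree so the unlink stays short (the same pattern guards cfq_cic_link() in the next hunk):

#include <stdlib.h>

struct cic {
	void *key;		/* NULL once the owning queue has exited */
	struct cic *next;
};

/* unlink and free a node whose queue is gone, like cfq_drop_dead_cic() */
static void drop_dead(struct cic **link)
{
	struct cic *dead = *link;

	*link = dead->next;
	free(dead);
}

static struct cic *lookup(struct cic **head, void *key)
{
	struct cic **link = head;

	while (*link) {
		struct cic *cic = *link;
		/* snapshot ->key once; reading cic->key twice could see
		 * two different values if teardown runs between the reads */
		void *k = cic->key;

		if (!k) {
			drop_dead(link);
			continue;	/* *link now points at the successor */
		}
		if (k == key)
			return cic;
		link = &cic->next;
	}
	return NULL;
}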
@@ -1497,33 +1537,41 @@ static inline void
 cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
 	     struct cfq_io_context *cic)
 {
-	struct rb_node **p = &ioc->cic_root.rb_node;
-	struct rb_node *parent = NULL;
+	struct rb_node **p;
+	struct rb_node *parent;
 	struct cfq_io_context *__cic;
-
-	read_lock(&cfq_exit_lock);
+	void *k;
 
 	cic->ioc = ioc;
 	cic->key = cfqd;
 
 	ioc->set_ioprio = cfq_ioc_set_ioprio;
-
+restart:
+	parent = NULL;
+	p = &ioc->cic_root.rb_node;
 	while (*p) {
 		parent = *p;
 		__cic = rb_entry(parent, struct cfq_io_context, rb_node);
+		/* ->key must be copied to avoid race with cfq_exit_queue() */
+		k = __cic->key;
+		if (unlikely(!k)) {
+			cfq_drop_dead_cic(ioc, cic);
+			goto restart;
+		}
 
-		if (cic->key < __cic->key)
+		if (cic->key < k)
 			p = &(*p)->rb_left;
-		else if (cic->key > __cic->key)
+		else if (cic->key > k)
 			p = &(*p)->rb_right;
 		else
 			BUG();
 	}
 
+	spin_lock(&cfq_exit_lock);
 	rb_link_node(&cic->rb_node, parent, p);
 	rb_insert_color(&cic->rb_node, &ioc->cic_root);
 	list_add(&cic->queue_list, &cfqd->cic_list);
-	read_unlock(&cfq_exit_lock);
+	spin_unlock(&cfq_exit_lock);
 }
 
 /*
@@ -1622,7 +1670,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 {
 	int enable_idle = cfq_cfqq_idle_window(cfqq);
 
-	if (!cic->ioc->task || !cfqd->cfq_slice_idle)
+	if (!cic->ioc->task || !cfqd->cfq_slice_idle || cfqd->hw_tag)
 		enable_idle = 0;
 	else if (sample_valid(cic->ttime_samples)) {
 		if (cic->ttime_mean > cfqd->cfq_slice_idle)
@@ -1713,14 +1761,24 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 
 	cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
 
+	cic = crq->io_context;
+
 	/*
 	 * we never wait for an async request and we don't allow preemption
 	 * of an async request. so just return early
 	 */
-	if (!cfq_crq_is_sync(crq))
+	if (!cfq_crq_is_sync(crq)) {
+		/*
+		 * sync process issued an async request, if it's waiting
+		 * then expire it and kick rq handling.
+		 */
+		if (cic == cfqd->active_cic &&
+		    del_timer(&cfqd->idle_slice_timer)) {
+			cfq_slice_expired(cfqd, 0);
+			cfq_start_queueing(cfqd, cfqq);
+		}
 		return;
-
-	cic = crq->io_context;
+	}
 
 	cfq_update_io_thinktime(cfqd, cic);
 	cfq_update_io_seektime(cfqd, cic, crq);
@@ -2138,10 +2196,9 @@ static void cfq_idle_class_timer(unsigned long data)
 	 * race with a non-idle queue, reset timer
 	 */
 	end = cfqd->last_end_request + CFQ_IDLE_GRACE;
-	if (!time_after_eq(jiffies, end)) {
-		cfqd->idle_class_timer.expires = end;
-		add_timer(&cfqd->idle_class_timer);
-	} else
+	if (!time_after_eq(jiffies, end))
+		mod_timer(&cfqd->idle_class_timer, end);
+	else
 		cfq_schedule_dispatch(cfqd);
 
 	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
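Setting ->expires by hand and then calling add_timer() is only legal on a timer that is not pending (add_timer() BUG()s otherwise); mod_timer() folds both steps into one call that also re-arms an already-pending timer, which is exactly what a racing non-idle queue can produce here. In kernel terms the change is (context from the function above, not a standalone program):

/* before: two steps, illegal if the timer is already pending */
cfqd->idle_class_timer.expires = end;
add_timer(&cfqd->idle_class_timer);

/* after: one call, safe whether or not the timer is pending */
mod_timer(&cfqd->idle_class_timer, end);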
@@ -2161,7 +2218,7 @@ static void cfq_exit_queue(elevator_t *e)
 
 	cfq_shutdown_timer_wq(cfqd);
 
-	write_lock(&cfq_exit_lock);
+	spin_lock(&cfq_exit_lock);
 	spin_lock_irq(q->queue_lock);
 
 	if (cfqd->active_queue)
@@ -2184,7 +2241,7 @@ static void cfq_exit_queue(elevator_t *e)
 	}
 
 	spin_unlock_irq(q->queue_lock);
-	write_unlock(&cfq_exit_lock);
+	spin_unlock(&cfq_exit_lock);
 
 	cfq_shutdown_timer_wq(cfqd);
 
@@ -2194,14 +2251,14 @@ static void cfq_exit_queue(elevator_t *e)
 	kfree(cfqd);
 }
 
-static int cfq_init_queue(request_queue_t *q, elevator_t *e)
+static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct cfq_data *cfqd;
 	int i;
 
 	cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
 	if (!cfqd)
-		return -ENOMEM;
+		return NULL;
 
 	memset(cfqd, 0, sizeof(*cfqd));
 
@@ -2231,8 +2288,6 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
 	for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
 		INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
 
-	e->elevator_data = cfqd;
-
 	cfqd->queue = q;
 
 	cfqd->max_queued = q->nr_requests / 4;
@@ -2259,14 +2314,14 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
 
-	return 0;
+	return cfqd;
 out_crqpool:
 	kfree(cfqd->cfq_hash);
 out_cfqhash:
 	kfree(cfqd->crq_hash);
 out_crqhash:
 	kfree(cfqd);
-	return -ENOMEM;
+	return NULL;
 }
 
 static void cfq_slab_kill(void)
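This is one half of an interface change: elevator_init_fn now hands its private data back as a pointer (NULL on failure) instead of storing it into e->elevator_data itself and returning an errno, which is also why the e->elevator_data = cfqd assignment disappears in the earlier hunk. A hedged sketch of what the elevator core side presumably does with the new return convention (illustrative, not the verbatim core code):

/* elevator core side, sketched: the core owns the attach step now */
void *data;

data = e->ops->elevator_init_fn(q, e);
if (!data)
	return -ENOMEM;		/* scheduler could not set itself up */

e->elevator_data = data;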
@@ -2439,9 +2494,10 @@ static void __exit cfq_exit(void)
 	DECLARE_COMPLETION(all_gone);
 	elv_unregister(&iosched_cfq);
 	ioc_gone = &all_gone;
-	barrier();
+	/* ioc_gone's update must be visible before reading ioc_count */
+	smp_wmb();
 	if (atomic_read(&ioc_count))
-		complete(ioc_gone);
+		wait_for_completion(ioc_gone);
 	synchronize_rcu();
 	cfq_slab_kill();
 }
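The old barrier()/complete() pair was doubly wrong: barrier() only constrains the compiler, not the CPU, and complete() signals a completion rather than waiting for one. The fixed sequence publishes the on-stack completion through ioc_gone, makes that store visible before ioc_count is sampled (as the new comment says), and then sleeps until the last cfq_io_context has been freed, so cfq_slab_kill() cannot tear down the slab caches underneath a straggling free. The matching wake-up lives on the release path elsewhere in the file; sketched from the same scheme (freed is the number of contexts just dropped):

/* release side, sketched: whoever drops the last cfq_io_context
 * completes ioc_gone so cfq_exit() can proceed to free the slabs */
if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone)
	complete(ioc_gone);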