Diffstat (limited to 'block/cfq-iosched.c')
 block/cfq-iosched.c | 98
 1 file changed, 61 insertions(+), 37 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 01c416ba8437..6200d9b9af28 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -33,7 +33,7 @@ static int cfq_slice_idle = HZ / 70;
 
 #define CFQ_KEY_ASYNC		(0)
 
-static DEFINE_RWLOCK(cfq_exit_lock);
+static DEFINE_SPINLOCK(cfq_exit_lock);
 
 /*
  * for the hash of cfqq inside the cfqd
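Every cfq_exit_lock critical section touched by this patch modifies shared state (rb-tree links or the cic_list), including sections the old code took with read_lock(), so there were no true read-only users and a plain spinlock is the correct, cheaper primitive. A minimal sketch of the converted pattern, with hypothetical names standing in for the real cfq structures:

    static DEFINE_SPINLOCK(example_exit_lock);

    /* sketch: tree surgery needs full exclusion, which read_lock() never gave */
    static void example_drop(struct rb_root *root, struct rb_node *node)
    {
            spin_lock(&example_exit_lock);
            rb_erase(node, root);
            spin_unlock(&example_exit_lock);
    }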
@@ -128,6 +128,7 @@ struct cfq_data {
 	mempool_t *crq_pool;
 
 	int rq_in_driver;
+	int hw_tag;
 
 	/*
 	 * schedule slice state info
@@ -495,10 +496,13 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
 
 	/*
 	 * if queue was preempted, just add to front to be fair. busy_rr
-	 * isn't sorted.
+	 * isn't sorted, but insert at the back for fairness.
 	 */
 	if (preempted || list == &cfqd->busy_rr) {
-		list_add(&cfqq->cfq_list, list);
+		if (preempted)
+			list = list->prev;
+
+		list_add_tail(&cfqq->cfq_list, list);
 		return;
 	}
 
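For reference on the hunk above: in <linux/list.h>, list_add(new, head) links new immediately after head (the front, when head is the list head), while list_add_tail(new, head) links new immediately before head (the back). Backing the insertion point up with list = list->prev before list_add_tail() therefore places the queue one node ahead of the tail. A hedged sketch with a hypothetical queue type:

    #include <linux/list.h>

    struct example_queue {
            struct list_head cfq_list;      /* hypothetical stand-in for cfq_queue */
    };

    static void example_resort(struct list_head *rr, struct example_queue *q,
                               int preempted)
    {
            if (preempted)
                    rr = rr->prev;          /* back the insertion point up one node */

            list_add_tail(&q->cfq_list, rr);        /* link q just before 'rr' */
    }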
@@ -658,6 +662,15 @@ static void cfq_activate_request(request_queue_t *q, struct request *rq)
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 
 	cfqd->rq_in_driver++;
+
+	/*
+	 * If the depth is larger 1, it really could be queueing. But lets
+	 * make the mark a little higher - idling could still be good for
+	 * low queueing, and a low queueing number could also just indicate
+	 * a SCSI mid layer like behaviour where limit+1 is often seen.
+	 */
+	if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
+		cfqd->hw_tag = 1;
 }
 
 static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
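The new hw_tag flag is a one-way latch: the first time more than four requests are observed outstanding in the driver, the device is assumed to do its own command queueing (TCQ/NCQ-style), and later hunks use the flag to stop idling. A sketch of the same latch-once detection, with hypothetical names:

    struct example_data {
            int rq_in_driver;       /* requests currently issued to the device */
            int hw_tag;             /* sticky flag, never cleared once set */
    };

    static void example_activate(struct example_data *d)
    {
            d->rq_in_driver++;
            if (!d->hw_tag && d->rq_in_driver > 4)
                    d->hw_tag = 1;  /* depth exceeded 4: assume queueing hardware */
    }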
@@ -873,6 +886,13 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
 		cfqq = list_entry_cfqq(cfqd->cur_rr.next);
 
 	/*
+	 * If no new queues are available, check if the busy list has some
+	 * before falling back to idle io.
+	 */
+	if (!cfqq && !list_empty(&cfqd->busy_rr))
+		cfqq = list_entry_cfqq(cfqd->busy_rr.next);
+
+	/*
 	 * if we have idle queues and no rt or be queues had pending
 	 * requests, either allow immediate service if the grace period
 	 * has passed or arm the idle grace timer
@@ -1278,7 +1298,7 @@ static void cfq_exit_io_context(struct io_context *ioc)
 	/*
 	 * put the reference this task is holding to the various queues
 	 */
-	read_lock_irqsave(&cfq_exit_lock, flags);
+	spin_lock_irqsave(&cfq_exit_lock, flags);
 
 	n = rb_first(&ioc->cic_root);
 	while (n != NULL) {
@@ -1288,7 +1308,7 @@ static void cfq_exit_io_context(struct io_context *ioc)
 		n = rb_next(n);
 	}
 
-	read_unlock_irqrestore(&cfq_exit_lock, flags);
+	spin_unlock_irqrestore(&cfq_exit_lock, flags);
 }
 
 static struct cfq_io_context *
@@ -1297,17 +1317,12 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 	struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
 
 	if (cic) {
-		RB_CLEAR(&cic->rb_node);
-		cic->key = NULL;
-		cic->cfqq[ASYNC] = NULL;
-		cic->cfqq[SYNC] = NULL;
+		memset(cic, 0, sizeof(*cic));
+		RB_CLEAR_COLOR(&cic->rb_node);
 		cic->last_end_request = jiffies;
-		cic->ttime_total = 0;
-		cic->ttime_samples = 0;
-		cic->ttime_mean = 0;
+		INIT_LIST_HEAD(&cic->queue_list);
 		cic->dtor = cfq_free_io_context;
 		cic->exit = cfq_exit_io_context;
-		INIT_LIST_HEAD(&cic->queue_list);
 		atomic_inc(&ioc_count);
 	}
 
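The rewritten initializer zeroes the whole object once, so key, cfqq[ASYNC], cfqq[SYNC] and the ttime_* counters no longer need per-field clearing; only members whose initial value is not all-zero are assigned afterwards. Note that an empty list_head must point at itself, which memset() cannot produce, hence the retained INIT_LIST_HEAD(). An illustrative sketch with a hypothetical type:

    struct example_ctx {
            struct rb_node rb_node;
            unsigned long last_end_request;
            struct list_head queue_list;
    };

    static void example_ctx_init(struct example_ctx *ctx)
    {
            memset(ctx, 0, sizeof(*ctx));           /* all fields become 0/NULL */
            ctx->last_end_request = jiffies;        /* non-zero default */
            INIT_LIST_HEAD(&ctx->queue_list);       /* empty list != all-zero */
    }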
@@ -1394,17 +1409,17 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
 	struct cfq_io_context *cic;
 	struct rb_node *n;
 
-	write_lock(&cfq_exit_lock);
+	spin_lock(&cfq_exit_lock);
 
 	n = rb_first(&ioc->cic_root);
 	while (n != NULL) {
 		cic = rb_entry(n, struct cfq_io_context, rb_node);
 
 		changed_ioprio(cic);
 		n = rb_next(n);
 	}
 
-	write_unlock(&cfq_exit_lock);
+	spin_unlock(&cfq_exit_lock);
 
 	return 0;
 }
@@ -1452,7 +1467,8 @@ retry:
 		 * set ->slice_left to allow preemption for a new process
 		 */
 		cfqq->slice_left = 2 * cfqd->cfq_slice_idle;
-		cfq_mark_cfqq_idle_window(cfqq);
+		if (!cfqd->hw_tag)
+			cfq_mark_cfqq_idle_window(cfqq);
 		cfq_mark_cfqq_prio_changed(cfqq);
 		cfq_init_prio_data(cfqq);
 	}
@@ -1469,9 +1485,10 @@ out:
 static void
 cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic)
 {
-	read_lock(&cfq_exit_lock);
+	spin_lock(&cfq_exit_lock);
 	rb_erase(&cic->rb_node, &ioc->cic_root);
-	read_unlock(&cfq_exit_lock);
+	list_del_init(&cic->queue_list);
+	spin_unlock(&cfq_exit_lock);
 	kmem_cache_free(cfq_ioc_pool, cic);
 	atomic_dec(&ioc_count);
 }
@@ -1539,11 +1556,11 @@ restart:
 		BUG();
 	}
 
-	read_lock(&cfq_exit_lock);
+	spin_lock(&cfq_exit_lock);
 	rb_link_node(&cic->rb_node, parent, p);
 	rb_insert_color(&cic->rb_node, &ioc->cic_root);
 	list_add(&cic->queue_list, &cfqd->cic_list);
-	read_unlock(&cfq_exit_lock);
+	spin_unlock(&cfq_exit_lock);
 }
 
 /*
@@ -1642,7 +1659,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 {
 	int enable_idle = cfq_cfqq_idle_window(cfqq);
 
-	if (!cic->ioc->task || !cfqd->cfq_slice_idle)
+	if (!cic->ioc->task || !cfqd->cfq_slice_idle || cfqd->hw_tag)
 		enable_idle = 0;
 	else if (sample_valid(cic->ttime_samples)) {
 		if (cic->ttime_mean > cfqd->cfq_slice_idle)
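With this hunk and the earlier cfqq-setup change, hw_tag becomes a third blanket reason to refuse the idle window: on hardware that queues internally, parking the device to wait on one process squanders its parallelism. A simplified predicate capturing only the disabling conditions shown (the real code also preserves the queue's previous setting while the thinktime sample is not yet valid):

    static int example_should_idle(int has_task, int slice_idle, int hw_tag,
                                   int ttime_valid, unsigned long ttime_mean)
    {
            if (!has_task || !slice_idle || hw_tag)
                    return 0;       /* queueing hardware: never idle */
            if (ttime_valid && ttime_mean > (unsigned long)slice_idle)
                    return 0;       /* thinktime exceeds the idle slice */
            return 1;
    }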
@@ -1733,14 +1750,24 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 
 	cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
 
+	cic = crq->io_context;
+
 	/*
 	 * we never wait for an async request and we don't allow preemption
 	 * of an async request. so just return early
 	 */
-	if (!cfq_crq_is_sync(crq))
+	if (!cfq_crq_is_sync(crq)) {
+		/*
+		 * sync process issued an async request, if it's waiting
+		 * then expire it and kick rq handling.
+		 */
+		if (cic == cfqd->active_cic &&
+		    del_timer(&cfqd->idle_slice_timer)) {
+			cfq_slice_expired(cfqd, 0);
+			cfq_start_queueing(cfqd, cfqq);
+		}
 		return;
-
-	cic = crq->io_context;
+	}
 
 	cfq_update_io_thinktime(cfqd, cic);
 	cfq_update_io_seektime(cfqd, cic, crq);
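The early-return path for async requests now also un-blocks a waiting sync queue: del_timer() returns nonzero only when the timer was still pending, so the slice is expired only if the issuing process really was being idled on; if the idle timer had already fired, nothing happens. A sketch of that gate, with hypothetical helpers in place of the two calls above:

    #include <linux/timer.h>

    static void example_slice_expired(void);        /* hypothetical helpers */
    static void example_start_queueing(void);

    static void example_kick(struct timer_list *idle_timer)
    {
            /* nonzero return: the timer was pending and is now removed */
            if (del_timer(idle_timer)) {
                    example_slice_expired();
                    example_start_queueing();
            }
    }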
@@ -2158,10 +2185,9 @@ static void cfq_idle_class_timer(unsigned long data)
 	 * race with a non-idle queue, reset timer
 	 */
 	end = cfqd->last_end_request + CFQ_IDLE_GRACE;
-	if (!time_after_eq(jiffies, end)) {
-		cfqd->idle_class_timer.expires = end;
-		add_timer(&cfqd->idle_class_timer);
-	} else
+	if (!time_after_eq(jiffies, end))
+		mod_timer(&cfqd->idle_class_timer, end);
+	else
 		cfq_schedule_dispatch(cfqd);
 
 	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
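mod_timer(timer, expires) is the race-free equivalent of the removed two-step sequence: it updates the expiry and re-arms in one operation, and unlike add_timer() it is safe when the timer may already be pending. The two forms side by side, with timer standing for &cfqd->idle_class_timer:

    /* old: two steps, and add_timer() on a pending timer is a bug */
    timer->expires = end;
    add_timer(timer);

    /* new: update-and-arm in one step, pending or not */
    mod_timer(timer, end);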
@@ -2181,7 +2207,7 @@ static void cfq_exit_queue(elevator_t *e)
 
 	cfq_shutdown_timer_wq(cfqd);
 
-	write_lock(&cfq_exit_lock);
+	spin_lock(&cfq_exit_lock);
 	spin_lock_irq(q->queue_lock);
 
 	if (cfqd->active_queue)
@@ -2204,7 +2230,7 @@ static void cfq_exit_queue(elevator_t *e)
 	}
 
 	spin_unlock_irq(q->queue_lock);
-	write_unlock(&cfq_exit_lock);
+	spin_unlock(&cfq_exit_lock);
 
 	cfq_shutdown_timer_wq(cfqd);
 
@@ -2214,14 +2240,14 @@ static void cfq_exit_queue(elevator_t *e)
 	kfree(cfqd);
 }
 
-static int cfq_init_queue(request_queue_t *q, elevator_t *e)
+static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct cfq_data *cfqd;
 	int i;
 
 	cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
 	if (!cfqd)
-		return -ENOMEM;
+		return NULL;
 
 	memset(cfqd, 0, sizeof(*cfqd));
 
@@ -2251,8 +2277,6 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
 	for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
 		INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
 
-	e->elevator_data = cfqd;
-
 	cfqd->queue = q;
 
 	cfqd->max_queued = q->nr_requests / 4;
@@ -2279,14 +2303,14 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
 
-	return 0;
+	return cfqd;
 out_crqpool:
 	kfree(cfqd->cfq_hash);
 out_cfqhash:
 	kfree(cfqd->crq_hash);
 out_crqhash:
 	kfree(cfqd);
-	return -ENOMEM;
+	return NULL;
 }
 
 static void cfq_slab_kill(void)
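The last three hunks track an elevator-core interface change: the init hook now hands its private data back as a void pointer (NULL on failure) rather than returning an errno and assigning e->elevator_data itself, which is why the explicit assignment disappears; the core stores the returned pointer. A hedged sketch of a minimal hook in the new style:

    struct example_data {
            request_queue_t *queue;         /* hypothetical private data */
    };

    static void *example_init_queue(request_queue_t *q, elevator_t *e)
    {
            struct example_data *d = kmalloc(sizeof(*d), GFP_KERNEL);

            if (!d)
                    return NULL;            /* was: return -ENOMEM */

            memset(d, 0, sizeof(*d));
            d->queue = q;
            return d;                       /* core stores this in e->elevator_data */
    }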