diff options
Diffstat (limited to 'block/cfq-iosched.c')
-rw-r--r-- | block/cfq-iosched.c | 98 |
1 files changed, 61 insertions, 37 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 01c416ba8437..6200d9b9af28 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -33,7 +33,7 @@ static int cfq_slice_idle = HZ / 70; | |||
33 | 33 | ||
34 | #define CFQ_KEY_ASYNC (0) | 34 | #define CFQ_KEY_ASYNC (0) |
35 | 35 | ||
36 | static DEFINE_RWLOCK(cfq_exit_lock); | 36 | static DEFINE_SPINLOCK(cfq_exit_lock); |
37 | 37 | ||
38 | /* | 38 | /* |
39 | * for the hash of cfqq inside the cfqd | 39 | * for the hash of cfqq inside the cfqd |
@@ -128,6 +128,7 @@ struct cfq_data { | |||
128 | mempool_t *crq_pool; | 128 | mempool_t *crq_pool; |
129 | 129 | ||
130 | int rq_in_driver; | 130 | int rq_in_driver; |
131 | int hw_tag; | ||
131 | 132 | ||
132 | /* | 133 | /* |
133 | * schedule slice state info | 134 | * schedule slice state info |
@@ -495,10 +496,13 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) | |||
495 | 496 | ||
496 | /* | 497 | /* |
497 | * if queue was preempted, just add to front to be fair. busy_rr | 498 | * if queue was preempted, just add to front to be fair. busy_rr |
498 | * isn't sorted. | 499 | * isn't sorted, but insert at the back for fairness. |
499 | */ | 500 | */ |
500 | if (preempted || list == &cfqd->busy_rr) { | 501 | if (preempted || list == &cfqd->busy_rr) { |
501 | list_add(&cfqq->cfq_list, list); | 502 | if (preempted) |
503 | list = list->prev; | ||
504 | |||
505 | list_add_tail(&cfqq->cfq_list, list); | ||
502 | return; | 506 | return; |
503 | } | 507 | } |
504 | 508 | ||
@@ -658,6 +662,15 @@ static void cfq_activate_request(request_queue_t *q, struct request *rq) | |||
658 | struct cfq_data *cfqd = q->elevator->elevator_data; | 662 | struct cfq_data *cfqd = q->elevator->elevator_data; |
659 | 663 | ||
660 | cfqd->rq_in_driver++; | 664 | cfqd->rq_in_driver++; |
665 | |||
666 | /* | ||
667 | * If the depth is larger than 1, it really could be queueing. But let's | ||
668 | * make the mark a little higher - idling could still be good for | ||
669 | * low queueing, and a low queueing number could also just indicate | ||
670 | * a SCSI mid layer like behaviour where limit+1 is often seen. | ||
671 | */ | ||
672 | if (!cfqd->hw_tag && cfqd->rq_in_driver > 4) | ||
673 | cfqd->hw_tag = 1; | ||
661 | } | 674 | } |
662 | 675 | ||
663 | static void cfq_deactivate_request(request_queue_t *q, struct request *rq) | 676 | static void cfq_deactivate_request(request_queue_t *q, struct request *rq) |
@@ -873,6 +886,13 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) | |||
873 | cfqq = list_entry_cfqq(cfqd->cur_rr.next); | 886 | cfqq = list_entry_cfqq(cfqd->cur_rr.next); |
874 | 887 | ||
875 | /* | 888 | /* |
889 | * If no new queues are available, check if the busy list has some | ||
890 | * before falling back to idle io. | ||
891 | */ | ||
892 | if (!cfqq && !list_empty(&cfqd->busy_rr)) | ||
893 | cfqq = list_entry_cfqq(cfqd->busy_rr.next); | ||
894 | |||
895 | /* | ||
876 | * if we have idle queues and no rt or be queues had pending | 896 | * if we have idle queues and no rt or be queues had pending |
877 | * requests, either allow immediate service if the grace period | 897 | * requests, either allow immediate service if the grace period |
878 | * has passed or arm the idle grace timer | 898 | * has passed or arm the idle grace timer |
@@ -1278,7 +1298,7 @@ static void cfq_exit_io_context(struct io_context *ioc) | |||
1278 | /* | 1298 | /* |
1279 | * put the reference this task is holding to the various queues | 1299 | * put the reference this task is holding to the various queues |
1280 | */ | 1300 | */ |
1281 | read_lock_irqsave(&cfq_exit_lock, flags); | 1301 | spin_lock_irqsave(&cfq_exit_lock, flags); |
1282 | 1302 | ||
1283 | n = rb_first(&ioc->cic_root); | 1303 | n = rb_first(&ioc->cic_root); |
1284 | while (n != NULL) { | 1304 | while (n != NULL) { |
@@ -1288,7 +1308,7 @@ static void cfq_exit_io_context(struct io_context *ioc) | |||
1288 | n = rb_next(n); | 1308 | n = rb_next(n); |
1289 | } | 1309 | } |
1290 | 1310 | ||
1291 | read_unlock_irqrestore(&cfq_exit_lock, flags); | 1311 | spin_unlock_irqrestore(&cfq_exit_lock, flags); |
1292 | } | 1312 | } |
1293 | 1313 | ||
1294 | static struct cfq_io_context * | 1314 | static struct cfq_io_context * |
@@ -1297,17 +1317,12 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | |||
1297 | struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); | 1317 | struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); |
1298 | 1318 | ||
1299 | if (cic) { | 1319 | if (cic) { |
1300 | RB_CLEAR(&cic->rb_node); | 1320 | memset(cic, 0, sizeof(*cic)); |
1301 | cic->key = NULL; | 1321 | RB_CLEAR_COLOR(&cic->rb_node); |
1302 | cic->cfqq[ASYNC] = NULL; | ||
1303 | cic->cfqq[SYNC] = NULL; | ||
1304 | cic->last_end_request = jiffies; | 1322 | cic->last_end_request = jiffies; |
1305 | cic->ttime_total = 0; | 1323 | INIT_LIST_HEAD(&cic->queue_list); |
1306 | cic->ttime_samples = 0; | ||
1307 | cic->ttime_mean = 0; | ||
1308 | cic->dtor = cfq_free_io_context; | 1324 | cic->dtor = cfq_free_io_context; |
1309 | cic->exit = cfq_exit_io_context; | 1325 | cic->exit = cfq_exit_io_context; |
1310 | INIT_LIST_HEAD(&cic->queue_list); | ||
1311 | atomic_inc(&ioc_count); | 1326 | atomic_inc(&ioc_count); |
1312 | } | 1327 | } |
1313 | 1328 | ||
@@ -1394,17 +1409,17 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) | |||
1394 | struct cfq_io_context *cic; | 1409 | struct cfq_io_context *cic; |
1395 | struct rb_node *n; | 1410 | struct rb_node *n; |
1396 | 1411 | ||
1397 | write_lock(&cfq_exit_lock); | 1412 | spin_lock(&cfq_exit_lock); |
1398 | 1413 | ||
1399 | n = rb_first(&ioc->cic_root); | 1414 | n = rb_first(&ioc->cic_root); |
1400 | while (n != NULL) { | 1415 | while (n != NULL) { |
1401 | cic = rb_entry(n, struct cfq_io_context, rb_node); | 1416 | cic = rb_entry(n, struct cfq_io_context, rb_node); |
1402 | 1417 | ||
1403 | changed_ioprio(cic); | 1418 | changed_ioprio(cic); |
1404 | n = rb_next(n); | 1419 | n = rb_next(n); |
1405 | } | 1420 | } |
1406 | 1421 | ||
1407 | write_unlock(&cfq_exit_lock); | 1422 | spin_unlock(&cfq_exit_lock); |
1408 | 1423 | ||
1409 | return 0; | 1424 | return 0; |
1410 | } | 1425 | } |
@@ -1452,7 +1467,8 @@ retry: | |||
1452 | * set ->slice_left to allow preemption for a new process | 1467 | * set ->slice_left to allow preemption for a new process |
1453 | */ | 1468 | */ |
1454 | cfqq->slice_left = 2 * cfqd->cfq_slice_idle; | 1469 | cfqq->slice_left = 2 * cfqd->cfq_slice_idle; |
1455 | cfq_mark_cfqq_idle_window(cfqq); | 1470 | if (!cfqd->hw_tag) |
1471 | cfq_mark_cfqq_idle_window(cfqq); | ||
1456 | cfq_mark_cfqq_prio_changed(cfqq); | 1472 | cfq_mark_cfqq_prio_changed(cfqq); |
1457 | cfq_init_prio_data(cfqq); | 1473 | cfq_init_prio_data(cfqq); |
1458 | } | 1474 | } |
@@ -1469,9 +1485,10 @@ out: | |||
1469 | static void | 1485 | static void |
1470 | cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) | 1486 | cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) |
1471 | { | 1487 | { |
1472 | read_lock(&cfq_exit_lock); | 1488 | spin_lock(&cfq_exit_lock); |
1473 | rb_erase(&cic->rb_node, &ioc->cic_root); | 1489 | rb_erase(&cic->rb_node, &ioc->cic_root); |
1474 | read_unlock(&cfq_exit_lock); | 1490 | list_del_init(&cic->queue_list); |
1491 | spin_unlock(&cfq_exit_lock); | ||
1475 | kmem_cache_free(cfq_ioc_pool, cic); | 1492 | kmem_cache_free(cfq_ioc_pool, cic); |
1476 | atomic_dec(&ioc_count); | 1493 | atomic_dec(&ioc_count); |
1477 | } | 1494 | } |
@@ -1539,11 +1556,11 @@ restart: | |||
1539 | BUG(); | 1556 | BUG(); |
1540 | } | 1557 | } |
1541 | 1558 | ||
1542 | read_lock(&cfq_exit_lock); | 1559 | spin_lock(&cfq_exit_lock); |
1543 | rb_link_node(&cic->rb_node, parent, p); | 1560 | rb_link_node(&cic->rb_node, parent, p); |
1544 | rb_insert_color(&cic->rb_node, &ioc->cic_root); | 1561 | rb_insert_color(&cic->rb_node, &ioc->cic_root); |
1545 | list_add(&cic->queue_list, &cfqd->cic_list); | 1562 | list_add(&cic->queue_list, &cfqd->cic_list); |
1546 | read_unlock(&cfq_exit_lock); | 1563 | spin_unlock(&cfq_exit_lock); |
1547 | } | 1564 | } |
1548 | 1565 | ||
1549 | /* | 1566 | /* |
@@ -1642,7 +1659,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1642 | { | 1659 | { |
1643 | int enable_idle = cfq_cfqq_idle_window(cfqq); | 1660 | int enable_idle = cfq_cfqq_idle_window(cfqq); |
1644 | 1661 | ||
1645 | if (!cic->ioc->task || !cfqd->cfq_slice_idle) | 1662 | if (!cic->ioc->task || !cfqd->cfq_slice_idle || cfqd->hw_tag) |
1646 | enable_idle = 0; | 1663 | enable_idle = 0; |
1647 | else if (sample_valid(cic->ttime_samples)) { | 1664 | else if (sample_valid(cic->ttime_samples)) { |
1648 | if (cic->ttime_mean > cfqd->cfq_slice_idle) | 1665 | if (cic->ttime_mean > cfqd->cfq_slice_idle) |
@@ -1733,14 +1750,24 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1733 | 1750 | ||
1734 | cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); | 1751 | cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); |
1735 | 1752 | ||
1753 | cic = crq->io_context; | ||
1754 | |||
1736 | /* | 1755 | /* |
1737 | * we never wait for an async request and we don't allow preemption | 1756 | * we never wait for an async request and we don't allow preemption |
1738 | * of an async request. so just return early | 1757 | * of an async request. so just return early |
1739 | */ | 1758 | */ |
1740 | if (!cfq_crq_is_sync(crq)) | 1759 | if (!cfq_crq_is_sync(crq)) { |
1760 | /* | ||
1761 | * sync process issued an async request, if it's waiting | ||
1762 | * then expire it and kick rq handling. | ||
1763 | */ | ||
1764 | if (cic == cfqd->active_cic && | ||
1765 | del_timer(&cfqd->idle_slice_timer)) { | ||
1766 | cfq_slice_expired(cfqd, 0); | ||
1767 | cfq_start_queueing(cfqd, cfqq); | ||
1768 | } | ||
1741 | return; | 1769 | return; |
1742 | 1770 | } | |
1743 | cic = crq->io_context; | ||
1744 | 1771 | ||
1745 | cfq_update_io_thinktime(cfqd, cic); | 1772 | cfq_update_io_thinktime(cfqd, cic); |
1746 | cfq_update_io_seektime(cfqd, cic, crq); | 1773 | cfq_update_io_seektime(cfqd, cic, crq); |
@@ -2158,10 +2185,9 @@ static void cfq_idle_class_timer(unsigned long data) | |||
2158 | * race with a non-idle queue, reset timer | 2185 | * race with a non-idle queue, reset timer |
2159 | */ | 2186 | */ |
2160 | end = cfqd->last_end_request + CFQ_IDLE_GRACE; | 2187 | end = cfqd->last_end_request + CFQ_IDLE_GRACE; |
2161 | if (!time_after_eq(jiffies, end)) { | 2188 | if (!time_after_eq(jiffies, end)) |
2162 | cfqd->idle_class_timer.expires = end; | 2189 | mod_timer(&cfqd->idle_class_timer, end); |
2163 | add_timer(&cfqd->idle_class_timer); | 2190 | else |
2164 | } else | ||
2165 | cfq_schedule_dispatch(cfqd); | 2191 | cfq_schedule_dispatch(cfqd); |
2166 | 2192 | ||
2167 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | 2193 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); |
@@ -2181,7 +2207,7 @@ static void cfq_exit_queue(elevator_t *e) | |||
2181 | 2207 | ||
2182 | cfq_shutdown_timer_wq(cfqd); | 2208 | cfq_shutdown_timer_wq(cfqd); |
2183 | 2209 | ||
2184 | write_lock(&cfq_exit_lock); | 2210 | spin_lock(&cfq_exit_lock); |
2185 | spin_lock_irq(q->queue_lock); | 2211 | spin_lock_irq(q->queue_lock); |
2186 | 2212 | ||
2187 | if (cfqd->active_queue) | 2213 | if (cfqd->active_queue) |
@@ -2204,7 +2230,7 @@ static void cfq_exit_queue(elevator_t *e) | |||
2204 | } | 2230 | } |
2205 | 2231 | ||
2206 | spin_unlock_irq(q->queue_lock); | 2232 | spin_unlock_irq(q->queue_lock); |
2207 | write_unlock(&cfq_exit_lock); | 2233 | spin_unlock(&cfq_exit_lock); |
2208 | 2234 | ||
2209 | cfq_shutdown_timer_wq(cfqd); | 2235 | cfq_shutdown_timer_wq(cfqd); |
2210 | 2236 | ||
@@ -2214,14 +2240,14 @@ static void cfq_exit_queue(elevator_t *e) | |||
2214 | kfree(cfqd); | 2240 | kfree(cfqd); |
2215 | } | 2241 | } |
2216 | 2242 | ||
2217 | static int cfq_init_queue(request_queue_t *q, elevator_t *e) | 2243 | static void *cfq_init_queue(request_queue_t *q, elevator_t *e) |
2218 | { | 2244 | { |
2219 | struct cfq_data *cfqd; | 2245 | struct cfq_data *cfqd; |
2220 | int i; | 2246 | int i; |
2221 | 2247 | ||
2222 | cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); | 2248 | cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); |
2223 | if (!cfqd) | 2249 | if (!cfqd) |
2224 | return -ENOMEM; | 2250 | return NULL; |
2225 | 2251 | ||
2226 | memset(cfqd, 0, sizeof(*cfqd)); | 2252 | memset(cfqd, 0, sizeof(*cfqd)); |
2227 | 2253 | ||
@@ -2251,8 +2277,6 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e) | |||
2251 | for (i = 0; i < CFQ_QHASH_ENTRIES; i++) | 2277 | for (i = 0; i < CFQ_QHASH_ENTRIES; i++) |
2252 | INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); | 2278 | INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); |
2253 | 2279 | ||
2254 | e->elevator_data = cfqd; | ||
2255 | |||
2256 | cfqd->queue = q; | 2280 | cfqd->queue = q; |
2257 | 2281 | ||
2258 | cfqd->max_queued = q->nr_requests / 4; | 2282 | cfqd->max_queued = q->nr_requests / 4; |
@@ -2279,14 +2303,14 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e) | |||
2279 | cfqd->cfq_slice_async_rq = cfq_slice_async_rq; | 2303 | cfqd->cfq_slice_async_rq = cfq_slice_async_rq; |
2280 | cfqd->cfq_slice_idle = cfq_slice_idle; | 2304 | cfqd->cfq_slice_idle = cfq_slice_idle; |
2281 | 2305 | ||
2282 | return 0; | 2306 | return cfqd; |
2283 | out_crqpool: | 2307 | out_crqpool: |
2284 | kfree(cfqd->cfq_hash); | 2308 | kfree(cfqd->cfq_hash); |
2285 | out_cfqhash: | 2309 | out_cfqhash: |
2286 | kfree(cfqd->crq_hash); | 2310 | kfree(cfqd->crq_hash); |
2287 | out_crqhash: | 2311 | out_crqhash: |
2288 | kfree(cfqd); | 2312 | kfree(cfqd); |
2289 | return -ENOMEM; | 2313 | return NULL; |
2290 | } | 2314 | } |
2291 | 2315 | ||
2292 | static void cfq_slab_kill(void) | 2316 | static void cfq_slab_kill(void) |