diff options
author | Jens Axboe <axboe@suse.de> | 2006-03-28 01:59:01 -0500 |
---|---|---|
committer | Jens Axboe <axboe@suse.de> | 2006-03-28 01:59:01 -0500 |
commit | e2d74ac0664c89757bde8fb18c98cd7bf53da61c (patch) | |
tree | 1e858044a9180766eae4ec694d4200c4ae850406 /block | |
parent | 329b10bb0feacb7fb9a41389313ff0a51ae56f2a (diff) |
[PATCH] [BLOCK] cfq-iosched: change cfq io context linking from list to tree
On setups with many disks, we spend a considerable amount of time
looking up the process-disk mapping on each queue of io. Testing with
a NULL based block driver, this costs 40-50% reduction in throughput
for 1000 disks.
Signed-off-by: Jens Axboe <axboe@suse.de>
Diffstat (limited to 'block')
-rw-r--r-- | block/cfq-iosched.c | 205 | ||||
-rw-r--r-- | block/ll_rw_blk.c | 19 |
2 files changed, 108 insertions, 116 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index bde40a6ae665..bb43a1677626 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -1190,19 +1190,19 @@ cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio) | |||
1190 | return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT)); | 1190 | return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT)); |
1191 | } | 1191 | } |
1192 | 1192 | ||
1193 | static void cfq_free_io_context(struct cfq_io_context *cic) | 1193 | static void cfq_free_io_context(struct io_context *ioc) |
1194 | { | 1194 | { |
1195 | struct cfq_io_context *__cic; | 1195 | struct cfq_io_context *__cic; |
1196 | struct list_head *entry, *next; | 1196 | struct rb_node *n; |
1197 | int freed = 1; | 1197 | int freed = 0; |
1198 | 1198 | ||
1199 | list_for_each_safe(entry, next, &cic->list) { | 1199 | while ((n = rb_first(&ioc->cic_root)) != NULL) { |
1200 | __cic = list_entry(entry, struct cfq_io_context, list); | 1200 | __cic = rb_entry(n, struct cfq_io_context, rb_node); |
1201 | rb_erase(&__cic->rb_node, &ioc->cic_root); | ||
1201 | kmem_cache_free(cfq_ioc_pool, __cic); | 1202 | kmem_cache_free(cfq_ioc_pool, __cic); |
1202 | freed++; | 1203 | freed++; |
1203 | } | 1204 | } |
1204 | 1205 | ||
1205 | kmem_cache_free(cfq_ioc_pool, cic); | ||
1206 | if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone) | 1206 | if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone) |
1207 | complete(ioc_gone); | 1207 | complete(ioc_gone); |
1208 | } | 1208 | } |
@@ -1210,8 +1210,7 @@ static void cfq_free_io_context(struct cfq_io_context *cic) | |||
1210 | static void cfq_trim(struct io_context *ioc) | 1210 | static void cfq_trim(struct io_context *ioc) |
1211 | { | 1211 | { |
1212 | ioc->set_ioprio = NULL; | 1212 | ioc->set_ioprio = NULL; |
1213 | if (ioc->cic) | 1213 | cfq_free_io_context(ioc); |
1214 | cfq_free_io_context(ioc->cic); | ||
1215 | } | 1214 | } |
1216 | 1215 | ||
1217 | /* | 1216 | /* |
@@ -1250,26 +1249,26 @@ static void cfq_exit_single_io_context(struct cfq_io_context *cic) | |||
1250 | spin_unlock(q->queue_lock); | 1249 | spin_unlock(q->queue_lock); |
1251 | } | 1250 | } |
1252 | 1251 | ||
1253 | static void cfq_exit_io_context(struct cfq_io_context *cic) | 1252 | static void cfq_exit_io_context(struct io_context *ioc) |
1254 | { | 1253 | { |
1255 | struct cfq_io_context *__cic; | 1254 | struct cfq_io_context *__cic; |
1256 | struct list_head *entry; | ||
1257 | unsigned long flags; | 1255 | unsigned long flags; |
1258 | 1256 | struct rb_node *n; | |
1259 | local_irq_save(flags); | ||
1260 | 1257 | ||
1261 | /* | 1258 | /* |
1262 | * put the reference this task is holding to the various queues | 1259 | * put the reference this task is holding to the various queues |
1263 | */ | 1260 | */ |
1264 | read_lock(&cfq_exit_lock); | 1261 | read_lock_irqsave(&cfq_exit_lock, flags); |
1265 | list_for_each(entry, &cic->list) { | 1262 | |
1266 | __cic = list_entry(entry, struct cfq_io_context, list); | 1263 | n = rb_first(&ioc->cic_root); |
1264 | while (n != NULL) { | ||
1265 | __cic = rb_entry(n, struct cfq_io_context, rb_node); | ||
1266 | |||
1267 | cfq_exit_single_io_context(__cic); | 1267 | cfq_exit_single_io_context(__cic); |
1268 | n = rb_next(n); | ||
1268 | } | 1269 | } |
1269 | 1270 | ||
1270 | cfq_exit_single_io_context(cic); | 1271 | read_unlock_irqrestore(&cfq_exit_lock, flags); |
1271 | read_unlock(&cfq_exit_lock); | ||
1272 | local_irq_restore(flags); | ||
1273 | } | 1272 | } |
1274 | 1273 | ||
1275 | static struct cfq_io_context * | 1274 | static struct cfq_io_context * |
@@ -1278,10 +1277,10 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | |||
1278 | struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); | 1277 | struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); |
1279 | 1278 | ||
1280 | if (cic) { | 1279 | if (cic) { |
1281 | INIT_LIST_HEAD(&cic->list); | 1280 | RB_CLEAR(&cic->rb_node); |
1281 | cic->key = NULL; | ||
1282 | cic->cfqq[ASYNC] = NULL; | 1282 | cic->cfqq[ASYNC] = NULL; |
1283 | cic->cfqq[SYNC] = NULL; | 1283 | cic->cfqq[SYNC] = NULL; |
1284 | cic->key = NULL; | ||
1285 | cic->last_end_request = jiffies; | 1284 | cic->last_end_request = jiffies; |
1286 | cic->ttime_total = 0; | 1285 | cic->ttime_total = 0; |
1287 | cic->ttime_samples = 0; | 1286 | cic->ttime_samples = 0; |
@@ -1373,15 +1372,17 @@ static inline void changed_ioprio(struct cfq_io_context *cic) | |||
1373 | static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) | 1372 | static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) |
1374 | { | 1373 | { |
1375 | struct cfq_io_context *cic; | 1374 | struct cfq_io_context *cic; |
1375 | struct rb_node *n; | ||
1376 | 1376 | ||
1377 | write_lock(&cfq_exit_lock); | 1377 | write_lock(&cfq_exit_lock); |
1378 | 1378 | ||
1379 | cic = ioc->cic; | 1379 | n = rb_first(&ioc->cic_root); |
1380 | 1380 | while (n != NULL) { | |
1381 | changed_ioprio(cic); | 1381 | cic = rb_entry(n, struct cfq_io_context, rb_node); |
1382 | 1382 | ||
1383 | list_for_each_entry(cic, &cic->list, list) | ||
1384 | changed_ioprio(cic); | 1383 | changed_ioprio(cic); |
1384 | n = rb_next(n); | ||
1385 | } | ||
1385 | 1386 | ||
1386 | write_unlock(&cfq_exit_lock); | 1387 | write_unlock(&cfq_exit_lock); |
1387 | 1388 | ||
@@ -1445,14 +1446,67 @@ out: | |||
1445 | return cfqq; | 1446 | return cfqq; |
1446 | } | 1447 | } |
1447 | 1448 | ||
1449 | static struct cfq_io_context * | ||
1450 | cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) | ||
1451 | { | ||
1452 | struct rb_node *n = ioc->cic_root.rb_node; | ||
1453 | struct cfq_io_context *cic; | ||
1454 | void *key = cfqd; | ||
1455 | |||
1456 | while (n) { | ||
1457 | cic = rb_entry(n, struct cfq_io_context, rb_node); | ||
1458 | |||
1459 | if (key < cic->key) | ||
1460 | n = n->rb_left; | ||
1461 | else if (key > cic->key) | ||
1462 | n = n->rb_right; | ||
1463 | else | ||
1464 | return cic; | ||
1465 | } | ||
1466 | |||
1467 | return NULL; | ||
1468 | } | ||
1469 | |||
1470 | static inline void | ||
1471 | cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, | ||
1472 | struct cfq_io_context *cic) | ||
1473 | { | ||
1474 | struct rb_node **p = &ioc->cic_root.rb_node; | ||
1475 | struct rb_node *parent = NULL; | ||
1476 | struct cfq_io_context *__cic; | ||
1477 | |||
1478 | read_lock(&cfq_exit_lock); | ||
1479 | |||
1480 | cic->ioc = ioc; | ||
1481 | cic->key = cfqd; | ||
1482 | |||
1483 | ioc->set_ioprio = cfq_ioc_set_ioprio; | ||
1484 | |||
1485 | while (*p) { | ||
1486 | parent = *p; | ||
1487 | __cic = rb_entry(parent, struct cfq_io_context, rb_node); | ||
1488 | |||
1489 | if (cic->key < __cic->key) | ||
1490 | p = &(*p)->rb_left; | ||
1491 | else if (cic->key > __cic->key) | ||
1492 | p = &(*p)->rb_right; | ||
1493 | else | ||
1494 | BUG(); | ||
1495 | } | ||
1496 | |||
1497 | rb_link_node(&cic->rb_node, parent, p); | ||
1498 | rb_insert_color(&cic->rb_node, &ioc->cic_root); | ||
1499 | list_add(&cic->queue_list, &cfqd->cic_list); | ||
1500 | read_unlock(&cfq_exit_lock); | ||
1501 | } | ||
1502 | |||
1448 | /* | 1503 | /* |
1449 | * Setup general io context and cfq io context. There can be several cfq | 1504 | * Setup general io context and cfq io context. There can be several cfq |
1450 | * io contexts per general io context, if this process is doing io to more | 1505 | * io contexts per general io context, if this process is doing io to more |
1451 | * than one device managed by cfq. Note that caller is holding a reference to | 1506 | * than one device managed by cfq. |
1452 | * cfqq, so we don't need to worry about it disappearing | ||
1453 | */ | 1507 | */ |
1454 | static struct cfq_io_context * | 1508 | static struct cfq_io_context * |
1455 | cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) | 1509 | cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) |
1456 | { | 1510 | { |
1457 | struct io_context *ioc = NULL; | 1511 | struct io_context *ioc = NULL; |
1458 | struct cfq_io_context *cic; | 1512 | struct cfq_io_context *cic; |
@@ -1463,88 +1517,15 @@ cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) | |||
1463 | if (!ioc) | 1517 | if (!ioc) |
1464 | return NULL; | 1518 | return NULL; |
1465 | 1519 | ||
1466 | restart: | 1520 | cic = cfq_cic_rb_lookup(cfqd, ioc); |
1467 | if ((cic = ioc->cic) == NULL) { | 1521 | if (cic) |
1468 | cic = cfq_alloc_io_context(cfqd, gfp_mask); | 1522 | goto out; |
1469 | |||
1470 | if (cic == NULL) | ||
1471 | goto err; | ||
1472 | |||
1473 | /* | ||
1474 | * manually increment generic io_context usage count, it | ||
1475 | * cannot go away since we are already holding one ref to it | ||
1476 | */ | ||
1477 | cic->ioc = ioc; | ||
1478 | cic->key = cfqd; | ||
1479 | read_lock(&cfq_exit_lock); | ||
1480 | ioc->set_ioprio = cfq_ioc_set_ioprio; | ||
1481 | ioc->cic = cic; | ||
1482 | list_add(&cic->queue_list, &cfqd->cic_list); | ||
1483 | read_unlock(&cfq_exit_lock); | ||
1484 | } else { | ||
1485 | struct cfq_io_context *__cic; | ||
1486 | |||
1487 | /* | ||
1488 | * the first cic on the list is actually the head itself | ||
1489 | */ | ||
1490 | if (cic->key == cfqd) | ||
1491 | goto out; | ||
1492 | |||
1493 | if (unlikely(!cic->key)) { | ||
1494 | read_lock(&cfq_exit_lock); | ||
1495 | if (list_empty(&cic->list)) | ||
1496 | ioc->cic = NULL; | ||
1497 | else | ||
1498 | ioc->cic = list_entry(cic->list.next, | ||
1499 | struct cfq_io_context, | ||
1500 | list); | ||
1501 | read_unlock(&cfq_exit_lock); | ||
1502 | kmem_cache_free(cfq_ioc_pool, cic); | ||
1503 | atomic_dec(&ioc_count); | ||
1504 | goto restart; | ||
1505 | } | ||
1506 | |||
1507 | /* | ||
1508 | * cic exists, check if we already are there. linear search | ||
1509 | * should be ok here, the list will usually not be more than | ||
1510 | * 1 or a few entries long | ||
1511 | */ | ||
1512 | list_for_each_entry(__cic, &cic->list, list) { | ||
1513 | /* | ||
1514 | * this process is already holding a reference to | ||
1515 | * this queue, so no need to get one more | ||
1516 | */ | ||
1517 | if (__cic->key == cfqd) { | ||
1518 | cic = __cic; | ||
1519 | goto out; | ||
1520 | } | ||
1521 | if (unlikely(!__cic->key)) { | ||
1522 | read_lock(&cfq_exit_lock); | ||
1523 | list_del(&__cic->list); | ||
1524 | read_unlock(&cfq_exit_lock); | ||
1525 | kmem_cache_free(cfq_ioc_pool, __cic); | ||
1526 | atomic_dec(&ioc_count); | ||
1527 | goto restart; | ||
1528 | } | ||
1529 | } | ||
1530 | 1523 | ||
1531 | /* | 1524 | cic = cfq_alloc_io_context(cfqd, gfp_mask); |
1532 | * nope, process doesn't have a cic assoicated with this | 1525 | if (cic == NULL) |
1533 | * cfqq yet. get a new one and add to list | 1526 | goto err; |
1534 | */ | ||
1535 | __cic = cfq_alloc_io_context(cfqd, gfp_mask); | ||
1536 | if (__cic == NULL) | ||
1537 | goto err; | ||
1538 | |||
1539 | __cic->ioc = ioc; | ||
1540 | __cic->key = cfqd; | ||
1541 | read_lock(&cfq_exit_lock); | ||
1542 | list_add(&__cic->list, &cic->list); | ||
1543 | list_add(&__cic->queue_list, &cfqd->cic_list); | ||
1544 | read_unlock(&cfq_exit_lock); | ||
1545 | cic = __cic; | ||
1546 | } | ||
1547 | 1527 | ||
1528 | cfq_cic_link(cfqd, ioc, cic); | ||
1548 | out: | 1529 | out: |
1549 | return cic; | 1530 | return cic; |
1550 | err: | 1531 | err: |
@@ -1965,7 +1946,7 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, | |||
1965 | 1946 | ||
1966 | might_sleep_if(gfp_mask & __GFP_WAIT); | 1947 | might_sleep_if(gfp_mask & __GFP_WAIT); |
1967 | 1948 | ||
1968 | cic = cfq_get_io_context(cfqd, key, gfp_mask); | 1949 | cic = cfq_get_io_context(cfqd, gfp_mask); |
1969 | 1950 | ||
1970 | spin_lock_irqsave(q->queue_lock, flags); | 1951 | spin_lock_irqsave(q->queue_lock, flags); |
1971 | 1952 | ||
@@ -2133,11 +2114,14 @@ static void cfq_exit_queue(elevator_t *e) | |||
2133 | request_queue_t *q = cfqd->queue; | 2114 | request_queue_t *q = cfqd->queue; |
2134 | 2115 | ||
2135 | cfq_shutdown_timer_wq(cfqd); | 2116 | cfq_shutdown_timer_wq(cfqd); |
2117 | |||
2136 | write_lock(&cfq_exit_lock); | 2118 | write_lock(&cfq_exit_lock); |
2137 | spin_lock_irq(q->queue_lock); | 2119 | spin_lock_irq(q->queue_lock); |
2120 | |||
2138 | if (cfqd->active_queue) | 2121 | if (cfqd->active_queue) |
2139 | __cfq_slice_expired(cfqd, cfqd->active_queue, 0); | 2122 | __cfq_slice_expired(cfqd, cfqd->active_queue, 0); |
2140 | while(!list_empty(&cfqd->cic_list)) { | 2123 | |
2124 | while (!list_empty(&cfqd->cic_list)) { | ||
2141 | struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, | 2125 | struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, |
2142 | struct cfq_io_context, | 2126 | struct cfq_io_context, |
2143 | queue_list); | 2127 | queue_list); |
@@ -2152,6 +2136,7 @@ static void cfq_exit_queue(elevator_t *e) | |||
2152 | cic->key = NULL; | 2136 | cic->key = NULL; |
2153 | list_del_init(&cic->queue_list); | 2137 | list_del_init(&cic->queue_list); |
2154 | } | 2138 | } |
2139 | |||
2155 | spin_unlock_irq(q->queue_lock); | 2140 | spin_unlock_irq(q->queue_lock); |
2156 | write_unlock(&cfq_exit_lock); | 2141 | write_unlock(&cfq_exit_lock); |
2157 | 2142 | ||
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 82469db25100..cb608768ca37 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c | |||
@@ -3539,11 +3539,15 @@ void put_io_context(struct io_context *ioc) | |||
3539 | BUG_ON(atomic_read(&ioc->refcount) == 0); | 3539 | BUG_ON(atomic_read(&ioc->refcount) == 0); |
3540 | 3540 | ||
3541 | if (atomic_dec_and_test(&ioc->refcount)) { | 3541 | if (atomic_dec_and_test(&ioc->refcount)) { |
3542 | struct cfq_io_context *cic; | ||
3543 | |||
3542 | rcu_read_lock(); | 3544 | rcu_read_lock(); |
3543 | if (ioc->aic && ioc->aic->dtor) | 3545 | if (ioc->aic && ioc->aic->dtor) |
3544 | ioc->aic->dtor(ioc->aic); | 3546 | ioc->aic->dtor(ioc->aic); |
3545 | if (ioc->cic && ioc->cic->dtor) | 3547 | if (ioc->cic_root.rb_node != NULL) { |
3546 | ioc->cic->dtor(ioc->cic); | 3548 | cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); |
3549 | cic->dtor(ioc); | ||
3550 | } | ||
3547 | rcu_read_unlock(); | 3551 | rcu_read_unlock(); |
3548 | 3552 | ||
3549 | kmem_cache_free(iocontext_cachep, ioc); | 3553 | kmem_cache_free(iocontext_cachep, ioc); |
@@ -3556,6 +3560,7 @@ void exit_io_context(void) | |||
3556 | { | 3560 | { |
3557 | unsigned long flags; | 3561 | unsigned long flags; |
3558 | struct io_context *ioc; | 3562 | struct io_context *ioc; |
3563 | struct cfq_io_context *cic; | ||
3559 | 3564 | ||
3560 | local_irq_save(flags); | 3565 | local_irq_save(flags); |
3561 | task_lock(current); | 3566 | task_lock(current); |
@@ -3567,9 +3572,11 @@ void exit_io_context(void) | |||
3567 | 3572 | ||
3568 | if (ioc->aic && ioc->aic->exit) | 3573 | if (ioc->aic && ioc->aic->exit) |
3569 | ioc->aic->exit(ioc->aic); | 3574 | ioc->aic->exit(ioc->aic); |
3570 | if (ioc->cic && ioc->cic->exit) | 3575 | if (ioc->cic_root.rb_node != NULL) { |
3571 | ioc->cic->exit(ioc->cic); | 3576 | cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); |
3572 | 3577 | cic->exit(ioc); | |
3578 | } | ||
3579 | |||
3573 | put_io_context(ioc); | 3580 | put_io_context(ioc); |
3574 | } | 3581 | } |
3575 | 3582 | ||
@@ -3598,7 +3605,7 @@ struct io_context *current_io_context(gfp_t gfp_flags) | |||
3598 | ret->last_waited = jiffies; /* doesn't matter... */ | 3605 | ret->last_waited = jiffies; /* doesn't matter... */ |
3599 | ret->nr_batch_requests = 0; /* because this is 0 */ | 3606 | ret->nr_batch_requests = 0; /* because this is 0 */ |
3600 | ret->aic = NULL; | 3607 | ret->aic = NULL; |
3601 | ret->cic = NULL; | 3608 | ret->cic_root.rb_node = NULL; |
3602 | tsk->io_context = ret; | 3609 | tsk->io_context = ret; |
3603 | } | 3610 | } |
3604 | 3611 | ||