path: root/block/cfq-iosched.c
author    Jens Axboe <jens.axboe@oracle.com>    2008-01-24 02:44:49 -0500
committer Jens Axboe <jens.axboe@oracle.com>    2008-01-28 04:50:33 -0500
commit    4ac845a2e9a816ed5a7b301f56dcc0a3d0b1ba4d (patch)
tree      602f15808d0f3dcdfcd7cc4491b2cc2ccd266fd2 /block/cfq-iosched.c
parent    66dac98ed0de7a1125fb0dd7907f238f6b9d2f60 (diff)
block: cfq: make the io context sharing lockless

The io context sharing introduced a per-ioc spinlock that would protect
the cfq io context lookup. That is a regression from the original, since
we never needed any locking there because the ioc/cic were process private.

The cic lookup is changed from an rbtree construct to a radix tree, on
which we can then use RCU to make the reader side lockless. That is the
performance critical path; modifying the radix tree is only done on process
creation (when that process first does IO, actually) and on process exit
(if that process has done IO).

As it so happens, radix trees are also much faster for this type of lookup
where the key is a pointer. It's a very sparse tree.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
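For readers who want the pattern in isolation, here is a minimal sketch of the same lockless-lookup scheme: readers walk a radix tree under rcu_read_lock() only, while writers preload the tree and insert under a spinlock. The names (my_ioc, my_cic_lookup, my_cic_link) are hypothetical illustrations, not part of this patch or of any kernel API.

/* Minimal sketch of the pattern (hypothetical names, not the cfq code). */
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>

struct my_ioc {
	spinlock_t lock;		/* serializes updates only */
	struct radix_tree_root root;	/* keyed by a queue pointer; set up
					 * with INIT_RADIX_TREE() at alloc time */
};

/* Reader side: no spinlock, just RCU around the radix tree walk. */
static void *my_cic_lookup(struct my_ioc *ioc, void *key)
{
	void *cic;

	rcu_read_lock();
	cic = radix_tree_lookup(&ioc->root, (unsigned long) key);
	rcu_read_unlock();

	return cic;
}

/* Writer side: preload outside the lock, insert under the lock. */
static int my_cic_link(struct my_ioc *ioc, void *key, void *cic, gfp_t gfp)
{
	unsigned long flags;
	int ret;

	ret = radix_tree_preload(gfp);
	if (ret)
		return ret;

	spin_lock_irqsave(&ioc->lock, flags);
	ret = radix_tree_insert(&ioc->root, (unsigned long) key, cic);
	spin_unlock_irqrestore(&ioc->lock, flags);

	radix_tree_preload_end();
	return ret;
}

Note that freed entries also need RCU-aware freeing (this patch switches the cic slab to SLAB_DESTROY_BY_RCU), which is why cfq_cic_lookup() re-checks cic->key after the lookup instead of trusting the returned pointer blindly.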
Diffstat (limited to 'block/cfq-iosched.c')
-rw-r--r--   block/cfq-iosched.c   267
1 file changed, 147 insertions(+), 120 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index dba52b62cef8..8830893e062f 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -200,7 +200,7 @@ CFQ_CFQQ_FNS(sync);
 static void cfq_dispatch_insert(struct request_queue *, struct request *);
 static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
 				       struct io_context *, gfp_t);
-static struct cfq_io_context *cfq_cic_rb_lookup(struct cfq_data *,
+static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
 						struct io_context *);
 
 static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
@@ -609,7 +609,7 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
 	struct cfq_io_context *cic;
 	struct cfq_queue *cfqq;
 
-	cic = cfq_cic_rb_lookup(cfqd, tsk->io_context);
+	cic = cfq_cic_lookup(cfqd, tsk->io_context);
 	if (!cic)
 		return NULL;
 
@@ -721,7 +721,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
 	 * Lookup the cfqq that this bio will be queued with. Allow
 	 * merge only if rq is queued there.
 	 */
-	cic = cfq_cic_rb_lookup(cfqd, current->io_context);
+	cic = cfq_cic_lookup(cfqd, current->io_context);
 	if (!cic)
 		return 0;
 
@@ -1170,29 +1170,74 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	kmem_cache_free(cfq_pool, cfqq);
 }
 
-static void cfq_free_io_context(struct io_context *ioc)
-{
-	struct cfq_io_context *__cic;
-	struct rb_node *n;
-	int freed = 0;
-
-	ioc->ioc_data = NULL;
-
-	spin_lock(&ioc->lock);
-
-	while ((n = rb_first(&ioc->cic_root)) != NULL) {
-		__cic = rb_entry(n, struct cfq_io_context, rb_node);
-		rb_erase(&__cic->rb_node, &ioc->cic_root);
-		kmem_cache_free(cfq_ioc_pool, __cic);
-		freed++;
-	}
+/*
+ * Call func for each cic attached to this ioc. Returns number of cic's seen.
+ */
+#define CIC_GANG_NR	16
+static unsigned int
+call_for_each_cic(struct io_context *ioc,
+		  void (*func)(struct io_context *, struct cfq_io_context *))
+{
+	struct cfq_io_context *cics[CIC_GANG_NR];
+	unsigned long index = 0;
+	unsigned int called = 0;
+	int nr;
+
+	rcu_read_lock();
+
+	do {
+		int i;
+
+		/*
+		 * Perhaps there's a better way - this just gang lookups from
+		 * 0 to the end, restarting after each CIC_GANG_NR from the
+		 * last key + 1.
+		 */
+		nr = radix_tree_gang_lookup(&ioc->radix_root, (void **) cics,
+						index, CIC_GANG_NR);
+		if (!nr)
+			break;
+
+		called += nr;
+		index = 1 + (unsigned long) cics[nr - 1]->key;
+
+		for (i = 0; i < nr; i++)
+			func(ioc, cics[i]);
+	} while (nr == CIC_GANG_NR);
+
+	rcu_read_unlock();
+
+	return called;
+}
+
+static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
+{
+	unsigned long flags;
+
+	BUG_ON(!cic->dead_key);
+
+	spin_lock_irqsave(&ioc->lock, flags);
+	radix_tree_delete(&ioc->radix_root, cic->dead_key);
+	spin_unlock_irqrestore(&ioc->lock, flags);
+
+	kmem_cache_free(cfq_ioc_pool, cic);
+}
+
+static void cfq_free_io_context(struct io_context *ioc)
+{
+	int freed;
+
+	/*
+	 * ioc->refcount is zero here, so no more cic's are allowed to be
+	 * linked into this ioc. So it should be ok to iterate over the known
+	 * list, we will see all cic's since no new ones are added.
+	 */
+	freed = call_for_each_cic(ioc, cic_free_func);
 
 	elv_ioc_count_mod(ioc_count, -freed);
 
 	if (ioc_gone && !elv_ioc_count_read(ioc_count))
 		complete(ioc_gone);
-
-	spin_unlock(&ioc->lock);
 }
 
 static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
@@ -1209,7 +1254,12 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
 					 struct cfq_io_context *cic)
 {
 	list_del_init(&cic->queue_list);
+
+	/*
+	 * Make sure key == NULL is seen for dead queues
+	 */
 	smp_wmb();
+	cic->dead_key = (unsigned long) cic->key;
 	cic->key = NULL;
 
 	if (cic->cfqq[ASYNC]) {
@@ -1223,16 +1273,18 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
 	}
 }
 
-static void cfq_exit_single_io_context(struct cfq_io_context *cic)
+static void cfq_exit_single_io_context(struct io_context *ioc,
+				       struct cfq_io_context *cic)
 {
 	struct cfq_data *cfqd = cic->key;
 
 	if (cfqd) {
 		struct request_queue *q = cfqd->queue;
+		unsigned long flags;
 
-		spin_lock_irq(q->queue_lock);
+		spin_lock_irqsave(q->queue_lock, flags);
 		__cfq_exit_single_io_context(cfqd, cic);
-		spin_unlock_irq(q->queue_lock);
+		spin_unlock_irqrestore(q->queue_lock, flags);
 	}
 }
 
@@ -1242,24 +1294,8 @@ static void cfq_exit_single_io_context(struct cfq_io_context *cic)
  */
 static void cfq_exit_io_context(struct io_context *ioc)
 {
-	struct cfq_io_context *__cic;
-	struct rb_node *n;
-
-	ioc->ioc_data = NULL;
-
-	spin_lock(&ioc->lock);
-	/*
-	 * put the reference this task is holding to the various queues
-	 */
-	n = rb_first(&ioc->cic_root);
-	while (n != NULL) {
-		__cic = rb_entry(n, struct cfq_io_context, rb_node);
-
-		cfq_exit_single_io_context(__cic);
-		n = rb_next(n);
-	}
-
-	spin_unlock(&ioc->lock);
+	rcu_assign_pointer(ioc->ioc_data, NULL);
+	call_for_each_cic(ioc, cfq_exit_single_io_context);
 }
 
 static struct cfq_io_context *
@@ -1323,7 +1359,8 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 	cfq_clear_cfqq_prio_changed(cfqq);
 }
 
-static inline void changed_ioprio(struct cfq_io_context *cic)
+static inline void changed_ioprio(struct io_context *ioc,
+				  struct cfq_io_context *cic)
 {
 	struct cfq_data *cfqd = cic->key;
 	struct cfq_queue *cfqq;
@@ -1353,22 +1390,8 @@ static inline void changed_ioprio(struct cfq_io_context *cic)
 
 static void cfq_ioc_set_ioprio(struct io_context *ioc)
 {
-	struct cfq_io_context *cic;
-	struct rb_node *n;
-
-	spin_lock(&ioc->lock);
-
+	call_for_each_cic(ioc, changed_ioprio);
 	ioc->ioprio_changed = 0;
-
-	n = rb_first(&ioc->cic_root);
-	while (n != NULL) {
-		cic = rb_entry(n, struct cfq_io_context, rb_node);
-
-		changed_ioprio(cic);
-		n = rb_next(n);
-	}
-
-	spin_unlock(&ioc->lock);
 }
 
 static struct cfq_queue *
@@ -1379,7 +1402,7 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync,
 	struct cfq_io_context *cic;
 
 retry:
-	cic = cfq_cic_rb_lookup(cfqd, ioc);
+	cic = cfq_cic_lookup(cfqd, ioc);
 	/* cic always exists here */
 	cfqq = cic_to_cfqq(cic, is_sync);
 
@@ -1480,28 +1503,42 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
 	return cfqq;
 }
 
+static void cfq_cic_free(struct cfq_io_context *cic)
+{
+	kmem_cache_free(cfq_ioc_pool, cic);
+	elv_ioc_count_dec(ioc_count);
+
+	if (ioc_gone && !elv_ioc_count_read(ioc_count))
+		complete(ioc_gone);
+}
+
 /*
  * We drop cfq io contexts lazily, so we may find a dead one.
  */
 static void
-cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic)
+cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
+		  struct cfq_io_context *cic)
 {
+	unsigned long flags;
+
 	WARN_ON(!list_empty(&cic->queue_list));
 
+	spin_lock_irqsave(&ioc->lock, flags);
+
 	if (ioc->ioc_data == cic)
-		ioc->ioc_data = NULL;
+		rcu_assign_pointer(ioc->ioc_data, NULL);
 
-	rb_erase(&cic->rb_node, &ioc->cic_root);
-	kmem_cache_free(cfq_ioc_pool, cic);
-	elv_ioc_count_dec(ioc_count);
+	radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd);
+	spin_unlock_irqrestore(&ioc->lock, flags);
+
+	cfq_cic_free(cic);
 }
 
 static struct cfq_io_context *
-cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc)
+cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 {
-	struct rb_node *n;
 	struct cfq_io_context *cic;
-	void *k, *key = cfqd;
+	void *k;
 
 	if (unlikely(!ioc))
 		return NULL;
@@ -1509,79 +1546,65 @@ cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 	/*
 	 * we maintain a last-hit cache, to avoid browsing over the tree
 	 */
-	cic = ioc->ioc_data;
+	cic = rcu_dereference(ioc->ioc_data);
 	if (cic && cic->key == cfqd)
 		return cic;
 
-	spin_lock(&ioc->lock);
-restart:
-	n = ioc->cic_root.rb_node;
-	while (n) {
-		cic = rb_entry(n, struct cfq_io_context, rb_node);
+	do {
+		rcu_read_lock();
+		cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd);
+		rcu_read_unlock();
+		if (!cic)
+			break;
 		/* ->key must be copied to avoid race with cfq_exit_queue() */
 		k = cic->key;
 		if (unlikely(!k)) {
-			cfq_drop_dead_cic(ioc, cic);
-			goto restart;
+			cfq_drop_dead_cic(cfqd, ioc, cic);
+			continue;
 		}
 
-		if (key < k)
-			n = n->rb_left;
-		else if (key > k)
-			n = n->rb_right;
-		else {
-			ioc->ioc_data = cic;
-			spin_unlock(&ioc->lock);
-			return cic;
-		}
-	}
+		rcu_assign_pointer(ioc->ioc_data, cic);
+		break;
+	} while (1);
 
-	spin_unlock(&ioc->lock);
-	return NULL;
+	return cic;
 }
 
-static inline void
+/*
+ * Add cic into ioc, using cfqd as the search key. This enables us to lookup
+ * the process specific cfq io context when entered from the block layer.
+ * Also adds the cic to a per-cfqd list, used when this queue is removed.
+ */
+static inline int
 cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
-	     struct cfq_io_context *cic)
+	     struct cfq_io_context *cic, gfp_t gfp_mask)
 {
-	struct rb_node **p;
-	struct rb_node *parent;
-	struct cfq_io_context *__cic;
 	unsigned long flags;
-	void *k;
+	int ret;
 
-	spin_lock(&ioc->lock);
-	cic->ioc = ioc;
-	cic->key = cfqd;
+	ret = radix_tree_preload(gfp_mask);
+	if (!ret) {
+		cic->ioc = ioc;
+		cic->key = cfqd;
 
-restart:
-	parent = NULL;
-	p = &ioc->cic_root.rb_node;
-	while (*p) {
-		parent = *p;
-		__cic = rb_entry(parent, struct cfq_io_context, rb_node);
-		/* ->key must be copied to avoid race with cfq_exit_queue() */
-		k = __cic->key;
-		if (unlikely(!k)) {
-			cfq_drop_dead_cic(ioc, __cic);
-			goto restart;
-		}
+		spin_lock_irqsave(&ioc->lock, flags);
+		ret = radix_tree_insert(&ioc->radix_root,
+						(unsigned long) cfqd, cic);
+		spin_unlock_irqrestore(&ioc->lock, flags);
 
-		if (cic->key < k)
-			p = &(*p)->rb_left;
-		else if (cic->key > k)
-			p = &(*p)->rb_right;
-		else
-			BUG();
+		radix_tree_preload_end();
+
+		if (!ret) {
+			spin_lock_irqsave(cfqd->queue->queue_lock, flags);
+			list_add(&cic->queue_list, &cfqd->cic_list);
+			spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+		}
 	}
 
-	rb_link_node(&cic->rb_node, parent, p);
-	rb_insert_color(&cic->rb_node, &ioc->cic_root);
+	if (ret)
+		printk(KERN_ERR "cfq: cic link failed!\n");
 
-	spin_lock_irqsave(cfqd->queue->queue_lock, flags);
-	list_add(&cic->queue_list, &cfqd->cic_list);
-	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
-	spin_unlock(&ioc->lock);
+	return ret;
 }
 
 /*
@@ -1601,7 +1624,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 	if (!ioc)
 		return NULL;
 
-	cic = cfq_cic_rb_lookup(cfqd, ioc);
+	cic = cfq_cic_lookup(cfqd, ioc);
 	if (cic)
 		goto out;
 
@@ -1609,13 +1632,17 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 	if (cic == NULL)
 		goto err;
 
-	cfq_cic_link(cfqd, ioc, cic);
+	if (cfq_cic_link(cfqd, ioc, cic, gfp_mask))
+		goto err_free;
+
 out:
 	smp_read_barrier_depends();
 	if (unlikely(ioc->ioprio_changed))
 		cfq_ioc_set_ioprio(ioc);
 
 	return cic;
+err_free:
+	cfq_cic_free(cic);
 err:
 	put_io_context(ioc);
 	return NULL;
@@ -1909,7 +1936,7 @@ static int cfq_may_queue(struct request_queue *q, int rw)
 	 * so just lookup a possibly existing queue, or return 'may queue'
 	 * if that fails
 	 */
-	cic = cfq_cic_rb_lookup(cfqd, tsk->io_context);
+	cic = cfq_cic_lookup(cfqd, tsk->io_context);
 	if (!cic)
 		return ELV_MQUEUE_MAY;
 
@@ -2174,7 +2201,7 @@ static int __init cfq_slab_setup(void)
 	if (!cfq_pool)
 		goto fail;
 
-	cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0);
+	cfq_ioc_pool = KMEM_CACHE(cfq_io_context, SLAB_DESTROY_BY_RCU);
 	if (!cfq_ioc_pool)
 		goto fail;
 