aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Mason <chris.mason@fusionio.com>2013-01-31 14:42:28 -0500
committerChris Mason <chris.mason@fusionio.com>2013-02-01 14:24:24 -0500
commit6ac0f4884eaad28441c6e0f9d1400a08c2149049 (patch)
tree251d86c33360948671a1b4d8cb5a38a5724468a6
parent4ae10b3a133e1147f3c818fe2ebaf005b217b7bf (diff)
Btrfs: add a plugging callback to raid56 writes
Buffered writes and DIRECT_IO writes will often break up big contiguous changes to the file into sub-stripe writes. This adds a plugging callback to gather those smaller writes into full stripe writes. Example on flash: an fio job doing 64K writes in batches of 3 (which makes a full stripe). With plugging: 450MB/s; without plugging: 220MB/s. Signed-off-by: Chris Mason <chris.mason@fusionio.com>
-rw-r--r--fs/btrfs/raid56.c128
1 files changed, 124 insertions, 4 deletions
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 7ccddca9ee71..e34e568534d9 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -97,9 +97,10 @@ struct btrfs_raid_bio {
97 struct bio_list bio_list; 97 struct bio_list bio_list;
98 spinlock_t bio_list_lock; 98 spinlock_t bio_list_lock;
99 99
100 /* 100 /* also protected by the bio_list_lock, the
101 * also protected by the bio_list_lock, the 101 * plug list is used by the plugging code
102 * stripe locking code uses plug_list to hand off 102 * to collect partial bios while plugged. The
103 * stripe locking code also uses it to hand off
103 * the stripe lock to the next pending IO 104 * the stripe lock to the next pending IO
104 */ 105 */
105 struct list_head plug_list; 106 struct list_head plug_list;
@@ -1558,6 +1559,103 @@ static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
1558} 1559}
1559 1560
1560/* 1561/*
1562 * We use plugging call backs to collect full stripes.
1563 * Any time we get a partial stripe write while plugged
1564 * we collect it into a list. When the unplug comes down,
1565 * we sort the list by logical block number and merge
1566 * everything we can into the same rbios
1567 */
1568struct btrfs_plug_cb {
1569 struct blk_plug_cb cb;
1570 struct btrfs_fs_info *info;
1571 struct list_head rbio_list;
1572 struct btrfs_work work;
1573};
1574
1575/*
1576 * rbios on the plug list are sorted for easier merging.
1577 */
1578static int plug_cmp(void *priv, struct list_head *a, struct list_head *b)
1579{
1580 struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
1581 plug_list);
1582 struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
1583 plug_list);
1584 u64 a_sector = ra->bio_list.head->bi_sector;
1585 u64 b_sector = rb->bio_list.head->bi_sector;
1586
1587 if (a_sector < b_sector)
1588 return -1;
1589 if (a_sector > b_sector)
1590 return 1;
1591 return 0;
1592}
1593
1594static void run_plug(struct btrfs_plug_cb *plug)
1595{
1596 struct btrfs_raid_bio *cur;
1597 struct btrfs_raid_bio *last = NULL;
1598
1599 /*
1600 * sort our plug list then try to merge
1601 * everything we can in hopes of creating full
1602 * stripes.
1603 */
1604 list_sort(NULL, &plug->rbio_list, plug_cmp);
1605 while (!list_empty(&plug->rbio_list)) {
1606 cur = list_entry(plug->rbio_list.next,
1607 struct btrfs_raid_bio, plug_list);
1608 list_del_init(&cur->plug_list);
1609
1610 if (rbio_is_full(cur)) {
1611 /* we have a full stripe, send it down */
1612 full_stripe_write(cur);
1613 continue;
1614 }
1615 if (last) {
1616 if (rbio_can_merge(last, cur)) {
1617 merge_rbio(last, cur);
1618 __free_raid_bio(cur);
1619 continue;
1620
1621 }
1622 __raid56_parity_write(last);
1623 }
1624 last = cur;
1625 }
1626 if (last) {
1627 __raid56_parity_write(last);
1628 }
1629 kfree(plug);
1630}
1631
1632/*
1633 * if the unplug comes from schedule, we have to push the
1634 * work off to a helper thread
1635 */
1636static void unplug_work(struct btrfs_work *work)
1637{
1638 struct btrfs_plug_cb *plug;
1639 plug = container_of(work, struct btrfs_plug_cb, work);
1640 run_plug(plug);
1641}
1642
1643static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
1644{
1645 struct btrfs_plug_cb *plug;
1646 plug = container_of(cb, struct btrfs_plug_cb, cb);
1647
1648 if (from_schedule) {
1649 plug->work.flags = 0;
1650 plug->work.func = unplug_work;
1651 btrfs_queue_worker(&plug->info->rmw_workers,
1652 &plug->work);
1653 return;
1654 }
1655 run_plug(plug);
1656}
1657
1658/*
1561 * our main entry point for writes from the rest of the FS. 1659 * our main entry point for writes from the rest of the FS.
1562 */ 1660 */
1563int raid56_parity_write(struct btrfs_root *root, struct bio *bio, 1661int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
@@ -1565,6 +1663,8 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
1565 u64 stripe_len) 1663 u64 stripe_len)
1566{ 1664{
1567 struct btrfs_raid_bio *rbio; 1665 struct btrfs_raid_bio *rbio;
1666 struct btrfs_plug_cb *plug = NULL;
1667 struct blk_plug_cb *cb;
1568 1668
1569 rbio = alloc_rbio(root, bbio, raid_map, stripe_len); 1669 rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
1570 if (IS_ERR(rbio)) { 1670 if (IS_ERR(rbio)) {
@@ -1574,7 +1674,27 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
1574 } 1674 }
1575 bio_list_add(&rbio->bio_list, bio); 1675 bio_list_add(&rbio->bio_list, bio);
1576 rbio->bio_list_bytes = bio->bi_size; 1676 rbio->bio_list_bytes = bio->bi_size;
1577 return __raid56_parity_write(rbio); 1677
1678 /*
1679 * don't plug on full rbios, just get them out the door
1680 * as quickly as we can
1681 */
1682 if (rbio_is_full(rbio))
1683 return full_stripe_write(rbio);
1684
1685 cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info,
1686 sizeof(*plug));
1687 if (cb) {
1688 plug = container_of(cb, struct btrfs_plug_cb, cb);
1689 if (!plug->info) {
1690 plug->info = root->fs_info;
1691 INIT_LIST_HEAD(&plug->rbio_list);
1692 }
1693 list_add_tail(&rbio->plug_list, &plug->rbio_list);
1694 } else {
1695 return __raid56_parity_write(rbio);
1696 }
1697 return 0;
1578} 1698}
1579 1699
1580/* 1700/*