author     Chris Mason <chris.mason@fusionio.com>    2013-01-31 14:42:28 -0500
committer  Chris Mason <chris.mason@fusionio.com>    2013-02-01 14:24:24 -0500
commit     6ac0f4884eaad28441c6e0f9d1400a08c2149049
tree       251d86c33360948671a1b4d8cb5a38a5724468a6
parent     4ae10b3a133e1147f3c818fe2ebaf005b217b7bf
Btrfs: add a plugging callback to raid56 writes
Buffered writes and DIRECT_IO writes will often break up
big contiguous changes to the file into sub-stripe writes.
This adds a plugging callback to gather those smaller writes into full
stripe writes.
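
For readers not familiar with block-layer plugging, the following is a rough
sketch (not part of this patch) of the pattern the new callback hooks into,
assuming the blk_start_plug()/blk_check_plugged()/blk_finish_plug() interfaces
of this kernel generation; submit_batch() and submit_one() are invented names
used purely for illustration:

#include <linux/blkdev.h>

/*
 * Illustrative only: a submitter brackets a batch of small writes with a
 * plug.  Any callback registered via blk_check_plugged() while the plug
 * is held (as btrfs_raid_unplug is in this patch) runs when the plug is
 * released, letting the filesystem merge sub-stripe writes into full
 * stripe writes before they reach the device.
 */
static void submit_batch(struct bio **bios, int nr,
			 void (*submit_one)(struct bio *bio))
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		submit_one(bios[i]);	/* may register a callback via blk_check_plugged() */
	blk_finish_plug(&plug);		/* registered callbacks fire here, or earlier on schedule() */
}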
Example on flash:
fio job to do 64K writes in batches of 3 (which makes a full stripe):
With plugging: 450MB/s
Without plugging: 220MB/s
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
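
The fio job file itself is not included in the commit. A hypothetical
reconstruction of the job described above (64K writes, issued three at a time
so they add up to one full stripe) might look like the following; the
ioengine, queue depth, target directory, and size are assumptions rather than
values taken from the commit:

[global]
ioengine=libaio   ; assumed engine, not stated in the commit
direct=1          ; matches the DIRECT_IO case mentioned above
rw=write
bs=64k            ; 64K writes
iodepth=3         ; three writes in flight, one full stripe's worth

[raid56-full-stripe]
directory=/mnt/btrfs   ; hypothetical btrfs RAID5/6 mount point
size=1g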
-rw-r--r--    fs/btrfs/raid56.c    128
1 file changed, 124 insertions, 4 deletions
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 7ccddca9ee71..e34e568534d9 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -97,9 +97,10 @@ struct btrfs_raid_bio {
 	struct bio_list bio_list;
 	spinlock_t bio_list_lock;
 
-	/*
-	 * also protected by the bio_list_lock, the
-	 * stripe locking code uses plug_list to hand off
+	/* also protected by the bio_list_lock, the
+	 * plug list is used by the plugging code
+	 * to collect partial bios while plugged. The
+	 * stripe locking code also uses it to hand off
 	 * the stripe lock to the next pending IO
 	 */
 	struct list_head plug_list;
@@ -1558,6 +1559,103 @@ static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
 }
 
 /*
+ * We use plugging call backs to collect full stripes.
+ * Any time we get a partial stripe write while plugged
+ * we collect it into a list. When the unplug comes down,
+ * we sort the list by logical block number and merge
+ * everything we can into the same rbios
+ */
+struct btrfs_plug_cb {
+	struct blk_plug_cb cb;
+	struct btrfs_fs_info *info;
+	struct list_head rbio_list;
+	struct btrfs_work work;
+};
+
+/*
+ * rbios on the plug list are sorted for easier merging.
+ */
+static int plug_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
+						 plug_list);
+	struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
+						 plug_list);
+	u64 a_sector = ra->bio_list.head->bi_sector;
+	u64 b_sector = rb->bio_list.head->bi_sector;
+
+	if (a_sector < b_sector)
+		return -1;
+	if (a_sector > b_sector)
+		return 1;
+	return 0;
+}
+
+static void run_plug(struct btrfs_plug_cb *plug)
+{
+	struct btrfs_raid_bio *cur;
+	struct btrfs_raid_bio *last = NULL;
+
+	/*
+	 * sort our plug list then try to merge
+	 * everything we can in hopes of creating full
+	 * stripes.
+	 */
+	list_sort(NULL, &plug->rbio_list, plug_cmp);
+	while (!list_empty(&plug->rbio_list)) {
+		cur = list_entry(plug->rbio_list.next,
+				 struct btrfs_raid_bio, plug_list);
+		list_del_init(&cur->plug_list);
+
+		if (rbio_is_full(cur)) {
+			/* we have a full stripe, send it down */
+			full_stripe_write(cur);
+			continue;
+		}
+		if (last) {
+			if (rbio_can_merge(last, cur)) {
+				merge_rbio(last, cur);
+				__free_raid_bio(cur);
+				continue;
+
+			}
+			__raid56_parity_write(last);
+		}
+		last = cur;
+	}
+	if (last) {
+		__raid56_parity_write(last);
+	}
+	kfree(plug);
+}
+
+/*
+ * if the unplug comes from schedule, we have to push the
+ * work off to a helper thread
+ */
+static void unplug_work(struct btrfs_work *work)
+{
+	struct btrfs_plug_cb *plug;
+	plug = container_of(work, struct btrfs_plug_cb, work);
+	run_plug(plug);
+}
+
+static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+	struct btrfs_plug_cb *plug;
+	plug = container_of(cb, struct btrfs_plug_cb, cb);
+
+	if (from_schedule) {
+		plug->work.flags = 0;
+		plug->work.func = unplug_work;
+		btrfs_queue_worker(&plug->info->rmw_workers,
+				   &plug->work);
+		return;
+	}
+	run_plug(plug);
+}
+
+/*
  * our main entry point for writes from the rest of the FS.
  */
 int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
@@ -1565,6 +1663,8 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
 			u64 stripe_len)
 {
 	struct btrfs_raid_bio *rbio;
+	struct btrfs_plug_cb *plug = NULL;
+	struct blk_plug_cb *cb;
 
 	rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
 	if (IS_ERR(rbio)) {
@@ -1574,7 +1674,27 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
 	}
 	bio_list_add(&rbio->bio_list, bio);
 	rbio->bio_list_bytes = bio->bi_size;
-	return __raid56_parity_write(rbio);
+
+	/*
+	 * don't plug on full rbios, just get them out the door
+	 * as quickly as we can
+	 */
+	if (rbio_is_full(rbio))
+		return full_stripe_write(rbio);
+
+	cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info,
+			       sizeof(*plug));
+	if (cb) {
+		plug = container_of(cb, struct btrfs_plug_cb, cb);
+		if (!plug->info) {
+			plug->info = root->fs_info;
+			INIT_LIST_HEAD(&plug->rbio_list);
+		}
+		list_add_tail(&rbio->plug_list, &plug->rbio_list);
+	} else {
+		return __raid56_parity_write(rbio);
+	}
+	return 0;
 }
 
 /*