diff options
author | Miao Xie <miaox@cn.fujitsu.com> | 2010-11-21 22:04:43 -0500 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2010-11-21 22:26:04 -0500 |
commit | e65e1535542931e51189832264cd282e5899e4b9 (patch) | |
tree | cc3cda0f658cad9f69c351d0735150958a1b0147 | |
parent | 88f794ede7fadd4b63135b94d1561c1f2d5eb5f5 (diff) |
btrfs: fix panic caused by direct IO
btrfs panicked when more than 64KB of data was written by direct IO at one time.
Steps to reproduce:
# mkfs.btrfs /dev/sda5 /dev/sda6
# mount /dev/sda5 /mnt
# dd if=/dev/zero of=/mnt/tmpfile bs=100K count=1 oflag=direct
Then btrfs panicked:
mapping failed logical 1103155200 bio len 69632 len 12288
------------[ cut here ]------------
kernel BUG at fs/btrfs/volumes.c:3010!
[SNIP]
Pid: 1992, comm: btrfs-worker-0 Not tainted 2.6.37-rc1 #1 D2399/PRIMERGY
RIP: 0010:[<ffffffffa03d1462>] [<ffffffffa03d1462>] btrfs_map_bio+0x202/0x210 [btrfs]
[SNIP]
Call Trace:
[<ffffffffa03ab3eb>] __btrfs_submit_bio_done+0x1b/0x20 [btrfs]
[<ffffffffa03a35ff>] run_one_async_done+0x9f/0xb0 [btrfs]
[<ffffffffa03d3d20>] run_ordered_completions+0x80/0xc0 [btrfs]
[<ffffffffa03d45a4>] worker_loop+0x154/0x5f0 [btrfs]
[<ffffffffa03d4450>] ? worker_loop+0x0/0x5f0 [btrfs]
[<ffffffffa03d4450>] ? worker_loop+0x0/0x5f0 [btrfs]
[<ffffffff81083216>] kthread+0x96/0xa0
[<ffffffff8100cec4>] kernel_thread_helper+0x4/0x10
[<ffffffff81083180>] ? kthread+0x0/0xa0
[<ffffffff8100cec0>] ? kernel_thread_helper+0x0/0x10
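We fix this problem by splitting the original direct IO bio into smaller bios at submit time, so that no submitted bio is longer than the length btrfs_map_block() reports as mappable.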
Reported-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Tested-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/inode.c | 205 |
1 files changed, 184 insertions, 21 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8c027aa0020a..a47e4faa8c46 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -5535,13 +5535,21 @@ struct btrfs_dio_private { | |||
5535 | u64 bytes; | 5535 | u64 bytes; |
5536 | u32 *csums; | 5536 | u32 *csums; |
5537 | void *private; | 5537 | void *private; |
5538 | |||
5539 | /* number of bios pending for this dio */ | ||
5540 | atomic_t pending_bios; | ||
5541 | |||
5542 | /* IO errors */ | ||
5543 | int errors; | ||
5544 | |||
5545 | struct bio *orig_bio; | ||
5538 | }; | 5546 | }; |
5539 | 5547 | ||
5540 | static void btrfs_endio_direct_read(struct bio *bio, int err) | 5548 | static void btrfs_endio_direct_read(struct bio *bio, int err) |
5541 | { | 5549 | { |
5550 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5542 | struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; | 5551 | struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; |
5543 | struct bio_vec *bvec = bio->bi_io_vec; | 5552 | struct bio_vec *bvec = bio->bi_io_vec; |
5544 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5545 | struct inode *inode = dip->inode; | 5553 | struct inode *inode = dip->inode; |
5546 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5554 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5547 | u64 start; | 5555 | u64 start; |
@@ -5684,6 +5692,176 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, | |||
5684 | return 0; | 5692 | return 0; |
5685 | } | 5693 | } |
5686 | 5694 | ||
5695 | static void btrfs_end_dio_bio(struct bio *bio, int err) | ||
5696 | { | ||
5697 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5698 | |||
5699 | if (err) { | ||
5700 | printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " | ||
5701 | "disk_bytenr %lu len %u err no %d\n", | ||
5702 | dip->inode->i_ino, bio->bi_rw, bio->bi_sector, | ||
5703 | bio->bi_size, err); | ||
5704 | dip->errors = 1; | ||
5705 | |||
5706 | /* | ||
5707 | * before atomic variable goto zero, we must make sure | ||
5708 | * dip->errors is perceived to be set. | ||
5709 | */ | ||
5710 | smp_mb__before_atomic_dec(); | ||
5711 | } | ||
5712 | |||
5713 | /* if there are more bios still pending for this dio, just exit */ | ||
5714 | if (!atomic_dec_and_test(&dip->pending_bios)) | ||
5715 | goto out; | ||
5716 | |||
5717 | if (dip->errors) | ||
5718 | bio_io_error(dip->orig_bio); | ||
5719 | else { | ||
5720 | set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags); | ||
5721 | bio_endio(dip->orig_bio, 0); | ||
5722 | } | ||
5723 | out: | ||
5724 | bio_put(bio); | ||
5725 | } | ||
5726 | |||
5727 | static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, | ||
5728 | u64 first_sector, gfp_t gfp_flags) | ||
5729 | { | ||
5730 | int nr_vecs = bio_get_nr_vecs(bdev); | ||
5731 | return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags); | ||
5732 | } | ||
5733 | |||
5734 | static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | ||
5735 | int rw, u64 file_offset, int skip_sum, | ||
5736 | u32 *csums) | ||
5737 | { | ||
5738 | int write = rw & REQ_WRITE; | ||
5739 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5740 | int ret; | ||
5741 | |||
5742 | bio_get(bio); | ||
5743 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
5744 | if (ret) | ||
5745 | goto err; | ||
5746 | |||
5747 | if (write && !skip_sum) { | ||
5748 | ret = btrfs_wq_submit_bio(root->fs_info, | ||
5749 | inode, rw, bio, 0, 0, | ||
5750 | file_offset, | ||
5751 | __btrfs_submit_bio_start_direct_io, | ||
5752 | __btrfs_submit_bio_done); | ||
5753 | goto err; | ||
5754 | } else if (!skip_sum) | ||
5755 | btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
5756 | file_offset, csums); | ||
5757 | |||
5758 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | ||
5759 | err: | ||
5760 | bio_put(bio); | ||
5761 | return ret; | ||
5762 | } | ||
5763 | |||
5764 | static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | ||
5765 | int skip_sum) | ||
5766 | { | ||
5767 | struct inode *inode = dip->inode; | ||
5768 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5769 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
5770 | struct bio *bio; | ||
5771 | struct bio *orig_bio = dip->orig_bio; | ||
5772 | struct bio_vec *bvec = orig_bio->bi_io_vec; | ||
5773 | u64 start_sector = orig_bio->bi_sector; | ||
5774 | u64 file_offset = dip->logical_offset; | ||
5775 | u64 submit_len = 0; | ||
5776 | u64 map_length; | ||
5777 | int nr_pages = 0; | ||
5778 | u32 *csums = dip->csums; | ||
5779 | int ret = 0; | ||
5780 | |||
5781 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); | ||
5782 | if (!bio) | ||
5783 | return -ENOMEM; | ||
5784 | bio->bi_private = dip; | ||
5785 | bio->bi_end_io = btrfs_end_dio_bio; | ||
5786 | atomic_inc(&dip->pending_bios); | ||
5787 | |||
5788 | map_length = orig_bio->bi_size; | ||
5789 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | ||
5790 | &map_length, NULL, 0); | ||
5791 | if (ret) { | ||
5792 | bio_put(bio); | ||
5793 | return -EIO; | ||
5794 | } | ||
5795 | |||
5796 | while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { | ||
5797 | if (unlikely(map_length < submit_len + bvec->bv_len || | ||
5798 | bio_add_page(bio, bvec->bv_page, bvec->bv_len, | ||
5799 | bvec->bv_offset) < bvec->bv_len)) { | ||
5800 | /* | ||
5801 | * inc the count before we submit the bio so | ||
5802 | * we know the end IO handler won't happen before | ||
5803 | * we inc the count. Otherwise, the dip might get freed | ||
5804 | * before we're done setting it up | ||
5805 | */ | ||
5806 | atomic_inc(&dip->pending_bios); | ||
5807 | ret = __btrfs_submit_dio_bio(bio, inode, rw, | ||
5808 | file_offset, skip_sum, | ||
5809 | csums); | ||
5810 | if (ret) { | ||
5811 | bio_put(bio); | ||
5812 | atomic_dec(&dip->pending_bios); | ||
5813 | goto out_err; | ||
5814 | } | ||
5815 | |||
5816 | if (!skip_sum) | ||
5817 | csums = csums + nr_pages; | ||
5818 | start_sector += submit_len >> 9; | ||
5819 | file_offset += submit_len; | ||
5820 | |||
5821 | submit_len = 0; | ||
5822 | nr_pages = 0; | ||
5823 | |||
5824 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, | ||
5825 | start_sector, GFP_NOFS); | ||
5826 | if (!bio) | ||
5827 | goto out_err; | ||
5828 | bio->bi_private = dip; | ||
5829 | bio->bi_end_io = btrfs_end_dio_bio; | ||
5830 | |||
5831 | map_length = orig_bio->bi_size; | ||
5832 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | ||
5833 | &map_length, NULL, 0); | ||
5834 | if (ret) { | ||
5835 | bio_put(bio); | ||
5836 | goto out_err; | ||
5837 | } | ||
5838 | } else { | ||
5839 | submit_len += bvec->bv_len; | ||
5840 | nr_pages ++; | ||
5841 | bvec++; | ||
5842 | } | ||
5843 | } | ||
5844 | |||
5845 | ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, | ||
5846 | csums); | ||
5847 | if (!ret) | ||
5848 | return 0; | ||
5849 | |||
5850 | bio_put(bio); | ||
5851 | out_err: | ||
5852 | dip->errors = 1; | ||
5853 | /* | ||
5854 | * before atomic variable goto zero, we must | ||
5855 | * make sure dip->errors is perceived to be set. | ||
5856 | */ | ||
5857 | smp_mb__before_atomic_dec(); | ||
5858 | if (atomic_dec_and_test(&dip->pending_bios)) | ||
5859 | bio_io_error(dip->orig_bio); | ||
5860 | |||
5861 | /* bio_end_io() will handle error, so we needn't return it */ | ||
5862 | return 0; | ||
5863 | } | ||
5864 | |||
5687 | static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | 5865 | static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, |
5688 | loff_t file_offset) | 5866 | loff_t file_offset) |
5689 | { | 5867 | { |
@@ -5723,33 +5901,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | |||
5723 | 5901 | ||
5724 | dip->disk_bytenr = (u64)bio->bi_sector << 9; | 5902 | dip->disk_bytenr = (u64)bio->bi_sector << 9; |
5725 | bio->bi_private = dip; | 5903 | bio->bi_private = dip; |
5904 | dip->errors = 0; | ||
5905 | dip->orig_bio = bio; | ||
5906 | atomic_set(&dip->pending_bios, 0); | ||
5726 | 5907 | ||
5727 | if (write) | 5908 | if (write) |
5728 | bio->bi_end_io = btrfs_endio_direct_write; | 5909 | bio->bi_end_io = btrfs_endio_direct_write; |
5729 | else | 5910 | else |
5730 | bio->bi_end_io = btrfs_endio_direct_read; | 5911 | bio->bi_end_io = btrfs_endio_direct_read; |
5731 | 5912 | ||
5732 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 5913 | ret = btrfs_submit_direct_hook(rw, dip, skip_sum); |
5733 | if (ret) | 5914 | if (!ret) |
5734 | goto free_ordered; | ||
5735 | |||
5736 | if (write && !skip_sum) { | ||
5737 | ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | ||
5738 | inode, rw, bio, 0, 0, | ||
5739 | dip->logical_offset, | ||
5740 | __btrfs_submit_bio_start_direct_io, | ||
5741 | __btrfs_submit_bio_done); | ||
5742 | if (ret) | ||
5743 | goto free_ordered; | ||
5744 | return; | 5915 | return; |
5745 | } else if (!skip_sum) | ||
5746 | btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
5747 | dip->logical_offset, dip->csums); | ||
5748 | |||
5749 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | ||
5750 | if (ret) | ||
5751 | goto free_ordered; | ||
5752 | return; | ||
5753 | free_ordered: | 5916 | free_ordered: |
5754 | /* | 5917 | /* |
5755 | * If this is a write, we need to clean up the reserved space and kill | 5918 | * If this is a write, we need to clean up the reserved space and kill |