aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMiao Xie <miaox@cn.fujitsu.com>2010-11-21 22:04:43 -0500
committerChris Mason <chris.mason@oracle.com>2010-11-21 22:26:04 -0500
commite65e1535542931e51189832264cd282e5899e4b9 (patch)
treecc3cda0f658cad9f69c351d0735150958a1b0147
parent88f794ede7fadd4b63135b94d1561c1f2d5eb5f5 (diff)
btrfs: fix panic caused by direct IO
btrfs panicked when we wrote more than 64KB of data by direct IO at one time. Steps to reproduce: # mkfs.btrfs /dev/sda5 /dev/sda6 # mount /dev/sda5 /mnt # dd if=/dev/zero of=/mnt/tmpfile bs=100K count=1 oflag=direct Then btrfs panicked: mapping failed logical 1103155200 bio len 69632 len 12288 ------------[ cut here ]------------ kernel BUG at fs/btrfs/volumes.c:3010! [SNIP] Pid: 1992, comm: btrfs-worker-0 Not tainted 2.6.37-rc1 #1 D2399/PRIMERGY RIP: 0010:[<ffffffffa03d1462>] [<ffffffffa03d1462>] btrfs_map_bio+0x202/0x210 [btrfs] [SNIP] Call Trace: [<ffffffffa03ab3eb>] __btrfs_submit_bio_done+0x1b/0x20 [btrfs] [<ffffffffa03a35ff>] run_one_async_done+0x9f/0xb0 [btrfs] [<ffffffffa03d3d20>] run_ordered_completions+0x80/0xc0 [btrfs] [<ffffffffa03d45a4>] worker_loop+0x154/0x5f0 [btrfs] [<ffffffffa03d4450>] ? worker_loop+0x0/0x5f0 [btrfs] [<ffffffffa03d4450>] ? worker_loop+0x0/0x5f0 [btrfs] [<ffffffff81083216>] kthread+0x96/0xa0 [<ffffffff8100cec4>] kernel_thread_helper+0x4/0x10 [<ffffffff81083180>] ? kthread+0x0/0xa0 [<ffffffff8100cec0>] ? kernel_thread_helper+0x0/0x10 We fix this problem by splitting the original bio into smaller bios when we submit it. Reported-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com> Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> Tested-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/inode.c205
1 files changed, 184 insertions, 21 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8c027aa0020a..a47e4faa8c46 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5535,13 +5535,21 @@ struct btrfs_dio_private {
5535 u64 bytes; 5535 u64 bytes;
5536 u32 *csums; 5536 u32 *csums;
5537 void *private; 5537 void *private;
5538
5539 /* number of bios pending for this dio */
5540 atomic_t pending_bios;
5541
5542 /* IO errors */
5543 int errors;
5544
5545 struct bio *orig_bio;
5538}; 5546};
5539 5547
5540static void btrfs_endio_direct_read(struct bio *bio, int err) 5548static void btrfs_endio_direct_read(struct bio *bio, int err)
5541{ 5549{
5550 struct btrfs_dio_private *dip = bio->bi_private;
5542 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; 5551 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
5543 struct bio_vec *bvec = bio->bi_io_vec; 5552 struct bio_vec *bvec = bio->bi_io_vec;
5544 struct btrfs_dio_private *dip = bio->bi_private;
5545 struct inode *inode = dip->inode; 5553 struct inode *inode = dip->inode;
5546 struct btrfs_root *root = BTRFS_I(inode)->root; 5554 struct btrfs_root *root = BTRFS_I(inode)->root;
5547 u64 start; 5555 u64 start;
@@ -5684,6 +5692,176 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
5684 return 0; 5692 return 0;
5685} 5693}
5686 5694
5695static void btrfs_end_dio_bio(struct bio *bio, int err)
5696{
5697 struct btrfs_dio_private *dip = bio->bi_private;
5698
5699 if (err) {
5700 printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu "
5701 "disk_bytenr %lu len %u err no %d\n",
5702 dip->inode->i_ino, bio->bi_rw, bio->bi_sector,
5703 bio->bi_size, err);
5704 dip->errors = 1;
5705
5706 /*
5707 * before atomic variable goto zero, we must make sure
5708 * dip->errors is perceived to be set.
5709 */
5710 smp_mb__before_atomic_dec();
5711 }
5712
5713 /* if there are more bios still pending for this dio, just exit */
5714 if (!atomic_dec_and_test(&dip->pending_bios))
5715 goto out;
5716
5717 if (dip->errors)
5718 bio_io_error(dip->orig_bio);
5719 else {
5720 set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
5721 bio_endio(dip->orig_bio, 0);
5722 }
5723out:
5724 bio_put(bio);
5725}
5726
5727static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
5728 u64 first_sector, gfp_t gfp_flags)
5729{
5730 int nr_vecs = bio_get_nr_vecs(bdev);
5731 return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
5732}
5733
5734static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5735 int rw, u64 file_offset, int skip_sum,
5736 u32 *csums)
5737{
5738 int write = rw & REQ_WRITE;
5739 struct btrfs_root *root = BTRFS_I(inode)->root;
5740 int ret;
5741
5742 bio_get(bio);
5743 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
5744 if (ret)
5745 goto err;
5746
5747 if (write && !skip_sum) {
5748 ret = btrfs_wq_submit_bio(root->fs_info,
5749 inode, rw, bio, 0, 0,
5750 file_offset,
5751 __btrfs_submit_bio_start_direct_io,
5752 __btrfs_submit_bio_done);
5753 goto err;
5754 } else if (!skip_sum)
5755 btrfs_lookup_bio_sums_dio(root, inode, bio,
5756 file_offset, csums);
5757
5758 ret = btrfs_map_bio(root, rw, bio, 0, 1);
5759err:
5760 bio_put(bio);
5761 return ret;
5762}
5763
5764static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
5765 int skip_sum)
5766{
5767 struct inode *inode = dip->inode;
5768 struct btrfs_root *root = BTRFS_I(inode)->root;
5769 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
5770 struct bio *bio;
5771 struct bio *orig_bio = dip->orig_bio;
5772 struct bio_vec *bvec = orig_bio->bi_io_vec;
5773 u64 start_sector = orig_bio->bi_sector;
5774 u64 file_offset = dip->logical_offset;
5775 u64 submit_len = 0;
5776 u64 map_length;
5777 int nr_pages = 0;
5778 u32 *csums = dip->csums;
5779 int ret = 0;
5780
5781 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
5782 if (!bio)
5783 return -ENOMEM;
5784 bio->bi_private = dip;
5785 bio->bi_end_io = btrfs_end_dio_bio;
5786 atomic_inc(&dip->pending_bios);
5787
5788 map_length = orig_bio->bi_size;
5789 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
5790 &map_length, NULL, 0);
5791 if (ret) {
5792 bio_put(bio);
5793 return -EIO;
5794 }
5795
5796 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
5797 if (unlikely(map_length < submit_len + bvec->bv_len ||
5798 bio_add_page(bio, bvec->bv_page, bvec->bv_len,
5799 bvec->bv_offset) < bvec->bv_len)) {
5800 /*
5801 * inc the count before we submit the bio so
5802 * we know the end IO handler won't happen before
5803 * we inc the count. Otherwise, the dip might get freed
5804 * before we're done setting it up
5805 */
5806 atomic_inc(&dip->pending_bios);
5807 ret = __btrfs_submit_dio_bio(bio, inode, rw,
5808 file_offset, skip_sum,
5809 csums);
5810 if (ret) {
5811 bio_put(bio);
5812 atomic_dec(&dip->pending_bios);
5813 goto out_err;
5814 }
5815
5816 if (!skip_sum)
5817 csums = csums + nr_pages;
5818 start_sector += submit_len >> 9;
5819 file_offset += submit_len;
5820
5821 submit_len = 0;
5822 nr_pages = 0;
5823
5824 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
5825 start_sector, GFP_NOFS);
5826 if (!bio)
5827 goto out_err;
5828 bio->bi_private = dip;
5829 bio->bi_end_io = btrfs_end_dio_bio;
5830
5831 map_length = orig_bio->bi_size;
5832 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
5833 &map_length, NULL, 0);
5834 if (ret) {
5835 bio_put(bio);
5836 goto out_err;
5837 }
5838 } else {
5839 submit_len += bvec->bv_len;
5840 nr_pages ++;
5841 bvec++;
5842 }
5843 }
5844
5845 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
5846 csums);
5847 if (!ret)
5848 return 0;
5849
5850 bio_put(bio);
5851out_err:
5852 dip->errors = 1;
5853 /*
5854 * before atomic variable goto zero, we must
5855 * make sure dip->errors is perceived to be set.
5856 */
5857 smp_mb__before_atomic_dec();
5858 if (atomic_dec_and_test(&dip->pending_bios))
5859 bio_io_error(dip->orig_bio);
5860
5861 /* bio_end_io() will handle error, so we needn't return it */
5862 return 0;
5863}
5864
5687static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, 5865static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5688 loff_t file_offset) 5866 loff_t file_offset)
5689{ 5867{
@@ -5723,33 +5901,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5723 5901
5724 dip->disk_bytenr = (u64)bio->bi_sector << 9; 5902 dip->disk_bytenr = (u64)bio->bi_sector << 9;
5725 bio->bi_private = dip; 5903 bio->bi_private = dip;
5904 dip->errors = 0;
5905 dip->orig_bio = bio;
5906 atomic_set(&dip->pending_bios, 0);
5726 5907
5727 if (write) 5908 if (write)
5728 bio->bi_end_io = btrfs_endio_direct_write; 5909 bio->bi_end_io = btrfs_endio_direct_write;
5729 else 5910 else
5730 bio->bi_end_io = btrfs_endio_direct_read; 5911 bio->bi_end_io = btrfs_endio_direct_read;
5731 5912
5732 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 5913 ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
5733 if (ret) 5914 if (!ret)
5734 goto free_ordered;
5735
5736 if (write && !skip_sum) {
5737 ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
5738 inode, rw, bio, 0, 0,
5739 dip->logical_offset,
5740 __btrfs_submit_bio_start_direct_io,
5741 __btrfs_submit_bio_done);
5742 if (ret)
5743 goto free_ordered;
5744 return; 5915 return;
5745 } else if (!skip_sum)
5746 btrfs_lookup_bio_sums_dio(root, inode, bio,
5747 dip->logical_offset, dip->csums);
5748
5749 ret = btrfs_map_bio(root, rw, bio, 0, 1);
5750 if (ret)
5751 goto free_ordered;
5752 return;
5753free_ordered: 5916free_ordered:
5754 /* 5917 /*
5755 * If this is a write, we need to clean up the reserved space and kill 5918 * If this is a write, we need to clean up the reserved space and kill