From 721a9602e6607417c6bc15b18e97a2f35266c690 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 9 Mar 2011 11:56:30 +0100 Subject: block: kill off REQ_UNPLUG With the plugging now being explicitly controlled by the submitter, callers need not pass down unplugging hints to the block layer. If they want to unplug, it's because they manually plugged on their own - in which case, they should just unplug at will. Signed-off-by: Jens Axboe --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 92ac5192c518..b76f7cd47401 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2182,7 +2182,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, unsigned long nr_written = 0; if (wbc->sync_mode == WB_SYNC_ALL) - write_flags = WRITE_SYNC_PLUG; + write_flags = WRITE_SYNC; else write_flags = WRITE; -- cgit v1.2.2 From 850265335f792f5d39ab24e5fb7160bac28d77e5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 15 Mar 2011 14:52:12 -0400 Subject: Btrfs: return error if the range we want to map is bogus Currently if we have corrupt metadata map_extent_buffer will complain about it, but not return an error so the caller has no idea a problem was hit. Fix this. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent_io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 714adc4ac4c2..1bbd26b4fc5c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3690,6 +3690,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, "wanted %lu %lu\n", (unsigned long long)eb->start, eb->len, start, min_len); WARN_ON(1); + return -EINVAL; } p = extent_buffer_page(eb, i); -- cgit v1.2.2 From 1abe9b8a138c9988ba8f7bfded6453649a31541f Mon Sep 17 00:00:00 2001 From: liubo Date: Thu, 24 Mar 2011 11:18:59 +0000 Subject: Btrfs: add initial tracepoint support for btrfs Tracepoints can provide insight into why btrfs hits bugs and be greatly helpful for debugging, e.g dd-7822 [000] 2121.641088: btrfs_inode_request: root = 5(FS_TREE), gen = 4, ino = 256, blocks = 8, disk_i_size = 0, last_trans = 8, logged_trans = 0 dd-7822 [000] 2121.641100: btrfs_inode_new: root = 5(FS_TREE), gen = 8, ino = 257, blocks = 0, disk_i_size = 0, last_trans = 0, logged_trans = 0 btrfs-transacti-7804 [001] 2146.935420: btrfs_cow_block: root = 2(EXTENT_TREE), refs = 2, orig_buf = 29368320 (orig_level = 0), cow_buf = 29388800 (cow_level = 0) btrfs-transacti-7804 [001] 2146.935473: btrfs_cow_block: root = 1(ROOT_TREE), refs = 2, orig_buf = 29364224 (orig_level = 0), cow_buf = 29392896 (cow_level = 0) btrfs-transacti-7804 [001] 2146.972221: btrfs_transaction_commit: root = 1(ROOT_TREE), gen = 8 flush-btrfs-2-7821 [001] 2155.824210: btrfs_chunk_alloc: root = 3(CHUNK_TREE), offset = 1103101952, size = 1073741824, num_stripes = 1, sub_stripes = 0, type = DATA flush-btrfs-2-7821 [001] 2155.824241: btrfs_cow_block: root = 2(EXTENT_TREE), refs = 2, orig_buf = 29388800 (orig_level = 0), cow_buf = 29396992 (cow_level = 0) flush-btrfs-2-7821 [001] 2155.824255: btrfs_cow_block: root = 4(DEV_TREE), refs = 2, orig_buf = 29372416 (orig_level = 0), cow_buf = 29401088 (cow_level = 0) flush-btrfs-2-7821 [000] 2155.824329: btrfs_cow_block: root = 3(CHUNK_TREE), refs = 2, orig_buf = 20971520 (orig_level = 0), cow_buf = 20975616 (cow_level = 0) btrfs-endio-wri-7800 [001] 2155.898019: btrfs_cow_block: root = 5(FS_TREE), refs = 2, orig_buf = 29384704 (orig_level = 0), cow_buf = 29405184 (cow_level = 0) btrfs-endio-wri-7800 [001] 2155.898043: btrfs_cow_block: root = 7(CSUM_TREE), refs = 2, orig_buf = 29376512 (orig_level = 0), cow_buf = 29409280 (cow_level = 0) Here is what I have added: 1) ordere_extent: btrfs_ordered_extent_add btrfs_ordered_extent_remove btrfs_ordered_extent_start btrfs_ordered_extent_put These provide critical information to understand how ordered_extents are updated. 2) extent_map: btrfs_get_extent extent_map is used in both read and write cases, and it is useful for tracking how btrfs specific IO is running. 3) writepage: __extent_writepage btrfs_writepage_end_io_hook Pages are cirtical resourses and produce a lot of corner cases during writeback, so it is valuable to know how page is written to disk. 4) inode: btrfs_inode_new btrfs_inode_request btrfs_inode_evict These can show where and when a inode is created, when a inode is evicted. 5) sync: btrfs_sync_file btrfs_sync_fs These show sync arguments. 6) transaction: btrfs_transaction_commit In transaction based filesystem, it will be useful to know the generation and who does commit. 7) back reference and cow: btrfs_delayed_tree_ref btrfs_delayed_data_ref btrfs_delayed_ref_head btrfs_cow_block Btrfs natively supports back references, these tracepoints are helpful on understanding btrfs's COW mechanism. 8) chunk: btrfs_chunk_alloc btrfs_chunk_free Chunk is a link between physical offset and logical offset, and stands for space infomation in btrfs, and these are helpful on tracing space things. 9) reserved_extent: btrfs_reserved_extent_alloc btrfs_reserved_extent_free These can show how btrfs uses its space. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1bbd26b4fc5c..77c65a0bea34 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2192,6 +2192,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, else write_flags = WRITE; + trace___extent_writepage(page, inode, wbc); + WARN_ON(!PageLocked(page)); pg_offset = i_size & (PAGE_CACHE_SIZE - 1); if (page->index > end_index || -- cgit v1.2.2 From 3387206f26e1b48703e810175b98611a4fd8e8ea Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Mon, 11 Apr 2011 21:52:52 +0000 Subject: btrfs: properly handle overlapping areas in memmove_extent_buffer Fix data corruption caused by memcpy() usage on overlapping data. I've observed it first when found out usermode linux crash on btrfs. ?all chain is the following: ------------[ cut here ]------------ WARNING: at /home/slyfox/linux-2.6/fs/btrfs/extent_io.c:3900 memcpy_extent_buffer+0x1a5/0x219() Call Trace: 6fa39a58: [<601b495e>] _raw_spin_unlock_irqrestore+0x18/0x1c 6fa39a68: [<60029ad9>] warn_slowpath_common+0x59/0x70 6fa39aa8: [<60029b05>] warn_slowpath_null+0x15/0x17 6fa39ab8: [<600efc97>] memcpy_extent_buffer+0x1a5/0x219 6fa39b48: [<600efd9f>] memmove_extent_buffer+0x94/0x208 6fa39bc8: [<600becbf>] btrfs_del_items+0x214/0x473 6fa39c78: [<600ce1b0>] btrfs_delete_one_dir_name+0x7c/0xda 6fa39cc8: [<600dad6b>] __btrfs_unlink_inode+0xad/0x25d 6fa39d08: [<600d7864>] btrfs_start_transaction+0xe/0x10 6fa39d48: [<600dc9ff>] btrfs_unlink_inode+0x1b/0x3b 6fa39d78: [<600e04bc>] btrfs_unlink+0x70/0xef 6fa39dc8: [<6007f0d0>] vfs_unlink+0x58/0xa3 6fa39df8: [<60080278>] do_unlinkat+0xd4/0x162 6fa39e48: [<600517db>] call_rcu_sched+0xe/0x10 6fa39e58: [<600452a8>] __put_cred+0x58/0x5a 6fa39e78: [<6007446c>] sys_faccessat+0x154/0x166 6fa39ed8: [<60080317>] sys_unlink+0x11/0x13 6fa39ee8: [<60016b80>] handle_syscall+0x58/0x70 6fa39f08: [<60021377>] userspace+0x2d4/0x381 6fa39fc8: [<60014507>] fork_handler+0x62/0x69 ---[ end trace 70b0ca2ef0266b93 ]--- http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg09302.html Signed-off-by: Sergei Trofimovich Reviewed-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 77c65a0bea34..864e0496cc1c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3885,6 +3885,12 @@ static void move_pages(struct page *dst_page, struct page *src_page, kunmap_atomic(dst_kaddr, KM_USER0); } +static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) +{ + unsigned long distance = (src > dst) ? src - dst : dst - src; + return distance < len; +} + static void copy_pages(struct page *dst_page, struct page *src_page, unsigned long dst_off, unsigned long src_off, unsigned long len) @@ -3892,10 +3898,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page, char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); char *src_kaddr; - if (dst_page != src_page) + if (dst_page != src_page) { src_kaddr = kmap_atomic(src_page, KM_USER1); - else + } else { src_kaddr = dst_kaddr; + BUG_ON(areas_overlap(src_off, dst_off, len)); + } memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); kunmap_atomic(dst_kaddr, KM_USER0); @@ -3970,7 +3978,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, "len %lu len %lu\n", dst_offset, len, dst->len); BUG_ON(1); } - if (dst_offset < src_offset) { + if (!areas_overlap(src_offset, dst_offset, len)) { memcpy_extent_buffer(dst, dst_offset, src_offset, len); return; } -- cgit v1.2.2 From 507903b81840a70cc6a179d4eb03584ad50e8c5b Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Wed, 6 Apr 2011 10:02:20 +0000 Subject: btrfs: using cached extent_state in set/unlock combinations In several places the sequence (set_extent_uptodate, unlock_extent) is used. This leads to a duplicate lookup of the extent state. This patch lets set_extent_uptodate return a cached extent_state which can be passed to unlock_extent_cached. The occurences of the above sequences are updated to use the cache. Only end_bio_extent_readpage is updated that it first gets a cached state to pass it to the readpage_end_io_hook as the prototype requested and is later on being used for set/unlock. Signed-off-by: Arne Jansen Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 70 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 864e0496cc1c..8dcfb77678de 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -690,6 +690,17 @@ static void cache_state(struct extent_state *state, } } +static void uncache_state(struct extent_state **cached_ptr) +{ + if (cached_ptr && (*cached_ptr)) { + struct extent_state *state = *cached_ptr; + if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) { + *cached_ptr = NULL; + free_extent_state(state); + } + } +} + /* * set some bits on a range in the tree. This may require allocations or * sleeping, so the gfp mask is used to indicate what is allowed. @@ -940,10 +951,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, } int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) + struct extent_state **cached_state, gfp_t mask) { - return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, - NULL, mask); + return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, + NULL, cached_state, mask); } static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, @@ -1012,8 +1023,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, mask); } -int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) +int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, mask); @@ -1735,6 +1745,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err) do { struct page *page = bvec->bv_page; + struct extent_state *cached = NULL; + struct extent_state *state; + tree = &BTRFS_I(page->mapping->host)->io_tree; start = ((u64)page->index << PAGE_CACHE_SHIFT) + @@ -1749,9 +1762,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err) if (++bvec <= bvec_end) prefetchw(&bvec->bv_page->flags); + spin_lock(&tree->lock); + state = find_first_extent_bit_state(tree, start, 0); + if (state) { + /* + * take a reference on the state, unlock will drop + * the ref + */ + cache_state(state, &cached); + } + spin_unlock(&tree->lock); + if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { ret = tree->ops->readpage_end_io_hook(page, start, end, - NULL); + state); if (ret) uptodate = 0; } @@ -1764,15 +1788,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err) test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) uptodate = 0; + uncache_state(&cached); continue; } } if (uptodate) { - set_extent_uptodate(tree, start, end, + set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC); } - unlock_extent(tree, start, end, GFP_ATOMIC); + unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); if (whole_page) { if (uptodate) { @@ -1811,6 +1836,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) do { struct page *page = bvec->bv_page; + struct extent_state *cached = NULL; tree = &BTRFS_I(page->mapping->host)->io_tree; start = ((u64)page->index << PAGE_CACHE_SHIFT) + @@ -1821,13 +1847,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) prefetchw(&bvec->bv_page->flags); if (uptodate) { - set_extent_uptodate(tree, start, end, GFP_ATOMIC); + set_extent_uptodate(tree, start, end, &cached, + GFP_ATOMIC); } else { ClearPageUptodate(page); SetPageError(page); } - unlock_extent(tree, start, end, GFP_ATOMIC); + unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); } while (bvec >= bio->bi_io_vec); @@ -2016,14 +2043,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree, while (cur <= end) { if (cur >= last_byte) { char *userpage; + struct extent_state *cached = NULL; + iosize = PAGE_CACHE_SIZE - page_offset; userpage = kmap_atomic(page, KM_USER0); memset(userpage + page_offset, 0, iosize); flush_dcache_page(page); kunmap_atomic(userpage, KM_USER0); set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + &cached, GFP_NOFS); + unlock_extent_cached(tree, cur, cur + iosize - 1, + &cached, GFP_NOFS); break; } em = get_extent(inode, page, page_offset, cur, @@ -2063,14 +2093,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree, /* we've found a hole, just zero and go on */ if (block_start == EXTENT_MAP_HOLE) { char *userpage; + struct extent_state *cached = NULL; + userpage = kmap_atomic(page, KM_USER0); memset(userpage + page_offset, 0, iosize); flush_dcache_page(page); kunmap_atomic(userpage, KM_USER0); set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + &cached, GFP_NOFS); + unlock_extent_cached(tree, cur, cur + iosize - 1, + &cached, GFP_NOFS); cur = cur + iosize; page_offset += iosize; continue; @@ -2789,9 +2822,12 @@ int extent_prepare_write(struct extent_io_tree *tree, iocount++; block_start = block_start + iosize; } else { - set_extent_uptodate(tree, block_start, cur_end, + struct extent_state *cached = NULL; + + set_extent_uptodate(tree, block_start, cur_end, &cached, GFP_NOFS); - unlock_extent(tree, block_start, cur_end, GFP_NOFS); + unlock_extent_cached(tree, block_start, cur_end, + &cached, GFP_NOFS); block_start = cur_end + 1; } page_offset = block_start & (PAGE_CACHE_SIZE - 1); @@ -3457,7 +3493,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - GFP_NOFS); + NULL, GFP_NOFS); for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || -- cgit v1.2.2 From 109b36a2bb3eebf5c9994980e724958a5b2b62b6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Apr 2011 13:57:39 -0400 Subject: Btrfs: make uncache_state unconditional The extent_io code can take cached pointers into the extent state trees, and these can make lookups much faster in common operations. The caching only happens when specific bits are set that prevent merging and splitting of the extent state. A help function was added to uncache the state, and it was testing the same set of conditionals. This can leak in very strange corner cases where the lock bit goes away unexpectedly. The uncaching should be unconditional. Once we have a ref on the extent we should always give it up. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8dcfb77678de..1c462f895c98 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -694,10 +694,8 @@ static void uncache_state(struct extent_state **cached_ptr) { if (cached_ptr && (*cached_ptr)) { struct extent_state *state = *cached_ptr; - if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) { - *cached_ptr = NULL; - free_extent_state(state); - } + *cached_ptr = NULL; + free_extent_state(state); } } @@ -1764,7 +1762,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) spin_lock(&tree->lock); state = find_first_extent_bit_state(tree, start, 0); - if (state) { + if (state && state->start == start) { /* * take a reference on the state, unlock will drop * the ref -- cgit v1.2.2 From 0d399205edf3a4c290e76ebb36e541593af4a1b4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 16 Apr 2011 06:55:39 -0400 Subject: Btrfs end_bio_extent_readpage should look for locked bits A recent commit caches the extent state in end_bio_extent_readpage, but the search it does should look for locked extents. This fixes things to make it more effective. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1c462f895c98..5ae0bffaa4d8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1761,7 +1761,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) prefetchw(&bvec->bv_page->flags); spin_lock(&tree->lock); - state = find_first_extent_bit_state(tree, start, 0); + state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED); if (state && state->start == start) { /* * take a reference on the state, unlock will drop -- cgit v1.2.2 From 43e817a1fdda17f3357602ed7964c248c8c53ae0 Mon Sep 17 00:00:00 2001 From: Itaru Kitayama Date: Mon, 25 Apr 2011 19:43:51 -0400 Subject: btrfs: fix wrong allocating flag when reading page the space cache use extent_readpages() to read free space information, so we can not use GFP_KERNEL flag to allocate memory, or it may lead to deadlock. Signed-off-by: Itaru Kitayama Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5ae0bffaa4d8..0d1196d6f786 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2681,7 +2681,7 @@ int extent_readpages(struct extent_io_tree *tree, prefetchw(&page->flags); list_del(&page->lru); if (!add_to_page_cache_lru(page, mapping, - page->index, GFP_KERNEL)) { + page->index, GFP_NOFS)) { __extent_read_full_page(tree, page, get_extent, &bio, 0, &bio_flags); } -- cgit v1.2.2