diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/ext4_extents.h | 9 | ||||
-rw-r--r-- | fs/ext4/extents.c | 42 | ||||
-rw-r--r-- | fs/ext4/inode.c | 2 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 8 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 10 | ||||
-rw-r--r-- | fs/ext4/super.c | 15 | ||||
-rw-r--r-- | fs/jbd2/checkpoint.c | 28 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 33 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 91 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 69 |
10 files changed, 147 insertions, 160 deletions
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 2e29abb30f76..095c36f3b612 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -125,7 +125,7 @@ struct ext4_ext_path { | |||
125 | * positive retcode - signal for ext4_ext_walk_space(), see below | 125 | * positive retcode - signal for ext4_ext_walk_space(), see below |
126 | * callback must return valid extent (passed or newly created) | 126 | * callback must return valid extent (passed or newly created) |
127 | */ | 127 | */ |
128 | typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, | 128 | typedef int (*ext_prepare_callback)(struct inode *, ext4_lblk_t, |
129 | struct ext4_ext_cache *, | 129 | struct ext4_ext_cache *, |
130 | struct ext4_extent *, void *); | 130 | struct ext4_extent *, void *); |
131 | 131 | ||
@@ -133,8 +133,11 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, | |||
133 | #define EXT_BREAK 1 | 133 | #define EXT_BREAK 1 |
134 | #define EXT_REPEAT 2 | 134 | #define EXT_REPEAT 2 |
135 | 135 | ||
136 | /* Maximum logical block in a file; ext4_extent's ee_block is __le32 */ | 136 | /* |
137 | #define EXT_MAX_BLOCK 0xffffffff | 137 | * Maximum number of logical blocks in a file; ext4_extent's ee_block is |
138 | * __le32. | ||
139 | */ | ||
140 | #define EXT_MAX_BLOCKS 0xffffffff | ||
138 | 141 | ||
139 | /* | 142 | /* |
140 | * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an | 143 | * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5199bac7fc62..f815cc81e7a2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -1408,7 +1408,7 @@ got_index: | |||
1408 | 1408 | ||
1409 | /* | 1409 | /* |
1410 | * ext4_ext_next_allocated_block: | 1410 | * ext4_ext_next_allocated_block: |
1411 | * returns allocated block in subsequent extent or EXT_MAX_BLOCK. | 1411 | * returns allocated block in subsequent extent or EXT_MAX_BLOCKS. |
1412 | * NOTE: it considers block number from index entry as | 1412 | * NOTE: it considers block number from index entry as |
1413 | * allocated block. Thus, index entries have to be consistent | 1413 | * allocated block. Thus, index entries have to be consistent |
1414 | * with leaves. | 1414 | * with leaves. |
@@ -1422,7 +1422,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) | |||
1422 | depth = path->p_depth; | 1422 | depth = path->p_depth; |
1423 | 1423 | ||
1424 | if (depth == 0 && path->p_ext == NULL) | 1424 | if (depth == 0 && path->p_ext == NULL) |
1425 | return EXT_MAX_BLOCK; | 1425 | return EXT_MAX_BLOCKS; |
1426 | 1426 | ||
1427 | while (depth >= 0) { | 1427 | while (depth >= 0) { |
1428 | if (depth == path->p_depth) { | 1428 | if (depth == path->p_depth) { |
@@ -1439,12 +1439,12 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) | |||
1439 | depth--; | 1439 | depth--; |
1440 | } | 1440 | } |
1441 | 1441 | ||
1442 | return EXT_MAX_BLOCK; | 1442 | return EXT_MAX_BLOCKS; |
1443 | } | 1443 | } |
1444 | 1444 | ||
1445 | /* | 1445 | /* |
1446 | * ext4_ext_next_leaf_block: | 1446 | * ext4_ext_next_leaf_block: |
1447 | * returns first allocated block from next leaf or EXT_MAX_BLOCK | 1447 | * returns first allocated block from next leaf or EXT_MAX_BLOCKS |
1448 | */ | 1448 | */ |
1449 | static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, | 1449 | static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, |
1450 | struct ext4_ext_path *path) | 1450 | struct ext4_ext_path *path) |
@@ -1456,7 +1456,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, | |||
1456 | 1456 | ||
1457 | /* zero-tree has no leaf blocks at all */ | 1457 | /* zero-tree has no leaf blocks at all */ |
1458 | if (depth == 0) | 1458 | if (depth == 0) |
1459 | return EXT_MAX_BLOCK; | 1459 | return EXT_MAX_BLOCKS; |
1460 | 1460 | ||
1461 | /* go to index block */ | 1461 | /* go to index block */ |
1462 | depth--; | 1462 | depth--; |
@@ -1469,7 +1469,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, | |||
1469 | depth--; | 1469 | depth--; |
1470 | } | 1470 | } |
1471 | 1471 | ||
1472 | return EXT_MAX_BLOCK; | 1472 | return EXT_MAX_BLOCKS; |
1473 | } | 1473 | } |
1474 | 1474 | ||
1475 | /* | 1475 | /* |
@@ -1677,13 +1677,13 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode, | |||
1677 | */ | 1677 | */ |
1678 | if (b2 < b1) { | 1678 | if (b2 < b1) { |
1679 | b2 = ext4_ext_next_allocated_block(path); | 1679 | b2 = ext4_ext_next_allocated_block(path); |
1680 | if (b2 == EXT_MAX_BLOCK) | 1680 | if (b2 == EXT_MAX_BLOCKS) |
1681 | goto out; | 1681 | goto out; |
1682 | } | 1682 | } |
1683 | 1683 | ||
1684 | /* check for wrap through zero on extent logical start block*/ | 1684 | /* check for wrap through zero on extent logical start block*/ |
1685 | if (b1 + len1 < b1) { | 1685 | if (b1 + len1 < b1) { |
1686 | len1 = EXT_MAX_BLOCK - b1; | 1686 | len1 = EXT_MAX_BLOCKS - b1; |
1687 | newext->ee_len = cpu_to_le16(len1); | 1687 | newext->ee_len = cpu_to_le16(len1); |
1688 | ret = 1; | 1688 | ret = 1; |
1689 | } | 1689 | } |
@@ -1767,7 +1767,7 @@ repeat: | |||
1767 | fex = EXT_LAST_EXTENT(eh); | 1767 | fex = EXT_LAST_EXTENT(eh); |
1768 | next = ext4_ext_next_leaf_block(inode, path); | 1768 | next = ext4_ext_next_leaf_block(inode, path); |
1769 | if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) | 1769 | if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) |
1770 | && next != EXT_MAX_BLOCK) { | 1770 | && next != EXT_MAX_BLOCKS) { |
1771 | ext_debug("next leaf block - %d\n", next); | 1771 | ext_debug("next leaf block - %d\n", next); |
1772 | BUG_ON(npath != NULL); | 1772 | BUG_ON(npath != NULL); |
1773 | npath = ext4_ext_find_extent(inode, next, NULL); | 1773 | npath = ext4_ext_find_extent(inode, next, NULL); |
@@ -1887,7 +1887,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | |||
1887 | BUG_ON(func == NULL); | 1887 | BUG_ON(func == NULL); |
1888 | BUG_ON(inode == NULL); | 1888 | BUG_ON(inode == NULL); |
1889 | 1889 | ||
1890 | while (block < last && block != EXT_MAX_BLOCK) { | 1890 | while (block < last && block != EXT_MAX_BLOCKS) { |
1891 | num = last - block; | 1891 | num = last - block; |
1892 | /* find extent for this block */ | 1892 | /* find extent for this block */ |
1893 | down_read(&EXT4_I(inode)->i_data_sem); | 1893 | down_read(&EXT4_I(inode)->i_data_sem); |
@@ -1958,7 +1958,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | |||
1958 | err = -EIO; | 1958 | err = -EIO; |
1959 | break; | 1959 | break; |
1960 | } | 1960 | } |
1961 | err = func(inode, path, &cbex, ex, cbdata); | 1961 | err = func(inode, next, &cbex, ex, cbdata); |
1962 | ext4_ext_drop_refs(path); | 1962 | ext4_ext_drop_refs(path); |
1963 | 1963 | ||
1964 | if (err < 0) | 1964 | if (err < 0) |
@@ -2020,7 +2020,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2020 | if (ex == NULL) { | 2020 | if (ex == NULL) { |
2021 | /* there is no extent yet, so gap is [0;-] */ | 2021 | /* there is no extent yet, so gap is [0;-] */ |
2022 | lblock = 0; | 2022 | lblock = 0; |
2023 | len = EXT_MAX_BLOCK; | 2023 | len = EXT_MAX_BLOCKS; |
2024 | ext_debug("cache gap(whole file):"); | 2024 | ext_debug("cache gap(whole file):"); |
2025 | } else if (block < le32_to_cpu(ex->ee_block)) { | 2025 | } else if (block < le32_to_cpu(ex->ee_block)) { |
2026 | lblock = block; | 2026 | lblock = block; |
@@ -2350,7 +2350,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2350 | * never happen because at least one of the end points | 2350 | * never happen because at least one of the end points |
2351 | * needs to be on the edge of the extent. | 2351 | * needs to be on the edge of the extent. |
2352 | */ | 2352 | */ |
2353 | if (end == EXT_MAX_BLOCK) { | 2353 | if (end == EXT_MAX_BLOCKS - 1) { |
2354 | ext_debug(" bad truncate %u:%u\n", | 2354 | ext_debug(" bad truncate %u:%u\n", |
2355 | start, end); | 2355 | start, end); |
2356 | block = 0; | 2356 | block = 0; |
@@ -2398,7 +2398,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2398 | * If this is a truncate, this condition | 2398 | * If this is a truncate, this condition |
2399 | * should never happen | 2399 | * should never happen |
2400 | */ | 2400 | */ |
2401 | if (end == EXT_MAX_BLOCK) { | 2401 | if (end == EXT_MAX_BLOCKS - 1) { |
2402 | ext_debug(" bad truncate %u:%u\n", | 2402 | ext_debug(" bad truncate %u:%u\n", |
2403 | start, end); | 2403 | start, end); |
2404 | err = -EIO; | 2404 | err = -EIO; |
@@ -2478,7 +2478,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2478 | * we need to remove it from the leaf | 2478 | * we need to remove it from the leaf |
2479 | */ | 2479 | */ |
2480 | if (num == 0) { | 2480 | if (num == 0) { |
2481 | if (end != EXT_MAX_BLOCK) { | 2481 | if (end != EXT_MAX_BLOCKS - 1) { |
2482 | /* | 2482 | /* |
2483 | * For hole punching, we need to scoot all the | 2483 | * For hole punching, we need to scoot all the |
2484 | * extents up when an extent is removed so that | 2484 | * extents up when an extent is removed so that |
@@ -3699,7 +3699,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
3699 | 3699 | ||
3700 | last_block = (inode->i_size + sb->s_blocksize - 1) | 3700 | last_block = (inode->i_size + sb->s_blocksize - 1) |
3701 | >> EXT4_BLOCK_SIZE_BITS(sb); | 3701 | >> EXT4_BLOCK_SIZE_BITS(sb); |
3702 | err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCK); | 3702 | err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); |
3703 | 3703 | ||
3704 | /* In a multi-transaction truncate, we only make the final | 3704 | /* In a multi-transaction truncate, we only make the final |
3705 | * transaction synchronous. | 3705 | * transaction synchronous. |
@@ -3914,14 +3914,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
3914 | /* | 3914 | /* |
3915 | * Callback function called for each extent to gather FIEMAP information. | 3915 | * Callback function called for each extent to gather FIEMAP information. |
3916 | */ | 3916 | */ |
3917 | static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | 3917 | static int ext4_ext_fiemap_cb(struct inode *inode, ext4_lblk_t next, |
3918 | struct ext4_ext_cache *newex, struct ext4_extent *ex, | 3918 | struct ext4_ext_cache *newex, struct ext4_extent *ex, |
3919 | void *data) | 3919 | void *data) |
3920 | { | 3920 | { |
3921 | __u64 logical; | 3921 | __u64 logical; |
3922 | __u64 physical; | 3922 | __u64 physical; |
3923 | __u64 length; | 3923 | __u64 length; |
3924 | loff_t size; | ||
3925 | __u32 flags = 0; | 3924 | __u32 flags = 0; |
3926 | int ret = 0; | 3925 | int ret = 0; |
3927 | struct fiemap_extent_info *fieinfo = data; | 3926 | struct fiemap_extent_info *fieinfo = data; |
@@ -4103,8 +4102,7 @@ found_delayed_extent: | |||
4103 | if (ex && ext4_ext_is_uninitialized(ex)) | 4102 | if (ex && ext4_ext_is_uninitialized(ex)) |
4104 | flags |= FIEMAP_EXTENT_UNWRITTEN; | 4103 | flags |= FIEMAP_EXTENT_UNWRITTEN; |
4105 | 4104 | ||
4106 | size = i_size_read(inode); | 4105 | if (next == EXT_MAX_BLOCKS) |
4107 | if (logical + length >= size) | ||
4108 | flags |= FIEMAP_EXTENT_LAST; | 4106 | flags |= FIEMAP_EXTENT_LAST; |
4109 | 4107 | ||
4110 | ret = fiemap_fill_next_extent(fieinfo, logical, physical, | 4108 | ret = fiemap_fill_next_extent(fieinfo, logical, physical, |
@@ -4347,8 +4345,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4347 | 4345 | ||
4348 | start_blk = start >> inode->i_sb->s_blocksize_bits; | 4346 | start_blk = start >> inode->i_sb->s_blocksize_bits; |
4349 | last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; | 4347 | last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; |
4350 | if (last_blk >= EXT_MAX_BLOCK) | 4348 | if (last_blk >= EXT_MAX_BLOCKS) |
4351 | last_blk = EXT_MAX_BLOCK-1; | 4349 | last_blk = EXT_MAX_BLOCKS-1; |
4352 | len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; | 4350 | len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; |
4353 | 4351 | ||
4354 | /* | 4352 | /* |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a5763e3505ba..e3126c051006 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -2634,7 +2634,7 @@ static int ext4_writepage(struct page *page, | |||
2634 | struct buffer_head *page_bufs = NULL; | 2634 | struct buffer_head *page_bufs = NULL; |
2635 | struct inode *inode = page->mapping->host; | 2635 | struct inode *inode = page->mapping->host; |
2636 | 2636 | ||
2637 | trace_ext4_writepage(inode, page); | 2637 | trace_ext4_writepage(page); |
2638 | size = i_size_read(inode); | 2638 | size = i_size_read(inode); |
2639 | if (page->index == size >> PAGE_CACHE_SHIFT) | 2639 | if (page->index == size >> PAGE_CACHE_SHIFT) |
2640 | len = size & ~PAGE_CACHE_MASK; | 2640 | len = size & ~PAGE_CACHE_MASK; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 859f2ae8864e..6ed859d56850 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -3578,8 +3578,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3578 | free += next - bit; | 3578 | free += next - bit; |
3579 | 3579 | ||
3580 | trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); | 3580 | trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); |
3581 | trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa, | 3581 | trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit, |
3582 | grp_blk_start + bit, next - bit); | 3582 | next - bit); |
3583 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); | 3583 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); |
3584 | bit = next + 1; | 3584 | bit = next + 1; |
3585 | } | 3585 | } |
@@ -3608,7 +3608,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, | |||
3608 | ext4_group_t group; | 3608 | ext4_group_t group; |
3609 | ext4_grpblk_t bit; | 3609 | ext4_grpblk_t bit; |
3610 | 3610 | ||
3611 | trace_ext4_mb_release_group_pa(sb, pa); | 3611 | trace_ext4_mb_release_group_pa(pa); |
3612 | BUG_ON(pa->pa_deleted == 0); | 3612 | BUG_ON(pa->pa_deleted == 0); |
3613 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3613 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
3614 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3614 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
@@ -4448,7 +4448,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4448 | * @inode: inode | 4448 | * @inode: inode |
4449 | * @block: start physical block to free | 4449 | * @block: start physical block to free |
4450 | * @count: number of blocks to count | 4450 | * @count: number of blocks to count |
4451 | * @metadata: Are these metadata blocks | 4451 | * @flags: flags used by ext4_free_blocks |
4452 | */ | 4452 | */ |
4453 | void ext4_free_blocks(handle_t *handle, struct inode *inode, | 4453 | void ext4_free_blocks(handle_t *handle, struct inode *inode, |
4454 | struct buffer_head *bh, ext4_fsblk_t block, | 4454 | struct buffer_head *bh, ext4_fsblk_t block, |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 2b8304bf3c50..f57455a1b1b2 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -1002,12 +1002,12 @@ mext_check_arguments(struct inode *orig_inode, | |||
1002 | return -EINVAL; | 1002 | return -EINVAL; |
1003 | } | 1003 | } |
1004 | 1004 | ||
1005 | if ((orig_start > EXT_MAX_BLOCK) || | 1005 | if ((orig_start >= EXT_MAX_BLOCKS) || |
1006 | (donor_start > EXT_MAX_BLOCK) || | 1006 | (donor_start >= EXT_MAX_BLOCKS) || |
1007 | (*len > EXT_MAX_BLOCK) || | 1007 | (*len > EXT_MAX_BLOCKS) || |
1008 | (orig_start + *len > EXT_MAX_BLOCK)) { | 1008 | (orig_start + *len >= EXT_MAX_BLOCKS)) { |
1009 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " | 1009 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " |
1010 | "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK, | 1010 | "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, |
1011 | orig_inode->i_ino, donor_inode->i_ino); | 1011 | orig_inode->i_ino, donor_inode->i_ino); |
1012 | return -EINVAL; | 1012 | return -EINVAL; |
1013 | } | 1013 | } |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cc5c157aa11d..9ea71aa864b3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -2243,6 +2243,12 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2243 | * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, | 2243 | * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, |
2244 | * so that won't be a limiting factor. | 2244 | * so that won't be a limiting factor. |
2245 | * | 2245 | * |
2246 | * However there is other limiting factor. We do store extents in the form | ||
2247 | * of starting block and length, hence the resulting length of the extent | ||
2248 | * covering maximum file size must fit into on-disk format containers as | ||
2249 | * well. Given that length is always by 1 unit bigger than max unit (because | ||
2250 | * we count 0 as well) we have to lower the s_maxbytes by one fs block. | ||
2251 | * | ||
2246 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. | 2252 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. |
2247 | */ | 2253 | */ |
2248 | static loff_t ext4_max_size(int blkbits, int has_huge_files) | 2254 | static loff_t ext4_max_size(int blkbits, int has_huge_files) |
@@ -2264,10 +2270,13 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files) | |||
2264 | upper_limit <<= blkbits; | 2270 | upper_limit <<= blkbits; |
2265 | } | 2271 | } |
2266 | 2272 | ||
2267 | /* 32-bit extent-start container, ee_block */ | 2273 | /* |
2268 | res = 1LL << 32; | 2274 | * 32-bit extent-start container, ee_block. We lower the maxbytes |
2275 | * by one fs block, so ee_len can cover the extent of maximum file | ||
2276 | * size | ||
2277 | */ | ||
2278 | res = (1LL << 32) - 1; | ||
2269 | res <<= blkbits; | 2279 | res <<= blkbits; |
2270 | res -= 1; | ||
2271 | 2280 | ||
2272 | /* Sanity check against vm- & vfs- imposed limits */ | 2281 | /* Sanity check against vm- & vfs- imposed limits */ |
2273 | if (res > upper_limit) | 2282 | if (res > upper_limit) |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 6a79fd0a1a32..2c62c5aae82f 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -97,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
97 | 97 | ||
98 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && | 98 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && |
99 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | 99 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { |
100 | /* | ||
101 | * Get our reference so that bh cannot be freed before | ||
102 | * we unlock it | ||
103 | */ | ||
104 | get_bh(bh); | ||
100 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 105 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
101 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; | 106 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; |
102 | jbd_unlock_bh_state(bh); | 107 | jbd_unlock_bh_state(bh); |
103 | jbd2_journal_remove_journal_head(bh); | ||
104 | BUFFER_TRACE(bh, "release"); | 108 | BUFFER_TRACE(bh, "release"); |
105 | __brelse(bh); | 109 | __brelse(bh); |
106 | } else { | 110 | } else { |
@@ -223,8 +227,8 @@ restart: | |||
223 | spin_lock(&journal->j_list_lock); | 227 | spin_lock(&journal->j_list_lock); |
224 | goto restart; | 228 | goto restart; |
225 | } | 229 | } |
230 | get_bh(bh); | ||
226 | if (buffer_locked(bh)) { | 231 | if (buffer_locked(bh)) { |
227 | atomic_inc(&bh->b_count); | ||
228 | spin_unlock(&journal->j_list_lock); | 232 | spin_unlock(&journal->j_list_lock); |
229 | jbd_unlock_bh_state(bh); | 233 | jbd_unlock_bh_state(bh); |
230 | wait_on_buffer(bh); | 234 | wait_on_buffer(bh); |
@@ -243,7 +247,6 @@ restart: | |||
243 | */ | 247 | */ |
244 | released = __jbd2_journal_remove_checkpoint(jh); | 248 | released = __jbd2_journal_remove_checkpoint(jh); |
245 | jbd_unlock_bh_state(bh); | 249 | jbd_unlock_bh_state(bh); |
246 | jbd2_journal_remove_journal_head(bh); | ||
247 | __brelse(bh); | 250 | __brelse(bh); |
248 | } | 251 | } |
249 | 252 | ||
@@ -284,7 +287,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
284 | int ret = 0; | 287 | int ret = 0; |
285 | 288 | ||
286 | if (buffer_locked(bh)) { | 289 | if (buffer_locked(bh)) { |
287 | atomic_inc(&bh->b_count); | 290 | get_bh(bh); |
288 | spin_unlock(&journal->j_list_lock); | 291 | spin_unlock(&journal->j_list_lock); |
289 | jbd_unlock_bh_state(bh); | 292 | jbd_unlock_bh_state(bh); |
290 | wait_on_buffer(bh); | 293 | wait_on_buffer(bh); |
@@ -316,12 +319,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
316 | ret = 1; | 319 | ret = 1; |
317 | if (unlikely(buffer_write_io_error(bh))) | 320 | if (unlikely(buffer_write_io_error(bh))) |
318 | ret = -EIO; | 321 | ret = -EIO; |
322 | get_bh(bh); | ||
319 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | 323 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); |
320 | BUFFER_TRACE(bh, "remove from checkpoint"); | 324 | BUFFER_TRACE(bh, "remove from checkpoint"); |
321 | __jbd2_journal_remove_checkpoint(jh); | 325 | __jbd2_journal_remove_checkpoint(jh); |
322 | spin_unlock(&journal->j_list_lock); | 326 | spin_unlock(&journal->j_list_lock); |
323 | jbd_unlock_bh_state(bh); | 327 | jbd_unlock_bh_state(bh); |
324 | jbd2_journal_remove_journal_head(bh); | ||
325 | __brelse(bh); | 328 | __brelse(bh); |
326 | } else { | 329 | } else { |
327 | /* | 330 | /* |
@@ -554,7 +557,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
554 | /* | 557 | /* |
555 | * journal_clean_one_cp_list | 558 | * journal_clean_one_cp_list |
556 | * | 559 | * |
557 | * Find all the written-back checkpoint buffers in the given list and release them. | 560 | * Find all the written-back checkpoint buffers in the given list and |
561 | * release them. | ||
558 | * | 562 | * |
559 | * Called with the journal locked. | 563 | * Called with the journal locked. |
560 | * Called with j_list_lock held. | 564 | * Called with j_list_lock held. |
@@ -663,8 +667,8 @@ out: | |||
663 | * checkpoint lists. | 667 | * checkpoint lists. |
664 | * | 668 | * |
665 | * The function returns 1 if it frees the transaction, 0 otherwise. | 669 | * The function returns 1 if it frees the transaction, 0 otherwise. |
670 | * The function can free jh and bh. | ||
666 | * | 671 | * |
667 | * This function is called with the journal locked. | ||
668 | * This function is called with j_list_lock held. | 672 | * This function is called with j_list_lock held. |
669 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) | 673 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) |
670 | */ | 674 | */ |
@@ -684,13 +688,14 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
684 | } | 688 | } |
685 | journal = transaction->t_journal; | 689 | journal = transaction->t_journal; |
686 | 690 | ||
691 | JBUFFER_TRACE(jh, "removing from transaction"); | ||
687 | __buffer_unlink(jh); | 692 | __buffer_unlink(jh); |
688 | jh->b_cp_transaction = NULL; | 693 | jh->b_cp_transaction = NULL; |
694 | jbd2_journal_put_journal_head(jh); | ||
689 | 695 | ||
690 | if (transaction->t_checkpoint_list != NULL || | 696 | if (transaction->t_checkpoint_list != NULL || |
691 | transaction->t_checkpoint_io_list != NULL) | 697 | transaction->t_checkpoint_io_list != NULL) |
692 | goto out; | 698 | goto out; |
693 | JBUFFER_TRACE(jh, "transaction has no more buffers"); | ||
694 | 699 | ||
695 | /* | 700 | /* |
696 | * There is one special case to worry about: if we have just pulled the | 701 | * There is one special case to worry about: if we have just pulled the |
@@ -701,10 +706,8 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
701 | * The locking here around t_state is a bit sleazy. | 706 | * The locking here around t_state is a bit sleazy. |
702 | * See the comment at the end of jbd2_journal_commit_transaction(). | 707 | * See the comment at the end of jbd2_journal_commit_transaction(). |
703 | */ | 708 | */ |
704 | if (transaction->t_state != T_FINISHED) { | 709 | if (transaction->t_state != T_FINISHED) |
705 | JBUFFER_TRACE(jh, "belongs to running/committing transaction"); | ||
706 | goto out; | 710 | goto out; |
707 | } | ||
708 | 711 | ||
709 | /* OK, that was the last buffer for the transaction: we can now | 712 | /* OK, that was the last buffer for the transaction: we can now |
710 | safely remove this transaction from the log */ | 713 | safely remove this transaction from the log */ |
@@ -723,7 +726,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
723 | wake_up(&journal->j_wait_logspace); | 726 | wake_up(&journal->j_wait_logspace); |
724 | ret = 1; | 727 | ret = 1; |
725 | out: | 728 | out: |
726 | JBUFFER_TRACE(jh, "exit"); | ||
727 | return ret; | 729 | return ret; |
728 | } | 730 | } |
729 | 731 | ||
@@ -742,6 +744,8 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *jh, | |||
742 | J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); | 744 | J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); |
743 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); | 745 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); |
744 | 746 | ||
747 | /* Get reference for checkpointing transaction */ | ||
748 | jbd2_journal_grab_journal_head(jh2bh(jh)); | ||
745 | jh->b_cp_transaction = transaction; | 749 | jh->b_cp_transaction = transaction; |
746 | 750 | ||
747 | if (!transaction->t_checkpoint_list) { | 751 | if (!transaction->t_checkpoint_list) { |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7f21cf3aaf92..eef6979821a4 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -848,10 +848,16 @@ restart_loop: | |||
848 | while (commit_transaction->t_forget) { | 848 | while (commit_transaction->t_forget) { |
849 | transaction_t *cp_transaction; | 849 | transaction_t *cp_transaction; |
850 | struct buffer_head *bh; | 850 | struct buffer_head *bh; |
851 | int try_to_free = 0; | ||
851 | 852 | ||
852 | jh = commit_transaction->t_forget; | 853 | jh = commit_transaction->t_forget; |
853 | spin_unlock(&journal->j_list_lock); | 854 | spin_unlock(&journal->j_list_lock); |
854 | bh = jh2bh(jh); | 855 | bh = jh2bh(jh); |
856 | /* | ||
857 | * Get a reference so that bh cannot be freed before we are | ||
858 | * done with it. | ||
859 | */ | ||
860 | get_bh(bh); | ||
855 | jbd_lock_bh_state(bh); | 861 | jbd_lock_bh_state(bh); |
856 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); | 862 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); |
857 | 863 | ||
@@ -914,28 +920,27 @@ restart_loop: | |||
914 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); | 920 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); |
915 | if (is_journal_aborted(journal)) | 921 | if (is_journal_aborted(journal)) |
916 | clear_buffer_jbddirty(bh); | 922 | clear_buffer_jbddirty(bh); |
917 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); | ||
918 | __jbd2_journal_refile_buffer(jh); | ||
919 | jbd_unlock_bh_state(bh); | ||
920 | } else { | 923 | } else { |
921 | J_ASSERT_BH(bh, !buffer_dirty(bh)); | 924 | J_ASSERT_BH(bh, !buffer_dirty(bh)); |
922 | /* The buffer on BJ_Forget list and not jbddirty means | 925 | /* |
926 | * The buffer on BJ_Forget list and not jbddirty means | ||
923 | * it has been freed by this transaction and hence it | 927 | * it has been freed by this transaction and hence it |
924 | * could not have been reallocated until this | 928 | * could not have been reallocated until this |
925 | * transaction has committed. *BUT* it could be | 929 | * transaction has committed. *BUT* it could be |
926 | * reallocated once we have written all the data to | 930 | * reallocated once we have written all the data to |
927 | * disk and before we process the buffer on BJ_Forget | 931 | * disk and before we process the buffer on BJ_Forget |
928 | * list. */ | 932 | * list. |
929 | JBUFFER_TRACE(jh, "refile or unfile freed buffer"); | 933 | */ |
930 | __jbd2_journal_refile_buffer(jh); | 934 | if (!jh->b_next_transaction) |
931 | if (!jh->b_transaction) { | 935 | try_to_free = 1; |
932 | jbd_unlock_bh_state(bh); | ||
933 | /* needs a brelse */ | ||
934 | jbd2_journal_remove_journal_head(bh); | ||
935 | release_buffer_page(bh); | ||
936 | } else | ||
937 | jbd_unlock_bh_state(bh); | ||
938 | } | 936 | } |
937 | JBUFFER_TRACE(jh, "refile or unfile buffer"); | ||
938 | __jbd2_journal_refile_buffer(jh); | ||
939 | jbd_unlock_bh_state(bh); | ||
940 | if (try_to_free) | ||
941 | release_buffer_page(bh); /* Drops bh reference */ | ||
942 | else | ||
943 | __brelse(bh); | ||
939 | cond_resched_lock(&journal->j_list_lock); | 944 | cond_resched_lock(&journal->j_list_lock); |
940 | } | 945 | } |
941 | spin_unlock(&journal->j_list_lock); | 946 | spin_unlock(&journal->j_list_lock); |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 9a7826990304..0dfa5b598e68 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -2078,10 +2078,9 @@ static void journal_free_journal_head(struct journal_head *jh) | |||
2078 | * When a buffer has its BH_JBD bit set it is immune from being released by | 2078 | * When a buffer has its BH_JBD bit set it is immune from being released by |
2079 | * core kernel code, mainly via ->b_count. | 2079 | * core kernel code, mainly via ->b_count. |
2080 | * | 2080 | * |
2081 | * A journal_head may be detached from its buffer_head when the journal_head's | 2081 | * A journal_head is detached from its buffer_head when the journal_head's |
2082 | * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. | 2082 | * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint |
2083 | * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the | 2083 | * transaction (b_cp_transaction) hold their references to b_jcount. |
2084 | * journal_head can be dropped if needed. | ||
2085 | * | 2084 | * |
2086 | * Various places in the kernel want to attach a journal_head to a buffer_head | 2085 | * Various places in the kernel want to attach a journal_head to a buffer_head |
2087 | * _before_ attaching the journal_head to a transaction. To protect the | 2086 | * _before_ attaching the journal_head to a transaction. To protect the |
@@ -2094,17 +2093,16 @@ static void journal_free_journal_head(struct journal_head *jh) | |||
2094 | * (Attach a journal_head if needed. Increments b_jcount) | 2093 | * (Attach a journal_head if needed. Increments b_jcount) |
2095 | * struct journal_head *jh = jbd2_journal_add_journal_head(bh); | 2094 | * struct journal_head *jh = jbd2_journal_add_journal_head(bh); |
2096 | * ... | 2095 | * ... |
2096 | * (Get another reference for transaction) | ||
2097 | * jbd2_journal_grab_journal_head(bh); | ||
2097 | * jh->b_transaction = xxx; | 2098 | * jh->b_transaction = xxx; |
2099 | * (Put original reference) | ||
2098 | * jbd2_journal_put_journal_head(jh); | 2100 | * jbd2_journal_put_journal_head(jh); |
2099 | * | ||
2100 | * Now, the journal_head's b_jcount is zero, but it is safe from being released | ||
2101 | * because it has a non-zero b_transaction. | ||
2102 | */ | 2101 | */ |
2103 | 2102 | ||
2104 | /* | 2103 | /* |
2105 | * Give a buffer_head a journal_head. | 2104 | * Give a buffer_head a journal_head. |
2106 | * | 2105 | * |
2107 | * Doesn't need the journal lock. | ||
2108 | * May sleep. | 2106 | * May sleep. |
2109 | */ | 2107 | */ |
2110 | struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) | 2108 | struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) |
@@ -2168,61 +2166,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh) | |||
2168 | struct journal_head *jh = bh2jh(bh); | 2166 | struct journal_head *jh = bh2jh(bh); |
2169 | 2167 | ||
2170 | J_ASSERT_JH(jh, jh->b_jcount >= 0); | 2168 | J_ASSERT_JH(jh, jh->b_jcount >= 0); |
2171 | 2169 | J_ASSERT_JH(jh, jh->b_transaction == NULL); | |
2172 | get_bh(bh); | 2170 | J_ASSERT_JH(jh, jh->b_next_transaction == NULL); |
2173 | if (jh->b_jcount == 0) { | 2171 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); |
2174 | if (jh->b_transaction == NULL && | 2172 | J_ASSERT_JH(jh, jh->b_jlist == BJ_None); |
2175 | jh->b_next_transaction == NULL && | 2173 | J_ASSERT_BH(bh, buffer_jbd(bh)); |
2176 | jh->b_cp_transaction == NULL) { | 2174 | J_ASSERT_BH(bh, jh2bh(jh) == bh); |
2177 | J_ASSERT_JH(jh, jh->b_jlist == BJ_None); | 2175 | BUFFER_TRACE(bh, "remove journal_head"); |
2178 | J_ASSERT_BH(bh, buffer_jbd(bh)); | 2176 | if (jh->b_frozen_data) { |
2179 | J_ASSERT_BH(bh, jh2bh(jh) == bh); | 2177 | printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); |
2180 | BUFFER_TRACE(bh, "remove journal_head"); | 2178 | jbd2_free(jh->b_frozen_data, bh->b_size); |
2181 | if (jh->b_frozen_data) { | ||
2182 | printk(KERN_WARNING "%s: freeing " | ||
2183 | "b_frozen_data\n", | ||
2184 | __func__); | ||
2185 | jbd2_free(jh->b_frozen_data, bh->b_size); | ||
2186 | } | ||
2187 | if (jh->b_committed_data) { | ||
2188 | printk(KERN_WARNING "%s: freeing " | ||
2189 | "b_committed_data\n", | ||
2190 | __func__); | ||
2191 | jbd2_free(jh->b_committed_data, bh->b_size); | ||
2192 | } | ||
2193 | bh->b_private = NULL; | ||
2194 | jh->b_bh = NULL; /* debug, really */ | ||
2195 | clear_buffer_jbd(bh); | ||
2196 | __brelse(bh); | ||
2197 | journal_free_journal_head(jh); | ||
2198 | } else { | ||
2199 | BUFFER_TRACE(bh, "journal_head was locked"); | ||
2200 | } | ||
2201 | } | 2179 | } |
2180 | if (jh->b_committed_data) { | ||
2181 | printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); | ||
2182 | jbd2_free(jh->b_committed_data, bh->b_size); | ||
2183 | } | ||
2184 | bh->b_private = NULL; | ||
2185 | jh->b_bh = NULL; /* debug, really */ | ||
2186 | clear_buffer_jbd(bh); | ||
2187 | journal_free_journal_head(jh); | ||
2202 | } | 2188 | } |
2203 | 2189 | ||
2204 | /* | 2190 | /* |
2205 | * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction | 2191 | * Drop a reference on the passed journal_head. If it fell to zero then |
2206 | * and has a zero b_jcount then remove and release its journal_head. If we did | ||
2207 | * see that the buffer is not used by any transaction we also "logically" | ||
2208 | * decrement ->b_count. | ||
2209 | * | ||
2210 | * We in fact take an additional increment on ->b_count as a convenience, | ||
2211 | * because the caller usually wants to do additional things with the bh | ||
2212 | * after calling here. | ||
2213 | * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some | ||
2214 | * time. Once the caller has run __brelse(), the buffer is eligible for | ||
2215 | * reaping by try_to_free_buffers(). | ||
2216 | */ | ||
2217 | void jbd2_journal_remove_journal_head(struct buffer_head *bh) | ||
2218 | { | ||
2219 | jbd_lock_bh_journal_head(bh); | ||
2220 | __journal_remove_journal_head(bh); | ||
2221 | jbd_unlock_bh_journal_head(bh); | ||
2222 | } | ||
2223 | |||
2224 | /* | ||
2225 | * Drop a reference on the passed journal_head. If it fell to zero then try to | ||
2226 | * release the journal_head from the buffer_head. | 2192 | * release the journal_head from the buffer_head. |
2227 | */ | 2193 | */ |
2228 | void jbd2_journal_put_journal_head(struct journal_head *jh) | 2194 | void jbd2_journal_put_journal_head(struct journal_head *jh) |
@@ -2232,11 +2198,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) | |||
2232 | jbd_lock_bh_journal_head(bh); | 2198 | jbd_lock_bh_journal_head(bh); |
2233 | J_ASSERT_JH(jh, jh->b_jcount > 0); | 2199 | J_ASSERT_JH(jh, jh->b_jcount > 0); |
2234 | --jh->b_jcount; | 2200 | --jh->b_jcount; |
2235 | if (!jh->b_jcount && !jh->b_transaction) { | 2201 | if (!jh->b_jcount) { |
2236 | __journal_remove_journal_head(bh); | 2202 | __journal_remove_journal_head(bh); |
2203 | jbd_unlock_bh_journal_head(bh); | ||
2237 | __brelse(bh); | 2204 | __brelse(bh); |
2238 | } | 2205 | } else |
2239 | jbd_unlock_bh_journal_head(bh); | 2206 | jbd_unlock_bh_journal_head(bh); |
2240 | } | 2207 | } |
2241 | 2208 | ||
2242 | /* | 2209 | /* |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 3eec82d32fd4..2d7109414cdd 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
31 | 31 | ||
32 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | 32 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); |
33 | static void __jbd2_journal_unfile_buffer(struct journal_head *jh); | ||
33 | 34 | ||
34 | /* | 35 | /* |
35 | * jbd2_get_transaction: obtain a new transaction_t object. | 36 | * jbd2_get_transaction: obtain a new transaction_t object. |
@@ -764,7 +765,6 @@ repeat: | |||
764 | if (!jh->b_transaction) { | 765 | if (!jh->b_transaction) { |
765 | JBUFFER_TRACE(jh, "no transaction"); | 766 | JBUFFER_TRACE(jh, "no transaction"); |
766 | J_ASSERT_JH(jh, !jh->b_next_transaction); | 767 | J_ASSERT_JH(jh, !jh->b_next_transaction); |
767 | jh->b_transaction = transaction; | ||
768 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); | 768 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); |
769 | spin_lock(&journal->j_list_lock); | 769 | spin_lock(&journal->j_list_lock); |
770 | __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); | 770 | __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); |
@@ -814,7 +814,6 @@ out: | |||
814 | * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. | 814 | * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. |
815 | * @handle: transaction to add buffer modifications to | 815 | * @handle: transaction to add buffer modifications to |
816 | * @bh: bh to be used for metadata writes | 816 | * @bh: bh to be used for metadata writes |
817 | * @credits: variable that will receive credits for the buffer | ||
818 | * | 817 | * |
819 | * Returns an error code or 0 on success. | 818 | * Returns an error code or 0 on success. |
820 | * | 819 | * |
@@ -896,8 +895,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
896 | * committed and so it's safe to clear the dirty bit. | 895 | * committed and so it's safe to clear the dirty bit. |
897 | */ | 896 | */ |
898 | clear_buffer_dirty(jh2bh(jh)); | 897 | clear_buffer_dirty(jh2bh(jh)); |
899 | jh->b_transaction = transaction; | ||
900 | |||
901 | /* first access by this transaction */ | 898 | /* first access by this transaction */ |
902 | jh->b_modified = 0; | 899 | jh->b_modified = 0; |
903 | 900 | ||
@@ -932,7 +929,6 @@ out: | |||
932 | * non-rewindable consequences | 929 | * non-rewindable consequences |
933 | * @handle: transaction | 930 | * @handle: transaction |
934 | * @bh: buffer to undo | 931 | * @bh: buffer to undo |
935 | * @credits: store the number of taken credits here (if not NULL) | ||
936 | * | 932 | * |
937 | * Sometimes there is a need to distinguish between metadata which has | 933 | * Sometimes there is a need to distinguish between metadata which has |
938 | * been committed to disk and that which has not. The ext3fs code uses | 934 | * been committed to disk and that which has not. The ext3fs code uses |
@@ -1232,8 +1228,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1232 | __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); | 1228 | __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); |
1233 | } else { | 1229 | } else { |
1234 | __jbd2_journal_unfile_buffer(jh); | 1230 | __jbd2_journal_unfile_buffer(jh); |
1235 | jbd2_journal_remove_journal_head(bh); | ||
1236 | __brelse(bh); | ||
1237 | if (!buffer_jbd(bh)) { | 1231 | if (!buffer_jbd(bh)) { |
1238 | spin_unlock(&journal->j_list_lock); | 1232 | spin_unlock(&journal->j_list_lock); |
1239 | jbd_unlock_bh_state(bh); | 1233 | jbd_unlock_bh_state(bh); |
@@ -1556,19 +1550,32 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
1556 | mark_buffer_dirty(bh); /* Expose it to the VM */ | 1550 | mark_buffer_dirty(bh); /* Expose it to the VM */ |
1557 | } | 1551 | } |
1558 | 1552 | ||
1559 | void __jbd2_journal_unfile_buffer(struct journal_head *jh) | 1553 | /* |
1554 | * Remove buffer from all transactions. | ||
1555 | * | ||
1556 | * Called with bh_state lock and j_list_lock | ||
1557 | * | ||
1558 | * jh and bh may be already freed when this function returns. | ||
1559 | */ | ||
1560 | static void __jbd2_journal_unfile_buffer(struct journal_head *jh) | ||
1560 | { | 1561 | { |
1561 | __jbd2_journal_temp_unlink_buffer(jh); | 1562 | __jbd2_journal_temp_unlink_buffer(jh); |
1562 | jh->b_transaction = NULL; | 1563 | jh->b_transaction = NULL; |
1564 | jbd2_journal_put_journal_head(jh); | ||
1563 | } | 1565 | } |
1564 | 1566 | ||
1565 | void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) | 1567 | void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) |
1566 | { | 1568 | { |
1567 | jbd_lock_bh_state(jh2bh(jh)); | 1569 | struct buffer_head *bh = jh2bh(jh); |
1570 | |||
1571 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
1572 | get_bh(bh); | ||
1573 | jbd_lock_bh_state(bh); | ||
1568 | spin_lock(&journal->j_list_lock); | 1574 | spin_lock(&journal->j_list_lock); |
1569 | __jbd2_journal_unfile_buffer(jh); | 1575 | __jbd2_journal_unfile_buffer(jh); |
1570 | spin_unlock(&journal->j_list_lock); | 1576 | spin_unlock(&journal->j_list_lock); |
1571 | jbd_unlock_bh_state(jh2bh(jh)); | 1577 | jbd_unlock_bh_state(bh); |
1578 | __brelse(bh); | ||
1572 | } | 1579 | } |
1573 | 1580 | ||
1574 | /* | 1581 | /* |
@@ -1595,8 +1602,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
1595 | if (jh->b_jlist == BJ_None) { | 1602 | if (jh->b_jlist == BJ_None) { |
1596 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1603 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
1597 | __jbd2_journal_remove_checkpoint(jh); | 1604 | __jbd2_journal_remove_checkpoint(jh); |
1598 | jbd2_journal_remove_journal_head(bh); | ||
1599 | __brelse(bh); | ||
1600 | } | 1605 | } |
1601 | } | 1606 | } |
1602 | spin_unlock(&journal->j_list_lock); | 1607 | spin_unlock(&journal->j_list_lock); |
@@ -1659,7 +1664,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, | |||
1659 | /* | 1664 | /* |
1660 | * We take our own ref against the journal_head here to avoid | 1665 | * We take our own ref against the journal_head here to avoid |
1661 | * having to add tons of locking around each instance of | 1666 | * having to add tons of locking around each instance of |
1662 | * jbd2_journal_remove_journal_head() and | ||
1663 | * jbd2_journal_put_journal_head(). | 1667 | * jbd2_journal_put_journal_head(). |
1664 | */ | 1668 | */ |
1665 | jh = jbd2_journal_grab_journal_head(bh); | 1669 | jh = jbd2_journal_grab_journal_head(bh); |
@@ -1697,10 +1701,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1697 | int may_free = 1; | 1701 | int may_free = 1; |
1698 | struct buffer_head *bh = jh2bh(jh); | 1702 | struct buffer_head *bh = jh2bh(jh); |
1699 | 1703 | ||
1700 | __jbd2_journal_unfile_buffer(jh); | ||
1701 | |||
1702 | if (jh->b_cp_transaction) { | 1704 | if (jh->b_cp_transaction) { |
1703 | JBUFFER_TRACE(jh, "on running+cp transaction"); | 1705 | JBUFFER_TRACE(jh, "on running+cp transaction"); |
1706 | __jbd2_journal_temp_unlink_buffer(jh); | ||
1704 | /* | 1707 | /* |
1705 | * We don't want to write the buffer anymore, clear the | 1708 | * We don't want to write the buffer anymore, clear the |
1706 | * bit so that we don't confuse checks in | 1709 | * bit so that we don't confuse checks in |
@@ -1711,8 +1714,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1711 | may_free = 0; | 1714 | may_free = 0; |
1712 | } else { | 1715 | } else { |
1713 | JBUFFER_TRACE(jh, "on running transaction"); | 1716 | JBUFFER_TRACE(jh, "on running transaction"); |
1714 | jbd2_journal_remove_journal_head(bh); | 1717 | __jbd2_journal_unfile_buffer(jh); |
1715 | __brelse(bh); | ||
1716 | } | 1718 | } |
1717 | return may_free; | 1719 | return may_free; |
1718 | } | 1720 | } |
@@ -1990,6 +1992,8 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
1990 | 1992 | ||
1991 | if (jh->b_transaction) | 1993 | if (jh->b_transaction) |
1992 | __jbd2_journal_temp_unlink_buffer(jh); | 1994 | __jbd2_journal_temp_unlink_buffer(jh); |
1995 | else | ||
1996 | jbd2_journal_grab_journal_head(bh); | ||
1993 | jh->b_transaction = transaction; | 1997 | jh->b_transaction = transaction; |
1994 | 1998 | ||
1995 | switch (jlist) { | 1999 | switch (jlist) { |
@@ -2041,9 +2045,10 @@ void jbd2_journal_file_buffer(struct journal_head *jh, | |||
2041 | * already started to be used by a subsequent transaction, refile the | 2045 | * already started to be used by a subsequent transaction, refile the |
2042 | * buffer on that transaction's metadata list. | 2046 | * buffer on that transaction's metadata list. |
2043 | * | 2047 | * |
2044 | * Called under journal->j_list_lock | 2048 | * Called under j_list_lock |
2045 | * | ||
2046 | * Called under jbd_lock_bh_state(jh2bh(jh)) | 2049 | * Called under jbd_lock_bh_state(jh2bh(jh)) |
2050 | * | ||
2051 | * jh and bh may be already free when this function returns | ||
2047 | */ | 2052 | */ |
2048 | void __jbd2_journal_refile_buffer(struct journal_head *jh) | 2053 | void __jbd2_journal_refile_buffer(struct journal_head *jh) |
2049 | { | 2054 | { |
@@ -2067,6 +2072,11 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) | |||
2067 | 2072 | ||
2068 | was_dirty = test_clear_buffer_jbddirty(bh); | 2073 | was_dirty = test_clear_buffer_jbddirty(bh); |
2069 | __jbd2_journal_temp_unlink_buffer(jh); | 2074 | __jbd2_journal_temp_unlink_buffer(jh); |
2075 | /* | ||
2076 | * We set b_transaction here because b_next_transaction will inherit | ||
2077 | * our jh reference and thus __jbd2_journal_file_buffer() must not | ||
2078 | * take a new one. | ||
2079 | */ | ||
2070 | jh->b_transaction = jh->b_next_transaction; | 2080 | jh->b_transaction = jh->b_next_transaction; |
2071 | jh->b_next_transaction = NULL; | 2081 | jh->b_next_transaction = NULL; |
2072 | if (buffer_freed(bh)) | 2082 | if (buffer_freed(bh)) |
@@ -2083,30 +2093,21 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) | |||
2083 | } | 2093 | } |
2084 | 2094 | ||
2085 | /* | 2095 | /* |
2086 | * For the unlocked version of this call, also make sure that any | 2096 | * __jbd2_journal_refile_buffer() with necessary locking added. We take our |
2087 | * hanging journal_head is cleaned up if necessary. | 2097 | * bh reference so that we can safely unlock bh. |
2088 | * | 2098 | * |
2089 | * __jbd2_journal_refile_buffer is usually called as part of a single locked | 2099 | * The jh and bh may be freed by this call. |
2090 | * operation on a buffer_head, in which the caller is probably going to | ||
2091 | * be hooking the journal_head onto other lists. In that case it is up | ||
2092 | * to the caller to remove the journal_head if necessary. For the | ||
2093 | * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be | ||
2094 | * doing anything else to the buffer so we need to do the cleanup | ||
2095 | * ourselves to avoid a jh leak. | ||
2096 | * | ||
2097 | * *** The journal_head may be freed by this call! *** | ||
2098 | */ | 2100 | */ |
2099 | void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) | 2101 | void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) |
2100 | { | 2102 | { |
2101 | struct buffer_head *bh = jh2bh(jh); | 2103 | struct buffer_head *bh = jh2bh(jh); |
2102 | 2104 | ||
2105 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
2106 | get_bh(bh); | ||
2103 | jbd_lock_bh_state(bh); | 2107 | jbd_lock_bh_state(bh); |
2104 | spin_lock(&journal->j_list_lock); | 2108 | spin_lock(&journal->j_list_lock); |
2105 | |||
2106 | __jbd2_journal_refile_buffer(jh); | 2109 | __jbd2_journal_refile_buffer(jh); |
2107 | jbd_unlock_bh_state(bh); | 2110 | jbd_unlock_bh_state(bh); |
2108 | jbd2_journal_remove_journal_head(bh); | ||
2109 | |||
2110 | spin_unlock(&journal->j_list_lock); | 2111 | spin_unlock(&journal->j_list_lock); |
2111 | __brelse(bh); | 2112 | __brelse(bh); |
2112 | } | 2113 | } |