author    Linus Torvalds <torvalds@linux-foundation.org>    2011-11-02 13:06:20 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2011-11-02 13:06:20 -0400
commit    f1f8935a5c38a2c61e86a42bc971a2539eef2211 (patch)
tree      694950045f2f5d89507d7206cf6595e09cdfbd2c
parent    34116645d912f65d7eb4508a1db3c9d0e45facb1 (diff)
parent    f2a44523b20f323e4aef7c16261d34d6f0a4bf06 (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (97 commits)
jbd2: Unify log messages in jbd2 code
jbd/jbd2: validate sb->s_first in journal_get_superblock()
ext4: let ext4_ext_rm_leaf work with EXT_DEBUG defined
ext4: fix a syntax error in ext4_ext_insert_extent when debugging enabled
ext4: fix a typo in struct ext4_allocation_context
ext4: Don't normalize an falloc request if it can fit in 1 extent.
ext4: remove comments about extent mount option in ext4_new_inode()
ext4: let ext4_discard_partial_buffers handle unaligned range correctly
ext4: return ENOMEM if find_or_create_pages fails
ext4: move vars to local scope in ext4_discard_partial_page_buffers_no_lock()
ext4: Create helper function for EXT4_IO_END_UNWRITTEN and i_aiodio_unwritten
ext4: optimize locking for end_io extent conversion
ext4: remove unnecessary call to waitqueue_active()
ext4: Use correct locking for ext4_end_io_nolock()
ext4: fix race in xattr block allocation path
ext4: trace punch_hole correctly in ext4_ext_map_blocks
ext4: clean up AGGRESSIVE_TEST code
ext4: move variables to their scope
ext4: fix quota accounting during migration
ext4: migrate cleanup
...
34 files changed, 2898 insertions, 1329 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 232a575a0c48..4917cf24a5e0 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -160,7 +160,9 @@ noload			if the filesystem was not unmounted cleanly,
 			lead to any number of problems.
 
 data=journal		All data are committed into the journal prior to being
-			written into the main file system.
+			written into the main file system.  Enabling
+			this mode will disable delayed allocation and
+			O_DIRECT support.
 
 data=ordered	(*)	All data are forced directly out to the main file
 			system prior to its metadata being committed to the
@@ -201,30 +203,19 @@ inode_readahead_blks=n	This tuning parameter controls the maximum
 			table readahead algorithm will pre-read into
 			the buffer cache.  The default value is 32 blocks.
 
-orlov		(*)	This enables the new Orlov block allocator. It is
-			enabled by default.
-
-oldalloc		This disables the Orlov block allocator and enables
-			the old block allocator.  Orlov should have better
-			performance - we'd like to get some feedback if it's
-			the contrary for you.
-
-user_xattr		Enables Extended User Attributes.  Additionally, you
-			need to have extended attribute support enabled in the
-			kernel configuration (CONFIG_EXT4_FS_XATTR).  See the
-			attr(5) manual page and http://acl.bestbits.at/ to
-			learn more about extended attributes.
-
-nouser_xattr		Disables Extended User Attributes.
-
-acl			Enables POSIX Access Control Lists support.
-			Additionally, you need to have ACL support enabled in
-			the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL).
-			See the acl(5) manual page and http://acl.bestbits.at/
-			for more information.
+nouser_xattr		Disables Extended User Attributes.  If you have extended
+			attribute support enabled in the kernel configuration
+			(CONFIG_EXT4_FS_XATTR), extended attribute support
+			is enabled by default on mount.  See the attr(5) manual
+			page and http://acl.bestbits.at/ for more information
+			about extended attributes.
 
 noacl			This option disables POSIX Access Control List
-			support.
+			support.  If ACL support is enabled in the kernel
+			configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL is
+			enabled by default on mount.  See the acl(5) manual
+			page and http://acl.bestbits.at/ for more information
+			about acl.
 
 bsddf		(*)	Make 'df' act like BSD.
 minixdf			Make 'df' act like Minix.
@@ -419,8 +410,8 @@ written to the journal first, and then to its final location.
 In the event of a crash, the journal can be replayed, bringing both data and
 metadata into a consistent state.  This mode is the slowest except when data
 needs to be read from and written to disk at the same time where it
-outperforms all others modes.  Currently ext4 does not have delayed
-allocation support if this data journalling mode is selected.
+outperforms all others modes.  Enabling this mode will disable delayed
+allocation and O_DIRECT support.
 
 /proc entries
 =============
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f8224adf496e..f6dba4505f1c 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -28,7 +28,8 @@
  */
 
 /*
- * Calculate the block group number and offset, given a block number
+ * Calculate the block group number and offset into the block/cluster
+ * allocation bitmap, given a block number
  */
 void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 		ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
@@ -37,7 +38,8 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 	ext4_grpblk_t offset;
 
 	blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
-	offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb));
+	offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >>
+		EXT4_SB(sb)->s_cluster_bits;
 	if (offsetp)
 		*offsetp = offset;
 	if (blockgrpp)
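The interesting detail in this hunk is that do_div() divides blocknr in place and returns the remainder; with bigalloc that remainder is further shifted right by s_cluster_bits so the offset indexes a *cluster* bitmap rather than a block bitmap. A minimal userspace sketch of the same arithmetic (all constants hypothetical, not part of the patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t blocknr = 100000;          /* hypothetical fs-wide block number */
	uint32_t first_data_block = 0;      /* s_first_data_block */
	uint32_t blocks_per_group = 32768;  /* EXT4_BLOCKS_PER_GROUP() */
	unsigned cluster_bits = 4;          /* EXT4_SB(sb)->s_cluster_bits */

	blocknr -= first_data_block;
	uint32_t rem = blocknr % blocks_per_group;  /* do_div() return value */
	uint64_t grp = blocknr / blocks_per_group;  /* do_div() leaves quotient */

	/* bigalloc: bitmap bits count clusters, not blocks */
	printf("group %llu, bit %u of the cluster bitmap\n",
	       (unsigned long long)grp, rem >> cluster_bits);
	return 0;
}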
@@ -55,130 +57,169 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
 	return 0;
 }
 
-static int ext4_group_used_meta_blocks(struct super_block *sb,
-				       ext4_group_t block_group,
-				       struct ext4_group_desc *gdp)
+/* Return the number of clusters used for file system metadata; this
+ * represents the overhead needed by the file system.
+ */
+unsigned ext4_num_overhead_clusters(struct super_block *sb,
+				    ext4_group_t block_group,
+				    struct ext4_group_desc *gdp)
 {
-	ext4_fsblk_t tmp;
+	unsigned num_clusters;
+	int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c;
+	ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
+	ext4_fsblk_t itbl_blk;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	/* block bitmap, inode bitmap, and inode table blocks */
-	int used_blocks = sbi->s_itb_per_group + 2;
 
-	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
-		if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp),
-					block_group))
-			used_blocks--;
+	/* This is the number of clusters used by the superblock,
+	 * block group descriptors, and reserved block group
+	 * descriptor blocks */
+	num_clusters = ext4_num_base_meta_clusters(sb, block_group);
 
-		if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp),
-					block_group))
-			used_blocks--;
-
-		tmp = ext4_inode_table(sb, gdp);
-		for (; tmp < ext4_inode_table(sb, gdp) +
-				sbi->s_itb_per_group; tmp++) {
-			if (!ext4_block_in_group(sb, tmp, block_group))
-				used_blocks -= 1;
+	/*
+	 * For the allocation bitmaps and inode table, we first need
+	 * to check to see if the block is in the block group.  If it
+	 * is, then check to see if the cluster is already accounted
+	 * for in the clusters used for the base metadata cluster, or
+	 * if we can increment the base metadata cluster to include
+	 * that block.  Otherwise, we will have to track the cluster
+	 * used for the allocation bitmap or inode table explicitly.
+	 * Normally all of these blocks are contiguous, so the special
+	 * case handling shouldn't be necessary except for *very*
+	 * unusual file system layouts.
+	 */
+	if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
+		block_cluster = EXT4_B2C(sbi, (start -
+					       ext4_block_bitmap(sb, gdp)));
+		if (block_cluster < num_clusters)
+			block_cluster = -1;
+		else if (block_cluster == num_clusters) {
+			num_clusters++;
+			block_cluster = -1;
 		}
 	}
-	return used_blocks;
-}
 
-/* Initializes an uninitialized block bitmap if given, and returns the
- * number of blocks free in the group. */
-unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
-		 ext4_group_t block_group, struct ext4_group_desc *gdp)
-{
-	int bit, bit_max;
-	ext4_group_t ngroups = ext4_get_groups_count(sb);
-	unsigned free_blocks, group_blocks;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-
-	if (bh) {
-		J_ASSERT_BH(bh, buffer_locked(bh));
-
-		/* If checksum is bad mark all blocks used to prevent allocation
-		 * essentially implementing a per-group read-only flag. */
-		if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
-			ext4_error(sb, "Checksum bad for group %u",
-					block_group);
-			ext4_free_blks_set(sb, gdp, 0);
-			ext4_free_inodes_set(sb, gdp, 0);
-			ext4_itable_unused_set(sb, gdp, 0);
-			memset(bh->b_data, 0xff, sb->s_blocksize);
-			return 0;
+	if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
+		inode_cluster = EXT4_B2C(sbi,
+					 start - ext4_inode_bitmap(sb, gdp));
+		if (inode_cluster < num_clusters)
+			inode_cluster = -1;
+		else if (inode_cluster == num_clusters) {
+			num_clusters++;
+			inode_cluster = -1;
 		}
-		memset(bh->b_data, 0, sb->s_blocksize);
 	}
 
-	/* Check for superblock and gdt backups in this group */
-	bit_max = ext4_bg_has_super(sb, block_group);
-
-	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
-	    block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
-			  sbi->s_desc_per_block) {
-		if (bit_max) {
-			bit_max += ext4_bg_num_gdb(sb, block_group);
-			bit_max +=
-				le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+	itbl_blk = ext4_inode_table(sb, gdp);
+	for (i = 0; i < sbi->s_itb_per_group; i++) {
+		if (ext4_block_in_group(sb, itbl_blk + i, block_group)) {
+			c = EXT4_B2C(sbi, start - itbl_blk + i);
+			if ((c < num_clusters) || (c == inode_cluster) ||
+			    (c == block_cluster) || (c == itbl_cluster))
+				continue;
+			if (c == num_clusters) {
+				num_clusters++;
+				continue;
+			}
+			num_clusters++;
+			itbl_cluster = c;
 		}
-	} else { /* For META_BG_BLOCK_GROUPS */
-		bit_max += ext4_bg_num_gdb(sb, block_group);
 	}
 
-	if (block_group == ngroups - 1) {
+	if (block_cluster != -1)
+		num_clusters++;
+	if (inode_cluster != -1)
+		num_clusters++;
+
+	return num_clusters;
+}
+
+static unsigned int num_clusters_in_group(struct super_block *sb,
+					  ext4_group_t block_group)
+{
+	unsigned int blocks;
+
+	if (block_group == ext4_get_groups_count(sb) - 1) {
 		/*
-		 * Even though mke2fs always initialize first and last group
-		 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
-		 * to make sure we calculate the right free blocks
+		 * Even though mke2fs always initializes the first and
+		 * last group, just in case some other tool was used,
+		 * we need to make sure we calculate the right free
+		 * blocks.
 		 */
-		group_blocks = ext4_blocks_count(sbi->s_es) -
-			ext4_group_first_block_no(sb, ngroups - 1);
-	} else {
-		group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
-	}
+		blocks = ext4_blocks_count(EXT4_SB(sb)->s_es) -
+			ext4_group_first_block_no(sb, block_group);
+	} else
+		blocks = EXT4_BLOCKS_PER_GROUP(sb);
+	return EXT4_NUM_B2C(EXT4_SB(sb), blocks);
+}
 
-	free_blocks = group_blocks - bit_max;
+/* Initializes an uninitialized block bitmap */
+void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+			    ext4_group_t block_group,
+			    struct ext4_group_desc *gdp)
+{
+	unsigned int bit, bit_max;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	ext4_fsblk_t start, tmp;
+	int flex_bg = 0;
+
+	J_ASSERT_BH(bh, buffer_locked(bh));
+
+	/* If checksum is bad mark all blocks used to prevent allocation
+	 * essentially implementing a per-group read-only flag. */
+	if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+		ext4_error(sb, "Checksum bad for group %u", block_group);
+		ext4_free_group_clusters_set(sb, gdp, 0);
+		ext4_free_inodes_set(sb, gdp, 0);
+		ext4_itable_unused_set(sb, gdp, 0);
+		memset(bh->b_data, 0xff, sb->s_blocksize);
+		return;
+	}
+	memset(bh->b_data, 0, sb->s_blocksize);
 
-	if (bh) {
-		ext4_fsblk_t start, tmp;
-		int flex_bg = 0;
+	bit_max = ext4_num_base_meta_clusters(sb, block_group);
+	for (bit = 0; bit < bit_max; bit++)
+		ext4_set_bit(bit, bh->b_data);
 
-		for (bit = 0; bit < bit_max; bit++)
-			ext4_set_bit(bit, bh->b_data);
+	start = ext4_group_first_block_no(sb, block_group);
 
-		start = ext4_group_first_block_no(sb, block_group);
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+		flex_bg = 1;
 
-		if (EXT4_HAS_INCOMPAT_FEATURE(sb,
-					      EXT4_FEATURE_INCOMPAT_FLEX_BG))
-			flex_bg = 1;
+	/* Set bits for block and inode bitmaps, and inode table */
+	tmp = ext4_block_bitmap(sb, gdp);
+	if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+		ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
 
-		/* Set bits for block and inode bitmaps, and inode table */
-		tmp = ext4_block_bitmap(sb, gdp);
-		if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
-			ext4_set_bit(tmp - start, bh->b_data);
+	tmp = ext4_inode_bitmap(sb, gdp);
+	if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+		ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
 
-		tmp = ext4_inode_bitmap(sb, gdp);
+	tmp = ext4_inode_table(sb, gdp);
+	for (; tmp < ext4_inode_table(sb, gdp) +
+		     sbi->s_itb_per_group; tmp++) {
 		if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
-			ext4_set_bit(tmp - start, bh->b_data);
-
-		tmp = ext4_inode_table(sb, gdp);
-		for (; tmp < ext4_inode_table(sb, gdp) +
-				sbi->s_itb_per_group; tmp++) {
-			if (!flex_bg ||
-				ext4_block_in_group(sb, tmp, block_group))
-				ext4_set_bit(tmp - start, bh->b_data);
-		}
-		/*
-		 * Also if the number of blocks within the group is
-		 * less than the blocksize * 8 ( which is the size
-		 * of bitmap ), set rest of the block bitmap to 1
-		 */
-		ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
-				     bh->b_data);
+			ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
 	}
-	return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
+
+	/*
+	 * Also if the number of blocks within the group is less than
+	 * the blocksize * 8 ( which is the size of bitmap ), set rest
+	 * of the block bitmap to 1
+	 */
+	ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
+			     sb->s_blocksize * 8, bh->b_data);
 }
 
+/* Return the number of free blocks in a block group.  It is used when
+ * the block bitmap is uninitialized, so we can't just count the bits
+ * in the bitmap. */
+unsigned ext4_free_clusters_after_init(struct super_block *sb,
+				       ext4_group_t block_group,
+				       struct ext4_group_desc *gdp)
+{
+	return num_clusters_in_group(sb, block_group) -
+		ext4_num_overhead_clusters(sb, block_group, gdp);
+}
 
 /*
  * The free blocks are managed by bitmaps. A file system contains several
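One consequence of the new num_clusters_in_group() above: the block count is converted to clusters with EXT4_NUM_B2C(), which rounds *up*, so a short last group that ends partway through a cluster still accounts for that whole cluster. A hedged, standalone sketch of the rounding (all numbers invented for illustration):

#include <stdio.h>

int main(void)
{
	unsigned ratio = 16;      /* s_cluster_ratio, hypothetical */
	unsigned blocks = 1700;   /* blocks in a short last group, hypothetical */

	/* EXT4_NUM_B2C(): round the block count up to whole clusters */
	unsigned clusters = (blocks + ratio - 1) / ratio;

	printf("%u blocks -> %u clusters (last cluster only partly present)\n",
	       blocks, clusters);   /* 1700 blocks -> 107 clusters */
	return 0;
}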
@@ -362,53 +403,54 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 }
 
 /**
- * ext4_has_free_blocks()
+ * ext4_has_free_clusters()
  * @sbi:	in-core super block structure.
- * @nblocks:	number of needed blocks
+ * @nclusters:	number of needed blocks
+ * @flags:	flags from ext4_mb_new_blocks()
  *
- * Check if filesystem has nblocks free & available for allocation.
+ * Check if filesystem has nclusters free & available for allocation.
  * On success return 1, return 0 on failure.
  */
-static int ext4_has_free_blocks(struct ext4_sb_info *sbi,
-				s64 nblocks, unsigned int flags)
+static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
+				  s64 nclusters, unsigned int flags)
 {
-	s64 free_blocks, dirty_blocks, root_blocks;
-	struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
-	struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
+	s64 free_clusters, dirty_clusters, root_clusters;
+	struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
+	struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
 
-	free_blocks  = percpu_counter_read_positive(fbc);
-	dirty_blocks = percpu_counter_read_positive(dbc);
-	root_blocks = ext4_r_blocks_count(sbi->s_es);
+	free_clusters  = percpu_counter_read_positive(fcc);
+	dirty_clusters = percpu_counter_read_positive(dcc);
+	root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es));
 
-	if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
-						EXT4_FREEBLOCKS_WATERMARK) {
-		free_blocks  = percpu_counter_sum_positive(fbc);
-		dirty_blocks = percpu_counter_sum_positive(dbc);
+	if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
+					EXT4_FREECLUSTERS_WATERMARK) {
+		free_clusters  = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc));
+		dirty_clusters = percpu_counter_sum_positive(dcc);
 	}
-	/* Check whether we have space after
-	 * accounting for current dirty blocks & root reserved blocks.
+	/* Check whether we have space after accounting for current
+	 * dirty clusters & root reserved clusters.
 	 */
-	if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks))
+	if (free_clusters >= ((root_clusters + nclusters) + dirty_clusters))
 		return 1;
 
-	/* Hm, nope. Are (enough) root reserved blocks available? */
+	/* Hm, nope. Are (enough) root reserved clusters available? */
 	if (sbi->s_resuid == current_fsuid() ||
 	    ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
 	    capable(CAP_SYS_RESOURCE) ||
 		(flags & EXT4_MB_USE_ROOT_BLOCKS)) {
 
-		if (free_blocks >= (nblocks + dirty_blocks))
+		if (free_clusters >= (nclusters + dirty_clusters))
 			return 1;
 	}
 
 	return 0;
 }
 
-int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
-			   s64 nblocks, unsigned int flags)
+int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
+			     s64 nclusters, unsigned int flags)
 {
-	if (ext4_has_free_blocks(sbi, nblocks, flags)) {
-		percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks);
+	if (ext4_has_free_clusters(sbi, nclusters, flags)) {
+		percpu_counter_add(&sbi->s_dirtyclusters_counter, nclusters);
 		return 0;
 	} else
 		return -ENOSPC;
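The admission logic above reads the per-CPU counters cheaply first and only falls back to an exact percpu_counter_sum_positive() when the result lands near the watermark. A simplified userspace model of the final decision, with illustrative names (not kernel code):

#include <stdbool.h>
#include <stdint.h>

bool has_free_clusters(int64_t free, int64_t dirty, int64_t root_resv,
		       int64_t wanted, bool may_use_root)
{
	/* enough space left after the root reservation and in-flight
	 * (dirty, delayed-allocated) clusters are subtracted? */
	if (free >= root_resv + wanted + dirty)
		return true;
	/* privileged callers may dip into the root reservation */
	if (may_use_root && free >= wanted + dirty)
		return true;
	return false;
}

On success, ext4_claim_free_clusters() then adds the request to the dirty-clusters counter, so concurrent callers cannot all claim the same free space.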
@@ -428,7 +470,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
  */
 int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 {
-	if (!ext4_has_free_blocks(EXT4_SB(sb), 1, 0) ||
+	if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
 	    (*retries)++ > 3 ||
 	    !EXT4_SB(sb)->s_journal)
 		return 0;
@@ -444,7 +486,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
  * @handle:	handle to this transaction
  * @inode:	file inode
  * @goal:	given target block(filesystem wide)
- * @count:	pointer to total number of blocks needed
+ * @count:	pointer to total number of clusters needed
  * @errp:	error code
  *
  * Return 1st allocated block number on success, *count stores total account
@@ -476,18 +518,19 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 		spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 		EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
 		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-		dquot_alloc_block_nofail(inode, ar.len);
+		dquot_alloc_block_nofail(inode,
+				EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
 	}
 	return ret;
 }
 
 /**
- * ext4_count_free_blocks() -- count filesystem free blocks
+ * ext4_count_free_clusters() -- count filesystem free clusters
  * @sb:		superblock
  *
- * Adds up the number of free blocks from each block group.
+ * Adds up the number of free clusters from each block group.
  */
-ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
+ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
 {
 	ext4_fsblk_t desc_count;
 	struct ext4_group_desc *gdp;
@@ -508,7 +551,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
-		desc_count += ext4_free_blks_count(sb, gdp);
+		desc_count += ext4_free_group_clusters(sb, gdp);
 		brelse(bitmap_bh);
 		bitmap_bh = ext4_read_block_bitmap(sb, i);
 		if (bitmap_bh == NULL)
@@ -516,12 +559,13 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 
 		x = ext4_count_free(bitmap_bh, sb->s_blocksize);
 		printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
-			i, ext4_free_blks_count(sb, gdp), x);
+			i, ext4_free_group_clusters(sb, gdp), x);
 		bitmap_count += x;
 	}
 	brelse(bitmap_bh);
-	printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu"
-		", computed = %llu, %llu\n", ext4_free_blocks_count(es),
+	printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
+	       ", computed = %llu, %llu\n",
+	       EXT4_B2C(sbi, ext4_free_blocks_count(es)),
 	       desc_count, bitmap_count);
 	return bitmap_count;
 #else
@@ -530,7 +574,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
-		desc_count += ext4_free_blks_count(sb, gdp);
+		desc_count += ext4_free_group_clusters(sb, gdp);
 	}
 
 	return desc_count;
@@ -620,6 +664,31 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
 
 }
 
+/*
+ * This function returns the number of file system metadata clusters at
+ * the beginning of a block group, including the reserved gdt blocks.
+ */
+unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+				     ext4_group_t block_group)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	unsigned num;
+
+	/* Check for superblock and gdt backups in this group */
+	num = ext4_bg_has_super(sb, block_group);
+
+	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
+	    block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
+			  sbi->s_desc_per_block) {
+		if (num) {
+			num += ext4_bg_num_gdb(sb, block_group);
+			num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+		}
+	} else { /* For META_BG_BLOCK_GROUPS */
+		num += ext4_bg_num_gdb(sb, block_group);
+	}
+	return EXT4_NUM_B2C(sbi, num);
+}
 /**
  * ext4_inode_to_goal_block - return a hint for block allocation
  * @inode: inode for block allocation
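ext4_num_base_meta_clusters() is essentially the old bit_max computation factored out of ext4_init_block_bitmap(), with the result rounded up to clusters. A small standalone sketch of the same arithmetic, with made-up geometry (a group carrying a superblock backup on a non-META_BG layout):

#include <stdio.h>

int main(void)
{
	unsigned has_super  = 1;    /* ext4_bg_has_super(), hypothetical */
	unsigned gdb_blocks = 2;    /* ext4_bg_num_gdb(), hypothetical */
	unsigned resv_gdt   = 32;   /* s_reserved_gdt_blocks, hypothetical */
	unsigned ratio      = 16;   /* s_cluster_ratio, hypothetical */

	unsigned num = has_super;
	if (num)                    /* gdt only follows a superblock backup */
		num += gdb_blocks + resv_gdt;

	/* EXT4_NUM_B2C(): 35 metadata blocks -> 3 clusters at ratio 16 */
	printf("%u metadata blocks -> %u clusters\n",
	       num, (num + ratio - 1) / ratio);
	return 0;
}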
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cec3145e532c..5b0e26a1272d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -144,9 +144,17 @@ struct ext4_allocation_request {
 #define EXT4_MAP_UNWRITTEN	(1 << BH_Unwritten)
 #define EXT4_MAP_BOUNDARY	(1 << BH_Boundary)
 #define EXT4_MAP_UNINIT		(1 << BH_Uninit)
+/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
+ * ext4_map_blocks wants to know whether or not the underlying cluster has
+ * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
+ * the requested mapping was from previously mapped (or delayed allocated)
+ * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
+ * should never appear on buffer_head's state flags.
+ */
+#define EXT4_MAP_FROM_CLUSTER	(1 << BH_AllocFromCluster)
 #define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
 				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
-				 EXT4_MAP_UNINIT)
+				 EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
 
 struct ext4_map_blocks {
 	ext4_fsblk_t m_pblk;
@@ -239,8 +247,11 @@ struct ext4_io_submit {
 # define EXT4_BLOCK_SIZE(s)	(EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
 #endif
 #define	EXT4_ADDR_PER_BLOCK(s)	(EXT4_BLOCK_SIZE(s) / sizeof(__u32))
+#define EXT4_CLUSTER_SIZE(s)	(EXT4_BLOCK_SIZE(s) << \
+				 EXT4_SB(s)->s_cluster_bits)
 #ifdef __KERNEL__
 # define EXT4_BLOCK_SIZE_BITS(s)	((s)->s_blocksize_bits)
+# define EXT4_CLUSTER_BITS(s)		(EXT4_SB(s)->s_cluster_bits)
 #else
 # define EXT4_BLOCK_SIZE_BITS(s)	((s)->s_log_block_size + 10)
 #endif
@@ -258,6 +269,14 @@ struct ext4_io_submit {
 #endif
 #define EXT4_BLOCK_ALIGN(size, blkbits)		ALIGN((size), (1 << (blkbits)))
 
+/* Translate a block number to a cluster number */
+#define EXT4_B2C(sbi, blk)	((blk) >> (sbi)->s_cluster_bits)
+/* Translate a cluster number to a block number */
+#define EXT4_C2B(sbi, cluster)	((cluster) << (sbi)->s_cluster_bits)
+/* Translate # of blks to # of clusters */
+#define EXT4_NUM_B2C(sbi, blks)	(((blks) + (sbi)->s_cluster_ratio - 1) >> \
+				 (sbi)->s_cluster_bits)
+
 /*
  * Structure of a blocks group descriptor
  */
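These three macros are the heart of the bigalloc unit conversion. A tiny self-contained C demonstration of their behavior, assuming a hypothetical file system with 4KiB blocks and 64KiB clusters (s_cluster_bits == 4, s_cluster_ratio == 16):

#include <stdio.h>

#define B2C(blk, bits)              ((blk) >> (bits))   /* like EXT4_B2C */
#define C2B(cluster, bits)          ((cluster) << (bits))   /* like EXT4_C2B */
#define NUM_B2C(blks, ratio, bits)  (((blks) + (ratio) - 1) >> (bits))   /* like EXT4_NUM_B2C */

int main(void)
{
	unsigned bits = 4, ratio = 1U << bits;

	printf("block 100 lies in cluster %u\n", B2C(100u, bits));          /* 6 */
	printf("cluster 6 starts at block %u\n", C2B(6u, bits));            /* 96 */
	printf("100 blocks occupy %u clusters\n", NUM_B2C(100u, ratio, bits)); /* 7 */
	return 0;
}

Note that EXT4_B2C truncates while EXT4_NUM_B2C rounds up, which is exactly the difference between "which cluster is this block in" and "how many clusters does this many blocks consume".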
@@ -289,7 +308,7 @@ struct ext4_group_desc
 
 struct flex_groups {
 	atomic_t free_inodes;
-	atomic_t free_blocks;
+	atomic_t free_clusters;
 	atomic_t used_dirs;
 };
 
@@ -306,6 +325,7 @@ struct flex_groups {
 #define EXT4_DESC_SIZE(s)		(EXT4_SB(s)->s_desc_size)
 #ifdef __KERNEL__
 # define EXT4_BLOCKS_PER_GROUP(s)	(EXT4_SB(s)->s_blocks_per_group)
+# define EXT4_CLUSTERS_PER_GROUP(s)	(EXT4_SB(s)->s_clusters_per_group)
 # define EXT4_DESC_PER_BLOCK(s)		(EXT4_SB(s)->s_desc_per_block)
 # define EXT4_INODES_PER_GROUP(s)	(EXT4_SB(s)->s_inodes_per_group)
 # define EXT4_DESC_PER_BLOCK_BITS(s)	(EXT4_SB(s)->s_desc_per_block_bits)
@@ -358,8 +378,7 @@ struct flex_groups {
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
-			   EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
-			   EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
+			   EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
 			   EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
 			   EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
 
| 365 | 384 | ||
| @@ -520,6 +539,8 @@ struct ext4_new_group_data { | |||
| 520 | #define EXT4_GET_BLOCKS_PUNCH_OUT_EXT 0x0020 | 539 | #define EXT4_GET_BLOCKS_PUNCH_OUT_EXT 0x0020 |
| 521 | /* Don't normalize allocation size (used for fallocate) */ | 540 | /* Don't normalize allocation size (used for fallocate) */ |
| 522 | #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 | 541 | #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 |
| 542 | /* Request will not result in inode size update (user for fallocate) */ | ||
| 543 | #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 | ||
| 523 | 544 | ||
| 524 | /* | 545 | /* |
| 525 | * Flags used by ext4_free_blocks | 546 | * Flags used by ext4_free_blocks |
@@ -528,6 +549,13 @@ struct ext4_new_group_data {
 #define EXT4_FREE_BLOCKS_FORGET		0x0002
 #define EXT4_FREE_BLOCKS_VALIDATED	0x0004
 #define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE	0x0008
+#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER	0x0010
+#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER	0x0020
+
+/*
+ * Flags used by ext4_discard_partial_page_buffers
+ */
+#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED	0x0001
 
 /*
  * ioctl commands
@@ -538,9 +566,6 @@ struct ext4_new_group_data {
 #define EXT4_IOC_SETVERSION		_IOW('f', 4, long)
 #define EXT4_IOC_GETVERSION_OLD		FS_IOC_GETVERSION
 #define EXT4_IOC_SETVERSION_OLD		FS_IOC_SETVERSION
-#ifdef CONFIG_JBD2_DEBUG
-#define EXT4_IOC_WAIT_FOR_READONLY	_IOR('f', 99, long)
-#endif
 #define EXT4_IOC_GETRSVSZ		_IOR('f', 5, long)
 #define EXT4_IOC_SETRSVSZ		_IOW('f', 6, long)
 #define EXT4_IOC_GROUP_EXTEND		_IOW('f', 7, unsigned long)
@@ -563,9 +588,6 @@ struct ext4_new_group_data {
 #define EXT4_IOC32_SETRSVSZ		_IOW('f', 6, int)
 #define EXT4_IOC32_GROUP_EXTEND		_IOW('f', 7, unsigned int)
 #define EXT4_IOC32_GROUP_ADD		_IOW('f', 8, struct compat_ext4_new_group_input)
-#ifdef CONFIG_JBD2_DEBUG
-#define EXT4_IOC32_WAIT_FOR_READONLY	_IOR('f', 99, int)
-#endif
 #define EXT4_IOC32_GETVERSION_OLD	FS_IOC32_GETVERSION
 #define EXT4_IOC32_SETVERSION_OLD	FS_IOC32_SETVERSION
 #endif
@@ -837,6 +859,7 @@ struct ext4_inode_info {
 	ext4_group_t	i_last_alloc_group;
 
 	/* allocation reservation info for delalloc */
+	/* In case of bigalloc, these refer to clusters rather than blocks */
 	unsigned int i_reserved_data_blocks;
 	unsigned int i_reserved_meta_blocks;
 	unsigned int i_allocated_meta_blocks;
@@ -886,7 +909,6 @@ struct ext4_inode_info {
 /*
  * Mount flags
  */
-#define EXT4_MOUNT_OLDALLOC		0x00002  /* Don't use the new Orlov allocator */
 #define EXT4_MOUNT_GRPID		0x00004	/* Create files with directory's group */
 #define EXT4_MOUNT_DEBUG		0x00008	/* Some debugging messages */
 #define EXT4_MOUNT_ERRORS_CONT		0x00010	/* Continue on errors */
@@ -918,6 +940,9 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_DISCARD		0x40000000 /* Issue DISCARD requests */
 #define EXT4_MOUNT_INIT_INODE_TABLE	0x80000000 /* Initialize uninitialized itables */
 
+#define EXT4_MOUNT2_EXPLICIT_DELALLOC	0x00000001 /* User explicitly
+						      specified delalloc */
+
 #define clear_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt &= \
 						~EXT4_MOUNT_##opt
 #define set_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt |= \
@@ -968,9 +993,9 @@ struct ext4_super_block {
/*10*/	__le32	s_free_inodes_count;	/* Free inodes count */
 	__le32	s_first_data_block;	/* First Data Block */
 	__le32	s_log_block_size;	/* Block size */
-	__le32	s_obso_log_frag_size;	/* Obsoleted fragment size */
+	__le32	s_log_cluster_size;	/* Allocation cluster size */
/*20*/	__le32	s_blocks_per_group;	/* # Blocks per group */
-	__le32	s_obso_frags_per_group;	/* Obsoleted fragments per group */
+	__le32	s_clusters_per_group;	/* # Clusters per group */
 	__le32	s_inodes_per_group;	/* # Inodes per group */
 	__le32	s_mtime;		/* Mount time */
/*30*/	__le32	s_wtime;		/* Write time */
@@ -1066,7 +1091,10 @@ struct ext4_super_block {
 	__u8	s_last_error_func[32];	/* function where the error happened */
 #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
 	__u8	s_mount_opts[64];
-	__le32	s_reserved[112];	/* Padding to the end of the block */
+	__le32	s_usr_quota_inum;	/* inode for tracking user quota */
+	__le32	s_grp_quota_inum;	/* inode for tracking group quota */
+	__le32	s_overhead_clusters;	/* overhead blocks/clusters in fs */
+	__le32	s_reserved[109];	/* Padding to the end of the block */
 };
 
 #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
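The three new __le32 fields replace exactly three reserved words (112 - 109 = 3), so the on-disk superblock stays the same size and the offset of every later field is unchanged. A quick compile-time check of that arithmetic (editor's sketch, not from the patch):

#include <stdint.h>

struct sb_tail_old { uint32_t s_reserved[112]; };
struct sb_tail_new {
	uint32_t s_usr_quota_inum;     /* inode for tracking user quota */
	uint32_t s_grp_quota_inum;     /* inode for tracking group quota */
	uint32_t s_overhead_clusters;  /* overhead blocks/clusters in fs */
	uint32_t s_reserved[109];      /* padding to the end of the block */
};

_Static_assert(sizeof(struct sb_tail_old) == sizeof(struct sb_tail_new),
	       "on-disk superblock layout must not grow");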
@@ -1086,6 +1114,7 @@ struct ext4_sb_info {
 	unsigned long s_desc_size;	/* Size of a group descriptor in bytes */
 	unsigned long s_inodes_per_block;/* Number of inodes per block */
 	unsigned long s_blocks_per_group;/* Number of blocks in a group */
+	unsigned long s_clusters_per_group; /* Number of clusters in a group */
 	unsigned long s_inodes_per_group;/* Number of inodes in a group */
 	unsigned long s_itb_per_group;	/* Number of inode table blocks per group */
 	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
@@ -1094,6 +1123,8 @@ struct ext4_sb_info {
 	ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
 	unsigned long s_overhead_last;  /* Last calculated overhead */
 	unsigned long s_blocks_last;    /* Last seen block count */
+	unsigned int s_cluster_ratio;	/* Number of blocks per cluster */
+	unsigned int s_cluster_bits;	/* log2 of s_cluster_ratio */
 	loff_t s_bitmap_maxbytes;	/* max bytes for bitmap files */
 	struct buffer_head * s_sbh;	/* Buffer containing the super block */
 	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
@@ -1117,10 +1148,10 @@ struct ext4_sb_info {
 	u32 s_hash_seed[4];
 	int s_def_hash_version;
 	int s_hash_unsigned;	/* 3 if hash should be signed, 0 if not */
-	struct percpu_counter s_freeblocks_counter;
+	struct percpu_counter s_freeclusters_counter;
 	struct percpu_counter s_freeinodes_counter;
 	struct percpu_counter s_dirs_counter;
-	struct percpu_counter s_dirtyblocks_counter;
+	struct percpu_counter s_dirtyclusters_counter;
 	struct blockgroup_lock *s_blockgroup_lock;
 	struct proc_dir_entry *s_proc;
 	struct kobject s_kobj;
@@ -1136,10 +1167,6 @@ struct ext4_sb_info {
 	u32 s_max_batch_time;
 	u32 s_min_batch_time;
 	struct block_device *journal_bdev;
-#ifdef CONFIG_JBD2_DEBUG
-	struct timer_list turn_ro_timer;	/* For turning read-only (crash simulation) */
-	wait_queue_head_t ro_wait_queue;	/* For people waiting for the fs to go read-only */
-#endif
 #ifdef CONFIG_QUOTA
 	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
 	int s_jquota_fmt;			/* Format of quota to use */
@@ -1248,6 +1275,15 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 		 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
 
+static inline void ext4_set_io_unwritten_flag(struct inode *inode,
+					      struct ext4_io_end *io_end)
+{
+	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+		io_end->flag |= EXT4_IO_END_UNWRITTEN;
+		atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+	}
+}
+
 /*
  * Inode dynamic state flags
  */
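The helper tests the flag before setting it so that i_aiodio_unwritten is bumped exactly once per io_end, even if the unwritten-conversion path is reached more than once for the same I/O. An illustrative userspace model of that idempotence (not kernel code):

#include <stdio.h>

struct io_end { unsigned flag; };
#define IO_END_UNWRITTEN 0x1

static int unwritten_count; /* stands in for EXT4_I(inode)->i_aiodio_unwritten */

static void set_io_unwritten_flag(struct io_end *io)
{
	if (!(io->flag & IO_END_UNWRITTEN)) {
		io->flag |= IO_END_UNWRITTEN;
		unwritten_count++;   /* counted once per io_end */
	}
}

int main(void)
{
	struct io_end io = { 0 };
	set_io_unwritten_flag(&io);
	set_io_unwritten_flag(&io);   /* second call is a no-op */
	printf("count = %d\n", unwritten_count);   /* prints 1 */
	return 0;
}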
@@ -1360,6 +1396,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK	0x0020
 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE	0x0040
 #define EXT4_FEATURE_RO_COMPAT_QUOTA		0x0100
+#define EXT4_FEATURE_RO_COMPAT_BIGALLOC		0x0200
 
 #define EXT4_FEATURE_INCOMPAT_COMPRESSION	0x0001
 #define EXT4_FEATURE_INCOMPAT_FILETYPE		0x0002
@@ -1402,7 +1439,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 					 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
 					 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
 					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
-					 EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
+					 EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
+					 EXT4_FEATURE_RO_COMPAT_BIGALLOC)
 
 /*
  * Default values for user and/or group using reserved blocks
@@ -1735,9 +1773,9 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 					 unsigned int flags,
 					 unsigned long *count,
 					 int *errp);
-extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
-				  s64 nblocks, unsigned int flags);
-extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
+extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
+				    s64 nclusters, unsigned int flags);
+extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
 extern void ext4_check_blocks_bitmap(struct super_block *);
 extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
 						    ext4_group_t block_group,
@@ -1745,12 +1783,18 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
 struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
 				      ext4_group_t block_group);
-extern unsigned ext4_init_block_bitmap(struct super_block *sb,
-				       struct buffer_head *bh,
-				       ext4_group_t group,
-				       struct ext4_group_desc *desc);
-#define ext4_free_blocks_after_init(sb, group, desc)			\
-		ext4_init_block_bitmap(sb, NULL, group, desc)
+extern void ext4_init_block_bitmap(struct super_block *sb,
+				   struct buffer_head *bh,
+				   ext4_group_t group,
+				   struct ext4_group_desc *desc);
+extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
+					      ext4_group_t block_group,
+					      struct ext4_group_desc *gdp);
+extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+					    ext4_group_t block_group);
+extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
+					   ext4_group_t block_group,
+					   struct ext4_group_desc *gdp);
 ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
 
 /* dir.c */
@@ -1776,7 +1820,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
 
 /* ialloc.c */
 extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
-				    const struct qstr *qstr, __u32 goal);
+				    const struct qstr *qstr, __u32 goal,
+				    uid_t *owner);
 extern void ext4_free_inode(handle_t *, struct inode *);
 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -1839,6 +1884,12 @@ extern int ext4_block_truncate_page(handle_t *handle,
 		struct address_space *mapping, loff_t from);
 extern int ext4_block_zero_page_range(handle_t *handle,
 		struct address_space *mapping, loff_t from, loff_t length);
+extern int ext4_discard_partial_page_buffers(handle_t *handle,
+		struct address_space *mapping, loff_t from,
+		loff_t length, int flags);
+extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+		struct inode *inode, struct page *page, loff_t from,
+		loff_t length, int flags);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -1927,8 +1978,8 @@ extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 				      struct ext4_group_desc *bg);
 extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 				     struct ext4_group_desc *bg);
-extern __u32 ext4_free_blks_count(struct super_block *sb,
-				  struct ext4_group_desc *bg);
+extern __u32 ext4_free_group_clusters(struct super_block *sb,
+				      struct ext4_group_desc *bg);
 extern __u32 ext4_free_inodes_count(struct super_block *sb,
 				    struct ext4_group_desc *bg);
 extern __u32 ext4_used_dirs_count(struct super_block *sb,
@@ -1941,8 +1992,9 @@ extern void ext4_inode_bitmap_set(struct super_block *sb,
 				  struct ext4_group_desc *bg, ext4_fsblk_t blk);
 extern void ext4_inode_table_set(struct super_block *sb,
 				 struct ext4_group_desc *bg, ext4_fsblk_t blk);
-extern void ext4_free_blks_set(struct super_block *sb,
-			       struct ext4_group_desc *bg, __u32 count);
+extern void ext4_free_group_clusters_set(struct super_block *sb,
+					 struct ext4_group_desc *bg,
+					 __u32 count);
 extern void ext4_free_inodes_set(struct super_block *sb,
 				 struct ext4_group_desc *bg, __u32 count);
 extern void ext4_used_dirs_set(struct super_block *sb,
| @@ -2051,13 +2103,13 @@ do { \ | |||
| 2051 | } while (0) | 2103 | } while (0) |
| 2052 | 2104 | ||
| 2053 | #ifdef CONFIG_SMP | 2105 | #ifdef CONFIG_SMP |
| 2054 | /* Each CPU can accumulate percpu_counter_batch blocks in their local | 2106 | /* Each CPU can accumulate percpu_counter_batch clusters in their local |
| 2055 | * counters. So we need to make sure we have free blocks more | 2107 | * counters. So we need to make sure we have free clusters more |
| 2056 | * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times. | 2108 | * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times. |
| 2057 | */ | 2109 | */ |
| 2058 | #define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids)) | 2110 | #define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids)) |
| 2059 | #else | 2111 | #else |
| 2060 | #define EXT4_FREEBLOCKS_WATERMARK 0 | 2112 | #define EXT4_FREECLUSTERS_WATERMARK 0 |
| 2061 | #endif | 2113 | #endif |
| 2062 | 2114 | ||
| 2063 | static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) | 2115 | static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) |
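The renamed watermark above marks the point below which the cheap per-CPU free-cluster count can no longer be trusted: each CPU may hold up to percpu_counter_batch unflushed deltas, so the approximate read can be off by batch * nr_cpu_ids, and callers must fall back to an exact sum. A minimal userspace sketch of that check follows; the batch value of 32 and the fallback wording are assumptions for illustration, not the kernel API.

```c
#include <stdio.h>

#define PERCPU_COUNTER_BATCH 32	/* assumed kernel default batch size */

/* Mirrors EXT4_FREECLUSTERS_WATERMARK on SMP builds. */
static long freeclusters_watermark(long nr_cpu_ids)
{
	return 4 * (PERCPU_COUNTER_BATCH * nr_cpu_ids);
}

int main(void)
{
	long nr_cpu_ids = 8;
	long approx_free = 500;	/* cheap, possibly stale counter read */

	long wmark = freeclusters_watermark(nr_cpu_ids);
	printf("watermark = %ld clusters\n", wmark);	/* 1024 */

	/*
	 * Below the watermark the approximate count may be wrong by up
	 * to PERCPU_COUNTER_BATCH * nr_cpu_ids, so a caller would take
	 * an exact (summed) percpu_counter read before deciding ENOSPC.
	 */
	if (approx_free < wmark)
		printf("near ENOSPC: take the exact, slower path\n");
	return 0;
}
```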
| @@ -2243,10 +2295,19 @@ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); | |||
| 2243 | enum ext4_state_bits { | 2295 | enum ext4_state_bits { |
| 2244 | BH_Uninit /* blocks are allocated but uninitialized on disk */ | 2296 | BH_Uninit /* blocks are allocated but uninitialized on disk */ |
| 2245 | = BH_JBDPrivateStart, | 2297 | = BH_JBDPrivateStart, |
| 2298 | BH_AllocFromCluster, /* allocated blocks were part of already | ||
| 2299 | * allocated cluster. Note that this flag will | ||
| 2300 | * never, ever appear in a buffer_head's state | ||
| 2301 | * flag. See EXT4_MAP_FROM_CLUSTER to see where | ||
| 2302 | * this is used. */ | ||
| 2303 | BH_Da_Mapped, /* Delayed allocated block that now has a mapping. This | ||
| 2304 | * flag is set when ext4_map_blocks is called on a | ||
| 2305 | * delayed allocated block to get its real mapping. */ | ||
| 2246 | }; | 2306 | }; |
| 2247 | 2307 | ||
| 2248 | BUFFER_FNS(Uninit, uninit) | 2308 | BUFFER_FNS(Uninit, uninit) |
| 2249 | TAS_BUFFER_FNS(Uninit, uninit) | 2309 | TAS_BUFFER_FNS(Uninit, uninit) |
| 2310 | BUFFER_FNS(Da_Mapped, da_mapped) | ||
| 2250 | 2311 | ||
| 2251 | /* | 2312 | /* |
| 2252 | * Add new method to test whether block and inode bitmaps are properly | 2313 | * Add new method to test whether block and inode bitmaps are properly |
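The two new state bits above piggyback on BH_JBDPrivateStart, and the added BUFFER_FNS(Da_Mapped, da_mapped) line stamps out the usual set/clear/test helpers for the "delalloc block that now has a real mapping" flag. A self-contained model of that macro pattern, using a plain bitmask instead of the kernel's atomic bitops:

```c
#include <stdio.h>

/* Stand-ins: the real enum starts at BH_JBDPrivateStart and the real
 * helpers live in include/linux/buffer_head.h. */
enum bh_state_bits { BH_Uninit, BH_AllocFromCluster, BH_Da_Mapped };

struct buffer_head { unsigned long b_state; };

#define BUFFER_FNS(bit, name)						\
static void set_buffer_##name(struct buffer_head *bh)			\
{ bh->b_state |= 1UL << BH_##bit; }					\
static void clear_buffer_##name(struct buffer_head *bh)		\
{ bh->b_state &= ~(1UL << BH_##bit); }					\
static int buffer_##name(const struct buffer_head *bh)			\
{ return !!(bh->b_state & (1UL << BH_##bit)); }

/* One line expands to set_buffer_da_mapped(), clear_buffer_da_mapped()
 * and buffer_da_mapped(). */
BUFFER_FNS(Da_Mapped, da_mapped)

int main(void)
{
	struct buffer_head bh = { 0 };

	set_buffer_da_mapped(&bh);
	printf("da_mapped=%d\n", buffer_da_mapped(&bh));	/* 1 */
	clear_buffer_da_mapped(&bh);
	printf("da_mapped=%d\n", buffer_da_mapped(&bh));	/* 0 */
	return 0;
}
```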
| @@ -2282,4 +2343,6 @@ extern void ext4_resize_end(struct super_block *sb); | |||
| 2282 | 2343 | ||
| 2283 | #endif /* __KERNEL__ */ | 2344 | #endif /* __KERNEL__ */ |
| 2284 | 2345 | ||
| 2346 | #include "ext4_extents.h" | ||
| 2347 | |||
| 2285 | #endif /* _EXT4_H */ | 2348 | #endif /* _EXT4_H */ |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 095c36f3b612..a52db3a69a30 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
| @@ -290,5 +290,7 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, | |||
| 290 | struct ext4_ext_path *); | 290 | struct ext4_ext_path *); |
| 291 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); | 291 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); |
| 292 | extern int ext4_ext_check_inode(struct inode *inode); | 292 | extern int ext4_ext_check_inode(struct inode *inode); |
| 293 | extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk, | ||
| 294 | int search_hint_reverse); | ||
| 293 | #endif /* _EXT4_EXTENTS */ | 295 | #endif /* _EXT4_EXTENTS */ |
| 294 | 296 | ||
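The new ext4_find_delalloc_cluster() declaration above pairs with the ext4_find_delalloc_range() helper added later in this series: a per-cluster query can reduce to a range search over every block of the cluster containing lblk. A hypothetical sketch of that reduction, with shifts standing in for the cluster math and a hard-coded answer standing in for the real buffer-head walk:

```c
#include <stdio.h>

#define CLUSTER_BITS 4	/* 16 blocks per cluster (illustrative) */

/* Stand-in for ext4_find_delalloc_range(); pretends block 70 is the
 * only delalloc block in the file. */
static int find_delalloc_range(unsigned start, unsigned end)
{
	return start <= 70 && 70 <= end;
}

/* Assumed shape of the wrapper: widen lblk to its whole cluster. */
static int find_delalloc_cluster(unsigned lblk)
{
	unsigned start = lblk & ~((1U << CLUSTER_BITS) - 1);
	unsigned end = start + (1U << CLUSTER_BITS) - 1;

	return find_delalloc_range(start, end);
}

int main(void)
{
	printf("%d\n", find_delalloc_cluster(65));	/* 1: scans 64..79 */
	printf("%d\n", find_delalloc_cluster(130));	/* 0: scans 128..143 */
	return 0;
}
```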
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index f5240aa15601..aca179017582 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
| @@ -109,9 +109,11 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, | |||
| 109 | 109 | ||
| 110 | if (ext4_handle_valid(handle)) { | 110 | if (ext4_handle_valid(handle)) { |
| 111 | err = jbd2_journal_dirty_metadata(handle, bh); | 111 | err = jbd2_journal_dirty_metadata(handle, bh); |
| 112 | if (err) | 112 | if (err) { |
| 113 | ext4_journal_abort_handle(where, line, __func__, | 113 | /* Errors can only happen if there is a bug */ |
| 114 | bh, handle, err); | 114 | handle->h_err = err; |
| 115 | __ext4_journal_stop(where, line, handle); | ||
| 116 | } | ||
| 115 | } else { | 117 | } else { |
| 116 | if (inode) | 118 | if (inode) |
| 117 | mark_buffer_dirty_inode(bh, inode); | 119 | mark_buffer_dirty_inode(bh, inode); |
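The ext4_jbd2.c hunk above softens the error path: a jbd2_journal_dirty_metadata() failure now records the error on the current handle and stops that handle, rather than aborting the whole journal. A toy model of the new control flow; struct handle and both helpers here are stand-ins, not the real kernel types:

```c
#include <stdio.h>

#define EIO 5

struct handle { int h_err; int stopped; };

static int jbd2_dirty_metadata(struct handle *h)
{
	(void)h;
	return -EIO;	/* "errors can only happen if there is a bug" */
}

static void journal_stop(struct handle *h)
{
	h->stopped = 1;	/* models __ext4_journal_stop() */
}

int main(void)
{
	struct handle h = { 0, 0 };
	int err = jbd2_dirty_metadata(&h);

	if (err) {
		/* New behavior: fail just this handle, not the journal. */
		h.h_err = err;
		journal_stop(&h);
	}
	printf("h_err=%d stopped=%d\n", h.h_err, h.stopped);
	return 0;
}
```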
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 57cf568a98ab..61fa9e1614af 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
| @@ -42,7 +42,6 @@ | |||
| 42 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
| 43 | #include <linux/fiemap.h> | 43 | #include <linux/fiemap.h> |
| 44 | #include "ext4_jbd2.h" | 44 | #include "ext4_jbd2.h" |
| 45 | #include "ext4_extents.h" | ||
| 46 | 45 | ||
| 47 | #include <trace/events/ext4.h> | 46 | #include <trace/events/ext4.h> |
| 48 | 47 | ||
| @@ -96,13 +95,17 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode, | |||
| 96 | * - ENOMEM | 95 | * - ENOMEM |
| 97 | * - EIO | 96 | * - EIO |
| 98 | */ | 97 | */ |
| 99 | static int ext4_ext_dirty(handle_t *handle, struct inode *inode, | 98 | #define ext4_ext_dirty(handle, inode, path) \ |
| 100 | struct ext4_ext_path *path) | 99 | __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path)) |
| 100 | static int __ext4_ext_dirty(const char *where, unsigned int line, | ||
| 101 | handle_t *handle, struct inode *inode, | ||
| 102 | struct ext4_ext_path *path) | ||
| 101 | { | 103 | { |
| 102 | int err; | 104 | int err; |
| 103 | if (path->p_bh) { | 105 | if (path->p_bh) { |
| 104 | /* path points to block */ | 106 | /* path points to block */ |
| 105 | err = ext4_handle_dirty_metadata(handle, inode, path->p_bh); | 107 | err = __ext4_handle_dirty_metadata(where, line, handle, |
| 108 | inode, path->p_bh); | ||
| 106 | } else { | 109 | } else { |
| 107 | /* path points to leaf/index in inode body */ | 110 | /* path points to leaf/index in inode body */ |
| 108 | err = ext4_mark_inode_dirty(handle, inode); | 111 | err = ext4_mark_inode_dirty(handle, inode); |
| @@ -114,11 +117,9 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
| 114 | struct ext4_ext_path *path, | 117 | struct ext4_ext_path *path, |
| 115 | ext4_lblk_t block) | 118 | ext4_lblk_t block) |
| 116 | { | 119 | { |
| 117 | int depth; | ||
| 118 | |||
| 119 | if (path) { | 120 | if (path) { |
| 121 | int depth = path->p_depth; | ||
| 120 | struct ext4_extent *ex; | 122 | struct ext4_extent *ex; |
| 121 | depth = path->p_depth; | ||
| 122 | 123 | ||
| 123 | /* | 124 | /* |
| 124 | * Try to predict block placement assuming that we are | 125 | * Try to predict block placement assuming that we are |
| @@ -180,12 +181,10 @@ static inline int ext4_ext_space_block(struct inode *inode, int check) | |||
| 180 | 181 | ||
| 181 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) | 182 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) |
| 182 | / sizeof(struct ext4_extent); | 183 | / sizeof(struct ext4_extent); |
| 183 | if (!check) { | ||
| 184 | #ifdef AGGRESSIVE_TEST | 184 | #ifdef AGGRESSIVE_TEST |
| 185 | if (size > 6) | 185 | if (!check && size > 6) |
| 186 | size = 6; | 186 | size = 6; |
| 187 | #endif | 187 | #endif |
| 188 | } | ||
| 189 | return size; | 188 | return size; |
| 190 | } | 189 | } |
| 191 | 190 | ||
| @@ -195,12 +194,10 @@ static inline int ext4_ext_space_block_idx(struct inode *inode, int check) | |||
| 195 | 194 | ||
| 196 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) | 195 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) |
| 197 | / sizeof(struct ext4_extent_idx); | 196 | / sizeof(struct ext4_extent_idx); |
| 198 | if (!check) { | ||
| 199 | #ifdef AGGRESSIVE_TEST | 197 | #ifdef AGGRESSIVE_TEST |
| 200 | if (size > 5) | 198 | if (!check && size > 5) |
| 201 | size = 5; | 199 | size = 5; |
| 202 | #endif | 200 | #endif |
| 203 | } | ||
| 204 | return size; | 201 | return size; |
| 205 | } | 202 | } |
| 206 | 203 | ||
| @@ -211,12 +208,10 @@ static inline int ext4_ext_space_root(struct inode *inode, int check) | |||
| 211 | size = sizeof(EXT4_I(inode)->i_data); | 208 | size = sizeof(EXT4_I(inode)->i_data); |
| 212 | size -= sizeof(struct ext4_extent_header); | 209 | size -= sizeof(struct ext4_extent_header); |
| 213 | size /= sizeof(struct ext4_extent); | 210 | size /= sizeof(struct ext4_extent); |
| 214 | if (!check) { | ||
| 215 | #ifdef AGGRESSIVE_TEST | 211 | #ifdef AGGRESSIVE_TEST |
| 216 | if (size > 3) | 212 | if (!check && size > 3) |
| 217 | size = 3; | 213 | size = 3; |
| 218 | #endif | 214 | #endif |
| 219 | } | ||
| 220 | return size; | 215 | return size; |
| 221 | } | 216 | } |
| 222 | 217 | ||
| @@ -227,12 +222,10 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) | |||
| 227 | size = sizeof(EXT4_I(inode)->i_data); | 222 | size = sizeof(EXT4_I(inode)->i_data); |
| 228 | size -= sizeof(struct ext4_extent_header); | 223 | size -= sizeof(struct ext4_extent_header); |
| 229 | size /= sizeof(struct ext4_extent_idx); | 224 | size /= sizeof(struct ext4_extent_idx); |
| 230 | if (!check) { | ||
| 231 | #ifdef AGGRESSIVE_TEST | 225 | #ifdef AGGRESSIVE_TEST |
| 232 | if (size > 4) | 226 | if (!check && size > 4) |
| 233 | size = 4; | 227 | size = 4; |
| 234 | #endif | 228 | #endif |
| 235 | } | ||
| 236 | return size; | 229 | return size; |
| 237 | } | 230 | } |
| 238 | 231 | ||
| @@ -244,7 +237,7 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) | |||
| 244 | int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) | 237 | int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) |
| 245 | { | 238 | { |
| 246 | struct ext4_inode_info *ei = EXT4_I(inode); | 239 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 247 | int idxs, num = 0; | 240 | int idxs; |
| 248 | 241 | ||
| 249 | idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) | 242 | idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) |
| 250 | / sizeof(struct ext4_extent_idx)); | 243 | / sizeof(struct ext4_extent_idx)); |
| @@ -259,6 +252,8 @@ int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) | |||
| 259 | */ | 252 | */ |
| 260 | if (ei->i_da_metadata_calc_len && | 253 | if (ei->i_da_metadata_calc_len && |
| 261 | ei->i_da_metadata_calc_last_lblock+1 == lblock) { | 254 | ei->i_da_metadata_calc_last_lblock+1 == lblock) { |
| 255 | int num = 0; | ||
| 256 | |||
| 262 | if ((ei->i_da_metadata_calc_len % idxs) == 0) | 257 | if ((ei->i_da_metadata_calc_len % idxs) == 0) |
| 263 | num++; | 258 | num++; |
| 264 | if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) | 259 | if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) |
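ext4_ext_calc_metadata_amount() charges one extra metadata block each time a run of appended worst-case extents fills another level of the tree: a new leaf every idxs entries, a new level-1 index every idxs² entries, and so on, which is what the num++ cascade above computes. A back-of-envelope check of that cascade for 4k blocks; the 12-byte struct sizes match the on-disk extent layout, and the scenario is purely illustrative:

```c
#include <stdio.h>

int main(void)
{
	unsigned blocksize = 4096;
	unsigned hdr = 12;	/* sizeof(struct ext4_extent_header) */
	unsigned idx = 12;	/* sizeof(struct ext4_extent_idx) */

	unsigned idxs = (blocksize - hdr) / idx;	/* 340 for 4k blocks */
	printf("indices per index block: %u\n", idxs);

	/* Pretend we are appending the 340th worst-case extent: it
	 * crosses a leaf boundary, so one extra block is charged. */
	unsigned long calc_len = idxs;
	int num = 0;
	if (calc_len % idxs == 0)
		num++;			/* new leaf block */
	if (calc_len % ((unsigned long)idxs * idxs) == 0)
		num++;			/* new level-1 index block */
	printf("extra metadata blocks for extent #%lu: %d\n",
	       calc_len, num);		/* 1 */
	return 0;
}
```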
| @@ -321,8 +316,6 @@ static int ext4_valid_extent_entries(struct inode *inode, | |||
| 321 | struct ext4_extent_header *eh, | 316 | struct ext4_extent_header *eh, |
| 322 | int depth) | 317 | int depth) |
| 323 | { | 318 | { |
| 324 | struct ext4_extent *ext; | ||
| 325 | struct ext4_extent_idx *ext_idx; | ||
| 326 | unsigned short entries; | 319 | unsigned short entries; |
| 327 | if (eh->eh_entries == 0) | 320 | if (eh->eh_entries == 0) |
| 328 | return 1; | 321 | return 1; |
| @@ -331,7 +324,7 @@ static int ext4_valid_extent_entries(struct inode *inode, | |||
| 331 | 324 | ||
| 332 | if (depth == 0) { | 325 | if (depth == 0) { |
| 333 | /* leaf entries */ | 326 | /* leaf entries */ |
| 334 | ext = EXT_FIRST_EXTENT(eh); | 327 | struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); |
| 335 | while (entries) { | 328 | while (entries) { |
| 336 | if (!ext4_valid_extent(inode, ext)) | 329 | if (!ext4_valid_extent(inode, ext)) |
| 337 | return 0; | 330 | return 0; |
| @@ -339,7 +332,7 @@ static int ext4_valid_extent_entries(struct inode *inode, | |||
| 339 | entries--; | 332 | entries--; |
| 340 | } | 333 | } |
| 341 | } else { | 334 | } else { |
| 342 | ext_idx = EXT_FIRST_INDEX(eh); | 335 | struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); |
| 343 | while (entries) { | 336 | while (entries) { |
| 344 | if (!ext4_valid_extent_idx(inode, ext_idx)) | 337 | if (!ext4_valid_extent_idx(inode, ext_idx)) |
| 345 | return 0; | 338 | return 0; |
| @@ -751,31 +744,30 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, | |||
| 751 | return -EIO; | 744 | return -EIO; |
| 752 | } | 745 | } |
| 753 | 746 | ||
| 754 | len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; | ||
| 755 | if (logical > le32_to_cpu(curp->p_idx->ei_block)) { | 747 | if (logical > le32_to_cpu(curp->p_idx->ei_block)) { |
| 756 | /* insert after */ | 748 | /* insert after */ |
| 757 | if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { | 749 | ext_debug("insert new index %d after: %llu\n", logical, ptr); |
| 758 | len = (len - 1) * sizeof(struct ext4_extent_idx); | ||
| 759 | len = len < 0 ? 0 : len; | ||
| 760 | ext_debug("insert new index %d after: %llu. " | ||
| 761 | "move %d from 0x%p to 0x%p\n", | ||
| 762 | logical, ptr, len, | ||
| 763 | (curp->p_idx + 1), (curp->p_idx + 2)); | ||
| 764 | memmove(curp->p_idx + 2, curp->p_idx + 1, len); | ||
| 765 | } | ||
| 766 | ix = curp->p_idx + 1; | 750 | ix = curp->p_idx + 1; |
| 767 | } else { | 751 | } else { |
| 768 | /* insert before */ | 752 | /* insert before */ |
| 769 | len = len * sizeof(struct ext4_extent_idx); | 753 | ext_debug("insert new index %d before: %llu\n", logical, ptr); |
| 770 | len = len < 0 ? 0 : len; | ||
| 771 | ext_debug("insert new index %d before: %llu. " | ||
| 772 | "move %d from 0x%p to 0x%p\n", | ||
| 773 | logical, ptr, len, | ||
| 774 | curp->p_idx, (curp->p_idx + 1)); | ||
| 775 | memmove(curp->p_idx + 1, curp->p_idx, len); | ||
| 776 | ix = curp->p_idx; | 754 | ix = curp->p_idx; |
| 777 | } | 755 | } |
| 778 | 756 | ||
| 757 | len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1; | ||
| 758 | BUG_ON(len < 0); | ||
| 759 | if (len > 0) { | ||
| 760 | ext_debug("insert new index %d: " | ||
| 761 | "move %d indices from 0x%p to 0x%p\n", | ||
| 762 | logical, len, ix, ix + 1); | ||
| 763 | memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx)); | ||
| 764 | } | ||
| 765 | |||
| 766 | if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) { | ||
| 767 | EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!"); | ||
| 768 | return -EIO; | ||
| 769 | } | ||
| 770 | |||
| 779 | ix->ei_block = cpu_to_le32(logical); | 771 | ix->ei_block = cpu_to_le32(logical); |
| 780 | ext4_idx_store_pblock(ix, ptr); | 772 | ext4_idx_store_pblock(ix, ptr); |
| 781 | le16_add_cpu(&curp->p_hdr->eh_entries, 1); | 773 | le16_add_cpu(&curp->p_hdr->eh_entries, 1); |
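The rewritten ext4_ext_insert_index() above first picks the target slot (curp->p_idx + 1 for insert-after, curp->p_idx for insert-before) and then performs one unconditional shift of everything from that slot through the last entry, which subsumes the old per-branch memmove arithmetic. A userspace model of that shift-and-insert on a plain array; struct idx is a stand-in for ext4_extent_idx:

```c
#include <stdio.h>
#include <string.h>

struct idx { unsigned block; };

static void insert_idx(struct idx *arr, int *nr, int pos, unsigned block)
{
	struct idx *ix = &arr[pos];
	int len = *nr - pos;	/* entries from ix through the last one */

	if (len > 0)		/* shift tail right; no-op when appending */
		memmove(ix + 1, ix, len * sizeof(*ix));
	ix->block = block;
	(*nr)++;
}

int main(void)
{
	struct idx arr[8] = { {10}, {30}, {50} };
	int nr = 3;

	insert_idx(arr, &nr, 1, 20);	/* insert before the entry at 30 */
	for (int i = 0; i < nr; i++)
		printf("%u ", arr[i].block);
	printf("\n");			/* 10 20 30 50 */
	return 0;
}
```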
| @@ -1042,16 +1034,14 @@ cleanup: | |||
| 1042 | */ | 1034 | */ |
| 1043 | static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | 1035 | static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, |
| 1044 | unsigned int flags, | 1036 | unsigned int flags, |
| 1045 | struct ext4_ext_path *path, | ||
| 1046 | struct ext4_extent *newext) | 1037 | struct ext4_extent *newext) |
| 1047 | { | 1038 | { |
| 1048 | struct ext4_ext_path *curp = path; | ||
| 1049 | struct ext4_extent_header *neh; | 1039 | struct ext4_extent_header *neh; |
| 1050 | struct buffer_head *bh; | 1040 | struct buffer_head *bh; |
| 1051 | ext4_fsblk_t newblock; | 1041 | ext4_fsblk_t newblock; |
| 1052 | int err = 0; | 1042 | int err = 0; |
| 1053 | 1043 | ||
| 1054 | newblock = ext4_ext_new_meta_block(handle, inode, path, | 1044 | newblock = ext4_ext_new_meta_block(handle, inode, NULL, |
| 1055 | newext, &err, flags); | 1045 | newext, &err, flags); |
| 1056 | if (newblock == 0) | 1046 | if (newblock == 0) |
| 1057 | return err; | 1047 | return err; |
| @@ -1071,7 +1061,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
| 1071 | } | 1061 | } |
| 1072 | 1062 | ||
| 1073 | /* move top-level index/leaf into new block */ | 1063 | /* move top-level index/leaf into new block */ |
| 1074 | memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data)); | 1064 | memmove(bh->b_data, EXT4_I(inode)->i_data, |
| 1065 | sizeof(EXT4_I(inode)->i_data)); | ||
| 1075 | 1066 | ||
| 1076 | /* set size of new block */ | 1067 | /* set size of new block */ |
| 1077 | neh = ext_block_hdr(bh); | 1068 | neh = ext_block_hdr(bh); |
| @@ -1089,32 +1080,23 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
| 1089 | if (err) | 1080 | if (err) |
| 1090 | goto out; | 1081 | goto out; |
| 1091 | 1082 | ||
| 1092 | /* create index in new top-level index: num,max,pointer */ | 1083 | /* Update top-level index: num,max,pointer */ |
| 1093 | err = ext4_ext_get_access(handle, inode, curp); | ||
| 1094 | if (err) | ||
| 1095 | goto out; | ||
| 1096 | |||
| 1097 | curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; | ||
| 1098 | curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0)); | ||
| 1099 | curp->p_hdr->eh_entries = cpu_to_le16(1); | ||
| 1100 | curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); | ||
| 1101 | |||
| 1102 | if (path[0].p_hdr->eh_depth) | ||
| 1103 | curp->p_idx->ei_block = | ||
| 1104 | EXT_FIRST_INDEX(path[0].p_hdr)->ei_block; | ||
| 1105 | else | ||
| 1106 | curp->p_idx->ei_block = | ||
| 1107 | EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; | ||
| 1108 | ext4_idx_store_pblock(curp->p_idx, newblock); | ||
| 1109 | |||
| 1110 | neh = ext_inode_hdr(inode); | 1084 | neh = ext_inode_hdr(inode); |
| 1085 | neh->eh_entries = cpu_to_le16(1); | ||
| 1086 | ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock); | ||
| 1087 | if (neh->eh_depth == 0) { | ||
| 1088 | /* Root extent block becomes index block */ | ||
| 1089 | neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0)); | ||
| 1090 | EXT_FIRST_INDEX(neh)->ei_block = | ||
| 1091 | EXT_FIRST_EXTENT(neh)->ee_block; | ||
| 1092 | } | ||
| 1111 | ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", | 1093 | ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", |
| 1112 | le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), | 1094 | le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), |
| 1113 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), | 1095 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), |
| 1114 | ext4_idx_pblock(EXT_FIRST_INDEX(neh))); | 1096 | ext4_idx_pblock(EXT_FIRST_INDEX(neh))); |
| 1115 | 1097 | ||
| 1116 | neh->eh_depth = cpu_to_le16(path->p_depth + 1); | 1098 | neh->eh_depth = cpu_to_le16(neh->eh_depth + 1); |
| 1117 | err = ext4_ext_dirty(handle, inode, curp); | 1099 | ext4_mark_inode_dirty(handle, inode); |
| 1118 | out: | 1100 | out: |
| 1119 | brelse(bh); | 1101 | brelse(bh); |
| 1120 | 1102 | ||
| @@ -1162,8 +1144,7 @@ repeat: | |||
| 1162 | err = PTR_ERR(path); | 1144 | err = PTR_ERR(path); |
| 1163 | } else { | 1145 | } else { |
| 1164 | /* tree is full, time to grow in depth */ | 1146 | /* tree is full, time to grow in depth */ |
| 1165 | err = ext4_ext_grow_indepth(handle, inode, flags, | 1147 | err = ext4_ext_grow_indepth(handle, inode, flags, newext); |
| 1166 | path, newext); | ||
| 1167 | if (err) | 1148 | if (err) |
| 1168 | goto out; | 1149 | goto out; |
| 1169 | 1150 | ||
| @@ -1235,9 +1216,9 @@ static int ext4_ext_search_left(struct inode *inode, | |||
| 1235 | if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { | 1216 | if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { |
| 1236 | EXT4_ERROR_INODE(inode, | 1217 | EXT4_ERROR_INODE(inode, |
| 1237 | "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", | 1218 | "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", |
| 1238 | ix != NULL ? ix->ei_block : 0, | 1219 | ix != NULL ? le32_to_cpu(ix->ei_block) : 0, |
| 1239 | EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? | 1220 | EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? |
| 1240 | EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0, | 1221 | le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0, |
| 1241 | depth); | 1222 | depth); |
| 1242 | return -EIO; | 1223 | return -EIO; |
| 1243 | } | 1224 | } |
| @@ -1260,13 +1241,14 @@ static int ext4_ext_search_left(struct inode *inode, | |||
| 1260 | /* | 1241 | /* |
| 1261 | * search the closest allocated block to the right for *logical | 1242 | * search the closest allocated block to the right for *logical |
| 1262 | * and returns it at @logical + its physical address at @phys | 1243 | * and returns it at @logical + its physical address at @phys |
| 1263 | * if *logical is the smallest allocated block, the function | 1244 | * if *logical is the largest allocated block, the function |
| 1264 | * returns 0 at @phys | 1245 | * returns 0 at @phys |
| 1265 | * return value contains 0 (success) or error code | 1246 | * return value contains 0 (success) or error code |
| 1266 | */ | 1247 | */ |
| 1267 | static int ext4_ext_search_right(struct inode *inode, | 1248 | static int ext4_ext_search_right(struct inode *inode, |
| 1268 | struct ext4_ext_path *path, | 1249 | struct ext4_ext_path *path, |
| 1269 | ext4_lblk_t *logical, ext4_fsblk_t *phys) | 1250 | ext4_lblk_t *logical, ext4_fsblk_t *phys, |
| 1251 | struct ext4_extent **ret_ex) | ||
| 1270 | { | 1252 | { |
| 1271 | struct buffer_head *bh = NULL; | 1253 | struct buffer_head *bh = NULL; |
| 1272 | struct ext4_extent_header *eh; | 1254 | struct ext4_extent_header *eh; |
| @@ -1308,9 +1290,7 @@ static int ext4_ext_search_right(struct inode *inode, | |||
| 1308 | return -EIO; | 1290 | return -EIO; |
| 1309 | } | 1291 | } |
| 1310 | } | 1292 | } |
| 1311 | *logical = le32_to_cpu(ex->ee_block); | 1293 | goto found_extent; |
| 1312 | *phys = ext4_ext_pblock(ex); | ||
| 1313 | return 0; | ||
| 1314 | } | 1294 | } |
| 1315 | 1295 | ||
| 1316 | if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { | 1296 | if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { |
| @@ -1323,9 +1303,7 @@ static int ext4_ext_search_right(struct inode *inode, | |||
| 1323 | if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { | 1303 | if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { |
| 1324 | /* next allocated block in this leaf */ | 1304 | /* next allocated block in this leaf */ |
| 1325 | ex++; | 1305 | ex++; |
| 1326 | *logical = le32_to_cpu(ex->ee_block); | 1306 | goto found_extent; |
| 1327 | *phys = ext4_ext_pblock(ex); | ||
| 1328 | return 0; | ||
| 1329 | } | 1307 | } |
| 1330 | 1308 | ||
| 1331 | /* go up and search for index to the right */ | 1309 | /* go up and search for index to the right */ |
| @@ -1368,9 +1346,12 @@ got_index: | |||
| 1368 | return -EIO; | 1346 | return -EIO; |
| 1369 | } | 1347 | } |
| 1370 | ex = EXT_FIRST_EXTENT(eh); | 1348 | ex = EXT_FIRST_EXTENT(eh); |
| 1349 | found_extent: | ||
| 1371 | *logical = le32_to_cpu(ex->ee_block); | 1350 | *logical = le32_to_cpu(ex->ee_block); |
| 1372 | *phys = ext4_ext_pblock(ex); | 1351 | *phys = ext4_ext_pblock(ex); |
| 1373 | put_bh(bh); | 1352 | *ret_ex = ex; |
| 1353 | if (bh) | ||
| 1354 | put_bh(bh); | ||
| 1374 | return 0; | 1355 | return 0; |
| 1375 | } | 1356 | } |
| 1376 | 1357 | ||
| @@ -1395,7 +1376,8 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) | |||
| 1395 | while (depth >= 0) { | 1376 | while (depth >= 0) { |
| 1396 | if (depth == path->p_depth) { | 1377 | if (depth == path->p_depth) { |
| 1397 | /* leaf */ | 1378 | /* leaf */ |
| 1398 | if (path[depth].p_ext != | 1379 | if (path[depth].p_ext && |
| 1380 | path[depth].p_ext != | ||
| 1399 | EXT_LAST_EXTENT(path[depth].p_hdr)) | 1381 | EXT_LAST_EXTENT(path[depth].p_hdr)) |
| 1400 | return le32_to_cpu(path[depth].p_ext[1].ee_block); | 1382 | return le32_to_cpu(path[depth].p_ext[1].ee_block); |
| 1401 | } else { | 1383 | } else { |
| @@ -1623,7 +1605,8 @@ static int ext4_ext_try_to_merge(struct inode *inode, | |||
| 1623 | * such that there will be no overlap, and then returns 1. | 1605 | * such that there will be no overlap, and then returns 1. |
| 1624 | * If there is no overlap found, it returns 0. | 1606 | * If there is no overlap found, it returns 0. |
| 1625 | */ | 1607 | */ |
| 1626 | static unsigned int ext4_ext_check_overlap(struct inode *inode, | 1608 | static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, |
| 1609 | struct inode *inode, | ||
| 1627 | struct ext4_extent *newext, | 1610 | struct ext4_extent *newext, |
| 1628 | struct ext4_ext_path *path) | 1611 | struct ext4_ext_path *path) |
| 1629 | { | 1612 | { |
| @@ -1637,6 +1620,7 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode, | |||
| 1637 | if (!path[depth].p_ext) | 1620 | if (!path[depth].p_ext) |
| 1638 | goto out; | 1621 | goto out; |
| 1639 | b2 = le32_to_cpu(path[depth].p_ext->ee_block); | 1622 | b2 = le32_to_cpu(path[depth].p_ext->ee_block); |
| 1623 | b2 &= ~(sbi->s_cluster_ratio - 1); | ||
| 1640 | 1624 | ||
| 1641 | /* | 1625 | /* |
| 1642 | * get the next allocated block if the extent in the path | 1626 | * get the next allocated block if the extent in the path |
| @@ -1646,6 +1630,7 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode, | |||
| 1646 | b2 = ext4_ext_next_allocated_block(path); | 1630 | b2 = ext4_ext_next_allocated_block(path); |
| 1647 | if (b2 == EXT_MAX_BLOCKS) | 1631 | if (b2 == EXT_MAX_BLOCKS) |
| 1648 | goto out; | 1632 | goto out; |
| 1633 | b2 &= ~(sbi->s_cluster_ratio - 1); | ||
| 1649 | } | 1634 | } |
| 1650 | 1635 | ||
| 1651 | /* check for wrap through zero on extent logical start block*/ | 1636 | /* check for wrap through zero on extent logical start block*/ |
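The two new b2 &= ~(sbi->s_cluster_ratio - 1) lines above round a block number down to its cluster boundary; the mask works because the bigalloc cluster ratio is a power of two. A one-line demonstration with made-up numbers:

```c
#include <stdio.h>

int main(void)
{
	unsigned cluster_ratio = 16;	/* blocks per cluster, power of 2 */
	unsigned b2 = 1234;

	/* Same idiom as b2 &= ~(sbi->s_cluster_ratio - 1). */
	unsigned cluster_start = b2 & ~(cluster_ratio - 1);
	printf("block %u starts cluster at block %u (cluster %u)\n",
	       b2, cluster_start, b2 / cluster_ratio);
	/* block 1234 starts cluster at block 1232 (cluster 77) */
	return 0;
}
```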
| @@ -1697,7 +1682,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
| 1697 | /* try to insert block into found extent and return */ | 1682 | /* try to insert block into found extent and return */ |
| 1698 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) | 1683 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) |
| 1699 | && ext4_can_extents_be_merged(inode, ex, newext)) { | 1684 | && ext4_can_extents_be_merged(inode, ex, newext)) { |
| 1700 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", | 1685 | ext_debug("append [%d]%d block to %u:[%d]%d (from %llu)\n", |
| 1701 | ext4_ext_is_uninitialized(newext), | 1686 | ext4_ext_is_uninitialized(newext), |
| 1702 | ext4_ext_get_actual_len(newext), | 1687 | ext4_ext_get_actual_len(newext), |
| 1703 | le32_to_cpu(ex->ee_block), | 1688 | le32_to_cpu(ex->ee_block), |
| @@ -1735,7 +1720,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
| 1735 | if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) | 1720 | if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) |
| 1736 | next = ext4_ext_next_leaf_block(path); | 1721 | next = ext4_ext_next_leaf_block(path); |
| 1737 | if (next != EXT_MAX_BLOCKS) { | 1722 | if (next != EXT_MAX_BLOCKS) { |
| 1738 | ext_debug("next leaf block - %d\n", next); | 1723 | ext_debug("next leaf block - %u\n", next); |
| 1739 | BUG_ON(npath != NULL); | 1724 | BUG_ON(npath != NULL); |
| 1740 | npath = ext4_ext_find_extent(inode, next, NULL); | 1725 | npath = ext4_ext_find_extent(inode, next, NULL); |
| 1741 | if (IS_ERR(npath)) | 1726 | if (IS_ERR(npath)) |
| @@ -1773,46 +1758,51 @@ has_space: | |||
| 1773 | 1758 | ||
| 1774 | if (!nearex) { | 1759 | if (!nearex) { |
| 1775 | /* there is no extent in this leaf, create first one */ | 1760 | /* there is no extent in this leaf, create first one */ |
| 1776 | ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", | 1761 | ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n", |
| 1777 | le32_to_cpu(newext->ee_block), | 1762 | le32_to_cpu(newext->ee_block), |
| 1778 | ext4_ext_pblock(newext), | 1763 | ext4_ext_pblock(newext), |
| 1779 | ext4_ext_is_uninitialized(newext), | 1764 | ext4_ext_is_uninitialized(newext), |
| 1780 | ext4_ext_get_actual_len(newext)); | 1765 | ext4_ext_get_actual_len(newext)); |
| 1781 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); | 1766 | nearex = EXT_FIRST_EXTENT(eh); |
| 1782 | } else if (le32_to_cpu(newext->ee_block) | 1767 | } else { |
| 1768 | if (le32_to_cpu(newext->ee_block) | ||
| 1783 | > le32_to_cpu(nearex->ee_block)) { | 1769 | > le32_to_cpu(nearex->ee_block)) { |
| 1784 | /* BUG_ON(newext->ee_block == nearex->ee_block); */ | 1770 | /* Insert after */ |
| 1785 | if (nearex != EXT_LAST_EXTENT(eh)) { | 1771 | ext_debug("insert %u:%llu:[%d]%d before: " |
| 1786 | len = EXT_MAX_EXTENT(eh) - nearex; | 1772 | "nearest %p\n", |
| 1787 | len = (len - 1) * sizeof(struct ext4_extent); | ||
| 1788 | len = len < 0 ? 0 : len; | ||
| 1789 | ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " | ||
| 1790 | "move %d from 0x%p to 0x%p\n", | ||
| 1791 | le32_to_cpu(newext->ee_block), | 1773 | le32_to_cpu(newext->ee_block), |
| 1792 | ext4_ext_pblock(newext), | 1774 | ext4_ext_pblock(newext), |
| 1793 | ext4_ext_is_uninitialized(newext), | 1775 | ext4_ext_is_uninitialized(newext), |
| 1794 | ext4_ext_get_actual_len(newext), | 1776 | ext4_ext_get_actual_len(newext), |
| 1795 | nearex, len, nearex + 1, nearex + 2); | 1777 | nearex); |
| 1796 | memmove(nearex + 2, nearex + 1, len); | 1778 | nearex++; |
| 1779 | } else { | ||
| 1780 | /* Insert before */ | ||
| 1781 | BUG_ON(newext->ee_block == nearex->ee_block); | ||
| 1782 | ext_debug("insert %u:%llu:[%d]%d after: " | ||
| 1783 | "nearest %p\n", | ||
| 1784 | le32_to_cpu(newext->ee_block), | ||
| 1785 | ext4_ext_pblock(newext), | ||
| 1786 | ext4_ext_is_uninitialized(newext), | ||
| 1787 | ext4_ext_get_actual_len(newext), | ||
| 1788 | nearex); | ||
| 1789 | } | ||
| 1790 | len = EXT_LAST_EXTENT(eh) - nearex + 1; | ||
| 1791 | if (len > 0) { | ||
| 1792 | ext_debug("insert %u:%llu:[%d]%d: " | ||
| 1793 | "move %d extents from 0x%p to 0x%p\n", | ||
| 1794 | le32_to_cpu(newext->ee_block), | ||
| 1795 | ext4_ext_pblock(newext), | ||
| 1796 | ext4_ext_is_uninitialized(newext), | ||
| 1797 | ext4_ext_get_actual_len(newext), | ||
| 1798 | len, nearex, nearex + 1); | ||
| 1799 | memmove(nearex + 1, nearex, | ||
| 1800 | len * sizeof(struct ext4_extent)); | ||
| 1797 | } | 1801 | } |
| 1798 | path[depth].p_ext = nearex + 1; | ||
| 1799 | } else { | ||
| 1800 | BUG_ON(newext->ee_block == nearex->ee_block); | ||
| 1801 | len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); | ||
| 1802 | len = len < 0 ? 0 : len; | ||
| 1803 | ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " | ||
| 1804 | "move %d from 0x%p to 0x%p\n", | ||
| 1805 | le32_to_cpu(newext->ee_block), | ||
| 1806 | ext4_ext_pblock(newext), | ||
| 1807 | ext4_ext_is_uninitialized(newext), | ||
| 1808 | ext4_ext_get_actual_len(newext), | ||
| 1809 | nearex, len, nearex, nearex + 1); | ||
| 1810 | memmove(nearex + 1, nearex, len); | ||
| 1811 | path[depth].p_ext = nearex; | ||
| 1812 | } | 1802 | } |
| 1813 | 1803 | ||
| 1814 | le16_add_cpu(&eh->eh_entries, 1); | 1804 | le16_add_cpu(&eh->eh_entries, 1); |
| 1815 | nearex = path[depth].p_ext; | 1805 | path[depth].p_ext = nearex; |
| 1816 | nearex->ee_block = newext->ee_block; | 1806 | nearex->ee_block = newext->ee_block; |
| 1817 | ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); | 1807 | ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); |
| 1818 | nearex->ee_len = newext->ee_len; | 1808 | nearex->ee_len = newext->ee_len; |
| @@ -1962,6 +1952,7 @@ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, | |||
| 1962 | struct ext4_ext_cache *cex; | 1952 | struct ext4_ext_cache *cex; |
| 1963 | BUG_ON(len == 0); | 1953 | BUG_ON(len == 0); |
| 1964 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1954 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
| 1955 | trace_ext4_ext_put_in_cache(inode, block, len, start); | ||
| 1965 | cex = &EXT4_I(inode)->i_cached_extent; | 1956 | cex = &EXT4_I(inode)->i_cached_extent; |
| 1966 | cex->ec_block = block; | 1957 | cex->ec_block = block; |
| 1967 | cex->ec_len = len; | 1958 | cex->ec_len = len; |
| @@ -2063,6 +2054,7 @@ errout: | |||
| 2063 | sbi->extent_cache_misses++; | 2054 | sbi->extent_cache_misses++; |
| 2064 | else | 2055 | else |
| 2065 | sbi->extent_cache_hits++; | 2056 | sbi->extent_cache_hits++; |
| 2057 | trace_ext4_ext_in_cache(inode, block, ret); | ||
| 2066 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 2058 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
| 2067 | return ret; | 2059 | return ret; |
| 2068 | } | 2060 | } |
| @@ -2130,6 +2122,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
| 2130 | if (err) | 2122 | if (err) |
| 2131 | return err; | 2123 | return err; |
| 2132 | ext_debug("index is empty, remove it, free block %llu\n", leaf); | 2124 | ext_debug("index is empty, remove it, free block %llu\n", leaf); |
| 2125 | trace_ext4_ext_rm_idx(inode, leaf); | ||
| 2126 | |||
| 2133 | ext4_free_blocks(handle, inode, NULL, leaf, 1, | 2127 | ext4_free_blocks(handle, inode, NULL, leaf, 1, |
| 2134 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); | 2128 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); |
| 2135 | return err; | 2129 | return err; |
| @@ -2158,7 +2152,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, | |||
| 2158 | * need to account for leaf block credit | 2152 | * need to account for leaf block credit |
| 2159 | * | 2153 | * |
| 2160 | * bitmaps and block group descriptor blocks | 2154 | * bitmaps and block group descriptor blocks |
| 2161 | * and other metadat blocks still need to be | 2155 | * and other metadata blocks still need to be |
| 2162 | * accounted. | 2156 | * accounted. |
| 2163 | */ | 2157 | */ |
| 2164 | /* 1 bitmap, 1 block group descriptor */ | 2158 | /* 1 bitmap, 1 block group descriptor */ |
| @@ -2195,14 +2189,40 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
| 2195 | } | 2189 | } |
| 2196 | 2190 | ||
| 2197 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | 2191 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, |
| 2198 | struct ext4_extent *ex, | 2192 | struct ext4_extent *ex, |
| 2199 | ext4_lblk_t from, ext4_lblk_t to) | 2193 | ext4_fsblk_t *partial_cluster, |
| 2194 | ext4_lblk_t from, ext4_lblk_t to) | ||
| 2200 | { | 2195 | { |
| 2196 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 2201 | unsigned short ee_len = ext4_ext_get_actual_len(ex); | 2197 | unsigned short ee_len = ext4_ext_get_actual_len(ex); |
| 2198 | ext4_fsblk_t pblk; | ||
| 2202 | int flags = EXT4_FREE_BLOCKS_FORGET; | 2199 | int flags = EXT4_FREE_BLOCKS_FORGET; |
| 2203 | 2200 | ||
| 2204 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 2201 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
| 2205 | flags |= EXT4_FREE_BLOCKS_METADATA; | 2202 | flags |= EXT4_FREE_BLOCKS_METADATA; |
| 2203 | /* | ||
| 2204 | * For bigalloc file systems, we never free a partial cluster | ||
| 2205 | * at the beginning of the extent. Instead, we make a note | ||
| 2206 | * that we tried freeing the cluster, and check to see if we | ||
| 2207 | * need to free it on a subsequent call to ext4_remove_blocks, | ||
| 2208 | * or at the end of the ext4_truncate() operation. | ||
| 2209 | */ | ||
| 2210 | flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER; | ||
| 2211 | |||
| 2212 | trace_ext4_remove_blocks(inode, ex, from, to, *partial_cluster); | ||
| 2213 | /* | ||
| 2214 | * If we have a partial cluster, and it's different from the | ||
| 2215 | * cluster of the last block, we need to explicitly free the | ||
| 2216 | * partial cluster here. | ||
| 2217 | */ | ||
| 2218 | pblk = ext4_ext_pblock(ex) + ee_len - 1; | ||
| 2219 | if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) { | ||
| 2220 | ext4_free_blocks(handle, inode, NULL, | ||
| 2221 | EXT4_C2B(sbi, *partial_cluster), | ||
| 2222 | sbi->s_cluster_ratio, flags); | ||
| 2223 | *partial_cluster = 0; | ||
| 2224 | } | ||
| 2225 | |||
| 2206 | #ifdef EXTENTS_STATS | 2226 | #ifdef EXTENTS_STATS |
| 2207 | { | 2227 | { |
| 2208 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 2228 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
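The partial_cluster bookkeeping added above defers freeing a cluster that might still be shared with a not-yet-visited extent; when a later call finds that the deferred cluster differs from the cluster of its own last block, nothing can share it anymore and it is freed. A compact model of that decision, with EXT4_B2C()/EXT4_C2B() reduced to shifts and every number illustrative:

```c
#include <stdio.h>

#define CLUSTER_BITS 4
#define B2C(b) ((b) >> CLUSTER_BITS)	/* block -> cluster */
#define C2B(c) ((c) << CLUSTER_BITS)	/* cluster -> its first block */

int main(void)
{
	unsigned long long partial_cluster = B2C(4096);	/* deferred earlier */
	unsigned long long ex_pblk = 8192, ee_len = 10;
	unsigned long long last = ex_pblk + ee_len - 1;

	/*
	 * The deferred cluster is not the cluster of this extent's last
	 * block, so no remaining extent can share it: free it now and
	 * clear the note, as ext4_remove_blocks() does.
	 */
	if (partial_cluster && B2C(last) != partial_cluster) {
		printf("free cluster %llu (blocks %llu..%llu)\n",
		       partial_cluster, C2B(partial_cluster),
		       C2B(partial_cluster + 1) - 1);
		partial_cluster = 0;
	}
	return 0;
}
```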
| @@ -2222,12 +2242,24 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
| 2222 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2242 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { |
| 2223 | /* tail removal */ | 2243 | /* tail removal */ |
| 2224 | ext4_lblk_t num; | 2244 | ext4_lblk_t num; |
| 2225 | ext4_fsblk_t start; | ||
| 2226 | 2245 | ||
| 2227 | num = le32_to_cpu(ex->ee_block) + ee_len - from; | 2246 | num = le32_to_cpu(ex->ee_block) + ee_len - from; |
| 2228 | start = ext4_ext_pblock(ex) + ee_len - num; | 2247 | pblk = ext4_ext_pblock(ex) + ee_len - num; |
| 2229 | ext_debug("free last %u blocks starting %llu\n", num, start); | 2248 | ext_debug("free last %u blocks starting %llu\n", num, pblk); |
| 2230 | ext4_free_blocks(handle, inode, NULL, start, num, flags); | 2249 | ext4_free_blocks(handle, inode, NULL, pblk, num, flags); |
| 2250 | /* | ||
| 2251 | * If the block range to be freed didn't start at the | ||
| 2252 | * beginning of a cluster, and we removed the entire | ||
| 2253 | * extent, save the partial cluster here, since we | ||
| 2254 | * might need to delete if we determine that the | ||
| 2255 | * truncate operation has removed all of the blocks in | ||
| 2256 | * the cluster. | ||
| 2257 | */ | ||
| 2258 | if (pblk & (sbi->s_cluster_ratio - 1) && | ||
| 2259 | (ee_len == num)) | ||
| 2260 | *partial_cluster = EXT4_B2C(sbi, pblk); | ||
| 2261 | else | ||
| 2262 | *partial_cluster = 0; | ||
| 2231 | } else if (from == le32_to_cpu(ex->ee_block) | 2263 | } else if (from == le32_to_cpu(ex->ee_block) |
| 2232 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2264 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { |
| 2233 | /* head removal */ | 2265 | /* head removal */ |
| @@ -2238,7 +2270,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
| 2238 | start = ext4_ext_pblock(ex); | 2270 | start = ext4_ext_pblock(ex); |
| 2239 | 2271 | ||
| 2240 | ext_debug("free first %u blocks starting %llu\n", num, start); | 2272 | ext_debug("free first %u blocks starting %llu\n", num, start); |
| 2241 | ext4_free_blocks(handle, inode, 0, start, num, flags); | 2273 | ext4_free_blocks(handle, inode, NULL, start, num, flags); |
| 2242 | 2274 | ||
| 2243 | } else { | 2275 | } else { |
| 2244 | printk(KERN_INFO "strange request: removal(2) " | 2276 | printk(KERN_INFO "strange request: removal(2) " |
| @@ -2262,19 +2294,19 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
| 2262 | */ | 2294 | */ |
| 2263 | static int | 2295 | static int |
| 2264 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | 2296 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, |
| 2265 | struct ext4_ext_path *path, ext4_lblk_t start, | 2297 | struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster, |
| 2266 | ext4_lblk_t end) | 2298 | ext4_lblk_t start, ext4_lblk_t end) |
| 2267 | { | 2299 | { |
| 2300 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 2268 | int err = 0, correct_index = 0; | 2301 | int err = 0, correct_index = 0; |
| 2269 | int depth = ext_depth(inode), credits; | 2302 | int depth = ext_depth(inode), credits; |
| 2270 | struct ext4_extent_header *eh; | 2303 | struct ext4_extent_header *eh; |
| 2271 | ext4_lblk_t a, b, block; | 2304 | ext4_lblk_t a, b; |
| 2272 | unsigned num; | 2305 | unsigned num; |
| 2273 | ext4_lblk_t ex_ee_block; | 2306 | ext4_lblk_t ex_ee_block; |
| 2274 | unsigned short ex_ee_len; | 2307 | unsigned short ex_ee_len; |
| 2275 | unsigned uninitialized = 0; | 2308 | unsigned uninitialized = 0; |
| 2276 | struct ext4_extent *ex; | 2309 | struct ext4_extent *ex; |
| 2277 | struct ext4_map_blocks map; | ||
| 2278 | 2310 | ||
| 2279 | /* the header must be checked already in ext4_ext_remove_space() */ | 2311 | /* the header must be checked already in ext4_ext_remove_space() */ |
| 2280 | ext_debug("truncate since %u in leaf\n", start); | 2312 | ext_debug("truncate since %u in leaf\n", start); |
| @@ -2291,6 +2323,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 2291 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2323 | ex_ee_block = le32_to_cpu(ex->ee_block); |
| 2292 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2324 | ex_ee_len = ext4_ext_get_actual_len(ex); |
| 2293 | 2325 | ||
| 2326 | trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); | ||
| 2327 | |||
| 2294 | while (ex >= EXT_FIRST_EXTENT(eh) && | 2328 | while (ex >= EXT_FIRST_EXTENT(eh) && |
| 2295 | ex_ee_block + ex_ee_len > start) { | 2329 | ex_ee_block + ex_ee_len > start) { |
| 2296 | 2330 | ||
| @@ -2315,86 +2349,18 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 2315 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2349 | ex_ee_block = le32_to_cpu(ex->ee_block); |
| 2316 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2350 | ex_ee_len = ext4_ext_get_actual_len(ex); |
| 2317 | continue; | 2351 | continue; |
| 2318 | } else if (a != ex_ee_block && | 2352 | } else if (b != ex_ee_block + ex_ee_len - 1) { |
| 2319 | b != ex_ee_block + ex_ee_len - 1) { | 2353 | EXT4_ERROR_INODE(inode, " bad truncate %u:%u\n", |
| 2320 | /* | 2354 | start, end); |
| 2321 | * If this is a truncate, then this condition should | 2355 | err = -EIO; |
| 2322 | * never happen because at least one of the end points | 2356 | goto out; |
| 2323 | * needs to be on the edge of the extent. | ||
| 2324 | */ | ||
| 2325 | if (end == EXT_MAX_BLOCKS - 1) { | ||
| 2326 | ext_debug(" bad truncate %u:%u\n", | ||
| 2327 | start, end); | ||
| 2328 | block = 0; | ||
| 2329 | num = 0; | ||
| 2330 | err = -EIO; | ||
| 2331 | goto out; | ||
| 2332 | } | ||
| 2333 | /* | ||
| 2334 | * else this is a hole punch, so the extent needs to | ||
| 2335 | * be split since neither edge of the hole is on the | ||
| 2336 | * extent edge | ||
| 2337 | */ | ||
| 2338 | else{ | ||
| 2339 | map.m_pblk = ext4_ext_pblock(ex); | ||
| 2340 | map.m_lblk = ex_ee_block; | ||
| 2341 | map.m_len = b - ex_ee_block; | ||
| 2342 | |||
| 2343 | err = ext4_split_extent(handle, | ||
| 2344 | inode, path, &map, 0, | ||
| 2345 | EXT4_GET_BLOCKS_PUNCH_OUT_EXT | | ||
| 2346 | EXT4_GET_BLOCKS_PRE_IO); | ||
| 2347 | |||
| 2348 | if (err < 0) | ||
| 2349 | goto out; | ||
| 2350 | |||
| 2351 | ex_ee_len = ext4_ext_get_actual_len(ex); | ||
| 2352 | |||
| 2353 | b = ex_ee_block+ex_ee_len - 1 < end ? | ||
| 2354 | ex_ee_block+ex_ee_len - 1 : end; | ||
| 2355 | |||
| 2356 | /* Then remove tail of this extent */ | ||
| 2357 | block = ex_ee_block; | ||
| 2358 | num = a - block; | ||
| 2359 | } | ||
| 2360 | } else if (a != ex_ee_block) { | 2357 | } else if (a != ex_ee_block) { |
| 2361 | /* remove tail of the extent */ | 2358 | /* remove tail of the extent */ |
| 2362 | block = ex_ee_block; | 2359 | num = a - ex_ee_block; |
| 2363 | num = a - block; | ||
| 2364 | } else if (b != ex_ee_block + ex_ee_len - 1) { | ||
| 2365 | /* remove head of the extent */ | ||
| 2366 | block = b; | ||
| 2367 | num = ex_ee_block + ex_ee_len - b; | ||
| 2368 | |||
| 2369 | /* | ||
| 2370 | * If this is a truncate, this condition | ||
| 2371 | * should never happen | ||
| 2372 | */ | ||
| 2373 | if (end == EXT_MAX_BLOCKS - 1) { | ||
| 2374 | ext_debug(" bad truncate %u:%u\n", | ||
| 2375 | start, end); | ||
| 2376 | err = -EIO; | ||
| 2377 | goto out; | ||
| 2378 | } | ||
| 2379 | } else { | 2360 | } else { |
| 2380 | /* remove whole extent: excellent! */ | 2361 | /* remove whole extent: excellent! */ |
| 2381 | block = ex_ee_block; | ||
| 2382 | num = 0; | 2362 | num = 0; |
| 2383 | if (a != ex_ee_block) { | ||
| 2384 | ext_debug(" bad truncate %u:%u\n", | ||
| 2385 | start, end); | ||
| 2386 | err = -EIO; | ||
| 2387 | goto out; | ||
| 2388 | } | ||
| 2389 | |||
| 2390 | if (b != ex_ee_block + ex_ee_len - 1) { | ||
| 2391 | ext_debug(" bad truncate %u:%u\n", | ||
| 2392 | start, end); | ||
| 2393 | err = -EIO; | ||
| 2394 | goto out; | ||
| 2395 | } | ||
| 2396 | } | 2363 | } |
| 2397 | |||
| 2398 | /* | 2364 | /* |
| 2399 | * 3 for leaf, sb, and inode plus 2 (bmap and group | 2365 | * 3 for leaf, sb, and inode plus 2 (bmap and group |
| 2400 | * descriptor) for each block group; assume two block | 2366 | * descriptor) for each block group; assume two block |
| @@ -2416,23 +2382,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 2416 | if (err) | 2382 | if (err) |
| 2417 | goto out; | 2383 | goto out; |
| 2418 | 2384 | ||
| 2419 | err = ext4_remove_blocks(handle, inode, ex, a, b); | 2385 | err = ext4_remove_blocks(handle, inode, ex, partial_cluster, |
| 2386 | a, b); | ||
| 2420 | if (err) | 2387 | if (err) |
| 2421 | goto out; | 2388 | goto out; |
| 2422 | 2389 | ||
| 2423 | if (num == 0) { | 2390 | if (num == 0) |
| 2424 | /* this extent is removed; mark slot entirely unused */ | 2391 | /* this extent is removed; mark slot entirely unused */ |
| 2425 | ext4_ext_store_pblock(ex, 0); | 2392 | ext4_ext_store_pblock(ex, 0); |
| 2426 | } else if (block != ex_ee_block) { | ||
| 2427 | /* | ||
| 2428 | * If this was a head removal, then we need to update | ||
| 2429 | * the physical block since it is now at a different | ||
| 2430 | * location | ||
| 2431 | */ | ||
| 2432 | ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a)); | ||
| 2433 | } | ||
| 2434 | 2393 | ||
| 2435 | ex->ee_block = cpu_to_le32(block); | ||
| 2436 | ex->ee_len = cpu_to_le16(num); | 2394 | ex->ee_len = cpu_to_le16(num); |
| 2437 | /* | 2395 | /* |
| 2438 | * Do not mark uninitialized if all the blocks in the | 2396 | * Do not mark uninitialized if all the blocks in the |
| @@ -2440,11 +2398,6 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 2440 | */ | 2398 | */ |
| 2441 | if (uninitialized && num) | 2399 | if (uninitialized && num) |
| 2442 | ext4_ext_mark_uninitialized(ex); | 2400 | ext4_ext_mark_uninitialized(ex); |
| 2443 | |||
| 2444 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
| 2445 | if (err) | ||
| 2446 | goto out; | ||
| 2447 | |||
| 2448 | /* | 2401 | /* |
| 2449 | * If the extent was completely released, | 2402 | * If the extent was completely released, |
| 2450 | * we need to remove it from the leaf | 2403 | * we need to remove it from the leaf |
| @@ -2464,9 +2417,14 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 2464 | sizeof(struct ext4_extent)); | 2417 | sizeof(struct ext4_extent)); |
| 2465 | } | 2418 | } |
| 2466 | le16_add_cpu(&eh->eh_entries, -1); | 2419 | le16_add_cpu(&eh->eh_entries, -1); |
| 2467 | } | 2420 | } else |
| 2421 | *partial_cluster = 0; | ||
| 2468 | 2422 | ||
| 2469 | ext_debug("new extent: %u:%u:%llu\n", block, num, | 2423 | err = ext4_ext_dirty(handle, inode, path + depth); |
| 2424 | if (err) | ||
| 2425 | goto out; | ||
| 2426 | |||
| 2427 | ext_debug("new extent: %u:%u:%llu\n", ex_ee_block, num, | ||
| 2470 | ext4_ext_pblock(ex)); | 2428 | ext4_ext_pblock(ex)); |
| 2471 | ex--; | 2429 | ex--; |
| 2472 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2430 | ex_ee_block = le32_to_cpu(ex->ee_block); |
| @@ -2476,6 +2434,25 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 2476 | if (correct_index && eh->eh_entries) | 2434 | if (correct_index && eh->eh_entries) |
| 2477 | err = ext4_ext_correct_indexes(handle, inode, path); | 2435 | err = ext4_ext_correct_indexes(handle, inode, path); |
| 2478 | 2436 | ||
| 2437 | /* | ||
| 2438 | * If there is still an entry in the leaf node, check to see if | ||
| 2439 | * it references the partial cluster. This is the only place | ||
| 2440 | * where it could; if it doesn't, we can free the cluster. | ||
| 2441 | */ | ||
| 2442 | if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) && | ||
| 2443 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != | ||
| 2444 | *partial_cluster)) { | ||
| 2445 | int flags = EXT4_FREE_BLOCKS_FORGET; | ||
| 2446 | |||
| 2447 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
| 2448 | flags |= EXT4_FREE_BLOCKS_METADATA; | ||
| 2449 | |||
| 2450 | ext4_free_blocks(handle, inode, NULL, | ||
| 2451 | EXT4_C2B(sbi, *partial_cluster), | ||
| 2452 | sbi->s_cluster_ratio, flags); | ||
| 2453 | *partial_cluster = 0; | ||
| 2454 | } | ||
| 2455 | |||
| 2479 | /* if this leaf is free, then we should | 2456 | /* if this leaf is free, then we should |
| 2480 | * remove it from index block above */ | 2457 | * remove it from index block above */ |
| 2481 | if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) | 2458 | if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) |
| @@ -2511,6 +2488,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
| 2511 | struct super_block *sb = inode->i_sb; | 2488 | struct super_block *sb = inode->i_sb; |
| 2512 | int depth = ext_depth(inode); | 2489 | int depth = ext_depth(inode); |
| 2513 | struct ext4_ext_path *path; | 2490 | struct ext4_ext_path *path; |
| 2491 | ext4_fsblk_t partial_cluster = 0; | ||
| 2514 | handle_t *handle; | 2492 | handle_t *handle; |
| 2515 | int i, err; | 2493 | int i, err; |
| 2516 | 2494 | ||
| @@ -2524,6 +2502,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
| 2524 | again: | 2502 | again: |
| 2525 | ext4_ext_invalidate_cache(inode); | 2503 | ext4_ext_invalidate_cache(inode); |
| 2526 | 2504 | ||
| 2505 | trace_ext4_ext_remove_space(inode, start, depth); | ||
| 2506 | |||
| 2527 | /* | 2507 | /* |
| 2528 | * We start scanning from right side, freeing all the blocks | 2508 | * We start scanning from right side, freeing all the blocks |
| 2529 | * after i_size and walking into the tree depth-wise. | 2509 | * after i_size and walking into the tree depth-wise. |
| @@ -2546,7 +2526,8 @@ again: | |||
| 2546 | if (i == depth) { | 2526 | if (i == depth) { |
| 2547 | /* this is leaf block */ | 2527 | /* this is leaf block */ |
| 2548 | err = ext4_ext_rm_leaf(handle, inode, path, | 2528 | err = ext4_ext_rm_leaf(handle, inode, path, |
| 2549 | start, EXT_MAX_BLOCKS - 1); | 2529 | &partial_cluster, start, |
| 2530 | EXT_MAX_BLOCKS - 1); | ||
| 2550 | /* root level has p_bh == NULL, brelse() eats this */ | 2531 | /* root level has p_bh == NULL, brelse() eats this */ |
| 2551 | brelse(path[i].p_bh); | 2532 | brelse(path[i].p_bh); |
| 2552 | path[i].p_bh = NULL; | 2533 | path[i].p_bh = NULL; |
| @@ -2618,6 +2599,24 @@ again: | |||
| 2618 | } | 2599 | } |
| 2619 | } | 2600 | } |
| 2620 | 2601 | ||
| 2602 | trace_ext4_ext_remove_space_done(inode, start, depth, partial_cluster, | ||
| 2603 | path->p_hdr->eh_entries); | ||
| 2604 | |||
| 2605 | /* If we still have something in the partial cluster and we have removed | ||
| 2606 | * even the first extent, then we should free the blocks in the partial | ||
| 2607 | * cluster as well. */ | ||
| 2608 | if (partial_cluster && path->p_hdr->eh_entries == 0) { | ||
| 2609 | int flags = EXT4_FREE_BLOCKS_FORGET; | ||
| 2610 | |||
| 2611 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
| 2612 | flags |= EXT4_FREE_BLOCKS_METADATA; | ||
| 2613 | |||
| 2614 | ext4_free_blocks(handle, inode, NULL, | ||
| 2615 | EXT4_C2B(EXT4_SB(sb), partial_cluster), | ||
| 2616 | EXT4_SB(sb)->s_cluster_ratio, flags); | ||
| 2617 | partial_cluster = 0; | ||
| 2618 | } | ||
| 2619 | |||
| 2621 | /* TODO: flexible tree reduction should be here */ | 2620 | /* TODO: flexible tree reduction should be here */ |
| 2622 | if (path->p_hdr->eh_entries == 0) { | 2621 | if (path->p_hdr->eh_entries == 0) { |
| 2623 | /* | 2622 | /* |
| @@ -2909,17 +2908,29 @@ out: | |||
| 2909 | * a> There is no split required: Entire extent should be initialized | 2908 | * a> There is no split required: Entire extent should be initialized |
| 2910 | * b> Splits in two extents: Write is happening at either end of the extent | 2909 | * b> Splits in two extents: Write is happening at either end of the extent |
| 2911 | * c> Splits in three extents: Someone is writing in the middle of the extent | 2910 | * c> Splits in three extents: Someone is writing in the middle of the extent |
| 2911 | * | ||
| 2912 | * Pre-conditions: | ||
| 2913 | * - The extent pointed to by 'path' is uninitialized. | ||
| 2914 | * - The extent pointed to by 'path' contains a superset | ||
| 2915 | * of the logical span [map->m_lblk, map->m_lblk + map->m_len). | ||
| 2916 | * | ||
| 2917 | * Post-conditions on success: | ||
| 2918 | * - the returned value is the number of blocks beyond map->m_lblk | ||
| 2919 | * that are allocated and initialized. | ||
| 2920 | * It is guaranteed to be >= map->m_len. | ||
| 2912 | */ | 2921 | */ |
| 2913 | static int ext4_ext_convert_to_initialized(handle_t *handle, | 2922 | static int ext4_ext_convert_to_initialized(handle_t *handle, |
| 2914 | struct inode *inode, | 2923 | struct inode *inode, |
| 2915 | struct ext4_map_blocks *map, | 2924 | struct ext4_map_blocks *map, |
| 2916 | struct ext4_ext_path *path) | 2925 | struct ext4_ext_path *path) |
| 2917 | { | 2926 | { |
| 2927 | struct ext4_extent_header *eh; | ||
| 2918 | struct ext4_map_blocks split_map; | 2928 | struct ext4_map_blocks split_map; |
| 2919 | struct ext4_extent zero_ex; | 2929 | struct ext4_extent zero_ex; |
| 2920 | struct ext4_extent *ex; | 2930 | struct ext4_extent *ex; |
| 2921 | ext4_lblk_t ee_block, eof_block; | 2931 | ext4_lblk_t ee_block, eof_block; |
| 2922 | unsigned int allocated, ee_len, depth; | 2932 | unsigned int ee_len, depth; |
| 2933 | int allocated; | ||
| 2923 | int err = 0; | 2934 | int err = 0; |
| 2924 | int split_flag = 0; | 2935 | int split_flag = 0; |
| 2925 | 2936 | ||
| @@ -2933,11 +2944,93 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
| 2933 | eof_block = map->m_lblk + map->m_len; | 2944 | eof_block = map->m_lblk + map->m_len; |
| 2934 | 2945 | ||
| 2935 | depth = ext_depth(inode); | 2946 | depth = ext_depth(inode); |
| 2947 | eh = path[depth].p_hdr; | ||
| 2936 | ex = path[depth].p_ext; | 2948 | ex = path[depth].p_ext; |
| 2937 | ee_block = le32_to_cpu(ex->ee_block); | 2949 | ee_block = le32_to_cpu(ex->ee_block); |
| 2938 | ee_len = ext4_ext_get_actual_len(ex); | 2950 | ee_len = ext4_ext_get_actual_len(ex); |
| 2939 | allocated = ee_len - (map->m_lblk - ee_block); | 2951 | allocated = ee_len - (map->m_lblk - ee_block); |
| 2940 | 2952 | ||
| 2953 | trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); | ||
| 2954 | |||
| 2955 | /* Pre-conditions */ | ||
| 2956 | BUG_ON(!ext4_ext_is_uninitialized(ex)); | ||
| 2957 | BUG_ON(!in_range(map->m_lblk, ee_block, ee_len)); | ||
| 2958 | BUG_ON(map->m_lblk + map->m_len > ee_block + ee_len); | ||
| 2959 | |||
| 2960 | /* | ||
| 2961 | * Attempt to transfer newly initialized blocks from the currently | ||
| 2962 | * uninitialized extent to its left neighbor. This is much cheaper | ||
| 2963 | * than an insertion followed by a merge as those involve costly | ||
| 2964 | * memmove() calls. This is the common case in steady state for | ||
| 2965 | * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append | ||
| 2966 | * writes. | ||
| 2967 | * | ||
| 2968 | * Limitations of the current logic: | ||
| 2969 | * - L1: we only deal with writes at the start of the extent. | ||
| 2970 | * The approach could be extended to writes at the end | ||
| 2971 | * of the extent but this scenario was deemed less common. | ||
| 2972 | * - L2: we do not deal with writes covering the whole extent. | ||
| 2973 | * This would require removing the extent if the transfer | ||
| 2974 | * is possible. | ||
| 2975 | * - L3: we only attempt to merge with an extent stored in the | ||
| 2976 | * same extent tree node. | ||
| 2977 | */ | ||
| 2978 | if ((map->m_lblk == ee_block) && /*L1*/ | ||
| 2979 | (map->m_len < ee_len) && /*L2*/ | ||
| 2980 | (ex > EXT_FIRST_EXTENT(eh))) { /*L3*/ | ||
| 2981 | struct ext4_extent *prev_ex; | ||
| 2982 | ext4_lblk_t prev_lblk; | ||
| 2983 | ext4_fsblk_t prev_pblk, ee_pblk; | ||
| 2984 | unsigned int prev_len, write_len; | ||
| 2985 | |||
| 2986 | prev_ex = ex - 1; | ||
| 2987 | prev_lblk = le32_to_cpu(prev_ex->ee_block); | ||
| 2988 | prev_len = ext4_ext_get_actual_len(prev_ex); | ||
| 2989 | prev_pblk = ext4_ext_pblock(prev_ex); | ||
| 2990 | ee_pblk = ext4_ext_pblock(ex); | ||
| 2991 | write_len = map->m_len; | ||
| 2992 | |||
| 2993 | /* | ||
| 2994 | * A transfer of blocks from 'ex' to 'prev_ex' is allowed | ||
| 2995 | * upon those conditions: | ||
| 2996 | * - C1: prev_ex is initialized, | ||
| 2997 | * - C2: prev_ex is logically abutting ex, | ||
| 2998 | * - C3: prev_ex is physically abutting ex, | ||
| 2999 | * - C4: prev_ex can receive the additional blocks without | ||
| 3000 | * overflowing the (initialized) length limit. | ||
| 3001 | */ | ||
| 3002 | if ((!ext4_ext_is_uninitialized(prev_ex)) && /*C1*/ | ||
| 3003 | ((prev_lblk + prev_len) == ee_block) && /*C2*/ | ||
| 3004 | ((prev_pblk + prev_len) == ee_pblk) && /*C3*/ | ||
| 3005 | (prev_len < (EXT_INIT_MAX_LEN - write_len))) { /*C4*/ | ||
| 3006 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
| 3007 | if (err) | ||
| 3008 | goto out; | ||
| 3009 | |||
| 3010 | trace_ext4_ext_convert_to_initialized_fastpath(inode, | ||
| 3011 | map, ex, prev_ex); | ||
| 3012 | |||
| 3013 | /* Shift the start of ex by 'write_len' blocks */ | ||
| 3014 | ex->ee_block = cpu_to_le32(ee_block + write_len); | ||
| 3015 | ext4_ext_store_pblock(ex, ee_pblk + write_len); | ||
| 3016 | ex->ee_len = cpu_to_le16(ee_len - write_len); | ||
| 3017 | ext4_ext_mark_uninitialized(ex); /* Restore the flag */ | ||
| 3018 | |||
| 3019 | /* Extend prev_ex by 'write_len' blocks */ | ||
| 3020 | prev_ex->ee_len = cpu_to_le16(prev_len + write_len); | ||
| 3021 | |||
| 3022 | /* Mark the block containing both extents as dirty */ | ||
| 3023 | ext4_ext_dirty(handle, inode, path + depth); | ||
| 3024 | |||
| 3025 | /* Update path to point to the right extent */ | ||
| 3026 | path[depth].p_ext = prev_ex; | ||
| 3027 | |||
| 3028 | /* Result: number of initialized blocks past m_lblk */ | ||
| 3029 | allocated = write_len; | ||
| 3030 | goto out; | ||
| 3031 | } | ||
| 3032 | } | ||
| 3033 | |||
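The fast path added above (conditions C1-C4) can be rehearsed outside the kernel. The sketch below is a minimal userspace model, not the kernel code: the struct, the EXT_INIT_MAX_LEN value, and the sample numbers are stand-ins chosen for illustration.

```c
#include <stdio.h>
#include <stdint.h>

#define EXT_INIT_MAX_LEN 32768	/* cap on an initialized extent's length */

struct ext {
	uint32_t lblk;	/* first logical block */
	uint64_t pblk;	/* first physical block */
	uint32_t len;	/* number of blocks */
	int uninit;	/* 1 if the extent is uninitialized */
};

/* Move 'write_len' freshly written blocks from the head of 'ex' into
 * 'prev'; returns 1 on success. Mirrors conditions C1..C4 above. */
static int transfer_to_left(struct ext *prev, struct ext *ex, uint32_t write_len)
{
	if (prev->uninit)				/* C1 */
		return 0;
	if (prev->lblk + prev->len != ex->lblk)		/* C2 */
		return 0;
	if (prev->pblk + prev->len != ex->pblk)		/* C3 */
		return 0;
	if (prev->len >= EXT_INIT_MAX_LEN - write_len)	/* C4 */
		return 0;

	ex->lblk += write_len;	/* shift the start of ex ... */
	ex->pblk += write_len;
	ex->len  -= write_len;
	prev->len += write_len;	/* ... and grow prev by the same amount */
	return 1;
}

int main(void)
{
	struct ext prev = { 100, 5000, 10, 0 };
	struct ext ex   = { 110, 5010, 20, 1 };	/* abuts prev on both axes */

	if (transfer_to_left(&prev, &ex, 4))
		printf("prev now covers [%u..%u], ex starts at %u\n",
		       prev.lblk, prev.lblk + prev.len - 1, ex.lblk);
	return 0;
}
```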
| 2941 | WARN_ON(map->m_lblk < ee_block); | 3034 | WARN_ON(map->m_lblk < ee_block); |
| 2942 | /* | 3035 | /* |
| 2943 | * It is safe to convert extent to initialized via explicit | 3036 | * It is safe to convert extent to initialized via explicit |
| @@ -3165,6 +3258,192 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode, | |||
| 3165 | return ext4_mark_inode_dirty(handle, inode); | 3258 | return ext4_mark_inode_dirty(handle, inode); |
| 3166 | } | 3259 | } |
| 3167 | 3260 | ||
| 3261 | /** | ||
| 3262 | * ext4_find_delalloc_range: find a delayed-allocated block in the given range. | ||
| 3263 | * | ||
| 3264 | * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns | ||
| 3265 | * whether there are any buffers marked for delayed allocation. It returns '1' | ||
| 3266 | * on the first delalloc'ed buffer head found. If no buffer head in the given | ||
| 3267 | * range is marked for delalloc, it returns 0. | ||
| 3268 | * lblk_start should always be <= lblk_end. | ||
| 3269 | * search_hint_reverse indicates that searching in reverse from lblk_end to | ||
| 3270 | * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed | ||
| 3271 | * block sooner). This is useful when blocks are truncated sequentially from | ||
| 3272 | * lblk_start towards lblk_end. | ||
| 3273 | */ | ||
| 3274 | static int ext4_find_delalloc_range(struct inode *inode, | ||
| 3275 | ext4_lblk_t lblk_start, | ||
| 3276 | ext4_lblk_t lblk_end, | ||
| 3277 | int search_hint_reverse) | ||
| 3278 | { | ||
| 3279 | struct address_space *mapping = inode->i_mapping; | ||
| 3280 | struct buffer_head *head, *bh = NULL; | ||
| 3281 | struct page *page; | ||
| 3282 | ext4_lblk_t i, pg_lblk; | ||
| 3283 | pgoff_t index; | ||
| 3284 | |||
| 3285 | /* reverse search won't work if fs block size is less than page size */ | ||
| 3286 | if (inode->i_blkbits < PAGE_CACHE_SHIFT) | ||
| 3287 | search_hint_reverse = 0; | ||
| 3288 | |||
| 3289 | if (search_hint_reverse) | ||
| 3290 | i = lblk_end; | ||
| 3291 | else | ||
| 3292 | i = lblk_start; | ||
| 3293 | |||
| 3294 | index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
| 3295 | |||
| 3296 | while ((i >= lblk_start) && (i <= lblk_end)) { | ||
| 3297 | page = find_get_page(mapping, index); | ||
| 3298 | if (!page) | ||
| 3299 | goto nextpage; | ||
| 3300 | |||
| 3301 | if (!page_has_buffers(page)) | ||
| 3302 | goto nextpage; | ||
| 3303 | |||
| 3304 | head = page_buffers(page); | ||
| 3305 | if (!head) | ||
| 3306 | goto nextpage; | ||
| 3307 | |||
| 3308 | bh = head; | ||
| 3309 | pg_lblk = index << (PAGE_CACHE_SHIFT - | ||
| 3310 | inode->i_blkbits); | ||
| 3311 | do { | ||
| 3312 | if (unlikely(pg_lblk < lblk_start)) { | ||
| 3313 | /* | ||
| 3314 | * This is possible when fs block size is less | ||
| 3315 | * than the page size and our cluster starts/ends in | ||
| 3316 | * the middle of the page, so we need to skip the | ||
| 3317 | * first few blocks until we reach 'lblk'. | ||
| 3318 | */ | ||
| 3319 | pg_lblk++; | ||
| 3320 | continue; | ||
| 3321 | } | ||
| 3322 | |||
| 3323 | /* Check if the buffer is delayed-allocated and not yet | ||
| 3324 | * mapped. (When da buffers are mapped during their | ||
| 3325 | * writeout, their da_mapped bit is set.) | ||
| 3326 | */ | ||
| 3327 | if (buffer_delay(bh) && !buffer_da_mapped(bh)) { | ||
| 3328 | page_cache_release(page); | ||
| 3329 | trace_ext4_find_delalloc_range(inode, | ||
| 3330 | lblk_start, lblk_end, | ||
| 3331 | search_hint_reverse, | ||
| 3332 | 1, i); | ||
| 3333 | return 1; | ||
| 3334 | } | ||
| 3335 | if (search_hint_reverse) | ||
| 3336 | i--; | ||
| 3337 | else | ||
| 3338 | i++; | ||
| 3339 | } while ((i >= lblk_start) && (i <= lblk_end) && | ||
| 3340 | ((bh = bh->b_this_page) != head)); | ||
| 3341 | nextpage: | ||
| 3342 | if (page) | ||
| 3343 | page_cache_release(page); | ||
| 3344 | /* | ||
| 3345 | * Move to next page. 'i' will be the first lblk in the next | ||
| 3346 | * page. | ||
| 3347 | */ | ||
| 3348 | if (search_hint_reverse) | ||
| 3349 | index--; | ||
| 3350 | else | ||
| 3351 | index++; | ||
| 3352 | i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
| 3353 | } | ||
| 3354 | |||
| 3355 | trace_ext4_find_delalloc_range(inode, lblk_start, lblk_end, | ||
| 3356 | search_hint_reverse, 0, 0); | ||
| 3357 | return 0; | ||
| 3358 | } | ||
| 3359 | |||
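The loop above leans on two shift conversions between logical blocks and page indexes. A small standalone check of that arithmetic, assuming 4 KiB pages and 1 KiB filesystem blocks (both values invented for the example):

```c
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12	/* 4096-byte pages */
#define BLKBITS    10	/* 1024-byte filesystem blocks */

int main(void)
{
	uint32_t lblk = 37;

	/* Which page holds logical block 'lblk'? */
	uint64_t index = lblk >> (PAGE_SHIFT - BLKBITS);

	/* First logical block covered by that page. */
	uint32_t pg_lblk = index << (PAGE_SHIFT - BLKBITS);

	printf("block %u lives in page %llu, whose first block is %u\n",
	       lblk, (unsigned long long)index, pg_lblk);
	/* With 4 blocks per page: block 37 -> page 9, first block 36. */
	return 0;
}
```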
| 3360 | int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk, | ||
| 3361 | int search_hint_reverse) | ||
| 3362 | { | ||
| 3363 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 3364 | ext4_lblk_t lblk_start, lblk_end; | ||
| 3365 | lblk_start = lblk & (~(sbi->s_cluster_ratio - 1)); | ||
| 3366 | lblk_end = lblk_start + sbi->s_cluster_ratio - 1; | ||
| 3367 | |||
| 3368 | return ext4_find_delalloc_range(inode, lblk_start, lblk_end, | ||
| 3369 | search_hint_reverse); | ||
| 3370 | } | ||
| 3371 | |||
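The rounding in ext4_find_delalloc_cluster() relies on s_cluster_ratio being a power of two, as bigalloc requires. A one-off userspace check of the mask arithmetic, with an assumed ratio of 16:

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t ratio = 16;	/* example cluster ratio */
	uint32_t lblk  = 100;

	uint32_t lblk_start = lblk & ~(ratio - 1);	/* 96  */
	uint32_t lblk_end   = lblk_start + ratio - 1;	/* 111 */

	printf("block %u falls in cluster [%u..%u]\n",
	       lblk, lblk_start, lblk_end);
	return 0;
}
```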
| 3372 | /** | ||
| 3373 | * Determines how many complete clusters (out of those specified by the 'map') | ||
| 3374 | * are under delalloc and had quota reserved for them. | ||
| 3375 | * This function is called when we are writing out the blocks that were | ||
| 3376 | * originally written with their allocation delayed, but then the space was | ||
| 3377 | * allocated using fallocate() before the delayed allocation could be resolved. | ||
| 3378 | * The cases to look for are: | ||
| 3379 | * ('=' indicates delayed-allocated blocks; | ||
| 3380 | * '-' indicates non-delayed-allocated blocks) | ||
| 3381 | * (a) partial clusters towards beginning and/or end outside of allocated range | ||
| 3382 | * are not delalloc'ed. | ||
| 3383 | * Ex: | ||
| 3384 | * |----c---=|====c====|====c====|===-c----| | ||
| 3385 | * |++++++ allocated ++++++| | ||
| 3386 | * ==> 4 complete clusters in the above example | ||
| 3387 | * | ||
| 3388 | * (b) partial cluster (outside of allocated range) towards either end is | ||
| 3389 | * marked for delayed allocation. In this case, we will exclude that | ||
| 3390 | * cluster. | ||
| 3391 | * Ex: | ||
| 3392 | * |----====c========|========c========| | ||
| 3393 | * |++++++ allocated ++++++| | ||
| 3394 | * ==> 1 complete cluster in the above example | ||
| 3395 | * | ||
| 3396 | * Ex: | ||
| 3397 | * |================c================| | ||
| 3398 | * |++++++ allocated ++++++| | ||
| 3399 | * ==> 0 complete clusters in the above example | ||
| 3400 | * | ||
| 3401 | * ext4_da_update_reserve_space() will be called only if we | ||
| 3402 | * determine here that there were some "entire" clusters that span | ||
| 3403 | * this 'allocated' range. | ||
| 3404 | * In the non-bigalloc case, this function will just end up returning num_blks | ||
| 3405 | * without ever calling ext4_find_delalloc_range. | ||
| 3406 | */ | ||
| 3407 | static unsigned int | ||
| 3408 | get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, | ||
| 3409 | unsigned int num_blks) | ||
| 3410 | { | ||
| 3411 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 3412 | ext4_lblk_t alloc_cluster_start, alloc_cluster_end; | ||
| 3413 | ext4_lblk_t lblk_from, lblk_to, c_offset; | ||
| 3414 | unsigned int allocated_clusters = 0; | ||
| 3415 | |||
| 3416 | alloc_cluster_start = EXT4_B2C(sbi, lblk_start); | ||
| 3417 | alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1); | ||
| 3418 | |||
| 3419 | /* max possible clusters for this allocation */ | ||
| 3420 | allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1; | ||
| 3421 | |||
| 3422 | trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks); | ||
| 3423 | |||
| 3424 | /* Check towards left side */ | ||
| 3425 | c_offset = lblk_start & (sbi->s_cluster_ratio - 1); | ||
| 3426 | if (c_offset) { | ||
| 3427 | lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1)); | ||
| 3428 | lblk_to = lblk_from + c_offset - 1; | ||
| 3429 | |||
| 3430 | if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0)) | ||
| 3431 | allocated_clusters--; | ||
| 3432 | } | ||
| 3433 | |||
| 3434 | /* Now check towards right. */ | ||
| 3435 | c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1); | ||
| 3436 | if (allocated_clusters && c_offset) { | ||
| 3437 | lblk_from = lblk_start + num_blks; | ||
| 3438 | lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1; | ||
| 3439 | |||
| 3440 | if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0)) | ||
| 3441 | allocated_clusters--; | ||
| 3442 | } | ||
| 3443 | |||
| 3444 | return allocated_clusters; | ||
| 3445 | } | ||
| 3446 | |||
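The same computation can be replayed in userspace. In this sketch EXT4_B2C() is modelled as a shift, the cluster ratio is assumed to be 4, and the two delalloc probes are replaced by hard-coded answers, so only the arithmetic is being exercised:

```c
#include <stdio.h>
#include <stdint.h>

#define CLUSTER_BITS 2			/* 4 blocks per cluster */
#define B2C(b) ((b) >> CLUSTER_BITS)

int main(void)
{
	uint32_t lblk_start = 10, num_blks = 6;	/* blocks 10..15 */
	uint32_t ratio = 1u << CLUSTER_BITS;

	uint32_t first_c = B2C(lblk_start);			/* cluster 2 */
	uint32_t last_c  = B2C(lblk_start + num_blks - 1);	/* cluster 3 */
	unsigned clusters = last_c - first_c + 1;		/* 2 */

	/* Pretend the probe found delalloc blocks to the left of block 10
	 * but none after block 15: the head cluster is then excluded. */
	int left_is_delalloc = 1, right_is_delalloc = 0;

	if ((lblk_start & (ratio - 1)) && left_is_delalloc)
		clusters--;
	if (((lblk_start + num_blks) & (ratio - 1)) && clusters &&
	    right_is_delalloc)
		clusters--;

	printf("complete clusters reserved for this range: %u\n", clusters);
	return 0;
}
```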
| 3168 | static int | 3447 | static int |
| 3169 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | 3448 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, |
| 3170 | struct ext4_map_blocks *map, | 3449 | struct ext4_map_blocks *map, |
| @@ -3181,6 +3460,9 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
| 3181 | flags, allocated); | 3460 | flags, allocated); |
| 3182 | ext4_ext_show_leaf(inode, path); | 3461 | ext4_ext_show_leaf(inode, path); |
| 3183 | 3462 | ||
| 3463 | trace_ext4_ext_handle_uninitialized_extents(inode, map, allocated, | ||
| 3464 | newblock); | ||
| 3465 | |||
| 3184 | /* get_block() before submitting the IO, split the extent */ | 3466 | /* get_block() before submitting the IO, split the extent */ |
| 3185 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 3467 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
| 3186 | ret = ext4_split_unwritten_extents(handle, inode, map, | 3468 | ret = ext4_split_unwritten_extents(handle, inode, map, |
| @@ -3190,10 +3472,9 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
| 3190 | * that this IO needs to be converted to written when the IO is | 3472 | * that this IO needs to be converted to written when the IO is |
| 3191 | * completed | 3473 | * completed |
| 3192 | */ | 3474 | */ |
| 3193 | if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { | 3475 | if (io) |
| 3194 | io->flag = EXT4_IO_END_UNWRITTEN; | 3476 | ext4_set_io_unwritten_flag(inode, io); |
| 3195 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | 3477 | else |
| 3196 | } else | ||
| 3197 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | 3478 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
| 3198 | if (ext4_should_dioread_nolock(inode)) | 3479 | if (ext4_should_dioread_nolock(inode)) |
| 3199 | map->m_flags |= EXT4_MAP_UNINIT; | 3480 | map->m_flags |= EXT4_MAP_UNINIT; |
| @@ -3234,14 +3515,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
| 3234 | 3515 | ||
| 3235 | /* buffered write, writepage time, convert */ | 3516 | /* buffered write, writepage time, convert */ |
| 3236 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path); | 3517 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path); |
| 3237 | if (ret >= 0) { | 3518 | if (ret >= 0) |
| 3238 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3519 | ext4_update_inode_fsync_trans(handle, inode, 1); |
| 3239 | err = check_eofblocks_fl(handle, inode, map->m_lblk, path, | ||
| 3240 | map->m_len); | ||
| 3241 | if (err < 0) | ||
| 3242 | goto out2; | ||
| 3243 | } | ||
| 3244 | |||
| 3245 | out: | 3520 | out: |
| 3246 | if (ret <= 0) { | 3521 | if (ret <= 0) { |
| 3247 | err = ret; | 3522 | err = ret; |
| @@ -3270,11 +3545,24 @@ out: | |||
| 3270 | * But fallocate would have already updated quota and block | 3545 | * But fallocate would have already updated quota and block |
| 3271 | * count for this offset. So cancel these reservations | 3546 | * count for this offset. So cancel these reservations |
| 3272 | */ | 3547 | */ |
| 3273 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 3548 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { |
| 3274 | ext4_da_update_reserve_space(inode, allocated, 0); | 3549 | unsigned int reserved_clusters; |
| 3550 | reserved_clusters = get_reserved_cluster_alloc(inode, | ||
| 3551 | map->m_lblk, map->m_len); | ||
| 3552 | if (reserved_clusters) | ||
| 3553 | ext4_da_update_reserve_space(inode, | ||
| 3554 | reserved_clusters, | ||
| 3555 | 0); | ||
| 3556 | } | ||
| 3275 | 3557 | ||
| 3276 | map_out: | 3558 | map_out: |
| 3277 | map->m_flags |= EXT4_MAP_MAPPED; | 3559 | map->m_flags |= EXT4_MAP_MAPPED; |
| 3560 | if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) { | ||
| 3561 | err = check_eofblocks_fl(handle, inode, map->m_lblk, path, | ||
| 3562 | map->m_len); | ||
| 3563 | if (err < 0) | ||
| 3564 | goto out2; | ||
| 3565 | } | ||
| 3278 | out1: | 3566 | out1: |
| 3279 | if (allocated > map->m_len) | 3567 | if (allocated > map->m_len) |
| 3280 | allocated = map->m_len; | 3568 | allocated = map->m_len; |
| @@ -3290,6 +3578,111 @@ out2: | |||
| 3290 | } | 3578 | } |
| 3291 | 3579 | ||
| 3292 | /* | 3580 | /* |
| 3581 | * get_implied_cluster_alloc - check to see if the requested | ||
| 3582 | * allocation (in the map structure) overlaps with a cluster already | ||
| 3583 | * allocated in an extent. | ||
| 3584 | * @sb The filesystem superblock structure | ||
| 3585 | * @map The requested lblk->pblk mapping | ||
| 3586 | * @ex The extent structure which might contain an implied | ||
| 3587 | * cluster allocation | ||
| 3588 | * | ||
| 3589 | * This function is called by ext4_ext_map_blocks() after we failed to | ||
| 3590 | * find blocks that were already in the inode's extent tree. Hence, | ||
| 3591 | * we know that the beginning of the requested region cannot overlap | ||
| 3592 | * the extent from the inode's extent tree. There are three cases we | ||
| 3593 | * want to catch. The first is this case: | ||
| 3594 | * | ||
| 3595 | * |--- cluster # N--| | ||
| 3596 | * |--- extent ---| |---- requested region ---| | ||
| 3597 | * |==========| | ||
| 3598 | * | ||
| 3599 | * The second case that we need to test for is this one: | ||
| 3600 | * | ||
| 3601 | * |--------- cluster # N ----------------| | ||
| 3602 | * |--- requested region --| |------- extent ----| | ||
| 3603 | * |=======================| | ||
| 3604 | * | ||
| 3605 | * The third case is when the requested region lies between two extents | ||
| 3606 | * within the same cluster: | ||
| 3607 | * |------------- cluster # N-------------| | ||
| 3608 | * |----- ex -----| |---- ex_right ----| | ||
| 3609 | * |------ requested region ------| | ||
| 3610 | * |================| | ||
| 3611 | * | ||
| 3612 | * In each of the above cases, we need to set the map->m_pblk and | ||
| 3613 | * map->m_len so they correspond to the extent labelled as | ||
| 3614 | * "|====|" from cluster #N, since it is already in use for data in | ||
| 3615 | * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to | ||
| 3616 | * signal to ext4_ext_map_blocks() that map->m_pblk should be treated | ||
| 3617 | * as a new "allocated" block region. Otherwise, we will return 0 and | ||
| 3618 | * ext4_ext_map_blocks() will then allocate one or more new clusters | ||
| 3619 | * by calling ext4_mb_new_blocks(). | ||
| 3620 | */ | ||
| 3621 | static int get_implied_cluster_alloc(struct super_block *sb, | ||
| 3622 | struct ext4_map_blocks *map, | ||
| 3623 | struct ext4_extent *ex, | ||
| 3624 | struct ext4_ext_path *path) | ||
| 3625 | { | ||
| 3626 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 3627 | ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); | ||
| 3628 | ext4_lblk_t ex_cluster_start, ex_cluster_end; | ||
| 3629 | ext4_lblk_t rr_cluster_start, rr_cluster_end; | ||
| 3630 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); | ||
| 3631 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); | ||
| 3632 | unsigned short ee_len = ext4_ext_get_actual_len(ex); | ||
| 3633 | |||
| 3634 | /* The extent passed in that we are trying to match */ | ||
| 3635 | ex_cluster_start = EXT4_B2C(sbi, ee_block); | ||
| 3636 | ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1); | ||
| 3637 | |||
| 3638 | /* The requested region passed into ext4_map_blocks() */ | ||
| 3639 | rr_cluster_start = EXT4_B2C(sbi, map->m_lblk); | ||
| 3640 | rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1); | ||
| 3641 | |||
| 3642 | if ((rr_cluster_start == ex_cluster_end) || | ||
| 3643 | (rr_cluster_start == ex_cluster_start)) { | ||
| 3644 | if (rr_cluster_start == ex_cluster_end) | ||
| 3645 | ee_start += ee_len - 1; | ||
| 3646 | map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) + | ||
| 3647 | c_offset; | ||
| 3648 | map->m_len = min(map->m_len, | ||
| 3649 | (unsigned) sbi->s_cluster_ratio - c_offset); | ||
| 3650 | /* | ||
| 3651 | * Check for and handle this case: | ||
| 3652 | * | ||
| 3653 | * |--------- cluster # N-------------| | ||
| 3654 | * |------- extent ----| | ||
| 3655 | * |--- requested region ---| | ||
| 3656 | * |===========| | ||
| 3657 | */ | ||
| 3658 | |||
| 3659 | if (map->m_lblk < ee_block) | ||
| 3660 | map->m_len = min(map->m_len, ee_block - map->m_lblk); | ||
| 3661 | |||
| 3662 | /* | ||
| 3663 | * Check for the case where there is already another allocated | ||
| 3664 | * block to the right of 'ex' but before the end of the cluster. | ||
| 3665 | * | ||
| 3666 | * |------------- cluster # N-------------| | ||
| 3667 | * |----- ex -----| |---- ex_right ----| | ||
| 3668 | * |------ requested region ------| | ||
| 3669 | * |================| | ||
| 3670 | */ | ||
| 3671 | if (map->m_lblk > ee_block) { | ||
| 3672 | ext4_lblk_t next = ext4_ext_next_allocated_block(path); | ||
| 3673 | map->m_len = min(map->m_len, next - map->m_lblk); | ||
| 3674 | } | ||
| 3675 | |||
| 3676 | trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1); | ||
| 3677 | return 1; | ||
| 3678 | } | ||
| 3679 | |||
| 3680 | trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0); | ||
| 3681 | return 0; | ||
| 3682 | } | ||
| 3683 | |||
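A worked instance of the first case in the comment above, with invented numbers: four blocks per cluster, an extent covering logical blocks 0-5 at physical 1000-1005, and a request starting at block 7 (inside the same cluster as the extent's tail).

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t ratio = 4;
	uint32_t ee_block = 0, ee_len = 6;
	uint64_t ee_start = 1000;
	uint32_t m_lblk = 7, m_len = 10;

	uint32_t c_offset = m_lblk & (ratio - 1);			/* 3 */
	uint32_t ex_cluster_end = (ee_block + ee_len - 1) / ratio;	/* 1 */
	uint32_t rr_cluster_start = m_lblk / ratio;			/* 1 */

	if (rr_cluster_start == ex_cluster_end) {
		/* Physical block of the extent's last block, rounded down
		 * to its cluster, plus the request's in-cluster offset. */
		uint64_t last_pblk = ee_start + ee_len - 1;		/* 1005 */
		uint64_t m_pblk = (last_pblk & ~(uint64_t)(ratio - 1)) + c_offset;
		uint32_t len = ratio - c_offset;			/* 1 */
		if (len > m_len)
			len = m_len;
		printf("implied mapping: lblk %u -> pblk %llu, len %u\n",
		       m_lblk, (unsigned long long)m_pblk, len);
		/* prints: lblk 7 -> pblk 1007, len 1 */
	}
	return 0;
}
```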
| 3684 | |||
| 3685 | /* | ||
| 3293 | * Block allocation/map/preallocation routine for extent-based files | 3686 | * Block allocation/map/preallocation routine for extent-based files |
| 3294 | * | 3687 | * |
| 3295 | * | 3688 | * |
| @@ -3311,15 +3704,17 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 3311 | struct ext4_map_blocks *map, int flags) | 3704 | struct ext4_map_blocks *map, int flags) |
| 3312 | { | 3705 | { |
| 3313 | struct ext4_ext_path *path = NULL; | 3706 | struct ext4_ext_path *path = NULL; |
| 3314 | struct ext4_extent newex, *ex; | 3707 | struct ext4_extent newex, *ex, *ex2; |
| 3708 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 3315 | ext4_fsblk_t newblock = 0; | 3709 | ext4_fsblk_t newblock = 0; |
| 3316 | int err = 0, depth, ret; | 3710 | int free_on_err = 0, err = 0, depth, ret; |
| 3317 | unsigned int allocated = 0; | 3711 | unsigned int allocated = 0, offset = 0; |
| 3712 | unsigned int allocated_clusters = 0; | ||
| 3318 | unsigned int punched_out = 0; | 3713 | unsigned int punched_out = 0; |
| 3319 | unsigned int result = 0; | 3714 | unsigned int result = 0; |
| 3320 | struct ext4_allocation_request ar; | 3715 | struct ext4_allocation_request ar; |
| 3321 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3716 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; |
| 3322 | struct ext4_map_blocks punch_map; | 3717 | ext4_lblk_t cluster_offset; |
| 3323 | 3718 | ||
| 3324 | ext_debug("blocks %u/%u requested for inode %lu\n", | 3719 | ext_debug("blocks %u/%u requested for inode %lu\n", |
| 3325 | map->m_lblk, map->m_len, inode->i_ino); | 3720 | map->m_lblk, map->m_len, inode->i_ino); |
| @@ -3329,6 +3724,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 3329 | if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && | 3724 | if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && |
| 3330 | ext4_ext_in_cache(inode, map->m_lblk, &newex)) { | 3725 | ext4_ext_in_cache(inode, map->m_lblk, &newex)) { |
| 3331 | if (!newex.ee_start_lo && !newex.ee_start_hi) { | 3726 | if (!newex.ee_start_lo && !newex.ee_start_hi) { |
| 3727 | if ((sbi->s_cluster_ratio > 1) && | ||
| 3728 | ext4_find_delalloc_cluster(inode, map->m_lblk, 0)) | ||
| 3729 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | ||
| 3730 | |||
| 3332 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { | 3731 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { |
| 3333 | /* | 3732 | /* |
| 3334 | * block isn't allocated yet and | 3733 | * block isn't allocated yet and |
| @@ -3339,6 +3738,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 3339 | /* we should allocate requested block */ | 3738 | /* we should allocate requested block */ |
| 3340 | } else { | 3739 | } else { |
| 3341 | /* block is already allocated */ | 3740 | /* block is already allocated */ |
| 3741 | if (sbi->s_cluster_ratio > 1) | ||
| 3742 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | ||
| 3342 | newblock = map->m_lblk | 3743 | newblock = map->m_lblk |
| 3343 | - le32_to_cpu(newex.ee_block) | 3744 | - le32_to_cpu(newex.ee_block) |
| 3344 | + ext4_ext_pblock(&newex); | 3745 | + ext4_ext_pblock(&newex); |
| @@ -3384,8 +3785,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 3384 | * we split out initialized portions during a write. | 3785 | * we split out initialized portions during a write. |
| 3385 | */ | 3786 | */ |
| 3386 | ee_len = ext4_ext_get_actual_len(ex); | 3787 | ee_len = ext4_ext_get_actual_len(ex); |
| 3788 | |||
| 3789 | trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len); | ||
| 3790 | |||
| 3387 | /* if found extent covers block, simply return it */ | 3791 | /* if found extent covers block, simply return it */ |
| 3388 | if (in_range(map->m_lblk, ee_block, ee_len)) { | 3792 | if (in_range(map->m_lblk, ee_block, ee_len)) { |
| 3793 | struct ext4_map_blocks punch_map; | ||
| 3794 | ext4_fsblk_t partial_cluster = 0; | ||
| 3795 | |||
| 3389 | newblock = map->m_lblk - ee_block + ee_start; | 3796 | newblock = map->m_lblk - ee_block + ee_start; |
| 3390 | /* number of remaining blocks in the extent */ | 3797 | /* number of remaining blocks in the extent */ |
| 3391 | allocated = ee_len - (map->m_lblk - ee_block); | 3798 | allocated = ee_len - (map->m_lblk - ee_block); |
| @@ -3469,7 +3876,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 3469 | ext4_ext_invalidate_cache(inode); | 3876 | ext4_ext_invalidate_cache(inode); |
| 3470 | 3877 | ||
| 3471 | err = ext4_ext_rm_leaf(handle, inode, path, | 3878 | err = ext4_ext_rm_leaf(handle, inode, path, |
| 3472 | map->m_lblk, map->m_lblk + punched_out); | 3879 | &partial_cluster, map->m_lblk, |
| 3880 | map->m_lblk + punched_out); | ||
| 3473 | 3881 | ||
| 3474 | if (!err && path->p_hdr->eh_entries == 0) { | 3882 | if (!err && path->p_hdr->eh_entries == 0) { |
| 3475 | /* | 3883 | /* |
| @@ -3492,6 +3900,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 3492 | } | 3900 | } |
| 3493 | } | 3901 | } |
| 3494 | 3902 | ||
| 3903 | if ((sbi->s_cluster_ratio > 1) && | ||
| 3904 | ext4_find_delalloc_cluster(inode, map->m_lblk, 0)) | ||
| 3905 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | ||
| 3906 | |||
| 3495 | /* | 3907 | /* |
| 3496 | * requested block isn't allocated yet; | 3908 | * requested block isn't allocated yet; |
| 3497 | * we couldn't try to create block if create flag is zero | 3909 | * we couldn't try to create block if create flag is zero |
| @@ -3504,9 +3916,25 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 3504 | ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); | 3916 | ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); |
| 3505 | goto out2; | 3917 | goto out2; |
| 3506 | } | 3918 | } |
| 3919 | |||
| 3507 | /* | 3920 | /* |
| 3508 | * Okay, we need to do block allocation. | 3921 | * Okay, we need to do block allocation. |
| 3509 | */ | 3922 | */ |
| 3923 | map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; | ||
| 3924 | newex.ee_block = cpu_to_le32(map->m_lblk); | ||
| 3925 | cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1); | ||
| 3926 | |||
| 3927 | /* | ||
| 3928 | * If we are doing bigalloc, check to see if the extent returned | ||
| 3929 | * by ext4_ext_find_extent() implies a cluster we can use. | ||
| 3930 | */ | ||
| 3931 | if (cluster_offset && ex && | ||
| 3932 | get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { | ||
| 3933 | ar.len = allocated = map->m_len; | ||
| 3934 | newblock = map->m_pblk; | ||
| 3935 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | ||
| 3936 | goto got_allocated_blocks; | ||
| 3937 | } | ||
| 3510 | 3938 | ||
| 3511 | /* find neighbour allocated blocks */ | 3939 | /* find neighbour allocated blocks */ |
| 3512 | ar.lleft = map->m_lblk; | 3940 | ar.lleft = map->m_lblk; |
| @@ -3514,10 +3942,21 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 3514 | if (err) | 3942 | if (err) |
| 3515 | goto out2; | 3943 | goto out2; |
| 3516 | ar.lright = map->m_lblk; | 3944 | ar.lright = map->m_lblk; |
| 3517 | err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); | 3945 | ex2 = NULL; |
| 3946 | err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2); | ||
| 3518 | if (err) | 3947 | if (err) |
| 3519 | goto out2; | 3948 | goto out2; |
| 3520 | 3949 | ||
| 3950 | /* Check if the extent after searching to the right implies a | ||
| 3951 | * cluster we can use. */ | ||
| 3952 | if ((sbi->s_cluster_ratio > 1) && ex2 && | ||
| 3953 | get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) { | ||
| 3954 | ar.len = allocated = map->m_len; | ||
| 3955 | newblock = map->m_pblk; | ||
| 3956 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | ||
| 3957 | goto got_allocated_blocks; | ||
| 3958 | } | ||
| 3959 | |||
| 3521 | /* | 3960 | /* |
| 3522 | * See if request is beyond maximum number of blocks we can have in | 3961 | * See if request is beyond maximum number of blocks we can have in |
| 3523 | * a single extent. For an initialized extent this limit is | 3962 | * a single extent. For an initialized extent this limit is |
| @@ -3532,9 +3971,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 3532 | map->m_len = EXT_UNINIT_MAX_LEN; | 3971 | map->m_len = EXT_UNINIT_MAX_LEN; |
| 3533 | 3972 | ||
| 3534 | /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ | 3973 | /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ |
| 3535 | newex.ee_block = cpu_to_le32(map->m_lblk); | ||
| 3536 | newex.ee_len = cpu_to_le16(map->m_len); | 3974 | newex.ee_len = cpu_to_le16(map->m_len); |
| 3537 | err = ext4_ext_check_overlap(inode, &newex, path); | 3975 | err = ext4_ext_check_overlap(sbi, inode, &newex, path); |
| 3538 | if (err) | 3976 | if (err) |
| 3539 | allocated = ext4_ext_get_actual_len(&newex); | 3977 | allocated = ext4_ext_get_actual_len(&newex); |
| 3540 | else | 3978 | else |
| @@ -3544,7 +3982,18 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 3544 | ar.inode = inode; | 3982 | ar.inode = inode; |
| 3545 | ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); | 3983 | ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); |
| 3546 | ar.logical = map->m_lblk; | 3984 | ar.logical = map->m_lblk; |
| 3547 | ar.len = allocated; | 3985 | /* |
| 3986 | * We calculate the offset from the beginning of the cluster | ||
| 3987 | * for the logical block number, since when we allocate a | ||
| 3988 | * physical cluster, the physical block should start at the | ||
| 3989 | * same offset from the beginning of the cluster. This is | ||
| 3990 | * needed so that future calls to get_implied_cluster_alloc() | ||
| 3991 | * work correctly. | ||
| 3992 | */ | ||
| 3993 | offset = map->m_lblk & (sbi->s_cluster_ratio - 1); | ||
| 3994 | ar.len = EXT4_NUM_B2C(sbi, offset+allocated); | ||
| 3995 | ar.goal -= offset; | ||
| 3996 | ar.logical -= offset; | ||
| 3548 | if (S_ISREG(inode->i_mode)) | 3997 | if (S_ISREG(inode->i_mode)) |
| 3549 | ar.flags = EXT4_MB_HINT_DATA; | 3998 | ar.flags = EXT4_MB_HINT_DATA; |
| 3550 | else | 3999 | else |
| @@ -3557,9 +4006,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 3557 | goto out2; | 4006 | goto out2; |
| 3558 | ext_debug("allocate new block: goal %llu, found %llu/%u\n", | 4007 | ext_debug("allocate new block: goal %llu, found %llu/%u\n", |
| 3559 | ar.goal, newblock, allocated); | 4008 | ar.goal, newblock, allocated); |
| 4009 | free_on_err = 1; | ||
| 4010 | allocated_clusters = ar.len; | ||
| 4011 | ar.len = EXT4_C2B(sbi, ar.len) - offset; | ||
| 4012 | if (ar.len > allocated) | ||
| 4013 | ar.len = allocated; | ||
| 3560 | 4014 | ||
| 4015 | got_allocated_blocks: | ||
| 3561 | /* try to insert new extent into found leaf and return */ | 4016 | /* try to insert new extent into found leaf and return */ |
| 3562 | ext4_ext_store_pblock(&newex, newblock); | 4017 | ext4_ext_store_pblock(&newex, newblock + offset); |
| 3563 | newex.ee_len = cpu_to_le16(ar.len); | 4018 | newex.ee_len = cpu_to_le16(ar.len); |
| 3564 | /* Mark uninitialized */ | 4019 | /* Mark uninitialized */ |
| 3565 | if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ | 4020 | if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ |
| @@ -3572,10 +4027,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 3572 | * that we need to perform conversion when IO is done. | 4027 | * that we need to perform conversion when IO is done. |
| 3573 | */ | 4028 | */ |
| 3574 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 4029 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
| 3575 | if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { | 4030 | if (io) |
| 3576 | io->flag = EXT4_IO_END_UNWRITTEN; | 4031 | ext4_set_io_unwritten_flag(inode, io); |
| 3577 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | 4032 | else |
| 3578 | } else | ||
| 3579 | ext4_set_inode_state(inode, | 4033 | ext4_set_inode_state(inode, |
| 3580 | EXT4_STATE_DIO_UNWRITTEN); | 4034 | EXT4_STATE_DIO_UNWRITTEN); |
| 3581 | } | 4035 | } |
| @@ -3583,11 +4037,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 3583 | map->m_flags |= EXT4_MAP_UNINIT; | 4037 | map->m_flags |= EXT4_MAP_UNINIT; |
| 3584 | } | 4038 | } |
| 3585 | 4039 | ||
| 3586 | err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); | 4040 | err = 0; |
| 4041 | if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) | ||
| 4042 | err = check_eofblocks_fl(handle, inode, map->m_lblk, | ||
| 4043 | path, ar.len); | ||
| 3587 | if (!err) | 4044 | if (!err) |
| 3588 | err = ext4_ext_insert_extent(handle, inode, path, | 4045 | err = ext4_ext_insert_extent(handle, inode, path, |
| 3589 | &newex, flags); | 4046 | &newex, flags); |
| 3590 | if (err) { | 4047 | if (err && free_on_err) { |
| 3591 | int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? | 4048 | int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? |
| 3592 | EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; | 4049 | EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; |
| 3593 | /* free data blocks we just allocated */ | 4050 | /* free data blocks we just allocated */ |
| @@ -3610,8 +4067,82 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 3610 | * Update reserved blocks/metadata blocks after successful | 4067 | * Update reserved blocks/metadata blocks after successful |
| 3611 | * block allocation which had been deferred till now. | 4068 | * block allocation which had been deferred till now. |
| 3612 | */ | 4069 | */ |
| 3613 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 4070 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { |
| 3614 | ext4_da_update_reserve_space(inode, allocated, 1); | 4071 | unsigned int reserved_clusters; |
| 4072 | /* | ||
| 4073 | * Check how many clusters we had reserved this allocated range | ||
| 4074 | */ | ||
| 4075 | reserved_clusters = get_reserved_cluster_alloc(inode, | ||
| 4076 | map->m_lblk, allocated); | ||
| 4077 | if (map->m_flags & EXT4_MAP_FROM_CLUSTER) { | ||
| 4078 | if (reserved_clusters) { | ||
| 4079 | /* | ||
| 4080 | * We have clusters reserved for this range. | ||
| 4081 | * But since we are not doing actual allocation | ||
| 4082 | * and are simply using blocks from a previously | ||
| 4083 | * allocated cluster, we should release the | ||
| 4084 | * reservation and not claim quota. | ||
| 4085 | */ | ||
| 4086 | ext4_da_update_reserve_space(inode, | ||
| 4087 | reserved_clusters, 0); | ||
| 4088 | } | ||
| 4089 | } else { | ||
| 4090 | BUG_ON(allocated_clusters < reserved_clusters); | ||
| 4091 | /* We will claim quota for all newly allocated blocks.*/ | ||
| 4092 | ext4_da_update_reserve_space(inode, allocated_clusters, | ||
| 4093 | 1); | ||
| 4094 | if (reserved_clusters < allocated_clusters) { | ||
| 4095 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
| 4096 | int reservation = allocated_clusters - | ||
| 4097 | reserved_clusters; | ||
| 4098 | /* | ||
| 4099 | * It seems we claimed a few clusters outside of | ||
| 4100 | * the range of this allocation. We should give | ||
| 4101 | * them back to the reservation pool. This can | ||
| 4102 | * happen in the following case: | ||
| 4103 | * | ||
| 4104 | * * Suppose s_cluster_ratio is 4 (i.e., each | ||
| 4105 | * cluster has 4 blocks). Thus, the clusters | ||
| 4106 | * are [0-3],[4-7],[8-11]... | ||
| 4107 | * * First comes a delayed allocation write for | ||
| 4108 | * logical blocks 10 & 11. Since there were no | ||
| 4109 | * previous delayed allocated blocks in the | ||
| 4110 | * range [8-11], we would reserve 1 cluster | ||
| 4111 | * for this write. | ||
| 4112 | * * Next comes a write for logical blocks 3 to 8. | ||
| 4113 | * In this case, we will reserve 2 clusters | ||
| 4114 | * (for [0-3] and [4-7], but not for [8-11], as | ||
| 4115 | * that range has delayed allocated blocks). | ||
| 4116 | * Thus total reserved clusters now becomes 3. | ||
| 4117 | * * Now, during the delayed allocation writeout | ||
| 4118 | * time, we will first write blocks [3-8] and | ||
| 4119 | * allocate 3 clusters for writing these | ||
| 4120 | * blocks. Also, we would claim all these | ||
| 4121 | * three clusters above. | ||
| 4122 | * * Now when we come here to writeout the | ||
| 4123 | * blocks [10-11], we would expect to claim | ||
| 4124 | * the reservation of 1 cluster we had made | ||
| 4125 | * (and we would claim it since there are no | ||
| 4126 | * more delayed allocated blocks in the range | ||
| 4127 | * [8-11]). But our reserved cluster count had | ||
| 4128 | * already gone to 0. | ||
| 4129 | * | ||
| 4130 | * Thus, at step 4 above, when we determine | ||
| 4131 | * that there are still some unwritten delayed | ||
| 4132 | * allocated blocks outside of our current | ||
| 4133 | * block range, we should increment the | ||
| 4134 | * reserved clusters count so that when the | ||
| 4135 | * remaining blocks finally get written, we | ||
| 4136 | * could claim them. | ||
| 4137 | */ | ||
| 4138 | dquot_reserve_block(inode, | ||
| 4139 | EXT4_C2B(sbi, reservation)); | ||
| 4140 | spin_lock(&ei->i_block_reservation_lock); | ||
| 4141 | ei->i_reserved_data_blocks += reservation; | ||
| 4142 | spin_unlock(&ei->i_block_reservation_lock); | ||
| 4143 | } | ||
| 4144 | } | ||
| 4145 | } | ||
| 3615 | 4146 | ||
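The scenario spelled out in the long comment above can be replayed with plain integers; the inode's quota and per-cpu counter machinery are deliberately elided here, so only the bookkeeping is shown:

```c
#include <stdio.h>

int main(void)
{
	int reserved = 0;

	reserved += 1;	/* delalloc write to blocks 10-11: cluster [8-11] */
	reserved += 2;	/* delalloc write to blocks 3-8: [0-3] and [4-7] */

	/* Writeout of blocks 3-8 allocates 3 clusters but had reserved 2. */
	int allocated_clusters = 3, reserved_clusters = 2;
	reserved -= allocated_clusters;				/* claim all three */
	reserved += allocated_clusters - reserved_clusters;	/* put one back */

	/* Later writeout of blocks 10-11 can still claim its cluster. */
	printf("reserved clusters remaining: %d\n", reserved);	/* 1 */
	return 0;
}
```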
| 3616 | /* | 4147 | /* |
| 3617 | * Cache the extent and update transaction to commit on fdatasync only | 4148 | * Cache the extent and update transaction to commit on fdatasync only |
| @@ -3634,12 +4165,12 @@ out2: | |||
| 3634 | ext4_ext_drop_refs(path); | 4165 | ext4_ext_drop_refs(path); |
| 3635 | kfree(path); | 4166 | kfree(path); |
| 3636 | } | 4167 | } |
| 3637 | trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, | ||
| 3638 | newblock, map->m_len, err ? err : allocated); | ||
| 3639 | |||
| 3640 | result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? | 4168 | result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? |
| 3641 | punched_out : allocated; | 4169 | punched_out : allocated; |
| 3642 | 4170 | ||
| 4171 | trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, | ||
| 4172 | newblock, map->m_len, err ? err : result); | ||
| 4173 | |||
| 3643 | return err ? err : result; | 4174 | return err ? err : result; |
| 3644 | } | 4175 | } |
| 3645 | 4176 | ||
| @@ -3649,6 +4180,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
| 3649 | struct super_block *sb = inode->i_sb; | 4180 | struct super_block *sb = inode->i_sb; |
| 3650 | ext4_lblk_t last_block; | 4181 | ext4_lblk_t last_block; |
| 3651 | handle_t *handle; | 4182 | handle_t *handle; |
| 4183 | loff_t page_len; | ||
| 3652 | int err = 0; | 4184 | int err = 0; |
| 3653 | 4185 | ||
| 3654 | /* | 4186 | /* |
| @@ -3665,8 +4197,16 @@ void ext4_ext_truncate(struct inode *inode) | |||
| 3665 | if (IS_ERR(handle)) | 4197 | if (IS_ERR(handle)) |
| 3666 | return; | 4198 | return; |
| 3667 | 4199 | ||
| 3668 | if (inode->i_size & (sb->s_blocksize - 1)) | 4200 | if (inode->i_size % PAGE_CACHE_SIZE != 0) { |
| 3669 | ext4_block_truncate_page(handle, mapping, inode->i_size); | 4201 | page_len = PAGE_CACHE_SIZE - |
| 4202 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
| 4203 | |||
| 4204 | err = ext4_discard_partial_page_buffers(handle, | ||
| 4205 | mapping, inode->i_size, page_len, 0); | ||
| 4206 | |||
| 4207 | if (err) | ||
| 4208 | goto out_stop; | ||
| 4209 | } | ||
| 3670 | 4210 | ||
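The page_len computation above discards exactly the tail of the page containing i_size. A quick check of the arithmetic, assuming 4 KiB pages and an example file size:

```c
#include <stdio.h>

#define PAGE_SIZE 4096

int main(void)
{
	long long i_size = 10000;	/* example file size */

	if (i_size % PAGE_SIZE != 0) {
		long long page_len = PAGE_SIZE - (i_size & (PAGE_SIZE - 1));
		printf("zero %lld bytes from offset %lld to the page end\n",
		       page_len, i_size);	/* 2288 bytes from 10000 */
	}
	return 0;
}
```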
| 3671 | if (ext4_orphan_add(handle, inode)) | 4211 | if (ext4_orphan_add(handle, inode)) |
| 3672 | goto out_stop; | 4212 | goto out_stop; |
| @@ -3760,6 +4300,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
| 3760 | int ret = 0; | 4300 | int ret = 0; |
| 3761 | int ret2 = 0; | 4301 | int ret2 = 0; |
| 3762 | int retries = 0; | 4302 | int retries = 0; |
| 4303 | int flags; | ||
| 3763 | struct ext4_map_blocks map; | 4304 | struct ext4_map_blocks map; |
| 3764 | unsigned int credits, blkbits = inode->i_blkbits; | 4305 | unsigned int credits, blkbits = inode->i_blkbits; |
| 3765 | 4306 | ||
| @@ -3796,6 +4337,16 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
| 3796 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); | 4337 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); |
| 3797 | return ret; | 4338 | return ret; |
| 3798 | } | 4339 | } |
| 4340 | flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; | ||
| 4341 | if (mode & FALLOC_FL_KEEP_SIZE) | ||
| 4342 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; | ||
| 4343 | /* | ||
| 4344 | * Don't normalize the request if it can fit in one extent so | ||
| 4345 | * that it doesn't get unnecessarily split into multiple | ||
| 4346 | * extents. | ||
| 4347 | */ | ||
| 4348 | if (len <= EXT_UNINIT_MAX_LEN << blkbits) | ||
| 4349 | flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; | ||
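The one-extent test above compares byte lengths. With EXT_UNINIT_MAX_LEN at its kernel value of 32767 and 4 KiB blocks assumed, the cutoff works out to just under 128 MiB:

```c
#include <stdio.h>

#define EXT_UNINIT_MAX_LEN 32767

int main(void)
{
	unsigned blkbits = 12;		/* 4 KiB blocks */
	long long len = 64LL << 20;	/* 64 MiB request */

	long long one_extent = (long long)EXT_UNINIT_MAX_LEN << blkbits;
	printf("single-extent limit: %lld bytes -> %s normalization\n",
	       one_extent, len <= one_extent ? "skip" : "allow");
	return 0;
}
```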
| 3799 | retry: | 4350 | retry: |
| 3800 | while (ret >= 0 && ret < max_blocks) { | 4351 | while (ret >= 0 && ret < max_blocks) { |
| 3801 | map.m_lblk = map.m_lblk + ret; | 4352 | map.m_lblk = map.m_lblk + ret; |
| @@ -3805,9 +4356,7 @@ retry: | |||
| 3805 | ret = PTR_ERR(handle); | 4356 | ret = PTR_ERR(handle); |
| 3806 | break; | 4357 | break; |
| 3807 | } | 4358 | } |
| 3808 | ret = ext4_map_blocks(handle, inode, &map, | 4359 | ret = ext4_map_blocks(handle, inode, &map, flags); |
| 3809 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | | ||
| 3810 | EXT4_GET_BLOCKS_NO_NORMALIZE); | ||
| 3811 | if (ret <= 0) { | 4360 | if (ret <= 0) { |
| 3812 | #ifdef EXT4FS_DEBUG | 4361 | #ifdef EXT4FS_DEBUG |
| 3813 | WARN_ON(ret <= 0); | 4362 | WARN_ON(ret <= 0); |
| @@ -4102,7 +4651,6 @@ found_delayed_extent: | |||
| 4102 | return EXT_BREAK; | 4651 | return EXT_BREAK; |
| 4103 | return EXT_CONTINUE; | 4652 | return EXT_CONTINUE; |
| 4104 | } | 4653 | } |
| 4105 | |||
| 4106 | /* fiemap flags we can handle are specified here */ | 4654 | /* fiemap flags we can handle are specified here */ |
| 4107 | #define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) | 4655 | #define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) |
| 4108 | 4656 | ||
| @@ -4162,17 +4710,28 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
| 4162 | struct address_space *mapping = inode->i_mapping; | 4710 | struct address_space *mapping = inode->i_mapping; |
| 4163 | struct ext4_map_blocks map; | 4711 | struct ext4_map_blocks map; |
| 4164 | handle_t *handle; | 4712 | handle_t *handle; |
| 4165 | loff_t first_block_offset, last_block_offset, block_len; | 4713 | loff_t first_page, last_page, page_len; |
| 4166 | loff_t first_page, last_page, first_page_offset, last_page_offset; | 4714 | loff_t first_page_offset, last_page_offset; |
| 4167 | int ret, credits, blocks_released, err = 0; | 4715 | int ret, credits, blocks_released, err = 0; |
| 4168 | 4716 | ||
| 4717 | /* No need to punch hole beyond i_size */ | ||
| 4718 | if (offset >= inode->i_size) | ||
| 4719 | return 0; | ||
| 4720 | |||
| 4721 | /* | ||
| 4722 | * If the hole extends beyond i_size, set the hole | ||
| 4723 | * to end after the page that contains i_size | ||
| 4724 | */ | ||
| 4725 | if (offset + length > inode->i_size) { | ||
| 4726 | length = inode->i_size + | ||
| 4727 | PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) - | ||
| 4728 | offset; | ||
| 4729 | } | ||
| 4730 | |||
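The clamp above shortens the hole so it ends at the boundary of the page holding i_size. A sketch of that arithmetic with invented offsets (4 KiB pages assumed):

```c
#include <stdio.h>

#define PAGE_SIZE 4096LL

int main(void)
{
	long long i_size = 10000, offset = 8000, length = 100000;

	if (offset + length > i_size)
		length = i_size + PAGE_SIZE - (i_size & (PAGE_SIZE - 1)) - offset;

	printf("hole becomes [%lld, %lld)\n", offset, offset + length);
	/* ends at 12288, the end of the page that contains byte 10000 */
	return 0;
}
```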
| 4169 | first_block = (offset + sb->s_blocksize - 1) >> | 4731 | first_block = (offset + sb->s_blocksize - 1) >> |
| 4170 | EXT4_BLOCK_SIZE_BITS(sb); | 4732 | EXT4_BLOCK_SIZE_BITS(sb); |
| 4171 | last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); | 4733 | last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); |
| 4172 | 4734 | ||
| 4173 | first_block_offset = first_block << EXT4_BLOCK_SIZE_BITS(sb); | ||
| 4174 | last_block_offset = last_block << EXT4_BLOCK_SIZE_BITS(sb); | ||
| 4175 | |||
| 4176 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 4735 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
| 4177 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; | 4736 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; |
| 4178 | 4737 | ||
| @@ -4185,11 +4744,10 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
| 4185 | */ | 4744 | */ |
| 4186 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | 4745 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
| 4187 | err = filemap_write_and_wait_range(mapping, | 4746 | err = filemap_write_and_wait_range(mapping, |
| 4188 | first_page_offset == 0 ? 0 : first_page_offset-1, | 4747 | offset, offset + length - 1); |
| 4189 | last_page_offset); | ||
| 4190 | 4748 | ||
| 4191 | if (err) | 4749 | if (err) |
| 4192 | return err; | 4750 | return err; |
| 4193 | } | 4751 | } |
| 4194 | 4752 | ||
| 4195 | /* Now release the pages */ | 4753 | /* Now release the pages */ |
| @@ -4211,24 +4769,64 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
| 4211 | goto out; | 4769 | goto out; |
| 4212 | 4770 | ||
| 4213 | /* | 4771 | /* |
| 4214 | * Now we need to zero out the non-block-aligned data. | 4772 | * Now we need to zero out the non-page-aligned data in the |
| 4215 | * If the file is smaller than a block, just | 4773 | * pages at the start and tail of the hole, and unmap the buffer |
| 4216 | * zero out the middle | 4774 | * heads for the block-aligned regions of the page that were |
| 4775 | * completely zeroed. | ||
| 4217 | */ | 4776 | */ |
| 4218 | if (first_block > last_block) | 4777 | if (first_page > last_page) { |
| 4219 | ext4_block_zero_page_range(handle, mapping, offset, length); | 4778 | /* |
| 4220 | else { | 4779 | * If the file space being truncated is contained within a page |
| 4221 | /* zero out the head of the hole before the first block */ | 4780 | * just zero out and unmap the middle of that page |
| 4222 | block_len = first_block_offset - offset; | 4781 | */ |
| 4223 | if (block_len > 0) | 4782 | err = ext4_discard_partial_page_buffers(handle, |
| 4224 | ext4_block_zero_page_range(handle, mapping, | 4783 | mapping, offset, length, 0); |
| 4225 | offset, block_len); | 4784 | |
| 4226 | 4785 | if (err) | |
| 4227 | /* zero out the tail of the hole after the last block */ | 4786 | goto out; |
| 4228 | block_len = offset + length - last_block_offset; | 4787 | } else { |
| 4229 | if (block_len > 0) { | 4788 | /* |
| 4230 | ext4_block_zero_page_range(handle, mapping, | 4789 | * zero out and unmap the partial page that contains |
| 4231 | last_block_offset, block_len); | 4790 | * the start of the hole |
| 4791 | */ | ||
| 4792 | page_len = first_page_offset - offset; | ||
| 4793 | if (page_len > 0) { | ||
| 4794 | err = ext4_discard_partial_page_buffers(handle, mapping, | ||
| 4795 | offset, page_len, 0); | ||
| 4796 | if (err) | ||
| 4797 | goto out; | ||
| 4798 | } | ||
| 4799 | |||
| 4800 | /* | ||
| 4801 | * zero out and unmap the partial page that contains | ||
| 4802 | * the end of the hole | ||
| 4803 | */ | ||
| 4804 | page_len = offset + length - last_page_offset; | ||
| 4805 | if (page_len > 0) { | ||
| 4806 | err = ext4_discard_partial_page_buffers(handle, mapping, | ||
| 4807 | last_page_offset, page_len, 0); | ||
| 4808 | if (err) | ||
| 4809 | goto out; | ||
| 4810 | } | ||
| 4811 | } | ||
| 4812 | |||
| 4813 | |||
| 4814 | /* | ||
| 4815 | * If i_size is contained in the last page, we need to | ||
| 4816 | * unmap and zero the partial page after i_size | ||
| 4817 | */ | ||
| 4818 | if (inode->i_size >> PAGE_CACHE_SHIFT == last_page && | ||
| 4819 | inode->i_size % PAGE_CACHE_SIZE != 0) { | ||
| 4820 | |||
| 4821 | page_len = PAGE_CACHE_SIZE - | ||
| 4822 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
| 4823 | |||
| 4824 | if (page_len > 0) { | ||
| 4825 | err = ext4_discard_partial_page_buffers(handle, | ||
| 4826 | mapping, inode->i_size, page_len, 0); | ||
| 4827 | |||
| 4828 | if (err) | ||
| 4829 | goto out; | ||
| 4232 | } | 4830 | } |
| 4233 | } | 4831 | } |
| 4234 | 4832 | ||
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b9548f477bb8..cb70f1812a70 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
| @@ -181,8 +181,8 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
| 181 | path.dentry = mnt->mnt_root; | 181 | path.dentry = mnt->mnt_root; |
| 182 | cp = d_path(&path, buf, sizeof(buf)); | 182 | cp = d_path(&path, buf, sizeof(buf)); |
| 183 | if (!IS_ERR(cp)) { | 183 | if (!IS_ERR(cp)) { |
| 184 | memcpy(sbi->s_es->s_last_mounted, cp, | 184 | strlcpy(sbi->s_es->s_last_mounted, cp, |
| 185 | sizeof(sbi->s_es->s_last_mounted)); | 185 | sizeof(sbi->s_es->s_last_mounted)); |
| 186 | ext4_mark_super_dirty(sb); | 186 | ext4_mark_super_dirty(sb); |
| 187 | } | 187 | } |
| 188 | } | 188 | } |
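The file.c hunk swaps memcpy() for strlcpy() so that s_last_mounted is always NUL-terminated and no bytes past the end of the source string are copied. Since glibc does not ship strlcpy(), the demonstration below defines a stand-in with the same contract:

```c
#include <stdio.h>
#include <string.h>

static size_t my_strlcpy(char *dst, const char *src, size_t size)
{
	size_t len = strlen(src);
	if (size) {
		size_t n = len < size - 1 ? len : size - 1;
		memcpy(dst, src, n);
		dst[n] = '\0';		/* always NUL-terminated */
	}
	return len;			/* length it tried to create */
}

int main(void)
{
	char last_mounted[8];
	my_strlcpy(last_mounted, "/mnt/scratch", sizeof(last_mounted));
	printf("recorded: \"%s\"\n", last_mounted);	/* "/mnt/sc" */
	return 0;
}
```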
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 036f78f7a1ef..00a2cb753efd 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
| @@ -75,7 +75,7 @@ static void dump_completed_IO(struct inode * inode) | |||
| 75 | * to written. | 75 | * to written. |
| 76 | * The function return the number of pending IOs on success. | 76 | * The function return the number of pending IOs on success. |
| 77 | */ | 77 | */ |
| 78 | extern int ext4_flush_completed_IO(struct inode *inode) | 78 | int ext4_flush_completed_IO(struct inode *inode) |
| 79 | { | 79 | { |
| 80 | ext4_io_end_t *io; | 80 | ext4_io_end_t *io; |
| 81 | struct ext4_inode_info *ei = EXT4_I(inode); | 81 | struct ext4_inode_info *ei = EXT4_I(inode); |
| @@ -83,14 +83,12 @@ extern int ext4_flush_completed_IO(struct inode *inode) | |||
| 83 | int ret = 0; | 83 | int ret = 0; |
| 84 | int ret2 = 0; | 84 | int ret2 = 0; |
| 85 | 85 | ||
| 86 | if (list_empty(&ei->i_completed_io_list)) | ||
| 87 | return ret; | ||
| 88 | |||
| 89 | dump_completed_IO(inode); | 86 | dump_completed_IO(inode); |
| 90 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 87 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
| 91 | while (!list_empty(&ei->i_completed_io_list)){ | 88 | while (!list_empty(&ei->i_completed_io_list)){ |
| 92 | io = list_entry(ei->i_completed_io_list.next, | 89 | io = list_entry(ei->i_completed_io_list.next, |
| 93 | ext4_io_end_t, list); | 90 | ext4_io_end_t, list); |
| 91 | list_del_init(&io->list); | ||
| 94 | /* | 92 | /* |
| 95 | * Calling ext4_end_io_nolock() to convert completed | 93 | * Calling ext4_end_io_nolock() to convert completed |
| 96 | * IO to written. | 94 | * IO to written. |
| @@ -107,11 +105,9 @@ extern int ext4_flush_completed_IO(struct inode *inode) | |||
| 107 | */ | 105 | */ |
| 108 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 106 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
| 109 | ret = ext4_end_io_nolock(io); | 107 | ret = ext4_end_io_nolock(io); |
| 110 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
| 111 | if (ret < 0) | 108 | if (ret < 0) |
| 112 | ret2 = ret; | 109 | ret2 = ret; |
| 113 | else | 110 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
| 114 | list_del_init(&io->list); | ||
| 115 | } | 111 | } |
| 116 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 112 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
| 117 | return (ret2 < 0) ? ret2 : 0; | 113 | return (ret2 < 0) ? ret2 : 0; |
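The fsync.c hunk moves list_del_init() to before the lock is dropped, so each entry is detached while still protected and a racing thread no longer finds it on the list mid-processing. A minimal userspace rendering of that detach-then-process pattern, using a pthread mutex in place of the kernel's irq-safe spinlock (compile with -pthread):

```c
#include <stdio.h>
#include <pthread.h>

struct node { struct node *next; int val; };

static struct node n2 = { NULL, 2 }, n1 = { &n2, 1 };
static struct node *head = &n1;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

int main(void)
{
	pthread_mutex_lock(&lock);
	while (head) {
		struct node *io = head;
		head = io->next;		/* detach while holding the lock */
		pthread_mutex_unlock(&lock);

		printf("processing %d\n", io->val);	/* work done unlocked */

		pthread_mutex_lock(&lock);
	}
	pthread_mutex_unlock(&lock);
	return 0;
}
```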
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 9c63f273b550..612bec255c6c 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
| @@ -78,7 +78,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, | |||
| 78 | * allocation, essentially implementing a per-group read-only flag. */ | 78 | * allocation, essentially implementing a per-group read-only flag. */ |
| 79 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { | 79 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { |
| 80 | ext4_error(sb, "Checksum bad for group %u", block_group); | 80 | ext4_error(sb, "Checksum bad for group %u", block_group); |
| 81 | ext4_free_blks_set(sb, gdp, 0); | 81 | ext4_free_group_clusters_set(sb, gdp, 0); |
| 82 | ext4_free_inodes_set(sb, gdp, 0); | 82 | ext4_free_inodes_set(sb, gdp, 0); |
| 83 | ext4_itable_unused_set(sb, gdp, 0); | 83 | ext4_itable_unused_set(sb, gdp, 0); |
| 84 | memset(bh->b_data, 0xff, sb->s_blocksize); | 84 | memset(bh->b_data, 0xff, sb->s_blocksize); |
| @@ -293,121 +293,9 @@ error_return: | |||
| 293 | ext4_std_error(sb, fatal); | 293 | ext4_std_error(sb, fatal); |
| 294 | } | 294 | } |
| 295 | 295 | ||
| 296 | /* | ||
| 297 | * There are two policies for allocating an inode. If the new inode is | ||
| 298 | * a directory, then a forward search is made for a block group with both | ||
| 299 | * free space and a low directory-to-inode ratio; if that fails, then of | ||
| 300 | * the groups with above-average free space, the group with the fewest | ||
| 301 | * directories is chosen. | ||
| 302 | * | ||
| 303 | * For other inodes, search forward from the parent directory's block | ||
| 304 | * group to find a free inode. | ||
| 305 | */ | ||
| 306 | static int find_group_dir(struct super_block *sb, struct inode *parent, | ||
| 307 | ext4_group_t *best_group) | ||
| 308 | { | ||
| 309 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
| 310 | unsigned int freei, avefreei; | ||
| 311 | struct ext4_group_desc *desc, *best_desc = NULL; | ||
| 312 | ext4_group_t group; | ||
| 313 | int ret = -1; | ||
| 314 | |||
| 315 | freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); | ||
| 316 | avefreei = freei / ngroups; | ||
| 317 | |||
| 318 | for (group = 0; group < ngroups; group++) { | ||
| 319 | desc = ext4_get_group_desc(sb, group, NULL); | ||
| 320 | if (!desc || !ext4_free_inodes_count(sb, desc)) | ||
| 321 | continue; | ||
| 322 | if (ext4_free_inodes_count(sb, desc) < avefreei) | ||
| 323 | continue; | ||
| 324 | if (!best_desc || | ||
| 325 | (ext4_free_blks_count(sb, desc) > | ||
| 326 | ext4_free_blks_count(sb, best_desc))) { | ||
| 327 | *best_group = group; | ||
| 328 | best_desc = desc; | ||
| 329 | ret = 0; | ||
| 330 | } | ||
| 331 | } | ||
| 332 | return ret; | ||
| 333 | } | ||
| 334 | |||
| 335 | #define free_block_ratio 10 | ||
| 336 | |||
| 337 | static int find_group_flex(struct super_block *sb, struct inode *parent, | ||
| 338 | ext4_group_t *best_group) | ||
| 339 | { | ||
| 340 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 341 | struct ext4_group_desc *desc; | ||
| 342 | struct flex_groups *flex_group = sbi->s_flex_groups; | ||
| 343 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | ||
| 344 | ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); | ||
| 345 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
| 346 | int flex_size = ext4_flex_bg_size(sbi); | ||
| 347 | ext4_group_t best_flex = parent_fbg_group; | ||
| 348 | int blocks_per_flex = sbi->s_blocks_per_group * flex_size; | ||
| 349 | int flexbg_free_blocks; | ||
| 350 | int flex_freeb_ratio; | ||
| 351 | ext4_group_t n_fbg_groups; | ||
| 352 | ext4_group_t i; | ||
| 353 | |||
| 354 | n_fbg_groups = (ngroups + flex_size - 1) >> | ||
| 355 | sbi->s_log_groups_per_flex; | ||
| 356 | |||
| 357 | find_close_to_parent: | ||
| 358 | flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); | ||
| 359 | flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; | ||
| 360 | if (atomic_read(&flex_group[best_flex].free_inodes) && | ||
| 361 | flex_freeb_ratio > free_block_ratio) | ||
| 362 | goto found_flexbg; | ||
| 363 | |||
| 364 | if (best_flex && best_flex == parent_fbg_group) { | ||
| 365 | best_flex--; | ||
| 366 | goto find_close_to_parent; | ||
| 367 | } | ||
| 368 | |||
| 369 | for (i = 0; i < n_fbg_groups; i++) { | ||
| 370 | if (i == parent_fbg_group || i == parent_fbg_group - 1) | ||
| 371 | continue; | ||
| 372 | |||
| 373 | flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); | ||
| 374 | flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; | ||
| 375 | |||
| 376 | if (flex_freeb_ratio > free_block_ratio && | ||
| 377 | (atomic_read(&flex_group[i].free_inodes))) { | ||
| 378 | best_flex = i; | ||
| 379 | goto found_flexbg; | ||
| 380 | } | ||
| 381 | |||
| 382 | if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || | ||
| 383 | ((atomic_read(&flex_group[i].free_blocks) > | ||
| 384 | atomic_read(&flex_group[best_flex].free_blocks)) && | ||
| 385 | atomic_read(&flex_group[i].free_inodes))) | ||
| 386 | best_flex = i; | ||
| 387 | } | ||
| 388 | |||
| 389 | if (!atomic_read(&flex_group[best_flex].free_inodes) || | ||
| 390 | !atomic_read(&flex_group[best_flex].free_blocks)) | ||
| 391 | return -1; | ||
| 392 | |||
| 393 | found_flexbg: | ||
| 394 | for (i = best_flex * flex_size; i < ngroups && | ||
| 395 | i < (best_flex + 1) * flex_size; i++) { | ||
| 396 | desc = ext4_get_group_desc(sb, i, NULL); | ||
| 397 | if (ext4_free_inodes_count(sb, desc)) { | ||
| 398 | *best_group = i; | ||
| 399 | goto out; | ||
| 400 | } | ||
| 401 | } | ||
| 402 | |||
| 403 | return -1; | ||
| 404 | out: | ||
| 405 | return 0; | ||
| 406 | } | ||
| 407 | |||
| 408 | struct orlov_stats { | 296 | struct orlov_stats { |
| 409 | __u32 free_inodes; | 297 | __u32 free_inodes; |
| 410 | __u32 free_blocks; | 298 | __u32 free_clusters; |
| 411 | __u32 used_dirs; | 299 | __u32 used_dirs; |
| 412 | }; | 300 | }; |
| 413 | 301 | ||
| @@ -424,7 +312,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, | |||
| 424 | 312 | ||
| 425 | if (flex_size > 1) { | 313 | if (flex_size > 1) { |
| 426 | stats->free_inodes = atomic_read(&flex_group[g].free_inodes); | 314 | stats->free_inodes = atomic_read(&flex_group[g].free_inodes); |
| 427 | stats->free_blocks = atomic_read(&flex_group[g].free_blocks); | 315 | stats->free_clusters = atomic_read(&flex_group[g].free_clusters); |
| 428 | stats->used_dirs = atomic_read(&flex_group[g].used_dirs); | 316 | stats->used_dirs = atomic_read(&flex_group[g].used_dirs); |
| 429 | return; | 317 | return; |
| 430 | } | 318 | } |
| @@ -432,11 +320,11 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, | |||
| 432 | desc = ext4_get_group_desc(sb, g, NULL); | 320 | desc = ext4_get_group_desc(sb, g, NULL); |
| 433 | if (desc) { | 321 | if (desc) { |
| 434 | stats->free_inodes = ext4_free_inodes_count(sb, desc); | 322 | stats->free_inodes = ext4_free_inodes_count(sb, desc); |
| 435 | stats->free_blocks = ext4_free_blks_count(sb, desc); | 323 | stats->free_clusters = ext4_free_group_clusters(sb, desc); |
| 436 | stats->used_dirs = ext4_used_dirs_count(sb, desc); | 324 | stats->used_dirs = ext4_used_dirs_count(sb, desc); |
| 437 | } else { | 325 | } else { |
| 438 | stats->free_inodes = 0; | 326 | stats->free_inodes = 0; |
| 439 | stats->free_blocks = 0; | 327 | stats->free_clusters = 0; |
| 440 | stats->used_dirs = 0; | 328 | stats->used_dirs = 0; |
| 441 | } | 329 | } |
| 442 | } | 330 | } |
| @@ -471,10 +359,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
| 471 | ext4_group_t real_ngroups = ext4_get_groups_count(sb); | 359 | ext4_group_t real_ngroups = ext4_get_groups_count(sb); |
| 472 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); | 360 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); |
| 473 | unsigned int freei, avefreei; | 361 | unsigned int freei, avefreei; |
| 474 | ext4_fsblk_t freeb, avefreeb; | 362 | ext4_fsblk_t freeb, avefreec; |
| 475 | unsigned int ndirs; | 363 | unsigned int ndirs; |
| 476 | int max_dirs, min_inodes; | 364 | int max_dirs, min_inodes; |
| 477 | ext4_grpblk_t min_blocks; | 365 | ext4_grpblk_t min_clusters; |
| 478 | ext4_group_t i, grp, g, ngroups; | 366 | ext4_group_t i, grp, g, ngroups; |
| 479 | struct ext4_group_desc *desc; | 367 | struct ext4_group_desc *desc; |
| 480 | struct orlov_stats stats; | 368 | struct orlov_stats stats; |
| @@ -490,9 +378,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
| 490 | 378 | ||
| 491 | freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); | 379 | freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); |
| 492 | avefreei = freei / ngroups; | 380 | avefreei = freei / ngroups; |
| 493 | freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | 381 | freeb = EXT4_C2B(sbi, |
| 494 | avefreeb = freeb; | 382 | percpu_counter_read_positive(&sbi->s_freeclusters_counter)); |
| 495 | do_div(avefreeb, ngroups); | 383 | avefreec = freeb; |
| 384 | do_div(avefreec, ngroups); | ||
| 496 | ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); | 385 | ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); |
| 497 | 386 | ||
| 498 | if (S_ISDIR(mode) && | 387 | if (S_ISDIR(mode) && |
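With bigalloc, the superblock counter tracks clusters, so the Orlov path now converts to blocks before averaging: EXT4_C2B() is a left shift by s_cluster_bits, and do_div() is 64-bit division. A worked model of the conversion (cluster size and counter values are hypothetical):

#include <stdio.h>

/* Model of EXT4_C2B(): with bigalloc, one cluster is 2^cluster_bits
 * blocks, so cluster-to-block conversion is a left shift. */
#define C2B(cluster_bits, clusters) \
	((unsigned long long)(clusters) << (cluster_bits))

int main(void)
{
	unsigned cluster_bits = 4;               /* 16 blocks per cluster */
	unsigned long long free_clusters = 1000; /* hypothetical counter */
	unsigned long long ngroups = 8;

	unsigned long long freeb = C2B(cluster_bits, free_clusters);
	unsigned long long avefreec = freeb / ngroups; /* do_div() in-kernel */

	printf("free blocks=%llu, average per group=%llu\n", freeb, avefreec);
	return 0;
}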
| @@ -518,7 +407,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
| 518 | continue; | 407 | continue; |
| 519 | if (stats.free_inodes < avefreei) | 408 | if (stats.free_inodes < avefreei) |
| 520 | continue; | 409 | continue; |
| 521 | if (stats.free_blocks < avefreeb) | 410 | if (stats.free_clusters < avefreec) |
| 522 | continue; | 411 | continue; |
| 523 | grp = g; | 412 | grp = g; |
| 524 | ret = 0; | 413 | ret = 0; |
| @@ -556,7 +445,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
| 556 | min_inodes = avefreei - inodes_per_group*flex_size / 4; | 445 | min_inodes = avefreei - inodes_per_group*flex_size / 4; |
| 557 | if (min_inodes < 1) | 446 | if (min_inodes < 1) |
| 558 | min_inodes = 1; | 447 | min_inodes = 1; |
| 559 | min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4; | 448 | min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4; |
| 560 | 449 | ||
| 561 | /* | 450 | /* |
| 562 | * Start looking in the flex group where we last allocated an | 451 | * Start looking in the flex group where we last allocated an |
| @@ -575,7 +464,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
| 575 | continue; | 464 | continue; |
| 576 | if (stats.free_inodes < min_inodes) | 465 | if (stats.free_inodes < min_inodes) |
| 577 | continue; | 466 | continue; |
| 578 | if (stats.free_blocks < min_blocks) | 467 | if (stats.free_clusters < min_clusters) |
| 579 | continue; | 468 | continue; |
| 580 | goto found_flex_bg; | 469 | goto found_flex_bg; |
| 581 | } | 470 | } |
| @@ -659,7 +548,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
| 659 | *group = parent_group; | 548 | *group = parent_group; |
| 660 | desc = ext4_get_group_desc(sb, *group, NULL); | 549 | desc = ext4_get_group_desc(sb, *group, NULL); |
| 661 | if (desc && ext4_free_inodes_count(sb, desc) && | 550 | if (desc && ext4_free_inodes_count(sb, desc) && |
| 662 | ext4_free_blks_count(sb, desc)) | 551 | ext4_free_group_clusters(sb, desc)) |
| 663 | return 0; | 552 | return 0; |
| 664 | 553 | ||
| 665 | /* | 554 | /* |
| @@ -683,7 +572,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
| 683 | *group -= ngroups; | 572 | *group -= ngroups; |
| 684 | desc = ext4_get_group_desc(sb, *group, NULL); | 573 | desc = ext4_get_group_desc(sb, *group, NULL); |
| 685 | if (desc && ext4_free_inodes_count(sb, desc) && | 574 | if (desc && ext4_free_inodes_count(sb, desc) && |
| 686 | ext4_free_blks_count(sb, desc)) | 575 | ext4_free_group_clusters(sb, desc)) |
| 687 | return 0; | 576 | return 0; |
| 688 | } | 577 | } |
| 689 | 578 | ||
| @@ -802,7 +691,7 @@ err_ret: | |||
| 802 | * group to find a free inode. | 691 | * group to find a free inode. |
| 803 | */ | 692 | */ |
| 804 | struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, | 693 | struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, |
| 805 | const struct qstr *qstr, __u32 goal) | 694 | const struct qstr *qstr, __u32 goal, uid_t *owner) |
| 806 | { | 695 | { |
| 807 | struct super_block *sb; | 696 | struct super_block *sb; |
| 808 | struct buffer_head *inode_bitmap_bh = NULL; | 697 | struct buffer_head *inode_bitmap_bh = NULL; |
| @@ -816,8 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, | |||
| 816 | int ret2, err = 0; | 705 | int ret2, err = 0; |
| 817 | struct inode *ret; | 706 | struct inode *ret; |
| 818 | ext4_group_t i; | 707 | ext4_group_t i; |
| 819 | int free = 0; | ||
| 820 | static int once = 1; | ||
| 821 | ext4_group_t flex_group; | 708 | ext4_group_t flex_group; |
| 822 | 709 | ||
| 823 | /* Cannot create files in a deleted directory */ | 710 | /* Cannot create files in a deleted directory */ |
| @@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, | |||
| 843 | goto got_group; | 730 | goto got_group; |
| 844 | } | 731 | } |
| 845 | 732 | ||
| 846 | if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { | 733 | if (S_ISDIR(mode)) |
| 847 | ret2 = find_group_flex(sb, dir, &group); | 734 | ret2 = find_group_orlov(sb, dir, &group, mode, qstr); |
| 848 | if (ret2 == -1) { | 735 | else |
| 849 | ret2 = find_group_other(sb, dir, &group, mode); | ||
| 850 | if (ret2 == 0 && once) { | ||
| 851 | once = 0; | ||
| 852 | printk(KERN_NOTICE "ext4: find_group_flex " | ||
| 853 | "failed, fallback succeeded dir %lu\n", | ||
| 854 | dir->i_ino); | ||
| 855 | } | ||
| 856 | } | ||
| 857 | goto got_group; | ||
| 858 | } | ||
| 859 | |||
| 860 | if (S_ISDIR(mode)) { | ||
| 861 | if (test_opt(sb, OLDALLOC)) | ||
| 862 | ret2 = find_group_dir(sb, dir, &group); | ||
| 863 | else | ||
| 864 | ret2 = find_group_orlov(sb, dir, &group, mode, qstr); | ||
| 865 | } else | ||
| 866 | ret2 = find_group_other(sb, dir, &group, mode); | 736 | ret2 = find_group_other(sb, dir, &group, mode); |
| 867 | 737 | ||
| 868 | got_group: | 738 | got_group: |
| @@ -950,26 +820,21 @@ got: | |||
| 950 | goto fail; | 820 | goto fail; |
| 951 | } | 821 | } |
| 952 | 822 | ||
| 953 | free = 0; | 823 | BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap"); |
| 954 | ext4_lock_group(sb, group); | 824 | err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh); |
| 825 | brelse(block_bitmap_bh); | ||
| 826 | |||
| 955 | /* recheck and clear flag under lock if we still need to */ | 827 | /* recheck and clear flag under lock if we still need to */ |
| 828 | ext4_lock_group(sb, group); | ||
| 956 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 829 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
| 957 | free = ext4_free_blocks_after_init(sb, group, gdp); | ||
| 958 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | 830 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); |
| 959 | ext4_free_blks_set(sb, gdp, free); | 831 | ext4_free_group_clusters_set(sb, gdp, |
| 832 | ext4_free_clusters_after_init(sb, group, gdp)); | ||
| 960 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, | 833 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, |
| 961 | gdp); | 834 | gdp); |
| 962 | } | 835 | } |
| 963 | ext4_unlock_group(sb, group); | 836 | ext4_unlock_group(sb, group); |
| 964 | 837 | ||
| 965 | /* Don't need to dirty bitmap block if we didn't change it */ | ||
| 966 | if (free) { | ||
| 967 | BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap"); | ||
| 968 | err = ext4_handle_dirty_metadata(handle, | ||
| 969 | NULL, block_bitmap_bh); | ||
| 970 | } | ||
| 971 | |||
| 972 | brelse(block_bitmap_bh); | ||
| 973 | if (err) | 838 | if (err) |
| 974 | goto fail; | 839 | goto fail; |
| 975 | } | 840 | } |
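The reordered initialization above keeps the classic double-check idiom: BG_BLOCK_UNINIT is retested and cleared under the group lock so that two racing allocators cannot both initialize the free-cluster count. Roughly, with pthread locking standing in for ext4_lock_group() (a sketch, not the kernel code):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t group_lock = PTHREAD_MUTEX_INITIALIZER;
static bool block_uninit = true;   /* models the EXT4_BG_BLOCK_UNINIT flag */
static unsigned free_clusters;

/* Recheck and clear the flag under the lock so that two racing
 * allocators cannot both initialize the group's free count. */
static void init_group_once(unsigned clusters_after_init)
{
	pthread_mutex_lock(&group_lock);
	if (block_uninit) {            /* recheck under lock */
		block_uninit = false;
		free_clusters = clusters_after_init;
		/* the kernel also recomputes the descriptor checksum here */
	}
	pthread_mutex_unlock(&group_lock);
}

int main(void)
{
	init_group_once(32768);
	printf("free clusters after init: %u\n", free_clusters);
	return 0;
}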
| @@ -987,8 +852,11 @@ got: | |||
| 987 | flex_group = ext4_flex_group(sbi, group); | 852 | flex_group = ext4_flex_group(sbi, group); |
| 988 | atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); | 853 | atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); |
| 989 | } | 854 | } |
| 990 | 855 | if (owner) { | |
| 991 | if (test_opt(sb, GRPID)) { | 856 | inode->i_mode = mode; |
| 857 | inode->i_uid = owner[0]; | ||
| 858 | inode->i_gid = owner[1]; | ||
| 859 | } else if (test_opt(sb, GRPID)) { | ||
| 992 | inode->i_mode = mode; | 860 | inode->i_mode = mode; |
| 993 | inode->i_uid = current_fsuid(); | 861 | inode->i_uid = current_fsuid(); |
| 994 | inode->i_gid = dir->i_gid; | 862 | inode->i_gid = dir->i_gid; |
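The new owner argument lets a caller pin the inode's uid/gid explicitly, taking precedence over the grpid mount option and the default fsuid/fsgid rules. A simplified model of that precedence (the names and values here are illustrative):

#include <stdio.h>

struct cred_ids { unsigned uid, gid; };

/* Illustrative only: ownership precedence for the new inode. */
static struct cred_ids pick_owner(const unsigned *owner, int grpid,
				  unsigned fsuid, unsigned fsgid,
				  unsigned dir_gid)
{
	struct cred_ids ids;
	if (owner) {                 /* caller-supplied uid/gid pair */
		ids.uid = owner[0];
		ids.gid = owner[1];
	} else if (grpid) {          /* -o grpid: inherit group from dir */
		ids.uid = fsuid;
		ids.gid = dir_gid;
	} else {                     /* default */
		ids.uid = fsuid;
		ids.gid = fsgid;
	}
	return ids;
}

int main(void)
{
	unsigned owner[2] = { 1000, 1000 };
	struct cred_ids ids = pick_owner(owner, 0, 0, 0, 50);
	printf("uid=%u gid=%u\n", ids.uid, ids.gid);
	return 0;
}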
| @@ -1005,11 +873,7 @@ got: | |||
| 1005 | ei->i_dir_start_lookup = 0; | 873 | ei->i_dir_start_lookup = 0; |
| 1006 | ei->i_disksize = 0; | 874 | ei->i_disksize = 0; |
| 1007 | 875 | ||
| 1008 | /* | 876 | /* Don't inherit extent flag from directory, amongst others. */ |
| 1009 | * Don't inherit extent flag from directory, amongst others. We set | ||
| 1010 | * extent flag on newly created directory and file only if -o extent | ||
| 1011 | * mount option is specified | ||
| 1012 | */ | ||
| 1013 | ei->i_flags = | 877 | ei->i_flags = |
| 1014 | ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED); | 878 | ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED); |
| 1015 | ei->i_file_acl = 0; | 879 | ei->i_file_acl = 0; |
| @@ -1235,7 +1099,7 @@ unsigned long ext4_count_dirs(struct super_block * sb) | |||
| 1235 | * inode allocation from the current group, so we take alloc_sem lock, to | 1099 | * inode allocation from the current group, so we take alloc_sem lock, to |
| 1236 | * block ext4_claim_inode until we are finished. | 1100 | * block ext4_claim_inode until we are finished. |
| 1237 | */ | 1101 | */ |
| 1238 | extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | 1102 | int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, |
| 1239 | int barrier) | 1103 | int barrier) |
| 1240 | { | 1104 | { |
| 1241 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 1105 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 0962642119c0..3cfc73fbca8e 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
| @@ -699,6 +699,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | |||
| 699 | /* | 699 | /* |
| 700 | * Okay, we need to do block allocation. | 700 | * Okay, we need to do block allocation. |
| 701 | */ | 701 | */ |
| 702 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
| 703 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | ||
| 704 | EXT4_ERROR_INODE(inode, "Can't allocate blocks for " | ||
| 705 | "non-extent mapped inodes with bigalloc"); | ||
| 706 | return -ENOSPC; | ||
| 707 | } | ||
| 708 | |||
| 702 | goal = ext4_find_goal(inode, map->m_lblk, partial); | 709 | goal = ext4_find_goal(inode, map->m_lblk, partial); |
| 703 | 710 | ||
| 704 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 711 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
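The guard above refuses block allocation for indirect-mapped files on a bigalloc filesystem, where cluster-granular allocation is only wired up for extent-mapped inodes. Feature tests like this reduce to a masked read of the superblock's ro_compat field; a minimal model follows (the 0x0200 bit value is an assumption for illustration):

#include <stdint.h>
#include <stdio.h>

#define RO_COMPAT_BIGALLOC 0x0200u   /* assumed bit value, illustrative */

struct sb_model { uint32_t s_feature_ro_compat; };

static int has_ro_compat(const struct sb_model *sb, uint32_t mask)
{
	return (sb->s_feature_ro_compat & mask) != 0;
}

int main(void)
{
	struct sb_model sb = { .s_feature_ro_compat = RO_COMPAT_BIGALLOC };
	if (has_ro_compat(&sb, RO_COMPAT_BIGALLOC))
		printf("bigalloc enabled: refuse non-extent allocation\n");
	return 0;
}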
| @@ -1343,7 +1350,9 @@ void ext4_ind_truncate(struct inode *inode) | |||
| 1343 | __le32 nr = 0; | 1350 | __le32 nr = 0; |
| 1344 | int n = 0; | 1351 | int n = 0; |
| 1345 | ext4_lblk_t last_block, max_block; | 1352 | ext4_lblk_t last_block, max_block; |
| 1353 | loff_t page_len; | ||
| 1346 | unsigned blocksize = inode->i_sb->s_blocksize; | 1354 | unsigned blocksize = inode->i_sb->s_blocksize; |
| 1355 | int err; | ||
| 1347 | 1356 | ||
| 1348 | handle = start_transaction(inode); | 1357 | handle = start_transaction(inode); |
| 1349 | if (IS_ERR(handle)) | 1358 | if (IS_ERR(handle)) |
| @@ -1354,9 +1363,16 @@ void ext4_ind_truncate(struct inode *inode) | |||
| 1354 | max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) | 1363 | max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) |
| 1355 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | 1364 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); |
| 1356 | 1365 | ||
| 1357 | if (inode->i_size & (blocksize - 1)) | 1366 | if (inode->i_size % PAGE_CACHE_SIZE != 0) { |
| 1358 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) | 1367 | page_len = PAGE_CACHE_SIZE - |
| 1368 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
| 1369 | |||
| 1370 | err = ext4_discard_partial_page_buffers(handle, | ||
| 1371 | mapping, inode->i_size, page_len, 0); | ||
| 1372 | |||
| 1373 | if (err) | ||
| 1359 | goto out_stop; | 1374 | goto out_stop; |
| 1375 | } | ||
| 1360 | 1376 | ||
| 1361 | if (last_block != max_block) { | 1377 | if (last_block != max_block) { |
| 1362 | n = ext4_block_to_path(inode, last_block, offsets, NULL); | 1378 | n = ext4_block_to_path(inode, last_block, offsets, NULL); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0defe0bfe019..f2419a15b81a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -42,7 +42,6 @@ | |||
| 42 | #include "ext4_jbd2.h" | 42 | #include "ext4_jbd2.h" |
| 43 | #include "xattr.h" | 43 | #include "xattr.h" |
| 44 | #include "acl.h" | 44 | #include "acl.h" |
| 45 | #include "ext4_extents.h" | ||
| 46 | #include "truncate.h" | 45 | #include "truncate.h" |
| 47 | 46 | ||
| 48 | #include <trace/events/ext4.h> | 47 | #include <trace/events/ext4.h> |
| @@ -268,7 +267,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
| 268 | struct ext4_inode_info *ei = EXT4_I(inode); | 267 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 269 | 268 | ||
| 270 | spin_lock(&ei->i_block_reservation_lock); | 269 | spin_lock(&ei->i_block_reservation_lock); |
| 271 | trace_ext4_da_update_reserve_space(inode, used); | 270 | trace_ext4_da_update_reserve_space(inode, used, quota_claim); |
| 272 | if (unlikely(used > ei->i_reserved_data_blocks)) { | 271 | if (unlikely(used > ei->i_reserved_data_blocks)) { |
| 273 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " | 272 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " |
| 274 | "with only %d reserved data blocks\n", | 273 | "with only %d reserved data blocks\n", |
| @@ -281,7 +280,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
| 281 | /* Update per-inode reservations */ | 280 | /* Update per-inode reservations */ |
| 282 | ei->i_reserved_data_blocks -= used; | 281 | ei->i_reserved_data_blocks -= used; |
| 283 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; | 282 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; |
| 284 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 283 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
| 285 | used + ei->i_allocated_meta_blocks); | 284 | used + ei->i_allocated_meta_blocks); |
| 286 | ei->i_allocated_meta_blocks = 0; | 285 | ei->i_allocated_meta_blocks = 0; |
| 287 | 286 | ||
| @@ -291,7 +290,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
| 291 | * only when we have written all of the delayed | 290 | * only when we have written all of the delayed |
| 292 | * allocation blocks. | 291 | * allocation blocks. |
| 293 | */ | 292 | */ |
| 294 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 293 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
| 295 | ei->i_reserved_meta_blocks); | 294 | ei->i_reserved_meta_blocks); |
| 296 | ei->i_reserved_meta_blocks = 0; | 295 | ei->i_reserved_meta_blocks = 0; |
| 297 | ei->i_da_metadata_calc_len = 0; | 296 | ei->i_da_metadata_calc_len = 0; |
| @@ -300,14 +299,14 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
| 300 | 299 | ||
| 301 | /* Update quota subsystem for data blocks */ | 300 | /* Update quota subsystem for data blocks */ |
| 302 | if (quota_claim) | 301 | if (quota_claim) |
| 303 | dquot_claim_block(inode, used); | 302 | dquot_claim_block(inode, EXT4_C2B(sbi, used)); |
| 304 | else { | 303 | else { |
| 305 | /* | 304 | /* |
| 306 | * We did fallocate with an offset that is already delayed | 305 | * We did fallocate with an offset that is already delayed |
| 307 | * allocated. So on delayed allocated writeback we should | 306 | * allocated. So on delayed allocated writeback we should |
| 308 | * not re-claim the quota for fallocated blocks. | 307 | * not re-claim the quota for fallocated blocks. |
| 309 | */ | 308 | */ |
| 310 | dquot_release_reservation_block(inode, used); | 309 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, used)); |
| 311 | } | 310 | } |
| 312 | 311 | ||
| 313 | /* | 312 | /* |
| @@ -399,6 +398,49 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
| 399 | } | 398 | } |
| 400 | 399 | ||
| 401 | /* | 400 | /* |
| 401 | * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map. | ||
| 402 | */ | ||
| 403 | static void set_buffers_da_mapped(struct inode *inode, | ||
| 404 | struct ext4_map_blocks *map) | ||
| 405 | { | ||
| 406 | struct address_space *mapping = inode->i_mapping; | ||
| 407 | struct pagevec pvec; | ||
| 408 | int i, nr_pages; | ||
| 409 | pgoff_t index, end; | ||
| 410 | |||
| 411 | index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
| 412 | end = (map->m_lblk + map->m_len - 1) >> | ||
| 413 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
| 414 | |||
| 415 | pagevec_init(&pvec, 0); | ||
| 416 | while (index <= end) { | ||
| 417 | nr_pages = pagevec_lookup(&pvec, mapping, index, | ||
| 418 | min(end - index + 1, | ||
| 419 | (pgoff_t)PAGEVEC_SIZE)); | ||
| 420 | if (nr_pages == 0) | ||
| 421 | break; | ||
| 422 | for (i = 0; i < nr_pages; i++) { | ||
| 423 | struct page *page = pvec.pages[i]; | ||
| 424 | struct buffer_head *bh, *head; | ||
| 425 | |||
| 426 | if (unlikely(page->mapping != mapping) || | ||
| 427 | !PageDirty(page)) | ||
| 428 | break; | ||
| 429 | |||
| 430 | if (page_has_buffers(page)) { | ||
| 431 | bh = head = page_buffers(page); | ||
| 432 | do { | ||
| 433 | set_buffer_da_mapped(bh); | ||
| 434 | bh = bh->b_this_page; | ||
| 435 | } while (bh != head); | ||
| 436 | } | ||
| 437 | index++; | ||
| 438 | } | ||
| 439 | pagevec_release(&pvec); | ||
| 440 | } | ||
| 441 | } | ||
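set_buffers_da_mapped() above turns a logical block range into a page index range with lblk >> (PAGE_CACHE_SHIFT - i_blkbits). Worked through with an assumed geometry (4K pages, 1K blocks, so four blocks per page):

#include <stdio.h>

int main(void)
{
	unsigned page_shift = 12;   /* 4K pages */
	unsigned blkbits = 10;      /* 1K blocks: 4 blocks per page */
	unsigned long m_lblk = 13, m_len = 6;

	unsigned long index = m_lblk >> (page_shift - blkbits);
	unsigned long end = (m_lblk + m_len - 1) >> (page_shift - blkbits);

	/* blocks 13..18 live in pages 3..4 */
	printf("pages %lu..%lu\n", index, end);
	return 0;
}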
| 442 | |||
| 443 | /* | ||
| 402 | * The ext4_map_blocks() function tries to look up the requested blocks, | 444 | * The ext4_map_blocks() function tries to look up the requested blocks, |
| 403 | * and returns if the blocks are already mapped. | 445 | * and returns if the blocks are already mapped. |
| 404 | * | 446 | * |
| @@ -416,7 +458,7 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
| 416 | * the buffer head is mapped. | 458 | * the buffer head is mapped. |
| 417 | * | 459 | * |
| 418 | * It returns 0 if plain look up failed (blocks have not been allocated), in | 460 | * It returns 0 if plain look up failed (blocks have not been allocated), in |
| 419 | * that casem, buffer head is unmapped | 461 | * that case, buffer head is unmapped |
| 420 | * | 462 | * |
| 421 | * It returns the error in case of allocation failure. | 463 | * It returns the error in case of allocation failure. |
| 422 | */ | 464 | */ |
| @@ -435,9 +477,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
| 435 | */ | 477 | */ |
| 436 | down_read((&EXT4_I(inode)->i_data_sem)); | 478 | down_read((&EXT4_I(inode)->i_data_sem)); |
| 437 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 479 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
| 438 | retval = ext4_ext_map_blocks(handle, inode, map, 0); | 480 | retval = ext4_ext_map_blocks(handle, inode, map, flags & |
| 481 | EXT4_GET_BLOCKS_KEEP_SIZE); | ||
| 439 | } else { | 482 | } else { |
| 440 | retval = ext4_ind_map_blocks(handle, inode, map, 0); | 483 | retval = ext4_ind_map_blocks(handle, inode, map, flags & |
| 484 | EXT4_GET_BLOCKS_KEEP_SIZE); | ||
| 441 | } | 485 | } |
| 442 | up_read((&EXT4_I(inode)->i_data_sem)); | 486 | up_read((&EXT4_I(inode)->i_data_sem)); |
| 443 | 487 | ||
| @@ -455,7 +499,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
| 455 | * Returns if the blocks have already allocated | 499 | * Returns if the blocks have already allocated |
| 456 | * | 500 | * |
| 457 | * Note that if blocks have been preallocated | 501 | * Note that if blocks have been preallocated |
| 458 | * ext4_ext_get_block() returns th create = 0 | 502 | * ext4_ext_get_block() returns the create = 0 |
| 459 | * with buffer head unmapped. | 503 | * with buffer head unmapped. |
| 460 | */ | 504 | */ |
| 461 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) | 505 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) |
| @@ -517,9 +561,17 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
| 517 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) | 561 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) |
| 518 | ext4_da_update_reserve_space(inode, retval, 1); | 562 | ext4_da_update_reserve_space(inode, retval, 1); |
| 519 | } | 563 | } |
| 520 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 564 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { |
| 521 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); | 565 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); |
| 522 | 566 | ||
| 567 | /* If we have successfully mapped the delayed allocated blocks, | ||
| 568 | * set the BH_Da_Mapped bit on them. It's important to do this | ||
| 569 | * under the protection of i_data_sem. | ||
| 570 | */ | ||
| 571 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) | ||
| 572 | set_buffers_da_mapped(inode, map); | ||
| 573 | } | ||
| 574 | |||
| 523 | up_write((&EXT4_I(inode)->i_data_sem)); | 575 | up_write((&EXT4_I(inode)->i_data_sem)); |
| 524 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 576 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
| 525 | int ret = check_block_validity(inode, map); | 577 | int ret = check_block_validity(inode, map); |
| @@ -909,7 +961,11 @@ static int ext4_ordered_write_end(struct file *file, | |||
| 909 | ext4_orphan_add(handle, inode); | 961 | ext4_orphan_add(handle, inode); |
| 910 | if (ret2 < 0) | 962 | if (ret2 < 0) |
| 911 | ret = ret2; | 963 | ret = ret2; |
| 964 | } else { | ||
| 965 | unlock_page(page); | ||
| 966 | page_cache_release(page); | ||
| 912 | } | 967 | } |
| 968 | |||
| 913 | ret2 = ext4_journal_stop(handle); | 969 | ret2 = ext4_journal_stop(handle); |
| 914 | if (!ret) | 970 | if (!ret) |
| 915 | ret = ret2; | 971 | ret = ret2; |
| @@ -1037,14 +1093,14 @@ static int ext4_journalled_write_end(struct file *file, | |||
| 1037 | } | 1093 | } |
| 1038 | 1094 | ||
| 1039 | /* | 1095 | /* |
| 1040 | * Reserve a single block located at lblock | 1096 | * Reserve a single cluster located at lblock |
| 1041 | */ | 1097 | */ |
| 1042 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | 1098 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) |
| 1043 | { | 1099 | { |
| 1044 | int retries = 0; | 1100 | int retries = 0; |
| 1045 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1101 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| 1046 | struct ext4_inode_info *ei = EXT4_I(inode); | 1102 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 1047 | unsigned long md_needed; | 1103 | unsigned int md_needed; |
| 1048 | int ret; | 1104 | int ret; |
| 1049 | 1105 | ||
| 1050 | /* | 1106 | /* |
| @@ -1054,7 +1110,8 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | |||
| 1054 | */ | 1110 | */ |
| 1055 | repeat: | 1111 | repeat: |
| 1056 | spin_lock(&ei->i_block_reservation_lock); | 1112 | spin_lock(&ei->i_block_reservation_lock); |
| 1057 | md_needed = ext4_calc_metadata_amount(inode, lblock); | 1113 | md_needed = EXT4_NUM_B2C(sbi, |
| 1114 | ext4_calc_metadata_amount(inode, lblock)); | ||
| 1058 | trace_ext4_da_reserve_space(inode, md_needed); | 1115 | trace_ext4_da_reserve_space(inode, md_needed); |
| 1059 | spin_unlock(&ei->i_block_reservation_lock); | 1116 | spin_unlock(&ei->i_block_reservation_lock); |
| 1060 | 1117 | ||
| @@ -1063,15 +1120,15 @@ repeat: | |||
| 1063 | * us from metadata over-estimation, though we may go over by | 1120 | * us from metadata over-estimation, though we may go over by |
| 1064 | * a small amount in the end. Here we just reserve for data. | 1121 | * a small amount in the end. Here we just reserve for data. |
| 1065 | */ | 1122 | */ |
| 1066 | ret = dquot_reserve_block(inode, 1); | 1123 | ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); |
| 1067 | if (ret) | 1124 | if (ret) |
| 1068 | return ret; | 1125 | return ret; |
| 1069 | /* | 1126 | /* |
| 1070 | * We do still charge estimated metadata to the sb though; | 1127 | * We do still charge estimated metadata to the sb though; |
| 1071 | * we cannot afford to run out of free blocks. | 1128 | * we cannot afford to run out of free blocks. |
| 1072 | */ | 1129 | */ |
| 1073 | if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) { | 1130 | if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) { |
| 1074 | dquot_release_reservation_block(inode, 1); | 1131 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); |
| 1075 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1132 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
| 1076 | yield(); | 1133 | yield(); |
| 1077 | goto repeat; | 1134 | goto repeat; |
| @@ -1118,19 +1175,21 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
| 1118 | * We can release all of the reserved metadata blocks | 1175 | * We can release all of the reserved metadata blocks |
| 1119 | * only when we have written all of the delayed | 1176 | * only when we have written all of the delayed |
| 1120 | * allocation blocks. | 1177 | * allocation blocks. |
| 1178 | * Note that in case of bigalloc, i_reserved_meta_blocks, | ||
| 1179 | * i_reserved_data_blocks, etc. refer to the number of clusters. | ||
| 1121 | */ | 1180 | */ |
| 1122 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 1181 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
| 1123 | ei->i_reserved_meta_blocks); | 1182 | ei->i_reserved_meta_blocks); |
| 1124 | ei->i_reserved_meta_blocks = 0; | 1183 | ei->i_reserved_meta_blocks = 0; |
| 1125 | ei->i_da_metadata_calc_len = 0; | 1184 | ei->i_da_metadata_calc_len = 0; |
| 1126 | } | 1185 | } |
| 1127 | 1186 | ||
| 1128 | /* update fs dirty data blocks counter */ | 1187 | /* update fs dirty data blocks counter */ |
| 1129 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); | 1188 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free); |
| 1130 | 1189 | ||
| 1131 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1190 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
| 1132 | 1191 | ||
| 1133 | dquot_release_reservation_block(inode, to_free); | 1192 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free)); |
| 1134 | } | 1193 | } |
| 1135 | 1194 | ||
| 1136 | static void ext4_da_page_release_reservation(struct page *page, | 1195 | static void ext4_da_page_release_reservation(struct page *page, |
| @@ -1139,6 +1198,9 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
| 1139 | int to_release = 0; | 1198 | int to_release = 0; |
| 1140 | struct buffer_head *head, *bh; | 1199 | struct buffer_head *head, *bh; |
| 1141 | unsigned int curr_off = 0; | 1200 | unsigned int curr_off = 0; |
| 1201 | struct inode *inode = page->mapping->host; | ||
| 1202 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 1203 | int num_clusters; | ||
| 1142 | 1204 | ||
| 1143 | head = page_buffers(page); | 1205 | head = page_buffers(page); |
| 1144 | bh = head; | 1206 | bh = head; |
| @@ -1148,10 +1210,24 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
| 1148 | if ((offset <= curr_off) && (buffer_delay(bh))) { | 1210 | if ((offset <= curr_off) && (buffer_delay(bh))) { |
| 1149 | to_release++; | 1211 | to_release++; |
| 1150 | clear_buffer_delay(bh); | 1212 | clear_buffer_delay(bh); |
| 1213 | clear_buffer_da_mapped(bh); | ||
| 1151 | } | 1214 | } |
| 1152 | curr_off = next_off; | 1215 | curr_off = next_off; |
| 1153 | } while ((bh = bh->b_this_page) != head); | 1216 | } while ((bh = bh->b_this_page) != head); |
| 1154 | ext4_da_release_space(page->mapping->host, to_release); | 1217 | |
| 1218 | /* If we have released all the blocks belonging to a cluster, then we | ||
| 1219 | * need to release the reserved space for that cluster. */ | ||
| 1220 | num_clusters = EXT4_NUM_B2C(sbi, to_release); | ||
| 1221 | while (num_clusters > 0) { | ||
| 1222 | ext4_fsblk_t lblk; | ||
| 1223 | lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) + | ||
| 1224 | ((num_clusters - 1) << sbi->s_cluster_bits); | ||
| 1225 | if (sbi->s_cluster_ratio == 1 || | ||
| 1226 | !ext4_find_delalloc_cluster(inode, lblk, 1)) | ||
| 1227 | ext4_da_release_space(inode, 1); | ||
| 1228 | |||
| 1229 | num_clusters--; | ||
| 1230 | } | ||
| 1155 | } | 1231 | } |
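The release loop above walks the page's clusters from the last one down: each iteration computes the logical block at that cluster's offset within the page, and only drops the reservation if no other delayed-allocated block still occupies the cluster. The lblk arithmetic, with an assumed geometry of four blocks per page and two blocks per cluster:

#include <stdio.h>

int main(void)
{
	unsigned page_shift = 12, blkbits = 10;  /* 4 blocks per page */
	unsigned cluster_bits = 1;               /* 2 blocks per cluster */
	unsigned long page_index = 7;
	int num_clusters = 2;          /* clusters released from this page */

	while (num_clusters > 0) {
		unsigned long lblk =
			(page_index << (page_shift - blkbits)) +
			((unsigned long)(num_clusters - 1) << cluster_bits);
		/* kernel: release the reservation unless the cluster still
		 * has other delayed blocks (ext4_find_delalloc_cluster) */
		printf("check cluster at logical block %lu\n", lblk);
		num_clusters--;
	}
	return 0;
}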
| 1156 | 1232 | ||
| 1157 | /* | 1233 | /* |
| @@ -1253,6 +1329,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
| 1253 | clear_buffer_delay(bh); | 1329 | clear_buffer_delay(bh); |
| 1254 | bh->b_blocknr = pblock; | 1330 | bh->b_blocknr = pblock; |
| 1255 | } | 1331 | } |
| 1332 | if (buffer_da_mapped(bh)) | ||
| 1333 | clear_buffer_da_mapped(bh); | ||
| 1256 | if (buffer_unwritten(bh) || | 1334 | if (buffer_unwritten(bh) || |
| 1257 | buffer_mapped(bh)) | 1335 | buffer_mapped(bh)) |
| 1258 | BUG_ON(bh->b_blocknr != pblock); | 1336 | BUG_ON(bh->b_blocknr != pblock); |
| @@ -1346,12 +1424,15 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
| 1346 | { | 1424 | { |
| 1347 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1425 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| 1348 | printk(KERN_CRIT "Total free blocks count %lld\n", | 1426 | printk(KERN_CRIT "Total free blocks count %lld\n", |
| 1349 | ext4_count_free_blocks(inode->i_sb)); | 1427 | EXT4_C2B(EXT4_SB(inode->i_sb), |
| 1428 | ext4_count_free_clusters(inode->i_sb))); | ||
| 1350 | printk(KERN_CRIT "Free/Dirty block details\n"); | 1429 | printk(KERN_CRIT "Free/Dirty block details\n"); |
| 1351 | printk(KERN_CRIT "free_blocks=%lld\n", | 1430 | printk(KERN_CRIT "free_blocks=%lld\n", |
| 1352 | (long long) percpu_counter_sum(&sbi->s_freeblocks_counter)); | 1431 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), |
| 1432 | percpu_counter_sum(&sbi->s_freeclusters_counter))); | ||
| 1353 | printk(KERN_CRIT "dirty_blocks=%lld\n", | 1433 | printk(KERN_CRIT "dirty_blocks=%lld\n", |
| 1354 | (long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); | 1434 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), |
| 1435 | percpu_counter_sum(&sbi->s_dirtyclusters_counter))); | ||
| 1355 | printk(KERN_CRIT "Block reservation details\n"); | 1436 | printk(KERN_CRIT "Block reservation details\n"); |
| 1356 | printk(KERN_CRIT "i_reserved_data_blocks=%u\n", | 1437 | printk(KERN_CRIT "i_reserved_data_blocks=%u\n", |
| 1357 | EXT4_I(inode)->i_reserved_data_blocks); | 1438 | EXT4_I(inode)->i_reserved_data_blocks); |
| @@ -1430,8 +1511,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | |||
| 1430 | if (err == -EAGAIN) | 1511 | if (err == -EAGAIN) |
| 1431 | goto submit_io; | 1512 | goto submit_io; |
| 1432 | 1513 | ||
| 1433 | if (err == -ENOSPC && | 1514 | if (err == -ENOSPC && ext4_count_free_clusters(sb)) { |
| 1434 | ext4_count_free_blocks(sb)) { | ||
| 1435 | mpd->retval = err; | 1515 | mpd->retval = err; |
| 1436 | goto submit_io; | 1516 | goto submit_io; |
| 1437 | } | 1517 | } |
| @@ -1471,13 +1551,15 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | |||
| 1471 | 1551 | ||
| 1472 | for (i = 0; i < map.m_len; i++) | 1552 | for (i = 0; i < map.m_len; i++) |
| 1473 | unmap_underlying_metadata(bdev, map.m_pblk + i); | 1553 | unmap_underlying_metadata(bdev, map.m_pblk + i); |
| 1474 | } | ||
| 1475 | 1554 | ||
| 1476 | if (ext4_should_order_data(mpd->inode)) { | 1555 | if (ext4_should_order_data(mpd->inode)) { |
| 1477 | err = ext4_jbd2_file_inode(handle, mpd->inode); | 1556 | err = ext4_jbd2_file_inode(handle, mpd->inode); |
| 1478 | if (err) | 1557 | if (err) { |
| 1479 | /* This only happens if the journal is aborted */ | 1558 | /* Only if the journal is aborted */ |
| 1480 | return; | 1559 | mpd->retval = err; |
| 1560 | goto submit_io; | ||
| 1561 | } | ||
| 1562 | } | ||
| 1481 | } | 1563 | } |
| 1482 | 1564 | ||
| 1483 | /* | 1565 | /* |
| @@ -1584,6 +1666,66 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | |||
| 1584 | } | 1666 | } |
| 1585 | 1667 | ||
| 1586 | /* | 1668 | /* |
| 1669 | * This function grabs code from the very beginning of | ||
| 1670 | * ext4_map_blocks, but assumes that the caller is from the delayed write | ||
| 1671 | * path. This function looks up the requested blocks and sets the | ||
| 1672 | * buffer delay bit under the protection of i_data_sem. | ||
| 1673 | */ | ||
| 1674 | static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | ||
| 1675 | struct ext4_map_blocks *map, | ||
| 1676 | struct buffer_head *bh) | ||
| 1677 | { | ||
| 1678 | int retval; | ||
| 1679 | sector_t invalid_block = ~((sector_t) 0xffff); | ||
| 1680 | |||
| 1681 | if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) | ||
| 1682 | invalid_block = ~0; | ||
| 1683 | |||
| 1684 | map->m_flags = 0; | ||
| 1685 | ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u, " | ||
| 1686 | "logical block %lu\n", inode->i_ino, map->m_len, | ||
| 1687 | (unsigned long) map->m_lblk); | ||
| 1688 | /* | ||
| 1689 | * Try to see if we can get the block without requesting a new | ||
| 1690 | * file system block. | ||
| 1691 | */ | ||
| 1692 | down_read((&EXT4_I(inode)->i_data_sem)); | ||
| 1693 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | ||
| 1694 | retval = ext4_ext_map_blocks(NULL, inode, map, 0); | ||
| 1695 | else | ||
| 1696 | retval = ext4_ind_map_blocks(NULL, inode, map, 0); | ||
| 1697 | |||
| 1698 | if (retval == 0) { | ||
| 1699 | /* | ||
| 1700 | * XXX: __block_prepare_write() unmaps passed block, | ||
| 1701 | * is it OK? | ||
| 1702 | */ | ||
| 1703 | /* If the block was allocated from a previously allocated cluster, | ||
| 1704 | * then we don't need to reserve it again. */ | ||
| 1705 | if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { | ||
| 1706 | retval = ext4_da_reserve_space(inode, iblock); | ||
| 1707 | if (retval) | ||
| 1708 | /* not enough space to reserve */ | ||
| 1709 | goto out_unlock; | ||
| 1710 | } | ||
| 1711 | |||
| 1712 | /* Clear the EXT4_MAP_FROM_CLUSTER flag since its purpose is served | ||
| 1713 | * and it should not appear in bh->b_state. | ||
| 1714 | */ | ||
| 1715 | map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; | ||
| 1716 | |||
| 1717 | map_bh(bh, inode->i_sb, invalid_block); | ||
| 1718 | set_buffer_new(bh); | ||
| 1719 | set_buffer_delay(bh); | ||
| 1720 | } | ||
| 1721 | |||
| 1722 | out_unlock: | ||
| 1723 | up_read((&EXT4_I(inode)->i_data_sem)); | ||
| 1724 | |||
| 1725 | return retval; | ||
| 1726 | } | ||
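Delayed buffers are mapped to a sentinel block number so later code can tell them apart from real mappings: ~((sector_t)0xffff), falling back to ~0 if the filesystem is large enough that the first value could be a real block. A small model of that choice (the filesystem size is hypothetical):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t invalid_block = ~((uint64_t)0xffff);
	uint64_t blocks_count = 1ULL << 32;   /* hypothetical fs size */

	/* If the sentinel could collide with a real block, fall back to ~0 */
	if (invalid_block < blocks_count)
		invalid_block = ~(uint64_t)0;

	printf("sentinel: %#llx\n", (unsigned long long)invalid_block);
	return 0;
}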
| 1727 | |||
| 1728 | /* | ||
| 1587 | * This is a special get_blocks_t callback which is used by | 1729 | * This is a special get_blocks_t callback which is used by |
| 1588 | * ext4_da_write_begin(). It will either return mapped block or | 1730 | * ext4_da_write_begin(). It will either return mapped block or |
| 1589 | * reserve space for a single block. | 1731 | * reserve space for a single block. |
| @@ -1600,10 +1742,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
| 1600 | { | 1742 | { |
| 1601 | struct ext4_map_blocks map; | 1743 | struct ext4_map_blocks map; |
| 1602 | int ret = 0; | 1744 | int ret = 0; |
| 1603 | sector_t invalid_block = ~((sector_t) 0xffff); | ||
| 1604 | |||
| 1605 | if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) | ||
| 1606 | invalid_block = ~0; | ||
| 1607 | 1745 | ||
| 1608 | BUG_ON(create == 0); | 1746 | BUG_ON(create == 0); |
| 1609 | BUG_ON(bh->b_size != inode->i_sb->s_blocksize); | 1747 | BUG_ON(bh->b_size != inode->i_sb->s_blocksize); |
| @@ -1616,25 +1754,9 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
| 1616 | * preallocated blocks are unmapped but should treated | 1754 | * preallocated blocks are unmapped but should treated |
| 1617 | * the same as allocated blocks. | 1755 | * the same as allocated blocks. |
| 1618 | */ | 1756 | */ |
| 1619 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 1757 | ret = ext4_da_map_blocks(inode, iblock, &map, bh); |
| 1620 | if (ret < 0) | 1758 | if (ret <= 0) |
| 1621 | return ret; | 1759 | return ret; |
| 1622 | if (ret == 0) { | ||
| 1623 | if (buffer_delay(bh)) | ||
| 1624 | return 0; /* Not sure this could or should happen */ | ||
| 1625 | /* | ||
| 1626 | * XXX: __block_write_begin() unmaps passed block, is it OK? | ||
| 1627 | */ | ||
| 1628 | ret = ext4_da_reserve_space(inode, iblock); | ||
| 1629 | if (ret) | ||
| 1630 | /* not enough space to reserve */ | ||
| 1631 | return ret; | ||
| 1632 | |||
| 1633 | map_bh(bh, inode->i_sb, invalid_block); | ||
| 1634 | set_buffer_new(bh); | ||
| 1635 | set_buffer_delay(bh); | ||
| 1636 | return 0; | ||
| 1637 | } | ||
| 1638 | 1760 | ||
| 1639 | map_bh(bh, inode->i_sb, map.m_pblk); | 1761 | map_bh(bh, inode->i_sb, map.m_pblk); |
| 1640 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 1762 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; |
| @@ -2050,6 +2172,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
| 2050 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2172 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
| 2051 | pgoff_t done_index = 0; | 2173 | pgoff_t done_index = 0; |
| 2052 | pgoff_t end; | 2174 | pgoff_t end; |
| 2175 | struct blk_plug plug; | ||
| 2053 | 2176 | ||
| 2054 | trace_ext4_da_writepages(inode, wbc); | 2177 | trace_ext4_da_writepages(inode, wbc); |
| 2055 | 2178 | ||
| @@ -2128,6 +2251,7 @@ retry: | |||
| 2128 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | 2251 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
| 2129 | tag_pages_for_writeback(mapping, index, end); | 2252 | tag_pages_for_writeback(mapping, index, end); |
| 2130 | 2253 | ||
| 2254 | blk_start_plug(&plug); | ||
| 2131 | while (!ret && wbc->nr_to_write > 0) { | 2255 | while (!ret && wbc->nr_to_write > 0) { |
| 2132 | 2256 | ||
| 2133 | /* | 2257 | /* |
| @@ -2178,11 +2302,12 @@ retry: | |||
| 2178 | ret = 0; | 2302 | ret = 0; |
| 2179 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | 2303 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { |
| 2180 | /* | 2304 | /* |
| 2181 | * got one extent now try with | 2305 | * Got one extent now try with rest of the pages. |
| 2182 | * rest of the pages | 2306 | * If mpd.retval is set -EIO, journal is aborted. |
| 2307 | * So we don't need to write any more. | ||
| 2183 | */ | 2308 | */ |
| 2184 | pages_written += mpd.pages_written; | 2309 | pages_written += mpd.pages_written; |
| 2185 | ret = 0; | 2310 | ret = mpd.retval; |
| 2186 | io_done = 1; | 2311 | io_done = 1; |
| 2187 | } else if (wbc->nr_to_write) | 2312 | } else if (wbc->nr_to_write) |
| 2188 | /* | 2313 | /* |
| @@ -2192,6 +2317,7 @@ retry: | |||
| 2192 | */ | 2317 | */ |
| 2193 | break; | 2318 | break; |
| 2194 | } | 2319 | } |
| 2320 | blk_finish_plug(&plug); | ||
| 2195 | if (!io_done && !cycled) { | 2321 | if (!io_done && !cycled) { |
| 2196 | cycled = 1; | 2322 | cycled = 1; |
| 2197 | index = 0; | 2323 | index = 0; |
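The plug added around the writeback loop batches the submitted I/O: requests accumulate on the per-task plug and are dispatched in one burst at blk_finish_plug(). A userspace model of the batching idea (the plug structure here is invented; only blk_start_plug()/blk_finish_plug() are the real kernel API):

#include <stdio.h>

/* Userspace model of I/O plugging: batch requests, flush once. */
struct plug { int pending; };

static void start_plug(struct plug *p) { p->pending = 0; }
static void submit(struct plug *p, int block) { p->pending++; (void)block; }
static void finish_plug(struct plug *p)
{
	printf("flushing %d queued requests in one batch\n", p->pending);
	p->pending = 0;
}

int main(void)
{
	struct plug plug;
	start_plug(&plug);
	for (int b = 0; b < 16; b++)
		submit(&plug, b);   /* requests accumulate, no dispatch yet */
	finish_plug(&plug);         /* one burst to the device */
	return 0;
}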
| @@ -2230,10 +2356,11 @@ static int ext4_nonda_switch(struct super_block *sb) | |||
| 2230 | * Delalloc need an accurate free block accounting. So switch | 2356 | * Delalloc need an accurate free block accounting. So switch |
| 2231 | * to non delalloc when we are near to error range. | 2357 | * to non delalloc when we are near to error range. |
| 2232 | */ | 2358 | */ |
| 2233 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | 2359 | free_blocks = EXT4_C2B(sbi, |
| 2234 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); | 2360 | percpu_counter_read_positive(&sbi->s_freeclusters_counter)); |
| 2361 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); | ||
| 2235 | if (2 * free_blocks < 3 * dirty_blocks || | 2362 | if (2 * free_blocks < 3 * dirty_blocks || |
| 2236 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { | 2363 | free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { |
| 2237 | /* | 2364 | /* |
| 2238 | * free block count is less than 150% of dirty blocks | 2365 | * free block count is less than 150% of dirty blocks |
| 2239 | * or free blocks is less than watermark | 2366 | * or free blocks is less than watermark |
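The comparison 2 * free_blocks < 3 * dirty_blocks is the integer form of free < 1.5 x dirty, i.e. switch away from delalloc once dirty blocks exceed two thirds of the free pool. A worked check (the watermark value below is illustrative):

#include <stdio.h>

/* Model of ext4_nonda_switch(): fall back to non-delalloc writes when
 * free space is less than 150% of dirty (plus a fixed watermark). */
static int should_switch(unsigned long long free_blocks,
			 unsigned long long dirty_blocks,
			 unsigned long long watermark)
{
	return 2 * free_blocks < 3 * dirty_blocks ||
	       free_blocks < dirty_blocks + watermark;
}

int main(void)
{
	/* 1000 free vs 700 dirty: 2000 < 2100, so switch */
	printf("%d\n", should_switch(1000, 700, 64));
	/* 1000 free vs 600 dirty: 2000 >= 1800 and 1000 >= 664, keep delalloc */
	printf("%d\n", should_switch(1000, 600, 64));
	return 0;
}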
| @@ -2259,6 +2386,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
| 2259 | pgoff_t index; | 2386 | pgoff_t index; |
| 2260 | struct inode *inode = mapping->host; | 2387 | struct inode *inode = mapping->host; |
| 2261 | handle_t *handle; | 2388 | handle_t *handle; |
| 2389 | loff_t page_len; | ||
| 2262 | 2390 | ||
| 2263 | index = pos >> PAGE_CACHE_SHIFT; | 2391 | index = pos >> PAGE_CACHE_SHIFT; |
| 2264 | 2392 | ||
| @@ -2305,6 +2433,13 @@ retry: | |||
| 2305 | */ | 2433 | */ |
| 2306 | if (pos + len > inode->i_size) | 2434 | if (pos + len > inode->i_size) |
| 2307 | ext4_truncate_failed_write(inode); | 2435 | ext4_truncate_failed_write(inode); |
| 2436 | } else { | ||
| 2437 | page_len = pos & (PAGE_CACHE_SIZE - 1); | ||
| 2438 | if (page_len > 0) { | ||
| 2439 | ret = ext4_discard_partial_page_buffers_no_lock(handle, | ||
| 2440 | inode, page, pos - page_len, page_len, | ||
| 2441 | EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED); | ||
| 2442 | } | ||
| 2308 | } | 2443 | } |
| 2309 | 2444 | ||
| 2310 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 2445 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
| @@ -2347,6 +2482,7 @@ static int ext4_da_write_end(struct file *file, | |||
| 2347 | loff_t new_i_size; | 2482 | loff_t new_i_size; |
| 2348 | unsigned long start, end; | 2483 | unsigned long start, end; |
| 2349 | int write_mode = (int)(unsigned long)fsdata; | 2484 | int write_mode = (int)(unsigned long)fsdata; |
| 2485 | loff_t page_len; | ||
| 2350 | 2486 | ||
| 2351 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { | 2487 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { |
| 2352 | if (ext4_should_order_data(inode)) { | 2488 | if (ext4_should_order_data(inode)) { |
| @@ -2395,6 +2531,16 @@ static int ext4_da_write_end(struct file *file, | |||
| 2395 | } | 2531 | } |
| 2396 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 2532 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
| 2397 | page, fsdata); | 2533 | page, fsdata); |
| 2534 | |||
| 2535 | page_len = PAGE_CACHE_SIZE - | ||
| 2536 | ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1)); | ||
| 2537 | |||
| 2538 | if (page_len > 0) { | ||
| 2539 | ret = ext4_discard_partial_page_buffers_no_lock(handle, | ||
| 2540 | inode, page, pos + copied - 1, page_len, | ||
| 2541 | EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED); | ||
| 2542 | } | ||
| 2543 | |||
| 2398 | copied = ret2; | 2544 | copied = ret2; |
| 2399 | if (ret2 < 0) | 2545 | if (ret2 < 0) |
| 2400 | ret = ret2; | 2546 | ret = ret2; |
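The tail-zeroing added here computes how many bytes of the page follow the last written byte, pos + copied - 1, and discards that region so unmapped buffers past the write never carry stale data. The arithmetic, worked with an assumed 4K page size:

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;
	unsigned long long pos = 8192, copied = 100;

	/* offset of the last written byte within its page */
	unsigned long long last = (pos + copied - 1) & (page_size - 1);
	unsigned long long page_len = page_size - last;

	/* write ends at in-page offset 99, so 3997 bytes follow it */
	printf("zero %llu bytes after the written region\n", page_len);
	return 0;
}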
| @@ -2689,10 +2835,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
| 2689 | * but being more careful is always safe for the future change. | 2835 | * but being more careful is always safe for the future change. |
| 2690 | */ | 2836 | */ |
| 2691 | inode = io_end->inode; | 2837 | inode = io_end->inode; |
| 2692 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | 2838 | ext4_set_io_unwritten_flag(inode, io_end); |
| 2693 | io_end->flag |= EXT4_IO_END_UNWRITTEN; | ||
| 2694 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | ||
| 2695 | } | ||
| 2696 | 2839 | ||
| 2697 | /* Add the io_end to per-inode completed io list*/ | 2840 | /* Add the io_end to per-inode completed io list*/ |
| 2698 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | 2841 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); |
| @@ -2858,6 +3001,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
| 2858 | struct inode *inode = file->f_mapping->host; | 3001 | struct inode *inode = file->f_mapping->host; |
| 2859 | ssize_t ret; | 3002 | ssize_t ret; |
| 2860 | 3003 | ||
| 3004 | /* | ||
| 3005 | * If we are doing data journalling we don't support O_DIRECT | ||
| 3006 | */ | ||
| 3007 | if (ext4_should_journal_data(inode)) | ||
| 3008 | return 0; | ||
| 3009 | |||
| 2861 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | 3010 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); |
| 2862 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3011 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
| 2863 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); | 3012 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); |
| @@ -2927,6 +3076,7 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
| 2927 | .bmap = ext4_bmap, | 3076 | .bmap = ext4_bmap, |
| 2928 | .invalidatepage = ext4_invalidatepage, | 3077 | .invalidatepage = ext4_invalidatepage, |
| 2929 | .releasepage = ext4_releasepage, | 3078 | .releasepage = ext4_releasepage, |
| 3079 | .direct_IO = ext4_direct_IO, | ||
| 2930 | .is_partially_uptodate = block_is_partially_uptodate, | 3080 | .is_partially_uptodate = block_is_partially_uptodate, |
| 2931 | .error_remove_page = generic_error_remove_page, | 3081 | .error_remove_page = generic_error_remove_page, |
| 2932 | }; | 3082 | }; |
| @@ -2963,6 +3113,227 @@ void ext4_set_aops(struct inode *inode) | |||
| 2963 | inode->i_mapping->a_ops = &ext4_journalled_aops; | 3113 | inode->i_mapping->a_ops = &ext4_journalled_aops; |
| 2964 | } | 3114 | } |
| 2965 | 3115 | ||
| 3116 | |||
| 3117 | /* | ||
| 3118 | * ext4_discard_partial_page_buffers() | ||
| 3119 | * Wrapper function for ext4_discard_partial_page_buffers_no_lock. | ||
| 3120 | * This function finds and locks the page containing the offset | ||
| 3121 | * "from" and passes it to ext4_discard_partial_page_buffers_no_lock. | ||
| 3122 | * Calling functions that already have the page locked should call | ||
| 3123 | * ext4_discard_partial_page_buffers_no_lock directly. | ||
| 3124 | */ | ||
| 3125 | int ext4_discard_partial_page_buffers(handle_t *handle, | ||
| 3126 | struct address_space *mapping, loff_t from, | ||
| 3127 | loff_t length, int flags) | ||
| 3128 | { | ||
| 3129 | struct inode *inode = mapping->host; | ||
| 3130 | struct page *page; | ||
| 3131 | int err = 0; | ||
| 3132 | |||
| 3133 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, | ||
| 3134 | mapping_gfp_mask(mapping) & ~__GFP_FS); | ||
| 3135 | if (!page) | ||
| 3136 | return -ENOMEM; | ||
| 3137 | |||
| 3138 | err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page, | ||
| 3139 | from, length, flags); | ||
| 3140 | |||
| 3141 | unlock_page(page); | ||
| 3142 | page_cache_release(page); | ||
| 3143 | return err; | ||
| 3144 | } | ||
| 3145 | |||
| 3146 | /* | ||
| 3147 | * ext4_discard_partial_page_buffers_no_lock() | ||
| 3148 | * Zeros a page range of length 'length' starting from offset 'from'. | ||
| 3149 | * Buffer heads that correspond to the block aligned regions of the | ||
| 3150 | * zeroed range will be unmapped. Unblock aligned regions | ||
| 3151 | * will have the corresponding buffer head mapped if needed so that | ||
| 3152 | * that region of the page can be updated with the partial zero out. | ||
| 3153 | * | ||
| 3154 | * This function assumes that the page has already been locked. | ||
| 3155 | * The range to be discarded must be contained within the given page. | ||
| 3156 | * If the specified range exceeds the end of the page it will be shortened | ||
| 3157 | * to the end of the page that corresponds to 'from'. This function is | ||
| 3158 | * appropriate for updating a page and its buffer heads to be unmapped and | ||
| 3159 | * zeroed for blocks that have either been released, or are going to be | ||
| 3160 | * released. | ||
| 3161 | * | ||
| 3162 | * handle: The journal handle | ||
| 3163 | * inode: The file's inode | ||
| 3164 | * page: A locked page that contains the offset "from" | ||
| 3165 | * from: The starting byte offset (from the beginning of the file) | ||
| 3166 | * to begin discarding | ||
| 3167 | * len: The length of bytes to discard | ||
| 3168 | * flags: Optional flags that may be used: | ||
| 3169 | * | ||
| 3170 | * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED | ||
| 3171 | * Only zero the regions of the page whose buffer heads | ||
| 3172 | * have already been unmapped. This flag is appropriate | ||
| 3173 | * for updating the contents of a page whose blocks may | ||
| 3174 | * have already been released, and we only want to zero | ||
| 3175 | * out the regions that correspond to those released blocks. | ||
| 3176 | * | ||
| 3177 | * Returns zero on success or negative on failure. | ||
| 3178 | */ | ||
| 3179 | int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | ||
| 3180 | struct inode *inode, struct page *page, loff_t from, | ||
| 3181 | loff_t length, int flags) | ||
| 3182 | { | ||
| 3183 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | ||
| 3184 | unsigned int offset = from & (PAGE_CACHE_SIZE-1); | ||
| 3185 | unsigned int blocksize, max, pos; | ||
| 3186 | ext4_lblk_t iblock; | ||
| 3187 | struct buffer_head *bh; | ||
| 3188 | int err = 0; | ||
| 3189 | |||
| 3190 | blocksize = inode->i_sb->s_blocksize; | ||
| 3191 | max = PAGE_CACHE_SIZE - offset; | ||
| 3192 | |||
| 3193 | if (index != page->index) | ||
| 3194 | return -EINVAL; | ||
| 3195 | |||
| 3196 | /* | ||
| 3197 | * Correct the length if it does not fall between | ||
| 3198 | * 'from' and the end of the page | ||
| 3199 | */ | ||
| 3200 | if (length > max || length < 0) | ||
| 3201 | length = max; | ||
| 3202 | |||
| 3203 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | ||
| 3204 | |||
| 3205 | if (!page_has_buffers(page)) { | ||
| 3206 | /* | ||
| 3207 | * If the range to be discarded covers a partial block | ||
| 3208 | * we need to get the page buffers. This is because | ||
| 3209 | * partial blocks cannot be released and the page needs | ||
| 3210 | * to be updated with the contents of the block before | ||
| 3211 | * we write the zeros on top of it. | ||
| 3212 | */ | ||
| 3213 | if ((from & (blocksize - 1)) || | ||
| 3214 | ((from + length) & (blocksize - 1))) { | ||
| 3215 | create_empty_buffers(page, blocksize, 0); | ||
| 3216 | } else { | ||
| 3217 | /* | ||
| 3218 | * If there are no partial blocks, | ||
| 3219 | * there is nothing to update, | ||
| 3220 | * so we can return now | ||
| 3221 | */ | ||
| 3222 | return 0; | ||
| 3223 | } | ||
| 3224 | } | ||
| 3225 | |||
| 3226 | /* Find the buffer that contains "offset" */ | ||
| 3227 | bh = page_buffers(page); | ||
| 3228 | pos = blocksize; | ||
| 3229 | while (offset >= pos) { | ||
| 3230 | bh = bh->b_this_page; | ||
| 3231 | iblock++; | ||
| 3232 | pos += blocksize; | ||
| 3233 | } | ||
| 3234 | |||
| 3235 | pos = offset; | ||
| 3236 | while (pos < offset + length) { | ||
| 3237 | unsigned int end_of_block, range_to_discard; | ||
| 3238 | |||
| 3239 | err = 0; | ||
| 3240 | |||
| 3241 | /* The length of space left to zero and unmap */ | ||
| 3242 | range_to_discard = offset + length - pos; | ||
| 3243 | |||
| 3244 | /* The length of space until the end of the block */ | ||
| 3245 | end_of_block = blocksize - (pos & (blocksize-1)); | ||
| 3246 | |||
| 3247 | /* | ||
| 3248 | * Do not unmap or zero past end of block | ||
| 3249 | * for this buffer head | ||
| 3250 | */ | ||
| 3251 | if (range_to_discard > end_of_block) | ||
| 3252 | range_to_discard = end_of_block; | ||
| 3253 | |||
| 3254 | |||
| 3255 | /* | ||
| 3256 | * Skip this buffer head if we are only zeroing unmapped | ||
| 3257 | * regions of the page | ||
| 3258 | */ | ||
| 3259 | if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED && | ||
| 3260 | buffer_mapped(bh)) | ||
| 3261 | goto next; | ||
| 3262 | |||
| 3263 | /* If the range is block aligned, unmap */ | ||
| 3264 | if (range_to_discard == blocksize) { | ||
| 3265 | clear_buffer_dirty(bh); | ||
| 3266 | bh->b_bdev = NULL; | ||
| 3267 | clear_buffer_mapped(bh); | ||
| 3268 | clear_buffer_req(bh); | ||
| 3269 | clear_buffer_new(bh); | ||
| 3270 | clear_buffer_delay(bh); | ||
| 3271 | clear_buffer_unwritten(bh); | ||
| 3272 | clear_buffer_uptodate(bh); | ||
| 3273 | zero_user(page, pos, range_to_discard); | ||
| 3274 | BUFFER_TRACE(bh, "Buffer discarded"); | ||
| 3275 | goto next; | ||
| 3276 | } | ||
| 3277 | |||
| 3278 | /* | ||
| 3279 | * If this block is not completely contained in the range | ||
| 3280 | * to be discarded, then it is not going to be released. Because | ||
| 3281 | * we need to keep this block, we need to make sure this part | ||
| 3282 | * of the page is uptodate before we modify it by writing | ||
| 3283 | * partial zeros on it. | ||
| 3284 | */ | ||
| 3285 | if (!buffer_mapped(bh)) { | ||
| 3286 | /* | ||
| 3287 | * Buffer head must be mapped before we can read | ||
| 3288 | * from the block | ||
| 3289 | */ | ||
| 3290 | BUFFER_TRACE(bh, "unmapped"); | ||
| 3291 | ext4_get_block(inode, iblock, bh, 0); | ||
| 3292 | /* unmapped? It's a hole - nothing to do */ | ||
| 3293 | if (!buffer_mapped(bh)) { | ||
| 3294 | BUFFER_TRACE(bh, "still unmapped"); | ||
| 3295 | goto next; | ||
| 3296 | } | ||
| 3297 | } | ||
| 3298 | |||
| 3299 | /* Ok, it's mapped. Make sure it's up-to-date */ | ||
| 3300 | if (PageUptodate(page)) | ||
| 3301 | set_buffer_uptodate(bh); | ||
| 3302 | |||
| 3303 | if (!buffer_uptodate(bh)) { | ||
| 3304 | err = -EIO; | ||
| 3305 | ll_rw_block(READ, 1, &bh); | ||
| 3306 | wait_on_buffer(bh); | ||
| 3307 | /* Uhhuh. Read error. Complain and punt.*/ | ||
| 3308 | if (!buffer_uptodate(bh)) | ||
| 3309 | goto next; | ||
| 3310 | } | ||
| 3311 | |||
| 3312 | if (ext4_should_journal_data(inode)) { | ||
| 3313 | BUFFER_TRACE(bh, "get write access"); | ||
| 3314 | err = ext4_journal_get_write_access(handle, bh); | ||
| 3315 | if (err) | ||
| 3316 | goto next; | ||
| 3317 | } | ||
| 3318 | |||
| 3319 | zero_user(page, pos, range_to_discard); | ||
| 3320 | |||
| 3321 | err = 0; | ||
| 3322 | if (ext4_should_journal_data(inode)) { | ||
| 3323 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
| 3324 | } else | ||
| 3325 | mark_buffer_dirty(bh); | ||
| 3326 | |||
| 3327 | BUFFER_TRACE(bh, "Partial buffer zeroed"); | ||
| 3328 | next: | ||
| 3329 | bh = bh->b_this_page; | ||
| 3330 | iblock++; | ||
| 3331 | pos += range_to_discard; | ||
| 3332 | } | ||
| 3333 | |||
| 3334 | return err; | ||
| 3335 | } | ||
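A typical caller of this wrapper is the indirect truncate path shown earlier: when i_size is not page aligned, it discards from i_size to the end of that page. The range computation, modeled in userspace with an assumed page size and file size:

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;
	unsigned long long i_size = 10000;   /* hypothetical file size */

	if (i_size % page_size != 0) {
		unsigned long long from = i_size;
		unsigned long long page_len =
			page_size - (i_size & (page_size - 1));
		/* ext4_ind_truncate passes (from, page_len) to the wrapper */
		printf("discard %llu bytes starting at offset %llu\n",
		       page_len, from);
	}
	return 0;
}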
| 3336 | |||
| 2966 | /* | 3337 | /* |
| 2967 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' | 3338 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' |
| 2968 | * up to the end of the block which corresponds to `from'. | 3339 | * up to the end of the block which corresponds to `from'. |
| @@ -3005,7 +3376,7 @@ int ext4_block_zero_page_range(handle_t *handle, | |||
| 3005 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, | 3376 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, |
| 3006 | mapping_gfp_mask(mapping) & ~__GFP_FS); | 3377 | mapping_gfp_mask(mapping) & ~__GFP_FS); |
| 3007 | if (!page) | 3378 | if (!page) |
| 3008 | return -EINVAL; | 3379 | return -ENOMEM; |
| 3009 | 3380 | ||
| 3010 | blocksize = inode->i_sb->s_blocksize; | 3381 | blocksize = inode->i_sb->s_blocksize; |
| 3011 | max = blocksize - (offset & (blocksize - 1)); | 3382 | max = blocksize - (offset & (blocksize - 1)); |
| @@ -3074,11 +3445,8 @@ int ext4_block_zero_page_range(handle_t *handle, | |||
| 3074 | err = 0; | 3445 | err = 0; |
| 3075 | if (ext4_should_journal_data(inode)) { | 3446 | if (ext4_should_journal_data(inode)) { |
| 3076 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 3447 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
| 3077 | } else { | 3448 | } else |
| 3078 | if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode) | ||
| 3079 | err = ext4_jbd2_file_inode(handle, inode); | ||
| 3080 | mark_buffer_dirty(bh); | 3449 | mark_buffer_dirty(bh); |
| 3081 | } | ||
| 3082 | 3450 | ||
| 3083 | unlock: | 3451 | unlock: |
| 3084 | unlock_page(page); | 3452 | unlock_page(page); |
| @@ -3119,6 +3487,11 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
| 3119 | return -ENOTSUPP; | 3487 | return -ENOTSUPP; |
| 3120 | } | 3488 | } |
| 3121 | 3489 | ||
| 3490 | if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) { | ||
| 3491 | /* TODO: Add support for bigalloc file systems */ | ||
| 3492 | return -ENOTSUPP; | ||
| 3493 | } | ||
| 3494 | |||
| 3122 | return ext4_ext_punch_hole(file, offset, length); | 3495 | return ext4_ext_punch_hole(file, offset, length); |
| 3123 | } | 3496 | } |
| 3124 | 3497 | ||
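The added check makes ext4_punch_hole() refuse bigalloc file systems (s_cluster_ratio > 1) up front, since the extent code it calls does not yet free space in cluster units. A hedged user-space sketch of the same gating shape; note the kernel returns -ENOTSUPP here, while the sketch uses the userspace-visible EOPNOTSUPP:

```c
#include <errno.h>
#include <stdio.h>

struct fs_geometry {
	unsigned int cluster_ratio;	/* blocks per allocation cluster */
	int extent_mapped;		/* 1 if the file is extent-based */
};

static int punch_hole(const struct fs_geometry *g, long long off, long long len)
{
	if (!g->extent_mapped)
		return -EOPNOTSUPP;	/* indirect-mapped files unsupported */
	if (g->cluster_ratio > 1)
		return -EOPNOTSUPP;	/* TODO: bigalloc support */
	printf("punching [%lld, %lld)\n", off, off + len);
	return 0;
}

int main(void)
{
	struct fs_geometry bigalloc = { .cluster_ratio = 16, .extent_mapped = 1 };
	struct fs_geometry plain    = { .cluster_ratio = 1,  .extent_mapped = 1 };

	printf("bigalloc: %d\n", punch_hole(&bigalloc, 0, 4096));
	printf("plain:    %d\n", punch_hole(&plain, 0, 4096));
	return 0;
}
```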
| @@ -4420,6 +4793,7 @@ retry_alloc: | |||
| 4420 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { | 4793 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { |
| 4421 | unlock_page(page); | 4794 | unlock_page(page); |
| 4422 | ret = VM_FAULT_SIGBUS; | 4795 | ret = VM_FAULT_SIGBUS; |
| 4796 | ext4_journal_stop(handle); | ||
| 4423 | goto out; | 4797 | goto out; |
| 4424 | } | 4798 | } |
| 4425 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | 4799 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
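The added ext4_journal_stop() call closes a handle leak in the page-mkwrite path: the SIGBUS error exit jumped to out after the journal handle had been started, leaving it open. A small stand-alone sketch of the pairing rule, with stand-in functions:

```c
#include <stdio.h>
#include <stdlib.h>

struct handle { int id; };

static struct handle *journal_start(void) { return malloc(sizeof(struct handle)); }
static void journal_stop(struct handle *h) { free(h); }
static int write_access_fails(void) { return 1; }	/* simulate the failure */

static int page_mkwrite(void)
{
	struct handle *h;
	int ret = 0;

	h = journal_start();
	if (!h)
		return -1;

	if (write_access_fails()) {
		ret = -1;
		journal_stop(h);	/* the fix: release before bailing out */
		goto out;
	}
	journal_stop(h);
out:
	return ret;
}

int main(void)
{
	printf("page_mkwrite: %d\n", page_mkwrite());
	return 0;
}
```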
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index f18bfe37aff8..a56796814d6a 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 21 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
| 22 | { | 22 | { |
| 23 | struct inode *inode = filp->f_dentry->d_inode; | 23 | struct inode *inode = filp->f_dentry->d_inode; |
| 24 | struct super_block *sb = inode->i_sb; | ||
| 24 | struct ext4_inode_info *ei = EXT4_I(inode); | 25 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 25 | unsigned int flags; | 26 | unsigned int flags; |
| 26 | 27 | ||
| @@ -173,33 +174,8 @@ setversion_out: | |||
| 173 | mnt_drop_write(filp->f_path.mnt); | 174 | mnt_drop_write(filp->f_path.mnt); |
| 174 | return err; | 175 | return err; |
| 175 | } | 176 | } |
| 176 | #ifdef CONFIG_JBD2_DEBUG | ||
| 177 | case EXT4_IOC_WAIT_FOR_READONLY: | ||
| 178 | /* | ||
| 179 | * This is racy - by the time we're woken up and running, | ||
| 180 | * the superblock could be released. And the module could | ||
| 181 | * have been unloaded. So sue me. | ||
| 182 | * | ||
| 183 | * Returns 1 if it slept, else zero. | ||
| 184 | */ | ||
| 185 | { | ||
| 186 | struct super_block *sb = inode->i_sb; | ||
| 187 | DECLARE_WAITQUEUE(wait, current); | ||
| 188 | int ret = 0; | ||
| 189 | |||
| 190 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 191 | add_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait); | ||
| 192 | if (timer_pending(&EXT4_SB(sb)->turn_ro_timer)) { | ||
| 193 | schedule(); | ||
| 194 | ret = 1; | ||
| 195 | } | ||
| 196 | remove_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait); | ||
| 197 | return ret; | ||
| 198 | } | ||
| 199 | #endif | ||
| 200 | case EXT4_IOC_GROUP_EXTEND: { | 177 | case EXT4_IOC_GROUP_EXTEND: { |
| 201 | ext4_fsblk_t n_blocks_count; | 178 | ext4_fsblk_t n_blocks_count; |
| 202 | struct super_block *sb = inode->i_sb; | ||
| 203 | int err, err2=0; | 179 | int err, err2=0; |
| 204 | 180 | ||
| 205 | err = ext4_resize_begin(sb); | 181 | err = ext4_resize_begin(sb); |
| @@ -209,6 +185,13 @@ setversion_out: | |||
| 209 | if (get_user(n_blocks_count, (__u32 __user *)arg)) | 185 | if (get_user(n_blocks_count, (__u32 __user *)arg)) |
| 210 | return -EFAULT; | 186 | return -EFAULT; |
| 211 | 187 | ||
| 188 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
| 189 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | ||
| 190 | ext4_msg(sb, KERN_ERR, | ||
| 191 | "Online resizing not supported with bigalloc"); | ||
| 192 | return -EOPNOTSUPP; | ||
| 193 | } | ||
| 194 | |||
| 212 | err = mnt_want_write(filp->f_path.mnt); | 195 | err = mnt_want_write(filp->f_path.mnt); |
| 213 | if (err) | 196 | if (err) |
| 214 | return err; | 197 | return err; |
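The same bigalloc guard recurs below for EXT4_IOC_MOVE_EXT, EXT4_IOC_GROUP_ADD, and FITRIM. The idea: an RO_COMPAT feature bit means older code may mount the file system read-only but must not write it, so any write-side operation that has not been taught about the feature refuses up front. A sketch of the flag test; the 0x0200 bit value is believed to match the kernel's EXT4_FEATURE_RO_COMPAT_BIGALLOC, but treat it as illustrative:

```c
#include <stdio.h>

#define RO_COMPAT_BIGALLOC 0x0200	/* illustrative bit value */

struct super { unsigned int ro_compat; };

static int has_ro_compat(const struct super *s, unsigned int mask)
{
	return (s->ro_compat & mask) != 0;
}

int main(void)
{
	struct super s = { .ro_compat = RO_COMPAT_BIGALLOC };

	if (has_ro_compat(&s, RO_COMPAT_BIGALLOC))
		printf("online resize refused: bigalloc not supported\n");
	return 0;
}
```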
| @@ -250,6 +233,13 @@ setversion_out: | |||
| 250 | goto mext_out; | 233 | goto mext_out; |
| 251 | } | 234 | } |
| 252 | 235 | ||
| 236 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
| 237 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | ||
| 238 | ext4_msg(sb, KERN_ERR, | ||
| 239 | "Online defrag not supported with bigalloc"); | ||
| 240 | return -EOPNOTSUPP; | ||
| 241 | } | ||
| 242 | |||
| 253 | err = mnt_want_write(filp->f_path.mnt); | 243 | err = mnt_want_write(filp->f_path.mnt); |
| 254 | if (err) | 244 | if (err) |
| 255 | goto mext_out; | 245 | goto mext_out; |
| @@ -270,7 +260,6 @@ mext_out: | |||
| 270 | 260 | ||
| 271 | case EXT4_IOC_GROUP_ADD: { | 261 | case EXT4_IOC_GROUP_ADD: { |
| 272 | struct ext4_new_group_data input; | 262 | struct ext4_new_group_data input; |
| 273 | struct super_block *sb = inode->i_sb; | ||
| 274 | int err, err2=0; | 263 | int err, err2=0; |
| 275 | 264 | ||
| 276 | err = ext4_resize_begin(sb); | 265 | err = ext4_resize_begin(sb); |
| @@ -281,6 +270,13 @@ mext_out: | |||
| 281 | sizeof(input))) | 270 | sizeof(input))) |
| 282 | return -EFAULT; | 271 | return -EFAULT; |
| 283 | 272 | ||
| 273 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
| 274 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | ||
| 275 | ext4_msg(sb, KERN_ERR, | ||
| 276 | "Online resizing not supported with bigalloc"); | ||
| 277 | return -EOPNOTSUPP; | ||
| 278 | } | ||
| 279 | |||
| 284 | err = mnt_want_write(filp->f_path.mnt); | 280 | err = mnt_want_write(filp->f_path.mnt); |
| 285 | if (err) | 281 | if (err) |
| 286 | return err; | 282 | return err; |
| @@ -337,7 +333,6 @@ mext_out: | |||
| 337 | 333 | ||
| 338 | case FITRIM: | 334 | case FITRIM: |
| 339 | { | 335 | { |
| 340 | struct super_block *sb = inode->i_sb; | ||
| 341 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | 336 | struct request_queue *q = bdev_get_queue(sb->s_bdev); |
| 342 | struct fstrim_range range; | 337 | struct fstrim_range range; |
| 343 | int ret = 0; | 338 | int ret = 0; |
| @@ -348,7 +343,14 @@ mext_out: | |||
| 348 | if (!blk_queue_discard(q)) | 343 | if (!blk_queue_discard(q)) |
| 349 | return -EOPNOTSUPP; | 344 | return -EOPNOTSUPP; |
| 350 | 345 | ||
| 351 | if (copy_from_user(&range, (struct fstrim_range *)arg, | 346 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
| 347 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | ||
| 348 | ext4_msg(sb, KERN_ERR, | ||
| 349 | "FITRIM not supported with bigalloc"); | ||
| 350 | return -EOPNOTSUPP; | ||
| 351 | } | ||
| 352 | |||
| 353 | if (copy_from_user(&range, (struct fstrim_range __user *)arg, | ||
| 352 | sizeof(range))) | 354 | sizeof(range))) |
| 353 | return -EFAULT; | 355 | return -EFAULT; |
| 354 | 356 | ||
| @@ -358,7 +360,7 @@ mext_out: | |||
| 358 | if (ret < 0) | 360 | if (ret < 0) |
| 359 | return ret; | 361 | return ret; |
| 360 | 362 | ||
| 361 | if (copy_to_user((struct fstrim_range *)arg, &range, | 363 | if (copy_to_user((struct fstrim_range __user *)arg, &range, |
| 362 | sizeof(range))) | 364 | sizeof(range))) |
| 363 | return -EFAULT; | 365 | return -EFAULT; |
| 364 | 366 | ||
| @@ -396,11 +398,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
| 396 | case EXT4_IOC32_SETVERSION_OLD: | 398 | case EXT4_IOC32_SETVERSION_OLD: |
| 397 | cmd = EXT4_IOC_SETVERSION_OLD; | 399 | cmd = EXT4_IOC_SETVERSION_OLD; |
| 398 | break; | 400 | break; |
| 399 | #ifdef CONFIG_JBD2_DEBUG | ||
| 400 | case EXT4_IOC32_WAIT_FOR_READONLY: | ||
| 401 | cmd = EXT4_IOC_WAIT_FOR_READONLY; | ||
| 402 | break; | ||
| 403 | #endif | ||
| 404 | case EXT4_IOC32_GETRSVSZ: | 401 | case EXT4_IOC32_GETRSVSZ: |
| 405 | cmd = EXT4_IOC_GETRSVSZ; | 402 | cmd = EXT4_IOC_GETRSVSZ; |
| 406 | break; | 403 | break; |
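Besides dropping the EXT4_IOC_WAIT_FOR_READONLY plumbing, the FITRIM hunk above annotates the ioctl argument casts with __user. That changes nothing at runtime; it marks the pointer as living in the user address space so the sparse checker can flag a direct dereference. A schematic fragment only — not compilable outside a kernel tree, since copy_from_user() and friends come from kernel headers:

```c
/* Under sparse (__CHECKER__), __user puts a pointer in a separate
 * address space; dereferencing it directly is reported, and data must
 * move through copy_from_user()/copy_to_user() instead. */
#ifdef __CHECKER__
# define __user __attribute__((noderef, address_space(1)))
#else
# define __user
#endif

struct fstrim_range { unsigned long long start, len, minlen; };

static long fitrim(unsigned long arg)
{
	struct fstrim_range range;

	if (copy_from_user(&range, (struct fstrim_range __user *)arg,
			   sizeof(range)))
		return -EFAULT;
	/* ... issue the trim, write back the trimmed byte count ... */
	if (copy_to_user((struct fstrim_range __user *)arg, &range,
			 sizeof(range)))
		return -EFAULT;
	return 0;
}
```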
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 17a5a57c415a..e2d8be8f28bf 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
| @@ -70,8 +70,8 @@ | |||
| 70 | * | 70 | * |
| 71 | * pa_lstart -> the logical start block for this prealloc space | 71 | * pa_lstart -> the logical start block for this prealloc space |
| 72 | * pa_pstart -> the physical start block for this prealloc space | 72 | * pa_pstart -> the physical start block for this prealloc space |
| 73 | * pa_len -> length for this prealloc space | 73 | * pa_len -> length for this prealloc space (in clusters) |
| 74 | * pa_free -> free space available in this prealloc space | 74 | * pa_free -> free space available in this prealloc space (in clusters) |
| 75 | * | 75 | * |
| 76 | * The inode preallocation space is used looking at the _logical_ start | 76 | * The inode preallocation space is used looking at the _logical_ start |
| 77 | * block. If only the logical file block falls within the range of prealloc | 77 | * block. If only the logical file block falls within the range of prealloc |
| @@ -126,7 +126,8 @@ | |||
| 126 | * list. In case of inode preallocation we follow a list of heuristics | 126 | * list. In case of inode preallocation we follow a list of heuristics |
| 127 | * based on file size. This can be found in ext4_mb_normalize_request. If | 127 | * based on file size. This can be found in ext4_mb_normalize_request. If |
| 128 | * we are doing a group prealloc we try to normalize the request to | 128 | * we are doing a group prealloc we try to normalize the request to |
| 129 | * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is | 129 | * sbi->s_mb_group_prealloc. The default value of s_mb_group_prealloc is |
| 130 | * dependent on the cluster size; for non-bigalloc file systems, it is | ||
| 130 | * 512 blocks. This can be tuned via | 131 | * 512 blocks. This can be tuned via |
| 131 | * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in | 132 | * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in |
| 132 | * terms of number of blocks. If we have mounted the file system with -O | 133 | * terms of number of blocks. If we have mounted the file system with -O |
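Most of the mballoc churn in this commit is a unit change: with bigalloc, the allocator's bitmaps and counters describe clusters of 2^s_cluster_bits blocks rather than single blocks, and EXT4_C2B()/EXT4_B2C()/EXT4_NUM_B2C() convert between the two. A runnable model of the arithmetic; the macro bodies mirror the kernel definitions in spirit, not verbatim:

```c
#include <stdio.h>

#define C2B(bits, clusters)   ((clusters) << (bits))	/* clusters -> blocks */
#define B2C(bits, blocks)     ((blocks) >> (bits))	/* block -> its cluster */
#define NUM_B2C(bits, blocks) \
	(((blocks) + (1u << (bits)) - 1) >> (bits))	/* round count up */

int main(void)
{
	unsigned bits = 4;		/* 16 blocks per cluster */

	printf("3 clusters = %u blocks\n", C2B(bits, 3u));		/* 48 */
	printf("block 50 lives in cluster %u\n", B2C(bits, 50u));	/* 3 */
	printf("50 blocks span %u clusters\n", NUM_B2C(bits, 50u));	/* 4 */
	return 0;
}
```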
| @@ -459,7 +460,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, | |||
| 459 | ext4_fsblk_t blocknr; | 460 | ext4_fsblk_t blocknr; |
| 460 | 461 | ||
| 461 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); | 462 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); |
| 462 | blocknr += first + i; | 463 | blocknr += EXT4_C2B(EXT4_SB(sb), first + i); |
| 463 | ext4_grp_locked_error(sb, e4b->bd_group, | 464 | ext4_grp_locked_error(sb, e4b->bd_group, |
| 464 | inode ? inode->i_ino : 0, | 465 | inode ? inode->i_ino : 0, |
| 465 | blocknr, | 466 | blocknr, |
| @@ -580,7 +581,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
| 580 | continue; | 581 | continue; |
| 581 | } | 582 | } |
| 582 | 583 | ||
| 583 | /* both bits in buddy2 must be 0 */ | 584 | /* both bits in buddy2 must be 1 */ |
| 584 | MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2)); | 585 | MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2)); |
| 585 | MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2)); | 586 | MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2)); |
| 586 | 587 | ||
| @@ -653,7 +654,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb, | |||
| 653 | ext4_grpblk_t chunk; | 654 | ext4_grpblk_t chunk; |
| 654 | unsigned short border; | 655 | unsigned short border; |
| 655 | 656 | ||
| 656 | BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); | 657 | BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb)); |
| 657 | 658 | ||
| 658 | border = 2 << sb->s_blocksize_bits; | 659 | border = 2 << sb->s_blocksize_bits; |
| 659 | 660 | ||
| @@ -705,7 +706,7 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
| 705 | void *buddy, void *bitmap, ext4_group_t group) | 706 | void *buddy, void *bitmap, ext4_group_t group) |
| 706 | { | 707 | { |
| 707 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 708 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
| 708 | ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb); | 709 | ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); |
| 709 | ext4_grpblk_t i = 0; | 710 | ext4_grpblk_t i = 0; |
| 710 | ext4_grpblk_t first; | 711 | ext4_grpblk_t first; |
| 711 | ext4_grpblk_t len; | 712 | ext4_grpblk_t len; |
| @@ -734,7 +735,7 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
| 734 | 735 | ||
| 735 | if (free != grp->bb_free) { | 736 | if (free != grp->bb_free) { |
| 736 | ext4_grp_locked_error(sb, group, 0, 0, | 737 | ext4_grp_locked_error(sb, group, 0, 0, |
| 737 | "%u blocks in bitmap, %u in gd", | 738 | "%u clusters in bitmap, %u in gd", |
| 738 | free, grp->bb_free); | 739 | free, grp->bb_free); |
| 739 | /* | 740 | /* |
| 740 | * If we intend to continue, we consider group descriptor | 741 | * If we intend to continue, we consider group descriptor |
| @@ -1339,7 +1340,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
| 1339 | ext4_fsblk_t blocknr; | 1340 | ext4_fsblk_t blocknr; |
| 1340 | 1341 | ||
| 1341 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); | 1342 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); |
| 1342 | blocknr += block; | 1343 | blocknr += EXT4_C2B(EXT4_SB(sb), block); |
| 1343 | ext4_grp_locked_error(sb, e4b->bd_group, | 1344 | ext4_grp_locked_error(sb, e4b->bd_group, |
| 1344 | inode ? inode->i_ino : 0, | 1345 | inode ? inode->i_ino : 0, |
| 1345 | blocknr, | 1346 | blocknr, |
| @@ -1390,7 +1391,6 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, | |||
| 1390 | { | 1391 | { |
| 1391 | int next = block; | 1392 | int next = block; |
| 1392 | int max; | 1393 | int max; |
| 1393 | int ord; | ||
| 1394 | void *buddy; | 1394 | void *buddy; |
| 1395 | 1395 | ||
| 1396 | assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); | 1396 | assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); |
| @@ -1432,9 +1432,8 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, | |||
| 1432 | if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) | 1432 | if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) |
| 1433 | break; | 1433 | break; |
| 1434 | 1434 | ||
| 1435 | ord = mb_find_order_for_block(e4b, next); | 1435 | order = mb_find_order_for_block(e4b, next); |
| 1436 | 1436 | ||
| 1437 | order = ord; | ||
| 1438 | block = next >> order; | 1437 | block = next >> order; |
| 1439 | ex->fe_len += 1 << order; | 1438 | ex->fe_len += 1 << order; |
| 1440 | } | 1439 | } |
| @@ -1624,8 +1623,8 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac, | |||
| 1624 | struct ext4_free_extent *gex = &ac->ac_g_ex; | 1623 | struct ext4_free_extent *gex = &ac->ac_g_ex; |
| 1625 | 1624 | ||
| 1626 | BUG_ON(ex->fe_len <= 0); | 1625 | BUG_ON(ex->fe_len <= 0); |
| 1627 | BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | 1626 | BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); |
| 1628 | BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | 1627 | BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); |
| 1629 | BUG_ON(ac->ac_status != AC_STATUS_CONTINUE); | 1628 | BUG_ON(ac->ac_status != AC_STATUS_CONTINUE); |
| 1630 | 1629 | ||
| 1631 | ac->ac_found++; | 1630 | ac->ac_found++; |
| @@ -1823,15 +1822,15 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
| 1823 | 1822 | ||
| 1824 | while (free && ac->ac_status == AC_STATUS_CONTINUE) { | 1823 | while (free && ac->ac_status == AC_STATUS_CONTINUE) { |
| 1825 | i = mb_find_next_zero_bit(bitmap, | 1824 | i = mb_find_next_zero_bit(bitmap, |
| 1826 | EXT4_BLOCKS_PER_GROUP(sb), i); | 1825 | EXT4_CLUSTERS_PER_GROUP(sb), i); |
| 1827 | if (i >= EXT4_BLOCKS_PER_GROUP(sb)) { | 1826 | if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) { |
| 1828 | /* | 1827 | /* |
| 1829 | * If we have a corrupt bitmap, we won't find any | 1828 | * If we have a corrupt bitmap, we won't find any |
| 1830 | * free blocks even though group info says we | 1829 | * free blocks even though group info says we |
| 1831 | * have free blocks | 1830 | * have free blocks |
| 1832 | */ | 1831 | */ |
| 1833 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, | 1832 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, |
| 1834 | "%d free blocks as per " | 1833 | "%d free clusters as per " |
| 1835 | "group info. But bitmap says 0", | 1834 | "group info. But bitmap says 0", |
| 1836 | free); | 1835 | free); |
| 1837 | break; | 1836 | break; |
| @@ -1841,7 +1840,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
| 1841 | BUG_ON(ex.fe_len <= 0); | 1840 | BUG_ON(ex.fe_len <= 0); |
| 1842 | if (free < ex.fe_len) { | 1841 | if (free < ex.fe_len) { |
| 1843 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, | 1842 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, |
| 1844 | "%d free blocks as per " | 1843 | "%d free clusters as per " |
| 1845 | "group info. But got %d blocks", | 1844 | "group info. But got %d blocks", |
| 1846 | free, ex.fe_len); | 1845 | free, ex.fe_len); |
| 1847 | /* | 1846 | /* |
| @@ -1887,7 +1886,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, | |||
| 1887 | do_div(a, sbi->s_stripe); | 1886 | do_div(a, sbi->s_stripe); |
| 1888 | i = (a * sbi->s_stripe) - first_group_block; | 1887 | i = (a * sbi->s_stripe) - first_group_block; |
| 1889 | 1888 | ||
| 1890 | while (i < EXT4_BLOCKS_PER_GROUP(sb)) { | 1889 | while (i < EXT4_CLUSTERS_PER_GROUP(sb)) { |
| 1891 | if (!mb_test_bit(i, bitmap)) { | 1890 | if (!mb_test_bit(i, bitmap)) { |
| 1892 | max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex); | 1891 | max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex); |
| 1893 | if (max >= sbi->s_stripe) { | 1892 | if (max >= sbi->s_stripe) { |
| @@ -2252,10 +2251,10 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
| 2252 | */ | 2251 | */ |
| 2253 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 2252 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
| 2254 | meta_group_info[i]->bb_free = | 2253 | meta_group_info[i]->bb_free = |
| 2255 | ext4_free_blocks_after_init(sb, group, desc); | 2254 | ext4_free_clusters_after_init(sb, group, desc); |
| 2256 | } else { | 2255 | } else { |
| 2257 | meta_group_info[i]->bb_free = | 2256 | meta_group_info[i]->bb_free = |
| 2258 | ext4_free_blks_count(sb, desc); | 2257 | ext4_free_group_clusters(sb, desc); |
| 2259 | } | 2258 | } |
| 2260 | 2259 | ||
| 2261 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2260 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
| @@ -2473,7 +2472,20 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
| 2473 | sbi->s_mb_stats = MB_DEFAULT_STATS; | 2472 | sbi->s_mb_stats = MB_DEFAULT_STATS; |
| 2474 | sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; | 2473 | sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; |
| 2475 | sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; | 2474 | sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; |
| 2476 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; | 2475 | /* |
| 2476 | * The default group preallocation is 512, which for 4k block | ||
| 2477 | * sizes translates to 2 megabytes. However for bigalloc file | ||
| 2478 | * systems, this is probably too big (i.e., if the cluster size | ||
| 2479 | * is 1 megabyte, then group preallocation size becomes half a | ||
| 2480 | * gigabyte!). As a default, we will keep a two megabyte | ||
| 2481 | * group prealloc size for cluster sizes up to 64k, and after | ||
| 2482 | * that, we will force a minimum group preallocation size of | ||
| 2483 | * 32 clusters. This translates to 8 megs when the cluster | ||
| 2484 | * size is 256k, and 32 megs when the cluster size is 1 meg, | ||
| 2485 | * which seems reasonable as a default. | ||
| 2486 | */ | ||
| 2487 | sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >> | ||
| 2488 | sbi->s_cluster_bits, 32); | ||
| 2477 | /* | 2489 | /* |
| 2478 | * If there is a s_stripe > 1, then we set the s_mb_group_prealloc | 2490 | * If there is a s_stripe > 1, then we set the s_mb_group_prealloc |
| 2479 | * to the lowest multiple of s_stripe which is bigger than | 2491 | * to the lowest multiple of s_stripe which is bigger than |
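The new default can be checked by hand: max(512 >> s_cluster_bits, 32) clusters keeps the group preallocation window at 2MB for 4k blocks until the cluster size reaches 64k, after which the 32-cluster floor grows it to 8MB at 256k clusters and 32MB at 1MB clusters, exactly as the comment promises. A runnable table of those values, assuming 4k blocks:

```c
#include <stdio.h>

#define MB_DEFAULT_GROUP_PREALLOC 512	/* in blocks */

int main(void)
{
	for (unsigned bits = 0; bits <= 8; bits++) {	/* ratio 1..256 */
		unsigned prealloc = MB_DEFAULT_GROUP_PREALLOC >> bits;

		if (prealloc < 32)
			prealloc = 32;	/* the 32-cluster floor */
		/* 4k blocks: one cluster is (4 << bits) KiB */
		unsigned long long window_kb =
			(unsigned long long)prealloc * (4ull << bits);
		printf("ratio %3u: %3u clusters = %llu KiB\n",
		       1u << bits, prealloc, window_kb);
	}
	return 0;
}
```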
| @@ -2490,7 +2502,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
| 2490 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); | 2502 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
| 2491 | if (sbi->s_locality_groups == NULL) { | 2503 | if (sbi->s_locality_groups == NULL) { |
| 2492 | ret = -ENOMEM; | 2504 | ret = -ENOMEM; |
| 2493 | goto out; | 2505 | goto out_free_groupinfo_slab; |
| 2494 | } | 2506 | } |
| 2495 | for_each_possible_cpu(i) { | 2507 | for_each_possible_cpu(i) { |
| 2496 | struct ext4_locality_group *lg; | 2508 | struct ext4_locality_group *lg; |
| @@ -2503,9 +2515,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
| 2503 | 2515 | ||
| 2504 | /* init file for buddy data */ | 2516 | /* init file for buddy data */ |
| 2505 | ret = ext4_mb_init_backend(sb); | 2517 | ret = ext4_mb_init_backend(sb); |
| 2506 | if (ret != 0) { | 2518 | if (ret != 0) |
| 2507 | goto out; | 2519 | goto out_free_locality_groups; |
| 2508 | } | ||
| 2509 | 2520 | ||
| 2510 | if (sbi->s_proc) | 2521 | if (sbi->s_proc) |
| 2511 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, | 2522 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, |
| @@ -2513,11 +2524,19 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
| 2513 | 2524 | ||
| 2514 | if (sbi->s_journal) | 2525 | if (sbi->s_journal) |
| 2515 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | 2526 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; |
| 2527 | |||
| 2528 | return 0; | ||
| 2529 | |||
| 2530 | out_free_locality_groups: | ||
| 2531 | free_percpu(sbi->s_locality_groups); | ||
| 2532 | sbi->s_locality_groups = NULL; | ||
| 2533 | out_free_groupinfo_slab: | ||
| 2534 | ext4_groupinfo_destroy_slabs(); | ||
| 2516 | out: | 2535 | out: |
| 2517 | if (ret) { | 2536 | kfree(sbi->s_mb_offsets); |
| 2518 | kfree(sbi->s_mb_offsets); | 2537 | sbi->s_mb_offsets = NULL; |
| 2519 | kfree(sbi->s_mb_maxs); | 2538 | kfree(sbi->s_mb_maxs); |
| 2520 | } | 2539 | sbi->s_mb_maxs = NULL; |
| 2521 | return ret; | 2540 | return ret; |
| 2522 | } | 2541 | } |
| 2523 | 2542 | ||
| @@ -2602,11 +2621,13 @@ int ext4_mb_release(struct super_block *sb) | |||
| 2602 | } | 2621 | } |
| 2603 | 2622 | ||
| 2604 | static inline int ext4_issue_discard(struct super_block *sb, | 2623 | static inline int ext4_issue_discard(struct super_block *sb, |
| 2605 | ext4_group_t block_group, ext4_grpblk_t block, int count) | 2624 | ext4_group_t block_group, ext4_grpblk_t cluster, int count) |
| 2606 | { | 2625 | { |
| 2607 | ext4_fsblk_t discard_block; | 2626 | ext4_fsblk_t discard_block; |
| 2608 | 2627 | ||
| 2609 | discard_block = block + ext4_group_first_block_no(sb, block_group); | 2628 | discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) + |
| 2629 | ext4_group_first_block_no(sb, block_group)); | ||
| 2630 | count = EXT4_C2B(EXT4_SB(sb), count); | ||
| 2610 | trace_ext4_discard_blocks(sb, | 2631 | trace_ext4_discard_blocks(sb, |
| 2611 | (unsigned long long) discard_block, count); | 2632 | (unsigned long long) discard_block, count); |
| 2612 | return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); | 2633 | return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); |
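ext4_issue_discard() now takes its offset in clusters, so both the start and the count must be scaled back to blocks before the request reaches sb_issue_discard(); forgetting either would trim a region 1/ratio of the intended size. A runnable model with made-up numbers:

```c
#include <stdio.h>

int main(void)
{
	unsigned bits = 4;			/* 16 blocks per cluster */
	unsigned long long group_first_block = 32768;
	unsigned cluster = 10, count_clusters = 3;

	unsigned long long discard_block =
		((unsigned long long)cluster << bits) + group_first_block;
	unsigned count_blocks = count_clusters << bits;

	/* 48 blocks starting at block 32928 */
	printf("discard %u blocks starting at block %llu\n",
	       count_blocks, discard_block);
	return 0;
}
```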
| @@ -2633,7 +2654,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
| 2633 | 2654 | ||
| 2634 | if (test_opt(sb, DISCARD)) | 2655 | if (test_opt(sb, DISCARD)) |
| 2635 | ext4_issue_discard(sb, entry->group, | 2656 | ext4_issue_discard(sb, entry->group, |
| 2636 | entry->start_blk, entry->count); | 2657 | entry->start_cluster, entry->count); |
| 2637 | 2658 | ||
| 2638 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2659 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
| 2639 | /* we expect to find existing buddy because it's pinned */ | 2660 | /* we expect to find existing buddy because it's pinned */ |
| @@ -2646,7 +2667,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
| 2646 | ext4_lock_group(sb, entry->group); | 2667 | ext4_lock_group(sb, entry->group); |
| 2647 | /* Take it out of per group rb tree */ | 2668 | /* Take it out of per group rb tree */ |
| 2648 | rb_erase(&entry->node, &(db->bb_free_root)); | 2669 | rb_erase(&entry->node, &(db->bb_free_root)); |
| 2649 | mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); | 2670 | mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count); |
| 2650 | 2671 | ||
| 2651 | /* | 2672 | /* |
| 2652 | * Clear the trimmed flag for the group so that the next | 2673 | * Clear the trimmed flag for the group so that the next |
| @@ -2752,7 +2773,7 @@ void ext4_exit_mballoc(void) | |||
| 2752 | */ | 2773 | */ |
| 2753 | static noinline_for_stack int | 2774 | static noinline_for_stack int |
| 2754 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | 2775 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, |
| 2755 | handle_t *handle, unsigned int reserv_blks) | 2776 | handle_t *handle, unsigned int reserv_clstrs) |
| 2756 | { | 2777 | { |
| 2757 | struct buffer_head *bitmap_bh = NULL; | 2778 | struct buffer_head *bitmap_bh = NULL; |
| 2758 | struct ext4_group_desc *gdp; | 2779 | struct ext4_group_desc *gdp; |
| @@ -2783,7 +2804,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
| 2783 | goto out_err; | 2804 | goto out_err; |
| 2784 | 2805 | ||
| 2785 | ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, | 2806 | ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, |
| 2786 | ext4_free_blks_count(sb, gdp)); | 2807 | ext4_free_group_clusters(sb, gdp)); |
| 2787 | 2808 | ||
| 2788 | err = ext4_journal_get_write_access(handle, gdp_bh); | 2809 | err = ext4_journal_get_write_access(handle, gdp_bh); |
| 2789 | if (err) | 2810 | if (err) |
| @@ -2791,7 +2812,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
| 2791 | 2812 | ||
| 2792 | block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); | 2813 | block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); |
| 2793 | 2814 | ||
| 2794 | len = ac->ac_b_ex.fe_len; | 2815 | len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
| 2795 | if (!ext4_data_block_valid(sbi, block, len)) { | 2816 | if (!ext4_data_block_valid(sbi, block, len)) { |
| 2796 | ext4_error(sb, "Allocating blocks %llu-%llu which overlap " | 2817 | ext4_error(sb, "Allocating blocks %llu-%llu which overlap " |
| 2797 | "fs metadata\n", block, block+len); | 2818 | "fs metadata\n", block, block+len); |
| @@ -2823,28 +2844,29 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
| 2823 | ac->ac_b_ex.fe_len); | 2844 | ac->ac_b_ex.fe_len); |
| 2824 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 2845 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
| 2825 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | 2846 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); |
| 2826 | ext4_free_blks_set(sb, gdp, | 2847 | ext4_free_group_clusters_set(sb, gdp, |
| 2827 | ext4_free_blocks_after_init(sb, | 2848 | ext4_free_clusters_after_init(sb, |
| 2828 | ac->ac_b_ex.fe_group, gdp)); | 2849 | ac->ac_b_ex.fe_group, gdp)); |
| 2829 | } | 2850 | } |
| 2830 | len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; | 2851 | len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len; |
| 2831 | ext4_free_blks_set(sb, gdp, len); | 2852 | ext4_free_group_clusters_set(sb, gdp, len); |
| 2832 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); | 2853 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); |
| 2833 | 2854 | ||
| 2834 | ext4_unlock_group(sb, ac->ac_b_ex.fe_group); | 2855 | ext4_unlock_group(sb, ac->ac_b_ex.fe_group); |
| 2835 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); | 2856 | percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len); |
| 2836 | /* | 2857 | /* |
| 2837 | * Now reduce the dirty block count also. Should not go negative | 2858 | * Now reduce the dirty block count also. Should not go negative |
| 2838 | */ | 2859 | */ |
| 2839 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) | 2860 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) |
| 2840 | /* release all the reserved blocks if non delalloc */ | 2861 | /* release all the reserved blocks if non delalloc */ |
| 2841 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); | 2862 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
| 2863 | reserv_clstrs); | ||
| 2842 | 2864 | ||
| 2843 | if (sbi->s_log_groups_per_flex) { | 2865 | if (sbi->s_log_groups_per_flex) { |
| 2844 | ext4_group_t flex_group = ext4_flex_group(sbi, | 2866 | ext4_group_t flex_group = ext4_flex_group(sbi, |
| 2845 | ac->ac_b_ex.fe_group); | 2867 | ac->ac_b_ex.fe_group); |
| 2846 | atomic_sub(ac->ac_b_ex.fe_len, | 2868 | atomic_sub(ac->ac_b_ex.fe_len, |
| 2847 | &sbi->s_flex_groups[flex_group].free_blocks); | 2869 | &sbi->s_flex_groups[flex_group].free_clusters); |
| 2848 | } | 2870 | } |
| 2849 | 2871 | ||
| 2850 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | 2872 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); |
| @@ -2886,6 +2908,7 @@ static noinline_for_stack void | |||
| 2886 | ext4_mb_normalize_request(struct ext4_allocation_context *ac, | 2908 | ext4_mb_normalize_request(struct ext4_allocation_context *ac, |
| 2887 | struct ext4_allocation_request *ar) | 2909 | struct ext4_allocation_request *ar) |
| 2888 | { | 2910 | { |
| 2911 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | ||
| 2889 | int bsbits, max; | 2912 | int bsbits, max; |
| 2890 | ext4_lblk_t end; | 2913 | ext4_lblk_t end; |
| 2891 | loff_t size, orig_size, start_off; | 2914 | loff_t size, orig_size, start_off; |
| @@ -2916,7 +2939,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
| 2916 | 2939 | ||
| 2917 | /* first, let's learn actual file size | 2940 | /* first, let's learn actual file size |
| 2918 | * given current request is allocated */ | 2941 | * given current request is allocated */ |
| 2919 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; | 2942 | size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len); |
| 2920 | size = size << bsbits; | 2943 | size = size << bsbits; |
| 2921 | if (size < i_size_read(ac->ac_inode)) | 2944 | if (size < i_size_read(ac->ac_inode)) |
| 2922 | size = i_size_read(ac->ac_inode); | 2945 | size = i_size_read(ac->ac_inode); |
| @@ -2988,7 +3011,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
| 2988 | continue; | 3011 | continue; |
| 2989 | } | 3012 | } |
| 2990 | 3013 | ||
| 2991 | pa_end = pa->pa_lstart + pa->pa_len; | 3014 | pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb), |
| 3015 | pa->pa_len); | ||
| 2992 | 3016 | ||
| 2993 | /* PA must not overlap original request */ | 3017 | /* PA must not overlap original request */ |
| 2994 | BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || | 3018 | BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || |
| @@ -3018,9 +3042,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
| 3018 | rcu_read_lock(); | 3042 | rcu_read_lock(); |
| 3019 | list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { | 3043 | list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { |
| 3020 | ext4_lblk_t pa_end; | 3044 | ext4_lblk_t pa_end; |
| 3045 | |||
| 3021 | spin_lock(&pa->pa_lock); | 3046 | spin_lock(&pa->pa_lock); |
| 3022 | if (pa->pa_deleted == 0) { | 3047 | if (pa->pa_deleted == 0) { |
| 3023 | pa_end = pa->pa_lstart + pa->pa_len; | 3048 | pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb), |
| 3049 | pa->pa_len); | ||
| 3024 | BUG_ON(!(start >= pa_end || end <= pa->pa_lstart)); | 3050 | BUG_ON(!(start >= pa_end || end <= pa->pa_lstart)); |
| 3025 | } | 3051 | } |
| 3026 | spin_unlock(&pa->pa_lock); | 3052 | spin_unlock(&pa->pa_lock); |
| @@ -3036,14 +3062,14 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
| 3036 | } | 3062 | } |
| 3037 | BUG_ON(start + size <= ac->ac_o_ex.fe_logical && | 3063 | BUG_ON(start + size <= ac->ac_o_ex.fe_logical && |
| 3038 | start > ac->ac_o_ex.fe_logical); | 3064 | start > ac->ac_o_ex.fe_logical); |
| 3039 | BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | 3065 | BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); |
| 3040 | 3066 | ||
| 3041 | /* now prepare goal request */ | 3067 | /* now prepare goal request */ |
| 3042 | 3068 | ||
| 3043 | /* XXX: is it better to align blocks WRT logical | 3069 | /* XXX: is it better to align blocks WRT logical |
| 3044 | * placement or satisfy big request as is */ | 3070 | * placement or satisfy big request as is */ |
| 3045 | ac->ac_g_ex.fe_logical = start; | 3071 | ac->ac_g_ex.fe_logical = start; |
| 3046 | ac->ac_g_ex.fe_len = size; | 3072 | ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size); |
| 3047 | 3073 | ||
| 3048 | /* define goal start in order to merge */ | 3074 | /* define goal start in order to merge */ |
| 3049 | if (ar->pright && (ar->lright == (start + size))) { | 3075 | if (ar->pright && (ar->lright == (start + size))) { |
| @@ -3112,14 +3138,16 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) | |||
| 3112 | static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, | 3138 | static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, |
| 3113 | struct ext4_prealloc_space *pa) | 3139 | struct ext4_prealloc_space *pa) |
| 3114 | { | 3140 | { |
| 3141 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | ||
| 3115 | ext4_fsblk_t start; | 3142 | ext4_fsblk_t start; |
| 3116 | ext4_fsblk_t end; | 3143 | ext4_fsblk_t end; |
| 3117 | int len; | 3144 | int len; |
| 3118 | 3145 | ||
| 3119 | /* found preallocated blocks, use them */ | 3146 | /* found preallocated blocks, use them */ |
| 3120 | start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart); | 3147 | start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart); |
| 3121 | end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len); | 3148 | end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len), |
| 3122 | len = end - start; | 3149 | start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len)); |
| 3150 | len = EXT4_NUM_B2C(sbi, end - start); | ||
| 3123 | ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group, | 3151 | ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group, |
| 3124 | &ac->ac_b_ex.fe_start); | 3152 | &ac->ac_b_ex.fe_start); |
| 3125 | ac->ac_b_ex.fe_len = len; | 3153 | ac->ac_b_ex.fe_len = len; |
| @@ -3127,7 +3155,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, | |||
| 3127 | ac->ac_pa = pa; | 3155 | ac->ac_pa = pa; |
| 3128 | 3156 | ||
| 3129 | BUG_ON(start < pa->pa_pstart); | 3157 | BUG_ON(start < pa->pa_pstart); |
| 3130 | BUG_ON(start + len > pa->pa_pstart + pa->pa_len); | 3158 | BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len)); |
| 3131 | BUG_ON(pa->pa_free < len); | 3159 | BUG_ON(pa->pa_free < len); |
| 3132 | pa->pa_free -= len; | 3160 | pa->pa_free -= len; |
| 3133 | 3161 | ||
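ext4_mb_use_inode_pa() now mixes units deliberately: pa_pstart and the computed start/end are block numbers, while pa_len and fe_len are cluster counts, so the usable window is intersected in blocks and the extent length converted back with the round-up helper. A runnable model with invented values:

```c
#include <stdio.h>

int main(void)
{
	unsigned bits = 2;			/* 4 blocks per cluster */
	unsigned long long pa_pstart = 1000, pa_lstart = 200;
	unsigned pa_len = 8;			/* clusters, i.e. 32 blocks */
	unsigned long long logical = 210;	/* requested logical block */
	unsigned req_len = 3;			/* clusters, i.e. 12 blocks */

	unsigned long long start = pa_pstart + (logical - pa_lstart);
	unsigned long long pa_end = pa_pstart + ((unsigned long long)pa_len << bits);
	unsigned long long want_end = start + ((unsigned long long)req_len << bits);
	unsigned long long end = pa_end < want_end ? pa_end : want_end;
	unsigned len_clusters =
		(unsigned)((end - start + (1u << bits) - 1) >> bits);

	/* use blocks [1010, 1022) = 3 cluster(s) */
	printf("use blocks [%llu, %llu) = %u cluster(s)\n",
	       start, end, len_clusters);
	return 0;
}
```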
| @@ -3193,6 +3221,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block, | |||
| 3193 | static noinline_for_stack int | 3221 | static noinline_for_stack int |
| 3194 | ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | 3222 | ext4_mb_use_preallocated(struct ext4_allocation_context *ac) |
| 3195 | { | 3223 | { |
| 3224 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | ||
| 3196 | int order, i; | 3225 | int order, i; |
| 3197 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); | 3226 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); |
| 3198 | struct ext4_locality_group *lg; | 3227 | struct ext4_locality_group *lg; |
| @@ -3210,12 +3239,14 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
| 3210 | /* all fields in this condition don't change, | 3239 | /* all fields in this condition don't change, |
| 3211 | * so we can skip locking for them */ | 3240 | * so we can skip locking for them */ |
| 3212 | if (ac->ac_o_ex.fe_logical < pa->pa_lstart || | 3241 | if (ac->ac_o_ex.fe_logical < pa->pa_lstart || |
| 3213 | ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) | 3242 | ac->ac_o_ex.fe_logical >= (pa->pa_lstart + |
| 3243 | EXT4_C2B(sbi, pa->pa_len))) | ||
| 3214 | continue; | 3244 | continue; |
| 3215 | 3245 | ||
| 3216 | /* non-extent files can't have physical blocks past 2^32 */ | 3246 | /* non-extent files can't have physical blocks past 2^32 */ |
| 3217 | if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) && | 3247 | if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) && |
| 3218 | pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) | 3248 | (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) > |
| 3249 | EXT4_MAX_BLOCK_FILE_PHYS)) | ||
| 3219 | continue; | 3250 | continue; |
| 3220 | 3251 | ||
| 3221 | /* found preallocated blocks, use them */ | 3252 | /* found preallocated blocks, use them */ |
| @@ -3291,7 +3322,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | |||
| 3291 | 3322 | ||
| 3292 | while (n) { | 3323 | while (n) { |
| 3293 | entry = rb_entry(n, struct ext4_free_data, node); | 3324 | entry = rb_entry(n, struct ext4_free_data, node); |
| 3294 | ext4_set_bits(bitmap, entry->start_blk, entry->count); | 3325 | ext4_set_bits(bitmap, entry->start_cluster, entry->count); |
| 3295 | n = rb_next(n); | 3326 | n = rb_next(n); |
| 3296 | } | 3327 | } |
| 3297 | return; | 3328 | return; |
| @@ -3312,7 +3343,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
| 3312 | ext4_group_t groupnr; | 3343 | ext4_group_t groupnr; |
| 3313 | ext4_grpblk_t start; | 3344 | ext4_grpblk_t start; |
| 3314 | int preallocated = 0; | 3345 | int preallocated = 0; |
| 3315 | int count = 0; | ||
| 3316 | int len; | 3346 | int len; |
| 3317 | 3347 | ||
| 3318 | /* all form of preallocation discards first load group, | 3348 | /* all form of preallocation discards first load group, |
| @@ -3335,7 +3365,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
| 3335 | BUG_ON(groupnr != group); | 3365 | BUG_ON(groupnr != group); |
| 3336 | ext4_set_bits(bitmap, start, len); | 3366 | ext4_set_bits(bitmap, start, len); |
| 3337 | preallocated += len; | 3367 | preallocated += len; |
| 3338 | count++; | ||
| 3339 | } | 3368 | } |
| 3340 | mb_debug(1, "prellocated %u for group %u\n", preallocated, group); | 3369 | mb_debug(1, "prellocated %u for group %u\n", preallocated, group); |
| 3341 | } | 3370 | } |
| @@ -3412,6 +3441,7 @@ static noinline_for_stack int | |||
| 3412 | ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) | 3441 | ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) |
| 3413 | { | 3442 | { |
| 3414 | struct super_block *sb = ac->ac_sb; | 3443 | struct super_block *sb = ac->ac_sb; |
| 3444 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 3415 | struct ext4_prealloc_space *pa; | 3445 | struct ext4_prealloc_space *pa; |
| 3416 | struct ext4_group_info *grp; | 3446 | struct ext4_group_info *grp; |
| 3417 | struct ext4_inode_info *ei; | 3447 | struct ext4_inode_info *ei; |
| @@ -3443,16 +3473,18 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) | |||
| 3443 | winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical; | 3473 | winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical; |
| 3444 | 3474 | ||
| 3445 | /* also, we should cover whole original request */ | 3475 | /* also, we should cover whole original request */ |
| 3446 | wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len; | 3476 | wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len); |
| 3447 | 3477 | ||
| 3448 | /* the smallest one defines real window */ | 3478 | /* the smallest one defines real window */ |
| 3449 | win = min(winl, wins); | 3479 | win = min(winl, wins); |
| 3450 | 3480 | ||
| 3451 | offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len; | 3481 | offs = ac->ac_o_ex.fe_logical % |
| 3482 | EXT4_C2B(sbi, ac->ac_b_ex.fe_len); | ||
| 3452 | if (offs && offs < win) | 3483 | if (offs && offs < win) |
| 3453 | win = offs; | 3484 | win = offs; |
| 3454 | 3485 | ||
| 3455 | ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win; | 3486 | ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - |
| 3487 | EXT4_B2C(sbi, win); | ||
| 3456 | BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); | 3488 | BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); |
| 3457 | BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); | 3489 | BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); |
| 3458 | } | 3490 | } |
| @@ -3477,7 +3509,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) | |||
| 3477 | trace_ext4_mb_new_inode_pa(ac, pa); | 3509 | trace_ext4_mb_new_inode_pa(ac, pa); |
| 3478 | 3510 | ||
| 3479 | ext4_mb_use_inode_pa(ac, pa); | 3511 | ext4_mb_use_inode_pa(ac, pa); |
| 3480 | atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); | 3512 | atomic_add(pa->pa_free, &sbi->s_mb_preallocated); |
| 3481 | 3513 | ||
| 3482 | ei = EXT4_I(ac->ac_inode); | 3514 | ei = EXT4_I(ac->ac_inode); |
| 3483 | grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); | 3515 | grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); |
| @@ -3592,7 +3624,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
| 3592 | 3624 | ||
| 3593 | BUG_ON(pa->pa_deleted == 0); | 3625 | BUG_ON(pa->pa_deleted == 0); |
| 3594 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3626 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
| 3595 | grp_blk_start = pa->pa_pstart - bit; | 3627 | grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit); |
| 3596 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3628 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
| 3597 | end = bit + pa->pa_len; | 3629 | end = bit + pa->pa_len; |
| 3598 | 3630 | ||
| @@ -3607,7 +3639,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
| 3607 | free += next - bit; | 3639 | free += next - bit; |
| 3608 | 3640 | ||
| 3609 | trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); | 3641 | trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); |
| 3610 | trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit, | 3642 | trace_ext4_mb_release_inode_pa(pa, (grp_blk_start + |
| 3643 | EXT4_C2B(sbi, bit)), | ||
| 3611 | next - bit); | 3644 | next - bit); |
| 3612 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); | 3645 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); |
| 3613 | bit = next + 1; | 3646 | bit = next + 1; |
| @@ -3690,7 +3723,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
| 3690 | } | 3723 | } |
| 3691 | 3724 | ||
| 3692 | if (needed == 0) | 3725 | if (needed == 0) |
| 3693 | needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; | 3726 | needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1; |
| 3694 | 3727 | ||
| 3695 | INIT_LIST_HEAD(&list); | 3728 | INIT_LIST_HEAD(&list); |
| 3696 | repeat: | 3729 | repeat: |
| @@ -3958,7 +3991,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
| 3958 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) | 3991 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) |
| 3959 | return; | 3992 | return; |
| 3960 | 3993 | ||
| 3961 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; | 3994 | size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len); |
| 3962 | isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) | 3995 | isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) |
| 3963 | >> bsbits; | 3996 | >> bsbits; |
| 3964 | 3997 | ||
| @@ -3969,6 +4002,11 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
| 3969 | return; | 4002 | return; |
| 3970 | } | 4003 | } |
| 3971 | 4004 | ||
| 4005 | if (sbi->s_mb_group_prealloc <= 0) { | ||
| 4006 | ac->ac_flags |= EXT4_MB_STREAM_ALLOC; | ||
| 4007 | return; | ||
| 4008 | } | ||
| 4009 | |||
| 3972 | /* don't use group allocation for large files */ | 4010 | /* don't use group allocation for large files */ |
| 3973 | size = max(size, isize); | 4011 | size = max(size, isize); |
| 3974 | if (size > sbi->s_mb_stream_request) { | 4012 | if (size > sbi->s_mb_stream_request) { |
| @@ -4007,8 +4045,8 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, | |||
| 4007 | len = ar->len; | 4045 | len = ar->len; |
| 4008 | 4046 | ||
| 4009 | /* just a dirty hack to filter too big requests */ | 4047 | /* just a dirty hack to filter too big requests */ |
| 4010 | if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10) | 4048 | if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10) |
| 4011 | len = EXT4_BLOCKS_PER_GROUP(sb) - 10; | 4049 | len = EXT4_CLUSTERS_PER_GROUP(sb) - 10; |
| 4012 | 4050 | ||
| 4013 | /* start searching from the goal */ | 4051 | /* start searching from the goal */ |
| 4014 | goal = ar->goal; | 4052 | goal = ar->goal; |
| @@ -4019,18 +4057,15 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, | |||
| 4019 | 4057 | ||
| 4020 | /* set up allocation goals */ | 4058 | /* set up allocation goals */ |
| 4021 | memset(ac, 0, sizeof(struct ext4_allocation_context)); | 4059 | memset(ac, 0, sizeof(struct ext4_allocation_context)); |
| 4022 | ac->ac_b_ex.fe_logical = ar->logical; | 4060 | ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); |
| 4023 | ac->ac_status = AC_STATUS_CONTINUE; | 4061 | ac->ac_status = AC_STATUS_CONTINUE; |
| 4024 | ac->ac_sb = sb; | 4062 | ac->ac_sb = sb; |
| 4025 | ac->ac_inode = ar->inode; | 4063 | ac->ac_inode = ar->inode; |
| 4026 | ac->ac_o_ex.fe_logical = ar->logical; | 4064 | ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical; |
| 4027 | ac->ac_o_ex.fe_group = group; | 4065 | ac->ac_o_ex.fe_group = group; |
| 4028 | ac->ac_o_ex.fe_start = block; | 4066 | ac->ac_o_ex.fe_start = block; |
| 4029 | ac->ac_o_ex.fe_len = len; | 4067 | ac->ac_o_ex.fe_len = len; |
| 4030 | ac->ac_g_ex.fe_logical = ar->logical; | 4068 | ac->ac_g_ex = ac->ac_o_ex; |
| 4031 | ac->ac_g_ex.fe_group = group; | ||
| 4032 | ac->ac_g_ex.fe_start = block; | ||
| 4033 | ac->ac_g_ex.fe_len = len; | ||
| 4034 | ac->ac_flags = ar->flags; | 4069 | ac->ac_flags = ar->flags; |
| 4035 | 4070 | ||
| 4036 | /* we have to define context: will we work with a file or | 4071 | /* we have to define context: will we work with a file or |
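Seeding the context now rounds ar->logical down to a cluster boundary with a mask, which works because s_cluster_ratio is a power of two; the goal extent is then copied wholesale from the aligned original. The masking trick stand-alone:

```c
#include <stdio.h>

int main(void)
{
	unsigned ratio = 16;		/* blocks per cluster, power of two */
	unsigned logical = 1000003;

	/* round down to the first block of the containing cluster */
	unsigned aligned = logical & ~(ratio - 1);

	printf("%u -> %u (cluster %u)\n", logical, aligned, aligned / ratio);
	return 0;
}
```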
| @@ -4182,13 +4217,14 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) | |||
| 4182 | */ | 4217 | */ |
| 4183 | static int ext4_mb_release_context(struct ext4_allocation_context *ac) | 4218 | static int ext4_mb_release_context(struct ext4_allocation_context *ac) |
| 4184 | { | 4219 | { |
| 4220 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | ||
| 4185 | struct ext4_prealloc_space *pa = ac->ac_pa; | 4221 | struct ext4_prealloc_space *pa = ac->ac_pa; |
| 4186 | if (pa) { | 4222 | if (pa) { |
| 4187 | if (pa->pa_type == MB_GROUP_PA) { | 4223 | if (pa->pa_type == MB_GROUP_PA) { |
| 4188 | /* see comment in ext4_mb_use_group_pa() */ | 4224 | /* see comment in ext4_mb_use_group_pa() */ |
| 4189 | spin_lock(&pa->pa_lock); | 4225 | spin_lock(&pa->pa_lock); |
| 4190 | pa->pa_pstart += ac->ac_b_ex.fe_len; | 4226 | pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
| 4191 | pa->pa_lstart += ac->ac_b_ex.fe_len; | 4227 | pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
| 4192 | pa->pa_free -= ac->ac_b_ex.fe_len; | 4228 | pa->pa_free -= ac->ac_b_ex.fe_len; |
| 4193 | pa->pa_len -= ac->ac_b_ex.fe_len; | 4229 | pa->pa_len -= ac->ac_b_ex.fe_len; |
| 4194 | spin_unlock(&pa->pa_lock); | 4230 | spin_unlock(&pa->pa_lock); |
| @@ -4249,13 +4285,17 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
| 4249 | struct super_block *sb; | 4285 | struct super_block *sb; |
| 4250 | ext4_fsblk_t block = 0; | 4286 | ext4_fsblk_t block = 0; |
| 4251 | unsigned int inquota = 0; | 4287 | unsigned int inquota = 0; |
| 4252 | unsigned int reserv_blks = 0; | 4288 | unsigned int reserv_clstrs = 0; |
| 4253 | 4289 | ||
| 4254 | sb = ar->inode->i_sb; | 4290 | sb = ar->inode->i_sb; |
| 4255 | sbi = EXT4_SB(sb); | 4291 | sbi = EXT4_SB(sb); |
| 4256 | 4292 | ||
| 4257 | trace_ext4_request_blocks(ar); | 4293 | trace_ext4_request_blocks(ar); |
| 4258 | 4294 | ||
| 4295 | /* Allow to use superuser reservation for quota file */ | ||
| 4296 | if (IS_NOQUOTA(ar->inode)) | ||
| 4297 | ar->flags |= EXT4_MB_USE_ROOT_BLOCKS; | ||
| 4298 | |||
| 4259 | /* | 4299 | /* |
| 4260 | * For delayed allocation, we could skip the ENOSPC and | 4300 | * For delayed allocation, we could skip the ENOSPC and |
| 4261 | * EDQUOT check, as blocks and quotas have already been | 4301 | * EDQUOT check, as blocks and quotas have already been |
| @@ -4269,7 +4309,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
| 4269 | * and verify allocation doesn't exceed the quota limits. | 4309 | * and verify allocation doesn't exceed the quota limits. |
| 4270 | */ | 4310 | */ |
| 4271 | while (ar->len && | 4311 | while (ar->len && |
| 4272 | ext4_claim_free_blocks(sbi, ar->len, ar->flags)) { | 4312 | ext4_claim_free_clusters(sbi, ar->len, ar->flags)) { |
| 4273 | 4313 | ||
| 4274 | /* let others free the space */ | 4314 | /* let others free the space */ |
| 4275 | yield(); | 4315 | yield(); |
| @@ -4279,12 +4319,14 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
| 4279 | *errp = -ENOSPC; | 4319 | *errp = -ENOSPC; |
| 4280 | return 0; | 4320 | return 0; |
| 4281 | } | 4321 | } |
| 4282 | reserv_blks = ar->len; | 4322 | reserv_clstrs = ar->len; |
| 4283 | if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) { | 4323 | if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) { |
| 4284 | dquot_alloc_block_nofail(ar->inode, ar->len); | 4324 | dquot_alloc_block_nofail(ar->inode, |
| 4325 | EXT4_C2B(sbi, ar->len)); | ||
| 4285 | } else { | 4326 | } else { |
| 4286 | while (ar->len && | 4327 | while (ar->len && |
| 4287 | dquot_alloc_block(ar->inode, ar->len)) { | 4328 | dquot_alloc_block(ar->inode, |
| 4329 | EXT4_C2B(sbi, ar->len))) { | ||
| 4288 | 4330 | ||
| 4289 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | 4331 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; |
| 4290 | ar->len--; | 4332 | ar->len--; |
| @@ -4328,7 +4370,7 @@ repeat: | |||
| 4328 | ext4_mb_new_preallocation(ac); | 4370 | ext4_mb_new_preallocation(ac); |
| 4329 | } | 4371 | } |
| 4330 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | 4372 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
| 4331 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); | 4373 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); |
| 4332 | if (*errp == -EAGAIN) { | 4374 | if (*errp == -EAGAIN) { |
| 4333 | /* | 4375 | /* |
| 4334 | * drop the reference that we took | 4376 | * drop the reference that we took |
| @@ -4364,13 +4406,13 @@ out: | |||
| 4364 | if (ac) | 4406 | if (ac) |
| 4365 | kmem_cache_free(ext4_ac_cachep, ac); | 4407 | kmem_cache_free(ext4_ac_cachep, ac); |
| 4366 | if (inquota && ar->len < inquota) | 4408 | if (inquota && ar->len < inquota) |
| 4367 | dquot_free_block(ar->inode, inquota - ar->len); | 4409 | dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len)); |
| 4368 | if (!ar->len) { | 4410 | if (!ar->len) { |
| 4369 | if (!ext4_test_inode_state(ar->inode, | 4411 | if (!ext4_test_inode_state(ar->inode, |
| 4370 | EXT4_STATE_DELALLOC_RESERVED)) | 4412 | EXT4_STATE_DELALLOC_RESERVED)) |
| 4371 | /* release all the reserved blocks if non delalloc */ | 4413 | /* release all the reserved blocks if non delalloc */ |
| 4372 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 4414 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
| 4373 | reserv_blks); | 4415 | reserv_clstrs); |
| 4374 | } | 4416 | } |
| 4375 | 4417 | ||
| 4376 | trace_ext4_allocate_blocks(ar, (unsigned long long)block); | 4418 | trace_ext4_allocate_blocks(ar, (unsigned long long)block); |
| @@ -4388,7 +4430,7 @@ static int can_merge(struct ext4_free_data *entry1, | |||
| 4388 | { | 4430 | { |
| 4389 | if ((entry1->t_tid == entry2->t_tid) && | 4431 | if ((entry1->t_tid == entry2->t_tid) && |
| 4390 | (entry1->group == entry2->group) && | 4432 | (entry1->group == entry2->group) && |
| 4391 | ((entry1->start_blk + entry1->count) == entry2->start_blk)) | 4433 | ((entry1->start_cluster + entry1->count) == entry2->start_cluster)) |
| 4392 | return 1; | 4434 | return 1; |
| 4393 | return 0; | 4435 | return 0; |
| 4394 | } | 4436 | } |
| @@ -4398,7 +4440,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
| 4398 | struct ext4_free_data *new_entry) | 4440 | struct ext4_free_data *new_entry) |
| 4399 | { | 4441 | { |
| 4400 | ext4_group_t group = e4b->bd_group; | 4442 | ext4_group_t group = e4b->bd_group; |
| 4401 | ext4_grpblk_t block; | 4443 | ext4_grpblk_t cluster; |
| 4402 | struct ext4_free_data *entry; | 4444 | struct ext4_free_data *entry; |
| 4403 | struct ext4_group_info *db = e4b->bd_info; | 4445 | struct ext4_group_info *db = e4b->bd_info; |
| 4404 | struct super_block *sb = e4b->bd_sb; | 4446 | struct super_block *sb = e4b->bd_sb; |
| @@ -4411,7 +4453,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
| 4411 | BUG_ON(e4b->bd_buddy_page == NULL); | 4453 | BUG_ON(e4b->bd_buddy_page == NULL); |
| 4412 | 4454 | ||
| 4413 | new_node = &new_entry->node; | 4455 | new_node = &new_entry->node; |
| 4414 | block = new_entry->start_blk; | 4456 | cluster = new_entry->start_cluster; |
| 4415 | 4457 | ||
| 4416 | if (!*n) { | 4458 | if (!*n) { |
| 4417 | /* first free block extent. We need to | 4459 | /* first free block extent. We need to |
| @@ -4425,13 +4467,14 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
| 4425 | while (*n) { | 4467 | while (*n) { |
| 4426 | parent = *n; | 4468 | parent = *n; |
| 4427 | entry = rb_entry(parent, struct ext4_free_data, node); | 4469 | entry = rb_entry(parent, struct ext4_free_data, node); |
| 4428 | if (block < entry->start_blk) | 4470 | if (cluster < entry->start_cluster) |
| 4429 | n = &(*n)->rb_left; | 4471 | n = &(*n)->rb_left; |
| 4430 | else if (block >= (entry->start_blk + entry->count)) | 4472 | else if (cluster >= (entry->start_cluster + entry->count)) |
| 4431 | n = &(*n)->rb_right; | 4473 | n = &(*n)->rb_right; |
| 4432 | else { | 4474 | else { |
| 4433 | ext4_grp_locked_error(sb, group, 0, | 4475 | ext4_grp_locked_error(sb, group, 0, |
| 4434 | ext4_group_first_block_no(sb, group) + block, | 4476 | ext4_group_first_block_no(sb, group) + |
| 4477 | EXT4_C2B(sbi, cluster), | ||
| 4435 | "Block already on to-be-freed list"); | 4478 | "Block already on to-be-freed list"); |
| 4436 | return 0; | 4479 | return 0; |
| 4437 | } | 4480 | } |
| @@ -4445,7 +4488,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
| 4445 | if (node) { | 4488 | if (node) { |
| 4446 | entry = rb_entry(node, struct ext4_free_data, node); | 4489 | entry = rb_entry(node, struct ext4_free_data, node); |
| 4447 | if (can_merge(entry, new_entry)) { | 4490 | if (can_merge(entry, new_entry)) { |
| 4448 | new_entry->start_blk = entry->start_blk; | 4491 | new_entry->start_cluster = entry->start_cluster; |
| 4449 | new_entry->count += entry->count; | 4492 | new_entry->count += entry->count; |
| 4450 | rb_erase(node, &(db->bb_free_root)); | 4493 | rb_erase(node, &(db->bb_free_root)); |
| 4451 | spin_lock(&sbi->s_md_lock); | 4494 | spin_lock(&sbi->s_md_lock); |
| @@ -4496,6 +4539,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
| 4496 | ext4_group_t block_group; | 4539 | ext4_group_t block_group; |
| 4497 | struct ext4_sb_info *sbi; | 4540 | struct ext4_sb_info *sbi; |
| 4498 | struct ext4_buddy e4b; | 4541 | struct ext4_buddy e4b; |
| 4542 | unsigned int count_clusters; | ||
| 4499 | int err = 0; | 4543 | int err = 0; |
| 4500 | int ret; | 4544 | int ret; |
| 4501 | 4545 | ||
| @@ -4544,6 +4588,38 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
| 4544 | if (!ext4_should_writeback_data(inode)) | 4588 | if (!ext4_should_writeback_data(inode)) |
| 4545 | flags |= EXT4_FREE_BLOCKS_METADATA; | 4589 | flags |= EXT4_FREE_BLOCKS_METADATA; |
| 4546 | 4590 | ||
| 4591 | /* | ||
| 4592 | * If the extent to be freed does not begin on a cluster | ||
| 4593 | * boundary, we need to deal with partial clusters at the | ||
| 4594 | * beginning and end of the extent. Normally we will free | ||
| 4595 | * blocks at the beginning or the end unless we are explicitly | ||
| 4596 | * requested to avoid doing so. | ||
| 4597 | */ | ||
| 4598 | overflow = block & (sbi->s_cluster_ratio - 1); | ||
| 4599 | if (overflow) { | ||
| 4600 | if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { | ||
| 4601 | overflow = sbi->s_cluster_ratio - overflow; | ||
| 4602 | block += overflow; | ||
| 4603 | if (count > overflow) | ||
| 4604 | count -= overflow; | ||
| 4605 | else | ||
| 4606 | return; | ||
| 4607 | } else { | ||
| 4608 | block -= overflow; | ||
| 4609 | count += overflow; | ||
| 4610 | } | ||
| 4611 | } | ||
| 4612 | overflow = count & (sbi->s_cluster_ratio - 1); | ||
| 4613 | if (overflow) { | ||
| 4614 | if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { | ||
| 4615 | if (count > overflow) | ||
| 4616 | count -= overflow; | ||
| 4617 | else | ||
| 4618 | return; | ||
| 4619 | } else | ||
| 4620 | count += sbi->s_cluster_ratio - overflow; | ||
| 4621 | } | ||
| 4622 | |||
| 4547 | do_more: | 4623 | do_more: |
| 4548 | overflow = 0; | 4624 | overflow = 0; |
| 4549 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | 4625 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
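The new block ahead of do_more: widens the requested range to whole clusters — or trims it instead when the caller passes EXT4_FREE_BLOCKS_NOFREE_FIRST/LAST_CLUSTER because the partial cluster is still partly in use. Since s_cluster_ratio is a power of two, `block & (s_cluster_ratio - 1)` yields the offset into the containing cluster. A standalone sketch of the round-down/round-up arithmetic with made-up values:

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t ratio = 16;		/* blocks per cluster (power of two) */
	uint64_t block = 1000005;	/* first block to free */
	uint64_t count = 40;		/* number of blocks */
	uint64_t overflow;

	/* Head: round the start down to the cluster boundary. */
	overflow = block & (ratio - 1);
	if (overflow) {
		block -= overflow;
		count += overflow;
	}

	/* Tail: round the length up to a whole number of clusters. */
	overflow = count & (ratio - 1);
	if (overflow)
		count += ratio - overflow;

	/* Prints [1000000, 1000048), 3 clusters. */
	printf("aligned range: [%llu, %llu), %llu clusters\n",
	       (unsigned long long)block,
	       (unsigned long long)(block + count),
	       (unsigned long long)(count / ratio));
	return 0;
}
```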
| @@ -4552,10 +4628,12 @@ do_more: | |||
| 4552 | * Check to see if we are freeing blocks across a group | 4628 | * Check to see if we are freeing blocks across a group |
| 4553 | * boundary. | 4629 | * boundary. |
| 4554 | */ | 4630 | */ |
| 4555 | if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { | 4631 | if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) { |
| 4556 | overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); | 4632 | overflow = EXT4_C2B(sbi, bit) + count - |
| 4633 | EXT4_BLOCKS_PER_GROUP(sb); | ||
| 4557 | count -= overflow; | 4634 | count -= overflow; |
| 4558 | } | 4635 | } |
| 4636 | count_clusters = EXT4_B2C(sbi, count); | ||
| 4559 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); | 4637 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
| 4560 | if (!bitmap_bh) { | 4638 | if (!bitmap_bh) { |
| 4561 | err = -EIO; | 4639 | err = -EIO; |
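From here on, `bit` (the offset from ext4_get_group_no_and_offset) is in cluster units while `count` is still in blocks, which is why the group-crossing test converts with EXT4_C2B before comparing against EXT4_BLOCKS_PER_GROUP, and why count_clusters = EXT4_B2C(sbi, count) drives every bitmap operation below. A sketch of the two conversions, assuming — as I read the macros — that EXT4_C2B is a left shift by s_cluster_bits and EXT4_B2C the matching right shift, which loses nothing here only because the range was cluster-aligned above:

```c
#include <stdio.h>

/* Hypothetical stand-in: s_cluster_bits = log2(blocks per cluster). */
#define CLUSTER_BITS 4	/* cluster ratio of 16 */

static unsigned long long c2b(unsigned long long clusters)
{
	return clusters << CLUSTER_BITS;	/* EXT4_C2B */
}

static unsigned long long b2c(unsigned long long blocks)
{
	/* EXT4_B2C: a plain shift, so it truncates; safe here only
	 * because ext4_free_blocks aligned the range to clusters first. */
	return blocks >> CLUSTER_BITS;
}

int main(void)
{
	printf("c2b(3)  = %llu blocks\n", c2b(3));	/* 48 */
	printf("b2c(48) = %llu clusters\n", b2c(48));	/* 3 */
	printf("b2c(49) = %llu clusters (truncated!)\n", b2c(49));
	return 0;
}
```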
| @@ -4570,9 +4648,9 @@ do_more: | |||
| 4570 | if (in_range(ext4_block_bitmap(sb, gdp), block, count) || | 4648 | if (in_range(ext4_block_bitmap(sb, gdp), block, count) || |
| 4571 | in_range(ext4_inode_bitmap(sb, gdp), block, count) || | 4649 | in_range(ext4_inode_bitmap(sb, gdp), block, count) || |
| 4572 | in_range(block, ext4_inode_table(sb, gdp), | 4650 | in_range(block, ext4_inode_table(sb, gdp), |
| 4573 | EXT4_SB(sb)->s_itb_per_group) || | 4651 | EXT4_SB(sb)->s_itb_per_group) || |
| 4574 | in_range(block + count - 1, ext4_inode_table(sb, gdp), | 4652 | in_range(block + count - 1, ext4_inode_table(sb, gdp), |
| 4575 | EXT4_SB(sb)->s_itb_per_group)) { | 4653 | EXT4_SB(sb)->s_itb_per_group)) { |
| 4576 | 4654 | ||
| 4577 | ext4_error(sb, "Freeing blocks in system zone - " | 4655 | ext4_error(sb, "Freeing blocks in system zone - " |
| 4578 | "Block = %llu, count = %lu", block, count); | 4656 | "Block = %llu, count = %lu", block, count); |
| @@ -4597,11 +4675,11 @@ do_more: | |||
| 4597 | #ifdef AGGRESSIVE_CHECK | 4675 | #ifdef AGGRESSIVE_CHECK |
| 4598 | { | 4676 | { |
| 4599 | int i; | 4677 | int i; |
| 4600 | for (i = 0; i < count; i++) | 4678 | for (i = 0; i < count_clusters; i++) |
| 4601 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); | 4679 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); |
| 4602 | } | 4680 | } |
| 4603 | #endif | 4681 | #endif |
| 4604 | trace_ext4_mballoc_free(sb, inode, block_group, bit, count); | 4682 | trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters); |
| 4605 | 4683 | ||
| 4606 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | 4684 | err = ext4_mb_load_buddy(sb, block_group, &e4b); |
| 4607 | if (err) | 4685 | if (err) |
| @@ -4618,13 +4696,13 @@ do_more: | |||
| 4618 | err = -ENOMEM; | 4696 | err = -ENOMEM; |
| 4619 | goto error_return; | 4697 | goto error_return; |
| 4620 | } | 4698 | } |
| 4621 | new_entry->start_blk = bit; | 4699 | new_entry->start_cluster = bit; |
| 4622 | new_entry->group = block_group; | 4700 | new_entry->group = block_group; |
| 4623 | new_entry->count = count; | 4701 | new_entry->count = count_clusters; |
| 4624 | new_entry->t_tid = handle->h_transaction->t_tid; | 4702 | new_entry->t_tid = handle->h_transaction->t_tid; |
| 4625 | 4703 | ||
| 4626 | ext4_lock_group(sb, block_group); | 4704 | ext4_lock_group(sb, block_group); |
| 4627 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4705 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); |
| 4628 | ext4_mb_free_metadata(handle, &e4b, new_entry); | 4706 | ext4_mb_free_metadata(handle, &e4b, new_entry); |
| 4629 | } else { | 4707 | } else { |
| 4630 | /* need to update group_info->bb_free and bitmap | 4708 | /* need to update group_info->bb_free and bitmap |
| @@ -4632,25 +4710,29 @@ do_more: | |||
| 4632 | * them with group lock_held | 4710 | * them with group lock_held |
| 4633 | */ | 4711 | */ |
| 4634 | ext4_lock_group(sb, block_group); | 4712 | ext4_lock_group(sb, block_group); |
| 4635 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4713 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); |
| 4636 | mb_free_blocks(inode, &e4b, bit, count); | 4714 | mb_free_blocks(inode, &e4b, bit, count_clusters); |
| 4637 | } | 4715 | } |
| 4638 | 4716 | ||
| 4639 | ret = ext4_free_blks_count(sb, gdp) + count; | 4717 | ret = ext4_free_group_clusters(sb, gdp) + count_clusters; |
| 4640 | ext4_free_blks_set(sb, gdp, ret); | 4718 | ext4_free_group_clusters_set(sb, gdp, ret); |
| 4641 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); | 4719 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); |
| 4642 | ext4_unlock_group(sb, block_group); | 4720 | ext4_unlock_group(sb, block_group); |
| 4643 | percpu_counter_add(&sbi->s_freeblocks_counter, count); | 4721 | percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); |
| 4644 | 4722 | ||
| 4645 | if (sbi->s_log_groups_per_flex) { | 4723 | if (sbi->s_log_groups_per_flex) { |
| 4646 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | 4724 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); |
| 4647 | atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks); | 4725 | atomic_add(count_clusters, |
| 4726 | &sbi->s_flex_groups[flex_group].free_clusters); | ||
| 4648 | } | 4727 | } |
| 4649 | 4728 | ||
| 4650 | ext4_mb_unload_buddy(&e4b); | 4729 | ext4_mb_unload_buddy(&e4b); |
| 4651 | 4730 | ||
| 4652 | freed += count; | 4731 | freed += count; |
| 4653 | 4732 | ||
| 4733 | if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) | ||
| 4734 | dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); | ||
| 4735 | |||
| 4654 | /* We dirtied the bitmap block */ | 4736 | /* We dirtied the bitmap block */ |
| 4655 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | 4737 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); |
| 4656 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | 4738 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); |
| @@ -4669,8 +4751,6 @@ do_more: | |||
| 4669 | } | 4751 | } |
| 4670 | ext4_mark_super_dirty(sb); | 4752 | ext4_mark_super_dirty(sb); |
| 4671 | error_return: | 4753 | error_return: |
| 4672 | if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) | ||
| 4673 | dquot_free_block(inode, freed); | ||
| 4674 | brelse(bitmap_bh); | 4754 | brelse(bitmap_bh); |
| 4675 | ext4_std_error(sb, err); | 4755 | ext4_std_error(sb, err); |
| 4676 | return; | 4756 | return; |
| @@ -4778,16 +4858,17 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, | |||
| 4778 | ext4_lock_group(sb, block_group); | 4858 | ext4_lock_group(sb, block_group); |
| 4779 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4859 | mb_clear_bits(bitmap_bh->b_data, bit, count); |
| 4780 | mb_free_blocks(NULL, &e4b, bit, count); | 4860 | mb_free_blocks(NULL, &e4b, bit, count); |
| 4781 | blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); | 4861 | blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc); |
| 4782 | ext4_free_blks_set(sb, desc, blk_free_count); | 4862 | ext4_free_group_clusters_set(sb, desc, blk_free_count); |
| 4783 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); | 4863 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); |
| 4784 | ext4_unlock_group(sb, block_group); | 4864 | ext4_unlock_group(sb, block_group); |
| 4785 | percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); | 4865 | percpu_counter_add(&sbi->s_freeclusters_counter, |
| 4866 | EXT4_B2C(sbi, blocks_freed)); | ||
| 4786 | 4867 | ||
| 4787 | if (sbi->s_log_groups_per_flex) { | 4868 | if (sbi->s_log_groups_per_flex) { |
| 4788 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | 4869 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); |
| 4789 | atomic_add(blocks_freed, | 4870 | atomic_add(EXT4_B2C(sbi, blocks_freed), |
| 4790 | &sbi->s_flex_groups[flex_group].free_blocks); | 4871 | &sbi->s_flex_groups[flex_group].free_clusters); |
| 4791 | } | 4872 | } |
| 4792 | 4873 | ||
| 4793 | ext4_mb_unload_buddy(&e4b); | 4874 | ext4_mb_unload_buddy(&e4b); |
| @@ -4948,7 +5029,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
| 4948 | struct ext4_group_info *grp; | 5029 | struct ext4_group_info *grp; |
| 4949 | ext4_group_t first_group, last_group; | 5030 | ext4_group_t first_group, last_group; |
| 4950 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); | 5031 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); |
| 4951 | ext4_grpblk_t cnt = 0, first_block, last_block; | 5032 | ext4_grpblk_t cnt = 0, first_cluster, last_cluster; |
| 4952 | uint64_t start, len, minlen, trimmed = 0; | 5033 | uint64_t start, len, minlen, trimmed = 0; |
| 4953 | ext4_fsblk_t first_data_blk = | 5034 | ext4_fsblk_t first_data_blk = |
| 4954 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 5035 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
| @@ -4958,7 +5039,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
| 4958 | len = range->len >> sb->s_blocksize_bits; | 5039 | len = range->len >> sb->s_blocksize_bits; |
| 4959 | minlen = range->minlen >> sb->s_blocksize_bits; | 5040 | minlen = range->minlen >> sb->s_blocksize_bits; |
| 4960 | 5041 | ||
| 4961 | if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) | 5042 | if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb))) |
| 4962 | return -EINVAL; | 5043 | return -EINVAL; |
| 4963 | if (start + len <= first_data_blk) | 5044 | if (start + len <= first_data_blk) |
| 4964 | goto out; | 5045 | goto out; |
| @@ -4969,11 +5050,11 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
| 4969 | 5050 | ||
| 4970 | /* Determine first and last group to examine based on start and len */ | 5051 | /* Determine first and last group to examine based on start and len */ |
| 4971 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, | 5052 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, |
| 4972 | &first_group, &first_block); | 5053 | &first_group, &first_cluster); |
| 4973 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), | 5054 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), |
| 4974 | &last_group, &last_block); | 5055 | &last_group, &last_cluster); |
| 4975 | last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; | 5056 | last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; |
| 4976 | last_block = EXT4_BLOCKS_PER_GROUP(sb); | 5057 | last_cluster = EXT4_CLUSTERS_PER_GROUP(sb); |
| 4977 | 5058 | ||
| 4978 | if (first_group > last_group) | 5059 | if (first_group > last_group) |
| 4979 | return -EINVAL; | 5060 | return -EINVAL; |
| @@ -4993,20 +5074,20 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
| 4993 | * change it for the last group in which case start + | 5074 | * change it for the last group in which case start + |
| 4994 | * len < EXT4_BLOCKS_PER_GROUP(sb). | 5075 | * len < EXT4_BLOCKS_PER_GROUP(sb). |
| 4995 | */ | 5076 | */ |
| 4996 | if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb)) | 5077 | if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb)) |
| 4997 | last_block = first_block + len; | 5078 | last_cluster = first_cluster + len; |
| 4998 | len -= last_block - first_block; | 5079 | len -= last_cluster - first_cluster; |
| 4999 | 5080 | ||
| 5000 | if (grp->bb_free >= minlen) { | 5081 | if (grp->bb_free >= minlen) { |
| 5001 | cnt = ext4_trim_all_free(sb, group, first_block, | 5082 | cnt = ext4_trim_all_free(sb, group, first_cluster, |
| 5002 | last_block, minlen); | 5083 | last_cluster, minlen); |
| 5003 | if (cnt < 0) { | 5084 | if (cnt < 0) { |
| 5004 | ret = cnt; | 5085 | ret = cnt; |
| 5005 | break; | 5086 | break; |
| 5006 | } | 5087 | } |
| 5007 | } | 5088 | } |
| 5008 | trimmed += cnt; | 5089 | trimmed += cnt; |
| 5009 | first_block = 0; | 5090 | first_cluster = 0; |
| 5010 | } | 5091 | } |
| 5011 | range->len = trimmed * sb->s_blocksize; | 5092 | range->len = trimmed * sb->s_blocksize; |
| 5012 | 5093 | ||
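For reference, ext4_trim_fs maps the byte-derived (start, len) range onto groups: ext4_get_group_no_and_offset yields a (group, offset) pair for each end, the final group's end is clamped to last_cluster, and every group after the first starts at offset zero. A simplified userspace version of that walk — fixed geometry, no first-data-block offset, clusters only, so a sketch rather than the kernel's exact length-decrementing loop:

```c
#include <stdio.h>
#include <stdint.h>

/* Fixed hypothetical geometry; the kernel reads this from the superblock. */
#define CLUSTERS_PER_GROUP 32768ULL

static void group_and_offset(uint64_t cluster, uint64_t *group, uint64_t *off)
{
	*group = cluster / CLUSTERS_PER_GROUP;
	*off   = cluster % CLUSTERS_PER_GROUP;
}

int main(void)
{
	uint64_t start = 100000, len = 200000;	/* range to trim, in clusters */
	uint64_t first_group, first_cluster, last_group, last_cluster;

	group_and_offset(start, &first_group, &first_cluster);
	group_and_offset(start + len, &last_group, &last_cluster);

	for (uint64_t g = first_group; g <= last_group; g++) {
		/* Default: trim to the end of the group; clamp in the last. */
		uint64_t end = (g == last_group) ? last_cluster
						 : CLUSTERS_PER_GROUP;

		if (end > first_cluster)
			printf("group %llu: clusters [%llu, %llu)\n",
			       (unsigned long long)g,
			       (unsigned long long)first_cluster,
			       (unsigned long long)end);
		first_cluster = 0;	/* later groups start at offset 0 */
	}
	return 0;
}
```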
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 9d4a636b546c..47705f3285e3 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
| @@ -106,7 +106,7 @@ struct ext4_free_data { | |||
| 106 | ext4_group_t group; | 106 | ext4_group_t group; |
| 107 | 107 | ||
| 108 | /* free block extent */ | 108 | /* free block extent */ |
| 109 | ext4_grpblk_t start_blk; | 109 | ext4_grpblk_t start_cluster; |
| 110 | ext4_grpblk_t count; | 110 | ext4_grpblk_t count; |
| 111 | 111 | ||
| 112 | /* transaction which freed this extent */ | 112 | /* transaction which freed this extent */ |
| @@ -139,9 +139,9 @@ enum { | |||
| 139 | 139 | ||
| 140 | struct ext4_free_extent { | 140 | struct ext4_free_extent { |
| 141 | ext4_lblk_t fe_logical; | 141 | ext4_lblk_t fe_logical; |
| 142 | ext4_grpblk_t fe_start; | 142 | ext4_grpblk_t fe_start; /* In cluster units */ |
| 143 | ext4_group_t fe_group; | 143 | ext4_group_t fe_group; |
| 144 | ext4_grpblk_t fe_len; | 144 | ext4_grpblk_t fe_len; /* In cluster units */ |
| 145 | }; | 145 | }; |
| 146 | 146 | ||
| 147 | /* | 147 | /* |
| @@ -175,7 +175,7 @@ struct ext4_allocation_context { | |||
| 175 | /* the best found extent */ | 175 | /* the best found extent */ |
| 176 | struct ext4_free_extent ac_b_ex; | 176 | struct ext4_free_extent ac_b_ex; |
| 177 | 177 | ||
| 178 | /* copy of the bext found extent taken before preallocation efforts */ | 178 | /* copy of the best found extent taken before preallocation efforts */ |
| 179 | struct ext4_free_extent ac_f_ex; | 179 | struct ext4_free_extent ac_f_ex; |
| 180 | 180 | ||
| 181 | /* number of iterations done. we have to track to limit searching */ | 181 | /* number of iterations done. we have to track to limit searching */ |
| @@ -216,6 +216,7 @@ struct ext4_buddy { | |||
| 216 | static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, | 216 | static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, |
| 217 | struct ext4_free_extent *fex) | 217 | struct ext4_free_extent *fex) |
| 218 | { | 218 | { |
| 219 | return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start; | 219 | return ext4_group_first_block_no(sb, fex->fe_group) + |
| 220 | (fex->fe_start << EXT4_SB(sb)->s_cluster_bits); | ||
| 220 | } | 221 | } |
| 221 | #endif | 222 | #endif |
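With fe_start and fe_len now carried in cluster units, ext4_grp_offs_to_block must scale the offset by s_cluster_bits before adding the group's first block, as the one-line change above shows. A worked example with hypothetical geometry:

```c
#include <stdio.h>
#include <stdint.h>

#define CLUSTER_BITS		4	/* 16 blocks per cluster */
#define BLOCKS_PER_GROUP	32768ULL
#define FIRST_DATA_BLOCK	0ULL	/* 1 on 1K-block filesystems */

/* The group's first block, as ext4_group_first_block_no() computes it. */
static uint64_t group_first_block(uint32_t group)
{
	return FIRST_DATA_BLOCK + (uint64_t)group * BLOCKS_PER_GROUP;
}

int main(void)
{
	uint32_t fe_group = 5;
	uint32_t fe_start = 100;	/* now a cluster offset */

	/* old (block units):   first + fe_start
	 * new (cluster units): first + (fe_start << s_cluster_bits)
	 * -> 5 * 32768 + 100 * 16 = 165440 */
	printf("physical block = %llu\n",
	       (unsigned long long)(group_first_block(fe_group) +
				    ((uint64_t)fe_start << CLUSTER_BITS)));
	return 0;
}
```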
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b57b98fb44d1..f729377bf043 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
| @@ -15,19 +15,18 @@ | |||
| 15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
| 16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
| 17 | #include "ext4_jbd2.h" | 17 | #include "ext4_jbd2.h" |
| 18 | #include "ext4_extents.h" | ||
| 19 | 18 | ||
| 20 | /* | 19 | /* |
| 21 | * The contiguous blocks details which can be | 20 | * The contiguous blocks details which can be |
| 22 | * represented by a single extent | 21 | * represented by a single extent |
| 23 | */ | 22 | */ |
| 24 | struct list_blocks_struct { | 23 | struct migrate_struct { |
| 25 | ext4_lblk_t first_block, last_block; | 24 | ext4_lblk_t first_block, last_block, curr_block; |
| 26 | ext4_fsblk_t first_pblock, last_pblock; | 25 | ext4_fsblk_t first_pblock, last_pblock; |
| 27 | }; | 26 | }; |
| 28 | 27 | ||
| 29 | static int finish_range(handle_t *handle, struct inode *inode, | 28 | static int finish_range(handle_t *handle, struct inode *inode, |
| 30 | struct list_blocks_struct *lb) | 29 | struct migrate_struct *lb) |
| 31 | 30 | ||
| 32 | { | 31 | { |
| 33 | int retval = 0, needed; | 32 | int retval = 0, needed; |
| @@ -87,8 +86,7 @@ err_out: | |||
| 87 | } | 86 | } |
| 88 | 87 | ||
| 89 | static int update_extent_range(handle_t *handle, struct inode *inode, | 88 | static int update_extent_range(handle_t *handle, struct inode *inode, |
| 90 | ext4_fsblk_t pblock, ext4_lblk_t blk_num, | 89 | ext4_fsblk_t pblock, struct migrate_struct *lb) |
| 91 | struct list_blocks_struct *lb) | ||
| 92 | { | 90 | { |
| 93 | int retval; | 91 | int retval; |
| 94 | /* | 92 | /* |
| @@ -96,9 +94,10 @@ static int update_extent_range(handle_t *handle, struct inode *inode, | |||
| 96 | */ | 94 | */ |
| 97 | if (lb->first_pblock && | 95 | if (lb->first_pblock && |
| 98 | (lb->last_pblock+1 == pblock) && | 96 | (lb->last_pblock+1 == pblock) && |
| 99 | (lb->last_block+1 == blk_num)) { | 97 | (lb->last_block+1 == lb->curr_block)) { |
| 100 | lb->last_pblock = pblock; | 98 | lb->last_pblock = pblock; |
| 101 | lb->last_block = blk_num; | 99 | lb->last_block = lb->curr_block; |
| 100 | lb->curr_block++; | ||
| 102 | return 0; | 101 | return 0; |
| 103 | } | 102 | } |
| 104 | /* | 103 | /* |
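The migrate.c rework replaces the blk_num parameter threaded through every helper with a single logical cursor, lb->curr_block, owned by the renamed struct migrate_struct; a run extends only while the physical block and the cursor both advance by exactly one. A userspace sketch of the same coalescing over an array of physical block numbers (0 marking a hole), with invented data:

```c
#include <stdio.h>
#include <stdint.h>

struct migrate_state {
	uint64_t first_block, last_block, curr_block;	/* logical */
	uint64_t first_pblock, last_pblock;		/* physical */
};

static void finish_range(struct migrate_state *lb)
{
	if (!lb->first_pblock)
		return;		/* nothing accumulated yet */
	printf("extent: logical %llu..%llu -> physical %llu..%llu\n",
	       (unsigned long long)lb->first_block,
	       (unsigned long long)lb->last_block,
	       (unsigned long long)lb->first_pblock,
	       (unsigned long long)lb->last_pblock);
}

static void update_extent_range(struct migrate_state *lb, uint64_t pblock)
{
	/* Extend the current run only if both sides are contiguous. */
	if (lb->first_pblock && lb->last_pblock + 1 == pblock &&
	    lb->last_block + 1 == lb->curr_block) {
		lb->last_pblock = pblock;
		lb->last_block = lb->curr_block++;
		return;
	}
	/* Otherwise emit the finished run and start a new one. */
	finish_range(lb);
	lb->first_pblock = lb->last_pblock = pblock;
	lb->first_block = lb->last_block = lb->curr_block++;
}

int main(void)
{
	/* Hypothetical block map; 0 = hole (logical-only advance). */
	uint64_t map[] = { 500, 501, 502, 0, 0, 900, 901 };
	struct migrate_state lb = { 0 };

	for (unsigned i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
		if (map[i])
			update_extent_range(&lb, map[i]);
		else
			lb.curr_block++;
	}
	finish_range(&lb);	/* emits 0..2 -> 500..502 and 5..6 -> 900..901 */
	return 0;
}
```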
| @@ -106,64 +105,49 @@ static int update_extent_range(handle_t *handle, struct inode *inode, | |||
| 106 | */ | 105 | */ |
| 107 | retval = finish_range(handle, inode, lb); | 106 | retval = finish_range(handle, inode, lb); |
| 108 | lb->first_pblock = lb->last_pblock = pblock; | 107 | lb->first_pblock = lb->last_pblock = pblock; |
| 109 | lb->first_block = lb->last_block = blk_num; | 108 | lb->first_block = lb->last_block = lb->curr_block; |
| 110 | 109 | lb->curr_block++; | |
| 111 | return retval; | 110 | return retval; |
| 112 | } | 111 | } |
| 113 | 112 | ||
| 114 | static int update_ind_extent_range(handle_t *handle, struct inode *inode, | 113 | static int update_ind_extent_range(handle_t *handle, struct inode *inode, |
| 115 | ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, | 114 | ext4_fsblk_t pblock, |
| 116 | struct list_blocks_struct *lb) | 115 | struct migrate_struct *lb) |
| 117 | { | 116 | { |
| 118 | struct buffer_head *bh; | 117 | struct buffer_head *bh; |
| 119 | __le32 *i_data; | 118 | __le32 *i_data; |
| 120 | int i, retval = 0; | 119 | int i, retval = 0; |
| 121 | ext4_lblk_t blk_count = *blk_nump; | ||
| 122 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; | 120 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; |
| 123 | 121 | ||
| 124 | if (!pblock) { | ||
| 125 | /* Only update the file block number */ | ||
| 126 | *blk_nump += max_entries; | ||
| 127 | return 0; | ||
| 128 | } | ||
| 129 | |||
| 130 | bh = sb_bread(inode->i_sb, pblock); | 122 | bh = sb_bread(inode->i_sb, pblock); |
| 131 | if (!bh) | 123 | if (!bh) |
| 132 | return -EIO; | 124 | return -EIO; |
| 133 | 125 | ||
| 134 | i_data = (__le32 *)bh->b_data; | 126 | i_data = (__le32 *)bh->b_data; |
| 135 | for (i = 0; i < max_entries; i++, blk_count++) { | 127 | for (i = 0; i < max_entries; i++) { |
| 136 | if (i_data[i]) { | 128 | if (i_data[i]) { |
| 137 | retval = update_extent_range(handle, inode, | 129 | retval = update_extent_range(handle, inode, |
| 138 | le32_to_cpu(i_data[i]), | 130 | le32_to_cpu(i_data[i]), lb); |
| 139 | blk_count, lb); | ||
| 140 | if (retval) | 131 | if (retval) |
| 141 | break; | 132 | break; |
| 133 | } else { | ||
| 134 | lb->curr_block++; | ||
| 142 | } | 135 | } |
| 143 | } | 136 | } |
| 144 | |||
| 145 | /* Update the file block number */ | ||
| 146 | *blk_nump = blk_count; | ||
| 147 | put_bh(bh); | 137 | put_bh(bh); |
| 148 | return retval; | 138 | return retval; |
| 149 | 139 | ||
| 150 | } | 140 | } |
| 151 | 141 | ||
| 152 | static int update_dind_extent_range(handle_t *handle, struct inode *inode, | 142 | static int update_dind_extent_range(handle_t *handle, struct inode *inode, |
| 153 | ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, | 143 | ext4_fsblk_t pblock, |
| 154 | struct list_blocks_struct *lb) | 144 | struct migrate_struct *lb) |
| 155 | { | 145 | { |
| 156 | struct buffer_head *bh; | 146 | struct buffer_head *bh; |
| 157 | __le32 *i_data; | 147 | __le32 *i_data; |
| 158 | int i, retval = 0; | 148 | int i, retval = 0; |
| 159 | ext4_lblk_t blk_count = *blk_nump; | ||
| 160 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; | 149 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; |
| 161 | 150 | ||
| 162 | if (!pblock) { | ||
| 163 | /* Only update the file block number */ | ||
| 164 | *blk_nump += max_entries * max_entries; | ||
| 165 | return 0; | ||
| 166 | } | ||
| 167 | bh = sb_bread(inode->i_sb, pblock); | 151 | bh = sb_bread(inode->i_sb, pblock); |
| 168 | if (!bh) | 152 | if (!bh) |
| 169 | return -EIO; | 153 | return -EIO; |
| @@ -172,38 +156,28 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode, | |||
| 172 | for (i = 0; i < max_entries; i++) { | 156 | for (i = 0; i < max_entries; i++) { |
| 173 | if (i_data[i]) { | 157 | if (i_data[i]) { |
| 174 | retval = update_ind_extent_range(handle, inode, | 158 | retval = update_ind_extent_range(handle, inode, |
| 175 | le32_to_cpu(i_data[i]), | 159 | le32_to_cpu(i_data[i]), lb); |
| 176 | &blk_count, lb); | ||
| 177 | if (retval) | 160 | if (retval) |
| 178 | break; | 161 | break; |
| 179 | } else { | 162 | } else { |
| 180 | /* Only update the file block number */ | 163 | /* Only update the file block number */ |
| 181 | blk_count += max_entries; | 164 | lb->curr_block += max_entries; |
| 182 | } | 165 | } |
| 183 | } | 166 | } |
| 184 | |||
| 185 | /* Update the file block number */ | ||
| 186 | *blk_nump = blk_count; | ||
| 187 | put_bh(bh); | 167 | put_bh(bh); |
| 188 | return retval; | 168 | return retval; |
| 189 | 169 | ||
| 190 | } | 170 | } |
| 191 | 171 | ||
| 192 | static int update_tind_extent_range(handle_t *handle, struct inode *inode, | 172 | static int update_tind_extent_range(handle_t *handle, struct inode *inode, |
| 193 | ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, | 173 | ext4_fsblk_t pblock, |
| 194 | struct list_blocks_struct *lb) | 174 | struct migrate_struct *lb) |
| 195 | { | 175 | { |
| 196 | struct buffer_head *bh; | 176 | struct buffer_head *bh; |
| 197 | __le32 *i_data; | 177 | __le32 *i_data; |
| 198 | int i, retval = 0; | 178 | int i, retval = 0; |
| 199 | ext4_lblk_t blk_count = *blk_nump; | ||
| 200 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; | 179 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; |
| 201 | 180 | ||
| 202 | if (!pblock) { | ||
| 203 | /* Only update the file block number */ | ||
| 204 | *blk_nump += max_entries * max_entries * max_entries; | ||
| 205 | return 0; | ||
| 206 | } | ||
| 207 | bh = sb_bread(inode->i_sb, pblock); | 181 | bh = sb_bread(inode->i_sb, pblock); |
| 208 | if (!bh) | 182 | if (!bh) |
| 209 | return -EIO; | 183 | return -EIO; |
| @@ -212,16 +186,14 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode, | |||
| 212 | for (i = 0; i < max_entries; i++) { | 186 | for (i = 0; i < max_entries; i++) { |
| 213 | if (i_data[i]) { | 187 | if (i_data[i]) { |
| 214 | retval = update_dind_extent_range(handle, inode, | 188 | retval = update_dind_extent_range(handle, inode, |
| 215 | le32_to_cpu(i_data[i]), | 189 | le32_to_cpu(i_data[i]), lb); |
| 216 | &blk_count, lb); | ||
| 217 | if (retval) | 190 | if (retval) |
| 218 | break; | 191 | break; |
| 219 | } else | 192 | } else { |
| 220 | /* Only update the file block number */ | 193 | /* Only update the file block number */ |
| 221 | blk_count += max_entries * max_entries; | 194 | lb->curr_block += max_entries * max_entries; |
| 195 | } | ||
| 222 | } | 196 | } |
| 223 | /* Update the file block number */ | ||
| 224 | *blk_nump = blk_count; | ||
| 225 | put_bh(bh); | 197 | put_bh(bh); |
| 226 | return retval; | 198 | return retval; |
| 227 | 199 | ||
| @@ -462,12 +434,12 @@ int ext4_ext_migrate(struct inode *inode) | |||
| 462 | handle_t *handle; | 434 | handle_t *handle; |
| 463 | int retval = 0, i; | 435 | int retval = 0, i; |
| 464 | __le32 *i_data; | 436 | __le32 *i_data; |
| 465 | ext4_lblk_t blk_count = 0; | ||
| 466 | struct ext4_inode_info *ei; | 437 | struct ext4_inode_info *ei; |
| 467 | struct inode *tmp_inode = NULL; | 438 | struct inode *tmp_inode = NULL; |
| 468 | struct list_blocks_struct lb; | 439 | struct migrate_struct lb; |
| 469 | unsigned long max_entries; | 440 | unsigned long max_entries; |
| 470 | __u32 goal; | 441 | __u32 goal; |
| 442 | uid_t owner[2]; | ||
| 471 | 443 | ||
| 472 | /* | 444 | /* |
| 473 | * If the filesystem does not support extents, or the inode | 445 | * If the filesystem does not support extents, or the inode |
| @@ -495,10 +467,12 @@ int ext4_ext_migrate(struct inode *inode) | |||
| 495 | } | 467 | } |
| 496 | goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * | 468 | goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * |
| 497 | EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; | 469 | EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; |
| 470 | owner[0] = inode->i_uid; | ||
| 471 | owner[1] = inode->i_gid; | ||
| 498 | tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, | 472 | tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, |
| 499 | S_IFREG, NULL, goal); | 473 | S_IFREG, NULL, goal, owner); |
| 500 | if (IS_ERR(tmp_inode)) { | 474 | if (IS_ERR(tmp_inode)) { |
| 501 | retval = -ENOMEM; | 475 | retval = PTR_ERR(inode); |
| 502 | ext4_journal_stop(handle); | 476 | ext4_journal_stop(handle); |
| 503 | return retval; | 477 | return retval; |
| 504 | } | 478 | } |
| @@ -551,35 +525,32 @@ int ext4_ext_migrate(struct inode *inode) | |||
| 551 | 525 | ||
| 552 | /* 32 bit block address 4 bytes */ | 526 | /* 32 bit block address 4 bytes */ |
| 553 | max_entries = inode->i_sb->s_blocksize >> 2; | 527 | max_entries = inode->i_sb->s_blocksize >> 2; |
| 554 | for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { | 528 | for (i = 0; i < EXT4_NDIR_BLOCKS; i++) { |
| 555 | if (i_data[i]) { | 529 | if (i_data[i]) { |
| 556 | retval = update_extent_range(handle, tmp_inode, | 530 | retval = update_extent_range(handle, tmp_inode, |
| 557 | le32_to_cpu(i_data[i]), | 531 | le32_to_cpu(i_data[i]), &lb); |
| 558 | blk_count, &lb); | ||
| 559 | if (retval) | 532 | if (retval) |
| 560 | goto err_out; | 533 | goto err_out; |
| 561 | } | 534 | } else |
| 535 | lb.curr_block++; | ||
| 562 | } | 536 | } |
| 563 | if (i_data[EXT4_IND_BLOCK]) { | 537 | if (i_data[EXT4_IND_BLOCK]) { |
| 564 | retval = update_ind_extent_range(handle, tmp_inode, | 538 | retval = update_ind_extent_range(handle, tmp_inode, |
| 565 | le32_to_cpu(i_data[EXT4_IND_BLOCK]), | 539 | le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb); |
| 566 | &blk_count, &lb); | ||
| 567 | if (retval) | 540 | if (retval) |
| 568 | goto err_out; | 541 | goto err_out; |
| 569 | } else | 542 | } else |
| 570 | blk_count += max_entries; | 543 | lb.curr_block += max_entries; |
| 571 | if (i_data[EXT4_DIND_BLOCK]) { | 544 | if (i_data[EXT4_DIND_BLOCK]) { |
| 572 | retval = update_dind_extent_range(handle, tmp_inode, | 545 | retval = update_dind_extent_range(handle, tmp_inode, |
| 573 | le32_to_cpu(i_data[EXT4_DIND_BLOCK]), | 546 | le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb); |
| 574 | &blk_count, &lb); | ||
| 575 | if (retval) | 547 | if (retval) |
| 576 | goto err_out; | 548 | goto err_out; |
| 577 | } else | 549 | } else |
| 578 | blk_count += max_entries * max_entries; | 550 | lb.curr_block += max_entries * max_entries; |
| 579 | if (i_data[EXT4_TIND_BLOCK]) { | 551 | if (i_data[EXT4_TIND_BLOCK]) { |
| 580 | retval = update_tind_extent_range(handle, tmp_inode, | 552 | retval = update_tind_extent_range(handle, tmp_inode, |
| 581 | le32_to_cpu(i_data[EXT4_TIND_BLOCK]), | 553 | le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb); |
| 582 | &blk_count, &lb); | ||
| 583 | if (retval) | 554 | if (retval) |
| 584 | goto err_out; | 555 | goto err_out; |
| 585 | } | 556 | } |
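Dropping the `if (!pblock)` special cases works because every caller now advances lb.curr_block itself when a subtree is absent: by max_entries for a missing indirect block, max_entries² for a double-indirect, max_entries³ for a triple-indirect, with max_entries = blocksize / 4. (The hunk also passes the source inode's uid/gid into ext4_new_inode via owner[], so the temporary inode's quota charges land on the right user.) The spans, computed for a hypothetical 4K block size:

```c
#include <stdio.h>
#include <stdint.h>

#define EXT4_NDIR_BLOCKS 12	/* direct pointers in the inode */

int main(void)
{
	uint64_t blocksize = 4096;
	uint64_t max_entries = blocksize >> 2;	/* 32-bit pointers: 1024 */

	uint64_t ind  = max_entries;			/* 1024 blocks */
	uint64_t dind = max_entries * max_entries;	/* 1048576 blocks */
	uint64_t tind = dind * max_entries;		/* ~1G blocks */

	printf("IND  covers %llu blocks, starts at logical %u\n",
	       (unsigned long long)ind, EXT4_NDIR_BLOCKS);
	printf("DIND covers %llu blocks, starts at logical %llu\n",
	       (unsigned long long)dind,
	       (unsigned long long)(EXT4_NDIR_BLOCKS + ind));
	printf("TIND covers %llu blocks, starts at logical %llu\n",
	       (unsigned long long)tind,
	       (unsigned long long)(EXT4_NDIR_BLOCKS + ind + dind));
	return 0;
}
```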
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 9bdef3f537c5..7ea4ba4eff2a 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c | |||
| @@ -109,7 +109,7 @@ static int kmmpd(void *data) | |||
| 109 | mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); | 109 | mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); |
| 110 | bdevname(bh->b_bdev, mmp->mmp_bdevname); | 110 | bdevname(bh->b_bdev, mmp->mmp_bdevname); |
| 111 | 111 | ||
| 112 | memcpy(mmp->mmp_nodename, init_utsname()->sysname, | 112 | memcpy(mmp->mmp_nodename, init_utsname()->nodename, |
| 113 | sizeof(mmp->mmp_nodename)); | 113 | sizeof(mmp->mmp_nodename)); |
| 114 | 114 | ||
| 115 | while (!kthread_should_stop()) { | 115 | while (!kthread_should_stop()) { |
| @@ -125,8 +125,9 @@ static int kmmpd(void *data) | |||
| 125 | * Don't spew too many error messages. Print one every | 125 | * Don't spew too many error messages. Print one every |
| 126 | * (s_mmp_update_interval * 60) seconds. | 126 | * (s_mmp_update_interval * 60) seconds. |
| 127 | */ | 127 | */ |
| 128 | if (retval && (failed_writes % 60) == 0) { | 128 | if (retval) { |
| 129 | ext4_error(sb, "Error writing to MMP block"); | 129 | if ((failed_writes % 60) == 0) |
| 130 | ext4_error(sb, "Error writing to MMP block"); | ||
| 130 | failed_writes++; | 131 | failed_writes++; |
| 131 | } | 132 | } |
| 132 | 133 | ||
| @@ -295,7 +296,8 @@ skip: | |||
| 295 | /* | 296 | /* |
| 296 | * write a new random sequence number. | 297 | * write a new random sequence number. |
| 297 | */ | 298 | */ |
| 298 | mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq()); | 299 | seq = mmp_new_seq(); |
| 300 | mmp->mmp_seq = cpu_to_le32(seq); | ||
| 299 | 301 | ||
| 300 | retval = write_mmp_block(bh); | 302 | retval = write_mmp_block(bh); |
| 301 | if (retval) | 303 | if (retval) |
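The mmp_seq change looks cosmetic but, as I read it, fixes an endianness bug: the old chained assignment `mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq())` left the host-order local `seq` holding a little-endian value, so later host-order comparisons against the re-read block would misfire on big-endian machines. An illustration with an explicit byteswap standing in for cpu_to_le32 as it behaves on a big-endian CPU:

```c
#include <stdio.h>
#include <stdint.h>

/* Stand-in for cpu_to_le32 on a BIG-endian host (no-op on little-endian). */
static uint32_t cpu_to_le32_be_host(uint32_t v)
{
	return __builtin_bswap32(v);
}

int main(void)
{
	uint32_t new_seq = 0x00000042;	/* pretend mmp_new_seq() result */
	uint32_t disk, seq;

	/* Buggy pattern: seq ends up byte-swapped on big-endian. */
	disk = seq = cpu_to_le32_be_host(new_seq);
	printf("chained: seq = 0x%08x (no longer equals 0x%08x)\n",
	       seq, new_seq);

	/* Fixed pattern: keep the host-order copy, convert only for disk. */
	seq = new_seq;
	disk = cpu_to_le32_be_host(seq);
	printf("split:   seq = 0x%08x, on-disk = 0x%08x\n", seq, disk);
	return 0;
}
```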
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index f57455a1b1b2..c5826c623e7a 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
| @@ -17,7 +17,6 @@ | |||
| 17 | #include <linux/quotaops.h> | 17 | #include <linux/quotaops.h> |
| 18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
| 19 | #include "ext4_jbd2.h" | 19 | #include "ext4_jbd2.h" |
| 20 | #include "ext4_extents.h" | ||
| 21 | #include "ext4.h" | 20 | #include "ext4.h" |
| 22 | 21 | ||
| 23 | /** | 22 | /** |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 1c924faeb6c8..2a75eed2ef06 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
| @@ -1586,7 +1586,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 1586 | dxtrace(dx_show_index("node", frames[1].entries)); | 1586 | dxtrace(dx_show_index("node", frames[1].entries)); |
| 1587 | dxtrace(dx_show_index("node", | 1587 | dxtrace(dx_show_index("node", |
| 1588 | ((struct dx_node *) bh2->b_data)->entries)); | 1588 | ((struct dx_node *) bh2->b_data)->entries)); |
| 1589 | err = ext4_handle_dirty_metadata(handle, inode, bh2); | 1589 | err = ext4_handle_dirty_metadata(handle, dir, bh2); |
| 1590 | if (err) | 1590 | if (err) |
| 1591 | goto journal_error; | 1591 | goto journal_error; |
| 1592 | brelse (bh2); | 1592 | brelse (bh2); |
| @@ -1612,7 +1612,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 1612 | if (err) | 1612 | if (err) |
| 1613 | goto journal_error; | 1613 | goto journal_error; |
| 1614 | } | 1614 | } |
| 1615 | err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh); | 1615 | err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh); |
| 1616 | if (err) { | 1616 | if (err) { |
| 1617 | ext4_std_error(inode->i_sb, err); | 1617 | ext4_std_error(inode->i_sb, err); |
| 1618 | goto cleanup; | 1618 | goto cleanup; |
| @@ -1707,9 +1707,8 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode) | |||
| 1707 | */ | 1707 | */ |
| 1708 | static void ext4_dec_count(handle_t *handle, struct inode *inode) | 1708 | static void ext4_dec_count(handle_t *handle, struct inode *inode) |
| 1709 | { | 1709 | { |
| 1710 | drop_nlink(inode); | 1710 | if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) |
| 1711 | if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0) | 1711 | drop_nlink(inode); |
| 1712 | inc_nlink(inode); | ||
| 1713 | } | 1712 | } |
| 1714 | 1713 | ||
| 1715 | 1714 | ||
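The simplified ext4_dec_count encodes the invariant directly: a live directory never holds fewer than two links (its own "." entry plus the entry in its parent), so the decrement is now skipped up front instead of being performed and then undone with inc_nlink. A sketch of the guard, with a plain struct standing in for the inode:

```c
#include <stdio.h>
#include <stdbool.h>

struct fake_inode {
	bool is_dir;
	unsigned int nlink;
};

/* Mirror of the new logic: non-directories always drop a link;
 * directories do so only while above the "." + parent floor of 2. */
static void dec_count(struct fake_inode *inode)
{
	if (!inode->is_dir || inode->nlink > 2)
		inode->nlink--;
}

int main(void)
{
	struct fake_inode dir  = { .is_dir = true,  .nlink = 2 };
	struct fake_inode file = { .is_dir = false, .nlink = 1 };

	dec_count(&dir);	/* stays at 2: the directory floor */
	dec_count(&file);	/* drops to 0 */
	printf("dir nlink = %u, file nlink = %u\n", dir.nlink, file.nlink);
	return 0;
}
```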
| @@ -1756,7 +1755,7 @@ retry: | |||
| 1756 | if (IS_DIRSYNC(dir)) | 1755 | if (IS_DIRSYNC(dir)) |
| 1757 | ext4_handle_sync(handle); | 1756 | ext4_handle_sync(handle); |
| 1758 | 1757 | ||
| 1759 | inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0); | 1758 | inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL); |
| 1760 | err = PTR_ERR(inode); | 1759 | err = PTR_ERR(inode); |
| 1761 | if (!IS_ERR(inode)) { | 1760 | if (!IS_ERR(inode)) { |
| 1762 | inode->i_op = &ext4_file_inode_operations; | 1761 | inode->i_op = &ext4_file_inode_operations; |
| @@ -1792,7 +1791,7 @@ retry: | |||
| 1792 | if (IS_DIRSYNC(dir)) | 1791 | if (IS_DIRSYNC(dir)) |
| 1793 | ext4_handle_sync(handle); | 1792 | ext4_handle_sync(handle); |
| 1794 | 1793 | ||
| 1795 | inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0); | 1794 | inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL); |
| 1796 | err = PTR_ERR(inode); | 1795 | err = PTR_ERR(inode); |
| 1797 | if (!IS_ERR(inode)) { | 1796 | if (!IS_ERR(inode)) { |
| 1798 | init_special_inode(inode, inode->i_mode, rdev); | 1797 | init_special_inode(inode, inode->i_mode, rdev); |
| @@ -1832,7 +1831,7 @@ retry: | |||
| 1832 | ext4_handle_sync(handle); | 1831 | ext4_handle_sync(handle); |
| 1833 | 1832 | ||
| 1834 | inode = ext4_new_inode(handle, dir, S_IFDIR | mode, | 1833 | inode = ext4_new_inode(handle, dir, S_IFDIR | mode, |
| 1835 | &dentry->d_name, 0); | 1834 | &dentry->d_name, 0, NULL); |
| 1836 | err = PTR_ERR(inode); | 1835 | err = PTR_ERR(inode); |
| 1837 | if (IS_ERR(inode)) | 1836 | if (IS_ERR(inode)) |
| 1838 | goto out_stop; | 1837 | goto out_stop; |
| @@ -1863,7 +1862,7 @@ retry: | |||
| 1863 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 1862 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
| 1864 | inode->i_nlink = 2; | 1863 | inode->i_nlink = 2; |
| 1865 | BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); | 1864 | BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); |
| 1866 | err = ext4_handle_dirty_metadata(handle, dir, dir_block); | 1865 | err = ext4_handle_dirty_metadata(handle, inode, dir_block); |
| 1867 | if (err) | 1866 | if (err) |
| 1868 | goto out_clear_inode; | 1867 | goto out_clear_inode; |
| 1869 | err = ext4_mark_inode_dirty(handle, inode); | 1868 | err = ext4_mark_inode_dirty(handle, inode); |
| @@ -2279,7 +2278,7 @@ retry: | |||
| 2279 | ext4_handle_sync(handle); | 2278 | ext4_handle_sync(handle); |
| 2280 | 2279 | ||
| 2281 | inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO, | 2280 | inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO, |
| 2282 | &dentry->d_name, 0); | 2281 | &dentry->d_name, 0, NULL); |
| 2283 | err = PTR_ERR(inode); | 2282 | err = PTR_ERR(inode); |
| 2284 | if (IS_ERR(inode)) | 2283 | if (IS_ERR(inode)) |
| 2285 | goto out_stop; | 2284 | goto out_stop; |
| @@ -2530,7 +2529,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 2530 | PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = | 2529 | PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = |
| 2531 | cpu_to_le32(new_dir->i_ino); | 2530 | cpu_to_le32(new_dir->i_ino); |
| 2532 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); | 2531 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); |
| 2533 | retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh); | 2532 | retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh); |
| 2534 | if (retval) { | 2533 | if (retval) { |
| 2535 | ext4_std_error(old_dir->i_sb, retval); | 2534 | ext4_std_error(old_dir->i_sb, retval); |
| 2536 | goto end_rename; | 2535 | goto end_rename; |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 92f38ee13f8a..7ce1d0b19c94 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
| @@ -70,7 +70,6 @@ static void put_io_page(struct ext4_io_page *io_page) | |||
| 70 | void ext4_free_io_end(ext4_io_end_t *io) | 70 | void ext4_free_io_end(ext4_io_end_t *io) |
| 71 | { | 71 | { |
| 72 | int i; | 72 | int i; |
| 73 | wait_queue_head_t *wq; | ||
| 74 | 73 | ||
| 75 | BUG_ON(!io); | 74 | BUG_ON(!io); |
| 76 | if (io->page) | 75 | if (io->page) |
| @@ -78,56 +77,43 @@ void ext4_free_io_end(ext4_io_end_t *io) | |||
| 78 | for (i = 0; i < io->num_io_pages; i++) | 77 | for (i = 0; i < io->num_io_pages; i++) |
| 79 | put_io_page(io->pages[i]); | 78 | put_io_page(io->pages[i]); |
| 80 | io->num_io_pages = 0; | 79 | io->num_io_pages = 0; |
| 81 | wq = ext4_ioend_wq(io->inode); | 80 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count)) |
| 82 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && | 81 | wake_up_all(ext4_ioend_wq(io->inode)); |
| 83 | waitqueue_active(wq)) | ||
| 84 | wake_up_all(wq); | ||
| 85 | kmem_cache_free(io_end_cachep, io); | 82 | kmem_cache_free(io_end_cachep, io); |
| 86 | } | 83 | } |
| 87 | 84 | ||
| 88 | /* | 85 | /* |
| 89 | * check a range of space and convert unwritten extents to written. | 86 | * check a range of space and convert unwritten extents to written. |
| 87 | * | ||
| 88 | * Called with inode->i_mutex; we depend on this when we manipulate | ||
| 89 | * io->flag, since we could otherwise race with ext4_flush_completed_IO() | ||
| 90 | */ | 90 | */ |
| 91 | int ext4_end_io_nolock(ext4_io_end_t *io) | 91 | int ext4_end_io_nolock(ext4_io_end_t *io) |
| 92 | { | 92 | { |
| 93 | struct inode *inode = io->inode; | 93 | struct inode *inode = io->inode; |
| 94 | loff_t offset = io->offset; | 94 | loff_t offset = io->offset; |
| 95 | ssize_t size = io->size; | 95 | ssize_t size = io->size; |
| 96 | wait_queue_head_t *wq; | ||
| 97 | int ret = 0; | 96 | int ret = 0; |
| 98 | 97 | ||
| 99 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | 98 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," |
| 100 | "list->prev 0x%p\n", | 99 | "list->prev 0x%p\n", |
| 101 | io, inode->i_ino, io->list.next, io->list.prev); | 100 | io, inode->i_ino, io->list.next, io->list.prev); |
| 102 | 101 | ||
| 103 | if (list_empty(&io->list)) | ||
| 104 | return ret; | ||
| 105 | |||
| 106 | if (!(io->flag & EXT4_IO_END_UNWRITTEN)) | ||
| 107 | return ret; | ||
| 108 | |||
| 109 | ret = ext4_convert_unwritten_extents(inode, offset, size); | 102 | ret = ext4_convert_unwritten_extents(inode, offset, size); |
| 110 | if (ret < 0) { | 103 | if (ret < 0) { |
| 111 | printk(KERN_EMERG "%s: failed to convert unwritten " | 104 | ext4_msg(inode->i_sb, KERN_EMERG, |
| 112 | "extents to written extents, error is %d " | 105 | "failed to convert unwritten extents to written " |
| 113 | "io is still on inode %lu aio dio list\n", | 106 | "extents -- potential data loss! " |
| 114 | __func__, ret, inode->i_ino); | 107 | "(inode %lu, offset %llu, size %zd, error %d)", |
| 115 | return ret; | 108 | inode->i_ino, offset, size, ret); |
| 116 | } | 109 | } |
| 117 | 110 | ||
| 118 | if (io->iocb) | 111 | if (io->iocb) |
| 119 | aio_complete(io->iocb, io->result, 0); | 112 | aio_complete(io->iocb, io->result, 0); |
| 120 | /* clear the DIO AIO unwritten flag */ | ||
| 121 | if (io->flag & EXT4_IO_END_UNWRITTEN) { | ||
| 122 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
| 123 | /* Wake up anyone waiting on unwritten extent conversion */ | ||
| 124 | wq = ext4_ioend_wq(io->inode); | ||
| 125 | if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) && | ||
| 126 | waitqueue_active(wq)) { | ||
| 127 | wake_up_all(wq); | ||
| 128 | } | ||
| 129 | } | ||
| 130 | 113 | ||
| 114 | /* Wake up anyone waiting on unwritten extent conversion */ | ||
| 115 | if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) | ||
| 116 | wake_up_all(ext4_ioend_wq(io->inode)); | ||
| 131 | return ret; | 117 | return ret; |
| 132 | } | 118 | } |
| 133 | 119 | ||
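Both wake-up paths above drop their waitqueue_active() pre-check. Peeking at the wait queue without its lock races with a task that has decided to sleep but not yet registered itself, so the check can lose a wake-up, while an unconditional wake_up_all() on an empty queue is cheap. A pthread analogue of the two patterns — condition variables rather than kernel wait queues, so only the shape carries over (compile with -pthread):

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool done;
static int nwaiters;

static void *waiter(void *arg)
{
	pthread_mutex_lock(&lock);
	nwaiters++;		/* registration is visible only under the lock */
	while (!done)
		pthread_cond_wait(&cond, &lock);
	nwaiters--;
	pthread_mutex_unlock(&lock);
	return arg;
}

/* Racy waker, like the removed waitqueue_active() pattern: the unlocked
 * peek at nwaiters can read 0 while a waiter is between "decide to
 * sleep" and "registered", losing the wake-up entirely. */
static void waker_racy(void)
{
	done = true;			/* also an unsynchronized write */
	if (nwaiters)			/* the racy peek */
		pthread_cond_broadcast(&cond);
}

/* Safe waker: publish the state under the lock and always broadcast;
 * broadcasting with no waiters present is harmless. */
static void waker_safe(void)
{
	pthread_mutex_lock(&lock);
	done = true;
	pthread_mutex_unlock(&lock);
	pthread_cond_broadcast(&cond);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waiter, NULL);
	waker_safe();			/* use the safe variant */
	pthread_join(t, NULL);
	printf("waiter woke and exited\n");
	(void)waker_racy;		/* shown for contrast only */
	return 0;
}
```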
| @@ -140,9 +126,15 @@ static void ext4_end_io_work(struct work_struct *work) | |||
| 140 | struct inode *inode = io->inode; | 126 | struct inode *inode = io->inode; |
| 141 | struct ext4_inode_info *ei = EXT4_I(inode); | 127 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 142 | unsigned long flags; | 128 | unsigned long flags; |
| 143 | int ret; | 129 | |
| 130 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
| 131 | if (list_empty(&io->list)) { | ||
| 132 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
| 133 | goto free; | ||
| 134 | } | ||
| 144 | 135 | ||
| 145 | if (!mutex_trylock(&inode->i_mutex)) { | 136 | if (!mutex_trylock(&inode->i_mutex)) { |
| 137 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
| 146 | /* | 138 | /* |
| 147 | * Requeue the work instead of waiting so that the work | 139 | * Requeue the work instead of waiting so that the work |
| 148 | * items queued after this can be processed. | 140 | * items queued after this can be processed. |
| @@ -159,17 +151,11 @@ static void ext4_end_io_work(struct work_struct *work) | |||
| 159 | io->flag |= EXT4_IO_END_QUEUED; | 151 | io->flag |= EXT4_IO_END_QUEUED; |
| 160 | return; | 152 | return; |
| 161 | } | 153 | } |
| 162 | ret = ext4_end_io_nolock(io); | 154 | list_del_init(&io->list); |
| 163 | if (ret < 0) { | ||
| 164 | mutex_unlock(&inode->i_mutex); | ||
| 165 | return; | ||
| 166 | } | ||
| 167 | |||
| 168 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
| 169 | if (!list_empty(&io->list)) | ||
| 170 | list_del_init(&io->list); | ||
| 171 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 155 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
| 156 | (void) ext4_end_io_nolock(io); | ||
| 172 | mutex_unlock(&inode->i_mutex); | 157 | mutex_unlock(&inode->i_mutex); |
| 158 | free: | ||
| 173 | ext4_free_io_end(io); | 159 | ext4_free_io_end(io); |
| 174 | } | 160 | } |
| 175 | 161 | ||
| @@ -350,10 +336,8 @@ submit_and_retry: | |||
| 350 | if ((io_end->num_io_pages >= MAX_IO_PAGES) && | 336 | if ((io_end->num_io_pages >= MAX_IO_PAGES) && |
| 351 | (io_end->pages[io_end->num_io_pages-1] != io_page)) | 337 | (io_end->pages[io_end->num_io_pages-1] != io_page)) |
| 352 | goto submit_and_retry; | 338 | goto submit_and_retry; |
| 353 | if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | 339 | if (buffer_uninit(bh)) |
| 354 | io_end->flag |= EXT4_IO_END_UNWRITTEN; | 340 | ext4_set_io_unwritten_flag(inode, io_end); |
| 355 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | ||
| 356 | } | ||
| 357 | io->io_end->size += bh->b_size; | 341 | io->io_end->size += bh->b_size; |
| 358 | io->io_next_block++; | 342 | io->io_next_block++; |
| 359 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | 343 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 707d3f16f7ce..996780ab4f4e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
| @@ -875,7 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
| 875 | ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ | 875 | ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ |
| 876 | ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ | 876 | ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ |
| 877 | ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ | 877 | ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ |
| 878 | ext4_free_blks_set(sb, gdp, input->free_blocks_count); | 878 | ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count); |
| 879 | ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); | 879 | ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); |
| 880 | gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); | 880 | gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); |
| 881 | gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); | 881 | gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); |
| @@ -937,8 +937,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
| 937 | input->reserved_blocks); | 937 | input->reserved_blocks); |
| 938 | 938 | ||
| 939 | /* Update the free space counts */ | 939 | /* Update the free space counts */ |
| 940 | percpu_counter_add(&sbi->s_freeblocks_counter, | 940 | percpu_counter_add(&sbi->s_freeclusters_counter, |
| 941 | input->free_blocks_count); | 941 | EXT4_B2C(sbi, input->free_blocks_count)); |
| 942 | percpu_counter_add(&sbi->s_freeinodes_counter, | 942 | percpu_counter_add(&sbi->s_freeinodes_counter, |
| 943 | EXT4_INODES_PER_GROUP(sb)); | 943 | EXT4_INODES_PER_GROUP(sb)); |
| 944 | 944 | ||
| @@ -946,8 +946,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
| 946 | sbi->s_log_groups_per_flex) { | 946 | sbi->s_log_groups_per_flex) { |
| 947 | ext4_group_t flex_group; | 947 | ext4_group_t flex_group; |
| 948 | flex_group = ext4_flex_group(sbi, input->group); | 948 | flex_group = ext4_flex_group(sbi, input->group); |
| 949 | atomic_add(input->free_blocks_count, | 949 | atomic_add(EXT4_B2C(sbi, input->free_blocks_count), |
| 950 | &sbi->s_flex_groups[flex_group].free_blocks); | 950 | &sbi->s_flex_groups[flex_group].free_clusters); |
| 951 | atomic_add(EXT4_INODES_PER_GROUP(sb), | 951 | atomic_add(EXT4_INODES_PER_GROUP(sb), |
| 952 | &sbi->s_flex_groups[flex_group].free_inodes); | 952 | &sbi->s_flex_groups[flex_group].free_inodes); |
| 953 | } | 953 | } |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 44d0c8db2239..9953d80145ad 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
| @@ -45,6 +45,7 @@ | |||
| 45 | #include <linux/freezer.h> | 45 | #include <linux/freezer.h> |
| 46 | 46 | ||
| 47 | #include "ext4.h" | 47 | #include "ext4.h" |
| 48 | #include "ext4_extents.h" | ||
| 48 | #include "ext4_jbd2.h" | 49 | #include "ext4_jbd2.h" |
| 49 | #include "xattr.h" | 50 | #include "xattr.h" |
| 50 | #include "acl.h" | 51 | #include "acl.h" |
| @@ -163,8 +164,8 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb, | |||
| 163 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); | 164 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); |
| 164 | } | 165 | } |
| 165 | 166 | ||
| 166 | __u32 ext4_free_blks_count(struct super_block *sb, | 167 | __u32 ext4_free_group_clusters(struct super_block *sb, |
| 167 | struct ext4_group_desc *bg) | 168 | struct ext4_group_desc *bg) |
| 168 | { | 169 | { |
| 169 | return le16_to_cpu(bg->bg_free_blocks_count_lo) | | 170 | return le16_to_cpu(bg->bg_free_blocks_count_lo) | |
| 170 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 171 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
| @@ -219,8 +220,8 @@ void ext4_inode_table_set(struct super_block *sb, | |||
| 219 | bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); | 220 | bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); |
| 220 | } | 221 | } |
| 221 | 222 | ||
| 222 | void ext4_free_blks_set(struct super_block *sb, | 223 | void ext4_free_group_clusters_set(struct super_block *sb, |
| 223 | struct ext4_group_desc *bg, __u32 count) | 224 | struct ext4_group_desc *bg, __u32 count) |
| 224 | { | 225 | { |
| 225 | bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); | 226 | bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); |
| 226 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) | 227 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) |
| @@ -414,6 +415,22 @@ static void save_error_info(struct super_block *sb, const char *func, | |||
| 414 | ext4_commit_super(sb, 1); | 415 | ext4_commit_super(sb, 1); |
| 415 | } | 416 | } |
| 416 | 417 | ||
| 418 | /* | ||
| 419 | * The del_gendisk() function uninitializes the disk-specific data | ||
| 420 | * structures, including the bdi structure, without telling anyone | ||
| 421 | * else. Once this happens, any attempt to call mark_buffer_dirty() | ||
| 422 | * (for example, by ext4_commit_super), will cause a kernel OOPS. | ||
| 423 | * This is a kludge to prevent these oops until we can put in a proper | ||
| 424 | * hook in del_gendisk() to inform the VFS and file system layers. | ||
| 425 | */ | ||
| 426 | static int block_device_ejected(struct super_block *sb) | ||
| 427 | { | ||
| 428 | struct inode *bd_inode = sb->s_bdev->bd_inode; | ||
| 429 | struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info; | ||
| 430 | |||
| 431 | return bdi->dev == NULL; | ||
| 432 | } | ||
| 433 | |||
| 417 | 434 | ||
| 418 | /* Deal with the reporting of failure conditions on a filesystem such as | 435 | /* Deal with the reporting of failure conditions on a filesystem such as |
| 419 | * inconsistencies detected or read IO failures. | 436 | * inconsistencies detected or read IO failures. |
| @@ -821,10 +838,10 @@ static void ext4_put_super(struct super_block *sb) | |||
| 821 | brelse(sbi->s_group_desc[i]); | 838 | brelse(sbi->s_group_desc[i]); |
| 822 | ext4_kvfree(sbi->s_group_desc); | 839 | ext4_kvfree(sbi->s_group_desc); |
| 823 | ext4_kvfree(sbi->s_flex_groups); | 840 | ext4_kvfree(sbi->s_flex_groups); |
| 824 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 841 | percpu_counter_destroy(&sbi->s_freeclusters_counter); |
| 825 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 842 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
| 826 | percpu_counter_destroy(&sbi->s_dirs_counter); | 843 | percpu_counter_destroy(&sbi->s_dirs_counter); |
| 827 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | 844 | percpu_counter_destroy(&sbi->s_dirtyclusters_counter); |
| 828 | brelse(sbi->s_sbh); | 845 | brelse(sbi->s_sbh); |
| 829 | #ifdef CONFIG_QUOTA | 846 | #ifdef CONFIG_QUOTA |
| 830 | for (i = 0; i < MAXQUOTAS; i++) | 847 | for (i = 0; i < MAXQUOTAS; i++) |
| @@ -1057,8 +1074,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 1057 | seq_puts(seq, ",nouid32"); | 1074 | seq_puts(seq, ",nouid32"); |
| 1058 | if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) | 1075 | if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) |
| 1059 | seq_puts(seq, ",debug"); | 1076 | seq_puts(seq, ",debug"); |
| 1060 | if (test_opt(sb, OLDALLOC)) | ||
| 1061 | seq_puts(seq, ",oldalloc"); | ||
| 1062 | #ifdef CONFIG_EXT4_FS_XATTR | 1077 | #ifdef CONFIG_EXT4_FS_XATTR |
| 1063 | if (test_opt(sb, XATTR_USER)) | 1078 | if (test_opt(sb, XATTR_USER)) |
| 1064 | seq_puts(seq, ",user_xattr"); | 1079 | seq_puts(seq, ",user_xattr"); |
| @@ -1567,10 +1582,12 @@ static int parse_options(char *options, struct super_block *sb, | |||
| 1567 | set_opt(sb, DEBUG); | 1582 | set_opt(sb, DEBUG); |
| 1568 | break; | 1583 | break; |
| 1569 | case Opt_oldalloc: | 1584 | case Opt_oldalloc: |
| 1570 | set_opt(sb, OLDALLOC); | 1585 | ext4_msg(sb, KERN_WARNING, |
| 1586 | "Ignoring deprecated oldalloc option"); | ||
| 1571 | break; | 1587 | break; |
| 1572 | case Opt_orlov: | 1588 | case Opt_orlov: |
| 1573 | clear_opt(sb, OLDALLOC); | 1589 | ext4_msg(sb, KERN_WARNING, |
| 1590 | "Ignoring deprecated orlov option"); | ||
| 1574 | break; | 1591 | break; |
| 1575 | #ifdef CONFIG_EXT4_FS_XATTR | 1592 | #ifdef CONFIG_EXT4_FS_XATTR |
| 1576 | case Opt_user_xattr: | 1593 | case Opt_user_xattr: |
| @@ -1801,6 +1818,7 @@ set_qf_format: | |||
| 1801 | break; | 1818 | break; |
| 1802 | case Opt_nodelalloc: | 1819 | case Opt_nodelalloc: |
| 1803 | clear_opt(sb, DELALLOC); | 1820 | clear_opt(sb, DELALLOC); |
| 1821 | clear_opt2(sb, EXPLICIT_DELALLOC); | ||
| 1804 | break; | 1822 | break; |
| 1805 | case Opt_mblk_io_submit: | 1823 | case Opt_mblk_io_submit: |
| 1806 | set_opt(sb, MBLK_IO_SUBMIT); | 1824 | set_opt(sb, MBLK_IO_SUBMIT); |
| @@ -1817,6 +1835,7 @@ set_qf_format: | |||
| 1817 | break; | 1835 | break; |
| 1818 | case Opt_delalloc: | 1836 | case Opt_delalloc: |
| 1819 | set_opt(sb, DELALLOC); | 1837 | set_opt(sb, DELALLOC); |
| 1838 | set_opt2(sb, EXPLICIT_DELALLOC); | ||
| 1820 | break; | 1839 | break; |
| 1821 | case Opt_block_validity: | 1840 | case Opt_block_validity: |
| 1822 | set_opt(sb, BLOCK_VALIDITY); | 1841 | set_opt(sb, BLOCK_VALIDITY); |
| @@ -1935,7 +1954,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
| 1935 | res = MS_RDONLY; | 1954 | res = MS_RDONLY; |
| 1936 | } | 1955 | } |
| 1937 | if (read_only) | 1956 | if (read_only) |
| 1938 | return res; | 1957 | goto done; |
| 1939 | if (!(sbi->s_mount_state & EXT4_VALID_FS)) | 1958 | if (!(sbi->s_mount_state & EXT4_VALID_FS)) |
| 1940 | ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " | 1959 | ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " |
| 1941 | "running e2fsck is recommended"); | 1960 | "running e2fsck is recommended"); |
| @@ -1966,6 +1985,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
| 1966 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 1985 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
| 1967 | 1986 | ||
| 1968 | ext4_commit_super(sb, 1); | 1987 | ext4_commit_super(sb, 1); |
| 1988 | done: | ||
| 1969 | if (test_opt(sb, DEBUG)) | 1989 | if (test_opt(sb, DEBUG)) |
| 1970 | printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " | 1990 | printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " |
| 1971 | "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n", | 1991 | "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n", |
| @@ -2015,8 +2035,8 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
| 2015 | flex_group = ext4_flex_group(sbi, i); | 2035 | flex_group = ext4_flex_group(sbi, i); |
| 2016 | atomic_add(ext4_free_inodes_count(sb, gdp), | 2036 | atomic_add(ext4_free_inodes_count(sb, gdp), |
| 2017 | &sbi->s_flex_groups[flex_group].free_inodes); | 2037 | &sbi->s_flex_groups[flex_group].free_inodes); |
| 2018 | atomic_add(ext4_free_blks_count(sb, gdp), | 2038 | atomic_add(ext4_free_group_clusters(sb, gdp), |
| 2019 | &sbi->s_flex_groups[flex_group].free_blocks); | 2039 | &sbi->s_flex_groups[flex_group].free_clusters); |
| 2020 | atomic_add(ext4_used_dirs_count(sb, gdp), | 2040 | atomic_add(ext4_used_dirs_count(sb, gdp), |
| 2021 | &sbi->s_flex_groups[flex_group].used_dirs); | 2041 | &sbi->s_flex_groups[flex_group].used_dirs); |
| 2022 | } | 2042 | } |
| @@ -2134,7 +2154,8 @@ static int ext4_check_descriptors(struct super_block *sb, | |||
| 2134 | if (NULL != first_not_zeroed) | 2154 | if (NULL != first_not_zeroed) |
| 2135 | *first_not_zeroed = grp; | 2155 | *first_not_zeroed = grp; |
| 2136 | 2156 | ||
| 2137 | ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); | 2157 | ext4_free_blocks_count_set(sbi->s_es, |
| 2158 | EXT4_C2B(sbi, ext4_count_free_clusters(sb))); | ||
| 2138 | sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); | 2159 | sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); |
| 2139 | return 1; | 2160 | return 1; |
| 2140 | } | 2161 | } |
| @@ -2454,7 +2475,8 @@ static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, | |||
| 2454 | char *buf) | 2475 | char *buf) |
| 2455 | { | 2476 | { |
| 2456 | return snprintf(buf, PAGE_SIZE, "%llu\n", | 2477 | return snprintf(buf, PAGE_SIZE, "%llu\n", |
| 2457 | (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); | 2478 | (s64) EXT4_C2B(sbi, |
| 2479 | percpu_counter_sum(&sbi->s_dirtyclusters_counter))); | ||
| 2458 | } | 2480 | } |
| 2459 | 2481 | ||
| 2460 | static ssize_t session_write_kbytes_show(struct ext4_attr *a, | 2482 | static ssize_t session_write_kbytes_show(struct ext4_attr *a, |
| @@ -2682,6 +2704,13 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) | |||
| 2682 | return 0; | 2704 | return 0; |
| 2683 | } | 2705 | } |
| 2684 | } | 2706 | } |
| 2707 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) && | ||
| 2708 | !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { | ||
| 2709 | ext4_msg(sb, KERN_ERR, | ||
| 2710 | "Can't support bigalloc feature without " | ||
| 2711 | "extents feature\n"); | ||
| 2712 | return 0; | ||
| 2713 | } | ||
| 2685 | return 1; | 2714 | return 1; |
| 2686 | } | 2715 | } |
| 2687 | 2716 | ||
| @@ -3087,10 +3116,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3087 | char *cp; | 3116 | char *cp; |
| 3088 | const char *descr; | 3117 | const char *descr; |
| 3089 | int ret = -ENOMEM; | 3118 | int ret = -ENOMEM; |
| 3090 | int blocksize; | 3119 | int blocksize, clustersize; |
| 3091 | unsigned int db_count; | 3120 | unsigned int db_count; |
| 3092 | unsigned int i; | 3121 | unsigned int i; |
| 3093 | int needs_recovery, has_huge_files; | 3122 | int needs_recovery, has_huge_files, has_bigalloc; |
| 3094 | __u64 blocks_count; | 3123 | __u64 blocks_count; |
| 3095 | int err; | 3124 | int err; |
| 3096 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; | 3125 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; |
| @@ -3224,6 +3253,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3224 | &journal_ioprio, NULL, 0)) | 3253 | &journal_ioprio, NULL, 0)) |
| 3225 | goto failed_mount; | 3254 | goto failed_mount; |
| 3226 | 3255 | ||
| 3256 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
| 3257 | printk_once(KERN_WARNING "EXT4-fs: Warning: mounting " | ||
| 3258 | "with data=journal disables delayed " | ||
| 3259 | "allocation and O_DIRECT support!\n"); | ||
| 3260 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { | ||
| 3261 | ext4_msg(sb, KERN_ERR, "can't mount with " | ||
| 3262 | "both data=journal and delalloc"); | ||
| 3263 | goto failed_mount; | ||
| 3264 | } | ||
| 3265 | if (test_opt(sb, DIOREAD_NOLOCK)) { | ||
| 3266 | ext4_msg(sb, KERN_ERR, "can't mount with " | ||
| 3267 | "both data=journal and delalloc"); | ||
| 3268 | goto failed_mount; | ||
| 3269 | } | ||
| 3270 | if (test_opt(sb, DELALLOC)) | ||
| 3271 | clear_opt(sb, DELALLOC); | ||
| 3272 | } | ||
| 3273 | |||
| 3274 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); | ||
| 3275 | if (test_opt(sb, DIOREAD_NOLOCK)) { | ||
| 3276 | if (blocksize < PAGE_SIZE) { | ||
| 3277 | ext4_msg(sb, KERN_ERR, "can't mount with " | ||
| 3278 | "dioread_nolock if block size != PAGE_SIZE"); | ||
| 3279 | goto failed_mount; | ||
| 3280 | } | ||
| 3281 | } | ||
| 3282 | |||
| 3227 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | 3283 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | |
| 3228 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); | 3284 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); |
| 3229 | 3285 | ||
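This block moves the data=journal compatibility checks from after journal setup (the old copies are removed further down) to option-parsing time, and promotes explicit delalloc or dioread_nolock conflicts from warnings to hard mount failures; only the implicit delalloc default is still dropped silently. A sketch of the resulting decision logic, with plain ints standing in for test_opt()/clear_opt():

```c
#include <stdio.h>

/* Returns 0 on success, -1 when the mount must fail. The delalloc
 * *default* is quietly disabled under data=journal, but an *explicit*
 * request is now a hard error. */
static int check_journal_data_opts(int data_journal, int explicit_delalloc,
				   int dioread_nolock, int *delalloc)
{
	if (!data_journal)
		return 0;
	if (explicit_delalloc || dioread_nolock)
		return -1;		/* hard conflict: fail the mount */
	*delalloc = 0;			/* silently drop the default */
	return 0;
}

int main(void)
{
	int delalloc = 1;

	printf("%d\n", check_journal_data_opts(1, 0, 0, &delalloc));	/* 0 */
	printf("delalloc now %d\n", delalloc);				/* 0 */
	printf("%d\n", check_journal_data_opts(1, 1, 0, &delalloc));	/* -1 */
	return 0;
}
```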
| @@ -3265,8 +3321,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3265 | if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) | 3321 | if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) |
| 3266 | goto failed_mount; | 3322 | goto failed_mount; |
| 3267 | 3323 | ||
| 3268 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); | ||
| 3269 | |||
| 3270 | if (blocksize < EXT4_MIN_BLOCK_SIZE || | 3324 | if (blocksize < EXT4_MIN_BLOCK_SIZE || |
| 3271 | blocksize > EXT4_MAX_BLOCK_SIZE) { | 3325 | blocksize > EXT4_MAX_BLOCK_SIZE) { |
| 3272 | ext4_msg(sb, KERN_ERR, | 3326 | ext4_msg(sb, KERN_ERR, |
| @@ -3369,12 +3423,53 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3369 | sb->s_dirt = 1; | 3423 | sb->s_dirt = 1; |
| 3370 | } | 3424 | } |
| 3371 | 3425 | ||
| 3372 | if (sbi->s_blocks_per_group > blocksize * 8) { | 3426 | /* Handle clustersize */ |
| 3373 | ext4_msg(sb, KERN_ERR, | 3427 | clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); |
| 3374 | "#blocks per group too big: %lu", | 3428 | has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb, |
| 3375 | sbi->s_blocks_per_group); | 3429 | EXT4_FEATURE_RO_COMPAT_BIGALLOC); |
| 3376 | goto failed_mount; | 3430 | if (has_bigalloc) { |
| 3431 | if (clustersize < blocksize) { | ||
| 3432 | ext4_msg(sb, KERN_ERR, | ||
| 3433 | "cluster size (%d) smaller than " | ||
| 3434 | "block size (%d)", clustersize, blocksize); | ||
| 3435 | goto failed_mount; | ||
| 3436 | } | ||
| 3437 | sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - | ||
| 3438 | le32_to_cpu(es->s_log_block_size); | ||
| 3439 | sbi->s_clusters_per_group = | ||
| 3440 | le32_to_cpu(es->s_clusters_per_group); | ||
| 3441 | if (sbi->s_clusters_per_group > blocksize * 8) { | ||
| 3442 | ext4_msg(sb, KERN_ERR, | ||
| 3443 | "#clusters per group too big: %lu", | ||
| 3444 | sbi->s_clusters_per_group); | ||
| 3445 | goto failed_mount; | ||
| 3446 | } | ||
| 3447 | if (sbi->s_blocks_per_group != | ||
| 3448 | (sbi->s_clusters_per_group * (clustersize / blocksize))) { | ||
| 3449 | ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " | ||
| 3450 | "clusters per group (%lu) inconsistent", | ||
| 3451 | sbi->s_blocks_per_group, | ||
| 3452 | sbi->s_clusters_per_group); | ||
| 3453 | goto failed_mount; | ||
| 3454 | } | ||
| 3455 | } else { | ||
| 3456 | if (clustersize != blocksize) { | ||
| 3457 | ext4_warning(sb, "fragment/cluster size (%d) != " | ||
| 3458 | "block size (%d)", clustersize, | ||
| 3459 | blocksize); | ||
| 3460 | clustersize = blocksize; | ||
| 3461 | } | ||
| 3462 | if (sbi->s_blocks_per_group > blocksize * 8) { | ||
| 3463 | ext4_msg(sb, KERN_ERR, | ||
| 3464 | "#blocks per group too big: %lu", | ||
| 3465 | sbi->s_blocks_per_group); | ||
| 3466 | goto failed_mount; | ||
| 3467 | } | ||
| 3468 | sbi->s_clusters_per_group = sbi->s_blocks_per_group; | ||
| 3469 | sbi->s_cluster_bits = 0; | ||
| 3377 | } | 3470 | } |
| 3471 | sbi->s_cluster_ratio = clustersize / blocksize; | ||
| 3472 | |||
| 3378 | if (sbi->s_inodes_per_group > blocksize * 8) { | 3473 | if (sbi->s_inodes_per_group > blocksize * 8) { |
| 3379 | ext4_msg(sb, KERN_ERR, | 3474 | ext4_msg(sb, KERN_ERR, |
| 3380 | "#inodes per group too big: %lu", | 3475 | "#inodes per group too big: %lu", |
| @@ -3446,10 +3541,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3446 | goto failed_mount; | 3541 | goto failed_mount; |
| 3447 | } | 3542 | } |
| 3448 | 3543 | ||
| 3449 | #ifdef CONFIG_PROC_FS | ||
| 3450 | if (ext4_proc_root) | 3544 | if (ext4_proc_root) |
| 3451 | sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); | 3545 | sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); |
| 3452 | #endif | ||
| 3453 | 3546 | ||
| 3454 | bgl_lock_init(sbi->s_blockgroup_lock); | 3547 | bgl_lock_init(sbi->s_blockgroup_lock); |
| 3455 | 3548 | ||
| @@ -3483,8 +3576,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3483 | sbi->s_err_report.function = print_daily_error_info; | 3576 | sbi->s_err_report.function = print_daily_error_info; |
| 3484 | sbi->s_err_report.data = (unsigned long) sb; | 3577 | sbi->s_err_report.data = (unsigned long) sb; |
| 3485 | 3578 | ||
| 3486 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | 3579 | err = percpu_counter_init(&sbi->s_freeclusters_counter, |
| 3487 | ext4_count_free_blocks(sb)); | 3580 | ext4_count_free_clusters(sb)); |
| 3488 | if (!err) { | 3581 | if (!err) { |
| 3489 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | 3582 | err = percpu_counter_init(&sbi->s_freeinodes_counter, |
| 3490 | ext4_count_free_inodes(sb)); | 3583 | ext4_count_free_inodes(sb)); |
| @@ -3494,7 +3587,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3494 | ext4_count_dirs(sb)); | 3587 | ext4_count_dirs(sb)); |
| 3495 | } | 3588 | } |
| 3496 | if (!err) { | 3589 | if (!err) { |
| 3497 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | 3590 | err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0); |
| 3498 | } | 3591 | } |
| 3499 | if (err) { | 3592 | if (err) { |
| 3500 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | 3593 | ext4_msg(sb, KERN_ERR, "insufficient memory"); |
| @@ -3609,13 +3702,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3609 | * The journal may have updated the bg summary counts, so we | 3702 | * The journal may have updated the bg summary counts, so we |
| 3610 | * need to update the global counters. | 3703 | * need to update the global counters. |
| 3611 | */ | 3704 | */ |
| 3612 | percpu_counter_set(&sbi->s_freeblocks_counter, | 3705 | percpu_counter_set(&sbi->s_freeclusters_counter, |
| 3613 | ext4_count_free_blocks(sb)); | 3706 | ext4_count_free_clusters(sb)); |
| 3614 | percpu_counter_set(&sbi->s_freeinodes_counter, | 3707 | percpu_counter_set(&sbi->s_freeinodes_counter, |
| 3615 | ext4_count_free_inodes(sb)); | 3708 | ext4_count_free_inodes(sb)); |
| 3616 | percpu_counter_set(&sbi->s_dirs_counter, | 3709 | percpu_counter_set(&sbi->s_dirs_counter, |
| 3617 | ext4_count_dirs(sb)); | 3710 | ext4_count_dirs(sb)); |
| 3618 | percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); | 3711 | percpu_counter_set(&sbi->s_dirtyclusters_counter, 0); |
| 3619 | 3712 | ||
| 3620 | no_journal: | 3713 | no_journal: |
| 3621 | /* | 3714 | /* |
| @@ -3679,25 +3772,6 @@ no_journal: | |||
| 3679 | "available"); | 3772 | "available"); |
| 3680 | } | 3773 | } |
| 3681 | 3774 | ||
| 3682 | if (test_opt(sb, DELALLOC) && | ||
| 3683 | (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { | ||
| 3684 | ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " | ||
| 3685 | "requested data journaling mode"); | ||
| 3686 | clear_opt(sb, DELALLOC); | ||
| 3687 | } | ||
| 3688 | if (test_opt(sb, DIOREAD_NOLOCK)) { | ||
| 3689 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
| 3690 | ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " | ||
| 3691 | "option - requested data journaling mode"); | ||
| 3692 | clear_opt(sb, DIOREAD_NOLOCK); | ||
| 3693 | } | ||
| 3694 | if (sb->s_blocksize < PAGE_SIZE) { | ||
| 3695 | ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " | ||
| 3696 | "option - block size is too small"); | ||
| 3697 | clear_opt(sb, DIOREAD_NOLOCK); | ||
| 3698 | } | ||
| 3699 | } | ||
| 3700 | |||
| 3701 | err = ext4_setup_system_zone(sb); | 3775 | err = ext4_setup_system_zone(sb); |
| 3702 | if (err) { | 3776 | if (err) { |
| 3703 | ext4_msg(sb, KERN_ERR, "failed to initialize system " | 3777 | ext4_msg(sb, KERN_ERR, "failed to initialize system " |
| @@ -3710,22 +3784,19 @@ no_journal: | |||
| 3710 | if (err) { | 3784 | if (err) { |
| 3711 | ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", | 3785 | ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", |
| 3712 | err); | 3786 | err); |
| 3713 | goto failed_mount4; | 3787 | goto failed_mount5; |
| 3714 | } | 3788 | } |
| 3715 | 3789 | ||
| 3716 | err = ext4_register_li_request(sb, first_not_zeroed); | 3790 | err = ext4_register_li_request(sb, first_not_zeroed); |
| 3717 | if (err) | 3791 | if (err) |
| 3718 | goto failed_mount4; | 3792 | goto failed_mount6; |
| 3719 | 3793 | ||
| 3720 | sbi->s_kobj.kset = ext4_kset; | 3794 | sbi->s_kobj.kset = ext4_kset; |
| 3721 | init_completion(&sbi->s_kobj_unregister); | 3795 | init_completion(&sbi->s_kobj_unregister); |
| 3722 | err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, | 3796 | err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, |
| 3723 | "%s", sb->s_id); | 3797 | "%s", sb->s_id); |
| 3724 | if (err) { | 3798 | if (err) |
| 3725 | ext4_mb_release(sb); | 3799 | goto failed_mount7; |
| 3726 | ext4_ext_release(sb); | ||
| 3727 | goto failed_mount4; | ||
| 3728 | }; | ||
| 3729 | 3800 | ||
| 3730 | EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; | 3801 | EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; |
| 3731 | ext4_orphan_cleanup(sb, es); | 3802 | ext4_orphan_cleanup(sb, es); |
| @@ -3759,13 +3830,19 @@ cantfind_ext4: | |||
| 3759 | ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); | 3830 | ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); |
| 3760 | goto failed_mount; | 3831 | goto failed_mount; |
| 3761 | 3832 | ||
| 3833 | failed_mount7: | ||
| 3834 | ext4_unregister_li_request(sb); | ||
| 3835 | failed_mount6: | ||
| 3836 | ext4_ext_release(sb); | ||
| 3837 | failed_mount5: | ||
| 3838 | ext4_mb_release(sb); | ||
| 3839 | ext4_release_system_zone(sb); | ||
| 3762 | failed_mount4: | 3840 | failed_mount4: |
| 3763 | iput(root); | 3841 | iput(root); |
| 3764 | sb->s_root = NULL; | 3842 | sb->s_root = NULL; |
| 3765 | ext4_msg(sb, KERN_ERR, "mount failed"); | 3843 | ext4_msg(sb, KERN_ERR, "mount failed"); |
| 3766 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); | 3844 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); |
| 3767 | failed_mount_wq: | 3845 | failed_mount_wq: |
| 3768 | ext4_release_system_zone(sb); | ||
| 3769 | if (sbi->s_journal) { | 3846 | if (sbi->s_journal) { |
| 3770 | jbd2_journal_destroy(sbi->s_journal); | 3847 | jbd2_journal_destroy(sbi->s_journal); |
| 3771 | sbi->s_journal = NULL; | 3848 | sbi->s_journal = NULL; |
| @@ -3774,10 +3851,10 @@ failed_mount3: | |||
| 3774 | del_timer(&sbi->s_err_report); | 3851 | del_timer(&sbi->s_err_report); |
| 3775 | if (sbi->s_flex_groups) | 3852 | if (sbi->s_flex_groups) |
| 3776 | ext4_kvfree(sbi->s_flex_groups); | 3853 | ext4_kvfree(sbi->s_flex_groups); |
| 3777 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 3854 | percpu_counter_destroy(&sbi->s_freeclusters_counter); |
| 3778 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 3855 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
| 3779 | percpu_counter_destroy(&sbi->s_dirs_counter); | 3856 | percpu_counter_destroy(&sbi->s_dirs_counter); |
| 3780 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | 3857 | percpu_counter_destroy(&sbi->s_dirtyclusters_counter); |
| 3781 | if (sbi->s_mmp_tsk) | 3858 | if (sbi->s_mmp_tsk) |
| 3782 | kthread_stop(sbi->s_mmp_tsk); | 3859 | kthread_stop(sbi->s_mmp_tsk); |
| 3783 | failed_mount2: | 3860 | failed_mount2: |
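The failed_mount5..7 labels restore the kernel's standard goto-unwinding idiom: each label undoes exactly the steps that had succeeded before the failure, in reverse order, replacing the old pattern of releasing mballoc and the extent cache inline at the kobject error path. A self-contained sketch of the idiom; the resource names are illustrative:

```c
#include <stdio.h>
#include <stdlib.h>

static int setup_a(void)     { puts("a up");   return 0; }
static int setup_b(void)     { puts("b up");   return 0; }
static int setup_c(void)     { puts("c FAIL"); return -1; }
static void teardown_b(void) { puts("b down"); }
static void teardown_a(void) { puts("a down"); }

static int fill_super(void)
{
	int err;

	err = setup_a();
	if (err)
		goto fail;
	err = setup_b();
	if (err)
		goto fail_a;
	err = setup_c();
	if (err)
		goto fail_b;	/* like goto failed_mount7 */
	return 0;

fail_b:
	teardown_b();		/* like failed_mount6: ext4_ext_release() */
fail_a:
	teardown_a();		/* like failed_mount5: ext4_mb_release() */
fail:
	return err;
}

int main(void)
{
	return fill_super() ? EXIT_FAILURE : EXIT_SUCCESS;
}
```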
| @@ -4064,7 +4141,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
| 4064 | struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; | 4141 | struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; |
| 4065 | int error = 0; | 4142 | int error = 0; |
| 4066 | 4143 | ||
| 4067 | if (!sbh) | 4144 | if (!sbh || block_device_ejected(sb)) |
| 4068 | return error; | 4145 | return error; |
| 4069 | if (buffer_write_io_error(sbh)) { | 4146 | if (buffer_write_io_error(sbh)) { |
| 4070 | /* | 4147 | /* |
| @@ -4100,8 +4177,9 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
| 4100 | else | 4177 | else |
| 4101 | es->s_kbytes_written = | 4178 | es->s_kbytes_written = |
| 4102 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); | 4179 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); |
| 4103 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( | 4180 | ext4_free_blocks_count_set(es, |
| 4104 | &EXT4_SB(sb)->s_freeblocks_counter)); | 4181 | EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( |
| 4182 | &EXT4_SB(sb)->s_freeclusters_counter))); | ||
| 4105 | es->s_free_inodes_count = | 4183 | es->s_free_inodes_count = |
| 4106 | cpu_to_le32(percpu_counter_sum_positive( | 4184 | cpu_to_le32(percpu_counter_sum_positive( |
| 4107 | &EXT4_SB(sb)->s_freeinodes_counter)); | 4185 | &EXT4_SB(sb)->s_freeinodes_counter)); |
| @@ -4506,16 +4584,34 @@ restore_opts: | |||
| 4506 | return err; | 4584 | return err; |
| 4507 | } | 4585 | } |
| 4508 | 4586 | ||
| 4587 | /* | ||
| 4588 | * Note: calculating the overhead so we can be compatible with | ||
| 4589 | * historical BSD practice is quite difficult in the face of | ||
| 4590 | * clusters/bigalloc. This is because multiple metadata blocks from | ||
| 4591 | * different block group can end up in the same allocation cluster. | ||
| 4592 | * Calculating the exact overhead in the face of clustered allocation | ||
| 4593 | * requires either O(all block bitmaps) in memory or O(number of block | ||
| 4594 | * groups**2) in time. We will still calculate the superblock for | ||
| 4595 | * older file systems --- and if we come across with a bigalloc file | ||
| 4596 | * system with zero in s_overhead_clusters the estimate will be close to | ||
| 4597 | * correct especially for very large cluster sizes --- but for newer | ||
| 4598 | * file systems, it's better to calculate this figure once at mkfs | ||
| 4599 | * time, and store it in the superblock. If the superblock value is | ||
| 4600 | * present (even for non-bigalloc file systems), we will use it. | ||
| 4601 | */ | ||
| 4509 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | 4602 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) |
| 4510 | { | 4603 | { |
| 4511 | struct super_block *sb = dentry->d_sb; | 4604 | struct super_block *sb = dentry->d_sb; |
| 4512 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4605 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 4513 | struct ext4_super_block *es = sbi->s_es; | 4606 | struct ext4_super_block *es = sbi->s_es; |
| 4607 | struct ext4_group_desc *gdp; | ||
| 4514 | u64 fsid; | 4608 | u64 fsid; |
| 4515 | s64 bfree; | 4609 | s64 bfree; |
| 4516 | 4610 | ||
| 4517 | if (test_opt(sb, MINIX_DF)) { | 4611 | if (test_opt(sb, MINIX_DF)) { |
| 4518 | sbi->s_overhead_last = 0; | 4612 | sbi->s_overhead_last = 0; |
| 4613 | } else if (es->s_overhead_clusters) { | ||
| 4614 | sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters); | ||
| 4519 | } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { | 4615 | } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { |
| 4520 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); | 4616 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
| 4521 | ext4_fsblk_t overhead = 0; | 4617 | ext4_fsblk_t overhead = 0; |
| @@ -4530,24 +4626,16 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 4530 | * All of the blocks before first_data_block are | 4626 | * All of the blocks before first_data_block are |
| 4531 | * overhead | 4627 | * overhead |
| 4532 | */ | 4628 | */ |
| 4533 | overhead = le32_to_cpu(es->s_first_data_block); | 4629 | overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block)); |
| 4534 | 4630 | ||
| 4535 | /* | 4631 | /* |
| 4536 | * Add the overhead attributed to the superblock and | 4632 | * Add the overhead found in each block group |
| 4537 | * block group descriptors. If the sparse superblocks | ||
| 4538 | * feature is turned on, then not all groups have this. | ||
| 4539 | */ | 4633 | */ |
| 4540 | for (i = 0; i < ngroups; i++) { | 4634 | for (i = 0; i < ngroups; i++) { |
| 4541 | overhead += ext4_bg_has_super(sb, i) + | 4635 | gdp = ext4_get_group_desc(sb, i, NULL); |
| 4542 | ext4_bg_num_gdb(sb, i); | 4636 | overhead += ext4_num_overhead_clusters(sb, i, gdp); |
| 4543 | cond_resched(); | 4637 | cond_resched(); |
| 4544 | } | 4638 | } |
| 4545 | |||
| 4546 | /* | ||
| 4547 | * Every block group has an inode bitmap, a block | ||
| 4548 | * bitmap, and an inode table. | ||
| 4549 | */ | ||
| 4550 | overhead += ngroups * (2 + sbi->s_itb_per_group); | ||
| 4551 | sbi->s_overhead_last = overhead; | 4639 | sbi->s_overhead_last = overhead; |
| 4552 | smp_wmb(); | 4640 | smp_wmb(); |
| 4553 | sbi->s_blocks_last = ext4_blocks_count(es); | 4641 | sbi->s_blocks_last = ext4_blocks_count(es); |
| @@ -4555,11 +4643,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 4555 | 4643 | ||
| 4556 | buf->f_type = EXT4_SUPER_MAGIC; | 4644 | buf->f_type = EXT4_SUPER_MAGIC; |
| 4557 | buf->f_bsize = sb->s_blocksize; | 4645 | buf->f_bsize = sb->s_blocksize; |
| 4558 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; | 4646 | buf->f_blocks = (ext4_blocks_count(es) - |
| 4559 | bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - | 4647 | EXT4_C2B(sbi, sbi->s_overhead_last)); |
| 4560 | percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); | 4648 | bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) - |
| 4649 | percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter); | ||
| 4561 | /* prevent underflow when little free space is available */ | 4650 | /* prevent underflow when little free space is available */ |
| 4562 | buf->f_bfree = max_t(s64, bfree, 0); | 4651 | buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0)); |
| 4563 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); | 4652 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); |
| 4564 | if (buf->f_bfree < ext4_r_blocks_count(es)) | 4653 | if (buf->f_bfree < ext4_r_blocks_count(es)) |
| 4565 | buf->f_bavail = 0; | 4654 | buf->f_bavail = 0; |
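ext4_statfs() now sums the cluster-granular percpu counters and converts to block units only at the edge, clamping the free count first so in-flight dirty clusters cannot drive it negative. A userspace model of the arithmetic; all the numbers are made up:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned cluster_bits = 4;		/* 16 blocks per cluster */
	int64_t free_clusters    = 500;
	int64_t dirty_clusters   = 520;		/* may briefly exceed free */
	uint64_t blocks_count      = 1000000;
	uint64_t overhead_clusters = 2000;
	uint64_t reserved_blocks   = 5000;

	int64_t bfree = free_clusters - dirty_clusters;
	if (bfree < 0)
		bfree = 0;			/* prevent underflow */

	uint64_t f_blocks = blocks_count - (overhead_clusters << cluster_bits);
	uint64_t f_bfree  = (uint64_t)bfree << cluster_bits;	/* EXT4_C2B */
	uint64_t f_bavail = f_bfree > reserved_blocks ?
				f_bfree - reserved_blocks : 0;

	printf("f_blocks=%llu f_bfree=%llu f_bavail=%llu\n",
	       (unsigned long long)f_blocks,
	       (unsigned long long)f_bfree,
	       (unsigned long long)f_bavail);
	return 0;
}
```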
| @@ -4980,13 +5069,11 @@ static int __init ext4_init_fs(void) | |||
| 4980 | return err; | 5069 | return err; |
| 4981 | err = ext4_init_system_zone(); | 5070 | err = ext4_init_system_zone(); |
| 4982 | if (err) | 5071 | if (err) |
| 4983 | goto out7; | 5072 | goto out6; |
| 4984 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); | 5073 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); |
| 4985 | if (!ext4_kset) | 5074 | if (!ext4_kset) |
| 4986 | goto out6; | ||
| 4987 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | ||
| 4988 | if (!ext4_proc_root) | ||
| 4989 | goto out5; | 5075 | goto out5; |
| 5076 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | ||
| 4990 | 5077 | ||
| 4991 | err = ext4_init_feat_adverts(); | 5078 | err = ext4_init_feat_adverts(); |
| 4992 | if (err) | 5079 | if (err) |
| @@ -5022,12 +5109,12 @@ out2: | |||
| 5022 | out3: | 5109 | out3: |
| 5023 | ext4_exit_feat_adverts(); | 5110 | ext4_exit_feat_adverts(); |
| 5024 | out4: | 5111 | out4: |
| 5025 | remove_proc_entry("fs/ext4", NULL); | 5112 | if (ext4_proc_root) |
| 5026 | out5: | 5113 | remove_proc_entry("fs/ext4", NULL); |
| 5027 | kset_unregister(ext4_kset); | 5114 | kset_unregister(ext4_kset); |
| 5028 | out6: | 5115 | out5: |
| 5029 | ext4_exit_system_zone(); | 5116 | ext4_exit_system_zone(); |
| 5030 | out7: | 5117 | out6: |
| 5031 | ext4_exit_pageio(); | 5118 | ext4_exit_pageio(); |
| 5032 | return err; | 5119 | return err; |
| 5033 | } | 5120 | } |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index c757adc97250..93a00d89a220 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
| @@ -820,8 +820,14 @@ inserted: | |||
| 820 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 820 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
| 821 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | 821 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; |
| 822 | 822 | ||
| 823 | /* | ||
| 824 | * take i_data_sem because we will test | ||
| 825 | * i_delalloc_reserved_flag in ext4_mb_new_blocks | ||
| 826 | */ | ||
| 827 | down_read((&EXT4_I(inode)->i_data_sem)); | ||
| 823 | block = ext4_new_meta_blocks(handle, inode, goal, 0, | 828 | block = ext4_new_meta_blocks(handle, inode, goal, 0, |
| 824 | NULL, &error); | 829 | NULL, &error); |
| 830 | up_read((&EXT4_I(inode)->i_data_sem)); | ||
| 825 | if (error) | 831 | if (error) |
| 826 | goto cleanup; | 832 | goto cleanup; |
| 827 | 833 | ||
| @@ -985,11 +991,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
| 985 | no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); | 991 | no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); |
| 986 | ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); | 992 | ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); |
| 987 | 993 | ||
| 988 | error = ext4_get_inode_loc(inode, &is.iloc); | 994 | error = ext4_reserve_inode_write(handle, inode, &is.iloc); |
| 989 | if (error) | ||
| 990 | goto cleanup; | ||
| 991 | |||
| 992 | error = ext4_journal_get_write_access(handle, is.iloc.bh); | ||
| 993 | if (error) | 995 | if (error) |
| 994 | goto cleanup; | 996 | goto cleanup; |
| 995 | 997 | ||
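The xattr change wraps the metadata-block allocation in down_read(&EXT4_I(inode)->i_data_sem) because ext4_mb_new_blocks() consults i_delalloc_reserved_flag and must see a stable value. A rough userspace analogue using a pthread rwlock; the flag, names, and returned block number are all illustrative:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t i_data_sem = PTHREAD_RWLOCK_INITIALIZER;
static int delalloc_reserved;		/* models i_delalloc_reserved_flag */

static long new_meta_block(void)
{
	/* The allocator branches on the flag; holding the read side
	 * keeps a writer from flipping it mid-allocation. */
	return delalloc_reserved ? -1 : 12345;
}

int main(void)
{
	long block;

	pthread_rwlock_rdlock(&i_data_sem);	/* down_read() */
	block = new_meta_block();
	pthread_rwlock_unlock(&i_data_sem);	/* up_read() */

	printf("allocated block %ld\n", block);
	return 0;
}
```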
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 9fe061fb8779..fea8dd661d2b 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
| @@ -1135,6 +1135,14 @@ static int journal_get_superblock(journal_t *journal) | |||
| 1135 | goto out; | 1135 | goto out; |
| 1136 | } | 1136 | } |
| 1137 | 1137 | ||
| 1138 | if (be32_to_cpu(sb->s_first) == 0 || | ||
| 1139 | be32_to_cpu(sb->s_first) >= journal->j_maxlen) { | ||
| 1140 | printk(KERN_WARNING | ||
| 1141 | "JBD: Invalid start block of journal: %u\n", | ||
| 1142 | be32_to_cpu(sb->s_first)); | ||
| 1143 | goto out; | ||
| 1144 | } | ||
| 1145 | |||
| 1138 | return 0; | 1146 | return 0; |
| 1139 | 1147 | ||
| 1140 | out: | 1148 | out: |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index eef6979821a4..68d704db787f 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
| @@ -352,7 +352,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 352 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | 352 | J_ASSERT(commit_transaction->t_state == T_RUNNING); |
| 353 | 353 | ||
| 354 | trace_jbd2_start_commit(journal, commit_transaction); | 354 | trace_jbd2_start_commit(journal, commit_transaction); |
| 355 | jbd_debug(1, "JBD: starting commit of transaction %d\n", | 355 | jbd_debug(1, "JBD2: starting commit of transaction %d\n", |
| 356 | commit_transaction->t_tid); | 356 | commit_transaction->t_tid); |
| 357 | 357 | ||
| 358 | write_lock(&journal->j_state_lock); | 358 | write_lock(&journal->j_state_lock); |
| @@ -427,7 +427,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 427 | __jbd2_journal_clean_checkpoint_list(journal); | 427 | __jbd2_journal_clean_checkpoint_list(journal); |
| 428 | spin_unlock(&journal->j_list_lock); | 428 | spin_unlock(&journal->j_list_lock); |
| 429 | 429 | ||
| 430 | jbd_debug (3, "JBD: commit phase 1\n"); | 430 | jbd_debug(3, "JBD2: commit phase 1\n"); |
| 431 | 431 | ||
| 432 | /* | 432 | /* |
| 433 | * Switch to a new revoke table. | 433 | * Switch to a new revoke table. |
| @@ -447,7 +447,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 447 | wake_up(&journal->j_wait_transaction_locked); | 447 | wake_up(&journal->j_wait_transaction_locked); |
| 448 | write_unlock(&journal->j_state_lock); | 448 | write_unlock(&journal->j_state_lock); |
| 449 | 449 | ||
| 450 | jbd_debug (3, "JBD: commit phase 2\n"); | 450 | jbd_debug(3, "JBD2: commit phase 2\n"); |
| 451 | 451 | ||
| 452 | /* | 452 | /* |
| 453 | * Now start flushing things to disk, in the order they appear | 453 | * Now start flushing things to disk, in the order they appear |
| @@ -462,7 +462,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 462 | WRITE_SYNC); | 462 | WRITE_SYNC); |
| 463 | blk_finish_plug(&plug); | 463 | blk_finish_plug(&plug); |
| 464 | 464 | ||
| 465 | jbd_debug(3, "JBD: commit phase 2\n"); | 465 | jbd_debug(3, "JBD2: commit phase 2\n"); |
| 466 | 466 | ||
| 467 | /* | 467 | /* |
| 468 | * Way to go: we have now written out all of the data for a | 468 | * Way to go: we have now written out all of the data for a |
| @@ -522,7 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 522 | 522 | ||
| 523 | J_ASSERT (bufs == 0); | 523 | J_ASSERT (bufs == 0); |
| 524 | 524 | ||
| 525 | jbd_debug(4, "JBD: get descriptor\n"); | 525 | jbd_debug(4, "JBD2: get descriptor\n"); |
| 526 | 526 | ||
| 527 | descriptor = jbd2_journal_get_descriptor_buffer(journal); | 527 | descriptor = jbd2_journal_get_descriptor_buffer(journal); |
| 528 | if (!descriptor) { | 528 | if (!descriptor) { |
| @@ -531,7 +531,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 531 | } | 531 | } |
| 532 | 532 | ||
| 533 | bh = jh2bh(descriptor); | 533 | bh = jh2bh(descriptor); |
| 534 | jbd_debug(4, "JBD: got buffer %llu (%p)\n", | 534 | jbd_debug(4, "JBD2: got buffer %llu (%p)\n", |
| 535 | (unsigned long long)bh->b_blocknr, bh->b_data); | 535 | (unsigned long long)bh->b_blocknr, bh->b_data); |
| 536 | header = (journal_header_t *)&bh->b_data[0]; | 536 | header = (journal_header_t *)&bh->b_data[0]; |
| 537 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 537 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
| @@ -625,7 +625,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 625 | commit_transaction->t_buffers == NULL || | 625 | commit_transaction->t_buffers == NULL || |
| 626 | space_left < tag_bytes + 16) { | 626 | space_left < tag_bytes + 16) { |
| 627 | 627 | ||
| 628 | jbd_debug(4, "JBD: Submit %d IOs\n", bufs); | 628 | jbd_debug(4, "JBD2: Submit %d IOs\n", bufs); |
| 629 | 629 | ||
| 630 | /* Write an end-of-descriptor marker before | 630 | /* Write an end-of-descriptor marker before |
| 631 | submitting the IOs. "tag" still points to | 631 | submitting the IOs. "tag" still points to |
| @@ -707,7 +707,7 @@ start_journal_io: | |||
| 707 | so we incur less scheduling load. | 707 | so we incur less scheduling load. |
| 708 | */ | 708 | */ |
| 709 | 709 | ||
| 710 | jbd_debug(3, "JBD: commit phase 3\n"); | 710 | jbd_debug(3, "JBD2: commit phase 3\n"); |
| 711 | 711 | ||
| 712 | /* | 712 | /* |
| 713 | * akpm: these are BJ_IO, and j_list_lock is not needed. | 713 | * akpm: these are BJ_IO, and j_list_lock is not needed. |
| @@ -771,7 +771,7 @@ wait_for_iobuf: | |||
| 771 | 771 | ||
| 772 | J_ASSERT (commit_transaction->t_shadow_list == NULL); | 772 | J_ASSERT (commit_transaction->t_shadow_list == NULL); |
| 773 | 773 | ||
| 774 | jbd_debug(3, "JBD: commit phase 4\n"); | 774 | jbd_debug(3, "JBD2: commit phase 4\n"); |
| 775 | 775 | ||
| 776 | /* Here we wait for the revoke record and descriptor record buffers */ | 776 | /* Here we wait for the revoke record and descriptor record buffers */ |
| 777 | wait_for_ctlbuf: | 777 | wait_for_ctlbuf: |
| @@ -801,7 +801,7 @@ wait_for_iobuf: | |||
| 801 | if (err) | 801 | if (err) |
| 802 | jbd2_journal_abort(journal, err); | 802 | jbd2_journal_abort(journal, err); |
| 803 | 803 | ||
| 804 | jbd_debug(3, "JBD: commit phase 5\n"); | 804 | jbd_debug(3, "JBD2: commit phase 5\n"); |
| 805 | write_lock(&journal->j_state_lock); | 805 | write_lock(&journal->j_state_lock); |
| 806 | J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH); | 806 | J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH); |
| 807 | commit_transaction->t_state = T_COMMIT_JFLUSH; | 807 | commit_transaction->t_state = T_COMMIT_JFLUSH; |
| @@ -830,7 +830,7 @@ wait_for_iobuf: | |||
| 830 | transaction can be removed from any checkpoint list it was on | 830 | transaction can be removed from any checkpoint list it was on |
| 831 | before. */ | 831 | before. */ |
| 832 | 832 | ||
| 833 | jbd_debug(3, "JBD: commit phase 6\n"); | 833 | jbd_debug(3, "JBD2: commit phase 6\n"); |
| 834 | 834 | ||
| 835 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); | 835 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); |
| 836 | J_ASSERT(commit_transaction->t_buffers == NULL); | 836 | J_ASSERT(commit_transaction->t_buffers == NULL); |
| @@ -964,7 +964,7 @@ restart_loop: | |||
| 964 | 964 | ||
| 965 | /* Done with this transaction! */ | 965 | /* Done with this transaction! */ |
| 966 | 966 | ||
| 967 | jbd_debug(3, "JBD: commit phase 7\n"); | 967 | jbd_debug(3, "JBD2: commit phase 7\n"); |
| 968 | 968 | ||
| 969 | J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH); | 969 | J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH); |
| 970 | 970 | ||
| @@ -1039,7 +1039,7 @@ restart_loop: | |||
| 1039 | journal->j_commit_callback(journal, commit_transaction); | 1039 | journal->j_commit_callback(journal, commit_transaction); |
| 1040 | 1040 | ||
| 1041 | trace_jbd2_end_commit(journal, commit_transaction); | 1041 | trace_jbd2_end_commit(journal, commit_transaction); |
| 1042 | jbd_debug(1, "JBD: commit %d complete, head %d\n", | 1042 | jbd_debug(1, "JBD2: commit %d complete, head %d\n", |
| 1043 | journal->j_commit_sequence, journal->j_tail_sequence); | 1043 | journal->j_commit_sequence, journal->j_tail_sequence); |
| 1044 | if (to_free) | 1044 | if (to_free) |
| 1045 | kfree(commit_transaction); | 1045 | kfree(commit_transaction); |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index f24df13adc4e..0fa0123151d3 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
| @@ -491,7 +491,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) | |||
| 491 | */ | 491 | */ |
| 492 | 492 | ||
| 493 | journal->j_commit_request = target; | 493 | journal->j_commit_request = target; |
| 494 | jbd_debug(1, "JBD: requesting commit %d/%d\n", | 494 | jbd_debug(1, "JBD2: requesting commit %d/%d\n", |
| 495 | journal->j_commit_request, | 495 | journal->j_commit_request, |
| 496 | journal->j_commit_sequence); | 496 | journal->j_commit_sequence); |
| 497 | wake_up(&journal->j_wait_commit); | 497 | wake_up(&journal->j_wait_commit); |
| @@ -500,7 +500,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) | |||
| 500 | /* This should never happen, but if it does, preserve | 500 | /* This should never happen, but if it does, preserve |
| 501 | the evidence before kjournald goes into a loop and | 501 | the evidence before kjournald goes into a loop and |
| 502 | increments j_commit_sequence beyond all recognition. */ | 502 | increments j_commit_sequence beyond all recognition. */ |
| 503 | WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n", | 503 | WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n", |
| 504 | journal->j_commit_request, | 504 | journal->j_commit_request, |
| 505 | journal->j_commit_sequence, | 505 | journal->j_commit_sequence, |
| 506 | target, journal->j_running_transaction ? | 506 | target, journal->j_running_transaction ? |
| @@ -645,7 +645,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) | |||
| 645 | } | 645 | } |
| 646 | #endif | 646 | #endif |
| 647 | while (tid_gt(tid, journal->j_commit_sequence)) { | 647 | while (tid_gt(tid, journal->j_commit_sequence)) { |
| 648 | jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", | 648 | jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", |
| 649 | tid, journal->j_commit_sequence); | 649 | tid, journal->j_commit_sequence); |
| 650 | wake_up(&journal->j_wait_commit); | 650 | wake_up(&journal->j_wait_commit); |
| 651 | read_unlock(&journal->j_state_lock); | 651 | read_unlock(&journal->j_state_lock); |
| @@ -1093,7 +1093,7 @@ static int journal_reset(journal_t *journal) | |||
| 1093 | first = be32_to_cpu(sb->s_first); | 1093 | first = be32_to_cpu(sb->s_first); |
| 1094 | last = be32_to_cpu(sb->s_maxlen); | 1094 | last = be32_to_cpu(sb->s_maxlen); |
| 1095 | if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { | 1095 | if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { |
| 1096 | printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n", | 1096 | printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n", |
| 1097 | first, last); | 1097 | first, last); |
| 1098 | journal_fail_superblock(journal); | 1098 | journal_fail_superblock(journal); |
| 1099 | return -EINVAL; | 1099 | return -EINVAL; |
| @@ -1139,7 +1139,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
| 1139 | */ | 1139 | */ |
| 1140 | if (sb->s_start == 0 && journal->j_tail_sequence == | 1140 | if (sb->s_start == 0 && journal->j_tail_sequence == |
| 1141 | journal->j_transaction_sequence) { | 1141 | journal->j_transaction_sequence) { |
| 1142 | jbd_debug(1,"JBD: Skipping superblock update on recovered sb " | 1142 | jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " |
| 1143 | "(start %ld, seq %d, errno %d)\n", | 1143 | "(start %ld, seq %d, errno %d)\n", |
| 1144 | journal->j_tail, journal->j_tail_sequence, | 1144 | journal->j_tail, journal->j_tail_sequence, |
| 1145 | journal->j_errno); | 1145 | journal->j_errno); |
| @@ -1163,7 +1163,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
| 1163 | } | 1163 | } |
| 1164 | 1164 | ||
| 1165 | read_lock(&journal->j_state_lock); | 1165 | read_lock(&journal->j_state_lock); |
| 1166 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", | 1166 | jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n", |
| 1167 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); | 1167 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); |
| 1168 | 1168 | ||
| 1169 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); | 1169 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); |
| @@ -1216,8 +1216,8 @@ static int journal_get_superblock(journal_t *journal) | |||
| 1216 | ll_rw_block(READ, 1, &bh); | 1216 | ll_rw_block(READ, 1, &bh); |
| 1217 | wait_on_buffer(bh); | 1217 | wait_on_buffer(bh); |
| 1218 | if (!buffer_uptodate(bh)) { | 1218 | if (!buffer_uptodate(bh)) { |
| 1219 | printk (KERN_ERR | 1219 | printk(KERN_ERR |
| 1220 | "JBD: IO error reading journal superblock\n"); | 1220 | "JBD2: IO error reading journal superblock\n"); |
| 1221 | goto out; | 1221 | goto out; |
| 1222 | } | 1222 | } |
| 1223 | } | 1223 | } |
| @@ -1228,7 +1228,7 @@ static int journal_get_superblock(journal_t *journal) | |||
| 1228 | 1228 | ||
| 1229 | if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || | 1229 | if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || |
| 1230 | sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { | 1230 | sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { |
| 1231 | printk(KERN_WARNING "JBD: no valid journal superblock found\n"); | 1231 | printk(KERN_WARNING "JBD2: no valid journal superblock found\n"); |
| 1232 | goto out; | 1232 | goto out; |
| 1233 | } | 1233 | } |
| 1234 | 1234 | ||
| @@ -1240,14 +1240,22 @@ static int journal_get_superblock(journal_t *journal) | |||
| 1240 | journal->j_format_version = 2; | 1240 | journal->j_format_version = 2; |
| 1241 | break; | 1241 | break; |
| 1242 | default: | 1242 | default: |
| 1243 | printk(KERN_WARNING "JBD: unrecognised superblock format ID\n"); | 1243 | printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n"); |
| 1244 | goto out; | 1244 | goto out; |
| 1245 | } | 1245 | } |
| 1246 | 1246 | ||
| 1247 | if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) | 1247 | if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) |
| 1248 | journal->j_maxlen = be32_to_cpu(sb->s_maxlen); | 1248 | journal->j_maxlen = be32_to_cpu(sb->s_maxlen); |
| 1249 | else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { | 1249 | else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { |
| 1250 | printk (KERN_WARNING "JBD: journal file too short\n"); | 1250 | printk(KERN_WARNING "JBD2: journal file too short\n"); |
| 1251 | goto out; | ||
| 1252 | } | ||
| 1253 | |||
| 1254 | if (be32_to_cpu(sb->s_first) == 0 || | ||
| 1255 | be32_to_cpu(sb->s_first) >= journal->j_maxlen) { | ||
| 1256 | printk(KERN_WARNING | ||
| 1257 | "JBD2: Invalid start block of journal: %u\n", | ||
| 1258 | be32_to_cpu(sb->s_first)); | ||
| 1251 | goto out; | 1259 | goto out; |
| 1252 | } | 1260 | } |
| 1253 | 1261 | ||
| @@ -1310,8 +1318,8 @@ int jbd2_journal_load(journal_t *journal) | |||
| 1310 | ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || | 1318 | ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || |
| 1311 | (sb->s_feature_incompat & | 1319 | (sb->s_feature_incompat & |
| 1312 | ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { | 1320 | ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { |
| 1313 | printk (KERN_WARNING | 1321 | printk(KERN_WARNING |
| 1314 | "JBD: Unrecognised features on journal\n"); | 1322 | "JBD2: Unrecognised features on journal\n"); |
| 1315 | return -EINVAL; | 1323 | return -EINVAL; |
| 1316 | } | 1324 | } |
| 1317 | } | 1325 | } |
| @@ -1346,7 +1354,7 @@ int jbd2_journal_load(journal_t *journal) | |||
| 1346 | return 0; | 1354 | return 0; |
| 1347 | 1355 | ||
| 1348 | recovery_error: | 1356 | recovery_error: |
| 1349 | printk (KERN_WARNING "JBD: recovery failed\n"); | 1357 | printk(KERN_WARNING "JBD2: recovery failed\n"); |
| 1350 | return -EIO; | 1358 | return -EIO; |
| 1351 | } | 1359 | } |
| 1352 | 1360 | ||
| @@ -1577,7 +1585,7 @@ static int journal_convert_superblock_v1(journal_t *journal, | |||
| 1577 | struct buffer_head *bh; | 1585 | struct buffer_head *bh; |
| 1578 | 1586 | ||
| 1579 | printk(KERN_WARNING | 1587 | printk(KERN_WARNING |
| 1580 | "JBD: Converting superblock from version 1 to 2.\n"); | 1588 | "JBD2: Converting superblock from version 1 to 2.\n"); |
| 1581 | 1589 | ||
| 1582 | /* Pre-initialise new fields to zero */ | 1590 | /* Pre-initialise new fields to zero */ |
| 1583 | offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); | 1591 | offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); |
| @@ -1694,7 +1702,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) | |||
| 1694 | if (!journal->j_tail) | 1702 | if (!journal->j_tail) |
| 1695 | goto no_recovery; | 1703 | goto no_recovery; |
| 1696 | 1704 | ||
| 1697 | printk (KERN_WARNING "JBD: %s recovery information on journal\n", | 1705 | printk(KERN_WARNING "JBD2: %s recovery information on journal\n", |
| 1698 | write ? "Clearing" : "Ignoring"); | 1706 | write ? "Clearing" : "Ignoring"); |
| 1699 | 1707 | ||
| 1700 | err = jbd2_journal_skip_recovery(journal); | 1708 | err = jbd2_journal_skip_recovery(journal); |
| @@ -2020,7 +2028,7 @@ static int journal_init_jbd2_journal_head_cache(void) | |||
| 2020 | retval = 0; | 2028 | retval = 0; |
| 2021 | if (!jbd2_journal_head_cache) { | 2029 | if (!jbd2_journal_head_cache) { |
| 2022 | retval = -ENOMEM; | 2030 | retval = -ENOMEM; |
| 2023 | printk(KERN_EMERG "JBD: no memory for journal_head cache\n"); | 2031 | printk(KERN_EMERG "JBD2: no memory for journal_head cache\n"); |
| 2024 | } | 2032 | } |
| 2025 | return retval; | 2033 | return retval; |
| 2026 | } | 2034 | } |
| @@ -2383,7 +2391,7 @@ static void __exit journal_exit(void) | |||
| 2383 | #ifdef CONFIG_JBD2_DEBUG | 2391 | #ifdef CONFIG_JBD2_DEBUG |
| 2384 | int n = atomic_read(&nr_journal_heads); | 2392 | int n = atomic_read(&nr_journal_heads); |
| 2385 | if (n) | 2393 | if (n) |
| 2386 | printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); | 2394 | printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n); |
| 2387 | #endif | 2395 | #endif |
| 2388 | jbd2_remove_debugfs_entry(); | 2396 | jbd2_remove_debugfs_entry(); |
| 2389 | jbd2_remove_jbd_stats_proc_entry(); | 2397 | jbd2_remove_jbd_stats_proc_entry(); |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 1cad869494f0..da6d7baf1390 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
| @@ -89,7 +89,7 @@ static int do_readahead(journal_t *journal, unsigned int start) | |||
| 89 | err = jbd2_journal_bmap(journal, next, &blocknr); | 89 | err = jbd2_journal_bmap(journal, next, &blocknr); |
| 90 | 90 | ||
| 91 | if (err) { | 91 | if (err) { |
| 92 | printk (KERN_ERR "JBD: bad block at offset %u\n", | 92 | printk(KERN_ERR "JBD2: bad block at offset %u\n", |
| 93 | next); | 93 | next); |
| 94 | goto failed; | 94 | goto failed; |
| 95 | } | 95 | } |
| @@ -138,14 +138,14 @@ static int jread(struct buffer_head **bhp, journal_t *journal, | |||
| 138 | *bhp = NULL; | 138 | *bhp = NULL; |
| 139 | 139 | ||
| 140 | if (offset >= journal->j_maxlen) { | 140 | if (offset >= journal->j_maxlen) { |
| 141 | printk(KERN_ERR "JBD: corrupted journal superblock\n"); | 141 | printk(KERN_ERR "JBD2: corrupted journal superblock\n"); |
| 142 | return -EIO; | 142 | return -EIO; |
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | err = jbd2_journal_bmap(journal, offset, &blocknr); | 145 | err = jbd2_journal_bmap(journal, offset, &blocknr); |
| 146 | 146 | ||
| 147 | if (err) { | 147 | if (err) { |
| 148 | printk (KERN_ERR "JBD: bad block at offset %u\n", | 148 | printk(KERN_ERR "JBD2: bad block at offset %u\n", |
| 149 | offset); | 149 | offset); |
| 150 | return err; | 150 | return err; |
| 151 | } | 151 | } |
| @@ -163,7 +163,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, | |||
| 163 | } | 163 | } |
| 164 | 164 | ||
| 165 | if (!buffer_uptodate(bh)) { | 165 | if (!buffer_uptodate(bh)) { |
| 166 | printk (KERN_ERR "JBD: Failed to read block at offset %u\n", | 166 | printk(KERN_ERR "JBD2: Failed to read block at offset %u\n", |
| 167 | offset); | 167 | offset); |
| 168 | brelse(bh); | 168 | brelse(bh); |
| 169 | return -EIO; | 169 | return -EIO; |
| @@ -251,10 +251,10 @@ int jbd2_journal_recover(journal_t *journal) | |||
| 251 | if (!err) | 251 | if (!err) |
| 252 | err = do_one_pass(journal, &info, PASS_REPLAY); | 252 | err = do_one_pass(journal, &info, PASS_REPLAY); |
| 253 | 253 | ||
| 254 | jbd_debug(1, "JBD: recovery, exit status %d, " | 254 | jbd_debug(1, "JBD2: recovery, exit status %d, " |
| 255 | "recovered transactions %u to %u\n", | 255 | "recovered transactions %u to %u\n", |
| 256 | err, info.start_transaction, info.end_transaction); | 256 | err, info.start_transaction, info.end_transaction); |
| 257 | jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n", | 257 | jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", |
| 258 | info.nr_replays, info.nr_revoke_hits, info.nr_revokes); | 258 | info.nr_replays, info.nr_revoke_hits, info.nr_revokes); |
| 259 | 259 | ||
| 260 | /* Restart the log at the next transaction ID, thus invalidating | 260 | /* Restart the log at the next transaction ID, thus invalidating |
| @@ -293,14 +293,14 @@ int jbd2_journal_skip_recovery(journal_t *journal) | |||
| 293 | err = do_one_pass(journal, &info, PASS_SCAN); | 293 | err = do_one_pass(journal, &info, PASS_SCAN); |
| 294 | 294 | ||
| 295 | if (err) { | 295 | if (err) { |
| 296 | printk(KERN_ERR "JBD: error %d scanning journal\n", err); | 296 | printk(KERN_ERR "JBD2: error %d scanning journal\n", err); |
| 297 | ++journal->j_transaction_sequence; | 297 | ++journal->j_transaction_sequence; |
| 298 | } else { | 298 | } else { |
| 299 | #ifdef CONFIG_JBD2_DEBUG | 299 | #ifdef CONFIG_JBD2_DEBUG |
| 300 | int dropped = info.end_transaction - | 300 | int dropped = info.end_transaction - |
| 301 | be32_to_cpu(journal->j_superblock->s_sequence); | 301 | be32_to_cpu(journal->j_superblock->s_sequence); |
| 302 | jbd_debug(1, | 302 | jbd_debug(1, |
| 303 | "JBD: ignoring %d transaction%s from the journal.\n", | 303 | "JBD2: ignoring %d transaction%s from the journal.\n", |
| 304 | dropped, (dropped == 1) ? "" : "s"); | 304 | dropped, (dropped == 1) ? "" : "s"); |
| 305 | #endif | 305 | #endif |
| 306 | journal->j_transaction_sequence = ++info.end_transaction; | 306 | journal->j_transaction_sequence = ++info.end_transaction; |
| @@ -338,7 +338,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh, | |||
| 338 | wrap(journal, *next_log_block); | 338 | wrap(journal, *next_log_block); |
| 339 | err = jread(&obh, journal, io_block); | 339 | err = jread(&obh, journal, io_block); |
| 340 | if (err) { | 340 | if (err) { |
| 341 | printk(KERN_ERR "JBD: IO error %d recovering block " | 341 | printk(KERN_ERR "JBD2: IO error %d recovering block " |
| 342 | "%lu in log\n", err, io_block); | 342 | "%lu in log\n", err, io_block); |
| 343 | return 1; | 343 | return 1; |
| 344 | } else { | 344 | } else { |
| @@ -411,7 +411,7 @@ static int do_one_pass(journal_t *journal, | |||
| 411 | * either the next descriptor block or the final commit | 411 | * either the next descriptor block or the final commit |
| 412 | * record. */ | 412 | * record. */ |
| 413 | 413 | ||
| 414 | jbd_debug(3, "JBD: checking block %ld\n", next_log_block); | 414 | jbd_debug(3, "JBD2: checking block %ld\n", next_log_block); |
| 415 | err = jread(&bh, journal, next_log_block); | 415 | err = jread(&bh, journal, next_log_block); |
| 416 | if (err) | 416 | if (err) |
| 417 | goto failed; | 417 | goto failed; |
| @@ -491,8 +491,8 @@ static int do_one_pass(journal_t *journal, | |||
| 491 | /* Recover what we can, but | 491 | /* Recover what we can, but |
| 492 | * report failure at the end. */ | 492 | * report failure at the end. */ |
| 493 | success = err; | 493 | success = err; |
| 494 | printk (KERN_ERR | 494 | printk(KERN_ERR |
| 495 | "JBD: IO error %d recovering " | 495 | "JBD2: IO error %d recovering " |
| 496 | "block %ld in log\n", | 496 | "block %ld in log\n", |
| 497 | err, io_block); | 497 | err, io_block); |
| 498 | } else { | 498 | } else { |
| @@ -520,7 +520,7 @@ static int do_one_pass(journal_t *journal, | |||
| 520 | journal->j_blocksize); | 520 | journal->j_blocksize); |
| 521 | if (nbh == NULL) { | 521 | if (nbh == NULL) { |
| 522 | printk(KERN_ERR | 522 | printk(KERN_ERR |
| 523 | "JBD: Out of memory " | 523 | "JBD2: Out of memory " |
| 524 | "during recovery.\n"); | 524 | "during recovery.\n"); |
| 525 | err = -ENOMEM; | 525 | err = -ENOMEM; |
| 526 | brelse(bh); | 526 | brelse(bh); |
| @@ -689,7 +689,7 @@ static int do_one_pass(journal_t *journal, | |||
| 689 | /* It's really bad news if different passes end up at | 689 | /* It's really bad news if different passes end up at |
| 690 | * different places (but possible due to IO errors). */ | 690 | * different places (but possible due to IO errors). */ |
| 691 | if (info->end_transaction != next_commit_ID) { | 691 | if (info->end_transaction != next_commit_ID) { |
| 692 | printk (KERN_ERR "JBD: recovery pass %d ended at " | 692 | printk(KERN_ERR "JBD2: recovery pass %d ended at " |
| 693 | "transaction %u, expected %u\n", | 693 | "transaction %u, expected %u\n", |
| 694 | pass, next_commit_ID, info->end_transaction); | 694 | pass, next_commit_ID, info->end_transaction); |
| 695 | if (!success) | 695 | if (!success) |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 2d7109414cdd..a0e41a4c080e 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
| 28 | #include <linux/hrtimer.h> | 28 | #include <linux/hrtimer.h> |
| 29 | #include <linux/backing-dev.h> | 29 | #include <linux/backing-dev.h> |
| 30 | #include <linux/bug.h> | ||
| 30 | #include <linux/module.h> | 31 | #include <linux/module.h> |
| 31 | 32 | ||
| 32 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | 33 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); |
| @@ -115,7 +116,7 @@ static inline void update_t_max_wait(transaction_t *transaction, | |||
| 115 | */ | 116 | */ |
| 116 | 117 | ||
| 117 | static int start_this_handle(journal_t *journal, handle_t *handle, | 118 | static int start_this_handle(journal_t *journal, handle_t *handle, |
| 118 | int gfp_mask) | 119 | gfp_t gfp_mask) |
| 119 | { | 120 | { |
| 120 | transaction_t *transaction, *new_transaction = NULL; | 121 | transaction_t *transaction, *new_transaction = NULL; |
| 121 | tid_t tid; | 122 | tid_t tid; |
| @@ -124,7 +125,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle, | |||
| 124 | unsigned long ts = jiffies; | 125 | unsigned long ts = jiffies; |
| 125 | 126 | ||
| 126 | if (nblocks > journal->j_max_transaction_buffers) { | 127 | if (nblocks > journal->j_max_transaction_buffers) { |
| 127 | printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", | 128 | printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n", |
| 128 | current->comm, nblocks, | 129 | current->comm, nblocks, |
| 129 | journal->j_max_transaction_buffers); | 130 | journal->j_max_transaction_buffers); |
| 130 | return -ENOSPC; | 131 | return -ENOSPC; |
| @@ -320,7 +321,7 @@ static handle_t *new_handle(int nblocks) | |||
| 320 | * Return a pointer to a newly allocated handle, or an ERR_PTR() value | 321 | * Return a pointer to a newly allocated handle, or an ERR_PTR() value |
| 321 | * on failure. | 322 | * on failure. |
| 322 | */ | 323 | */ |
| 323 | handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask) | 324 | handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask) |
| 324 | { | 325 | { |
| 325 | handle_t *handle = journal_current_handle(); | 326 | handle_t *handle = journal_current_handle(); |
| 326 | int err; | 327 | int err; |
| @@ -443,7 +444,7 @@ out: | |||
| 443 | * transaction capable of guaranteeing the requested number of | 444 | * transaction capable of guaranteeing the requested number of |
| 444 | * credits. | 445 | * credits. |
| 445 | */ | 446 | */ |
| 446 | int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) | 447 | int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) |
| 447 | { | 448 | { |
| 448 | transaction_t *transaction = handle->h_transaction; | 449 | transaction_t *transaction = handle->h_transaction; |
| 449 | journal_t *journal = transaction->t_journal; | 450 | journal_t *journal = transaction->t_journal; |
| @@ -563,7 +564,7 @@ static void warn_dirty_buffer(struct buffer_head *bh) | |||
| 563 | char b[BDEVNAME_SIZE]; | 564 | char b[BDEVNAME_SIZE]; |
| 564 | 565 | ||
| 565 | printk(KERN_WARNING | 566 | printk(KERN_WARNING |
| 566 | "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). " | 567 | "JBD2: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). " |
| 567 | "There's a risk of filesystem corruption in case of system " | 568 | "There's a risk of filesystem corruption in case of system " |
| 568 | "crash.\n", | 569 | "crash.\n", |
| 569 | bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); | 570 | bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); |
| @@ -1049,6 +1050,10 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh, | |||
| 1049 | * mark dirty metadata which needs to be journaled as part of the current | 1050 | * mark dirty metadata which needs to be journaled as part of the current |
| 1050 | * transaction. | 1051 | * transaction. |
| 1051 | * | 1052 | * |
| 1053 | * The buffer must have previously had jbd2_journal_get_write_access() | ||
| 1054 | * called so that it has a valid journal_head attached to the buffer | ||
| 1055 | * head. | ||
| 1056 | * | ||
| 1052 | * The buffer is placed on the transaction's metadata list and is marked | 1057 | * The buffer is placed on the transaction's metadata list and is marked |
| 1053 | * as belonging to the transaction. | 1058 | * as belonging to the transaction. |
| 1054 | * | 1059 | * |
| @@ -1065,11 +1070,16 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
| 1065 | transaction_t *transaction = handle->h_transaction; | 1070 | transaction_t *transaction = handle->h_transaction; |
| 1066 | journal_t *journal = transaction->t_journal; | 1071 | journal_t *journal = transaction->t_journal; |
| 1067 | struct journal_head *jh = bh2jh(bh); | 1072 | struct journal_head *jh = bh2jh(bh); |
| 1073 | int ret = 0; | ||
| 1068 | 1074 | ||
| 1069 | jbd_debug(5, "journal_head %p\n", jh); | 1075 | jbd_debug(5, "journal_head %p\n", jh); |
| 1070 | JBUFFER_TRACE(jh, "entry"); | 1076 | JBUFFER_TRACE(jh, "entry"); |
| 1071 | if (is_handle_aborted(handle)) | 1077 | if (is_handle_aborted(handle)) |
| 1072 | goto out; | 1078 | goto out; |
| 1079 | if (!buffer_jbd(bh)) { | ||
| 1080 | ret = -EUCLEAN; | ||
| 1081 | goto out; | ||
| 1082 | } | ||
| 1073 | 1083 | ||
| 1074 | jbd_lock_bh_state(bh); | 1084 | jbd_lock_bh_state(bh); |
| 1075 | 1085 | ||
| @@ -1093,8 +1103,20 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
| 1093 | */ | 1103 | */ |
| 1094 | if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) { | 1104 | if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) { |
| 1095 | JBUFFER_TRACE(jh, "fastpath"); | 1105 | JBUFFER_TRACE(jh, "fastpath"); |
| 1096 | J_ASSERT_JH(jh, jh->b_transaction == | 1106 | if (unlikely(jh->b_transaction != |
| 1097 | journal->j_running_transaction); | 1107 | journal->j_running_transaction)) { |
| 1108 | printk(KERN_EMERG "JBD: %s: " | ||
| 1109 | "jh->b_transaction (%llu, %p, %u) != " | ||
| 1110 | "journal->j_running_transaction (%p, %u)", | ||
| 1111 | journal->j_devname, | ||
| 1112 | (unsigned long long) bh->b_blocknr, | ||
| 1113 | jh->b_transaction, | ||
| 1114 | jh->b_transaction ? jh->b_transaction->t_tid : 0, | ||
| 1115 | journal->j_running_transaction, | ||
| 1116 | journal->j_running_transaction ? | ||
| 1117 | journal->j_running_transaction->t_tid : 0); | ||
| 1118 | ret = -EINVAL; | ||
| 1119 | } | ||
| 1098 | goto out_unlock_bh; | 1120 | goto out_unlock_bh; |
| 1099 | } | 1121 | } |
| 1100 | 1122 | ||
| @@ -1108,9 +1130,32 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
| 1108 | */ | 1130 | */ |
| 1109 | if (jh->b_transaction != transaction) { | 1131 | if (jh->b_transaction != transaction) { |
| 1110 | JBUFFER_TRACE(jh, "already on other transaction"); | 1132 | JBUFFER_TRACE(jh, "already on other transaction"); |
| 1111 | J_ASSERT_JH(jh, jh->b_transaction == | 1133 | if (unlikely(jh->b_transaction != |
| 1112 | journal->j_committing_transaction); | 1134 | journal->j_committing_transaction)) { |
| 1113 | J_ASSERT_JH(jh, jh->b_next_transaction == transaction); | 1135 | printk(KERN_EMERG "JBD2: %s: " |
| 1136 | "jh->b_transaction (%llu, %p, %u) != " | ||
| 1137 | "journal->j_committing_transaction (%p, %u)", | ||
| 1138 | journal->j_devname, | ||
| 1139 | (unsigned long long) bh->b_blocknr, | ||
| 1140 | jh->b_transaction, | ||
| 1141 | jh->b_transaction ? jh->b_transaction->t_tid : 0, | ||
| 1142 | journal->j_committing_transaction, | ||
| 1143 | journal->j_committing_transaction ? | ||
| 1144 | journal->j_committing_transaction->t_tid : 0); | ||
| 1145 | ret = -EINVAL; | ||
| 1146 | } | ||
| 1147 | if (unlikely(jh->b_next_transaction != transaction)) { | ||
| 1148 | printk(KERN_EMERG "JBD: %s: " | ||
| 1149 | "jh->b_next_transaction (%llu, %p, %u) != " | ||
| 1150 | "transaction (%p, %u)", | ||
| 1151 | journal->j_devname, | ||
| 1152 | (unsigned long long) bh->b_blocknr, | ||
| 1153 | jh->b_next_transaction, | ||
| 1154 | jh->b_next_transaction ? | ||
| 1155 | jh->b_next_transaction->t_tid : 0, | ||
| 1156 | transaction, transaction->t_tid); | ||
| 1157 | ret = -EINVAL; | ||
| 1158 | } | ||
| 1114 | /* And this case is illegal: we can't reuse another | 1159 | /* And this case is illegal: we can't reuse another |
| 1115 | * transaction's data buffer, ever. */ | 1160 | * transaction's data buffer, ever. */ |
| 1116 | goto out_unlock_bh; | 1161 | goto out_unlock_bh; |
| @@ -1127,7 +1172,8 @@ out_unlock_bh: | |||
| 1127 | jbd_unlock_bh_state(bh); | 1172 | jbd_unlock_bh_state(bh); |
| 1128 | out: | 1173 | out: |
| 1129 | JBUFFER_TRACE(jh, "exit"); | 1174 | JBUFFER_TRACE(jh, "exit"); |
| 1130 | return 0; | 1175 | WARN_ON(ret); /* All errors are bugs, so dump the stack */ |
| 1176 | return ret; | ||
| 1131 | } | 1177 | } |
| 1132 | 1178 | ||
| 1133 | /* | 1179 | /* |
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index 53792bf36c71..ce1b719e8bd4 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h | |||
| @@ -197,8 +197,8 @@ struct ext2_group_desc | |||
| 197 | 197 | ||
| 198 | /* Flags that should be inherited by new inodes from their parent. */ | 198 | /* Flags that should be inherited by new inodes from their parent. */ |
| 199 | #define EXT2_FL_INHERITED (EXT2_SECRM_FL | EXT2_UNRM_FL | EXT2_COMPR_FL |\ | 199 | #define EXT2_FL_INHERITED (EXT2_SECRM_FL | EXT2_UNRM_FL | EXT2_COMPR_FL |\ |
| 200 | EXT2_SYNC_FL | EXT2_IMMUTABLE_FL | EXT2_APPEND_FL |\ | 200 | EXT2_SYNC_FL | EXT2_NODUMP_FL |\ |
| 201 | EXT2_NODUMP_FL | EXT2_NOATIME_FL | EXT2_COMPRBLK_FL|\ | 201 | EXT2_NOATIME_FL | EXT2_COMPRBLK_FL |\ |
| 202 | EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\ | 202 | EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\ |
| 203 | EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL) | 203 | EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL) |
| 204 | 204 | ||
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index f5fceffd4cfe..dec99116a0e4 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h | |||
| @@ -180,8 +180,8 @@ struct ext3_group_desc | |||
| 180 | 180 | ||
| 181 | /* Flags that should be inherited by new inodes from their parent. */ | 181 | /* Flags that should be inherited by new inodes from their parent. */ |
| 182 | #define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\ | 182 | #define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\ |
| 183 | EXT3_SYNC_FL | EXT3_IMMUTABLE_FL | EXT3_APPEND_FL |\ | 183 | EXT3_SYNC_FL | EXT3_NODUMP_FL |\ |
| 184 | EXT3_NODUMP_FL | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL|\ | 184 | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL |\ |
| 185 | EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\ | 185 | EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\ |
| 186 | EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL) | 186 | EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL) |
| 187 | 187 | ||
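Both flag-mask hunks (this one and the ext2 one above) drop the IMMUTABLE and APPEND bits from the inherited set, so a file created inside an append-only or immutable directory no longer starts out with those restrictions. The mask is applied once at inode-creation time, roughly as in this sketch (the helper name is illustrative):

	/*
	 * Sketch: compute the flags a new ext3 inode inherits from its
	 * parent directory.  EXT3_IMMUTABLE_FL and EXT3_APPEND_FL are no
	 * longer in EXT3_FL_INHERITED, so the new file stays writable.
	 */
	static __u32 inherit_flags(struct inode *dir)
	{
		return EXT3_I(dir)->i_flags & EXT3_FL_INHERITED;
	}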
diff --git a/include/linux/fs.h b/include/linux/fs.h index 7a049fd2aa4c..78af9385f415 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -770,12 +770,13 @@ struct inode { | |||
| 770 | unsigned long i_ino; | 770 | unsigned long i_ino; |
| 771 | unsigned int i_nlink; | 771 | unsigned int i_nlink; |
| 772 | dev_t i_rdev; | 772 | dev_t i_rdev; |
| 773 | loff_t i_size; | ||
| 774 | struct timespec i_atime; | 773 | struct timespec i_atime; |
| 775 | struct timespec i_mtime; | 774 | struct timespec i_mtime; |
| 776 | struct timespec i_ctime; | 775 | struct timespec i_ctime; |
| 777 | unsigned int i_blkbits; | 776 | spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ |
| 777 | unsigned short i_bytes; | ||
| 778 | blkcnt_t i_blocks; | 778 | blkcnt_t i_blocks; |
| 779 | loff_t i_size; | ||
| 779 | 780 | ||
| 780 | #ifdef __NEED_I_SIZE_ORDERED | 781 | #ifdef __NEED_I_SIZE_ORDERED |
| 781 | seqcount_t i_size_seqcount; | 782 | seqcount_t i_size_seqcount; |
| @@ -783,7 +784,6 @@ struct inode { | |||
| 783 | 784 | ||
| 784 | /* Misc */ | 785 | /* Misc */ |
| 785 | unsigned long i_state; | 786 | unsigned long i_state; |
| 786 | spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ | ||
| 787 | struct mutex i_mutex; | 787 | struct mutex i_mutex; |
| 788 | 788 | ||
| 789 | unsigned long dirtied_when; /* jiffies of first dirtying */ | 789 | unsigned long dirtied_when; /* jiffies of first dirtying */ |
| @@ -797,9 +797,10 @@ struct inode { | |||
| 797 | struct rcu_head i_rcu; | 797 | struct rcu_head i_rcu; |
| 798 | }; | 798 | }; |
| 799 | atomic_t i_count; | 799 | atomic_t i_count; |
| 800 | unsigned int i_blkbits; | ||
| 800 | u64 i_version; | 801 | u64 i_version; |
| 801 | unsigned short i_bytes; | ||
| 802 | atomic_t i_dio_count; | 802 | atomic_t i_dio_count; |
| 803 | atomic_t i_writecount; | ||
| 803 | const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ | 804 | const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ |
| 804 | struct file_lock *i_flock; | 805 | struct file_lock *i_flock; |
| 805 | struct address_space i_data; | 806 | struct address_space i_data; |
| @@ -823,7 +824,6 @@ struct inode { | |||
| 823 | #ifdef CONFIG_IMA | 824 | #ifdef CONFIG_IMA |
| 824 | atomic_t i_readcount; /* struct files open RO */ | 825 | atomic_t i_readcount; /* struct files open RO */ |
| 825 | #endif | 826 | #endif |
| 826 | atomic_t i_writecount; | ||
| 827 | void *i_private; /* fs or device private pointer */ | 827 | void *i_private; /* fs or device private pointer */ |
| 828 | }; | 828 | }; |
| 829 | 829 | ||
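The struct inode reshuffle is a pure layout change: i_lock moves next to the i_bytes, i_blocks and i_size fields it guards, while i_blkbits and i_writecount migrate to fill the resulting holes, so the common update path touches fewer cache lines. The locking discipline itself is unchanged; a rough sketch, modeled on the VFS inode_add_bytes() helper:

	/*
	 * Sketch of the i_lock discipline: i_blocks (512-byte units) and
	 * i_bytes (the remainder) are only ever updated together, under
	 * inode->i_lock.
	 */
	static void add_bytes(struct inode *inode, loff_t bytes)
	{
		spin_lock(&inode->i_lock);
		inode->i_blocks += bytes >> 9;	/* whole sectors */
		inode->i_bytes += bytes & 511;	/* leftover bytes */
		if (inode->i_bytes >= 512) {
			inode->i_blocks++;
			inode->i_bytes -= 512;
		}
		spin_unlock(&inode->i_lock);
	}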
diff --git a/include/linux/jbd.h b/include/linux/jbd.h index e6a5e34bed4f..c7acdde3243d 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h | |||
| @@ -244,6 +244,7 @@ typedef struct journal_superblock_s | |||
| 244 | 244 | ||
| 245 | #include <linux/fs.h> | 245 | #include <linux/fs.h> |
| 246 | #include <linux/sched.h> | 246 | #include <linux/sched.h> |
| 247 | #include <linux/jbd_common.h> | ||
| 247 | 248 | ||
| 248 | #define J_ASSERT(assert) BUG_ON(!(assert)) | 249 | #define J_ASSERT(assert) BUG_ON(!(assert)) |
| 249 | 250 | ||
| @@ -270,69 +271,6 @@ typedef struct journal_superblock_s | |||
| 270 | #define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why) | 271 | #define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why) |
| 271 | #endif | 272 | #endif |
| 272 | 273 | ||
| 273 | enum jbd_state_bits { | ||
| 274 | BH_JBD /* Has an attached ext3 journal_head */ | ||
| 275 | = BH_PrivateStart, | ||
| 276 | BH_JWrite, /* Being written to log (@@@ DEBUGGING) */ | ||
| 277 | BH_Freed, /* Has been freed (truncated) */ | ||
| 278 | BH_Revoked, /* Has been revoked from the log */ | ||
| 279 | BH_RevokeValid, /* Revoked flag is valid */ | ||
| 280 | BH_JBDDirty, /* Is dirty but journaled */ | ||
| 281 | BH_State, /* Pins most journal_head state */ | ||
| 282 | BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ | ||
| 283 | BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ | ||
| 284 | }; | ||
| 285 | |||
| 286 | BUFFER_FNS(JBD, jbd) | ||
| 287 | BUFFER_FNS(JWrite, jwrite) | ||
| 288 | BUFFER_FNS(JBDDirty, jbddirty) | ||
| 289 | TAS_BUFFER_FNS(JBDDirty, jbddirty) | ||
| 290 | BUFFER_FNS(Revoked, revoked) | ||
| 291 | TAS_BUFFER_FNS(Revoked, revoked) | ||
| 292 | BUFFER_FNS(RevokeValid, revokevalid) | ||
| 293 | TAS_BUFFER_FNS(RevokeValid, revokevalid) | ||
| 294 | BUFFER_FNS(Freed, freed) | ||
| 295 | |||
| 296 | static inline struct buffer_head *jh2bh(struct journal_head *jh) | ||
| 297 | { | ||
| 298 | return jh->b_bh; | ||
| 299 | } | ||
| 300 | |||
| 301 | static inline struct journal_head *bh2jh(struct buffer_head *bh) | ||
| 302 | { | ||
| 303 | return bh->b_private; | ||
| 304 | } | ||
| 305 | |||
| 306 | static inline void jbd_lock_bh_state(struct buffer_head *bh) | ||
| 307 | { | ||
| 308 | bit_spin_lock(BH_State, &bh->b_state); | ||
| 309 | } | ||
| 310 | |||
| 311 | static inline int jbd_trylock_bh_state(struct buffer_head *bh) | ||
| 312 | { | ||
| 313 | return bit_spin_trylock(BH_State, &bh->b_state); | ||
| 314 | } | ||
| 315 | |||
| 316 | static inline int jbd_is_locked_bh_state(struct buffer_head *bh) | ||
| 317 | { | ||
| 318 | return bit_spin_is_locked(BH_State, &bh->b_state); | ||
| 319 | } | ||
| 320 | |||
| 321 | static inline void jbd_unlock_bh_state(struct buffer_head *bh) | ||
| 322 | { | ||
| 323 | bit_spin_unlock(BH_State, &bh->b_state); | ||
| 324 | } | ||
| 325 | |||
| 326 | static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) | ||
| 327 | { | ||
| 328 | bit_spin_lock(BH_JournalHead, &bh->b_state); | ||
| 329 | } | ||
| 330 | |||
| 331 | static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) | ||
| 332 | { | ||
| 333 | bit_spin_unlock(BH_JournalHead, &bh->b_state); | ||
| 334 | } | ||
| 335 | |||
| 336 | struct jbd_revoke_table_s; | 274 | struct jbd_revoke_table_s; |
| 337 | 275 | ||
| 338 | /** | 276 | /** |
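The helpers deleted here, and the matching block removed from jbd2.h below, reappear unchanged in the new <linux/jbd_common.h>. For orientation, BUFFER_FNS(JBD, jbd) expands to bit accessors of roughly this shape (a simplified rendering of the macro in <linux/buffer_head.h>):

	static inline void set_buffer_jbd(struct buffer_head *bh)
	{
		set_bit(BH_JBD, &bh->b_state);
	}
	static inline void clear_buffer_jbd(struct buffer_head *bh)
	{
		clear_bit(BH_JBD, &bh->b_state);
	}
	static inline int buffer_jbd(const struct buffer_head *bh)
	{
		return test_bit(BH_JBD, &bh->b_state);
	}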
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 38f307b8c334..2092ea21e469 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
| @@ -275,6 +275,7 @@ typedef struct journal_superblock_s | |||
| 275 | 275 | ||
| 276 | #include <linux/fs.h> | 276 | #include <linux/fs.h> |
| 277 | #include <linux/sched.h> | 277 | #include <linux/sched.h> |
| 278 | #include <linux/jbd_common.h> | ||
| 278 | 279 | ||
| 279 | #define J_ASSERT(assert) BUG_ON(!(assert)) | 280 | #define J_ASSERT(assert) BUG_ON(!(assert)) |
| 280 | 281 | ||
| @@ -302,70 +303,6 @@ typedef struct journal_superblock_s | |||
| 302 | #define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why) | 303 | #define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why) |
| 303 | #endif | 304 | #endif |
| 304 | 305 | ||
| 305 | enum jbd_state_bits { | ||
| 306 | BH_JBD /* Has an attached ext3 journal_head */ | ||
| 307 | = BH_PrivateStart, | ||
| 308 | BH_JWrite, /* Being written to log (@@@ DEBUGGING) */ | ||
| 309 | BH_Freed, /* Has been freed (truncated) */ | ||
| 310 | BH_Revoked, /* Has been revoked from the log */ | ||
| 311 | BH_RevokeValid, /* Revoked flag is valid */ | ||
| 312 | BH_JBDDirty, /* Is dirty but journaled */ | ||
| 313 | BH_State, /* Pins most journal_head state */ | ||
| 314 | BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ | ||
| 315 | BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ | ||
| 316 | BH_JBDPrivateStart, /* First bit available for private use by FS */ | ||
| 317 | }; | ||
| 318 | |||
| 319 | BUFFER_FNS(JBD, jbd) | ||
| 320 | BUFFER_FNS(JWrite, jwrite) | ||
| 321 | BUFFER_FNS(JBDDirty, jbddirty) | ||
| 322 | TAS_BUFFER_FNS(JBDDirty, jbddirty) | ||
| 323 | BUFFER_FNS(Revoked, revoked) | ||
| 324 | TAS_BUFFER_FNS(Revoked, revoked) | ||
| 325 | BUFFER_FNS(RevokeValid, revokevalid) | ||
| 326 | TAS_BUFFER_FNS(RevokeValid, revokevalid) | ||
| 327 | BUFFER_FNS(Freed, freed) | ||
| 328 | |||
| 329 | static inline struct buffer_head *jh2bh(struct journal_head *jh) | ||
| 330 | { | ||
| 331 | return jh->b_bh; | ||
| 332 | } | ||
| 333 | |||
| 334 | static inline struct journal_head *bh2jh(struct buffer_head *bh) | ||
| 335 | { | ||
| 336 | return bh->b_private; | ||
| 337 | } | ||
| 338 | |||
| 339 | static inline void jbd_lock_bh_state(struct buffer_head *bh) | ||
| 340 | { | ||
| 341 | bit_spin_lock(BH_State, &bh->b_state); | ||
| 342 | } | ||
| 343 | |||
| 344 | static inline int jbd_trylock_bh_state(struct buffer_head *bh) | ||
| 345 | { | ||
| 346 | return bit_spin_trylock(BH_State, &bh->b_state); | ||
| 347 | } | ||
| 348 | |||
| 349 | static inline int jbd_is_locked_bh_state(struct buffer_head *bh) | ||
| 350 | { | ||
| 351 | return bit_spin_is_locked(BH_State, &bh->b_state); | ||
| 352 | } | ||
| 353 | |||
| 354 | static inline void jbd_unlock_bh_state(struct buffer_head *bh) | ||
| 355 | { | ||
| 356 | bit_spin_unlock(BH_State, &bh->b_state); | ||
| 357 | } | ||
| 358 | |||
| 359 | static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) | ||
| 360 | { | ||
| 361 | bit_spin_lock(BH_JournalHead, &bh->b_state); | ||
| 362 | } | ||
| 363 | |||
| 364 | static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) | ||
| 365 | { | ||
| 366 | bit_spin_unlock(BH_JournalHead, &bh->b_state); | ||
| 367 | } | ||
| 368 | |||
| 369 | /* Flags in jbd_inode->i_flags */ | 306 | /* Flags in jbd_inode->i_flags */ |
| 370 | #define __JI_COMMIT_RUNNING 0 | 307 | #define __JI_COMMIT_RUNNING 0 |
| 371 | /* Commit of the inode data in progress. We use this flag to protect us from | 308 | /* Commit of the inode data in progress. We use this flag to protect us from |
| @@ -1106,9 +1043,9 @@ static inline handle_t *journal_current_handle(void) | |||
| 1106 | */ | 1043 | */ |
| 1107 | 1044 | ||
| 1108 | extern handle_t *jbd2_journal_start(journal_t *, int nblocks); | 1045 | extern handle_t *jbd2_journal_start(journal_t *, int nblocks); |
| 1109 | extern handle_t *jbd2__journal_start(journal_t *, int nblocks, int gfp_mask); | 1046 | extern handle_t *jbd2__journal_start(journal_t *, int nblocks, gfp_t gfp_mask); |
| 1110 | extern int jbd2_journal_restart(handle_t *, int nblocks); | 1047 | extern int jbd2_journal_restart(handle_t *, int nblocks); |
| 1111 | extern int jbd2__journal_restart(handle_t *, int nblocks, int gfp_mask); | 1048 | extern int jbd2__journal_restart(handle_t *, int nblocks, gfp_t gfp_mask); |
| 1112 | extern int jbd2_journal_extend (handle_t *, int nblocks); | 1049 | extern int jbd2_journal_extend (handle_t *, int nblocks); |
| 1113 | extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *); | 1050 | extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *); |
| 1114 | extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *); | 1051 | extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *); |
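Switching the mask argument from int to gfp_t costs nothing at runtime but lets sparse type-check allocation flags at every call site. An illustrative caller:

	/*
	 * Illustrative: start a handle with GFP_NOFS so that memory
	 * reclaim cannot recurse into the filesystem while the journal
	 * handle is being set up.
	 */
	static handle_t *start_handle(journal_t *journal, int nblocks)
	{
		return jbd2__journal_start(journal, nblocks, GFP_NOFS);
	}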
diff --git a/include/linux/jbd_common.h b/include/linux/jbd_common.h new file mode 100644 index 000000000000..6230f8556a4e --- /dev/null +++ b/include/linux/jbd_common.h | |||
| @@ -0,0 +1,68 @@ | |||
| 1 | #ifndef _LINUX_JBD_STATE_H | ||
| 2 | #define _LINUX_JBD_STATE_H | ||
| 3 | |||
| 4 | enum jbd_state_bits { | ||
| 5 | BH_JBD /* Has an attached ext3 journal_head */ | ||
| 6 | = BH_PrivateStart, | ||
| 7 | BH_JWrite, /* Being written to log (@@@ DEBUGGING) */ | ||
| 8 | BH_Freed, /* Has been freed (truncated) */ | ||
| 9 | BH_Revoked, /* Has been revoked from the log */ | ||
| 10 | BH_RevokeValid, /* Revoked flag is valid */ | ||
| 11 | BH_JBDDirty, /* Is dirty but journaled */ | ||
| 12 | BH_State, /* Pins most journal_head state */ | ||
| 13 | BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ | ||
| 14 | BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ | ||
| 15 | BH_JBDPrivateStart, /* First bit available for private use by FS */ | ||
| 16 | }; | ||
| 17 | |||
| 18 | BUFFER_FNS(JBD, jbd) | ||
| 19 | BUFFER_FNS(JWrite, jwrite) | ||
| 20 | BUFFER_FNS(JBDDirty, jbddirty) | ||
| 21 | TAS_BUFFER_FNS(JBDDirty, jbddirty) | ||
| 22 | BUFFER_FNS(Revoked, revoked) | ||
| 23 | TAS_BUFFER_FNS(Revoked, revoked) | ||
| 24 | BUFFER_FNS(RevokeValid, revokevalid) | ||
| 25 | TAS_BUFFER_FNS(RevokeValid, revokevalid) | ||
| 26 | BUFFER_FNS(Freed, freed) | ||
| 27 | |||
| 28 | static inline struct buffer_head *jh2bh(struct journal_head *jh) | ||
| 29 | { | ||
| 30 | return jh->b_bh; | ||
| 31 | } | ||
| 32 | |||
| 33 | static inline struct journal_head *bh2jh(struct buffer_head *bh) | ||
| 34 | { | ||
| 35 | return bh->b_private; | ||
| 36 | } | ||
| 37 | |||
| 38 | static inline void jbd_lock_bh_state(struct buffer_head *bh) | ||
| 39 | { | ||
| 40 | bit_spin_lock(BH_State, &bh->b_state); | ||
| 41 | } | ||
| 42 | |||
| 43 | static inline int jbd_trylock_bh_state(struct buffer_head *bh) | ||
| 44 | { | ||
| 45 | return bit_spin_trylock(BH_State, &bh->b_state); | ||
| 46 | } | ||
| 47 | |||
| 48 | static inline int jbd_is_locked_bh_state(struct buffer_head *bh) | ||
| 49 | { | ||
| 50 | return bit_spin_is_locked(BH_State, &bh->b_state); | ||
| 51 | } | ||
| 52 | |||
| 53 | static inline void jbd_unlock_bh_state(struct buffer_head *bh) | ||
| 54 | { | ||
| 55 | bit_spin_unlock(BH_State, &bh->b_state); | ||
| 56 | } | ||
| 57 | |||
| 58 | static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) | ||
| 59 | { | ||
| 60 | bit_spin_lock(BH_JournalHead, &bh->b_state); | ||
| 61 | } | ||
| 62 | |||
| 63 | static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) | ||
| 64 | { | ||
| 65 | bit_spin_unlock(BH_JournalHead, &bh->b_state); | ||
| 66 | } | ||
| 67 | |||
| 68 | #endif | ||
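The consolidated helpers keep their usual calling pattern, visible in the transaction.c hunk earlier: take the BH_State bit spinlock, check buffer_jbd() (generated by BUFFER_FNS(JBD, jbd) above), then dereference the journal_head. In sketch form:

	/* Sketch: the canonical critical section around journal_head state. */
	static void show_jh(struct buffer_head *bh)
	{
		jbd_lock_bh_state(bh);		/* pins most journal_head state */
		if (buffer_jbd(bh))
			pr_debug("jh %p\n", bh2jh(bh));
		jbd_unlock_bh_state(bh);
	}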
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index b50a54736242..748ff7cbe555 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h | |||
| @@ -9,9 +9,12 @@ | |||
| 9 | 9 | ||
| 10 | struct ext4_allocation_context; | 10 | struct ext4_allocation_context; |
| 11 | struct ext4_allocation_request; | 11 | struct ext4_allocation_request; |
| 12 | struct ext4_extent; | ||
| 12 | struct ext4_prealloc_space; | 13 | struct ext4_prealloc_space; |
| 13 | struct ext4_inode_info; | 14 | struct ext4_inode_info; |
| 14 | struct mpage_da_data; | 15 | struct mpage_da_data; |
| 16 | struct ext4_map_blocks; | ||
| 17 | struct ext4_extent; | ||
| 15 | 18 | ||
| 16 | #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) | 19 | #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) |
| 17 | 20 | ||
| @@ -1032,9 +1035,9 @@ TRACE_EVENT(ext4_forget, | |||
| 1032 | ); | 1035 | ); |
| 1033 | 1036 | ||
| 1034 | TRACE_EVENT(ext4_da_update_reserve_space, | 1037 | TRACE_EVENT(ext4_da_update_reserve_space, |
| 1035 | TP_PROTO(struct inode *inode, int used_blocks), | 1038 | TP_PROTO(struct inode *inode, int used_blocks, int quota_claim), |
| 1036 | 1039 | ||
| 1037 | TP_ARGS(inode, used_blocks), | 1040 | TP_ARGS(inode, used_blocks, quota_claim), |
| 1038 | 1041 | ||
| 1039 | TP_STRUCT__entry( | 1042 | TP_STRUCT__entry( |
| 1040 | __field( dev_t, dev ) | 1043 | __field( dev_t, dev ) |
| @@ -1045,6 +1048,7 @@ TRACE_EVENT(ext4_da_update_reserve_space, | |||
| 1045 | __field( int, reserved_data_blocks ) | 1048 | __field( int, reserved_data_blocks ) |
| 1046 | __field( int, reserved_meta_blocks ) | 1049 | __field( int, reserved_meta_blocks ) |
| 1047 | __field( int, allocated_meta_blocks ) | 1050 | __field( int, allocated_meta_blocks ) |
| 1051 | __field( int, quota_claim ) | ||
| 1048 | ), | 1052 | ), |
| 1049 | 1053 | ||
| 1050 | TP_fast_assign( | 1054 | TP_fast_assign( |
| @@ -1053,19 +1057,24 @@ TRACE_EVENT(ext4_da_update_reserve_space, | |||
| 1053 | __entry->mode = inode->i_mode; | 1057 | __entry->mode = inode->i_mode; |
| 1054 | __entry->i_blocks = inode->i_blocks; | 1058 | __entry->i_blocks = inode->i_blocks; |
| 1055 | __entry->used_blocks = used_blocks; | 1059 | __entry->used_blocks = used_blocks; |
| 1056 | __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; | 1060 | __entry->reserved_data_blocks = |
| 1057 | __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; | 1061 | EXT4_I(inode)->i_reserved_data_blocks; |
| 1058 | __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks; | 1062 | __entry->reserved_meta_blocks = |
| 1063 | EXT4_I(inode)->i_reserved_meta_blocks; | ||
| 1064 | __entry->allocated_meta_blocks = | ||
| 1065 | EXT4_I(inode)->i_allocated_meta_blocks; | ||
| 1066 | __entry->quota_claim = quota_claim; | ||
| 1059 | ), | 1067 | ), |
| 1060 | 1068 | ||
| 1061 | TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d " | 1069 | TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d " |
| 1062 | "reserved_data_blocks %d reserved_meta_blocks %d " | 1070 | "reserved_data_blocks %d reserved_meta_blocks %d " |
| 1063 | "allocated_meta_blocks %d", | 1071 | "allocated_meta_blocks %d quota_claim %d", |
| 1064 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1072 | MAJOR(__entry->dev), MINOR(__entry->dev), |
| 1065 | (unsigned long) __entry->ino, | 1073 | (unsigned long) __entry->ino, |
| 1066 | __entry->mode, __entry->i_blocks, | 1074 | __entry->mode, __entry->i_blocks, |
| 1067 | __entry->used_blocks, __entry->reserved_data_blocks, | 1075 | __entry->used_blocks, __entry->reserved_data_blocks, |
| 1068 | __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) | 1076 | __entry->reserved_meta_blocks, __entry->allocated_meta_blocks, |
| 1077 | __entry->quota_claim) | ||
| 1069 | ); | 1078 | ); |
| 1070 | 1079 | ||
| 1071 | TRACE_EVENT(ext4_da_reserve_space, | 1080 | TRACE_EVENT(ext4_da_reserve_space, |
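Because TP_PROTO defines the signature of the generated trace_ext4_da_update_reserve_space() stub, every call site gains the new argument. The updated call looks like this (illustrative sketch of the call site):

	/* quota_claim distinguishes allocations that claim previously
	 * reserved quota from those that do not. */
	trace_ext4_da_update_reserve_space(inode, used, quota_claim);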
| @@ -1386,6 +1395,87 @@ DEFINE_EVENT(ext4__truncate, ext4_truncate_exit, | |||
| 1386 | TP_ARGS(inode) | 1395 | TP_ARGS(inode) |
| 1387 | ); | 1396 | ); |
| 1388 | 1397 | ||
| 1398 | /* 'ux' is the uninitialized extent. */ | ||
| 1399 | TRACE_EVENT(ext4_ext_convert_to_initialized_enter, | ||
| 1400 | TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, | ||
| 1401 | struct ext4_extent *ux), | ||
| 1402 | |||
| 1403 | TP_ARGS(inode, map, ux), | ||
| 1404 | |||
| 1405 | TP_STRUCT__entry( | ||
| 1406 | __field( ino_t, ino ) | ||
| 1407 | __field( dev_t, dev ) | ||
| 1408 | __field( ext4_lblk_t, m_lblk ) | ||
| 1409 | __field( unsigned, m_len ) | ||
| 1410 | __field( ext4_lblk_t, u_lblk ) | ||
| 1411 | __field( unsigned, u_len ) | ||
| 1412 | __field( ext4_fsblk_t, u_pblk ) | ||
| 1413 | ), | ||
| 1414 | |||
| 1415 | TP_fast_assign( | ||
| 1416 | __entry->ino = inode->i_ino; | ||
| 1417 | __entry->dev = inode->i_sb->s_dev; | ||
| 1418 | __entry->m_lblk = map->m_lblk; | ||
| 1419 | __entry->m_len = map->m_len; | ||
| 1420 | __entry->u_lblk = le32_to_cpu(ux->ee_block); | ||
| 1421 | __entry->u_len = ext4_ext_get_actual_len(ux); | ||
| 1422 | __entry->u_pblk = ext4_ext_pblock(ux); | ||
| 1423 | ), | ||
| 1424 | |||
| 1425 | TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u u_lblk %u u_len %u " | ||
| 1426 | "u_pblk %llu", | ||
| 1427 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 1428 | (unsigned long) __entry->ino, | ||
| 1429 | __entry->m_lblk, __entry->m_len, | ||
| 1430 | __entry->u_lblk, __entry->u_len, __entry->u_pblk) | ||
| 1431 | ); | ||
| 1432 | |||
| 1433 | /* | ||
| 1434 | * 'ux' is the uninitialized extent. | ||
| 1435 | * 'ix' is the initialized extent to which blocks are transferred. | ||
| 1436 | */ | ||
| 1437 | TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath, | ||
| 1438 | TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, | ||
| 1439 | struct ext4_extent *ux, struct ext4_extent *ix), | ||
| 1440 | |||
| 1441 | TP_ARGS(inode, map, ux, ix), | ||
| 1442 | |||
| 1443 | TP_STRUCT__entry( | ||
| 1444 | __field( ino_t, ino ) | ||
| 1445 | __field( dev_t, dev ) | ||
| 1446 | __field( ext4_lblk_t, m_lblk ) | ||
| 1447 | __field( unsigned, m_len ) | ||
| 1448 | __field( ext4_lblk_t, u_lblk ) | ||
| 1449 | __field( unsigned, u_len ) | ||
| 1450 | __field( ext4_fsblk_t, u_pblk ) | ||
| 1451 | __field( ext4_lblk_t, i_lblk ) | ||
| 1452 | __field( unsigned, i_len ) | ||
| 1453 | __field( ext4_fsblk_t, i_pblk ) | ||
| 1454 | ), | ||
| 1455 | |||
| 1456 | TP_fast_assign( | ||
| 1457 | __entry->ino = inode->i_ino; | ||
| 1458 | __entry->dev = inode->i_sb->s_dev; | ||
| 1459 | __entry->m_lblk = map->m_lblk; | ||
| 1460 | __entry->m_len = map->m_len; | ||
| 1461 | __entry->u_lblk = le32_to_cpu(ux->ee_block); | ||
| 1462 | __entry->u_len = ext4_ext_get_actual_len(ux); | ||
| 1463 | __entry->u_pblk = ext4_ext_pblock(ux); | ||
| 1464 | __entry->i_lblk = le32_to_cpu(ix->ee_block); | ||
| 1465 | __entry->i_len = ext4_ext_get_actual_len(ix); | ||
| 1466 | __entry->i_pblk = ext4_ext_pblock(ix); | ||
| 1467 | ), | ||
| 1468 | |||
| 1469 | TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u " | ||
| 1470 | "u_lblk %u u_len %u u_pblk %llu " | ||
| 1471 | "i_lblk %u i_len %u i_pblk %llu", ||
| 1472 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 1473 | (unsigned long) __entry->ino, | ||
| 1474 | __entry->m_lblk, __entry->m_len, | ||
| 1475 | __entry->u_lblk, __entry->u_len, __entry->u_pblk, | ||
| 1476 | __entry->i_lblk, __entry->i_len, __entry->i_pblk) | ||
| 1477 | ); | ||
| 1478 | |||
| 1389 | DECLARE_EVENT_CLASS(ext4__map_blocks_enter, | 1479 | DECLARE_EVENT_CLASS(ext4__map_blocks_enter, |
| 1390 | TP_PROTO(struct inode *inode, ext4_lblk_t lblk, | 1480 | TP_PROTO(struct inode *inode, ext4_lblk_t lblk, |
| 1391 | unsigned int len, unsigned int flags), | 1481 | unsigned int len, unsigned int flags), |
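Each TRACE_EVENT above generates a trace_<name>() inline that compiles down to a patched-out no-op until the event is enabled at runtime. A hedged sketch of how the enter event is fired from the extent-conversion path (the surrounding logic is elided; only the placement matters):

	static void convert_enter(struct inode *inode,
				  struct ext4_map_blocks *map,
				  struct ext4_extent *ex)
	{
		/* Fire before splitting the uninitialized extent 'ex'. */
		trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
	}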
| @@ -1589,6 +1679,382 @@ DEFINE_EVENT(ext4__trim, ext4_trim_all_free, | |||
| 1589 | TP_ARGS(sb, group, start, len) | 1679 | TP_ARGS(sb, group, start, len) |
| 1590 | ); | 1680 | ); |
| 1591 | 1681 | ||
| 1682 | TRACE_EVENT(ext4_ext_handle_uninitialized_extents, | ||
| 1683 | TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, | ||
| 1684 | unsigned int allocated, ext4_fsblk_t newblock), | ||
| 1685 | |||
| 1686 | TP_ARGS(inode, map, allocated, newblock), | ||
| 1687 | |||
| 1688 | TP_STRUCT__entry( | ||
| 1689 | __field( ino_t, ino ) | ||
| 1690 | __field( dev_t, dev ) | ||
| 1691 | __field( ext4_lblk_t, lblk ) | ||
| 1692 | __field( ext4_fsblk_t, pblk ) | ||
| 1693 | __field( unsigned int, len ) | ||
| 1694 | __field( int, flags ) | ||
| 1695 | __field( unsigned int, allocated ) | ||
| 1696 | __field( ext4_fsblk_t, newblk ) | ||
| 1697 | ), | ||
| 1698 | |||
| 1699 | TP_fast_assign( | ||
| 1700 | __entry->ino = inode->i_ino; | ||
| 1701 | __entry->dev = inode->i_sb->s_dev; | ||
| 1702 | __entry->lblk = map->m_lblk; | ||
| 1703 | __entry->pblk = map->m_pblk; | ||
| 1704 | __entry->len = map->m_len; | ||
| 1705 | __entry->flags = map->m_flags; | ||
| 1706 | __entry->allocated = allocated; | ||
| 1707 | __entry->newblk = newblock; | ||
| 1708 | ), | ||
| 1709 | |||
| 1710 | TP_printk("dev %d,%d ino %lu m_lblk %u m_pblk %llu m_len %u flags %d " ||
| 1711 | "allocated %d newblock %llu", | ||
| 1712 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 1713 | (unsigned long) __entry->ino, | ||
| 1714 | (unsigned) __entry->lblk, (unsigned long long) __entry->pblk, | ||
| 1715 | __entry->len, __entry->flags, | ||
| 1716 | (unsigned int) __entry->allocated, | ||
| 1717 | (unsigned long long) __entry->newblk) | ||
| 1718 | ); | ||
| 1719 | |||
| 1720 | TRACE_EVENT(ext4_get_implied_cluster_alloc_exit, | ||
| 1721 | TP_PROTO(struct super_block *sb, struct ext4_map_blocks *map, int ret), | ||
| 1722 | |||
| 1723 | TP_ARGS(sb, map, ret), | ||
| 1724 | |||
| 1725 | TP_STRUCT__entry( | ||
| 1726 | __field( dev_t, dev ) | ||
| 1727 | __field( ext4_lblk_t, lblk ) | ||
| 1728 | __field( ext4_fsblk_t, pblk ) | ||
| 1729 | __field( unsigned int, len ) | ||
| 1730 | __field( unsigned int, flags ) | ||
| 1731 | __field( int, ret ) | ||
| 1732 | ), | ||
| 1733 | |||
| 1734 | TP_fast_assign( | ||
| 1735 | __entry->dev = sb->s_dev; | ||
| 1736 | __entry->lblk = map->m_lblk; | ||
| 1737 | __entry->pblk = map->m_pblk; | ||
| 1738 | __entry->len = map->m_len; | ||
| 1739 | __entry->flags = map->m_flags; | ||
| 1740 | __entry->ret = ret; | ||
| 1741 | ), | ||
| 1742 | |||
| 1743 | TP_printk("dev %d,%d m_lblk %u m_pblk %llu m_len %u m_flags %u ret %d", | ||
| 1744 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 1745 | __entry->lblk, (unsigned long long) __entry->pblk, | ||
| 1746 | __entry->len, __entry->flags, __entry->ret) | ||
| 1747 | ); | ||
| 1748 | |||
| 1749 | TRACE_EVENT(ext4_ext_put_in_cache, | ||
| 1750 | TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len, | ||
| 1751 | ext4_fsblk_t start), | ||
| 1752 | |||
| 1753 | TP_ARGS(inode, lblk, len, start), | ||
| 1754 | |||
| 1755 | TP_STRUCT__entry( | ||
| 1756 | __field( ino_t, ino ) | ||
| 1757 | __field( dev_t, dev ) | ||
| 1758 | __field( ext4_lblk_t, lblk ) | ||
| 1759 | __field( unsigned int, len ) | ||
| 1760 | __field( ext4_fsblk_t, start ) | ||
| 1761 | ), | ||
| 1762 | |||
| 1763 | TP_fast_assign( | ||
| 1764 | __entry->ino = inode->i_ino; | ||
| 1765 | __entry->dev = inode->i_sb->s_dev; | ||
| 1766 | __entry->lblk = lblk; | ||
| 1767 | __entry->len = len; | ||
| 1768 | __entry->start = start; | ||
| 1769 | ), | ||
| 1770 | |||
| 1771 | TP_printk("dev %d,%d ino %lu lblk %u len %u start %llu", | ||
| 1772 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 1773 | (unsigned long) __entry->ino, | ||
| 1774 | (unsigned) __entry->lblk, | ||
| 1775 | __entry->len, | ||
| 1776 | (unsigned long long) __entry->start) | ||
| 1777 | ); | ||
| 1778 | |||
| 1779 | TRACE_EVENT(ext4_ext_in_cache, | ||
| 1780 | TP_PROTO(struct inode *inode, ext4_lblk_t lblk, int ret), | ||
| 1781 | |||
| 1782 | TP_ARGS(inode, lblk, ret), | ||
| 1783 | |||
| 1784 | TP_STRUCT__entry( | ||
| 1785 | __field( ino_t, ino ) | ||
| 1786 | __field( dev_t, dev ) | ||
| 1787 | __field( ext4_lblk_t, lblk ) | ||
| 1788 | __field( int, ret ) | ||
| 1789 | ), | ||
| 1790 | |||
| 1791 | TP_fast_assign( | ||
| 1792 | __entry->ino = inode->i_ino; | ||
| 1793 | __entry->dev = inode->i_sb->s_dev; | ||
| 1794 | __entry->lblk = lblk; | ||
| 1795 | __entry->ret = ret; | ||
| 1796 | ), | ||
| 1797 | |||
| 1798 | TP_printk("dev %d,%d ino %lu lblk %u ret %d", | ||
| 1799 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 1800 | (unsigned long) __entry->ino, | ||
| 1801 | (unsigned) __entry->lblk, | ||
| 1802 | __entry->ret) | ||
| 1803 | |||
| 1804 | ); | ||
| 1805 | |||
| 1806 | TRACE_EVENT(ext4_find_delalloc_range, | ||
| 1807 | TP_PROTO(struct inode *inode, ext4_lblk_t from, ext4_lblk_t to, | ||
| 1808 | int reverse, int found, ext4_lblk_t found_blk), | ||
| 1809 | |||
| 1810 | TP_ARGS(inode, from, to, reverse, found, found_blk), | ||
| 1811 | |||
| 1812 | TP_STRUCT__entry( | ||
| 1813 | __field( ino_t, ino ) | ||
| 1814 | __field( dev_t, dev ) | ||
| 1815 | __field( ext4_lblk_t, from ) | ||
| 1816 | __field( ext4_lblk_t, to ) | ||
| 1817 | __field( int, reverse ) | ||
| 1818 | __field( int, found ) | ||
| 1819 | __field( ext4_lblk_t, found_blk ) | ||
| 1820 | ), | ||
| 1821 | |||
| 1822 | TP_fast_assign( | ||
| 1823 | __entry->ino = inode->i_ino; | ||
| 1824 | __entry->dev = inode->i_sb->s_dev; | ||
| 1825 | __entry->from = from; | ||
| 1826 | __entry->to = to; | ||
| 1827 | __entry->reverse = reverse; | ||
| 1828 | __entry->found = found; | ||
| 1829 | __entry->found_blk = found_blk; | ||
| 1830 | ), | ||
| 1831 | |||
| 1832 | TP_printk("dev %d,%d ino %lu from %u to %u reverse %d found %d " | ||
| 1833 | "(blk = %u)", | ||
| 1834 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 1835 | (unsigned long) __entry->ino, | ||
| 1836 | (unsigned) __entry->from, (unsigned) __entry->to, | ||
| 1837 | __entry->reverse, __entry->found, | ||
| 1838 | (unsigned) __entry->found_blk) | ||
| 1839 | ); | ||
| 1840 | |||
| 1841 | TRACE_EVENT(ext4_get_reserved_cluster_alloc, | ||
| 1842 | TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len), | ||
| 1843 | |||
| 1844 | TP_ARGS(inode, lblk, len), | ||
| 1845 | |||
| 1846 | TP_STRUCT__entry( | ||
| 1847 | __field( ino_t, ino ) | ||
| 1848 | __field( dev_t, dev ) | ||
| 1849 | __field( ext4_lblk_t, lblk ) | ||
| 1850 | __field( unsigned int, len ) | ||
| 1851 | ), | ||
| 1852 | |||
| 1853 | TP_fast_assign( | ||
| 1854 | __entry->ino = inode->i_ino; | ||
| 1855 | __entry->dev = inode->i_sb->s_dev; | ||
| 1856 | __entry->lblk = lblk; | ||
| 1857 | __entry->len = len; | ||
| 1858 | ), | ||
| 1859 | |||
| 1860 | TP_printk("dev %d,%d ino %lu lblk %u len %u", | ||
| 1861 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 1862 | (unsigned long) __entry->ino, | ||
| 1863 | (unsigned) __entry->lblk, | ||
| 1864 | __entry->len) | ||
| 1865 | ); | ||
| 1866 | |||
| 1867 | TRACE_EVENT(ext4_ext_show_extent, | ||
| 1868 | TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, | ||
| 1869 | unsigned short len), | ||
| 1870 | |||
| 1871 | TP_ARGS(inode, lblk, pblk, len), | ||
| 1872 | |||
| 1873 | TP_STRUCT__entry( | ||
| 1874 | __field( ino_t, ino ) | ||
| 1875 | __field( dev_t, dev ) | ||
| 1876 | __field( ext4_lblk_t, lblk ) | ||
| 1877 | __field( ext4_fsblk_t, pblk ) | ||
| 1878 | __field( unsigned short, len ) | ||
| 1879 | ), | ||
| 1880 | |||
| 1881 | TP_fast_assign( | ||
| 1882 | __entry->ino = inode->i_ino; | ||
| 1883 | __entry->dev = inode->i_sb->s_dev; | ||
| 1884 | __entry->lblk = lblk; | ||
| 1885 | __entry->pblk = pblk; | ||
| 1886 | __entry->len = len; | ||
| 1887 | ), | ||
| 1888 | |||
| 1889 | TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u", | ||
| 1890 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 1891 | (unsigned long) __entry->ino, | ||
| 1892 | (unsigned) __entry->lblk, | ||
| 1893 | (unsigned long long) __entry->pblk, | ||
| 1894 | (unsigned short) __entry->len) | ||
| 1895 | ); | ||
| 1896 | |||
| 1897 | TRACE_EVENT(ext4_remove_blocks, | ||
| 1898 | TP_PROTO(struct inode *inode, struct ext4_extent *ex, | ||
| 1899 | ext4_lblk_t from, ext4_fsblk_t to, | ||
| 1900 | ext4_fsblk_t partial_cluster), | ||
| 1901 | |||
| 1902 | TP_ARGS(inode, ex, from, to, partial_cluster), | ||
| 1903 | |||
| 1904 | TP_STRUCT__entry( | ||
| 1905 | __field( ino_t, ino ) | ||
| 1906 | __field( dev_t, dev ) | ||
| 1907 | __field( ext4_lblk_t, ee_lblk ) | ||
| 1908 | __field( ext4_fsblk_t, ee_pblk ) | ||
| 1909 | __field( unsigned short, ee_len ) | ||
| 1910 | __field( ext4_lblk_t, from ) | ||
| 1911 | __field( ext4_lblk_t, to ) | ||
| 1912 | __field( ext4_fsblk_t, partial ) | ||
| 1913 | ), | ||
| 1914 | |||
| 1915 | TP_fast_assign( | ||
| 1916 | __entry->ino = inode->i_ino; | ||
| 1917 | __entry->dev = inode->i_sb->s_dev; | ||
| 1918 | __entry->ee_lblk = le32_to_cpu(ex->ee_block); ||
| 1919 | __entry->ee_pblk = ext4_ext_pblock(ex); | ||
| 1920 | __entry->ee_len = ext4_ext_get_actual_len(ex); | ||
| 1921 | __entry->from = from; | ||
| 1922 | __entry->to = to; | ||
| 1923 | __entry->partial = partial_cluster; | ||
| 1924 | ), | ||
| 1925 | |||
| 1926 | TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u] " ||
| 1927 | "from %u to %u partial_cluster %u", | ||
| 1928 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 1929 | (unsigned long) __entry->ino, | ||
| 1930 | (unsigned) __entry->ee_lblk, | ||
| 1931 | (unsigned long long) __entry->ee_pblk, | ||
| 1932 | (unsigned short) __entry->ee_len, | ||
| 1933 | (unsigned) __entry->from, | ||
| 1934 | (unsigned) __entry->to, | ||
| 1935 | (unsigned) __entry->partial) | ||
| 1936 | ); | ||
| 1937 | |||
| 1938 | TRACE_EVENT(ext4_ext_rm_leaf, | ||
| 1939 | TP_PROTO(struct inode *inode, ext4_lblk_t start, | ||
| 1940 | struct ext4_extent *ex, ext4_fsblk_t partial_cluster), | ||
| 1941 | |||
| 1942 | TP_ARGS(inode, start, ex, partial_cluster), | ||
| 1943 | |||
| 1944 | TP_STRUCT__entry( | ||
| 1945 | __field( ino_t, ino ) | ||
| 1946 | __field( dev_t, dev ) | ||
| 1947 | __field( ext4_lblk_t, start ) | ||
| 1948 | __field( ext4_lblk_t, ee_lblk ) | ||
| 1949 | __field( ext4_fsblk_t, ee_pblk ) | ||
| 1950 | __field( short, ee_len ) | ||
| 1951 | __field( ext4_fsblk_t, partial ) | ||
| 1952 | ), | ||
| 1953 | |||
| 1954 | TP_fast_assign( | ||
| 1955 | __entry->ino = inode->i_ino; | ||
| 1956 | __entry->dev = inode->i_sb->s_dev; | ||
| 1957 | __entry->start = start; | ||
| 1958 | __entry->ee_lblk = le32_to_cpu(ex->ee_block); | ||
| 1959 | __entry->ee_pblk = ext4_ext_pblock(ex); | ||
| 1960 | __entry->ee_len = ext4_ext_get_actual_len(ex); | ||
| 1961 | __entry->partial = partial_cluster; | ||
| 1962 | ), | ||
| 1963 | |||
| 1964 | TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u] " ||
| 1965 | "partial_cluster %u", | ||
| 1966 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 1967 | (unsigned long) __entry->ino, | ||
| 1968 | (unsigned) __entry->start, | ||
| 1969 | (unsigned) __entry->ee_lblk, | ||
| 1970 | (unsigned long long) __entry->ee_pblk, | ||
| 1971 | (unsigned short) __entry->ee_len, | ||
| 1972 | (unsigned) __entry->partial) | ||
| 1973 | ); | ||
| 1974 | |||
| 1975 | TRACE_EVENT(ext4_ext_rm_idx, | ||
| 1976 | TP_PROTO(struct inode *inode, ext4_fsblk_t pblk), | ||
| 1977 | |||
| 1978 | TP_ARGS(inode, pblk), | ||
| 1979 | |||
| 1980 | TP_STRUCT__entry( | ||
| 1981 | __field( ino_t, ino ) | ||
| 1982 | __field( dev_t, dev ) | ||
| 1983 | __field( ext4_fsblk_t, pblk ) | ||
| 1984 | ), | ||
| 1985 | |||
| 1986 | TP_fast_assign( | ||
| 1987 | __entry->ino = inode->i_ino; | ||
| 1988 | __entry->dev = inode->i_sb->s_dev; | ||
| 1989 | __entry->pblk = pblk; | ||
| 1990 | ), | ||
| 1991 | |||
| 1992 | TP_printk("dev %d,%d ino %lu index_pblk %llu", | ||
| 1993 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 1994 | (unsigned long) __entry->ino, | ||
| 1995 | (unsigned long long) __entry->pblk) | ||
| 1996 | ); | ||
| 1997 | |||
| 1998 | TRACE_EVENT(ext4_ext_remove_space, | ||
| 1999 | TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth), | ||
| 2000 | |||
| 2001 | TP_ARGS(inode, start, depth), | ||
| 2002 | |||
| 2003 | TP_STRUCT__entry( | ||
| 2004 | __field( ino_t, ino ) | ||
| 2005 | __field( dev_t, dev ) | ||
| 2006 | __field( ext4_lblk_t, start ) | ||
| 2007 | __field( int, depth ) | ||
| 2008 | ), | ||
| 2009 | |||
| 2010 | TP_fast_assign( | ||
| 2011 | __entry->ino = inode->i_ino; | ||
| 2012 | __entry->dev = inode->i_sb->s_dev; | ||
| 2013 | __entry->start = start; | ||
| 2014 | __entry->depth = depth; | ||
| 2015 | ), | ||
| 2016 | |||
| 2017 | TP_printk("dev %d,%d ino %lu since %u depth %d", | ||
| 2018 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 2019 | (unsigned long) __entry->ino, | ||
| 2020 | (unsigned) __entry->start, | ||
| 2021 | __entry->depth) | ||
| 2022 | ); | ||
| 2023 | |||
| 2024 | TRACE_EVENT(ext4_ext_remove_space_done, | ||
| 2025 | TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth, | ||
| 2026 | ext4_lblk_t partial, unsigned short eh_entries), | ||
| 2027 | |||
| 2028 | TP_ARGS(inode, start, depth, partial, eh_entries), | ||
| 2029 | |||
| 2030 | TP_STRUCT__entry( | ||
| 2031 | __field( ino_t, ino ) | ||
| 2032 | __field( dev_t, dev ) | ||
| 2033 | __field( ext4_lblk_t, start ) | ||
| 2034 | __field( int, depth ) | ||
| 2035 | __field( ext4_lblk_t, partial ) | ||
| 2036 | __field( unsigned short, eh_entries ) | ||
| 2037 | ), | ||
| 2038 | |||
| 2039 | TP_fast_assign( | ||
| 2040 | __entry->ino = inode->i_ino; | ||
| 2041 | __entry->dev = inode->i_sb->s_dev; | ||
| 2042 | __entry->start = start; | ||
| 2043 | __entry->depth = depth; | ||
| 2044 | __entry->partial = partial; | ||
| 2045 | __entry->eh_entries = eh_entries; | ||
| 2046 | ), | ||
| 2047 | |||
| 2048 | TP_printk("dev %d,%d ino %lu since %u depth %d partial %u " | ||
| 2049 | "remaining_entries %u", | ||
| 2050 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 2051 | (unsigned long) __entry->ino, | ||
| 2052 | (unsigned) __entry->start, | ||
| 2053 | __entry->depth, | ||
| 2054 | (unsigned) __entry->partial, | ||
| 2055 | (unsigned short) __entry->eh_entries) | ||
| 2056 | ); | ||
| 2057 | |||
| 1592 | #endif /* _TRACE_EXT4_H */ | 2058 | #endif /* _TRACE_EXT4_H */ |
| 1593 | 2059 | ||
| 1594 | /* This part must be outside protection */ | 2060 | /* This part must be outside protection */ |
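As with all ftrace events, the tracepoints added above can be toggled at runtime, for example by writing 1 to /sys/kernel/debug/tracing/events/ext4/ext4_ext_remove_space/enable and reading the output from /sys/kernel/debug/tracing/trace_pipe.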
