author		Linus Torvalds <torvalds@linux-foundation.org>	2011-11-02 13:06:20 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-11-02 13:06:20 -0400
commit		f1f8935a5c38a2c61e86a42bc971a2539eef2211 (patch)
tree		694950045f2f5d89507d7206cf6595e09cdfbd2c /fs
parent		34116645d912f65d7eb4508a1db3c9d0e45facb1 (diff)
parent		f2a44523b20f323e4aef7c16261d34d6f0a4bf06 (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (97 commits)
jbd2: Unify log messages in jbd2 code
jbd/jbd2: validate sb->s_first in journal_get_superblock()
ext4: let ext4_ext_rm_leaf work with EXT_DEBUG defined
ext4: fix a syntax error in ext4_ext_insert_extent when debugging enabled
ext4: fix a typo in struct ext4_allocation_context
ext4: Don't normalize an falloc request if it can fit in 1 extent.
ext4: remove comments about extent mount option in ext4_new_inode()
ext4: let ext4_discard_partial_buffers handle unaligned range correctly
ext4: return ENOMEM if find_or_create_pages fails
ext4: move vars to local scope in ext4_discard_partial_page_buffers_no_lock()
ext4: Create helper function for EXT4_IO_END_UNWRITTEN and i_aiodio_unwritten
ext4: optimize locking for end_io extent conversion
ext4: remove unnecessary call to waitqueue_active()
ext4: Use correct locking for ext4_end_io_nolock()
ext4: fix race in xattr block allocation path
ext4: trace punch_hole correctly in ext4_ext_map_blocks
ext4: clean up AGGRESSIVE_TEST code
ext4: move variables to their scope
ext4: fix quota accounting during migration
ext4: migrate cleanup
...
Diffstat (limited to 'fs')
-rw-r--r--	fs/ext4/balloc.c	345
-rw-r--r--	fs/ext4/ext4.h	141
-rw-r--r--	fs/ext4/ext4_extents.h	2
-rw-r--r--	fs/ext4/ext4_jbd2.c	8
-rw-r--r--	fs/ext4/extents.c	1168
-rw-r--r--	fs/ext4/file.c	4
-rw-r--r--	fs/ext4/fsync.c	10
-rw-r--r--	fs/ext4/ialloc.c	204
-rw-r--r--	fs/ext4/indirect.c	20
-rw-r--r--	fs/ext4/inode.c	512
-rw-r--r--	fs/ext4/ioctl.c	65
-rw-r--r--	fs/ext4/mballoc.c	331
-rw-r--r--	fs/ext4/mballoc.h	11
-rw-r--r--	fs/ext4/migrate.c	109
-rw-r--r--	fs/ext4/mmp.c	10
-rw-r--r--	fs/ext4/move_extent.c	1
-rw-r--r--	fs/ext4/namei.c	21
-rw-r--r--	fs/ext4/page-io.c	66
-rw-r--r--	fs/ext4/resize.c	10
-rw-r--r--	fs/ext4/super.c	263
-rw-r--r--	fs/ext4/xattr.c	12
-rw-r--r--	fs/jbd/journal.c	8
-rw-r--r--	fs/jbd2/commit.c	26
-rw-r--r--	fs/jbd2/journal.c	44
-rw-r--r--	fs/jbd2/recovery.c	28
-rw-r--r--	fs/jbd2/transaction.c	68
26 files changed, 2328 insertions, 1159 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f8224adf496..f6dba4505f1 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -28,7 +28,8 @@
  */
 
 /*
- * Calculate the block group number and offset, given a block number
+ * Calculate the block group number and offset into the block/cluster
+ * allocation bitmap, given a block number
  */
 void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 		ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
@@ -37,7 +38,8 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 	ext4_grpblk_t offset;
 
 	blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
-	offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb));
+	offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >>
+		EXT4_SB(sb)->s_cluster_bits;
 	if (offsetp)
 		*offsetp = offset;
 	if (blockgrpp)
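The hunk above is the core of the bigalloc conversion for this helper: do_div() leaves the quotient (the group number) in blocknr and returns the remainder, which is now shifted right by s_cluster_bits so that *offsetp indexes the per-group cluster bitmap instead of a per-block bitmap. A rough userspace sketch of the same arithmetic (the geometry values below are invented for illustration, not taken from this patch):

    /* Sketch: block -> (group, cluster-bitmap offset), assumed geometry. */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t blocknr = 100000;          /* filesystem-wide block number */
        uint64_t first_data_block = 0;      /* s_first_data_block */
        uint64_t blocks_per_group = 32768;  /* EXT4_BLOCKS_PER_GROUP() */
        unsigned cluster_bits = 4;          /* 16 blocks per cluster */

        blocknr -= first_data_block;
        uint64_t group  = blocknr / blocks_per_group;  /* do_div() quotient */
        uint64_t offset = (blocknr % blocks_per_group) /* do_div() remainder */
                                >> cluster_bits;       /* now in clusters */

        printf("group %llu, bitmap offset %llu\n",
               (unsigned long long)group, (unsigned long long)offset);
        return 0;
    }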
@@ -55,130 +57,169 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
 	return 0;
 }
 
-static int ext4_group_used_meta_blocks(struct super_block *sb,
-				       ext4_group_t block_group,
-				       struct ext4_group_desc *gdp)
+/* Return the number of clusters used for file system metadata; this
+ * represents the overhead needed by the file system.
+ */
+unsigned ext4_num_overhead_clusters(struct super_block *sb,
+				    ext4_group_t block_group,
+				    struct ext4_group_desc *gdp)
 {
-	ext4_fsblk_t tmp;
+	unsigned num_clusters;
+	int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c;
+	ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
+	ext4_fsblk_t itbl_blk;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	/* block bitmap, inode bitmap, and inode table blocks */
-	int used_blocks = sbi->s_itb_per_group + 2;
 
-	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
-		if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp),
-					block_group))
-			used_blocks--;
+	/* This is the number of clusters used by the superblock,
+	 * block group descriptors, and reserved block group
+	 * descriptor blocks */
+	num_clusters = ext4_num_base_meta_clusters(sb, block_group);
 
-		if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp),
-					block_group))
-			used_blocks--;
-
-		tmp = ext4_inode_table(sb, gdp);
-		for (; tmp < ext4_inode_table(sb, gdp) +
-				sbi->s_itb_per_group; tmp++) {
-			if (!ext4_block_in_group(sb, tmp, block_group))
-				used_blocks -= 1;
+	/*
+	 * For the allocation bitmaps and inode table, we first need
+	 * to check to see if the block is in the block group.  If it
+	 * is, then check to see if the cluster is already accounted
+	 * for in the clusters used for the base metadata cluster, or
+	 * if we can increment the base metadata cluster to include
+	 * that block.  Otherwise, we will have to track the cluster
+	 * used for the allocation bitmap or inode table explicitly.
+	 * Normally all of these blocks are contiguous, so the special
+	 * case handling shouldn't be necessary except for *very*
+	 * unusual file system layouts.
+	 */
+	if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
+		block_cluster = EXT4_B2C(sbi, (start -
+					       ext4_block_bitmap(sb, gdp)));
+		if (block_cluster < num_clusters)
+			block_cluster = -1;
+		else if (block_cluster == num_clusters) {
+			num_clusters++;
+			block_cluster = -1;
 		}
 	}
-	return used_blocks;
-}
 
-/* Initializes an uninitialized block bitmap if given, and returns the
- * number of blocks free in the group. */
-unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
-		ext4_group_t block_group, struct ext4_group_desc *gdp)
-{
-	int bit, bit_max;
-	ext4_group_t ngroups = ext4_get_groups_count(sb);
-	unsigned free_blocks, group_blocks;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-
-	if (bh) {
-		J_ASSERT_BH(bh, buffer_locked(bh));
-
-		/* If checksum is bad mark all blocks used to prevent allocation
-		 * essentially implementing a per-group read-only flag. */
-		if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
-			ext4_error(sb, "Checksum bad for group %u",
-				   block_group);
-			ext4_free_blks_set(sb, gdp, 0);
-			ext4_free_inodes_set(sb, gdp, 0);
-			ext4_itable_unused_set(sb, gdp, 0);
-			memset(bh->b_data, 0xff, sb->s_blocksize);
-			return 0;
+	if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
+		inode_cluster = EXT4_B2C(sbi,
+					 start - ext4_inode_bitmap(sb, gdp));
+		if (inode_cluster < num_clusters)
+			inode_cluster = -1;
+		else if (inode_cluster == num_clusters) {
+			num_clusters++;
+			inode_cluster = -1;
 		}
-		memset(bh->b_data, 0, sb->s_blocksize);
 	}
 
-	/* Check for superblock and gdt backups in this group */
-	bit_max = ext4_bg_has_super(sb, block_group);
-
-	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
-	    block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
-			  sbi->s_desc_per_block) {
-		if (bit_max) {
-			bit_max += ext4_bg_num_gdb(sb, block_group);
-			bit_max +=
-				le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+	itbl_blk = ext4_inode_table(sb, gdp);
+	for (i = 0; i < sbi->s_itb_per_group; i++) {
+		if (ext4_block_in_group(sb, itbl_blk + i, block_group)) {
+			c = EXT4_B2C(sbi, start - itbl_blk + i);
+			if ((c < num_clusters) || (c == inode_cluster) ||
+			    (c == block_cluster) || (c == itbl_cluster))
+				continue;
+			if (c == num_clusters) {
+				num_clusters++;
+				continue;
+			}
+			num_clusters++;
+			itbl_cluster = c;
 		}
-	} else { /* For META_BG_BLOCK_GROUPS */
-		bit_max += ext4_bg_num_gdb(sb, block_group);
 	}
 
-	if (block_group == ngroups - 1) {
+	if (block_cluster != -1)
+		num_clusters++;
+	if (inode_cluster != -1)
+		num_clusters++;
+
+	return num_clusters;
+}
+
+static unsigned int num_clusters_in_group(struct super_block *sb,
+					  ext4_group_t block_group)
+{
+	unsigned int blocks;
+
+	if (block_group == ext4_get_groups_count(sb) - 1) {
 		/*
-		 * Even though mke2fs always initialize first and last group
-		 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
-		 * to make sure we calculate the right free blocks
+		 * Even though mke2fs always initializes the first and
+		 * last group, just in case some other tool was used,
+		 * we need to make sure we calculate the right free
+		 * blocks.
 		 */
-		group_blocks = ext4_blocks_count(sbi->s_es) -
-			ext4_group_first_block_no(sb, ngroups - 1);
-	} else {
-		group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
-	}
+		blocks = ext4_blocks_count(EXT4_SB(sb)->s_es) -
+			ext4_group_first_block_no(sb, block_group);
+	} else
+		blocks = EXT4_BLOCKS_PER_GROUP(sb);
+	return EXT4_NUM_B2C(EXT4_SB(sb), blocks);
+}
 
-	free_blocks = group_blocks - bit_max;
+/* Initializes an uninitialized block bitmap */
+void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+			    ext4_group_t block_group,
+			    struct ext4_group_desc *gdp)
+{
+	unsigned int bit, bit_max;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	ext4_fsblk_t start, tmp;
+	int flex_bg = 0;
+
+	J_ASSERT_BH(bh, buffer_locked(bh));
+
+	/* If checksum is bad mark all blocks used to prevent allocation
+	 * essentially implementing a per-group read-only flag. */
+	if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+		ext4_error(sb, "Checksum bad for group %u", block_group);
+		ext4_free_group_clusters_set(sb, gdp, 0);
+		ext4_free_inodes_set(sb, gdp, 0);
+		ext4_itable_unused_set(sb, gdp, 0);
+		memset(bh->b_data, 0xff, sb->s_blocksize);
+		return;
+	}
+	memset(bh->b_data, 0, sb->s_blocksize);
 
-	if (bh) {
-		ext4_fsblk_t start, tmp;
-		int flex_bg = 0;
+	bit_max = ext4_num_base_meta_clusters(sb, block_group);
+	for (bit = 0; bit < bit_max; bit++)
+		ext4_set_bit(bit, bh->b_data);
 
-		for (bit = 0; bit < bit_max; bit++)
-			ext4_set_bit(bit, bh->b_data);
+	start = ext4_group_first_block_no(sb, block_group);
 
-		start = ext4_group_first_block_no(sb, block_group);
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+		flex_bg = 1;
 
-		if (EXT4_HAS_INCOMPAT_FEATURE(sb,
-					      EXT4_FEATURE_INCOMPAT_FLEX_BG))
-			flex_bg = 1;
+	/* Set bits for block and inode bitmaps, and inode table */
+	tmp = ext4_block_bitmap(sb, gdp);
+	if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+		ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
 
-		/* Set bits for block and inode bitmaps, and inode table */
-		tmp = ext4_block_bitmap(sb, gdp);
-		if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
-			ext4_set_bit(tmp - start, bh->b_data);
+	tmp = ext4_inode_bitmap(sb, gdp);
+	if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+		ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
 
-		tmp = ext4_inode_bitmap(sb, gdp);
+	tmp = ext4_inode_table(sb, gdp);
+	for (; tmp < ext4_inode_table(sb, gdp) +
+		     sbi->s_itb_per_group; tmp++) {
 		if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
-			ext4_set_bit(tmp - start, bh->b_data);
-
-		tmp = ext4_inode_table(sb, gdp);
-		for (; tmp < ext4_inode_table(sb, gdp) +
-				sbi->s_itb_per_group; tmp++) {
-			if (!flex_bg ||
-			    ext4_block_in_group(sb, tmp, block_group))
-				ext4_set_bit(tmp - start, bh->b_data);
-		}
-		/*
-		 * Also if the number of blocks within the group is
-		 * less than the blocksize * 8 ( which is the size
-		 * of bitmap ), set rest of the block bitmap to 1
-		 */
-		ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
-				     bh->b_data);
+			ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
 	}
-	return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
+
+	/*
+	 * Also if the number of blocks within the group is less than
+	 * the blocksize * 8 ( which is the size of bitmap ), set rest
+	 * of the block bitmap to 1
+	 */
+	ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
+			     sb->s_blocksize * 8, bh->b_data);
 }
 
+/* Return the number of free blocks in a block group.  It is used when
+ * the block bitmap is uninitialized, so we can't just count the bits
+ * in the bitmap. */
+unsigned ext4_free_clusters_after_init(struct super_block *sb,
+				       ext4_group_t block_group,
+				       struct ext4_group_desc *gdp)
+{
+	return num_clusters_in_group(sb, block_group) -
+		ext4_num_overhead_clusters(sb, block_group, gdp);
+}
 
 /*
  * The free blocks are managed by bitmaps. A file system contains several
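The new ext4_free_clusters_after_init() above reduces to "clusters in the group minus overhead clusters", where num_clusters_in_group() rounds a possibly short last group up to whole clusters via EXT4_NUM_B2C(). A self-contained sketch of that rounding and subtraction (every count below is made up):

    /* Sketch of the free-after-init accounting; all values assumed. */
    #include <stdio.h>

    #define CLUSTER_BITS   4
    #define CLUSTER_RATIO  (1 << CLUSTER_BITS)
    #define NUM_B2C(blks)  (((blks) + CLUSTER_RATIO - 1) >> CLUSTER_BITS)

    int main(void)
    {
        unsigned blocks_in_last_group = 12345; /* short final group */
        unsigned overhead_clusters = 5;        /* sb + gdt + bitmaps + itable */

        unsigned clusters_in_group = NUM_B2C(blocks_in_last_group);
        unsigned free_after_init = clusters_in_group - overhead_clusters;

        printf("clusters: %u, free after init: %u\n",
               clusters_in_group, free_after_init);
        return 0;
    }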
@@ -362,53 +403,54 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 }
 
 /**
- * ext4_has_free_blocks()
+ * ext4_has_free_clusters()
  * @sbi:	in-core super block structure.
- * @nblocks:	number of needed blocks
+ * @nclusters:	number of needed blocks
+ * @flags:	flags from ext4_mb_new_blocks()
  *
- * Check if filesystem has nblocks free & available for allocation.
+ * Check if filesystem has nclusters free & available for allocation.
  * On success return 1, return 0 on failure.
  */
-static int ext4_has_free_blocks(struct ext4_sb_info *sbi,
-				s64 nblocks, unsigned int flags)
+static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
+				  s64 nclusters, unsigned int flags)
 {
-	s64 free_blocks, dirty_blocks, root_blocks;
-	struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
-	struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
+	s64 free_clusters, dirty_clusters, root_clusters;
+	struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
+	struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
 
-	free_blocks  = percpu_counter_read_positive(fbc);
-	dirty_blocks = percpu_counter_read_positive(dbc);
-	root_blocks = ext4_r_blocks_count(sbi->s_es);
+	free_clusters  = percpu_counter_read_positive(fcc);
+	dirty_clusters = percpu_counter_read_positive(dcc);
+	root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es));
 
-	if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
-						EXT4_FREEBLOCKS_WATERMARK) {
-		free_blocks  = percpu_counter_sum_positive(fbc);
-		dirty_blocks = percpu_counter_sum_positive(dbc);
+	if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
+					EXT4_FREECLUSTERS_WATERMARK) {
+		free_clusters  = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc));
+		dirty_clusters = percpu_counter_sum_positive(dcc);
 	}
-	/* Check whether we have space after
-	 * accounting for current dirty blocks & root reserved blocks.
+	/* Check whether we have space after accounting for current
+	 * dirty clusters & root reserved clusters.
	 */
-	if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks))
+	if (free_clusters >= ((root_clusters + nclusters) + dirty_clusters))
 		return 1;
 
-	/* Hm, nope. Are (enough) root reserved blocks available? */
+	/* Hm, nope. Are (enough) root reserved clusters available? */
 	if (sbi->s_resuid == current_fsuid() ||
 	    ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
 	    capable(CAP_SYS_RESOURCE) ||
 		(flags & EXT4_MB_USE_ROOT_BLOCKS)) {
 
-		if (free_blocks >= (nblocks + dirty_blocks))
+		if (free_clusters >= (nclusters + dirty_clusters))
 			return 1;
 	}
 
 	return 0;
 }
 
-int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
-			   s64 nblocks, unsigned int flags)
+int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
+			     s64 nclusters, unsigned int flags)
 {
-	if (ext4_has_free_blocks(sbi, nblocks, flags)) {
-		percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks);
+	if (ext4_has_free_clusters(sbi, nclusters, flags)) {
+		percpu_counter_add(&sbi->s_dirtyclusters_counter, nclusters);
 		return 0;
 	} else
 		return -ENOSPC;
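ext4_has_free_clusters() keeps the old two-speed pattern: a cheap percpu_counter_read_positive() first, and only when the result lands within EXT4_FREECLUSTERS_WATERMARK of the request does it pay for an exact percpu_counter_sum_positive(). A simplified model of that decision (the counter values and watermark below are stand-ins, not ext4's):

    /* Toy model of the approximate-then-exact counter check. */
    #include <stdio.h>

    #define WATERMARK 64                 /* EXT4_FREECLUSTERS_WATERMARK stand-in */

    static long long approx_free = 1000; /* cheap, possibly stale read */
    static long long exact_free  = 990;  /* expensive cross-CPU sum */

    static int has_free(long long want)
    {
        long long free = approx_free;

        if (free - want < WATERMARK)     /* too close to call: sum exactly */
            free = exact_free;
        return free >= want;
    }

    int main(void)
    {
        printf("want 900: %d\n", has_free(900)); /* fast path suffices */
        printf("want 995: %d\n", has_free(995)); /* falls back to exact */
        return 0;
    }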
@@ -428,7 +470,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
  */
 int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 {
-	if (!ext4_has_free_blocks(EXT4_SB(sb), 1, 0) ||
+	if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
 	    (*retries)++ > 3 ||
 	    !EXT4_SB(sb)->s_journal)
 		return 0;
@@ -444,7 +486,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
  * @handle:  handle to this transaction
  * @inode:  file inode
  * @goal:  given target block(filesystem wide)
- * @count:  pointer to total number of blocks needed
+ * @count:  pointer to total number of clusters needed
  * @errp:  error code
  *
  * Return 1st allocated block number on success, *count stores total account
@@ -476,18 +518,19 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 		spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 		EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
 		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-		dquot_alloc_block_nofail(inode, ar.len);
+		dquot_alloc_block_nofail(inode,
+				EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
 	}
 	return ret;
 }
 
 /**
- * ext4_count_free_blocks() -- count filesystem free blocks
+ * ext4_count_free_clusters() -- count filesystem free clusters
  * @sb:	superblock
  *
- * Adds up the number of free blocks from each block group.
+ * Adds up the number of free clusters from each block group.
  */
-ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
+ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
 {
 	ext4_fsblk_t desc_count;
 	struct ext4_group_desc *gdp;
@@ -508,7 +551,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
-		desc_count += ext4_free_blks_count(sb, gdp);
+		desc_count += ext4_free_group_clusters(sb, gdp);
 		brelse(bitmap_bh);
 		bitmap_bh = ext4_read_block_bitmap(sb, i);
 		if (bitmap_bh == NULL)
@@ -516,12 +559,13 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 
 		x = ext4_count_free(bitmap_bh, sb->s_blocksize);
 		printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
-			i, ext4_free_blks_count(sb, gdp), x);
+			i, ext4_free_group_clusters(sb, gdp), x);
 		bitmap_count += x;
 	}
 	brelse(bitmap_bh);
-	printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu"
-		", computed = %llu, %llu\n", ext4_free_blocks_count(es),
+	printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
+	       ", computed = %llu, %llu\n",
+	       EXT4_B2C(sbi, ext4_free_blocks_count(es)),
 	       desc_count, bitmap_count);
 	return bitmap_count;
 #else
@@ -530,7 +574,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
-		desc_count += ext4_free_blks_count(sb, gdp);
+		desc_count += ext4_free_group_clusters(sb, gdp);
 	}
 
 	return desc_count;
@@ -620,6 +664,31 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
 
 }
 
+/*
+ * This function returns the number of file system metadata clusters at
+ * the beginning of a block group, including the reserved gdt blocks.
+ */
+unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+				     ext4_group_t block_group)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	unsigned num;
+
+	/* Check for superblock and gdt backups in this group */
+	num = ext4_bg_has_super(sb, block_group);
+
+	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
+	    block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
+			  sbi->s_desc_per_block) {
+		if (num) {
+			num += ext4_bg_num_gdb(sb, block_group);
+			num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+		}
+	} else { /* For META_BG_BLOCK_GROUPS */
+		num += ext4_bg_num_gdb(sb, block_group);
+	}
+	return EXT4_NUM_B2C(sbi, num);
+}
 /**
 * ext4_inode_to_goal_block - return a hint for block allocation
 * @inode: inode for block allocation
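The new ext4_num_base_meta_clusters() at the end of this file counts the superblock backup, group descriptor blocks, and reserved gdt blocks at the front of a group, then rounds the block count up to clusters with EXT4_NUM_B2C(). The same computation in isolation (every number below is hypothetical):

    /* Sketch: base metadata blocks rounded up to clusters. */
    #include <stdio.h>

    int main(void)
    {
        unsigned has_super = 1;      /* this group carries a sb/gdt backup */
        unsigned gdt_blocks = 13;    /* ext4_bg_num_gdb() result */
        unsigned reserved_gdt = 256; /* s_reserved_gdt_blocks */
        unsigned cluster_ratio = 16, cluster_bits = 4;

        unsigned num = has_super ? 1 + gdt_blocks + reserved_gdt : 0;
        unsigned clusters = (num + cluster_ratio - 1) >> cluster_bits;

        printf("%u metadata blocks -> %u clusters\n", num, clusters);
        return 0;
    }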
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cec3145e532..5b0e26a1272 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -144,9 +144,17 @@ struct ext4_allocation_request {
 #define EXT4_MAP_UNWRITTEN	(1 << BH_Unwritten)
 #define EXT4_MAP_BOUNDARY	(1 << BH_Boundary)
 #define EXT4_MAP_UNINIT		(1 << BH_Uninit)
+/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
+ * ext4_map_blocks wants to know whether or not the underlying cluster has
+ * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
+ * the requested mapping was from previously mapped (or delayed allocated)
+ * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
+ * should never appear on buffer_head's state flags.
+ */
+#define EXT4_MAP_FROM_CLUSTER	(1 << BH_AllocFromCluster)
 #define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
 				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
-				 EXT4_MAP_UNINIT)
+				 EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
 
 struct ext4_map_blocks {
 	ext4_fsblk_t m_pblk;
@@ -239,8 +247,11 @@ struct ext4_io_submit {
 # define EXT4_BLOCK_SIZE(s)		(EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
 #endif
 #define	EXT4_ADDR_PER_BLOCK(s)		(EXT4_BLOCK_SIZE(s) / sizeof(__u32))
+#define EXT4_CLUSTER_SIZE(s)		(EXT4_BLOCK_SIZE(s) <<		\
+					 EXT4_SB(s)->s_cluster_bits)
 #ifdef __KERNEL__
 # define EXT4_BLOCK_SIZE_BITS(s)	((s)->s_blocksize_bits)
+# define EXT4_CLUSTER_BITS(s)		(EXT4_SB(s)->s_cluster_bits)
 #else
 # define EXT4_BLOCK_SIZE_BITS(s)	((s)->s_log_block_size + 10)
 #endif
@@ -258,6 +269,14 @@ struct ext4_io_submit {
 #endif
 #define EXT4_BLOCK_ALIGN(size, blkbits)	ALIGN((size), (1 << (blkbits)))
 
+/* Translate a block number to a cluster number */
+#define EXT4_B2C(sbi, blk)	((blk) >> (sbi)->s_cluster_bits)
+/* Translate a cluster number to a block number */
+#define EXT4_C2B(sbi, cluster)	((cluster) << (sbi)->s_cluster_bits)
+/* Translate # of blks to # of clusters */
+#define EXT4_NUM_B2C(sbi, blks)	(((blks) + (sbi)->s_cluster_ratio - 1) >> \
+				 (sbi)->s_cluster_bits)
+
 /*
  * Structure of a blocks group descriptor
  */
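These three macros carry most of the bigalloc arithmetic: B2C shifts a block number down to its cluster, C2B shifts a cluster back to its first block, and NUM_B2C converts a count of blocks to a count of clusters, rounding up. A standalone demo with an assumed 16-block cluster (s_cluster_bits = 4):

    /* Demo of the conversion macros above, inlined for userspace. */
    #include <stdio.h>

    #define CLUSTER_BITS   4
    #define CLUSTER_RATIO  (1 << CLUSTER_BITS) /* s_cluster_ratio = 16 */

    #define B2C(blk)       ((blk) >> CLUSTER_BITS)
    #define C2B(cluster)   ((cluster) << CLUSTER_BITS)
    #define NUM_B2C(blks)  (((blks) + CLUSTER_RATIO - 1) >> CLUSTER_BITS)

    int main(void)
    {
        printf("block 100 lives in cluster %d\n", B2C(100));   /* 6  */
        printf("cluster 6 starts at block %d\n", C2B(6));      /* 96 */
        printf("17 blocks span %d cluster(s)\n", NUM_B2C(17)); /* 2  */
        return 0;
    }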
@@ -289,7 +308,7 @@ struct ext4_group_desc
 
 struct flex_groups {
 	atomic_t	free_inodes;
-	atomic_t	free_blocks;
+	atomic_t	free_clusters;
 	atomic_t	used_dirs;
 };
 
@@ -306,6 +325,7 @@ struct flex_groups {
 #define EXT4_DESC_SIZE(s)		(EXT4_SB(s)->s_desc_size)
 #ifdef __KERNEL__
 # define EXT4_BLOCKS_PER_GROUP(s)	(EXT4_SB(s)->s_blocks_per_group)
+# define EXT4_CLUSTERS_PER_GROUP(s)	(EXT4_SB(s)->s_clusters_per_group)
 # define EXT4_DESC_PER_BLOCK(s)		(EXT4_SB(s)->s_desc_per_block)
 # define EXT4_INODES_PER_GROUP(s)	(EXT4_SB(s)->s_inodes_per_group)
 # define EXT4_DESC_PER_BLOCK_BITS(s)	(EXT4_SB(s)->s_desc_per_block_bits)
@@ -358,8 +378,7 @@ struct flex_groups {
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
-			   EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
-			   EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
+			   EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
 			   EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
 			   EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
 
@@ -520,6 +539,8 @@ struct ext4_new_group_data {
 #define EXT4_GET_BLOCKS_PUNCH_OUT_EXT		0x0020
 	/* Don't normalize allocation size (used for fallocate) */
 #define EXT4_GET_BLOCKS_NO_NORMALIZE		0x0040
+	/* Request will not result in inode size update (user for fallocate) */
+#define EXT4_GET_BLOCKS_KEEP_SIZE		0x0080
 
 /*
  * Flags used by ext4_free_blocks
@@ -528,6 +549,13 @@ struct ext4_new_group_data {
 #define EXT4_FREE_BLOCKS_FORGET		0x0002
 #define EXT4_FREE_BLOCKS_VALIDATED	0x0004
 #define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE	0x0008
+#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER	0x0010
+#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER	0x0020
+
+/*
+ * Flags used by ext4_discard_partial_page_buffers
+ */
+#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED	0x0001
 
 /*
  * ioctl commands
@@ -538,9 +566,6 @@ struct ext4_new_group_data {
 #define EXT4_IOC_SETVERSION		_IOW('f', 4, long)
 #define EXT4_IOC_GETVERSION_OLD		FS_IOC_GETVERSION
 #define EXT4_IOC_SETVERSION_OLD		FS_IOC_SETVERSION
-#ifdef CONFIG_JBD2_DEBUG
-#define EXT4_IOC_WAIT_FOR_READONLY	_IOR('f', 99, long)
-#endif
 #define EXT4_IOC_GETRSVSZ		_IOR('f', 5, long)
 #define EXT4_IOC_SETRSVSZ		_IOW('f', 6, long)
 #define EXT4_IOC_GROUP_EXTEND		_IOW('f', 7, unsigned long)
@@ -563,9 +588,6 @@ struct ext4_new_group_data {
 #define EXT4_IOC32_SETRSVSZ		_IOW('f', 6, int)
 #define EXT4_IOC32_GROUP_EXTEND		_IOW('f', 7, unsigned int)
 #define EXT4_IOC32_GROUP_ADD		_IOW('f', 8, struct compat_ext4_new_group_input)
-#ifdef CONFIG_JBD2_DEBUG
-#define EXT4_IOC32_WAIT_FOR_READONLY	_IOR('f', 99, int)
-#endif
 #define EXT4_IOC32_GETVERSION_OLD	FS_IOC32_GETVERSION
 #define EXT4_IOC32_SETVERSION_OLD	FS_IOC32_SETVERSION
 #endif
@@ -837,6 +859,7 @@ struct ext4_inode_info {
 	ext4_group_t	i_last_alloc_group;
 
 	/* allocation reservation info for delalloc */
+	/* In case of bigalloc, these refer to clusters rather than blocks */
 	unsigned int i_reserved_data_blocks;
 	unsigned int i_reserved_meta_blocks;
 	unsigned int i_allocated_meta_blocks;
@@ -886,7 +909,6 @@ struct ext4_inode_info {
 /*
  * Mount flags
 */
-#define EXT4_MOUNT_OLDALLOC		0x00002  /* Don't use the new Orlov allocator */
 #define EXT4_MOUNT_GRPID		0x00004	/* Create files with directory's group */
 #define EXT4_MOUNT_DEBUG		0x00008	/* Some debugging messages */
 #define EXT4_MOUNT_ERRORS_CONT		0x00010	/* Continue on errors */
@@ -918,6 +940,9 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_DISCARD		0x40000000 /* Issue DISCARD requests */
 #define EXT4_MOUNT_INIT_INODE_TABLE	0x80000000 /* Initialize uninitialized itables */
 
+#define EXT4_MOUNT2_EXPLICIT_DELALLOC	0x00000001 /* User explicitly
+						      specified delalloc */
+
 #define clear_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt &= \
 						~EXT4_MOUNT_##opt
 #define set_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt |= \
@@ -968,9 +993,9 @@ struct ext4_super_block {
 /*10*/	__le32	s_free_inodes_count;	/* Free inodes count */
 	__le32	s_first_data_block;	/* First Data Block */
 	__le32	s_log_block_size;	/* Block size */
-	__le32	s_obso_log_frag_size;	/* Obsoleted fragment size */
+	__le32	s_log_cluster_size;	/* Allocation cluster size */
 /*20*/	__le32	s_blocks_per_group;	/* # Blocks per group */
-	__le32	s_obso_frags_per_group;	/* Obsoleted fragments per group */
+	__le32	s_clusters_per_group;	/* # Clusters per group */
 	__le32	s_inodes_per_group;	/* # Inodes per group */
 	__le32	s_mtime;		/* Mount time */
 /*30*/	__le32	s_wtime;		/* Write time */
@@ -1066,7 +1091,10 @@ struct ext4_super_block {
 	__u8	s_last_error_func[32];	/* function where the error happened */
 #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
 	__u8	s_mount_opts[64];
-	__le32	s_reserved[112];	/* Padding to the end of the block */
+	__le32	s_usr_quota_inum;	/* inode for tracking user quota */
+	__le32	s_grp_quota_inum;	/* inode for tracking group quota */
+	__le32	s_overhead_clusters;	/* overhead blocks/clusters in fs */
+	__le32	s_reserved[109];	/* Padding to the end of the block */
 };
 
 #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
@@ -1086,6 +1114,7 @@ struct ext4_sb_info {
 	unsigned long s_desc_size;	/* Size of a group descriptor in bytes */
 	unsigned long s_inodes_per_block;/* Number of inodes per block */
 	unsigned long s_blocks_per_group;/* Number of blocks in a group */
+	unsigned long s_clusters_per_group; /* Number of clusters in a group */
 	unsigned long s_inodes_per_group;/* Number of inodes in a group */
 	unsigned long s_itb_per_group;	/* Number of inode table blocks per group */
 	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
@@ -1094,6 +1123,8 @@ struct ext4_sb_info {
 	ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
 	unsigned long s_overhead_last;  /* Last calculated overhead */
 	unsigned long s_blocks_last;    /* Last seen block count */
+	unsigned int s_cluster_ratio;	/* Number of blocks per cluster */
+	unsigned int s_cluster_bits;	/* log2 of s_cluster_ratio */
 	loff_t s_bitmap_maxbytes;	/* max bytes for bitmap files */
 	struct buffer_head * s_sbh;	/* Buffer containing the super block */
 	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
@@ -1117,10 +1148,10 @@ struct ext4_sb_info {
 	u32 s_hash_seed[4];
 	int s_def_hash_version;
 	int s_hash_unsigned;	/* 3 if hash should be signed, 0 if not */
-	struct percpu_counter s_freeblocks_counter;
+	struct percpu_counter s_freeclusters_counter;
 	struct percpu_counter s_freeinodes_counter;
 	struct percpu_counter s_dirs_counter;
-	struct percpu_counter s_dirtyblocks_counter;
+	struct percpu_counter s_dirtyclusters_counter;
 	struct blockgroup_lock *s_blockgroup_lock;
 	struct proc_dir_entry *s_proc;
 	struct kobject s_kobj;
@@ -1136,10 +1167,6 @@ struct ext4_sb_info {
 	u32 s_max_batch_time;
 	u32 s_min_batch_time;
 	struct block_device *journal_bdev;
-#ifdef CONFIG_JBD2_DEBUG
-	struct timer_list turn_ro_timer;	/* For turning read-only (crash simulation) */
-	wait_queue_head_t ro_wait_queue;	/* For people waiting for the fs to go read-only */
-#endif
 #ifdef CONFIG_QUOTA
 	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
 	int s_jquota_fmt;			/* Format of quota to use */
@@ -1248,6 +1275,15 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 		ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
 
+static inline void ext4_set_io_unwritten_flag(struct inode *inode,
+					      struct ext4_io_end *io_end)
+{
+	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+		io_end->flag |= EXT4_IO_END_UNWRITTEN;
+		atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+	}
+}
+
 /*
  * Inode dynamic state flags
  */
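The new ext4_set_io_unwritten_flag() helper (the "Create helper function for EXT4_IO_END_UNWRITTEN and i_aiodio_unwritten" commit from the shortlog) makes setting the flag idempotent: the counter is bumped only on the first transition. A userspace model of the same pattern (types and names here are invented stand-ins):

    /* Model: bump the counter only on the 0 -> 1 flag transition. */
    #include <stdio.h>
    #include <stdatomic.h>

    #define IO_END_UNWRITTEN 0x0001

    struct io_end { unsigned int flag; };
    static atomic_int aiodio_unwritten;

    static void set_io_unwritten_flag(struct io_end *io_end)
    {
        if (!(io_end->flag & IO_END_UNWRITTEN)) {
            io_end->flag |= IO_END_UNWRITTEN;
            atomic_fetch_add(&aiodio_unwritten, 1);
        }
    }

    int main(void)
    {
        struct io_end io = { 0 };

        set_io_unwritten_flag(&io);
        set_io_unwritten_flag(&io); /* second call is a no-op */
        printf("counter = %d\n", atomic_load(&aiodio_unwritten)); /* 1 */
        return 0;
    }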
@@ -1360,6 +1396,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK	0x0020
 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE	0x0040
 #define EXT4_FEATURE_RO_COMPAT_QUOTA		0x0100
+#define EXT4_FEATURE_RO_COMPAT_BIGALLOC		0x0200
 
 #define EXT4_FEATURE_INCOMPAT_COMPRESSION	0x0001
 #define EXT4_FEATURE_INCOMPAT_FILETYPE		0x0002
@@ -1402,7 +1439,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 					 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
 					 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
 					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
-					 EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
+					 EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
+					 EXT4_FEATURE_RO_COMPAT_BIGALLOC)
 
 /*
  * Default values for user and/or group using reserved blocks
@@ -1735,9 +1773,9 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 					 unsigned int flags,
 					 unsigned long *count,
 					 int *errp);
-extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
-				  s64 nblocks, unsigned int flags);
-extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
+extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
+				    s64 nclusters, unsigned int flags);
+extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
 extern void ext4_check_blocks_bitmap(struct super_block *);
 extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
 						    ext4_group_t block_group,
@@ -1745,12 +1783,18 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
 struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
 						ext4_group_t block_group);
-extern unsigned ext4_init_block_bitmap(struct super_block *sb,
+extern void ext4_init_block_bitmap(struct super_block *sb,
 				       struct buffer_head *bh,
 				       ext4_group_t group,
 				       struct ext4_group_desc *desc);
-#define ext4_free_blocks_after_init(sb, group, desc)			\
-	ext4_init_block_bitmap(sb, NULL, group, desc)
+extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
+					      ext4_group_t block_group,
+					      struct ext4_group_desc *gdp);
+extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+					    ext4_group_t block_group);
+extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
+					   ext4_group_t block_group,
+					   struct ext4_group_desc *gdp);
 ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
 
 /* dir.c */
@@ -1776,7 +1820,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
 
 /* ialloc.c */
 extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
-				    const struct qstr *qstr, __u32 goal);
+				    const struct qstr *qstr, __u32 goal,
+				    uid_t *owner);
 extern void ext4_free_inode(handle_t *, struct inode *);
 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -1839,6 +1884,12 @@ extern int ext4_block_truncate_page(handle_t *handle,
 		struct address_space *mapping, loff_t from);
 extern int ext4_block_zero_page_range(handle_t *handle,
 		struct address_space *mapping, loff_t from, loff_t length);
+extern int ext4_discard_partial_page_buffers(handle_t *handle,
+		struct address_space *mapping, loff_t from,
+		loff_t length, int flags);
+extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+		struct inode *inode, struct page *page, loff_t from,
+		loff_t length, int flags);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -1927,8 +1978,8 @@ extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 				      struct ext4_group_desc *bg);
 extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 				     struct ext4_group_desc *bg);
-extern __u32 ext4_free_blks_count(struct super_block *sb,
-				  struct ext4_group_desc *bg);
+extern __u32 ext4_free_group_clusters(struct super_block *sb,
+				      struct ext4_group_desc *bg);
 extern __u32 ext4_free_inodes_count(struct super_block *sb,
 				    struct ext4_group_desc *bg);
 extern __u32 ext4_used_dirs_count(struct super_block *sb,
@@ -1941,8 +1992,9 @@ extern void ext4_inode_bitmap_set(struct super_block *sb,
 				  struct ext4_group_desc *bg, ext4_fsblk_t blk);
 extern void ext4_inode_table_set(struct super_block *sb,
 				 struct ext4_group_desc *bg, ext4_fsblk_t blk);
-extern void ext4_free_blks_set(struct super_block *sb,
-			       struct ext4_group_desc *bg, __u32 count);
+extern void ext4_free_group_clusters_set(struct super_block *sb,
+					 struct ext4_group_desc *bg,
+					 __u32 count);
 extern void ext4_free_inodes_set(struct super_block *sb,
 				 struct ext4_group_desc *bg, __u32 count);
 extern void ext4_used_dirs_set(struct super_block *sb,
@@ -2051,13 +2103,13 @@ do {								\
 } while (0)
 
 #ifdef CONFIG_SMP
-/* Each CPU can accumulate percpu_counter_batch blocks in their local
- * counters. So we need to make sure we have free blocks more
+/* Each CPU can accumulate percpu_counter_batch clusters in their local
+ * counters. So we need to make sure we have free clusters more
  * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times.
  */
-#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
+#define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
 #else
-#define EXT4_FREEBLOCKS_WATERMARK 0
+#define EXT4_FREECLUSTERS_WATERMARK 0
 #endif
 
 static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
@@ -2243,10 +2295,19 @@ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
 enum ext4_state_bits {
 	BH_Uninit	/* blocks are allocated but uninitialized on disk */
 	 = BH_JBDPrivateStart,
+	BH_AllocFromCluster,	/* allocated blocks were part of already
+				 * allocated cluster. Note that this flag will
+				 * never, ever appear in a buffer_head's state
+				 * flag. See EXT4_MAP_FROM_CLUSTER to see where
+				 * this is used. */
+	BH_Da_Mapped,	/* Delayed allocated block that now has a mapping. This
+			 * flag is set when ext4_map_blocks is called on a
+			 * delayed allocated block to get its real mapping. */
 };
 
 BUFFER_FNS(Uninit, uninit)
 TAS_BUFFER_FNS(Uninit, uninit)
+BUFFER_FNS(Da_Mapped, da_mapped)
 
 /*
  * Add new method to test wether block and inode bitmaps are properly
@@ -2282,4 +2343,6 @@ extern void ext4_resize_end(struct super_block *sb);
 
 #endif	/* __KERNEL__ */
 
+#include "ext4_extents.h"
+
 #endif	/* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 095c36f3b61..a52db3a69a3 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -290,5 +290,7 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
 							struct ext4_ext_path *);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern int ext4_ext_check_inode(struct inode *inode);
+extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
+				      int search_hint_reverse);
 #endif /* _EXT4_EXTENTS */
 
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index f5240aa1560..aca17901758 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -109,9 +109,11 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 
 	if (ext4_handle_valid(handle)) {
 		err = jbd2_journal_dirty_metadata(handle, bh);
-		if (err)
-			ext4_journal_abort_handle(where, line, __func__,
-						  bh, handle, err);
+		if (err) {
+			/* Errors can only happen if there is a bug */
+			handle->h_err = err;
+			__ext4_journal_stop(where, line, handle);
+		}
 	} else {
 		if (inode)
 			mark_buffer_dirty_inode(bh, inode);
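Instead of aborting the whole journal when jbd2_journal_dirty_metadata() fails, the handle now records the error and is stopped, which propagates the failure to the caller. A minimal model of that record-and-stop pattern (the structs below are invented stand-ins for the jbd2 ones):

    /* Model: remember the first error in the handle, then stop it. */
    #include <stdio.h>

    struct handle { int h_err; int stopped; };

    static void journal_stop(struct handle *h)
    {
        h->stopped = 1;
    }

    static void handle_dirty_metadata(struct handle *h, int err)
    {
        if (err) {
            /* Errors can only happen if there is a bug */
            h->h_err = err;
            journal_stop(h);
        }
    }

    int main(void)
    {
        struct handle h = { 0, 0 };

        handle_dirty_metadata(&h, -22); /* simulated -EINVAL from jbd2 */
        printf("h_err=%d stopped=%d\n", h.h_err, h.stopped);
        return 0;
    }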
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 57cf568a98a..61fa9e1614a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -42,7 +42,6 @@ | |||
42 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
43 | #include <linux/fiemap.h> | 43 | #include <linux/fiemap.h> |
44 | #include "ext4_jbd2.h" | 44 | #include "ext4_jbd2.h" |
45 | #include "ext4_extents.h" | ||
46 | 45 | ||
47 | #include <trace/events/ext4.h> | 46 | #include <trace/events/ext4.h> |
48 | 47 | ||
@@ -96,13 +95,17 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode, | |||
96 | * - ENOMEM | 95 | * - ENOMEM |
97 | * - EIO | 96 | * - EIO |
98 | */ | 97 | */ |
99 | static int ext4_ext_dirty(handle_t *handle, struct inode *inode, | 98 | #define ext4_ext_dirty(handle, inode, path) \ |
100 | struct ext4_ext_path *path) | 99 | __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path)) |
100 | static int __ext4_ext_dirty(const char *where, unsigned int line, | ||
101 | handle_t *handle, struct inode *inode, | ||
102 | struct ext4_ext_path *path) | ||
101 | { | 103 | { |
102 | int err; | 104 | int err; |
103 | if (path->p_bh) { | 105 | if (path->p_bh) { |
104 | /* path points to block */ | 106 | /* path points to block */ |
105 | err = ext4_handle_dirty_metadata(handle, inode, path->p_bh); | 107 | err = __ext4_handle_dirty_metadata(where, line, handle, |
108 | inode, path->p_bh); | ||
106 | } else { | 109 | } else { |
107 | /* path points to leaf/index in inode body */ | 110 | /* path points to leaf/index in inode body */ |
108 | err = ext4_mark_inode_dirty(handle, inode); | 111 | err = ext4_mark_inode_dirty(handle, inode); |
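The hunk above turns ext4_ext_dirty() into a macro wrapper so that __func__ and __LINE__ are evaluated at the call site and forwarded to __ext4_handle_dirty_metadata(). A minimal userspace sketch of this call-site capture pattern (the names here are illustrative, not ext4 APIs):

    #include <stdio.h>

    /* The macro expands in the caller, so __func__ and __LINE__
     * identify the caller rather than the helper. */
    #define log_err(msg)  __log_err(__func__, __LINE__, (msg))

    static void __log_err(const char *where, unsigned int line,
                          const char *msg)
    {
            fprintf(stderr, "%s:%u: %s\n", where, line, msg);
    }

    int main(void)
    {
            log_err("dirty failed");  /* reports main:<line>, not __log_err */
            return 0;
    }

Had __func__ been used inside the helper itself, every diagnostic would name the helper; the macro is what makes error reports point at the real caller.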
@@ -114,11 +117,9 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
114 | struct ext4_ext_path *path, | 117 | struct ext4_ext_path *path, |
115 | ext4_lblk_t block) | 118 | ext4_lblk_t block) |
116 | { | 119 | { |
117 | int depth; | ||
118 | |||
119 | if (path) { | 120 | if (path) { |
121 | int depth = path->p_depth; | ||
120 | struct ext4_extent *ex; | 122 | struct ext4_extent *ex; |
121 | depth = path->p_depth; | ||
122 | 123 | ||
123 | /* | 124 | /* |
124 | * Try to predict block placement assuming that we are | 125 | * Try to predict block placement assuming that we are |
@@ -180,12 +181,10 @@ static inline int ext4_ext_space_block(struct inode *inode, int check) | |||
180 | 181 | ||
181 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) | 182 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) |
182 | / sizeof(struct ext4_extent); | 183 | / sizeof(struct ext4_extent); |
183 | if (!check) { | ||
184 | #ifdef AGGRESSIVE_TEST | 184 | #ifdef AGGRESSIVE_TEST |
185 | if (size > 6) | 185 | if (!check && size > 6) |
186 | size = 6; | 186 | size = 6; |
187 | #endif | 187 | #endif |
188 | } | ||
189 | return size; | 188 | return size; |
190 | } | 189 | } |
191 | 190 | ||
@@ -195,12 +194,10 @@ static inline int ext4_ext_space_block_idx(struct inode *inode, int check) | |||
195 | 194 | ||
196 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) | 195 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) |
197 | / sizeof(struct ext4_extent_idx); | 196 | / sizeof(struct ext4_extent_idx); |
198 | if (!check) { | ||
199 | #ifdef AGGRESSIVE_TEST | 197 | #ifdef AGGRESSIVE_TEST |
200 | if (size > 5) | 198 | if (!check && size > 5) |
201 | size = 5; | 199 | size = 5; |
202 | #endif | 200 | #endif |
203 | } | ||
204 | return size; | 201 | return size; |
205 | } | 202 | } |
206 | 203 | ||
@@ -211,12 +208,10 @@ static inline int ext4_ext_space_root(struct inode *inode, int check) | |||
211 | size = sizeof(EXT4_I(inode)->i_data); | 208 | size = sizeof(EXT4_I(inode)->i_data); |
212 | size -= sizeof(struct ext4_extent_header); | 209 | size -= sizeof(struct ext4_extent_header); |
213 | size /= sizeof(struct ext4_extent); | 210 | size /= sizeof(struct ext4_extent); |
214 | if (!check) { | ||
215 | #ifdef AGGRESSIVE_TEST | 211 | #ifdef AGGRESSIVE_TEST |
216 | if (size > 3) | 212 | if (!check && size > 3) |
217 | size = 3; | 213 | size = 3; |
218 | #endif | 214 | #endif |
219 | } | ||
220 | return size; | 215 | return size; |
221 | } | 216 | } |
222 | 217 | ||
@@ -227,12 +222,10 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) | |||
227 | size = sizeof(EXT4_I(inode)->i_data); | 222 | size = sizeof(EXT4_I(inode)->i_data); |
228 | size -= sizeof(struct ext4_extent_header); | 223 | size -= sizeof(struct ext4_extent_header); |
229 | size /= sizeof(struct ext4_extent_idx); | 224 | size /= sizeof(struct ext4_extent_idx); |
230 | if (!check) { | ||
231 | #ifdef AGGRESSIVE_TEST | 225 | #ifdef AGGRESSIVE_TEST |
232 | if (size > 4) | 226 | if (!check && size > 4) |
233 | size = 4; | 227 | size = 4; |
234 | #endif | 228 | #endif |
235 | } | ||
236 | return size; | 229 | return size; |
237 | } | 230 | } |
238 | 231 | ||
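The four hunks above apply the same cleanup: rather than keeping an if (!check) block that is empty whenever AGGRESSIVE_TEST is off, the runtime condition is folded into the #ifdef'd statement. A hedged before/after sketch (the clamp value is the one from the first hunk):

    /* Before: the braces survive even when the #ifdef compiles away. */
    static int clamp_before(int size, int check)
    {
            if (!check) {
    #ifdef AGGRESSIVE_TEST
                    if (size > 6)
                            size = 6;
    #endif
            }
            return size;
    }

    /* After: a single statement, present only in AGGRESSIVE_TEST builds. */
    static int clamp_after(int size, int check)
    {
    #ifdef AGGRESSIVE_TEST
            if (!check && size > 6)
                    size = 6;  /* tiny nodes force early tree splits */
    #endif
            return size;
    }

Behavior is identical in both builds; AGGRESSIVE_TEST only shrinks node capacity so extent-tree splits are exercised with small files.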
@@ -244,7 +237,7 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) | |||
244 | int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) | 237 | int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) |
245 | { | 238 | { |
246 | struct ext4_inode_info *ei = EXT4_I(inode); | 239 | struct ext4_inode_info *ei = EXT4_I(inode); |
247 | int idxs, num = 0; | 240 | int idxs; |
248 | 241 | ||
249 | idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) | 242 | idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) |
250 | / sizeof(struct ext4_extent_idx)); | 243 | / sizeof(struct ext4_extent_idx)); |
@@ -259,6 +252,8 @@ int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) | |||
259 | */ | 252 | */ |
260 | if (ei->i_da_metadata_calc_len && | 253 | if (ei->i_da_metadata_calc_len && |
261 | ei->i_da_metadata_calc_last_lblock+1 == lblock) { | 254 | ei->i_da_metadata_calc_last_lblock+1 == lblock) { |
255 | int num = 0; | ||
256 | |||
262 | if ((ei->i_da_metadata_calc_len % idxs) == 0) | 257 | if ((ei->i_da_metadata_calc_len % idxs) == 0) |
263 | num++; | 258 | num++; |
264 | if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) | 259 | if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) |
@@ -321,8 +316,6 @@ static int ext4_valid_extent_entries(struct inode *inode, | |||
321 | struct ext4_extent_header *eh, | 316 | struct ext4_extent_header *eh, |
322 | int depth) | 317 | int depth) |
323 | { | 318 | { |
324 | struct ext4_extent *ext; | ||
325 | struct ext4_extent_idx *ext_idx; | ||
326 | unsigned short entries; | 319 | unsigned short entries; |
327 | if (eh->eh_entries == 0) | 320 | if (eh->eh_entries == 0) |
328 | return 1; | 321 | return 1; |
@@ -331,7 +324,7 @@ static int ext4_valid_extent_entries(struct inode *inode, | |||
331 | 324 | ||
332 | if (depth == 0) { | 325 | if (depth == 0) { |
333 | /* leaf entries */ | 326 | /* leaf entries */ |
334 | ext = EXT_FIRST_EXTENT(eh); | 327 | struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); |
335 | while (entries) { | 328 | while (entries) { |
336 | if (!ext4_valid_extent(inode, ext)) | 329 | if (!ext4_valid_extent(inode, ext)) |
337 | return 0; | 330 | return 0; |
@@ -339,7 +332,7 @@ static int ext4_valid_extent_entries(struct inode *inode, | |||
339 | entries--; | 332 | entries--; |
340 | } | 333 | } |
341 | } else { | 334 | } else { |
342 | ext_idx = EXT_FIRST_INDEX(eh); | 335 | struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); |
343 | while (entries) { | 336 | while (entries) { |
344 | if (!ext4_valid_extent_idx(inode, ext_idx)) | 337 | if (!ext4_valid_extent_idx(inode, ext_idx)) |
345 | return 0; | 338 | return 0; |
@@ -751,31 +744,30 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, | |||
751 | return -EIO; | 744 | return -EIO; |
752 | } | 745 | } |
753 | 746 | ||
754 | len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; | ||
755 | if (logical > le32_to_cpu(curp->p_idx->ei_block)) { | 747 | if (logical > le32_to_cpu(curp->p_idx->ei_block)) { |
756 | /* insert after */ | 748 | /* insert after */ |
757 | if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { | 749 | ext_debug("insert new index %d after: %llu\n", logical, ptr); |
758 | len = (len - 1) * sizeof(struct ext4_extent_idx); | ||
759 | len = len < 0 ? 0 : len; | ||
760 | ext_debug("insert new index %d after: %llu. " | ||
761 | "move %d from 0x%p to 0x%p\n", | ||
762 | logical, ptr, len, | ||
763 | (curp->p_idx + 1), (curp->p_idx + 2)); | ||
764 | memmove(curp->p_idx + 2, curp->p_idx + 1, len); | ||
765 | } | ||
766 | ix = curp->p_idx + 1; | 750 | ix = curp->p_idx + 1; |
767 | } else { | 751 | } else { |
768 | /* insert before */ | 752 | /* insert before */ |
769 | len = len * sizeof(struct ext4_extent_idx); | 753 | ext_debug("insert new index %d before: %llu\n", logical, ptr); |
770 | len = len < 0 ? 0 : len; | ||
771 | ext_debug("insert new index %d before: %llu. " | ||
772 | "move %d from 0x%p to 0x%p\n", | ||
773 | logical, ptr, len, | ||
774 | curp->p_idx, (curp->p_idx + 1)); | ||
775 | memmove(curp->p_idx + 1, curp->p_idx, len); | ||
776 | ix = curp->p_idx; | 754 | ix = curp->p_idx; |
777 | } | 755 | } |
778 | 756 | ||
757 | len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1; | ||
758 | BUG_ON(len < 0); | ||
759 | if (len > 0) { | ||
760 | ext_debug("insert new index %d: " | ||
761 | "move %d indices from 0x%p to 0x%p\n", | ||
762 | logical, len, ix, ix + 1); | ||
763 | memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx)); | ||
764 | } | ||
765 | |||
766 | if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) { | ||
767 | EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!"); | ||
768 | return -EIO; | ||
769 | } | ||
770 | |||
779 | ix->ei_block = cpu_to_le32(logical); | 771 | ix->ei_block = cpu_to_le32(logical); |
780 | ext4_idx_store_pblock(ix, ptr); | 772 | ext4_idx_store_pblock(ix, ptr); |
781 | le16_add_cpu(&curp->p_hdr->eh_entries, 1); | 773 | le16_add_cpu(&curp->p_hdr->eh_entries, 1); |
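The rewritten ext4_ext_insert_index() first selects the destination slot ix (one past curp->p_idx for an insert-after, curp->p_idx itself for an insert-before), then shifts the tail up with a single memmove(). A small userspace sketch of the shift-then-store pattern on a plain array:

    #include <string.h>

    /* Insert val at position pos in arr[0..nr-1]; caller guarantees room. */
    static void insert_at(int *arr, int nr, int pos, int val)
    {
            int tail = nr - pos;  /* elements at pos..nr-1 that must move */

            if (tail > 0)         /* open a one-slot gap */
                    memmove(&arr[pos + 1], &arr[pos],
                            tail * sizeof(arr[0]));
            arr[pos] = val;
    }

The kernel's len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1 is exactly this tail count expressed in index entries; the same shift-then-store rewrite is applied to extent insertion in ext4_ext_insert_extent() further down.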
@@ -1042,16 +1034,14 @@ cleanup: | |||
1042 | */ | 1034 | */ |
1043 | static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | 1035 | static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, |
1044 | unsigned int flags, | 1036 | unsigned int flags, |
1045 | struct ext4_ext_path *path, | ||
1046 | struct ext4_extent *newext) | 1037 | struct ext4_extent *newext) |
1047 | { | 1038 | { |
1048 | struct ext4_ext_path *curp = path; | ||
1049 | struct ext4_extent_header *neh; | 1039 | struct ext4_extent_header *neh; |
1050 | struct buffer_head *bh; | 1040 | struct buffer_head *bh; |
1051 | ext4_fsblk_t newblock; | 1041 | ext4_fsblk_t newblock; |
1052 | int err = 0; | 1042 | int err = 0; |
1053 | 1043 | ||
1054 | newblock = ext4_ext_new_meta_block(handle, inode, path, | 1044 | newblock = ext4_ext_new_meta_block(handle, inode, NULL, |
1055 | newext, &err, flags); | 1045 | newext, &err, flags); |
1056 | if (newblock == 0) | 1046 | if (newblock == 0) |
1057 | return err; | 1047 | return err; |
@@ -1071,7 +1061,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1071 | } | 1061 | } |
1072 | 1062 | ||
1073 | /* move top-level index/leaf into new block */ | 1063 | /* move top-level index/leaf into new block */ |
1074 | memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data)); | 1064 | memmove(bh->b_data, EXT4_I(inode)->i_data, |
1065 | sizeof(EXT4_I(inode)->i_data)); | ||
1075 | 1066 | ||
1076 | /* set size of new block */ | 1067 | /* set size of new block */ |
1077 | neh = ext_block_hdr(bh); | 1068 | neh = ext_block_hdr(bh); |
@@ -1089,32 +1080,23 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1089 | if (err) | 1080 | if (err) |
1090 | goto out; | 1081 | goto out; |
1091 | 1082 | ||
1092 | /* create index in new top-level index: num,max,pointer */ | 1083 | /* Update top-level index: num,max,pointer */ |
1093 | err = ext4_ext_get_access(handle, inode, curp); | ||
1094 | if (err) | ||
1095 | goto out; | ||
1096 | |||
1097 | curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; | ||
1098 | curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0)); | ||
1099 | curp->p_hdr->eh_entries = cpu_to_le16(1); | ||
1100 | curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); | ||
1101 | |||
1102 | if (path[0].p_hdr->eh_depth) | ||
1103 | curp->p_idx->ei_block = | ||
1104 | EXT_FIRST_INDEX(path[0].p_hdr)->ei_block; | ||
1105 | else | ||
1106 | curp->p_idx->ei_block = | ||
1107 | EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; | ||
1108 | ext4_idx_store_pblock(curp->p_idx, newblock); | ||
1109 | |||
1110 | neh = ext_inode_hdr(inode); | 1084 | neh = ext_inode_hdr(inode); |
1085 | neh->eh_entries = cpu_to_le16(1); | ||
1086 | ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock); | ||
1087 | if (neh->eh_depth == 0) { | ||
1088 | /* Root extent block becomes index block */ | ||
1089 | neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0)); | ||
1090 | EXT_FIRST_INDEX(neh)->ei_block = | ||
1091 | EXT_FIRST_EXTENT(neh)->ee_block; | ||
1092 | } | ||
1111 | ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", | 1093 | ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", |
1112 | le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), | 1094 | le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), |
1113 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), | 1095 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), |
1114 | ext4_idx_pblock(EXT_FIRST_INDEX(neh))); | 1096 | ext4_idx_pblock(EXT_FIRST_INDEX(neh))); |
1115 | 1097 | ||
1116 | neh->eh_depth = cpu_to_le16(path->p_depth + 1); | 1098 | neh->eh_depth = cpu_to_le16(neh->eh_depth + 1); |
1117 | err = ext4_ext_dirty(handle, inode, curp); | 1099 | ext4_mark_inode_dirty(handle, inode); |
1118 | out: | 1100 | out: |
1119 | brelse(bh); | 1101 | brelse(bh); |
1120 | 1102 | ||
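ext4_ext_grow_indepth() no longer takes a path: it copies the whole root node out of the inode's i_data into a freshly allocated block, then rewrites the root in place as a one-entry index over that copy. A schematic sketch of the root rewrite (header fields simplified, capacity value illustrative):

    struct node_hdr {
            unsigned short entries;  /* live entries in this node */
            unsigned short max;      /* node capacity */
            unsigned short depth;    /* 0 means leaf */
    };

    /* Called after the old root has been copied into the new block. */
    static void reroot(struct node_hdr *root, unsigned short idx_capacity)
    {
            root->entries = 1;           /* one pointer to the copy */
            if (root->depth == 0)        /* a leaf root becomes an index */
                    root->max = idx_capacity;
            root->depth += 1;            /* the tree grew one level */
    }

The first index entry inherits the logical block of the copied node's first entry, so lookups resolve to the same blocks one level further down.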
@@ -1162,8 +1144,7 @@ repeat: | |||
1162 | err = PTR_ERR(path); | 1144 | err = PTR_ERR(path); |
1163 | } else { | 1145 | } else { |
1164 | /* tree is full, time to grow in depth */ | 1146 | /* tree is full, time to grow in depth */ |
1165 | err = ext4_ext_grow_indepth(handle, inode, flags, | 1147 | err = ext4_ext_grow_indepth(handle, inode, flags, newext); |
1166 | path, newext); | ||
1167 | if (err) | 1148 | if (err) |
1168 | goto out; | 1149 | goto out; |
1169 | 1150 | ||
@@ -1235,9 +1216,9 @@ static int ext4_ext_search_left(struct inode *inode, | |||
1235 | if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { | 1216 | if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { |
1236 | EXT4_ERROR_INODE(inode, | 1217 | EXT4_ERROR_INODE(inode, |
1237 | "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", | 1218 | "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", |
1238 | ix != NULL ? ix->ei_block : 0, | 1219 | ix != NULL ? le32_to_cpu(ix->ei_block) : 0, |
1239 | EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? | 1220 | EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? |
1240 | EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0, | 1221 | le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0, |
1241 | depth); | 1222 | depth); |
1242 | return -EIO; | 1223 | return -EIO; |
1243 | } | 1224 | } |
@@ -1260,13 +1241,14 @@ static int ext4_ext_search_left(struct inode *inode, | |||
1260 | /* | 1241 | /* |
1261 | * search the closest allocated block to the right for *logical | 1242 | * search the closest allocated block to the right for *logical |
1262 | * and returns it at @logical + its physical address at @phys | 1243 | * and returns it at @logical + its physical address at @phys |
1263 | * if *logical is the smallest allocated block, the function | 1244 | * if *logical is the largest allocated block, the function |
1264 | * returns 0 at @phys | 1245 | * returns 0 at @phys |
1265 | * return value contains 0 (success) or error code | 1246 | * return value contains 0 (success) or error code |
1266 | */ | 1247 | */ |
1267 | static int ext4_ext_search_right(struct inode *inode, | 1248 | static int ext4_ext_search_right(struct inode *inode, |
1268 | struct ext4_ext_path *path, | 1249 | struct ext4_ext_path *path, |
1269 | ext4_lblk_t *logical, ext4_fsblk_t *phys) | 1250 | ext4_lblk_t *logical, ext4_fsblk_t *phys, |
1251 | struct ext4_extent **ret_ex) | ||
1270 | { | 1252 | { |
1271 | struct buffer_head *bh = NULL; | 1253 | struct buffer_head *bh = NULL; |
1272 | struct ext4_extent_header *eh; | 1254 | struct ext4_extent_header *eh; |
@@ -1308,9 +1290,7 @@ static int ext4_ext_search_right(struct inode *inode, | |||
1308 | return -EIO; | 1290 | return -EIO; |
1309 | } | 1291 | } |
1310 | } | 1292 | } |
1311 | *logical = le32_to_cpu(ex->ee_block); | 1293 | goto found_extent; |
1312 | *phys = ext4_ext_pblock(ex); | ||
1313 | return 0; | ||
1314 | } | 1294 | } |
1315 | 1295 | ||
1316 | if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { | 1296 | if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { |
@@ -1323,9 +1303,7 @@ static int ext4_ext_search_right(struct inode *inode, | |||
1323 | if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { | 1303 | if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { |
1324 | /* next allocated block in this leaf */ | 1304 | /* next allocated block in this leaf */ |
1325 | ex++; | 1305 | ex++; |
1326 | *logical = le32_to_cpu(ex->ee_block); | 1306 | goto found_extent; |
1327 | *phys = ext4_ext_pblock(ex); | ||
1328 | return 0; | ||
1329 | } | 1307 | } |
1330 | 1308 | ||
1331 | /* go up and search for index to the right */ | 1309 | /* go up and search for index to the right */ |
@@ -1368,9 +1346,12 @@ got_index: | |||
1368 | return -EIO; | 1346 | return -EIO; |
1369 | } | 1347 | } |
1370 | ex = EXT_FIRST_EXTENT(eh); | 1348 | ex = EXT_FIRST_EXTENT(eh); |
1349 | found_extent: | ||
1371 | *logical = le32_to_cpu(ex->ee_block); | 1350 | *logical = le32_to_cpu(ex->ee_block); |
1372 | *phys = ext4_ext_pblock(ex); | 1351 | *phys = ext4_ext_pblock(ex); |
1373 | put_bh(bh); | 1352 | *ret_ex = ex; |
1353 | if (bh) | ||
1354 | put_bh(bh); | ||
1374 | return 0; | 1355 | return 0; |
1375 | } | 1356 | } |
1376 | 1357 | ||
@@ -1395,7 +1376,8 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) | |||
1395 | while (depth >= 0) { | 1376 | while (depth >= 0) { |
1396 | if (depth == path->p_depth) { | 1377 | if (depth == path->p_depth) { |
1397 | /* leaf */ | 1378 | /* leaf */ |
1398 | if (path[depth].p_ext != | 1379 | if (path[depth].p_ext && |
1380 | path[depth].p_ext != | ||
1399 | EXT_LAST_EXTENT(path[depth].p_hdr)) | 1381 | EXT_LAST_EXTENT(path[depth].p_hdr)) |
1400 | return le32_to_cpu(path[depth].p_ext[1].ee_block); | 1382 | return le32_to_cpu(path[depth].p_ext[1].ee_block); |
1401 | } else { | 1383 | } else { |
@@ -1623,7 +1605,8 @@ static int ext4_ext_try_to_merge(struct inode *inode, | |||
1623 | * such that there will be no overlap, and then returns 1. | 1605 | * such that there will be no overlap, and then returns 1. |
1624 | * If there is no overlap found, it returns 0. | 1606 | * If there is no overlap found, it returns 0. |
1625 | */ | 1607 | */ |
1626 | static unsigned int ext4_ext_check_overlap(struct inode *inode, | 1608 | static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, |
1609 | struct inode *inode, | ||
1627 | struct ext4_extent *newext, | 1610 | struct ext4_extent *newext, |
1628 | struct ext4_ext_path *path) | 1611 | struct ext4_ext_path *path) |
1629 | { | 1612 | { |
@@ -1637,6 +1620,7 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode, | |||
1637 | if (!path[depth].p_ext) | 1620 | if (!path[depth].p_ext) |
1638 | goto out; | 1621 | goto out; |
1639 | b2 = le32_to_cpu(path[depth].p_ext->ee_block); | 1622 | b2 = le32_to_cpu(path[depth].p_ext->ee_block); |
1623 | b2 &= ~(sbi->s_cluster_ratio - 1); | ||
1640 | 1624 | ||
1641 | /* | 1625 | /* |
1642 | * get the next allocated block if the extent in the path | 1626 | * get the next allocated block if the extent in the path |
@@ -1646,6 +1630,7 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode, | |||
1646 | b2 = ext4_ext_next_allocated_block(path); | 1630 | b2 = ext4_ext_next_allocated_block(path); |
1647 | if (b2 == EXT_MAX_BLOCKS) | 1631 | if (b2 == EXT_MAX_BLOCKS) |
1648 | goto out; | 1632 | goto out; |
1633 | b2 &= ~(sbi->s_cluster_ratio - 1); | ||
1649 | } | 1634 | } |
1650 | 1635 | ||
1651 | /* check for wrap through zero on extent logical start block*/ | 1636 | /* check for wrap through zero on extent logical start block*/ |
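ext4_ext_check_overlap() now rounds b2 down to a cluster boundary with b2 &= ~(sbi->s_cluster_ratio - 1), which works because bigalloc cluster ratios are powers of two. A quick sketch of the mask arithmetic:

    /* Round a block number down to the start of its cluster;
     * ratio must be a power of two, as s_cluster_ratio is. */
    static unsigned int cluster_start(unsigned int blk, unsigned int ratio)
    {
            return blk & ~(ratio - 1);
    }

    /* cluster_start(37, 16) == 32; cluster_start(32, 16) == 32 */

Aligning both candidates to cluster boundaries makes the overlap check operate on whole clusters, so a new allocation cannot land partway into a cluster that an existing extent already occupies.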
@@ -1697,7 +1682,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1697 | /* try to insert block into found extent and return */ | 1682 | /* try to insert block into found extent and return */ |
1698 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) | 1683 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) |
1699 | && ext4_can_extents_be_merged(inode, ex, newext)) { | 1684 | && ext4_can_extents_be_merged(inode, ex, newext)) { |
1700 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", | 1685 | ext_debug("append [%d]%d block to %u:[%d]%d (from %llu)\n", |
1701 | ext4_ext_is_uninitialized(newext), | 1686 | ext4_ext_is_uninitialized(newext), |
1702 | ext4_ext_get_actual_len(newext), | 1687 | ext4_ext_get_actual_len(newext), |
1703 | le32_to_cpu(ex->ee_block), | 1688 | le32_to_cpu(ex->ee_block), |
@@ -1735,7 +1720,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1735 | if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) | 1720 | if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) |
1736 | next = ext4_ext_next_leaf_block(path); | 1721 | next = ext4_ext_next_leaf_block(path); |
1737 | if (next != EXT_MAX_BLOCKS) { | 1722 | if (next != EXT_MAX_BLOCKS) { |
1738 | ext_debug("next leaf block - %d\n", next); | 1723 | ext_debug("next leaf block - %u\n", next); |
1739 | BUG_ON(npath != NULL); | 1724 | BUG_ON(npath != NULL); |
1740 | npath = ext4_ext_find_extent(inode, next, NULL); | 1725 | npath = ext4_ext_find_extent(inode, next, NULL); |
1741 | if (IS_ERR(npath)) | 1726 | if (IS_ERR(npath)) |
@@ -1773,46 +1758,51 @@ has_space: | |||
1773 | 1758 | ||
1774 | if (!nearex) { | 1759 | if (!nearex) { |
1775 | /* there is no extent in this leaf, create first one */ | 1760 | /* there is no extent in this leaf, create first one */ |
1776 | ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", | 1761 | ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n", |
1777 | le32_to_cpu(newext->ee_block), | 1762 | le32_to_cpu(newext->ee_block), |
1778 | ext4_ext_pblock(newext), | 1763 | ext4_ext_pblock(newext), |
1779 | ext4_ext_is_uninitialized(newext), | 1764 | ext4_ext_is_uninitialized(newext), |
1780 | ext4_ext_get_actual_len(newext)); | 1765 | ext4_ext_get_actual_len(newext)); |
1781 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); | 1766 | nearex = EXT_FIRST_EXTENT(eh); |
1782 | } else if (le32_to_cpu(newext->ee_block) | 1767 | } else { |
1768 | if (le32_to_cpu(newext->ee_block) | ||
1783 | > le32_to_cpu(nearex->ee_block)) { | 1769 | > le32_to_cpu(nearex->ee_block)) { |
1784 | /* BUG_ON(newext->ee_block == nearex->ee_block); */ | 1770 | /* Insert after */ |
1785 | if (nearex != EXT_LAST_EXTENT(eh)) { | 1771 | ext_debug("insert %u:%llu:[%d]%d before: " |
1786 | len = EXT_MAX_EXTENT(eh) - nearex; | 1772 | "nearest %p\n", |
1787 | len = (len - 1) * sizeof(struct ext4_extent); | ||
1788 | len = len < 0 ? 0 : len; | ||
1789 | ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " | ||
1790 | "move %d from 0x%p to 0x%p\n", | ||
1791 | le32_to_cpu(newext->ee_block), | 1773 | le32_to_cpu(newext->ee_block), |
1792 | ext4_ext_pblock(newext), | 1774 | ext4_ext_pblock(newext), |
1793 | ext4_ext_is_uninitialized(newext), | 1775 | ext4_ext_is_uninitialized(newext), |
1794 | ext4_ext_get_actual_len(newext), | 1776 | ext4_ext_get_actual_len(newext), |
1795 | nearex, len, nearex + 1, nearex + 2); | 1777 | nearex); |
1796 | memmove(nearex + 2, nearex + 1, len); | 1778 | nearex++; |
1779 | } else { | ||
1780 | /* Insert before */ | ||
1781 | BUG_ON(newext->ee_block == nearex->ee_block); | ||
1782 | ext_debug("insert %u:%llu:[%d]%d after: " | ||
1783 | "nearest %p\n", | ||
1784 | le32_to_cpu(newext->ee_block), | ||
1785 | ext4_ext_pblock(newext), | ||
1786 | ext4_ext_is_uninitialized(newext), | ||
1787 | ext4_ext_get_actual_len(newext), | ||
1788 | nearex); | ||
1789 | } | ||
1790 | len = EXT_LAST_EXTENT(eh) - nearex + 1; | ||
1791 | if (len > 0) { | ||
1792 | ext_debug("insert %u:%llu:[%d]%d: " | ||
1793 | "move %d extents from 0x%p to 0x%p\n", | ||
1794 | le32_to_cpu(newext->ee_block), | ||
1795 | ext4_ext_pblock(newext), | ||
1796 | ext4_ext_is_uninitialized(newext), | ||
1797 | ext4_ext_get_actual_len(newext), | ||
1798 | len, nearex, nearex + 1); | ||
1799 | memmove(nearex + 1, nearex, | ||
1800 | len * sizeof(struct ext4_extent)); | ||
1797 | } | 1801 | } |
1798 | path[depth].p_ext = nearex + 1; | ||
1799 | } else { | ||
1800 | BUG_ON(newext->ee_block == nearex->ee_block); | ||
1801 | len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); | ||
1802 | len = len < 0 ? 0 : len; | ||
1803 | ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " | ||
1804 | "move %d from 0x%p to 0x%p\n", | ||
1805 | le32_to_cpu(newext->ee_block), | ||
1806 | ext4_ext_pblock(newext), | ||
1807 | ext4_ext_is_uninitialized(newext), | ||
1808 | ext4_ext_get_actual_len(newext), | ||
1809 | nearex, len, nearex, nearex + 1); | ||
1810 | memmove(nearex + 1, nearex, len); | ||
1811 | path[depth].p_ext = nearex; | ||
1812 | } | 1802 | } |
1813 | 1803 | ||
1814 | le16_add_cpu(&eh->eh_entries, 1); | 1804 | le16_add_cpu(&eh->eh_entries, 1); |
1815 | nearex = path[depth].p_ext; | 1805 | path[depth].p_ext = nearex; |
1816 | nearex->ee_block = newext->ee_block; | 1806 | nearex->ee_block = newext->ee_block; |
1817 | ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); | 1807 | ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); |
1818 | nearex->ee_len = newext->ee_len; | 1808 | nearex->ee_len = newext->ee_len; |
@@ -1962,6 +1952,7 @@ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, | |||
1962 | struct ext4_ext_cache *cex; | 1952 | struct ext4_ext_cache *cex; |
1963 | BUG_ON(len == 0); | 1953 | BUG_ON(len == 0); |
1964 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1954 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1955 | trace_ext4_ext_put_in_cache(inode, block, len, start); | ||
1965 | cex = &EXT4_I(inode)->i_cached_extent; | 1956 | cex = &EXT4_I(inode)->i_cached_extent; |
1966 | cex->ec_block = block; | 1957 | cex->ec_block = block; |
1967 | cex->ec_len = len; | 1958 | cex->ec_len = len; |
@@ -2063,6 +2054,7 @@ errout: | |||
2063 | sbi->extent_cache_misses++; | 2054 | sbi->extent_cache_misses++; |
2064 | else | 2055 | else |
2065 | sbi->extent_cache_hits++; | 2056 | sbi->extent_cache_hits++; |
2057 | trace_ext4_ext_in_cache(inode, block, ret); | ||
2066 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 2058 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
2067 | return ret; | 2059 | return ret; |
2068 | } | 2060 | } |
@@ -2130,6 +2122,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
2130 | if (err) | 2122 | if (err) |
2131 | return err; | 2123 | return err; |
2132 | ext_debug("index is empty, remove it, free block %llu\n", leaf); | 2124 | ext_debug("index is empty, remove it, free block %llu\n", leaf); |
2125 | trace_ext4_ext_rm_idx(inode, leaf); | ||
2126 | |||
2133 | ext4_free_blocks(handle, inode, NULL, leaf, 1, | 2127 | ext4_free_blocks(handle, inode, NULL, leaf, 1, |
2134 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); | 2128 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); |
2135 | return err; | 2129 | return err; |
@@ -2158,7 +2152,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, | |||
2158 | * need to account for leaf block credit | 2152 | * need to account for leaf block credit |
2159 | * | 2153 | * |
2160 | * bitmaps and block group descriptor blocks | 2154 | * bitmaps and block group descriptor blocks |
2161 | * and other metadat blocks still need to be | 2155 | * and other metadata blocks still need to be |
2162 | * accounted. | 2156 | * accounted. |
2163 | */ | 2157 | */ |
2164 | /* 1 bitmap, 1 block group descriptor */ | 2158 | /* 1 bitmap, 1 block group descriptor */ |
@@ -2195,14 +2189,40 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
2195 | } | 2189 | } |
2196 | 2190 | ||
2197 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | 2191 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, |
2198 | struct ext4_extent *ex, | 2192 | struct ext4_extent *ex, |
2199 | ext4_lblk_t from, ext4_lblk_t to) | 2193 | ext4_fsblk_t *partial_cluster, |
2194 | ext4_lblk_t from, ext4_lblk_t to) | ||
2200 | { | 2195 | { |
2196 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
2201 | unsigned short ee_len = ext4_ext_get_actual_len(ex); | 2197 | unsigned short ee_len = ext4_ext_get_actual_len(ex); |
2198 | ext4_fsblk_t pblk; | ||
2202 | int flags = EXT4_FREE_BLOCKS_FORGET; | 2199 | int flags = EXT4_FREE_BLOCKS_FORGET; |
2203 | 2200 | ||
2204 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 2201 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
2205 | flags |= EXT4_FREE_BLOCKS_METADATA; | 2202 | flags |= EXT4_FREE_BLOCKS_METADATA; |
2203 | /* | ||
2204 | * For bigalloc file systems, we never free a partial cluster | ||
2205 | * at the beginning of the extent. Instead, we make a note | ||
2206 | * that we tried freeing the cluster, and check to see if we | ||
2207 | * need to free it on a subsequent call to ext4_remove_blocks, | ||
2208 | * or at the end of the ext4_truncate() operation. | ||
2209 | */ | ||
2210 | flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER; | ||
2211 | |||
2212 | trace_ext4_remove_blocks(inode, ex, from, to, *partial_cluster); | ||
2213 | /* | ||
2214 | * If we have a partial cluster, and it's different from the | ||
2215 | * cluster of the last block, we need to explicitly free the | ||
2216 | * partial cluster here. | ||
2217 | */ | ||
2218 | pblk = ext4_ext_pblock(ex) + ee_len - 1; | ||
2219 | if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) { | ||
2220 | ext4_free_blocks(handle, inode, NULL, | ||
2221 | EXT4_C2B(sbi, *partial_cluster), | ||
2222 | sbi->s_cluster_ratio, flags); | ||
2223 | *partial_cluster = 0; | ||
2224 | } | ||
2225 | |||
2206 | #ifdef EXTENTS_STATS | 2226 | #ifdef EXTENTS_STATS |
2207 | { | 2227 | { |
2208 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 2228 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
@@ -2222,12 +2242,24 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2222 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2242 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { |
2223 | /* tail removal */ | 2243 | /* tail removal */ |
2224 | ext4_lblk_t num; | 2244 | ext4_lblk_t num; |
2225 | ext4_fsblk_t start; | ||
2226 | 2245 | ||
2227 | num = le32_to_cpu(ex->ee_block) + ee_len - from; | 2246 | num = le32_to_cpu(ex->ee_block) + ee_len - from; |
2228 | start = ext4_ext_pblock(ex) + ee_len - num; | 2247 | pblk = ext4_ext_pblock(ex) + ee_len - num; |
2229 | ext_debug("free last %u blocks starting %llu\n", num, start); | 2248 | ext_debug("free last %u blocks starting %llu\n", num, pblk); |
2230 | ext4_free_blocks(handle, inode, NULL, start, num, flags); | 2249 | ext4_free_blocks(handle, inode, NULL, pblk, num, flags); |
2250 | /* | ||
2251 | * If the block range to be freed didn't start at the | ||
2252 | * beginning of a cluster, and we removed the entire | ||
2253 | * extent, save the partial cluster here, since we | ||
2254 | * might need to delete it if we determine that the | ||
2255 | * truncate operation has removed all of the blocks in | ||
2256 | * the cluster. | ||
2257 | */ | ||
2258 | if (pblk & (sbi->s_cluster_ratio - 1) && | ||
2259 | (ee_len == num)) | ||
2260 | *partial_cluster = EXT4_B2C(sbi, pblk); | ||
2261 | else | ||
2262 | *partial_cluster = 0; | ||
2231 | } else if (from == le32_to_cpu(ex->ee_block) | 2263 | } else if (from == le32_to_cpu(ex->ee_block) |
2232 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2264 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { |
2233 | /* head removal */ | 2265 | /* head removal */ |
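The tail-removal path above records a partial cluster whenever the freed range does not begin on a cluster boundary (pblk & (sbi->s_cluster_ratio - 1) is nonzero) and the whole extent was removed. A hedged sketch of that bookkeeping, with EXT4_B2C() modeled as a plain division:

    typedef unsigned long long fsblk_t;

    /* Cluster number holding a physical block (ratio is a power of 2). */
    static fsblk_t block_to_cluster(fsblk_t pblk, unsigned int ratio)
    {
            return pblk / ratio;
    }

    /* After freeing blocks starting at pblk: remember the head cluster
     * if the range began mid-cluster and the entire extent went away,
     * since the cluster's leading blocks may still be referenced. */
    static void note_partial(fsblk_t pblk, unsigned int ratio,
                             int whole_extent_freed,
                             fsblk_t *partial_cluster)
    {
            if ((pblk & (ratio - 1)) && whole_extent_freed)
                    *partial_cluster = block_to_cluster(pblk, ratio);
            else
                    *partial_cluster = 0;
    }

The saved cluster is freed later only if no surviving extent still references it, which is what the checks added to ext4_ext_rm_leaf() and ext4_ext_remove_space() below implement.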
@@ -2238,7 +2270,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2238 | start = ext4_ext_pblock(ex); | 2270 | start = ext4_ext_pblock(ex); |
2239 | 2271 | ||
2240 | ext_debug("free first %u blocks starting %llu\n", num, start); | 2272 | ext_debug("free first %u blocks starting %llu\n", num, start); |
2241 | ext4_free_blocks(handle, inode, 0, start, num, flags); | 2273 | ext4_free_blocks(handle, inode, NULL, start, num, flags); |
2242 | 2274 | ||
2243 | } else { | 2275 | } else { |
2244 | printk(KERN_INFO "strange request: removal(2) " | 2276 | printk(KERN_INFO "strange request: removal(2) " |
@@ -2262,19 +2294,19 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2262 | */ | 2294 | */ |
2263 | static int | 2295 | static int |
2264 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | 2296 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, |
2265 | struct ext4_ext_path *path, ext4_lblk_t start, | 2297 | struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster, |
2266 | ext4_lblk_t end) | 2298 | ext4_lblk_t start, ext4_lblk_t end) |
2267 | { | 2299 | { |
2300 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
2268 | int err = 0, correct_index = 0; | 2301 | int err = 0, correct_index = 0; |
2269 | int depth = ext_depth(inode), credits; | 2302 | int depth = ext_depth(inode), credits; |
2270 | struct ext4_extent_header *eh; | 2303 | struct ext4_extent_header *eh; |
2271 | ext4_lblk_t a, b, block; | 2304 | ext4_lblk_t a, b; |
2272 | unsigned num; | 2305 | unsigned num; |
2273 | ext4_lblk_t ex_ee_block; | 2306 | ext4_lblk_t ex_ee_block; |
2274 | unsigned short ex_ee_len; | 2307 | unsigned short ex_ee_len; |
2275 | unsigned uninitialized = 0; | 2308 | unsigned uninitialized = 0; |
2276 | struct ext4_extent *ex; | 2309 | struct ext4_extent *ex; |
2277 | struct ext4_map_blocks map; | ||
2278 | 2310 | ||
2279 | /* the header must be checked already in ext4_ext_remove_space() */ | 2311 | /* the header must be checked already in ext4_ext_remove_space() */ |
2280 | ext_debug("truncate since %u in leaf\n", start); | 2312 | ext_debug("truncate since %u in leaf\n", start); |
@@ -2291,6 +2323,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2291 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2323 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2292 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2324 | ex_ee_len = ext4_ext_get_actual_len(ex); |
2293 | 2325 | ||
2326 | trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); | ||
2327 | |||
2294 | while (ex >= EXT_FIRST_EXTENT(eh) && | 2328 | while (ex >= EXT_FIRST_EXTENT(eh) && |
2295 | ex_ee_block + ex_ee_len > start) { | 2329 | ex_ee_block + ex_ee_len > start) { |
2296 | 2330 | ||
@@ -2315,86 +2349,18 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2315 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2349 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2316 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2350 | ex_ee_len = ext4_ext_get_actual_len(ex); |
2317 | continue; | 2351 | continue; |
2318 | } else if (a != ex_ee_block && | 2352 | } else if (b != ex_ee_block + ex_ee_len - 1) { |
2319 | b != ex_ee_block + ex_ee_len - 1) { | 2353 | EXT4_ERROR_INODE(inode," bad truncate %u:%u\n", |
2320 | /* | 2354 | start, end); |
2321 | * If this is a truncate, then this condition should | 2355 | err = -EIO; |
2322 | * never happen because at least one of the end points | 2356 | goto out; |
2323 | * needs to be on the edge of the extent. | ||
2324 | */ | ||
2325 | if (end == EXT_MAX_BLOCKS - 1) { | ||
2326 | ext_debug(" bad truncate %u:%u\n", | ||
2327 | start, end); | ||
2328 | block = 0; | ||
2329 | num = 0; | ||
2330 | err = -EIO; | ||
2331 | goto out; | ||
2332 | } | ||
2333 | /* | ||
2334 | * else this is a hole punch, so the extent needs to | ||
2335 | * be split since neither edge of the hole is on the | ||
2336 | * extent edge | ||
2337 | */ | ||
2338 | else{ | ||
2339 | map.m_pblk = ext4_ext_pblock(ex); | ||
2340 | map.m_lblk = ex_ee_block; | ||
2341 | map.m_len = b - ex_ee_block; | ||
2342 | |||
2343 | err = ext4_split_extent(handle, | ||
2344 | inode, path, &map, 0, | ||
2345 | EXT4_GET_BLOCKS_PUNCH_OUT_EXT | | ||
2346 | EXT4_GET_BLOCKS_PRE_IO); | ||
2347 | |||
2348 | if (err < 0) | ||
2349 | goto out; | ||
2350 | |||
2351 | ex_ee_len = ext4_ext_get_actual_len(ex); | ||
2352 | |||
2353 | b = ex_ee_block+ex_ee_len - 1 < end ? | ||
2354 | ex_ee_block+ex_ee_len - 1 : end; | ||
2355 | |||
2356 | /* Then remove tail of this extent */ | ||
2357 | block = ex_ee_block; | ||
2358 | num = a - block; | ||
2359 | } | ||
2360 | } else if (a != ex_ee_block) { | 2357 | } else if (a != ex_ee_block) { |
2361 | /* remove tail of the extent */ | 2358 | /* remove tail of the extent */ |
2362 | block = ex_ee_block; | 2359 | num = a - ex_ee_block; |
2363 | num = a - block; | ||
2364 | } else if (b != ex_ee_block + ex_ee_len - 1) { | ||
2365 | /* remove head of the extent */ | ||
2366 | block = b; | ||
2367 | num = ex_ee_block + ex_ee_len - b; | ||
2368 | |||
2369 | /* | ||
2370 | * If this is a truncate, this condition | ||
2371 | * should never happen | ||
2372 | */ | ||
2373 | if (end == EXT_MAX_BLOCKS - 1) { | ||
2374 | ext_debug(" bad truncate %u:%u\n", | ||
2375 | start, end); | ||
2376 | err = -EIO; | ||
2377 | goto out; | ||
2378 | } | ||
2379 | } else { | 2360 | } else { |
2380 | /* remove whole extent: excellent! */ | 2361 | /* remove whole extent: excellent! */ |
2381 | block = ex_ee_block; | ||
2382 | num = 0; | 2362 | num = 0; |
2383 | if (a != ex_ee_block) { | ||
2384 | ext_debug(" bad truncate %u:%u\n", | ||
2385 | start, end); | ||
2386 | err = -EIO; | ||
2387 | goto out; | ||
2388 | } | ||
2389 | |||
2390 | if (b != ex_ee_block + ex_ee_len - 1) { | ||
2391 | ext_debug(" bad truncate %u:%u\n", | ||
2392 | start, end); | ||
2393 | err = -EIO; | ||
2394 | goto out; | ||
2395 | } | ||
2396 | } | 2363 | } |
2397 | |||
2398 | /* | 2364 | /* |
2399 | * 3 for leaf, sb, and inode plus 2 (bmap and group | 2365 | * 3 for leaf, sb, and inode plus 2 (bmap and group |
2400 | * descriptor) for each block group; assume two block | 2366 | * descriptor) for each block group; assume two block |
@@ -2416,23 +2382,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2416 | if (err) | 2382 | if (err) |
2417 | goto out; | 2383 | goto out; |
2418 | 2384 | ||
2419 | err = ext4_remove_blocks(handle, inode, ex, a, b); | 2385 | err = ext4_remove_blocks(handle, inode, ex, partial_cluster, |
2386 | a, b); | ||
2420 | if (err) | 2387 | if (err) |
2421 | goto out; | 2388 | goto out; |
2422 | 2389 | ||
2423 | if (num == 0) { | 2390 | if (num == 0) |
2424 | /* this extent is removed; mark slot entirely unused */ | 2391 | /* this extent is removed; mark slot entirely unused */ |
2425 | ext4_ext_store_pblock(ex, 0); | 2392 | ext4_ext_store_pblock(ex, 0); |
2426 | } else if (block != ex_ee_block) { | ||
2427 | /* | ||
2428 | * If this was a head removal, then we need to update | ||
2429 | * the physical block since it is now at a different | ||
2430 | * location | ||
2431 | */ | ||
2432 | ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a)); | ||
2433 | } | ||
2434 | 2393 | ||
2435 | ex->ee_block = cpu_to_le32(block); | ||
2436 | ex->ee_len = cpu_to_le16(num); | 2394 | ex->ee_len = cpu_to_le16(num); |
2437 | /* | 2395 | /* |
2438 | * Do not mark uninitialized if all the blocks in the | 2396 | * Do not mark uninitialized if all the blocks in the |
@@ -2440,11 +2398,6 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2440 | */ | 2398 | */ |
2441 | if (uninitialized && num) | 2399 | if (uninitialized && num) |
2442 | ext4_ext_mark_uninitialized(ex); | 2400 | ext4_ext_mark_uninitialized(ex); |
2443 | |||
2444 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
2445 | if (err) | ||
2446 | goto out; | ||
2447 | |||
2448 | /* | 2401 | /* |
2449 | * If the extent was completely released, | 2402 | * If the extent was completely released, |
2450 | * we need to remove it from the leaf | 2403 | * we need to remove it from the leaf |
@@ -2464,9 +2417,14 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2464 | sizeof(struct ext4_extent)); | 2417 | sizeof(struct ext4_extent)); |
2465 | } | 2418 | } |
2466 | le16_add_cpu(&eh->eh_entries, -1); | 2419 | le16_add_cpu(&eh->eh_entries, -1); |
2467 | } | 2420 | } else |
2421 | *partial_cluster = 0; | ||
2468 | 2422 | ||
2469 | ext_debug("new extent: %u:%u:%llu\n", block, num, | 2423 | err = ext4_ext_dirty(handle, inode, path + depth); |
2424 | if (err) | ||
2425 | goto out; | ||
2426 | |||
2427 | ext_debug("new extent: %u:%u:%llu\n", ex_ee_block, num, | ||
2470 | ext4_ext_pblock(ex)); | 2428 | ext4_ext_pblock(ex)); |
2471 | ex--; | 2429 | ex--; |
2472 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2430 | ex_ee_block = le32_to_cpu(ex->ee_block); |
@@ -2476,6 +2434,25 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2476 | if (correct_index && eh->eh_entries) | 2434 | if (correct_index && eh->eh_entries) |
2477 | err = ext4_ext_correct_indexes(handle, inode, path); | 2435 | err = ext4_ext_correct_indexes(handle, inode, path); |
2478 | 2436 | ||
2437 | /* | ||
2438 | * If there is still an entry in the leaf node, check to see if | ||
2439 | * it references the partial cluster. This is the only place | ||
2440 | * where it could; if it doesn't, we can free the cluster. | ||
2441 | */ | ||
2442 | if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) && | ||
2443 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != | ||
2444 | *partial_cluster)) { | ||
2445 | int flags = EXT4_FREE_BLOCKS_FORGET; | ||
2446 | |||
2447 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
2448 | flags |= EXT4_FREE_BLOCKS_METADATA; | ||
2449 | |||
2450 | ext4_free_blocks(handle, inode, NULL, | ||
2451 | EXT4_C2B(sbi, *partial_cluster), | ||
2452 | sbi->s_cluster_ratio, flags); | ||
2453 | *partial_cluster = 0; | ||
2454 | } | ||
2455 | |||
2479 | /* if this leaf is free, then we should | 2456 | /* if this leaf is free, then we should |
2480 | * remove it from index block above */ | 2457 | * remove it from index block above */ |
2481 | if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) | 2458 | if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) |
@@ -2511,6 +2488,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
2511 | struct super_block *sb = inode->i_sb; | 2488 | struct super_block *sb = inode->i_sb; |
2512 | int depth = ext_depth(inode); | 2489 | int depth = ext_depth(inode); |
2513 | struct ext4_ext_path *path; | 2490 | struct ext4_ext_path *path; |
2491 | ext4_fsblk_t partial_cluster = 0; | ||
2514 | handle_t *handle; | 2492 | handle_t *handle; |
2515 | int i, err; | 2493 | int i, err; |
2516 | 2494 | ||
@@ -2524,6 +2502,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
2524 | again: | 2502 | again: |
2525 | ext4_ext_invalidate_cache(inode); | 2503 | ext4_ext_invalidate_cache(inode); |
2526 | 2504 | ||
2505 | trace_ext4_ext_remove_space(inode, start, depth); | ||
2506 | |||
2527 | /* | 2507 | /* |
2528 | * We start scanning from right side, freeing all the blocks | 2508 | * We start scanning from right side, freeing all the blocks |
2529 | * after i_size and walking into the tree depth-wise. | 2509 | * after i_size and walking into the tree depth-wise. |
@@ -2546,7 +2526,8 @@ again: | |||
2546 | if (i == depth) { | 2526 | if (i == depth) { |
2547 | /* this is leaf block */ | 2527 | /* this is leaf block */ |
2548 | err = ext4_ext_rm_leaf(handle, inode, path, | 2528 | err = ext4_ext_rm_leaf(handle, inode, path, |
2549 | start, EXT_MAX_BLOCKS - 1); | 2529 | &partial_cluster, start, |
2530 | EXT_MAX_BLOCKS - 1); | ||
2550 | /* root level has p_bh == NULL, brelse() eats this */ | 2531 | /* root level has p_bh == NULL, brelse() eats this */ |
2551 | brelse(path[i].p_bh); | 2532 | brelse(path[i].p_bh); |
2552 | path[i].p_bh = NULL; | 2533 | path[i].p_bh = NULL; |
@@ -2618,6 +2599,24 @@ again: | |||
2618 | } | 2599 | } |
2619 | } | 2600 | } |
2620 | 2601 | ||
2602 | trace_ext4_ext_remove_space_done(inode, start, depth, partial_cluster, | ||
2603 | path->p_hdr->eh_entries); | ||
2604 | |||
2605 | /* If we still have something in the partial cluster and we have removed | ||
2606 | * even the first extent, then we should free the blocks in the partial | ||
2607 | * cluster as well. */ | ||
2608 | if (partial_cluster && path->p_hdr->eh_entries == 0) { | ||
2609 | int flags = EXT4_FREE_BLOCKS_FORGET; | ||
2610 | |||
2611 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
2612 | flags |= EXT4_FREE_BLOCKS_METADATA; | ||
2613 | |||
2614 | ext4_free_blocks(handle, inode, NULL, | ||
2615 | EXT4_C2B(EXT4_SB(sb), partial_cluster), | ||
2616 | EXT4_SB(sb)->s_cluster_ratio, flags); | ||
2617 | partial_cluster = 0; | ||
2618 | } | ||
2619 | |||
2621 | /* TODO: flexible tree reduction should be here */ | 2620 | /* TODO: flexible tree reduction should be here */ |
2622 | if (path->p_hdr->eh_entries == 0) { | 2621 | if (path->p_hdr->eh_entries == 0) { |
2623 | /* | 2622 | /* |
@@ -2909,17 +2908,29 @@ out: | |||
2909 | * a> There is no split required: Entire extent should be initialized | 2908 | * a> There is no split required: Entire extent should be initialized |
2910 | * b> Splits in two extents: Write is happening at either end of the extent | 2909 | * b> Splits in two extents: Write is happening at either end of the extent |
2911 | * c> Splits in three extents: Someone is writing in the middle of the extent | 2910 | * c> Splits in three extents: Someone is writing in the middle of the extent |
2911 | * | ||
2912 | * Pre-conditions: | ||
2913 | * - The extent pointed to by 'path' is uninitialized. | ||
2914 | * - The extent pointed to by 'path' contains a superset | ||
2915 | * of the logical span [map->m_lblk, map->m_lblk + map->m_len). | ||
2916 | * | ||
2917 | * Post-conditions on success: | ||
2918 | * - the returned value is the number of blocks beyond map->m_lblk | ||
2919 | * that are allocated and initialized. | ||
2920 | * It is guaranteed to be >= map->m_len. | ||
2912 | */ | 2921 | */ |
2913 | static int ext4_ext_convert_to_initialized(handle_t *handle, | 2922 | static int ext4_ext_convert_to_initialized(handle_t *handle, |
2914 | struct inode *inode, | 2923 | struct inode *inode, |
2915 | struct ext4_map_blocks *map, | 2924 | struct ext4_map_blocks *map, |
2916 | struct ext4_ext_path *path) | 2925 | struct ext4_ext_path *path) |
2917 | { | 2926 | { |
2927 | struct ext4_extent_header *eh; | ||
2918 | struct ext4_map_blocks split_map; | 2928 | struct ext4_map_blocks split_map; |
2919 | struct ext4_extent zero_ex; | 2929 | struct ext4_extent zero_ex; |
2920 | struct ext4_extent *ex; | 2930 | struct ext4_extent *ex; |
2921 | ext4_lblk_t ee_block, eof_block; | 2931 | ext4_lblk_t ee_block, eof_block; |
2922 | unsigned int allocated, ee_len, depth; | 2932 | unsigned int ee_len, depth; |
2933 | int allocated; | ||
2923 | int err = 0; | 2934 | int err = 0; |
2924 | int split_flag = 0; | 2935 | int split_flag = 0; |
2925 | 2936 | ||
@@ -2933,11 +2944,93 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2933 | eof_block = map->m_lblk + map->m_len; | 2944 | eof_block = map->m_lblk + map->m_len; |
2934 | 2945 | ||
2935 | depth = ext_depth(inode); | 2946 | depth = ext_depth(inode); |
2947 | eh = path[depth].p_hdr; | ||
2936 | ex = path[depth].p_ext; | 2948 | ex = path[depth].p_ext; |
2937 | ee_block = le32_to_cpu(ex->ee_block); | 2949 | ee_block = le32_to_cpu(ex->ee_block); |
2938 | ee_len = ext4_ext_get_actual_len(ex); | 2950 | ee_len = ext4_ext_get_actual_len(ex); |
2939 | allocated = ee_len - (map->m_lblk - ee_block); | 2951 | allocated = ee_len - (map->m_lblk - ee_block); |
2940 | 2952 | ||
2953 | trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); | ||
2954 | |||
2955 | /* Pre-conditions */ | ||
2956 | BUG_ON(!ext4_ext_is_uninitialized(ex)); | ||
2957 | BUG_ON(!in_range(map->m_lblk, ee_block, ee_len)); | ||
2958 | BUG_ON(map->m_lblk + map->m_len > ee_block + ee_len); | ||
2959 | |||
2960 | /* | ||
2961 | * Attempt to transfer newly initialized blocks from the currently | ||
2962 | * uninitialized extent to its left neighbor. This is much cheaper | ||
2963 | * than an insertion followed by a merge as those involve costly | ||
2964 | * memmove() calls. This is the common case in steady state for | ||
2965 | * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append | ||
2966 | * writes. | ||
2967 | * | ||
2968 | * Limitations of the current logic: | ||
2969 | * - L1: we only deal with writes at the start of the extent. | ||
2970 | * The approach could be extended to writes at the end | ||
2971 | * of the extent but this scenario was deemed less common. | ||
2972 | * - L2: we do not deal with writes covering the whole extent. | ||
2973 | * This would require removing the extent if the transfer | ||
2974 | * is possible. | ||
2975 | * - L3: we only attempt to merge with an extent stored in the | ||
2976 | * same extent tree node. | ||
2977 | */ | ||
2978 | if ((map->m_lblk == ee_block) && /*L1*/ | ||
2979 | (map->m_len < ee_len) && /*L2*/ | ||
2980 | (ex > EXT_FIRST_EXTENT(eh))) { /*L3*/ | ||
2981 | struct ext4_extent *prev_ex; | ||
2982 | ext4_lblk_t prev_lblk; | ||
2983 | ext4_fsblk_t prev_pblk, ee_pblk; | ||
2984 | unsigned int prev_len, write_len; | ||
2985 | |||
2986 | prev_ex = ex - 1; | ||
2987 | prev_lblk = le32_to_cpu(prev_ex->ee_block); | ||
2988 | prev_len = ext4_ext_get_actual_len(prev_ex); | ||
2989 | prev_pblk = ext4_ext_pblock(prev_ex); | ||
2990 | ee_pblk = ext4_ext_pblock(ex); | ||
2991 | write_len = map->m_len; | ||
2992 | |||
2993 | /* | ||
2994 | * A transfer of blocks from 'ex' to 'prev_ex' is allowed | ||
2995 | * upon those conditions: | ||
2996 | * - C1: prev_ex is initialized, | ||
2997 | * - C2: prev_ex is logically abutting ex, | ||
2998 | * - C3: prev_ex is physically abutting ex, | ||
2999 | * - C4: prev_ex can receive the additional blocks without | ||
3000 | * overflowing the (initialized) length limit. | ||
3001 | */ | ||
3002 | if ((!ext4_ext_is_uninitialized(prev_ex)) && /*C1*/ | ||
3003 | ((prev_lblk + prev_len) == ee_block) && /*C2*/ | ||
3004 | ((prev_pblk + prev_len) == ee_pblk) && /*C3*/ | ||
3005 | (prev_len < (EXT_INIT_MAX_LEN - write_len))) { /*C4*/ | ||
3006 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
3007 | if (err) | ||
3008 | goto out; | ||
3009 | |||
3010 | trace_ext4_ext_convert_to_initialized_fastpath(inode, | ||
3011 | map, ex, prev_ex); | ||
3012 | |||
3013 | /* Shift the start of ex by 'write_len' blocks */ | ||
3014 | ex->ee_block = cpu_to_le32(ee_block + write_len); | ||
3015 | ext4_ext_store_pblock(ex, ee_pblk + write_len); | ||
3016 | ex->ee_len = cpu_to_le16(ee_len - write_len); | ||
3017 | ext4_ext_mark_uninitialized(ex); /* Restore the flag */ | ||
3018 | |||
3019 | /* Extend prev_ex by 'write_len' blocks */ | ||
3020 | prev_ex->ee_len = cpu_to_le16(prev_len + write_len); | ||
3021 | |||
3022 | /* Mark the block containing both extents as dirty */ | ||
3023 | ext4_ext_dirty(handle, inode, path + depth); | ||
3024 | |||
3025 | /* Update path to point to the right extent */ | ||
3026 | path[depth].p_ext = prev_ex; | ||
3027 | |||
3028 | /* Result: number of initialized blocks past m_lblk */ | ||
3029 | allocated = write_len; | ||
3030 | goto out; | ||
3031 | } | ||
3032 | } | ||
3033 | |||
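The block above implements a left-neighbor fast path: when a write starts exactly at an uninitialized extent and an initialized extent abuts it both logically and physically, the freshly written blocks are handed to the neighbor instead of going through split-and-merge. A simplified userspace sketch of the transfer step (the struct is a stand-in, not the on-disk ext4 extent layout):

    struct ext {
            unsigned int       lblk;  /* first logical block */
            unsigned long long pblk;  /* first physical block */
            unsigned int       len;   /* number of blocks */
    };

    /* Move the first n blocks of ex into its left neighbor prev.
     * The caller has verified that prev is initialized, abuts ex
     * logically and physically, and that prev->len + n fits the
     * initialized-extent length limit. */
    static void transfer_to_left(struct ext *prev, struct ext *ex,
                                 unsigned int n)
    {
            ex->lblk += n;    /* shrink ex from the front */
            ex->pblk += n;
            ex->len  -= n;
            prev->len += n;   /* the blocks are now initialized via prev */
    }

Only three fields of each extent change and no entries move, which is why this beats the memmove()-heavy insert-then-merge path for the fallocate(FALLOC_FL_KEEP_SIZE)-plus-append workload the comment describes.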
2941 | WARN_ON(map->m_lblk < ee_block); | 3034 | WARN_ON(map->m_lblk < ee_block); |
2942 | /* | 3035 | /* |
2943 | * It is safe to convert extent to initialized via explicit | 3036 | * It is safe to convert extent to initialized via explicit |
@@ -3165,6 +3258,192 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode, | |||
3165 | return ext4_mark_inode_dirty(handle, inode); | 3258 | return ext4_mark_inode_dirty(handle, inode); |
3166 | } | 3259 | } |
3167 | 3260 | ||
3261 | /** | ||
3262 | * ext4_find_delalloc_range: find delayed allocated block in the given range. | ||
3263 | * | ||
3264 | * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns | ||
3265 | * whether there are any buffers marked for delayed allocation. It returns '1' | ||
3266 | * on the first delalloc'ed buffer head found. If no buffer head in the given | ||
3267 | * range is marked for delalloc, it returns 0. | ||
3268 | * lblk_start should always be <= lblk_end. | ||
3269 | * search_hint_reverse is to indicate that searching in reverse from lblk_end to | ||
3270 | * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed | ||
3271 | * block sooner). This is useful when blocks are truncated sequentially from | ||
3272 | * lblk_start towards lblk_end. | ||
3273 | */ | ||
3274 | static int ext4_find_delalloc_range(struct inode *inode, | ||
3275 | ext4_lblk_t lblk_start, | ||
3276 | ext4_lblk_t lblk_end, | ||
3277 | int search_hint_reverse) | ||
3278 | { | ||
3279 | struct address_space *mapping = inode->i_mapping; | ||
3280 | struct buffer_head *head, *bh = NULL; | ||
3281 | struct page *page; | ||
3282 | ext4_lblk_t i, pg_lblk; | ||
3283 | pgoff_t index; | ||
3284 | |||
3285 | /* reverse search won't work if fs block size is less than page size */ | ||
3286 | if (inode->i_blkbits < PAGE_CACHE_SHIFT) | ||
3287 | search_hint_reverse = 0; | ||
3288 | |||
3289 | if (search_hint_reverse) | ||
3290 | i = lblk_end; | ||
3291 | else | ||
3292 | i = lblk_start; | ||
3293 | |||
3294 | index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
3295 | |||
3296 | while ((i >= lblk_start) && (i <= lblk_end)) { | ||
3297 | page = find_get_page(mapping, index); | ||
3298 | if (!page) | ||
3299 | goto nextpage; | ||
3300 | |||
3301 | if (!page_has_buffers(page)) | ||
3302 | goto nextpage; | ||
3303 | |||
3304 | head = page_buffers(page); | ||
3305 | if (!head) | ||
3306 | goto nextpage; | ||
3307 | |||
3308 | bh = head; | ||
3309 | pg_lblk = index << (PAGE_CACHE_SHIFT - | ||
3310 | inode->i_blkbits); | ||
3311 | do { | ||
3312 | if (unlikely(pg_lblk < lblk_start)) { | ||
3313 | /* | ||
3314 | * This is possible when fs block size is less | ||
3315 | * than page size and our cluster starts/ends in | ||
3316 | * middle of the page. So we need to skip the | ||
3317 | * initial few blocks till we reach the 'lblk' | ||
3318 | */ | ||
3319 | pg_lblk++; | ||
3320 | continue; | ||
3321 | } | ||
3322 | |||
3323 | /* Check if the buffer is delayed allocated and that it | ||
3324 | * is not yet mapped. (when da-buffers are mapped during | ||
3325 | * their writeout, their da_mapped bit is set.) | ||
3326 | */ | ||
3327 | if (buffer_delay(bh) && !buffer_da_mapped(bh)) { | ||
3328 | page_cache_release(page); | ||
3329 | trace_ext4_find_delalloc_range(inode, | ||
3330 | lblk_start, lblk_end, | ||
3331 | search_hint_reverse, | ||
3332 | 1, i); | ||
3333 | return 1; | ||
3334 | } | ||
3335 | if (search_hint_reverse) | ||
3336 | i--; | ||
3337 | else | ||
3338 | i++; | ||
3339 | } while ((i >= lblk_start) && (i <= lblk_end) && | ||
3340 | ((bh = bh->b_this_page) != head)); | ||
3341 | nextpage: | ||
3342 | if (page) | ||
3343 | page_cache_release(page); | ||
3344 | /* | ||
3345 | * Move to next page. 'i' will be the first lblk in the next | ||
3346 | * page. | ||
3347 | */ | ||
3348 | if (search_hint_reverse) | ||
3349 | index--; | ||
3350 | else | ||
3351 | index++; | ||
3352 | i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
3353 | } | ||
3354 | |||
3355 | trace_ext4_find_delalloc_range(inode, lblk_start, lblk_end, | ||
3356 | search_hint_reverse, 0, 0); | ||
3357 | return 0; | ||
3358 | } | ||
3359 | |||
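ext4_find_delalloc_range() walks the page cache, translating between logical blocks and page indices with shifts by (PAGE_CACHE_SHIFT - i_blkbits). A small sketch of the two conversions; with 4K pages (shift 12) and 1K blocks (shift 10) there are four blocks per page:

    /* Page cache index of the page holding logical block lblk. */
    static unsigned long lblk_to_index(unsigned long lblk,
                                       unsigned int blkbits,
                                       unsigned int page_shift)
    {
            return lblk >> (page_shift - blkbits);
    }

    /* First logical block covered by the page at index (pg_lblk above). */
    static unsigned long index_to_lblk(unsigned long index,
                                       unsigned int blkbits,
                                       unsigned int page_shift)
    {
            return index << (page_shift - blkbits);
    }

    /* lblk_to_index(7, 10, 12) == 1; index_to_lblk(1, 10, 12) == 4 */

The reverse-search hint is dropped when the block size is smaller than the page size because the per-page buffer list (bh->b_this_page) can only be walked forward, so blocks within a page cannot be visited in reverse order.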
3360 | int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk, | ||
3361 | int search_hint_reverse) | ||
3362 | { | ||
3363 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
3364 | ext4_lblk_t lblk_start, lblk_end; | ||
3365 | lblk_start = lblk & (~(sbi->s_cluster_ratio - 1)); | ||
3366 | lblk_end = lblk_start + sbi->s_cluster_ratio - 1; | ||
3367 | |||
3368 | return ext4_find_delalloc_range(inode, lblk_start, lblk_end, | ||
3369 | search_hint_reverse); | ||
3370 | } | ||
3371 | |||
3372 | /** | ||
3373 | * Determines how many complete clusters (out of those specified by the 'map') | ||
3374 | * are under delalloc and for which quota has been reserved. | ||
3375 | * This function is called when we are writing out the blocks that were | ||
3376 | * originally written with their allocation delayed, but then the space was | ||
3377 | * allocated using fallocate() before the delayed allocation could be resolved. | ||
3378 | * The cases to look for are: | ||
3379 | * ('=' indicates delayed allocated blocks | ||
3380 | * '-' indicates non-delayed allocated blocks) | ||
3381 | * (a) partial clusters towards beginning and/or end outside of allocated range | ||
3382 | * are not delalloc'ed. | ||
3383 | * Ex: | ||
3384 | * |----c---=|====c====|====c====|===-c----| | ||
3385 | * |++++++ allocated ++++++| | ||
3386 | * ==> 4 complete clusters in above example | ||
3387 | * | ||
3388 | * (b) partial cluster (outside of allocated range) towards either end is | ||
3389 | * marked for delayed allocation. In this case, we will exclude that | ||
3390 | * cluster. | ||
3391 | * Ex: | ||
3392 | * |----====c========|========c========| | ||
3393 | * |++++++ allocated ++++++| | ||
3394 | * ==> 1 complete cluster in above example | ||
3395 | * | ||
3396 | * Ex: | ||
3397 | * |================c================| | ||
3398 | * |++++++ allocated ++++++| | ||
3399 | * ==> 0 complete clusters in above example | ||
3400 | * | ||
3401 | * ext4_da_update_reserve_space() will be called only if we | ||
3402 | * determine here that there were some "entire" clusters that span | ||
3403 | * this 'allocated' range. | ||
3404 | * In the non-bigalloc case, this function will just end up returning num_blks | ||
3405 | * without ever calling ext4_find_delalloc_range. | ||
3406 | */ | ||
3407 | static unsigned int | ||
3408 | get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, | ||
3409 | unsigned int num_blks) | ||
3410 | { | ||
3411 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
3412 | ext4_lblk_t alloc_cluster_start, alloc_cluster_end; | ||
3413 | ext4_lblk_t lblk_from, lblk_to, c_offset; | ||
3414 | unsigned int allocated_clusters = 0; | ||
3415 | |||
3416 | alloc_cluster_start = EXT4_B2C(sbi, lblk_start); | ||
3417 | alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1); | ||
3418 | |||
3419 | /* max possible clusters for this allocation */ | ||
3420 | allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1; | ||
3421 | |||
3422 | trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks); | ||
3423 | |||
3424 | /* Check towards left side */ | ||
3425 | c_offset = lblk_start & (sbi->s_cluster_ratio - 1); | ||
3426 | if (c_offset) { | ||
3427 | lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1)); | ||
3428 | lblk_to = lblk_from + c_offset - 1; | ||
3429 | |||
3430 | if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0)) | ||
3431 | allocated_clusters--; | ||
3432 | } | ||
3433 | |||
3434 | /* Now check towards right. */ | ||
3435 | c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1); | ||
3436 | if (allocated_clusters && c_offset) { | ||
3437 | lblk_from = lblk_start + num_blks; | ||
3438 | lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1; | ||
3439 | |||
3440 | if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0)) | ||
3441 | allocated_clusters--; | ||
3442 | } | ||
3443 | |||
3444 | return allocated_clusters; | ||
3445 | } | ||
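
To make the partial-cluster trimming in get_reserved_cluster_alloc() concrete, here is a small userspace sketch of the same arithmetic, with the two ext4_find_delalloc_range() lookups stubbed out as boolean parameters (all names hypothetical):

    #include <stdio.h>

    /* Mirror of the trimming in get_reserved_cluster_alloc(); the
     * delalloc lookups are stubbed out. ratio is a power of two. */
    static unsigned int complete_clusters(unsigned int lblk_start,
                                          unsigned int num_blks,
                                          unsigned int ratio,
                                          int left_is_delalloc,
                                          int right_is_delalloc)
    {
            unsigned int first = lblk_start / ratio;
            unsigned int last = (lblk_start + num_blks - 1) / ratio;
            unsigned int clusters = last - first + 1;

            /* head cluster shared with delalloc blocks outside the range */
            if ((lblk_start & (ratio - 1)) && left_is_delalloc)
                    clusters--;
            /* likewise for the tail cluster */
            if (clusters && ((lblk_start + num_blks) & (ratio - 1)) &&
                right_is_delalloc)
                    clusters--;
            return clusters;
    }

    int main(void)
    {
            /* 8 blocks per cluster, allocation covers blocks 4..19 */
            printf("%u\n", complete_clusters(4, 16, 8, 1, 1)); /* 1 */
            return 0;
    }

The example reproduces case (b) from the comment above: both end clusters are shared with delalloc blocks outside the allocated range, so only the fully covered middle cluster is counted.
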
3446 | |||
3168 | static int | 3447 | static int |
3169 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | 3448 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, |
3170 | struct ext4_map_blocks *map, | 3449 | struct ext4_map_blocks *map, |
@@ -3181,6 +3460,9 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3181 | flags, allocated); | 3460 | flags, allocated); |
3182 | ext4_ext_show_leaf(inode, path); | 3461 | ext4_ext_show_leaf(inode, path); |
3183 | 3462 | ||
3463 | trace_ext4_ext_handle_uninitialized_extents(inode, map, allocated, | ||
3464 | newblock); | ||
3465 | |||
3184 | /* get_block() before submit the IO, split the extent */ | 3466 | /* get_block() before submit the IO, split the extent */ |
3185 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 3467 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3186 | ret = ext4_split_unwritten_extents(handle, inode, map, | 3468 | ret = ext4_split_unwritten_extents(handle, inode, map, |
@@ -3190,10 +3472,9 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3190 | * that this IO needs to be converted to written when IO is | 3472 | * that this IO needs to be converted to written when IO is |
3191 | * completed | 3473 | * completed |
3192 | */ | 3474 | */ |
3193 | if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { | 3475 | if (io) |
3194 | io->flag = EXT4_IO_END_UNWRITTEN; | 3476 | ext4_set_io_unwritten_flag(inode, io); |
3195 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | 3477 | else |
3196 | } else | ||
3197 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | 3478 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
3198 | if (ext4_should_dioread_nolock(inode)) | 3479 | if (ext4_should_dioread_nolock(inode)) |
3199 | map->m_flags |= EXT4_MAP_UNINIT; | 3480 | map->m_flags |= EXT4_MAP_UNINIT; |
@@ -3234,14 +3515,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3234 | 3515 | ||
3235 | /* buffered write, writepage time, convert */ | 3516 | /* buffered write, writepage time, convert */ |
3236 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path); | 3517 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path); |
3237 | if (ret >= 0) { | 3518 | if (ret >= 0) |
3238 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3519 | ext4_update_inode_fsync_trans(handle, inode, 1); |
3239 | err = check_eofblocks_fl(handle, inode, map->m_lblk, path, | ||
3240 | map->m_len); | ||
3241 | if (err < 0) | ||
3242 | goto out2; | ||
3243 | } | ||
3244 | |||
3245 | out: | 3520 | out: |
3246 | if (ret <= 0) { | 3521 | if (ret <= 0) { |
3247 | err = ret; | 3522 | err = ret; |
@@ -3270,11 +3545,24 @@ out: | |||
3270 | * But fallocate would have already updated quota and block | 3545 | * But fallocate would have already updated quota and block |
3271 | * count for this offset. So cancel these reservation | 3546 | * count for this offset. So cancel these reservation |
3272 | */ | 3547 | */ |
3273 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 3548 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { |
3274 | ext4_da_update_reserve_space(inode, allocated, 0); | 3549 | unsigned int reserved_clusters; |
3550 | reserved_clusters = get_reserved_cluster_alloc(inode, | ||
3551 | map->m_lblk, map->m_len); | ||
3552 | if (reserved_clusters) | ||
3553 | ext4_da_update_reserve_space(inode, | ||
3554 | reserved_clusters, | ||
3555 | 0); | ||
3556 | } | ||
3275 | 3557 | ||
3276 | map_out: | 3558 | map_out: |
3277 | map->m_flags |= EXT4_MAP_MAPPED; | 3559 | map->m_flags |= EXT4_MAP_MAPPED; |
3560 | if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) { | ||
3561 | err = check_eofblocks_fl(handle, inode, map->m_lblk, path, | ||
3562 | map->m_len); | ||
3563 | if (err < 0) | ||
3564 | goto out2; | ||
3565 | } | ||
3278 | out1: | 3566 | out1: |
3279 | if (allocated > map->m_len) | 3567 | if (allocated > map->m_len) |
3280 | allocated = map->m_len; | 3568 | allocated = map->m_len; |
@@ -3290,6 +3578,111 @@ out2: | |||
3290 | } | 3578 | } |
3291 | 3579 | ||
3292 | /* | 3580 | /* |
3581 | * get_implied_cluster_alloc - check to see if the requested | ||
3582 | * allocation (in the map structure) overlaps with a cluster already | ||
3583 | * allocated in an extent. | ||
3584 | * @sb The filesystem superblock structure | ||
3585 | * @map The requested lblk->pblk mapping | ||
3586 | * @ex The extent structure which might contain an implied | ||
3587 | * cluster allocation | ||
3588 | * | ||
3589 | * This function is called by ext4_ext_map_blocks() after we failed to | ||
3590 | * find blocks that were already in the inode's extent tree. Hence, | ||
3591 | * we know that the beginning of the requested region cannot overlap | ||
3592 | * the extent from the inode's extent tree. There are three cases we | ||
3593 | * want to catch. The first is this case: | ||
3594 | * | ||
3595 | * |--- cluster # N--| | ||
3596 | * |--- extent ---| |---- requested region ---| | ||
3597 | * |==========| | ||
3598 | * | ||
3599 | * The second case that we need to test for is this one: | ||
3600 | * | ||
3601 | * |--------- cluster # N ----------------| | ||
3602 | * |--- requested region --| |------- extent ----| | ||
3603 | * |=======================| | ||
3604 | * | ||
3605 | * The third case is when the requested region lies between two extents | ||
3606 | * within the same cluster: | ||
3607 | * |------------- cluster # N-------------| | ||
3608 | * |----- ex -----| |---- ex_right ----| | ||
3609 | * |------ requested region ------| | ||
3610 | * |================| | ||
3611 | * | ||
3612 | * In each of the above cases, we need to set the map->m_pblk and | ||
3613 | * map->m_len so they correspond to the extent labelled as | ||
3614 | * "|====|" from cluster #N, since it is already in use for data in | ||
3615 | * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to | ||
3616 | * signal to ext4_ext_map_blocks() that map->m_pblk should be treated | ||
3617 | * as a new "allocated" block region. Otherwise, we will return 0 and | ||
3618 | * ext4_ext_map_blocks() will then allocate one or more new clusters | ||
3619 | * by calling ext4_mb_new_blocks(). | ||
3620 | */ | ||
3621 | static int get_implied_cluster_alloc(struct super_block *sb, | ||
3622 | struct ext4_map_blocks *map, | ||
3623 | struct ext4_extent *ex, | ||
3624 | struct ext4_ext_path *path) | ||
3625 | { | ||
3626 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
3627 | ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); | ||
3628 | ext4_lblk_t ex_cluster_start, ex_cluster_end; | ||
3629 | ext4_lblk_t rr_cluster_start, rr_cluster_end; | ||
3630 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); | ||
3631 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); | ||
3632 | unsigned short ee_len = ext4_ext_get_actual_len(ex); | ||
3633 | |||
3634 | /* The extent passed in that we are trying to match */ | ||
3635 | ex_cluster_start = EXT4_B2C(sbi, ee_block); | ||
3636 | ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1); | ||
3637 | |||
3638 | /* The requested region passed into ext4_map_blocks() */ | ||
3639 | rr_cluster_start = EXT4_B2C(sbi, map->m_lblk); | ||
3640 | rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1); | ||
3641 | |||
3642 | if ((rr_cluster_start == ex_cluster_end) || | ||
3643 | (rr_cluster_start == ex_cluster_start)) { | ||
3644 | if (rr_cluster_start == ex_cluster_end) | ||
3645 | ee_start += ee_len - 1; | ||
3646 | map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) + | ||
3647 | c_offset; | ||
3648 | map->m_len = min(map->m_len, | ||
3649 | (unsigned) sbi->s_cluster_ratio - c_offset); | ||
3650 | /* | ||
3651 | * Check for and handle this case: | ||
3652 | * | ||
3653 | * |--------- cluster # N-------------| | ||
3654 | * |------- extent ----| | ||
3655 | * |--- requested region ---| | ||
3656 | * |===========| | ||
3657 | */ | ||
3658 | |||
3659 | if (map->m_lblk < ee_block) | ||
3660 | map->m_len = min(map->m_len, ee_block - map->m_lblk); | ||
3661 | |||
3662 | /* | ||
3663 | * Check for the case where there is already another allocated | ||
3664 | * block to the right of 'ex' but before the end of the cluster. | ||
3665 | * | ||
3666 | * |------------- cluster # N-------------| | ||
3667 | * |----- ex -----| |---- ex_right ----| | ||
3668 | * |------ requested region ------| | ||
3669 | * |================| | ||
3670 | */ | ||
3671 | if (map->m_lblk > ee_block) { | ||
3672 | ext4_lblk_t next = ext4_ext_next_allocated_block(path); | ||
3673 | map->m_len = min(map->m_len, next - map->m_lblk); | ||
3674 | } | ||
3675 | |||
3676 | trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1); | ||
3677 | return 1; | ||
3678 | } | ||
3679 | |||
3680 | trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0); | ||
3681 | return 0; | ||
3682 | } | ||
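
A worked example of the first case handled above, where the requested region starts in the cluster that holds the tail of the extent: the physical block is recovered from the extent's last mapped block, rounded down to its cluster base, plus the request's offset within the cluster. Values are illustrative:

    #include <stdio.h>

    int main(void)
    {
            unsigned int ratio = 16;                /* blocks per cluster */
            unsigned int ee_len = 3;                /* extent covers lblk 34..36 */
            unsigned long long ee_start = 1002;     /* ...mapped to pblk 1002..1004 */
            unsigned int m_lblk = 40;               /* request in the same cluster */
            unsigned int c_offset = m_lblk & (ratio - 1);           /* 8 */

            /* Last pblk of the extent, rounded down to its cluster
             * base, plus the request's offset within the cluster. */
            unsigned long long last = ee_start + ee_len - 1;        /* 1004 */
            unsigned long long base = last & ~((unsigned long long)ratio - 1);

            printf("m_pblk = %llu\n", base + c_offset);     /* 992 + 8 = 1000 */
            return 0;
    }
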
3683 | |||
3684 | |||
3685 | /* | ||
3293 | * Block allocation/map/preallocation routine for extents based files | 3686 | * Block allocation/map/preallocation routine for extents based files |
3294 | * | 3687 | * |
3295 | * | 3688 | * |
@@ -3311,15 +3704,17 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3311 | struct ext4_map_blocks *map, int flags) | 3704 | struct ext4_map_blocks *map, int flags) |
3312 | { | 3705 | { |
3313 | struct ext4_ext_path *path = NULL; | 3706 | struct ext4_ext_path *path = NULL; |
3314 | struct ext4_extent newex, *ex; | 3707 | struct ext4_extent newex, *ex, *ex2; |
3708 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
3315 | ext4_fsblk_t newblock = 0; | 3709 | ext4_fsblk_t newblock = 0; |
3316 | int err = 0, depth, ret; | 3710 | int free_on_err = 0, err = 0, depth, ret; |
3317 | unsigned int allocated = 0; | 3711 | unsigned int allocated = 0, offset = 0; |
3712 | unsigned int allocated_clusters = 0; | ||
3318 | unsigned int punched_out = 0; | 3713 | unsigned int punched_out = 0; |
3319 | unsigned int result = 0; | 3714 | unsigned int result = 0; |
3320 | struct ext4_allocation_request ar; | 3715 | struct ext4_allocation_request ar; |
3321 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3716 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; |
3322 | struct ext4_map_blocks punch_map; | 3717 | ext4_lblk_t cluster_offset; |
3323 | 3718 | ||
3324 | ext_debug("blocks %u/%u requested for inode %lu\n", | 3719 | ext_debug("blocks %u/%u requested for inode %lu\n", |
3325 | map->m_lblk, map->m_len, inode->i_ino); | 3720 | map->m_lblk, map->m_len, inode->i_ino); |
@@ -3329,6 +3724,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3329 | if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && | 3724 | if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && |
3330 | ext4_ext_in_cache(inode, map->m_lblk, &newex)) { | 3725 | ext4_ext_in_cache(inode, map->m_lblk, &newex)) { |
3331 | if (!newex.ee_start_lo && !newex.ee_start_hi) { | 3726 | if (!newex.ee_start_lo && !newex.ee_start_hi) { |
3727 | if ((sbi->s_cluster_ratio > 1) && | ||
3728 | ext4_find_delalloc_cluster(inode, map->m_lblk, 0)) | ||
3729 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | ||
3730 | |||
3332 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { | 3731 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { |
3333 | /* | 3732 | /* |
3334 | * block isn't allocated yet and | 3733 | * block isn't allocated yet and |
@@ -3339,6 +3738,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3339 | /* we should allocate requested block */ | 3738 | /* we should allocate requested block */ |
3340 | } else { | 3739 | } else { |
3341 | /* block is already allocated */ | 3740 | /* block is already allocated */ |
3741 | if (sbi->s_cluster_ratio > 1) | ||
3742 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | ||
3342 | newblock = map->m_lblk | 3743 | newblock = map->m_lblk |
3343 | - le32_to_cpu(newex.ee_block) | 3744 | - le32_to_cpu(newex.ee_block) |
3344 | + ext4_ext_pblock(&newex); | 3745 | + ext4_ext_pblock(&newex); |
@@ -3384,8 +3785,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3384 | * we split out initialized portions during a write. | 3785 | * we split out initialized portions during a write. |
3385 | */ | 3786 | */ |
3386 | ee_len = ext4_ext_get_actual_len(ex); | 3787 | ee_len = ext4_ext_get_actual_len(ex); |
3788 | |||
3789 | trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len); | ||
3790 | |||
3387 | /* if found extent covers block, simply return it */ | 3791 | /* if found extent covers block, simply return it */ |
3388 | if (in_range(map->m_lblk, ee_block, ee_len)) { | 3792 | if (in_range(map->m_lblk, ee_block, ee_len)) { |
3793 | struct ext4_map_blocks punch_map; | ||
3794 | ext4_fsblk_t partial_cluster = 0; | ||
3795 | |||
3389 | newblock = map->m_lblk - ee_block + ee_start; | 3796 | newblock = map->m_lblk - ee_block + ee_start; |
3390 | /* number of remaining blocks in the extent */ | 3797 | /* number of remaining blocks in the extent */ |
3391 | allocated = ee_len - (map->m_lblk - ee_block); | 3798 | allocated = ee_len - (map->m_lblk - ee_block); |
@@ -3469,7 +3876,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3469 | ext4_ext_invalidate_cache(inode); | 3876 | ext4_ext_invalidate_cache(inode); |
3470 | 3877 | ||
3471 | err = ext4_ext_rm_leaf(handle, inode, path, | 3878 | err = ext4_ext_rm_leaf(handle, inode, path, |
3472 | map->m_lblk, map->m_lblk + punched_out); | 3879 | &partial_cluster, map->m_lblk, |
3880 | map->m_lblk + punched_out); | ||
3473 | 3881 | ||
3474 | if (!err && path->p_hdr->eh_entries == 0) { | 3882 | if (!err && path->p_hdr->eh_entries == 0) { |
3475 | /* | 3883 | /* |
@@ -3492,6 +3900,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3492 | } | 3900 | } |
3493 | } | 3901 | } |
3494 | 3902 | ||
3903 | if ((sbi->s_cluster_ratio > 1) && | ||
3904 | ext4_find_delalloc_cluster(inode, map->m_lblk, 0)) | ||
3905 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | ||
3906 | |||
3495 | /* | 3907 | /* |
3496 | * requested block isn't allocated yet; | 3908 | * requested block isn't allocated yet; |
3497 | * we couldn't try to create block if create flag is zero | 3909 | * we couldn't try to create block if create flag is zero |
@@ -3504,9 +3916,25 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3504 | ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); | 3916 | ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); |
3505 | goto out2; | 3917 | goto out2; |
3506 | } | 3918 | } |
3919 | |||
3507 | /* | 3920 | /* |
3508 | * Okay, we need to do block allocation. | 3921 | * Okay, we need to do block allocation. |
3509 | */ | 3922 | */ |
3923 | map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; | ||
3924 | newex.ee_block = cpu_to_le32(map->m_lblk); | ||
3925 | cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1); | ||
3926 | |||
3927 | /* | ||
3928 | * If we are doing bigalloc, check to see if the extent returned | ||
3929 | * by ext4_ext_find_extent() implies a cluster we can use. | ||
3930 | */ | ||
3931 | if (cluster_offset && ex && | ||
3932 | get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { | ||
3933 | ar.len = allocated = map->m_len; | ||
3934 | newblock = map->m_pblk; | ||
3935 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | ||
3936 | goto got_allocated_blocks; | ||
3937 | } | ||
3510 | 3938 | ||
3511 | /* find neighbour allocated blocks */ | 3939 | /* find neighbour allocated blocks */ |
3512 | ar.lleft = map->m_lblk; | 3940 | ar.lleft = map->m_lblk; |
@@ -3514,10 +3942,21 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3514 | if (err) | 3942 | if (err) |
3515 | goto out2; | 3943 | goto out2; |
3516 | ar.lright = map->m_lblk; | 3944 | ar.lright = map->m_lblk; |
3517 | err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); | 3945 | ex2 = NULL; |
3946 | err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2); | ||
3518 | if (err) | 3947 | if (err) |
3519 | goto out2; | 3948 | goto out2; |
3520 | 3949 | ||
3950 | /* Check if the extent after searching to the right implies a | ||
3951 | * cluster we can use. */ | ||
3952 | if ((sbi->s_cluster_ratio > 1) && ex2 && | ||
3953 | get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) { | ||
3954 | ar.len = allocated = map->m_len; | ||
3955 | newblock = map->m_pblk; | ||
3956 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | ||
3957 | goto got_allocated_blocks; | ||
3958 | } | ||
3959 | |||
3521 | /* | 3960 | /* |
3522 | * See if request is beyond maximum number of blocks we can have in | 3961 | * See if request is beyond maximum number of blocks we can have in |
3523 | * a single extent. For an initialized extent this limit is | 3962 | * a single extent. For an initialized extent this limit is |
@@ -3532,9 +3971,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3532 | map->m_len = EXT_UNINIT_MAX_LEN; | 3971 | map->m_len = EXT_UNINIT_MAX_LEN; |
3533 | 3972 | ||
3534 | /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ | 3973 | /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ |
3535 | newex.ee_block = cpu_to_le32(map->m_lblk); | ||
3536 | newex.ee_len = cpu_to_le16(map->m_len); | 3974 | newex.ee_len = cpu_to_le16(map->m_len); |
3537 | err = ext4_ext_check_overlap(inode, &newex, path); | 3975 | err = ext4_ext_check_overlap(sbi, inode, &newex, path); |
3538 | if (err) | 3976 | if (err) |
3539 | allocated = ext4_ext_get_actual_len(&newex); | 3977 | allocated = ext4_ext_get_actual_len(&newex); |
3540 | else | 3978 | else |
@@ -3544,7 +3982,18 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3544 | ar.inode = inode; | 3982 | ar.inode = inode; |
3545 | ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); | 3983 | ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); |
3546 | ar.logical = map->m_lblk; | 3984 | ar.logical = map->m_lblk; |
3547 | ar.len = allocated; | 3985 | /* |
3986 | * We calculate the offset from the beginning of the cluster | ||
3987 | * for the logical block number, since when we allocate a | ||
3988 | * physical cluster, the physical block should start at the | ||
3989 | * same offset from the beginning of the cluster. This is | ||
3990 | * needed so that future calls to get_implied_cluster_alloc() | ||
3991 | * work correctly. | ||
3992 | */ | ||
3993 | offset = map->m_lblk & (sbi->s_cluster_ratio - 1); | ||
3994 | ar.len = EXT4_NUM_B2C(sbi, offset+allocated); | ||
3995 | ar.goal -= offset; | ||
3996 | ar.logical -= offset; | ||
3548 | if (S_ISREG(inode->i_mode)) | 3997 | if (S_ISREG(inode->i_mode)) |
3549 | ar.flags = EXT4_MB_HINT_DATA; | 3998 | ar.flags = EXT4_MB_HINT_DATA; |
3550 | else | 3999 | else |
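
The hunk above widens the allocation request to whole clusters: the goal and logical block are pulled back to the cluster boundary, and the length is converted with EXT4_NUM_B2C(), a round-up blocks-to-clusters conversion. A sketch of that arithmetic, with illustrative values:

    #include <stdio.h>

    /* Round-up conversion from blocks to clusters, as EXT4_NUM_B2C does. */
    static unsigned int num_b2c(unsigned int blks, unsigned int ratio)
    {
            return (blks + ratio - 1) / ratio;
    }

    int main(void)
    {
            unsigned int ratio = 16;
            unsigned int m_lblk = 42, allocated = 20;
            unsigned int offset = m_lblk & (ratio - 1);     /* 10 */

            /* Ask mballoc for whole clusters covering the span from
             * (m_lblk - offset) to (m_lblk + allocated), so the physical
             * allocation starts on a cluster boundary as well. */
            unsigned int ar_len = num_b2c(offset + allocated, ratio); /* 2 */
            printf("request %u cluster(s) starting %u blocks early\n",
                   ar_len, offset);
            return 0;
    }
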
@@ -3557,9 +4006,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3557 | goto out2; | 4006 | goto out2; |
3558 | ext_debug("allocate new block: goal %llu, found %llu/%u\n", | 4007 | ext_debug("allocate new block: goal %llu, found %llu/%u\n", |
3559 | ar.goal, newblock, allocated); | 4008 | ar.goal, newblock, allocated); |
4009 | free_on_err = 1; | ||
4010 | allocated_clusters = ar.len; | ||
4011 | ar.len = EXT4_C2B(sbi, ar.len) - offset; | ||
4012 | if (ar.len > allocated) | ||
4013 | ar.len = allocated; | ||
3560 | 4014 | ||
4015 | got_allocated_blocks: | ||
3561 | /* try to insert new extent into found leaf and return */ | 4016 | /* try to insert new extent into found leaf and return */ |
3562 | ext4_ext_store_pblock(&newex, newblock); | 4017 | ext4_ext_store_pblock(&newex, newblock + offset); |
3563 | newex.ee_len = cpu_to_le16(ar.len); | 4018 | newex.ee_len = cpu_to_le16(ar.len); |
3564 | /* Mark uninitialized */ | 4019 | /* Mark uninitialized */ |
3565 | if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ | 4020 | if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ |
@@ -3572,10 +4027,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3572 | * that we need to perform conversion when IO is done. | 4027 | * that we need to perform conversion when IO is done. |
3573 | */ | 4028 | */ |
3574 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 4029 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3575 | if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { | 4030 | if (io) |
3576 | io->flag = EXT4_IO_END_UNWRITTEN; | 4031 | ext4_set_io_unwritten_flag(inode, io); |
3577 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | 4032 | else |
3578 | } else | ||
3579 | ext4_set_inode_state(inode, | 4033 | ext4_set_inode_state(inode, |
3580 | EXT4_STATE_DIO_UNWRITTEN); | 4034 | EXT4_STATE_DIO_UNWRITTEN); |
3581 | } | 4035 | } |
@@ -3583,11 +4037,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3583 | map->m_flags |= EXT4_MAP_UNINIT; | 4037 | map->m_flags |= EXT4_MAP_UNINIT; |
3584 | } | 4038 | } |
3585 | 4039 | ||
3586 | err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); | 4040 | err = 0; |
4041 | if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) | ||
4042 | err = check_eofblocks_fl(handle, inode, map->m_lblk, | ||
4043 | path, ar.len); | ||
3587 | if (!err) | 4044 | if (!err) |
3588 | err = ext4_ext_insert_extent(handle, inode, path, | 4045 | err = ext4_ext_insert_extent(handle, inode, path, |
3589 | &newex, flags); | 4046 | &newex, flags); |
3590 | if (err) { | 4047 | if (err && free_on_err) { |
3591 | int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? | 4048 | int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? |
3592 | EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; | 4049 | EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; |
3593 | /* free data blocks we just allocated */ | 4050 | /* free data blocks we just allocated */ |
@@ -3610,8 +4067,82 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3610 | * Update reserved blocks/metadata blocks after successful | 4067 | * Update reserved blocks/metadata blocks after successful |
3611 | * block allocation which had been deferred till now. | 4068 | * block allocation which had been deferred till now. |
3612 | */ | 4069 | */ |
3613 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 4070 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { |
3614 | ext4_da_update_reserve_space(inode, allocated, 1); | 4071 | unsigned int reserved_clusters; |
4072 | /* | ||
4073 | * Check how many clusters we had reserved this allocated range | ||
4074 | */ | ||
4075 | reserved_clusters = get_reserved_cluster_alloc(inode, | ||
4076 | map->m_lblk, allocated); | ||
4077 | if (map->m_flags & EXT4_MAP_FROM_CLUSTER) { | ||
4078 | if (reserved_clusters) { | ||
4079 | /* | ||
4080 | * We have clusters reserved for this range. | ||
4081 | * But since we are not doing actual allocation | ||
4082 | * and are simply using blocks from previously | ||
4083 | * allocated cluster, we should release the | ||
4084 | * reservation and not claim quota. | ||
4085 | */ | ||
4086 | ext4_da_update_reserve_space(inode, | ||
4087 | reserved_clusters, 0); | ||
4088 | } | ||
4089 | } else { | ||
4090 | BUG_ON(allocated_clusters < reserved_clusters); | ||
4091 | /* We will claim quota for all newly allocated blocks.*/ | ||
4092 | ext4_da_update_reserve_space(inode, allocated_clusters, | ||
4093 | 1); | ||
4094 | if (reserved_clusters < allocated_clusters) { | ||
4095 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
4096 | int reservation = allocated_clusters - | ||
4097 | reserved_clusters; | ||
4098 | /* | ||
4099 | * It seems we claimed a few clusters outside of | ||
4100 | * the range of this allocation. We should give | ||
4101 | * them back to the reservation pool. This can | ||
4102 | * happen in the following case: | ||
4103 | * | ||
4104 | * * Suppose s_cluster_ratio is 4 (i.e., each | ||
4105 | * cluster has 4 blocks). Thus, the clusters | ||
4106 | * are [0-3],[4-7],[8-11]... | ||
4107 | * * First comes delayed allocation write for | ||
4108 | * logical blocks 10 & 11. Since there were no | ||
4109 | * previous delayed allocated blocks in the | ||
4110 | * range [8-11], we would reserve 1 cluster | ||
4111 | * for this write. | ||
4112 | * * Next comes write for logical blocks 3 to 8. | ||
4113 | * In this case, we will reserve 2 clusters | ||
4114 | * (for [0-3] and [4-7], and not for [8-11], as | ||
4115 | * that range has delayed allocated blocks). | ||
4116 | * Thus total reserved clusters now becomes 3. | ||
4117 | * * Now, during the delayed allocation writeout | ||
4118 | * time, we will first write blocks [3-8] and | ||
4119 | * allocate 3 clusters for writing these | ||
4120 | * blocks. Also, we would claim all these | ||
4121 | * three clusters above. | ||
4122 | * * Now when we come here to write out the | ||
4123 | * blocks [10-11], we would expect to claim | ||
4124 | * the reservation of 1 cluster we had made | ||
4125 | * (and we would claim it since there are no | ||
4126 | * more delayed allocated blocks in the range | ||
4127 | * [8-11]). But our reserved cluster count had | ||
4128 | * already gone to 0. | ||
4129 | * | ||
4130 | * Thus, at step 4 above, when we determine | ||
4131 | * that there are still some unwritten delayed | ||
4132 | * allocated blocks outside of our current | ||
4133 | * block range, we should increment the | ||
4134 | * reserved clusters count so that when the | ||
4135 | * remaining blocks finally get written, we | ||
4136 | * can claim them. | ||
4137 | */ | ||
4138 | dquot_reserve_block(inode, | ||
4139 | EXT4_C2B(sbi, reservation)); | ||
4140 | spin_lock(&ei->i_block_reservation_lock); | ||
4141 | ei->i_reserved_data_blocks += reservation; | ||
4142 | spin_unlock(&ei->i_block_reservation_lock); | ||
4143 | } | ||
4144 | } | ||
4145 | } | ||
3615 | 4146 | ||
3616 | /* | 4147 | /* |
3617 | * Cache the extent and update transaction to commit on fdatasync only | 4148 | * Cache the extent and update transaction to commit on fdatasync only |
@@ -3634,12 +4165,12 @@ out2: | |||
3634 | ext4_ext_drop_refs(path); | 4165 | ext4_ext_drop_refs(path); |
3635 | kfree(path); | 4166 | kfree(path); |
3636 | } | 4167 | } |
3637 | trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, | ||
3638 | newblock, map->m_len, err ? err : allocated); | ||
3639 | |||
3640 | result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? | 4168 | result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? |
3641 | punched_out : allocated; | 4169 | punched_out : allocated; |
3642 | 4170 | ||
4171 | trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, | ||
4172 | newblock, map->m_len, err ? err : result); | ||
4173 | |||
3643 | return err ? err : result; | 4174 | return err ? err : result; |
3644 | } | 4175 | } |
3645 | 4176 | ||
@@ -3649,6 +4180,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
3649 | struct super_block *sb = inode->i_sb; | 4180 | struct super_block *sb = inode->i_sb; |
3650 | ext4_lblk_t last_block; | 4181 | ext4_lblk_t last_block; |
3651 | handle_t *handle; | 4182 | handle_t *handle; |
4183 | loff_t page_len; | ||
3652 | int err = 0; | 4184 | int err = 0; |
3653 | 4185 | ||
3654 | /* | 4186 | /* |
@@ -3665,8 +4197,16 @@ void ext4_ext_truncate(struct inode *inode) | |||
3665 | if (IS_ERR(handle)) | 4197 | if (IS_ERR(handle)) |
3666 | return; | 4198 | return; |
3667 | 4199 | ||
3668 | if (inode->i_size & (sb->s_blocksize - 1)) | 4200 | if (inode->i_size % PAGE_CACHE_SIZE != 0) { |
3669 | ext4_block_truncate_page(handle, mapping, inode->i_size); | 4201 | page_len = PAGE_CACHE_SIZE - |
4202 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
4203 | |||
4204 | err = ext4_discard_partial_page_buffers(handle, | ||
4205 | mapping, inode->i_size, page_len, 0); | ||
4206 | |||
4207 | if (err) | ||
4208 | goto out_stop; | ||
4209 | } | ||
3670 | 4210 | ||
3671 | if (ext4_orphan_add(handle, inode)) | 4211 | if (ext4_orphan_add(handle, inode)) |
3672 | goto out_stop; | 4212 | goto out_stop; |
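
The tail-page length computed in the hunk above is plain modular arithmetic; a standalone check of the same computation, assuming 4K pages:

    #include <stdio.h>

    int main(void)
    {
            unsigned long page_size = 4096;
            unsigned long long i_size = 10000;      /* not page aligned */

            if (i_size % page_size != 0) {
                    unsigned long page_len =
                            page_size - (i_size & (page_size - 1));
                    /* 10000 % 4096 = 1808, so 2288 bytes to discard */
                    printf("discard %lu bytes after i_size\n", page_len);
            }
            return 0;
    }
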
@@ -3760,6 +4300,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
3760 | int ret = 0; | 4300 | int ret = 0; |
3761 | int ret2 = 0; | 4301 | int ret2 = 0; |
3762 | int retries = 0; | 4302 | int retries = 0; |
4303 | int flags; | ||
3763 | struct ext4_map_blocks map; | 4304 | struct ext4_map_blocks map; |
3764 | unsigned int credits, blkbits = inode->i_blkbits; | 4305 | unsigned int credits, blkbits = inode->i_blkbits; |
3765 | 4306 | ||
@@ -3796,6 +4337,16 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
3796 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); | 4337 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); |
3797 | return ret; | 4338 | return ret; |
3798 | } | 4339 | } |
4340 | flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; | ||
4341 | if (mode & FALLOC_FL_KEEP_SIZE) | ||
4342 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; | ||
4343 | /* | ||
4344 | * Don't normalize the request if it can fit in one extent so | ||
4345 | * that it doesn't get unnecessarily split into multiple | ||
4346 | * extents. | ||
4347 | */ | ||
4348 | if (len <= EXT_UNINIT_MAX_LEN << blkbits) | ||
4349 | flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; | ||
3799 | retry: | 4350 | retry: |
3800 | while (ret >= 0 && ret < max_blocks) { | 4351 | while (ret >= 0 && ret < max_blocks) { |
3801 | map.m_lblk = map.m_lblk + ret; | 4352 | map.m_lblk = map.m_lblk + ret; |
@@ -3805,9 +4356,7 @@ retry: | |||
3805 | ret = PTR_ERR(handle); | 4356 | ret = PTR_ERR(handle); |
3806 | break; | 4357 | break; |
3807 | } | 4358 | } |
3808 | ret = ext4_map_blocks(handle, inode, &map, | 4359 | ret = ext4_map_blocks(handle, inode, &map, flags); |
3809 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | | ||
3810 | EXT4_GET_BLOCKS_NO_NORMALIZE); | ||
3811 | if (ret <= 0) { | 4360 | if (ret <= 0) { |
3812 | #ifdef EXT4FS_DEBUG | 4361 | #ifdef EXT4FS_DEBUG |
3813 | WARN_ON(ret <= 0); | 4362 | WARN_ON(ret <= 0); |
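
The one-extent test added before the retry loop compares the request length against EXT_UNINIT_MAX_LEN worth of blocks (32767 blocks for an uninitialized extent; that value is assumed here). A sketch of the check:

    #include <stdio.h>

    int main(void)
    {
            /* EXT_UNINIT_MAX_LEN is 32767 blocks in ext4 (assumed) */
            unsigned long long max_uninit_len = 32767;
            unsigned int blkbits = 12;              /* 4K blocks */
            unsigned long long len = 100ULL << 20;  /* 100 MiB request */

            if (len <= (max_uninit_len << blkbits))
                    printf("fits in one extent: skip normalization\n");
            else
                    printf("request spans several extents\n");
            return 0;
    }
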
@@ -4102,7 +4651,6 @@ found_delayed_extent: | |||
4102 | return EXT_BREAK; | 4651 | return EXT_BREAK; |
4103 | return EXT_CONTINUE; | 4652 | return EXT_CONTINUE; |
4104 | } | 4653 | } |
4105 | |||
4106 | /* fiemap flags we can handle specified here */ | 4654 | /* fiemap flags we can handle specified here */ |
4107 | #define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) | 4655 | #define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) |
4108 | 4656 | ||
@@ -4162,17 +4710,28 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4162 | struct address_space *mapping = inode->i_mapping; | 4710 | struct address_space *mapping = inode->i_mapping; |
4163 | struct ext4_map_blocks map; | 4711 | struct ext4_map_blocks map; |
4164 | handle_t *handle; | 4712 | handle_t *handle; |
4165 | loff_t first_block_offset, last_block_offset, block_len; | 4713 | loff_t first_page, last_page, page_len; |
4166 | loff_t first_page, last_page, first_page_offset, last_page_offset; | 4714 | loff_t first_page_offset, last_page_offset; |
4167 | int ret, credits, blocks_released, err = 0; | 4715 | int ret, credits, blocks_released, err = 0; |
4168 | 4716 | ||
4717 | /* No need to punch hole beyond i_size */ | ||
4718 | if (offset >= inode->i_size) | ||
4719 | return 0; | ||
4720 | |||
4721 | /* | ||
4722 | * If the hole extends beyond i_size, set the hole | ||
4723 | * to end after the page that contains i_size | ||
4724 | */ | ||
4725 | if (offset + length > inode->i_size) { | ||
4726 | length = inode->i_size + | ||
4727 | PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) - | ||
4728 | offset; | ||
4729 | } | ||
4730 | |||
4169 | first_block = (offset + sb->s_blocksize - 1) >> | 4731 | first_block = (offset + sb->s_blocksize - 1) >> |
4170 | EXT4_BLOCK_SIZE_BITS(sb); | 4732 | EXT4_BLOCK_SIZE_BITS(sb); |
4171 | last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); | 4733 | last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); |
4172 | 4734 | ||
4173 | first_block_offset = first_block << EXT4_BLOCK_SIZE_BITS(sb); | ||
4174 | last_block_offset = last_block << EXT4_BLOCK_SIZE_BITS(sb); | ||
4175 | |||
4176 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 4735 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
4177 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; | 4736 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; |
4178 | 4737 | ||
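
The clamp at the top of this hunk shortens the hole so it ends at the first page boundary past i_size. A quick standalone check of the arithmetic, with illustrative values:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long page_size = 4096;
            unsigned long long i_size = 10000;
            unsigned long long offset = 8192, length = 100000;

            if (offset + length > i_size)
                    length = i_size + page_size
                                    - (i_size & (page_size - 1)) - offset;
            /* hole now ends at byte 12288, the page boundary past i_size */
            printf("clamped length = %llu\n", length);      /* 4096 */
            return 0;
    }
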
@@ -4185,11 +4744,10 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4185 | */ | 4744 | */ |
4186 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | 4745 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
4187 | err = filemap_write_and_wait_range(mapping, | 4746 | err = filemap_write_and_wait_range(mapping, |
4188 | first_page_offset == 0 ? 0 : first_page_offset-1, | 4747 | offset, offset + length - 1); |
4189 | last_page_offset); | ||
4190 | 4748 | ||
4191 | if (err) | 4749 | if (err) |
4192 | return err; | 4750 | return err; |
4193 | } | 4751 | } |
4194 | 4752 | ||
4195 | /* Now release the pages */ | 4753 | /* Now release the pages */ |
@@ -4211,24 +4769,64 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4211 | goto out; | 4769 | goto out; |
4212 | 4770 | ||
4213 | /* | 4771 | /* |
4214 | * Now we need to zero out the un block aligned data. | 4772 | * Now we need to zero out the non-page-aligned data in the |
4215 | * If the file is smaller than a block, just | 4773 | * pages at the start and tail of the hole, and unmap the buffer |
4216 | * zero out the middle | 4774 | * heads for the block aligned regions of the page that were |
4775 | * completely zeroed. | ||
4217 | */ | 4776 | */ |
4218 | if (first_block > last_block) | 4777 | if (first_page > last_page) { |
4219 | ext4_block_zero_page_range(handle, mapping, offset, length); | 4778 | /* |
4220 | else { | 4779 | * If the file space being truncated is contained within a page, |
4221 | /* zero out the head of the hole before the first block */ | 4780 | * just zero out and unmap the middle of that page |
4222 | block_len = first_block_offset - offset; | 4781 | */ |
4223 | if (block_len > 0) | 4782 | err = ext4_discard_partial_page_buffers(handle, |
4224 | ext4_block_zero_page_range(handle, mapping, | 4783 | mapping, offset, length, 0); |
4225 | offset, block_len); | 4784 | |
4226 | 4785 | if (err) | |
4227 | /* zero out the tail of the hole after the last block */ | 4786 | goto out; |
4228 | block_len = offset + length - last_block_offset; | 4787 | } else { |
4229 | if (block_len > 0) { | 4788 | /* |
4230 | ext4_block_zero_page_range(handle, mapping, | 4789 | * zero out and unmap the partial page that contains |
4231 | last_block_offset, block_len); | 4790 | * the start of the hole |
4791 | */ | ||
4792 | page_len = first_page_offset - offset; | ||
4793 | if (page_len > 0) { | ||
4794 | err = ext4_discard_partial_page_buffers(handle, mapping, | ||
4795 | offset, page_len, 0); | ||
4796 | if (err) | ||
4797 | goto out; | ||
4798 | } | ||
4799 | |||
4800 | /* | ||
4801 | * zero out and unmap the partial page that contains | ||
4802 | * the end of the hole | ||
4803 | */ | ||
4804 | page_len = offset + length - last_page_offset; | ||
4805 | if (page_len > 0) { | ||
4806 | err = ext4_discard_partial_page_buffers(handle, mapping, | ||
4807 | last_page_offset, page_len, 0); | ||
4808 | if (err) | ||
4809 | goto out; | ||
4810 | } | ||
4811 | } | ||
4812 | |||
4813 | |||
4814 | /* | ||
4815 | * If i_size is contained in the last page, we need to | ||
4816 | * unmap and zero the partial page after i_size | ||
4817 | */ | ||
4818 | if (inode->i_size >> PAGE_CACHE_SHIFT == last_page && | ||
4819 | inode->i_size % PAGE_CACHE_SIZE != 0) { | ||
4820 | |||
4821 | page_len = PAGE_CACHE_SIZE - | ||
4822 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
4823 | |||
4824 | if (page_len > 0) { | ||
4825 | err = ext4_discard_partial_page_buffers(handle, | ||
4826 | mapping, inode->i_size, page_len, 0); | ||
4827 | |||
4828 | if (err) | ||
4829 | goto out; | ||
4232 | } | 4830 | } |
4233 | } | 4831 | } |
4234 | 4832 | ||
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b9548f477bb..cb70f1812a7 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -181,8 +181,8 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
181 | path.dentry = mnt->mnt_root; | 181 | path.dentry = mnt->mnt_root; |
182 | cp = d_path(&path, buf, sizeof(buf)); | 182 | cp = d_path(&path, buf, sizeof(buf)); |
183 | if (!IS_ERR(cp)) { | 183 | if (!IS_ERR(cp)) { |
184 | memcpy(sbi->s_es->s_last_mounted, cp, | 184 | strlcpy(sbi->s_es->s_last_mounted, cp, |
185 | sizeof(sbi->s_es->s_last_mounted)); | 185 | sizeof(sbi->s_es->s_last_mounted)); |
186 | ext4_mark_super_dirty(sb); | 186 | ext4_mark_super_dirty(sb); |
187 | } | 187 | } |
188 | } | 188 | } |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 036f78f7a1e..00a2cb753ef 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -75,7 +75,7 @@ static void dump_completed_IO(struct inode * inode) | |||
75 | * to written. | 75 | * to written. |
76 | * The function returns the number of pending IOs on success. | 76 | * The function returns the number of pending IOs on success. |
77 | */ | 77 | */ |
78 | extern int ext4_flush_completed_IO(struct inode *inode) | 78 | int ext4_flush_completed_IO(struct inode *inode) |
79 | { | 79 | { |
80 | ext4_io_end_t *io; | 80 | ext4_io_end_t *io; |
81 | struct ext4_inode_info *ei = EXT4_I(inode); | 81 | struct ext4_inode_info *ei = EXT4_I(inode); |
@@ -83,14 +83,12 @@ extern int ext4_flush_completed_IO(struct inode *inode) | |||
83 | int ret = 0; | 83 | int ret = 0; |
84 | int ret2 = 0; | 84 | int ret2 = 0; |
85 | 85 | ||
86 | if (list_empty(&ei->i_completed_io_list)) | ||
87 | return ret; | ||
88 | |||
89 | dump_completed_IO(inode); | 86 | dump_completed_IO(inode); |
90 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 87 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
91 | while (!list_empty(&ei->i_completed_io_list)){ | 88 | while (!list_empty(&ei->i_completed_io_list)){ |
92 | io = list_entry(ei->i_completed_io_list.next, | 89 | io = list_entry(ei->i_completed_io_list.next, |
93 | ext4_io_end_t, list); | 90 | ext4_io_end_t, list); |
91 | list_del_init(&io->list); | ||
94 | /* | 92 | /* |
95 | * Calling ext4_end_io_nolock() to convert completed | 93 | * Calling ext4_end_io_nolock() to convert completed |
96 | * IO to written. | 94 | * IO to written. |
@@ -107,11 +105,9 @@ extern int ext4_flush_completed_IO(struct inode *inode) | |||
107 | */ | 105 | */ |
108 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 106 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
109 | ret = ext4_end_io_nolock(io); | 107 | ret = ext4_end_io_nolock(io); |
110 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
111 | if (ret < 0) | 108 | if (ret < 0) |
112 | ret2 = ret; | 109 | ret2 = ret; |
113 | else | 110 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
114 | list_del_init(&io->list); | ||
115 | } | 111 | } |
116 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 112 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
117 | return (ret2 < 0) ? ret2 : 0; | 113 | return (ret2 < 0) ? ret2 : 0; |
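
The reordering in this hunk follows a common pattern: unlink the entry while the spinlock is held, then drop the lock for the blocking conversion, so a racing thread can never pick up a half-processed entry, and the earlier list_empty() fast path becomes unnecessary. A userspace analog of the drain loop (a pthread mutex stands in for the irq-safe spinlock; all names hypothetical):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct io_end { int id; struct io_end *next; };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static struct io_end *completed;    /* stand-in for i_completed_io_list */

    /* Drain pattern from the patch: detach each entry under the lock,
     * then release the lock for the (possibly blocking) conversion. */
    static void flush_completed(void)
    {
            pthread_mutex_lock(&lock);
            while (completed) {
                    struct io_end *io = completed;
                    completed = io->next;           /* unlink before unlocking */
                    pthread_mutex_unlock(&lock);
                    printf("convert io %d\n", io->id);      /* blocking work */
                    free(io);
                    pthread_mutex_lock(&lock);
            }
            pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
            for (int i = 0; i < 3; i++) {
                    struct io_end *io = malloc(sizeof(*io));
                    io->id = i;
                    io->next = completed;
                    completed = io;
            }
            flush_completed();
            return 0;
    }
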
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 9c63f273b55..612bec255c6 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -78,7 +78,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, | |||
78 | * allocation, essentially implementing a per-group read-only flag. */ | 78 | * allocation, essentially implementing a per-group read-only flag. */ |
79 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { | 79 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { |
80 | ext4_error(sb, "Checksum bad for group %u", block_group); | 80 | ext4_error(sb, "Checksum bad for group %u", block_group); |
81 | ext4_free_blks_set(sb, gdp, 0); | 81 | ext4_free_group_clusters_set(sb, gdp, 0); |
82 | ext4_free_inodes_set(sb, gdp, 0); | 82 | ext4_free_inodes_set(sb, gdp, 0); |
83 | ext4_itable_unused_set(sb, gdp, 0); | 83 | ext4_itable_unused_set(sb, gdp, 0); |
84 | memset(bh->b_data, 0xff, sb->s_blocksize); | 84 | memset(bh->b_data, 0xff, sb->s_blocksize); |
@@ -293,121 +293,9 @@ error_return: | |||
293 | ext4_std_error(sb, fatal); | 293 | ext4_std_error(sb, fatal); |
294 | } | 294 | } |
295 | 295 | ||
296 | /* | ||
297 | * There are two policies for allocating an inode. If the new inode is | ||
298 | * a directory, then a forward search is made for a block group with both | ||
299 | * free space and a low directory-to-inode ratio; if that fails, then of | ||
300 | * the groups with above-average free space, that group with the fewest | ||
301 | * directories already is chosen. | ||
302 | * | ||
303 | * For other inodes, search forward from the parent directory\'s block | ||
304 | * group to find a free inode. | ||
305 | */ | ||
306 | static int find_group_dir(struct super_block *sb, struct inode *parent, | ||
307 | ext4_group_t *best_group) | ||
308 | { | ||
309 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
310 | unsigned int freei, avefreei; | ||
311 | struct ext4_group_desc *desc, *best_desc = NULL; | ||
312 | ext4_group_t group; | ||
313 | int ret = -1; | ||
314 | |||
315 | freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); | ||
316 | avefreei = freei / ngroups; | ||
317 | |||
318 | for (group = 0; group < ngroups; group++) { | ||
319 | desc = ext4_get_group_desc(sb, group, NULL); | ||
320 | if (!desc || !ext4_free_inodes_count(sb, desc)) | ||
321 | continue; | ||
322 | if (ext4_free_inodes_count(sb, desc) < avefreei) | ||
323 | continue; | ||
324 | if (!best_desc || | ||
325 | (ext4_free_blks_count(sb, desc) > | ||
326 | ext4_free_blks_count(sb, best_desc))) { | ||
327 | *best_group = group; | ||
328 | best_desc = desc; | ||
329 | ret = 0; | ||
330 | } | ||
331 | } | ||
332 | return ret; | ||
333 | } | ||
334 | |||
335 | #define free_block_ratio 10 | ||
336 | |||
337 | static int find_group_flex(struct super_block *sb, struct inode *parent, | ||
338 | ext4_group_t *best_group) | ||
339 | { | ||
340 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
341 | struct ext4_group_desc *desc; | ||
342 | struct flex_groups *flex_group = sbi->s_flex_groups; | ||
343 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | ||
344 | ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); | ||
345 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
346 | int flex_size = ext4_flex_bg_size(sbi); | ||
347 | ext4_group_t best_flex = parent_fbg_group; | ||
348 | int blocks_per_flex = sbi->s_blocks_per_group * flex_size; | ||
349 | int flexbg_free_blocks; | ||
350 | int flex_freeb_ratio; | ||
351 | ext4_group_t n_fbg_groups; | ||
352 | ext4_group_t i; | ||
353 | |||
354 | n_fbg_groups = (ngroups + flex_size - 1) >> | ||
355 | sbi->s_log_groups_per_flex; | ||
356 | |||
357 | find_close_to_parent: | ||
358 | flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); | ||
359 | flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; | ||
360 | if (atomic_read(&flex_group[best_flex].free_inodes) && | ||
361 | flex_freeb_ratio > free_block_ratio) | ||
362 | goto found_flexbg; | ||
363 | |||
364 | if (best_flex && best_flex == parent_fbg_group) { | ||
365 | best_flex--; | ||
366 | goto find_close_to_parent; | ||
367 | } | ||
368 | |||
369 | for (i = 0; i < n_fbg_groups; i++) { | ||
370 | if (i == parent_fbg_group || i == parent_fbg_group - 1) | ||
371 | continue; | ||
372 | |||
373 | flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); | ||
374 | flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; | ||
375 | |||
376 | if (flex_freeb_ratio > free_block_ratio && | ||
377 | (atomic_read(&flex_group[i].free_inodes))) { | ||
378 | best_flex = i; | ||
379 | goto found_flexbg; | ||
380 | } | ||
381 | |||
382 | if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || | ||
383 | ((atomic_read(&flex_group[i].free_blocks) > | ||
384 | atomic_read(&flex_group[best_flex].free_blocks)) && | ||
385 | atomic_read(&flex_group[i].free_inodes))) | ||
386 | best_flex = i; | ||
387 | } | ||
388 | |||
389 | if (!atomic_read(&flex_group[best_flex].free_inodes) || | ||
390 | !atomic_read(&flex_group[best_flex].free_blocks)) | ||
391 | return -1; | ||
392 | |||
393 | found_flexbg: | ||
394 | for (i = best_flex * flex_size; i < ngroups && | ||
395 | i < (best_flex + 1) * flex_size; i++) { | ||
396 | desc = ext4_get_group_desc(sb, i, NULL); | ||
397 | if (ext4_free_inodes_count(sb, desc)) { | ||
398 | *best_group = i; | ||
399 | goto out; | ||
400 | } | ||
401 | } | ||
402 | |||
403 | return -1; | ||
404 | out: | ||
405 | return 0; | ||
406 | } | ||
407 | |||
408 | struct orlov_stats { | 296 | struct orlov_stats { |
409 | __u32 free_inodes; | 297 | __u32 free_inodes; |
410 | __u32 free_blocks; | 298 | __u32 free_clusters; |
411 | __u32 used_dirs; | 299 | __u32 used_dirs; |
412 | }; | 300 | }; |
413 | 301 | ||
@@ -424,7 +312,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, | |||
424 | 312 | ||
425 | if (flex_size > 1) { | 313 | if (flex_size > 1) { |
426 | stats->free_inodes = atomic_read(&flex_group[g].free_inodes); | 314 | stats->free_inodes = atomic_read(&flex_group[g].free_inodes); |
427 | stats->free_blocks = atomic_read(&flex_group[g].free_blocks); | 315 | stats->free_clusters = atomic_read(&flex_group[g].free_clusters); |
428 | stats->used_dirs = atomic_read(&flex_group[g].used_dirs); | 316 | stats->used_dirs = atomic_read(&flex_group[g].used_dirs); |
429 | return; | 317 | return; |
430 | } | 318 | } |
@@ -432,11 +320,11 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, | |||
432 | desc = ext4_get_group_desc(sb, g, NULL); | 320 | desc = ext4_get_group_desc(sb, g, NULL); |
433 | if (desc) { | 321 | if (desc) { |
434 | stats->free_inodes = ext4_free_inodes_count(sb, desc); | 322 | stats->free_inodes = ext4_free_inodes_count(sb, desc); |
435 | stats->free_blocks = ext4_free_blks_count(sb, desc); | 323 | stats->free_clusters = ext4_free_group_clusters(sb, desc); |
436 | stats->used_dirs = ext4_used_dirs_count(sb, desc); | 324 | stats->used_dirs = ext4_used_dirs_count(sb, desc); |
437 | } else { | 325 | } else { |
438 | stats->free_inodes = 0; | 326 | stats->free_inodes = 0; |
439 | stats->free_blocks = 0; | 327 | stats->free_clusters = 0; |
440 | stats->used_dirs = 0; | 328 | stats->used_dirs = 0; |
441 | } | 329 | } |
442 | } | 330 | } |
@@ -471,10 +359,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
471 | ext4_group_t real_ngroups = ext4_get_groups_count(sb); | 359 | ext4_group_t real_ngroups = ext4_get_groups_count(sb); |
472 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); | 360 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); |
473 | unsigned int freei, avefreei; | 361 | unsigned int freei, avefreei; |
474 | ext4_fsblk_t freeb, avefreeb; | 362 | ext4_fsblk_t freeb, avefreec; |
475 | unsigned int ndirs; | 363 | unsigned int ndirs; |
476 | int max_dirs, min_inodes; | 364 | int max_dirs, min_inodes; |
477 | ext4_grpblk_t min_blocks; | 365 | ext4_grpblk_t min_clusters; |
478 | ext4_group_t i, grp, g, ngroups; | 366 | ext4_group_t i, grp, g, ngroups; |
479 | struct ext4_group_desc *desc; | 367 | struct ext4_group_desc *desc; |
480 | struct orlov_stats stats; | 368 | struct orlov_stats stats; |
@@ -490,9 +378,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
490 | 378 | ||
491 | freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); | 379 | freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); |
492 | avefreei = freei / ngroups; | 380 | avefreei = freei / ngroups; |
493 | freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | 381 | freeb = EXT4_C2B(sbi, |
494 | avefreeb = freeb; | 382 | percpu_counter_read_positive(&sbi->s_freeclusters_counter)); |
495 | do_div(avefreeb, ngroups); | 383 | avefreec = freeb; |
384 | do_div(avefreec, ngroups); | ||
496 | ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); | 385 | ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); |
497 | 386 | ||
498 | if (S_ISDIR(mode) && | 387 | if (S_ISDIR(mode) && |
@@ -518,7 +407,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
518 | continue; | 407 | continue; |
519 | if (stats.free_inodes < avefreei) | 408 | if (stats.free_inodes < avefreei) |
520 | continue; | 409 | continue; |
521 | if (stats.free_blocks < avefreeb) | 410 | if (stats.free_clusters < avefreec) |
522 | continue; | 411 | continue; |
523 | grp = g; | 412 | grp = g; |
524 | ret = 0; | 413 | ret = 0; |
@@ -556,7 +445,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
556 | min_inodes = avefreei - inodes_per_group*flex_size / 4; | 445 | min_inodes = avefreei - inodes_per_group*flex_size / 4; |
557 | if (min_inodes < 1) | 446 | if (min_inodes < 1) |
558 | min_inodes = 1; | 447 | min_inodes = 1; |
559 | min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4; | 448 | min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4; |
560 | 449 | ||
561 | /* | 450 | /* |
562 | * Start looking in the flex group where we last allocated an | 451 | * Start looking in the flex group where we last allocated an |
@@ -575,7 +464,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
575 | continue; | 464 | continue; |
576 | if (stats.free_inodes < min_inodes) | 465 | if (stats.free_inodes < min_inodes) |
577 | continue; | 466 | continue; |
578 | if (stats.free_blocks < min_blocks) | 467 | if (stats.free_clusters < min_clusters) |
579 | continue; | 468 | continue; |
580 | goto found_flex_bg; | 469 | goto found_flex_bg; |
581 | } | 470 | } |
@@ -659,7 +548,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
659 | *group = parent_group; | 548 | *group = parent_group; |
660 | desc = ext4_get_group_desc(sb, *group, NULL); | 549 | desc = ext4_get_group_desc(sb, *group, NULL); |
661 | if (desc && ext4_free_inodes_count(sb, desc) && | 550 | if (desc && ext4_free_inodes_count(sb, desc) && |
662 | ext4_free_blks_count(sb, desc)) | 551 | ext4_free_group_clusters(sb, desc)) |
663 | return 0; | 552 | return 0; |
664 | 553 | ||
665 | /* | 554 | /* |
@@ -683,7 +572,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
683 | *group -= ngroups; | 572 | *group -= ngroups; |
684 | desc = ext4_get_group_desc(sb, *group, NULL); | 573 | desc = ext4_get_group_desc(sb, *group, NULL); |
685 | if (desc && ext4_free_inodes_count(sb, desc) && | 574 | if (desc && ext4_free_inodes_count(sb, desc) && |
686 | ext4_free_blks_count(sb, desc)) | 575 | ext4_free_group_clusters(sb, desc)) |
687 | return 0; | 576 | return 0; |
688 | } | 577 | } |
689 | 578 | ||
@@ -802,7 +691,7 @@ err_ret: | |||
802 | * group to find a free inode. | 691 | * group to find a free inode. |
803 | */ | 692 | */ |
804 | struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, | 693 | struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, |
805 | const struct qstr *qstr, __u32 goal) | 694 | const struct qstr *qstr, __u32 goal, uid_t *owner) |
806 | { | 695 | { |
807 | struct super_block *sb; | 696 | struct super_block *sb; |
808 | struct buffer_head *inode_bitmap_bh = NULL; | 697 | struct buffer_head *inode_bitmap_bh = NULL; |
@@ -816,8 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, | |||
816 | int ret2, err = 0; | 705 | int ret2, err = 0; |
817 | struct inode *ret; | 706 | struct inode *ret; |
818 | ext4_group_t i; | 707 | ext4_group_t i; |
819 | int free = 0; | ||
820 | static int once = 1; | ||
821 | ext4_group_t flex_group; | 708 | ext4_group_t flex_group; |
822 | 709 | ||
823 | /* Cannot create files in a deleted directory */ | 710 | /* Cannot create files in a deleted directory */ |
@@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, | |||
843 | goto got_group; | 730 | goto got_group; |
844 | } | 731 | } |
845 | 732 | ||
846 | if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { | 733 | if (S_ISDIR(mode)) |
847 | ret2 = find_group_flex(sb, dir, &group); | 734 | ret2 = find_group_orlov(sb, dir, &group, mode, qstr); |
848 | if (ret2 == -1) { | 735 | else |
849 | ret2 = find_group_other(sb, dir, &group, mode); | ||
850 | if (ret2 == 0 && once) { | ||
851 | once = 0; | ||
852 | printk(KERN_NOTICE "ext4: find_group_flex " | ||
853 | "failed, fallback succeeded dir %lu\n", | ||
854 | dir->i_ino); | ||
855 | } | ||
856 | } | ||
857 | goto got_group; | ||
858 | } | ||
859 | |||
860 | if (S_ISDIR(mode)) { | ||
861 | if (test_opt(sb, OLDALLOC)) | ||
862 | ret2 = find_group_dir(sb, dir, &group); | ||
863 | else | ||
864 | ret2 = find_group_orlov(sb, dir, &group, mode, qstr); | ||
865 | } else | ||
866 | ret2 = find_group_other(sb, dir, &group, mode); | 736 | ret2 = find_group_other(sb, dir, &group, mode); |
867 | 737 | ||
868 | got_group: | 738 | got_group: |
@@ -950,26 +820,21 @@ got: | |||
950 | goto fail; | 820 | goto fail; |
951 | } | 821 | } |
952 | 822 | ||
953 | free = 0; | 823 | BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap"); |
954 | ext4_lock_group(sb, group); | 824 | err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh); |
825 | brelse(block_bitmap_bh); | ||
826 | |||
955 | /* recheck and clear flag under lock if we still need to */ | 827 | /* recheck and clear flag under lock if we still need to */ |
828 | ext4_lock_group(sb, group); | ||
956 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 829 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
957 | free = ext4_free_blocks_after_init(sb, group, gdp); | ||
958 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | 830 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); |
959 | ext4_free_blks_set(sb, gdp, free); | 831 | ext4_free_group_clusters_set(sb, gdp, |
832 | ext4_free_clusters_after_init(sb, group, gdp)); | ||
960 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, | 833 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, |
961 | gdp); | 834 | gdp); |
962 | } | 835 | } |
963 | ext4_unlock_group(sb, group); | 836 | ext4_unlock_group(sb, group); |
964 | 837 | ||
965 | /* Don't need to dirty bitmap block if we didn't change it */ | ||
966 | if (free) { | ||
967 | BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap"); | ||
968 | err = ext4_handle_dirty_metadata(handle, | ||
969 | NULL, block_bitmap_bh); | ||
970 | } | ||
971 | |||
972 | brelse(block_bitmap_bh); | ||
973 | if (err) | 838 | if (err) |
974 | goto fail; | 839 | goto fail; |
975 | } | 840 | } |
@@ -987,8 +852,11 @@ got: | |||
987 | flex_group = ext4_flex_group(sbi, group); | 852 | flex_group = ext4_flex_group(sbi, group); |
988 | atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); | 853 | atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); |
989 | } | 854 | } |
990 | 855 | if (owner) { | |
991 | if (test_opt(sb, GRPID)) { | 856 | inode->i_mode = mode; |
857 | inode->i_uid = owner[0]; | ||
858 | inode->i_gid = owner[1]; | ||
859 | } else if (test_opt(sb, GRPID)) { | ||
992 | inode->i_mode = mode; | 860 | inode->i_mode = mode; |
993 | inode->i_uid = current_fsuid(); | 861 | inode->i_uid = current_fsuid(); |
994 | inode->i_gid = dir->i_gid; | 862 | inode->i_gid = dir->i_gid; |
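The new owner parameter lets a caller pin the new inode's uid/gid instead of deriving them from current_fsuid() and GRPID. A hedged usage sketch; the exact ext4_new_inode() prototype and the owner[0]=uid / owner[1]=gid layout are inferred from the assignments above, not spelled out in this diff:

	/* hypothetical caller forcing root ownership, e.g. for an
	 * internally created metadata inode */
	uid_t owner[2] = { 0, 0 };	/* { uid, gid } */
	struct inode *inode;

	inode = ext4_new_inode(handle, dir, S_IFREG | 0600,
			       NULL /* qstr */, 0 /* goal */, owner);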
@@ -1005,11 +873,7 @@ got: | |||
1005 | ei->i_dir_start_lookup = 0; | 873 | ei->i_dir_start_lookup = 0; |
1006 | ei->i_disksize = 0; | 874 | ei->i_disksize = 0; |
1007 | 875 | ||
1008 | /* | 876 | /* Don't inherit extent flag from directory, amongst others. */ |
1009 | * Don't inherit extent flag from directory, amongst others. We set | ||
1010 | * extent flag on newly created directory and file only if -o extent | ||
1011 | * mount option is specified | ||
1012 | */ | ||
1013 | ei->i_flags = | 877 | ei->i_flags = |
1014 | ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED); | 878 | ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED); |
1015 | ei->i_file_acl = 0; | 879 | ei->i_file_acl = 0; |
@@ -1235,7 +1099,7 @@ unsigned long ext4_count_dirs(struct super_block * sb) | |||
1235 | * inode allocation from the current group, so we take alloc_sem lock, to | 1099 | * inode allocation from the current group, so we take alloc_sem lock, to |
1236 | * block ext4_claim_inode until we are finished. | 1100 | * block ext4_claim_inode until we are finished. |
1237 | */ | 1101 | */ |
1238 | extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | 1102 | int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, |
1239 | int barrier) | 1103 | int barrier) |
1240 | { | 1104 | { |
1241 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 1105 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 0962642119c..3cfc73fbca8 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -699,6 +699,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | |||
699 | /* | 699 | /* |
700 | * Okay, we need to do block allocation. | 700 | * Okay, we need to do block allocation. |
701 | */ | 701 | */ |
702 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
703 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | ||
704 | EXT4_ERROR_INODE(inode, "Can't allocate blocks for " | ||
705 | "non-extent mapped inodes with bigalloc"); | ||
706 | return -ENOSPC; | ||
707 | } | ||
708 | |||
702 | goal = ext4_find_goal(inode, map->m_lblk, partial); | 709 | goal = ext4_find_goal(inode, map->m_lblk, partial); |
703 | 710 | ||
704 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 711 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
@@ -1343,7 +1350,9 @@ void ext4_ind_truncate(struct inode *inode) | |||
1343 | __le32 nr = 0; | 1350 | __le32 nr = 0; |
1344 | int n = 0; | 1351 | int n = 0; |
1345 | ext4_lblk_t last_block, max_block; | 1352 | ext4_lblk_t last_block, max_block; |
1353 | loff_t page_len; | ||
1346 | unsigned blocksize = inode->i_sb->s_blocksize; | 1354 | unsigned blocksize = inode->i_sb->s_blocksize; |
1355 | int err; | ||
1347 | 1356 | ||
1348 | handle = start_transaction(inode); | 1357 | handle = start_transaction(inode); |
1349 | if (IS_ERR(handle)) | 1358 | if (IS_ERR(handle)) |
@@ -1354,9 +1363,16 @@ void ext4_ind_truncate(struct inode *inode) | |||
1354 | max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) | 1363 | max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) |
1355 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | 1364 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); |
1356 | 1365 | ||
1357 | if (inode->i_size & (blocksize - 1)) | 1366 | if (inode->i_size % PAGE_CACHE_SIZE != 0) { |
1358 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) | 1367 | page_len = PAGE_CACHE_SIZE - |
1368 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
1369 | |||
1370 | err = ext4_discard_partial_page_buffers(handle, | ||
1371 | mapping, inode->i_size, page_len, 0); | ||
1372 | |||
1373 | if (err) | ||
1359 | goto out_stop; | 1374 | goto out_stop; |
1375 | } | ||
1360 | 1376 | ||
1361 | if (last_block != max_block) { | 1377 | if (last_block != max_block) { |
1362 | n = ext4_block_to_path(inode, last_block, offsets, NULL); | 1378 | n = ext4_block_to_path(inode, last_block, offsets, NULL); |
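The hunk above stops zeroing at block granularity and instead discards the whole partial tail of the page containing i_size. A worked example of the page_len arithmetic, assuming PAGE_CACHE_SIZE is 4096:

	/* i_size = 10000:
	 *   10000 & (4096 - 1) = 1808     bytes used in the last page
	 *   page_len = 4096 - 1808 = 2288 bytes to zero, i.e. the range
	 *   [10000, 12288) up to the end of the page containing i_size
	 */
	loff_t page_len = PAGE_CACHE_SIZE -
			(inode->i_size & (PAGE_CACHE_SIZE - 1));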
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0defe0bfe01..f2419a15b81 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -42,7 +42,6 @@ | |||
42 | #include "ext4_jbd2.h" | 42 | #include "ext4_jbd2.h" |
43 | #include "xattr.h" | 43 | #include "xattr.h" |
44 | #include "acl.h" | 44 | #include "acl.h" |
45 | #include "ext4_extents.h" | ||
46 | #include "truncate.h" | 45 | #include "truncate.h" |
47 | 46 | ||
48 | #include <trace/events/ext4.h> | 47 | #include <trace/events/ext4.h> |
@@ -268,7 +267,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
268 | struct ext4_inode_info *ei = EXT4_I(inode); | 267 | struct ext4_inode_info *ei = EXT4_I(inode); |
269 | 268 | ||
270 | spin_lock(&ei->i_block_reservation_lock); | 269 | spin_lock(&ei->i_block_reservation_lock); |
271 | trace_ext4_da_update_reserve_space(inode, used); | 270 | trace_ext4_da_update_reserve_space(inode, used, quota_claim); |
272 | if (unlikely(used > ei->i_reserved_data_blocks)) { | 271 | if (unlikely(used > ei->i_reserved_data_blocks)) { |
273 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " | 272 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " |
274 | "with only %d reserved data blocks\n", | 273 | "with only %d reserved data blocks\n", |
@@ -281,7 +280,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
281 | /* Update per-inode reservations */ | 280 | /* Update per-inode reservations */ |
282 | ei->i_reserved_data_blocks -= used; | 281 | ei->i_reserved_data_blocks -= used; |
283 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; | 282 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; |
284 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 283 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
285 | used + ei->i_allocated_meta_blocks); | 284 | used + ei->i_allocated_meta_blocks); |
286 | ei->i_allocated_meta_blocks = 0; | 285 | ei->i_allocated_meta_blocks = 0; |
287 | 286 | ||
@@ -291,7 +290,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
291 | * only when we have written all of the delayed | 290 | * only when we have written all of the delayed |
292 | * allocation blocks. | 291 | * allocation blocks. |
293 | */ | 292 | */ |
294 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 293 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
295 | ei->i_reserved_meta_blocks); | 294 | ei->i_reserved_meta_blocks); |
296 | ei->i_reserved_meta_blocks = 0; | 295 | ei->i_reserved_meta_blocks = 0; |
297 | ei->i_da_metadata_calc_len = 0; | 296 | ei->i_da_metadata_calc_len = 0; |
@@ -300,14 +299,14 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
300 | 299 | ||
301 | /* Update quota subsystem for data blocks */ | 300 | /* Update quota subsystem for data blocks */ |
302 | if (quota_claim) | 301 | if (quota_claim) |
303 | dquot_claim_block(inode, used); | 302 | dquot_claim_block(inode, EXT4_C2B(sbi, used)); |
304 | else { | 303 | else { |
305 | /* | 304 | /* |
306 | * We did fallocate with an offset that is already delayed | 305 | * We did fallocate with an offset that is already delayed |
307 | * allocated. So on delayed allocated writeback we should | 306 | * allocated. So on delayed allocated writeback we should |
308 | * not re-claim the quota for fallocated blocks. | 307 | * not re-claim the quota for fallocated blocks. |
309 | */ | 308 | */ |
310 | dquot_release_reservation_block(inode, used); | 309 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, used)); |
311 | } | 310 | } |
312 | 311 | ||
313 | /* | 312 | /* |
@@ -399,6 +398,49 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
399 | } | 398 | } |
400 | 399 | ||
401 | /* | 400 | /* |
401 | * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map. | ||
402 | */ | ||
403 | static void set_buffers_da_mapped(struct inode *inode, | ||
404 | struct ext4_map_blocks *map) | ||
405 | { | ||
406 | struct address_space *mapping = inode->i_mapping; | ||
407 | struct pagevec pvec; | ||
408 | int i, nr_pages; | ||
409 | pgoff_t index, end; | ||
410 | |||
411 | index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
412 | end = (map->m_lblk + map->m_len - 1) >> | ||
413 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
414 | |||
415 | pagevec_init(&pvec, 0); | ||
416 | while (index <= end) { | ||
417 | nr_pages = pagevec_lookup(&pvec, mapping, index, | ||
418 | min(end - index + 1, | ||
419 | (pgoff_t)PAGEVEC_SIZE)); | ||
420 | if (nr_pages == 0) | ||
421 | break; | ||
422 | for (i = 0; i < nr_pages; i++) { | ||
423 | struct page *page = pvec.pages[i]; | ||
424 | struct buffer_head *bh, *head; | ||
425 | |||
426 | if (unlikely(page->mapping != mapping) || | ||
427 | !PageDirty(page)) | ||
428 | break; | ||
429 | |||
430 | if (page_has_buffers(page)) { | ||
431 | bh = head = page_buffers(page); | ||
432 | do { | ||
433 | set_buffer_da_mapped(bh); | ||
434 | bh = bh->b_this_page; | ||
435 | } while (bh != head); | ||
436 | } | ||
437 | index++; | ||
438 | } | ||
439 | pagevec_release(&pvec); | ||
440 | } | ||
441 | } | ||
442 | |||
443 | /* | ||
402 | * The ext4_map_blocks() function tries to look up the requested blocks, | 444 | * The ext4_map_blocks() function tries to look up the requested blocks, |
403 | * and returns if the blocks are already mapped. | 445 | * and returns if the blocks are already mapped. |
404 | * | 446 | * |
@@ -416,7 +458,7 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
416 | * the buffer head is mapped. | 458 | * the buffer head is mapped. |
417 | * | 459 | * |
418 | * It returns 0 if plain look up failed (blocks have not been allocated), in | 460 | * It returns 0 if plain look up failed (blocks have not been allocated), in |
419 | * that casem, buffer head is unmapped | 461 | * that case, buffer head is unmapped |
420 | * | 462 | * |
421 | * It returns the error in case of allocation failure. | 463 | * It returns the error in case of allocation failure. |
422 | */ | 464 | */ |
@@ -435,9 +477,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
435 | */ | 477 | */ |
436 | down_read((&EXT4_I(inode)->i_data_sem)); | 478 | down_read((&EXT4_I(inode)->i_data_sem)); |
437 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 479 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
438 | retval = ext4_ext_map_blocks(handle, inode, map, 0); | 480 | retval = ext4_ext_map_blocks(handle, inode, map, flags & |
481 | EXT4_GET_BLOCKS_KEEP_SIZE); | ||
439 | } else { | 482 | } else { |
440 | retval = ext4_ind_map_blocks(handle, inode, map, 0); | 483 | retval = ext4_ind_map_blocks(handle, inode, map, flags & |
484 | EXT4_GET_BLOCKS_KEEP_SIZE); | ||
441 | } | 485 | } |
442 | up_read((&EXT4_I(inode)->i_data_sem)); | 486 | up_read((&EXT4_I(inode)->i_data_sem)); |
443 | 487 | ||
@@ -455,7 +499,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
455 | * Returns if the blocks have already been allocated | 499 |
456 | * | 500 | * |
457 | * Note that if blocks have been preallocated | 501 | * Note that if blocks have been preallocated |
458 | * ext4_ext_get_block() returns th create = 0 | 502 | * ext4_ext_get_block() returns the create = 0 |
459 | * with buffer head unmapped. | 503 | * with buffer head unmapped. |
460 | */ | 504 | */ |
461 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) | 505 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) |
@@ -517,9 +561,17 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
517 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) | 561 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) |
518 | ext4_da_update_reserve_space(inode, retval, 1); | 562 | ext4_da_update_reserve_space(inode, retval, 1); |
519 | } | 563 | } |
520 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 564 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { |
521 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); | 565 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); |
522 | 566 | ||
567 | /* If we have successfully mapped the delayed allocated blocks, | ||
568 | * set the BH_Da_Mapped bit on them. It's important to do this | ||
569 | * under the protection of i_data_sem. | ||
570 | */ | ||
571 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) | ||
572 | set_buffers_da_mapped(inode, map); | ||
573 | } | ||
574 | |||
523 | up_write((&EXT4_I(inode)->i_data_sem)); | 575 | up_write((&EXT4_I(inode)->i_data_sem)); |
524 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 576 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
525 | int ret = check_block_validity(inode, map); | 577 | int ret = check_block_validity(inode, map); |
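set_buffer_da_mapped() above is not defined in this file; it is presumably generated from a private ext4 buffer-state bit by the standard accessor macro, roughly as follows (the bit name and position here are assumptions):

	#include <linux/buffer_head.h>

	enum ext4_state_bits {
		/* delayed-allocation block that now has a real mapping */
		BH_Da_Mapped = BH_JBDPrivateStart,
	};

	/* generates set_buffer_da_mapped(), clear_buffer_da_mapped()
	 * and buffer_da_mapped() used in these hunks */
	BUFFER_FNS(Da_Mapped, da_mapped)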
@@ -909,7 +961,11 @@ static int ext4_ordered_write_end(struct file *file, | |||
909 | ext4_orphan_add(handle, inode); | 961 | ext4_orphan_add(handle, inode); |
910 | if (ret2 < 0) | 962 | if (ret2 < 0) |
911 | ret = ret2; | 963 | ret = ret2; |
964 | } else { | ||
965 | unlock_page(page); | ||
966 | page_cache_release(page); | ||
912 | } | 967 | } |
968 | |||
913 | ret2 = ext4_journal_stop(handle); | 969 | ret2 = ext4_journal_stop(handle); |
914 | if (!ret) | 970 | if (!ret) |
915 | ret = ret2; | 971 | ret = ret2; |
@@ -1037,14 +1093,14 @@ static int ext4_journalled_write_end(struct file *file, | |||
1037 | } | 1093 | } |
1038 | 1094 | ||
1039 | /* | 1095 | /* |
1040 | * Reserve a single block located at lblock | 1096 | * Reserve a single cluster located at lblock |
1041 | */ | 1097 | */ |
1042 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | 1098 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) |
1043 | { | 1099 | { |
1044 | int retries = 0; | 1100 | int retries = 0; |
1045 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1101 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1046 | struct ext4_inode_info *ei = EXT4_I(inode); | 1102 | struct ext4_inode_info *ei = EXT4_I(inode); |
1047 | unsigned long md_needed; | 1103 | unsigned int md_needed; |
1048 | int ret; | 1104 | int ret; |
1049 | 1105 | ||
1050 | /* | 1106 | /* |
@@ -1054,7 +1110,8 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | |||
1054 | */ | 1110 | */ |
1055 | repeat: | 1111 | repeat: |
1056 | spin_lock(&ei->i_block_reservation_lock); | 1112 | spin_lock(&ei->i_block_reservation_lock); |
1057 | md_needed = ext4_calc_metadata_amount(inode, lblock); | 1113 | md_needed = EXT4_NUM_B2C(sbi, |
1114 | ext4_calc_metadata_amount(inode, lblock)); | ||
1058 | trace_ext4_da_reserve_space(inode, md_needed); | 1115 | trace_ext4_da_reserve_space(inode, md_needed); |
1059 | spin_unlock(&ei->i_block_reservation_lock); | 1116 | spin_unlock(&ei->i_block_reservation_lock); |
1060 | 1117 | ||
@@ -1063,15 +1120,15 @@ repeat: | |||
1063 | * us from metadata over-estimation, though we may go over by | 1120 | * us from metadata over-estimation, though we may go over by |
1064 | * a small amount in the end. Here we just reserve for data. | 1121 | * a small amount in the end. Here we just reserve for data. |
1065 | */ | 1122 | */ |
1066 | ret = dquot_reserve_block(inode, 1); | 1123 | ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); |
1067 | if (ret) | 1124 | if (ret) |
1068 | return ret; | 1125 | return ret; |
1069 | /* | 1126 | /* |
1070 | * We do still charge estimated metadata to the sb though; | 1127 | * We do still charge estimated metadata to the sb though; |
1071 | * we cannot afford to run out of free blocks. | 1128 | * we cannot afford to run out of free blocks. |
1072 | */ | 1129 | */ |
1073 | if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) { | 1130 | if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) { |
1074 | dquot_release_reservation_block(inode, 1); | 1131 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); |
1075 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1132 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
1076 | yield(); | 1133 | yield(); |
1077 | goto repeat; | 1134 | goto repeat; |
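Reservations are now tracked per cluster while quota stays block-denominated, hence the EXT4_C2B() conversions above. Illustrative numbers only; the cluster geometry is assumed:

	/* 4KB blocks with 64KB clusters => s_cluster_bits == 4, so one
	 * reserved data cluster charges quota for 1 << 4 == 16 blocks */
	ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
	if (ret)
		return ret;
	/* a later failure path must release with the same conversion */
	dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));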
@@ -1118,19 +1175,21 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1118 | * We can release all of the reserved metadata blocks | 1175 | * We can release all of the reserved metadata blocks |
1119 | * only when we have written all of the delayed | 1176 | * only when we have written all of the delayed |
1120 | * allocation blocks. | 1177 | * allocation blocks. |
1178 | * Note that in the case of bigalloc, i_reserved_meta_blocks, | ||
1179 | * i_reserved_data_blocks, etc. refer to the number of clusters. | ||
1121 | */ | 1180 | */ |
1122 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 1181 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
1123 | ei->i_reserved_meta_blocks); | 1182 | ei->i_reserved_meta_blocks); |
1124 | ei->i_reserved_meta_blocks = 0; | 1183 | ei->i_reserved_meta_blocks = 0; |
1125 | ei->i_da_metadata_calc_len = 0; | 1184 | ei->i_da_metadata_calc_len = 0; |
1126 | } | 1185 | } |
1127 | 1186 | ||
1128 | /* update fs dirty data blocks counter */ | 1187 | /* update fs dirty data blocks counter */ |
1129 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); | 1188 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free); |
1130 | 1189 | ||
1131 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1190 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1132 | 1191 | ||
1133 | dquot_release_reservation_block(inode, to_free); | 1192 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free)); |
1134 | } | 1193 | } |
1135 | 1194 | ||
1136 | static void ext4_da_page_release_reservation(struct page *page, | 1195 | static void ext4_da_page_release_reservation(struct page *page, |
@@ -1139,6 +1198,9 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1139 | int to_release = 0; | 1198 | int to_release = 0; |
1140 | struct buffer_head *head, *bh; | 1199 | struct buffer_head *head, *bh; |
1141 | unsigned int curr_off = 0; | 1200 | unsigned int curr_off = 0; |
1201 | struct inode *inode = page->mapping->host; | ||
1202 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
1203 | int num_clusters; | ||
1142 | 1204 | ||
1143 | head = page_buffers(page); | 1205 | head = page_buffers(page); |
1144 | bh = head; | 1206 | bh = head; |
@@ -1148,10 +1210,24 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1148 | if ((offset <= curr_off) && (buffer_delay(bh))) { | 1210 | if ((offset <= curr_off) && (buffer_delay(bh))) { |
1149 | to_release++; | 1211 | to_release++; |
1150 | clear_buffer_delay(bh); | 1212 | clear_buffer_delay(bh); |
1213 | clear_buffer_da_mapped(bh); | ||
1151 | } | 1214 | } |
1152 | curr_off = next_off; | 1215 | curr_off = next_off; |
1153 | } while ((bh = bh->b_this_page) != head); | 1216 | } while ((bh = bh->b_this_page) != head); |
1154 | ext4_da_release_space(page->mapping->host, to_release); | 1217 | |
1218 | /* If we have released all the blocks belonging to a cluster, then we | ||
1219 | * need to release the reserved space for that cluster. */ | ||
1220 | num_clusters = EXT4_NUM_B2C(sbi, to_release); | ||
1221 | while (num_clusters > 0) { | ||
1222 | ext4_fsblk_t lblk; | ||
1223 | lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) + | ||
1224 | ((num_clusters - 1) << sbi->s_cluster_bits); | ||
1225 | if (sbi->s_cluster_ratio == 1 || | ||
1226 | !ext4_find_delalloc_cluster(inode, lblk, 1)) | ||
1227 | ext4_da_release_space(inode, 1); | ||
1228 | |||
1229 | num_clusters--; | ||
1230 | } | ||
1155 | } | 1231 | } |
1156 | 1232 | ||
1157 | /* | 1233 | /* |
@@ -1253,6 +1329,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
1253 | clear_buffer_delay(bh); | 1329 | clear_buffer_delay(bh); |
1254 | bh->b_blocknr = pblock; | 1330 | bh->b_blocknr = pblock; |
1255 | } | 1331 | } |
1332 | if (buffer_da_mapped(bh)) | ||
1333 | clear_buffer_da_mapped(bh); | ||
1256 | if (buffer_unwritten(bh) || | 1334 | if (buffer_unwritten(bh) || |
1257 | buffer_mapped(bh)) | 1335 | buffer_mapped(bh)) |
1258 | BUG_ON(bh->b_blocknr != pblock); | 1336 | BUG_ON(bh->b_blocknr != pblock); |
@@ -1346,12 +1424,15 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
1346 | { | 1424 | { |
1347 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1425 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1348 | printk(KERN_CRIT "Total free blocks count %lld\n", | 1426 | printk(KERN_CRIT "Total free blocks count %lld\n", |
1349 | ext4_count_free_blocks(inode->i_sb)); | 1427 | EXT4_C2B(EXT4_SB(inode->i_sb), |
1428 | ext4_count_free_clusters(inode->i_sb))); | ||
1350 | printk(KERN_CRIT "Free/Dirty block details\n"); | 1429 | printk(KERN_CRIT "Free/Dirty block details\n"); |
1351 | printk(KERN_CRIT "free_blocks=%lld\n", | 1430 | printk(KERN_CRIT "free_blocks=%lld\n", |
1352 | (long long) percpu_counter_sum(&sbi->s_freeblocks_counter)); | 1431 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), |
1432 | percpu_counter_sum(&sbi->s_freeclusters_counter))); | ||
1353 | printk(KERN_CRIT "dirty_blocks=%lld\n", | 1433 | printk(KERN_CRIT "dirty_blocks=%lld\n", |
1354 | (long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); | 1434 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), |
1435 | percpu_counter_sum(&sbi->s_dirtyclusters_counter))); | ||
1355 | printk(KERN_CRIT "Block reservation details\n"); | 1436 | printk(KERN_CRIT "Block reservation details\n"); |
1356 | printk(KERN_CRIT "i_reserved_data_blocks=%u\n", | 1437 | printk(KERN_CRIT "i_reserved_data_blocks=%u\n", |
1357 | EXT4_I(inode)->i_reserved_data_blocks); | 1438 | EXT4_I(inode)->i_reserved_data_blocks); |
@@ -1430,8 +1511,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | |||
1430 | if (err == -EAGAIN) | 1511 | if (err == -EAGAIN) |
1431 | goto submit_io; | 1512 | goto submit_io; |
1432 | 1513 | ||
1433 | if (err == -ENOSPC && | 1514 | if (err == -ENOSPC && ext4_count_free_clusters(sb)) { |
1434 | ext4_count_free_blocks(sb)) { | ||
1435 | mpd->retval = err; | 1515 | mpd->retval = err; |
1436 | goto submit_io; | 1516 | goto submit_io; |
1437 | } | 1517 | } |
@@ -1471,13 +1551,15 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | |||
1471 | 1551 | ||
1472 | for (i = 0; i < map.m_len; i++) | 1552 | for (i = 0; i < map.m_len; i++) |
1473 | unmap_underlying_metadata(bdev, map.m_pblk + i); | 1553 | unmap_underlying_metadata(bdev, map.m_pblk + i); |
1474 | } | ||
1475 | 1554 | ||
1476 | if (ext4_should_order_data(mpd->inode)) { | 1555 | if (ext4_should_order_data(mpd->inode)) { |
1477 | err = ext4_jbd2_file_inode(handle, mpd->inode); | 1556 | err = ext4_jbd2_file_inode(handle, mpd->inode); |
1478 | if (err) | 1557 | if (err) { |
1479 | /* This only happens if the journal is aborted */ | 1558 | /* Only if the journal is aborted */ |
1480 | return; | 1559 | mpd->retval = err; |
1560 | goto submit_io; | ||
1561 | } | ||
1562 | } | ||
1481 | } | 1563 | } |
1482 | 1564 | ||
1483 | /* | 1565 | /* |
@@ -1584,6 +1666,66 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | |||
1584 | } | 1666 | } |
1585 | 1667 | ||
1586 | /* | 1668 | /* |
1669 | * This function grabs code from the very beginning of | ||
1670 | * ext4_map_blocks, but assumes that the caller is in the delayed | ||
1671 | * write path. It looks up the requested blocks and sets the | ||
1672 | * buffer delay bit under the protection of i_data_sem. | ||
1673 | */ | ||
1674 | static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | ||
1675 | struct ext4_map_blocks *map, | ||
1676 | struct buffer_head *bh) | ||
1677 | { | ||
1678 | int retval; | ||
1679 | sector_t invalid_block = ~((sector_t) 0xffff); | ||
1680 | |||
1681 | if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) | ||
1682 | invalid_block = ~0; | ||
1683 | |||
1684 | map->m_flags = 0; | ||
1685 | ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u," | ||
1686 | "logical block %lu\n", inode->i_ino, map->m_len, | ||
1687 | (unsigned long) map->m_lblk); | ||
1688 | /* | ||
1689 | * Try to see if we can get the block without requesting a new | ||
1690 | * file system block. | ||
1691 | */ | ||
1692 | down_read((&EXT4_I(inode)->i_data_sem)); | ||
1693 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | ||
1694 | retval = ext4_ext_map_blocks(NULL, inode, map, 0); | ||
1695 | else | ||
1696 | retval = ext4_ind_map_blocks(NULL, inode, map, 0); | ||
1697 | |||
1698 | if (retval == 0) { | ||
1699 | /* | ||
1700 | * XXX: __block_prepare_write() unmaps passed block, | ||
1701 | * is it OK? | ||
1702 | */ | ||
1703 | /* If the block was allocated from a previously allocated cluster, | ||
1704 | * then we don't need to reserve it again. */ | ||
1705 | if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { | ||
1706 | retval = ext4_da_reserve_space(inode, iblock); | ||
1707 | if (retval) | ||
1708 | /* not enough space to reserve */ | ||
1709 | goto out_unlock; | ||
1710 | } | ||
1711 | |||
1712 | /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served | ||
1713 | * and it should not appear in bh->b_state. | ||
1714 | */ | ||
1715 | map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; | ||
1716 | |||
1717 | map_bh(bh, inode->i_sb, invalid_block); | ||
1718 | set_buffer_new(bh); | ||
1719 | set_buffer_delay(bh); | ||
1720 | } | ||
1721 | |||
1722 | out_unlock: | ||
1723 | up_read((&EXT4_I(inode)->i_data_sem)); | ||
1724 | |||
1725 | return retval; | ||
1726 | } | ||
1727 | |||
1728 | /* | ||
1587 | * This is a special get_blocks_t callback which is used by | 1729 | * This is a special get_blocks_t callback which is used by |
1588 | * ext4_da_write_begin(). It will either return mapped block or | 1730 | * ext4_da_write_begin(). It will either return mapped block or |
1589 | * reserve space for a single block. | 1731 | * reserve space for a single block. |
@@ -1600,10 +1742,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
1600 | { | 1742 | { |
1601 | struct ext4_map_blocks map; | 1743 | struct ext4_map_blocks map; |
1602 | int ret = 0; | 1744 | int ret = 0; |
1603 | sector_t invalid_block = ~((sector_t) 0xffff); | ||
1604 | |||
1605 | if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) | ||
1606 | invalid_block = ~0; | ||
1607 | 1745 | ||
1608 | BUG_ON(create == 0); | 1746 | BUG_ON(create == 0); |
1609 | BUG_ON(bh->b_size != inode->i_sb->s_blocksize); | 1747 | BUG_ON(bh->b_size != inode->i_sb->s_blocksize); |
@@ -1616,25 +1754,9 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
1616 | * preallocated blocks are unmapped but should be treated | 1754 |
1617 | * the same as allocated blocks. | 1755 | * the same as allocated blocks. |
1618 | */ | 1756 | */ |
1619 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 1757 | ret = ext4_da_map_blocks(inode, iblock, &map, bh); |
1620 | if (ret < 0) | 1758 | if (ret <= 0) |
1621 | return ret; | 1759 | return ret; |
1622 | if (ret == 0) { | ||
1623 | if (buffer_delay(bh)) | ||
1624 | return 0; /* Not sure this could or should happen */ | ||
1625 | /* | ||
1626 | * XXX: __block_write_begin() unmaps passed block, is it OK? | ||
1627 | */ | ||
1628 | ret = ext4_da_reserve_space(inode, iblock); | ||
1629 | if (ret) | ||
1630 | /* not enough space to reserve */ | ||
1631 | return ret; | ||
1632 | |||
1633 | map_bh(bh, inode->i_sb, invalid_block); | ||
1634 | set_buffer_new(bh); | ||
1635 | set_buffer_delay(bh); | ||
1636 | return 0; | ||
1637 | } | ||
1638 | 1760 | ||
1639 | map_bh(bh, inode->i_sb, map.m_pblk); | 1761 | map_bh(bh, inode->i_sb, map.m_pblk); |
1640 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 1762 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; |
@@ -2050,6 +2172,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2050 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2172 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2051 | pgoff_t done_index = 0; | 2173 | pgoff_t done_index = 0; |
2052 | pgoff_t end; | 2174 | pgoff_t end; |
2175 | struct blk_plug plug; | ||
2053 | 2176 | ||
2054 | trace_ext4_da_writepages(inode, wbc); | 2177 | trace_ext4_da_writepages(inode, wbc); |
2055 | 2178 | ||
@@ -2128,6 +2251,7 @@ retry: | |||
2128 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | 2251 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
2129 | tag_pages_for_writeback(mapping, index, end); | 2252 | tag_pages_for_writeback(mapping, index, end); |
2130 | 2253 | ||
2254 | blk_start_plug(&plug); | ||
2131 | while (!ret && wbc->nr_to_write > 0) { | 2255 | while (!ret && wbc->nr_to_write > 0) { |
2132 | 2256 | ||
2133 | /* | 2257 | /* |
@@ -2178,11 +2302,12 @@ retry: | |||
2178 | ret = 0; | 2302 | ret = 0; |
2179 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | 2303 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { |
2180 | /* | 2304 | /* |
2181 | * got one extent now try with | 2305 | * Got one extent now try with rest of the pages. |
2182 | * rest of the pages | 2306 | * If mpd.retval is set to -EIO, the journal is aborted.
2307 | * So we don't need to write any more. | ||
2183 | */ | 2308 | */ |
2184 | pages_written += mpd.pages_written; | 2309 | pages_written += mpd.pages_written; |
2185 | ret = 0; | 2310 | ret = mpd.retval; |
2186 | io_done = 1; | 2311 | io_done = 1; |
2187 | } else if (wbc->nr_to_write) | 2312 | } else if (wbc->nr_to_write) |
2188 | /* | 2313 | /* |
@@ -2192,6 +2317,7 @@ retry: | |||
2192 | */ | 2317 | */ |
2193 | break; | 2318 | break; |
2194 | } | 2319 | } |
2320 | blk_finish_plug(&plug); | ||
2195 | if (!io_done && !cycled) { | 2321 | if (!io_done && !cycled) { |
2196 | cycled = 1; | 2322 | cycled = 1; |
2197 | index = 0; | 2323 | index = 0; |
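The blk_plug added in this function batches the bios issued while walking dirty pages and submits them in one burst when the plug is finished. The general pattern, with hypothetical helpers standing in for the writeback loop:

	struct blk_plug plug;

	blk_start_plug(&plug);		/* bios now queue on the plug */
	while (have_dirty_pages())	/* hypothetical helper */
		write_one_extent();	/* hypothetical helper */
	blk_finish_plug(&plug);		/* unplug: submit the batched I/O */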
@@ -2230,10 +2356,11 @@ static int ext4_nonda_switch(struct super_block *sb) | |||
2230 | * Delalloc needs accurate free block accounting. So switch | 2356 |
2231 | * to non-delalloc when we are near the error range. | 2357 |
2232 | */ | 2358 | */ |
2233 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | 2359 | free_blocks = EXT4_C2B(sbi, |
2234 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); | 2360 | percpu_counter_read_positive(&sbi->s_freeclusters_counter)); |
2361 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); | ||
2235 | if (2 * free_blocks < 3 * dirty_blocks || | 2362 | if (2 * free_blocks < 3 * dirty_blocks || |
2236 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { | 2363 | free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { |
2237 | /* | 2364 | /* |
2238 | * free block count is less than 150% of dirty blocks | 2365 | * free block count is less than 150% of dirty blocks |
2239 | * or the free block count is less than the watermark | 2366 |
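The counters are now read in clusters and scaled up with EXT4_C2B() before the comparison; the first test is simply free < 1.5 * dirty kept in integer arithmetic. A worked example with counters already scaled to blocks:

	/* free_blocks = 290, dirty_blocks = 200:
	 *   2 * 290 = 580  <  3 * 200 = 600, i.e. free < 1.5 * dirty,
	 * so the file system temporarily switches to non-delalloc */
	if (2 * free_blocks < 3 * dirty_blocks ||
	    free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK))
		return 1;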
@@ -2259,6 +2386,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
2259 | pgoff_t index; | 2386 | pgoff_t index; |
2260 | struct inode *inode = mapping->host; | 2387 | struct inode *inode = mapping->host; |
2261 | handle_t *handle; | 2388 | handle_t *handle; |
2389 | loff_t page_len; | ||
2262 | 2390 | ||
2263 | index = pos >> PAGE_CACHE_SHIFT; | 2391 | index = pos >> PAGE_CACHE_SHIFT; |
2264 | 2392 | ||
@@ -2305,6 +2433,13 @@ retry: | |||
2305 | */ | 2433 | */ |
2306 | if (pos + len > inode->i_size) | 2434 | if (pos + len > inode->i_size) |
2307 | ext4_truncate_failed_write(inode); | 2435 | ext4_truncate_failed_write(inode); |
2436 | } else { | ||
2437 | page_len = pos & (PAGE_CACHE_SIZE - 1); | ||
2438 | if (page_len > 0) { | ||
2439 | ret = ext4_discard_partial_page_buffers_no_lock(handle, | ||
2440 | inode, page, pos - page_len, page_len, | ||
2441 | EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED); | ||
2442 | } | ||
2308 | } | 2443 | } |
2309 | 2444 | ||
2310 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 2445 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -2347,6 +2482,7 @@ static int ext4_da_write_end(struct file *file, | |||
2347 | loff_t new_i_size; | 2482 | loff_t new_i_size; |
2348 | unsigned long start, end; | 2483 | unsigned long start, end; |
2349 | int write_mode = (int)(unsigned long)fsdata; | 2484 | int write_mode = (int)(unsigned long)fsdata; |
2485 | loff_t page_len; | ||
2350 | 2486 | ||
2351 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { | 2487 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { |
2352 | if (ext4_should_order_data(inode)) { | 2488 | if (ext4_should_order_data(inode)) { |
@@ -2395,6 +2531,16 @@ static int ext4_da_write_end(struct file *file, | |||
2395 | } | 2531 | } |
2396 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 2532 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
2397 | page, fsdata); | 2533 | page, fsdata); |
2534 | |||
2535 | page_len = PAGE_CACHE_SIZE - | ||
2536 | ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1)); | ||
2537 | |||
2538 | if (page_len > 0) { | ||
2539 | ret = ext4_discard_partial_page_buffers_no_lock(handle, | ||
2540 | inode, page, pos + copied - 1, page_len, | ||
2541 | EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED); | ||
2542 | } | ||
2543 | |||
2398 | copied = ret2; | 2544 | copied = ret2; |
2399 | if (ret2 < 0) | 2545 | if (ret2 < 0) |
2400 | ret = ret2; | 2546 | ret = ret2; |
@@ -2689,10 +2835,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
2689 | * but being more careful is always safe for the future change. | 2835 | * but being more careful is always safe for the future change. |
2690 | */ | 2836 | */ |
2691 | inode = io_end->inode; | 2837 | inode = io_end->inode; |
2692 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | 2838 | ext4_set_io_unwritten_flag(inode, io_end); |
2693 | io_end->flag |= EXT4_IO_END_UNWRITTEN; | ||
2694 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | ||
2695 | } | ||
2696 | 2839 | ||
2697 | /* Add the io_end to per-inode completed io list*/ | 2840 | /* Add the io_end to per-inode completed io list*/ |
2698 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | 2841 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); |
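The open-coded flag update is replaced by a helper; judging by this call site it is presumably equivalent to the following (a sketch, not quoted from the diff):

	static inline void ext4_set_io_unwritten_flag(struct inode *inode,
						      ext4_io_end_t *io_end)
	{
		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
			io_end->flag |= EXT4_IO_END_UNWRITTEN;
			atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
		}
	}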
@@ -2858,6 +3001,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
2858 | struct inode *inode = file->f_mapping->host; | 3001 | struct inode *inode = file->f_mapping->host; |
2859 | ssize_t ret; | 3002 | ssize_t ret; |
2860 | 3003 | ||
3004 | /* | ||
3005 | * If we are doing data journalling we don't support O_DIRECT | ||
3006 | */ | ||
3007 | if (ext4_should_journal_data(inode)) | ||
3008 | return 0; | ||
3009 | |||
2861 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | 3010 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); |
2862 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3011 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
2863 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); | 3012 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); |
@@ -2927,6 +3076,7 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
2927 | .bmap = ext4_bmap, | 3076 | .bmap = ext4_bmap, |
2928 | .invalidatepage = ext4_invalidatepage, | 3077 | .invalidatepage = ext4_invalidatepage, |
2929 | .releasepage = ext4_releasepage, | 3078 | .releasepage = ext4_releasepage, |
3079 | .direct_IO = ext4_direct_IO, | ||
2930 | .is_partially_uptodate = block_is_partially_uptodate, | 3080 | .is_partially_uptodate = block_is_partially_uptodate, |
2931 | .error_remove_page = generic_error_remove_page, | 3081 | .error_remove_page = generic_error_remove_page, |
2932 | }; | 3082 | }; |
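Returning 0 from ->direct_IO reports zero bytes transferred, which presumably causes the generic VFS path to fall back to buffered I/O; adding .direct_IO to ext4_journalled_aops makes data=journal inodes take that fallback instead of hitting a missing method. Sketch of the guard's effect:

	/* for a data=journal inode every write must pass through the
	 * journal, so direct I/O is refused before any setup work */
	if (ext4_should_journal_data(inode))
		return 0;	/* 0 bytes done: caller falls back to buffered */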
@@ -2963,6 +3113,227 @@ void ext4_set_aops(struct inode *inode) | |||
2963 | inode->i_mapping->a_ops = &ext4_journalled_aops; | 3113 | inode->i_mapping->a_ops = &ext4_journalled_aops; |
2964 | } | 3114 | } |
2965 | 3115 | ||
3116 | |||
3117 | /* | ||
3118 | * ext4_discard_partial_page_buffers() | ||
3119 | * Wrapper function for ext4_discard_partial_page_buffers_no_lock. | ||
3120 | * This function finds and locks the page containing the offset | ||
3121 | * "from" and passes it to ext4_discard_partial_page_buffers_no_lock. | ||
3122 | * Calling functions that already have the page locked should call | ||
3123 | * ext4_discard_partial_page_buffers_no_lock directly. | ||
3124 | */ | ||
3125 | int ext4_discard_partial_page_buffers(handle_t *handle, | ||
3126 | struct address_space *mapping, loff_t from, | ||
3127 | loff_t length, int flags) | ||
3128 | { | ||
3129 | struct inode *inode = mapping->host; | ||
3130 | struct page *page; | ||
3131 | int err = 0; | ||
3132 | |||
3133 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, | ||
3134 | mapping_gfp_mask(mapping) & ~__GFP_FS); | ||
3135 | if (!page) | ||
3136 | return -ENOMEM; | ||
3137 | |||
3138 | err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page, | ||
3139 | from, length, flags); | ||
3140 | |||
3141 | unlock_page(page); | ||
3142 | page_cache_release(page); | ||
3143 | return err; | ||
3144 | } | ||
3145 | |||
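A hedged usage sketch of the wrapper; the offset and length are illustrative, and the flag limits zeroing to buffer heads that are already unmapped:

	/* zero the tail of the page containing i_size, touching only
	 * regions whose blocks have already been released */
	err = ext4_discard_partial_page_buffers(handle, inode->i_mapping,
			inode->i_size, page_len,
			EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);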
3146 | /* | ||
3147 | * ext4_discard_partial_page_buffers_no_lock() | ||
3148 | * Zeros a page range of length 'length' starting from offset 'from'. | ||
3149 | * Buffer heads that correspond to the block-aligned regions of the | ||
3150 | * zeroed range will be unmapped. Non-block-aligned regions | ||
3151 | * will have the corresponding buffer head mapped if needed so that | ||
3152 | * that region of the page can be updated with the partial zero-out. | ||
3153 | * | ||
3154 | * This function assumes that the page has already been locked. | ||
3155 | * The range to be discarded must be contained within the given page. | ||
3156 | * If the specified range exceeds the end of the page it will be shortened | ||
3157 | * to the end of the page that corresponds to 'from'. This function is | ||
3158 | * appropriate for updating a page and its buffer heads that are to be | ||
3159 | * unmapped and zeroed for blocks that have either been released or are | ||
3160 | * going to be released. | ||
3161 | * | ||
3162 | * handle: The journal handle | ||
3163 | * inode: The file's inode | ||
3164 | * page: A locked page that contains the offset "from" | ||
3165 | * from: The starting byte offset (from the beginning of the file) | ||
3166 | * to begin discarding | ||
3167 | * len: The length of bytes to discard | ||
3168 | * flags: Optional flags that may be used: | ||
3169 | * | ||
3170 | * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED | ||
3171 | * Only zero the regions of the page whose buffer heads | ||
3172 | * have already been unmapped. This flag is appropriate | ||
3173 | * for updating the contents of a page whose blocks may | ||
3174 | * have already been released, and we only want to zero | ||
3175 | * out the regions that correspond to those released blocks. | ||
3176 | * | ||
3177 | * Returns zero on success or negative on failure. | ||
3178 | */ | ||
3179 | int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | ||
3180 | struct inode *inode, struct page *page, loff_t from, | ||
3181 | loff_t length, int flags) | ||
3182 | { | ||
3183 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | ||
3184 | unsigned int offset = from & (PAGE_CACHE_SIZE-1); | ||
3185 | unsigned int blocksize, max, pos; | ||
3186 | ext4_lblk_t iblock; | ||
3187 | struct buffer_head *bh; | ||
3188 | int err = 0; | ||
3189 | |||
3190 | blocksize = inode->i_sb->s_blocksize; | ||
3191 | max = PAGE_CACHE_SIZE - offset; | ||
3192 | |||
3193 | if (index != page->index) | ||
3194 | return -EINVAL; | ||
3195 | |||
3196 | /* | ||
3197 | * correct the length if it does not fall between | ||
3198 | * 'from' and the end of the page | ||
3199 | */ | ||
3200 | if (length > max || length < 0) | ||
3201 | length = max; | ||
3202 | |||
3203 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | ||
3204 | |||
3205 | if (!page_has_buffers(page)) { | ||
3206 | /* | ||
3207 | * If the range to be discarded covers a partial block | ||
3208 | * we need to get the page buffers. This is because | ||
3209 | * partial blocks cannot be released and the page needs | ||
3210 | * to be updated with the contents of the block before | ||
3211 | * we write the zeros on top of it. | ||
3212 | */ | ||
3213 | if ((from & (blocksize - 1)) || | ||
3214 | ((from + length) & (blocksize - 1))) { | ||
3215 | create_empty_buffers(page, blocksize, 0); | ||
3216 | } else { | ||
3217 | /* | ||
3218 | * If there are no partial blocks, | ||
3219 | * there is nothing to update, | ||
3220 | * so we can return now | ||
3221 | */ | ||
3222 | return 0; | ||
3223 | } | ||
3224 | } | ||
3225 | |||
3226 | /* Find the buffer that contains "offset" */ | ||
3227 | bh = page_buffers(page); | ||
3228 | pos = blocksize; | ||
3229 | while (offset >= pos) { | ||
3230 | bh = bh->b_this_page; | ||
3231 | iblock++; | ||
3232 | pos += blocksize; | ||
3233 | } | ||
3234 | |||
3235 | pos = offset; | ||
3236 | while (pos < offset + length) { | ||
3237 | unsigned int end_of_block, range_to_discard; | ||
3238 | |||
3239 | err = 0; | ||
3240 | |||
3241 | /* The length of space left to zero and unmap */ | ||
3242 | range_to_discard = offset + length - pos; | ||
3243 | |||
3244 | /* The length of space until the end of the block */ | ||
3245 | end_of_block = blocksize - (pos & (blocksize-1)); | ||
3246 | |||
3247 | /* | ||
3248 | * Do not unmap or zero past end of block | ||
3249 | * for this buffer head | ||
3250 | */ | ||
3251 | if (range_to_discard > end_of_block) | ||
3252 | range_to_discard = end_of_block; | ||
3253 | |||
3254 | |||
3255 | /* | ||
3256 | * Skip this buffer head if we are only zeroing unmapped | ||
3257 | * regions of the page | ||
3258 | */ | ||
3259 | if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED && | ||
3260 | buffer_mapped(bh)) | ||
3261 | goto next; | ||
3262 | |||
3263 | /* If the range is block aligned, unmap */ | ||
3264 | if (range_to_discard == blocksize) { | ||
3265 | clear_buffer_dirty(bh); | ||
3266 | bh->b_bdev = NULL; | ||
3267 | clear_buffer_mapped(bh); | ||
3268 | clear_buffer_req(bh); | ||
3269 | clear_buffer_new(bh); | ||
3270 | clear_buffer_delay(bh); | ||
3271 | clear_buffer_unwritten(bh); | ||
3272 | clear_buffer_uptodate(bh); | ||
3273 | zero_user(page, pos, range_to_discard); | ||
3274 | BUFFER_TRACE(bh, "Buffer discarded"); | ||
3275 | goto next; | ||
3276 | } | ||
3277 | |||
3278 | /* | ||
3279 | * If this block is not completely contained in the range | ||
3280 | * to be discarded, then it is not going to be released. Because | ||
3281 | * we are keeping this block, we need to make sure this part | ||
3282 | * of the page is uptodate before we modify it by writing | ||
3283 | * partial zeros on it. | ||
3284 | */ | ||
3285 | if (!buffer_mapped(bh)) { | ||
3286 | /* | ||
3287 | * Buffer head must be mapped before we can read | ||
3288 | * from the block | ||
3289 | */ | ||
3290 | BUFFER_TRACE(bh, "unmapped"); | ||
3291 | ext4_get_block(inode, iblock, bh, 0); | ||
3292 | /* unmapped? It's a hole - nothing to do */ | ||
3293 | if (!buffer_mapped(bh)) { | ||
3294 | BUFFER_TRACE(bh, "still unmapped"); | ||
3295 | goto next; | ||
3296 | } | ||
3297 | } | ||
3298 | |||
3299 | /* Ok, it's mapped. Make sure it's up-to-date */ | ||
3300 | if (PageUptodate(page)) | ||
3301 | set_buffer_uptodate(bh); | ||
3302 | |||
3303 | if (!buffer_uptodate(bh)) { | ||
3304 | err = -EIO; | ||
3305 | ll_rw_block(READ, 1, &bh); | ||
3306 | wait_on_buffer(bh); | ||
3307 | /* Uhhuh. Read error. Complain and punt.*/ | ||
3308 | if (!buffer_uptodate(bh)) | ||
3309 | goto next; | ||
3310 | } | ||
3311 | |||
3312 | if (ext4_should_journal_data(inode)) { | ||
3313 | BUFFER_TRACE(bh, "get write access"); | ||
3314 | err = ext4_journal_get_write_access(handle, bh); | ||
3315 | if (err) | ||
3316 | goto next; | ||
3317 | } | ||
3318 | |||
3319 | zero_user(page, pos, range_to_discard); | ||
3320 | |||
3321 | err = 0; | ||
3322 | if (ext4_should_journal_data(inode)) { | ||
3323 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
3324 | } else | ||
3325 | mark_buffer_dirty(bh); | ||
3326 | |||
3327 | BUFFER_TRACE(bh, "Partial buffer zeroed"); | ||
3328 | next: | ||
3329 | bh = bh->b_this_page; | ||
3330 | iblock++; | ||
3331 | pos += range_to_discard; | ||
3332 | } | ||
3333 | |||
3334 | return err; | ||
3335 | } | ||
3336 | |||
2966 | /* | 3337 | /* |
2967 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' | 3338 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' |
2968 | * up to the end of the block which corresponds to `from'. | 3339 | * up to the end of the block which corresponds to `from'. |
@@ -3005,7 +3376,7 @@ int ext4_block_zero_page_range(handle_t *handle, | |||
3005 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, | 3376 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, |
3006 | mapping_gfp_mask(mapping) & ~__GFP_FS); | 3377 | mapping_gfp_mask(mapping) & ~__GFP_FS); |
3007 | if (!page) | 3378 | if (!page) |
3008 | return -EINVAL; | 3379 | return -ENOMEM; |
3009 | 3380 | ||
3010 | blocksize = inode->i_sb->s_blocksize; | 3381 | blocksize = inode->i_sb->s_blocksize; |
3011 | max = blocksize - (offset & (blocksize - 1)); | 3382 | max = blocksize - (offset & (blocksize - 1)); |
@@ -3074,11 +3445,8 @@ int ext4_block_zero_page_range(handle_t *handle, | |||
3074 | err = 0; | 3445 | err = 0; |
3075 | if (ext4_should_journal_data(inode)) { | 3446 | if (ext4_should_journal_data(inode)) { |
3076 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 3447 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
3077 | } else { | 3448 | } else |
3078 | if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode) | ||
3079 | err = ext4_jbd2_file_inode(handle, inode); | ||
3080 | mark_buffer_dirty(bh); | 3449 | mark_buffer_dirty(bh); |
3081 | } | ||
3082 | 3450 | ||
3083 | unlock: | 3451 | unlock: |
3084 | unlock_page(page); | 3452 | unlock_page(page); |
@@ -3119,6 +3487,11 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
3119 | return -ENOTSUPP; | 3487 | return -ENOTSUPP; |
3120 | } | 3488 | } |
3121 | 3489 | ||
3490 | if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) { | ||
3491 | /* TODO: Add support for bigalloc file systems */ | ||
3492 | return -ENOTSUPP; | ||
3493 | } | ||
3494 | |||
3122 | return ext4_ext_punch_hole(file, offset, length); | 3495 | return ext4_ext_punch_hole(file, offset, length); |
3123 | } | 3496 | } |
3124 | 3497 | ||
@@ -4420,6 +4793,7 @@ retry_alloc: | |||
4420 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { | 4793 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { |
4421 | unlock_page(page); | 4794 | unlock_page(page); |
4422 | ret = VM_FAULT_SIGBUS; | 4795 | ret = VM_FAULT_SIGBUS; |
4796 | ext4_journal_stop(handle); | ||
4423 | goto out; | 4797 | goto out; |
4424 | } | 4798 | } |
4425 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | 4799 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index f18bfe37aff..a56796814d6 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -21,6 +21,7 @@ | |||
21 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 21 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
22 | { | 22 | { |
23 | struct inode *inode = filp->f_dentry->d_inode; | 23 | struct inode *inode = filp->f_dentry->d_inode; |
24 | struct super_block *sb = inode->i_sb; | ||
24 | struct ext4_inode_info *ei = EXT4_I(inode); | 25 | struct ext4_inode_info *ei = EXT4_I(inode); |
25 | unsigned int flags; | 26 | unsigned int flags; |
26 | 27 | ||
@@ -173,33 +174,8 @@ setversion_out: | |||
173 | mnt_drop_write(filp->f_path.mnt); | 174 | mnt_drop_write(filp->f_path.mnt); |
174 | return err; | 175 | return err; |
175 | } | 176 | } |
176 | #ifdef CONFIG_JBD2_DEBUG | ||
177 | case EXT4_IOC_WAIT_FOR_READONLY: | ||
178 | /* | ||
179 | * This is racy - by the time we're woken up and running, | ||
180 | * the superblock could be released. And the module could | ||
181 | * have been unloaded. So sue me. | ||
182 | * | ||
183 | * Returns 1 if it slept, else zero. | ||
184 | */ | ||
185 | { | ||
186 | struct super_block *sb = inode->i_sb; | ||
187 | DECLARE_WAITQUEUE(wait, current); | ||
188 | int ret = 0; | ||
189 | |||
190 | set_current_state(TASK_INTERRUPTIBLE); | ||
191 | add_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait); | ||
192 | if (timer_pending(&EXT4_SB(sb)->turn_ro_timer)) { | ||
193 | schedule(); | ||
194 | ret = 1; | ||
195 | } | ||
196 | remove_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait); | ||
197 | return ret; | ||
198 | } | ||
199 | #endif | ||
200 | case EXT4_IOC_GROUP_EXTEND: { | 177 | case EXT4_IOC_GROUP_EXTEND: { |
201 | ext4_fsblk_t n_blocks_count; | 178 | ext4_fsblk_t n_blocks_count; |
202 | struct super_block *sb = inode->i_sb; | ||
203 | int err, err2=0; | 179 | int err, err2=0; |
204 | 180 | ||
205 | err = ext4_resize_begin(sb); | 181 | err = ext4_resize_begin(sb); |
@@ -209,6 +185,13 @@ setversion_out: | |||
209 | if (get_user(n_blocks_count, (__u32 __user *)arg)) | 185 | if (get_user(n_blocks_count, (__u32 __user *)arg)) |
210 | return -EFAULT; | 186 | return -EFAULT; |
211 | 187 | ||
188 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
189 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | ||
190 | ext4_msg(sb, KERN_ERR, | ||
191 | "Online resizing not supported with bigalloc"); | ||
192 | return -EOPNOTSUPP; | ||
193 | } | ||
194 | |||
212 | err = mnt_want_write(filp->f_path.mnt); | 195 | err = mnt_want_write(filp->f_path.mnt); |
213 | if (err) | 196 | if (err) |
214 | return err; | 197 | return err; |
@@ -250,6 +233,13 @@ setversion_out: | |||
250 | goto mext_out; | 233 | goto mext_out; |
251 | } | 234 | } |
252 | 235 | ||
236 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
237 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | ||
238 | ext4_msg(sb, KERN_ERR, | ||
239 | "Online defrag not supported with bigalloc"); | ||
240 | return -EOPNOTSUPP; | ||
241 | } | ||
242 | |||
253 | err = mnt_want_write(filp->f_path.mnt); | 243 | err = mnt_want_write(filp->f_path.mnt); |
254 | if (err) | 244 | if (err) |
255 | goto mext_out; | 245 | goto mext_out; |
@@ -270,7 +260,6 @@ mext_out: | |||
270 | 260 | ||
271 | case EXT4_IOC_GROUP_ADD: { | 261 | case EXT4_IOC_GROUP_ADD: { |
272 | struct ext4_new_group_data input; | 262 | struct ext4_new_group_data input; |
273 | struct super_block *sb = inode->i_sb; | ||
274 | int err, err2=0; | 263 | int err, err2=0; |
275 | 264 | ||
276 | err = ext4_resize_begin(sb); | 265 | err = ext4_resize_begin(sb); |
@@ -281,6 +270,13 @@ mext_out: | |||
281 | sizeof(input))) | 270 | sizeof(input))) |
282 | return -EFAULT; | 271 | return -EFAULT; |
283 | 272 | ||
273 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
274 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | ||
275 | ext4_msg(sb, KERN_ERR, | ||
276 | "Online resizing not supported with bigalloc"); | ||
277 | return -EOPNOTSUPP; | ||
278 | } | ||
279 | |||
284 | err = mnt_want_write(filp->f_path.mnt); | 280 | err = mnt_want_write(filp->f_path.mnt); |
285 | if (err) | 281 | if (err) |
286 | return err; | 282 | return err; |
@@ -337,7 +333,6 @@ mext_out: | |||
337 | 333 | ||
338 | case FITRIM: | 334 | case FITRIM: |
339 | { | 335 | { |
340 | struct super_block *sb = inode->i_sb; | ||
341 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | 336 | struct request_queue *q = bdev_get_queue(sb->s_bdev); |
342 | struct fstrim_range range; | 337 | struct fstrim_range range; |
343 | int ret = 0; | 338 | int ret = 0; |
@@ -348,7 +343,14 @@ mext_out: | |||
348 | if (!blk_queue_discard(q)) | 343 | if (!blk_queue_discard(q)) |
349 | return -EOPNOTSUPP; | 344 | return -EOPNOTSUPP; |
350 | 345 | ||
351 | if (copy_from_user(&range, (struct fstrim_range *)arg, | 346 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
347 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | ||
348 | ext4_msg(sb, KERN_ERR, | ||
349 | "FITRIM not supported with bigalloc"); | ||
350 | return -EOPNOTSUPP; | ||
351 | } | ||
352 | |||
353 | if (copy_from_user(&range, (struct fstrim_range __user *)arg, | ||
352 | sizeof(range))) | 354 | sizeof(range))) |
353 | return -EFAULT; | 355 | return -EFAULT; |
354 | 356 | ||
@@ -358,7 +360,7 @@ mext_out: | |||
358 | if (ret < 0) | 360 | if (ret < 0) |
359 | return ret; | 361 | return ret; |
360 | 362 | ||
361 | if (copy_to_user((struct fstrim_range *)arg, &range, | 363 | if (copy_to_user((struct fstrim_range __user *)arg, &range, |
362 | sizeof(range))) | 364 | sizeof(range))) |
363 | return -EFAULT; | 365 | return -EFAULT; |
364 | 366 | ||
@@ -396,11 +398,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
396 | case EXT4_IOC32_SETVERSION_OLD: | 398 | case EXT4_IOC32_SETVERSION_OLD: |
397 | cmd = EXT4_IOC_SETVERSION_OLD; | 399 | cmd = EXT4_IOC_SETVERSION_OLD; |
398 | break; | 400 | break; |
399 | #ifdef CONFIG_JBD2_DEBUG | ||
400 | case EXT4_IOC32_WAIT_FOR_READONLY: | ||
401 | cmd = EXT4_IOC_WAIT_FOR_READONLY; | ||
402 | break; | ||
403 | #endif | ||
404 | case EXT4_IOC32_GETRSVSZ: | 401 | case EXT4_IOC32_GETRSVSZ: |
405 | cmd = EXT4_IOC_GETRSVSZ; | 402 | cmd = EXT4_IOC_GETRSVSZ; |
406 | break; | 403 | break; |
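GROUP_EXTEND, GROUP_ADD, MOVE_EXT and FITRIM now all repeat the same bigalloc guard; a hypothetical consolidation (not part of this diff) could read:

	static int ext4_bigalloc_unsupported(struct super_block *sb,
					     const char *op)
	{
		if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
				EXT4_FEATURE_RO_COMPAT_BIGALLOC))
			return 0;
		ext4_msg(sb, KERN_ERR, "%s not supported with bigalloc", op);
		return -EOPNOTSUPP;
	}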
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 17a5a57c415..e2d8be8f28b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -70,8 +70,8 @@ | |||
70 | * | 70 | * |
71 | * pa_lstart -> the logical start block for this prealloc space | 71 | * pa_lstart -> the logical start block for this prealloc space |
72 | * pa_pstart -> the physical start block for this prealloc space | 72 | * pa_pstart -> the physical start block for this prealloc space |
73 | * pa_len -> length for this prealloc space | 73 | * pa_len -> length for this prealloc space (in clusters) |
74 | * pa_free -> free space available in this prealloc space | 74 | * pa_free -> free space available in this prealloc space (in clusters) |
75 | * | 75 | * |
76 | * The inode preallocation space is used looking at the _logical_ start | 76 | * The inode preallocation space is used looking at the _logical_ start |
77 | * block. If only the logical file block falls within the range of prealloc | 77 | * block. If only the logical file block falls within the range of prealloc |
@@ -126,7 +126,8 @@ | |||
126 | * list. In case of inode preallocation we follow a list of heuristics | 126 | * list. In case of inode preallocation we follow a list of heuristics |
127 | * based on file size. This can be found in ext4_mb_normalize_request. If | 127 | * based on file size. This can be found in ext4_mb_normalize_request. If |
128 | * we are doing a group prealloc we try to normalize the request to | 128 | * we are doing a group prealloc we try to normalize the request to |
129 | * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is | 129 | * sbi->s_mb_group_prealloc. The default value of s_mb_group_prealloc is |
130 | * dependent on the cluster size; for non-bigalloc file systems, it is | ||
130 | * 512 blocks. This can be tuned via | 131 | * 512 blocks. This can be tuned via |
131 | * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in | 132 | * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in |
132 | * terms of number of blocks. If we have mounted the file system with -O | 133 | * terms of number of blocks. If we have mounted the file system with -O |
@@ -459,7 +460,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, | |||
459 | ext4_fsblk_t blocknr; | 460 | ext4_fsblk_t blocknr; |
460 | 461 | ||
461 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); | 462 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); |
462 | blocknr += first + i; | 463 | blocknr += EXT4_C2B(EXT4_SB(sb), first + i); |
463 | ext4_grp_locked_error(sb, e4b->bd_group, | 464 | ext4_grp_locked_error(sb, e4b->bd_group, |
464 | inode ? inode->i_ino : 0, | 465 | inode ? inode->i_ino : 0, |
465 | blocknr, | 466 | blocknr, |
@@ -580,7 +581,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
580 | continue; | 581 | continue; |
581 | } | 582 | } |
582 | 583 | ||
583 | /* both bits in buddy2 must be 0 */ | 584 | /* both bits in buddy2 must be 1 */ |
584 | MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2)); | 585 | MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2)); |
585 | MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2)); | 586 | MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2)); |
586 | 587 | ||
@@ -653,7 +654,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb, | |||
653 | ext4_grpblk_t chunk; | 654 | ext4_grpblk_t chunk; |
654 | unsigned short border; | 655 | unsigned short border; |
655 | 656 | ||
656 | BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); | 657 | BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb)); |
657 | 658 | ||
658 | border = 2 << sb->s_blocksize_bits; | 659 | border = 2 << sb->s_blocksize_bits; |
659 | 660 | ||
@@ -705,7 +706,7 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
705 | void *buddy, void *bitmap, ext4_group_t group) | 706 | void *buddy, void *bitmap, ext4_group_t group) |
706 | { | 707 | { |
707 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 708 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
708 | ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb); | 709 | ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); |
709 | ext4_grpblk_t i = 0; | 710 | ext4_grpblk_t i = 0; |
710 | ext4_grpblk_t first; | 711 | ext4_grpblk_t first; |
711 | ext4_grpblk_t len; | 712 | ext4_grpblk_t len; |
@@ -734,7 +735,7 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
734 | 735 | ||
735 | if (free != grp->bb_free) { | 736 | if (free != grp->bb_free) { |
736 | ext4_grp_locked_error(sb, group, 0, 0, | 737 | ext4_grp_locked_error(sb, group, 0, 0, |
737 | "%u blocks in bitmap, %u in gd", | 738 | "%u clusters in bitmap, %u in gd", |
738 | free, grp->bb_free); | 739 | free, grp->bb_free); |
739 | /* | 740 | /* |
740 | * If we intend to continue, we consider the group descriptor | 741 |
@@ -1339,7 +1340,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1339 | ext4_fsblk_t blocknr; | 1340 | ext4_fsblk_t blocknr; |
1340 | 1341 | ||
1341 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); | 1342 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); |
1342 | blocknr += block; | 1343 | blocknr += EXT4_C2B(EXT4_SB(sb), block); |
1343 | ext4_grp_locked_error(sb, e4b->bd_group, | 1344 | ext4_grp_locked_error(sb, e4b->bd_group, |
1344 | inode ? inode->i_ino : 0, | 1345 | inode ? inode->i_ino : 0, |
1345 | blocknr, | 1346 | blocknr, |
@@ -1390,7 +1391,6 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, | |||
1390 | { | 1391 | { |
1391 | int next = block; | 1392 | int next = block; |
1392 | int max; | 1393 | int max; |
1393 | int ord; | ||
1394 | void *buddy; | 1394 | void *buddy; |
1395 | 1395 | ||
1396 | assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); | 1396 | assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); |
@@ -1432,9 +1432,8 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, | |||
1432 | if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) | 1432 | if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) |
1433 | break; | 1433 | break; |
1434 | 1434 | ||
1435 | ord = mb_find_order_for_block(e4b, next); | 1435 | order = mb_find_order_for_block(e4b, next); |
1436 | 1436 | ||
1437 | order = ord; | ||
1438 | block = next >> order; | 1437 | block = next >> order; |
1439 | ex->fe_len += 1 << order; | 1438 | ex->fe_len += 1 << order; |
1440 | } | 1439 | } |
@@ -1624,8 +1623,8 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac, | |||
1624 | struct ext4_free_extent *gex = &ac->ac_g_ex; | 1623 | struct ext4_free_extent *gex = &ac->ac_g_ex; |
1625 | 1624 | ||
1626 | BUG_ON(ex->fe_len <= 0); | 1625 | BUG_ON(ex->fe_len <= 0); |
1627 | BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | 1626 | BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); |
1628 | BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | 1627 | BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); |
1629 | BUG_ON(ac->ac_status != AC_STATUS_CONTINUE); | 1628 | BUG_ON(ac->ac_status != AC_STATUS_CONTINUE); |
1630 | 1629 | ||
1631 | ac->ac_found++; | 1630 | ac->ac_found++; |
@@ -1823,15 +1822,15 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1823 | 1822 | ||
1824 | while (free && ac->ac_status == AC_STATUS_CONTINUE) { | 1823 | while (free && ac->ac_status == AC_STATUS_CONTINUE) { |
1825 | i = mb_find_next_zero_bit(bitmap, | 1824 | i = mb_find_next_zero_bit(bitmap, |
1826 | EXT4_BLOCKS_PER_GROUP(sb), i); | 1825 | EXT4_CLUSTERS_PER_GROUP(sb), i); |
1827 | if (i >= EXT4_BLOCKS_PER_GROUP(sb)) { | 1826 | if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) { |
1828 | /* | 1827 | /* |
1829 | * If we have a corrupt bitmap, we won't find any | 1828 |
1830 | * free blocks even though group info says we | 1829 |
1831 | * have free blocks | 1830 |
1832 | */ | 1831 | */ |
1833 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, | 1832 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, |
1834 | "%d free blocks as per " | 1833 | "%d free clusters as per " |
1835 | "group info. But bitmap says 0", | 1834 | "group info. But bitmap says 0", |
1836 | free); | 1835 | free); |
1837 | break; | 1836 | break; |
@@ -1841,7 +1840,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1841 | BUG_ON(ex.fe_len <= 0); | 1840 | BUG_ON(ex.fe_len <= 0); |
1842 | if (free < ex.fe_len) { | 1841 | if (free < ex.fe_len) { |
1843 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, | 1842 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, |
1844 | "%d free blocks as per " | 1843 | "%d free clusters as per " |
1845 | "group info. But got %d blocks", | 1844 | "group info. But got %d blocks", |
1846 | free, ex.fe_len); | 1845 | free, ex.fe_len); |
1847 | /* | 1846 | /* |
@@ -1887,7 +1886,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, | |||
1887 | do_div(a, sbi->s_stripe); | 1886 | do_div(a, sbi->s_stripe); |
1888 | i = (a * sbi->s_stripe) - first_group_block; | 1887 | i = (a * sbi->s_stripe) - first_group_block; |
1889 | 1888 | ||
1890 | while (i < EXT4_BLOCKS_PER_GROUP(sb)) { | 1889 | while (i < EXT4_CLUSTERS_PER_GROUP(sb)) { |
1891 | if (!mb_test_bit(i, bitmap)) { | 1890 | if (!mb_test_bit(i, bitmap)) { |
1892 | max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex); | 1891 | max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex); |
1893 | if (max >= sbi->s_stripe) { | 1892 | if (max >= sbi->s_stripe) { |
@@ -2252,10 +2251,10 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2252 | */ | 2251 | */ |
2253 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 2252 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
2254 | meta_group_info[i]->bb_free = | 2253 | meta_group_info[i]->bb_free = |
2255 | ext4_free_blocks_after_init(sb, group, desc); | 2254 | ext4_free_clusters_after_init(sb, group, desc); |
2256 | } else { | 2255 | } else { |
2257 | meta_group_info[i]->bb_free = | 2256 | meta_group_info[i]->bb_free = |
2258 | ext4_free_blks_count(sb, desc); | 2257 | ext4_free_group_clusters(sb, desc); |
2259 | } | 2258 | } |
2260 | 2259 | ||
2261 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2260 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
@@ -2473,7 +2472,20 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2473 | sbi->s_mb_stats = MB_DEFAULT_STATS; | 2472 | sbi->s_mb_stats = MB_DEFAULT_STATS; |
2474 | sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; | 2473 | sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; |
2475 | sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; | 2474 | sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; |
2476 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; | 2475 | /* |
2476 | * The default group preallocation is 512, which for 4k block | ||
2477 | * sizes translates to 2 megabytes. However for bigalloc file | ||
2478 | * systems, this is probably too big (i.e, if the cluster size | ||
2479 | * is 1 megabyte, then group preallocation size becomes half a | ||
2480 | * gigabyte!). As a default, we will keep a two megabyte | ||
2481 | * group prealloc size for cluster sizes up to 64k, and after | ||
2482 | * that, we will force a minimum group preallocation size of | ||
2483 | * 32 clusters. This translates to 8 megs when the cluster | ||
2484 | * size is 256k, and 32 megs when the cluster size is 1 meg, | ||
2485 | * which seems reasonable as a default. | ||
2486 | */ | ||
2487 | sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >> | ||
2488 | sbi->s_cluster_bits, 32); | ||
2477 | /* | 2489 | /* |
2478 | * If there is a s_stripe > 1, then we set the s_mb_group_prealloc | 2490 | * If there is a s_stripe > 1, then we set the s_mb_group_prealloc |
2479 | * to the lowest multiple of s_stripe which is bigger than | 2491 | * to the lowest multiple of s_stripe which is bigger than |
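
The new default can be checked by working the comment's arithmetic through a few geometries. A sketch assuming 4k blocks and the MB_DEFAULT_GROUP_PREALLOC value of 512 stated above:

    #include <stdio.h>

    #define MB_DEFAULT_GROUP_PREALLOC 512   /* blocks, i.e. 2 MiB at 4k */

    int main(void)
    {
            /* cluster_bits = log2(blocks per cluster) for 4k blocks:
             * 4k cluster -> 0, 64k -> 4, 256k -> 6, 1M -> 8 */
            int bits[] = { 0, 4, 6, 8 };
            for (int i = 0; i < 4; i++) {
                    int prealloc = MB_DEFAULT_GROUP_PREALLOC >> bits[i];
                    if (prealloc < 32)
                            prealloc = 32;          /* the max(..., 32) floor */
                    /* prealloc clusters * cluster size in KiB / 1024 = MiB */
                    printf("cluster_bits=%d -> %d clusters (%d MiB)\n",
                           bits[i], prealloc,
                           prealloc * (4 << bits[i]) / 1024);
            }
            return 0;   /* prints 2, 2, 8 and 32 MiB, matching the comment */
    }
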
@@ -2490,7 +2502,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2490 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); | 2502 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
2491 | if (sbi->s_locality_groups == NULL) { | 2503 | if (sbi->s_locality_groups == NULL) { |
2492 | ret = -ENOMEM; | 2504 | ret = -ENOMEM; |
2493 | goto out; | 2505 | goto out_free_groupinfo_slab; |
2494 | } | 2506 | } |
2495 | for_each_possible_cpu(i) { | 2507 | for_each_possible_cpu(i) { |
2496 | struct ext4_locality_group *lg; | 2508 | struct ext4_locality_group *lg; |
@@ -2503,9 +2515,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2503 | 2515 | ||
2504 | /* init file for buddy data */ | 2516 | /* init file for buddy data */ |
2505 | ret = ext4_mb_init_backend(sb); | 2517 | ret = ext4_mb_init_backend(sb); |
2506 | if (ret != 0) { | 2518 | if (ret != 0) |
2507 | goto out; | 2519 | goto out_free_locality_groups; |
2508 | } | ||
2509 | 2520 | ||
2510 | if (sbi->s_proc) | 2521 | if (sbi->s_proc) |
2511 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, | 2522 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, |
@@ -2513,11 +2524,19 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2513 | 2524 | ||
2514 | if (sbi->s_journal) | 2525 | if (sbi->s_journal) |
2515 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | 2526 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; |
2527 | |||
2528 | return 0; | ||
2529 | |||
2530 | out_free_locality_groups: | ||
2531 | free_percpu(sbi->s_locality_groups); | ||
2532 | sbi->s_locality_groups = NULL; | ||
2533 | out_free_groupinfo_slab: | ||
2534 | ext4_groupinfo_destroy_slabs(); | ||
2516 | out: | 2535 | out: |
2517 | if (ret) { | 2536 | kfree(sbi->s_mb_offsets); |
2518 | kfree(sbi->s_mb_offsets); | 2537 | sbi->s_mb_offsets = NULL; |
2519 | kfree(sbi->s_mb_maxs); | 2538 | kfree(sbi->s_mb_maxs); |
2520 | } | 2539 | sbi->s_mb_maxs = NULL; |
2521 | return ret; | 2540 | return ret; |
2522 | } | 2541 | } |
2523 | 2542 | ||
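
The reworked failure path in ext4_mb_init() replaces the single `out:` label with stacked labels that unwind in reverse order of setup. A stand-in sketch of that idiom; the allocations and label names here are placeholders, not the real mballoc state:

    #include <stdlib.h>

    /* Stand-in for ext4_mb_init()'s staged setup: arrays, then the
     * per-cpu locality groups, then the buddy backend. Each failure
     * unwinds only what was successfully set up before it. */
    static int demo_init(int backend_fails)
    {
            void *offsets = malloc(64), *maxs = malloc(64), *groups = NULL;
            int ret = -12;                          /* -ENOMEM stand-in */

            if (!offsets || !maxs)
                    goto out;
            groups = malloc(64);
            if (!groups)
                    goto out;                       /* "out_free_groupinfo_slab" stage */
            if (backend_fails)
                    goto out_free_groups;           /* "out_free_locality_groups" stage */
            free(groups); free(offsets); free(maxs);
            return 0;

    out_free_groups:
            free(groups);
    out:
            free(offsets);                          /* free(NULL) is a no-op */
            free(maxs);
            return ret;
    }

    int main(void) { return demo_init(1) == -12 ? 0 : 1; }
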
@@ -2602,11 +2621,13 @@ int ext4_mb_release(struct super_block *sb) | |||
2602 | } | 2621 | } |
2603 | 2622 | ||
2604 | static inline int ext4_issue_discard(struct super_block *sb, | 2623 | static inline int ext4_issue_discard(struct super_block *sb, |
2605 | ext4_group_t block_group, ext4_grpblk_t block, int count) | 2624 | ext4_group_t block_group, ext4_grpblk_t cluster, int count) |
2606 | { | 2625 | { |
2607 | ext4_fsblk_t discard_block; | 2626 | ext4_fsblk_t discard_block; |
2608 | 2627 | ||
2609 | discard_block = block + ext4_group_first_block_no(sb, block_group); | 2628 | discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) + |
2629 | ext4_group_first_block_no(sb, block_group)); | ||
2630 | count = EXT4_C2B(EXT4_SB(sb), count); | ||
2610 | trace_ext4_discard_blocks(sb, | 2631 | trace_ext4_discard_blocks(sb, |
2611 | (unsigned long long) discard_block, count); | 2632 | (unsigned long long) discard_block, count); |
2612 | return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); | 2633 | return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); |
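
Since the free list now stores cluster offsets, the discard helper has to scale both the start offset and the count back to block units before handing the range to sb_issue_discard(). A worked sketch with assumed geometry and sample values:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            const unsigned cluster_bits = 4;          /* assumed 16 blocks/cluster */
            uint64_t group_first_block = 32768;       /* ext4_group_first_block_no() */
            uint64_t cluster = 10, count_clusters = 3;

            /* discard_block = EXT4_C2B(cluster) + first block of the group */
            uint64_t discard_block = (cluster << cluster_bits) + group_first_block;
            uint64_t count_blocks  = count_clusters << cluster_bits;

            printf("discard blocks %llu..%llu\n",
                   (unsigned long long)discard_block,
                   (unsigned long long)(discard_block + count_blocks - 1));
            return 0;
    }
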
@@ -2633,7 +2654,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2633 | 2654 | ||
2634 | if (test_opt(sb, DISCARD)) | 2655 | if (test_opt(sb, DISCARD)) |
2635 | ext4_issue_discard(sb, entry->group, | 2656 | ext4_issue_discard(sb, entry->group, |
2636 | entry->start_blk, entry->count); | 2657 | entry->start_cluster, entry->count); |
2637 | 2658 | ||
2638 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2659 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
2639 | /* we expect to find existing buddy because it's pinned */ | 2660 | /* we expect to find existing buddy because it's pinned */ |
@@ -2646,7 +2667,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2646 | ext4_lock_group(sb, entry->group); | 2667 | ext4_lock_group(sb, entry->group); |
2647 | /* Take it out of per group rb tree */ | 2668 | /* Take it out of per group rb tree */ |
2648 | rb_erase(&entry->node, &(db->bb_free_root)); | 2669 | rb_erase(&entry->node, &(db->bb_free_root)); |
2649 | mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); | 2670 | mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count); |
2650 | 2671 | ||
2651 | /* | 2672 | /* |
2652 | * Clear the trimmed flag for the group so that the next | 2673 | * Clear the trimmed flag for the group so that the next |
@@ -2752,7 +2773,7 @@ void ext4_exit_mballoc(void) | |||
2752 | */ | 2773 | */ |
2753 | static noinline_for_stack int | 2774 | static noinline_for_stack int |
2754 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | 2775 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, |
2755 | handle_t *handle, unsigned int reserv_blks) | 2776 | handle_t *handle, unsigned int reserv_clstrs) |
2756 | { | 2777 | { |
2757 | struct buffer_head *bitmap_bh = NULL; | 2778 | struct buffer_head *bitmap_bh = NULL; |
2758 | struct ext4_group_desc *gdp; | 2779 | struct ext4_group_desc *gdp; |
@@ -2783,7 +2804,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2783 | goto out_err; | 2804 | goto out_err; |
2784 | 2805 | ||
2785 | ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, | 2806 | ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, |
2786 | ext4_free_blks_count(sb, gdp)); | 2807 | ext4_free_group_clusters(sb, gdp)); |
2787 | 2808 | ||
2788 | err = ext4_journal_get_write_access(handle, gdp_bh); | 2809 | err = ext4_journal_get_write_access(handle, gdp_bh); |
2789 | if (err) | 2810 | if (err) |
@@ -2791,7 +2812,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2791 | 2812 | ||
2792 | block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); | 2813 | block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); |
2793 | 2814 | ||
2794 | len = ac->ac_b_ex.fe_len; | 2815 | len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
2795 | if (!ext4_data_block_valid(sbi, block, len)) { | 2816 | if (!ext4_data_block_valid(sbi, block, len)) { |
2796 | ext4_error(sb, "Allocating blocks %llu-%llu which overlap " | 2817 | ext4_error(sb, "Allocating blocks %llu-%llu which overlap " |
2797 | "fs metadata\n", block, block+len); | 2818 | "fs metadata\n", block, block+len); |
@@ -2823,28 +2844,29 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2823 | ac->ac_b_ex.fe_len); | 2844 | ac->ac_b_ex.fe_len); |
2824 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 2845 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
2825 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | 2846 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); |
2826 | ext4_free_blks_set(sb, gdp, | 2847 | ext4_free_group_clusters_set(sb, gdp, |
2827 | ext4_free_blocks_after_init(sb, | 2848 | ext4_free_clusters_after_init(sb, |
2828 | ac->ac_b_ex.fe_group, gdp)); | 2849 | ac->ac_b_ex.fe_group, gdp)); |
2829 | } | 2850 | } |
2830 | len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; | 2851 | len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len; |
2831 | ext4_free_blks_set(sb, gdp, len); | 2852 | ext4_free_group_clusters_set(sb, gdp, len); |
2832 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); | 2853 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); |
2833 | 2854 | ||
2834 | ext4_unlock_group(sb, ac->ac_b_ex.fe_group); | 2855 | ext4_unlock_group(sb, ac->ac_b_ex.fe_group); |
2835 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); | 2856 | percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len); |
2836 | /* | 2857 | /* |
2837 | * Now reduce the dirty block count also. Should not go negative | 2858 | * Now reduce the dirty block count also. Should not go negative |
2838 | */ | 2859 | */ |
2839 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) | 2860 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) |
2840 | /* release all the reserved blocks if non delalloc */ | 2861 | /* release all the reserved blocks if non delalloc */ |
2841 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); | 2862 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
2863 | reserv_clstrs); | ||
2842 | 2864 | ||
2843 | if (sbi->s_log_groups_per_flex) { | 2865 | if (sbi->s_log_groups_per_flex) { |
2844 | ext4_group_t flex_group = ext4_flex_group(sbi, | 2866 | ext4_group_t flex_group = ext4_flex_group(sbi, |
2845 | ac->ac_b_ex.fe_group); | 2867 | ac->ac_b_ex.fe_group); |
2846 | atomic_sub(ac->ac_b_ex.fe_len, | 2868 | atomic_sub(ac->ac_b_ex.fe_len, |
2847 | &sbi->s_flex_groups[flex_group].free_blocks); | 2869 | &sbi->s_flex_groups[flex_group].free_clusters); |
2848 | } | 2870 | } |
2849 | 2871 | ||
2850 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | 2872 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); |
@@ -2886,6 +2908,7 @@ static noinline_for_stack void | |||
2886 | ext4_mb_normalize_request(struct ext4_allocation_context *ac, | 2908 | ext4_mb_normalize_request(struct ext4_allocation_context *ac, |
2887 | struct ext4_allocation_request *ar) | 2909 | struct ext4_allocation_request *ar) |
2888 | { | 2910 | { |
2911 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | ||
2889 | int bsbits, max; | 2912 | int bsbits, max; |
2890 | ext4_lblk_t end; | 2913 | ext4_lblk_t end; |
2891 | loff_t size, orig_size, start_off; | 2914 | loff_t size, orig_size, start_off; |
@@ -2916,7 +2939,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
2916 | 2939 | ||
2917 | /* first, let's learn the actual file size | 2940 | /* first, let's learn the actual file size |
2918 | * assuming the current request is allocated */ | 2941 | * assuming the current request is allocated */ |
2919 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; | 2942 | size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len); |
2920 | size = size << bsbits; | 2943 | size = size << bsbits; |
2921 | if (size < i_size_read(ac->ac_inode)) | 2944 | if (size < i_size_read(ac->ac_inode)) |
2922 | size = i_size_read(ac->ac_inode); | 2945 | size = i_size_read(ac->ac_inode); |
@@ -2988,7 +3011,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
2988 | continue; | 3011 | continue; |
2989 | } | 3012 | } |
2990 | 3013 | ||
2991 | pa_end = pa->pa_lstart + pa->pa_len; | 3014 | pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb), |
3015 | pa->pa_len); | ||
2992 | 3016 | ||
2993 | /* PA must not overlap original request */ | 3017 | /* PA must not overlap original request */ |
2994 | BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || | 3018 | BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || |
@@ -3018,9 +3042,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
3018 | rcu_read_lock(); | 3042 | rcu_read_lock(); |
3019 | list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { | 3043 | list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { |
3020 | ext4_lblk_t pa_end; | 3044 | ext4_lblk_t pa_end; |
3045 | |||
3021 | spin_lock(&pa->pa_lock); | 3046 | spin_lock(&pa->pa_lock); |
3022 | if (pa->pa_deleted == 0) { | 3047 | if (pa->pa_deleted == 0) { |
3023 | pa_end = pa->pa_lstart + pa->pa_len; | 3048 | pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb), |
3049 | pa->pa_len); | ||
3024 | BUG_ON(!(start >= pa_end || end <= pa->pa_lstart)); | 3050 | BUG_ON(!(start >= pa_end || end <= pa->pa_lstart)); |
3025 | } | 3051 | } |
3026 | spin_unlock(&pa->pa_lock); | 3052 | spin_unlock(&pa->pa_lock); |
@@ -3036,14 +3062,14 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
3036 | } | 3062 | } |
3037 | BUG_ON(start + size <= ac->ac_o_ex.fe_logical && | 3063 | BUG_ON(start + size <= ac->ac_o_ex.fe_logical && |
3038 | start > ac->ac_o_ex.fe_logical); | 3064 | start > ac->ac_o_ex.fe_logical); |
3039 | BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | 3065 | BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); |
3040 | 3066 | ||
3041 | /* now prepare goal request */ | 3067 | /* now prepare goal request */ |
3042 | 3068 | ||
3043 | /* XXX: is it better to align blocks WRT logical | 3069 | /* XXX: is it better to align blocks WRT logical |
3044 | * placement or to satisfy a big request as is */ | 3070 | * placement or to satisfy a big request as is */ |
3045 | ac->ac_g_ex.fe_logical = start; | 3071 | ac->ac_g_ex.fe_logical = start; |
3046 | ac->ac_g_ex.fe_len = size; | 3072 | ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size); |
3047 | 3073 | ||
3048 | /* define goal start in order to merge */ | 3074 | /* define goal start in order to merge */ |
3049 | if (ar->pright && (ar->lright == (start + size))) { | 3075 | if (ar->pright && (ar->lright == (start + size))) { |
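
The normalized goal size above is computed in block units, so storing it into fe_len (now cluster units) uses the rounding-up conversion; truncating instead could leave the goal smaller than the original request. A tiny sketch of the difference, with an assumed ratio:

    #include <stdio.h>

    int main(void)
    {
            const unsigned cluster_ratio = 4;     /* assumed blocks per cluster */
            unsigned size_blocks = 13;            /* normalized goal, block units */

            /* EXT4_NUM_B2C: round up so the goal still covers the request */
            unsigned fe_len = (size_blocks + cluster_ratio - 1) / cluster_ratio;
            printf("%u blocks -> fe_len = %u clusters (%u blocks reserved)\n",
                   size_blocks, fe_len, fe_len * cluster_ratio);
            return 0;
    }
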
@@ -3112,14 +3138,16 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) | |||
3112 | static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, | 3138 | static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, |
3113 | struct ext4_prealloc_space *pa) | 3139 | struct ext4_prealloc_space *pa) |
3114 | { | 3140 | { |
3141 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | ||
3115 | ext4_fsblk_t start; | 3142 | ext4_fsblk_t start; |
3116 | ext4_fsblk_t end; | 3143 | ext4_fsblk_t end; |
3117 | int len; | 3144 | int len; |
3118 | 3145 | ||
3119 | /* found preallocated blocks, use them */ | 3146 | /* found preallocated blocks, use them */ |
3120 | start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart); | 3147 | start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart); |
3121 | end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len); | 3148 | end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len), |
3122 | len = end - start; | 3149 | start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len)); |
3150 | len = EXT4_NUM_B2C(sbi, end - start); | ||
3123 | ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group, | 3151 | ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group, |
3124 | &ac->ac_b_ex.fe_start); | 3152 | &ac->ac_b_ex.fe_start); |
3125 | ac->ac_b_ex.fe_len = len; | 3153 | ac->ac_b_ex.fe_len = len; |
@@ -3127,7 +3155,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, | |||
3127 | ac->ac_pa = pa; | 3155 | ac->ac_pa = pa; |
3128 | 3156 | ||
3129 | BUG_ON(start < pa->pa_pstart); | 3157 | BUG_ON(start < pa->pa_pstart); |
3130 | BUG_ON(start + len > pa->pa_pstart + pa->pa_len); | 3158 | BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len)); |
3131 | BUG_ON(pa->pa_free < len); | 3159 | BUG_ON(pa->pa_free < len); |
3132 | pa->pa_free -= len; | 3160 | pa->pa_free -= len; |
3133 | 3161 | ||
@@ -3193,6 +3221,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block, | |||
3193 | static noinline_for_stack int | 3221 | static noinline_for_stack int |
3194 | ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | 3222 | ext4_mb_use_preallocated(struct ext4_allocation_context *ac) |
3195 | { | 3223 | { |
3224 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | ||
3196 | int order, i; | 3225 | int order, i; |
3197 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); | 3226 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); |
3198 | struct ext4_locality_group *lg; | 3227 | struct ext4_locality_group *lg; |
@@ -3210,12 +3239,14 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3210 | /* all fields in this condition don't change, | 3239 | /* all fields in this condition don't change, |
3211 | * so we can skip locking for them */ | 3240 | * so we can skip locking for them */ |
3212 | if (ac->ac_o_ex.fe_logical < pa->pa_lstart || | 3241 | if (ac->ac_o_ex.fe_logical < pa->pa_lstart || |
3213 | ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) | 3242 | ac->ac_o_ex.fe_logical >= (pa->pa_lstart + |
3243 | EXT4_C2B(sbi, pa->pa_len))) | ||
3214 | continue; | 3244 | continue; |
3215 | 3245 | ||
3216 | /* non-extent files can't have physical blocks past 2^32 */ | 3246 | /* non-extent files can't have physical blocks past 2^32 */ |
3217 | if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) && | 3247 | if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) && |
3218 | pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) | 3248 | (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) > |
3249 | EXT4_MAX_BLOCK_FILE_PHYS)) | ||
3219 | continue; | 3250 | continue; |
3220 | 3251 | ||
3221 | /* found preallocated blocks, use them */ | 3252 | /* found preallocated blocks, use them */ |
@@ -3291,7 +3322,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | |||
3291 | 3322 | ||
3292 | while (n) { | 3323 | while (n) { |
3293 | entry = rb_entry(n, struct ext4_free_data, node); | 3324 | entry = rb_entry(n, struct ext4_free_data, node); |
3294 | ext4_set_bits(bitmap, entry->start_blk, entry->count); | 3325 | ext4_set_bits(bitmap, entry->start_cluster, entry->count); |
3295 | n = rb_next(n); | 3326 | n = rb_next(n); |
3296 | } | 3327 | } |
3297 | return; | 3328 | return; |
@@ -3312,7 +3343,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
3312 | ext4_group_t groupnr; | 3343 | ext4_group_t groupnr; |
3313 | ext4_grpblk_t start; | 3344 | ext4_grpblk_t start; |
3314 | int preallocated = 0; | 3345 | int preallocated = 0; |
3315 | int count = 0; | ||
3316 | int len; | 3346 | int len; |
3317 | 3347 | ||
3318 | /* all form of preallocation discards first load group, | 3348 | /* all form of preallocation discards first load group, |
@@ -3335,7 +3365,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
3335 | BUG_ON(groupnr != group); | 3365 | BUG_ON(groupnr != group); |
3336 | ext4_set_bits(bitmap, start, len); | 3366 | ext4_set_bits(bitmap, start, len); |
3337 | preallocated += len; | 3367 | preallocated += len; |
3338 | count++; | ||
3339 | } | 3368 | } |
3340 | mb_debug(1, "prellocated %u for group %u\n", preallocated, group); | 3369 | mb_debug(1, "prellocated %u for group %u\n", preallocated, group); |
3341 | } | 3370 | } |
@@ -3412,6 +3441,7 @@ static noinline_for_stack int | |||
3412 | ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) | 3441 | ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) |
3413 | { | 3442 | { |
3414 | struct super_block *sb = ac->ac_sb; | 3443 | struct super_block *sb = ac->ac_sb; |
3444 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
3415 | struct ext4_prealloc_space *pa; | 3445 | struct ext4_prealloc_space *pa; |
3416 | struct ext4_group_info *grp; | 3446 | struct ext4_group_info *grp; |
3417 | struct ext4_inode_info *ei; | 3447 | struct ext4_inode_info *ei; |
@@ -3443,16 +3473,18 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) | |||
3443 | winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical; | 3473 | winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical; |
3444 | 3474 | ||
3445 | /* also, we should cover whole original request */ | 3475 | /* also, we should cover whole original request */ |
3446 | wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len; | 3476 | wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len); |
3447 | 3477 | ||
3448 | /* the smallest one defines real window */ | 3478 | /* the smallest one defines real window */ |
3449 | win = min(winl, wins); | 3479 | win = min(winl, wins); |
3450 | 3480 | ||
3451 | offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len; | 3481 | offs = ac->ac_o_ex.fe_logical % |
3482 | EXT4_C2B(sbi, ac->ac_b_ex.fe_len); | ||
3452 | if (offs && offs < win) | 3483 | if (offs && offs < win) |
3453 | win = offs; | 3484 | win = offs; |
3454 | 3485 | ||
3455 | ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win; | 3486 | ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - |
3487 | EXT4_B2C(sbi, win); | ||
3456 | BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); | 3488 | BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); |
3457 | BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); | 3489 | BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); |
3458 | } | 3490 | } |
@@ -3477,7 +3509,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) | |||
3477 | trace_ext4_mb_new_inode_pa(ac, pa); | 3509 | trace_ext4_mb_new_inode_pa(ac, pa); |
3478 | 3510 | ||
3479 | ext4_mb_use_inode_pa(ac, pa); | 3511 | ext4_mb_use_inode_pa(ac, pa); |
3480 | atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); | 3512 | atomic_add(pa->pa_free, &sbi->s_mb_preallocated); |
3481 | 3513 | ||
3482 | ei = EXT4_I(ac->ac_inode); | 3514 | ei = EXT4_I(ac->ac_inode); |
3483 | grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); | 3515 | grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); |
@@ -3592,7 +3624,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3592 | 3624 | ||
3593 | BUG_ON(pa->pa_deleted == 0); | 3625 | BUG_ON(pa->pa_deleted == 0); |
3594 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3626 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
3595 | grp_blk_start = pa->pa_pstart - bit; | 3627 | grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit); |
3596 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3628 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
3597 | end = bit + pa->pa_len; | 3629 | end = bit + pa->pa_len; |
3598 | 3630 | ||
@@ -3607,7 +3639,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3607 | free += next - bit; | 3639 | free += next - bit; |
3608 | 3640 | ||
3609 | trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); | 3641 | trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); |
3610 | trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit, | 3642 | trace_ext4_mb_release_inode_pa(pa, (grp_blk_start + |
3643 | EXT4_C2B(sbi, bit)), | ||
3611 | next - bit); | 3644 | next - bit); |
3612 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); | 3645 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); |
3613 | bit = next + 1; | 3646 | bit = next + 1; |
@@ -3690,7 +3723,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3690 | } | 3723 | } |
3691 | 3724 | ||
3692 | if (needed == 0) | 3725 | if (needed == 0) |
3693 | needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; | 3726 | needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1; |
3694 | 3727 | ||
3695 | INIT_LIST_HEAD(&list); | 3728 | INIT_LIST_HEAD(&list); |
3696 | repeat: | 3729 | repeat: |
@@ -3958,7 +3991,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
3958 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) | 3991 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) |
3959 | return; | 3992 | return; |
3960 | 3993 | ||
3961 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; | 3994 | size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len); |
3962 | isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) | 3995 | isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) |
3963 | >> bsbits; | 3996 | >> bsbits; |
3964 | 3997 | ||
@@ -3969,6 +4002,11 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
3969 | return; | 4002 | return; |
3970 | } | 4003 | } |
3971 | 4004 | ||
4005 | if (sbi->s_mb_group_prealloc <= 0) { | ||
4006 | ac->ac_flags |= EXT4_MB_STREAM_ALLOC; | ||
4007 | return; | ||
4008 | } | ||
4009 | |||
3972 | /* don't use group allocation for large files */ | 4010 | /* don't use group allocation for large files */ |
3973 | size = max(size, isize); | 4011 | size = max(size, isize); |
3974 | if (size > sbi->s_mb_stream_request) { | 4012 | if (size > sbi->s_mb_stream_request) { |
@@ -4007,8 +4045,8 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, | |||
4007 | len = ar->len; | 4045 | len = ar->len; |
4008 | 4046 | ||
4009 | /* just a dirty hack to filter too big requests */ | 4047 | /* just a dirty hack to filter too big requests */ |
4010 | if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10) | 4048 | if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10) |
4011 | len = EXT4_BLOCKS_PER_GROUP(sb) - 10; | 4049 | len = EXT4_CLUSTERS_PER_GROUP(sb) - 10; |
4012 | 4050 | ||
4013 | /* start searching from the goal */ | 4051 | /* start searching from the goal */ |
4014 | goal = ar->goal; | 4052 | goal = ar->goal; |
@@ -4019,18 +4057,15 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, | |||
4019 | 4057 | ||
4020 | /* set up allocation goals */ | 4058 | /* set up allocation goals */ |
4021 | memset(ac, 0, sizeof(struct ext4_allocation_context)); | 4059 | memset(ac, 0, sizeof(struct ext4_allocation_context)); |
4022 | ac->ac_b_ex.fe_logical = ar->logical; | 4060 | ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); |
4023 | ac->ac_status = AC_STATUS_CONTINUE; | 4061 | ac->ac_status = AC_STATUS_CONTINUE; |
4024 | ac->ac_sb = sb; | 4062 | ac->ac_sb = sb; |
4025 | ac->ac_inode = ar->inode; | 4063 | ac->ac_inode = ar->inode; |
4026 | ac->ac_o_ex.fe_logical = ar->logical; | 4064 | ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical; |
4027 | ac->ac_o_ex.fe_group = group; | 4065 | ac->ac_o_ex.fe_group = group; |
4028 | ac->ac_o_ex.fe_start = block; | 4066 | ac->ac_o_ex.fe_start = block; |
4029 | ac->ac_o_ex.fe_len = len; | 4067 | ac->ac_o_ex.fe_len = len; |
4030 | ac->ac_g_ex.fe_logical = ar->logical; | 4068 | ac->ac_g_ex = ac->ac_o_ex; |
4031 | ac->ac_g_ex.fe_group = group; | ||
4032 | ac->ac_g_ex.fe_start = block; | ||
4033 | ac->ac_g_ex.fe_len = len; | ||
4034 | ac->ac_flags = ar->flags; | 4069 | ac->ac_flags = ar->flags; |
4035 | 4070 | ||
4036 | /* we have to define context: we'll work with a file or | 4071 |
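
Rounding the logical start down to a cluster boundary, as the new fe_logical assignment above does, relies on s_cluster_ratio being a power of two, so a mask does the job. For example (ratio assumed):

    #include <stdio.h>

    int main(void)
    {
            const unsigned cluster_ratio = 8;    /* assumed; always a power of two */
            unsigned logical = 29;

            /* ar->logical & ~(s_cluster_ratio - 1): round down to cluster start */
            unsigned aligned = logical & ~(cluster_ratio - 1);
            printf("logical %u -> cluster-aligned %u\n", logical, aligned);
            return 0;   /* prints 29 -> 24 */
    }
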
@@ -4182,13 +4217,14 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) | |||
4182 | */ | 4217 | */ |
4183 | static int ext4_mb_release_context(struct ext4_allocation_context *ac) | 4218 | static int ext4_mb_release_context(struct ext4_allocation_context *ac) |
4184 | { | 4219 | { |
4220 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | ||
4185 | struct ext4_prealloc_space *pa = ac->ac_pa; | 4221 | struct ext4_prealloc_space *pa = ac->ac_pa; |
4186 | if (pa) { | 4222 | if (pa) { |
4187 | if (pa->pa_type == MB_GROUP_PA) { | 4223 | if (pa->pa_type == MB_GROUP_PA) { |
4188 | /* see comment in ext4_mb_use_group_pa() */ | 4224 | /* see comment in ext4_mb_use_group_pa() */ |
4189 | spin_lock(&pa->pa_lock); | 4225 | spin_lock(&pa->pa_lock); |
4190 | pa->pa_pstart += ac->ac_b_ex.fe_len; | 4226 | pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
4191 | pa->pa_lstart += ac->ac_b_ex.fe_len; | 4227 | pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
4192 | pa->pa_free -= ac->ac_b_ex.fe_len; | 4228 | pa->pa_free -= ac->ac_b_ex.fe_len; |
4193 | pa->pa_len -= ac->ac_b_ex.fe_len; | 4229 | pa->pa_len -= ac->ac_b_ex.fe_len; |
4194 | spin_unlock(&pa->pa_lock); | 4230 | spin_unlock(&pa->pa_lock); |
@@ -4249,13 +4285,17 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4249 | struct super_block *sb; | 4285 | struct super_block *sb; |
4250 | ext4_fsblk_t block = 0; | 4286 | ext4_fsblk_t block = 0; |
4251 | unsigned int inquota = 0; | 4287 | unsigned int inquota = 0; |
4252 | unsigned int reserv_blks = 0; | 4288 | unsigned int reserv_clstrs = 0; |
4253 | 4289 | ||
4254 | sb = ar->inode->i_sb; | 4290 | sb = ar->inode->i_sb; |
4255 | sbi = EXT4_SB(sb); | 4291 | sbi = EXT4_SB(sb); |
4256 | 4292 | ||
4257 | trace_ext4_request_blocks(ar); | 4293 | trace_ext4_request_blocks(ar); |
4258 | 4294 | ||
4295 | /* Allow use of the superuser reservation for the quota file */ | ||
4296 | if (IS_NOQUOTA(ar->inode)) | ||
4297 | ar->flags |= EXT4_MB_USE_ROOT_BLOCKS; | ||
4298 | |||
4259 | /* | 4299 | /* |
4260 | * For delayed allocation, we could skip the ENOSPC and | 4300 | * For delayed allocation, we could skip the ENOSPC and |
4261 | * EDQUOT check, as blocks and quotas have been already | 4301 | * EDQUOT check, as blocks and quotas have been already |
@@ -4269,7 +4309,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4269 | * and verify allocation doesn't exceed the quota limits. | 4309 | * and verify allocation doesn't exceed the quota limits. |
4270 | */ | 4310 | */ |
4271 | while (ar->len && | 4311 | while (ar->len && |
4272 | ext4_claim_free_blocks(sbi, ar->len, ar->flags)) { | 4312 | ext4_claim_free_clusters(sbi, ar->len, ar->flags)) { |
4273 | 4313 | ||
4274 | /* let others free the space */ | 4314 |
4275 | yield(); | 4315 | yield(); |
@@ -4279,12 +4319,14 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4279 | *errp = -ENOSPC; | 4319 | *errp = -ENOSPC; |
4280 | return 0; | 4320 | return 0; |
4281 | } | 4321 | } |
4282 | reserv_blks = ar->len; | 4322 | reserv_clstrs = ar->len; |
4283 | if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) { | 4323 | if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) { |
4284 | dquot_alloc_block_nofail(ar->inode, ar->len); | 4324 | dquot_alloc_block_nofail(ar->inode, |
4325 | EXT4_C2B(sbi, ar->len)); | ||
4285 | } else { | 4326 | } else { |
4286 | while (ar->len && | 4327 | while (ar->len && |
4287 | dquot_alloc_block(ar->inode, ar->len)) { | 4328 | dquot_alloc_block(ar->inode, |
4329 | EXT4_C2B(sbi, ar->len))) { | ||
4288 | 4330 | ||
4289 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | 4331 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; |
4290 | ar->len--; | 4332 | ar->len--; |
@@ -4328,7 +4370,7 @@ repeat: | |||
4328 | ext4_mb_new_preallocation(ac); | 4370 | ext4_mb_new_preallocation(ac); |
4329 | } | 4371 | } |
4330 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | 4372 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
4331 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); | 4373 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); |
4332 | if (*errp == -EAGAIN) { | 4374 | if (*errp == -EAGAIN) { |
4333 | /* | 4375 | /* |
4334 | * drop the reference that we took | 4376 | * drop the reference that we took |
@@ -4364,13 +4406,13 @@ out: | |||
4364 | if (ac) | 4406 | if (ac) |
4365 | kmem_cache_free(ext4_ac_cachep, ac); | 4407 | kmem_cache_free(ext4_ac_cachep, ac); |
4366 | if (inquota && ar->len < inquota) | 4408 | if (inquota && ar->len < inquota) |
4367 | dquot_free_block(ar->inode, inquota - ar->len); | 4409 | dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len)); |
4368 | if (!ar->len) { | 4410 | if (!ar->len) { |
4369 | if (!ext4_test_inode_state(ar->inode, | 4411 | if (!ext4_test_inode_state(ar->inode, |
4370 | EXT4_STATE_DELALLOC_RESERVED)) | 4412 | EXT4_STATE_DELALLOC_RESERVED)) |
4371 | /* release all the reserved blocks if non delalloc */ | 4413 | /* release all the reserved blocks if non delalloc */ |
4372 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 4414 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
4373 | reserv_blks); | 4415 | reserv_clstrs); |
4374 | } | 4416 | } |
4375 | 4417 | ||
4376 | trace_ext4_allocate_blocks(ar, (unsigned long long)block); | 4418 | trace_ext4_allocate_blocks(ar, (unsigned long long)block); |
@@ -4388,7 +4430,7 @@ static int can_merge(struct ext4_free_data *entry1, | |||
4388 | { | 4430 | { |
4389 | if ((entry1->t_tid == entry2->t_tid) && | 4431 | if ((entry1->t_tid == entry2->t_tid) && |
4390 | (entry1->group == entry2->group) && | 4432 | (entry1->group == entry2->group) && |
4391 | ((entry1->start_blk + entry1->count) == entry2->start_blk)) | 4433 | ((entry1->start_cluster + entry1->count) == entry2->start_cluster)) |
4392 | return 1; | 4434 | return 1; |
4393 | return 0; | 4435 | return 0; |
4394 | } | 4436 | } |
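
can_merge() now compares cluster offsets: two pending free extents coalesce only when they were freed in the same transaction, sit in the same group, and abut in cluster space. A standalone model:

    #include <stdio.h>

    struct free_data {
            unsigned t_tid;            /* transaction that freed the extent */
            unsigned group;
            unsigned start_cluster;    /* group-relative, cluster units */
            unsigned count;
    };

    static int can_merge(const struct free_data *a, const struct free_data *b)
    {
            return a->t_tid == b->t_tid && a->group == b->group &&
                   a->start_cluster + a->count == b->start_cluster;
    }

    int main(void)
    {
            struct free_data left  = { 7, 3, 100, 16 };
            struct free_data right = { 7, 3, 116,  8 };  /* starts where left ends */
            printf("mergeable: %d\n", can_merge(&left, &right));  /* 1 */
            return 0;
    }
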
@@ -4398,7 +4440,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4398 | struct ext4_free_data *new_entry) | 4440 | struct ext4_free_data *new_entry) |
4399 | { | 4441 | { |
4400 | ext4_group_t group = e4b->bd_group; | 4442 | ext4_group_t group = e4b->bd_group; |
4401 | ext4_grpblk_t block; | 4443 | ext4_grpblk_t cluster; |
4402 | struct ext4_free_data *entry; | 4444 | struct ext4_free_data *entry; |
4403 | struct ext4_group_info *db = e4b->bd_info; | 4445 | struct ext4_group_info *db = e4b->bd_info; |
4404 | struct super_block *sb = e4b->bd_sb; | 4446 | struct super_block *sb = e4b->bd_sb; |
@@ -4411,7 +4453,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4411 | BUG_ON(e4b->bd_buddy_page == NULL); | 4453 | BUG_ON(e4b->bd_buddy_page == NULL); |
4412 | 4454 | ||
4413 | new_node = &new_entry->node; | 4455 | new_node = &new_entry->node; |
4414 | block = new_entry->start_blk; | 4456 | cluster = new_entry->start_cluster; |
4415 | 4457 | ||
4416 | if (!*n) { | 4458 | if (!*n) { |
4417 | /* first free block extent. We need to | 4459 |
@@ -4425,13 +4467,14 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4425 | while (*n) { | 4467 | while (*n) { |
4426 | parent = *n; | 4468 | parent = *n; |
4427 | entry = rb_entry(parent, struct ext4_free_data, node); | 4469 | entry = rb_entry(parent, struct ext4_free_data, node); |
4428 | if (block < entry->start_blk) | 4470 | if (cluster < entry->start_cluster) |
4429 | n = &(*n)->rb_left; | 4471 | n = &(*n)->rb_left; |
4430 | else if (block >= (entry->start_blk + entry->count)) | 4472 | else if (cluster >= (entry->start_cluster + entry->count)) |
4431 | n = &(*n)->rb_right; | 4473 | n = &(*n)->rb_right; |
4432 | else { | 4474 | else { |
4433 | ext4_grp_locked_error(sb, group, 0, | 4475 | ext4_grp_locked_error(sb, group, 0, |
4434 | ext4_group_first_block_no(sb, group) + block, | 4476 | ext4_group_first_block_no(sb, group) + |
4477 | EXT4_C2B(sbi, cluster), | ||
4435 | "Block already on to-be-freed list"); | 4478 | "Block already on to-be-freed list"); |
4436 | return 0; | 4479 | return 0; |
4437 | } | 4480 | } |
@@ -4445,7 +4488,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4445 | if (node) { | 4488 | if (node) { |
4446 | entry = rb_entry(node, struct ext4_free_data, node); | 4489 | entry = rb_entry(node, struct ext4_free_data, node); |
4447 | if (can_merge(entry, new_entry)) { | 4490 | if (can_merge(entry, new_entry)) { |
4448 | new_entry->start_blk = entry->start_blk; | 4491 | new_entry->start_cluster = entry->start_cluster; |
4449 | new_entry->count += entry->count; | 4492 | new_entry->count += entry->count; |
4450 | rb_erase(node, &(db->bb_free_root)); | 4493 | rb_erase(node, &(db->bb_free_root)); |
4451 | spin_lock(&sbi->s_md_lock); | 4494 | spin_lock(&sbi->s_md_lock); |
@@ -4496,6 +4539,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4496 | ext4_group_t block_group; | 4539 | ext4_group_t block_group; |
4497 | struct ext4_sb_info *sbi; | 4540 | struct ext4_sb_info *sbi; |
4498 | struct ext4_buddy e4b; | 4541 | struct ext4_buddy e4b; |
4542 | unsigned int count_clusters; | ||
4499 | int err = 0; | 4543 | int err = 0; |
4500 | int ret; | 4544 | int ret; |
4501 | 4545 | ||
@@ -4544,6 +4588,38 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4544 | if (!ext4_should_writeback_data(inode)) | 4588 | if (!ext4_should_writeback_data(inode)) |
4545 | flags |= EXT4_FREE_BLOCKS_METADATA; | 4589 | flags |= EXT4_FREE_BLOCKS_METADATA; |
4546 | 4590 | ||
4591 | /* | ||
4592 | * If the extent to be freed does not begin on a cluster | ||
4593 | * boundary, we need to deal with partial clusters at the | ||
4594 | * beginning and end of the extent. Normally we will free | ||
4595 | * blocks at the beginning or the end unless we are explicitly | ||
4596 | * requested to avoid doing so. | ||
4597 | */ | ||
4598 | overflow = block & (sbi->s_cluster_ratio - 1); | ||
4599 | if (overflow) { | ||
4600 | if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { | ||
4601 | overflow = sbi->s_cluster_ratio - overflow; | ||
4602 | block += overflow; | ||
4603 | if (count > overflow) | ||
4604 | count -= overflow; | ||
4605 | else | ||
4606 | return; | ||
4607 | } else { | ||
4608 | block -= overflow; | ||
4609 | count += overflow; | ||
4610 | } | ||
4611 | } | ||
4612 | overflow = count & (sbi->s_cluster_ratio - 1); | ||
4613 | if (overflow) { | ||
4614 | if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { | ||
4615 | if (count > overflow) | ||
4616 | count -= overflow; | ||
4617 | else | ||
4618 | return; | ||
4619 | } else | ||
4620 | count += sbi->s_cluster_ratio - overflow; | ||
4621 | } | ||
4622 | |||
4547 | do_more: | 4623 | do_more: |
4548 | overflow = 0; | 4624 | overflow = 0; |
4549 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | 4625 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
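
The inserted head/tail handling rounds the freed range out to cluster boundaries by default (so whole clusters are released), or trims it inward when the caller flags a shared first or last cluster. Working the default path through sample numbers, with an assumed ratio:

    #include <stdio.h>

    int main(void)
    {
            const unsigned long ratio = 4;        /* assumed blocks per cluster */
            unsigned long block = 1001, count = 9;

            /* default path: expand the range outward to cluster boundaries */
            unsigned long overflow = block & (ratio - 1);
            if (overflow) {                       /* head: start 1001 -> 1000 */
                    block -= overflow;
                    count += overflow;
            }
            overflow = count & (ratio - 1);
            if (overflow)                         /* tail: 10 -> 12 blocks */
                    count += ratio - overflow;

            printf("free blocks %lu..%lu (%lu clusters)\n",
                   block, block + count - 1, count / ratio);
            return 0;   /* 1000..1011, 3 whole clusters */
    }
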
@@ -4552,10 +4628,12 @@ do_more: | |||
4552 | * Check to see if we are freeing blocks across a group | 4628 | * Check to see if we are freeing blocks across a group |
4553 | * boundary. | 4629 | * boundary. |
4554 | */ | 4630 | */ |
4555 | if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { | 4631 | if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) { |
4556 | overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); | 4632 | overflow = EXT4_C2B(sbi, bit) + count - |
4633 | EXT4_BLOCKS_PER_GROUP(sb); | ||
4557 | count -= overflow; | 4634 | count -= overflow; |
4558 | } | 4635 | } |
4636 | count_clusters = EXT4_B2C(sbi, count); | ||
4559 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); | 4637 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
4560 | if (!bitmap_bh) { | 4638 | if (!bitmap_bh) { |
4561 | err = -EIO; | 4639 | err = -EIO; |
@@ -4570,9 +4648,9 @@ do_more: | |||
4570 | if (in_range(ext4_block_bitmap(sb, gdp), block, count) || | 4648 | if (in_range(ext4_block_bitmap(sb, gdp), block, count) || |
4571 | in_range(ext4_inode_bitmap(sb, gdp), block, count) || | 4649 | in_range(ext4_inode_bitmap(sb, gdp), block, count) || |
4572 | in_range(block, ext4_inode_table(sb, gdp), | 4650 | in_range(block, ext4_inode_table(sb, gdp), |
4573 | EXT4_SB(sb)->s_itb_per_group) || | 4651 | EXT4_SB(sb)->s_itb_per_group) || |
4574 | in_range(block + count - 1, ext4_inode_table(sb, gdp), | 4652 | in_range(block + count - 1, ext4_inode_table(sb, gdp), |
4575 | EXT4_SB(sb)->s_itb_per_group)) { | 4653 | EXT4_SB(sb)->s_itb_per_group)) { |
4576 | 4654 | ||
4577 | ext4_error(sb, "Freeing blocks in system zone - " | 4655 | ext4_error(sb, "Freeing blocks in system zone - " |
4578 | "Block = %llu, count = %lu", block, count); | 4656 | "Block = %llu, count = %lu", block, count); |
@@ -4597,11 +4675,11 @@ do_more: | |||
4597 | #ifdef AGGRESSIVE_CHECK | 4675 | #ifdef AGGRESSIVE_CHECK |
4598 | { | 4676 | { |
4599 | int i; | 4677 | int i; |
4600 | for (i = 0; i < count; i++) | 4678 | for (i = 0; i < count_clusters; i++) |
4601 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); | 4679 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); |
4602 | } | 4680 | } |
4603 | #endif | 4681 | #endif |
4604 | trace_ext4_mballoc_free(sb, inode, block_group, bit, count); | 4682 | trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters); |
4605 | 4683 | ||
4606 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | 4684 | err = ext4_mb_load_buddy(sb, block_group, &e4b); |
4607 | if (err) | 4685 | if (err) |
@@ -4618,13 +4696,13 @@ do_more: | |||
4618 | err = -ENOMEM; | 4696 | err = -ENOMEM; |
4619 | goto error_return; | 4697 | goto error_return; |
4620 | } | 4698 | } |
4621 | new_entry->start_blk = bit; | 4699 | new_entry->start_cluster = bit; |
4622 | new_entry->group = block_group; | 4700 | new_entry->group = block_group; |
4623 | new_entry->count = count; | 4701 | new_entry->count = count_clusters; |
4624 | new_entry->t_tid = handle->h_transaction->t_tid; | 4702 | new_entry->t_tid = handle->h_transaction->t_tid; |
4625 | 4703 | ||
4626 | ext4_lock_group(sb, block_group); | 4704 | ext4_lock_group(sb, block_group); |
4627 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4705 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); |
4628 | ext4_mb_free_metadata(handle, &e4b, new_entry); | 4706 | ext4_mb_free_metadata(handle, &e4b, new_entry); |
4629 | } else { | 4707 | } else { |
4630 | /* need to update group_info->bb_free and bitmap | 4708 | /* need to update group_info->bb_free and bitmap |
@@ -4632,25 +4710,29 @@ do_more: | |||
4632 | * them with group lock_held | 4710 | * them with group lock_held |
4633 | */ | 4711 | */ |
4634 | ext4_lock_group(sb, block_group); | 4712 | ext4_lock_group(sb, block_group); |
4635 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4713 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); |
4636 | mb_free_blocks(inode, &e4b, bit, count); | 4714 | mb_free_blocks(inode, &e4b, bit, count_clusters); |
4637 | } | 4715 | } |
4638 | 4716 | ||
4639 | ret = ext4_free_blks_count(sb, gdp) + count; | 4717 | ret = ext4_free_group_clusters(sb, gdp) + count_clusters; |
4640 | ext4_free_blks_set(sb, gdp, ret); | 4718 | ext4_free_group_clusters_set(sb, gdp, ret); |
4641 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); | 4719 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); |
4642 | ext4_unlock_group(sb, block_group); | 4720 | ext4_unlock_group(sb, block_group); |
4643 | percpu_counter_add(&sbi->s_freeblocks_counter, count); | 4721 | percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); |
4644 | 4722 | ||
4645 | if (sbi->s_log_groups_per_flex) { | 4723 | if (sbi->s_log_groups_per_flex) { |
4646 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | 4724 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); |
4647 | atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks); | 4725 | atomic_add(count_clusters, |
4726 | &sbi->s_flex_groups[flex_group].free_clusters); | ||
4648 | } | 4727 | } |
4649 | 4728 | ||
4650 | ext4_mb_unload_buddy(&e4b); | 4729 | ext4_mb_unload_buddy(&e4b); |
4651 | 4730 | ||
4652 | freed += count; | 4731 | freed += count; |
4653 | 4732 | ||
4733 | if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) | ||
4734 | dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); | ||
4735 | |||
4654 | /* We dirtied the bitmap block */ | 4736 | /* We dirtied the bitmap block */ |
4655 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | 4737 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); |
4656 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | 4738 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); |
@@ -4669,8 +4751,6 @@ do_more: | |||
4669 | } | 4751 | } |
4670 | ext4_mark_super_dirty(sb); | 4752 | ext4_mark_super_dirty(sb); |
4671 | error_return: | 4753 | error_return: |
4672 | if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) | ||
4673 | dquot_free_block(inode, freed); | ||
4674 | brelse(bitmap_bh); | 4754 | brelse(bitmap_bh); |
4675 | ext4_std_error(sb, err); | 4755 | ext4_std_error(sb, err); |
4676 | return; | 4756 | return; |
@@ -4778,16 +4858,17 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, | |||
4778 | ext4_lock_group(sb, block_group); | 4858 | ext4_lock_group(sb, block_group); |
4779 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4859 | mb_clear_bits(bitmap_bh->b_data, bit, count); |
4780 | mb_free_blocks(NULL, &e4b, bit, count); | 4860 | mb_free_blocks(NULL, &e4b, bit, count); |
4781 | blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); | 4861 | blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc); |
4782 | ext4_free_blks_set(sb, desc, blk_free_count); | 4862 | ext4_free_group_clusters_set(sb, desc, blk_free_count); |
4783 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); | 4863 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); |
4784 | ext4_unlock_group(sb, block_group); | 4864 | ext4_unlock_group(sb, block_group); |
4785 | percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); | 4865 | percpu_counter_add(&sbi->s_freeclusters_counter, |
4866 | EXT4_B2C(sbi, blocks_freed)); | ||
4786 | 4867 | ||
4787 | if (sbi->s_log_groups_per_flex) { | 4868 | if (sbi->s_log_groups_per_flex) { |
4788 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | 4869 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); |
4789 | atomic_add(blocks_freed, | 4870 | atomic_add(EXT4_B2C(sbi, blocks_freed), |
4790 | &sbi->s_flex_groups[flex_group].free_blocks); | 4871 | &sbi->s_flex_groups[flex_group].free_clusters); |
4791 | } | 4872 | } |
4792 | 4873 | ||
4793 | ext4_mb_unload_buddy(&e4b); | 4874 | ext4_mb_unload_buddy(&e4b); |
@@ -4948,7 +5029,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4948 | struct ext4_group_info *grp; | 5029 | struct ext4_group_info *grp; |
4949 | ext4_group_t first_group, last_group; | 5030 | ext4_group_t first_group, last_group; |
4950 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); | 5031 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); |
4951 | ext4_grpblk_t cnt = 0, first_block, last_block; | 5032 | ext4_grpblk_t cnt = 0, first_cluster, last_cluster; |
4952 | uint64_t start, len, minlen, trimmed = 0; | 5033 | uint64_t start, len, minlen, trimmed = 0; |
4953 | ext4_fsblk_t first_data_blk = | 5034 | ext4_fsblk_t first_data_blk = |
4954 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 5035 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
@@ -4958,7 +5039,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4958 | len = range->len >> sb->s_blocksize_bits; | 5039 | len = range->len >> sb->s_blocksize_bits; |
4959 | minlen = range->minlen >> sb->s_blocksize_bits; | 5040 | minlen = range->minlen >> sb->s_blocksize_bits; |
4960 | 5041 | ||
4961 | if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) | 5042 | if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb))) |
4962 | return -EINVAL; | 5043 | return -EINVAL; |
4963 | if (start + len <= first_data_blk) | 5044 | if (start + len <= first_data_blk) |
4964 | goto out; | 5045 | goto out; |
@@ -4969,11 +5050,11 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4969 | 5050 | ||
4970 | /* Determine first and last group to examine based on start and len */ | 5051 | /* Determine first and last group to examine based on start and len */ |
4971 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, | 5052 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, |
4972 | &first_group, &first_block); | 5053 | &first_group, &first_cluster); |
4973 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), | 5054 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), |
4974 | &last_group, &last_block); | 5055 | &last_group, &last_cluster); |
4975 | last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; | 5056 | last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; |
4976 | last_block = EXT4_BLOCKS_PER_GROUP(sb); | 5057 | last_cluster = EXT4_CLUSTERS_PER_GROUP(sb); |
4977 | 5058 | ||
4978 | if (first_group > last_group) | 5059 | if (first_group > last_group) |
4979 | return -EINVAL; | 5060 | return -EINVAL; |
@@ -4993,20 +5074,20 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4993 | * change it for the last group in which case start + | 5074 | * change it for the last group in which case start + |
4994 | * len < EXT4_BLOCKS_PER_GROUP(sb). | 5075 | * len < EXT4_BLOCKS_PER_GROUP(sb). |
4995 | */ | 5076 | */ |
4996 | if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb)) | 5077 | if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb)) |
4997 | last_block = first_block + len; | 5078 | last_cluster = first_cluster + len; |
4998 | len -= last_block - first_block; | 5079 | len -= last_cluster - first_cluster; |
4999 | 5080 | ||
5000 | if (grp->bb_free >= minlen) { | 5081 | if (grp->bb_free >= minlen) { |
5001 | cnt = ext4_trim_all_free(sb, group, first_block, | 5082 | cnt = ext4_trim_all_free(sb, group, first_cluster, |
5002 | last_block, minlen); | 5083 | last_cluster, minlen); |
5003 | if (cnt < 0) { | 5084 | if (cnt < 0) { |
5004 | ret = cnt; | 5085 | ret = cnt; |
5005 | break; | 5086 | break; |
5006 | } | 5087 | } |
5007 | } | 5088 | } |
5008 | trimmed += cnt; | 5089 | trimmed += cnt; |
5009 | first_block = 0; | 5090 | first_cluster = 0; |
5010 | } | 5091 | } |
5011 | range->len = trimmed * sb->s_blocksize; | 5092 | range->len = trimmed * sb->s_blocksize; |
5012 | 5093 | ||
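The ext4_trim_fs() hunk above is the bigalloc unit change in miniature: the per-group trim range is now clamped in cluster units rather than block units. A minimal userspace model of the clamping loop, assuming an illustrative 32768-clusters-per-group geometry (the constant and names are stand-ins, not the kernel's):

    #include <stdint.h>
    #include <stdio.h>

    #define CLUSTERS_PER_GROUP 32768u  /* stand-in for EXT4_CLUSTERS_PER_GROUP(sb) */

    /* Model of the ext4_trim_fs() loop: only the final group may end
     * short of CLUSTERS_PER_GROUP; every later group starts at offset 0. */
    static void trim_groups(uint32_t group, uint32_t first_cluster, uint64_t len)
    {
        while (len > 0) {
            uint32_t last_cluster = CLUSTERS_PER_GROUP;

            if (first_cluster + len < CLUSTERS_PER_GROUP)
                last_cluster = (uint32_t)(first_cluster + len);
            len -= last_cluster - first_cluster;

            printf("group %u: trim clusters [%u, %u)\n",
                   (unsigned)group++, (unsigned)first_cluster,
                   (unsigned)last_cluster);
            first_cluster = 0;
        }
    }

    int main(void)
    {
        trim_groups(0, 100, 70000);  /* spans three groups */
        return 0;
    }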
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 9d4a636b546..47705f3285e 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -106,7 +106,7 @@ struct ext4_free_data { | |||
106 | ext4_group_t group; | 106 | ext4_group_t group; |
107 | 107 | ||
108 | /* free block extent */ | 108 | /* free block extent */ |
109 | ext4_grpblk_t start_blk; | 109 | ext4_grpblk_t start_cluster; |
110 | ext4_grpblk_t count; | 110 | ext4_grpblk_t count; |
111 | 111 | ||
112 | /* transaction which freed this extent */ | 112 | /* transaction which freed this extent */ |
@@ -139,9 +139,9 @@ enum { | |||
139 | 139 | ||
140 | struct ext4_free_extent { | 140 | struct ext4_free_extent { |
141 | ext4_lblk_t fe_logical; | 141 | ext4_lblk_t fe_logical; |
142 | ext4_grpblk_t fe_start; | 142 | ext4_grpblk_t fe_start; /* In cluster units */ |
143 | ext4_group_t fe_group; | 143 | ext4_group_t fe_group; |
144 | ext4_grpblk_t fe_len; | 144 | ext4_grpblk_t fe_len; /* In cluster units */ |
145 | }; | 145 | }; |
146 | 146 | ||
147 | /* | 147 | /* |
@@ -175,7 +175,7 @@ struct ext4_allocation_context { | |||
175 | /* the best found extent */ | 175 | /* the best found extent */ |
176 | struct ext4_free_extent ac_b_ex; | 176 | struct ext4_free_extent ac_b_ex; |
177 | 177 | ||
178 | /* copy of the bext found extent taken before preallocation efforts */ | 178 | /* copy of the best found extent taken before preallocation efforts */ |
179 | struct ext4_free_extent ac_f_ex; | 179 | struct ext4_free_extent ac_f_ex; |
180 | 180 | ||
181 | /* number of iterations done. we have to track to limit searching */ | 181 | /* number of iterations done. we have to track to limit searching */ |
@@ -216,6 +216,7 @@ struct ext4_buddy { | |||
216 | static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, | 216 | static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, |
217 | struct ext4_free_extent *fex) | 217 | struct ext4_free_extent *fex) |
218 | { | 218 | { |
219 | return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start; | 219 | return ext4_group_first_block_no(sb, fex->fe_group) + |
220 | (fex->fe_start << EXT4_SB(sb)->s_cluster_bits); | ||
220 | } | 221 | } |
221 | #endif | 222 | #endif |
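With the free-extent fields now carried in cluster units, ext4_grp_offs_to_block() has to scale fe_start back up by s_cluster_bits before adding it to the group's first block number. The arithmetic, as a standalone model with an illustrative 4k-block/64k-cluster geometry (constants and names are assumptions for the sketch):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t fsblk_t;

    #define BLOCKS_PER_GROUP 32768u  /* stand-in for EXT4_BLOCKS_PER_GROUP(sb) */
    #define CLUSTER_BITS 4           /* 64k clusters of 4k blocks */

    static fsblk_t group_first_block(uint32_t group)
    {
        /* assumes s_first_data_block == 0, as with 4k blocks */
        return (fsblk_t)group * BLOCKS_PER_GROUP;
    }

    /* Cluster-aware equivalent of ext4_grp_offs_to_block(). */
    static fsblk_t grp_offs_to_block(uint32_t group, uint32_t start_cluster)
    {
        return group_first_block(group) +
               ((fsblk_t)start_cluster << CLUSTER_BITS);
    }

    int main(void)
    {
        /* cluster 3 of group 2 -> block 65536 + 48 */
        printf("%llu\n", (unsigned long long)grp_offs_to_block(2, 3));
        return 0;
    }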
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b57b98fb44d..f729377bf04 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -15,19 +15,18 @@ | |||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include "ext4_jbd2.h" | 17 | #include "ext4_jbd2.h" |
18 | #include "ext4_extents.h" | ||
19 | 18 | ||
20 | /* | 19 | /* |
21 | * The contiguous blocks details which can be | 20 | * The contiguous blocks details which can be |
22 | * represented by a single extent | 21 | * represented by a single extent |
23 | */ | 22 | */ |
24 | struct list_blocks_struct { | 23 | struct migrate_struct { |
25 | ext4_lblk_t first_block, last_block; | 24 | ext4_lblk_t first_block, last_block, curr_block; |
26 | ext4_fsblk_t first_pblock, last_pblock; | 25 | ext4_fsblk_t first_pblock, last_pblock; |
27 | }; | 26 | }; |
28 | 27 | ||
29 | static int finish_range(handle_t *handle, struct inode *inode, | 28 | static int finish_range(handle_t *handle, struct inode *inode, |
30 | struct list_blocks_struct *lb) | 29 | struct migrate_struct *lb) |
31 | 30 | ||
32 | { | 31 | { |
33 | int retval = 0, needed; | 32 | int retval = 0, needed; |
@@ -87,8 +86,7 @@ err_out: | |||
87 | } | 86 | } |
88 | 87 | ||
89 | static int update_extent_range(handle_t *handle, struct inode *inode, | 88 | static int update_extent_range(handle_t *handle, struct inode *inode, |
90 | ext4_fsblk_t pblock, ext4_lblk_t blk_num, | 89 | ext4_fsblk_t pblock, struct migrate_struct *lb) |
91 | struct list_blocks_struct *lb) | ||
92 | { | 90 | { |
93 | int retval; | 91 | int retval; |
94 | /* | 92 | /* |
@@ -96,9 +94,10 @@ static int update_extent_range(handle_t *handle, struct inode *inode, | |||
96 | */ | 94 | */ |
97 | if (lb->first_pblock && | 95 | if (lb->first_pblock && |
98 | (lb->last_pblock+1 == pblock) && | 96 | (lb->last_pblock+1 == pblock) && |
99 | (lb->last_block+1 == blk_num)) { | 97 | (lb->last_block+1 == lb->curr_block)) { |
100 | lb->last_pblock = pblock; | 98 | lb->last_pblock = pblock; |
101 | lb->last_block = blk_num; | 99 | lb->last_block = lb->curr_block; |
100 | lb->curr_block++; | ||
102 | return 0; | 101 | return 0; |
103 | } | 102 | } |
104 | /* | 103 | /* |
@@ -106,64 +105,49 @@ static int update_extent_range(handle_t *handle, struct inode *inode, | |||
106 | */ | 105 | */ |
107 | retval = finish_range(handle, inode, lb); | 106 | retval = finish_range(handle, inode, lb); |
108 | lb->first_pblock = lb->last_pblock = pblock; | 107 | lb->first_pblock = lb->last_pblock = pblock; |
109 | lb->first_block = lb->last_block = blk_num; | 108 | lb->first_block = lb->last_block = lb->curr_block; |
110 | 109 | lb->curr_block++; | |
111 | return retval; | 110 | return retval; |
112 | } | 111 | } |
113 | 112 | ||
114 | static int update_ind_extent_range(handle_t *handle, struct inode *inode, | 113 | static int update_ind_extent_range(handle_t *handle, struct inode *inode, |
115 | ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, | 114 | ext4_fsblk_t pblock, |
116 | struct list_blocks_struct *lb) | 115 | struct migrate_struct *lb) |
117 | { | 116 | { |
118 | struct buffer_head *bh; | 117 | struct buffer_head *bh; |
119 | __le32 *i_data; | 118 | __le32 *i_data; |
120 | int i, retval = 0; | 119 | int i, retval = 0; |
121 | ext4_lblk_t blk_count = *blk_nump; | ||
122 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; | 120 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; |
123 | 121 | ||
124 | if (!pblock) { | ||
125 | /* Only update the file block number */ | ||
126 | *blk_nump += max_entries; | ||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | bh = sb_bread(inode->i_sb, pblock); | 122 | bh = sb_bread(inode->i_sb, pblock); |
131 | if (!bh) | 123 | if (!bh) |
132 | return -EIO; | 124 | return -EIO; |
133 | 125 | ||
134 | i_data = (__le32 *)bh->b_data; | 126 | i_data = (__le32 *)bh->b_data; |
135 | for (i = 0; i < max_entries; i++, blk_count++) { | 127 | for (i = 0; i < max_entries; i++) { |
136 | if (i_data[i]) { | 128 | if (i_data[i]) { |
137 | retval = update_extent_range(handle, inode, | 129 | retval = update_extent_range(handle, inode, |
138 | le32_to_cpu(i_data[i]), | 130 | le32_to_cpu(i_data[i]), lb); |
139 | blk_count, lb); | ||
140 | if (retval) | 131 | if (retval) |
141 | break; | 132 | break; |
133 | } else { | ||
134 | lb->curr_block++; | ||
142 | } | 135 | } |
143 | } | 136 | } |
144 | |||
145 | /* Update the file block number */ | ||
146 | *blk_nump = blk_count; | ||
147 | put_bh(bh); | 137 | put_bh(bh); |
148 | return retval; | 138 | return retval; |
149 | 139 | ||
150 | } | 140 | } |
151 | 141 | ||
152 | static int update_dind_extent_range(handle_t *handle, struct inode *inode, | 142 | static int update_dind_extent_range(handle_t *handle, struct inode *inode, |
153 | ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, | 143 | ext4_fsblk_t pblock, |
154 | struct list_blocks_struct *lb) | 144 | struct migrate_struct *lb) |
155 | { | 145 | { |
156 | struct buffer_head *bh; | 146 | struct buffer_head *bh; |
157 | __le32 *i_data; | 147 | __le32 *i_data; |
158 | int i, retval = 0; | 148 | int i, retval = 0; |
159 | ext4_lblk_t blk_count = *blk_nump; | ||
160 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; | 149 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; |
161 | 150 | ||
162 | if (!pblock) { | ||
163 | /* Only update the file block number */ | ||
164 | *blk_nump += max_entries * max_entries; | ||
165 | return 0; | ||
166 | } | ||
167 | bh = sb_bread(inode->i_sb, pblock); | 151 | bh = sb_bread(inode->i_sb, pblock); |
168 | if (!bh) | 152 | if (!bh) |
169 | return -EIO; | 153 | return -EIO; |
@@ -172,38 +156,28 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode, | |||
172 | for (i = 0; i < max_entries; i++) { | 156 | for (i = 0; i < max_entries; i++) { |
173 | if (i_data[i]) { | 157 | if (i_data[i]) { |
174 | retval = update_ind_extent_range(handle, inode, | 158 | retval = update_ind_extent_range(handle, inode, |
175 | le32_to_cpu(i_data[i]), | 159 | le32_to_cpu(i_data[i]), lb); |
176 | &blk_count, lb); | ||
177 | if (retval) | 160 | if (retval) |
178 | break; | 161 | break; |
179 | } else { | 162 | } else { |
180 | /* Only update the file block number */ | 163 | /* Only update the file block number */ |
181 | blk_count += max_entries; | 164 | lb->curr_block += max_entries; |
182 | } | 165 | } |
183 | } | 166 | } |
184 | |||
185 | /* Update the file block number */ | ||
186 | *blk_nump = blk_count; | ||
187 | put_bh(bh); | 167 | put_bh(bh); |
188 | return retval; | 168 | return retval; |
189 | 169 | ||
190 | } | 170 | } |
191 | 171 | ||
192 | static int update_tind_extent_range(handle_t *handle, struct inode *inode, | 172 | static int update_tind_extent_range(handle_t *handle, struct inode *inode, |
193 | ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, | 173 | ext4_fsblk_t pblock, |
194 | struct list_blocks_struct *lb) | 174 | struct migrate_struct *lb) |
195 | { | 175 | { |
196 | struct buffer_head *bh; | 176 | struct buffer_head *bh; |
197 | __le32 *i_data; | 177 | __le32 *i_data; |
198 | int i, retval = 0; | 178 | int i, retval = 0; |
199 | ext4_lblk_t blk_count = *blk_nump; | ||
200 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; | 179 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; |
201 | 180 | ||
202 | if (!pblock) { | ||
203 | /* Only update the file block number */ | ||
204 | *blk_nump += max_entries * max_entries * max_entries; | ||
205 | return 0; | ||
206 | } | ||
207 | bh = sb_bread(inode->i_sb, pblock); | 181 | bh = sb_bread(inode->i_sb, pblock); |
208 | if (!bh) | 182 | if (!bh) |
209 | return -EIO; | 183 | return -EIO; |
@@ -212,16 +186,14 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode, | |||
212 | for (i = 0; i < max_entries; i++) { | 186 | for (i = 0; i < max_entries; i++) { |
213 | if (i_data[i]) { | 187 | if (i_data[i]) { |
214 | retval = update_dind_extent_range(handle, inode, | 188 | retval = update_dind_extent_range(handle, inode, |
215 | le32_to_cpu(i_data[i]), | 189 | le32_to_cpu(i_data[i]), lb); |
216 | &blk_count, lb); | ||
217 | if (retval) | 190 | if (retval) |
218 | break; | 191 | break; |
219 | } else | 192 | } else { |
220 | /* Only update the file block number */ | 193 | /* Only update the file block number */ |
221 | blk_count += max_entries * max_entries; | 194 | lb->curr_block += max_entries * max_entries; |
195 | } | ||
222 | } | 196 | } |
223 | /* Update the file block number */ | ||
224 | *blk_nump = blk_count; | ||
225 | put_bh(bh); | 197 | put_bh(bh); |
226 | return retval; | 198 | return retval; |
227 | 199 | ||
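The three update_*_extent_range() rewrites above all serve one simplification: instead of threading a blk_count in/out parameter through every level, migrate_struct.curr_block tracks the logical file position, advancing by one per direct slot (mapped or hole) and by a whole subtree's worth of blocks when an indirect pointer is absent. A compact model of the counting rule (names and sizes are illustrative):

    #include <stdint.h>

    #define MAX_ENTRIES 1024u  /* pointers per indirect block (blocksize / 4) */

    struct migrate_ctx {
        uint64_t curr_block;   /* logical file block the walk has reached */
    };

    static void record_block(uint32_t pblock, struct migrate_ctx *lb)
    {
        (void)pblock;          /* the kernel extends or flushes an extent here */
        lb->curr_block++;
    }

    /* One indirect block: every slot covers exactly one file block. */
    static void walk_ind(const uint32_t *slots, struct migrate_ctx *lb)
    {
        for (unsigned i = 0; i < MAX_ENTRIES; i++) {
            if (slots[i])
                record_block(slots[i], lb);  /* mapped entry */
            else
                lb->curr_block++;            /* hole: just advance */
        }
    }

    /* An absent double-indirect pointer skips MAX_ENTRIES^2 file blocks. */
    static void skip_dind(struct migrate_ctx *lb)
    {
        lb->curr_block += (uint64_t)MAX_ENTRIES * MAX_ENTRIES;
    }

    int main(void)
    {
        uint32_t slots[MAX_ENTRIES] = { [7] = 42 };  /* one mapped block */
        struct migrate_ctx lb = { 0 };

        walk_ind(slots, &lb);
        skip_dind(&lb);
        return 0;
    }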
@@ -462,12 +434,12 @@ int ext4_ext_migrate(struct inode *inode) | |||
462 | handle_t *handle; | 434 | handle_t *handle; |
463 | int retval = 0, i; | 435 | int retval = 0, i; |
464 | __le32 *i_data; | 436 | __le32 *i_data; |
465 | ext4_lblk_t blk_count = 0; | ||
466 | struct ext4_inode_info *ei; | 437 | struct ext4_inode_info *ei; |
467 | struct inode *tmp_inode = NULL; | 438 | struct inode *tmp_inode = NULL; |
468 | struct list_blocks_struct lb; | 439 | struct migrate_struct lb; |
469 | unsigned long max_entries; | 440 | unsigned long max_entries; |
470 | __u32 goal; | 441 | __u32 goal; |
442 | uid_t owner[2]; | ||
471 | 443 | ||
472 | /* | 444 | /* |
473 | * If the filesystem does not support extents, or the inode | 445 | * If the filesystem does not support extents, or the inode |
@@ -495,10 +467,12 @@ int ext4_ext_migrate(struct inode *inode) | |||
495 | } | 467 | } |
496 | goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * | 468 | goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * |
497 | EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; | 469 | EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; |
470 | owner[0] = inode->i_uid; | ||
471 | owner[1] = inode->i_gid; | ||
498 | tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, | 472 | tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, |
499 | S_IFREG, NULL, goal); | 473 | S_IFREG, NULL, goal, owner); |
500 | if (IS_ERR(tmp_inode)) { | 474 | if (IS_ERR(tmp_inode)) { |
501 | retval = -ENOMEM; | 475 | retval = PTR_ERR(inode); |
502 | ext4_journal_stop(handle); | 476 | ext4_journal_stop(handle); |
503 | return retval; | 477 | return retval; |
504 | } | 478 | } |
@@ -551,35 +525,32 @@ int ext4_ext_migrate(struct inode *inode) | |||
551 | 525 | ||
552 | /* 32 bit block address 4 bytes */ | 526 | /* 32 bit block address 4 bytes */ |
553 | max_entries = inode->i_sb->s_blocksize >> 2; | 527 | max_entries = inode->i_sb->s_blocksize >> 2; |
554 | for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { | 528 | for (i = 0; i < EXT4_NDIR_BLOCKS; i++) { |
555 | if (i_data[i]) { | 529 | if (i_data[i]) { |
556 | retval = update_extent_range(handle, tmp_inode, | 530 | retval = update_extent_range(handle, tmp_inode, |
557 | le32_to_cpu(i_data[i]), | 531 | le32_to_cpu(i_data[i]), &lb); |
558 | blk_count, &lb); | ||
559 | if (retval) | 532 | if (retval) |
560 | goto err_out; | 533 | goto err_out; |
561 | } | 534 | } else |
535 | lb.curr_block++; | ||
562 | } | 536 | } |
563 | if (i_data[EXT4_IND_BLOCK]) { | 537 | if (i_data[EXT4_IND_BLOCK]) { |
564 | retval = update_ind_extent_range(handle, tmp_inode, | 538 | retval = update_ind_extent_range(handle, tmp_inode, |
565 | le32_to_cpu(i_data[EXT4_IND_BLOCK]), | 539 | le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb); |
566 | &blk_count, &lb); | ||
567 | if (retval) | 540 | if (retval) |
568 | goto err_out; | 541 | goto err_out; |
569 | } else | 542 | } else |
570 | blk_count += max_entries; | 543 | lb.curr_block += max_entries; |
571 | if (i_data[EXT4_DIND_BLOCK]) { | 544 | if (i_data[EXT4_DIND_BLOCK]) { |
572 | retval = update_dind_extent_range(handle, tmp_inode, | 545 | retval = update_dind_extent_range(handle, tmp_inode, |
573 | le32_to_cpu(i_data[EXT4_DIND_BLOCK]), | 546 | le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb); |
574 | &blk_count, &lb); | ||
575 | if (retval) | 547 | if (retval) |
576 | goto err_out; | 548 | goto err_out; |
577 | } else | 549 | } else |
578 | blk_count += max_entries * max_entries; | 550 | lb.curr_block += max_entries * max_entries; |
579 | if (i_data[EXT4_TIND_BLOCK]) { | 551 | if (i_data[EXT4_TIND_BLOCK]) { |
580 | retval = update_tind_extent_range(handle, tmp_inode, | 552 | retval = update_tind_extent_range(handle, tmp_inode, |
581 | le32_to_cpu(i_data[EXT4_TIND_BLOCK]), | 553 | le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb); |
582 | &blk_count, &lb); | ||
583 | if (retval) | 554 | if (retval) |
584 | goto err_out; | 555 | goto err_out; |
585 | } | 556 | } |
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 9bdef3f537c..7ea4ba4eff2 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c | |||
@@ -109,7 +109,7 @@ static int kmmpd(void *data) | |||
109 | mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); | 109 | mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); |
110 | bdevname(bh->b_bdev, mmp->mmp_bdevname); | 110 | bdevname(bh->b_bdev, mmp->mmp_bdevname); |
111 | 111 | ||
112 | memcpy(mmp->mmp_nodename, init_utsname()->sysname, | 112 | memcpy(mmp->mmp_nodename, init_utsname()->nodename, |
113 | sizeof(mmp->mmp_nodename)); | 113 | sizeof(mmp->mmp_nodename)); |
114 | 114 | ||
115 | while (!kthread_should_stop()) { | 115 | while (!kthread_should_stop()) { |
@@ -125,8 +125,9 @@ static int kmmpd(void *data) | |||
125 | * Don't spew too many error messages. Print one every | 125 | * Don't spew too many error messages. Print one every |
126 | * (s_mmp_update_interval * 60) seconds. | 126 | * (s_mmp_update_interval * 60) seconds. |
127 | */ | 127 | */ |
128 | if (retval && (failed_writes % 60) == 0) { | 128 | if (retval) { |
129 | ext4_error(sb, "Error writing to MMP block"); | 129 | if ((failed_writes % 60) == 0) |
130 | ext4_error(sb, "Error writing to MMP block"); | ||
130 | failed_writes++; | 131 | failed_writes++; |
131 | } | 132 | } |
132 | 133 | ||
@@ -295,7 +296,8 @@ skip: | |||
295 | /* | 296 | /* |
296 | * write a new random sequence number. | 297 | * write a new random sequence number. |
297 | */ | 298 | */ |
298 | mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq()); | 299 | seq = mmp_new_seq(); |
300 | mmp->mmp_seq = cpu_to_le32(seq); | ||
299 | 301 | ||
300 | retval = write_mmp_block(bh); | 302 | retval = write_mmp_block(bh); |
301 | if (retval) | 303 | if (retval) |
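Three small but real fixes in mmp.c: the MMP block now records the host's nodename instead of the kernel name ("Linux"); failed_writes is bumped on every failed write, so the old behaviour of logging exactly once and then going silent becomes a genuine one-message-per-60-failures rate limit; and the chained assignment mmp->mmp_seq = seq = cpu_to_le32(...) is split because it left seq in on-disk byte order, breaking the later host-order sequence comparison on big-endian machines. The endianness pitfall, as a userspace sketch (my_cpu_to_le32 is a stand-in for the kernel helper):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for cpu_to_le32(): swaps on a big-endian host, no-op otherwise. */
    static uint32_t my_cpu_to_le32(uint32_t v)
    {
    #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        return __builtin_bswap32(v);
    #else
        return v;
    #endif
    }

    int main(void)
    {
        uint32_t seq, on_disk;

        /* buggy: seq inherits the little-endian on-disk representation */
        on_disk = seq = my_cpu_to_le32(0x12345678);

        /* fixed: seq stays in host order; only the disk copy is converted */
        seq = 0x12345678;
        on_disk = my_cpu_to_le32(seq);

        printf("host %#x, disk %#x\n", seq, on_disk);
        return 0;
    }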
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index f57455a1b1b..c5826c623e7 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/quotaops.h> | 17 | #include <linux/quotaops.h> |
18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
19 | #include "ext4_jbd2.h" | 19 | #include "ext4_jbd2.h" |
20 | #include "ext4_extents.h" | ||
21 | #include "ext4.h" | 20 | #include "ext4.h" |
22 | 21 | ||
23 | /** | 22 | /** |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 1c924faeb6c..2a75eed2ef0 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -1586,7 +1586,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1586 | dxtrace(dx_show_index("node", frames[1].entries)); | 1586 | dxtrace(dx_show_index("node", frames[1].entries)); |
1587 | dxtrace(dx_show_index("node", | 1587 | dxtrace(dx_show_index("node", |
1588 | ((struct dx_node *) bh2->b_data)->entries)); | 1588 | ((struct dx_node *) bh2->b_data)->entries)); |
1589 | err = ext4_handle_dirty_metadata(handle, inode, bh2); | 1589 | err = ext4_handle_dirty_metadata(handle, dir, bh2); |
1590 | if (err) | 1590 | if (err) |
1591 | goto journal_error; | 1591 | goto journal_error; |
1592 | brelse (bh2); | 1592 | brelse (bh2); |
@@ -1612,7 +1612,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1612 | if (err) | 1612 | if (err) |
1613 | goto journal_error; | 1613 | goto journal_error; |
1614 | } | 1614 | } |
1615 | err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh); | 1615 | err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh); |
1616 | if (err) { | 1616 | if (err) { |
1617 | ext4_std_error(inode->i_sb, err); | 1617 | ext4_std_error(inode->i_sb, err); |
1618 | goto cleanup; | 1618 | goto cleanup; |
@@ -1707,9 +1707,8 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode) | |||
1707 | */ | 1707 | */ |
1708 | static void ext4_dec_count(handle_t *handle, struct inode *inode) | 1708 | static void ext4_dec_count(handle_t *handle, struct inode *inode) |
1709 | { | 1709 | { |
1710 | drop_nlink(inode); | 1710 | if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) |
1711 | if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0) | 1711 | drop_nlink(inode); |
1712 | inc_nlink(inode); | ||
1713 | } | 1712 | } |
1714 | 1713 | ||
1715 | 1714 | ||
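The rewritten ext4_dec_count() states the invariant directly: a live directory keeps i_nlink >= 2 (one link for "." and one from its parent's entry), so the count is only decremented for non-directories or for directories still above 2, rather than dropping to zero and re-incrementing. As a minimal model:

    #include <assert.h>
    #include <stdbool.h>

    struct toy_inode {
        bool is_dir;
        unsigned nlink;
    };

    /* Mirrors the new ext4_dec_count() logic. */
    static void dec_count(struct toy_inode *inode)
    {
        if (!inode->is_dir || inode->nlink > 2)
            inode->nlink--;
    }

    int main(void)
    {
        struct toy_inode dir  = { .is_dir = true,  .nlink = 2 };
        struct toy_inode file = { .is_dir = false, .nlink = 1 };

        dec_count(&dir);   /* no-op: directories floor at 2 */
        dec_count(&file);  /* regular files drop normally */
        assert(dir.nlink == 2 && file.nlink == 0);
        return 0;
    }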
@@ -1756,7 +1755,7 @@ retry: | |||
1756 | if (IS_DIRSYNC(dir)) | 1755 | if (IS_DIRSYNC(dir)) |
1757 | ext4_handle_sync(handle); | 1756 | ext4_handle_sync(handle); |
1758 | 1757 | ||
1759 | inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0); | 1758 | inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL); |
1760 | err = PTR_ERR(inode); | 1759 | err = PTR_ERR(inode); |
1761 | if (!IS_ERR(inode)) { | 1760 | if (!IS_ERR(inode)) { |
1762 | inode->i_op = &ext4_file_inode_operations; | 1761 | inode->i_op = &ext4_file_inode_operations; |
@@ -1792,7 +1791,7 @@ retry: | |||
1792 | if (IS_DIRSYNC(dir)) | 1791 | if (IS_DIRSYNC(dir)) |
1793 | ext4_handle_sync(handle); | 1792 | ext4_handle_sync(handle); |
1794 | 1793 | ||
1795 | inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0); | 1794 | inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL); |
1796 | err = PTR_ERR(inode); | 1795 | err = PTR_ERR(inode); |
1797 | if (!IS_ERR(inode)) { | 1796 | if (!IS_ERR(inode)) { |
1798 | init_special_inode(inode, inode->i_mode, rdev); | 1797 | init_special_inode(inode, inode->i_mode, rdev); |
@@ -1832,7 +1831,7 @@ retry: | |||
1832 | ext4_handle_sync(handle); | 1831 | ext4_handle_sync(handle); |
1833 | 1832 | ||
1834 | inode = ext4_new_inode(handle, dir, S_IFDIR | mode, | 1833 | inode = ext4_new_inode(handle, dir, S_IFDIR | mode, |
1835 | &dentry->d_name, 0); | 1834 | &dentry->d_name, 0, NULL); |
1836 | err = PTR_ERR(inode); | 1835 | err = PTR_ERR(inode); |
1837 | if (IS_ERR(inode)) | 1836 | if (IS_ERR(inode)) |
1838 | goto out_stop; | 1837 | goto out_stop; |
@@ -1863,7 +1862,7 @@ retry: | |||
1863 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 1862 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
1864 | inode->i_nlink = 2; | 1863 | inode->i_nlink = 2; |
1865 | BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); | 1864 | BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); |
1866 | err = ext4_handle_dirty_metadata(handle, dir, dir_block); | 1865 | err = ext4_handle_dirty_metadata(handle, inode, dir_block); |
1867 | if (err) | 1866 | if (err) |
1868 | goto out_clear_inode; | 1867 | goto out_clear_inode; |
1869 | err = ext4_mark_inode_dirty(handle, inode); | 1868 | err = ext4_mark_inode_dirty(handle, inode); |
@@ -2279,7 +2278,7 @@ retry: | |||
2279 | ext4_handle_sync(handle); | 2278 | ext4_handle_sync(handle); |
2280 | 2279 | ||
2281 | inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO, | 2280 | inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO, |
2282 | &dentry->d_name, 0); | 2281 | &dentry->d_name, 0, NULL); |
2283 | err = PTR_ERR(inode); | 2282 | err = PTR_ERR(inode); |
2284 | if (IS_ERR(inode)) | 2283 | if (IS_ERR(inode)) |
2285 | goto out_stop; | 2284 | goto out_stop; |
@@ -2530,7 +2529,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2530 | PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = | 2529 | PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = |
2531 | cpu_to_le32(new_dir->i_ino); | 2530 | cpu_to_le32(new_dir->i_ino); |
2532 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); | 2531 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); |
2533 | retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh); | 2532 | retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh); |
2534 | if (retval) { | 2533 | if (retval) { |
2535 | ext4_std_error(old_dir->i_sb, retval); | 2534 | ext4_std_error(old_dir->i_sb, retval); |
2536 | goto end_rename; | 2535 | goto end_rename; |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 92f38ee13f8..7ce1d0b19c9 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -70,7 +70,6 @@ static void put_io_page(struct ext4_io_page *io_page) | |||
70 | void ext4_free_io_end(ext4_io_end_t *io) | 70 | void ext4_free_io_end(ext4_io_end_t *io) |
71 | { | 71 | { |
72 | int i; | 72 | int i; |
73 | wait_queue_head_t *wq; | ||
74 | 73 | ||
75 | BUG_ON(!io); | 74 | BUG_ON(!io); |
76 | if (io->page) | 75 | if (io->page) |
@@ -78,56 +77,43 @@ void ext4_free_io_end(ext4_io_end_t *io) | |||
78 | for (i = 0; i < io->num_io_pages; i++) | 77 | for (i = 0; i < io->num_io_pages; i++) |
79 | put_io_page(io->pages[i]); | 78 | put_io_page(io->pages[i]); |
80 | io->num_io_pages = 0; | 79 | io->num_io_pages = 0; |
81 | wq = ext4_ioend_wq(io->inode); | 80 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count)) |
82 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && | 81 | wake_up_all(ext4_ioend_wq(io->inode)); |
83 | waitqueue_active(wq)) | ||
84 | wake_up_all(wq); | ||
85 | kmem_cache_free(io_end_cachep, io); | 82 | kmem_cache_free(io_end_cachep, io); |
86 | } | 83 | } |
87 | 84 | ||
88 | /* | 85 | /* |
89 | * check a range of space and convert unwritten extents to written. | 86 | * check a range of space and convert unwritten extents to written. |
87 | * | ||
88 | * Called with inode->i_mutex; we depend on this when we manipulate | ||
89 | * io->flag, since we could otherwise race with ext4_flush_completed_IO() | ||
90 | */ | 90 | */ |
91 | int ext4_end_io_nolock(ext4_io_end_t *io) | 91 | int ext4_end_io_nolock(ext4_io_end_t *io) |
92 | { | 92 | { |
93 | struct inode *inode = io->inode; | 93 | struct inode *inode = io->inode; |
94 | loff_t offset = io->offset; | 94 | loff_t offset = io->offset; |
95 | ssize_t size = io->size; | 95 | ssize_t size = io->size; |
96 | wait_queue_head_t *wq; | ||
97 | int ret = 0; | 96 | int ret = 0; |
98 | 97 | ||
99 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | 98 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," |
100 | "list->prev 0x%p\n", | 99 | "list->prev 0x%p\n", |
101 | io, inode->i_ino, io->list.next, io->list.prev); | 100 | io, inode->i_ino, io->list.next, io->list.prev); |
102 | 101 | ||
103 | if (list_empty(&io->list)) | ||
104 | return ret; | ||
105 | |||
106 | if (!(io->flag & EXT4_IO_END_UNWRITTEN)) | ||
107 | return ret; | ||
108 | |||
109 | ret = ext4_convert_unwritten_extents(inode, offset, size); | 102 | ret = ext4_convert_unwritten_extents(inode, offset, size); |
110 | if (ret < 0) { | 103 | if (ret < 0) { |
111 | printk(KERN_EMERG "%s: failed to convert unwritten " | 104 | ext4_msg(inode->i_sb, KERN_EMERG, |
112 | "extents to written extents, error is %d " | 105 | "failed to convert unwritten extents to written " |
113 | "io is still on inode %lu aio dio list\n", | 106 | "extents -- potential data loss! " |
114 | __func__, ret, inode->i_ino); | 107 | "(inode %lu, offset %llu, size %zd, error %d)", |
115 | return ret; | 108 | inode->i_ino, offset, size, ret); |
116 | } | 109 | } |
117 | 110 | ||
118 | if (io->iocb) | 111 | if (io->iocb) |
119 | aio_complete(io->iocb, io->result, 0); | 112 | aio_complete(io->iocb, io->result, 0); |
120 | /* clear the DIO AIO unwritten flag */ | ||
121 | if (io->flag & EXT4_IO_END_UNWRITTEN) { | ||
122 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
123 | /* Wake up anyone waiting on unwritten extent conversion */ | ||
124 | wq = ext4_ioend_wq(io->inode); | ||
125 | if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) && | ||
126 | waitqueue_active(wq)) { | ||
127 | wake_up_all(wq); | ||
128 | } | ||
129 | } | ||
130 | 113 | ||
114 | /* Wake up anyone waiting on unwritten extent conversion */ | ||
115 | if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) | ||
116 | wake_up_all(ext4_ioend_wq(io->inode)); | ||
131 | return ret; | 117 | return ret; |
132 | } | 118 | } |
133 | 119 | ||
@@ -140,9 +126,15 @@ static void ext4_end_io_work(struct work_struct *work) | |||
140 | struct inode *inode = io->inode; | 126 | struct inode *inode = io->inode; |
141 | struct ext4_inode_info *ei = EXT4_I(inode); | 127 | struct ext4_inode_info *ei = EXT4_I(inode); |
142 | unsigned long flags; | 128 | unsigned long flags; |
143 | int ret; | 129 | |
130 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
131 | if (list_empty(&io->list)) { | ||
132 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
133 | goto free; | ||
134 | } | ||
144 | 135 | ||
145 | if (!mutex_trylock(&inode->i_mutex)) { | 136 | if (!mutex_trylock(&inode->i_mutex)) { |
137 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
146 | /* | 138 | /* |
147 | * Requeue the work instead of waiting so that the work | 139 | * Requeue the work instead of waiting so that the work |
148 | * items queued after this can be processed. | 140 | * items queued after this can be processed. |
@@ -159,17 +151,11 @@ static void ext4_end_io_work(struct work_struct *work) | |||
159 | io->flag |= EXT4_IO_END_QUEUED; | 151 | io->flag |= EXT4_IO_END_QUEUED; |
160 | return; | 152 | return; |
161 | } | 153 | } |
162 | ret = ext4_end_io_nolock(io); | 154 | list_del_init(&io->list); |
163 | if (ret < 0) { | ||
164 | mutex_unlock(&inode->i_mutex); | ||
165 | return; | ||
166 | } | ||
167 | |||
168 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
169 | if (!list_empty(&io->list)) | ||
170 | list_del_init(&io->list); | ||
171 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 155 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
156 | (void) ext4_end_io_nolock(io); | ||
172 | mutex_unlock(&inode->i_mutex); | 157 | mutex_unlock(&inode->i_mutex); |
158 | free: | ||
173 | ext4_free_io_end(io); | 159 | ext4_free_io_end(io); |
174 | } | 160 | } |
175 | 161 | ||
@@ -350,10 +336,8 @@ submit_and_retry: | |||
350 | if ((io_end->num_io_pages >= MAX_IO_PAGES) && | 336 | if ((io_end->num_io_pages >= MAX_IO_PAGES) && |
351 | (io_end->pages[io_end->num_io_pages-1] != io_page)) | 337 | (io_end->pages[io_end->num_io_pages-1] != io_page)) |
352 | goto submit_and_retry; | 338 | goto submit_and_retry; |
353 | if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | 339 | if (buffer_uninit(bh)) |
354 | io_end->flag |= EXT4_IO_END_UNWRITTEN; | 340 | ext4_set_io_unwritten_flag(inode, io_end); |
355 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | ||
356 | } | ||
357 | io->io_end->size += bh->b_size; | 341 | io->io_end->size += bh->b_size; |
358 | io->io_next_block++; | 342 | io->io_next_block++; |
359 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | 343 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); |
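The page-io.c rework closes a race by making ext4_end_io_work() the only place that takes the io_end off the completion list: it re-checks list membership under i_completed_io_lock, bails straight to the free path if another context (such as ext4_flush_completed_IO()) already claimed the entry, and otherwise does list_del_init() before dropping the lock and converting. A userspace pthread model of that claim-under-lock-then-process pattern (types and names are illustrative, not the kernel's):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct io_end {
        bool on_list;            /* stands in for !list_empty(&io->list) */
        pthread_mutex_t *lock;   /* stands in for i_completed_io_lock */
    };

    static void end_io_work(struct io_end *io)
    {
        pthread_mutex_lock(io->lock);
        if (!io->on_list) {
            pthread_mutex_unlock(io->lock);
            goto free_it;        /* someone else already processed it */
        }
        io->on_list = false;     /* list_del_init() while still locked */
        pthread_mutex_unlock(io->lock);

        puts("convert unwritten extents");  /* ext4_end_io_nolock() */
    free_it:
        puts("free io_end");
    }

    int main(void)
    {
        pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
        struct io_end queued = { .on_list = true,  .lock = &lock };
        struct io_end raced  = { .on_list = false, .lock = &lock };

        end_io_work(&queued);    /* still queued: claim, convert, free */
        end_io_work(&raced);     /* lost the race: free only */
        return 0;
    }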
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 707d3f16f7c..996780ab4f4 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -875,7 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
875 | ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ | 875 | ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ |
876 | ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ | 876 | ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ |
877 | ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ | 877 | ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ |
878 | ext4_free_blks_set(sb, gdp, input->free_blocks_count); | 878 | ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count); |
879 | ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); | 879 | ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); |
880 | gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); | 880 | gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); |
881 | gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); | 881 | gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); |
@@ -937,8 +937,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
937 | input->reserved_blocks); | 937 | input->reserved_blocks); |
938 | 938 | ||
939 | /* Update the free space counts */ | 939 | /* Update the free space counts */ |
940 | percpu_counter_add(&sbi->s_freeblocks_counter, | 940 | percpu_counter_add(&sbi->s_freeclusters_counter, |
941 | input->free_blocks_count); | 941 | EXT4_B2C(sbi, input->free_blocks_count)); |
942 | percpu_counter_add(&sbi->s_freeinodes_counter, | 942 | percpu_counter_add(&sbi->s_freeinodes_counter, |
943 | EXT4_INODES_PER_GROUP(sb)); | 943 | EXT4_INODES_PER_GROUP(sb)); |
944 | 944 | ||
@@ -946,8 +946,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
946 | sbi->s_log_groups_per_flex) { | 946 | sbi->s_log_groups_per_flex) { |
947 | ext4_group_t flex_group; | 947 | ext4_group_t flex_group; |
948 | flex_group = ext4_flex_group(sbi, input->group); | 948 | flex_group = ext4_flex_group(sbi, input->group); |
949 | atomic_add(input->free_blocks_count, | 949 | atomic_add(EXT4_B2C(sbi, input->free_blocks_count), |
950 | &sbi->s_flex_groups[flex_group].free_blocks); | 950 | &sbi->s_flex_groups[flex_group].free_clusters); |
951 | atomic_add(EXT4_INODES_PER_GROUP(sb), | 951 | atomic_add(EXT4_INODES_PER_GROUP(sb), |
952 | &sbi->s_flex_groups[flex_group].free_inodes); | 952 | &sbi->s_flex_groups[flex_group].free_inodes); |
953 | } | 953 | } |
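resize.c is another spot where block counts cross the cluster boundary: the new group's free_blocks_count is folded into the cluster-denominated counters via EXT4_B2C(), while ext4_commit_super() later converts back with EXT4_C2B() when filling the block-denominated superblock field. Both are shifts by s_cluster_bits, as this standalone model shows (spellings are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    static const unsigned cluster_bits = 4;  /* 16 blocks per cluster */

    static uint64_t B2C(uint64_t blocks)   { return blocks >> cluster_bits; }
    static uint64_t C2B(uint64_t clusters) { return clusters << cluster_bits; }

    int main(void)
    {
        uint64_t free_blocks = 8192;         /* new group's free space */
        uint64_t free_clusters = B2C(free_blocks);

        printf("%llu blocks = %llu clusters = %llu blocks again\n",
               (unsigned long long)free_blocks,
               (unsigned long long)free_clusters,
               (unsigned long long)C2B(free_clusters));
        return 0;
    }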
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 44d0c8db223..9953d80145a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/freezer.h> | 45 | #include <linux/freezer.h> |
46 | 46 | ||
47 | #include "ext4.h" | 47 | #include "ext4.h" |
48 | #include "ext4_extents.h" | ||
48 | #include "ext4_jbd2.h" | 49 | #include "ext4_jbd2.h" |
49 | #include "xattr.h" | 50 | #include "xattr.h" |
50 | #include "acl.h" | 51 | #include "acl.h" |
@@ -163,8 +164,8 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb, | |||
163 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); | 164 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); |
164 | } | 165 | } |
165 | 166 | ||
166 | __u32 ext4_free_blks_count(struct super_block *sb, | 167 | __u32 ext4_free_group_clusters(struct super_block *sb, |
167 | struct ext4_group_desc *bg) | 168 | struct ext4_group_desc *bg) |
168 | { | 169 | { |
169 | return le16_to_cpu(bg->bg_free_blocks_count_lo) | | 170 | return le16_to_cpu(bg->bg_free_blocks_count_lo) | |
170 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 171 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
@@ -219,8 +220,8 @@ void ext4_inode_table_set(struct super_block *sb, | |||
219 | bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); | 220 | bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); |
220 | } | 221 | } |
221 | 222 | ||
222 | void ext4_free_blks_set(struct super_block *sb, | 223 | void ext4_free_group_clusters_set(struct super_block *sb, |
223 | struct ext4_group_desc *bg, __u32 count) | 224 | struct ext4_group_desc *bg, __u32 count) |
224 | { | 225 | { |
225 | bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); | 226 | bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); |
226 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) | 227 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) |
@@ -414,6 +415,22 @@ static void save_error_info(struct super_block *sb, const char *func, | |||
414 | ext4_commit_super(sb, 1); | 415 | ext4_commit_super(sb, 1); |
415 | } | 416 | } |
416 | 417 | ||
418 | /* | ||
419 | * The del_gendisk() function uninitializes the disk-specific data | ||
420 | * structures, including the bdi structure, without telling anyone | ||
421 | * else. Once this happens, any attempt to call mark_buffer_dirty() | ||
422 | * (for example, by ext4_commit_super), will cause a kernel OOPS. | ||
423 | * This is a kludge to prevent these oops until we can put in a proper | ||
424 | * hook in del_gendisk() to inform the VFS and file system layers. | ||
425 | */ | ||
426 | static int block_device_ejected(struct super_block *sb) | ||
427 | { | ||
428 | struct inode *bd_inode = sb->s_bdev->bd_inode; | ||
429 | struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info; | ||
430 | |||
431 | return bdi->dev == NULL; | ||
432 | } | ||
433 | |||
417 | 434 | ||
418 | /* Deal with the reporting of failure conditions on a filesystem such as | 435 | /* Deal with the reporting of failure conditions on a filesystem such as |
419 | * inconsistencies detected or read IO failures. | 436 | * inconsistencies detected or read IO failures. |
@@ -821,10 +838,10 @@ static void ext4_put_super(struct super_block *sb) | |||
821 | brelse(sbi->s_group_desc[i]); | 838 | brelse(sbi->s_group_desc[i]); |
822 | ext4_kvfree(sbi->s_group_desc); | 839 | ext4_kvfree(sbi->s_group_desc); |
823 | ext4_kvfree(sbi->s_flex_groups); | 840 | ext4_kvfree(sbi->s_flex_groups); |
824 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 841 | percpu_counter_destroy(&sbi->s_freeclusters_counter); |
825 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 842 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
826 | percpu_counter_destroy(&sbi->s_dirs_counter); | 843 | percpu_counter_destroy(&sbi->s_dirs_counter); |
827 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | 844 | percpu_counter_destroy(&sbi->s_dirtyclusters_counter); |
828 | brelse(sbi->s_sbh); | 845 | brelse(sbi->s_sbh); |
829 | #ifdef CONFIG_QUOTA | 846 | #ifdef CONFIG_QUOTA |
830 | for (i = 0; i < MAXQUOTAS; i++) | 847 | for (i = 0; i < MAXQUOTAS; i++) |
@@ -1057,8 +1074,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1057 | seq_puts(seq, ",nouid32"); | 1074 | seq_puts(seq, ",nouid32"); |
1058 | if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) | 1075 | if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) |
1059 | seq_puts(seq, ",debug"); | 1076 | seq_puts(seq, ",debug"); |
1060 | if (test_opt(sb, OLDALLOC)) | ||
1061 | seq_puts(seq, ",oldalloc"); | ||
1062 | #ifdef CONFIG_EXT4_FS_XATTR | 1077 | #ifdef CONFIG_EXT4_FS_XATTR |
1063 | if (test_opt(sb, XATTR_USER)) | 1078 | if (test_opt(sb, XATTR_USER)) |
1064 | seq_puts(seq, ",user_xattr"); | 1079 | seq_puts(seq, ",user_xattr"); |
@@ -1567,10 +1582,12 @@ static int parse_options(char *options, struct super_block *sb, | |||
1567 | set_opt(sb, DEBUG); | 1582 | set_opt(sb, DEBUG); |
1568 | break; | 1583 | break; |
1569 | case Opt_oldalloc: | 1584 | case Opt_oldalloc: |
1570 | set_opt(sb, OLDALLOC); | 1585 | ext4_msg(sb, KERN_WARNING, |
1586 | "Ignoring deprecated oldalloc option"); | ||
1571 | break; | 1587 | break; |
1572 | case Opt_orlov: | 1588 | case Opt_orlov: |
1573 | clear_opt(sb, OLDALLOC); | 1589 | ext4_msg(sb, KERN_WARNING, |
1590 | "Ignoring deprecated orlov option"); | ||
1574 | break; | 1591 | break; |
1575 | #ifdef CONFIG_EXT4_FS_XATTR | 1592 | #ifdef CONFIG_EXT4_FS_XATTR |
1576 | case Opt_user_xattr: | 1593 | case Opt_user_xattr: |
@@ -1801,6 +1818,7 @@ set_qf_format: | |||
1801 | break; | 1818 | break; |
1802 | case Opt_nodelalloc: | 1819 | case Opt_nodelalloc: |
1803 | clear_opt(sb, DELALLOC); | 1820 | clear_opt(sb, DELALLOC); |
1821 | clear_opt2(sb, EXPLICIT_DELALLOC); | ||
1804 | break; | 1822 | break; |
1805 | case Opt_mblk_io_submit: | 1823 | case Opt_mblk_io_submit: |
1806 | set_opt(sb, MBLK_IO_SUBMIT); | 1824 | set_opt(sb, MBLK_IO_SUBMIT); |
@@ -1817,6 +1835,7 @@ set_qf_format: | |||
1817 | break; | 1835 | break; |
1818 | case Opt_delalloc: | 1836 | case Opt_delalloc: |
1819 | set_opt(sb, DELALLOC); | 1837 | set_opt(sb, DELALLOC); |
1838 | set_opt2(sb, EXPLICIT_DELALLOC); | ||
1820 | break; | 1839 | break; |
1821 | case Opt_block_validity: | 1840 | case Opt_block_validity: |
1822 | set_opt(sb, BLOCK_VALIDITY); | 1841 | set_opt(sb, BLOCK_VALIDITY); |
@@ -1935,7 +1954,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1935 | res = MS_RDONLY; | 1954 | res = MS_RDONLY; |
1936 | } | 1955 | } |
1937 | if (read_only) | 1956 | if (read_only) |
1938 | return res; | 1957 | goto done; |
1939 | if (!(sbi->s_mount_state & EXT4_VALID_FS)) | 1958 | if (!(sbi->s_mount_state & EXT4_VALID_FS)) |
1940 | ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " | 1959 | ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " |
1941 | "running e2fsck is recommended"); | 1960 | "running e2fsck is recommended"); |
@@ -1966,6 +1985,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1966 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 1985 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
1967 | 1986 | ||
1968 | ext4_commit_super(sb, 1); | 1987 | ext4_commit_super(sb, 1); |
1988 | done: | ||
1969 | if (test_opt(sb, DEBUG)) | 1989 | if (test_opt(sb, DEBUG)) |
1970 | printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " | 1990 | printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " |
1971 | "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n", | 1991 | "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n", |
@@ -2015,8 +2035,8 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
2015 | flex_group = ext4_flex_group(sbi, i); | 2035 | flex_group = ext4_flex_group(sbi, i); |
2016 | atomic_add(ext4_free_inodes_count(sb, gdp), | 2036 | atomic_add(ext4_free_inodes_count(sb, gdp), |
2017 | &sbi->s_flex_groups[flex_group].free_inodes); | 2037 | &sbi->s_flex_groups[flex_group].free_inodes); |
2018 | atomic_add(ext4_free_blks_count(sb, gdp), | 2038 | atomic_add(ext4_free_group_clusters(sb, gdp), |
2019 | &sbi->s_flex_groups[flex_group].free_blocks); | 2039 | &sbi->s_flex_groups[flex_group].free_clusters); |
2020 | atomic_add(ext4_used_dirs_count(sb, gdp), | 2040 | atomic_add(ext4_used_dirs_count(sb, gdp), |
2021 | &sbi->s_flex_groups[flex_group].used_dirs); | 2041 | &sbi->s_flex_groups[flex_group].used_dirs); |
2022 | } | 2042 | } |
@@ -2134,7 +2154,8 @@ static int ext4_check_descriptors(struct super_block *sb, | |||
2134 | if (NULL != first_not_zeroed) | 2154 | if (NULL != first_not_zeroed) |
2135 | *first_not_zeroed = grp; | 2155 | *first_not_zeroed = grp; |
2136 | 2156 | ||
2137 | ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); | 2157 | ext4_free_blocks_count_set(sbi->s_es, |
2158 | EXT4_C2B(sbi, ext4_count_free_clusters(sb))); | ||
2138 | sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); | 2159 | sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); |
2139 | return 1; | 2160 | return 1; |
2140 | } | 2161 | } |
@@ -2454,7 +2475,8 @@ static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, | |||
2454 | char *buf) | 2475 | char *buf) |
2455 | { | 2476 | { |
2456 | return snprintf(buf, PAGE_SIZE, "%llu\n", | 2477 | return snprintf(buf, PAGE_SIZE, "%llu\n", |
2457 | (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); | 2478 | (s64) EXT4_C2B(sbi, |
2479 | percpu_counter_sum(&sbi->s_dirtyclusters_counter))); | ||
2458 | } | 2480 | } |
2459 | 2481 | ||
2460 | static ssize_t session_write_kbytes_show(struct ext4_attr *a, | 2482 | static ssize_t session_write_kbytes_show(struct ext4_attr *a, |
@@ -2682,6 +2704,13 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) | |||
2682 | return 0; | 2704 | return 0; |
2683 | } | 2705 | } |
2684 | } | 2706 | } |
2707 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) && | ||
2708 | !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { | ||
2709 | ext4_msg(sb, KERN_ERR, | ||
2710 | "Can't support bigalloc feature without " | ||
2711 | "extents feature\n"); | ||
2712 | return 0; | ||
2713 | } | ||
2685 | return 1; | 2714 | return 1; |
2686 | } | 2715 | } |
2687 | 2716 | ||
@@ -3087,10 +3116,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3087 | char *cp; | 3116 | char *cp; |
3088 | const char *descr; | 3117 | const char *descr; |
3089 | int ret = -ENOMEM; | 3118 | int ret = -ENOMEM; |
3090 | int blocksize; | 3119 | int blocksize, clustersize; |
3091 | unsigned int db_count; | 3120 | unsigned int db_count; |
3092 | unsigned int i; | 3121 | unsigned int i; |
3093 | int needs_recovery, has_huge_files; | 3122 | int needs_recovery, has_huge_files, has_bigalloc; |
3094 | __u64 blocks_count; | 3123 | __u64 blocks_count; |
3095 | int err; | 3124 | int err; |
3096 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; | 3125 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; |
@@ -3224,6 +3253,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3224 | &journal_ioprio, NULL, 0)) | 3253 | &journal_ioprio, NULL, 0)) |
3225 | goto failed_mount; | 3254 | goto failed_mount; |
3226 | 3255 | ||
3256 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
3257 | printk_once(KERN_WARNING "EXT4-fs: Warning: mounting " | ||
3258 | "with data=journal disables delayed " | ||
3259 | "allocation and O_DIRECT support!\n"); | ||
3260 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { | ||
3261 | ext4_msg(sb, KERN_ERR, "can't mount with " | ||
3262 | "both data=journal and delalloc"); | ||
3263 | goto failed_mount; | ||
3264 | } | ||
3265 | if (test_opt(sb, DIOREAD_NOLOCK)) { | ||
3266 | ext4_msg(sb, KERN_ERR, "can't mount with " | ||
3267 | "both data=journal and delalloc"); | ||
3268 | goto failed_mount; | ||
3269 | } | ||
3270 | if (test_opt(sb, DELALLOC)) | ||
3271 | clear_opt(sb, DELALLOC); | ||
3272 | } | ||
3273 | |||
3274 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); | ||
3275 | if (test_opt(sb, DIOREAD_NOLOCK)) { | ||
3276 | if (blocksize < PAGE_SIZE) { | ||
3277 | ext4_msg(sb, KERN_ERR, "can't mount with " | ||
3278 | "dioread_nolock if block size != PAGE_SIZE"); | ||
3279 | goto failed_mount; | ||
3280 | } | ||
3281 | } | ||
3282 | |||
3227 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | 3283 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | |
3228 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); | 3284 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); |
3229 | 3285 | ||
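The point of the new EXPLICIT_DELALLOC bit shows up in the hunk above: data=journal quietly clears delalloc when it is merely the default, but fails the mount when the user asked for it explicitly. A small model of the two-flag scheme (flag names are made up for the sketch):

    #include <stdbool.h>
    #include <stdio.h>

    #define OPT_DELALLOC            0x1  /* delalloc currently enabled */
    #define OPT2_EXPLICIT_DELALLOC  0x2  /* user passed -o delalloc */

    /* Returns false when the combination must refuse the mount. */
    static bool check_data_journal(unsigned *opt, unsigned opt2)
    {
        if (opt2 & OPT2_EXPLICIT_DELALLOC) {
            fprintf(stderr,
                    "can't mount with both data=journal and delalloc\n");
            return false;
        }
        *opt &= ~OPT_DELALLOC;  /* default delalloc: silently disable */
        return true;
    }

    int main(void)
    {
        unsigned opt = OPT_DELALLOC;

        /* default delalloc + data=journal: mount proceeds, delalloc off */
        printf("default: %s\n", check_data_journal(&opt, 0) ? "ok" : "fails");

        /* explicit -o delalloc + data=journal: refused */
        opt = OPT_DELALLOC;
        printf("explicit: %s\n",
               check_data_journal(&opt, OPT2_EXPLICIT_DELALLOC) ? "ok" : "fails");
        return 0;
    }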
@@ -3265,8 +3321,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3265 | if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) | 3321 | if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) |
3266 | goto failed_mount; | 3322 | goto failed_mount; |
3267 | 3323 | ||
3268 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); | ||
3269 | |||
3270 | if (blocksize < EXT4_MIN_BLOCK_SIZE || | 3324 | if (blocksize < EXT4_MIN_BLOCK_SIZE || |
3271 | blocksize > EXT4_MAX_BLOCK_SIZE) { | 3325 | blocksize > EXT4_MAX_BLOCK_SIZE) { |
3272 | ext4_msg(sb, KERN_ERR, | 3326 | ext4_msg(sb, KERN_ERR, |
@@ -3369,12 +3423,53 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3369 | sb->s_dirt = 1; | 3423 | sb->s_dirt = 1; |
3370 | } | 3424 | } |
3371 | 3425 | ||
3372 | if (sbi->s_blocks_per_group > blocksize * 8) { | 3426 | /* Handle clustersize */ |
3373 | ext4_msg(sb, KERN_ERR, | 3427 | clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); |
3374 | "#blocks per group too big: %lu", | 3428 | has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb, |
3375 | sbi->s_blocks_per_group); | 3429 | EXT4_FEATURE_RO_COMPAT_BIGALLOC); |
3376 | goto failed_mount; | 3430 | if (has_bigalloc) { |
3431 | if (clustersize < blocksize) { | ||
3432 | ext4_msg(sb, KERN_ERR, | ||
3433 | "cluster size (%d) smaller than " | ||
3434 | "block size (%d)", clustersize, blocksize); | ||
3435 | goto failed_mount; | ||
3436 | } | ||
3437 | sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - | ||
3438 | le32_to_cpu(es->s_log_block_size); | ||
3439 | sbi->s_clusters_per_group = | ||
3440 | le32_to_cpu(es->s_clusters_per_group); | ||
3441 | if (sbi->s_clusters_per_group > blocksize * 8) { | ||
3442 | ext4_msg(sb, KERN_ERR, | ||
3443 | "#clusters per group too big: %lu", | ||
3444 | sbi->s_clusters_per_group); | ||
3445 | goto failed_mount; | ||
3446 | } | ||
3447 | if (sbi->s_blocks_per_group != | ||
3448 | (sbi->s_clusters_per_group * (clustersize / blocksize))) { | ||
3449 | ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " | ||
3450 | "clusters per group (%lu) inconsistent", | ||
3451 | sbi->s_blocks_per_group, | ||
3452 | sbi->s_clusters_per_group); | ||
3453 | goto failed_mount; | ||
3454 | } | ||
3455 | } else { | ||
3456 | if (clustersize != blocksize) { | ||
3457 | ext4_warning(sb, "fragment/cluster size (%d) != " | ||
3458 | "block size (%d)", clustersize, | ||
3459 | blocksize); | ||
3460 | clustersize = blocksize; | ||
3461 | } | ||
3462 | if (sbi->s_blocks_per_group > blocksize * 8) { | ||
3463 | ext4_msg(sb, KERN_ERR, | ||
3464 | "#blocks per group too big: %lu", | ||
3465 | sbi->s_blocks_per_group); | ||
3466 | goto failed_mount; | ||
3467 | } | ||
3468 | sbi->s_clusters_per_group = sbi->s_blocks_per_group; | ||
3469 | sbi->s_cluster_bits = 0; | ||
3377 | } | 3470 | } |
3471 | sbi->s_cluster_ratio = clustersize / blocksize; | ||
3472 | |||
3378 | if (sbi->s_inodes_per_group > blocksize * 8) { | 3473 | if (sbi->s_inodes_per_group > blocksize * 8) { |
3379 | ext4_msg(sb, KERN_ERR, | 3474 | ext4_msg(sb, KERN_ERR, |
3380 | "#inodes per group too big: %lu", | 3475 | "#inodes per group too big: %lu", |
@@ -3446,10 +3541,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3446 | goto failed_mount; | 3541 | goto failed_mount; |
3447 | } | 3542 | } |
3448 | 3543 | ||
3449 | #ifdef CONFIG_PROC_FS | ||
3450 | if (ext4_proc_root) | 3544 | if (ext4_proc_root) |
3451 | sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); | 3545 | sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); |
3452 | #endif | ||
3453 | 3546 | ||
3454 | bgl_lock_init(sbi->s_blockgroup_lock); | 3547 | bgl_lock_init(sbi->s_blockgroup_lock); |
3455 | 3548 | ||
@@ -3483,8 +3576,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3483 | sbi->s_err_report.function = print_daily_error_info; | 3576 | sbi->s_err_report.function = print_daily_error_info; |
3484 | sbi->s_err_report.data = (unsigned long) sb; | 3577 | sbi->s_err_report.data = (unsigned long) sb; |
3485 | 3578 | ||
3486 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | 3579 | err = percpu_counter_init(&sbi->s_freeclusters_counter, |
3487 | ext4_count_free_blocks(sb)); | 3580 | ext4_count_free_clusters(sb)); |
3488 | if (!err) { | 3581 | if (!err) { |
3489 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | 3582 | err = percpu_counter_init(&sbi->s_freeinodes_counter, |
3490 | ext4_count_free_inodes(sb)); | 3583 | ext4_count_free_inodes(sb)); |
@@ -3494,7 +3587,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3494 | ext4_count_dirs(sb)); | 3587 | ext4_count_dirs(sb)); |
3495 | } | 3588 | } |
3496 | if (!err) { | 3589 | if (!err) { |
3497 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | 3590 | err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0); |
3498 | } | 3591 | } |
3499 | if (err) { | 3592 | if (err) { |
3500 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | 3593 | ext4_msg(sb, KERN_ERR, "insufficient memory"); |
@@ -3609,13 +3702,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3609 | * The journal may have updated the bg summary counts, so we | 3702 | * The journal may have updated the bg summary counts, so we |
3610 | * need to update the global counters. | 3703 | * need to update the global counters. |
3611 | */ | 3704 | */ |
3612 | percpu_counter_set(&sbi->s_freeblocks_counter, | 3705 | percpu_counter_set(&sbi->s_freeclusters_counter, |
3613 | ext4_count_free_blocks(sb)); | 3706 | ext4_count_free_clusters(sb)); |
3614 | percpu_counter_set(&sbi->s_freeinodes_counter, | 3707 | percpu_counter_set(&sbi->s_freeinodes_counter, |
3615 | ext4_count_free_inodes(sb)); | 3708 | ext4_count_free_inodes(sb)); |
3616 | percpu_counter_set(&sbi->s_dirs_counter, | 3709 | percpu_counter_set(&sbi->s_dirs_counter, |
3617 | ext4_count_dirs(sb)); | 3710 | ext4_count_dirs(sb)); |
3618 | percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); | 3711 | percpu_counter_set(&sbi->s_dirtyclusters_counter, 0); |
3619 | 3712 | ||
3620 | no_journal: | 3713 | no_journal: |
3621 | /* | 3714 | /* |
@@ -3679,25 +3772,6 @@ no_journal: | |||
3679 | "available"); | 3772 | "available"); |
3680 | } | 3773 | } |
3681 | 3774 | ||
3682 | if (test_opt(sb, DELALLOC) && | ||
3683 | (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { | ||
3684 | ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " | ||
3685 | "requested data journaling mode"); | ||
3686 | clear_opt(sb, DELALLOC); | ||
3687 | } | ||
3688 | if (test_opt(sb, DIOREAD_NOLOCK)) { | ||
3689 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
3690 | ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " | ||
3691 | "option - requested data journaling mode"); | ||
3692 | clear_opt(sb, DIOREAD_NOLOCK); | ||
3693 | } | ||
3694 | if (sb->s_blocksize < PAGE_SIZE) { | ||
3695 | ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " | ||
3696 | "option - block size is too small"); | ||
3697 | clear_opt(sb, DIOREAD_NOLOCK); | ||
3698 | } | ||
3699 | } | ||
3700 | |||
3701 | err = ext4_setup_system_zone(sb); | 3775 | err = ext4_setup_system_zone(sb); |
3702 | if (err) { | 3776 | if (err) { |
3703 | ext4_msg(sb, KERN_ERR, "failed to initialize system " | 3777 | ext4_msg(sb, KERN_ERR, "failed to initialize system " |
@@ -3710,22 +3784,19 @@ no_journal: | |||
3710 | if (err) { | 3784 | if (err) { |
3711 | ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", | 3785 | ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", |
3712 | err); | 3786 | err); |
3713 | goto failed_mount4; | 3787 | goto failed_mount5; |
3714 | } | 3788 | } |
3715 | 3789 | ||
3716 | err = ext4_register_li_request(sb, first_not_zeroed); | 3790 | err = ext4_register_li_request(sb, first_not_zeroed); |
3717 | if (err) | 3791 | if (err) |
3718 | goto failed_mount4; | 3792 | goto failed_mount6; |
3719 | 3793 | ||
3720 | sbi->s_kobj.kset = ext4_kset; | 3794 | sbi->s_kobj.kset = ext4_kset; |
3721 | init_completion(&sbi->s_kobj_unregister); | 3795 | init_completion(&sbi->s_kobj_unregister); |
3722 | err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, | 3796 | err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, |
3723 | "%s", sb->s_id); | 3797 | "%s", sb->s_id); |
3724 | if (err) { | 3798 | if (err) |
3725 | ext4_mb_release(sb); | 3799 | goto failed_mount7; |
3726 | ext4_ext_release(sb); | ||
3727 | goto failed_mount4; | ||
3728 | }; | ||
3729 | 3800 | ||
3730 | EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; | 3801 | EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; |
3731 | ext4_orphan_cleanup(sb, es); | 3802 | ext4_orphan_cleanup(sb, es); |
@@ -3759,13 +3830,19 @@ cantfind_ext4: | |||
3759 | ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); | 3830 | ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); |
3760 | goto failed_mount; | 3831 | goto failed_mount; |
3761 | 3832 | ||
3833 | failed_mount7: | ||
3834 | ext4_unregister_li_request(sb); | ||
3835 | failed_mount6: | ||
3836 | ext4_ext_release(sb); | ||
3837 | failed_mount5: | ||
3838 | ext4_mb_release(sb); | ||
3839 | ext4_release_system_zone(sb); | ||
3762 | failed_mount4: | 3840 | failed_mount4: |
3763 | iput(root); | 3841 | iput(root); |
3764 | sb->s_root = NULL; | 3842 | sb->s_root = NULL; |
3765 | ext4_msg(sb, KERN_ERR, "mount failed"); | 3843 | ext4_msg(sb, KERN_ERR, "mount failed"); |
3766 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); | 3844 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); |
3767 | failed_mount_wq: | 3845 | failed_mount_wq: |
3768 | ext4_release_system_zone(sb); | ||
3769 | if (sbi->s_journal) { | 3846 | if (sbi->s_journal) { |
3770 | jbd2_journal_destroy(sbi->s_journal); | 3847 | jbd2_journal_destroy(sbi->s_journal); |
3771 | sbi->s_journal = NULL; | 3848 | sbi->s_journal = NULL; |
@@ -3774,10 +3851,10 @@ failed_mount3: | |||
3774 | del_timer(&sbi->s_err_report); | 3851 | del_timer(&sbi->s_err_report); |
3775 | if (sbi->s_flex_groups) | 3852 | if (sbi->s_flex_groups) |
3776 | ext4_kvfree(sbi->s_flex_groups); | 3853 | ext4_kvfree(sbi->s_flex_groups); |
3777 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 3854 | percpu_counter_destroy(&sbi->s_freeclusters_counter); |
3778 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 3855 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
3779 | percpu_counter_destroy(&sbi->s_dirs_counter); | 3856 | percpu_counter_destroy(&sbi->s_dirs_counter); |
3780 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | 3857 | percpu_counter_destroy(&sbi->s_dirtyclusters_counter); |
3781 | if (sbi->s_mmp_tsk) | 3858 | if (sbi->s_mmp_tsk) |
3782 | kthread_stop(sbi->s_mmp_tsk); | 3859 | kthread_stop(sbi->s_mmp_tsk); |
3783 | failed_mount2: | 3860 | failed_mount2: |
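The relabelled error path restores the usual goto-ladder idiom: each label undoes exactly one more successfully completed setup step, so a failure after step N unwinds N-1..1 in reverse order, and registering the sysfs kobject no longer needs its own ad hoc ext4_mb_release()/ext4_ext_release() calls. Generic shape of the pattern, as a runnable sketch:

    #include <stdio.h>

    static int step_a(void) { puts("A up"); return 0; }
    static int step_b(void) { puts("B up"); return 0; }
    static int step_c(void) { puts("C up"); return -1; }  /* fails */
    static void undo_b(void) { puts("B down"); }
    static void undo_a(void) { puts("A down"); }

    static int setup(void)
    {
        int err;

        if ((err = step_a()) != 0)
            goto fail;
        if ((err = step_b()) != 0)
            goto fail_a;  /* undo A only */
        if ((err = step_c()) != 0)
            goto fail_b;  /* undo B, then fall through to undo A */
        return 0;

    fail_b:
        undo_b();
    fail_a:
        undo_a();
    fail:
        return err;
    }

    int main(void)
    {
        return setup() ? 1 : 0;
    }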
@@ -4064,7 +4141,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
4064 | struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; | 4141 | struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; |
4065 | int error = 0; | 4142 | int error = 0; |
4066 | 4143 | ||
4067 | if (!sbh) | 4144 | if (!sbh || block_device_ejected(sb)) |
4068 | return error; | 4145 | return error; |
4069 | if (buffer_write_io_error(sbh)) { | 4146 | if (buffer_write_io_error(sbh)) { |
4070 | /* | 4147 | /* |
@@ -4100,8 +4177,9 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
4100 | else | 4177 | else |
4101 | es->s_kbytes_written = | 4178 | es->s_kbytes_written = |
4102 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); | 4179 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); |
4103 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( | 4180 | ext4_free_blocks_count_set(es, |
4104 | &EXT4_SB(sb)->s_freeblocks_counter)); | 4181 | EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( |
4182 | &EXT4_SB(sb)->s_freeclusters_counter))); | ||
4105 | es->s_free_inodes_count = | 4183 | es->s_free_inodes_count = |
4106 | cpu_to_le32(percpu_counter_sum_positive( | 4184 | cpu_to_le32(percpu_counter_sum_positive( |
4107 | &EXT4_SB(sb)->s_freeinodes_counter)); | 4185 | &EXT4_SB(sb)->s_freeinodes_counter)); |
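Editor's note: EXT4_C2B() converts a cluster count into a block count. With bigalloc, a cluster is 2^s_cluster_bits blocks, so the conversion is (as I read the ext4.h macros; treat this as a hedged reconstruction) a plain shift. A self-contained sketch:

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical stand-ins for sbi->s_cluster_bits and the
     * EXT4_C2B()/EXT4_B2C() macros; the real ones live in fs/ext4/ext4.h. */
    struct sb_info { unsigned int s_cluster_bits; };

    static inline uint64_t c2b(const struct sb_info *sbi, uint64_t clusters)
    {
            return clusters << sbi->s_cluster_bits; /* clusters -> blocks */
    }

    static inline uint64_t b2c(const struct sb_info *sbi, uint64_t blocks)
    {
            return blocks >> sbi->s_cluster_bits;   /* blocks -> clusters, rounded down */
    }

    int main(void)
    {
            struct sb_info sbi = { .s_cluster_bits = 4 };   /* 16 blocks per cluster */

            assert(c2b(&sbi, 3) == 48);
            assert(b2c(&sbi, 48) == 3);
            return 0;
    }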
@@ -4506,16 +4584,34 @@ restore_opts: | |||
4506 | return err; | 4584 | return err; |
4507 | } | 4585 | } |
4508 | 4586 | ||
4587 | /* | ||
4588 | * Note: calculating the overhead so we can be compatible with | ||
4589 | * historical BSD practice is quite difficult in the face of | ||
4590 | * clusters/bigalloc. This is because multiple metadata blocks from | ||
4591 | * different block group can end up in the same allocation cluster. | ||
4592 | * Calculating the exact overhead in the face of clustered allocation | ||
4593 | * requires either O(all block bitmaps) in memory or O(number of block | ||
4594 | * groups**2) in time. We will still calculate the overhead for | ||
4595 | * older file systems --- and if we come across a bigalloc file | ||
4596 | * system with zero in s_overhead_clusters, the estimate will be close to | ||
4597 | * correct especially for very large cluster sizes --- but for newer | ||
4598 | * file systems, it's better to calculate this figure once at mkfs | ||
4599 | * time, and store it in the superblock. If the superblock value is | ||
4600 | * present (even for non-bigalloc file systems), we will use it. | ||
4601 | */ | ||
4509 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | 4602 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) |
4510 | { | 4603 | { |
4511 | struct super_block *sb = dentry->d_sb; | 4604 | struct super_block *sb = dentry->d_sb; |
4512 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4605 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4513 | struct ext4_super_block *es = sbi->s_es; | 4606 | struct ext4_super_block *es = sbi->s_es; |
4607 | struct ext4_group_desc *gdp; | ||
4514 | u64 fsid; | 4608 | u64 fsid; |
4515 | s64 bfree; | 4609 | s64 bfree; |
4516 | 4610 | ||
4517 | if (test_opt(sb, MINIX_DF)) { | 4611 | if (test_opt(sb, MINIX_DF)) { |
4518 | sbi->s_overhead_last = 0; | 4612 | sbi->s_overhead_last = 0; |
4613 | } else if (es->s_overhead_clusters) { | ||
4614 | sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters); | ||
4519 | } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { | 4615 | } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { |
4520 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); | 4616 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
4521 | ext4_fsblk_t overhead = 0; | 4617 | ext4_fsblk_t overhead = 0; |
@@ -4530,24 +4626,16 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
4530 | * All of the blocks before first_data_block are | 4626 | * All of the blocks before first_data_block are |
4531 | * overhead | 4627 | * overhead |
4532 | */ | 4628 | */ |
4533 | overhead = le32_to_cpu(es->s_first_data_block); | 4629 | overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block)); |
4534 | 4630 | ||
4535 | /* | 4631 | /* |
4536 | * Add the overhead attributed to the superblock and | 4632 | * Add the overhead found in each block group |
4537 | * block group descriptors. If the sparse superblocks | ||
4538 | * feature is turned on, then not all groups have this. | ||
4539 | */ | 4633 | */ |
4540 | for (i = 0; i < ngroups; i++) { | 4634 | for (i = 0; i < ngroups; i++) { |
4541 | overhead += ext4_bg_has_super(sb, i) + | 4635 | gdp = ext4_get_group_desc(sb, i, NULL); |
4542 | ext4_bg_num_gdb(sb, i); | 4636 | overhead += ext4_num_overhead_clusters(sb, i, gdp); |
4543 | cond_resched(); | 4637 | cond_resched(); |
4544 | } | 4638 | } |
4545 | |||
4546 | /* | ||
4547 | * Every block group has an inode bitmap, a block | ||
4548 | * bitmap, and an inode table. | ||
4549 | */ | ||
4550 | overhead += ngroups * (2 + sbi->s_itb_per_group); | ||
4551 | sbi->s_overhead_last = overhead; | 4639 | sbi->s_overhead_last = overhead; |
4552 | smp_wmb(); | 4640 | smp_wmb(); |
4553 | sbi->s_blocks_last = ext4_blocks_count(es); | 4641 | sbi->s_blocks_last = ext4_blocks_count(es); |
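Editor's note: the rewritten loop asks ext4_num_overhead_clusters() for each group's exact overhead and calls cond_resched() every iteration, since a large filesystem can have hundreds of thousands of block groups and this runs in process context. The shape of that pattern, sketched in userspace C with a hypothetical per-group callback:

    #include <sched.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical per-group cost function; in ext4 this role is played
     * by ext4_num_overhead_clusters(sb, group, gdp). */
    static uint64_t group_overhead(unsigned int group)
    {
            return 2 + 512;         /* e.g. two bitmaps plus an inode table */
    }

    int main(void)
    {
            unsigned int ngroups = 100000;
            uint64_t overhead = 0;

            for (unsigned int i = 0; i < ngroups; i++) {
                    overhead += group_overhead(i);
                    sched_yield();  /* userspace analogue of cond_resched() */
            }
            printf("total overhead: %llu clusters\n",
                   (unsigned long long)overhead);
            return 0;
    }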
@@ -4555,11 +4643,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
4555 | 4643 | ||
4556 | buf->f_type = EXT4_SUPER_MAGIC; | 4644 | buf->f_type = EXT4_SUPER_MAGIC; |
4557 | buf->f_bsize = sb->s_blocksize; | 4645 | buf->f_bsize = sb->s_blocksize; |
4558 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; | 4646 | buf->f_blocks = (ext4_blocks_count(es) - |
4559 | bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - | 4647 | EXT4_C2B(sbi, sbi->s_overhead_last)); |
4560 | percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); | 4648 | bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) - |
4649 | percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter); | ||
4561 | /* prevent underflow in case only a little free space is available */ | 4650 | /* prevent underflow in case only a little free space is available */ |
4562 | buf->f_bfree = max_t(s64, bfree, 0); | 4651 | buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0)); |
4563 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); | 4652 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); |
4564 | if (buf->f_bfree < ext4_r_blocks_count(es)) | 4653 | if (buf->f_bfree < ext4_r_blocks_count(es)) |
4565 | buf->f_bavail = 0; | 4654 | buf->f_bavail = 0; |
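Editor's note: the free-space figure is now tracked in clusters and converted to blocks only at the end; the max_t(s64, bfree, 0) clamp matters because the per-cpu free and dirty counters are only approximately consistent, so their difference can transiently go negative. A hedged userspace rendering of the arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int cluster_bits = 4;  /* illustrative bigalloc geometry */
            int64_t free_clusters  = 10;    /* percpu sums are approximate ... */
            int64_t dirty_clusters = 12;    /* ... so free - dirty may dip below 0 */

            int64_t bfree = free_clusters - dirty_clusters;
            if (bfree < 0)                  /* the max_t(s64, bfree, 0) clamp */
                    bfree = 0;

            /* EXT4_C2B(): clusters back to filesystem blocks for statfs */
            uint64_t f_bfree = (uint64_t)bfree << cluster_bits;
            printf("f_bfree = %llu blocks\n", (unsigned long long)f_bfree);
            return 0;
    }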
@@ -4980,13 +5069,11 @@ static int __init ext4_init_fs(void) | |||
4980 | return err; | 5069 | return err; |
4981 | err = ext4_init_system_zone(); | 5070 | err = ext4_init_system_zone(); |
4982 | if (err) | 5071 | if (err) |
4983 | goto out7; | 5072 | goto out6; |
4984 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); | 5073 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); |
4985 | if (!ext4_kset) | 5074 | if (!ext4_kset) |
4986 | goto out6; | ||
4987 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | ||
4988 | if (!ext4_proc_root) | ||
4989 | goto out5; | 5075 | goto out5; |
5076 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | ||
4990 | 5077 | ||
4991 | err = ext4_init_feat_adverts(); | 5078 | err = ext4_init_feat_adverts(); |
4992 | if (err) | 5079 | if (err) |
@@ -5022,12 +5109,12 @@ out2: | |||
5022 | out3: | 5109 | out3: |
5023 | ext4_exit_feat_adverts(); | 5110 | ext4_exit_feat_adverts(); |
5024 | out4: | 5111 | out4: |
5025 | remove_proc_entry("fs/ext4", NULL); | 5112 | if (ext4_proc_root) |
5026 | out5: | 5113 | remove_proc_entry("fs/ext4", NULL); |
5027 | kset_unregister(ext4_kset); | 5114 | kset_unregister(ext4_kset); |
5028 | out6: | 5115 | out5: |
5029 | ext4_exit_system_zone(); | 5116 | ext4_exit_system_zone(); |
5030 | out7: | 5117 | out6: |
5031 | ext4_exit_pageio(); | 5118 | ext4_exit_pageio(); |
5032 | return err; | 5119 | return err; |
5033 | } | 5120 | } |
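Editor's note: proc_mkdir() failure is no longer fatal in ext4_init_fs(), so ext4_proc_root may legitimately be NULL and the error path now guards remove_proc_entry() with a NULL check. The optional-resource idiom, sketched with illustrative names:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            char *required = malloc(32);    /* init fails without this */
            if (!required)
                    return 1;

            char *optional = malloc(32);    /* best-effort, like ext4_proc_root */
            if (!optional)
                    fprintf(stderr, "optional resource missing, continuing\n");

            /* ... normal operation ... */

            if (optional)                   /* only tear down what actually exists */
                    free(optional);
            free(required);
            return 0;
    }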
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index c757adc9725..93a00d89a22 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -820,8 +820,14 @@ inserted: | |||
820 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 820 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
821 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | 821 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; |
822 | 822 | ||
823 | /* | ||
824 | * take i_data_sem because we will test | ||
825 | * i_delalloc_reserved_flag in ext4_mb_new_blocks | ||
826 | */ | ||
827 | down_read((&EXT4_I(inode)->i_data_sem)); | ||
823 | block = ext4_new_meta_blocks(handle, inode, goal, 0, | 828 | block = ext4_new_meta_blocks(handle, inode, goal, 0, |
824 | NULL, &error); | 829 | NULL, &error); |
830 | up_read((&EXT4_I(inode)->i_data_sem)); | ||
825 | if (error) | 831 | if (error) |
826 | goto cleanup; | 832 | goto cleanup; |
827 | 833 | ||
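Editor's note: the xattr allocation path now takes i_data_sem for reading so that ext4_mb_new_blocks() sees a stable i_delalloc_reserved_flag while it runs. The lock/allocate/unlock shape, modelled with a POSIX read-write lock (illustrative names, not the kernel locking API):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t i_data_sem = PTHREAD_RWLOCK_INITIALIZER;
    static int delalloc_reserved;   /* stands in for the per-inode flag */

    static long alloc_meta_block(void)
    {
            /* readers may run concurrently; writers, who flip the flag,
             * are excluded for the duration of the allocation */
            return delalloc_reserved ? -1 : 12345;
    }

    int main(void)
    {
            pthread_rwlock_rdlock(&i_data_sem); /* down_read(&EXT4_I(inode)->i_data_sem) */
            long block = alloc_meta_block();
            pthread_rwlock_unlock(&i_data_sem); /* up_read(...) */

            printf("allocated block %ld\n", block);
            return 0;
    }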
@@ -985,11 +991,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
985 | no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); | 991 | no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); |
986 | ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); | 992 | ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); |
987 | 993 | ||
988 | error = ext4_get_inode_loc(inode, &is.iloc); | 994 | error = ext4_reserve_inode_write(handle, inode, &is.iloc); |
989 | if (error) | ||
990 | goto cleanup; | ||
991 | |||
992 | error = ext4_journal_get_write_access(handle, is.iloc.bh); | ||
993 | if (error) | 995 | if (error) |
994 | goto cleanup; | 996 | goto cleanup; |
995 | 997 | ||
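Editor's note: ext4_reserve_inode_write() bundles the two steps the old code did by hand, looking up the inode's buffer location and then getting journal write access to it, behind one call, so every caller handles failure the same way. A hedged sketch of such a wrapper (signatures are illustrative, not the ext4 ones):

    #include <stdio.h>

    struct iloc { void *bh; };

    /* Illustrative stand-ins for ext4_get_inode_loc() and
     * ext4_journal_get_write_access(). */
    static int get_inode_loc(struct iloc *iloc)   { iloc->bh = (void *)1; return 0; }
    static int journal_get_write_access(void *bh) { return bh ? 0 : -5; }

    /* The wrapper: either both steps succeed, or the caller sees one error. */
    static int reserve_inode_write(struct iloc *iloc)
    {
            int err = get_inode_loc(iloc);

            if (err)
                    return err;
            return journal_get_write_access(iloc->bh);
    }

    int main(void)
    {
            struct iloc il;
            int err = reserve_inode_write(&il);

            printf("reserve_inode_write: %d\n", err);
            return err ? 1 : 0;
    }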
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 9fe061fb877..fea8dd661d2 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -1135,6 +1135,14 @@ static int journal_get_superblock(journal_t *journal) | |||
1135 | goto out; | 1135 | goto out; |
1136 | } | 1136 | } |
1137 | 1137 | ||
1138 | if (be32_to_cpu(sb->s_first) == 0 || | ||
1139 | be32_to_cpu(sb->s_first) >= journal->j_maxlen) { | ||
1140 | printk(KERN_WARNING | ||
1141 | "JBD: Invalid start block of journal: %u\n", | ||
1142 | be32_to_cpu(sb->s_first)); | ||
1143 | goto out; | ||
1144 | } | ||
1145 | |||
1138 | return 0; | 1146 | return 0; |
1139 | 1147 | ||
1140 | out: | 1148 | out: |
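Editor's note: the new check rejects journals whose first usable block is 0 (block 0 holds the journal superblock itself) or lies at or beyond the journal's length. A standalone sketch of the predicate, as a hedged reading of the check rather than the kernel code:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool journal_first_block_valid(uint32_t s_first, uint32_t j_maxlen)
    {
            /* s_first must fall strictly inside the journal area (0, j_maxlen) */
            return s_first != 0 && s_first < j_maxlen;
    }

    int main(void)
    {
            assert(!journal_first_block_valid(0, 1024));    /* rejected: superblock */
            assert(!journal_first_block_valid(1024, 1024)); /* rejected: out of range */
            assert(journal_first_block_valid(1, 1024));
            return 0;
    }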
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index eef6979821a..68d704db787 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -352,7 +352,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
352 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | 352 | J_ASSERT(commit_transaction->t_state == T_RUNNING); |
353 | 353 | ||
354 | trace_jbd2_start_commit(journal, commit_transaction); | 354 | trace_jbd2_start_commit(journal, commit_transaction); |
355 | jbd_debug(1, "JBD: starting commit of transaction %d\n", | 355 | jbd_debug(1, "JBD2: starting commit of transaction %d\n", |
356 | commit_transaction->t_tid); | 356 | commit_transaction->t_tid); |
357 | 357 | ||
358 | write_lock(&journal->j_state_lock); | 358 | write_lock(&journal->j_state_lock); |
@@ -427,7 +427,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
427 | __jbd2_journal_clean_checkpoint_list(journal); | 427 | __jbd2_journal_clean_checkpoint_list(journal); |
428 | spin_unlock(&journal->j_list_lock); | 428 | spin_unlock(&journal->j_list_lock); |
429 | 429 | ||
430 | jbd_debug (3, "JBD: commit phase 1\n"); | 430 | jbd_debug(3, "JBD2: commit phase 1\n"); |
431 | 431 | ||
432 | /* | 432 | /* |
433 | * Switch to a new revoke table. | 433 | * Switch to a new revoke table. |
@@ -447,7 +447,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
447 | wake_up(&journal->j_wait_transaction_locked); | 447 | wake_up(&journal->j_wait_transaction_locked); |
448 | write_unlock(&journal->j_state_lock); | 448 | write_unlock(&journal->j_state_lock); |
449 | 449 | ||
450 | jbd_debug (3, "JBD: commit phase 2\n"); | 450 | jbd_debug(3, "JBD2: commit phase 2\n"); |
451 | 451 | ||
452 | /* | 452 | /* |
453 | * Now start flushing things to disk, in the order they appear | 453 | * Now start flushing things to disk, in the order they appear |
@@ -462,7 +462,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
462 | WRITE_SYNC); | 462 | WRITE_SYNC); |
463 | blk_finish_plug(&plug); | 463 | blk_finish_plug(&plug); |
464 | 464 | ||
465 | jbd_debug(3, "JBD: commit phase 2\n"); | 465 | jbd_debug(3, "JBD2: commit phase 2\n"); |
466 | 466 | ||
467 | /* | 467 | /* |
468 | * Way to go: we have now written out all of the data for a | 468 | * Way to go: we have now written out all of the data for a |
@@ -522,7 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
522 | 522 | ||
523 | J_ASSERT (bufs == 0); | 523 | J_ASSERT (bufs == 0); |
524 | 524 | ||
525 | jbd_debug(4, "JBD: get descriptor\n"); | 525 | jbd_debug(4, "JBD2: get descriptor\n"); |
526 | 526 | ||
527 | descriptor = jbd2_journal_get_descriptor_buffer(journal); | 527 | descriptor = jbd2_journal_get_descriptor_buffer(journal); |
528 | if (!descriptor) { | 528 | if (!descriptor) { |
@@ -531,7 +531,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
531 | } | 531 | } |
532 | 532 | ||
533 | bh = jh2bh(descriptor); | 533 | bh = jh2bh(descriptor); |
534 | jbd_debug(4, "JBD: got buffer %llu (%p)\n", | 534 | jbd_debug(4, "JBD2: got buffer %llu (%p)\n", |
535 | (unsigned long long)bh->b_blocknr, bh->b_data); | 535 | (unsigned long long)bh->b_blocknr, bh->b_data); |
536 | header = (journal_header_t *)&bh->b_data[0]; | 536 | header = (journal_header_t *)&bh->b_data[0]; |
537 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 537 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
@@ -625,7 +625,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
625 | commit_transaction->t_buffers == NULL || | 625 | commit_transaction->t_buffers == NULL || |
626 | space_left < tag_bytes + 16) { | 626 | space_left < tag_bytes + 16) { |
627 | 627 | ||
628 | jbd_debug(4, "JBD: Submit %d IOs\n", bufs); | 628 | jbd_debug(4, "JBD2: Submit %d IOs\n", bufs); |
629 | 629 | ||
630 | /* Write an end-of-descriptor marker before | 630 | /* Write an end-of-descriptor marker before |
631 | submitting the IOs. "tag" still points to | 631 | submitting the IOs. "tag" still points to |
@@ -707,7 +707,7 @@ start_journal_io: | |||
707 | so we incur less scheduling load. | 707 | so we incur less scheduling load. |
708 | */ | 708 | */ |
709 | 709 | ||
710 | jbd_debug(3, "JBD: commit phase 3\n"); | 710 | jbd_debug(3, "JBD2: commit phase 3\n"); |
711 | 711 | ||
712 | /* | 712 | /* |
713 | * akpm: these are BJ_IO, and j_list_lock is not needed. | 713 | * akpm: these are BJ_IO, and j_list_lock is not needed. |
@@ -771,7 +771,7 @@ wait_for_iobuf: | |||
771 | 771 | ||
772 | J_ASSERT (commit_transaction->t_shadow_list == NULL); | 772 | J_ASSERT (commit_transaction->t_shadow_list == NULL); |
773 | 773 | ||
774 | jbd_debug(3, "JBD: commit phase 4\n"); | 774 | jbd_debug(3, "JBD2: commit phase 4\n"); |
775 | 775 | ||
776 | /* Here we wait for the revoke record and descriptor record buffers */ | 776 | /* Here we wait for the revoke record and descriptor record buffers */ |
777 | wait_for_ctlbuf: | 777 | wait_for_ctlbuf: |
@@ -801,7 +801,7 @@ wait_for_iobuf: | |||
801 | if (err) | 801 | if (err) |
802 | jbd2_journal_abort(journal, err); | 802 | jbd2_journal_abort(journal, err); |
803 | 803 | ||
804 | jbd_debug(3, "JBD: commit phase 5\n"); | 804 | jbd_debug(3, "JBD2: commit phase 5\n"); |
805 | write_lock(&journal->j_state_lock); | 805 | write_lock(&journal->j_state_lock); |
806 | J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH); | 806 | J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH); |
807 | commit_transaction->t_state = T_COMMIT_JFLUSH; | 807 | commit_transaction->t_state = T_COMMIT_JFLUSH; |
@@ -830,7 +830,7 @@ wait_for_iobuf: | |||
830 | transaction can be removed from any checkpoint list it was on | 830 | transaction can be removed from any checkpoint list it was on |
831 | before. */ | 831 | before. */ |
832 | 832 | ||
833 | jbd_debug(3, "JBD: commit phase 6\n"); | 833 | jbd_debug(3, "JBD2: commit phase 6\n"); |
834 | 834 | ||
835 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); | 835 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); |
836 | J_ASSERT(commit_transaction->t_buffers == NULL); | 836 | J_ASSERT(commit_transaction->t_buffers == NULL); |
@@ -964,7 +964,7 @@ restart_loop: | |||
964 | 964 | ||
965 | /* Done with this transaction! */ | 965 | /* Done with this transaction! */ |
966 | 966 | ||
967 | jbd_debug(3, "JBD: commit phase 7\n"); | 967 | jbd_debug(3, "JBD2: commit phase 7\n"); |
968 | 968 | ||
969 | J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH); | 969 | J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH); |
970 | 970 | ||
@@ -1039,7 +1039,7 @@ restart_loop: | |||
1039 | journal->j_commit_callback(journal, commit_transaction); | 1039 | journal->j_commit_callback(journal, commit_transaction); |
1040 | 1040 | ||
1041 | trace_jbd2_end_commit(journal, commit_transaction); | 1041 | trace_jbd2_end_commit(journal, commit_transaction); |
1042 | jbd_debug(1, "JBD: commit %d complete, head %d\n", | 1042 | jbd_debug(1, "JBD2: commit %d complete, head %d\n", |
1043 | journal->j_commit_sequence, journal->j_tail_sequence); | 1043 | journal->j_commit_sequence, journal->j_tail_sequence); |
1044 | if (to_free) | 1044 | if (to_free) |
1045 | kfree(commit_transaction); | 1045 | kfree(commit_transaction); |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index f24df13adc4..0fa0123151d 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -491,7 +491,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) | |||
491 | */ | 491 | */ |
492 | 492 | ||
493 | journal->j_commit_request = target; | 493 | journal->j_commit_request = target; |
494 | jbd_debug(1, "JBD: requesting commit %d/%d\n", | 494 | jbd_debug(1, "JBD2: requesting commit %d/%d\n", |
495 | journal->j_commit_request, | 495 | journal->j_commit_request, |
496 | journal->j_commit_sequence); | 496 | journal->j_commit_sequence); |
497 | wake_up(&journal->j_wait_commit); | 497 | wake_up(&journal->j_wait_commit); |
@@ -500,7 +500,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) | |||
500 | /* This should never happen, but if it does, preserve | 500 | /* This should never happen, but if it does, preserve |
501 | the evidence before kjournald goes into a loop and | 501 | the evidence before kjournald goes into a loop and |
502 | increments j_commit_sequence beyond all recognition. */ | 502 | increments j_commit_sequence beyond all recognition. */ |
503 | WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n", | 503 | WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n", |
504 | journal->j_commit_request, | 504 | journal->j_commit_request, |
505 | journal->j_commit_sequence, | 505 | journal->j_commit_sequence, |
506 | target, journal->j_running_transaction ? | 506 | target, journal->j_running_transaction ? |
@@ -645,7 +645,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) | |||
645 | } | 645 | } |
646 | #endif | 646 | #endif |
647 | while (tid_gt(tid, journal->j_commit_sequence)) { | 647 | while (tid_gt(tid, journal->j_commit_sequence)) { |
648 | jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", | 648 | jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", |
649 | tid, journal->j_commit_sequence); | 649 | tid, journal->j_commit_sequence); |
650 | wake_up(&journal->j_wait_commit); | 650 | wake_up(&journal->j_wait_commit); |
651 | read_unlock(&journal->j_state_lock); | 651 | read_unlock(&journal->j_state_lock); |
@@ -1093,7 +1093,7 @@ static int journal_reset(journal_t *journal) | |||
1093 | first = be32_to_cpu(sb->s_first); | 1093 | first = be32_to_cpu(sb->s_first); |
1094 | last = be32_to_cpu(sb->s_maxlen); | 1094 | last = be32_to_cpu(sb->s_maxlen); |
1095 | if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { | 1095 | if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { |
1096 | printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n", | 1096 | printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n", |
1097 | first, last); | 1097 | first, last); |
1098 | journal_fail_superblock(journal); | 1098 | journal_fail_superblock(journal); |
1099 | return -EINVAL; | 1099 | return -EINVAL; |
@@ -1139,7 +1139,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
1139 | */ | 1139 | */ |
1140 | if (sb->s_start == 0 && journal->j_tail_sequence == | 1140 | if (sb->s_start == 0 && journal->j_tail_sequence == |
1141 | journal->j_transaction_sequence) { | 1141 | journal->j_transaction_sequence) { |
1142 | jbd_debug(1,"JBD: Skipping superblock update on recovered sb " | 1142 | jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " |
1143 | "(start %ld, seq %d, errno %d)\n", | 1143 | "(start %ld, seq %d, errno %d)\n", |
1144 | journal->j_tail, journal->j_tail_sequence, | 1144 | journal->j_tail, journal->j_tail_sequence, |
1145 | journal->j_errno); | 1145 | journal->j_errno); |
@@ -1163,7 +1163,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
1163 | } | 1163 | } |
1164 | 1164 | ||
1165 | read_lock(&journal->j_state_lock); | 1165 | read_lock(&journal->j_state_lock); |
1166 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", | 1166 | jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n", |
1167 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); | 1167 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); |
1168 | 1168 | ||
1169 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); | 1169 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); |
@@ -1216,8 +1216,8 @@ static int journal_get_superblock(journal_t *journal) | |||
1216 | ll_rw_block(READ, 1, &bh); | 1216 | ll_rw_block(READ, 1, &bh); |
1217 | wait_on_buffer(bh); | 1217 | wait_on_buffer(bh); |
1218 | if (!buffer_uptodate(bh)) { | 1218 | if (!buffer_uptodate(bh)) { |
1219 | printk (KERN_ERR | 1219 | printk(KERN_ERR |
1220 | "JBD: IO error reading journal superblock\n"); | 1220 | "JBD2: IO error reading journal superblock\n"); |
1221 | goto out; | 1221 | goto out; |
1222 | } | 1222 | } |
1223 | } | 1223 | } |
@@ -1228,7 +1228,7 @@ static int journal_get_superblock(journal_t *journal) | |||
1228 | 1228 | ||
1229 | if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || | 1229 | if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || |
1230 | sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { | 1230 | sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { |
1231 | printk(KERN_WARNING "JBD: no valid journal superblock found\n"); | 1231 | printk(KERN_WARNING "JBD2: no valid journal superblock found\n"); |
1232 | goto out; | 1232 | goto out; |
1233 | } | 1233 | } |
1234 | 1234 | ||
@@ -1240,14 +1240,22 @@ static int journal_get_superblock(journal_t *journal) | |||
1240 | journal->j_format_version = 2; | 1240 | journal->j_format_version = 2; |
1241 | break; | 1241 | break; |
1242 | default: | 1242 | default: |
1243 | printk(KERN_WARNING "JBD: unrecognised superblock format ID\n"); | 1243 | printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n"); |
1244 | goto out; | 1244 | goto out; |
1245 | } | 1245 | } |
1246 | 1246 | ||
1247 | if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) | 1247 | if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) |
1248 | journal->j_maxlen = be32_to_cpu(sb->s_maxlen); | 1248 | journal->j_maxlen = be32_to_cpu(sb->s_maxlen); |
1249 | else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { | 1249 | else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { |
1250 | printk (KERN_WARNING "JBD: journal file too short\n"); | 1250 | printk(KERN_WARNING "JBD2: journal file too short\n"); |
1251 | goto out; | ||
1252 | } | ||
1253 | |||
1254 | if (be32_to_cpu(sb->s_first) == 0 || | ||
1255 | be32_to_cpu(sb->s_first) >= journal->j_maxlen) { | ||
1256 | printk(KERN_WARNING | ||
1257 | "JBD2: Invalid start block of journal: %u\n", | ||
1258 | be32_to_cpu(sb->s_first)); | ||
1251 | goto out; | 1259 | goto out; |
1252 | } | 1260 | } |
1253 | 1261 | ||
@@ -1310,8 +1318,8 @@ int jbd2_journal_load(journal_t *journal) | |||
1310 | ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || | 1318 | ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || |
1311 | (sb->s_feature_incompat & | 1319 | (sb->s_feature_incompat & |
1312 | ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { | 1320 | ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { |
1313 | printk (KERN_WARNING | 1321 | printk(KERN_WARNING |
1314 | "JBD: Unrecognised features on journal\n"); | 1322 | "JBD2: Unrecognised features on journal\n"); |
1315 | return -EINVAL; | 1323 | return -EINVAL; |
1316 | } | 1324 | } |
1317 | } | 1325 | } |
@@ -1346,7 +1354,7 @@ int jbd2_journal_load(journal_t *journal) | |||
1346 | return 0; | 1354 | return 0; |
1347 | 1355 | ||
1348 | recovery_error: | 1356 | recovery_error: |
1349 | printk (KERN_WARNING "JBD: recovery failed\n"); | 1357 | printk(KERN_WARNING "JBD2: recovery failed\n"); |
1350 | return -EIO; | 1358 | return -EIO; |
1351 | } | 1359 | } |
1352 | 1360 | ||
@@ -1577,7 +1585,7 @@ static int journal_convert_superblock_v1(journal_t *journal, | |||
1577 | struct buffer_head *bh; | 1585 | struct buffer_head *bh; |
1578 | 1586 | ||
1579 | printk(KERN_WARNING | 1587 | printk(KERN_WARNING |
1580 | "JBD: Converting superblock from version 1 to 2.\n"); | 1588 | "JBD2: Converting superblock from version 1 to 2.\n"); |
1581 | 1589 | ||
1582 | /* Pre-initialise new fields to zero */ | 1590 | /* Pre-initialise new fields to zero */ |
1583 | offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); | 1591 | offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); |
@@ -1694,7 +1702,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) | |||
1694 | if (!journal->j_tail) | 1702 | if (!journal->j_tail) |
1695 | goto no_recovery; | 1703 | goto no_recovery; |
1696 | 1704 | ||
1697 | printk (KERN_WARNING "JBD: %s recovery information on journal\n", | 1705 | printk(KERN_WARNING "JBD2: %s recovery information on journal\n", |
1698 | write ? "Clearing" : "Ignoring"); | 1706 | write ? "Clearing" : "Ignoring"); |
1699 | 1707 | ||
1700 | err = jbd2_journal_skip_recovery(journal); | 1708 | err = jbd2_journal_skip_recovery(journal); |
@@ -2020,7 +2028,7 @@ static int journal_init_jbd2_journal_head_cache(void) | |||
2020 | retval = 0; | 2028 | retval = 0; |
2021 | if (!jbd2_journal_head_cache) { | 2029 | if (!jbd2_journal_head_cache) { |
2022 | retval = -ENOMEM; | 2030 | retval = -ENOMEM; |
2023 | printk(KERN_EMERG "JBD: no memory for journal_head cache\n"); | 2031 | printk(KERN_EMERG "JBD2: no memory for journal_head cache\n"); |
2024 | } | 2032 | } |
2025 | return retval; | 2033 | return retval; |
2026 | } | 2034 | } |
@@ -2383,7 +2391,7 @@ static void __exit journal_exit(void) | |||
2383 | #ifdef CONFIG_JBD2_DEBUG | 2391 | #ifdef CONFIG_JBD2_DEBUG |
2384 | int n = atomic_read(&nr_journal_heads); | 2392 | int n = atomic_read(&nr_journal_heads); |
2385 | if (n) | 2393 | if (n) |
2386 | printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); | 2394 | printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n); |
2387 | #endif | 2395 | #endif |
2388 | jbd2_remove_debugfs_entry(); | 2396 | jbd2_remove_debugfs_entry(); |
2389 | jbd2_remove_jbd_stats_proc_entry(); | 2397 | jbd2_remove_jbd_stats_proc_entry(); |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 1cad869494f..da6d7baf139 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -89,7 +89,7 @@ static int do_readahead(journal_t *journal, unsigned int start) | |||
89 | err = jbd2_journal_bmap(journal, next, &blocknr); | 89 | err = jbd2_journal_bmap(journal, next, &blocknr); |
90 | 90 | ||
91 | if (err) { | 91 | if (err) { |
92 | printk (KERN_ERR "JBD: bad block at offset %u\n", | 92 | printk(KERN_ERR "JBD2: bad block at offset %u\n", |
93 | next); | 93 | next); |
94 | goto failed; | 94 | goto failed; |
95 | } | 95 | } |
@@ -138,14 +138,14 @@ static int jread(struct buffer_head **bhp, journal_t *journal, | |||
138 | *bhp = NULL; | 138 | *bhp = NULL; |
139 | 139 | ||
140 | if (offset >= journal->j_maxlen) { | 140 | if (offset >= journal->j_maxlen) { |
141 | printk(KERN_ERR "JBD: corrupted journal superblock\n"); | 141 | printk(KERN_ERR "JBD2: corrupted journal superblock\n"); |
142 | return -EIO; | 142 | return -EIO; |
143 | } | 143 | } |
144 | 144 | ||
145 | err = jbd2_journal_bmap(journal, offset, &blocknr); | 145 | err = jbd2_journal_bmap(journal, offset, &blocknr); |
146 | 146 | ||
147 | if (err) { | 147 | if (err) { |
148 | printk (KERN_ERR "JBD: bad block at offset %u\n", | 148 | printk(KERN_ERR "JBD2: bad block at offset %u\n", |
149 | offset); | 149 | offset); |
150 | return err; | 150 | return err; |
151 | } | 151 | } |
@@ -163,7 +163,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, | |||
163 | } | 163 | } |
164 | 164 | ||
165 | if (!buffer_uptodate(bh)) { | 165 | if (!buffer_uptodate(bh)) { |
166 | printk (KERN_ERR "JBD: Failed to read block at offset %u\n", | 166 | printk(KERN_ERR "JBD2: Failed to read block at offset %u\n", |
167 | offset); | 167 | offset); |
168 | brelse(bh); | 168 | brelse(bh); |
169 | return -EIO; | 169 | return -EIO; |
@@ -251,10 +251,10 @@ int jbd2_journal_recover(journal_t *journal) | |||
251 | if (!err) | 251 | if (!err) |
252 | err = do_one_pass(journal, &info, PASS_REPLAY); | 252 | err = do_one_pass(journal, &info, PASS_REPLAY); |
253 | 253 | ||
254 | jbd_debug(1, "JBD: recovery, exit status %d, " | 254 | jbd_debug(1, "JBD2: recovery, exit status %d, " |
255 | "recovered transactions %u to %u\n", | 255 | "recovered transactions %u to %u\n", |
256 | err, info.start_transaction, info.end_transaction); | 256 | err, info.start_transaction, info.end_transaction); |
257 | jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n", | 257 | jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", |
258 | info.nr_replays, info.nr_revoke_hits, info.nr_revokes); | 258 | info.nr_replays, info.nr_revoke_hits, info.nr_revokes); |
259 | 259 | ||
260 | /* Restart the log at the next transaction ID, thus invalidating | 260 | /* Restart the log at the next transaction ID, thus invalidating |
@@ -293,14 +293,14 @@ int jbd2_journal_skip_recovery(journal_t *journal) | |||
293 | err = do_one_pass(journal, &info, PASS_SCAN); | 293 | err = do_one_pass(journal, &info, PASS_SCAN); |
294 | 294 | ||
295 | if (err) { | 295 | if (err) { |
296 | printk(KERN_ERR "JBD: error %d scanning journal\n", err); | 296 | printk(KERN_ERR "JBD2: error %d scanning journal\n", err); |
297 | ++journal->j_transaction_sequence; | 297 | ++journal->j_transaction_sequence; |
298 | } else { | 298 | } else { |
299 | #ifdef CONFIG_JBD2_DEBUG | 299 | #ifdef CONFIG_JBD2_DEBUG |
300 | int dropped = info.end_transaction - | 300 | int dropped = info.end_transaction - |
301 | be32_to_cpu(journal->j_superblock->s_sequence); | 301 | be32_to_cpu(journal->j_superblock->s_sequence); |
302 | jbd_debug(1, | 302 | jbd_debug(1, |
303 | "JBD: ignoring %d transaction%s from the journal.\n", | 303 | "JBD2: ignoring %d transaction%s from the journal.\n", |
304 | dropped, (dropped == 1) ? "" : "s"); | 304 | dropped, (dropped == 1) ? "" : "s"); |
305 | #endif | 305 | #endif |
306 | journal->j_transaction_sequence = ++info.end_transaction; | 306 | journal->j_transaction_sequence = ++info.end_transaction; |
@@ -338,7 +338,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh, | |||
338 | wrap(journal, *next_log_block); | 338 | wrap(journal, *next_log_block); |
339 | err = jread(&obh, journal, io_block); | 339 | err = jread(&obh, journal, io_block); |
340 | if (err) { | 340 | if (err) { |
341 | printk(KERN_ERR "JBD: IO error %d recovering block " | 341 | printk(KERN_ERR "JBD2: IO error %d recovering block " |
342 | "%lu in log\n", err, io_block); | 342 | "%lu in log\n", err, io_block); |
343 | return 1; | 343 | return 1; |
344 | } else { | 344 | } else { |
@@ -411,7 +411,7 @@ static int do_one_pass(journal_t *journal, | |||
411 | * either the next descriptor block or the final commit | 411 | * either the next descriptor block or the final commit |
412 | * record. */ | 412 | * record. */ |
413 | 413 | ||
414 | jbd_debug(3, "JBD: checking block %ld\n", next_log_block); | 414 | jbd_debug(3, "JBD2: checking block %ld\n", next_log_block); |
415 | err = jread(&bh, journal, next_log_block); | 415 | err = jread(&bh, journal, next_log_block); |
416 | if (err) | 416 | if (err) |
417 | goto failed; | 417 | goto failed; |
@@ -491,8 +491,8 @@ static int do_one_pass(journal_t *journal, | |||
491 | /* Recover what we can, but | 491 | /* Recover what we can, but |
492 | * report failure at the end. */ | 492 | * report failure at the end. */ |
493 | success = err; | 493 | success = err; |
494 | printk (KERN_ERR | 494 | printk(KERN_ERR |
495 | "JBD: IO error %d recovering " | 495 | "JBD2: IO error %d recovering " |
496 | "block %ld in log\n", | 496 | "block %ld in log\n", |
497 | err, io_block); | 497 | err, io_block); |
498 | } else { | 498 | } else { |
@@ -520,7 +520,7 @@ static int do_one_pass(journal_t *journal, | |||
520 | journal->j_blocksize); | 520 | journal->j_blocksize); |
521 | if (nbh == NULL) { | 521 | if (nbh == NULL) { |
522 | printk(KERN_ERR | 522 | printk(KERN_ERR |
523 | "JBD: Out of memory " | 523 | "JBD2: Out of memory " |
524 | "during recovery.\n"); | 524 | "during recovery.\n"); |
525 | err = -ENOMEM; | 525 | err = -ENOMEM; |
526 | brelse(bh); | 526 | brelse(bh); |
@@ -689,7 +689,7 @@ static int do_one_pass(journal_t *journal, | |||
689 | /* It's really bad news if different passes end up at | 689 | /* It's really bad news if different passes end up at |
690 | * different places (but possible due to IO errors). */ | 690 | * different places (but possible due to IO errors). */ |
691 | if (info->end_transaction != next_commit_ID) { | 691 | if (info->end_transaction != next_commit_ID) { |
692 | printk (KERN_ERR "JBD: recovery pass %d ended at " | 692 | printk(KERN_ERR "JBD2: recovery pass %d ended at " |
693 | "transaction %u, expected %u\n", | 693 | "transaction %u, expected %u\n", |
694 | pass, next_commit_ID, info->end_transaction); | 694 | pass, next_commit_ID, info->end_transaction); |
695 | if (!success) | 695 | if (!success) |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 2d7109414cd..a0e41a4c080 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
28 | #include <linux/hrtimer.h> | 28 | #include <linux/hrtimer.h> |
29 | #include <linux/backing-dev.h> | 29 | #include <linux/backing-dev.h> |
30 | #include <linux/bug.h> | ||
30 | #include <linux/module.h> | 31 | #include <linux/module.h> |
31 | 32 | ||
32 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | 33 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); |
@@ -115,7 +116,7 @@ static inline void update_t_max_wait(transaction_t *transaction, | |||
115 | */ | 116 | */ |
116 | 117 | ||
117 | static int start_this_handle(journal_t *journal, handle_t *handle, | 118 | static int start_this_handle(journal_t *journal, handle_t *handle, |
118 | int gfp_mask) | 119 | gfp_t gfp_mask) |
119 | { | 120 | { |
120 | transaction_t *transaction, *new_transaction = NULL; | 121 | transaction_t *transaction, *new_transaction = NULL; |
121 | tid_t tid; | 122 | tid_t tid; |
@@ -124,7 +125,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle, | |||
124 | unsigned long ts = jiffies; | 125 | unsigned long ts = jiffies; |
125 | 126 | ||
126 | if (nblocks > journal->j_max_transaction_buffers) { | 127 | if (nblocks > journal->j_max_transaction_buffers) { |
127 | printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", | 128 | printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n", |
128 | current->comm, nblocks, | 129 | current->comm, nblocks, |
129 | journal->j_max_transaction_buffers); | 130 | journal->j_max_transaction_buffers); |
130 | return -ENOSPC; | 131 | return -ENOSPC; |
@@ -320,7 +321,7 @@ static handle_t *new_handle(int nblocks) | |||
320 | * Return a pointer to a newly allocated handle, or an ERR_PTR() value | 321 | * Return a pointer to a newly allocated handle, or an ERR_PTR() value |
321 | * on failure. | 322 | * on failure. |
322 | */ | 323 | */ |
323 | handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask) | 324 | handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask) |
324 | { | 325 | { |
325 | handle_t *handle = journal_current_handle(); | 326 | handle_t *handle = journal_current_handle(); |
326 | int err; | 327 | int err; |
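Editor's note: switching gfp_mask from int to gfp_t is a pure type-safety change; gfp_t is a sparse __bitwise type, so mixing it with plain integers or with other flag families is caught by static analysis while the generated code is identical. A minimal illustration of the same idea using a wrapper struct (the kernel uses a __bitwise typedef instead):

    #include <stdio.h>

    /* A strongly typed flag word: plain ints no longer convert silently. */
    typedef struct { unsigned int bits; } gfp_like_t;

    #define GFP_A ((gfp_like_t){ .bits = 1u << 0 })
    #define GFP_B ((gfp_like_t){ .bits = 1u << 1 })

    static void start_handle(int nblocks, gfp_like_t gfp_mask)
    {
            printf("nblocks=%d gfp=0x%x\n", nblocks, gfp_mask.bits);
    }

    int main(void)
    {
            start_handle(8, GFP_A);
            /* start_handle(8, 1);  -- now a compile error, like sparse's warning */
            return 0;
    }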
@@ -443,7 +444,7 @@ out: | |||
443 | * transaction capable of guaranteeing the requested number of | 444 | * transaction capable of guaranteeing the requested number of |
444 | * credits. | 445 | * credits. |
445 | */ | 446 | */ |
446 | int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) | 447 | int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) |
447 | { | 448 | { |
448 | transaction_t *transaction = handle->h_transaction; | 449 | transaction_t *transaction = handle->h_transaction; |
449 | journal_t *journal = transaction->t_journal; | 450 | journal_t *journal = transaction->t_journal; |
@@ -563,7 +564,7 @@ static void warn_dirty_buffer(struct buffer_head *bh) | |||
563 | char b[BDEVNAME_SIZE]; | 564 | char b[BDEVNAME_SIZE]; |
564 | 565 | ||
565 | printk(KERN_WARNING | 566 | printk(KERN_WARNING |
566 | "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). " | 567 | "JBD2: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). " |
567 | "There's a risk of filesystem corruption in case of system " | 568 | "There's a risk of filesystem corruption in case of system " |
568 | "crash.\n", | 569 | "crash.\n", |
569 | bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); | 570 | bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); |
@@ -1049,6 +1050,10 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh, | |||
1049 | * mark dirty metadata which needs to be journaled as part of the current | 1050 | * mark dirty metadata which needs to be journaled as part of the current |
1050 | * transaction. | 1051 | * transaction. |
1051 | * | 1052 | * |
1053 | * The buffer must have previously had jbd2_journal_get_write_access() | ||
1054 | * called so that it has a valid journal_head attached to the buffer | ||
1055 | * head. | ||
1056 | * | ||
1052 | * The buffer is placed on the transaction's metadata list and is marked | 1057 | * The buffer is placed on the transaction's metadata list and is marked |
1053 | * as belonging to the transaction. | 1058 | * as belonging to the transaction. |
1054 | * | 1059 | * |
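Editor's note: the added kerneldoc spells out the contract that a buffer must first pass through jbd2_journal_get_write_access(), which attaches the journal_head, before jbd2_journal_dirty_metadata() may be called on it. The canonical calling sequence, as a fragment using the real jbd2 entry points (not standalone-compilable):

    int err;

    err = jbd2_journal_get_write_access(handle, bh);  /* attaches the journal_head */
    if (err)
            goto out;

    /* ... modify bh->b_data; the buffer now belongs to this handle ... */

    err = jbd2_journal_dirty_metadata(handle, bh);    /* file on the metadata list */
    if (err)
            goto out;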
@@ -1065,11 +1070,16 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1065 | transaction_t *transaction = handle->h_transaction; | 1070 | transaction_t *transaction = handle->h_transaction; |
1066 | journal_t *journal = transaction->t_journal; | 1071 | journal_t *journal = transaction->t_journal; |
1067 | struct journal_head *jh = bh2jh(bh); | 1072 | struct journal_head *jh = bh2jh(bh); |
1073 | int ret = 0; | ||
1068 | 1074 | ||
1069 | jbd_debug(5, "journal_head %p\n", jh); | 1075 | jbd_debug(5, "journal_head %p\n", jh); |
1070 | JBUFFER_TRACE(jh, "entry"); | 1076 | JBUFFER_TRACE(jh, "entry"); |
1071 | if (is_handle_aborted(handle)) | 1077 | if (is_handle_aborted(handle)) |
1072 | goto out; | 1078 | goto out; |
1079 | if (!buffer_jbd(bh)) { | ||
1080 | ret = -EUCLEAN; | ||
1081 | goto out; | ||
1082 | } | ||
1073 | 1083 | ||
1074 | jbd_lock_bh_state(bh); | 1084 | jbd_lock_bh_state(bh); |
1075 | 1085 | ||
@@ -1093,8 +1103,20 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1093 | */ | 1103 | */ |
1094 | if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) { | 1104 | if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) { |
1095 | JBUFFER_TRACE(jh, "fastpath"); | 1105 | JBUFFER_TRACE(jh, "fastpath"); |
1096 | J_ASSERT_JH(jh, jh->b_transaction == | 1106 | if (unlikely(jh->b_transaction != |
1097 | journal->j_running_transaction); | 1107 | journal->j_running_transaction)) { |
1108 | printk(KERN_EMERG "JBD: %s: " | ||
1109 | "jh->b_transaction (%llu, %p, %u) != " | ||
1110 | "journal->j_running_transaction (%p, %u)", | ||
1111 | journal->j_devname, | ||
1112 | (unsigned long long) bh->b_blocknr, | ||
1113 | jh->b_transaction, | ||
1114 | jh->b_transaction ? jh->b_transaction->t_tid : 0, | ||
1115 | journal->j_running_transaction, | ||
1116 | journal->j_running_transaction ? | ||
1117 | journal->j_running_transaction->t_tid : 0); | ||
1118 | ret = -EINVAL; | ||
1119 | } | ||
1098 | goto out_unlock_bh; | 1120 | goto out_unlock_bh; |
1099 | } | 1121 | } |
1100 | 1122 | ||
@@ -1108,9 +1130,32 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1108 | */ | 1130 | */ |
1109 | if (jh->b_transaction != transaction) { | 1131 | if (jh->b_transaction != transaction) { |
1110 | JBUFFER_TRACE(jh, "already on other transaction"); | 1132 | JBUFFER_TRACE(jh, "already on other transaction"); |
1111 | J_ASSERT_JH(jh, jh->b_transaction == | 1133 | if (unlikely(jh->b_transaction != |
1112 | journal->j_committing_transaction); | 1134 | journal->j_committing_transaction)) { |
1113 | J_ASSERT_JH(jh, jh->b_next_transaction == transaction); | 1135 | printk(KERN_EMERG "JBD: %s: " |
1136 | "jh->b_transaction (%llu, %p, %u) != " | ||
1137 | "journal->j_committing_transaction (%p, %u)", | ||
1138 | journal->j_devname, | ||
1139 | (unsigned long long) bh->b_blocknr, | ||
1140 | jh->b_transaction, | ||
1141 | jh->b_transaction ? jh->b_transaction->t_tid : 0, | ||
1142 | journal->j_committing_transaction, | ||
1143 | journal->j_committing_transaction ? | ||
1144 | journal->j_committing_transaction->t_tid : 0); | ||
1145 | ret = -EINVAL; | ||
1146 | } | ||
1147 | if (unlikely(jh->b_next_transaction != transaction)) { | ||
1148 | printk(KERN_EMERG "JBD: %s: " | ||
1149 | "jh->b_next_transaction (%llu, %p, %u) != " | ||
1150 | "transaction (%p, %u)", | ||
1151 | journal->j_devname, | ||
1152 | (unsigned long long) bh->b_blocknr, | ||
1153 | jh->b_next_transaction, | ||
1154 | jh->b_next_transaction ? | ||
1155 | jh->b_next_transaction->t_tid : 0, | ||
1156 | transaction, transaction->t_tid); | ||
1157 | ret = -EINVAL; | ||
1158 | } | ||
1114 | /* And this case is illegal: we can't reuse another | 1159 | /* And this case is illegal: we can't reuse another |
1115 | * transaction's data buffer, ever. */ | 1160 | * transaction's data buffer, ever. */ |
1116 | goto out_unlock_bh; | 1161 | goto out_unlock_bh; |
@@ -1127,7 +1172,8 @@ out_unlock_bh: | |||
1127 | jbd_unlock_bh_state(bh); | 1172 | jbd_unlock_bh_state(bh); |
1128 | out: | 1173 | out: |
1129 | JBUFFER_TRACE(jh, "exit"); | 1174 | JBUFFER_TRACE(jh, "exit"); |
1130 | return 0; | 1175 | WARN_ON(ret); /* All errors are bugs, so dump the stack */ |
1176 | return ret; | ||
1131 | } | 1177 | } |
1132 | 1178 | ||
1133 | /* | 1179 | /* |
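Editor's note: with this change jbd2_journal_dirty_metadata() stops asserting on inconsistent journaling state and instead returns -EUCLEAN or -EINVAL, dumping a stack via WARN_ON(ret) and leaving recovery policy to the caller. The caller-side pattern this enables, as a hedged fragment (ext4's dirty-metadata wrapper does something along these lines):

    err = jbd2_journal_dirty_metadata(handle, bh);
    if (err) {
            /* -EUCLEAN: bh has no journal_head; -EINVAL: bad transaction
             * linkage. The state is corrupt, so abort the journal rather
             * than writing through bad metadata; 'journal' here is the
             * journal_t the handle belongs to. */
            jbd2_journal_abort(journal, err);
    }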