Diffstat (limited to 'fs/ext4')

 -rw-r--r--  fs/ext4/balloc.c           |   18
 -rw-r--r--  fs/ext4/dir.c              |  158
 -rw-r--r--  fs/ext4/ext4.h             |  189
 -rw-r--r--  fs/ext4/ext4_jbd2.c        |   58
 -rw-r--r--  fs/ext4/ext4_jbd2.h        |   29
 -rw-r--r--  fs/ext4/extents.c          |  214
 -rw-r--r--  fs/ext4/extents_status.c   |  144
 -rw-r--r--  fs/ext4/extents_status.h   |    5
 -rw-r--r--  fs/ext4/file.c             |   38
 -rw-r--r--  fs/ext4/fsync.c            |   52
 -rw-r--r--  fs/ext4/ialloc.c           |   13
 -rw-r--r--  fs/ext4/indirect.c         |   40
 -rw-r--r--  fs/ext4/inline.c           |  168
 -rw-r--r--  fs/ext4/inode.c            | 1791
 -rw-r--r--  fs/ext4/ioctl.c            |    6
 -rw-r--r--  fs/ext4/mballoc.c          |   32
 -rw-r--r--  fs/ext4/move_extent.c      |    3
 -rw-r--r--  fs/ext4/namei.c            |   54
 -rw-r--r--  fs/ext4/page-io.c          |  336
 -rw-r--r--  fs/ext4/resize.c           |   24
 -rw-r--r--  fs/ext4/super.c            |  189

 21 files changed, 1839 insertions(+), 1722 deletions(-)
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d0f13eada0ed..ddd715e42a5c 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -38,8 +38,8 @@ ext4_group_t ext4_get_group_number(struct super_block *sb,
 	ext4_group_t group;
 
 	if (test_opt2(sb, STD_GROUP_SIZE))
-		group = (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
-			 block) >>
+		group = (block -
+			 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) >>
 			(EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3);
 	else
 		ext4_get_group_no_and_offset(sb, block, &group, NULL);
@@ -682,11 +682,15 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
 
 static inline int test_root(ext4_group_t a, int b)
 {
-	int num = b;
-
-	while (a > num)
-		num *= b;
-	return num == a;
+	while (1) {
+		if (a < b)
+			return 0;
+		if (a == b)
+			return 1;
+		if ((a % b) != 0)
+			return 0;
+		a = a / b;
+	}
 }
 
 static int ext4_group_sparse(ext4_group_t group)
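Note: the rewritten test_root() (used by ext4_group_sparse() to decide whether a group number is a power of 3, 5 or 7) divides `a` down instead of multiplying an accumulator up, so the old failure mode, where `num *= b` could overflow and the loop never terminate, is gone. A standalone userspace restatement of the predicate; the main() harness is illustrative only:

	#include <assert.h>

	/* Returns 1 iff a == b^k for some k >= 1.  Same logic as the
	 * patched kernel helper, restated over plain unsigned int. */
	static int test_root(unsigned int a, unsigned int b)
	{
		while (1) {
			if (a < b)
				return 0;
			if (a == b)
				return 1;
			if ((a % b) != 0)
				return 0;
			a = a / b;
		}
	}

	int main(void)
	{
		assert(test_root(7, 7));	/* 7^1 */
		assert(test_root(49, 7));	/* 7^2 */
		assert(!test_root(50, 7));
		assert(!test_root(1, 7));	/* callers special-case groups 0 and 1 */
		return 0;
	}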
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index f8d56e4254e0..3c7d288ae94c 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -29,8 +29,7 @@
 #include "ext4.h"
 #include "xattr.h"
 
-static int ext4_dx_readdir(struct file *filp,
-			   void *dirent, filldir_t filldir);
+static int ext4_dx_readdir(struct file *, struct dir_context *);
 
 /**
  * Check if the given dir-inode refers to an htree-indexed directory
@@ -103,60 +102,56 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
 	return 1;
 }
 
-static int ext4_readdir(struct file *filp,
-			void *dirent, filldir_t filldir)
+static int ext4_readdir(struct file *file, struct dir_context *ctx)
 {
-	int error = 0;
 	unsigned int offset;
 	int i, stored;
 	struct ext4_dir_entry_2 *de;
 	int err;
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = file_inode(file);
 	struct super_block *sb = inode->i_sb;
-	int ret = 0;
 	int dir_has_error = 0;
 
 	if (is_dx_dir(inode)) {
-		err = ext4_dx_readdir(filp, dirent, filldir);
+		err = ext4_dx_readdir(file, ctx);
 		if (err != ERR_BAD_DX_DIR) {
-			ret = err;
-			goto out;
+			return err;
 		}
 		/*
 		 * We don't set the inode dirty flag since it's not
 		 * critical that it get flushed back to the disk.
 		 */
-		ext4_clear_inode_flag(file_inode(filp),
+		ext4_clear_inode_flag(file_inode(file),
 				      EXT4_INODE_INDEX);
 	}
 
 	if (ext4_has_inline_data(inode)) {
 		int has_inline_data = 1;
-		ret = ext4_read_inline_dir(filp, dirent, filldir,
-					   &has_inline_data);
+		int ret = ext4_read_inline_dir(file, ctx,
					   &has_inline_data);
 		if (has_inline_data)
 			return ret;
 	}
 
 	stored = 0;
-	offset = filp->f_pos & (sb->s_blocksize - 1);
+	offset = ctx->pos & (sb->s_blocksize - 1);
 
-	while (!error && !stored && filp->f_pos < inode->i_size) {
+	while (ctx->pos < inode->i_size) {
 		struct ext4_map_blocks map;
 		struct buffer_head *bh = NULL;
 
-		map.m_lblk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
+		map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb);
 		map.m_len = 1;
 		err = ext4_map_blocks(NULL, inode, &map, 0);
 		if (err > 0) {
 			pgoff_t index = map.m_pblk >>
 					(PAGE_CACHE_SHIFT - inode->i_blkbits);
-			if (!ra_has_index(&filp->f_ra, index))
+			if (!ra_has_index(&file->f_ra, index))
 				page_cache_sync_readahead(
 					sb->s_bdev->bd_inode->i_mapping,
-					&filp->f_ra, filp,
+					&file->f_ra, file,
 					index, 1);
-			filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
+			file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
 			bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err);
 		}
 
@@ -166,16 +161,16 @@ static int ext4_readdir(struct file *filp,
 		 */
 		if (!bh) {
 			if (!dir_has_error) {
-				EXT4_ERROR_FILE(filp, 0,
+				EXT4_ERROR_FILE(file, 0,
 						"directory contains a "
 						"hole at offset %llu",
-					   (unsigned long long) filp->f_pos);
+					   (unsigned long long) ctx->pos);
 				dir_has_error = 1;
 			}
 			/* corrupt size?  Maybe no more blocks to read */
-			if (filp->f_pos > inode->i_blocks << 9)
+			if (ctx->pos > inode->i_blocks << 9)
 				break;
-			filp->f_pos += sb->s_blocksize - offset;
+			ctx->pos += sb->s_blocksize - offset;
 			continue;
 		}
 
@@ -183,21 +178,20 @@ static int ext4_readdir(struct file *filp,
 		if (!buffer_verified(bh) &&
 		    !ext4_dirent_csum_verify(inode,
 				(struct ext4_dir_entry *)bh->b_data)) {
-			EXT4_ERROR_FILE(filp, 0, "directory fails checksum "
+			EXT4_ERROR_FILE(file, 0, "directory fails checksum "
 					"at offset %llu",
-					(unsigned long long)filp->f_pos);
-			filp->f_pos += sb->s_blocksize - offset;
+					(unsigned long long)ctx->pos);
+			ctx->pos += sb->s_blocksize - offset;
 			brelse(bh);
 			continue;
 		}
 		set_buffer_verified(bh);
 
-revalidate:
 		/* If the dir block has changed since the last call to
 		 * readdir(2), then we might be pointing to an invalid
 		 * dirent right now.  Scan from the start of the block
 		 * to make sure. */
-		if (filp->f_version != inode->i_version) {
+		if (file->f_version != inode->i_version) {
 			for (i = 0; i < sb->s_blocksize && i < offset; ) {
 				de = (struct ext4_dir_entry_2 *)
 					(bh->b_data + i);
@@ -214,57 +208,46 @@ revalidate:
 						    sb->s_blocksize);
 			}
 			offset = i;
-			filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
+			ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1))
 				| offset;
-			filp->f_version = inode->i_version;
+			file->f_version = inode->i_version;
 		}
 
-		while (!error && filp->f_pos < inode->i_size
+		while (ctx->pos < inode->i_size
 		       && offset < sb->s_blocksize) {
 			de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
-			if (ext4_check_dir_entry(inode, filp, de, bh,
+			if (ext4_check_dir_entry(inode, file, de, bh,
 						 bh->b_data, bh->b_size,
 						 offset)) {
 				/*
-				 * On error, skip the f_pos to the next block
+				 * On error, skip to the next block
 				 */
-				filp->f_pos = (filp->f_pos |
+				ctx->pos = (ctx->pos |
 						(sb->s_blocksize - 1)) + 1;
-				brelse(bh);
-				ret = stored;
-				goto out;
+				break;
 			}
 			offset += ext4_rec_len_from_disk(de->rec_len,
 					sb->s_blocksize);
 			if (le32_to_cpu(de->inode)) {
-				/* We might block in the next section
-				 * if the data destination is
-				 * currently swapped out.  So, use a
-				 * version stamp to detect whether or
-				 * not the directory has been modified
-				 * during the copy operation.
-				 */
-				u64 version = filp->f_version;
-
-				error = filldir(dirent, de->name,
+				if (!dir_emit(ctx, de->name,
 						de->name_len,
-						filp->f_pos,
 						le32_to_cpu(de->inode),
-						get_dtype(sb, de->file_type));
-				if (error)
-					break;
-				if (version != filp->f_version)
-					goto revalidate;
-				stored++;
+						get_dtype(sb, de->file_type))) {
+					brelse(bh);
+					return 0;
+				}
 			}
-			filp->f_pos += ext4_rec_len_from_disk(de->rec_len,
+			ctx->pos += ext4_rec_len_from_disk(de->rec_len,
 						sb->s_blocksize);
 		}
 		offset = 0;
 		brelse(bh);
+		if (ctx->pos < inode->i_size) {
+			if (!dir_relax(inode))
+				return 0;
+		}
 	}
-out:
-	return ret;
+	return 0;
 }
 
 static inline int is_32bit_api(void)
@@ -492,16 +475,12 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
 * for all entries on the fname linked list.  (Normally there is only
 * one entry on the linked list, unless there are 62 bit hash collisions.)
 */
-static int call_filldir(struct file *filp, void *dirent,
-			filldir_t filldir, struct fname *fname)
+static int call_filldir(struct file *file, struct dir_context *ctx,
+			struct fname *fname)
 {
-	struct dir_private_info *info = filp->private_data;
-	loff_t	curr_pos;
-	struct inode *inode = file_inode(filp);
-	struct super_block *sb;
-	int	error;
-
-	sb = inode->i_sb;
+	struct dir_private_info *info = file->private_data;
+	struct inode *inode = file_inode(file);
+	struct super_block *sb = inode->i_sb;
 
 	if (!fname) {
 		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: "
@@ -509,47 +488,44 @@ static int call_filldir(struct file *filp, void *dirent,
 		       inode->i_ino, current->comm);
 		return 0;
 	}
-	curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
+	ctx->pos = hash2pos(file, fname->hash, fname->minor_hash);
 	while (fname) {
-		error = filldir(dirent, fname->name,
-				fname->name_len, curr_pos,
+		if (!dir_emit(ctx, fname->name,
+				fname->name_len,
 				fname->inode,
-				get_dtype(sb, fname->file_type));
-		if (error) {
-			filp->f_pos = curr_pos;
+				get_dtype(sb, fname->file_type))) {
 			info->extra_fname = fname;
-			return error;
+			return 1;
 		}
 		fname = fname->next;
 	}
 	return 0;
 }
 
-static int ext4_dx_readdir(struct file *filp,
-			   void *dirent, filldir_t filldir)
+static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
 {
-	struct dir_private_info *info = filp->private_data;
-	struct inode *inode = file_inode(filp);
+	struct dir_private_info *info = file->private_data;
+	struct inode *inode = file_inode(file);
 	struct fname *fname;
 	int	ret;
 
 	if (!info) {
-		info = ext4_htree_create_dir_info(filp, filp->f_pos);
+		info = ext4_htree_create_dir_info(file, ctx->pos);
 		if (!info)
 			return -ENOMEM;
-		filp->private_data = info;
+		file->private_data = info;
 	}
 
-	if (filp->f_pos == ext4_get_htree_eof(filp))
+	if (ctx->pos == ext4_get_htree_eof(file))
 		return 0;	/* EOF */
 
 	/* Someone has messed with f_pos; reset the world */
-	if (info->last_pos != filp->f_pos) {
+	if (info->last_pos != ctx->pos) {
 		free_rb_tree_fname(&info->root);
 		info->curr_node = NULL;
 		info->extra_fname = NULL;
-		info->curr_hash = pos2maj_hash(filp, filp->f_pos);
-		info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
+		info->curr_hash = pos2maj_hash(file, ctx->pos);
+		info->curr_minor_hash = pos2min_hash(file, ctx->pos);
 	}
 
 	/*
@@ -557,7 +533,7 @@ static int ext4_dx_readdir(struct file *filp,
 	 * chain, return them first.
 	 */
 	if (info->extra_fname) {
-		if (call_filldir(filp, dirent, filldir, info->extra_fname))
+		if (call_filldir(file, ctx, info->extra_fname))
 			goto finished;
 		info->extra_fname = NULL;
 		goto next_node;
@@ -571,17 +547,17 @@ static int ext4_dx_readdir(struct file *filp,
 	 * cached entries.
 	 */
 	if ((!info->curr_node) ||
-	    (filp->f_version != inode->i_version)) {
+	    (file->f_version != inode->i_version)) {
 		info->curr_node = NULL;
 		free_rb_tree_fname(&info->root);
-		filp->f_version = inode->i_version;
-		ret = ext4_htree_fill_tree(filp, info->curr_hash,
+		file->f_version = inode->i_version;
+		ret = ext4_htree_fill_tree(file, info->curr_hash,
 					   info->curr_minor_hash,
 					   &info->next_hash);
 		if (ret < 0)
 			return ret;
 		if (ret == 0) {
-			filp->f_pos = ext4_get_htree_eof(filp);
+			ctx->pos = ext4_get_htree_eof(file);
 			break;
 		}
 		info->curr_node = rb_first(&info->root);
@@ -590,7 +566,7 @@ static int ext4_dx_readdir(struct file *filp,
 		fname = rb_entry(info->curr_node, struct fname, rb_hash);
 		info->curr_hash = fname->hash;
 		info->curr_minor_hash = fname->minor_hash;
-		if (call_filldir(filp, dirent, filldir, fname))
+		if (call_filldir(file, ctx, fname))
 			break;
 next_node:
 		info->curr_node = rb_next(info->curr_node);
@@ -601,7 +577,7 @@ static int ext4_dx_readdir(struct file *filp,
 			info->curr_minor_hash = fname->minor_hash;
 		} else {
 			if (info->next_hash == ~0) {
-				filp->f_pos = ext4_get_htree_eof(filp);
+				ctx->pos = ext4_get_htree_eof(file);
 				break;
 			}
 			info->curr_hash = info->next_hash;
@@ -609,7 +585,7 @@ static int ext4_dx_readdir(struct file *filp,
 		}
 	}
 finished:
-	info->last_pos = filp->f_pos;
+	info->last_pos = ctx->pos;
 	return 0;
 }
 
@@ -624,7 +600,7 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)
 const struct file_operations ext4_dir_operations = {
 	.llseek		= ext4_dir_llseek,
 	.read		= generic_read_dir,
-	.readdir	= ext4_readdir,
+	.iterate	= ext4_readdir,
 	.unlocked_ioctl = ext4_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext4_compat_ioctl,
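Note: the dir.c changes above follow the generic ->iterate() contract: the VFS hands in a struct dir_context whose pos field replaces direct f_pos manipulation, and dir_emit() replaces the filldir callback, returning false when the user buffer is full. A condensed sketch of the pattern; example_next_entry() and its fields are hypothetical, not ext4 code:

	static int example_iterate(struct file *file, struct dir_context *ctx)
	{
		struct example_dirent *de;	/* hypothetical entry type */

		while ((de = example_next_entry(file, ctx->pos)) != NULL) {
			if (!dir_emit(ctx, de->name, de->name_len,
				      de->ino, de->file_type))
				return 0;	/* buffer full; ctx->pos still names
						 * this entry, so the next call
						 * resumes here */
			ctx->pos = de->next_pos;
		}
		return 0;			/* EOF */
	}

Because ctx->pos is only advanced after a successful dir_emit(), the old version-stamp logic that guarded against f_pos pointing mid-entry after a failed copy-out is no longer needed, which is why the revalidate label disappears in the hunks above.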
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5aae3d12d400..b577e45425b0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -177,38 +177,28 @@ struct ext4_map_blocks {
 };
 
 /*
- * For delayed allocation tracking
- */
-struct mpage_da_data {
-	struct inode *inode;
-	sector_t b_blocknr;		/* start block number of extent */
-	size_t b_size;			/* size of extent */
-	unsigned long b_state;		/* state of the extent */
-	unsigned long first_page, next_page;	/* extent of pages */
-	struct writeback_control *wbc;
-	int io_done;
-	int pages_written;
-	int retval;
-};
-
-/*
  * Flags for ext4_io_end->flags
  */
 #define	EXT4_IO_END_UNWRITTEN	0x0001
-#define EXT4_IO_END_ERROR	0x0002
-#define EXT4_IO_END_DIRECT	0x0004
+#define EXT4_IO_END_DIRECT	0x0002
 
 /*
- * For converting uninitialized extents on a work queue.
+ * For converting uninitialized extents on a work queue. 'handle' is used for
+ * buffered writeback.
  */
 typedef struct ext4_io_end {
 	struct list_head	list;		/* per-file finished IO list */
+	handle_t		*handle;	/* handle reserved for extent
+						 * conversion */
 	struct inode		*inode;		/* file being written to */
+	struct bio		*bio;		/* Linked list of completed
+						 * bios covering the extent */
 	unsigned int		flag;		/* unwritten or not */
 	loff_t			offset;		/* offset in the file */
 	ssize_t			size;		/* size of the extent */
 	struct kiocb		*iocb;		/* iocb struct for AIO */
 	int			result;		/* error value for AIO */
+	atomic_t		count;		/* reference counter */
 } ext4_io_end_t;
 
 struct ext4_io_submit {
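Note: the new count field makes ext4_io_end_t reference counted: the submitter holds one reference and each bio in flight holds another, so the structure (and the unwritten-extent conversion it describes) is only processed after both sides are done. A hedged sketch of a completion callback using the helpers this patch declares later (ext4_get_io_end()/ext4_put_io_end_defer()); the function itself is illustrative, not the real page-io.c code:

	static void example_end_bio(struct bio *bio, int error)
	{
		ext4_io_end_t *io_end = bio->bi_private;  /* ref taken at submit
							   * time via
							   * ext4_get_io_end() */
		if (error)
			io_end->result = error;

		/* We may be in irq context and the final put can need a
		 * transaction for extent conversion, so defer rather than
		 * put directly. */
		ext4_put_io_end_defer(io_end);
		bio_put(bio);
	}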
@@ -581,11 +571,6 @@ enum {
 #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER	0x0020
 
 /*
- * Flags used by ext4_discard_partial_page_buffers
- */
-#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED	0x0001
-
-/*
  * ioctl commands
  */
 #define	EXT4_IOC_GETFLAGS		FS_IOC_GETFLAGS
@@ -879,6 +864,7 @@ struct ext4_inode_info {
 	rwlock_t i_es_lock;
 	struct list_head i_es_lru;
 	unsigned int i_es_lru_nr;	/* protected by i_es_lock */
+	unsigned long i_touch_when;	/* jiffies of last accessing */
 
 	/* ialloc */
 	ext4_group_t	i_last_alloc_group;
@@ -903,12 +889,22 @@ struct ext4_inode_info {
 	qsize_t i_reserved_quota;
 #endif
 
-	/* completed IOs that might need unwritten extents handling */
-	struct list_head i_completed_io_list;
+	/* Lock protecting lists below */
 	spinlock_t i_completed_io_lock;
+	/*
+	 * Completed IOs that need unwritten extents handling and have
+	 * transaction reserved
+	 */
+	struct list_head i_rsv_conversion_list;
+	/*
+	 * Completed IOs that need unwritten extents handling and don't have
+	 * transaction reserved
+	 */
+	struct list_head i_unrsv_conversion_list;
 	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
 	atomic_t i_unwritten;	/* Nr. of inflight conversions pending */
-	struct work_struct i_unwritten_work;	/* deferred extent conversion */
+	struct work_struct i_rsv_conversion_work;
+	struct work_struct i_unrsv_conversion_work;
 
 	spinlock_t i_block_reservation_lock;
 
@@ -1245,7 +1241,6 @@ struct ext4_sb_info {
 	unsigned int s_mb_stats;
 	unsigned int s_mb_order2_reqs;
 	unsigned int s_mb_group_prealloc;
-	unsigned int s_max_writeback_mb_bump;
 	unsigned int s_max_dir_size_kb;
 	/* where last allocation was done - for stream allocation */
 	unsigned long s_mb_last_group;
@@ -1281,8 +1276,10 @@ struct ext4_sb_info {
 	struct flex_groups *s_flex_groups;
 	ext4_group_t s_flex_groups_allocated;
 
-	/* workqueue for dio unwritten */
-	struct workqueue_struct *dio_unwritten_wq;
+	/* workqueue for unreserved extent conversions (dio) */
+	struct workqueue_struct *unrsv_conversion_wq;
+	/* workqueue for reserved extent conversions (buffered io) */
+	struct workqueue_struct *rsv_conversion_wq;
 
 	/* timer for periodic error stats printing */
 	struct timer_list s_err_report;
@@ -1307,6 +1304,7 @@ struct ext4_sb_info {
 	/* Reclaim extents from extent status tree */
 	struct shrinker s_es_shrinker;
 	struct list_head s_es_lru;
+	unsigned long s_es_last_sorted;
 	struct percpu_counter s_extent_cache_cnt;
 	spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
 };
@@ -1342,6 +1340,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
 					      struct ext4_io_end *io_end)
 {
 	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+		/* Writeback has to have conversion transaction reserved */
+		WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle &&
+			!(io_end->flag & EXT4_IO_END_DIRECT));
 		io_end->flag |= EXT4_IO_END_UNWRITTEN;
 		atomic_inc(&EXT4_I(inode)->i_unwritten);
 	}
@@ -1999,7 +2000,6 @@ static inline unsigned char get_dtype(struct super_block *sb, int filetype)
 
 /* fsync.c */
 extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
-extern int ext4_flush_unwritten_io(struct inode *);
 
 /* hash.c */
 extern int ext4fs_dirhash(const char *name, int len, struct
@@ -2088,7 +2088,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
 extern int ext4_can_truncate(struct inode *inode);
 extern void ext4_truncate(struct inode *);
-extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
+extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
 extern void ext4_set_inode_flags(struct inode *);
 extern void ext4_get_inode_flags(struct ext4_inode_info *);
@@ -2096,9 +2096,12 @@ extern int ext4_alloc_da_blocks(struct inode *inode);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
-extern int ext4_discard_partial_page_buffers(handle_t *handle,
-		struct address_space *mapping, loff_t from,
-		loff_t length, int flags);
+extern int ext4_block_truncate_page(handle_t *handle,
+		struct address_space *mapping, loff_t from);
+extern int ext4_block_zero_page_range(handle_t *handle,
+		struct address_space *mapping, loff_t from, loff_t length);
+extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
+			     loff_t lstart, loff_t lend);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -2111,7 +2114,7 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 				  const struct iovec *iov, loff_t offset,
 				  unsigned long nr_segs);
 extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
-extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
+extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
 extern void ext4_ind_truncate(handle_t *, struct inode *inode);
 extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
 				 ext4_lblk_t first, ext4_lblk_t stop);
@@ -2166,42 +2169,96 @@ extern int ext4_alloc_flex_bg_array(struct super_block *sb,
 				    ext4_group_t ngroup);
 extern const char *ext4_decode_error(struct super_block *sb, int errno,
 				     char nbuf[16]);
+
 extern __printf(4, 5)
 void __ext4_error(struct super_block *, const char *, unsigned int,
 		  const char *, ...);
-#define ext4_error(sb, message...)	__ext4_error(sb, __func__, \
-						     __LINE__, ## message)
 extern __printf(5, 6)
-void ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t,
+void __ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t,
 		      const char *, ...);
 extern __printf(5, 6)
-void ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
+void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
 		     const char *, ...);
 extern void __ext4_std_error(struct super_block *, const char *,
 			     unsigned int, int);
 extern __printf(4, 5)
 void __ext4_abort(struct super_block *, const char *, unsigned int,
 		  const char *, ...);
-#define ext4_abort(sb, message...)	__ext4_abort(sb, __func__, \
-						     __LINE__, ## message)
 extern __printf(4, 5)
 void __ext4_warning(struct super_block *, const char *, unsigned int,
 		    const char *, ...);
-#define ext4_warning(sb, message...)	__ext4_warning(sb, __func__, \
-						       __LINE__, ## message)
 extern __printf(3, 4)
-void ext4_msg(struct super_block *, const char *, const char *, ...);
+void __ext4_msg(struct super_block *, const char *, const char *, ...);
 extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
 			   const char *, unsigned int, const char *);
-#define dump_mmp_msg(sb, mmp, msg)	__dump_mmp_msg(sb, mmp, __func__, \
-						       __LINE__, msg)
 extern __printf(7, 8)
 void __ext4_grp_locked_error(const char *, unsigned int,
 			     struct super_block *, ext4_group_t,
 			     unsigned long, ext4_fsblk_t,
 			     const char *, ...);
-#define ext4_grp_locked_error(sb, grp, message...) \
-	__ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message)
+
+#ifdef CONFIG_PRINTK
+
+#define ext4_error_inode(inode, func, line, block, fmt, ...)		\
+	__ext4_error_inode(inode, func, line, block, fmt, ##__VA_ARGS__)
+#define ext4_error_file(file, func, line, block, fmt, ...)		\
+	__ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__)
+#define ext4_error(sb, fmt, ...)					\
+	__ext4_error(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
+#define ext4_abort(sb, fmt, ...)					\
+	__ext4_abort(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
+#define ext4_warning(sb, fmt, ...)					\
+	__ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
+#define ext4_msg(sb, level, fmt, ...)					\
+	__ext4_msg(sb, level, fmt, ##__VA_ARGS__)
+#define dump_mmp_msg(sb, mmp, msg)					\
+	__dump_mmp_msg(sb, mmp, __func__, __LINE__, msg)
+#define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...)		\
+	__ext4_grp_locked_error(__func__, __LINE__, sb, grp, ino, block, \
+				fmt, ##__VA_ARGS__)
+
+#else
+
+#define ext4_error_inode(inode, func, line, block, fmt, ...)		\
+do {									\
+	no_printk(fmt, ##__VA_ARGS__);					\
+	__ext4_error_inode(inode, "", 0, block, " ");			\
+} while (0)
+#define ext4_error_file(file, func, line, block, fmt, ...)		\
+do {									\
+	no_printk(fmt, ##__VA_ARGS__);					\
+	__ext4_error_file(file, "", 0, block, " ");			\
+} while (0)
+#define ext4_error(sb, fmt, ...)					\
+do {									\
+	no_printk(fmt, ##__VA_ARGS__);					\
+	__ext4_error(sb, "", 0, " ");					\
+} while (0)
+#define ext4_abort(sb, fmt, ...)					\
+do {									\
+	no_printk(fmt, ##__VA_ARGS__);					\
+	__ext4_abort(sb, "", 0, " ");					\
+} while (0)
+#define ext4_warning(sb, fmt, ...)					\
+do {									\
+	no_printk(fmt, ##__VA_ARGS__);					\
+	__ext4_warning(sb, "", 0, " ");					\
+} while (0)
+#define ext4_msg(sb, level, fmt, ...)					\
+do {									\
+	no_printk(fmt, ##__VA_ARGS__);					\
+	__ext4_msg(sb, "", " ");					\
+} while (0)
+#define dump_mmp_msg(sb, mmp, msg)					\
+	__dump_mmp_msg(sb, mmp, "", 0, "")
+#define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...)		\
+do {									\
+	no_printk(fmt, ##__VA_ARGS__);					\
+	__ext4_grp_locked_error("", 0, sb, grp, ino, block, " ");	\
+} while (0)
+
+#endif
+
 extern void ext4_update_dynamic_rev(struct super_block *sb);
 extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
 				      __u32 compat);
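Note: the !CONFIG_PRINTK fallbacks lean on no_printk(), which emits no code or string data but still lets the compiler type-check the format string against its arguments, so printk-less builds keep the same -Wformat coverage. A userspace illustration of the idiom; the macro below mimics, rather than reproduces, the kernel's definition:

	#include <stdio.h>

	/* The if (0) branch is dead, so the optimizer removes the call and
	 * the string, but the format is still checked at compile time. */
	#define no_printk(fmt, ...)			\
	({						\
		if (0)					\
			printf(fmt, ##__VA_ARGS__);	\
		0;					\
	})

	int main(void)
	{
		int blocks = 42;

		no_printk("freeing %d blocks\n", blocks); /* checked, silent */
		/* no_printk("freeing %d blocks\n", "x");  would warn: %d vs char* */
		return 0;
	}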
@@ -2312,6 +2369,7 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
 {
 	struct ext4_group_info ***grp_info;
 	long indexv, indexh;
+	BUG_ON(group >= EXT4_SB(sb)->s_groups_count);
 	grp_info = EXT4_SB(sb)->s_group_info;
 	indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
 	indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
@@ -2515,7 +2573,7 @@ extern int ext4_try_create_inline_dir(handle_t *handle,
 					 struct inode *parent,
 					 struct inode *inode);
 extern int ext4_read_inline_dir(struct file *filp,
-				void *dirent, filldir_t filldir,
+				struct dir_context *ctx,
 				int *has_inline_data);
 extern int htree_inlinedir_to_tree(struct file *dir_file,
 				   struct inode *dir, ext4_lblk_t block,
@@ -2598,8 +2656,7 @@ struct ext4_extent;
 
 extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
-extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
-				       int chunk);
+extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents);
 extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			       struct ext4_map_blocks *map, int flags);
 extern void ext4_ext_truncate(handle_t *, struct inode *);
@@ -2609,8 +2666,8 @@ extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
 			  loff_t len);
-extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
-					  ssize_t len);
+extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
+					  loff_t offset, ssize_t len);
 extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
 			   struct ext4_map_blocks *map, int flags);
 extern int ext4_ext_calc_metadata_amount(struct inode *inode,
@@ -2650,12 +2707,15 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
-extern void ext4_add_complete_io(ext4_io_end_t *io_end);
 extern void ext4_exit_pageio(void);
-extern void ext4_ioend_shutdown(struct inode *);
-extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
-extern void ext4_end_io_work(struct work_struct *work);
+extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
+extern int ext4_put_io_end(ext4_io_end_t *io_end);
+extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
+extern void ext4_io_submit_init(struct ext4_io_submit *io,
+				struct writeback_control *wbc);
+extern void ext4_end_io_rsv_work(struct work_struct *work);
+extern void ext4_end_io_unrsv_work(struct work_struct *work);
 extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
 			       struct page *page,
@@ -2668,20 +2728,17 @@ extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp);
 extern int ext4_mmp_csum_verify(struct super_block *sb,
 				struct mmp_struct *mmp);
 
-/* BH_Uninit flag: blocks are allocated but uninitialized on disk */
+/*
+ * Note that these flags will never ever appear in a buffer_head's state flag.
+ * See EXT4_MAP_... to see where this is used.
+ */
 enum ext4_state_bits {
 	BH_Uninit	/* blocks are allocated but uninitialized on disk */
 	 = BH_JBDPrivateStart,
 	BH_AllocFromCluster,	/* allocated blocks were part of already
-				 * allocated cluster. Note that this flag will
-				 * never, ever appear in a buffer_head's state
-				 * flag. See EXT4_MAP_FROM_CLUSTER to see where
-				 * this is used. */
+				 * allocated cluster. */
 };
 
-BUFFER_FNS(Uninit, uninit)
-TAS_BUFFER_FNS(Uninit, uninit)
-
 /*
  * Add new method to test whether block and inode bitmaps are properly
  * initialized. With uninit_bg reading the block from disk is not enough
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 451eb4045330..72a3600aedbd 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -38,31 +38,43 @@ static void ext4_put_nojournal(handle_t *handle)
 /*
  * Wrappers for jbd2_journal_start/end.
  */
-handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
-				  int type, int nblocks)
+static int ext4_journal_check_start(struct super_block *sb)
 {
 	journal_t *journal;
 
 	might_sleep();
-
-	trace_ext4_journal_start(sb, nblocks, _RET_IP_);
 	if (sb->s_flags & MS_RDONLY)
-		return ERR_PTR(-EROFS);
-
+		return -EROFS;
 	WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
 	journal = EXT4_SB(sb)->s_journal;
-	if (!journal)
-		return ext4_get_nojournal();
 	/*
 	 * Special case here: if the journal has aborted behind our
 	 * backs (eg. EIO in the commit thread), then we still need to
 	 * take the FS itself readonly cleanly.
 	 */
-	if (is_journal_aborted(journal)) {
+	if (journal && is_journal_aborted(journal)) {
 		ext4_abort(sb, "Detected aborted journal");
-		return ERR_PTR(-EROFS);
+		return -EROFS;
 	}
-	return jbd2__journal_start(journal, nblocks, GFP_NOFS, type, line);
+	return 0;
+}
+
+handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
+				  int type, int blocks, int rsv_blocks)
+{
+	journal_t *journal;
+	int err;
+
+	trace_ext4_journal_start(sb, blocks, rsv_blocks, _RET_IP_);
+	err = ext4_journal_check_start(sb);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	journal = EXT4_SB(sb)->s_journal;
+	if (!journal)
+		return ext4_get_nojournal();
+	return jbd2__journal_start(journal, blocks, rsv_blocks, GFP_NOFS,
+				   type, line);
 }
 
 int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
@@ -86,6 +98,30 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
 	return err;
 }
 
+handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
+					int type)
+{
+	struct super_block *sb;
+	int err;
+
+	if (!ext4_handle_valid(handle))
+		return ext4_get_nojournal();
+
+	sb = handle->h_journal->j_private;
+	trace_ext4_journal_start_reserved(sb, handle->h_buffer_credits,
+					  _RET_IP_);
+	err = ext4_journal_check_start(sb);
+	if (err < 0) {
+		jbd2_journal_free_reserved(handle);
+		return ERR_PTR(err);
+	}
+
+	err = jbd2_journal_start_reserved(handle, type, line);
+	if (err < 0)
+		return ERR_PTR(err);
+	return handle;
+}
+
 void ext4_journal_abort_handle(const char *caller, unsigned int line,
 			       const char *err_fn, struct buffer_head *bh,
 			       handle_t *handle, int err)
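Note: the two start paths are designed to pair up. Writeback reserves conversion credits while starting its own transaction, and the conversion worker later activates that reservation with ext4_journal_start_reserved() instead of blocking on a fresh journal start. The callers live in inode.c and page-io.c, outside the hunks shown here, so the outline below is an assumption based on the declarations above, not a copy of the real code:

	/* Writeback side: start a handle, reserving rsv_blocks extra
	 * credits for the later unwritten-extent conversion. */
	handle = ext4_journal_start_with_reserve(inode, EXT4_HT_WRITE_PAGE,
						 needed_blocks, rsv_blocks);
	/* ... write pages; stash the reserved handle in io_end->handle ... */

	/* Conversion side (workqueue): turn the reservation into a live
	 * handle without risking a long block on journal space. */
	handle = ext4_journal_start_reserved(io_end->handle,
					     EXT4_HT_EXT_CONVERT);
	if (IS_ERR(handle))
		return PTR_ERR(handle);	/* see the cleanup in
					 * __ext4_journal_start_reserved() */
	err = ext4_convert_unwritten_extents(handle, inode, offset, size);
	ext4_journal_stop(handle);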
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index c8c6885406db..2877258d9497 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -134,7 +134,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode)
 #define EXT4_HT_MIGRATE          8
 #define EXT4_HT_MOVE_EXTENTS     9
 #define EXT4_HT_XATTR           10
-#define EXT4_HT_MAX             11
+#define EXT4_HT_EXT_CONVERT     11
+#define EXT4_HT_MAX             12
 
 /**
  *   struct ext4_journal_cb_entry - Base structure for callback information.
@@ -265,7 +266,7 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
 	__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
 
 handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
-				  int type, int nblocks);
+				  int type, int blocks, int rsv_blocks);
 int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
 
 #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
@@ -300,21 +301,37 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
 }
 
 #define ext4_journal_start_sb(sb, type, nblocks)			\
-	__ext4_journal_start_sb((sb), __LINE__, (type), (nblocks))
+	__ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0)
 
 #define ext4_journal_start(inode, type, nblocks)			\
-	__ext4_journal_start((inode), __LINE__, (type), (nblocks))
+	__ext4_journal_start((inode), __LINE__, (type), (nblocks), 0)
+
+#define ext4_journal_start_with_reserve(inode, type, blocks, rsv_blocks) \
+	__ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks))
 
 static inline handle_t *__ext4_journal_start(struct inode *inode,
 					     unsigned int line, int type,
-					     int nblocks)
+					     int blocks, int rsv_blocks)
 {
-	return __ext4_journal_start_sb(inode->i_sb, line, type, nblocks);
+	return __ext4_journal_start_sb(inode->i_sb, line, type, blocks,
+				       rsv_blocks);
 }
 
 #define ext4_journal_stop(handle) \
 	__ext4_journal_stop(__func__, __LINE__, (handle))
 
+#define ext4_journal_start_reserved(handle, type) \
+	__ext4_journal_start_reserved((handle), __LINE__, (type))
+
+handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
+					int type);
+
+static inline void ext4_journal_free_reserved(handle_t *handle)
+{
+	if (ext4_handle_valid(handle))
+		jbd2_journal_free_reserved(handle);
+}
+
 static inline handle_t *ext4_journal_current_handle(void)
 {
 	return journal_current_handle();
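Note: the caller-facing surface barely moves. Existing ext4_journal_start() users are unchanged (they now implicitly pass rsv_blocks = 0), and only callers that need a guaranteed later transaction switch to the _with_reserve variant. A hypothetical caller, with the handle type and credit counts made up for illustration:

	static int example_modify(struct inode *inode)
	{
		handle_t *handle;

		/* 8 credits now, 4 more reserved for a later stage that
		 * must not block waiting for journal space */
		handle = ext4_journal_start_with_reserve(inode,
						EXT4_HT_MAP_BLOCKS, 8, 4);
		if (IS_ERR(handle))
			return PTR_ERR(handle);

		/* ... dirty metadata under 'handle' ... */

		return ext4_journal_stop(handle);
	}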
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index bc0f1910b9cf..72ba4705d4fa 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2125,7 +2125,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
 		next_del = ext4_find_delayed_extent(inode, &es);
 		if (!exists && next_del) {
 			exists = 1;
-			flags |= FIEMAP_EXTENT_DELALLOC;
+			flags |= (FIEMAP_EXTENT_DELALLOC |
+				  FIEMAP_EXTENT_UNKNOWN);
 		}
 		up_read(&EXT4_I(inode)->i_data_sem);
 
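Note: delayed-allocation extents have no physical location yet, and the fiemap ABI expects DELALLOC extents to also carry UNKNOWN so userspace ignores the physical offset; the hunk above brings ext4's reporting in line with that. A sketch of the corresponding fill call (fiemap_fill_next_extent() is the real VFS helper; the surrounding values are illustrative):

	ret = fiemap_fill_next_extent(fieinfo, logical_bytes,
				      0,		/* physical: meaningless here */
				      length_bytes,
				      FIEMAP_EXTENT_DELALLOC |
				      FIEMAP_EXTENT_UNKNOWN);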
@@ -2328,17 +2329,15 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
 }
 
 /*
- * How many index/leaf blocks need to change/allocate to modify nrblocks?
+ * How many index/leaf blocks need to change/allocate to add @extents extents?
  *
- * if nrblocks are fit in a single extent (chunk flag is 1), then
- * in the worse case, each tree level index/leaf need to be changed
- * if the tree split due to insert a new extent, then the old tree
- * index/leaf need to be updated too
+ * If we add a single extent, then in the worst case, each tree level
+ * index/leaf need to be changed in case of the tree split.
 *
- * If the nrblocks are discontiguous, they could cause
- * the whole tree split more than once, but this is really rare.
+ * If more extents are inserted, they could cause the whole tree split more
+ * than once, but this is really rare.
 */
-int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
 {
 	int index;
 	int depth;
@@ -2349,7 +2348,7 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 
 	depth = ext_depth(inode);
 
-	if (chunk)
+	if (extents <= 1)
 		index = depth * 2;
 	else
 		index = depth * 3;
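Note: worked numbers for the renamed parameter, under the reading that one inserted extent can split (and thus dirty about two index/leaf blocks at) each level, while several discontiguous extents can split disjoint paths. The interpretation is ours; the bound itself is the code's:

	/* For an extent tree of depth 2: */
	ext4_ext_index_trans_blocks(inode, 1);	/* -> 2 * depth == 4 credits */
	ext4_ext_index_trans_blocks(inode, 8);	/* -> 3 * depth == 6 credits */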
@@ -2357,20 +2356,24 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 	return index;
 }
 
+static inline int get_default_free_blocks_flags(struct inode *inode)
+{
+	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+		return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
+	else if (ext4_should_journal_data(inode))
+		return EXT4_FREE_BLOCKS_FORGET;
+	return 0;
+}
+
 static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 			      struct ext4_extent *ex,
-			      ext4_fsblk_t *partial_cluster,
+			      long long *partial_cluster,
 			      ext4_lblk_t from, ext4_lblk_t to)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	unsigned short ee_len = ext4_ext_get_actual_len(ex);
 	ext4_fsblk_t pblk;
-	int flags = 0;
-
-	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-		flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
-	else if (ext4_should_journal_data(inode))
-		flags |= EXT4_FREE_BLOCKS_FORGET;
+	int flags = get_default_free_blocks_flags(inode);
 
 	/*
 	 * For bigalloc file systems, we never free a partial cluster
@@ -2388,7 +2391,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2388 | * partial cluster here. | 2391 | * partial cluster here. |
2389 | */ | 2392 | */ |
2390 | pblk = ext4_ext_pblock(ex) + ee_len - 1; | 2393 | pblk = ext4_ext_pblock(ex) + ee_len - 1; |
2391 | if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) { | 2394 | if ((*partial_cluster > 0) && |
2395 | (EXT4_B2C(sbi, pblk) != *partial_cluster)) { | ||
2392 | ext4_free_blocks(handle, inode, NULL, | 2396 | ext4_free_blocks(handle, inode, NULL, |
2393 | EXT4_C2B(sbi, *partial_cluster), | 2397 | EXT4_C2B(sbi, *partial_cluster), |
2394 | sbi->s_cluster_ratio, flags); | 2398 | sbi->s_cluster_ratio, flags); |
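
The get_default_free_blocks_flags() helper introduced a few lines up centralizes a decision that was previously duplicated at several free sites, including this one. A hedged sketch of the same decision table; the EXT4_FREE_BLOCKS_* names are real, but the numeric values below are illustrative stand-ins:

	#define FB_METADATA 0x1	/* stand-in for EXT4_FREE_BLOCKS_METADATA */
	#define FB_FORGET   0x2	/* stand-in for EXT4_FREE_BLOCKS_FORGET */

	/* Directory and symlink blocks are metadata; data-journaled file
	 * blocks still need a journal "forget" so stale copies of the
	 * buffer are not replayed after the blocks are reused. */
	static int default_free_blocks_flags(int dir_or_symlink, int journals_data)
	{
		if (dir_or_symlink)
			return FB_METADATA | FB_FORGET;
		if (journals_data)
			return FB_FORGET;
		return 0;
	}
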
@@ -2414,41 +2418,46 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2414 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2418 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { |
2415 | /* tail removal */ | 2419 | /* tail removal */ |
2416 | ext4_lblk_t num; | 2420 | ext4_lblk_t num; |
2421 | unsigned int unaligned; | ||
2417 | 2422 | ||
2418 | num = le32_to_cpu(ex->ee_block) + ee_len - from; | 2423 | num = le32_to_cpu(ex->ee_block) + ee_len - from; |
2419 | pblk = ext4_ext_pblock(ex) + ee_len - num; | 2424 | pblk = ext4_ext_pblock(ex) + ee_len - num; |
2420 | ext_debug("free last %u blocks starting %llu\n", num, pblk); | 2425 | /* |
2426 | * Usually we want to free the partial cluster at the end of the | ||
2427 | * extent, except for the situation when the cluster is still | ||
2428 | * used by another extent (partial_cluster is negative). | ||
2429 | */ | ||
2430 | if (*partial_cluster < 0 && | ||
2431 | -(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1)) | ||
2432 | flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER; | ||
2433 | |||
2434 | ext_debug("free last %u blocks starting %llu partial %lld\n", | ||
2435 | num, pblk, *partial_cluster); | ||
2421 | ext4_free_blocks(handle, inode, NULL, pblk, num, flags); | 2436 | ext4_free_blocks(handle, inode, NULL, pblk, num, flags); |
2422 | /* | 2437 | /* |
2423 | * If the block range to be freed didn't start at the | 2438 | * If the block range to be freed didn't start at the |
2424 | * beginning of a cluster, and we removed the entire | 2439 | * beginning of a cluster, and we removed the entire |
2425 | * extent, save the partial cluster here, since we | 2440 | * extent and the cluster is not used by any other extent, |
2426 | * might need to delete if we determine that the | 2441 | * save the partial cluster here, since we might need to |
2427 | * truncate operation has removed all of the blocks in | 2442 | * delete if we determine that the truncate operation has |
2428 | * the cluster. | 2443 | * removed all of the blocks in the cluster. |
2444 | * | ||
2445 | * On the other hand, if we did not manage to free the whole | ||
2446 | * extent, we have to mark the cluster as used (store negative | ||
2447 | * cluster number in partial_cluster). | ||
2429 | */ | 2448 | */ |
2430 | if (pblk & (sbi->s_cluster_ratio - 1) && | 2449 | unaligned = pblk & (sbi->s_cluster_ratio - 1); |
2431 | (ee_len == num)) | 2450 | if (unaligned && (ee_len == num) && |
2451 | (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk)))) | ||
2432 | *partial_cluster = EXT4_B2C(sbi, pblk); | 2452 | *partial_cluster = EXT4_B2C(sbi, pblk); |
2433 | else | 2453 | else if (unaligned) |
2454 | *partial_cluster = -((long long)EXT4_B2C(sbi, pblk)); | ||
2455 | else if (*partial_cluster > 0) | ||
2434 | *partial_cluster = 0; | 2456 | *partial_cluster = 0; |
2435 | } else if (from == le32_to_cpu(ex->ee_block) | 2457 | } else |
2436 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2458 | ext4_error(sbi->s_sb, "strange request: removal(2) " |
2437 | /* head removal */ | 2459 | "%u-%u from %u:%u\n", |
2438 | ext4_lblk_t num; | 2460 | from, to, le32_to_cpu(ex->ee_block), ee_len); |
2439 | ext4_fsblk_t start; | ||
2440 | |||
2441 | num = to - from; | ||
2442 | start = ext4_ext_pblock(ex); | ||
2443 | |||
2444 | ext_debug("free first %u blocks starting %llu\n", num, start); | ||
2445 | ext4_free_blocks(handle, inode, NULL, start, num, flags); | ||
2446 | |||
2447 | } else { | ||
2448 | printk(KERN_INFO "strange request: removal(2) " | ||
2449 | "%u-%u from %u:%u\n", | ||
2450 | from, to, le32_to_cpu(ex->ee_block), ee_len); | ||
2451 | } | ||
2452 | return 0; | 2461 | return 0; |
2453 | } | 2462 | } |
2454 | 2463 | ||
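
Widening partial_cluster from ext4_fsblk_t to long long lets the sign carry state: a positive value is a cluster that may still need freeing, a negative value records that the cluster is known to be referenced by another extent. A small self-contained sketch of the convention; the cluster number 42 is illustrative:

	#include <stdio.h>

	int main(void)
	{
		long long partial_cluster = 0;

		partial_cluster = 42;	/* > 0: candidate to free later if unused */
		partial_cluster = -42;	/* < 0: cluster 42 is still referenced */
		if (partial_cluster < 0)
			printf("cluster %lld is in use\n", -partial_cluster);
		return 0;
	}
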
@@ -2461,12 +2470,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2461 | * @handle: The journal handle | 2470 | * @handle: The journal handle |
2462 | * @inode: The files inode | 2471 | * @inode: The files inode |
2463 | * @path: The path to the leaf | 2472 | * @path: The path to the leaf |
2473 | * @partial_cluster: The cluster which we'll have to free if all extents | ||
2474 | * have been released from it. It gets negative in case | ||
2475 | * the cluster is still used. | ||
2464 | * @start: The first block to remove | 2476 | * @start: The first block to remove |
2465 | * @end: The last block to remove | 2477 | * @end: The last block to remove |
2466 | */ | 2478 | */ |
2467 | static int | 2479 | static int |
2468 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | 2480 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, |
2469 | struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster, | 2481 | struct ext4_ext_path *path, |
2482 | long long *partial_cluster, | ||
2470 | ext4_lblk_t start, ext4_lblk_t end) | 2483 | ext4_lblk_t start, ext4_lblk_t end) |
2471 | { | 2484 | { |
2472 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 2485 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
@@ -2479,6 +2492,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2479 | unsigned short ex_ee_len; | 2492 | unsigned short ex_ee_len; |
2480 | unsigned uninitialized = 0; | 2493 | unsigned uninitialized = 0; |
2481 | struct ext4_extent *ex; | 2494 | struct ext4_extent *ex; |
2495 | ext4_fsblk_t pblk; | ||
2482 | 2496 | ||
2483 | /* the header must be checked already in ext4_ext_remove_space() */ | 2497 | /* the header must be checked already in ext4_ext_remove_space() */ |
2484 | ext_debug("truncate since %u in leaf to %u\n", start, end); | 2498 | ext_debug("truncate since %u in leaf to %u\n", start, end); |
@@ -2490,7 +2504,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2490 | return -EIO; | 2504 | return -EIO; |
2491 | } | 2505 | } |
2492 | /* find where to start removing */ | 2506 | /* find where to start removing */ |
2493 | ex = EXT_LAST_EXTENT(eh); | 2507 | ex = path[depth].p_ext; |
2508 | if (!ex) | ||
2509 | ex = EXT_LAST_EXTENT(eh); | ||
2494 | 2510 | ||
2495 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2511 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2496 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2512 | ex_ee_len = ext4_ext_get_actual_len(ex); |
@@ -2517,6 +2533,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2517 | 2533 | ||
2518 | /* If this extent is beyond the end of the hole, skip it */ | 2534 | /* If this extent is beyond the end of the hole, skip it */ |
2519 | if (end < ex_ee_block) { | 2535 | if (end < ex_ee_block) { |
2536 | /* | ||
2537 | * We're going to skip this extent and move to another, | ||
2538 | * so if this extent is not cluster aligned we have | ||
2539 | * to mark the current cluster as used to avoid | ||
2540 | * accidentally freeing it later on | ||
2541 | */ | ||
2542 | pblk = ext4_ext_pblock(ex); | ||
2543 | if (pblk & (sbi->s_cluster_ratio - 1)) | ||
2544 | *partial_cluster = | ||
2545 | -((long long)EXT4_B2C(sbi, pblk)); | ||
2520 | ex--; | 2546 | ex--; |
2521 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2547 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2522 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2548 | ex_ee_len = ext4_ext_get_actual_len(ex); |
@@ -2592,7 +2618,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2592 | sizeof(struct ext4_extent)); | 2618 | sizeof(struct ext4_extent)); |
2593 | } | 2619 | } |
2594 | le16_add_cpu(&eh->eh_entries, -1); | 2620 | le16_add_cpu(&eh->eh_entries, -1); |
2595 | } else | 2621 | } else if (*partial_cluster > 0) |
2596 | *partial_cluster = 0; | 2622 | *partial_cluster = 0; |
2597 | 2623 | ||
2598 | err = ext4_ext_dirty(handle, inode, path + depth); | 2624 | err = ext4_ext_dirty(handle, inode, path + depth); |
@@ -2610,17 +2636,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2610 | err = ext4_ext_correct_indexes(handle, inode, path); | 2636 | err = ext4_ext_correct_indexes(handle, inode, path); |
2611 | 2637 | ||
2612 | /* | 2638 | /* |
2613 | * If there is still a entry in the leaf node, check to see if | 2639 | * Free the partial cluster only if the current extent does not |
2614 | * it references the partial cluster. This is the only place | 2640 | * reference it. Otherwise we might free a used cluster. |
2615 | * where it could; if it doesn't, we can free the cluster. | ||
2616 | */ | 2641 | */ |
2617 | if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) && | 2642 | if (*partial_cluster > 0 && |
2618 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != | 2643 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != |
2619 | *partial_cluster)) { | 2644 | *partial_cluster)) { |
2620 | int flags = EXT4_FREE_BLOCKS_FORGET; | 2645 | int flags = get_default_free_blocks_flags(inode); |
2621 | |||
2622 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
2623 | flags |= EXT4_FREE_BLOCKS_METADATA; | ||
2624 | 2646 | ||
2625 | ext4_free_blocks(handle, inode, NULL, | 2647 | ext4_free_blocks(handle, inode, NULL, |
2626 | EXT4_C2B(sbi, *partial_cluster), | 2648 | EXT4_C2B(sbi, *partial_cluster), |
@@ -2664,7 +2686,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, | |||
2664 | struct super_block *sb = inode->i_sb; | 2686 | struct super_block *sb = inode->i_sb; |
2665 | int depth = ext_depth(inode); | 2687 | int depth = ext_depth(inode); |
2666 | struct ext4_ext_path *path = NULL; | 2688 | struct ext4_ext_path *path = NULL; |
2667 | ext4_fsblk_t partial_cluster = 0; | 2689 | long long partial_cluster = 0; |
2668 | handle_t *handle; | 2690 | handle_t *handle; |
2669 | int i = 0, err = 0; | 2691 | int i = 0, err = 0; |
2670 | 2692 | ||
@@ -2676,7 +2698,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, | |||
2676 | return PTR_ERR(handle); | 2698 | return PTR_ERR(handle); |
2677 | 2699 | ||
2678 | again: | 2700 | again: |
2679 | trace_ext4_ext_remove_space(inode, start, depth); | 2701 | trace_ext4_ext_remove_space(inode, start, end, depth); |
2680 | 2702 | ||
2681 | /* | 2703 | /* |
2682 | * Check if we are removing extents inside the extent tree. If that | 2704 | * Check if we are removing extents inside the extent tree. If that |
@@ -2813,6 +2835,9 @@ again: | |||
2813 | err = -EIO; | 2835 | err = -EIO; |
2814 | break; | 2836 | break; |
2815 | } | 2837 | } |
2838 | /* Yield here to deal with large extent trees. | ||
2839 | * Should be a no-op if we did IO above. */ | ||
2840 | cond_resched(); | ||
2816 | if (WARN_ON(i + 1 > depth)) { | 2841 | if (WARN_ON(i + 1 > depth)) { |
2817 | err = -EIO; | 2842 | err = -EIO; |
2818 | break; | 2843 | break; |
@@ -2844,17 +2869,14 @@ again: | |||
2844 | } | 2869 | } |
2845 | } | 2870 | } |
2846 | 2871 | ||
2847 | trace_ext4_ext_remove_space_done(inode, start, depth, partial_cluster, | 2872 | trace_ext4_ext_remove_space_done(inode, start, end, depth, |
2848 | path->p_hdr->eh_entries); | 2873 | partial_cluster, path->p_hdr->eh_entries); |
2849 | 2874 | ||
2850 | /* If we still have something in the partial cluster and we have removed | 2875 | /* If we still have something in the partial cluster and we have removed |
2851 | * even the first extent, then we should free the blocks in the partial | 2876 | * even the first extent, then we should free the blocks in the partial |
2852 | * cluster as well. */ | 2877 | * cluster as well. */ |
2853 | if (partial_cluster && path->p_hdr->eh_entries == 0) { | 2878 | if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) { |
2854 | int flags = EXT4_FREE_BLOCKS_FORGET; | 2879 | int flags = get_default_free_blocks_flags(inode); |
2855 | |||
2856 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
2857 | flags |= EXT4_FREE_BLOCKS_METADATA; | ||
2858 | 2880 | ||
2859 | ext4_free_blocks(handle, inode, NULL, | 2881 | ext4_free_blocks(handle, inode, NULL, |
2860 | EXT4_C2B(EXT4_SB(sb), partial_cluster), | 2882 | EXT4_C2B(EXT4_SB(sb), partial_cluster), |
@@ -4242,8 +4264,8 @@ got_allocated_blocks: | |||
4242 | /* not a good idea to call discard here directly, | 4264 | /* not a good idea to call discard here directly, |
4243 | * but otherwise we'd need to call it every free() */ | 4265 | * but otherwise we'd need to call it every free() */ |
4244 | ext4_discard_preallocations(inode); | 4266 | ext4_discard_preallocations(inode); |
4245 | ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), | 4267 | ext4_free_blocks(handle, inode, NULL, newblock, |
4246 | ext4_ext_get_actual_len(&newex), fb_flags); | 4268 | EXT4_C2B(sbi, allocated_clusters), fb_flags); |
4247 | goto out2; | 4269 | goto out2; |
4248 | } | 4270 | } |
4249 | 4271 | ||
@@ -4363,8 +4385,9 @@ out2: | |||
4363 | } | 4385 | } |
4364 | 4386 | ||
4365 | out3: | 4387 | out3: |
4366 | trace_ext4_ext_map_blocks_exit(inode, map, err ? err : allocated); | 4388 | trace_ext4_ext_map_blocks_exit(inode, flags, map, |
4367 | 4389 | err ? err : allocated); | |
4390 | ext4_es_lru_add(inode); | ||
4368 | return err ? err : allocated; | 4391 | return err ? err : allocated; |
4369 | } | 4392 | } |
4370 | 4393 | ||
@@ -4386,9 +4409,20 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) | |||
4386 | 4409 | ||
4387 | last_block = (inode->i_size + sb->s_blocksize - 1) | 4410 | last_block = (inode->i_size + sb->s_blocksize - 1) |
4388 | >> EXT4_BLOCK_SIZE_BITS(sb); | 4411 | >> EXT4_BLOCK_SIZE_BITS(sb); |
4412 | retry: | ||
4389 | err = ext4_es_remove_extent(inode, last_block, | 4413 | err = ext4_es_remove_extent(inode, last_block, |
4390 | EXT_MAX_BLOCKS - last_block); | 4414 | EXT_MAX_BLOCKS - last_block); |
4415 | if (err == -ENOMEM) { | ||
4416 | cond_resched(); | ||
4417 | congestion_wait(BLK_RW_ASYNC, HZ/50); | ||
4418 | goto retry; | ||
4419 | } | ||
4420 | if (err) { | ||
4421 | ext4_std_error(inode->i_sb, err); | ||
4422 | return; | ||
4423 | } | ||
4391 | err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); | 4424 | err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); |
4425 | ext4_std_error(inode->i_sb, err); | ||
4392 | } | 4426 | } |
4393 | 4427 | ||
4394 | static void ext4_falloc_update_inode(struct inode *inode, | 4428 | static void ext4_falloc_update_inode(struct inode *inode, |
@@ -4446,7 +4480,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
4446 | return -EOPNOTSUPP; | 4480 | return -EOPNOTSUPP; |
4447 | 4481 | ||
4448 | if (mode & FALLOC_FL_PUNCH_HOLE) | 4482 | if (mode & FALLOC_FL_PUNCH_HOLE) |
4449 | return ext4_punch_hole(file, offset, len); | 4483 | return ext4_punch_hole(inode, offset, len); |
4450 | 4484 | ||
4451 | ret = ext4_convert_inline_data(inode); | 4485 | ret = ext4_convert_inline_data(inode); |
4452 | if (ret) | 4486 | if (ret) |
@@ -4548,10 +4582,9 @@ retry: | |||
4548 | * function, to convert the fallocated extents after IO is completed. | 4582 | * function, to convert the fallocated extents after IO is completed. |
4549 | * Returns 0 on success. | 4583 | * Returns 0 on success. |
4550 | */ | 4584 | */ |
4551 | int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | 4585 | int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, |
4552 | ssize_t len) | 4586 | loff_t offset, ssize_t len) |
4553 | { | 4587 | { |
4554 | handle_t *handle; | ||
4555 | unsigned int max_blocks; | 4588 | unsigned int max_blocks; |
4556 | int ret = 0; | 4589 | int ret = 0; |
4557 | int ret2 = 0; | 4590 | int ret2 = 0; |
@@ -4566,16 +4599,32 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
4566 | max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - | 4599 | max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - |
4567 | map.m_lblk); | 4600 | map.m_lblk); |
4568 | /* | 4601 | /* |
4569 | * credits to insert 1 extent into extent tree | 4602 | * This is somewhat ugly but the idea is clear: when a transaction is |
4603 | * reserved, everything goes into it. Otherwise we start several | ||
4604 | * smaller transactions, converting each extent separately. | ||
4570 | */ | 4605 | */ |
4571 | credits = ext4_chunk_trans_blocks(inode, max_blocks); | 4606 | if (handle) { |
4607 | handle = ext4_journal_start_reserved(handle, | ||
4608 | EXT4_HT_EXT_CONVERT); | ||
4609 | if (IS_ERR(handle)) | ||
4610 | return PTR_ERR(handle); | ||
4611 | credits = 0; | ||
4612 | } else { | ||
4613 | /* | ||
4614 | * credits to insert 1 extent into extent tree | ||
4615 | */ | ||
4616 | credits = ext4_chunk_trans_blocks(inode, max_blocks); | ||
4617 | } | ||
4572 | while (ret >= 0 && ret < max_blocks) { | 4618 | while (ret >= 0 && ret < max_blocks) { |
4573 | map.m_lblk += ret; | 4619 | map.m_lblk += ret; |
4574 | map.m_len = (max_blocks -= ret); | 4620 | map.m_len = (max_blocks -= ret); |
4575 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); | 4621 | if (credits) { |
4576 | if (IS_ERR(handle)) { | 4622 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, |
4577 | ret = PTR_ERR(handle); | 4623 | credits); |
4578 | break; | 4624 | if (IS_ERR(handle)) { |
4625 | ret = PTR_ERR(handle); | ||
4626 | break; | ||
4627 | } | ||
4579 | } | 4628 | } |
4580 | ret = ext4_map_blocks(handle, inode, &map, | 4629 | ret = ext4_map_blocks(handle, inode, &map, |
4581 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); | 4630 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); |
@@ -4586,10 +4635,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
4586 | inode->i_ino, map.m_lblk, | 4635 | inode->i_ino, map.m_lblk, |
4587 | map.m_len, ret); | 4636 | map.m_len, ret); |
4588 | ext4_mark_inode_dirty(handle, inode); | 4637 | ext4_mark_inode_dirty(handle, inode); |
4589 | ret2 = ext4_journal_stop(handle); | 4638 | if (credits) |
4590 | if (ret <= 0 || ret2 ) | 4639 | ret2 = ext4_journal_stop(handle); |
4640 | if (ret <= 0 || ret2) | ||
4591 | break; | 4641 | break; |
4592 | } | 4642 | } |
4643 | if (!credits) | ||
4644 | ret2 = ext4_journal_stop(handle); | ||
4593 | return ret > 0 ? ret2 : ret; | 4645 | return ret > 0 ? ret2 : ret; |
4594 | } | 4646 | } |
4595 | 4647 | ||
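
The conversion path can now run inside a transaction the caller reserved earlier: when a handle is passed in, credits stays 0 and no per-extent journal start/stop happens; otherwise each extent gets its own small transaction. A sketch of that control flow with stub journal primitives; the struct and function names here are stand-ins, not the jbd2 API:

	#include <stdlib.h>

	struct handle { int dummy; };

	static struct handle *journal_start(void) { return malloc(sizeof(struct handle)); }
	static int journal_stop(struct handle *h) { free(h); return 0; }
	static int convert_one(struct handle *h) { (void)h; return 0; }

	/* With a caller-reserved handle (@h != NULL), every extent converts
	 * inside it and it is stopped once at the end; otherwise each
	 * extent opens and closes its own small transaction. */
	static int convert_extents(struct handle *h, int nr)
	{
		int per_extent = (h == NULL), err = 0, i;

		for (i = 0; i < nr && !err; i++) {
			if (per_extent)
				h = journal_start();
			err = convert_one(h);
			if (per_extent)
				journal_stop(h);
		}
		if (!per_extent)
			err = err ? err : journal_stop(h);
		return err;
	}
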
@@ -4659,7 +4711,7 @@ static int ext4_xattr_fiemap(struct inode *inode, | |||
4659 | error = ext4_get_inode_loc(inode, &iloc); | 4711 | error = ext4_get_inode_loc(inode, &iloc); |
4660 | if (error) | 4712 | if (error) |
4661 | return error; | 4713 | return error; |
4662 | physical = iloc.bh->b_blocknr << blockbits; | 4714 | physical = (__u64)iloc.bh->b_blocknr << blockbits; |
4663 | offset = EXT4_GOOD_OLD_INODE_SIZE + | 4715 | offset = EXT4_GOOD_OLD_INODE_SIZE + |
4664 | EXT4_I(inode)->i_extra_isize; | 4716 | EXT4_I(inode)->i_extra_isize; |
4665 | physical += offset; | 4717 | physical += offset; |
@@ -4667,7 +4719,7 @@ static int ext4_xattr_fiemap(struct inode *inode, | |||
4667 | flags |= FIEMAP_EXTENT_DATA_INLINE; | 4719 | flags |= FIEMAP_EXTENT_DATA_INLINE; |
4668 | brelse(iloc.bh); | 4720 | brelse(iloc.bh); |
4669 | } else { /* external block */ | 4721 | } else { /* external block */ |
4670 | physical = EXT4_I(inode)->i_file_acl << blockbits; | 4722 | physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; |
4671 | length = inode->i_sb->s_blocksize; | 4723 | length = inode->i_sb->s_blocksize; |
4672 | } | 4724 | } |
4673 | 4725 | ||
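
Both fiemap paths now cast the block number to __u64 before shifting by blockbits; without the cast the shift happens in the narrower type and wraps once the byte offset passes 4 GiB. A small demo of the difference; the block and shift values are illustrative:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t blocknr = 1u << 22;	/* block 4194304 */
		int blockbits = 12;		/* 4 KiB blocks */

		/* the 32-bit shift wraps to 0; the cast keeps all 64 bits */
		uint64_t wrong = blocknr << blockbits;
		uint64_t right = (uint64_t)blocknr << blockbits;

		printf("wrong=%llu right=%llu\n",
		       (unsigned long long)wrong, (unsigned long long)right);
		return 0;
	}
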
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index e6941e622d31..91cb110da1b4 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
@@ -10,6 +10,7 @@ | |||
10 | * Ext4 extents status tree core functions. | 10 | * Ext4 extents status tree core functions. |
11 | */ | 11 | */ |
12 | #include <linux/rbtree.h> | 12 | #include <linux/rbtree.h> |
13 | #include <linux/list_sort.h> | ||
13 | #include "ext4.h" | 14 | #include "ext4.h" |
14 | #include "extents_status.h" | 15 | #include "extents_status.h" |
15 | #include "ext4_extents.h" | 16 | #include "ext4_extents.h" |
@@ -147,6 +148,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
147 | ext4_lblk_t end); | 148 | ext4_lblk_t end); |
148 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | 149 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, |
149 | int nr_to_scan); | 150 | int nr_to_scan); |
151 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | ||
152 | struct ext4_inode_info *locked_ei); | ||
150 | 153 | ||
151 | int __init ext4_init_es(void) | 154 | int __init ext4_init_es(void) |
152 | { | 155 | { |
@@ -291,7 +294,6 @@ out: | |||
291 | 294 | ||
292 | read_unlock(&EXT4_I(inode)->i_es_lock); | 295 | read_unlock(&EXT4_I(inode)->i_es_lock); |
293 | 296 | ||
294 | ext4_es_lru_add(inode); | ||
295 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); | 297 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); |
296 | } | 298 | } |
297 | 299 | ||
@@ -439,7 +441,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
439 | */ | 441 | */ |
440 | if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { | 442 | if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { |
441 | if (in_range(es->es_lblk, ee_block, ee_len)) { | 443 | if (in_range(es->es_lblk, ee_block, ee_len)) { |
442 | pr_warn("ES insert assertation failed for " | 444 | pr_warn("ES insert assertion failed for " |
443 | "inode: %lu we can find an extent " | 445 | "inode: %lu we can find an extent " |
444 | "at block [%d/%d/%llu/%c], but we " | 446 | "at block [%d/%d/%llu/%c], but we " |
445 | "want to add an delayed/hole extent " | 447 | "want to add an delayed/hole extent " |
@@ -458,7 +460,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
458 | */ | 460 | */ |
459 | if (es->es_lblk < ee_block || | 461 | if (es->es_lblk < ee_block || |
460 | ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { | 462 | ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { |
461 | pr_warn("ES insert assertation failed for inode: %lu " | 463 | pr_warn("ES insert assertion failed for inode: %lu " |
462 | "ex_status [%d/%d/%llu/%c] != " | 464 | "ex_status [%d/%d/%llu/%c] != " |
463 | "es_status [%d/%d/%llu/%c]\n", inode->i_ino, | 465 | "es_status [%d/%d/%llu/%c]\n", inode->i_ino, |
464 | ee_block, ee_len, ee_start, | 466 | ee_block, ee_len, ee_start, |
@@ -468,7 +470,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
468 | } | 470 | } |
469 | 471 | ||
470 | if (ee_status ^ es_status) { | 472 | if (ee_status ^ es_status) { |
471 | pr_warn("ES insert assertation failed for inode: %lu " | 473 | pr_warn("ES insert assertion failed for inode: %lu " |
472 | "ex_status [%d/%d/%llu/%c] != " | 474 | "ex_status [%d/%d/%llu/%c] != " |
473 | "es_status [%d/%d/%llu/%c]\n", inode->i_ino, | 475 | "es_status [%d/%d/%llu/%c]\n", inode->i_ino, |
474 | ee_block, ee_len, ee_start, | 476 | ee_block, ee_len, ee_start, |
@@ -481,7 +483,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
481 | * that we don't want to add a written/unwritten extent. | 483 | * that we don't want to add a written/unwritten extent. |
482 | */ | 484 | */ |
483 | if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { | 485 | if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { |
484 | pr_warn("ES insert assertation failed for inode: %lu " | 486 | pr_warn("ES insert assertion failed for inode: %lu " |
485 | "can't find an extent at block %d but we want " | 487 | "can't find an extent at block %d but we want " |
486 | "to add an written/unwritten extent " | 488 | "to add an written/unwritten extent " |
487 | "[%d/%d/%llu/%llx]\n", inode->i_ino, | 489 | "[%d/%d/%llu/%llx]\n", inode->i_ino, |
@@ -519,7 +521,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, | |||
519 | * We want to add a delayed/hole extent but this | 521 | * We want to add a delayed/hole extent but this |
520 | * block has been allocated. | 522 | * block has been allocated. |
521 | */ | 523 | */ |
522 | pr_warn("ES insert assertation failed for inode: %lu " | 524 | pr_warn("ES insert assertion failed for inode: %lu " |
523 | "We can find blocks but we want to add a " | 525 | "We can find blocks but we want to add a " |
524 | "delayed/hole extent [%d/%d/%llu/%llx]\n", | 526 | "delayed/hole extent [%d/%d/%llu/%llx]\n", |
525 | inode->i_ino, es->es_lblk, es->es_len, | 527 | inode->i_ino, es->es_lblk, es->es_len, |
@@ -527,13 +529,13 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, | |||
527 | return; | 529 | return; |
528 | } else if (ext4_es_is_written(es)) { | 530 | } else if (ext4_es_is_written(es)) { |
529 | if (retval != es->es_len) { | 531 | if (retval != es->es_len) { |
530 | pr_warn("ES insert assertation failed for " | 532 | pr_warn("ES insert assertion failed for " |
531 | "inode: %lu retval %d != es_len %d\n", | 533 | "inode: %lu retval %d != es_len %d\n", |
532 | inode->i_ino, retval, es->es_len); | 534 | inode->i_ino, retval, es->es_len); |
533 | return; | 535 | return; |
534 | } | 536 | } |
535 | if (map.m_pblk != ext4_es_pblock(es)) { | 537 | if (map.m_pblk != ext4_es_pblock(es)) { |
536 | pr_warn("ES insert assertation failed for " | 538 | pr_warn("ES insert assertion failed for " |
537 | "inode: %lu m_pblk %llu != " | 539 | "inode: %lu m_pblk %llu != " |
538 | "es_pblk %llu\n", | 540 | "es_pblk %llu\n", |
539 | inode->i_ino, map.m_pblk, | 541 | inode->i_ino, map.m_pblk, |
@@ -549,7 +551,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, | |||
549 | } | 551 | } |
550 | } else if (retval == 0) { | 552 | } else if (retval == 0) { |
551 | if (ext4_es_is_written(es)) { | 553 | if (ext4_es_is_written(es)) { |
552 | pr_warn("ES insert assertation failed for inode: %lu " | 554 | pr_warn("ES insert assertion failed for inode: %lu " |
553 | "We can't find the block but we want to add " | 555 | "We can't find the block but we want to add " |
554 | "an written extent [%d/%d/%llu/%llx]\n", | 556 | "an written extent [%d/%d/%llu/%llx]\n", |
555 | inode->i_ino, es->es_lblk, es->es_len, | 557 | inode->i_ino, es->es_lblk, es->es_len, |
@@ -632,10 +634,8 @@ out: | |||
632 | } | 634 | } |
633 | 635 | ||
634 | /* | 636 | /* |
635 | * ext4_es_insert_extent() adds a space to a extent status tree. | 637 | * ext4_es_insert_extent() adds information to an inode's extent |
636 | * | 638 | * status tree. |
637 | * ext4_es_insert_extent is called by ext4_da_write_begin and | ||
638 | * ext4_es_remove_extent. | ||
639 | * | 639 | * |
640 | * Return 0 on success, error code on failure. | 640 | * Return 0 on success, error code on failure. |
641 | */ | 641 | */ |
@@ -667,12 +667,17 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | |||
667 | err = __es_remove_extent(inode, lblk, end); | 667 | err = __es_remove_extent(inode, lblk, end); |
668 | if (err != 0) | 668 | if (err != 0) |
669 | goto error; | 669 | goto error; |
670 | retry: | ||
670 | err = __es_insert_extent(inode, &newes); | 671 | err = __es_insert_extent(inode, &newes); |
672 | if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | ||
673 | EXT4_I(inode))) | ||
674 | goto retry; | ||
675 | if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) | ||
676 | err = 0; | ||
671 | 677 | ||
672 | error: | 678 | error: |
673 | write_unlock(&EXT4_I(inode)->i_es_lock); | 679 | write_unlock(&EXT4_I(inode)->i_es_lock); |
674 | 680 | ||
675 | ext4_es_lru_add(inode); | ||
676 | ext4_es_print_tree(inode); | 681 | ext4_es_print_tree(inode); |
677 | 682 | ||
678 | return err; | 683 | return err; |
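
When __es_insert_extent() runs out of memory, the code now tries to free entries from this filesystem's own extent-status cache and retries; a failed insert of a non-delayed extent can also simply be dropped, since that information can be re-read from the on-disk tree. A sketch of the reclaim-then-retry shape; both callbacks are hypothetical stand-ins for __es_insert_extent() and __ext4_es_shrink():

	#include <errno.h>

	static int insert_with_reclaim(int (*try_insert)(void *),
				       int (*shrink)(int), void *entry,
				       int cache_only)
	{
		int err;

		while ((err = try_insert(entry)) == -ENOMEM)
			if (shrink(1) == 0)
				break;	/* nothing left to reclaim */
		if (err == -ENOMEM && cache_only)
			err = 0;	/* a pure cache entry may be dropped */
		return err;
	}
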
@@ -734,7 +739,6 @@ out: | |||
734 | 739 | ||
735 | read_unlock(&EXT4_I(inode)->i_es_lock); | 740 | read_unlock(&EXT4_I(inode)->i_es_lock); |
736 | 741 | ||
737 | ext4_es_lru_add(inode); | ||
738 | trace_ext4_es_lookup_extent_exit(inode, es, found); | 742 | trace_ext4_es_lookup_extent_exit(inode, es, found); |
739 | return found; | 743 | return found; |
740 | } | 744 | } |
@@ -748,8 +752,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
748 | struct extent_status orig_es; | 752 | struct extent_status orig_es; |
749 | ext4_lblk_t len1, len2; | 753 | ext4_lblk_t len1, len2; |
750 | ext4_fsblk_t block; | 754 | ext4_fsblk_t block; |
751 | int err = 0; | 755 | int err; |
752 | 756 | ||
757 | retry: | ||
758 | err = 0; | ||
753 | es = __es_tree_search(&tree->root, lblk); | 759 | es = __es_tree_search(&tree->root, lblk); |
754 | if (!es) | 760 | if (!es) |
755 | goto out; | 761 | goto out; |
@@ -784,6 +790,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
784 | if (err) { | 790 | if (err) { |
785 | es->es_lblk = orig_es.es_lblk; | 791 | es->es_lblk = orig_es.es_lblk; |
786 | es->es_len = orig_es.es_len; | 792 | es->es_len = orig_es.es_len; |
793 | if ((err == -ENOMEM) && | ||
794 | __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | ||
795 | EXT4_I(inode))) | ||
796 | goto retry; | ||
787 | goto out; | 797 | goto out; |
788 | } | 798 | } |
789 | } else { | 799 | } else { |
@@ -878,38 +888,64 @@ int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
878 | EXTENT_STATUS_WRITTEN); | 888 | EXTENT_STATUS_WRITTEN); |
879 | } | 889 | } |
880 | 890 | ||
881 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | 891 | static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, |
892 | struct list_head *b) | ||
893 | { | ||
894 | struct ext4_inode_info *eia, *eib; | ||
895 | eia = list_entry(a, struct ext4_inode_info, i_es_lru); | ||
896 | eib = list_entry(b, struct ext4_inode_info, i_es_lru); | ||
897 | |||
898 | if (eia->i_touch_when == eib->i_touch_when) | ||
899 | return 0; | ||
900 | if (time_after(eia->i_touch_when, eib->i_touch_when)) | ||
901 | return 1; | ||
902 | else | ||
903 | return -1; | ||
904 | } | ||
905 | |||
906 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | ||
907 | struct ext4_inode_info *locked_ei) | ||
882 | { | 908 | { |
883 | struct ext4_sb_info *sbi = container_of(shrink, | ||
884 | struct ext4_sb_info, s_es_shrinker); | ||
885 | struct ext4_inode_info *ei; | 909 | struct ext4_inode_info *ei; |
886 | struct list_head *cur, *tmp, scanned; | 910 | struct list_head *cur, *tmp; |
887 | int nr_to_scan = sc->nr_to_scan; | 911 | LIST_HEAD(skipped); |
888 | int ret, nr_shrunk = 0; | 912 | int ret, nr_shrunk = 0; |
889 | 913 | ||
890 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | 914 | spin_lock(&sbi->s_es_lru_lock); |
891 | trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); | ||
892 | |||
893 | if (!nr_to_scan) | ||
894 | return ret; | ||
895 | 915 | ||
896 | INIT_LIST_HEAD(&scanned); | 916 | /* |
917 | * If the inode that is at the head of LRU list is newer than | ||
918 | * last_sorted time, that means that we need to sort this list. | ||
919 | */ | ||
920 | ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru); | ||
921 | if (sbi->s_es_last_sorted < ei->i_touch_when) { | ||
922 | list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); | ||
923 | sbi->s_es_last_sorted = jiffies; | ||
924 | } | ||
897 | 925 | ||
898 | spin_lock(&sbi->s_es_lru_lock); | ||
899 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { | 926 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { |
900 | list_move_tail(cur, &scanned); | 927 | /* |
928 | * If we have already reclaimed all extents from extent | ||
929 | * status tree, just stop the loop immediately. | ||
930 | */ | ||
931 | if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0) | ||
932 | break; | ||
901 | 933 | ||
902 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); | 934 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); |
903 | 935 | ||
904 | read_lock(&ei->i_es_lock); | 936 | /* Skip the inode that is newer than the last_sorted time */ |
905 | if (ei->i_es_lru_nr == 0) { | 937 | if (sbi->s_es_last_sorted < ei->i_touch_when) { |
906 | read_unlock(&ei->i_es_lock); | 938 | list_move_tail(cur, &skipped); |
907 | continue; | 939 | continue; |
908 | } | 940 | } |
909 | read_unlock(&ei->i_es_lock); | 941 | |
942 | if (ei->i_es_lru_nr == 0 || ei == locked_ei) | ||
943 | continue; | ||
910 | 944 | ||
911 | write_lock(&ei->i_es_lock); | 945 | write_lock(&ei->i_es_lock); |
912 | ret = __es_try_to_reclaim_extents(ei, nr_to_scan); | 946 | ret = __es_try_to_reclaim_extents(ei, nr_to_scan); |
947 | if (ei->i_es_lru_nr == 0) | ||
948 | list_del_init(&ei->i_es_lru); | ||
913 | write_unlock(&ei->i_es_lock); | 949 | write_unlock(&ei->i_es_lock); |
914 | 950 | ||
915 | nr_shrunk += ret; | 951 | nr_shrunk += ret; |
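
The shrinker now keeps the LRU ordered lazily: it re-sorts by i_touch_when only when the list head is newer than the time of the last sort. A userspace sketch of the oldest-first comparator contract, using qsort() instead of the kernel's list_sort(); note the kernel version compares with time_after(), which stays correct across jiffies wraparound, while the plain comparison below ignores that:

	#include <stdio.h>
	#include <stdlib.h>

	struct node { unsigned long touch_when; };

	/* Return <0, 0 or >0 so older (smaller) timestamps sort first. */
	static int touch_time_cmp(const void *a, const void *b)
	{
		const struct node *na = a, *nb = b;

		if (na->touch_when == nb->touch_when)
			return 0;
		return na->touch_when > nb->touch_when ? 1 : -1;
	}

	int main(void)
	{
		struct node v[] = { { 30 }, { 10 }, { 20 } };

		qsort(v, 3, sizeof(v[0]), touch_time_cmp);
		printf("%lu %lu %lu\n", v[0].touch_when, v[1].touch_when,
		       v[2].touch_when);	/* prints 10 20 30 */
		return 0;
	}
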
@@ -917,29 +953,50 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
917 | if (nr_to_scan == 0) | 953 | if (nr_to_scan == 0) |
918 | break; | 954 | break; |
919 | } | 955 | } |
920 | list_splice_tail(&scanned, &sbi->s_es_lru); | 956 | |
957 | /* Move the newer inodes into the tail of the LRU list. */ | ||
958 | list_splice_tail(&skipped, &sbi->s_es_lru); | ||
921 | spin_unlock(&sbi->s_es_lru_lock); | 959 | spin_unlock(&sbi->s_es_lru_lock); |
922 | 960 | ||
961 | if (locked_ei && nr_shrunk == 0) | ||
962 | nr_shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); | ||
963 | |||
964 | return nr_shrunk; | ||
965 | } | ||
966 | |||
967 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | ||
968 | { | ||
969 | struct ext4_sb_info *sbi = container_of(shrink, | ||
970 | struct ext4_sb_info, s_es_shrinker); | ||
971 | int nr_to_scan = sc->nr_to_scan; | ||
972 | int ret, nr_shrunk; | ||
973 | |||
974 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | ||
975 | trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); | ||
976 | |||
977 | if (!nr_to_scan) | ||
978 | return ret; | ||
979 | |||
980 | nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); | ||
981 | |||
923 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | 982 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); |
924 | trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); | 983 | trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); |
925 | return ret; | 984 | return ret; |
926 | } | 985 | } |
927 | 986 | ||
928 | void ext4_es_register_shrinker(struct super_block *sb) | 987 | void ext4_es_register_shrinker(struct ext4_sb_info *sbi) |
929 | { | 988 | { |
930 | struct ext4_sb_info *sbi; | ||
931 | |||
932 | sbi = EXT4_SB(sb); | ||
933 | INIT_LIST_HEAD(&sbi->s_es_lru); | 989 | INIT_LIST_HEAD(&sbi->s_es_lru); |
934 | spin_lock_init(&sbi->s_es_lru_lock); | 990 | spin_lock_init(&sbi->s_es_lru_lock); |
991 | sbi->s_es_last_sorted = 0; | ||
935 | sbi->s_es_shrinker.shrink = ext4_es_shrink; | 992 | sbi->s_es_shrinker.shrink = ext4_es_shrink; |
936 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; | 993 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; |
937 | register_shrinker(&sbi->s_es_shrinker); | 994 | register_shrinker(&sbi->s_es_shrinker); |
938 | } | 995 | } |
939 | 996 | ||
940 | void ext4_es_unregister_shrinker(struct super_block *sb) | 997 | void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) |
941 | { | 998 | { |
942 | unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker); | 999 | unregister_shrinker(&sbi->s_es_shrinker); |
943 | } | 1000 | } |
944 | 1001 | ||
945 | void ext4_es_lru_add(struct inode *inode) | 1002 | void ext4_es_lru_add(struct inode *inode) |
@@ -947,11 +1004,14 @@ void ext4_es_lru_add(struct inode *inode) | |||
947 | struct ext4_inode_info *ei = EXT4_I(inode); | 1004 | struct ext4_inode_info *ei = EXT4_I(inode); |
948 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1005 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
949 | 1006 | ||
1007 | ei->i_touch_when = jiffies; | ||
1008 | |||
1009 | if (!list_empty(&ei->i_es_lru)) | ||
1010 | return; | ||
1011 | |||
950 | spin_lock(&sbi->s_es_lru_lock); | 1012 | spin_lock(&sbi->s_es_lru_lock); |
951 | if (list_empty(&ei->i_es_lru)) | 1013 | if (list_empty(&ei->i_es_lru)) |
952 | list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); | 1014 | list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); |
953 | else | ||
954 | list_move_tail(&ei->i_es_lru, &sbi->s_es_lru); | ||
955 | spin_unlock(&sbi->s_es_lru_lock); | 1015 | spin_unlock(&sbi->s_es_lru_lock); |
956 | } | 1016 | } |
957 | 1017 | ||
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index f740eb03b707..e936730cc5b0 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
@@ -39,6 +39,7 @@ | |||
39 | EXTENT_STATUS_DELAYED | \ | 39 | EXTENT_STATUS_DELAYED | \ |
40 | EXTENT_STATUS_HOLE) | 40 | EXTENT_STATUS_HOLE) |
41 | 41 | ||
42 | struct ext4_sb_info; | ||
42 | struct ext4_extent; | 43 | struct ext4_extent; |
43 | 44 | ||
44 | struct extent_status { | 45 | struct extent_status { |
@@ -119,8 +120,8 @@ static inline void ext4_es_store_status(struct extent_status *es, | |||
119 | es->es_pblk = block; | 120 | es->es_pblk = block; |
120 | } | 121 | } |
121 | 122 | ||
122 | extern void ext4_es_register_shrinker(struct super_block *sb); | 123 | extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); |
123 | extern void ext4_es_unregister_shrinker(struct super_block *sb); | 124 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); |
124 | extern void ext4_es_lru_add(struct inode *inode); | 125 | extern void ext4_es_lru_add(struct inode *inode); |
125 | extern void ext4_es_lru_del(struct inode *inode); | 126 | extern void ext4_es_lru_del(struct inode *inode); |
126 | 127 | ||
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b1b4d51b5d86..6f4cc567c382 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -312,7 +312,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, | |||
312 | blkbits = inode->i_sb->s_blocksize_bits; | 312 | blkbits = inode->i_sb->s_blocksize_bits; |
313 | startoff = *offset; | 313 | startoff = *offset; |
314 | lastoff = startoff; | 314 | lastoff = startoff; |
315 | endoff = (map->m_lblk + map->m_len) << blkbits; | 315 | endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; |
316 | 316 | ||
317 | index = startoff >> PAGE_CACHE_SHIFT; | 317 | index = startoff >> PAGE_CACHE_SHIFT; |
318 | end = endoff >> PAGE_CACHE_SHIFT; | 318 | end = endoff >> PAGE_CACHE_SHIFT; |
@@ -457,7 +457,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
457 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 457 | ret = ext4_map_blocks(NULL, inode, &map, 0); |
458 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | 458 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { |
459 | if (last != start) | 459 | if (last != start) |
460 | dataoff = last << blkbits; | 460 | dataoff = (loff_t)last << blkbits; |
461 | break; | 461 | break; |
462 | } | 462 | } |
463 | 463 | ||
@@ -468,7 +468,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
468 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | 468 | ext4_es_find_delayed_extent_range(inode, last, last, &es); |
469 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | 469 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { |
470 | if (last != start) | 470 | if (last != start) |
471 | dataoff = last << blkbits; | 471 | dataoff = (loff_t)last << blkbits; |
472 | break; | 472 | break; |
473 | } | 473 | } |
474 | 474 | ||
@@ -486,7 +486,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
486 | } | 486 | } |
487 | 487 | ||
488 | last++; | 488 | last++; |
489 | dataoff = last << blkbits; | 489 | dataoff = (loff_t)last << blkbits; |
490 | } while (last <= end); | 490 | } while (last <= end); |
491 | 491 | ||
492 | mutex_unlock(&inode->i_mutex); | 492 | mutex_unlock(&inode->i_mutex); |
@@ -494,17 +494,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
494 | if (dataoff > isize) | 494 | if (dataoff > isize) |
495 | return -ENXIO; | 495 | return -ENXIO; |
496 | 496 | ||
497 | if (dataoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) | 497 | return vfs_setpos(file, dataoff, maxsize); |
498 | return -EINVAL; | ||
499 | if (dataoff > maxsize) | ||
500 | return -EINVAL; | ||
501 | |||
502 | if (dataoff != file->f_pos) { | ||
503 | file->f_pos = dataoff; | ||
504 | file->f_version = 0; | ||
505 | } | ||
506 | |||
507 | return dataoff; | ||
508 | } | 498 | } |
509 | 499 | ||
510 | /* | 500 | /* |
@@ -540,7 +530,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
540 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 530 | ret = ext4_map_blocks(NULL, inode, &map, 0); |
541 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | 531 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { |
542 | last += ret; | 532 | last += ret; |
543 | holeoff = last << blkbits; | 533 | holeoff = (loff_t)last << blkbits; |
544 | continue; | 534 | continue; |
545 | } | 535 | } |
546 | 536 | ||
@@ -551,7 +541,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
551 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | 541 | ext4_es_find_delayed_extent_range(inode, last, last, &es); |
552 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | 542 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { |
553 | last = es.es_lblk + es.es_len; | 543 | last = es.es_lblk + es.es_len; |
554 | holeoff = last << blkbits; | 544 | holeoff = (loff_t)last << blkbits; |
555 | continue; | 545 | continue; |
556 | } | 546 | } |
557 | 547 | ||
@@ -566,7 +556,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
566 | &map, &holeoff); | 556 | &map, &holeoff); |
567 | if (!unwritten) { | 557 | if (!unwritten) { |
568 | last += ret; | 558 | last += ret; |
569 | holeoff = last << blkbits; | 559 | holeoff = (loff_t)last << blkbits; |
570 | continue; | 560 | continue; |
571 | } | 561 | } |
572 | } | 562 | } |
@@ -580,17 +570,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
580 | if (holeoff > isize) | 570 | if (holeoff > isize) |
581 | holeoff = isize; | 571 | holeoff = isize; |
582 | 572 | ||
583 | if (holeoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) | 573 | return vfs_setpos(file, holeoff, maxsize); |
584 | return -EINVAL; | ||
585 | if (holeoff > maxsize) | ||
586 | return -EINVAL; | ||
587 | |||
588 | if (holeoff != file->f_pos) { | ||
589 | file->f_pos = holeoff; | ||
590 | file->f_version = 0; | ||
591 | } | ||
592 | |||
593 | return holeoff; | ||
594 | } | 574 | } |
595 | 575 | ||
596 | /* | 576 | /* |
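
The two llseek helpers gain (loff_t) casts so a 32-bit logical block number cannot overflow before it becomes a byte offset, and their duplicated f_pos updates collapse into vfs_setpos(). Roughly the logic the removed blocks performed, as a hedged sketch; the real helper also honours FMODE_UNSIGNED_OFFSET:

	#include <errno.h>

	struct fpos { long long pos; unsigned version; };

	/* Validate the offset, then move the file position and reset the
	 * version so cached directory state is revalidated on next use. */
	static long long setpos(struct fpos *f, long long offset, long long maxsize)
	{
		if (offset < 0 || offset > maxsize)
			return -EINVAL;
		if (offset != f->pos) {
			f->pos = offset;
			f->version = 0;
		}
		return offset;
	}
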
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index e0ba8a408def..a8bc47f75fa0 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -73,32 +73,6 @@ static int ext4_sync_parent(struct inode *inode) | |||
73 | return ret; | 73 | return ret; |
74 | } | 74 | } |
75 | 75 | ||
76 | /** | ||
77 | * __sync_file - generic_file_fsync without the locking and filemap_write | ||
78 | * @inode: inode to sync | ||
79 | * @datasync: only sync essential metadata if true | ||
80 | * | ||
81 | * This is just generic_file_fsync without the locking. This is needed for | ||
82 | * nojournal mode to make sure this inodes data/metadata makes it to disk | ||
83 | * properly. The i_mutex should be held already. | ||
84 | */ | ||
85 | static int __sync_inode(struct inode *inode, int datasync) | ||
86 | { | ||
87 | int err; | ||
88 | int ret; | ||
89 | |||
90 | ret = sync_mapping_buffers(inode->i_mapping); | ||
91 | if (!(inode->i_state & I_DIRTY)) | ||
92 | return ret; | ||
93 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | ||
94 | return ret; | ||
95 | |||
96 | err = sync_inode_metadata(inode, 1); | ||
97 | if (ret == 0) | ||
98 | ret = err; | ||
99 | return ret; | ||
100 | } | ||
101 | |||
102 | /* | 76 | /* |
103 | * akpm: A new design for ext4_sync_file(). | 77 | * akpm: A new design for ext4_sync_file(). |
104 | * | 78 | * |
@@ -116,7 +90,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
116 | struct inode *inode = file->f_mapping->host; | 90 | struct inode *inode = file->f_mapping->host; |
117 | struct ext4_inode_info *ei = EXT4_I(inode); | 91 | struct ext4_inode_info *ei = EXT4_I(inode); |
118 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 92 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
119 | int ret, err; | 93 | int ret = 0, err; |
120 | tid_t commit_tid; | 94 | tid_t commit_tid; |
121 | bool needs_barrier = false; | 95 | bool needs_barrier = false; |
122 | 96 | ||
@@ -124,25 +98,24 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
124 | 98 | ||
125 | trace_ext4_sync_file_enter(file, datasync); | 99 | trace_ext4_sync_file_enter(file, datasync); |
126 | 100 | ||
127 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 101 | if (inode->i_sb->s_flags & MS_RDONLY) { |
128 | if (ret) | 102 | /* Make sure that we read updated s_mount_flags value */ |
129 | return ret; | 103 | smp_rmb(); |
130 | mutex_lock(&inode->i_mutex); | 104 | if (EXT4_SB(inode->i_sb)->s_mount_flags & EXT4_MF_FS_ABORTED) |
131 | 105 | ret = -EROFS; | |
132 | if (inode->i_sb->s_flags & MS_RDONLY) | ||
133 | goto out; | ||
134 | |||
135 | ret = ext4_flush_unwritten_io(inode); | ||
136 | if (ret < 0) | ||
137 | goto out; | 106 | goto out; |
107 | } | ||
138 | 108 | ||
139 | if (!journal) { | 109 | if (!journal) { |
140 | ret = __sync_inode(inode, datasync); | 110 | ret = generic_file_fsync(file, start, end, datasync); |
141 | if (!ret && !hlist_empty(&inode->i_dentry)) | 111 | if (!ret && !hlist_empty(&inode->i_dentry)) |
142 | ret = ext4_sync_parent(inode); | 112 | ret = ext4_sync_parent(inode); |
143 | goto out; | 113 | goto out; |
144 | } | 114 | } |
145 | 115 | ||
116 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | ||
117 | if (ret) | ||
118 | return ret; | ||
146 | /* | 119 | /* |
147 | * data=writeback,ordered: | 120 | * data=writeback,ordered: |
148 | * The caller's filemap_fdatawrite()/wait will sync the data. | 121 | * The caller's filemap_fdatawrite()/wait will sync the data. |
@@ -172,8 +145,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
172 | if (!ret) | 145 | if (!ret) |
173 | ret = err; | 146 | ret = err; |
174 | } | 147 | } |
175 | out: | 148 | out: |
176 | mutex_unlock(&inode->i_mutex); | ||
177 | trace_ext4_sync_file_exit(inode, ret); | 149 | trace_ext4_sync_file_exit(inode, ret); |
178 | return ret; | 150 | return ret; |
179 | } | 151 | } |
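
The read-only path above pairs its smp_rmb() with the writer that sets EXT4_MF_FS_ABORTED, so the flag read is not reordered ahead of the MS_RDONLY check. A loose C11-atomics analog of that read side, using an acquire load; this is a sketch of the ordering idea, not the kernel's barrier API:

	#include <errno.h>
	#include <stdatomic.h>

	static atomic_int fs_aborted;	/* stand-in for the ABORTED mount flag */

	static int fsync_on_rdonly(void)
	{
		if (atomic_load_explicit(&fs_aborted, memory_order_acquire))
			return -EROFS;	/* aborted filesystem: report the error */
		return 0;
	}
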
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 00a818d67b54..8bf5999875ee 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -734,11 +734,8 @@ repeat_in_this_group: | |||
734 | ino = ext4_find_next_zero_bit((unsigned long *) | 734 | ino = ext4_find_next_zero_bit((unsigned long *) |
735 | inode_bitmap_bh->b_data, | 735 | inode_bitmap_bh->b_data, |
736 | EXT4_INODES_PER_GROUP(sb), ino); | 736 | EXT4_INODES_PER_GROUP(sb), ino); |
737 | if (ino >= EXT4_INODES_PER_GROUP(sb)) { | 737 | if (ino >= EXT4_INODES_PER_GROUP(sb)) |
738 | if (++group == ngroups) | 738 | goto next_group; |
739 | group = 0; | ||
740 | continue; | ||
741 | } | ||
742 | if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { | 739 | if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { |
743 | ext4_error(sb, "reserved inode found cleared - " | 740 | ext4_error(sb, "reserved inode found cleared - " |
744 | "inode=%lu", ino + 1); | 741 | "inode=%lu", ino + 1); |
@@ -747,7 +744,8 @@ repeat_in_this_group: | |||
747 | if (!handle) { | 744 | if (!handle) { |
748 | BUG_ON(nblocks <= 0); | 745 | BUG_ON(nblocks <= 0); |
749 | handle = __ext4_journal_start_sb(dir->i_sb, line_no, | 746 | handle = __ext4_journal_start_sb(dir->i_sb, line_no, |
750 | handle_type, nblocks); | 747 | handle_type, nblocks, |
748 | 0); | ||
751 | if (IS_ERR(handle)) { | 749 | if (IS_ERR(handle)) { |
752 | err = PTR_ERR(handle); | 750 | err = PTR_ERR(handle); |
753 | ext4_std_error(sb, err); | 751 | ext4_std_error(sb, err); |
@@ -768,6 +766,9 @@ repeat_in_this_group: | |||
768 | goto got; /* we grabbed the inode! */ | 766 | goto got; /* we grabbed the inode! */ |
769 | if (ino < EXT4_INODES_PER_GROUP(sb)) | 767 | if (ino < EXT4_INODES_PER_GROUP(sb)) |
770 | goto repeat_in_this_group; | 768 | goto repeat_in_this_group; |
769 | next_group: | ||
770 | if (++group == ngroups) | ||
771 | group = 0; | ||
771 | } | 772 | } |
772 | err = -ENOSPC; | 773 | err = -ENOSPC; |
773 | goto out; | 774 | goto out; |
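
The new next_group label lets both the bitmap-exhausted path and the lost-the-race path share a single wrap-around step. A sketch of the same round-robin scan shape, with try_group() as a hypothetical stand-in for the per-group bitmap search:

	/* Wrap-around scan over @ngroups starting at @start; try_group()
	 * returns a non-negative inode number or -1 to move on. */
	static int scan_groups(int start, int ngroups, int (*try_group)(int))
	{
		int group = start, i, ino;

		for (i = 0; i < ngroups; i++) {
			ino = try_group(group);
			if (ino >= 0)
				return ino;
			if (++group == ngroups)	/* same wrap step as next_group */
				group = 0;
		}
		return -1;	/* every group scanned: out of inodes */
	}
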
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index b8d5d351e24f..87b30cd357e7 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -624,7 +624,7 @@ cleanup: | |||
624 | partial--; | 624 | partial--; |
625 | } | 625 | } |
626 | out: | 626 | out: |
627 | trace_ext4_ind_map_blocks_exit(inode, map, err); | 627 | trace_ext4_ind_map_blocks_exit(inode, flags, map, err); |
628 | return err; | 628 | return err; |
629 | } | 629 | } |
630 | 630 | ||
@@ -675,11 +675,6 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
675 | 675 | ||
676 | retry: | 676 | retry: |
677 | if (rw == READ && ext4_should_dioread_nolock(inode)) { | 677 | if (rw == READ && ext4_should_dioread_nolock(inode)) { |
678 | if (unlikely(atomic_read(&EXT4_I(inode)->i_unwritten))) { | ||
679 | mutex_lock(&inode->i_mutex); | ||
680 | ext4_flush_unwritten_io(inode); | ||
681 | mutex_unlock(&inode->i_mutex); | ||
682 | } | ||
683 | /* | 678 | /* |
684 | * Nolock dioread optimization may be dynamically disabled | 679 | * Nolock dioread optimization may be dynamically disabled |
685 | * via ext4_inode_block_unlocked_dio(). Check inode's state | 680 | * via ext4_inode_block_unlocked_dio(). Check inode's state |
@@ -779,27 +774,18 @@ int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock) | |||
779 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; | 774 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; |
780 | } | 775 | } |
781 | 776 | ||
782 | int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 777 | /* |
778 | * Calculate number of indirect blocks touched by mapping @nrblocks logically | ||
779 | * contiguous blocks | ||
780 | */ | ||
781 | int ext4_ind_trans_blocks(struct inode *inode, int nrblocks) | ||
783 | { | 782 | { |
784 | int indirects; | ||
785 | |||
786 | /* if nrblocks are contiguous */ | ||
787 | if (chunk) { | ||
788 | /* | ||
789 | * With N contiguous data blocks, we need at most | ||
790 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, | ||
791 | * 2 dindirect blocks, and 1 tindirect block | ||
792 | */ | ||
793 | return DIV_ROUND_UP(nrblocks, | ||
794 | EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; | ||
795 | } | ||
796 | /* | 783 | /* |
797 | * if nrblocks are not contiguous, worse case, each block touch | 784 | * With N contiguous data blocks, we need at most |
798 | * a indirect block, and each indirect block touch a double indirect | 785 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, |
799 | * block, plus a triple indirect block | 786 | * 2 dindirect blocks, and 1 tindirect block |
800 | */ | 787 | */ |
801 | indirects = nrblocks * 2 + 1; | 788 | return DIV_ROUND_UP(nrblocks, EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; |
802 | return indirects; | ||
803 | } | 789 | } |
804 | 790 | ||
805 | /* | 791 | /* |
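
With the chunk flag gone, the function always assumes the @nrblocks being mapped are logically contiguous. A worked example of the resulting bound, using illustrative sizes (4 KiB blocks hold 1024 block addresses each):

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	int main(void)
	{
		int addr_per_block = 4096 / 4;	/* 1024 addresses per block */
		int nrblocks = 3000;

		/* one indirect block per run of addresses (rounded up), plus
		 * the slack indirect, 2 dindirect and 1 tindirect blocks the
		 * comment above allows for */
		int touched = DIV_ROUND_UP(nrblocks, addr_per_block) + 4;

		printf("%d contiguous blocks touch at most %d index blocks\n",
		       nrblocks, touched);	/* prints 7 */
		return 0;
	}
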
@@ -940,11 +926,13 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
940 | __le32 *last) | 926 | __le32 *last) |
941 | { | 927 | { |
942 | __le32 *p; | 928 | __le32 *p; |
943 | int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; | 929 | int flags = EXT4_FREE_BLOCKS_VALIDATED; |
944 | int err; | 930 | int err; |
945 | 931 | ||
946 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 932 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
947 | flags |= EXT4_FREE_BLOCKS_METADATA; | 933 | flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA; |
934 | else if (ext4_should_journal_data(inode)) | ||
935 | flags |= EXT4_FREE_BLOCKS_FORGET; | ||
948 | 936 | ||
949 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, | 937 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, |
950 | count)) { | 938 | count)) { |
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 3e2bf873e8a8..d9ecbf1113a7 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c | |||
@@ -72,7 +72,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode, | |||
72 | entry = (struct ext4_xattr_entry *) | 72 | entry = (struct ext4_xattr_entry *) |
73 | ((void *)raw_inode + EXT4_I(inode)->i_inline_off); | 73 | ((void *)raw_inode + EXT4_I(inode)->i_inline_off); |
74 | 74 | ||
75 | free += le32_to_cpu(entry->e_value_size); | 75 | free += EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)); |
76 | goto out; | 76 | goto out; |
77 | } | 77 | } |
78 | 78 | ||
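
The inline-data fix above counts the xattr value's padded on-disk footprint rather than its raw length. EXT4_XATTR_SIZE rounds a length up to a 4-byte boundary, roughly like the sketch below; this mimics the rounding, it is not the kernel macro itself:

	#include <stdio.h>

	#define XATTR_PAD 4
	#define XATTR_SIZE(len) (((len) + XATTR_PAD - 1) & ~(XATTR_PAD - 1))

	int main(void)
	{
		printf("%d -> %d\n", 5, XATTR_SIZE(5));	/* prints 5 -> 8 */
		return 0;
	}
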
@@ -1404,16 +1404,15 @@ out: | |||
1404 | * offset as if '.' and '..' really take place. | 1404 | * offset as if '.' and '..' really take place. |
1405 | * | 1405 | * |
1406 | */ | 1406 | */ |
1407 | int ext4_read_inline_dir(struct file *filp, | 1407 | int ext4_read_inline_dir(struct file *file, |
1408 | void *dirent, filldir_t filldir, | 1408 | struct dir_context *ctx, |
1409 | int *has_inline_data) | 1409 | int *has_inline_data) |
1410 | { | 1410 | { |
1411 | int error = 0; | ||
1412 | unsigned int offset, parent_ino; | 1411 | unsigned int offset, parent_ino; |
1413 | int i, stored; | 1412 | int i; |
1414 | struct ext4_dir_entry_2 *de; | 1413 | struct ext4_dir_entry_2 *de; |
1415 | struct super_block *sb; | 1414 | struct super_block *sb; |
1416 | struct inode *inode = file_inode(filp); | 1415 | struct inode *inode = file_inode(file); |
1417 | int ret, inline_size = 0; | 1416 | int ret, inline_size = 0; |
1418 | struct ext4_iloc iloc; | 1417 | struct ext4_iloc iloc; |
1419 | void *dir_buf = NULL; | 1418 | void *dir_buf = NULL; |
@@ -1444,9 +1443,8 @@ int ext4_read_inline_dir(struct file *filp, | |||
1444 | goto out; | 1443 | goto out; |
1445 | 1444 | ||
1446 | sb = inode->i_sb; | 1445 | sb = inode->i_sb; |
1447 | stored = 0; | ||
1448 | parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); | 1446 | parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); |
1449 | offset = filp->f_pos; | 1447 | offset = ctx->pos; |
1450 | 1448 | ||
1451 | /* | 1449 | /* |
1452 | * dotdot_offset and dotdot_size are the real offset and | 1450 | * dotdot_offset and dotdot_size are the real offset and |
@@ -1460,104 +1458,74 @@ int ext4_read_inline_dir(struct file *filp, | |||
1460 | extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE; | 1458 | extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE; |
1461 | extra_size = extra_offset + inline_size; | 1459 | extra_size = extra_offset + inline_size; |
1462 | 1460 | ||
1463 | while (!error && !stored && filp->f_pos < extra_size) { | 1461 | /* |
1464 | revalidate: | 1462 | * If the version has changed since the last call to |
1465 | /* | 1463 | * readdir(2), then we might be pointing to an invalid |
1466 | * If the version has changed since the last call to | 1464 | * dirent right now. Scan from the start of the inline |
1467 | * readdir(2), then we might be pointing to an invalid | 1465 | * dir to make sure. |
1468 | * dirent right now. Scan from the start of the inline | 1466 | */ |
1469 | * dir to make sure. | 1467 | if (file->f_version != inode->i_version) { |
1470 | */ | 1468 | for (i = 0; i < extra_size && i < offset;) { |
1471 | if (filp->f_version != inode->i_version) { | 1469 | /* |
1472 | for (i = 0; i < extra_size && i < offset;) { | 1470 | * "." is with offset 0 and |
1473 | /* | 1471 | * ".." is dotdot_offset. |
1474 | * "." is with offset 0 and | 1472 | */ |
1475 | * ".." is dotdot_offset. | 1473 | if (!i) { |
1476 | */ | 1474 | i = dotdot_offset; |
1477 | if (!i) { | 1475 | continue; |
1478 | i = dotdot_offset; | 1476 | } else if (i == dotdot_offset) { |
1479 | continue; | 1477 | i = dotdot_size; |
1480 | } else if (i == dotdot_offset) { | ||
1481 | i = dotdot_size; | ||
1482 | continue; | ||
1483 | } | ||
1484 | /* for other entry, the real offset in | ||
1485 | * the buf has to be tuned accordingly. | ||
1486 | */ | ||
1487 | de = (struct ext4_dir_entry_2 *) | ||
1488 | (dir_buf + i - extra_offset); | ||
1489 | /* It's too expensive to do a full | ||
1490 | * dirent test each time round this | ||
1491 | * loop, but we do have to test at | ||
1492 | * least that it is non-zero. A | ||
1493 | * failure will be detected in the | ||
1494 | * dirent test below. */ | ||
1495 | if (ext4_rec_len_from_disk(de->rec_len, | ||
1496 | extra_size) < EXT4_DIR_REC_LEN(1)) | ||
1497 | break; | ||
1498 | i += ext4_rec_len_from_disk(de->rec_len, | ||
1499 | extra_size); | ||
1500 | } | ||
1501 | offset = i; | ||
1502 | filp->f_pos = offset; | ||
1503 | filp->f_version = inode->i_version; | ||
1504 | } | ||
1505 | |||
1506 | while (!error && filp->f_pos < extra_size) { | ||
1507 | if (filp->f_pos == 0) { | ||
1508 | error = filldir(dirent, ".", 1, 0, inode->i_ino, | ||
1509 | DT_DIR); | ||
1510 | if (error) | ||
1511 | break; | ||
1512 | stored++; | ||
1513 | filp->f_pos = dotdot_offset; | ||
1514 | continue; | 1478 | continue; |
1515 | } | 1479 | } |
1480 | /* for other entry, the real offset in | ||
1481 | * the buf has to be tuned accordingly. | ||
1482 | */ | ||
1483 | de = (struct ext4_dir_entry_2 *) | ||
1484 | (dir_buf + i - extra_offset); | ||
1485 | /* It's too expensive to do a full | ||
1486 | * dirent test each time round this | ||
1487 | * loop, but we do have to test at | ||
1488 | * least that it is non-zero. A | ||
1489 | * failure will be detected in the | ||
1490 | * dirent test below. */ | ||
1491 | if (ext4_rec_len_from_disk(de->rec_len, extra_size) | ||
1492 | < EXT4_DIR_REC_LEN(1)) | ||
1493 | break; | ||
1494 | i += ext4_rec_len_from_disk(de->rec_len, | ||
1495 | extra_size); | ||
1496 | } | ||
1497 | offset = i; | ||
1498 | ctx->pos = offset; | ||
1499 | file->f_version = inode->i_version; | ||
1500 | } | ||
1516 | 1501 | ||
1517 | if (filp->f_pos == dotdot_offset) { | 1502 | while (ctx->pos < extra_size) { |
1518 | error = filldir(dirent, "..", 2, | 1503 | if (ctx->pos == 0) { |
1519 | dotdot_offset, | 1504 | if (!dir_emit(ctx, ".", 1, inode->i_ino, DT_DIR)) |
1520 | parent_ino, DT_DIR); | 1505 | goto out; |
1521 | if (error) | 1506 | ctx->pos = dotdot_offset; |
1522 | break; | 1507 | continue; |
1523 | stored++; | 1508 | } |
1524 | 1509 | ||
1525 | filp->f_pos = dotdot_size; | 1510 | if (ctx->pos == dotdot_offset) { |
1526 | continue; | 1511 | if (!dir_emit(ctx, "..", 2, parent_ino, DT_DIR)) |
1527 | } | 1512 | goto out; |
1513 | ctx->pos = dotdot_size; | ||
1514 | continue; | ||
1515 | } | ||
1528 | 1516 | ||
1529 | de = (struct ext4_dir_entry_2 *) | 1517 | de = (struct ext4_dir_entry_2 *) |
1530 | (dir_buf + filp->f_pos - extra_offset); | 1518 | (dir_buf + ctx->pos - extra_offset); |
1531 | if (ext4_check_dir_entry(inode, filp, de, | 1519 | if (ext4_check_dir_entry(inode, file, de, iloc.bh, dir_buf, |
1532 | iloc.bh, dir_buf, | 1520 | extra_size, ctx->pos)) |
1533 | extra_size, filp->f_pos)) { | 1521 | goto out; |
1534 | ret = stored; | 1522 | if (le32_to_cpu(de->inode)) { |
1523 | if (!dir_emit(ctx, de->name, de->name_len, | ||
1524 | le32_to_cpu(de->inode), | ||
1525 | get_dtype(sb, de->file_type))) | ||
1535 | goto out; | 1526 | goto out; |
1536 | } | ||
1537 | if (le32_to_cpu(de->inode)) { | ||
1538 | /* We might block in the next section | ||
1539 | * if the data destination is | ||
1540 | * currently swapped out. So, use a | ||
1541 | * version stamp to detect whether or | ||
1542 | * not the directory has been modified | ||
1543 | * during the copy operation. | ||
1544 | */ | ||
1545 | u64 version = filp->f_version; | ||
1546 | |||
1547 | error = filldir(dirent, de->name, | ||
1548 | de->name_len, | ||
1549 | filp->f_pos, | ||
1550 | le32_to_cpu(de->inode), | ||
1551 | get_dtype(sb, de->file_type)); | ||
1552 | if (error) | ||
1553 | break; | ||
1554 | if (version != filp->f_version) | ||
1555 | goto revalidate; | ||
1556 | stored++; | ||
1557 | } | ||
1558 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len, | ||
1559 | extra_size); | ||
1560 | } | 1527 | } |
1528 | ctx->pos += ext4_rec_len_from_disk(de->rec_len, extra_size); | ||
1561 | } | 1529 | } |
1562 | out: | 1530 | out: |
1563 | kfree(dir_buf); | 1531 | kfree(dir_buf); |
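The rewritten loop above is the inline-dir half of the filldir-to-dir_context conversion: position bookkeeping moves from filp->f_pos to ctx->pos, and a false return from dir_emit() simply ends iteration, which lets the error/stored plumbing of the old code disappear. A minimal sketch of the shape an ->iterate implementation takes under the new API (illustrative only, not the ext4 code; dir_emit_dots() is assumed to be available alongside dir_emit() in this series):

	#include <linux/fs.h>

	/* Emit ".", ".." and one fixed entry, resuming at ctx->pos when the
	 * getdents buffer filled up on an earlier call. */
	static int demo_iterate(struct file *file, struct dir_context *ctx)
	{
		if (!dir_emit_dots(file, ctx))		/* covers pos 0 and 1 */
			return 0;
		if (ctx->pos == 2) {
			if (!dir_emit(ctx, "hello", 5, 42 /* ino */, DT_REG))
				return 0;		/* buffer full; retry later */
			ctx->pos++;
		}
		return 0;
	}

The inline-dir code keeps its own position convention ('.' at offset 0, '..' at dotdot_offset) instead of the 0/1 scheme dir_emit_dots() assumes, which is why it emits the dots by hand.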
@@ -1842,7 +1810,7 @@ int ext4_inline_data_fiemap(struct inode *inode, | |||
1842 | if (error) | 1810 | if (error) |
1843 | goto out; | 1811 | goto out; |
1844 | 1812 | ||
1845 | physical = iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; | 1813 | physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; |
1846 | physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; | 1814 | physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; |
1847 | physical += offsetof(struct ext4_inode, i_block); | 1815 | physical += offsetof(struct ext4_inode, i_block); |
1848 | length = i_size_read(inode); | 1816 | length = i_size_read(inode); |
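The fiemap hunk is a pure integer-width fix: b_blocknr is a sector_t, which is only 32 bits on builds without large-block-device support, so shifting it by s_blocksize_bits could truncate the byte address for inodes located beyond the first 4 GiB of the device. A small userspace demonstration of the failure mode (the constants are illustrative):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t blocknr = 0x00200000;	/* with 4K blocks: byte offset 8 GiB */
		int blocksize_bits = 12;

		/* 32-bit shift wraps: the high bits are gone before the assignment */
		printf("narrow: 0x%llx\n",
		       (unsigned long long)(blocknr << blocksize_bits));
		/* widening first, as the patch does, keeps all 64 bits */
		printf("wide:   0x%llx\n",
		       (unsigned long long)((uint64_t)blocknr << blocksize_bits));
		return 0;
	}

This prints 0x0 for the narrow shift and 0x200000000 for the widened one.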
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d6382b89ecbd..dd32a2eacd0d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -132,12 +132,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, | |||
132 | new_size); | 132 | new_size); |
133 | } | 133 | } |
134 | 134 | ||
135 | static void ext4_invalidatepage(struct page *page, unsigned long offset); | 135 | static void ext4_invalidatepage(struct page *page, unsigned int offset, |
136 | unsigned int length); | ||
136 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); | 137 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); |
137 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); | 138 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); |
138 | static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | 139 | static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, |
139 | struct inode *inode, struct page *page, loff_t from, | 140 | int pextents); |
140 | loff_t length, int flags); | ||
141 | 141 | ||
142 | /* | 142 | /* |
143 | * Test whether an inode is a fast symlink. | 143 | * Test whether an inode is a fast symlink. |
@@ -215,7 +215,8 @@ void ext4_evict_inode(struct inode *inode) | |||
215 | filemap_write_and_wait(&inode->i_data); | 215 | filemap_write_and_wait(&inode->i_data); |
216 | } | 216 | } |
217 | truncate_inode_pages(&inode->i_data, 0); | 217 | truncate_inode_pages(&inode->i_data, 0); |
218 | ext4_ioend_shutdown(inode); | 218 | |
219 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | ||
219 | goto no_delete; | 220 | goto no_delete; |
220 | } | 221 | } |
221 | 222 | ||
@@ -225,8 +226,8 @@ void ext4_evict_inode(struct inode *inode) | |||
225 | if (ext4_should_order_data(inode)) | 226 | if (ext4_should_order_data(inode)) |
226 | ext4_begin_ordered_truncate(inode, 0); | 227 | ext4_begin_ordered_truncate(inode, 0); |
227 | truncate_inode_pages(&inode->i_data, 0); | 228 | truncate_inode_pages(&inode->i_data, 0); |
228 | ext4_ioend_shutdown(inode); | ||
229 | 229 | ||
230 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | ||
230 | if (is_bad_inode(inode)) | 231 | if (is_bad_inode(inode)) |
231 | goto no_delete; | 232 | goto no_delete; |
232 | 233 | ||
@@ -423,66 +424,6 @@ static int __check_block_validity(struct inode *inode, const char *func, | |||
423 | #define check_block_validity(inode, map) \ | 424 | #define check_block_validity(inode, map) \ |
424 | __check_block_validity((inode), __func__, __LINE__, (map)) | 425 | __check_block_validity((inode), __func__, __LINE__, (map)) |
425 | 426 | ||
426 | /* | ||
427 | * Return the number of contiguous dirty pages in a given inode | ||
428 | * starting at page frame idx. | ||
429 | */ | ||
430 | static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | ||
431 | unsigned int max_pages) | ||
432 | { | ||
433 | struct address_space *mapping = inode->i_mapping; | ||
434 | pgoff_t index; | ||
435 | struct pagevec pvec; | ||
436 | pgoff_t num = 0; | ||
437 | int i, nr_pages, done = 0; | ||
438 | |||
439 | if (max_pages == 0) | ||
440 | return 0; | ||
441 | pagevec_init(&pvec, 0); | ||
442 | while (!done) { | ||
443 | index = idx; | ||
444 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
445 | PAGECACHE_TAG_DIRTY, | ||
446 | (pgoff_t)PAGEVEC_SIZE); | ||
447 | if (nr_pages == 0) | ||
448 | break; | ||
449 | for (i = 0; i < nr_pages; i++) { | ||
450 | struct page *page = pvec.pages[i]; | ||
451 | struct buffer_head *bh, *head; | ||
452 | |||
453 | lock_page(page); | ||
454 | if (unlikely(page->mapping != mapping) || | ||
455 | !PageDirty(page) || | ||
456 | PageWriteback(page) || | ||
457 | page->index != idx) { | ||
458 | done = 1; | ||
459 | unlock_page(page); | ||
460 | break; | ||
461 | } | ||
462 | if (page_has_buffers(page)) { | ||
463 | bh = head = page_buffers(page); | ||
464 | do { | ||
465 | if (!buffer_delay(bh) && | ||
466 | !buffer_unwritten(bh)) | ||
467 | done = 1; | ||
468 | bh = bh->b_this_page; | ||
469 | } while (!done && (bh != head)); | ||
470 | } | ||
471 | unlock_page(page); | ||
472 | if (done) | ||
473 | break; | ||
474 | idx++; | ||
475 | num++; | ||
476 | if (num >= max_pages) { | ||
477 | done = 1; | ||
478 | break; | ||
479 | } | ||
480 | } | ||
481 | pagevec_release(&pvec); | ||
482 | } | ||
483 | return num; | ||
484 | } | ||
485 | |||
486 | #ifdef ES_AGGRESSIVE_TEST | 427 | #ifdef ES_AGGRESSIVE_TEST |
487 | static void ext4_map_blocks_es_recheck(handle_t *handle, | 428 | static void ext4_map_blocks_es_recheck(handle_t *handle, |
488 | struct inode *inode, | 429 | struct inode *inode, |
@@ -524,7 +465,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, | |||
524 | if (es_map->m_lblk != map->m_lblk || | 465 | if (es_map->m_lblk != map->m_lblk || |
525 | es_map->m_flags != map->m_flags || | 466 | es_map->m_flags != map->m_flags || |
526 | es_map->m_pblk != map->m_pblk) { | 467 | es_map->m_pblk != map->m_pblk) { |
527 | printk("ES cache assertation failed for inode: %lu " | 468 | printk("ES cache assertion failed for inode: %lu " |
528 | "es_cached ex [%d/%d/%llu/%x] != " | 469 | "es_cached ex [%d/%d/%llu/%x] != " |
529 | "found ex [%d/%d/%llu/%x] retval %d flags %x\n", | 470 | "found ex [%d/%d/%llu/%x] retval %d flags %x\n", |
530 | inode->i_ino, es_map->m_lblk, es_map->m_len, | 471 | inode->i_ino, es_map->m_lblk, es_map->m_len, |
@@ -575,6 +516,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
575 | 516 | ||
576 | /* Lookup extent status tree firstly */ | 517 | /* Lookup extent status tree firstly */ |
577 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | 518 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { |
519 | ext4_es_lru_add(inode); | ||
578 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { | 520 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { |
579 | map->m_pblk = ext4_es_pblock(&es) + | 521 | map->m_pblk = ext4_es_pblock(&es) + |
580 | map->m_lblk - es.es_lblk; | 522 | map->m_lblk - es.es_lblk; |
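The one-line addition above touches the extent-status LRU on every cache hit, so the es shrinker treats inodes whose caches are actively used as hot and reclaims cold ones first; the same call is added to the delalloc lookup path in a later hunk. A generic move-to-front sketch of the idea (not the ext4 implementation, which keeps inodes on a per-superblock list under a spinlock):

	#include <stdio.h>

	struct node { struct node *prev, *next; int id; };

	/* On a cache hit, splice the entry to the list head; a shrinker that
	 * walks from the tail then evicts the least recently used first. */
	static void lru_touch(struct node *head, struct node *n)
	{
		n->prev->next = n->next;		/* unlink */
		n->next->prev = n->prev;
		n->next = head->next;			/* reinsert after head */
		n->prev = head;
		head->next->prev = n;
		head->next = n;
	}

	int main(void)
	{
		struct node head, a = { .id = 1 }, b = { .id = 2 };

		head.next = &a; head.prev = &b;		/* head -> a -> b */
		a.prev = &head; a.next = &b;
		b.prev = &a; b.next = &head;

		lru_touch(&head, &b);			/* hit on b */
		printf("MRU is node %d\n", head.next->id);	/* prints 2 */
		return 0;
	}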
@@ -613,14 +555,13 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
613 | int ret; | 555 | int ret; |
614 | unsigned long long status; | 556 | unsigned long long status; |
615 | 557 | ||
616 | #ifdef ES_AGGRESSIVE_TEST | 558 | if (unlikely(retval != map->m_len)) { |
617 | if (retval != map->m_len) { | 559 | ext4_warning(inode->i_sb, |
618 | printk("ES len assertation failed for inode: %lu " | 560 | "ES len assertion failed for inode " |
619 | "retval %d != map->m_len %d " | 561 | "%lu: retval %d != map->m_len %d", |
620 | "in %s (lookup)\n", inode->i_ino, retval, | 562 | inode->i_ino, retval, map->m_len); |
621 | map->m_len, __func__); | 563 | WARN_ON(1); |
622 | } | 564 | } |
623 | #endif | ||
624 | 565 | ||
625 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? | 566 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? |
626 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; | 567 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; |
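This hunk and the two matching ones (in the allocation path just below and in the delalloc path later) promote the length check from an ES_AGGRESSIVE_TEST-only printk to an always-on warning: a disagreement between the mapping result and the extent-status cache means in-memory state is inconsistent, so it is now reported with context plus a WARN_ON(1) backtrace while the system keeps running. A userspace analogue of the idiom, as a sketch:

	#include <stdio.h>

	/* Report with context and continue, rather than aborting; in the
	 * kernel, WARN_ON(1) additionally dumps a stack trace here. */
	static void check_len(long ino, int retval, int m_len)
	{
		if (retval != m_len)
			fprintf(stderr, "ES len assertion failed for inode "
				"%ld: retval %d != map->m_len %d\n",
				ino, retval, m_len);
	}

	int main(void)
	{
		check_len(12, 8, 16);	/* mismatch: warns, does not abort */
		return 0;
	}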
@@ -714,14 +655,13 @@ found: | |||
714 | int ret; | 655 | int ret; |
715 | unsigned long long status; | 656 | unsigned long long status; |
716 | 657 | ||
717 | #ifdef ES_AGGRESSIVE_TEST | 658 | if (unlikely(retval != map->m_len)) { |
718 | if (retval != map->m_len) { | 659 | ext4_warning(inode->i_sb, |
719 | printk("ES len assertation failed for inode: %lu " | 660 | "ES len assertion failed for inode " |
720 | "retval %d != map->m_len %d " | 661 | "%lu: retval %d != map->m_len %d", |
721 | "in %s (allocation)\n", inode->i_ino, retval, | 662 | inode->i_ino, retval, map->m_len); |
722 | map->m_len, __func__); | 663 | WARN_ON(1); |
723 | } | 664 | } |
724 | #endif | ||
725 | 665 | ||
726 | /* | 666 | /* |
727 | * If the extent has been zeroed out, we don't need to update | 667 | * If the extent has been zeroed out, we don't need to update |
@@ -1118,10 +1058,13 @@ static int ext4_write_end(struct file *file, | |||
1118 | } | 1058 | } |
1119 | } | 1059 | } |
1120 | 1060 | ||
1121 | if (ext4_has_inline_data(inode)) | 1061 | if (ext4_has_inline_data(inode)) { |
1122 | copied = ext4_write_inline_data_end(inode, pos, len, | 1062 | ret = ext4_write_inline_data_end(inode, pos, len, |
1123 | copied, page); | 1063 | copied, page); |
1124 | else | 1064 | if (ret < 0) |
1065 | goto errout; | ||
1066 | copied = ret; | ||
1067 | } else | ||
1125 | copied = block_write_end(file, mapping, pos, | 1068 | copied = block_write_end(file, mapping, pos, |
1126 | len, copied, page, fsdata); | 1069 | len, copied, page, fsdata); |
1127 | 1070 | ||
@@ -1157,8 +1100,6 @@ static int ext4_write_end(struct file *file, | |||
1157 | if (i_size_changed) | 1100 | if (i_size_changed) |
1158 | ext4_mark_inode_dirty(handle, inode); | 1101 | ext4_mark_inode_dirty(handle, inode); |
1159 | 1102 | ||
1160 | if (copied < 0) | ||
1161 | ret = copied; | ||
1162 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) | 1103 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) |
1163 | /* if we have allocated more blocks and copied | 1104 | /* if we have allocated more blocks and copied |
1164 | * less. We will have blocks allocated outside | 1105 | * less. We will have blocks allocated outside |
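Taken together, the two ext4_write_end() hunks fix the handling of a failure from the inline-data path: the old code smuggled a negative return through 'copied', past the i_size update logic, and only converted it to 'ret' near the bottom; the new code branches to the error path immediately. The underlying kernel convention, one signed return carrying either a byte count or a negative errno, is easy to misuse; a userspace sketch of the safe split (the names are invented):

	#include <stdio.h>
	#include <errno.h>

	/* Kernel-style contract: >= 0 is a byte count, < 0 is -errno. */
	static long do_write_end(long want)
	{
		if (want > 4096)
			return -EFBIG;
		return want / 2;	/* pretend a short copy happened */
	}

	int main(void)
	{
		long copied, ret = do_write_end(100);

		if (ret < 0) {		/* split error from count up front... */
			fprintf(stderr, "error: %ld\n", ret);
			return 1;
		}
		copied = ret;		/* ...so 'copied' is never negative below */
		printf("copied %ld bytes\n", copied);
		return 0;
	}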
@@ -1415,21 +1356,28 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1415 | } | 1356 | } |
1416 | 1357 | ||
1417 | static void ext4_da_page_release_reservation(struct page *page, | 1358 | static void ext4_da_page_release_reservation(struct page *page, |
1418 | unsigned long offset) | 1359 | unsigned int offset, |
1360 | unsigned int length) | ||
1419 | { | 1361 | { |
1420 | int to_release = 0; | 1362 | int to_release = 0; |
1421 | struct buffer_head *head, *bh; | 1363 | struct buffer_head *head, *bh; |
1422 | unsigned int curr_off = 0; | 1364 | unsigned int curr_off = 0; |
1423 | struct inode *inode = page->mapping->host; | 1365 | struct inode *inode = page->mapping->host; |
1424 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1366 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1367 | unsigned int stop = offset + length; | ||
1425 | int num_clusters; | 1368 | int num_clusters; |
1426 | ext4_fsblk_t lblk; | 1369 | ext4_fsblk_t lblk; |
1427 | 1370 | ||
1371 | BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); | ||
1372 | |||
1428 | head = page_buffers(page); | 1373 | head = page_buffers(page); |
1429 | bh = head; | 1374 | bh = head; |
1430 | do { | 1375 | do { |
1431 | unsigned int next_off = curr_off + bh->b_size; | 1376 | unsigned int next_off = curr_off + bh->b_size; |
1432 | 1377 | ||
1378 | if (next_off > stop) | ||
1379 | break; | ||
1380 | |||
1433 | if ((offset <= curr_off) && (buffer_delay(bh))) { | 1381 | if ((offset <= curr_off) && (buffer_delay(bh))) { |
1434 | to_release++; | 1382 | to_release++; |
1435 | clear_buffer_delay(bh); | 1383 | clear_buffer_delay(bh); |
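With invalidatepage now carrying an offset/length pair, the reservation release must skip buffers past the invalidated byte range, hence the new 'stop' bound and the early break in this hunk. A worked userspace sketch of the curr_off/next_off walk, assuming 1K buffer heads in a 4K page:

	#include <stdio.h>

	int main(void)
	{
		unsigned int bsize = 1024, psize = 4096;
		unsigned int offset = 1024, length = 2048;
		unsigned int stop = offset + length, curr_off = 0;

		for (unsigned int b = 0; curr_off < psize; b++, curr_off += bsize) {
			unsigned int next_off = curr_off + bsize;

			if (next_off > stop)
				break;			/* buffer past the range */
			if (offset <= curr_off)
				printf("buffer %u released\n", b);
		}
		return 0;	/* releases buffers 1 and 2 only */
	}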
@@ -1460,140 +1408,43 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1460 | * Delayed allocation stuff | 1408 | * Delayed allocation stuff |
1461 | */ | 1409 | */ |
1462 | 1410 | ||
1463 | /* | 1411 | struct mpage_da_data { |
1464 | * mpage_da_submit_io - walks through extent of pages and try to write | 1412 | struct inode *inode; |
1465 | * them with writepage() call back | 1413 | struct writeback_control *wbc; |
1466 | * | ||
1467 | * @mpd->inode: inode | ||
1468 | * @mpd->first_page: first page of the extent | ||
1469 | * @mpd->next_page: page after the last page of the extent | ||
1470 | * | ||
1471 | * By the time mpage_da_submit_io() is called we expect all blocks | ||
1472 | * to be allocated. this may be wrong if allocation failed. | ||
1473 | * | ||
1474 | * As pages are already locked by write_cache_pages(), we can't use it | ||
1475 | */ | ||
1476 | static int mpage_da_submit_io(struct mpage_da_data *mpd, | ||
1477 | struct ext4_map_blocks *map) | ||
1478 | { | ||
1479 | struct pagevec pvec; | ||
1480 | unsigned long index, end; | ||
1481 | int ret = 0, err, nr_pages, i; | ||
1482 | struct inode *inode = mpd->inode; | ||
1483 | struct address_space *mapping = inode->i_mapping; | ||
1484 | loff_t size = i_size_read(inode); | ||
1485 | unsigned int len, block_start; | ||
1486 | struct buffer_head *bh, *page_bufs = NULL; | ||
1487 | sector_t pblock = 0, cur_logical = 0; | ||
1488 | struct ext4_io_submit io_submit; | ||
1489 | 1414 | ||
1490 | BUG_ON(mpd->next_page <= mpd->first_page); | 1415 | pgoff_t first_page; /* The first page to write */ |
1491 | memset(&io_submit, 0, sizeof(io_submit)); | 1416 | pgoff_t next_page; /* Current page to examine */ |
1417 | pgoff_t last_page; /* Last page to examine */ | ||
1492 | /* | 1418 | /* |
1493 | * We need to start from the first_page to the next_page - 1 | 1419 | * Extent to map - this can be after first_page because that can be |
1494 | * to make sure we also write the mapped dirty buffer_heads. | 1420 | * fully mapped. We somewhat abuse m_flags to store whether the extent |
1495 | * If we look at mpd->b_blocknr we would only be looking | 1421 | * is delalloc or unwritten. |
1496 | * at the currently mapped buffer_heads. | ||
1497 | */ | 1422 | */ |
1498 | index = mpd->first_page; | 1423 | struct ext4_map_blocks map; |
1499 | end = mpd->next_page - 1; | 1424 | struct ext4_io_submit io_submit; /* IO submission data */ |
1500 | 1425 | }; | |
1501 | pagevec_init(&pvec, 0); | ||
1502 | while (index <= end) { | ||
1503 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
1504 | if (nr_pages == 0) | ||
1505 | break; | ||
1506 | for (i = 0; i < nr_pages; i++) { | ||
1507 | int skip_page = 0; | ||
1508 | struct page *page = pvec.pages[i]; | ||
1509 | |||
1510 | index = page->index; | ||
1511 | if (index > end) | ||
1512 | break; | ||
1513 | |||
1514 | if (index == size >> PAGE_CACHE_SHIFT) | ||
1515 | len = size & ~PAGE_CACHE_MASK; | ||
1516 | else | ||
1517 | len = PAGE_CACHE_SIZE; | ||
1518 | if (map) { | ||
1519 | cur_logical = index << (PAGE_CACHE_SHIFT - | ||
1520 | inode->i_blkbits); | ||
1521 | pblock = map->m_pblk + (cur_logical - | ||
1522 | map->m_lblk); | ||
1523 | } | ||
1524 | index++; | ||
1525 | |||
1526 | BUG_ON(!PageLocked(page)); | ||
1527 | BUG_ON(PageWriteback(page)); | ||
1528 | |||
1529 | bh = page_bufs = page_buffers(page); | ||
1530 | block_start = 0; | ||
1531 | do { | ||
1532 | if (map && (cur_logical >= map->m_lblk) && | ||
1533 | (cur_logical <= (map->m_lblk + | ||
1534 | (map->m_len - 1)))) { | ||
1535 | if (buffer_delay(bh)) { | ||
1536 | clear_buffer_delay(bh); | ||
1537 | bh->b_blocknr = pblock; | ||
1538 | } | ||
1539 | if (buffer_unwritten(bh) || | ||
1540 | buffer_mapped(bh)) | ||
1541 | BUG_ON(bh->b_blocknr != pblock); | ||
1542 | if (map->m_flags & EXT4_MAP_UNINIT) | ||
1543 | set_buffer_uninit(bh); | ||
1544 | clear_buffer_unwritten(bh); | ||
1545 | } | ||
1546 | |||
1547 | /* | ||
1548 | * skip page if block allocation undone and | ||
1549 | * block is dirty | ||
1550 | */ | ||
1551 | if (ext4_bh_delay_or_unwritten(NULL, bh)) | ||
1552 | skip_page = 1; | ||
1553 | bh = bh->b_this_page; | ||
1554 | block_start += bh->b_size; | ||
1555 | cur_logical++; | ||
1556 | pblock++; | ||
1557 | } while (bh != page_bufs); | ||
1558 | |||
1559 | if (skip_page) { | ||
1560 | unlock_page(page); | ||
1561 | continue; | ||
1562 | } | ||
1563 | |||
1564 | clear_page_dirty_for_io(page); | ||
1565 | err = ext4_bio_write_page(&io_submit, page, len, | ||
1566 | mpd->wbc); | ||
1567 | if (!err) | ||
1568 | mpd->pages_written++; | ||
1569 | /* | ||
1570 | * In error case, we have to continue because | ||
1571 | * remaining pages are still locked | ||
1572 | */ | ||
1573 | if (ret == 0) | ||
1574 | ret = err; | ||
1575 | } | ||
1576 | pagevec_release(&pvec); | ||
1577 | } | ||
1578 | ext4_io_submit(&io_submit); | ||
1579 | return ret; | ||
1580 | } | ||
1581 | 1426 | ||
1582 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | 1427 | static void mpage_release_unused_pages(struct mpage_da_data *mpd, |
1428 | bool invalidate) | ||
1583 | { | 1429 | { |
1584 | int nr_pages, i; | 1430 | int nr_pages, i; |
1585 | pgoff_t index, end; | 1431 | pgoff_t index, end; |
1586 | struct pagevec pvec; | 1432 | struct pagevec pvec; |
1587 | struct inode *inode = mpd->inode; | 1433 | struct inode *inode = mpd->inode; |
1588 | struct address_space *mapping = inode->i_mapping; | 1434 | struct address_space *mapping = inode->i_mapping; |
1589 | ext4_lblk_t start, last; | 1435 | |
1436 | /* This is necessary when next_page == 0. */ | ||
1437 | if (mpd->first_page >= mpd->next_page) | ||
1438 | return; | ||
1590 | 1439 | ||
1591 | index = mpd->first_page; | 1440 | index = mpd->first_page; |
1592 | end = mpd->next_page - 1; | 1441 | end = mpd->next_page - 1; |
1593 | 1442 | if (invalidate) { | |
1594 | start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 1443 | ext4_lblk_t start, last; |
1595 | last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 1444 | start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
1596 | ext4_es_remove_extent(inode, start, last - start + 1); | 1445 | last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
1446 | ext4_es_remove_extent(inode, start, last - start + 1); | ||
1447 | } | ||
1597 | 1448 | ||
1598 | pagevec_init(&pvec, 0); | 1449 | pagevec_init(&pvec, 0); |
1599 | while (index <= end) { | 1450 | while (index <= end) { |
@@ -1606,14 +1457,15 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | |||
1606 | break; | 1457 | break; |
1607 | BUG_ON(!PageLocked(page)); | 1458 | BUG_ON(!PageLocked(page)); |
1608 | BUG_ON(PageWriteback(page)); | 1459 | BUG_ON(PageWriteback(page)); |
1609 | block_invalidatepage(page, 0); | 1460 | if (invalidate) { |
1610 | ClearPageUptodate(page); | 1461 | block_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
1462 | ClearPageUptodate(page); | ||
1463 | } | ||
1611 | unlock_page(page); | 1464 | unlock_page(page); |
1612 | } | 1465 | } |
1613 | index = pvec.pages[nr_pages - 1]->index + 1; | 1466 | index = pvec.pages[nr_pages - 1]->index + 1; |
1614 | pagevec_release(&pvec); | 1467 | pagevec_release(&pvec); |
1615 | } | 1468 | } |
1616 | return; | ||
1617 | } | 1469 | } |
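mpage_release_unused_pages() converts page indexes back to logical blocks before trimming the extent-status tree: one page covers 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits) blocks, so a page's first block is its index shifted by that difference. A small sketch of the arithmetic (assuming 4K pages and 1K blocks):

	#include <stdio.h>

	int main(void)
	{
		unsigned int page_shift = 12, blkbits = 10;	/* 4K pages, 1K blocks */
		unsigned long first_page = 5, next_page = 8;	/* pages 5..7 */

		unsigned long start = first_page << (page_shift - blkbits);
		unsigned long last = (next_page - 1) << (page_shift - blkbits);

		/* prints 20 and 28: pages 5 and 7 start at those blocks */
		printf("start block %lu, last page's first block %lu\n",
		       start, last);
		return 0;
	}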
1618 | 1470 | ||
1619 | static void ext4_print_free_blocks(struct inode *inode) | 1471 | static void ext4_print_free_blocks(struct inode *inode) |
@@ -1642,215 +1494,6 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
1642 | return; | 1494 | return; |
1643 | } | 1495 | } |
1644 | 1496 | ||
1645 | /* | ||
1646 | * mpage_da_map_and_submit - go through given space, map them | ||
1647 | * if necessary, and then submit them for I/O | ||
1648 | * | ||
1649 | * @mpd - bh describing space | ||
1650 | * | ||
1651 | * The function skips space we know is already mapped to disk blocks. | ||
1652 | * | ||
1653 | */ | ||
1654 | static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | ||
1655 | { | ||
1656 | int err, blks, get_blocks_flags; | ||
1657 | struct ext4_map_blocks map, *mapp = NULL; | ||
1658 | sector_t next = mpd->b_blocknr; | ||
1659 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | ||
1660 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | ||
1661 | handle_t *handle = NULL; | ||
1662 | |||
1663 | /* | ||
1664 | * If the blocks are mapped already, or we couldn't accumulate | ||
1665 | * any blocks, then proceed immediately to the submission stage. | ||
1666 | */ | ||
1667 | if ((mpd->b_size == 0) || | ||
1668 | ((mpd->b_state & (1 << BH_Mapped)) && | ||
1669 | !(mpd->b_state & (1 << BH_Delay)) && | ||
1670 | !(mpd->b_state & (1 << BH_Unwritten)))) | ||
1671 | goto submit_io; | ||
1672 | |||
1673 | handle = ext4_journal_current_handle(); | ||
1674 | BUG_ON(!handle); | ||
1675 | |||
1676 | /* | ||
1677 | * Call ext4_map_blocks() to allocate any delayed allocation | ||
1678 | * blocks, or to convert an uninitialized extent to be | ||
1679 | * initialized (in the case where we have written into | ||
1680 | * one or more preallocated blocks). | ||
1681 | * | ||
1682 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to | ||
1683 | * indicate that we are on the delayed allocation path. This | ||
1684 | * affects functions in many different parts of the allocation | ||
1685 | * call path. This flag exists primarily because we don't | ||
1686 | * want to change *many* call functions, so ext4_map_blocks() | ||
1687 | * will set the EXT4_STATE_DELALLOC_RESERVED flag once the | ||
1688 | * inode's allocation semaphore is taken. | ||
1689 | * | ||
1690 | * If the blocks in questions were delalloc blocks, set | ||
1691 | * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting | ||
1692 | * variables are updated after the blocks have been allocated. | ||
1693 | */ | ||
1694 | map.m_lblk = next; | ||
1695 | map.m_len = max_blocks; | ||
1696 | /* | ||
1697 | * We're in delalloc path and it is possible that we're going to | ||
1698 | * need more metadata blocks than previously reserved. However | ||
1699 | * we must not fail because we're in writeback and there is | ||
1700 | * nothing we can do about it so it might result in data loss. | ||
1701 | * So use reserved blocks to allocate metadata if possible. | ||
1702 | */ | ||
1703 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | | ||
1704 | EXT4_GET_BLOCKS_METADATA_NOFAIL; | ||
1705 | if (ext4_should_dioread_nolock(mpd->inode)) | ||
1706 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
1707 | if (mpd->b_state & (1 << BH_Delay)) | ||
1708 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | ||
1709 | |||
1710 | |||
1711 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); | ||
1712 | if (blks < 0) { | ||
1713 | struct super_block *sb = mpd->inode->i_sb; | ||
1714 | |||
1715 | err = blks; | ||
1716 | /* | ||
1717 | * If get block returns EAGAIN or ENOSPC and there | ||
1718 | * appears to be free blocks we will just let | ||
1719 | * mpage_da_submit_io() unlock all of the pages. | ||
1720 | */ | ||
1721 | if (err == -EAGAIN) | ||
1722 | goto submit_io; | ||
1723 | |||
1724 | if (err == -ENOSPC && ext4_count_free_clusters(sb)) { | ||
1725 | mpd->retval = err; | ||
1726 | goto submit_io; | ||
1727 | } | ||
1728 | |||
1729 | /* | ||
1730 | * get block failure will cause us to loop in | ||
1731 | * writepages, because a_ops->writepage won't be able | ||
1732 | * to make progress. The page will be redirtied by | ||
1733 | * writepage and writepages will again try to write | ||
1734 | * the same. | ||
1735 | */ | ||
1736 | if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) { | ||
1737 | ext4_msg(sb, KERN_CRIT, | ||
1738 | "delayed block allocation failed for inode %lu " | ||
1739 | "at logical offset %llu with max blocks %zd " | ||
1740 | "with error %d", mpd->inode->i_ino, | ||
1741 | (unsigned long long) next, | ||
1742 | mpd->b_size >> mpd->inode->i_blkbits, err); | ||
1743 | ext4_msg(sb, KERN_CRIT, | ||
1744 | "This should not happen!! Data will be lost"); | ||
1745 | if (err == -ENOSPC) | ||
1746 | ext4_print_free_blocks(mpd->inode); | ||
1747 | } | ||
1748 | /* invalidate all the pages */ | ||
1749 | ext4_da_block_invalidatepages(mpd); | ||
1750 | |||
1751 | /* Mark this page range as having been completed */ | ||
1752 | mpd->io_done = 1; | ||
1753 | return; | ||
1754 | } | ||
1755 | BUG_ON(blks == 0); | ||
1756 | |||
1757 | mapp = ↦ | ||
1758 | if (map.m_flags & EXT4_MAP_NEW) { | ||
1759 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; | ||
1760 | int i; | ||
1761 | |||
1762 | for (i = 0; i < map.m_len; i++) | ||
1763 | unmap_underlying_metadata(bdev, map.m_pblk + i); | ||
1764 | } | ||
1765 | |||
1766 | /* | ||
1767 | * Update on-disk size along with block allocation. | ||
1768 | */ | ||
1769 | disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits; | ||
1770 | if (disksize > i_size_read(mpd->inode)) | ||
1771 | disksize = i_size_read(mpd->inode); | ||
1772 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { | ||
1773 | ext4_update_i_disksize(mpd->inode, disksize); | ||
1774 | err = ext4_mark_inode_dirty(handle, mpd->inode); | ||
1775 | if (err) | ||
1776 | ext4_error(mpd->inode->i_sb, | ||
1777 | "Failed to mark inode %lu dirty", | ||
1778 | mpd->inode->i_ino); | ||
1779 | } | ||
1780 | |||
1781 | submit_io: | ||
1782 | mpage_da_submit_io(mpd, mapp); | ||
1783 | mpd->io_done = 1; | ||
1784 | } | ||
1785 | |||
1786 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ | ||
1787 | (1 << BH_Delay) | (1 << BH_Unwritten)) | ||
1788 | |||
1789 | /* | ||
1790 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks | ||
1791 | * | ||
1792 | * @mpd->lbh - extent of blocks | ||
1793 | * @logical - logical number of the block in the file | ||
1794 | * @b_state - b_state of the buffer head added | ||
1795 | * | ||
1796 | * the function is used to collect contig. blocks in same state | ||
1797 | */ | ||
1798 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t logical, | ||
1799 | unsigned long b_state) | ||
1800 | { | ||
1801 | sector_t next; | ||
1802 | int blkbits = mpd->inode->i_blkbits; | ||
1803 | int nrblocks = mpd->b_size >> blkbits; | ||
1804 | |||
1805 | /* | ||
1806 | * XXX Don't go larger than mballoc is willing to allocate | ||
1807 | * This is a stopgap solution. We eventually need to fold | ||
1808 | * mpage_da_submit_io() into this function and then call | ||
1809 | * ext4_map_blocks() multiple times in a loop | ||
1810 | */ | ||
1811 | if (nrblocks >= (8*1024*1024 >> blkbits)) | ||
1812 | goto flush_it; | ||
1813 | |||
1814 | /* check if the reserved journal credits might overflow */ | ||
1815 | if (!ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS)) { | ||
1816 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { | ||
1817 | /* | ||
1818 | * With non-extent format we are limited by the journal | ||
1819 | * credit available. Total credit needed to insert | ||
1820 | * nrblocks contiguous blocks is dependent on the | ||
1821 | * nrblocks. So limit nrblocks. | ||
1822 | */ | ||
1823 | goto flush_it; | ||
1824 | } | ||
1825 | } | ||
1826 | /* | ||
1827 | * First block in the extent | ||
1828 | */ | ||
1829 | if (mpd->b_size == 0) { | ||
1830 | mpd->b_blocknr = logical; | ||
1831 | mpd->b_size = 1 << blkbits; | ||
1832 | mpd->b_state = b_state & BH_FLAGS; | ||
1833 | return; | ||
1834 | } | ||
1835 | |||
1836 | next = mpd->b_blocknr + nrblocks; | ||
1837 | /* | ||
1838 | * Can we merge the block to our big extent? | ||
1839 | */ | ||
1840 | if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) { | ||
1841 | mpd->b_size += 1 << blkbits; | ||
1842 | return; | ||
1843 | } | ||
1844 | |||
1845 | flush_it: | ||
1846 | /* | ||
1847 | * We couldn't merge the block to our extent, so we | ||
1848 | * need to flush current extent and start new one | ||
1849 | */ | ||
1850 | mpage_da_map_and_submit(mpd); | ||
1851 | return; | ||
1852 | } | ||
1853 | |||
1854 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | 1497 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) |
1855 | { | 1498 | { |
1856 | return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); | 1499 | return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); |
@@ -1885,7 +1528,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | |||
1885 | 1528 | ||
1886 | /* Lookup extent status tree firstly */ | 1529 | /* Lookup extent status tree firstly */ |
1887 | if (ext4_es_lookup_extent(inode, iblock, &es)) { | 1530 | if (ext4_es_lookup_extent(inode, iblock, &es)) { |
1888 | 1531 | ext4_es_lru_add(inode); | |
1889 | if (ext4_es_is_hole(&es)) { | 1532 | if (ext4_es_is_hole(&es)) { |
1890 | retval = 0; | 1533 | retval = 0; |
1891 | down_read((&EXT4_I(inode)->i_data_sem)); | 1534 | down_read((&EXT4_I(inode)->i_data_sem)); |
@@ -1992,14 +1635,13 @@ add_delayed: | |||
1992 | int ret; | 1635 | int ret; |
1993 | unsigned long long status; | 1636 | unsigned long long status; |
1994 | 1637 | ||
1995 | #ifdef ES_AGGRESSIVE_TEST | 1638 | if (unlikely(retval != map->m_len)) { |
1996 | if (retval != map->m_len) { | 1639 | ext4_warning(inode->i_sb, |
1997 | printk("ES len assertation failed for inode: %lu " | 1640 | "ES len assertion failed for inode " |
1998 | "retval %d != map->m_len %d " | 1641 | "%lu: retval %d != map->m_len %d", |
1999 | "in %s (lookup)\n", inode->i_ino, retval, | 1642 | inode->i_ino, retval, map->m_len); |
2000 | map->m_len, __func__); | 1643 | WARN_ON(1); |
2001 | } | 1644 | } |
2002 | #endif | ||
2003 | 1645 | ||
2004 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? | 1646 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? |
2005 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; | 1647 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; |
@@ -2156,7 +1798,7 @@ out: | |||
2156 | * lock so we have to do some magic. | 1798 | * lock so we have to do some magic. |
2157 | * | 1799 | * |
2158 | * This function can get called via... | 1800 | * This function can get called via... |
2159 | * - ext4_da_writepages after taking page lock (have journal handle) | 1801 | * - ext4_writepages after taking page lock (have journal handle) |
2160 | * - journal_submit_inode_data_buffers (no journal handle) | 1802 | * - journal_submit_inode_data_buffers (no journal handle) |
2161 | * - shrink_page_list via the kswapd/direct reclaim (no journal handle) | 1803 | * - shrink_page_list via the kswapd/direct reclaim (no journal handle) |
2162 | * - grab_page_cache when doing write_begin (have journal handle) | 1804 | * - grab_page_cache when doing write_begin (have journal handle) |
@@ -2234,76 +1876,405 @@ static int ext4_writepage(struct page *page, | |||
2234 | */ | 1876 | */ |
2235 | return __ext4_journalled_writepage(page, len); | 1877 | return __ext4_journalled_writepage(page, len); |
2236 | 1878 | ||
2237 | memset(&io_submit, 0, sizeof(io_submit)); | 1879 | ext4_io_submit_init(&io_submit, wbc); |
1880 | io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
1881 | if (!io_submit.io_end) { | ||
1882 | redirty_page_for_writepage(wbc, page); | ||
1883 | unlock_page(page); | ||
1884 | return -ENOMEM; | ||
1885 | } | ||
2238 | ret = ext4_bio_write_page(&io_submit, page, len, wbc); | 1886 | ret = ext4_bio_write_page(&io_submit, page, len, wbc); |
2239 | ext4_io_submit(&io_submit); | 1887 | ext4_io_submit(&io_submit); |
1888 | /* Drop io_end reference we got from init */ | ||
1889 | ext4_put_io_end_defer(io_submit.io_end); | ||
2240 | return ret; | 1890 | return ret; |
2241 | } | 1891 | } |
2242 | 1892 | ||
1893 | #define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay)) | ||
1894 | |||
2243 | /* | 1895 | /* |
2244 | * This is called via ext4_da_writepages() to | 1896 | * mballoc gives us at most this number of blocks... |
2245 | * calculate the total number of credits to reserve to fit | 1897 | * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). |
2246 | * a single extent allocation into a single transaction, | 1898 | * The rest of mballoc seems to handle chunks up to full group size. |
2247 | * ext4_da_writpeages() will loop calling this before | ||
2248 | * the block allocation. | ||
2249 | */ | 1899 | */ |
1900 | #define MAX_WRITEPAGES_EXTENT_LEN 2048 | ||
2250 | 1901 | ||
2251 | static int ext4_da_writepages_trans_blocks(struct inode *inode) | 1902 | /* |
1903 | * mpage_add_bh_to_extent - try to add bh to extent of blocks to map | ||
1904 | * | ||
1905 | * @mpd - extent of blocks | ||
1906 | * @lblk - logical number of the block in the file | ||
1907 | * @b_state - b_state of the buffer head added | ||
1908 | * | ||
1909 | * the function is used to collect contiguous blocks in the same state | ||
1910 | */ | ||
1911 | static int mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk, | ||
1912 | unsigned long b_state) | ||
2252 | { | 1913 | { |
2253 | int max_blocks = EXT4_I(inode)->i_reserved_data_blocks; | 1914 | struct ext4_map_blocks *map = &mpd->map; |
1915 | |||
1916 | /* Don't go larger than mballoc is willing to allocate */ | ||
1917 | if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN) | ||
1918 | return 0; | ||
1919 | |||
1920 | /* First block in the extent? */ | ||
1921 | if (map->m_len == 0) { | ||
1922 | map->m_lblk = lblk; | ||
1923 | map->m_len = 1; | ||
1924 | map->m_flags = b_state & BH_FLAGS; | ||
1925 | return 1; | ||
1926 | } | ||
1927 | |||
1928 | /* Can we merge the block to our big extent? */ | ||
1929 | if (lblk == map->m_lblk + map->m_len && | ||
1930 | (b_state & BH_FLAGS) == map->m_flags) { | ||
1931 | map->m_len++; | ||
1932 | return 1; | ||
1933 | } | ||
1934 | return 0; | ||
1935 | } | ||
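The new mpage_add_bh_to_extent() is a run-length accumulator: a buffer joins the pending extent only when it is the next logical block and carries the same BH_Delay/BH_Unwritten state bits, and a zero return tells the caller to map and flush what has been gathered. A self-contained sketch of the merge rule (simplified types, single state flag):

	#include <stdio.h>

	struct map { unsigned long lblk; unsigned int len, flags; };

	static int add_block(struct map *m, unsigned long lblk, unsigned int flags)
	{
		if (m->len == 0) {			/* first block of the extent */
			m->lblk = lblk; m->len = 1; m->flags = flags;
			return 1;
		}
		if (lblk == m->lblk + m->len && flags == m->flags) {
			m->len++;			/* contiguous, same state */
			return 1;
		}
		return 0;				/* caller must map and flush */
	}

	int main(void)
	{
		struct map m = { 0, 0, 0 };
		unsigned long blocks[] = { 10, 11, 12, 20 };

		for (int i = 0; i < 4; i++)
			if (!add_block(&m, blocks[i], 1)) {
				/* prints: flush extent [10, +3], restart at 20 */
				printf("flush extent [%lu, +%u], restart at %lu\n",
				       m.lblk, m.len, blocks[i]);
				m.len = 0;		/* flushed; open a new extent */
				add_block(&m, blocks[i], 1);
			}
		return 0;
	}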
2254 | 1936 | ||
1937 | static bool add_page_bufs_to_extent(struct mpage_da_data *mpd, | ||
1938 | struct buffer_head *head, | ||
1939 | struct buffer_head *bh, | ||
1940 | ext4_lblk_t lblk) | ||
1941 | { | ||
1942 | struct inode *inode = mpd->inode; | ||
1943 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) | ||
1944 | >> inode->i_blkbits; | ||
1945 | |||
1946 | do { | ||
1947 | BUG_ON(buffer_locked(bh)); | ||
1948 | |||
1949 | if (!buffer_dirty(bh) || !buffer_mapped(bh) || | ||
1950 | (!buffer_delay(bh) && !buffer_unwritten(bh)) || | ||
1951 | lblk >= blocks) { | ||
1952 | /* Found extent to map? */ | ||
1953 | if (mpd->map.m_len) | ||
1954 | return false; | ||
1955 | if (lblk >= blocks) | ||
1956 | return true; | ||
1957 | continue; | ||
1958 | } | ||
1959 | if (!mpage_add_bh_to_extent(mpd, lblk, bh->b_state)) | ||
1960 | return false; | ||
1961 | } while (lblk++, (bh = bh->b_this_page) != head); | ||
1962 | return true; | ||
1963 | } | ||
1964 | |||
1965 | static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) | ||
1966 | { | ||
1967 | int len; | ||
1968 | loff_t size = i_size_read(mpd->inode); | ||
1969 | int err; | ||
1970 | |||
1971 | BUG_ON(page->index != mpd->first_page); | ||
1972 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
1973 | len = size & ~PAGE_CACHE_MASK; | ||
1974 | else | ||
1975 | len = PAGE_CACHE_SIZE; | ||
1976 | clear_page_dirty_for_io(page); | ||
1977 | err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc); | ||
1978 | if (!err) | ||
1979 | mpd->wbc->nr_to_write--; | ||
1980 | mpd->first_page++; | ||
1981 | |||
1982 | return err; | ||
1983 | } | ||
1984 | |||
1985 | /* | ||
1986 | * mpage_map_buffers - update buffers corresponding to changed extent and | ||
1987 | * submit fully mapped pages for IO | ||
1988 | * | ||
1989 | * @mpd - description of extent to map, on return next extent to map | ||
1990 | * | ||
1991 | * Scan buffers corresponding to changed extent (we expect corresponding pages | ||
1992 | * to be already locked) and update buffer state according to new extent state. | ||
1993 | * We map delalloc buffers to their physical location, clear unwritten bits, | ||
1994 | * and mark buffers as uninit when we perform writes to uninitialized extents | ||
1995 | * and do extent conversion after IO is finished. If the last page is not fully | ||
1996 | * mapped, we update @map to the next extent in the last page that needs | ||
1997 | * mapping. Otherwise we submit the page for IO. | ||
1998 | */ | ||
1999 | static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) | ||
2000 | { | ||
2001 | struct pagevec pvec; | ||
2002 | int nr_pages, i; | ||
2003 | struct inode *inode = mpd->inode; | ||
2004 | struct buffer_head *head, *bh; | ||
2005 | int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; | ||
2006 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) | ||
2007 | >> inode->i_blkbits; | ||
2008 | pgoff_t start, end; | ||
2009 | ext4_lblk_t lblk; | ||
2010 | sector_t pblock; | ||
2011 | int err; | ||
2012 | |||
2013 | start = mpd->map.m_lblk >> bpp_bits; | ||
2014 | end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits; | ||
2015 | lblk = start << bpp_bits; | ||
2016 | pblock = mpd->map.m_pblk; | ||
2017 | |||
2018 | pagevec_init(&pvec, 0); | ||
2019 | while (start <= end) { | ||
2020 | nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start, | ||
2021 | PAGEVEC_SIZE); | ||
2022 | if (nr_pages == 0) | ||
2023 | break; | ||
2024 | for (i = 0; i < nr_pages; i++) { | ||
2025 | struct page *page = pvec.pages[i]; | ||
2026 | |||
2027 | if (page->index > end) | ||
2028 | break; | ||
2029 | /* Up to 'end' pages must be contiguous */ | ||
2030 | BUG_ON(page->index != start); | ||
2031 | bh = head = page_buffers(page); | ||
2032 | do { | ||
2033 | if (lblk < mpd->map.m_lblk) | ||
2034 | continue; | ||
2035 | if (lblk >= mpd->map.m_lblk + mpd->map.m_len) { | ||
2036 | /* | ||
2037 | * Buffer after end of mapped extent. | ||
2038 | * Find next buffer in the page to map. | ||
2039 | */ | ||
2040 | mpd->map.m_len = 0; | ||
2041 | mpd->map.m_flags = 0; | ||
2042 | add_page_bufs_to_extent(mpd, head, bh, | ||
2043 | lblk); | ||
2044 | pagevec_release(&pvec); | ||
2045 | return 0; | ||
2046 | } | ||
2047 | if (buffer_delay(bh)) { | ||
2048 | clear_buffer_delay(bh); | ||
2049 | bh->b_blocknr = pblock++; | ||
2050 | } | ||
2051 | clear_buffer_unwritten(bh); | ||
2052 | } while (++lblk < blocks && | ||
2053 | (bh = bh->b_this_page) != head); | ||
2054 | |||
2055 | /* | ||
2056 | * FIXME: This is going to break if dioread_nolock | ||
2057 | * supports blocksize < pagesize as we will try to | ||
2058 | * convert potentially unmapped parts of inode. | ||
2059 | */ | ||
2060 | mpd->io_submit.io_end->size += PAGE_CACHE_SIZE; | ||
2061 | /* Page fully mapped - let IO run! */ | ||
2062 | err = mpage_submit_page(mpd, page); | ||
2063 | if (err < 0) { | ||
2064 | pagevec_release(&pvec); | ||
2065 | return err; | ||
2066 | } | ||
2067 | start++; | ||
2068 | } | ||
2069 | pagevec_release(&pvec); | ||
2070 | } | ||
2071 | /* Extent fully mapped and matches with page boundary. We are done. */ | ||
2072 | mpd->map.m_len = 0; | ||
2073 | mpd->map.m_flags = 0; | ||
2074 | return 0; | ||
2075 | } | ||
2076 | |||
2077 | static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) | ||
2078 | { | ||
2079 | struct inode *inode = mpd->inode; | ||
2080 | struct ext4_map_blocks *map = &mpd->map; | ||
2081 | int get_blocks_flags; | ||
2082 | int err; | ||
2083 | |||
2084 | trace_ext4_da_write_pages_extent(inode, map); | ||
2255 | /* | 2085 | /* |
2256 | * With non-extent format the journal credit needed to | 2086 | * Call ext4_map_blocks() to allocate any delayed allocation blocks, or |
2257 | * insert nrblocks contiguous block is dependent on | 2087 | * to convert an uninitialized extent to be initialized (in the case |
2258 | * number of contiguous block. So we will limit | 2088 | * where we have written into one or more preallocated blocks). It is |
2259 | * number of contiguous block to a sane value | 2089 | * possible that we're going to need more metadata blocks than |
2090 | * previously reserved. However we must not fail because we're in | ||
2091 | * writeback and there is nothing we can do about it so it might result | ||
2092 | * in data loss. So use reserved blocks to allocate metadata if | ||
2093 | * possible. | ||
2094 | * | ||
2095 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks | ||
2096 | * in question are delalloc blocks. This affects functions in many | ||
2097 | * different parts of the allocation call path. This flag exists | ||
2098 | * primarily because we don't want to change *many* call functions, so | ||
2099 | * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag | ||
2100 | * once the inode's allocation semaphore is taken. | ||
2260 | */ | 2101 | */ |
2261 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && | 2102 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | |
2262 | (max_blocks > EXT4_MAX_TRANS_DATA)) | 2103 | EXT4_GET_BLOCKS_METADATA_NOFAIL; |
2263 | max_blocks = EXT4_MAX_TRANS_DATA; | 2104 | if (ext4_should_dioread_nolock(inode)) |
2105 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
2106 | if (map->m_flags & (1 << BH_Delay)) | ||
2107 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | ||
2264 | 2108 | ||
2265 | return ext4_chunk_trans_blocks(inode, max_blocks); | 2109 | err = ext4_map_blocks(handle, inode, map, get_blocks_flags); |
2110 | if (err < 0) | ||
2111 | return err; | ||
2112 | if (map->m_flags & EXT4_MAP_UNINIT) { | ||
2113 | if (!mpd->io_submit.io_end->handle && | ||
2114 | ext4_handle_valid(handle)) { | ||
2115 | mpd->io_submit.io_end->handle = handle->h_rsv_handle; | ||
2116 | handle->h_rsv_handle = NULL; | ||
2117 | } | ||
2118 | ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end); | ||
2119 | } | ||
2120 | |||
2121 | BUG_ON(map->m_len == 0); | ||
2122 | if (map->m_flags & EXT4_MAP_NEW) { | ||
2123 | struct block_device *bdev = inode->i_sb->s_bdev; | ||
2124 | int i; | ||
2125 | |||
2126 | for (i = 0; i < map->m_len; i++) | ||
2127 | unmap_underlying_metadata(bdev, map->m_pblk + i); | ||
2128 | } | ||
2129 | return 0; | ||
2266 | } | 2130 | } |
2267 | 2131 | ||
2268 | /* | 2132 | /* |
2269 | * write_cache_pages_da - walk the list of dirty pages of the given | 2133 | * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length |
2270 | * address space and accumulate pages that need writing, and call | 2134 | * mpd->len and submit pages underlying it for IO |
2271 | * mpage_da_map_and_submit to map a single contiguous memory region | 2135 | * |
2272 | * and then write them. | 2136 | * @handle - handle for journal operations |
2137 | * @mpd - extent to map | ||
2138 | * | ||
2139 | * The function maps extent starting at mpd->lblk of length mpd->len. If it is | ||
2140 | * delayed, blocks are allocated; if it is unwritten, we may need to convert | ||
2141 | * them to initialized or split the described range from larger unwritten | ||
2142 | * extent. Note that we need not map all the described range since allocation | ||
2143 | * can return fewer blocks or the range is covered by more unwritten extents. We | ||
2144 | * cannot map more because we are limited by reserved transaction credits. On | ||
2145 | * the other hand we always make sure that the last touched page is fully | ||
2146 | * mapped so that it can be written out (and thus forward progress is | ||
2147 | * guaranteed). After mapping we submit all mapped pages for IO. | ||
2273 | */ | 2148 | */ |
2274 | static int write_cache_pages_da(handle_t *handle, | 2149 | static int mpage_map_and_submit_extent(handle_t *handle, |
2275 | struct address_space *mapping, | 2150 | struct mpage_da_data *mpd, |
2276 | struct writeback_control *wbc, | 2151 | bool *give_up_on_write) |
2277 | struct mpage_da_data *mpd, | ||
2278 | pgoff_t *done_index) | ||
2279 | { | 2152 | { |
2280 | struct buffer_head *bh, *head; | 2153 | struct inode *inode = mpd->inode; |
2281 | struct inode *inode = mapping->host; | 2154 | struct ext4_map_blocks *map = &mpd->map; |
2282 | struct pagevec pvec; | 2155 | int err; |
2283 | unsigned int nr_pages; | 2156 | loff_t disksize; |
2284 | sector_t logical; | ||
2285 | pgoff_t index, end; | ||
2286 | long nr_to_write = wbc->nr_to_write; | ||
2287 | int i, tag, ret = 0; | ||
2288 | |||
2289 | memset(mpd, 0, sizeof(struct mpage_da_data)); | ||
2290 | mpd->wbc = wbc; | ||
2291 | mpd->inode = inode; | ||
2292 | pagevec_init(&pvec, 0); | ||
2293 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
2294 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
2295 | 2157 | ||
2296 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | 2158 | mpd->io_submit.io_end->offset = |
2159 | ((loff_t)map->m_lblk) << inode->i_blkbits; | ||
2160 | do { | ||
2161 | err = mpage_map_one_extent(handle, mpd); | ||
2162 | if (err < 0) { | ||
2163 | struct super_block *sb = inode->i_sb; | ||
2164 | |||
2165 | if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) | ||
2166 | goto invalidate_dirty_pages; | ||
2167 | /* | ||
2168 | * Let the upper layers retry transient errors. | ||
2169 | * In the case of ENOSPC, if ext4_count_free_clusters() | ||
2170 | * is non-zero, a commit should free up blocks. | ||
2171 | */ | ||
2172 | if ((err == -ENOMEM) || | ||
2173 | (err == -ENOSPC && ext4_count_free_clusters(sb))) | ||
2174 | return err; | ||
2175 | ext4_msg(sb, KERN_CRIT, | ||
2176 | "Delayed block allocation failed for " | ||
2177 | "inode %lu at logical offset %llu with" | ||
2178 | " max blocks %u with error %d", | ||
2179 | inode->i_ino, | ||
2180 | (unsigned long long)map->m_lblk, | ||
2181 | (unsigned)map->m_len, -err); | ||
2182 | ext4_msg(sb, KERN_CRIT, | ||
2183 | "This should not happen!! Data will " | ||
2184 | "be lost\n"); | ||
2185 | if (err == -ENOSPC) | ||
2186 | ext4_print_free_blocks(inode); | ||
2187 | invalidate_dirty_pages: | ||
2188 | *give_up_on_write = true; | ||
2189 | return err; | ||
2190 | } | ||
2191 | /* | ||
2192 | * Update buffer state, submit mapped pages, and get us new | ||
2193 | * extent to map | ||
2194 | */ | ||
2195 | err = mpage_map_and_submit_buffers(mpd); | ||
2196 | if (err < 0) | ||
2197 | return err; | ||
2198 | } while (map->m_len); | ||
2199 | |||
2200 | /* Update on-disk size after IO is submitted */ | ||
2201 | disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; | ||
2202 | if (disksize > i_size_read(inode)) | ||
2203 | disksize = i_size_read(inode); | ||
2204 | if (disksize > EXT4_I(inode)->i_disksize) { | ||
2205 | int err2; | ||
2206 | |||
2207 | ext4_update_i_disksize(inode, disksize); | ||
2208 | err2 = ext4_mark_inode_dirty(handle, inode); | ||
2209 | if (err2) | ||
2210 | ext4_error(inode->i_sb, | ||
2211 | "Failed to mark inode %lu dirty", | ||
2212 | inode->i_ino); | ||
2213 | if (!err) | ||
2214 | err = err2; | ||
2215 | } | ||
2216 | return err; | ||
2217 | } | ||
2218 | |||
2219 | /* | ||
2220 | * Calculate the total number of credits to reserve for one writepages | ||
2221 | * iteration. This is called from ext4_writepages(). We map an extent of | ||
2222 | * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping | ||
2223 | * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN + | ||
2224 | * bpp - 1 blocks in bpp different extents. | ||
2225 | */ | ||
2226 | static int ext4_da_writepages_trans_blocks(struct inode *inode) | ||
2227 | { | ||
2228 | int bpp = ext4_journal_blocks_per_page(inode); | ||
2229 | |||
2230 | return ext4_meta_trans_blocks(inode, | ||
2231 | MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp); | ||
2232 | } | ||
2233 | |||
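The credit estimate follows directly from the comment above: one writepages iteration maps at most MAX_WRITEPAGES_EXTENT_LEN blocks plus up to bpp - 1 more to finish the last partial page, spread across at most bpp extents. Worked numbers for one common configuration (assumed: 4K pages, 1K blocks):

	#include <stdio.h>

	#define MAX_WRITEPAGES_EXTENT_LEN 2048

	int main(void)
	{
		int page_shift = 12, blkbits = 10;
		int bpp = 1 << (page_shift - blkbits);	/* blocks per page = 4 */

		/* prints: up to 2051 blocks in up to 4 extents */
		printf("up to %d blocks in up to %d extents per iteration\n",
		       MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp);
		return 0;
	}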
2234 | /* | ||
2235 | * mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages | ||
2236 | * and underlying extent to map | ||
2237 | * | ||
2238 | * @mpd - where to look for pages | ||
2239 | * | ||
2240 | * Walk dirty pages in the mapping. If they are fully mapped, submit them for | ||
2241 | * IO immediately. When we find a page which isn't mapped we start accumulating | ||
2242 | * extent of buffers underlying these pages that needs mapping (formed by | ||
2243 | * either delayed or unwritten buffers). We also lock the pages containing | ||
2244 | * these buffers. The extent found is returned in @mpd structure (starting at | ||
2245 | * mpd->lblk with length mpd->len blocks). | ||
2246 | * | ||
2247 | * Note that this function can attach bios to one io_end structure which are | ||
2248 | * neither logically nor physically contiguous. Although it may seem like an | ||
2249 | * unnecessary complication, it is actually inevitable in blocksize < pagesize | ||
2250 | * case as we need to track IO to all buffers underlying a page in one io_end. | ||
2251 | */ | ||
2252 | static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) | ||
2253 | { | ||
2254 | struct address_space *mapping = mpd->inode->i_mapping; | ||
2255 | struct pagevec pvec; | ||
2256 | unsigned int nr_pages; | ||
2257 | pgoff_t index = mpd->first_page; | ||
2258 | pgoff_t end = mpd->last_page; | ||
2259 | int tag; | ||
2260 | int i, err = 0; | ||
2261 | int blkbits = mpd->inode->i_blkbits; | ||
2262 | ext4_lblk_t lblk; | ||
2263 | struct buffer_head *head; | ||
2264 | |||
2265 | if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages) | ||
2297 | tag = PAGECACHE_TAG_TOWRITE; | 2266 | tag = PAGECACHE_TAG_TOWRITE; |
2298 | else | 2267 | else |
2299 | tag = PAGECACHE_TAG_DIRTY; | 2268 | tag = PAGECACHE_TAG_DIRTY; |
2300 | 2269 | ||
2301 | *done_index = index; | 2270 | pagevec_init(&pvec, 0); |
2271 | mpd->map.m_len = 0; | ||
2272 | mpd->next_page = index; | ||
2302 | while (index <= end) { | 2273 | while (index <= end) { |
2303 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | 2274 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2304 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2275 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
2305 | if (nr_pages == 0) | 2276 | if (nr_pages == 0) |
2306 | return 0; | 2277 | goto out; |
2307 | 2278 | ||
2308 | for (i = 0; i < nr_pages; i++) { | 2279 | for (i = 0; i < nr_pages; i++) { |
2309 | struct page *page = pvec.pages[i]; | 2280 | struct page *page = pvec.pages[i]; |
@@ -2318,31 +2289,21 @@ static int write_cache_pages_da(handle_t *handle, | |||
2318 | if (page->index > end) | 2289 | if (page->index > end) |
2319 | goto out; | 2290 | goto out; |
2320 | 2291 | ||
2321 | *done_index = page->index + 1; | 2292 | /* If we can't merge this page, we are done. */ |
2322 | 2293 | if (mpd->map.m_len > 0 && mpd->next_page != page->index) | |
2323 | /* | 2294 | goto out; |
2324 | * If we can't merge this page, and we have | ||
2325 | * accumulated an contiguous region, write it | ||
2326 | */ | ||
2327 | if ((mpd->next_page != page->index) && | ||
2328 | (mpd->next_page != mpd->first_page)) { | ||
2329 | mpage_da_map_and_submit(mpd); | ||
2330 | goto ret_extent_tail; | ||
2331 | } | ||
2332 | 2295 | ||
2333 | lock_page(page); | 2296 | lock_page(page); |
2334 | |||
2335 | /* | 2297 | /* |
2336 | * If the page is no longer dirty, or its | 2298 | * If the page is no longer dirty, or its mapping no |
2337 | * mapping no longer corresponds to inode we | 2299 | * longer corresponds to inode we are writing (which |
2338 | * are writing (which means it has been | 2300 | * means it has been truncated or invalidated), or the |
2339 | * truncated or invalidated), or the page is | 2301 | * page is already under writeback and we are not doing |
2340 | * already under writeback and we are not | 2302 | * a data integrity writeback, skip the page |
2341 | * doing a data integrity writeback, skip the page | ||
2342 | */ | 2303 | */ |
2343 | if (!PageDirty(page) || | 2304 | if (!PageDirty(page) || |
2344 | (PageWriteback(page) && | 2305 | (PageWriteback(page) && |
2345 | (wbc->sync_mode == WB_SYNC_NONE)) || | 2306 | (mpd->wbc->sync_mode == WB_SYNC_NONE)) || |
2346 | unlikely(page->mapping != mapping)) { | 2307 | unlikely(page->mapping != mapping)) { |
2347 | unlock_page(page); | 2308 | unlock_page(page); |
2348 | continue; | 2309 | continue; |
@@ -2351,106 +2312,70 @@ static int write_cache_pages_da(handle_t *handle, | |||
2351 | wait_on_page_writeback(page); | 2312 | wait_on_page_writeback(page); |
2352 | BUG_ON(PageWriteback(page)); | 2313 | BUG_ON(PageWriteback(page)); |
2353 | 2314 | ||
2354 | /* | 2315 | if (mpd->map.m_len == 0) |
2355 | * If we have inline data and arrive here, it means that | ||
2356 | * we will soon create the block for the 1st page, so | ||
2357 | * we'd better clear the inline data here. | ||
2358 | */ | ||
2359 | if (ext4_has_inline_data(inode)) { | ||
2360 | BUG_ON(ext4_test_inode_state(inode, | ||
2361 | EXT4_STATE_MAY_INLINE_DATA)); | ||
2362 | ext4_destroy_inline_data(handle, inode); | ||
2363 | } | ||
2364 | |||
2365 | if (mpd->next_page != page->index) | ||
2366 | mpd->first_page = page->index; | 2316 | mpd->first_page = page->index; |
2367 | mpd->next_page = page->index + 1; | 2317 | mpd->next_page = page->index + 1; |
2368 | logical = (sector_t) page->index << | ||
2369 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2370 | |||
2371 | /* Add all dirty buffers to mpd */ | 2318 | /* Add all dirty buffers to mpd */ |
2319 | lblk = ((ext4_lblk_t)page->index) << | ||
2320 | (PAGE_CACHE_SHIFT - blkbits); | ||
2372 | head = page_buffers(page); | 2321 | head = page_buffers(page); |
2373 | bh = head; | 2322 | if (!add_page_bufs_to_extent(mpd, head, head, lblk)) |
2374 | do { | 2323 | goto out; |
2375 | BUG_ON(buffer_locked(bh)); | 2324 | /* So far everything mapped? Submit the page for IO. */ |
2376 | /* | 2325 | if (mpd->map.m_len == 0) { |
2377 | * We need to try to allocate unmapped blocks | 2326 | err = mpage_submit_page(mpd, page); |
2378 | * in the same page. Otherwise we won't make | 2327 | if (err < 0) |
2379 | * progress with the page in ext4_writepage | ||
2380 | */ | ||
2381 | if (ext4_bh_delay_or_unwritten(NULL, bh)) { | ||
2382 | mpage_add_bh_to_extent(mpd, logical, | ||
2383 | bh->b_state); | ||
2384 | if (mpd->io_done) | ||
2385 | goto ret_extent_tail; | ||
2386 | } else if (buffer_dirty(bh) && | ||
2387 | buffer_mapped(bh)) { | ||
2388 | /* | ||
2389 | * mapped dirty buffer. We need to | ||
2390 | * update the b_state because we look | ||
2391 | * at b_state in mpage_da_map_blocks. | ||
2392 | * We don't update b_size because if we | ||
2393 | * find an unmapped buffer_head later | ||
2394 | * we need to use the b_state flag of | ||
2395 | * that buffer_head. | ||
2396 | */ | ||
2397 | if (mpd->b_size == 0) | ||
2398 | mpd->b_state = | ||
2399 | bh->b_state & BH_FLAGS; | ||
2400 | } | ||
2401 | logical++; | ||
2402 | } while ((bh = bh->b_this_page) != head); | ||
2403 | |||
2404 | if (nr_to_write > 0) { | ||
2405 | nr_to_write--; | ||
2406 | if (nr_to_write == 0 && | ||
2407 | wbc->sync_mode == WB_SYNC_NONE) | ||
2408 | /* | ||
2409 | * We stop writing back only if we are | ||
2410 | * not doing integrity sync. In case of | ||
2411 | * integrity sync we have to keep going | ||
2412 | * because someone may be concurrently | ||
2413 | * dirtying pages, and we might have | ||
2414 | * synced a lot of newly appeared dirty | ||
2415 | * pages, but have not synced all of the | ||
2416 | * old dirty pages. | ||
2417 | */ | ||
2418 | goto out; | 2328 | goto out; |
2419 | } | 2329 | } |
2330 | |||
2331 | /* | ||
2332 | * Accumulated enough dirty pages? This doesn't apply | ||
2333 | * to WB_SYNC_ALL mode. For integrity sync we have to | ||
2334 | * keep going because someone may be concurrently | ||
2335 | * dirtying pages, and we might have synced a lot of | ||
2336 | * newly appeared dirty pages, but have not synced all | ||
2337 | * of the old dirty pages. | ||
2338 | */ | ||
2339 | if (mpd->wbc->sync_mode == WB_SYNC_NONE && | ||
2340 | mpd->next_page - mpd->first_page >= | ||
2341 | mpd->wbc->nr_to_write) | ||
2342 | goto out; | ||
2420 | } | 2343 | } |
2421 | pagevec_release(&pvec); | 2344 | pagevec_release(&pvec); |
2422 | cond_resched(); | 2345 | cond_resched(); |
2423 | } | 2346 | } |
2424 | return 0; | 2347 | return 0; |
2425 | ret_extent_tail: | ||
2426 | ret = MPAGE_DA_EXTENT_TAIL; | ||
2427 | out: | 2348 | out: |
2428 | pagevec_release(&pvec); | 2349 | pagevec_release(&pvec); |
2429 | cond_resched(); | 2350 | return err; |
2430 | return ret; | ||
2431 | } | 2351 | } |
2432 | 2352 | ||
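The rewritten scan above merges the lookup and mapping passes: pages come back from the tag lookup in PAGEVEC_SIZE batches, get folded into one contiguous extent, and the walk stops early once nr_to_write pages have accumulated unless this is an integrity sync. A minimal userspace sketch of that accumulation rule (array of dirty indices and the sizes are illustrative, not the kernel's types):

#include <stdbool.h>
#include <stdio.h>

/* Toy model: "dirty" holds page indices a tag lookup returned. */
static void scan(const unsigned long *dirty, int n,
		 long nr_to_write, bool sync_all)
{
	unsigned long first = 0, next = 0;
	bool have_extent = false;

	for (int i = 0; i < n; i++) {
		unsigned long idx = dirty[i];

		/* If we can't merge this page, we are done. */
		if (have_extent && idx != next)
			break;
		if (!have_extent) {
			first = idx;
			have_extent = true;
		}
		next = idx + 1;

		/* nr_to_write only bounds non-integrity writeback */
		if (!sync_all && (long)(next - first) >= nr_to_write)
			break;
	}
	if (have_extent)
		printf("extent: pages %lu..%lu\n", first, next - 1);
}

int main(void)
{
	unsigned long dirty[] = { 3, 4, 5, 9, 10 };

	scan(dirty, 5, 100, false);	/* gap at 5->9 stops it: 3..5 */
	scan(dirty, 5, 2, false);	/* nr_to_write stops it: 3..4 */
	return 0;
}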
2353 | static int __writepage(struct page *page, struct writeback_control *wbc, | ||
2354 | void *data) | ||
2355 | { | ||
2356 | struct address_space *mapping = data; | ||
2357 | int ret = ext4_writepage(page, wbc); | ||
2358 | mapping_set_error(mapping, ret); | ||
2359 | return ret; | ||
2360 | } | ||
2433 | 2361 | ||
2434 | static int ext4_da_writepages(struct address_space *mapping, | 2362 | static int ext4_writepages(struct address_space *mapping, |
2435 | struct writeback_control *wbc) | 2363 | struct writeback_control *wbc) |
2436 | { | 2364 | { |
2437 | pgoff_t index; | 2365 | pgoff_t writeback_index = 0; |
2366 | long nr_to_write = wbc->nr_to_write; | ||
2438 | int range_whole = 0; | 2367 | int range_whole = 0; |
2368 | int cycled = 1; | ||
2439 | handle_t *handle = NULL; | 2369 | handle_t *handle = NULL; |
2440 | struct mpage_da_data mpd; | 2370 | struct mpage_da_data mpd; |
2441 | struct inode *inode = mapping->host; | 2371 | struct inode *inode = mapping->host; |
2442 | int pages_written = 0; | 2372 | int needed_blocks, rsv_blocks = 0, ret = 0; |
2443 | unsigned int max_pages; | ||
2444 | int range_cyclic, cycled = 1, io_done = 0; | ||
2445 | int needed_blocks, ret = 0; | ||
2446 | long desired_nr_to_write, nr_to_writebump = 0; | ||
2447 | loff_t range_start = wbc->range_start; | ||
2448 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2373 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2449 | pgoff_t done_index = 0; | 2374 | bool done; |
2450 | pgoff_t end; | ||
2451 | struct blk_plug plug; | 2375 | struct blk_plug plug; |
2376 | bool give_up_on_write = false; | ||
2452 | 2377 | ||
2453 | trace_ext4_da_writepages(inode, wbc); | 2378 | trace_ext4_writepages(inode, wbc); |
2454 | 2379 | ||
2455 | /* | 2380 | /* |
2456 | * No pages to write? This is mainly a kludge to avoid starting | 2381 | * No pages to write? This is mainly a kludge to avoid starting |
@@ -2460,164 +2385,165 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2460 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) | 2385 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
2461 | return 0; | 2386 | return 0; |
2462 | 2387 | ||
2388 | if (ext4_should_journal_data(inode)) { | ||
2389 | struct blk_plug plug; | ||
2390 | int ret; | ||
2391 | |||
2392 | blk_start_plug(&plug); | ||
2393 | ret = write_cache_pages(mapping, wbc, __writepage, mapping); | ||
2394 | blk_finish_plug(&plug); | ||
2395 | return ret; | ||
2396 | } | ||
2397 | |||
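The added data=journal shortcut above simply hands every dirty page to write_cache_pages() with __writepage() as the per-page callback. A hedged stand-in for that callback-driven loop, with hypothetical names in place of the VFS helpers:

#include <stdio.h>

typedef int (*writepage_t)(unsigned long index, void *data);

static int writepage(unsigned long index, void *data)
{
	printf("write page %lu of %s\n", index, (const char *)data);
	return 0;
}

/* Minimal stand-in for write_cache_pages(): visit each dirty page,
 * hand it to the callback, and propagate the first error. */
static int write_pages(const unsigned long *dirty, int n,
		       writepage_t fn, void *data)
{
	for (int i = 0; i < n; i++) {
		int ret = fn(dirty[i], data);
		if (ret)
			return ret;
	}
	return 0;
}

int main(void)
{
	unsigned long dirty[] = { 1, 2, 7 };

	return write_pages(dirty, 3, writepage, (void *)"inode 12");
}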
2463 | /* | 2398 | /* |
2464 | * If the filesystem has aborted, it is read-only, so return | 2399 | * If the filesystem has aborted, it is read-only, so return |
2465 | * right away instead of dumping stack traces later on that | 2400 | * right away instead of dumping stack traces later on that |
2466 | * will obscure the real source of the problem. We test | 2401 | * will obscure the real source of the problem. We test |
2467 | * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because | 2402 | * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because |
2468 | * the latter could be true if the filesystem is mounted | 2403 | * the latter could be true if the filesystem is mounted |
2469 | * read-only, and in that case, ext4_da_writepages should | 2404 | * read-only, and in that case, ext4_writepages should |
2470 | * *never* be called, so if that ever happens, we would want | 2405 | * *never* be called, so if that ever happens, we would want |
2471 | * the stack trace. | 2406 | * the stack trace. |
2472 | */ | 2407 | */ |
2473 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) | 2408 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) |
2474 | return -EROFS; | 2409 | return -EROFS; |
2475 | 2410 | ||
2411 | if (ext4_should_dioread_nolock(inode)) { | ||
2412 | /* | ||
2413 | * We may need to convert up to one extent per block in | ||
2414 | * the page and we may dirty the inode. | ||
2415 | */ | ||
2416 | rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); | ||
2417 | } | ||
2418 | |||
2419 | /* | ||
2420 | * If we have inline data and arrive here, it means that | ||
2421 | * we will soon create the block for the 1st page, so | ||
2422 | * we'd better clear the inline data here. | ||
2423 | */ | ||
2424 | if (ext4_has_inline_data(inode)) { | ||
2425 | /* Just inode will be modified... */ | ||
2426 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); | ||
2427 | if (IS_ERR(handle)) { | ||
2428 | ret = PTR_ERR(handle); | ||
2429 | goto out_writepages; | ||
2430 | } | ||
2431 | BUG_ON(ext4_test_inode_state(inode, | ||
2432 | EXT4_STATE_MAY_INLINE_DATA)); | ||
2433 | ext4_destroy_inline_data(handle, inode); | ||
2434 | ext4_journal_stop(handle); | ||
2435 | } | ||
2436 | |||
2476 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | 2437 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) |
2477 | range_whole = 1; | 2438 | range_whole = 1; |
2478 | 2439 | ||
2479 | range_cyclic = wbc->range_cyclic; | ||
2480 | if (wbc->range_cyclic) { | 2440 | if (wbc->range_cyclic) { |
2481 | index = mapping->writeback_index; | 2441 | writeback_index = mapping->writeback_index; |
2482 | if (index) | 2442 | if (writeback_index) |
2483 | cycled = 0; | 2443 | cycled = 0; |
2484 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2444 | mpd.first_page = writeback_index; |
2485 | wbc->range_end = LLONG_MAX; | 2445 | mpd.last_page = -1; |
2486 | wbc->range_cyclic = 0; | ||
2487 | end = -1; | ||
2488 | } else { | 2446 | } else { |
2489 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2447 | mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT; |
2490 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2448 | mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT; |
2491 | } | ||
2492 | |||
2493 | /* | ||
2494 | * This works around two forms of stupidity. The first is in | ||
2495 | * the writeback code, which caps the maximum number of pages | ||
2496 | * written to be 1024 pages. This is wrong on multiple | ||
2497 | * levels; different architectures have a different page size, | ||
2498 | * which changes the maximum amount of data which gets | ||
2499 | * written. Secondly, 4 megabytes is way too small. XFS | ||
2500 | * forces this value to be 16 megabytes by multiplying | ||
2501 | * nr_to_write parameter by four, and then relies on its | ||
2502 | * allocator to allocate larger extents to make them | ||
2503 | * contiguous. Unfortunately this brings us to the second | ||
2504 | * stupidity, which is that ext4's mballoc code only allocates | ||
2505 | * at most 2048 blocks. So we force contiguous writes up to | ||
2506 | * the number of dirty blocks in the inode, or | ||
2507 | * sbi->max_writeback_mb_bump whichever is smaller. | ||
2508 | */ | ||
2509 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); | ||
2510 | if (!range_cyclic && range_whole) { | ||
2511 | if (wbc->nr_to_write == LONG_MAX) | ||
2512 | desired_nr_to_write = wbc->nr_to_write; | ||
2513 | else | ||
2514 | desired_nr_to_write = wbc->nr_to_write * 8; | ||
2515 | } else | ||
2516 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, | ||
2517 | max_pages); | ||
2518 | if (desired_nr_to_write > max_pages) | ||
2519 | desired_nr_to_write = max_pages; | ||
2520 | |||
2521 | if (wbc->nr_to_write < desired_nr_to_write) { | ||
2522 | nr_to_writebump = desired_nr_to_write - wbc->nr_to_write; | ||
2523 | wbc->nr_to_write = desired_nr_to_write; | ||
2524 | } | 2449 | } |
2525 | 2450 | ||
2451 | mpd.inode = inode; | ||
2452 | mpd.wbc = wbc; | ||
2453 | ext4_io_submit_init(&mpd.io_submit, wbc); | ||
2526 | retry: | 2454 | retry: |
2527 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | 2455 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
2528 | tag_pages_for_writeback(mapping, index, end); | 2456 | tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page); |
2529 | 2457 | done = false; | |
2530 | blk_start_plug(&plug); | 2458 | blk_start_plug(&plug); |
2531 | while (!ret && wbc->nr_to_write > 0) { | 2459 | while (!done && mpd.first_page <= mpd.last_page) { |
2460 | /* For each extent of pages we use new io_end */ | ||
2461 | mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL); | ||
2462 | if (!mpd.io_submit.io_end) { | ||
2463 | ret = -ENOMEM; | ||
2464 | break; | ||
2465 | } | ||
2532 | 2466 | ||
2533 | /* | 2467 | /* |
2534 | * we insert one extent at a time. So we need | 2468 | * We have two constraints: We find one extent to map and we |
2535 | * credit needed for single extent allocation. | 2469 | * must always write out the whole page (makes a difference when |
2536 | * journalled mode is currently not supported | 2470 | * blocksize < pagesize) so that we don't block on IO when we |
2537 | * by delalloc | 2471 | * try to write out the rest of the page. Journalled mode is |
2472 | * not supported by delalloc. | ||
2538 | */ | 2473 | */ |
2539 | BUG_ON(ext4_should_journal_data(inode)); | 2474 | BUG_ON(ext4_should_journal_data(inode)); |
2540 | needed_blocks = ext4_da_writepages_trans_blocks(inode); | 2475 | needed_blocks = ext4_da_writepages_trans_blocks(inode); |
2541 | 2476 | ||
2542 | /* start a new transaction*/ | 2477 | /* start a new transaction */ |
2543 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, | 2478 | handle = ext4_journal_start_with_reserve(inode, |
2544 | needed_blocks); | 2479 | EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks); |
2545 | if (IS_ERR(handle)) { | 2480 | if (IS_ERR(handle)) { |
2546 | ret = PTR_ERR(handle); | 2481 | ret = PTR_ERR(handle); |
2547 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " | 2482 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " |
2548 | "%ld pages, ino %lu; err %d", __func__, | 2483 | "%ld pages, ino %lu; err %d", __func__, |
2549 | wbc->nr_to_write, inode->i_ino, ret); | 2484 | wbc->nr_to_write, inode->i_ino, ret); |
2550 | blk_finish_plug(&plug); | 2485 | /* Release allocated io_end */ |
2551 | goto out_writepages; | 2486 | ext4_put_io_end(mpd.io_submit.io_end); |
2487 | break; | ||
2552 | } | 2488 | } |
2553 | 2489 | ||
2554 | /* | 2490 | trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc); |
2555 | * Now call write_cache_pages_da() to find the next | 2491 | ret = mpage_prepare_extent_to_map(&mpd); |
2556 | * contiguous region of logical blocks that need | 2492 | if (!ret) { |
2557 | * blocks to be allocated by ext4 and submit them. | 2493 | if (mpd.map.m_len) |
2558 | */ | 2494 | ret = mpage_map_and_submit_extent(handle, &mpd, |
2559 | ret = write_cache_pages_da(handle, mapping, | 2495 | &give_up_on_write); |
2560 | wbc, &mpd, &done_index); | 2496 | else { |
2561 | /* | 2497 | /* |
2562 | * If we have a contiguous extent of pages and we | 2498 | * We scanned the whole range (or exhausted |
2563 | * haven't done the I/O yet, map the blocks and submit | 2499 | * nr_to_write), submitted what was mapped and |
2564 | * them for I/O. | 2500 | * didn't find anything needing mapping. We are |
2565 | */ | 2501 | * done. |
2566 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | 2502 | */ |
2567 | mpage_da_map_and_submit(&mpd); | 2503 | done = true; |
2568 | ret = MPAGE_DA_EXTENT_TAIL; | 2504 | } |
2569 | } | 2505 | } |
2570 | trace_ext4_da_write_pages(inode, &mpd); | ||
2571 | wbc->nr_to_write -= mpd.pages_written; | ||
2572 | |||
2573 | ext4_journal_stop(handle); | 2506 | ext4_journal_stop(handle); |
2574 | 2507 | /* Submit prepared bio */ | |
2575 | if ((mpd.retval == -ENOSPC) && sbi->s_journal) { | 2508 | ext4_io_submit(&mpd.io_submit); |
2576 | /* commit the transaction which would | 2509 | /* Unlock pages we didn't use */ |
2510 | mpage_release_unused_pages(&mpd, give_up_on_write); | ||
2511 | /* Drop our io_end reference we got from init */ | ||
2512 | ext4_put_io_end(mpd.io_submit.io_end); | ||
2513 | |||
2514 | if (ret == -ENOSPC && sbi->s_journal) { | ||
2515 | /* | ||
2516 | * Commit the transaction which would | ||
2577 | * free blocks released in the transaction | 2517 | * free blocks released in the transaction |
2578 | * and try again | 2518 | * and try again |
2579 | */ | 2519 | */ |
2580 | jbd2_journal_force_commit_nested(sbi->s_journal); | 2520 | jbd2_journal_force_commit_nested(sbi->s_journal); |
2581 | ret = 0; | 2521 | ret = 0; |
2582 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | 2522 | continue; |
2583 | /* | 2523 | } |
2584 | * Got one extent now try with rest of the pages. | 2524 | /* Fatal error - ENOMEM, EIO... */ |
2585 | * If mpd.retval is set -EIO, journal is aborted. | 2525 | if (ret) |
2586 | * So we don't need to write any more. | ||
2587 | */ | ||
2588 | pages_written += mpd.pages_written; | ||
2589 | ret = mpd.retval; | ||
2590 | io_done = 1; | ||
2591 | } else if (wbc->nr_to_write) | ||
2592 | /* | ||
2593 | * There is no more writeout needed | ||
2594 | * or we requested for a noblocking writeout | ||
2595 | * and we found the device congested | ||
2596 | */ | ||
2597 | break; | 2526 | break; |
2598 | } | 2527 | } |
2599 | blk_finish_plug(&plug); | 2528 | blk_finish_plug(&plug); |
2600 | if (!io_done && !cycled) { | 2529 | if (!ret && !cycled) { |
2601 | cycled = 1; | 2530 | cycled = 1; |
2602 | index = 0; | 2531 | mpd.last_page = writeback_index - 1; |
2603 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2532 | mpd.first_page = 0; |
2604 | wbc->range_end = mapping->writeback_index - 1; | ||
2605 | goto retry; | 2533 | goto retry; |
2606 | } | 2534 | } |
2607 | 2535 | ||
2608 | /* Update index */ | 2536 | /* Update index */ |
2609 | wbc->range_cyclic = range_cyclic; | ||
2610 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 2537 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
2611 | /* | 2538 | /* |
2612 | * set the writeback_index so that range_cyclic | 2539 | * Set the writeback_index so that range_cyclic |
2613 | * mode will write it back later | 2540 | * mode will write it back later |
2614 | */ | 2541 | */ |
2615 | mapping->writeback_index = done_index; | 2542 | mapping->writeback_index = mpd.first_page; |
2616 | 2543 | ||
2617 | out_writepages: | 2544 | out_writepages: |
2618 | wbc->nr_to_write -= nr_to_writebump; | 2545 | trace_ext4_writepages_result(inode, wbc, ret, |
2619 | wbc->range_start = range_start; | 2546 | nr_to_write - wbc->nr_to_write); |
2620 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | ||
2621 | return ret; | 2547 | return ret; |
2622 | } | 2548 | } |
2623 | 2549 | ||
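A sketch of the range_cyclic sweep the rewritten ext4_writepages() performs: start at mapping->writeback_index, run to EOF, then wrap around once to cover [0, writeback_index - 1]. Here (unsigned long)-1 stands for "up to end of file", mirroring mpd.last_page = -1 above; the starting index is an example value:

#include <stdio.h>

int main(void)
{
	unsigned long writeback_index = 37;	/* where the last sweep ended */
	unsigned long first = writeback_index;
	unsigned long last = (unsigned long)-1;	/* -1 == up to EOF */
	int cycled = (writeback_index == 0);

retry:
	printf("sweep pages %lu..%lu\n", first, last);
	if (!cycled) {
		cycled = 1;
		last = writeback_index - 1;
		first = 0;
		goto retry;
	}
	return 0;
}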
@@ -2829,7 +2755,8 @@ static int ext4_da_write_end(struct file *file, | |||
2829 | return ret ? ret : copied; | 2755 | return ret ? ret : copied; |
2830 | } | 2756 | } |
2831 | 2757 | ||
2832 | static void ext4_da_invalidatepage(struct page *page, unsigned long offset) | 2758 | static void ext4_da_invalidatepage(struct page *page, unsigned int offset, |
2759 | unsigned int length) | ||
2833 | { | 2760 | { |
2834 | /* | 2761 | /* |
2835 | * Drop reserved blocks | 2762 | * Drop reserved blocks |
@@ -2838,10 +2765,10 @@ static void ext4_da_invalidatepage(struct page *page, unsigned long offset) | |||
2838 | if (!page_has_buffers(page)) | 2765 | if (!page_has_buffers(page)) |
2839 | goto out; | 2766 | goto out; |
2840 | 2767 | ||
2841 | ext4_da_page_release_reservation(page, offset); | 2768 | ext4_da_page_release_reservation(page, offset, length); |
2842 | 2769 | ||
2843 | out: | 2770 | out: |
2844 | ext4_invalidatepage(page, offset); | 2771 | ext4_invalidatepage(page, offset, length); |
2845 | 2772 | ||
2846 | return; | 2773 | return; |
2847 | } | 2774 | } |
@@ -2864,7 +2791,7 @@ int ext4_alloc_da_blocks(struct inode *inode) | |||
2864 | * laptop_mode, not even desirable). However, to do otherwise | 2791 | * laptop_mode, not even desirable). However, to do otherwise |
2865 | * would require replicating code paths in: | 2792 | * would require replicating code paths in: |
2866 | * | 2793 | * |
2867 | * ext4_da_writepages() -> | 2794 | * ext4_writepages() -> |
2868 | * write_cache_pages() ---> (via passed in callback function) | 2795 | * write_cache_pages() ---> (via passed in callback function) |
2869 | * __mpage_da_writepage() --> | 2796 | * __mpage_da_writepage() --> |
2870 | * mpage_add_bh_to_extent() | 2797 | * mpage_add_bh_to_extent() |
@@ -2989,37 +2916,40 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
2989 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 2916 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
2990 | } | 2917 | } |
2991 | 2918 | ||
2992 | static void ext4_invalidatepage(struct page *page, unsigned long offset) | 2919 | static void ext4_invalidatepage(struct page *page, unsigned int offset, |
2920 | unsigned int length) | ||
2993 | { | 2921 | { |
2994 | trace_ext4_invalidatepage(page, offset); | 2922 | trace_ext4_invalidatepage(page, offset, length); |
2995 | 2923 | ||
2996 | /* No journalling happens on data buffers when this function is used */ | 2924 | /* No journalling happens on data buffers when this function is used */ |
2997 | WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); | 2925 | WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); |
2998 | 2926 | ||
2999 | block_invalidatepage(page, offset); | 2927 | block_invalidatepage(page, offset, length); |
3000 | } | 2928 | } |
3001 | 2929 | ||
3002 | static int __ext4_journalled_invalidatepage(struct page *page, | 2930 | static int __ext4_journalled_invalidatepage(struct page *page, |
3003 | unsigned long offset) | 2931 | unsigned int offset, |
2932 | unsigned int length) | ||
3004 | { | 2933 | { |
3005 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 2934 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3006 | 2935 | ||
3007 | trace_ext4_journalled_invalidatepage(page, offset); | 2936 | trace_ext4_journalled_invalidatepage(page, offset, length); |
3008 | 2937 | ||
3009 | /* | 2938 | /* |
3010 | * If it's a full truncate we just forget about the pending dirtying | 2939 | * If it's a full truncate we just forget about the pending dirtying |
3011 | */ | 2940 | */ |
3012 | if (offset == 0) | 2941 | if (offset == 0 && length == PAGE_CACHE_SIZE) |
3013 | ClearPageChecked(page); | 2942 | ClearPageChecked(page); |
3014 | 2943 | ||
3015 | return jbd2_journal_invalidatepage(journal, page, offset); | 2944 | return jbd2_journal_invalidatepage(journal, page, offset, length); |
3016 | } | 2945 | } |
3017 | 2946 | ||
3018 | /* Wrapper for aops... */ | 2947 | /* Wrapper for aops... */ |
3019 | static void ext4_journalled_invalidatepage(struct page *page, | 2948 | static void ext4_journalled_invalidatepage(struct page *page, |
3020 | unsigned long offset) | 2949 | unsigned int offset, |
2950 | unsigned int length) | ||
3021 | { | 2951 | { |
3022 | WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0); | 2952 | WARN_ON(__ext4_journalled_invalidatepage(page, offset, length) < 0); |
3023 | } | 2953 | } |
3024 | 2954 | ||
3025 | static int ext4_releasepage(struct page *page, gfp_t wait) | 2955 | static int ext4_releasepage(struct page *page, gfp_t wait) |
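The invalidatepage hooks now receive an (offset, length) pair, so "invalidate the whole page" becomes an explicit test rather than offset == 0 alone. A tiny model of that check, with PAGE_SIZE standing in for PAGE_CACHE_SIZE:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096u	/* stands in for PAGE_CACHE_SIZE */

static bool full_page_invalidate(unsigned int offset, unsigned int length)
{
	return offset == 0 && length == PAGE_SIZE;
}

int main(void)
{
	printf("%d\n", full_page_invalidate(0, PAGE_SIZE));	/* 1: full truncate */
	printf("%d\n", full_page_invalidate(0, 1024));		/* 0: partial */
	return 0;
}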
@@ -3067,9 +2997,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3067 | struct inode *inode = file_inode(iocb->ki_filp); | 2997 | struct inode *inode = file_inode(iocb->ki_filp); |
3068 | ext4_io_end_t *io_end = iocb->private; | 2998 | ext4_io_end_t *io_end = iocb->private; |
3069 | 2999 | ||
3070 | /* if not async direct IO or dio with 0 bytes write, just return */ | 3000 | /* if not async direct IO just return */ |
3071 | if (!io_end || !size) | 3001 | if (!io_end) { |
3072 | goto out; | 3002 | inode_dio_done(inode); |
3003 | if (is_async) | ||
3004 | aio_complete(iocb, ret, 0); | ||
3005 | return; | ||
3006 | } | ||
3073 | 3007 | ||
3074 | ext_debug("ext4_end_io_dio(): io_end 0x%p " | 3008 | ext_debug("ext4_end_io_dio(): io_end 0x%p " |
3075 | "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", | 3009 | "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", |
@@ -3077,25 +3011,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3077 | size); | 3011 | size); |
3078 | 3012 | ||
3079 | iocb->private = NULL; | 3013 | iocb->private = NULL; |
3080 | |||
3081 | /* if not aio dio with unwritten extents, just free io and return */ | ||
3082 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | ||
3083 | ext4_free_io_end(io_end); | ||
3084 | out: | ||
3085 | inode_dio_done(inode); | ||
3086 | if (is_async) | ||
3087 | aio_complete(iocb, ret, 0); | ||
3088 | return; | ||
3089 | } | ||
3090 | |||
3091 | io_end->offset = offset; | 3014 | io_end->offset = offset; |
3092 | io_end->size = size; | 3015 | io_end->size = size; |
3093 | if (is_async) { | 3016 | if (is_async) { |
3094 | io_end->iocb = iocb; | 3017 | io_end->iocb = iocb; |
3095 | io_end->result = ret; | 3018 | io_end->result = ret; |
3096 | } | 3019 | } |
3097 | 3020 | ext4_put_io_end_defer(io_end); | |
3098 | ext4_add_complete_io(io_end); | ||
3099 | } | 3021 | } |
3100 | 3022 | ||
3101 | /* | 3023 | /* |
@@ -3129,6 +3051,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3129 | get_block_t *get_block_func = NULL; | 3051 | get_block_t *get_block_func = NULL; |
3130 | int dio_flags = 0; | 3052 | int dio_flags = 0; |
3131 | loff_t final_size = offset + count; | 3053 | loff_t final_size = offset + count; |
3054 | ext4_io_end_t *io_end = NULL; | ||
3132 | 3055 | ||
3133 | /* Use the old path for reads and writes beyond i_size. */ | 3056 | /* Use the old path for reads and writes beyond i_size. */ |
3134 | if (rw != WRITE || final_size > inode->i_size) | 3057 | if (rw != WRITE || final_size > inode->i_size) |
@@ -3136,11 +3059,18 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3136 | 3059 | ||
3137 | BUG_ON(iocb->private == NULL); | 3060 | BUG_ON(iocb->private == NULL); |
3138 | 3061 | ||
3062 | /* | ||
3063 | * Make all waiters for direct IO properly wait also for extent | ||
3064 | * conversion. This also disallows a race between truncate() and | ||
3065 | * overwrite DIO as i_dio_count needs to be incremented under i_mutex. | ||
3066 | */ | ||
3067 | if (rw == WRITE) | ||
3068 | atomic_inc(&inode->i_dio_count); | ||
3069 | |||
3139 | /* If we do an overwrite dio, i_mutex locking can be released */ | 3070 |
3140 | overwrite = *((int *)iocb->private); | 3071 | overwrite = *((int *)iocb->private); |
3141 | 3072 | ||
3142 | if (overwrite) { | 3073 | if (overwrite) { |
3143 | atomic_inc(&inode->i_dio_count); | ||
3144 | down_read(&EXT4_I(inode)->i_data_sem); | 3074 | down_read(&EXT4_I(inode)->i_data_sem); |
3145 | mutex_unlock(&inode->i_mutex); | 3075 | mutex_unlock(&inode->i_mutex); |
3146 | } | 3076 | } |
@@ -3167,13 +3097,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3167 | iocb->private = NULL; | 3097 | iocb->private = NULL; |
3168 | ext4_inode_aio_set(inode, NULL); | 3098 | ext4_inode_aio_set(inode, NULL); |
3169 | if (!is_sync_kiocb(iocb)) { | 3099 | if (!is_sync_kiocb(iocb)) { |
3170 | ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS); | 3100 | io_end = ext4_init_io_end(inode, GFP_NOFS); |
3171 | if (!io_end) { | 3101 | if (!io_end) { |
3172 | ret = -ENOMEM; | 3102 | ret = -ENOMEM; |
3173 | goto retake_lock; | 3103 | goto retake_lock; |
3174 | } | 3104 | } |
3175 | io_end->flag |= EXT4_IO_END_DIRECT; | 3105 | io_end->flag |= EXT4_IO_END_DIRECT; |
3176 | iocb->private = io_end; | 3106 | /* |
3107 | * Grab reference for DIO. Will be dropped in ext4_end_io_dio() | ||
3108 | */ | ||
3109 | iocb->private = ext4_get_io_end(io_end); | ||
3177 | /* | 3110 | /* |
3178 | * we save the io structure for current async direct | 3111 | * we save the io structure for current async direct |
3179 | * IO, so that later ext4_map_blocks() could flag the | 3112 | * IO, so that later ext4_map_blocks() could flag the |
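The old free-on-completion scheme is replaced here by reference counting: the submitter holds one reference, iocb->private holds another, and whoever drops the last one finishes the io_end. A userspace model of that protocol (io_end_get/io_end_put are illustrative names, not the ext4 API):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct io_end { atomic_int count; };

static struct io_end *io_end_get(struct io_end *io)
{
	atomic_fetch_add(&io->count, 1);
	return io;
}

static void io_end_put(struct io_end *io)
{
	/* fetch_sub returns the old value; 1 means we were last */
	if (atomic_fetch_sub(&io->count, 1) == 1) {
		printf("last ref: finish unwritten conversion, free\n");
		free(io);
	}
}

int main(void)
{
	struct io_end *io = malloc(sizeof(*io));

	atomic_init(&io->count, 1);	/* submitter's init reference */
	io_end_get(io);			/* reference stashed in the iocb */

	io_end_put(io);	/* submitter drops its reference */
	io_end_put(io);	/* completion drops the last one: frees */
	return 0;
}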
@@ -3197,33 +3130,42 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3197 | NULL, | 3130 | NULL, |
3198 | dio_flags); | 3131 | dio_flags); |
3199 | 3132 | ||
3200 | if (iocb->private) | ||
3201 | ext4_inode_aio_set(inode, NULL); | ||
3202 | /* | 3133 | /* |
3203 | * The io_end structure takes a reference to the inode, that | 3134 | * Put our reference to io_end. This can free the io_end structure e.g. |
3204 | * structure needs to be destroyed and the reference to the | 3135 | * in the sync IO case or in case of error. It can even perform extent |
3205 | * inode needs to be dropped, when IO is complete, even with 0 | 3136 | * conversion if all bios we submitted finished before we got here. |
3206 | * byte write, or failed. | 3137 | * Note that in that case iocb->private can already be set to NULL |
3207 | * | 3138 | * here. |
3208 | * In the successful AIO DIO case, the io_end structure will | ||
3209 | * be destroyed and the reference to the inode will be dropped | ||
3210 | * after the end_io call back function is called. | ||
3211 | * | ||
3212 | * In the case there is 0 byte write, or error case, since VFS | ||
3213 | * direct IO won't invoke the end_io call back function, we | ||
3214 | * need to free the end_io structure here. | ||
3215 | */ | 3139 | */ |
3216 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | 3140 | if (io_end) { |
3217 | ext4_free_io_end(iocb->private); | 3141 | ext4_inode_aio_set(inode, NULL); |
3218 | iocb->private = NULL; | 3142 | ext4_put_io_end(io_end); |
3219 | } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | 3143 | /* |
3144 | * When no IO was submitted ext4_end_io_dio() was not | ||
3145 | * called so we have to put iocb's reference. | ||
3146 | */ | ||
3147 | if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) { | ||
3148 | WARN_ON(iocb->private != io_end); | ||
3149 | WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); | ||
3150 | WARN_ON(io_end->iocb); | ||
3151 | /* | ||
3152 | * Generic code already did inode_dio_done() so we | ||
3153 | * have to clear EXT4_IO_END_DIRECT to not do it for | ||
3154 | * the second time. | ||
3155 | */ | ||
3156 | io_end->flag = 0; | ||
3157 | ext4_put_io_end(io_end); | ||
3158 | iocb->private = NULL; | ||
3159 | } | ||
3160 | } | ||
3161 | if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | ||
3220 | EXT4_STATE_DIO_UNWRITTEN)) { | 3162 | EXT4_STATE_DIO_UNWRITTEN)) { |
3221 | int err; | 3163 | int err; |
3222 | /* | 3164 | /* |
3223 | * for the non-AIO case, since the IO is already | 3165 | * for the non-AIO case, since the IO is already |
3224 | * completed, we could do the conversion right here | 3166 | * completed, we could do the conversion right here |
3225 | */ | 3167 | */ |
3226 | err = ext4_convert_unwritten_extents(inode, | 3168 | err = ext4_convert_unwritten_extents(NULL, inode, |
3227 | offset, ret); | 3169 | offset, ret); |
3228 | if (err < 0) | 3170 | if (err < 0) |
3229 | ret = err; | 3171 | ret = err; |
@@ -3231,9 +3173,10 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3231 | } | 3173 | } |
3232 | 3174 | ||
3233 | retake_lock: | 3175 | retake_lock: |
3176 | if (rw == WRITE) | ||
3177 | inode_dio_done(inode); | ||
3234 | /* take i_mutex locking again if we do an overwrite dio */ | 3178 | /* take i_mutex locking again if we do an overwrite dio */ |
3235 | if (overwrite) { | 3179 | if (overwrite) { |
3236 | inode_dio_done(inode); | ||
3237 | up_read(&EXT4_I(inode)->i_data_sem); | 3180 | up_read(&EXT4_I(inode)->i_data_sem); |
3238 | mutex_lock(&inode->i_mutex); | 3181 | mutex_lock(&inode->i_mutex); |
3239 | } | 3182 | } |
@@ -3292,6 +3235,7 @@ static const struct address_space_operations ext4_aops = { | |||
3292 | .readpage = ext4_readpage, | 3235 | .readpage = ext4_readpage, |
3293 | .readpages = ext4_readpages, | 3236 | .readpages = ext4_readpages, |
3294 | .writepage = ext4_writepage, | 3237 | .writepage = ext4_writepage, |
3238 | .writepages = ext4_writepages, | ||
3295 | .write_begin = ext4_write_begin, | 3239 | .write_begin = ext4_write_begin, |
3296 | .write_end = ext4_write_end, | 3240 | .write_end = ext4_write_end, |
3297 | .bmap = ext4_bmap, | 3241 | .bmap = ext4_bmap, |
@@ -3307,6 +3251,7 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
3307 | .readpage = ext4_readpage, | 3251 | .readpage = ext4_readpage, |
3308 | .readpages = ext4_readpages, | 3252 | .readpages = ext4_readpages, |
3309 | .writepage = ext4_writepage, | 3253 | .writepage = ext4_writepage, |
3254 | .writepages = ext4_writepages, | ||
3310 | .write_begin = ext4_write_begin, | 3255 | .write_begin = ext4_write_begin, |
3311 | .write_end = ext4_journalled_write_end, | 3256 | .write_end = ext4_journalled_write_end, |
3312 | .set_page_dirty = ext4_journalled_set_page_dirty, | 3257 | .set_page_dirty = ext4_journalled_set_page_dirty, |
@@ -3322,7 +3267,7 @@ static const struct address_space_operations ext4_da_aops = { | |||
3322 | .readpage = ext4_readpage, | 3267 | .readpage = ext4_readpage, |
3323 | .readpages = ext4_readpages, | 3268 | .readpages = ext4_readpages, |
3324 | .writepage = ext4_writepage, | 3269 | .writepage = ext4_writepage, |
3325 | .writepages = ext4_da_writepages, | 3270 | .writepages = ext4_writepages, |
3326 | .write_begin = ext4_da_write_begin, | 3271 | .write_begin = ext4_da_write_begin, |
3327 | .write_end = ext4_da_write_end, | 3272 | .write_end = ext4_da_write_end, |
3328 | .bmap = ext4_bmap, | 3273 | .bmap = ext4_bmap, |
@@ -3355,89 +3300,56 @@ void ext4_set_aops(struct inode *inode) | |||
3355 | inode->i_mapping->a_ops = &ext4_aops; | 3300 | inode->i_mapping->a_ops = &ext4_aops; |
3356 | } | 3301 | } |
3357 | 3302 | ||
3358 | |||
3359 | /* | 3303 | /* |
3360 | * ext4_discard_partial_page_buffers() | 3304 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' |
3361 | * Wrapper function for ext4_discard_partial_page_buffers_no_lock. | 3305 | * up to the end of the block which corresponds to `from'. |
3362 | * This function finds and locks the page containing the offset | 3306 | * This is required during truncate. We need to physically zero the tail end |
3363 | * "from" and passes it to ext4_discard_partial_page_buffers_no_lock. | 3307 | * of that block so it doesn't yield old data if the file is later grown. |
3364 | * Calling functions that already have the page locked should call | ||
3365 | * ext4_discard_partial_page_buffers_no_lock directly. | ||
3366 | */ | 3308 | */ |
3367 | int ext4_discard_partial_page_buffers(handle_t *handle, | 3309 | int ext4_block_truncate_page(handle_t *handle, |
3368 | struct address_space *mapping, loff_t from, | 3310 | struct address_space *mapping, loff_t from) |
3369 | loff_t length, int flags) | ||
3370 | { | 3311 | { |
3312 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
3313 | unsigned length; | ||
3314 | unsigned blocksize; | ||
3371 | struct inode *inode = mapping->host; | 3315 | struct inode *inode = mapping->host; |
3372 | struct page *page; | ||
3373 | int err = 0; | ||
3374 | 3316 | ||
3375 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, | 3317 | blocksize = inode->i_sb->s_blocksize; |
3376 | mapping_gfp_mask(mapping) & ~__GFP_FS); | 3318 | length = blocksize - (offset & (blocksize - 1)); |
3377 | if (!page) | ||
3378 | return -ENOMEM; | ||
3379 | |||
3380 | err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page, | ||
3381 | from, length, flags); | ||
3382 | 3319 | ||
3383 | unlock_page(page); | 3320 | return ext4_block_zero_page_range(handle, mapping, from, length); |
3384 | page_cache_release(page); | ||
3385 | return err; | ||
3386 | } | 3321 | } |
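The new helper reduces to one line of arithmetic: zero from `from' to the end of its block. Worked through in a standalone sketch (page and block sizes are example values):

#include <stdio.h>

int main(void)
{
	unsigned page_size = 4096, blocksize = 1024;
	long long from = 10000;	/* e.g. the new i_size */

	unsigned offset = from & (page_size - 1);	/* 1808 into the page */
	unsigned length = blocksize - (offset & (blocksize - 1));

	printf("zero %u bytes at file offset %lld\n", length, from);
	return 0;	/* zeroes 240 bytes, up to offset 10240 */
}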
3387 | 3322 | ||
3388 | /* | 3323 | /* |
3389 | * ext4_discard_partial_page_buffers_no_lock() | 3324 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' |
3390 | * Zeros a page range of length 'length' starting from offset 'from'. | 3325 | * starting from file offset 'from'. The range to be zero'd must |
3391 | * Buffer heads that correspond to the block aligned regions of the | 3326 | * be contained within one block. If the specified range exceeds |
3392 | * zeroed range will be unmapped. Non-block-aligned regions | 3327 | * the end of the block it will be shortened to the end of the block |
3393 | * will have the corresponding buffer head mapped if needed so that | 3328 | * that corresponds to 'from' |
3394 | * that region of the page can be updated with the partial zero out. | ||
3395 | * | ||
3396 | * This function assumes that the page has already been locked. | ||
3397 | * The range to be discarded must be contained within the given page. | ||
3398 | * If the specified range exceeds the end of the page it will be shortened | ||
3399 | * to the end of the page that corresponds to 'from'. This function is | ||
3400 | * appropriate for updating a page and its buffer heads to be unmapped and | ||
3401 | * zeroed for blocks that have been either released, or are going to be | ||
3402 | * released. | ||
3403 | * | ||
3404 | * handle: The journal handle | ||
3405 | * inode: The files inode | ||
3406 | * page: A locked page that contains the offset "from" | ||
3407 | * from: The starting byte offset (from the beginning of the file) | ||
3408 | * to begin discarding | ||
3409 | * len: The length of bytes to discard | ||
3410 | * flags: Optional flags that may be used: | ||
3411 | * | ||
3412 | * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED | ||
3413 | * Only zero the regions of the page whose buffer heads | ||
3414 | * have already been unmapped. This flag is appropriate | ||
3415 | * for updating the contents of a page whose blocks may | ||
3416 | * have already been released, and we only want to zero | ||
3417 | * out the regions that correspond to those released blocks. | ||
3418 | * | ||
3419 | * Returns zero on success or negative on failure. | ||
3420 | */ | 3329 | */ |
3421 | static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | 3330 | int ext4_block_zero_page_range(handle_t *handle, |
3422 | struct inode *inode, struct page *page, loff_t from, | 3331 | struct address_space *mapping, loff_t from, loff_t length) |
3423 | loff_t length, int flags) | ||
3424 | { | 3332 | { |
3425 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 3333 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
3426 | unsigned int offset = from & (PAGE_CACHE_SIZE-1); | 3334 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
3427 | unsigned int blocksize, max, pos; | 3335 | unsigned blocksize, max, pos; |
3428 | ext4_lblk_t iblock; | 3336 | ext4_lblk_t iblock; |
3337 | struct inode *inode = mapping->host; | ||
3429 | struct buffer_head *bh; | 3338 | struct buffer_head *bh; |
3339 | struct page *page; | ||
3430 | int err = 0; | 3340 | int err = 0; |
3431 | 3341 | ||
3432 | blocksize = inode->i_sb->s_blocksize; | 3342 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, |
3433 | max = PAGE_CACHE_SIZE - offset; | 3343 | mapping_gfp_mask(mapping) & ~__GFP_FS); |
3344 | if (!page) | ||
3345 | return -ENOMEM; | ||
3434 | 3346 | ||
3435 | if (index != page->index) | 3347 | blocksize = inode->i_sb->s_blocksize; |
3436 | return -EINVAL; | 3348 | max = blocksize - (offset & (blocksize - 1)); |
3437 | 3349 | ||
3438 | /* | 3350 | /* |
3439 | * correct length if it does not fall between | 3351 | * correct length if it does not fall between |
3440 | * 'from' and the end of the page | 3352 | * 'from' and the end of the block |
3441 | */ | 3353 | */ |
3442 | if (length > max || length < 0) | 3354 | if (length > max || length < 0) |
3443 | length = max; | 3355 | length = max; |
@@ -3455,106 +3367,91 @@ static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | |||
3455 | iblock++; | 3367 | iblock++; |
3456 | pos += blocksize; | 3368 | pos += blocksize; |
3457 | } | 3369 | } |
3458 | 3370 | if (buffer_freed(bh)) { | |
3459 | pos = offset; | 3371 | BUFFER_TRACE(bh, "freed: skip"); |
3460 | while (pos < offset + length) { | 3372 | goto unlock; |
3461 | unsigned int end_of_block, range_to_discard; | 3373 | } |
3462 | 3374 | if (!buffer_mapped(bh)) { | |
3463 | err = 0; | 3375 | BUFFER_TRACE(bh, "unmapped"); |
3464 | 3376 | ext4_get_block(inode, iblock, bh, 0); | |
3465 | /* The length of space left to zero and unmap */ | 3377 | /* unmapped? It's a hole - nothing to do */ |
3466 | range_to_discard = offset + length - pos; | ||
3467 | |||
3468 | /* The length of space until the end of the block */ | ||
3469 | end_of_block = blocksize - (pos & (blocksize-1)); | ||
3470 | |||
3471 | /* | ||
3472 | * Do not unmap or zero past end of block | ||
3473 | * for this buffer head | ||
3474 | */ | ||
3475 | if (range_to_discard > end_of_block) | ||
3476 | range_to_discard = end_of_block; | ||
3477 | |||
3478 | |||
3479 | /* | ||
3480 | * Skip this buffer head if we are only zeroing unmapped | ||
3481 | * regions of the page | ||
3482 | */ | ||
3483 | if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED && | ||
3484 | buffer_mapped(bh)) | ||
3485 | goto next; | ||
3486 | |||
3487 | /* If the range is block aligned, unmap */ | ||
3488 | if (range_to_discard == blocksize) { | ||
3489 | clear_buffer_dirty(bh); | ||
3490 | bh->b_bdev = NULL; | ||
3491 | clear_buffer_mapped(bh); | ||
3492 | clear_buffer_req(bh); | ||
3493 | clear_buffer_new(bh); | ||
3494 | clear_buffer_delay(bh); | ||
3495 | clear_buffer_unwritten(bh); | ||
3496 | clear_buffer_uptodate(bh); | ||
3497 | zero_user(page, pos, range_to_discard); | ||
3498 | BUFFER_TRACE(bh, "Buffer discarded"); | ||
3499 | goto next; | ||
3500 | } | ||
3501 | |||
3502 | /* | ||
3503 | * If this block is not completely contained in the range | ||
3504 | * to be discarded, then it is not going to be released. Because | ||
3505 | * we need to keep this block, we need to make sure this part | ||
3506 | * of the page is uptodate before we modify it by writing | ||
3507 | * partial zeros on it. | ||
3508 | */ | ||
3509 | if (!buffer_mapped(bh)) { | 3378 | if (!buffer_mapped(bh)) { |
3510 | /* | 3379 | BUFFER_TRACE(bh, "still unmapped"); |
3511 | * Buffer head must be mapped before we can read | 3380 | goto unlock; |
3512 | * from the block | ||
3513 | */ | ||
3514 | BUFFER_TRACE(bh, "unmapped"); | ||
3515 | ext4_get_block(inode, iblock, bh, 0); | ||
3516 | /* unmapped? It's a hole - nothing to do */ | ||
3517 | if (!buffer_mapped(bh)) { | ||
3518 | BUFFER_TRACE(bh, "still unmapped"); | ||
3519 | goto next; | ||
3520 | } | ||
3521 | } | 3381 | } |
3382 | } | ||
3522 | 3383 | ||
3523 | /* Ok, it's mapped. Make sure it's up-to-date */ | 3384 | /* Ok, it's mapped. Make sure it's up-to-date */ |
3524 | if (PageUptodate(page)) | 3385 | if (PageUptodate(page)) |
3525 | set_buffer_uptodate(bh); | 3386 | set_buffer_uptodate(bh); |
3526 | 3387 | ||
3527 | if (!buffer_uptodate(bh)) { | 3388 | if (!buffer_uptodate(bh)) { |
3528 | err = -EIO; | 3389 | err = -EIO; |
3529 | ll_rw_block(READ, 1, &bh); | 3390 | ll_rw_block(READ, 1, &bh); |
3530 | wait_on_buffer(bh); | 3391 | wait_on_buffer(bh); |
3531 | /* Uhhuh. Read error. Complain and punt.*/ | 3392 | /* Uhhuh. Read error. Complain and punt. */ |
3532 | if (!buffer_uptodate(bh)) | 3393 | if (!buffer_uptodate(bh)) |
3533 | goto next; | 3394 | goto unlock; |
3534 | } | 3395 | } |
3396 | if (ext4_should_journal_data(inode)) { | ||
3397 | BUFFER_TRACE(bh, "get write access"); | ||
3398 | err = ext4_journal_get_write_access(handle, bh); | ||
3399 | if (err) | ||
3400 | goto unlock; | ||
3401 | } | ||
3402 | zero_user(page, offset, length); | ||
3403 | BUFFER_TRACE(bh, "zeroed end of block"); | ||
3535 | 3404 | ||
3536 | if (ext4_should_journal_data(inode)) { | 3405 | if (ext4_should_journal_data(inode)) { |
3537 | BUFFER_TRACE(bh, "get write access"); | 3406 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
3538 | err = ext4_journal_get_write_access(handle, bh); | 3407 | } else { |
3539 | if (err) | 3408 | err = 0; |
3540 | goto next; | 3409 | mark_buffer_dirty(bh); |
3541 | } | 3410 | if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) |
3411 | err = ext4_jbd2_file_inode(handle, inode); | ||
3412 | } | ||
3542 | 3413 | ||
3543 | zero_user(page, pos, range_to_discard); | 3414 | unlock: |
3415 | unlock_page(page); | ||
3416 | page_cache_release(page); | ||
3417 | return err; | ||
3418 | } | ||
3544 | 3419 | ||
3545 | err = 0; | 3420 | int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, |
3546 | if (ext4_should_journal_data(inode)) { | 3421 | loff_t lstart, loff_t length) |
3547 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 3422 | { |
3548 | } else | 3423 | struct super_block *sb = inode->i_sb; |
3549 | mark_buffer_dirty(bh); | 3424 | struct address_space *mapping = inode->i_mapping; |
3425 | unsigned partial_start, partial_end; | ||
3426 | ext4_fsblk_t start, end; | ||
3427 | loff_t byte_end = (lstart + length - 1); | ||
3428 | int err = 0; | ||
3550 | 3429 | ||
3551 | BUFFER_TRACE(bh, "Partial buffer zeroed"); | 3430 | partial_start = lstart & (sb->s_blocksize - 1); |
3552 | next: | 3431 | partial_end = byte_end & (sb->s_blocksize - 1); |
3553 | bh = bh->b_this_page; | ||
3554 | iblock++; | ||
3555 | pos += range_to_discard; | ||
3556 | } | ||
3557 | 3432 | ||
3433 | start = lstart >> sb->s_blocksize_bits; | ||
3434 | end = byte_end >> sb->s_blocksize_bits; | ||
3435 | |||
3436 | /* Handle partial zero within the single block */ | ||
3437 | if (start == end && | ||
3438 | (partial_start || (partial_end != sb->s_blocksize - 1))) { | ||
3439 | err = ext4_block_zero_page_range(handle, mapping, | ||
3440 | lstart, length); | ||
3441 | return err; | ||
3442 | } | ||
3443 | /* Handle partial zero out on the start of the range */ | ||
3444 | if (partial_start) { | ||
3445 | err = ext4_block_zero_page_range(handle, mapping, | ||
3446 | lstart, sb->s_blocksize); | ||
3447 | if (err) | ||
3448 | return err; | ||
3449 | } | ||
3450 | /* Handle partial zero out on the end of the range */ | ||
3451 | if (partial_end != sb->s_blocksize - 1) | ||
3452 | err = ext4_block_zero_page_range(handle, mapping, | ||
3453 | byte_end - partial_end, | ||
3454 | partial_end + 1); | ||
3558 | return err; | 3455 | return err; |
3559 | } | 3456 | } |
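ext4_zero_partial_blocks() only touches the unaligned head and tail of the range; whole blocks in between are left to the extent-removal path. The case analysis, modeled in userspace with example values:

#include <stdio.h>

int main(void)
{
	long long blocksize = 4096;
	long long lstart = 5000, length = 20000;
	long long byte_end = lstart + length - 1;	/* 24999 */

	long long partial_start = lstart & (blocksize - 1);	/* 904 */
	long long partial_end = byte_end & (blocksize - 1);	/* 423 */
	long long start = lstart / blocksize, end = byte_end / blocksize;

	if (start == end &&
	    (partial_start || partial_end != blocksize - 1)) {
		printf("zero inside one block: [%lld, %lld]\n",
		       lstart, byte_end);
	} else {
		if (partial_start)
			printf("zero head: [%lld, %lld]\n",
			       lstart, (start + 1) * blocksize - 1);
		if (partial_end != blocksize - 1)
			printf("zero tail: [%lld, %lld]\n",
			       byte_end - partial_end, byte_end);
	}
	return 0;
}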
3560 | 3457 | ||
@@ -3580,14 +3477,12 @@ int ext4_can_truncate(struct inode *inode) | |||
3580 | * Returns: 0 on success or negative on failure | 3477 | * Returns: 0 on success or negative on failure |
3581 | */ | 3478 | */ |
3582 | 3479 | ||
3583 | int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | 3480 | int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) |
3584 | { | 3481 | { |
3585 | struct inode *inode = file_inode(file); | ||
3586 | struct super_block *sb = inode->i_sb; | 3482 | struct super_block *sb = inode->i_sb; |
3587 | ext4_lblk_t first_block, stop_block; | 3483 | ext4_lblk_t first_block, stop_block; |
3588 | struct address_space *mapping = inode->i_mapping; | 3484 | struct address_space *mapping = inode->i_mapping; |
3589 | loff_t first_page, last_page, page_len; | 3485 | loff_t first_block_offset, last_block_offset; |
3590 | loff_t first_page_offset, last_page_offset; | ||
3591 | handle_t *handle; | 3486 | handle_t *handle; |
3592 | unsigned int credits; | 3487 | unsigned int credits; |
3593 | int ret = 0; | 3488 | int ret = 0; |
@@ -3638,23 +3533,16 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
3638 | offset; | 3533 | offset; |
3639 | } | 3534 | } |
3640 | 3535 | ||
3641 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 3536 | first_block_offset = round_up(offset, sb->s_blocksize); |
3642 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; | 3537 | last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; |
3643 | 3538 | ||
3644 | first_page_offset = first_page << PAGE_CACHE_SHIFT; | 3539 | /* Now release the pages and zero the block-aligned part of pages */ |
3645 | last_page_offset = last_page << PAGE_CACHE_SHIFT; | 3540 | if (last_block_offset > first_block_offset) |
3646 | 3541 | truncate_pagecache_range(inode, first_block_offset, | |
3647 | /* Now release the pages */ | 3542 | last_block_offset); |
3648 | if (last_page_offset > first_page_offset) { | ||
3649 | truncate_pagecache_range(inode, first_page_offset, | ||
3650 | last_page_offset - 1); | ||
3651 | } | ||
3652 | 3543 | ||
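Only pages that lie fully inside the hole may be dropped from the page cache; the block-aligned boundaries are computed with round_up/round_down as above. A standalone check of that arithmetic (macros spelled out; values are examples):

#include <stdio.h>

#define round_up(x, y)   ((((x) + (y) - 1) / (y)) * (y))
#define round_down(x, y) (((x) / (y)) * (y))

int main(void)
{
	long long blocksize = 4096, offset = 5000, length = 20000;

	long long first = round_up(offset, blocksize);			/* 8192 */
	long long last = round_down(offset + length, blocksize) - 1;	/* 24575 */

	if (last > first)
		printf("drop page cache for [%lld, %lld]\n", first, last);
	return 0;
}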
3653 | /* Wait for all existing dio workers, newcomers will block on i_mutex */ | 3544 | /* Wait for all existing dio workers, newcomers will block on i_mutex */ |
3654 | ext4_inode_block_unlocked_dio(inode); | 3545 | ext4_inode_block_unlocked_dio(inode); |
3655 | ret = ext4_flush_unwritten_io(inode); | ||
3656 | if (ret) | ||
3657 | goto out_dio; | ||
3658 | inode_dio_wait(inode); | 3546 | inode_dio_wait(inode); |
3659 | 3547 | ||
3660 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3548 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
@@ -3668,66 +3556,10 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
3668 | goto out_dio; | 3556 | goto out_dio; |
3669 | } | 3557 | } |
3670 | 3558 | ||
3671 | /* | 3559 | ret = ext4_zero_partial_blocks(handle, inode, offset, |
3672 | * Now we need to zero out the non-page-aligned data in the | 3560 | length); |
3673 | * pages at the start and tail of the hole, and unmap the | 3561 | if (ret) |
3674 | * buffer heads for the block aligned regions of the page that | 3562 | goto out_stop; |
3675 | * were completely zeroed. | ||
3676 | */ | ||
3677 | if (first_page > last_page) { | ||
3678 | /* | ||
3679 | * If the file space being truncated is contained | ||
3680 | * within a page just zero out and unmap the middle of | ||
3681 | * that page | ||
3682 | */ | ||
3683 | ret = ext4_discard_partial_page_buffers(handle, | ||
3684 | mapping, offset, length, 0); | ||
3685 | |||
3686 | if (ret) | ||
3687 | goto out_stop; | ||
3688 | } else { | ||
3689 | /* | ||
3690 | * zero out and unmap the partial page that contains | ||
3691 | * the start of the hole | ||
3692 | */ | ||
3693 | page_len = first_page_offset - offset; | ||
3694 | if (page_len > 0) { | ||
3695 | ret = ext4_discard_partial_page_buffers(handle, mapping, | ||
3696 | offset, page_len, 0); | ||
3697 | if (ret) | ||
3698 | goto out_stop; | ||
3699 | } | ||
3700 | |||
3701 | /* | ||
3702 | * zero out and unmap the partial page that contains | ||
3703 | * the end of the hole | ||
3704 | */ | ||
3705 | page_len = offset + length - last_page_offset; | ||
3706 | if (page_len > 0) { | ||
3707 | ret = ext4_discard_partial_page_buffers(handle, mapping, | ||
3708 | last_page_offset, page_len, 0); | ||
3709 | if (ret) | ||
3710 | goto out_stop; | ||
3711 | } | ||
3712 | } | ||
3713 | |||
3714 | /* | ||
3715 | * If i_size is contained in the last page, we need to | ||
3716 | * unmap and zero the partial page after i_size | ||
3717 | */ | ||
3718 | if (inode->i_size >> PAGE_CACHE_SHIFT == last_page && | ||
3719 | inode->i_size % PAGE_CACHE_SIZE != 0) { | ||
3720 | page_len = PAGE_CACHE_SIZE - | ||
3721 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
3722 | |||
3723 | if (page_len > 0) { | ||
3724 | ret = ext4_discard_partial_page_buffers(handle, | ||
3725 | mapping, inode->i_size, page_len, 0); | ||
3726 | |||
3727 | if (ret) | ||
3728 | goto out_stop; | ||
3729 | } | ||
3730 | } | ||
3731 | 3563 | ||
3732 | first_block = (offset + sb->s_blocksize - 1) >> | 3564 | first_block = (offset + sb->s_blocksize - 1) >> |
3733 | EXT4_BLOCK_SIZE_BITS(sb); | 3565 | EXT4_BLOCK_SIZE_BITS(sb); |
@@ -3803,7 +3635,6 @@ void ext4_truncate(struct inode *inode) | |||
3803 | unsigned int credits; | 3635 | unsigned int credits; |
3804 | handle_t *handle; | 3636 | handle_t *handle; |
3805 | struct address_space *mapping = inode->i_mapping; | 3637 | struct address_space *mapping = inode->i_mapping; |
3806 | loff_t page_len; | ||
3807 | 3638 | ||
3808 | /* | 3639 | /* |
3809 | * There is a possibility that we're either freeing the inode | 3640 | * There is a possibility that we're either freeing the inode |
@@ -3830,12 +3661,6 @@ void ext4_truncate(struct inode *inode) | |||
3830 | return; | 3661 | return; |
3831 | } | 3662 | } |
3832 | 3663 | ||
3833 | /* | ||
3834 | * finish any pending end_io work so we won't run the risk of | ||
3835 | * converting any truncated blocks to initialized later | ||
3836 | */ | ||
3837 | ext4_flush_unwritten_io(inode); | ||
3838 | |||
3839 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3664 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3840 | credits = ext4_writepage_trans_blocks(inode); | 3665 | credits = ext4_writepage_trans_blocks(inode); |
3841 | else | 3666 | else |
@@ -3847,14 +3672,8 @@ void ext4_truncate(struct inode *inode) | |||
3847 | return; | 3672 | return; |
3848 | } | 3673 | } |
3849 | 3674 | ||
3850 | if (inode->i_size % PAGE_CACHE_SIZE != 0) { | 3675 | if (inode->i_size & (inode->i_sb->s_blocksize - 1)) |
3851 | page_len = PAGE_CACHE_SIZE - | 3676 | ext4_block_truncate_page(handle, mapping, inode->i_size); |
3852 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
3853 | |||
3854 | if (ext4_discard_partial_page_buffers(handle, | ||
3855 | mapping, inode->i_size, page_len, 0)) | ||
3856 | goto out_stop; | ||
3857 | } | ||
3858 | 3677 | ||
3859 | /* | 3678 | /* |
3860 | * We add the inode to the orphan list, so that if this | 3679 | * We add the inode to the orphan list, so that if this |
@@ -4623,7 +4442,8 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) | |||
4623 | inode->i_size >> PAGE_CACHE_SHIFT); | 4442 | inode->i_size >> PAGE_CACHE_SHIFT); |
4624 | if (!page) | 4443 | if (!page) |
4625 | return; | 4444 | return; |
4626 | ret = __ext4_journalled_invalidatepage(page, offset); | 4445 | ret = __ext4_journalled_invalidatepage(page, offset, |
4446 | PAGE_CACHE_SIZE - offset); | ||
4627 | unlock_page(page); | 4447 | unlock_page(page); |
4628 | page_cache_release(page); | 4448 | page_cache_release(page); |
4629 | if (ret != -EBUSY) | 4449 | if (ret != -EBUSY) |
@@ -4805,7 +4625,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
4805 | struct kstat *stat) | 4625 | struct kstat *stat) |
4806 | { | 4626 | { |
4807 | struct inode *inode; | 4627 | struct inode *inode; |
4808 | unsigned long delalloc_blocks; | 4628 | unsigned long long delalloc_blocks; |
4809 | 4629 | ||
4810 | inode = dentry->d_inode; | 4630 | inode = dentry->d_inode; |
4811 | generic_fillattr(inode, stat); | 4631 | generic_fillattr(inode, stat); |
@@ -4823,15 +4643,16 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
4823 | delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), | 4643 | delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), |
4824 | EXT4_I(inode)->i_reserved_data_blocks); | 4644 | EXT4_I(inode)->i_reserved_data_blocks); |
4825 | 4645 | ||
4826 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | 4646 | stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); |
4827 | return 0; | 4647 | return 0; |
4828 | } | 4648 | } |
4829 | 4649 | ||
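stat->blocks counts 512-byte sectors, so the reserved delalloc blocks are converted with a single shift by (blocksize_bits - 9); widening delalloc_blocks to unsigned long long is what makes the direct shift safe where the old "(x << bits) >> 9" form could overflow. A sketch:

#include <stdio.h>

int main(void)
{
	unsigned long long delalloc_blocks = 3;	/* reserved, not yet on disk */
	unsigned blocksize_bits = 12;		/* 4096-byte blocks */

	/* one fs block = 2^(bits - 9) sectors of 512 bytes */
	unsigned long long sectors = delalloc_blocks << (blocksize_bits - 9);

	printf("%llu blocks -> %llu sectors\n", delalloc_blocks, sectors);
	return 0;
}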
4830 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 4650 | static int ext4_index_trans_blocks(struct inode *inode, int lblocks, |
4651 | int pextents) | ||
4831 | { | 4652 | { |
4832 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 4653 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
4833 | return ext4_ind_trans_blocks(inode, nrblocks, chunk); | 4654 | return ext4_ind_trans_blocks(inode, lblocks); |
4834 | return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); | 4655 | return ext4_ext_index_trans_blocks(inode, pextents); |
4835 | } | 4656 | } |
4836 | 4657 | ||
4837 | /* | 4658 | /* |
@@ -4845,7 +4666,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4845 | * | 4666 | * |
4846 | * Also account for superblock, inode, quota and xattr blocks | 4667 | * Also account for superblock, inode, quota and xattr blocks |
4847 | */ | 4668 | */ |
4848 | static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 4669 | static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, |
4670 | int pextents) | ||
4849 | { | 4671 | { |
4850 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); | 4672 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); |
4851 | int gdpblocks; | 4673 | int gdpblocks; |
@@ -4853,14 +4675,10 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4853 | int ret = 0; | 4675 | int ret = 0; |
4854 | 4676 | ||
4855 | /* | 4677 | /* |
4856 | * How many index blocks do we need to touch to modify nrblocks? | 4678 | * How many index blocks do we need to touch to map @lblocks logical blocks |
4857 | * The "Chunk" flag indicates whether the nrblocks is | 4679 | * to @pextents physical extents? |
4858 | * physically contiguous on disk | ||
4859 | * | ||
4860 | * For Direct IO and fallocate, they call get_block to allocate | ||
4861 | * one single extent at a time, so they could set the "Chunk" flag | ||
4862 | */ | 4680 | */ |
4863 | idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk); | 4681 | idxblocks = ext4_index_trans_blocks(inode, lblocks, pextents); |
4864 | 4682 | ||
4865 | ret = idxblocks; | 4683 | ret = idxblocks; |
4866 | 4684 | ||
@@ -4868,12 +4686,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4868 | * Now let's see how many group bitmaps and group descriptors need | 4686 | * Now let's see how many group bitmaps and group descriptors need |
4869 | * to be accounted for | 4687 | * to be accounted for |
4870 | */ | 4688 | */ |
4871 | groups = idxblocks; | 4689 | groups = idxblocks + pextents; |
4872 | if (chunk) | ||
4873 | groups += 1; | ||
4874 | else | ||
4875 | groups += nrblocks; | ||
4876 | |||
4877 | gdpblocks = groups; | 4690 | gdpblocks = groups; |
4878 | if (groups > ngroups) | 4691 | if (groups > ngroups) |
4879 | groups = ngroups; | 4692 | groups = ngroups; |
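
With the chunk flag gone, the credit estimate above charges one potential bitmap-plus-descriptor touch per index block and per physical extent, clamped so a group is never counted twice. A compilable sketch of just that arithmetic — simplified; the clamp against the descriptor-block count and the superblock/inode/quota credits happen in lines the hunk elides:

    #include <stdio.h>

    /* Sketch of the new ext4_meta_trans_blocks() arithmetic only;
     * ngroups and gdb_count are made-up sample parameters. */
    static int meta_credit_estimate(int idxblocks, int pextents,
                                    int ngroups, int gdb_count)
    {
        int groups = idxblocks + pextents;   /* worst case: all distinct */
        int gdpblocks = groups;

        if (groups > ngroups)                /* cannot touch more bitmaps */
            groups = ngroups;                /* than there are groups     */
        if (gdpblocks > gdb_count)           /* descriptors share blocks  */
            gdpblocks = gdb_count;

        return idxblocks + groups + gdpblocks;
    }

    int main(void)
    {
        /* e.g. map 4 logical blocks into 2 extents via 1 index block */
        printf("credits = %d\n", meta_credit_estimate(1, 2, 1024, 32));
        return 0;
    }
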
@@ -4904,7 +4717,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) | |||
4904 | int bpp = ext4_journal_blocks_per_page(inode); | 4717 | int bpp = ext4_journal_blocks_per_page(inode); |
4905 | int ret; | 4718 | int ret; |
4906 | 4719 | ||
4907 | ret = ext4_meta_trans_blocks(inode, bpp, 0); | 4720 | ret = ext4_meta_trans_blocks(inode, bpp, bpp); |
4908 | 4721 | ||
4909 | /* Account for data blocks for journalled mode */ | 4722 | /* Account for data blocks for journalled mode */ |
4910 | if (ext4_should_journal_data(inode)) | 4723 | if (ext4_should_journal_data(inode)) |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 9491ac0590f7..c0427e2f6648 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -77,8 +77,10 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) | |||
77 | memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); | 77 | memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); |
78 | memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags)); | 78 | memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags)); |
79 | memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); | 79 | memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); |
80 | memswap(&ei1->i_es_tree, &ei2->i_es_tree, sizeof(ei1->i_es_tree)); | 80 | ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); |
81 | memswap(&ei1->i_es_lru_nr, &ei2->i_es_lru_nr, sizeof(ei1->i_es_lru_nr)); | 81 | ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); |
82 | ext4_es_lru_del(inode1); | ||
83 | ext4_es_lru_del(inode2); | ||
82 | 84 | ||
83 | isize = i_size_read(inode1); | 85 | isize = i_size_read(inode1); |
84 | i_size_write(inode1, i_size_read(inode2)); | 86 | i_size_write(inode1, i_size_read(inode2)); |
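
The reason the two memswap() calls on the extent-status fields go away: the cached trees and LRU entries contain pointers back into their owning inode, so byte-swapping the roots would leave both caches internally inconsistent. Dropping the caches and letting them rebuild on demand is the safe move. For reference, the field swaps that remain are built from a helper along these lines (user-space rewrite, not the kernel's copy; assumes non-overlapping buffers):

    #include <assert.h>
    #include <stdio.h>
    #include <string.h>

    /* Byte-wise swap of two equally sized objects. */
    static void memswap(void *a, void *b, size_t len)
    {
        unsigned char *pa = a, *pb = b;

        while (len--) {
            unsigned char tmp = *pa;
            *pa++ = *pb;
            *pb++ = tmp;
        }
    }

    int main(void)
    {
        long x = 1, y = 2;
        memswap(&x, &y, sizeof(x));
        assert(x == 2 && y == 1);
        printf("swapped\n");
        return 0;
    }
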
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index def84082a9a9..4bbbf13bd743 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2105,6 +2105,7 @@ repeat: | |||
2105 | group = ac->ac_g_ex.fe_group; | 2105 | group = ac->ac_g_ex.fe_group; |
2106 | 2106 | ||
2107 | for (i = 0; i < ngroups; group++, i++) { | 2107 | for (i = 0; i < ngroups; group++, i++) { |
2108 | cond_resched(); | ||
2108 | /* | 2109 | /* |
2109 | * Artificially restricted ngroups for non-extent | 2110 | * Artificially restricted ngroups for non-extent |
2110 | * files makes group > ngroups possible on first loop. | 2111 | * files makes group > ngroups possible on first loop. |
@@ -4405,17 +4406,20 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4405 | repeat: | 4406 | repeat: |
4406 | /* allocate space in core */ | 4407 | /* allocate space in core */ |
4407 | *errp = ext4_mb_regular_allocator(ac); | 4408 | *errp = ext4_mb_regular_allocator(ac); |
4408 | if (*errp) { | 4409 | if (*errp) |
4409 | ext4_discard_allocated_blocks(ac); | 4410 | goto discard_and_exit; |
4410 | goto errout; | ||
4411 | } | ||
4412 | 4411 | ||
4413 | /* as we've just preallocated more space than | 4412 | /* as we've just preallocated more space than |
4414 | * user requested orinally, we store allocated | 4413 | * user requested originally, we store allocated |
4415 | * space in a special descriptor */ | 4414 | * space in a special descriptor */ |
4416 | if (ac->ac_status == AC_STATUS_FOUND && | 4415 | if (ac->ac_status == AC_STATUS_FOUND && |
4417 | ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) | 4416 | ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) |
4418 | ext4_mb_new_preallocation(ac); | 4417 | *errp = ext4_mb_new_preallocation(ac); |
4418 | if (*errp) { | ||
4419 | discard_and_exit: | ||
4420 | ext4_discard_allocated_blocks(ac); | ||
4421 | goto errout; | ||
4422 | } | ||
4419 | } | 4423 | } |
4420 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | 4424 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
4421 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); | 4425 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); |
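
The restructured error path above funnels both failure sites — the regular allocator and the now-checked ext4_mb_new_preallocation() — through a single discard_and_exit label, even though the label sits inside the second if-block. A toy model showing the shape (stub names are invented):

    #include <stdio.h>

    /* Stubs standing in for the mballoc helpers. */
    static int regular_allocator(void)          { return 0; }
    static int new_preallocation(void)          { return -1; }
    static void discard_allocated_blocks(void)  { puts("discarded"); }

    /*
     * Both failure sites fall through to one discard_and_exit label,
     * so the cleanup is written exactly once. Jumping into the
     * if-body is legal C because labels have function scope.
     */
    static int allocate(int found, int over_allocated)
    {
        int err;

        err = regular_allocator();
        if (err)
            goto discard_and_exit;

        if (found && over_allocated) {
            err = new_preallocation();
            if (err) {
    discard_and_exit:
                discard_allocated_blocks();
                return err;
            }
        }
        return 0;
    }

    int main(void)
    {
        return allocate(1, 1) ? 1 : 0;
    }
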
@@ -4612,10 +4616,11 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4612 | BUG_ON(bh && (count > 1)); | 4616 | BUG_ON(bh && (count > 1)); |
4613 | 4617 | ||
4614 | for (i = 0; i < count; i++) { | 4618 | for (i = 0; i < count; i++) { |
4619 | cond_resched(); | ||
4615 | if (!bh) | 4620 | if (!bh) |
4616 | tbh = sb_find_get_block(inode->i_sb, | 4621 | tbh = sb_find_get_block(inode->i_sb, |
4617 | block + i); | 4622 | block + i); |
4618 | if (unlikely(!tbh)) | 4623 | if (!tbh) |
4619 | continue; | 4624 | continue; |
4620 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, | 4625 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
4621 | inode, tbh, block + i); | 4626 | inode, tbh, block + i); |
@@ -4735,11 +4740,16 @@ do_more: | |||
4735 | * blocks being freed are metadata. these blocks shouldn't | 4740 | * blocks being freed are metadata. these blocks shouldn't |
4736 | * be used until this transaction is committed | 4741 | * be used until this transaction is committed |
4737 | */ | 4742 | */ |
4743 | retry: | ||
4738 | new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); | 4744 | new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); |
4739 | if (!new_entry) { | 4745 | if (!new_entry) { |
4740 | ext4_mb_unload_buddy(&e4b); | 4746 | /* |
4741 | err = -ENOMEM; | 4747 | * We use a retry loop because |
4742 | goto error_return; | 4748 | * ext4_free_blocks() is not allowed to fail. |
4749 | */ | ||
4750 | cond_resched(); | ||
4751 | congestion_wait(BLK_RW_ASYNC, HZ/50); | ||
4752 | goto retry; | ||
4743 | } | 4753 | } |
4744 | new_entry->efd_start_cluster = bit; | 4754 | new_entry->efd_start_cluster = bit; |
4745 | new_entry->efd_group = block_group; | 4755 | new_entry->efd_group = block_group; |
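
ext4_free_blocks() has no way to hand -ENOMEM back to its callers at this point, so the hunk above trades the error return for a wait-and-retry loop. A rough user-space analogue of an allocation that is simply not allowed to fail — the sleep stands in for cond_resched()/congestion_wait(), which in the kernel give writeback a chance to free memory:

    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    /* Keep retrying with a short sleep instead of reporting failure. */
    static void *xalloc(size_t size)
    {
        for (;;) {
            void *p = malloc(size);
            if (p)
                return p;
            struct timespec ts = { 0, 20 * 1000 * 1000 }; /* ~HZ/50 */
            nanosleep(&ts, NULL);
        }
    }

    int main(void)
    {
        char *p = xalloc(64);
        snprintf(p, 64, "allocated");
        puts(p);
        free(p);
        return 0;
    }
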
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 3dcbf364022f..e86dddbd8296 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -912,7 +912,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
912 | struct page *pagep[2] = {NULL, NULL}; | 912 | struct page *pagep[2] = {NULL, NULL}; |
913 | handle_t *handle; | 913 | handle_t *handle; |
914 | ext4_lblk_t orig_blk_offset; | 914 | ext4_lblk_t orig_blk_offset; |
915 | long long offs = orig_page_offset << PAGE_CACHE_SHIFT; | ||
916 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; | 915 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; |
917 | unsigned int w_flags = 0; | 916 | unsigned int w_flags = 0; |
918 | unsigned int tmp_data_size, data_size, replaced_size; | 917 | unsigned int tmp_data_size, data_size, replaced_size; |
@@ -940,8 +939,6 @@ again: | |||
940 | orig_blk_offset = orig_page_offset * blocks_per_page + | 939 | orig_blk_offset = orig_page_offset * blocks_per_page + |
941 | data_offset_in_page; | 940 | data_offset_in_page; |
942 | 941 | ||
943 | offs = (long long)orig_blk_offset << orig_inode->i_blkbits; | ||
944 | |||
945 | /* Calculate data_size */ | 942 | /* Calculate data_size */ |
946 | if ((orig_blk_offset + block_len_in_page - 1) == | 943 | if ((orig_blk_offset + block_len_in_page - 1) == |
947 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { | 944 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6653fc35ecb7..35f55a0dbc4b 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -918,11 +918,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
918 | bh->b_data, bh->b_size, | 918 | bh->b_data, bh->b_size, |
919 | (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) | 919 | (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) |
920 | + ((char *)de - bh->b_data))) { | 920 | + ((char *)de - bh->b_data))) { |
921 | /* On error, skip the f_pos to the next block. */ | 921 | /* silently ignore the rest of the block */ |
922 | dir_file->f_pos = (dir_file->f_pos | | 922 | break; |
923 | (dir->i_sb->s_blocksize - 1)) + 1; | ||
924 | brelse(bh); | ||
925 | return count; | ||
926 | } | 923 | } |
927 | ext4fs_dirhash(de->name, de->name_len, hinfo); | 924 | ext4fs_dirhash(de->name, de->name_len, hinfo); |
928 | if ((hinfo->hash < start_hash) || | 925 | if ((hinfo->hash < start_hash) || |
@@ -2299,6 +2296,45 @@ retry: | |||
2299 | return err; | 2296 | return err; |
2300 | } | 2297 | } |
2301 | 2298 | ||
2299 | static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
2300 | { | ||
2301 | handle_t *handle; | ||
2302 | struct inode *inode; | ||
2303 | int err, retries = 0; | ||
2304 | |||
2305 | dquot_initialize(dir); | ||
2306 | |||
2307 | retry: | ||
2308 | inode = ext4_new_inode_start_handle(dir, mode, | ||
2309 | NULL, 0, NULL, | ||
2310 | EXT4_HT_DIR, | ||
2311 | EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + | ||
2312 | 4 + EXT4_XATTR_TRANS_BLOCKS); | ||
2313 | handle = ext4_journal_current_handle(); | ||
2314 | err = PTR_ERR(inode); | ||
2315 | if (!IS_ERR(inode)) { | ||
2316 | inode->i_op = &ext4_file_inode_operations; | ||
2317 | inode->i_fop = &ext4_file_operations; | ||
2318 | ext4_set_aops(inode); | ||
2319 | d_tmpfile(dentry, inode); | ||
2320 | err = ext4_orphan_add(handle, inode); | ||
2321 | if (err) | ||
2322 | goto err_drop_inode; | ||
2323 | mark_inode_dirty(inode); | ||
2324 | unlock_new_inode(inode); | ||
2325 | } | ||
2326 | if (handle) | ||
2327 | ext4_journal_stop(handle); | ||
2328 | if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) | ||
2329 | goto retry; | ||
2330 | return err; | ||
2331 | err_drop_inode: | ||
2332 | ext4_journal_stop(handle); | ||
2333 | unlock_new_inode(inode); | ||
2334 | iput(inode); | ||
2335 | return err; | ||
2336 | } | ||
2337 | |||
2302 | struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, | 2338 | struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, |
2303 | struct ext4_dir_entry_2 *de, | 2339 | struct ext4_dir_entry_2 *de, |
2304 | int blocksize, int csum_size, | 2340 | int blocksize, int csum_size, |
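
The new ext4_tmpfile() backs the O_TMPFILE open flag: the inode is created with no directory entry and parked on the orphan list, so a crash reclaims it automatically. From user space that looks roughly like this (paths are placeholders; the open fails cleanly on kernels or filesystems without .tmpfile support):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        char path[64];
        int fd = open("/tmp", O_TMPFILE | O_RDWR, 0600);

        if (fd < 0) {
            perror("O_TMPFILE");      /* needs kernel + fs support */
            return 1;
        }
        if (write(fd, "scratch\n", 8) != 8)
            perror("write");

        /* Optionally materialize the file under a real name. */
        snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
        if (linkat(AT_FDCWD, path, AT_FDCWD, "/tmp/now-visible",
                   AT_SYMLINK_FOLLOW) < 0)
            perror("linkat");

        close(fd);
        return 0;
    }

The linkat() step is exactly the case the ext4_link() hunk below handles: the first link to a tmpfile drops the inode from the orphan list.
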
@@ -2906,7 +2942,7 @@ static int ext4_link(struct dentry *old_dentry, | |||
2906 | retry: | 2942 | retry: |
2907 | handle = ext4_journal_start(dir, EXT4_HT_DIR, | 2943 | handle = ext4_journal_start(dir, EXT4_HT_DIR, |
2908 | (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | 2944 | (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + |
2909 | EXT4_INDEX_EXTRA_TRANS_BLOCKS)); | 2945 | EXT4_INDEX_EXTRA_TRANS_BLOCKS) + 1); |
2910 | if (IS_ERR(handle)) | 2946 | if (IS_ERR(handle)) |
2911 | return PTR_ERR(handle); | 2947 | return PTR_ERR(handle); |
2912 | 2948 | ||
@@ -2920,6 +2956,11 @@ retry: | |||
2920 | err = ext4_add_entry(handle, dentry, inode); | 2956 | err = ext4_add_entry(handle, dentry, inode); |
2921 | if (!err) { | 2957 | if (!err) { |
2922 | ext4_mark_inode_dirty(handle, inode); | 2958 | ext4_mark_inode_dirty(handle, inode); |
2959 | /* this can happen only for a tmpfile being | ||
2960 | * linked for the first time | ||
2961 | */ | ||
2962 | if (inode->i_nlink == 1) | ||
2963 | ext4_orphan_del(handle, inode); | ||
2923 | d_instantiate(dentry, inode); | 2964 | d_instantiate(dentry, inode); |
2924 | } else { | 2965 | } else { |
2925 | drop_nlink(inode); | 2966 | drop_nlink(inode); |
@@ -3172,6 +3213,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
3172 | .mkdir = ext4_mkdir, | 3213 | .mkdir = ext4_mkdir, |
3173 | .rmdir = ext4_rmdir, | 3214 | .rmdir = ext4_rmdir, |
3174 | .mknod = ext4_mknod, | 3215 | .mknod = ext4_mknod, |
3216 | .tmpfile = ext4_tmpfile, | ||
3175 | .rename = ext4_rename, | 3217 | .rename = ext4_rename, |
3176 | .setattr = ext4_setattr, | 3218 | .setattr = ext4_setattr, |
3177 | .setxattr = generic_setxattr, | 3219 | .setxattr = generic_setxattr, |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 4acf1f78881b..6625d210fb45 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
28 | #include <linux/ratelimit.h> | ||
28 | 29 | ||
29 | #include "ext4_jbd2.h" | 30 | #include "ext4_jbd2.h" |
30 | #include "xattr.h" | 31 | #include "xattr.h" |
@@ -46,46 +47,121 @@ void ext4_exit_pageio(void) | |||
46 | } | 47 | } |
47 | 48 | ||
48 | /* | 49 | /* |
49 | * This function is called by ext4_evict_inode() to make sure there is | 50 | * Print an buffer I/O error compatible with the fs/buffer.c. This |
50 | * no more pending I/O completion work left to do. | 51 | * provides compatibility with dmesg scrapers that look for a specific |
52 | * buffer I/O error message. We really need a unified error reporting | ||
53 | * structure to userspace ala Digital Unix's uerf system, but it's | ||
54 | * probably not going to happen in my lifetime, due to LKML politics... | ||
51 | */ | 55 | */ |
52 | void ext4_ioend_shutdown(struct inode *inode) | 56 | static void buffer_io_error(struct buffer_head *bh) |
57 | { | ||
58 | char b[BDEVNAME_SIZE]; | ||
59 | printk_ratelimited(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", | ||
60 | bdevname(bh->b_bdev, b), | ||
61 | (unsigned long long)bh->b_blocknr); | ||
62 | } | ||
63 | |||
64 | static void ext4_finish_bio(struct bio *bio) | ||
53 | { | 65 | { |
54 | wait_queue_head_t *wq = ext4_ioend_wq(inode); | 66 | int i; |
67 | int error = !test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
55 | 68 | ||
56 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); | 69 | for (i = 0; i < bio->bi_vcnt; i++) { |
57 | /* | 70 | struct bio_vec *bvec = &bio->bi_io_vec[i]; |
58 | * We need to make sure the work structure is finished being | 71 | struct page *page = bvec->bv_page; |
59 | * used before we let the inode get destroyed. | 72 | struct buffer_head *bh, *head; |
60 | */ | 73 | unsigned bio_start = bvec->bv_offset; |
61 | if (work_pending(&EXT4_I(inode)->i_unwritten_work)) | 74 | unsigned bio_end = bio_start + bvec->bv_len; |
62 | cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); | 75 | unsigned under_io = 0; |
76 | unsigned long flags; | ||
77 | |||
78 | if (!page) | ||
79 | continue; | ||
80 | |||
81 | if (error) { | ||
82 | SetPageError(page); | ||
83 | set_bit(AS_EIO, &page->mapping->flags); | ||
84 | } | ||
85 | bh = head = page_buffers(page); | ||
86 | /* | ||
87 | * We check all buffers in the page under BH_Uptodate_Lock | ||
88 | * to avoid races with other end io clearing async_write flags | ||
89 | */ | ||
90 | local_irq_save(flags); | ||
91 | bit_spin_lock(BH_Uptodate_Lock, &head->b_state); | ||
92 | do { | ||
93 | if (bh_offset(bh) < bio_start || | ||
94 | bh_offset(bh) + bh->b_size > bio_end) { | ||
95 | if (buffer_async_write(bh)) | ||
96 | under_io++; | ||
97 | continue; | ||
98 | } | ||
99 | clear_buffer_async_write(bh); | ||
100 | if (error) | ||
101 | buffer_io_error(bh); | ||
102 | } while ((bh = bh->b_this_page) != head); | ||
103 | bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); | ||
104 | local_irq_restore(flags); | ||
105 | if (!under_io) | ||
106 | end_page_writeback(page); | ||
107 | } | ||
108 | } | ||
109 | |||
110 | static void ext4_release_io_end(ext4_io_end_t *io_end) | ||
111 | { | ||
112 | struct bio *bio, *next_bio; | ||
113 | |||
114 | BUG_ON(!list_empty(&io_end->list)); | ||
115 | BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); | ||
116 | WARN_ON(io_end->handle); | ||
117 | |||
118 | if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count)) | ||
119 | wake_up_all(ext4_ioend_wq(io_end->inode)); | ||
120 | |||
121 | for (bio = io_end->bio; bio; bio = next_bio) { | ||
122 | next_bio = bio->bi_private; | ||
123 | ext4_finish_bio(bio); | ||
124 | bio_put(bio); | ||
125 | } | ||
126 | if (io_end->flag & EXT4_IO_END_DIRECT) | ||
127 | inode_dio_done(io_end->inode); | ||
128 | if (io_end->iocb) | ||
129 | aio_complete(io_end->iocb, io_end->result, 0); | ||
130 | kmem_cache_free(io_end_cachep, io_end); | ||
63 | } | 131 | } |
64 | 132 | ||
65 | void ext4_free_io_end(ext4_io_end_t *io) | 133 | static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) |
66 | { | 134 | { |
67 | BUG_ON(!io); | 135 | struct inode *inode = io_end->inode; |
68 | BUG_ON(!list_empty(&io->list)); | ||
69 | BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); | ||
70 | 136 | ||
71 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count)) | 137 | io_end->flag &= ~EXT4_IO_END_UNWRITTEN; |
72 | wake_up_all(ext4_ioend_wq(io->inode)); | 138 | /* Wake up anyone waiting on unwritten extent conversion */ |
73 | kmem_cache_free(io_end_cachep, io); | 139 | if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) |
140 | wake_up_all(ext4_ioend_wq(inode)); | ||
74 | } | 141 | } |
75 | 142 | ||
76 | /* check a range of space and convert unwritten extents to written. */ | 143 | /* |
144 | * Check a range of space and convert unwritten extents to written. Note that | ||
145 | * we are protected from truncate touching the same part of the extent tree by the | ||
146 | * fact that truncate code waits for all DIO to finish (thus exclusion from | ||
147 | * direct IO is achieved) and also waits for PageWriteback bits. Thus we | ||
148 | * cannot get to ext4_ext_truncate() before all IOs overlapping that range are | ||
149 | * completed (happens from ext4_free_ioend()). | ||
150 | */ | ||
77 | static int ext4_end_io(ext4_io_end_t *io) | 151 | static int ext4_end_io(ext4_io_end_t *io) |
78 | { | 152 | { |
79 | struct inode *inode = io->inode; | 153 | struct inode *inode = io->inode; |
80 | loff_t offset = io->offset; | 154 | loff_t offset = io->offset; |
81 | ssize_t size = io->size; | 155 | ssize_t size = io->size; |
156 | handle_t *handle = io->handle; | ||
82 | int ret = 0; | 157 | int ret = 0; |
83 | 158 | ||
84 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | 159 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," |
85 | "list->prev 0x%p\n", | 160 | "list->prev 0x%p\n", |
86 | io, inode->i_ino, io->list.next, io->list.prev); | 161 | io, inode->i_ino, io->list.next, io->list.prev); |
87 | 162 | ||
88 | ret = ext4_convert_unwritten_extents(inode, offset, size); | 163 | io->handle = NULL; /* Following call will use up the handle */ |
164 | ret = ext4_convert_unwritten_extents(handle, inode, offset, size); | ||
89 | if (ret < 0) { | 165 | if (ret < 0) { |
90 | ext4_msg(inode->i_sb, KERN_EMERG, | 166 | ext4_msg(inode->i_sb, KERN_EMERG, |
91 | "failed to convert unwritten extents to written " | 167 | "failed to convert unwritten extents to written " |
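
ext4_finish_bio() above walks every buffer in the page under BH_Uptodate_Lock, a bit-spinlock: one bit of the existing b_state word doubles as the lock, so buffer_heads need no extra lock field. A C11 user-space analogue of the idea (the kernel version additionally disables local interrupts, as the local_irq_save() calls show):

    #include <stdatomic.h>
    #include <stdio.h>

    #define LOCK_BIT (1u << 0)

    static void bit_lock(atomic_uint *word)
    {
        /* spin until we observe the bit clear at the moment we set it */
        while (atomic_fetch_or_explicit(word, LOCK_BIT,
                                        memory_order_acquire) & LOCK_BIT)
            ;
    }

    static void bit_unlock(atomic_uint *word)
    {
        atomic_fetch_and_explicit(word, ~LOCK_BIT, memory_order_release);
    }

    int main(void)
    {
        atomic_uint b_state = 0;

        bit_lock(&b_state);
        /* ... walk all buffers in the page, flip flags ... */
        bit_unlock(&b_state);
        puts("locked and unlocked");
        return 0;
    }
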
@@ -93,30 +169,22 @@ static int ext4_end_io(ext4_io_end_t *io) | |||
93 | "(inode %lu, offset %llu, size %zd, error %d)", | 169 | "(inode %lu, offset %llu, size %zd, error %d)", |
94 | inode->i_ino, offset, size, ret); | 170 | inode->i_ino, offset, size, ret); |
95 | } | 171 | } |
96 | /* Wake up anyone waiting on unwritten extent conversion */ | 172 | ext4_clear_io_unwritten_flag(io); |
97 | if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) | 173 | ext4_release_io_end(io); |
98 | wake_up_all(ext4_ioend_wq(inode)); | ||
99 | if (io->flag & EXT4_IO_END_DIRECT) | ||
100 | inode_dio_done(inode); | ||
101 | if (io->iocb) | ||
102 | aio_complete(io->iocb, io->result, 0); | ||
103 | return ret; | 174 | return ret; |
104 | } | 175 | } |
105 | 176 | ||
106 | static void dump_completed_IO(struct inode *inode) | 177 | static void dump_completed_IO(struct inode *inode, struct list_head *head) |
107 | { | 178 | { |
108 | #ifdef EXT4FS_DEBUG | 179 | #ifdef EXT4FS_DEBUG |
109 | struct list_head *cur, *before, *after; | 180 | struct list_head *cur, *before, *after; |
110 | ext4_io_end_t *io, *io0, *io1; | 181 | ext4_io_end_t *io, *io0, *io1; |
111 | 182 | ||
112 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)) { | 183 | if (list_empty(head)) |
113 | ext4_debug("inode %lu completed_io list is empty\n", | ||
114 | inode->i_ino); | ||
115 | return; | 184 | return; |
116 | } | ||
117 | 185 | ||
118 | ext4_debug("Dump inode %lu completed_io list\n", inode->i_ino); | 186 | ext4_debug("Dump inode %lu completed io list\n", inode->i_ino); |
119 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list) { | 187 | list_for_each_entry(io, head, list) { |
120 | cur = &io->list; | 188 | cur = &io->list; |
121 | before = cur->prev; | 189 | before = cur->prev; |
122 | io0 = container_of(before, ext4_io_end_t, list); | 190 | io0 = container_of(before, ext4_io_end_t, list); |
@@ -130,23 +198,30 @@ static void dump_completed_IO(struct inode *inode) | |||
130 | } | 198 | } |
131 | 199 | ||
132 | /* Add the io_end to per-inode completed end_io list. */ | 200 | /* Add the io_end to per-inode completed end_io list. */ |
133 | void ext4_add_complete_io(ext4_io_end_t *io_end) | 201 | static void ext4_add_complete_io(ext4_io_end_t *io_end) |
134 | { | 202 | { |
135 | struct ext4_inode_info *ei = EXT4_I(io_end->inode); | 203 | struct ext4_inode_info *ei = EXT4_I(io_end->inode); |
136 | struct workqueue_struct *wq; | 204 | struct workqueue_struct *wq; |
137 | unsigned long flags; | 205 | unsigned long flags; |
138 | 206 | ||
139 | BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); | 207 | BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); |
140 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | ||
141 | |||
142 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 208 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
143 | if (list_empty(&ei->i_completed_io_list)) | 209 | if (io_end->handle) { |
144 | queue_work(wq, &ei->i_unwritten_work); | 210 | wq = EXT4_SB(io_end->inode->i_sb)->rsv_conversion_wq; |
145 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | 211 | if (list_empty(&ei->i_rsv_conversion_list)) |
212 | queue_work(wq, &ei->i_rsv_conversion_work); | ||
213 | list_add_tail(&io_end->list, &ei->i_rsv_conversion_list); | ||
214 | } else { | ||
215 | wq = EXT4_SB(io_end->inode->i_sb)->unrsv_conversion_wq; | ||
216 | if (list_empty(&ei->i_unrsv_conversion_list)) | ||
217 | queue_work(wq, &ei->i_unrsv_conversion_work); | ||
218 | list_add_tail(&io_end->list, &ei->i_unrsv_conversion_list); | ||
219 | } | ||
146 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 220 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
147 | } | 221 | } |
148 | 222 | ||
149 | static int ext4_do_flush_completed_IO(struct inode *inode) | 223 | static int ext4_do_flush_completed_IO(struct inode *inode, |
224 | struct list_head *head) | ||
150 | { | 225 | { |
151 | ext4_io_end_t *io; | 226 | ext4_io_end_t *io; |
152 | struct list_head unwritten; | 227 | struct list_head unwritten; |
@@ -155,8 +230,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode) | |||
155 | int err, ret = 0; | 230 | int err, ret = 0; |
156 | 231 | ||
157 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 232 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
158 | dump_completed_IO(inode); | 233 | dump_completed_IO(inode, head); |
159 | list_replace_init(&ei->i_completed_io_list, &unwritten); | 234 | list_replace_init(head, &unwritten); |
160 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 235 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
161 | 236 | ||
162 | while (!list_empty(&unwritten)) { | 237 | while (!list_empty(&unwritten)) { |
@@ -167,30 +242,25 @@ static int ext4_do_flush_completed_IO(struct inode *inode) | |||
167 | err = ext4_end_io(io); | 242 | err = ext4_end_io(io); |
168 | if (unlikely(!ret && err)) | 243 | if (unlikely(!ret && err)) |
169 | ret = err; | 244 | ret = err; |
170 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
171 | ext4_free_io_end(io); | ||
172 | } | 245 | } |
173 | return ret; | 246 | return ret; |
174 | } | 247 | } |
175 | 248 | ||
176 | /* | 249 | /* |
177 | * work on completed aio dio IO, to convert unwritten extents to extents | 250 | * work on completed IO, to convert unwritten extents to extents |
178 | */ | 251 | */ |
179 | void ext4_end_io_work(struct work_struct *work) | 252 | void ext4_end_io_rsv_work(struct work_struct *work) |
180 | { | 253 | { |
181 | struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info, | 254 | struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info, |
182 | i_unwritten_work); | 255 | i_rsv_conversion_work); |
183 | ext4_do_flush_completed_IO(&ei->vfs_inode); | 256 | ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_rsv_conversion_list); |
184 | } | 257 | } |
185 | 258 | ||
186 | int ext4_flush_unwritten_io(struct inode *inode) | 259 | void ext4_end_io_unrsv_work(struct work_struct *work) |
187 | { | 260 | { |
188 | int ret; | 261 | struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info, |
189 | WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) && | 262 | i_unrsv_conversion_work); |
190 | !(inode->i_state & I_FREEING)); | 263 | ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_unrsv_conversion_list); |
191 | ret = ext4_do_flush_completed_IO(inode); | ||
192 | ext4_unwritten_wait(inode); | ||
193 | return ret; | ||
194 | } | 264 | } |
195 | 265 | ||
196 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | 266 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) |
@@ -200,83 +270,59 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | |||
200 | atomic_inc(&EXT4_I(inode)->i_ioend_count); | 270 | atomic_inc(&EXT4_I(inode)->i_ioend_count); |
201 | io->inode = inode; | 271 | io->inode = inode; |
202 | INIT_LIST_HEAD(&io->list); | 272 | INIT_LIST_HEAD(&io->list); |
273 | atomic_set(&io->count, 1); | ||
203 | } | 274 | } |
204 | return io; | 275 | return io; |
205 | } | 276 | } |
206 | 277 | ||
207 | /* | 278 | void ext4_put_io_end_defer(ext4_io_end_t *io_end) |
208 | * Print a buffer I/O error compatible with fs/buffer.c. This | ||
209 | * provides compatibility with dmesg scrapers that look for a specific | ||
210 | * buffer I/O error message. We really need a unified error reporting | ||
211 | * structure to userspace ala Digital Unix's uerf system, but it's | ||
212 | * probably not going to happen in my lifetime, due to LKML politics... | ||
213 | */ | ||
214 | static void buffer_io_error(struct buffer_head *bh) | ||
215 | { | 279 | { |
216 | char b[BDEVNAME_SIZE]; | 280 | if (atomic_dec_and_test(&io_end->count)) { |
217 | printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", | 281 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) { |
218 | bdevname(bh->b_bdev, b), | 282 | ext4_release_io_end(io_end); |
219 | (unsigned long long)bh->b_blocknr); | 283 | return; |
284 | } | ||
285 | ext4_add_complete_io(io_end); | ||
286 | } | ||
220 | } | 287 | } |
221 | 288 | ||
289 | int ext4_put_io_end(ext4_io_end_t *io_end) | ||
290 | { | ||
291 | int err = 0; | ||
292 | |||
293 | if (atomic_dec_and_test(&io_end->count)) { | ||
294 | if (io_end->flag & EXT4_IO_END_UNWRITTEN) { | ||
295 | err = ext4_convert_unwritten_extents(io_end->handle, | ||
296 | io_end->inode, io_end->offset, | ||
297 | io_end->size); | ||
298 | io_end->handle = NULL; | ||
299 | ext4_clear_io_unwritten_flag(io_end); | ||
300 | } | ||
301 | ext4_release_io_end(io_end); | ||
302 | } | ||
303 | return err; | ||
304 | } | ||
305 | |||
306 | ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) | ||
307 | { | ||
308 | atomic_inc(&io_end->count); | ||
309 | return io_end; | ||
310 | } | ||
311 | |||
312 | /* BIO completion function for page writeback */ | ||
222 | static void ext4_end_bio(struct bio *bio, int error) | 313 | static void ext4_end_bio(struct bio *bio, int error) |
223 | { | 314 | { |
224 | ext4_io_end_t *io_end = bio->bi_private; | 315 | ext4_io_end_t *io_end = bio->bi_private; |
225 | struct inode *inode; | ||
226 | int i; | ||
227 | int blocksize; | ||
228 | sector_t bi_sector = bio->bi_sector; | 316 | sector_t bi_sector = bio->bi_sector; |
229 | 317 | ||
230 | BUG_ON(!io_end); | 318 | BUG_ON(!io_end); |
231 | inode = io_end->inode; | ||
232 | blocksize = 1 << inode->i_blkbits; | ||
233 | bio->bi_private = NULL; | ||
234 | bio->bi_end_io = NULL; | 319 | bio->bi_end_io = NULL; |
235 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | 320 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) |
236 | error = 0; | 321 | error = 0; |
237 | for (i = 0; i < bio->bi_vcnt; i++) { | ||
238 | struct bio_vec *bvec = &bio->bi_io_vec[i]; | ||
239 | struct page *page = bvec->bv_page; | ||
240 | struct buffer_head *bh, *head; | ||
241 | unsigned bio_start = bvec->bv_offset; | ||
242 | unsigned bio_end = bio_start + bvec->bv_len; | ||
243 | unsigned under_io = 0; | ||
244 | unsigned long flags; | ||
245 | |||
246 | if (!page) | ||
247 | continue; | ||
248 | |||
249 | if (error) { | ||
250 | SetPageError(page); | ||
251 | set_bit(AS_EIO, &page->mapping->flags); | ||
252 | } | ||
253 | bh = head = page_buffers(page); | ||
254 | /* | ||
255 | * We check all buffers in the page under BH_Uptodate_Lock | ||
256 | * to avoid races with other end io clearing async_write flags | ||
257 | */ | ||
258 | local_irq_save(flags); | ||
259 | bit_spin_lock(BH_Uptodate_Lock, &head->b_state); | ||
260 | do { | ||
261 | if (bh_offset(bh) < bio_start || | ||
262 | bh_offset(bh) + blocksize > bio_end) { | ||
263 | if (buffer_async_write(bh)) | ||
264 | under_io++; | ||
265 | continue; | ||
266 | } | ||
267 | clear_buffer_async_write(bh); | ||
268 | if (error) | ||
269 | buffer_io_error(bh); | ||
270 | } while ((bh = bh->b_this_page) != head); | ||
271 | bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); | ||
272 | local_irq_restore(flags); | ||
273 | if (!under_io) | ||
274 | end_page_writeback(page); | ||
275 | } | ||
276 | bio_put(bio); | ||
277 | 322 | ||
278 | if (error) { | 323 | if (error) { |
279 | io_end->flag |= EXT4_IO_END_ERROR; | 324 | struct inode *inode = io_end->inode; |
325 | |||
280 | ext4_warning(inode->i_sb, "I/O error writing to inode %lu " | 326 | ext4_warning(inode->i_sb, "I/O error writing to inode %lu " |
281 | "(offset %llu size %ld starting block %llu)", | 327 | "(offset %llu size %ld starting block %llu)", |
282 | inode->i_ino, | 328 | inode->i_ino, |
@@ -286,12 +332,23 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
286 | bi_sector >> (inode->i_blkbits - 9)); | 332 | bi_sector >> (inode->i_blkbits - 9)); |
287 | } | 333 | } |
288 | 334 | ||
289 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | 335 | if (io_end->flag & EXT4_IO_END_UNWRITTEN) { |
290 | ext4_free_io_end(io_end); | 336 | /* |
291 | return; | 337 | * Link bio into list hanging from io_end. We have to do it |
338 | * atomically as bio completions can be racing against each | ||
339 | * other. | ||
340 | */ | ||
341 | bio->bi_private = xchg(&io_end->bio, bio); | ||
342 | ext4_put_io_end_defer(io_end); | ||
343 | } else { | ||
344 | /* | ||
345 | * Drop io_end reference early. Inode can get freed once | ||
346 | * we finish the bio. | ||
347 | */ | ||
348 | ext4_put_io_end_defer(io_end); | ||
349 | ext4_finish_bio(bio); | ||
350 | bio_put(bio); | ||
292 | } | 351 | } |
293 | |||
294 | ext4_add_complete_io(io_end); | ||
295 | } | 352 | } |
296 | 353 | ||
297 | void ext4_io_submit(struct ext4_io_submit *io) | 354 | void ext4_io_submit(struct ext4_io_submit *io) |
@@ -305,43 +362,38 @@ void ext4_io_submit(struct ext4_io_submit *io) | |||
305 | bio_put(io->io_bio); | 362 | bio_put(io->io_bio); |
306 | } | 363 | } |
307 | io->io_bio = NULL; | 364 | io->io_bio = NULL; |
308 | io->io_op = 0; | 365 | } |
366 | |||
367 | void ext4_io_submit_init(struct ext4_io_submit *io, | ||
368 | struct writeback_control *wbc) | ||
369 | { | ||
370 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | ||
371 | io->io_bio = NULL; | ||
309 | io->io_end = NULL; | 372 | io->io_end = NULL; |
310 | } | 373 | } |
311 | 374 | ||
312 | static int io_submit_init(struct ext4_io_submit *io, | 375 | static int io_submit_init_bio(struct ext4_io_submit *io, |
313 | struct inode *inode, | 376 | struct buffer_head *bh) |
314 | struct writeback_control *wbc, | ||
315 | struct buffer_head *bh) | ||
316 | { | 377 | { |
317 | ext4_io_end_t *io_end; | ||
318 | struct page *page = bh->b_page; | ||
319 | int nvecs = bio_get_nr_vecs(bh->b_bdev); | 378 | int nvecs = bio_get_nr_vecs(bh->b_bdev); |
320 | struct bio *bio; | 379 | struct bio *bio; |
321 | 380 | ||
322 | io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
323 | if (!io_end) | ||
324 | return -ENOMEM; | ||
325 | bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); | 381 | bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); |
382 | if (!bio) | ||
383 | return -ENOMEM; | ||
326 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | 384 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); |
327 | bio->bi_bdev = bh->b_bdev; | 385 | bio->bi_bdev = bh->b_bdev; |
328 | bio->bi_private = io->io_end = io_end; | ||
329 | bio->bi_end_io = ext4_end_bio; | 386 | bio->bi_end_io = ext4_end_bio; |
330 | 387 | bio->bi_private = ext4_get_io_end(io->io_end); | |
331 | io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); | ||
332 | |||
333 | io->io_bio = bio; | 388 | io->io_bio = bio; |
334 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | ||
335 | io->io_next_block = bh->b_blocknr; | 389 | io->io_next_block = bh->b_blocknr; |
336 | return 0; | 390 | return 0; |
337 | } | 391 | } |
338 | 392 | ||
339 | static int io_submit_add_bh(struct ext4_io_submit *io, | 393 | static int io_submit_add_bh(struct ext4_io_submit *io, |
340 | struct inode *inode, | 394 | struct inode *inode, |
341 | struct writeback_control *wbc, | ||
342 | struct buffer_head *bh) | 395 | struct buffer_head *bh) |
343 | { | 396 | { |
344 | ext4_io_end_t *io_end; | ||
345 | int ret; | 397 | int ret; |
346 | 398 | ||
347 | if (io->io_bio && bh->b_blocknr != io->io_next_block) { | 399 | if (io->io_bio && bh->b_blocknr != io->io_next_block) { |
@@ -349,18 +401,14 @@ submit_and_retry: | |||
349 | ext4_io_submit(io); | 401 | ext4_io_submit(io); |
350 | } | 402 | } |
351 | if (io->io_bio == NULL) { | 403 | if (io->io_bio == NULL) { |
352 | ret = io_submit_init(io, inode, wbc, bh); | 404 | ret = io_submit_init_bio(io, bh); |
353 | if (ret) | 405 | if (ret) |
354 | return ret; | 406 | return ret; |
355 | } | 407 | } |
356 | io_end = io->io_end; | ||
357 | if (test_clear_buffer_uninit(bh)) | ||
358 | ext4_set_io_unwritten_flag(inode, io_end); | ||
359 | io->io_end->size += bh->b_size; | ||
360 | io->io_next_block++; | ||
361 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | 408 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); |
362 | if (ret != bh->b_size) | 409 | if (ret != bh->b_size) |
363 | goto submit_and_retry; | 410 | goto submit_and_retry; |
411 | io->io_next_block++; | ||
364 | return 0; | 412 | return 0; |
365 | } | 413 | } |
366 | 414 | ||
@@ -432,7 +480,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
432 | do { | 480 | do { |
433 | if (!buffer_async_write(bh)) | 481 | if (!buffer_async_write(bh)) |
434 | continue; | 482 | continue; |
435 | ret = io_submit_add_bh(io, inode, wbc, bh); | 483 | ret = io_submit_add_bh(io, inode, bh); |
436 | if (ret) { | 484 | if (ret) { |
437 | /* | 485 | /* |
438 | * We only get here on ENOMEM. Not much else | 486 | * We only get here on ENOMEM. Not much else |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b27c96d01965..c5adbb318a90 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -79,12 +79,20 @@ static int verify_group_input(struct super_block *sb, | |||
79 | ext4_fsblk_t end = start + input->blocks_count; | 79 | ext4_fsblk_t end = start + input->blocks_count; |
80 | ext4_group_t group = input->group; | 80 | ext4_group_t group = input->group; |
81 | ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; | 81 | ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; |
82 | unsigned overhead = ext4_group_overhead_blocks(sb, group); | 82 | unsigned overhead; |
83 | ext4_fsblk_t metaend = start + overhead; | 83 | ext4_fsblk_t metaend; |
84 | struct buffer_head *bh = NULL; | 84 | struct buffer_head *bh = NULL; |
85 | ext4_grpblk_t free_blocks_count, offset; | 85 | ext4_grpblk_t free_blocks_count, offset; |
86 | int err = -EINVAL; | 86 | int err = -EINVAL; |
87 | 87 | ||
88 | if (group != sbi->s_groups_count) { | ||
89 | ext4_warning(sb, "Cannot add at group %u (only %u groups)", | ||
90 | input->group, sbi->s_groups_count); | ||
91 | return -EINVAL; | ||
92 | } | ||
93 | |||
94 | overhead = ext4_group_overhead_blocks(sb, group); | ||
95 | metaend = start + overhead; | ||
88 | input->free_blocks_count = free_blocks_count = | 96 | input->free_blocks_count = free_blocks_count = |
89 | input->blocks_count - 2 - overhead - sbi->s_itb_per_group; | 97 | input->blocks_count - 2 - overhead - sbi->s_itb_per_group; |
90 | 98 | ||
@@ -96,10 +104,7 @@ static int verify_group_input(struct super_block *sb, | |||
96 | free_blocks_count, input->reserved_blocks); | 104 | free_blocks_count, input->reserved_blocks); |
97 | 105 | ||
98 | ext4_get_group_no_and_offset(sb, start, NULL, &offset); | 106 | ext4_get_group_no_and_offset(sb, start, NULL, &offset); |
99 | if (group != sbi->s_groups_count) | 107 | if (offset != 0) |
100 | ext4_warning(sb, "Cannot add at group %u (only %u groups)", | ||
101 | input->group, sbi->s_groups_count); | ||
102 | else if (offset != 0) | ||
103 | ext4_warning(sb, "Last group not full"); | 108 | ext4_warning(sb, "Last group not full"); |
104 | else if (input->reserved_blocks > input->blocks_count / 5) | 109 | else if (input->reserved_blocks > input->blocks_count / 5) |
105 | ext4_warning(sb, "Reserved blocks too high (%u)", | 110 | ext4_warning(sb, "Reserved blocks too high (%u)", |
@@ -1551,11 +1556,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
1551 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? | 1556 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? |
1552 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; | 1557 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; |
1553 | struct inode *inode = NULL; | 1558 | struct inode *inode = NULL; |
1554 | int gdb_off, gdb_num; | 1559 | int gdb_off; |
1555 | int err; | 1560 | int err; |
1556 | __u16 bg_flags = 0; | 1561 | __u16 bg_flags = 0; |
1557 | 1562 | ||
1558 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); | ||
1559 | gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); | 1563 | gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); |
1560 | 1564 | ||
1561 | if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, | 1565 | if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, |
@@ -1656,12 +1660,10 @@ errout: | |||
1656 | err = err2; | 1660 | err = err2; |
1657 | 1661 | ||
1658 | if (!err) { | 1662 | if (!err) { |
1659 | ext4_fsblk_t first_block; | ||
1660 | first_block = ext4_group_first_block_no(sb, 0); | ||
1661 | if (test_opt(sb, DEBUG)) | 1663 | if (test_opt(sb, DEBUG)) |
1662 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu " | 1664 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu " |
1663 | "blocks\n", ext4_blocks_count(es)); | 1665 | "blocks\n", ext4_blocks_count(es)); |
1664 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block, | 1666 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, |
1665 | (char *)es, sizeof(struct ext4_super_block), 0); | 1667 | (char *)es, sizeof(struct ext4_super_block), 0); |
1666 | } | 1668 | } |
1667 | return err; | 1669 | return err; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 94cc84db7c9a..b59373b625e9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -69,6 +69,7 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
69 | static void ext4_clear_journal_err(struct super_block *sb, | 69 | static void ext4_clear_journal_err(struct super_block *sb, |
70 | struct ext4_super_block *es); | 70 | struct ext4_super_block *es); |
71 | static int ext4_sync_fs(struct super_block *sb, int wait); | 71 | static int ext4_sync_fs(struct super_block *sb, int wait); |
72 | static int ext4_sync_fs_nojournal(struct super_block *sb, int wait); | ||
72 | static int ext4_remount(struct super_block *sb, int *flags, char *data); | 73 | static int ext4_remount(struct super_block *sb, int *flags, char *data); |
73 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); | 74 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); |
74 | static int ext4_unfreeze(struct super_block *sb); | 75 | static int ext4_unfreeze(struct super_block *sb); |
@@ -398,6 +399,11 @@ static void ext4_handle_error(struct super_block *sb) | |||
398 | } | 399 | } |
399 | if (test_opt(sb, ERRORS_RO)) { | 400 | if (test_opt(sb, ERRORS_RO)) { |
400 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | 401 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
402 | /* | ||
403 | * Make sure updated value of ->s_mount_flags will be visible | ||
404 | * before ->s_flags update | ||
405 | */ | ||
406 | smp_wmb(); | ||
401 | sb->s_flags |= MS_RDONLY; | 407 | sb->s_flags |= MS_RDONLY; |
402 | } | 408 | } |
403 | if (test_opt(sb, ERRORS_PANIC)) | 409 | if (test_opt(sb, ERRORS_PANIC)) |
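
The smp_wmb() above orders two plain stores: any CPU that observes the read-only bit in ->s_flags must also observe the updated ->s_mount_flags that records why the filesystem went read-only. In C11 terms it is a release fence that needs a matching acquire on the reader side; a minimal two-thread sketch (uses C11 <threads.h>, available in glibc 2.28+):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <threads.h>

    static atomic_uint mount_flags, flags;

    static int writer(void *arg)
    {
        (void)arg;
        atomic_store_explicit(&mount_flags, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_release);   /* ~ smp_wmb() */
        atomic_store_explicit(&flags, 1, memory_order_relaxed);
        return 0;
    }

    int main(void)
    {
        thrd_t t;
        thrd_create(&t, writer, NULL);

        /* Reader: once the flag is seen, the reason must be visible too. */
        while (!atomic_load_explicit(&flags, memory_order_relaxed))
            ;
        atomic_thread_fence(memory_order_acquire);   /* ~ smp_rmb() */
        printf("mount_flags=%u\n",
               atomic_load_explicit(&mount_flags, memory_order_relaxed));
        thrd_join(t, NULL);
        return 0;
    }
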
@@ -422,9 +428,9 @@ void __ext4_error(struct super_block *sb, const char *function, | |||
422 | ext4_handle_error(sb); | 428 | ext4_handle_error(sb); |
423 | } | 429 | } |
424 | 430 | ||
425 | void ext4_error_inode(struct inode *inode, const char *function, | 431 | void __ext4_error_inode(struct inode *inode, const char *function, |
426 | unsigned int line, ext4_fsblk_t block, | 432 | unsigned int line, ext4_fsblk_t block, |
427 | const char *fmt, ...) | 433 | const char *fmt, ...) |
428 | { | 434 | { |
429 | va_list args; | 435 | va_list args; |
430 | struct va_format vaf; | 436 | struct va_format vaf; |
@@ -451,9 +457,9 @@ void ext4_error_inode(struct inode *inode, const char *function, | |||
451 | ext4_handle_error(inode->i_sb); | 457 | ext4_handle_error(inode->i_sb); |
452 | } | 458 | } |
453 | 459 | ||
454 | void ext4_error_file(struct file *file, const char *function, | 460 | void __ext4_error_file(struct file *file, const char *function, |
455 | unsigned int line, ext4_fsblk_t block, | 461 | unsigned int line, ext4_fsblk_t block, |
456 | const char *fmt, ...) | 462 | const char *fmt, ...) |
457 | { | 463 | { |
458 | va_list args; | 464 | va_list args; |
459 | struct va_format vaf; | 465 | struct va_format vaf; |
@@ -570,8 +576,13 @@ void __ext4_abort(struct super_block *sb, const char *function, | |||
570 | 576 | ||
571 | if ((sb->s_flags & MS_RDONLY) == 0) { | 577 | if ((sb->s_flags & MS_RDONLY) == 0) { |
572 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | 578 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
573 | sb->s_flags |= MS_RDONLY; | ||
574 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; | 579 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; |
580 | /* | ||
581 | * Make sure updated value of ->s_mount_flags will be visible | ||
582 | * before ->s_flags update | ||
583 | */ | ||
584 | smp_wmb(); | ||
585 | sb->s_flags |= MS_RDONLY; | ||
575 | if (EXT4_SB(sb)->s_journal) | 586 | if (EXT4_SB(sb)->s_journal) |
576 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); | 587 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); |
577 | save_error_info(sb, function, line); | 588 | save_error_info(sb, function, line); |
@@ -580,7 +591,8 @@ void __ext4_abort(struct super_block *sb, const char *function, | |||
580 | panic("EXT4-fs panic from previous error\n"); | 591 | panic("EXT4-fs panic from previous error\n"); |
581 | } | 592 | } |
582 | 593 | ||
583 | void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...) | 594 | void __ext4_msg(struct super_block *sb, |
595 | const char *prefix, const char *fmt, ...) | ||
584 | { | 596 | { |
585 | struct va_format vaf; | 597 | struct va_format vaf; |
586 | va_list args; | 598 | va_list args; |
@@ -750,8 +762,10 @@ static void ext4_put_super(struct super_block *sb) | |||
750 | ext4_unregister_li_request(sb); | 762 | ext4_unregister_li_request(sb); |
751 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | 763 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); |
752 | 764 | ||
753 | flush_workqueue(sbi->dio_unwritten_wq); | 765 | flush_workqueue(sbi->unrsv_conversion_wq); |
754 | destroy_workqueue(sbi->dio_unwritten_wq); | 766 | flush_workqueue(sbi->rsv_conversion_wq); |
767 | destroy_workqueue(sbi->unrsv_conversion_wq); | ||
768 | destroy_workqueue(sbi->rsv_conversion_wq); | ||
755 | 769 | ||
756 | if (sbi->s_journal) { | 770 | if (sbi->s_journal) { |
757 | err = jbd2_journal_destroy(sbi->s_journal); | 771 | err = jbd2_journal_destroy(sbi->s_journal); |
@@ -760,7 +774,7 @@ static void ext4_put_super(struct super_block *sb) | |||
760 | ext4_abort(sb, "Couldn't clean up the journal"); | 774 | ext4_abort(sb, "Couldn't clean up the journal"); |
761 | } | 775 | } |
762 | 776 | ||
763 | ext4_es_unregister_shrinker(sb); | 777 | ext4_es_unregister_shrinker(sbi); |
764 | del_timer(&sbi->s_err_report); | 778 | del_timer(&sbi->s_err_report); |
765 | ext4_release_system_zone(sb); | 779 | ext4_release_system_zone(sb); |
766 | ext4_mb_release(sb); | 780 | ext4_mb_release(sb); |
@@ -849,6 +863,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
849 | rwlock_init(&ei->i_es_lock); | 863 | rwlock_init(&ei->i_es_lock); |
850 | INIT_LIST_HEAD(&ei->i_es_lru); | 864 | INIT_LIST_HEAD(&ei->i_es_lru); |
851 | ei->i_es_lru_nr = 0; | 865 | ei->i_es_lru_nr = 0; |
866 | ei->i_touch_when = 0; | ||
852 | ei->i_reserved_data_blocks = 0; | 867 | ei->i_reserved_data_blocks = 0; |
853 | ei->i_reserved_meta_blocks = 0; | 868 | ei->i_reserved_meta_blocks = 0; |
854 | ei->i_allocated_meta_blocks = 0; | 869 | ei->i_allocated_meta_blocks = 0; |
@@ -859,13 +874,15 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
859 | ei->i_reserved_quota = 0; | 874 | ei->i_reserved_quota = 0; |
860 | #endif | 875 | #endif |
861 | ei->jinode = NULL; | 876 | ei->jinode = NULL; |
862 | INIT_LIST_HEAD(&ei->i_completed_io_list); | 877 | INIT_LIST_HEAD(&ei->i_rsv_conversion_list); |
878 | INIT_LIST_HEAD(&ei->i_unrsv_conversion_list); | ||
863 | spin_lock_init(&ei->i_completed_io_lock); | 879 | spin_lock_init(&ei->i_completed_io_lock); |
864 | ei->i_sync_tid = 0; | 880 | ei->i_sync_tid = 0; |
865 | ei->i_datasync_tid = 0; | 881 | ei->i_datasync_tid = 0; |
866 | atomic_set(&ei->i_ioend_count, 0); | 882 | atomic_set(&ei->i_ioend_count, 0); |
867 | atomic_set(&ei->i_unwritten, 0); | 883 | atomic_set(&ei->i_unwritten, 0); |
868 | INIT_WORK(&ei->i_unwritten_work, ext4_end_io_work); | 884 | INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); |
885 | INIT_WORK(&ei->i_unrsv_conversion_work, ext4_end_io_unrsv_work); | ||
869 | 886 | ||
870 | return &ei->vfs_inode; | 887 | return &ei->vfs_inode; |
871 | } | 888 | } |
@@ -1093,6 +1110,7 @@ static const struct super_operations ext4_nojournal_sops = { | |||
1093 | .dirty_inode = ext4_dirty_inode, | 1110 | .dirty_inode = ext4_dirty_inode, |
1094 | .drop_inode = ext4_drop_inode, | 1111 | .drop_inode = ext4_drop_inode, |
1095 | .evict_inode = ext4_evict_inode, | 1112 | .evict_inode = ext4_evict_inode, |
1113 | .sync_fs = ext4_sync_fs_nojournal, | ||
1096 | .put_super = ext4_put_super, | 1114 | .put_super = ext4_put_super, |
1097 | .statfs = ext4_statfs, | 1115 | .statfs = ext4_statfs, |
1098 | .remount_fs = ext4_remount, | 1116 | .remount_fs = ext4_remount, |
@@ -1341,7 +1359,7 @@ static const struct mount_opts { | |||
1341 | {Opt_delalloc, EXT4_MOUNT_DELALLOC, | 1359 | {Opt_delalloc, EXT4_MOUNT_DELALLOC, |
1342 | MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, | 1360 | MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, |
1343 | {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, | 1361 | {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, |
1344 | MOPT_EXT4_ONLY | MOPT_CLEAR | MOPT_EXPLICIT}, | 1362 | MOPT_EXT4_ONLY | MOPT_CLEAR}, |
1345 | {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, | 1363 | {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, |
1346 | MOPT_EXT4_ONLY | MOPT_SET}, | 1364 | MOPT_EXT4_ONLY | MOPT_SET}, |
1347 | {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | | 1365 | {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | |
@@ -1684,12 +1702,6 @@ static inline void ext4_show_quota_options(struct seq_file *seq, | |||
1684 | 1702 | ||
1685 | if (sbi->s_qf_names[GRPQUOTA]) | 1703 | if (sbi->s_qf_names[GRPQUOTA]) |
1686 | seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); | 1704 | seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); |
1687 | |||
1688 | if (test_opt(sb, USRQUOTA)) | ||
1689 | seq_puts(seq, ",usrquota"); | ||
1690 | |||
1691 | if (test_opt(sb, GRPQUOTA)) | ||
1692 | seq_puts(seq, ",grpquota"); | ||
1693 | #endif | 1705 | #endif |
1694 | } | 1706 | } |
1695 | 1707 | ||
@@ -1908,7 +1920,6 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1908 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1920 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1909 | struct ext4_group_desc *gdp = NULL; | 1921 | struct ext4_group_desc *gdp = NULL; |
1910 | ext4_group_t flex_group; | 1922 | ext4_group_t flex_group; |
1911 | unsigned int groups_per_flex = 0; | ||
1912 | int i, err; | 1923 | int i, err; |
1913 | 1924 | ||
1914 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | 1925 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; |
@@ -1916,7 +1927,6 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1916 | sbi->s_log_groups_per_flex = 0; | 1927 | sbi->s_log_groups_per_flex = 0; |
1917 | return 1; | 1928 | return 1; |
1918 | } | 1929 | } |
1919 | groups_per_flex = 1U << sbi->s_log_groups_per_flex; | ||
1920 | 1930 | ||
1921 | err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); | 1931 | err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); |
1922 | if (err) | 1932 | if (err) |
@@ -2164,19 +2174,22 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2164 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); | 2174 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); |
2165 | dquot_initialize(inode); | 2175 | dquot_initialize(inode); |
2166 | if (inode->i_nlink) { | 2176 | if (inode->i_nlink) { |
2167 | ext4_msg(sb, KERN_DEBUG, | 2177 | if (test_opt(sb, DEBUG)) |
2168 | "%s: truncating inode %lu to %lld bytes", | 2178 | ext4_msg(sb, KERN_DEBUG, |
2169 | __func__, inode->i_ino, inode->i_size); | 2179 | "%s: truncating inode %lu to %lld bytes", |
2180 | __func__, inode->i_ino, inode->i_size); | ||
2170 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", | 2181 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", |
2171 | inode->i_ino, inode->i_size); | 2182 | inode->i_ino, inode->i_size); |
2172 | mutex_lock(&inode->i_mutex); | 2183 | mutex_lock(&inode->i_mutex); |
2184 | truncate_inode_pages(inode->i_mapping, inode->i_size); | ||
2173 | ext4_truncate(inode); | 2185 | ext4_truncate(inode); |
2174 | mutex_unlock(&inode->i_mutex); | 2186 | mutex_unlock(&inode->i_mutex); |
2175 | nr_truncates++; | 2187 | nr_truncates++; |
2176 | } else { | 2188 | } else { |
2177 | ext4_msg(sb, KERN_DEBUG, | 2189 | if (test_opt(sb, DEBUG)) |
2178 | "%s: deleting unreferenced inode %lu", | 2190 | ext4_msg(sb, KERN_DEBUG, |
2179 | __func__, inode->i_ino); | 2191 | "%s: deleting unreferenced inode %lu", |
2192 | __func__, inode->i_ino); | ||
2180 | jbd_debug(2, "deleting unreferenced inode %lu\n", | 2193 | jbd_debug(2, "deleting unreferenced inode %lu\n", |
2181 | inode->i_ino); | 2194 | inode->i_ino); |
2182 | nr_orphans++; | 2195 | nr_orphans++; |
@@ -2377,7 +2390,10 @@ struct ext4_attr { | |||
2377 | ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); | 2390 | ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); |
2378 | ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, | 2391 | ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, |
2379 | const char *, size_t); | 2392 | const char *, size_t); |
2380 | int offset; | 2393 | union { |
2394 | int offset; | ||
2395 | int deprecated_val; | ||
2396 | } u; | ||
2381 | }; | 2397 | }; |
2382 | 2398 | ||
2383 | static int parse_strtoull(const char *buf, | 2399 | static int parse_strtoull(const char *buf, |
@@ -2446,7 +2462,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | |||
2446 | static ssize_t sbi_ui_show(struct ext4_attr *a, | 2462 | static ssize_t sbi_ui_show(struct ext4_attr *a, |
2447 | struct ext4_sb_info *sbi, char *buf) | 2463 | struct ext4_sb_info *sbi, char *buf) |
2448 | { | 2464 | { |
2449 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); | 2465 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); |
2450 | 2466 | ||
2451 | return snprintf(buf, PAGE_SIZE, "%u\n", *ui); | 2467 | return snprintf(buf, PAGE_SIZE, "%u\n", *ui); |
2452 | } | 2468 | } |
@@ -2455,7 +2471,7 @@ static ssize_t sbi_ui_store(struct ext4_attr *a, | |||
2455 | struct ext4_sb_info *sbi, | 2471 | struct ext4_sb_info *sbi, |
2456 | const char *buf, size_t count) | 2472 | const char *buf, size_t count) |
2457 | { | 2473 | { |
2458 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); | 2474 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); |
2459 | unsigned long t; | 2475 | unsigned long t; |
2460 | int ret; | 2476 | int ret; |
2461 | 2477 | ||
@@ -2504,12 +2520,20 @@ static ssize_t trigger_test_error(struct ext4_attr *a, | |||
2504 | return count; | 2520 | return count; |
2505 | } | 2521 | } |
2506 | 2522 | ||
2523 | static ssize_t sbi_deprecated_show(struct ext4_attr *a, | ||
2524 | struct ext4_sb_info *sbi, char *buf) | ||
2525 | { | ||
2526 | return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); | ||
2527 | } | ||
2528 | |||
2507 | #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ | 2529 | #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ |
2508 | static struct ext4_attr ext4_attr_##_name = { \ | 2530 | static struct ext4_attr ext4_attr_##_name = { \ |
2509 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | 2531 | .attr = {.name = __stringify(_name), .mode = _mode }, \ |
2510 | .show = _show, \ | 2532 | .show = _show, \ |
2511 | .store = _store, \ | 2533 | .store = _store, \ |
2512 | .offset = offsetof(struct ext4_sb_info, _elname), \ | 2534 | .u = { \ |
2535 | .offset = offsetof(struct ext4_sb_info, _elname),\ | ||
2536 | }, \ | ||
2513 | } | 2537 | } |
2514 | #define EXT4_ATTR(name, mode, show, store) \ | 2538 | #define EXT4_ATTR(name, mode, show, store) \ |
2515 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | 2539 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) |
@@ -2520,6 +2544,14 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | |||
2520 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ | 2544 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ |
2521 | EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) | 2545 | EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) |
2522 | #define ATTR_LIST(name) &ext4_attr_##name.attr | 2546 | #define ATTR_LIST(name) &ext4_attr_##name.attr |
2547 | #define EXT4_DEPRECATED_ATTR(_name, _val) \ | ||
2548 | static struct ext4_attr ext4_attr_##_name = { \ | ||
2549 | .attr = {.name = __stringify(_name), .mode = 0444 }, \ | ||
2550 | .show = sbi_deprecated_show, \ | ||
2551 | .u = { \ | ||
2552 | .deprecated_val = _val, \ | ||
2553 | }, \ | ||
2554 | } | ||
2523 | 2555 | ||
2524 | EXT4_RO_ATTR(delayed_allocation_blocks); | 2556 | EXT4_RO_ATTR(delayed_allocation_blocks); |
2525 | EXT4_RO_ATTR(session_write_kbytes); | 2557 | EXT4_RO_ATTR(session_write_kbytes); |
@@ -2534,7 +2566,7 @@ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); | |||
2534 | EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); | 2566 | EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); |
2535 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); | 2567 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); |
2536 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); | 2568 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); |
2537 | EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); | 2569 | EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); |
2538 | EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); | 2570 | EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); |
2539 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); | 2571 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); |
2540 | 2572 | ||
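
The union inside struct ext4_attr is what lets max_writeback_mb_bump stay visible in sysfs while doing nothing: one descriptor type either carries an offsetof() into the sb-info (live knobs) or a frozen value (deprecated knobs, here 128). A stand-alone model of the pattern (names shortened):

    #include <stddef.h>
    #include <stdio.h>

    struct sb_info { unsigned int mb_stats; };

    struct attr {
        const char *name;
        union {
            int offset;
            int deprecated_val;
        } u;
        int deprecated;
    };

    static int attr_show(const struct attr *a, const struct sb_info *sbi)
    {
        if (a->deprecated)
            return a->u.deprecated_val;
        return *(const unsigned int *)((const char *)sbi + a->u.offset);
    }

    int main(void)
    {
        struct sb_info sbi = { .mb_stats = 7 };
        struct attr live = { "mb_stats",
                             { .offset = offsetof(struct sb_info, mb_stats) }, 0 };
        struct attr dead = { "max_writeback_mb_bump",
                             { .deprecated_val = 128 }, 1 };

        printf("%s=%d\n", live.name, attr_show(&live, &sbi));
        printf("%s=%d\n", dead.name, attr_show(&dead, &sbi));  /* always 128 */
        return 0;
    }
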
@@ -3451,7 +3483,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		}
 		if (test_opt(sb, DIOREAD_NOLOCK)) {
 			ext4_msg(sb, KERN_ERR, "can't mount with "
-				 "both data=journal and delalloc");
+				 "both data=journal and dioread_nolock");
 			goto failed_mount;
 		}
 		if (test_opt(sb, DELALLOC))
@@ -3586,10 +3618,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
 	sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
 
-	/* Do we have standard group size of blocksize * 8 blocks ? */
-	if (sbi->s_blocks_per_group == blocksize << 3)
-		set_opt2(sb, STD_GROUP_SIZE);
-
 	for (i = 0; i < 4; i++)
 		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
 	sbi->s_def_hash_version = es->s_def_hash_version;
@@ -3659,6 +3687,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		goto failed_mount;
 	}
 
+	/* Do we have standard group size of clustersize * 8 blocks ? */
+	if (sbi->s_blocks_per_group == clustersize << 3)
+		set_opt2(sb, STD_GROUP_SIZE);
+
 	/*
 	 * Test whether we have more sectors than will fit in sector_t,
 	 * and whether the max offset is addressable by the page cache.
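
Note: the STD_GROUP_SIZE test (removed from its old spot above) is not just
reworded; it moves below the clustersize validation and compares against
clustersize << 3, the number of blocks one bitmap block can track.
Illustrative arithmetic for common geometries (assumed values, not from the
patch itself):

	/* A block bitmap holds blocksize * 8 bits, one bit per cluster, so a
	 * full group spans (blocksize * 8) clusters == clustersize * 8 blocks.
	 *
	 *   blocksize 4096, no bigalloc:  clustersize == 4096
	 *       standard group = 4096 << 3  == 32768 blocks
	 *   blocksize 4096, 64KiB bigalloc clusters: clustersize == 65536
	 *       standard group = 65536 << 3 == 524288 blocks
	 *
	 * The old check (blocksize << 3) could mis-set the flag on bigalloc
	 * filesystems, whose groups are clustersize/blocksize times larger. */
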
@@ -3763,7 +3795,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_err_report.data = (unsigned long) sb;
 
 	/* Register extent status tree shrinker */
-	ext4_es_register_shrinker(sb);
+	ext4_es_register_shrinker(sbi);
 
 	err = percpu_counter_init(&sbi->s_freeclusters_counter,
 			ext4_count_free_clusters(sb));
@@ -3787,7 +3819,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	sbi->s_stripe = ext4_get_stripe_size(sbi);
-	sbi->s_max_writeback_mb_bump = 128;
 	sbi->s_extent_max_zeroout_kb = 32;
 
 	/*
@@ -3915,12 +3946,20 @@ no_journal:
 	 * The maximum number of concurrent works can be high and
 	 * concurrency isn't really necessary. Limit it to 1.
 	 */
-	EXT4_SB(sb)->dio_unwritten_wq =
-		alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
-	if (!EXT4_SB(sb)->dio_unwritten_wq) {
-		printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
+	EXT4_SB(sb)->rsv_conversion_wq =
+		alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
+	if (!EXT4_SB(sb)->rsv_conversion_wq) {
+		printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
 		ret = -ENOMEM;
-		goto failed_mount_wq;
+		goto failed_mount4;
+	}
+
+	EXT4_SB(sb)->unrsv_conversion_wq =
+		alloc_workqueue("ext4-unrsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
+	if (!EXT4_SB(sb)->unrsv_conversion_wq) {
+		printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
+		ret = -ENOMEM;
+		goto failed_mount4;
 	}
 
 	/*
@@ -4074,14 +4113,17 @@ failed_mount4a:
 	sb->s_root = NULL;
 failed_mount4:
 	ext4_msg(sb, KERN_ERR, "mount failed");
-	destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
+	if (EXT4_SB(sb)->rsv_conversion_wq)
+		destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
+	if (EXT4_SB(sb)->unrsv_conversion_wq)
+		destroy_workqueue(EXT4_SB(sb)->unrsv_conversion_wq);
 failed_mount_wq:
 	if (sbi->s_journal) {
 		jbd2_journal_destroy(sbi->s_journal);
 		sbi->s_journal = NULL;
 	}
 failed_mount3:
-	ext4_es_unregister_shrinker(sb);
+	ext4_es_unregister_shrinker(sbi);
 	del_timer(&sbi->s_err_report);
 	if (sbi->s_flex_groups)
 		ext4_kvfree(sbi->s_flex_groups);
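
Note: the lone "ext4-dio-unwritten" queue becomes two conversion queues,
reserved and unreserved; both remain WQ_UNBOUND with max_active = 1 because
the conversion work is not CPU-bound and gains nothing from concurrency.
Since mount can now fail between the two alloc_workqueue() calls,
failed_mount4 NULL-checks each queue before destroying it (sbi is
zero-allocated, so an unset pointer reads as NULL). The idiom in isolation,
as a sketch with made-up names:

	/* Create both queues or neither; mirrors the NULL-checked unwind. */
	static int example_create_queues(struct workqueue_struct **a,
					 struct workqueue_struct **b)
	{
		*a = *b = NULL;
		*a = alloc_workqueue("example-a", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
		if (!*a)
			goto fail;
		*b = alloc_workqueue("example-b", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
		if (!*b)
			goto fail;
		return 0;
	fail:
		if (*a)			/* only destroy what was created */
			destroy_workqueue(*a);
		*a = NULL;
		return -ENOMEM;
	}
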
@@ -4517,19 +4559,52 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 {
 	int ret = 0;
 	tid_t target;
+	bool needs_barrier = false;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	trace_ext4_sync_fs(sb, wait);
-	flush_workqueue(sbi->dio_unwritten_wq);
+	flush_workqueue(sbi->rsv_conversion_wq);
+	flush_workqueue(sbi->unrsv_conversion_wq);
 	/*
 	 * Writeback quota in non-journalled quota case - journalled quota has
 	 * no dirty dquots
 	 */
 	dquot_writeback_dquots(sb, -1);
+	/*
+	 * Data writeback is possible w/o journal transaction, so barrier must
+	 * being sent at the end of the function. But we can skip it if
+	 * transaction_commit will do it for us.
+	 */
+	target = jbd2_get_latest_transaction(sbi->s_journal);
+	if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
+	    !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
+		needs_barrier = true;
+
 	if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
 		if (wait)
-			jbd2_log_wait_commit(sbi->s_journal, target);
+			ret = jbd2_log_wait_commit(sbi->s_journal, target);
 	}
+	if (needs_barrier) {
+		int err;
+		err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
+		if (!ret)
+			ret = err;
+	}
+
+	return ret;
+}
+
+static int ext4_sync_fs_nojournal(struct super_block *sb, int wait)
+{
+	int ret = 0;
+
+	trace_ext4_sync_fs(sb, wait);
+	flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
+	flush_workqueue(EXT4_SB(sb)->unrsv_conversion_wq);
+	dquot_writeback_dquots(sb, -1);
+	if (wait && test_opt(sb, BARRIER))
+		ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
+
 	return ret;
 }
 
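
Note: with data writeback possible outside any journal transaction,
ext4_sync_fs() can no longer rely on the commit to flush the disk cache.
It now asks jbd2 whether the commit it is about to wait on will issue a
data-device barrier and, if not, sends blkdev_issue_flush() itself; the
new ext4_sync_fs_nojournal() variant always flushes when waiting with
barriers enabled. The decision condensed into one predicate (an
illustrative restatement, not a kernel helper):

	/* Flush ourselves only if the caller waits, barriers are enabled,
	 * and the commit we wait on will not flush the data device. */
	static bool sync_needs_own_flush(journal_t *journal, tid_t tid,
					 int wait)
	{
		return wait && (journal->j_flags & JBD2_BARRIER) &&
		       !jbd2_trans_will_send_data_barrier(journal, tid);
	}

A smaller fix rides along: jbd2_log_wait_commit()'s return value is now
propagated instead of being dropped.
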
@@ -4652,6 +4727,21 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 		goto restore_opts;
 	}
 
+	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+		if (test_opt2(sb, EXPLICIT_DELALLOC)) {
+			ext4_msg(sb, KERN_ERR, "can't mount with "
+				 "both data=journal and delalloc");
+			err = -EINVAL;
+			goto restore_opts;
+		}
+		if (test_opt(sb, DIOREAD_NOLOCK)) {
+			ext4_msg(sb, KERN_ERR, "can't mount with "
+				 "both data=journal and dioread_nolock");
+			err = -EINVAL;
+			goto restore_opts;
+		}
+	}
+
 	if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
 		ext4_abort(sb, "Abort forced by user");
 
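
Note: these checks mirror the data=journal constraints that
ext4_fill_super() enforces at first mount (see the dioread_nolock message
fix above), closing the hole where a remount could sneak in option
combinations the initial mount would have rejected; for example,
remounting a data=journal filesystem with "-o remount,delalloc" now fails
with -EINVAL.
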
@@ -5406,6 +5496,7 @@ static void __exit ext4_exit_fs(void)
 	kset_unregister(ext4_kset);
 	ext4_exit_system_zone();
 	ext4_exit_pageio();
+	ext4_exit_es();
 }
 
 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
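
Note: ext4_exit_es() joins module teardown after ext4_exit_pageio(),
presumably releasing the extent-status cache set up by a matching init
call during module initialization, keeping exit in reverse order of init.
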