about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r-- fs/ext4/Kconfig 11
-rw-r--r-- fs/ext4/acl.c 74
-rw-r--r-- fs/ext4/balloc.c 81
-rw-r--r-- fs/ext4/block_validity.c 9
-rw-r--r-- fs/ext4/dir.c 14
-rw-r--r-- fs/ext4/ext4.h 148
-rw-r--r-- fs/ext4/ext4_extents.h 3
-rw-r--r-- fs/ext4/ext4_jbd2.c 86
-rw-r--r-- fs/ext4/ext4_jbd2.h 68
-rw-r--r-- fs/ext4/extents.c 394
-rw-r--r-- fs/ext4/file.c 13
-rw-r--r-- fs/ext4/fsync.c 68
-rw-r--r-- fs/ext4/ialloc.c 52
-rw-r--r-- fs/ext4/inode.c 965
-rw-r--r-- fs/ext4/ioctl.c 41
-rw-r--r-- fs/ext4/mballoc.c 179
-rw-r--r-- fs/ext4/mballoc.h 10
-rw-r--r-- fs/ext4/migrate.c 63
-rw-r--r-- fs/ext4/move_extent.c 313
-rw-r--r-- fs/ext4/namei.c 124
-rw-r--r-- fs/ext4/resize.c 104
-rw-r--r-- fs/ext4/super.c 537
-rw-r--r-- fs/ext4/xattr.c 112
-rw-r--r-- fs/ext4/xattr_security.c 21
-rw-r--r-- fs/ext4/xattr_trusted.c 20
-rw-r--r-- fs/ext4/xattr_user.c 25
26 files changed, 2108 insertions, 1427 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 9f2d45d75b1a..9ed1bb1f319f 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -26,6 +26,17 @@ config EXT4_FS
26 26
27 If unsure, say N. 27 If unsure, say N.
28 28
29config EXT4_USE_FOR_EXT23
30 bool "Use ext4 for ext2/ext3 file systems"
31 depends on EXT4_FS
32 depends on EXT3_FS=n || EXT2_FS=n
33 default y
34 help
35 Allow the ext4 file system driver code to be used for ext2 or
36 ext3 file system mounts. This allows users to reduce their
37 compiled kernel size by using one file system driver for
38 ext2, ext3, and ext4 file systems.
39
29config EXT4_FS_XATTR 40config EXT4_FS_XATTR
30 bool "Ext4 extended attributes" 41 bool "Ext4 extended attributes"
31 depends on EXT4_FS 42 depends on EXT4_FS
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 0df88b2a69b0..8a2a29d35a6f 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -364,12 +364,12 @@ out:
364 * Extended attribute handlers 364 * Extended attribute handlers
365 */ 365 */
366static size_t 366static size_t
367ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, 367ext4_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
368 const char *name, size_t name_len) 368 const char *name, size_t name_len, int type)
369{ 369{
370 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 370 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
371 371
372 if (!test_opt(inode->i_sb, POSIX_ACL)) 372 if (!test_opt(dentry->d_sb, POSIX_ACL))
373 return 0; 373 return 0;
374 if (list && size <= list_len) 374 if (list && size <= list_len)
375 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 375 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -377,12 +377,12 @@ ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
377} 377}
378 378
379static size_t 379static size_t
380ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, 380ext4_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
381 const char *name, size_t name_len) 381 const char *name, size_t name_len, int type)
382{ 382{
383 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 383 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
384 384
385 if (!test_opt(inode->i_sb, POSIX_ACL)) 385 if (!test_opt(dentry->d_sb, POSIX_ACL))
386 return 0; 386 return 0;
387 if (list && size <= list_len) 387 if (list && size <= list_len)
388 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 388 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -390,15 +390,18 @@ ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
390} 390}
391 391
392static int 392static int
393ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 393ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
394 size_t size, int type)
394{ 395{
395 struct posix_acl *acl; 396 struct posix_acl *acl;
396 int error; 397 int error;
397 398
398 if (!test_opt(inode->i_sb, POSIX_ACL)) 399 if (strcmp(name, "") != 0)
400 return -EINVAL;
401 if (!test_opt(dentry->d_sb, POSIX_ACL))
399 return -EOPNOTSUPP; 402 return -EOPNOTSUPP;
400 403
401 acl = ext4_get_acl(inode, type); 404 acl = ext4_get_acl(dentry->d_inode, type);
402 if (IS_ERR(acl)) 405 if (IS_ERR(acl))
403 return PTR_ERR(acl); 406 return PTR_ERR(acl);
404 if (acl == NULL) 407 if (acl == NULL)
@@ -410,31 +413,16 @@ ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
410} 413}
411 414
412static int 415static int
413ext4_xattr_get_acl_access(struct inode *inode, const char *name, 416ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
414 void *buffer, size_t size) 417 size_t size, int flags, int type)
415{
416 if (strcmp(name, "") != 0)
417 return -EINVAL;
418 return ext4_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
419}
420
421static int
422ext4_xattr_get_acl_default(struct inode *inode, const char *name,
423 void *buffer, size_t size)
424{
425 if (strcmp(name, "") != 0)
426 return -EINVAL;
427 return ext4_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
428}
429
430static int
431ext4_xattr_set_acl(struct inode *inode, int type, const void *value,
432 size_t size)
433{ 418{
419 struct inode *inode = dentry->d_inode;
434 handle_t *handle; 420 handle_t *handle;
435 struct posix_acl *acl; 421 struct posix_acl *acl;
436 int error, retries = 0; 422 int error, retries = 0;
437 423
424 if (strcmp(name, "") != 0)
425 return -EINVAL;
438 if (!test_opt(inode->i_sb, POSIX_ACL)) 426 if (!test_opt(inode->i_sb, POSIX_ACL))
439 return -EOPNOTSUPP; 427 return -EOPNOTSUPP;
440 if (!is_owner_or_cap(inode)) 428 if (!is_owner_or_cap(inode))
@@ -466,34 +454,18 @@ release_and_out:
466 return error; 454 return error;
467} 455}
468 456
469static int
470ext4_xattr_set_acl_access(struct inode *inode, const char *name,
471 const void *value, size_t size, int flags)
472{
473 if (strcmp(name, "") != 0)
474 return -EINVAL;
475 return ext4_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
476}
477
478static int
479ext4_xattr_set_acl_default(struct inode *inode, const char *name,
480 const void *value, size_t size, int flags)
481{
482 if (strcmp(name, "") != 0)
483 return -EINVAL;
484 return ext4_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
485}
486
487struct xattr_handler ext4_xattr_acl_access_handler = { 457struct xattr_handler ext4_xattr_acl_access_handler = {
488 .prefix = POSIX_ACL_XATTR_ACCESS, 458 .prefix = POSIX_ACL_XATTR_ACCESS,
459 .flags = ACL_TYPE_ACCESS,
489 .list = ext4_xattr_list_acl_access, 460 .list = ext4_xattr_list_acl_access,
490 .get = ext4_xattr_get_acl_access, 461 .get = ext4_xattr_get_acl,
491 .set = ext4_xattr_set_acl_access, 462 .set = ext4_xattr_set_acl,
492}; 463};
493 464
494struct xattr_handler ext4_xattr_acl_default_handler = { 465struct xattr_handler ext4_xattr_acl_default_handler = {
495 .prefix = POSIX_ACL_XATTR_DEFAULT, 466 .prefix = POSIX_ACL_XATTR_DEFAULT,
467 .flags = ACL_TYPE_DEFAULT,
496 .list = ext4_xattr_list_acl_default, 468 .list = ext4_xattr_list_acl_default,
497 .get = ext4_xattr_get_acl_default, 469 .get = ext4_xattr_get_acl,
498 .set = ext4_xattr_set_acl_default, 470 .set = ext4_xattr_set_acl,
499}; 471};
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1d0418980f8d..d2f37a5516c7 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -97,8 +97,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
97 /* If checksum is bad mark all blocks used to prevent allocation 97 /* If checksum is bad mark all blocks used to prevent allocation
98 * essentially implementing a per-group read-only flag. */ 98 * essentially implementing a per-group read-only flag. */
99 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 99 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
100 ext4_error(sb, __func__, 100 ext4_error(sb, "Checksum bad for group %u",
101 "Checksum bad for group %u", block_group); 101 block_group);
102 ext4_free_blks_set(sb, gdp, 0); 102 ext4_free_blks_set(sb, gdp, 0);
103 ext4_free_inodes_set(sb, gdp, 0); 103 ext4_free_inodes_set(sb, gdp, 0);
104 ext4_itable_unused_set(sb, gdp, 0); 104 ext4_itable_unused_set(sb, gdp, 0);
@@ -130,8 +130,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
130 * to make sure we calculate the right free blocks 130 * to make sure we calculate the right free blocks
131 */ 131 */
132 group_blocks = ext4_blocks_count(sbi->s_es) - 132 group_blocks = ext4_blocks_count(sbi->s_es) -
133 le32_to_cpu(sbi->s_es->s_first_data_block) - 133 ext4_group_first_block_no(sb, ngroups - 1);
134 (EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1));
135 } else { 134 } else {
136 group_blocks = EXT4_BLOCKS_PER_GROUP(sb); 135 group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
137 } 136 }
@@ -189,9 +188,6 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
189 * when a file system is mounted (see ext4_fill_super). 188 * when a file system is mounted (see ext4_fill_super).
190 */ 189 */
191 190
192
193#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
194
195/** 191/**
196 * ext4_get_group_desc() -- load group descriptor from disk 192 * ext4_get_group_desc() -- load group descriptor from disk
197 * @sb: super block 193 * @sb: super block
@@ -210,10 +206,8 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
210 struct ext4_sb_info *sbi = EXT4_SB(sb); 206 struct ext4_sb_info *sbi = EXT4_SB(sb);
211 207
212 if (block_group >= ngroups) { 208 if (block_group >= ngroups) {
213 ext4_error(sb, "ext4_get_group_desc", 209 ext4_error(sb, "block_group >= groups_count - block_group = %u,"
214 "block_group >= groups_count - " 210 " groups_count = %u", block_group, ngroups);
215 "block_group = %u, groups_count = %u",
216 block_group, ngroups);
217 211
218 return NULL; 212 return NULL;
219 } 213 }
@@ -221,8 +215,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
221 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); 215 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
222 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); 216 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
223 if (!sbi->s_group_desc[group_desc]) { 217 if (!sbi->s_group_desc[group_desc]) {
224 ext4_error(sb, "ext4_get_group_desc", 218 ext4_error(sb, "Group descriptor not loaded - "
225 "Group descriptor not loaded - "
226 "block_group = %u, group_desc = %u, desc = %u", 219 "block_group = %u, group_desc = %u, desc = %u",
227 block_group, group_desc, offset); 220 block_group, group_desc, offset);
228 return NULL; 221 return NULL;
@@ -282,9 +275,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
282 return 1; 275 return 1;
283 276
284err_out: 277err_out:
285 ext4_error(sb, __func__, 278 ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu",
286 "Invalid block bitmap - "
287 "block_group = %d, block = %llu",
288 block_group, bitmap_blk); 279 block_group, bitmap_blk);
289 return 0; 280 return 0;
290} 281}
@@ -311,8 +302,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
311 bitmap_blk = ext4_block_bitmap(sb, desc); 302 bitmap_blk = ext4_block_bitmap(sb, desc);
312 bh = sb_getblk(sb, bitmap_blk); 303 bh = sb_getblk(sb, bitmap_blk);
313 if (unlikely(!bh)) { 304 if (unlikely(!bh)) {
314 ext4_error(sb, __func__, 305 ext4_error(sb, "Cannot read block bitmap - "
315 "Cannot read block bitmap - "
316 "block_group = %u, block_bitmap = %llu", 306 "block_group = %u, block_bitmap = %llu",
317 block_group, bitmap_blk); 307 block_group, bitmap_blk);
318 return NULL; 308 return NULL;
@@ -354,8 +344,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
354 set_bitmap_uptodate(bh); 344 set_bitmap_uptodate(bh);
355 if (bh_submit_read(bh) < 0) { 345 if (bh_submit_read(bh) < 0) {
356 put_bh(bh); 346 put_bh(bh);
357 ext4_error(sb, __func__, 347 ext4_error(sb, "Cannot read block bitmap - "
358 "Cannot read block bitmap - "
359 "block_group = %u, block_bitmap = %llu", 348 "block_group = %u, block_bitmap = %llu",
360 block_group, bitmap_blk); 349 block_group, bitmap_blk);
361 return NULL; 350 return NULL;
@@ -419,8 +408,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
419 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || 408 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
420 in_range(block + count - 1, ext4_inode_table(sb, desc), 409 in_range(block + count - 1, ext4_inode_table(sb, desc),
421 sbi->s_itb_per_group)) { 410 sbi->s_itb_per_group)) {
422 ext4_error(sb, __func__, 411 ext4_error(sb, "Adding blocks in system zones - "
423 "Adding blocks in system zones - "
424 "Block = %llu, count = %lu", 412 "Block = %llu, count = %lu",
425 block, count); 413 block, count);
426 goto error_return; 414 goto error_return;
@@ -453,8 +441,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
453 BUFFER_TRACE(bitmap_bh, "clear bit"); 441 BUFFER_TRACE(bitmap_bh, "clear bit");
454 if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), 442 if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
455 bit + i, bitmap_bh->b_data)) { 443 bit + i, bitmap_bh->b_data)) {
456 ext4_error(sb, __func__, 444 ext4_error(sb, "bit already cleared for block %llu",
457 "bit already cleared for block %llu",
458 (ext4_fsblk_t)(block + i)); 445 (ext4_fsblk_t)(block + i));
459 BUFFER_TRACE(bitmap_bh, "bit already cleared"); 446 BUFFER_TRACE(bitmap_bh, "bit already cleared");
460 } else { 447 } else {
@@ -499,44 +486,6 @@ error_return:
499} 486}
500 487
501/** 488/**
502 * ext4_free_blocks() -- Free given blocks and update quota
503 * @handle: handle for this transaction
504 * @inode: inode
505 * @block: start physical block to free
506 * @count: number of blocks to count
507 * @metadata: Are these metadata blocks
508 */
509void ext4_free_blocks(handle_t *handle, struct inode *inode,
510 ext4_fsblk_t block, unsigned long count,
511 int metadata)
512{
513 struct super_block *sb;
514 unsigned long dquot_freed_blocks;
515
516 /* this isn't the right place to decide whether block is metadata
517 * inode.c/extents.c knows better, but for safety ... */
518 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
519 metadata = 1;
520
521 /* We need to make sure we don't reuse
522 * block released untill the transaction commit.
523 * writeback mode have weak data consistency so
524 * don't force data as metadata when freeing block
525 * for writeback mode.
526 */
527 if (metadata == 0 && !ext4_should_writeback_data(inode))
528 metadata = 1;
529
530 sb = inode->i_sb;
531
532 ext4_mb_free_blocks(handle, inode, block, count,
533 metadata, &dquot_freed_blocks);
534 if (dquot_freed_blocks)
535 vfs_dq_free_block(inode, dquot_freed_blocks);
536 return;
537}
538
539/**
540 * ext4_has_free_blocks() 489 * ext4_has_free_blocks()
541 * @sbi: in-core super block structure. 490 * @sbi: in-core super block structure.
542 * @nblocks: number of needed blocks 491 * @nblocks: number of needed blocks
@@ -761,7 +710,13 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
761static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, 710static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
762 ext4_group_t group) 711 ext4_group_t group)
763{ 712{
764 return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0; 713 if (!ext4_bg_has_super(sb, group))
714 return 0;
715
716 if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG))
717 return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
718 else
719 return EXT4_SB(sb)->s_gdb_count;
765} 720}
766 721
767/** 722/**
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 50784ef07563..538c48655084 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -16,9 +16,9 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/swap.h> 17#include <linux/swap.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/version.h>
20#include <linux/blkdev.h> 19#include <linux/blkdev.h>
21#include <linux/mutex.h> 20#include <linux/mutex.h>
21#include <linux/slab.h>
22#include "ext4.h" 22#include "ext4.h"
23 23
24struct ext4_system_zone { 24struct ext4_system_zone {
@@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_block *sb)
160 if (ext4_bg_has_super(sb, i) && 160 if (ext4_bg_has_super(sb, i) &&
161 ((i < 5) || ((i % flex_size) == 0))) 161 ((i < 5) || ((i % flex_size) == 0)))
162 add_system_zone(sbi, ext4_group_first_block_no(sb, i), 162 add_system_zone(sbi, ext4_group_first_block_no(sb, i),
163 sbi->s_gdb_count + 1); 163 ext4_bg_num_gdb(sb, i) + 1);
164 gdp = ext4_get_group_desc(sb, i, NULL); 164 gdp = ext4_get_group_desc(sb, i, NULL);
165 ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1); 165 ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
166 if (ret) 166 if (ret)
@@ -206,14 +206,14 @@ void ext4_release_system_zone(struct super_block *sb)
206 entry = rb_entry(n, struct ext4_system_zone, node); 206 entry = rb_entry(n, struct ext4_system_zone, node);
207 kmem_cache_free(ext4_system_zone_cachep, entry); 207 kmem_cache_free(ext4_system_zone_cachep, entry);
208 if (!parent) 208 if (!parent)
209 EXT4_SB(sb)->system_blks.rb_node = NULL; 209 EXT4_SB(sb)->system_blks = RB_ROOT;
210 else if (parent->rb_left == n) 210 else if (parent->rb_left == n)
211 parent->rb_left = NULL; 211 parent->rb_left = NULL;
212 else if (parent->rb_right == n) 212 else if (parent->rb_right == n)
213 parent->rb_right = NULL; 213 parent->rb_right = NULL;
214 n = parent; 214 n = parent;
215 } 215 }
216 EXT4_SB(sb)->system_blks.rb_node = NULL; 216 EXT4_SB(sb)->system_blks = RB_ROOT;
217} 217}
218 218
219/* 219/*
@@ -228,6 +228,7 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
228 struct rb_node *n = sbi->system_blks.rb_node; 228 struct rb_node *n = sbi->system_blks.rb_node;
229 229
230 if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || 230 if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
231 (start_blk + count < start_blk) ||
231 (start_blk + count > ext4_blocks_count(sbi->s_es))) 232 (start_blk + count > ext4_blocks_count(sbi->s_es)))
232 return 0; 233 return 0;
233 while (n) { 234 while (n) {
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 9dc93168e262..86cb6d86a048 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -83,10 +83,12 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
83 error_msg = "inode out of bounds"; 83 error_msg = "inode out of bounds";
84 84
85 if (error_msg != NULL) 85 if (error_msg != NULL)
86 ext4_error(dir->i_sb, function, 86 __ext4_error(dir->i_sb, function,
87 "bad entry in directory #%lu: %s - " 87 "bad entry in directory #%lu: %s - block=%llu"
88 "offset=%u, inode=%u, rec_len=%d, name_len=%d", 88 "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
89 dir->i_ino, error_msg, offset, 89 dir->i_ino, error_msg,
90 (unsigned long long) bh->b_blocknr,
91 (unsigned) (offset%bh->b_size), offset,
90 le32_to_cpu(de->inode), 92 le32_to_cpu(de->inode),
91 rlen, de->name_len); 93 rlen, de->name_len);
92 return error_msg == NULL ? 1 : 0; 94 return error_msg == NULL ? 1 : 0;
@@ -150,7 +152,7 @@ static int ext4_readdir(struct file *filp,
150 */ 152 */
151 if (!bh) { 153 if (!bh) {
152 if (!dir_has_error) { 154 if (!dir_has_error) {
153 ext4_error(sb, __func__, "directory #%lu " 155 ext4_error(sb, "directory #%lu "
154 "contains a hole at offset %Lu", 156 "contains a hole at offset %Lu",
155 inode->i_ino, 157 inode->i_ino,
156 (unsigned long long) filp->f_pos); 158 (unsigned long long) filp->f_pos);
@@ -303,7 +305,7 @@ static void free_rb_tree_fname(struct rb_root *root)
303 kfree(old); 305 kfree(old);
304 } 306 }
305 if (!parent) 307 if (!parent)
306 root->rb_node = NULL; 308 *root = RB_ROOT;
307 else if (parent->rb_left == n) 309 else if (parent->rb_left == n)
308 parent->rb_left = NULL; 310 parent->rb_left = NULL;
309 else if (parent->rb_right == n) 311 else if (parent->rb_right == n)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8825515eeddd..bf938cf7c5f0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -53,6 +53,12 @@
53#define ext4_debug(f, a...) do {} while (0) 53#define ext4_debug(f, a...) do {} while (0)
54#endif 54#endif
55 55
56#define EXT4_ERROR_INODE(inode, fmt, a...) \
57 ext4_error_inode(__func__, (inode), (fmt), ## a);
58
59#define EXT4_ERROR_FILE(file, fmt, a...) \
60 ext4_error_file(__func__, (file), (fmt), ## a);
61
56/* data type for block offset of block group */ 62/* data type for block offset of block group */
57typedef int ext4_grpblk_t; 63typedef int ext4_grpblk_t;
58 64
@@ -133,14 +139,14 @@ struct mpage_da_data {
133 int pages_written; 139 int pages_written;
134 int retval; 140 int retval;
135}; 141};
136#define DIO_AIO_UNWRITTEN 0x1 142#define EXT4_IO_UNWRITTEN 0x1
137typedef struct ext4_io_end { 143typedef struct ext4_io_end {
138 struct list_head list; /* per-file finished AIO list */ 144 struct list_head list; /* per-file finished AIO list */
139 struct inode *inode; /* file being written to */ 145 struct inode *inode; /* file being written to */
140 unsigned int flag; /* unwritten or not */ 146 unsigned int flag; /* unwritten or not */
141 int error; /* I/O error code */ 147 struct page *page; /* page struct for buffer write */
142 ext4_lblk_t offset; /* offset in the file */ 148 loff_t offset; /* offset in the file */
143 size_t size; /* size of the extent */ 149 ssize_t size; /* size of the extent */
144 struct work_struct work; /* data work queue */ 150 struct work_struct work; /* data work queue */
145} ext4_io_end_t; 151} ext4_io_end_t;
146 152
@@ -284,10 +290,12 @@ struct flex_groups {
284#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ 290#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
285#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ 291#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
286#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ 292#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
293#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
294#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
287#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ 295#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
288 296
289#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ 297#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
290#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ 298#define EXT4_FL_USER_MODIFIABLE 0x004B80FF /* User modifiable flags */
291 299
292/* Flags that should be inherited by new inodes from their parent. */ 300/* Flags that should be inherited by new inodes from their parent. */
293#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ 301#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
@@ -313,17 +321,6 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
313 return flags & EXT4_OTHER_FLMASK; 321 return flags & EXT4_OTHER_FLMASK;
314} 322}
315 323
316/*
317 * Inode dynamic state flags
318 */
319#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */
320#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
321#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
322#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
323#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */
324#define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */
325#define EXT4_STATE_DIO_UNWRITTEN 0x00000040 /* need convert on dio done*/
326
327/* Used to pass group descriptor data when online resize is done */ 324/* Used to pass group descriptor data when online resize is done */
328struct ext4_new_group_input { 325struct ext4_new_group_input {
329 __u32 group; /* Group number for this data */ 326 __u32 group; /* Group number for this data */
@@ -361,19 +358,23 @@ struct ext4_new_group_data {
361 so set the magic i_delalloc_reserve_flag after taking the 358 so set the magic i_delalloc_reserve_flag after taking the
362 inode allocation semaphore for */ 359 inode allocation semaphore for */
363#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 360#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
364 /* Call ext4_da_update_reserve_space() after successfully
365 allocating the blocks */
366#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008
367 /* caller is from the direct IO path, request to creation of an 361 /* caller is from the direct IO path, request to creation of an
368 unitialized extents if not allocated, split the uninitialized 362 unitialized extents if not allocated, split the uninitialized
369 extent if blocks has been preallocated already*/ 363 extent if blocks has been preallocated already*/
370#define EXT4_GET_BLOCKS_DIO 0x0010 364#define EXT4_GET_BLOCKS_PRE_IO 0x0008
371#define EXT4_GET_BLOCKS_CONVERT 0x0020 365#define EXT4_GET_BLOCKS_CONVERT 0x0010
372#define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ 366#define EXT4_GET_BLOCKS_IO_CREATE_EXT (EXT4_GET_BLOCKS_PRE_IO|\
367 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
368 /* Convert extent to initialized after IO complete */
369#define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\
373 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) 370 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
374 /* Convert extent to initialized after direct IO complete */ 371
375#define EXT4_GET_BLOCKS_DIO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ 372/*
376 EXT4_GET_BLOCKS_DIO_CREATE_EXT) 373 * Flags used by ext4_free_blocks
374 */
375#define EXT4_FREE_BLOCKS_METADATA 0x0001
376#define EXT4_FREE_BLOCKS_FORGET 0x0002
377#define EXT4_FREE_BLOCKS_VALIDATED 0x0004
377 378
378/* 379/*
379 * ioctl commands 380 * ioctl commands
@@ -627,7 +628,7 @@ struct ext4_inode_info {
627 * near to their parent directory's inode. 628 * near to their parent directory's inode.
628 */ 629 */
629 ext4_group_t i_block_group; 630 ext4_group_t i_block_group;
630 __u32 i_state; /* Dynamic state flags for ext4 */ 631 unsigned long i_state_flags; /* Dynamic state flags */
631 632
632 ext4_lblk_t i_dir_start_lookup; 633 ext4_lblk_t i_dir_start_lookup;
633#ifdef CONFIG_EXT4_FS_XATTR 634#ifdef CONFIG_EXT4_FS_XATTR
@@ -693,16 +694,30 @@ struct ext4_inode_info {
693 unsigned int i_reserved_meta_blocks; 694 unsigned int i_reserved_meta_blocks;
694 unsigned int i_allocated_meta_blocks; 695 unsigned int i_allocated_meta_blocks;
695 unsigned short i_delalloc_reserved_flag; 696 unsigned short i_delalloc_reserved_flag;
697 sector_t i_da_metadata_calc_last_lblock;
698 int i_da_metadata_calc_len;
696 699
697 /* on-disk additional length */ 700 /* on-disk additional length */
698 __u16 i_extra_isize; 701 __u16 i_extra_isize;
699 702
700 spinlock_t i_block_reservation_lock; 703 spinlock_t i_block_reservation_lock;
704#ifdef CONFIG_QUOTA
705 /* quota space reservation, managed internally by quota code */
706 qsize_t i_reserved_quota;
707#endif
701 708
702 /* completed async DIOs that might need unwritten extents handling */ 709 /* completed IOs that might need unwritten extents handling */
703 struct list_head i_aio_dio_complete_list; 710 struct list_head i_completed_io_list;
711 spinlock_t i_completed_io_lock;
704 /* current io_end structure for async DIO write*/ 712 /* current io_end structure for async DIO write*/
705 ext4_io_end_t *cur_aio_dio; 713 ext4_io_end_t *cur_aio_dio;
714
715 /*
716 * Transactions that contain inode's metadata needed to complete
717 * fsync and fdatasync, respectively.
718 */
719 tid_t i_sync_tid;
720 tid_t i_datasync_tid;
706}; 721};
707 722
708/* 723/*
@@ -744,12 +759,14 @@ struct ext4_inode_info {
744#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ 759#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
745#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ 760#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
746#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ 761#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
762#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
747#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ 763#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
748#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 764#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
749#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 765#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
750#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 766#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
751#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 767#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
752#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 768#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
769#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
753 770
754#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt 771#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
755#define set_opt(o, opt) o |= EXT4_MOUNT_##opt 772#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
@@ -997,7 +1014,7 @@ struct ext4_sb_info {
997 atomic_t s_lock_busy; 1014 atomic_t s_lock_busy;
998 1015
999 /* locality groups */ 1016 /* locality groups */
1000 struct ext4_locality_group *s_locality_groups; 1017 struct ext4_locality_group __percpu *s_locality_groups;
1001 1018
1002 /* for write statistics */ 1019 /* for write statistics */
1003 unsigned long s_sectors_written_start; 1020 unsigned long s_sectors_written_start;
@@ -1033,6 +1050,34 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
1033 (ino >= EXT4_FIRST_INO(sb) && 1050 (ino >= EXT4_FIRST_INO(sb) &&
1034 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); 1051 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
1035} 1052}
1053
1054/*
1055 * Inode dynamic state flags
1056 */
1057enum {
1058 EXT4_STATE_JDATA, /* journaled data exists */
1059 EXT4_STATE_NEW, /* inode is newly created */
1060 EXT4_STATE_XATTR, /* has in-inode xattrs */
1061 EXT4_STATE_NO_EXPAND, /* No space for expansion */
1062 EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */
1063 EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
1064 EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
1065};
1066
1067static inline int ext4_test_inode_state(struct inode *inode, int bit)
1068{
1069 return test_bit(bit, &EXT4_I(inode)->i_state_flags);
1070}
1071
1072static inline void ext4_set_inode_state(struct inode *inode, int bit)
1073{
1074 set_bit(bit, &EXT4_I(inode)->i_state_flags);
1075}
1076
1077static inline void ext4_clear_inode_state(struct inode *inode, int bit)
1078{
1079 clear_bit(bit, &EXT4_I(inode)->i_state_flags);
1080}
1036#else 1081#else
1037/* Assume that user mode programs are passing in an ext4fs superblock, not 1082/* Assume that user mode programs are passing in an ext4fs superblock, not
1038 * a kernel struct super_block. This will allow us to call the feature-test 1083 * a kernel struct super_block. This will allow us to call the feature-test
@@ -1109,6 +1154,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
1109#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 1154#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
1110#define EXT4_FEATURE_INCOMPAT_MMP 0x0100 1155#define EXT4_FEATURE_INCOMPAT_MMP 0x0100
1111#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 1156#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
1157#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */
1158#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */
1112 1159
1113#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR 1160#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
1114#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ 1161#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@ -1324,8 +1371,6 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
1324 ext4_fsblk_t goal, unsigned long *count, int *errp); 1371 ext4_fsblk_t goal, unsigned long *count, int *errp);
1325extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1372extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1326extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1373extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1327extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1328 ext4_fsblk_t block, unsigned long count, int metadata);
1329extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, 1374extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
1330 ext4_fsblk_t block, unsigned long count); 1375 ext4_fsblk_t block, unsigned long count);
1331extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); 1376extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
@@ -1384,16 +1429,15 @@ extern int ext4_mb_reserve_blocks(struct super_block *, int);
1384extern void ext4_discard_preallocations(struct inode *); 1429extern void ext4_discard_preallocations(struct inode *);
1385extern int __init init_ext4_mballoc(void); 1430extern int __init init_ext4_mballoc(void);
1386extern void exit_ext4_mballoc(void); 1431extern void exit_ext4_mballoc(void);
1387extern void ext4_mb_free_blocks(handle_t *, struct inode *, 1432extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1388 ext4_fsblk_t, unsigned long, int, unsigned long *); 1433 struct buffer_head *bh, ext4_fsblk_t block,
1434 unsigned long count, int flags);
1389extern int ext4_mb_add_groupinfo(struct super_block *sb, 1435extern int ext4_mb_add_groupinfo(struct super_block *sb,
1390 ext4_group_t i, struct ext4_group_desc *desc); 1436 ext4_group_t i, struct ext4_group_desc *desc);
1391extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); 1437extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
1392extern void ext4_mb_put_buddy_cache_lock(struct super_block *, 1438extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
1393 ext4_group_t, int); 1439 ext4_group_t, int);
1394/* inode.c */ 1440/* inode.c */
1395int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
1396 struct buffer_head *bh, ext4_fsblk_t blocknr);
1397struct buffer_head *ext4_getblk(handle_t *, struct inode *, 1441struct buffer_head *ext4_getblk(handle_t *, struct inode *,
1398 ext4_lblk_t, int, int *); 1442 ext4_lblk_t, int, int *);
1399struct buffer_head *ext4_bread(handle_t *, struct inode *, 1443struct buffer_head *ext4_bread(handle_t *, struct inode *,
@@ -1402,7 +1446,7 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
1402 struct buffer_head *bh_result, int create); 1446 struct buffer_head *bh_result, int create);
1403 1447
1404extern struct inode *ext4_iget(struct super_block *, unsigned long); 1448extern struct inode *ext4_iget(struct super_block *, unsigned long);
1405extern int ext4_write_inode(struct inode *, int); 1449extern int ext4_write_inode(struct inode *, struct writeback_control *);
1406extern int ext4_setattr(struct dentry *, struct iattr *); 1450extern int ext4_setattr(struct dentry *, struct iattr *);
1407extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, 1451extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
1408 struct kstat *stat); 1452 struct kstat *stat);
@@ -1424,8 +1468,10 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
1424extern int ext4_block_truncate_page(handle_t *handle, 1468extern int ext4_block_truncate_page(handle_t *handle,
1425 struct address_space *mapping, loff_t from); 1469 struct address_space *mapping, loff_t from);
1426extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1470extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1427extern qsize_t ext4_get_reserved_space(struct inode *inode); 1471extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1428extern int flush_aio_dio_completed_IO(struct inode *inode); 1472extern int flush_completed_IO(struct inode *inode);
1473extern void ext4_da_update_reserve_space(struct inode *inode,
1474 int used, int quota_claim);
1429/* ioctl.c */ 1475/* ioctl.c */
1430extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 1476extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
1431extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); 1477extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
@@ -1449,13 +1495,20 @@ extern int ext4_group_extend(struct super_block *sb,
1449 ext4_fsblk_t n_blocks_count); 1495 ext4_fsblk_t n_blocks_count);
1450 1496
1451/* super.c */ 1497/* super.c */
1452extern void ext4_error(struct super_block *, const char *, const char *, ...) 1498extern void __ext4_error(struct super_block *, const char *, const char *, ...)
1499 __attribute__ ((format (printf, 3, 4)));
1500#define ext4_error(sb, message...) __ext4_error(sb, __func__, ## message)
1501extern void ext4_error_inode(const char *, struct inode *, const char *, ...)
1502 __attribute__ ((format (printf, 3, 4)));
1503extern void ext4_error_file(const char *, struct file *, const char *, ...)
1453 __attribute__ ((format (printf, 3, 4))); 1504 __attribute__ ((format (printf, 3, 4)));
1454extern void __ext4_std_error(struct super_block *, const char *, int); 1505extern void __ext4_std_error(struct super_block *, const char *, int);
1455extern void ext4_abort(struct super_block *, const char *, const char *, ...) 1506extern void ext4_abort(struct super_block *, const char *, const char *, ...)
1456 __attribute__ ((format (printf, 3, 4))); 1507 __attribute__ ((format (printf, 3, 4)));
1457extern void ext4_warning(struct super_block *, const char *, const char *, ...) 1508extern void __ext4_warning(struct super_block *, const char *,
1509 const char *, ...)
1458 __attribute__ ((format (printf, 3, 4))); 1510 __attribute__ ((format (printf, 3, 4)));
1511#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, ## message)
1459extern void ext4_msg(struct super_block *, const char *, const char *, ...) 1512extern void ext4_msg(struct super_block *, const char *, const char *, ...)
1460 __attribute__ ((format (printf, 3, 4))); 1513 __attribute__ ((format (printf, 3, 4)));
1461extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, 1514extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
@@ -1728,7 +1781,7 @@ extern void ext4_ext_release(struct super_block *);
1728extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, 1781extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
1729 loff_t len); 1782 loff_t len);
1730extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, 1783extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
1731 loff_t len); 1784 ssize_t len);
1732extern int ext4_get_blocks(handle_t *handle, struct inode *inode, 1785extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
1733 sector_t block, unsigned int max_blocks, 1786 sector_t block, unsigned int max_blocks,
1734 struct buffer_head *bh, int flags); 1787 struct buffer_head *bh, int flags);
@@ -1740,6 +1793,15 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
1740 __u64 len, __u64 *moved_len); 1793 __u64 len, __u64 *moved_len);
1741 1794
1742 1795
1796/* BH_Uninit flag: blocks are allocated but uninitialized on disk */
1797enum ext4_state_bits {
1798 BH_Uninit /* blocks are allocated but uninitialized on disk */
1799 = BH_JBDPrivateStart,
1800};
1801
1802BUFFER_FNS(Uninit, uninit)
1803TAS_BUFFER_FNS(Uninit, uninit)
1804
1743/* 1805/*
1744 * Add new method to test whether block and inode bitmaps are properly 1806 * Add new method to test whether block and inode bitmaps are properly
1745 * initialized. With uninit_bg reading the block from disk is not enough 1807 * initialized. With uninit_bg reading the block from disk is not enough
@@ -1757,6 +1819,8 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
1757 set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); 1819 set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
1758} 1820}
1759 1821
/*
 * Nonzero when @b lies in the closed interval
 * [@first, @first + @len - 1].  @b and @first are each expanded twice,
 * so callers must pass expressions without side effects.
 */
#define in_range(b, first, len) \
	(!((b) < (first) || (b) > (first) + (len) - 1))
1823
1760#endif /* __KERNEL__ */ 1824#endif /* __KERNEL__ */
1761 1825
1762#endif /* _EXT4_H */ 1826#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2ca686454e87..bdb6ce7e2eb4 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); 225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
226} 226}
227 227
228extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); 228extern int ext4_ext_calc_metadata_amount(struct inode *inode,
229 sector_t lblocks);
229extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); 230extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
230extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 231extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
231extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 232extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 6a9409920dee..53d2764d71ca 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -4,6 +4,8 @@
4 4
5#include "ext4_jbd2.h" 5#include "ext4_jbd2.h"
6 6
7#include <trace/events/ext4.h>
8
7int __ext4_journal_get_undo_access(const char *where, handle_t *handle, 9int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
8 struct buffer_head *bh) 10 struct buffer_head *bh)
9{ 11{
@@ -32,35 +34,69 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
32 return err; 34 return err;
33} 35}
34 36
35int __ext4_journal_forget(const char *where, handle_t *handle, 37/*
36 struct buffer_head *bh) 38 * The ext4 forget function must perform a revoke if we are freeing data
39 * which has been journaled. Metadata (eg. indirect blocks) must be
40 * revoked in all cases.
41 *
42 * "bh" may be NULL: a metadata block may have been freed from memory
43 * but there may still be a record of it in the journal, and that record
44 * still needs to be revoked.
45 *
46 * If the handle isn't valid we're not journaling, but we still need to
47 * call into ext4_journal_revoke() to put the buffer head.
48 */
49int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
50 struct inode *inode, struct buffer_head *bh,
51 ext4_fsblk_t blocknr)
37{ 52{
38 int err = 0; 53 int err;
39 54
40 if (ext4_handle_valid(handle)) { 55 might_sleep();
41 err = jbd2_journal_forget(handle, bh); 56
42 if (err) 57 trace_ext4_forget(inode, is_metadata, blocknr);
43 ext4_journal_abort_handle(where, __func__, bh, 58 BUFFER_TRACE(bh, "enter");
44 handle, err); 59
45 } 60 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
46 else 61 "data mode %x\n",
62 bh, is_metadata, inode->i_mode,
63 test_opt(inode->i_sb, DATA_FLAGS));
64
65 /* In the no journal case, we can just do a bforget and return */
66 if (!ext4_handle_valid(handle)) {
47 bforget(bh); 67 bforget(bh);
48 return err; 68 return 0;
49} 69 }
50 70
51int __ext4_journal_revoke(const char *where, handle_t *handle, 71 /* Never use the revoke function if we are doing full data
52 ext4_fsblk_t blocknr, struct buffer_head *bh) 72 * journaling: there is no need to, and a V1 superblock won't
53{ 73 * support it. Otherwise, only skip the revoke on un-journaled
54 int err = 0; 74 * data blocks. */
55 75
56 if (ext4_handle_valid(handle)) { 76 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
57 err = jbd2_journal_revoke(handle, blocknr, bh); 77 (!is_metadata && !ext4_should_journal_data(inode))) {
58 if (err) 78 if (bh) {
59 ext4_journal_abort_handle(where, __func__, bh, 79 BUFFER_TRACE(bh, "call jbd2_journal_forget");
60 handle, err); 80 err = jbd2_journal_forget(handle, bh);
81 if (err)
82 ext4_journal_abort_handle(where, __func__, bh,
83 handle, err);
84 return err;
85 }
86 return 0;
61 } 87 }
62 else 88
63 bforget(bh); 89 /*
90 * data!=journal && (is_metadata || should_journal_data(inode))
91 */
92 BUFFER_TRACE(bh, "call jbd2_journal_revoke");
93 err = jbd2_journal_revoke(handle, blocknr, bh);
94 if (err) {
95 ext4_journal_abort_handle(where, __func__, bh, handle, err);
96 ext4_abort(inode->i_sb, __func__,
97 "error %d when attempting revoke", err);
98 }
99 BUFFER_TRACE(bh, "exit");
64 return err; 100 return err;
65} 101}
66 102
@@ -89,14 +125,14 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
89 ext4_journal_abort_handle(where, __func__, bh, 125 ext4_journal_abort_handle(where, __func__, bh,
90 handle, err); 126 handle, err);
91 } else { 127 } else {
92 if (inode && bh) 128 if (inode)
93 mark_buffer_dirty_inode(bh, inode); 129 mark_buffer_dirty_inode(bh, inode);
94 else 130 else
95 mark_buffer_dirty(bh); 131 mark_buffer_dirty(bh);
96 if (inode && inode_needs_sync(inode)) { 132 if (inode && inode_needs_sync(inode)) {
97 sync_dirty_buffer(bh); 133 sync_dirty_buffer(bh);
98 if (buffer_req(bh) && !buffer_uptodate(bh)) { 134 if (buffer_req(bh) && !buffer_uptodate(bh)) {
99 ext4_error(inode->i_sb, __func__, 135 ext4_error(inode->i_sb,
100 "IO error syncing inode, " 136 "IO error syncing inode, "
101 "inode=%lu, block=%llu", 137 "inode=%lu, block=%llu",
102 inode->i_ino, 138 inode->i_ino,
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index a2865980342f..b79ad5126468 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -49,7 +49,7 @@
49 49
50#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ 50#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
51 EXT4_XATTR_TRANS_BLOCKS - 2 + \ 51 EXT4_XATTR_TRANS_BLOCKS - 2 + \
52 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) 52 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
53 53
54/* 54/*
55 * Define the number of metadata blocks we need to account to modify data. 55 * Define the number of metadata blocks we need to account to modify data.
@@ -57,7 +57,7 @@
57 * This includes super block, inode block, quota blocks and xattr blocks 57 * This includes super block, inode block, quota blocks and xattr blocks
58 */ 58 */
59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ 59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
60 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) 60 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
61 61
62/* Delete operations potentially hit one directory's namespace plus an 62/* Delete operations potentially hit one directory's namespace plus an
63 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be 63 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
@@ -92,6 +92,7 @@
92 * but inode, sb and group updates are done only once */ 92 * but inode, sb and group updates are done only once */
93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ 93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
94 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) 94 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
95
95#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ 96#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
96 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) 97 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
97#else 98#else
@@ -99,6 +100,9 @@
99#define EXT4_QUOTA_INIT_BLOCKS(sb) 0 100#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
100#define EXT4_QUOTA_DEL_BLOCKS(sb) 0 101#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
101#endif 102#endif
/*
 * The per-quota-type block counts above, multiplied by MAXQUOTAS so that
 * a single value covers every quota type.
 */
#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) \
	(MAXQUOTAS * EXT4_QUOTA_TRANS_BLOCKS(sb))
#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) \
	(MAXQUOTAS * EXT4_QUOTA_INIT_BLOCKS(sb))
#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) \
	(MAXQUOTAS * EXT4_QUOTA_DEL_BLOCKS(sb))
102 106
103int 107int
104ext4_mark_iloc_dirty(handle_t *handle, 108ext4_mark_iloc_dirty(handle_t *handle,
@@ -116,12 +120,8 @@ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
116int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); 120int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
117 121
118/* 122/*
119 * Wrapper functions with which ext4 calls into JBD. The intent here is 123 * Wrapper functions with which ext4 calls into JBD.
120 * to allow these to be turned into appropriate stubs so ext4 can control
121 * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't
122 * been done yet.
123 */ 124 */
124
125void ext4_journal_abort_handle(const char *caller, const char *err_fn, 125void ext4_journal_abort_handle(const char *caller, const char *err_fn,
126 struct buffer_head *bh, handle_t *handle, int err); 126 struct buffer_head *bh, handle_t *handle, int err);
127 127
@@ -131,13 +131,9 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
131int __ext4_journal_get_write_access(const char *where, handle_t *handle, 131int __ext4_journal_get_write_access(const char *where, handle_t *handle,
132 struct buffer_head *bh); 132 struct buffer_head *bh);
133 133
134/* When called with an invalid handle, this will still do a put on the BH */ 134int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
135int __ext4_journal_forget(const char *where, handle_t *handle, 135 struct inode *inode, struct buffer_head *bh,
136 struct buffer_head *bh); 136 ext4_fsblk_t blocknr);
137
138/* When called with an invalid handle, this will still do a put on the BH */
139int __ext4_journal_revoke(const char *where, handle_t *handle,
140 ext4_fsblk_t blocknr, struct buffer_head *bh);
141 137
142int __ext4_journal_get_create_access(const char *where, 138int __ext4_journal_get_create_access(const char *where,
143 handle_t *handle, struct buffer_head *bh); 139 handle_t *handle, struct buffer_head *bh);
@@ -149,12 +145,11 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
149 __ext4_journal_get_undo_access(__func__, (handle), (bh)) 145 __ext4_journal_get_undo_access(__func__, (handle), (bh))
150#define ext4_journal_get_write_access(handle, bh) \ 146#define ext4_journal_get_write_access(handle, bh) \
151 __ext4_journal_get_write_access(__func__, (handle), (bh)) 147 __ext4_journal_get_write_access(__func__, (handle), (bh))
152#define ext4_journal_revoke(handle, blocknr, bh) \ 148#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
153 __ext4_journal_revoke(__func__, (handle), (blocknr), (bh)) 149 __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\
150 (block_nr))
154#define ext4_journal_get_create_access(handle, bh) \ 151#define ext4_journal_get_create_access(handle, bh) \
155 __ext4_journal_get_create_access(__func__, (handle), (bh)) 152 __ext4_journal_get_create_access(__func__, (handle), (bh))
156#define ext4_journal_forget(handle, bh) \
157 __ext4_journal_forget(__func__, (handle), (bh))
158#define ext4_handle_dirty_metadata(handle, inode, bh) \ 153#define ext4_handle_dirty_metadata(handle, inode, bh) \
159 __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh)) 154 __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh))
160 155
@@ -254,6 +249,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
254 return 0; 249 return 0;
255} 250}
256 251
252static inline void ext4_update_inode_fsync_trans(handle_t *handle,
253 struct inode *inode,
254 int datasync)
255{
256 struct ext4_inode_info *ei = EXT4_I(inode);
257
258 if (ext4_handle_valid(handle)) {
259 ei->i_sync_tid = handle->h_transaction->t_tid;
260 if (datasync)
261 ei->i_datasync_tid = handle->h_transaction->t_tid;
262 }
263}
264
257/* super.c */ 265/* super.c */
258int ext4_force_commit(struct super_block *sb); 266int ext4_force_commit(struct super_block *sb);
259 267
@@ -296,4 +304,28 @@ static inline int ext4_should_writeback_data(struct inode *inode)
296 return 0; 304 return 0;
297} 305}
298 306
307/*
308 * This function controls whether or not we should try to go down the
309 * dioread_nolock code paths, which makes it safe to avoid taking
310 * i_mutex for direct I/O reads. This only works for extent-based
311 * files, and it doesn't work for nobh or if data journaling is
312 * enabled, since the dioread_nolock code uses b_private to pass
313 * information back to the I/O completion handler, and this conflicts
314 * with the jbd's use of b_private.
315 */
316static inline int ext4_should_dioread_nolock(struct inode *inode)
317{
318 if (!test_opt(inode->i_sb, DIOREAD_NOLOCK))
319 return 0;
320 if (test_opt(inode->i_sb, NOBH))
321 return 0;
322 if (!S_ISREG(inode->i_mode))
323 return 0;
324 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
325 return 0;
326 if (ext4_should_journal_data(inode))
327 return 0;
328 return 1;
329}
330
299#endif /* _EXT4_JBD2_H */ 331#endif /* _EXT4_JBD2_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 715264b4bae4..94c8ee81f5e1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -195,8 +195,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
195 if (S_ISREG(inode->i_mode)) 195 if (S_ISREG(inode->i_mode))
196 block_group++; 196 block_group++;
197 } 197 }
198 bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + 198 bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
199 le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
200 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; 199 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
201 200
202 /* 201 /*
@@ -296,29 +295,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
296 * to allocate @blocks 295 * to allocate @blocks
297 * Worst case is one block per extent 296 * Worst case is one block per extent
298 */ 297 */
299int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) 298int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
300{ 299{
301 int lcap, icap, rcap, leafs, idxs, num; 300 struct ext4_inode_info *ei = EXT4_I(inode);
302 int newextents = blocks; 301 int idxs, num = 0;
303
304 rcap = ext4_ext_space_root_idx(inode, 0);
305 lcap = ext4_ext_space_block(inode, 0);
306 icap = ext4_ext_space_block_idx(inode, 0);
307 302
308 /* number of new leaf blocks needed */ 303 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
309 num = leafs = (newextents + lcap - 1) / lcap; 304 / sizeof(struct ext4_extent_idx));
310 305
311 /* 306 /*
312 * Worse case, we need separate index block(s) 307 * If the new delayed allocation block is contiguous with the
313 * to link all new leaf blocks 308 * previous da block, it can share index blocks with the
309 * previous block, so we only need to allocate a new index
310 * block every idxs leaf blocks. At idxs**2 blocks, we need
311 * an additional index block, and at idxs**3 blocks, yet
312 * another index block.
314 */ 313 */
315 idxs = (leafs + icap - 1) / icap; 314 if (ei->i_da_metadata_calc_len &&
316 do { 315 ei->i_da_metadata_calc_last_lblock+1 == lblock) {
317 num += idxs; 316 if ((ei->i_da_metadata_calc_len % idxs) == 0)
318 idxs = (idxs + icap - 1) / icap; 317 num++;
319 } while (idxs > rcap); 318 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
319 num++;
320 if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
321 num++;
322 ei->i_da_metadata_calc_len = 0;
323 } else
324 ei->i_da_metadata_calc_len++;
325 ei->i_da_metadata_calc_last_lblock++;
326 return num;
327 }
320 328
321 return num; 329 /*
330 * In the worst case we need a new set of index blocks at
331 * every level of the inode's extent tree.
332 */
333 ei->i_da_metadata_calc_len = 1;
334 ei->i_da_metadata_calc_last_lblock = lblock;
335 return ext_depth(inode) + 1;
322} 336}
323 337
324static int 338static int
@@ -425,7 +439,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode,
425 return 0; 439 return 0;
426 440
427corrupted: 441corrupted:
428 ext4_error(inode->i_sb, function, 442 __ext4_error(inode->i_sb, function,
429 "bad header/extent in inode #%lu: %s - magic %x, " 443 "bad header/extent in inode #%lu: %s - magic %x, "
430 "entries %u, max %u(%u), depth %u(%u)", 444 "entries %u, max %u(%u), depth %u(%u)",
431 inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), 445 inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
@@ -688,7 +702,12 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
688 } 702 }
689 eh = ext_block_hdr(bh); 703 eh = ext_block_hdr(bh);
690 ppos++; 704 ppos++;
691 BUG_ON(ppos > depth); 705 if (unlikely(ppos > depth)) {
706 put_bh(bh);
707 EXT4_ERROR_INODE(inode,
708 "ppos %d > depth %d", ppos, depth);
709 goto err;
710 }
692 path[ppos].p_bh = bh; 711 path[ppos].p_bh = bh;
693 path[ppos].p_hdr = eh; 712 path[ppos].p_hdr = eh;
694 i--; 713 i--;
@@ -734,7 +753,12 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
734 if (err) 753 if (err)
735 return err; 754 return err;
736 755
737 BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block)); 756 if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
757 EXT4_ERROR_INODE(inode,
758 "logical %d == ei_block %d!",
759 logical, le32_to_cpu(curp->p_idx->ei_block));
760 return -EIO;
761 }
738 len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; 762 len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
739 if (logical > le32_to_cpu(curp->p_idx->ei_block)) { 763 if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
740 /* insert after */ 764 /* insert after */
@@ -764,9 +788,17 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
764 ext4_idx_store_pblock(ix, ptr); 788 ext4_idx_store_pblock(ix, ptr);
765 le16_add_cpu(&curp->p_hdr->eh_entries, 1); 789 le16_add_cpu(&curp->p_hdr->eh_entries, 1);
766 790
767 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) 791 if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
768 > le16_to_cpu(curp->p_hdr->eh_max)); 792 > le16_to_cpu(curp->p_hdr->eh_max))) {
769 BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr)); 793 EXT4_ERROR_INODE(inode,
794 "logical %d == ei_block %d!",
795 logical, le32_to_cpu(curp->p_idx->ei_block));
796 return -EIO;
797 }
798 if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
799 EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
800 return -EIO;
801 }
770 802
771 err = ext4_ext_dirty(handle, inode, curp); 803 err = ext4_ext_dirty(handle, inode, curp);
772 ext4_std_error(inode->i_sb, err); 804 ext4_std_error(inode->i_sb, err);
@@ -804,7 +836,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
804 836
805 /* if current leaf will be split, then we should use 837 /* if current leaf will be split, then we should use
806 * border from split point */ 838 * border from split point */
807 BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr)); 839 if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
840 EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
841 return -EIO;
842 }
808 if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { 843 if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
809 border = path[depth].p_ext[1].ee_block; 844 border = path[depth].p_ext[1].ee_block;
810 ext_debug("leaf will be split." 845 ext_debug("leaf will be split."
@@ -845,7 +880,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
845 880
846 /* initialize new leaf */ 881 /* initialize new leaf */
847 newblock = ablocks[--a]; 882 newblock = ablocks[--a];
848 BUG_ON(newblock == 0); 883 if (unlikely(newblock == 0)) {
884 EXT4_ERROR_INODE(inode, "newblock == 0!");
885 err = -EIO;
886 goto cleanup;
887 }
849 bh = sb_getblk(inode->i_sb, newblock); 888 bh = sb_getblk(inode->i_sb, newblock);
850 if (!bh) { 889 if (!bh) {
851 err = -EIO; 890 err = -EIO;
@@ -865,7 +904,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
865 ex = EXT_FIRST_EXTENT(neh); 904 ex = EXT_FIRST_EXTENT(neh);
866 905
867 /* move remainder of path[depth] to the new leaf */ 906 /* move remainder of path[depth] to the new leaf */
868 BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max); 907 if (unlikely(path[depth].p_hdr->eh_entries !=
908 path[depth].p_hdr->eh_max)) {
909 EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
910 path[depth].p_hdr->eh_entries,
911 path[depth].p_hdr->eh_max);
912 err = -EIO;
913 goto cleanup;
914 }
869 /* start copy from next extent */ 915 /* start copy from next extent */
870 /* TODO: we could do it by single memmove */ 916 /* TODO: we could do it by single memmove */
871 m = 0; 917 m = 0;
@@ -912,7 +958,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
912 958
913 /* create intermediate indexes */ 959 /* create intermediate indexes */
914 k = depth - at - 1; 960 k = depth - at - 1;
915 BUG_ON(k < 0); 961 if (unlikely(k < 0)) {
962 EXT4_ERROR_INODE(inode, "k %d < 0!", k);
963 err = -EIO;
964 goto cleanup;
965 }
916 if (k) 966 if (k)
917 ext_debug("create %d intermediate indices\n", k); 967 ext_debug("create %d intermediate indices\n", k);
918 /* insert new index into current index block */ 968 /* insert new index into current index block */
@@ -949,8 +999,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
949 999
950 ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, 1000 ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
951 EXT_MAX_INDEX(path[i].p_hdr)); 1001 EXT_MAX_INDEX(path[i].p_hdr));
952 BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) != 1002 if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
953 EXT_LAST_INDEX(path[i].p_hdr)); 1003 EXT_LAST_INDEX(path[i].p_hdr))) {
1004 EXT4_ERROR_INODE(inode,
1005 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
1006 le32_to_cpu(path[i].p_ext->ee_block));
1007 err = -EIO;
1008 goto cleanup;
1009 }
954 while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { 1010 while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
955 ext_debug("%d: move %d:%llu in new index %llu\n", i, 1011 ext_debug("%d: move %d:%llu in new index %llu\n", i,
956 le32_to_cpu(path[i].p_idx->ei_block), 1012 le32_to_cpu(path[i].p_idx->ei_block),
@@ -1007,7 +1063,8 @@ cleanup:
1007 for (i = 0; i < depth; i++) { 1063 for (i = 0; i < depth; i++) {
1008 if (!ablocks[i]) 1064 if (!ablocks[i])
1009 continue; 1065 continue;
1010 ext4_free_blocks(handle, inode, ablocks[i], 1, 1); 1066 ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
1067 EXT4_FREE_BLOCKS_METADATA);
1011 } 1068 }
1012 } 1069 }
1013 kfree(ablocks); 1070 kfree(ablocks);
@@ -1187,7 +1244,10 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
1187 struct ext4_extent *ex; 1244 struct ext4_extent *ex;
1188 int depth, ee_len; 1245 int depth, ee_len;
1189 1246
1190 BUG_ON(path == NULL); 1247 if (unlikely(path == NULL)) {
1248 EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
1249 return -EIO;
1250 }
1191 depth = path->p_depth; 1251 depth = path->p_depth;
1192 *phys = 0; 1252 *phys = 0;
1193 1253
@@ -1201,15 +1261,33 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
1201 ex = path[depth].p_ext; 1261 ex = path[depth].p_ext;
1202 ee_len = ext4_ext_get_actual_len(ex); 1262 ee_len = ext4_ext_get_actual_len(ex);
1203 if (*logical < le32_to_cpu(ex->ee_block)) { 1263 if (*logical < le32_to_cpu(ex->ee_block)) {
1204 BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); 1264 if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
1265 EXT4_ERROR_INODE(inode,
1266 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
1267 *logical, le32_to_cpu(ex->ee_block));
1268 return -EIO;
1269 }
1205 while (--depth >= 0) { 1270 while (--depth >= 0) {
1206 ix = path[depth].p_idx; 1271 ix = path[depth].p_idx;
1207 BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); 1272 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1273 EXT4_ERROR_INODE(inode,
1274 "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
1275 ix != NULL ? ix->ei_block : 0,
1276 EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
1277 EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0,
1278 depth);
1279 return -EIO;
1280 }
1208 } 1281 }
1209 return 0; 1282 return 0;
1210 } 1283 }
1211 1284
1212 BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len)); 1285 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
1286 EXT4_ERROR_INODE(inode,
1287 "logical %d < ee_block %d + ee_len %d!",
1288 *logical, le32_to_cpu(ex->ee_block), ee_len);
1289 return -EIO;
1290 }
1213 1291
1214 *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; 1292 *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
1215 *phys = ext_pblock(ex) + ee_len - 1; 1293 *phys = ext_pblock(ex) + ee_len - 1;
@@ -1235,7 +1313,10 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
1235 int depth; /* Note, NOT eh_depth; depth from top of tree */ 1313 int depth; /* Note, NOT eh_depth; depth from top of tree */
1236 int ee_len; 1314 int ee_len;
1237 1315
1238 BUG_ON(path == NULL); 1316 if (unlikely(path == NULL)) {
1317 EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
1318 return -EIO;
1319 }
1239 depth = path->p_depth; 1320 depth = path->p_depth;
1240 *phys = 0; 1321 *phys = 0;
1241 1322
@@ -1249,17 +1330,32 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
1249 ex = path[depth].p_ext; 1330 ex = path[depth].p_ext;
1250 ee_len = ext4_ext_get_actual_len(ex); 1331 ee_len = ext4_ext_get_actual_len(ex);
1251 if (*logical < le32_to_cpu(ex->ee_block)) { 1332 if (*logical < le32_to_cpu(ex->ee_block)) {
1252 BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); 1333 if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
1334 EXT4_ERROR_INODE(inode,
1335 "first_extent(path[%d].p_hdr) != ex",
1336 depth);
1337 return -EIO;
1338 }
1253 while (--depth >= 0) { 1339 while (--depth >= 0) {
1254 ix = path[depth].p_idx; 1340 ix = path[depth].p_idx;
1255 BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); 1341 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1342 EXT4_ERROR_INODE(inode,
1343 "ix != EXT_FIRST_INDEX *logical %d!",
1344 *logical);
1345 return -EIO;
1346 }
1256 } 1347 }
1257 *logical = le32_to_cpu(ex->ee_block); 1348 *logical = le32_to_cpu(ex->ee_block);
1258 *phys = ext_pblock(ex); 1349 *phys = ext_pblock(ex);
1259 return 0; 1350 return 0;
1260 } 1351 }
1261 1352
1262 BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len)); 1353 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
1354 EXT4_ERROR_INODE(inode,
1355 "logical %d < ee_block %d + ee_len %d!",
1356 *logical, le32_to_cpu(ex->ee_block), ee_len);
1357 return -EIO;
1358 }
1263 1359
1264 if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { 1360 if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
1265 /* next allocated block in this leaf */ 1361 /* next allocated block in this leaf */
@@ -1398,8 +1494,12 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
1398 1494
1399 eh = path[depth].p_hdr; 1495 eh = path[depth].p_hdr;
1400 ex = path[depth].p_ext; 1496 ex = path[depth].p_ext;
1401 BUG_ON(ex == NULL); 1497
1402 BUG_ON(eh == NULL); 1498 if (unlikely(ex == NULL || eh == NULL)) {
1499 EXT4_ERROR_INODE(inode,
1500 "ex %p == NULL or eh %p == NULL", ex, eh);
1501 return -EIO;
1502 }
1403 1503
1404 if (depth == 0) { 1504 if (depth == 0) {
1405 /* there is no tree at all */ 1505 /* there is no tree at all */
@@ -1522,8 +1622,9 @@ int ext4_ext_try_to_merge(struct inode *inode,
1522 merge_done = 1; 1622 merge_done = 1;
1523 WARN_ON(eh->eh_entries == 0); 1623 WARN_ON(eh->eh_entries == 0);
1524 if (!eh->eh_entries) 1624 if (!eh->eh_entries)
1525 ext4_error(inode->i_sb, "ext4_ext_try_to_merge", 1625 ext4_error(inode->i_sb,
1526 "inode#%lu, eh->eh_entries = 0!", inode->i_ino); 1626 "inode#%lu, eh->eh_entries = 0!",
1627 inode->i_ino);
1527 } 1628 }
1528 1629
1529 return merge_done; 1630 return merge_done;
@@ -1596,13 +1697,19 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1596 ext4_lblk_t next; 1697 ext4_lblk_t next;
1597 unsigned uninitialized = 0; 1698 unsigned uninitialized = 0;
1598 1699
1599 BUG_ON(ext4_ext_get_actual_len(newext) == 0); 1700 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
1701 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
1702 return -EIO;
1703 }
1600 depth = ext_depth(inode); 1704 depth = ext_depth(inode);
1601 ex = path[depth].p_ext; 1705 ex = path[depth].p_ext;
1602 BUG_ON(path[depth].p_hdr == NULL); 1706 if (unlikely(path[depth].p_hdr == NULL)) {
1707 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
1708 return -EIO;
1709 }
1603 1710
1604 /* try to insert block into found extent and return */ 1711 /* try to insert block into found extent and return */
1605 if (ex && (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT) 1712 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
1606 && ext4_can_extents_be_merged(inode, ex, newext)) { 1713 && ext4_can_extents_be_merged(inode, ex, newext)) {
1607 ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", 1714 ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
1608 ext4_ext_is_uninitialized(newext), 1715 ext4_ext_is_uninitialized(newext),
@@ -1723,7 +1830,7 @@ has_space:
1723 1830
1724merge: 1831merge:
1725 /* try to merge extents to the right */ 1832 /* try to merge extents to the right */
1726 if (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT) 1833 if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
1727 ext4_ext_try_to_merge(inode, path, nearex); 1834 ext4_ext_try_to_merge(inode, path, nearex);
1728 1835
1729 /* try to merge extents to the left */ 1836 /* try to merge extents to the left */
@@ -1761,7 +1868,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1761 while (block < last && block != EXT_MAX_BLOCK) { 1868 while (block < last && block != EXT_MAX_BLOCK) {
1762 num = last - block; 1869 num = last - block;
1763 /* find extent for this block */ 1870 /* find extent for this block */
1871 down_read(&EXT4_I(inode)->i_data_sem);
1764 path = ext4_ext_find_extent(inode, block, path); 1872 path = ext4_ext_find_extent(inode, block, path);
1873 up_read(&EXT4_I(inode)->i_data_sem);
1765 if (IS_ERR(path)) { 1874 if (IS_ERR(path)) {
1766 err = PTR_ERR(path); 1875 err = PTR_ERR(path);
1767 path = NULL; 1876 path = NULL;
@@ -1769,7 +1878,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1769 } 1878 }
1770 1879
1771 depth = ext_depth(inode); 1880 depth = ext_depth(inode);
1772 BUG_ON(path[depth].p_hdr == NULL); 1881 if (unlikely(path[depth].p_hdr == NULL)) {
1882 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
1883 err = -EIO;
1884 break;
1885 }
1773 ex = path[depth].p_ext; 1886 ex = path[depth].p_ext;
1774 next = ext4_ext_next_allocated_block(path); 1887 next = ext4_ext_next_allocated_block(path);
1775 1888
@@ -1820,7 +1933,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1820 cbex.ec_type = EXT4_EXT_CACHE_EXTENT; 1933 cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
1821 } 1934 }
1822 1935
1823 BUG_ON(cbex.ec_len == 0); 1936 if (unlikely(cbex.ec_len == 0)) {
1937 EXT4_ERROR_INODE(inode, "cbex.ec_len == 0");
1938 err = -EIO;
1939 break;
1940 }
1824 err = func(inode, path, &cbex, ex, cbdata); 1941 err = func(inode, path, &cbex, ex, cbdata);
1825 ext4_ext_drop_refs(path); 1942 ext4_ext_drop_refs(path);
1826 1943
@@ -1934,7 +2051,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
1934 2051
1935 BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && 2052 BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
1936 cex->ec_type != EXT4_EXT_CACHE_EXTENT); 2053 cex->ec_type != EXT4_EXT_CACHE_EXTENT);
1937 if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { 2054 if (in_range(block, cex->ec_block, cex->ec_len)) {
1938 ex->ee_block = cpu_to_le32(cex->ec_block); 2055 ex->ee_block = cpu_to_le32(cex->ec_block);
1939 ext4_ext_store_pblock(ex, cex->ec_start); 2056 ext4_ext_store_pblock(ex, cex->ec_start);
1940 ex->ee_len = cpu_to_le16(cex->ec_len); 2057 ex->ee_len = cpu_to_le16(cex->ec_len);
@@ -1957,14 +2074,16 @@ errout:
1957static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, 2074static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1958 struct ext4_ext_path *path) 2075 struct ext4_ext_path *path)
1959{ 2076{
1960 struct buffer_head *bh;
1961 int err; 2077 int err;
1962 ext4_fsblk_t leaf; 2078 ext4_fsblk_t leaf;
1963 2079
1964 /* free index block */ 2080 /* free index block */
1965 path--; 2081 path--;
1966 leaf = idx_pblock(path->p_idx); 2082 leaf = idx_pblock(path->p_idx);
1967 BUG_ON(path->p_hdr->eh_entries == 0); 2083 if (unlikely(path->p_hdr->eh_entries == 0)) {
2084 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
2085 return -EIO;
2086 }
1968 err = ext4_ext_get_access(handle, inode, path); 2087 err = ext4_ext_get_access(handle, inode, path);
1969 if (err) 2088 if (err)
1970 return err; 2089 return err;
@@ -1973,9 +2092,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1973 if (err) 2092 if (err)
1974 return err; 2093 return err;
1975 ext_debug("index is empty, remove it, free block %llu\n", leaf); 2094 ext_debug("index is empty, remove it, free block %llu\n", leaf);
1976 bh = sb_find_get_block(inode->i_sb, leaf); 2095 ext4_free_blocks(handle, inode, 0, leaf, 1,
1977 ext4_forget(handle, 1, inode, bh, leaf); 2096 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
1978 ext4_free_blocks(handle, inode, leaf, 1, 1);
1979 return err; 2097 return err;
1980} 2098}
1981 2099
@@ -2042,12 +2160,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2042 struct ext4_extent *ex, 2160 struct ext4_extent *ex,
2043 ext4_lblk_t from, ext4_lblk_t to) 2161 ext4_lblk_t from, ext4_lblk_t to)
2044{ 2162{
2045 struct buffer_head *bh;
2046 unsigned short ee_len = ext4_ext_get_actual_len(ex); 2163 unsigned short ee_len = ext4_ext_get_actual_len(ex);
2047 int i, metadata = 0; 2164 int flags = EXT4_FREE_BLOCKS_FORGET;
2048 2165
2049 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 2166 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2050 metadata = 1; 2167 flags |= EXT4_FREE_BLOCKS_METADATA;
2051#ifdef EXTENTS_STATS 2168#ifdef EXTENTS_STATS
2052 { 2169 {
2053 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2170 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2072,11 +2189,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2072 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2189 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2073 start = ext_pblock(ex) + ee_len - num; 2190 start = ext_pblock(ex) + ee_len - num;
2074 ext_debug("free last %u blocks starting %llu\n", num, start); 2191 ext_debug("free last %u blocks starting %llu\n", num, start);
2075 for (i = 0; i < num; i++) { 2192 ext4_free_blocks(handle, inode, 0, start, num, flags);
2076 bh = sb_find_get_block(inode->i_sb, start + i);
2077 ext4_forget(handle, 0, inode, bh, start + i);
2078 }
2079 ext4_free_blocks(handle, inode, start, num, metadata);
2080 } else if (from == le32_to_cpu(ex->ee_block) 2193 } else if (from == le32_to_cpu(ex->ee_block)
2081 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { 2194 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
2082 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", 2195 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -2108,8 +2221,10 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2108 if (!path[depth].p_hdr) 2221 if (!path[depth].p_hdr)
2109 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); 2222 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
2110 eh = path[depth].p_hdr; 2223 eh = path[depth].p_hdr;
2111 BUG_ON(eh == NULL); 2224 if (unlikely(path[depth].p_hdr == NULL)) {
2112 2225 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
2226 return -EIO;
2227 }
2113 /* find where to start removing */ 2228 /* find where to start removing */
2114 ex = EXT_LAST_EXTENT(eh); 2229 ex = EXT_LAST_EXTENT(eh);
2115 2230
@@ -2167,7 +2282,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2167 correct_index = 1; 2282 correct_index = 1;
2168 credits += (ext_depth(inode)) + 1; 2283 credits += (ext_depth(inode)) + 1;
2169 } 2284 }
2170 credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 2285 credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
2171 2286
2172 err = ext4_ext_truncate_extend_restart(handle, inode, credits); 2287 err = ext4_ext_truncate_extend_restart(handle, inode, credits);
2173 if (err) 2288 if (err)
@@ -2972,7 +3087,7 @@ fix_extent_len:
2972 ext4_ext_dirty(handle, inode, path + depth); 3087 ext4_ext_dirty(handle, inode, path + depth);
2973 return err; 3088 return err;
2974} 3089}
2975static int ext4_convert_unwritten_extents_dio(handle_t *handle, 3090static int ext4_convert_unwritten_extents_endio(handle_t *handle,
2976 struct inode *inode, 3091 struct inode *inode,
2977 struct ext4_ext_path *path) 3092 struct ext4_ext_path *path)
2978{ 3093{
@@ -3027,6 +3142,14 @@ out:
3027 return err; 3142 return err;
3028} 3143}
3029 3144
3145static void unmap_underlying_metadata_blocks(struct block_device *bdev,
3146 sector_t block, int count)
3147{
3148 int i;
3149 for (i = 0; i < count; i++)
3150 unmap_underlying_metadata(bdev, block + i);
3151}
3152
3030static int 3153static int
3031ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3154ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3032 ext4_lblk_t iblock, unsigned int max_blocks, 3155 ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3044,8 +3167,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3044 flags, allocated); 3167 flags, allocated);
3045 ext4_ext_show_leaf(inode, path); 3168 ext4_ext_show_leaf(inode, path);
3046 3169
3047 /* DIO get_block() before submit the IO, split the extent */ 3170 /* get_block() before submit the IO, split the extent */
3048 if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) { 3171 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3049 ret = ext4_split_unwritten_extents(handle, 3172 ret = ext4_split_unwritten_extents(handle,
3050 inode, path, iblock, 3173 inode, path, iblock,
3051 max_blocks, flags); 3174 max_blocks, flags);
@@ -3055,15 +3178,19 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3055 * completed 3178 * completed
3056 */ 3179 */
3057 if (io) 3180 if (io)
3058 io->flag = DIO_AIO_UNWRITTEN; 3181 io->flag = EXT4_IO_UNWRITTEN;
3059 else 3182 else
3060 EXT4_I(inode)->i_state |= EXT4_STATE_DIO_UNWRITTEN; 3183 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3184 if (ext4_should_dioread_nolock(inode))
3185 set_buffer_uninit(bh_result);
3061 goto out; 3186 goto out;
3062 } 3187 }
3063 /* async DIO end_io complete, convert the filled extent to written */ 3188 /* IO end_io complete, convert the filled extent to written */
3064 if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) { 3189 if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
3065 ret = ext4_convert_unwritten_extents_dio(handle, inode, 3190 ret = ext4_convert_unwritten_extents_endio(handle, inode,
3066 path); 3191 path);
3192 if (ret >= 0)
3193 ext4_update_inode_fsync_trans(handle, inode, 1);
3067 goto out2; 3194 goto out2;
3068 } 3195 }
3069 /* buffered IO case */ 3196 /* buffered IO case */
@@ -3091,6 +3218,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3091 ret = ext4_ext_convert_to_initialized(handle, inode, 3218 ret = ext4_ext_convert_to_initialized(handle, inode,
3092 path, iblock, 3219 path, iblock,
3093 max_blocks); 3220 max_blocks);
3221 if (ret >= 0)
3222 ext4_update_inode_fsync_trans(handle, inode, 1);
3094out: 3223out:
3095 if (ret <= 0) { 3224 if (ret <= 0) {
3096 err = ret; 3225 err = ret;
@@ -3098,6 +3227,30 @@ out:
3098 } else 3227 } else
3099 allocated = ret; 3228 allocated = ret;
3100 set_buffer_new(bh_result); 3229 set_buffer_new(bh_result);
3230 /*
3231 * if we allocated more blocks than requested
3232 * we need to make sure we unmap the extra block
3233 * allocated. The actual needed block will get
3234 * unmapped later when we find the buffer_head marked
3235 * new.
3236 */
3237 if (allocated > max_blocks) {
3238 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
3239 newblock + max_blocks,
3240 allocated - max_blocks);
3241 allocated = max_blocks;
3242 }
3243
3244 /*
3245 * If we have done fallocate with the offset that is already
3246 * delayed allocated, we would have block reservation
3247 * and quota reservation done in the delayed write path.
3248 * But fallocate would have already updated quota and block
3249 * count for this offset. So cancel these reservation
3250 */
3251 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
3252 ext4_da_update_reserve_space(inode, allocated, 0);
3253
3101map_out: 3254map_out:
3102 set_buffer_mapped(bh_result); 3255 set_buffer_mapped(bh_result);
3103out1: 3256out1:
@@ -3138,7 +3291,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3138{ 3291{
3139 struct ext4_ext_path *path = NULL; 3292 struct ext4_ext_path *path = NULL;
3140 struct ext4_extent_header *eh; 3293 struct ext4_extent_header *eh;
3141 struct ext4_extent newex, *ex; 3294 struct ext4_extent newex, *ex, *last_ex;
3142 ext4_fsblk_t newblock; 3295 ext4_fsblk_t newblock;
3143 int err = 0, depth, ret, cache_type; 3296 int err = 0, depth, ret, cache_type;
3144 unsigned int allocated = 0; 3297 unsigned int allocated = 0;
@@ -3190,7 +3343,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3190 * this situation is possible, though, _during_ tree modification; 3343 * this situation is possible, though, _during_ tree modification;
3191 * this is why assert can't be put in ext4_ext_find_extent() 3344 * this is why assert can't be put in ext4_ext_find_extent()
3192 */ 3345 */
3193 BUG_ON(path[depth].p_ext == NULL && depth != 0); 3346 if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
3347 EXT4_ERROR_INODE(inode, "bad extent address "
3348 "iblock: %d, depth: %d pblock %lld",
3349 iblock, depth, path[depth].p_block);
3350 err = -EIO;
3351 goto out2;
3352 }
3194 eh = path[depth].p_hdr; 3353 eh = path[depth].p_hdr;
3195 3354
3196 ex = path[depth].p_ext; 3355 ex = path[depth].p_ext;
@@ -3205,7 +3364,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3205 */ 3364 */
3206 ee_len = ext4_ext_get_actual_len(ex); 3365 ee_len = ext4_ext_get_actual_len(ex);
3207 /* if found extent covers block, simply return it */ 3366 /* if found extent covers block, simply return it */
3208 if (iblock >= ee_block && iblock < ee_block + ee_len) { 3367 if (in_range(iblock, ee_block, ee_len)) {
3209 newblock = iblock - ee_block + ee_start; 3368 newblock = iblock - ee_block + ee_start;
3210 /* number of remaining blocks in the extent */ 3369 /* number of remaining blocks in the extent */
3211 allocated = ee_len - (iblock - ee_block); 3370 allocated = ee_len - (iblock - ee_block);
@@ -3297,21 +3456,35 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3297 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ 3456 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
3298 ext4_ext_mark_uninitialized(&newex); 3457 ext4_ext_mark_uninitialized(&newex);
3299 /* 3458 /*
3300 * io_end structure was created for every async 3459 * io_end structure was created for every IO write to an
3301 * direct IO write to the middle of the file. 3460 * uninitialized extent. To avoid unecessary conversion,
3302 * To avoid unecessary convertion for every aio dio rewrite 3461 * here we flag the IO that really needs the conversion.
3303 * to the mid of file, here we flag the IO that is really
3304 * need the convertion.
3305 * For non asycn direct IO case, flag the inode state 3462 * For non asycn direct IO case, flag the inode state
3306 * that we need to perform convertion when IO is done. 3463 * that we need to perform convertion when IO is done.
3307 */ 3464 */
3308 if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) { 3465 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3309 if (io) 3466 if (io)
3310 io->flag = DIO_AIO_UNWRITTEN; 3467 io->flag = EXT4_IO_UNWRITTEN;
3311 else 3468 else
3312 EXT4_I(inode)->i_state |= 3469 ext4_set_inode_state(inode,
3313 EXT4_STATE_DIO_UNWRITTEN;; 3470 EXT4_STATE_DIO_UNWRITTEN);
3314 } 3471 }
3472 if (ext4_should_dioread_nolock(inode))
3473 set_buffer_uninit(bh_result);
3474 }
3475
3476 if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) {
3477 if (unlikely(!eh->eh_entries)) {
3478 EXT4_ERROR_INODE(inode,
3479 "eh->eh_entries == 0 ee_block %d",
3480 ex->ee_block);
3481 err = -EIO;
3482 goto out2;
3483 }
3484 last_ex = EXT_LAST_EXTENT(eh);
3485 if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
3486 + ext4_ext_get_actual_len(last_ex))
3487 EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
3315 } 3488 }
3316 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3489 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
3317 if (err) { 3490 if (err) {
@@ -3319,20 +3492,35 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3319 /* not a good idea to call discard here directly, 3492 /* not a good idea to call discard here directly,
3320 * but otherwise we'd need to call it every free() */ 3493 * but otherwise we'd need to call it every free() */
3321 ext4_discard_preallocations(inode); 3494 ext4_discard_preallocations(inode);
3322 ext4_free_blocks(handle, inode, ext_pblock(&newex), 3495 ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
3323 ext4_ext_get_actual_len(&newex), 0); 3496 ext4_ext_get_actual_len(&newex), 0);
3324 goto out2; 3497 goto out2;
3325 } 3498 }
3326 3499
3327 /* previous routine could use block we allocated */ 3500 /* previous routine could use block we allocated */
3328 newblock = ext_pblock(&newex); 3501 newblock = ext_pblock(&newex);
3329 allocated = ext4_ext_get_actual_len(&newex); 3502 allocated = ext4_ext_get_actual_len(&newex);
3503 if (allocated > max_blocks)
3504 allocated = max_blocks;
3330 set_buffer_new(bh_result); 3505 set_buffer_new(bh_result);
3331 3506
3332 /* Cache only when it is _not_ an uninitialized extent */ 3507 /*
3333 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) 3508 * Update reserved blocks/metadata blocks after successful
3509 * block allocation which had been deferred till now.
3510 */
3511 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
3512 ext4_da_update_reserve_space(inode, allocated, 1);
3513
3514 /*
3515 * Cache the extent and update transaction to commit on fdatasync only
3516 * when it is _not_ an uninitialized extent.
3517 */
3518 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
3334 ext4_ext_put_in_cache(inode, iblock, allocated, newblock, 3519 ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
3335 EXT4_EXT_CACHE_EXTENT); 3520 EXT4_EXT_CACHE_EXTENT);
3521 ext4_update_inode_fsync_trans(handle, inode, 1);
3522 } else
3523 ext4_update_inode_fsync_trans(handle, inode, 0);
3336out: 3524out:
3337 if (allocated > max_blocks) 3525 if (allocated > max_blocks)
3338 allocated = max_blocks; 3526 allocated = max_blocks;
@@ -3431,6 +3619,13 @@ static void ext4_falloc_update_inode(struct inode *inode,
3431 i_size_write(inode, new_size); 3619 i_size_write(inode, new_size);
3432 if (new_size > EXT4_I(inode)->i_disksize) 3620 if (new_size > EXT4_I(inode)->i_disksize)
3433 ext4_update_i_disksize(inode, new_size); 3621 ext4_update_i_disksize(inode, new_size);
3622 } else {
3623 /*
3624 * Mark that we allocate beyond EOF so the subsequent truncate
3625 * can proceed even if the new size is the same as i_size.
3626 */
3627 if (new_size > i_size_read(inode))
3628 EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL;
3434 } 3629 }
3435 3630
3436} 3631}
@@ -3535,7 +3730,7 @@ retry:
3535 * Returns 0 on success. 3730 * Returns 0 on success.
3536 */ 3731 */
3537int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, 3732int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
3538 loff_t len) 3733 ssize_t len)
3539{ 3734{
3540 handle_t *handle; 3735 handle_t *handle;
3541 ext4_lblk_t block; 3736 ext4_lblk_t block;
@@ -3567,7 +3762,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
3567 map_bh.b_state = 0; 3762 map_bh.b_state = 0;
3568 ret = ext4_get_blocks(handle, inode, block, 3763 ret = ext4_get_blocks(handle, inode, block,
3569 max_blocks, &map_bh, 3764 max_blocks, &map_bh,
3570 EXT4_GET_BLOCKS_DIO_CONVERT_EXT); 3765 EXT4_GET_BLOCKS_IO_CONVERT_EXT);
3571 if (ret <= 0) { 3766 if (ret <= 0) {
3572 WARN_ON(ret <= 0); 3767 WARN_ON(ret <= 0);
3573 printk(KERN_ERR "%s: ext4_ext_get_blocks " 3768 printk(KERN_ERR "%s: ext4_ext_get_blocks "
@@ -3671,7 +3866,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
3671 int error = 0; 3866 int error = 0;
3672 3867
3673 /* in-inode? */ 3868 /* in-inode? */
3674 if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { 3869 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
3675 struct ext4_iloc iloc; 3870 struct ext4_iloc iloc;
3676 int offset; /* offset of xattr in inode */ 3871 int offset; /* offset of xattr in inode */
3677 3872
@@ -3699,7 +3894,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3699 __u64 start, __u64 len) 3894 __u64 start, __u64 len)
3700{ 3895{
3701 ext4_lblk_t start_blk; 3896 ext4_lblk_t start_blk;
3702 ext4_lblk_t len_blks;
3703 int error = 0; 3897 int error = 0;
3704 3898
3705 /* fallback to generic here if not in extents fmt */ 3899 /* fallback to generic here if not in extents fmt */
@@ -3713,17 +3907,21 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3713 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { 3907 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
3714 error = ext4_xattr_fiemap(inode, fieinfo); 3908 error = ext4_xattr_fiemap(inode, fieinfo);
3715 } else { 3909 } else {
3910 ext4_lblk_t len_blks;
3911 __u64 last_blk;
3912
3716 start_blk = start >> inode->i_sb->s_blocksize_bits; 3913 start_blk = start >> inode->i_sb->s_blocksize_bits;
3717 len_blks = len >> inode->i_sb->s_blocksize_bits; 3914 last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
3915 if (last_blk >= EXT_MAX_BLOCK)
3916 last_blk = EXT_MAX_BLOCK-1;
3917 len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
3718 3918
3719 /* 3919 /*
3720 * Walk the extent tree gathering extent information. 3920 * Walk the extent tree gathering extent information.
3721 * ext4_ext_fiemap_cb will push extents back to user. 3921 * ext4_ext_fiemap_cb will push extents back to user.
3722 */ 3922 */
3723 down_read(&EXT4_I(inode)->i_data_sem);
3724 error = ext4_ext_walk_space(inode, start_blk, len_blks, 3923 error = ext4_ext_walk_space(inode, start_blk, len_blks,
3725 ext4_ext_fiemap_cb, fieinfo); 3924 ext4_ext_fiemap_cb, fieinfo);
3726 up_read(&EXT4_I(inode)->i_data_sem);
3727 } 3925 }
3728 3926
3729 return error; 3927 return error;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 9630583cef28..d0776e410f34 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -23,6 +23,7 @@
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/path.h> 25#include <linux/path.h>
26#include <linux/quotaops.h>
26#include "ext4.h" 27#include "ext4.h"
27#include "ext4_jbd2.h" 28#include "ext4_jbd2.h"
28#include "xattr.h" 29#include "xattr.h"
@@ -35,9 +36,9 @@
35 */ 36 */
36static int ext4_release_file(struct inode *inode, struct file *filp) 37static int ext4_release_file(struct inode *inode, struct file *filp)
37{ 38{
38 if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) { 39 if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
39 ext4_alloc_da_blocks(inode); 40 ext4_alloc_da_blocks(inode);
40 EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE; 41 ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
41 } 42 }
42 /* if we are the last writer on the inode, drop the block reservation */ 43 /* if we are the last writer on the inode, drop the block reservation */
43 if ((filp->f_mode & FMODE_WRITE) && 44 if ((filp->f_mode & FMODE_WRITE) &&
@@ -116,18 +117,16 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
116 * devices or filesystem images. 117 * devices or filesystem images.
117 */ 118 */
118 memset(buf, 0, sizeof(buf)); 119 memset(buf, 0, sizeof(buf));
119 path.mnt = mnt->mnt_parent; 120 path.mnt = mnt;
120 path.dentry = mnt->mnt_mountpoint; 121 path.dentry = mnt->mnt_root;
121 path_get(&path);
122 cp = d_path(&path, buf, sizeof(buf)); 122 cp = d_path(&path, buf, sizeof(buf));
123 path_put(&path);
124 if (!IS_ERR(cp)) { 123 if (!IS_ERR(cp)) {
125 memcpy(sbi->s_es->s_last_mounted, cp, 124 memcpy(sbi->s_es->s_last_mounted, cp,
126 sizeof(sbi->s_es->s_last_mounted)); 125 sizeof(sbi->s_es->s_last_mounted));
127 sb->s_dirt = 1; 126 sb->s_dirt = 1;
128 } 127 }
129 } 128 }
130 return generic_file_open(inode, filp); 129 return dquot_file_open(inode, filp);
131} 130}
132 131
133const struct file_operations ext4_file_operations = { 132const struct file_operations ext4_file_operations = {
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 2b1531266ee2..0d0c3239c1cd 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -51,25 +51,30 @@
51int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) 51int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
52{ 52{
53 struct inode *inode = dentry->d_inode; 53 struct inode *inode = dentry->d_inode;
54 struct ext4_inode_info *ei = EXT4_I(inode);
54 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 55 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
55 int err, ret = 0; 56 int ret;
57 tid_t commit_tid;
56 58
57 J_ASSERT(ext4_journal_current_handle() == NULL); 59 J_ASSERT(ext4_journal_current_handle() == NULL);
58 60
59 trace_ext4_sync_file(file, dentry, datasync); 61 trace_ext4_sync_file(file, dentry, datasync);
60 62
61 ret = flush_aio_dio_completed_IO(inode); 63 if (inode->i_sb->s_flags & MS_RDONLY)
64 return 0;
65
66 ret = flush_completed_IO(inode);
62 if (ret < 0) 67 if (ret < 0)
63 goto out; 68 return ret;
69
70 if (!journal)
71 return simple_fsync(file, dentry, datasync);
72
64 /* 73 /*
65 * data=writeback: 74 * data=writeback,ordered:
66 * The caller's filemap_fdatawrite()/wait will sync the data. 75 * The caller's filemap_fdatawrite()/wait will sync the data.
67 * sync_inode() will sync the metadata 76 * Metadata is in the journal, we wait for proper transaction to
68 * 77 * commit here.
69 * data=ordered:
70 * The caller's filemap_fdatawrite() will write the data and
71 * sync_inode() will write the inode if it is dirty. Then the caller's
72 * filemap_fdatawait() will wait on the pages.
73 * 78 *
74 * data=journal: 79 * data=journal:
75 * filemap_fdatawrite won't do anything (the buffers are clean). 80 * filemap_fdatawrite won't do anything (the buffers are clean).
@@ -79,32 +84,25 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
79 * (they were dirtied by commit). But that's OK - the blocks are 84 * (they were dirtied by commit). But that's OK - the blocks are
80 * safe in-journal, which is all fsync() needs to ensure. 85 * safe in-journal, which is all fsync() needs to ensure.
81 */ 86 */
82 if (ext4_should_journal_data(inode)) { 87 if (ext4_should_journal_data(inode))
83 ret = ext4_force_commit(inode->i_sb); 88 return ext4_force_commit(inode->i_sb);
84 goto out;
85 }
86 89
87 if (!journal) 90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
88 ret = sync_mapping_buffers(inode->i_mapping); 91 if (jbd2_log_start_commit(journal, commit_tid)) {
89 92 /*
90 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 93 * When the journal is on a different device than the
91 goto out; 94 * fs data disk, we need to issue the barrier in
92 95 * writeback mode. (In ordered mode, the jbd2 layer
93 /* 96 * will take care of issuing the barrier. In
94 * The VFS has written the file data. If the inode is unaltered 97 * data=journal, all of the data blocks are written to
95 * then we need not start a commit. 98 * the journal device.)
96 */ 99 */
97 if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) { 100 if (ext4_should_writeback_data(inode) &&
98 struct writeback_control wbc = { 101 (journal->j_fs_dev != journal->j_dev) &&
99 .sync_mode = WB_SYNC_ALL, 102 (journal->j_flags & JBD2_BARRIER))
100 .nr_to_write = 0, /* sys_fsync did this */ 103 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
101 }; 104 jbd2_log_wait_commit(journal, commit_tid);
102 err = sync_inode(inode, &wbc); 105 } else if (journal->j_flags & JBD2_BARRIER)
103 if (ret == 0)
104 ret = err;
105 }
106out:
107 if (journal && (journal->j_flags & JBD2_BARRIER))
108 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 106 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
109 return ret; 107 return ret;
110} 108}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f3624ead4f6c..57f6eef6ccd6 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -76,8 +76,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
76 /* If checksum is bad mark all blocks and inodes use to prevent 76 /* If checksum is bad mark all blocks and inodes use to prevent
77 * allocation, essentially implementing a per-group read-only flag. */ 77 * allocation, essentially implementing a per-group read-only flag. */
78 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 78 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
79 ext4_error(sb, __func__, "Checksum bad for group %u", 79 ext4_error(sb, "Checksum bad for group %u", block_group);
80 block_group);
81 ext4_free_blks_set(sb, gdp, 0); 80 ext4_free_blks_set(sb, gdp, 0);
82 ext4_free_inodes_set(sb, gdp, 0); 81 ext4_free_inodes_set(sb, gdp, 0);
83 ext4_itable_unused_set(sb, gdp, 0); 82 ext4_itable_unused_set(sb, gdp, 0);
@@ -111,8 +110,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
111 bitmap_blk = ext4_inode_bitmap(sb, desc); 110 bitmap_blk = ext4_inode_bitmap(sb, desc);
112 bh = sb_getblk(sb, bitmap_blk); 111 bh = sb_getblk(sb, bitmap_blk);
113 if (unlikely(!bh)) { 112 if (unlikely(!bh)) {
114 ext4_error(sb, __func__, 113 ext4_error(sb, "Cannot read inode bitmap - "
115 "Cannot read inode bitmap - "
116 "block_group = %u, inode_bitmap = %llu", 114 "block_group = %u, inode_bitmap = %llu",
117 block_group, bitmap_blk); 115 block_group, bitmap_blk);
118 return NULL; 116 return NULL;
@@ -153,8 +151,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
153 set_bitmap_uptodate(bh); 151 set_bitmap_uptodate(bh);
154 if (bh_submit_read(bh) < 0) { 152 if (bh_submit_read(bh) < 0) {
155 put_bh(bh); 153 put_bh(bh);
156 ext4_error(sb, __func__, 154 ext4_error(sb, "Cannot read inode bitmap - "
157 "Cannot read inode bitmap - "
158 "block_group = %u, inode_bitmap = %llu", 155 "block_group = %u, inode_bitmap = %llu",
159 block_group, bitmap_blk); 156 block_group, bitmap_blk);
160 return NULL; 157 return NULL;
@@ -217,10 +214,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
217 * Note: we must free any quota before locking the superblock, 214 * Note: we must free any quota before locking the superblock,
218 * as writing the quota to disk may need the lock as well. 215 * as writing the quota to disk may need the lock as well.
219 */ 216 */
220 vfs_dq_init(inode); 217 dquot_initialize(inode);
221 ext4_xattr_delete_inode(handle, inode); 218 ext4_xattr_delete_inode(handle, inode);
222 vfs_dq_free_inode(inode); 219 dquot_free_inode(inode);
223 vfs_dq_drop(inode); 220 dquot_drop(inode);
224 221
225 is_directory = S_ISDIR(inode->i_mode); 222 is_directory = S_ISDIR(inode->i_mode);
226 223
@@ -229,8 +226,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
229 226
230 es = EXT4_SB(sb)->s_es; 227 es = EXT4_SB(sb)->s_es;
231 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { 228 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
232 ext4_error(sb, "ext4_free_inode", 229 ext4_error(sb, "reserved or nonexistent inode %lu", ino);
233 "reserved or nonexistent inode %lu", ino);
234 goto error_return; 230 goto error_return;
235 } 231 }
236 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 232 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
@@ -248,8 +244,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
248 cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), 244 cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
249 bit, bitmap_bh->b_data); 245 bit, bitmap_bh->b_data);
250 if (!cleared) 246 if (!cleared)
251 ext4_error(sb, "ext4_free_inode", 247 ext4_error(sb, "bit already cleared for inode %lu", ino);
252 "bit already cleared for inode %lu", ino);
253 else { 248 else {
254 gdp = ext4_get_group_desc(sb, block_group, &bh2); 249 gdp = ext4_get_group_desc(sb, block_group, &bh2);
255 250
@@ -268,7 +263,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
268 ext4_group_t f; 263 ext4_group_t f;
269 264
270 f = ext4_flex_group(sbi, block_group); 265 f = ext4_flex_group(sbi, block_group);
271 atomic_dec(&sbi->s_flex_groups[f].free_inodes); 266 atomic_dec(&sbi->s_flex_groups[f].used_dirs);
272 } 267 }
273 268
274 } 269 }
@@ -736,8 +731,7 @@ static int ext4_claim_inode(struct super_block *sb,
736 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 731 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
737 ino > EXT4_INODES_PER_GROUP(sb)) { 732 ino > EXT4_INODES_PER_GROUP(sb)) {
738 ext4_unlock_group(sb, group); 733 ext4_unlock_group(sb, group);
739 ext4_error(sb, __func__, 734 ext4_error(sb, "reserved inode or inode > inodes count - "
740 "reserved inode or inode > inodes count - "
741 "block_group = %u, inode=%lu", group, 735 "block_group = %u, inode=%lu", group,
742 ino + group * EXT4_INODES_PER_GROUP(sb)); 736 ino + group * EXT4_INODES_PER_GROUP(sb));
743 return 1; 737 return 1;
@@ -779,7 +773,7 @@ static int ext4_claim_inode(struct super_block *sb,
779 if (sbi->s_log_groups_per_flex) { 773 if (sbi->s_log_groups_per_flex) {
780 ext4_group_t f = ext4_flex_group(sbi, group); 774 ext4_group_t f = ext4_flex_group(sbi, group);
781 775
782 atomic_inc(&sbi->s_flex_groups[f].free_inodes); 776 atomic_inc(&sbi->s_flex_groups[f].used_dirs);
783 } 777 }
784 } 778 }
785 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 779 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
@@ -904,7 +898,7 @@ repeat_in_this_group:
904 BUFFER_TRACE(inode_bitmap_bh, 898 BUFFER_TRACE(inode_bitmap_bh,
905 "call ext4_handle_dirty_metadata"); 899 "call ext4_handle_dirty_metadata");
906 err = ext4_handle_dirty_metadata(handle, 900 err = ext4_handle_dirty_metadata(handle,
907 inode, 901 NULL,
908 inode_bitmap_bh); 902 inode_bitmap_bh);
909 if (err) 903 if (err)
910 goto fail; 904 goto fail;
@@ -1029,15 +1023,16 @@ got:
1029 inode->i_generation = sbi->s_next_generation++; 1023 inode->i_generation = sbi->s_next_generation++;
1030 spin_unlock(&sbi->s_next_gen_lock); 1024 spin_unlock(&sbi->s_next_gen_lock);
1031 1025
1032 ei->i_state = EXT4_STATE_NEW; 1026 ei->i_state_flags = 0;
1027 ext4_set_inode_state(inode, EXT4_STATE_NEW);
1033 1028
1034 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; 1029 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
1035 1030
1036 ret = inode; 1031 ret = inode;
1037 if (vfs_dq_alloc_inode(inode)) { 1032 dquot_initialize(inode);
1038 err = -EDQUOT; 1033 err = dquot_alloc_inode(inode);
1034 if (err)
1039 goto fail_drop; 1035 goto fail_drop;
1040 }
1041 1036
1042 err = ext4_init_acl(handle, inode, dir); 1037 err = ext4_init_acl(handle, inode, dir);
1043 if (err) 1038 if (err)
@@ -1074,10 +1069,10 @@ really_out:
1074 return ret; 1069 return ret;
1075 1070
1076fail_free_drop: 1071fail_free_drop:
1077 vfs_dq_free_inode(inode); 1072 dquot_free_inode(inode);
1078 1073
1079fail_drop: 1074fail_drop:
1080 vfs_dq_drop(inode); 1075 dquot_drop(inode);
1081 inode->i_flags |= S_NOQUOTA; 1076 inode->i_flags |= S_NOQUOTA;
1082 inode->i_nlink = 0; 1077 inode->i_nlink = 0;
1083 unlock_new_inode(inode); 1078 unlock_new_inode(inode);
@@ -1098,8 +1093,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
1098 1093
1099 /* Error cases - e2fsck has already cleaned up for us */ 1094 /* Error cases - e2fsck has already cleaned up for us */
1100 if (ino > max_ino) { 1095 if (ino > max_ino) {
1101 ext4_warning(sb, __func__, 1096 ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino);
1102 "bad orphan ino %lu! e2fsck was run?", ino);
1103 goto error; 1097 goto error;
1104 } 1098 }
1105 1099
@@ -1107,8 +1101,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
1107 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 1101 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
1108 bitmap_bh = ext4_read_inode_bitmap(sb, block_group); 1102 bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
1109 if (!bitmap_bh) { 1103 if (!bitmap_bh) {
1110 ext4_warning(sb, __func__, 1104 ext4_warning(sb, "inode bitmap error for orphan %lu", ino);
1111 "inode bitmap error for orphan %lu", ino);
1112 goto error; 1105 goto error;
1113 } 1106 }
1114 1107
@@ -1140,8 +1133,7 @@ iget_failed:
1140 err = PTR_ERR(inode); 1133 err = PTR_ERR(inode);
1141 inode = NULL; 1134 inode = NULL;
1142bad_orphan: 1135bad_orphan:
1143 ext4_warning(sb, __func__, 1136 ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino);
1144 "bad orphan inode %lu! e2fsck was run?", ino);
1145 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", 1137 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
1146 bit, (unsigned long long)bitmap_bh->b_blocknr, 1138 bit, (unsigned long long)bitmap_bh->b_blocknr,
1147 ext4_test_bit(bit, bitmap_bh->b_data)); 1139 ext4_test_bit(bit, bitmap_bh->b_data));
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2c8caa51addb..5381802d6052 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -38,6 +38,8 @@
38#include <linux/uio.h> 38#include <linux/uio.h>
39#include <linux/bio.h> 39#include <linux/bio.h>
40#include <linux/workqueue.h> 40#include <linux/workqueue.h>
41#include <linux/kernel.h>
42#include <linux/slab.h>
41 43
42#include "ext4_jbd2.h" 44#include "ext4_jbd2.h"
43#include "xattr.h" 45#include "xattr.h"
@@ -71,58 +73,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
71} 73}
72 74
73/* 75/*
74 * The ext4 forget function must perform a revoke if we are freeing data
75 * which has been journaled. Metadata (eg. indirect blocks) must be
76 * revoked in all cases.
77 *
78 * "bh" may be NULL: a metadata block may have been freed from memory
79 * but there may still be a record of it in the journal, and that record
80 * still needs to be revoked.
81 *
82 * If the handle isn't valid we're not journaling, but we still need to
83 * call into ext4_journal_revoke() to put the buffer head.
84 */
85int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
86 struct buffer_head *bh, ext4_fsblk_t blocknr)
87{
88 int err;
89
90 might_sleep();
91
92 BUFFER_TRACE(bh, "enter");
93
94 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
95 "data mode %x\n",
96 bh, is_metadata, inode->i_mode,
97 test_opt(inode->i_sb, DATA_FLAGS));
98
99 /* Never use the revoke function if we are doing full data
100 * journaling: there is no need to, and a V1 superblock won't
101 * support it. Otherwise, only skip the revoke on un-journaled
102 * data blocks. */
103
104 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
105 (!is_metadata && !ext4_should_journal_data(inode))) {
106 if (bh) {
107 BUFFER_TRACE(bh, "call jbd2_journal_forget");
108 return ext4_journal_forget(handle, bh);
109 }
110 return 0;
111 }
112
113 /*
114 * data!=journal && (is_metadata || should_journal_data(inode))
115 */
116 BUFFER_TRACE(bh, "call ext4_journal_revoke");
117 err = ext4_journal_revoke(handle, blocknr, bh);
118 if (err)
119 ext4_abort(inode->i_sb, __func__,
120 "error %d when attempting revoke", err);
121 BUFFER_TRACE(bh, "exit");
122 return err;
123}
124
125/*
126 * Work out how many blocks we need to proceed with the next chunk of a 76 * Work out how many blocks we need to proceed with the next chunk of a
127 * truncate transaction. 77 * truncate transaction.
128 */ 78 */
@@ -222,6 +172,9 @@ void ext4_delete_inode(struct inode *inode)
222 handle_t *handle; 172 handle_t *handle;
223 int err; 173 int err;
224 174
175 if (!is_bad_inode(inode))
176 dquot_initialize(inode);
177
225 if (ext4_should_order_data(inode)) 178 if (ext4_should_order_data(inode))
226 ext4_begin_ordered_truncate(inode, 0); 179 ext4_begin_ordered_truncate(inode, 0);
227 truncate_inode_pages(&inode->i_data, 0); 180 truncate_inode_pages(&inode->i_data, 0);
@@ -246,7 +199,7 @@ void ext4_delete_inode(struct inode *inode)
246 inode->i_size = 0; 199 inode->i_size = 0;
247 err = ext4_mark_inode_dirty(handle, inode); 200 err = ext4_mark_inode_dirty(handle, inode);
248 if (err) { 201 if (err) {
249 ext4_warning(inode->i_sb, __func__, 202 ext4_warning(inode->i_sb,
250 "couldn't mark inode dirty (err %d)", err); 203 "couldn't mark inode dirty (err %d)", err);
251 goto stop_handle; 204 goto stop_handle;
252 } 205 }
@@ -264,7 +217,7 @@ void ext4_delete_inode(struct inode *inode)
264 if (err > 0) 217 if (err > 0)
265 err = ext4_journal_restart(handle, 3); 218 err = ext4_journal_restart(handle, 3);
266 if (err != 0) { 219 if (err != 0) {
267 ext4_warning(inode->i_sb, __func__, 220 ext4_warning(inode->i_sb,
268 "couldn't extend journal (err %d)", err); 221 "couldn't extend journal (err %d)", err);
269 stop_handle: 222 stop_handle:
270 ext4_journal_stop(handle); 223 ext4_journal_stop(handle);
@@ -375,8 +328,7 @@ static int ext4_block_to_path(struct inode *inode,
375 offsets[n++] = i_block & (ptrs - 1); 328 offsets[n++] = i_block & (ptrs - 1);
376 final = ptrs; 329 final = ptrs;
377 } else { 330 } else {
378 ext4_warning(inode->i_sb, "ext4_block_to_path", 331 ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
379 "block %lu > max in inode %lu",
380 i_block + direct_blocks + 332 i_block + direct_blocks +
381 indirect_blocks + double_blocks, inode->i_ino); 333 indirect_blocks + double_blocks, inode->i_ino);
382 } 334 }
@@ -396,7 +348,7 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
396 if (blk && 348 if (blk &&
397 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), 349 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
398 blk, 1))) { 350 blk, 1))) {
399 ext4_error(inode->i_sb, function, 351 __ext4_error(inode->i_sb, function,
400 "invalid block reference %u " 352 "invalid block reference %u "
401 "in inode #%lu", blk, inode->i_ino); 353 "in inode #%lu", blk, inode->i_ino);
402 return -EIO; 354 return -EIO;
@@ -659,7 +611,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
659 if (*err) 611 if (*err)
660 goto failed_out; 612 goto failed_out;
661 613
662 BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS); 614 if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
615 EXT4_ERROR_INODE(inode,
616 "current_block %llu + count %lu > %d!",
617 current_block, count,
618 EXT4_MAX_BLOCK_FILE_PHYS);
619 *err = -EIO;
620 goto failed_out;
621 }
663 622
664 target -= count; 623 target -= count;
665 /* allocate blocks for indirect blocks */ 624 /* allocate blocks for indirect blocks */
@@ -695,7 +654,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
695 ar.flags = EXT4_MB_HINT_DATA; 654 ar.flags = EXT4_MB_HINT_DATA;
696 655
697 current_block = ext4_mb_new_blocks(handle, &ar, err); 656 current_block = ext4_mb_new_blocks(handle, &ar, err);
698 BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS); 657 if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
658 EXT4_ERROR_INODE(inode,
659 "current_block %llu + ar.len %d > %d!",
660 current_block, ar.len,
661 EXT4_MAX_BLOCK_FILE_PHYS);
662 *err = -EIO;
663 goto failed_out;
664 }
699 665
700 if (*err && (target == blks)) { 666 if (*err && (target == blks)) {
701 /* 667 /*
@@ -721,7 +687,7 @@ allocated:
721 return ret; 687 return ret;
722failed_out: 688failed_out:
723 for (i = 0; i < index; i++) 689 for (i = 0; i < index; i++)
724 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); 690 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
725 return ret; 691 return ret;
726} 692}
727 693
@@ -817,14 +783,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
817 return err; 783 return err;
818failed: 784failed:
819 /* Allocation failed, free what we already allocated */ 785 /* Allocation failed, free what we already allocated */
786 ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
820 for (i = 1; i <= n ; i++) { 787 for (i = 1; i <= n ; i++) {
821 BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); 788 /*
822 ext4_journal_forget(handle, branch[i].bh); 789 * branch[i].bh is newly allocated, so there is no
790 * need to revoke the block, which is why we don't
791 * need to set EXT4_FREE_BLOCKS_METADATA.
792 */
793 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
794 EXT4_FREE_BLOCKS_FORGET);
823 } 795 }
824 for (i = 0; i < indirect_blks; i++) 796 for (i = n+1; i < indirect_blks; i++)
825 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); 797 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
826 798
827 ext4_free_blocks(handle, inode, new_blocks[i], num, 0); 799 ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
828 800
829 return err; 801 return err;
830} 802}
@@ -903,12 +875,16 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
903 875
904err_out: 876err_out:
905 for (i = 1; i <= num; i++) { 877 for (i = 1; i <= num; i++) {
906 BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); 878 /*
907 ext4_journal_forget(handle, where[i].bh); 879 * branch[i].bh is newly allocated, so there is no
908 ext4_free_blocks(handle, inode, 880 * need to revoke the block, which is why we don't
909 le32_to_cpu(where[i-1].key), 1, 0); 881 * need to set EXT4_FREE_BLOCKS_METADATA.
882 */
883 ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
884 EXT4_FREE_BLOCKS_FORGET);
910 } 885 }
911 ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0); 886 ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
887 blks, 0);
912 888
913 return err; 889 return err;
914} 890}
@@ -1021,10 +997,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
1021 if (!err) 997 if (!err)
1022 err = ext4_splice_branch(handle, inode, iblock, 998 err = ext4_splice_branch(handle, inode, iblock,
1023 partial, indirect_blks, count); 999 partial, indirect_blks, count);
1024 else 1000 if (err)
1025 goto cleanup; 1001 goto cleanup;
1026 1002
1027 set_buffer_new(bh_result); 1003 set_buffer_new(bh_result);
1004
1005 ext4_update_inode_fsync_trans(handle, inode, 1);
1028got_it: 1006got_it:
1029 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); 1007 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
1030 if (count > blocks_to_boundary) 1008 if (count > blocks_to_boundary)
@@ -1043,92 +1021,121 @@ out:
1043 return err; 1021 return err;
1044} 1022}
1045 1023
1046qsize_t ext4_get_reserved_space(struct inode *inode) 1024#ifdef CONFIG_QUOTA
1025qsize_t *ext4_get_reserved_space(struct inode *inode)
1047{ 1026{
1048 unsigned long long total; 1027 return &EXT4_I(inode)->i_reserved_quota;
1049
1050 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1051 total = EXT4_I(inode)->i_reserved_data_blocks +
1052 EXT4_I(inode)->i_reserved_meta_blocks;
1053 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1054
1055 return total;
1056} 1028}
1029#endif
1030
1057/* 1031/*
1058 * Calculate the number of metadata blocks need to reserve 1032 * Calculate the number of metadata blocks need to reserve
1059 * to allocate @blocks for non extent file based file 1033 * to allocate a new block at @lblocks for non extent file based file
1060 */ 1034 */
1061static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) 1035static int ext4_indirect_calc_metadata_amount(struct inode *inode,
1036 sector_t lblock)
1062{ 1037{
1063 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1038 struct ext4_inode_info *ei = EXT4_I(inode);
1064 int ind_blks, dind_blks, tind_blks; 1039 sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1);
1065 1040 int blk_bits;
1066 /* number of new indirect blocks needed */
1067 ind_blks = (blocks + icap - 1) / icap;
1068 1041
1069 dind_blks = (ind_blks + icap - 1) / icap; 1042 if (lblock < EXT4_NDIR_BLOCKS)
1043 return 0;
1070 1044
1071 tind_blks = 1; 1045 lblock -= EXT4_NDIR_BLOCKS;
1072 1046
1073 return ind_blks + dind_blks + tind_blks; 1047 if (ei->i_da_metadata_calc_len &&
1048 (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
1049 ei->i_da_metadata_calc_len++;
1050 return 0;
1051 }
1052 ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
1053 ei->i_da_metadata_calc_len = 1;
1054 blk_bits = order_base_2(lblock);
1055 return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
1074} 1056}
1075 1057
1076/* 1058/*
1077 * Calculate the number of metadata blocks need to reserve 1059 * Calculate the number of metadata blocks need to reserve
1078 * to allocate given number of blocks 1060 * to allocate a block located at @lblock
1079 */ 1061 */
1080static int ext4_calc_metadata_amount(struct inode *inode, int blocks) 1062static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
1081{ 1063{
1082 if (!blocks)
1083 return 0;
1084
1085 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 1064 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1086 return ext4_ext_calc_metadata_amount(inode, blocks); 1065 return ext4_ext_calc_metadata_amount(inode, lblock);
1087 1066
1088 return ext4_indirect_calc_metadata_amount(inode, blocks); 1067 return ext4_indirect_calc_metadata_amount(inode, lblock);
1089} 1068}
1090 1069
1091static void ext4_da_update_reserve_space(struct inode *inode, int used) 1070/*
1071 * Called with i_data_sem down, which is important since we can call
1072 * ext4_discard_preallocations() from here.
1073 */
1074void ext4_da_update_reserve_space(struct inode *inode,
1075 int used, int quota_claim)
1092{ 1076{
1093 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1077 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1094 int total, mdb, mdb_free; 1078 struct ext4_inode_info *ei = EXT4_I(inode);
1095 1079 int mdb_free = 0, allocated_meta_blocks = 0;
1096 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1080
1097 /* recalculate the number of metablocks still need to be reserved */ 1081 spin_lock(&ei->i_block_reservation_lock);
1098 total = EXT4_I(inode)->i_reserved_data_blocks - used; 1082 trace_ext4_da_update_reserve_space(inode, used);
1099 mdb = ext4_calc_metadata_amount(inode, total); 1083 if (unlikely(used > ei->i_reserved_data_blocks)) {
1100 1084 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
1101 /* figure out how many metablocks to release */ 1085 "with only %d reserved data blocks\n",
1102 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1086 __func__, inode->i_ino, used,
1103 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1087 ei->i_reserved_data_blocks);
1104 1088 WARN_ON(1);
1105 if (mdb_free) { 1089 used = ei->i_reserved_data_blocks;
1106 /* Account for allocated meta_blocks */ 1090 }
1107 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; 1091
1108 1092 /* Update per-inode reservations */
1109 /* update fs dirty blocks counter */ 1093 ei->i_reserved_data_blocks -= used;
1094 used += ei->i_allocated_meta_blocks;
1095 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
1096 allocated_meta_blocks = ei->i_allocated_meta_blocks;
1097 ei->i_allocated_meta_blocks = 0;
1098 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
1099
1100 if (ei->i_reserved_data_blocks == 0) {
1101 /*
1102 * We can release all of the reserved metadata blocks
1103 * only when we have written all of the delayed
1104 * allocation blocks.
1105 */
1106 mdb_free = ei->i_reserved_meta_blocks;
1107 ei->i_reserved_meta_blocks = 0;
1108 ei->i_da_metadata_calc_len = 0;
1110 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); 1109 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
1111 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1112 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1113 } 1110 }
1114
1115 /* update per-inode reservations */
1116 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
1117 EXT4_I(inode)->i_reserved_data_blocks -= used;
1118 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1111 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1119 1112
1120 /* 1113 /* Update quota subsystem */
1121 * free those over-booking quota for metadata blocks 1114 if (quota_claim) {
1122 */ 1115 dquot_claim_block(inode, used);
1123 if (mdb_free) 1116 if (mdb_free)
1124 vfs_dq_release_reservation_block(inode, mdb_free); 1117 dquot_release_reservation_block(inode, mdb_free);
1118 } else {
1119 /*
1120 * We did fallocate with an offset that is already delayed
1121 * allocated. So on delayed allocated writeback we should
1122 * not update the quota for allocated blocks. But then
1123 * converting an fallocate region to initialized region would
1124 * have caused a metadata allocation. So claim quota for
1125 * that
1126 */
1127 if (allocated_meta_blocks)
1128 dquot_claim_block(inode, allocated_meta_blocks);
1129 dquot_release_reservation_block(inode, mdb_free + used);
1130 }
1125 1131
1126 /* 1132 /*
1127 * If we have done all the pending block allocations and if 1133 * If we have done all the pending block allocations and if
1128 * there aren't any writers on the inode, we can discard the 1134 * there aren't any writers on the inode, we can discard the
1129 * inode's preallocations. 1135 * inode's preallocations.
1130 */ 1136 */
1131 if (!total && (atomic_read(&inode->i_writecount) == 0)) 1137 if ((ei->i_reserved_data_blocks == 0) &&
1138 (atomic_read(&inode->i_writecount) == 0))
1132 ext4_discard_preallocations(inode); 1139 ext4_discard_preallocations(inode);
1133} 1140}
1134 1141
@@ -1136,7 +1143,7 @@ static int check_block_validity(struct inode *inode, const char *msg,
1136 sector_t logical, sector_t phys, int len) 1143 sector_t logical, sector_t phys, int len)
1137{ 1144{
1138 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { 1145 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
1139 ext4_error(inode->i_sb, msg, 1146 __ext4_error(inode->i_sb, msg,
1140 "inode #%lu logical block %llu mapped to %llu " 1147 "inode #%lu logical block %llu mapped to %llu "
1141 "(size %d)", inode->i_ino, 1148 "(size %d)", inode->i_ino,
1142 (unsigned long long) logical, 1149 (unsigned long long) logical,
@@ -1318,20 +1325,22 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1318 * i_data's format changing. Force the migrate 1325 * i_data's format changing. Force the migrate
1319 * to fail by clearing migrate flags 1326 * to fail by clearing migrate flags
1320 */ 1327 */
1321 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; 1328 ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
1322 } 1329 }
1323 }
1324 1330
1331 /*
1332 * Update reserved blocks/metadata blocks after successful
1333 * block allocation which had been deferred till now. We don't
1334 * support fallocate for non extent files. So we can update
1335 * reserve space here.
1336 */
1337 if ((retval > 0) &&
1338 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
1339 ext4_da_update_reserve_space(inode, retval, 1);
1340 }
1325 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 1341 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1326 EXT4_I(inode)->i_delalloc_reserved_flag = 0; 1342 EXT4_I(inode)->i_delalloc_reserved_flag = 0;
1327 1343
1328 /*
1329 * Update reserved blocks/metadata blocks after successful
1330 * block allocation which had been deferred till now.
1331 */
1332 if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
1333 ext4_da_update_reserve_space(inode, retval);
1334
1335 up_write((&EXT4_I(inode)->i_data_sem)); 1344 up_write((&EXT4_I(inode)->i_data_sem));
1336 if (retval > 0 && buffer_mapped(bh)) { 1345 if (retval > 0 && buffer_mapped(bh)) {
1337 int ret = check_block_validity(inode, "file system " 1346 int ret = check_block_validity(inode, "file system "
@@ -1534,6 +1543,18 @@ static int do_journal_get_write_access(handle_t *handle,
1534 return ext4_journal_get_write_access(handle, bh); 1543 return ext4_journal_get_write_access(handle, bh);
1535} 1544}
1536 1545
1546/*
1547 * Truncate blocks that were not used by write. We have to truncate the
1548 * pagecache as well so that corresponding buffers get properly unmapped.
1549 */
1550static void ext4_truncate_failed_write(struct inode *inode)
1551{
1552 truncate_inode_pages(inode->i_mapping, inode->i_size);
1553 ext4_truncate(inode);
1554}
1555
1556static int ext4_get_block_write(struct inode *inode, sector_t iblock,
1557 struct buffer_head *bh_result, int create);
1537static int ext4_write_begin(struct file *file, struct address_space *mapping, 1558static int ext4_write_begin(struct file *file, struct address_space *mapping,
1538 loff_t pos, unsigned len, unsigned flags, 1559 loff_t pos, unsigned len, unsigned flags,
1539 struct page **pagep, void **fsdata) 1560 struct page **pagep, void **fsdata)
@@ -1575,8 +1596,12 @@ retry:
1575 } 1596 }
1576 *pagep = page; 1597 *pagep = page;
1577 1598
1578 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 1599 if (ext4_should_dioread_nolock(inode))
1579 ext4_get_block); 1600 ret = block_write_begin(file, mapping, pos, len, flags, pagep,
1601 fsdata, ext4_get_block_write);
1602 else
1603 ret = block_write_begin(file, mapping, pos, len, flags, pagep,
1604 fsdata, ext4_get_block);
1580 1605
1581 if (!ret && ext4_should_journal_data(inode)) { 1606 if (!ret && ext4_should_journal_data(inode)) {
1582 ret = walk_page_buffers(handle, page_buffers(page), 1607 ret = walk_page_buffers(handle, page_buffers(page),
@@ -1599,7 +1624,7 @@ retry:
1599 1624
1600 ext4_journal_stop(handle); 1625 ext4_journal_stop(handle);
1601 if (pos + len > inode->i_size) { 1626 if (pos + len > inode->i_size) {
1602 ext4_truncate(inode); 1627 ext4_truncate_failed_write(inode);
1603 /* 1628 /*
1604 * If truncate failed early the inode might 1629 * If truncate failed early the inode might
1605 * still be on the orphan list; we need to 1630 * still be on the orphan list; we need to
@@ -1709,7 +1734,7 @@ static int ext4_ordered_write_end(struct file *file,
1709 ret = ret2; 1734 ret = ret2;
1710 1735
1711 if (pos + len > inode->i_size) { 1736 if (pos + len > inode->i_size) {
1712 ext4_truncate(inode); 1737 ext4_truncate_failed_write(inode);
1713 /* 1738 /*
1714 * If truncate failed early the inode might still be 1739 * If truncate failed early the inode might still be
1715 * on the orphan list; we need to make sure the inode 1740 * on the orphan list; we need to make sure the inode
@@ -1751,7 +1776,7 @@ static int ext4_writeback_write_end(struct file *file,
1751 ret = ret2; 1776 ret = ret2;
1752 1777
1753 if (pos + len > inode->i_size) { 1778 if (pos + len > inode->i_size) {
1754 ext4_truncate(inode); 1779 ext4_truncate_failed_write(inode);
1755 /* 1780 /*
1756 * If truncate failed early the inode might still be 1781 * If truncate failed early the inode might still be
1757 * on the orphan list; we need to make sure the inode 1782 * on the orphan list; we need to make sure the inode
@@ -1793,7 +1818,7 @@ static int ext4_journalled_write_end(struct file *file,
1793 new_i_size = pos + copied; 1818 new_i_size = pos + copied;
1794 if (new_i_size > inode->i_size) 1819 if (new_i_size > inode->i_size)
1795 i_size_write(inode, pos+copied); 1820 i_size_write(inode, pos+copied);
1796 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; 1821 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1797 if (new_i_size > EXT4_I(inode)->i_disksize) { 1822 if (new_i_size > EXT4_I(inode)->i_disksize) {
1798 ext4_update_i_disksize(inode, new_i_size); 1823 ext4_update_i_disksize(inode, new_i_size);
1799 ret2 = ext4_mark_inode_dirty(handle, inode); 1824 ret2 = ext4_mark_inode_dirty(handle, inode);
@@ -1814,7 +1839,7 @@ static int ext4_journalled_write_end(struct file *file,
1814 if (!ret) 1839 if (!ret)
1815 ret = ret2; 1840 ret = ret2;
1816 if (pos + len > inode->i_size) { 1841 if (pos + len > inode->i_size) {
1817 ext4_truncate(inode); 1842 ext4_truncate_failed_write(inode);
1818 /* 1843 /*
1819 * If truncate failed early the inode might still be 1844 * If truncate failed early the inode might still be
1820 * on the orphan list; we need to make sure the inode 1845 * on the orphan list; we need to make sure the inode
@@ -1827,11 +1852,16 @@ static int ext4_journalled_write_end(struct file *file,
1827 return ret ? ret : copied; 1852 return ret ? ret : copied;
1828} 1853}
1829 1854
1830static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1855/*
1856 * Reserve a single block located at lblock
1857 */
1858static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
1831{ 1859{
1832 int retries = 0; 1860 int retries = 0;
1833 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1861 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1834 unsigned long md_needed, mdblocks, total = 0; 1862 struct ext4_inode_info *ei = EXT4_I(inode);
1863 unsigned long md_needed, md_reserved;
1864 int ret;
1835 1865
1836 /* 1866 /*
1837 * recalculate the amount of metadata blocks to reserve 1867 * recalculate the amount of metadata blocks to reserve
@@ -1839,86 +1869,80 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1839 * worse case is one extent per block 1869 * worse case is one extent per block
1840 */ 1870 */
1841repeat: 1871repeat:
1842 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1872 spin_lock(&ei->i_block_reservation_lock);
1843 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; 1873 md_reserved = ei->i_reserved_meta_blocks;
1844 mdblocks = ext4_calc_metadata_amount(inode, total); 1874 md_needed = ext4_calc_metadata_amount(inode, lblock);
1845 BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); 1875 trace_ext4_da_reserve_space(inode, md_needed);
1846 1876 spin_unlock(&ei->i_block_reservation_lock);
1847 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
1848 total = md_needed + nrblocks;
1849 1877
1850 /* 1878 /*
1851 * Make quota reservation here to prevent quota overflow 1879 * Make quota reservation here to prevent quota overflow
1852 * later. Real quota accounting is done at pages writeout 1880 * later. Real quota accounting is done at pages writeout
1853 * time. 1881 * time.
1854 */ 1882 */
1855 if (vfs_dq_reserve_block(inode, total)) { 1883 ret = dquot_reserve_block(inode, md_needed + 1);
1856 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1884 if (ret)
1857 return -EDQUOT; 1885 return ret;
1858 }
1859 1886
1860 if (ext4_claim_free_blocks(sbi, total)) { 1887 if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
1861 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1888 dquot_release_reservation_block(inode, md_needed + 1);
1862 vfs_dq_release_reservation_block(inode, total);
1863 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1889 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1864 yield(); 1890 yield();
1865 goto repeat; 1891 goto repeat;
1866 } 1892 }
1867 return -ENOSPC; 1893 return -ENOSPC;
1868 } 1894 }
1869 EXT4_I(inode)->i_reserved_data_blocks += nrblocks; 1895 spin_lock(&ei->i_block_reservation_lock);
1870 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; 1896 ei->i_reserved_data_blocks++;
1897 ei->i_reserved_meta_blocks += md_needed;
1898 spin_unlock(&ei->i_block_reservation_lock);
1871 1899
1872 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1873 return 0; /* success */ 1900 return 0; /* success */
1874} 1901}
1875 1902
1876static void ext4_da_release_space(struct inode *inode, int to_free) 1903static void ext4_da_release_space(struct inode *inode, int to_free)
1877{ 1904{
1878 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1905 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1879 int total, mdb, mdb_free, release; 1906 struct ext4_inode_info *ei = EXT4_I(inode);
1880 1907
1881 if (!to_free) 1908 if (!to_free)
1882 return; /* Nothing to release, exit */ 1909 return; /* Nothing to release, exit */
1883 1910
1884 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1911 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1885 1912
1886 if (!EXT4_I(inode)->i_reserved_data_blocks) { 1913 if (unlikely(to_free > ei->i_reserved_data_blocks)) {
1887 /* 1914 /*
1888 * if there is no reserved blocks, but we try to free some 1915 * if there aren't enough reserved blocks, then the
1889 * then the counter is messed up somewhere. 1916 * counter is messed up somewhere. Since this
1890 * but since this function is called from invalidate 1917 * function is called from invalidate page, it's
1891 * page, it's harmless to return without any action 1918 * harmless to return without any action.
1892 */ 1919 */
1893 printk(KERN_INFO "ext4 delalloc try to release %d reserved " 1920 ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
1894 "blocks for inode %lu, but there is no reserved " 1921 "ino %lu, to_free %d with only %d reserved "
1895 "data blocks\n", to_free, inode->i_ino); 1922 "data blocks\n", inode->i_ino, to_free,
1896 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1923 ei->i_reserved_data_blocks);
1897 return; 1924 WARN_ON(1);
1925 to_free = ei->i_reserved_data_blocks;
1898 } 1926 }
1927 ei->i_reserved_data_blocks -= to_free;
1899 1928
1900 /* recalculate the number of metablocks still need to be reserved */ 1929 if (ei->i_reserved_data_blocks == 0) {
1901 total = EXT4_I(inode)->i_reserved_data_blocks - to_free; 1930 /*
1902 mdb = ext4_calc_metadata_amount(inode, total); 1931 * We can release all of the reserved metadata blocks
1903 1932 * only when we have written all of the delayed
1904 /* figure out how many metablocks to release */ 1933 * allocation blocks.
1905 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1934 */
1906 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1935 to_free += ei->i_reserved_meta_blocks;
1907 1936 ei->i_reserved_meta_blocks = 0;
1908 release = to_free + mdb_free; 1937 ei->i_da_metadata_calc_len = 0;
1909 1938 }
1910 /* update fs dirty blocks counter for truncate case */
1911 percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
1912 1939
1913 /* update per-inode reservations */ 1940 /* update fs dirty blocks counter */
1914 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); 1941 percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
1915 EXT4_I(inode)->i_reserved_data_blocks -= to_free;
1916 1942
1917 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1918 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1919 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1943 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1920 1944
1921 vfs_dq_release_reservation_block(inode, release); 1945 dquot_release_reservation_block(inode, to_free);
1922} 1946}
1923 1947
1924static void ext4_da_page_release_reservation(struct page *page, 1948static void ext4_da_page_release_reservation(struct page *page,
@@ -2095,6 +2119,8 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
2095 } else if (buffer_mapped(bh)) 2119 } else if (buffer_mapped(bh))
2096 BUG_ON(bh->b_blocknr != pblock); 2120 BUG_ON(bh->b_blocknr != pblock);
2097 2121
2122 if (buffer_uninit(exbh))
2123 set_buffer_uninit(bh);
2098 cur_logical++; 2124 cur_logical++;
2099 pblock++; 2125 pblock++;
2100 } while ((bh = bh->b_this_page) != head); 2126 } while ((bh = bh->b_this_page) != head);
@@ -2137,17 +2163,16 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
2137 break; 2163 break;
2138 for (i = 0; i < nr_pages; i++) { 2164 for (i = 0; i < nr_pages; i++) {
2139 struct page *page = pvec.pages[i]; 2165 struct page *page = pvec.pages[i];
2140 index = page->index; 2166 if (page->index > end)
2141 if (index > end)
2142 break; 2167 break;
2143 index++;
2144
2145 BUG_ON(!PageLocked(page)); 2168 BUG_ON(!PageLocked(page));
2146 BUG_ON(PageWriteback(page)); 2169 BUG_ON(PageWriteback(page));
2147 block_invalidatepage(page, 0); 2170 block_invalidatepage(page, 0);
2148 ClearPageUptodate(page); 2171 ClearPageUptodate(page);
2149 unlock_page(page); 2172 unlock_page(page);
2150 } 2173 }
2174 index = pvec.pages[nr_pages - 1]->index + 1;
2175 pagevec_release(&pvec);
2151 } 2176 }
2152 return; 2177 return;
2153} 2178}
@@ -2223,10 +2248,12 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2223 * variables are updated after the blocks have been allocated. 2248 * variables are updated after the blocks have been allocated.
2224 */ 2249 */
2225 new.b_state = 0; 2250 new.b_state = 0;
2226 get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | 2251 get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
2227 EXT4_GET_BLOCKS_DELALLOC_RESERVE); 2252 if (ext4_should_dioread_nolock(mpd->inode))
2253 get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
2228 if (mpd->b_state & (1 << BH_Delay)) 2254 if (mpd->b_state & (1 << BH_Delay))
2229 get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; 2255 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
2256
2230 blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, 2257 blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
2231 &new, get_blocks_flags); 2258 &new, get_blocks_flags);
2232 if (blks < 0) { 2259 if (blks < 0) {
@@ -2524,7 +2551,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2524 * XXX: __block_prepare_write() unmaps passed block, 2551 * XXX: __block_prepare_write() unmaps passed block,
2525 * is it OK? 2552 * is it OK?
2526 */ 2553 */
2527 ret = ext4_da_reserve_space(inode, 1); 2554 ret = ext4_da_reserve_space(inode, iblock);
2528 if (ret) 2555 if (ret)
2529 /* not enough space to reserve */ 2556 /* not enough space to reserve */
2530 return ret; 2557 return ret;
@@ -2600,7 +2627,6 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
2600} 2627}
2601 2628
2602static int __ext4_journalled_writepage(struct page *page, 2629static int __ext4_journalled_writepage(struct page *page,
2603 struct writeback_control *wbc,
2604 unsigned int len) 2630 unsigned int len)
2605{ 2631{
2606 struct address_space *mapping = page->mapping; 2632 struct address_space *mapping = page->mapping;
@@ -2635,11 +2661,14 @@ static int __ext4_journalled_writepage(struct page *page,
2635 ret = err; 2661 ret = err;
2636 2662
2637 walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); 2663 walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
2638 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; 2664 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
2639out: 2665out:
2640 return ret; 2666 return ret;
2641} 2667}
2642 2668
2669static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
2670static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
2671
2643/* 2672/*
2644 * Note that we don't need to start a transaction unless we're journaling data 2673 * Note that we don't need to start a transaction unless we're journaling data
2645 * because we should have holes filled from ext4_page_mkwrite(). We even don't 2674 * because we should have holes filled from ext4_page_mkwrite(). We even don't
@@ -2687,7 +2716,7 @@ static int ext4_writepage(struct page *page,
2687 int ret = 0; 2716 int ret = 0;
2688 loff_t size; 2717 loff_t size;
2689 unsigned int len; 2718 unsigned int len;
2690 struct buffer_head *page_bufs; 2719 struct buffer_head *page_bufs = NULL;
2691 struct inode *inode = page->mapping->host; 2720 struct inode *inode = page->mapping->host;
2692 2721
2693 trace_ext4_writepage(inode, page); 2722 trace_ext4_writepage(inode, page);
@@ -2758,12 +2787,16 @@ static int ext4_writepage(struct page *page,
2758 * doesn't seem much point in redirtying the page here. 2787 * doesn't seem much point in redirtying the page here.
2759 */ 2788 */
2760 ClearPageChecked(page); 2789 ClearPageChecked(page);
2761 return __ext4_journalled_writepage(page, wbc, len); 2790 return __ext4_journalled_writepage(page, len);
2762 } 2791 }
2763 2792
2764 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2793 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
2765 ret = nobh_writepage(page, noalloc_get_block_write, wbc); 2794 ret = nobh_writepage(page, noalloc_get_block_write, wbc);
2766 else 2795 else if (page_bufs && buffer_uninit(page_bufs)) {
2796 ext4_set_bh_endio(page_bufs, inode);
2797 ret = block_write_full_page_endio(page, noalloc_get_block_write,
2798 wbc, ext4_end_io_buffer_write);
2799 } else
2767 ret = block_write_full_page(page, noalloc_get_block_write, 2800 ret = block_write_full_page(page, noalloc_get_block_write,
2768 wbc); 2801 wbc);
2769 2802
@@ -2788,7 +2821,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2788 * number of contiguous block. So we will limit 2821 * number of contiguous block. So we will limit
2789 * number of contiguous block to a sane value 2822 * number of contiguous block to a sane value
2790 */ 2823 */
2791 if (!(inode->i_flags & EXT4_EXTENTS_FL) && 2824 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
2792 (max_blocks > EXT4_MAX_TRANS_DATA)) 2825 (max_blocks > EXT4_MAX_TRANS_DATA))
2793 max_blocks = EXT4_MAX_TRANS_DATA; 2826 max_blocks = EXT4_MAX_TRANS_DATA;
2794 2827
@@ -2933,7 +2966,7 @@ retry:
2933 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, 2966 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
2934 &mpd); 2967 &mpd);
2935 /* 2968 /*
2936 * If we have a contigous extent of pages and we 2969 * If we have a contiguous extent of pages and we
2937 * haven't done the I/O yet, map the blocks and submit 2970 * haven't done the I/O yet, map the blocks and submit
2938 * them for I/O. 2971 * them for I/O.
2939 */ 2972 */
@@ -2999,8 +3032,7 @@ retry:
2999out_writepages: 3032out_writepages:
3000 if (!no_nrwrite_index_update) 3033 if (!no_nrwrite_index_update)
3001 wbc->no_nrwrite_index_update = 0; 3034 wbc->no_nrwrite_index_update = 0;
3002 if (wbc->nr_to_write > nr_to_writebump) 3035 wbc->nr_to_write -= nr_to_writebump;
3003 wbc->nr_to_write -= nr_to_writebump;
3004 wbc->range_start = range_start; 3036 wbc->range_start = range_start;
3005 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); 3037 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
3006 return ret; 3038 return ret;
@@ -3025,11 +3057,18 @@ static int ext4_nonda_switch(struct super_block *sb)
3025 if (2 * free_blocks < 3 * dirty_blocks || 3057 if (2 * free_blocks < 3 * dirty_blocks ||
3026 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { 3058 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
3027 /* 3059 /*
3028 * free block count is less that 150% of dirty blocks 3060 * free block count is less than 150% of dirty blocks
3029 * or free blocks is less that watermark 3061 * or free blocks is less than watermark
3030 */ 3062 */
3031 return 1; 3063 return 1;
3032 } 3064 }
3065 /*
3066 * Even if we don't switch but are nearing capacity,
3067 * start pushing delalloc when 1/2 of free blocks are dirty.
3068 */
3069 if (free_blocks < 2 * dirty_blocks)
3070 writeback_inodes_sb_if_idle(sb);
3071
3033 return 0; 3072 return 0;
3034} 3073}
3035 3074
@@ -3037,7 +3076,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
3037 loff_t pos, unsigned len, unsigned flags, 3076 loff_t pos, unsigned len, unsigned flags,
3038 struct page **pagep, void **fsdata) 3077 struct page **pagep, void **fsdata)
3039{ 3078{
3040 int ret, retries = 0; 3079 int ret, retries = 0, quota_retries = 0;
3041 struct page *page; 3080 struct page *page;
3042 pgoff_t index; 3081 pgoff_t index;
3043 unsigned from, to; 3082 unsigned from, to;
@@ -3091,11 +3130,27 @@ retry:
3091 * i_size_read because we hold i_mutex. 3130 * i_size_read because we hold i_mutex.
3092 */ 3131 */
3093 if (pos + len > inode->i_size) 3132 if (pos + len > inode->i_size)
3094 ext4_truncate(inode); 3133 ext4_truncate_failed_write(inode);
3095 } 3134 }
3096 3135
3097 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3136 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
3098 goto retry; 3137 goto retry;
3138
3139 if ((ret == -EDQUOT) &&
3140 EXT4_I(inode)->i_reserved_meta_blocks &&
3141 (quota_retries++ < 3)) {
3142 /*
3143 * Since we often over-estimate the number of meta
3144 * data blocks required, we may sometimes get a
3145 * spurious out of quota error even though there would
3146 * be enough space once we write the data blocks and
3147 * find out how many meta data blocks were _really_
3148 * required. So try forcing the inode write to see if
3149 * that helps.
3150 */
3151 write_inode_now(inode, (quota_retries == 3));
3152 goto retry;
3153 }
3099out: 3154out:
3100 return ret; 3155 return ret;
3101} 3156}
@@ -3284,7 +3339,8 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
3284 filemap_write_and_wait(mapping); 3339 filemap_write_and_wait(mapping);
3285 } 3340 }
3286 3341
3287 if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { 3342 if (EXT4_JOURNAL(inode) &&
3343 ext4_test_inode_state(inode, EXT4_STATE_JDATA)) {
3288 /* 3344 /*
3289 * This is a REALLY heavyweight approach, but the use of 3345 * This is a REALLY heavyweight approach, but the use of
3290 * bmap on dirty files is expected to be extremely rare: 3346 * bmap on dirty files is expected to be extremely rare:
@@ -3303,7 +3359,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
3303 * everything they get. 3359 * everything they get.
3304 */ 3360 */
3305 3361
3306 EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA; 3362 ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
3307 journal = EXT4_JOURNAL(inode); 3363 journal = EXT4_JOURNAL(inode);
3308 jbd2_journal_lock_updates(journal); 3364 jbd2_journal_lock_updates(journal);
3309 err = jbd2_journal_flush(journal); 3365 err = jbd2_journal_flush(journal);
@@ -3328,11 +3384,45 @@ ext4_readpages(struct file *file, struct address_space *mapping,
3328 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 3384 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
3329} 3385}
3330 3386
3387static void ext4_free_io_end(ext4_io_end_t *io)
3388{
3389 BUG_ON(!io);
3390 if (io->page)
3391 put_page(io->page);
3392 iput(io->inode);
3393 kfree(io);
3394}
3395
3396static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
3397{
3398 struct buffer_head *head, *bh;
3399 unsigned int curr_off = 0;
3400
3401 if (!page_has_buffers(page))
3402 return;
3403 head = bh = page_buffers(page);
3404 do {
3405 if (offset <= curr_off && test_clear_buffer_uninit(bh)
3406 && bh->b_private) {
3407 ext4_free_io_end(bh->b_private);
3408 bh->b_private = NULL;
3409 bh->b_end_io = NULL;
3410 }
3411 curr_off = curr_off + bh->b_size;
3412 bh = bh->b_this_page;
3413 } while (bh != head);
3414}
3415
3331static void ext4_invalidatepage(struct page *page, unsigned long offset) 3416static void ext4_invalidatepage(struct page *page, unsigned long offset)
3332{ 3417{
3333 journal_t *journal = EXT4_JOURNAL(page->mapping->host); 3418 journal_t *journal = EXT4_JOURNAL(page->mapping->host);
3334 3419
3335 /* 3420 /*
3421 * free any io_end structure allocated for buffers to be discarded
3422 */
3423 if (ext4_should_dioread_nolock(page->mapping->host))
3424 ext4_invalidatepage_free_endio(page, offset);
3425 /*
3336 * If it's a full truncate we just forget about the pending dirtying 3426 * If it's a full truncate we just forget about the pending dirtying
3337 */ 3427 */
3338 if (offset == 0) 3428 if (offset == 0)
@@ -3403,7 +3493,14 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
3403 } 3493 }
3404 3494
3405retry: 3495retry:
3406 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 3496 if (rw == READ && ext4_should_dioread_nolock(inode))
3497 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
3498 inode->i_sb->s_bdev, iov,
3499 offset, nr_segs,
3500 ext4_get_block, NULL);
3501 else
3502 ret = blockdev_direct_IO(rw, iocb, inode,
3503 inode->i_sb->s_bdev, iov,
3407 offset, nr_segs, 3504 offset, nr_segs,
3408 ext4_get_block, NULL); 3505 ext4_get_block, NULL);
3409 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3506 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -3419,6 +3516,9 @@ retry:
3419 * but cannot extend i_size. Bail out and pretend 3516 * but cannot extend i_size. Bail out and pretend
3420 * the write failed... */ 3517 * the write failed... */
3421 ret = PTR_ERR(handle); 3518 ret = PTR_ERR(handle);
3519 if (inode->i_nlink)
3520 ext4_orphan_del(NULL, inode);
3521
3422 goto out; 3522 goto out;
3423 } 3523 }
3424 if (inode->i_nlink) 3524 if (inode->i_nlink)
@@ -3446,75 +3546,63 @@ out:
3446 return ret; 3546 return ret;
3447} 3547}
3448 3548
3449static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, 3549static int ext4_get_block_write(struct inode *inode, sector_t iblock,
3450 struct buffer_head *bh_result, int create) 3550 struct buffer_head *bh_result, int create)
3451{ 3551{
3452 handle_t *handle = NULL; 3552 handle_t *handle = ext4_journal_current_handle();
3453 int ret = 0; 3553 int ret = 0;
3454 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 3554 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
3455 int dio_credits; 3555 int dio_credits;
3556 int started = 0;
3456 3557
3457 ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", 3558 ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
3458 inode->i_ino, create); 3559 inode->i_ino, create);
3459 /* 3560 /*
3460 * DIO VFS code passes create = 0 flag for write to 3561 * ext4_get_block in prepare for a DIO write or buffer write.
3461 * the middle of file. It does this to avoid block 3563 * We allocate an uninitialized extent if blocks haven't been allocated.
3462 * allocation for holes, to prevent expose stale data 3563 * The extent will be converted to initialized after IO complete.
3463 * out when there is parallel buffered read (which does
3464 * not hold the i_mutex lock) while direct IO write has
3465 * not completed. DIO request on holes finally falls back
3466 * to buffered IO for this reason.
3467 *
3468 * For ext4 extent based file, since we support fallocate,
3469 * new allocated extent as uninitialized, for holes, we
3470 * could fallocate blocks for holes, thus parallel
3471 * buffered IO read will zero out the page when read on
3472 * a hole while parallel DIO write to the hole has not completed.
3473 *
3474 * when we come here, we know it's a direct IO write to
3475 * to the middle of file (<i_size)
3476 * so it's safe to override the create flag from VFS.
3477 */ 3564 */
3478 create = EXT4_GET_BLOCKS_DIO_CREATE_EXT; 3565 create = EXT4_GET_BLOCKS_IO_CREATE_EXT;
3479 3566
3480 if (max_blocks > DIO_MAX_BLOCKS) 3567 if (!handle) {
3481 max_blocks = DIO_MAX_BLOCKS; 3568 if (max_blocks > DIO_MAX_BLOCKS)
3482 dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); 3569 max_blocks = DIO_MAX_BLOCKS;
3483 handle = ext4_journal_start(inode, dio_credits); 3570 dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
3484 if (IS_ERR(handle)) { 3571 handle = ext4_journal_start(inode, dio_credits);
3485 ret = PTR_ERR(handle); 3572 if (IS_ERR(handle)) {
3486 goto out; 3573 ret = PTR_ERR(handle);
3574 goto out;
3575 }
3576 started = 1;
3487 } 3577 }
3578
3488 ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, 3579 ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
3489 create); 3580 create);
3490 if (ret > 0) { 3581 if (ret > 0) {
3491 bh_result->b_size = (ret << inode->i_blkbits); 3582 bh_result->b_size = (ret << inode->i_blkbits);
3492 ret = 0; 3583 ret = 0;
3493 } 3584 }
3494 ext4_journal_stop(handle); 3585 if (started)
3586 ext4_journal_stop(handle);
3495out: 3587out:
3496 return ret; 3588 return ret;
3497} 3589}
3498 3590
3499static void ext4_free_io_end(ext4_io_end_t *io) 3591static void dump_completed_IO(struct inode * inode)
3500{
3501 BUG_ON(!io);
3502 iput(io->inode);
3503 kfree(io);
3504}
3505static void dump_aio_dio_list(struct inode * inode)
3506{ 3592{
3507#ifdef EXT4_DEBUG 3593#ifdef EXT4_DEBUG
3508 struct list_head *cur, *before, *after; 3594 struct list_head *cur, *before, *after;
3509 ext4_io_end_t *io, *io0, *io1; 3595 ext4_io_end_t *io, *io0, *io1;
3596 unsigned long flags;
3510 3597
3511 if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ 3598 if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
3512 ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); 3599 ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
3513 return; 3600 return;
3514 } 3601 }
3515 3602
3516 ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); 3603 ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
3517 list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ 3604 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
3605 list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
3518 cur = &io->list; 3606 cur = &io->list;
3519 before = cur->prev; 3607 before = cur->prev;
3520 io0 = container_of(before, ext4_io_end_t, list); 3608 io0 = container_of(before, ext4_io_end_t, list);
@@ -3524,32 +3612,31 @@ static void dump_aio_dio_list(struct inode * inode)
3524 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", 3612 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
3525 io, inode->i_ino, io0, io1); 3613 io, inode->i_ino, io0, io1);
3526 } 3614 }
3615 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
3527#endif 3616#endif
3528} 3617}
3529 3618
3530/* 3619/*
3531 * check a range of space and convert unwritten extents to written. 3620 * check a range of space and convert unwritten extents to written.
3532 */ 3621 */
3533static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) 3622static int ext4_end_io_nolock(ext4_io_end_t *io)
3534{ 3623{
3535 struct inode *inode = io->inode; 3624 struct inode *inode = io->inode;
3536 loff_t offset = io->offset; 3625 loff_t offset = io->offset;
3537 size_t size = io->size; 3626 ssize_t size = io->size;
3538 int ret = 0; 3627 int ret = 0;
3539 3628
3540 ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," 3629 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
3541 "list->prev 0x%p\n", 3630 "list->prev 0x%p\n",
3542 io, inode->i_ino, io->list.next, io->list.prev); 3631 io, inode->i_ino, io->list.next, io->list.prev);
3543 3632
3544 if (list_empty(&io->list)) 3633 if (list_empty(&io->list))
3545 return ret; 3634 return ret;
3546 3635
3547 if (io->flag != DIO_AIO_UNWRITTEN) 3636 if (io->flag != EXT4_IO_UNWRITTEN)
3548 return ret; 3637 return ret;
3549 3638
3550 if (offset + size <= i_size_read(inode)) 3639 ret = ext4_convert_unwritten_extents(inode, offset, size);
3551 ret = ext4_convert_unwritten_extents(inode, offset, size);
3552
3553 if (ret < 0) { 3640 if (ret < 0) {
3554 printk(KERN_EMERG "%s: failed to convert unwritten" 3641 printk(KERN_EMERG "%s: failed to convert unwritten"
3555 "extents to written extents, error is %d" 3642 "extents to written extents, error is %d"
@@ -3562,50 +3649,64 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io)
3562 io->flag = 0; 3649 io->flag = 0;
3563 return ret; 3650 return ret;
3564} 3651}
3652
3565/* 3653/*
3566 * work on completed aio dio IO, to convert unwritten extents to extents 3654 * work on completed aio dio IO, to convert unwritten extents to extents
3567 */ 3655 */
3568static void ext4_end_aio_dio_work(struct work_struct *work) 3656static void ext4_end_io_work(struct work_struct *work)
3569{ 3657{
3570 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); 3658 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
3571 struct inode *inode = io->inode; 3659 struct inode *inode = io->inode;
3572 int ret = 0; 3660 struct ext4_inode_info *ei = EXT4_I(inode);
3661 unsigned long flags;
3662 int ret;
3573 3663
3574 mutex_lock(&inode->i_mutex); 3664 mutex_lock(&inode->i_mutex);
3575 ret = ext4_end_aio_dio_nolock(io); 3665 ret = ext4_end_io_nolock(io);
3576 if (ret >= 0) { 3666 if (ret < 0) {
3577 if (!list_empty(&io->list)) 3667 mutex_unlock(&inode->i_mutex);
3578 list_del_init(&io->list); 3668 return;
3579 ext4_free_io_end(io);
3580 } 3669 }
3670
3671 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3672 if (!list_empty(&io->list))
3673 list_del_init(&io->list);
3674 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3581 mutex_unlock(&inode->i_mutex); 3675 mutex_unlock(&inode->i_mutex);
3676 ext4_free_io_end(io);
3582} 3677}
3678
3583/* 3679/*
3584 * This function is called from ext4_sync_file(). 3680 * This function is called from ext4_sync_file().
3585 * 3681 *
3586 * When AIO DIO IO is completed, the work to convert unwritten 3682 * When IO is completed, the work to convert unwritten extents to
3587 * extents to written is queued on workqueue but may not get immediately 3683 * written is queued on workqueue but may not get immediately
3588 * scheduled. When fsync is called, we need to ensure the 3684 * scheduled. When fsync is called, we need to ensure the
3589 * conversion is complete before fsync returns. 3685 * conversion is complete before fsync returns.
3590 * The inode keeps track of a list of completed AIO from DIO path 3686 * The inode keeps track of a list of pending/completed IO that
3591 * that might needs to do the conversion. This function walks through 3688 * might need to do the conversion. This function walks through
3592 * the list and convert the related unwritten extents to written. 3688 * the list and convert the related unwritten extents for completed IO
3689 * to written.
3690 * The function returns 0 on success, or a negative error code if a conversion failed.
3593 */ 3691 */
3594int flush_aio_dio_completed_IO(struct inode *inode) 3692int flush_completed_IO(struct inode *inode)
3595{ 3693{
3596 ext4_io_end_t *io; 3694 ext4_io_end_t *io;
3695 struct ext4_inode_info *ei = EXT4_I(inode);
3696 unsigned long flags;
3597 int ret = 0; 3697 int ret = 0;
3598 int ret2 = 0; 3698 int ret2 = 0;
3599 3699
3600 if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) 3700 if (list_empty(&ei->i_completed_io_list))
3601 return ret; 3701 return ret;
3602 3702
3603 dump_aio_dio_list(inode); 3703 dump_completed_IO(inode);
3604 while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ 3704 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3605 io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, 3705 while (!list_empty(&ei->i_completed_io_list)){
3706 io = list_entry(ei->i_completed_io_list.next,
3606 ext4_io_end_t, list); 3707 ext4_io_end_t, list);
3607 /* 3708 /*
3608 * Calling ext4_end_aio_dio_nolock() to convert completed 3709 * Calling ext4_end_io_nolock() to convert completed
3609 * IO to written. 3710 * IO to written.
3610 * 3711 *
3611 * When ext4_sync_file() is called, run_queue() may already 3712 * When ext4_sync_file() is called, run_queue() may already
@@ -3618,20 +3719,23 @@ int flush_aio_dio_completed_IO(struct inode *inode)
3618 * avoid double converting from both fsync and background work 3719 * avoid double converting from both fsync and background work
3619 * queue work. 3720 * queue work.
3620 */ 3721 */
3621 ret = ext4_end_aio_dio_nolock(io); 3722 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3723 ret = ext4_end_io_nolock(io);
3724 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3622 if (ret < 0) 3725 if (ret < 0)
3623 ret2 = ret; 3726 ret2 = ret;
3624 else 3727 else
3625 list_del_init(&io->list); 3728 list_del_init(&io->list);
3626 } 3729 }
3730 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3627 return (ret2 < 0) ? ret2 : 0; 3731 return (ret2 < 0) ? ret2 : 0;
3628} 3732}
3629 3733
3630static ext4_io_end_t *ext4_init_io_end (struct inode *inode) 3734static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
3631{ 3735{
3632 ext4_io_end_t *io = NULL; 3736 ext4_io_end_t *io = NULL;
3633 3737
3634 io = kmalloc(sizeof(*io), GFP_NOFS); 3738 io = kmalloc(sizeof(*io), flags);
3635 3739
3636 if (io) { 3740 if (io) {
3637 igrab(inode); 3741 igrab(inode);
@@ -3639,8 +3743,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode)
3639 io->flag = 0; 3743 io->flag = 0;
3640 io->offset = 0; 3744 io->offset = 0;
3641 io->size = 0; 3745 io->size = 0;
3642 io->error = 0; 3746 io->page = NULL;
3643 INIT_WORK(&io->work, ext4_end_aio_dio_work); 3747 INIT_WORK(&io->work, ext4_end_io_work);
3644 INIT_LIST_HEAD(&io->list); 3748 INIT_LIST_HEAD(&io->list);
3645 } 3749 }
3646 3750
@@ -3652,6 +3756,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3652{ 3756{
3653 ext4_io_end_t *io_end = iocb->private; 3757 ext4_io_end_t *io_end = iocb->private;
3654 struct workqueue_struct *wq; 3758 struct workqueue_struct *wq;
3759 unsigned long flags;
3760 struct ext4_inode_info *ei;
3655 3761
3656 /* if not async direct IO or dio with 0 bytes write, just return */ 3762 /* if not async direct IO or dio with 0 bytes write, just return */
3657 if (!io_end || !size) 3763 if (!io_end || !size)
@@ -3663,7 +3769,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3663 size); 3769 size);
3664 3770
3665 /* if not aio dio with unwritten extents, just free io and return */ 3771 /* if not aio dio with unwritten extents, just free io and return */
3666 if (io_end->flag != DIO_AIO_UNWRITTEN){ 3772 if (io_end->flag != EXT4_IO_UNWRITTEN){
3667 ext4_free_io_end(io_end); 3773 ext4_free_io_end(io_end);
3668 iocb->private = NULL; 3774 iocb->private = NULL;
3669 return; 3775 return;
@@ -3671,16 +3777,85 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3671 3777
3672 io_end->offset = offset; 3778 io_end->offset = offset;
3673 io_end->size = size; 3779 io_end->size = size;
3780 io_end->flag = EXT4_IO_UNWRITTEN;
3674 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; 3781 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
3675 3782
3676 /* queue the work to convert unwritten extents to written */ 3783 /* queue the work to convert unwritten extents to written */
3677 queue_work(wq, &io_end->work); 3784 queue_work(wq, &io_end->work);
3678 3785
3679 /* Add the io_end to per-inode completed aio dio list*/ 3786 /* Add the io_end to per-inode completed aio dio list*/
3680 list_add_tail(&io_end->list, 3787 ei = EXT4_I(io_end->inode);
3681 &EXT4_I(io_end->inode)->i_aio_dio_complete_list); 3788 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3789 list_add_tail(&io_end->list, &ei->i_completed_io_list);
3790 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3682 iocb->private = NULL; 3791 iocb->private = NULL;
3683} 3792}
3793
3794static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
3795{
3796 ext4_io_end_t *io_end = bh->b_private;
3797 struct workqueue_struct *wq;
3798 struct inode *inode;
3799 unsigned long flags;
3800
3801 if (!test_clear_buffer_uninit(bh) || !io_end)
3802 goto out;
3803
3804 if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
3805 printk("sb umounted, discard end_io request for inode %lu\n",
3806 io_end->inode->i_ino);
3807 ext4_free_io_end(io_end);
3808 goto out;
3809 }
3810
3811 io_end->flag = EXT4_IO_UNWRITTEN;
3812 inode = io_end->inode;
3813
3814 /* Add the io_end to per-inode completed io list*/
3815 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
3816 list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
3817 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
3818
3819 wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
3820 /* queue the work to convert unwritten extents to written */
3821 queue_work(wq, &io_end->work);
3822out:
3823 bh->b_private = NULL;
3824 bh->b_end_io = NULL;
3825 clear_buffer_uninit(bh);
3826 end_buffer_async_write(bh, uptodate);
3827}
3828
3829static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
3830{
3831 ext4_io_end_t *io_end;
3832 struct page *page = bh->b_page;
3833 loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
3834 size_t size = bh->b_size;
3835
3836retry:
3837 io_end = ext4_init_io_end(inode, GFP_ATOMIC);
3838 if (!io_end) {
3839 if (printk_ratelimit())
3840 printk(KERN_WARNING "%s: allocation fail\n", __func__);
3841 schedule();
3842 goto retry;
3843 }
3844 io_end->offset = offset;
3845 io_end->size = size;
3846 /*
3847 * We need to hold a reference to the page to make sure it
3848 * doesn't get evicted before ext4_end_io_work() has a chance
3849 * to convert the extent from written to unwritten.
3850 */
3851 io_end->page = page;
3852 get_page(io_end->page);
3853
3854 bh->b_private = io_end;
3855 bh->b_end_io = ext4_end_io_buffer_write;
3856 return 0;
3857}
3858
3684/* 3859/*
3685 * For ext4 extent files, ext4 will do direct-io write to holes, 3860 * For ext4 extent files, ext4 will do direct-io write to holes,
3686 * preallocated extents, and those write extend the file, no need to 3861 * preallocated extents, and those write extend the file, no need to
@@ -3734,7 +3909,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3734 iocb->private = NULL; 3909 iocb->private = NULL;
3735 EXT4_I(inode)->cur_aio_dio = NULL; 3910 EXT4_I(inode)->cur_aio_dio = NULL;
3736 if (!is_sync_kiocb(iocb)) { 3911 if (!is_sync_kiocb(iocb)) {
3737 iocb->private = ext4_init_io_end(inode); 3912 iocb->private = ext4_init_io_end(inode, GFP_NOFS);
3738 if (!iocb->private) 3913 if (!iocb->private)
3739 return -ENOMEM; 3914 return -ENOMEM;
3740 /* 3915 /*
@@ -3750,7 +3925,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3750 ret = blockdev_direct_IO(rw, iocb, inode, 3925 ret = blockdev_direct_IO(rw, iocb, inode,
3751 inode->i_sb->s_bdev, iov, 3926 inode->i_sb->s_bdev, iov,
3752 offset, nr_segs, 3927 offset, nr_segs,
3753 ext4_get_block_dio_write, 3928 ext4_get_block_write,
3754 ext4_end_io_dio); 3929 ext4_end_io_dio);
3755 if (iocb->private) 3930 if (iocb->private)
3756 EXT4_I(inode)->cur_aio_dio = NULL; 3931 EXT4_I(inode)->cur_aio_dio = NULL;
@@ -3771,8 +3946,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3771 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { 3946 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
3772 ext4_free_io_end(iocb->private); 3947 ext4_free_io_end(iocb->private);
3773 iocb->private = NULL; 3948 iocb->private = NULL;
3774 } else if (ret > 0 && (EXT4_I(inode)->i_state & 3949 } else if (ret > 0 && ext4_test_inode_state(inode,
3775 EXT4_STATE_DIO_UNWRITTEN)) { 3950 EXT4_STATE_DIO_UNWRITTEN)) {
3776 int err; 3951 int err;
3777 /* 3952 /*
3778 * for non AIO case, since the IO is already 3953 * for non AIO case, since the IO is already
@@ -3782,7 +3957,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3782 offset, ret); 3957 offset, ret);
3783 if (err < 0) 3958 if (err < 0)
3784 ret = err; 3959 ret = err;
3785 EXT4_I(inode)->i_state &= ~EXT4_STATE_DIO_UNWRITTEN; 3960 ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3786 } 3961 }
3787 return ret; 3962 return ret;
3788 } 3963 }
@@ -4064,7 +4239,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
4064 int k, err; 4239 int k, err;
4065 4240
4066 *top = 0; 4241 *top = 0;
4067 /* Make k index the deepest non-null offest + 1 */ 4242 /* Make k index the deepest non-null offset + 1 */
4068 for (k = depth; k > 1 && !offsets[k-1]; k--) 4243 for (k = depth; k > 1 && !offsets[k-1]; k--)
4069 ; 4244 ;
4070 partial = ext4_get_branch(inode, k, offsets, chain, &err); 4245 partial = ext4_get_branch(inode, k, offsets, chain, &err);
@@ -4113,13 +4288,27 @@ no_top:
4113 * We release `count' blocks on disk, but (last - first) may be greater 4288 * We release `count' blocks on disk, but (last - first) may be greater
4114 * than `count' because there can be holes in there. 4289 * than `count' because there can be holes in there.
4115 */ 4290 */
4116static void ext4_clear_blocks(handle_t *handle, struct inode *inode, 4291static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
4117 struct buffer_head *bh, 4292 struct buffer_head *bh,
4118 ext4_fsblk_t block_to_free, 4293 ext4_fsblk_t block_to_free,
4119 unsigned long count, __le32 *first, 4294 unsigned long count, __le32 *first,
4120 __le32 *last) 4295 __le32 *last)
4121{ 4296{
4122 __le32 *p; 4297 __le32 *p;
4298 int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
4299
4300 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
4301 flags |= EXT4_FREE_BLOCKS_METADATA;
4302
4303 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
4304 count)) {
4305 ext4_error(inode->i_sb, "inode #%lu: "
4306 "attempt to clear blocks %llu len %lu, invalid",
4307 inode->i_ino, (unsigned long long) block_to_free,
4308 count);
4309 return 1;
4310 }
4311
4123 if (try_to_extend_transaction(handle, inode)) { 4312 if (try_to_extend_transaction(handle, inode)) {
4124 if (bh) { 4313 if (bh) {
4125 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 4314 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
@@ -4134,27 +4323,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
4134 } 4323 }
4135 } 4324 }
4136 4325
4137 /* 4326 for (p = first; p < last; p++)
4138 * Any buffers which are on the journal will be in memory. We 4327 *p = 0;
4139 * find them on the hash table so jbd2_journal_revoke() will
4140 * run jbd2_journal_forget() on them. We've already detached
4141 * each block from the file, so bforget() in
4142 * jbd2_journal_forget() should be safe.
4143 *
4144 * AKPM: turn on bforget in jbd2_journal_forget()!!!
4145 */
4146 for (p = first; p < last; p++) {
4147 u32 nr = le32_to_cpu(*p);
4148 if (nr) {
4149 struct buffer_head *tbh;
4150
4151 *p = 0;
4152 tbh = sb_find_get_block(inode->i_sb, nr);
4153 ext4_forget(handle, 0, inode, tbh, nr);
4154 }
4155 }
4156 4328
4157 ext4_free_blocks(handle, inode, block_to_free, count, 0); 4329 ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
4330 return 0;
4158} 4331}
4159 4332
4160/** 4333/**
@@ -4210,9 +4383,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4210 } else if (nr == block_to_free + count) { 4383 } else if (nr == block_to_free + count) {
4211 count++; 4384 count++;
4212 } else { 4385 } else {
4213 ext4_clear_blocks(handle, inode, this_bh, 4386 if (ext4_clear_blocks(handle, inode, this_bh,
4214 block_to_free, 4387 block_to_free, count,
4215 count, block_to_free_p, p); 4388 block_to_free_p, p))
4389 break;
4216 block_to_free = nr; 4390 block_to_free = nr;
4217 block_to_free_p = p; 4391 block_to_free_p = p;
4218 count = 1; 4392 count = 1;
@@ -4236,7 +4410,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4236 if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) 4410 if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
4237 ext4_handle_dirty_metadata(handle, inode, this_bh); 4411 ext4_handle_dirty_metadata(handle, inode, this_bh);
4238 else 4412 else
4239 ext4_error(inode->i_sb, __func__, 4413 ext4_error(inode->i_sb,
4240 "circular indirect block detected, " 4414 "circular indirect block detected, "
4241 "inode=%lu, block=%llu", 4415 "inode=%lu, block=%llu",
4242 inode->i_ino, 4416 inode->i_ino,
@@ -4276,6 +4450,16 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4276 if (!nr) 4450 if (!nr)
4277 continue; /* A hole */ 4451 continue; /* A hole */
4278 4452
4453 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
4454 nr, 1)) {
4455 ext4_error(inode->i_sb,
4456 "indirect mapped block in inode "
4457 "#%lu invalid (level %d, blk #%lu)",
4458 inode->i_ino, depth,
4459 (unsigned long) nr);
4460 break;
4461 }
4462
4279 /* Go read the buffer for the next level down */ 4463 /* Go read the buffer for the next level down */
4280 bh = sb_bread(inode->i_sb, nr); 4464 bh = sb_bread(inode->i_sb, nr);
4281 4465
@@ -4284,7 +4468,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4284 * (should be rare). 4468 * (should be rare).
4285 */ 4469 */
4286 if (!bh) { 4470 if (!bh) {
4287 ext4_error(inode->i_sb, "ext4_free_branches", 4471 ext4_error(inode->i_sb,
4288 "Read failure, inode=%lu, block=%llu", 4472 "Read failure, inode=%lu, block=%llu",
4289 inode->i_ino, nr); 4473 inode->i_ino, nr);
4290 continue; 4474 continue;
@@ -4342,7 +4526,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4342 blocks_for_truncate(inode)); 4526 blocks_for_truncate(inode));
4343 } 4527 }
4344 4528
4345 ext4_free_blocks(handle, inode, nr, 1, 1); 4529 ext4_free_blocks(handle, inode, 0, nr, 1,
4530 EXT4_FREE_BLOCKS_METADATA);
4346 4531
4347 if (parent_bh) { 4532 if (parent_bh) {
4348 /* 4533 /*
@@ -4427,8 +4612,10 @@ void ext4_truncate(struct inode *inode)
4427 if (!ext4_can_truncate(inode)) 4612 if (!ext4_can_truncate(inode))
4428 return; 4613 return;
4429 4614
4615 EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
4616
4430 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) 4617 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
4431 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; 4618 ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
4432 4619
4433 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 4620 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
4434 ext4_ext_truncate(inode); 4621 ext4_ext_truncate(inode);
@@ -4598,9 +4785,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
4598 4785
4599 bh = sb_getblk(sb, block); 4786 bh = sb_getblk(sb, block);
4600 if (!bh) { 4787 if (!bh) {
4601 ext4_error(sb, "ext4_get_inode_loc", "unable to read " 4788 ext4_error(sb, "unable to read inode block - "
4602 "inode block - inode=%lu, block=%llu", 4789 "inode=%lu, block=%llu", inode->i_ino, block);
4603 inode->i_ino, block);
4604 return -EIO; 4790 return -EIO;
4605 } 4791 }
4606 if (!buffer_uptodate(bh)) { 4792 if (!buffer_uptodate(bh)) {
@@ -4698,9 +4884,8 @@ make_io:
4698 submit_bh(READ_META, bh); 4884 submit_bh(READ_META, bh);
4699 wait_on_buffer(bh); 4885 wait_on_buffer(bh);
4700 if (!buffer_uptodate(bh)) { 4886 if (!buffer_uptodate(bh)) {
4701 ext4_error(sb, __func__, 4887 ext4_error(sb, "unable to read inode block - inode=%lu,"
4702 "unable to read inode block - inode=%lu, " 4888 " block=%llu", inode->i_ino, block);
4703 "block=%llu", inode->i_ino, block);
4704 brelse(bh); 4889 brelse(bh);
4705 return -EIO; 4890 return -EIO;
4706 } 4891 }
@@ -4714,7 +4899,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
4714{ 4899{
4715 /* We have all inode data except xattrs in memory here. */ 4900 /* We have all inode data except xattrs in memory here. */
4716 return __ext4_get_inode_loc(inode, iloc, 4901 return __ext4_get_inode_loc(inode, iloc,
4717 !(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)); 4902 !ext4_test_inode_state(inode, EXT4_STATE_XATTR));
4718} 4903}
4719 4904
4720void ext4_set_inode_flags(struct inode *inode) 4905void ext4_set_inode_flags(struct inode *inode)
@@ -4781,8 +4966,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4781 struct ext4_iloc iloc; 4966 struct ext4_iloc iloc;
4782 struct ext4_inode *raw_inode; 4967 struct ext4_inode *raw_inode;
4783 struct ext4_inode_info *ei; 4968 struct ext4_inode_info *ei;
4784 struct buffer_head *bh;
4785 struct inode *inode; 4969 struct inode *inode;
4970 journal_t *journal = EXT4_SB(sb)->s_journal;
4786 long ret; 4971 long ret;
4787 int block; 4972 int block;
4788 4973
@@ -4793,11 +4978,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4793 return inode; 4978 return inode;
4794 4979
4795 ei = EXT4_I(inode); 4980 ei = EXT4_I(inode);
4981 iloc.bh = 0;
4796 4982
4797 ret = __ext4_get_inode_loc(inode, &iloc, 0); 4983 ret = __ext4_get_inode_loc(inode, &iloc, 0);
4798 if (ret < 0) 4984 if (ret < 0)
4799 goto bad_inode; 4985 goto bad_inode;
4800 bh = iloc.bh;
4801 raw_inode = ext4_raw_inode(&iloc); 4986 raw_inode = ext4_raw_inode(&iloc);
4802 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 4987 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
4803 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); 4988 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
@@ -4808,7 +4993,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4808 } 4993 }
4809 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 4994 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
4810 4995
4811 ei->i_state = 0; 4996 ei->i_state_flags = 0;
4812 ei->i_dir_start_lookup = 0; 4997 ei->i_dir_start_lookup = 0;
4813 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); 4998 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
4814 /* We now have enough fields to check if the inode was active or not. 4999 /* We now have enough fields to check if the inode was active or not.
@@ -4820,7 +5005,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4820 if (inode->i_mode == 0 || 5005 if (inode->i_mode == 0 ||
4821 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { 5006 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
4822 /* this inode is deleted */ 5007 /* this inode is deleted */
4823 brelse(bh);
4824 ret = -ESTALE; 5008 ret = -ESTALE;
4825 goto bad_inode; 5009 goto bad_inode;
4826 } 5010 }
@@ -4837,6 +5021,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4837 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; 5021 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
4838 inode->i_size = ext4_isize(raw_inode); 5022 inode->i_size = ext4_isize(raw_inode);
4839 ei->i_disksize = inode->i_size; 5023 ei->i_disksize = inode->i_size;
5024#ifdef CONFIG_QUOTA
5025 ei->i_reserved_quota = 0;
5026#endif
4840 inode->i_generation = le32_to_cpu(raw_inode->i_generation); 5027 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
4841 ei->i_block_group = iloc.block_group; 5028 ei->i_block_group = iloc.block_group;
4842 ei->i_last_alloc_group = ~0; 5029 ei->i_last_alloc_group = ~0;
@@ -4848,11 +5035,35 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4848 ei->i_data[block] = raw_inode->i_block[block]; 5035 ei->i_data[block] = raw_inode->i_block[block];
4849 INIT_LIST_HEAD(&ei->i_orphan); 5036 INIT_LIST_HEAD(&ei->i_orphan);
4850 5037
5038 /*
5039 * Set transaction id's of transactions that have to be committed
5040 * to finish f[data]sync. We set them to currently running transaction
5041 * as we cannot be sure that the inode or some of its metadata isn't
5042 * part of the transaction - the inode could have been reclaimed and
5043 * now it is reread from disk.
5044 */
5045 if (journal) {
5046 transaction_t *transaction;
5047 tid_t tid;
5048
5049 spin_lock(&journal->j_state_lock);
5050 if (journal->j_running_transaction)
5051 transaction = journal->j_running_transaction;
5052 else
5053 transaction = journal->j_committing_transaction;
5054 if (transaction)
5055 tid = transaction->t_tid;
5056 else
5057 tid = journal->j_commit_sequence;
5058 spin_unlock(&journal->j_state_lock);
5059 ei->i_sync_tid = tid;
5060 ei->i_datasync_tid = tid;
5061 }
5062
4851 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 5063 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
4852 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 5064 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
4853 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 5065 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
4854 EXT4_INODE_SIZE(inode->i_sb)) { 5066 EXT4_INODE_SIZE(inode->i_sb)) {
4855 brelse(bh);
4856 ret = -EIO; 5067 ret = -EIO;
4857 goto bad_inode; 5068 goto bad_inode;
4858 } 5069 }
@@ -4865,7 +5076,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4865 EXT4_GOOD_OLD_INODE_SIZE + 5076 EXT4_GOOD_OLD_INODE_SIZE +
4866 ei->i_extra_isize; 5077 ei->i_extra_isize;
4867 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) 5078 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
4868 ei->i_state |= EXT4_STATE_XATTR; 5079 ext4_set_inode_state(inode, EXT4_STATE_XATTR);
4869 } 5080 }
4870 } else 5081 } else
4871 ei->i_extra_isize = 0; 5082 ei->i_extra_isize = 0;
@@ -4884,12 +5095,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4884 5095
4885 ret = 0; 5096 ret = 0;
4886 if (ei->i_file_acl && 5097 if (ei->i_file_acl &&
4887 ((ei->i_file_acl < 5098 !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
4888 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + 5099 ext4_error(sb, "bad extended attribute block %llu inode #%lu",
4889 EXT4_SB(sb)->s_gdb_count)) ||
4890 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
4891 ext4_error(sb, __func__,
4892 "bad extended attribute block %llu in inode #%lu",
4893 ei->i_file_acl, inode->i_ino); 5100 ei->i_file_acl, inode->i_ino);
4894 ret = -EIO; 5101 ret = -EIO;
4895 goto bad_inode; 5102 goto bad_inode;
@@ -4905,10 +5112,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4905 /* Validate block references which are part of inode */ 5112 /* Validate block references which are part of inode */
4906 ret = ext4_check_inode_blockref(inode); 5113 ret = ext4_check_inode_blockref(inode);
4907 } 5114 }
4908 if (ret) { 5115 if (ret)
4909 brelse(bh);
4910 goto bad_inode; 5116 goto bad_inode;
4911 }
4912 5117
4913 if (S_ISREG(inode->i_mode)) { 5118 if (S_ISREG(inode->i_mode)) {
4914 inode->i_op = &ext4_file_inode_operations; 5119 inode->i_op = &ext4_file_inode_operations;
@@ -4936,10 +5141,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4936 init_special_inode(inode, inode->i_mode, 5141 init_special_inode(inode, inode->i_mode,
4937 new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 5142 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
4938 } else { 5143 } else {
4939 brelse(bh);
4940 ret = -EIO; 5144 ret = -EIO;
4941 ext4_error(inode->i_sb, __func__, 5145 ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu",
4942 "bogus i_mode (%o) for inode=%lu",
4943 inode->i_mode, inode->i_ino); 5146 inode->i_mode, inode->i_ino);
4944 goto bad_inode; 5147 goto bad_inode;
4945 } 5148 }
@@ -4949,6 +5152,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4949 return inode; 5152 return inode;
4950 5153
4951bad_inode: 5154bad_inode:
5155 brelse(iloc.bh);
4952 iget_failed(inode); 5156 iget_failed(inode);
4953 return ERR_PTR(ret); 5157 return ERR_PTR(ret);
4954} 5158}
@@ -5010,7 +5214,7 @@ static int ext4_do_update_inode(handle_t *handle,
5010 5214
5011 /* For fields not tracked in the in-memory inode, 5215 /* For fields not tracked in the in-memory inode,
5012 * initialise them to zero for new inodes. */ 5216 * initialise them to zero for new inodes. */
5013 if (ei->i_state & EXT4_STATE_NEW) 5217 if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
5014 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); 5218 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
5015 5219
5016 ext4_get_inode_flags(ei); 5220 ext4_get_inode_flags(ei);
@@ -5074,7 +5278,7 @@ static int ext4_do_update_inode(handle_t *handle,
5074 EXT4_FEATURE_RO_COMPAT_LARGE_FILE); 5278 EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
5075 sb->s_dirt = 1; 5279 sb->s_dirt = 1;
5076 ext4_handle_sync(handle); 5280 ext4_handle_sync(handle);
5077 err = ext4_handle_dirty_metadata(handle, inode, 5281 err = ext4_handle_dirty_metadata(handle, NULL,
5078 EXT4_SB(sb)->s_sbh); 5282 EXT4_SB(sb)->s_sbh);
5079 } 5283 }
5080 } 5284 }
@@ -5103,11 +5307,12 @@ static int ext4_do_update_inode(handle_t *handle,
5103 } 5307 }
5104 5308
5105 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 5309 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
5106 rc = ext4_handle_dirty_metadata(handle, inode, bh); 5310 rc = ext4_handle_dirty_metadata(handle, NULL, bh);
5107 if (!err) 5311 if (!err)
5108 err = rc; 5312 err = rc;
5109 ei->i_state &= ~EXT4_STATE_NEW; 5313 ext4_clear_inode_state(inode, EXT4_STATE_NEW);
5110 5314
5315 ext4_update_inode_fsync_trans(handle, inode, 0);
5111out_brelse: 5316out_brelse:
5112 brelse(bh); 5317 brelse(bh);
5113 ext4_std_error(inode->i_sb, err); 5318 ext4_std_error(inode->i_sb, err);
@@ -5149,7 +5354,7 @@ out_brelse:
5149 * `stuff()' is running, and the new i_size will be lost. Plus the inode 5354 * `stuff()' is running, and the new i_size will be lost. Plus the inode
5150 * will no longer be on the superblock's dirty inode list. 5355 * will no longer be on the superblock's dirty inode list.
5151 */ 5356 */
5152int ext4_write_inode(struct inode *inode, int wait) 5357int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
5153{ 5358{
5154 int err; 5359 int err;
5155 5360
@@ -5163,7 +5368,7 @@ int ext4_write_inode(struct inode *inode, int wait)
5163 return -EIO; 5368 return -EIO;
5164 } 5369 }
5165 5370
5166 if (!wait) 5371 if (wbc->sync_mode != WB_SYNC_ALL)
5167 return 0; 5372 return 0;
5168 5373
5169 err = ext4_force_commit(inode->i_sb); 5374 err = ext4_force_commit(inode->i_sb);
@@ -5173,13 +5378,11 @@ int ext4_write_inode(struct inode *inode, int wait)
5173 err = ext4_get_inode_loc(inode, &iloc); 5378 err = ext4_get_inode_loc(inode, &iloc);
5174 if (err) 5379 if (err)
5175 return err; 5380 return err;
5176 if (wait) 5381 if (wbc->sync_mode == WB_SYNC_ALL)
5177 sync_dirty_buffer(iloc.bh); 5382 sync_dirty_buffer(iloc.bh);
5178 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { 5383 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
5179 ext4_error(inode->i_sb, __func__, 5384 ext4_error(inode->i_sb, "IO error syncing inode, "
5180 "IO error syncing inode, " 5385 "inode=%lu, block=%llu", inode->i_ino,
5181 "inode=%lu, block=%llu",
5182 inode->i_ino,
5183 (unsigned long long)iloc.bh->b_blocknr); 5386 (unsigned long long)iloc.bh->b_blocknr);
5184 err = -EIO; 5387 err = -EIO;
5185 } 5388 }
@@ -5221,19 +5424,21 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5221 if (error) 5424 if (error)
5222 return error; 5425 return error;
5223 5426
5427 if (ia_valid & ATTR_SIZE)
5428 dquot_initialize(inode);
5224 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 5429 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
5225 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 5430 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
5226 handle_t *handle; 5431 handle_t *handle;
5227 5432
5228 /* (user+group)*(old+new) structure, inode write (sb, 5433 /* (user+group)*(old+new) structure, inode write (sb,
5229 * inode block, ? - but truncate inode update has it) */ 5434 * inode block, ? - but truncate inode update has it) */
5230 handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+ 5435 handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
5231 EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3); 5436 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
5232 if (IS_ERR(handle)) { 5437 if (IS_ERR(handle)) {
5233 error = PTR_ERR(handle); 5438 error = PTR_ERR(handle);
5234 goto err_out; 5439 goto err_out;
5235 } 5440 }
5236 error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; 5441 error = dquot_transfer(inode, attr);
5237 if (error) { 5442 if (error) {
5238 ext4_journal_stop(handle); 5443 ext4_journal_stop(handle);
5239 return error; 5444 return error;
@@ -5260,7 +5465,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5260 } 5465 }
5261 5466
5262 if (S_ISREG(inode->i_mode) && 5467 if (S_ISREG(inode->i_mode) &&
5263 attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { 5468 attr->ia_valid & ATTR_SIZE &&
5469 (attr->ia_size < inode->i_size ||
5470 (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) {
5264 handle_t *handle; 5471 handle_t *handle;
5265 5472
5266 handle = ext4_journal_start(inode, 3); 5473 handle = ext4_journal_start(inode, 3);
@@ -5291,6 +5498,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5291 goto err_out; 5498 goto err_out;
5292 } 5499 }
5293 } 5500 }
5501 /* ext4_truncate will clear the flag */
5502 if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))
5503 ext4_truncate(inode);
5294 } 5504 }
5295 5505
5296 rc = inode_setattr(inode, attr); 5506 rc = inode_setattr(inode, attr);
@@ -5376,7 +5586,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
5376 * worse case, the indexs blocks spread over different block groups 5586 * worse case, the indexs blocks spread over different block groups
5377 * 5587 *
5378 * If datablocks are discontiguous, they are possible to spread over 5588 * If datablocks are discontiguous, they are possible to spread over
5379 * different block groups too. If they are contiugous, with flexbg, 5589 * different block groups too. If they are contiguous, with flexbg,
5380 * they could still across block group boundary. 5590 * they could still across block group boundary.
5381 * 5591 *
5382 * Also account for superblock, inode, quota and xattr blocks 5592 * Also account for superblock, inode, quota and xattr blocks
@@ -5452,7 +5662,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
5452 * Calculate the journal credits for a chunk of data modification. 5662 * Calculate the journal credits for a chunk of data modification.
5453 * 5663 *
5454 * This is called from DIO, fallocate or whoever calling 5664 * This is called from DIO, fallocate or whoever calling
5455 * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks. 5665 * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
5456 * 5666 *
5457 * journal buffers for data blocks are not included here, as DIO 5667 * journal buffers for data blocks are not included here, as DIO
5458 * and fallocate do not need to journal data buffers. 5668 * and fallocate do not need to journal data buffers.
@@ -5529,8 +5739,8 @@ static int ext4_expand_extra_isize(struct inode *inode,
5529 entry = IFIRST(header); 5739 entry = IFIRST(header);
5530 5740
5531 /* No extended attributes present */ 5741 /* No extended attributes present */
5532 if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) || 5742 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
5533 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { 5743 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
5534 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, 5744 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
5535 new_extra_isize); 5745 new_extra_isize);
5536 EXT4_I(inode)->i_extra_isize = new_extra_isize; 5746 EXT4_I(inode)->i_extra_isize = new_extra_isize;
@@ -5574,7 +5784,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
5574 err = ext4_reserve_inode_write(handle, inode, &iloc); 5784 err = ext4_reserve_inode_write(handle, inode, &iloc);
5575 if (ext4_handle_valid(handle) && 5785 if (ext4_handle_valid(handle) &&
5576 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && 5786 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
5577 !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { 5787 !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
5578 /* 5788 /*
5579 * We need extra buffer credits since we may write into EA block 5789 * We need extra buffer credits since we may write into EA block
5580 * with this same handle. If journal_extend fails, then it will 5790 * with this same handle. If journal_extend fails, then it will
@@ -5588,10 +5798,11 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
5588 sbi->s_want_extra_isize, 5798 sbi->s_want_extra_isize,
5589 iloc, handle); 5799 iloc, handle);
5590 if (ret) { 5800 if (ret) {
5591 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; 5801 ext4_set_inode_state(inode,
5802 EXT4_STATE_NO_EXPAND);
5592 if (mnt_count != 5803 if (mnt_count !=
5593 le16_to_cpu(sbi->s_es->s_mnt_count)) { 5804 le16_to_cpu(sbi->s_es->s_mnt_count)) {
5594 ext4_warning(inode->i_sb, __func__, 5805 ext4_warning(inode->i_sb,
5595 "Unable to expand inode %lu. Delete" 5806 "Unable to expand inode %lu. Delete"
5596 " some EAs or run e2fsck.", 5807 " some EAs or run e2fsck.",
5597 inode->i_ino); 5808 inode->i_ino);
@@ -5613,7 +5824,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
5613 * i_size has been changed by generic_commit_write() and we thus need 5824 * i_size has been changed by generic_commit_write() and we thus need
5614 * to include the updated inode in the current transaction. 5825 * to include the updated inode in the current transaction.
5615 * 5826 *
5616 * Also, vfs_dq_alloc_block() will always dirty the inode when blocks 5827 * Also, dquot_alloc_block() will always dirty the inode when blocks
5617 * are allocated to the file. 5828 * are allocated to the file.
5618 * 5829 *
5619 * If the inode is marked synchronous, we don't honour that here - doing 5830 * If the inode is marked synchronous, we don't honour that here - doing
@@ -5655,7 +5866,7 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode)
5655 err = jbd2_journal_get_write_access(handle, iloc.bh); 5866 err = jbd2_journal_get_write_access(handle, iloc.bh);
5656 if (!err) 5867 if (!err)
5657 err = ext4_handle_dirty_metadata(handle, 5868 err = ext4_handle_dirty_metadata(handle,
5658 inode, 5869 NULL,
5659 iloc.bh); 5870 iloc.bh);
5660 brelse(iloc.bh); 5871 brelse(iloc.bh);
5661 } 5872 }
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index c1cdf613e725..016d0249294f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -92,6 +92,15 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
92 flags &= ~EXT4_EXTENTS_FL; 92 flags &= ~EXT4_EXTENTS_FL;
93 } 93 }
94 94
95 if (flags & EXT4_EOFBLOCKS_FL) {
96 /* we don't support adding EOFBLOCKS flag */
97 if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
98 err = -EOPNOTSUPP;
99 goto flags_out;
100 }
101 } else if (oldflags & EXT4_EOFBLOCKS_FL)
102 ext4_truncate(inode);
103
95 handle = ext4_journal_start(inode, 1); 104 handle = ext4_journal_start(inode, 1);
96 if (IS_ERR(handle)) { 105 if (IS_ERR(handle)) {
97 err = PTR_ERR(handle); 106 err = PTR_ERR(handle);
@@ -221,31 +230,39 @@ setversion_out:
221 struct file *donor_filp; 230 struct file *donor_filp;
222 int err; 231 int err;
223 232
233 if (!(filp->f_mode & FMODE_READ) ||
234 !(filp->f_mode & FMODE_WRITE))
235 return -EBADF;
236
224 if (copy_from_user(&me, 237 if (copy_from_user(&me,
225 (struct move_extent __user *)arg, sizeof(me))) 238 (struct move_extent __user *)arg, sizeof(me)))
226 return -EFAULT; 239 return -EFAULT;
240 me.moved_len = 0;
227 241
228 donor_filp = fget(me.donor_fd); 242 donor_filp = fget(me.donor_fd);
229 if (!donor_filp) 243 if (!donor_filp)
230 return -EBADF; 244 return -EBADF;
231 245
232 if (!capable(CAP_DAC_OVERRIDE)) { 246 if (!(donor_filp->f_mode & FMODE_WRITE)) {
233 if ((current->real_cred->fsuid != inode->i_uid) || 247 err = -EBADF;
234 !(inode->i_mode & S_IRUSR) || 248 goto mext_out;
235 !(donor_filp->f_dentry->d_inode->i_mode &
236 S_IRUSR)) {
237 fput(donor_filp);
238 return -EACCES;
239 }
240 } 249 }
241 250
251 err = mnt_want_write(filp->f_path.mnt);
252 if (err)
253 goto mext_out;
254
242 err = ext4_move_extents(filp, donor_filp, me.orig_start, 255 err = ext4_move_extents(filp, donor_filp, me.orig_start,
243 me.donor_start, me.len, &me.moved_len); 256 me.donor_start, me.len, &me.moved_len);
244 fput(donor_filp); 257 mnt_drop_write(filp->f_path.mnt);
245 258 if (me.moved_len > 0)
246 if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) 259 file_remove_suid(donor_filp);
247 return -EFAULT;
248 260
261 if (copy_to_user((struct move_extent __user *)arg,
262 &me, sizeof(me)))
263 err = -EFAULT;
264mext_out:
265 fput(donor_filp);
249 return err; 266 return err;
250 } 267 }
251 268
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bba12824defa..bde9d0b170c2 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -23,6 +23,7 @@
23 23
24#include "mballoc.h" 24#include "mballoc.h"
25#include <linux/debugfs.h> 25#include <linux/debugfs.h>
26#include <linux/slab.h>
26#include <trace/events/ext4.h> 27#include <trace/events/ext4.h>
27 28
28/* 29/*
@@ -69,7 +70,7 @@
69 * 70 *
70 * pa_lstart -> the logical start block for this prealloc space 71 * pa_lstart -> the logical start block for this prealloc space
71 * pa_pstart -> the physical start block for this prealloc space 72 * pa_pstart -> the physical start block for this prealloc space
72 * pa_len -> lenght for this prealloc space 73 * pa_len -> length for this prealloc space
73 * pa_free -> free space available in this prealloc space 74 * pa_free -> free space available in this prealloc space
74 * 75 *
75 * The inode preallocation space is used looking at the _logical_ start 76 * The inode preallocation space is used looking at the _logical_ start
@@ -142,7 +143,7 @@
142 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The 143 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
143 * value of s_mb_order2_reqs can be tuned via 144 * value of s_mb_order2_reqs can be tuned via
144 * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to 145 * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to
145 * stripe size (sbi->s_stripe), we try to search for contigous block in 146 * stripe size (sbi->s_stripe), we try to search for contiguous block in
146 * stripe size. This should result in better allocation on RAID setups. If 147 * stripe size. This should result in better allocation on RAID setups. If
147 * not, we search in the specific group using bitmap for best extents. The 148 * not, we search in the specific group using bitmap for best extents. The
148 * tunable min_to_scan and max_to_scan control the behaviour here. 149 * tunable min_to_scan and max_to_scan control the behaviour here.
@@ -441,10 +442,9 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
441 for (i = 0; i < count; i++) { 442 for (i = 0; i < count; i++) {
442 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { 443 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
443 ext4_fsblk_t blocknr; 444 ext4_fsblk_t blocknr;
444 blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); 445
446 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
445 blocknr += first + i; 447 blocknr += first + i;
446 blocknr +=
447 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
448 ext4_grp_locked_error(sb, e4b->bd_group, 448 ext4_grp_locked_error(sb, e4b->bd_group,
449 __func__, "double-free of inode" 449 __func__, "double-free of inode"
450 " %lu's block %llu(bit %u in group %u)", 450 " %lu's block %llu(bit %u in group %u)",
@@ -1255,10 +1255,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1255 1255
1256 if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { 1256 if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
1257 ext4_fsblk_t blocknr; 1257 ext4_fsblk_t blocknr;
1258 blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); 1258
1259 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1259 blocknr += block; 1260 blocknr += block;
1260 blocknr +=
1261 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
1262 ext4_grp_locked_error(sb, e4b->bd_group, 1261 ext4_grp_locked_error(sb, e4b->bd_group,
1263 __func__, "double-free of inode" 1262 __func__, "double-free of inode"
1264 " %lu's block %llu(bit %u in group %u)", 1263 " %lu's block %llu(bit %u in group %u)",
@@ -1631,7 +1630,6 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1631 int max; 1630 int max;
1632 int err; 1631 int err;
1633 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 1632 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1634 struct ext4_super_block *es = sbi->s_es;
1635 struct ext4_free_extent ex; 1633 struct ext4_free_extent ex;
1636 1634
1637 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) 1635 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
@@ -1648,8 +1646,8 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1648 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { 1646 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
1649 ext4_fsblk_t start; 1647 ext4_fsblk_t start;
1650 1648
1651 start = (e4b->bd_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) + 1649 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
1652 ex.fe_start + le32_to_cpu(es->s_first_data_block); 1650 ex.fe_start;
1653 /* use do_div to get remainder (would be 64-bit modulo) */ 1651 /* use do_div to get remainder (would be 64-bit modulo) */
1654 if (do_div(start, sbi->s_stripe) == 0) { 1652 if (do_div(start, sbi->s_stripe) == 0) {
1655 ac->ac_found++; 1653 ac->ac_found++;
@@ -1803,8 +1801,8 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1803 BUG_ON(sbi->s_stripe == 0); 1801 BUG_ON(sbi->s_stripe == 0);
1804 1802
1805 /* find first stripe-aligned block in group */ 1803 /* find first stripe-aligned block in group */
1806 first_group_block = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb) 1804 first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
1807 + le32_to_cpu(sbi->s_es->s_first_data_block); 1805
1808 a = first_group_block + sbi->s_stripe - 1; 1806 a = first_group_block + sbi->s_stripe - 1;
1809 do_div(a, sbi->s_stripe); 1807 do_div(a, sbi->s_stripe);
1810 i = (a * sbi->s_stripe) - first_group_block; 1808 i = (a * sbi->s_stripe) - first_group_block;
@@ -2256,7 +2254,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2256 2254
2257 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); 2255 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2258 init_rwsem(&meta_group_info[i]->alloc_sem); 2256 init_rwsem(&meta_group_info[i]->alloc_sem);
2259 meta_group_info[i]->bb_free_root.rb_node = NULL; 2257 meta_group_info[i]->bb_free_root = RB_ROOT;
2260 2258
2261#ifdef DOUBLE_CHECK 2259#ifdef DOUBLE_CHECK
2262 { 2260 {
@@ -2529,7 +2527,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2529 struct ext4_group_info *db; 2527 struct ext4_group_info *db;
2530 int err, count = 0, count2 = 0; 2528 int err, count = 0, count2 = 0;
2531 struct ext4_free_data *entry; 2529 struct ext4_free_data *entry;
2532 ext4_fsblk_t discard_block;
2533 struct list_head *l, *ltmp; 2530 struct list_head *l, *ltmp;
2534 2531
2535 list_for_each_safe(l, ltmp, &txn->t_private_list) { 2532 list_for_each_safe(l, ltmp, &txn->t_private_list) {
@@ -2559,13 +2556,16 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2559 page_cache_release(e4b.bd_bitmap_page); 2556 page_cache_release(e4b.bd_bitmap_page);
2560 } 2557 }
2561 ext4_unlock_group(sb, entry->group); 2558 ext4_unlock_group(sb, entry->group);
2562 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) 2559 if (test_opt(sb, DISCARD)) {
2563 + entry->start_blk 2560 ext4_fsblk_t discard_block;
2564 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 2561
2565 trace_ext4_discard_blocks(sb, (unsigned long long)discard_block, 2562 discard_block = entry->start_blk +
2566 entry->count); 2563 ext4_group_first_block_no(sb, entry->group);
2567 sb_issue_discard(sb, discard_block, entry->count); 2564 trace_ext4_discard_blocks(sb,
2568 2565 (unsigned long long)discard_block,
2566 entry->count);
2567 sb_issue_discard(sb, discard_block, entry->count);
2568 }
2569 kmem_cache_free(ext4_free_ext_cachep, entry); 2569 kmem_cache_free(ext4_free_ext_cachep, entry);
2570 ext4_mb_release_desc(&e4b); 2570 ext4_mb_release_desc(&e4b);
2571 } 2571 }
@@ -2698,14 +2698,11 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2698 if (err) 2698 if (err)
2699 goto out_err; 2699 goto out_err;
2700 2700
2701 block = ac->ac_b_ex.fe_group * EXT4_BLOCKS_PER_GROUP(sb) 2701 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2702 + ac->ac_b_ex.fe_start
2703 + le32_to_cpu(es->s_first_data_block);
2704 2702
2705 len = ac->ac_b_ex.fe_len; 2703 len = ac->ac_b_ex.fe_len;
2706 if (!ext4_data_block_valid(sbi, block, len)) { 2704 if (!ext4_data_block_valid(sbi, block, len)) {
2707 ext4_error(sb, __func__, 2705 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2708 "Allocating blocks %llu-%llu which overlap "
2709 "fs metadata\n", block, block+len); 2706 "fs metadata\n", block, block+len);
2710 /* File system mounted not to panic on error 2707 /* File system mounted not to panic on error
2711 * Fix the bitmap and repeat the block allocation 2708 * Fix the bitmap and repeat the block allocation
@@ -2750,12 +2747,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2750 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) 2747 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2751 /* release all the reserved blocks if non delalloc */ 2748 /* release all the reserved blocks if non delalloc */
2752 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); 2749 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
2753 else {
2754 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
2755 ac->ac_b_ex.fe_len);
2756 /* convert reserved quota blocks to real quota blocks */
2757 vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
2758 }
2759 2750
2760 if (sbi->s_log_groups_per_flex) { 2751 if (sbi->s_log_groups_per_flex) {
2761 ext4_group_t flex_group = ext4_flex_group(sbi, 2752 ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -3006,6 +2997,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3006} 2997}
3007 2998
3008/* 2999/*
3000 * Called on failure; free up any blocks from the inode PA for this
3001 * context. We don't need this for MB_GROUP_PA because we only change
3002 * pa_free in ext4_mb_release_context(), but on failure, we've already
3003 * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
3004 */
3005static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3006{
3007 struct ext4_prealloc_space *pa = ac->ac_pa;
3008 int len;
3009
3010 if (pa && pa->pa_type == MB_INODE_PA) {
3011 len = ac->ac_b_ex.fe_len;
3012 pa->pa_free += len;
3013 }
3014
3015}
3016
3017/*
3009 * use blocks preallocated to inode 3018 * use blocks preallocated to inode
3010 */ 3019 */
3011static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, 3020static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
@@ -3144,9 +3153,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3144 /* The max size of hash table is PREALLOC_TB_SIZE */ 3153 /* The max size of hash table is PREALLOC_TB_SIZE */
3145 order = PREALLOC_TB_SIZE - 1; 3154 order = PREALLOC_TB_SIZE - 1;
3146 3155
3147 goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + 3156 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
3148 ac->ac_g_ex.fe_start +
3149 le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
3150 /* 3157 /*
3151 * search for the prealloc space that is having 3158 * search for the prealloc space that is having
3152 * minimal distance from the goal block. 3159 * minimal distance from the goal block.
@@ -3509,8 +3516,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3509 if (bit >= end) 3516 if (bit >= end)
3510 break; 3517 break;
3511 next = mb_find_next_bit(bitmap_bh->b_data, end, bit); 3518 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3512 start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + 3519 start = ext4_group_first_block_no(sb, group) + bit;
3513 le32_to_cpu(sbi->s_es->s_first_data_block);
3514 mb_debug(1, " free preallocated %u/%u in group %u\n", 3520 mb_debug(1, " free preallocated %u/%u in group %u\n",
3515 (unsigned) start, (unsigned) next - bit, 3521 (unsigned) start, (unsigned) next - bit,
3516 (unsigned) group); 3522 (unsigned) group);
@@ -3606,15 +3612,13 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3606 3612
3607 bitmap_bh = ext4_read_block_bitmap(sb, group); 3613 bitmap_bh = ext4_read_block_bitmap(sb, group);
3608 if (bitmap_bh == NULL) { 3614 if (bitmap_bh == NULL) {
3609 ext4_error(sb, __func__, "Error in reading block " 3615 ext4_error(sb, "Error reading block bitmap for %u", group);
3610 "bitmap for %u", group);
3611 return 0; 3616 return 0;
3612 } 3617 }
3613 3618
3614 err = ext4_mb_load_buddy(sb, group, &e4b); 3619 err = ext4_mb_load_buddy(sb, group, &e4b);
3615 if (err) { 3620 if (err) {
3616 ext4_error(sb, __func__, "Error in loading buddy " 3621 ext4_error(sb, "Error loading buddy information for %u", group);
3617 "information for %u", group);
3618 put_bh(bitmap_bh); 3622 put_bh(bitmap_bh);
3619 return 0; 3623 return 0;
3620 } 3624 }
@@ -3787,15 +3791,15 @@ repeat:
3787 3791
3788 err = ext4_mb_load_buddy(sb, group, &e4b); 3792 err = ext4_mb_load_buddy(sb, group, &e4b);
3789 if (err) { 3793 if (err) {
3790 ext4_error(sb, __func__, "Error in loading buddy " 3794 ext4_error(sb, "Error loading buddy information for %u",
3791 "information for %u", group); 3795 group);
3792 continue; 3796 continue;
3793 } 3797 }
3794 3798
3795 bitmap_bh = ext4_read_block_bitmap(sb, group); 3799 bitmap_bh = ext4_read_block_bitmap(sb, group);
3796 if (bitmap_bh == NULL) { 3800 if (bitmap_bh == NULL) {
3797 ext4_error(sb, __func__, "Error in reading block " 3801 ext4_error(sb, "Error reading block bitmap for %u",
3798 "bitmap for %u", group); 3802 group);
3799 ext4_mb_release_desc(&e4b); 3803 ext4_mb_release_desc(&e4b);
3800 continue; 3804 continue;
3801 } 3805 }
@@ -3921,7 +3925,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3921 3925
3922 /* don't use group allocation for large files */ 3926 /* don't use group allocation for large files */
3923 size = max(size, isize); 3927 size = max(size, isize);
3924 if (size >= sbi->s_mb_stream_request) { 3928 if (size > sbi->s_mb_stream_request) {
3925 ac->ac_flags |= EXT4_MB_STREAM_ALLOC; 3929 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
3926 return; 3930 return;
3927 } 3931 }
@@ -3932,7 +3936,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3932 * per cpu locality group is to reduce the contention between block 3936 * per cpu locality group is to reduce the contention between block
3933 * request from multiple CPUs. 3937 * request from multiple CPUs.
3934 */ 3938 */
3935 ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id()); 3939 ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
3936 3940
3937 /* we're going to use group allocation */ 3941 /* we're going to use group allocation */
3938 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; 3942 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
@@ -4060,8 +4064,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4060 4064
4061 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); 4065 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
4062 if (ext4_mb_load_buddy(sb, group, &e4b)) { 4066 if (ext4_mb_load_buddy(sb, group, &e4b)) {
4063 ext4_error(sb, __func__, "Error in loading buddy " 4067 ext4_error(sb, "Error loading buddy information for %u",
4064 "information for %u", group); 4068 group);
4065 continue; 4069 continue;
4066 } 4070 }
4067 ext4_lock_group(sb, group); 4071 ext4_lock_group(sb, group);
@@ -4237,7 +4241,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4237 return 0; 4241 return 0;
4238 } 4242 }
4239 reserv_blks = ar->len; 4243 reserv_blks = ar->len;
4240 while (ar->len && vfs_dq_alloc_block(ar->inode, ar->len)) { 4244 while (ar->len && dquot_alloc_block(ar->inode, ar->len)) {
4241 ar->flags |= EXT4_MB_HINT_NOPREALLOC; 4245 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4242 ar->len--; 4246 ar->len--;
4243 } 4247 }
@@ -4290,6 +4294,7 @@ repeat:
4290 ac->ac_status = AC_STATUS_CONTINUE; 4294 ac->ac_status = AC_STATUS_CONTINUE;
4291 goto repeat; 4295 goto repeat;
4292 } else if (*errp) { 4296 } else if (*errp) {
4297 ext4_discard_allocated_blocks(ac);
4293 ac->ac_b_ex.fe_len = 0; 4298 ac->ac_b_ex.fe_len = 0;
4294 ar->len = 0; 4299 ar->len = 0;
4295 ext4_mb_show_ac(ac); 4300 ext4_mb_show_ac(ac);
@@ -4313,7 +4318,7 @@ out2:
4313 kmem_cache_free(ext4_ac_cachep, ac); 4318 kmem_cache_free(ext4_ac_cachep, ac);
4314out1: 4319out1:
4315 if (inquota && ar->len < inquota) 4320 if (inquota && ar->len < inquota)
4316 vfs_dq_free_block(ar->inode, inquota - ar->len); 4321 dquot_free_block(ar->inode, inquota - ar->len);
4317out3: 4322out3:
4318 if (!ar->len) { 4323 if (!ar->len) {
4319 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) 4324 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
@@ -4422,18 +4427,24 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4422 return 0; 4427 return 0;
4423} 4428}
4424 4429
4425/* 4430/**
4426 * Main entry point into mballoc to free blocks 4431 * ext4_free_blocks() -- Free given blocks and update quota
4432 * @handle: handle for this transaction
4433 * @inode: inode
4434 * @block: start physical block to free
4435 * @count: number of blocks to free
4436 * @flags: EXT4_FREE_BLOCKS_* flags (e.g. METADATA, FORGET, VALIDATED)
4427 */ 4437 */
4428void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, 4438void ext4_free_blocks(handle_t *handle, struct inode *inode,
4429 ext4_fsblk_t block, unsigned long count, 4439 struct buffer_head *bh, ext4_fsblk_t block,
4430 int metadata, unsigned long *freed) 4440 unsigned long count, int flags)
4431{ 4441{
4432 struct buffer_head *bitmap_bh = NULL; 4442 struct buffer_head *bitmap_bh = NULL;
4433 struct super_block *sb = inode->i_sb; 4443 struct super_block *sb = inode->i_sb;
4434 struct ext4_allocation_context *ac = NULL; 4444 struct ext4_allocation_context *ac = NULL;
4435 struct ext4_group_desc *gdp; 4445 struct ext4_group_desc *gdp;
4436 struct ext4_super_block *es; 4446 struct ext4_super_block *es;
4447 unsigned long freed = 0;
4437 unsigned int overflow; 4448 unsigned int overflow;
4438 ext4_grpblk_t bit; 4449 ext4_grpblk_t bit;
4439 struct buffer_head *gd_bh; 4450 struct buffer_head *gd_bh;
@@ -4443,21 +4454,49 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4443 int err = 0; 4454 int err = 0;
4444 int ret; 4455 int ret;
4445 4456
4446 *freed = 0; 4457 if (bh) {
4458 if (block)
4459 BUG_ON(block != bh->b_blocknr);
4460 else
4461 block = bh->b_blocknr;
4462 }
4447 4463
4448 sbi = EXT4_SB(sb); 4464 sbi = EXT4_SB(sb);
4449 es = EXT4_SB(sb)->s_es; 4465 es = EXT4_SB(sb)->s_es;
4450 if (block < le32_to_cpu(es->s_first_data_block) || 4466 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4451 block + count < block || 4467 !ext4_data_block_valid(sbi, block, count)) {
4452 block + count > ext4_blocks_count(es)) { 4468 ext4_error(sb, "Freeing blocks not in datazone - "
4453 ext4_error(sb, __func__, 4469 "block = %llu, count = %lu", block, count);
4454 "Freeing blocks not in datazone - "
4455 "block = %llu, count = %lu", block, count);
4456 goto error_return; 4470 goto error_return;
4457 } 4471 }
4458 4472
4459 ext4_debug("freeing block %llu\n", block); 4473 ext4_debug("freeing block %llu\n", block);
4460 trace_ext4_free_blocks(inode, block, count, metadata); 4474 trace_ext4_free_blocks(inode, block, count, flags);
4475
4476 if (flags & EXT4_FREE_BLOCKS_FORGET) {
4477 struct buffer_head *tbh = bh;
4478 int i;
4479
4480 BUG_ON(bh && (count > 1));
4481
4482 for (i = 0; i < count; i++) {
4483 if (!bh)
4484 tbh = sb_find_get_block(inode->i_sb,
4485 block + i);
4486 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4487 inode, tbh, block + i);
4488 }
4489 }
4490
4491 /*
4492 * We need to make sure we don't reuse the freed block until
4493 * after the transaction is committed, which we can do by
4494 * treating the block as metadata, below. We make an
4495 * exception if the inode is to be written in writeback mode
4496 * since writeback mode has weak data consistency guarantees.
4497 */
4498 if (!ext4_should_writeback_data(inode))
4499 flags |= EXT4_FREE_BLOCKS_METADATA;
4461 4500
4462 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4501 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4463 if (ac) { 4502 if (ac) {
@@ -4495,8 +4534,7 @@ do_more:
4495 in_range(block + count - 1, ext4_inode_table(sb, gdp), 4534 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4496 EXT4_SB(sb)->s_itb_per_group)) { 4535 EXT4_SB(sb)->s_itb_per_group)) {
4497 4536
4498 ext4_error(sb, __func__, 4537 ext4_error(sb, "Freeing blocks in system zone - "
4499 "Freeing blocks in system zone - "
4500 "Block = %llu, count = %lu", block, count); 4538 "Block = %llu, count = %lu", block, count);
4501 /* err = 0. ext4_std_error should be a no op */ 4539 /* err = 0. ext4_std_error should be a no op */
4502 goto error_return; 4540 goto error_return;
@@ -4533,7 +4571,8 @@ do_more:
4533 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4571 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4534 if (err) 4572 if (err)
4535 goto error_return; 4573 goto error_return;
4536 if (metadata && ext4_handle_valid(handle)) { 4574
4575 if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
4537 struct ext4_free_data *new_entry; 4576 struct ext4_free_data *new_entry;
4538 /* 4577 /*
4539 * blocks being freed are metadata. these blocks shouldn't 4578 * blocks being freed are metadata. these blocks shouldn't
@@ -4572,7 +4611,7 @@ do_more:
4572 4611
4573 ext4_mb_release_desc(&e4b); 4612 ext4_mb_release_desc(&e4b);
4574 4613
4575 *freed += count; 4614 freed += count;
4576 4615
4577 /* We dirtied the bitmap block */ 4616 /* We dirtied the bitmap block */
4578 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 4617 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@ -4592,6 +4631,8 @@ do_more:
4592 } 4631 }
4593 sb->s_dirt = 1; 4632 sb->s_dirt = 1;
4594error_return: 4633error_return:
4634 if (freed)
4635 dquot_free_block(inode, freed);
4595 brelse(bitmap_bh); 4636 brelse(bitmap_bh);
4596 ext4_std_error(sb, err); 4637 ext4_std_error(sb, err);
4597 if (ac) 4638 if (ac)
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 0ca811061bc7..b619322c76f0 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -17,7 +17,6 @@
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/version.h>
21#include <linux/blkdev.h> 20#include <linux/blkdev.h>
22#include <linux/mutex.h> 21#include <linux/mutex.h>
23#include "ext4_jbd2.h" 22#include "ext4_jbd2.h"
@@ -221,16 +220,9 @@ struct ext4_buddy {
221#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) 220#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
222#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) 221#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
223 222
224#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
225
226static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, 223static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
227 struct ext4_free_extent *fex) 224 struct ext4_free_extent *fex)
228{ 225{
229 ext4_fsblk_t block; 226 return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start;
230
231 block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
232 + fex->fe_start
233 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
234 return block;
235} 227}
236#endif 228#endif
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index a93d5b80f3e2..34dcfc52ef44 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -13,6 +13,7 @@
13 */ 13 */
14 14
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/slab.h>
16#include "ext4_jbd2.h" 17#include "ext4_jbd2.h"
17#include "ext4_extents.h" 18#include "ext4_extents.h"
18 19
@@ -238,7 +239,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
238 * So allocate a credit of 3. We may update 239 * So allocate a credit of 3. We may update
239 * quota (user and group). 240 * quota (user and group).
240 */ 241 */
241 needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 242 needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
242 243
243 if (ext4_journal_extend(handle, needed) != 0) 244 if (ext4_journal_extend(handle, needed) != 0)
244 retval = ext4_journal_restart(handle, needed); 245 retval = ext4_journal_restart(handle, needed);
@@ -262,13 +263,17 @@ static int free_dind_blocks(handle_t *handle,
262 for (i = 0; i < max_entries; i++) { 263 for (i = 0; i < max_entries; i++) {
263 if (tmp_idata[i]) { 264 if (tmp_idata[i]) {
264 extend_credit_for_blkdel(handle, inode); 265 extend_credit_for_blkdel(handle, inode);
265 ext4_free_blocks(handle, inode, 266 ext4_free_blocks(handle, inode, 0,
266 le32_to_cpu(tmp_idata[i]), 1, 1); 267 le32_to_cpu(tmp_idata[i]), 1,
268 EXT4_FREE_BLOCKS_METADATA |
269 EXT4_FREE_BLOCKS_FORGET);
267 } 270 }
268 } 271 }
269 put_bh(bh); 272 put_bh(bh);
270 extend_credit_for_blkdel(handle, inode); 273 extend_credit_for_blkdel(handle, inode);
271 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 274 ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
275 EXT4_FREE_BLOCKS_METADATA |
276 EXT4_FREE_BLOCKS_FORGET);
272 return 0; 277 return 0;
273} 278}
274 279
@@ -297,7 +302,9 @@ static int free_tind_blocks(handle_t *handle,
297 } 302 }
298 put_bh(bh); 303 put_bh(bh);
299 extend_credit_for_blkdel(handle, inode); 304 extend_credit_for_blkdel(handle, inode);
300 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 305 ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
306 EXT4_FREE_BLOCKS_METADATA |
307 EXT4_FREE_BLOCKS_FORGET);
301 return 0; 308 return 0;
302} 309}
303 310
@@ -308,8 +315,10 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
308 /* ei->i_data[EXT4_IND_BLOCK] */ 315 /* ei->i_data[EXT4_IND_BLOCK] */
309 if (i_data[0]) { 316 if (i_data[0]) {
310 extend_credit_for_blkdel(handle, inode); 317 extend_credit_for_blkdel(handle, inode);
311 ext4_free_blocks(handle, inode, 318 ext4_free_blocks(handle, inode, 0,
312 le32_to_cpu(i_data[0]), 1, 1); 319 le32_to_cpu(i_data[0]), 1,
320 EXT4_FREE_BLOCKS_METADATA |
321 EXT4_FREE_BLOCKS_FORGET);
313 } 322 }
314 323
315 /* ei->i_data[EXT4_DIND_BLOCK] */ 324 /* ei->i_data[EXT4_DIND_BLOCK] */
@@ -357,12 +366,12 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
357 * happened after we started the migrate. We need to 366 * happened after we started the migrate. We need to
358 * fail the migrate 367 * fail the migrate
359 */ 368 */
360 if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) { 369 if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) {
361 retval = -EAGAIN; 370 retval = -EAGAIN;
362 up_write(&EXT4_I(inode)->i_data_sem); 371 up_write(&EXT4_I(inode)->i_data_sem);
363 goto err_out; 372 goto err_out;
364 } else 373 } else
365 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; 374 ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
366 /* 375 /*
367 * We have the extent map built with the tmp inode. 376 * We have the extent map built with the tmp inode.
368 * Now copy the i_data across 377 * Now copy the i_data across
@@ -419,7 +428,8 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
419 } 428 }
420 put_bh(bh); 429 put_bh(bh);
421 extend_credit_for_blkdel(handle, inode); 430 extend_credit_for_blkdel(handle, inode);
422 ext4_free_blocks(handle, inode, block, 1, 1); 431 ext4_free_blocks(handle, inode, 0, block, 1,
432 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
423 return retval; 433 return retval;
424} 434}
425 435
@@ -477,7 +487,7 @@ int ext4_ext_migrate(struct inode *inode)
477 handle = ext4_journal_start(inode, 487 handle = ext4_journal_start(inode,
478 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 488 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
479 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 489 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
480 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) 490 EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
481 + 1); 491 + 1);
482 if (IS_ERR(handle)) { 492 if (IS_ERR(handle)) {
483 retval = PTR_ERR(handle); 493 retval = PTR_ERR(handle);
@@ -494,14 +504,10 @@ int ext4_ext_migrate(struct inode *inode)
494 } 504 }
495 i_size_write(tmp_inode, i_size_read(inode)); 505 i_size_write(tmp_inode, i_size_read(inode));
496 /* 506 /*
497 * We don't want the inode to be reclaimed 507 * Set the i_nlink to zero so it will be deleted later
498 * if we got interrupted in between. We have 508 * when we drop inode reference.
499 * this tmp inode carrying reference to the
500 * data blocks of the original file. We set
501 * the i_nlink to zero at the last stage after
502 * switching the original file to extent format
503 */ 509 */
504 tmp_inode->i_nlink = 1; 510 tmp_inode->i_nlink = 0;
505 511
506 ext4_ext_tree_init(handle, tmp_inode); 512 ext4_ext_tree_init(handle, tmp_inode);
507 ext4_orphan_add(handle, tmp_inode); 513 ext4_orphan_add(handle, tmp_inode);
@@ -524,10 +530,20 @@ int ext4_ext_migrate(struct inode *inode)
524 * allocation. 530 * allocation.
525 */ 531 */
526 down_read((&EXT4_I(inode)->i_data_sem)); 532 down_read((&EXT4_I(inode)->i_data_sem));
527 EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE; 533 ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
528 up_read((&EXT4_I(inode)->i_data_sem)); 534 up_read((&EXT4_I(inode)->i_data_sem));
529 535
530 handle = ext4_journal_start(inode, 1); 536 handle = ext4_journal_start(inode, 1);
537 if (IS_ERR(handle)) {
538 /*
539 * It is impossible to update on-disk structures without
540 * a handle, so just rollback in-core changes and live other
541 * work to orphan_list_cleanup()
542 */
543 ext4_orphan_del(NULL, tmp_inode);
544 retval = PTR_ERR(handle);
545 goto out;
546 }
531 547
532 ei = EXT4_I(inode); 548 ei = EXT4_I(inode);
533 i_data = ei->i_data; 549 i_data = ei->i_data;
@@ -609,15 +625,8 @@ err_out:
609 625
610 /* Reset the extent details */ 626 /* Reset the extent details */
611 ext4_ext_tree_init(handle, tmp_inode); 627 ext4_ext_tree_init(handle, tmp_inode);
612
613 /*
614 * Set the i_nlink to zero so that
615 * generic_drop_inode really deletes the
616 * inode
617 */
618 tmp_inode->i_nlink = 0;
619
620 ext4_journal_stop(handle); 628 ext4_journal_stop(handle);
629out:
621 unlock_new_inode(tmp_inode); 630 unlock_new_inode(tmp_inode);
622 iput(tmp_inode); 631 iput(tmp_inode);
623 632
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 25b6b1457360..d1fc662cc311 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -15,6 +15,7 @@
15 15
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/quotaops.h> 17#include <linux/quotaops.h>
18#include <linux/slab.h>
18#include "ext4_jbd2.h" 19#include "ext4_jbd2.h"
19#include "ext4_extents.h" 20#include "ext4_extents.h"
20#include "ext4.h" 21#include "ext4.h"
@@ -77,12 +78,14 @@ static int
77mext_next_extent(struct inode *inode, struct ext4_ext_path *path, 78mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
78 struct ext4_extent **extent) 79 struct ext4_extent **extent)
79{ 80{
81 struct ext4_extent_header *eh;
80 int ppos, leaf_ppos = path->p_depth; 82 int ppos, leaf_ppos = path->p_depth;
81 83
82 ppos = leaf_ppos; 84 ppos = leaf_ppos;
83 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { 85 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
84 /* leaf block */ 86 /* leaf block */
85 *extent = ++path[ppos].p_ext; 87 *extent = ++path[ppos].p_ext;
88 path[ppos].p_block = ext_pblock(path[ppos].p_ext);
86 return 0; 89 return 0;
87 } 90 }
88 91
@@ -119,9 +122,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
119 ext_block_hdr(path[cur_ppos+1].p_bh); 122 ext_block_hdr(path[cur_ppos+1].p_bh);
120 } 123 }
121 124
125 path[leaf_ppos].p_ext = *extent = NULL;
126
127 eh = path[leaf_ppos].p_hdr;
128 if (le16_to_cpu(eh->eh_entries) == 0)
129 /* empty leaf is found */
130 return -ENODATA;
131
122 /* leaf block */ 132 /* leaf block */
123 path[leaf_ppos].p_ext = *extent = 133 path[leaf_ppos].p_ext = *extent =
124 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); 134 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
135 path[leaf_ppos].p_block =
136 ext_pblock(path[leaf_ppos].p_ext);
125 return 0; 137 return 0;
126 } 138 }
127 } 139 }
@@ -141,12 +153,12 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2,
141 int ret = 0; 153 int ret = 0;
142 154
143 if (inode1 == NULL) { 155 if (inode1 == NULL) {
144 ext4_error(inode2->i_sb, function, 156 __ext4_error(inode2->i_sb, function,
145 "Both inodes should not be NULL: " 157 "Both inodes should not be NULL: "
146 "inode1 NULL inode2 %lu", inode2->i_ino); 158 "inode1 NULL inode2 %lu", inode2->i_ino);
147 ret = -EIO; 159 ret = -EIO;
148 } else if (inode2 == NULL) { 160 } else if (inode2 == NULL) {
149 ext4_error(inode1->i_sb, function, 161 __ext4_error(inode1->i_sb, function,
150 "Both inodes should not be NULL: " 162 "Both inodes should not be NULL: "
151 "inode1 %lu inode2 NULL", inode1->i_ino); 163 "inode1 %lu inode2 NULL", inode1->i_ino);
152 ret = -EIO; 164 ret = -EIO;
@@ -155,40 +167,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2,
155} 167}
156 168
157/** 169/**
158 * mext_double_down_read - Acquire two inodes' read semaphore 170 * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
159 * 171 *
160 * @orig_inode: original inode structure 172 * @orig_inode: original inode structure
161 * @donor_inode: donor inode structure 173 * @donor_inode: donor inode structure
162 * Acquire read semaphore of the two inodes (orig and donor) by i_ino order. 174 * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
175 * i_ino order.
163 */ 176 */
164static void 177static void
165mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode) 178double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
166{
167 struct inode *first = orig_inode, *second = donor_inode;
168
169 /*
170 * Use the inode number to provide the stable locking order instead
171 * of its address, because the C language doesn't guarantee you can
172 * compare pointers that don't come from the same array.
173 */
174 if (donor_inode->i_ino < orig_inode->i_ino) {
175 first = donor_inode;
176 second = orig_inode;
177 }
178
179 down_read(&EXT4_I(first)->i_data_sem);
180 down_read(&EXT4_I(second)->i_data_sem);
181}
182
183/**
184 * mext_double_down_write - Acquire two inodes' write semaphore
185 *
186 * @orig_inode: original inode structure
187 * @donor_inode: donor inode structure
188 * Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
189 */
190static void
191mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
192{ 179{
193 struct inode *first = orig_inode, *second = donor_inode; 180 struct inode *first = orig_inode, *second = donor_inode;
194 181
@@ -203,32 +190,18 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
203 } 190 }
204 191
205 down_write(&EXT4_I(first)->i_data_sem); 192 down_write(&EXT4_I(first)->i_data_sem);
206 down_write(&EXT4_I(second)->i_data_sem); 193 down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
207}
208
209/**
210 * mext_double_up_read - Release two inodes' read semaphore
211 *
212 * @orig_inode: original inode structure to be released its lock first
213 * @donor_inode: donor inode structure to be released its lock second
214 * Release read semaphore of two inodes (orig and donor).
215 */
216static void
217mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
218{
219 up_read(&EXT4_I(orig_inode)->i_data_sem);
220 up_read(&EXT4_I(donor_inode)->i_data_sem);
221} 194}
222 195
223/** 196/**
224 * mext_double_up_write - Release two inodes' write semaphore 197 * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
225 * 198 *
226 * @orig_inode: original inode structure to be released its lock first 199 * @orig_inode: original inode structure to be released its lock first
227 * @donor_inode: donor inode structure to be released its lock second 200 * @donor_inode: donor inode structure to be released its lock second
228 * Release write semaphore of two inodes (orig and donor). 201 * Release write lock of i_data_sem of two inodes (orig and donor).
229 */ 202 */
230static void 203static void
231mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) 204double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
232{ 205{
233 up_write(&EXT4_I(orig_inode)->i_data_sem); 206 up_write(&EXT4_I(orig_inode)->i_data_sem);
234 up_write(&EXT4_I(donor_inode)->i_data_sem); 207 up_write(&EXT4_I(donor_inode)->i_data_sem);
@@ -280,6 +253,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
280 } 253 }
281 254
282 o_start->ee_len = start_ext->ee_len; 255 o_start->ee_len = start_ext->ee_len;
256 eblock = le32_to_cpu(start_ext->ee_block);
283 new_flag = 1; 257 new_flag = 1;
284 258
285 } else if (start_ext->ee_len && new_ext->ee_len && 259 } else if (start_ext->ee_len && new_ext->ee_len &&
@@ -290,6 +264,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
290 * orig |------------------------------| 264 * orig |------------------------------|
291 */ 265 */
292 o_start->ee_len = start_ext->ee_len; 266 o_start->ee_len = start_ext->ee_len;
267 eblock = le32_to_cpu(start_ext->ee_block);
293 new_flag = 1; 268 new_flag = 1;
294 269
295 } else if (!start_ext->ee_len && new_ext->ee_len && 270 } else if (!start_ext->ee_len && new_ext->ee_len &&
@@ -503,7 +478,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
503 struct ext4_extent *oext, *o_start, *o_end, *prev_ext; 478 struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
504 struct ext4_extent new_ext, start_ext, end_ext; 479 struct ext4_extent new_ext, start_ext, end_ext;
505 ext4_lblk_t new_ext_end; 480 ext4_lblk_t new_ext_end;
506 ext4_fsblk_t new_phys_end;
507 int oext_alen, new_ext_alen, end_ext_alen; 481 int oext_alen, new_ext_alen, end_ext_alen;
508 int depth = ext_depth(orig_inode); 482 int depth = ext_depth(orig_inode);
509 int ret; 483 int ret;
@@ -517,7 +491,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
517 new_ext.ee_len = dext->ee_len; 491 new_ext.ee_len = dext->ee_len;
518 new_ext_alen = ext4_ext_get_actual_len(&new_ext); 492 new_ext_alen = ext4_ext_get_actual_len(&new_ext);
519 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; 493 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
520 new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1;
521 494
522 /* 495 /*
523 * Case: original extent is first 496 * Case: original extent is first
@@ -530,6 +503,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
530 le32_to_cpu(oext->ee_block) + oext_alen) { 503 le32_to_cpu(oext->ee_block) + oext_alen) {
531 start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - 504 start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
532 le32_to_cpu(oext->ee_block)); 505 le32_to_cpu(oext->ee_block));
506 start_ext.ee_block = oext->ee_block;
533 copy_extent_status(oext, &start_ext); 507 copy_extent_status(oext, &start_ext);
534 } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { 508 } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
535 prev_ext = oext - 1; 509 prev_ext = oext - 1;
@@ -543,6 +517,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
543 start_ext.ee_len = cpu_to_le16( 517 start_ext.ee_len = cpu_to_le16(
544 ext4_ext_get_actual_len(prev_ext) + 518 ext4_ext_get_actual_len(prev_ext) +
545 new_ext_alen); 519 new_ext_alen);
520 start_ext.ee_block = oext->ee_block;
546 copy_extent_status(prev_ext, &start_ext); 521 copy_extent_status(prev_ext, &start_ext);
547 new_ext.ee_len = 0; 522 new_ext.ee_len = 0;
548 } 523 }
@@ -554,7 +529,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
554 * new_ext |-------| 529 * new_ext |-------|
555 */ 530 */
556 if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { 531 if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
557 ext4_error(orig_inode->i_sb, __func__, 532 ext4_error(orig_inode->i_sb,
558 "new_ext_end(%u) should be less than or equal to " 533 "new_ext_end(%u) should be less than or equal to "
559 "oext->ee_block(%u) + oext_alen(%d) - 1", 534 "oext->ee_block(%u) + oext_alen(%d) - 1",
560 new_ext_end, le32_to_cpu(oext->ee_block), 535 new_ext_end, le32_to_cpu(oext->ee_block),
@@ -596,7 +571,7 @@ out:
596 * @tmp_oext: the extent that will belong to the donor inode 571 * @tmp_oext: the extent that will belong to the donor inode
597 * @orig_off: block offset of original inode 572 * @orig_off: block offset of original inode
598 * @donor_off: block offset of donor inode 573 * @donor_off: block offset of donor inode
599 * @max_count: the maximun length of extents 574 * @max_count: the maximum length of extents
600 * 575 *
601 * Return 0 on success, or a negative error value on failure. 576 * Return 0 on success, or a negative error value on failure.
602 */ 577 */
@@ -661,6 +636,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
661 * @donor_inode: donor inode 636 * @donor_inode: donor inode
662 * @from: block offset of orig_inode 637 * @from: block offset of orig_inode
663 * @count: block count to be replaced 638 * @count: block count to be replaced
639 * @err: pointer to save return value
664 * 640 *
665 * Replace original inode extents and donor inode extents page by page. 641 * Replace original inode extents and donor inode extents page by page.
666 * We implement this replacement in the following three steps: 642 * We implement this replacement in the following three steps:
@@ -671,33 +647,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
671 * 3. Change the block information of donor inode to point at the saved 647 * 3. Change the block information of donor inode to point at the saved
672 * original inode blocks in the dummy extents. 648 * original inode blocks in the dummy extents.
673 * 649 *
674 * Return 0 on success, or a negative error value on failure. 650 * Return replaced block count.
675 */ 651 */
676static int 652static int
677mext_replace_branches(handle_t *handle, struct inode *orig_inode, 653mext_replace_branches(handle_t *handle, struct inode *orig_inode,
678 struct inode *donor_inode, ext4_lblk_t from, 654 struct inode *donor_inode, ext4_lblk_t from,
679 ext4_lblk_t count) 655 ext4_lblk_t count, int *err)
680{ 656{
681 struct ext4_ext_path *orig_path = NULL; 657 struct ext4_ext_path *orig_path = NULL;
682 struct ext4_ext_path *donor_path = NULL; 658 struct ext4_ext_path *donor_path = NULL;
683 struct ext4_extent *oext, *dext; 659 struct ext4_extent *oext, *dext;
684 struct ext4_extent tmp_dext, tmp_oext; 660 struct ext4_extent tmp_dext, tmp_oext;
685 ext4_lblk_t orig_off = from, donor_off = from; 661 ext4_lblk_t orig_off = from, donor_off = from;
686 int err = 0;
687 int depth; 662 int depth;
688 int replaced_count = 0; 663 int replaced_count = 0;
689 int dext_alen; 664 int dext_alen;
690 665
691 mext_double_down_write(orig_inode, donor_inode); 666 /* Protect extent trees against block allocations via delalloc */
667 double_down_write_data_sem(orig_inode, donor_inode);
692 668
693 /* Get the original extent for the block "orig_off" */ 669 /* Get the original extent for the block "orig_off" */
694 err = get_ext_path(orig_inode, orig_off, &orig_path); 670 *err = get_ext_path(orig_inode, orig_off, &orig_path);
695 if (err) 671 if (*err)
696 goto out; 672 goto out;
697 673
698 /* Get the donor extent for the head */ 674 /* Get the donor extent for the head */
699 err = get_ext_path(donor_inode, donor_off, &donor_path); 675 *err = get_ext_path(donor_inode, donor_off, &donor_path);
700 if (err) 676 if (*err)
701 goto out; 677 goto out;
702 depth = ext_depth(orig_inode); 678 depth = ext_depth(orig_inode);
703 oext = orig_path[depth].p_ext; 679 oext = orig_path[depth].p_ext;
@@ -707,39 +683,39 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
707 dext = donor_path[depth].p_ext; 683 dext = donor_path[depth].p_ext;
708 tmp_dext = *dext; 684 tmp_dext = *dext;
709 685
710 err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, 686 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
711 donor_off, count); 687 donor_off, count);
712 if (err) 688 if (*err)
713 goto out; 689 goto out;
714 690
715 /* Loop for the donor extents */ 691 /* Loop for the donor extents */
716 while (1) { 692 while (1) {
717 /* The extent for donor must be found. */ 693 /* The extent for donor must be found. */
718 if (!dext) { 694 if (!dext) {
719 ext4_error(donor_inode->i_sb, __func__, 695 ext4_error(donor_inode->i_sb,
720 "The extent for donor must be found"); 696 "The extent for donor must be found");
721 err = -EIO; 697 *err = -EIO;
722 goto out; 698 goto out;
723 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { 699 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
724 ext4_error(donor_inode->i_sb, __func__, 700 ext4_error(donor_inode->i_sb,
725 "Donor offset(%u) and the first block of donor " 701 "Donor offset(%u) and the first block of donor "
726 "extent(%u) should be equal", 702 "extent(%u) should be equal",
727 donor_off, 703 donor_off,
728 le32_to_cpu(tmp_dext.ee_block)); 704 le32_to_cpu(tmp_dext.ee_block));
729 err = -EIO; 705 *err = -EIO;
730 goto out; 706 goto out;
731 } 707 }
732 708
733 /* Set donor extent to orig extent */ 709 /* Set donor extent to orig extent */
734 err = mext_leaf_block(handle, orig_inode, 710 *err = mext_leaf_block(handle, orig_inode,
735 orig_path, &tmp_dext, &orig_off); 711 orig_path, &tmp_dext, &orig_off);
736 if (err < 0) 712 if (*err)
737 goto out; 713 goto out;
738 714
739 /* Set orig extent to donor extent */ 715 /* Set orig extent to donor extent */
740 err = mext_leaf_block(handle, donor_inode, 716 *err = mext_leaf_block(handle, donor_inode,
741 donor_path, &tmp_oext, &donor_off); 717 donor_path, &tmp_oext, &donor_off);
742 if (err < 0) 718 if (*err)
743 goto out; 719 goto out;
744 720
745 dext_alen = ext4_ext_get_actual_len(&tmp_dext); 721 dext_alen = ext4_ext_get_actual_len(&tmp_dext);
@@ -753,35 +729,25 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
753 729
754 if (orig_path) 730 if (orig_path)
755 ext4_ext_drop_refs(orig_path); 731 ext4_ext_drop_refs(orig_path);
756 err = get_ext_path(orig_inode, orig_off, &orig_path); 732 *err = get_ext_path(orig_inode, orig_off, &orig_path);
757 if (err) 733 if (*err)
758 goto out; 734 goto out;
759 depth = ext_depth(orig_inode); 735 depth = ext_depth(orig_inode);
760 oext = orig_path[depth].p_ext; 736 oext = orig_path[depth].p_ext;
761 if (le32_to_cpu(oext->ee_block) +
762 ext4_ext_get_actual_len(oext) <= orig_off) {
763 err = 0;
764 goto out;
765 }
766 tmp_oext = *oext; 737 tmp_oext = *oext;
767 738
768 if (donor_path) 739 if (donor_path)
769 ext4_ext_drop_refs(donor_path); 740 ext4_ext_drop_refs(donor_path);
770 err = get_ext_path(donor_inode, donor_off, &donor_path); 741 *err = get_ext_path(donor_inode, donor_off, &donor_path);
771 if (err) 742 if (*err)
772 goto out; 743 goto out;
773 depth = ext_depth(donor_inode); 744 depth = ext_depth(donor_inode);
774 dext = donor_path[depth].p_ext; 745 dext = donor_path[depth].p_ext;
775 if (le32_to_cpu(dext->ee_block) +
776 ext4_ext_get_actual_len(dext) <= donor_off) {
777 err = 0;
778 goto out;
779 }
780 tmp_dext = *dext; 746 tmp_dext = *dext;
781 747
782 err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, 748 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
783 donor_off, count - replaced_count); 749 donor_off, count - replaced_count);
784 if (err) 750 if (*err)
785 goto out; 751 goto out;
786 } 752 }
787 753
@@ -795,8 +761,12 @@ out:
795 kfree(donor_path); 761 kfree(donor_path);
796 } 762 }
797 763
798 mext_double_up_write(orig_inode, donor_inode); 764 ext4_ext_invalidate_cache(orig_inode);
799 return err; 765 ext4_ext_invalidate_cache(donor_inode);
766
767 double_up_write_data_sem(orig_inode, donor_inode);
768
769 return replaced_count;
800} 770}
801 771
802/** 772/**
@@ -808,16 +778,17 @@ out:
808 * @data_offset_in_page: block index where data swapping starts 778 * @data_offset_in_page: block index where data swapping starts
809 * @block_len_in_page: the number of blocks to be swapped 779 * @block_len_in_page: the number of blocks to be swapped
810 * @uninit: orig extent is uninitialized or not 780 * @uninit: orig extent is uninitialized or not
781 * @err: pointer to save return value
811 * 782 *
812 * Save the data in original inode blocks and replace original inode extents 783 * Save the data in original inode blocks and replace original inode extents
813 * with donor inode extents by calling mext_replace_branches(). 784 * with donor inode extents by calling mext_replace_branches().
814 * Finally, write out the saved data in new original inode blocks. Return 0 785 * Finally, write out the saved data in new original inode blocks. Return
815 * on success, or a negative error value on failure. 786 * replaced block count.
816 */ 787 */
817static int 788static int
818move_extent_per_page(struct file *o_filp, struct inode *donor_inode, 789move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
819 pgoff_t orig_page_offset, int data_offset_in_page, 790 pgoff_t orig_page_offset, int data_offset_in_page,
820 int block_len_in_page, int uninit) 791 int block_len_in_page, int uninit, int *err)
821{ 792{
822 struct inode *orig_inode = o_filp->f_dentry->d_inode; 793 struct inode *orig_inode = o_filp->f_dentry->d_inode;
823 struct address_space *mapping = orig_inode->i_mapping; 794 struct address_space *mapping = orig_inode->i_mapping;
@@ -829,9 +800,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
829 long long offs = orig_page_offset << PAGE_CACHE_SHIFT; 800 long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
830 unsigned long blocksize = orig_inode->i_sb->s_blocksize; 801 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
831 unsigned int w_flags = 0; 802 unsigned int w_flags = 0;
832 unsigned int tmp_data_len, data_len; 803 unsigned int tmp_data_size, data_size, replaced_size;
833 void *fsdata; 804 void *fsdata;
834 int ret, i, jblocks; 805 int i, jblocks;
806 int err2 = 0;
807 int replaced_count = 0;
835 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 808 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
836 809
837 /* 810 /*
@@ -841,8 +814,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
841 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; 814 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
842 handle = ext4_journal_start(orig_inode, jblocks); 815 handle = ext4_journal_start(orig_inode, jblocks);
843 if (IS_ERR(handle)) { 816 if (IS_ERR(handle)) {
844 ret = PTR_ERR(handle); 817 *err = PTR_ERR(handle);
845 return ret; 818 return 0;
846 } 819 }
847 820
848 if (segment_eq(get_fs(), KERNEL_DS)) 821 if (segment_eq(get_fs(), KERNEL_DS))
@@ -858,39 +831,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
858 * Just swap data blocks between orig and donor. 831 * Just swap data blocks between orig and donor.
859 */ 832 */
860 if (uninit) { 833 if (uninit) {
861 ret = mext_replace_branches(handle, orig_inode, 834 replaced_count = mext_replace_branches(handle, orig_inode,
862 donor_inode, orig_blk_offset, 835 donor_inode, orig_blk_offset,
863 block_len_in_page); 836 block_len_in_page, err);
864
865 /* Clear the inode cache not to refer to the old data */
866 ext4_ext_invalidate_cache(orig_inode);
867 ext4_ext_invalidate_cache(donor_inode);
868 goto out2; 837 goto out2;
869 } 838 }
870 839
871 offs = (long long)orig_blk_offset << orig_inode->i_blkbits; 840 offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
872 841
873 /* Calculate data_len */ 842 /* Calculate data_size */
874 if ((orig_blk_offset + block_len_in_page - 1) == 843 if ((orig_blk_offset + block_len_in_page - 1) ==
875 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { 844 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
876 /* Replace the last block */ 845 /* Replace the last block */
877 tmp_data_len = orig_inode->i_size & (blocksize - 1); 846 tmp_data_size = orig_inode->i_size & (blocksize - 1);
878 /* 847 /*
879 * If data_len equal zero, it shows data_len is multiples of 848 * If data_size equal zero, it shows data_size is multiples of
880 * blocksize. So we set appropriate value. 849 * blocksize. So we set appropriate value.
881 */ 850 */
882 if (tmp_data_len == 0) 851 if (tmp_data_size == 0)
883 tmp_data_len = blocksize; 852 tmp_data_size = blocksize;
884 853
885 data_len = tmp_data_len + 854 data_size = tmp_data_size +
886 ((block_len_in_page - 1) << orig_inode->i_blkbits); 855 ((block_len_in_page - 1) << orig_inode->i_blkbits);
887 } else { 856 } else
888 data_len = block_len_in_page << orig_inode->i_blkbits; 857 data_size = block_len_in_page << orig_inode->i_blkbits;
889 } 858
859 replaced_size = data_size;
890 860
891 ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags, 861 *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
892 &page, &fsdata); 862 &page, &fsdata);
893 if (unlikely(ret < 0)) 863 if (unlikely(*err < 0))
894 goto out; 864 goto out;
895 865
896 if (!PageUptodate(page)) { 866 if (!PageUptodate(page)) {
@@ -911,14 +881,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
911 /* Release old bh and drop refs */ 881 /* Release old bh and drop refs */
912 try_to_release_page(page, 0); 882 try_to_release_page(page, 0);
913 883
914 ret = mext_replace_branches(handle, orig_inode, donor_inode, 884 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
915 orig_blk_offset, block_len_in_page); 885 orig_blk_offset, block_len_in_page,
916 if (ret < 0) 886 &err2);
917 goto out; 887 if (err2) {
918 888 if (replaced_count) {
919 /* Clear the inode cache not to refer to the old data */ 889 block_len_in_page = replaced_count;
920 ext4_ext_invalidate_cache(orig_inode); 890 replaced_size =
921 ext4_ext_invalidate_cache(donor_inode); 891 block_len_in_page << orig_inode->i_blkbits;
892 } else
893 goto out;
894 }
922 895
923 if (!page_has_buffers(page)) 896 if (!page_has_buffers(page))
924 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); 897 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
@@ -928,16 +901,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
928 bh = bh->b_this_page; 901 bh = bh->b_this_page;
929 902
930 for (i = 0; i < block_len_in_page; i++) { 903 for (i = 0; i < block_len_in_page; i++) {
931 ret = ext4_get_block(orig_inode, 904 *err = ext4_get_block(orig_inode,
932 (sector_t)(orig_blk_offset + i), bh, 0); 905 (sector_t)(orig_blk_offset + i), bh, 0);
933 if (ret < 0) 906 if (*err < 0)
934 goto out; 907 goto out;
935 908
936 if (bh->b_this_page != NULL) 909 if (bh->b_this_page != NULL)
937 bh = bh->b_this_page; 910 bh = bh->b_this_page;
938 } 911 }
939 912
940 ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len, 913 *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
941 page, fsdata); 914 page, fsdata);
942 page = NULL; 915 page = NULL;
943 916
@@ -951,18 +924,20 @@ out:
951out2: 924out2:
952 ext4_journal_stop(handle); 925 ext4_journal_stop(handle);
953 926
954 return ret < 0 ? ret : 0; 927 if (err2)
928 *err = err2;
929
930 return replaced_count;
955} 931}
956 932
957/** 933/**
958 * mext_check_argumants - Check whether move extent can be done 934 * mext_check_arguments - Check whether move extent can be done
959 * 935 *
960 * @orig_inode: original inode 936 * @orig_inode: original inode
961 * @donor_inode: donor inode 937 * @donor_inode: donor inode
962 * @orig_start: logical start offset in block for orig 938 * @orig_start: logical start offset in block for orig
963 * @donor_start: logical start offset in block for donor 939 * @donor_start: logical start offset in block for donor
964 * @len: the number of blocks to be moved 940 * @len: the number of blocks to be moved
965 * @moved_len: moved block length
966 * 941 *
967 * Check the arguments of ext4_move_extents() whether the files can be 942 * Check the arguments of ext4_move_extents() whether the files can be
968 * exchanged with each other. 943 * exchanged with each other.
@@ -970,18 +945,17 @@ out2:
970 */ 945 */
971static int 946static int
972mext_check_arguments(struct inode *orig_inode, 947mext_check_arguments(struct inode *orig_inode,
973 struct inode *donor_inode, __u64 orig_start, 948 struct inode *donor_inode, __u64 orig_start,
974 __u64 donor_start, __u64 *len, __u64 moved_len) 949 __u64 donor_start, __u64 *len)
975{ 950{
976 ext4_lblk_t orig_blocks, donor_blocks; 951 ext4_lblk_t orig_blocks, donor_blocks;
977 unsigned int blkbits = orig_inode->i_blkbits; 952 unsigned int blkbits = orig_inode->i_blkbits;
978 unsigned int blocksize = 1 << blkbits; 953 unsigned int blocksize = 1 << blkbits;
979 954
980 /* Regular file check */ 955 if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
981 if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { 956 ext4_debug("ext4 move extent: suid or sgid is set"
982 ext4_debug("ext4 move extent: The argument files should be " 957 " to donor file [ino:orig %lu, donor %lu]\n",
983 "regular file [ino:orig %lu, donor %lu]\n", 958 orig_inode->i_ino, donor_inode->i_ino);
984 orig_inode->i_ino, donor_inode->i_ino);
985 return -EINVAL; 959 return -EINVAL;
986 } 960 }
987 961
@@ -1025,13 +999,6 @@ mext_check_arguments(struct inode *orig_inode,
1025 return -EINVAL; 999 return -EINVAL;
1026 } 1000 }
1027 1001
1028 if (moved_len) {
1029 ext4_debug("ext4 move extent: moved_len should be 0 "
1030 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1031 donor_inode->i_ino);
1032 return -EINVAL;
1033 }
1034
1035 if ((orig_start > EXT_MAX_BLOCK) || 1002 if ((orig_start > EXT_MAX_BLOCK) ||
1036 (donor_start > EXT_MAX_BLOCK) || 1003 (donor_start > EXT_MAX_BLOCK) ||
1037 (*len > EXT_MAX_BLOCK) || 1004 (*len > EXT_MAX_BLOCK) ||
@@ -1088,7 +1055,7 @@ mext_check_arguments(struct inode *orig_inode,
1088 } 1055 }
1089 1056
1090 if (!*len) { 1057 if (!*len) {
1091 ext4_debug("ext4 move extent: len shoudld not be 0 " 1058 ext4_debug("ext4 move extent: len should not be 0 "
1092 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, 1059 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1093 donor_inode->i_ino); 1060 donor_inode->i_ino);
1094 return -EINVAL; 1061 return -EINVAL;
@@ -1232,16 +1199,24 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1232 return -EINVAL; 1199 return -EINVAL;
1233 } 1200 }
1234 1201
1235 /* protect orig and donor against a truncate */ 1202 /* Regular file check */
1203 if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
1204 ext4_debug("ext4 move extent: The argument files should be "
1205 "regular file [ino:orig %lu, donor %lu]\n",
1206 orig_inode->i_ino, donor_inode->i_ino);
1207 return -EINVAL;
1208 }
1209
1210 /* Protect orig and donor inodes against a truncate */
1236 ret1 = mext_inode_double_lock(orig_inode, donor_inode); 1211 ret1 = mext_inode_double_lock(orig_inode, donor_inode);
1237 if (ret1 < 0) 1212 if (ret1 < 0)
1238 return ret1; 1213 return ret1;
1239 1214
1240 mext_double_down_read(orig_inode, donor_inode); 1215 /* Protect extent tree against block allocations via delalloc */
1216 double_down_write_data_sem(orig_inode, donor_inode);
1241 /* Check the filesystem environment whether move_extent can be done */ 1217 /* Check the filesystem environment whether move_extent can be done */
1242 ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, 1218 ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
1243 donor_start, &len, *moved_len); 1219 donor_start, &len);
1244 mext_double_up_read(orig_inode, donor_inode);
1245 if (ret1) 1220 if (ret1)
1246 goto out; 1221 goto out;
1247 1222
@@ -1355,36 +1330,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1355 seq_start = le32_to_cpu(ext_cur->ee_block); 1330 seq_start = le32_to_cpu(ext_cur->ee_block);
1356 rest_blocks = seq_blocks; 1331 rest_blocks = seq_blocks;
1357 1332
1358 /* Discard preallocations of two inodes */ 1333 /*
1359 down_write(&EXT4_I(orig_inode)->i_data_sem); 1334 * Up semaphore to avoid following problems:
1360 ext4_discard_preallocations(orig_inode); 1335 * a. transaction deadlock among ext4_journal_start,
1361 up_write(&EXT4_I(orig_inode)->i_data_sem); 1336 * ->write_begin via pagefault, and jbd2_journal_commit
1362 1337 * b. racing with ->readpage, ->write_begin, and ext4_get_block
1363 down_write(&EXT4_I(donor_inode)->i_data_sem); 1338 * in move_extent_per_page
1364 ext4_discard_preallocations(donor_inode); 1339 */
1365 up_write(&EXT4_I(donor_inode)->i_data_sem); 1340 double_up_write_data_sem(orig_inode, donor_inode);
1366 1341
1367 while (orig_page_offset <= seq_end_page) { 1342 while (orig_page_offset <= seq_end_page) {
1368 1343
1369 /* Swap original branches with new branches */ 1344 /* Swap original branches with new branches */
1370 ret1 = move_extent_per_page(o_filp, donor_inode, 1345 block_len_in_page = move_extent_per_page(
1346 o_filp, donor_inode,
1371 orig_page_offset, 1347 orig_page_offset,
1372 data_offset_in_page, 1348 data_offset_in_page,
1373 block_len_in_page, uninit); 1349 block_len_in_page, uninit,
1374 if (ret1 < 0) 1350 &ret1);
1375 goto out; 1351
1376 orig_page_offset++;
1377 /* Count how many blocks we have exchanged */ 1352 /* Count how many blocks we have exchanged */
1378 *moved_len += block_len_in_page; 1353 *moved_len += block_len_in_page;
1354 if (ret1 < 0)
1355 break;
1379 if (*moved_len > len) { 1356 if (*moved_len > len) {
1380 ext4_error(orig_inode->i_sb, __func__, 1357 ext4_error(orig_inode->i_sb,
1381 "We replaced blocks too much! " 1358 "We replaced blocks too much! "
1382 "sum of replaced: %llu requested: %llu", 1359 "sum of replaced: %llu requested: %llu",
1383 *moved_len, len); 1360 *moved_len, len);
1384 ret1 = -EIO; 1361 ret1 = -EIO;
1385 goto out; 1362 break;
1386 } 1363 }
1387 1364
1365 orig_page_offset++;
1388 data_offset_in_page = 0; 1366 data_offset_in_page = 0;
1389 rest_blocks -= block_len_in_page; 1367 rest_blocks -= block_len_in_page;
1390 if (rest_blocks > blocks_per_page) 1368 if (rest_blocks > blocks_per_page)
@@ -1393,6 +1371,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1393 block_len_in_page = rest_blocks; 1371 block_len_in_page = rest_blocks;
1394 } 1372 }
1395 1373
1374 double_down_write_data_sem(orig_inode, donor_inode);
1375 if (ret1 < 0)
1376 break;
1377
1396 /* Decrease buffer counter */ 1378 /* Decrease buffer counter */
1397 if (holecheck_path) 1379 if (holecheck_path)
1398 ext4_ext_drop_refs(holecheck_path); 1380 ext4_ext_drop_refs(holecheck_path);
@@ -1414,6 +1396,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1414 1396
1415 } 1397 }
1416out: 1398out:
1399 if (*moved_len) {
1400 ext4_discard_preallocations(orig_inode);
1401 ext4_discard_preallocations(donor_inode);
1402 }
1403
1417 if (orig_path) { 1404 if (orig_path) {
1418 ext4_ext_drop_refs(orig_path); 1405 ext4_ext_drop_refs(orig_path);
1419 kfree(orig_path); 1406 kfree(orig_path);
@@ -1422,7 +1409,7 @@ out:
1422 ext4_ext_drop_refs(holecheck_path); 1409 ext4_ext_drop_refs(holecheck_path);
1423 kfree(holecheck_path); 1410 kfree(holecheck_path);
1424 } 1411 }
1425 1412 double_up_write_data_sem(orig_inode, donor_inode);
1426 ret2 = mext_inode_double_unlock(orig_inode, donor_inode); 1413 ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
1427 1414
1428 if (ret1) 1415 if (ret1)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 6d2c1b897fc7..0c070fabd108 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -383,8 +383,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
383 if (root->info.hash_version != DX_HASH_TEA && 383 if (root->info.hash_version != DX_HASH_TEA &&
384 root->info.hash_version != DX_HASH_HALF_MD4 && 384 root->info.hash_version != DX_HASH_HALF_MD4 &&
385 root->info.hash_version != DX_HASH_LEGACY) { 385 root->info.hash_version != DX_HASH_LEGACY) {
386 ext4_warning(dir->i_sb, __func__, 386 ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
387 "Unrecognised inode hash code %d",
388 root->info.hash_version); 387 root->info.hash_version);
389 brelse(bh); 388 brelse(bh);
390 *err = ERR_BAD_DX_DIR; 389 *err = ERR_BAD_DX_DIR;
@@ -399,8 +398,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
399 hash = hinfo->hash; 398 hash = hinfo->hash;
400 399
401 if (root->info.unused_flags & 1) { 400 if (root->info.unused_flags & 1) {
402 ext4_warning(dir->i_sb, __func__, 401 ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
403 "Unimplemented inode hash flags: %#06x",
404 root->info.unused_flags); 402 root->info.unused_flags);
405 brelse(bh); 403 brelse(bh);
406 *err = ERR_BAD_DX_DIR; 404 *err = ERR_BAD_DX_DIR;
@@ -408,8 +406,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
408 } 406 }
409 407
410 if ((indirect = root->info.indirect_levels) > 1) { 408 if ((indirect = root->info.indirect_levels) > 1) {
411 ext4_warning(dir->i_sb, __func__, 409 ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
412 "Unimplemented inode hash depth: %#06x",
413 root->info.indirect_levels); 410 root->info.indirect_levels);
414 brelse(bh); 411 brelse(bh);
415 *err = ERR_BAD_DX_DIR; 412 *err = ERR_BAD_DX_DIR;
@@ -421,8 +418,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
421 418
422 if (dx_get_limit(entries) != dx_root_limit(dir, 419 if (dx_get_limit(entries) != dx_root_limit(dir,
423 root->info.info_length)) { 420 root->info.info_length)) {
424 ext4_warning(dir->i_sb, __func__, 421 ext4_warning(dir->i_sb, "dx entry: limit != root limit");
425 "dx entry: limit != root limit");
426 brelse(bh); 422 brelse(bh);
427 *err = ERR_BAD_DX_DIR; 423 *err = ERR_BAD_DX_DIR;
428 goto fail; 424 goto fail;
@@ -433,7 +429,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
433 { 429 {
434 count = dx_get_count(entries); 430 count = dx_get_count(entries);
435 if (!count || count > dx_get_limit(entries)) { 431 if (!count || count > dx_get_limit(entries)) {
436 ext4_warning(dir->i_sb, __func__, 432 ext4_warning(dir->i_sb,
437 "dx entry: no count or count > limit"); 433 "dx entry: no count or count > limit");
438 brelse(bh); 434 brelse(bh);
439 *err = ERR_BAD_DX_DIR; 435 *err = ERR_BAD_DX_DIR;
@@ -478,7 +474,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
478 goto fail2; 474 goto fail2;
479 at = entries = ((struct dx_node *) bh->b_data)->entries; 475 at = entries = ((struct dx_node *) bh->b_data)->entries;
480 if (dx_get_limit(entries) != dx_node_limit (dir)) { 476 if (dx_get_limit(entries) != dx_node_limit (dir)) {
481 ext4_warning(dir->i_sb, __func__, 477 ext4_warning(dir->i_sb,
482 "dx entry: limit != node limit"); 478 "dx entry: limit != node limit");
483 brelse(bh); 479 brelse(bh);
484 *err = ERR_BAD_DX_DIR; 480 *err = ERR_BAD_DX_DIR;
@@ -494,7 +490,7 @@ fail2:
494 } 490 }
495fail: 491fail:
496 if (*err == ERR_BAD_DX_DIR) 492 if (*err == ERR_BAD_DX_DIR)
497 ext4_warning(dir->i_sb, __func__, 493 ext4_warning(dir->i_sb,
498 "Corrupt dir inode %ld, running e2fsck is " 494 "Corrupt dir inode %ld, running e2fsck is "
499 "recommended.", dir->i_ino); 495 "recommended.", dir->i_ino);
500 return NULL; 496 return NULL;
@@ -947,9 +943,8 @@ restart:
947 wait_on_buffer(bh); 943 wait_on_buffer(bh);
948 if (!buffer_uptodate(bh)) { 944 if (!buffer_uptodate(bh)) {
949 /* read error, skip block & hope for the best */ 945 /* read error, skip block & hope for the best */
950 ext4_error(sb, __func__, "reading directory #%lu " 946 ext4_error(sb, "reading directory #%lu offset %lu",
951 "offset %lu", dir->i_ino, 947 dir->i_ino, (unsigned long)block);
952 (unsigned long)block);
953 brelse(bh); 948 brelse(bh);
954 goto next; 949 goto next;
955 } 950 }
@@ -1041,7 +1036,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
1041 retval = ext4_htree_next_block(dir, hash, frame, 1036 retval = ext4_htree_next_block(dir, hash, frame,
1042 frames, NULL); 1037 frames, NULL);
1043 if (retval < 0) { 1038 if (retval < 0) {
1044 ext4_warning(sb, __func__, 1039 ext4_warning(sb,
1045 "error reading index page in directory #%lu", 1040 "error reading index page in directory #%lu",
1046 dir->i_ino); 1041 dir->i_ino);
1047 *err = retval; 1042 *err = retval;
@@ -1071,14 +1066,13 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
1071 __u32 ino = le32_to_cpu(de->inode); 1066 __u32 ino = le32_to_cpu(de->inode);
1072 brelse(bh); 1067 brelse(bh);
1073 if (!ext4_valid_inum(dir->i_sb, ino)) { 1068 if (!ext4_valid_inum(dir->i_sb, ino)) {
1074 ext4_error(dir->i_sb, "ext4_lookup", 1069 ext4_error(dir->i_sb, "bad inode number: %u", ino);
1075 "bad inode number: %u", ino);
1076 return ERR_PTR(-EIO); 1070 return ERR_PTR(-EIO);
1077 } 1071 }
1078 inode = ext4_iget(dir->i_sb, ino); 1072 inode = ext4_iget(dir->i_sb, ino);
1079 if (unlikely(IS_ERR(inode))) { 1073 if (unlikely(IS_ERR(inode))) {
1080 if (PTR_ERR(inode) == -ESTALE) { 1074 if (PTR_ERR(inode) == -ESTALE) {
1081 ext4_error(dir->i_sb, __func__, 1075 ext4_error(dir->i_sb,
1082 "deleted inode referenced: %u", 1076 "deleted inode referenced: %u",
1083 ino); 1077 ino);
1084 return ERR_PTR(-EIO); 1078 return ERR_PTR(-EIO);
@@ -1110,7 +1104,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
1110 brelse(bh); 1104 brelse(bh);
1111 1105
1112 if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { 1106 if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
1113 ext4_error(child->d_inode->i_sb, "ext4_get_parent", 1107 ext4_error(child->d_inode->i_sb,
1114 "bad inode number: %u", ino); 1108 "bad inode number: %u", ino);
1115 return ERR_PTR(-EIO); 1109 return ERR_PTR(-EIO);
1116 } 1110 }
@@ -1292,9 +1286,6 @@ errout:
1292 * add_dirent_to_buf will attempt search the directory block for 1286 * add_dirent_to_buf will attempt search the directory block for
1293 * space. It will return -ENOSPC if no space is available, and -EIO 1287 * space. It will return -ENOSPC if no space is available, and -EIO
1294 * and -EEXIST if directory entry already exists. 1288 * and -EEXIST if directory entry already exists.
1295 *
1296 * NOTE! bh is NOT released in the case where ENOSPC is returned. In
1297 * all other cases bh is released.
1298 */ 1289 */
1299static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, 1290static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1300 struct inode *inode, struct ext4_dir_entry_2 *de, 1291 struct inode *inode, struct ext4_dir_entry_2 *de,
@@ -1315,14 +1306,10 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1315 top = bh->b_data + blocksize - reclen; 1306 top = bh->b_data + blocksize - reclen;
1316 while ((char *) de <= top) { 1307 while ((char *) de <= top) {
1317 if (!ext4_check_dir_entry("ext4_add_entry", dir, de, 1308 if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
1318 bh, offset)) { 1309 bh, offset))
1319 brelse(bh);
1320 return -EIO; 1310 return -EIO;
1321 } 1311 if (ext4_match(namelen, name, de))
1322 if (ext4_match(namelen, name, de)) {
1323 brelse(bh);
1324 return -EEXIST; 1312 return -EEXIST;
1325 }
1326 nlen = EXT4_DIR_REC_LEN(de->name_len); 1313 nlen = EXT4_DIR_REC_LEN(de->name_len);
1327 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize); 1314 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
1328 if ((de->inode? rlen - nlen: rlen) >= reclen) 1315 if ((de->inode? rlen - nlen: rlen) >= reclen)
@@ -1337,7 +1324,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1337 err = ext4_journal_get_write_access(handle, bh); 1324 err = ext4_journal_get_write_access(handle, bh);
1338 if (err) { 1325 if (err) {
1339 ext4_std_error(dir->i_sb, err); 1326 ext4_std_error(dir->i_sb, err);
1340 brelse(bh);
1341 return err; 1327 return err;
1342 } 1328 }
1343 1329
@@ -1377,7 +1363,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1377 err = ext4_handle_dirty_metadata(handle, dir, bh); 1363 err = ext4_handle_dirty_metadata(handle, dir, bh);
1378 if (err) 1364 if (err)
1379 ext4_std_error(dir->i_sb, err); 1365 ext4_std_error(dir->i_sb, err);
1380 brelse(bh);
1381 return 0; 1366 return 0;
1382} 1367}
1383 1368
@@ -1419,7 +1404,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1419 de = (struct ext4_dir_entry_2 *)((char *)fde + 1404 de = (struct ext4_dir_entry_2 *)((char *)fde +
1420 ext4_rec_len_from_disk(fde->rec_len, blocksize)); 1405 ext4_rec_len_from_disk(fde->rec_len, blocksize));
1421 if ((char *) de >= (((char *) root) + blocksize)) { 1406 if ((char *) de >= (((char *) root) + blocksize)) {
1422 ext4_error(dir->i_sb, __func__, 1407 ext4_error(dir->i_sb,
1423 "invalid rec_len for '..' in inode %lu", 1408 "invalid rec_len for '..' in inode %lu",
1424 dir->i_ino); 1409 dir->i_ino);
1425 brelse(bh); 1410 brelse(bh);
@@ -1471,7 +1456,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1471 if (!(de)) 1456 if (!(de))
1472 return retval; 1457 return retval;
1473 1458
1474 return add_dirent_to_buf(handle, dentry, inode, de, bh); 1459 retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1460 brelse(bh);
1461 return retval;
1475} 1462}
1476 1463
1477/* 1464/*
@@ -1514,8 +1501,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1514 if(!bh) 1501 if(!bh)
1515 return retval; 1502 return retval;
1516 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 1503 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1517 if (retval != -ENOSPC) 1504 if (retval != -ENOSPC) {
1505 brelse(bh);
1518 return retval; 1506 return retval;
1507 }
1519 1508
1520 if (blocks == 1 && !dx_fallback && 1509 if (blocks == 1 && !dx_fallback &&
1521 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) 1510 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
@@ -1528,7 +1517,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1528 de = (struct ext4_dir_entry_2 *) bh->b_data; 1517 de = (struct ext4_dir_entry_2 *) bh->b_data;
1529 de->inode = 0; 1518 de->inode = 0;
1530 de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); 1519 de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
1531 return add_dirent_to_buf(handle, dentry, inode, de, bh); 1520 retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1521 brelse(bh);
1522 return retval;
1532} 1523}
1533 1524
1534/* 1525/*
@@ -1561,10 +1552,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1561 goto journal_error; 1552 goto journal_error;
1562 1553
1563 err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 1554 err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1564 if (err != -ENOSPC) { 1555 if (err != -ENOSPC)
1565 bh = NULL;
1566 goto cleanup; 1556 goto cleanup;
1567 }
1568 1557
1569 /* Block full, should compress but for now just split */ 1558 /* Block full, should compress but for now just split */
1570 dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", 1559 dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
@@ -1580,8 +1569,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1580 1569
1581 if (levels && (dx_get_count(frames->entries) == 1570 if (levels && (dx_get_count(frames->entries) ==
1582 dx_get_limit(frames->entries))) { 1571 dx_get_limit(frames->entries))) {
1583 ext4_warning(sb, __func__, 1572 ext4_warning(sb, "Directory index full!");
1584 "Directory index full!");
1585 err = -ENOSPC; 1573 err = -ENOSPC;
1586 goto cleanup; 1574 goto cleanup;
1587 } 1575 }
@@ -1657,7 +1645,6 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1657 if (!de) 1645 if (!de)
1658 goto cleanup; 1646 goto cleanup;
1659 err = add_dirent_to_buf(handle, dentry, inode, de, bh); 1647 err = add_dirent_to_buf(handle, dentry, inode, de, bh);
1660 bh = NULL;
1661 goto cleanup; 1648 goto cleanup;
1662 1649
1663journal_error: 1650journal_error:
@@ -1772,10 +1759,12 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
1772 struct inode *inode; 1759 struct inode *inode;
1773 int err, retries = 0; 1760 int err, retries = 0;
1774 1761
1762 dquot_initialize(dir);
1763
1775retry: 1764retry:
1776 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 1765 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1777 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1766 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1778 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 1767 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1779 if (IS_ERR(handle)) 1768 if (IS_ERR(handle))
1780 return PTR_ERR(handle); 1769 return PTR_ERR(handle);
1781 1770
@@ -1806,10 +1795,12 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
1806 if (!new_valid_dev(rdev)) 1795 if (!new_valid_dev(rdev))
1807 return -EINVAL; 1796 return -EINVAL;
1808 1797
1798 dquot_initialize(dir);
1799
1809retry: 1800retry:
1810 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 1801 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1811 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1802 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1812 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 1803 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1813 if (IS_ERR(handle)) 1804 if (IS_ERR(handle))
1814 return PTR_ERR(handle); 1805 return PTR_ERR(handle);
1815 1806
@@ -1843,10 +1834,12 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1843 if (EXT4_DIR_LINK_MAX(dir)) 1834 if (EXT4_DIR_LINK_MAX(dir))
1844 return -EMLINK; 1835 return -EMLINK;
1845 1836
1837 dquot_initialize(dir);
1838
1846retry: 1839retry:
1847 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 1840 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1848 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1841 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1849 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 1842 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1850 if (IS_ERR(handle)) 1843 if (IS_ERR(handle))
1851 return PTR_ERR(handle); 1844 return PTR_ERR(handle);
1852 1845
@@ -1922,11 +1915,11 @@ static int empty_dir(struct inode *inode)
1922 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || 1915 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
1923 !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { 1916 !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
1924 if (err) 1917 if (err)
1925 ext4_error(inode->i_sb, __func__, 1918 ext4_error(inode->i_sb,
1926 "error %d reading directory #%lu offset 0", 1919 "error %d reading directory #%lu offset 0",
1927 err, inode->i_ino); 1920 err, inode->i_ino);
1928 else 1921 else
1929 ext4_warning(inode->i_sb, __func__, 1922 ext4_warning(inode->i_sb,
1930 "bad directory (dir #%lu) - no data block", 1923 "bad directory (dir #%lu) - no data block",
1931 inode->i_ino); 1924 inode->i_ino);
1932 return 1; 1925 return 1;
@@ -1937,7 +1930,7 @@ static int empty_dir(struct inode *inode)
1937 !le32_to_cpu(de1->inode) || 1930 !le32_to_cpu(de1->inode) ||
1938 strcmp(".", de->name) || 1931 strcmp(".", de->name) ||
1939 strcmp("..", de1->name)) { 1932 strcmp("..", de1->name)) {
1940 ext4_warning(inode->i_sb, "empty_dir", 1933 ext4_warning(inode->i_sb,
1941 "bad directory (dir #%lu) - no `.' or `..'", 1934 "bad directory (dir #%lu) - no `.' or `..'",
1942 inode->i_ino); 1935 inode->i_ino);
1943 brelse(bh); 1936 brelse(bh);
@@ -1955,7 +1948,7 @@ static int empty_dir(struct inode *inode)
1955 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); 1948 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
1956 if (!bh) { 1949 if (!bh) {
1957 if (err) 1950 if (err)
1958 ext4_error(sb, __func__, 1951 ext4_error(sb,
1959 "error %d reading directory" 1952 "error %d reading directory"
1960 " #%lu offset %u", 1953 " #%lu offset %u",
1961 err, inode->i_ino, offset); 1954 err, inode->i_ino, offset);
@@ -2026,11 +2019,18 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2026 err = ext4_reserve_inode_write(handle, inode, &iloc); 2019 err = ext4_reserve_inode_write(handle, inode, &iloc);
2027 if (err) 2020 if (err)
2028 goto out_unlock; 2021 goto out_unlock;
2022 /*
2023 * Due to previous errors inode may be already a part of on-disk
2024 * orphan list. If so skip on-disk list modification.
2025 */
2026 if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <=
2027 (le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)))
2028 goto mem_insert;
2029 2029
2030 /* Insert this inode at the head of the on-disk orphan list... */ 2030 /* Insert this inode at the head of the on-disk orphan list... */
2031 NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); 2031 NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
2032 EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); 2032 EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
2033 err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh); 2033 err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
2034 rc = ext4_mark_iloc_dirty(handle, inode, &iloc); 2034 rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
2035 if (!err) 2035 if (!err)
2036 err = rc; 2036 err = rc;
@@ -2043,6 +2043,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2043 * 2043 *
2044 * This is safe: on error we're going to ignore the orphan list 2044 * This is safe: on error we're going to ignore the orphan list
2045 * anyway on the next recovery. */ 2045 * anyway on the next recovery. */
2046mem_insert:
2046 if (!err) 2047 if (!err)
2047 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 2048 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
2048 2049
@@ -2102,7 +2103,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2102 if (err) 2103 if (err)
2103 goto out_brelse; 2104 goto out_brelse;
2104 sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); 2105 sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
2105 err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh); 2106 err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
2106 } else { 2107 } else {
2107 struct ext4_iloc iloc2; 2108 struct ext4_iloc iloc2;
2108 struct inode *i_prev = 2109 struct inode *i_prev =
@@ -2142,7 +2143,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2142 2143
2143 /* Initialize quotas before so that eventual writes go in 2144 /* Initialize quotas before so that eventual writes go in
2144 * separate transaction */ 2145 * separate transaction */
2145 vfs_dq_init(dentry->d_inode); 2146 dquot_initialize(dir);
2147 dquot_initialize(dentry->d_inode);
2148
2146 handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); 2149 handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
2147 if (IS_ERR(handle)) 2150 if (IS_ERR(handle))
2148 return PTR_ERR(handle); 2151 return PTR_ERR(handle);
@@ -2169,7 +2172,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2169 if (retval) 2172 if (retval)
2170 goto end_rmdir; 2173 goto end_rmdir;
2171 if (!EXT4_DIR_LINK_EMPTY(inode)) 2174 if (!EXT4_DIR_LINK_EMPTY(inode))
2172 ext4_warning(inode->i_sb, "ext4_rmdir", 2175 ext4_warning(inode->i_sb,
2173 "empty directory has too many links (%d)", 2176 "empty directory has too many links (%d)",
2174 inode->i_nlink); 2177 inode->i_nlink);
2175 inode->i_version++; 2178 inode->i_version++;
@@ -2201,7 +2204,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2201 2204
2202 /* Initialize quotas before so that eventual writes go 2205 /* Initialize quotas before so that eventual writes go
2203 * in separate transaction */ 2206 * in separate transaction */
2204 vfs_dq_init(dentry->d_inode); 2207 dquot_initialize(dir);
2208 dquot_initialize(dentry->d_inode);
2209
2205 handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); 2210 handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
2206 if (IS_ERR(handle)) 2211 if (IS_ERR(handle))
2207 return PTR_ERR(handle); 2212 return PTR_ERR(handle);
@@ -2221,7 +2226,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2221 goto end_unlink; 2226 goto end_unlink;
2222 2227
2223 if (!inode->i_nlink) { 2228 if (!inode->i_nlink) {
2224 ext4_warning(inode->i_sb, "ext4_unlink", 2229 ext4_warning(inode->i_sb,
2225 "Deleting nonexistent file (%lu), %d", 2230 "Deleting nonexistent file (%lu), %d",
2226 inode->i_ino, inode->i_nlink); 2231 inode->i_ino, inode->i_nlink);
2227 inode->i_nlink = 1; 2232 inode->i_nlink = 1;
@@ -2256,10 +2261,12 @@ static int ext4_symlink(struct inode *dir,
2256 if (l > dir->i_sb->s_blocksize) 2261 if (l > dir->i_sb->s_blocksize)
2257 return -ENAMETOOLONG; 2262 return -ENAMETOOLONG;
2258 2263
2264 dquot_initialize(dir);
2265
2259retry: 2266retry:
2260 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2267 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2261 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + 2268 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2262 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 2269 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2263 if (IS_ERR(handle)) 2270 if (IS_ERR(handle))
2264 return PTR_ERR(handle); 2271 return PTR_ERR(handle);
2265 2272
@@ -2314,6 +2321,8 @@ static int ext4_link(struct dentry *old_dentry,
2314 if (inode->i_nlink >= EXT4_LINK_MAX) 2321 if (inode->i_nlink >= EXT4_LINK_MAX)
2315 return -EMLINK; 2322 return -EMLINK;
2316 2323
2324 dquot_initialize(dir);
2325
2317 /* 2326 /*
2318 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing 2327 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2319 * otherwise has the potential to corrupt the orphan inode list. 2328 * otherwise has the potential to corrupt the orphan inode list.
@@ -2364,12 +2373,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2364 struct ext4_dir_entry_2 *old_de, *new_de; 2373 struct ext4_dir_entry_2 *old_de, *new_de;
2365 int retval, force_da_alloc = 0; 2374 int retval, force_da_alloc = 0;
2366 2375
2376 dquot_initialize(old_dir);
2377 dquot_initialize(new_dir);
2378
2367 old_bh = new_bh = dir_bh = NULL; 2379 old_bh = new_bh = dir_bh = NULL;
2368 2380
2369 /* Initialize quotas before so that eventual writes go 2381 /* Initialize quotas before so that eventual writes go
2370 * in separate transaction */ 2382 * in separate transaction */
2371 if (new_dentry->d_inode) 2383 if (new_dentry->d_inode)
2372 vfs_dq_init(new_dentry->d_inode); 2384 dquot_initialize(new_dentry->d_inode);
2373 handle = ext4_journal_start(old_dir, 2 * 2385 handle = ext4_journal_start(old_dir, 2 *
2374 EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + 2386 EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
2375 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); 2387 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
@@ -2468,7 +2480,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2468 } 2480 }
2469 } 2481 }
2470 if (retval) { 2482 if (retval) {
2471 ext4_warning(old_dir->i_sb, "ext4_rename", 2483 ext4_warning(old_dir->i_sb,
2472 "Deleting old file (%lu), %d, error=%d", 2484 "Deleting old file (%lu), %d, error=%d",
2473 old_dir->i_ino, old_dir->i_nlink, retval); 2485 old_dir->i_ino, old_dir->i_nlink, retval);
2474 } 2486 }
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3cfc343c41b5..5692c48754a0 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -48,65 +48,54 @@ static int verify_group_input(struct super_block *sb,
48 48
49 ext4_get_group_no_and_offset(sb, start, NULL, &offset); 49 ext4_get_group_no_and_offset(sb, start, NULL, &offset);
50 if (group != sbi->s_groups_count) 50 if (group != sbi->s_groups_count)
51 ext4_warning(sb, __func__, 51 ext4_warning(sb, "Cannot add at group %u (only %u groups)",
52 "Cannot add at group %u (only %u groups)",
53 input->group, sbi->s_groups_count); 52 input->group, sbi->s_groups_count);
54 else if (offset != 0) 53 else if (offset != 0)
55 ext4_warning(sb, __func__, "Last group not full"); 54 ext4_warning(sb, "Last group not full");
56 else if (input->reserved_blocks > input->blocks_count / 5) 55 else if (input->reserved_blocks > input->blocks_count / 5)
57 ext4_warning(sb, __func__, "Reserved blocks too high (%u)", 56 ext4_warning(sb, "Reserved blocks too high (%u)",
58 input->reserved_blocks); 57 input->reserved_blocks);
59 else if (free_blocks_count < 0) 58 else if (free_blocks_count < 0)
60 ext4_warning(sb, __func__, "Bad blocks count %u", 59 ext4_warning(sb, "Bad blocks count %u",
61 input->blocks_count); 60 input->blocks_count);
62 else if (!(bh = sb_bread(sb, end - 1))) 61 else if (!(bh = sb_bread(sb, end - 1)))
63 ext4_warning(sb, __func__, 62 ext4_warning(sb, "Cannot read last block (%llu)",
64 "Cannot read last block (%llu)",
65 end - 1); 63 end - 1);
66 else if (outside(input->block_bitmap, start, end)) 64 else if (outside(input->block_bitmap, start, end))
67 ext4_warning(sb, __func__, 65 ext4_warning(sb, "Block bitmap not in group (block %llu)",
68 "Block bitmap not in group (block %llu)",
69 (unsigned long long)input->block_bitmap); 66 (unsigned long long)input->block_bitmap);
70 else if (outside(input->inode_bitmap, start, end)) 67 else if (outside(input->inode_bitmap, start, end))
71 ext4_warning(sb, __func__, 68 ext4_warning(sb, "Inode bitmap not in group (block %llu)",
72 "Inode bitmap not in group (block %llu)",
73 (unsigned long long)input->inode_bitmap); 69 (unsigned long long)input->inode_bitmap);
74 else if (outside(input->inode_table, start, end) || 70 else if (outside(input->inode_table, start, end) ||
75 outside(itend - 1, start, end)) 71 outside(itend - 1, start, end))
76 ext4_warning(sb, __func__, 72 ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)",
77 "Inode table not in group (blocks %llu-%llu)",
78 (unsigned long long)input->inode_table, itend - 1); 73 (unsigned long long)input->inode_table, itend - 1);
79 else if (input->inode_bitmap == input->block_bitmap) 74 else if (input->inode_bitmap == input->block_bitmap)
80 ext4_warning(sb, __func__, 75 ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)",
81 "Block bitmap same as inode bitmap (%llu)",
82 (unsigned long long)input->block_bitmap); 76 (unsigned long long)input->block_bitmap);
83 else if (inside(input->block_bitmap, input->inode_table, itend)) 77 else if (inside(input->block_bitmap, input->inode_table, itend))
84 ext4_warning(sb, __func__, 78 ext4_warning(sb, "Block bitmap (%llu) in inode table "
85 "Block bitmap (%llu) in inode table (%llu-%llu)", 79 "(%llu-%llu)",
86 (unsigned long long)input->block_bitmap, 80 (unsigned long long)input->block_bitmap,
87 (unsigned long long)input->inode_table, itend - 1); 81 (unsigned long long)input->inode_table, itend - 1);
88 else if (inside(input->inode_bitmap, input->inode_table, itend)) 82 else if (inside(input->inode_bitmap, input->inode_table, itend))
89 ext4_warning(sb, __func__, 83 ext4_warning(sb, "Inode bitmap (%llu) in inode table "
90 "Inode bitmap (%llu) in inode table (%llu-%llu)", 84 "(%llu-%llu)",
91 (unsigned long long)input->inode_bitmap, 85 (unsigned long long)input->inode_bitmap,
92 (unsigned long long)input->inode_table, itend - 1); 86 (unsigned long long)input->inode_table, itend - 1);
93 else if (inside(input->block_bitmap, start, metaend)) 87 else if (inside(input->block_bitmap, start, metaend))
94 ext4_warning(sb, __func__, 88 ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)",
95 "Block bitmap (%llu) in GDT table"
96 " (%llu-%llu)",
97 (unsigned long long)input->block_bitmap, 89 (unsigned long long)input->block_bitmap,
98 start, metaend - 1); 90 start, metaend - 1);
99 else if (inside(input->inode_bitmap, start, metaend)) 91 else if (inside(input->inode_bitmap, start, metaend))
100 ext4_warning(sb, __func__, 92 ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)",
101 "Inode bitmap (%llu) in GDT table"
102 " (%llu-%llu)",
103 (unsigned long long)input->inode_bitmap, 93 (unsigned long long)input->inode_bitmap,
104 start, metaend - 1); 94 start, metaend - 1);
105 else if (inside(input->inode_table, start, metaend) || 95 else if (inside(input->inode_table, start, metaend) ||
106 inside(itend - 1, start, metaend)) 96 inside(itend - 1, start, metaend))
107 ext4_warning(sb, __func__, 97 ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table "
108 "Inode table (%llu-%llu) overlaps" 98 "(%llu-%llu)",
109 "GDT table (%llu-%llu)",
110 (unsigned long long)input->inode_table, 99 (unsigned long long)input->inode_table,
111 itend - 1, start, metaend - 1); 100 itend - 1, start, metaend - 1);
112 else 101 else
@@ -247,7 +236,7 @@ static int setup_new_group_blocks(struct super_block *sb,
247 goto exit_bh; 236 goto exit_bh;
248 237
249 if (IS_ERR(gdb = bclean(handle, sb, block))) { 238 if (IS_ERR(gdb = bclean(handle, sb, block))) {
250 err = PTR_ERR(bh); 239 err = PTR_ERR(gdb);
251 goto exit_bh; 240 goto exit_bh;
252 } 241 }
253 ext4_handle_dirty_metadata(handle, NULL, gdb); 242 ext4_handle_dirty_metadata(handle, NULL, gdb);
@@ -364,8 +353,7 @@ static int verify_reserved_gdb(struct super_block *sb,
364 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { 353 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
365 if (le32_to_cpu(*p++) != 354 if (le32_to_cpu(*p++) !=
366 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ 355 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
367 ext4_warning(sb, __func__, 356 ext4_warning(sb, "reserved GDT %llu"
368 "reserved GDT %llu"
369 " missing grp %d (%llu)", 357 " missing grp %d (%llu)",
370 blk, grp, 358 blk, grp,
371 grp * 359 grp *
@@ -420,8 +408,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
420 */ 408 */
421 if (EXT4_SB(sb)->s_sbh->b_blocknr != 409 if (EXT4_SB(sb)->s_sbh->b_blocknr !=
422 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { 410 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
423 ext4_warning(sb, __func__, 411 ext4_warning(sb, "won't resize using backup superblock at %llu",
424 "won't resize using backup superblock at %llu",
425 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); 412 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
426 return -EPERM; 413 return -EPERM;
427 } 414 }
@@ -444,8 +431,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
444 431
445 data = (__le32 *)dind->b_data; 432 data = (__le32 *)dind->b_data;
446 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { 433 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
447 ext4_warning(sb, __func__, 434 ext4_warning(sb, "new group %u GDT block %llu not reserved",
448 "new group %u GDT block %llu not reserved",
449 input->group, gdblock); 435 input->group, gdblock);
450 err = -EINVAL; 436 err = -EINVAL;
451 goto exit_dind; 437 goto exit_dind;
@@ -468,7 +454,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
468 GFP_NOFS); 454 GFP_NOFS);
469 if (!n_group_desc) { 455 if (!n_group_desc) {
470 err = -ENOMEM; 456 err = -ENOMEM;
471 ext4_warning(sb, __func__, 457 ext4_warning(sb,
472 "not enough memory for %lu groups", gdb_num + 1); 458 "not enough memory for %lu groups", gdb_num + 1);
473 goto exit_inode; 459 goto exit_inode;
474 } 460 }
@@ -567,8 +553,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
567 /* Get each reserved primary GDT block and verify it holds backups */ 553 /* Get each reserved primary GDT block and verify it holds backups */
568 for (res = 0; res < reserved_gdb; res++, blk++) { 554 for (res = 0; res < reserved_gdb; res++, blk++) {
569 if (le32_to_cpu(*data) != blk) { 555 if (le32_to_cpu(*data) != blk) {
570 ext4_warning(sb, __func__, 556 ext4_warning(sb, "reserved block %llu"
571 "reserved block %llu"
572 " not at offset %ld", 557 " not at offset %ld",
573 blk, 558 blk,
574 (long)(data - (__le32 *)dind->b_data)); 559 (long)(data - (__le32 *)dind->b_data));
@@ -713,8 +698,7 @@ static void update_backups(struct super_block *sb,
713 */ 698 */
714exit_err: 699exit_err:
715 if (err) { 700 if (err) {
716 ext4_warning(sb, __func__, 701 ext4_warning(sb, "can't update backup for group %u (err %d), "
717 "can't update backup for group %u (err %d), "
718 "forcing fsck on next reboot", group, err); 702 "forcing fsck on next reboot", group, err);
719 sbi->s_mount_state &= ~EXT4_VALID_FS; 703 sbi->s_mount_state &= ~EXT4_VALID_FS;
720 sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 704 sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
@@ -753,20 +737,19 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
753 737
754 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, 738 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb,
755 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 739 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
756 ext4_warning(sb, __func__, 740 ext4_warning(sb, "Can't resize non-sparse filesystem further");
757 "Can't resize non-sparse filesystem further");
758 return -EPERM; 741 return -EPERM;
759 } 742 }
760 743
761 if (ext4_blocks_count(es) + input->blocks_count < 744 if (ext4_blocks_count(es) + input->blocks_count <
762 ext4_blocks_count(es)) { 745 ext4_blocks_count(es)) {
763 ext4_warning(sb, __func__, "blocks_count overflow"); 746 ext4_warning(sb, "blocks_count overflow");
764 return -EINVAL; 747 return -EINVAL;
765 } 748 }
766 749
767 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < 750 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
768 le32_to_cpu(es->s_inodes_count)) { 751 le32_to_cpu(es->s_inodes_count)) {
769 ext4_warning(sb, __func__, "inodes_count overflow"); 752 ext4_warning(sb, "inodes_count overflow");
770 return -EINVAL; 753 return -EINVAL;
771 } 754 }
772 755
@@ -774,14 +757,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
774 if (!EXT4_HAS_COMPAT_FEATURE(sb, 757 if (!EXT4_HAS_COMPAT_FEATURE(sb,
775 EXT4_FEATURE_COMPAT_RESIZE_INODE) 758 EXT4_FEATURE_COMPAT_RESIZE_INODE)
776 || !le16_to_cpu(es->s_reserved_gdt_blocks)) { 759 || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
777 ext4_warning(sb, __func__, 760 ext4_warning(sb,
778 "No reserved GDT blocks, can't resize"); 761 "No reserved GDT blocks, can't resize");
779 return -EPERM; 762 return -EPERM;
780 } 763 }
781 inode = ext4_iget(sb, EXT4_RESIZE_INO); 764 inode = ext4_iget(sb, EXT4_RESIZE_INO);
782 if (IS_ERR(inode)) { 765 if (IS_ERR(inode)) {
783 ext4_warning(sb, __func__, 766 ext4_warning(sb, "Error opening resize inode");
784 "Error opening resize inode");
785 return PTR_ERR(inode); 767 return PTR_ERR(inode);
786 } 768 }
787 } 769 }
@@ -810,8 +792,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
810 792
811 mutex_lock(&sbi->s_resize_lock); 793 mutex_lock(&sbi->s_resize_lock);
812 if (input->group != sbi->s_groups_count) { 794 if (input->group != sbi->s_groups_count) {
813 ext4_warning(sb, __func__, 795 ext4_warning(sb, "multiple resizers run on filesystem!");
814 "multiple resizers run on filesystem!");
815 err = -EBUSY; 796 err = -EBUSY;
816 goto exit_journal; 797 goto exit_journal;
817 } 798 }
@@ -997,13 +978,12 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
997 " too large to resize to %llu blocks safely\n", 978 " too large to resize to %llu blocks safely\n",
998 sb->s_id, n_blocks_count); 979 sb->s_id, n_blocks_count);
999 if (sizeof(sector_t) < 8) 980 if (sizeof(sector_t) < 8)
1000 ext4_warning(sb, __func__, "CONFIG_LBDAF not enabled"); 981 ext4_warning(sb, "CONFIG_LBDAF not enabled");
1001 return -EINVAL; 982 return -EINVAL;
1002 } 983 }
1003 984
1004 if (n_blocks_count < o_blocks_count) { 985 if (n_blocks_count < o_blocks_count) {
1005 ext4_warning(sb, __func__, 986 ext4_warning(sb, "can't shrink FS - resize aborted");
1006 "can't shrink FS - resize aborted");
1007 return -EBUSY; 987 return -EBUSY;
1008 } 988 }
1009 989
@@ -1011,15 +991,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1011 ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); 991 ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
1012 992
1013 if (last == 0) { 993 if (last == 0) {
1014 ext4_warning(sb, __func__, 994 ext4_warning(sb, "need to use ext2online to resize further");
1015 "need to use ext2online to resize further");
1016 return -EPERM; 995 return -EPERM;
1017 } 996 }
1018 997
1019 add = EXT4_BLOCKS_PER_GROUP(sb) - last; 998 add = EXT4_BLOCKS_PER_GROUP(sb) - last;
1020 999
1021 if (o_blocks_count + add < o_blocks_count) { 1000 if (o_blocks_count + add < o_blocks_count) {
1022 ext4_warning(sb, __func__, "blocks_count overflow"); 1001 ext4_warning(sb, "blocks_count overflow");
1023 return -EINVAL; 1002 return -EINVAL;
1024 } 1003 }
1025 1004
@@ -1027,16 +1006,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1027 add = n_blocks_count - o_blocks_count; 1006 add = n_blocks_count - o_blocks_count;
1028 1007
1029 if (o_blocks_count + add < n_blocks_count) 1008 if (o_blocks_count + add < n_blocks_count)
1030 ext4_warning(sb, __func__, 1009 ext4_warning(sb, "will only finish group (%llu blocks, %u new)",
1031 "will only finish group (%llu"
1032 " blocks, %u new)",
1033 o_blocks_count + add, add); 1010 o_blocks_count + add, add);
1034 1011
1035 /* See if the device is actually as big as what was requested */ 1012 /* See if the device is actually as big as what was requested */
1036 bh = sb_bread(sb, o_blocks_count + add - 1); 1013 bh = sb_bread(sb, o_blocks_count + add - 1);
1037 if (!bh) { 1014 if (!bh) {
1038 ext4_warning(sb, __func__, 1015 ext4_warning(sb, "can't read last block, resize aborted");
1039 "can't read last block, resize aborted");
1040 return -ENOSPC; 1016 return -ENOSPC;
1041 } 1017 }
1042 brelse(bh); 1018 brelse(bh);
@@ -1047,14 +1023,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1047 handle = ext4_journal_start_sb(sb, 3); 1023 handle = ext4_journal_start_sb(sb, 3);
1048 if (IS_ERR(handle)) { 1024 if (IS_ERR(handle)) {
1049 err = PTR_ERR(handle); 1025 err = PTR_ERR(handle);
1050 ext4_warning(sb, __func__, "error %d on journal start", err); 1026 ext4_warning(sb, "error %d on journal start", err);
1051 goto exit_put; 1027 goto exit_put;
1052 } 1028 }
1053 1029
1054 mutex_lock(&EXT4_SB(sb)->s_resize_lock); 1030 mutex_lock(&EXT4_SB(sb)->s_resize_lock);
1055 if (o_blocks_count != ext4_blocks_count(es)) { 1031 if (o_blocks_count != ext4_blocks_count(es)) {
1056 ext4_warning(sb, __func__, 1032 ext4_warning(sb, "multiple resizers run on filesystem!");
1057 "multiple resizers run on filesystem!");
1058 mutex_unlock(&EXT4_SB(sb)->s_resize_lock); 1033 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1059 ext4_journal_stop(handle); 1034 ext4_journal_stop(handle);
1060 err = -EBUSY; 1035 err = -EBUSY;
@@ -1063,8 +1038,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1063 1038
1064 if ((err = ext4_journal_get_write_access(handle, 1039 if ((err = ext4_journal_get_write_access(handle,
1065 EXT4_SB(sb)->s_sbh))) { 1040 EXT4_SB(sb)->s_sbh))) {
1066 ext4_warning(sb, __func__, 1041 ext4_warning(sb, "error %d on journal write access", err);
1067 "error %d on journal write access", err);
1068 mutex_unlock(&EXT4_SB(sb)->s_resize_lock); 1042 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1069 ext4_journal_stop(handle); 1043 ext4_journal_stop(handle);
1070 goto exit_put; 1044 goto exit_put;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d4ca92aab514..e14d22c170d5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -68,7 +68,21 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
68static int ext4_unfreeze(struct super_block *sb); 68static int ext4_unfreeze(struct super_block *sb);
69static void ext4_write_super(struct super_block *sb); 69static void ext4_write_super(struct super_block *sb);
70static int ext4_freeze(struct super_block *sb); 70static int ext4_freeze(struct super_block *sb);
71static int ext4_get_sb(struct file_system_type *fs_type, int flags,
72 const char *dev_name, void *data, struct vfsmount *mnt);
71 73
74#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
75static struct file_system_type ext3_fs_type = {
76 .owner = THIS_MODULE,
77 .name = "ext3",
78 .get_sb = ext4_get_sb,
79 .kill_sb = kill_block_super,
80 .fs_flags = FS_REQUIRES_DEV,
81};
82#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
83#else
84#define IS_EXT3_SB(sb) (0)
85#endif
72 86
73ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 87ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
74 struct ext4_group_desc *bg) 88 struct ext4_group_desc *bg)
@@ -302,7 +316,7 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
302 * write out the superblock safely. 316 * write out the superblock safely.
303 * 317 *
304 * We'll just use the jbd2_journal_abort() error code to record an error in 318 * We'll just use the jbd2_journal_abort() error code to record an error in
305 * the journal instead. On recovery, the journal will compain about 319 * the journal instead. On recovery, the journal will complain about
306 * that error until we've noted it down and cleared it. 320 * that error until we've noted it down and cleared it.
307 */ 321 */
308 322
@@ -333,7 +347,7 @@ static void ext4_handle_error(struct super_block *sb)
333 sb->s_id); 347 sb->s_id);
334} 348}
335 349
336void ext4_error(struct super_block *sb, const char *function, 350void __ext4_error(struct super_block *sb, const char *function,
337 const char *fmt, ...) 351 const char *fmt, ...)
338{ 352{
339 va_list args; 353 va_list args;
@@ -347,6 +361,42 @@ void ext4_error(struct super_block *sb, const char *function,
347 ext4_handle_error(sb); 361 ext4_handle_error(sb);
348} 362}
349 363
364void ext4_error_inode(const char *function, struct inode *inode,
365 const char *fmt, ...)
366{
367 va_list args;
368
369 va_start(args, fmt);
370 printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ",
371 inode->i_sb->s_id, function, inode->i_ino, current->comm);
372 vprintk(fmt, args);
373 printk("\n");
374 va_end(args);
375
376 ext4_handle_error(inode->i_sb);
377}
378
379void ext4_error_file(const char *function, struct file *file,
380 const char *fmt, ...)
381{
382 va_list args;
383 struct inode *inode = file->f_dentry->d_inode;
384 char pathname[80], *path;
385
386 va_start(args, fmt);
387 path = d_path(&(file->f_path), pathname, sizeof(pathname));
388 if (!path)
389 path = "(unknown)";
390 printk(KERN_CRIT
391 "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ",
392 inode->i_sb->s_id, function, inode->i_ino, current->comm, path);
393 vprintk(fmt, args);
394 printk("\n");
395 va_end(args);
396
397 ext4_handle_error(inode->i_sb);
398}
399
350static const char *ext4_decode_error(struct super_block *sb, int errno, 400static const char *ext4_decode_error(struct super_block *sb, int errno,
351 char nbuf[16]) 401 char nbuf[16])
352{ 402{
@@ -450,7 +500,7 @@ void ext4_msg (struct super_block * sb, const char *prefix,
450 va_end(args); 500 va_end(args);
451} 501}
452 502
453void ext4_warning(struct super_block *sb, const char *function, 503void __ext4_warning(struct super_block *sb, const char *function,
454 const char *fmt, ...) 504 const char *fmt, ...)
455{ 505{
456 va_list args; 506 va_list args;
@@ -507,7 +557,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
507 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 557 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
508 return; 558 return;
509 559
510 ext4_warning(sb, __func__, 560 ext4_warning(sb,
511 "updating to rev %d because of new feature flag, " 561 "updating to rev %d because of new feature flag, "
512 "running e2fsck is recommended", 562 "running e2fsck is recommended",
513 EXT4_DYNAMIC_REV); 563 EXT4_DYNAMIC_REV);
@@ -603,10 +653,6 @@ static void ext4_put_super(struct super_block *sb)
603 if (sb->s_dirt) 653 if (sb->s_dirt)
604 ext4_commit_super(sb, 1); 654 ext4_commit_super(sb, 1);
605 655
606 ext4_release_system_zone(sb);
607 ext4_mb_release(sb);
608 ext4_ext_release(sb);
609 ext4_xattr_put_super(sb);
610 if (sbi->s_journal) { 656 if (sbi->s_journal) {
611 err = jbd2_journal_destroy(sbi->s_journal); 657 err = jbd2_journal_destroy(sbi->s_journal);
612 sbi->s_journal = NULL; 658 sbi->s_journal = NULL;
@@ -614,6 +660,12 @@ static void ext4_put_super(struct super_block *sb)
614 ext4_abort(sb, __func__, 660 ext4_abort(sb, __func__,
615 "Couldn't clean up the journal"); 661 "Couldn't clean up the journal");
616 } 662 }
663
664 ext4_release_system_zone(sb);
665 ext4_mb_release(sb);
666 ext4_ext_release(sb);
667 ext4_xattr_put_super(sb);
668
617 if (!(sb->s_flags & MS_RDONLY)) { 669 if (!(sb->s_flags & MS_RDONLY)) {
618 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 670 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
619 es->s_state = cpu_to_le16(sbi->s_mount_state); 671 es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -700,10 +752,17 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
700 ei->i_reserved_data_blocks = 0; 752 ei->i_reserved_data_blocks = 0;
701 ei->i_reserved_meta_blocks = 0; 753 ei->i_reserved_meta_blocks = 0;
702 ei->i_allocated_meta_blocks = 0; 754 ei->i_allocated_meta_blocks = 0;
755 ei->i_da_metadata_calc_len = 0;
703 ei->i_delalloc_reserved_flag = 0; 756 ei->i_delalloc_reserved_flag = 0;
704 spin_lock_init(&(ei->i_block_reservation_lock)); 757 spin_lock_init(&(ei->i_block_reservation_lock));
705 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); 758#ifdef CONFIG_QUOTA
759 ei->i_reserved_quota = 0;
760#endif
761 INIT_LIST_HEAD(&ei->i_completed_io_list);
762 spin_lock_init(&ei->i_completed_io_lock);
706 ei->cur_aio_dio = NULL; 763 ei->cur_aio_dio = NULL;
764 ei->i_sync_tid = 0;
765 ei->i_datasync_tid = 0;
707 766
708 return &ei->vfs_inode; 767 return &ei->vfs_inode;
709} 768}
@@ -753,6 +812,7 @@ static void destroy_inodecache(void)
753 812
754static void ext4_clear_inode(struct inode *inode) 813static void ext4_clear_inode(struct inode *inode)
755{ 814{
815 dquot_drop(inode);
756 ext4_discard_preallocations(inode); 816 ext4_discard_preallocations(inode);
757 if (EXT4_JOURNAL(inode)) 817 if (EXT4_JOURNAL(inode))
758 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 818 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
@@ -765,9 +825,22 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
765#if defined(CONFIG_QUOTA) 825#if defined(CONFIG_QUOTA)
766 struct ext4_sb_info *sbi = EXT4_SB(sb); 826 struct ext4_sb_info *sbi = EXT4_SB(sb);
767 827
768 if (sbi->s_jquota_fmt) 828 if (sbi->s_jquota_fmt) {
769 seq_printf(seq, ",jqfmt=%s", 829 char *fmtname = "";
770 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); 830
831 switch (sbi->s_jquota_fmt) {
832 case QFMT_VFS_OLD:
833 fmtname = "vfsold";
834 break;
835 case QFMT_VFS_V0:
836 fmtname = "vfsv0";
837 break;
838 case QFMT_VFS_V1:
839 fmtname = "vfsv1";
840 break;
841 }
842 seq_printf(seq, ",jqfmt=%s", fmtname);
843 }
771 844
772 if (sbi->s_qf_names[USRQUOTA]) 845 if (sbi->s_qf_names[USRQUOTA])
773 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 846 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
@@ -775,10 +848,10 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
775 if (sbi->s_qf_names[GRPQUOTA]) 848 if (sbi->s_qf_names[GRPQUOTA])
776 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 849 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
777 850
778 if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) 851 if (test_opt(sb, USRQUOTA))
779 seq_puts(seq, ",usrquota"); 852 seq_puts(seq, ",usrquota");
780 853
781 if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) 854 if (test_opt(sb, GRPQUOTA))
782 seq_puts(seq, ",grpquota"); 855 seq_puts(seq, ",grpquota");
783#endif 856#endif
784} 857}
@@ -899,6 +972,15 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
899 if (test_opt(sb, NO_AUTO_DA_ALLOC)) 972 if (test_opt(sb, NO_AUTO_DA_ALLOC))
900 seq_puts(seq, ",noauto_da_alloc"); 973 seq_puts(seq, ",noauto_da_alloc");
901 974
975 if (test_opt(sb, DISCARD))
976 seq_puts(seq, ",discard");
977
978 if (test_opt(sb, NOLOAD))
979 seq_puts(seq, ",norecovery");
980
981 if (test_opt(sb, DIOREAD_NOLOCK))
982 seq_puts(seq, ",dioread_nolock");
983
902 ext4_show_quota_options(seq, sb); 984 ext4_show_quota_options(seq, sb);
903 985
904 return 0; 986 return 0;
@@ -985,17 +1067,9 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
985 const char *data, size_t len, loff_t off); 1067 const char *data, size_t len, loff_t off);
986 1068
987static const struct dquot_operations ext4_quota_operations = { 1069static const struct dquot_operations ext4_quota_operations = {
988 .initialize = dquot_initialize, 1070#ifdef CONFIG_QUOTA
989 .drop = dquot_drop,
990 .alloc_space = dquot_alloc_space,
991 .reserve_space = dquot_reserve_space,
992 .claim_space = dquot_claim_space,
993 .release_rsv = dquot_release_reserved_space,
994 .get_reserved_space = ext4_get_reserved_space, 1071 .get_reserved_space = ext4_get_reserved_space,
995 .alloc_inode = dquot_alloc_inode, 1072#endif
996 .free_space = dquot_free_space,
997 .free_inode = dquot_free_inode,
998 .transfer = dquot_transfer,
999 .write_dquot = ext4_write_dquot, 1073 .write_dquot = ext4_write_dquot,
1000 .acquire_dquot = ext4_acquire_dquot, 1074 .acquire_dquot = ext4_acquire_dquot,
1001 .release_dquot = ext4_release_dquot, 1075 .release_dquot = ext4_release_dquot,
@@ -1074,12 +1148,14 @@ enum {
1074 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1148 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1075 Opt_data_err_abort, Opt_data_err_ignore, 1149 Opt_data_err_abort, Opt_data_err_ignore,
1076 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1150 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1077 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 1151 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
1078 Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, 1152 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
1079 Opt_usrquota, Opt_grpquota, Opt_i_version, 1153 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
1080 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1154 Opt_stripe, Opt_delalloc, Opt_nodelalloc,
1081 Opt_block_validity, Opt_noblock_validity, 1155 Opt_block_validity, Opt_noblock_validity,
1082 Opt_inode_readahead_blks, Opt_journal_ioprio 1156 Opt_inode_readahead_blks, Opt_journal_ioprio,
1157 Opt_dioread_nolock, Opt_dioread_lock,
1158 Opt_discard, Opt_nodiscard,
1083}; 1159};
1084 1160
1085static const match_table_t tokens = { 1161static const match_table_t tokens = {
@@ -1104,6 +1180,7 @@ static const match_table_t tokens = {
1104 {Opt_acl, "acl"}, 1180 {Opt_acl, "acl"},
1105 {Opt_noacl, "noacl"}, 1181 {Opt_noacl, "noacl"},
1106 {Opt_noload, "noload"}, 1182 {Opt_noload, "noload"},
1183 {Opt_noload, "norecovery"},
1107 {Opt_nobh, "nobh"}, 1184 {Opt_nobh, "nobh"},
1108 {Opt_bh, "bh"}, 1185 {Opt_bh, "bh"},
1109 {Opt_commit, "commit=%u"}, 1186 {Opt_commit, "commit=%u"},
@@ -1125,6 +1202,7 @@ static const match_table_t tokens = {
1125 {Opt_grpjquota, "grpjquota=%s"}, 1202 {Opt_grpjquota, "grpjquota=%s"},
1126 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 1203 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1127 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 1204 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1205 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
1128 {Opt_grpquota, "grpquota"}, 1206 {Opt_grpquota, "grpquota"},
1129 {Opt_noquota, "noquota"}, 1207 {Opt_noquota, "noquota"},
1130 {Opt_quota, "quota"}, 1208 {Opt_quota, "quota"},
@@ -1144,6 +1222,10 @@ static const match_table_t tokens = {
1144 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1222 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1145 {Opt_auto_da_alloc, "auto_da_alloc"}, 1223 {Opt_auto_da_alloc, "auto_da_alloc"},
1146 {Opt_noauto_da_alloc, "noauto_da_alloc"}, 1224 {Opt_noauto_da_alloc, "noauto_da_alloc"},
1225 {Opt_dioread_nolock, "dioread_nolock"},
1226 {Opt_dioread_lock, "dioread_lock"},
1227 {Opt_discard, "discard"},
1228 {Opt_nodiscard, "nodiscard"},
1147 {Opt_err, NULL}, 1229 {Opt_err, NULL},
1148}; 1230};
1149 1231
@@ -1171,6 +1253,66 @@ static ext4_fsblk_t get_sb_block(void **data)
1171} 1253}
1172 1254
1173#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) 1255#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
1256static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
1257 "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
1258
1259#ifdef CONFIG_QUOTA
1260static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1261{
1262 struct ext4_sb_info *sbi = EXT4_SB(sb);
1263 char *qname;
1264
1265 if (sb_any_quota_loaded(sb) &&
1266 !sbi->s_qf_names[qtype]) {
1267 ext4_msg(sb, KERN_ERR,
1268 "Cannot change journaled "
1269 "quota options when quota turned on");
1270 return 0;
1271 }
1272 qname = match_strdup(args);
1273 if (!qname) {
1274 ext4_msg(sb, KERN_ERR,
1275 "Not enough memory for storing quotafile name");
1276 return 0;
1277 }
1278 if (sbi->s_qf_names[qtype] &&
1279 strcmp(sbi->s_qf_names[qtype], qname)) {
1280 ext4_msg(sb, KERN_ERR,
1281 "%s quota file already specified", QTYPE2NAME(qtype));
1282 kfree(qname);
1283 return 0;
1284 }
1285 sbi->s_qf_names[qtype] = qname;
1286 if (strchr(sbi->s_qf_names[qtype], '/')) {
1287 ext4_msg(sb, KERN_ERR,
1288 "quotafile must be on filesystem root");
1289 kfree(sbi->s_qf_names[qtype]);
1290 sbi->s_qf_names[qtype] = NULL;
1291 return 0;
1292 }
1293 set_opt(sbi->s_mount_opt, QUOTA);
1294 return 1;
1295}
1296
1297static int clear_qf_name(struct super_block *sb, int qtype)
1298{
1299
1300 struct ext4_sb_info *sbi = EXT4_SB(sb);
1301
1302 if (sb_any_quota_loaded(sb) &&
1303 sbi->s_qf_names[qtype]) {
1304 ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
1305 " when quota turned on");
1306 return 0;
1307 }
1308 /*
1309 * The space will be released later when all options are confirmed
1310 * to be correct
1311 */
1312 sbi->s_qf_names[qtype] = NULL;
1313 return 1;
1314}
1315#endif
1174 1316
1175static int parse_options(char *options, struct super_block *sb, 1317static int parse_options(char *options, struct super_block *sb,
1176 unsigned long *journal_devnum, 1318 unsigned long *journal_devnum,
@@ -1183,8 +1325,7 @@ static int parse_options(char *options, struct super_block *sb,
1183 int data_opt = 0; 1325 int data_opt = 0;
1184 int option; 1326 int option;
1185#ifdef CONFIG_QUOTA 1327#ifdef CONFIG_QUOTA
1186 int qtype, qfmt; 1328 int qfmt;
1187 char *qname;
1188#endif 1329#endif
1189 1330
1190 if (!options) 1331 if (!options)
@@ -1195,19 +1336,31 @@ static int parse_options(char *options, struct super_block *sb,
1195 if (!*p) 1336 if (!*p)
1196 continue; 1337 continue;
1197 1338
1339 /*
1340 * Initialize args struct so we know whether arg was
1341 * found; some options take optional arguments.
1342 */
1343 args[0].to = args[0].from = 0;
1198 token = match_token(p, tokens, args); 1344 token = match_token(p, tokens, args);
1199 switch (token) { 1345 switch (token) {
1200 case Opt_bsd_df: 1346 case Opt_bsd_df:
1347 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
1201 clear_opt(sbi->s_mount_opt, MINIX_DF); 1348 clear_opt(sbi->s_mount_opt, MINIX_DF);
1202 break; 1349 break;
1203 case Opt_minix_df: 1350 case Opt_minix_df:
1351 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
1204 set_opt(sbi->s_mount_opt, MINIX_DF); 1352 set_opt(sbi->s_mount_opt, MINIX_DF);
1353
1205 break; 1354 break;
1206 case Opt_grpid: 1355 case Opt_grpid:
1356 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
1207 set_opt(sbi->s_mount_opt, GRPID); 1357 set_opt(sbi->s_mount_opt, GRPID);
1358
1208 break; 1359 break;
1209 case Opt_nogrpid: 1360 case Opt_nogrpid:
1361 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
1210 clear_opt(sbi->s_mount_opt, GRPID); 1362 clear_opt(sbi->s_mount_opt, GRPID);
1363
1211 break; 1364 break;
1212 case Opt_resuid: 1365 case Opt_resuid:
1213 if (match_int(&args[0], &option)) 1366 if (match_int(&args[0], &option))
@@ -1344,14 +1497,13 @@ static int parse_options(char *options, struct super_block *sb,
1344 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1497 data_opt = EXT4_MOUNT_WRITEBACK_DATA;
1345 datacheck: 1498 datacheck:
1346 if (is_remount) { 1499 if (is_remount) {
1347 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) 1500 if (test_opt(sb, DATA_FLAGS) != data_opt) {
1348 != data_opt) {
1349 ext4_msg(sb, KERN_ERR, 1501 ext4_msg(sb, KERN_ERR,
1350 "Cannot change data mode on remount"); 1502 "Cannot change data mode on remount");
1351 return 0; 1503 return 0;
1352 } 1504 }
1353 } else { 1505 } else {
1354 sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; 1506 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
1355 sbi->s_mount_opt |= data_opt; 1507 sbi->s_mount_opt |= data_opt;
1356 } 1508 }
1357 break; 1509 break;
@@ -1363,68 +1515,30 @@ static int parse_options(char *options, struct super_block *sb,
1363 break; 1515 break;
1364#ifdef CONFIG_QUOTA 1516#ifdef CONFIG_QUOTA
1365 case Opt_usrjquota: 1517 case Opt_usrjquota:
1366 qtype = USRQUOTA; 1518 if (!set_qf_name(sb, USRQUOTA, &args[0]))
1367 goto set_qf_name;
1368 case Opt_grpjquota:
1369 qtype = GRPQUOTA;
1370set_qf_name:
1371 if (sb_any_quota_loaded(sb) &&
1372 !sbi->s_qf_names[qtype]) {
1373 ext4_msg(sb, KERN_ERR,
1374 "Cannot change journaled "
1375 "quota options when quota turned on");
1376 return 0;
1377 }
1378 qname = match_strdup(&args[0]);
1379 if (!qname) {
1380 ext4_msg(sb, KERN_ERR,
1381 "Not enough memory for "
1382 "storing quotafile name");
1383 return 0; 1519 return 0;
1384 } 1520 break;
1385 if (sbi->s_qf_names[qtype] && 1521 case Opt_grpjquota:
1386 strcmp(sbi->s_qf_names[qtype], qname)) { 1522 if (!set_qf_name(sb, GRPQUOTA, &args[0]))
1387 ext4_msg(sb, KERN_ERR,
1388 "%s quota file already "
1389 "specified", QTYPE2NAME(qtype));
1390 kfree(qname);
1391 return 0;
1392 }
1393 sbi->s_qf_names[qtype] = qname;
1394 if (strchr(sbi->s_qf_names[qtype], '/')) {
1395 ext4_msg(sb, KERN_ERR,
1396 "quotafile must be on "
1397 "filesystem root");
1398 kfree(sbi->s_qf_names[qtype]);
1399 sbi->s_qf_names[qtype] = NULL;
1400 return 0; 1523 return 0;
1401 }
1402 set_opt(sbi->s_mount_opt, QUOTA);
1403 break; 1524 break;
1404 case Opt_offusrjquota: 1525 case Opt_offusrjquota:
1405 qtype = USRQUOTA; 1526 if (!clear_qf_name(sb, USRQUOTA))
1406 goto clear_qf_name; 1527 return 0;
1528 break;
1407 case Opt_offgrpjquota: 1529 case Opt_offgrpjquota:
1408 qtype = GRPQUOTA; 1530 if (!clear_qf_name(sb, GRPQUOTA))
1409clear_qf_name:
1410 if (sb_any_quota_loaded(sb) &&
1411 sbi->s_qf_names[qtype]) {
1412 ext4_msg(sb, KERN_ERR, "Cannot change "
1413 "journaled quota options when "
1414 "quota turned on");
1415 return 0; 1531 return 0;
1416 }
1417 /*
1418 * The space will be released later when all options
1419 * are confirmed to be correct
1420 */
1421 sbi->s_qf_names[qtype] = NULL;
1422 break; 1532 break;
1533
1423 case Opt_jqfmt_vfsold: 1534 case Opt_jqfmt_vfsold:
1424 qfmt = QFMT_VFS_OLD; 1535 qfmt = QFMT_VFS_OLD;
1425 goto set_qf_format; 1536 goto set_qf_format;
1426 case Opt_jqfmt_vfsv0: 1537 case Opt_jqfmt_vfsv0:
1427 qfmt = QFMT_VFS_V0; 1538 qfmt = QFMT_VFS_V0;
1539 goto set_qf_format;
1540 case Opt_jqfmt_vfsv1:
1541 qfmt = QFMT_VFS_V1;
1428set_qf_format: 1542set_qf_format:
1429 if (sb_any_quota_loaded(sb) && 1543 if (sb_any_quota_loaded(sb) &&
1430 sbi->s_jquota_fmt != qfmt) { 1544 sbi->s_jquota_fmt != qfmt) {
@@ -1467,6 +1581,7 @@ set_qf_format:
1467 case Opt_offgrpjquota: 1581 case Opt_offgrpjquota:
1468 case Opt_jqfmt_vfsold: 1582 case Opt_jqfmt_vfsold:
1469 case Opt_jqfmt_vfsv0: 1583 case Opt_jqfmt_vfsv0:
1584 case Opt_jqfmt_vfsv1:
1470 ext4_msg(sb, KERN_ERR, 1585 ext4_msg(sb, KERN_ERR,
1471 "journaled quota options not supported"); 1586 "journaled quota options not supported");
1472 break; 1587 break;
@@ -1480,10 +1595,11 @@ set_qf_format:
1480 clear_opt(sbi->s_mount_opt, BARRIER); 1595 clear_opt(sbi->s_mount_opt, BARRIER);
1481 break; 1596 break;
1482 case Opt_barrier: 1597 case Opt_barrier:
1483 if (match_int(&args[0], &option)) { 1598 if (args[0].from) {
1484 set_opt(sbi->s_mount_opt, BARRIER); 1599 if (match_int(&args[0], &option))
1485 break; 1600 return 0;
1486 } 1601 } else
1602 option = 1; /* No argument, default to 1 */
1487 if (option) 1603 if (option)
1488 set_opt(sbi->s_mount_opt, BARRIER); 1604 set_opt(sbi->s_mount_opt, BARRIER);
1489 else 1605 else
@@ -1556,15 +1672,28 @@ set_qf_format:
1556 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1672 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1557 break; 1673 break;
1558 case Opt_auto_da_alloc: 1674 case Opt_auto_da_alloc:
1559 if (match_int(&args[0], &option)) { 1675 if (args[0].from) {
1560 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); 1676 if (match_int(&args[0], &option))
1561 break; 1677 return 0;
1562 } 1678 } else
1679 option = 1; /* No argument, default to 1 */
1563 if (option) 1680 if (option)
1564 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); 1681 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1565 else 1682 else
1566 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1683 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1567 break; 1684 break;
1685 case Opt_discard:
1686 set_opt(sbi->s_mount_opt, DISCARD);
1687 break;
1688 case Opt_nodiscard:
1689 clear_opt(sbi->s_mount_opt, DISCARD);
1690 break;
1691 case Opt_dioread_nolock:
1692 set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
1693 break;
1694 case Opt_dioread_lock:
1695 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
1696 break;
1568 default: 1697 default:
1569 ext4_msg(sb, KERN_ERR, 1698 ext4_msg(sb, KERN_ERR,
1570 "Unrecognized mount option \"%s\" " 1699 "Unrecognized mount option \"%s\" "
@@ -1574,18 +1703,13 @@ set_qf_format:
1574 } 1703 }
1575#ifdef CONFIG_QUOTA 1704#ifdef CONFIG_QUOTA
1576 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1705 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1577 if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && 1706 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
1578 sbi->s_qf_names[USRQUOTA])
1579 clear_opt(sbi->s_mount_opt, USRQUOTA); 1707 clear_opt(sbi->s_mount_opt, USRQUOTA);
1580 1708
1581 if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && 1709 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
1582 sbi->s_qf_names[GRPQUOTA])
1583 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1710 clear_opt(sbi->s_mount_opt, GRPQUOTA);
1584 1711
1585 if ((sbi->s_qf_names[USRQUOTA] && 1712 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
1586 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
1587 (sbi->s_qf_names[GRPQUOTA] &&
1588 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
1589 ext4_msg(sb, KERN_ERR, "old and new quota " 1713 ext4_msg(sb, KERN_ERR, "old and new quota "
1590 "format mixing"); 1714 "format mixing");
1591 return 0; 1715 return 0;
@@ -1673,14 +1797,14 @@ static int ext4_fill_flex_info(struct super_block *sb)
1673 size_t size; 1797 size_t size;
1674 int i; 1798 int i;
1675 1799
1676 if (!sbi->s_es->s_log_groups_per_flex) { 1800 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1801 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1802
1803 if (groups_per_flex < 2) {
1677 sbi->s_log_groups_per_flex = 0; 1804 sbi->s_log_groups_per_flex = 0;
1678 return 1; 1805 return 1;
1679 } 1806 }
1680 1807
1681 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1682 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1683
1684 /* We allocate both existing and potentially added groups */ 1808 /* We allocate both existing and potentially added groups */
1685 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1809 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1686 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 1810 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
@@ -1895,7 +2019,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1895 } 2019 }
1896 2020
1897 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 2021 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
1898 vfs_dq_init(inode); 2022 dquot_initialize(inode);
1899 if (inode->i_nlink) { 2023 if (inode->i_nlink) {
1900 ext4_msg(sb, KERN_DEBUG, 2024 ext4_msg(sb, KERN_DEBUG,
1901 "%s: truncating inode %lu to %lld bytes", 2025 "%s: truncating inode %lu to %lld bytes",
@@ -2099,11 +2223,8 @@ static int parse_strtoul(const char *buf,
2099{ 2223{
2100 char *endp; 2224 char *endp;
2101 2225
2102 while (*buf && isspace(*buf)) 2226 *value = simple_strtoul(skip_spaces(buf), &endp, 0);
2103 buf++; 2227 endp = skip_spaces(endp);
2104 *value = simple_strtoul(buf, &endp, 0);
2105 while (*endp && isspace(*endp))
2106 endp++;
2107 if (*endp || *value > max) 2228 if (*endp || *value > max)
2108 return -EINVAL; 2229 return -EINVAL;
2109 2230
@@ -2134,9 +2255,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2134 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2255 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2135 2256
2136 return snprintf(buf, PAGE_SIZE, "%llu\n", 2257 return snprintf(buf, PAGE_SIZE, "%llu\n",
2137 sbi->s_kbytes_written + 2258 (unsigned long long)(sbi->s_kbytes_written +
2138 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2259 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2139 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 2260 EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2140} 2261}
2141 2262
2142static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2263static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
@@ -2251,7 +2372,7 @@ static void ext4_sb_release(struct kobject *kobj)
2251} 2372}
2252 2373
2253 2374
2254static struct sysfs_ops ext4_attr_ops = { 2375static const struct sysfs_ops ext4_attr_ops = {
2255 .show = ext4_attr_show, 2376 .show = ext4_attr_show,
2256 .store = ext4_attr_store, 2377 .store = ext4_attr_store,
2257}; 2378};
@@ -2391,8 +2512,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2391 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 2512 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
2392 if (def_mount_opts & EXT4_DEFM_DEBUG) 2513 if (def_mount_opts & EXT4_DEFM_DEBUG)
2393 set_opt(sbi->s_mount_opt, DEBUG); 2514 set_opt(sbi->s_mount_opt, DEBUG);
2394 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) 2515 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
2516 ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups",
2517 "2.6.38");
2395 set_opt(sbi->s_mount_opt, GRPID); 2518 set_opt(sbi->s_mount_opt, GRPID);
2519 }
2396 if (def_mount_opts & EXT4_DEFM_UID16) 2520 if (def_mount_opts & EXT4_DEFM_UID16)
2397 set_opt(sbi->s_mount_opt, NO_UID32); 2521 set_opt(sbi->s_mount_opt, NO_UID32);
2398#ifdef CONFIG_EXT4_FS_XATTR 2522#ifdef CONFIG_EXT4_FS_XATTR
@@ -2404,11 +2528,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2404 set_opt(sbi->s_mount_opt, POSIX_ACL); 2528 set_opt(sbi->s_mount_opt, POSIX_ACL);
2405#endif 2529#endif
2406 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 2530 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
2407 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; 2531 set_opt(sbi->s_mount_opt, JOURNAL_DATA);
2408 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 2532 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
2409 sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; 2533 set_opt(sbi->s_mount_opt, ORDERED_DATA);
2410 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 2534 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
2411 sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; 2535 set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
2412 2536
2413 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 2537 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
2414 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 2538 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
@@ -2429,14 +2553,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2429 * enable delayed allocation by default 2553 * enable delayed allocation by default
2430 * Use -o nodelalloc to turn it off 2554 * Use -o nodelalloc to turn it off
2431 */ 2555 */
2432 set_opt(sbi->s_mount_opt, DELALLOC); 2556 if (!IS_EXT3_SB(sb))
2557 set_opt(sbi->s_mount_opt, DELALLOC);
2433 2558
2434 if (!parse_options((char *) data, sb, &journal_devnum, 2559 if (!parse_options((char *) data, sb, &journal_devnum,
2435 &journal_ioprio, NULL, 0)) 2560 &journal_ioprio, NULL, 0))
2436 goto failed_mount; 2561 goto failed_mount;
2437 2562
2438 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2563 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2439 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2564 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
2440 2565
2441 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 2566 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
2442 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 2567 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
@@ -2721,31 +2846,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2721 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 2846 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2722 if (ext4_load_journal(sb, es, journal_devnum)) 2847 if (ext4_load_journal(sb, es, journal_devnum))
2723 goto failed_mount3; 2848 goto failed_mount3;
2724 if (!(sb->s_flags & MS_RDONLY) &&
2725 EXT4_SB(sb)->s_journal->j_failed_commit) {
2726 ext4_msg(sb, KERN_CRIT, "error: "
2727 "ext4_fill_super: Journal transaction "
2728 "%u is corrupt",
2729 EXT4_SB(sb)->s_journal->j_failed_commit);
2730 if (test_opt(sb, ERRORS_RO)) {
2731 ext4_msg(sb, KERN_CRIT,
2732 "Mounting filesystem read-only");
2733 sb->s_flags |= MS_RDONLY;
2734 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2735 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2736 }
2737 if (test_opt(sb, ERRORS_PANIC)) {
2738 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2739 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2740 ext4_commit_super(sb, 1);
2741 goto failed_mount4;
2742 }
2743 }
2744 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 2849 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2745 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2850 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2746 ext4_msg(sb, KERN_ERR, "required journal recovery " 2851 ext4_msg(sb, KERN_ERR, "required journal recovery "
2747 "suppressed and not mounted read-only"); 2852 "suppressed and not mounted read-only");
2748 goto failed_mount4; 2853 goto failed_mount_wq;
2749 } else { 2854 } else {
2750 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 2855 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
2751 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 2856 set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
@@ -2758,7 +2863,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2758 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 2863 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
2759 JBD2_FEATURE_INCOMPAT_64BIT)) { 2864 JBD2_FEATURE_INCOMPAT_64BIT)) {
2760 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); 2865 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
2761 goto failed_mount4; 2866 goto failed_mount_wq;
2762 } 2867 }
2763 2868
2764 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 2869 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
@@ -2797,7 +2902,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2797 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 2902 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
2798 ext4_msg(sb, KERN_ERR, "Journal does not support " 2903 ext4_msg(sb, KERN_ERR, "Journal does not support "
2799 "requested data journaling mode"); 2904 "requested data journaling mode");
2800 goto failed_mount4; 2905 goto failed_mount_wq;
2801 } 2906 }
2802 default: 2907 default:
2803 break; 2908 break;
@@ -2805,13 +2910,17 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2805 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 2910 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
2806 2911
2807no_journal: 2912no_journal:
2808
2809 if (test_opt(sb, NOBH)) { 2913 if (test_opt(sb, NOBH)) {
2810 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 2914 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
2811 ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " 2915 ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
2812 "its supported only with writeback mode"); 2916 "its supported only with writeback mode");
2813 clear_opt(sbi->s_mount_opt, NOBH); 2917 clear_opt(sbi->s_mount_opt, NOBH);
2814 } 2918 }
2919 if (test_opt(sb, DIOREAD_NOLOCK)) {
2920 ext4_msg(sb, KERN_WARNING, "dioread_nolock option is "
2921 "not supported with nobh mode");
2922 goto failed_mount_wq;
2923 }
2815 } 2924 }
2816 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); 2925 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
2817 if (!EXT4_SB(sb)->dio_unwritten_wq) { 2926 if (!EXT4_SB(sb)->dio_unwritten_wq) {
@@ -2876,6 +2985,18 @@ no_journal:
2876 "requested data journaling mode"); 2985 "requested data journaling mode");
2877 clear_opt(sbi->s_mount_opt, DELALLOC); 2986 clear_opt(sbi->s_mount_opt, DELALLOC);
2878 } 2987 }
2988 if (test_opt(sb, DIOREAD_NOLOCK)) {
2989 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2990 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
2991 "option - requested data journaling mode");
2992 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
2993 }
2994 if (sb->s_blocksize < PAGE_SIZE) {
2995 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
2996 "option - block size is too small");
2997 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
2998 }
2999 }
2879 3000
2880 err = ext4_setup_system_zone(sb); 3001 err = ext4_setup_system_zone(sb);
2881 if (err) { 3002 if (err) {
@@ -3339,10 +3460,9 @@ static void ext4_clear_journal_err(struct super_block *sb,
3339 char nbuf[16]; 3460 char nbuf[16];
3340 3461
3341 errstr = ext4_decode_error(sb, j_errno, nbuf); 3462 errstr = ext4_decode_error(sb, j_errno, nbuf);
3342 ext4_warning(sb, __func__, "Filesystem error recorded " 3463 ext4_warning(sb, "Filesystem error recorded "
3343 "from previous mount: %s", errstr); 3464 "from previous mount: %s", errstr);
3344 ext4_warning(sb, __func__, "Marking fs in need of " 3465 ext4_warning(sb, "Marking fs in need of filesystem check.");
3345 "filesystem check.");
3346 3466
3347 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 3467 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
3348 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 3468 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -3493,7 +3613,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3493 ext4_abort(sb, __func__, "Abort forced by user"); 3613 ext4_abort(sb, __func__, "Abort forced by user");
3494 3614
3495 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3615 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3496 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 3616 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3497 3617
3498 es = sbi->s_es; 3618 es = sbi->s_es;
3499 3619
@@ -3668,13 +3788,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3668 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 3788 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
3669 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 3789 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
3670 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 3790 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
3671 ext4_free_blocks_count_set(es, buf->f_bfree);
3672 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 3791 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
3673 if (buf->f_bfree < ext4_r_blocks_count(es)) 3792 if (buf->f_bfree < ext4_r_blocks_count(es))
3674 buf->f_bavail = 0; 3793 buf->f_bavail = 0;
3675 buf->f_files = le32_to_cpu(es->s_inodes_count); 3794 buf->f_files = le32_to_cpu(es->s_inodes_count);
3676 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 3795 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
3677 es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
3678 buf->f_namelen = EXT4_NAME_LEN; 3796 buf->f_namelen = EXT4_NAME_LEN;
3679 fsid = le64_to_cpup((void *)es->s_uuid) ^ 3797 fsid = le64_to_cpup((void *)es->s_uuid) ^
3680 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 3798 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
@@ -3689,7 +3807,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3689 * Process 1 Process 2 3807 * Process 1 Process 2
3690 * ext4_create() quota_sync() 3808 * ext4_create() quota_sync()
3691 * jbd2_journal_start() write_dquot() 3809 * jbd2_journal_start() write_dquot()
3692 * vfs_dq_init() down(dqio_mutex) 3810 * dquot_initialize() down(dqio_mutex)
3693 * down(dqio_mutex) jbd2_journal_start() 3811 * down(dqio_mutex) jbd2_journal_start()
3694 * 3812 *
3695 */ 3813 */
@@ -3898,9 +4016,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
3898 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 4016 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3899 int err = 0; 4017 int err = 0;
3900 int offset = off & (sb->s_blocksize - 1); 4018 int offset = off & (sb->s_blocksize - 1);
3901 int tocopy;
3902 int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; 4019 int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
3903 size_t towrite = len;
3904 struct buffer_head *bh; 4020 struct buffer_head *bh;
3905 handle_t *handle = journal_current_handle(); 4021 handle_t *handle = journal_current_handle();
3906 4022
@@ -3910,52 +4026,53 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
3910 (unsigned long long)off, (unsigned long long)len); 4026 (unsigned long long)off, (unsigned long long)len);
3911 return -EIO; 4027 return -EIO;
3912 } 4028 }
4029 /*
4030 * Since we account only one data block in transaction credits,
4031 * then it is impossible to cross a block boundary.
4032 */
4033 if (sb->s_blocksize - offset < len) {
4034 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
4035 " cancelled because not block aligned",
4036 (unsigned long long)off, (unsigned long long)len);
4037 return -EIO;
4038 }
4039
3913 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 4040 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
3914 while (towrite > 0) { 4041 bh = ext4_bread(handle, inode, blk, 1, &err);
3915 tocopy = sb->s_blocksize - offset < towrite ? 4042 if (!bh)
3916 sb->s_blocksize - offset : towrite; 4043 goto out;
3917 bh = ext4_bread(handle, inode, blk, 1, &err); 4044 if (journal_quota) {
3918 if (!bh) 4045 err = ext4_journal_get_write_access(handle, bh);
4046 if (err) {
4047 brelse(bh);
3919 goto out; 4048 goto out;
3920 if (journal_quota) {
3921 err = ext4_journal_get_write_access(handle, bh);
3922 if (err) {
3923 brelse(bh);
3924 goto out;
3925 }
3926 }
3927 lock_buffer(bh);
3928 memcpy(bh->b_data+offset, data, tocopy);
3929 flush_dcache_page(bh->b_page);
3930 unlock_buffer(bh);
3931 if (journal_quota)
3932 err = ext4_handle_dirty_metadata(handle, NULL, bh);
3933 else {
3934 /* Always do at least ordered writes for quotas */
3935 err = ext4_jbd2_file_inode(handle, inode);
3936 mark_buffer_dirty(bh);
3937 } 4049 }
3938 brelse(bh);
3939 if (err)
3940 goto out;
3941 offset = 0;
3942 towrite -= tocopy;
3943 data += tocopy;
3944 blk++;
3945 } 4050 }
4051 lock_buffer(bh);
4052 memcpy(bh->b_data+offset, data, len);
4053 flush_dcache_page(bh->b_page);
4054 unlock_buffer(bh);
4055 if (journal_quota)
4056 err = ext4_handle_dirty_metadata(handle, NULL, bh);
4057 else {
4058 /* Always do at least ordered writes for quotas */
4059 err = ext4_jbd2_file_inode(handle, inode);
4060 mark_buffer_dirty(bh);
4061 }
4062 brelse(bh);
3946out: 4063out:
3947 if (len == towrite) { 4064 if (err) {
3948 mutex_unlock(&inode->i_mutex); 4065 mutex_unlock(&inode->i_mutex);
3949 return err; 4066 return err;
3950 } 4067 }
3951 if (inode->i_size < off+len-towrite) { 4068 if (inode->i_size < off + len) {
3952 i_size_write(inode, off+len-towrite); 4069 i_size_write(inode, off + len);
3953 EXT4_I(inode)->i_disksize = inode->i_size; 4070 EXT4_I(inode)->i_disksize = inode->i_size;
3954 } 4071 }
3955 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 4072 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3956 ext4_mark_inode_dirty(handle, inode); 4073 ext4_mark_inode_dirty(handle, inode);
3957 mutex_unlock(&inode->i_mutex); 4074 mutex_unlock(&inode->i_mutex);
3958 return len - towrite; 4075 return len;
3959} 4076}
3960 4077
3961#endif 4078#endif
@@ -3966,6 +4083,52 @@ static int ext4_get_sb(struct file_system_type *fs_type, int flags,
3966 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); 4083 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
3967} 4084}
3968 4085
4086#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
4087static struct file_system_type ext2_fs_type = {
4088 .owner = THIS_MODULE,
4089 .name = "ext2",
4090 .get_sb = ext4_get_sb,
4091 .kill_sb = kill_block_super,
4092 .fs_flags = FS_REQUIRES_DEV,
4093};
4094
4095static inline void register_as_ext2(void)
4096{
4097 int err = register_filesystem(&ext2_fs_type);
4098 if (err)
4099 printk(KERN_WARNING
4100 "EXT4-fs: Unable to register as ext2 (%d)\n", err);
4101}
4102
4103static inline void unregister_as_ext2(void)
4104{
4105 unregister_filesystem(&ext2_fs_type);
4106}
4107MODULE_ALIAS("ext2");
4108#else
4109static inline void register_as_ext2(void) { }
4110static inline void unregister_as_ext2(void) { }
4111#endif
4112
4113#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
4114static inline void register_as_ext3(void)
4115{
4116 int err = register_filesystem(&ext3_fs_type);
4117 if (err)
4118 printk(KERN_WARNING
4119 "EXT4-fs: Unable to register as ext3 (%d)\n", err);
4120}
4121
4122static inline void unregister_as_ext3(void)
4123{
4124 unregister_filesystem(&ext3_fs_type);
4125}
4126MODULE_ALIAS("ext3");
4127#else
4128static inline void register_as_ext3(void) { }
4129static inline void unregister_as_ext3(void) { }
4130#endif
4131
3969static struct file_system_type ext4_fs_type = { 4132static struct file_system_type ext4_fs_type = {
3970 .owner = THIS_MODULE, 4133 .owner = THIS_MODULE,
3971 .name = "ext4", 4134 .name = "ext4",
@@ -3995,11 +4158,15 @@ static int __init init_ext4_fs(void)
3995 err = init_inodecache(); 4158 err = init_inodecache();
3996 if (err) 4159 if (err)
3997 goto out1; 4160 goto out1;
4161 register_as_ext2();
4162 register_as_ext3();
3998 err = register_filesystem(&ext4_fs_type); 4163 err = register_filesystem(&ext4_fs_type);
3999 if (err) 4164 if (err)
4000 goto out; 4165 goto out;
4001 return 0; 4166 return 0;
4002out: 4167out:
4168 unregister_as_ext2();
4169 unregister_as_ext3();
4003 destroy_inodecache(); 4170 destroy_inodecache();
4004out1: 4171out1:
4005 exit_ext4_xattr(); 4172 exit_ext4_xattr();
@@ -4015,6 +4182,8 @@ out4:
4015 4182
4016static void __exit exit_ext4_fs(void) 4183static void __exit exit_ext4_fs(void)
4017{ 4184{
4185 unregister_as_ext2();
4186 unregister_as_ext3();
4018 unregister_filesystem(&ext4_fs_type); 4187 unregister_filesystem(&ext4_fs_type);
4019 destroy_inodecache(); 4188 destroy_inodecache();
4020 exit_ext4_xattr(); 4189 exit_ext4_xattr();
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fed5b01d7a8d..b4c5aa8489d8 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -92,7 +92,7 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
92 struct mb_cache_entry **); 92 struct mb_cache_entry **);
93static void ext4_xattr_rehash(struct ext4_xattr_header *, 93static void ext4_xattr_rehash(struct ext4_xattr_header *,
94 struct ext4_xattr_entry *); 94 struct ext4_xattr_entry *);
95static int ext4_xattr_list(struct inode *inode, char *buffer, 95static int ext4_xattr_list(struct dentry *dentry, char *buffer,
96 size_t buffer_size); 96 size_t buffer_size);
97 97
98static struct mb_cache *ext4_xattr_cache; 98static struct mb_cache *ext4_xattr_cache;
@@ -140,7 +140,7 @@ ext4_xattr_handler(int name_index)
140ssize_t 140ssize_t
141ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) 141ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
142{ 142{
143 return ext4_xattr_list(dentry->d_inode, buffer, size); 143 return ext4_xattr_list(dentry, buffer, size);
144} 144}
145 145
146static int 146static int
@@ -227,7 +227,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
227 ea_bdebug(bh, "b_count=%d, refcount=%d", 227 ea_bdebug(bh, "b_count=%d, refcount=%d",
228 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 228 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
229 if (ext4_xattr_check_block(bh)) { 229 if (ext4_xattr_check_block(bh)) {
230bad_block: ext4_error(inode->i_sb, __func__, 230bad_block:
231 ext4_error(inode->i_sb,
231 "inode %lu: bad block %llu", inode->i_ino, 232 "inode %lu: bad block %llu", inode->i_ino,
232 EXT4_I(inode)->i_file_acl); 233 EXT4_I(inode)->i_file_acl);
233 error = -EIO; 234 error = -EIO;
@@ -267,7 +268,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
267 void *end; 268 void *end;
268 int error; 269 int error;
269 270
270 if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) 271 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
271 return -ENODATA; 272 return -ENODATA;
272 error = ext4_get_inode_loc(inode, &iloc); 273 error = ext4_get_inode_loc(inode, &iloc);
273 if (error) 274 if (error)
@@ -325,7 +326,7 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
325} 326}
326 327
327static int 328static int
328ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, 329ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
329 char *buffer, size_t buffer_size) 330 char *buffer, size_t buffer_size)
330{ 331{
331 size_t rest = buffer_size; 332 size_t rest = buffer_size;
@@ -335,9 +336,10 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
335 ext4_xattr_handler(entry->e_name_index); 336 ext4_xattr_handler(entry->e_name_index);
336 337
337 if (handler) { 338 if (handler) {
338 size_t size = handler->list(inode, buffer, rest, 339 size_t size = handler->list(dentry, buffer, rest,
339 entry->e_name, 340 entry->e_name,
340 entry->e_name_len); 341 entry->e_name_len,
342 handler->flags);
341 if (buffer) { 343 if (buffer) {
342 if (size > rest) 344 if (size > rest)
343 return -ERANGE; 345 return -ERANGE;
@@ -350,8 +352,9 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
350} 352}
351 353
352static int 354static int
353ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) 355ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
354{ 356{
357 struct inode *inode = dentry->d_inode;
355 struct buffer_head *bh = NULL; 358 struct buffer_head *bh = NULL;
356 int error; 359 int error;
357 360
@@ -369,14 +372,14 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
369 ea_bdebug(bh, "b_count=%d, refcount=%d", 372 ea_bdebug(bh, "b_count=%d, refcount=%d",
370 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 373 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
371 if (ext4_xattr_check_block(bh)) { 374 if (ext4_xattr_check_block(bh)) {
372 ext4_error(inode->i_sb, __func__, 375 ext4_error(inode->i_sb,
373 "inode %lu: bad block %llu", inode->i_ino, 376 "inode %lu: bad block %llu", inode->i_ino,
374 EXT4_I(inode)->i_file_acl); 377 EXT4_I(inode)->i_file_acl);
375 error = -EIO; 378 error = -EIO;
376 goto cleanup; 379 goto cleanup;
377 } 380 }
378 ext4_xattr_cache_insert(bh); 381 ext4_xattr_cache_insert(bh);
379 error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); 382 error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
380 383
381cleanup: 384cleanup:
382 brelse(bh); 385 brelse(bh);
@@ -385,15 +388,16 @@ cleanup:
385} 388}
386 389
387static int 390static int
388ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) 391ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
389{ 392{
393 struct inode *inode = dentry->d_inode;
390 struct ext4_xattr_ibody_header *header; 394 struct ext4_xattr_ibody_header *header;
391 struct ext4_inode *raw_inode; 395 struct ext4_inode *raw_inode;
392 struct ext4_iloc iloc; 396 struct ext4_iloc iloc;
393 void *end; 397 void *end;
394 int error; 398 int error;
395 399
396 if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) 400 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
397 return 0; 401 return 0;
398 error = ext4_get_inode_loc(inode, &iloc); 402 error = ext4_get_inode_loc(inode, &iloc);
399 if (error) 403 if (error)
@@ -404,7 +408,7 @@ ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
404 error = ext4_xattr_check_names(IFIRST(header), end); 408 error = ext4_xattr_check_names(IFIRST(header), end);
405 if (error) 409 if (error)
406 goto cleanup; 410 goto cleanup;
407 error = ext4_xattr_list_entries(inode, IFIRST(header), 411 error = ext4_xattr_list_entries(dentry, IFIRST(header),
408 buffer, buffer_size); 412 buffer, buffer_size);
409 413
410cleanup: 414cleanup:
@@ -423,12 +427,12 @@ cleanup:
423 * used / required on success. 427 * used / required on success.
424 */ 428 */
425static int 429static int
426ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 430ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
427{ 431{
428 int i_error, b_error; 432 int i_error, b_error;
429 433
430 down_read(&EXT4_I(inode)->xattr_sem); 434 down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
431 i_error = ext4_xattr_ibody_list(inode, buffer, buffer_size); 435 i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
432 if (i_error < 0) { 436 if (i_error < 0) {
433 b_error = 0; 437 b_error = 0;
434 } else { 438 } else {
@@ -436,11 +440,11 @@ ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
436 buffer += i_error; 440 buffer += i_error;
437 buffer_size -= i_error; 441 buffer_size -= i_error;
438 } 442 }
439 b_error = ext4_xattr_block_list(inode, buffer, buffer_size); 443 b_error = ext4_xattr_block_list(dentry, buffer, buffer_size);
440 if (b_error < 0) 444 if (b_error < 0)
441 i_error = 0; 445 i_error = 0;
442 } 446 }
443 up_read(&EXT4_I(inode)->xattr_sem); 447 up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
444 return i_error + b_error; 448 return i_error + b_error;
445} 449}
446 450
@@ -482,15 +486,16 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
482 ea_bdebug(bh, "refcount now=0; freeing"); 486 ea_bdebug(bh, "refcount now=0; freeing");
483 if (ce) 487 if (ce)
484 mb_cache_entry_free(ce); 488 mb_cache_entry_free(ce);
485 ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
486 get_bh(bh); 489 get_bh(bh);
487 ext4_forget(handle, 1, inode, bh, bh->b_blocknr); 490 ext4_free_blocks(handle, inode, bh, 0, 1,
491 EXT4_FREE_BLOCKS_METADATA |
492 EXT4_FREE_BLOCKS_FORGET);
488 } else { 493 } else {
489 le32_add_cpu(&BHDR(bh)->h_refcount, -1); 494 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
490 error = ext4_handle_dirty_metadata(handle, inode, bh); 495 error = ext4_handle_dirty_metadata(handle, inode, bh);
491 if (IS_SYNC(inode)) 496 if (IS_SYNC(inode))
492 ext4_handle_sync(handle); 497 ext4_handle_sync(handle);
493 vfs_dq_free_block(inode, 1); 498 dquot_free_block(inode, 1);
494 ea_bdebug(bh, "refcount now=%d; releasing", 499 ea_bdebug(bh, "refcount now=%d; releasing",
495 le32_to_cpu(BHDR(bh)->h_refcount)); 500 le32_to_cpu(BHDR(bh)->h_refcount));
496 if (ce) 501 if (ce)
@@ -661,9 +666,8 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
661 atomic_read(&(bs->bh->b_count)), 666 atomic_read(&(bs->bh->b_count)),
662 le32_to_cpu(BHDR(bs->bh)->h_refcount)); 667 le32_to_cpu(BHDR(bs->bh)->h_refcount));
663 if (ext4_xattr_check_block(bs->bh)) { 668 if (ext4_xattr_check_block(bs->bh)) {
664 ext4_error(sb, __func__, 669 ext4_error(sb, "inode %lu: bad block %llu",
665 "inode %lu: bad block %llu", inode->i_ino, 670 inode->i_ino, EXT4_I(inode)->i_file_acl);
666 EXT4_I(inode)->i_file_acl);
667 error = -EIO; 671 error = -EIO;
668 goto cleanup; 672 goto cleanup;
669 } 673 }
@@ -783,8 +787,8 @@ inserted:
783 else { 787 else {
784 /* The old block is released after updating 788 /* The old block is released after updating
785 the inode. */ 789 the inode. */
786 error = -EDQUOT; 790 error = dquot_alloc_block(inode, 1);
787 if (vfs_dq_alloc_block(inode, 1)) 791 if (error)
788 goto cleanup; 792 goto cleanup;
789 error = ext4_journal_get_write_access(handle, 793 error = ext4_journal_get_write_access(handle,
790 new_bh); 794 new_bh);
@@ -832,7 +836,8 @@ inserted:
832 new_bh = sb_getblk(sb, block); 836 new_bh = sb_getblk(sb, block);
833 if (!new_bh) { 837 if (!new_bh) {
834getblk_failed: 838getblk_failed:
835 ext4_free_blocks(handle, inode, block, 1, 1); 839 ext4_free_blocks(handle, inode, 0, block, 1,
840 EXT4_FREE_BLOCKS_METADATA);
836 error = -EIO; 841 error = -EIO;
837 goto cleanup; 842 goto cleanup;
838 } 843 }
@@ -871,13 +876,12 @@ cleanup:
871 return error; 876 return error;
872 877
873cleanup_dquot: 878cleanup_dquot:
874 vfs_dq_free_block(inode, 1); 879 dquot_free_block(inode, 1);
875 goto cleanup; 880 goto cleanup;
876 881
877bad_block: 882bad_block:
878 ext4_error(inode->i_sb, __func__, 883 ext4_error(inode->i_sb, "inode %lu: bad block %llu",
879 "inode %lu: bad block %llu", inode->i_ino, 884 inode->i_ino, EXT4_I(inode)->i_file_acl);
880 EXT4_I(inode)->i_file_acl);
881 goto cleanup; 885 goto cleanup;
882 886
883#undef header 887#undef header
@@ -903,7 +907,7 @@ ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
903 is->s.base = is->s.first = IFIRST(header); 907 is->s.base = is->s.first = IFIRST(header);
904 is->s.here = is->s.first; 908 is->s.here = is->s.first;
905 is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 909 is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
906 if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { 910 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
907 error = ext4_xattr_check_names(IFIRST(header), is->s.end); 911 error = ext4_xattr_check_names(IFIRST(header), is->s.end);
908 if (error) 912 if (error)
909 return error; 913 return error;
@@ -935,10 +939,10 @@ ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
935 header = IHDR(inode, ext4_raw_inode(&is->iloc)); 939 header = IHDR(inode, ext4_raw_inode(&is->iloc));
936 if (!IS_LAST_ENTRY(s->first)) { 940 if (!IS_LAST_ENTRY(s->first)) {
937 header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); 941 header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
938 EXT4_I(inode)->i_state |= EXT4_STATE_XATTR; 942 ext4_set_inode_state(inode, EXT4_STATE_XATTR);
939 } else { 943 } else {
940 header->h_magic = cpu_to_le32(0); 944 header->h_magic = cpu_to_le32(0);
941 EXT4_I(inode)->i_state &= ~EXT4_STATE_XATTR; 945 ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
942 } 946 }
943 return 0; 947 return 0;
944} 948}
@@ -981,17 +985,21 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
981 if (strlen(name) > 255) 985 if (strlen(name) > 255)
982 return -ERANGE; 986 return -ERANGE;
983 down_write(&EXT4_I(inode)->xattr_sem); 987 down_write(&EXT4_I(inode)->xattr_sem);
984 no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND; 988 no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
985 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; 989 ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
986 990
987 error = ext4_get_inode_loc(inode, &is.iloc); 991 error = ext4_get_inode_loc(inode, &is.iloc);
988 if (error) 992 if (error)
989 goto cleanup; 993 goto cleanup;
990 994
991 if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { 995 error = ext4_journal_get_write_access(handle, is.iloc.bh);
996 if (error)
997 goto cleanup;
998
999 if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) {
992 struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); 1000 struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
993 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); 1001 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
994 EXT4_I(inode)->i_state &= ~EXT4_STATE_NEW; 1002 ext4_clear_inode_state(inode, EXT4_STATE_NEW);
995 } 1003 }
996 1004
997 error = ext4_xattr_ibody_find(inode, &i, &is); 1005 error = ext4_xattr_ibody_find(inode, &i, &is);
@@ -1013,9 +1021,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1013 if (flags & XATTR_CREATE) 1021 if (flags & XATTR_CREATE)
1014 goto cleanup; 1022 goto cleanup;
1015 } 1023 }
1016 error = ext4_journal_get_write_access(handle, is.iloc.bh);
1017 if (error)
1018 goto cleanup;
1019 if (!value) { 1024 if (!value) {
1020 if (!is.s.not_found) 1025 if (!is.s.not_found)
1021 error = ext4_xattr_ibody_set(handle, inode, &i, &is); 1026 error = ext4_xattr_ibody_set(handle, inode, &i, &is);
@@ -1046,7 +1051,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1046 ext4_xattr_update_super_block(handle, inode->i_sb); 1051 ext4_xattr_update_super_block(handle, inode->i_sb);
1047 inode->i_ctime = ext4_current_time(inode); 1052 inode->i_ctime = ext4_current_time(inode);
1048 if (!value) 1053 if (!value)
1049 EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; 1054 ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1050 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); 1055 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
1051 /* 1056 /*
1052 * The bh is consumed by ext4_mark_iloc_dirty, even with 1057 * The bh is consumed by ext4_mark_iloc_dirty, even with
@@ -1061,7 +1066,7 @@ cleanup:
1061 brelse(is.iloc.bh); 1066 brelse(is.iloc.bh);
1062 brelse(bs.bh); 1067 brelse(bs.bh);
1063 if (no_expand == 0) 1068 if (no_expand == 0)
1064 EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; 1069 ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1065 up_write(&EXT4_I(inode)->xattr_sem); 1070 up_write(&EXT4_I(inode)->xattr_sem);
1066 return error; 1071 return error;
1067} 1072}
@@ -1189,9 +1194,8 @@ retry:
1189 if (!bh) 1194 if (!bh)
1190 goto cleanup; 1195 goto cleanup;
1191 if (ext4_xattr_check_block(bh)) { 1196 if (ext4_xattr_check_block(bh)) {
1192 ext4_error(inode->i_sb, __func__, 1197 ext4_error(inode->i_sb, "inode %lu: bad block %llu",
1193 "inode %lu: bad block %llu", inode->i_ino, 1198 inode->i_ino, EXT4_I(inode)->i_file_acl);
1194 EXT4_I(inode)->i_file_acl);
1195 error = -EIO; 1199 error = -EIO;
1196 goto cleanup; 1200 goto cleanup;
1197 } 1201 }
@@ -1296,6 +1300,8 @@ retry:
1296 1300
1297 /* Remove the chosen entry from the inode */ 1301 /* Remove the chosen entry from the inode */
1298 error = ext4_xattr_ibody_set(handle, inode, &i, is); 1302 error = ext4_xattr_ibody_set(handle, inode, &i, is);
1303 if (error)
1304 goto cleanup;
1299 1305
1300 entry = IFIRST(header); 1306 entry = IFIRST(header);
1301 if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize) 1307 if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
@@ -1326,6 +1332,8 @@ retry:
1326 goto cleanup; 1332 goto cleanup;
1327 kfree(b_entry_name); 1333 kfree(b_entry_name);
1328 kfree(buffer); 1334 kfree(buffer);
1335 b_entry_name = NULL;
1336 buffer = NULL;
1329 brelse(is->iloc.bh); 1337 brelse(is->iloc.bh);
1330 kfree(is); 1338 kfree(is);
1331 kfree(bs); 1339 kfree(bs);
@@ -1364,16 +1372,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
1364 goto cleanup; 1372 goto cleanup;
1365 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 1373 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1366 if (!bh) { 1374 if (!bh) {
1367 ext4_error(inode->i_sb, __func__, 1375 ext4_error(inode->i_sb, "inode %lu: block %llu read error",
1368 "inode %lu: block %llu read error", inode->i_ino, 1376 inode->i_ino, EXT4_I(inode)->i_file_acl);
1369 EXT4_I(inode)->i_file_acl);
1370 goto cleanup; 1377 goto cleanup;
1371 } 1378 }
1372 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || 1379 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
1373 BHDR(bh)->h_blocks != cpu_to_le32(1)) { 1380 BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1374 ext4_error(inode->i_sb, __func__, 1381 ext4_error(inode->i_sb, "inode %lu: bad block %llu",
1375 "inode %lu: bad block %llu", inode->i_ino, 1382 inode->i_ino, EXT4_I(inode)->i_file_acl);
1376 EXT4_I(inode)->i_file_acl);
1377 goto cleanup; 1383 goto cleanup;
1378 } 1384 }
1379 ext4_xattr_release_block(handle, inode, bh); 1385 ext4_xattr_release_block(handle, inode, bh);
@@ -1498,7 +1504,7 @@ again:
1498 } 1504 }
1499 bh = sb_bread(inode->i_sb, ce->e_block); 1505 bh = sb_bread(inode->i_sb, ce->e_block);
1500 if (!bh) { 1506 if (!bh) {
1501 ext4_error(inode->i_sb, __func__, 1507 ext4_error(inode->i_sb,
1502 "inode %lu: block %lu read error", 1508 "inode %lu: block %lu read error",
1503 inode->i_ino, (unsigned long) ce->e_block); 1509 inode->i_ino, (unsigned long) ce->e_block);
1504 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= 1510 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index ca5f89fc6cae..8b145e98df07 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -7,13 +7,14 @@
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/security.h> 9#include <linux/security.h>
10#include <linux/slab.h>
10#include "ext4_jbd2.h" 11#include "ext4_jbd2.h"
11#include "ext4.h" 12#include "ext4.h"
12#include "xattr.h" 13#include "xattr.h"
13 14
14static size_t 15static size_t
15ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size, 16ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
16 const char *name, size_t name_len) 17 const char *name, size_t name_len, int type)
17{ 18{
18 const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; 19 const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
19 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
@@ -28,23 +29,23 @@ ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size,
28} 29}
29 30
30static int 31static int
31ext4_xattr_security_get(struct inode *inode, const char *name, 32ext4_xattr_security_get(struct dentry *dentry, const char *name,
32 void *buffer, size_t size) 33 void *buffer, size_t size, int type)
33{ 34{
34 if (strcmp(name, "") == 0) 35 if (strcmp(name, "") == 0)
35 return -EINVAL; 36 return -EINVAL;
36 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name, 37 return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
37 buffer, size); 38 name, buffer, size);
38} 39}
39 40
40static int 41static int
41ext4_xattr_security_set(struct inode *inode, const char *name, 42ext4_xattr_security_set(struct dentry *dentry, const char *name,
42 const void *value, size_t size, int flags) 43 const void *value, size_t size, int flags, int type)
43{ 44{
44 if (strcmp(name, "") == 0) 45 if (strcmp(name, "") == 0)
45 return -EINVAL; 46 return -EINVAL;
46 return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name, 47 return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
47 value, size, flags); 48 name, value, size, flags);
48} 49}
49 50
50int 51int
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index ac1a52cf2a37..15b50edc6587 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -14,8 +14,8 @@
14#include "xattr.h" 14#include "xattr.h"
15 15
16static size_t 16static size_t
17ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 17ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
18 const char *name, size_t name_len) 18 const char *name, size_t name_len, int type)
19{ 19{
20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
21 const size_t total_len = prefix_len + name_len + 1; 21 const size_t total_len = prefix_len + name_len + 1;
@@ -32,23 +32,23 @@ ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
32} 32}
33 33
34static int 34static int
35ext4_xattr_trusted_get(struct inode *inode, const char *name, 35ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer,
36 void *buffer, size_t size) 36 size_t size, int type)
37{ 37{
38 if (strcmp(name, "") == 0) 38 if (strcmp(name, "") == 0)
39 return -EINVAL; 39 return -EINVAL;
40 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name, 40 return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
41 buffer, size); 41 name, buffer, size);
42} 42}
43 43
44static int 44static int
45ext4_xattr_trusted_set(struct inode *inode, const char *name, 45ext4_xattr_trusted_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 46 const void *value, size_t size, int flags, int type)
47{ 47{
48 if (strcmp(name, "") == 0) 48 if (strcmp(name, "") == 0)
49 return -EINVAL; 49 return -EINVAL;
50 return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name, 50 return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
51 value, size, flags); 51 name, value, size, flags);
52} 52}
53 53
54struct xattr_handler ext4_xattr_trusted_handler = { 54struct xattr_handler ext4_xattr_trusted_handler = {
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index d91aa61b42aa..c4ce05746ce1 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -13,13 +13,13 @@
13#include "xattr.h" 13#include "xattr.h"
14 14
15static size_t 15static size_t
16ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, 16ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
17 const char *name, size_t name_len) 17 const char *name, size_t name_len, int type)
18{ 18{
19 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 19 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
20 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
21 21
22 if (!test_opt(inode->i_sb, XATTR_USER)) 22 if (!test_opt(dentry->d_sb, XATTR_USER))
23 return 0; 23 return 0;
24 24
25 if (list && total_len <= list_size) { 25 if (list && total_len <= list_size) {
@@ -31,26 +31,27 @@ ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size,
31} 31}
32 32
33static int 33static int
34ext4_xattr_user_get(struct inode *inode, const char *name, 34ext4_xattr_user_get(struct dentry *dentry, const char *name,
35 void *buffer, size_t size) 35 void *buffer, size_t size, int type)
36{ 36{
37 if (strcmp(name, "") == 0) 37 if (strcmp(name, "") == 0)
38 return -EINVAL; 38 return -EINVAL;
39 if (!test_opt(inode->i_sb, XATTR_USER)) 39 if (!test_opt(dentry->d_sb, XATTR_USER))
40 return -EOPNOTSUPP; 40 return -EOPNOTSUPP;
41 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); 41 return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_USER,
42 name, buffer, size);
42} 43}
43 44
44static int 45static int
45ext4_xattr_user_set(struct inode *inode, const char *name, 46ext4_xattr_user_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 47 const void *value, size_t size, int flags, int type)
47{ 48{
48 if (strcmp(name, "") == 0) 49 if (strcmp(name, "") == 0)
49 return -EINVAL; 50 return -EINVAL;
50 if (!test_opt(inode->i_sb, XATTR_USER)) 51 if (!test_opt(dentry->d_sb, XATTR_USER))
51 return -EOPNOTSUPP; 52 return -EOPNOTSUPP;
52 return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name, 53 return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_USER,
53 value, size, flags); 54 name, value, size, flags);
54} 55}
55 56
56struct xattr_handler ext4_xattr_user_handler = { 57struct xattr_handler ext4_xattr_user_handler = {