diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/Kconfig | 11 | ||||
-rw-r--r-- | fs/ext4/acl.c | 74 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 81 | ||||
-rw-r--r-- | fs/ext4/block_validity.c | 9 | ||||
-rw-r--r-- | fs/ext4/dir.c | 14 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 148 | ||||
-rw-r--r-- | fs/ext4/ext4_extents.h | 3 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.c | 86 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 68 | ||||
-rw-r--r-- | fs/ext4/extents.c | 394 | ||||
-rw-r--r-- | fs/ext4/file.c | 13 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 68 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 52 | ||||
-rw-r--r-- | fs/ext4/inode.c | 965 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 41 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 179 | ||||
-rw-r--r-- | fs/ext4/mballoc.h | 10 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 63 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 313 | ||||
-rw-r--r-- | fs/ext4/namei.c | 124 | ||||
-rw-r--r-- | fs/ext4/resize.c | 104 | ||||
-rw-r--r-- | fs/ext4/super.c | 537 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 112 | ||||
-rw-r--r-- | fs/ext4/xattr_security.c | 21 | ||||
-rw-r--r-- | fs/ext4/xattr_trusted.c | 20 | ||||
-rw-r--r-- | fs/ext4/xattr_user.c | 25 |
26 files changed, 2108 insertions, 1427 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 9f2d45d75b1a..9ed1bb1f319f 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig | |||
@@ -26,6 +26,17 @@ config EXT4_FS | |||
26 | 26 | ||
27 | If unsure, say N. | 27 | If unsure, say N. |
28 | 28 | ||
29 | config EXT4_USE_FOR_EXT23 | ||
30 | bool "Use ext4 for ext2/ext3 file systems" | ||
31 | depends on EXT4_FS | ||
32 | depends on EXT3_FS=n || EXT2_FS=n | ||
33 | default y | ||
34 | help | ||
35 | Allow the ext4 file system driver code to be used for ext2 or | ||
36 | ext3 file system mounts. This allows users to reduce their | ||
37 | compiled kernel size by using one file system driver for | ||
38 | ext2, ext3, and ext4 file systems. | ||
39 | |||
29 | config EXT4_FS_XATTR | 40 | config EXT4_FS_XATTR |
30 | bool "Ext4 extended attributes" | 41 | bool "Ext4 extended attributes" |
31 | depends on EXT4_FS | 42 | depends on EXT4_FS |
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 0df88b2a69b0..8a2a29d35a6f 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c | |||
@@ -364,12 +364,12 @@ out: | |||
364 | * Extended attribute handlers | 364 | * Extended attribute handlers |
365 | */ | 365 | */ |
366 | static size_t | 366 | static size_t |
367 | ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, | 367 | ext4_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len, |
368 | const char *name, size_t name_len) | 368 | const char *name, size_t name_len, int type) |
369 | { | 369 | { |
370 | const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); | 370 | const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); |
371 | 371 | ||
372 | if (!test_opt(inode->i_sb, POSIX_ACL)) | 372 | if (!test_opt(dentry->d_sb, POSIX_ACL)) |
373 | return 0; | 373 | return 0; |
374 | if (list && size <= list_len) | 374 | if (list && size <= list_len) |
375 | memcpy(list, POSIX_ACL_XATTR_ACCESS, size); | 375 | memcpy(list, POSIX_ACL_XATTR_ACCESS, size); |
@@ -377,12 +377,12 @@ ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, | |||
377 | } | 377 | } |
378 | 378 | ||
379 | static size_t | 379 | static size_t |
380 | ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, | 380 | ext4_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len, |
381 | const char *name, size_t name_len) | 381 | const char *name, size_t name_len, int type) |
382 | { | 382 | { |
383 | const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); | 383 | const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); |
384 | 384 | ||
385 | if (!test_opt(inode->i_sb, POSIX_ACL)) | 385 | if (!test_opt(dentry->d_sb, POSIX_ACL)) |
386 | return 0; | 386 | return 0; |
387 | if (list && size <= list_len) | 387 | if (list && size <= list_len) |
388 | memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); | 388 | memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); |
@@ -390,15 +390,18 @@ ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, | |||
390 | } | 390 | } |
391 | 391 | ||
392 | static int | 392 | static int |
393 | ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) | 393 | ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer, |
394 | size_t size, int type) | ||
394 | { | 395 | { |
395 | struct posix_acl *acl; | 396 | struct posix_acl *acl; |
396 | int error; | 397 | int error; |
397 | 398 | ||
398 | if (!test_opt(inode->i_sb, POSIX_ACL)) | 399 | if (strcmp(name, "") != 0) |
400 | return -EINVAL; | ||
401 | if (!test_opt(dentry->d_sb, POSIX_ACL)) | ||
399 | return -EOPNOTSUPP; | 402 | return -EOPNOTSUPP; |
400 | 403 | ||
401 | acl = ext4_get_acl(inode, type); | 404 | acl = ext4_get_acl(dentry->d_inode, type); |
402 | if (IS_ERR(acl)) | 405 | if (IS_ERR(acl)) |
403 | return PTR_ERR(acl); | 406 | return PTR_ERR(acl); |
404 | if (acl == NULL) | 407 | if (acl == NULL) |
@@ -410,31 +413,16 @@ ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) | |||
410 | } | 413 | } |
411 | 414 | ||
412 | static int | 415 | static int |
413 | ext4_xattr_get_acl_access(struct inode *inode, const char *name, | 416 | ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, |
414 | void *buffer, size_t size) | 417 | size_t size, int flags, int type) |
415 | { | ||
416 | if (strcmp(name, "") != 0) | ||
417 | return -EINVAL; | ||
418 | return ext4_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); | ||
419 | } | ||
420 | |||
421 | static int | ||
422 | ext4_xattr_get_acl_default(struct inode *inode, const char *name, | ||
423 | void *buffer, size_t size) | ||
424 | { | ||
425 | if (strcmp(name, "") != 0) | ||
426 | return -EINVAL; | ||
427 | return ext4_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); | ||
428 | } | ||
429 | |||
430 | static int | ||
431 | ext4_xattr_set_acl(struct inode *inode, int type, const void *value, | ||
432 | size_t size) | ||
433 | { | 418 | { |
419 | struct inode *inode = dentry->d_inode; | ||
434 | handle_t *handle; | 420 | handle_t *handle; |
435 | struct posix_acl *acl; | 421 | struct posix_acl *acl; |
436 | int error, retries = 0; | 422 | int error, retries = 0; |
437 | 423 | ||
424 | if (strcmp(name, "") != 0) | ||
425 | return -EINVAL; | ||
438 | if (!test_opt(inode->i_sb, POSIX_ACL)) | 426 | if (!test_opt(inode->i_sb, POSIX_ACL)) |
439 | return -EOPNOTSUPP; | 427 | return -EOPNOTSUPP; |
440 | if (!is_owner_or_cap(inode)) | 428 | if (!is_owner_or_cap(inode)) |
@@ -466,34 +454,18 @@ release_and_out: | |||
466 | return error; | 454 | return error; |
467 | } | 455 | } |
468 | 456 | ||
469 | static int | ||
470 | ext4_xattr_set_acl_access(struct inode *inode, const char *name, | ||
471 | const void *value, size_t size, int flags) | ||
472 | { | ||
473 | if (strcmp(name, "") != 0) | ||
474 | return -EINVAL; | ||
475 | return ext4_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); | ||
476 | } | ||
477 | |||
478 | static int | ||
479 | ext4_xattr_set_acl_default(struct inode *inode, const char *name, | ||
480 | const void *value, size_t size, int flags) | ||
481 | { | ||
482 | if (strcmp(name, "") != 0) | ||
483 | return -EINVAL; | ||
484 | return ext4_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); | ||
485 | } | ||
486 | |||
487 | struct xattr_handler ext4_xattr_acl_access_handler = { | 457 | struct xattr_handler ext4_xattr_acl_access_handler = { |
488 | .prefix = POSIX_ACL_XATTR_ACCESS, | 458 | .prefix = POSIX_ACL_XATTR_ACCESS, |
459 | .flags = ACL_TYPE_ACCESS, | ||
489 | .list = ext4_xattr_list_acl_access, | 460 | .list = ext4_xattr_list_acl_access, |
490 | .get = ext4_xattr_get_acl_access, | 461 | .get = ext4_xattr_get_acl, |
491 | .set = ext4_xattr_set_acl_access, | 462 | .set = ext4_xattr_set_acl, |
492 | }; | 463 | }; |
493 | 464 | ||
494 | struct xattr_handler ext4_xattr_acl_default_handler = { | 465 | struct xattr_handler ext4_xattr_acl_default_handler = { |
495 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 466 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
467 | .flags = ACL_TYPE_DEFAULT, | ||
496 | .list = ext4_xattr_list_acl_default, | 468 | .list = ext4_xattr_list_acl_default, |
497 | .get = ext4_xattr_get_acl_default, | 469 | .get = ext4_xattr_get_acl, |
498 | .set = ext4_xattr_set_acl_default, | 470 | .set = ext4_xattr_set_acl, |
499 | }; | 471 | }; |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1d0418980f8d..d2f37a5516c7 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -97,8 +97,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
97 | /* If checksum is bad mark all blocks used to prevent allocation | 97 | /* If checksum is bad mark all blocks used to prevent allocation |
98 | * essentially implementing a per-group read-only flag. */ | 98 | * essentially implementing a per-group read-only flag. */ |
99 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { | 99 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { |
100 | ext4_error(sb, __func__, | 100 | ext4_error(sb, "Checksum bad for group %u", |
101 | "Checksum bad for group %u", block_group); | 101 | block_group); |
102 | ext4_free_blks_set(sb, gdp, 0); | 102 | ext4_free_blks_set(sb, gdp, 0); |
103 | ext4_free_inodes_set(sb, gdp, 0); | 103 | ext4_free_inodes_set(sb, gdp, 0); |
104 | ext4_itable_unused_set(sb, gdp, 0); | 104 | ext4_itable_unused_set(sb, gdp, 0); |
@@ -130,8 +130,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
130 | * to make sure we calculate the right free blocks | 130 | * to make sure we calculate the right free blocks |
131 | */ | 131 | */ |
132 | group_blocks = ext4_blocks_count(sbi->s_es) - | 132 | group_blocks = ext4_blocks_count(sbi->s_es) - |
133 | le32_to_cpu(sbi->s_es->s_first_data_block) - | 133 | ext4_group_first_block_no(sb, ngroups - 1); |
134 | (EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1)); | ||
135 | } else { | 134 | } else { |
136 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); | 135 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); |
137 | } | 136 | } |
@@ -189,9 +188,6 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
189 | * when a file system is mounted (see ext4_fill_super). | 188 | * when a file system is mounted (see ext4_fill_super). |
190 | */ | 189 | */ |
191 | 190 | ||
192 | |||
193 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | ||
194 | |||
195 | /** | 191 | /** |
196 | * ext4_get_group_desc() -- load group descriptor from disk | 192 | * ext4_get_group_desc() -- load group descriptor from disk |
197 | * @sb: super block | 193 | * @sb: super block |
@@ -210,10 +206,8 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, | |||
210 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 206 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
211 | 207 | ||
212 | if (block_group >= ngroups) { | 208 | if (block_group >= ngroups) { |
213 | ext4_error(sb, "ext4_get_group_desc", | 209 | ext4_error(sb, "block_group >= groups_count - block_group = %u," |
214 | "block_group >= groups_count - " | 210 | " groups_count = %u", block_group, ngroups); |
215 | "block_group = %u, groups_count = %u", | ||
216 | block_group, ngroups); | ||
217 | 211 | ||
218 | return NULL; | 212 | return NULL; |
219 | } | 213 | } |
@@ -221,8 +215,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, | |||
221 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); | 215 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); |
222 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 216 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
223 | if (!sbi->s_group_desc[group_desc]) { | 217 | if (!sbi->s_group_desc[group_desc]) { |
224 | ext4_error(sb, "ext4_get_group_desc", | 218 | ext4_error(sb, "Group descriptor not loaded - " |
225 | "Group descriptor not loaded - " | ||
226 | "block_group = %u, group_desc = %u, desc = %u", | 219 | "block_group = %u, group_desc = %u, desc = %u", |
227 | block_group, group_desc, offset); | 220 | block_group, group_desc, offset); |
228 | return NULL; | 221 | return NULL; |
@@ -282,9 +275,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb, | |||
282 | return 1; | 275 | return 1; |
283 | 276 | ||
284 | err_out: | 277 | err_out: |
285 | ext4_error(sb, __func__, | 278 | ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu", |
286 | "Invalid block bitmap - " | ||
287 | "block_group = %d, block = %llu", | ||
288 | block_group, bitmap_blk); | 279 | block_group, bitmap_blk); |
289 | return 0; | 280 | return 0; |
290 | } | 281 | } |
@@ -311,8 +302,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
311 | bitmap_blk = ext4_block_bitmap(sb, desc); | 302 | bitmap_blk = ext4_block_bitmap(sb, desc); |
312 | bh = sb_getblk(sb, bitmap_blk); | 303 | bh = sb_getblk(sb, bitmap_blk); |
313 | if (unlikely(!bh)) { | 304 | if (unlikely(!bh)) { |
314 | ext4_error(sb, __func__, | 305 | ext4_error(sb, "Cannot read block bitmap - " |
315 | "Cannot read block bitmap - " | ||
316 | "block_group = %u, block_bitmap = %llu", | 306 | "block_group = %u, block_bitmap = %llu", |
317 | block_group, bitmap_blk); | 307 | block_group, bitmap_blk); |
318 | return NULL; | 308 | return NULL; |
@@ -354,8 +344,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
354 | set_bitmap_uptodate(bh); | 344 | set_bitmap_uptodate(bh); |
355 | if (bh_submit_read(bh) < 0) { | 345 | if (bh_submit_read(bh) < 0) { |
356 | put_bh(bh); | 346 | put_bh(bh); |
357 | ext4_error(sb, __func__, | 347 | ext4_error(sb, "Cannot read block bitmap - " |
358 | "Cannot read block bitmap - " | ||
359 | "block_group = %u, block_bitmap = %llu", | 348 | "block_group = %u, block_bitmap = %llu", |
360 | block_group, bitmap_blk); | 349 | block_group, bitmap_blk); |
361 | return NULL; | 350 | return NULL; |
@@ -419,8 +408,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
419 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || | 408 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || |
420 | in_range(block + count - 1, ext4_inode_table(sb, desc), | 409 | in_range(block + count - 1, ext4_inode_table(sb, desc), |
421 | sbi->s_itb_per_group)) { | 410 | sbi->s_itb_per_group)) { |
422 | ext4_error(sb, __func__, | 411 | ext4_error(sb, "Adding blocks in system zones - " |
423 | "Adding blocks in system zones - " | ||
424 | "Block = %llu, count = %lu", | 412 | "Block = %llu, count = %lu", |
425 | block, count); | 413 | block, count); |
426 | goto error_return; | 414 | goto error_return; |
@@ -453,8 +441,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
453 | BUFFER_TRACE(bitmap_bh, "clear bit"); | 441 | BUFFER_TRACE(bitmap_bh, "clear bit"); |
454 | if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), | 442 | if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), |
455 | bit + i, bitmap_bh->b_data)) { | 443 | bit + i, bitmap_bh->b_data)) { |
456 | ext4_error(sb, __func__, | 444 | ext4_error(sb, "bit already cleared for block %llu", |
457 | "bit already cleared for block %llu", | ||
458 | (ext4_fsblk_t)(block + i)); | 445 | (ext4_fsblk_t)(block + i)); |
459 | BUFFER_TRACE(bitmap_bh, "bit already cleared"); | 446 | BUFFER_TRACE(bitmap_bh, "bit already cleared"); |
460 | } else { | 447 | } else { |
@@ -499,44 +486,6 @@ error_return: | |||
499 | } | 486 | } |
500 | 487 | ||
501 | /** | 488 | /** |
502 | * ext4_free_blocks() -- Free given blocks and update quota | ||
503 | * @handle: handle for this transaction | ||
504 | * @inode: inode | ||
505 | * @block: start physical block to free | ||
506 | * @count: number of blocks to count | ||
507 | * @metadata: Are these metadata blocks | ||
508 | */ | ||
509 | void ext4_free_blocks(handle_t *handle, struct inode *inode, | ||
510 | ext4_fsblk_t block, unsigned long count, | ||
511 | int metadata) | ||
512 | { | ||
513 | struct super_block *sb; | ||
514 | unsigned long dquot_freed_blocks; | ||
515 | |||
516 | /* this isn't the right place to decide whether block is metadata | ||
517 | * inode.c/extents.c knows better, but for safety ... */ | ||
518 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
519 | metadata = 1; | ||
520 | |||
521 | /* We need to make sure we don't reuse | ||
522 | * block released untill the transaction commit. | ||
523 | * writeback mode have weak data consistency so | ||
524 | * don't force data as metadata when freeing block | ||
525 | * for writeback mode. | ||
526 | */ | ||
527 | if (metadata == 0 && !ext4_should_writeback_data(inode)) | ||
528 | metadata = 1; | ||
529 | |||
530 | sb = inode->i_sb; | ||
531 | |||
532 | ext4_mb_free_blocks(handle, inode, block, count, | ||
533 | metadata, &dquot_freed_blocks); | ||
534 | if (dquot_freed_blocks) | ||
535 | vfs_dq_free_block(inode, dquot_freed_blocks); | ||
536 | return; | ||
537 | } | ||
538 | |||
539 | /** | ||
540 | * ext4_has_free_blocks() | 489 | * ext4_has_free_blocks() |
541 | * @sbi: in-core super block structure. | 490 | * @sbi: in-core super block structure. |
542 | * @nblocks: number of needed blocks | 491 | * @nblocks: number of needed blocks |
@@ -761,7 +710,13 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, | |||
761 | static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, | 710 | static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, |
762 | ext4_group_t group) | 711 | ext4_group_t group) |
763 | { | 712 | { |
764 | return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0; | 713 | if (!ext4_bg_has_super(sb, group)) |
714 | return 0; | ||
715 | |||
716 | if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG)) | ||
717 | return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); | ||
718 | else | ||
719 | return EXT4_SB(sb)->s_gdb_count; | ||
765 | } | 720 | } |
766 | 721 | ||
767 | /** | 722 | /** |
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 50784ef07563..538c48655084 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c | |||
@@ -16,9 +16,9 @@ | |||
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/swap.h> | 17 | #include <linux/swap.h> |
18 | #include <linux/pagemap.h> | 18 | #include <linux/pagemap.h> |
19 | #include <linux/version.h> | ||
20 | #include <linux/blkdev.h> | 19 | #include <linux/blkdev.h> |
21 | #include <linux/mutex.h> | 20 | #include <linux/mutex.h> |
21 | #include <linux/slab.h> | ||
22 | #include "ext4.h" | 22 | #include "ext4.h" |
23 | 23 | ||
24 | struct ext4_system_zone { | 24 | struct ext4_system_zone { |
@@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_block *sb) | |||
160 | if (ext4_bg_has_super(sb, i) && | 160 | if (ext4_bg_has_super(sb, i) && |
161 | ((i < 5) || ((i % flex_size) == 0))) | 161 | ((i < 5) || ((i % flex_size) == 0))) |
162 | add_system_zone(sbi, ext4_group_first_block_no(sb, i), | 162 | add_system_zone(sbi, ext4_group_first_block_no(sb, i), |
163 | sbi->s_gdb_count + 1); | 163 | ext4_bg_num_gdb(sb, i) + 1); |
164 | gdp = ext4_get_group_desc(sb, i, NULL); | 164 | gdp = ext4_get_group_desc(sb, i, NULL); |
165 | ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1); | 165 | ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1); |
166 | if (ret) | 166 | if (ret) |
@@ -206,14 +206,14 @@ void ext4_release_system_zone(struct super_block *sb) | |||
206 | entry = rb_entry(n, struct ext4_system_zone, node); | 206 | entry = rb_entry(n, struct ext4_system_zone, node); |
207 | kmem_cache_free(ext4_system_zone_cachep, entry); | 207 | kmem_cache_free(ext4_system_zone_cachep, entry); |
208 | if (!parent) | 208 | if (!parent) |
209 | EXT4_SB(sb)->system_blks.rb_node = NULL; | 209 | EXT4_SB(sb)->system_blks = RB_ROOT; |
210 | else if (parent->rb_left == n) | 210 | else if (parent->rb_left == n) |
211 | parent->rb_left = NULL; | 211 | parent->rb_left = NULL; |
212 | else if (parent->rb_right == n) | 212 | else if (parent->rb_right == n) |
213 | parent->rb_right = NULL; | 213 | parent->rb_right = NULL; |
214 | n = parent; | 214 | n = parent; |
215 | } | 215 | } |
216 | EXT4_SB(sb)->system_blks.rb_node = NULL; | 216 | EXT4_SB(sb)->system_blks = RB_ROOT; |
217 | } | 217 | } |
218 | 218 | ||
219 | /* | 219 | /* |
@@ -228,6 +228,7 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, | |||
228 | struct rb_node *n = sbi->system_blks.rb_node; | 228 | struct rb_node *n = sbi->system_blks.rb_node; |
229 | 229 | ||
230 | if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || | 230 | if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || |
231 | (start_blk + count < start_blk) || | ||
231 | (start_blk + count > ext4_blocks_count(sbi->s_es))) | 232 | (start_blk + count > ext4_blocks_count(sbi->s_es))) |
232 | return 0; | 233 | return 0; |
233 | while (n) { | 234 | while (n) { |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 9dc93168e262..86cb6d86a048 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -83,10 +83,12 @@ int ext4_check_dir_entry(const char *function, struct inode *dir, | |||
83 | error_msg = "inode out of bounds"; | 83 | error_msg = "inode out of bounds"; |
84 | 84 | ||
85 | if (error_msg != NULL) | 85 | if (error_msg != NULL) |
86 | ext4_error(dir->i_sb, function, | 86 | __ext4_error(dir->i_sb, function, |
87 | "bad entry in directory #%lu: %s - " | 87 | "bad entry in directory #%lu: %s - block=%llu" |
88 | "offset=%u, inode=%u, rec_len=%d, name_len=%d", | 88 | "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", |
89 | dir->i_ino, error_msg, offset, | 89 | dir->i_ino, error_msg, |
90 | (unsigned long long) bh->b_blocknr, | ||
91 | (unsigned) (offset%bh->b_size), offset, | ||
90 | le32_to_cpu(de->inode), | 92 | le32_to_cpu(de->inode), |
91 | rlen, de->name_len); | 93 | rlen, de->name_len); |
92 | return error_msg == NULL ? 1 : 0; | 94 | return error_msg == NULL ? 1 : 0; |
@@ -150,7 +152,7 @@ static int ext4_readdir(struct file *filp, | |||
150 | */ | 152 | */ |
151 | if (!bh) { | 153 | if (!bh) { |
152 | if (!dir_has_error) { | 154 | if (!dir_has_error) { |
153 | ext4_error(sb, __func__, "directory #%lu " | 155 | ext4_error(sb, "directory #%lu " |
154 | "contains a hole at offset %Lu", | 156 | "contains a hole at offset %Lu", |
155 | inode->i_ino, | 157 | inode->i_ino, |
156 | (unsigned long long) filp->f_pos); | 158 | (unsigned long long) filp->f_pos); |
@@ -303,7 +305,7 @@ static void free_rb_tree_fname(struct rb_root *root) | |||
303 | kfree(old); | 305 | kfree(old); |
304 | } | 306 | } |
305 | if (!parent) | 307 | if (!parent) |
306 | root->rb_node = NULL; | 308 | *root = RB_ROOT; |
307 | else if (parent->rb_left == n) | 309 | else if (parent->rb_left == n) |
308 | parent->rb_left = NULL; | 310 | parent->rb_left = NULL; |
309 | else if (parent->rb_right == n) | 311 | else if (parent->rb_right == n) |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8825515eeddd..bf938cf7c5f0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -53,6 +53,12 @@ | |||
53 | #define ext4_debug(f, a...) do {} while (0) | 53 | #define ext4_debug(f, a...) do {} while (0) |
54 | #endif | 54 | #endif |
55 | 55 | ||
56 | #define EXT4_ERROR_INODE(inode, fmt, a...) \ | ||
57 | ext4_error_inode(__func__, (inode), (fmt), ## a); | ||
58 | |||
59 | #define EXT4_ERROR_FILE(file, fmt, a...) \ | ||
60 | ext4_error_file(__func__, (file), (fmt), ## a); | ||
61 | |||
56 | /* data type for block offset of block group */ | 62 | /* data type for block offset of block group */ |
57 | typedef int ext4_grpblk_t; | 63 | typedef int ext4_grpblk_t; |
58 | 64 | ||
@@ -133,14 +139,14 @@ struct mpage_da_data { | |||
133 | int pages_written; | 139 | int pages_written; |
134 | int retval; | 140 | int retval; |
135 | }; | 141 | }; |
136 | #define DIO_AIO_UNWRITTEN 0x1 | 142 | #define EXT4_IO_UNWRITTEN 0x1 |
137 | typedef struct ext4_io_end { | 143 | typedef struct ext4_io_end { |
138 | struct list_head list; /* per-file finished AIO list */ | 144 | struct list_head list; /* per-file finished AIO list */ |
139 | struct inode *inode; /* file being written to */ | 145 | struct inode *inode; /* file being written to */ |
140 | unsigned int flag; /* unwritten or not */ | 146 | unsigned int flag; /* unwritten or not */ |
141 | int error; /* I/O error code */ | 147 | struct page *page; /* page struct for buffer write */ |
142 | ext4_lblk_t offset; /* offset in the file */ | 148 | loff_t offset; /* offset in the file */ |
143 | size_t size; /* size of the extent */ | 149 | ssize_t size; /* size of the extent */ |
144 | struct work_struct work; /* data work queue */ | 150 | struct work_struct work; /* data work queue */ |
145 | } ext4_io_end_t; | 151 | } ext4_io_end_t; |
146 | 152 | ||
@@ -284,10 +290,12 @@ struct flex_groups { | |||
284 | #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ | 290 | #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ |
285 | #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ | 291 | #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ |
286 | #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ | 292 | #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ |
293 | #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ | ||
294 | #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ | ||
287 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ | 295 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ |
288 | 296 | ||
289 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ | 297 | #define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */ |
290 | #define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ | 298 | #define EXT4_FL_USER_MODIFIABLE 0x004B80FF /* User modifiable flags */ |
291 | 299 | ||
292 | /* Flags that should be inherited by new inodes from their parent. */ | 300 | /* Flags that should be inherited by new inodes from their parent. */ |
293 | #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ | 301 | #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ |
@@ -313,17 +321,6 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) | |||
313 | return flags & EXT4_OTHER_FLMASK; | 321 | return flags & EXT4_OTHER_FLMASK; |
314 | } | 322 | } |
315 | 323 | ||
316 | /* | ||
317 | * Inode dynamic state flags | ||
318 | */ | ||
319 | #define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ | ||
320 | #define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ | ||
321 | #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ | ||
322 | #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ | ||
323 | #define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ | ||
324 | #define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */ | ||
325 | #define EXT4_STATE_DIO_UNWRITTEN 0x00000040 /* need convert on dio done*/ | ||
326 | |||
327 | /* Used to pass group descriptor data when online resize is done */ | 324 | /* Used to pass group descriptor data when online resize is done */ |
328 | struct ext4_new_group_input { | 325 | struct ext4_new_group_input { |
329 | __u32 group; /* Group number for this data */ | 326 | __u32 group; /* Group number for this data */ |
@@ -361,19 +358,23 @@ struct ext4_new_group_data { | |||
361 | so set the magic i_delalloc_reserve_flag after taking the | 358 | so set the magic i_delalloc_reserve_flag after taking the |
362 | inode allocation semaphore for */ | 359 | inode allocation semaphore for */ |
363 | #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 | 360 | #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 |
364 | /* Call ext4_da_update_reserve_space() after successfully | ||
365 | allocating the blocks */ | ||
366 | #define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008 | ||
367 | /* caller is from the direct IO path, request to creation of an | 361 | /* caller is from the direct IO path, request to creation of an |
368 | unitialized extents if not allocated, split the uninitialized | 362 | unitialized extents if not allocated, split the uninitialized |
369 | extent if blocks has been preallocated already*/ | 363 | extent if blocks has been preallocated already*/ |
370 | #define EXT4_GET_BLOCKS_DIO 0x0010 | 364 | #define EXT4_GET_BLOCKS_PRE_IO 0x0008 |
371 | #define EXT4_GET_BLOCKS_CONVERT 0x0020 | 365 | #define EXT4_GET_BLOCKS_CONVERT 0x0010 |
372 | #define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ | 366 | #define EXT4_GET_BLOCKS_IO_CREATE_EXT (EXT4_GET_BLOCKS_PRE_IO|\ |
367 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) | ||
368 | /* Convert extent to initialized after IO complete */ | ||
369 | #define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ | ||
373 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) | 370 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) |
374 | /* Convert extent to initialized after direct IO complete */ | 371 | |
375 | #define EXT4_GET_BLOCKS_DIO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ | 372 | /* |
376 | EXT4_GET_BLOCKS_DIO_CREATE_EXT) | 373 | * Flags used by ext4_free_blocks |
374 | */ | ||
375 | #define EXT4_FREE_BLOCKS_METADATA 0x0001 | ||
376 | #define EXT4_FREE_BLOCKS_FORGET 0x0002 | ||
377 | #define EXT4_FREE_BLOCKS_VALIDATED 0x0004 | ||
377 | 378 | ||
378 | /* | 379 | /* |
379 | * ioctl commands | 380 | * ioctl commands |
@@ -627,7 +628,7 @@ struct ext4_inode_info { | |||
627 | * near to their parent directory's inode. | 628 | * near to their parent directory's inode. |
628 | */ | 629 | */ |
629 | ext4_group_t i_block_group; | 630 | ext4_group_t i_block_group; |
630 | __u32 i_state; /* Dynamic state flags for ext4 */ | 631 | unsigned long i_state_flags; /* Dynamic state flags */ |
631 | 632 | ||
632 | ext4_lblk_t i_dir_start_lookup; | 633 | ext4_lblk_t i_dir_start_lookup; |
633 | #ifdef CONFIG_EXT4_FS_XATTR | 634 | #ifdef CONFIG_EXT4_FS_XATTR |
@@ -693,16 +694,30 @@ struct ext4_inode_info { | |||
693 | unsigned int i_reserved_meta_blocks; | 694 | unsigned int i_reserved_meta_blocks; |
694 | unsigned int i_allocated_meta_blocks; | 695 | unsigned int i_allocated_meta_blocks; |
695 | unsigned short i_delalloc_reserved_flag; | 696 | unsigned short i_delalloc_reserved_flag; |
697 | sector_t i_da_metadata_calc_last_lblock; | ||
698 | int i_da_metadata_calc_len; | ||
696 | 699 | ||
697 | /* on-disk additional length */ | 700 | /* on-disk additional length */ |
698 | __u16 i_extra_isize; | 701 | __u16 i_extra_isize; |
699 | 702 | ||
700 | spinlock_t i_block_reservation_lock; | 703 | spinlock_t i_block_reservation_lock; |
704 | #ifdef CONFIG_QUOTA | ||
705 | /* quota space reservation, managed internally by quota code */ | ||
706 | qsize_t i_reserved_quota; | ||
707 | #endif | ||
701 | 708 | ||
702 | /* completed async DIOs that might need unwritten extents handling */ | 709 | /* completed IOs that might need unwritten extents handling */ |
703 | struct list_head i_aio_dio_complete_list; | 710 | struct list_head i_completed_io_list; |
711 | spinlock_t i_completed_io_lock; | ||
704 | /* current io_end structure for async DIO write*/ | 712 | /* current io_end structure for async DIO write*/ |
705 | ext4_io_end_t *cur_aio_dio; | 713 | ext4_io_end_t *cur_aio_dio; |
714 | |||
715 | /* | ||
716 | * Transactions that contain inode's metadata needed to complete | ||
717 | * fsync and fdatasync, respectively. | ||
718 | */ | ||
719 | tid_t i_sync_tid; | ||
720 | tid_t i_datasync_tid; | ||
706 | }; | 721 | }; |
707 | 722 | ||
708 | /* | 723 | /* |
@@ -744,12 +759,14 @@ struct ext4_inode_info { | |||
744 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ | 759 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ |
745 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ | 760 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ |
746 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ | 761 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ |
762 | #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ | ||
747 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ | 763 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ |
748 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 764 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
749 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | 765 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ |
750 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 766 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
751 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | 767 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ |
752 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ | 768 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ |
769 | #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ | ||
753 | 770 | ||
754 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt | 771 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt |
755 | #define set_opt(o, opt) o |= EXT4_MOUNT_##opt | 772 | #define set_opt(o, opt) o |= EXT4_MOUNT_##opt |
@@ -997,7 +1014,7 @@ struct ext4_sb_info { | |||
997 | atomic_t s_lock_busy; | 1014 | atomic_t s_lock_busy; |
998 | 1015 | ||
999 | /* locality groups */ | 1016 | /* locality groups */ |
1000 | struct ext4_locality_group *s_locality_groups; | 1017 | struct ext4_locality_group __percpu *s_locality_groups; |
1001 | 1018 | ||
1002 | /* for write statistics */ | 1019 | /* for write statistics */ |
1003 | unsigned long s_sectors_written_start; | 1020 | unsigned long s_sectors_written_start; |
@@ -1033,6 +1050,34 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | |||
1033 | (ino >= EXT4_FIRST_INO(sb) && | 1050 | (ino >= EXT4_FIRST_INO(sb) && |
1034 | ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); | 1051 | ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); |
1035 | } | 1052 | } |
1053 | |||
1054 | /* | ||
1055 | * Inode dynamic state flags | ||
1056 | */ | ||
1057 | enum { | ||
1058 | EXT4_STATE_JDATA, /* journaled data exists */ | ||
1059 | EXT4_STATE_NEW, /* inode is newly created */ | ||
1060 | EXT4_STATE_XATTR, /* has in-inode xattrs */ | ||
1061 | EXT4_STATE_NO_EXPAND, /* No space for expansion */ | ||
1062 | EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */ | ||
1063 | EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ | ||
1064 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ | ||
1065 | }; | ||
1066 | |||
1067 | static inline int ext4_test_inode_state(struct inode *inode, int bit) | ||
1068 | { | ||
1069 | return test_bit(bit, &EXT4_I(inode)->i_state_flags); | ||
1070 | } | ||
1071 | |||
1072 | static inline void ext4_set_inode_state(struct inode *inode, int bit) | ||
1073 | { | ||
1074 | set_bit(bit, &EXT4_I(inode)->i_state_flags); | ||
1075 | } | ||
1076 | |||
1077 | static inline void ext4_clear_inode_state(struct inode *inode, int bit) | ||
1078 | { | ||
1079 | clear_bit(bit, &EXT4_I(inode)->i_state_flags); | ||
1080 | } | ||
1036 | #else | 1081 | #else |
1037 | /* Assume that user mode programs are passing in an ext4fs superblock, not | 1082 | /* Assume that user mode programs are passing in an ext4fs superblock, not |
1038 | * a kernel struct super_block. This will allow us to call the feature-test | 1083 | * a kernel struct super_block. This will allow us to call the feature-test |
@@ -1109,6 +1154,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | |||
1109 | #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 | 1154 | #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 |
1110 | #define EXT4_FEATURE_INCOMPAT_MMP 0x0100 | 1155 | #define EXT4_FEATURE_INCOMPAT_MMP 0x0100 |
1111 | #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 | 1156 | #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 |
1157 | #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ | ||
1158 | #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ | ||
1112 | 1159 | ||
1113 | #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR | 1160 | #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR |
1114 | #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ | 1161 | #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ |
@@ -1324,8 +1371,6 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
1324 | ext4_fsblk_t goal, unsigned long *count, int *errp); | 1371 | ext4_fsblk_t goal, unsigned long *count, int *errp); |
1325 | extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); | 1372 | extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); |
1326 | extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); | 1373 | extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); |
1327 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, | ||
1328 | ext4_fsblk_t block, unsigned long count, int metadata); | ||
1329 | extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | 1374 | extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, |
1330 | ext4_fsblk_t block, unsigned long count); | 1375 | ext4_fsblk_t block, unsigned long count); |
1331 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); | 1376 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); |
@@ -1384,16 +1429,15 @@ extern int ext4_mb_reserve_blocks(struct super_block *, int); | |||
1384 | extern void ext4_discard_preallocations(struct inode *); | 1429 | extern void ext4_discard_preallocations(struct inode *); |
1385 | extern int __init init_ext4_mballoc(void); | 1430 | extern int __init init_ext4_mballoc(void); |
1386 | extern void exit_ext4_mballoc(void); | 1431 | extern void exit_ext4_mballoc(void); |
1387 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, | 1432 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, |
1388 | ext4_fsblk_t, unsigned long, int, unsigned long *); | 1433 | struct buffer_head *bh, ext4_fsblk_t block, |
1434 | unsigned long count, int flags); | ||
1389 | extern int ext4_mb_add_groupinfo(struct super_block *sb, | 1435 | extern int ext4_mb_add_groupinfo(struct super_block *sb, |
1390 | ext4_group_t i, struct ext4_group_desc *desc); | 1436 | ext4_group_t i, struct ext4_group_desc *desc); |
1391 | extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); | 1437 | extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); |
1392 | extern void ext4_mb_put_buddy_cache_lock(struct super_block *, | 1438 | extern void ext4_mb_put_buddy_cache_lock(struct super_block *, |
1393 | ext4_group_t, int); | 1439 | ext4_group_t, int); |
1394 | /* inode.c */ | 1440 | /* inode.c */ |
1395 | int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, | ||
1396 | struct buffer_head *bh, ext4_fsblk_t blocknr); | ||
1397 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, | 1441 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, |
1398 | ext4_lblk_t, int, int *); | 1442 | ext4_lblk_t, int, int *); |
1399 | struct buffer_head *ext4_bread(handle_t *, struct inode *, | 1443 | struct buffer_head *ext4_bread(handle_t *, struct inode *, |
@@ -1402,7 +1446,7 @@ int ext4_get_block(struct inode *inode, sector_t iblock, | |||
1402 | struct buffer_head *bh_result, int create); | 1446 | struct buffer_head *bh_result, int create); |
1403 | 1447 | ||
1404 | extern struct inode *ext4_iget(struct super_block *, unsigned long); | 1448 | extern struct inode *ext4_iget(struct super_block *, unsigned long); |
1405 | extern int ext4_write_inode(struct inode *, int); | 1449 | extern int ext4_write_inode(struct inode *, struct writeback_control *); |
1406 | extern int ext4_setattr(struct dentry *, struct iattr *); | 1450 | extern int ext4_setattr(struct dentry *, struct iattr *); |
1407 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | 1451 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, |
1408 | struct kstat *stat); | 1452 | struct kstat *stat); |
@@ -1424,8 +1468,10 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | |||
1424 | extern int ext4_block_truncate_page(handle_t *handle, | 1468 | extern int ext4_block_truncate_page(handle_t *handle, |
1425 | struct address_space *mapping, loff_t from); | 1469 | struct address_space *mapping, loff_t from); |
1426 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 1470 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
1427 | extern qsize_t ext4_get_reserved_space(struct inode *inode); | 1471 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); |
1428 | extern int flush_aio_dio_completed_IO(struct inode *inode); | 1472 | extern int flush_completed_IO(struct inode *inode); |
1473 | extern void ext4_da_update_reserve_space(struct inode *inode, | ||
1474 | int used, int quota_claim); | ||
1429 | /* ioctl.c */ | 1475 | /* ioctl.c */ |
1430 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); | 1476 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); |
1431 | extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); | 1477 | extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); |
@@ -1449,13 +1495,20 @@ extern int ext4_group_extend(struct super_block *sb, | |||
1449 | ext4_fsblk_t n_blocks_count); | 1495 | ext4_fsblk_t n_blocks_count); |
1450 | 1496 | ||
1451 | /* super.c */ | 1497 | /* super.c */ |
1452 | extern void ext4_error(struct super_block *, const char *, const char *, ...) | 1498 | extern void __ext4_error(struct super_block *, const char *, const char *, ...) |
1499 | __attribute__ ((format (printf, 3, 4))); | ||
1500 | #define ext4_error(sb, message...) __ext4_error(sb, __func__, ## message) | ||
1501 | extern void ext4_error_inode(const char *, struct inode *, const char *, ...) | ||
1502 | __attribute__ ((format (printf, 3, 4))); | ||
1503 | extern void ext4_error_file(const char *, struct file *, const char *, ...) | ||
1453 | __attribute__ ((format (printf, 3, 4))); | 1504 | __attribute__ ((format (printf, 3, 4))); |
1454 | extern void __ext4_std_error(struct super_block *, const char *, int); | 1505 | extern void __ext4_std_error(struct super_block *, const char *, int); |
1455 | extern void ext4_abort(struct super_block *, const char *, const char *, ...) | 1506 | extern void ext4_abort(struct super_block *, const char *, const char *, ...) |
1456 | __attribute__ ((format (printf, 3, 4))); | 1507 | __attribute__ ((format (printf, 3, 4))); |
1457 | extern void ext4_warning(struct super_block *, const char *, const char *, ...) | 1508 | extern void __ext4_warning(struct super_block *, const char *, |
1509 | const char *, ...) | ||
1458 | __attribute__ ((format (printf, 3, 4))); | 1510 | __attribute__ ((format (printf, 3, 4))); |
1511 | #define ext4_warning(sb, message...) __ext4_warning(sb, __func__, ## message) | ||
1459 | extern void ext4_msg(struct super_block *, const char *, const char *, ...) | 1512 | extern void ext4_msg(struct super_block *, const char *, const char *, ...) |
1460 | __attribute__ ((format (printf, 3, 4))); | 1513 | __attribute__ ((format (printf, 3, 4))); |
1461 | extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, | 1514 | extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, |
@@ -1728,7 +1781,7 @@ extern void ext4_ext_release(struct super_block *); | |||
1728 | extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, | 1781 | extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, |
1729 | loff_t len); | 1782 | loff_t len); |
1730 | extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | 1783 | extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, |
1731 | loff_t len); | 1784 | ssize_t len); |
1732 | extern int ext4_get_blocks(handle_t *handle, struct inode *inode, | 1785 | extern int ext4_get_blocks(handle_t *handle, struct inode *inode, |
1733 | sector_t block, unsigned int max_blocks, | 1786 | sector_t block, unsigned int max_blocks, |
1734 | struct buffer_head *bh, int flags); | 1787 | struct buffer_head *bh, int flags); |
@@ -1740,6 +1793,15 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1740 | __u64 len, __u64 *moved_len); | 1793 | __u64 len, __u64 *moved_len); |
1741 | 1794 | ||
1742 | 1795 | ||
1796 | /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ | ||
1797 | enum ext4_state_bits { | ||
1798 | BH_Uninit /* blocks are allocated but uninitialized on disk */ | ||
1799 | = BH_JBDPrivateStart, | ||
1800 | }; | ||
1801 | |||
1802 | BUFFER_FNS(Uninit, uninit) | ||
1803 | TAS_BUFFER_FNS(Uninit, uninit) | ||
1804 | |||
1743 | /* | 1805 | /* |
1744 | * Add new method to test whether block and inode bitmaps are properly | 1806 | * Add new method to test whether block and inode bitmaps are properly |
1745 | * initialized. With uninit_bg reading the block from disk is not enough | 1807 | * initialized. With uninit_bg reading the block from disk is not enough |
@@ -1757,6 +1819,8 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) | |||
1757 | set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); | 1819 | set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); |
1758 | } | 1820 | } |
1759 | 1821 | ||
1822 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | ||
1823 | |||
1760 | #endif /* __KERNEL__ */ | 1824 | #endif /* __KERNEL__ */ |
1761 | 1825 | ||
1762 | #endif /* _EXT4_H */ | 1826 | #endif /* _EXT4_H */ |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 2ca686454e87..bdb6ce7e2eb4 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext) | |||
225 | ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); | 225 | ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); |
226 | } | 226 | } |
227 | 227 | ||
228 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); | 228 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, |
229 | sector_t lblocks); | ||
229 | extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); | 230 | extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); |
230 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); | 231 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); |
231 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); | 232 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 6a9409920dee..53d2764d71ca 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
@@ -4,6 +4,8 @@ | |||
4 | 4 | ||
5 | #include "ext4_jbd2.h" | 5 | #include "ext4_jbd2.h" |
6 | 6 | ||
7 | #include <trace/events/ext4.h> | ||
8 | |||
7 | int __ext4_journal_get_undo_access(const char *where, handle_t *handle, | 9 | int __ext4_journal_get_undo_access(const char *where, handle_t *handle, |
8 | struct buffer_head *bh) | 10 | struct buffer_head *bh) |
9 | { | 11 | { |
@@ -32,35 +34,69 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle, | |||
32 | return err; | 34 | return err; |
33 | } | 35 | } |
34 | 36 | ||
35 | int __ext4_journal_forget(const char *where, handle_t *handle, | 37 | /* |
36 | struct buffer_head *bh) | 38 | * The ext4 forget function must perform a revoke if we are freeing data |
39 | * which has been journaled. Metadata (eg. indirect blocks) must be | ||
40 | * revoked in all cases. | ||
41 | * | ||
42 | * "bh" may be NULL: a metadata block may have been freed from memory | ||
43 | * but there may still be a record of it in the journal, and that record | ||
44 | * still needs to be revoked. | ||
45 | * | ||
46 | * If the handle isn't valid we're not journaling, but we still need to | ||
47 | * call bforget() to put the buffer head. | ||
48 | */ | ||
49 | int __ext4_forget(const char *where, handle_t *handle, int is_metadata, | ||
50 | struct inode *inode, struct buffer_head *bh, | ||
51 | ext4_fsblk_t blocknr) | ||
37 | { | 52 | { |
38 | int err = 0; | 53 | int err; |
39 | 54 | ||
40 | if (ext4_handle_valid(handle)) { | 55 | might_sleep(); |
41 | err = jbd2_journal_forget(handle, bh); | 56 | |
42 | if (err) | 57 | trace_ext4_forget(inode, is_metadata, blocknr); |
43 | ext4_journal_abort_handle(where, __func__, bh, | 58 | BUFFER_TRACE(bh, "enter"); |
44 | handle, err); | 59 | |
45 | } | 60 | jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " |
46 | else | 61 | "data mode %x\n", |
62 | bh, is_metadata, inode->i_mode, | ||
63 | test_opt(inode->i_sb, DATA_FLAGS)); | ||
64 | |||
65 | /* In the no journal case, we can just do a bforget and return */ | ||
66 | if (!ext4_handle_valid(handle)) { | ||
47 | bforget(bh); | 67 | bforget(bh); |
48 | return err; | 68 | return 0; |
49 | } | 69 | } |
50 | 70 | ||
51 | int __ext4_journal_revoke(const char *where, handle_t *handle, | 71 | /* Never use the revoke function if we are doing full data |
52 | ext4_fsblk_t blocknr, struct buffer_head *bh) | 72 | * journaling: there is no need to, and a V1 superblock won't |
53 | { | 73 | * support it. Otherwise, only skip the revoke on un-journaled |
54 | int err = 0; | 74 | * data blocks. */ |
55 | 75 | ||
56 | if (ext4_handle_valid(handle)) { | 76 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || |
57 | err = jbd2_journal_revoke(handle, blocknr, bh); | 77 | (!is_metadata && !ext4_should_journal_data(inode))) { |
58 | if (err) | 78 | if (bh) { |
59 | ext4_journal_abort_handle(where, __func__, bh, | 79 | BUFFER_TRACE(bh, "call jbd2_journal_forget"); |
60 | handle, err); | 80 | err = jbd2_journal_forget(handle, bh); |
81 | if (err) | ||
82 | ext4_journal_abort_handle(where, __func__, bh, | ||
83 | handle, err); | ||
84 | return err; | ||
85 | } | ||
86 | return 0; | ||
61 | } | 87 | } |
62 | else | 88 | |
63 | bforget(bh); | 89 | /* |
90 | * data!=journal && (is_metadata || should_journal_data(inode)) | ||
91 | */ | ||
92 | BUFFER_TRACE(bh, "call jbd2_journal_revoke"); | ||
93 | err = jbd2_journal_revoke(handle, blocknr, bh); | ||
94 | if (err) { | ||
95 | ext4_journal_abort_handle(where, __func__, bh, handle, err); | ||
96 | ext4_abort(inode->i_sb, __func__, | ||
97 | "error %d when attempting revoke", err); | ||
98 | } | ||
99 | BUFFER_TRACE(bh, "exit"); | ||
64 | return err; | 100 | return err; |
65 | } | 101 | } |
66 | 102 | ||
@@ -89,14 +125,14 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, | |||
89 | ext4_journal_abort_handle(where, __func__, bh, | 125 | ext4_journal_abort_handle(where, __func__, bh, |
90 | handle, err); | 126 | handle, err); |
91 | } else { | 127 | } else { |
92 | if (inode && bh) | 128 | if (inode) |
93 | mark_buffer_dirty_inode(bh, inode); | 129 | mark_buffer_dirty_inode(bh, inode); |
94 | else | 130 | else |
95 | mark_buffer_dirty(bh); | 131 | mark_buffer_dirty(bh); |
96 | if (inode && inode_needs_sync(inode)) { | 132 | if (inode && inode_needs_sync(inode)) { |
97 | sync_dirty_buffer(bh); | 133 | sync_dirty_buffer(bh); |
98 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | 134 | if (buffer_req(bh) && !buffer_uptodate(bh)) { |
99 | ext4_error(inode->i_sb, __func__, | 135 | ext4_error(inode->i_sb, |
100 | "IO error syncing inode, " | 136 | "IO error syncing inode, " |
101 | "inode=%lu, block=%llu", | 137 | "inode=%lu, block=%llu", |
102 | inode->i_ino, | 138 | inode->i_ino, |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index a2865980342f..b79ad5126468 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -49,7 +49,7 @@ | |||
49 | 49 | ||
50 | #define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ | 50 | #define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ |
51 | EXT4_XATTR_TRANS_BLOCKS - 2 + \ | 51 | EXT4_XATTR_TRANS_BLOCKS - 2 + \ |
52 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) | 52 | EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) |
53 | 53 | ||
54 | /* | 54 | /* |
55 | * Define the number of metadata blocks we need to account to modify data. | 55 | * Define the number of metadata blocks we need to account to modify data. |
@@ -57,7 +57,7 @@ | |||
57 | * This include super block, inode block, quota blocks and xattr blocks | 57 | * This include super block, inode block, quota blocks and xattr blocks |
58 | */ | 58 | */ |
59 | #define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ | 59 | #define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ |
60 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) | 60 | EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) |
61 | 61 | ||
62 | /* Delete operations potentially hit one directory's namespace plus an | 62 | /* Delete operations potentially hit one directory's namespace plus an |
63 | * entire inode, plus arbitrary amounts of bitmap/indirection data. Be | 63 | * entire inode, plus arbitrary amounts of bitmap/indirection data. Be |
@@ -92,6 +92,7 @@ | |||
92 | * but inode, sb and group updates are done only once */ | 92 | * but inode, sb and group updates are done only once */ |
93 | #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ | 93 | #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ |
94 | (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) | 94 | (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) |
95 | |||
95 | #define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ | 96 | #define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ |
96 | (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) | 97 | (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) |
97 | #else | 98 | #else |
@@ -99,6 +100,9 @@ | |||
99 | #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 | 100 | #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 |
100 | #define EXT4_QUOTA_DEL_BLOCKS(sb) 0 | 101 | #define EXT4_QUOTA_DEL_BLOCKS(sb) 0 |
101 | #endif | 102 | #endif |
103 | #define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) | ||
104 | #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) | ||
105 | #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) | ||
102 | 106 | ||
103 | int | 107 | int |
104 | ext4_mark_iloc_dirty(handle_t *handle, | 108 | ext4_mark_iloc_dirty(handle_t *handle, |
@@ -116,12 +120,8 @@ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, | |||
116 | int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); | 120 | int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); |
117 | 121 | ||
118 | /* | 122 | /* |
119 | * Wrapper functions with which ext4 calls into JBD. The intent here is | 123 | * Wrapper functions with which ext4 calls into JBD. |
120 | * to allow these to be turned into appropriate stubs so ext4 can control | ||
121 | * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't | ||
122 | * been done yet. | ||
123 | */ | 124 | */ |
124 | |||
125 | void ext4_journal_abort_handle(const char *caller, const char *err_fn, | 125 | void ext4_journal_abort_handle(const char *caller, const char *err_fn, |
126 | struct buffer_head *bh, handle_t *handle, int err); | 126 | struct buffer_head *bh, handle_t *handle, int err); |
127 | 127 | ||
@@ -131,13 +131,9 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle, | |||
131 | int __ext4_journal_get_write_access(const char *where, handle_t *handle, | 131 | int __ext4_journal_get_write_access(const char *where, handle_t *handle, |
132 | struct buffer_head *bh); | 132 | struct buffer_head *bh); |
133 | 133 | ||
134 | /* When called with an invalid handle, this will still do a put on the BH */ | 134 | int __ext4_forget(const char *where, handle_t *handle, int is_metadata, |
135 | int __ext4_journal_forget(const char *where, handle_t *handle, | 135 | struct inode *inode, struct buffer_head *bh, |
136 | struct buffer_head *bh); | 136 | ext4_fsblk_t blocknr); |
137 | |||
138 | /* When called with an invalid handle, this will still do a put on the BH */ | ||
139 | int __ext4_journal_revoke(const char *where, handle_t *handle, | ||
140 | ext4_fsblk_t blocknr, struct buffer_head *bh); | ||
141 | 137 | ||
142 | int __ext4_journal_get_create_access(const char *where, | 138 | int __ext4_journal_get_create_access(const char *where, |
143 | handle_t *handle, struct buffer_head *bh); | 139 | handle_t *handle, struct buffer_head *bh); |
@@ -149,12 +145,11 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, | |||
149 | __ext4_journal_get_undo_access(__func__, (handle), (bh)) | 145 | __ext4_journal_get_undo_access(__func__, (handle), (bh)) |
150 | #define ext4_journal_get_write_access(handle, bh) \ | 146 | #define ext4_journal_get_write_access(handle, bh) \ |
151 | __ext4_journal_get_write_access(__func__, (handle), (bh)) | 147 | __ext4_journal_get_write_access(__func__, (handle), (bh)) |
152 | #define ext4_journal_revoke(handle, blocknr, bh) \ | 148 | #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ |
153 | __ext4_journal_revoke(__func__, (handle), (blocknr), (bh)) | 149 | __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\ |
150 | (block_nr)) | ||
154 | #define ext4_journal_get_create_access(handle, bh) \ | 151 | #define ext4_journal_get_create_access(handle, bh) \ |
155 | __ext4_journal_get_create_access(__func__, (handle), (bh)) | 152 | __ext4_journal_get_create_access(__func__, (handle), (bh)) |
156 | #define ext4_journal_forget(handle, bh) \ | ||
157 | __ext4_journal_forget(__func__, (handle), (bh)) | ||
158 | #define ext4_handle_dirty_metadata(handle, inode, bh) \ | 153 | #define ext4_handle_dirty_metadata(handle, inode, bh) \ |
159 | __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh)) | 154 | __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh)) |
160 | 155 | ||
@@ -254,6 +249,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) | |||
254 | return 0; | 249 | return 0; |
255 | } | 250 | } |
256 | 251 | ||
252 | static inline void ext4_update_inode_fsync_trans(handle_t *handle, | ||
253 | struct inode *inode, | ||
254 | int datasync) | ||
255 | { | ||
256 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
257 | |||
258 | if (ext4_handle_valid(handle)) { | ||
259 | ei->i_sync_tid = handle->h_transaction->t_tid; | ||
260 | if (datasync) | ||
261 | ei->i_datasync_tid = handle->h_transaction->t_tid; | ||
262 | } | ||
263 | } | ||
264 | |||
257 | /* super.c */ | 265 | /* super.c */ |
258 | int ext4_force_commit(struct super_block *sb); | 266 | int ext4_force_commit(struct super_block *sb); |
259 | 267 | ||
@@ -296,4 +304,28 @@ static inline int ext4_should_writeback_data(struct inode *inode) | |||
296 | return 0; | 304 | return 0; |
297 | } | 305 | } |
298 | 306 | ||
307 | /* | ||
308 | * This function controls whether or not we should try to go down the | ||
309 | * dioread_nolock code paths, which makes it safe to avoid taking | ||
310 | * i_mutex for direct I/O reads. This only works for extent-based | ||
311 | * files, and it doesn't work for nobh or if data journaling is | ||
312 | * enabled, since the dioread_nolock code uses b_private to pass | ||
313 | * information back to the I/O completion handler, and this conflicts | ||
314 | * with the jbd's use of b_private. | ||
315 | */ | ||
316 | static inline int ext4_should_dioread_nolock(struct inode *inode) | ||
317 | { | ||
318 | if (!test_opt(inode->i_sb, DIOREAD_NOLOCK)) | ||
319 | return 0; | ||
320 | if (test_opt(inode->i_sb, NOBH)) | ||
321 | return 0; | ||
322 | if (!S_ISREG(inode->i_mode)) | ||
323 | return 0; | ||
324 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
325 | return 0; | ||
326 | if (ext4_should_journal_data(inode)) | ||
327 | return 0; | ||
328 | return 1; | ||
329 | } | ||
330 | |||
299 | #endif /* _EXT4_JBD2_H */ | 331 | #endif /* _EXT4_JBD2_H */ |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 715264b4bae4..94c8ee81f5e1 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -195,8 +195,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
195 | if (S_ISREG(inode->i_mode)) | 195 | if (S_ISREG(inode->i_mode)) |
196 | block_group++; | 196 | block_group++; |
197 | } | 197 | } |
198 | bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + | 198 | bg_start = ext4_group_first_block_no(inode->i_sb, block_group); |
199 | le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block); | ||
200 | last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; | 199 | last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; |
201 | 200 | ||
202 | /* | 201 | /* |
@@ -296,29 +295,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) | |||
296 | * to allocate @blocks | 295 | * to allocate @blocks |
297 | * Worst case is one block per extent | 296 | * Worst case is one block per extent |
298 | */ | 297 | */ |
299 | int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) | 298 | int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock) |
300 | { | 299 | { |
301 | int lcap, icap, rcap, leafs, idxs, num; | 300 | struct ext4_inode_info *ei = EXT4_I(inode); |
302 | int newextents = blocks; | 301 | int idxs, num = 0; |
303 | |||
304 | rcap = ext4_ext_space_root_idx(inode, 0); | ||
305 | lcap = ext4_ext_space_block(inode, 0); | ||
306 | icap = ext4_ext_space_block_idx(inode, 0); | ||
307 | 302 | ||
308 | /* number of new leaf blocks needed */ | 303 | idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) |
309 | num = leafs = (newextents + lcap - 1) / lcap; | 304 | / sizeof(struct ext4_extent_idx)); |
310 | 305 | ||
311 | /* | 306 | /* |
312 | * Worse case, we need separate index block(s) | 307 | * If the new delayed allocation block is contiguous with the |
313 | * to link all new leaf blocks | 308 | * previous da block, it can share index blocks with the |
309 | * previous block, so we only need to allocate a new index | ||
310 | * block every idxs leaf blocks. At idxs**2 blocks, we need | ||
311 | * an additional index block, and at idxs**3 blocks, yet | ||
312 | * another index block. | ||
314 | */ | 313 | */ |
315 | idxs = (leafs + icap - 1) / icap; | 314 | if (ei->i_da_metadata_calc_len && |
316 | do { | 315 | ei->i_da_metadata_calc_last_lblock+1 == lblock) { |
317 | num += idxs; | 316 | if ((ei->i_da_metadata_calc_len % idxs) == 0) |
318 | idxs = (idxs + icap - 1) / icap; | 317 | num++; |
319 | } while (idxs > rcap); | 318 | if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) |
319 | num++; | ||
320 | if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) { | ||
321 | num++; | ||
322 | ei->i_da_metadata_calc_len = 0; | ||
323 | } else | ||
324 | ei->i_da_metadata_calc_len++; | ||
325 | ei->i_da_metadata_calc_last_lblock++; | ||
326 | return num; | ||
327 | } | ||
320 | 328 | ||
321 | return num; | 329 | /* |
330 | * In the worst case we need a new set of index blocks at | ||
331 | * every level of the inode's extent tree. | ||
332 | */ | ||
333 | ei->i_da_metadata_calc_len = 1; | ||
334 | ei->i_da_metadata_calc_last_lblock = lblock; | ||
335 | return ext_depth(inode) + 1; | ||
322 | } | 336 | } |
323 | 337 | ||
324 | static int | 338 | static int |
@@ -425,7 +439,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode, | |||
425 | return 0; | 439 | return 0; |
426 | 440 | ||
427 | corrupted: | 441 | corrupted: |
428 | ext4_error(inode->i_sb, function, | 442 | __ext4_error(inode->i_sb, function, |
429 | "bad header/extent in inode #%lu: %s - magic %x, " | 443 | "bad header/extent in inode #%lu: %s - magic %x, " |
430 | "entries %u, max %u(%u), depth %u(%u)", | 444 | "entries %u, max %u(%u), depth %u(%u)", |
431 | inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), | 445 | inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), |
@@ -688,7 +702,12 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
688 | } | 702 | } |
689 | eh = ext_block_hdr(bh); | 703 | eh = ext_block_hdr(bh); |
690 | ppos++; | 704 | ppos++; |
691 | BUG_ON(ppos > depth); | 705 | if (unlikely(ppos > depth)) { |
706 | put_bh(bh); | ||
707 | EXT4_ERROR_INODE(inode, | ||
708 | "ppos %d > depth %d", ppos, depth); | ||
709 | goto err; | ||
710 | } | ||
692 | path[ppos].p_bh = bh; | 711 | path[ppos].p_bh = bh; |
693 | path[ppos].p_hdr = eh; | 712 | path[ppos].p_hdr = eh; |
694 | i--; | 713 | i--; |
@@ -734,7 +753,12 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode, | |||
734 | if (err) | 753 | if (err) |
735 | return err; | 754 | return err; |
736 | 755 | ||
737 | BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block)); | 756 | if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) { |
757 | EXT4_ERROR_INODE(inode, | ||
758 | "logical %d == ei_block %d!", | ||
759 | logical, le32_to_cpu(curp->p_idx->ei_block)); | ||
760 | return -EIO; | ||
761 | } | ||
738 | len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; | 762 | len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; |
739 | if (logical > le32_to_cpu(curp->p_idx->ei_block)) { | 763 | if (logical > le32_to_cpu(curp->p_idx->ei_block)) { |
740 | /* insert after */ | 764 | /* insert after */ |
@@ -764,9 +788,17 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode, | |||
764 | ext4_idx_store_pblock(ix, ptr); | 788 | ext4_idx_store_pblock(ix, ptr); |
765 | le16_add_cpu(&curp->p_hdr->eh_entries, 1); | 789 | le16_add_cpu(&curp->p_hdr->eh_entries, 1); |
766 | 790 | ||
767 | BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) | 791 | if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) |
768 | > le16_to_cpu(curp->p_hdr->eh_max)); | 792 | > le16_to_cpu(curp->p_hdr->eh_max))) { |
769 | BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr)); | 793 | EXT4_ERROR_INODE(inode, |
794 | "logical %d == ei_block %d!", | ||
795 | logical, le32_to_cpu(curp->p_idx->ei_block)); | ||
796 | return -EIO; | ||
797 | } | ||
798 | if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { | ||
799 | EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); | ||
800 | return -EIO; | ||
801 | } | ||
770 | 802 | ||
771 | err = ext4_ext_dirty(handle, inode, curp); | 803 | err = ext4_ext_dirty(handle, inode, curp); |
772 | ext4_std_error(inode->i_sb, err); | 804 | ext4_std_error(inode->i_sb, err); |
@@ -804,7 +836,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
804 | 836 | ||
805 | /* if current leaf will be split, then we should use | 837 | /* if current leaf will be split, then we should use |
806 | * border from split point */ | 838 | * border from split point */ |
807 | BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr)); | 839 | if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) { |
840 | EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!"); | ||
841 | return -EIO; | ||
842 | } | ||
808 | if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { | 843 | if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { |
809 | border = path[depth].p_ext[1].ee_block; | 844 | border = path[depth].p_ext[1].ee_block; |
810 | ext_debug("leaf will be split." | 845 | ext_debug("leaf will be split." |
@@ -845,7 +880,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
845 | 880 | ||
846 | /* initialize new leaf */ | 881 | /* initialize new leaf */ |
847 | newblock = ablocks[--a]; | 882 | newblock = ablocks[--a]; |
848 | BUG_ON(newblock == 0); | 883 | if (unlikely(newblock == 0)) { |
884 | EXT4_ERROR_INODE(inode, "newblock == 0!"); | ||
885 | err = -EIO; | ||
886 | goto cleanup; | ||
887 | } | ||
849 | bh = sb_getblk(inode->i_sb, newblock); | 888 | bh = sb_getblk(inode->i_sb, newblock); |
850 | if (!bh) { | 889 | if (!bh) { |
851 | err = -EIO; | 890 | err = -EIO; |
@@ -865,7 +904,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
865 | ex = EXT_FIRST_EXTENT(neh); | 904 | ex = EXT_FIRST_EXTENT(neh); |
866 | 905 | ||
867 | /* move remainder of path[depth] to the new leaf */ | 906 | /* move remainder of path[depth] to the new leaf */ |
868 | BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max); | 907 | if (unlikely(path[depth].p_hdr->eh_entries != |
908 | path[depth].p_hdr->eh_max)) { | ||
909 | EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!", | ||
910 | path[depth].p_hdr->eh_entries, | ||
911 | path[depth].p_hdr->eh_max); | ||
912 | err = -EIO; | ||
913 | goto cleanup; | ||
914 | } | ||
869 | /* start copy from next extent */ | 915 | /* start copy from next extent */ |
870 | /* TODO: we could do it by single memmove */ | 916 | /* TODO: we could do it by single memmove */ |
871 | m = 0; | 917 | m = 0; |
@@ -912,7 +958,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
912 | 958 | ||
913 | /* create intermediate indexes */ | 959 | /* create intermediate indexes */ |
914 | k = depth - at - 1; | 960 | k = depth - at - 1; |
915 | BUG_ON(k < 0); | 961 | if (unlikely(k < 0)) { |
962 | EXT4_ERROR_INODE(inode, "k %d < 0!", k); | ||
963 | err = -EIO; | ||
964 | goto cleanup; | ||
965 | } | ||
916 | if (k) | 966 | if (k) |
917 | ext_debug("create %d intermediate indices\n", k); | 967 | ext_debug("create %d intermediate indices\n", k); |
918 | /* insert new index into current index block */ | 968 | /* insert new index into current index block */ |
@@ -949,8 +999,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
949 | 999 | ||
950 | ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, | 1000 | ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, |
951 | EXT_MAX_INDEX(path[i].p_hdr)); | 1001 | EXT_MAX_INDEX(path[i].p_hdr)); |
952 | BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) != | 1002 | if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != |
953 | EXT_LAST_INDEX(path[i].p_hdr)); | 1003 | EXT_LAST_INDEX(path[i].p_hdr))) { |
1004 | EXT4_ERROR_INODE(inode, | ||
1005 | "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!", | ||
1006 | le32_to_cpu(path[i].p_ext->ee_block)); | ||
1007 | err = -EIO; | ||
1008 | goto cleanup; | ||
1009 | } | ||
954 | while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { | 1010 | while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { |
955 | ext_debug("%d: move %d:%llu in new index %llu\n", i, | 1011 | ext_debug("%d: move %d:%llu in new index %llu\n", i, |
956 | le32_to_cpu(path[i].p_idx->ei_block), | 1012 | le32_to_cpu(path[i].p_idx->ei_block), |
@@ -1007,7 +1063,8 @@ cleanup: | |||
1007 | for (i = 0; i < depth; i++) { | 1063 | for (i = 0; i < depth; i++) { |
1008 | if (!ablocks[i]) | 1064 | if (!ablocks[i]) |
1009 | continue; | 1065 | continue; |
1010 | ext4_free_blocks(handle, inode, ablocks[i], 1, 1); | 1066 | ext4_free_blocks(handle, inode, 0, ablocks[i], 1, |
1067 | EXT4_FREE_BLOCKS_METADATA); | ||
1011 | } | 1068 | } |
1012 | } | 1069 | } |
1013 | kfree(ablocks); | 1070 | kfree(ablocks); |
@@ -1187,7 +1244,10 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, | |||
1187 | struct ext4_extent *ex; | 1244 | struct ext4_extent *ex; |
1188 | int depth, ee_len; | 1245 | int depth, ee_len; |
1189 | 1246 | ||
1190 | BUG_ON(path == NULL); | 1247 | if (unlikely(path == NULL)) { |
1248 | EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); | ||
1249 | return -EIO; | ||
1250 | } | ||
1191 | depth = path->p_depth; | 1251 | depth = path->p_depth; |
1192 | *phys = 0; | 1252 | *phys = 0; |
1193 | 1253 | ||
@@ -1201,15 +1261,33 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, | |||
1201 | ex = path[depth].p_ext; | 1261 | ex = path[depth].p_ext; |
1202 | ee_len = ext4_ext_get_actual_len(ex); | 1262 | ee_len = ext4_ext_get_actual_len(ex); |
1203 | if (*logical < le32_to_cpu(ex->ee_block)) { | 1263 | if (*logical < le32_to_cpu(ex->ee_block)) { |
1204 | BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); | 1264 | if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { |
1265 | EXT4_ERROR_INODE(inode, | ||
1266 | "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!", | ||
1267 | *logical, le32_to_cpu(ex->ee_block)); | ||
1268 | return -EIO; | ||
1269 | } | ||
1205 | while (--depth >= 0) { | 1270 | while (--depth >= 0) { |
1206 | ix = path[depth].p_idx; | 1271 | ix = path[depth].p_idx; |
1207 | BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); | 1272 | if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { |
1273 | EXT4_ERROR_INODE(inode, | ||
1274 | "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", | ||
1275 | ix != NULL ? ix->ei_block : 0, | ||
1276 | EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? | ||
1277 | EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0, | ||
1278 | depth); | ||
1279 | return -EIO; | ||
1280 | } | ||
1208 | } | 1281 | } |
1209 | return 0; | 1282 | return 0; |
1210 | } | 1283 | } |
1211 | 1284 | ||
1212 | BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len)); | 1285 | if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { |
1286 | EXT4_ERROR_INODE(inode, | ||
1287 | "logical %d < ee_block %d + ee_len %d!", | ||
1288 | *logical, le32_to_cpu(ex->ee_block), ee_len); | ||
1289 | return -EIO; | ||
1290 | } | ||
1213 | 1291 | ||
1214 | *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; | 1292 | *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; |
1215 | *phys = ext_pblock(ex) + ee_len - 1; | 1293 | *phys = ext_pblock(ex) + ee_len - 1; |
@@ -1235,7 +1313,10 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, | |||
1235 | int depth; /* Note, NOT eh_depth; depth from top of tree */ | 1313 | int depth; /* Note, NOT eh_depth; depth from top of tree */ |
1236 | int ee_len; | 1314 | int ee_len; |
1237 | 1315 | ||
1238 | BUG_ON(path == NULL); | 1316 | if (unlikely(path == NULL)) { |
1317 | EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); | ||
1318 | return -EIO; | ||
1319 | } | ||
1239 | depth = path->p_depth; | 1320 | depth = path->p_depth; |
1240 | *phys = 0; | 1321 | *phys = 0; |
1241 | 1322 | ||
@@ -1249,17 +1330,32 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, | |||
1249 | ex = path[depth].p_ext; | 1330 | ex = path[depth].p_ext; |
1250 | ee_len = ext4_ext_get_actual_len(ex); | 1331 | ee_len = ext4_ext_get_actual_len(ex); |
1251 | if (*logical < le32_to_cpu(ex->ee_block)) { | 1332 | if (*logical < le32_to_cpu(ex->ee_block)) { |
1252 | BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); | 1333 | if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { |
1334 | EXT4_ERROR_INODE(inode, | ||
1335 | "first_extent(path[%d].p_hdr) != ex", | ||
1336 | depth); | ||
1337 | return -EIO; | ||
1338 | } | ||
1253 | while (--depth >= 0) { | 1339 | while (--depth >= 0) { |
1254 | ix = path[depth].p_idx; | 1340 | ix = path[depth].p_idx; |
1255 | BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); | 1341 | if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { |
1342 | EXT4_ERROR_INODE(inode, | ||
1343 | "ix != EXT_FIRST_INDEX *logical %d!", | ||
1344 | *logical); | ||
1345 | return -EIO; | ||
1346 | } | ||
1256 | } | 1347 | } |
1257 | *logical = le32_to_cpu(ex->ee_block); | 1348 | *logical = le32_to_cpu(ex->ee_block); |
1258 | *phys = ext_pblock(ex); | 1349 | *phys = ext_pblock(ex); |
1259 | return 0; | 1350 | return 0; |
1260 | } | 1351 | } |
1261 | 1352 | ||
1262 | BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len)); | 1353 | if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { |
1354 | EXT4_ERROR_INODE(inode, | ||
1355 | "logical %d < ee_block %d + ee_len %d!", | ||
1356 | *logical, le32_to_cpu(ex->ee_block), ee_len); | ||
1357 | return -EIO; | ||
1358 | } | ||
1263 | 1359 | ||
1264 | if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { | 1360 | if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { |
1265 | /* next allocated block in this leaf */ | 1361 | /* next allocated block in this leaf */ |
@@ -1398,8 +1494,12 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, | |||
1398 | 1494 | ||
1399 | eh = path[depth].p_hdr; | 1495 | eh = path[depth].p_hdr; |
1400 | ex = path[depth].p_ext; | 1496 | ex = path[depth].p_ext; |
1401 | BUG_ON(ex == NULL); | 1497 | |
1402 | BUG_ON(eh == NULL); | 1498 | if (unlikely(ex == NULL || eh == NULL)) { |
1499 | EXT4_ERROR_INODE(inode, | ||
1500 | "ex %p == NULL or eh %p == NULL", ex, eh); | ||
1501 | return -EIO; | ||
1502 | } | ||
1403 | 1503 | ||
1404 | if (depth == 0) { | 1504 | if (depth == 0) { |
1405 | /* there is no tree at all */ | 1505 | /* there is no tree at all */ |
@@ -1522,8 +1622,9 @@ int ext4_ext_try_to_merge(struct inode *inode, | |||
1522 | merge_done = 1; | 1622 | merge_done = 1; |
1523 | WARN_ON(eh->eh_entries == 0); | 1623 | WARN_ON(eh->eh_entries == 0); |
1524 | if (!eh->eh_entries) | 1624 | if (!eh->eh_entries) |
1525 | ext4_error(inode->i_sb, "ext4_ext_try_to_merge", | 1625 | ext4_error(inode->i_sb, |
1526 | "inode#%lu, eh->eh_entries = 0!", inode->i_ino); | 1626 | "inode#%lu, eh->eh_entries = 0!", |
1627 | inode->i_ino); | ||
1527 | } | 1628 | } |
1528 | 1629 | ||
1529 | return merge_done; | 1630 | return merge_done; |
@@ -1596,13 +1697,19 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1596 | ext4_lblk_t next; | 1697 | ext4_lblk_t next; |
1597 | unsigned uninitialized = 0; | 1698 | unsigned uninitialized = 0; |
1598 | 1699 | ||
1599 | BUG_ON(ext4_ext_get_actual_len(newext) == 0); | 1700 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { |
1701 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); | ||
1702 | return -EIO; | ||
1703 | } | ||
1600 | depth = ext_depth(inode); | 1704 | depth = ext_depth(inode); |
1601 | ex = path[depth].p_ext; | 1705 | ex = path[depth].p_ext; |
1602 | BUG_ON(path[depth].p_hdr == NULL); | 1706 | if (unlikely(path[depth].p_hdr == NULL)) { |
1707 | EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); | ||
1708 | return -EIO; | ||
1709 | } | ||
1603 | 1710 | ||
1604 | /* try to insert block into found extent and return */ | 1711 | /* try to insert block into found extent and return */ |
1605 | if (ex && (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT) | 1712 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) |
1606 | && ext4_can_extents_be_merged(inode, ex, newext)) { | 1713 | && ext4_can_extents_be_merged(inode, ex, newext)) { |
1607 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", | 1714 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", |
1608 | ext4_ext_is_uninitialized(newext), | 1715 | ext4_ext_is_uninitialized(newext), |
@@ -1723,7 +1830,7 @@ has_space: | |||
1723 | 1830 | ||
1724 | merge: | 1831 | merge: |
1725 | /* try to merge extents to the right */ | 1832 | /* try to merge extents to the right */ |
1726 | if (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT) | 1833 | if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) |
1727 | ext4_ext_try_to_merge(inode, path, nearex); | 1834 | ext4_ext_try_to_merge(inode, path, nearex); |
1728 | 1835 | ||
1729 | /* try to merge extents to the left */ | 1836 | /* try to merge extents to the left */ |
@@ -1761,7 +1868,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | |||
1761 | while (block < last && block != EXT_MAX_BLOCK) { | 1868 | while (block < last && block != EXT_MAX_BLOCK) { |
1762 | num = last - block; | 1869 | num = last - block; |
1763 | /* find extent for this block */ | 1870 | /* find extent for this block */ |
1871 | down_read(&EXT4_I(inode)->i_data_sem); | ||
1764 | path = ext4_ext_find_extent(inode, block, path); | 1872 | path = ext4_ext_find_extent(inode, block, path); |
1873 | up_read(&EXT4_I(inode)->i_data_sem); | ||
1765 | if (IS_ERR(path)) { | 1874 | if (IS_ERR(path)) { |
1766 | err = PTR_ERR(path); | 1875 | err = PTR_ERR(path); |
1767 | path = NULL; | 1876 | path = NULL; |
@@ -1769,7 +1878,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | |||
1769 | } | 1878 | } |
1770 | 1879 | ||
1771 | depth = ext_depth(inode); | 1880 | depth = ext_depth(inode); |
1772 | BUG_ON(path[depth].p_hdr == NULL); | 1881 | if (unlikely(path[depth].p_hdr == NULL)) { |
1882 | EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); | ||
1883 | err = -EIO; | ||
1884 | break; | ||
1885 | } | ||
1773 | ex = path[depth].p_ext; | 1886 | ex = path[depth].p_ext; |
1774 | next = ext4_ext_next_allocated_block(path); | 1887 | next = ext4_ext_next_allocated_block(path); |
1775 | 1888 | ||
@@ -1820,7 +1933,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | |||
1820 | cbex.ec_type = EXT4_EXT_CACHE_EXTENT; | 1933 | cbex.ec_type = EXT4_EXT_CACHE_EXTENT; |
1821 | } | 1934 | } |
1822 | 1935 | ||
1823 | BUG_ON(cbex.ec_len == 0); | 1936 | if (unlikely(cbex.ec_len == 0)) { |
1937 | EXT4_ERROR_INODE(inode, "cbex.ec_len == 0"); | ||
1938 | err = -EIO; | ||
1939 | break; | ||
1940 | } | ||
1824 | err = func(inode, path, &cbex, ex, cbdata); | 1941 | err = func(inode, path, &cbex, ex, cbdata); |
1825 | ext4_ext_drop_refs(path); | 1942 | ext4_ext_drop_refs(path); |
1826 | 1943 | ||
@@ -1934,7 +2051,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, | |||
1934 | 2051 | ||
1935 | BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && | 2052 | BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && |
1936 | cex->ec_type != EXT4_EXT_CACHE_EXTENT); | 2053 | cex->ec_type != EXT4_EXT_CACHE_EXTENT); |
1937 | if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { | 2054 | if (in_range(block, cex->ec_block, cex->ec_len)) { |
1938 | ex->ee_block = cpu_to_le32(cex->ec_block); | 2055 | ex->ee_block = cpu_to_le32(cex->ec_block); |
1939 | ext4_ext_store_pblock(ex, cex->ec_start); | 2056 | ext4_ext_store_pblock(ex, cex->ec_start); |
1940 | ex->ee_len = cpu_to_le16(cex->ec_len); | 2057 | ex->ee_len = cpu_to_le16(cex->ec_len); |
@@ -1957,14 +2074,16 @@ errout: | |||
1957 | static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | 2074 | static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, |
1958 | struct ext4_ext_path *path) | 2075 | struct ext4_ext_path *path) |
1959 | { | 2076 | { |
1960 | struct buffer_head *bh; | ||
1961 | int err; | 2077 | int err; |
1962 | ext4_fsblk_t leaf; | 2078 | ext4_fsblk_t leaf; |
1963 | 2079 | ||
1964 | /* free index block */ | 2080 | /* free index block */ |
1965 | path--; | 2081 | path--; |
1966 | leaf = idx_pblock(path->p_idx); | 2082 | leaf = idx_pblock(path->p_idx); |
1967 | BUG_ON(path->p_hdr->eh_entries == 0); | 2083 | if (unlikely(path->p_hdr->eh_entries == 0)) { |
2084 | EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); | ||
2085 | return -EIO; | ||
2086 | } | ||
1968 | err = ext4_ext_get_access(handle, inode, path); | 2087 | err = ext4_ext_get_access(handle, inode, path); |
1969 | if (err) | 2088 | if (err) |
1970 | return err; | 2089 | return err; |
@@ -1973,9 +2092,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
1973 | if (err) | 2092 | if (err) |
1974 | return err; | 2093 | return err; |
1975 | ext_debug("index is empty, remove it, free block %llu\n", leaf); | 2094 | ext_debug("index is empty, remove it, free block %llu\n", leaf); |
1976 | bh = sb_find_get_block(inode->i_sb, leaf); | 2095 | ext4_free_blocks(handle, inode, 0, leaf, 1, |
1977 | ext4_forget(handle, 1, inode, bh, leaf); | 2096 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); |
1978 | ext4_free_blocks(handle, inode, leaf, 1, 1); | ||
1979 | return err; | 2097 | return err; |
1980 | } | 2098 | } |
1981 | 2099 | ||
@@ -2042,12 +2160,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2042 | struct ext4_extent *ex, | 2160 | struct ext4_extent *ex, |
2043 | ext4_lblk_t from, ext4_lblk_t to) | 2161 | ext4_lblk_t from, ext4_lblk_t to) |
2044 | { | 2162 | { |
2045 | struct buffer_head *bh; | ||
2046 | unsigned short ee_len = ext4_ext_get_actual_len(ex); | 2163 | unsigned short ee_len = ext4_ext_get_actual_len(ex); |
2047 | int i, metadata = 0; | 2164 | int flags = EXT4_FREE_BLOCKS_FORGET; |
2048 | 2165 | ||
2049 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 2166 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
2050 | metadata = 1; | 2167 | flags |= EXT4_FREE_BLOCKS_METADATA; |
2051 | #ifdef EXTENTS_STATS | 2168 | #ifdef EXTENTS_STATS |
2052 | { | 2169 | { |
2053 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 2170 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
@@ -2072,11 +2189,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2072 | num = le32_to_cpu(ex->ee_block) + ee_len - from; | 2189 | num = le32_to_cpu(ex->ee_block) + ee_len - from; |
2073 | start = ext_pblock(ex) + ee_len - num; | 2190 | start = ext_pblock(ex) + ee_len - num; |
2074 | ext_debug("free last %u blocks starting %llu\n", num, start); | 2191 | ext_debug("free last %u blocks starting %llu\n", num, start); |
2075 | for (i = 0; i < num; i++) { | 2192 | ext4_free_blocks(handle, inode, 0, start, num, flags); |
2076 | bh = sb_find_get_block(inode->i_sb, start + i); | ||
2077 | ext4_forget(handle, 0, inode, bh, start + i); | ||
2078 | } | ||
2079 | ext4_free_blocks(handle, inode, start, num, metadata); | ||
2080 | } else if (from == le32_to_cpu(ex->ee_block) | 2193 | } else if (from == le32_to_cpu(ex->ee_block) |
2081 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2194 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { |
2082 | printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", | 2195 | printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", |
@@ -2108,8 +2221,10 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2108 | if (!path[depth].p_hdr) | 2221 | if (!path[depth].p_hdr) |
2109 | path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); | 2222 | path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); |
2110 | eh = path[depth].p_hdr; | 2223 | eh = path[depth].p_hdr; |
2111 | BUG_ON(eh == NULL); | 2224 | if (unlikely(path[depth].p_hdr == NULL)) { |
2112 | 2225 | EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); | |
2226 | return -EIO; | ||
2227 | } | ||
2113 | /* find where to start removing */ | 2228 | /* find where to start removing */ |
2114 | ex = EXT_LAST_EXTENT(eh); | 2229 | ex = EXT_LAST_EXTENT(eh); |
2115 | 2230 | ||
@@ -2167,7 +2282,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2167 | correct_index = 1; | 2282 | correct_index = 1; |
2168 | credits += (ext_depth(inode)) + 1; | 2283 | credits += (ext_depth(inode)) + 1; |
2169 | } | 2284 | } |
2170 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | 2285 | credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); |
2171 | 2286 | ||
2172 | err = ext4_ext_truncate_extend_restart(handle, inode, credits); | 2287 | err = ext4_ext_truncate_extend_restart(handle, inode, credits); |
2173 | if (err) | 2288 | if (err) |
@@ -2972,7 +3087,7 @@ fix_extent_len: | |||
2972 | ext4_ext_dirty(handle, inode, path + depth); | 3087 | ext4_ext_dirty(handle, inode, path + depth); |
2973 | return err; | 3088 | return err; |
2974 | } | 3089 | } |
2975 | static int ext4_convert_unwritten_extents_dio(handle_t *handle, | 3090 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, |
2976 | struct inode *inode, | 3091 | struct inode *inode, |
2977 | struct ext4_ext_path *path) | 3092 | struct ext4_ext_path *path) |
2978 | { | 3093 | { |
@@ -3027,6 +3142,14 @@ out: | |||
3027 | return err; | 3142 | return err; |
3028 | } | 3143 | } |
3029 | 3144 | ||
3145 | static void unmap_underlying_metadata_blocks(struct block_device *bdev, | ||
3146 | sector_t block, int count) | ||
3147 | { | ||
3148 | int i; | ||
3149 | for (i = 0; i < count; i++) | ||
3150 | unmap_underlying_metadata(bdev, block + i); | ||
3151 | } | ||
3152 | |||
3030 | static int | 3153 | static int |
3031 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | 3154 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, |
3032 | ext4_lblk_t iblock, unsigned int max_blocks, | 3155 | ext4_lblk_t iblock, unsigned int max_blocks, |
@@ -3044,8 +3167,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3044 | flags, allocated); | 3167 | flags, allocated); |
3045 | ext4_ext_show_leaf(inode, path); | 3168 | ext4_ext_show_leaf(inode, path); |
3046 | 3169 | ||
3047 | /* DIO get_block() before submit the IO, split the extent */ | 3170 | /* get_block() before submit the IO, split the extent */ |
3048 | if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) { | 3171 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3049 | ret = ext4_split_unwritten_extents(handle, | 3172 | ret = ext4_split_unwritten_extents(handle, |
3050 | inode, path, iblock, | 3173 | inode, path, iblock, |
3051 | max_blocks, flags); | 3174 | max_blocks, flags); |
@@ -3055,15 +3178,19 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3055 | * completed | 3178 | * completed |
3056 | */ | 3179 | */ |
3057 | if (io) | 3180 | if (io) |
3058 | io->flag = DIO_AIO_UNWRITTEN; | 3181 | io->flag = EXT4_IO_UNWRITTEN; |
3059 | else | 3182 | else |
3060 | EXT4_I(inode)->i_state |= EXT4_STATE_DIO_UNWRITTEN; | 3183 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
3184 | if (ext4_should_dioread_nolock(inode)) | ||
3185 | set_buffer_uninit(bh_result); | ||
3061 | goto out; | 3186 | goto out; |
3062 | } | 3187 | } |
3063 | /* async DIO end_io complete, convert the filled extent to written */ | 3188 | /* IO end_io complete, convert the filled extent to written */ |
3064 | if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) { | 3189 | if ((flags & EXT4_GET_BLOCKS_CONVERT)) { |
3065 | ret = ext4_convert_unwritten_extents_dio(handle, inode, | 3190 | ret = ext4_convert_unwritten_extents_endio(handle, inode, |
3066 | path); | 3191 | path); |
3192 | if (ret >= 0) | ||
3193 | ext4_update_inode_fsync_trans(handle, inode, 1); | ||
3067 | goto out2; | 3194 | goto out2; |
3068 | } | 3195 | } |
3069 | /* buffered IO case */ | 3196 | /* buffered IO case */ |
@@ -3091,6 +3218,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3091 | ret = ext4_ext_convert_to_initialized(handle, inode, | 3218 | ret = ext4_ext_convert_to_initialized(handle, inode, |
3092 | path, iblock, | 3219 | path, iblock, |
3093 | max_blocks); | 3220 | max_blocks); |
3221 | if (ret >= 0) | ||
3222 | ext4_update_inode_fsync_trans(handle, inode, 1); | ||
3094 | out: | 3223 | out: |
3095 | if (ret <= 0) { | 3224 | if (ret <= 0) { |
3096 | err = ret; | 3225 | err = ret; |
@@ -3098,6 +3227,30 @@ out: | |||
3098 | } else | 3227 | } else |
3099 | allocated = ret; | 3228 | allocated = ret; |
3100 | set_buffer_new(bh_result); | 3229 | set_buffer_new(bh_result); |
3230 | /* | ||
3231 | * if we allocated more blocks than requested | ||
3232 | * we need to make sure we unmap the extra block | ||
3233 | * allocated. The actual needed block will get | ||
3234 | * unmapped later when we find the buffer_head marked | ||
3235 | * new. | ||
3236 | */ | ||
3237 | if (allocated > max_blocks) { | ||
3238 | unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, | ||
3239 | newblock + max_blocks, | ||
3240 | allocated - max_blocks); | ||
3241 | allocated = max_blocks; | ||
3242 | } | ||
3243 | |||
3244 | /* | ||
3245 | * If we have done fallocate with the offset that is already | ||
3246 | * delayed allocated, we would have block reservation | ||
3247 | * and quota reservation done in the delayed write path. | ||
3248 | * But fallocate would have already updated quota and block | ||
3249 | * count for this offset. So cancel these reservation | ||
3250 | */ | ||
3251 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | ||
3252 | ext4_da_update_reserve_space(inode, allocated, 0); | ||
3253 | |||
3101 | map_out: | 3254 | map_out: |
3102 | set_buffer_mapped(bh_result); | 3255 | set_buffer_mapped(bh_result); |
3103 | out1: | 3256 | out1: |
@@ -3138,7 +3291,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3138 | { | 3291 | { |
3139 | struct ext4_ext_path *path = NULL; | 3292 | struct ext4_ext_path *path = NULL; |
3140 | struct ext4_extent_header *eh; | 3293 | struct ext4_extent_header *eh; |
3141 | struct ext4_extent newex, *ex; | 3294 | struct ext4_extent newex, *ex, *last_ex; |
3142 | ext4_fsblk_t newblock; | 3295 | ext4_fsblk_t newblock; |
3143 | int err = 0, depth, ret, cache_type; | 3296 | int err = 0, depth, ret, cache_type; |
3144 | unsigned int allocated = 0; | 3297 | unsigned int allocated = 0; |
@@ -3190,7 +3343,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3190 | * this situation is possible, though, _during_ tree modification; | 3343 | * this situation is possible, though, _during_ tree modification; |
3191 | * this is why assert can't be put in ext4_ext_find_extent() | 3344 | * this is why assert can't be put in ext4_ext_find_extent() |
3192 | */ | 3345 | */ |
3193 | BUG_ON(path[depth].p_ext == NULL && depth != 0); | 3346 | if (unlikely(path[depth].p_ext == NULL && depth != 0)) { |
3347 | EXT4_ERROR_INODE(inode, "bad extent address " | ||
3348 | "iblock: %d, depth: %d pblock %lld", | ||
3349 | iblock, depth, path[depth].p_block); | ||
3350 | err = -EIO; | ||
3351 | goto out2; | ||
3352 | } | ||
3194 | eh = path[depth].p_hdr; | 3353 | eh = path[depth].p_hdr; |
3195 | 3354 | ||
3196 | ex = path[depth].p_ext; | 3355 | ex = path[depth].p_ext; |
@@ -3205,7 +3364,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3205 | */ | 3364 | */ |
3206 | ee_len = ext4_ext_get_actual_len(ex); | 3365 | ee_len = ext4_ext_get_actual_len(ex); |
3207 | /* if found extent covers block, simply return it */ | 3366 | /* if found extent covers block, simply return it */ |
3208 | if (iblock >= ee_block && iblock < ee_block + ee_len) { | 3367 | if (in_range(iblock, ee_block, ee_len)) { |
3209 | newblock = iblock - ee_block + ee_start; | 3368 | newblock = iblock - ee_block + ee_start; |
3210 | /* number of remaining blocks in the extent */ | 3369 | /* number of remaining blocks in the extent */ |
3211 | allocated = ee_len - (iblock - ee_block); | 3370 | allocated = ee_len - (iblock - ee_block); |
@@ -3297,21 +3456,35 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3297 | if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ | 3456 | if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ |
3298 | ext4_ext_mark_uninitialized(&newex); | 3457 | ext4_ext_mark_uninitialized(&newex); |
3299 | /* | 3458 | /* |
3300 | * io_end structure was created for every async | 3459 | * io_end structure was created for every IO write to an |
3301 | * direct IO write to the middle of the file. | 3460 | * uninitialized extent. To avoid unecessary conversion, |
3302 | * To avoid unecessary convertion for every aio dio rewrite | 3461 | * here we flag the IO that really needs the conversion. |
3303 | * to the mid of file, here we flag the IO that is really | ||
3304 | * need the convertion. | ||
3305 | * For non asycn direct IO case, flag the inode state | 3462 | * For non asycn direct IO case, flag the inode state |
3306 | * that we need to perform convertion when IO is done. | 3463 | * that we need to perform convertion when IO is done. |
3307 | */ | 3464 | */ |
3308 | if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) { | 3465 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3309 | if (io) | 3466 | if (io) |
3310 | io->flag = DIO_AIO_UNWRITTEN; | 3467 | io->flag = EXT4_IO_UNWRITTEN; |
3311 | else | 3468 | else |
3312 | EXT4_I(inode)->i_state |= | 3469 | ext4_set_inode_state(inode, |
3313 | EXT4_STATE_DIO_UNWRITTEN;; | 3470 | EXT4_STATE_DIO_UNWRITTEN); |
3314 | } | 3471 | } |
3472 | if (ext4_should_dioread_nolock(inode)) | ||
3473 | set_buffer_uninit(bh_result); | ||
3474 | } | ||
3475 | |||
3476 | if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) { | ||
3477 | if (unlikely(!eh->eh_entries)) { | ||
3478 | EXT4_ERROR_INODE(inode, | ||
3479 | "eh->eh_entries == 0 ee_block %d", | ||
3480 | ex->ee_block); | ||
3481 | err = -EIO; | ||
3482 | goto out2; | ||
3483 | } | ||
3484 | last_ex = EXT_LAST_EXTENT(eh); | ||
3485 | if (iblock + ar.len > le32_to_cpu(last_ex->ee_block) | ||
3486 | + ext4_ext_get_actual_len(last_ex)) | ||
3487 | EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; | ||
3315 | } | 3488 | } |
3316 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | 3489 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); |
3317 | if (err) { | 3490 | if (err) { |
@@ -3319,20 +3492,35 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3319 | /* not a good idea to call discard here directly, | 3492 | /* not a good idea to call discard here directly, |
3320 | * but otherwise we'd need to call it every free() */ | 3493 | * but otherwise we'd need to call it every free() */ |
3321 | ext4_discard_preallocations(inode); | 3494 | ext4_discard_preallocations(inode); |
3322 | ext4_free_blocks(handle, inode, ext_pblock(&newex), | 3495 | ext4_free_blocks(handle, inode, 0, ext_pblock(&newex), |
3323 | ext4_ext_get_actual_len(&newex), 0); | 3496 | ext4_ext_get_actual_len(&newex), 0); |
3324 | goto out2; | 3497 | goto out2; |
3325 | } | 3498 | } |
3326 | 3499 | ||
3327 | /* previous routine could use block we allocated */ | 3500 | /* previous routine could use block we allocated */ |
3328 | newblock = ext_pblock(&newex); | 3501 | newblock = ext_pblock(&newex); |
3329 | allocated = ext4_ext_get_actual_len(&newex); | 3502 | allocated = ext4_ext_get_actual_len(&newex); |
3503 | if (allocated > max_blocks) | ||
3504 | allocated = max_blocks; | ||
3330 | set_buffer_new(bh_result); | 3505 | set_buffer_new(bh_result); |
3331 | 3506 | ||
3332 | /* Cache only when it is _not_ an uninitialized extent */ | 3507 | /* |
3333 | if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) | 3508 | * Update reserved blocks/metadata blocks after successful |
3509 | * block allocation which had been deferred till now. | ||
3510 | */ | ||
3511 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | ||
3512 | ext4_da_update_reserve_space(inode, allocated, 1); | ||
3513 | |||
3514 | /* | ||
3515 | * Cache the extent and update transaction to commit on fdatasync only | ||
3516 | * when it is _not_ an uninitialized extent. | ||
3517 | */ | ||
3518 | if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { | ||
3334 | ext4_ext_put_in_cache(inode, iblock, allocated, newblock, | 3519 | ext4_ext_put_in_cache(inode, iblock, allocated, newblock, |
3335 | EXT4_EXT_CACHE_EXTENT); | 3520 | EXT4_EXT_CACHE_EXTENT); |
3521 | ext4_update_inode_fsync_trans(handle, inode, 1); | ||
3522 | } else | ||
3523 | ext4_update_inode_fsync_trans(handle, inode, 0); | ||
3336 | out: | 3524 | out: |
3337 | if (allocated > max_blocks) | 3525 | if (allocated > max_blocks) |
3338 | allocated = max_blocks; | 3526 | allocated = max_blocks; |
@@ -3431,6 +3619,13 @@ static void ext4_falloc_update_inode(struct inode *inode, | |||
3431 | i_size_write(inode, new_size); | 3619 | i_size_write(inode, new_size); |
3432 | if (new_size > EXT4_I(inode)->i_disksize) | 3620 | if (new_size > EXT4_I(inode)->i_disksize) |
3433 | ext4_update_i_disksize(inode, new_size); | 3621 | ext4_update_i_disksize(inode, new_size); |
3622 | } else { | ||
3623 | /* | ||
3624 | * Mark that we allocate beyond EOF so the subsequent truncate | ||
3625 | * can proceed even if the new size is the same as i_size. | ||
3626 | */ | ||
3627 | if (new_size > i_size_read(inode)) | ||
3628 | EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL; | ||
3434 | } | 3629 | } |
3435 | 3630 | ||
3436 | } | 3631 | } |
@@ -3535,7 +3730,7 @@ retry: | |||
3535 | * Returns 0 on success. | 3730 | * Returns 0 on success. |
3536 | */ | 3731 | */ |
3537 | int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | 3732 | int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, |
3538 | loff_t len) | 3733 | ssize_t len) |
3539 | { | 3734 | { |
3540 | handle_t *handle; | 3735 | handle_t *handle; |
3541 | ext4_lblk_t block; | 3736 | ext4_lblk_t block; |
@@ -3567,7 +3762,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
3567 | map_bh.b_state = 0; | 3762 | map_bh.b_state = 0; |
3568 | ret = ext4_get_blocks(handle, inode, block, | 3763 | ret = ext4_get_blocks(handle, inode, block, |
3569 | max_blocks, &map_bh, | 3764 | max_blocks, &map_bh, |
3570 | EXT4_GET_BLOCKS_DIO_CONVERT_EXT); | 3765 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); |
3571 | if (ret <= 0) { | 3766 | if (ret <= 0) { |
3572 | WARN_ON(ret <= 0); | 3767 | WARN_ON(ret <= 0); |
3573 | printk(KERN_ERR "%s: ext4_ext_get_blocks " | 3768 | printk(KERN_ERR "%s: ext4_ext_get_blocks " |
@@ -3671,7 +3866,7 @@ static int ext4_xattr_fiemap(struct inode *inode, | |||
3671 | int error = 0; | 3866 | int error = 0; |
3672 | 3867 | ||
3673 | /* in-inode? */ | 3868 | /* in-inode? */ |
3674 | if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { | 3869 | if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { |
3675 | struct ext4_iloc iloc; | 3870 | struct ext4_iloc iloc; |
3676 | int offset; /* offset of xattr in inode */ | 3871 | int offset; /* offset of xattr in inode */ |
3677 | 3872 | ||
@@ -3699,7 +3894,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
3699 | __u64 start, __u64 len) | 3894 | __u64 start, __u64 len) |
3700 | { | 3895 | { |
3701 | ext4_lblk_t start_blk; | 3896 | ext4_lblk_t start_blk; |
3702 | ext4_lblk_t len_blks; | ||
3703 | int error = 0; | 3897 | int error = 0; |
3704 | 3898 | ||
3705 | /* fallback to generic here if not in extents fmt */ | 3899 | /* fallback to generic here if not in extents fmt */ |
@@ -3713,17 +3907,21 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
3713 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { | 3907 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { |
3714 | error = ext4_xattr_fiemap(inode, fieinfo); | 3908 | error = ext4_xattr_fiemap(inode, fieinfo); |
3715 | } else { | 3909 | } else { |
3910 | ext4_lblk_t len_blks; | ||
3911 | __u64 last_blk; | ||
3912 | |||
3716 | start_blk = start >> inode->i_sb->s_blocksize_bits; | 3913 | start_blk = start >> inode->i_sb->s_blocksize_bits; |
3717 | len_blks = len >> inode->i_sb->s_blocksize_bits; | 3914 | last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; |
3915 | if (last_blk >= EXT_MAX_BLOCK) | ||
3916 | last_blk = EXT_MAX_BLOCK-1; | ||
3917 | len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; | ||
3718 | 3918 | ||
3719 | /* | 3919 | /* |
3720 | * Walk the extent tree gathering extent information. | 3920 | * Walk the extent tree gathering extent information. |
3721 | * ext4_ext_fiemap_cb will push extents back to user. | 3921 | * ext4_ext_fiemap_cb will push extents back to user. |
3722 | */ | 3922 | */ |
3723 | down_read(&EXT4_I(inode)->i_data_sem); | ||
3724 | error = ext4_ext_walk_space(inode, start_blk, len_blks, | 3923 | error = ext4_ext_walk_space(inode, start_blk, len_blks, |
3725 | ext4_ext_fiemap_cb, fieinfo); | 3924 | ext4_ext_fiemap_cb, fieinfo); |
3726 | up_read(&EXT4_I(inode)->i_data_sem); | ||
3727 | } | 3925 | } |
3728 | 3926 | ||
3729 | return error; | 3927 | return error; |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 9630583cef28..d0776e410f34 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/jbd2.h> | 23 | #include <linux/jbd2.h> |
24 | #include <linux/mount.h> | 24 | #include <linux/mount.h> |
25 | #include <linux/path.h> | 25 | #include <linux/path.h> |
26 | #include <linux/quotaops.h> | ||
26 | #include "ext4.h" | 27 | #include "ext4.h" |
27 | #include "ext4_jbd2.h" | 28 | #include "ext4_jbd2.h" |
28 | #include "xattr.h" | 29 | #include "xattr.h" |
@@ -35,9 +36,9 @@ | |||
35 | */ | 36 | */ |
36 | static int ext4_release_file(struct inode *inode, struct file *filp) | 37 | static int ext4_release_file(struct inode *inode, struct file *filp) |
37 | { | 38 | { |
38 | if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) { | 39 | if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) { |
39 | ext4_alloc_da_blocks(inode); | 40 | ext4_alloc_da_blocks(inode); |
40 | EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE; | 41 | ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); |
41 | } | 42 | } |
42 | /* if we are the last writer on the inode, drop the block reservation */ | 43 | /* if we are the last writer on the inode, drop the block reservation */ |
43 | if ((filp->f_mode & FMODE_WRITE) && | 44 | if ((filp->f_mode & FMODE_WRITE) && |
@@ -116,18 +117,16 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
116 | * devices or filesystem images. | 117 | * devices or filesystem images. |
117 | */ | 118 | */ |
118 | memset(buf, 0, sizeof(buf)); | 119 | memset(buf, 0, sizeof(buf)); |
119 | path.mnt = mnt->mnt_parent; | 120 | path.mnt = mnt; |
120 | path.dentry = mnt->mnt_mountpoint; | 121 | path.dentry = mnt->mnt_root; |
121 | path_get(&path); | ||
122 | cp = d_path(&path, buf, sizeof(buf)); | 122 | cp = d_path(&path, buf, sizeof(buf)); |
123 | path_put(&path); | ||
124 | if (!IS_ERR(cp)) { | 123 | if (!IS_ERR(cp)) { |
125 | memcpy(sbi->s_es->s_last_mounted, cp, | 124 | memcpy(sbi->s_es->s_last_mounted, cp, |
126 | sizeof(sbi->s_es->s_last_mounted)); | 125 | sizeof(sbi->s_es->s_last_mounted)); |
127 | sb->s_dirt = 1; | 126 | sb->s_dirt = 1; |
128 | } | 127 | } |
129 | } | 128 | } |
130 | return generic_file_open(inode, filp); | 129 | return dquot_file_open(inode, filp); |
131 | } | 130 | } |
132 | 131 | ||
133 | const struct file_operations ext4_file_operations = { | 132 | const struct file_operations ext4_file_operations = { |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 2b1531266ee2..0d0c3239c1cd 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -51,25 +51,30 @@ | |||
51 | int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | 51 | int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) |
52 | { | 52 | { |
53 | struct inode *inode = dentry->d_inode; | 53 | struct inode *inode = dentry->d_inode; |
54 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
54 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 55 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
55 | int err, ret = 0; | 56 | int ret; |
57 | tid_t commit_tid; | ||
56 | 58 | ||
57 | J_ASSERT(ext4_journal_current_handle() == NULL); | 59 | J_ASSERT(ext4_journal_current_handle() == NULL); |
58 | 60 | ||
59 | trace_ext4_sync_file(file, dentry, datasync); | 61 | trace_ext4_sync_file(file, dentry, datasync); |
60 | 62 | ||
61 | ret = flush_aio_dio_completed_IO(inode); | 63 | if (inode->i_sb->s_flags & MS_RDONLY) |
64 | return 0; | ||
65 | |||
66 | ret = flush_completed_IO(inode); | ||
62 | if (ret < 0) | 67 | if (ret < 0) |
63 | goto out; | 68 | return ret; |
69 | |||
70 | if (!journal) | ||
71 | return simple_fsync(file, dentry, datasync); | ||
72 | |||
64 | /* | 73 | /* |
65 | * data=writeback: | 74 | * data=writeback,ordered: |
66 | * The caller's filemap_fdatawrite()/wait will sync the data. | 75 | * The caller's filemap_fdatawrite()/wait will sync the data. |
67 | * sync_inode() will sync the metadata | 76 | * Metadata is in the journal, we wait for proper transaction to |
68 | * | 77 | * commit here. |
69 | * data=ordered: | ||
70 | * The caller's filemap_fdatawrite() will write the data and | ||
71 | * sync_inode() will write the inode if it is dirty. Then the caller's | ||
72 | * filemap_fdatawait() will wait on the pages. | ||
73 | * | 78 | * |
74 | * data=journal: | 79 | * data=journal: |
75 | * filemap_fdatawrite won't do anything (the buffers are clean). | 80 | * filemap_fdatawrite won't do anything (the buffers are clean). |
@@ -79,32 +84,25 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
79 | * (they were dirtied by commit). But that's OK - the blocks are | 84 | * (they were dirtied by commit). But that's OK - the blocks are |
80 | * safe in-journal, which is all fsync() needs to ensure. | 85 | * safe in-journal, which is all fsync() needs to ensure. |
81 | */ | 86 | */ |
82 | if (ext4_should_journal_data(inode)) { | 87 | if (ext4_should_journal_data(inode)) |
83 | ret = ext4_force_commit(inode->i_sb); | 88 | return ext4_force_commit(inode->i_sb); |
84 | goto out; | ||
85 | } | ||
86 | 89 | ||
87 | if (!journal) | 90 | commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; |
88 | ret = sync_mapping_buffers(inode->i_mapping); | 91 | if (jbd2_log_start_commit(journal, commit_tid)) { |
89 | 92 | /* | |
90 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | 93 | * When the journal is on a different device than the |
91 | goto out; | 94 | * fs data disk, we need to issue the barrier in |
92 | 95 | * writeback mode. (In ordered mode, the jbd2 layer | |
93 | /* | 96 | * will take care of issuing the barrier. In |
94 | * The VFS has written the file data. If the inode is unaltered | 97 | * data=journal, all of the data blocks are written to |
95 | * then we need not start a commit. | 98 | * the journal device.) |
96 | */ | 99 | */ |
97 | if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) { | 100 | if (ext4_should_writeback_data(inode) && |
98 | struct writeback_control wbc = { | 101 | (journal->j_fs_dev != journal->j_dev) && |
99 | .sync_mode = WB_SYNC_ALL, | 102 | (journal->j_flags & JBD2_BARRIER)) |
100 | .nr_to_write = 0, /* sys_fsync did this */ | 103 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); |
101 | }; | 104 | jbd2_log_wait_commit(journal, commit_tid); |
102 | err = sync_inode(inode, &wbc); | 105 | } else if (journal->j_flags & JBD2_BARRIER) |
103 | if (ret == 0) | ||
104 | ret = err; | ||
105 | } | ||
106 | out: | ||
107 | if (journal && (journal->j_flags & JBD2_BARRIER)) | ||
108 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 106 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); |
109 | return ret; | 107 | return ret; |
110 | } | 108 | } |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f3624ead4f6c..57f6eef6ccd6 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -76,8 +76,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
76 | /* If checksum is bad mark all blocks and inodes use to prevent | 76 | /* If checksum is bad mark all blocks and inodes use to prevent |
77 | * allocation, essentially implementing a per-group read-only flag. */ | 77 | * allocation, essentially implementing a per-group read-only flag. */ |
78 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { | 78 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { |
79 | ext4_error(sb, __func__, "Checksum bad for group %u", | 79 | ext4_error(sb, "Checksum bad for group %u", block_group); |
80 | block_group); | ||
81 | ext4_free_blks_set(sb, gdp, 0); | 80 | ext4_free_blks_set(sb, gdp, 0); |
82 | ext4_free_inodes_set(sb, gdp, 0); | 81 | ext4_free_inodes_set(sb, gdp, 0); |
83 | ext4_itable_unused_set(sb, gdp, 0); | 82 | ext4_itable_unused_set(sb, gdp, 0); |
@@ -111,8 +110,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
111 | bitmap_blk = ext4_inode_bitmap(sb, desc); | 110 | bitmap_blk = ext4_inode_bitmap(sb, desc); |
112 | bh = sb_getblk(sb, bitmap_blk); | 111 | bh = sb_getblk(sb, bitmap_blk); |
113 | if (unlikely(!bh)) { | 112 | if (unlikely(!bh)) { |
114 | ext4_error(sb, __func__, | 113 | ext4_error(sb, "Cannot read inode bitmap - " |
115 | "Cannot read inode bitmap - " | ||
116 | "block_group = %u, inode_bitmap = %llu", | 114 | "block_group = %u, inode_bitmap = %llu", |
117 | block_group, bitmap_blk); | 115 | block_group, bitmap_blk); |
118 | return NULL; | 116 | return NULL; |
@@ -153,8 +151,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
153 | set_bitmap_uptodate(bh); | 151 | set_bitmap_uptodate(bh); |
154 | if (bh_submit_read(bh) < 0) { | 152 | if (bh_submit_read(bh) < 0) { |
155 | put_bh(bh); | 153 | put_bh(bh); |
156 | ext4_error(sb, __func__, | 154 | ext4_error(sb, "Cannot read inode bitmap - " |
157 | "Cannot read inode bitmap - " | ||
158 | "block_group = %u, inode_bitmap = %llu", | 155 | "block_group = %u, inode_bitmap = %llu", |
159 | block_group, bitmap_blk); | 156 | block_group, bitmap_blk); |
160 | return NULL; | 157 | return NULL; |
@@ -217,10 +214,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
217 | * Note: we must free any quota before locking the superblock, | 214 | * Note: we must free any quota before locking the superblock, |
218 | * as writing the quota to disk may need the lock as well. | 215 | * as writing the quota to disk may need the lock as well. |
219 | */ | 216 | */ |
220 | vfs_dq_init(inode); | 217 | dquot_initialize(inode); |
221 | ext4_xattr_delete_inode(handle, inode); | 218 | ext4_xattr_delete_inode(handle, inode); |
222 | vfs_dq_free_inode(inode); | 219 | dquot_free_inode(inode); |
223 | vfs_dq_drop(inode); | 220 | dquot_drop(inode); |
224 | 221 | ||
225 | is_directory = S_ISDIR(inode->i_mode); | 222 | is_directory = S_ISDIR(inode->i_mode); |
226 | 223 | ||
@@ -229,8 +226,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
229 | 226 | ||
230 | es = EXT4_SB(sb)->s_es; | 227 | es = EXT4_SB(sb)->s_es; |
231 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { | 228 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { |
232 | ext4_error(sb, "ext4_free_inode", | 229 | ext4_error(sb, "reserved or nonexistent inode %lu", ino); |
233 | "reserved or nonexistent inode %lu", ino); | ||
234 | goto error_return; | 230 | goto error_return; |
235 | } | 231 | } |
236 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); | 232 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); |
@@ -248,8 +244,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
248 | cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), | 244 | cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), |
249 | bit, bitmap_bh->b_data); | 245 | bit, bitmap_bh->b_data); |
250 | if (!cleared) | 246 | if (!cleared) |
251 | ext4_error(sb, "ext4_free_inode", | 247 | ext4_error(sb, "bit already cleared for inode %lu", ino); |
252 | "bit already cleared for inode %lu", ino); | ||
253 | else { | 248 | else { |
254 | gdp = ext4_get_group_desc(sb, block_group, &bh2); | 249 | gdp = ext4_get_group_desc(sb, block_group, &bh2); |
255 | 250 | ||
@@ -268,7 +263,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
268 | ext4_group_t f; | 263 | ext4_group_t f; |
269 | 264 | ||
270 | f = ext4_flex_group(sbi, block_group); | 265 | f = ext4_flex_group(sbi, block_group); |
271 | atomic_dec(&sbi->s_flex_groups[f].free_inodes); | 266 | atomic_dec(&sbi->s_flex_groups[f].used_dirs); |
272 | } | 267 | } |
273 | 268 | ||
274 | } | 269 | } |
@@ -736,8 +731,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
736 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || | 731 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || |
737 | ino > EXT4_INODES_PER_GROUP(sb)) { | 732 | ino > EXT4_INODES_PER_GROUP(sb)) { |
738 | ext4_unlock_group(sb, group); | 733 | ext4_unlock_group(sb, group); |
739 | ext4_error(sb, __func__, | 734 | ext4_error(sb, "reserved inode or inode > inodes count - " |
740 | "reserved inode or inode > inodes count - " | ||
741 | "block_group = %u, inode=%lu", group, | 735 | "block_group = %u, inode=%lu", group, |
742 | ino + group * EXT4_INODES_PER_GROUP(sb)); | 736 | ino + group * EXT4_INODES_PER_GROUP(sb)); |
743 | return 1; | 737 | return 1; |
@@ -779,7 +773,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
779 | if (sbi->s_log_groups_per_flex) { | 773 | if (sbi->s_log_groups_per_flex) { |
780 | ext4_group_t f = ext4_flex_group(sbi, group); | 774 | ext4_group_t f = ext4_flex_group(sbi, group); |
781 | 775 | ||
782 | atomic_inc(&sbi->s_flex_groups[f].free_inodes); | 776 | atomic_inc(&sbi->s_flex_groups[f].used_dirs); |
783 | } | 777 | } |
784 | } | 778 | } |
785 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | 779 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); |
@@ -904,7 +898,7 @@ repeat_in_this_group: | |||
904 | BUFFER_TRACE(inode_bitmap_bh, | 898 | BUFFER_TRACE(inode_bitmap_bh, |
905 | "call ext4_handle_dirty_metadata"); | 899 | "call ext4_handle_dirty_metadata"); |
906 | err = ext4_handle_dirty_metadata(handle, | 900 | err = ext4_handle_dirty_metadata(handle, |
907 | inode, | 901 | NULL, |
908 | inode_bitmap_bh); | 902 | inode_bitmap_bh); |
909 | if (err) | 903 | if (err) |
910 | goto fail; | 904 | goto fail; |
@@ -1029,15 +1023,16 @@ got: | |||
1029 | inode->i_generation = sbi->s_next_generation++; | 1023 | inode->i_generation = sbi->s_next_generation++; |
1030 | spin_unlock(&sbi->s_next_gen_lock); | 1024 | spin_unlock(&sbi->s_next_gen_lock); |
1031 | 1025 | ||
1032 | ei->i_state = EXT4_STATE_NEW; | 1026 | ei->i_state_flags = 0; |
1027 | ext4_set_inode_state(inode, EXT4_STATE_NEW); | ||
1033 | 1028 | ||
1034 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; | 1029 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; |
1035 | 1030 | ||
1036 | ret = inode; | 1031 | ret = inode; |
1037 | if (vfs_dq_alloc_inode(inode)) { | 1032 | dquot_initialize(inode); |
1038 | err = -EDQUOT; | 1033 | err = dquot_alloc_inode(inode); |
1034 | if (err) | ||
1039 | goto fail_drop; | 1035 | goto fail_drop; |
1040 | } | ||
1041 | 1036 | ||
1042 | err = ext4_init_acl(handle, inode, dir); | 1037 | err = ext4_init_acl(handle, inode, dir); |
1043 | if (err) | 1038 | if (err) |
@@ -1074,10 +1069,10 @@ really_out: | |||
1074 | return ret; | 1069 | return ret; |
1075 | 1070 | ||
1076 | fail_free_drop: | 1071 | fail_free_drop: |
1077 | vfs_dq_free_inode(inode); | 1072 | dquot_free_inode(inode); |
1078 | 1073 | ||
1079 | fail_drop: | 1074 | fail_drop: |
1080 | vfs_dq_drop(inode); | 1075 | dquot_drop(inode); |
1081 | inode->i_flags |= S_NOQUOTA; | 1076 | inode->i_flags |= S_NOQUOTA; |
1082 | inode->i_nlink = 0; | 1077 | inode->i_nlink = 0; |
1083 | unlock_new_inode(inode); | 1078 | unlock_new_inode(inode); |
@@ -1098,8 +1093,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) | |||
1098 | 1093 | ||
1099 | /* Error cases - e2fsck has already cleaned up for us */ | 1094 | /* Error cases - e2fsck has already cleaned up for us */ |
1100 | if (ino > max_ino) { | 1095 | if (ino > max_ino) { |
1101 | ext4_warning(sb, __func__, | 1096 | ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino); |
1102 | "bad orphan ino %lu! e2fsck was run?", ino); | ||
1103 | goto error; | 1097 | goto error; |
1104 | } | 1098 | } |
1105 | 1099 | ||
@@ -1107,8 +1101,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) | |||
1107 | bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); | 1101 | bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); |
1108 | bitmap_bh = ext4_read_inode_bitmap(sb, block_group); | 1102 | bitmap_bh = ext4_read_inode_bitmap(sb, block_group); |
1109 | if (!bitmap_bh) { | 1103 | if (!bitmap_bh) { |
1110 | ext4_warning(sb, __func__, | 1104 | ext4_warning(sb, "inode bitmap error for orphan %lu", ino); |
1111 | "inode bitmap error for orphan %lu", ino); | ||
1112 | goto error; | 1105 | goto error; |
1113 | } | 1106 | } |
1114 | 1107 | ||
@@ -1140,8 +1133,7 @@ iget_failed: | |||
1140 | err = PTR_ERR(inode); | 1133 | err = PTR_ERR(inode); |
1141 | inode = NULL; | 1134 | inode = NULL; |
1142 | bad_orphan: | 1135 | bad_orphan: |
1143 | ext4_warning(sb, __func__, | 1136 | ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino); |
1144 | "bad orphan inode %lu! e2fsck was run?", ino); | ||
1145 | printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", | 1137 | printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", |
1146 | bit, (unsigned long long)bitmap_bh->b_blocknr, | 1138 | bit, (unsigned long long)bitmap_bh->b_blocknr, |
1147 | ext4_test_bit(bit, bitmap_bh->b_data)); | 1139 | ext4_test_bit(bit, bitmap_bh->b_data)); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2c8caa51addb..5381802d6052 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -38,6 +38,8 @@ | |||
38 | #include <linux/uio.h> | 38 | #include <linux/uio.h> |
39 | #include <linux/bio.h> | 39 | #include <linux/bio.h> |
40 | #include <linux/workqueue.h> | 40 | #include <linux/workqueue.h> |
41 | #include <linux/kernel.h> | ||
42 | #include <linux/slab.h> | ||
41 | 43 | ||
42 | #include "ext4_jbd2.h" | 44 | #include "ext4_jbd2.h" |
43 | #include "xattr.h" | 45 | #include "xattr.h" |
@@ -71,58 +73,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) | |||
71 | } | 73 | } |
72 | 74 | ||
73 | /* | 75 | /* |
74 | * The ext4 forget function must perform a revoke if we are freeing data | ||
75 | * which has been journaled. Metadata (eg. indirect blocks) must be | ||
76 | * revoked in all cases. | ||
77 | * | ||
78 | * "bh" may be NULL: a metadata block may have been freed from memory | ||
79 | * but there may still be a record of it in the journal, and that record | ||
80 | * still needs to be revoked. | ||
81 | * | ||
82 | * If the handle isn't valid we're not journaling, but we still need to | ||
83 | * call into ext4_journal_revoke() to put the buffer head. | ||
84 | */ | ||
85 | int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, | ||
86 | struct buffer_head *bh, ext4_fsblk_t blocknr) | ||
87 | { | ||
88 | int err; | ||
89 | |||
90 | might_sleep(); | ||
91 | |||
92 | BUFFER_TRACE(bh, "enter"); | ||
93 | |||
94 | jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " | ||
95 | "data mode %x\n", | ||
96 | bh, is_metadata, inode->i_mode, | ||
97 | test_opt(inode->i_sb, DATA_FLAGS)); | ||
98 | |||
99 | /* Never use the revoke function if we are doing full data | ||
100 | * journaling: there is no need to, and a V1 superblock won't | ||
101 | * support it. Otherwise, only skip the revoke on un-journaled | ||
102 | * data blocks. */ | ||
103 | |||
104 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || | ||
105 | (!is_metadata && !ext4_should_journal_data(inode))) { | ||
106 | if (bh) { | ||
107 | BUFFER_TRACE(bh, "call jbd2_journal_forget"); | ||
108 | return ext4_journal_forget(handle, bh); | ||
109 | } | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | /* | ||
114 | * data!=journal && (is_metadata || should_journal_data(inode)) | ||
115 | */ | ||
116 | BUFFER_TRACE(bh, "call ext4_journal_revoke"); | ||
117 | err = ext4_journal_revoke(handle, blocknr, bh); | ||
118 | if (err) | ||
119 | ext4_abort(inode->i_sb, __func__, | ||
120 | "error %d when attempting revoke", err); | ||
121 | BUFFER_TRACE(bh, "exit"); | ||
122 | return err; | ||
123 | } | ||
124 | |||
125 | /* | ||
126 | * Work out how many blocks we need to proceed with the next chunk of a | 76 | * Work out how many blocks we need to proceed with the next chunk of a |
127 | * truncate transaction. | 77 | * truncate transaction. |
128 | */ | 78 | */ |
@@ -222,6 +172,9 @@ void ext4_delete_inode(struct inode *inode) | |||
222 | handle_t *handle; | 172 | handle_t *handle; |
223 | int err; | 173 | int err; |
224 | 174 | ||
175 | if (!is_bad_inode(inode)) | ||
176 | dquot_initialize(inode); | ||
177 | |||
225 | if (ext4_should_order_data(inode)) | 178 | if (ext4_should_order_data(inode)) |
226 | ext4_begin_ordered_truncate(inode, 0); | 179 | ext4_begin_ordered_truncate(inode, 0); |
227 | truncate_inode_pages(&inode->i_data, 0); | 180 | truncate_inode_pages(&inode->i_data, 0); |
@@ -246,7 +199,7 @@ void ext4_delete_inode(struct inode *inode) | |||
246 | inode->i_size = 0; | 199 | inode->i_size = 0; |
247 | err = ext4_mark_inode_dirty(handle, inode); | 200 | err = ext4_mark_inode_dirty(handle, inode); |
248 | if (err) { | 201 | if (err) { |
249 | ext4_warning(inode->i_sb, __func__, | 202 | ext4_warning(inode->i_sb, |
250 | "couldn't mark inode dirty (err %d)", err); | 203 | "couldn't mark inode dirty (err %d)", err); |
251 | goto stop_handle; | 204 | goto stop_handle; |
252 | } | 205 | } |
@@ -264,7 +217,7 @@ void ext4_delete_inode(struct inode *inode) | |||
264 | if (err > 0) | 217 | if (err > 0) |
265 | err = ext4_journal_restart(handle, 3); | 218 | err = ext4_journal_restart(handle, 3); |
266 | if (err != 0) { | 219 | if (err != 0) { |
267 | ext4_warning(inode->i_sb, __func__, | 220 | ext4_warning(inode->i_sb, |
268 | "couldn't extend journal (err %d)", err); | 221 | "couldn't extend journal (err %d)", err); |
269 | stop_handle: | 222 | stop_handle: |
270 | ext4_journal_stop(handle); | 223 | ext4_journal_stop(handle); |
@@ -375,8 +328,7 @@ static int ext4_block_to_path(struct inode *inode, | |||
375 | offsets[n++] = i_block & (ptrs - 1); | 328 | offsets[n++] = i_block & (ptrs - 1); |
376 | final = ptrs; | 329 | final = ptrs; |
377 | } else { | 330 | } else { |
378 | ext4_warning(inode->i_sb, "ext4_block_to_path", | 331 | ext4_warning(inode->i_sb, "block %lu > max in inode %lu", |
379 | "block %lu > max in inode %lu", | ||
380 | i_block + direct_blocks + | 332 | i_block + direct_blocks + |
381 | indirect_blocks + double_blocks, inode->i_ino); | 333 | indirect_blocks + double_blocks, inode->i_ino); |
382 | } | 334 | } |
@@ -396,7 +348,7 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
396 | if (blk && | 348 | if (blk && |
397 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 349 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
398 | blk, 1))) { | 350 | blk, 1))) { |
399 | ext4_error(inode->i_sb, function, | 351 | __ext4_error(inode->i_sb, function, |
400 | "invalid block reference %u " | 352 | "invalid block reference %u " |
401 | "in inode #%lu", blk, inode->i_ino); | 353 | "in inode #%lu", blk, inode->i_ino); |
402 | return -EIO; | 354 | return -EIO; |
@@ -659,7 +611,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
659 | if (*err) | 611 | if (*err) |
660 | goto failed_out; | 612 | goto failed_out; |
661 | 613 | ||
662 | BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS); | 614 | if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) { |
615 | EXT4_ERROR_INODE(inode, | ||
616 | "current_block %llu + count %lu > %d!", | ||
617 | current_block, count, | ||
618 | EXT4_MAX_BLOCK_FILE_PHYS); | ||
619 | *err = -EIO; | ||
620 | goto failed_out; | ||
621 | } | ||
663 | 622 | ||
664 | target -= count; | 623 | target -= count; |
665 | /* allocate blocks for indirect blocks */ | 624 | /* allocate blocks for indirect blocks */ |
@@ -695,7 +654,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
695 | ar.flags = EXT4_MB_HINT_DATA; | 654 | ar.flags = EXT4_MB_HINT_DATA; |
696 | 655 | ||
697 | current_block = ext4_mb_new_blocks(handle, &ar, err); | 656 | current_block = ext4_mb_new_blocks(handle, &ar, err); |
698 | BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS); | 657 | if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) { |
658 | EXT4_ERROR_INODE(inode, | ||
659 | "current_block %llu + ar.len %d > %d!", | ||
660 | current_block, ar.len, | ||
661 | EXT4_MAX_BLOCK_FILE_PHYS); | ||
662 | *err = -EIO; | ||
663 | goto failed_out; | ||
664 | } | ||
699 | 665 | ||
700 | if (*err && (target == blks)) { | 666 | if (*err && (target == blks)) { |
701 | /* | 667 | /* |
@@ -721,7 +687,7 @@ allocated: | |||
721 | return ret; | 687 | return ret; |
722 | failed_out: | 688 | failed_out: |
723 | for (i = 0; i < index; i++) | 689 | for (i = 0; i < index; i++) |
724 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); | 690 | ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); |
725 | return ret; | 691 | return ret; |
726 | } | 692 | } |
727 | 693 | ||
@@ -817,14 +783,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
817 | return err; | 783 | return err; |
818 | failed: | 784 | failed: |
819 | /* Allocation failed, free what we already allocated */ | 785 | /* Allocation failed, free what we already allocated */ |
786 | ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); | ||
820 | for (i = 1; i <= n ; i++) { | 787 | for (i = 1; i <= n ; i++) { |
821 | BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); | 788 | /* |
822 | ext4_journal_forget(handle, branch[i].bh); | 789 | * branch[i].bh is newly allocated, so there is no |
790 | * need to revoke the block, which is why we don't | ||
791 | * need to set EXT4_FREE_BLOCKS_METADATA. | ||
792 | */ | ||
793 | ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, | ||
794 | EXT4_FREE_BLOCKS_FORGET); | ||
823 | } | 795 | } |
824 | for (i = 0; i < indirect_blks; i++) | 796 | for (i = n+1; i < indirect_blks; i++) |
825 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); | 797 | ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); |
826 | 798 | ||
827 | ext4_free_blocks(handle, inode, new_blocks[i], num, 0); | 799 | ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0); |
828 | 800 | ||
829 | return err; | 801 | return err; |
830 | } | 802 | } |
@@ -903,12 +875,16 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
903 | 875 | ||
904 | err_out: | 876 | err_out: |
905 | for (i = 1; i <= num; i++) { | 877 | for (i = 1; i <= num; i++) { |
906 | BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); | 878 | /* |
907 | ext4_journal_forget(handle, where[i].bh); | 879 | * branch[i].bh is newly allocated, so there is no |
908 | ext4_free_blocks(handle, inode, | 880 | * need to revoke the block, which is why we don't |
909 | le32_to_cpu(where[i-1].key), 1, 0); | 881 | * need to set EXT4_FREE_BLOCKS_METADATA. |
882 | */ | ||
883 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, | ||
884 | EXT4_FREE_BLOCKS_FORGET); | ||
910 | } | 885 | } |
911 | ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0); | 886 | ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key), |
887 | blks, 0); | ||
912 | 888 | ||
913 | return err; | 889 | return err; |
914 | } | 890 | } |
@@ -1021,10 +997,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
1021 | if (!err) | 997 | if (!err) |
1022 | err = ext4_splice_branch(handle, inode, iblock, | 998 | err = ext4_splice_branch(handle, inode, iblock, |
1023 | partial, indirect_blks, count); | 999 | partial, indirect_blks, count); |
1024 | else | 1000 | if (err) |
1025 | goto cleanup; | 1001 | goto cleanup; |
1026 | 1002 | ||
1027 | set_buffer_new(bh_result); | 1003 | set_buffer_new(bh_result); |
1004 | |||
1005 | ext4_update_inode_fsync_trans(handle, inode, 1); | ||
1028 | got_it: | 1006 | got_it: |
1029 | map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); | 1007 | map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); |
1030 | if (count > blocks_to_boundary) | 1008 | if (count > blocks_to_boundary) |
@@ -1043,92 +1021,121 @@ out: | |||
1043 | return err; | 1021 | return err; |
1044 | } | 1022 | } |
1045 | 1023 | ||
1046 | qsize_t ext4_get_reserved_space(struct inode *inode) | 1024 | #ifdef CONFIG_QUOTA |
1025 | qsize_t *ext4_get_reserved_space(struct inode *inode) | ||
1047 | { | 1026 | { |
1048 | unsigned long long total; | 1027 | return &EXT4_I(inode)->i_reserved_quota; |
1049 | |||
1050 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
1051 | total = EXT4_I(inode)->i_reserved_data_blocks + | ||
1052 | EXT4_I(inode)->i_reserved_meta_blocks; | ||
1053 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
1054 | |||
1055 | return total; | ||
1056 | } | 1028 | } |
1029 | #endif | ||
1030 | |||
1057 | /* | 1031 | /* |
1058 | * Calculate the number of metadata blocks need to reserve | 1032 | * Calculate the number of metadata blocks need to reserve |
1059 | * to allocate @blocks for non extent file based file | 1033 | * to allocate a new block at @lblocks for non extent file based file |
1060 | */ | 1034 | */ |
1061 | static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) | 1035 | static int ext4_indirect_calc_metadata_amount(struct inode *inode, |
1036 | sector_t lblock) | ||
1062 | { | 1037 | { |
1063 | int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); | 1038 | struct ext4_inode_info *ei = EXT4_I(inode); |
1064 | int ind_blks, dind_blks, tind_blks; | 1039 | sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); |
1065 | 1040 | int blk_bits; | |
1066 | /* number of new indirect blocks needed */ | ||
1067 | ind_blks = (blocks + icap - 1) / icap; | ||
1068 | 1041 | ||
1069 | dind_blks = (ind_blks + icap - 1) / icap; | 1042 | if (lblock < EXT4_NDIR_BLOCKS) |
1043 | return 0; | ||
1070 | 1044 | ||
1071 | tind_blks = 1; | 1045 | lblock -= EXT4_NDIR_BLOCKS; |
1072 | 1046 | ||
1073 | return ind_blks + dind_blks + tind_blks; | 1047 | if (ei->i_da_metadata_calc_len && |
1048 | (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { | ||
1049 | ei->i_da_metadata_calc_len++; | ||
1050 | return 0; | ||
1051 | } | ||
1052 | ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; | ||
1053 | ei->i_da_metadata_calc_len = 1; | ||
1054 | blk_bits = order_base_2(lblock); | ||
1055 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; | ||
1074 | } | 1056 | } |
1075 | 1057 | ||
1076 | /* | 1058 | /* |
1077 | * Calculate the number of metadata blocks need to reserve | 1059 | * Calculate the number of metadata blocks need to reserve |
1078 | * to allocate given number of blocks | 1060 | * to allocate a block located at @lblock |
1079 | */ | 1061 | */ |
1080 | static int ext4_calc_metadata_amount(struct inode *inode, int blocks) | 1062 | static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) |
1081 | { | 1063 | { |
1082 | if (!blocks) | ||
1083 | return 0; | ||
1084 | |||
1085 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 1064 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) |
1086 | return ext4_ext_calc_metadata_amount(inode, blocks); | 1065 | return ext4_ext_calc_metadata_amount(inode, lblock); |
1087 | 1066 | ||
1088 | return ext4_indirect_calc_metadata_amount(inode, blocks); | 1067 | return ext4_indirect_calc_metadata_amount(inode, lblock); |
1089 | } | 1068 | } |
1090 | 1069 | ||
1091 | static void ext4_da_update_reserve_space(struct inode *inode, int used) | 1070 | /* |
1071 | * Called with i_data_sem down, which is important since we can call | ||
1072 | * ext4_discard_preallocations() from here. | ||
1073 | */ | ||
1074 | void ext4_da_update_reserve_space(struct inode *inode, | ||
1075 | int used, int quota_claim) | ||
1092 | { | 1076 | { |
1093 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1077 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1094 | int total, mdb, mdb_free; | 1078 | struct ext4_inode_info *ei = EXT4_I(inode); |
1095 | 1079 | int mdb_free = 0, allocated_meta_blocks = 0; | |
1096 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1080 | |
1097 | /* recalculate the number of metablocks still need to be reserved */ | 1081 | spin_lock(&ei->i_block_reservation_lock); |
1098 | total = EXT4_I(inode)->i_reserved_data_blocks - used; | 1082 | trace_ext4_da_update_reserve_space(inode, used); |
1099 | mdb = ext4_calc_metadata_amount(inode, total); | 1083 | if (unlikely(used > ei->i_reserved_data_blocks)) { |
1100 | 1084 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " | |
1101 | /* figure out how many metablocks to release */ | 1085 | "with only %d reserved data blocks\n", |
1102 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | 1086 | __func__, inode->i_ino, used, |
1103 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; | 1087 | ei->i_reserved_data_blocks); |
1104 | 1088 | WARN_ON(1); | |
1105 | if (mdb_free) { | 1089 | used = ei->i_reserved_data_blocks; |
1106 | /* Account for allocated meta_blocks */ | 1090 | } |
1107 | mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; | 1091 | |
1108 | 1092 | /* Update per-inode reservations */ | |
1109 | /* update fs dirty blocks counter */ | 1093 | ei->i_reserved_data_blocks -= used; |
1094 | used += ei->i_allocated_meta_blocks; | ||
1095 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; | ||
1096 | allocated_meta_blocks = ei->i_allocated_meta_blocks; | ||
1097 | ei->i_allocated_meta_blocks = 0; | ||
1098 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); | ||
1099 | |||
1100 | if (ei->i_reserved_data_blocks == 0) { | ||
1101 | /* | ||
1102 | * We can release all of the reserved metadata blocks | ||
1103 | * only when we have written all of the delayed | ||
1104 | * allocation blocks. | ||
1105 | */ | ||
1106 | mdb_free = ei->i_reserved_meta_blocks; | ||
1107 | ei->i_reserved_meta_blocks = 0; | ||
1108 | ei->i_da_metadata_calc_len = 0; | ||
1110 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); | 1109 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); |
1111 | EXT4_I(inode)->i_allocated_meta_blocks = 0; | ||
1112 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
1113 | } | 1110 | } |
1114 | |||
1115 | /* update per-inode reservations */ | ||
1116 | BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); | ||
1117 | EXT4_I(inode)->i_reserved_data_blocks -= used; | ||
1118 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1111 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1119 | 1112 | ||
1120 | /* | 1113 | /* Update quota subsystem */ |
1121 | * free those over-booking quota for metadata blocks | 1114 | if (quota_claim) { |
1122 | */ | 1115 | dquot_claim_block(inode, used); |
1123 | if (mdb_free) | 1116 | if (mdb_free) |
1124 | vfs_dq_release_reservation_block(inode, mdb_free); | 1117 | dquot_release_reservation_block(inode, mdb_free); |
1118 | } else { | ||
1119 | /* | ||
1120 | * We did fallocate with an offset that is already delayed | ||
1121 | * allocated. So on delayed allocated writeback we should | ||
1122 | * not update the quota for allocated blocks. But then | ||
1123 | * converting an fallocate region to initialized region would | ||
1124 | * have caused a metadata allocation. So claim quota for | ||
1125 | * that | ||
1126 | */ | ||
1127 | if (allocated_meta_blocks) | ||
1128 | dquot_claim_block(inode, allocated_meta_blocks); | ||
1129 | dquot_release_reservation_block(inode, mdb_free + used); | ||
1130 | } | ||
1125 | 1131 | ||
1126 | /* | 1132 | /* |
1127 | * If we have done all the pending block allocations and if | 1133 | * If we have done all the pending block allocations and if |
1128 | * there aren't any writers on the inode, we can discard the | 1134 | * there aren't any writers on the inode, we can discard the |
1129 | * inode's preallocations. | 1135 | * inode's preallocations. |
1130 | */ | 1136 | */ |
1131 | if (!total && (atomic_read(&inode->i_writecount) == 0)) | 1137 | if ((ei->i_reserved_data_blocks == 0) && |
1138 | (atomic_read(&inode->i_writecount) == 0)) | ||
1132 | ext4_discard_preallocations(inode); | 1139 | ext4_discard_preallocations(inode); |
1133 | } | 1140 | } |
1134 | 1141 | ||
@@ -1136,7 +1143,7 @@ static int check_block_validity(struct inode *inode, const char *msg, | |||
1136 | sector_t logical, sector_t phys, int len) | 1143 | sector_t logical, sector_t phys, int len) |
1137 | { | 1144 | { |
1138 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | 1145 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { |
1139 | ext4_error(inode->i_sb, msg, | 1146 | __ext4_error(inode->i_sb, msg, |
1140 | "inode #%lu logical block %llu mapped to %llu " | 1147 | "inode #%lu logical block %llu mapped to %llu " |
1141 | "(size %d)", inode->i_ino, | 1148 | "(size %d)", inode->i_ino, |
1142 | (unsigned long long) logical, | 1149 | (unsigned long long) logical, |
@@ -1318,20 +1325,22 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1318 | * i_data's format changing. Force the migrate | 1325 | * i_data's format changing. Force the migrate |
1319 | * to fail by clearing migrate flags | 1326 | * to fail by clearing migrate flags |
1320 | */ | 1327 | */ |
1321 | EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; | 1328 | ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); |
1322 | } | 1329 | } |
1323 | } | ||
1324 | 1330 | ||
1331 | /* | ||
1332 | * Update reserved blocks/metadata blocks after successful | ||
1333 | * block allocation which had been deferred till now. We don't | ||
1334 | * support fallocate for non extent files. So we can update | ||
1335 | * reserve space here. | ||
1336 | */ | ||
1337 | if ((retval > 0) && | ||
1338 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) | ||
1339 | ext4_da_update_reserve_space(inode, retval, 1); | ||
1340 | } | ||
1325 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 1341 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) |
1326 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; | 1342 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; |
1327 | 1343 | ||
1328 | /* | ||
1329 | * Update reserved blocks/metadata blocks after successful | ||
1330 | * block allocation which had been deferred till now. | ||
1331 | */ | ||
1332 | if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) | ||
1333 | ext4_da_update_reserve_space(inode, retval); | ||
1334 | |||
1335 | up_write((&EXT4_I(inode)->i_data_sem)); | 1344 | up_write((&EXT4_I(inode)->i_data_sem)); |
1336 | if (retval > 0 && buffer_mapped(bh)) { | 1345 | if (retval > 0 && buffer_mapped(bh)) { |
1337 | int ret = check_block_validity(inode, "file system " | 1346 | int ret = check_block_validity(inode, "file system " |
@@ -1534,6 +1543,18 @@ static int do_journal_get_write_access(handle_t *handle, | |||
1534 | return ext4_journal_get_write_access(handle, bh); | 1543 | return ext4_journal_get_write_access(handle, bh); |
1535 | } | 1544 | } |
1536 | 1545 | ||
1546 | /* | ||
1547 | * Truncate blocks that were not used by write. We have to truncate the | ||
1548 | * pagecache as well so that corresponding buffers get properly unmapped. | ||
1549 | */ | ||
1550 | static void ext4_truncate_failed_write(struct inode *inode) | ||
1551 | { | ||
1552 | truncate_inode_pages(inode->i_mapping, inode->i_size); | ||
1553 | ext4_truncate(inode); | ||
1554 | } | ||
1555 | |||
1556 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, | ||
1557 | struct buffer_head *bh_result, int create); | ||
1537 | static int ext4_write_begin(struct file *file, struct address_space *mapping, | 1558 | static int ext4_write_begin(struct file *file, struct address_space *mapping, |
1538 | loff_t pos, unsigned len, unsigned flags, | 1559 | loff_t pos, unsigned len, unsigned flags, |
1539 | struct page **pagep, void **fsdata) | 1560 | struct page **pagep, void **fsdata) |
@@ -1575,8 +1596,12 @@ retry: | |||
1575 | } | 1596 | } |
1576 | *pagep = page; | 1597 | *pagep = page; |
1577 | 1598 | ||
1578 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 1599 | if (ext4_should_dioread_nolock(inode)) |
1579 | ext4_get_block); | 1600 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, |
1601 | fsdata, ext4_get_block_write); | ||
1602 | else | ||
1603 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, | ||
1604 | fsdata, ext4_get_block); | ||
1580 | 1605 | ||
1581 | if (!ret && ext4_should_journal_data(inode)) { | 1606 | if (!ret && ext4_should_journal_data(inode)) { |
1582 | ret = walk_page_buffers(handle, page_buffers(page), | 1607 | ret = walk_page_buffers(handle, page_buffers(page), |
@@ -1599,7 +1624,7 @@ retry: | |||
1599 | 1624 | ||
1600 | ext4_journal_stop(handle); | 1625 | ext4_journal_stop(handle); |
1601 | if (pos + len > inode->i_size) { | 1626 | if (pos + len > inode->i_size) { |
1602 | ext4_truncate(inode); | 1627 | ext4_truncate_failed_write(inode); |
1603 | /* | 1628 | /* |
1604 | * If truncate failed early the inode might | 1629 | * If truncate failed early the inode might |
1605 | * still be on the orphan list; we need to | 1630 | * still be on the orphan list; we need to |
@@ -1709,7 +1734,7 @@ static int ext4_ordered_write_end(struct file *file, | |||
1709 | ret = ret2; | 1734 | ret = ret2; |
1710 | 1735 | ||
1711 | if (pos + len > inode->i_size) { | 1736 | if (pos + len > inode->i_size) { |
1712 | ext4_truncate(inode); | 1737 | ext4_truncate_failed_write(inode); |
1713 | /* | 1738 | /* |
1714 | * If truncate failed early the inode might still be | 1739 | * If truncate failed early the inode might still be |
1715 | * on the orphan list; we need to make sure the inode | 1740 | * on the orphan list; we need to make sure the inode |
@@ -1751,7 +1776,7 @@ static int ext4_writeback_write_end(struct file *file, | |||
1751 | ret = ret2; | 1776 | ret = ret2; |
1752 | 1777 | ||
1753 | if (pos + len > inode->i_size) { | 1778 | if (pos + len > inode->i_size) { |
1754 | ext4_truncate(inode); | 1779 | ext4_truncate_failed_write(inode); |
1755 | /* | 1780 | /* |
1756 | * If truncate failed early the inode might still be | 1781 | * If truncate failed early the inode might still be |
1757 | * on the orphan list; we need to make sure the inode | 1782 | * on the orphan list; we need to make sure the inode |
@@ -1793,7 +1818,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1793 | new_i_size = pos + copied; | 1818 | new_i_size = pos + copied; |
1794 | if (new_i_size > inode->i_size) | 1819 | if (new_i_size > inode->i_size) |
1795 | i_size_write(inode, pos+copied); | 1820 | i_size_write(inode, pos+copied); |
1796 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | 1821 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
1797 | if (new_i_size > EXT4_I(inode)->i_disksize) { | 1822 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1798 | ext4_update_i_disksize(inode, new_i_size); | 1823 | ext4_update_i_disksize(inode, new_i_size); |
1799 | ret2 = ext4_mark_inode_dirty(handle, inode); | 1824 | ret2 = ext4_mark_inode_dirty(handle, inode); |
@@ -1814,7 +1839,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1814 | if (!ret) | 1839 | if (!ret) |
1815 | ret = ret2; | 1840 | ret = ret2; |
1816 | if (pos + len > inode->i_size) { | 1841 | if (pos + len > inode->i_size) { |
1817 | ext4_truncate(inode); | 1842 | ext4_truncate_failed_write(inode); |
1818 | /* | 1843 | /* |
1819 | * If truncate failed early the inode might still be | 1844 | * If truncate failed early the inode might still be |
1820 | * on the orphan list; we need to make sure the inode | 1845 | * on the orphan list; we need to make sure the inode |
@@ -1827,11 +1852,16 @@ static int ext4_journalled_write_end(struct file *file, | |||
1827 | return ret ? ret : copied; | 1852 | return ret ? ret : copied; |
1828 | } | 1853 | } |
1829 | 1854 | ||
1830 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | 1855 | /* |
1856 | * Reserve a single block located at lblock | ||
1857 | */ | ||
1858 | static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) | ||
1831 | { | 1859 | { |
1832 | int retries = 0; | 1860 | int retries = 0; |
1833 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1861 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1834 | unsigned long md_needed, mdblocks, total = 0; | 1862 | struct ext4_inode_info *ei = EXT4_I(inode); |
1863 | unsigned long md_needed, md_reserved; | ||
1864 | int ret; | ||
1835 | 1865 | ||
1836 | /* | 1866 | /* |
1837 | * recalculate the amount of metadata blocks to reserve | 1867 | * recalculate the amount of metadata blocks to reserve |
@@ -1839,86 +1869,80 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | |||
1839 | * worse case is one extent per block | 1869 | * worse case is one extent per block |
1840 | */ | 1870 | */ |
1841 | repeat: | 1871 | repeat: |
1842 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1872 | spin_lock(&ei->i_block_reservation_lock); |
1843 | total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; | 1873 | md_reserved = ei->i_reserved_meta_blocks; |
1844 | mdblocks = ext4_calc_metadata_amount(inode, total); | 1874 | md_needed = ext4_calc_metadata_amount(inode, lblock); |
1845 | BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); | 1875 | trace_ext4_da_reserve_space(inode, md_needed); |
1846 | 1876 | spin_unlock(&ei->i_block_reservation_lock); | |
1847 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; | ||
1848 | total = md_needed + nrblocks; | ||
1849 | 1877 | ||
1850 | /* | 1878 | /* |
1851 | * Make quota reservation here to prevent quota overflow | 1879 | * Make quota reservation here to prevent quota overflow |
1852 | * later. Real quota accounting is done at pages writeout | 1880 | * later. Real quota accounting is done at pages writeout |
1853 | * time. | 1881 | * time. |
1854 | */ | 1882 | */ |
1855 | if (vfs_dq_reserve_block(inode, total)) { | 1883 | ret = dquot_reserve_block(inode, md_needed + 1); |
1856 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1884 | if (ret) |
1857 | return -EDQUOT; | 1885 | return ret; |
1858 | } | ||
1859 | 1886 | ||
1860 | if (ext4_claim_free_blocks(sbi, total)) { | 1887 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { |
1861 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1888 | dquot_release_reservation_block(inode, md_needed + 1); |
1862 | vfs_dq_release_reservation_block(inode, total); | ||
1863 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1889 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
1864 | yield(); | 1890 | yield(); |
1865 | goto repeat; | 1891 | goto repeat; |
1866 | } | 1892 | } |
1867 | return -ENOSPC; | 1893 | return -ENOSPC; |
1868 | } | 1894 | } |
1869 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; | 1895 | spin_lock(&ei->i_block_reservation_lock); |
1870 | EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; | 1896 | ei->i_reserved_data_blocks++; |
1897 | ei->i_reserved_meta_blocks += md_needed; | ||
1898 | spin_unlock(&ei->i_block_reservation_lock); | ||
1871 | 1899 | ||
1872 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
1873 | return 0; /* success */ | 1900 | return 0; /* success */ |
1874 | } | 1901 | } |
1875 | 1902 | ||
1876 | static void ext4_da_release_space(struct inode *inode, int to_free) | 1903 | static void ext4_da_release_space(struct inode *inode, int to_free) |
1877 | { | 1904 | { |
1878 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1905 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1879 | int total, mdb, mdb_free, release; | 1906 | struct ext4_inode_info *ei = EXT4_I(inode); |
1880 | 1907 | ||
1881 | if (!to_free) | 1908 | if (!to_free) |
1882 | return; /* Nothing to release, exit */ | 1909 | return; /* Nothing to release, exit */ |
1883 | 1910 | ||
1884 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1911 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1885 | 1912 | ||
1886 | if (!EXT4_I(inode)->i_reserved_data_blocks) { | 1913 | if (unlikely(to_free > ei->i_reserved_data_blocks)) { |
1887 | /* | 1914 | /* |
1888 | * if there is no reserved blocks, but we try to free some | 1915 | * if there aren't enough reserved blocks, then the |
1889 | * then the counter is messed up somewhere. | 1916 | * counter is messed up somewhere. Since this |
1890 | * but since this function is called from invalidate | 1917 | * function is called from invalidate page, it's |
1891 | * page, it's harmless to return without any action | 1918 | * harmless to return without any action. |
1892 | */ | 1919 | */ |
1893 | printk(KERN_INFO "ext4 delalloc try to release %d reserved " | 1920 | ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " |
1894 | "blocks for inode %lu, but there is no reserved " | 1921 | "ino %lu, to_free %d with only %d reserved " |
1895 | "data blocks\n", to_free, inode->i_ino); | 1922 | "data blocks\n", inode->i_ino, to_free, |
1896 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1923 | ei->i_reserved_data_blocks); |
1897 | return; | 1924 | WARN_ON(1); |
1925 | to_free = ei->i_reserved_data_blocks; | ||
1898 | } | 1926 | } |
1927 | ei->i_reserved_data_blocks -= to_free; | ||
1899 | 1928 | ||
1900 | /* recalculate the number of metablocks still need to be reserved */ | 1929 | if (ei->i_reserved_data_blocks == 0) { |
1901 | total = EXT4_I(inode)->i_reserved_data_blocks - to_free; | 1930 | /* |
1902 | mdb = ext4_calc_metadata_amount(inode, total); | 1931 | * We can release all of the reserved metadata blocks |
1903 | 1932 | * only when we have written all of the delayed | |
1904 | /* figure out how many metablocks to release */ | 1933 | * allocation blocks. |
1905 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | 1934 | */ |
1906 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; | 1935 | to_free += ei->i_reserved_meta_blocks; |
1907 | 1936 | ei->i_reserved_meta_blocks = 0; | |
1908 | release = to_free + mdb_free; | 1937 | ei->i_da_metadata_calc_len = 0; |
1909 | 1938 | } | |
1910 | /* update fs dirty blocks counter for truncate case */ | ||
1911 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, release); | ||
1912 | 1939 | ||
1913 | /* update per-inode reservations */ | 1940 | /* update fs dirty blocks counter */ |
1914 | BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); | 1941 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); |
1915 | EXT4_I(inode)->i_reserved_data_blocks -= to_free; | ||
1916 | 1942 | ||
1917 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | ||
1918 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
1919 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1943 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1920 | 1944 | ||
1921 | vfs_dq_release_reservation_block(inode, release); | 1945 | dquot_release_reservation_block(inode, to_free); |
1922 | } | 1946 | } |
1923 | 1947 | ||
1924 | static void ext4_da_page_release_reservation(struct page *page, | 1948 | static void ext4_da_page_release_reservation(struct page *page, |
@@ -2095,6 +2119,8 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
2095 | } else if (buffer_mapped(bh)) | 2119 | } else if (buffer_mapped(bh)) |
2096 | BUG_ON(bh->b_blocknr != pblock); | 2120 | BUG_ON(bh->b_blocknr != pblock); |
2097 | 2121 | ||
2122 | if (buffer_uninit(exbh)) | ||
2123 | set_buffer_uninit(bh); | ||
2098 | cur_logical++; | 2124 | cur_logical++; |
2099 | pblock++; | 2125 | pblock++; |
2100 | } while ((bh = bh->b_this_page) != head); | 2126 | } while ((bh = bh->b_this_page) != head); |
@@ -2137,17 +2163,16 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | |||
2137 | break; | 2163 | break; |
2138 | for (i = 0; i < nr_pages; i++) { | 2164 | for (i = 0; i < nr_pages; i++) { |
2139 | struct page *page = pvec.pages[i]; | 2165 | struct page *page = pvec.pages[i]; |
2140 | index = page->index; | 2166 | if (page->index > end) |
2141 | if (index > end) | ||
2142 | break; | 2167 | break; |
2143 | index++; | ||
2144 | |||
2145 | BUG_ON(!PageLocked(page)); | 2168 | BUG_ON(!PageLocked(page)); |
2146 | BUG_ON(PageWriteback(page)); | 2169 | BUG_ON(PageWriteback(page)); |
2147 | block_invalidatepage(page, 0); | 2170 | block_invalidatepage(page, 0); |
2148 | ClearPageUptodate(page); | 2171 | ClearPageUptodate(page); |
2149 | unlock_page(page); | 2172 | unlock_page(page); |
2150 | } | 2173 | } |
2174 | index = pvec.pages[nr_pages - 1]->index + 1; | ||
2175 | pagevec_release(&pvec); | ||
2151 | } | 2176 | } |
2152 | return; | 2177 | return; |
2153 | } | 2178 | } |
@@ -2223,10 +2248,12 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2223 | * variables are updated after the blocks have been allocated. | 2248 | * variables are updated after the blocks have been allocated. |
2224 | */ | 2249 | */ |
2225 | new.b_state = 0; | 2250 | new.b_state = 0; |
2226 | get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | | 2251 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE; |
2227 | EXT4_GET_BLOCKS_DELALLOC_RESERVE); | 2252 | if (ext4_should_dioread_nolock(mpd->inode)) |
2253 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
2228 | if (mpd->b_state & (1 << BH_Delay)) | 2254 | if (mpd->b_state & (1 << BH_Delay)) |
2229 | get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; | 2255 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; |
2256 | |||
2230 | blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, | 2257 | blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, |
2231 | &new, get_blocks_flags); | 2258 | &new, get_blocks_flags); |
2232 | if (blks < 0) { | 2259 | if (blks < 0) { |
@@ -2524,7 +2551,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2524 | * XXX: __block_prepare_write() unmaps passed block, | 2551 | * XXX: __block_prepare_write() unmaps passed block, |
2525 | * is it OK? | 2552 | * is it OK? |
2526 | */ | 2553 | */ |
2527 | ret = ext4_da_reserve_space(inode, 1); | 2554 | ret = ext4_da_reserve_space(inode, iblock); |
2528 | if (ret) | 2555 | if (ret) |
2529 | /* not enough space to reserve */ | 2556 | /* not enough space to reserve */ |
2530 | return ret; | 2557 | return ret; |
@@ -2600,7 +2627,6 @@ static int bput_one(handle_t *handle, struct buffer_head *bh) | |||
2600 | } | 2627 | } |
2601 | 2628 | ||
2602 | static int __ext4_journalled_writepage(struct page *page, | 2629 | static int __ext4_journalled_writepage(struct page *page, |
2603 | struct writeback_control *wbc, | ||
2604 | unsigned int len) | 2630 | unsigned int len) |
2605 | { | 2631 | { |
2606 | struct address_space *mapping = page->mapping; | 2632 | struct address_space *mapping = page->mapping; |
@@ -2635,11 +2661,14 @@ static int __ext4_journalled_writepage(struct page *page, | |||
2635 | ret = err; | 2661 | ret = err; |
2636 | 2662 | ||
2637 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); | 2663 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); |
2638 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | 2664 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
2639 | out: | 2665 | out: |
2640 | return ret; | 2666 | return ret; |
2641 | } | 2667 | } |
2642 | 2668 | ||
2669 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | ||
2670 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | ||
2671 | |||
2643 | /* | 2672 | /* |
2644 | * Note that we don't need to start a transaction unless we're journaling data | 2673 | * Note that we don't need to start a transaction unless we're journaling data |
2645 | * because we should have holes filled from ext4_page_mkwrite(). We even don't | 2674 | * because we should have holes filled from ext4_page_mkwrite(). We even don't |
@@ -2687,7 +2716,7 @@ static int ext4_writepage(struct page *page, | |||
2687 | int ret = 0; | 2716 | int ret = 0; |
2688 | loff_t size; | 2717 | loff_t size; |
2689 | unsigned int len; | 2718 | unsigned int len; |
2690 | struct buffer_head *page_bufs; | 2719 | struct buffer_head *page_bufs = NULL; |
2691 | struct inode *inode = page->mapping->host; | 2720 | struct inode *inode = page->mapping->host; |
2692 | 2721 | ||
2693 | trace_ext4_writepage(inode, page); | 2722 | trace_ext4_writepage(inode, page); |
@@ -2758,12 +2787,16 @@ static int ext4_writepage(struct page *page, | |||
2758 | * doesn't seem much point in redirtying the page here. | 2787 | * doesn't seem much point in redirtying the page here. |
2759 | */ | 2788 | */ |
2760 | ClearPageChecked(page); | 2789 | ClearPageChecked(page); |
2761 | return __ext4_journalled_writepage(page, wbc, len); | 2790 | return __ext4_journalled_writepage(page, len); |
2762 | } | 2791 | } |
2763 | 2792 | ||
2764 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2793 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) |
2765 | ret = nobh_writepage(page, noalloc_get_block_write, wbc); | 2794 | ret = nobh_writepage(page, noalloc_get_block_write, wbc); |
2766 | else | 2795 | else if (page_bufs && buffer_uninit(page_bufs)) { |
2796 | ext4_set_bh_endio(page_bufs, inode); | ||
2797 | ret = block_write_full_page_endio(page, noalloc_get_block_write, | ||
2798 | wbc, ext4_end_io_buffer_write); | ||
2799 | } else | ||
2767 | ret = block_write_full_page(page, noalloc_get_block_write, | 2800 | ret = block_write_full_page(page, noalloc_get_block_write, |
2768 | wbc); | 2801 | wbc); |
2769 | 2802 | ||
@@ -2788,7 +2821,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2788 | * number of contiguous block. So we will limit | 2821 | * number of contiguous block. So we will limit |
2789 | * number of contiguous block to a sane value | 2822 | * number of contiguous block to a sane value |
2790 | */ | 2823 | */ |
2791 | if (!(inode->i_flags & EXT4_EXTENTS_FL) && | 2824 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) && |
2792 | (max_blocks > EXT4_MAX_TRANS_DATA)) | 2825 | (max_blocks > EXT4_MAX_TRANS_DATA)) |
2793 | max_blocks = EXT4_MAX_TRANS_DATA; | 2826 | max_blocks = EXT4_MAX_TRANS_DATA; |
2794 | 2827 | ||
@@ -2933,7 +2966,7 @@ retry: | |||
2933 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, | 2966 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, |
2934 | &mpd); | 2967 | &mpd); |
2935 | /* | 2968 | /* |
2936 | * If we have a contigous extent of pages and we | 2969 | * If we have a contiguous extent of pages and we |
2937 | * haven't done the I/O yet, map the blocks and submit | 2970 | * haven't done the I/O yet, map the blocks and submit |
2938 | * them for I/O. | 2971 | * them for I/O. |
2939 | */ | 2972 | */ |
@@ -2999,8 +3032,7 @@ retry: | |||
2999 | out_writepages: | 3032 | out_writepages: |
3000 | if (!no_nrwrite_index_update) | 3033 | if (!no_nrwrite_index_update) |
3001 | wbc->no_nrwrite_index_update = 0; | 3034 | wbc->no_nrwrite_index_update = 0; |
3002 | if (wbc->nr_to_write > nr_to_writebump) | 3035 | wbc->nr_to_write -= nr_to_writebump; |
3003 | wbc->nr_to_write -= nr_to_writebump; | ||
3004 | wbc->range_start = range_start; | 3036 | wbc->range_start = range_start; |
3005 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 3037 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); |
3006 | return ret; | 3038 | return ret; |
@@ -3025,11 +3057,18 @@ static int ext4_nonda_switch(struct super_block *sb) | |||
3025 | if (2 * free_blocks < 3 * dirty_blocks || | 3057 | if (2 * free_blocks < 3 * dirty_blocks || |
3026 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { | 3058 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { |
3027 | /* | 3059 | /* |
3028 | * free block count is less that 150% of dirty blocks | 3060 | * free block count is less than 150% of dirty blocks |
3029 | * or free blocks is less that watermark | 3061 | * or free blocks is less than watermark |
3030 | */ | 3062 | */ |
3031 | return 1; | 3063 | return 1; |
3032 | } | 3064 | } |
3065 | /* | ||
3066 | * Even if we don't switch but are nearing capacity, | ||
3067 | * start pushing delalloc when 1/2 of free blocks are dirty. | ||
3068 | */ | ||
3069 | if (free_blocks < 2 * dirty_blocks) | ||
3070 | writeback_inodes_sb_if_idle(sb); | ||
3071 | |||
3033 | return 0; | 3072 | return 0; |
3034 | } | 3073 | } |
3035 | 3074 | ||
@@ -3037,7 +3076,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
3037 | loff_t pos, unsigned len, unsigned flags, | 3076 | loff_t pos, unsigned len, unsigned flags, |
3038 | struct page **pagep, void **fsdata) | 3077 | struct page **pagep, void **fsdata) |
3039 | { | 3078 | { |
3040 | int ret, retries = 0; | 3079 | int ret, retries = 0, quota_retries = 0; |
3041 | struct page *page; | 3080 | struct page *page; |
3042 | pgoff_t index; | 3081 | pgoff_t index; |
3043 | unsigned from, to; | 3082 | unsigned from, to; |
@@ -3091,11 +3130,27 @@ retry: | |||
3091 | * i_size_read because we hold i_mutex. | 3130 | * i_size_read because we hold i_mutex. |
3092 | */ | 3131 | */ |
3093 | if (pos + len > inode->i_size) | 3132 | if (pos + len > inode->i_size) |
3094 | ext4_truncate(inode); | 3133 | ext4_truncate_failed_write(inode); |
3095 | } | 3134 | } |
3096 | 3135 | ||
3097 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3136 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
3098 | goto retry; | 3137 | goto retry; |
3138 | |||
3139 | if ((ret == -EDQUOT) && | ||
3140 | EXT4_I(inode)->i_reserved_meta_blocks && | ||
3141 | (quota_retries++ < 3)) { | ||
3142 | /* | ||
3143 | * Since we often over-estimate the number of meta | ||
3144 | * data blocks required, we may sometimes get a | ||
3145 | * spurious out of quota error even though there would | ||
3146 | * be enough space once we write the data blocks and | ||
3147 | * find out how many meta data blocks were _really_ | ||
3148 | * required. So try forcing the inode write to see if | ||
3149 | * that helps. | ||
3150 | */ | ||
3151 | write_inode_now(inode, (quota_retries == 3)); | ||
3152 | goto retry; | ||
3153 | } | ||
3099 | out: | 3154 | out: |
3100 | return ret; | 3155 | return ret; |
3101 | } | 3156 | } |
@@ -3284,7 +3339,8 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
3284 | filemap_write_and_wait(mapping); | 3339 | filemap_write_and_wait(mapping); |
3285 | } | 3340 | } |
3286 | 3341 | ||
3287 | if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { | 3342 | if (EXT4_JOURNAL(inode) && |
3343 | ext4_test_inode_state(inode, EXT4_STATE_JDATA)) { | ||
3288 | /* | 3344 | /* |
3289 | * This is a REALLY heavyweight approach, but the use of | 3345 | * This is a REALLY heavyweight approach, but the use of |
3290 | * bmap on dirty files is expected to be extremely rare: | 3346 | * bmap on dirty files is expected to be extremely rare: |
@@ -3303,7 +3359,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
3303 | * everything they get. | 3359 | * everything they get. |
3304 | */ | 3360 | */ |
3305 | 3361 | ||
3306 | EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA; | 3362 | ext4_clear_inode_state(inode, EXT4_STATE_JDATA); |
3307 | journal = EXT4_JOURNAL(inode); | 3363 | journal = EXT4_JOURNAL(inode); |
3308 | jbd2_journal_lock_updates(journal); | 3364 | jbd2_journal_lock_updates(journal); |
3309 | err = jbd2_journal_flush(journal); | 3365 | err = jbd2_journal_flush(journal); |
@@ -3328,11 +3384,45 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
3328 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 3384 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
3329 | } | 3385 | } |
3330 | 3386 | ||
3387 | static void ext4_free_io_end(ext4_io_end_t *io) | ||
3388 | { | ||
3389 | BUG_ON(!io); | ||
3390 | if (io->page) | ||
3391 | put_page(io->page); | ||
3392 | iput(io->inode); | ||
3393 | kfree(io); | ||
3394 | } | ||
3395 | |||
3396 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) | ||
3397 | { | ||
3398 | struct buffer_head *head, *bh; | ||
3399 | unsigned int curr_off = 0; | ||
3400 | |||
3401 | if (!page_has_buffers(page)) | ||
3402 | return; | ||
3403 | head = bh = page_buffers(page); | ||
3404 | do { | ||
3405 | if (offset <= curr_off && test_clear_buffer_uninit(bh) | ||
3406 | && bh->b_private) { | ||
3407 | ext4_free_io_end(bh->b_private); | ||
3408 | bh->b_private = NULL; | ||
3409 | bh->b_end_io = NULL; | ||
3410 | } | ||
3411 | curr_off = curr_off + bh->b_size; | ||
3412 | bh = bh->b_this_page; | ||
3413 | } while (bh != head); | ||
3414 | } | ||
3415 | |||
3331 | static void ext4_invalidatepage(struct page *page, unsigned long offset) | 3416 | static void ext4_invalidatepage(struct page *page, unsigned long offset) |
3332 | { | 3417 | { |
3333 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3418 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3334 | 3419 | ||
3335 | /* | 3420 | /* |
3421 | * free any io_end structure allocated for buffers to be discarded | ||
3422 | */ | ||
3423 | if (ext4_should_dioread_nolock(page->mapping->host)) | ||
3424 | ext4_invalidatepage_free_endio(page, offset); | ||
3425 | /* | ||
3336 | * If it's a full truncate we just forget about the pending dirtying | 3426 | * If it's a full truncate we just forget about the pending dirtying |
3337 | */ | 3427 | */ |
3338 | if (offset == 0) | 3428 | if (offset == 0) |
@@ -3403,7 +3493,14 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
3403 | } | 3493 | } |
3404 | 3494 | ||
3405 | retry: | 3495 | retry: |
3406 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 3496 | if (rw == READ && ext4_should_dioread_nolock(inode)) |
3497 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | ||
3498 | inode->i_sb->s_bdev, iov, | ||
3499 | offset, nr_segs, | ||
3500 | ext4_get_block, NULL); | ||
3501 | else | ||
3502 | ret = blockdev_direct_IO(rw, iocb, inode, | ||
3503 | inode->i_sb->s_bdev, iov, | ||
3407 | offset, nr_segs, | 3504 | offset, nr_segs, |
3408 | ext4_get_block, NULL); | 3505 | ext4_get_block, NULL); |
3409 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3506 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -3419,6 +3516,9 @@ retry: | |||
3419 | * but cannot extend i_size. Bail out and pretend | 3516 | * but cannot extend i_size. Bail out and pretend |
3420 | * the write failed... */ | 3517 | * the write failed... */ |
3421 | ret = PTR_ERR(handle); | 3518 | ret = PTR_ERR(handle); |
3519 | if (inode->i_nlink) | ||
3520 | ext4_orphan_del(NULL, inode); | ||
3521 | |||
3422 | goto out; | 3522 | goto out; |
3423 | } | 3523 | } |
3424 | if (inode->i_nlink) | 3524 | if (inode->i_nlink) |
@@ -3446,75 +3546,63 @@ out: | |||
3446 | return ret; | 3546 | return ret; |
3447 | } | 3547 | } |
3448 | 3548 | ||
3449 | static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, | 3549 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, |
3450 | struct buffer_head *bh_result, int create) | 3550 | struct buffer_head *bh_result, int create) |
3451 | { | 3551 | { |
3452 | handle_t *handle = NULL; | 3552 | handle_t *handle = ext4_journal_current_handle(); |
3453 | int ret = 0; | 3553 | int ret = 0; |
3454 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 3554 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
3455 | int dio_credits; | 3555 | int dio_credits; |
3556 | int started = 0; | ||
3456 | 3557 | ||
3457 | ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", | 3558 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", |
3458 | inode->i_ino, create); | 3559 | inode->i_ino, create); |
3459 | /* | 3560 | /* |
3460 | * DIO VFS code passes create = 0 flag for write to | 3561 | * ext4_get_block in prepare for a DIO write or buffer write. |
3461 | * the middle of file. It does this to avoid block | 3562 | * We allocate an uninitialized extent if blocks haven't been allocated. |
3462 | * allocation for holes, to prevent expose stale data | 3563 | * The extent will be converted to initialized after IO complete. |
3463 | * out when there is parallel buffered read (which does | ||
3464 | * not hold the i_mutex lock) while direct IO write has | ||
3465 | * not completed. DIO request on holes finally falls back | ||
3466 | * to buffered IO for this reason. | ||
3467 | * | ||
3468 | * For ext4 extent based file, since we support fallocate, | ||
3469 | * new allocated extent as uninitialized, for holes, we | ||
3470 | * could fallocate blocks for holes, thus parallel | ||
3471 | * buffered IO read will zero out the page when read on | ||
3472 | * a hole while parallel DIO write to the hole has not completed. | ||
3473 | * | ||
3474 | * when we come here, we know it's a direct IO write to | ||
3475 | * to the middle of file (<i_size) | ||
3476 | * so it's safe to override the create flag from VFS. | ||
3477 | */ | 3564 | */ |
3478 | create = EXT4_GET_BLOCKS_DIO_CREATE_EXT; | 3565 | create = EXT4_GET_BLOCKS_IO_CREATE_EXT; |
3479 | 3566 | ||
3480 | if (max_blocks > DIO_MAX_BLOCKS) | 3567 | if (!handle) { |
3481 | max_blocks = DIO_MAX_BLOCKS; | 3568 | if (max_blocks > DIO_MAX_BLOCKS) |
3482 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); | 3569 | max_blocks = DIO_MAX_BLOCKS; |
3483 | handle = ext4_journal_start(inode, dio_credits); | 3570 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); |
3484 | if (IS_ERR(handle)) { | 3571 | handle = ext4_journal_start(inode, dio_credits); |
3485 | ret = PTR_ERR(handle); | 3572 | if (IS_ERR(handle)) { |
3486 | goto out; | 3573 | ret = PTR_ERR(handle); |
3574 | goto out; | ||
3575 | } | ||
3576 | started = 1; | ||
3487 | } | 3577 | } |
3578 | |||
3488 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, | 3579 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, |
3489 | create); | 3580 | create); |
3490 | if (ret > 0) { | 3581 | if (ret > 0) { |
3491 | bh_result->b_size = (ret << inode->i_blkbits); | 3582 | bh_result->b_size = (ret << inode->i_blkbits); |
3492 | ret = 0; | 3583 | ret = 0; |
3493 | } | 3584 | } |
3494 | ext4_journal_stop(handle); | 3585 | if (started) |
3586 | ext4_journal_stop(handle); | ||
3495 | out: | 3587 | out: |
3496 | return ret; | 3588 | return ret; |
3497 | } | 3589 | } |
3498 | 3590 | ||
3499 | static void ext4_free_io_end(ext4_io_end_t *io) | 3591 | static void dump_completed_IO(struct inode * inode) |
3500 | { | ||
3501 | BUG_ON(!io); | ||
3502 | iput(io->inode); | ||
3503 | kfree(io); | ||
3504 | } | ||
3505 | static void dump_aio_dio_list(struct inode * inode) | ||
3506 | { | 3592 | { |
3507 | #ifdef EXT4_DEBUG | 3593 | #ifdef EXT4_DEBUG |
3508 | struct list_head *cur, *before, *after; | 3594 | struct list_head *cur, *before, *after; |
3509 | ext4_io_end_t *io, *io0, *io1; | 3595 | ext4_io_end_t *io, *io0, *io1; |
3596 | unsigned long flags; | ||
3510 | 3597 | ||
3511 | if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ | 3598 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ |
3512 | ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); | 3599 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); |
3513 | return; | 3600 | return; |
3514 | } | 3601 | } |
3515 | 3602 | ||
3516 | ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); | 3603 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); |
3517 | list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ | 3604 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); |
3605 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
3518 | cur = &io->list; | 3606 | cur = &io->list; |
3519 | before = cur->prev; | 3607 | before = cur->prev; |
3520 | io0 = container_of(before, ext4_io_end_t, list); | 3608 | io0 = container_of(before, ext4_io_end_t, list); |
@@ -3524,32 +3612,31 @@ static void dump_aio_dio_list(struct inode * inode) | |||
3524 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | 3612 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", |
3525 | io, inode->i_ino, io0, io1); | 3613 | io, inode->i_ino, io0, io1); |
3526 | } | 3614 | } |
3615 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3527 | #endif | 3616 | #endif |
3528 | } | 3617 | } |
3529 | 3618 | ||
3530 | /* | 3619 | /* |
3531 | * check a range of space and convert unwritten extents to written. | 3620 | * check a range of space and convert unwritten extents to written. |
3532 | */ | 3621 | */ |
3533 | static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) | 3622 | static int ext4_end_io_nolock(ext4_io_end_t *io) |
3534 | { | 3623 | { |
3535 | struct inode *inode = io->inode; | 3624 | struct inode *inode = io->inode; |
3536 | loff_t offset = io->offset; | 3625 | loff_t offset = io->offset; |
3537 | size_t size = io->size; | 3626 | ssize_t size = io->size; |
3538 | int ret = 0; | 3627 | int ret = 0; |
3539 | 3628 | ||
3540 | ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," | 3629 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," |
3541 | "list->prev 0x%p\n", | 3630 | "list->prev 0x%p\n", |
3542 | io, inode->i_ino, io->list.next, io->list.prev); | 3631 | io, inode->i_ino, io->list.next, io->list.prev); |
3543 | 3632 | ||
3544 | if (list_empty(&io->list)) | 3633 | if (list_empty(&io->list)) |
3545 | return ret; | 3634 | return ret; |
3546 | 3635 | ||
3547 | if (io->flag != DIO_AIO_UNWRITTEN) | 3636 | if (io->flag != EXT4_IO_UNWRITTEN) |
3548 | return ret; | 3637 | return ret; |
3549 | 3638 | ||
3550 | if (offset + size <= i_size_read(inode)) | 3639 | ret = ext4_convert_unwritten_extents(inode, offset, size); |
3551 | ret = ext4_convert_unwritten_extents(inode, offset, size); | ||
3552 | |||
3553 | if (ret < 0) { | 3640 | if (ret < 0) { |
3554 | printk(KERN_EMERG "%s: failed to convert unwritten" | 3641 | printk(KERN_EMERG "%s: failed to convert unwritten" |
3555 | "extents to written extents, error is %d" | 3642 | "extents to written extents, error is %d" |
@@ -3562,50 +3649,64 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) | |||
3562 | io->flag = 0; | 3649 | io->flag = 0; |
3563 | return ret; | 3650 | return ret; |
3564 | } | 3651 | } |
3652 | |||
3565 | /* | 3653 | /* |
3566 | * work on completed aio dio IO, to convert unwritten extents to extents | 3654 | * work on completed aio dio IO, to convert unwritten extents to extents |
3567 | */ | 3655 | */ |
3568 | static void ext4_end_aio_dio_work(struct work_struct *work) | 3656 | static void ext4_end_io_work(struct work_struct *work) |
3569 | { | 3657 | { |
3570 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | 3658 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); |
3571 | struct inode *inode = io->inode; | 3659 | struct inode *inode = io->inode; |
3572 | int ret = 0; | 3660 | struct ext4_inode_info *ei = EXT4_I(inode); |
3661 | unsigned long flags; | ||
3662 | int ret; | ||
3573 | 3663 | ||
3574 | mutex_lock(&inode->i_mutex); | 3664 | mutex_lock(&inode->i_mutex); |
3575 | ret = ext4_end_aio_dio_nolock(io); | 3665 | ret = ext4_end_io_nolock(io); |
3576 | if (ret >= 0) { | 3666 | if (ret < 0) { |
3577 | if (!list_empty(&io->list)) | 3667 | mutex_unlock(&inode->i_mutex); |
3578 | list_del_init(&io->list); | 3668 | return; |
3579 | ext4_free_io_end(io); | ||
3580 | } | 3669 | } |
3670 | |||
3671 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3672 | if (!list_empty(&io->list)) | ||
3673 | list_del_init(&io->list); | ||
3674 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3581 | mutex_unlock(&inode->i_mutex); | 3675 | mutex_unlock(&inode->i_mutex); |
3676 | ext4_free_io_end(io); | ||
3582 | } | 3677 | } |
3678 | |||
3583 | /* | 3679 | /* |
3584 | * This function is called from ext4_sync_file(). | 3680 | * This function is called from ext4_sync_file(). |
3585 | * | 3681 | * |
3586 | * When AIO DIO IO is completed, the work to convert unwritten | 3682 | * When IO is completed, the work to convert unwritten extents to |
3587 | * extents to written is queued on workqueue but may not get immediately | 3683 | * written is queued on workqueue but may not get immediately |
3588 | * scheduled. When fsync is called, we need to ensure the | 3684 | * scheduled. When fsync is called, we need to ensure the |
3589 | * conversion is complete before fsync returns. | 3685 | * conversion is complete before fsync returns. |
3590 | * The inode keeps track of a list of completed AIO from DIO path | 3686 | * The inode keeps track of a list of pending/completed IO that |
3591 | * that might needs to do the conversion. This function walks through | 3687 | * might need to do the conversion. This function walks through |
3592 | * the list and convert the related unwritten extents to written. | 3688 | * the list and convert the related unwritten extents for completed IO |
3689 | * to written. | ||
3690 | * The function returns 0 on success, or a negative error code if a conversion failed. | ||
3593 | */ | 3691 | */ |
3594 | int flush_aio_dio_completed_IO(struct inode *inode) | 3692 | int flush_completed_IO(struct inode *inode) |
3595 | { | 3693 | { |
3596 | ext4_io_end_t *io; | 3694 | ext4_io_end_t *io; |
3695 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3696 | unsigned long flags; | ||
3597 | int ret = 0; | 3697 | int ret = 0; |
3598 | int ret2 = 0; | 3698 | int ret2 = 0; |
3599 | 3699 | ||
3600 | if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) | 3700 | if (list_empty(&ei->i_completed_io_list)) |
3601 | return ret; | 3701 | return ret; |
3602 | 3702 | ||
3603 | dump_aio_dio_list(inode); | 3703 | dump_completed_IO(inode); |
3604 | while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ | 3704 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
3605 | io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, | 3705 | while (!list_empty(&ei->i_completed_io_list)){ |
3706 | io = list_entry(ei->i_completed_io_list.next, | ||
3606 | ext4_io_end_t, list); | 3707 | ext4_io_end_t, list); |
3607 | /* | 3708 | /* |
3608 | * Calling ext4_end_aio_dio_nolock() to convert completed | 3709 | * Calling ext4_end_io_nolock() to convert completed |
3609 | * IO to written. | 3710 | * IO to written. |
3610 | * | 3711 | * |
3611 | * When ext4_sync_file() is called, run_queue() may already | 3712 | * When ext4_sync_file() is called, run_queue() may already |
@@ -3618,20 +3719,23 @@ int flush_aio_dio_completed_IO(struct inode *inode) | |||
3618 | * avoid double converting from both fsync and background work | 3719 | * avoid double converting from both fsync and background work |
3619 | * queue work. | 3720 | * queue work. |
3620 | */ | 3721 | */ |
3621 | ret = ext4_end_aio_dio_nolock(io); | 3722 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
3723 | ret = ext4_end_io_nolock(io); | ||
3724 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3622 | if (ret < 0) | 3725 | if (ret < 0) |
3623 | ret2 = ret; | 3726 | ret2 = ret; |
3624 | else | 3727 | else |
3625 | list_del_init(&io->list); | 3728 | list_del_init(&io->list); |
3626 | } | 3729 | } |
3730 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3627 | return (ret2 < 0) ? ret2 : 0; | 3731 | return (ret2 < 0) ? ret2 : 0; |
3628 | } | 3732 | } |
3629 | 3733 | ||
3630 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode) | 3734 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) |
3631 | { | 3735 | { |
3632 | ext4_io_end_t *io = NULL; | 3736 | ext4_io_end_t *io = NULL; |
3633 | 3737 | ||
3634 | io = kmalloc(sizeof(*io), GFP_NOFS); | 3738 | io = kmalloc(sizeof(*io), flags); |
3635 | 3739 | ||
3636 | if (io) { | 3740 | if (io) { |
3637 | igrab(inode); | 3741 | igrab(inode); |
@@ -3639,8 +3743,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode) | |||
3639 | io->flag = 0; | 3743 | io->flag = 0; |
3640 | io->offset = 0; | 3744 | io->offset = 0; |
3641 | io->size = 0; | 3745 | io->size = 0; |
3642 | io->error = 0; | 3746 | io->page = NULL; |
3643 | INIT_WORK(&io->work, ext4_end_aio_dio_work); | 3747 | INIT_WORK(&io->work, ext4_end_io_work); |
3644 | INIT_LIST_HEAD(&io->list); | 3748 | INIT_LIST_HEAD(&io->list); |
3645 | } | 3749 | } |
3646 | 3750 | ||
@@ -3652,6 +3756,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3652 | { | 3756 | { |
3653 | ext4_io_end_t *io_end = iocb->private; | 3757 | ext4_io_end_t *io_end = iocb->private; |
3654 | struct workqueue_struct *wq; | 3758 | struct workqueue_struct *wq; |
3759 | unsigned long flags; | ||
3760 | struct ext4_inode_info *ei; | ||
3655 | 3761 | ||
3656 | /* if not async direct IO or dio with 0 bytes write, just return */ | 3762 | /* if not async direct IO or dio with 0 bytes write, just return */ |
3657 | if (!io_end || !size) | 3763 | if (!io_end || !size) |
@@ -3663,7 +3769,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3663 | size); | 3769 | size); |
3664 | 3770 | ||
3665 | /* if not aio dio with unwritten extents, just free io and return */ | 3771 | /* if not aio dio with unwritten extents, just free io and return */ |
3666 | if (io_end->flag != DIO_AIO_UNWRITTEN){ | 3772 | if (io_end->flag != EXT4_IO_UNWRITTEN){ |
3667 | ext4_free_io_end(io_end); | 3773 | ext4_free_io_end(io_end); |
3668 | iocb->private = NULL; | 3774 | iocb->private = NULL; |
3669 | return; | 3775 | return; |
@@ -3671,16 +3777,85 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3671 | 3777 | ||
3672 | io_end->offset = offset; | 3778 | io_end->offset = offset; |
3673 | io_end->size = size; | 3779 | io_end->size = size; |
3780 | io_end->flag = EXT4_IO_UNWRITTEN; | ||
3674 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | 3781 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; |
3675 | 3782 | ||
3676 | /* queue the work to convert unwritten extents to written */ | 3783 | /* queue the work to convert unwritten extents to written */ |
3677 | queue_work(wq, &io_end->work); | 3784 | queue_work(wq, &io_end->work); |
3678 | 3785 | ||
3679 | /* Add the io_end to per-inode completed aio dio list*/ | 3786 | /* Add the io_end to per-inode completed aio dio list*/ |
3680 | list_add_tail(&io_end->list, | 3787 | ei = EXT4_I(io_end->inode); |
3681 | &EXT4_I(io_end->inode)->i_aio_dio_complete_list); | 3788 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
3789 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | ||
3790 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3682 | iocb->private = NULL; | 3791 | iocb->private = NULL; |
3683 | } | 3792 | } |
3793 | |||
3794 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | ||
3795 | { | ||
3796 | ext4_io_end_t *io_end = bh->b_private; | ||
3797 | struct workqueue_struct *wq; | ||
3798 | struct inode *inode; | ||
3799 | unsigned long flags; | ||
3800 | |||
3801 | if (!test_clear_buffer_uninit(bh) || !io_end) | ||
3802 | goto out; | ||
3803 | |||
3804 | if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) { | ||
3805 | printk("sb umounted, discard end_io request for inode %lu\n", | ||
3806 | io_end->inode->i_ino); | ||
3807 | ext4_free_io_end(io_end); | ||
3808 | goto out; | ||
3809 | } | ||
3810 | |||
3811 | io_end->flag = EXT4_IO_UNWRITTEN; | ||
3812 | inode = io_end->inode; | ||
3813 | |||
3814 | /* Add the io_end to per-inode completed io list*/ | ||
3815 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3816 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); | ||
3817 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3818 | |||
3819 | wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; | ||
3820 | /* queue the work to convert unwritten extents to written */ | ||
3821 | queue_work(wq, &io_end->work); | ||
3822 | out: | ||
3823 | bh->b_private = NULL; | ||
3824 | bh->b_end_io = NULL; | ||
3825 | clear_buffer_uninit(bh); | ||
3826 | end_buffer_async_write(bh, uptodate); | ||
3827 | } | ||
3828 | |||
3829 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode) | ||
3830 | { | ||
3831 | ext4_io_end_t *io_end; | ||
3832 | struct page *page = bh->b_page; | ||
3833 | loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT; | ||
3834 | size_t size = bh->b_size; | ||
3835 | |||
3836 | retry: | ||
3837 | io_end = ext4_init_io_end(inode, GFP_ATOMIC); | ||
3838 | if (!io_end) { | ||
3839 | if (printk_ratelimit()) | ||
3840 | printk(KERN_WARNING "%s: allocation fail\n", __func__); | ||
3841 | schedule(); | ||
3842 | goto retry; | ||
3843 | } | ||
3844 | io_end->offset = offset; | ||
3845 | io_end->size = size; | ||
3846 | /* | ||
3847 | * We need to hold a reference to the page to make sure it | ||
3848 | * doesn't get evicted before ext4_end_io_work() has a chance | ||
3849 | * to convert the extent from written to unwritten. | ||
3850 | */ | ||
3851 | io_end->page = page; | ||
3852 | get_page(io_end->page); | ||
3853 | |||
3854 | bh->b_private = io_end; | ||
3855 | bh->b_end_io = ext4_end_io_buffer_write; | ||
3856 | return 0; | ||
3857 | } | ||
3858 | |||
3684 | /* | 3859 | /* |
3685 | * For ext4 extent files, ext4 will do direct-io write to holes, | 3860 | * For ext4 extent files, ext4 will do direct-io write to holes, |
3686 | * preallocated extents, and those write extend the file, no need to | 3861 | * preallocated extents, and those write extend the file, no need to |
@@ -3734,7 +3909,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3734 | iocb->private = NULL; | 3909 | iocb->private = NULL; |
3735 | EXT4_I(inode)->cur_aio_dio = NULL; | 3910 | EXT4_I(inode)->cur_aio_dio = NULL; |
3736 | if (!is_sync_kiocb(iocb)) { | 3911 | if (!is_sync_kiocb(iocb)) { |
3737 | iocb->private = ext4_init_io_end(inode); | 3912 | iocb->private = ext4_init_io_end(inode, GFP_NOFS); |
3738 | if (!iocb->private) | 3913 | if (!iocb->private) |
3739 | return -ENOMEM; | 3914 | return -ENOMEM; |
3740 | /* | 3915 | /* |
@@ -3750,7 +3925,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3750 | ret = blockdev_direct_IO(rw, iocb, inode, | 3925 | ret = blockdev_direct_IO(rw, iocb, inode, |
3751 | inode->i_sb->s_bdev, iov, | 3926 | inode->i_sb->s_bdev, iov, |
3752 | offset, nr_segs, | 3927 | offset, nr_segs, |
3753 | ext4_get_block_dio_write, | 3928 | ext4_get_block_write, |
3754 | ext4_end_io_dio); | 3929 | ext4_end_io_dio); |
3755 | if (iocb->private) | 3930 | if (iocb->private) |
3756 | EXT4_I(inode)->cur_aio_dio = NULL; | 3931 | EXT4_I(inode)->cur_aio_dio = NULL; |
@@ -3771,8 +3946,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3771 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | 3946 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { |
3772 | ext4_free_io_end(iocb->private); | 3947 | ext4_free_io_end(iocb->private); |
3773 | iocb->private = NULL; | 3948 | iocb->private = NULL; |
3774 | } else if (ret > 0 && (EXT4_I(inode)->i_state & | 3949 | } else if (ret > 0 && ext4_test_inode_state(inode, |
3775 | EXT4_STATE_DIO_UNWRITTEN)) { | 3950 | EXT4_STATE_DIO_UNWRITTEN)) { |
3776 | int err; | 3951 | int err; |
3777 | /* | 3952 | /* |
3778 | * for non AIO case, since the IO is already | 3953 | * for non AIO case, since the IO is already |
@@ -3782,7 +3957,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3782 | offset, ret); | 3957 | offset, ret); |
3783 | if (err < 0) | 3958 | if (err < 0) |
3784 | ret = err; | 3959 | ret = err; |
3785 | EXT4_I(inode)->i_state &= ~EXT4_STATE_DIO_UNWRITTEN; | 3960 | ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
3786 | } | 3961 | } |
3787 | return ret; | 3962 | return ret; |
3788 | } | 3963 | } |
@@ -4064,7 +4239,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, | |||
4064 | int k, err; | 4239 | int k, err; |
4065 | 4240 | ||
4066 | *top = 0; | 4241 | *top = 0; |
4067 | /* Make k index the deepest non-null offest + 1 */ | 4242 | /* Make k index the deepest non-null offset + 1 */ |
4068 | for (k = depth; k > 1 && !offsets[k-1]; k--) | 4243 | for (k = depth; k > 1 && !offsets[k-1]; k--) |
4069 | ; | 4244 | ; |
4070 | partial = ext4_get_branch(inode, k, offsets, chain, &err); | 4245 | partial = ext4_get_branch(inode, k, offsets, chain, &err); |
@@ -4113,13 +4288,27 @@ no_top: | |||
4113 | * We release `count' blocks on disk, but (last - first) may be greater | 4288 | * We release `count' blocks on disk, but (last - first) may be greater |
4114 | * than `count' because there can be holes in there. | 4289 | * than `count' because there can be holes in there. |
4115 | */ | 4290 | */ |
4116 | static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | 4291 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, |
4117 | struct buffer_head *bh, | 4292 | struct buffer_head *bh, |
4118 | ext4_fsblk_t block_to_free, | 4293 | ext4_fsblk_t block_to_free, |
4119 | unsigned long count, __le32 *first, | 4294 | unsigned long count, __le32 *first, |
4120 | __le32 *last) | 4295 | __le32 *last) |
4121 | { | 4296 | { |
4122 | __le32 *p; | 4297 | __le32 *p; |
4298 | int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; | ||
4299 | |||
4300 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
4301 | flags |= EXT4_FREE_BLOCKS_METADATA; | ||
4302 | |||
4303 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, | ||
4304 | count)) { | ||
4305 | ext4_error(inode->i_sb, "inode #%lu: " | ||
4306 | "attempt to clear blocks %llu len %lu, invalid", | ||
4307 | inode->i_ino, (unsigned long long) block_to_free, | ||
4308 | count); | ||
4309 | return 1; | ||
4310 | } | ||
4311 | |||
4123 | if (try_to_extend_transaction(handle, inode)) { | 4312 | if (try_to_extend_transaction(handle, inode)) { |
4124 | if (bh) { | 4313 | if (bh) { |
4125 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4314 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
@@ -4134,27 +4323,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4134 | } | 4323 | } |
4135 | } | 4324 | } |
4136 | 4325 | ||
4137 | /* | 4326 | for (p = first; p < last; p++) |
4138 | * Any buffers which are on the journal will be in memory. We | 4327 | *p = 0; |
4139 | * find them on the hash table so jbd2_journal_revoke() will | ||
4140 | * run jbd2_journal_forget() on them. We've already detached | ||
4141 | * each block from the file, so bforget() in | ||
4142 | * jbd2_journal_forget() should be safe. | ||
4143 | * | ||
4144 | * AKPM: turn on bforget in jbd2_journal_forget()!!! | ||
4145 | */ | ||
4146 | for (p = first; p < last; p++) { | ||
4147 | u32 nr = le32_to_cpu(*p); | ||
4148 | if (nr) { | ||
4149 | struct buffer_head *tbh; | ||
4150 | |||
4151 | *p = 0; | ||
4152 | tbh = sb_find_get_block(inode->i_sb, nr); | ||
4153 | ext4_forget(handle, 0, inode, tbh, nr); | ||
4154 | } | ||
4155 | } | ||
4156 | 4328 | ||
4157 | ext4_free_blocks(handle, inode, block_to_free, count, 0); | 4329 | ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); |
4330 | return 0; | ||
4158 | } | 4331 | } |
4159 | 4332 | ||
4160 | /** | 4333 | /** |
@@ -4210,9 +4383,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4210 | } else if (nr == block_to_free + count) { | 4383 | } else if (nr == block_to_free + count) { |
4211 | count++; | 4384 | count++; |
4212 | } else { | 4385 | } else { |
4213 | ext4_clear_blocks(handle, inode, this_bh, | 4386 | if (ext4_clear_blocks(handle, inode, this_bh, |
4214 | block_to_free, | 4387 | block_to_free, count, |
4215 | count, block_to_free_p, p); | 4388 | block_to_free_p, p)) |
4389 | break; | ||
4216 | block_to_free = nr; | 4390 | block_to_free = nr; |
4217 | block_to_free_p = p; | 4391 | block_to_free_p = p; |
4218 | count = 1; | 4392 | count = 1; |
@@ -4236,7 +4410,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4236 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) | 4410 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) |
4237 | ext4_handle_dirty_metadata(handle, inode, this_bh); | 4411 | ext4_handle_dirty_metadata(handle, inode, this_bh); |
4238 | else | 4412 | else |
4239 | ext4_error(inode->i_sb, __func__, | 4413 | ext4_error(inode->i_sb, |
4240 | "circular indirect block detected, " | 4414 | "circular indirect block detected, " |
4241 | "inode=%lu, block=%llu", | 4415 | "inode=%lu, block=%llu", |
4242 | inode->i_ino, | 4416 | inode->i_ino, |
@@ -4276,6 +4450,16 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4276 | if (!nr) | 4450 | if (!nr) |
4277 | continue; /* A hole */ | 4451 | continue; /* A hole */ |
4278 | 4452 | ||
4453 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), | ||
4454 | nr, 1)) { | ||
4455 | ext4_error(inode->i_sb, | ||
4456 | "indirect mapped block in inode " | ||
4457 | "#%lu invalid (level %d, blk #%lu)", | ||
4458 | inode->i_ino, depth, | ||
4459 | (unsigned long) nr); | ||
4460 | break; | ||
4461 | } | ||
4462 | |||
4279 | /* Go read the buffer for the next level down */ | 4463 | /* Go read the buffer for the next level down */ |
4280 | bh = sb_bread(inode->i_sb, nr); | 4464 | bh = sb_bread(inode->i_sb, nr); |
4281 | 4465 | ||
@@ -4284,7 +4468,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4284 | * (should be rare). | 4468 | * (should be rare). |
4285 | */ | 4469 | */ |
4286 | if (!bh) { | 4470 | if (!bh) { |
4287 | ext4_error(inode->i_sb, "ext4_free_branches", | 4471 | ext4_error(inode->i_sb, |
4288 | "Read failure, inode=%lu, block=%llu", | 4472 | "Read failure, inode=%lu, block=%llu", |
4289 | inode->i_ino, nr); | 4473 | inode->i_ino, nr); |
4290 | continue; | 4474 | continue; |
@@ -4342,7 +4526,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4342 | blocks_for_truncate(inode)); | 4526 | blocks_for_truncate(inode)); |
4343 | } | 4527 | } |
4344 | 4528 | ||
4345 | ext4_free_blocks(handle, inode, nr, 1, 1); | 4529 | ext4_free_blocks(handle, inode, 0, nr, 1, |
4530 | EXT4_FREE_BLOCKS_METADATA); | ||
4346 | 4531 | ||
4347 | if (parent_bh) { | 4532 | if (parent_bh) { |
4348 | /* | 4533 | /* |
@@ -4427,8 +4612,10 @@ void ext4_truncate(struct inode *inode) | |||
4427 | if (!ext4_can_truncate(inode)) | 4612 | if (!ext4_can_truncate(inode)) |
4428 | return; | 4613 | return; |
4429 | 4614 | ||
4615 | EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; | ||
4616 | |||
4430 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | 4617 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) |
4431 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; | 4618 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); |
4432 | 4619 | ||
4433 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 4620 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
4434 | ext4_ext_truncate(inode); | 4621 | ext4_ext_truncate(inode); |
@@ -4598,9 +4785,8 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
4598 | 4785 | ||
4599 | bh = sb_getblk(sb, block); | 4786 | bh = sb_getblk(sb, block); |
4600 | if (!bh) { | 4787 | if (!bh) { |
4601 | ext4_error(sb, "ext4_get_inode_loc", "unable to read " | 4788 | ext4_error(sb, "unable to read inode block - " |
4602 | "inode block - inode=%lu, block=%llu", | 4789 | "inode=%lu, block=%llu", inode->i_ino, block); |
4603 | inode->i_ino, block); | ||
4604 | return -EIO; | 4790 | return -EIO; |
4605 | } | 4791 | } |
4606 | if (!buffer_uptodate(bh)) { | 4792 | if (!buffer_uptodate(bh)) { |
@@ -4698,9 +4884,8 @@ make_io: | |||
4698 | submit_bh(READ_META, bh); | 4884 | submit_bh(READ_META, bh); |
4699 | wait_on_buffer(bh); | 4885 | wait_on_buffer(bh); |
4700 | if (!buffer_uptodate(bh)) { | 4886 | if (!buffer_uptodate(bh)) { |
4701 | ext4_error(sb, __func__, | 4887 | ext4_error(sb, "unable to read inode block - inode=%lu," |
4702 | "unable to read inode block - inode=%lu, " | 4888 | " block=%llu", inode->i_ino, block); |
4703 | "block=%llu", inode->i_ino, block); | ||
4704 | brelse(bh); | 4889 | brelse(bh); |
4705 | return -EIO; | 4890 | return -EIO; |
4706 | } | 4891 | } |
@@ -4714,7 +4899,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) | |||
4714 | { | 4899 | { |
4715 | /* We have all inode data except xattrs in memory here. */ | 4900 | /* We have all inode data except xattrs in memory here. */ |
4716 | return __ext4_get_inode_loc(inode, iloc, | 4901 | return __ext4_get_inode_loc(inode, iloc, |
4717 | !(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)); | 4902 | !ext4_test_inode_state(inode, EXT4_STATE_XATTR)); |
4718 | } | 4903 | } |
4719 | 4904 | ||
4720 | void ext4_set_inode_flags(struct inode *inode) | 4905 | void ext4_set_inode_flags(struct inode *inode) |
@@ -4781,8 +4966,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4781 | struct ext4_iloc iloc; | 4966 | struct ext4_iloc iloc; |
4782 | struct ext4_inode *raw_inode; | 4967 | struct ext4_inode *raw_inode; |
4783 | struct ext4_inode_info *ei; | 4968 | struct ext4_inode_info *ei; |
4784 | struct buffer_head *bh; | ||
4785 | struct inode *inode; | 4969 | struct inode *inode; |
4970 | journal_t *journal = EXT4_SB(sb)->s_journal; | ||
4786 | long ret; | 4971 | long ret; |
4787 | int block; | 4972 | int block; |
4788 | 4973 | ||
@@ -4793,11 +4978,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4793 | return inode; | 4978 | return inode; |
4794 | 4979 | ||
4795 | ei = EXT4_I(inode); | 4980 | ei = EXT4_I(inode); |
4981 | iloc.bh = 0; | ||
4796 | 4982 | ||
4797 | ret = __ext4_get_inode_loc(inode, &iloc, 0); | 4983 | ret = __ext4_get_inode_loc(inode, &iloc, 0); |
4798 | if (ret < 0) | 4984 | if (ret < 0) |
4799 | goto bad_inode; | 4985 | goto bad_inode; |
4800 | bh = iloc.bh; | ||
4801 | raw_inode = ext4_raw_inode(&iloc); | 4986 | raw_inode = ext4_raw_inode(&iloc); |
4802 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 4987 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); |
4803 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); | 4988 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); |
@@ -4808,7 +4993,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4808 | } | 4993 | } |
4809 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); | 4994 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); |
4810 | 4995 | ||
4811 | ei->i_state = 0; | 4996 | ei->i_state_flags = 0; |
4812 | ei->i_dir_start_lookup = 0; | 4997 | ei->i_dir_start_lookup = 0; |
4813 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); | 4998 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); |
4814 | /* We now have enough fields to check if the inode was active or not. | 4999 | /* We now have enough fields to check if the inode was active or not. |
@@ -4820,7 +5005,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4820 | if (inode->i_mode == 0 || | 5005 | if (inode->i_mode == 0 || |
4821 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { | 5006 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { |
4822 | /* this inode is deleted */ | 5007 | /* this inode is deleted */ |
4823 | brelse(bh); | ||
4824 | ret = -ESTALE; | 5008 | ret = -ESTALE; |
4825 | goto bad_inode; | 5009 | goto bad_inode; |
4826 | } | 5010 | } |
@@ -4837,6 +5021,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4837 | ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; | 5021 | ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; |
4838 | inode->i_size = ext4_isize(raw_inode); | 5022 | inode->i_size = ext4_isize(raw_inode); |
4839 | ei->i_disksize = inode->i_size; | 5023 | ei->i_disksize = inode->i_size; |
5024 | #ifdef CONFIG_QUOTA | ||
5025 | ei->i_reserved_quota = 0; | ||
5026 | #endif | ||
4840 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); | 5027 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); |
4841 | ei->i_block_group = iloc.block_group; | 5028 | ei->i_block_group = iloc.block_group; |
4842 | ei->i_last_alloc_group = ~0; | 5029 | ei->i_last_alloc_group = ~0; |
@@ -4848,11 +5035,35 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4848 | ei->i_data[block] = raw_inode->i_block[block]; | 5035 | ei->i_data[block] = raw_inode->i_block[block]; |
4849 | INIT_LIST_HEAD(&ei->i_orphan); | 5036 | INIT_LIST_HEAD(&ei->i_orphan); |
4850 | 5037 | ||
5038 | /* | ||
5039 | * Set transaction id's of transactions that have to be committed | ||
5040 | * to finish f[data]sync. We set them to currently running transaction | ||
5041 | * as we cannot be sure that the inode or some of its metadata isn't | ||
5042 | * part of the transaction - the inode could have been reclaimed and | ||
5043 | * now it is reread from disk. | ||
5044 | */ | ||
5045 | if (journal) { | ||
5046 | transaction_t *transaction; | ||
5047 | tid_t tid; | ||
5048 | |||
5049 | spin_lock(&journal->j_state_lock); | ||
5050 | if (journal->j_running_transaction) | ||
5051 | transaction = journal->j_running_transaction; | ||
5052 | else | ||
5053 | transaction = journal->j_committing_transaction; | ||
5054 | if (transaction) | ||
5055 | tid = transaction->t_tid; | ||
5056 | else | ||
5057 | tid = journal->j_commit_sequence; | ||
5058 | spin_unlock(&journal->j_state_lock); | ||
5059 | ei->i_sync_tid = tid; | ||
5060 | ei->i_datasync_tid = tid; | ||
5061 | } | ||
5062 | |||
4851 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { | 5063 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { |
4852 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); | 5064 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); |
4853 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > | 5065 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > |
4854 | EXT4_INODE_SIZE(inode->i_sb)) { | 5066 | EXT4_INODE_SIZE(inode->i_sb)) { |
4855 | brelse(bh); | ||
4856 | ret = -EIO; | 5067 | ret = -EIO; |
4857 | goto bad_inode; | 5068 | goto bad_inode; |
4858 | } | 5069 | } |
@@ -4865,7 +5076,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4865 | EXT4_GOOD_OLD_INODE_SIZE + | 5076 | EXT4_GOOD_OLD_INODE_SIZE + |
4866 | ei->i_extra_isize; | 5077 | ei->i_extra_isize; |
4867 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) | 5078 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) |
4868 | ei->i_state |= EXT4_STATE_XATTR; | 5079 | ext4_set_inode_state(inode, EXT4_STATE_XATTR); |
4869 | } | 5080 | } |
4870 | } else | 5081 | } else |
4871 | ei->i_extra_isize = 0; | 5082 | ei->i_extra_isize = 0; |
@@ -4884,12 +5095,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4884 | 5095 | ||
4885 | ret = 0; | 5096 | ret = 0; |
4886 | if (ei->i_file_acl && | 5097 | if (ei->i_file_acl && |
4887 | ((ei->i_file_acl < | 5098 | !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { |
4888 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + | 5099 | ext4_error(sb, "bad extended attribute block %llu inode #%lu", |
4889 | EXT4_SB(sb)->s_gdb_count)) || | ||
4890 | (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { | ||
4891 | ext4_error(sb, __func__, | ||
4892 | "bad extended attribute block %llu in inode #%lu", | ||
4893 | ei->i_file_acl, inode->i_ino); | 5100 | ei->i_file_acl, inode->i_ino); |
4894 | ret = -EIO; | 5101 | ret = -EIO; |
4895 | goto bad_inode; | 5102 | goto bad_inode; |
@@ -4905,10 +5112,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4905 | /* Validate block references which are part of inode */ | 5112 | /* Validate block references which are part of inode */ |
4906 | ret = ext4_check_inode_blockref(inode); | 5113 | ret = ext4_check_inode_blockref(inode); |
4907 | } | 5114 | } |
4908 | if (ret) { | 5115 | if (ret) |
4909 | brelse(bh); | ||
4910 | goto bad_inode; | 5116 | goto bad_inode; |
4911 | } | ||
4912 | 5117 | ||
4913 | if (S_ISREG(inode->i_mode)) { | 5118 | if (S_ISREG(inode->i_mode)) { |
4914 | inode->i_op = &ext4_file_inode_operations; | 5119 | inode->i_op = &ext4_file_inode_operations; |
@@ -4936,10 +5141,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4936 | init_special_inode(inode, inode->i_mode, | 5141 | init_special_inode(inode, inode->i_mode, |
4937 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 5142 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); |
4938 | } else { | 5143 | } else { |
4939 | brelse(bh); | ||
4940 | ret = -EIO; | 5144 | ret = -EIO; |
4941 | ext4_error(inode->i_sb, __func__, | 5145 | ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu", |
4942 | "bogus i_mode (%o) for inode=%lu", | ||
4943 | inode->i_mode, inode->i_ino); | 5146 | inode->i_mode, inode->i_ino); |
4944 | goto bad_inode; | 5147 | goto bad_inode; |
4945 | } | 5148 | } |
@@ -4949,6 +5152,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4949 | return inode; | 5152 | return inode; |
4950 | 5153 | ||
4951 | bad_inode: | 5154 | bad_inode: |
5155 | brelse(iloc.bh); | ||
4952 | iget_failed(inode); | 5156 | iget_failed(inode); |
4953 | return ERR_PTR(ret); | 5157 | return ERR_PTR(ret); |
4954 | } | 5158 | } |
@@ -5010,7 +5214,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
5010 | 5214 | ||
5011 | /* For fields not tracked in the in-memory inode, | 5215 | /* For fields not tracked in the in-memory inode, |
5012 | * initialise them to zero for new inodes. */ | 5216 | * initialise them to zero for new inodes. */ |
5013 | if (ei->i_state & EXT4_STATE_NEW) | 5217 | if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) |
5014 | memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); | 5218 | memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); |
5015 | 5219 | ||
5016 | ext4_get_inode_flags(ei); | 5220 | ext4_get_inode_flags(ei); |
@@ -5074,7 +5278,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
5074 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); | 5278 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); |
5075 | sb->s_dirt = 1; | 5279 | sb->s_dirt = 1; |
5076 | ext4_handle_sync(handle); | 5280 | ext4_handle_sync(handle); |
5077 | err = ext4_handle_dirty_metadata(handle, inode, | 5281 | err = ext4_handle_dirty_metadata(handle, NULL, |
5078 | EXT4_SB(sb)->s_sbh); | 5282 | EXT4_SB(sb)->s_sbh); |
5079 | } | 5283 | } |
5080 | } | 5284 | } |
@@ -5103,11 +5307,12 @@ static int ext4_do_update_inode(handle_t *handle, | |||
5103 | } | 5307 | } |
5104 | 5308 | ||
5105 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 5309 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
5106 | rc = ext4_handle_dirty_metadata(handle, inode, bh); | 5310 | rc = ext4_handle_dirty_metadata(handle, NULL, bh); |
5107 | if (!err) | 5311 | if (!err) |
5108 | err = rc; | 5312 | err = rc; |
5109 | ei->i_state &= ~EXT4_STATE_NEW; | 5313 | ext4_clear_inode_state(inode, EXT4_STATE_NEW); |
5110 | 5314 | ||
5315 | ext4_update_inode_fsync_trans(handle, inode, 0); | ||
5111 | out_brelse: | 5316 | out_brelse: |
5112 | brelse(bh); | 5317 | brelse(bh); |
5113 | ext4_std_error(inode->i_sb, err); | 5318 | ext4_std_error(inode->i_sb, err); |
@@ -5149,7 +5354,7 @@ out_brelse: | |||
5149 | * `stuff()' is running, and the new i_size will be lost. Plus the inode | 5354 | * `stuff()' is running, and the new i_size will be lost. Plus the inode |
5150 | * will no longer be on the superblock's dirty inode list. | 5355 | * will no longer be on the superblock's dirty inode list. |
5151 | */ | 5356 | */ |
5152 | int ext4_write_inode(struct inode *inode, int wait) | 5357 | int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) |
5153 | { | 5358 | { |
5154 | int err; | 5359 | int err; |
5155 | 5360 | ||
@@ -5163,7 +5368,7 @@ int ext4_write_inode(struct inode *inode, int wait) | |||
5163 | return -EIO; | 5368 | return -EIO; |
5164 | } | 5369 | } |
5165 | 5370 | ||
5166 | if (!wait) | 5371 | if (wbc->sync_mode != WB_SYNC_ALL) |
5167 | return 0; | 5372 | return 0; |
5168 | 5373 | ||
5169 | err = ext4_force_commit(inode->i_sb); | 5374 | err = ext4_force_commit(inode->i_sb); |
@@ -5173,13 +5378,11 @@ int ext4_write_inode(struct inode *inode, int wait) | |||
5173 | err = ext4_get_inode_loc(inode, &iloc); | 5378 | err = ext4_get_inode_loc(inode, &iloc); |
5174 | if (err) | 5379 | if (err) |
5175 | return err; | 5380 | return err; |
5176 | if (wait) | 5381 | if (wbc->sync_mode == WB_SYNC_ALL) |
5177 | sync_dirty_buffer(iloc.bh); | 5382 | sync_dirty_buffer(iloc.bh); |
5178 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { | 5383 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { |
5179 | ext4_error(inode->i_sb, __func__, | 5384 | ext4_error(inode->i_sb, "IO error syncing inode, " |
5180 | "IO error syncing inode, " | 5385 | "inode=%lu, block=%llu", inode->i_ino, |
5181 | "inode=%lu, block=%llu", | ||
5182 | inode->i_ino, | ||
5183 | (unsigned long long)iloc.bh->b_blocknr); | 5386 | (unsigned long long)iloc.bh->b_blocknr); |
5184 | err = -EIO; | 5387 | err = -EIO; |
5185 | } | 5388 | } |
@@ -5221,19 +5424,21 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5221 | if (error) | 5424 | if (error) |
5222 | return error; | 5425 | return error; |
5223 | 5426 | ||
5427 | if (ia_valid & ATTR_SIZE) | ||
5428 | dquot_initialize(inode); | ||
5224 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | 5429 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || |
5225 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { | 5430 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { |
5226 | handle_t *handle; | 5431 | handle_t *handle; |
5227 | 5432 | ||
5228 | /* (user+group)*(old+new) structure, inode write (sb, | 5433 | /* (user+group)*(old+new) structure, inode write (sb, |
5229 | * inode block, ? - but truncate inode update has it) */ | 5434 | * inode block, ? - but truncate inode update has it) */ |
5230 | handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+ | 5435 | handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+ |
5231 | EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3); | 5436 | EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3); |
5232 | if (IS_ERR(handle)) { | 5437 | if (IS_ERR(handle)) { |
5233 | error = PTR_ERR(handle); | 5438 | error = PTR_ERR(handle); |
5234 | goto err_out; | 5439 | goto err_out; |
5235 | } | 5440 | } |
5236 | error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; | 5441 | error = dquot_transfer(inode, attr); |
5237 | if (error) { | 5442 | if (error) { |
5238 | ext4_journal_stop(handle); | 5443 | ext4_journal_stop(handle); |
5239 | return error; | 5444 | return error; |
@@ -5260,7 +5465,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5260 | } | 5465 | } |
5261 | 5466 | ||
5262 | if (S_ISREG(inode->i_mode) && | 5467 | if (S_ISREG(inode->i_mode) && |
5263 | attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { | 5468 | attr->ia_valid & ATTR_SIZE && |
5469 | (attr->ia_size < inode->i_size || | ||
5470 | (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) { | ||
5264 | handle_t *handle; | 5471 | handle_t *handle; |
5265 | 5472 | ||
5266 | handle = ext4_journal_start(inode, 3); | 5473 | handle = ext4_journal_start(inode, 3); |
@@ -5291,6 +5498,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5291 | goto err_out; | 5498 | goto err_out; |
5292 | } | 5499 | } |
5293 | } | 5500 | } |
5501 | /* ext4_truncate will clear the flag */ | ||
5502 | if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) | ||
5503 | ext4_truncate(inode); | ||
5294 | } | 5504 | } |
5295 | 5505 | ||
5296 | rc = inode_setattr(inode, attr); | 5506 | rc = inode_setattr(inode, attr); |
@@ -5376,7 +5586,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
5376 | * worse case, the indexs blocks spread over different block groups | 5586 | * worse case, the indexs blocks spread over different block groups |
5377 | * | 5587 | * |
5378 | * If datablocks are discontiguous, they are possible to spread over | 5588 | * If datablocks are discontiguous, they are possible to spread over |
5379 | * different block groups too. If they are contiugous, with flexbg, | 5589 | * different block groups too. If they are contiguous, with flexbg, |
5380 | * they could still across block group boundary. | 5590 | * they could still across block group boundary. |
5381 | * | 5591 | * |
5382 | * Also account for superblock, inode, quota and xattr blocks | 5592 | * Also account for superblock, inode, quota and xattr blocks |
@@ -5452,7 +5662,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) | |||
5452 | * Calculate the journal credits for a chunk of data modification. | 5662 | * Calculate the journal credits for a chunk of data modification. |
5453 | * | 5663 | * |
5454 | * This is called from DIO, fallocate or whoever calling | 5664 | * This is called from DIO, fallocate or whoever calling |
5455 | * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks. | 5665 | * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks. |
5456 | * | 5666 | * |
5457 | * journal buffers for data blocks are not included here, as DIO | 5667 | * journal buffers for data blocks are not included here, as DIO |
5458 | * and fallocate do not need to journal data buffers. | 5668 | * and fallocate do not need to journal data buffers. |
@@ -5529,8 +5739,8 @@ static int ext4_expand_extra_isize(struct inode *inode, | |||
5529 | entry = IFIRST(header); | 5739 | entry = IFIRST(header); |
5530 | 5740 | ||
5531 | /* No extended attributes present */ | 5741 | /* No extended attributes present */ |
5532 | if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) || | 5742 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || |
5533 | header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { | 5743 | header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { |
5534 | memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, | 5744 | memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, |
5535 | new_extra_isize); | 5745 | new_extra_isize); |
5536 | EXT4_I(inode)->i_extra_isize = new_extra_isize; | 5746 | EXT4_I(inode)->i_extra_isize = new_extra_isize; |
@@ -5574,7 +5784,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5574 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 5784 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
5575 | if (ext4_handle_valid(handle) && | 5785 | if (ext4_handle_valid(handle) && |
5576 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && | 5786 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && |
5577 | !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { | 5787 | !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { |
5578 | /* | 5788 | /* |
5579 | * We need extra buffer credits since we may write into EA block | 5789 | * We need extra buffer credits since we may write into EA block |
5580 | * with this same handle. If journal_extend fails, then it will | 5790 | * with this same handle. If journal_extend fails, then it will |
@@ -5588,10 +5798,11 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5588 | sbi->s_want_extra_isize, | 5798 | sbi->s_want_extra_isize, |
5589 | iloc, handle); | 5799 | iloc, handle); |
5590 | if (ret) { | 5800 | if (ret) { |
5591 | EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; | 5801 | ext4_set_inode_state(inode, |
5802 | EXT4_STATE_NO_EXPAND); | ||
5592 | if (mnt_count != | 5803 | if (mnt_count != |
5593 | le16_to_cpu(sbi->s_es->s_mnt_count)) { | 5804 | le16_to_cpu(sbi->s_es->s_mnt_count)) { |
5594 | ext4_warning(inode->i_sb, __func__, | 5805 | ext4_warning(inode->i_sb, |
5595 | "Unable to expand inode %lu. Delete" | 5806 | "Unable to expand inode %lu. Delete" |
5596 | " some EAs or run e2fsck.", | 5807 | " some EAs or run e2fsck.", |
5597 | inode->i_ino); | 5808 | inode->i_ino); |
@@ -5613,7 +5824,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5613 | * i_size has been changed by generic_commit_write() and we thus need | 5824 | * i_size has been changed by generic_commit_write() and we thus need |
5614 | * to include the updated inode in the current transaction. | 5825 | * to include the updated inode in the current transaction. |
5615 | * | 5826 | * |
5616 | * Also, vfs_dq_alloc_block() will always dirty the inode when blocks | 5827 | * Also, dquot_alloc_block() will always dirty the inode when blocks |
5617 | * are allocated to the file. | 5828 | * are allocated to the file. |
5618 | * | 5829 | * |
5619 | * If the inode is marked synchronous, we don't honour that here - doing | 5830 | * If the inode is marked synchronous, we don't honour that here - doing |
@@ -5655,7 +5866,7 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode) | |||
5655 | err = jbd2_journal_get_write_access(handle, iloc.bh); | 5866 | err = jbd2_journal_get_write_access(handle, iloc.bh); |
5656 | if (!err) | 5867 | if (!err) |
5657 | err = ext4_handle_dirty_metadata(handle, | 5868 | err = ext4_handle_dirty_metadata(handle, |
5658 | inode, | 5869 | NULL, |
5659 | iloc.bh); | 5870 | iloc.bh); |
5660 | brelse(iloc.bh); | 5871 | brelse(iloc.bh); |
5661 | } | 5872 | } |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index c1cdf613e725..016d0249294f 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -92,6 +92,15 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
92 | flags &= ~EXT4_EXTENTS_FL; | 92 | flags &= ~EXT4_EXTENTS_FL; |
93 | } | 93 | } |
94 | 94 | ||
95 | if (flags & EXT4_EOFBLOCKS_FL) { | ||
96 | /* we don't support adding EOFBLOCKS flag */ | ||
97 | if (!(oldflags & EXT4_EOFBLOCKS_FL)) { | ||
98 | err = -EOPNOTSUPP; | ||
99 | goto flags_out; | ||
100 | } | ||
101 | } else if (oldflags & EXT4_EOFBLOCKS_FL) | ||
102 | ext4_truncate(inode); | ||
103 | |||
95 | handle = ext4_journal_start(inode, 1); | 104 | handle = ext4_journal_start(inode, 1); |
96 | if (IS_ERR(handle)) { | 105 | if (IS_ERR(handle)) { |
97 | err = PTR_ERR(handle); | 106 | err = PTR_ERR(handle); |
@@ -221,31 +230,39 @@ setversion_out: | |||
221 | struct file *donor_filp; | 230 | struct file *donor_filp; |
222 | int err; | 231 | int err; |
223 | 232 | ||
233 | if (!(filp->f_mode & FMODE_READ) || | ||
234 | !(filp->f_mode & FMODE_WRITE)) | ||
235 | return -EBADF; | ||
236 | |||
224 | if (copy_from_user(&me, | 237 | if (copy_from_user(&me, |
225 | (struct move_extent __user *)arg, sizeof(me))) | 238 | (struct move_extent __user *)arg, sizeof(me))) |
226 | return -EFAULT; | 239 | return -EFAULT; |
240 | me.moved_len = 0; | ||
227 | 241 | ||
228 | donor_filp = fget(me.donor_fd); | 242 | donor_filp = fget(me.donor_fd); |
229 | if (!donor_filp) | 243 | if (!donor_filp) |
230 | return -EBADF; | 244 | return -EBADF; |
231 | 245 | ||
232 | if (!capable(CAP_DAC_OVERRIDE)) { | 246 | if (!(donor_filp->f_mode & FMODE_WRITE)) { |
233 | if ((current->real_cred->fsuid != inode->i_uid) || | 247 | err = -EBADF; |
234 | !(inode->i_mode & S_IRUSR) || | 248 | goto mext_out; |
235 | !(donor_filp->f_dentry->d_inode->i_mode & | ||
236 | S_IRUSR)) { | ||
237 | fput(donor_filp); | ||
238 | return -EACCES; | ||
239 | } | ||
240 | } | 249 | } |
241 | 250 | ||
251 | err = mnt_want_write(filp->f_path.mnt); | ||
252 | if (err) | ||
253 | goto mext_out; | ||
254 | |||
242 | err = ext4_move_extents(filp, donor_filp, me.orig_start, | 255 | err = ext4_move_extents(filp, donor_filp, me.orig_start, |
243 | me.donor_start, me.len, &me.moved_len); | 256 | me.donor_start, me.len, &me.moved_len); |
244 | fput(donor_filp); | 257 | mnt_drop_write(filp->f_path.mnt); |
245 | 258 | if (me.moved_len > 0) | |
246 | if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) | 259 | file_remove_suid(donor_filp); |
247 | return -EFAULT; | ||
248 | 260 | ||
261 | if (copy_to_user((struct move_extent __user *)arg, | ||
262 | &me, sizeof(me))) | ||
263 | err = -EFAULT; | ||
264 | mext_out: | ||
265 | fput(donor_filp); | ||
249 | return err; | 266 | return err; |
250 | } | 267 | } |
251 | 268 | ||
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bba12824defa..bde9d0b170c2 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -23,6 +23,7 @@ | |||
23 | 23 | ||
24 | #include "mballoc.h" | 24 | #include "mballoc.h" |
25 | #include <linux/debugfs.h> | 25 | #include <linux/debugfs.h> |
26 | #include <linux/slab.h> | ||
26 | #include <trace/events/ext4.h> | 27 | #include <trace/events/ext4.h> |
27 | 28 | ||
28 | /* | 29 | /* |
@@ -69,7 +70,7 @@ | |||
69 | * | 70 | * |
70 | * pa_lstart -> the logical start block for this prealloc space | 71 | * pa_lstart -> the logical start block for this prealloc space |
71 | * pa_pstart -> the physical start block for this prealloc space | 72 | * pa_pstart -> the physical start block for this prealloc space |
72 | * pa_len -> lenght for this prealloc space | 73 | * pa_len -> length for this prealloc space |
73 | * pa_free -> free space available in this prealloc space | 74 | * pa_free -> free space available in this prealloc space |
74 | * | 75 | * |
75 | * The inode preallocation space is used looking at the _logical_ start | 76 | * The inode preallocation space is used looking at the _logical_ start |
@@ -142,7 +143,7 @@ | |||
142 | * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The | 143 | * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The |
143 | * value of s_mb_order2_reqs can be tuned via | 144 | * value of s_mb_order2_reqs can be tuned via |
144 | * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to | 145 | * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to |
145 | * stripe size (sbi->s_stripe), we try to search for contigous block in | 146 | * stripe size (sbi->s_stripe), we try to search for contiguous block in |
146 | * stripe size. This should result in better allocation on RAID setups. If | 147 | * stripe size. This should result in better allocation on RAID setups. If |
147 | * not, we search in the specific group using bitmap for best extents. The | 148 | * not, we search in the specific group using bitmap for best extents. The |
148 | * tunable min_to_scan and max_to_scan control the behaviour here. | 149 | * tunable min_to_scan and max_to_scan control the behaviour here. |
@@ -441,10 +442,9 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, | |||
441 | for (i = 0; i < count; i++) { | 442 | for (i = 0; i < count; i++) { |
442 | if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { | 443 | if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { |
443 | ext4_fsblk_t blocknr; | 444 | ext4_fsblk_t blocknr; |
444 | blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); | 445 | |
446 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); | ||
445 | blocknr += first + i; | 447 | blocknr += first + i; |
446 | blocknr += | ||
447 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | ||
448 | ext4_grp_locked_error(sb, e4b->bd_group, | 448 | ext4_grp_locked_error(sb, e4b->bd_group, |
449 | __func__, "double-free of inode" | 449 | __func__, "double-free of inode" |
450 | " %lu's block %llu(bit %u in group %u)", | 450 | " %lu's block %llu(bit %u in group %u)", |
@@ -1255,10 +1255,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1255 | 1255 | ||
1256 | if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { | 1256 | if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { |
1257 | ext4_fsblk_t blocknr; | 1257 | ext4_fsblk_t blocknr; |
1258 | blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); | 1258 | |
1259 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); | ||
1259 | blocknr += block; | 1260 | blocknr += block; |
1260 | blocknr += | ||
1261 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | ||
1262 | ext4_grp_locked_error(sb, e4b->bd_group, | 1261 | ext4_grp_locked_error(sb, e4b->bd_group, |
1263 | __func__, "double-free of inode" | 1262 | __func__, "double-free of inode" |
1264 | " %lu's block %llu(bit %u in group %u)", | 1263 | " %lu's block %llu(bit %u in group %u)", |
@@ -1631,7 +1630,6 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, | |||
1631 | int max; | 1630 | int max; |
1632 | int err; | 1631 | int err; |
1633 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | 1632 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); |
1634 | struct ext4_super_block *es = sbi->s_es; | ||
1635 | struct ext4_free_extent ex; | 1633 | struct ext4_free_extent ex; |
1636 | 1634 | ||
1637 | if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) | 1635 | if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) |
@@ -1648,8 +1646,8 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, | |||
1648 | if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { | 1646 | if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { |
1649 | ext4_fsblk_t start; | 1647 | ext4_fsblk_t start; |
1650 | 1648 | ||
1651 | start = (e4b->bd_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) + | 1649 | start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) + |
1652 | ex.fe_start + le32_to_cpu(es->s_first_data_block); | 1650 | ex.fe_start; |
1653 | /* use do_div to get remainder (would be 64-bit modulo) */ | 1651 | /* use do_div to get remainder (would be 64-bit modulo) */ |
1654 | if (do_div(start, sbi->s_stripe) == 0) { | 1652 | if (do_div(start, sbi->s_stripe) == 0) { |
1655 | ac->ac_found++; | 1653 | ac->ac_found++; |
@@ -1803,8 +1801,8 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, | |||
1803 | BUG_ON(sbi->s_stripe == 0); | 1801 | BUG_ON(sbi->s_stripe == 0); |
1804 | 1802 | ||
1805 | /* find first stripe-aligned block in group */ | 1803 | /* find first stripe-aligned block in group */ |
1806 | first_group_block = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb) | 1804 | first_group_block = ext4_group_first_block_no(sb, e4b->bd_group); |
1807 | + le32_to_cpu(sbi->s_es->s_first_data_block); | 1805 | |
1808 | a = first_group_block + sbi->s_stripe - 1; | 1806 | a = first_group_block + sbi->s_stripe - 1; |
1809 | do_div(a, sbi->s_stripe); | 1807 | do_div(a, sbi->s_stripe); |
1810 | i = (a * sbi->s_stripe) - first_group_block; | 1808 | i = (a * sbi->s_stripe) - first_group_block; |
@@ -2256,7 +2254,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2256 | 2254 | ||
2257 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2255 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
2258 | init_rwsem(&meta_group_info[i]->alloc_sem); | 2256 | init_rwsem(&meta_group_info[i]->alloc_sem); |
2259 | meta_group_info[i]->bb_free_root.rb_node = NULL; | 2257 | meta_group_info[i]->bb_free_root = RB_ROOT; |
2260 | 2258 | ||
2261 | #ifdef DOUBLE_CHECK | 2259 | #ifdef DOUBLE_CHECK |
2262 | { | 2260 | { |
@@ -2529,7 +2527,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2529 | struct ext4_group_info *db; | 2527 | struct ext4_group_info *db; |
2530 | int err, count = 0, count2 = 0; | 2528 | int err, count = 0, count2 = 0; |
2531 | struct ext4_free_data *entry; | 2529 | struct ext4_free_data *entry; |
2532 | ext4_fsblk_t discard_block; | ||
2533 | struct list_head *l, *ltmp; | 2530 | struct list_head *l, *ltmp; |
2534 | 2531 | ||
2535 | list_for_each_safe(l, ltmp, &txn->t_private_list) { | 2532 | list_for_each_safe(l, ltmp, &txn->t_private_list) { |
@@ -2559,13 +2556,16 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2559 | page_cache_release(e4b.bd_bitmap_page); | 2556 | page_cache_release(e4b.bd_bitmap_page); |
2560 | } | 2557 | } |
2561 | ext4_unlock_group(sb, entry->group); | 2558 | ext4_unlock_group(sb, entry->group); |
2562 | discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) | 2559 | if (test_opt(sb, DISCARD)) { |
2563 | + entry->start_blk | 2560 | ext4_fsblk_t discard_block; |
2564 | + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 2561 | |
2565 | trace_ext4_discard_blocks(sb, (unsigned long long)discard_block, | 2562 | discard_block = entry->start_blk + |
2566 | entry->count); | 2563 | ext4_group_first_block_no(sb, entry->group); |
2567 | sb_issue_discard(sb, discard_block, entry->count); | 2564 | trace_ext4_discard_blocks(sb, |
2568 | 2565 | (unsigned long long)discard_block, | |
2566 | entry->count); | ||
2567 | sb_issue_discard(sb, discard_block, entry->count); | ||
2568 | } | ||
2569 | kmem_cache_free(ext4_free_ext_cachep, entry); | 2569 | kmem_cache_free(ext4_free_ext_cachep, entry); |
2570 | ext4_mb_release_desc(&e4b); | 2570 | ext4_mb_release_desc(&e4b); |
2571 | } | 2571 | } |
@@ -2698,14 +2698,11 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2698 | if (err) | 2698 | if (err) |
2699 | goto out_err; | 2699 | goto out_err; |
2700 | 2700 | ||
2701 | block = ac->ac_b_ex.fe_group * EXT4_BLOCKS_PER_GROUP(sb) | 2701 | block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); |
2702 | + ac->ac_b_ex.fe_start | ||
2703 | + le32_to_cpu(es->s_first_data_block); | ||
2704 | 2702 | ||
2705 | len = ac->ac_b_ex.fe_len; | 2703 | len = ac->ac_b_ex.fe_len; |
2706 | if (!ext4_data_block_valid(sbi, block, len)) { | 2704 | if (!ext4_data_block_valid(sbi, block, len)) { |
2707 | ext4_error(sb, __func__, | 2705 | ext4_error(sb, "Allocating blocks %llu-%llu which overlap " |
2708 | "Allocating blocks %llu-%llu which overlap " | ||
2709 | "fs metadata\n", block, block+len); | 2706 | "fs metadata\n", block, block+len); |
2710 | /* File system mounted not to panic on error | 2707 | /* File system mounted not to panic on error |
2711 | * Fix the bitmap and repeat the block allocation | 2708 | * Fix the bitmap and repeat the block allocation |
@@ -2750,12 +2747,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2750 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) | 2747 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) |
2751 | /* release all the reserved blocks if non delalloc */ | 2748 | /* release all the reserved blocks if non delalloc */ |
2752 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); | 2749 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); |
2753 | else { | ||
2754 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | ||
2755 | ac->ac_b_ex.fe_len); | ||
2756 | /* convert reserved quota blocks to real quota blocks */ | ||
2757 | vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len); | ||
2758 | } | ||
2759 | 2750 | ||
2760 | if (sbi->s_log_groups_per_flex) { | 2751 | if (sbi->s_log_groups_per_flex) { |
2761 | ext4_group_t flex_group = ext4_flex_group(sbi, | 2752 | ext4_group_t flex_group = ext4_flex_group(sbi, |
@@ -3006,6 +2997,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) | |||
3006 | } | 2997 | } |
3007 | 2998 | ||
3008 | /* | 2999 | /* |
3000 | * Called on failure; free up any blocks from the inode PA for this | ||
3001 | * context. We don't need this for MB_GROUP_PA because we only change | ||
3002 | * pa_free in ext4_mb_release_context(), but on failure, we've already | ||
3003 | * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed. | ||
3004 | */ | ||
3005 | static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) | ||
3006 | { | ||
3007 | struct ext4_prealloc_space *pa = ac->ac_pa; | ||
3008 | int len; | ||
3009 | |||
3010 | if (pa && pa->pa_type == MB_INODE_PA) { | ||
3011 | len = ac->ac_b_ex.fe_len; | ||
3012 | pa->pa_free += len; | ||
3013 | } | ||
3014 | |||
3015 | } | ||
3016 | |||
3017 | /* | ||
3009 | * use blocks preallocated to inode | 3018 | * use blocks preallocated to inode |
3010 | */ | 3019 | */ |
3011 | static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, | 3020 | static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, |
@@ -3144,9 +3153,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3144 | /* The max size of hash table is PREALLOC_TB_SIZE */ | 3153 | /* The max size of hash table is PREALLOC_TB_SIZE */ |
3145 | order = PREALLOC_TB_SIZE - 1; | 3154 | order = PREALLOC_TB_SIZE - 1; |
3146 | 3155 | ||
3147 | goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + | 3156 | goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex); |
3148 | ac->ac_g_ex.fe_start + | ||
3149 | le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block); | ||
3150 | /* | 3157 | /* |
3151 | * search for the prealloc space that is having | 3158 | * search for the prealloc space that is having |
3152 | * minimal distance from the goal block. | 3159 | * minimal distance from the goal block. |
@@ -3509,8 +3516,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3509 | if (bit >= end) | 3516 | if (bit >= end) |
3510 | break; | 3517 | break; |
3511 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); | 3518 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); |
3512 | start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + | 3519 | start = ext4_group_first_block_no(sb, group) + bit; |
3513 | le32_to_cpu(sbi->s_es->s_first_data_block); | ||
3514 | mb_debug(1, " free preallocated %u/%u in group %u\n", | 3520 | mb_debug(1, " free preallocated %u/%u in group %u\n", |
3515 | (unsigned) start, (unsigned) next - bit, | 3521 | (unsigned) start, (unsigned) next - bit, |
3516 | (unsigned) group); | 3522 | (unsigned) group); |
@@ -3606,15 +3612,13 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3606 | 3612 | ||
3607 | bitmap_bh = ext4_read_block_bitmap(sb, group); | 3613 | bitmap_bh = ext4_read_block_bitmap(sb, group); |
3608 | if (bitmap_bh == NULL) { | 3614 | if (bitmap_bh == NULL) { |
3609 | ext4_error(sb, __func__, "Error in reading block " | 3615 | ext4_error(sb, "Error reading block bitmap for %u", group); |
3610 | "bitmap for %u", group); | ||
3611 | return 0; | 3616 | return 0; |
3612 | } | 3617 | } |
3613 | 3618 | ||
3614 | err = ext4_mb_load_buddy(sb, group, &e4b); | 3619 | err = ext4_mb_load_buddy(sb, group, &e4b); |
3615 | if (err) { | 3620 | if (err) { |
3616 | ext4_error(sb, __func__, "Error in loading buddy " | 3621 | ext4_error(sb, "Error loading buddy information for %u", group); |
3617 | "information for %u", group); | ||
3618 | put_bh(bitmap_bh); | 3622 | put_bh(bitmap_bh); |
3619 | return 0; | 3623 | return 0; |
3620 | } | 3624 | } |
@@ -3787,15 +3791,15 @@ repeat: | |||
3787 | 3791 | ||
3788 | err = ext4_mb_load_buddy(sb, group, &e4b); | 3792 | err = ext4_mb_load_buddy(sb, group, &e4b); |
3789 | if (err) { | 3793 | if (err) { |
3790 | ext4_error(sb, __func__, "Error in loading buddy " | 3794 | ext4_error(sb, "Error loading buddy information for %u", |
3791 | "information for %u", group); | 3795 | group); |
3792 | continue; | 3796 | continue; |
3793 | } | 3797 | } |
3794 | 3798 | ||
3795 | bitmap_bh = ext4_read_block_bitmap(sb, group); | 3799 | bitmap_bh = ext4_read_block_bitmap(sb, group); |
3796 | if (bitmap_bh == NULL) { | 3800 | if (bitmap_bh == NULL) { |
3797 | ext4_error(sb, __func__, "Error in reading block " | 3801 | ext4_error(sb, "Error reading block bitmap for %u", |
3798 | "bitmap for %u", group); | 3802 | group); |
3799 | ext4_mb_release_desc(&e4b); | 3803 | ext4_mb_release_desc(&e4b); |
3800 | continue; | 3804 | continue; |
3801 | } | 3805 | } |
@@ -3921,7 +3925,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
3921 | 3925 | ||
3922 | /* don't use group allocation for large files */ | 3926 | /* don't use group allocation for large files */ |
3923 | size = max(size, isize); | 3927 | size = max(size, isize); |
3924 | if (size >= sbi->s_mb_stream_request) { | 3928 | if (size > sbi->s_mb_stream_request) { |
3925 | ac->ac_flags |= EXT4_MB_STREAM_ALLOC; | 3929 | ac->ac_flags |= EXT4_MB_STREAM_ALLOC; |
3926 | return; | 3930 | return; |
3927 | } | 3931 | } |
@@ -3932,7 +3936,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
3932 | * per cpu locality group is to reduce the contention between block | 3936 | * per cpu locality group is to reduce the contention between block |
3933 | * request from multiple CPUs. | 3937 | * request from multiple CPUs. |
3934 | */ | 3938 | */ |
3935 | ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id()); | 3939 | ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups); |
3936 | 3940 | ||
3937 | /* we're going to use group allocation */ | 3941 | /* we're going to use group allocation */ |
3938 | ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; | 3942 | ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; |
@@ -4060,8 +4064,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4060 | 4064 | ||
4061 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); | 4065 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); |
4062 | if (ext4_mb_load_buddy(sb, group, &e4b)) { | 4066 | if (ext4_mb_load_buddy(sb, group, &e4b)) { |
4063 | ext4_error(sb, __func__, "Error in loading buddy " | 4067 | ext4_error(sb, "Error loading buddy information for %u", |
4064 | "information for %u", group); | 4068 | group); |
4065 | continue; | 4069 | continue; |
4066 | } | 4070 | } |
4067 | ext4_lock_group(sb, group); | 4071 | ext4_lock_group(sb, group); |
@@ -4237,7 +4241,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4237 | return 0; | 4241 | return 0; |
4238 | } | 4242 | } |
4239 | reserv_blks = ar->len; | 4243 | reserv_blks = ar->len; |
4240 | while (ar->len && vfs_dq_alloc_block(ar->inode, ar->len)) { | 4244 | while (ar->len && dquot_alloc_block(ar->inode, ar->len)) { |
4241 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | 4245 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; |
4242 | ar->len--; | 4246 | ar->len--; |
4243 | } | 4247 | } |
@@ -4290,6 +4294,7 @@ repeat: | |||
4290 | ac->ac_status = AC_STATUS_CONTINUE; | 4294 | ac->ac_status = AC_STATUS_CONTINUE; |
4291 | goto repeat; | 4295 | goto repeat; |
4292 | } else if (*errp) { | 4296 | } else if (*errp) { |
4297 | ext4_discard_allocated_blocks(ac); | ||
4293 | ac->ac_b_ex.fe_len = 0; | 4298 | ac->ac_b_ex.fe_len = 0; |
4294 | ar->len = 0; | 4299 | ar->len = 0; |
4295 | ext4_mb_show_ac(ac); | 4300 | ext4_mb_show_ac(ac); |
@@ -4313,7 +4318,7 @@ out2: | |||
4313 | kmem_cache_free(ext4_ac_cachep, ac); | 4318 | kmem_cache_free(ext4_ac_cachep, ac); |
4314 | out1: | 4319 | out1: |
4315 | if (inquota && ar->len < inquota) | 4320 | if (inquota && ar->len < inquota) |
4316 | vfs_dq_free_block(ar->inode, inquota - ar->len); | 4321 | dquot_free_block(ar->inode, inquota - ar->len); |
4317 | out3: | 4322 | out3: |
4318 | if (!ar->len) { | 4323 | if (!ar->len) { |
4319 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) | 4324 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) |
@@ -4422,18 +4427,24 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4422 | return 0; | 4427 | return 0; |
4423 | } | 4428 | } |
4424 | 4429 | ||
4425 | /* | 4430 | /** |
4426 | * Main entry point into mballoc to free blocks | 4431 | * ext4_free_blocks() -- Free given blocks and update quota |
4432 | * @handle: handle for this transaction | ||
4433 | * @inode: inode | ||
4434 | * @block: start physical block to free | ||
4435 | * @count: number of blocks to free | ||
4436 | * @flags: EXT4_FREE_BLOCKS_* flags for this free operation | ||
4427 | */ | 4437 | */ |
4428 | void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, | 4438 | void ext4_free_blocks(handle_t *handle, struct inode *inode, |
4429 | ext4_fsblk_t block, unsigned long count, | 4439 | struct buffer_head *bh, ext4_fsblk_t block, |
4430 | int metadata, unsigned long *freed) | 4440 | unsigned long count, int flags) |
4431 | { | 4441 | { |
4432 | struct buffer_head *bitmap_bh = NULL; | 4442 | struct buffer_head *bitmap_bh = NULL; |
4433 | struct super_block *sb = inode->i_sb; | 4443 | struct super_block *sb = inode->i_sb; |
4434 | struct ext4_allocation_context *ac = NULL; | 4444 | struct ext4_allocation_context *ac = NULL; |
4435 | struct ext4_group_desc *gdp; | 4445 | struct ext4_group_desc *gdp; |
4436 | struct ext4_super_block *es; | 4446 | struct ext4_super_block *es; |
4447 | unsigned long freed = 0; | ||
4437 | unsigned int overflow; | 4448 | unsigned int overflow; |
4438 | ext4_grpblk_t bit; | 4449 | ext4_grpblk_t bit; |
4439 | struct buffer_head *gd_bh; | 4450 | struct buffer_head *gd_bh; |
@@ -4443,21 +4454,49 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, | |||
4443 | int err = 0; | 4454 | int err = 0; |
4444 | int ret; | 4455 | int ret; |
4445 | 4456 | ||
4446 | *freed = 0; | 4457 | if (bh) { |
4458 | if (block) | ||
4459 | BUG_ON(block != bh->b_blocknr); | ||
4460 | else | ||
4461 | block = bh->b_blocknr; | ||
4462 | } | ||
4447 | 4463 | ||
4448 | sbi = EXT4_SB(sb); | 4464 | sbi = EXT4_SB(sb); |
4449 | es = EXT4_SB(sb)->s_es; | 4465 | es = EXT4_SB(sb)->s_es; |
4450 | if (block < le32_to_cpu(es->s_first_data_block) || | 4466 | if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && |
4451 | block + count < block || | 4467 | !ext4_data_block_valid(sbi, block, count)) { |
4452 | block + count > ext4_blocks_count(es)) { | 4468 | ext4_error(sb, "Freeing blocks not in datazone - " |
4453 | ext4_error(sb, __func__, | 4469 | "block = %llu, count = %lu", block, count); |
4454 | "Freeing blocks not in datazone - " | ||
4455 | "block = %llu, count = %lu", block, count); | ||
4456 | goto error_return; | 4470 | goto error_return; |
4457 | } | 4471 | } |
4458 | 4472 | ||
4459 | ext4_debug("freeing block %llu\n", block); | 4473 | ext4_debug("freeing block %llu\n", block); |
4460 | trace_ext4_free_blocks(inode, block, count, metadata); | 4474 | trace_ext4_free_blocks(inode, block, count, flags); |
4475 | |||
4476 | if (flags & EXT4_FREE_BLOCKS_FORGET) { | ||
4477 | struct buffer_head *tbh = bh; | ||
4478 | int i; | ||
4479 | |||
4480 | BUG_ON(bh && (count > 1)); | ||
4481 | |||
4482 | for (i = 0; i < count; i++) { | ||
4483 | if (!bh) | ||
4484 | tbh = sb_find_get_block(inode->i_sb, | ||
4485 | block + i); | ||
4486 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, | ||
4487 | inode, tbh, block + i); | ||
4488 | } | ||
4489 | } | ||
4490 | |||
4491 | /* | ||
4492 | * We need to make sure we don't reuse the freed block until | ||
4493 | * after the transaction is committed, which we can do by | ||
4494 | * treating the block as metadata, below. We make an | ||
4495 | * exception if the inode is to be written in writeback mode | ||
4496 | * since writeback mode has weak data consistency guarantees. | ||
4497 | */ | ||
4498 | if (!ext4_should_writeback_data(inode)) | ||
4499 | flags |= EXT4_FREE_BLOCKS_METADATA; | ||
4461 | 4500 | ||
4462 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 4501 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); |
4463 | if (ac) { | 4502 | if (ac) { |
@@ -4495,8 +4534,7 @@ do_more: | |||
4495 | in_range(block + count - 1, ext4_inode_table(sb, gdp), | 4534 | in_range(block + count - 1, ext4_inode_table(sb, gdp), |
4496 | EXT4_SB(sb)->s_itb_per_group)) { | 4535 | EXT4_SB(sb)->s_itb_per_group)) { |
4497 | 4536 | ||
4498 | ext4_error(sb, __func__, | 4537 | ext4_error(sb, "Freeing blocks in system zone - " |
4499 | "Freeing blocks in system zone - " | ||
4500 | "Block = %llu, count = %lu", block, count); | 4538 | "Block = %llu, count = %lu", block, count); |
4501 | /* err = 0. ext4_std_error should be a no op */ | 4539 | /* err = 0. ext4_std_error should be a no op */ |
4502 | goto error_return; | 4540 | goto error_return; |
@@ -4533,7 +4571,8 @@ do_more: | |||
4533 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | 4571 | err = ext4_mb_load_buddy(sb, block_group, &e4b); |
4534 | if (err) | 4572 | if (err) |
4535 | goto error_return; | 4573 | goto error_return; |
4536 | if (metadata && ext4_handle_valid(handle)) { | 4574 | |
4575 | if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) { | ||
4537 | struct ext4_free_data *new_entry; | 4576 | struct ext4_free_data *new_entry; |
4538 | /* | 4577 | /* |
4539 | * blocks being freed are metadata. these blocks shouldn't | 4578 | * blocks being freed are metadata. these blocks shouldn't |
@@ -4572,7 +4611,7 @@ do_more: | |||
4572 | 4611 | ||
4573 | ext4_mb_release_desc(&e4b); | 4612 | ext4_mb_release_desc(&e4b); |
4574 | 4613 | ||
4575 | *freed += count; | 4614 | freed += count; |
4576 | 4615 | ||
4577 | /* We dirtied the bitmap block */ | 4616 | /* We dirtied the bitmap block */ |
4578 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | 4617 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); |
@@ -4592,6 +4631,8 @@ do_more: | |||
4592 | } | 4631 | } |
4593 | sb->s_dirt = 1; | 4632 | sb->s_dirt = 1; |
4594 | error_return: | 4633 | error_return: |
4634 | if (freed) | ||
4635 | dquot_free_block(inode, freed); | ||
4595 | brelse(bitmap_bh); | 4636 | brelse(bitmap_bh); |
4596 | ext4_std_error(sb, err); | 4637 | ext4_std_error(sb, err); |
4597 | if (ac) | 4638 | if (ac) |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 0ca811061bc7..b619322c76f0 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/proc_fs.h> | 17 | #include <linux/proc_fs.h> |
18 | #include <linux/pagemap.h> | 18 | #include <linux/pagemap.h> |
19 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
20 | #include <linux/version.h> | ||
21 | #include <linux/blkdev.h> | 20 | #include <linux/blkdev.h> |
22 | #include <linux/mutex.h> | 21 | #include <linux/mutex.h> |
23 | #include "ext4_jbd2.h" | 22 | #include "ext4_jbd2.h" |
@@ -221,16 +220,9 @@ struct ext4_buddy { | |||
221 | #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) | 220 | #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) |
222 | #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) | 221 | #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) |
223 | 222 | ||
224 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | ||
225 | |||
226 | static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, | 223 | static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, |
227 | struct ext4_free_extent *fex) | 224 | struct ext4_free_extent *fex) |
228 | { | 225 | { |
229 | ext4_fsblk_t block; | 226 | return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start; |
230 | |||
231 | block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb) | ||
232 | + fex->fe_start | ||
233 | + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | ||
234 | return block; | ||
235 | } | 227 | } |
236 | #endif | 228 | #endif |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index a93d5b80f3e2..34dcfc52ef44 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -13,6 +13,7 @@ | |||
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/slab.h> | ||
16 | #include "ext4_jbd2.h" | 17 | #include "ext4_jbd2.h" |
17 | #include "ext4_extents.h" | 18 | #include "ext4_extents.h" |
18 | 19 | ||
@@ -238,7 +239,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode) | |||
238 | * So allocate a credit of 3. We may update | 239 | * So allocate a credit of 3. We may update |
239 | * quota (user and group). | 240 | * quota (user and group). |
240 | */ | 241 | */ |
241 | needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | 242 | needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); |
242 | 243 | ||
243 | if (ext4_journal_extend(handle, needed) != 0) | 244 | if (ext4_journal_extend(handle, needed) != 0) |
244 | retval = ext4_journal_restart(handle, needed); | 245 | retval = ext4_journal_restart(handle, needed); |
@@ -262,13 +263,17 @@ static int free_dind_blocks(handle_t *handle, | |||
262 | for (i = 0; i < max_entries; i++) { | 263 | for (i = 0; i < max_entries; i++) { |
263 | if (tmp_idata[i]) { | 264 | if (tmp_idata[i]) { |
264 | extend_credit_for_blkdel(handle, inode); | 265 | extend_credit_for_blkdel(handle, inode); |
265 | ext4_free_blocks(handle, inode, | 266 | ext4_free_blocks(handle, inode, 0, |
266 | le32_to_cpu(tmp_idata[i]), 1, 1); | 267 | le32_to_cpu(tmp_idata[i]), 1, |
268 | EXT4_FREE_BLOCKS_METADATA | | ||
269 | EXT4_FREE_BLOCKS_FORGET); | ||
267 | } | 270 | } |
268 | } | 271 | } |
269 | put_bh(bh); | 272 | put_bh(bh); |
270 | extend_credit_for_blkdel(handle, inode); | 273 | extend_credit_for_blkdel(handle, inode); |
271 | ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); | 274 | ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, |
275 | EXT4_FREE_BLOCKS_METADATA | | ||
276 | EXT4_FREE_BLOCKS_FORGET); | ||
272 | return 0; | 277 | return 0; |
273 | } | 278 | } |
274 | 279 | ||
@@ -297,7 +302,9 @@ static int free_tind_blocks(handle_t *handle, | |||
297 | } | 302 | } |
298 | put_bh(bh); | 303 | put_bh(bh); |
299 | extend_credit_for_blkdel(handle, inode); | 304 | extend_credit_for_blkdel(handle, inode); |
300 | ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); | 305 | ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, |
306 | EXT4_FREE_BLOCKS_METADATA | | ||
307 | EXT4_FREE_BLOCKS_FORGET); | ||
301 | return 0; | 308 | return 0; |
302 | } | 309 | } |
303 | 310 | ||
@@ -308,8 +315,10 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) | |||
308 | /* ei->i_data[EXT4_IND_BLOCK] */ | 315 | /* ei->i_data[EXT4_IND_BLOCK] */ |
309 | if (i_data[0]) { | 316 | if (i_data[0]) { |
310 | extend_credit_for_blkdel(handle, inode); | 317 | extend_credit_for_blkdel(handle, inode); |
311 | ext4_free_blocks(handle, inode, | 318 | ext4_free_blocks(handle, inode, 0, |
312 | le32_to_cpu(i_data[0]), 1, 1); | 319 | le32_to_cpu(i_data[0]), 1, |
320 | EXT4_FREE_BLOCKS_METADATA | | ||
321 | EXT4_FREE_BLOCKS_FORGET); | ||
313 | } | 322 | } |
314 | 323 | ||
315 | /* ei->i_data[EXT4_DIND_BLOCK] */ | 324 | /* ei->i_data[EXT4_DIND_BLOCK] */ |
@@ -357,12 +366,12 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | |||
357 | * happened after we started the migrate. We need to | 366 | * happened after we started the migrate. We need to |
358 | * fail the migrate | 367 | * fail the migrate |
359 | */ | 368 | */ |
360 | if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) { | 369 | if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) { |
361 | retval = -EAGAIN; | 370 | retval = -EAGAIN; |
362 | up_write(&EXT4_I(inode)->i_data_sem); | 371 | up_write(&EXT4_I(inode)->i_data_sem); |
363 | goto err_out; | 372 | goto err_out; |
364 | } else | 373 | } else |
365 | EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; | 374 | ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); |
366 | /* | 375 | /* |
367 | * We have the extent map build with the tmp inode. | 376 | * We have the extent map build with the tmp inode. |
368 | * Now copy the i_data across | 377 | * Now copy the i_data across |
@@ -419,7 +428,8 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, | |||
419 | } | 428 | } |
420 | put_bh(bh); | 429 | put_bh(bh); |
421 | extend_credit_for_blkdel(handle, inode); | 430 | extend_credit_for_blkdel(handle, inode); |
422 | ext4_free_blocks(handle, inode, block, 1, 1); | 431 | ext4_free_blocks(handle, inode, 0, block, 1, |
432 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); | ||
423 | return retval; | 433 | return retval; |
424 | } | 434 | } |
425 | 435 | ||
@@ -477,7 +487,7 @@ int ext4_ext_migrate(struct inode *inode) | |||
477 | handle = ext4_journal_start(inode, | 487 | handle = ext4_journal_start(inode, |
478 | EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + | 488 | EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + |
479 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + | 489 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + |
480 | 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) | 490 | EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) |
481 | + 1); | 491 | + 1); |
482 | if (IS_ERR(handle)) { | 492 | if (IS_ERR(handle)) { |
483 | retval = PTR_ERR(handle); | 493 | retval = PTR_ERR(handle); |
@@ -494,14 +504,10 @@ int ext4_ext_migrate(struct inode *inode) | |||
494 | } | 504 | } |
495 | i_size_write(tmp_inode, i_size_read(inode)); | 505 | i_size_write(tmp_inode, i_size_read(inode)); |
496 | /* | 506 | /* |
497 | * We don't want the inode to be reclaimed | 507 | * Set the i_nlink to zero so it will be deleted later |
498 | * if we got interrupted in between. We have | 508 | * when we drop inode reference. |
499 | * this tmp inode carrying reference to the | ||
500 | * data blocks of the original file. We set | ||
501 | * the i_nlink to zero at the last stage after | ||
502 | * switching the original file to extent format | ||
503 | */ | 509 | */ |
504 | tmp_inode->i_nlink = 1; | 510 | tmp_inode->i_nlink = 0; |
505 | 511 | ||
506 | ext4_ext_tree_init(handle, tmp_inode); | 512 | ext4_ext_tree_init(handle, tmp_inode); |
507 | ext4_orphan_add(handle, tmp_inode); | 513 | ext4_orphan_add(handle, tmp_inode); |
@@ -524,10 +530,20 @@ int ext4_ext_migrate(struct inode *inode) | |||
524 | * allocation. | 530 | * allocation. |
525 | */ | 531 | */ |
526 | down_read((&EXT4_I(inode)->i_data_sem)); | 532 | down_read((&EXT4_I(inode)->i_data_sem)); |
527 | EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE; | 533 | ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE); |
528 | up_read((&EXT4_I(inode)->i_data_sem)); | 534 | up_read((&EXT4_I(inode)->i_data_sem)); |
529 | 535 | ||
530 | handle = ext4_journal_start(inode, 1); | 536 | handle = ext4_journal_start(inode, 1); |
537 | if (IS_ERR(handle)) { | ||
538 | /* | ||
539 | * It is impossible to update on-disk structures without | ||
540 | * a handle, so just rollback in-core changes and live other | ||
541 | * work to orphan_list_cleanup() | ||
542 | */ | ||
543 | ext4_orphan_del(NULL, tmp_inode); | ||
544 | retval = PTR_ERR(handle); | ||
545 | goto out; | ||
546 | } | ||
531 | 547 | ||
532 | ei = EXT4_I(inode); | 548 | ei = EXT4_I(inode); |
533 | i_data = ei->i_data; | 549 | i_data = ei->i_data; |
@@ -609,15 +625,8 @@ err_out: | |||
609 | 625 | ||
610 | /* Reset the extent details */ | 626 | /* Reset the extent details */ |
611 | ext4_ext_tree_init(handle, tmp_inode); | 627 | ext4_ext_tree_init(handle, tmp_inode); |
612 | |||
613 | /* | ||
614 | * Set the i_nlink to zero so that | ||
615 | * generic_drop_inode really deletes the | ||
616 | * inode | ||
617 | */ | ||
618 | tmp_inode->i_nlink = 0; | ||
619 | |||
620 | ext4_journal_stop(handle); | 628 | ext4_journal_stop(handle); |
629 | out: | ||
621 | unlock_new_inode(tmp_inode); | 630 | unlock_new_inode(tmp_inode); |
622 | iput(tmp_inode); | 631 | iput(tmp_inode); |
623 | 632 | ||
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 25b6b1457360..d1fc662cc311 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -15,6 +15,7 @@ | |||
15 | 15 | ||
16 | #include <linux/fs.h> | 16 | #include <linux/fs.h> |
17 | #include <linux/quotaops.h> | 17 | #include <linux/quotaops.h> |
18 | #include <linux/slab.h> | ||
18 | #include "ext4_jbd2.h" | 19 | #include "ext4_jbd2.h" |
19 | #include "ext4_extents.h" | 20 | #include "ext4_extents.h" |
20 | #include "ext4.h" | 21 | #include "ext4.h" |
@@ -77,12 +78,14 @@ static int | |||
77 | mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | 78 | mext_next_extent(struct inode *inode, struct ext4_ext_path *path, |
78 | struct ext4_extent **extent) | 79 | struct ext4_extent **extent) |
79 | { | 80 | { |
81 | struct ext4_extent_header *eh; | ||
80 | int ppos, leaf_ppos = path->p_depth; | 82 | int ppos, leaf_ppos = path->p_depth; |
81 | 83 | ||
82 | ppos = leaf_ppos; | 84 | ppos = leaf_ppos; |
83 | if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { | 85 | if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { |
84 | /* leaf block */ | 86 | /* leaf block */ |
85 | *extent = ++path[ppos].p_ext; | 87 | *extent = ++path[ppos].p_ext; |
88 | path[ppos].p_block = ext_pblock(path[ppos].p_ext); | ||
86 | return 0; | 89 | return 0; |
87 | } | 90 | } |
88 | 91 | ||
@@ -119,9 +122,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
119 | ext_block_hdr(path[cur_ppos+1].p_bh); | 122 | ext_block_hdr(path[cur_ppos+1].p_bh); |
120 | } | 123 | } |
121 | 124 | ||
125 | path[leaf_ppos].p_ext = *extent = NULL; | ||
126 | |||
127 | eh = path[leaf_ppos].p_hdr; | ||
128 | if (le16_to_cpu(eh->eh_entries) == 0) | ||
129 | /* empty leaf is found */ | ||
130 | return -ENODATA; | ||
131 | |||
122 | /* leaf block */ | 132 | /* leaf block */ |
123 | path[leaf_ppos].p_ext = *extent = | 133 | path[leaf_ppos].p_ext = *extent = |
124 | EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); | 134 | EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); |
135 | path[leaf_ppos].p_block = | ||
136 | ext_pblock(path[leaf_ppos].p_ext); | ||
125 | return 0; | 137 | return 0; |
126 | } | 138 | } |
127 | } | 139 | } |
@@ -141,12 +153,12 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2, | |||
141 | int ret = 0; | 153 | int ret = 0; |
142 | 154 | ||
143 | if (inode1 == NULL) { | 155 | if (inode1 == NULL) { |
144 | ext4_error(inode2->i_sb, function, | 156 | __ext4_error(inode2->i_sb, function, |
145 | "Both inodes should not be NULL: " | 157 | "Both inodes should not be NULL: " |
146 | "inode1 NULL inode2 %lu", inode2->i_ino); | 158 | "inode1 NULL inode2 %lu", inode2->i_ino); |
147 | ret = -EIO; | 159 | ret = -EIO; |
148 | } else if (inode2 == NULL) { | 160 | } else if (inode2 == NULL) { |
149 | ext4_error(inode1->i_sb, function, | 161 | __ext4_error(inode1->i_sb, function, |
150 | "Both inodes should not be NULL: " | 162 | "Both inodes should not be NULL: " |
151 | "inode1 %lu inode2 NULL", inode1->i_ino); | 163 | "inode1 %lu inode2 NULL", inode1->i_ino); |
152 | ret = -EIO; | 164 | ret = -EIO; |
@@ -155,40 +167,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2, | |||
155 | } | 167 | } |
156 | 168 | ||
157 | /** | 169 | /** |
158 | * mext_double_down_read - Acquire two inodes' read semaphore | 170 | * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem |
159 | * | 171 | * |
160 | * @orig_inode: original inode structure | 172 | * @orig_inode: original inode structure |
161 | * @donor_inode: donor inode structure | 173 | * @donor_inode: donor inode structure |
162 | * Acquire read semaphore of the two inodes (orig and donor) by i_ino order. | 174 | * Acquire write lock of i_data_sem of the two inodes (orig and donor) by |
175 | * i_ino order. | ||
163 | */ | 176 | */ |
164 | static void | 177 | static void |
165 | mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode) | 178 | double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) |
166 | { | ||
167 | struct inode *first = orig_inode, *second = donor_inode; | ||
168 | |||
169 | /* | ||
170 | * Use the inode number to provide the stable locking order instead | ||
171 | * of its address, because the C language doesn't guarantee you can | ||
172 | * compare pointers that don't come from the same array. | ||
173 | */ | ||
174 | if (donor_inode->i_ino < orig_inode->i_ino) { | ||
175 | first = donor_inode; | ||
176 | second = orig_inode; | ||
177 | } | ||
178 | |||
179 | down_read(&EXT4_I(first)->i_data_sem); | ||
180 | down_read(&EXT4_I(second)->i_data_sem); | ||
181 | } | ||
182 | |||
183 | /** | ||
184 | * mext_double_down_write - Acquire two inodes' write semaphore | ||
185 | * | ||
186 | * @orig_inode: original inode structure | ||
187 | * @donor_inode: donor inode structure | ||
188 | * Acquire write semaphore of the two inodes (orig and donor) by i_ino order. | ||
189 | */ | ||
190 | static void | ||
191 | mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) | ||
192 | { | 179 | { |
193 | struct inode *first = orig_inode, *second = donor_inode; | 180 | struct inode *first = orig_inode, *second = donor_inode; |
194 | 181 | ||
@@ -203,32 +190,18 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) | |||
203 | } | 190 | } |
204 | 191 | ||
205 | down_write(&EXT4_I(first)->i_data_sem); | 192 | down_write(&EXT4_I(first)->i_data_sem); |
206 | down_write(&EXT4_I(second)->i_data_sem); | 193 | down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); |
207 | } | ||
208 | |||
209 | /** | ||
210 | * mext_double_up_read - Release two inodes' read semaphore | ||
211 | * | ||
212 | * @orig_inode: original inode structure to be released its lock first | ||
213 | * @donor_inode: donor inode structure to be released its lock second | ||
214 | * Release read semaphore of two inodes (orig and donor). | ||
215 | */ | ||
216 | static void | ||
217 | mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) | ||
218 | { | ||
219 | up_read(&EXT4_I(orig_inode)->i_data_sem); | ||
220 | up_read(&EXT4_I(donor_inode)->i_data_sem); | ||
221 | } | 194 | } |
222 | 195 | ||
223 | /** | 196 | /** |
224 | * mext_double_up_write - Release two inodes' write semaphore | 197 | * double_up_write_data_sem - Release two inodes' write lock of i_data_sem |
225 | * | 198 | * |
226 | * @orig_inode: original inode structure to be released its lock first | 199 | * @orig_inode: original inode structure to be released its lock first |
227 | * @donor_inode: donor inode structure to be released its lock second | 200 | * @donor_inode: donor inode structure to be released its lock second |
228 | * Release write semaphore of two inodes (orig and donor). | 201 | * Release write lock of i_data_sem of two inodes (orig and donor). |
229 | */ | 202 | */ |
230 | static void | 203 | static void |
231 | mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) | 204 | double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) |
232 | { | 205 | { |
233 | up_write(&EXT4_I(orig_inode)->i_data_sem); | 206 | up_write(&EXT4_I(orig_inode)->i_data_sem); |
234 | up_write(&EXT4_I(donor_inode)->i_data_sem); | 207 | up_write(&EXT4_I(donor_inode)->i_data_sem); |
@@ -280,6 +253,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
280 | } | 253 | } |
281 | 254 | ||
282 | o_start->ee_len = start_ext->ee_len; | 255 | o_start->ee_len = start_ext->ee_len; |
256 | eblock = le32_to_cpu(start_ext->ee_block); | ||
283 | new_flag = 1; | 257 | new_flag = 1; |
284 | 258 | ||
285 | } else if (start_ext->ee_len && new_ext->ee_len && | 259 | } else if (start_ext->ee_len && new_ext->ee_len && |
@@ -290,6 +264,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
290 | * orig |------------------------------| | 264 | * orig |------------------------------| |
291 | */ | 265 | */ |
292 | o_start->ee_len = start_ext->ee_len; | 266 | o_start->ee_len = start_ext->ee_len; |
267 | eblock = le32_to_cpu(start_ext->ee_block); | ||
293 | new_flag = 1; | 268 | new_flag = 1; |
294 | 269 | ||
295 | } else if (!start_ext->ee_len && new_ext->ee_len && | 270 | } else if (!start_ext->ee_len && new_ext->ee_len && |
@@ -503,7 +478,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
503 | struct ext4_extent *oext, *o_start, *o_end, *prev_ext; | 478 | struct ext4_extent *oext, *o_start, *o_end, *prev_ext; |
504 | struct ext4_extent new_ext, start_ext, end_ext; | 479 | struct ext4_extent new_ext, start_ext, end_ext; |
505 | ext4_lblk_t new_ext_end; | 480 | ext4_lblk_t new_ext_end; |
506 | ext4_fsblk_t new_phys_end; | ||
507 | int oext_alen, new_ext_alen, end_ext_alen; | 481 | int oext_alen, new_ext_alen, end_ext_alen; |
508 | int depth = ext_depth(orig_inode); | 482 | int depth = ext_depth(orig_inode); |
509 | int ret; | 483 | int ret; |
@@ -517,7 +491,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
517 | new_ext.ee_len = dext->ee_len; | 491 | new_ext.ee_len = dext->ee_len; |
518 | new_ext_alen = ext4_ext_get_actual_len(&new_ext); | 492 | new_ext_alen = ext4_ext_get_actual_len(&new_ext); |
519 | new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; | 493 | new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; |
520 | new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1; | ||
521 | 494 | ||
522 | /* | 495 | /* |
523 | * Case: original extent is first | 496 | * Case: original extent is first |
@@ -530,6 +503,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
530 | le32_to_cpu(oext->ee_block) + oext_alen) { | 503 | le32_to_cpu(oext->ee_block) + oext_alen) { |
531 | start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - | 504 | start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - |
532 | le32_to_cpu(oext->ee_block)); | 505 | le32_to_cpu(oext->ee_block)); |
506 | start_ext.ee_block = oext->ee_block; | ||
533 | copy_extent_status(oext, &start_ext); | 507 | copy_extent_status(oext, &start_ext); |
534 | } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { | 508 | } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { |
535 | prev_ext = oext - 1; | 509 | prev_ext = oext - 1; |
@@ -543,6 +517,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
543 | start_ext.ee_len = cpu_to_le16( | 517 | start_ext.ee_len = cpu_to_le16( |
544 | ext4_ext_get_actual_len(prev_ext) + | 518 | ext4_ext_get_actual_len(prev_ext) + |
545 | new_ext_alen); | 519 | new_ext_alen); |
520 | start_ext.ee_block = oext->ee_block; | ||
546 | copy_extent_status(prev_ext, &start_ext); | 521 | copy_extent_status(prev_ext, &start_ext); |
547 | new_ext.ee_len = 0; | 522 | new_ext.ee_len = 0; |
548 | } | 523 | } |
@@ -554,7 +529,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
554 | * new_ext |-------| | 529 | * new_ext |-------| |
555 | */ | 530 | */ |
556 | if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { | 531 | if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { |
557 | ext4_error(orig_inode->i_sb, __func__, | 532 | ext4_error(orig_inode->i_sb, |
558 | "new_ext_end(%u) should be less than or equal to " | 533 | "new_ext_end(%u) should be less than or equal to " |
559 | "oext->ee_block(%u) + oext_alen(%d) - 1", | 534 | "oext->ee_block(%u) + oext_alen(%d) - 1", |
560 | new_ext_end, le32_to_cpu(oext->ee_block), | 535 | new_ext_end, le32_to_cpu(oext->ee_block), |
@@ -596,7 +571,7 @@ out: | |||
596 | * @tmp_oext: the extent that will belong to the donor inode | 571 | * @tmp_oext: the extent that will belong to the donor inode |
597 | * @orig_off: block offset of original inode | 572 | * @orig_off: block offset of original inode |
598 | * @donor_off: block offset of donor inode | 573 | * @donor_off: block offset of donor inode |
599 | * @max_count: the maximun length of extents | 574 | * @max_count: the maximum length of extents |
600 | * | 575 | * |
601 | * Return 0 on success, or a negative error value on failure. | 576 | * Return 0 on success, or a negative error value on failure. |
602 | */ | 577 | */ |
@@ -661,6 +636,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
661 | * @donor_inode: donor inode | 636 | * @donor_inode: donor inode |
662 | * @from: block offset of orig_inode | 637 | * @from: block offset of orig_inode |
663 | * @count: block count to be replaced | 638 | * @count: block count to be replaced |
639 | * @err: pointer to save return value | ||
664 | * | 640 | * |
665 | * Replace original inode extents and donor inode extents page by page. | 641 | * Replace original inode extents and donor inode extents page by page. |
666 | * We implement this replacement in the following three steps: | 642 | * We implement this replacement in the following three steps: |
@@ -671,33 +647,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
671 | * 3. Change the block information of donor inode to point at the saved | 647 | * 3. Change the block information of donor inode to point at the saved |
672 | * original inode blocks in the dummy extents. | 648 | * original inode blocks in the dummy extents. |
673 | * | 649 | * |
674 | * Return 0 on success, or a negative error value on failure. | 650 | * Return replaced block count. |
675 | */ | 651 | */ |
676 | static int | 652 | static int |
677 | mext_replace_branches(handle_t *handle, struct inode *orig_inode, | 653 | mext_replace_branches(handle_t *handle, struct inode *orig_inode, |
678 | struct inode *donor_inode, ext4_lblk_t from, | 654 | struct inode *donor_inode, ext4_lblk_t from, |
679 | ext4_lblk_t count) | 655 | ext4_lblk_t count, int *err) |
680 | { | 656 | { |
681 | struct ext4_ext_path *orig_path = NULL; | 657 | struct ext4_ext_path *orig_path = NULL; |
682 | struct ext4_ext_path *donor_path = NULL; | 658 | struct ext4_ext_path *donor_path = NULL; |
683 | struct ext4_extent *oext, *dext; | 659 | struct ext4_extent *oext, *dext; |
684 | struct ext4_extent tmp_dext, tmp_oext; | 660 | struct ext4_extent tmp_dext, tmp_oext; |
685 | ext4_lblk_t orig_off = from, donor_off = from; | 661 | ext4_lblk_t orig_off = from, donor_off = from; |
686 | int err = 0; | ||
687 | int depth; | 662 | int depth; |
688 | int replaced_count = 0; | 663 | int replaced_count = 0; |
689 | int dext_alen; | 664 | int dext_alen; |
690 | 665 | ||
691 | mext_double_down_write(orig_inode, donor_inode); | 666 | /* Protect extent trees against block allocations via delalloc */ |
667 | double_down_write_data_sem(orig_inode, donor_inode); | ||
692 | 668 | ||
693 | /* Get the original extent for the block "orig_off" */ | 669 | /* Get the original extent for the block "orig_off" */ |
694 | err = get_ext_path(orig_inode, orig_off, &orig_path); | 670 | *err = get_ext_path(orig_inode, orig_off, &orig_path); |
695 | if (err) | 671 | if (*err) |
696 | goto out; | 672 | goto out; |
697 | 673 | ||
698 | /* Get the donor extent for the head */ | 674 | /* Get the donor extent for the head */ |
699 | err = get_ext_path(donor_inode, donor_off, &donor_path); | 675 | *err = get_ext_path(donor_inode, donor_off, &donor_path); |
700 | if (err) | 676 | if (*err) |
701 | goto out; | 677 | goto out; |
702 | depth = ext_depth(orig_inode); | 678 | depth = ext_depth(orig_inode); |
703 | oext = orig_path[depth].p_ext; | 679 | oext = orig_path[depth].p_ext; |
@@ -707,39 +683,39 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
707 | dext = donor_path[depth].p_ext; | 683 | dext = donor_path[depth].p_ext; |
708 | tmp_dext = *dext; | 684 | tmp_dext = *dext; |
709 | 685 | ||
710 | err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | 686 | *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, |
711 | donor_off, count); | 687 | donor_off, count); |
712 | if (err) | 688 | if (*err) |
713 | goto out; | 689 | goto out; |
714 | 690 | ||
715 | /* Loop for the donor extents */ | 691 | /* Loop for the donor extents */ |
716 | while (1) { | 692 | while (1) { |
717 | /* The extent for donor must be found. */ | 693 | /* The extent for donor must be found. */ |
718 | if (!dext) { | 694 | if (!dext) { |
719 | ext4_error(donor_inode->i_sb, __func__, | 695 | ext4_error(donor_inode->i_sb, |
720 | "The extent for donor must be found"); | 696 | "The extent for donor must be found"); |
721 | err = -EIO; | 697 | *err = -EIO; |
722 | goto out; | 698 | goto out; |
723 | } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { | 699 | } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { |
724 | ext4_error(donor_inode->i_sb, __func__, | 700 | ext4_error(donor_inode->i_sb, |
725 | "Donor offset(%u) and the first block of donor " | 701 | "Donor offset(%u) and the first block of donor " |
726 | "extent(%u) should be equal", | 702 | "extent(%u) should be equal", |
727 | donor_off, | 703 | donor_off, |
728 | le32_to_cpu(tmp_dext.ee_block)); | 704 | le32_to_cpu(tmp_dext.ee_block)); |
729 | err = -EIO; | 705 | *err = -EIO; |
730 | goto out; | 706 | goto out; |
731 | } | 707 | } |
732 | 708 | ||
733 | /* Set donor extent to orig extent */ | 709 | /* Set donor extent to orig extent */ |
734 | err = mext_leaf_block(handle, orig_inode, | 710 | *err = mext_leaf_block(handle, orig_inode, |
735 | orig_path, &tmp_dext, &orig_off); | 711 | orig_path, &tmp_dext, &orig_off); |
736 | if (err < 0) | 712 | if (*err) |
737 | goto out; | 713 | goto out; |
738 | 714 | ||
739 | /* Set orig extent to donor extent */ | 715 | /* Set orig extent to donor extent */ |
740 | err = mext_leaf_block(handle, donor_inode, | 716 | *err = mext_leaf_block(handle, donor_inode, |
741 | donor_path, &tmp_oext, &donor_off); | 717 | donor_path, &tmp_oext, &donor_off); |
742 | if (err < 0) | 718 | if (*err) |
743 | goto out; | 719 | goto out; |
744 | 720 | ||
745 | dext_alen = ext4_ext_get_actual_len(&tmp_dext); | 721 | dext_alen = ext4_ext_get_actual_len(&tmp_dext); |
@@ -753,35 +729,25 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
753 | 729 | ||
754 | if (orig_path) | 730 | if (orig_path) |
755 | ext4_ext_drop_refs(orig_path); | 731 | ext4_ext_drop_refs(orig_path); |
756 | err = get_ext_path(orig_inode, orig_off, &orig_path); | 732 | *err = get_ext_path(orig_inode, orig_off, &orig_path); |
757 | if (err) | 733 | if (*err) |
758 | goto out; | 734 | goto out; |
759 | depth = ext_depth(orig_inode); | 735 | depth = ext_depth(orig_inode); |
760 | oext = orig_path[depth].p_ext; | 736 | oext = orig_path[depth].p_ext; |
761 | if (le32_to_cpu(oext->ee_block) + | ||
762 | ext4_ext_get_actual_len(oext) <= orig_off) { | ||
763 | err = 0; | ||
764 | goto out; | ||
765 | } | ||
766 | tmp_oext = *oext; | 737 | tmp_oext = *oext; |
767 | 738 | ||
768 | if (donor_path) | 739 | if (donor_path) |
769 | ext4_ext_drop_refs(donor_path); | 740 | ext4_ext_drop_refs(donor_path); |
770 | err = get_ext_path(donor_inode, donor_off, &donor_path); | 741 | *err = get_ext_path(donor_inode, donor_off, &donor_path); |
771 | if (err) | 742 | if (*err) |
772 | goto out; | 743 | goto out; |
773 | depth = ext_depth(donor_inode); | 744 | depth = ext_depth(donor_inode); |
774 | dext = donor_path[depth].p_ext; | 745 | dext = donor_path[depth].p_ext; |
775 | if (le32_to_cpu(dext->ee_block) + | ||
776 | ext4_ext_get_actual_len(dext) <= donor_off) { | ||
777 | err = 0; | ||
778 | goto out; | ||
779 | } | ||
780 | tmp_dext = *dext; | 746 | tmp_dext = *dext; |
781 | 747 | ||
782 | err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | 748 | *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, |
783 | donor_off, count - replaced_count); | 749 | donor_off, count - replaced_count); |
784 | if (err) | 750 | if (*err) |
785 | goto out; | 751 | goto out; |
786 | } | 752 | } |
787 | 753 | ||
@@ -795,8 +761,12 @@ out: | |||
795 | kfree(donor_path); | 761 | kfree(donor_path); |
796 | } | 762 | } |
797 | 763 | ||
798 | mext_double_up_write(orig_inode, donor_inode); | 764 | ext4_ext_invalidate_cache(orig_inode); |
799 | return err; | 765 | ext4_ext_invalidate_cache(donor_inode); |
766 | |||
767 | double_up_write_data_sem(orig_inode, donor_inode); | ||
768 | |||
769 | return replaced_count; | ||
800 | } | 770 | } |
801 | 771 | ||
802 | /** | 772 | /** |
@@ -808,16 +778,17 @@ out: | |||
808 | * @data_offset_in_page: block index where data swapping starts | 778 | * @data_offset_in_page: block index where data swapping starts |
809 | * @block_len_in_page: the number of blocks to be swapped | 779 | * @block_len_in_page: the number of blocks to be swapped |
810 | * @uninit: orig extent is uninitialized or not | 780 | * @uninit: orig extent is uninitialized or not |
781 | * @err: pointer to save return value | ||
811 | * | 782 | * |
812 | * Save the data in original inode blocks and replace original inode extents | 783 | * Save the data in original inode blocks and replace original inode extents |
813 | * with donor inode extents by calling mext_replace_branches(). | 784 | * with donor inode extents by calling mext_replace_branches(). |
814 | * Finally, write out the saved data in new original inode blocks. Return 0 | 785 | * Finally, write out the saved data in new original inode blocks. Return |
815 | * on success, or a negative error value on failure. | 786 | * replaced block count. |
816 | */ | 787 | */ |
817 | static int | 788 | static int |
818 | move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | 789 | move_extent_per_page(struct file *o_filp, struct inode *donor_inode, |
819 | pgoff_t orig_page_offset, int data_offset_in_page, | 790 | pgoff_t orig_page_offset, int data_offset_in_page, |
820 | int block_len_in_page, int uninit) | 791 | int block_len_in_page, int uninit, int *err) |
821 | { | 792 | { |
822 | struct inode *orig_inode = o_filp->f_dentry->d_inode; | 793 | struct inode *orig_inode = o_filp->f_dentry->d_inode; |
823 | struct address_space *mapping = orig_inode->i_mapping; | 794 | struct address_space *mapping = orig_inode->i_mapping; |
@@ -829,9 +800,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
829 | long long offs = orig_page_offset << PAGE_CACHE_SHIFT; | 800 | long long offs = orig_page_offset << PAGE_CACHE_SHIFT; |
830 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; | 801 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; |
831 | unsigned int w_flags = 0; | 802 | unsigned int w_flags = 0; |
832 | unsigned int tmp_data_len, data_len; | 803 | unsigned int tmp_data_size, data_size, replaced_size; |
833 | void *fsdata; | 804 | void *fsdata; |
834 | int ret, i, jblocks; | 805 | int i, jblocks; |
806 | int err2 = 0; | ||
807 | int replaced_count = 0; | ||
835 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 808 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
836 | 809 | ||
837 | /* | 810 | /* |
@@ -841,8 +814,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
841 | jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; | 814 | jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; |
842 | handle = ext4_journal_start(orig_inode, jblocks); | 815 | handle = ext4_journal_start(orig_inode, jblocks); |
843 | if (IS_ERR(handle)) { | 816 | if (IS_ERR(handle)) { |
844 | ret = PTR_ERR(handle); | 817 | *err = PTR_ERR(handle); |
845 | return ret; | 818 | return 0; |
846 | } | 819 | } |
847 | 820 | ||
848 | if (segment_eq(get_fs(), KERNEL_DS)) | 821 | if (segment_eq(get_fs(), KERNEL_DS)) |
@@ -858,39 +831,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
858 | * Just swap data blocks between orig and donor. | 831 | * Just swap data blocks between orig and donor. |
859 | */ | 832 | */ |
860 | if (uninit) { | 833 | if (uninit) { |
861 | ret = mext_replace_branches(handle, orig_inode, | 834 | replaced_count = mext_replace_branches(handle, orig_inode, |
862 | donor_inode, orig_blk_offset, | 835 | donor_inode, orig_blk_offset, |
863 | block_len_in_page); | 836 | block_len_in_page, err); |
864 | |||
865 | /* Clear the inode cache not to refer to the old data */ | ||
866 | ext4_ext_invalidate_cache(orig_inode); | ||
867 | ext4_ext_invalidate_cache(donor_inode); | ||
868 | goto out2; | 837 | goto out2; |
869 | } | 838 | } |
870 | 839 | ||
871 | offs = (long long)orig_blk_offset << orig_inode->i_blkbits; | 840 | offs = (long long)orig_blk_offset << orig_inode->i_blkbits; |
872 | 841 | ||
873 | /* Calculate data_len */ | 842 | /* Calculate data_size */ |
874 | if ((orig_blk_offset + block_len_in_page - 1) == | 843 | if ((orig_blk_offset + block_len_in_page - 1) == |
875 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { | 844 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { |
876 | /* Replace the last block */ | 845 | /* Replace the last block */ |
877 | tmp_data_len = orig_inode->i_size & (blocksize - 1); | 846 | tmp_data_size = orig_inode->i_size & (blocksize - 1); |
878 | /* | 847 | /* |
879 | * If data_len equal zero, it shows data_len is multiples of | 848 | * If data_size equal zero, it shows data_size is multiples of |
880 | * blocksize. So we set appropriate value. | 849 | * blocksize. So we set appropriate value. |
881 | */ | 850 | */ |
882 | if (tmp_data_len == 0) | 851 | if (tmp_data_size == 0) |
883 | tmp_data_len = blocksize; | 852 | tmp_data_size = blocksize; |
884 | 853 | ||
885 | data_len = tmp_data_len + | 854 | data_size = tmp_data_size + |
886 | ((block_len_in_page - 1) << orig_inode->i_blkbits); | 855 | ((block_len_in_page - 1) << orig_inode->i_blkbits); |
887 | } else { | 856 | } else |
888 | data_len = block_len_in_page << orig_inode->i_blkbits; | 857 | data_size = block_len_in_page << orig_inode->i_blkbits; |
889 | } | 858 | |
859 | replaced_size = data_size; | ||
890 | 860 | ||
891 | ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags, | 861 | *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags, |
892 | &page, &fsdata); | 862 | &page, &fsdata); |
893 | if (unlikely(ret < 0)) | 863 | if (unlikely(*err < 0)) |
894 | goto out; | 864 | goto out; |
895 | 865 | ||
896 | if (!PageUptodate(page)) { | 866 | if (!PageUptodate(page)) { |
@@ -911,14 +881,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
911 | /* Release old bh and drop refs */ | 881 | /* Release old bh and drop refs */ |
912 | try_to_release_page(page, 0); | 882 | try_to_release_page(page, 0); |
913 | 883 | ||
914 | ret = mext_replace_branches(handle, orig_inode, donor_inode, | 884 | replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, |
915 | orig_blk_offset, block_len_in_page); | 885 | orig_blk_offset, block_len_in_page, |
916 | if (ret < 0) | 886 | &err2); |
917 | goto out; | 887 | if (err2) { |
918 | 888 | if (replaced_count) { | |
919 | /* Clear the inode cache not to refer to the old data */ | 889 | block_len_in_page = replaced_count; |
920 | ext4_ext_invalidate_cache(orig_inode); | 890 | replaced_size = |
921 | ext4_ext_invalidate_cache(donor_inode); | 891 | block_len_in_page << orig_inode->i_blkbits; |
892 | } else | ||
893 | goto out; | ||
894 | } | ||
922 | 895 | ||
923 | if (!page_has_buffers(page)) | 896 | if (!page_has_buffers(page)) |
924 | create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); | 897 | create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); |
@@ -928,16 +901,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
928 | bh = bh->b_this_page; | 901 | bh = bh->b_this_page; |
929 | 902 | ||
930 | for (i = 0; i < block_len_in_page; i++) { | 903 | for (i = 0; i < block_len_in_page; i++) { |
931 | ret = ext4_get_block(orig_inode, | 904 | *err = ext4_get_block(orig_inode, |
932 | (sector_t)(orig_blk_offset + i), bh, 0); | 905 | (sector_t)(orig_blk_offset + i), bh, 0); |
933 | if (ret < 0) | 906 | if (*err < 0) |
934 | goto out; | 907 | goto out; |
935 | 908 | ||
936 | if (bh->b_this_page != NULL) | 909 | if (bh->b_this_page != NULL) |
937 | bh = bh->b_this_page; | 910 | bh = bh->b_this_page; |
938 | } | 911 | } |
939 | 912 | ||
940 | ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len, | 913 | *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size, |
941 | page, fsdata); | 914 | page, fsdata); |
942 | page = NULL; | 915 | page = NULL; |
943 | 916 | ||
@@ -951,18 +924,20 @@ out: | |||
951 | out2: | 924 | out2: |
952 | ext4_journal_stop(handle); | 925 | ext4_journal_stop(handle); |
953 | 926 | ||
954 | return ret < 0 ? ret : 0; | 927 | if (err2) |
928 | *err = err2; | ||
929 | |||
930 | return replaced_count; | ||
955 | } | 931 | } |
956 | 932 | ||
957 | /** | 933 | /** |
958 | * mext_check_argumants - Check whether move extent can be done | 934 | * mext_check_arguments - Check whether move extent can be done |
959 | * | 935 | * |
960 | * @orig_inode: original inode | 936 | * @orig_inode: original inode |
961 | * @donor_inode: donor inode | 937 | * @donor_inode: donor inode |
962 | * @orig_start: logical start offset in block for orig | 938 | * @orig_start: logical start offset in block for orig |
963 | * @donor_start: logical start offset in block for donor | 939 | * @donor_start: logical start offset in block for donor |
964 | * @len: the number of blocks to be moved | 940 | * @len: the number of blocks to be moved |
965 | * @moved_len: moved block length | ||
966 | * | 941 | * |
967 | * Check the arguments of ext4_move_extents() whether the files can be | 942 | * Check the arguments of ext4_move_extents() whether the files can be |
968 | * exchanged with each other. | 943 | * exchanged with each other. |
@@ -970,18 +945,17 @@ out2: | |||
970 | */ | 945 | */ |
971 | static int | 946 | static int |
972 | mext_check_arguments(struct inode *orig_inode, | 947 | mext_check_arguments(struct inode *orig_inode, |
973 | struct inode *donor_inode, __u64 orig_start, | 948 | struct inode *donor_inode, __u64 orig_start, |
974 | __u64 donor_start, __u64 *len, __u64 moved_len) | 949 | __u64 donor_start, __u64 *len) |
975 | { | 950 | { |
976 | ext4_lblk_t orig_blocks, donor_blocks; | 951 | ext4_lblk_t orig_blocks, donor_blocks; |
977 | unsigned int blkbits = orig_inode->i_blkbits; | 952 | unsigned int blkbits = orig_inode->i_blkbits; |
978 | unsigned int blocksize = 1 << blkbits; | 953 | unsigned int blocksize = 1 << blkbits; |
979 | 954 | ||
980 | /* Regular file check */ | 955 | if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { |
981 | if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { | 956 | ext4_debug("ext4 move extent: suid or sgid is set" |
982 | ext4_debug("ext4 move extent: The argument files should be " | 957 | " to donor file [ino:orig %lu, donor %lu]\n", |
983 | "regular file [ino:orig %lu, donor %lu]\n", | 958 | orig_inode->i_ino, donor_inode->i_ino); |
984 | orig_inode->i_ino, donor_inode->i_ino); | ||
985 | return -EINVAL; | 959 | return -EINVAL; |
986 | } | 960 | } |
987 | 961 | ||
@@ -1025,13 +999,6 @@ mext_check_arguments(struct inode *orig_inode, | |||
1025 | return -EINVAL; | 999 | return -EINVAL; |
1026 | } | 1000 | } |
1027 | 1001 | ||
1028 | if (moved_len) { | ||
1029 | ext4_debug("ext4 move extent: moved_len should be 0 " | ||
1030 | "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, | ||
1031 | donor_inode->i_ino); | ||
1032 | return -EINVAL; | ||
1033 | } | ||
1034 | |||
1035 | if ((orig_start > EXT_MAX_BLOCK) || | 1002 | if ((orig_start > EXT_MAX_BLOCK) || |
1036 | (donor_start > EXT_MAX_BLOCK) || | 1003 | (donor_start > EXT_MAX_BLOCK) || |
1037 | (*len > EXT_MAX_BLOCK) || | 1004 | (*len > EXT_MAX_BLOCK) || |
@@ -1088,7 +1055,7 @@ mext_check_arguments(struct inode *orig_inode, | |||
1088 | } | 1055 | } |
1089 | 1056 | ||
1090 | if (!*len) { | 1057 | if (!*len) { |
1091 | ext4_debug("ext4 move extent: len shoudld not be 0 " | 1058 | ext4_debug("ext4 move extent: len should not be 0 " |
1092 | "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, | 1059 | "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, |
1093 | donor_inode->i_ino); | 1060 | donor_inode->i_ino); |
1094 | return -EINVAL; | 1061 | return -EINVAL; |
@@ -1232,16 +1199,24 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1232 | return -EINVAL; | 1199 | return -EINVAL; |
1233 | } | 1200 | } |
1234 | 1201 | ||
1235 | /* protect orig and donor against a truncate */ | 1202 | /* Regular file check */ |
1203 | if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { | ||
1204 | ext4_debug("ext4 move extent: The argument files should be " | ||
1205 | "regular file [ino:orig %lu, donor %lu]\n", | ||
1206 | orig_inode->i_ino, donor_inode->i_ino); | ||
1207 | return -EINVAL; | ||
1208 | } | ||
1209 | |||
1210 | /* Protect orig and donor inodes against a truncate */ | ||
1236 | ret1 = mext_inode_double_lock(orig_inode, donor_inode); | 1211 | ret1 = mext_inode_double_lock(orig_inode, donor_inode); |
1237 | if (ret1 < 0) | 1212 | if (ret1 < 0) |
1238 | return ret1; | 1213 | return ret1; |
1239 | 1214 | ||
1240 | mext_double_down_read(orig_inode, donor_inode); | 1215 | /* Protect extent tree against block allocations via delalloc */ |
1216 | double_down_write_data_sem(orig_inode, donor_inode); | ||
1241 | /* Check the filesystem environment whether move_extent can be done */ | 1217 | /* Check the filesystem environment whether move_extent can be done */ |
1242 | ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, | 1218 | ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, |
1243 | donor_start, &len, *moved_len); | 1219 | donor_start, &len); |
1244 | mext_double_up_read(orig_inode, donor_inode); | ||
1245 | if (ret1) | 1220 | if (ret1) |
1246 | goto out; | 1221 | goto out; |
1247 | 1222 | ||
@@ -1355,36 +1330,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1355 | seq_start = le32_to_cpu(ext_cur->ee_block); | 1330 | seq_start = le32_to_cpu(ext_cur->ee_block); |
1356 | rest_blocks = seq_blocks; | 1331 | rest_blocks = seq_blocks; |
1357 | 1332 | ||
1358 | /* Discard preallocations of two inodes */ | 1333 | /* |
1359 | down_write(&EXT4_I(orig_inode)->i_data_sem); | 1334 | * Up semaphore to avoid following problems: |
1360 | ext4_discard_preallocations(orig_inode); | 1335 | * a. transaction deadlock among ext4_journal_start, |
1361 | up_write(&EXT4_I(orig_inode)->i_data_sem); | 1336 | * ->write_begin via pagefault, and jbd2_journal_commit |
1362 | 1337 | * b. racing with ->readpage, ->write_begin, and ext4_get_block | |
1363 | down_write(&EXT4_I(donor_inode)->i_data_sem); | 1338 | * in move_extent_per_page |
1364 | ext4_discard_preallocations(donor_inode); | 1339 | */ |
1365 | up_write(&EXT4_I(donor_inode)->i_data_sem); | 1340 | double_up_write_data_sem(orig_inode, donor_inode); |
1366 | 1341 | ||
1367 | while (orig_page_offset <= seq_end_page) { | 1342 | while (orig_page_offset <= seq_end_page) { |
1368 | 1343 | ||
1369 | /* Swap original branches with new branches */ | 1344 | /* Swap original branches with new branches */ |
1370 | ret1 = move_extent_per_page(o_filp, donor_inode, | 1345 | block_len_in_page = move_extent_per_page( |
1346 | o_filp, donor_inode, | ||
1371 | orig_page_offset, | 1347 | orig_page_offset, |
1372 | data_offset_in_page, | 1348 | data_offset_in_page, |
1373 | block_len_in_page, uninit); | 1349 | block_len_in_page, uninit, |
1374 | if (ret1 < 0) | 1350 | &ret1); |
1375 | goto out; | 1351 | |
1376 | orig_page_offset++; | ||
1377 | /* Count how many blocks we have exchanged */ | 1352 | /* Count how many blocks we have exchanged */ |
1378 | *moved_len += block_len_in_page; | 1353 | *moved_len += block_len_in_page; |
1354 | if (ret1 < 0) | ||
1355 | break; | ||
1379 | if (*moved_len > len) { | 1356 | if (*moved_len > len) { |
1380 | ext4_error(orig_inode->i_sb, __func__, | 1357 | ext4_error(orig_inode->i_sb, |
1381 | "We replaced blocks too much! " | 1358 | "We replaced blocks too much! " |
1382 | "sum of replaced: %llu requested: %llu", | 1359 | "sum of replaced: %llu requested: %llu", |
1383 | *moved_len, len); | 1360 | *moved_len, len); |
1384 | ret1 = -EIO; | 1361 | ret1 = -EIO; |
1385 | goto out; | 1362 | break; |
1386 | } | 1363 | } |
1387 | 1364 | ||
1365 | orig_page_offset++; | ||
1388 | data_offset_in_page = 0; | 1366 | data_offset_in_page = 0; |
1389 | rest_blocks -= block_len_in_page; | 1367 | rest_blocks -= block_len_in_page; |
1390 | if (rest_blocks > blocks_per_page) | 1368 | if (rest_blocks > blocks_per_page) |
@@ -1393,6 +1371,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1393 | block_len_in_page = rest_blocks; | 1371 | block_len_in_page = rest_blocks; |
1394 | } | 1372 | } |
1395 | 1373 | ||
1374 | double_down_write_data_sem(orig_inode, donor_inode); | ||
1375 | if (ret1 < 0) | ||
1376 | break; | ||
1377 | |||
1396 | /* Decrease buffer counter */ | 1378 | /* Decrease buffer counter */ |
1397 | if (holecheck_path) | 1379 | if (holecheck_path) |
1398 | ext4_ext_drop_refs(holecheck_path); | 1380 | ext4_ext_drop_refs(holecheck_path); |
@@ -1414,6 +1396,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1414 | 1396 | ||
1415 | } | 1397 | } |
1416 | out: | 1398 | out: |
1399 | if (*moved_len) { | ||
1400 | ext4_discard_preallocations(orig_inode); | ||
1401 | ext4_discard_preallocations(donor_inode); | ||
1402 | } | ||
1403 | |||
1417 | if (orig_path) { | 1404 | if (orig_path) { |
1418 | ext4_ext_drop_refs(orig_path); | 1405 | ext4_ext_drop_refs(orig_path); |
1419 | kfree(orig_path); | 1406 | kfree(orig_path); |
@@ -1422,7 +1409,7 @@ out: | |||
1422 | ext4_ext_drop_refs(holecheck_path); | 1409 | ext4_ext_drop_refs(holecheck_path); |
1423 | kfree(holecheck_path); | 1410 | kfree(holecheck_path); |
1424 | } | 1411 | } |
1425 | 1412 | double_up_write_data_sem(orig_inode, donor_inode); | |
1426 | ret2 = mext_inode_double_unlock(orig_inode, donor_inode); | 1413 | ret2 = mext_inode_double_unlock(orig_inode, donor_inode); |
1427 | 1414 | ||
1428 | if (ret1) | 1415 | if (ret1) |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6d2c1b897fc7..0c070fabd108 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -383,8 +383,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
383 | if (root->info.hash_version != DX_HASH_TEA && | 383 | if (root->info.hash_version != DX_HASH_TEA && |
384 | root->info.hash_version != DX_HASH_HALF_MD4 && | 384 | root->info.hash_version != DX_HASH_HALF_MD4 && |
385 | root->info.hash_version != DX_HASH_LEGACY) { | 385 | root->info.hash_version != DX_HASH_LEGACY) { |
386 | ext4_warning(dir->i_sb, __func__, | 386 | ext4_warning(dir->i_sb, "Unrecognised inode hash code %d", |
387 | "Unrecognised inode hash code %d", | ||
388 | root->info.hash_version); | 387 | root->info.hash_version); |
389 | brelse(bh); | 388 | brelse(bh); |
390 | *err = ERR_BAD_DX_DIR; | 389 | *err = ERR_BAD_DX_DIR; |
@@ -399,8 +398,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
399 | hash = hinfo->hash; | 398 | hash = hinfo->hash; |
400 | 399 | ||
401 | if (root->info.unused_flags & 1) { | 400 | if (root->info.unused_flags & 1) { |
402 | ext4_warning(dir->i_sb, __func__, | 401 | ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x", |
403 | "Unimplemented inode hash flags: %#06x", | ||
404 | root->info.unused_flags); | 402 | root->info.unused_flags); |
405 | brelse(bh); | 403 | brelse(bh); |
406 | *err = ERR_BAD_DX_DIR; | 404 | *err = ERR_BAD_DX_DIR; |
@@ -408,8 +406,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
408 | } | 406 | } |
409 | 407 | ||
410 | if ((indirect = root->info.indirect_levels) > 1) { | 408 | if ((indirect = root->info.indirect_levels) > 1) { |
411 | ext4_warning(dir->i_sb, __func__, | 409 | ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x", |
412 | "Unimplemented inode hash depth: %#06x", | ||
413 | root->info.indirect_levels); | 410 | root->info.indirect_levels); |
414 | brelse(bh); | 411 | brelse(bh); |
415 | *err = ERR_BAD_DX_DIR; | 412 | *err = ERR_BAD_DX_DIR; |
@@ -421,8 +418,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
421 | 418 | ||
422 | if (dx_get_limit(entries) != dx_root_limit(dir, | 419 | if (dx_get_limit(entries) != dx_root_limit(dir, |
423 | root->info.info_length)) { | 420 | root->info.info_length)) { |
424 | ext4_warning(dir->i_sb, __func__, | 421 | ext4_warning(dir->i_sb, "dx entry: limit != root limit"); |
425 | "dx entry: limit != root limit"); | ||
426 | brelse(bh); | 422 | brelse(bh); |
427 | *err = ERR_BAD_DX_DIR; | 423 | *err = ERR_BAD_DX_DIR; |
428 | goto fail; | 424 | goto fail; |
@@ -433,7 +429,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
433 | { | 429 | { |
434 | count = dx_get_count(entries); | 430 | count = dx_get_count(entries); |
435 | if (!count || count > dx_get_limit(entries)) { | 431 | if (!count || count > dx_get_limit(entries)) { |
436 | ext4_warning(dir->i_sb, __func__, | 432 | ext4_warning(dir->i_sb, |
437 | "dx entry: no count or count > limit"); | 433 | "dx entry: no count or count > limit"); |
438 | brelse(bh); | 434 | brelse(bh); |
439 | *err = ERR_BAD_DX_DIR; | 435 | *err = ERR_BAD_DX_DIR; |
@@ -478,7 +474,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
478 | goto fail2; | 474 | goto fail2; |
479 | at = entries = ((struct dx_node *) bh->b_data)->entries; | 475 | at = entries = ((struct dx_node *) bh->b_data)->entries; |
480 | if (dx_get_limit(entries) != dx_node_limit (dir)) { | 476 | if (dx_get_limit(entries) != dx_node_limit (dir)) { |
481 | ext4_warning(dir->i_sb, __func__, | 477 | ext4_warning(dir->i_sb, |
482 | "dx entry: limit != node limit"); | 478 | "dx entry: limit != node limit"); |
483 | brelse(bh); | 479 | brelse(bh); |
484 | *err = ERR_BAD_DX_DIR; | 480 | *err = ERR_BAD_DX_DIR; |
@@ -494,7 +490,7 @@ fail2: | |||
494 | } | 490 | } |
495 | fail: | 491 | fail: |
496 | if (*err == ERR_BAD_DX_DIR) | 492 | if (*err == ERR_BAD_DX_DIR) |
497 | ext4_warning(dir->i_sb, __func__, | 493 | ext4_warning(dir->i_sb, |
498 | "Corrupt dir inode %ld, running e2fsck is " | 494 | "Corrupt dir inode %ld, running e2fsck is " |
499 | "recommended.", dir->i_ino); | 495 | "recommended.", dir->i_ino); |
500 | return NULL; | 496 | return NULL; |
@@ -947,9 +943,8 @@ restart: | |||
947 | wait_on_buffer(bh); | 943 | wait_on_buffer(bh); |
948 | if (!buffer_uptodate(bh)) { | 944 | if (!buffer_uptodate(bh)) { |
949 | /* read error, skip block & hope for the best */ | 945 | /* read error, skip block & hope for the best */ |
950 | ext4_error(sb, __func__, "reading directory #%lu " | 946 | ext4_error(sb, "reading directory #%lu offset %lu", |
951 | "offset %lu", dir->i_ino, | 947 | dir->i_ino, (unsigned long)block); |
952 | (unsigned long)block); | ||
953 | brelse(bh); | 948 | brelse(bh); |
954 | goto next; | 949 | goto next; |
955 | } | 950 | } |
@@ -1041,7 +1036,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q | |||
1041 | retval = ext4_htree_next_block(dir, hash, frame, | 1036 | retval = ext4_htree_next_block(dir, hash, frame, |
1042 | frames, NULL); | 1037 | frames, NULL); |
1043 | if (retval < 0) { | 1038 | if (retval < 0) { |
1044 | ext4_warning(sb, __func__, | 1039 | ext4_warning(sb, |
1045 | "error reading index page in directory #%lu", | 1040 | "error reading index page in directory #%lu", |
1046 | dir->i_ino); | 1041 | dir->i_ino); |
1047 | *err = retval; | 1042 | *err = retval; |
@@ -1071,14 +1066,13 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru | |||
1071 | __u32 ino = le32_to_cpu(de->inode); | 1066 | __u32 ino = le32_to_cpu(de->inode); |
1072 | brelse(bh); | 1067 | brelse(bh); |
1073 | if (!ext4_valid_inum(dir->i_sb, ino)) { | 1068 | if (!ext4_valid_inum(dir->i_sb, ino)) { |
1074 | ext4_error(dir->i_sb, "ext4_lookup", | 1069 | ext4_error(dir->i_sb, "bad inode number: %u", ino); |
1075 | "bad inode number: %u", ino); | ||
1076 | return ERR_PTR(-EIO); | 1070 | return ERR_PTR(-EIO); |
1077 | } | 1071 | } |
1078 | inode = ext4_iget(dir->i_sb, ino); | 1072 | inode = ext4_iget(dir->i_sb, ino); |
1079 | if (unlikely(IS_ERR(inode))) { | 1073 | if (unlikely(IS_ERR(inode))) { |
1080 | if (PTR_ERR(inode) == -ESTALE) { | 1074 | if (PTR_ERR(inode) == -ESTALE) { |
1081 | ext4_error(dir->i_sb, __func__, | 1075 | ext4_error(dir->i_sb, |
1082 | "deleted inode referenced: %u", | 1076 | "deleted inode referenced: %u", |
1083 | ino); | 1077 | ino); |
1084 | return ERR_PTR(-EIO); | 1078 | return ERR_PTR(-EIO); |
@@ -1110,7 +1104,7 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
1110 | brelse(bh); | 1104 | brelse(bh); |
1111 | 1105 | ||
1112 | if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { | 1106 | if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { |
1113 | ext4_error(child->d_inode->i_sb, "ext4_get_parent", | 1107 | ext4_error(child->d_inode->i_sb, |
1114 | "bad inode number: %u", ino); | 1108 | "bad inode number: %u", ino); |
1115 | return ERR_PTR(-EIO); | 1109 | return ERR_PTR(-EIO); |
1116 | } | 1110 | } |
@@ -1292,9 +1286,6 @@ errout: | |||
1292 | * add_dirent_to_buf will attempt search the directory block for | 1286 | * add_dirent_to_buf will attempt search the directory block for |
1293 | * space. It will return -ENOSPC if no space is available, and -EIO | 1287 | * space. It will return -ENOSPC if no space is available, and -EIO |
1294 | * and -EEXIST if directory entry already exists. | 1288 | * and -EEXIST if directory entry already exists. |
1295 | * | ||
1296 | * NOTE! bh is NOT released in the case where ENOSPC is returned. In | ||
1297 | * all other cases bh is released. | ||
1298 | */ | 1289 | */ |
1299 | static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | 1290 | static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, |
1300 | struct inode *inode, struct ext4_dir_entry_2 *de, | 1291 | struct inode *inode, struct ext4_dir_entry_2 *de, |
@@ -1315,14 +1306,10 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1315 | top = bh->b_data + blocksize - reclen; | 1306 | top = bh->b_data + blocksize - reclen; |
1316 | while ((char *) de <= top) { | 1307 | while ((char *) de <= top) { |
1317 | if (!ext4_check_dir_entry("ext4_add_entry", dir, de, | 1308 | if (!ext4_check_dir_entry("ext4_add_entry", dir, de, |
1318 | bh, offset)) { | 1309 | bh, offset)) |
1319 | brelse(bh); | ||
1320 | return -EIO; | 1310 | return -EIO; |
1321 | } | 1311 | if (ext4_match(namelen, name, de)) |
1322 | if (ext4_match(namelen, name, de)) { | ||
1323 | brelse(bh); | ||
1324 | return -EEXIST; | 1312 | return -EEXIST; |
1325 | } | ||
1326 | nlen = EXT4_DIR_REC_LEN(de->name_len); | 1313 | nlen = EXT4_DIR_REC_LEN(de->name_len); |
1327 | rlen = ext4_rec_len_from_disk(de->rec_len, blocksize); | 1314 | rlen = ext4_rec_len_from_disk(de->rec_len, blocksize); |
1328 | if ((de->inode? rlen - nlen: rlen) >= reclen) | 1315 | if ((de->inode? rlen - nlen: rlen) >= reclen) |
@@ -1337,7 +1324,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1337 | err = ext4_journal_get_write_access(handle, bh); | 1324 | err = ext4_journal_get_write_access(handle, bh); |
1338 | if (err) { | 1325 | if (err) { |
1339 | ext4_std_error(dir->i_sb, err); | 1326 | ext4_std_error(dir->i_sb, err); |
1340 | brelse(bh); | ||
1341 | return err; | 1327 | return err; |
1342 | } | 1328 | } |
1343 | 1329 | ||
@@ -1377,7 +1363,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1377 | err = ext4_handle_dirty_metadata(handle, dir, bh); | 1363 | err = ext4_handle_dirty_metadata(handle, dir, bh); |
1378 | if (err) | 1364 | if (err) |
1379 | ext4_std_error(dir->i_sb, err); | 1365 | ext4_std_error(dir->i_sb, err); |
1380 | brelse(bh); | ||
1381 | return 0; | 1366 | return 0; |
1382 | } | 1367 | } |
1383 | 1368 | ||
@@ -1419,7 +1404,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1419 | de = (struct ext4_dir_entry_2 *)((char *)fde + | 1404 | de = (struct ext4_dir_entry_2 *)((char *)fde + |
1420 | ext4_rec_len_from_disk(fde->rec_len, blocksize)); | 1405 | ext4_rec_len_from_disk(fde->rec_len, blocksize)); |
1421 | if ((char *) de >= (((char *) root) + blocksize)) { | 1406 | if ((char *) de >= (((char *) root) + blocksize)) { |
1422 | ext4_error(dir->i_sb, __func__, | 1407 | ext4_error(dir->i_sb, |
1423 | "invalid rec_len for '..' in inode %lu", | 1408 | "invalid rec_len for '..' in inode %lu", |
1424 | dir->i_ino); | 1409 | dir->i_ino); |
1425 | brelse(bh); | 1410 | brelse(bh); |
@@ -1471,7 +1456,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1471 | if (!(de)) | 1456 | if (!(de)) |
1472 | return retval; | 1457 | return retval; |
1473 | 1458 | ||
1474 | return add_dirent_to_buf(handle, dentry, inode, de, bh); | 1459 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); |
1460 | brelse(bh); | ||
1461 | return retval; | ||
1475 | } | 1462 | } |
1476 | 1463 | ||
1477 | /* | 1464 | /* |
@@ -1514,8 +1501,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1514 | if(!bh) | 1501 | if(!bh) |
1515 | return retval; | 1502 | return retval; |
1516 | retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); | 1503 | retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); |
1517 | if (retval != -ENOSPC) | 1504 | if (retval != -ENOSPC) { |
1505 | brelse(bh); | ||
1518 | return retval; | 1506 | return retval; |
1507 | } | ||
1519 | 1508 | ||
1520 | if (blocks == 1 && !dx_fallback && | 1509 | if (blocks == 1 && !dx_fallback && |
1521 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) | 1510 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) |
@@ -1528,7 +1517,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1528 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 1517 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1529 | de->inode = 0; | 1518 | de->inode = 0; |
1530 | de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); | 1519 | de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); |
1531 | return add_dirent_to_buf(handle, dentry, inode, de, bh); | 1520 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); |
1521 | brelse(bh); | ||
1522 | return retval; | ||
1532 | } | 1523 | } |
1533 | 1524 | ||
1534 | /* | 1525 | /* |
@@ -1561,10 +1552,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1561 | goto journal_error; | 1552 | goto journal_error; |
1562 | 1553 | ||
1563 | err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); | 1554 | err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); |
1564 | if (err != -ENOSPC) { | 1555 | if (err != -ENOSPC) |
1565 | bh = NULL; | ||
1566 | goto cleanup; | 1556 | goto cleanup; |
1567 | } | ||
1568 | 1557 | ||
1569 | /* Block full, should compress but for now just split */ | 1558 | /* Block full, should compress but for now just split */ |
1570 | dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", | 1559 | dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", |
@@ -1580,8 +1569,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1580 | 1569 | ||
1581 | if (levels && (dx_get_count(frames->entries) == | 1570 | if (levels && (dx_get_count(frames->entries) == |
1582 | dx_get_limit(frames->entries))) { | 1571 | dx_get_limit(frames->entries))) { |
1583 | ext4_warning(sb, __func__, | 1572 | ext4_warning(sb, "Directory index full!"); |
1584 | "Directory index full!"); | ||
1585 | err = -ENOSPC; | 1573 | err = -ENOSPC; |
1586 | goto cleanup; | 1574 | goto cleanup; |
1587 | } | 1575 | } |
@@ -1657,7 +1645,6 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1657 | if (!de) | 1645 | if (!de) |
1658 | goto cleanup; | 1646 | goto cleanup; |
1659 | err = add_dirent_to_buf(handle, dentry, inode, de, bh); | 1647 | err = add_dirent_to_buf(handle, dentry, inode, de, bh); |
1660 | bh = NULL; | ||
1661 | goto cleanup; | 1648 | goto cleanup; |
1662 | 1649 | ||
1663 | journal_error: | 1650 | journal_error: |
@@ -1772,10 +1759,12 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, | |||
1772 | struct inode *inode; | 1759 | struct inode *inode; |
1773 | int err, retries = 0; | 1760 | int err, retries = 0; |
1774 | 1761 | ||
1762 | dquot_initialize(dir); | ||
1763 | |||
1775 | retry: | 1764 | retry: |
1776 | handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | 1765 | handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + |
1777 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + | 1766 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + |
1778 | 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); | 1767 | EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); |
1779 | if (IS_ERR(handle)) | 1768 | if (IS_ERR(handle)) |
1780 | return PTR_ERR(handle); | 1769 | return PTR_ERR(handle); |
1781 | 1770 | ||
@@ -1806,10 +1795,12 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, | |||
1806 | if (!new_valid_dev(rdev)) | 1795 | if (!new_valid_dev(rdev)) |
1807 | return -EINVAL; | 1796 | return -EINVAL; |
1808 | 1797 | ||
1798 | dquot_initialize(dir); | ||
1799 | |||
1809 | retry: | 1800 | retry: |
1810 | handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | 1801 | handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + |
1811 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + | 1802 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + |
1812 | 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); | 1803 | EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); |
1813 | if (IS_ERR(handle)) | 1804 | if (IS_ERR(handle)) |
1814 | return PTR_ERR(handle); | 1805 | return PTR_ERR(handle); |
1815 | 1806 | ||
@@ -1843,10 +1834,12 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
1843 | if (EXT4_DIR_LINK_MAX(dir)) | 1834 | if (EXT4_DIR_LINK_MAX(dir)) |
1844 | return -EMLINK; | 1835 | return -EMLINK; |
1845 | 1836 | ||
1837 | dquot_initialize(dir); | ||
1838 | |||
1846 | retry: | 1839 | retry: |
1847 | handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | 1840 | handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + |
1848 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + | 1841 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + |
1849 | 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); | 1842 | EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); |
1850 | if (IS_ERR(handle)) | 1843 | if (IS_ERR(handle)) |
1851 | return PTR_ERR(handle); | 1844 | return PTR_ERR(handle); |
1852 | 1845 | ||
@@ -1922,11 +1915,11 @@ static int empty_dir(struct inode *inode) | |||
1922 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || | 1915 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || |
1923 | !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { | 1916 | !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { |
1924 | if (err) | 1917 | if (err) |
1925 | ext4_error(inode->i_sb, __func__, | 1918 | ext4_error(inode->i_sb, |
1926 | "error %d reading directory #%lu offset 0", | 1919 | "error %d reading directory #%lu offset 0", |
1927 | err, inode->i_ino); | 1920 | err, inode->i_ino); |
1928 | else | 1921 | else |
1929 | ext4_warning(inode->i_sb, __func__, | 1922 | ext4_warning(inode->i_sb, |
1930 | "bad directory (dir #%lu) - no data block", | 1923 | "bad directory (dir #%lu) - no data block", |
1931 | inode->i_ino); | 1924 | inode->i_ino); |
1932 | return 1; | 1925 | return 1; |
@@ -1937,7 +1930,7 @@ static int empty_dir(struct inode *inode) | |||
1937 | !le32_to_cpu(de1->inode) || | 1930 | !le32_to_cpu(de1->inode) || |
1938 | strcmp(".", de->name) || | 1931 | strcmp(".", de->name) || |
1939 | strcmp("..", de1->name)) { | 1932 | strcmp("..", de1->name)) { |
1940 | ext4_warning(inode->i_sb, "empty_dir", | 1933 | ext4_warning(inode->i_sb, |
1941 | "bad directory (dir #%lu) - no `.' or `..'", | 1934 | "bad directory (dir #%lu) - no `.' or `..'", |
1942 | inode->i_ino); | 1935 | inode->i_ino); |
1943 | brelse(bh); | 1936 | brelse(bh); |
@@ -1955,7 +1948,7 @@ static int empty_dir(struct inode *inode) | |||
1955 | offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); | 1948 | offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); |
1956 | if (!bh) { | 1949 | if (!bh) { |
1957 | if (err) | 1950 | if (err) |
1958 | ext4_error(sb, __func__, | 1951 | ext4_error(sb, |
1959 | "error %d reading directory" | 1952 | "error %d reading directory" |
1960 | " #%lu offset %u", | 1953 | " #%lu offset %u", |
1961 | err, inode->i_ino, offset); | 1954 | err, inode->i_ino, offset); |
@@ -2026,11 +2019,18 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
2026 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 2019 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
2027 | if (err) | 2020 | if (err) |
2028 | goto out_unlock; | 2021 | goto out_unlock; |
2022 | /* | ||
2023 | * Due to previous errors inode may be already a part of on-disk | ||
2024 | * orphan list. If so skip on-disk list modification. | ||
2025 | */ | ||
2026 | if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <= | ||
2027 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) | ||
2028 | goto mem_insert; | ||
2029 | 2029 | ||
2030 | /* Insert this inode at the head of the on-disk orphan list... */ | 2030 | /* Insert this inode at the head of the on-disk orphan list... */ |
2031 | NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); | 2031 | NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); |
2032 | EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); | 2032 | EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); |
2033 | err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh); | 2033 | err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); |
2034 | rc = ext4_mark_iloc_dirty(handle, inode, &iloc); | 2034 | rc = ext4_mark_iloc_dirty(handle, inode, &iloc); |
2035 | if (!err) | 2035 | if (!err) |
2036 | err = rc; | 2036 | err = rc; |
@@ -2043,6 +2043,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
2043 | * | 2043 | * |
2044 | * This is safe: on error we're going to ignore the orphan list | 2044 | * This is safe: on error we're going to ignore the orphan list |
2045 | * anyway on the next recovery. */ | 2045 | * anyway on the next recovery. */ |
2046 | mem_insert: | ||
2046 | if (!err) | 2047 | if (!err) |
2047 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); | 2048 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); |
2048 | 2049 | ||
@@ -2102,7 +2103,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) | |||
2102 | if (err) | 2103 | if (err) |
2103 | goto out_brelse; | 2104 | goto out_brelse; |
2104 | sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); | 2105 | sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); |
2105 | err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh); | 2106 | err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); |
2106 | } else { | 2107 | } else { |
2107 | struct ext4_iloc iloc2; | 2108 | struct ext4_iloc iloc2; |
2108 | struct inode *i_prev = | 2109 | struct inode *i_prev = |
@@ -2142,7 +2143,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) | |||
2142 | 2143 | ||
2143 | /* Initialize quotas before so that eventual writes go in | 2144 | /* Initialize quotas before so that eventual writes go in |
2144 | * separate transaction */ | 2145 | * separate transaction */ |
2145 | vfs_dq_init(dentry->d_inode); | 2146 | dquot_initialize(dir); |
2147 | dquot_initialize(dentry->d_inode); | ||
2148 | |||
2146 | handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); | 2149 | handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); |
2147 | if (IS_ERR(handle)) | 2150 | if (IS_ERR(handle)) |
2148 | return PTR_ERR(handle); | 2151 | return PTR_ERR(handle); |
@@ -2169,7 +2172,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) | |||
2169 | if (retval) | 2172 | if (retval) |
2170 | goto end_rmdir; | 2173 | goto end_rmdir; |
2171 | if (!EXT4_DIR_LINK_EMPTY(inode)) | 2174 | if (!EXT4_DIR_LINK_EMPTY(inode)) |
2172 | ext4_warning(inode->i_sb, "ext4_rmdir", | 2175 | ext4_warning(inode->i_sb, |
2173 | "empty directory has too many links (%d)", | 2176 | "empty directory has too many links (%d)", |
2174 | inode->i_nlink); | 2177 | inode->i_nlink); |
2175 | inode->i_version++; | 2178 | inode->i_version++; |
@@ -2201,7 +2204,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) | |||
2201 | 2204 | ||
2202 | /* Initialize quotas before so that eventual writes go | 2205 | /* Initialize quotas before so that eventual writes go |
2203 | * in separate transaction */ | 2206 | * in separate transaction */ |
2204 | vfs_dq_init(dentry->d_inode); | 2207 | dquot_initialize(dir); |
2208 | dquot_initialize(dentry->d_inode); | ||
2209 | |||
2205 | handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); | 2210 | handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); |
2206 | if (IS_ERR(handle)) | 2211 | if (IS_ERR(handle)) |
2207 | return PTR_ERR(handle); | 2212 | return PTR_ERR(handle); |
@@ -2221,7 +2226,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) | |||
2221 | goto end_unlink; | 2226 | goto end_unlink; |
2222 | 2227 | ||
2223 | if (!inode->i_nlink) { | 2228 | if (!inode->i_nlink) { |
2224 | ext4_warning(inode->i_sb, "ext4_unlink", | 2229 | ext4_warning(inode->i_sb, |
2225 | "Deleting nonexistent file (%lu), %d", | 2230 | "Deleting nonexistent file (%lu), %d", |
2226 | inode->i_ino, inode->i_nlink); | 2231 | inode->i_ino, inode->i_nlink); |
2227 | inode->i_nlink = 1; | 2232 | inode->i_nlink = 1; |
@@ -2256,10 +2261,12 @@ static int ext4_symlink(struct inode *dir, | |||
2256 | if (l > dir->i_sb->s_blocksize) | 2261 | if (l > dir->i_sb->s_blocksize) |
2257 | return -ENAMETOOLONG; | 2262 | return -ENAMETOOLONG; |
2258 | 2263 | ||
2264 | dquot_initialize(dir); | ||
2265 | |||
2259 | retry: | 2266 | retry: |
2260 | handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | 2267 | handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + |
2261 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + | 2268 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + |
2262 | 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); | 2269 | EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); |
2263 | if (IS_ERR(handle)) | 2270 | if (IS_ERR(handle)) |
2264 | return PTR_ERR(handle); | 2271 | return PTR_ERR(handle); |
2265 | 2272 | ||
@@ -2314,6 +2321,8 @@ static int ext4_link(struct dentry *old_dentry, | |||
2314 | if (inode->i_nlink >= EXT4_LINK_MAX) | 2321 | if (inode->i_nlink >= EXT4_LINK_MAX) |
2315 | return -EMLINK; | 2322 | return -EMLINK; |
2316 | 2323 | ||
2324 | dquot_initialize(dir); | ||
2325 | |||
2317 | /* | 2326 | /* |
2318 | * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing | 2327 | * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing |
2319 | * otherwise has the potential to corrupt the orphan inode list. | 2328 | * otherwise has the potential to corrupt the orphan inode list. |
@@ -2364,12 +2373,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2364 | struct ext4_dir_entry_2 *old_de, *new_de; | 2373 | struct ext4_dir_entry_2 *old_de, *new_de; |
2365 | int retval, force_da_alloc = 0; | 2374 | int retval, force_da_alloc = 0; |
2366 | 2375 | ||
2376 | dquot_initialize(old_dir); | ||
2377 | dquot_initialize(new_dir); | ||
2378 | |||
2367 | old_bh = new_bh = dir_bh = NULL; | 2379 | old_bh = new_bh = dir_bh = NULL; |
2368 | 2380 | ||
2369 | /* Initialize quotas before so that eventual writes go | 2381 | /* Initialize quotas before so that eventual writes go |
2370 | * in separate transaction */ | 2382 | * in separate transaction */ |
2371 | if (new_dentry->d_inode) | 2383 | if (new_dentry->d_inode) |
2372 | vfs_dq_init(new_dentry->d_inode); | 2384 | dquot_initialize(new_dentry->d_inode); |
2373 | handle = ext4_journal_start(old_dir, 2 * | 2385 | handle = ext4_journal_start(old_dir, 2 * |
2374 | EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + | 2386 | EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + |
2375 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); | 2387 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); |
@@ -2468,7 +2480,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2468 | } | 2480 | } |
2469 | } | 2481 | } |
2470 | if (retval) { | 2482 | if (retval) { |
2471 | ext4_warning(old_dir->i_sb, "ext4_rename", | 2483 | ext4_warning(old_dir->i_sb, |
2472 | "Deleting old file (%lu), %d, error=%d", | 2484 | "Deleting old file (%lu), %d, error=%d", |
2473 | old_dir->i_ino, old_dir->i_nlink, retval); | 2485 | old_dir->i_ino, old_dir->i_nlink, retval); |
2474 | } | 2486 | } |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3cfc343c41b5..5692c48754a0 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -48,65 +48,54 @@ static int verify_group_input(struct super_block *sb, | |||
48 | 48 | ||
49 | ext4_get_group_no_and_offset(sb, start, NULL, &offset); | 49 | ext4_get_group_no_and_offset(sb, start, NULL, &offset); |
50 | if (group != sbi->s_groups_count) | 50 | if (group != sbi->s_groups_count) |
51 | ext4_warning(sb, __func__, | 51 | ext4_warning(sb, "Cannot add at group %u (only %u groups)", |
52 | "Cannot add at group %u (only %u groups)", | ||
53 | input->group, sbi->s_groups_count); | 52 | input->group, sbi->s_groups_count); |
54 | else if (offset != 0) | 53 | else if (offset != 0) |
55 | ext4_warning(sb, __func__, "Last group not full"); | 54 | ext4_warning(sb, "Last group not full"); |
56 | else if (input->reserved_blocks > input->blocks_count / 5) | 55 | else if (input->reserved_blocks > input->blocks_count / 5) |
57 | ext4_warning(sb, __func__, "Reserved blocks too high (%u)", | 56 | ext4_warning(sb, "Reserved blocks too high (%u)", |
58 | input->reserved_blocks); | 57 | input->reserved_blocks); |
59 | else if (free_blocks_count < 0) | 58 | else if (free_blocks_count < 0) |
60 | ext4_warning(sb, __func__, "Bad blocks count %u", | 59 | ext4_warning(sb, "Bad blocks count %u", |
61 | input->blocks_count); | 60 | input->blocks_count); |
62 | else if (!(bh = sb_bread(sb, end - 1))) | 61 | else if (!(bh = sb_bread(sb, end - 1))) |
63 | ext4_warning(sb, __func__, | 62 | ext4_warning(sb, "Cannot read last block (%llu)", |
64 | "Cannot read last block (%llu)", | ||
65 | end - 1); | 63 | end - 1); |
66 | else if (outside(input->block_bitmap, start, end)) | 64 | else if (outside(input->block_bitmap, start, end)) |
67 | ext4_warning(sb, __func__, | 65 | ext4_warning(sb, "Block bitmap not in group (block %llu)", |
68 | "Block bitmap not in group (block %llu)", | ||
69 | (unsigned long long)input->block_bitmap); | 66 | (unsigned long long)input->block_bitmap); |
70 | else if (outside(input->inode_bitmap, start, end)) | 67 | else if (outside(input->inode_bitmap, start, end)) |
71 | ext4_warning(sb, __func__, | 68 | ext4_warning(sb, "Inode bitmap not in group (block %llu)", |
72 | "Inode bitmap not in group (block %llu)", | ||
73 | (unsigned long long)input->inode_bitmap); | 69 | (unsigned long long)input->inode_bitmap); |
74 | else if (outside(input->inode_table, start, end) || | 70 | else if (outside(input->inode_table, start, end) || |
75 | outside(itend - 1, start, end)) | 71 | outside(itend - 1, start, end)) |
76 | ext4_warning(sb, __func__, | 72 | ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)", |
77 | "Inode table not in group (blocks %llu-%llu)", | ||
78 | (unsigned long long)input->inode_table, itend - 1); | 73 | (unsigned long long)input->inode_table, itend - 1); |
79 | else if (input->inode_bitmap == input->block_bitmap) | 74 | else if (input->inode_bitmap == input->block_bitmap) |
80 | ext4_warning(sb, __func__, | 75 | ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)", |
81 | "Block bitmap same as inode bitmap (%llu)", | ||
82 | (unsigned long long)input->block_bitmap); | 76 | (unsigned long long)input->block_bitmap); |
83 | else if (inside(input->block_bitmap, input->inode_table, itend)) | 77 | else if (inside(input->block_bitmap, input->inode_table, itend)) |
84 | ext4_warning(sb, __func__, | 78 | ext4_warning(sb, "Block bitmap (%llu) in inode table " |
85 | "Block bitmap (%llu) in inode table (%llu-%llu)", | 79 | "(%llu-%llu)", |
86 | (unsigned long long)input->block_bitmap, | 80 | (unsigned long long)input->block_bitmap, |
87 | (unsigned long long)input->inode_table, itend - 1); | 81 | (unsigned long long)input->inode_table, itend - 1); |
88 | else if (inside(input->inode_bitmap, input->inode_table, itend)) | 82 | else if (inside(input->inode_bitmap, input->inode_table, itend)) |
89 | ext4_warning(sb, __func__, | 83 | ext4_warning(sb, "Inode bitmap (%llu) in inode table " |
90 | "Inode bitmap (%llu) in inode table (%llu-%llu)", | 84 | "(%llu-%llu)", |
91 | (unsigned long long)input->inode_bitmap, | 85 | (unsigned long long)input->inode_bitmap, |
92 | (unsigned long long)input->inode_table, itend - 1); | 86 | (unsigned long long)input->inode_table, itend - 1); |
93 | else if (inside(input->block_bitmap, start, metaend)) | 87 | else if (inside(input->block_bitmap, start, metaend)) |
94 | ext4_warning(sb, __func__, | 88 | ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)", |
95 | "Block bitmap (%llu) in GDT table" | ||
96 | " (%llu-%llu)", | ||
97 | (unsigned long long)input->block_bitmap, | 89 | (unsigned long long)input->block_bitmap, |
98 | start, metaend - 1); | 90 | start, metaend - 1); |
99 | else if (inside(input->inode_bitmap, start, metaend)) | 91 | else if (inside(input->inode_bitmap, start, metaend)) |
100 | ext4_warning(sb, __func__, | 92 | ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)", |
101 | "Inode bitmap (%llu) in GDT table" | ||
102 | " (%llu-%llu)", | ||
103 | (unsigned long long)input->inode_bitmap, | 93 | (unsigned long long)input->inode_bitmap, |
104 | start, metaend - 1); | 94 | start, metaend - 1); |
105 | else if (inside(input->inode_table, start, metaend) || | 95 | else if (inside(input->inode_table, start, metaend) || |
106 | inside(itend - 1, start, metaend)) | 96 | inside(itend - 1, start, metaend)) |
107 | ext4_warning(sb, __func__, | 97 | ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table " |
108 | "Inode table (%llu-%llu) overlaps" | 98 | "(%llu-%llu)", |
109 | "GDT table (%llu-%llu)", | ||
110 | (unsigned long long)input->inode_table, | 99 | (unsigned long long)input->inode_table, |
111 | itend - 1, start, metaend - 1); | 100 | itend - 1, start, metaend - 1); |
112 | else | 101 | else |
@@ -247,7 +236,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
247 | goto exit_bh; | 236 | goto exit_bh; |
248 | 237 | ||
249 | if (IS_ERR(gdb = bclean(handle, sb, block))) { | 238 | if (IS_ERR(gdb = bclean(handle, sb, block))) { |
250 | err = PTR_ERR(bh); | 239 | err = PTR_ERR(gdb); |
251 | goto exit_bh; | 240 | goto exit_bh; |
252 | } | 241 | } |
253 | ext4_handle_dirty_metadata(handle, NULL, gdb); | 242 | ext4_handle_dirty_metadata(handle, NULL, gdb); |
@@ -364,8 +353,7 @@ static int verify_reserved_gdb(struct super_block *sb, | |||
364 | while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { | 353 | while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { |
365 | if (le32_to_cpu(*p++) != | 354 | if (le32_to_cpu(*p++) != |
366 | grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ | 355 | grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ |
367 | ext4_warning(sb, __func__, | 356 | ext4_warning(sb, "reserved GDT %llu" |
368 | "reserved GDT %llu" | ||
369 | " missing grp %d (%llu)", | 357 | " missing grp %d (%llu)", |
370 | blk, grp, | 358 | blk, grp, |
371 | grp * | 359 | grp * |
@@ -420,8 +408,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
420 | */ | 408 | */ |
421 | if (EXT4_SB(sb)->s_sbh->b_blocknr != | 409 | if (EXT4_SB(sb)->s_sbh->b_blocknr != |
422 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { | 410 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { |
423 | ext4_warning(sb, __func__, | 411 | ext4_warning(sb, "won't resize using backup superblock at %llu", |
424 | "won't resize using backup superblock at %llu", | ||
425 | (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); | 412 | (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); |
426 | return -EPERM; | 413 | return -EPERM; |
427 | } | 414 | } |
@@ -444,8 +431,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
444 | 431 | ||
445 | data = (__le32 *)dind->b_data; | 432 | data = (__le32 *)dind->b_data; |
446 | if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { | 433 | if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { |
447 | ext4_warning(sb, __func__, | 434 | ext4_warning(sb, "new group %u GDT block %llu not reserved", |
448 | "new group %u GDT block %llu not reserved", | ||
449 | input->group, gdblock); | 435 | input->group, gdblock); |
450 | err = -EINVAL; | 436 | err = -EINVAL; |
451 | goto exit_dind; | 437 | goto exit_dind; |
@@ -468,7 +454,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
468 | GFP_NOFS); | 454 | GFP_NOFS); |
469 | if (!n_group_desc) { | 455 | if (!n_group_desc) { |
470 | err = -ENOMEM; | 456 | err = -ENOMEM; |
471 | ext4_warning(sb, __func__, | 457 | ext4_warning(sb, |
472 | "not enough memory for %lu groups", gdb_num + 1); | 458 | "not enough memory for %lu groups", gdb_num + 1); |
473 | goto exit_inode; | 459 | goto exit_inode; |
474 | } | 460 | } |
@@ -567,8 +553,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, | |||
567 | /* Get each reserved primary GDT block and verify it holds backups */ | 553 | /* Get each reserved primary GDT block and verify it holds backups */ |
568 | for (res = 0; res < reserved_gdb; res++, blk++) { | 554 | for (res = 0; res < reserved_gdb; res++, blk++) { |
569 | if (le32_to_cpu(*data) != blk) { | 555 | if (le32_to_cpu(*data) != blk) { |
570 | ext4_warning(sb, __func__, | 556 | ext4_warning(sb, "reserved block %llu" |
571 | "reserved block %llu" | ||
572 | " not at offset %ld", | 557 | " not at offset %ld", |
573 | blk, | 558 | blk, |
574 | (long)(data - (__le32 *)dind->b_data)); | 559 | (long)(data - (__le32 *)dind->b_data)); |
@@ -713,8 +698,7 @@ static void update_backups(struct super_block *sb, | |||
713 | */ | 698 | */ |
714 | exit_err: | 699 | exit_err: |
715 | if (err) { | 700 | if (err) { |
716 | ext4_warning(sb, __func__, | 701 | ext4_warning(sb, "can't update backup for group %u (err %d), " |
717 | "can't update backup for group %u (err %d), " | ||
718 | "forcing fsck on next reboot", group, err); | 702 | "forcing fsck on next reboot", group, err); |
719 | sbi->s_mount_state &= ~EXT4_VALID_FS; | 703 | sbi->s_mount_state &= ~EXT4_VALID_FS; |
720 | sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); | 704 | sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); |
@@ -753,20 +737,19 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
753 | 737 | ||
754 | if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, | 738 | if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, |
755 | EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { | 739 | EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { |
756 | ext4_warning(sb, __func__, | 740 | ext4_warning(sb, "Can't resize non-sparse filesystem further"); |
757 | "Can't resize non-sparse filesystem further"); | ||
758 | return -EPERM; | 741 | return -EPERM; |
759 | } | 742 | } |
760 | 743 | ||
761 | if (ext4_blocks_count(es) + input->blocks_count < | 744 | if (ext4_blocks_count(es) + input->blocks_count < |
762 | ext4_blocks_count(es)) { | 745 | ext4_blocks_count(es)) { |
763 | ext4_warning(sb, __func__, "blocks_count overflow"); | 746 | ext4_warning(sb, "blocks_count overflow"); |
764 | return -EINVAL; | 747 | return -EINVAL; |
765 | } | 748 | } |
766 | 749 | ||
767 | if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < | 750 | if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < |
768 | le32_to_cpu(es->s_inodes_count)) { | 751 | le32_to_cpu(es->s_inodes_count)) { |
769 | ext4_warning(sb, __func__, "inodes_count overflow"); | 752 | ext4_warning(sb, "inodes_count overflow"); |
770 | return -EINVAL; | 753 | return -EINVAL; |
771 | } | 754 | } |
772 | 755 | ||
@@ -774,14 +757,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
774 | if (!EXT4_HAS_COMPAT_FEATURE(sb, | 757 | if (!EXT4_HAS_COMPAT_FEATURE(sb, |
775 | EXT4_FEATURE_COMPAT_RESIZE_INODE) | 758 | EXT4_FEATURE_COMPAT_RESIZE_INODE) |
776 | || !le16_to_cpu(es->s_reserved_gdt_blocks)) { | 759 | || !le16_to_cpu(es->s_reserved_gdt_blocks)) { |
777 | ext4_warning(sb, __func__, | 760 | ext4_warning(sb, |
778 | "No reserved GDT blocks, can't resize"); | 761 | "No reserved GDT blocks, can't resize"); |
779 | return -EPERM; | 762 | return -EPERM; |
780 | } | 763 | } |
781 | inode = ext4_iget(sb, EXT4_RESIZE_INO); | 764 | inode = ext4_iget(sb, EXT4_RESIZE_INO); |
782 | if (IS_ERR(inode)) { | 765 | if (IS_ERR(inode)) { |
783 | ext4_warning(sb, __func__, | 766 | ext4_warning(sb, "Error opening resize inode"); |
784 | "Error opening resize inode"); | ||
785 | return PTR_ERR(inode); | 767 | return PTR_ERR(inode); |
786 | } | 768 | } |
787 | } | 769 | } |
@@ -810,8 +792,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
810 | 792 | ||
811 | mutex_lock(&sbi->s_resize_lock); | 793 | mutex_lock(&sbi->s_resize_lock); |
812 | if (input->group != sbi->s_groups_count) { | 794 | if (input->group != sbi->s_groups_count) { |
813 | ext4_warning(sb, __func__, | 795 | ext4_warning(sb, "multiple resizers run on filesystem!"); |
814 | "multiple resizers run on filesystem!"); | ||
815 | err = -EBUSY; | 796 | err = -EBUSY; |
816 | goto exit_journal; | 797 | goto exit_journal; |
817 | } | 798 | } |
@@ -997,13 +978,12 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
997 | " too large to resize to %llu blocks safely\n", | 978 | " too large to resize to %llu blocks safely\n", |
998 | sb->s_id, n_blocks_count); | 979 | sb->s_id, n_blocks_count); |
999 | if (sizeof(sector_t) < 8) | 980 | if (sizeof(sector_t) < 8) |
1000 | ext4_warning(sb, __func__, "CONFIG_LBDAF not enabled"); | 981 | ext4_warning(sb, "CONFIG_LBDAF not enabled"); |
1001 | return -EINVAL; | 982 | return -EINVAL; |
1002 | } | 983 | } |
1003 | 984 | ||
1004 | if (n_blocks_count < o_blocks_count) { | 985 | if (n_blocks_count < o_blocks_count) { |
1005 | ext4_warning(sb, __func__, | 986 | ext4_warning(sb, "can't shrink FS - resize aborted"); |
1006 | "can't shrink FS - resize aborted"); | ||
1007 | return -EBUSY; | 987 | return -EBUSY; |
1008 | } | 988 | } |
1009 | 989 | ||
@@ -1011,15 +991,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1011 | ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); | 991 | ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); |
1012 | 992 | ||
1013 | if (last == 0) { | 993 | if (last == 0) { |
1014 | ext4_warning(sb, __func__, | 994 | ext4_warning(sb, "need to use ext2online to resize further"); |
1015 | "need to use ext2online to resize further"); | ||
1016 | return -EPERM; | 995 | return -EPERM; |
1017 | } | 996 | } |
1018 | 997 | ||
1019 | add = EXT4_BLOCKS_PER_GROUP(sb) - last; | 998 | add = EXT4_BLOCKS_PER_GROUP(sb) - last; |
1020 | 999 | ||
1021 | if (o_blocks_count + add < o_blocks_count) { | 1000 | if (o_blocks_count + add < o_blocks_count) { |
1022 | ext4_warning(sb, __func__, "blocks_count overflow"); | 1001 | ext4_warning(sb, "blocks_count overflow"); |
1023 | return -EINVAL; | 1002 | return -EINVAL; |
1024 | } | 1003 | } |
1025 | 1004 | ||
@@ -1027,16 +1006,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1027 | add = n_blocks_count - o_blocks_count; | 1006 | add = n_blocks_count - o_blocks_count; |
1028 | 1007 | ||
1029 | if (o_blocks_count + add < n_blocks_count) | 1008 | if (o_blocks_count + add < n_blocks_count) |
1030 | ext4_warning(sb, __func__, | 1009 | ext4_warning(sb, "will only finish group (%llu blocks, %u new)", |
1031 | "will only finish group (%llu" | ||
1032 | " blocks, %u new)", | ||
1033 | o_blocks_count + add, add); | 1010 | o_blocks_count + add, add); |
1034 | 1011 | ||
1035 | /* See if the device is actually as big as what was requested */ | 1012 | /* See if the device is actually as big as what was requested */ |
1036 | bh = sb_bread(sb, o_blocks_count + add - 1); | 1013 | bh = sb_bread(sb, o_blocks_count + add - 1); |
1037 | if (!bh) { | 1014 | if (!bh) { |
1038 | ext4_warning(sb, __func__, | 1015 | ext4_warning(sb, "can't read last block, resize aborted"); |
1039 | "can't read last block, resize aborted"); | ||
1040 | return -ENOSPC; | 1016 | return -ENOSPC; |
1041 | } | 1017 | } |
1042 | brelse(bh); | 1018 | brelse(bh); |
@@ -1047,14 +1023,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1047 | handle = ext4_journal_start_sb(sb, 3); | 1023 | handle = ext4_journal_start_sb(sb, 3); |
1048 | if (IS_ERR(handle)) { | 1024 | if (IS_ERR(handle)) { |
1049 | err = PTR_ERR(handle); | 1025 | err = PTR_ERR(handle); |
1050 | ext4_warning(sb, __func__, "error %d on journal start", err); | 1026 | ext4_warning(sb, "error %d on journal start", err); |
1051 | goto exit_put; | 1027 | goto exit_put; |
1052 | } | 1028 | } |
1053 | 1029 | ||
1054 | mutex_lock(&EXT4_SB(sb)->s_resize_lock); | 1030 | mutex_lock(&EXT4_SB(sb)->s_resize_lock); |
1055 | if (o_blocks_count != ext4_blocks_count(es)) { | 1031 | if (o_blocks_count != ext4_blocks_count(es)) { |
1056 | ext4_warning(sb, __func__, | 1032 | ext4_warning(sb, "multiple resizers run on filesystem!"); |
1057 | "multiple resizers run on filesystem!"); | ||
1058 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); | 1033 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); |
1059 | ext4_journal_stop(handle); | 1034 | ext4_journal_stop(handle); |
1060 | err = -EBUSY; | 1035 | err = -EBUSY; |
@@ -1063,8 +1038,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1063 | 1038 | ||
1064 | if ((err = ext4_journal_get_write_access(handle, | 1039 | if ((err = ext4_journal_get_write_access(handle, |
1065 | EXT4_SB(sb)->s_sbh))) { | 1040 | EXT4_SB(sb)->s_sbh))) { |
1066 | ext4_warning(sb, __func__, | 1041 | ext4_warning(sb, "error %d on journal write access", err); |
1067 | "error %d on journal write access", err); | ||
1068 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); | 1042 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); |
1069 | ext4_journal_stop(handle); | 1043 | ext4_journal_stop(handle); |
1070 | goto exit_put; | 1044 | goto exit_put; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d4ca92aab514..e14d22c170d5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -68,7 +68,21 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); | |||
68 | static int ext4_unfreeze(struct super_block *sb); | 68 | static int ext4_unfreeze(struct super_block *sb); |
69 | static void ext4_write_super(struct super_block *sb); | 69 | static void ext4_write_super(struct super_block *sb); |
70 | static int ext4_freeze(struct super_block *sb); | 70 | static int ext4_freeze(struct super_block *sb); |
71 | static int ext4_get_sb(struct file_system_type *fs_type, int flags, | ||
72 | const char *dev_name, void *data, struct vfsmount *mnt); | ||
71 | 73 | ||
/*
 * If no native ext3 driver is configured (neither built in nor as a
 * module) and CONFIG_EXT4_USE_FOR_EXT23 is set, define an "ext3"
 * file_system_type whose get_sb hook is ext4_get_sb().
 */
#if defined(CONFIG_EXT4_USE_FOR_EXT23) && \
	!(defined(CONFIG_EXT3_FS) || defined(CONFIG_EXT3_FS_MODULE))
static struct file_system_type ext3_fs_type = {
	.name		= "ext3",
	.owner		= THIS_MODULE,
	.fs_flags	= FS_REQUIRES_DEV,
	.get_sb		= ext4_get_sb,
	.kill_sb	= kill_block_super,
};
/* Non-zero when the holder of sb's block device is ext3_fs_type. */
#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
#else
/* The "ext3" alias is not compiled in this configuration. */
#define IS_EXT3_SB(sb) (0)
#endif
72 | 86 | ||
73 | ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, | 87 | ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, |
74 | struct ext4_group_desc *bg) | 88 | struct ext4_group_desc *bg) |
@@ -302,7 +316,7 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn, | |||
302 | * write out the superblock safely. | 316 | * write out the superblock safely. |
303 | * | 317 | * |
304 | * We'll just use the jbd2_journal_abort() error code to record an error in | 318 | * We'll just use the jbd2_journal_abort() error code to record an error in |
305 | * the journal instead. On recovery, the journal will compain about | 319 | * the journal instead. On recovery, the journal will complain about |
306 | * that error until we've noted it down and cleared it. | 320 | * that error until we've noted it down and cleared it. |
307 | */ | 321 | */ |
308 | 322 | ||
@@ -333,7 +347,7 @@ static void ext4_handle_error(struct super_block *sb) | |||
333 | sb->s_id); | 347 | sb->s_id); |
334 | } | 348 | } |
335 | 349 | ||
336 | void ext4_error(struct super_block *sb, const char *function, | 350 | void __ext4_error(struct super_block *sb, const char *function, |
337 | const char *fmt, ...) | 351 | const char *fmt, ...) |
338 | { | 352 | { |
339 | va_list args; | 353 | va_list args; |
@@ -347,6 +361,42 @@ void ext4_error(struct super_block *sb, const char *function, | |||
347 | ext4_handle_error(sb); | 361 | ext4_handle_error(sb); |
348 | } | 362 | } |
349 | 363 | ||
364 | void ext4_error_inode(const char *function, struct inode *inode, | ||
365 | const char *fmt, ...) | ||
366 | { | ||
367 | va_list args; | ||
368 | |||
369 | va_start(args, fmt); | ||
370 | printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ", | ||
371 | inode->i_sb->s_id, function, inode->i_ino, current->comm); | ||
372 | vprintk(fmt, args); | ||
373 | printk("\n"); | ||
374 | va_end(args); | ||
375 | |||
376 | ext4_handle_error(inode->i_sb); | ||
377 | } | ||
378 | |||
379 | void ext4_error_file(const char *function, struct file *file, | ||
380 | const char *fmt, ...) | ||
381 | { | ||
382 | va_list args; | ||
383 | struct inode *inode = file->f_dentry->d_inode; | ||
384 | char pathname[80], *path; | ||
385 | |||
386 | va_start(args, fmt); | ||
387 | path = d_path(&(file->f_path), pathname, sizeof(pathname)); | ||
388 | if (!path) | ||
389 | path = "(unknown)"; | ||
390 | printk(KERN_CRIT | ||
391 | "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ", | ||
392 | inode->i_sb->s_id, function, inode->i_ino, current->comm, path); | ||
393 | vprintk(fmt, args); | ||
394 | printk("\n"); | ||
395 | va_end(args); | ||
396 | |||
397 | ext4_handle_error(inode->i_sb); | ||
398 | } | ||
399 | |||
350 | static const char *ext4_decode_error(struct super_block *sb, int errno, | 400 | static const char *ext4_decode_error(struct super_block *sb, int errno, |
351 | char nbuf[16]) | 401 | char nbuf[16]) |
352 | { | 402 | { |
@@ -450,7 +500,7 @@ void ext4_msg (struct super_block * sb, const char *prefix, | |||
450 | va_end(args); | 500 | va_end(args); |
451 | } | 501 | } |
452 | 502 | ||
453 | void ext4_warning(struct super_block *sb, const char *function, | 503 | void __ext4_warning(struct super_block *sb, const char *function, |
454 | const char *fmt, ...) | 504 | const char *fmt, ...) |
455 | { | 505 | { |
456 | va_list args; | 506 | va_list args; |
@@ -507,7 +557,7 @@ void ext4_update_dynamic_rev(struct super_block *sb) | |||
507 | if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) | 557 | if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) |
508 | return; | 558 | return; |
509 | 559 | ||
510 | ext4_warning(sb, __func__, | 560 | ext4_warning(sb, |
511 | "updating to rev %d because of new feature flag, " | 561 | "updating to rev %d because of new feature flag, " |
512 | "running e2fsck is recommended", | 562 | "running e2fsck is recommended", |
513 | EXT4_DYNAMIC_REV); | 563 | EXT4_DYNAMIC_REV); |
@@ -603,10 +653,6 @@ static void ext4_put_super(struct super_block *sb) | |||
603 | if (sb->s_dirt) | 653 | if (sb->s_dirt) |
604 | ext4_commit_super(sb, 1); | 654 | ext4_commit_super(sb, 1); |
605 | 655 | ||
606 | ext4_release_system_zone(sb); | ||
607 | ext4_mb_release(sb); | ||
608 | ext4_ext_release(sb); | ||
609 | ext4_xattr_put_super(sb); | ||
610 | if (sbi->s_journal) { | 656 | if (sbi->s_journal) { |
611 | err = jbd2_journal_destroy(sbi->s_journal); | 657 | err = jbd2_journal_destroy(sbi->s_journal); |
612 | sbi->s_journal = NULL; | 658 | sbi->s_journal = NULL; |
@@ -614,6 +660,12 @@ static void ext4_put_super(struct super_block *sb) | |||
614 | ext4_abort(sb, __func__, | 660 | ext4_abort(sb, __func__, |
615 | "Couldn't clean up the journal"); | 661 | "Couldn't clean up the journal"); |
616 | } | 662 | } |
663 | |||
664 | ext4_release_system_zone(sb); | ||
665 | ext4_mb_release(sb); | ||
666 | ext4_ext_release(sb); | ||
667 | ext4_xattr_put_super(sb); | ||
668 | |||
617 | if (!(sb->s_flags & MS_RDONLY)) { | 669 | if (!(sb->s_flags & MS_RDONLY)) { |
618 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 670 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
619 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 671 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
@@ -700,10 +752,17 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
700 | ei->i_reserved_data_blocks = 0; | 752 | ei->i_reserved_data_blocks = 0; |
701 | ei->i_reserved_meta_blocks = 0; | 753 | ei->i_reserved_meta_blocks = 0; |
702 | ei->i_allocated_meta_blocks = 0; | 754 | ei->i_allocated_meta_blocks = 0; |
755 | ei->i_da_metadata_calc_len = 0; | ||
703 | ei->i_delalloc_reserved_flag = 0; | 756 | ei->i_delalloc_reserved_flag = 0; |
704 | spin_lock_init(&(ei->i_block_reservation_lock)); | 757 | spin_lock_init(&(ei->i_block_reservation_lock)); |
705 | INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); | 758 | #ifdef CONFIG_QUOTA |
759 | ei->i_reserved_quota = 0; | ||
760 | #endif | ||
761 | INIT_LIST_HEAD(&ei->i_completed_io_list); | ||
762 | spin_lock_init(&ei->i_completed_io_lock); | ||
706 | ei->cur_aio_dio = NULL; | 763 | ei->cur_aio_dio = NULL; |
764 | ei->i_sync_tid = 0; | ||
765 | ei->i_datasync_tid = 0; | ||
707 | 766 | ||
708 | return &ei->vfs_inode; | 767 | return &ei->vfs_inode; |
709 | } | 768 | } |
@@ -753,6 +812,7 @@ static void destroy_inodecache(void) | |||
753 | 812 | ||
754 | static void ext4_clear_inode(struct inode *inode) | 813 | static void ext4_clear_inode(struct inode *inode) |
755 | { | 814 | { |
815 | dquot_drop(inode); | ||
756 | ext4_discard_preallocations(inode); | 816 | ext4_discard_preallocations(inode); |
757 | if (EXT4_JOURNAL(inode)) | 817 | if (EXT4_JOURNAL(inode)) |
758 | jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, | 818 | jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, |
@@ -765,9 +825,22 @@ static inline void ext4_show_quota_options(struct seq_file *seq, | |||
765 | #if defined(CONFIG_QUOTA) | 825 | #if defined(CONFIG_QUOTA) |
766 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 826 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
767 | 827 | ||
768 | if (sbi->s_jquota_fmt) | 828 | if (sbi->s_jquota_fmt) { |
769 | seq_printf(seq, ",jqfmt=%s", | 829 | char *fmtname = ""; |
770 | (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); | 830 | |
831 | switch (sbi->s_jquota_fmt) { | ||
832 | case QFMT_VFS_OLD: | ||
833 | fmtname = "vfsold"; | ||
834 | break; | ||
835 | case QFMT_VFS_V0: | ||
836 | fmtname = "vfsv0"; | ||
837 | break; | ||
838 | case QFMT_VFS_V1: | ||
839 | fmtname = "vfsv1"; | ||
840 | break; | ||
841 | } | ||
842 | seq_printf(seq, ",jqfmt=%s", fmtname); | ||
843 | } | ||
771 | 844 | ||
772 | if (sbi->s_qf_names[USRQUOTA]) | 845 | if (sbi->s_qf_names[USRQUOTA]) |
773 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); | 846 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); |
@@ -775,10 +848,10 @@ static inline void ext4_show_quota_options(struct seq_file *seq, | |||
775 | if (sbi->s_qf_names[GRPQUOTA]) | 848 | if (sbi->s_qf_names[GRPQUOTA]) |
776 | seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); | 849 | seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); |
777 | 850 | ||
778 | if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) | 851 | if (test_opt(sb, USRQUOTA)) |
779 | seq_puts(seq, ",usrquota"); | 852 | seq_puts(seq, ",usrquota"); |
780 | 853 | ||
781 | if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) | 854 | if (test_opt(sb, GRPQUOTA)) |
782 | seq_puts(seq, ",grpquota"); | 855 | seq_puts(seq, ",grpquota"); |
783 | #endif | 856 | #endif |
784 | } | 857 | } |
@@ -899,6 +972,15 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
899 | if (test_opt(sb, NO_AUTO_DA_ALLOC)) | 972 | if (test_opt(sb, NO_AUTO_DA_ALLOC)) |
900 | seq_puts(seq, ",noauto_da_alloc"); | 973 | seq_puts(seq, ",noauto_da_alloc"); |
901 | 974 | ||
975 | if (test_opt(sb, DISCARD)) | ||
976 | seq_puts(seq, ",discard"); | ||
977 | |||
978 | if (test_opt(sb, NOLOAD)) | ||
979 | seq_puts(seq, ",norecovery"); | ||
980 | |||
981 | if (test_opt(sb, DIOREAD_NOLOCK)) | ||
982 | seq_puts(seq, ",dioread_nolock"); | ||
983 | |||
902 | ext4_show_quota_options(seq, sb); | 984 | ext4_show_quota_options(seq, sb); |
903 | 985 | ||
904 | return 0; | 986 | return 0; |
@@ -985,17 +1067,9 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
985 | const char *data, size_t len, loff_t off); | 1067 | const char *data, size_t len, loff_t off); |
986 | 1068 | ||
987 | static const struct dquot_operations ext4_quota_operations = { | 1069 | static const struct dquot_operations ext4_quota_operations = { |
988 | .initialize = dquot_initialize, | 1070 | #ifdef CONFIG_QUOTA |
989 | .drop = dquot_drop, | ||
990 | .alloc_space = dquot_alloc_space, | ||
991 | .reserve_space = dquot_reserve_space, | ||
992 | .claim_space = dquot_claim_space, | ||
993 | .release_rsv = dquot_release_reserved_space, | ||
994 | .get_reserved_space = ext4_get_reserved_space, | 1071 | .get_reserved_space = ext4_get_reserved_space, |
995 | .alloc_inode = dquot_alloc_inode, | 1072 | #endif |
996 | .free_space = dquot_free_space, | ||
997 | .free_inode = dquot_free_inode, | ||
998 | .transfer = dquot_transfer, | ||
999 | .write_dquot = ext4_write_dquot, | 1073 | .write_dquot = ext4_write_dquot, |
1000 | .acquire_dquot = ext4_acquire_dquot, | 1074 | .acquire_dquot = ext4_acquire_dquot, |
1001 | .release_dquot = ext4_release_dquot, | 1075 | .release_dquot = ext4_release_dquot, |
@@ -1074,12 +1148,14 @@ enum { | |||
1074 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 1148 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
1075 | Opt_data_err_abort, Opt_data_err_ignore, | 1149 | Opt_data_err_abort, Opt_data_err_ignore, |
1076 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 1150 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
1077 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 1151 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, |
1078 | Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, | 1152 | Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, |
1079 | Opt_usrquota, Opt_grpquota, Opt_i_version, | 1153 | Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, |
1080 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, | 1154 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, |
1081 | Opt_block_validity, Opt_noblock_validity, | 1155 | Opt_block_validity, Opt_noblock_validity, |
1082 | Opt_inode_readahead_blks, Opt_journal_ioprio | 1156 | Opt_inode_readahead_blks, Opt_journal_ioprio, |
1157 | Opt_dioread_nolock, Opt_dioread_lock, | ||
1158 | Opt_discard, Opt_nodiscard, | ||
1083 | }; | 1159 | }; |
1084 | 1160 | ||
1085 | static const match_table_t tokens = { | 1161 | static const match_table_t tokens = { |
@@ -1104,6 +1180,7 @@ static const match_table_t tokens = { | |||
1104 | {Opt_acl, "acl"}, | 1180 | {Opt_acl, "acl"}, |
1105 | {Opt_noacl, "noacl"}, | 1181 | {Opt_noacl, "noacl"}, |
1106 | {Opt_noload, "noload"}, | 1182 | {Opt_noload, "noload"}, |
1183 | {Opt_noload, "norecovery"}, | ||
1107 | {Opt_nobh, "nobh"}, | 1184 | {Opt_nobh, "nobh"}, |
1108 | {Opt_bh, "bh"}, | 1185 | {Opt_bh, "bh"}, |
1109 | {Opt_commit, "commit=%u"}, | 1186 | {Opt_commit, "commit=%u"}, |
@@ -1125,6 +1202,7 @@ static const match_table_t tokens = { | |||
1125 | {Opt_grpjquota, "grpjquota=%s"}, | 1202 | {Opt_grpjquota, "grpjquota=%s"}, |
1126 | {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, | 1203 | {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, |
1127 | {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, | 1204 | {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, |
1205 | {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, | ||
1128 | {Opt_grpquota, "grpquota"}, | 1206 | {Opt_grpquota, "grpquota"}, |
1129 | {Opt_noquota, "noquota"}, | 1207 | {Opt_noquota, "noquota"}, |
1130 | {Opt_quota, "quota"}, | 1208 | {Opt_quota, "quota"}, |
@@ -1144,6 +1222,10 @@ static const match_table_t tokens = { | |||
1144 | {Opt_auto_da_alloc, "auto_da_alloc=%u"}, | 1222 | {Opt_auto_da_alloc, "auto_da_alloc=%u"}, |
1145 | {Opt_auto_da_alloc, "auto_da_alloc"}, | 1223 | {Opt_auto_da_alloc, "auto_da_alloc"}, |
1146 | {Opt_noauto_da_alloc, "noauto_da_alloc"}, | 1224 | {Opt_noauto_da_alloc, "noauto_da_alloc"}, |
1225 | {Opt_dioread_nolock, "dioread_nolock"}, | ||
1226 | {Opt_dioread_lock, "dioread_lock"}, | ||
1227 | {Opt_discard, "discard"}, | ||
1228 | {Opt_nodiscard, "nodiscard"}, | ||
1147 | {Opt_err, NULL}, | 1229 | {Opt_err, NULL}, |
1148 | }; | 1230 | }; |
1149 | 1231 | ||
@@ -1171,6 +1253,66 @@ static ext4_fsblk_t get_sb_block(void **data) | |||
1171 | } | 1253 | } |
1172 | 1254 | ||
1173 | #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) | 1255 | #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) |
1256 | static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" | ||
1257 | "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; | ||
1258 | |||
1259 | #ifdef CONFIG_QUOTA | ||
1260 | static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) | ||
1261 | { | ||
1262 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1263 | char *qname; | ||
1264 | |||
1265 | if (sb_any_quota_loaded(sb) && | ||
1266 | !sbi->s_qf_names[qtype]) { | ||
1267 | ext4_msg(sb, KERN_ERR, | ||
1268 | "Cannot change journaled " | ||
1269 | "quota options when quota turned on"); | ||
1270 | return 0; | ||
1271 | } | ||
1272 | qname = match_strdup(args); | ||
1273 | if (!qname) { | ||
1274 | ext4_msg(sb, KERN_ERR, | ||
1275 | "Not enough memory for storing quotafile name"); | ||
1276 | return 0; | ||
1277 | } | ||
1278 | if (sbi->s_qf_names[qtype] && | ||
1279 | strcmp(sbi->s_qf_names[qtype], qname)) { | ||
1280 | ext4_msg(sb, KERN_ERR, | ||
1281 | "%s quota file already specified", QTYPE2NAME(qtype)); | ||
1282 | kfree(qname); | ||
1283 | return 0; | ||
1284 | } | ||
1285 | sbi->s_qf_names[qtype] = qname; | ||
1286 | if (strchr(sbi->s_qf_names[qtype], '/')) { | ||
1287 | ext4_msg(sb, KERN_ERR, | ||
1288 | "quotafile must be on filesystem root"); | ||
1289 | kfree(sbi->s_qf_names[qtype]); | ||
1290 | sbi->s_qf_names[qtype] = NULL; | ||
1291 | return 0; | ||
1292 | } | ||
1293 | set_opt(sbi->s_mount_opt, QUOTA); | ||
1294 | return 1; | ||
1295 | } | ||
1296 | |||
1297 | static int clear_qf_name(struct super_block *sb, int qtype) | ||
1298 | { | ||
1299 | |||
1300 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1301 | |||
1302 | if (sb_any_quota_loaded(sb) && | ||
1303 | sbi->s_qf_names[qtype]) { | ||
1304 | ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" | ||
1305 | " when quota turned on"); | ||
1306 | return 0; | ||
1307 | } | ||
1308 | /* | ||
1309 | * The space will be released later when all options are confirmed | ||
1310 | * to be correct | ||
1311 | */ | ||
1312 | sbi->s_qf_names[qtype] = NULL; | ||
1313 | return 1; | ||
1314 | } | ||
1315 | #endif | ||
1174 | 1316 | ||
1175 | static int parse_options(char *options, struct super_block *sb, | 1317 | static int parse_options(char *options, struct super_block *sb, |
1176 | unsigned long *journal_devnum, | 1318 | unsigned long *journal_devnum, |
@@ -1183,8 +1325,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1183 | int data_opt = 0; | 1325 | int data_opt = 0; |
1184 | int option; | 1326 | int option; |
1185 | #ifdef CONFIG_QUOTA | 1327 | #ifdef CONFIG_QUOTA |
1186 | int qtype, qfmt; | 1328 | int qfmt; |
1187 | char *qname; | ||
1188 | #endif | 1329 | #endif |
1189 | 1330 | ||
1190 | if (!options) | 1331 | if (!options) |
@@ -1195,19 +1336,31 @@ static int parse_options(char *options, struct super_block *sb, | |||
1195 | if (!*p) | 1336 | if (!*p) |
1196 | continue; | 1337 | continue; |
1197 | 1338 | ||
1339 | /* | ||
1340 | * Initialize args struct so we know whether arg was | ||
1341 | * found; some options take optional arguments. | ||
1342 | */ | ||
1343 | args[0].to = args[0].from = 0; | ||
1198 | token = match_token(p, tokens, args); | 1344 | token = match_token(p, tokens, args); |
1199 | switch (token) { | 1345 | switch (token) { |
1200 | case Opt_bsd_df: | 1346 | case Opt_bsd_df: |
1347 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | ||
1201 | clear_opt(sbi->s_mount_opt, MINIX_DF); | 1348 | clear_opt(sbi->s_mount_opt, MINIX_DF); |
1202 | break; | 1349 | break; |
1203 | case Opt_minix_df: | 1350 | case Opt_minix_df: |
1351 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | ||
1204 | set_opt(sbi->s_mount_opt, MINIX_DF); | 1352 | set_opt(sbi->s_mount_opt, MINIX_DF); |
1353 | |||
1205 | break; | 1354 | break; |
1206 | case Opt_grpid: | 1355 | case Opt_grpid: |
1356 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | ||
1207 | set_opt(sbi->s_mount_opt, GRPID); | 1357 | set_opt(sbi->s_mount_opt, GRPID); |
1358 | |||
1208 | break; | 1359 | break; |
1209 | case Opt_nogrpid: | 1360 | case Opt_nogrpid: |
1361 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | ||
1210 | clear_opt(sbi->s_mount_opt, GRPID); | 1362 | clear_opt(sbi->s_mount_opt, GRPID); |
1363 | |||
1211 | break; | 1364 | break; |
1212 | case Opt_resuid: | 1365 | case Opt_resuid: |
1213 | if (match_int(&args[0], &option)) | 1366 | if (match_int(&args[0], &option)) |
@@ -1344,14 +1497,13 @@ static int parse_options(char *options, struct super_block *sb, | |||
1344 | data_opt = EXT4_MOUNT_WRITEBACK_DATA; | 1497 | data_opt = EXT4_MOUNT_WRITEBACK_DATA; |
1345 | datacheck: | 1498 | datacheck: |
1346 | if (is_remount) { | 1499 | if (is_remount) { |
1347 | if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) | 1500 | if (test_opt(sb, DATA_FLAGS) != data_opt) { |
1348 | != data_opt) { | ||
1349 | ext4_msg(sb, KERN_ERR, | 1501 | ext4_msg(sb, KERN_ERR, |
1350 | "Cannot change data mode on remount"); | 1502 | "Cannot change data mode on remount"); |
1351 | return 0; | 1503 | return 0; |
1352 | } | 1504 | } |
1353 | } else { | 1505 | } else { |
1354 | sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; | 1506 | clear_opt(sbi->s_mount_opt, DATA_FLAGS); |
1355 | sbi->s_mount_opt |= data_opt; | 1507 | sbi->s_mount_opt |= data_opt; |
1356 | } | 1508 | } |
1357 | break; | 1509 | break; |
@@ -1363,68 +1515,30 @@ static int parse_options(char *options, struct super_block *sb, | |||
1363 | break; | 1515 | break; |
1364 | #ifdef CONFIG_QUOTA | 1516 | #ifdef CONFIG_QUOTA |
1365 | case Opt_usrjquota: | 1517 | case Opt_usrjquota: |
1366 | qtype = USRQUOTA; | 1518 | if (!set_qf_name(sb, USRQUOTA, &args[0])) |
1367 | goto set_qf_name; | ||
1368 | case Opt_grpjquota: | ||
1369 | qtype = GRPQUOTA; | ||
1370 | set_qf_name: | ||
1371 | if (sb_any_quota_loaded(sb) && | ||
1372 | !sbi->s_qf_names[qtype]) { | ||
1373 | ext4_msg(sb, KERN_ERR, | ||
1374 | "Cannot change journaled " | ||
1375 | "quota options when quota turned on"); | ||
1376 | return 0; | ||
1377 | } | ||
1378 | qname = match_strdup(&args[0]); | ||
1379 | if (!qname) { | ||
1380 | ext4_msg(sb, KERN_ERR, | ||
1381 | "Not enough memory for " | ||
1382 | "storing quotafile name"); | ||
1383 | return 0; | 1519 | return 0; |
1384 | } | 1520 | break; |
1385 | if (sbi->s_qf_names[qtype] && | 1521 | case Opt_grpjquota: |
1386 | strcmp(sbi->s_qf_names[qtype], qname)) { | 1522 | if (!set_qf_name(sb, GRPQUOTA, &args[0])) |
1387 | ext4_msg(sb, KERN_ERR, | ||
1388 | "%s quota file already " | ||
1389 | "specified", QTYPE2NAME(qtype)); | ||
1390 | kfree(qname); | ||
1391 | return 0; | ||
1392 | } | ||
1393 | sbi->s_qf_names[qtype] = qname; | ||
1394 | if (strchr(sbi->s_qf_names[qtype], '/')) { | ||
1395 | ext4_msg(sb, KERN_ERR, | ||
1396 | "quotafile must be on " | ||
1397 | "filesystem root"); | ||
1398 | kfree(sbi->s_qf_names[qtype]); | ||
1399 | sbi->s_qf_names[qtype] = NULL; | ||
1400 | return 0; | 1523 | return 0; |
1401 | } | ||
1402 | set_opt(sbi->s_mount_opt, QUOTA); | ||
1403 | break; | 1524 | break; |
1404 | case Opt_offusrjquota: | 1525 | case Opt_offusrjquota: |
1405 | qtype = USRQUOTA; | 1526 | if (!clear_qf_name(sb, USRQUOTA)) |
1406 | goto clear_qf_name; | 1527 | return 0; |
1528 | break; | ||
1407 | case Opt_offgrpjquota: | 1529 | case Opt_offgrpjquota: |
1408 | qtype = GRPQUOTA; | 1530 | if (!clear_qf_name(sb, GRPQUOTA)) |
1409 | clear_qf_name: | ||
1410 | if (sb_any_quota_loaded(sb) && | ||
1411 | sbi->s_qf_names[qtype]) { | ||
1412 | ext4_msg(sb, KERN_ERR, "Cannot change " | ||
1413 | "journaled quota options when " | ||
1414 | "quota turned on"); | ||
1415 | return 0; | 1531 | return 0; |
1416 | } | ||
1417 | /* | ||
1418 | * The space will be released later when all options | ||
1419 | * are confirmed to be correct | ||
1420 | */ | ||
1421 | sbi->s_qf_names[qtype] = NULL; | ||
1422 | break; | 1532 | break; |
1533 | |||
1423 | case Opt_jqfmt_vfsold: | 1534 | case Opt_jqfmt_vfsold: |
1424 | qfmt = QFMT_VFS_OLD; | 1535 | qfmt = QFMT_VFS_OLD; |
1425 | goto set_qf_format; | 1536 | goto set_qf_format; |
1426 | case Opt_jqfmt_vfsv0: | 1537 | case Opt_jqfmt_vfsv0: |
1427 | qfmt = QFMT_VFS_V0; | 1538 | qfmt = QFMT_VFS_V0; |
1539 | goto set_qf_format; | ||
1540 | case Opt_jqfmt_vfsv1: | ||
1541 | qfmt = QFMT_VFS_V1; | ||
1428 | set_qf_format: | 1542 | set_qf_format: |
1429 | if (sb_any_quota_loaded(sb) && | 1543 | if (sb_any_quota_loaded(sb) && |
1430 | sbi->s_jquota_fmt != qfmt) { | 1544 | sbi->s_jquota_fmt != qfmt) { |
@@ -1467,6 +1581,7 @@ set_qf_format: | |||
1467 | case Opt_offgrpjquota: | 1581 | case Opt_offgrpjquota: |
1468 | case Opt_jqfmt_vfsold: | 1582 | case Opt_jqfmt_vfsold: |
1469 | case Opt_jqfmt_vfsv0: | 1583 | case Opt_jqfmt_vfsv0: |
1584 | case Opt_jqfmt_vfsv1: | ||
1470 | ext4_msg(sb, KERN_ERR, | 1585 | ext4_msg(sb, KERN_ERR, |
1471 | "journaled quota options not supported"); | 1586 | "journaled quota options not supported"); |
1472 | break; | 1587 | break; |
@@ -1480,10 +1595,11 @@ set_qf_format: | |||
1480 | clear_opt(sbi->s_mount_opt, BARRIER); | 1595 | clear_opt(sbi->s_mount_opt, BARRIER); |
1481 | break; | 1596 | break; |
1482 | case Opt_barrier: | 1597 | case Opt_barrier: |
1483 | if (match_int(&args[0], &option)) { | 1598 | if (args[0].from) { |
1484 | set_opt(sbi->s_mount_opt, BARRIER); | 1599 | if (match_int(&args[0], &option)) |
1485 | break; | 1600 | return 0; |
1486 | } | 1601 | } else |
1602 | option = 1; /* No argument, default to 1 */ | ||
1487 | if (option) | 1603 | if (option) |
1488 | set_opt(sbi->s_mount_opt, BARRIER); | 1604 | set_opt(sbi->s_mount_opt, BARRIER); |
1489 | else | 1605 | else |
@@ -1556,15 +1672,28 @@ set_qf_format: | |||
1556 | set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); | 1672 | set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); |
1557 | break; | 1673 | break; |
1558 | case Opt_auto_da_alloc: | 1674 | case Opt_auto_da_alloc: |
1559 | if (match_int(&args[0], &option)) { | 1675 | if (args[0].from) { |
1560 | clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); | 1676 | if (match_int(&args[0], &option)) |
1561 | break; | 1677 | return 0; |
1562 | } | 1678 | } else |
1679 | option = 1; /* No argument, default to 1 */ | ||
1563 | if (option) | 1680 | if (option) |
1564 | clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); | 1681 | clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); |
1565 | else | 1682 | else |
1566 | set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); | 1683 | set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); |
1567 | break; | 1684 | break; |
1685 | case Opt_discard: | ||
1686 | set_opt(sbi->s_mount_opt, DISCARD); | ||
1687 | break; | ||
1688 | case Opt_nodiscard: | ||
1689 | clear_opt(sbi->s_mount_opt, DISCARD); | ||
1690 | break; | ||
1691 | case Opt_dioread_nolock: | ||
1692 | set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); | ||
1693 | break; | ||
1694 | case Opt_dioread_lock: | ||
1695 | clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); | ||
1696 | break; | ||
1568 | default: | 1697 | default: |
1569 | ext4_msg(sb, KERN_ERR, | 1698 | ext4_msg(sb, KERN_ERR, |
1570 | "Unrecognized mount option \"%s\" " | 1699 | "Unrecognized mount option \"%s\" " |
@@ -1574,18 +1703,13 @@ set_qf_format: | |||
1574 | } | 1703 | } |
1575 | #ifdef CONFIG_QUOTA | 1704 | #ifdef CONFIG_QUOTA |
1576 | if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { | 1705 | if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { |
1577 | if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && | 1706 | if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) |
1578 | sbi->s_qf_names[USRQUOTA]) | ||
1579 | clear_opt(sbi->s_mount_opt, USRQUOTA); | 1707 | clear_opt(sbi->s_mount_opt, USRQUOTA); |
1580 | 1708 | ||
1581 | if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && | 1709 | if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) |
1582 | sbi->s_qf_names[GRPQUOTA]) | ||
1583 | clear_opt(sbi->s_mount_opt, GRPQUOTA); | 1710 | clear_opt(sbi->s_mount_opt, GRPQUOTA); |
1584 | 1711 | ||
1585 | if ((sbi->s_qf_names[USRQUOTA] && | 1712 | if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { |
1586 | (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || | ||
1587 | (sbi->s_qf_names[GRPQUOTA] && | ||
1588 | (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { | ||
1589 | ext4_msg(sb, KERN_ERR, "old and new quota " | 1713 | ext4_msg(sb, KERN_ERR, "old and new quota " |
1590 | "format mixing"); | 1714 | "format mixing"); |
1591 | return 0; | 1715 | return 0; |
@@ -1673,14 +1797,14 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1673 | size_t size; | 1797 | size_t size; |
1674 | int i; | 1798 | int i; |
1675 | 1799 | ||
1676 | if (!sbi->s_es->s_log_groups_per_flex) { | 1800 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; |
1801 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; | ||
1802 | |||
1803 | if (groups_per_flex < 2) { | ||
1677 | sbi->s_log_groups_per_flex = 0; | 1804 | sbi->s_log_groups_per_flex = 0; |
1678 | return 1; | 1805 | return 1; |
1679 | } | 1806 | } |
1680 | 1807 | ||
1681 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | ||
1682 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; | ||
1683 | |||
1684 | /* We allocate both existing and potentially added groups */ | 1808 | /* We allocate both existing and potentially added groups */ |
1685 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + | 1809 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + |
1686 | ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << | 1810 | ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << |
@@ -1895,7 +2019,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1895 | } | 2019 | } |
1896 | 2020 | ||
1897 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); | 2021 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); |
1898 | vfs_dq_init(inode); | 2022 | dquot_initialize(inode); |
1899 | if (inode->i_nlink) { | 2023 | if (inode->i_nlink) { |
1900 | ext4_msg(sb, KERN_DEBUG, | 2024 | ext4_msg(sb, KERN_DEBUG, |
1901 | "%s: truncating inode %lu to %lld bytes", | 2025 | "%s: truncating inode %lu to %lld bytes", |
@@ -2099,11 +2223,8 @@ static int parse_strtoul(const char *buf, | |||
2099 | { | 2223 | { |
2100 | char *endp; | 2224 | char *endp; |
2101 | 2225 | ||
2102 | while (*buf && isspace(*buf)) | 2226 | *value = simple_strtoul(skip_spaces(buf), &endp, 0); |
2103 | buf++; | 2227 | endp = skip_spaces(endp); |
2104 | *value = simple_strtoul(buf, &endp, 0); | ||
2105 | while (*endp && isspace(*endp)) | ||
2106 | endp++; | ||
2107 | if (*endp || *value > max) | 2228 | if (*endp || *value > max) |
2108 | return -EINVAL; | 2229 | return -EINVAL; |
2109 | 2230 | ||
@@ -2134,9 +2255,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, | |||
2134 | struct super_block *sb = sbi->s_buddy_cache->i_sb; | 2255 | struct super_block *sb = sbi->s_buddy_cache->i_sb; |
2135 | 2256 | ||
2136 | return snprintf(buf, PAGE_SIZE, "%llu\n", | 2257 | return snprintf(buf, PAGE_SIZE, "%llu\n", |
2137 | sbi->s_kbytes_written + | 2258 | (unsigned long long)(sbi->s_kbytes_written + |
2138 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 2259 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
2139 | EXT4_SB(sb)->s_sectors_written_start) >> 1)); | 2260 | EXT4_SB(sb)->s_sectors_written_start) >> 1))); |
2140 | } | 2261 | } |
2141 | 2262 | ||
2142 | static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | 2263 | static ssize_t inode_readahead_blks_store(struct ext4_attr *a, |
@@ -2251,7 +2372,7 @@ static void ext4_sb_release(struct kobject *kobj) | |||
2251 | } | 2372 | } |
2252 | 2373 | ||
2253 | 2374 | ||
2254 | static struct sysfs_ops ext4_attr_ops = { | 2375 | static const struct sysfs_ops ext4_attr_ops = { |
2255 | .show = ext4_attr_show, | 2376 | .show = ext4_attr_show, |
2256 | .store = ext4_attr_store, | 2377 | .store = ext4_attr_store, |
2257 | }; | 2378 | }; |
@@ -2391,8 +2512,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2391 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); | 2512 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); |
2392 | if (def_mount_opts & EXT4_DEFM_DEBUG) | 2513 | if (def_mount_opts & EXT4_DEFM_DEBUG) |
2393 | set_opt(sbi->s_mount_opt, DEBUG); | 2514 | set_opt(sbi->s_mount_opt, DEBUG); |
2394 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) | 2515 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { |
2516 | ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", | ||
2517 | "2.6.38"); | ||
2395 | set_opt(sbi->s_mount_opt, GRPID); | 2518 | set_opt(sbi->s_mount_opt, GRPID); |
2519 | } | ||
2396 | if (def_mount_opts & EXT4_DEFM_UID16) | 2520 | if (def_mount_opts & EXT4_DEFM_UID16) |
2397 | set_opt(sbi->s_mount_opt, NO_UID32); | 2521 | set_opt(sbi->s_mount_opt, NO_UID32); |
2398 | #ifdef CONFIG_EXT4_FS_XATTR | 2522 | #ifdef CONFIG_EXT4_FS_XATTR |
@@ -2404,11 +2528,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2404 | set_opt(sbi->s_mount_opt, POSIX_ACL); | 2528 | set_opt(sbi->s_mount_opt, POSIX_ACL); |
2405 | #endif | 2529 | #endif |
2406 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) | 2530 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) |
2407 | sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; | 2531 | set_opt(sbi->s_mount_opt, JOURNAL_DATA); |
2408 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) | 2532 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) |
2409 | sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; | 2533 | set_opt(sbi->s_mount_opt, ORDERED_DATA); |
2410 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) | 2534 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) |
2411 | sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; | 2535 | set_opt(sbi->s_mount_opt, WRITEBACK_DATA); |
2412 | 2536 | ||
2413 | if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) | 2537 | if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) |
2414 | set_opt(sbi->s_mount_opt, ERRORS_PANIC); | 2538 | set_opt(sbi->s_mount_opt, ERRORS_PANIC); |
@@ -2429,14 +2553,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2429 | * enable delayed allocation by default | 2553 | * enable delayed allocation by default |
2430 | * Use -o nodelalloc to turn it off | 2554 | * Use -o nodelalloc to turn it off |
2431 | */ | 2555 | */ |
2432 | set_opt(sbi->s_mount_opt, DELALLOC); | 2556 | if (!IS_EXT3_SB(sb)) |
2557 | set_opt(sbi->s_mount_opt, DELALLOC); | ||
2433 | 2558 | ||
2434 | if (!parse_options((char *) data, sb, &journal_devnum, | 2559 | if (!parse_options((char *) data, sb, &journal_devnum, |
2435 | &journal_ioprio, NULL, 0)) | 2560 | &journal_ioprio, NULL, 0)) |
2436 | goto failed_mount; | 2561 | goto failed_mount; |
2437 | 2562 | ||
2438 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | 2563 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | |
2439 | ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); | 2564 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); |
2440 | 2565 | ||
2441 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && | 2566 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && |
2442 | (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || | 2567 | (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || |
@@ -2721,31 +2846,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2721 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { | 2846 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { |
2722 | if (ext4_load_journal(sb, es, journal_devnum)) | 2847 | if (ext4_load_journal(sb, es, journal_devnum)) |
2723 | goto failed_mount3; | 2848 | goto failed_mount3; |
2724 | if (!(sb->s_flags & MS_RDONLY) && | ||
2725 | EXT4_SB(sb)->s_journal->j_failed_commit) { | ||
2726 | ext4_msg(sb, KERN_CRIT, "error: " | ||
2727 | "ext4_fill_super: Journal transaction " | ||
2728 | "%u is corrupt", | ||
2729 | EXT4_SB(sb)->s_journal->j_failed_commit); | ||
2730 | if (test_opt(sb, ERRORS_RO)) { | ||
2731 | ext4_msg(sb, KERN_CRIT, | ||
2732 | "Mounting filesystem read-only"); | ||
2733 | sb->s_flags |= MS_RDONLY; | ||
2734 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | ||
2735 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | ||
2736 | } | ||
2737 | if (test_opt(sb, ERRORS_PANIC)) { | ||
2738 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | ||
2739 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | ||
2740 | ext4_commit_super(sb, 1); | ||
2741 | goto failed_mount4; | ||
2742 | } | ||
2743 | } | ||
2744 | } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && | 2849 | } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && |
2745 | EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { | 2850 | EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { |
2746 | ext4_msg(sb, KERN_ERR, "required journal recovery " | 2851 | ext4_msg(sb, KERN_ERR, "required journal recovery " |
2747 | "suppressed and not mounted read-only"); | 2852 | "suppressed and not mounted read-only"); |
2748 | goto failed_mount4; | 2853 | goto failed_mount_wq; |
2749 | } else { | 2854 | } else { |
2750 | clear_opt(sbi->s_mount_opt, DATA_FLAGS); | 2855 | clear_opt(sbi->s_mount_opt, DATA_FLAGS); |
2751 | set_opt(sbi->s_mount_opt, WRITEBACK_DATA); | 2856 | set_opt(sbi->s_mount_opt, WRITEBACK_DATA); |
@@ -2758,7 +2863,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2758 | !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, | 2863 | !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, |
2759 | JBD2_FEATURE_INCOMPAT_64BIT)) { | 2864 | JBD2_FEATURE_INCOMPAT_64BIT)) { |
2760 | ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); | 2865 | ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); |
2761 | goto failed_mount4; | 2866 | goto failed_mount_wq; |
2762 | } | 2867 | } |
2763 | 2868 | ||
2764 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { | 2869 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { |
@@ -2797,7 +2902,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2797 | (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { | 2902 | (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { |
2798 | ext4_msg(sb, KERN_ERR, "Journal does not support " | 2903 | ext4_msg(sb, KERN_ERR, "Journal does not support " |
2799 | "requested data journaling mode"); | 2904 | "requested data journaling mode"); |
2800 | goto failed_mount4; | 2905 | goto failed_mount_wq; |
2801 | } | 2906 | } |
2802 | default: | 2907 | default: |
2803 | break; | 2908 | break; |
@@ -2805,13 +2910,17 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2805 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); | 2910 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); |
2806 | 2911 | ||
2807 | no_journal: | 2912 | no_journal: |
2808 | |||
2809 | if (test_opt(sb, NOBH)) { | 2913 | if (test_opt(sb, NOBH)) { |
2810 | if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { | 2914 | if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { |
2811 | ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " | 2915 | ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " |
2812 | "its supported only with writeback mode"); | 2916 | "its supported only with writeback mode"); |
2813 | clear_opt(sbi->s_mount_opt, NOBH); | 2917 | clear_opt(sbi->s_mount_opt, NOBH); |
2814 | } | 2918 | } |
2919 | if (test_opt(sb, DIOREAD_NOLOCK)) { | ||
2920 | ext4_msg(sb, KERN_WARNING, "dioread_nolock option is " | ||
2921 | "not supported with nobh mode"); | ||
2922 | goto failed_mount_wq; | ||
2923 | } | ||
2815 | } | 2924 | } |
2816 | EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); | 2925 | EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); |
2817 | if (!EXT4_SB(sb)->dio_unwritten_wq) { | 2926 | if (!EXT4_SB(sb)->dio_unwritten_wq) { |
@@ -2876,6 +2985,18 @@ no_journal: | |||
2876 | "requested data journaling mode"); | 2985 | "requested data journaling mode"); |
2877 | clear_opt(sbi->s_mount_opt, DELALLOC); | 2986 | clear_opt(sbi->s_mount_opt, DELALLOC); |
2878 | } | 2987 | } |
2988 | if (test_opt(sb, DIOREAD_NOLOCK)) { | ||
2989 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
2990 | ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " | ||
2991 | "option - requested data journaling mode"); | ||
2992 | clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); | ||
2993 | } | ||
2994 | if (sb->s_blocksize < PAGE_SIZE) { | ||
2995 | ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " | ||
2996 | "option - block size is too small"); | ||
2997 | clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); | ||
2998 | } | ||
2999 | } | ||
2879 | 3000 | ||
2880 | err = ext4_setup_system_zone(sb); | 3001 | err = ext4_setup_system_zone(sb); |
2881 | if (err) { | 3002 | if (err) { |
@@ -3339,10 +3460,9 @@ static void ext4_clear_journal_err(struct super_block *sb, | |||
3339 | char nbuf[16]; | 3460 | char nbuf[16]; |
3340 | 3461 | ||
3341 | errstr = ext4_decode_error(sb, j_errno, nbuf); | 3462 | errstr = ext4_decode_error(sb, j_errno, nbuf); |
3342 | ext4_warning(sb, __func__, "Filesystem error recorded " | 3463 | ext4_warning(sb, "Filesystem error recorded " |
3343 | "from previous mount: %s", errstr); | 3464 | "from previous mount: %s", errstr); |
3344 | ext4_warning(sb, __func__, "Marking fs in need of " | 3465 | ext4_warning(sb, "Marking fs in need of filesystem check."); |
3345 | "filesystem check."); | ||
3346 | 3466 | ||
3347 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | 3467 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; |
3348 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | 3468 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); |
@@ -3493,7 +3613,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3493 | ext4_abort(sb, __func__, "Abort forced by user"); | 3613 | ext4_abort(sb, __func__, "Abort forced by user"); |
3494 | 3614 | ||
3495 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | 3615 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | |
3496 | ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); | 3616 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); |
3497 | 3617 | ||
3498 | es = sbi->s_es; | 3618 | es = sbi->s_es; |
3499 | 3619 | ||
@@ -3668,13 +3788,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
3668 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; | 3788 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; |
3669 | buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - | 3789 | buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - |
3670 | percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); | 3790 | percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); |
3671 | ext4_free_blocks_count_set(es, buf->f_bfree); | ||
3672 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); | 3791 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); |
3673 | if (buf->f_bfree < ext4_r_blocks_count(es)) | 3792 | if (buf->f_bfree < ext4_r_blocks_count(es)) |
3674 | buf->f_bavail = 0; | 3793 | buf->f_bavail = 0; |
3675 | buf->f_files = le32_to_cpu(es->s_inodes_count); | 3794 | buf->f_files = le32_to_cpu(es->s_inodes_count); |
3676 | buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); | 3795 | buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); |
3677 | es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); | ||
3678 | buf->f_namelen = EXT4_NAME_LEN; | 3796 | buf->f_namelen = EXT4_NAME_LEN; |
3679 | fsid = le64_to_cpup((void *)es->s_uuid) ^ | 3797 | fsid = le64_to_cpup((void *)es->s_uuid) ^ |
3680 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); | 3798 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); |
@@ -3689,7 +3807,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
3689 | * Process 1 Process 2 | 3807 | * Process 1 Process 2 |
3690 | * ext4_create() quota_sync() | 3808 | * ext4_create() quota_sync() |
3691 | * jbd2_journal_start() write_dquot() | 3809 | * jbd2_journal_start() write_dquot() |
3692 | * vfs_dq_init() down(dqio_mutex) | 3810 | * dquot_initialize() down(dqio_mutex) |
3693 | * down(dqio_mutex) jbd2_journal_start() | 3811 | * down(dqio_mutex) jbd2_journal_start() |
3694 | * | 3812 | * |
3695 | */ | 3813 | */ |
@@ -3898,9 +4016,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
3898 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); | 4016 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); |
3899 | int err = 0; | 4017 | int err = 0; |
3900 | int offset = off & (sb->s_blocksize - 1); | 4018 | int offset = off & (sb->s_blocksize - 1); |
3901 | int tocopy; | ||
3902 | int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; | 4019 | int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; |
3903 | size_t towrite = len; | ||
3904 | struct buffer_head *bh; | 4020 | struct buffer_head *bh; |
3905 | handle_t *handle = journal_current_handle(); | 4021 | handle_t *handle = journal_current_handle(); |
3906 | 4022 | ||
@@ -3910,52 +4026,53 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
3910 | (unsigned long long)off, (unsigned long long)len); | 4026 | (unsigned long long)off, (unsigned long long)len); |
3911 | return -EIO; | 4027 | return -EIO; |
3912 | } | 4028 | } |
4029 | /* | ||
4030 | * Since we account only one data block in transaction credits, | ||
4031 | * then it is impossible to cross a block boundary. | ||
4032 | */ | ||
4033 | if (sb->s_blocksize - offset < len) { | ||
4034 | ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" | ||
4035 | " cancelled because not block aligned", | ||
4036 | (unsigned long long)off, (unsigned long long)len); | ||
4037 | return -EIO; | ||
4038 | } | ||
4039 | |||
3913 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); | 4040 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); |
3914 | while (towrite > 0) { | 4041 | bh = ext4_bread(handle, inode, blk, 1, &err); |
3915 | tocopy = sb->s_blocksize - offset < towrite ? | 4042 | if (!bh) |
3916 | sb->s_blocksize - offset : towrite; | 4043 | goto out; |
3917 | bh = ext4_bread(handle, inode, blk, 1, &err); | 4044 | if (journal_quota) { |
3918 | if (!bh) | 4045 | err = ext4_journal_get_write_access(handle, bh); |
4046 | if (err) { | ||
4047 | brelse(bh); | ||
3919 | goto out; | 4048 | goto out; |
3920 | if (journal_quota) { | ||
3921 | err = ext4_journal_get_write_access(handle, bh); | ||
3922 | if (err) { | ||
3923 | brelse(bh); | ||
3924 | goto out; | ||
3925 | } | ||
3926 | } | ||
3927 | lock_buffer(bh); | ||
3928 | memcpy(bh->b_data+offset, data, tocopy); | ||
3929 | flush_dcache_page(bh->b_page); | ||
3930 | unlock_buffer(bh); | ||
3931 | if (journal_quota) | ||
3932 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
3933 | else { | ||
3934 | /* Always do at least ordered writes for quotas */ | ||
3935 | err = ext4_jbd2_file_inode(handle, inode); | ||
3936 | mark_buffer_dirty(bh); | ||
3937 | } | 4049 | } |
3938 | brelse(bh); | ||
3939 | if (err) | ||
3940 | goto out; | ||
3941 | offset = 0; | ||
3942 | towrite -= tocopy; | ||
3943 | data += tocopy; | ||
3944 | blk++; | ||
3945 | } | 4050 | } |
4051 | lock_buffer(bh); | ||
4052 | memcpy(bh->b_data+offset, data, len); | ||
4053 | flush_dcache_page(bh->b_page); | ||
4054 | unlock_buffer(bh); | ||
4055 | if (journal_quota) | ||
4056 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
4057 | else { | ||
4058 | /* Always do at least ordered writes for quotas */ | ||
4059 | err = ext4_jbd2_file_inode(handle, inode); | ||
4060 | mark_buffer_dirty(bh); | ||
4061 | } | ||
4062 | brelse(bh); | ||
3946 | out: | 4063 | out: |
3947 | if (len == towrite) { | 4064 | if (err) { |
3948 | mutex_unlock(&inode->i_mutex); | 4065 | mutex_unlock(&inode->i_mutex); |
3949 | return err; | 4066 | return err; |
3950 | } | 4067 | } |
3951 | if (inode->i_size < off+len-towrite) { | 4068 | if (inode->i_size < off + len) { |
3952 | i_size_write(inode, off+len-towrite); | 4069 | i_size_write(inode, off + len); |
3953 | EXT4_I(inode)->i_disksize = inode->i_size; | 4070 | EXT4_I(inode)->i_disksize = inode->i_size; |
3954 | } | 4071 | } |
3955 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 4072 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
3956 | ext4_mark_inode_dirty(handle, inode); | 4073 | ext4_mark_inode_dirty(handle, inode); |
3957 | mutex_unlock(&inode->i_mutex); | 4074 | mutex_unlock(&inode->i_mutex); |
3958 | return len - towrite; | 4075 | return len; |
3959 | } | 4076 | } |
3960 | 4077 | ||
3961 | #endif | 4078 | #endif |
@@ -3966,6 +4083,52 @@ static int ext4_get_sb(struct file_system_type *fs_type, int flags, | |||
3966 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); | 4083 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); |
3967 | } | 4084 | } |
3968 | 4085 | ||
4086 | #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | ||
4087 | static struct file_system_type ext2_fs_type = { | ||
4088 | .owner = THIS_MODULE, | ||
4089 | .name = "ext2", | ||
4090 | .get_sb = ext4_get_sb, | ||
4091 | .kill_sb = kill_block_super, | ||
4092 | .fs_flags = FS_REQUIRES_DEV, | ||
4093 | }; | ||
4094 | |||
4095 | static inline void register_as_ext2(void) | ||
4096 | { | ||
4097 | int err = register_filesystem(&ext2_fs_type); | ||
4098 | if (err) | ||
4099 | printk(KERN_WARNING | ||
4100 | "EXT4-fs: Unable to register as ext2 (%d)\n", err); | ||
4101 | } | ||
4102 | |||
4103 | static inline void unregister_as_ext2(void) | ||
4104 | { | ||
4105 | unregister_filesystem(&ext2_fs_type); | ||
4106 | } | ||
4107 | MODULE_ALIAS("ext2"); | ||
4108 | #else | ||
4109 | static inline void register_as_ext2(void) { } | ||
4110 | static inline void unregister_as_ext2(void) { } | ||
4111 | #endif | ||
4112 | |||
4113 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | ||
4114 | static inline void register_as_ext3(void) | ||
4115 | { | ||
4116 | int err = register_filesystem(&ext3_fs_type); | ||
4117 | if (err) | ||
4118 | printk(KERN_WARNING | ||
4119 | "EXT4-fs: Unable to register as ext3 (%d)\n", err); | ||
4120 | } | ||
4121 | |||
4122 | static inline void unregister_as_ext3(void) | ||
4123 | { | ||
4124 | unregister_filesystem(&ext3_fs_type); | ||
4125 | } | ||
4126 | MODULE_ALIAS("ext3"); | ||
4127 | #else | ||
4128 | static inline void register_as_ext3(void) { } | ||
4129 | static inline void unregister_as_ext3(void) { } | ||
4130 | #endif | ||
4131 | |||
3969 | static struct file_system_type ext4_fs_type = { | 4132 | static struct file_system_type ext4_fs_type = { |
3970 | .owner = THIS_MODULE, | 4133 | .owner = THIS_MODULE, |
3971 | .name = "ext4", | 4134 | .name = "ext4", |
@@ -3995,11 +4158,15 @@ static int __init init_ext4_fs(void) | |||
3995 | err = init_inodecache(); | 4158 | err = init_inodecache(); |
3996 | if (err) | 4159 | if (err) |
3997 | goto out1; | 4160 | goto out1; |
4161 | register_as_ext2(); | ||
4162 | register_as_ext3(); | ||
3998 | err = register_filesystem(&ext4_fs_type); | 4163 | err = register_filesystem(&ext4_fs_type); |
3999 | if (err) | 4164 | if (err) |
4000 | goto out; | 4165 | goto out; |
4001 | return 0; | 4166 | return 0; |
4002 | out: | 4167 | out: |
4168 | unregister_as_ext2(); | ||
4169 | unregister_as_ext3(); | ||
4003 | destroy_inodecache(); | 4170 | destroy_inodecache(); |
4004 | out1: | 4171 | out1: |
4005 | exit_ext4_xattr(); | 4172 | exit_ext4_xattr(); |
@@ -4015,6 +4182,8 @@ out4: | |||
4015 | 4182 | ||
4016 | static void __exit exit_ext4_fs(void) | 4183 | static void __exit exit_ext4_fs(void) |
4017 | { | 4184 | { |
4185 | unregister_as_ext2(); | ||
4186 | unregister_as_ext3(); | ||
4018 | unregister_filesystem(&ext4_fs_type); | 4187 | unregister_filesystem(&ext4_fs_type); |
4019 | destroy_inodecache(); | 4188 | destroy_inodecache(); |
4020 | exit_ext4_xattr(); | 4189 | exit_ext4_xattr(); |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index fed5b01d7a8d..b4c5aa8489d8 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -92,7 +92,7 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *, | |||
92 | struct mb_cache_entry **); | 92 | struct mb_cache_entry **); |
93 | static void ext4_xattr_rehash(struct ext4_xattr_header *, | 93 | static void ext4_xattr_rehash(struct ext4_xattr_header *, |
94 | struct ext4_xattr_entry *); | 94 | struct ext4_xattr_entry *); |
95 | static int ext4_xattr_list(struct inode *inode, char *buffer, | 95 | static int ext4_xattr_list(struct dentry *dentry, char *buffer, |
96 | size_t buffer_size); | 96 | size_t buffer_size); |
97 | 97 | ||
98 | static struct mb_cache *ext4_xattr_cache; | 98 | static struct mb_cache *ext4_xattr_cache; |
@@ -140,7 +140,7 @@ ext4_xattr_handler(int name_index) | |||
140 | ssize_t | 140 | ssize_t |
141 | ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) | 141 | ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) |
142 | { | 142 | { |
143 | return ext4_xattr_list(dentry->d_inode, buffer, size); | 143 | return ext4_xattr_list(dentry, buffer, size); |
144 | } | 144 | } |
145 | 145 | ||
146 | static int | 146 | static int |
@@ -227,7 +227,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, | |||
227 | ea_bdebug(bh, "b_count=%d, refcount=%d", | 227 | ea_bdebug(bh, "b_count=%d, refcount=%d", |
228 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); | 228 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); |
229 | if (ext4_xattr_check_block(bh)) { | 229 | if (ext4_xattr_check_block(bh)) { |
230 | bad_block: ext4_error(inode->i_sb, __func__, | 230 | bad_block: |
231 | ext4_error(inode->i_sb, | ||
231 | "inode %lu: bad block %llu", inode->i_ino, | 232 | "inode %lu: bad block %llu", inode->i_ino, |
232 | EXT4_I(inode)->i_file_acl); | 233 | EXT4_I(inode)->i_file_acl); |
233 | error = -EIO; | 234 | error = -EIO; |
@@ -267,7 +268,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, | |||
267 | void *end; | 268 | void *end; |
268 | int error; | 269 | int error; |
269 | 270 | ||
270 | if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) | 271 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) |
271 | return -ENODATA; | 272 | return -ENODATA; |
272 | error = ext4_get_inode_loc(inode, &iloc); | 273 | error = ext4_get_inode_loc(inode, &iloc); |
273 | if (error) | 274 | if (error) |
@@ -325,7 +326,7 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name, | |||
325 | } | 326 | } |
326 | 327 | ||
327 | static int | 328 | static int |
328 | ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, | 329 | ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry, |
329 | char *buffer, size_t buffer_size) | 330 | char *buffer, size_t buffer_size) |
330 | { | 331 | { |
331 | size_t rest = buffer_size; | 332 | size_t rest = buffer_size; |
@@ -335,9 +336,10 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, | |||
335 | ext4_xattr_handler(entry->e_name_index); | 336 | ext4_xattr_handler(entry->e_name_index); |
336 | 337 | ||
337 | if (handler) { | 338 | if (handler) { |
338 | size_t size = handler->list(inode, buffer, rest, | 339 | size_t size = handler->list(dentry, buffer, rest, |
339 | entry->e_name, | 340 | entry->e_name, |
340 | entry->e_name_len); | 341 | entry->e_name_len, |
342 | handler->flags); | ||
341 | if (buffer) { | 343 | if (buffer) { |
342 | if (size > rest) | 344 | if (size > rest) |
343 | return -ERANGE; | 345 | return -ERANGE; |
@@ -350,8 +352,9 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, | |||
350 | } | 352 | } |
351 | 353 | ||
352 | static int | 354 | static int |
353 | ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) | 355 | ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) |
354 | { | 356 | { |
357 | struct inode *inode = dentry->d_inode; | ||
355 | struct buffer_head *bh = NULL; | 358 | struct buffer_head *bh = NULL; |
356 | int error; | 359 | int error; |
357 | 360 | ||
@@ -369,14 +372,14 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) | |||
369 | ea_bdebug(bh, "b_count=%d, refcount=%d", | 372 | ea_bdebug(bh, "b_count=%d, refcount=%d", |
370 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); | 373 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); |
371 | if (ext4_xattr_check_block(bh)) { | 374 | if (ext4_xattr_check_block(bh)) { |
372 | ext4_error(inode->i_sb, __func__, | 375 | ext4_error(inode->i_sb, |
373 | "inode %lu: bad block %llu", inode->i_ino, | 376 | "inode %lu: bad block %llu", inode->i_ino, |
374 | EXT4_I(inode)->i_file_acl); | 377 | EXT4_I(inode)->i_file_acl); |
375 | error = -EIO; | 378 | error = -EIO; |
376 | goto cleanup; | 379 | goto cleanup; |
377 | } | 380 | } |
378 | ext4_xattr_cache_insert(bh); | 381 | ext4_xattr_cache_insert(bh); |
379 | error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); | 382 | error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); |
380 | 383 | ||
381 | cleanup: | 384 | cleanup: |
382 | brelse(bh); | 385 | brelse(bh); |
@@ -385,15 +388,16 @@ cleanup: | |||
385 | } | 388 | } |
386 | 389 | ||
387 | static int | 390 | static int |
388 | ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) | 391 | ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) |
389 | { | 392 | { |
393 | struct inode *inode = dentry->d_inode; | ||
390 | struct ext4_xattr_ibody_header *header; | 394 | struct ext4_xattr_ibody_header *header; |
391 | struct ext4_inode *raw_inode; | 395 | struct ext4_inode *raw_inode; |
392 | struct ext4_iloc iloc; | 396 | struct ext4_iloc iloc; |
393 | void *end; | 397 | void *end; |
394 | int error; | 398 | int error; |
395 | 399 | ||
396 | if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) | 400 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) |
397 | return 0; | 401 | return 0; |
398 | error = ext4_get_inode_loc(inode, &iloc); | 402 | error = ext4_get_inode_loc(inode, &iloc); |
399 | if (error) | 403 | if (error) |
@@ -404,7 +408,7 @@ ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) | |||
404 | error = ext4_xattr_check_names(IFIRST(header), end); | 408 | error = ext4_xattr_check_names(IFIRST(header), end); |
405 | if (error) | 409 | if (error) |
406 | goto cleanup; | 410 | goto cleanup; |
407 | error = ext4_xattr_list_entries(inode, IFIRST(header), | 411 | error = ext4_xattr_list_entries(dentry, IFIRST(header), |
408 | buffer, buffer_size); | 412 | buffer, buffer_size); |
409 | 413 | ||
410 | cleanup: | 414 | cleanup: |
@@ -423,12 +427,12 @@ cleanup: | |||
423 | * used / required on success. | 427 | * used / required on success. |
424 | */ | 428 | */ |
425 | static int | 429 | static int |
426 | ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) | 430 | ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) |
427 | { | 431 | { |
428 | int i_error, b_error; | 432 | int i_error, b_error; |
429 | 433 | ||
430 | down_read(&EXT4_I(inode)->xattr_sem); | 434 | down_read(&EXT4_I(dentry->d_inode)->xattr_sem); |
431 | i_error = ext4_xattr_ibody_list(inode, buffer, buffer_size); | 435 | i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size); |
432 | if (i_error < 0) { | 436 | if (i_error < 0) { |
433 | b_error = 0; | 437 | b_error = 0; |
434 | } else { | 438 | } else { |
@@ -436,11 +440,11 @@ ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) | |||
436 | buffer += i_error; | 440 | buffer += i_error; |
437 | buffer_size -= i_error; | 441 | buffer_size -= i_error; |
438 | } | 442 | } |
439 | b_error = ext4_xattr_block_list(inode, buffer, buffer_size); | 443 | b_error = ext4_xattr_block_list(dentry, buffer, buffer_size); |
440 | if (b_error < 0) | 444 | if (b_error < 0) |
441 | i_error = 0; | 445 | i_error = 0; |
442 | } | 446 | } |
443 | up_read(&EXT4_I(inode)->xattr_sem); | 447 | up_read(&EXT4_I(dentry->d_inode)->xattr_sem); |
444 | return i_error + b_error; | 448 | return i_error + b_error; |
445 | } | 449 | } |
446 | 450 | ||
@@ -482,15 +486,16 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, | |||
482 | ea_bdebug(bh, "refcount now=0; freeing"); | 486 | ea_bdebug(bh, "refcount now=0; freeing"); |
483 | if (ce) | 487 | if (ce) |
484 | mb_cache_entry_free(ce); | 488 | mb_cache_entry_free(ce); |
485 | ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1); | ||
486 | get_bh(bh); | 489 | get_bh(bh); |
487 | ext4_forget(handle, 1, inode, bh, bh->b_blocknr); | 490 | ext4_free_blocks(handle, inode, bh, 0, 1, |
491 | EXT4_FREE_BLOCKS_METADATA | | ||
492 | EXT4_FREE_BLOCKS_FORGET); | ||
488 | } else { | 493 | } else { |
489 | le32_add_cpu(&BHDR(bh)->h_refcount, -1); | 494 | le32_add_cpu(&BHDR(bh)->h_refcount, -1); |
490 | error = ext4_handle_dirty_metadata(handle, inode, bh); | 495 | error = ext4_handle_dirty_metadata(handle, inode, bh); |
491 | if (IS_SYNC(inode)) | 496 | if (IS_SYNC(inode)) |
492 | ext4_handle_sync(handle); | 497 | ext4_handle_sync(handle); |
493 | vfs_dq_free_block(inode, 1); | 498 | dquot_free_block(inode, 1); |
494 | ea_bdebug(bh, "refcount now=%d; releasing", | 499 | ea_bdebug(bh, "refcount now=%d; releasing", |
495 | le32_to_cpu(BHDR(bh)->h_refcount)); | 500 | le32_to_cpu(BHDR(bh)->h_refcount)); |
496 | if (ce) | 501 | if (ce) |
@@ -661,9 +666,8 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, | |||
661 | atomic_read(&(bs->bh->b_count)), | 666 | atomic_read(&(bs->bh->b_count)), |
662 | le32_to_cpu(BHDR(bs->bh)->h_refcount)); | 667 | le32_to_cpu(BHDR(bs->bh)->h_refcount)); |
663 | if (ext4_xattr_check_block(bs->bh)) { | 668 | if (ext4_xattr_check_block(bs->bh)) { |
664 | ext4_error(sb, __func__, | 669 | ext4_error(sb, "inode %lu: bad block %llu", |
665 | "inode %lu: bad block %llu", inode->i_ino, | 670 | inode->i_ino, EXT4_I(inode)->i_file_acl); |
666 | EXT4_I(inode)->i_file_acl); | ||
667 | error = -EIO; | 671 | error = -EIO; |
668 | goto cleanup; | 672 | goto cleanup; |
669 | } | 673 | } |
@@ -783,8 +787,8 @@ inserted: | |||
783 | else { | 787 | else { |
784 | /* The old block is released after updating | 788 | /* The old block is released after updating |
785 | the inode. */ | 789 | the inode. */ |
786 | error = -EDQUOT; | 790 | error = dquot_alloc_block(inode, 1); |
787 | if (vfs_dq_alloc_block(inode, 1)) | 791 | if (error) |
788 | goto cleanup; | 792 | goto cleanup; |
789 | error = ext4_journal_get_write_access(handle, | 793 | error = ext4_journal_get_write_access(handle, |
790 | new_bh); | 794 | new_bh); |
@@ -832,7 +836,8 @@ inserted: | |||
832 | new_bh = sb_getblk(sb, block); | 836 | new_bh = sb_getblk(sb, block); |
833 | if (!new_bh) { | 837 | if (!new_bh) { |
834 | getblk_failed: | 838 | getblk_failed: |
835 | ext4_free_blocks(handle, inode, block, 1, 1); | 839 | ext4_free_blocks(handle, inode, 0, block, 1, |
840 | EXT4_FREE_BLOCKS_METADATA); | ||
836 | error = -EIO; | 841 | error = -EIO; |
837 | goto cleanup; | 842 | goto cleanup; |
838 | } | 843 | } |
@@ -871,13 +876,12 @@ cleanup: | |||
871 | return error; | 876 | return error; |
872 | 877 | ||
873 | cleanup_dquot: | 878 | cleanup_dquot: |
874 | vfs_dq_free_block(inode, 1); | 879 | dquot_free_block(inode, 1); |
875 | goto cleanup; | 880 | goto cleanup; |
876 | 881 | ||
877 | bad_block: | 882 | bad_block: |
878 | ext4_error(inode->i_sb, __func__, | 883 | ext4_error(inode->i_sb, "inode %lu: bad block %llu", |
879 | "inode %lu: bad block %llu", inode->i_ino, | 884 | inode->i_ino, EXT4_I(inode)->i_file_acl); |
880 | EXT4_I(inode)->i_file_acl); | ||
881 | goto cleanup; | 885 | goto cleanup; |
882 | 886 | ||
883 | #undef header | 887 | #undef header |
@@ -903,7 +907,7 @@ ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, | |||
903 | is->s.base = is->s.first = IFIRST(header); | 907 | is->s.base = is->s.first = IFIRST(header); |
904 | is->s.here = is->s.first; | 908 | is->s.here = is->s.first; |
905 | is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; | 909 | is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; |
906 | if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { | 910 | if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { |
907 | error = ext4_xattr_check_names(IFIRST(header), is->s.end); | 911 | error = ext4_xattr_check_names(IFIRST(header), is->s.end); |
908 | if (error) | 912 | if (error) |
909 | return error; | 913 | return error; |
@@ -935,10 +939,10 @@ ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, | |||
935 | header = IHDR(inode, ext4_raw_inode(&is->iloc)); | 939 | header = IHDR(inode, ext4_raw_inode(&is->iloc)); |
936 | if (!IS_LAST_ENTRY(s->first)) { | 940 | if (!IS_LAST_ENTRY(s->first)) { |
937 | header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); | 941 | header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); |
938 | EXT4_I(inode)->i_state |= EXT4_STATE_XATTR; | 942 | ext4_set_inode_state(inode, EXT4_STATE_XATTR); |
939 | } else { | 943 | } else { |
940 | header->h_magic = cpu_to_le32(0); | 944 | header->h_magic = cpu_to_le32(0); |
941 | EXT4_I(inode)->i_state &= ~EXT4_STATE_XATTR; | 945 | ext4_clear_inode_state(inode, EXT4_STATE_XATTR); |
942 | } | 946 | } |
943 | return 0; | 947 | return 0; |
944 | } | 948 | } |
@@ -981,17 +985,21 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
981 | if (strlen(name) > 255) | 985 | if (strlen(name) > 255) |
982 | return -ERANGE; | 986 | return -ERANGE; |
983 | down_write(&EXT4_I(inode)->xattr_sem); | 987 | down_write(&EXT4_I(inode)->xattr_sem); |
984 | no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND; | 988 | no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); |
985 | EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; | 989 | ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); |
986 | 990 | ||
987 | error = ext4_get_inode_loc(inode, &is.iloc); | 991 | error = ext4_get_inode_loc(inode, &is.iloc); |
988 | if (error) | 992 | if (error) |
989 | goto cleanup; | 993 | goto cleanup; |
990 | 994 | ||
991 | if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { | 995 | error = ext4_journal_get_write_access(handle, is.iloc.bh); |
996 | if (error) | ||
997 | goto cleanup; | ||
998 | |||
999 | if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) { | ||
992 | struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); | 1000 | struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); |
993 | memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); | 1001 | memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); |
994 | EXT4_I(inode)->i_state &= ~EXT4_STATE_NEW; | 1002 | ext4_clear_inode_state(inode, EXT4_STATE_NEW); |
995 | } | 1003 | } |
996 | 1004 | ||
997 | error = ext4_xattr_ibody_find(inode, &i, &is); | 1005 | error = ext4_xattr_ibody_find(inode, &i, &is); |
@@ -1013,9 +1021,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
1013 | if (flags & XATTR_CREATE) | 1021 | if (flags & XATTR_CREATE) |
1014 | goto cleanup; | 1022 | goto cleanup; |
1015 | } | 1023 | } |
1016 | error = ext4_journal_get_write_access(handle, is.iloc.bh); | ||
1017 | if (error) | ||
1018 | goto cleanup; | ||
1019 | if (!value) { | 1024 | if (!value) { |
1020 | if (!is.s.not_found) | 1025 | if (!is.s.not_found) |
1021 | error = ext4_xattr_ibody_set(handle, inode, &i, &is); | 1026 | error = ext4_xattr_ibody_set(handle, inode, &i, &is); |
@@ -1046,7 +1051,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
1046 | ext4_xattr_update_super_block(handle, inode->i_sb); | 1051 | ext4_xattr_update_super_block(handle, inode->i_sb); |
1047 | inode->i_ctime = ext4_current_time(inode); | 1052 | inode->i_ctime = ext4_current_time(inode); |
1048 | if (!value) | 1053 | if (!value) |
1049 | EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; | 1054 | ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); |
1050 | error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); | 1055 | error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); |
1051 | /* | 1056 | /* |
1052 | * The bh is consumed by ext4_mark_iloc_dirty, even with | 1057 | * The bh is consumed by ext4_mark_iloc_dirty, even with |
@@ -1061,7 +1066,7 @@ cleanup: | |||
1061 | brelse(is.iloc.bh); | 1066 | brelse(is.iloc.bh); |
1062 | brelse(bs.bh); | 1067 | brelse(bs.bh); |
1063 | if (no_expand == 0) | 1068 | if (no_expand == 0) |
1064 | EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; | 1069 | ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); |
1065 | up_write(&EXT4_I(inode)->xattr_sem); | 1070 | up_write(&EXT4_I(inode)->xattr_sem); |
1066 | return error; | 1071 | return error; |
1067 | } | 1072 | } |
@@ -1189,9 +1194,8 @@ retry: | |||
1189 | if (!bh) | 1194 | if (!bh) |
1190 | goto cleanup; | 1195 | goto cleanup; |
1191 | if (ext4_xattr_check_block(bh)) { | 1196 | if (ext4_xattr_check_block(bh)) { |
1192 | ext4_error(inode->i_sb, __func__, | 1197 | ext4_error(inode->i_sb, "inode %lu: bad block %llu", |
1193 | "inode %lu: bad block %llu", inode->i_ino, | 1198 | inode->i_ino, EXT4_I(inode)->i_file_acl); |
1194 | EXT4_I(inode)->i_file_acl); | ||
1195 | error = -EIO; | 1199 | error = -EIO; |
1196 | goto cleanup; | 1200 | goto cleanup; |
1197 | } | 1201 | } |
@@ -1296,6 +1300,8 @@ retry: | |||
1296 | 1300 | ||
1297 | /* Remove the chosen entry from the inode */ | 1301 | /* Remove the chosen entry from the inode */ |
1298 | error = ext4_xattr_ibody_set(handle, inode, &i, is); | 1302 | error = ext4_xattr_ibody_set(handle, inode, &i, is); |
1303 | if (error) | ||
1304 | goto cleanup; | ||
1299 | 1305 | ||
1300 | entry = IFIRST(header); | 1306 | entry = IFIRST(header); |
1301 | if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize) | 1307 | if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize) |
@@ -1326,6 +1332,8 @@ retry: | |||
1326 | goto cleanup; | 1332 | goto cleanup; |
1327 | kfree(b_entry_name); | 1333 | kfree(b_entry_name); |
1328 | kfree(buffer); | 1334 | kfree(buffer); |
1335 | b_entry_name = NULL; | ||
1336 | buffer = NULL; | ||
1329 | brelse(is->iloc.bh); | 1337 | brelse(is->iloc.bh); |
1330 | kfree(is); | 1338 | kfree(is); |
1331 | kfree(bs); | 1339 | kfree(bs); |
@@ -1364,16 +1372,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) | |||
1364 | goto cleanup; | 1372 | goto cleanup; |
1365 | bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); | 1373 | bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); |
1366 | if (!bh) { | 1374 | if (!bh) { |
1367 | ext4_error(inode->i_sb, __func__, | 1375 | ext4_error(inode->i_sb, "inode %lu: block %llu read error", |
1368 | "inode %lu: block %llu read error", inode->i_ino, | 1376 | inode->i_ino, EXT4_I(inode)->i_file_acl); |
1369 | EXT4_I(inode)->i_file_acl); | ||
1370 | goto cleanup; | 1377 | goto cleanup; |
1371 | } | 1378 | } |
1372 | if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || | 1379 | if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || |
1373 | BHDR(bh)->h_blocks != cpu_to_le32(1)) { | 1380 | BHDR(bh)->h_blocks != cpu_to_le32(1)) { |
1374 | ext4_error(inode->i_sb, __func__, | 1381 | ext4_error(inode->i_sb, "inode %lu: bad block %llu", |
1375 | "inode %lu: bad block %llu", inode->i_ino, | 1382 | inode->i_ino, EXT4_I(inode)->i_file_acl); |
1376 | EXT4_I(inode)->i_file_acl); | ||
1377 | goto cleanup; | 1383 | goto cleanup; |
1378 | } | 1384 | } |
1379 | ext4_xattr_release_block(handle, inode, bh); | 1385 | ext4_xattr_release_block(handle, inode, bh); |
@@ -1498,7 +1504,7 @@ again: | |||
1498 | } | 1504 | } |
1499 | bh = sb_bread(inode->i_sb, ce->e_block); | 1505 | bh = sb_bread(inode->i_sb, ce->e_block); |
1500 | if (!bh) { | 1506 | if (!bh) { |
1501 | ext4_error(inode->i_sb, __func__, | 1507 | ext4_error(inode->i_sb, |
1502 | "inode %lu: block %lu read error", | 1508 | "inode %lu: block %lu read error", |
1503 | inode->i_ino, (unsigned long) ce->e_block); | 1509 | inode->i_ino, (unsigned long) ce->e_block); |
1504 | } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= | 1510 | } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= |
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index ca5f89fc6cae..8b145e98df07 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c | |||
@@ -7,13 +7,14 @@ | |||
7 | #include <linux/string.h> | 7 | #include <linux/string.h> |
8 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
9 | #include <linux/security.h> | 9 | #include <linux/security.h> |
10 | #include <linux/slab.h> | ||
10 | #include "ext4_jbd2.h" | 11 | #include "ext4_jbd2.h" |
11 | #include "ext4.h" | 12 | #include "ext4.h" |
12 | #include "xattr.h" | 13 | #include "xattr.h" |
13 | 14 | ||
14 | static size_t | 15 | static size_t |
15 | ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size, | 16 | ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, |
16 | const char *name, size_t name_len) | 17 | const char *name, size_t name_len, int type) |
17 | { | 18 | { |
18 | const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; | 19 | const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; |
19 | const size_t total_len = prefix_len + name_len + 1; | 20 | const size_t total_len = prefix_len + name_len + 1; |
@@ -28,23 +29,23 @@ ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size, | |||
28 | } | 29 | } |
29 | 30 | ||
30 | static int | 31 | static int |
31 | ext4_xattr_security_get(struct inode *inode, const char *name, | 32 | ext4_xattr_security_get(struct dentry *dentry, const char *name, |
32 | void *buffer, size_t size) | 33 | void *buffer, size_t size, int type) |
33 | { | 34 | { |
34 | if (strcmp(name, "") == 0) | 35 | if (strcmp(name, "") == 0) |
35 | return -EINVAL; | 36 | return -EINVAL; |
36 | return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name, | 37 | return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY, |
37 | buffer, size); | 38 | name, buffer, size); |
38 | } | 39 | } |
39 | 40 | ||
40 | static int | 41 | static int |
41 | ext4_xattr_security_set(struct inode *inode, const char *name, | 42 | ext4_xattr_security_set(struct dentry *dentry, const char *name, |
42 | const void *value, size_t size, int flags) | 43 | const void *value, size_t size, int flags, int type) |
43 | { | 44 | { |
44 | if (strcmp(name, "") == 0) | 45 | if (strcmp(name, "") == 0) |
45 | return -EINVAL; | 46 | return -EINVAL; |
46 | return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name, | 47 | return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY, |
47 | value, size, flags); | 48 | name, value, size, flags); |
48 | } | 49 | } |
49 | 50 | ||
50 | int | 51 | int |
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index ac1a52cf2a37..15b50edc6587 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c | |||
@@ -14,8 +14,8 @@ | |||
14 | #include "xattr.h" | 14 | #include "xattr.h" |
15 | 15 | ||
16 | static size_t | 16 | static size_t |
17 | ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, | 17 | ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, |
18 | const char *name, size_t name_len) | 18 | const char *name, size_t name_len, int type) |
19 | { | 19 | { |
20 | const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; | 20 | const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; |
21 | const size_t total_len = prefix_len + name_len + 1; | 21 | const size_t total_len = prefix_len + name_len + 1; |
@@ -32,23 +32,23 @@ ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, | |||
32 | } | 32 | } |
33 | 33 | ||
34 | static int | 34 | static int |
35 | ext4_xattr_trusted_get(struct inode *inode, const char *name, | 35 | ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer, |
36 | void *buffer, size_t size) | 36 | size_t size, int type) |
37 | { | 37 | { |
38 | if (strcmp(name, "") == 0) | 38 | if (strcmp(name, "") == 0) |
39 | return -EINVAL; | 39 | return -EINVAL; |
40 | return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name, | 40 | return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED, |
41 | buffer, size); | 41 | name, buffer, size); |
42 | } | 42 | } |
43 | 43 | ||
44 | static int | 44 | static int |
45 | ext4_xattr_trusted_set(struct inode *inode, const char *name, | 45 | ext4_xattr_trusted_set(struct dentry *dentry, const char *name, |
46 | const void *value, size_t size, int flags) | 46 | const void *value, size_t size, int flags, int type) |
47 | { | 47 | { |
48 | if (strcmp(name, "") == 0) | 48 | if (strcmp(name, "") == 0) |
49 | return -EINVAL; | 49 | return -EINVAL; |
50 | return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name, | 50 | return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED, |
51 | value, size, flags); | 51 | name, value, size, flags); |
52 | } | 52 | } |
53 | 53 | ||
54 | struct xattr_handler ext4_xattr_trusted_handler = { | 54 | struct xattr_handler ext4_xattr_trusted_handler = { |
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index d91aa61b42aa..c4ce05746ce1 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c | |||
@@ -13,13 +13,13 @@ | |||
13 | #include "xattr.h" | 13 | #include "xattr.h" |
14 | 14 | ||
15 | static size_t | 15 | static size_t |
16 | ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, | 16 | ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, |
17 | const char *name, size_t name_len) | 17 | const char *name, size_t name_len, int type) |
18 | { | 18 | { |
19 | const size_t prefix_len = XATTR_USER_PREFIX_LEN; | 19 | const size_t prefix_len = XATTR_USER_PREFIX_LEN; |
20 | const size_t total_len = prefix_len + name_len + 1; | 20 | const size_t total_len = prefix_len + name_len + 1; |
21 | 21 | ||
22 | if (!test_opt(inode->i_sb, XATTR_USER)) | 22 | if (!test_opt(dentry->d_sb, XATTR_USER)) |
23 | return 0; | 23 | return 0; |
24 | 24 | ||
25 | if (list && total_len <= list_size) { | 25 | if (list && total_len <= list_size) { |
@@ -31,26 +31,27 @@ ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, | |||
31 | } | 31 | } |
32 | 32 | ||
33 | static int | 33 | static int |
34 | ext4_xattr_user_get(struct inode *inode, const char *name, | 34 | ext4_xattr_user_get(struct dentry *dentry, const char *name, |
35 | void *buffer, size_t size) | 35 | void *buffer, size_t size, int type) |
36 | { | 36 | { |
37 | if (strcmp(name, "") == 0) | 37 | if (strcmp(name, "") == 0) |
38 | return -EINVAL; | 38 | return -EINVAL; |
39 | if (!test_opt(inode->i_sb, XATTR_USER)) | 39 | if (!test_opt(dentry->d_sb, XATTR_USER)) |
40 | return -EOPNOTSUPP; | 40 | return -EOPNOTSUPP; |
41 | return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); | 41 | return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_USER, |
42 | name, buffer, size); | ||
42 | } | 43 | } |
43 | 44 | ||
44 | static int | 45 | static int |
45 | ext4_xattr_user_set(struct inode *inode, const char *name, | 46 | ext4_xattr_user_set(struct dentry *dentry, const char *name, |
46 | const void *value, size_t size, int flags) | 47 | const void *value, size_t size, int flags, int type) |
47 | { | 48 | { |
48 | if (strcmp(name, "") == 0) | 49 | if (strcmp(name, "") == 0) |
49 | return -EINVAL; | 50 | return -EINVAL; |
50 | if (!test_opt(inode->i_sb, XATTR_USER)) | 51 | if (!test_opt(dentry->d_sb, XATTR_USER)) |
51 | return -EOPNOTSUPP; | 52 | return -EOPNOTSUPP; |
52 | return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name, | 53 | return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_USER, |
53 | value, size, flags); | 54 | name, value, size, flags); |
54 | } | 55 | } |
55 | 56 | ||
56 | struct xattr_handler ext4_xattr_user_handler = { | 57 | struct xattr_handler ext4_xattr_user_handler = { |