author     Glenn Elliott <gelliott@cs.unc.edu>   2012-03-04 19:47:13 -0500
committer  Glenn Elliott <gelliott@cs.unc.edu>   2012-03-04 19:47:13 -0500
commit     c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree       ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/ext4
parent     ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent     6a00f206debf8a5c8899055726ad127dbeeed098 (diff)

Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c
Diffstat (limited to 'fs/ext4')
-rw-r--r--  fs/ext4/Makefile          |    5
-rw-r--r--  fs/ext4/acl.c             |   13
-rw-r--r--  fs/ext4/acl.h             |    2
-rw-r--r--  fs/ext4/balloc.c          |  157
-rw-r--r--  fs/ext4/block_validity.c  |    7
-rw-r--r--  fs/ext4/dir.c             |   58
-rw-r--r--  fs/ext4/ext4.h            |  347
-rw-r--r--  fs/ext4/ext4_extents.h    |   82
-rw-r--r--  fs/ext4/ext4_jbd2.c       |   14
-rw-r--r--  fs/ext4/ext4_jbd2.h       |   18
-rw-r--r--  fs/ext4/extents.c         | 2024
-rw-r--r--  fs/ext4/file.c            |  129
-rw-r--r--  fs/ext4/fsync.c           |  142
-rw-r--r--  fs/ext4/ialloc.c          |  147
-rw-r--r--  fs/ext4/inode.c           | 1198
-rw-r--r--  fs/ext4/ioctl.c           |   39
-rw-r--r--  fs/ext4/mballoc.c         |  893
-rw-r--r--  fs/ext4/mballoc.h         |    8
-rw-r--r--  fs/ext4/migrate.c         |   18
-rw-r--r--  fs/ext4/mmp.c             |  351
-rw-r--r--  fs/ext4/move_extent.c     |   35
-rw-r--r--  fs/ext4/namei.c           |  236
-rw-r--r--  fs/ext4/page-io.c         |  417
-rw-r--r--  fs/ext4/resize.c          |  125
-rw-r--r--  fs/ext4/super.c           | 1231
-rw-r--r--  fs/ext4/xattr.c           |   40
-rw-r--r--  fs/ext4/xattr.h           |   14
-rw-r--r--  fs/ext4/xattr_security.c  |    5
28 files changed, 5144 insertions(+), 2611 deletions(-)
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8867b2a1e5fe..04109460ba9e 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -4,9 +4,10 @@
 
 obj-$(CONFIG_EXT4_FS) += ext4.o
 
-ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
+ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
+		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
+		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
+		mmp.o
 
 ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 5e2ed4504ead..21eacd7b7d79 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -238,10 +238,17 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 }
 
 int
-ext4_check_acl(struct inode *inode, int mask)
+ext4_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
-	struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
+	struct posix_acl *acl;
+
+	if (flags & IPERM_FLAG_RCU) {
+		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+			return -ECHILD;
+		return -EAGAIN;
+	}
 
+	acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl) {
@@ -426,7 +433,7 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
 		return -EINVAL;
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
-	if (!is_owner_or_cap(inode))
+	if (!inode_owner_or_capable(inode))
 		return -EPERM;
 
 	if (value) {
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 9d843d5deac4..dec821168fd4 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,7 +54,7 @@ static inline int ext4_acl_count(size_t size)
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 
 /* acl.c */
-extern int ext4_check_acl(struct inode *, int);
+extern int ext4_check_acl(struct inode *, int, unsigned int);
 extern int ext4_acl_chmod(struct inode *);
 extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
 
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index bd30799a43ed..264f6949511e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -21,6 +21,8 @@
 #include "ext4_jbd2.h"
 #include "mballoc.h"
 
+#include <trace/events/ext4.h>
+
 /*
  * balloc.c contains the blocks allocation and deallocation routines
  */
@@ -171,7 +173,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
		 * less than the blocksize * 8 ( which is the size
		 * of bitmap ), set rest of the block bitmap to 1
		 */
-		mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
+		ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
+				     bh->b_data);
 	}
 	return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
 }
@@ -341,6 +344,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
		 * We do it here so the bitmap uptodate bit
		 * get set with buffer lock held.
		 */
+		trace_ext4_read_block_bitmap_load(sb, block_group);
 		set_bitmap_uptodate(bh);
 		if (bh_submit_read(bh) < 0) {
 			put_bh(bh);
@@ -358,130 +362,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 }
 
 /**
- * ext4_add_groupblocks() -- Add given blocks to an existing group
- * @handle:	handle to this transaction
- * @sb:		super block
- * @block:	start physcial block to add to the block group
- * @count:	number of blocks to free
- *
- * This marks the blocks as free in the bitmap. We ask the
- * mballoc to reload the buddy after this by setting group
- * EXT4_GROUP_INFO_NEED_INIT_BIT flag
- */
-void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
-			 ext4_fsblk_t block, unsigned long count)
-{
-	struct buffer_head *bitmap_bh = NULL;
-	struct buffer_head *gd_bh;
-	ext4_group_t block_group;
-	ext4_grpblk_t bit;
-	unsigned int i;
-	struct ext4_group_desc *desc;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	int err = 0, ret, blk_free_count;
-	ext4_grpblk_t blocks_freed;
-	struct ext4_group_info *grp;
-
-	ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
-
-	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
-	grp = ext4_get_group_info(sb, block_group);
-	/*
-	 * Check to see if we are freeing blocks across a group
-	 * boundary.
-	 */
-	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
-		goto error_return;
-	}
-	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
-	if (!bitmap_bh)
-		goto error_return;
-	desc = ext4_get_group_desc(sb, block_group, &gd_bh);
-	if (!desc)
-		goto error_return;
-
-	if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
-	    in_range(ext4_inode_bitmap(sb, desc), block, count) ||
-	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
-	    in_range(block + count - 1, ext4_inode_table(sb, desc),
-		     sbi->s_itb_per_group)) {
-		ext4_error(sb, "Adding blocks in system zones - "
-			   "Block = %llu, count = %lu",
-			   block, count);
-		goto error_return;
-	}
-
-	/*
-	 * We are about to add blocks to the bitmap,
-	 * so we need undo access.
-	 */
-	BUFFER_TRACE(bitmap_bh, "getting undo access");
-	err = ext4_journal_get_undo_access(handle, bitmap_bh);
-	if (err)
-		goto error_return;
-
-	/*
-	 * We are about to modify some metadata.  Call the journal APIs
-	 * to unshare ->b_data if a currently-committing transaction is
-	 * using it
-	 */
-	BUFFER_TRACE(gd_bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, gd_bh);
-	if (err)
-		goto error_return;
-	/*
-	 * make sure we don't allow a parallel init on other groups in the
-	 * same buddy cache
-	 */
-	down_write(&grp->alloc_sem);
-	for (i = 0, blocks_freed = 0; i < count; i++) {
-		BUFFER_TRACE(bitmap_bh, "clear bit");
-		if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
-						bit + i, bitmap_bh->b_data)) {
-			ext4_error(sb, "bit already cleared for block %llu",
-				   (ext4_fsblk_t)(block + i));
-			BUFFER_TRACE(bitmap_bh, "bit already cleared");
-		} else {
-			blocks_freed++;
-		}
-	}
-	ext4_lock_group(sb, block_group);
-	blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
-	ext4_free_blks_set(sb, desc, blk_free_count);
-	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
-	ext4_unlock_group(sb, block_group);
-	percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
-
-	if (sbi->s_log_groups_per_flex) {
-		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-		atomic_add(blocks_freed,
-			   &sbi->s_flex_groups[flex_group].free_blocks);
-	}
-	/*
-	 * request to reload the buddy with the
-	 * new bitmap information
-	 */
-	set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
-	grp->bb_free += blocks_freed;
-	up_write(&grp->alloc_sem);
-
-	/* We dirtied the bitmap block */
-	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
-	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
-
-	/* And the group descriptor block */
-	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
-	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
-	if (!err)
-		err = ret;
-
-error_return:
-	brelse(bitmap_bh);
-	ext4_std_error(sb, err);
-	return;
-}
-
-/**
  * ext4_has_free_blocks()
  * @sbi:	in-core super block structure.
  * @nblocks:	number of needed blocks
@@ -489,7 +369,8 @@ error_return:
  * Check if filesystem has nblocks free & available for allocation.
  * On success return 1, return 0 on failure.
  */
-int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
+static int ext4_has_free_blocks(struct ext4_sb_info *sbi,
+				s64 nblocks, unsigned int flags)
 {
 	s64 free_blocks, dirty_blocks, root_blocks;
 	struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
@@ -503,11 +384,6 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
 	    EXT4_FREEBLOCKS_WATERMARK) {
 		free_blocks  = percpu_counter_sum_positive(fbc);
 		dirty_blocks = percpu_counter_sum_positive(dbc);
-		if (dirty_blocks < 0) {
-			printk(KERN_CRIT "Dirty block accounting "
-					"went wrong %lld\n",
-					(long long)dirty_blocks);
-		}
 	}
 	/* Check whether we have space after
	 * accounting for current dirty blocks & root reserved blocks.
@@ -518,7 +394,9 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
 	/* Hm, nope. Are (enough) root reserved blocks available? */
 	if (sbi->s_resuid == current_fsuid() ||
 	    ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
-	    capable(CAP_SYS_RESOURCE)) {
+	    capable(CAP_SYS_RESOURCE) ||
+	    (flags & EXT4_MB_USE_ROOT_BLOCKS)) {
+
 		if (free_blocks >= (nblocks + dirty_blocks))
 			return 1;
 	}
@@ -527,9 +405,9 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
 }
 
 int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
-			   s64 nblocks)
+			   s64 nblocks, unsigned int flags)
 {
-	if (ext4_has_free_blocks(sbi, nblocks)) {
+	if (ext4_has_free_blocks(sbi, nblocks, flags)) {
 		percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks);
 		return 0;
 	} else
@@ -543,14 +421,14 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
 *
 * ext4_should_retry_alloc() is called when ENOSPC is returned, and if
 * it is profitable to retry the operation, this function will wait
- * for the current or commiting transaction to complete, and then
+ * for the current or committing transaction to complete, and then
 * return TRUE.
 *
 * if the total number of retries exceed three times, return FALSE.
 */
 int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 {
-	if (!ext4_has_free_blocks(EXT4_SB(sb), 1) ||
+	if (!ext4_has_free_blocks(EXT4_SB(sb), 1, 0) ||
	    (*retries)++ > 3 ||
	    !EXT4_SB(sb)->s_journal)
		return 0;
@@ -573,7 +451,8 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 * error stores in errp pointer
 */
 ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
-		ext4_fsblk_t goal, unsigned long *count, int *errp)
+				  ext4_fsblk_t goal, unsigned int flags,
+				  unsigned long *count, int *errp)
 {
 	struct ext4_allocation_request ar;
 	ext4_fsblk_t ret;
@@ -583,6 +462,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 	ar.inode = inode;
 	ar.goal = goal;
 	ar.len = count ? *count : 1;
+	ar.flags = flags;
 
 	ret = ext4_mb_new_blocks(handle, &ar, errp);
 	if (count)
@@ -591,7 +471,8 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
	 * Account for the allocated meta blocks.  We will never
	 * fail EDQUOT for metdata, but we do account for it.
	 */
-	if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
+	if (!(*errp) &&
+	    ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
 		spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 		EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
 		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
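The flags plumbing above is easiest to read from the caller side: ext4_new_meta_blocks() now forwards an allocation-flags word into the mballoc request (ar.flags), and EXT4_MB_USE_ROOT_BLOCKS lets a caller dip into the root-reserved block pool when the normal pool is exhausted. A hedged sketch of a call site after this change (hypothetical helper, not a verbatim ext4 function):

static ext4_fsblk_t example_alloc_meta_block(handle_t *handle,
					     struct inode *inode,
					     ext4_fsblk_t goal, int *errp)
{
	unsigned long count = 1;

	/* Allow root-reserved blocks so that paths which must make
	 * forward progress (e.g. truncate) do not fail with ENOSPC. */
	return ext4_new_meta_blocks(handle, inode, goal,
				    EXT4_MB_USE_ROOT_BLOCKS, &count, errp);
}

On failure the function returns 0 and stores the error (e.g. -ENOSPC) through errp, as before.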
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 3db5084db9bd..fac90f3fba80 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -29,16 +29,15 @@ struct ext4_system_zone {
 
 static struct kmem_cache *ext4_system_zone_cachep;
 
-int __init init_ext4_system_zone(void)
+int __init ext4_init_system_zone(void)
 {
-	ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
-					     SLAB_RECLAIM_ACCOUNT);
+	ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
 	if (ext4_system_zone_cachep == NULL)
 		return -ENOMEM;
 	return 0;
 }
 
-void exit_ext4_system_zone(void)
+void ext4_exit_system_zone(void)
 {
 	kmem_cache_destroy(ext4_system_zone_cachep);
 }
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 374510f72baa..164c56092e58 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -39,7 +39,7 @@ static int ext4_release_dir(struct inode *inode,
				struct file *filp);
 
 const struct file_operations ext4_dir_operations = {
-	.llseek		= generic_file_llseek,
+	.llseek		= ext4_llseek,
 	.read		= generic_read_dir,
 	.readdir	= ext4_readdir,		/* we take BKL. needed?*/
 	.unlocked_ioctl = ext4_ioctl,
@@ -60,9 +60,13 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
 	return (ext4_filetype_table[filetype]);
 }
 
-
+/*
+ * Return 0 if the directory entry is OK, and 1 if there is a problem
+ *
+ * Note: this is the opposite of what ext2 and ext3 historically returned...
+ */
 int __ext4_check_dir_entry(const char *function, unsigned int line,
-			   struct inode *dir,
+			   struct inode *dir, struct file *filp,
			   struct ext4_dir_entry_2 *de,
			   struct buffer_head *bh,
			   unsigned int offset)
@@ -71,26 +75,37 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
 	const int rlen = ext4_rec_len_from_disk(de->rec_len,
						dir->i_sb->s_blocksize);
 
-	if (rlen < EXT4_DIR_REC_LEN(1))
+	if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
 		error_msg = "rec_len is smaller than minimal";
-	else if (rlen % 4 != 0)
+	else if (unlikely(rlen % 4 != 0))
 		error_msg = "rec_len % 4 != 0";
-	else if (rlen < EXT4_DIR_REC_LEN(de->name_len))
+	else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
 		error_msg = "rec_len is too small for name_len";
-	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
+	else if (unlikely(((char *) de - bh->b_data) + rlen >
+			  dir->i_sb->s_blocksize))
 		error_msg = "directory entry across blocks";
-	else if (le32_to_cpu(de->inode) >
-			le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count))
+	else if (unlikely(le32_to_cpu(de->inode) >
+			le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
 		error_msg = "inode out of bounds";
+	else
+		return 0;
 
-	if (error_msg != NULL)
-		ext4_error_inode(dir, function, line, bh->b_blocknr,
-			"bad entry in directory: %s - "
-			"offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
-			error_msg, (unsigned) (offset%bh->b_size), offset,
-			le32_to_cpu(de->inode),
-			rlen, de->name_len);
-	return error_msg == NULL ? 1 : 0;
+	if (filp)
+		ext4_error_file(filp, function, line, bh ? bh->b_blocknr : 0,
+				"bad entry in directory: %s - offset=%u(%u), "
+				"inode=%u, rec_len=%d, name_len=%d",
+				error_msg, (unsigned) (offset%bh->b_size),
+				offset, le32_to_cpu(de->inode),
+				rlen, de->name_len);
+	else
+		ext4_error_inode(dir, function, line, bh ? bh->b_blocknr : 0,
+				"bad entry in directory: %s - offset=%u(%u), "
+				"inode=%u, rec_len=%d, name_len=%d",
+				error_msg, (unsigned) (offset%bh->b_size),
+				offset, le32_to_cpu(de->inode),
+				rlen, de->name_len);
+
+	return 1;
 }
 
 static int ext4_readdir(struct file *filp,
@@ -152,8 +167,9 @@ static int ext4_readdir(struct file *filp,
		 */
		if (!bh) {
			if (!dir_has_error) {
-				EXT4_ERROR_INODE(inode, "directory "
-					   "contains a hole at offset %Lu",
+				EXT4_ERROR_FILE(filp, 0,
+						"directory contains a "
+						"hole at offset %llu",
					   (unsigned long long) filp->f_pos);
				dir_has_error = 1;
			}
@@ -194,8 +210,8 @@ revalidate:
		while (!error && filp->f_pos < inode->i_size
		       && offset < sb->s_blocksize) {
			de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
-			if (!ext4_check_dir_entry(inode, de,
-						  bh, offset)) {
+			if (ext4_check_dir_entry(inode, filp, de,
						 bh, offset)) {
				/*
				 * On error, skip the f_pos to the next block
				 */
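Note that the return convention of __ext4_check_dir_entry is inverted relative to the old code (0 now means the entry is valid), so every caller flips its test, as the ext4_readdir hunk above shows. A condensed caller-side sketch (the label is illustrative):

/* old convention: nonzero meant the entry was OK */
if (!ext4_check_dir_entry(inode, de, bh, offset))
	goto skip_block;

/* new convention: nonzero means the entry is bad; the macro also wraps
 * the call in unlikely() so the common, valid-entry path predicts well */
if (ext4_check_dir_entry(inode, filp, de, bh, offset))
	goto skip_block;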
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 889ec9d5e6ad..1921392cd708 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -62,8 +62,8 @@
 #define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \
 	ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
 
-#define EXT4_ERROR_FILE(file, fmt, a...)	\
-	ext4_error_file(__func__, __LINE__, (file), (fmt), ## a)
+#define EXT4_ERROR_FILE(file, block, fmt, a...)	\
+	ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
 
 /* data type for block offset of block group */
 typedef int ext4_grpblk_t;
@@ -108,7 +108,8 @@ typedef unsigned int ext4_group_t;
 #define EXT4_MB_DELALLOC_RESERVED	0x0400
 /* We are doing stream allocation */
 #define EXT4_MB_STREAM_ALLOC		0x0800
-
+/* Use reserved root blocks if needed */
+#define EXT4_MB_USE_ROOT_BLOCKS		0x1000
 
 struct ext4_allocation_request {
 	/* target inode for block we're allocating */
@@ -168,7 +169,20 @@ struct mpage_da_data {
 	int pages_written;
 	int retval;
 };
-#define	EXT4_IO_UNWRITTEN	0x1
+
+/*
+ * Flags for ext4_io_end->flags
+ */
+#define	EXT4_IO_END_UNWRITTEN	0x0001
+#define EXT4_IO_END_ERROR	0x0002
+
+struct ext4_io_page {
+	struct page	*p_page;
+	atomic_t	p_count;
+};
+
+#define MAX_IO_PAGES	128
+
 typedef struct ext4_io_end {
 	struct list_head	list;		/* per-file finished IO list */
 	struct inode		*inode;		/* file being written to */
@@ -179,13 +193,25 @@ typedef struct ext4_io_end {
 	struct work_struct	work;		/* data work queue */
 	struct kiocb		*iocb;		/* iocb struct for AIO */
 	int			result;		/* error value for AIO */
+	int			num_io_pages;
+	struct ext4_io_page	*pages[MAX_IO_PAGES];
 } ext4_io_end_t;
 
+struct ext4_io_submit {
+	int			io_op;
+	struct bio		*io_bio;
+	ext4_io_end_t		*io_end;
+	struct ext4_io_page	*io_page;
+	sector_t		io_next_block;
+};
+
 /*
  * Special inodes numbers
  */
 #define	EXT4_BAD_INO		 1	/* Bad blocks inode */
 #define EXT4_ROOT_INO		 2	/* Root inode */
+#define EXT4_USR_QUOTA_INO	 3	/* User quota inode */
+#define EXT4_GRP_QUOTA_INO	 4	/* Group quota inode */
 #define EXT4_BOOT_LOADER_INO	 5	/* Boot loader inode */
 #define EXT4_UNDEL_DIR_INO	 6	/* Undelete directory inode */
 #define EXT4_RESIZE_INO		 7	/* Reserved group descriptors inode */
@@ -205,6 +231,7 @@ typedef struct ext4_io_end {
 #define	EXT4_MIN_BLOCK_SIZE		1024
 #define	EXT4_MAX_BLOCK_SIZE		65536
 #define EXT4_MIN_BLOCK_LOG_SIZE		10
+#define EXT4_MAX_BLOCK_LOG_SIZE		16
 #ifdef __KERNEL__
 # define EXT4_BLOCK_SIZE(s)		((s)->s_blocksize)
 #else
@@ -488,6 +515,10 @@ struct ext4_new_group_data {
	/* Convert extent to initialized after IO complete */
 #define EXT4_GET_BLOCKS_IO_CONVERT_EXT	(EXT4_GET_BLOCKS_CONVERT|\
					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
+	/* Punch out blocks of an extent */
+#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT		0x0020
+	/* Don't normalize allocation size (used for fallocate) */
+#define EXT4_GET_BLOCKS_NO_NORMALIZE		0x0040
 
 /*
  * Flags used by ext4_free_blocks
@@ -537,23 +568,7 @@ struct ext4_new_group_data {
 #define EXT4_IOC32_SETVERSION_OLD	FS_IOC32_SETVERSION
 #endif
 
-
-/*
- *  Mount options
- */
-struct ext4_mount_options {
-	unsigned long s_mount_opt;
-	uid_t s_resuid;
-	gid_t s_resgid;
-	unsigned long s_commit_interval;
-	u32 s_min_batch_time, s_max_batch_time;
-#ifdef CONFIG_QUOTA
-	int s_jquota_fmt;
-	char *s_qf_names[MAXQUOTAS];
-#endif
-};
-
-/* Max physical block we can addres w/o extents */
+/* Max physical block we can address w/o extents */
 #define EXT4_MAX_BLOCK_FILE_PHYS	0xFFFFFFFF
 
 /*
@@ -685,6 +700,8 @@ do { \
	if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra))	\
		ext4_decode_extra_time(&(inode)->xtime,				\
				       raw_inode->xtime ## _extra);		\
+	else									\
+		(inode)->xtime.tv_nsec = 0;					\
 } while (0)
 
 #define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode)			       \
@@ -695,6 +712,8 @@ do { \
	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))		\
		ext4_decode_extra_time(&(einode)->xtime,			\
				       raw_inode->xtime ## _extra);		\
+	else									\
+		(einode)->xtime.tv_nsec = 0;					\
 } while (0)
 
 #define i_disk_version osd1.linux1.l_i_version
@@ -726,12 +745,13 @@ do { \
 
 /*
  * storage for cached extent
+ * If ec_len == 0, then the cache is invalid.
+ * If ec_start == 0, then the cache represents a gap (null mapping)
  */
 struct ext4_ext_cache {
 	ext4_fsblk_t	ec_start;
 	ext4_lblk_t	ec_block;
 	__u32		ec_len; /* must be 32bit to return holes */
-	__u32		ec_type;
 };
 
 /*
@@ -750,10 +770,12 @@ struct ext4_inode_info {
	 * near to their parent directory's inode.
	 */
 	ext4_group_t	i_block_group;
+	ext4_lblk_t	i_dir_start_lookup;
+#if (BITS_PER_LONG < 64)
 	unsigned long	i_state_flags;		/* Dynamic state flags */
+#endif
 	unsigned long	i_flags;
 
-	ext4_lblk_t		i_dir_start_lookup;
 #ifdef CONFIG_EXT4_FS_XATTR
	/*
	 * Extended attributes can be read independently of the main file
@@ -796,7 +818,7 @@ struct ext4_inode_info {
	 */
 	struct rw_semaphore i_data_sem;
 	struct inode vfs_inode;
-	struct jbd2_inode jinode;
+	struct jbd2_inode *jinode;
 
 	struct ext4_ext_cache i_cached_extent;
	/*
@@ -816,14 +838,12 @@ struct ext4_inode_info {
 	unsigned int i_reserved_data_blocks;
 	unsigned int i_reserved_meta_blocks;
 	unsigned int i_allocated_meta_blocks;
-	unsigned short i_delalloc_reserved_flag;
-	sector_t i_da_metadata_calc_last_lblock;
+	ext4_lblk_t i_da_metadata_calc_last_lblock;
 	int i_da_metadata_calc_len;
 
	/* on-disk additional length */
 	__u16 i_extra_isize;
 
-	spinlock_t i_block_reservation_lock;
 #ifdef CONFIG_QUOTA
	/* quota space reservation, managed internally by quota code */
 	qsize_t i_reserved_quota;
@@ -832,8 +852,12 @@ struct ext4_inode_info {
	/* completed IOs that might need unwritten extents handling */
 	struct list_head i_completed_io_list;
 	spinlock_t i_completed_io_lock;
+	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
	/* current io_end structure for async DIO write*/
 	ext4_io_end_t *cur_aio_dio;
+	atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
+
+	spinlock_t i_block_reservation_lock;
 
	/*
	 * Transactions that contain inode's metadata needed to complete
@@ -885,24 +909,35 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
 #define EXT4_MOUNT_I_VERSION		0x2000000 /* i_version support */
+#define EXT4_MOUNT_MBLK_IO_SUBMIT	0x4000000 /* multi-block io submits */
 #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
 #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
 #define EXT4_MOUNT_DISCARD		0x40000000 /* Issue DISCARD requests */
+#define EXT4_MOUNT_INIT_INODE_TABLE	0x80000000 /* Initialize uninitialized itables */
 
-#define clear_opt(o, opt)		o &= ~EXT4_MOUNT_##opt
-#define set_opt(o, opt)			o |= EXT4_MOUNT_##opt
+#define clear_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt &= \
+						~EXT4_MOUNT_##opt
+#define set_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt |= \
+						EXT4_MOUNT_##opt
 #define test_opt(sb, opt)		(EXT4_SB(sb)->s_mount_opt & \
					 EXT4_MOUNT_##opt)
 
-#define ext4_set_bit			ext2_set_bit
+#define clear_opt2(sb, opt)		EXT4_SB(sb)->s_mount_opt2 &= \
+						~EXT4_MOUNT2_##opt
+#define set_opt2(sb, opt)		EXT4_SB(sb)->s_mount_opt2 |= \
+						EXT4_MOUNT2_##opt
+#define test_opt2(sb, opt)		(EXT4_SB(sb)->s_mount_opt2 & \
+					 EXT4_MOUNT2_##opt)
+
+#define ext4_set_bit			__test_and_set_bit_le
 #define ext4_set_bit_atomic		ext2_set_bit_atomic
-#define ext4_clear_bit			ext2_clear_bit
+#define ext4_clear_bit			__test_and_clear_bit_le
 #define ext4_clear_bit_atomic		ext2_clear_bit_atomic
-#define ext4_test_bit			ext2_test_bit
-#define ext4_find_first_zero_bit	ext2_find_first_zero_bit
-#define ext4_find_next_zero_bit		ext2_find_next_zero_bit
-#define ext4_find_next_bit		ext2_find_next_bit
+#define ext4_test_bit			test_bit_le
+#define ext4_find_first_zero_bit	find_first_zero_bit_le
+#define ext4_find_next_zero_bit		find_next_zero_bit_le
+#define ext4_find_next_bit		find_next_bit_le
 
 /*
  * Maximal mount counts between two filesystem checks
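After this hunk the mount-option helpers take the superblock directly instead of a raw flags word, and a second option word (s_mount_opt2, added to ext4_sb_info below) gets parallel set_opt2/clear_opt2/test_opt2 accessors for when the first 32 bits run out. Usage sketch (the function and the MOUNT2 option name are hypothetical; the MOUNT options shown are defined above):

static void example_apply_defaults(struct super_block *sb)
{
	set_opt(sb, DELALLOC);	/* was: set_opt(sbi->s_mount_opt, DELALLOC) */
	clear_opt(sb, DISCARD);
	if (test_opt(sb, BLOCK_VALIDITY))
		set_opt2(sb, EXAMPLE);	/* expands to EXT4_MOUNT2_EXAMPLE (made up) */
}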
@@ -1000,7 +1035,7 @@ struct ext4_super_block {
 	__le16	s_want_extra_isize;	/* New inodes should reserve # bytes */
 	__le32	s_flags;		/* Miscellaneous flags */
 	__le16	s_raid_stride;		/* RAID stride */
-	__le16	s_mmp_interval;		/* # seconds to wait in MMP checking */
+	__le16	s_mmp_update_interval;	/* # seconds to wait in MMP checking */
 	__le64	s_mmp_block;		/* Block for multi-mount protection */
 	__le32	s_raid_stripe_width;	/* blocks on all data disks (N*stride)*/
 	__u8	s_log_groups_per_flex;	/* FLEX_BG group size */
@@ -1060,6 +1095,7 @@ struct ext4_sb_info {
 	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
 	struct buffer_head **s_group_desc;
 	unsigned int s_mount_opt;
+	unsigned int s_mount_opt2;
 	unsigned int s_mount_flags;
 	ext4_fsblk_t s_sb_block;
 	uid_t s_resuid;
@@ -1087,7 +1123,6 @@ struct ext4_sb_info {
 	struct completion s_kobj_unregister;
 
	/* Journaling */
-	struct inode *s_journal_inode;
 	struct journal_s *s_journal;
 	struct list_head s_orphan;
 	struct mutex s_orphan_lock;
@@ -1116,14 +1151,14 @@ struct ext4_sb_info {
 	unsigned long s_ext_blocks;
 	unsigned long s_ext_extents;
 #endif
+	/* ext4 extent cache stats */
+	unsigned long extent_cache_hits;
+	unsigned long extent_cache_misses;
 
	/* for buddy allocator */
 	struct ext4_group_info ***s_group_info;
 	struct inode *s_buddy_cache;
-	long s_blocks_reserved;
-	spinlock_t s_reserve_lock;
 	spinlock_t s_md_lock;
-	tid_t s_last_transaction;
 	unsigned short *s_mb_offsets;
 	unsigned int *s_mb_maxs;
 
@@ -1141,7 +1176,6 @@ struct ext4_sb_info {
 	unsigned long s_mb_last_start;
 
	/* stats for buddy allocator */
-	spinlock_t s_mb_pa_lock;
 	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
 	atomic_t s_bal_success;	/* we found long enough chunks */
 	atomic_t s_bal_allocated;	/* in blocks */
@@ -1172,6 +1206,14 @@ struct ext4_sb_info {
 
	/* timer for periodic error stats printing */
 	struct timer_list s_err_report;
+
+	/* Lazy inode table initialization info */
+	struct ext4_li_request *s_li_request;
+	/* Wait multiplier for lazy initialization thread */
+	unsigned int s_li_wait_mult;
+
+	/* Kernel thread for multiple mount protection */
+	struct task_struct *s_mmp_tsk;
 };
 
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1210,24 +1252,39 @@ enum {
 	EXT4_STATE_EXT_MIGRATE,		/* Inode is migrating */
 	EXT4_STATE_DIO_UNWRITTEN,	/* need convert on dio done*/
 	EXT4_STATE_NEWENTRY,		/* File just added to dir */
+	EXT4_STATE_DELALLOC_RESERVED,	/* blks already reserved for delalloc */
 };
 
-#define EXT4_INODE_BIT_FNS(name, field)					\
+#define EXT4_INODE_BIT_FNS(name, field, offset)				\
 static inline int ext4_test_inode_##name(struct inode *inode, int bit)	\
 {									\
-	return test_bit(bit, &EXT4_I(inode)->i_##field);		\
+	return test_bit(bit + (offset), &EXT4_I(inode)->i_##field);	\
 }									\
 static inline void ext4_set_inode_##name(struct inode *inode, int bit)	\
 {									\
-	set_bit(bit, &EXT4_I(inode)->i_##field);			\
+	set_bit(bit + (offset), &EXT4_I(inode)->i_##field);		\
 }									\
 static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \
 {									\
-	clear_bit(bit, &EXT4_I(inode)->i_##field);			\
+	clear_bit(bit + (offset), &EXT4_I(inode)->i_##field);		\
 }
 
-EXT4_INODE_BIT_FNS(flag, flags)
-EXT4_INODE_BIT_FNS(state, state_flags)
+EXT4_INODE_BIT_FNS(flag, flags, 0)
+#if (BITS_PER_LONG < 64)
+EXT4_INODE_BIT_FNS(state, state_flags, 0)
+
+static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
+{
+	(ei)->i_state_flags = 0;
+}
+#else
+EXT4_INODE_BIT_FNS(state, flags, 32)
+
+static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
+{
+	/* We depend on the fact that callers will set i_flags */
+}
+#endif
 #else
 /* Assume that user mode programs are passing in an ext4fs superblock, not
  * a kernel struct super_block.  This will allow us to call the feature-test
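The extra offset parameter to EXT4_INODE_BIT_FNS is a small space optimization: on 64-bit kernels the dynamic state bits are packed into the upper 32 bits of i_flags instead of occupying a separate unsigned long per inode, which is why ext4_clear_state_flags becomes a no-op there and callers are expected to initialize i_flags themselves. A standalone sketch of the idea, assuming the simplified two-field model below (userspace C, not the kernel macros):

#include <assert.h>

struct toy_inode { unsigned long i_flags; };	/* 64-bit long assumed */

#define STATE_OFFSET 32	/* mirrors EXT4_INODE_BIT_FNS(state, flags, 32) */

static void set_state(struct toy_inode *ei, int bit)
{
	ei->i_flags |= 1UL << (bit + STATE_OFFSET);	/* upper half */
}

static int test_state(struct toy_inode *ei, int bit)
{
	return (ei->i_flags >> (bit + STATE_OFFSET)) & 1;
}

int main(void)
{
	struct toy_inode ei = { .i_flags = 0x13 };	/* persistent flags */

	set_state(&ei, 2);
	assert(test_state(&ei, 2));
	assert((ei.i_flags & 0xffffffffUL) == 0x13);	/* low half untouched */
	return 0;
}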
@@ -1294,6 +1351,7 @@ EXT4_INODE_BIT_FNS(state, state_flags)
 #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM		0x0010
 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK	0x0020
 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE	0x0040
+#define EXT4_FEATURE_RO_COMPAT_QUOTA		0x0100
 
 #define EXT4_FEATURE_INCOMPAT_COMPRESSION	0x0001
 #define EXT4_FEATURE_INCOMPAT_FILETYPE		0x0002
@@ -1307,13 +1365,29 @@ EXT4_INODE_BIT_FNS(state, state_flags)
 #define EXT4_FEATURE_INCOMPAT_EA_INODE		0x0400 /* EA in inode */
 #define EXT4_FEATURE_INCOMPAT_DIRDATA		0x1000 /* data in dirent */
 
+#define EXT2_FEATURE_COMPAT_SUPP	EXT4_FEATURE_COMPAT_EXT_ATTR
+#define EXT2_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
+					 EXT4_FEATURE_INCOMPAT_META_BG)
+#define EXT2_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
+
+#define EXT3_FEATURE_COMPAT_SUPP	EXT4_FEATURE_COMPAT_EXT_ATTR
+#define EXT3_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
+					 EXT4_FEATURE_INCOMPAT_RECOVER| \
+					 EXT4_FEATURE_INCOMPAT_META_BG)
+#define EXT3_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
+
 #define EXT4_FEATURE_COMPAT_SUPP	EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT4_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
					 EXT4_FEATURE_INCOMPAT_RECOVER| \
					 EXT4_FEATURE_INCOMPAT_META_BG| \
					 EXT4_FEATURE_INCOMPAT_EXTENTS| \
					 EXT4_FEATURE_INCOMPAT_64BIT| \
-					 EXT4_FEATURE_INCOMPAT_FLEX_BG)
+					 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
+					 EXT4_FEATURE_INCOMPAT_MMP)
 #define EXT4_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
					 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
@@ -1533,7 +1607,97 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
 void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
			ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
 
-extern struct proc_dir_entry *ext4_proc_root;
+/*
+ * Timeout and state flag for lazy initialization inode thread.
+ */
+#define EXT4_DEF_LI_WAIT_MULT			10
+#define EXT4_DEF_LI_MAX_START_DELAY		5
+#define EXT4_LAZYINIT_QUIT			0x0001
+#define EXT4_LAZYINIT_RUNNING			0x0002
+
+/*
+ * Lazy inode table initialization info
+ */
+struct ext4_lazy_init {
+	unsigned long		li_state;
+	struct list_head	li_request_list;
+	struct mutex		li_list_mtx;
+};
+
+struct ext4_li_request {
+	struct super_block	*lr_super;
+	struct ext4_sb_info	*lr_sbi;
+	ext4_group_t		lr_next_group;
+	struct list_head	lr_request;
+	unsigned long		lr_next_sched;
+	unsigned long		lr_timeout;
+};
+
+struct ext4_features {
+	struct kobject f_kobj;
+	struct completion f_kobj_unregister;
+};
+
+/*
+ * This structure will be used for multiple mount protection. It will be
+ * written into the block number saved in the s_mmp_block field in the
+ * superblock. Programs that check MMP should assume that if
+ * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
+ * to use the filesystem, regardless of how old the timestamp is.
+ */
+#define EXT4_MMP_MAGIC     0x004D4D50U /* ASCII for MMP */
+#define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
+#define EXT4_MMP_SEQ_FSCK  0xE24D4D50U /* mmp_seq value when being fscked */
+#define EXT4_MMP_SEQ_MAX   0xE24D4D4FU /* maximum valid mmp_seq value */
+
+struct mmp_struct {
+	__le32	mmp_magic;		/* Magic number for MMP */
+	__le32	mmp_seq;		/* Sequence no. updated periodically */
+
+	/*
+	 * mmp_time, mmp_nodename & mmp_bdevname are only used for information
+	 * purposes and do not affect the correctness of the algorithm
+	 */
+	__le64	mmp_time;		/* Time last updated */
+	char	mmp_nodename[64];	/* Node which last updated MMP block */
+	char	mmp_bdevname[32];	/* Bdev which last updated MMP block */
+
+	/*
+	 * mmp_check_interval is used to verify if the MMP block has been
+	 * updated on the block device. The value is updated based on the
+	 * maximum time to write the MMP block during an update cycle.
+	 */
+	__le16	mmp_check_interval;
+
+	__le16	mmp_pad1;
+	__le32	mmp_pad2[227];
+};
+
+/* arguments passed to the mmp thread */
+struct mmpd_data {
+	struct buffer_head *bh; /* bh from initial read_mmp_block() */
+	struct super_block *sb; /* super block of the fs */
+};
+
+/*
+ * Check interval multiplier
+ * The MMP block is written every update interval and initially checked every
+ * update interval x the multiplier (the value is then adapted based on the
+ * write latency). The reason is that writes can be delayed under load and we
+ * don't want readers to incorrectly assume that the filesystem is no longer
+ * in use.
+ */
+#define EXT4_MMP_CHECK_MULT		2UL
+
+/*
+ * Minimum interval for MMP checking in seconds.
+ */
+#define EXT4_MMP_MIN_CHECK_INTERVAL	5UL
+
+/*
+ * Maximum interval for MMP checking in seconds.
+ */
+#define EXT4_MMP_MAX_CHECK_INTERVAL	300UL
 
 /*
  * Function prototypes
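The mmp_struct and the constants above implement a simple liveness protocol: the node that owns the filesystem bumps mmp_seq every s_mmp_update_interval seconds, and a prospective mounter samples the block twice, EXT4_MMP_CHECK_MULT update intervals apart, refusing to mount if the sequence moved. A hedged sketch of the mount-time decision (condensed from the protocol described above, not the literal fs/ext4/mmp.c code):

static int example_mmp_in_use(struct mmp_struct *first, struct mmp_struct *second)
{
	u32 seq = le32_to_cpu(first->mmp_seq);

	if (seq == EXT4_MMP_SEQ_CLEAN)
		return 0;	/* last user unmounted cleanly */
	if (seq == EXT4_MMP_SEQ_FSCK || seq > EXT4_MMP_SEQ_MAX)
		return 1;	/* fsck in progress or unknown state: never mount */
	/* 'second' was re-read mmp_check_interval * EXT4_MMP_CHECK_MULT
	 * seconds later; a changed sequence means another node is alive. */
	return le32_to_cpu(second->mmp_seq) != seq;
}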
@@ -1559,11 +1723,12 @@ extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
 extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
			ext4_group_t group);
 extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
-			ext4_fsblk_t goal, unsigned long *count, int *errp);
-extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
-extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
-extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
-			ext4_fsblk_t block, unsigned long count);
+					 ext4_fsblk_t goal,
+					 unsigned int flags,
+					 unsigned long *count,
+					 int *errp);
+extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
+				  s64 nblocks, unsigned int flags);
 extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
 extern void ext4_check_blocks_bitmap(struct super_block *);
 extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
@@ -1581,10 +1746,12 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb,
 
 /* dir.c */
 extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
+				  struct file *,
				  struct ext4_dir_entry_2 *,
				  struct buffer_head *, unsigned int);
-#define ext4_check_dir_entry(dir, de, bh, offset) \
-	__ext4_check_dir_entry(__func__, __LINE__, (dir), (de), (bh), (offset))
+#define ext4_check_dir_entry(dir, filp, de, bh, offset)			\
+	unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \
+					(de), (bh), (offset)))
 extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
				    __u32 minor_hash,
				    struct ext4_dir_entry_2 *dirent);
@@ -1592,6 +1759,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);
 
 /* fsync.c */
 extern int ext4_sync_file(struct file *, int);
+extern int ext4_flush_completed_IO(struct inode *);
 
 /* hash.c */
 extern int ext4fs_dirhash(const char *name, int len, struct
@@ -1605,11 +1773,9 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
 extern unsigned long ext4_count_dirs(struct super_block *);
 extern void ext4_check_inodes_bitmap(struct super_block *);
-extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
-				       struct buffer_head *bh,
-				       ext4_group_t group,
-				       struct ext4_group_desc *desc);
-extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
+extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
+extern int ext4_init_inode_table(struct super_block *sb,
+				 ext4_group_t group, int barrier);
 
 /* mballoc.c */
 extern long ext4_mb_stats;
@@ -1620,16 +1786,17 @@ extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
				struct ext4_allocation_request *, int *);
 extern int ext4_mb_reserve_blocks(struct super_block *, int);
 extern void ext4_discard_preallocations(struct inode *);
-extern int __init init_ext4_mballoc(void);
-extern void exit_ext4_mballoc(void);
+extern int __init ext4_init_mballoc(void);
+extern void ext4_exit_mballoc(void);
 extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
			     struct buffer_head *bh, ext4_fsblk_t block,
			     unsigned long count, int flags);
 extern int ext4_mb_add_groupinfo(struct super_block *sb,
		ext4_group_t i, struct ext4_group_desc *desc);
-extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
-extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
-						ext4_group_t, int);
+extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
+				ext4_fsblk_t block, unsigned long count);
+extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
+
 /* inode.c */
 struct buffer_head *ext4_getblk(handle_t *, struct inode *,
						ext4_lblk_t, int, int *);
@@ -1646,24 +1813,25 @@ extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
 extern void ext4_evict_inode(struct inode *);
 extern void ext4_clear_inode(struct inode *);
 extern int ext4_sync_inode(handle_t *, struct inode *);
-extern void ext4_dirty_inode(struct inode *);
+extern void ext4_dirty_inode(struct inode *, int);
 extern int ext4_change_inode_journal_flag(struct inode *, int);
1651 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 1818 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
1652 | extern int ext4_can_truncate(struct inode *inode); | 1819 | extern int ext4_can_truncate(struct inode *inode); |
1653 | extern void ext4_truncate(struct inode *); | 1820 | extern void ext4_truncate(struct inode *); |
1821 | extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length); | ||
1654 | extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); | 1822 | extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); |
1655 | extern void ext4_set_inode_flags(struct inode *); | 1823 | extern void ext4_set_inode_flags(struct inode *); |
1656 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1824 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
1657 | extern int ext4_alloc_da_blocks(struct inode *inode); | 1825 | extern int ext4_alloc_da_blocks(struct inode *inode); |
1658 | extern void ext4_set_aops(struct inode *inode); | 1826 | extern void ext4_set_aops(struct inode *inode); |
1659 | extern int ext4_writepage_trans_blocks(struct inode *); | 1827 | extern int ext4_writepage_trans_blocks(struct inode *); |
1660 | extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); | ||
1661 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | 1828 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); |
1662 | extern int ext4_block_truncate_page(handle_t *handle, | 1829 | extern int ext4_block_truncate_page(handle_t *handle, |
1663 | struct address_space *mapping, loff_t from); | 1830 | struct address_space *mapping, loff_t from); |
1831 | extern int ext4_block_zero_page_range(handle_t *handle, | ||
1832 | struct address_space *mapping, loff_t from, loff_t length); | ||
1664 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 1833 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
1665 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); | 1834 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); |
1666 | extern int flush_completed_IO(struct inode *inode); | ||
1667 | extern void ext4_da_update_reserve_space(struct inode *inode, | 1835 | extern void ext4_da_update_reserve_space(struct inode *inode, |
1668 | int used, int quota_claim); | 1836 | int used, int quota_claim); |
1669 | /* ioctl.c */ | 1837 | /* ioctl.c */ |
@@ -1696,8 +1864,8 @@ extern void ext4_error_inode(struct inode *, const char *, unsigned int, | |||
1696 | ext4_fsblk_t, const char *, ...) | 1864 | ext4_fsblk_t, const char *, ...) |
1697 | __attribute__ ((format (printf, 5, 6))); | 1865 | __attribute__ ((format (printf, 5, 6))); |
1698 | extern void ext4_error_file(struct file *, const char *, unsigned int, | 1866 | extern void ext4_error_file(struct file *, const char *, unsigned int, |
1699 | const char *, ...) | 1867 | ext4_fsblk_t, const char *, ...) |
1700 | __attribute__ ((format (printf, 4, 5))); | 1868 | __attribute__ ((format (printf, 5, 6))); |
1701 | extern void __ext4_std_error(struct super_block *, const char *, | 1869 | extern void __ext4_std_error(struct super_block *, const char *, |
1702 | unsigned int, int); | 1870 | unsigned int, int); |
1703 | extern void __ext4_abort(struct super_block *, const char *, unsigned int, | 1871 | extern void __ext4_abort(struct super_block *, const char *, unsigned int, |
@@ -1712,6 +1880,10 @@ extern void __ext4_warning(struct super_block *, const char *, unsigned int, | |||
1712 | __LINE__, ## message) | 1880 | __LINE__, ## message) |
1713 | extern void ext4_msg(struct super_block *, const char *, const char *, ...) | 1881 | extern void ext4_msg(struct super_block *, const char *, const char *, ...) |
1714 | __attribute__ ((format (printf, 3, 4))); | 1882 | __attribute__ ((format (printf, 3, 4))); |
1883 | extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp, | ||
1884 | const char *, unsigned int, const char *); | ||
1885 | #define dump_mmp_msg(sb, mmp, msg) __dump_mmp_msg(sb, mmp, __func__, \ | ||
1886 | __LINE__, msg) | ||
1715 | extern void __ext4_grp_locked_error(const char *, unsigned int, \ | 1887 | extern void __ext4_grp_locked_error(const char *, unsigned int, \ |
1716 | struct super_block *, ext4_group_t, \ | 1888 | struct super_block *, ext4_group_t, \ |
1717 | unsigned long, ext4_fsblk_t, \ | 1889 | unsigned long, ext4_fsblk_t, \ |
@@ -1960,6 +2132,7 @@ extern const struct file_operations ext4_dir_operations; | |||
1960 | /* file.c */ | 2132 | /* file.c */ |
1961 | extern const struct inode_operations ext4_file_inode_operations; | 2133 | extern const struct inode_operations ext4_file_inode_operations; |
1962 | extern const struct file_operations ext4_file_operations; | 2134 | extern const struct file_operations ext4_file_operations; |
2135 | extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); | ||
1963 | 2136 | ||
1964 | /* namei.c */ | 2137 | /* namei.c */ |
1965 | extern const struct inode_operations ext4_dir_inode_operations; | 2138 | extern const struct inode_operations ext4_dir_inode_operations; |
@@ -1973,8 +2146,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations; | |||
1973 | /* block_validity */ | 2146 | /* block_validity */ |
1974 | extern void ext4_release_system_zone(struct super_block *sb); | 2147 | extern void ext4_release_system_zone(struct super_block *sb); |
1975 | extern int ext4_setup_system_zone(struct super_block *sb); | 2148 | extern int ext4_setup_system_zone(struct super_block *sb); |
1976 | extern int __init init_ext4_system_zone(void); | 2149 | extern int __init ext4_init_system_zone(void); |
1977 | extern void exit_ext4_system_zone(void); | 2150 | extern void ext4_exit_system_zone(void); |
1978 | extern int ext4_data_block_valid(struct ext4_sb_info *sbi, | 2151 | extern int ext4_data_block_valid(struct ext4_sb_info *sbi, |
1979 | ext4_fsblk_t start_blk, | 2152 | ext4_fsblk_t start_blk, |
1980 | unsigned int count); | 2153 | unsigned int count); |
@@ -1987,9 +2160,11 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, | |||
1987 | extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | 2160 | extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, |
1988 | struct ext4_map_blocks *map, int flags); | 2161 | struct ext4_map_blocks *map, int flags); |
1989 | extern void ext4_ext_truncate(struct inode *); | 2162 | extern void ext4_ext_truncate(struct inode *); |
2163 | extern int ext4_ext_punch_hole(struct file *file, loff_t offset, | ||
2164 | loff_t length); | ||
1990 | extern void ext4_ext_init(struct super_block *); | 2165 | extern void ext4_ext_init(struct super_block *); |
1991 | extern void ext4_ext_release(struct super_block *); | 2166 | extern void ext4_ext_release(struct super_block *); |
1992 | extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, | 2167 | extern long ext4_fallocate(struct file *file, int mode, loff_t offset, |
1993 | loff_t len); | 2168 | loff_t len); |
1994 | extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | 2169 | extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, |
1995 | ssize_t len); | 2170 | ssize_t len); |
@@ -2002,6 +2177,21 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
2002 | __u64 start_orig, __u64 start_donor, | 2177 | __u64 start_orig, __u64 start_donor, |
2003 | __u64 len, __u64 *moved_len); | 2178 | __u64 len, __u64 *moved_len); |
2004 | 2179 | ||
2180 | /* page-io.c */ | ||
2181 | extern int __init ext4_init_pageio(void); | ||
2182 | extern void ext4_exit_pageio(void); | ||
2183 | extern void ext4_ioend_wait(struct inode *); | ||
2184 | extern void ext4_free_io_end(ext4_io_end_t *io); | ||
2185 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); | ||
2186 | extern int ext4_end_io_nolock(ext4_io_end_t *io); | ||
2187 | extern void ext4_io_submit(struct ext4_io_submit *io); | ||
2188 | extern int ext4_bio_write_page(struct ext4_io_submit *io, | ||
2189 | struct page *page, | ||
2190 | int len, | ||
2191 | struct writeback_control *wbc); | ||
2192 | |||
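
Taken together, the page-io.c exports split buffered writeback into building an io_end, batching pages into a bio, and submitting it. A rough sketch of the intended flow (setup of the ext4_io_submit structure is elided, since its init helper is not part of this header hunk; page, len and wbc come from the writeback caller):

    struct ext4_io_submit io;
    int ret;

    /* ... initialize io for this writeback pass (elided) ... */
    ret = ext4_bio_write_page(&io, page, len, wbc); /* queue one page */
    ext4_io_submit(&io);    /* flush whatever bio was accumulated */
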
2193 | /* mmp.c */ | ||
2194 | extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); | ||
2005 | 2195 | ||
2006 | /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ | 2196 | /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ |
2007 | enum ext4_state_bits { | 2197 | enum ext4_state_bits { |
@@ -2031,6 +2221,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) | |||
2031 | 2221 | ||
2032 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | 2222 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
2033 | 2223 | ||
2224 | /* For ioend & aio unwritten conversion wait queues */ | ||
2225 | #define EXT4_WQ_HASH_SZ 37 | ||
2226 | #define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\ | ||
2227 | EXT4_WQ_HASH_SZ]) | ||
2228 | #define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\ | ||
2229 | EXT4_WQ_HASH_SZ]) | ||
2230 | extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; | ||
2231 | extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; | ||
2232 | |||
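
Both macros hash a kernel pointer into a fixed array of 37 slots; the size is prime, presumably so the alignment pattern of pointer values does not cluster into a few buckets. Collisions are benign: unrelated inodes merely share a wait queue or mutex. A standalone userspace model of just the slot arithmetic (everything here is invented for the demo):

    #include <stdio.h>

    #define EXT4_WQ_HASH_SZ 37

    /* mirrors ext4_ioend_wq()'s index computation */
    static unsigned int wq_slot(const void *v)
    {
            return (unsigned int)((unsigned long)v % EXT4_WQ_HASH_SZ);
    }

    int main(void)
    {
            int a, b;

            printf("a -> slot %u, b -> slot %u\n", wq_slot(&a), wq_slot(&b));
            return 0;
    }
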
2034 | #endif /* __KERNEL__ */ | 2233 | #endif /* __KERNEL__ */ |
2035 | 2234 | ||
2036 | #endif /* _EXT4_H */ | 2235 | #endif /* _EXT4_H */ |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index bdb6ce7e2eb4..095c36f3b612 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -119,17 +119,13 @@ struct ext4_ext_path { | |||
119 | * structure for external API | 119 | * structure for external API |
120 | */ | 120 | */ |
121 | 121 | ||
122 | #define EXT4_EXT_CACHE_NO 0 | ||
123 | #define EXT4_EXT_CACHE_GAP 1 | ||
124 | #define EXT4_EXT_CACHE_EXTENT 2 | ||
125 | |||
126 | /* | 122 | /* |
127 | * to be called by ext4_ext_walk_space() | 123 | * to be called by ext4_ext_walk_space() |
128 | * negative retcode - error | 124 | * negative retcode - error |
129 | * positive retcode - signal for ext4_ext_walk_space(), see below | 125 | * positive retcode - signal for ext4_ext_walk_space(), see below |
130 | * callback must return valid extent (passed or newly created) | 126 | * callback must return valid extent (passed or newly created) |
131 | */ | 127 | */ |
132 | typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, | 128 | typedef int (*ext_prepare_callback)(struct inode *, ext4_lblk_t, |
133 | struct ext4_ext_cache *, | 129 | struct ext4_ext_cache *, |
134 | struct ext4_extent *, void *); | 130 | struct ext4_extent *, void *); |
135 | 131 | ||
@@ -137,8 +133,11 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, | |||
137 | #define EXT_BREAK 1 | 133 | #define EXT_BREAK 1 |
138 | #define EXT_REPEAT 2 | 134 | #define EXT_REPEAT 2 |
139 | 135 | ||
140 | /* Maximum logical block in a file; ext4_extent's ee_block is __le32 */ | 136 | /* |
141 | #define EXT_MAX_BLOCK 0xffffffff | 137 | * Maximum number of logical blocks in a file; ext4_extent's ee_block is |
138 | * __le32. | ||
139 | */ | ||
140 | #define EXT_MAX_BLOCKS 0xffffffff | ||
142 | 141 | ||
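
The rename from EXT_MAX_BLOCK stresses that the constant is a count of logical blocks, not the largest valid index: with a __le32 ee_block, usable indices run from 0 through EXT_MAX_BLOCKS - 1, and bounds checks should read accordingly. An illustrative check (not from this diff; lblk is an ext4_lblk_t logical block index):

    if (lblk >= EXT_MAX_BLOCKS)
            return -EIO;    /* past the last addressable block, EXT_MAX_BLOCKS - 1 */
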
143 | /* | 142 | /* |
144 | * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an | 143 | * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an |
@@ -197,7 +196,7 @@ static inline unsigned short ext_depth(struct inode *inode) | |||
197 | static inline void | 196 | static inline void |
198 | ext4_ext_invalidate_cache(struct inode *inode) | 197 | ext4_ext_invalidate_cache(struct inode *inode) |
199 | { | 198 | { |
200 | EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO; | 199 | EXT4_I(inode)->i_cached_extent.ec_len = 0; |
201 | } | 200 | } |
202 | 201 | ||
203 | static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) | 202 | static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) |
@@ -225,11 +224,60 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext) | |||
225 | ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); | 224 | ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); |
226 | } | 225 | } |
227 | 226 | ||
227 | /* | ||
228 | * ext4_ext_pblock: | ||
229 | * combine low and high parts of physical block number into ext4_fsblk_t | ||
230 | */ | ||
231 | static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex) | ||
232 | { | ||
233 | ext4_fsblk_t block; | ||
234 | |||
235 | block = le32_to_cpu(ex->ee_start_lo); | ||
236 | block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; | ||
237 | return block; | ||
238 | } | ||
239 | |||
240 | /* | ||
241 | * ext4_idx_pblock: | ||
242 | * combine low and high parts of a leaf physical block number into ext4_fsblk_t | ||
243 | */ | ||
244 | static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix) | ||
245 | { | ||
246 | ext4_fsblk_t block; | ||
247 | |||
248 | block = le32_to_cpu(ix->ei_leaf_lo); | ||
249 | block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; | ||
250 | return block; | ||
251 | } | ||
252 | |||
253 | /* | ||
254 | * ext4_ext_store_pblock: | ||
255 | * stores a large physical block number into an extent struct, | ||
256 | * breaking it into parts | ||
257 | */ | ||
258 | static inline void ext4_ext_store_pblock(struct ext4_extent *ex, | ||
259 | ext4_fsblk_t pb) | ||
260 | { | ||
261 | ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
262 | ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & | ||
263 | 0xffff); | ||
264 | } | ||
265 | |||
266 | /* | ||
267 | * ext4_idx_store_pblock: | ||
268 | * stores a large physical block number into an index struct, | ||
269 | * breaking it into parts | ||
270 | */ | ||
271 | static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, | ||
272 | ext4_fsblk_t pb) | ||
273 | { | ||
274 | ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
275 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & | ||
276 | 0xffff); | ||
277 | } | ||
278 | |||
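
These helpers pack a 48-bit physical block number into the on-disk __le32 low / __le16 high pair. The (pb >> 31) >> 1 double shift, rather than a single >> 32, reads as a guard for builds where the shifted type is only 32 bits wide, since shifting a 32-bit value by 32 is undefined in C; that rationale is inferred, not stated in the source. A standalone round-trip check of the arithmetic (userspace model; the struct and the omitted byte-order conversions are stand-ins for the real ext4_extent):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    struct fake_extent { uint32_t lo; uint16_t hi; };  /* ee_start_lo/hi analogue */

    static void store_pblock(struct fake_extent *ex, uint64_t pb)
    {
            ex->lo = (uint32_t)(pb & 0xffffffff);
            ex->hi = (uint16_t)(((pb >> 31) >> 1) & 0xffff);
    }

    static uint64_t load_pblock(const struct fake_extent *ex)
    {
            return (uint64_t)ex->lo | (((uint64_t)ex->hi << 31) << 1);
    }

    int main(void)
    {
            struct fake_extent ex;
            uint64_t pb = 0xABCD12345678ULL;        /* arbitrary 48-bit block number */

            store_pblock(&ex, pb);
            assert(load_pblock(&ex) == pb);         /* lo/hi split round-trips exactly */
            printf("ok: %llx\n", (unsigned long long)load_pblock(&ex));
            return 0;
    }
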
228 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, | 279 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, |
229 | sector_t lblocks); | 280 | ext4_lblk_t lblocks); |
230 | extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); | ||
231 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); | ||
232 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); | ||
233 | extern int ext4_extent_tree_init(handle_t *, struct inode *); | 281 | extern int ext4_extent_tree_init(handle_t *, struct inode *); |
234 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, | 282 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, |
235 | int num, | 283 | int num, |
@@ -237,19 +285,9 @@ extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, | |||
237 | extern int ext4_can_extents_be_merged(struct inode *inode, | 285 | extern int ext4_can_extents_be_merged(struct inode *inode, |
238 | struct ext4_extent *ex1, | 286 | struct ext4_extent *ex1, |
239 | struct ext4_extent *ex2); | 287 | struct ext4_extent *ex2); |
240 | extern int ext4_ext_try_to_merge(struct inode *inode, | ||
241 | struct ext4_ext_path *path, | ||
242 | struct ext4_extent *); | ||
243 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); | ||
244 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int); | 288 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int); |
245 | extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t, | ||
246 | ext_prepare_callback, void *); | ||
247 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, | 289 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, |
248 | struct ext4_ext_path *); | 290 | struct ext4_ext_path *); |
249 | extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, | ||
250 | ext4_lblk_t *, ext4_fsblk_t *); | ||
251 | extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, | ||
252 | ext4_lblk_t *, ext4_fsblk_t *); | ||
253 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); | 291 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); |
254 | extern int ext4_ext_check_inode(struct inode *inode); | 292 | extern int ext4_ext_check_inode(struct inode *inode); |
255 | #endif /* _EXT4_EXTENTS */ | 293 | #endif /* _EXT4_EXTENTS */ |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 6e272ef6ba96..f5240aa15601 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
@@ -6,20 +6,6 @@ | |||
6 | 6 | ||
7 | #include <trace/events/ext4.h> | 7 | #include <trace/events/ext4.h> |
8 | 8 | ||
9 | int __ext4_journal_get_undo_access(const char *where, unsigned int line, | ||
10 | handle_t *handle, struct buffer_head *bh) | ||
11 | { | ||
12 | int err = 0; | ||
13 | |||
14 | if (ext4_handle_valid(handle)) { | ||
15 | err = jbd2_journal_get_undo_access(handle, bh); | ||
16 | if (err) | ||
17 | ext4_journal_abort_handle(where, line, __func__, bh, | ||
18 | handle, err); | ||
19 | } | ||
20 | return err; | ||
21 | } | ||
22 | |||
23 | int __ext4_journal_get_write_access(const char *where, unsigned int line, | 9 | int __ext4_journal_get_write_access(const char *where, unsigned int line, |
24 | handle_t *handle, struct buffer_head *bh) | 10 | handle_t *handle, struct buffer_head *bh) |
25 | { | 11 | { |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index b0bd792c58c5..bb85757689b6 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -86,8 +86,8 @@ | |||
86 | 86 | ||
87 | #ifdef CONFIG_QUOTA | 87 | #ifdef CONFIG_QUOTA |
88 | /* Amount of blocks needed for quota update - we know that the structure was | 88 | /* Amount of blocks needed for quota update - we know that the structure was |
89 | * allocated so we need to update only inode+data */ | 89 | * allocated so we need to update only the data block */ |
90 | #define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) | 90 | #define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0) |
91 | /* Amount of blocks needed for quota insert/delete - we do some block writes | 91 | /* Amount of blocks needed for quota insert/delete - we do some block writes |
92 | * but inode, sb and group updates are done only once */ | 92 | * but inode, sb and group updates are done only once */ |
93 | #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ | 93 | #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ |
@@ -126,9 +126,6 @@ void ext4_journal_abort_handle(const char *caller, unsigned int line, | |||
126 | const char *err_fn, | 126 | const char *err_fn, |
127 | struct buffer_head *bh, handle_t *handle, int err); | 127 | struct buffer_head *bh, handle_t *handle, int err); |
128 | 128 | ||
129 | int __ext4_journal_get_undo_access(const char *where, unsigned int line, | ||
130 | handle_t *handle, struct buffer_head *bh); | ||
131 | |||
132 | int __ext4_journal_get_write_access(const char *where, unsigned int line, | 129 | int __ext4_journal_get_write_access(const char *where, unsigned int line, |
133 | handle_t *handle, struct buffer_head *bh); | 130 | handle_t *handle, struct buffer_head *bh); |
134 | 131 | ||
@@ -146,8 +143,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, | |||
146 | int __ext4_handle_dirty_super(const char *where, unsigned int line, | 143 | int __ext4_handle_dirty_super(const char *where, unsigned int line, |
147 | handle_t *handle, struct super_block *sb); | 144 | handle_t *handle, struct super_block *sb); |
148 | 145 | ||
149 | #define ext4_journal_get_undo_access(handle, bh) \ | ||
150 | __ext4_journal_get_undo_access(__func__, __LINE__, (handle), (bh)) | ||
151 | #define ext4_journal_get_write_access(handle, bh) \ | 146 | #define ext4_journal_get_write_access(handle, bh) \ |
152 | __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) | 147 | __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) |
153 | #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ | 148 | #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ |
@@ -202,13 +197,6 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed) | |||
202 | return 1; | 197 | return 1; |
203 | } | 198 | } |
204 | 199 | ||
205 | static inline void ext4_journal_release_buffer(handle_t *handle, | ||
206 | struct buffer_head *bh) | ||
207 | { | ||
208 | if (ext4_handle_valid(handle)) | ||
209 | jbd2_journal_release_buffer(handle, bh); | ||
210 | } | ||
211 | |||
212 | static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) | 200 | static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) |
213 | { | 201 | { |
214 | return ext4_journal_start_sb(inode->i_sb, nblocks); | 202 | return ext4_journal_start_sb(inode->i_sb, nblocks); |
@@ -253,7 +241,7 @@ static inline int ext4_journal_force_commit(journal_t *journal) | |||
253 | static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) | 241 | static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) |
254 | { | 242 | { |
255 | if (ext4_handle_valid(handle)) | 243 | if (ext4_handle_valid(handle)) |
256 | return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode); | 244 | return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode); |
257 | return 0; | 245 | return 0; |
258 | } | 246 | } |
259 | 247 | ||
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 06328d3e5717..f815cc81e7a2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -44,54 +44,14 @@ | |||
44 | #include "ext4_jbd2.h" | 44 | #include "ext4_jbd2.h" |
45 | #include "ext4_extents.h" | 45 | #include "ext4_extents.h" |
46 | 46 | ||
47 | #include <trace/events/ext4.h> | ||
47 | 48 | ||
48 | /* | 49 | static int ext4_split_extent(handle_t *handle, |
49 | * ext_pblock: | 50 | struct inode *inode, |
50 | * combine low and high parts of physical block number into ext4_fsblk_t | 51 | struct ext4_ext_path *path, |
51 | */ | 52 | struct ext4_map_blocks *map, |
52 | ext4_fsblk_t ext_pblock(struct ext4_extent *ex) | 53 | int split_flag, |
53 | { | 54 | int flags); |
54 | ext4_fsblk_t block; | ||
55 | |||
56 | block = le32_to_cpu(ex->ee_start_lo); | ||
57 | block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; | ||
58 | return block; | ||
59 | } | ||
60 | |||
61 | /* | ||
62 | * idx_pblock: | ||
63 | * combine low and high parts of a leaf physical block number into ext4_fsblk_t | ||
64 | */ | ||
65 | ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) | ||
66 | { | ||
67 | ext4_fsblk_t block; | ||
68 | |||
69 | block = le32_to_cpu(ix->ei_leaf_lo); | ||
70 | block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; | ||
71 | return block; | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * ext4_ext_store_pblock: | ||
76 | * stores a large physical block number into an extent struct, | ||
77 | * breaking it into parts | ||
78 | */ | ||
79 | void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) | ||
80 | { | ||
81 | ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
82 | ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); | ||
83 | } | ||
84 | |||
85 | /* | ||
86 | * ext4_idx_store_pblock: | ||
87 | * stores a large physical block number into an index struct, | ||
88 | * breaking it into parts | ||
89 | */ | ||
90 | static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) | ||
91 | { | ||
92 | ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
93 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); | ||
94 | } | ||
95 | 55 | ||
96 | static int ext4_ext_truncate_extend_restart(handle_t *handle, | 56 | static int ext4_ext_truncate_extend_restart(handle_t *handle, |
97 | struct inode *inode, | 57 | struct inode *inode, |
@@ -166,10 +126,33 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
166 | struct ext4_extent *ex; | 126 | struct ext4_extent *ex; |
167 | depth = path->p_depth; | 127 | depth = path->p_depth; |
168 | 128 | ||
169 | /* try to predict block placement */ | 129 | /* |
130 | * Try to predict block placement assuming that we are | ||
131 | * filling in a file which will eventually be | ||
132 | * non-sparse --- i.e., in the case of libbfd writing | ||
133 | * an ELF object's sections out-of-order but in a way | ||
134 | * that eventually results in a contiguous object or | ||
135 | * executable file, or some database extending a table | ||
136 | * space file. However, this is actually somewhat | ||
137 | * non-ideal if we are writing a sparse file such as | ||
138 | * qemu or KVM writing a raw image file that is going | ||
139 | * to stay fairly sparse, since it will end up | ||
140 | * fragmenting the file system's free space. Maybe we | ||
141 | * should have some heuristics or some way to allow | ||
142 | * userspace to pass a hint to the file system, | ||
143 | * especially if the latter case turns out to be | ||
144 | * common. | ||
145 | */ | ||
170 | ex = path[depth].p_ext; | 146 | ex = path[depth].p_ext; |
171 | if (ex) | 147 | if (ex) { |
172 | return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block)); | 148 | ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex); |
149 | ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block); | ||
150 | |||
151 | if (block > ext_block) | ||
152 | return ext_pblk + (block - ext_block); | ||
153 | else | ||
154 | return ext_pblk - (ext_block - block); | ||
155 | } | ||
173 | 156 | ||
174 | /* it looks like index is empty; | 157 | /* it looks like index is empty; |
175 | * try to find starting block from index itself */ | 158 | * try to find starting block from index itself */ |
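
The rewritten goal computation handles a target block on either side of the nearest extent. The old one-liner always added block - ee_block; with 32-bit unsigned logical block numbers that difference wraps around whenever the target precedes the extent, apparently yielding a nonsensical allocation goal, which the explicit branch avoids. A standalone model with concrete numbers:

    #include <stdint.h>
    #include <stdio.h>

    /* models the new two-sided arithmetic in ext4_ext_find_goal() */
    static uint64_t goal(uint32_t block, uint32_t ext_block, uint64_t ext_pblk)
    {
            if (block > ext_block)
                    return ext_pblk + (block - ext_block);
            return ext_pblk - (ext_block - block);
    }

    int main(void)
    {
            /* extent: logical 100 mapped at physical 5000 */
            printf("%llu\n", (unsigned long long)goal(110, 100, 5000)); /* 5010 */
            printf("%llu\n", (unsigned long long)goal(90, 100, 5000));  /* 4990, not a wrapped huge value */
            return 0;
    }
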
@@ -216,12 +199,13 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
216 | static ext4_fsblk_t | 199 | static ext4_fsblk_t |
217 | ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, | 200 | ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, |
218 | struct ext4_ext_path *path, | 201 | struct ext4_ext_path *path, |
219 | struct ext4_extent *ex, int *err) | 202 | struct ext4_extent *ex, int *err, unsigned int flags) |
220 | { | 203 | { |
221 | ext4_fsblk_t goal, newblock; | 204 | ext4_fsblk_t goal, newblock; |
222 | 205 | ||
223 | goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); | 206 | goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); |
224 | newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err); | 207 | newblock = ext4_new_meta_blocks(handle, inode, goal, flags, |
208 | NULL, err); | ||
225 | return newblock; | 209 | return newblock; |
226 | } | 210 | } |
227 | 211 | ||
@@ -292,7 +276,7 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) | |||
292 | * to allocate @blocks | 276 | * to allocate @blocks |
293 | * Worst case is one block per extent | 277 | * Worst case is one block per extent |
294 | */ | 278 | */ |
295 | int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock) | 279 | int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) |
296 | { | 280 | { |
297 | struct ext4_inode_info *ei = EXT4_I(inode); | 281 | struct ext4_inode_info *ei = EXT4_I(inode); |
298 | int idxs, num = 0; | 282 | int idxs, num = 0; |
@@ -354,7 +338,7 @@ ext4_ext_max_entries(struct inode *inode, int depth) | |||
354 | 338 | ||
355 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | 339 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) |
356 | { | 340 | { |
357 | ext4_fsblk_t block = ext_pblock(ext); | 341 | ext4_fsblk_t block = ext4_ext_pblock(ext); |
358 | int len = ext4_ext_get_actual_len(ext); | 342 | int len = ext4_ext_get_actual_len(ext); |
359 | 343 | ||
360 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); | 344 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); |
@@ -363,7 +347,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | |||
363 | static int ext4_valid_extent_idx(struct inode *inode, | 347 | static int ext4_valid_extent_idx(struct inode *inode, |
364 | struct ext4_extent_idx *ext_idx) | 348 | struct ext4_extent_idx *ext_idx) |
365 | { | 349 | { |
366 | ext4_fsblk_t block = idx_pblock(ext_idx); | 350 | ext4_fsblk_t block = ext4_idx_pblock(ext_idx); |
367 | 351 | ||
368 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); | 352 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); |
369 | } | 353 | } |
@@ -463,13 +447,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) | |||
463 | for (k = 0; k <= l; k++, path++) { | 447 | for (k = 0; k <= l; k++, path++) { |
464 | if (path->p_idx) { | 448 | if (path->p_idx) { |
465 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), | 449 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), |
466 | idx_pblock(path->p_idx)); | 450 | ext4_idx_pblock(path->p_idx)); |
467 | } else if (path->p_ext) { | 451 | } else if (path->p_ext) { |
468 | ext_debug(" %d:[%d]%d:%llu ", | 452 | ext_debug(" %d:[%d]%d:%llu ", |
469 | le32_to_cpu(path->p_ext->ee_block), | 453 | le32_to_cpu(path->p_ext->ee_block), |
470 | ext4_ext_is_uninitialized(path->p_ext), | 454 | ext4_ext_is_uninitialized(path->p_ext), |
471 | ext4_ext_get_actual_len(path->p_ext), | 455 | ext4_ext_get_actual_len(path->p_ext), |
472 | ext_pblock(path->p_ext)); | 456 | ext4_ext_pblock(path->p_ext)); |
473 | } else | 457 | } else |
474 | ext_debug(" []"); | 458 | ext_debug(" []"); |
475 | } | 459 | } |
@@ -494,13 +478,47 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) | |||
494 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { | 478 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { |
495 | ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), | 479 | ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), |
496 | ext4_ext_is_uninitialized(ex), | 480 | ext4_ext_is_uninitialized(ex), |
497 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 481 | ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); |
498 | } | 482 | } |
499 | ext_debug("\n"); | 483 | ext_debug("\n"); |
500 | } | 484 | } |
485 | |||
486 | static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, | ||
487 | ext4_fsblk_t newblock, int level) | ||
488 | { | ||
489 | int depth = ext_depth(inode); | ||
490 | struct ext4_extent *ex; | ||
491 | |||
492 | if (depth != level) { | ||
493 | struct ext4_extent_idx *idx; | ||
494 | idx = path[level].p_idx; | ||
495 | while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) { | ||
496 | ext_debug("%d: move %d:%llu in new index %llu\n", level, | ||
497 | le32_to_cpu(idx->ei_block), | ||
498 | ext4_idx_pblock(idx), | ||
499 | newblock); | ||
500 | idx++; | ||
501 | } | ||
502 | |||
503 | return; | ||
504 | } | ||
505 | |||
506 | ex = path[depth].p_ext; | ||
507 | while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) { | ||
508 | ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", | ||
509 | le32_to_cpu(ex->ee_block), | ||
510 | ext4_ext_pblock(ex), | ||
511 | ext4_ext_is_uninitialized(ex), | ||
512 | ext4_ext_get_actual_len(ex), | ||
513 | newblock); | ||
514 | ex++; | ||
515 | } | ||
516 | } | ||
517 | |||
501 | #else | 518 | #else |
502 | #define ext4_ext_show_path(inode, path) | 519 | #define ext4_ext_show_path(inode, path) |
503 | #define ext4_ext_show_leaf(inode, path) | 520 | #define ext4_ext_show_leaf(inode, path) |
521 | #define ext4_ext_show_move(inode, path, newblock, level) | ||
504 | #endif | 522 | #endif |
505 | 523 | ||
506 | void ext4_ext_drop_refs(struct ext4_ext_path *path) | 524 | void ext4_ext_drop_refs(struct ext4_ext_path *path) |
@@ -545,7 +563,7 @@ ext4_ext_binsearch_idx(struct inode *inode, | |||
545 | 563 | ||
546 | path->p_idx = l - 1; | 564 | path->p_idx = l - 1; |
547 | ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), | 565 | ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), |
548 | idx_pblock(path->p_idx)); | 566 | ext4_idx_pblock(path->p_idx)); |
549 | 567 | ||
550 | #ifdef CHECK_BINSEARCH | 568 | #ifdef CHECK_BINSEARCH |
551 | { | 569 | { |
@@ -614,7 +632,7 @@ ext4_ext_binsearch(struct inode *inode, | |||
614 | path->p_ext = l - 1; | 632 | path->p_ext = l - 1; |
615 | ext_debug(" -> %d:%llu:[%d]%d ", | 633 | ext_debug(" -> %d:%llu:[%d]%d ", |
616 | le32_to_cpu(path->p_ext->ee_block), | 634 | le32_to_cpu(path->p_ext->ee_block), |
617 | ext_pblock(path->p_ext), | 635 | ext4_ext_pblock(path->p_ext), |
618 | ext4_ext_is_uninitialized(path->p_ext), | 636 | ext4_ext_is_uninitialized(path->p_ext), |
619 | ext4_ext_get_actual_len(path->p_ext)); | 637 | ext4_ext_get_actual_len(path->p_ext)); |
620 | 638 | ||
@@ -682,7 +700,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
682 | ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); | 700 | ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); |
683 | 701 | ||
684 | ext4_ext_binsearch_idx(inode, path + ppos, block); | 702 | ext4_ext_binsearch_idx(inode, path + ppos, block); |
685 | path[ppos].p_block = idx_pblock(path[ppos].p_idx); | 703 | path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); |
686 | path[ppos].p_depth = i; | 704 | path[ppos].p_depth = i; |
687 | path[ppos].p_ext = NULL; | 705 | path[ppos].p_ext = NULL; |
688 | 706 | ||
@@ -690,6 +708,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
690 | if (unlikely(!bh)) | 708 | if (unlikely(!bh)) |
691 | goto err; | 709 | goto err; |
692 | if (!bh_uptodate_or_lock(bh)) { | 710 | if (!bh_uptodate_or_lock(bh)) { |
711 | trace_ext4_ext_load_extent(inode, block, | ||
712 | path[ppos].p_block); | ||
693 | if (bh_submit_read(bh) < 0) { | 713 | if (bh_submit_read(bh) < 0) { |
694 | put_bh(bh); | 714 | put_bh(bh); |
695 | goto err; | 715 | goto err; |
@@ -721,7 +741,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
721 | ext4_ext_binsearch(inode, path + ppos, block); | 741 | ext4_ext_binsearch(inode, path + ppos, block); |
722 | /* if not an empty leaf */ | 742 | /* if not an empty leaf */ |
723 | if (path[ppos].p_ext) | 743 | if (path[ppos].p_ext) |
724 | path[ppos].p_block = ext_pblock(path[ppos].p_ext); | 744 | path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); |
725 | 745 | ||
726 | ext4_ext_show_path(inode, path); | 746 | ext4_ext_show_path(inode, path); |
727 | 747 | ||
@@ -739,9 +759,9 @@ err: | |||
739 | * insert new index [@logical;@ptr] into the block at @curp; | 759 | * insert new index [@logical;@ptr] into the block at @curp; |
740 | * check where to insert: before @curp or after @curp | 760 | * check where to insert: before @curp or after @curp |
741 | */ | 761 | */ |
742 | int ext4_ext_insert_index(handle_t *handle, struct inode *inode, | 762 | static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, |
743 | struct ext4_ext_path *curp, | 763 | struct ext4_ext_path *curp, |
744 | int logical, ext4_fsblk_t ptr) | 764 | int logical, ext4_fsblk_t ptr) |
745 | { | 765 | { |
746 | struct ext4_extent_idx *ix; | 766 | struct ext4_extent_idx *ix; |
747 | int len, err; | 767 | int len, err; |
@@ -814,14 +834,14 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode, | |||
814 | * - initializes subtree | 834 | * - initializes subtree |
815 | */ | 835 | */ |
816 | static int ext4_ext_split(handle_t *handle, struct inode *inode, | 836 | static int ext4_ext_split(handle_t *handle, struct inode *inode, |
817 | struct ext4_ext_path *path, | 837 | unsigned int flags, |
818 | struct ext4_extent *newext, int at) | 838 | struct ext4_ext_path *path, |
839 | struct ext4_extent *newext, int at) | ||
819 | { | 840 | { |
820 | struct buffer_head *bh = NULL; | 841 | struct buffer_head *bh = NULL; |
821 | int depth = ext_depth(inode); | 842 | int depth = ext_depth(inode); |
822 | struct ext4_extent_header *neh; | 843 | struct ext4_extent_header *neh; |
823 | struct ext4_extent_idx *fidx; | 844 | struct ext4_extent_idx *fidx; |
824 | struct ext4_extent *ex; | ||
825 | int i = at, k, m, a; | 845 | int i = at, k, m, a; |
826 | ext4_fsblk_t newblock, oldblock; | 846 | ext4_fsblk_t newblock, oldblock; |
827 | __le32 border; | 847 | __le32 border; |
@@ -869,7 +889,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
869 | ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); | 889 | ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); |
870 | for (a = 0; a < depth - at; a++) { | 890 | for (a = 0; a < depth - at; a++) { |
871 | newblock = ext4_ext_new_meta_block(handle, inode, path, | 891 | newblock = ext4_ext_new_meta_block(handle, inode, path, |
872 | newext, &err); | 892 | newext, &err, flags); |
873 | if (newblock == 0) | 893 | if (newblock == 0) |
874 | goto cleanup; | 894 | goto cleanup; |
875 | ablocks[a] = newblock; | 895 | ablocks[a] = newblock; |
@@ -898,7 +918,6 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
898 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); | 918 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); |
899 | neh->eh_magic = EXT4_EXT_MAGIC; | 919 | neh->eh_magic = EXT4_EXT_MAGIC; |
900 | neh->eh_depth = 0; | 920 | neh->eh_depth = 0; |
901 | ex = EXT_FIRST_EXTENT(neh); | ||
902 | 921 | ||
903 | /* move remainder of path[depth] to the new leaf */ | 922 | /* move remainder of path[depth] to the new leaf */ |
904 | if (unlikely(path[depth].p_hdr->eh_entries != | 923 | if (unlikely(path[depth].p_hdr->eh_entries != |
@@ -910,25 +929,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
910 | goto cleanup; | 929 | goto cleanup; |
911 | } | 930 | } |
912 | /* start copy from next extent */ | 931 | /* start copy from next extent */ |
913 | /* TODO: we could do it by single memmove */ | 932 | m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++; |
914 | m = 0; | 933 | ext4_ext_show_move(inode, path, newblock, depth); |
915 | path[depth].p_ext++; | ||
916 | while (path[depth].p_ext <= | ||
917 | EXT_MAX_EXTENT(path[depth].p_hdr)) { | ||
918 | ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", | ||
919 | le32_to_cpu(path[depth].p_ext->ee_block), | ||
920 | ext_pblock(path[depth].p_ext), | ||
921 | ext4_ext_is_uninitialized(path[depth].p_ext), | ||
922 | ext4_ext_get_actual_len(path[depth].p_ext), | ||
923 | newblock); | ||
924 | /*memmove(ex++, path[depth].p_ext++, | ||
925 | sizeof(struct ext4_extent)); | ||
926 | neh->eh_entries++;*/ | ||
927 | path[depth].p_ext++; | ||
928 | m++; | ||
929 | } | ||
930 | if (m) { | 934 | if (m) { |
931 | memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); | 935 | struct ext4_extent *ex; |
936 | ex = EXT_FIRST_EXTENT(neh); | ||
937 | memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m); | ||
932 | le16_add_cpu(&neh->eh_entries, m); | 938 | le16_add_cpu(&neh->eh_entries, m); |
933 | } | 939 | } |
934 | 940 | ||
@@ -990,12 +996,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
990 | 996 | ||
991 | ext_debug("int.index at %d (block %llu): %u -> %llu\n", | 997 | ext_debug("int.index at %d (block %llu): %u -> %llu\n", |
992 | i, newblock, le32_to_cpu(border), oldblock); | 998 | i, newblock, le32_to_cpu(border), oldblock); |
993 | /* copy indexes */ | ||
994 | m = 0; | ||
995 | path[i].p_idx++; | ||
996 | 999 | ||
997 | ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, | 1000 | /* move remainder of path[i] to the new index block */ |
998 | EXT_MAX_INDEX(path[i].p_hdr)); | ||
999 | if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != | 1001 | if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != |
1000 | EXT_LAST_INDEX(path[i].p_hdr))) { | 1002 | EXT_LAST_INDEX(path[i].p_hdr))) { |
1001 | EXT4_ERROR_INODE(inode, | 1003 | EXT4_ERROR_INODE(inode, |
@@ -1004,20 +1006,13 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
1004 | err = -EIO; | 1006 | err = -EIO; |
1005 | goto cleanup; | 1007 | goto cleanup; |
1006 | } | 1008 | } |
1007 | while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { | 1009 | /* start copy indexes */ |
1008 | ext_debug("%d: move %d:%llu in new index %llu\n", i, | 1010 | m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++; |
1009 | le32_to_cpu(path[i].p_idx->ei_block), | 1011 | ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, |
1010 | idx_pblock(path[i].p_idx), | 1012 | EXT_MAX_INDEX(path[i].p_hdr)); |
1011 | newblock); | 1013 | ext4_ext_show_move(inode, path, newblock, i); |
1012 | /*memmove(++fidx, path[i].p_idx++, | ||
1013 | sizeof(struct ext4_extent_idx)); | ||
1014 | neh->eh_entries++; | ||
1015 | BUG_ON(neh->eh_entries > neh->eh_max);*/ | ||
1016 | path[i].p_idx++; | ||
1017 | m++; | ||
1018 | } | ||
1019 | if (m) { | 1014 | if (m) { |
1020 | memmove(++fidx, path[i].p_idx - m, | 1015 | memmove(++fidx, path[i].p_idx, |
1021 | sizeof(struct ext4_extent_idx) * m); | 1016 | sizeof(struct ext4_extent_idx) * m); |
1022 | le16_add_cpu(&neh->eh_entries, m); | 1017 | le16_add_cpu(&neh->eh_entries, m); |
1023 | } | 1018 | } |
@@ -1060,7 +1055,7 @@ cleanup: | |||
1060 | for (i = 0; i < depth; i++) { | 1055 | for (i = 0; i < depth; i++) { |
1061 | if (!ablocks[i]) | 1056 | if (!ablocks[i]) |
1062 | continue; | 1057 | continue; |
1063 | ext4_free_blocks(handle, inode, 0, ablocks[i], 1, | 1058 | ext4_free_blocks(handle, inode, NULL, ablocks[i], 1, |
1064 | EXT4_FREE_BLOCKS_METADATA); | 1059 | EXT4_FREE_BLOCKS_METADATA); |
1065 | } | 1060 | } |
1066 | } | 1061 | } |
@@ -1078,8 +1073,9 @@ cleanup: | |||
1078 | * just created block | 1073 | * just created block |
1079 | */ | 1074 | */ |
1080 | static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | 1075 | static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, |
1081 | struct ext4_ext_path *path, | 1076 | unsigned int flags, |
1082 | struct ext4_extent *newext) | 1077 | struct ext4_ext_path *path, |
1078 | struct ext4_extent *newext) | ||
1083 | { | 1079 | { |
1084 | struct ext4_ext_path *curp = path; | 1080 | struct ext4_ext_path *curp = path; |
1085 | struct ext4_extent_header *neh; | 1081 | struct ext4_extent_header *neh; |
@@ -1087,7 +1083,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1087 | ext4_fsblk_t newblock; | 1083 | ext4_fsblk_t newblock; |
1088 | int err = 0; | 1084 | int err = 0; |
1089 | 1085 | ||
1090 | newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err); | 1086 | newblock = ext4_ext_new_meta_block(handle, inode, path, |
1087 | newext, &err, flags); | ||
1091 | if (newblock == 0) | 1088 | if (newblock == 0) |
1092 | return err; | 1089 | return err; |
1093 | 1090 | ||
@@ -1146,7 +1143,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1146 | ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", | 1143 | ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", |
1147 | le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), | 1144 | le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), |
1148 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), | 1145 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), |
1149 | idx_pblock(EXT_FIRST_INDEX(neh))); | 1146 | ext4_idx_pblock(EXT_FIRST_INDEX(neh))); |
1150 | 1147 | ||
1151 | neh->eh_depth = cpu_to_le16(path->p_depth + 1); | 1148 | neh->eh_depth = cpu_to_le16(path->p_depth + 1); |
1152 | err = ext4_ext_dirty(handle, inode, curp); | 1149 | err = ext4_ext_dirty(handle, inode, curp); |
@@ -1162,8 +1159,9 @@ out: | |||
1162 | * if no free index is found, then it requests in-depth growing. | 1159 | * if no free index is found, then it requests in-depth growing. |
1163 | */ | 1160 | */ |
1164 | static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, | 1161 | static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, |
1165 | struct ext4_ext_path *path, | 1162 | unsigned int flags, |
1166 | struct ext4_extent *newext) | 1163 | struct ext4_ext_path *path, |
1164 | struct ext4_extent *newext) | ||
1167 | { | 1165 | { |
1168 | struct ext4_ext_path *curp; | 1166 | struct ext4_ext_path *curp; |
1169 | int depth, i, err = 0; | 1167 | int depth, i, err = 0; |
@@ -1183,7 +1181,7 @@ repeat: | |||
1183 | if (EXT_HAS_FREE_INDEX(curp)) { | 1181 | if (EXT_HAS_FREE_INDEX(curp)) { |
1184 | /* if we found index with free entry, then use that | 1182 | /* if we found index with free entry, then use that |
1185 | * entry: create all needed subtree and add new leaf */ | 1183 | * entry: create all needed subtree and add new leaf */ |
1186 | err = ext4_ext_split(handle, inode, path, newext, i); | 1184 | err = ext4_ext_split(handle, inode, flags, path, newext, i); |
1187 | if (err) | 1185 | if (err) |
1188 | goto out; | 1186 | goto out; |
1189 | 1187 | ||
@@ -1196,7 +1194,8 @@ repeat: | |||
1196 | err = PTR_ERR(path); | 1194 | err = PTR_ERR(path); |
1197 | } else { | 1195 | } else { |
1198 | /* tree is full, time to grow in depth */ | 1196 | /* tree is full, time to grow in depth */ |
1199 | err = ext4_ext_grow_indepth(handle, inode, path, newext); | 1197 | err = ext4_ext_grow_indepth(handle, inode, flags, |
1198 | path, newext); | ||
1200 | if (err) | 1199 | if (err) |
1201 | goto out; | 1200 | goto out; |
1202 | 1201 | ||
@@ -1232,9 +1231,9 @@ out: | |||
1232 | * returns 0 at @phys | 1231 | * returns 0 at @phys |
1233 | * return value contains 0 (success) or error code | 1232 | * return value contains 0 (success) or error code |
1234 | */ | 1233 | */ |
1235 | int | 1234 | static int ext4_ext_search_left(struct inode *inode, |
1236 | ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, | 1235 | struct ext4_ext_path *path, |
1237 | ext4_lblk_t *logical, ext4_fsblk_t *phys) | 1236 | ext4_lblk_t *logical, ext4_fsblk_t *phys) |
1238 | { | 1237 | { |
1239 | struct ext4_extent_idx *ix; | 1238 | struct ext4_extent_idx *ix; |
1240 | struct ext4_extent *ex; | 1239 | struct ext4_extent *ex; |
@@ -1286,7 +1285,7 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, | |||
1286 | } | 1285 | } |
1287 | 1286 | ||
1288 | *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; | 1287 | *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; |
1289 | *phys = ext_pblock(ex) + ee_len - 1; | 1288 | *phys = ext4_ext_pblock(ex) + ee_len - 1; |
1290 | return 0; | 1289 | return 0; |
1291 | } | 1290 | } |
1292 | 1291 | ||
@@ -1297,9 +1296,9 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, | |||
1297 | * returns 0 at @phys | 1296 | * returns 0 at @phys |
1298 | * return value contains 0 (success) or error code | 1297 | * return value contains 0 (success) or error code |
1299 | */ | 1298 | */ |
1300 | int | 1299 | static int ext4_ext_search_right(struct inode *inode, |
1301 | ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, | 1300 | struct ext4_ext_path *path, |
1302 | ext4_lblk_t *logical, ext4_fsblk_t *phys) | 1301 | ext4_lblk_t *logical, ext4_fsblk_t *phys) |
1303 | { | 1302 | { |
1304 | struct buffer_head *bh = NULL; | 1303 | struct buffer_head *bh = NULL; |
1305 | struct ext4_extent_header *eh; | 1304 | struct ext4_extent_header *eh; |
@@ -1342,7 +1341,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, | |||
1342 | } | 1341 | } |
1343 | } | 1342 | } |
1344 | *logical = le32_to_cpu(ex->ee_block); | 1343 | *logical = le32_to_cpu(ex->ee_block); |
1345 | *phys = ext_pblock(ex); | 1344 | *phys = ext4_ext_pblock(ex); |
1346 | return 0; | 1345 | return 0; |
1347 | } | 1346 | } |
1348 | 1347 | ||
@@ -1357,7 +1356,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, | |||
1357 | /* next allocated block in this leaf */ | 1356 | /* next allocated block in this leaf */ |
1358 | ex++; | 1357 | ex++; |
1359 | *logical = le32_to_cpu(ex->ee_block); | 1358 | *logical = le32_to_cpu(ex->ee_block); |
1360 | *phys = ext_pblock(ex); | 1359 | *phys = ext4_ext_pblock(ex); |
1361 | return 0; | 1360 | return 0; |
1362 | } | 1361 | } |
1363 | 1362 | ||
@@ -1376,7 +1375,7 @@ got_index: | |||
1376 | * follow it and find the closest allocated | 1375 | * follow it and find the closest allocated |
1377 | * block to the right */ | 1376 | * block to the right */ |
1378 | ix++; | 1377 | ix++; |
1379 | block = idx_pblock(ix); | 1378 | block = ext4_idx_pblock(ix); |
1380 | while (++depth < path->p_depth) { | 1379 | while (++depth < path->p_depth) { |
1381 | bh = sb_bread(inode->i_sb, block); | 1380 | bh = sb_bread(inode->i_sb, block); |
1382 | if (bh == NULL) | 1381 | if (bh == NULL) |
@@ -1388,7 +1387,7 @@ got_index: | |||
1388 | return -EIO; | 1387 | return -EIO; |
1389 | } | 1388 | } |
1390 | ix = EXT_FIRST_INDEX(eh); | 1389 | ix = EXT_FIRST_INDEX(eh); |
1391 | block = idx_pblock(ix); | 1390 | block = ext4_idx_pblock(ix); |
1392 | put_bh(bh); | 1391 | put_bh(bh); |
1393 | } | 1392 | } |
1394 | 1393 | ||
@@ -1402,14 +1401,14 @@ got_index: | |||
1402 | } | 1401 | } |
1403 | ex = EXT_FIRST_EXTENT(eh); | 1402 | ex = EXT_FIRST_EXTENT(eh); |
1404 | *logical = le32_to_cpu(ex->ee_block); | 1403 | *logical = le32_to_cpu(ex->ee_block); |
1405 | *phys = ext_pblock(ex); | 1404 | *phys = ext4_ext_pblock(ex); |
1406 | put_bh(bh); | 1405 | put_bh(bh); |
1407 | return 0; | 1406 | return 0; |
1408 | } | 1407 | } |
1409 | 1408 | ||
1410 | /* | 1409 | /* |
1411 | * ext4_ext_next_allocated_block: | 1410 | * ext4_ext_next_allocated_block: |
1412 | * returns allocated block in subsequent extent or EXT_MAX_BLOCK. | 1411 | * returns allocated block in subsequent extent or EXT_MAX_BLOCKS. |
1413 | * NOTE: it considers block number from index entry as | 1412 | * NOTE: it considers block number from index entry as |
1414 | * allocated block. Thus, index entries have to be consistent | 1413 | * allocated block. Thus, index entries have to be consistent |
1415 | * with leaves. | 1414 | * with leaves. |
@@ -1423,7 +1422,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) | |||
1423 | depth = path->p_depth; | 1422 | depth = path->p_depth; |
1424 | 1423 | ||
1425 | if (depth == 0 && path->p_ext == NULL) | 1424 | if (depth == 0 && path->p_ext == NULL) |
1426 | return EXT_MAX_BLOCK; | 1425 | return EXT_MAX_BLOCKS; |
1427 | 1426 | ||
1428 | while (depth >= 0) { | 1427 | while (depth >= 0) { |
1429 | if (depth == path->p_depth) { | 1428 | if (depth == path->p_depth) { |
@@ -1440,12 +1439,12 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) | |||
1440 | depth--; | 1439 | depth--; |
1441 | } | 1440 | } |
1442 | 1441 | ||
1443 | return EXT_MAX_BLOCK; | 1442 | return EXT_MAX_BLOCKS; |
1444 | } | 1443 | } |
1445 | 1444 | ||
1446 | /* | 1445 | /* |
1447 | * ext4_ext_next_leaf_block: | 1446 | * ext4_ext_next_leaf_block: |
1448 | * returns first allocated block from next leaf or EXT_MAX_BLOCK | 1447 | * returns first allocated block from next leaf or EXT_MAX_BLOCKS |
1449 | */ | 1448 | */ |
1450 | static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, | 1449 | static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, |
1451 | struct ext4_ext_path *path) | 1450 | struct ext4_ext_path *path) |
@@ -1457,7 +1456,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, | |||
1457 | 1456 | ||
1458 | /* zero-tree has no leaf blocks at all */ | 1457 | /* zero-tree has no leaf blocks at all */ |
1459 | if (depth == 0) | 1458 | if (depth == 0) |
1460 | return EXT_MAX_BLOCK; | 1459 | return EXT_MAX_BLOCKS; |
1461 | 1460 | ||
1462 | /* go to index block */ | 1461 | /* go to index block */ |
1463 | depth--; | 1462 | depth--; |
@@ -1470,7 +1469,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, | |||
1470 | depth--; | 1469 | depth--; |
1471 | } | 1470 | } |
1472 | 1471 | ||
1473 | return EXT_MAX_BLOCK; | 1472 | return EXT_MAX_BLOCKS; |
1474 | } | 1473 | } |
1475 | 1474 | ||
1476 | /* | 1475 | /* |
@@ -1573,7 +1572,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1573 | return 0; | 1572 | return 0; |
1574 | #endif | 1573 | #endif |
1575 | 1574 | ||
1576 | if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2)) | 1575 | if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2)) |
1577 | return 1; | 1576 | return 1; |
1578 | return 0; | 1577 | return 0; |
1579 | } | 1578 | } |
@@ -1585,9 +1584,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1585 | * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns | 1584 | * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns |
1586 | * 1 if they got merged. | 1585 | * 1 if they got merged. |
1587 | */ | 1586 | */ |
1588 | int ext4_ext_try_to_merge(struct inode *inode, | 1587 | static int ext4_ext_try_to_merge_right(struct inode *inode, |
1589 | struct ext4_ext_path *path, | 1588 | struct ext4_ext_path *path, |
1590 | struct ext4_extent *ex) | 1589 | struct ext4_extent *ex) |
1591 | { | 1590 | { |
1592 | struct ext4_extent_header *eh; | 1591 | struct ext4_extent_header *eh; |
1593 | unsigned int depth, len; | 1592 | unsigned int depth, len; |
@@ -1625,6 +1624,31 @@ int ext4_ext_try_to_merge(struct inode *inode, | |||
1625 | } | 1624 | } |
1626 | 1625 | ||
1627 | /* | 1626 | /* |
1627 | * This function tries to merge the @ex extent to neighbours in the tree. | ||
1628 | * Returns 1 if @ex merged into its right neighbour; a left merge returns 0. | ||
1629 | */ | ||
1630 | static int ext4_ext_try_to_merge(struct inode *inode, | ||
1631 | struct ext4_ext_path *path, | ||
1632 | struct ext4_extent *ex) { | ||
1633 | struct ext4_extent_header *eh; | ||
1634 | unsigned int depth; | ||
1635 | int merge_done = 0; | ||
1636 | int ret = 0; | ||
1637 | |||
1638 | depth = ext_depth(inode); | ||
1639 | BUG_ON(path[depth].p_hdr == NULL); | ||
1640 | eh = path[depth].p_hdr; | ||
1641 | |||
1642 | if (ex > EXT_FIRST_EXTENT(eh)) | ||
1643 | merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); | ||
1644 | |||
1645 | if (!merge_done) | ||
1646 | ret = ext4_ext_try_to_merge_right(inode, path, ex); | ||
1647 | |||
1648 | return ret; | ||
1649 | } | ||
1650 | |||
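
The new wrapper gives the left neighbour priority: since the merge primitive only folds an extent into the one after it, a leftward merge of @ex is expressed as merging (ex - 1) rightward, and @ex's own rightward merge is attempted only if that did nothing. A toy array-based model of the same ordering (hypothetical helper names, not kernel code):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_extent { uint32_t lblk; uint64_t pblk; uint16_t len; };

static struct toy_extent leaf[4] = {
    { 0, 1000, 10 }, { 10, 1010, 5 }, { 30, 2000, 5 },
};
static int nr_extents = 3;

/* Fold leaf[i + 1] into leaf[i] if they are contiguous; this mirrors
 * the right-only merge primitive. Returns 1 if a merge happened. */
static int toy_merge_right(int i)
{
    if (i + 1 >= nr_extents)
        return 0;
    if (leaf[i].lblk + leaf[i].len != leaf[i + 1].lblk ||
        leaf[i].pblk + leaf[i].len != leaf[i + 1].pblk)
        return 0;
    leaf[i].len += leaf[i + 1].len;
    memmove(&leaf[i + 1], &leaf[i + 2],
            (nr_extents - i - 2) * sizeof(leaf[0]));
    nr_extents--;
    return 1;
}

/* Try the left neighbour first, then the extent itself. */
static void toy_try_to_merge(int i)
{
    int merged_left = 0;

    if (i > 0)
        merged_left = toy_merge_right(i - 1);
    if (!merged_left)
        toy_merge_right(i);
}

int main(void)
{
    toy_try_to_merge(1);  /* folds leaf[1] into leaf[0] */
    printf("extents left: %d, first len: %u\n", nr_extents, leaf[0].len);
    return 0;
}
```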
1651 | /* | ||
1628 | * check if a portion of the "newext" extent overlaps with an | 1652 | * check if a portion of the "newext" extent overlaps with an |
1629 | * existing extent. | 1653 | * existing extent. |
1630 | * | 1654 | * |
@@ -1632,9 +1656,9 @@ int ext4_ext_try_to_merge(struct inode *inode, | |||
1632 | * such that there will be no overlap, and then returns 1. | 1656 | * such that there will be no overlap, and then returns 1. |
1633 | * If there is no overlap found, it returns 0. | 1657 | * If there is no overlap found, it returns 0. |
1634 | */ | 1658 | */ |
1635 | unsigned int ext4_ext_check_overlap(struct inode *inode, | 1659 | static unsigned int ext4_ext_check_overlap(struct inode *inode, |
1636 | struct ext4_extent *newext, | 1660 | struct ext4_extent *newext, |
1637 | struct ext4_ext_path *path) | 1661 | struct ext4_ext_path *path) |
1638 | { | 1662 | { |
1639 | ext4_lblk_t b1, b2; | 1663 | ext4_lblk_t b1, b2; |
1640 | unsigned int depth, len1; | 1664 | unsigned int depth, len1; |
@@ -1653,13 +1677,13 @@ unsigned int ext4_ext_check_overlap(struct inode *inode, | |||
1653 | */ | 1677 | */ |
1654 | if (b2 < b1) { | 1678 | if (b2 < b1) { |
1655 | b2 = ext4_ext_next_allocated_block(path); | 1679 | b2 = ext4_ext_next_allocated_block(path); |
1656 | if (b2 == EXT_MAX_BLOCK) | 1680 | if (b2 == EXT_MAX_BLOCKS) |
1657 | goto out; | 1681 | goto out; |
1658 | } | 1682 | } |
1659 | 1683 | ||
1660 | /* check for wrap through zero on extent logical start block*/ | 1684 | /* check for wrap through zero on extent logical start block*/ |
1661 | if (b1 + len1 < b1) { | 1685 | if (b1 + len1 < b1) { |
1662 | len1 = EXT_MAX_BLOCK - b1; | 1686 | len1 = EXT_MAX_BLOCKS - b1; |
1663 | newext->ee_len = cpu_to_le16(len1); | 1687 | newext->ee_len = cpu_to_le16(len1); |
1664 | ret = 1; | 1688 | ret = 1; |
1665 | } | 1689 | } |
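
The renamed constant matters most in the wrap check above: if b1 + len1 overflows the 32-bit logical block space, the proposed extent's length is clamped so it ends at the top of the file range instead of wrapping through zero. A userspace sketch of the arithmetic, with a stand-in constant for EXT_MAX_BLOCKS:

```c
#include <stdint.h>
#include <stdio.h>

#define TOY_MAX_BLOCKS 0xffffffffU  /* stand-in for EXT_MAX_BLOCKS */

/* Clamp (start, len) so start + len cannot wrap past the top of the
 * 32-bit logical block space; returns the usable length. */
static uint32_t clamp_len(uint32_t b1, uint32_t len1)
{
    if (b1 + len1 < b1)  /* unsigned wrap detected */
        len1 = TOY_MAX_BLOCKS - b1;
    return len1;
}

int main(void)
{
    /* A 16-block extent near the top of the address space wraps... */
    printf("%u\n", clamp_len(0xfffffff8U, 16));  /* clamped to 7 */
    /* ...while an ordinary extent is left alone. */
    printf("%u\n", clamp_len(1000, 16));         /* stays 16 */
    return 0;
}
```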
@@ -1690,6 +1714,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1690 | int depth, len, err; | 1714 | int depth, len, err; |
1691 | ext4_lblk_t next; | 1715 | ext4_lblk_t next; |
1692 | unsigned uninitialized = 0; | 1716 | unsigned uninitialized = 0; |
1717 | int flags = 0; | ||
1693 | 1718 | ||
1694 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { | 1719 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { |
1695 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); | 1720 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); |
@@ -1706,11 +1731,12 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1706 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) | 1731 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) |
1707 | && ext4_can_extents_be_merged(inode, ex, newext)) { | 1732 | && ext4_can_extents_be_merged(inode, ex, newext)) { |
1708 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", | 1733 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", |
1709 | ext4_ext_is_uninitialized(newext), | 1734 | ext4_ext_is_uninitialized(newext), |
1710 | ext4_ext_get_actual_len(newext), | 1735 | ext4_ext_get_actual_len(newext), |
1711 | le32_to_cpu(ex->ee_block), | 1736 | le32_to_cpu(ex->ee_block), |
1712 | ext4_ext_is_uninitialized(ex), | 1737 | ext4_ext_is_uninitialized(ex), |
1713 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 1738 | ext4_ext_get_actual_len(ex), |
1739 | ext4_ext_pblock(ex)); | ||
1714 | err = ext4_ext_get_access(handle, inode, path + depth); | 1740 | err = ext4_ext_get_access(handle, inode, path + depth); |
1715 | if (err) | 1741 | if (err) |
1716 | return err; | 1742 | return err; |
@@ -1741,7 +1767,7 @@ repeat: | |||
1741 | fex = EXT_LAST_EXTENT(eh); | 1767 | fex = EXT_LAST_EXTENT(eh); |
1742 | next = ext4_ext_next_leaf_block(inode, path); | 1768 | next = ext4_ext_next_leaf_block(inode, path); |
1743 | if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) | 1769 | if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) |
1744 | && next != EXT_MAX_BLOCK) { | 1770 | && next != EXT_MAX_BLOCKS) { |
1745 | ext_debug("next leaf block - %d\n", next); | 1771 | ext_debug("next leaf block - %d\n", next); |
1746 | BUG_ON(npath != NULL); | 1772 | BUG_ON(npath != NULL); |
1747 | npath = ext4_ext_find_extent(inode, next, NULL); | 1773 | npath = ext4_ext_find_extent(inode, next, NULL); |
@@ -1750,7 +1776,7 @@ repeat: | |||
1750 | BUG_ON(npath->p_depth != path->p_depth); | 1776 | BUG_ON(npath->p_depth != path->p_depth); |
1751 | eh = npath[depth].p_hdr; | 1777 | eh = npath[depth].p_hdr; |
1752 | if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { | 1778 | if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { |
1753 | ext_debug("next leaf isnt full(%d)\n", | 1779 | ext_debug("next leaf isn't full(%d)\n", |
1754 | le16_to_cpu(eh->eh_entries)); | 1780 | le16_to_cpu(eh->eh_entries)); |
1755 | path = npath; | 1781 | path = npath; |
1756 | goto repeat; | 1782 | goto repeat; |
@@ -1763,7 +1789,9 @@ repeat: | |||
1763 | * There is no free space in the found leaf. | 1789 | * There is no free space in the found leaf. |
1764 | * We're gonna add a new leaf in the tree. | 1790 | * We're gonna add a new leaf in the tree. |
1765 | */ | 1791 | */ |
1766 | err = ext4_ext_create_new_leaf(handle, inode, path, newext); | 1792 | if (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) |
1793 | flags = EXT4_MB_USE_ROOT_BLOCKS; | ||
1794 | err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext); | ||
1767 | if (err) | 1795 | if (err) |
1768 | goto cleanup; | 1796 | goto cleanup; |
1769 | depth = ext_depth(inode); | 1797 | depth = ext_depth(inode); |
@@ -1780,7 +1808,7 @@ has_space: | |||
1780 | /* there is no extent in this leaf, create first one */ | 1808 | /* there is no extent in this leaf, create first one */ |
1781 | ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", | 1809 | ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", |
1782 | le32_to_cpu(newext->ee_block), | 1810 | le32_to_cpu(newext->ee_block), |
1783 | ext_pblock(newext), | 1811 | ext4_ext_pblock(newext), |
1784 | ext4_ext_is_uninitialized(newext), | 1812 | ext4_ext_is_uninitialized(newext), |
1785 | ext4_ext_get_actual_len(newext)); | 1813 | ext4_ext_get_actual_len(newext)); |
1786 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); | 1814 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); |
@@ -1794,7 +1822,7 @@ has_space: | |||
1794 | ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " | 1822 | ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " |
1795 | "move %d from 0x%p to 0x%p\n", | 1823 | "move %d from 0x%p to 0x%p\n", |
1796 | le32_to_cpu(newext->ee_block), | 1824 | le32_to_cpu(newext->ee_block), |
1797 | ext_pblock(newext), | 1825 | ext4_ext_pblock(newext), |
1798 | ext4_ext_is_uninitialized(newext), | 1826 | ext4_ext_is_uninitialized(newext), |
1799 | ext4_ext_get_actual_len(newext), | 1827 | ext4_ext_get_actual_len(newext), |
1800 | nearex, len, nearex + 1, nearex + 2); | 1828 | nearex, len, nearex + 1, nearex + 2); |
@@ -1808,7 +1836,7 @@ has_space: | |||
1808 | ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " | 1836 | ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " |
1809 | "move %d from 0x%p to 0x%p\n", | 1837 | "move %d from 0x%p to 0x%p\n", |
1810 | le32_to_cpu(newext->ee_block), | 1838 | le32_to_cpu(newext->ee_block), |
1811 | ext_pblock(newext), | 1839 | ext4_ext_pblock(newext), |
1812 | ext4_ext_is_uninitialized(newext), | 1840 | ext4_ext_is_uninitialized(newext), |
1813 | ext4_ext_get_actual_len(newext), | 1841 | ext4_ext_get_actual_len(newext), |
1814 | nearex, len, nearex + 1, nearex + 2); | 1842 | nearex, len, nearex + 1, nearex + 2); |
@@ -1819,7 +1847,7 @@ has_space: | |||
1819 | le16_add_cpu(&eh->eh_entries, 1); | 1847 | le16_add_cpu(&eh->eh_entries, 1); |
1820 | nearex = path[depth].p_ext; | 1848 | nearex = path[depth].p_ext; |
1821 | nearex->ee_block = newext->ee_block; | 1849 | nearex->ee_block = newext->ee_block; |
1822 | ext4_ext_store_pblock(nearex, ext_pblock(newext)); | 1850 | ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); |
1823 | nearex->ee_len = newext->ee_len; | 1851 | nearex->ee_len = newext->ee_len; |
1824 | 1852 | ||
1825 | merge: | 1853 | merge: |
@@ -1845,9 +1873,9 @@ cleanup: | |||
1845 | return err; | 1873 | return err; |
1846 | } | 1874 | } |
1847 | 1875 | ||
1848 | int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | 1876 | static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, |
1849 | ext4_lblk_t num, ext_prepare_callback func, | 1877 | ext4_lblk_t num, ext_prepare_callback func, |
1850 | void *cbdata) | 1878 | void *cbdata) |
1851 | { | 1879 | { |
1852 | struct ext4_ext_path *path = NULL; | 1880 | struct ext4_ext_path *path = NULL; |
1853 | struct ext4_ext_cache cbex; | 1881 | struct ext4_ext_cache cbex; |
@@ -1859,7 +1887,7 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | |||
1859 | BUG_ON(func == NULL); | 1887 | BUG_ON(func == NULL); |
1860 | BUG_ON(inode == NULL); | 1888 | BUG_ON(inode == NULL); |
1861 | 1889 | ||
1862 | while (block < last && block != EXT_MAX_BLOCK) { | 1890 | while (block < last && block != EXT_MAX_BLOCKS) { |
1863 | num = last - block; | 1891 | num = last - block; |
1864 | /* find extent for this block */ | 1892 | /* find extent for this block */ |
1865 | down_read(&EXT4_I(inode)->i_data_sem); | 1893 | down_read(&EXT4_I(inode)->i_data_sem); |
@@ -1919,12 +1947,10 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | |||
1919 | cbex.ec_block = start; | 1947 | cbex.ec_block = start; |
1920 | cbex.ec_len = end - start; | 1948 | cbex.ec_len = end - start; |
1921 | cbex.ec_start = 0; | 1949 | cbex.ec_start = 0; |
1922 | cbex.ec_type = EXT4_EXT_CACHE_GAP; | ||
1923 | } else { | 1950 | } else { |
1924 | cbex.ec_block = le32_to_cpu(ex->ee_block); | 1951 | cbex.ec_block = le32_to_cpu(ex->ee_block); |
1925 | cbex.ec_len = ext4_ext_get_actual_len(ex); | 1952 | cbex.ec_len = ext4_ext_get_actual_len(ex); |
1926 | cbex.ec_start = ext_pblock(ex); | 1953 | cbex.ec_start = ext4_ext_pblock(ex); |
1927 | cbex.ec_type = EXT4_EXT_CACHE_EXTENT; | ||
1928 | } | 1954 | } |
1929 | 1955 | ||
1930 | if (unlikely(cbex.ec_len == 0)) { | 1956 | if (unlikely(cbex.ec_len == 0)) { |
@@ -1932,7 +1958,7 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | |||
1932 | err = -EIO; | 1958 | err = -EIO; |
1933 | break; | 1959 | break; |
1934 | } | 1960 | } |
1935 | err = func(inode, path, &cbex, ex, cbdata); | 1961 | err = func(inode, next, &cbex, ex, cbdata); |
1936 | ext4_ext_drop_refs(path); | 1962 | ext4_ext_drop_refs(path); |
1937 | 1963 | ||
1938 | if (err < 0) | 1964 | if (err < 0) |
@@ -1964,13 +1990,12 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | |||
1964 | 1990 | ||
1965 | static void | 1991 | static void |
1966 | ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, | 1992 | ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, |
1967 | __u32 len, ext4_fsblk_t start, int type) | 1993 | __u32 len, ext4_fsblk_t start) |
1968 | { | 1994 | { |
1969 | struct ext4_ext_cache *cex; | 1995 | struct ext4_ext_cache *cex; |
1970 | BUG_ON(len == 0); | 1996 | BUG_ON(len == 0); |
1971 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1997 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1972 | cex = &EXT4_I(inode)->i_cached_extent; | 1998 | cex = &EXT4_I(inode)->i_cached_extent; |
1973 | cex->ec_type = type; | ||
1974 | cex->ec_block = block; | 1999 | cex->ec_block = block; |
1975 | cex->ec_len = len; | 2000 | cex->ec_len = len; |
1976 | cex->ec_start = start; | 2001 | cex->ec_start = start; |
@@ -1995,7 +2020,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
1995 | if (ex == NULL) { | 2020 | if (ex == NULL) { |
1996 | /* there is no extent yet, so gap is [0;-] */ | 2021 | /* there is no extent yet, so gap is [0;-] */ |
1997 | lblock = 0; | 2022 | lblock = 0; |
1998 | len = EXT_MAX_BLOCK; | 2023 | len = EXT_MAX_BLOCKS; |
1999 | ext_debug("cache gap(whole file):"); | 2024 | ext_debug("cache gap(whole file):"); |
2000 | } else if (block < le32_to_cpu(ex->ee_block)) { | 2025 | } else if (block < le32_to_cpu(ex->ee_block)) { |
2001 | lblock = block; | 2026 | lblock = block; |
@@ -2023,43 +2048,90 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2023 | } | 2048 | } |
2024 | 2049 | ||
2025 | ext_debug(" -> %u:%lu\n", lblock, len); | 2050 | ext_debug(" -> %u:%lu\n", lblock, len); |
2026 | ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP); | 2051 | ext4_ext_put_in_cache(inode, lblock, len, 0); |
2027 | } | 2052 | } |
2028 | 2053 | ||
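
ext4_ext_put_gap_in_cache() records a negative result: the hole around a block that has no extent, so the next lookup in the same hole is answered without walking the tree. With ec_type gone, a gap is cached as a range whose physical start is 0 (the final argument of the ext4_ext_put_in_cache() call above). A simplified sketch of the gap computation for the three cases (toy types, not kernel code):

```c
#include <stdint.h>
#include <stdio.h>

#define TOY_MAX_BLOCKS 0xffffffffU

struct toy_extent { uint32_t lblk; uint16_t len; };

/* Given the nearest extent to a missing block, compute the hole
 * [gap_start, gap_start + gap_len) to cache: empty tree, block
 * before the extent, or block after it. */
static void gap_around(const struct toy_extent *ex, uint32_t block,
                       uint32_t next_allocated,
                       uint32_t *gap_start, uint32_t *gap_len)
{
    if (ex == NULL) {                  /* no extents at all */
        *gap_start = 0;
        *gap_len = TOY_MAX_BLOCKS;
    } else if (block < ex->lblk) {     /* hole before the extent */
        *gap_start = block;
        *gap_len = ex->lblk - block;
    } else {                           /* hole after the extent */
        *gap_start = ex->lblk + ex->len;
        *gap_len = next_allocated - *gap_start;
    }
}

int main(void)
{
    struct toy_extent ex = { .lblk = 100, .len = 8 };
    uint32_t start, len;

    gap_around(&ex, 200, 500, &start, &len);
    printf("gap %u:%u\n", start, len);  /* gap 108:392 */
    return 0;
}
```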
2029 | static int | 2054 | /* |
2030 | ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, | 2055 | * ext4_ext_in_cache() |
2031 | struct ext4_extent *ex) | 2056 | * Checks to see if the given block is in the cache. |
2032 | { | 2057 | * If it is, the cached extent is stored in the given |
2058 | * cache extent pointer. If the cached extent is a hole, | ||
2059 | * this routine should be used instead of | ||
2060 | * ext4_ext_in_cache if the calling function needs to | ||
2061 | * know the size of the hole. | ||
2062 | * | ||
2063 | * @inode: The file's inode | ||
2064 | * @block: The block to look for in the cache | ||
2065 | * @ex: Pointer where the cached extent will be stored | ||
2066 | * if it contains block | ||
2067 | * | ||
2068 | * Return 0 if cache is invalid; 1 if the cache is valid | ||
2069 | */ | ||
2070 | static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, | ||
2071 | struct ext4_ext_cache *ex) { | ||
2033 | struct ext4_ext_cache *cex; | 2072 | struct ext4_ext_cache *cex; |
2034 | int ret = EXT4_EXT_CACHE_NO; | 2073 | struct ext4_sb_info *sbi; |
2074 | int ret = 0; | ||
2035 | 2075 | ||
2036 | /* | 2076 | /* |
2037 | * We borrow i_block_reservation_lock to protect i_cached_extent | 2077 | * We borrow i_block_reservation_lock to protect i_cached_extent |
2038 | */ | 2078 | */ |
2039 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 2079 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
2040 | cex = &EXT4_I(inode)->i_cached_extent; | 2080 | cex = &EXT4_I(inode)->i_cached_extent; |
2081 | sbi = EXT4_SB(inode->i_sb); | ||
2041 | 2082 | ||
2042 | /* has cache valid data? */ | 2083 | /* has cache valid data? */ |
2043 | if (cex->ec_type == EXT4_EXT_CACHE_NO) | 2084 | if (cex->ec_len == 0) |
2044 | goto errout; | 2085 | goto errout; |
2045 | 2086 | ||
2046 | BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && | ||
2047 | cex->ec_type != EXT4_EXT_CACHE_EXTENT); | ||
2048 | if (in_range(block, cex->ec_block, cex->ec_len)) { | 2087 | if (in_range(block, cex->ec_block, cex->ec_len)) { |
2049 | ex->ee_block = cpu_to_le32(cex->ec_block); | 2088 | memcpy(ex, cex, sizeof(struct ext4_ext_cache)); |
2050 | ext4_ext_store_pblock(ex, cex->ec_start); | ||
2051 | ex->ee_len = cpu_to_le16(cex->ec_len); | ||
2052 | ext_debug("%u cached by %u:%u:%llu\n", | 2089 | ext_debug("%u cached by %u:%u:%llu\n", |
2053 | block, | 2090 | block, |
2054 | cex->ec_block, cex->ec_len, cex->ec_start); | 2091 | cex->ec_block, cex->ec_len, cex->ec_start); |
2055 | ret = cex->ec_type; | 2092 | ret = 1; |
2056 | } | 2093 | } |
2057 | errout: | 2094 | errout: |
2095 | if (!ret) | ||
2096 | sbi->extent_cache_misses++; | ||
2097 | else | ||
2098 | sbi->extent_cache_hits++; | ||
2058 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 2099 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
2059 | return ret; | 2100 | return ret; |
2060 | } | 2101 | } |
2061 | 2102 | ||
2062 | /* | 2103 | /* |
2104 | * ext4_ext_in_cache() | ||
2105 | * Checks to see if the given block is in the cache. | ||
2106 | * If it is, the cached extent is stored in the given | ||
2107 | * extent pointer. | ||
2108 | * | ||
2109 | * @inode: The file's inode | ||
2110 | * @block: The block to look for in the cache | ||
2111 | * @ex: Pointer where the cached extent will be stored | ||
2112 | * if it contains block | ||
2113 | * | ||
2114 | * Return 0 if cache is invalid; 1 if the cache is valid | ||
2115 | */ | ||
2116 | static int | ||
2117 | ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, | ||
2118 | struct ext4_extent *ex) | ||
2119 | { | ||
2120 | struct ext4_ext_cache cex; | ||
2121 | int ret = 0; | ||
2122 | |||
2123 | if (ext4_ext_check_cache(inode, block, &cex)) { | ||
2124 | ex->ee_block = cpu_to_le32(cex.ec_block); | ||
2125 | ext4_ext_store_pblock(ex, cex.ec_start); | ||
2126 | ex->ee_len = cpu_to_le16(cex.ec_len); | ||
2127 | ret = 1; | ||
2128 | } | ||
2129 | |||
2130 | return ret; | ||
2131 | } | ||
2132 | |||
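
The reworked cache drops ec_type entirely: a zero-length entry means nothing is cached, an entry whose ec_start is 0 is a cached hole, and anything else is a cached extent, with ext4_ext_check_cache() also bumping the new per-superblock hit/miss counters. A compact userspace model of the one-entry cache (toy types; the kernel protects the slot with i_block_reservation_lock):

```c
#include <stdint.h>
#include <stdio.h>

/* One cached range per inode, like ext4_ext_cache after this patch:
 * ec_len == 0 means the slot is empty; ec_start == 0 marks a hole. */
struct toy_cache {
    uint32_t ec_block;
    uint32_t ec_len;
    uint64_t ec_start;
};

static unsigned long hits, misses;

static int toy_check_cache(const struct toy_cache *cex, uint32_t block,
                           struct toy_cache *out)
{
    int ret = 0;

    if (cex->ec_len != 0 &&
        block >= cex->ec_block &&
        block < cex->ec_block + cex->ec_len) {  /* in_range() */
        *out = *cex;
        ret = 1;
    }
    if (ret)
        hits++;
    else
        misses++;
    return ret;
}

int main(void)
{
    struct toy_cache cex = { .ec_block = 100, .ec_len = 8,
                             .ec_start = 5000 };
    struct toy_cache found;

    if (toy_check_cache(&cex, 103, &found))
        printf("103 cached by %u:%u:%llu\n", found.ec_block,
               found.ec_len, (unsigned long long)found.ec_start);
    toy_check_cache(&cex, 42, &found);  /* a miss */
    printf("hits=%lu misses=%lu\n", hits, misses);
    return 0;
}
```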
2133 | |||
2134 | /* | ||
2063 | * ext4_ext_rm_idx: | 2135 | * ext4_ext_rm_idx: |
2064 | * removes index from the index block. | 2136 | * removes index from the index block. |
2065 | * It's used in truncate case only, thus all requests are for | 2137 | * It's used in truncate case only, thus all requests are for |
@@ -2073,7 +2145,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
2073 | 2145 | ||
2074 | /* free index block */ | 2146 | /* free index block */ |
2075 | path--; | 2147 | path--; |
2076 | leaf = idx_pblock(path->p_idx); | 2148 | leaf = ext4_idx_pblock(path->p_idx); |
2077 | if (unlikely(path->p_hdr->eh_entries == 0)) { | 2149 | if (unlikely(path->p_hdr->eh_entries == 0)) { |
2078 | EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); | 2150 | EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); |
2079 | return -EIO; | 2151 | return -EIO; |
@@ -2086,7 +2158,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
2086 | if (err) | 2158 | if (err) |
2087 | return err; | 2159 | return err; |
2088 | ext_debug("index is empty, remove it, free block %llu\n", leaf); | 2160 | ext_debug("index is empty, remove it, free block %llu\n", leaf); |
2089 | ext4_free_blocks(handle, inode, 0, leaf, 1, | 2161 | ext4_free_blocks(handle, inode, NULL, leaf, 1, |
2090 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); | 2162 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); |
2091 | return err; | 2163 | return err; |
2092 | } | 2164 | } |
@@ -2181,13 +2253,21 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2181 | ext4_fsblk_t start; | 2253 | ext4_fsblk_t start; |
2182 | 2254 | ||
2183 | num = le32_to_cpu(ex->ee_block) + ee_len - from; | 2255 | num = le32_to_cpu(ex->ee_block) + ee_len - from; |
2184 | start = ext_pblock(ex) + ee_len - num; | 2256 | start = ext4_ext_pblock(ex) + ee_len - num; |
2185 | ext_debug("free last %u blocks starting %llu\n", num, start); | 2257 | ext_debug("free last %u blocks starting %llu\n", num, start); |
2186 | ext4_free_blocks(handle, inode, 0, start, num, flags); | 2258 | ext4_free_blocks(handle, inode, NULL, start, num, flags); |
2187 | } else if (from == le32_to_cpu(ex->ee_block) | 2259 | } else if (from == le32_to_cpu(ex->ee_block) |
2188 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2260 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { |
2189 | printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", | 2261 | /* head removal */ |
2190 | from, to, le32_to_cpu(ex->ee_block), ee_len); | 2262 | ext4_lblk_t num; |
2263 | ext4_fsblk_t start; | ||
2264 | |||
2265 | num = to - from; | ||
2266 | start = ext4_ext_pblock(ex); | ||
2267 | |||
2268 | ext_debug("free first %u blocks starting %llu\n", num, start); | ||
2269 | ext4_free_blocks(handle, inode, 0, start, num, flags); | ||
2270 | |||
2191 | } else { | 2271 | } else { |
2192 | printk(KERN_INFO "strange request: removal(2) " | 2272 | printk(KERN_INFO "strange request: removal(2) " |
2193 | "%u-%u from %u:%u\n", | 2273 | "%u-%u from %u:%u\n", |
@@ -2196,9 +2276,22 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2196 | return 0; | 2276 | return 0; |
2197 | } | 2277 | } |
2198 | 2278 | ||
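
Where the old code only printk'd a "strange request" for a head removal, the new branch actually frees it: the tail case frees the last num = (ee_block + ee_len - from) blocks starting at pblock + ee_len - num, while the head case frees num = (to - from) blocks starting at the extent's first physical block. A sketch of the two computations (toy types, mirroring the arithmetic in the hunk):

```c
#include <stdint.h>
#include <stdio.h>

struct toy_extent { uint32_t lblk; uint64_t pblk; uint16_t len; };

/* Tail removal: free logical blocks [from, lblk + len - 1]. */
static void free_tail(const struct toy_extent *ex, uint32_t from,
                      uint64_t *start, uint32_t *num)
{
    *num = ex->lblk + ex->len - from;
    *start = ex->pblk + ex->len - *num;
}

/* Head removal (the new branch): free the first (to - from) blocks. */
static void free_head(const struct toy_extent *ex, uint32_t from,
                      uint32_t to, uint64_t *start, uint32_t *num)
{
    *num = to - from;
    *start = ex->pblk;
}

int main(void)
{
    struct toy_extent ex = { .lblk = 100, .pblk = 5000, .len = 16 };
    uint64_t start;
    uint32_t num;

    free_tail(&ex, 110, &start, &num);
    printf("tail: free %u blocks from %llu\n", num,
           (unsigned long long)start);  /* 6 blocks from 5010 */

    free_head(&ex, 100, 108, &start, &num);
    printf("head: free %u blocks from %llu\n", num,
           (unsigned long long)start);  /* 8 blocks from 5000 */
    return 0;
}
```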
2279 | |||
2280 | /* | ||
2281 | * ext4_ext_rm_leaf() Removes the extents associated with the | ||
2282 | * blocks appearing between "start" and "end", and splits the extents | ||
2283 | * if "start" and "end" appear in the same extent | ||
2284 | * | ||
2285 | * @handle: The journal handle | ||
2286 | * @inode: The file's inode | ||
2287 | * @path: The path to the leaf | ||
2288 | * @start: The first block to remove | ||
2289 | * @end: The last block to remove | ||
2290 | */ | ||
2199 | static int | 2291 | static int |
2200 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | 2292 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, |
2201 | struct ext4_ext_path *path, ext4_lblk_t start) | 2293 | struct ext4_ext_path *path, ext4_lblk_t start, |
2294 | ext4_lblk_t end) | ||
2202 | { | 2295 | { |
2203 | int err = 0, correct_index = 0; | 2296 | int err = 0, correct_index = 0; |
2204 | int depth = ext_depth(inode), credits; | 2297 | int depth = ext_depth(inode), credits; |
@@ -2209,6 +2302,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2209 | unsigned short ex_ee_len; | 2302 | unsigned short ex_ee_len; |
2210 | unsigned uninitialized = 0; | 2303 | unsigned uninitialized = 0; |
2211 | struct ext4_extent *ex; | 2304 | struct ext4_extent *ex; |
2305 | struct ext4_map_blocks map; | ||
2212 | 2306 | ||
2213 | /* the header must be checked already in ext4_ext_remove_space() */ | 2307 | /* the header must be checked already in ext4_ext_remove_space() */ |
2214 | ext_debug("truncate since %u in leaf\n", start); | 2308 | ext_debug("truncate since %u in leaf\n", start); |
@@ -2238,31 +2332,95 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2238 | path[depth].p_ext = ex; | 2332 | path[depth].p_ext = ex; |
2239 | 2333 | ||
2240 | a = ex_ee_block > start ? ex_ee_block : start; | 2334 | a = ex_ee_block > start ? ex_ee_block : start; |
2241 | b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ? | 2335 | b = ex_ee_block+ex_ee_len - 1 < end ? |
2242 | ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK; | 2336 | ex_ee_block+ex_ee_len - 1 : end; |
2243 | 2337 | ||
2244 | ext_debug(" border %u:%u\n", a, b); | 2338 | ext_debug(" border %u:%u\n", a, b); |
2245 | 2339 | ||
2246 | if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { | 2340 | /* If this extent is beyond the end of the hole, skip it */ |
2247 | block = 0; | 2341 | if (end <= ex_ee_block) { |
2248 | num = 0; | 2342 | ex--; |
2249 | BUG(); | 2343 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2344 | ex_ee_len = ext4_ext_get_actual_len(ex); | ||
2345 | continue; | ||
2346 | } else if (a != ex_ee_block && | ||
2347 | b != ex_ee_block + ex_ee_len - 1) { | ||
2348 | /* | ||
2349 | * If this is a truncate, then this condition should | ||
2350 | * never happen because at least one of the end points | ||
2351 | * needs to be on the edge of the extent. | ||
2352 | */ | ||
2353 | if (end == EXT_MAX_BLOCKS - 1) { | ||
2354 | ext_debug(" bad truncate %u:%u\n", | ||
2355 | start, end); | ||
2356 | block = 0; | ||
2357 | num = 0; | ||
2358 | err = -EIO; | ||
2359 | goto out; | ||
2360 | } | ||
2361 | /* | ||
2362 | * else this is a hole punch, so the extent needs to | ||
2363 | * be split since neither edge of the hole is on the | ||
2364 | * extent edge | ||
2365 | */ | ||
2366 | else { | ||
2367 | map.m_pblk = ext4_ext_pblock(ex); | ||
2368 | map.m_lblk = ex_ee_block; | ||
2369 | map.m_len = b - ex_ee_block; | ||
2370 | |||
2371 | err = ext4_split_extent(handle, | ||
2372 | inode, path, &map, 0, | ||
2373 | EXT4_GET_BLOCKS_PUNCH_OUT_EXT | | ||
2374 | EXT4_GET_BLOCKS_PRE_IO); | ||
2375 | |||
2376 | if (err < 0) | ||
2377 | goto out; | ||
2378 | |||
2379 | ex_ee_len = ext4_ext_get_actual_len(ex); | ||
2380 | |||
2381 | b = ex_ee_block+ex_ee_len - 1 < end ? | ||
2382 | ex_ee_block+ex_ee_len - 1 : end; | ||
2383 | |||
2384 | /* Then remove tail of this extent */ | ||
2385 | block = ex_ee_block; | ||
2386 | num = a - block; | ||
2387 | } | ||
2250 | } else if (a != ex_ee_block) { | 2388 | } else if (a != ex_ee_block) { |
2251 | /* remove tail of the extent */ | 2389 | /* remove tail of the extent */ |
2252 | block = ex_ee_block; | 2390 | block = ex_ee_block; |
2253 | num = a - block; | 2391 | num = a - block; |
2254 | } else if (b != ex_ee_block + ex_ee_len - 1) { | 2392 | } else if (b != ex_ee_block + ex_ee_len - 1) { |
2255 | /* remove head of the extent */ | 2393 | /* remove head of the extent */ |
2256 | block = a; | 2394 | block = b; |
2257 | num = b - a; | 2395 | num = ex_ee_block + ex_ee_len - b; |
2258 | /* there is no "make a hole" API yet */ | 2396 | |
2259 | BUG(); | 2397 | /* |
2398 | * If this is a truncate, this condition | ||
2399 | * should never happen | ||
2400 | */ | ||
2401 | if (end == EXT_MAX_BLOCKS - 1) { | ||
2402 | ext_debug(" bad truncate %u:%u\n", | ||
2403 | start, end); | ||
2404 | err = -EIO; | ||
2405 | goto out; | ||
2406 | } | ||
2260 | } else { | 2407 | } else { |
2261 | /* remove whole extent: excellent! */ | 2408 | /* remove whole extent: excellent! */ |
2262 | block = ex_ee_block; | 2409 | block = ex_ee_block; |
2263 | num = 0; | 2410 | num = 0; |
2264 | BUG_ON(a != ex_ee_block); | 2411 | if (a != ex_ee_block) { |
2265 | BUG_ON(b != ex_ee_block + ex_ee_len - 1); | 2412 | ext_debug(" bad truncate %u:%u\n", |
2413 | start, end); | ||
2414 | err = -EIO; | ||
2415 | goto out; | ||
2416 | } | ||
2417 | |||
2418 | if (b != ex_ee_block + ex_ee_len - 1) { | ||
2419 | ext_debug(" bad truncate %u:%u\n", | ||
2420 | start, end); | ||
2421 | err = -EIO; | ||
2422 | goto out; | ||
2423 | } | ||
2266 | } | 2424 | } |
2267 | 2425 | ||
2268 | /* | 2426 | /* |
@@ -2293,7 +2451,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2293 | if (num == 0) { | 2451 | if (num == 0) { |
2294 | /* this extent is removed; mark slot entirely unused */ | 2452 | /* this extent is removed; mark slot entirely unused */ |
2295 | ext4_ext_store_pblock(ex, 0); | 2453 | ext4_ext_store_pblock(ex, 0); |
2296 | le16_add_cpu(&eh->eh_entries, -1); | 2454 | } else if (block != ex_ee_block) { |
2455 | /* | ||
2456 | * If this was a head removal, then we need to update | ||
2457 | * the physical block since it is now at a different | ||
2458 | * location | ||
2459 | */ | ||
2460 | ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a)); | ||
2297 | } | 2461 | } |
2298 | 2462 | ||
2299 | ex->ee_block = cpu_to_le32(block); | 2463 | ex->ee_block = cpu_to_le32(block); |
@@ -2309,8 +2473,29 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2309 | if (err) | 2473 | if (err) |
2310 | goto out; | 2474 | goto out; |
2311 | 2475 | ||
2476 | /* | ||
2477 | * If the extent was completely released, | ||
2478 | * we need to remove it from the leaf | ||
2479 | */ | ||
2480 | if (num == 0) { | ||
2481 | if (end != EXT_MAX_BLOCKS - 1) { | ||
2482 | /* | ||
2483 | * For hole punching, we need to scoot all the | ||
2484 | * extents up when an extent is removed so that | ||
2485 | * we don't have blank extents in the middle | ||
2486 | */ | ||
2487 | memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) * | ||
2488 | sizeof(struct ext4_extent)); | ||
2489 | |||
2490 | /* Now get rid of the one at the end */ | ||
2491 | memset(EXT_LAST_EXTENT(eh), 0, | ||
2492 | sizeof(struct ext4_extent)); | ||
2493 | } | ||
2494 | le16_add_cpu(&eh->eh_entries, -1); | ||
2495 | } | ||
2496 | |||
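
For truncate, a fully released extent is always the last one scanned, so clearing its slot suffices; for hole punch (end != EXT_MAX_BLOCKS - 1) the released extent can sit in the middle of the leaf, so the following entries are slid down with memmove() and the now-duplicate last slot is zeroed before eh_entries is decremented. The same compaction in a userspace sketch:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_extent { uint32_t lblk; uint16_t len; };

/* Remove leaf[idx] by scooting the following entries up one slot and
 * clearing the vacated last slot, as the hole-punch path does. */
static void remove_middle(struct toy_extent *leaf, int *nr, int idx)
{
    memmove(&leaf[idx], &leaf[idx + 1],
            (*nr - idx - 1) * sizeof(*leaf));  /* scoot up */
    memset(&leaf[*nr - 1], 0, sizeof(*leaf));  /* clear last slot */
    (*nr)--;                                   /* eh_entries-- */
}

int main(void)
{
    struct toy_extent leaf[3] = { {0, 4}, {10, 4}, {20, 4} };
    int nr = 3;

    remove_middle(leaf, &nr, 1);
    for (int i = 0; i < nr; i++)
        printf("extent %d: %u+%u\n", i, leaf[i].lblk, leaf[i].len);
    return 0;
}
```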
2312 | ext_debug("new extent: %u:%u:%llu\n", block, num, | 2497 | ext_debug("new extent: %u:%u:%llu\n", block, num, |
2313 | ext_pblock(ex)); | 2498 | ext4_ext_pblock(ex)); |
2314 | ex--; | 2499 | ex--; |
2315 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2500 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2316 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2501 | ex_ee_len = ext4_ext_get_actual_len(ex); |
@@ -2349,7 +2534,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) | |||
2349 | return 1; | 2534 | return 1; |
2350 | } | 2535 | } |
2351 | 2536 | ||
2352 | static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | 2537 | static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, |
2538 | ext4_lblk_t end) | ||
2353 | { | 2539 | { |
2354 | struct super_block *sb = inode->i_sb; | 2540 | struct super_block *sb = inode->i_sb; |
2355 | int depth = ext_depth(inode); | 2541 | int depth = ext_depth(inode); |
@@ -2388,7 +2574,8 @@ again: | |||
2388 | while (i >= 0 && err == 0) { | 2574 | while (i >= 0 && err == 0) { |
2389 | if (i == depth) { | 2575 | if (i == depth) { |
2390 | /* this is leaf block */ | 2576 | /* this is leaf block */ |
2391 | err = ext4_ext_rm_leaf(handle, inode, path, start); | 2577 | err = ext4_ext_rm_leaf(handle, inode, path, |
2578 | start, end); | ||
2392 | /* root level has p_bh == NULL, brelse() eats this */ | 2579 | /* root level has p_bh == NULL, brelse() eats this */ |
2393 | brelse(path[i].p_bh); | 2580 | brelse(path[i].p_bh); |
2394 | path[i].p_bh = NULL; | 2581 | path[i].p_bh = NULL; |
@@ -2421,9 +2608,9 @@ again: | |||
2421 | struct buffer_head *bh; | 2608 | struct buffer_head *bh; |
2422 | /* go to the next level */ | 2609 | /* go to the next level */ |
2423 | ext_debug("move to level %d (block %llu)\n", | 2610 | ext_debug("move to level %d (block %llu)\n", |
2424 | i + 1, idx_pblock(path[i].p_idx)); | 2611 | i + 1, ext4_idx_pblock(path[i].p_idx)); |
2425 | memset(path + i + 1, 0, sizeof(*path)); | 2612 | memset(path + i + 1, 0, sizeof(*path)); |
2426 | bh = sb_bread(sb, idx_pblock(path[i].p_idx)); | 2613 | bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx)); |
2427 | if (!bh) { | 2614 | if (!bh) { |
2428 | /* should we reset i_size? */ | 2615 | /* should we reset i_size? */ |
2429 | err = -EIO; | 2616 | err = -EIO; |
@@ -2535,84 +2722,217 @@ void ext4_ext_release(struct super_block *sb) | |||
2535 | #endif | 2722 | #endif |
2536 | } | 2723 | } |
2537 | 2724 | ||
2538 | static void bi_complete(struct bio *bio, int error) | ||
2539 | { | ||
2540 | complete((struct completion *)bio->bi_private); | ||
2541 | } | ||
2542 | |||
2543 | /* FIXME!! we need to try to merge to left or right after zero-out */ | 2725 | /* FIXME!! we need to try to merge to left or right after zero-out */ |
2544 | static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | 2726 | static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) |
2545 | { | 2727 | { |
2728 | ext4_fsblk_t ee_pblock; | ||
2729 | unsigned int ee_len; | ||
2546 | int ret; | 2730 | int ret; |
2547 | struct bio *bio; | ||
2548 | int blkbits, blocksize; | ||
2549 | sector_t ee_pblock; | ||
2550 | struct completion event; | ||
2551 | unsigned int ee_len, len, done, offset; | ||
2552 | 2731 | ||
2553 | |||
2554 | blkbits = inode->i_blkbits; | ||
2555 | blocksize = inode->i_sb->s_blocksize; | ||
2556 | ee_len = ext4_ext_get_actual_len(ex); | 2732 | ee_len = ext4_ext_get_actual_len(ex); |
2557 | ee_pblock = ext_pblock(ex); | 2733 | ee_pblock = ext4_ext_pblock(ex); |
2734 | |||
2735 | ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS); | ||
2736 | if (ret > 0) | ||
2737 | ret = 0; | ||
2738 | |||
2739 | return ret; | ||
2740 | } | ||
2741 | |||
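
The hand-rolled bio loop is gone: sb_issue_zeroout() hands the whole physical range to the block layer in one call (which may let the device zero the range itself), and any positive return is normalized to 0 here. Roughly the same effect in userspace, zeroing a block range of an ordinary file with pwrite() (a sketch of the idea, not the kernel path):

```c
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Zero nr_blocks blocks of 'blocksize' bytes starting at block 'start'.
 * Returns 0 on success, -1 on error, loosely like sb_issue_zeroout(). */
static int toy_zeroout(int fd, uint64_t start, uint64_t nr_blocks,
                       size_t blocksize)
{
    char *zeroes = calloc(1, blocksize);
    if (!zeroes)
        return -1;
    for (uint64_t i = 0; i < nr_blocks; i++) {
        off_t off = (off_t)((start + i) * blocksize);
        if (pwrite(fd, zeroes, blocksize, off) != (ssize_t)blocksize) {
            free(zeroes);
            return -1;
        }
    }
    free(zeroes);
    return 0;
}

int main(void)
{
    int fd = open("toy.img", O_RDWR | O_CREAT, 0644);
    if (fd < 0)
        return 1;
    int ret = toy_zeroout(fd, 8, 4, 4096);  /* zero blocks 8..11 */
    printf("zeroout: %d\n", ret);
    close(fd);
    return 0;
}
```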
2742 | /* | ||
2743 | * used by extent splitting. | ||
2744 | */ | ||
2745 | #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ | ||
2746 | due to ENOSPC */ | ||
2747 | #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ | ||
2748 | #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ | ||
2749 | |||
2750 | /* | ||
2751 | * ext4_split_extent_at() splits an extent at given block. | ||
2752 | * | ||
2753 | * @handle: the journal handle | ||
2754 | * @inode: the file inode | ||
2755 | * @path: the path to the extent | ||
2756 | * @split: the logical block where the extent is split. | ||
2757 | * @split_flag: indicates if the extent could be zeroed out if the split | ||
2758 | * fails, and the states (init or uninit) of the new extents. | ||
2759 | * @flags: flags used to insert new extent to extent tree. | ||
2760 | * | ||
2761 | * | ||
2762 | * Splits extent [a, b] into two extents [a, @split) and [@split, b], states | ||
2763 | * of which are determined by split_flag. | ||
2764 | * | ||
2765 | * There are two cases: | ||
2766 | * a> the extent is split into two extents. | ||
2767 | * b> no split is needed, and the extent is just marked. | ||
2768 | * | ||
2769 | * return 0 on success. | ||
2770 | */ | ||
2771 | static int ext4_split_extent_at(handle_t *handle, | ||
2772 | struct inode *inode, | ||
2773 | struct ext4_ext_path *path, | ||
2774 | ext4_lblk_t split, | ||
2775 | int split_flag, | ||
2776 | int flags) | ||
2777 | { | ||
2778 | ext4_fsblk_t newblock; | ||
2779 | ext4_lblk_t ee_block; | ||
2780 | struct ext4_extent *ex, newex, orig_ex; | ||
2781 | struct ext4_extent *ex2 = NULL; | ||
2782 | unsigned int ee_len, depth; | ||
2783 | int err = 0; | ||
2784 | |||
2785 | ext_debug("ext4_split_extents_at: inode %lu, logical" | ||
2786 | "block %llu\n", inode->i_ino, (unsigned long long)split); | ||
2558 | 2787 | ||
2559 | /* convert ee_pblock to 512 byte sectors */ | 2788 | ext4_ext_show_leaf(inode, path); |
2560 | ee_pblock = ee_pblock << (blkbits - 9); | 2789 | |
2790 | depth = ext_depth(inode); | ||
2791 | ex = path[depth].p_ext; | ||
2792 | ee_block = le32_to_cpu(ex->ee_block); | ||
2793 | ee_len = ext4_ext_get_actual_len(ex); | ||
2794 | newblock = split - ee_block + ext4_ext_pblock(ex); | ||
2561 | 2795 | ||
2562 | while (ee_len > 0) { | 2796 | BUG_ON(split < ee_block || split >= (ee_block + ee_len)); |
2563 | 2797 | ||
2564 | if (ee_len > BIO_MAX_PAGES) | 2798 | err = ext4_ext_get_access(handle, inode, path + depth); |
2565 | len = BIO_MAX_PAGES; | 2799 | if (err) |
2800 | goto out; | ||
2801 | |||
2802 | if (split == ee_block) { | ||
2803 | /* | ||
2804 | * case b: block @split is the block that the extent begins with | ||
2805 | * then we just change the state of the extent, and splitting | ||
2806 | * is not needed. | ||
2807 | */ | ||
2808 | if (split_flag & EXT4_EXT_MARK_UNINIT2) | ||
2809 | ext4_ext_mark_uninitialized(ex); | ||
2566 | else | 2810 | else |
2567 | len = ee_len; | 2811 | ext4_ext_mark_initialized(ex); |
2568 | 2812 | ||
2569 | bio = bio_alloc(GFP_NOIO, len); | 2813 | if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) |
2570 | if (!bio) | 2814 | ext4_ext_try_to_merge(inode, path, ex); |
2571 | return -ENOMEM; | ||
2572 | 2815 | ||
2573 | bio->bi_sector = ee_pblock; | 2816 | err = ext4_ext_dirty(handle, inode, path + depth); |
2574 | bio->bi_bdev = inode->i_sb->s_bdev; | 2817 | goto out; |
2818 | } | ||
2575 | 2819 | ||
2576 | done = 0; | 2820 | /* case a */ |
2577 | offset = 0; | 2821 | memcpy(&orig_ex, ex, sizeof(orig_ex)); |
2578 | while (done < len) { | 2822 | ex->ee_len = cpu_to_le16(split - ee_block); |
2579 | ret = bio_add_page(bio, ZERO_PAGE(0), | 2823 | if (split_flag & EXT4_EXT_MARK_UNINIT1) |
2580 | blocksize, offset); | 2824 | ext4_ext_mark_uninitialized(ex); |
2581 | if (ret != blocksize) { | ||
2582 | /* | ||
2583 | * We can't add any more pages because of | ||
2584 | * hardware limitations. Start a new bio. | ||
2585 | */ | ||
2586 | break; | ||
2587 | } | ||
2588 | done++; | ||
2589 | offset += blocksize; | ||
2590 | if (offset >= PAGE_CACHE_SIZE) | ||
2591 | offset = 0; | ||
2592 | } | ||
2593 | 2825 | ||
2594 | init_completion(&event); | 2826 | /* |
2595 | bio->bi_private = &event; | 2827 | * path may lead to new leaf, not to original leaf any more |
2596 | bio->bi_end_io = bi_complete; | 2828 | * after ext4_ext_insert_extent() returns, |
2597 | submit_bio(WRITE, bio); | 2829 | */ |
2598 | wait_for_completion(&event); | 2830 | err = ext4_ext_dirty(handle, inode, path + depth); |
2831 | if (err) | ||
2832 | goto fix_extent_len; | ||
2599 | 2833 | ||
2600 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | 2834 | ex2 = &newex; |
2601 | bio_put(bio); | 2835 | ex2->ee_block = cpu_to_le32(split); |
2602 | return -EIO; | 2836 | ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block)); |
2603 | } | 2837 | ext4_ext_store_pblock(ex2, newblock); |
2604 | bio_put(bio); | 2838 | if (split_flag & EXT4_EXT_MARK_UNINIT2) |
2605 | ee_len -= done; | 2839 | ext4_ext_mark_uninitialized(ex2); |
2606 | ee_pblock += done << (blkbits - 9); | 2840 | |
2841 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | ||
2842 | if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
2843 | err = ext4_ext_zeroout(inode, &orig_ex); | ||
2844 | if (err) | ||
2845 | goto fix_extent_len; | ||
2846 | /* update the extent length and mark as initialized */ | ||
2847 | ex->ee_len = cpu_to_le32(ee_len); | ||
2848 | ext4_ext_try_to_merge(inode, path, ex); | ||
2849 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
2850 | goto out; | ||
2851 | } else if (err) | ||
2852 | goto fix_extent_len; | ||
2853 | |||
2854 | out: | ||
2855 | ext4_ext_show_leaf(inode, path); | ||
2856 | return err; | ||
2857 | |||
2858 | fix_extent_len: | ||
2859 | ex->ee_len = orig_ex.ee_len; | ||
2860 | ext4_ext_dirty(handle, inode, path + depth); | ||
2861 | return err; | ||
2862 | } | ||
2863 | |||
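
The core of ext4_split_extent_at() is simple block arithmetic: the original extent keeps (split - ee_block) blocks, and the new second extent starts at logical block split with physical start pblock + (split - ee_block) and the remaining length. A sketch of just that computation (toy types; the real function also handles the no-split case, journaling, and the ENOSPC zeroout fallback):

```c
#include <stdint.h>
#include <stdio.h>

struct toy_extent { uint32_t lblk; uint64_t pblk; uint16_t len; };

/* Split *ex at logical block 'split'; the tail goes to *ex2.
 * Caller must ensure lblk < split < lblk + len. */
static void split_at(struct toy_extent *ex, uint32_t split,
                     struct toy_extent *ex2)
{
    uint32_t head_len = split - ex->lblk;

    ex2->lblk = split;
    ex2->pblk = ex->pblk + head_len;  /* 'newblock' in the patch */
    ex2->len = ex->len - head_len;
    ex->len = head_len;
}

int main(void)
{
    struct toy_extent ex = { .lblk = 100, .pblk = 5000, .len = 16 };
    struct toy_extent ex2;

    split_at(&ex, 106, &ex2);
    printf("head %u:%llu+%u, tail %u:%llu+%u\n",
           ex.lblk, (unsigned long long)ex.pblk, ex.len,
           ex2.lblk, (unsigned long long)ex2.pblk, ex2.len);
    return 0;
}
```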
2864 | /* | ||
2865 | * ext4_split_extent() splits an extent and marks the extent covered | ||
2866 | * by @map as split_flag indicates | ||
2867 | * | ||
2868 | * It may result in splitting the extent into multiple extents (up to three) | ||
2869 | * There are three possibilities: | ||
2870 | * a> There is no split required | ||
2871 | * b> Splits in two extents: Split is happening at either end of the extent | ||
2872 | * c> Splits in three extents: Someone is splitting in the middle of the extent | ||
2873 | * | ||
2874 | */ | ||
2875 | static int ext4_split_extent(handle_t *handle, | ||
2876 | struct inode *inode, | ||
2877 | struct ext4_ext_path *path, | ||
2878 | struct ext4_map_blocks *map, | ||
2879 | int split_flag, | ||
2880 | int flags) | ||
2881 | { | ||
2882 | ext4_lblk_t ee_block; | ||
2883 | struct ext4_extent *ex; | ||
2884 | unsigned int ee_len, depth; | ||
2885 | int err = 0; | ||
2886 | int uninitialized; | ||
2887 | int split_flag1, flags1; | ||
2888 | |||
2889 | depth = ext_depth(inode); | ||
2890 | ex = path[depth].p_ext; | ||
2891 | ee_block = le32_to_cpu(ex->ee_block); | ||
2892 | ee_len = ext4_ext_get_actual_len(ex); | ||
2893 | uninitialized = ext4_ext_is_uninitialized(ex); | ||
2894 | |||
2895 | if (map->m_lblk + map->m_len < ee_block + ee_len) { | ||
2896 | split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? | ||
2897 | EXT4_EXT_MAY_ZEROOUT : 0; | ||
2898 | flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; | ||
2899 | if (uninitialized) | ||
2900 | split_flag1 |= EXT4_EXT_MARK_UNINIT1 | | ||
2901 | EXT4_EXT_MARK_UNINIT2; | ||
2902 | err = ext4_split_extent_at(handle, inode, path, | ||
2903 | map->m_lblk + map->m_len, split_flag1, flags1); | ||
2904 | if (err) | ||
2905 | goto out; | ||
2607 | } | 2906 | } |
2608 | return 0; | 2907 | |
2908 | ext4_ext_drop_refs(path); | ||
2909 | path = ext4_ext_find_extent(inode, map->m_lblk, path); | ||
2910 | if (IS_ERR(path)) | ||
2911 | return PTR_ERR(path); | ||
2912 | |||
2913 | if (map->m_lblk >= ee_block) { | ||
2914 | split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? | ||
2915 | EXT4_EXT_MAY_ZEROOUT : 0; | ||
2916 | if (uninitialized) | ||
2917 | split_flag1 |= EXT4_EXT_MARK_UNINIT1; | ||
2918 | if (split_flag & EXT4_EXT_MARK_UNINIT2) | ||
2919 | split_flag1 |= EXT4_EXT_MARK_UNINIT2; | ||
2920 | err = ext4_split_extent_at(handle, inode, path, | ||
2921 | map->m_lblk, split_flag1, flags); | ||
2922 | if (err) | ||
2923 | goto out; | ||
2924 | } | ||
2925 | |||
2926 | ext4_ext_show_leaf(inode, path); | ||
2927 | out: | ||
2928 | return err ? err : map->m_len; | ||
2609 | } | 2929 | } |
2610 | 2930 | ||
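
ext4_split_extent() reduces case c (a mapped range strictly inside the extent) to two calls of the single-split primitive: first at map->m_lblk + map->m_len, detaching the right tail, then, after re-finding the path, at map->m_lblk, detaching the left head. A sketch of the ranges that fall out (toy types; path handling and the uninit flags are omitted):

```c
#include <stdint.h>
#include <stdio.h>

struct toy_range { uint32_t start, len; };

/* Split extent [ee_block, ee_block + ee_len) around the mapped range
 * [m_lblk, m_lblk + m_len); yields 1-3 pieces, like ext4_split_extent(). */
static int three_way_split(uint32_t ee_block, uint32_t ee_len,
                           uint32_t m_lblk, uint32_t m_len,
                           struct toy_range out[3])
{
    int n = 0;

    if (m_lblk > ee_block)  /* left head survives as its own extent */
        out[n++] = (struct toy_range){ ee_block, m_lblk - ee_block };
    out[n++] = (struct toy_range){ m_lblk, m_len };  /* mapped middle */
    if (m_lblk + m_len < ee_block + ee_len)          /* right tail */
        out[n++] = (struct toy_range){ m_lblk + m_len,
                        ee_block + ee_len - (m_lblk + m_len) };
    return n;
}

int main(void)
{
    struct toy_range out[3];
    int n = three_way_split(100, 32, 108, 8, out);

    for (int i = 0; i < n; i++)
        printf("piece %d: %u+%u\n", i, out[i].start, out[i].len);
    return 0;
}
```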
2611 | #define EXT4_EXT_ZERO_LEN 7 | 2931 | #define EXT4_EXT_ZERO_LEN 7 |
2612 | /* | 2932 | /* |
2613 | * This function is called by ext4_ext_map_blocks() if someone tries to write | 2933 | * This function is called by ext4_ext_map_blocks() if someone tries to write |
2614 | * to an uninitialized extent. It may result in splitting the uninitialized | 2934 | * to an uninitialized extent. It may result in splitting the uninitialized |
2615 | * extent into multiple extents (upto three - one initialized and two | 2935 | * extent into multiple extents (up to three - one initialized and two |
2616 | * uninitialized). | 2936 | * uninitialized). |
2617 | * There are three possibilities: | 2937 | * There are three possibilities: |
2618 | * a> There is no split required: Entire extent should be initialized | 2938 | * a> There is no split required: Entire extent should be initialized |
@@ -2624,17 +2944,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2624 | struct ext4_map_blocks *map, | 2944 | struct ext4_map_blocks *map, |
2625 | struct ext4_ext_path *path) | 2945 | struct ext4_ext_path *path) |
2626 | { | 2946 | { |
2627 | struct ext4_extent *ex, newex, orig_ex; | 2947 | struct ext4_map_blocks split_map; |
2628 | struct ext4_extent *ex1 = NULL; | 2948 | struct ext4_extent zero_ex; |
2629 | struct ext4_extent *ex2 = NULL; | 2949 | struct ext4_extent *ex; |
2630 | struct ext4_extent *ex3 = NULL; | ||
2631 | struct ext4_extent_header *eh; | ||
2632 | ext4_lblk_t ee_block, eof_block; | 2950 | ext4_lblk_t ee_block, eof_block; |
2633 | unsigned int allocated, ee_len, depth; | 2951 | unsigned int allocated, ee_len, depth; |
2634 | ext4_fsblk_t newblock; | ||
2635 | int err = 0; | 2952 | int err = 0; |
2636 | int ret = 0; | 2953 | int split_flag = 0; |
2637 | int may_zeroout; | ||
2638 | 2954 | ||
2639 | ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" | 2955 | ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" |
2640 | "block %llu, max_blocks %u\n", inode->i_ino, | 2956 | "block %llu, max_blocks %u\n", inode->i_ino, |
@@ -2646,279 +2962,86 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2646 | eof_block = map->m_lblk + map->m_len; | 2962 | eof_block = map->m_lblk + map->m_len; |
2647 | 2963 | ||
2648 | depth = ext_depth(inode); | 2964 | depth = ext_depth(inode); |
2649 | eh = path[depth].p_hdr; | ||
2650 | ex = path[depth].p_ext; | 2965 | ex = path[depth].p_ext; |
2651 | ee_block = le32_to_cpu(ex->ee_block); | 2966 | ee_block = le32_to_cpu(ex->ee_block); |
2652 | ee_len = ext4_ext_get_actual_len(ex); | 2967 | ee_len = ext4_ext_get_actual_len(ex); |
2653 | allocated = ee_len - (map->m_lblk - ee_block); | 2968 | allocated = ee_len - (map->m_lblk - ee_block); |
2654 | newblock = map->m_lblk - ee_block + ext_pblock(ex); | ||
2655 | |||
2656 | ex2 = ex; | ||
2657 | orig_ex.ee_block = ex->ee_block; | ||
2658 | orig_ex.ee_len = cpu_to_le16(ee_len); | ||
2659 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); | ||
2660 | 2969 | ||
2970 | WARN_ON(map->m_lblk < ee_block); | ||
2661 | /* | 2971 | /* |
2662 | * It is safe to convert extent to initialized via explicit | 2972 | * It is safe to convert extent to initialized via explicit |
2663 | * zeroout only if extent is fully inside i_size or new_size. | 2973 | * zeroout only if extent is fully inside i_size or new_size. |
2664 | */ | 2974 | */ |
2665 | may_zeroout = ee_block + ee_len <= eof_block; | 2975 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; |
2666 | 2976 | ||
2667 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
2668 | if (err) | ||
2669 | goto out; | ||
2670 | /* If extent has less than 2*EXT4_EXT_ZERO_LEN zeroout directly */ | 2977 | /* If extent has less than 2*EXT4_EXT_ZERO_LEN zeroout directly */ |
2671 | if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) { | 2978 | if (ee_len <= 2*EXT4_EXT_ZERO_LEN && |
2672 | err = ext4_ext_zeroout(inode, &orig_ex); | 2979 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { |
2980 | err = ext4_ext_zeroout(inode, ex); | ||
2673 | if (err) | 2981 | if (err) |
2674 | goto fix_extent_len; | ||
2675 | /* update the extent length and mark as initialized */ | ||
2676 | ex->ee_block = orig_ex.ee_block; | ||
2677 | ex->ee_len = orig_ex.ee_len; | ||
2678 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | ||
2679 | ext4_ext_dirty(handle, inode, path + depth); | ||
2680 | /* zeroed the full extent */ | ||
2681 | return allocated; | ||
2682 | } | ||
2683 | |||
2684 | /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ | ||
2685 | if (map->m_lblk > ee_block) { | ||
2686 | ex1 = ex; | ||
2687 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); | ||
2688 | ext4_ext_mark_uninitialized(ex1); | ||
2689 | ex2 = &newex; | ||
2690 | } | ||
2691 | /* | ||
2692 | * for sanity, update the length of the ex2 extent before | ||
2693 | * we insert ex3, if ex1 is NULL. This is to avoid temporary | ||
2694 | * overlap of blocks. | ||
2695 | */ | ||
2696 | if (!ex1 && allocated > map->m_len) | ||
2697 | ex2->ee_len = cpu_to_le16(map->m_len); | ||
2698 | /* ex3: to ee_block + ee_len : uninitialised */ | ||
2699 | if (allocated > map->m_len) { | ||
2700 | unsigned int newdepth; | ||
2701 | /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ | ||
2702 | if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { | ||
2703 | /* | ||
2704 | * map->m_lblk == ee_block is handled by the zerouout | ||
2705 | * at the beginning. | ||
2706 | * Mark first half uninitialized. | ||
2707 | * Mark second half initialized and zero out the | ||
2708 | * initialized extent | ||
2709 | */ | ||
2710 | ex->ee_block = orig_ex.ee_block; | ||
2711 | ex->ee_len = cpu_to_le16(ee_len - allocated); | ||
2712 | ext4_ext_mark_uninitialized(ex); | ||
2713 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | ||
2714 | ext4_ext_dirty(handle, inode, path + depth); | ||
2715 | |||
2716 | ex3 = &newex; | ||
2717 | ex3->ee_block = cpu_to_le32(map->m_lblk); | ||
2718 | ext4_ext_store_pblock(ex3, newblock); | ||
2719 | ex3->ee_len = cpu_to_le16(allocated); | ||
2720 | err = ext4_ext_insert_extent(handle, inode, path, | ||
2721 | ex3, 0); | ||
2722 | if (err == -ENOSPC) { | ||
2723 | err = ext4_ext_zeroout(inode, &orig_ex); | ||
2724 | if (err) | ||
2725 | goto fix_extent_len; | ||
2726 | ex->ee_block = orig_ex.ee_block; | ||
2727 | ex->ee_len = orig_ex.ee_len; | ||
2728 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | ||
2729 | ext4_ext_dirty(handle, inode, path + depth); | ||
2730 | /* blocks available from map->m_lblk */ | ||
2731 | return allocated; | ||
2732 | |||
2733 | } else if (err) | ||
2734 | goto fix_extent_len; | ||
2735 | |||
2736 | /* | ||
2737 | * We need to zero out the second half because | ||
2738 | * an fallocate request can update file size and | ||
2739 | * converting the second half to initialized extent | ||
2740 | * implies that we can leak some junk data to user | ||
2741 | * space. | ||
2742 | */ | ||
2743 | err = ext4_ext_zeroout(inode, ex3); | ||
2744 | if (err) { | ||
2745 | /* | ||
2746 | * We should actually mark the | ||
2747 | * second half as uninit and return error | ||
2748 | * Insert would have changed the extent | ||
2749 | */ | ||
2750 | depth = ext_depth(inode); | ||
2751 | ext4_ext_drop_refs(path); | ||
2752 | path = ext4_ext_find_extent(inode, map->m_lblk, | ||
2753 | path); | ||
2754 | if (IS_ERR(path)) { | ||
2755 | err = PTR_ERR(path); | ||
2756 | return err; | ||
2757 | } | ||
2758 | /* get the second half extent details */ | ||
2759 | ex = path[depth].p_ext; | ||
2760 | err = ext4_ext_get_access(handle, inode, | ||
2761 | path + depth); | ||
2762 | if (err) | ||
2763 | return err; | ||
2764 | ext4_ext_mark_uninitialized(ex); | ||
2765 | ext4_ext_dirty(handle, inode, path + depth); | ||
2766 | return err; | ||
2767 | } | ||
2768 | |||
2769 | /* zeroed the second half */ | ||
2770 | return allocated; | ||
2771 | } | ||
2772 | ex3 = &newex; | ||
2773 | ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); | ||
2774 | ext4_ext_store_pblock(ex3, newblock + map->m_len); | ||
2775 | ex3->ee_len = cpu_to_le16(allocated - map->m_len); | ||
2776 | ext4_ext_mark_uninitialized(ex3); | ||
2777 | err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); | ||
2778 | if (err == -ENOSPC && may_zeroout) { | ||
2779 | err = ext4_ext_zeroout(inode, &orig_ex); | ||
2780 | if (err) | ||
2781 | goto fix_extent_len; | ||
2782 | /* update the extent length and mark as initialized */ | ||
2783 | ex->ee_block = orig_ex.ee_block; | ||
2784 | ex->ee_len = orig_ex.ee_len; | ||
2785 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | ||
2786 | ext4_ext_dirty(handle, inode, path + depth); | ||
2787 | /* zeroed the full extent */ | ||
2788 | /* blocks available from map->m_lblk */ | ||
2789 | return allocated; | ||
2790 | |||
2791 | } else if (err) | ||
2792 | goto fix_extent_len; | ||
2793 | /* | ||
2794 | * The depth, and hence eh & ex might change | ||
2795 | * as part of the insert above. | ||
2796 | */ | ||
2797 | newdepth = ext_depth(inode); | ||
2798 | /* | ||
2799 | * update the extent length after successful insert of the | ||
2800 | * split extent | ||
2801 | */ | ||
2802 | ee_len -= ext4_ext_get_actual_len(ex3); | ||
2803 | orig_ex.ee_len = cpu_to_le16(ee_len); | ||
2804 | may_zeroout = ee_block + ee_len <= eof_block; | ||
2805 | |||
2806 | depth = newdepth; | ||
2807 | ext4_ext_drop_refs(path); | ||
2808 | path = ext4_ext_find_extent(inode, map->m_lblk, path); | ||
2809 | if (IS_ERR(path)) { | ||
2810 | err = PTR_ERR(path); | ||
2811 | goto out; | 2982 | goto out; |
2812 | } | ||
2813 | eh = path[depth].p_hdr; | ||
2814 | ex = path[depth].p_ext; | ||
2815 | if (ex2 != &newex) | ||
2816 | ex2 = ex; | ||
2817 | 2983 | ||
2818 | err = ext4_ext_get_access(handle, inode, path + depth); | 2984 | err = ext4_ext_get_access(handle, inode, path + depth); |
2819 | if (err) | 2985 | if (err) |
2820 | goto out; | 2986 | goto out; |
2821 | 2987 | ext4_ext_mark_initialized(ex); | |
2822 | allocated = map->m_len; | 2988 | ext4_ext_try_to_merge(inode, path, ex); |
2823 | 2989 | err = ext4_ext_dirty(handle, inode, path + depth); | |
2824 | /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying | 2990 | goto out; |
2825 | * to insert a extent in the middle zerout directly | ||
2826 | * otherwise give the extent a chance to merge to left | ||
2827 | */ | ||
2828 | if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && | ||
2829 | map->m_lblk != ee_block && may_zeroout) { | ||
2830 | err = ext4_ext_zeroout(inode, &orig_ex); | ||
2831 | if (err) | ||
2832 | goto fix_extent_len; | ||
2833 | /* update the extent length and mark as initialized */ | ||
2834 | ex->ee_block = orig_ex.ee_block; | ||
2835 | ex->ee_len = orig_ex.ee_len; | ||
2836 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | ||
2837 | ext4_ext_dirty(handle, inode, path + depth); | ||
2838 | /* zero out the first half */ | ||
2839 | /* blocks available from map->m_lblk */ | ||
2840 | return allocated; | ||
2841 | } | ||
2842 | } | ||
2843 | /* | ||
2844 | * If there was a change of depth as part of the | ||
2845 | * insertion of ex3 above, we need to update the length | ||
2846 | * of the ex1 extent again here | ||
2847 | */ | ||
2848 | if (ex1 && ex1 != ex) { | ||
2849 | ex1 = ex; | ||
2850 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); | ||
2851 | ext4_ext_mark_uninitialized(ex1); | ||
2852 | ex2 = &newex; | ||
2853 | } | ||
2854 | /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */ | ||
2855 | ex2->ee_block = cpu_to_le32(map->m_lblk); | ||
2856 | ext4_ext_store_pblock(ex2, newblock); | ||
2857 | ex2->ee_len = cpu_to_le16(allocated); | ||
2858 | if (ex2 != ex) | ||
2859 | goto insert; | ||
2860 | /* | ||
2861 | * New (initialized) extent starts from the first block | ||
2862 | * in the current extent. i.e., ex2 == ex | ||
2863 | * We have to see if it can be merged with the extent | ||
2864 | * on the left. | ||
2865 | */ | ||
2866 | if (ex2 > EXT_FIRST_EXTENT(eh)) { | ||
2867 | /* | ||
2868 | * To merge left, pass "ex2 - 1" to try_to_merge(), | ||
2869 | * since it merges towards right _only_. | ||
2870 | */ | ||
2871 | ret = ext4_ext_try_to_merge(inode, path, ex2 - 1); | ||
2872 | if (ret) { | ||
2873 | err = ext4_ext_correct_indexes(handle, inode, path); | ||
2874 | if (err) | ||
2875 | goto out; | ||
2876 | depth = ext_depth(inode); | ||
2877 | ex2--; | ||
2878 | } | ||
2879 | } | 2991 | } |
2992 | |||
2880 | /* | 2993 | /* |
2881 | * Try to Merge towards right. This might be required | 2994 | * four cases: |
2882 | * only when the whole extent is being written to. | 2995 | * 1. split the extent into three extents. |
2883 | * i.e. ex2 == ex and ex3 == NULL. | 2996 | * 2. split the extent into two extents, zeroout the first half. |
2997 | * 3. split the extent into two extents, zeroout the second half. | ||
2998 | * 4. split the extent into two extents without zeroout. | ||
2884 | */ | 2999 | */ |
2885 | if (!ex3) { | 3000 | split_map.m_lblk = map->m_lblk; |
2886 | ret = ext4_ext_try_to_merge(inode, path, ex2); | 3001 | split_map.m_len = map->m_len; |
2887 | if (ret) { | 3002 | |
2888 | err = ext4_ext_correct_indexes(handle, inode, path); | 3003 | if (allocated > map->m_len) { |
3004 | if (allocated <= EXT4_EXT_ZERO_LEN && | ||
3005 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
3006 | /* case 3 */ | ||
3007 | zero_ex.ee_block = | ||
3008 | cpu_to_le32(map->m_lblk); | ||
3009 | zero_ex.ee_len = cpu_to_le16(allocated); | ||
3010 | ext4_ext_store_pblock(&zero_ex, | ||
3011 | ext4_ext_pblock(ex) + map->m_lblk - ee_block); | ||
3012 | err = ext4_ext_zeroout(inode, &zero_ex); | ||
2889 | if (err) | 3013 | if (err) |
2890 | goto out; | 3014 | goto out; |
3015 | split_map.m_lblk = map->m_lblk; | ||
3016 | split_map.m_len = allocated; | ||
3017 | } else if ((map->m_lblk - ee_block + map->m_len < | ||
3018 | EXT4_EXT_ZERO_LEN) && | ||
3019 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
3020 | /* case 2 */ | ||
3021 | if (map->m_lblk != ee_block) { | ||
3022 | zero_ex.ee_block = ex->ee_block; | ||
3023 | zero_ex.ee_len = cpu_to_le16(map->m_lblk - | ||
3024 | ee_block); | ||
3025 | ext4_ext_store_pblock(&zero_ex, | ||
3026 | ext4_ext_pblock(ex)); | ||
3027 | err = ext4_ext_zeroout(inode, &zero_ex); | ||
3028 | if (err) | ||
3029 | goto out; | ||
3030 | } | ||
3031 | |||
3032 | split_map.m_lblk = ee_block; | ||
3033 | split_map.m_len = map->m_lblk - ee_block + map->m_len; | ||
3034 | allocated = map->m_len; | ||
2891 | } | 3035 | } |
2892 | } | 3036 | } |
2893 | /* Mark modified extent as dirty */ | 3037 | |
2894 | err = ext4_ext_dirty(handle, inode, path + depth); | 3038 | allocated = ext4_split_extent(handle, inode, path, |
2895 | goto out; | 3039 | &split_map, split_flag, 0); |
2896 | insert: | 3040 | if (allocated < 0) |
2897 | err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); | 3041 | err = allocated; |
2898 | if (err == -ENOSPC && may_zeroout) { | 3042 | |
2899 | err = ext4_ext_zeroout(inode, &orig_ex); | ||
2900 | if (err) | ||
2901 | goto fix_extent_len; | ||
2902 | /* update the extent length and mark as initialized */ | ||
2903 | ex->ee_block = orig_ex.ee_block; | ||
2904 | ex->ee_len = orig_ex.ee_len; | ||
2905 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | ||
2906 | ext4_ext_dirty(handle, inode, path + depth); | ||
2907 | /* zero out the first half */ | ||
2908 | return allocated; | ||
2909 | } else if (err) | ||
2910 | goto fix_extent_len; | ||
2911 | out: | 3043 | out: |
2912 | ext4_ext_show_leaf(inode, path); | ||
2913 | return err ? err : allocated; | 3044 | return err ? err : allocated; |
2914 | |||
2915 | fix_extent_len: | ||
2916 | ex->ee_block = orig_ex.ee_block; | ||
2917 | ex->ee_len = orig_ex.ee_len; | ||
2918 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | ||
2919 | ext4_ext_mark_uninitialized(ex); | ||
2920 | ext4_ext_dirty(handle, inode, path + depth); | ||
2921 | return err; | ||
2922 | } | 3045 | } |
2923 | 3046 | ||
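
The rewritten converter picks one of the four strategies listed in the comment above from how much of the uninitialized extent the write covers and whether zeroout is allowed: extents of at most 2*EXT4_EXT_ZERO_LEN blocks are zeroed whole, a short remainder past the write is zeroed so only one split is needed (case 3), a short head before the write is zeroed and folded into the split range (case 2), and otherwise the extent is split without any zeroing. A sketch of the decision only (ZERO_LEN mirrors EXT4_EXT_ZERO_LEN; the zeroing and splitting themselves are elided):

```c
#include <stdint.h>
#include <stdio.h>

#define ZERO_LEN 7  /* mirrors EXT4_EXT_ZERO_LEN */

/* Which strategy would the converter pick?
 * ee_len: extent length; off: write offset inside the extent;
 * m_len: write length; may_zeroout: extent fully inside i_size. */
static const char *pick_case(uint32_t ee_len, uint32_t off,
                             uint32_t m_len, int may_zeroout)
{
    uint32_t allocated = ee_len - off;  /* from write start to extent end */

    if (ee_len <= 2 * ZERO_LEN && may_zeroout)
        return "zero the whole extent, no split";
    if (allocated > m_len) {
        if (allocated <= ZERO_LEN && may_zeroout)
            return "case 3: zero second half, one split";
        if (off + m_len < ZERO_LEN && may_zeroout)
            return "case 2: zero first half, one split";
    }
    return "case 1/4: plain split, no zeroout";
}

int main(void)
{
    printf("%s\n", pick_case(10, 2, 4, 1));   /* tiny extent */
    printf("%s\n", pick_case(64, 60, 2, 1));  /* short tail past write */
    printf("%s\n", pick_case(64, 1, 3, 1));   /* short head before write */
    printf("%s\n", pick_case(64, 20, 8, 1));  /* write in the middle */
    return 0;
}
```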
2924 | /* | 3047 | /* |
@@ -2926,15 +3049,15 @@ fix_extent_len: | |||
2926 | * ext4_get_blocks_dio_write() when DIO to write | 3049 | * ext4_get_blocks_dio_write() when DIO to write |
2927 | * to an uninitialized extent. | 3050 | * to an uninitialized extent. |
2928 | * | 3051 | * |
2929 | * Writing to an uninitized extent may result in splitting the uninitialized | 3052 | * Writing to an uninitialized extent may result in splitting the uninitialized |
2930 | * extent into multiple /intialized unintialized extents (up to three) | 3053 | * extent into multiple initialized/uninitialized extents (up to three) |
2931 | * There are three possibilities: | 3054 | * There are three possibilities: |
2932 | * a> There is no split required: Entire extent should be uninitialized | 3055 | * a> There is no split required: Entire extent should be uninitialized |
2933 | * b> Splits in two extents: Write is happening at either end of the extent | 3056 | * b> Splits in two extents: Write is happening at either end of the extent |
2934 | * c> Splits in three extents: Someone is writing in the middle of the extent | 3057 | * c> Splits in three extents: Someone is writing in the middle of the extent |
2935 | * | 3058 | * |
2936 | * One or more index blocks may be needed if the extent tree grows after | 3059 | * One or more index blocks may be needed if the extent tree grows after |
2937 | * the unintialized extent split. To prevent ENOSPC from occurring when the | 3060 | * the uninitialized extent split. To prevent ENOSPC from occurring when the |
2938 | * IO completes, we need to split the uninitialized extent before DIO submits | 3061 | * IO completes, we need to split the uninitialized extent before DIO submits |
2939 | * the IO. The uninitialized extent handled at this time will be split | 3062 | * the IO. The uninitialized extent handled at this time will be split |
2940 | * into three uninitialized extents (at most). After IO completes, the part | 3063 | * into three uninitialized extents (at most). After IO completes, the part |
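The a/b/c possibilities listed in this comment depend only on whether the write touches the extent's edges. A tiny sketch, assuming the write is fully contained in the extent; split_pieces is a hypothetical helper, not a kernel function:

#include <stdio.h>

/* Number of extents an uninitialized extent [ee_block, ee_block+ee_len)
 * splits into for a fully contained write of [lblk, lblk+len). */
static unsigned split_pieces(unsigned ee_block, unsigned ee_len,
                             unsigned lblk, unsigned len)
{
	unsigned head = lblk - ee_block;     /* uninitialized blocks before the write */
	unsigned tail = ee_len - head - len; /* uninitialized blocks after the write */

	if (head == 0 && tail == 0)
		return 1; /* a> write covers the whole extent */
	if (head == 0 || tail == 0)
		return 2; /* b> write at either end */
	return 3;         /* c> write in the middle */
}

int main(void)
{
	printf("%u %u %u\n", split_pieces(100, 32, 100, 32), /* 1 */
	       split_pieces(100, 32, 100, 8),                /* 2 */
	       split_pieces(100, 32, 110, 4));               /* 3 */
	return 0;
}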
@@ -2949,15 +3072,11 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
2949 | struct ext4_ext_path *path, | 3072 | struct ext4_ext_path *path, |
2950 | int flags) | 3073 | int flags) |
2951 | { | 3074 | { |
2952 | struct ext4_extent *ex, newex, orig_ex; | 3075 | ext4_lblk_t eof_block; |
2953 | struct ext4_extent *ex1 = NULL; | 3076 | ext4_lblk_t ee_block; |
2954 | struct ext4_extent *ex2 = NULL; | 3077 | struct ext4_extent *ex; |
2955 | struct ext4_extent *ex3 = NULL; | 3078 | unsigned int ee_len; |
2956 | ext4_lblk_t ee_block, eof_block; | 3079 | int split_flag = 0, depth; |
2957 | unsigned int allocated, ee_len, depth; | ||
2958 | ext4_fsblk_t newblock; | ||
2959 | int err = 0; | ||
2960 | int may_zeroout; | ||
2961 | 3080 | ||
2962 | ext_debug("ext4_split_unwritten_extents: inode %lu, logical" | 3081 | ext_debug("ext4_split_unwritten_extents: inode %lu, logical" |
2963 | "block %llu, max_blocks %u\n", inode->i_ino, | 3082 | "block %llu, max_blocks %u\n", inode->i_ino, |
@@ -2967,156 +3086,22 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
2967 | inode->i_sb->s_blocksize_bits; | 3086 | inode->i_sb->s_blocksize_bits; |
2968 | if (eof_block < map->m_lblk + map->m_len) | 3087 | if (eof_block < map->m_lblk + map->m_len) |
2969 | eof_block = map->m_lblk + map->m_len; | 3088 | eof_block = map->m_lblk + map->m_len; |
2970 | |||
2971 | depth = ext_depth(inode); | ||
2972 | ex = path[depth].p_ext; | ||
2973 | ee_block = le32_to_cpu(ex->ee_block); | ||
2974 | ee_len = ext4_ext_get_actual_len(ex); | ||
2975 | allocated = ee_len - (map->m_lblk - ee_block); | ||
2976 | newblock = map->m_lblk - ee_block + ext_pblock(ex); | ||
2977 | |||
2978 | ex2 = ex; | ||
2979 | orig_ex.ee_block = ex->ee_block; | ||
2980 | orig_ex.ee_len = cpu_to_le16(ee_len); | ||
2981 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); | ||
2982 | |||
2983 | /* | 3089 | /* |
2984 | * It is safe to convert extent to initialized via explicit | 3090 | * It is safe to convert extent to initialized via explicit |
2985 | * zeroout only if extent is fully inside i_size or new_size. | 3091 | * zeroout only if extent is fully inside i_size or new_size. |
2986 | */ | 3092 | */ |
2987 | may_zeroout = ee_block + ee_len <= eof_block; | 3093 | depth = ext_depth(inode); |
2988 | 3094 | ex = path[depth].p_ext; | |
2989 | /* | 3095 | ee_block = le32_to_cpu(ex->ee_block); |
2990 | * If the uninitialized extent begins at the same logical | 3096 | ee_len = ext4_ext_get_actual_len(ex); |
2991 | * block where the write begins, and the write completely | ||
2992 | * covers the extent, then we don't need to split it. | ||
2993 | */ | ||
2994 | if ((map->m_lblk == ee_block) && (allocated <= map->m_len)) | ||
2995 | return allocated; | ||
2996 | |||
2997 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
2998 | if (err) | ||
2999 | goto out; | ||
3000 | /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ | ||
3001 | if (map->m_lblk > ee_block) { | ||
3002 | ex1 = ex; | ||
3003 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); | ||
3004 | ext4_ext_mark_uninitialized(ex1); | ||
3005 | ex2 = &newex; | ||
3006 | } | ||
3007 | /* | ||
3008 | * for sanity, update the length of the ex2 extent before | ||
3009 | * we insert ex3, if ex1 is NULL. This is to avoid temporary | ||
3010 | * overlap of blocks. | ||
3011 | */ | ||
3012 | if (!ex1 && allocated > map->m_len) | ||
3013 | ex2->ee_len = cpu_to_le16(map->m_len); | ||
3014 | /* ex3: to ee_block + ee_len : uninitialised */ | ||
3015 | if (allocated > map->m_len) { | ||
3016 | unsigned int newdepth; | ||
3017 | ex3 = &newex; | ||
3018 | ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); | ||
3019 | ext4_ext_store_pblock(ex3, newblock + map->m_len); | ||
3020 | ex3->ee_len = cpu_to_le16(allocated - map->m_len); | ||
3021 | ext4_ext_mark_uninitialized(ex3); | ||
3022 | err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); | ||
3023 | if (err == -ENOSPC && may_zeroout) { | ||
3024 | err = ext4_ext_zeroout(inode, &orig_ex); | ||
3025 | if (err) | ||
3026 | goto fix_extent_len; | ||
3027 | /* update the extent length and mark as initialized */ | ||
3028 | ex->ee_block = orig_ex.ee_block; | ||
3029 | ex->ee_len = orig_ex.ee_len; | ||
3030 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | ||
3031 | ext4_ext_dirty(handle, inode, path + depth); | ||
3032 | /* zeroed the full extent */ | ||
3033 | /* blocks available from map->m_lblk */ | ||
3034 | return allocated; | ||
3035 | |||
3036 | } else if (err) | ||
3037 | goto fix_extent_len; | ||
3038 | /* | ||
3039 | * The depth, and hence eh & ex might change | ||
3040 | * as part of the insert above. | ||
3041 | */ | ||
3042 | newdepth = ext_depth(inode); | ||
3043 | /* | ||
3044 | * update the extent length after successful insert of the | ||
3045 | * split extent | ||
3046 | */ | ||
3047 | ee_len -= ext4_ext_get_actual_len(ex3); | ||
3048 | orig_ex.ee_len = cpu_to_le16(ee_len); | ||
3049 | may_zeroout = ee_block + ee_len <= eof_block; | ||
3050 | |||
3051 | depth = newdepth; | ||
3052 | ext4_ext_drop_refs(path); | ||
3053 | path = ext4_ext_find_extent(inode, map->m_lblk, path); | ||
3054 | if (IS_ERR(path)) { | ||
3055 | err = PTR_ERR(path); | ||
3056 | goto out; | ||
3057 | } | ||
3058 | ex = path[depth].p_ext; | ||
3059 | if (ex2 != &newex) | ||
3060 | ex2 = ex; | ||
3061 | |||
3062 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
3063 | if (err) | ||
3064 | goto out; | ||
3065 | 3097 | ||
3066 | allocated = map->m_len; | 3098 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; |
3067 | } | 3099 | split_flag |= EXT4_EXT_MARK_UNINIT2; |
3068 | /* | ||
3069 | * If there was a change of depth as part of the | ||
3070 | * insertion of ex3 above, we need to update the length | ||
3071 | * of the ex1 extent again here | ||
3072 | */ | ||
3073 | if (ex1 && ex1 != ex) { | ||
3074 | ex1 = ex; | ||
3075 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); | ||
3076 | ext4_ext_mark_uninitialized(ex1); | ||
3077 | ex2 = &newex; | ||
3078 | } | ||
3079 | /* | ||
3080 | * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written | ||
3081 | * using direct I/O, uninitialised still. | ||
3082 | */ | ||
3083 | ex2->ee_block = cpu_to_le32(map->m_lblk); | ||
3084 | ext4_ext_store_pblock(ex2, newblock); | ||
3085 | ex2->ee_len = cpu_to_le16(allocated); | ||
3086 | ext4_ext_mark_uninitialized(ex2); | ||
3087 | if (ex2 != ex) | ||
3088 | goto insert; | ||
3089 | /* Mark modified extent as dirty */ | ||
3090 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
3091 | ext_debug("out here\n"); | ||
3092 | goto out; | ||
3093 | insert: | ||
3094 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | ||
3095 | if (err == -ENOSPC && may_zeroout) { | ||
3096 | err = ext4_ext_zeroout(inode, &orig_ex); | ||
3097 | if (err) | ||
3098 | goto fix_extent_len; | ||
3099 | /* update the extent length and mark as initialized */ | ||
3100 | ex->ee_block = orig_ex.ee_block; | ||
3101 | ex->ee_len = orig_ex.ee_len; | ||
3102 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | ||
3103 | ext4_ext_dirty(handle, inode, path + depth); | ||
3104 | /* zero out the first half */ | ||
3105 | return allocated; | ||
3106 | } else if (err) | ||
3107 | goto fix_extent_len; | ||
3108 | out: | ||
3109 | ext4_ext_show_leaf(inode, path); | ||
3110 | return err ? err : allocated; | ||
3111 | 3100 | ||
3112 | fix_extent_len: | 3101 | flags |= EXT4_GET_BLOCKS_PRE_IO; |
3113 | ex->ee_block = orig_ex.ee_block; | 3102 | return ext4_split_extent(handle, inode, path, map, split_flag, flags); |
3114 | ex->ee_len = orig_ex.ee_len; | ||
3115 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | ||
3116 | ext4_ext_mark_uninitialized(ex); | ||
3117 | ext4_ext_dirty(handle, inode, path + depth); | ||
3118 | return err; | ||
3119 | } | 3103 | } |
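The rewritten function above collapses the old manual ex1/ex2/ex3 bookkeeping into two flag bits handed to ext4_split_extent(). A sketch of just the flag computation, with assumed bit values standing in for EXT4_EXT_MAY_ZEROOUT and EXT4_EXT_MARK_UNINIT2:

#include <stdio.h>

#define MAY_ZEROOUT  0x1 /* assumed: zeroout fallback allowed on ENOSPC */
#define MARK_UNINIT2 0x2 /* assumed: keep the written piece uninitialized */

/* Zeroout is only safe when the whole extent sits at or below the EOF
 * block; the second flag defers conversion to written until end_io. */
static int compute_split_flag(unsigned ee_block, unsigned ee_len,
                              unsigned eof_block)
{
	int split_flag = 0;

	if (ee_block + ee_len <= eof_block)
		split_flag |= MAY_ZEROOUT;
	split_flag |= MARK_UNINIT2;
	return split_flag;
}

int main(void)
{
	printf("0x%x\n", compute_split_flag(0, 16, 32)); /* 0x3 */
	return 0;
}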
3104 | |||
3120 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, | 3105 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, |
3121 | struct inode *inode, | 3106 | struct inode *inode, |
3122 | struct ext4_ext_path *path) | 3107 | struct ext4_ext_path *path) |
@@ -3125,46 +3110,27 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, | |||
3125 | struct ext4_extent_header *eh; | 3110 | struct ext4_extent_header *eh; |
3126 | int depth; | 3111 | int depth; |
3127 | int err = 0; | 3112 | int err = 0; |
3128 | int ret = 0; | ||
3129 | 3113 | ||
3130 | depth = ext_depth(inode); | 3114 | depth = ext_depth(inode); |
3131 | eh = path[depth].p_hdr; | 3115 | eh = path[depth].p_hdr; |
3132 | ex = path[depth].p_ext; | 3116 | ex = path[depth].p_ext; |
3133 | 3117 | ||
3118 | ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" | ||
3119 | "block %llu, max_blocks %u\n", inode->i_ino, | ||
3120 | (unsigned long long)le32_to_cpu(ex->ee_block), | ||
3121 | ext4_ext_get_actual_len(ex)); | ||
3122 | |||
3134 | err = ext4_ext_get_access(handle, inode, path + depth); | 3123 | err = ext4_ext_get_access(handle, inode, path + depth); |
3135 | if (err) | 3124 | if (err) |
3136 | goto out; | 3125 | goto out; |
3137 | /* first mark the extent as initialized */ | 3126 | /* first mark the extent as initialized */ |
3138 | ext4_ext_mark_initialized(ex); | 3127 | ext4_ext_mark_initialized(ex); |
3139 | 3128 | ||
3140 | /* | 3129 | /* note: ext4_ext_correct_indexes() isn't needed here because |
3141 | * We have to see if it can be merged with the extent | 3130 | * borders are not changed |
3142 | * on the left. | ||
3143 | */ | ||
3144 | if (ex > EXT_FIRST_EXTENT(eh)) { | ||
3145 | /* | ||
3146 | * To merge left, pass "ex - 1" to try_to_merge(), | ||
3147 | * since it merges towards right _only_. | ||
3148 | */ | ||
3149 | ret = ext4_ext_try_to_merge(inode, path, ex - 1); | ||
3150 | if (ret) { | ||
3151 | err = ext4_ext_correct_indexes(handle, inode, path); | ||
3152 | if (err) | ||
3153 | goto out; | ||
3154 | depth = ext_depth(inode); | ||
3155 | ex--; | ||
3156 | } | ||
3157 | } | ||
3158 | /* | ||
3159 | * Try to Merge towards right. | ||
3160 | */ | 3131 | */ |
3161 | ret = ext4_ext_try_to_merge(inode, path, ex); | 3132 | ext4_ext_try_to_merge(inode, path, ex); |
3162 | if (ret) { | 3133 | |
3163 | err = ext4_ext_correct_indexes(handle, inode, path); | ||
3164 | if (err) | ||
3165 | goto out; | ||
3166 | depth = ext_depth(inode); | ||
3167 | } | ||
3168 | /* Mark modified extent as dirty */ | 3134 | /* Mark modified extent as dirty */ |
3169 | err = ext4_ext_dirty(handle, inode, path + depth); | 3135 | err = ext4_ext_dirty(handle, inode, path + depth); |
3170 | out: | 3136 | out: |
@@ -3180,6 +3146,56 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev, | |||
3180 | unmap_underlying_metadata(bdev, block + i); | 3146 | unmap_underlying_metadata(bdev, block + i); |
3181 | } | 3147 | } |
3182 | 3148 | ||
3149 | /* | ||
3150 | * Handle EOFBLOCKS_FL flag, clearing it if necessary | ||
3151 | */ | ||
3152 | static int check_eofblocks_fl(handle_t *handle, struct inode *inode, | ||
3153 | ext4_lblk_t lblk, | ||
3154 | struct ext4_ext_path *path, | ||
3155 | unsigned int len) | ||
3156 | { | ||
3157 | int i, depth; | ||
3158 | struct ext4_extent_header *eh; | ||
3159 | struct ext4_extent *last_ex; | ||
3160 | |||
3161 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) | ||
3162 | return 0; | ||
3163 | |||
3164 | depth = ext_depth(inode); | ||
3165 | eh = path[depth].p_hdr; | ||
3166 | |||
3167 | if (unlikely(!eh->eh_entries)) { | ||
3168 | EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " | ||
3169 | "EOFBLOCKS_FL set"); | ||
3170 | return -EIO; | ||
3171 | } | ||
3172 | last_ex = EXT_LAST_EXTENT(eh); | ||
3173 | /* | ||
3174 | * We should clear the EOFBLOCKS_FL flag if we are writing the | ||
3175 | * last block in the last extent in the file. We test this by | ||
3176 | * first checking to see if the caller to | ||
3177 | * ext4_ext_get_blocks() was interested in the last block (or | ||
3178 | * a block beyond the last block) in the current extent. If | ||
3179 | * this turns out to be false, we can bail out from this | ||
3180 | * function immediately. | ||
3181 | */ | ||
3182 | if (lblk + len < le32_to_cpu(last_ex->ee_block) + | ||
3183 | ext4_ext_get_actual_len(last_ex)) | ||
3184 | return 0; | ||
3185 | /* | ||
3186 | * If the caller does appear to be planning to write at or | ||
3187 | * beyond the end of the current extent, we then test to see | ||
3188 | * if the current extent is the last extent in the file, by | ||
3189 | * checking to make sure it was reached via the rightmost node | ||
3190 | * at each level of the tree. | ||
3191 | */ | ||
3192 | for (i = depth-1; i >= 0; i--) | ||
3193 | if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) | ||
3194 | return 0; | ||
3195 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | ||
3196 | return ext4_mark_inode_dirty(handle, inode); | ||
3197 | } | ||
3198 | |||
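The loop at the end of check_eofblocks_fl() is the interesting part: an extent can be the last one in the file only if every tree level was descended through its rightmost index. A flattened sketch; struct idx_pos is a hypothetical stand-in for struct ext4_ext_path:

#include <stdbool.h>
#include <stdio.h>

struct idx_pos {
	int p_idx;    /* index followed at this level */
	int last_idx; /* what EXT_LAST_INDEX() would report */
};

/* True only if the leaf was reached via the rightmost index at every
 * level, i.e. the extent under it is the last one in the file. */
static bool reached_via_rightmost_path(const struct idx_pos *path, int depth)
{
	for (int i = depth - 1; i >= 0; i--)
		if (path[i].p_idx != path[i].last_idx)
			return false;
	return true;
}

int main(void)
{
	struct idx_pos path[2] = { {3, 3}, {7, 7} };
	printf("%d\n", reached_via_rightmost_path(path, 2)); /* 1 */
	return 0;
}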
3183 | static int | 3199 | static int |
3184 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | 3200 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, |
3185 | struct ext4_map_blocks *map, | 3201 | struct ext4_map_blocks *map, |
@@ -3202,12 +3218,13 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3202 | path, flags); | 3218 | path, flags); |
3203 | /* | 3219 | /* |
3204 | * Flag the inode (non-AIO case) or end_io struct (AIO case) | 3220 | * Flag the inode (non-AIO case) or end_io struct (AIO case) |
3205 | * that this IO needs to convertion to written when IO is | 3221 | * that this IO needs conversion to written when IO is |
3206 | * completed | 3222 | * completed |
3207 | */ | 3223 | */ |
3208 | if (io) | 3224 | if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { |
3209 | io->flag = EXT4_IO_UNWRITTEN; | 3225 | io->flag = EXT4_IO_END_UNWRITTEN; |
3210 | else | 3226 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); |
3227 | } else | ||
3211 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | 3228 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
3212 | if (ext4_should_dioread_nolock(inode)) | 3229 | if (ext4_should_dioread_nolock(inode)) |
3213 | map->m_flags |= EXT4_MAP_UNINIT; | 3230 | map->m_flags |= EXT4_MAP_UNINIT; |
@@ -3217,8 +3234,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3217 | if ((flags & EXT4_GET_BLOCKS_CONVERT)) { | 3234 | if ((flags & EXT4_GET_BLOCKS_CONVERT)) { |
3218 | ret = ext4_convert_unwritten_extents_endio(handle, inode, | 3235 | ret = ext4_convert_unwritten_extents_endio(handle, inode, |
3219 | path); | 3236 | path); |
3220 | if (ret >= 0) | 3237 | if (ret >= 0) { |
3221 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3238 | ext4_update_inode_fsync_trans(handle, inode, 1); |
3239 | err = check_eofblocks_fl(handle, inode, map->m_lblk, | ||
3240 | path, map->m_len); | ||
3241 | } else | ||
3242 | err = ret; | ||
3222 | goto out2; | 3243 | goto out2; |
3223 | } | 3244 | } |
3224 | /* buffered IO case */ | 3245 | /* buffered IO case */ |
@@ -3244,8 +3265,14 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3244 | 3265 | ||
3245 | /* buffered write, writepage time, convert*/ | 3266 | /* buffered write, writepage time, convert*/ |
3246 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path); | 3267 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path); |
3247 | if (ret >= 0) | 3268 | if (ret >= 0) { |
3248 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3269 | ext4_update_inode_fsync_trans(handle, inode, 1); |
3270 | err = check_eofblocks_fl(handle, inode, map->m_lblk, path, | ||
3271 | map->m_len); | ||
3272 | if (err < 0) | ||
3273 | goto out2; | ||
3274 | } | ||
3275 | |||
3249 | out: | 3276 | out: |
3250 | if (ret <= 0) { | 3277 | if (ret <= 0) { |
3251 | err = ret; | 3278 | err = ret; |
@@ -3292,6 +3319,7 @@ out2: | |||
3292 | } | 3319 | } |
3293 | return err ? err : allocated; | 3320 | return err ? err : allocated; |
3294 | } | 3321 | } |
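The new io && !(io->flag & EXT4_IO_END_UNWRITTEN) guard above makes the flagging idempotent, so i_aiodio_unwritten counts each io_end exactly once. A reduced sketch of that invariant, with plain ints replacing the kernel's atomic_t and flag constants:

#include <stdio.h>

#define IO_END_UNWRITTEN 0x1 /* assumed stand-in for EXT4_IO_END_UNWRITTEN */

struct io_end  { int flag; };
struct inode_i { int aiodio_unwritten; /* atomic_t in the kernel */ };

/* Bump the counter only on the first flagging of a given io_end. */
static void flag_unwritten(struct io_end *io, struct inode_i *ei)
{
	if (!(io->flag & IO_END_UNWRITTEN)) {
		io->flag |= IO_END_UNWRITTEN;
		ei->aiodio_unwritten++; /* atomic_inc() in the kernel */
	}
}

int main(void)
{
	struct io_end io = {0};
	struct inode_i ei = {0};

	flag_unwritten(&io, &ei);
	flag_unwritten(&io, &ei); /* second call is a no-op */
	printf("%d\n", ei.aiodio_unwritten); /* 1 */
	return 0;
}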
3322 | |||
3295 | /* | 3323 | /* |
3296 | * Block allocation/map/preallocation routine for extents based files | 3324 | * Block allocation/map/preallocation routine for extents based files |
3297 | * | 3325 | * |
@@ -3314,21 +3342,24 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3314 | struct ext4_map_blocks *map, int flags) | 3342 | struct ext4_map_blocks *map, int flags) |
3315 | { | 3343 | { |
3316 | struct ext4_ext_path *path = NULL; | 3344 | struct ext4_ext_path *path = NULL; |
3317 | struct ext4_extent_header *eh; | 3345 | struct ext4_extent newex, *ex; |
3318 | struct ext4_extent newex, *ex, *last_ex; | 3346 | ext4_fsblk_t newblock = 0; |
3319 | ext4_fsblk_t newblock; | 3347 | int err = 0, depth, ret; |
3320 | int i, err = 0, depth, ret, cache_type; | ||
3321 | unsigned int allocated = 0; | 3348 | unsigned int allocated = 0; |
3349 | unsigned int punched_out = 0; | ||
3350 | unsigned int result = 0; | ||
3322 | struct ext4_allocation_request ar; | 3351 | struct ext4_allocation_request ar; |
3323 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3352 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; |
3353 | struct ext4_map_blocks punch_map; | ||
3324 | 3354 | ||
3325 | ext_debug("blocks %u/%u requested for inode %lu\n", | 3355 | ext_debug("blocks %u/%u requested for inode %lu\n", |
3326 | map->m_lblk, map->m_len, inode->i_ino); | 3356 | map->m_lblk, map->m_len, inode->i_ino); |
3357 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | ||
3327 | 3358 | ||
3328 | /* check in cache */ | 3359 | /* check in cache */ |
3329 | cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex); | 3360 | if (ext4_ext_in_cache(inode, map->m_lblk, &newex) && |
3330 | if (cache_type) { | 3361 | ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0)) { |
3331 | if (cache_type == EXT4_EXT_CACHE_GAP) { | 3362 | if (!newex.ee_start_lo && !newex.ee_start_hi) { |
3332 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { | 3363 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { |
3333 | /* | 3364 | /* |
3334 | * block isn't allocated yet and | 3365 | * block isn't allocated yet and |
@@ -3337,17 +3368,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3337 | goto out2; | 3368 | goto out2; |
3338 | } | 3369 | } |
3339 | /* we should allocate requested block */ | 3370 | /* we should allocate requested block */ |
3340 | } else if (cache_type == EXT4_EXT_CACHE_EXTENT) { | 3371 | } else { |
3341 | /* block is already allocated */ | 3372 | /* block is already allocated */ |
3342 | newblock = map->m_lblk | 3373 | newblock = map->m_lblk |
3343 | - le32_to_cpu(newex.ee_block) | 3374 | - le32_to_cpu(newex.ee_block) |
3344 | + ext_pblock(&newex); | 3375 | + ext4_ext_pblock(&newex); |
3345 | /* number of remaining blocks in the extent */ | 3376 | /* number of remaining blocks in the extent */ |
3346 | allocated = ext4_ext_get_actual_len(&newex) - | 3377 | allocated = ext4_ext_get_actual_len(&newex) - |
3347 | (map->m_lblk - le32_to_cpu(newex.ee_block)); | 3378 | (map->m_lblk - le32_to_cpu(newex.ee_block)); |
3348 | goto out; | 3379 | goto out; |
3349 | } else { | ||
3350 | BUG(); | ||
3351 | } | 3380 | } |
3352 | } | 3381 | } |
3353 | 3382 | ||
@@ -3374,12 +3403,11 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3374 | err = -EIO; | 3403 | err = -EIO; |
3375 | goto out2; | 3404 | goto out2; |
3376 | } | 3405 | } |
3377 | eh = path[depth].p_hdr; | ||
3378 | 3406 | ||
3379 | ex = path[depth].p_ext; | 3407 | ex = path[depth].p_ext; |
3380 | if (ex) { | 3408 | if (ex) { |
3381 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); | 3409 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); |
3382 | ext4_fsblk_t ee_start = ext_pblock(ex); | 3410 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); |
3383 | unsigned short ee_len; | 3411 | unsigned short ee_len; |
3384 | 3412 | ||
3385 | /* | 3413 | /* |
@@ -3395,17 +3423,84 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3395 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, | 3423 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, |
3396 | ee_block, ee_len, newblock); | 3424 | ee_block, ee_len, newblock); |
3397 | 3425 | ||
3398 | /* Do not put uninitialized extent in the cache */ | 3426 | if ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0) { |
3399 | if (!ext4_ext_is_uninitialized(ex)) { | 3427 | /* |
3400 | ext4_ext_put_in_cache(inode, ee_block, | 3428 | * Do not put uninitialized extent |
3401 | ee_len, ee_start, | 3429 | * in the cache |
3402 | EXT4_EXT_CACHE_EXTENT); | 3430 | */ |
3403 | goto out; | 3431 | if (!ext4_ext_is_uninitialized(ex)) { |
3432 | ext4_ext_put_in_cache(inode, ee_block, | ||
3433 | ee_len, ee_start); | ||
3434 | goto out; | ||
3435 | } | ||
3436 | ret = ext4_ext_handle_uninitialized_extents( | ||
3437 | handle, inode, map, path, flags, | ||
3438 | allocated, newblock); | ||
3439 | return ret; | ||
3404 | } | 3440 | } |
3405 | ret = ext4_ext_handle_uninitialized_extents(handle, | 3441 | |
3406 | inode, map, path, flags, allocated, | 3442 | /* |
3407 | newblock); | 3443 | * Punch out the map length, but only to the |
3408 | return ret; | 3444 | * end of the extent |
3445 | */ | ||
3446 | punched_out = allocated < map->m_len ? | ||
3447 | allocated : map->m_len; | ||
3448 | |||
3449 | /* | ||
3450 | * Sense extents need to be converted to | ||
3451 | * uninitialized, they must fit in an | ||
3452 | * uninitialized extent | ||
3453 | */ | ||
3454 | if (punched_out > EXT_UNINIT_MAX_LEN) | ||
3455 | punched_out = EXT_UNINIT_MAX_LEN; | ||
3456 | |||
3457 | punch_map.m_lblk = map->m_lblk; | ||
3458 | punch_map.m_pblk = newblock; | ||
3459 | punch_map.m_len = punched_out; | ||
3460 | punch_map.m_flags = 0; | ||
3461 | |||
3462 | /* Check to see if the extent needs to be split */ | ||
3463 | if (punch_map.m_len != ee_len || | ||
3464 | punch_map.m_lblk != ee_block) { | ||
3465 | |||
3466 | ret = ext4_split_extent(handle, inode, | ||
3467 | path, &punch_map, 0, | ||
3468 | EXT4_GET_BLOCKS_PUNCH_OUT_EXT | | ||
3469 | EXT4_GET_BLOCKS_PRE_IO); | ||
3470 | |||
3471 | if (ret < 0) { | ||
3472 | err = ret; | ||
3473 | goto out2; | ||
3474 | } | ||
3475 | /* | ||
3476 | * find extent for the block at | ||
3477 | * the start of the hole | ||
3478 | */ | ||
3479 | ext4_ext_drop_refs(path); | ||
3480 | kfree(path); | ||
3481 | |||
3482 | path = ext4_ext_find_extent(inode, | ||
3483 | map->m_lblk, NULL); | ||
3484 | if (IS_ERR(path)) { | ||
3485 | err = PTR_ERR(path); | ||
3486 | path = NULL; | ||
3487 | goto out2; | ||
3488 | } | ||
3489 | |||
3490 | depth = ext_depth(inode); | ||
3491 | ex = path[depth].p_ext; | ||
3492 | ee_len = ext4_ext_get_actual_len(ex); | ||
3493 | ee_block = le32_to_cpu(ex->ee_block); | ||
3494 | ee_start = ext4_ext_pblock(ex); | ||
3495 | |||
3496 | } | ||
3497 | |||
3498 | ext4_ext_mark_uninitialized(ex); | ||
3499 | |||
3500 | err = ext4_ext_remove_space(inode, map->m_lblk, | ||
3501 | map->m_lblk + punched_out); | ||
3502 | |||
3503 | goto out2; | ||
3409 | } | 3504 | } |
3410 | } | 3505 | } |
3411 | 3506 | ||
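The punch-out branch above clamps the punched length twice: to the end of the located extent, and to what still fits in a single uninitialized extent. A sketch of the clamp; 32767 is the assumed value of EXT_UNINIT_MAX_LEN:

#include <stdio.h>

#define UNINIT_MAX_LEN 32767 /* assumed stand-in for EXT_UNINIT_MAX_LEN */

/* Never punch past the extent, and never more than can be marked
 * uninitialized in one extent. */
static unsigned clamp_punch(unsigned allocated, unsigned requested)
{
	unsigned punched = allocated < requested ? allocated : requested;

	if (punched > UNINIT_MAX_LEN)
		punched = UNINIT_MAX_LEN;
	return punched;
}

int main(void)
{
	printf("%u\n", clamp_punch(50000, 40000)); /* 32767 */
	return 0;
}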
@@ -3467,6 +3562,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3467 | else | 3562 | else |
3468 | /* disable in-core preallocation for non-regular files */ | 3563 | /* disable in-core preallocation for non-regular files */ |
3469 | ar.flags = 0; | 3564 | ar.flags = 0; |
3565 | if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE) | ||
3566 | ar.flags |= EXT4_MB_HINT_NOPREALLOC; | ||
3470 | newblock = ext4_mb_new_blocks(handle, &ar, &err); | 3567 | newblock = ext4_mb_new_blocks(handle, &ar, &err); |
3471 | if (!newblock) | 3568 | if (!newblock) |
3472 | goto out2; | 3569 | goto out2; |
@@ -3481,15 +3578,16 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3481 | ext4_ext_mark_uninitialized(&newex); | 3578 | ext4_ext_mark_uninitialized(&newex); |
3482 | /* | 3579 | /* |
3483 | * io_end structure was created for every IO write to an | 3580 | * io_end structure was created for every IO write to an |
3484 | * uninitialized extent. To avoid unecessary conversion, | 3581 | * uninitialized extent. To avoid unnecessary conversion, |
3485 | * here we flag the IO that really needs the conversion. | 3582 | * here we flag the IO that really needs the conversion. |
3486 | * For the non-async direct IO case, flag the inode state | 3583 | * For the non-async direct IO case, flag the inode state |
3487 | * that we need to perform convertion when IO is done. | 3584 | * that we need to perform conversion when IO is done. |
3488 | */ | 3585 | */ |
3489 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 3586 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3490 | if (io) | 3587 | if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { |
3491 | io->flag = EXT4_IO_UNWRITTEN; | 3588 | io->flag = EXT4_IO_END_UNWRITTEN; |
3492 | else | 3589 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); |
3590 | } else | ||
3493 | ext4_set_inode_state(inode, | 3591 | ext4_set_inode_state(inode, |
3494 | EXT4_STATE_DIO_UNWRITTEN); | 3592 | EXT4_STATE_DIO_UNWRITTEN); |
3495 | } | 3593 | } |
@@ -3497,44 +3595,23 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3497 | map->m_flags |= EXT4_MAP_UNINIT; | 3595 | map->m_flags |= EXT4_MAP_UNINIT; |
3498 | } | 3596 | } |
3499 | 3597 | ||
3500 | if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) { | 3598 | err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); |
3501 | if (unlikely(!eh->eh_entries)) { | 3599 | if (err) |
3502 | EXT4_ERROR_INODE(inode, | 3600 | goto out2; |
3503 | "eh->eh_entries == 0 and " | 3601 | |
3504 | "EOFBLOCKS_FL set"); | ||
3505 | err = -EIO; | ||
3506 | goto out2; | ||
3507 | } | ||
3508 | last_ex = EXT_LAST_EXTENT(eh); | ||
3509 | /* | ||
3510 | * If the current leaf block was reached by looking at | ||
3511 | * the last index block all the way down the tree, and | ||
3512 | * we are extending the inode beyond the last extent | ||
3513 | * in the current leaf block, then clear the | ||
3514 | * EOFBLOCKS_FL flag. | ||
3515 | */ | ||
3516 | for (i = depth-1; i >= 0; i--) { | ||
3517 | if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) | ||
3518 | break; | ||
3519 | } | ||
3520 | if ((i < 0) && | ||
3521 | (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) + | ||
3522 | ext4_ext_get_actual_len(last_ex))) | ||
3523 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | ||
3524 | } | ||
3525 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | 3602 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); |
3526 | if (err) { | 3603 | if (err) { |
3527 | /* free data blocks we just allocated */ | 3604 | /* free data blocks we just allocated */ |
3528 | /* not a good idea to call discard here directly, | 3605 | /* not a good idea to call discard here directly, |
3529 | * but otherwise we'd need to call it on every free() */ | 3606 | * but otherwise we'd need to call it on every free() */ |
3530 | ext4_discard_preallocations(inode); | 3607 | ext4_discard_preallocations(inode); |
3531 | ext4_free_blocks(handle, inode, 0, ext_pblock(&newex), | 3608 | ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), |
3532 | ext4_ext_get_actual_len(&newex), 0); | 3609 | ext4_ext_get_actual_len(&newex), 0); |
3533 | goto out2; | 3610 | goto out2; |
3534 | } | 3611 | } |
3535 | 3612 | ||
3536 | /* previous routine could use block we allocated */ | 3613 | /* previous routine could use block we allocated */ |
3537 | newblock = ext_pblock(&newex); | 3614 | newblock = ext4_ext_pblock(&newex); |
3538 | allocated = ext4_ext_get_actual_len(&newex); | 3615 | allocated = ext4_ext_get_actual_len(&newex); |
3539 | if (allocated > map->m_len) | 3616 | if (allocated > map->m_len) |
3540 | allocated = map->m_len; | 3617 | allocated = map->m_len; |
@@ -3552,8 +3629,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3552 | * when it is _not_ an uninitialized extent. | 3629 | * when it is _not_ an uninitialized extent. |
3553 | */ | 3630 | */ |
3554 | if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { | 3631 | if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { |
3555 | ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock, | 3632 | ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock); |
3556 | EXT4_EXT_CACHE_EXTENT); | ||
3557 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3633 | ext4_update_inode_fsync_trans(handle, inode, 1); |
3558 | } else | 3634 | } else |
3559 | ext4_update_inode_fsync_trans(handle, inode, 0); | 3635 | ext4_update_inode_fsync_trans(handle, inode, 0); |
@@ -3569,7 +3645,13 @@ out2: | |||
3569 | ext4_ext_drop_refs(path); | 3645 | ext4_ext_drop_refs(path); |
3570 | kfree(path); | 3646 | kfree(path); |
3571 | } | 3647 | } |
3572 | return err ? err : allocated; | 3648 | trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, |
3649 | newblock, map->m_len, err ? err : allocated); | ||
3650 | |||
3651 | result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? | ||
3652 | punched_out : allocated; | ||
3653 | |||
3654 | return err ? err : result; | ||
3573 | } | 3655 | } |
3574 | 3656 | ||
3575 | void ext4_ext_truncate(struct inode *inode) | 3657 | void ext4_ext_truncate(struct inode *inode) |
@@ -3581,6 +3663,12 @@ void ext4_ext_truncate(struct inode *inode) | |||
3581 | int err = 0; | 3663 | int err = 0; |
3582 | 3664 | ||
3583 | /* | 3665 | /* |
3666 | * finish any pending end_io work so we won't run the risk of | ||
3667 | * converting any truncated blocks to initialized later | ||
3668 | */ | ||
3669 | ext4_flush_completed_IO(inode); | ||
3670 | |||
3671 | /* | ||
3584 | * probably the first extent we're going to free will be the last in the block | 3672 | * probably the first extent we're going to free will be the last in the block |
3585 | */ | 3673 | */ |
3586 | err = ext4_writepage_trans_blocks(inode); | 3674 | err = ext4_writepage_trans_blocks(inode); |
@@ -3611,7 +3699,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
3611 | 3699 | ||
3612 | last_block = (inode->i_size + sb->s_blocksize - 1) | 3700 | last_block = (inode->i_size + sb->s_blocksize - 1) |
3613 | >> EXT4_BLOCK_SIZE_BITS(sb); | 3701 | >> EXT4_BLOCK_SIZE_BITS(sb); |
3614 | err = ext4_ext_remove_space(inode, last_block); | 3702 | err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); |
3615 | 3703 | ||
3616 | /* In a multi-transaction truncate, we only make the final | 3704 | /* In a multi-transaction truncate, we only make the final |
3617 | * transaction synchronous. | 3705 | * transaction synchronous. |
@@ -3619,8 +3707,9 @@ void ext4_ext_truncate(struct inode *inode) | |||
3619 | if (IS_SYNC(inode)) | 3707 | if (IS_SYNC(inode)) |
3620 | ext4_handle_sync(handle); | 3708 | ext4_handle_sync(handle); |
3621 | 3709 | ||
3622 | out_stop: | ||
3623 | up_write(&EXT4_I(inode)->i_data_sem); | 3710 | up_write(&EXT4_I(inode)->i_data_sem); |
3711 | |||
3712 | out_stop: | ||
3624 | /* | 3713 | /* |
3625 | * If this was a simple ftruncate() and the file will remain alive, | 3714 | * If this was a simple ftruncate() and the file will remain alive, |
3626 | * then we need to clear up the orphan record which we created above. | 3715 | * then we need to clear up the orphan record which we created above. |
@@ -3667,14 +3756,15 @@ static void ext4_falloc_update_inode(struct inode *inode, | |||
3667 | } | 3756 | } |
3668 | 3757 | ||
3669 | /* | 3758 | /* |
3670 | * preallocate space for a file. This implements ext4's fallocate inode | 3759 | * preallocate space for a file. This implements ext4's fallocate file |
3671 | * operation, which gets called from sys_fallocate system call. | 3760 | * operation, which gets called from sys_fallocate system call. |
3672 | * For block-mapped files, posix_fallocate should fall back to the method | 3761 | * For block-mapped files, posix_fallocate should fall back to the method |
3673 | * of writing zeroes to the required new blocks (the same behavior which is | 3762 | * of writing zeroes to the required new blocks (the same behavior which is |
3674 | * expected for file systems which do not support fallocate() system call). | 3763 | * expected for file systems which do not support fallocate() system call). |
3675 | */ | 3764 | */ |
3676 | long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | 3765 | long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) |
3677 | { | 3766 | { |
3767 | struct inode *inode = file->f_path.dentry->d_inode; | ||
3678 | handle_t *handle; | 3768 | handle_t *handle; |
3679 | loff_t new_size; | 3769 | loff_t new_size; |
3680 | unsigned int max_blocks; | 3770 | unsigned int max_blocks; |
@@ -3691,10 +3781,14 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | |||
3691 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 3781 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
3692 | return -EOPNOTSUPP; | 3782 | return -EOPNOTSUPP; |
3693 | 3783 | ||
3694 | /* preallocation to directories is currently not supported */ | 3784 | /* Return error if mode is not supported */ |
3695 | if (S_ISDIR(inode->i_mode)) | 3785 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) |
3696 | return -ENODEV; | 3786 | return -EOPNOTSUPP; |
3787 | |||
3788 | if (mode & FALLOC_FL_PUNCH_HOLE) | ||
3789 | return ext4_punch_hole(file, offset, len); | ||
3697 | 3790 | ||
3791 | trace_ext4_fallocate_enter(inode, offset, len, mode); | ||
3698 | map.m_lblk = offset >> blkbits; | 3792 | map.m_lblk = offset >> blkbits; |
3699 | /* | 3793 | /* |
3700 | * We can't just convert len to max_blocks because | 3794 | * We can't just convert len to max_blocks because |
@@ -3710,6 +3804,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | |||
3710 | ret = inode_newsize_ok(inode, (len + offset)); | 3804 | ret = inode_newsize_ok(inode, (len + offset)); |
3711 | if (ret) { | 3805 | if (ret) { |
3712 | mutex_unlock(&inode->i_mutex); | 3806 | mutex_unlock(&inode->i_mutex); |
3807 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); | ||
3713 | return ret; | 3808 | return ret; |
3714 | } | 3809 | } |
3715 | retry: | 3810 | retry: |
@@ -3722,14 +3817,15 @@ retry: | |||
3722 | break; | 3817 | break; |
3723 | } | 3818 | } |
3724 | ret = ext4_map_blocks(handle, inode, &map, | 3819 | ret = ext4_map_blocks(handle, inode, &map, |
3725 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); | 3820 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | |
3821 | EXT4_GET_BLOCKS_NO_NORMALIZE); | ||
3726 | if (ret <= 0) { | 3822 | if (ret <= 0) { |
3727 | #ifdef EXT4FS_DEBUG | 3823 | #ifdef EXT4FS_DEBUG |
3728 | WARN_ON(ret <= 0); | 3824 | WARN_ON(ret <= 0); |
3729 | printk(KERN_ERR "%s: ext4_ext_map_blocks " | 3825 | printk(KERN_ERR "%s: ext4_ext_map_blocks " |
3730 | "returned error inode#%lu, block=%u, " | 3826 | "returned error inode#%lu, block=%u, " |
3731 | "max_blocks=%u", __func__, | 3827 | "max_blocks=%u", __func__, |
3732 | inode->i_ino, block, max_blocks); | 3828 | inode->i_ino, map.m_lblk, max_blocks); |
3733 | #endif | 3829 | #endif |
3734 | ext4_mark_inode_dirty(handle, inode); | 3830 | ext4_mark_inode_dirty(handle, inode); |
3735 | ret2 = ext4_journal_stop(handle); | 3831 | ret2 = ext4_journal_stop(handle); |
@@ -3754,6 +3850,8 @@ retry: | |||
3754 | goto retry; | 3850 | goto retry; |
3755 | } | 3851 | } |
3756 | mutex_unlock(&inode->i_mutex); | 3852 | mutex_unlock(&inode->i_mutex); |
3853 | trace_ext4_fallocate_exit(inode, offset, max_blocks, | ||
3854 | ret > 0 ? ret2 : ret); | ||
3757 | return ret > 0 ? ret2 : ret; | 3855 | return ret > 0 ? ret2 : ret; |
3758 | } | 3856 | } |
3759 | 3857 | ||
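The map.m_lblk/max_blocks setup above (and the comment warning that len alone can't just be shifted) comes down to rounding the end of the byte range up to a block boundary before subtracting the start block. A standalone sketch with an assumed block size:

#include <stdio.h>

static void byte_range_to_blocks(long long offset, long long len,
                                 unsigned blkbits,
                                 unsigned *lblk, unsigned *max_blocks)
{
	/* round the end of [offset, offset+len) up to a block boundary */
	long long end = (offset + len + (1LL << blkbits) - 1) >> blkbits;

	*lblk = (unsigned)(offset >> blkbits);
	*max_blocks = (unsigned)(end - *lblk);
}

int main(void)
{
	unsigned lblk, nblocks;

	/* 2 bytes straddling a 4096-byte block boundary need 2 blocks,
	 * even though len >> blkbits would be 0 */
	byte_range_to_blocks(4095, 2, 12, &lblk, &nblocks);
	printf("lblk=%u blocks=%u\n", lblk, nblocks); /* lblk=0 blocks=2 */
	return 0;
}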
@@ -3812,45 +3910,190 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
3812 | } | 3910 | } |
3813 | return ret > 0 ? ret2 : ret; | 3911 | return ret > 0 ? ret2 : ret; |
3814 | } | 3912 | } |
3913 | |||
3815 | /* | 3914 | /* |
3816 | * Callback function called for each extent to gather FIEMAP information. | 3915 | * Callback function called for each extent to gather FIEMAP information. |
3817 | */ | 3916 | */ |
3818 | static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | 3917 | static int ext4_ext_fiemap_cb(struct inode *inode, ext4_lblk_t next, |
3819 | struct ext4_ext_cache *newex, struct ext4_extent *ex, | 3918 | struct ext4_ext_cache *newex, struct ext4_extent *ex, |
3820 | void *data) | 3919 | void *data) |
3821 | { | 3920 | { |
3822 | struct fiemap_extent_info *fieinfo = data; | ||
3823 | unsigned char blksize_bits = inode->i_sb->s_blocksize_bits; | ||
3824 | __u64 logical; | 3921 | __u64 logical; |
3825 | __u64 physical; | 3922 | __u64 physical; |
3826 | __u64 length; | 3923 | __u64 length; |
3827 | __u32 flags = 0; | 3924 | __u32 flags = 0; |
3828 | int error; | 3925 | int ret = 0; |
3926 | struct fiemap_extent_info *fieinfo = data; | ||
3927 | unsigned char blksize_bits; | ||
3829 | 3928 | ||
3830 | logical = (__u64)newex->ec_block << blksize_bits; | 3929 | blksize_bits = inode->i_sb->s_blocksize_bits; |
3930 | logical = (__u64)newex->ec_block << blksize_bits; | ||
3831 | 3931 | ||
3832 | if (newex->ec_type == EXT4_EXT_CACHE_GAP) { | 3932 | if (newex->ec_start == 0) { |
3833 | pgoff_t offset; | 3933 | /* |
3834 | struct page *page; | 3934 | * No extent in extent-tree contains block @newex->ec_start, |
3935 | * then the block may stay in 1)a hole or 2)delayed-extent. | ||
3936 | * | ||
3937 | * Holes or delayed-extents are processed as follows. | ||
3938 | * 1. lookup dirty pages with specified range in pagecache. | ||
3939 | * If no page is got, then there is no delayed-extent and | ||
3940 | * return with EXT_CONTINUE. | ||
3941 | * 2. find the 1st mapped buffer, | ||
3942 | * 3. check if the mapped buffer is both in the request range | ||
3943 | * and a delayed buffer. If not, there is no delayed-extent, | ||
3944 | * then return. | ||
3945 | * 4. a delayed-extent is found, the extent will be collected. | ||
3946 | */ | ||
3947 | ext4_lblk_t end = 0; | ||
3948 | pgoff_t last_offset; | ||
3949 | pgoff_t offset; | ||
3950 | pgoff_t index; | ||
3951 | pgoff_t start_index = 0; | ||
3952 | struct page **pages = NULL; | ||
3835 | struct buffer_head *bh = NULL; | 3953 | struct buffer_head *bh = NULL; |
3954 | struct buffer_head *head = NULL; | ||
3955 | unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *); | ||
3956 | |||
3957 | pages = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
3958 | if (pages == NULL) | ||
3959 | return -ENOMEM; | ||
3836 | 3960 | ||
3837 | offset = logical >> PAGE_SHIFT; | 3961 | offset = logical >> PAGE_SHIFT; |
3838 | page = find_get_page(inode->i_mapping, offset); | 3962 | repeat: |
3839 | if (!page || !page_has_buffers(page)) | 3963 | last_offset = offset; |
3840 | return EXT_CONTINUE; | 3964 | head = NULL; |
3965 | ret = find_get_pages_tag(inode->i_mapping, &offset, | ||
3966 | PAGECACHE_TAG_DIRTY, nr_pages, pages); | ||
3967 | |||
3968 | if (!(flags & FIEMAP_EXTENT_DELALLOC)) { | ||
3969 | /* First time, try to find a mapped buffer. */ | ||
3970 | if (ret == 0) { | ||
3971 | out: | ||
3972 | for (index = 0; index < ret; index++) | ||
3973 | page_cache_release(pages[index]); | ||
3974 | /* just a hole. */ | ||
3975 | kfree(pages); | ||
3976 | return EXT_CONTINUE; | ||
3977 | } | ||
3978 | index = 0; | ||
3841 | 3979 | ||
3842 | bh = page_buffers(page); | 3980 | next_page: |
3981 | /* Try to find the 1st mapped buffer. */ | ||
3982 | end = ((__u64)pages[index]->index << PAGE_SHIFT) >> | ||
3983 | blksize_bits; | ||
3984 | if (!page_has_buffers(pages[index])) | ||
3985 | goto out; | ||
3986 | head = page_buffers(pages[index]); | ||
3987 | if (!head) | ||
3988 | goto out; | ||
3843 | 3989 | ||
3844 | if (!bh) | 3990 | index++; |
3845 | return EXT_CONTINUE; | 3991 | bh = head; |
3992 | do { | ||
3993 | if (end >= newex->ec_block + | ||
3994 | newex->ec_len) | ||
3995 | /* The buffer is out of | ||
3996 | * the request range. | ||
3997 | */ | ||
3998 | goto out; | ||
3999 | |||
4000 | if (buffer_mapped(bh) && | ||
4001 | end >= newex->ec_block) { | ||
4002 | start_index = index - 1; | ||
4003 | /* get the 1st mapped buffer. */ | ||
4004 | goto found_mapped_buffer; | ||
4005 | } | ||
4006 | |||
4007 | bh = bh->b_this_page; | ||
4008 | end++; | ||
4009 | } while (bh != head); | ||
3846 | 4010 | ||
3847 | if (buffer_delay(bh)) { | 4011 | /* No mapped buffer in the range found in this page, |
3848 | flags |= FIEMAP_EXTENT_DELALLOC; | 4012 | * so we need to look up the next page. |
3849 | page_cache_release(page); | 4013 | */ |
4014 | if (index >= ret) { | ||
4015 | /* There is no page left, but we need to limit | ||
4016 | * newex->ec_len. | ||
4017 | */ | ||
4018 | newex->ec_len = end - newex->ec_block; | ||
4019 | goto out; | ||
4020 | } | ||
4021 | goto next_page; | ||
3850 | } else { | 4022 | } else { |
3851 | page_cache_release(page); | 4023 | /* Find contiguous delayed buffers. */ |
3852 | return EXT_CONTINUE; | 4024 | if (ret > 0 && pages[0]->index == last_offset) |
4025 | head = page_buffers(pages[0]); | ||
4026 | bh = head; | ||
4027 | index = 1; | ||
4028 | start_index = 0; | ||
4029 | } | ||
4030 | |||
4031 | found_mapped_buffer: | ||
4032 | if (bh != NULL && buffer_delay(bh)) { | ||
4033 | /* 1st or contiguous delayed buffer found. */ | ||
4034 | if (!(flags & FIEMAP_EXTENT_DELALLOC)) { | ||
4035 | /* | ||
4036 | * 1st delayed buffer found, record | ||
4037 | * the start of extent. | ||
4038 | */ | ||
4039 | flags |= FIEMAP_EXTENT_DELALLOC; | ||
4040 | newex->ec_block = end; | ||
4041 | logical = (__u64)end << blksize_bits; | ||
4042 | } | ||
4043 | /* Find contiguous delayed buffers. */ | ||
4044 | do { | ||
4045 | if (!buffer_delay(bh)) | ||
4046 | goto found_delayed_extent; | ||
4047 | bh = bh->b_this_page; | ||
4048 | end++; | ||
4049 | } while (bh != head); | ||
4050 | |||
4051 | for (; index < ret; index++) { | ||
4052 | if (!page_has_buffers(pages[index])) { | ||
4053 | bh = NULL; | ||
4054 | break; | ||
4055 | } | ||
4056 | head = page_buffers(pages[index]); | ||
4057 | if (!head) { | ||
4058 | bh = NULL; | ||
4059 | break; | ||
4060 | } | ||
4061 | |||
4062 | if (pages[index]->index != | ||
4063 | pages[start_index]->index + index | ||
4064 | - start_index) { | ||
4065 | /* Blocks are not contiguous. */ | ||
4066 | bh = NULL; | ||
4067 | break; | ||
4068 | } | ||
4069 | bh = head; | ||
4070 | do { | ||
4071 | if (!buffer_delay(bh)) | ||
4072 | /* Delayed-extent ends. */ | ||
4073 | goto found_delayed_extent; | ||
4074 | bh = bh->b_this_page; | ||
4075 | end++; | ||
4076 | } while (bh != head); | ||
4077 | } | ||
4078 | } else if (!(flags & FIEMAP_EXTENT_DELALLOC)) | ||
4079 | /* a hole found. */ | ||
4080 | goto out; | ||
4081 | |||
4082 | found_delayed_extent: | ||
4083 | newex->ec_len = min(end - newex->ec_block, | ||
4084 | (ext4_lblk_t)EXT_INIT_MAX_LEN); | ||
4085 | if (ret == nr_pages && bh != NULL && | ||
4086 | newex->ec_len < EXT_INIT_MAX_LEN && | ||
4087 | buffer_delay(bh)) { | ||
4088 | /* Have not collected an extent and continue. */ | ||
4089 | for (index = 0; index < ret; index++) | ||
4090 | page_cache_release(pages[index]); | ||
4091 | goto repeat; | ||
3853 | } | 4092 | } |
4093 | |||
4094 | for (index = 0; index < ret; index++) | ||
4095 | page_cache_release(pages[index]); | ||
4096 | kfree(pages); | ||
3854 | } | 4097 | } |
3855 | 4098 | ||
3856 | physical = (__u64)newex->ec_start << blksize_bits; | 4099 | physical = (__u64)newex->ec_start << blksize_bits; |
@@ -3859,32 +4102,15 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | |||
3859 | if (ex && ext4_ext_is_uninitialized(ex)) | 4102 | if (ex && ext4_ext_is_uninitialized(ex)) |
3860 | flags |= FIEMAP_EXTENT_UNWRITTEN; | 4103 | flags |= FIEMAP_EXTENT_UNWRITTEN; |
3861 | 4104 | ||
3862 | /* | 4105 | if (next == EXT_MAX_BLOCKS) |
3863 | * If this extent reaches EXT_MAX_BLOCK, it must be last. | ||
3864 | * | ||
3865 | * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK, | ||
3866 | * this also indicates no more allocated blocks. | ||
3867 | * | ||
3868 | * XXX this might miss a single-block extent at EXT_MAX_BLOCK | ||
3869 | */ | ||
3870 | if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK || | ||
3871 | newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) { | ||
3872 | loff_t size = i_size_read(inode); | ||
3873 | loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb); | ||
3874 | |||
3875 | flags |= FIEMAP_EXTENT_LAST; | 4106 | flags |= FIEMAP_EXTENT_LAST; |
3876 | if ((flags & FIEMAP_EXTENT_DELALLOC) && | ||
3877 | logical+length > size) | ||
3878 | length = (size - logical + bs - 1) & ~(bs-1); | ||
3879 | } | ||
3880 | 4107 | ||
3881 | error = fiemap_fill_next_extent(fieinfo, logical, physical, | 4108 | ret = fiemap_fill_next_extent(fieinfo, logical, physical, |
3882 | length, flags); | 4109 | length, flags); |
3883 | if (error < 0) | 4110 | if (ret < 0) |
3884 | return error; | 4111 | return ret; |
3885 | if (error == 1) | 4112 | if (ret == 1) |
3886 | return EXT_BREAK; | 4113 | return EXT_BREAK; |
3887 | |||
3888 | return EXT_CONTINUE; | 4114 | return EXT_CONTINUE; |
3889 | } | 4115 | } |
3890 | 4116 | ||
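Steps 2-4 of the lookup described in the comment inside ext4_ext_fiemap_cb amount to: skip to the first delayed block in the request range, then collect the contiguous run. A heavily flattened sketch that replaces the per-page buffer_head walks with a plain array of per-block delayed flags; find_delayed_run is hypothetical:

#include <stdbool.h>
#include <stdio.h>

static bool find_delayed_run(const bool *delayed, unsigned nblocks,
                             unsigned req_start,
                             unsigned *run_start, unsigned *run_len)
{
	unsigned i = req_start;

	while (i < nblocks && !delayed[i]) /* find the 1st delayed block */
		i++;
	if (i == nblocks)
		return false;              /* just a hole */
	*run_start = i;
	while (i < nblocks && delayed[i])  /* collect the contiguous run */
		i++;
	*run_len = i - *run_start;
	return true;
}

int main(void)
{
	bool d[] = { false, false, true, true, true, false };
	unsigned s, l;

	if (find_delayed_run(d, 6, 0, &s, &l))
		printf("delayed extent at %u, len %u\n", s, l); /* at 2, len 3 */
	return 0;
}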
@@ -3926,6 +4152,177 @@ static int ext4_xattr_fiemap(struct inode *inode, | |||
3926 | return (error < 0 ? error : 0); | 4152 | return (error < 0 ? error : 0); |
3927 | } | 4153 | } |
3928 | 4154 | ||
4155 | /* | ||
4156 | * ext4_ext_punch_hole | ||
4157 | * | ||
4158 | * Punches a hole of "length" bytes in a file starting | ||
4159 | * at byte "offset" | ||
4160 | * | ||
4161 | * @inode: The inode of the file to punch a hole in | ||
4162 | * @offset: The starting byte offset of the hole | ||
4163 | * @length: The length of the hole | ||
4164 | * | ||
4165 | * Returns the number of blocks removed or negative on error | ||
4166 | */ | ||
4167 | int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | ||
4168 | { | ||
4169 | struct inode *inode = file->f_path.dentry->d_inode; | ||
4170 | struct super_block *sb = inode->i_sb; | ||
4171 | struct ext4_ext_cache cache_ex; | ||
4172 | ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks; | ||
4173 | struct address_space *mapping = inode->i_mapping; | ||
4174 | struct ext4_map_blocks map; | ||
4175 | handle_t *handle; | ||
4176 | loff_t first_block_offset, last_block_offset, block_len; | ||
4177 | loff_t first_page, last_page, first_page_offset, last_page_offset; | ||
4178 | int ret, credits, blocks_released, err = 0; | ||
4179 | |||
4180 | first_block = (offset + sb->s_blocksize - 1) >> | ||
4181 | EXT4_BLOCK_SIZE_BITS(sb); | ||
4182 | last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); | ||
4183 | |||
4184 | first_block_offset = first_block << EXT4_BLOCK_SIZE_BITS(sb); | ||
4185 | last_block_offset = last_block << EXT4_BLOCK_SIZE_BITS(sb); | ||
4186 | |||
4187 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
4188 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; | ||
4189 | |||
4190 | first_page_offset = first_page << PAGE_CACHE_SHIFT; | ||
4191 | last_page_offset = last_page << PAGE_CACHE_SHIFT; | ||
4192 | |||
4193 | /* | ||
4194 | * Write out all dirty pages to avoid race conditions, | ||
4195 | * then release them. | ||
4196 | */ | ||
4197 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
4198 | err = filemap_write_and_wait_range(mapping, | ||
4199 | first_page_offset == 0 ? 0 : first_page_offset-1, | ||
4200 | last_page_offset); | ||
4201 | |||
4202 | if (err) | ||
4203 | return err; | ||
4204 | } | ||
4205 | |||
4206 | /* Now release the pages */ | ||
4207 | if (last_page_offset > first_page_offset) { | ||
4208 | truncate_inode_pages_range(mapping, first_page_offset, | ||
4209 | last_page_offset-1); | ||
4210 | } | ||
4211 | |||
4212 | /* finish any pending end_io work */ | ||
4213 | ext4_flush_completed_IO(inode); | ||
4214 | |||
4215 | credits = ext4_writepage_trans_blocks(inode); | ||
4216 | handle = ext4_journal_start(inode, credits); | ||
4217 | if (IS_ERR(handle)) | ||
4218 | return PTR_ERR(handle); | ||
4219 | |||
4220 | err = ext4_orphan_add(handle, inode); | ||
4221 | if (err) | ||
4222 | goto out; | ||
4223 | |||
4224 | /* | ||
4225 | * Now we need to zero out the non-block-aligned data. | ||
4226 | * If the hole fits within a single block, just | ||
4227 | * zero out the middle of that block | ||
4228 | */ | ||
4229 | if (first_block > last_block) | ||
4230 | ext4_block_zero_page_range(handle, mapping, offset, length); | ||
4231 | else { | ||
4232 | /* zero out the head of the hole before the first block */ | ||
4233 | block_len = first_block_offset - offset; | ||
4234 | if (block_len > 0) | ||
4235 | ext4_block_zero_page_range(handle, mapping, | ||
4236 | offset, block_len); | ||
4237 | |||
4238 | /* zero out the tail of the hole after the last block */ | ||
4239 | block_len = offset + length - last_block_offset; | ||
4240 | if (block_len > 0) { | ||
4241 | ext4_block_zero_page_range(handle, mapping, | ||
4242 | last_block_offset, block_len); | ||
4243 | } | ||
4244 | } | ||
4245 | |||
4246 | /* If there are no blocks to remove, return now */ | ||
4247 | if (first_block >= last_block) | ||
4248 | goto out; | ||
4249 | |||
4250 | down_write(&EXT4_I(inode)->i_data_sem); | ||
4251 | ext4_ext_invalidate_cache(inode); | ||
4252 | ext4_discard_preallocations(inode); | ||
4253 | |||
4254 | /* | ||
4255 | * Loop over all the blocks and identify blocks | ||
4256 | * that need to be punched out | ||
4257 | */ | ||
4258 | iblock = first_block; | ||
4259 | blocks_released = 0; | ||
4260 | while (iblock < last_block) { | ||
4261 | max_blocks = last_block - iblock; | ||
4262 | num_blocks = 1; | ||
4263 | memset(&map, 0, sizeof(map)); | ||
4264 | map.m_lblk = iblock; | ||
4265 | map.m_len = max_blocks; | ||
4266 | ret = ext4_ext_map_blocks(handle, inode, &map, | ||
4267 | EXT4_GET_BLOCKS_PUNCH_OUT_EXT); | ||
4268 | |||
4269 | if (ret > 0) { | ||
4270 | blocks_released += ret; | ||
4271 | num_blocks = ret; | ||
4272 | } else if (ret == 0) { | ||
4273 | /* | ||
4274 | * If map blocks could not find the block, | ||
4275 | * then it is in a hole. If the hole was | ||
4276 | * not already cached, then map blocks should | ||
4277 | * put it in the cache. So we can get the hole | ||
4278 | * out of the cache | ||
4279 | */ | ||
4280 | memset(&cache_ex, 0, sizeof(cache_ex)); | ||
4281 | if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) && | ||
4282 | !cache_ex.ec_start) { | ||
4283 | |||
4284 | /* The hole is cached */ | ||
4285 | num_blocks = cache_ex.ec_block + | ||
4286 | cache_ex.ec_len - iblock; | ||
4287 | |||
4288 | } else { | ||
4289 | /* The block could not be identified */ | ||
4290 | err = -EIO; | ||
4291 | break; | ||
4292 | } | ||
4293 | } else { | ||
4294 | /* Map blocks error */ | ||
4295 | err = ret; | ||
4296 | break; | ||
4297 | } | ||
4298 | |||
4299 | if (num_blocks == 0) { | ||
4300 | /* This condition should never happen */ | ||
4301 | ext_debug("Block lookup failed"); | ||
4302 | err = -EIO; | ||
4303 | break; | ||
4304 | } | ||
4305 | |||
4306 | iblock += num_blocks; | ||
4307 | } | ||
4308 | |||
4309 | if (blocks_released > 0) { | ||
4310 | ext4_ext_invalidate_cache(inode); | ||
4311 | ext4_discard_preallocations(inode); | ||
4312 | } | ||
4313 | |||
4314 | if (IS_SYNC(inode)) | ||
4315 | ext4_handle_sync(handle); | ||
4316 | |||
4317 | up_write(&EXT4_I(inode)->i_data_sem); | ||
4318 | |||
4319 | out: | ||
4320 | ext4_orphan_del(handle, inode); | ||
4321 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
4322 | ext4_mark_inode_dirty(handle, inode); | ||
4323 | ext4_journal_stop(handle); | ||
4324 | return err; | ||
4325 | } | ||
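The alignment arithmetic at the top of ext4_ext_punch_hole is worth seeing in isolation: only whole blocks inside [offset, offset+length) are punched, and the ragged head and tail are zeroed in place. A sketch assuming a 4096-byte block:

#include <stdio.h>

static void hole_block_range(long long offset, long long length,
                             unsigned blkbits,
                             long long *first_block, long long *last_block)
{
	long long bs = 1LL << blkbits;

	*first_block = (offset + bs - 1) >> blkbits; /* round start up */
	*last_block  = (offset + length) >> blkbits; /* round end down */
}

int main(void)
{
	long long first, last;

	hole_block_range(1000, 10000, 12, &first, &last);
	/* bytes 1000..10999: only block 1 is fully covered, so only it
	 * is punched; the partial edges get zeroed instead */
	printf("punch blocks [%lld, %lld)\n", first, last); /* [1, 2) */
	return 0;
}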
3929 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 4326 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
3930 | __u64 start, __u64 len) | 4327 | __u64 start, __u64 len) |
3931 | { | 4328 | { |
@@ -3948,8 +4345,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
3948 | 4345 | ||
3949 | start_blk = start >> inode->i_sb->s_blocksize_bits; | 4346 | start_blk = start >> inode->i_sb->s_blocksize_bits; |
3950 | last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; | 4347 | last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; |
3951 | if (last_blk >= EXT_MAX_BLOCK) | 4348 | if (last_blk >= EXT_MAX_BLOCKS) |
3952 | last_blk = EXT_MAX_BLOCK-1; | 4349 | last_blk = EXT_MAX_BLOCKS-1; |
3953 | len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; | 4350 | len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; |
3954 | 4351 | ||
3955 | /* | 4352 | /* |
@@ -3962,4 +4359,3 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
3962 | 4359 | ||
3963 | return error; | 4360 | return error; |
3964 | } | 4361 | } |
3965 | |||
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index ee92b66d4558..2c0972322009 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp) | |||
55 | return 0; | 55 | return 0; |
56 | } | 56 | } |
57 | 57 | ||
58 | static void ext4_aiodio_wait(struct inode *inode) | ||
59 | { | ||
60 | wait_queue_head_t *wq = ext4_ioend_wq(inode); | ||
61 | |||
62 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0)); | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * This tests whether the IO in question is block-aligned or not. | ||
67 | * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they | ||
68 | * are converted to written only after the IO is complete. Until they are | ||
69 | * mapped, these blocks appear as holes, so dio_zero_block() will assume that | ||
70 | * it needs to zero out portions of the start and/or end block. If 2 AIO | ||
71 | * threads are at work on the same unwritten block, they must be synchronized | ||
72 | * or one thread will zero the other's data, causing corruption. | ||
73 | */ | ||
74 | static int | ||
75 | ext4_unaligned_aio(struct inode *inode, const struct iovec *iov, | ||
76 | unsigned long nr_segs, loff_t pos) | ||
77 | { | ||
78 | struct super_block *sb = inode->i_sb; | ||
79 | int blockmask = sb->s_blocksize - 1; | ||
80 | size_t count = iov_length(iov, nr_segs); | ||
81 | loff_t final_size = pos + count; | ||
82 | |||
83 | if (pos >= inode->i_size) | ||
84 | return 0; | ||
85 | |||
86 | if ((pos & blockmask) || (final_size & blockmask)) | ||
87 | return 1; | ||
88 | |||
89 | return 0; | ||
90 | } | ||
91 | |||
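The predicate added above is a pure mask test. Extracted into a standalone form (with an assumed 4096-byte block size in the example), it shows why both ends of the IO must be checked:

#include <stdio.h>

/* An IO is unaligned if either its start or its end falls inside a
 * block rather than on a block boundary. */
static int io_is_unaligned(long long pos, long long count,
                           unsigned long blocksize)
{
	unsigned long blockmask = blocksize - 1;
	long long final_size = pos + count;

	return (pos & blockmask) || (final_size & blockmask);
}

int main(void)
{
	printf("%d\n", io_is_unaligned(4096, 4096, 4096)); /* 0: aligned */
	printf("%d\n", io_is_unaligned(4096,  512, 4096)); /* 1: short end */
	return 0;
}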
58 | static ssize_t | 92 | static ssize_t |
59 | ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | 93 | ext4_file_write(struct kiocb *iocb, const struct iovec *iov, |
60 | unsigned long nr_segs, loff_t pos) | 94 | unsigned long nr_segs, loff_t pos) |
61 | { | 95 | { |
62 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; | 96 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; |
97 | int unaligned_aio = 0; | ||
98 | int ret; | ||
63 | 99 | ||
64 | /* | 100 | /* |
65 | * If we have encountered a bitmap-format file, the size limit | 101 | * If we have encountered a bitmap-format file, the size limit |
@@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
78 | nr_segs = iov_shorten((struct iovec *)iov, nr_segs, | 114 | nr_segs = iov_shorten((struct iovec *)iov, nr_segs, |
79 | sbi->s_bitmap_maxbytes - pos); | 115 | sbi->s_bitmap_maxbytes - pos); |
80 | } | 116 | } |
117 | } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) && | ||
118 | !is_sync_kiocb(iocb))) { | ||
119 | unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos); | ||
120 | } | ||
121 | |||
122 | /* Unaligned direct AIO must be serialized; see comment above */ | ||
123 | if (unaligned_aio) { | ||
124 | static unsigned long unaligned_warn_time; | ||
125 | |||
126 | /* Warn about this once per day */ | ||
127 | if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ)) | ||
128 | ext4_msg(inode->i_sb, KERN_WARNING, | ||
129 | "Unaligned AIO/DIO on inode %ld by %s; " | ||
130 | "performance will be poor.", | ||
131 | inode->i_ino, current->comm); | ||
132 | mutex_lock(ext4_aio_mutex(inode)); | ||
133 | ext4_aiodio_wait(inode); | ||
81 | } | 134 | } |
82 | 135 | ||
83 | return generic_file_aio_write(iocb, iov, nr_segs, pos); | 136 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); |
137 | |||
138 | if (unaligned_aio) | ||
139 | mutex_unlock(ext4_aio_mutex(inode)); | ||
140 | |||
141 | return ret; | ||
84 | } | 142 | } |
85 | 143 | ||
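The lock/wait/write/unlock sequence in ext4_file_write() is a plain serialize-around-the-slow-path shape: only unaligned direct AIO pays for the mutex and the drain, while aligned writers skip both. A pthread-based sketch of that shape (the mutex, the in-flight counter, and do_write() are illustrative stand-ins, not ext4 interfaces):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t aio_mutex = PTHREAD_MUTEX_INITIALIZER;
static int writes_in_flight;	/* stand-in for i_aiodio_unwritten */

static void do_write(const char *what)
{
	printf("writing: %s\n", what);
}

/* Unaligned writers take the mutex and wait for in-flight I/O to
 * drain; aligned writers go straight through -- the shape above. */
static void serialized_write(int unaligned, const char *what)
{
	if (unaligned) {
		pthread_mutex_lock(&aio_mutex);
		while (writes_in_flight > 0)
			;	/* in ext4 this is a wait queue, not a spin */
		do_write(what);
		pthread_mutex_unlock(&aio_mutex);
	} else {
		do_write(what);
	}
}

int main(void)
{
	serialized_write(1, "unaligned chunk");
	serialized_write(0, "aligned chunk");
	return 0;
}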
86 | static const struct vm_operations_struct ext4_file_vm_ops = { | 144 | static const struct vm_operations_struct ext4_file_vm_ops = { |
@@ -104,6 +162,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
104 | { | 162 | { |
105 | struct super_block *sb = inode->i_sb; | 163 | struct super_block *sb = inode->i_sb; |
106 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 164 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
165 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
107 | struct vfsmount *mnt = filp->f_path.mnt; | 166 | struct vfsmount *mnt = filp->f_path.mnt; |
108 | struct path path; | 167 | struct path path; |
109 | char buf[64], *cp; | 168 | char buf[64], *cp; |
@@ -127,11 +186,74 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
127 | ext4_mark_super_dirty(sb); | 186 | ext4_mark_super_dirty(sb); |
128 | } | 187 | } |
129 | } | 188 | } |
189 | /* | ||
190 | * Set up the jbd2_inode if we are opening the inode for | ||
191 | * writing and the journal is present | ||
192 | */ | ||
193 | if (sbi->s_journal && !ei->jinode && (filp->f_mode & FMODE_WRITE)) { | ||
194 | struct jbd2_inode *jinode = jbd2_alloc_inode(GFP_KERNEL); | ||
195 | |||
196 | spin_lock(&inode->i_lock); | ||
197 | if (!ei->jinode) { | ||
198 | if (!jinode) { | ||
199 | spin_unlock(&inode->i_lock); | ||
200 | return -ENOMEM; | ||
201 | } | ||
202 | ei->jinode = jinode; | ||
203 | jbd2_journal_init_jbd_inode(ei->jinode, inode); | ||
204 | jinode = NULL; | ||
205 | } | ||
206 | spin_unlock(&inode->i_lock); | ||
207 | if (unlikely(jinode != NULL)) | ||
208 | jbd2_free_inode(jinode); | ||
209 | } | ||
130 | return dquot_file_open(inode, filp); | 210 | return dquot_file_open(inode, filp); |
131 | } | 211 | } |
132 | 212 | ||
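The jinode setup above follows a classic optimistic-allocation pattern: allocate outside the lock, re-check under the lock, and discard the spare if another opener won the race. A user-space sketch of the same shape (a pthread spinlock and plain malloc stand in for the kernel primitives):

#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>

struct jinfo { int dummy; };

static struct jinfo *shared;		/* stand-in for ei->jinode */
static pthread_spinlock_t lock;		/* stand-in for inode->i_lock */

/* Allocate before taking the lock, re-check under it, and discard the
 * spare allocation if we lost the race. */
static int ensure_jinfo(void)
{
	struct jinfo *fresh = malloc(sizeof(*fresh));

	pthread_spin_lock(&lock);
	if (!shared) {
		if (!fresh) {
			pthread_spin_unlock(&lock);
			return -1;	/* -ENOMEM in the kernel */
		}
		shared = fresh;
		fresh = NULL;
	}
	pthread_spin_unlock(&lock);
	free(fresh);			/* no-op unless we lost the race */
	return 0;
}

int main(void)
{
	pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
	printf("first:  %d\n", ensure_jinfo());
	printf("second: %d\n", ensure_jinfo());
	return 0;
}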
213 | /* | ||
214 | * ext4_llseek() is copied from generic_file_llseek() to handle both | ||
215 | * block-mapped and extent-mapped maxbytes values; it should | ||
216 | * otherwise be identical to generic_file_llseek(). | ||
217 | */ | ||
218 | loff_t ext4_llseek(struct file *file, loff_t offset, int origin) | ||
219 | { | ||
220 | struct inode *inode = file->f_mapping->host; | ||
221 | loff_t maxbytes; | ||
222 | |||
223 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | ||
224 | maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes; | ||
225 | else | ||
226 | maxbytes = inode->i_sb->s_maxbytes; | ||
227 | mutex_lock(&inode->i_mutex); | ||
228 | switch (origin) { | ||
229 | case SEEK_END: | ||
230 | offset += inode->i_size; | ||
231 | break; | ||
232 | case SEEK_CUR: | ||
233 | if (offset == 0) { | ||
234 | mutex_unlock(&inode->i_mutex); | ||
235 | return file->f_pos; | ||
236 | } | ||
237 | offset += file->f_pos; | ||
238 | break; | ||
239 | } | ||
240 | |||
241 | if (offset < 0 || offset > maxbytes) { | ||
242 | mutex_unlock(&inode->i_mutex); | ||
243 | return -EINVAL; | ||
244 | } | ||
245 | |||
246 | if (offset != file->f_pos) { | ||
247 | file->f_pos = offset; | ||
248 | file->f_version = 0; | ||
249 | } | ||
250 | mutex_unlock(&inode->i_mutex); | ||
251 | |||
252 | return offset; | ||
253 | } | ||
254 | |||
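The only ext4-specific twist in ext4_llseek() is choosing the per-inode maxbytes bound; the offset arithmetic itself mirrors generic_file_llseek(). A condensed user-space sketch of that arithmetic, with assumed sizes (SEEK_CUR is origin 1, SEEK_END is origin 2):

#include <stdio.h>

/* Condensed sketch of the seek arithmetic above; all sizes assumed. */
static long long seek_offset(long long offset, int origin,
			     long long pos, long long size,
			     long long maxbytes)
{
	if (origin == 2)		/* SEEK_END */
		offset += size;
	else if (origin == 1)		/* SEEK_CUR */
		offset += pos;
	if (offset < 0 || offset > maxbytes)
		return -22;		/* -EINVAL */
	return offset;
}

int main(void)
{
	long long size = 1 << 20, maxbytes = 1LL << 30;

	printf("%lld\n", seek_offset(100, 2, 0, size, maxbytes)); /* size+100 */
	printf("%lld\n", seek_offset(maxbytes, 2, 0, size, maxbytes)); /* -22 */
	return 0;
}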
133 | const struct file_operations ext4_file_operations = { | 255 | const struct file_operations ext4_file_operations = { |
134 | .llseek = generic_file_llseek, | 256 | .llseek = ext4_llseek, |
135 | .read = do_sync_read, | 257 | .read = do_sync_read, |
136 | .write = do_sync_write, | 258 | .write = do_sync_write, |
137 | .aio_read = generic_file_aio_read, | 259 | .aio_read = generic_file_aio_read, |
@@ -146,10 +268,10 @@ const struct file_operations ext4_file_operations = { | |||
146 | .fsync = ext4_sync_file, | 268 | .fsync = ext4_sync_file, |
147 | .splice_read = generic_file_splice_read, | 269 | .splice_read = generic_file_splice_read, |
148 | .splice_write = generic_file_splice_write, | 270 | .splice_write = generic_file_splice_write, |
271 | .fallocate = ext4_fallocate, | ||
149 | }; | 272 | }; |
150 | 273 | ||
151 | const struct inode_operations ext4_file_inode_operations = { | 274 | const struct inode_operations ext4_file_inode_operations = { |
152 | .truncate = ext4_truncate, | ||
153 | .setattr = ext4_setattr, | 275 | .setattr = ext4_setattr, |
154 | .getattr = ext4_getattr, | 276 | .getattr = ext4_getattr, |
155 | #ifdef CONFIG_EXT4_FS_XATTR | 277 | #ifdef CONFIG_EXT4_FS_XATTR |
@@ -159,7 +281,6 @@ const struct inode_operations ext4_file_inode_operations = { | |||
159 | .removexattr = generic_removexattr, | 281 | .removexattr = generic_removexattr, |
160 | #endif | 282 | #endif |
161 | .check_acl = ext4_check_acl, | 283 | .check_acl = ext4_check_acl, |
162 | .fallocate = ext4_fallocate, | ||
163 | .fiemap = ext4_fiemap, | 284 | .fiemap = ext4_fiemap, |
164 | }; | 285 | }; |
165 | 286 | ||
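Note that the fallocate hook moves from ext4_file_inode_operations to ext4_file_operations here, following the VFS relocation of that method onto struct file_operations. The registration itself is just a designated-initializer ops table; a self-contained sketch of the idiom (the struct and handlers are illustrative, not the VFS types):

#include <stdio.h>

struct file_ops {			/* illustrative, not the VFS struct */
	long long (*llseek)(long long, int);
	long (*fallocate)(long long, long long);
};

static long long my_llseek(long long off, int origin)
{
	return off;
}

static long my_fallocate(long long off, long long len)
{
	printf("fallocate %lld+%lld\n", off, len);
	return 0;
}

static const struct file_ops my_ops = {
	.llseek    = my_llseek,
	.fallocate = my_fallocate,	/* now a file-level hook, as above */
};

int main(void)
{
	return (int)my_ops.fallocate(0, 4096);
}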
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 592adf2e546e..ce66d2fe826c 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -34,6 +34,89 @@ | |||
34 | 34 | ||
35 | #include <trace/events/ext4.h> | 35 | #include <trace/events/ext4.h> |
36 | 36 | ||
37 | static void dump_completed_IO(struct inode * inode) | ||
38 | { | ||
39 | #ifdef EXT4FS_DEBUG | ||
40 | struct list_head *cur, *before, *after; | ||
41 | ext4_io_end_t *io, *io0, *io1; | ||
42 | unsigned long flags; | ||
43 | |||
44 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ | ||
45 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); | ||
46 | return; | ||
47 | } | ||
48 | |||
49 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); | ||
50 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
51 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
52 | cur = &io->list; | ||
53 | before = cur->prev; | ||
54 | io0 = container_of(before, ext4_io_end_t, list); | ||
55 | after = cur->next; | ||
56 | io1 = container_of(after, ext4_io_end_t, list); | ||
57 | |||
58 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | ||
59 | io, inode->i_ino, io0, io1); | ||
60 | } | ||
61 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
62 | #endif | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * This function is called from ext4_sync_file(). | ||
67 | * | ||
68 | * When IO is completed, the work to convert unwritten extents to | ||
69 | * written is queued on a workqueue but may not get immediately | ||
70 | * scheduled. When fsync is called, we need to ensure the | ||
71 | * conversion is complete before fsync returns. | ||
72 | * The inode keeps track of a list of pending/completed IO that | ||
73 | * may need the conversion. This function walks through the list | ||
74 | * and converts the related unwritten extents of completed IO to | ||
75 | * written. | ||
76 | * It returns 0 on success, or a negative error code. | ||
77 | */ | ||
78 | extern int ext4_flush_completed_IO(struct inode *inode) | ||
79 | { | ||
80 | ext4_io_end_t *io; | ||
81 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
82 | unsigned long flags; | ||
83 | int ret = 0; | ||
84 | int ret2 = 0; | ||
85 | |||
86 | if (list_empty(&ei->i_completed_io_list)) | ||
87 | return ret; | ||
88 | |||
89 | dump_completed_IO(inode); | ||
90 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
91 | while (!list_empty(&ei->i_completed_io_list)){ | ||
92 | io = list_entry(ei->i_completed_io_list.next, | ||
93 | ext4_io_end_t, list); | ||
94 | /* | ||
95 | * Calling ext4_end_io_nolock() to convert completed | ||
96 | * IO to written. | ||
97 | * | ||
98 | * When ext4_sync_file() is called, run_queue() may already | ||
99 | * be about to flush the work corresponding to this io | ||
100 | * structure, and it would be confused to find that the io | ||
101 | * structure for the work to be scheduled had been freed. | ||
102 | * | ||
103 | * Thus we need to keep the io structure still valid here after | ||
104 | * conversion finished. The io structure has a flag to | ||
105 | * avoid double converting from both fsync and background work | ||
106 | * queue work. | ||
107 | */ | ||
108 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
109 | ret = ext4_end_io_nolock(io); | ||
110 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
111 | if (ret < 0) | ||
112 | ret2 = ret; | ||
113 | else | ||
114 | list_del_init(&io->list); | ||
115 | } | ||
116 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
117 | return (ret2 < 0) ? ret2 : 0; | ||
118 | } | ||
119 | |||
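The loop above is a standard lock-drop pattern for draining a list whose entries need blocking work: peek at the head under the spinlock, drop the lock around the conversion, retake it, and unlink the entry only on success. A user-space sketch of that shape (the singly linked list, the mutex, and process() are illustrative stand-ins):

#include <pthread.h>
#include <stdio.h>

struct node { struct node *next; int id; };

static struct node n2 = { NULL, 2 }, n1 = { &n2, 1 };
static struct node *head = &n1;		/* stand-in for i_completed_io_list */
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static int process(struct node *n)	/* stand-in for ext4_end_io_nolock */
{
	printf("converting io %d\n", n->id);
	return 0;
}

/* Peek under the lock, drop it for the blocking work, retake it, and
 * unlink only on success -- the same shape as the loop above. */
static int drain(void)
{
	int ret, ret2 = 0;

	pthread_mutex_lock(&list_lock);
	while (head) {
		struct node *n = head;

		pthread_mutex_unlock(&list_lock);
		ret = process(n);
		pthread_mutex_lock(&list_lock);
		if (ret < 0)
			ret2 = ret;
		else
			head = n->next;	/* list_del_init() in the kernel */
	}
	pthread_mutex_unlock(&list_lock);
	return ret2;
}

int main(void)
{
	return drain();
}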
37 | /* | 120 | /* |
38 | * If we're not journaling and this is a just-created file, we have to | 121 | * If we're not journaling and this is a just-created file, we have to |
39 | * sync our parent directory (if it was freshly created) since | 122 | * sync our parent directory (if it was freshly created) since |
@@ -42,9 +125,11 @@ | |||
42 | * the parent directory's parent as well, and so on recursively, if | 125 | * the parent directory's parent as well, and so on recursively, if |
43 | * they are also freshly created. | 126 | * they are also freshly created. |
44 | */ | 127 | */ |
45 | static void ext4_sync_parent(struct inode *inode) | 128 | static int ext4_sync_parent(struct inode *inode) |
46 | { | 129 | { |
130 | struct writeback_control wbc; | ||
47 | struct dentry *dentry = NULL; | 131 | struct dentry *dentry = NULL; |
132 | int ret = 0; | ||
48 | 133 | ||
49 | while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { | 134 | while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { |
50 | ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); | 135 | ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); |
@@ -53,8 +138,17 @@ static void ext4_sync_parent(struct inode *inode) | |||
53 | if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) | 138 | if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) |
54 | break; | 139 | break; |
55 | inode = dentry->d_parent->d_inode; | 140 | inode = dentry->d_parent->d_inode; |
56 | sync_mapping_buffers(inode->i_mapping); | 141 | ret = sync_mapping_buffers(inode->i_mapping); |
142 | if (ret) | ||
143 | break; | ||
144 | memset(&wbc, 0, sizeof(wbc)); | ||
145 | wbc.sync_mode = WB_SYNC_ALL; | ||
146 | wbc.nr_to_write = 0; /* only write out the inode */ | ||
147 | ret = sync_inode(inode, &wbc); | ||
148 | if (ret) | ||
149 | break; | ||
57 | } | 150 | } |
151 | return ret; | ||
58 | } | 152 | } |
59 | 153 | ||
60 | /* | 154 | /* |
@@ -78,23 +172,24 @@ int ext4_sync_file(struct file *file, int datasync) | |||
78 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 172 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
79 | int ret; | 173 | int ret; |
80 | tid_t commit_tid; | 174 | tid_t commit_tid; |
175 | bool needs_barrier = false; | ||
81 | 176 | ||
82 | J_ASSERT(ext4_journal_current_handle() == NULL); | 177 | J_ASSERT(ext4_journal_current_handle() == NULL); |
83 | 178 | ||
84 | trace_ext4_sync_file(file, datasync); | 179 | trace_ext4_sync_file_enter(file, datasync); |
85 | 180 | ||
86 | if (inode->i_sb->s_flags & MS_RDONLY) | 181 | if (inode->i_sb->s_flags & MS_RDONLY) |
87 | return 0; | 182 | return 0; |
88 | 183 | ||
89 | ret = flush_completed_IO(inode); | 184 | ret = ext4_flush_completed_IO(inode); |
90 | if (ret < 0) | 185 | if (ret < 0) |
91 | return ret; | 186 | goto out; |
92 | 187 | ||
93 | if (!journal) { | 188 | if (!journal) { |
94 | ret = generic_file_fsync(file, datasync); | 189 | ret = generic_file_fsync(file, datasync); |
95 | if (!ret && !list_empty(&inode->i_dentry)) | 190 | if (!ret && !list_empty(&inode->i_dentry)) |
96 | ext4_sync_parent(inode); | 191 | ret = ext4_sync_parent(inode); |
97 | return ret; | 192 | goto out; |
98 | } | 193 | } |
99 | 194 | ||
100 | /* | 195 | /* |
@@ -111,27 +206,20 @@ int ext4_sync_file(struct file *file, int datasync) | |||
111 | * (they were dirtied by commit). But that's OK - the blocks are | 206 | * (they were dirtied by commit). But that's OK - the blocks are |
112 | * safe in-journal, which is all fsync() needs to ensure. | 207 | * safe in-journal, which is all fsync() needs to ensure. |
113 | */ | 208 | */ |
114 | if (ext4_should_journal_data(inode)) | 209 | if (ext4_should_journal_data(inode)) { |
115 | return ext4_force_commit(inode->i_sb); | 210 | ret = ext4_force_commit(inode->i_sb); |
211 | goto out; | ||
212 | } | ||
116 | 213 | ||
117 | commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; | 214 | commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; |
118 | if (jbd2_log_start_commit(journal, commit_tid)) { | 215 | if (journal->j_flags & JBD2_BARRIER && |
119 | /* | 216 | !jbd2_trans_will_send_data_barrier(journal, commit_tid)) |
120 | * When the journal is on a different device than the | 217 | needs_barrier = true; |
121 | * fs data disk, we need to issue the barrier in | 218 | jbd2_log_start_commit(journal, commit_tid); |
122 | * writeback mode. (In ordered mode, the jbd2 layer | 219 | ret = jbd2_log_wait_commit(journal, commit_tid); |
123 | * will take care of issuing the barrier. In | 220 | if (needs_barrier) |
124 | * data=journal, all of the data blocks are written to | 221 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
125 | * the journal device.) | 222 | out: |
126 | */ | 223 | trace_ext4_sync_file_exit(inode, ret); |
127 | if (ext4_should_writeback_data(inode) && | ||
128 | (journal->j_fs_dev != journal->j_dev) && | ||
129 | (journal->j_flags & JBD2_BARRIER)) | ||
130 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, | ||
131 | NULL, BLKDEV_IFL_WAIT); | ||
132 | ret = jbd2_log_wait_commit(journal, commit_tid); | ||
133 | } else if (journal->j_flags & JBD2_BARRIER) | ||
134 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, | ||
135 | BLKDEV_IFL_WAIT); | ||
136 | return ret; | 224 | return ret; |
137 | } | 225 | } |
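The rewritten tail of ext4_sync_file() reduces the old open-coded barrier heuristics to one question: will the journal commit itself send a data barrier? Only when it will not, and barriers are enabled, does fsync issue its own cache flush afterwards. The decision boils down to a small predicate (a sketch over assumed boolean inputs, not the jbd2 API):

#include <stdio.h>
#include <stdbool.h>

/* Issue our own cache flush only if barriers are on and the journal
 * commit will not already send one on our behalf -- the logic above. */
static bool needs_own_flush(bool journal_barrier_enabled,
			    bool commit_sends_barrier)
{
	return journal_barrier_enabled && !commit_sends_barrier;
}

int main(void)
{
	printf("%d\n", needs_own_flush(true, false));	/* 1: flush ourselves */
	printf("%d\n", needs_own_flush(true, true));	/* 0: commit covers it */
	printf("%d\n", needs_own_flush(false, false));	/* 0: barriers off */
	return 0;
}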
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 45853e0d1f21..21bb2f61e502 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -50,7 +50,7 @@ | |||
50 | * need to use it within a single byte (to ensure we get endianness right). | 50 | * need to use it within a single byte (to ensure we get endianness right). |
51 | * We can use memset for the rest of the bitmap as there are no other users. | 51 | * We can use memset for the rest of the bitmap as there are no other users. |
52 | */ | 52 | */ |
53 | void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | 53 | void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) |
54 | { | 54 | { |
55 | int i; | 55 | int i; |
56 | 56 | ||
@@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | |||
65 | } | 65 | } |
66 | 66 | ||
67 | /* Initializes an uninitialized inode bitmap */ | 67 | /* Initializes an uninitialized inode bitmap */ |
68 | unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, | 68 | static unsigned ext4_init_inode_bitmap(struct super_block *sb, |
69 | ext4_group_t block_group, | 69 | struct buffer_head *bh, |
70 | struct ext4_group_desc *gdp) | 70 | ext4_group_t block_group, |
71 | struct ext4_group_desc *gdp) | ||
71 | { | 72 | { |
72 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 73 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
73 | 74 | ||
@@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
85 | } | 86 | } |
86 | 87 | ||
87 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); | 88 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); |
88 | mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | 89 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, |
89 | bh->b_data); | 90 | bh->b_data); |
90 | 91 | ||
91 | return EXT4_INODES_PER_GROUP(sb); | 92 | return EXT4_INODES_PER_GROUP(sb); |
@@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
107 | desc = ext4_get_group_desc(sb, block_group, NULL); | 108 | desc = ext4_get_group_desc(sb, block_group, NULL); |
108 | if (!desc) | 109 | if (!desc) |
109 | return NULL; | 110 | return NULL; |
111 | |||
110 | bitmap_blk = ext4_inode_bitmap(sb, desc); | 112 | bitmap_blk = ext4_inode_bitmap(sb, desc); |
111 | bh = sb_getblk(sb, bitmap_blk); | 113 | bh = sb_getblk(sb, bitmap_blk); |
112 | if (unlikely(!bh)) { | 114 | if (unlikely(!bh)) { |
@@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
123 | unlock_buffer(bh); | 125 | unlock_buffer(bh); |
124 | return bh; | 126 | return bh; |
125 | } | 127 | } |
128 | |||
126 | ext4_lock_group(sb, block_group); | 129 | ext4_lock_group(sb, block_group); |
127 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | 130 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
128 | ext4_init_inode_bitmap(sb, bh, block_group, desc); | 131 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
@@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
133 | return bh; | 136 | return bh; |
134 | } | 137 | } |
135 | ext4_unlock_group(sb, block_group); | 138 | ext4_unlock_group(sb, block_group); |
139 | |||
136 | if (buffer_uptodate(bh)) { | 140 | if (buffer_uptodate(bh)) { |
137 | /* | 141 | /* |
138 | * if not uninit and bh is uptodate, | 142 |
@@ -148,6 +152,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
148 | * We do it here so the bitmap uptodate bit | 152 | * We do it here so the bitmap uptodate bit |
149 | * gets set with the buffer lock held. | 153 |
150 | */ | 154 | */ |
155 | trace_ext4_load_inode_bitmap(sb, block_group); | ||
151 | set_bitmap_uptodate(bh); | 156 | set_bitmap_uptodate(bh); |
152 | if (bh_submit_read(bh) < 0) { | 157 | if (bh_submit_read(bh) < 0) { |
153 | put_bh(bh); | 158 | put_bh(bh); |
@@ -411,8 +416,8 @@ struct orlov_stats { | |||
411 | * for a particular block group or flex_bg. If flex_size is 1, then g | 416 | * for a particular block group or flex_bg. If flex_size is 1, then g |
412 | * is a block group number; otherwise it is flex_bg number. | 417 | * is a block group number; otherwise it is flex_bg number. |
413 | */ | 418 | */ |
414 | void get_orlov_stats(struct super_block *sb, ext4_group_t g, | 419 | static void get_orlov_stats(struct super_block *sb, ext4_group_t g, |
415 | int flex_size, struct orlov_stats *stats) | 420 | int flex_size, struct orlov_stats *stats) |
416 | { | 421 | { |
417 | struct ext4_group_desc *desc; | 422 | struct ext4_group_desc *desc; |
418 | struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; | 423 | struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; |
@@ -645,7 +650,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
645 | *group = parent_group + flex_size; | 650 | *group = parent_group + flex_size; |
646 | if (*group > ngroups) | 651 | if (*group > ngroups) |
647 | *group = 0; | 652 | *group = 0; |
648 | return find_group_orlov(sb, parent, group, mode, 0); | 653 | return find_group_orlov(sb, parent, group, mode, NULL); |
649 | } | 654 | } |
650 | 655 | ||
651 | /* | 656 | /* |
@@ -712,8 +717,17 @@ static int ext4_claim_inode(struct super_block *sb, | |||
712 | { | 717 | { |
713 | int free = 0, retval = 0, count; | 718 | int free = 0, retval = 0, count; |
714 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 719 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
720 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
715 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); | 721 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); |
716 | 722 | ||
723 | /* | ||
724 | * We have to be sure that new inode allocation does not race with | ||
725 | * inode table initialization, because otherwise we may end up | ||
726 | * allocating and writing a new inode right before sb_issue_zeroout | ||
727 | * takes place, overwriting our new inode with zeroes. So we | ||
728 | * take alloc_sem to prevent it. | ||
729 | */ | ||
730 | down_read(&grp->alloc_sem); | ||
717 | ext4_lock_group(sb, group); | 731 | ext4_lock_group(sb, group); |
718 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { | 732 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { |
719 | /* not a free inode */ | 733 | /* not a free inode */ |
@@ -724,6 +738,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
724 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || | 738 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || |
725 | ino > EXT4_INODES_PER_GROUP(sb)) { | 739 | ino > EXT4_INODES_PER_GROUP(sb)) { |
726 | ext4_unlock_group(sb, group); | 740 | ext4_unlock_group(sb, group); |
741 | up_read(&grp->alloc_sem); | ||
727 | ext4_error(sb, "reserved inode or inode > inodes count - " | 742 | ext4_error(sb, "reserved inode or inode > inodes count - " |
728 | "block_group = %u, inode=%lu", group, | 743 | "block_group = %u, inode=%lu", group, |
729 | ino + group * EXT4_INODES_PER_GROUP(sb)); | 744 | ino + group * EXT4_INODES_PER_GROUP(sb)); |
@@ -772,6 +787,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
772 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | 787 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); |
773 | err_ret: | 788 | err_ret: |
774 | ext4_unlock_group(sb, group); | 789 | ext4_unlock_group(sb, group); |
790 | up_read(&grp->alloc_sem); | ||
775 | return retval; | 791 | return retval; |
776 | } | 792 | } |
777 | 793 | ||
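The down_read() taken here pairs with the down_write() taken by the lazy-init zeroing path in ext4_init_inode_table() further down: any number of allocators may claim inodes concurrently, but none may overlap an inode-table zeroout. A pthread rwlock sketch of that pairing (names and bodies are illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t alloc_sem = PTHREAD_RWLOCK_INITIALIZER;

/* Many allocators may run concurrently (readers)... */
static void claim_inode(int ino)
{
	pthread_rwlock_rdlock(&alloc_sem);
	printf("claimed inode %d\n", ino);
	pthread_rwlock_unlock(&alloc_sem);
}

/* ...but the zeroout path takes the lock exclusively (writer), so no
 * inode can be written into a table block while it is being zeroed. */
static void zero_inode_table(void)
{
	pthread_rwlock_wrlock(&alloc_sem);
	printf("zeroing inode table\n");
	pthread_rwlock_unlock(&alloc_sem);
}

int main(void)
{
	claim_inode(11);
	zero_inode_table();
	claim_inode(12);
	return 0;
}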
@@ -1012,7 +1028,7 @@ got: | |||
1012 | inode->i_generation = sbi->s_next_generation++; | 1028 | inode->i_generation = sbi->s_next_generation++; |
1013 | spin_unlock(&sbi->s_next_gen_lock); | 1029 | spin_unlock(&sbi->s_next_gen_lock); |
1014 | 1030 | ||
1015 | ei->i_state_flags = 0; | 1031 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ |
1016 | ext4_set_inode_state(inode, EXT4_STATE_NEW); | 1032 | ext4_set_inode_state(inode, EXT4_STATE_NEW); |
1017 | 1033 | ||
1018 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; | 1034 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; |
@@ -1027,7 +1043,7 @@ got: | |||
1027 | if (err) | 1043 | if (err) |
1028 | goto fail_free_drop; | 1044 | goto fail_free_drop; |
1029 | 1045 | ||
1030 | err = ext4_init_security(handle, inode, dir); | 1046 | err = ext4_init_security(handle, inode, dir, qstr); |
1031 | if (err) | 1047 | if (err) |
1032 | goto fail_free_drop; | 1048 | goto fail_free_drop; |
1033 | 1049 | ||
@@ -1039,6 +1055,11 @@ got: | |||
1039 | } | 1055 | } |
1040 | } | 1056 | } |
1041 | 1057 | ||
1058 | if (ext4_handle_valid(handle)) { | ||
1059 | ei->i_sync_tid = handle->h_transaction->t_tid; | ||
1060 | ei->i_datasync_tid = handle->h_transaction->t_tid; | ||
1061 | } | ||
1062 | |||
1042 | err = ext4_mark_inode_dirty(handle, inode); | 1063 | err = ext4_mark_inode_dirty(handle, inode); |
1043 | if (err) { | 1064 | if (err) { |
1044 | ext4_std_error(sb, err); | 1065 | ext4_std_error(sb, err); |
@@ -1205,3 +1226,109 @@ unsigned long ext4_count_dirs(struct super_block * sb) | |||
1205 | } | 1226 | } |
1206 | return count; | 1227 | return count; |
1207 | } | 1228 | } |
1229 | |||
1230 | /* | ||
1231 | * Zeroes the not-yet-zeroed inode table by writing zeroes through the | ||
1232 | * whole inode table. Must be called without any spinlock held. On an | ||
1233 | * active filesystem the only caller is the ext4lazyinit thread, so we | ||
1234 | * do not need any special locks; however, we have to prevent inode | ||
1235 | * allocation from the current group, so we take the alloc_sem lock to | ||
1236 | * block ext4_claim_inode until we are finished. | ||
1237 | */ | ||
1238 | extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | ||
1239 | int barrier) | ||
1240 | { | ||
1241 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
1242 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1243 | struct ext4_group_desc *gdp = NULL; | ||
1244 | struct buffer_head *group_desc_bh; | ||
1245 | handle_t *handle; | ||
1246 | ext4_fsblk_t blk; | ||
1247 | int num, ret = 0, used_blks = 0; | ||
1248 | |||
1249 | /* This should not happen, but just to be sure check this */ | ||
1250 | if (sb->s_flags & MS_RDONLY) { | ||
1251 | ret = 1; | ||
1252 | goto out; | ||
1253 | } | ||
1254 | |||
1255 | gdp = ext4_get_group_desc(sb, group, &group_desc_bh); | ||
1256 | if (!gdp) | ||
1257 | goto out; | ||
1258 | |||
1259 | /* | ||
1260 | * We do not need to lock this, because we are the only one | ||
1261 | * handling this flag. | ||
1262 | */ | ||
1263 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) | ||
1264 | goto out; | ||
1265 | |||
1266 | handle = ext4_journal_start_sb(sb, 1); | ||
1267 | if (IS_ERR(handle)) { | ||
1268 | ret = PTR_ERR(handle); | ||
1269 | goto out; | ||
1270 | } | ||
1271 | |||
1272 | down_write(&grp->alloc_sem); | ||
1273 | /* | ||
1274 | * If the inode bitmap was already initialized, there may be some | ||
1275 | * used inodes, so we need to skip the inode-table blocks that | ||
1276 | * contain used inodes. | ||
1277 | */ | ||
1278 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) | ||
1279 | used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) - | ||
1280 | ext4_itable_unused_count(sb, gdp)), | ||
1281 | sbi->s_inodes_per_block); | ||
1282 | |||
1283 | if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { | ||
1284 | ext4_error(sb, "Something is wrong with group %u\n" | ||
1285 | "Used itable blocks: %d" | ||
1286 | "itable unused count: %u\n", | ||
1287 | group, used_blks, | ||
1288 | ext4_itable_unused_count(sb, gdp)); | ||
1289 | ret = 1; | ||
1290 | goto out; | ||
1291 | } | ||
1292 | |||
1293 | blk = ext4_inode_table(sb, gdp) + used_blks; | ||
1294 | num = sbi->s_itb_per_group - used_blks; | ||
1295 | |||
1296 | BUFFER_TRACE(group_desc_bh, "get_write_access"); | ||
1297 | ret = ext4_journal_get_write_access(handle, | ||
1298 | group_desc_bh); | ||
1299 | if (ret) | ||
1300 | goto err_out; | ||
1301 | |||
1302 | /* | ||
1303 | * Skip zeroout if the inode table is full. But we set the ZEROED | ||
1304 | * flag anyway, because obviously, when it is full it does not need | ||
1305 | * further zeroing. | ||
1306 | */ | ||
1307 | if (unlikely(num == 0)) | ||
1308 | goto skip_zeroout; | ||
1309 | |||
1310 | ext4_debug("going to zero out inode table in group %d\n", | ||
1311 | group); | ||
1312 | ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS); | ||
1313 | if (ret < 0) | ||
1314 | goto err_out; | ||
1315 | if (barrier) | ||
1316 | blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL); | ||
1317 | |||
1318 | skip_zeroout: | ||
1319 | ext4_lock_group(sb, group); | ||
1320 | gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); | ||
1321 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | ||
1322 | ext4_unlock_group(sb, group); | ||
1323 | |||
1324 | BUFFER_TRACE(group_desc_bh, | ||
1325 | "call ext4_handle_dirty_metadata"); | ||
1326 | ret = ext4_handle_dirty_metadata(handle, NULL, | ||
1327 | group_desc_bh); | ||
1328 | |||
1329 | err_out: | ||
1330 | up_write(&grp->alloc_sem); | ||
1331 | ext4_journal_stop(handle); | ||
1332 | out: | ||
1333 | return ret; | ||
1334 | } | ||
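The used_blks computation above rounds the count of in-use inodes up to whole inode-table blocks, so zeroing starts at the first fully unused block. Worked through with assumed geometry (8192 inodes per group and 16 inodes per 4 KiB block; the unused count would come from the group descriptor):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned inodes_per_group = 8192;	/* assumed geometry */
	unsigned inodes_per_block = 16;		/* 4096 / 256 */
	unsigned itable_unused = 8000;		/* from the group descriptor */

	/* 192 used inodes round up to 12 blocks; zeroing starts there. */
	unsigned used_blks = DIV_ROUND_UP(inodes_per_group - itable_unused,
					  inodes_per_block);

	printf("skip %u blocks, zero the remaining %u\n",
	       used_blks, inodes_per_group / inodes_per_block - used_blks);
	return 0;
}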
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4b8debeb3965..e3126c051006 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -39,7 +39,9 @@ | |||
39 | #include <linux/bio.h> | 39 | #include <linux/bio.h> |
40 | #include <linux/workqueue.h> | 40 | #include <linux/workqueue.h> |
41 | #include <linux/kernel.h> | 41 | #include <linux/kernel.h> |
42 | #include <linux/printk.h> | ||
42 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
44 | #include <linux/ratelimit.h> | ||
43 | 45 | ||
44 | #include "ext4_jbd2.h" | 46 | #include "ext4_jbd2.h" |
45 | #include "xattr.h" | 47 | #include "xattr.h" |
@@ -53,13 +55,27 @@ | |||
53 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | 55 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
54 | loff_t new_size) | 56 | loff_t new_size) |
55 | { | 57 | { |
56 | return jbd2_journal_begin_ordered_truncate( | 58 | trace_ext4_begin_ordered_truncate(inode, new_size); |
57 | EXT4_SB(inode->i_sb)->s_journal, | 59 | /* |
58 | &EXT4_I(inode)->jinode, | 60 | * If jinode is zero, then we never opened the file for |
59 | new_size); | 61 | * writing, so there's no need to call |
62 | * jbd2_journal_begin_ordered_truncate() since there's no | ||
63 | * outstanding writes we need to flush. | ||
64 | */ | ||
65 | if (!EXT4_I(inode)->jinode) | ||
66 | return 0; | ||
67 | return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode), | ||
68 | EXT4_I(inode)->jinode, | ||
69 | new_size); | ||
60 | } | 70 | } |
61 | 71 | ||
62 | static void ext4_invalidatepage(struct page *page, unsigned long offset); | 72 | static void ext4_invalidatepage(struct page *page, unsigned long offset); |
73 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | ||
74 | struct buffer_head *bh_result, int create); | ||
75 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | ||
76 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | ||
77 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); | ||
78 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); | ||
63 | 79 | ||
64 | /* | 80 | /* |
65 | * Test whether an inode is a fast symlink. | 81 | * Test whether an inode is a fast symlink. |
@@ -157,7 +173,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, | |||
157 | BUG_ON(EXT4_JOURNAL(inode) == NULL); | 173 | BUG_ON(EXT4_JOURNAL(inode) == NULL); |
158 | jbd_debug(2, "restarting handle %p\n", handle); | 174 | jbd_debug(2, "restarting handle %p\n", handle); |
159 | up_write(&EXT4_I(inode)->i_data_sem); | 175 | up_write(&EXT4_I(inode)->i_data_sem); |
160 | ret = ext4_journal_restart(handle, blocks_for_truncate(inode)); | 176 | ret = ext4_journal_restart(handle, nblocks); |
161 | down_write(&EXT4_I(inode)->i_data_sem); | 177 | down_write(&EXT4_I(inode)->i_data_sem); |
162 | ext4_discard_preallocations(inode); | 178 | ext4_discard_preallocations(inode); |
163 | 179 | ||
@@ -172,6 +188,7 @@ void ext4_evict_inode(struct inode *inode) | |||
172 | handle_t *handle; | 188 | handle_t *handle; |
173 | int err; | 189 | int err; |
174 | 190 | ||
191 | trace_ext4_evict_inode(inode); | ||
175 | if (inode->i_nlink) { | 192 | if (inode->i_nlink) { |
176 | truncate_inode_pages(&inode->i_data, 0); | 193 | truncate_inode_pages(&inode->i_data, 0); |
177 | goto no_delete; | 194 | goto no_delete; |
@@ -544,7 +561,7 @@ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | |||
544 | } | 561 | } |
545 | 562 | ||
546 | /** | 563 | /** |
547 | * ext4_blks_to_allocate: Look up the block map and count the number | 564 | * ext4_blks_to_allocate - Look up the block map and count the number |
548 | * of direct blocks that need to be allocated for the given branch. | 565 |
549 | * | 566 | * |
550 | * @branch: chain of indirect blocks | 567 | * @branch: chain of indirect blocks |
@@ -583,13 +600,19 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, | |||
583 | 600 | ||
584 | /** | 601 | /** |
585 | * ext4_alloc_blocks: multiple allocate blocks needed for a branch | 602 | * ext4_alloc_blocks: multiple allocate blocks needed for a branch |
603 | * @handle: handle for this transaction | ||
604 | * @inode: inode which needs allocated blocks | ||
605 | * @iblock: the logical block to start allocated at | ||
606 | * @goal: preferred physical block of allocation | ||
586 | * @indirect_blks: the number of blocks need to allocate for indirect | 607 | * @indirect_blks: the number of blocks need to allocate for indirect |
587 | * blocks | 608 | * blocks |
588 | * | 609 | * @blks: number of desired blocks |
589 | * @new_blocks: on return it will store the new block numbers for | 610 | * @new_blocks: on return it will store the new block numbers for |
590 | * the indirect blocks(if needed) and the first direct block, | 611 | * the indirect blocks(if needed) and the first direct block, |
591 | * @blks: on return it will store the total number of allocated | 612 | * @err: on return it will store the error code |
592 | * direct blocks | 613 | * |
614 | * This function will return the number of blocks allocated as | ||
615 | * requested by the passed-in parameters. | ||
593 | */ | 616 | */ |
594 | static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | 617 | static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, |
595 | ext4_lblk_t iblock, ext4_fsblk_t goal, | 618 | ext4_lblk_t iblock, ext4_fsblk_t goal, |
@@ -616,8 +639,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
616 | while (target > 0) { | 639 | while (target > 0) { |
617 | count = target; | 640 | count = target; |
618 | /* allocating blocks for indirect blocks and direct blocks */ | 641 | /* allocating blocks for indirect blocks and direct blocks */ |
619 | current_block = ext4_new_meta_blocks(handle, inode, | 642 | current_block = ext4_new_meta_blocks(handle, inode, goal, |
620 | goal, &count, err); | 643 | 0, &count, err); |
621 | if (*err) | 644 | if (*err) |
622 | goto failed_out; | 645 | goto failed_out; |
623 | 646 | ||
@@ -697,15 +720,17 @@ allocated: | |||
697 | return ret; | 720 | return ret; |
698 | failed_out: | 721 | failed_out: |
699 | for (i = 0; i < index; i++) | 722 | for (i = 0; i < index; i++) |
700 | ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); | 723 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); |
701 | return ret; | 724 | return ret; |
702 | } | 725 | } |
703 | 726 | ||
704 | /** | 727 | /** |
705 | * ext4_alloc_branch - allocate and set up a chain of blocks. | 728 | * ext4_alloc_branch - allocate and set up a chain of blocks. |
729 | * @handle: handle for this transaction | ||
706 | * @inode: owner | 730 | * @inode: owner |
707 | * @indirect_blks: number of allocated indirect blocks | 731 | * @indirect_blks: number of allocated indirect blocks |
708 | * @blks: number of allocated direct blocks | 732 | * @blks: number of allocated direct blocks |
733 | * @goal: preferred place for allocation | ||
709 | * @offsets: offsets (in the blocks) to store the pointers to next. | 734 | * @offsets: offsets (in the blocks) to store the pointers to next. |
710 | * @branch: place to store the chain in. | 735 | * @branch: place to store the chain in. |
711 | * | 736 | * |
@@ -755,6 +780,11 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
755 | * parent to disk. | 780 | * parent to disk. |
756 | */ | 781 | */ |
757 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); | 782 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); |
783 | if (unlikely(!bh)) { | ||
784 | err = -EIO; | ||
785 | goto failed; | ||
786 | } | ||
787 | |||
758 | branch[n].bh = bh; | 788 | branch[n].bh = bh; |
759 | lock_buffer(bh); | 789 | lock_buffer(bh); |
760 | BUFFER_TRACE(bh, "call get_create_access"); | 790 | BUFFER_TRACE(bh, "call get_create_access"); |
@@ -793,26 +823,27 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
793 | return err; | 823 | return err; |
794 | failed: | 824 | failed: |
795 | /* Allocation failed, free what we already allocated */ | 825 | /* Allocation failed, free what we already allocated */ |
796 | ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); | 826 | ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0); |
797 | for (i = 1; i <= n ; i++) { | 827 | for (i = 1; i <= n ; i++) { |
798 | /* | 828 | /* |
799 | * branch[i].bh is newly allocated, so there is no | 829 | * branch[i].bh is newly allocated, so there is no |
800 | * need to revoke the block, which is why we don't | 830 | * need to revoke the block, which is why we don't |
801 | * need to set EXT4_FREE_BLOCKS_METADATA. | 831 | * need to set EXT4_FREE_BLOCKS_METADATA. |
802 | */ | 832 | */ |
803 | ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, | 833 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, |
804 | EXT4_FREE_BLOCKS_FORGET); | 834 | EXT4_FREE_BLOCKS_FORGET); |
805 | } | 835 | } |
806 | for (i = n+1; i < indirect_blks; i++) | 836 | for (i = n+1; i < indirect_blks; i++) |
807 | ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); | 837 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); |
808 | 838 | ||
809 | ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0); | 839 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0); |
810 | 840 | ||
811 | return err; | 841 | return err; |
812 | } | 842 | } |
813 | 843 | ||
814 | /** | 844 | /** |
815 | * ext4_splice_branch - splice the allocated branch onto inode. | 845 | * ext4_splice_branch - splice the allocated branch onto inode. |
846 | * @handle: handle for this transaction | ||
816 | * @inode: owner | 847 | * @inode: owner |
817 | * @block: (logical) number of block we are adding | 848 | * @block: (logical) number of block we are adding |
818 | * @chain: chain of indirect blocks (with a missing link - see | 849 | * @chain: chain of indirect blocks (with a missing link - see |
@@ -893,7 +924,7 @@ err_out: | |||
893 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, | 924 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, |
894 | EXT4_FREE_BLOCKS_FORGET); | 925 | EXT4_FREE_BLOCKS_FORGET); |
895 | } | 926 | } |
896 | ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key), | 927 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), |
897 | blks, 0); | 928 | blks, 0); |
898 | 929 | ||
899 | return err; | 930 | return err; |
@@ -942,6 +973,7 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | |||
942 | int count = 0; | 973 | int count = 0; |
943 | ext4_fsblk_t first_block = 0; | 974 | ext4_fsblk_t first_block = 0; |
944 | 975 | ||
976 | trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | ||
945 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); | 977 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); |
946 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); | 978 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); |
947 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, | 979 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, |
@@ -1027,6 +1059,8 @@ cleanup: | |||
1027 | partial--; | 1059 | partial--; |
1028 | } | 1060 | } |
1029 | out: | 1061 | out: |
1062 | trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, | ||
1063 | map->m_pblk, map->m_len, err); | ||
1030 | return err; | 1064 | return err; |
1031 | } | 1065 | } |
1032 | 1066 | ||
@@ -1068,7 +1102,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, | |||
1068 | * Calculate the number of metadata blocks need to reserve | 1102 | * Calculate the number of metadata blocks need to reserve |
1069 | * to allocate a block located at @lblock | 1103 | * to allocate a block located at @lblock |
1070 | */ | 1104 | */ |
1071 | static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) | 1105 | static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) |
1072 | { | 1106 | { |
1073 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 1107 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
1074 | return ext4_ext_calc_metadata_amount(inode, lblock); | 1108 | return ext4_ext_calc_metadata_amount(inode, lblock); |
@@ -1207,8 +1241,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
1207 | break; | 1241 | break; |
1208 | idx++; | 1242 | idx++; |
1209 | num++; | 1243 | num++; |
1210 | if (num >= max_pages) | 1244 | if (num >= max_pages) { |
1245 | done = 1; | ||
1211 | break; | 1246 | break; |
1247 | } | ||
1212 | } | 1248 | } |
1213 | pagevec_release(&pvec); | 1249 | pagevec_release(&pvec); |
1214 | } | 1250 | } |
@@ -1305,7 +1341,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
1305 | * avoid double accounting | 1341 | * avoid double accounting |
1306 | */ | 1342 | */ |
1307 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 1343 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) |
1308 | EXT4_I(inode)->i_delalloc_reserved_flag = 1; | 1344 | ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); |
1309 | /* | 1345 | /* |
1310 | * We need to check for EXT4 here because migrate | 1346 | * We need to check for EXT4 here because migrate |
1311 | * could have changed the inode type in between | 1347 | * could have changed the inode type in between |
@@ -1335,7 +1371,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
1335 | ext4_da_update_reserve_space(inode, retval, 1); | 1371 | ext4_da_update_reserve_space(inode, retval, 1); |
1336 | } | 1372 | } |
1337 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 1373 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) |
1338 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; | 1374 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); |
1339 | 1375 | ||
1340 | up_write((&EXT4_I(inode)->i_data_sem)); | 1376 | up_write((&EXT4_I(inode)->i_data_sem)); |
1341 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 1377 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
@@ -1538,10 +1574,10 @@ static int do_journal_get_write_access(handle_t *handle, | |||
1538 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 1574 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
1539 | return 0; | 1575 | return 0; |
1540 | /* | 1576 | /* |
1541 | * __block_prepare_write() could have dirtied some buffers. Clean | 1577 | * __block_write_begin() could have dirtied some buffers. Clean |
1542 | * the dirty bit as jbd2_journal_get_write_access() could complain | 1578 | * the dirty bit as jbd2_journal_get_write_access() could complain |
1543 | * otherwise about fs integrity issues. Setting of the dirty bit | 1579 | * otherwise about fs integrity issues. Setting of the dirty bit |
1544 | * by __block_prepare_write() isn't a real problem here as we clear | 1580 | * by __block_write_begin() isn't a real problem here as we clear |
1545 | * the bit before releasing a page lock and thus writeback cannot | 1581 | * the bit before releasing a page lock and thus writeback cannot |
1546 | * ever write the buffer. | 1582 | * ever write the buffer. |
1547 | */ | 1583 | */ |
@@ -1863,7 +1899,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1863 | /* | 1899 | /* |
1864 | * Reserve a single block located at lblock | 1900 | * Reserve a single block located at lblock |
1865 | */ | 1901 | */ |
1866 | static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) | 1902 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) |
1867 | { | 1903 | { |
1868 | int retries = 0; | 1904 | int retries = 0; |
1869 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1905 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
@@ -1894,7 +1930,7 @@ repeat: | |||
1894 | * We do still charge estimated metadata to the sb though; | 1930 | * We do still charge estimated metadata to the sb though; |
1895 | * we cannot afford to run out of free blocks. | 1931 | * we cannot afford to run out of free blocks. |
1896 | */ | 1932 | */ |
1897 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { | 1933 | if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) { |
1898 | dquot_release_reservation_block(inode, 1); | 1934 | dquot_release_reservation_block(inode, 1); |
1899 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1935 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
1900 | yield(); | 1936 | yield(); |
@@ -1995,16 +2031,23 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1995 | * | 2031 | * |
1996 | * As pages are already locked by write_cache_pages(), we can't use it | 2032 | * As pages are already locked by write_cache_pages(), we can't use it |
1997 | */ | 2033 | */ |
1998 | static int mpage_da_submit_io(struct mpage_da_data *mpd) | 2034 | static int mpage_da_submit_io(struct mpage_da_data *mpd, |
2035 | struct ext4_map_blocks *map) | ||
1999 | { | 2036 | { |
2000 | long pages_skipped; | ||
2001 | struct pagevec pvec; | 2037 | struct pagevec pvec; |
2002 | unsigned long index, end; | 2038 | unsigned long index, end; |
2003 | int ret = 0, err, nr_pages, i; | 2039 | int ret = 0, err, nr_pages, i; |
2004 | struct inode *inode = mpd->inode; | 2040 | struct inode *inode = mpd->inode; |
2005 | struct address_space *mapping = inode->i_mapping; | 2041 | struct address_space *mapping = inode->i_mapping; |
2042 | loff_t size = i_size_read(inode); | ||
2043 | unsigned int len, block_start; | ||
2044 | struct buffer_head *bh, *page_bufs = NULL; | ||
2045 | int journal_data = ext4_should_journal_data(inode); | ||
2046 | sector_t pblock = 0, cur_logical = 0; | ||
2047 | struct ext4_io_submit io_submit; | ||
2006 | 2048 | ||
2007 | BUG_ON(mpd->next_page <= mpd->first_page); | 2049 | BUG_ON(mpd->next_page <= mpd->first_page); |
2050 | memset(&io_submit, 0, sizeof(io_submit)); | ||
2008 | /* | 2051 | /* |
2009 | * We need to start from the first_page to the next_page - 1 | 2052 | * We need to start from the first_page to the next_page - 1 |
2010 | * to make sure we also write the mapped dirty buffer_heads. | 2053 | * to make sure we also write the mapped dirty buffer_heads. |
@@ -2020,124 +2063,111 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
2020 | if (nr_pages == 0) | 2063 | if (nr_pages == 0) |
2021 | break; | 2064 | break; |
2022 | for (i = 0; i < nr_pages; i++) { | 2065 | for (i = 0; i < nr_pages; i++) { |
2066 | int commit_write = 0, skip_page = 0; | ||
2023 | struct page *page = pvec.pages[i]; | 2067 | struct page *page = pvec.pages[i]; |
2024 | 2068 | ||
2025 | index = page->index; | 2069 | index = page->index; |
2026 | if (index > end) | 2070 | if (index > end) |
2027 | break; | 2071 | break; |
2072 | |||
2073 | if (index == size >> PAGE_CACHE_SHIFT) | ||
2074 | len = size & ~PAGE_CACHE_MASK; | ||
2075 | else | ||
2076 | len = PAGE_CACHE_SIZE; | ||
2077 | if (map) { | ||
2078 | cur_logical = index << (PAGE_CACHE_SHIFT - | ||
2079 | inode->i_blkbits); | ||
2080 | pblock = map->m_pblk + (cur_logical - | ||
2081 | map->m_lblk); | ||
2082 | } | ||
2028 | index++; | 2083 | index++; |
2029 | 2084 | ||
2030 | BUG_ON(!PageLocked(page)); | 2085 | BUG_ON(!PageLocked(page)); |
2031 | BUG_ON(PageWriteback(page)); | 2086 | BUG_ON(PageWriteback(page)); |
2032 | 2087 | ||
2033 | pages_skipped = mpd->wbc->pages_skipped; | ||
2034 | err = mapping->a_ops->writepage(page, mpd->wbc); | ||
2035 | if (!err && (pages_skipped == mpd->wbc->pages_skipped)) | ||
2036 | /* | ||
2037 | * have successfully written the page | ||
2038 | * without skipping the same | ||
2039 | */ | ||
2040 | mpd->pages_written++; | ||
2041 | /* | 2088 | /* |
2042 | * In error case, we have to continue because | 2089 | * If the page does not have buffers (for |
2043 | * remaining pages are still locked | 2090 | * whatever reason), try to create them using |
2044 | * XXX: unlock and re-dirty them? | 2091 | * __block_write_begin. If this fails, |
2092 | * skip the page and move on. | ||
2045 | */ | 2093 | */ |
2046 | if (ret == 0) | 2094 | if (!page_has_buffers(page)) { |
2047 | ret = err; | 2095 | if (__block_write_begin(page, 0, len, |
2048 | } | 2096 | noalloc_get_block_write)) { |
2049 | pagevec_release(&pvec); | 2097 | skip_page: |
2050 | } | 2098 | unlock_page(page); |
2051 | return ret; | 2099 | continue; |
2052 | } | 2100 | } |
2053 | 2101 | commit_write = 1; | |
2054 | /* | 2102 | } |
2055 | * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers | ||
2056 | * | ||
2057 | * the function goes through all passed space and put actual disk | ||
2058 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten | ||
2059 | */ | ||
2060 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, | ||
2061 | struct ext4_map_blocks *map) | ||
2062 | { | ||
2063 | struct inode *inode = mpd->inode; | ||
2064 | struct address_space *mapping = inode->i_mapping; | ||
2065 | int blocks = map->m_len; | ||
2066 | sector_t pblock = map->m_pblk, cur_logical; | ||
2067 | struct buffer_head *head, *bh; | ||
2068 | pgoff_t index, end; | ||
2069 | struct pagevec pvec; | ||
2070 | int nr_pages, i; | ||
2071 | |||
2072 | index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2073 | end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2074 | cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2075 | |||
2076 | pagevec_init(&pvec, 0); | ||
2077 | |||
2078 | while (index <= end) { | ||
2079 | /* XXX: optimize tail */ | ||
2080 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
2081 | if (nr_pages == 0) | ||
2082 | break; | ||
2083 | for (i = 0; i < nr_pages; i++) { | ||
2084 | struct page *page = pvec.pages[i]; | ||
2085 | |||
2086 | index = page->index; | ||
2087 | if (index > end) | ||
2088 | break; | ||
2089 | index++; | ||
2090 | |||
2091 | BUG_ON(!PageLocked(page)); | ||
2092 | BUG_ON(PageWriteback(page)); | ||
2093 | BUG_ON(!page_has_buffers(page)); | ||
2094 | |||
2095 | bh = page_buffers(page); | ||
2096 | head = bh; | ||
2097 | |||
2098 | /* skip blocks out of the range */ | ||
2099 | do { | ||
2100 | if (cur_logical >= map->m_lblk) | ||
2101 | break; | ||
2102 | cur_logical++; | ||
2103 | } while ((bh = bh->b_this_page) != head); | ||
2104 | 2103 | ||
2104 | bh = page_bufs = page_buffers(page); | ||
2105 | block_start = 0; | ||
2105 | do { | 2106 | do { |
2106 | if (cur_logical >= map->m_lblk + blocks) | 2107 | if (!bh) |
2107 | break; | 2108 | goto skip_page; |
2108 | 2109 | if (map && (cur_logical >= map->m_lblk) && | |
2109 | if (buffer_delay(bh) || buffer_unwritten(bh)) { | 2110 | (cur_logical <= (map->m_lblk + |
2110 | 2111 | (map->m_len - 1)))) { | |
2111 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); | ||
2112 | |||
2113 | if (buffer_delay(bh)) { | 2112 | if (buffer_delay(bh)) { |
2114 | clear_buffer_delay(bh); | 2113 | clear_buffer_delay(bh); |
2115 | bh->b_blocknr = pblock; | 2114 | bh->b_blocknr = pblock; |
2116 | } else { | ||
2117 | /* | ||
2118 | * unwritten already should have | ||
2119 | * blocknr assigned. Verify that | ||
2120 | */ | ||
2121 | clear_buffer_unwritten(bh); | ||
2122 | BUG_ON(bh->b_blocknr != pblock); | ||
2123 | } | 2115 | } |
2116 | if (buffer_unwritten(bh) || | ||
2117 | buffer_mapped(bh)) | ||
2118 | BUG_ON(bh->b_blocknr != pblock); | ||
2119 | if (map->m_flags & EXT4_MAP_UNINIT) | ||
2120 | set_buffer_uninit(bh); | ||
2121 | clear_buffer_unwritten(bh); | ||
2122 | } | ||
2124 | 2123 | ||
2125 | } else if (buffer_mapped(bh)) | 2124 | /* skip page if block allocation undone */ |
2126 | BUG_ON(bh->b_blocknr != pblock); | 2125 | if (buffer_delay(bh) || buffer_unwritten(bh)) |
2127 | 2126 | skip_page = 1; | |
2128 | if (map->m_flags & EXT4_MAP_UNINIT) | 2127 | bh = bh->b_this_page; |
2129 | set_buffer_uninit(bh); | 2128 | block_start += bh->b_size; |
2130 | cur_logical++; | 2129 | cur_logical++; |
2131 | pblock++; | 2130 | pblock++; |
2132 | } while ((bh = bh->b_this_page) != head); | 2131 | } while (bh != page_bufs); |
2132 | |||
2133 | if (skip_page) | ||
2134 | goto skip_page; | ||
2135 | |||
2136 | if (commit_write) | ||
2137 | /* mark the buffer_heads as dirty & uptodate */ | ||
2138 | block_commit_write(page, 0, len); | ||
2139 | |||
2140 | clear_page_dirty_for_io(page); | ||
2141 | /* | ||
2142 | * Delalloc doesn't support data journalling, | ||
2143 | * but eventually maybe we'll lift this | ||
2144 | * restriction. | ||
2145 | */ | ||
2146 | if (unlikely(journal_data && PageChecked(page))) | ||
2147 | err = __ext4_journalled_writepage(page, len); | ||
2148 | else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT)) | ||
2149 | err = ext4_bio_write_page(&io_submit, page, | ||
2150 | len, mpd->wbc); | ||
2151 | else | ||
2152 | err = block_write_full_page(page, | ||
2153 | noalloc_get_block_write, mpd->wbc); | ||
2154 | |||
2155 | if (!err) | ||
2156 | mpd->pages_written++; | ||
2157 | /* | ||
2158 | * In the error case, we have to continue because the | ||
2159 | * remaining pages are still locked. | ||
2160 | */ | ||
2161 | if (ret == 0) | ||
2162 | ret = err; | ||
2133 | } | 2163 | } |
2134 | pagevec_release(&pvec); | 2164 | pagevec_release(&pvec); |
2135 | } | 2165 | } |
2166 | ext4_io_submit(&io_submit); | ||
2167 | return ret; | ||
2136 | } | 2168 | } |
2137 | 2169 | ||
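The rewritten mpage_da_submit_io() folds the old two-pass scheme (write the pages, then patch block numbers into buffer heads in mpage_put_bnr_to_bhs()) into a single pass: assign physical blocks to each page's buffers, skip any page whose allocation is still pending, then submit. The per-page decision can be condensed into a small predicate (a sketch over assumed flag fields, not the real buffer_head API):

#include <stdio.h>
#include <stdbool.h>

struct buf { bool delay, unwritten; long long blocknr; };

/* Buffers inside the mapped range get their physical block and lose
 * the delay/unwritten state; if any buffer on the page is still
 * delayed or unwritten afterwards, allocation is incomplete and the
 * page must be skipped -- the same decision as the loop above. */
static bool fixup_page(struct buf *bufs, int n, int map_from, int map_to,
		       long long pblock)
{
	bool skip_page = false;

	for (int i = 0; i < n; i++) {
		if (i >= map_from && i < map_to) {
			if (bufs[i].delay) {
				bufs[i].delay = false;
				bufs[i].blocknr = pblock + i;
			}
			bufs[i].unwritten = false;
		}
		if (bufs[i].delay || bufs[i].unwritten)
			skip_page = true;
	}
	return !skip_page;			/* true: safe to submit */
}

int main(void)
{
	struct buf bufs[2] = { { true, false, 0 }, { false, true, 0 } };

	/* map covers only buffer 0: the page must be skipped (0) */
	printf("submit: %d\n", fixup_page(bufs, 2, 0, 1, 500));
	/* map now covers both buffers: the page can be submitted (1) */
	printf("submit: %d\n", fixup_page(bufs, 2, 0, 2, 500));
	return 0;
}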
2138 | 2170 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | |
2139 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | ||
2140 | sector_t logical, long blk_cnt) | ||
2141 | { | 2171 | { |
2142 | int nr_pages, i; | 2172 | int nr_pages, i; |
2143 | pgoff_t index, end; | 2173 | pgoff_t index, end; |
@@ -2145,9 +2175,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | |||
2145 | struct inode *inode = mpd->inode; | 2175 | struct inode *inode = mpd->inode; |
2146 | struct address_space *mapping = inode->i_mapping; | 2176 | struct address_space *mapping = inode->i_mapping; |
2147 | 2177 | ||
2148 | index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2178 | index = mpd->first_page; |
2149 | end = (logical + blk_cnt - 1) >> | 2179 | end = mpd->next_page - 1; |
2150 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2151 | while (index <= end) { | 2180 | while (index <= end) { |
2152 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | 2181 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); |
2153 | if (nr_pages == 0) | 2182 | if (nr_pages == 0) |
@@ -2187,35 +2216,32 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
2187 | } | 2216 | } |
2188 | 2217 | ||
2189 | /* | 2218 | /* |
2190 | * mpage_da_map_blocks - go through given space | 2219 | * mpage_da_map_and_submit - go through given space, map them |
2220 | * if necessary, and then submit them for I/O | ||
2191 | * | 2221 | * |
2192 | * @mpd - bh describing space | 2222 | * @mpd - bh describing space |
2193 | * | 2223 | * |
2194 | * The function skips space we know is already mapped to disk blocks. | 2224 | * The function skips space we know is already mapped to disk blocks. |
2195 | * | 2225 | * |
2196 | */ | 2226 | */ |
2197 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) | 2227 | static void mpage_da_map_and_submit(struct mpage_da_data *mpd) |
2198 | { | 2228 | { |
2199 | int err, blks, get_blocks_flags; | 2229 | int err, blks, get_blocks_flags; |
2200 | struct ext4_map_blocks map; | 2230 | struct ext4_map_blocks map, *mapp = NULL; |
2201 | sector_t next = mpd->b_blocknr; | 2231 | sector_t next = mpd->b_blocknr; |
2202 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | 2232 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; |
2203 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | 2233 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; |
2204 | handle_t *handle = NULL; | 2234 | handle_t *handle = NULL; |
2205 | 2235 | ||
2206 | /* | 2236 | /* |
2207 | * We consider only non-mapped and non-allocated blocks | 2237 | * If the blocks are mapped already, or we couldn't accumulate |
2238 | * any blocks, then proceed immediately to the submission stage. | ||
2208 | */ | 2239 | */ |
2209 | if ((mpd->b_state & (1 << BH_Mapped)) && | 2240 | if ((mpd->b_size == 0) || |
2210 | !(mpd->b_state & (1 << BH_Delay)) && | 2241 | ((mpd->b_state & (1 << BH_Mapped)) && |
2211 | !(mpd->b_state & (1 << BH_Unwritten))) | 2242 | !(mpd->b_state & (1 << BH_Delay)) && |
2212 | return 0; | 2243 | !(mpd->b_state & (1 << BH_Unwritten)))) |
2213 | 2244 | goto submit_io; | |
2214 | /* | ||
2215 | * If we didn't accumulate anything to write simply return | ||
2216 | */ | ||
2217 | if (!mpd->b_size) | ||
2218 | return 0; | ||
2219 | 2245 | ||
2220 | handle = ext4_journal_current_handle(); | 2246 | handle = ext4_journal_current_handle(); |
2221 | BUG_ON(!handle); | 2247 | BUG_ON(!handle); |
@@ -2231,7 +2257,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2231 | * affects functions in many different parts of the allocation | 2257 | * affects functions in many different parts of the allocation |
2232 | * call path. This flag exists primarily because we don't | 2258 | * call path. This flag exists primarily because we don't |
2233 | * want to change *many* call functions, so ext4_map_blocks() | 2259 | * want to change *many* call functions, so ext4_map_blocks() |
2234 | * will set the magic i_delalloc_reserved_flag once the | 2260 | * will set the EXT4_STATE_DELALLOC_RESERVED flag once the |
2235 | * inode's allocation semaphore is taken. | 2261 | * inode's allocation semaphore is taken. |
2236 | * | 2262 | * |
2237 | * If the blocks in question were delalloc blocks, set | 2263 | * If the blocks in question were delalloc blocks, set |
@@ -2252,17 +2278,17 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2252 | 2278 | ||
2253 | err = blks; | 2279 | err = blks; |
2254 | /* | 2280 | /* |
2255 | * If get block returns with error we simply | 2281 | * If get block returns EAGAIN or ENOSPC and there |
2256 | * return. Later writepage will redirty the page and | 2282 | * appear to be free blocks, we will just let |
2257 | * writepages will find the dirty page again | 2283 | * mpage_da_submit_io() unlock all of the pages. |
2258 | */ | 2284 | */ |
2259 | if (err == -EAGAIN) | 2285 | if (err == -EAGAIN) |
2260 | return 0; | 2286 | goto submit_io; |
2261 | 2287 | ||
2262 | if (err == -ENOSPC && | 2288 | if (err == -ENOSPC && |
2263 | ext4_count_free_blocks(sb)) { | 2289 | ext4_count_free_blocks(sb)) { |
2264 | mpd->retval = err; | 2290 | mpd->retval = err; |
2265 | return 0; | 2291 | goto submit_io; |
2266 | } | 2292 | } |
2267 | 2293 | ||
2268 | /* | 2294 | /* |
@@ -2285,12 +2311,15 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2285 | ext4_print_free_blocks(mpd->inode); | 2311 | ext4_print_free_blocks(mpd->inode); |
2286 | } | 2312 | } |
2287 | /* invalidate all the pages */ | 2313 | /* invalidate all the pages */ |
2288 | ext4_da_block_invalidatepages(mpd, next, | 2314 | ext4_da_block_invalidatepages(mpd); |
2289 | mpd->b_size >> mpd->inode->i_blkbits); | 2315 | |
2290 | return err; | 2316 | /* Mark this page range as having been completed */ |
2317 | mpd->io_done = 1; | ||
2318 | return; | ||
2291 | } | 2319 | } |
2292 | BUG_ON(blks == 0); | 2320 | BUG_ON(blks == 0); |
2293 | 2321 | ||
2322 | mapp = &map; | ||
2294 | if (map.m_flags & EXT4_MAP_NEW) { | 2323 | if (map.m_flags & EXT4_MAP_NEW) { |
2295 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; | 2324 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; |
2296 | int i; | 2325 | int i; |
@@ -2299,18 +2328,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2299 | unmap_underlying_metadata(bdev, map.m_pblk + i); | 2328 | unmap_underlying_metadata(bdev, map.m_pblk + i); |
2300 | } | 2329 | } |
2301 | 2330 | ||
2302 | /* | ||
2303 | * If blocks are delayed marked, we need to | ||
2304 | * put actual blocknr and drop delayed bit | ||
2305 | */ | ||
2306 | if ((mpd->b_state & (1 << BH_Delay)) || | ||
2307 | (mpd->b_state & (1 << BH_Unwritten))) | ||
2308 | mpage_put_bnr_to_bhs(mpd, &map); | ||
2309 | |||
2310 | if (ext4_should_order_data(mpd->inode)) { | 2331 | if (ext4_should_order_data(mpd->inode)) { |
2311 | err = ext4_jbd2_file_inode(handle, mpd->inode); | 2332 | err = ext4_jbd2_file_inode(handle, mpd->inode); |
2312 | if (err) | 2333 | if (err) |
2313 | return err; | 2334 | /* This only happens if the journal is aborted */ |
2335 | return; | ||
2314 | } | 2336 | } |
2315 | 2337 | ||
2316 | /* | 2338 | /* |
@@ -2321,10 +2343,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2321 | disksize = i_size_read(mpd->inode); | 2343 | disksize = i_size_read(mpd->inode); |
2322 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { | 2344 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { |
2323 | ext4_update_i_disksize(mpd->inode, disksize); | 2345 | ext4_update_i_disksize(mpd->inode, disksize); |
2324 | return ext4_mark_inode_dirty(handle, mpd->inode); | 2346 | err = ext4_mark_inode_dirty(handle, mpd->inode); |
2347 | if (err) | ||
2348 | ext4_error(mpd->inode->i_sb, | ||
2349 | "Failed to mark inode %lu dirty", | ||
2350 | mpd->inode->i_ino); | ||
2325 | } | 2351 | } |
2326 | 2352 | ||
2327 | return 0; | 2353 | submit_io: |
2354 | mpage_da_submit_io(mpd, mapp); | ||
2355 | mpd->io_done = 1; | ||
2328 | } | 2356 | } |
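The single-exit structure above, where every early-out path now lands on the submit_io label and sets io_done, is the heart of the rewrite from mpage_da_map_blocks() to mpage_da_map_and_submit(). A compilable userspace sketch of that control flow; map_blocks() and da_submit_io() are hypothetical stand-ins for ext4_map_blocks() and mpage_da_submit_io():

#include <errno.h>
#include <stdio.h>

struct extent {
	unsigned b_size;	/* bytes accumulated, 0 = nothing to map */
	int	 mapped;	/* already mapped on disk? */
	int	 io_done;
	int	 retval;
};

/* Hypothetical stand-ins for ext4_map_blocks() / mpage_da_submit_io(). */
static int map_blocks(struct extent *ext) { return ext->b_size ? 0 : -EAGAIN; }
static void da_submit_io(struct extent *ext, int have_map)
{
	printf("submit %s a new mapping\n", have_map ? "with" : "without");
}

static void map_and_submit(struct extent *ext)
{
	int have_map = 0;

	/* Already mapped, or nothing accumulated: submit immediately. */
	if (ext->b_size == 0 || ext->mapped)
		goto submit_io;

	if (map_blocks(ext) < 0) {
		/*
		 * Soft failure: fall through to submission anyway, so
		 * every page gets unlocked instead of being left locked
		 * behind an early return.
		 */
		ext->retval = -EAGAIN;
		goto submit_io;
	}
	have_map = 1;

submit_io:
	da_submit_io(ext, have_map);
	ext->io_done = 1;	/* the extent is finished either way */
}

int main(void)
{
	struct extent ext = { .b_size = 4096 };

	map_and_submit(&ext);
	return 0;
}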
2329 | 2357 | ||
2330 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ | 2358 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ |
@@ -2401,9 +2429,7 @@ flush_it: | |||
2401 | * We couldn't merge the block to our extent, so we | 2429 | * We couldn't merge the block to our extent, so we |
2402 | * need to flush current extent and start new one | 2430 | * need to flush current extent and start new one |
2403 | */ | 2431 | */ |
2404 | if (mpage_da_map_blocks(mpd) == 0) | 2432 | mpage_da_map_and_submit(mpd); |
2405 | mpage_da_submit_io(mpd); | ||
2406 | mpd->io_done = 1; | ||
2407 | return; | 2433 | return; |
2408 | } | 2434 | } |
2409 | 2435 | ||
@@ -2413,104 +2439,6 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | |||
2413 | } | 2439 | } |
2414 | 2440 | ||
2415 | /* | 2441 | /* |
2416 | * __mpage_da_writepage - finds extent of pages and blocks | ||
2417 | * | ||
2418 | * @page: page to consider | ||
2419 | * @wbc: not used, we just follow rules | ||
2420 | * @data: context | ||
2421 | * | ||
2422 | * The function finds extents of pages and scan them for all blocks. | ||
2423 | */ | ||
2424 | static int __mpage_da_writepage(struct page *page, | ||
2425 | struct writeback_control *wbc, void *data) | ||
2426 | { | ||
2427 | struct mpage_da_data *mpd = data; | ||
2428 | struct inode *inode = mpd->inode; | ||
2429 | struct buffer_head *bh, *head; | ||
2430 | sector_t logical; | ||
2431 | |||
2432 | /* | ||
2433 | * Can we merge this page to current extent? | ||
2434 | */ | ||
2435 | if (mpd->next_page != page->index) { | ||
2436 | /* | ||
2437 | * Nope, we can't. So, we map non-allocated blocks | ||
2438 | * and start IO on them using writepage() | ||
2439 | */ | ||
2440 | if (mpd->next_page != mpd->first_page) { | ||
2441 | if (mpage_da_map_blocks(mpd) == 0) | ||
2442 | mpage_da_submit_io(mpd); | ||
2443 | /* | ||
2444 | * skip rest of the page in the page_vec | ||
2445 | */ | ||
2446 | mpd->io_done = 1; | ||
2447 | redirty_page_for_writepage(wbc, page); | ||
2448 | unlock_page(page); | ||
2449 | return MPAGE_DA_EXTENT_TAIL; | ||
2450 | } | ||
2451 | |||
2452 | /* | ||
2453 | * Start next extent of pages ... | ||
2454 | */ | ||
2455 | mpd->first_page = page->index; | ||
2456 | |||
2457 | /* | ||
2458 | * ... and blocks | ||
2459 | */ | ||
2460 | mpd->b_size = 0; | ||
2461 | mpd->b_state = 0; | ||
2462 | mpd->b_blocknr = 0; | ||
2463 | } | ||
2464 | |||
2465 | mpd->next_page = page->index + 1; | ||
2466 | logical = (sector_t) page->index << | ||
2467 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2468 | |||
2469 | if (!page_has_buffers(page)) { | ||
2470 | mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE, | ||
2471 | (1 << BH_Dirty) | (1 << BH_Uptodate)); | ||
2472 | if (mpd->io_done) | ||
2473 | return MPAGE_DA_EXTENT_TAIL; | ||
2474 | } else { | ||
2475 | /* | ||
2476 | * Page with regular buffer heads, just add all dirty ones | ||
2477 | */ | ||
2478 | head = page_buffers(page); | ||
2479 | bh = head; | ||
2480 | do { | ||
2481 | BUG_ON(buffer_locked(bh)); | ||
2482 | /* | ||
2483 | * We need to try to allocate | ||
2484 | * unmapped blocks in the same page. | ||
2485 | * Otherwise we won't make progress | ||
2486 | * with the page in ext4_writepage | ||
2487 | */ | ||
2488 | if (ext4_bh_delay_or_unwritten(NULL, bh)) { | ||
2489 | mpage_add_bh_to_extent(mpd, logical, | ||
2490 | bh->b_size, | ||
2491 | bh->b_state); | ||
2492 | if (mpd->io_done) | ||
2493 | return MPAGE_DA_EXTENT_TAIL; | ||
2494 | } else if (buffer_dirty(bh) && (buffer_mapped(bh))) { | ||
2495 | /* | ||
2496 | * mapped dirty buffer. We need to update | ||
2497 | * the b_state because we look at | ||
2498 | * b_state in mpage_da_map_blocks. We don't | ||
2499 | * update b_size because if we find an | ||
2500 | * unmapped buffer_head later we need to | ||
2501 | * use the b_state flag of that buffer_head. | ||
2502 | */ | ||
2503 | if (mpd->b_size == 0) | ||
2504 | mpd->b_state = bh->b_state & BH_FLAGS; | ||
2505 | } | ||
2506 | logical++; | ||
2507 | } while ((bh = bh->b_this_page) != head); | ||
2508 | } | ||
2509 | |||
2510 | return 0; | ||
2511 | } | ||
2512 | |||
2513 | /* | ||
2514 | * This is a special get_blocks_t callback which is used by | 2442 | * This is a special get_blocks_t callback which is used by |
2515 | * ext4_da_write_begin(). It will either return mapped block or | 2443 | * ext4_da_write_begin(). It will either return mapped block or |
2516 | * reserve space for a single block. | 2444 | * reserve space for a single block. |
@@ -2550,8 +2478,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2550 | if (buffer_delay(bh)) | 2478 | if (buffer_delay(bh)) |
2551 | return 0; /* Not sure this could or should happen */ | 2479 | return 0; /* Not sure this could or should happen */ |
2552 | /* | 2480 | /* |
2553 | * XXX: __block_prepare_write() unmaps passed block, | 2481 | * XXX: __block_write_begin() unmaps passed block, is it OK? |
2554 | * is it OK? | ||
2555 | */ | 2482 | */ |
2556 | ret = ext4_da_reserve_space(inode, iblock); | 2483 | ret = ext4_da_reserve_space(inode, iblock); |
2557 | if (ret) | 2484 | if (ret) |
@@ -2583,7 +2510,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2583 | /* | 2510 | /* |
2584 | * This function is used as a standard get_block_t callback function | 2511 | * This function is used as a standard get_block_t callback function |
2585 | * when there is no desire to allocate any blocks. It is used as a | 2512 | * when there is no desire to allocate any blocks. It is used as a |
2586 | * callback function for block_prepare_write() and block_write_full_page(). | 2513 | * callback function for block_write_begin() and block_write_full_page(). |
2587 | * These functions should only try to map a single block at a time. | 2514 | * These functions should only try to map a single block at a time. |
2588 | * | 2515 | * |
2589 | * Since this function doesn't do block allocations even if the caller | 2516 | * Since this function doesn't do block allocations even if the caller |
@@ -2623,6 +2550,7 @@ static int __ext4_journalled_writepage(struct page *page, | |||
2623 | int ret = 0; | 2550 | int ret = 0; |
2624 | int err; | 2551 | int err; |
2625 | 2552 | ||
2553 | ClearPageChecked(page); | ||
2626 | page_bufs = page_buffers(page); | 2554 | page_bufs = page_buffers(page); |
2627 | BUG_ON(!page_bufs); | 2555 | BUG_ON(!page_bufs); |
2628 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); | 2556 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); |
@@ -2661,7 +2589,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | |||
2661 | * because we should have holes filled from ext4_page_mkwrite(). We even don't | 2589 | * because we should have holes filled from ext4_page_mkwrite(). We even don't |
2662 | * need to file the inode to the transaction's list in ordered mode because if | 2590 | * need to file the inode to the transaction's list in ordered mode because if |
2663 | * we are writing back data added by write(), the inode is already there and if | 2591 | * we are writing back data added by write(), the inode is already there and if |
2664 | * we are writing back data modified via mmap(), noone guarantees in which | 2592 | * we are writing back data modified via mmap(), no one guarantees in which |
2665 | * transaction the data will hit the disk. In case we are journaling data, we | 2593 | * transaction the data will hit the disk. In case we are journaling data, we |
2666 | * cannot start transaction directly because transaction start ranks above page | 2594 | * cannot start transaction directly because transaction start ranks above page |
2667 | * lock so we have to do some magic. | 2595 | * lock so we have to do some magic. |
@@ -2700,84 +2628,57 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | |||
2700 | static int ext4_writepage(struct page *page, | 2628 | static int ext4_writepage(struct page *page, |
2701 | struct writeback_control *wbc) | 2629 | struct writeback_control *wbc) |
2702 | { | 2630 | { |
2703 | int ret = 0; | 2631 | int ret = 0, commit_write = 0; |
2704 | loff_t size; | 2632 | loff_t size; |
2705 | unsigned int len; | 2633 | unsigned int len; |
2706 | struct buffer_head *page_bufs = NULL; | 2634 | struct buffer_head *page_bufs = NULL; |
2707 | struct inode *inode = page->mapping->host; | 2635 | struct inode *inode = page->mapping->host; |
2708 | 2636 | ||
2709 | trace_ext4_writepage(inode, page); | 2637 | trace_ext4_writepage(page); |
2710 | size = i_size_read(inode); | 2638 | size = i_size_read(inode); |
2711 | if (page->index == size >> PAGE_CACHE_SHIFT) | 2639 | if (page->index == size >> PAGE_CACHE_SHIFT) |
2712 | len = size & ~PAGE_CACHE_MASK; | 2640 | len = size & ~PAGE_CACHE_MASK; |
2713 | else | 2641 | else |
2714 | len = PAGE_CACHE_SIZE; | 2642 | len = PAGE_CACHE_SIZE; |
2715 | 2643 | ||
2716 | if (page_has_buffers(page)) { | 2644 | /* |
2717 | page_bufs = page_buffers(page); | 2645 | * If the page does not have buffers (for whatever reason), |
2718 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | 2646 | * try to create them using __block_write_begin. If this |
2719 | ext4_bh_delay_or_unwritten)) { | 2647 | * fails, redirty the page and move on. |
2720 | /* | 2648 | */ |
2721 | * We don't want to do block allocation | 2649 | if (!page_has_buffers(page)) { |
2722 | * So redirty the page and return | 2650 | if (__block_write_begin(page, 0, len, |
2723 | * We may reach here when we do a journal commit | 2651 | noalloc_get_block_write)) { |
2724 | * via journal_submit_inode_data_buffers. | 2652 | redirty_page: |
2725 | * If we don't have mapping block we just ignore | ||
2726 | * them. We can also reach here via shrink_page_list | ||
2727 | */ | ||
2728 | redirty_page_for_writepage(wbc, page); | 2653 | redirty_page_for_writepage(wbc, page); |
2729 | unlock_page(page); | 2654 | unlock_page(page); |
2730 | return 0; | 2655 | return 0; |
2731 | } | 2656 | } |
2732 | } else { | 2657 | commit_write = 1; |
2658 | } | ||
2659 | page_bufs = page_buffers(page); | ||
2660 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
2661 | ext4_bh_delay_or_unwritten)) { | ||
2733 | /* | 2662 | /* |
2734 | * The test for page_has_buffers() is subtle: | 2663 | * We don't want to do block allocation, so redirty |
2735 | * We know the page is dirty but it lost buffers. That means | 2664 | * the page and return. We may reach here when we do |
2736 | * that at some moment in time after write_begin()/write_end() | 2665 | * a journal commit via journal_submit_inode_data_buffers. |
2737 | * has been called all buffers have been clean and thus they | 2666 | * We can also reach here via shrink_page_list |
2738 | * must have been written at least once. So they are all | ||
2739 | * mapped and we can happily proceed with mapping them | ||
2740 | * and writing the page. | ||
2741 | * | ||
2742 | * Try to initialize the buffer_heads and check whether | ||
2743 | * all are mapped and non delay. We don't want to | ||
2744 | * do block allocation here. | ||
2745 | */ | 2667 | */ |
2746 | ret = block_prepare_write(page, 0, len, | 2668 | goto redirty_page; |
2747 | noalloc_get_block_write); | 2669 | } |
2748 | if (!ret) { | 2670 | if (commit_write) |
2749 | page_bufs = page_buffers(page); | ||
2750 | /* check whether all are mapped and non delay */ | ||
2751 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
2752 | ext4_bh_delay_or_unwritten)) { | ||
2753 | redirty_page_for_writepage(wbc, page); | ||
2754 | unlock_page(page); | ||
2755 | return 0; | ||
2756 | } | ||
2757 | } else { | ||
2758 | /* | ||
2759 | * We can't do block allocation here | ||
2760 | * so just redity the page and unlock | ||
2761 | * and return | ||
2762 | */ | ||
2763 | redirty_page_for_writepage(wbc, page); | ||
2764 | unlock_page(page); | ||
2765 | return 0; | ||
2766 | } | ||
2767 | /* now mark the buffer_heads as dirty and uptodate */ | 2671 | /* now mark the buffer_heads as dirty and uptodate */ |
2768 | block_commit_write(page, 0, len); | 2672 | block_commit_write(page, 0, len); |
2769 | } | ||
2770 | 2673 | ||
2771 | if (PageChecked(page) && ext4_should_journal_data(inode)) { | 2674 | if (PageChecked(page) && ext4_should_journal_data(inode)) |
2772 | /* | 2675 | /* |
2773 | * It's mmapped pagecache. Add buffers and journal it. There | 2676 | * It's mmapped pagecache. Add buffers and journal it. There |
2774 | * doesn't seem much point in redirtying the page here. | 2677 | * doesn't seem much point in redirtying the page here. |
2775 | */ | 2678 | */ |
2776 | ClearPageChecked(page); | ||
2777 | return __ext4_journalled_writepage(page, len); | 2679 | return __ext4_journalled_writepage(page, len); |
2778 | } | ||
2779 | 2680 | ||
2780 | if (page_bufs && buffer_uninit(page_bufs)) { | 2681 | if (buffer_uninit(page_bufs)) { |
2781 | ext4_set_bh_endio(page_bufs, inode); | 2682 | ext4_set_bh_endio(page_bufs, inode); |
2782 | ret = block_write_full_page_endio(page, noalloc_get_block_write, | 2683 | ret = block_write_full_page_endio(page, noalloc_get_block_write, |
2783 | wbc, ext4_end_io_buffer_write); | 2684 | wbc, ext4_end_io_buffer_write); |
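The rewritten ext4_writepage() above reduces to one decision ladder: create buffers with __block_write_begin() if the page has none, bail out to a shared redirty_page label whenever block allocation would be needed, and call block_commit_write() only when the buffers were freshly created. A small sketch of that ladder; the helpers here are hypothetical stubs for the page and buffer predicates, not the ext4 functions:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stubs for the page/buffer predicates used above. */
static bool page_has_bufs(void)      { return false; }
static int  create_buffers(void)     { return 0; }	/* 0 = success */
static bool delay_or_unwritten(void) { return false; }

static int writepage_sketch(void)
{
	int commit_write = 0;

	if (!page_has_bufs()) {
		if (create_buffers() != 0)
			goto redirty_page;	/* no buffers: try again later */
		commit_write = 1;		/* fresh buffers need committing */
	}
	if (delay_or_unwritten())
		goto redirty_page;	/* allocation belongs to writepages() */
	if (commit_write)
		puts("block_commit_write: buffers marked dirty and uptodate");
	puts("page written");
	return 0;

redirty_page:
	puts("page redirtied and unlocked");
	return 0;
}

int main(void) { return writepage_sketch(); }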
@@ -2790,7 +2691,7 @@ static int ext4_writepage(struct page *page, | |||
2790 | 2691 | ||
2791 | /* | 2692 | /* |
2792 | * This is called via ext4_da_writepages() to | 2693 | * This is called via ext4_da_writepages() to |
2793 | * calulate the total number of credits to reserve to fit | 2694 | * calculate the total number of credits to reserve to fit |
2794 | * a single extent allocation into a single transaction, | 2695 | * a single extent allocation into a single transaction, |
2795 | * ext4_da_writepages() will loop calling this before | 2696 | * ext4_da_writepages() will loop calling this before |
2796 | * the block allocation. | 2697 | * the block allocation. |
@@ -2815,37 +2716,42 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2815 | 2716 | ||
2816 | /* | 2717 | /* |
2817 | * write_cache_pages_da - walk the list of dirty pages of the given | 2718 | * write_cache_pages_da - walk the list of dirty pages of the given |
2818 | * address space and call the callback function (which usually writes | 2719 | * address space and accumulate pages that need writing, and call |
2819 | * the pages). | 2720 | * mpage_da_map_and_submit to map a single contiguous memory region |
2820 | * | 2721 | * and then write them. |
2821 | * This is a forked version of write_cache_pages(). Differences: | ||
2822 | * Range cyclic is ignored. | ||
2823 | * no_nrwrite_index_update is always presumed true | ||
2824 | */ | 2722 | */ |
2825 | static int write_cache_pages_da(struct address_space *mapping, | 2723 | static int write_cache_pages_da(struct address_space *mapping, |
2826 | struct writeback_control *wbc, | 2724 | struct writeback_control *wbc, |
2827 | struct mpage_da_data *mpd) | 2725 | struct mpage_da_data *mpd, |
2726 | pgoff_t *done_index) | ||
2828 | { | 2727 | { |
2829 | int ret = 0; | 2728 | struct buffer_head *bh, *head; |
2830 | int done = 0; | 2729 | struct inode *inode = mapping->host; |
2831 | struct pagevec pvec; | 2730 | struct pagevec pvec; |
2832 | int nr_pages; | 2731 | unsigned int nr_pages; |
2833 | pgoff_t index; | 2732 | sector_t logical; |
2834 | pgoff_t end; /* Inclusive */ | 2733 | pgoff_t index, end; |
2835 | long nr_to_write = wbc->nr_to_write; | 2734 | long nr_to_write = wbc->nr_to_write; |
2836 | 2735 | int i, tag, ret = 0; | |
2736 | |||
2737 | memset(mpd, 0, sizeof(struct mpage_da_data)); | ||
2738 | mpd->wbc = wbc; | ||
2739 | mpd->inode = inode; | ||
2837 | pagevec_init(&pvec, 0); | 2740 | pagevec_init(&pvec, 0); |
2838 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2741 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2839 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2742 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
2840 | 2743 | ||
2841 | while (!done && (index <= end)) { | 2744 | if (wbc->sync_mode == WB_SYNC_ALL) |
2842 | int i; | 2745 | tag = PAGECACHE_TAG_TOWRITE; |
2746 | else | ||
2747 | tag = PAGECACHE_TAG_DIRTY; | ||
2843 | 2748 | ||
2844 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2749 | *done_index = index; |
2845 | PAGECACHE_TAG_DIRTY, | 2750 | while (index <= end) { |
2751 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | ||
2846 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2752 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
2847 | if (nr_pages == 0) | 2753 | if (nr_pages == 0) |
2848 | break; | 2754 | return 0; |
2849 | 2755 | ||
2850 | for (i = 0; i < nr_pages; i++) { | 2756 | for (i = 0; i < nr_pages; i++) { |
2851 | struct page *page = pvec.pages[i]; | 2757 | struct page *page = pvec.pages[i]; |
@@ -2857,58 +2763,98 @@ static int write_cache_pages_da(struct address_space *mapping, | |||
2857 | * mapping. However, page->index will not change | 2763 | * mapping. However, page->index will not change |
2858 | * because we have a reference on the page. | 2764 | * because we have a reference on the page. |
2859 | */ | 2765 | */ |
2860 | if (page->index > end) { | 2766 | if (page->index > end) |
2861 | done = 1; | 2767 | goto out; |
2862 | break; | 2768 | |
2769 | *done_index = page->index + 1; | ||
2770 | |||
2771 | /* | ||
2772 | * If we can't merge this page, and we have | ||
2773 | * accumulated a contiguous region, write it | ||
2774 | */ | ||
2775 | if ((mpd->next_page != page->index) && | ||
2776 | (mpd->next_page != mpd->first_page)) { | ||
2777 | mpage_da_map_and_submit(mpd); | ||
2778 | goto ret_extent_tail; | ||
2863 | } | 2779 | } |
2864 | 2780 | ||
2865 | lock_page(page); | 2781 | lock_page(page); |
2866 | 2782 | ||
2867 | /* | 2783 | /* |
2868 | * Page truncated or invalidated. We can freely skip it | 2784 | * If the page is no longer dirty, or its |
2869 | * then, even for data integrity operations: the page | 2785 | * mapping no longer corresponds to inode we |
2870 | * has disappeared concurrently, so there could be no | 2786 | * are writing (which means it has been |
2871 | * real expectation of this data interity operation | 2787 | * truncated or invalidated), or the page is |
2872 | * even if there is now a new, dirty page at the same | 2788 | * already under writeback and we are not |
2873 | * pagecache address. | 2789 | * doing a data integrity writeback, skip the page |
2874 | */ | 2790 | */ |
2875 | if (unlikely(page->mapping != mapping)) { | 2791 | if (!PageDirty(page) || |
2876 | continue_unlock: | 2792 | (PageWriteback(page) && |
2793 | (wbc->sync_mode == WB_SYNC_NONE)) || | ||
2794 | unlikely(page->mapping != mapping)) { | ||
2877 | unlock_page(page); | 2795 | unlock_page(page); |
2878 | continue; | 2796 | continue; |
2879 | } | 2797 | } |
2880 | 2798 | ||
2881 | if (!PageDirty(page)) { | 2799 | wait_on_page_writeback(page); |
2882 | /* someone wrote it for us */ | ||
2883 | goto continue_unlock; | ||
2884 | } | ||
2885 | |||
2886 | if (PageWriteback(page)) { | ||
2887 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
2888 | wait_on_page_writeback(page); | ||
2889 | else | ||
2890 | goto continue_unlock; | ||
2891 | } | ||
2892 | |||
2893 | BUG_ON(PageWriteback(page)); | 2800 | BUG_ON(PageWriteback(page)); |
2894 | if (!clear_page_dirty_for_io(page)) | ||
2895 | goto continue_unlock; | ||
2896 | 2801 | ||
2897 | ret = __mpage_da_writepage(page, wbc, mpd); | 2802 | if (mpd->next_page != page->index) |
2898 | if (unlikely(ret)) { | 2803 | mpd->first_page = page->index; |
2899 | if (ret == AOP_WRITEPAGE_ACTIVATE) { | 2804 | mpd->next_page = page->index + 1; |
2900 | unlock_page(page); | 2805 | logical = (sector_t) page->index << |
2901 | ret = 0; | 2806 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
2902 | } else { | 2807 | |
2903 | done = 1; | 2808 | if (!page_has_buffers(page)) { |
2904 | break; | 2809 | mpage_add_bh_to_extent(mpd, logical, |
2905 | } | 2810 | PAGE_CACHE_SIZE, |
2811 | (1 << BH_Dirty) | (1 << BH_Uptodate)); | ||
2812 | if (mpd->io_done) | ||
2813 | goto ret_extent_tail; | ||
2814 | } else { | ||
2815 | /* | ||
2816 | * Page with regular buffer heads, | ||
2817 | * just add all dirty ones | ||
2818 | */ | ||
2819 | head = page_buffers(page); | ||
2820 | bh = head; | ||
2821 | do { | ||
2822 | BUG_ON(buffer_locked(bh)); | ||
2823 | /* | ||
2824 | * We need to try to allocate | ||
2825 | * unmapped blocks in the same page. | ||
2826 | * Otherwise we won't make progress | ||
2827 | * with the page in ext4_writepage | ||
2828 | */ | ||
2829 | if (ext4_bh_delay_or_unwritten(NULL, bh)) { | ||
2830 | mpage_add_bh_to_extent(mpd, logical, | ||
2831 | bh->b_size, | ||
2832 | bh->b_state); | ||
2833 | if (mpd->io_done) | ||
2834 | goto ret_extent_tail; | ||
2835 | } else if (buffer_dirty(bh) && (buffer_mapped(bh))) { | ||
2836 | /* | ||
2837 | * mapped dirty buffer. We need | ||
2838 | * to update the b_state | ||
2839 | * because we look at b_state | ||
2840 | * in mpage_da_map_and_submit. We | ||
2841 | * don't update b_size because | ||
2842 | * if we find an unmapped | ||
2843 | * buffer_head later we need to | ||
2844 | * use the b_state flag of that | ||
2845 | * buffer_head. | ||
2846 | */ | ||
2847 | if (mpd->b_size == 0) | ||
2848 | mpd->b_state = bh->b_state & BH_FLAGS; | ||
2849 | } | ||
2850 | logical++; | ||
2851 | } while ((bh = bh->b_this_page) != head); | ||
2906 | } | 2852 | } |
2907 | 2853 | ||
2908 | if (nr_to_write > 0) { | 2854 | if (nr_to_write > 0) { |
2909 | nr_to_write--; | 2855 | nr_to_write--; |
2910 | if (nr_to_write == 0 && | 2856 | if (nr_to_write == 0 && |
2911 | wbc->sync_mode == WB_SYNC_NONE) { | 2857 | wbc->sync_mode == WB_SYNC_NONE) |
2912 | /* | 2858 | /* |
2913 | * We stop writing back only if we are | 2859 | * We stop writing back only if we are |
2914 | * not doing integrity sync. In case of | 2860 | * not doing integrity sync. In case of |
@@ -2919,14 +2865,18 @@ continue_unlock: | |||
2919 | * pages, but have not synced all of the | 2865 | * pages, but have not synced all of the |
2920 | * old dirty pages. | 2866 | * old dirty pages. |
2921 | */ | 2867 | */ |
2922 | done = 1; | 2868 | goto out; |
2923 | break; | ||
2924 | } | ||
2925 | } | 2869 | } |
2926 | } | 2870 | } |
2927 | pagevec_release(&pvec); | 2871 | pagevec_release(&pvec); |
2928 | cond_resched(); | 2872 | cond_resched(); |
2929 | } | 2873 | } |
2874 | return 0; | ||
2875 | ret_extent_tail: | ||
2876 | ret = MPAGE_DA_EXTENT_TAIL; | ||
2877 | out: | ||
2878 | pagevec_release(&pvec); | ||
2879 | cond_resched(); | ||
2930 | return ret; | 2880 | return ret; |
2931 | } | 2881 | } |
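write_cache_pages_da() now picks PAGECACHE_TAG_TOWRITE for WB_SYNC_ALL, which pairs with the tag_pages_for_writeback() call added to ext4_da_writepages() below: the dirty set is snapshotted before the walk, so pages redirtied during the walk cannot keep an integrity sync looping forever. A userspace analogue of that snapshot-then-walk pattern; plain arrays stand in for the radix-tree tags, an assumption made purely for illustration:

#include <stdbool.h>
#include <stdio.h>

#define NPAGES 8

static bool dirty[NPAGES]   = { 1, 0, 1, 1, 0, 0, 1, 0 };
static bool towrite[NPAGES];		/* the TOWRITE snapshot */

int main(void)
{
	/* tag_pages_for_writeback(): snapshot the current dirty set */
	for (int i = 0; i < NPAGES; i++)
		towrite[i] = dirty[i];

	for (int i = 0; i < NPAGES; i++) {
		if (!towrite[i])
			continue;
		dirty[4] = true;	/* a page is redirtied mid-walk ... */
		printf("writing page %d\n", i);
		dirty[i] = false;
		towrite[i] = false;
	}
	/* ... but only the snapshot is walked: page 4 waits for the
	 * next sync instead of livelocking this one. */
	printf("page 4 dirty after walk: %d\n", dirty[4]);
	return 0;
}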
2932 | 2882 | ||
@@ -2940,13 +2890,14 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2940 | struct mpage_da_data mpd; | 2890 | struct mpage_da_data mpd; |
2941 | struct inode *inode = mapping->host; | 2891 | struct inode *inode = mapping->host; |
2942 | int pages_written = 0; | 2892 | int pages_written = 0; |
2943 | long pages_skipped; | ||
2944 | unsigned int max_pages; | 2893 | unsigned int max_pages; |
2945 | int range_cyclic, cycled = 1, io_done = 0; | 2894 | int range_cyclic, cycled = 1, io_done = 0; |
2946 | int needed_blocks, ret = 0; | 2895 | int needed_blocks, ret = 0; |
2947 | long desired_nr_to_write, nr_to_writebump = 0; | 2896 | long desired_nr_to_write, nr_to_writebump = 0; |
2948 | loff_t range_start = wbc->range_start; | 2897 | loff_t range_start = wbc->range_start; |
2949 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2898 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2899 | pgoff_t done_index = 0; | ||
2900 | pgoff_t end; | ||
2950 | 2901 | ||
2951 | trace_ext4_da_writepages(inode, wbc); | 2902 | trace_ext4_da_writepages(inode, wbc); |
2952 | 2903 | ||
@@ -2982,8 +2933,11 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2982 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2933 | wbc->range_start = index << PAGE_CACHE_SHIFT; |
2983 | wbc->range_end = LLONG_MAX; | 2934 | wbc->range_end = LLONG_MAX; |
2984 | wbc->range_cyclic = 0; | 2935 | wbc->range_cyclic = 0; |
2985 | } else | 2936 | end = -1; |
2937 | } else { | ||
2986 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2938 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2939 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
2940 | } | ||
2987 | 2941 | ||
2988 | /* | 2942 | /* |
2989 | * This works around two forms of stupidity. The first is in | 2943 | * This works around two forms of stupidity. The first is in |
@@ -3002,9 +2956,12 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
3002 | * sbi->max_writeback_mb_bump whichever is smaller. | 2956 | * sbi->max_writeback_mb_bump whichever is smaller. |
3003 | */ | 2957 | */ |
3004 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); | 2958 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); |
3005 | if (!range_cyclic && range_whole) | 2959 | if (!range_cyclic && range_whole) { |
3006 | desired_nr_to_write = wbc->nr_to_write * 8; | 2960 | if (wbc->nr_to_write == LONG_MAX) |
3007 | else | 2961 | desired_nr_to_write = wbc->nr_to_write; |
2962 | else | ||
2963 | desired_nr_to_write = wbc->nr_to_write * 8; | ||
2964 | } else | ||
3008 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, | 2965 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, |
3009 | max_pages); | 2966 | max_pages); |
3010 | if (desired_nr_to_write > max_pages) | 2967 | if (desired_nr_to_write > max_pages) |
@@ -3015,12 +2972,10 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
3015 | wbc->nr_to_write = desired_nr_to_write; | 2972 | wbc->nr_to_write = desired_nr_to_write; |
3016 | } | 2973 | } |
3017 | 2974 | ||
3018 | mpd.wbc = wbc; | ||
3019 | mpd.inode = mapping->host; | ||
3020 | |||
3021 | pages_skipped = wbc->pages_skipped; | ||
3022 | |||
3023 | retry: | 2975 | retry: |
2976 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2977 | tag_pages_for_writeback(mapping, index, end); | ||
2978 | |||
3024 | while (!ret && wbc->nr_to_write > 0) { | 2979 | while (!ret && wbc->nr_to_write > 0) { |
3025 | 2980 | ||
3026 | /* | 2981 | /* |
@@ -3043,32 +2998,18 @@ retry: | |||
3043 | } | 2998 | } |
3044 | 2999 | ||
3045 | /* | 3000 | /* |
3046 | * Now call __mpage_da_writepage to find the next | 3001 | * Now call write_cache_pages_da() to find the next |
3047 | * contiguous region of logical blocks that need | 3002 | * contiguous region of logical blocks that need |
3048 | * blocks to be allocated by ext4. We don't actually | 3003 | * blocks to be allocated by ext4 and submit them. |
3049 | * submit the blocks for I/O here, even though | ||
3050 | * write_cache_pages thinks it will, and will set the | ||
3051 | * pages as clean for write before calling | ||
3052 | * __mpage_da_writepage(). | ||
3053 | */ | 3004 | */ |
3054 | mpd.b_size = 0; | 3005 | ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); |
3055 | mpd.b_state = 0; | ||
3056 | mpd.b_blocknr = 0; | ||
3057 | mpd.first_page = 0; | ||
3058 | mpd.next_page = 0; | ||
3059 | mpd.io_done = 0; | ||
3060 | mpd.pages_written = 0; | ||
3061 | mpd.retval = 0; | ||
3062 | ret = write_cache_pages_da(mapping, wbc, &mpd); | ||
3063 | /* | 3006 | /* |
3064 | * If we have a contiguous extent of pages and we | 3007 | * If we have a contiguous extent of pages and we |
3065 | * haven't done the I/O yet, map the blocks and submit | 3008 | * haven't done the I/O yet, map the blocks and submit |
3066 | * them for I/O. | 3009 | * them for I/O. |
3067 | */ | 3010 | */ |
3068 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | 3011 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { |
3069 | if (mpage_da_map_blocks(&mpd) == 0) | 3012 | mpage_da_map_and_submit(&mpd); |
3070 | mpage_da_submit_io(&mpd); | ||
3071 | mpd.io_done = 1; | ||
3072 | ret = MPAGE_DA_EXTENT_TAIL; | 3013 | ret = MPAGE_DA_EXTENT_TAIL; |
3073 | } | 3014 | } |
3074 | trace_ext4_da_write_pages(inode, &mpd); | 3015 | trace_ext4_da_write_pages(inode, &mpd); |
@@ -3082,7 +3023,6 @@ retry: | |||
3082 | * and try again | 3023 | * and try again |
3083 | */ | 3024 | */ |
3084 | jbd2_journal_force_commit_nested(sbi->s_journal); | 3025 | jbd2_journal_force_commit_nested(sbi->s_journal); |
3085 | wbc->pages_skipped = pages_skipped; | ||
3086 | ret = 0; | 3026 | ret = 0; |
3087 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | 3027 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { |
3088 | /* | 3028 | /* |
@@ -3090,7 +3030,6 @@ retry: | |||
3090 | * rest of the pages | 3030 | * rest of the pages |
3091 | */ | 3031 | */ |
3092 | pages_written += mpd.pages_written; | 3032 | pages_written += mpd.pages_written; |
3093 | wbc->pages_skipped = pages_skipped; | ||
3094 | ret = 0; | 3033 | ret = 0; |
3095 | io_done = 1; | 3034 | io_done = 1; |
3096 | } else if (wbc->nr_to_write) | 3035 | } else if (wbc->nr_to_write) |
@@ -3108,21 +3047,15 @@ retry: | |||
3108 | wbc->range_end = mapping->writeback_index - 1; | 3047 | wbc->range_end = mapping->writeback_index - 1; |
3109 | goto retry; | 3048 | goto retry; |
3110 | } | 3049 | } |
3111 | if (pages_skipped != wbc->pages_skipped) | ||
3112 | ext4_msg(inode->i_sb, KERN_CRIT, | ||
3113 | "This should not happen leaving %s " | ||
3114 | "with nr_to_write = %ld ret = %d", | ||
3115 | __func__, wbc->nr_to_write, ret); | ||
3116 | 3050 | ||
3117 | /* Update index */ | 3051 | /* Update index */ |
3118 | index += pages_written; | ||
3119 | wbc->range_cyclic = range_cyclic; | 3052 | wbc->range_cyclic = range_cyclic; |
3120 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 3053 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
3121 | /* | 3054 | /* |
3122 | * set the writeback_index so that range_cyclic | 3055 | * set the writeback_index so that range_cyclic |
3123 | * mode will write it back later | 3056 | * mode will write it back later |
3124 | */ | 3057 | */ |
3125 | mapping->writeback_index = index; | 3058 | mapping->writeback_index = done_index; |
3126 | 3059 | ||
3127 | out_writepages: | 3060 | out_writepages: |
3128 | wbc->nr_to_write -= nr_to_writebump; | 3061 | wbc->nr_to_write -= nr_to_writebump; |
@@ -3367,10 +3300,10 @@ int ext4_alloc_da_blocks(struct inode *inode) | |||
3367 | * doing I/O at all. | 3300 | * doing I/O at all. |
3368 | * | 3301 | * |
3369 | * We could call write_cache_pages(), and then redirty all of | 3302 | * We could call write_cache_pages(), and then redirty all of |
3370 | * the pages by calling redirty_page_for_writeback() but that | 3303 | * the pages by calling redirty_page_for_writepage() but that |
3371 | * would be ugly in the extreme. So instead we would need to | 3304 | * would be ugly in the extreme. So instead we would need to |
3372 | * replicate parts of the code in the above functions, | 3305 | * replicate parts of the code in the above functions, |
3373 | * simplifying them becuase we wouldn't actually intend to | 3306 | * simplifying them because we wouldn't actually intend to |
3374 | * write out the pages, but rather only collect contiguous | 3307 | * write out the pages, but rather only collect contiguous |
3375 | * logical block extents, call the multi-block allocator, and | 3308 | * logical block extents, call the multi-block allocator, and |
3376 | * then update the buffer heads with the block allocations. | 3309 | * then update the buffer heads with the block allocations. |
@@ -3447,6 +3380,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
3447 | 3380 | ||
3448 | static int ext4_readpage(struct file *file, struct page *page) | 3381 | static int ext4_readpage(struct file *file, struct page *page) |
3449 | { | 3382 | { |
3383 | trace_ext4_readpage(page); | ||
3450 | return mpage_readpage(page, ext4_get_block); | 3384 | return mpage_readpage(page, ext4_get_block); |
3451 | } | 3385 | } |
3452 | 3386 | ||
@@ -3457,15 +3391,6 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
3457 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 3391 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
3458 | } | 3392 | } |
3459 | 3393 | ||
3460 | static void ext4_free_io_end(ext4_io_end_t *io) | ||
3461 | { | ||
3462 | BUG_ON(!io); | ||
3463 | if (io->page) | ||
3464 | put_page(io->page); | ||
3465 | iput(io->inode); | ||
3466 | kfree(io); | ||
3467 | } | ||
3468 | |||
3469 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) | 3394 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) |
3470 | { | 3395 | { |
3471 | struct buffer_head *head, *bh; | 3396 | struct buffer_head *head, *bh; |
@@ -3490,6 +3415,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset) | |||
3490 | { | 3415 | { |
3491 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3416 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3492 | 3417 | ||
3418 | trace_ext4_invalidatepage(page, offset); | ||
3419 | |||
3493 | /* | 3420 | /* |
3494 | * free any io_end structure allocated for buffers to be discarded | 3421 | * free any io_end structure allocated for buffers to be discarded |
3495 | */ | 3422 | */ |
@@ -3511,6 +3438,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
3511 | { | 3438 | { |
3512 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3439 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3513 | 3440 | ||
3441 | trace_ext4_releasepage(page); | ||
3442 | |||
3514 | WARN_ON(PageChecked(page)); | 3443 | WARN_ON(PageChecked(page)); |
3515 | if (!page_has_buffers(page)) | 3444 | if (!page_has_buffers(page)) |
3516 | return 0; | 3445 | return 0; |
@@ -3582,7 +3511,7 @@ retry: | |||
3582 | loff_t end = offset + iov_length(iov, nr_segs); | 3511 | loff_t end = offset + iov_length(iov, nr_segs); |
3583 | 3512 | ||
3584 | if (end > isize) | 3513 | if (end > isize) |
3585 | vmtruncate(inode, isize); | 3514 | ext4_truncate_failed_write(inode); |
3586 | } | 3515 | } |
3587 | } | 3516 | } |
3588 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3517 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -3642,173 +3571,6 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock, | |||
3642 | EXT4_GET_BLOCKS_IO_CREATE_EXT); | 3571 | EXT4_GET_BLOCKS_IO_CREATE_EXT); |
3643 | } | 3572 | } |
3644 | 3573 | ||
3645 | static void dump_completed_IO(struct inode * inode) | ||
3646 | { | ||
3647 | #ifdef EXT4_DEBUG | ||
3648 | struct list_head *cur, *before, *after; | ||
3649 | ext4_io_end_t *io, *io0, *io1; | ||
3650 | unsigned long flags; | ||
3651 | |||
3652 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ | ||
3653 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); | ||
3654 | return; | ||
3655 | } | ||
3656 | |||
3657 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); | ||
3658 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3659 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
3660 | cur = &io->list; | ||
3661 | before = cur->prev; | ||
3662 | io0 = container_of(before, ext4_io_end_t, list); | ||
3663 | after = cur->next; | ||
3664 | io1 = container_of(after, ext4_io_end_t, list); | ||
3665 | |||
3666 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | ||
3667 | io, inode->i_ino, io0, io1); | ||
3668 | } | ||
3669 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3670 | #endif | ||
3671 | } | ||
3672 | |||
3673 | /* | ||
3674 | * check a range of space and convert unwritten extents to written. | ||
3675 | */ | ||
3676 | static int ext4_end_io_nolock(ext4_io_end_t *io) | ||
3677 | { | ||
3678 | struct inode *inode = io->inode; | ||
3679 | loff_t offset = io->offset; | ||
3680 | ssize_t size = io->size; | ||
3681 | int ret = 0; | ||
3682 | |||
3683 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | ||
3684 | "list->prev 0x%p\n", | ||
3685 | io, inode->i_ino, io->list.next, io->list.prev); | ||
3686 | |||
3687 | if (list_empty(&io->list)) | ||
3688 | return ret; | ||
3689 | |||
3690 | if (io->flag != EXT4_IO_UNWRITTEN) | ||
3691 | return ret; | ||
3692 | |||
3693 | ret = ext4_convert_unwritten_extents(inode, offset, size); | ||
3694 | if (ret < 0) { | ||
3695 | printk(KERN_EMERG "%s: failed to convert unwritten" | ||
3696 | "extents to written extents, error is %d" | ||
3697 | " io is still on inode %lu aio dio list\n", | ||
3698 | __func__, ret, inode->i_ino); | ||
3699 | return ret; | ||
3700 | } | ||
3701 | |||
3702 | if (io->iocb) | ||
3703 | aio_complete(io->iocb, io->result, 0); | ||
3704 | /* clear the DIO AIO unwritten flag */ | ||
3705 | io->flag = 0; | ||
3706 | return ret; | ||
3707 | } | ||
3708 | |||
3709 | /* | ||
3710 | * work on completed aio dio IO, to convert unwritten extents to extents | ||
3711 | */ | ||
3712 | static void ext4_end_io_work(struct work_struct *work) | ||
3713 | { | ||
3714 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | ||
3715 | struct inode *inode = io->inode; | ||
3716 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3717 | unsigned long flags; | ||
3718 | int ret; | ||
3719 | |||
3720 | mutex_lock(&inode->i_mutex); | ||
3721 | ret = ext4_end_io_nolock(io); | ||
3722 | if (ret < 0) { | ||
3723 | mutex_unlock(&inode->i_mutex); | ||
3724 | return; | ||
3725 | } | ||
3726 | |||
3727 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3728 | if (!list_empty(&io->list)) | ||
3729 | list_del_init(&io->list); | ||
3730 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3731 | mutex_unlock(&inode->i_mutex); | ||
3732 | ext4_free_io_end(io); | ||
3733 | } | ||
3734 | |||
3735 | /* | ||
3736 | * This function is called from ext4_sync_file(). | ||
3737 | * | ||
3738 | * When IO is completed, the work to convert unwritten extents to | ||
3739 | * written is queued on workqueue but may not get immediately | ||
3740 | * scheduled. When fsync is called, we need to ensure the | ||
3741 | * conversion is complete before fsync returns. | ||
3742 | * The inode keeps track of a list of pending/completed IO that | ||
3743 | * might needs to do the conversion. This function walks through | ||
3744 | * the list and convert the related unwritten extents for completed IO | ||
3745 | * to written. | ||
3746 | * The function return the number of pending IOs on success. | ||
3747 | */ | ||
3748 | int flush_completed_IO(struct inode *inode) | ||
3749 | { | ||
3750 | ext4_io_end_t *io; | ||
3751 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3752 | unsigned long flags; | ||
3753 | int ret = 0; | ||
3754 | int ret2 = 0; | ||
3755 | |||
3756 | if (list_empty(&ei->i_completed_io_list)) | ||
3757 | return ret; | ||
3758 | |||
3759 | dump_completed_IO(inode); | ||
3760 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3761 | while (!list_empty(&ei->i_completed_io_list)){ | ||
3762 | io = list_entry(ei->i_completed_io_list.next, | ||
3763 | ext4_io_end_t, list); | ||
3764 | /* | ||
3765 | * Calling ext4_end_io_nolock() to convert completed | ||
3766 | * IO to written. | ||
3767 | * | ||
3768 | * When ext4_sync_file() is called, run_queue() may already | ||
3769 | * about to flush the work corresponding to this io structure. | ||
3770 | * It will be upset if it founds the io structure related | ||
3771 | * to the work-to-be schedule is freed. | ||
3772 | * | ||
3773 | * Thus we need to keep the io structure still valid here after | ||
3774 | * convertion finished. The io structure has a flag to | ||
3775 | * avoid double converting from both fsync and background work | ||
3776 | * queue work. | ||
3777 | */ | ||
3778 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3779 | ret = ext4_end_io_nolock(io); | ||
3780 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3781 | if (ret < 0) | ||
3782 | ret2 = ret; | ||
3783 | else | ||
3784 | list_del_init(&io->list); | ||
3785 | } | ||
3786 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3787 | return (ret2 < 0) ? ret2 : 0; | ||
3788 | } | ||
3789 | |||
3790 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) | ||
3791 | { | ||
3792 | ext4_io_end_t *io = NULL; | ||
3793 | |||
3794 | io = kmalloc(sizeof(*io), flags); | ||
3795 | |||
3796 | if (io) { | ||
3797 | igrab(inode); | ||
3798 | io->inode = inode; | ||
3799 | io->flag = 0; | ||
3800 | io->offset = 0; | ||
3801 | io->size = 0; | ||
3802 | io->page = NULL; | ||
3803 | io->iocb = NULL; | ||
3804 | io->result = 0; | ||
3805 | INIT_WORK(&io->work, ext4_end_io_work); | ||
3806 | INIT_LIST_HEAD(&io->list); | ||
3807 | } | ||
3808 | |||
3809 | return io; | ||
3810 | } | ||
3811 | |||
3812 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | 3574 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, |
3813 | ssize_t size, void *private, int ret, | 3575 | ssize_t size, void *private, int ret, |
3814 | bool is_async) | 3576 | bool is_async) |
@@ -3828,7 +3590,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3828 | size); | 3590 | size); |
3829 | 3591 | ||
3830 | /* if not aio dio with unwritten extents, just free io and return */ | 3592 | /* if not aio dio with unwritten extents, just free io and return */ |
3831 | if (io_end->flag != EXT4_IO_UNWRITTEN){ | 3593 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
3832 | ext4_free_io_end(io_end); | 3594 | ext4_free_io_end(io_end); |
3833 | iocb->private = NULL; | 3595 | iocb->private = NULL; |
3834 | out: | 3596 | out: |
@@ -3845,14 +3607,14 @@ out: | |||
3845 | } | 3607 | } |
3846 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | 3608 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; |
3847 | 3609 | ||
3848 | /* queue the work to convert unwritten extents to written */ | ||
3849 | queue_work(wq, &io_end->work); | ||
3850 | |||
3851 | /* Add the io_end to per-inode completed aio dio list*/ | 3610 | /* Add the io_end to per-inode completed aio dio list*/ |
3852 | ei = EXT4_I(io_end->inode); | 3611 | ei = EXT4_I(io_end->inode); |
3853 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 3612 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
3854 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | 3613 | list_add_tail(&io_end->list, &ei->i_completed_io_list); |
3855 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 3614 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
3615 | |||
3616 | /* queue the work to convert unwritten extents to written */ | ||
3617 | queue_work(wq, &io_end->work); | ||
3856 | iocb->private = NULL; | 3618 | iocb->private = NULL; |
3857 | } | 3619 | } |
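Note the reordering in this hunk: queue_work() now runs after the io_end has been added to the per-inode completed-IO list, so the worker can never run before the entry it must process is visible. A toy pthreads sketch of that publish-then-notify ordering; this is the pattern only, not ext4 code:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int on_list;			/* stands in for i_completed_io_list */

static void *worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	/* With publish-first ordering this always prints 1. */
	printf("worker sees on_list=%d\n", on_list);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	/* Publish first (list_add_tail under the lock) ... */
	pthread_mutex_lock(&lock);
	on_list = 1;
	pthread_mutex_unlock(&lock);

	/* ... then notify (queue_work). With the order reversed, the
	 * worker could run before the entry is on the list and miss it. */
	pthread_create(&t, NULL, worker, NULL);
	pthread_join(t, NULL);
	return 0;
}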
3858 | 3620 | ||
@@ -3873,7 +3635,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
3873 | goto out; | 3635 | goto out; |
3874 | } | 3636 | } |
3875 | 3637 | ||
3876 | io_end->flag = EXT4_IO_UNWRITTEN; | 3638 | io_end->flag = EXT4_IO_END_UNWRITTEN; |
3877 | inode = io_end->inode; | 3639 | inode = io_end->inode; |
3878 | 3640 | ||
3879 | /* Add the io_end to per-inode completed io list*/ | 3641 | /* Add the io_end to per-inode completed io list*/ |
@@ -3901,8 +3663,7 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode) | |||
3901 | retry: | 3663 | retry: |
3902 | io_end = ext4_init_io_end(inode, GFP_ATOMIC); | 3664 | io_end = ext4_init_io_end(inode, GFP_ATOMIC); |
3903 | if (!io_end) { | 3665 | if (!io_end) { |
3904 | if (printk_ratelimit()) | 3666 | pr_warn_ratelimited("%s: allocation fail\n", __func__); |
3905 | printk(KERN_WARNING "%s: allocation fail\n", __func__); | ||
3906 | schedule(); | 3667 | schedule(); |
3907 | goto retry; | 3668 | goto retry; |
3908 | } | 3669 | } |
@@ -3926,13 +3687,13 @@ retry: | |||
3926 | * preallocated extents, and those write extend the file, no need to | 3687 | * preallocated extents, and those write extend the file, no need to |
3927 | * fall back to buffered IO. | 3688 | * fall back to buffered IO. |
3928 | * | 3689 | * |
3929 | * For holes, we fallocate those blocks, mark them as unintialized | 3690 | * For holes, we fallocate those blocks, mark them as uninitialized |
3930 | * If those blocks were preallocated, we make sure they are split, but | 3691 | * If those blocks were preallocated, we make sure they are split, but |
3931 | * still keep the range to write as unintialized. | 3692 | * still keep the range to write as uninitialized. |
3932 | * | 3693 | * |
3933 | * The unwritten extents will be converted to written when DIO is completed. | 3694 | * The unwritten extents will be converted to written when DIO is completed. |
3934 | * For async direct IO, since the IO may still be pending when we return, we | 3695 | * For async direct IO, since the IO may still be pending when we return, we |
3935 | * set up an end_io call back function, which will do the convertion | 3696 | * set up an end_io call back function, which will do the conversion |
3936 | * when async direct IO completed. | 3697 | * when async direct IO completed. |
3937 | * | 3698 | * |
3938 | * If the O_DIRECT write will extend the file then add this inode to the | 3699 | * If the O_DIRECT write will extend the file then add this inode to the |
@@ -3955,7 +3716,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3955 | * We could direct write to holes and fallocate. | 3716 | * We could direct write to holes and fallocate. |
3956 | * | 3717 | * |
3957 | * Allocated blocks to fill the hole are marked as uninitialized | 3718 | * Allocated blocks to fill the hole are marked as uninitialized |
3958 | * to prevent paralel buffered read to expose the stale data | 3719 | * to prevent parallel buffered read to expose the stale data |
3959 | * before DIO complete the data IO. | 3720 | * before DIO complete the data IO. |
3960 | * | 3721 | * |
3961 | * As to previously fallocated extents, ext4 get_block | 3722 | * As to previously fallocated extents, ext4 get_block |
@@ -4016,7 +3777,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
4016 | int err; | 3777 | int err; |
4017 | /* | 3778 | /* |
4018 | * for non AIO case, since the IO is already | 3779 | * for non AIO case, since the IO is already |
4019 | * completed, we could do the convertion right here | 3780 | * completed, we could do the conversion right here |
4020 | */ | 3781 | */ |
4021 | err = ext4_convert_unwritten_extents(inode, | 3782 | err = ext4_convert_unwritten_extents(inode, |
4022 | offset, ret); | 3783 | offset, ret); |
@@ -4037,11 +3798,16 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
4037 | { | 3798 | { |
4038 | struct file *file = iocb->ki_filp; | 3799 | struct file *file = iocb->ki_filp; |
4039 | struct inode *inode = file->f_mapping->host; | 3800 | struct inode *inode = file->f_mapping->host; |
3801 | ssize_t ret; | ||
4040 | 3802 | ||
3803 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | ||
4041 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3804 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
4042 | return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); | 3805 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); |
4043 | 3806 | else | |
4044 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | 3807 | ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); |
3808 | trace_ext4_direct_IO_exit(inode, offset, | ||
3809 | iov_length(iov, nr_segs), rw, ret); | ||
3810 | return ret; | ||
4045 | } | 3811 | } |
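ext4_direct_IO() is reshaped so both paths funnel through one return: the result is captured in ret, letting a single trace_ext4_direct_IO_exit() record it for either branch. The same wrapper shape in a standalone sketch, with the tracepoints reduced to printf and the two DIO paths reduced to stubs (both assumptions for illustration):

#include <stdio.h>

/* Stubs for the extent-based and indirect-block DIO paths. */
static long ext_dio(void) { return 42; }
static long ind_dio(void) { return 7; }

static long direct_io(int extent_based)
{
	long ret;

	printf("trace: direct_IO enter\n");
	/*
	 * Capture the result instead of returning from each branch,
	 * so one exit tracepoint sees it no matter which path ran.
	 */
	ret = extent_based ? ext_dio() : ind_dio();
	printf("trace: direct_IO exit ret=%ld\n", ret);
	return ret;
}

int main(void)
{
	return direct_io(1) == 42 ? 0 : 1;
}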
4046 | 3812 | ||
4047 | /* | 3813 | /* |
@@ -4067,7 +3833,6 @@ static const struct address_space_operations ext4_ordered_aops = { | |||
4067 | .readpage = ext4_readpage, | 3833 | .readpage = ext4_readpage, |
4068 | .readpages = ext4_readpages, | 3834 | .readpages = ext4_readpages, |
4069 | .writepage = ext4_writepage, | 3835 | .writepage = ext4_writepage, |
4070 | .sync_page = block_sync_page, | ||
4071 | .write_begin = ext4_write_begin, | 3836 | .write_begin = ext4_write_begin, |
4072 | .write_end = ext4_ordered_write_end, | 3837 | .write_end = ext4_ordered_write_end, |
4073 | .bmap = ext4_bmap, | 3838 | .bmap = ext4_bmap, |
@@ -4083,7 +3848,6 @@ static const struct address_space_operations ext4_writeback_aops = { | |||
4083 | .readpage = ext4_readpage, | 3848 | .readpage = ext4_readpage, |
4084 | .readpages = ext4_readpages, | 3849 | .readpages = ext4_readpages, |
4085 | .writepage = ext4_writepage, | 3850 | .writepage = ext4_writepage, |
4086 | .sync_page = block_sync_page, | ||
4087 | .write_begin = ext4_write_begin, | 3851 | .write_begin = ext4_write_begin, |
4088 | .write_end = ext4_writeback_write_end, | 3852 | .write_end = ext4_writeback_write_end, |
4089 | .bmap = ext4_bmap, | 3853 | .bmap = ext4_bmap, |
@@ -4099,7 +3863,6 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
4099 | .readpage = ext4_readpage, | 3863 | .readpage = ext4_readpage, |
4100 | .readpages = ext4_readpages, | 3864 | .readpages = ext4_readpages, |
4101 | .writepage = ext4_writepage, | 3865 | .writepage = ext4_writepage, |
4102 | .sync_page = block_sync_page, | ||
4103 | .write_begin = ext4_write_begin, | 3866 | .write_begin = ext4_write_begin, |
4104 | .write_end = ext4_journalled_write_end, | 3867 | .write_end = ext4_journalled_write_end, |
4105 | .set_page_dirty = ext4_journalled_set_page_dirty, | 3868 | .set_page_dirty = ext4_journalled_set_page_dirty, |
@@ -4115,7 +3878,6 @@ static const struct address_space_operations ext4_da_aops = { | |||
4115 | .readpages = ext4_readpages, | 3878 | .readpages = ext4_readpages, |
4116 | .writepage = ext4_writepage, | 3879 | .writepage = ext4_writepage, |
4117 | .writepages = ext4_da_writepages, | 3880 | .writepages = ext4_da_writepages, |
4118 | .sync_page = block_sync_page, | ||
4119 | .write_begin = ext4_da_write_begin, | 3881 | .write_begin = ext4_da_write_begin, |
4120 | .write_end = ext4_da_write_end, | 3882 | .write_end = ext4_da_write_end, |
4121 | .bmap = ext4_bmap, | 3883 | .bmap = ext4_bmap, |
@@ -4152,9 +3914,30 @@ void ext4_set_aops(struct inode *inode) | |||
4152 | int ext4_block_truncate_page(handle_t *handle, | 3914 | int ext4_block_truncate_page(handle_t *handle, |
4153 | struct address_space *mapping, loff_t from) | 3915 | struct address_space *mapping, loff_t from) |
4154 | { | 3916 | { |
3917 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
3918 | unsigned length; | ||
3919 | unsigned blocksize; | ||
3920 | struct inode *inode = mapping->host; | ||
3921 | |||
3922 | blocksize = inode->i_sb->s_blocksize; | ||
3923 | length = blocksize - (offset & (blocksize - 1)); | ||
3924 | |||
3925 | return ext4_block_zero_page_range(handle, mapping, from, length); | ||
3926 | } | ||
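The wrapper above recomputes the zeroing length exactly as the old code did: everything from 'from' to the end of its containing block. A worked example of that arithmetic, assuming a 4096-byte page and a 1024-byte block size:

#include <stdio.h>

int main(void)
{
	unsigned long from = 10000;	/* new i_size, mid-block */
	unsigned page_size = 4096;	/* PAGE_CACHE_SIZE assumption */
	unsigned blocksize = 1024;	/* s_blocksize assumption */

	unsigned offset = from & (page_size - 1);		  /* 1808 */
	unsigned length = blocksize - (offset & (blocksize - 1)); /*  240 */

	/* 10000 + 240 = 10240 = 10 * 1024: the next block boundary. */
	printf("offset in page = %u, bytes to zero = %u\n", offset, length);
	return 0;
}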
3927 | |||
3928 | /* | ||
3929 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' | ||
3930 | * starting from file offset 'from'. The range to be zeroed must | ||
3931 | * be contained within one block. If the specified range exceeds | ||
3932 | * the end of the block it will be shortened to the end of the block | ||
3933 | * that corresponds to 'from'. | ||
3934 | */ | ||
3935 | int ext4_block_zero_page_range(handle_t *handle, | ||
3936 | struct address_space *mapping, loff_t from, loff_t length) | ||
3937 | { | ||
4155 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 3938 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
4156 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 3939 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
4157 | unsigned blocksize, length, pos; | 3940 | unsigned blocksize, max, pos; |
4158 | ext4_lblk_t iblock; | 3941 | ext4_lblk_t iblock; |
4159 | struct inode *inode = mapping->host; | 3942 | struct inode *inode = mapping->host; |
4160 | struct buffer_head *bh; | 3943 | struct buffer_head *bh; |
@@ -4167,7 +3950,15 @@ int ext4_block_truncate_page(handle_t *handle, | |||
4167 | return -EINVAL; | 3950 | return -EINVAL; |
4168 | 3951 | ||
4169 | blocksize = inode->i_sb->s_blocksize; | 3952 | blocksize = inode->i_sb->s_blocksize; |
4170 | length = blocksize - (offset & (blocksize - 1)); | 3953 | max = blocksize - (offset & (blocksize - 1)); |
3954 | |||
3955 | /* | ||
3956 | * shorten the length if the range does not fall entirely between | ||
3957 | * 'from' and the end of the block | ||
3958 | */ | ||
3959 | if (length > max || length < 0) | ||
3960 | length = max; | ||
3961 | |||
4171 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 3962 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
4172 | 3963 | ||
4173 | if (!page_has_buffers(page)) | 3964 | if (!page_has_buffers(page)) |
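[Editor's note] The hunk above splits ext4_block_truncate_page() into a thin wrapper plus ext4_block_zero_page_range(), whose core is the length clamp. A minimal user-space model of that clamp, with hypothetical sizes and offsets (not the kernel code itself):

    #include <stdio.h>

    /* Illustrative model of the length clamp in
     * ext4_block_zero_page_range(): the zeroed range may not extend
     * past the end of the block containing 'from'. All values below
     * are hypothetical. */
    int main(void)
    {
        unsigned blocksize = 4096;
        long long from = 10000;            /* file offset where zeroing starts */
        long long length = 3000;           /* requested length */
        unsigned offset = from & (blocksize - 1); /* offset within the block: 1808 */
        long long max = blocksize - offset;       /* room left in this block: 2288 */

        if (length > max || length < 0)
            length = max;                  /* shorten to the block boundary */

        printf("offset in block = %u, zero %lld bytes\n", offset, length);
        return 0;
    }

With these numbers the requested 3000 bytes would cross into the next block, so the range is cut back to 2288 bytes, exactly the behaviour the new comment describes.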
@@ -4226,7 +4017,7 @@ int ext4_block_truncate_page(handle_t *handle, | |||
4226 | if (ext4_should_journal_data(inode)) { | 4017 | if (ext4_should_journal_data(inode)) { |
4227 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 4018 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
4228 | } else { | 4019 | } else { |
4229 | if (ext4_should_order_data(inode)) | 4020 | if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode) |
4230 | err = ext4_jbd2_file_inode(handle, inode); | 4021 | err = ext4_jbd2_file_inode(handle, inode); |
4231 | mark_buffer_dirty(bh); | 4022 | mark_buffer_dirty(bh); |
4232 | } | 4023 | } |
@@ -4262,7 +4053,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q) | |||
4262 | * | 4053 | * |
4263 | * When we do truncate() we may have to clean the ends of several | 4054 | * When we do truncate() we may have to clean the ends of several |
4264 | * indirect blocks but leave the blocks themselves alive. Block is | 4055 | * indirect blocks but leave the blocks themselves alive. Block is |
4265 | * partially truncated if some data below the new i_size is refered | 4056 | * partially truncated if some data below the new i_size is referred |
4266 | * from it (and it is on the path to the first completely truncated | 4057 | * from it (and it is on the path to the first completely truncated |
4267 | * data block, indeed). We have to free the top of that path along | 4058 | * data block, indeed). We have to free the top of that path along |
4268 | * with everything to the right of the path. Since no allocation | 4059 | * with everything to the right of the path. Since no allocation |
@@ -4341,6 +4132,9 @@ no_top: | |||
4341 | * | 4132 | * |
4342 | * We release `count' blocks on disk, but (last - first) may be greater | 4133 | * We release `count' blocks on disk, but (last - first) may be greater |
4343 | * than `count' because there can be holes in there. | 4134 | * than `count' because there can be holes in there. |
4135 | * | ||
4136 | * Return 0 on success, 1 on invalid block range | ||
4137 | * and < 0 on fatal error. | ||
4344 | */ | 4138 | */ |
4345 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | 4139 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, |
4346 | struct buffer_head *bh, | 4140 | struct buffer_head *bh, |
@@ -4350,6 +4144,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4350 | { | 4144 | { |
4351 | __le32 *p; | 4145 | __le32 *p; |
4352 | int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; | 4146 | int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; |
4147 | int err; | ||
4353 | 4148 | ||
4354 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 4149 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
4355 | flags |= EXT4_FREE_BLOCKS_METADATA; | 4150 | flags |= EXT4_FREE_BLOCKS_METADATA; |
@@ -4365,22 +4160,33 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4365 | if (try_to_extend_transaction(handle, inode)) { | 4160 | if (try_to_extend_transaction(handle, inode)) { |
4366 | if (bh) { | 4161 | if (bh) { |
4367 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4162 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
4368 | ext4_handle_dirty_metadata(handle, inode, bh); | 4163 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
4164 | if (unlikely(err)) | ||
4165 | goto out_err; | ||
4369 | } | 4166 | } |
4370 | ext4_mark_inode_dirty(handle, inode); | 4167 | err = ext4_mark_inode_dirty(handle, inode); |
4371 | ext4_truncate_restart_trans(handle, inode, | 4168 | if (unlikely(err)) |
4372 | blocks_for_truncate(inode)); | 4169 | goto out_err; |
4170 | err = ext4_truncate_restart_trans(handle, inode, | ||
4171 | blocks_for_truncate(inode)); | ||
4172 | if (unlikely(err)) | ||
4173 | goto out_err; | ||
4373 | if (bh) { | 4174 | if (bh) { |
4374 | BUFFER_TRACE(bh, "retaking write access"); | 4175 | BUFFER_TRACE(bh, "retaking write access"); |
4375 | ext4_journal_get_write_access(handle, bh); | 4176 | err = ext4_journal_get_write_access(handle, bh); |
4177 | if (unlikely(err)) | ||
4178 | goto out_err; | ||
4376 | } | 4179 | } |
4377 | } | 4180 | } |
4378 | 4181 | ||
4379 | for (p = first; p < last; p++) | 4182 | for (p = first; p < last; p++) |
4380 | *p = 0; | 4183 | *p = 0; |
4381 | 4184 | ||
4382 | ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); | 4185 | ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags); |
4383 | return 0; | 4186 | return 0; |
4187 | out_err: | ||
4188 | ext4_std_error(inode->i_sb, err); | ||
4189 | return err; | ||
4384 | } | 4190 | } |
4385 | 4191 | ||
4386 | /** | 4192 | /** |
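[Editor's note] ext4_clear_blocks() now checks every journaling step instead of ignoring the return values. A compilable sketch of that error-funnel shape, with hypothetical stub functions standing in for the jbd2 calls:

    #include <stdio.h>

    /* Hypothetical stand-ins for the journaling steps; each can fail. */
    static int dirty_metadata(void)   { return 0; }
    static int mark_inode_dirty(void) { return 0; }
    static int restart_trans(void)    { return 0; }

    /* Sketch of the error-propagation shape ext4_clear_blocks() now
     * has: every step's return value is checked, and all failures
     * funnel through one exit label where the error is reported once
     * (compare ext4_std_error() in the hunk above). */
    static int clear_blocks_model(void)
    {
        int err;

        err = dirty_metadata();
        if (err)
            goto out_err;
        err = mark_inode_dirty();
        if (err)
            goto out_err;
        err = restart_trans();
        if (err)
            goto out_err;
        return 0;
    out_err:
        fprintf(stderr, "journaling step failed: %d\n", err);
        return err;
    }

    int main(void) { return clear_blocks_model(); }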
@@ -4391,7 +4197,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4391 | * @first: array of block numbers | 4197 | * @first: array of block numbers |
4392 | * @last: points immediately past the end of array | 4198 | * @last: points immediately past the end of array |
4393 | * | 4199 | * |
4394 | * We are freeing all blocks refered from that array (numbers are stored as | 4200 | * We are freeing all blocks referred from that array (numbers are stored as |
4395 | * little-endian 32-bit) and updating @inode->i_blocks appropriately. | 4201 | * little-endian 32-bit) and updating @inode->i_blocks appropriately. |
4396 | * | 4202 | * |
4397 | * We accumulate contiguous runs of blocks to free. Conveniently, if these | 4203 | * We accumulate contiguous runs of blocks to free. Conveniently, if these |
@@ -4414,7 +4220,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4414 | ext4_fsblk_t nr; /* Current block # */ | 4220 | ext4_fsblk_t nr; /* Current block # */ |
4415 | __le32 *p; /* Pointer into inode/ind | 4221 | __le32 *p; /* Pointer into inode/ind |
4416 | for current block */ | 4222 | for current block */ |
4417 | int err; | 4223 | int err = 0; |
4418 | 4224 | ||
4419 | if (this_bh) { /* For indirect block */ | 4225 | if (this_bh) { /* For indirect block */ |
4420 | BUFFER_TRACE(this_bh, "get_write_access"); | 4226 | BUFFER_TRACE(this_bh, "get_write_access"); |
@@ -4436,9 +4242,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4436 | } else if (nr == block_to_free + count) { | 4242 | } else if (nr == block_to_free + count) { |
4437 | count++; | 4243 | count++; |
4438 | } else { | 4244 | } else { |
4439 | if (ext4_clear_blocks(handle, inode, this_bh, | 4245 | err = ext4_clear_blocks(handle, inode, this_bh, |
4440 | block_to_free, count, | 4246 | block_to_free, count, |
4441 | block_to_free_p, p)) | 4247 | block_to_free_p, p); |
4248 | if (err) | ||
4442 | break; | 4249 | break; |
4443 | block_to_free = nr; | 4250 | block_to_free = nr; |
4444 | block_to_free_p = p; | 4251 | block_to_free_p = p; |
@@ -4447,9 +4254,12 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4447 | } | 4254 | } |
4448 | } | 4255 | } |
4449 | 4256 | ||
4450 | if (count > 0) | 4257 | if (!err && count > 0) |
4451 | ext4_clear_blocks(handle, inode, this_bh, block_to_free, | 4258 | err = ext4_clear_blocks(handle, inode, this_bh, block_to_free, |
4452 | count, block_to_free_p, p); | 4259 | count, block_to_free_p, p); |
4260 | if (err < 0) | ||
4261 | /* fatal error */ | ||
4262 | return; | ||
4453 | 4263 | ||
4454 | if (this_bh) { | 4264 | if (this_bh) { |
4455 | BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); | 4265 | BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); |
@@ -4479,7 +4289,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4479 | * @last: pointer immediately past the end of array | 4289 | * @last: pointer immediately past the end of array |
4480 | * @depth: depth of the branches to free | 4290 | * @depth: depth of the branches to free |
4481 | * | 4291 | * |
4482 | * We are freeing all blocks refered from these branches (numbers are | 4292 | * We are freeing all blocks referred from these branches (numbers are |
4483 | * stored as little-endian 32-bit) and updating @inode->i_blocks | 4293 | * stored as little-endian 32-bit) and updating @inode->i_blocks |
4484 | * appropriately. | 4294 | * appropriately. |
4485 | */ | 4295 | */ |
@@ -4530,6 +4340,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4530 | (__le32 *) bh->b_data, | 4340 | (__le32 *) bh->b_data, |
4531 | (__le32 *) bh->b_data + addr_per_block, | 4341 | (__le32 *) bh->b_data + addr_per_block, |
4532 | depth); | 4342 | depth); |
4343 | brelse(bh); | ||
4533 | 4344 | ||
4534 | /* | 4345 | /* |
4535 | * Everything below this pointer has been | 4346 |
@@ -4566,7 +4377,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4566 | * transaction where the data blocks are | 4377 | * transaction where the data blocks are |
4567 | * actually freed. | 4378 | * actually freed. |
4568 | */ | 4379 | */ |
4569 | ext4_free_blocks(handle, inode, 0, nr, 1, | 4380 | ext4_free_blocks(handle, inode, NULL, nr, 1, |
4570 | EXT4_FREE_BLOCKS_METADATA| | 4381 | EXT4_FREE_BLOCKS_METADATA| |
4571 | EXT4_FREE_BLOCKS_FORGET); | 4382 | EXT4_FREE_BLOCKS_FORGET); |
4572 | 4383 | ||
@@ -4596,8 +4407,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4596 | 4407 | ||
4597 | int ext4_can_truncate(struct inode *inode) | 4408 | int ext4_can_truncate(struct inode *inode) |
4598 | { | 4409 | { |
4599 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
4600 | return 0; | ||
4601 | if (S_ISREG(inode->i_mode)) | 4410 | if (S_ISREG(inode->i_mode)) |
4602 | return 1; | 4411 | return 1; |
4603 | if (S_ISDIR(inode->i_mode)) | 4412 | if (S_ISDIR(inode->i_mode)) |
@@ -4608,6 +4417,31 @@ int ext4_can_truncate(struct inode *inode) | |||
4608 | } | 4417 | } |
4609 | 4418 | ||
4610 | /* | 4419 | /* |
4420 | * ext4_punch_hole: punches a hole in a file by releasing the blocks | ||
4421 | * associated with the given offset and length | ||
4422 | * | ||
4423 | * @inode: File inode | ||
4424 | * @offset: The offset where the hole will begin | ||
4425 | * @len: The length of the hole | ||
4426 | * | ||
4427 | * Returns: 0 on success or negative on failure | ||
4428 | */ | ||
4429 | |||
4430 | int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | ||
4431 | { | ||
4432 | struct inode *inode = file->f_path.dentry->d_inode; | ||
4433 | if (!S_ISREG(inode->i_mode)) | ||
4434 | return -ENOTSUPP; | ||
4435 | |||
4436 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | ||
4437 | /* TODO: Add support for non extent hole punching */ | ||
4438 | return -ENOTSUPP; | ||
4439 | } | ||
4440 | |||
4441 | return ext4_ext_punch_hole(file, offset, length); | ||
4442 | } | ||
4443 | |||
4444 | /* | ||
4611 | * ext4_truncate() | 4445 | * ext4_truncate() |
4612 | * | 4446 | * |
4613 | * We block out ext4_get_block() block instantiations across the entire | 4447 | * We block out ext4_get_block() block instantiations across the entire |
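[Editor's note] ext4_punch_hole() is only a dispatcher; user space reaches it through fallocate(). A hedged usage sketch, assuming a kernel and libc recent enough to expose FALLOC_FL_PUNCH_HOLE (the path and offsets are made up):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>

    /* User-space sketch: punching a hole in an extent-mapped ext4
     * file. KEEP_SIZE is required with PUNCH_HOLE: i_size is left
     * unchanged and only the blocks backing [offset, offset+len)
     * are released. */
    int main(void)
    {
        int fd = open("/tmp/testfile", O_RDWR);
        if (fd < 0) { perror("open"); return 1; }

        if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                      4096, 8192) < 0)
            perror("fallocate");   /* e.g. EOPNOTSUPP on non-extent files */
        return 0;
    }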
@@ -4646,10 +4480,12 @@ void ext4_truncate(struct inode *inode) | |||
4646 | Indirect chain[4]; | 4480 | Indirect chain[4]; |
4647 | Indirect *partial; | 4481 | Indirect *partial; |
4648 | __le32 nr = 0; | 4482 | __le32 nr = 0; |
4649 | int n; | 4483 | int n = 0; |
4650 | ext4_lblk_t last_block; | 4484 | ext4_lblk_t last_block, max_block; |
4651 | unsigned blocksize = inode->i_sb->s_blocksize; | 4485 | unsigned blocksize = inode->i_sb->s_blocksize; |
4652 | 4486 | ||
4487 | trace_ext4_truncate_enter(inode); | ||
4488 | |||
4653 | if (!ext4_can_truncate(inode)) | 4489 | if (!ext4_can_truncate(inode)) |
4654 | return; | 4490 | return; |
4655 | 4491 | ||
@@ -4660,6 +4496,7 @@ void ext4_truncate(struct inode *inode) | |||
4660 | 4496 | ||
4661 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 4497 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
4662 | ext4_ext_truncate(inode); | 4498 | ext4_ext_truncate(inode); |
4499 | trace_ext4_truncate_exit(inode); | ||
4663 | return; | 4500 | return; |
4664 | } | 4501 | } |
4665 | 4502 | ||
@@ -4669,14 +4506,18 @@ void ext4_truncate(struct inode *inode) | |||
4669 | 4506 | ||
4670 | last_block = (inode->i_size + blocksize-1) | 4507 | last_block = (inode->i_size + blocksize-1) |
4671 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | 4508 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); |
4509 | max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) | ||
4510 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | ||
4672 | 4511 | ||
4673 | if (inode->i_size & (blocksize - 1)) | 4512 | if (inode->i_size & (blocksize - 1)) |
4674 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) | 4513 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) |
4675 | goto out_stop; | 4514 | goto out_stop; |
4676 | 4515 | ||
4677 | n = ext4_block_to_path(inode, last_block, offsets, NULL); | 4516 | if (last_block != max_block) { |
4678 | if (n == 0) | 4517 | n = ext4_block_to_path(inode, last_block, offsets, NULL); |
4679 | goto out_stop; /* error */ | 4518 | if (n == 0) |
4519 | goto out_stop; /* error */ | ||
4520 | } | ||
4680 | 4521 | ||
4681 | /* | 4522 | /* |
4682 | * OK. This truncate is going to happen. We add the inode to the | 4523 | * OK. This truncate is going to happen. We add the inode to the |
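[Editor's note] last_block and max_block are both ceiling divisions by the block size; when they are equal, the file already ends at the indirect-map limit and there is no block tree left to walk. A worked example with hypothetical sizes (the real s_bitmap_maxbytes is derived from the indirect-map geometry, not the round figure used here):

    #include <stdio.h>

    int main(void)
    {
        unsigned long long blocksize = 4096, bits = 12;
        unsigned long long i_size = 10000;
        /* hypothetical stand-in for EXT4_SB(sb)->s_bitmap_maxbytes */
        unsigned long long bitmap_maxbytes = (1ULL << 32) * blocksize;

        unsigned long long last_block = (i_size + blocksize - 1) >> bits; /* = 3 */
        unsigned long long max_block =
                (bitmap_maxbytes + blocksize - 1) >> bits;

        printf("last_block=%llu max_block=%llu\n", last_block, max_block);
        /* Only when last_block != max_block does ext4_block_to_path()
         * need to be consulted, as in the hunk above. */
        return 0;
    }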
@@ -4707,7 +4548,13 @@ void ext4_truncate(struct inode *inode) | |||
4707 | */ | 4548 | */ |
4708 | ei->i_disksize = inode->i_size; | 4549 | ei->i_disksize = inode->i_size; |
4709 | 4550 | ||
4710 | if (n == 1) { /* direct blocks */ | 4551 | if (last_block == max_block) { |
4552 | /* | ||
4553 | * It is unnecessary to free any data blocks if last_block is | ||
4554 | * equal to the indirect block limit. | ||
4555 | */ | ||
4556 | goto out_unlock; | ||
4557 | } else if (n == 1) { /* direct blocks */ | ||
4711 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], | 4558 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], |
4712 | i_data + EXT4_NDIR_BLOCKS); | 4559 | i_data + EXT4_NDIR_BLOCKS); |
4713 | goto do_indirects; | 4560 | goto do_indirects; |
@@ -4767,6 +4614,7 @@ do_indirects: | |||
4767 | ; | 4614 | ; |
4768 | } | 4615 | } |
4769 | 4616 | ||
4617 | out_unlock: | ||
4770 | up_write(&ei->i_data_sem); | 4618 | up_write(&ei->i_data_sem); |
4771 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 4619 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
4772 | ext4_mark_inode_dirty(handle, inode); | 4620 | ext4_mark_inode_dirty(handle, inode); |
@@ -4789,6 +4637,7 @@ out_stop: | |||
4789 | ext4_orphan_del(handle, inode); | 4637 | ext4_orphan_del(handle, inode); |
4790 | 4638 | ||
4791 | ext4_journal_stop(handle); | 4639 | ext4_journal_stop(handle); |
4640 | trace_ext4_truncate_exit(inode); | ||
4792 | } | 4641 | } |
4793 | 4642 | ||
4794 | /* | 4643 | /* |
@@ -4818,7 +4667,7 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
4818 | /* | 4667 | /* |
4819 | * Figure out the offset within the block group inode table | 4668 | * Figure out the offset within the block group inode table |
4820 | */ | 4669 | */ |
4821 | inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb)); | 4670 | inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; |
4822 | inode_offset = ((inode->i_ino - 1) % | 4671 | inode_offset = ((inode->i_ino - 1) % |
4823 | EXT4_INODES_PER_GROUP(sb)); | 4672 | EXT4_INODES_PER_GROUP(sb)); |
4824 | block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); | 4673 | block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); |
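[Editor's note] The change above reuses the cached s_inodes_per_block value rather than recomputing it from the block and inode sizes. The surrounding arithmetic locates an inode inside its group's inode table; a standalone sketch with hypothetical on-disk parameters:

    #include <stdio.h>

    int main(void)
    {
        unsigned long ino = 1000;          /* inode number (1-based) */
        unsigned inodes_per_group = 8192;  /* hypothetical */
        unsigned inode_size = 256;
        unsigned block_size = 4096;
        unsigned inodes_per_block = block_size / inode_size; /* 16, now cached */

        unsigned long inode_offset = (ino - 1) % inodes_per_group;       /* 999 */
        unsigned long block_in_table = inode_offset / inodes_per_block;  /* 62 */
        unsigned offset_in_block =
                (inode_offset % inodes_per_block) * inode_size;          /* 1792 */

        printf("inode table block %lu, byte offset %u\n",
               block_in_table, offset_in_block);
        return 0;
    }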
@@ -4920,6 +4769,7 @@ make_io: | |||
4920 | * has in-inode xattrs, or we don't have this inode in memory. | 4769 | * has in-inode xattrs, or we don't have this inode in memory. |
4921 | * Read the block from disk. | 4770 | * Read the block from disk. |
4922 | */ | 4771 | */ |
4772 | trace_ext4_load_inode(inode); | ||
4923 | get_bh(bh); | 4773 | get_bh(bh); |
4924 | bh->b_end_io = end_buffer_read_sync; | 4774 | bh->b_end_io = end_buffer_read_sync; |
4925 | submit_bh(READ_META, bh); | 4775 | submit_bh(READ_META, bh); |
@@ -5025,7 +4875,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5025 | return inode; | 4875 | return inode; |
5026 | 4876 | ||
5027 | ei = EXT4_I(inode); | 4877 | ei = EXT4_I(inode); |
5028 | iloc.bh = 0; | 4878 | iloc.bh = NULL; |
5029 | 4879 | ||
5030 | ret = __ext4_get_inode_loc(inode, &iloc, 0); | 4880 | ret = __ext4_get_inode_loc(inode, &iloc, 0); |
5031 | if (ret < 0) | 4881 | if (ret < 0) |
@@ -5040,7 +4890,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5040 | } | 4890 | } |
5041 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); | 4891 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); |
5042 | 4892 | ||
5043 | ei->i_state_flags = 0; | 4893 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ |
5044 | ei->i_dir_start_lookup = 0; | 4894 | ei->i_dir_start_lookup = 0; |
5045 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); | 4895 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); |
5046 | /* We now have enough fields to check if the inode was active or not. | 4896 | /* We now have enough fields to check if the inode was active or not. |
@@ -5299,7 +5149,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
5299 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) | 5149 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) |
5300 | goto out_brelse; | 5150 | goto out_brelse; |
5301 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); | 5151 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); |
5302 | raw_inode->i_flags = cpu_to_le32(ei->i_flags); | 5152 | raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); |
5303 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 5153 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
5304 | cpu_to_le32(EXT4_OS_HURD)) | 5154 | cpu_to_le32(EXT4_OS_HURD)) |
5305 | raw_inode->i_file_acl_high = | 5155 | raw_inode->i_file_acl_high = |
@@ -5464,6 +5314,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5464 | { | 5314 | { |
5465 | struct inode *inode = dentry->d_inode; | 5315 | struct inode *inode = dentry->d_inode; |
5466 | int error, rc = 0; | 5316 | int error, rc = 0; |
5317 | int orphan = 0; | ||
5467 | const unsigned int ia_valid = attr->ia_valid; | 5318 | const unsigned int ia_valid = attr->ia_valid; |
5468 | 5319 | ||
5469 | error = inode_change_ok(inode, attr); | 5320 | error = inode_change_ok(inode, attr); |
@@ -5510,8 +5361,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5510 | 5361 | ||
5511 | if (S_ISREG(inode->i_mode) && | 5362 | if (S_ISREG(inode->i_mode) && |
5512 | attr->ia_valid & ATTR_SIZE && | 5363 | attr->ia_valid & ATTR_SIZE && |
5513 | (attr->ia_size < inode->i_size || | 5364 | (attr->ia_size < inode->i_size)) { |
5514 | (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) { | ||
5515 | handle_t *handle; | 5365 | handle_t *handle; |
5516 | 5366 | ||
5517 | handle = ext4_journal_start(inode, 3); | 5367 | handle = ext4_journal_start(inode, 3); |
@@ -5519,8 +5369,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5519 | error = PTR_ERR(handle); | 5369 | error = PTR_ERR(handle); |
5520 | goto err_out; | 5370 | goto err_out; |
5521 | } | 5371 | } |
5522 | 5372 | if (ext4_handle_valid(handle)) { | |
5523 | error = ext4_orphan_add(handle, inode); | 5373 | error = ext4_orphan_add(handle, inode); |
5374 | orphan = 1; | ||
5375 | } | ||
5524 | EXT4_I(inode)->i_disksize = attr->ia_size; | 5376 | EXT4_I(inode)->i_disksize = attr->ia_size; |
5525 | rc = ext4_mark_inode_dirty(handle, inode); | 5377 | rc = ext4_mark_inode_dirty(handle, inode); |
5526 | if (!error) | 5378 | if (!error) |
@@ -5538,18 +5390,20 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5538 | goto err_out; | 5390 | goto err_out; |
5539 | } | 5391 | } |
5540 | ext4_orphan_del(handle, inode); | 5392 | ext4_orphan_del(handle, inode); |
5393 | orphan = 0; | ||
5541 | ext4_journal_stop(handle); | 5394 | ext4_journal_stop(handle); |
5542 | goto err_out; | 5395 | goto err_out; |
5543 | } | 5396 | } |
5544 | } | 5397 | } |
5545 | /* ext4_truncate will clear the flag */ | ||
5546 | if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) | ||
5547 | ext4_truncate(inode); | ||
5548 | } | 5398 | } |
5549 | 5399 | ||
5550 | if ((attr->ia_valid & ATTR_SIZE) && | 5400 | if (attr->ia_valid & ATTR_SIZE) { |
5551 | attr->ia_size != i_size_read(inode)) | 5401 | if (attr->ia_size != i_size_read(inode)) { |
5552 | rc = vmtruncate(inode, attr->ia_size); | 5402 | truncate_setsize(inode, attr->ia_size); |
5403 | ext4_truncate(inode); | ||
5404 | } else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) | ||
5405 | ext4_truncate(inode); | ||
5406 | } | ||
5553 | 5407 | ||
5554 | if (!rc) { | 5408 | if (!rc) { |
5555 | setattr_copy(inode, attr); | 5409 | setattr_copy(inode, attr); |
@@ -5560,7 +5414,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5560 | * If the call to ext4_truncate failed to get a transaction handle at | 5414 | * If the call to ext4_truncate failed to get a transaction handle at |
5561 | * all, we need to clean up the in-core orphan list manually. | 5415 | * all, we need to clean up the in-core orphan list manually. |
5562 | */ | 5416 | */ |
5563 | if (inode->i_nlink) | 5417 | if (orphan && inode->i_nlink) |
5564 | ext4_orphan_del(NULL, inode); | 5418 | ext4_orphan_del(NULL, inode); |
5565 | 5419 | ||
5566 | if (!rc && (ia_valid & ATTR_MODE)) | 5420 | if (!rc && (ia_valid & ATTR_MODE)) |
@@ -5592,9 +5446,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
5592 | * will return the blocks that include the delayed allocation | 5446 | * will return the blocks that include the delayed allocation |
5593 | * blocks for this file. | 5447 | * blocks for this file. |
5594 | */ | 5448 | */ |
5595 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
5596 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; | 5449 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; |
5597 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
5598 | 5450 | ||
5599 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | 5451 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; |
5600 | return 0; | 5452 | return 0; |
@@ -5608,13 +5460,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, | |||
5608 | /* if nrblocks are contiguous */ | 5460 | /* if nrblocks are contiguous */ |
5609 | if (chunk) { | 5461 | if (chunk) { |
5610 | /* | 5462 | /* |
5611 | * With N contiguous data blocks, it need at most | 5463 | * With N contiguous data blocks, we need at most |
5612 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks | 5464 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, |
5613 | * 2 dindirect blocks | 5465 | * 2 dindirect blocks, and 1 tindirect block |
5614 | * 1 tindirect block | ||
5615 | */ | 5466 | */ |
5616 | indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); | 5467 | return DIV_ROUND_UP(nrblocks, |
5617 | return indirects + 3; | 5468 | EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; |
5618 | } | 5469 | } |
5619 | /* | 5470 | /* |
5620 | * if nrblocks are not contiguous, worst case, each block touches | 5471 |
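[Editor's note] For the contiguous case the rewritten formula is DIV_ROUND_UP(nrblocks, addr_per_block) + 4, the +4 covering one extra indirect, two dindirect and one tindirect block. A worked example with hypothetical 4 KiB blocks:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        int addr_per_block = 4096 / 4; /* EXT4_ADDR_PER_BLOCK for 4 KiB blocks */
        int nrblocks = 3000;           /* hypothetical contiguous chunk */

        /* ceil(3000/1024) = 3 indirect blocks, plus 4 for the extra
         * indirect, dindirect x2 and tindirect */
        int credits = DIV_ROUND_UP(nrblocks, addr_per_block) + 4;
        printf("%d metadata blocks may be touched\n", credits); /* 7 */
        return 0;
    }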
@@ -5643,7 +5494,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
5643 | * | 5494 | * |
5644 | * Also account for superblock, inode, quota and xattr blocks | 5495 | * Also account for superblock, inode, quota and xattr blocks |
5645 | */ | 5496 | */ |
5646 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 5497 | static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
5647 | { | 5498 | { |
5648 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); | 5499 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); |
5649 | int gdpblocks; | 5500 | int gdpblocks; |
@@ -5688,7 +5539,7 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
5688 | } | 5539 | } |
5689 | 5540 | ||
5690 | /* | 5541 | /* |
5691 | * Calulate the total number of credits to reserve to fit | 5542 | * Calculate the total number of credits to reserve to fit |
5692 | * the modification of a single pages into a single transaction, | 5543 | * the modification of a single pages into a single transaction, |
5693 | * which may include multiple chunks of block allocations. | 5544 | * which may include multiple chunks of block allocations. |
5694 | * | 5545 | * |
@@ -5831,6 +5682,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5831 | int err, ret; | 5682 | int err, ret; |
5832 | 5683 | ||
5833 | might_sleep(); | 5684 | might_sleep(); |
5685 | trace_ext4_mark_inode_dirty(inode, _RET_IP_); | ||
5834 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 5686 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
5835 | if (ext4_handle_valid(handle) && | 5687 | if (ext4_handle_valid(handle) && |
5836 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && | 5688 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && |
@@ -5881,7 +5733,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5881 | * so would cause a commit on atime updates, which we don't bother doing. | 5733 | * so would cause a commit on atime updates, which we don't bother doing. |
5882 | * We handle synchronous inodes at the highest possible level. | 5734 | * We handle synchronous inodes at the highest possible level. |
5883 | */ | 5735 | */ |
5884 | void ext4_dirty_inode(struct inode *inode) | 5736 | void ext4_dirty_inode(struct inode *inode, int flags) |
5885 | { | 5737 | { |
5886 | handle_t *handle; | 5738 | handle_t *handle; |
5887 | 5739 | ||
@@ -6009,15 +5861,19 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
6009 | goto out_unlock; | 5861 | goto out_unlock; |
6010 | } | 5862 | } |
6011 | ret = 0; | 5863 | ret = 0; |
6012 | if (PageMappedToDisk(page)) | 5864 | |
6013 | goto out_unlock; | 5865 | lock_page(page); |
5866 | wait_on_page_writeback(page); | ||
5867 | if (PageMappedToDisk(page)) { | ||
5868 | up_read(&inode->i_alloc_sem); | ||
5869 | return VM_FAULT_LOCKED; | ||
5870 | } | ||
6014 | 5871 | ||
6015 | if (page->index == size >> PAGE_CACHE_SHIFT) | 5872 | if (page->index == size >> PAGE_CACHE_SHIFT) |
6016 | len = size & ~PAGE_CACHE_MASK; | 5873 | len = size & ~PAGE_CACHE_MASK; |
6017 | else | 5874 | else |
6018 | len = PAGE_CACHE_SIZE; | 5875 | len = PAGE_CACHE_SIZE; |
6019 | 5876 | ||
6020 | lock_page(page); | ||
6021 | /* | 5877 | /* |
6022 | * return if we have all the buffers mapped. This avoids | 5878 |
6023 | * the need to call write_begin/write_end which does a | 5879 | * the need to call write_begin/write_end which does a |
@@ -6027,8 +5883,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
6027 | if (page_has_buffers(page)) { | 5883 | if (page_has_buffers(page)) { |
6028 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | 5884 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, |
6029 | ext4_bh_unmapped)) { | 5885 | ext4_bh_unmapped)) { |
6030 | unlock_page(page); | 5886 | up_read(&inode->i_alloc_sem); |
6031 | goto out_unlock; | 5887 | return VM_FAULT_LOCKED; |
6032 | } | 5888 | } |
6033 | } | 5889 | } |
6034 | unlock_page(page); | 5890 | unlock_page(page); |
@@ -6048,6 +5904,16 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
6048 | if (ret < 0) | 5904 | if (ret < 0) |
6049 | goto out_unlock; | 5905 | goto out_unlock; |
6050 | ret = 0; | 5906 | ret = 0; |
5907 | |||
5908 | /* | ||
5909 | * write_begin/end might have created a dirty page and someone | ||
5910 | * could wander in and start the IO. Make sure that hasn't | ||
5911 | * happened. | ||
5912 | */ | ||
5913 | lock_page(page); | ||
5914 | wait_on_page_writeback(page); | ||
5915 | up_read(&inode->i_alloc_sem); | ||
5916 | return VM_FAULT_LOCKED; | ||
6051 | out_unlock: | 5917 | out_unlock: |
6052 | if (ret) | 5918 | if (ret) |
6053 | ret = VM_FAULT_SIGBUS; | 5919 | ret = VM_FAULT_SIGBUS; |
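[Editor's note] The page_mkwrite rework switches to the VM_FAULT_LOCKED convention: on success the page is returned still locked, with writeback drained, instead of being unlocked for the caller to re-validate. A schematic user-space model of that contract (the stubs and flag values are illustrative, not the kernel definitions):

    #include <stdio.h>

    #define VM_FAULT_LOCKED 0x0200   /* illustrative value */
    #define VM_FAULT_SIGBUS 0x0002

    static void lock_page(void)              { /* stub */ }
    static void wait_on_page_writeback(void) { /* stub */ }
    static int  page_fully_mapped(void)      { return 1; /* stub */ }

    /* On success the handler keeps the page locked and returns
     * VM_FAULT_LOCKED, so no window opens between the check and the
     * caller's use of the page. */
    static int page_mkwrite_model(void)
    {
        lock_page();
        wait_on_page_writeback();
        if (page_fully_mapped())
            return VM_FAULT_LOCKED;  /* page stays locked */
        /* ... otherwise allocate blocks, then re-lock and re-check ... */
        return VM_FAULT_SIGBUS;
    }

    int main(void) { printf("%#x\n", page_mkwrite_model()); return 0; }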
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bf5ae883b1bd..808c554e773f 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -38,7 +38,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
38 | unsigned int oldflags; | 38 | unsigned int oldflags; |
39 | unsigned int jflag; | 39 | unsigned int jflag; |
40 | 40 | ||
41 | if (!is_owner_or_cap(inode)) | 41 | if (!inode_owner_or_capable(inode)) |
42 | return -EACCES; | 42 | return -EACCES; |
43 | 43 | ||
44 | if (get_user(flags, (int __user *) arg)) | 44 | if (get_user(flags, (int __user *) arg)) |
@@ -146,7 +146,7 @@ flags_out: | |||
146 | __u32 generation; | 146 | __u32 generation; |
147 | int err; | 147 | int err; |
148 | 148 | ||
149 | if (!is_owner_or_cap(inode)) | 149 | if (!inode_owner_or_capable(inode)) |
150 | return -EPERM; | 150 | return -EPERM; |
151 | 151 | ||
152 | err = mnt_want_write(filp->f_path.mnt); | 152 | err = mnt_want_write(filp->f_path.mnt); |
@@ -298,7 +298,7 @@ mext_out: | |||
298 | case EXT4_IOC_MIGRATE: | 298 | case EXT4_IOC_MIGRATE: |
299 | { | 299 | { |
300 | int err; | 300 | int err; |
301 | if (!is_owner_or_cap(inode)) | 301 | if (!inode_owner_or_capable(inode)) |
302 | return -EACCES; | 302 | return -EACCES; |
303 | 303 | ||
304 | err = mnt_want_write(filp->f_path.mnt); | 304 | err = mnt_want_write(filp->f_path.mnt); |
@@ -320,7 +320,7 @@ mext_out: | |||
320 | case EXT4_IOC_ALLOC_DA_BLKS: | 320 | case EXT4_IOC_ALLOC_DA_BLKS: |
321 | { | 321 | { |
322 | int err; | 322 | int err; |
323 | if (!is_owner_or_cap(inode)) | 323 | if (!inode_owner_or_capable(inode)) |
324 | return -EACCES; | 324 | return -EACCES; |
325 | 325 | ||
326 | err = mnt_want_write(filp->f_path.mnt); | 326 | err = mnt_want_write(filp->f_path.mnt); |
@@ -331,6 +331,36 @@ mext_out: | |||
331 | return err; | 331 | return err; |
332 | } | 332 | } |
333 | 333 | ||
334 | case FITRIM: | ||
335 | { | ||
336 | struct super_block *sb = inode->i_sb; | ||
337 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | ||
338 | struct fstrim_range range; | ||
339 | int ret = 0; | ||
340 | |||
341 | if (!capable(CAP_SYS_ADMIN)) | ||
342 | return -EPERM; | ||
343 | |||
344 | if (!blk_queue_discard(q)) | ||
345 | return -EOPNOTSUPP; | ||
346 | |||
347 | if (copy_from_user(&range, (struct fstrim_range *)arg, | ||
348 | sizeof(range))) | ||
349 | return -EFAULT; | ||
350 | |||
351 | range.minlen = max((unsigned int)range.minlen, | ||
352 | q->limits.discard_granularity); | ||
353 | ret = ext4_trim_fs(sb, &range); | ||
354 | if (ret < 0) | ||
355 | return ret; | ||
356 | |||
357 | if (copy_to_user((struct fstrim_range *)arg, &range, | ||
358 | sizeof(range))) | ||
359 | return -EFAULT; | ||
360 | |||
361 | return 0; | ||
362 | } | ||
363 | |||
334 | default: | 364 | default: |
335 | return -ENOTTY; | 365 | return -ENOTTY; |
336 | } | 366 | } |
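[Editor's note] The FITRIM handler copies an fstrim_range in, clamps minlen up to the device's discard granularity, and copies the range back with len set to the number of bytes actually trimmed. A hedged user-space sketch of driving it, assuming kernel headers new enough to define FITRIM (the mount point is hypothetical):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>   /* FITRIM, struct fstrim_range */

    int main(void)
    {
        struct fstrim_range range;
        int fd = open("/mnt", O_RDONLY);  /* any fd on the filesystem */
        if (fd < 0) { perror("open"); return 1; }

        memset(&range, 0, sizeof(range));
        range.start = 0;
        range.len = (unsigned long long)-1; /* whole filesystem */
        range.minlen = 0;                   /* kernel raises this if needed */

        if (ioctl(fd, FITRIM, &range) < 0)
            perror("FITRIM");               /* EOPNOTSUPP without discard */
        else
            printf("trimmed %llu bytes\n",
                   (unsigned long long)range.len);
        return 0;
    }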
@@ -397,6 +427,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
397 | return err; | 427 | return err; |
398 | } | 428 | } |
399 | case EXT4_IOC_MOVE_EXT: | 429 | case EXT4_IOC_MOVE_EXT: |
430 | case FITRIM: | ||
400 | break; | 431 | break; |
401 | default: | 432 | default: |
402 | return -ENOIOCTLCMD; | 433 | return -ENOIOCTLCMD; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4b4ad4b7ce57..6ed859d56850 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -92,7 +92,7 @@ | |||
92 | * between CPUs. It is possible to get scheduled at this point. | 92 | * between CPUs. It is possible to get scheduled at this point. |
93 | * | 93 | * |
94 | * The locality group prealloc space is used looking at whether we have | 94 | * The locality group prealloc space is used looking at whether we have |
95 | * enough free space (pa_free) withing the prealloc space. | 95 | * enough free space (pa_free) within the prealloc space. |
96 | * | 96 | * |
97 | * If we can't allocate blocks via inode prealloc or/and locality group | 97 | * If we can't allocate blocks via inode prealloc or/and locality group |
98 | * prealloc then we look at the buddy cache. The buddy cache is represented | 98 | * prealloc then we look at the buddy cache. The buddy cache is represented |
@@ -338,6 +338,19 @@ | |||
338 | static struct kmem_cache *ext4_pspace_cachep; | 338 | static struct kmem_cache *ext4_pspace_cachep; |
339 | static struct kmem_cache *ext4_ac_cachep; | 339 | static struct kmem_cache *ext4_ac_cachep; |
340 | static struct kmem_cache *ext4_free_ext_cachep; | 340 | static struct kmem_cache *ext4_free_ext_cachep; |
341 | |||
342 | /* We create slab caches for groupinfo data structures based on the | ||
343 | * superblock block size. There will be one per mounted filesystem for | ||
344 | * each unique s_blocksize_bits */ | ||
345 | #define NR_GRPINFO_CACHES 8 | ||
346 | static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; | ||
347 | |||
348 | static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = { | ||
349 | "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k", | ||
350 | "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k", | ||
351 | "ext4_groupinfo_64k", "ext4_groupinfo_128k" | ||
352 | }; | ||
353 | |||
341 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 354 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
342 | ext4_group_t group); | 355 | ext4_group_t group); |
343 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | 356 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
@@ -419,9 +432,10 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) | |||
419 | } | 432 | } |
420 | 433 | ||
421 | /* at order 0 we see each particular block */ | 434 | /* at order 0 we see each particular block */ |
422 | *max = 1 << (e4b->bd_blkbits + 3); | 435 | if (order == 0) { |
423 | if (order == 0) | 436 | *max = 1 << (e4b->bd_blkbits + 3); |
424 | return EXT4_MB_BITMAP(e4b); | 437 | return EXT4_MB_BITMAP(e4b); |
438 | } | ||
425 | 439 | ||
426 | bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; | 440 | bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; |
427 | *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; | 441 | *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; |
@@ -603,7 +617,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
603 | MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); | 617 | MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); |
604 | 618 | ||
605 | grp = ext4_get_group_info(sb, e4b->bd_group); | 619 | grp = ext4_get_group_info(sb, e4b->bd_group); |
606 | buddy = mb_find_buddy(e4b, 0, &max); | ||
607 | list_for_each(cur, &grp->bb_prealloc_list) { | 620 | list_for_each(cur, &grp->bb_prealloc_list) { |
608 | ext4_group_t groupnr; | 621 | ext4_group_t groupnr; |
609 | struct ext4_prealloc_space *pa; | 622 | struct ext4_prealloc_space *pa; |
@@ -622,7 +635,12 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
622 | #define mb_check_buddy(e4b) | 635 | #define mb_check_buddy(e4b) |
623 | #endif | 636 | #endif |
624 | 637 | ||
625 | /* FIXME!! need more doc */ | 638 | /* |
639 | * Divide the blocks starting at @first, of length @len, into | ||
640 | * smaller chunks whose sizes are powers of 2 blocks. | ||
641 | * Clear the bits in the bitmap that the blocks of each chunk cover, | ||
642 | * then increase bb_counters[] for the corresponding chunk size. | ||
643 | */ | ||
626 | static void ext4_mb_mark_free_simple(struct super_block *sb, | 644 | static void ext4_mb_mark_free_simple(struct super_block *sb, |
627 | void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, | 645 | void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, |
628 | struct ext4_group_info *grp) | 646 | struct ext4_group_info *grp) |
@@ -769,6 +787,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
769 | struct inode *inode; | 787 | struct inode *inode; |
770 | char *data; | 788 | char *data; |
771 | char *bitmap; | 789 | char *bitmap; |
790 | struct ext4_group_info *grinfo; | ||
772 | 791 | ||
773 | mb_debug(1, "init page %lu\n", page->index); | 792 | mb_debug(1, "init page %lu\n", page->index); |
774 | 793 | ||
@@ -801,6 +820,18 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
801 | if (first_group + i >= ngroups) | 820 | if (first_group + i >= ngroups) |
802 | break; | 821 | break; |
803 | 822 | ||
823 | grinfo = ext4_get_group_info(sb, first_group + i); | ||
824 | /* | ||
825 | * If page is uptodate then we came here after online resize | ||
826 | * which added some new uninitialized group info structs, so | ||
827 | * we must skip all initialized uptodate buddies on the page, | ||
828 | * which may be currently in use by an allocating task. | ||
829 | */ | ||
830 | if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) { | ||
831 | bh[i] = NULL; | ||
832 | continue; | ||
833 | } | ||
834 | |||
804 | err = -EIO; | 835 | err = -EIO; |
805 | desc = ext4_get_group_desc(sb, first_group + i, NULL); | 836 | desc = ext4_get_group_desc(sb, first_group + i, NULL); |
806 | if (desc == NULL) | 837 | if (desc == NULL) |
@@ -853,26 +884,28 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
853 | } | 884 | } |
854 | 885 | ||
855 | /* wait for I/O completion */ | 886 | /* wait for I/O completion */ |
856 | for (i = 0; i < groups_per_page && bh[i]; i++) | 887 | for (i = 0; i < groups_per_page; i++) |
857 | wait_on_buffer(bh[i]); | 888 | if (bh[i]) |
889 | wait_on_buffer(bh[i]); | ||
858 | 890 | ||
859 | err = -EIO; | 891 | err = -EIO; |
860 | for (i = 0; i < groups_per_page && bh[i]; i++) | 892 | for (i = 0; i < groups_per_page; i++) |
861 | if (!buffer_uptodate(bh[i])) | 893 | if (bh[i] && !buffer_uptodate(bh[i])) |
862 | goto out; | 894 | goto out; |
863 | 895 | ||
864 | err = 0; | 896 | err = 0; |
865 | first_block = page->index * blocks_per_page; | 897 | first_block = page->index * blocks_per_page; |
866 | /* init the page */ | ||
867 | memset(page_address(page), 0xff, PAGE_CACHE_SIZE); | ||
868 | for (i = 0; i < blocks_per_page; i++) { | 898 | for (i = 0; i < blocks_per_page; i++) { |
869 | int group; | 899 | int group; |
870 | struct ext4_group_info *grinfo; | ||
871 | 900 | ||
872 | group = (first_block + i) >> 1; | 901 | group = (first_block + i) >> 1; |
873 | if (group >= ngroups) | 902 | if (group >= ngroups) |
874 | break; | 903 | break; |
875 | 904 | ||
905 | if (!bh[group - first_group]) | ||
906 | /* skip initialized uptodate buddy */ | ||
907 | continue; | ||
908 | |||
876 | /* | 909 | /* |
877 | * data carry information regarding this | 910 | * data carry information regarding this |
878 | * particular group in the format specified | 911 | * particular group in the format specified |
@@ -901,6 +934,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
901 | * incore got set to the group block bitmap below | 934 | * incore got set to the group block bitmap below |
902 | */ | 935 | */ |
903 | ext4_lock_group(sb, group); | 936 | ext4_lock_group(sb, group); |
937 | /* init the buddy */ | ||
938 | memset(data, 0xff, blocksize); | ||
904 | ext4_mb_generate_buddy(sb, data, incore, group); | 939 | ext4_mb_generate_buddy(sb, data, incore, group); |
905 | ext4_unlock_group(sb, group); | 940 | ext4_unlock_group(sb, group); |
906 | incore = NULL; | 941 | incore = NULL; |
@@ -930,7 +965,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
930 | 965 | ||
931 | out: | 966 | out: |
932 | if (bh) { | 967 | if (bh) { |
933 | for (i = 0; i < groups_per_page && bh[i]; i++) | 968 | for (i = 0; i < groups_per_page; i++) |
934 | brelse(bh[i]); | 969 | brelse(bh[i]); |
935 | if (bh != &bhs) | 970 | if (bh != &bhs) |
936 | kfree(bh); | 971 | kfree(bh); |
@@ -939,6 +974,67 @@ out: | |||
939 | } | 974 | } |
940 | 975 | ||
941 | /* | 976 | /* |
977 | * Lock the buddy and bitmap pages. This makes sure other parallel init_group | ||
978 | * on the same buddy page doesn't happen while holding the buddy page lock. | ||
979 | * Return the locked buddy and bitmap pages in the e4b struct. If buddy and bitmap | ||
980 | * are on the same page e4b->bd_buddy_page is NULL and return value is 0. | ||
981 | */ | ||
982 | static int ext4_mb_get_buddy_page_lock(struct super_block *sb, | ||
983 | ext4_group_t group, struct ext4_buddy *e4b) | ||
984 | { | ||
985 | struct inode *inode = EXT4_SB(sb)->s_buddy_cache; | ||
986 | int block, pnum, poff; | ||
987 | int blocks_per_page; | ||
988 | struct page *page; | ||
989 | |||
990 | e4b->bd_buddy_page = NULL; | ||
991 | e4b->bd_bitmap_page = NULL; | ||
992 | |||
993 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
994 | /* | ||
995 | * the buddy cache inode stores the block bitmap | ||
996 | * and buddy information in consecutive blocks. | ||
997 | * So for each group we need two blocks. | ||
998 | */ | ||
999 | block = group * 2; | ||
1000 | pnum = block / blocks_per_page; | ||
1001 | poff = block % blocks_per_page; | ||
1002 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1003 | if (!page) | ||
1004 | return -EIO; | ||
1005 | BUG_ON(page->mapping != inode->i_mapping); | ||
1006 | e4b->bd_bitmap_page = page; | ||
1007 | e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
1008 | |||
1009 | if (blocks_per_page >= 2) { | ||
1010 | /* buddy and bitmap are on the same page */ | ||
1011 | return 0; | ||
1012 | } | ||
1013 | |||
1014 | block++; | ||
1015 | pnum = block / blocks_per_page; | ||
1016 | poff = block % blocks_per_page; | ||
1017 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1018 | if (!page) | ||
1019 | return -EIO; | ||
1020 | BUG_ON(page->mapping != inode->i_mapping); | ||
1021 | e4b->bd_buddy_page = page; | ||
1022 | return 0; | ||
1023 | } | ||
1024 | |||
1025 | static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b) | ||
1026 | { | ||
1027 | if (e4b->bd_bitmap_page) { | ||
1028 | unlock_page(e4b->bd_bitmap_page); | ||
1029 | page_cache_release(e4b->bd_bitmap_page); | ||
1030 | } | ||
1031 | if (e4b->bd_buddy_page) { | ||
1032 | unlock_page(e4b->bd_buddy_page); | ||
1033 | page_cache_release(e4b->bd_buddy_page); | ||
1034 | } | ||
1035 | } | ||
1036 | |||
1037 | /* | ||
942 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the | 1038 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the |
943 | * block group lock of all groups for this page; do not hold the BG lock when | 1039 | * block group lock of all groups for this page; do not hold the BG lock when |
944 | * calling this routine! | 1040 | * calling this routine! |
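[Editor's note] ext4_mb_get_buddy_page_lock() relies on the buddy-cache layout: group N's block bitmap and buddy occupy blocks 2N and 2N+1 of the buddy-cache inode, so the page number and in-page slot fall out of plain division. A sketch with hypothetical page and block sizes:

    #include <stdio.h>

    int main(void)
    {
        int blocks_per_page = 4096 / 1024;  /* 4 KiB pages, 1 KiB blocks */
        int group = 5;                      /* hypothetical group */

        int block = group * 2;              /* bitmap block for this group: 10 */
        int pnum = block / blocks_per_page; /* page index: 2 */
        int poff = block % blocks_per_page; /* slot within the page: 2 */

        printf("bitmap: page %d, slot %d\n", pnum, poff);
        /* With blocks_per_page >= 2 the buddy block (block + 1) lands
         * on the same page, which is why bd_buddy_page stays NULL and
         * the function returns early in that case. */
        return 0;
    }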
@@ -947,93 +1043,60 @@ static noinline_for_stack | |||
947 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | 1043 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) |
948 | { | 1044 | { |
949 | 1045 | ||
950 | int ret = 0; | ||
951 | void *bitmap; | ||
952 | int blocks_per_page; | ||
953 | int block, pnum, poff; | ||
954 | int num_grp_locked = 0; | ||
955 | struct ext4_group_info *this_grp; | 1046 | struct ext4_group_info *this_grp; |
956 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1047 | struct ext4_buddy e4b; |
957 | struct inode *inode = sbi->s_buddy_cache; | 1048 | struct page *page; |
958 | struct page *page = NULL, *bitmap_page = NULL; | 1049 | int ret = 0; |
959 | 1050 | ||
960 | mb_debug(1, "init group %u\n", group); | 1051 | mb_debug(1, "init group %u\n", group); |
961 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
962 | this_grp = ext4_get_group_info(sb, group); | 1052 | this_grp = ext4_get_group_info(sb, group); |
963 | /* | 1053 | /* |
964 | * This ensures that we don't reinit the buddy cache | 1054 | * This ensures that we don't reinit the buddy cache |
965 | * page which map to the group from which we are already | 1055 | * page which map to the group from which we are already |
966 | * allocating. If we are looking at the buddy cache we would | 1056 | * allocating. If we are looking at the buddy cache we would |
967 | * have taken a reference using ext4_mb_load_buddy and that | 1057 | * have taken a reference using ext4_mb_load_buddy and that |
968 | * would have taken the alloc_sem lock. | 1058 | * would have pinned buddy page to page cache. |
969 | */ | 1059 | */ |
970 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | 1060 | ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b); |
971 | if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | 1061 | if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) { |
972 | /* | 1062 | /* |
973 | * somebody initialized the group | 1063 | * somebody initialized the group |
974 | * return without doing anything | 1064 | * return without doing anything |
975 | */ | 1065 | */ |
976 | ret = 0; | ||
977 | goto err; | 1066 | goto err; |
978 | } | 1067 | } |
979 | /* | 1068 | |
980 | * the buddy cache inode stores the block bitmap | 1069 | page = e4b.bd_bitmap_page; |
981 | * and buddy information in consecutive blocks. | 1070 | ret = ext4_mb_init_cache(page, NULL); |
982 | * So for each group we need two blocks. | 1071 | if (ret) |
983 | */ | 1072 | goto err; |
984 | block = group * 2; | 1073 | if (!PageUptodate(page)) { |
985 | pnum = block / blocks_per_page; | ||
986 | poff = block % blocks_per_page; | ||
987 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
988 | if (page) { | ||
989 | BUG_ON(page->mapping != inode->i_mapping); | ||
990 | ret = ext4_mb_init_cache(page, NULL); | ||
991 | if (ret) { | ||
992 | unlock_page(page); | ||
993 | goto err; | ||
994 | } | ||
995 | unlock_page(page); | ||
996 | } | ||
997 | if (page == NULL || !PageUptodate(page)) { | ||
998 | ret = -EIO; | 1074 | ret = -EIO; |
999 | goto err; | 1075 | goto err; |
1000 | } | 1076 | } |
1001 | mark_page_accessed(page); | 1077 | mark_page_accessed(page); |
1002 | bitmap_page = page; | ||
1003 | bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
1004 | 1078 | ||
1005 | /* init buddy cache */ | 1079 | if (e4b.bd_buddy_page == NULL) { |
1006 | block++; | ||
1007 | pnum = block / blocks_per_page; | ||
1008 | poff = block % blocks_per_page; | ||
1009 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1010 | if (page == bitmap_page) { | ||
1011 | /* | 1080 | /* |
1012 | * If both the bitmap and buddy are in | 1081 | * If both the bitmap and buddy are in |
1013 | * the same page we don't need to force | 1082 | * the same page we don't need to force |
1014 | * init the buddy | 1083 | * init the buddy |
1015 | */ | 1084 | */ |
1016 | unlock_page(page); | 1085 | ret = 0; |
1017 | } else if (page) { | 1086 | goto err; |
1018 | BUG_ON(page->mapping != inode->i_mapping); | ||
1019 | ret = ext4_mb_init_cache(page, bitmap); | ||
1020 | if (ret) { | ||
1021 | unlock_page(page); | ||
1022 | goto err; | ||
1023 | } | ||
1024 | unlock_page(page); | ||
1025 | } | 1087 | } |
1026 | if (page == NULL || !PageUptodate(page)) { | 1088 | /* init buddy cache */ |
1089 | page = e4b.bd_buddy_page; | ||
1090 | ret = ext4_mb_init_cache(page, e4b.bd_bitmap); | ||
1091 | if (ret) | ||
1092 | goto err; | ||
1093 | if (!PageUptodate(page)) { | ||
1027 | ret = -EIO; | 1094 | ret = -EIO; |
1028 | goto err; | 1095 | goto err; |
1029 | } | 1096 | } |
1030 | mark_page_accessed(page); | 1097 | mark_page_accessed(page); |
1031 | err: | 1098 | err: |
1032 | ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | 1099 | ext4_mb_put_buddy_page_lock(&e4b); |
1033 | if (bitmap_page) | ||
1034 | page_cache_release(bitmap_page); | ||
1035 | if (page) | ||
1036 | page_cache_release(page); | ||
1037 | return ret; | 1100 | return ret; |
1038 | } | 1101 | } |
1039 | 1102 | ||
@@ -1067,24 +1130,8 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
1067 | e4b->bd_group = group; | 1130 | e4b->bd_group = group; |
1068 | e4b->bd_buddy_page = NULL; | 1131 | e4b->bd_buddy_page = NULL; |
1069 | e4b->bd_bitmap_page = NULL; | 1132 | e4b->bd_bitmap_page = NULL; |
1070 | e4b->alloc_semp = &grp->alloc_sem; | ||
1071 | |||
1072 | /* Take the read lock on the group alloc | ||
1073 | * sem. This would make sure a parallel | ||
1074 | * ext4_mb_init_group happening on other | ||
1075 | * groups mapped by the page is blocked | ||
1076 | * till we are done with allocation | ||
1077 | */ | ||
1078 | repeat_load_buddy: | ||
1079 | down_read(e4b->alloc_semp); | ||
1080 | 1133 | ||
1081 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | 1134 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { |
1082 | /* we need to check for group need init flag | ||
1083 | * with alloc_semp held so that we can be sure | ||
1084 | * that new blocks didn't get added to the group | ||
1085 | * when we are loading the buddy cache | ||
1086 | */ | ||
1087 | up_read(e4b->alloc_semp); | ||
1088 | /* | 1135 | /* |
1089 | * we need full data about the group | 1136 | * we need full data about the group |
1090 | * to make a good selection | 1137 | * to make a good selection |
@@ -1092,7 +1139,6 @@ repeat_load_buddy: | |||
1092 | ret = ext4_mb_init_group(sb, group); | 1139 | ret = ext4_mb_init_group(sb, group); |
1093 | if (ret) | 1140 | if (ret) |
1094 | return ret; | 1141 | return ret; |
1095 | goto repeat_load_buddy; | ||
1096 | } | 1142 | } |
1097 | 1143 | ||
1098 | /* | 1144 | /* |
@@ -1176,15 +1222,14 @@ repeat_load_buddy: | |||
1176 | return 0; | 1222 | return 0; |
1177 | 1223 | ||
1178 | err: | 1224 | err: |
1225 | if (page) | ||
1226 | page_cache_release(page); | ||
1179 | if (e4b->bd_bitmap_page) | 1227 | if (e4b->bd_bitmap_page) |
1180 | page_cache_release(e4b->bd_bitmap_page); | 1228 | page_cache_release(e4b->bd_bitmap_page); |
1181 | if (e4b->bd_buddy_page) | 1229 | if (e4b->bd_buddy_page) |
1182 | page_cache_release(e4b->bd_buddy_page); | 1230 | page_cache_release(e4b->bd_buddy_page); |
1183 | e4b->bd_buddy = NULL; | 1231 | e4b->bd_buddy = NULL; |
1184 | e4b->bd_bitmap = NULL; | 1232 | e4b->bd_bitmap = NULL; |
1185 | |||
1186 | /* Done with the buddy cache */ | ||
1187 | up_read(e4b->alloc_semp); | ||
1188 | return ret; | 1233 | return ret; |
1189 | } | 1234 | } |
1190 | 1235 | ||
@@ -1194,9 +1239,6 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) | |||
1194 | page_cache_release(e4b->bd_bitmap_page); | 1239 | page_cache_release(e4b->bd_bitmap_page); |
1195 | if (e4b->bd_buddy_page) | 1240 | if (e4b->bd_buddy_page) |
1196 | page_cache_release(e4b->bd_buddy_page); | 1241 | page_cache_release(e4b->bd_buddy_page); |
1197 | /* Done with the buddy cache */ | ||
1198 | if (e4b->alloc_semp) | ||
1199 | up_read(e4b->alloc_semp); | ||
1200 | } | 1242 | } |
1201 | 1243 | ||
1202 | 1244 | ||
@@ -1509,9 +1551,6 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, | |||
1509 | get_page(ac->ac_bitmap_page); | 1551 | get_page(ac->ac_bitmap_page); |
1510 | ac->ac_buddy_page = e4b->bd_buddy_page; | 1552 | ac->ac_buddy_page = e4b->bd_buddy_page; |
1511 | get_page(ac->ac_buddy_page); | 1553 | get_page(ac->ac_buddy_page); |
1512 | /* on allocation we use ac to track the held semaphore */ | ||
1513 | ac->alloc_semp = e4b->alloc_semp; | ||
1514 | e4b->alloc_semp = NULL; | ||
1515 | /* store last allocated for subsequent stream allocation */ | 1554 | /* store last allocated for subsequent stream allocation */ |
1516 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { | 1555 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { |
1517 | spin_lock(&sbi->s_md_lock); | 1556 | spin_lock(&sbi->s_md_lock); |
@@ -1915,84 +1954,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1915 | return 0; | 1954 | return 0; |
1916 | } | 1955 | } |
1917 | 1956 | ||
1918 | /* | ||
1919 | * lock the group_info alloc_sem of all the groups | ||
1920 | * belonging to the same buddy cache page. This | ||
1921 | * make sure other parallel operation on the buddy | ||
1922 | * cache doesn't happen whild holding the buddy cache | ||
1923 | * lock | ||
1924 | */ | ||
1925 | int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) | ||
1926 | { | ||
1927 | int i; | ||
1928 | int block, pnum; | ||
1929 | int blocks_per_page; | ||
1930 | int groups_per_page; | ||
1931 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
1932 | ext4_group_t first_group; | ||
1933 | struct ext4_group_info *grp; | ||
1934 | |||
1935 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1936 | /* | ||
1937 | * the buddy cache inode stores the block bitmap | ||
1938 | * and buddy information in consecutive blocks. | ||
1939 | * So for each group we need two blocks. | ||
1940 | */ | ||
1941 | block = group * 2; | ||
1942 | pnum = block / blocks_per_page; | ||
1943 | first_group = pnum * blocks_per_page / 2; | ||
1944 | |||
1945 | groups_per_page = blocks_per_page >> 1; | ||
1946 | if (groups_per_page == 0) | ||
1947 | groups_per_page = 1; | ||
1948 | /* read all groups the page covers into the cache */ | ||
1949 | for (i = 0; i < groups_per_page; i++) { | ||
1950 | |||
1951 | if ((first_group + i) >= ngroups) | ||
1952 | break; | ||
1953 | grp = ext4_get_group_info(sb, first_group + i); | ||
1954 | /* take all groups write allocation | ||
1955 | * semaphore. This make sure there is | ||
1956 | * no block allocation going on in any | ||
1957 | * of that groups | ||
1958 | */ | ||
1959 | down_write_nested(&grp->alloc_sem, i); | ||
1960 | } | ||
1961 | return i; | ||
1962 | } | ||
1963 | |||
1964 | void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | ||
1965 | ext4_group_t group, int locked_group) | ||
1966 | { | ||
1967 | int i; | ||
1968 | int block, pnum; | ||
1969 | int blocks_per_page; | ||
1970 | ext4_group_t first_group; | ||
1971 | struct ext4_group_info *grp; | ||
1972 | |||
1973 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1974 | /* | ||
1975 | * the buddy cache inode stores the block bitmap | ||
1976 | * and buddy information in consecutive blocks. | ||
1977 | * So for each group we need two blocks. | ||
1978 | */ | ||
1979 | block = group * 2; | ||
1980 | pnum = block / blocks_per_page; | ||
1981 | first_group = pnum * blocks_per_page / 2; | ||
1982 | /* release locks on all the groups */ | ||
1983 | for (i = 0; i < locked_group; i++) { | ||
1984 | |||
1985 | grp = ext4_get_group_info(sb, first_group + i); | ||
1986 | /* take all groups write allocation | ||
1987 | * semaphore. This make sure there is | ||
1988 | * no block allocation going on in any | ||
1989 | * of that groups | ||
1990 | */ | ||
1991 | up_write(&grp->alloc_sem); | ||
1992 | } | ||
1993 | |||
1994 | } | ||
1995 | |||
1996 | static noinline_for_stack int | 1957 | static noinline_for_stack int |
1997 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | 1958 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
1998 | { | 1959 | { |
@@ -2233,15 +2194,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = { | |||
2233 | .release = seq_release, | 2194 | .release = seq_release, |
2234 | }; | 2195 | }; |
2235 | 2196 | ||
2197 | static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) | ||
2198 | { | ||
2199 | int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; | ||
2200 | struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index]; | ||
2201 | |||
2202 | BUG_ON(!cachep); | ||
2203 | return cachep; | ||
2204 | } | ||
2236 | 2205 | ||
2237 | /* Create and initialize ext4_group_info data for the given group. */ | 2206 | /* Create and initialize ext4_group_info data for the given group. */ |
2238 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | 2207 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, |
2239 | struct ext4_group_desc *desc) | 2208 | struct ext4_group_desc *desc) |
2240 | { | 2209 | { |
2241 | int i, len; | 2210 | int i; |
2242 | int metalen = 0; | 2211 | int metalen = 0; |
2243 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2212 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2244 | struct ext4_group_info **meta_group_info; | 2213 | struct ext4_group_info **meta_group_info; |
2214 | struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2245 | 2215 | ||
2246 | /* | 2216 | /* |
2247 | * First check if this group is the first of a reserved block. | 2217 | * First check if this group is the first of a reserved block. |
@@ -2261,22 +2231,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2261 | meta_group_info; | 2231 | meta_group_info; |
2262 | } | 2232 | } |
2263 | 2233 | ||
2264 | /* | ||
2265 | * calculate needed size. if change bb_counters size, | ||
2266 | * don't forget about ext4_mb_generate_buddy() | ||
2267 | */ | ||
2268 | len = offsetof(typeof(**meta_group_info), | ||
2269 | bb_counters[sb->s_blocksize_bits + 2]); | ||
2270 | |||
2271 | meta_group_info = | 2234 | meta_group_info = |
2272 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | 2235 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; |
2273 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 2236 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
2274 | 2237 | ||
2275 | meta_group_info[i] = kzalloc(len, GFP_KERNEL); | 2238 | meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); |
2276 | if (meta_group_info[i] == NULL) { | 2239 | if (meta_group_info[i] == NULL) { |
2277 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); | 2240 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); |
2278 | goto exit_group_info; | 2241 | goto exit_group_info; |
2279 | } | 2242 | } |
2243 | memset(meta_group_info[i], 0, kmem_cache_size(cachep)); | ||
2280 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, | 2244 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, |
2281 | &(meta_group_info[i]->bb_state)); | 2245 | &(meta_group_info[i]->bb_state)); |
2282 | 2246 | ||
@@ -2331,6 +2295,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2331 | int num_meta_group_infos_max; | 2295 | int num_meta_group_infos_max; |
2332 | int array_size; | 2296 | int array_size; |
2333 | struct ext4_group_desc *desc; | 2297 | struct ext4_group_desc *desc; |
2298 | struct kmem_cache *cachep; | ||
2334 | 2299 | ||
2335 | /* This is the number of blocks used by GDT */ | 2300 | /* This is the number of blocks used by GDT */ |
2336 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - | 2301 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - |
@@ -2363,7 +2328,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2363 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte | 2328 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte |
2364 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. | 2329 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. |
2365 | * So a two level scheme suffices for now. */ | 2330 | * So a two level scheme suffices for now. */ |
2366 | sbi->s_group_info = kmalloc(array_size, GFP_KERNEL); | 2331 | sbi->s_group_info = kzalloc(array_size, GFP_KERNEL); |
2367 | if (sbi->s_group_info == NULL) { | 2332 | if (sbi->s_group_info == NULL) { |
2368 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); | 2333 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); |
2369 | return -ENOMEM; | 2334 | return -ENOMEM; |
@@ -2373,6 +2338,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2373 | printk(KERN_ERR "EXT4-fs: can't get new inode\n"); | 2338 | printk(KERN_ERR "EXT4-fs: can't get new inode\n"); |
2374 | goto err_freesgi; | 2339 | goto err_freesgi; |
2375 | } | 2340 | } |
2341 | sbi->s_buddy_cache->i_ino = get_next_ino(); | ||
2376 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; | 2342 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; |
2377 | for (i = 0; i < ngroups; i++) { | 2343 | for (i = 0; i < ngroups; i++) { |
2378 | desc = ext4_get_group_desc(sb, i, NULL); | 2344 | desc = ext4_get_group_desc(sb, i, NULL); |
@@ -2388,8 +2354,9 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2388 | return 0; | 2354 | return 0; |
2389 | 2355 | ||
2390 | err_freebuddy: | 2356 | err_freebuddy: |
2357 | cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2391 | while (i-- > 0) | 2358 | while (i-- > 0) |
2392 | kfree(ext4_get_group_info(sb, i)); | 2359 | kmem_cache_free(cachep, ext4_get_group_info(sb, i)); |
2393 | i = num_meta_group_infos; | 2360 | i = num_meta_group_infos; |
2394 | while (i-- > 0) | 2361 | while (i-- > 0) |
2395 | kfree(sbi->s_group_info[i]); | 2362 | kfree(sbi->s_group_info[i]); |
@@ -2399,6 +2366,55 @@ err_freesgi: | |||
2399 | return -ENOMEM; | 2366 | return -ENOMEM; |
2400 | } | 2367 | } |
2401 | 2368 | ||
2369 | static void ext4_groupinfo_destroy_slabs(void) | ||
2370 | { | ||
2371 | int i; | ||
2372 | |||
2373 | for (i = 0; i < NR_GRPINFO_CACHES; i++) { | ||
2374 | if (ext4_groupinfo_caches[i]) | ||
2375 | kmem_cache_destroy(ext4_groupinfo_caches[i]); | ||
2376 | ext4_groupinfo_caches[i] = NULL; | ||
2377 | } | ||
2378 | } | ||
2379 | |||
2380 | static int ext4_groupinfo_create_slab(size_t size) | ||
2381 | { | ||
2382 | static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex); | ||
2383 | int slab_size; | ||
2384 | int blocksize_bits = order_base_2(size); | ||
2385 | int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; | ||
2386 | struct kmem_cache *cachep; | ||
2387 | |||
2388 | if (cache_index >= NR_GRPINFO_CACHES) | ||
2389 | return -EINVAL; | ||
2390 | |||
2391 | if (unlikely(cache_index < 0)) | ||
2392 | cache_index = 0; | ||
2393 | |||
2394 | mutex_lock(&ext4_grpinfo_slab_create_mutex); | ||
2395 | if (ext4_groupinfo_caches[cache_index]) { | ||
2396 | mutex_unlock(&ext4_grpinfo_slab_create_mutex); | ||
2397 | return 0; /* Already created */ | ||
2398 | } | ||
2399 | |||
2400 | slab_size = offsetof(struct ext4_group_info, | ||
2401 | bb_counters[blocksize_bits + 2]); | ||
2402 | |||
2403 | cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index], | ||
2404 | slab_size, 0, SLAB_RECLAIM_ACCOUNT, | ||
2405 | NULL); | ||
2406 | |||
2407 | mutex_unlock(&ext4_grpinfo_slab_create_mutex); | ||
2408 | if (!cachep) { | ||
2409 | printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n"); | ||
2410 | return -ENOMEM; | ||
2411 | } | ||
2412 | |||
2413 | ext4_groupinfo_caches[cache_index] = cachep; | ||
2414 | |||
2415 | return 0; | ||
2416 | } | ||
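The slab size follows from the flexible bb_counters[] array at the end of struct ext4_group_info; a minimal sketch of the computation, assuming 4 KiB blocks:

    /* With blocksize_bits == 12, each group needs one free-extent
     * counter per buddy order 0..13, so the slab object ends just
     * past bb_counters[13]: */
    int slab_size = offsetof(struct ext4_group_info,
                             bb_counters[12 + 2]);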
2417 | |||
2402 | int ext4_mb_init(struct super_block *sb, int needs_recovery) | 2418 | int ext4_mb_init(struct super_block *sb, int needs_recovery) |
2403 | { | 2419 | { |
2404 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2420 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -2411,16 +2427,21 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2411 | 2427 | ||
2412 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2428 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
2413 | if (sbi->s_mb_offsets == NULL) { | 2429 | if (sbi->s_mb_offsets == NULL) { |
2414 | return -ENOMEM; | 2430 | ret = -ENOMEM; |
2431 | goto out; | ||
2415 | } | 2432 | } |
2416 | 2433 | ||
2417 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); | 2434 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); |
2418 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2435 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
2419 | if (sbi->s_mb_maxs == NULL) { | 2436 | if (sbi->s_mb_maxs == NULL) { |
2420 | kfree(sbi->s_mb_offsets); | 2437 | ret = -ENOMEM; |
2421 | return -ENOMEM; | 2438 | goto out; |
2422 | } | 2439 | } |
2423 | 2440 | ||
2441 | ret = ext4_groupinfo_create_slab(sb->s_blocksize); | ||
2442 | if (ret < 0) | ||
2443 | goto out; | ||
2444 | |||
2424 | /* order 0 is regular bitmap */ | 2445 | /* order 0 is regular bitmap */ |
2425 | sbi->s_mb_maxs[0] = sb->s_blocksize << 3; | 2446 | sbi->s_mb_maxs[0] = sb->s_blocksize << 3; |
2426 | sbi->s_mb_offsets[0] = 0; | 2447 | sbi->s_mb_offsets[0] = 0; |
@@ -2439,9 +2460,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2439 | /* init file for buddy data */ | 2460 | /* init file for buddy data */ |
2440 | ret = ext4_mb_init_backend(sb); | 2461 | ret = ext4_mb_init_backend(sb); |
2441 | if (ret != 0) { | 2462 | if (ret != 0) { |
2442 | kfree(sbi->s_mb_offsets); | 2463 | goto out; |
2443 | kfree(sbi->s_mb_maxs); | ||
2444 | return ret; | ||
2445 | } | 2464 | } |
2446 | 2465 | ||
2447 | spin_lock_init(&sbi->s_md_lock); | 2466 | spin_lock_init(&sbi->s_md_lock); |
@@ -2456,9 +2475,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2456 | 2475 | ||
2457 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); | 2476 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
2458 | if (sbi->s_locality_groups == NULL) { | 2477 | if (sbi->s_locality_groups == NULL) { |
2459 | kfree(sbi->s_mb_offsets); | 2478 | ret = -ENOMEM; |
2460 | kfree(sbi->s_mb_maxs); | 2479 | goto out; |
2461 | return -ENOMEM; | ||
2462 | } | 2480 | } |
2463 | for_each_possible_cpu(i) { | 2481 | for_each_possible_cpu(i) { |
2464 | struct ext4_locality_group *lg; | 2482 | struct ext4_locality_group *lg; |
@@ -2475,7 +2493,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2475 | 2493 | ||
2476 | if (sbi->s_journal) | 2494 | if (sbi->s_journal) |
2477 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | 2495 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; |
2478 | return 0; | 2496 | out: |
2497 | if (ret) { | ||
2498 | kfree(sbi->s_mb_offsets); | ||
2499 | kfree(sbi->s_mb_maxs); | ||
2500 | } | ||
2501 | return ret; | ||
2479 | } | 2502 | } |
2480 | 2503 | ||
2481 | /* needs to be called with the ext4 group lock held */ | 2504 | ||
@@ -2503,6 +2526,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2503 | int num_meta_group_infos; | 2526 | int num_meta_group_infos; |
2504 | struct ext4_group_info *grinfo; | 2527 | struct ext4_group_info *grinfo; |
2505 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2528 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2529 | struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2506 | 2530 | ||
2507 | if (sbi->s_group_info) { | 2531 | if (sbi->s_group_info) { |
2508 | for (i = 0; i < ngroups; i++) { | 2532 | for (i = 0; i < ngroups; i++) { |
@@ -2513,7 +2537,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2513 | ext4_lock_group(sb, i); | 2537 | ext4_lock_group(sb, i); |
2514 | ext4_mb_cleanup_pa(grinfo); | 2538 | ext4_mb_cleanup_pa(grinfo); |
2515 | ext4_unlock_group(sb, i); | 2539 | ext4_unlock_group(sb, i); |
2516 | kfree(grinfo); | 2540 | kmem_cache_free(cachep, grinfo); |
2517 | } | 2541 | } |
2518 | num_meta_group_infos = (ngroups + | 2542 | num_meta_group_infos = (ngroups + |
2519 | EXT4_DESC_PER_BLOCK(sb) - 1) >> | 2543 | EXT4_DESC_PER_BLOCK(sb) - 1) >> |
@@ -2557,20 +2581,15 @@ int ext4_mb_release(struct super_block *sb) | |||
2557 | return 0; | 2581 | return 0; |
2558 | } | 2582 | } |
2559 | 2583 | ||
2560 | static inline void ext4_issue_discard(struct super_block *sb, | 2584 | static inline int ext4_issue_discard(struct super_block *sb, |
2561 | ext4_group_t block_group, ext4_grpblk_t block, int count) | 2585 | ext4_group_t block_group, ext4_grpblk_t block, int count) |
2562 | { | 2586 | { |
2563 | int ret; | ||
2564 | ext4_fsblk_t discard_block; | 2587 | ext4_fsblk_t discard_block; |
2565 | 2588 | ||
2566 | discard_block = block + ext4_group_first_block_no(sb, block_group); | 2589 | discard_block = block + ext4_group_first_block_no(sb, block_group); |
2567 | trace_ext4_discard_blocks(sb, | 2590 | trace_ext4_discard_blocks(sb, |
2568 | (unsigned long long) discard_block, count); | 2591 | (unsigned long long) discard_block, count); |
2569 | ret = sb_issue_discard(sb, discard_block, count); | 2592 | return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); |
2570 | if (ret == EOPNOTSUPP) { | ||
2571 | ext4_warning(sb, "discard not supported, disabling"); | ||
2572 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); | ||
2573 | } | ||
2574 | } | 2593 | } |
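Since the helper now returns the sb_issue_discard() result instead of warning and clearing the DISCARD mount option itself, a caller can apply its own policy. A hypothetical caller sketch (not from the patch):

    int err = ext4_issue_discard(sb, block_group, bit, count);
    if (err && err != -EOPNOTSUPP)
            ext4_std_error(sb, err);   /* real policy is caller-specific */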
2575 | 2594 | ||
2576 | /* | 2595 | /* |
@@ -2594,7 +2613,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2594 | 2613 | ||
2595 | if (test_opt(sb, DISCARD)) | 2614 | if (test_opt(sb, DISCARD)) |
2596 | ext4_issue_discard(sb, entry->group, | 2615 | ext4_issue_discard(sb, entry->group, |
2597 | entry->start_blk, entry->count); | 2616 | entry->start_blk, entry->count); |
2598 | 2617 | ||
2599 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2618 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
2600 | /* we expect to find existing buddy because it's pinned */ | 2619 | /* we expect to find existing buddy because it's pinned */ |
@@ -2658,28 +2677,22 @@ static void ext4_remove_debugfs_entry(void) | |||
2658 | 2677 | ||
2659 | #endif | 2678 | #endif |
2660 | 2679 | ||
2661 | int __init init_ext4_mballoc(void) | 2680 | int __init ext4_init_mballoc(void) |
2662 | { | 2681 | { |
2663 | ext4_pspace_cachep = | 2682 | ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space, |
2664 | kmem_cache_create("ext4_prealloc_space", | 2683 | SLAB_RECLAIM_ACCOUNT); |
2665 | sizeof(struct ext4_prealloc_space), | ||
2666 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2667 | if (ext4_pspace_cachep == NULL) | 2684 | if (ext4_pspace_cachep == NULL) |
2668 | return -ENOMEM; | 2685 | return -ENOMEM; |
2669 | 2686 | ||
2670 | ext4_ac_cachep = | 2687 | ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context, |
2671 | kmem_cache_create("ext4_alloc_context", | 2688 | SLAB_RECLAIM_ACCOUNT); |
2672 | sizeof(struct ext4_allocation_context), | ||
2673 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2674 | if (ext4_ac_cachep == NULL) { | 2689 | if (ext4_ac_cachep == NULL) { |
2675 | kmem_cache_destroy(ext4_pspace_cachep); | 2690 | kmem_cache_destroy(ext4_pspace_cachep); |
2676 | return -ENOMEM; | 2691 | return -ENOMEM; |
2677 | } | 2692 | } |
2678 | 2693 | ||
2679 | ext4_free_ext_cachep = | 2694 | ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data, |
2680 | kmem_cache_create("ext4_free_block_extents", | 2695 | SLAB_RECLAIM_ACCOUNT); |
2681 | sizeof(struct ext4_free_data), | ||
2682 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2683 | if (ext4_free_ext_cachep == NULL) { | 2696 | if (ext4_free_ext_cachep == NULL) { |
2684 | kmem_cache_destroy(ext4_pspace_cachep); | 2697 | kmem_cache_destroy(ext4_pspace_cachep); |
2685 | kmem_cache_destroy(ext4_ac_cachep); | 2698 | kmem_cache_destroy(ext4_ac_cachep); |
@@ -2689,7 +2702,7 @@ int __init init_ext4_mballoc(void) | |||
2689 | return 0; | 2702 | return 0; |
2690 | } | 2703 | } |
2691 | 2704 | ||
2692 | void exit_ext4_mballoc(void) | 2705 | void ext4_exit_mballoc(void) |
2693 | { | 2706 | { |
2694 | /* | 2707 | /* |
2695 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep | 2708 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep |
@@ -2699,6 +2712,7 @@ void exit_ext4_mballoc(void) | |||
2699 | kmem_cache_destroy(ext4_pspace_cachep); | 2712 | kmem_cache_destroy(ext4_pspace_cachep); |
2700 | kmem_cache_destroy(ext4_ac_cachep); | 2713 | kmem_cache_destroy(ext4_ac_cachep); |
2701 | kmem_cache_destroy(ext4_free_ext_cachep); | 2714 | kmem_cache_destroy(ext4_free_ext_cachep); |
2715 | ext4_groupinfo_destroy_slabs(); | ||
2702 | ext4_remove_debugfs_entry(); | 2716 | ext4_remove_debugfs_entry(); |
2703 | } | 2717 | } |
2704 | 2718 | ||
@@ -3135,7 +3149,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block, | |||
3135 | cur_distance = abs(goal_block - cpa->pa_pstart); | 3149 | cur_distance = abs(goal_block - cpa->pa_pstart); |
3136 | new_distance = abs(goal_block - pa->pa_pstart); | 3150 | new_distance = abs(goal_block - pa->pa_pstart); |
3137 | 3151 | ||
3138 | if (cur_distance < new_distance) | 3152 | if (cur_distance <= new_distance) |
3139 | return cpa; | 3153 | return cpa; |
3140 | 3154 | ||
3141 | /* drop the previous reference */ | 3155 | /* drop the previous reference */ |
@@ -3535,8 +3549,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac) | |||
3535 | */ | 3549 | */ |
3536 | static noinline_for_stack int | 3550 | static noinline_for_stack int |
3537 | ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | 3551 | ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, |
3538 | struct ext4_prealloc_space *pa, | 3552 | struct ext4_prealloc_space *pa) |
3539 | struct ext4_allocation_context *ac) | ||
3540 | { | 3553 | { |
3541 | struct super_block *sb = e4b->bd_sb; | 3554 | struct super_block *sb = e4b->bd_sb; |
3542 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3555 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -3554,11 +3567,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3554 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3567 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
3555 | end = bit + pa->pa_len; | 3568 | end = bit + pa->pa_len; |
3556 | 3569 | ||
3557 | if (ac) { | ||
3558 | ac->ac_sb = sb; | ||
3559 | ac->ac_inode = pa->pa_inode; | ||
3560 | } | ||
3561 | |||
3562 | while (bit < end) { | 3570 | while (bit < end) { |
3563 | bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); | 3571 | bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); |
3564 | if (bit >= end) | 3572 | if (bit >= end) |
@@ -3569,15 +3577,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3569 | (unsigned) next - bit, (unsigned) group); | 3577 | (unsigned) next - bit, (unsigned) group); |
3570 | free += next - bit; | 3578 | free += next - bit; |
3571 | 3579 | ||
3572 | if (ac) { | 3580 | trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); |
3573 | ac->ac_b_ex.fe_group = group; | 3581 | trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit, |
3574 | ac->ac_b_ex.fe_start = bit; | ||
3575 | ac->ac_b_ex.fe_len = next - bit; | ||
3576 | ac->ac_b_ex.fe_logical = 0; | ||
3577 | trace_ext4_mballoc_discard(ac); | ||
3578 | } | ||
3579 | |||
3580 | trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit, | ||
3581 | next - bit); | 3582 | next - bit); |
3582 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); | 3583 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); |
3583 | bit = next + 1; | 3584 | bit = next + 1; |
@@ -3601,29 +3602,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3601 | 3602 | ||
3602 | static noinline_for_stack int | 3603 | static noinline_for_stack int |
3603 | ext4_mb_release_group_pa(struct ext4_buddy *e4b, | 3604 | ext4_mb_release_group_pa(struct ext4_buddy *e4b, |
3604 | struct ext4_prealloc_space *pa, | 3605 | struct ext4_prealloc_space *pa) |
3605 | struct ext4_allocation_context *ac) | ||
3606 | { | 3606 | { |
3607 | struct super_block *sb = e4b->bd_sb; | 3607 | struct super_block *sb = e4b->bd_sb; |
3608 | ext4_group_t group; | 3608 | ext4_group_t group; |
3609 | ext4_grpblk_t bit; | 3609 | ext4_grpblk_t bit; |
3610 | 3610 | ||
3611 | trace_ext4_mb_release_group_pa(sb, ac, pa); | 3611 | trace_ext4_mb_release_group_pa(pa); |
3612 | BUG_ON(pa->pa_deleted == 0); | 3612 | BUG_ON(pa->pa_deleted == 0); |
3613 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3613 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
3614 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3614 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
3615 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); | 3615 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); |
3616 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); | 3616 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); |
3617 | 3617 | trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); | |
3618 | if (ac) { | ||
3619 | ac->ac_sb = sb; | ||
3620 | ac->ac_inode = NULL; | ||
3621 | ac->ac_b_ex.fe_group = group; | ||
3622 | ac->ac_b_ex.fe_start = bit; | ||
3623 | ac->ac_b_ex.fe_len = pa->pa_len; | ||
3624 | ac->ac_b_ex.fe_logical = 0; | ||
3625 | trace_ext4_mballoc_discard(ac); | ||
3626 | } | ||
3627 | 3618 | ||
3628 | return 0; | 3619 | return 0; |
3629 | } | 3620 | } |
@@ -3644,7 +3635,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3644 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 3635 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
3645 | struct buffer_head *bitmap_bh = NULL; | 3636 | struct buffer_head *bitmap_bh = NULL; |
3646 | struct ext4_prealloc_space *pa, *tmp; | 3637 | struct ext4_prealloc_space *pa, *tmp; |
3647 | struct ext4_allocation_context *ac; | ||
3648 | struct list_head list; | 3638 | struct list_head list; |
3649 | struct ext4_buddy e4b; | 3639 | struct ext4_buddy e4b; |
3650 | int err; | 3640 | int err; |
@@ -3673,9 +3663,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3673 | needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; | 3663 | needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; |
3674 | 3664 | ||
3675 | INIT_LIST_HEAD(&list); | 3665 | INIT_LIST_HEAD(&list); |
3676 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
3677 | if (ac) | ||
3678 | ac->ac_sb = sb; | ||
3679 | repeat: | 3666 | repeat: |
3680 | ext4_lock_group(sb, group); | 3667 | ext4_lock_group(sb, group); |
3681 | list_for_each_entry_safe(pa, tmp, | 3668 | list_for_each_entry_safe(pa, tmp, |
@@ -3730,9 +3717,9 @@ repeat: | |||
3730 | spin_unlock(pa->pa_obj_lock); | 3717 | spin_unlock(pa->pa_obj_lock); |
3731 | 3718 | ||
3732 | if (pa->pa_type == MB_GROUP_PA) | 3719 | if (pa->pa_type == MB_GROUP_PA) |
3733 | ext4_mb_release_group_pa(&e4b, pa, ac); | 3720 | ext4_mb_release_group_pa(&e4b, pa); |
3734 | else | 3721 | else |
3735 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); | 3722 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); |
3736 | 3723 | ||
3737 | list_del(&pa->u.pa_tmp_list); | 3724 | list_del(&pa->u.pa_tmp_list); |
3738 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 3725 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
@@ -3740,8 +3727,6 @@ repeat: | |||
3740 | 3727 | ||
3741 | out: | 3728 | out: |
3742 | ext4_unlock_group(sb, group); | 3729 | ext4_unlock_group(sb, group); |
3743 | if (ac) | ||
3744 | kmem_cache_free(ext4_ac_cachep, ac); | ||
3745 | ext4_mb_unload_buddy(&e4b); | 3730 | ext4_mb_unload_buddy(&e4b); |
3746 | put_bh(bitmap_bh); | 3731 | put_bh(bitmap_bh); |
3747 | return free; | 3732 | return free; |
@@ -3762,7 +3747,6 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3762 | struct super_block *sb = inode->i_sb; | 3747 | struct super_block *sb = inode->i_sb; |
3763 | struct buffer_head *bitmap_bh = NULL; | 3748 | struct buffer_head *bitmap_bh = NULL; |
3764 | struct ext4_prealloc_space *pa, *tmp; | 3749 | struct ext4_prealloc_space *pa, *tmp; |
3765 | struct ext4_allocation_context *ac; | ||
3766 | ext4_group_t group = 0; | 3750 | ext4_group_t group = 0; |
3767 | struct list_head list; | 3751 | struct list_head list; |
3768 | struct ext4_buddy e4b; | 3752 | struct ext4_buddy e4b; |
@@ -3778,11 +3762,6 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3778 | 3762 | ||
3779 | INIT_LIST_HEAD(&list); | 3763 | INIT_LIST_HEAD(&list); |
3780 | 3764 | ||
3781 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
3782 | if (ac) { | ||
3783 | ac->ac_sb = sb; | ||
3784 | ac->ac_inode = inode; | ||
3785 | } | ||
3786 | repeat: | 3765 | repeat: |
3787 | /* first, collect all pa's in the inode */ | 3766 | /* first, collect all pa's in the inode */ |
3788 | spin_lock(&ei->i_prealloc_lock); | 3767 | spin_lock(&ei->i_prealloc_lock); |
@@ -3852,7 +3831,7 @@ repeat: | |||
3852 | 3831 | ||
3853 | ext4_lock_group(sb, group); | 3832 | ext4_lock_group(sb, group); |
3854 | list_del(&pa->pa_group_list); | 3833 | list_del(&pa->pa_group_list); |
3855 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); | 3834 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); |
3856 | ext4_unlock_group(sb, group); | 3835 | ext4_unlock_group(sb, group); |
3857 | 3836 | ||
3858 | ext4_mb_unload_buddy(&e4b); | 3837 | ext4_mb_unload_buddy(&e4b); |
@@ -3861,30 +3840,16 @@ repeat: | |||
3861 | list_del(&pa->u.pa_tmp_list); | 3840 | list_del(&pa->u.pa_tmp_list); |
3862 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 3841 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
3863 | } | 3842 | } |
3864 | if (ac) | ||
3865 | kmem_cache_free(ext4_ac_cachep, ac); | ||
3866 | } | 3843 | } |
3867 | 3844 | ||
3868 | /* | ||
3869 | * finds all preallocated spaces and return blocks being freed to them | ||
3870 | * if preallocated space becomes full (no block is used from the space) | ||
3871 | * then the function frees space in buddy | ||
3872 | * XXX: at the moment, truncate (which is the only way to free blocks) | ||
3873 | * discards all preallocations | ||
3874 | */ | ||
3875 | static void ext4_mb_return_to_preallocation(struct inode *inode, | ||
3876 | struct ext4_buddy *e4b, | ||
3877 | sector_t block, int count) | ||
3878 | { | ||
3879 | BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list)); | ||
3880 | } | ||
3881 | #ifdef CONFIG_EXT4_DEBUG | 3845 | #ifdef CONFIG_EXT4_DEBUG |
3882 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | 3846 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) |
3883 | { | 3847 | { |
3884 | struct super_block *sb = ac->ac_sb; | 3848 | struct super_block *sb = ac->ac_sb; |
3885 | ext4_group_t ngroups, i; | 3849 | ext4_group_t ngroups, i; |
3886 | 3850 | ||
3887 | if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) | 3851 | if (!mb_enable_debug || |
3852 | (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) | ||
3888 | return; | 3853 | return; |
3889 | 3854 | ||
3890 | printk(KERN_ERR "EXT4-fs: Can't allocate:" | 3855 | printk(KERN_ERR "EXT4-fs: Can't allocate:" |
@@ -4060,14 +4025,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4060 | struct ext4_buddy e4b; | 4025 | struct ext4_buddy e4b; |
4061 | struct list_head discard_list; | 4026 | struct list_head discard_list; |
4062 | struct ext4_prealloc_space *pa, *tmp; | 4027 | struct ext4_prealloc_space *pa, *tmp; |
4063 | struct ext4_allocation_context *ac; | ||
4064 | 4028 | ||
4065 | mb_debug(1, "discard locality group preallocation\n"); | 4029 | mb_debug(1, "discard locality group preallocation\n"); |
4066 | 4030 | ||
4067 | INIT_LIST_HEAD(&discard_list); | 4031 | INIT_LIST_HEAD(&discard_list); |
4068 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
4069 | if (ac) | ||
4070 | ac->ac_sb = sb; | ||
4071 | 4032 | ||
4072 | spin_lock(&lg->lg_prealloc_lock); | 4033 | spin_lock(&lg->lg_prealloc_lock); |
4073 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], | 4034 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], |
@@ -4119,15 +4080,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4119 | } | 4080 | } |
4120 | ext4_lock_group(sb, group); | 4081 | ext4_lock_group(sb, group); |
4121 | list_del(&pa->pa_group_list); | 4082 | list_del(&pa->pa_group_list); |
4122 | ext4_mb_release_group_pa(&e4b, pa, ac); | 4083 | ext4_mb_release_group_pa(&e4b, pa); |
4123 | ext4_unlock_group(sb, group); | 4084 | ext4_unlock_group(sb, group); |
4124 | 4085 | ||
4125 | ext4_mb_unload_buddy(&e4b); | 4086 | ext4_mb_unload_buddy(&e4b); |
4126 | list_del(&pa->u.pa_tmp_list); | 4087 | list_del(&pa->u.pa_tmp_list); |
4127 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 4088 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
4128 | } | 4089 | } |
4129 | if (ac) | ||
4130 | kmem_cache_free(ext4_ac_cachep, ac); | ||
4131 | } | 4090 | } |
4132 | 4091 | ||
4133 | /* | 4092 | /* |
@@ -4203,15 +4162,12 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) | |||
4203 | spin_unlock(&pa->pa_lock); | 4162 | spin_unlock(&pa->pa_lock); |
4204 | } | 4163 | } |
4205 | } | 4164 | } |
4206 | if (ac->alloc_semp) | ||
4207 | up_read(ac->alloc_semp); | ||
4208 | if (pa) { | 4165 | if (pa) { |
4209 | /* | 4166 | /* |
4210 | * We want to add the pa to the right bucket. | 4167 | * We want to add the pa to the right bucket. |
4211 | * Remove it from the list and while adding | 4168 | * Remove it from the list and while adding |
4212 | * make sure the list to which we are adding | 4169 | * make sure the list to which we are adding |
4213 | * doesn't grow big. We need to release | 4170 | * doesn't grow big. |
4214 | * alloc_semp before calling ext4_mb_add_n_trim() | ||
4215 | */ | 4171 | */ |
4216 | if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) { | 4172 | if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) { |
4217 | spin_lock(pa->pa_obj_lock); | 4173 | spin_lock(pa->pa_obj_lock); |
@@ -4273,14 +4229,16 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4273 | * EDQUOT check, as blocks and quotas have been already | 4229 | * EDQUOT check, as blocks and quotas have been already |
4274 | * reserved when data being copied into pagecache. | 4230 | * reserved when data being copied into pagecache. |
4275 | */ | 4231 | */ |
4276 | if (EXT4_I(ar->inode)->i_delalloc_reserved_flag) | 4232 | if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED)) |
4277 | ar->flags |= EXT4_MB_DELALLOC_RESERVED; | 4233 | ar->flags |= EXT4_MB_DELALLOC_RESERVED; |
4278 | else { | 4234 | else { |
4279 | /* Without delayed allocation we need to verify | 4235 | /* Without delayed allocation we need to verify |
4280 | * there is enough free blocks to do block allocation | 4236 | * there is enough free blocks to do block allocation |
4281 | * and verify allocation doesn't exceed the quota limits. | 4237 | * and verify allocation doesn't exceed the quota limits. |
4282 | */ | 4238 | */ |
4283 | while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { | 4239 | while (ar->len && |
4240 | ext4_claim_free_blocks(sbi, ar->len, ar->flags)) { | ||
4241 | |||
4284 | /* let others to free the space */ | 4242 | /* let others to free the space */ |
4285 | yield(); | 4243 | yield(); |
4286 | ar->len = ar->len >> 1; | 4244 | ar->len = ar->len >> 1; |
@@ -4290,9 +4248,15 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4290 | return 0; | 4248 | return 0; |
4291 | } | 4249 | } |
4292 | reserv_blks = ar->len; | 4250 | reserv_blks = ar->len; |
4293 | while (ar->len && dquot_alloc_block(ar->inode, ar->len)) { | 4251 | if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) { |
4294 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | 4252 | dquot_alloc_block_nofail(ar->inode, ar->len); |
4295 | ar->len--; | 4253 | } else { |
4254 | while (ar->len && | ||
4255 | dquot_alloc_block(ar->inode, ar->len)) { | ||
4256 | |||
4257 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | ||
4258 | ar->len--; | ||
4259 | } | ||
4296 | } | 4260 | } |
4297 | inquota = ar->len; | 4261 | inquota = ar->len; |
4298 | if (ar->len == 0) { | 4262 | if (ar->len == 0) { |
@@ -4370,7 +4334,8 @@ out: | |||
4370 | if (inquota && ar->len < inquota) | 4334 | if (inquota && ar->len < inquota) |
4371 | dquot_free_block(ar->inode, inquota - ar->len); | 4335 | dquot_free_block(ar->inode, inquota - ar->len); |
4372 | if (!ar->len) { | 4336 | if (!ar->len) { |
4373 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) | 4337 | if (!ext4_test_inode_state(ar->inode, |
4338 | EXT4_STATE_DELALLOC_RESERVED)) | ||
4374 | /* release all the reserved blocks if non delalloc */ | 4339 | /* release all the reserved blocks if non delalloc */ |
4375 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 4340 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, |
4376 | reserv_blks); | 4341 | reserv_blks); |
@@ -4483,7 +4448,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4483 | * @inode: inode | 4448 | * @inode: inode |
4484 | * @block: start physical block to free | 4449 | * @block: start physical block to free |
4485 | * @count: number of blocks to count | 4450 | * @count: number of blocks to count |
4486 | * @metadata: Are these metadata blocks | 4451 | * @flags: flags used by ext4_free_blocks |
4487 | */ | 4452 | */ |
4488 | void ext4_free_blocks(handle_t *handle, struct inode *inode, | 4453 | void ext4_free_blocks(handle_t *handle, struct inode *inode, |
4489 | struct buffer_head *bh, ext4_fsblk_t block, | 4454 | struct buffer_head *bh, ext4_fsblk_t block, |
@@ -4491,7 +4456,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4491 | { | 4456 | { |
4492 | struct buffer_head *bitmap_bh = NULL; | 4457 | struct buffer_head *bitmap_bh = NULL; |
4493 | struct super_block *sb = inode->i_sb; | 4458 | struct super_block *sb = inode->i_sb; |
4494 | struct ext4_allocation_context *ac = NULL; | ||
4495 | struct ext4_group_desc *gdp; | 4459 | struct ext4_group_desc *gdp; |
4496 | unsigned long freed = 0; | 4460 | unsigned long freed = 0; |
4497 | unsigned int overflow; | 4461 | unsigned int overflow; |
@@ -4531,6 +4495,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4531 | if (!bh) | 4495 | if (!bh) |
4532 | tbh = sb_find_get_block(inode->i_sb, | 4496 | tbh = sb_find_get_block(inode->i_sb, |
4533 | block + i); | 4497 | block + i); |
4498 | if (unlikely(!tbh)) | ||
4499 | continue; | ||
4534 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, | 4500 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
4535 | inode, tbh, block + i); | 4501 | inode, tbh, block + i); |
4536 | } | 4502 | } |
@@ -4546,12 +4512,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4546 | if (!ext4_should_writeback_data(inode)) | 4512 | if (!ext4_should_writeback_data(inode)) |
4547 | flags |= EXT4_FREE_BLOCKS_METADATA; | 4513 | flags |= EXT4_FREE_BLOCKS_METADATA; |
4548 | 4514 | ||
4549 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
4550 | if (ac) { | ||
4551 | ac->ac_inode = inode; | ||
4552 | ac->ac_sb = sb; | ||
4553 | } | ||
4554 | |||
4555 | do_more: | 4515 | do_more: |
4556 | overflow = 0; | 4516 | overflow = 0; |
4557 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | 4517 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
@@ -4609,12 +4569,7 @@ do_more: | |||
4609 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); | 4569 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); |
4610 | } | 4570 | } |
4611 | #endif | 4571 | #endif |
4612 | if (ac) { | 4572 | trace_ext4_mballoc_free(sb, inode, block_group, bit, count); |
4613 | ac->ac_b_ex.fe_group = block_group; | ||
4614 | ac->ac_b_ex.fe_start = bit; | ||
4615 | ac->ac_b_ex.fe_len = count; | ||
4616 | trace_ext4_mballoc_free(ac); | ||
4617 | } | ||
4618 | 4573 | ||
4619 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | 4574 | err = ext4_mb_load_buddy(sb, block_group, &e4b); |
4620 | if (err) | 4575 | if (err) |
@@ -4626,7 +4581,11 @@ do_more: | |||
4626 | * blocks being freed are metadata. these blocks shouldn't | 4581 | * blocks being freed are metadata. these blocks shouldn't |
4627 | * be used until this transaction is committed | 4582 | * be used until this transaction is committed |
4628 | */ | 4583 | */ |
4629 | new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); | 4584 | new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); |
4585 | if (!new_entry) { | ||
4586 | err = -ENOMEM; | ||
4587 | goto error_return; | ||
4588 | } | ||
4630 | new_entry->start_blk = bit; | 4589 | new_entry->start_blk = bit; |
4631 | new_entry->group = block_group; | 4590 | new_entry->group = block_group; |
4632 | new_entry->count = count; | 4591 | new_entry->count = count; |
@@ -4643,9 +4602,6 @@ do_more: | |||
4643 | ext4_lock_group(sb, block_group); | 4602 | ext4_lock_group(sb, block_group); |
4644 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4603 | mb_clear_bits(bitmap_bh->b_data, bit, count); |
4645 | mb_free_blocks(inode, &e4b, bit, count); | 4604 | mb_free_blocks(inode, &e4b, bit, count); |
4646 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); | ||
4647 | if (test_opt(sb, DISCARD)) | ||
4648 | ext4_issue_discard(sb, block_group, bit, count); | ||
4649 | } | 4605 | } |
4650 | 4606 | ||
4651 | ret = ext4_free_blks_count(sb, gdp) + count; | 4607 | ret = ext4_free_blks_count(sb, gdp) + count; |
@@ -4685,7 +4641,316 @@ error_return: | |||
4685 | dquot_free_block(inode, freed); | 4641 | dquot_free_block(inode, freed); |
4686 | brelse(bitmap_bh); | 4642 | brelse(bitmap_bh); |
4687 | ext4_std_error(sb, err); | 4643 | ext4_std_error(sb, err); |
4688 | if (ac) | ||
4689 | kmem_cache_free(ext4_ac_cachep, ac); | ||
4690 | return; | 4644 | return; |
4691 | } | 4645 | } |
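With the old @metadata argument replaced by a flags mask, callers combine EXT4_FREE_BLOCKS_* bits; this matches the updated migrate.c call sites later in this patch:

    /* Free one metadata block and forget any buffer mappings for it: */
    ext4_free_blocks(handle, inode, NULL, block, 1,
                     EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);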
4646 | |||
4647 | /** | ||
4648 | * ext4_add_groupblocks() -- Add given blocks to an existing group | ||
4649 | * @handle: handle to this transaction | ||
4650 | * @sb: super block | ||
4651 | * @block: start physical block to add to the block group | ||
4652 | * @count: number of blocks to add | ||
4653 | * | ||
4654 | * This marks the blocks as free in the bitmap and buddy. | ||
4655 | */ | ||
4656 | void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | ||
4657 | ext4_fsblk_t block, unsigned long count) | ||
4658 | { | ||
4659 | struct buffer_head *bitmap_bh = NULL; | ||
4660 | struct buffer_head *gd_bh; | ||
4661 | ext4_group_t block_group; | ||
4662 | ext4_grpblk_t bit; | ||
4663 | unsigned int i; | ||
4664 | struct ext4_group_desc *desc; | ||
4665 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
4666 | struct ext4_buddy e4b; | ||
4667 | int err = 0, ret, blk_free_count; | ||
4668 | ext4_grpblk_t blocks_freed; | ||
4669 | struct ext4_group_info *grp; | ||
4670 | |||
4671 | ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); | ||
4672 | |||
4673 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | ||
4674 | grp = ext4_get_group_info(sb, block_group); | ||
4675 | /* | ||
4676 | * Check to see if we are freeing blocks across a group | ||
4677 | * boundary. | ||
4678 | */ | ||
4679 | if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) | ||
4680 | goto error_return; | ||
4681 | |||
4682 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); | ||
4683 | if (!bitmap_bh) | ||
4684 | goto error_return; | ||
4685 | desc = ext4_get_group_desc(sb, block_group, &gd_bh); | ||
4686 | if (!desc) | ||
4687 | goto error_return; | ||
4688 | |||
4689 | if (in_range(ext4_block_bitmap(sb, desc), block, count) || | ||
4690 | in_range(ext4_inode_bitmap(sb, desc), block, count) || | ||
4691 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || | ||
4692 | in_range(block + count - 1, ext4_inode_table(sb, desc), | ||
4693 | sbi->s_itb_per_group)) { | ||
4694 | ext4_error(sb, "Adding blocks in system zones - " | ||
4695 | "Block = %llu, count = %lu", | ||
4696 | block, count); | ||
4697 | goto error_return; | ||
4698 | } | ||
4699 | |||
4700 | BUFFER_TRACE(bitmap_bh, "getting write access"); | ||
4701 | err = ext4_journal_get_write_access(handle, bitmap_bh); | ||
4702 | if (err) | ||
4703 | goto error_return; | ||
4704 | |||
4705 | /* | ||
4706 | * We are about to modify some metadata. Call the journal APIs | ||
4707 | * to unshare ->b_data if a currently-committing transaction is | ||
4708 | * using it | ||
4709 | */ | ||
4710 | BUFFER_TRACE(gd_bh, "get_write_access"); | ||
4711 | err = ext4_journal_get_write_access(handle, gd_bh); | ||
4712 | if (err) | ||
4713 | goto error_return; | ||
4714 | |||
4715 | for (i = 0, blocks_freed = 0; i < count; i++) { | ||
4716 | BUFFER_TRACE(bitmap_bh, "clear bit"); | ||
4717 | if (!mb_test_bit(bit + i, bitmap_bh->b_data)) { | ||
4718 | ext4_error(sb, "bit already cleared for block %llu", | ||
4719 | (ext4_fsblk_t)(block + i)); | ||
4720 | BUFFER_TRACE(bitmap_bh, "bit already cleared"); | ||
4721 | } else { | ||
4722 | blocks_freed++; | ||
4723 | } | ||
4724 | } | ||
4725 | |||
4726 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | ||
4727 | if (err) | ||
4728 | goto error_return; | ||
4729 | |||
4730 | /* | ||
4731 | * We need to update group_info->bb_free and the bitmap | ||
4732 | * with the group lock held. generate_buddy looks at | ||
4733 | * them with the group lock held | ||
4734 | */ | ||
4735 | ext4_lock_group(sb, block_group); | ||
4736 | mb_clear_bits(bitmap_bh->b_data, bit, count); | ||
4737 | mb_free_blocks(NULL, &e4b, bit, count); | ||
4738 | blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); | ||
4739 | ext4_free_blks_set(sb, desc, blk_free_count); | ||
4740 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); | ||
4741 | ext4_unlock_group(sb, block_group); | ||
4742 | percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); | ||
4743 | |||
4744 | if (sbi->s_log_groups_per_flex) { | ||
4745 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | ||
4746 | atomic_add(blocks_freed, | ||
4747 | &sbi->s_flex_groups[flex_group].free_blocks); | ||
4748 | } | ||
4749 | |||
4750 | ext4_mb_unload_buddy(&e4b); | ||
4751 | |||
4752 | /* We dirtied the bitmap block */ | ||
4753 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | ||
4754 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | ||
4755 | |||
4756 | /* And the group descriptor block */ | ||
4757 | BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); | ||
4758 | ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); | ||
4759 | if (!err) | ||
4760 | err = ret; | ||
4761 | |||
4762 | error_return: | ||
4763 | brelse(bitmap_bh); | ||
4764 | ext4_std_error(sb, err); | ||
4765 | return; | ||
4766 | } | ||
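A hypothetical call sketch (the natural consumer is online resize, which must hand newly added blocks back to their group):

    /* Mark 128 blocks starting at "block" free in the bitmap and
     * buddy of whichever group they fall in: */
    ext4_add_groupblocks(handle, sb, block, 128);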
4767 | |||
4768 | /** | ||
4769 | * ext4_trim_extent -- function to TRIM one single free extent in the group | ||
4770 | * @sb: super block for the file system | ||
4771 | * @start: starting block of the free extent in the alloc. group | ||
4772 | * @count: number of blocks to TRIM | ||
4773 | * @group: alloc. group we are working with | ||
4774 | * @e4b: ext4 buddy for the group | ||
4775 | * | ||
4776 | * Trim "count" blocks starting at "start" in the "group". To assure that no | ||
4777 | * one will allocate those blocks, mark them as used in the buddy bitmap. | ||
4778 | * This must be called under the group lock. | ||
4779 | */ | ||
4780 | static void ext4_trim_extent(struct super_block *sb, int start, int count, | ||
4781 | ext4_group_t group, struct ext4_buddy *e4b) | ||
4782 | { | ||
4783 | struct ext4_free_extent ex; | ||
4784 | |||
4785 | assert_spin_locked(ext4_group_lock_ptr(sb, group)); | ||
4786 | |||
4787 | ex.fe_start = start; | ||
4788 | ex.fe_group = group; | ||
4789 | ex.fe_len = count; | ||
4790 | |||
4791 | /* | ||
4792 | * Mark blocks used, so no one can reuse them while | ||
4793 | * being trimmed. | ||
4794 | */ | ||
4795 | mb_mark_used(e4b, &ex); | ||
4796 | ext4_unlock_group(sb, group); | ||
4797 | ext4_issue_discard(sb, group, start, count); | ||
4798 | ext4_lock_group(sb, group); | ||
4799 | mb_free_blocks(NULL, e4b, start, ex.fe_len); | ||
4800 | } | ||
4801 | |||
4802 | /** | ||
4803 | * ext4_trim_all_free -- function to trim all free space in alloc. group | ||
4804 | * @sb: super block for file system | ||
4805 | * @e4b: ext4 buddy | ||
4806 | * @start: first group block to examine | ||
4807 | * @max: last group block to examine | ||
4808 | * @minblocks: minimum extent block count | ||
4809 | * | ||
4810 | * ext4_trim_all_free walks through group's buddy bitmap searching for free | ||
4811 | * extents. When the free block is found, ext4_trim_extent is called to TRIM | ||
4812 | * the extent. | ||
4813 | * | ||
4814 | * | ||
4815 | * ext4_trim_all_free walks through group's block bitmap searching for free | ||
4816 | * extents. When the free extent is found, mark it as used in group buddy | ||
4817 | * bitmap. Then issue a TRIM command on this extent and free the extent in | ||
4818 | * the group buddy bitmap. This is done until whole group is scanned. | ||
4819 | */ | ||
4820 | static ext4_grpblk_t | ||
4821 | ext4_trim_all_free(struct super_block *sb, ext4_group_t group, | ||
4822 | ext4_grpblk_t start, ext4_grpblk_t max, | ||
4823 | ext4_grpblk_t minblocks) | ||
4824 | { | ||
4825 | void *bitmap; | ||
4826 | ext4_grpblk_t next, count = 0; | ||
4827 | struct ext4_buddy e4b; | ||
4828 | int ret; | ||
4829 | |||
4830 | ret = ext4_mb_load_buddy(sb, group, &e4b); | ||
4831 | if (ret) { | ||
4832 | ext4_error(sb, "Error in loading buddy " | ||
4833 | "information for %u", group); | ||
4834 | return ret; | ||
4835 | } | ||
4836 | bitmap = e4b.bd_bitmap; | ||
4837 | |||
4838 | ext4_lock_group(sb, group); | ||
4839 | start = (e4b.bd_info->bb_first_free > start) ? | ||
4840 | e4b.bd_info->bb_first_free : start; | ||
4841 | |||
4842 | while (start < max) { | ||
4843 | start = mb_find_next_zero_bit(bitmap, max, start); | ||
4844 | if (start >= max) | ||
4845 | break; | ||
4846 | next = mb_find_next_bit(bitmap, max, start); | ||
4847 | |||
4848 | if ((next - start) >= minblocks) { | ||
4849 | ext4_trim_extent(sb, start, | ||
4850 | next - start, group, &e4b); | ||
4851 | count += next - start; | ||
4852 | } | ||
4853 | start = next + 1; | ||
4854 | |||
4855 | if (fatal_signal_pending(current)) { | ||
4856 | count = -ERESTARTSYS; | ||
4857 | break; | ||
4858 | } | ||
4859 | |||
4860 | if (need_resched()) { | ||
4861 | ext4_unlock_group(sb, group); | ||
4862 | cond_resched(); | ||
4863 | ext4_lock_group(sb, group); | ||
4864 | } | ||
4865 | |||
4866 | if ((e4b.bd_info->bb_free - count) < minblocks) | ||
4867 | break; | ||
4868 | } | ||
4869 | ext4_unlock_group(sb, group); | ||
4870 | ext4_mb_unload_buddy(&e4b); | ||
4871 | |||
4872 | ext4_debug("trimmed %d blocks in the group %d\n", | ||
4873 | count, group); | ||
4874 | |||
4875 | return count; | ||
4876 | } | ||
4877 | |||
4878 | /** | ||
4879 | * ext4_trim_fs() -- trim ioctl handle function | ||
4880 | * @sb: superblock for filesystem | ||
4881 | * @range: fstrim_range structure | ||
4882 | * | ||
4883 | * start: First Byte to trim | ||
4884 | * len: number of Bytes to trim from start | ||
4885 | * minlen: minimum extent length in Bytes | ||
4886 | * ext4_trim_fs goes through all allocation groups containing Bytes from | ||
4887 | * start to start+len. For each such a group ext4_trim_all_free function | ||
4888 | * is invoked to trim all free space. | ||
4889 | */ | ||
4890 | int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | ||
4891 | { | ||
4892 | struct ext4_group_info *grp; | ||
4893 | ext4_group_t first_group, last_group; | ||
4894 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); | ||
4895 | ext4_grpblk_t cnt = 0, first_block, last_block; | ||
4896 | uint64_t start, len, minlen, trimmed = 0; | ||
4897 | ext4_fsblk_t first_data_blk = | ||
4898 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | ||
4899 | int ret = 0; | ||
4900 | |||
4901 | start = range->start >> sb->s_blocksize_bits; | ||
4902 | len = range->len >> sb->s_blocksize_bits; | ||
4903 | minlen = range->minlen >> sb->s_blocksize_bits; | ||
4904 | |||
4905 | if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) | ||
4906 | return -EINVAL; | ||
4907 | if (start < first_data_blk) { | ||
4908 | len -= first_data_blk - start; | ||
4909 | start = first_data_blk; | ||
4910 | } | ||
4911 | |||
4912 | /* Determine first and last group to examine based on start and len */ | ||
4913 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, | ||
4914 | &first_group, &first_block); | ||
4915 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), | ||
4916 | &last_group, &last_block); | ||
4917 | last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; | ||
4918 | last_block = EXT4_BLOCKS_PER_GROUP(sb); | ||
4919 | |||
4920 | if (first_group > last_group) | ||
4921 | return -EINVAL; | ||
4922 | |||
4923 | for (group = first_group; group <= last_group; group++) { | ||
4924 | grp = ext4_get_group_info(sb, group); | ||
4925 | /* We only do this if the grp has never been initialized */ | ||
4926 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | ||
4927 | ret = ext4_mb_init_group(sb, group); | ||
4928 | if (ret) | ||
4929 | break; | ||
4930 | } | ||
4931 | |||
4932 | /* | ||
4933 | * For all the groups except the last one, the last block will | ||
4934 | * always be EXT4_BLOCKS_PER_GROUP(sb); we only need to | ||
4935 | * change it for the last group, in which case | ||
4936 | * first_block + len < EXT4_BLOCKS_PER_GROUP(sb). | ||
4937 | */ | ||
4938 | if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb)) | ||
4939 | last_block = first_block + len; | ||
4940 | len -= last_block - first_block; | ||
4941 | |||
4942 | if (grp->bb_free >= minlen) { | ||
4943 | cnt = ext4_trim_all_free(sb, group, first_block, | ||
4944 | last_block, minlen); | ||
4945 | if (cnt < 0) { | ||
4946 | ret = cnt; | ||
4947 | break; | ||
4948 | } | ||
4949 | } | ||
4950 | trimmed += cnt; | ||
4951 | first_block = 0; | ||
4952 | } | ||
4953 | range->len = trimmed * sb->s_blocksize; | ||
4954 | |||
4955 | return ret; | ||
4956 | } | ||
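ext4_trim_fs is the backend for the FITRIM ioctl, so the byte-based fields in struct fstrim_range come straight from userspace. A minimal userspace sketch (mount point and minlen are illustrative values):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>

    int main(void)
    {
            struct fstrim_range range = {
                    .start  = 0,
                    .len    = ~0ULL,   /* trim the whole filesystem */
                    .minlen = 4096,    /* skip extents under one 4K block */
            };
            int fd = open("/mnt", O_RDONLY);

            if (fd < 0 || ioctl(fd, FITRIM, &range) < 0) {
                    perror("FITRIM");
                    return 1;
            }
            /* On return the kernel replaces len with the bytes trimmed,
             * mirroring range->len = trimmed * sb->s_blocksize above. */
            printf("trimmed %llu bytes\n", (unsigned long long)range.len);
            return 0;
    }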
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index b619322c76f0..20b5e7bfebd1 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -169,7 +169,7 @@ struct ext4_allocation_context { | |||
169 | /* original request */ | 169 | /* original request */ |
170 | struct ext4_free_extent ac_o_ex; | 170 | struct ext4_free_extent ac_o_ex; |
171 | 171 | ||
172 | /* goal request (after normalization) */ | 172 | /* goal request (normalized ac_o_ex) */ |
173 | struct ext4_free_extent ac_g_ex; | 173 | struct ext4_free_extent ac_g_ex; |
174 | 174 | ||
175 | /* the best found extent */ | 175 | /* the best found extent */ |
@@ -193,11 +193,6 @@ struct ext4_allocation_context { | |||
193 | __u8 ac_op; /* operation, for history only */ | 193 | __u8 ac_op; /* operation, for history only */ |
194 | struct page *ac_bitmap_page; | 194 | struct page *ac_bitmap_page; |
195 | struct page *ac_buddy_page; | 195 | struct page *ac_buddy_page; |
196 | /* | ||
197 | * pointer to the held semaphore upon successful | ||
198 | * block allocation | ||
199 | */ | ||
200 | struct rw_semaphore *alloc_semp; | ||
201 | struct ext4_prealloc_space *ac_pa; | 196 | struct ext4_prealloc_space *ac_pa; |
202 | struct ext4_locality_group *ac_lg; | 197 | struct ext4_locality_group *ac_lg; |
203 | }; | 198 | }; |
@@ -215,7 +210,6 @@ struct ext4_buddy { | |||
215 | struct super_block *bd_sb; | 210 | struct super_block *bd_sb; |
216 | __u16 bd_blkbits; | 211 | __u16 bd_blkbits; |
217 | ext4_group_t bd_group; | 212 | ext4_group_t bd_group; |
218 | struct rw_semaphore *alloc_semp; | ||
219 | }; | 213 | }; |
220 | #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) | 214 | #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) |
221 | #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) | 215 | #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 1765c2c50a9b..b57b98fb44d1 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -263,7 +263,7 @@ static int free_dind_blocks(handle_t *handle, | |||
263 | for (i = 0; i < max_entries; i++) { | 263 | for (i = 0; i < max_entries; i++) { |
264 | if (tmp_idata[i]) { | 264 | if (tmp_idata[i]) { |
265 | extend_credit_for_blkdel(handle, inode); | 265 | extend_credit_for_blkdel(handle, inode); |
266 | ext4_free_blocks(handle, inode, 0, | 266 | ext4_free_blocks(handle, inode, NULL, |
267 | le32_to_cpu(tmp_idata[i]), 1, | 267 | le32_to_cpu(tmp_idata[i]), 1, |
268 | EXT4_FREE_BLOCKS_METADATA | | 268 | EXT4_FREE_BLOCKS_METADATA | |
269 | EXT4_FREE_BLOCKS_FORGET); | 269 | EXT4_FREE_BLOCKS_FORGET); |
@@ -271,7 +271,7 @@ static int free_dind_blocks(handle_t *handle, | |||
271 | } | 271 | } |
272 | put_bh(bh); | 272 | put_bh(bh); |
273 | extend_credit_for_blkdel(handle, inode); | 273 | extend_credit_for_blkdel(handle, inode); |
274 | ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, | 274 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1, |
275 | EXT4_FREE_BLOCKS_METADATA | | 275 | EXT4_FREE_BLOCKS_METADATA | |
276 | EXT4_FREE_BLOCKS_FORGET); | 276 | EXT4_FREE_BLOCKS_FORGET); |
277 | return 0; | 277 | return 0; |
@@ -302,7 +302,7 @@ static int free_tind_blocks(handle_t *handle, | |||
302 | } | 302 | } |
303 | put_bh(bh); | 303 | put_bh(bh); |
304 | extend_credit_for_blkdel(handle, inode); | 304 | extend_credit_for_blkdel(handle, inode); |
305 | ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, | 305 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1, |
306 | EXT4_FREE_BLOCKS_METADATA | | 306 | EXT4_FREE_BLOCKS_METADATA | |
307 | EXT4_FREE_BLOCKS_FORGET); | 307 | EXT4_FREE_BLOCKS_FORGET); |
308 | return 0; | 308 | return 0; |
@@ -315,7 +315,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) | |||
315 | /* ei->i_data[EXT4_IND_BLOCK] */ | 315 | /* ei->i_data[EXT4_IND_BLOCK] */ |
316 | if (i_data[0]) { | 316 | if (i_data[0]) { |
317 | extend_credit_for_blkdel(handle, inode); | 317 | extend_credit_for_blkdel(handle, inode); |
318 | ext4_free_blocks(handle, inode, 0, | 318 | ext4_free_blocks(handle, inode, NULL, |
319 | le32_to_cpu(i_data[0]), 1, | 319 | le32_to_cpu(i_data[0]), 1, |
320 | EXT4_FREE_BLOCKS_METADATA | | 320 | EXT4_FREE_BLOCKS_METADATA | |
321 | EXT4_FREE_BLOCKS_FORGET); | 321 | EXT4_FREE_BLOCKS_FORGET); |
@@ -376,7 +376,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | |||
376 | * We have the extent map build with the tmp inode. | 376 | * We have the extent map build with the tmp inode. |
377 | * Now copy the i_data across | 377 | * Now copy the i_data across |
378 | */ | 378 | */ |
379 | ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS); | 379 | ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); |
380 | memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); | 380 | memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); |
381 | 381 | ||
382 | /* | 382 | /* |
@@ -412,7 +412,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, | |||
412 | struct buffer_head *bh; | 412 | struct buffer_head *bh; |
413 | struct ext4_extent_header *eh; | 413 | struct ext4_extent_header *eh; |
414 | 414 | ||
415 | block = idx_pblock(ix); | 415 | block = ext4_idx_pblock(ix); |
416 | bh = sb_bread(inode->i_sb, block); | 416 | bh = sb_bread(inode->i_sb, block); |
417 | if (!bh) | 417 | if (!bh) |
418 | return -EIO; | 418 | return -EIO; |
@@ -428,7 +428,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, | |||
428 | } | 428 | } |
429 | put_bh(bh); | 429 | put_bh(bh); |
430 | extend_credit_for_blkdel(handle, inode); | 430 | extend_credit_for_blkdel(handle, inode); |
431 | ext4_free_blocks(handle, inode, 0, block, 1, | 431 | ext4_free_blocks(handle, inode, NULL, block, 1, |
432 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); | 432 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); |
433 | return retval; | 433 | return retval; |
434 | } | 434 | } |
@@ -496,7 +496,7 @@ int ext4_ext_migrate(struct inode *inode) | |||
496 | goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * | 496 | goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * |
497 | EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; | 497 | EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; |
498 | tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, | 498 | tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, |
499 | S_IFREG, 0, goal); | 499 | S_IFREG, NULL, goal); |
500 | if (IS_ERR(tmp_inode)) { | 500 | if (IS_ERR(tmp_inode)) { |
501 | retval = -ENOMEM; | 501 | retval = -ENOMEM; |
502 | ext4_journal_stop(handle); | 502 | ext4_journal_stop(handle); |
@@ -517,7 +517,7 @@ int ext4_ext_migrate(struct inode *inode) | |||
517 | * start with one credit accounted for | 517 | * start with one credit accounted for |
518 | * superblock modification. | 518 | * superblock modification. |
519 | * | 519 | * |
520 | * For the tmp_inode we already have commited the | 520 | * For the tmp_inode we already have committed the |
521 | * transaction that created the inode. Later, as and | 521 | * transaction that created the inode. Later, as and |
522 | * when we add extents we extend the journal | 522 | * when we add extents we extend the journal |
523 | */ | 523 | */ |
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c new file mode 100644 index 000000000000..9bdef3f537c5 --- /dev/null +++ b/fs/ext4/mmp.c | |||
@@ -0,0 +1,351 @@ | |||
1 | #include <linux/fs.h> | ||
2 | #include <linux/random.h> | ||
3 | #include <linux/buffer_head.h> | ||
4 | #include <linux/utsname.h> | ||
5 | #include <linux/kthread.h> | ||
6 | |||
7 | #include "ext4.h" | ||
8 | |||
9 | /* | ||
10 | * Write the MMP block using WRITE_SYNC to try to get the block on disk | ||
11 | * faster. | ||
12 | */ | ||
13 | static int write_mmp_block(struct buffer_head *bh) | ||
14 | { | ||
15 | mark_buffer_dirty(bh); | ||
16 | lock_buffer(bh); | ||
17 | bh->b_end_io = end_buffer_write_sync; | ||
18 | get_bh(bh); | ||
19 | submit_bh(WRITE_SYNC, bh); | ||
20 | wait_on_buffer(bh); | ||
21 | if (unlikely(!buffer_uptodate(bh))) | ||
22 | return 1; | ||
23 | |||
24 | return 0; | ||
25 | } | ||
26 | |||
27 | /* | ||
28 | * Read the MMP block. It _must_ be read from disk and hence we clear the | ||
29 | * uptodate flag on the buffer. | ||
30 | */ | ||
31 | static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, | ||
32 | ext4_fsblk_t mmp_block) | ||
33 | { | ||
34 | struct mmp_struct *mmp; | ||
35 | |||
36 | if (*bh) | ||
37 | clear_buffer_uptodate(*bh); | ||
38 | |||
39 | /* This would be sb_bread(sb, mmp_block), except we need to be sure | ||
40 | * that the MD RAID device cache has been bypassed, and that the read | ||
41 | * is not blocked in the elevator. */ | ||
42 | if (!*bh) | ||
43 | *bh = sb_getblk(sb, mmp_block); | ||
44 | if (*bh) { | ||
45 | get_bh(*bh); | ||
46 | lock_buffer(*bh); | ||
47 | (*bh)->b_end_io = end_buffer_read_sync; | ||
48 | submit_bh(READ_SYNC, *bh); | ||
49 | wait_on_buffer(*bh); | ||
50 | if (!buffer_uptodate(*bh)) { | ||
51 | brelse(*bh); | ||
52 | *bh = NULL; | ||
53 | } | ||
54 | } | ||
55 | if (!*bh) { | ||
56 | ext4_warning(sb, "Error while reading MMP block %llu", | ||
57 | mmp_block); | ||
58 | return -EIO; | ||
59 | } | ||
60 | |||
61 | mmp = (struct mmp_struct *)((*bh)->b_data); | ||
62 | if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) | ||
63 | return -EINVAL; | ||
64 | |||
65 | return 0; | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * Dump as much information as possible to help the admin. | ||
70 | */ | ||
71 | void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, | ||
72 | const char *function, unsigned int line, const char *msg) | ||
73 | { | ||
74 | __ext4_warning(sb, function, line, msg); | ||
75 | __ext4_warning(sb, function, line, | ||
76 | "MMP failure info: last update time: %llu, last update " | ||
77 | "node: %s, last update device: %s\n", | ||
78 | (long long unsigned int) le64_to_cpu(mmp->mmp_time), | ||
79 | mmp->mmp_nodename, mmp->mmp_bdevname); | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * kmmpd will update the MMP sequence every s_mmp_update_interval seconds | ||
84 | */ | ||
85 | static int kmmpd(void *data) | ||
86 | { | ||
87 | struct super_block *sb = ((struct mmpd_data *) data)->sb; | ||
88 | struct buffer_head *bh = ((struct mmpd_data *) data)->bh; | ||
89 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
90 | struct mmp_struct *mmp; | ||
91 | ext4_fsblk_t mmp_block; | ||
92 | u32 seq = 0; | ||
93 | unsigned long failed_writes = 0; | ||
94 | int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval); | ||
95 | unsigned mmp_check_interval; | ||
96 | unsigned long last_update_time; | ||
97 | unsigned long diff; | ||
98 | int retval; | ||
99 | |||
100 | mmp_block = le64_to_cpu(es->s_mmp_block); | ||
101 | mmp = (struct mmp_struct *)(bh->b_data); | ||
102 | mmp->mmp_time = cpu_to_le64(get_seconds()); | ||
103 | /* | ||
104 | * Start with the higher mmp_check_interval and reduce it if | ||
105 | * the MMP block is being updated on time. | ||
106 | */ | ||
107 | mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, | ||
108 | EXT4_MMP_MIN_CHECK_INTERVAL); | ||
109 | mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); | ||
110 | bdevname(bh->b_bdev, mmp->mmp_bdevname); | ||
111 | |||
112 | memcpy(mmp->mmp_nodename, init_utsname()->sysname, | ||
113 | sizeof(mmp->mmp_nodename)); | ||
114 | |||
115 | while (!kthread_should_stop()) { | ||
116 | if (++seq > EXT4_MMP_SEQ_MAX) | ||
117 | seq = 1; | ||
118 | |||
119 | mmp->mmp_seq = cpu_to_le32(seq); | ||
120 | mmp->mmp_time = cpu_to_le64(get_seconds()); | ||
121 | last_update_time = jiffies; | ||
122 | |||
123 | retval = write_mmp_block(bh); | ||
124 | /* | ||
125 | * Don't spew too many error messages. Print one every | ||
126 | * (s_mmp_update_interval * 60) seconds. | ||
127 | */ | ||
128 | if (retval && (failed_writes % 60) == 0) { | ||
129 | ext4_error(sb, "Error writing to MMP block"); | ||
130 | failed_writes++; | ||
131 | } | ||
132 | |||
133 | if (!(le32_to_cpu(es->s_feature_incompat) & | ||
134 | EXT4_FEATURE_INCOMPAT_MMP)) { | ||
135 | ext4_warning(sb, "kmmpd being stopped since MMP feature" | ||
136 | " has been disabled."); | ||
137 | EXT4_SB(sb)->s_mmp_tsk = NULL; | ||
138 | goto failed; | ||
139 | } | ||
140 | |||
141 | if (sb->s_flags & MS_RDONLY) { | ||
142 | ext4_warning(sb, "kmmpd being stopped since filesystem " | ||
143 | "has been remounted as readonly."); | ||
144 | EXT4_SB(sb)->s_mmp_tsk = NULL; | ||
145 | goto failed; | ||
146 | } | ||
147 | |||
148 | diff = jiffies - last_update_time; | ||
149 | if (diff < mmp_update_interval * HZ) | ||
150 | schedule_timeout_interruptible(mmp_update_interval * | ||
151 | HZ - diff); | ||
152 | |||
153 | /* | ||
154 | * We need to make sure that more than mmp_check_interval | ||
155 | * seconds have not passed since writing. If that has happened | ||
156 | * we need to check if the MMP block is as we left it. | ||
157 | */ | ||
158 | diff = jiffies - last_update_time; | ||
159 | if (diff > mmp_check_interval * HZ) { | ||
160 | struct buffer_head *bh_check = NULL; | ||
161 | struct mmp_struct *mmp_check; | ||
162 | |||
163 | retval = read_mmp_block(sb, &bh_check, mmp_block); | ||
164 | if (retval) { | ||
165 | ext4_error(sb, "error reading MMP data: %d", | ||
166 | retval); | ||
167 | |||
168 | EXT4_SB(sb)->s_mmp_tsk = NULL; | ||
169 | goto failed; | ||
170 | } | ||
171 | |||
172 | mmp_check = (struct mmp_struct *)(bh_check->b_data); | ||
173 | if (mmp->mmp_seq != mmp_check->mmp_seq || | ||
174 | memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename, | ||
175 | sizeof(mmp->mmp_nodename))) { | ||
176 | dump_mmp_msg(sb, mmp_check, | ||
177 | "Error while updating MMP info. " | ||
178 | "The filesystem seems to have been" | ||
179 | " multiply mounted."); | ||
180 | ext4_error(sb, "abort"); | ||
181 | goto failed; | ||
182 | } | ||
183 | put_bh(bh_check); | ||
184 | } | ||
185 | |||
186 | /* | ||
187 | * Adjust the mmp_check_interval depending on how much time | ||
188 | * it took for the MMP block to be written. | ||
189 | */ | ||
190 | mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ, | ||
191 | EXT4_MMP_MAX_CHECK_INTERVAL), | ||
192 | EXT4_MMP_MIN_CHECK_INTERVAL); | ||
193 | mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); | ||
194 | } | ||
195 | |||
196 | /* | ||
197 | * Unmount seems to be clean. | ||
198 | */ | ||
199 | mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); | ||
200 | mmp->mmp_time = cpu_to_le64(get_seconds()); | ||
201 | |||
202 | retval = write_mmp_block(bh); | ||
203 | |||
204 | failed: | ||
205 | kfree(data); | ||
206 | brelse(bh); | ||
207 | return retval; | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * Get a random new sequence number but make sure it is not greater than | ||
212 | * EXT4_MMP_SEQ_MAX. | ||
213 | */ | ||
214 | static unsigned int mmp_new_seq(void) | ||
215 | { | ||
216 | u32 new_seq; | ||
217 | |||
218 | do { | ||
219 | get_random_bytes(&new_seq, sizeof(u32)); | ||
220 | } while (new_seq > EXT4_MMP_SEQ_MAX); | ||
221 | |||
222 | return new_seq; | ||
223 | } | ||
224 | |||
225 | /* | ||
226 | * Protect the filesystem from being mounted more than once. | ||
227 | */ | ||
228 | int ext4_multi_mount_protect(struct super_block *sb, | ||
229 | ext4_fsblk_t mmp_block) | ||
230 | { | ||
231 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
232 | struct buffer_head *bh = NULL; | ||
233 | struct mmp_struct *mmp = NULL; | ||
234 | struct mmpd_data *mmpd_data; | ||
235 | u32 seq; | ||
236 | unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval); | ||
237 | unsigned int wait_time = 0; | ||
238 | int retval; | ||
239 | |||
240 | if (mmp_block < le32_to_cpu(es->s_first_data_block) || | ||
241 | mmp_block >= ext4_blocks_count(es)) { | ||
242 | ext4_warning(sb, "Invalid MMP block in superblock"); | ||
243 | goto failed; | ||
244 | } | ||
245 | |||
246 | retval = read_mmp_block(sb, &bh, mmp_block); | ||
247 | if (retval) | ||
248 | goto failed; | ||
249 | |||
250 | mmp = (struct mmp_struct *)(bh->b_data); | ||
251 | |||
252 | if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL) | ||
253 | mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL; | ||
254 | |||
255 | /* | ||
256 | * If check_interval in MMP block is larger, use that instead of | ||
257 | * update_interval from the superblock. | ||
258 | */ | ||
259 | if (mmp->mmp_check_interval > mmp_check_interval) | ||
260 | mmp_check_interval = mmp->mmp_check_interval; | ||
261 | |||
262 | seq = le32_to_cpu(mmp->mmp_seq); | ||
263 | if (seq == EXT4_MMP_SEQ_CLEAN) | ||
264 | goto skip; | ||
265 | |||
266 | if (seq == EXT4_MMP_SEQ_FSCK) { | ||
267 | dump_mmp_msg(sb, mmp, "fsck is running on the filesystem"); | ||
268 | goto failed; | ||
269 | } | ||
270 | |||
271 | wait_time = min(mmp_check_interval * 2 + 1, | ||
272 | mmp_check_interval + 60); | ||
273 | |||
274 | /* Print MMP interval if more than 20 secs. */ | ||
275 | if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4) | ||
276 | ext4_warning(sb, "MMP interval %u higher than expected, please" | ||
277 | " wait.\n", wait_time * 2); | ||
278 | |||
279 | if (schedule_timeout_interruptible(HZ * wait_time) != 0) { | ||
280 | ext4_warning(sb, "MMP startup interrupted, failing mount\n"); | ||
281 | goto failed; | ||
282 | } | ||
283 | |||
284 | retval = read_mmp_block(sb, &bh, mmp_block); | ||
285 | if (retval) | ||
286 | goto failed; | ||
287 | mmp = (struct mmp_struct *)(bh->b_data); | ||
288 | if (seq != le32_to_cpu(mmp->mmp_seq)) { | ||
289 | dump_mmp_msg(sb, mmp, | ||
290 | "Device is already active on another node."); | ||
291 | goto failed; | ||
292 | } | ||
293 | |||
294 | skip: | ||
295 | /* | ||
296 | * write a new random sequence number. | ||
297 | */ | ||
298 | mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq()); | ||
299 | |||
300 | retval = write_mmp_block(bh); | ||
301 | if (retval) | ||
302 | goto failed; | ||
303 | |||
304 | /* | ||
305 | * wait for MMP interval and check mmp_seq. | ||
306 | */ | ||
307 | if (schedule_timeout_interruptible(HZ * wait_time) != 0) { | ||
308 | ext4_warning(sb, "MMP startup interrupted, failing mount\n"); | ||
309 | goto failed; | ||
310 | } | ||
311 | |||
312 | retval = read_mmp_block(sb, &bh, mmp_block); | ||
313 | if (retval) | ||
314 | goto failed; | ||
315 | mmp = (struct mmp_struct *)(bh->b_data); | ||
316 | if (seq != le32_to_cpu(mmp->mmp_seq)) { | ||
317 | dump_mmp_msg(sb, mmp, | ||
318 | "Device is already active on another node."); | ||
319 | goto failed; | ||
320 | } | ||
321 | |||
322 | mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL); | ||
323 | if (!mmpd_data) { | ||
324 | ext4_warning(sb, "not enough memory for mmpd_data"); | ||
325 | goto failed; | ||
326 | } | ||
327 | mmpd_data->sb = sb; | ||
328 | mmpd_data->bh = bh; | ||
329 | |||
330 | /* | ||
331 | * Start a kernel thread to update the MMP block periodically. | ||
332 | */ | ||
333 | EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s", | ||
334 | bdevname(bh->b_bdev, | ||
335 | mmp->mmp_bdevname)); | ||
336 | if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { | ||
337 | EXT4_SB(sb)->s_mmp_tsk = NULL; | ||
338 | kfree(mmpd_data); | ||
339 | ext4_warning(sb, "Unable to create kmmpd thread for %s.", | ||
340 | sb->s_id); | ||
341 | goto failed; | ||
342 | } | ||
343 | |||
344 | return 0; | ||
345 | |||
346 | failed: | ||
347 | brelse(bh); | ||
348 | return 1; | ||
349 | } | ||
350 | |||
351 | |||
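Taken together, ext4_multi_mount_protect() above and the kmmpd thread implement a simple lease protocol: a mounting node reads the MMP sequence, waits at least one check interval, and proceeds only if nobody advanced the sequence in the meantime; it then claims the block with a fresh random sequence, waits again, and verifies the claim held before handing renewal duty to kmmpd. Here is a minimal userspace sketch of the mount-time handshake (assumptions: an in-memory stand-in for the MMP block, no endianness handling, illustrative sentinel values rather than the real EXT4_MMP_SEQ_* constants, and a shortened wait):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define MMP_SEQ_CLEAN 0xFFFFFF01u  /* illustrative sentinel, not the real value */
#define MMP_SEQ_MAX   0xE0000000u  /* illustrative cap, not the real value */

static uint32_t disk_seq = 42;     /* stands in for the on-disk MMP block */

static uint32_t read_seq(void)    { return disk_seq; }
static void write_seq(uint32_t s) { disk_seq = s; }

/* Rejection-sample a sequence no greater than the cap, mirroring what
 * mmp_new_seq() above does with get_random_bytes(). */
static uint32_t new_seq(void)
{
	uint32_t s;
	do {
		s = ((uint32_t)rand() << 16) ^ (uint32_t)rand();
	} while (s > MMP_SEQ_MAX);
	return s;
}

int main(void)
{
	unsigned wait_secs = 1;   /* stands in for the derived wait_time, shortened */
	uint32_t seq = read_seq();
	uint32_t mine;

	if (seq != MMP_SEQ_CLEAN) {
		sleep(wait_secs);
		if (read_seq() != seq) {
			fprintf(stderr, "device active on another node\n");
			return 1;
		}
	}
	mine = new_seq();
	write_seq(mine);          /* claim the block ... */
	sleep(wait_secs);         /* ... and make sure the claim held */
	if (read_seq() != mine) {
		fprintf(stderr, "lost the race, failing mount\n");
		return 1;
	}
	puts("mount may proceed; kmmpd would renew the claim from here");
	return 0;
}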
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 5f1ed9fc913c..f57455a1b1b2 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -85,7 +85,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
85 | if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { | 85 | if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { |
86 | /* leaf block */ | 86 | /* leaf block */ |
87 | *extent = ++path[ppos].p_ext; | 87 | *extent = ++path[ppos].p_ext; |
88 | path[ppos].p_block = ext_pblock(path[ppos].p_ext); | 88 | path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); |
89 | return 0; | 89 | return 0; |
90 | } | 90 | } |
91 | 91 | ||
@@ -96,7 +96,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
96 | 96 | ||
97 | /* index block */ | 97 | /* index block */ |
98 | path[ppos].p_idx++; | 98 | path[ppos].p_idx++; |
99 | path[ppos].p_block = idx_pblock(path[ppos].p_idx); | 99 | path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); |
100 | if (path[ppos+1].p_bh) | 100 | if (path[ppos+1].p_bh) |
101 | brelse(path[ppos+1].p_bh); | 101 | brelse(path[ppos+1].p_bh); |
102 | path[ppos+1].p_bh = | 102 | path[ppos+1].p_bh = |
@@ -111,7 +111,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
111 | path[cur_ppos].p_idx = | 111 | path[cur_ppos].p_idx = |
112 | EXT_FIRST_INDEX(path[cur_ppos].p_hdr); | 112 | EXT_FIRST_INDEX(path[cur_ppos].p_hdr); |
113 | path[cur_ppos].p_block = | 113 | path[cur_ppos].p_block = |
114 | idx_pblock(path[cur_ppos].p_idx); | 114 | ext4_idx_pblock(path[cur_ppos].p_idx); |
115 | if (path[cur_ppos+1].p_bh) | 115 | if (path[cur_ppos+1].p_bh) |
116 | brelse(path[cur_ppos+1].p_bh); | 116 | brelse(path[cur_ppos+1].p_bh); |
117 | path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, | 117 | path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, |
@@ -133,7 +133,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
133 | path[leaf_ppos].p_ext = *extent = | 133 | path[leaf_ppos].p_ext = *extent = |
134 | EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); | 134 | EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); |
135 | path[leaf_ppos].p_block = | 135 | path[leaf_ppos].p_block = |
136 | ext_pblock(path[leaf_ppos].p_ext); | 136 | ext4_ext_pblock(path[leaf_ppos].p_ext); |
137 | return 0; | 137 | return 0; |
138 | } | 138 | } |
139 | } | 139 | } |
@@ -249,7 +249,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
249 | */ | 249 | */ |
250 | o_end->ee_block = end_ext->ee_block; | 250 | o_end->ee_block = end_ext->ee_block; |
251 | o_end->ee_len = end_ext->ee_len; | 251 | o_end->ee_len = end_ext->ee_len; |
252 | ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); | 252 | ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); |
253 | } | 253 | } |
254 | 254 | ||
255 | o_start->ee_len = start_ext->ee_len; | 255 | o_start->ee_len = start_ext->ee_len; |
@@ -276,7 +276,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
276 | */ | 276 | */ |
277 | o_end->ee_block = end_ext->ee_block; | 277 | o_end->ee_block = end_ext->ee_block; |
278 | o_end->ee_len = end_ext->ee_len; | 278 | o_end->ee_len = end_ext->ee_len; |
279 | ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); | 279 | ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); |
280 | 280 | ||
281 | /* | 281 | /* |
282 | * Set 0 to the extent block if new_ext was | 282 | * Set 0 to the extent block if new_ext was |
@@ -361,7 +361,7 @@ mext_insert_inside_block(struct ext4_extent *o_start, | |||
361 | /* Insert new entry */ | 361 | /* Insert new entry */ |
362 | if (new_ext->ee_len) { | 362 | if (new_ext->ee_len) { |
363 | o_start[i] = *new_ext; | 363 | o_start[i] = *new_ext; |
364 | ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext)); | 364 | ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext)); |
365 | } | 365 | } |
366 | 366 | ||
367 | /* Insert end entry */ | 367 | /* Insert end entry */ |
@@ -488,7 +488,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
488 | start_ext.ee_len = end_ext.ee_len = 0; | 488 | start_ext.ee_len = end_ext.ee_len = 0; |
489 | 489 | ||
490 | new_ext.ee_block = cpu_to_le32(*from); | 490 | new_ext.ee_block = cpu_to_le32(*from); |
491 | ext4_ext_store_pblock(&new_ext, ext_pblock(dext)); | 491 | ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext)); |
492 | new_ext.ee_len = dext->ee_len; | 492 | new_ext.ee_len = dext->ee_len; |
493 | new_ext_alen = ext4_ext_get_actual_len(&new_ext); | 493 | new_ext_alen = ext4_ext_get_actual_len(&new_ext); |
494 | new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; | 494 | new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; |
@@ -553,7 +553,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
553 | copy_extent_status(oext, &end_ext); | 553 | copy_extent_status(oext, &end_ext); |
554 | end_ext_alen = ext4_ext_get_actual_len(&end_ext); | 554 | end_ext_alen = ext4_ext_get_actual_len(&end_ext); |
555 | ext4_ext_store_pblock(&end_ext, | 555 | ext4_ext_store_pblock(&end_ext, |
556 | (ext_pblock(o_end) + oext_alen - end_ext_alen)); | 556 | (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen)); |
557 | end_ext.ee_block = | 557 | end_ext.ee_block = |
558 | cpu_to_le32(le32_to_cpu(o_end->ee_block) + | 558 | cpu_to_le32(le32_to_cpu(o_end->ee_block) + |
559 | oext_alen - end_ext_alen); | 559 | oext_alen - end_ext_alen); |
@@ -604,7 +604,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
604 | /* When tmp_dext is too large, pick up the target range. */ | 604 | /* When tmp_dext is too large, pick up the target range. */ |
605 | diff = donor_off - le32_to_cpu(tmp_dext->ee_block); | 605 | diff = donor_off - le32_to_cpu(tmp_dext->ee_block); |
606 | 606 | ||
607 | ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff); | 607 | ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); |
608 | tmp_dext->ee_block = | 608 | tmp_dext->ee_block = |
609 | cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); | 609 | cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); |
610 | tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); | 610 | tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); |
@@ -613,7 +613,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
613 | tmp_dext->ee_len = cpu_to_le16(max_count); | 613 | tmp_dext->ee_len = cpu_to_le16(max_count); |
614 | 614 | ||
615 | orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); | 615 | orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); |
616 | ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff); | 616 | ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff); |
617 | 617 | ||
618 | /* Adjust extent length if donor extent is larger than orig */ | 618 | /* Adjust extent length if donor extent is larger than orig */ |
619 | if (ext4_ext_get_actual_len(tmp_dext) > | 619 | if (ext4_ext_get_actual_len(tmp_dext) > |
@@ -876,8 +876,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
876 | * It needs to call wait_on_page_writeback() to wait for the | 876 | * It needs to call wait_on_page_writeback() to wait for the |
877 | * writeback of the page. | 877 | * writeback of the page. |
878 | */ | 878 | */ |
879 | if (PageWriteback(page)) | 879 | wait_on_page_writeback(page); |
880 | wait_on_page_writeback(page); | ||
881 | 880 | ||
882 | /* Release old bh and drop refs */ | 881 | /* Release old bh and drop refs */ |
883 | try_to_release_page(page, 0); | 882 | try_to_release_page(page, 0); |
@@ -1003,12 +1002,12 @@ mext_check_arguments(struct inode *orig_inode, | |||
1003 | return -EINVAL; | 1002 | return -EINVAL; |
1004 | } | 1003 | } |
1005 | 1004 | ||
1006 | if ((orig_start > EXT_MAX_BLOCK) || | 1005 | if ((orig_start >= EXT_MAX_BLOCKS) || |
1007 | (donor_start > EXT_MAX_BLOCK) || | 1006 | (donor_start >= EXT_MAX_BLOCKS) || |
1008 | (*len > EXT_MAX_BLOCK) || | 1007 | (*len > EXT_MAX_BLOCKS) || |
1009 | (orig_start + *len > EXT_MAX_BLOCK)) { | 1008 | (orig_start + *len >= EXT_MAX_BLOCKS)) { |
1010 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " | 1009 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " |
1011 | "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK, | 1010 | "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, |
1012 | orig_inode->i_ino, donor_inode->i_ino); | 1011 | orig_inode->i_ino, donor_inode->i_ino); |
1013 | return -EINVAL; | 1012 | return -EINVAL; |
1014 | } | 1013 | } |
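The mext_check_arguments() hunk above is an off-by-one fix: the old EXT_MAX_BLOCK named the largest valid logical block number, while EXT_MAX_BLOCKS counts addressable logical blocks, so block numbers compared against a count must use >=. A standalone sketch of the corrected predicate (the limit of 100 is scaled down purely so the rejection is visible; on ext4 the real constant covers the 32-bit logical block space):

#include <stdbool.h>
#include <stdio.h>

#define EXT_MAX_BLOCKS 100u	/* illustrative stand-in only */

/* Mirrors the corrected check: the start blocks and the end of the
 * moved range must stay strictly below the block count. */
static bool range_ok(unsigned int start, unsigned int len)
{
	return !(start >= EXT_MAX_BLOCKS ||
		 len > EXT_MAX_BLOCKS ||
		 start + len >= EXT_MAX_BLOCKS);
}

int main(void)
{
	printf("%d\n", range_ok(90, 9));   /* 1: last moved block is 98 */
	printf("%d\n", range_ok(90, 10));  /* 0: start + len reaches the limit */
	return 0;
}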
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 314c0d3b3fa9..b754b7721f51 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include "xattr.h" | 40 | #include "xattr.h" |
41 | #include "acl.h" | 41 | #include "acl.h" |
42 | 42 | ||
43 | #include <trace/events/ext4.h> | ||
43 | /* | 44 | /* |
44 | * define how far ahead to read directories while searching them. | 45 | * define how far ahead to read directories while searching them. |
45 | */ | 46 | */ |
@@ -581,9 +582,9 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
581 | dir->i_sb->s_blocksize - | 582 | dir->i_sb->s_blocksize - |
582 | EXT4_DIR_REC_LEN(0)); | 583 | EXT4_DIR_REC_LEN(0)); |
583 | for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { | 584 | for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { |
584 | if (!ext4_check_dir_entry(dir, de, bh, | 585 | if (ext4_check_dir_entry(dir, NULL, de, bh, |
585 | (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) | 586 | (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) |
586 | +((char *)de - bh->b_data))) { | 587 | + ((char *)de - bh->b_data))) { |
587 | /* On error, skip the f_pos to the next block. */ | 588 | /* On error, skip the f_pos to the next block. */ |
588 | dir_file->f_pos = (dir_file->f_pos | | 589 | dir_file->f_pos = (dir_file->f_pos | |
589 | (dir->i_sb->s_blocksize - 1)) + 1; | 590 | (dir->i_sb->s_blocksize - 1)) + 1; |
@@ -820,7 +821,7 @@ static inline int search_dirblock(struct buffer_head *bh, | |||
820 | if ((char *) de + namelen <= dlimit && | 821 | if ((char *) de + namelen <= dlimit && |
821 | ext4_match (namelen, name, de)) { | 822 | ext4_match (namelen, name, de)) { |
822 | /* found a match - just to be sure, do a full check */ | 823 | /* found a match - just to be sure, do a full check */ |
823 | if (!ext4_check_dir_entry(dir, de, bh, offset)) | 824 | if (ext4_check_dir_entry(dir, NULL, de, bh, offset)) |
824 | return -1; | 825 | return -1; |
825 | *res_dir = de; | 826 | *res_dir = de; |
826 | return 1; | 827 | return 1; |
@@ -856,6 +857,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, | |||
856 | struct buffer_head *bh_use[NAMEI_RA_SIZE]; | 857 | struct buffer_head *bh_use[NAMEI_RA_SIZE]; |
857 | struct buffer_head *bh, *ret = NULL; | 858 | struct buffer_head *bh, *ret = NULL; |
858 | ext4_lblk_t start, block, b; | 859 | ext4_lblk_t start, block, b; |
860 | const u8 *name = d_name->name; | ||
859 | int ra_max = 0; /* Number of bh's in the readahead | 861 | int ra_max = 0; /* Number of bh's in the readahead |
860 | buffer, bh_use[] */ | 862 | buffer, bh_use[] */ |
861 | int ra_ptr = 0; /* Current index into readahead | 863 | int ra_ptr = 0; /* Current index into readahead |
@@ -870,6 +872,16 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, | |||
870 | namelen = d_name->len; | 872 | namelen = d_name->len; |
871 | if (namelen > EXT4_NAME_LEN) | 873 | if (namelen > EXT4_NAME_LEN) |
872 | return NULL; | 874 | return NULL; |
875 | if ((namelen <= 2) && (name[0] == '.') && | ||
876 | (name[1] == '.' || name[1] == '\0')) { | ||
877 | /* | ||
878 | * "." or ".." will only be in the first block | ||
879 | * NFS may look up ".."; "." should be handled by the VFS | ||
880 | */ | ||
881 | block = start = 0; | ||
882 | nblocks = 1; | ||
883 | goto restart; | ||
884 | } | ||
873 | if (is_dx(dir)) { | 885 | if (is_dx(dir)) { |
874 | bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); | 886 | bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); |
875 | /* | 887 | /* |
@@ -960,55 +972,35 @@ cleanup_and_exit: | |||
960 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, | 972 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, |
961 | struct ext4_dir_entry_2 **res_dir, int *err) | 973 | struct ext4_dir_entry_2 **res_dir, int *err) |
962 | { | 974 | { |
963 | struct super_block * sb; | 975 | struct super_block * sb = dir->i_sb; |
964 | struct dx_hash_info hinfo; | 976 | struct dx_hash_info hinfo; |
965 | u32 hash; | ||
966 | struct dx_frame frames[2], *frame; | 977 | struct dx_frame frames[2], *frame; |
967 | struct ext4_dir_entry_2 *de, *top; | ||
968 | struct buffer_head *bh; | 978 | struct buffer_head *bh; |
969 | ext4_lblk_t block; | 979 | ext4_lblk_t block; |
970 | int retval; | 980 | int retval; |
971 | int namelen = d_name->len; | ||
972 | const u8 *name = d_name->name; | ||
973 | 981 | ||
974 | sb = dir->i_sb; | 982 | if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) |
975 | /* NFS may look up ".." - look at dx_root directory block */ | 983 | return NULL; |
976 | if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ | ||
977 | if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) | ||
978 | return NULL; | ||
979 | } else { | ||
980 | frame = frames; | ||
981 | frame->bh = NULL; /* for dx_release() */ | ||
982 | frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ | ||
983 | dx_set_block(frame->at, 0); /* dx_root block is 0 */ | ||
984 | } | ||
985 | hash = hinfo.hash; | ||
986 | do { | 984 | do { |
987 | block = dx_get_block(frame->at); | 985 | block = dx_get_block(frame->at); |
988 | if (!(bh = ext4_bread (NULL,dir, block, 0, err))) | 986 | if (!(bh = ext4_bread(NULL, dir, block, 0, err))) |
989 | goto errout; | 987 | goto errout; |
990 | de = (struct ext4_dir_entry_2 *) bh->b_data; | ||
991 | top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize - | ||
992 | EXT4_DIR_REC_LEN(0)); | ||
993 | for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) { | ||
994 | int off = (block << EXT4_BLOCK_SIZE_BITS(sb)) | ||
995 | + ((char *) de - bh->b_data); | ||
996 | |||
997 | if (!ext4_check_dir_entry(dir, de, bh, off)) { | ||
998 | brelse(bh); | ||
999 | *err = ERR_BAD_DX_DIR; | ||
1000 | goto errout; | ||
1001 | } | ||
1002 | 988 | ||
1003 | if (ext4_match(namelen, name, de)) { | 989 | retval = search_dirblock(bh, dir, d_name, |
1004 | *res_dir = de; | 990 | block << EXT4_BLOCK_SIZE_BITS(sb), |
1005 | dx_release(frames); | 991 | res_dir); |
1006 | return bh; | 992 | if (retval == 1) { /* Success! */ |
1007 | } | 993 | dx_release(frames); |
994 | return bh; | ||
1008 | } | 995 | } |
1009 | brelse(bh); | 996 | brelse(bh); |
997 | if (retval == -1) { | ||
998 | *err = ERR_BAD_DX_DIR; | ||
999 | goto errout; | ||
1000 | } | ||
1001 | |||
1010 | /* Check to see if we should continue to search */ | 1002 | /* Check to see if we should continue to search */ |
1011 | retval = ext4_htree_next_block(dir, hash, frame, | 1003 | retval = ext4_htree_next_block(dir, hinfo.hash, frame, |
1012 | frames, NULL); | 1004 | frames, NULL); |
1013 | if (retval < 0) { | 1005 | if (retval < 0) { |
1014 | ext4_warning(sb, | 1006 | ext4_warning(sb, |
@@ -1045,7 +1037,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru | |||
1045 | return ERR_PTR(-EIO); | 1037 | return ERR_PTR(-EIO); |
1046 | } | 1038 | } |
1047 | inode = ext4_iget(dir->i_sb, ino); | 1039 | inode = ext4_iget(dir->i_sb, ino); |
1048 | if (unlikely(IS_ERR(inode))) { | 1040 | if (IS_ERR(inode)) { |
1049 | if (PTR_ERR(inode) == -ESTALE) { | 1041 | if (PTR_ERR(inode) == -ESTALE) { |
1050 | EXT4_ERROR_INODE(dir, | 1042 | EXT4_ERROR_INODE(dir, |
1051 | "deleted inode referenced: %u", | 1043 | "deleted inode referenced: %u", |
@@ -1278,7 +1270,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1278 | de = (struct ext4_dir_entry_2 *)bh->b_data; | 1270 | de = (struct ext4_dir_entry_2 *)bh->b_data; |
1279 | top = bh->b_data + blocksize - reclen; | 1271 | top = bh->b_data + blocksize - reclen; |
1280 | while ((char *) de <= top) { | 1272 | while ((char *) de <= top) { |
1281 | if (!ext4_check_dir_entry(dir, de, bh, offset)) | 1273 | if (ext4_check_dir_entry(dir, NULL, de, bh, offset)) |
1282 | return -EIO; | 1274 | return -EIO; |
1283 | if (ext4_match(namelen, name, de)) | 1275 | if (ext4_match(namelen, name, de)) |
1284 | return -EEXIST; | 1276 | return -EEXIST; |
@@ -1421,10 +1413,22 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1421 | frame->at = entries; | 1413 | frame->at = entries; |
1422 | frame->bh = bh; | 1414 | frame->bh = bh; |
1423 | bh = bh2; | 1415 | bh = bh2; |
1416 | |||
1417 | ext4_handle_dirty_metadata(handle, dir, frame->bh); | ||
1418 | ext4_handle_dirty_metadata(handle, dir, bh); | ||
1419 | |||
1424 | de = do_split(handle,dir, &bh, frame, &hinfo, &retval); | 1420 | de = do_split(handle,dir, &bh, frame, &hinfo, &retval); |
1425 | dx_release (frames); | 1421 | if (!de) { |
1426 | if (!(de)) | 1422 | /* |
1423 | * Even if the block split failed, we have to properly write | ||
1424 | * out all the changes we did so far. Otherwise we can end up | ||
1425 | * with a corrupted filesystem. | ||
1426 | */ | ||
1427 | ext4_mark_inode_dirty(handle, dir); | ||
1428 | dx_release(frames); | ||
1427 | return retval; | 1429 | return retval; |
1430 | } | ||
1431 | dx_release(frames); | ||
1428 | 1432 | ||
1429 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); | 1433 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); |
1430 | brelse(bh); | 1434 | brelse(bh); |
@@ -1611,7 +1615,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1611 | if (err) | 1615 | if (err) |
1612 | goto journal_error; | 1616 | goto journal_error; |
1613 | } | 1617 | } |
1614 | ext4_handle_dirty_metadata(handle, inode, frames[0].bh); | 1618 | err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh); |
1619 | if (err) { | ||
1620 | ext4_std_error(inode->i_sb, err); | ||
1621 | goto cleanup; | ||
1622 | } | ||
1615 | } | 1623 | } |
1616 | de = do_split(handle, dir, &bh, frame, &hinfo, &err); | 1624 | de = do_split(handle, dir, &bh, frame, &hinfo, &err); |
1617 | if (!de) | 1625 | if (!de) |
@@ -1639,17 +1647,21 @@ static int ext4_delete_entry(handle_t *handle, | |||
1639 | { | 1647 | { |
1640 | struct ext4_dir_entry_2 *de, *pde; | 1648 | struct ext4_dir_entry_2 *de, *pde; |
1641 | unsigned int blocksize = dir->i_sb->s_blocksize; | 1649 | unsigned int blocksize = dir->i_sb->s_blocksize; |
1642 | int i; | 1650 | int i, err; |
1643 | 1651 | ||
1644 | i = 0; | 1652 | i = 0; |
1645 | pde = NULL; | 1653 | pde = NULL; |
1646 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 1654 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1647 | while (i < bh->b_size) { | 1655 | while (i < bh->b_size) { |
1648 | if (!ext4_check_dir_entry(dir, de, bh, i)) | 1656 | if (ext4_check_dir_entry(dir, NULL, de, bh, i)) |
1649 | return -EIO; | 1657 | return -EIO; |
1650 | if (de == de_del) { | 1658 | if (de == de_del) { |
1651 | BUFFER_TRACE(bh, "get_write_access"); | 1659 | BUFFER_TRACE(bh, "get_write_access"); |
1652 | ext4_journal_get_write_access(handle, bh); | 1660 | err = ext4_journal_get_write_access(handle, bh); |
1661 | if (unlikely(err)) { | ||
1662 | ext4_std_error(dir->i_sb, err); | ||
1663 | return err; | ||
1664 | } | ||
1653 | if (pde) | 1665 | if (pde) |
1654 | pde->rec_len = ext4_rec_len_to_disk( | 1666 | pde->rec_len = ext4_rec_len_to_disk( |
1655 | ext4_rec_len_from_disk(pde->rec_len, | 1667 | ext4_rec_len_from_disk(pde->rec_len, |
@@ -1661,7 +1673,11 @@ static int ext4_delete_entry(handle_t *handle, | |||
1661 | de->inode = 0; | 1673 | de->inode = 0; |
1662 | dir->i_version++; | 1674 | dir->i_version++; |
1663 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 1675 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
1664 | ext4_handle_dirty_metadata(handle, dir, bh); | 1676 | err = ext4_handle_dirty_metadata(handle, dir, bh); |
1677 | if (unlikely(err)) { | ||
1678 | ext4_std_error(dir->i_sb, err); | ||
1679 | return err; | ||
1680 | } | ||
1665 | return 0; | 1681 | return 0; |
1666 | } | 1682 | } |
1667 | i += ext4_rec_len_from_disk(de->rec_len, blocksize); | 1683 | i += ext4_rec_len_from_disk(de->rec_len, blocksize); |
@@ -1798,7 +1814,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
1798 | { | 1814 | { |
1799 | handle_t *handle; | 1815 | handle_t *handle; |
1800 | struct inode *inode; | 1816 | struct inode *inode; |
1801 | struct buffer_head *dir_block; | 1817 | struct buffer_head *dir_block = NULL; |
1802 | struct ext4_dir_entry_2 *de; | 1818 | struct ext4_dir_entry_2 *de; |
1803 | unsigned int blocksize = dir->i_sb->s_blocksize; | 1819 | unsigned int blocksize = dir->i_sb->s_blocksize; |
1804 | int err, retries = 0; | 1820 | int err, retries = 0; |
@@ -1831,7 +1847,9 @@ retry: | |||
1831 | if (!dir_block) | 1847 | if (!dir_block) |
1832 | goto out_clear_inode; | 1848 | goto out_clear_inode; |
1833 | BUFFER_TRACE(dir_block, "get_write_access"); | 1849 | BUFFER_TRACE(dir_block, "get_write_access"); |
1834 | ext4_journal_get_write_access(handle, dir_block); | 1850 | err = ext4_journal_get_write_access(handle, dir_block); |
1851 | if (err) | ||
1852 | goto out_clear_inode; | ||
1835 | de = (struct ext4_dir_entry_2 *) dir_block->b_data; | 1853 | de = (struct ext4_dir_entry_2 *) dir_block->b_data; |
1836 | de->inode = cpu_to_le32(inode->i_ino); | 1854 | de->inode = cpu_to_le32(inode->i_ino); |
1837 | de->name_len = 1; | 1855 | de->name_len = 1; |
@@ -1848,10 +1866,12 @@ retry: | |||
1848 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 1866 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
1849 | inode->i_nlink = 2; | 1867 | inode->i_nlink = 2; |
1850 | BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); | 1868 | BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); |
1851 | ext4_handle_dirty_metadata(handle, dir, dir_block); | 1869 | err = ext4_handle_dirty_metadata(handle, dir, dir_block); |
1852 | brelse(dir_block); | 1870 | if (err) |
1853 | ext4_mark_inode_dirty(handle, inode); | 1871 | goto out_clear_inode; |
1854 | err = ext4_add_entry(handle, dentry, inode); | 1872 | err = ext4_mark_inode_dirty(handle, inode); |
1873 | if (!err) | ||
1874 | err = ext4_add_entry(handle, dentry, inode); | ||
1855 | if (err) { | 1875 | if (err) { |
1856 | out_clear_inode: | 1876 | out_clear_inode: |
1857 | clear_nlink(inode); | 1877 | clear_nlink(inode); |
@@ -1862,10 +1882,13 @@ out_clear_inode: | |||
1862 | } | 1882 | } |
1863 | ext4_inc_count(handle, dir); | 1883 | ext4_inc_count(handle, dir); |
1864 | ext4_update_dx_flag(dir); | 1884 | ext4_update_dx_flag(dir); |
1865 | ext4_mark_inode_dirty(handle, dir); | 1885 | err = ext4_mark_inode_dirty(handle, dir); |
1886 | if (err) | ||
1887 | goto out_clear_inode; | ||
1866 | d_instantiate(dentry, inode); | 1888 | d_instantiate(dentry, inode); |
1867 | unlock_new_inode(inode); | 1889 | unlock_new_inode(inode); |
1868 | out_stop: | 1890 | out_stop: |
1891 | brelse(dir_block); | ||
1869 | ext4_journal_stop(handle); | 1892 | ext4_journal_stop(handle); |
1870 | if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) | 1893 | if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) |
1871 | goto retry; | 1894 | goto retry; |
@@ -1928,7 +1951,7 @@ static int empty_dir(struct inode *inode) | |||
1928 | } | 1951 | } |
1929 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 1952 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1930 | } | 1953 | } |
1931 | if (!ext4_check_dir_entry(inode, de, bh, offset)) { | 1954 | if (ext4_check_dir_entry(inode, NULL, de, bh, offset)) { |
1932 | de = (struct ext4_dir_entry_2 *)(bh->b_data + | 1955 | de = (struct ext4_dir_entry_2 *)(bh->b_data + |
1933 | sb->s_blocksize); | 1956 | sb->s_blocksize); |
1934 | offset = (offset | (sb->s_blocksize - 1)) + 1; | 1957 | offset = (offset | (sb->s_blocksize - 1)) + 1; |
@@ -2173,6 +2196,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) | |||
2173 | struct ext4_dir_entry_2 *de; | 2196 | struct ext4_dir_entry_2 *de; |
2174 | handle_t *handle; | 2197 | handle_t *handle; |
2175 | 2198 | ||
2199 | trace_ext4_unlink_enter(dir, dentry); | ||
2176 | /* Initialize quotas before so that eventual writes go | 2200 | /* Initialize quotas before so that eventual writes go |
2177 | * in separate transaction */ | 2201 | * in separate transaction */ |
2178 | dquot_initialize(dir); | 2202 | dquot_initialize(dir); |
@@ -2218,6 +2242,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) | |||
2218 | end_unlink: | 2242 | end_unlink: |
2219 | ext4_journal_stop(handle); | 2243 | ext4_journal_stop(handle); |
2220 | brelse(bh); | 2244 | brelse(bh); |
2245 | trace_ext4_unlink_exit(dentry, retval); | ||
2221 | return retval; | 2246 | return retval; |
2222 | } | 2247 | } |
2223 | 2248 | ||
@@ -2227,6 +2252,7 @@ static int ext4_symlink(struct inode *dir, | |||
2227 | handle_t *handle; | 2252 | handle_t *handle; |
2228 | struct inode *inode; | 2253 | struct inode *inode; |
2229 | int l, err, retries = 0; | 2254 | int l, err, retries = 0; |
2255 | int credits; | ||
2230 | 2256 | ||
2231 | l = strlen(symname)+1; | 2257 | l = strlen(symname)+1; |
2232 | if (l > dir->i_sb->s_blocksize) | 2258 | if (l > dir->i_sb->s_blocksize) |
@@ -2234,10 +2260,26 @@ static int ext4_symlink(struct inode *dir, | |||
2234 | 2260 | ||
2235 | dquot_initialize(dir); | 2261 | dquot_initialize(dir); |
2236 | 2262 | ||
2263 | if (l > EXT4_N_BLOCKS * 4) { | ||
2264 | /* | ||
2265 | * For non-fast symlinks, we just allocate inode and put it on | ||
2266 | * orphan list in the first transaction => we need bitmap, | ||
2267 | * group descriptor, sb, inode block, quota blocks. | ||
2268 | */ | ||
2269 | credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); | ||
2270 | } else { | ||
2271 | /* | ||
2272 | * Fast symlink. We have to add entry to directory | ||
2273 | * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS), | ||
2274 | * allocate new inode (bitmap, group descriptor, inode block, | ||
2275 | * quota blocks, sb is already counted in previous macros). | ||
2276 | */ | ||
2277 | credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | ||
2278 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + | ||
2279 | EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); | ||
2280 | } | ||
2237 | retry: | 2281 | retry: |
2238 | handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | 2282 | handle = ext4_journal_start(dir, credits); |
2239 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + | ||
2240 | EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); | ||
2241 | if (IS_ERR(handle)) | 2283 | if (IS_ERR(handle)) |
2242 | return PTR_ERR(handle); | 2284 | return PTR_ERR(handle); |
2243 | 2285 | ||
@@ -2250,21 +2292,44 @@ retry: | |||
2250 | if (IS_ERR(inode)) | 2292 | if (IS_ERR(inode)) |
2251 | goto out_stop; | 2293 | goto out_stop; |
2252 | 2294 | ||
2253 | if (l > sizeof(EXT4_I(inode)->i_data)) { | 2295 | if (l > EXT4_N_BLOCKS * 4) { |
2254 | inode->i_op = &ext4_symlink_inode_operations; | 2296 | inode->i_op = &ext4_symlink_inode_operations; |
2255 | ext4_set_aops(inode); | 2297 | ext4_set_aops(inode); |
2256 | /* | 2298 | /* |
2257 | * page_symlink() calls into ext4_prepare/commit_write. | 2299 | * We cannot call page_symlink() with transaction started |
2258 | * We have a transaction open. All is sweetness. It also sets | 2300 | * because it calls into ext4_write_begin() which can wait |
2259 | * i_size in generic_commit_write(). | 2301 | * for transaction commit if we are running out of space |
2302 | * and thus we deadlock. So we have to stop transaction now | ||
2303 | * and restart it when symlink contents are written. | ||
2304 | * | ||
2305 | * To keep fs consistent in case of crash, we have to put inode | ||
2306 | * on the orphan list in the meantime. | ||
2260 | */ | 2307 | */ |
2308 | drop_nlink(inode); | ||
2309 | err = ext4_orphan_add(handle, inode); | ||
2310 | ext4_journal_stop(handle); | ||
2311 | if (err) | ||
2312 | goto err_drop_inode; | ||
2261 | err = __page_symlink(inode, symname, l, 1); | 2313 | err = __page_symlink(inode, symname, l, 1); |
2314 | if (err) | ||
2315 | goto err_drop_inode; | ||
2316 | /* | ||
2317 | * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS | ||
2318 | * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified | ||
2319 | */ | ||
2320 | handle = ext4_journal_start(dir, | ||
2321 | EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | ||
2322 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1); | ||
2323 | if (IS_ERR(handle)) { | ||
2324 | err = PTR_ERR(handle); | ||
2325 | goto err_drop_inode; | ||
2326 | } | ||
2327 | inc_nlink(inode); | ||
2328 | err = ext4_orphan_del(handle, inode); | ||
2262 | if (err) { | 2329 | if (err) { |
2330 | ext4_journal_stop(handle); | ||
2263 | clear_nlink(inode); | 2331 | clear_nlink(inode); |
2264 | unlock_new_inode(inode); | 2332 | goto err_drop_inode; |
2265 | ext4_mark_inode_dirty(handle, inode); | ||
2266 | iput(inode); | ||
2267 | goto out_stop; | ||
2268 | } | 2333 | } |
2269 | } else { | 2334 | } else { |
2270 | /* clear the extent format for fast symlink */ | 2335 | /* clear the extent format for fast symlink */ |
@@ -2280,6 +2345,10 @@ out_stop: | |||
2280 | if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) | 2345 | if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) |
2281 | goto retry; | 2346 | goto retry; |
2282 | return err; | 2347 | return err; |
2348 | err_drop_inode: | ||
2349 | unlock_new_inode(inode); | ||
2350 | iput(inode); | ||
2351 | return err; | ||
2283 | } | 2352 | } |
2284 | 2353 | ||
2285 | static int ext4_link(struct dentry *old_dentry, | 2354 | static int ext4_link(struct dentry *old_dentry, |
@@ -2294,13 +2363,6 @@ static int ext4_link(struct dentry *old_dentry, | |||
2294 | 2363 | ||
2295 | dquot_initialize(dir); | 2364 | dquot_initialize(dir); |
2296 | 2365 | ||
2297 | /* | ||
2298 | * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing | ||
2299 | * otherwise has the potential to corrupt the orphan inode list. | ||
2300 | */ | ||
2301 | if (inode->i_nlink == 0) | ||
2302 | return -ENOENT; | ||
2303 | |||
2304 | retry: | 2366 | retry: |
2305 | handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | 2367 | handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + |
2306 | EXT4_INDEX_EXTRA_TRANS_BLOCKS); | 2368 | EXT4_INDEX_EXTRA_TRANS_BLOCKS); |
@@ -2312,7 +2374,7 @@ retry: | |||
2312 | 2374 | ||
2313 | inode->i_ctime = ext4_current_time(inode); | 2375 | inode->i_ctime = ext4_current_time(inode); |
2314 | ext4_inc_count(handle, inode); | 2376 | ext4_inc_count(handle, inode); |
2315 | atomic_inc(&inode->i_count); | 2377 | ihold(inode); |
2316 | 2378 | ||
2317 | err = ext4_add_entry(handle, dentry, inode); | 2379 | err = ext4_add_entry(handle, dentry, inode); |
2318 | if (!err) { | 2380 | if (!err) { |
@@ -2399,6 +2461,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2399 | if (!new_inode && new_dir != old_dir && | 2461 | if (!new_inode && new_dir != old_dir && |
2400 | EXT4_DIR_LINK_MAX(new_dir)) | 2462 | EXT4_DIR_LINK_MAX(new_dir)) |
2401 | goto end_rename; | 2463 | goto end_rename; |
2464 | BUFFER_TRACE(dir_bh, "get_write_access"); | ||
2465 | retval = ext4_journal_get_write_access(handle, dir_bh); | ||
2466 | if (retval) | ||
2467 | goto end_rename; | ||
2402 | } | 2468 | } |
2403 | if (!new_bh) { | 2469 | if (!new_bh) { |
2404 | retval = ext4_add_entry(handle, new_dentry, old_inode); | 2470 | retval = ext4_add_entry(handle, new_dentry, old_inode); |
@@ -2406,7 +2472,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2406 | goto end_rename; | 2472 | goto end_rename; |
2407 | } else { | 2473 | } else { |
2408 | BUFFER_TRACE(new_bh, "get write access"); | 2474 | BUFFER_TRACE(new_bh, "get write access"); |
2409 | ext4_journal_get_write_access(handle, new_bh); | 2475 | retval = ext4_journal_get_write_access(handle, new_bh); |
2476 | if (retval) | ||
2477 | goto end_rename; | ||
2410 | new_de->inode = cpu_to_le32(old_inode->i_ino); | 2478 | new_de->inode = cpu_to_le32(old_inode->i_ino); |
2411 | if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb, | 2479 | if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb, |
2412 | EXT4_FEATURE_INCOMPAT_FILETYPE)) | 2480 | EXT4_FEATURE_INCOMPAT_FILETYPE)) |
@@ -2416,7 +2484,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2416 | ext4_current_time(new_dir); | 2484 | ext4_current_time(new_dir); |
2417 | ext4_mark_inode_dirty(handle, new_dir); | 2485 | ext4_mark_inode_dirty(handle, new_dir); |
2418 | BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata"); | 2486 | BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata"); |
2419 | ext4_handle_dirty_metadata(handle, new_dir, new_bh); | 2487 | retval = ext4_handle_dirty_metadata(handle, new_dir, new_bh); |
2488 | if (unlikely(retval)) { | ||
2489 | ext4_std_error(new_dir->i_sb, retval); | ||
2490 | goto end_rename; | ||
2491 | } | ||
2420 | brelse(new_bh); | 2492 | brelse(new_bh); |
2421 | new_bh = NULL; | 2493 | new_bh = NULL; |
2422 | } | 2494 | } |
@@ -2463,12 +2535,14 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2463 | old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); | 2535 | old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); |
2464 | ext4_update_dx_flag(old_dir); | 2536 | ext4_update_dx_flag(old_dir); |
2465 | if (dir_bh) { | 2537 | if (dir_bh) { |
2466 | BUFFER_TRACE(dir_bh, "get_write_access"); | ||
2467 | ext4_journal_get_write_access(handle, dir_bh); | ||
2468 | PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = | 2538 | PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = |
2469 | cpu_to_le32(new_dir->i_ino); | 2539 | cpu_to_le32(new_dir->i_ino); |
2470 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); | 2540 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); |
2471 | ext4_handle_dirty_metadata(handle, old_dir, dir_bh); | 2541 | retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh); |
2542 | if (retval) { | ||
2543 | ext4_std_error(old_dir->i_sb, retval); | ||
2544 | goto end_rename; | ||
2545 | } | ||
2472 | ext4_dec_count(handle, old_dir); | 2546 | ext4_dec_count(handle, old_dir); |
2473 | if (new_inode) { | 2547 | if (new_inode) { |
2474 | /* checked empty_dir above, can't have another parent, | 2548 | /* checked empty_dir above, can't have another parent, |
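The ext4_symlink() rework above sizes the journal handle by symlink type: a target longer than EXT4_N_BLOCKS * 4 bytes (60 on ext4) cannot live in the inode's i_data, so the inode is created and put on the orphan list in a small first transaction, the symlink page is written outside any handle, and the directory entry is added under a second handle; the long-target path therefore starts with fewer credits than the fast-symlink path. A sketch of the credit arithmetic (the three macro values below are illustrative stand-ins; the real ones depend on filesystem configuration):

#include <stdio.h>

#define EXT4_N_BLOCKS			15
#define DATA_TRANS_BLOCKS		62	/* example value only */
#define INDEX_EXTRA_TRANS_BLOCKS	12	/* example value only */
#define MAXQUOTAS_INIT_BLOCKS		0	/* example: quotas disabled */

static int symlink_credits(int target_len)
{
	if (target_len > EXT4_N_BLOCKS * 4)
		/* slow symlink: first transaction only allocates the inode
		 * and puts it on the orphan list */
		return 4 + MAXQUOTAS_INIT_BLOCKS;
	/* fast symlink: directory entry plus new inode in one transaction */
	return DATA_TRANS_BLOCKS + INDEX_EXTRA_TRANS_BLOCKS + 3 +
	       MAXQUOTAS_INIT_BLOCKS;
}

int main(void)
{
	printf("20-byte target:  %d credits\n", symlink_credits(20));  /* 77 */
	printf("200-byte target: %d credits\n", symlink_credits(200)); /* 4 */
	return 0;
}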
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c new file mode 100644 index 000000000000..7bb8f76d470a --- /dev/null +++ b/fs/ext4/page-io.c | |||
@@ -0,0 +1,417 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/page-io.c | ||
3 | * | ||
4 | * This contains the new page_io functions for ext4 | ||
5 | * | ||
6 | * Written by Theodore Ts'o, 2010. | ||
7 | */ | ||
8 | |||
9 | #include <linux/module.h> | ||
10 | #include <linux/fs.h> | ||
11 | #include <linux/time.h> | ||
12 | #include <linux/jbd2.h> | ||
13 | #include <linux/highuid.h> | ||
14 | #include <linux/pagemap.h> | ||
15 | #include <linux/quotaops.h> | ||
16 | #include <linux/string.h> | ||
17 | #include <linux/buffer_head.h> | ||
18 | #include <linux/writeback.h> | ||
19 | #include <linux/pagevec.h> | ||
20 | #include <linux/mpage.h> | ||
21 | #include <linux/namei.h> | ||
22 | #include <linux/uio.h> | ||
23 | #include <linux/bio.h> | ||
24 | #include <linux/workqueue.h> | ||
25 | #include <linux/kernel.h> | ||
26 | #include <linux/slab.h> | ||
27 | |||
28 | #include "ext4_jbd2.h" | ||
29 | #include "xattr.h" | ||
30 | #include "acl.h" | ||
31 | #include "ext4_extents.h" | ||
32 | |||
33 | static struct kmem_cache *io_page_cachep, *io_end_cachep; | ||
34 | |||
35 | int __init ext4_init_pageio(void) | ||
36 | { | ||
37 | io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); | ||
38 | if (io_page_cachep == NULL) | ||
39 | return -ENOMEM; | ||
40 | io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT); | ||
41 | if (io_end_cachep == NULL) { | ||
42 | kmem_cache_destroy(io_page_cachep); | ||
43 | return -ENOMEM; | ||
44 | } | ||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | void ext4_exit_pageio(void) | ||
49 | { | ||
50 | kmem_cache_destroy(io_end_cachep); | ||
51 | kmem_cache_destroy(io_page_cachep); | ||
52 | } | ||
53 | |||
54 | void ext4_ioend_wait(struct inode *inode) | ||
55 | { | ||
56 | wait_queue_head_t *wq = ext4_ioend_wq(inode); | ||
57 | |||
58 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); | ||
59 | } | ||
60 | |||
61 | static void put_io_page(struct ext4_io_page *io_page) | ||
62 | { | ||
63 | if (atomic_dec_and_test(&io_page->p_count)) { | ||
64 | end_page_writeback(io_page->p_page); | ||
65 | put_page(io_page->p_page); | ||
66 | kmem_cache_free(io_page_cachep, io_page); | ||
67 | } | ||
68 | } | ||
69 | |||
70 | void ext4_free_io_end(ext4_io_end_t *io) | ||
71 | { | ||
72 | int i; | ||
73 | wait_queue_head_t *wq; | ||
74 | |||
75 | BUG_ON(!io); | ||
76 | if (io->page) | ||
77 | put_page(io->page); | ||
78 | for (i = 0; i < io->num_io_pages; i++) | ||
79 | put_io_page(io->pages[i]); | ||
80 | io->num_io_pages = 0; | ||
81 | wq = ext4_ioend_wq(io->inode); | ||
82 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && | ||
83 | waitqueue_active(wq)) | ||
84 | wake_up_all(wq); | ||
85 | kmem_cache_free(io_end_cachep, io); | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * check a range of space and convert unwritten extents to written. | ||
90 | */ | ||
91 | int ext4_end_io_nolock(ext4_io_end_t *io) | ||
92 | { | ||
93 | struct inode *inode = io->inode; | ||
94 | loff_t offset = io->offset; | ||
95 | ssize_t size = io->size; | ||
96 | wait_queue_head_t *wq; | ||
97 | int ret = 0; | ||
98 | |||
99 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | ||
100 | "list->prev 0x%p\n", | ||
101 | io, inode->i_ino, io->list.next, io->list.prev); | ||
102 | |||
103 | if (list_empty(&io->list)) | ||
104 | return ret; | ||
105 | |||
106 | if (!(io->flag & EXT4_IO_END_UNWRITTEN)) | ||
107 | return ret; | ||
108 | |||
109 | ret = ext4_convert_unwritten_extents(inode, offset, size); | ||
110 | if (ret < 0) { | ||
111 | printk(KERN_EMERG "%s: failed to convert unwritten " | ||
112 | "extents to written extents, error is %d " | ||
113 | "io is still on inode %lu aio dio list\n", | ||
114 | __func__, ret, inode->i_ino); | ||
115 | return ret; | ||
116 | } | ||
117 | |||
118 | if (io->iocb) | ||
119 | aio_complete(io->iocb, io->result, 0); | ||
120 | /* clear the DIO AIO unwritten flag */ | ||
121 | if (io->flag & EXT4_IO_END_UNWRITTEN) { | ||
122 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
123 | /* Wake up anyone waiting on unwritten extent conversion */ | ||
124 | wq = ext4_ioend_wq(io->inode); | ||
125 | if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) && | ||
126 | waitqueue_active(wq)) { | ||
127 | wake_up_all(wq); | ||
128 | } | ||
129 | } | ||
130 | |||
131 | return ret; | ||
132 | } | ||
133 | |||
134 | /* | ||
135 | * work on completed aio dio IO, to convert unwritten extents to written extents | ||
136 | */ | ||
137 | static void ext4_end_io_work(struct work_struct *work) | ||
138 | { | ||
139 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | ||
140 | struct inode *inode = io->inode; | ||
141 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
142 | unsigned long flags; | ||
143 | int ret; | ||
144 | |||
145 | mutex_lock(&inode->i_mutex); | ||
146 | ret = ext4_end_io_nolock(io); | ||
147 | if (ret < 0) { | ||
148 | mutex_unlock(&inode->i_mutex); | ||
149 | return; | ||
150 | } | ||
151 | |||
152 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
153 | if (!list_empty(&io->list)) | ||
154 | list_del_init(&io->list); | ||
155 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
156 | mutex_unlock(&inode->i_mutex); | ||
157 | ext4_free_io_end(io); | ||
158 | } | ||
159 | |||
160 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | ||
161 | { | ||
162 | ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags); | ||
163 | if (io) { | ||
164 | atomic_inc(&EXT4_I(inode)->i_ioend_count); | ||
165 | io->inode = inode; | ||
166 | INIT_WORK(&io->work, ext4_end_io_work); | ||
167 | INIT_LIST_HEAD(&io->list); | ||
168 | } | ||
169 | return io; | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * Print a buffer I/O error compatible with fs/buffer.c. This | ||
174 | * provides compatibility with dmesg scrapers that look for a specific | ||
175 | * buffer I/O error message. We really need a unified error reporting | ||
176 | * structure to userspace ala Digital Unix's uerf system, but it's | ||
177 | * probably not going to happen in my lifetime, due to LKML politics... | ||
178 | */ | ||
179 | static void buffer_io_error(struct buffer_head *bh) | ||
180 | { | ||
181 | char b[BDEVNAME_SIZE]; | ||
182 | printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", | ||
183 | bdevname(bh->b_bdev, b), | ||
184 | (unsigned long long)bh->b_blocknr); | ||
185 | } | ||
186 | |||
187 | static void ext4_end_bio(struct bio *bio, int error) | ||
188 | { | ||
189 | ext4_io_end_t *io_end = bio->bi_private; | ||
190 | struct workqueue_struct *wq; | ||
191 | struct inode *inode; | ||
192 | unsigned long flags; | ||
193 | int i; | ||
194 | sector_t bi_sector = bio->bi_sector; | ||
195 | |||
196 | BUG_ON(!io_end); | ||
197 | bio->bi_private = NULL; | ||
198 | bio->bi_end_io = NULL; | ||
199 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
200 | error = 0; | ||
201 | bio_put(bio); | ||
202 | |||
203 | for (i = 0; i < io_end->num_io_pages; i++) { | ||
204 | struct page *page = io_end->pages[i]->p_page; | ||
205 | struct buffer_head *bh, *head; | ||
206 | loff_t offset; | ||
207 | loff_t io_end_offset; | ||
208 | |||
209 | if (error) { | ||
210 | SetPageError(page); | ||
211 | set_bit(AS_EIO, &page->mapping->flags); | ||
212 | head = page_buffers(page); | ||
213 | BUG_ON(!head); | ||
214 | |||
215 | io_end_offset = io_end->offset + io_end->size; | ||
216 | |||
217 | offset = (sector_t) page->index << PAGE_CACHE_SHIFT; | ||
218 | bh = head; | ||
219 | do { | ||
220 | if ((offset >= io_end->offset) && | ||
221 | (offset+bh->b_size <= io_end_offset)) | ||
222 | buffer_io_error(bh); | ||
223 | |||
224 | offset += bh->b_size; | ||
225 | bh = bh->b_this_page; | ||
226 | } while (bh != head); | ||
227 | } | ||
228 | |||
229 | put_io_page(io_end->pages[i]); | ||
230 | } | ||
231 | io_end->num_io_pages = 0; | ||
232 | inode = io_end->inode; | ||
233 | |||
234 | if (error) { | ||
235 | io_end->flag |= EXT4_IO_END_ERROR; | ||
236 | ext4_warning(inode->i_sb, "I/O error writing to inode %lu " | ||
237 | "(offset %llu size %ld starting block %llu)", | ||
238 | inode->i_ino, | ||
239 | (unsigned long long) io_end->offset, | ||
240 | (long) io_end->size, | ||
241 | (unsigned long long) | ||
242 | bi_sector >> (inode->i_blkbits - 9)); | ||
243 | } | ||
244 | |||
245 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | ||
246 | ext4_free_io_end(io_end); | ||
247 | return; | ||
248 | } | ||
249 | |||
250 | /* Add the io_end to per-inode completed io list */ | ||
251 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
252 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); | ||
253 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
254 | |||
255 | wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; | ||
256 | /* queue the work to convert unwritten extents to written */ | ||
257 | queue_work(wq, &io_end->work); | ||
258 | } | ||
259 | |||
260 | void ext4_io_submit(struct ext4_io_submit *io) | ||
261 | { | ||
262 | struct bio *bio = io->io_bio; | ||
263 | |||
264 | if (bio) { | ||
265 | bio_get(io->io_bio); | ||
266 | submit_bio(io->io_op, io->io_bio); | ||
267 | BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); | ||
268 | bio_put(io->io_bio); | ||
269 | } | ||
270 | io->io_bio = NULL; | ||
271 | io->io_op = 0; | ||
272 | io->io_end = NULL; | ||
273 | } | ||
274 | |||
275 | static int io_submit_init(struct ext4_io_submit *io, | ||
276 | struct inode *inode, | ||
277 | struct writeback_control *wbc, | ||
278 | struct buffer_head *bh) | ||
279 | { | ||
280 | ext4_io_end_t *io_end; | ||
281 | struct page *page = bh->b_page; | ||
282 | int nvecs = bio_get_nr_vecs(bh->b_bdev); | ||
283 | struct bio *bio; | ||
284 | |||
285 | io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
286 | if (!io_end) | ||
287 | return -ENOMEM; | ||
288 | do { | ||
289 | bio = bio_alloc(GFP_NOIO, nvecs); | ||
290 | nvecs >>= 1; | ||
291 | } while (bio == NULL); | ||
292 | |||
293 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | ||
294 | bio->bi_bdev = bh->b_bdev; | ||
295 | bio->bi_private = io->io_end = io_end; | ||
296 | bio->bi_end_io = ext4_end_bio; | ||
297 | |||
298 | io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); | ||
299 | |||
300 | io->io_bio = bio; | ||
301 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | ||
302 | io->io_next_block = bh->b_blocknr; | ||
303 | return 0; | ||
304 | } | ||
305 | |||
306 | static int io_submit_add_bh(struct ext4_io_submit *io, | ||
307 | struct ext4_io_page *io_page, | ||
308 | struct inode *inode, | ||
309 | struct writeback_control *wbc, | ||
310 | struct buffer_head *bh) | ||
311 | { | ||
312 | ext4_io_end_t *io_end; | ||
313 | int ret; | ||
314 | |||
315 | if (buffer_new(bh)) { | ||
316 | clear_buffer_new(bh); | ||
317 | unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); | ||
318 | } | ||
319 | |||
320 | if (!buffer_mapped(bh) || buffer_delay(bh)) { | ||
321 | if (!buffer_mapped(bh)) | ||
322 | clear_buffer_dirty(bh); | ||
323 | if (io->io_bio) | ||
324 | ext4_io_submit(io); | ||
325 | return 0; | ||
326 | } | ||
327 | |||
328 | if (io->io_bio && bh->b_blocknr != io->io_next_block) { | ||
329 | submit_and_retry: | ||
330 | ext4_io_submit(io); | ||
331 | } | ||
332 | if (io->io_bio == NULL) { | ||
333 | ret = io_submit_init(io, inode, wbc, bh); | ||
334 | if (ret) | ||
335 | return ret; | ||
336 | } | ||
337 | io_end = io->io_end; | ||
338 | if ((io_end->num_io_pages >= MAX_IO_PAGES) && | ||
339 | (io_end->pages[io_end->num_io_pages-1] != io_page)) | ||
340 | goto submit_and_retry; | ||
341 | if (buffer_uninit(bh)) | ||
342 | io->io_end->flag |= EXT4_IO_END_UNWRITTEN; | ||
343 | io->io_end->size += bh->b_size; | ||
344 | io->io_next_block++; | ||
345 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | ||
346 | if (ret != bh->b_size) | ||
347 | goto submit_and_retry; | ||
348 | if ((io_end->num_io_pages == 0) || | ||
349 | (io_end->pages[io_end->num_io_pages-1] != io_page)) { | ||
350 | io_end->pages[io_end->num_io_pages++] = io_page; | ||
351 | atomic_inc(&io_page->p_count); | ||
352 | } | ||
353 | return 0; | ||
354 | } | ||
355 | |||
356 | int ext4_bio_write_page(struct ext4_io_submit *io, | ||
357 | struct page *page, | ||
358 | int len, | ||
359 | struct writeback_control *wbc) | ||
360 | { | ||
361 | struct inode *inode = page->mapping->host; | ||
362 | unsigned block_start, block_end, blocksize; | ||
363 | struct ext4_io_page *io_page; | ||
364 | struct buffer_head *bh, *head; | ||
365 | int ret = 0; | ||
366 | |||
367 | blocksize = 1 << inode->i_blkbits; | ||
368 | |||
369 | BUG_ON(!PageLocked(page)); | ||
370 | BUG_ON(PageWriteback(page)); | ||
371 | |||
372 | io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); | ||
373 | if (!io_page) { | ||
374 | set_page_dirty(page); | ||
375 | unlock_page(page); | ||
376 | return -ENOMEM; | ||
377 | } | ||
378 | io_page->p_page = page; | ||
379 | atomic_set(&io_page->p_count, 1); | ||
380 | get_page(page); | ||
381 | set_page_writeback(page); | ||
382 | ClearPageError(page); | ||
383 | |||
384 | for (bh = head = page_buffers(page), block_start = 0; | ||
385 | bh != head || !block_start; | ||
386 | block_start = block_end, bh = bh->b_this_page) { | ||
387 | |||
388 | block_end = block_start + blocksize; | ||
389 | if (block_start >= len) { | ||
390 | clear_buffer_dirty(bh); | ||
391 | set_buffer_uptodate(bh); | ||
392 | continue; | ||
393 | } | ||
394 | clear_buffer_dirty(bh); | ||
395 | ret = io_submit_add_bh(io, io_page, inode, wbc, bh); | ||
396 | if (ret) { | ||
397 | /* | ||
398 | * We only get here on ENOMEM. Not much else | ||
399 | * we can do but mark the page as dirty, and | ||
400 | * hope for better luck next time. | ||
401 | */ | ||
402 | set_page_dirty(page); | ||
403 | break; | ||
404 | } | ||
405 | } | ||
406 | unlock_page(page); | ||
407 | /* | ||
408 | * If the page was truncated before we could do the writeback, | ||
409 | * or we had a memory allocation error while trying to write | ||
410 | * the first buffer head, we won't have submitted any pages for | ||
411 | * I/O. In that case we need to make sure we've cleared the | ||
412 | * PageWriteback bit from the page to prevent the system from | ||
413 | * wedging later on. | ||
414 | */ | ||
415 | put_io_page(io_page); | ||
416 | return ret; | ||
417 | } | ||
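Taken together, io_submit_add_bh() and ext4_bio_write_page() implement a simple batching rule: buffer heads keep joining the current bio while their block numbers stay contiguous (tracked in io->io_next_block), and a discontinuity, a full io_end page array, or a failed bio_add_page() submits the batch and starts a new one. A small user-space sketch of just the contiguity rule (sketch_io and its helpers are illustrative, not kernel APIs):

    #include <stdio.h>

    struct sketch_io {
        long next_block;    /* analogous to io->io_next_block */
        int  nr_pending;    /* buffers batched into the current "bio" */
    };

    static void sketch_submit(struct sketch_io *io)
    {
        if (io->nr_pending)
            printf("submit bio: %d buffer(s) ending before block %ld\n",
                   io->nr_pending, io->next_block);
        io->nr_pending = 0;
    }

    static void sketch_add_bh(struct sketch_io *io, long blocknr)
    {
        if (io->nr_pending && blocknr != io->next_block)
            sketch_submit(io);          /* discontiguous: flush the batch */
        io->nr_pending++;
        io->next_block = blocknr + 1;   /* expect the next block */
    }

    int main(void)
    {
        struct sketch_io io = { 0, 0 };
        long blocks[] = { 100, 101, 102, 200, 201, 500 };

        for (unsigned i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++)
            sketch_add_bh(&io, blocks[i]);
        sketch_submit(&io);             /* flush the tail */
        return 0;
    }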
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ca5c8aa00a2f..80bbc9c60c24 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -220,29 +220,25 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
220 | memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); | 220 | memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); |
221 | set_buffer_uptodate(gdb); | 221 | set_buffer_uptodate(gdb); |
222 | unlock_buffer(gdb); | 222 | unlock_buffer(gdb); |
223 | ext4_handle_dirty_metadata(handle, NULL, gdb); | 223 | err = ext4_handle_dirty_metadata(handle, NULL, gdb); |
224 | if (unlikely(err)) { | ||
225 | brelse(gdb); | ||
226 | goto exit_bh; | ||
227 | } | ||
224 | ext4_set_bit(bit, bh->b_data); | 228 | ext4_set_bit(bit, bh->b_data); |
225 | brelse(gdb); | 229 | brelse(gdb); |
226 | } | 230 | } |
227 | 231 | ||
228 | /* Zero out all of the reserved backup group descriptor table blocks */ | 232 | /* Zero out all of the reserved backup group descriptor table blocks */ |
229 | for (i = 0, bit = gdblocks + 1, block = start + bit; | 233 | ext4_debug("clear reserved GDT blocks %#04llx -> %#04x\n", |
230 | i < reserved_gdb; i++, block++, bit++) { | 234 | gdblocks + start + 1, reserved_gdb); |
231 | struct buffer_head *gdb; | 235 | err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, |
232 | 236 | GFP_NOFS); | |
233 | ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit); | 237 | if (err) |
234 | 238 | goto exit_bh; | |
235 | if ((err = extend_or_restart_transaction(handle, 1, bh))) | 239 | for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++) |
236 | goto exit_bh; | ||
237 | |||
238 | if (IS_ERR(gdb = bclean(handle, sb, block))) { | ||
239 | err = PTR_ERR(gdb); | ||
240 | goto exit_bh; | ||
241 | } | ||
242 | ext4_handle_dirty_metadata(handle, NULL, gdb); | ||
243 | ext4_set_bit(bit, bh->b_data); | 240 | ext4_set_bit(bit, bh->b_data); |
244 | brelse(gdb); | 241 | |
245 | } | ||
246 | ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, | 242 | ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, |
247 | input->block_bitmap - start); | 243 | input->block_bitmap - start); |
248 | ext4_set_bit(input->block_bitmap - start, bh->b_data); | 244 | ext4_set_bit(input->block_bitmap - start, bh->b_data); |
@@ -251,29 +247,26 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
251 | ext4_set_bit(input->inode_bitmap - start, bh->b_data); | 247 | ext4_set_bit(input->inode_bitmap - start, bh->b_data); |
252 | 248 | ||
253 | /* Zero out all of the inode table blocks */ | 249 | /* Zero out all of the inode table blocks */ |
254 | for (i = 0, block = input->inode_table, bit = block - start; | 250 | block = input->inode_table; |
255 | i < sbi->s_itb_per_group; i++, bit++, block++) { | 251 | ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", |
256 | struct buffer_head *it; | 252 | block, sbi->s_itb_per_group); |
257 | 253 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); | |
258 | ext4_debug("clear inode block %#04llx (+%d)\n", block, bit); | 254 | if (err) |
259 | 255 | goto exit_bh; | |
260 | if ((err = extend_or_restart_transaction(handle, 1, bh))) | 256 | for (i = 0, bit = input->inode_table - start; |
261 | goto exit_bh; | 257 | i < sbi->s_itb_per_group; i++, bit++) |
262 | |||
263 | if (IS_ERR(it = bclean(handle, sb, block))) { | ||
264 | err = PTR_ERR(it); | ||
265 | goto exit_bh; | ||
266 | } | ||
267 | ext4_handle_dirty_metadata(handle, NULL, it); | ||
268 | brelse(it); | ||
269 | ext4_set_bit(bit, bh->b_data); | 258 | ext4_set_bit(bit, bh->b_data); |
270 | } | ||
271 | 259 | ||
272 | if ((err = extend_or_restart_transaction(handle, 2, bh))) | 260 | if ((err = extend_or_restart_transaction(handle, 2, bh))) |
273 | goto exit_bh; | 261 | goto exit_bh; |
274 | 262 | ||
275 | mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data); | 263 | ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, |
276 | ext4_handle_dirty_metadata(handle, NULL, bh); | 264 | bh->b_data); |
265 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
266 | if (unlikely(err)) { | ||
267 | ext4_std_error(sb, err); | ||
268 | goto exit_bh; | ||
269 | } | ||
277 | brelse(bh); | 270 | brelse(bh); |
278 | /* Mark unused entries in inode bitmap used */ | 271 | /* Mark unused entries in inode bitmap used */ |
279 | ext4_debug("clear inode bitmap %#04llx (+%llu)\n", | 272 | ext4_debug("clear inode bitmap %#04llx (+%llu)\n", |
@@ -283,9 +276,11 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
283 | goto exit_journal; | 276 | goto exit_journal; |
284 | } | 277 | } |
285 | 278 | ||
286 | mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | 279 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, |
287 | bh->b_data); | 280 | bh->b_data); |
288 | ext4_handle_dirty_metadata(handle, NULL, bh); | 281 | err = ext4_handle_dirty_metadata(handle, NULL, bh); |
282 | if (unlikely(err)) | ||
283 | ext4_std_error(sb, err); | ||
289 | exit_bh: | 284 | exit_bh: |
290 | brelse(bh); | 285 | brelse(bh); |
291 | 286 | ||
@@ -437,17 +432,21 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
437 | goto exit_dind; | 432 | goto exit_dind; |
438 | } | 433 | } |
439 | 434 | ||
440 | if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh))) | 435 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); |
436 | if (unlikely(err)) | ||
441 | goto exit_dind; | 437 | goto exit_dind; |
442 | 438 | ||
443 | if ((err = ext4_journal_get_write_access(handle, *primary))) | 439 | err = ext4_journal_get_write_access(handle, *primary); |
440 | if (unlikely(err)) | ||
444 | goto exit_sbh; | 441 | goto exit_sbh; |
445 | 442 | ||
446 | if ((err = ext4_journal_get_write_access(handle, dind))) | 443 | err = ext4_journal_get_write_access(handle, dind); |
447 | goto exit_primary; | 444 | if (unlikely(err)) |
445 | ext4_std_error(sb, err); | ||
448 | 446 | ||
449 | /* ext4_reserve_inode_write() gets a reference on the iloc */ | 447 | /* ext4_reserve_inode_write() gets a reference on the iloc */ |
450 | if ((err = ext4_reserve_inode_write(handle, inode, &iloc))) | 448 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
449 | if (unlikely(err)) | ||
451 | goto exit_dindj; | 450 | goto exit_dindj; |
452 | 451 | ||
453 | n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), | 452 | n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), |
@@ -469,12 +468,20 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
469 | * reserved inode, and will become GDT blocks (primary and backup). | 468 | * reserved inode, and will become GDT blocks (primary and backup). |
470 | */ | 469 | */ |
471 | data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0; | 470 | data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0; |
472 | ext4_handle_dirty_metadata(handle, NULL, dind); | 471 | err = ext4_handle_dirty_metadata(handle, NULL, dind); |
473 | brelse(dind); | 472 | if (unlikely(err)) { |
473 | ext4_std_error(sb, err); | ||
474 | goto exit_inode; | ||
475 | } | ||
474 | inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; | 476 | inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; |
475 | ext4_mark_iloc_dirty(handle, inode, &iloc); | 477 | ext4_mark_iloc_dirty(handle, inode, &iloc); |
476 | memset((*primary)->b_data, 0, sb->s_blocksize); | 478 | memset((*primary)->b_data, 0, sb->s_blocksize); |
477 | ext4_handle_dirty_metadata(handle, NULL, *primary); | 479 | err = ext4_handle_dirty_metadata(handle, NULL, *primary); |
480 | if (unlikely(err)) { | ||
481 | ext4_std_error(sb, err); | ||
482 | goto exit_inode; | ||
483 | } | ||
484 | brelse(dind); | ||
478 | 485 | ||
479 | o_group_desc = EXT4_SB(sb)->s_group_desc; | 486 | o_group_desc = EXT4_SB(sb)->s_group_desc; |
480 | memcpy(n_group_desc, o_group_desc, | 487 | memcpy(n_group_desc, o_group_desc, |
@@ -485,19 +492,19 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
485 | kfree(o_group_desc); | 492 | kfree(o_group_desc); |
486 | 493 | ||
487 | le16_add_cpu(&es->s_reserved_gdt_blocks, -1); | 494 | le16_add_cpu(&es->s_reserved_gdt_blocks, -1); |
488 | ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); | 495 | err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); |
496 | if (err) | ||
497 | ext4_std_error(sb, err); | ||
489 | 498 | ||
490 | return 0; | 499 | return err; |
491 | 500 | ||
492 | exit_inode: | 501 | exit_inode: |
493 | /* ext4_journal_release_buffer(handle, iloc.bh); */ | 502 | /* ext4_handle_release_buffer(handle, iloc.bh); */ |
494 | brelse(iloc.bh); | 503 | brelse(iloc.bh); |
495 | exit_dindj: | 504 | exit_dindj: |
496 | /* ext4_journal_release_buffer(handle, dind); */ | 505 | /* ext4_handle_release_buffer(handle, dind); */ |
497 | exit_primary: | ||
498 | /* ext4_journal_release_buffer(handle, *primary); */ | ||
499 | exit_sbh: | 506 | exit_sbh: |
500 | /* ext4_journal_release_buffer(handle, *primary); */ | 507 | /* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */ |
501 | exit_dind: | 508 | exit_dind: |
502 | brelse(dind); | 509 | brelse(dind); |
503 | exit_bh: | 510 | exit_bh: |
@@ -579,7 +586,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, | |||
579 | /* | 586 | /* |
580 | int j; | 587 | int j; |
581 | for (j = 0; j < i; j++) | 588 | for (j = 0; j < i; j++) |
582 | ext4_journal_release_buffer(handle, primary[j]); | 589 | ext4_handle_release_buffer(handle, primary[j]); |
583 | */ | 590 | */ |
584 | goto exit_bh; | 591 | goto exit_bh; |
585 | } | 592 | } |
@@ -680,7 +687,9 @@ static void update_backups(struct super_block *sb, | |||
680 | memset(bh->b_data + size, 0, rest); | 687 | memset(bh->b_data + size, 0, rest); |
681 | set_buffer_uptodate(bh); | 688 | set_buffer_uptodate(bh); |
682 | unlock_buffer(bh); | 689 | unlock_buffer(bh); |
683 | ext4_handle_dirty_metadata(handle, NULL, bh); | 690 | err = ext4_handle_dirty_metadata(handle, NULL, bh); |
691 | if (unlikely(err)) | ||
692 | ext4_std_error(sb, err); | ||
684 | brelse(bh); | 693 | brelse(bh); |
685 | } | 694 | } |
686 | if ((err2 = ext4_journal_stop(handle)) && !err) | 695 | if ((err2 = ext4_journal_stop(handle)) && !err) |
@@ -898,7 +907,11 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
898 | /* Update the global fs size fields */ | 907 | /* Update the global fs size fields */ |
899 | sbi->s_groups_count++; | 908 | sbi->s_groups_count++; |
900 | 909 | ||
901 | ext4_handle_dirty_metadata(handle, NULL, primary); | 910 | err = ext4_handle_dirty_metadata(handle, NULL, primary); |
911 | if (unlikely(err)) { | ||
912 | ext4_std_error(sb, err); | ||
913 | goto exit_journal; | ||
914 | } | ||
902 | 915 | ||
903 | /* Update the reserved block counts only once the new group is | 916 | /* Update the reserved block counts only once the new group is |
904 | * active. */ | 917 | * active. */ |
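The resize.c hunks above replace two block-at-a-time loops (bclean() plus a journaled write for every reserved GDT and inode-table block) with one sb_issue_zeroout() per region, so the block layer zeroes each contiguous run in a single request and the transaction no longer has to be extended per block. From user space, the closest analogy on recent kernels is the BLKZEROOUT ioctl; the sketch below is illustrative only, and the device path and offsets are placeholders:

    #include <stdio.h>
    #include <stdint.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>

    int main(void)
    {
        int fd = open("/dev/sdX", O_WRONLY);    /* placeholder device */
        if (fd < 0) { perror("open"); return 1; }

        /* zero 16 blocks of 4096 bytes starting at block 1000, roughly
         * what sb_issue_zeroout(sb, block, count, GFP_NOFS) requests */
        uint64_t range[2] = { 1000ULL * 4096, 16ULL * 4096 };
        if (ioctl(fd, BLKZEROOUT, range) < 0)
            perror("BLKZEROOUT");

        close(fd);
        return 0;
    }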
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 26147746c272..9ea71aa864b3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/blkdev.h> | 27 | #include <linux/blkdev.h> |
28 | #include <linux/parser.h> | 28 | #include <linux/parser.h> |
29 | #include <linux/smp_lock.h> | ||
30 | #include <linux/buffer_head.h> | 29 | #include <linux/buffer_head.h> |
31 | #include <linux/exportfs.h> | 30 | #include <linux/exportfs.h> |
32 | #include <linux/vfs.h> | 31 | #include <linux/vfs.h> |
@@ -39,8 +38,12 @@ | |||
39 | #include <linux/ctype.h> | 38 | #include <linux/ctype.h> |
40 | #include <linux/log2.h> | 39 | #include <linux/log2.h> |
41 | #include <linux/crc16.h> | 40 | #include <linux/crc16.h> |
41 | #include <linux/cleancache.h> | ||
42 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
43 | 43 | ||
44 | #include <linux/kthread.h> | ||
45 | #include <linux/freezer.h> | ||
46 | |||
44 | #include "ext4.h" | 47 | #include "ext4.h" |
45 | #include "ext4_jbd2.h" | 48 | #include "ext4_jbd2.h" |
46 | #include "xattr.h" | 49 | #include "xattr.h" |
@@ -50,8 +53,11 @@ | |||
50 | #define CREATE_TRACE_POINTS | 53 | #define CREATE_TRACE_POINTS |
51 | #include <trace/events/ext4.h> | 54 | #include <trace/events/ext4.h> |
52 | 55 | ||
53 | struct proc_dir_entry *ext4_proc_root; | 56 | static struct proc_dir_entry *ext4_proc_root; |
54 | static struct kset *ext4_kset; | 57 | static struct kset *ext4_kset; |
58 | static struct ext4_lazy_init *ext4_li_info; | ||
59 | static struct mutex ext4_li_mtx; | ||
60 | static struct ext4_features *ext4_feat; | ||
55 | 61 | ||
56 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 62 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
57 | unsigned long journal_devnum); | 63 | unsigned long journal_devnum); |
@@ -68,14 +74,34 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); | |||
68 | static int ext4_unfreeze(struct super_block *sb); | 74 | static int ext4_unfreeze(struct super_block *sb); |
69 | static void ext4_write_super(struct super_block *sb); | 75 | static void ext4_write_super(struct super_block *sb); |
70 | static int ext4_freeze(struct super_block *sb); | 76 | static int ext4_freeze(struct super_block *sb); |
71 | static int ext4_get_sb(struct file_system_type *fs_type, int flags, | 77 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, |
72 | const char *dev_name, void *data, struct vfsmount *mnt); | 78 | const char *dev_name, void *data); |
79 | static inline int ext2_feature_set_ok(struct super_block *sb); | ||
80 | static inline int ext3_feature_set_ok(struct super_block *sb); | ||
81 | static int ext4_feature_set_ok(struct super_block *sb, int readonly); | ||
82 | static void ext4_destroy_lazyinit_thread(void); | ||
83 | static void ext4_unregister_li_request(struct super_block *sb); | ||
84 | static void ext4_clear_request_list(void); | ||
85 | |||
86 | #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | ||
87 | static struct file_system_type ext2_fs_type = { | ||
88 | .owner = THIS_MODULE, | ||
89 | .name = "ext2", | ||
90 | .mount = ext4_mount, | ||
91 | .kill_sb = kill_block_super, | ||
92 | .fs_flags = FS_REQUIRES_DEV, | ||
93 | }; | ||
94 | #define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) | ||
95 | #else | ||
96 | #define IS_EXT2_SB(sb) (0) | ||
97 | #endif | ||
98 | |||
73 | 99 | ||
74 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | 100 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) |
75 | static struct file_system_type ext3_fs_type = { | 101 | static struct file_system_type ext3_fs_type = { |
76 | .owner = THIS_MODULE, | 102 | .owner = THIS_MODULE, |
77 | .name = "ext3", | 103 | .name = "ext3", |
78 | .get_sb = ext4_get_sb, | 104 | .mount = ext4_mount, |
79 | .kill_sb = kill_block_super, | 105 | .kill_sb = kill_block_super, |
80 | .fs_flags = FS_REQUIRES_DEV, | 106 | .fs_flags = FS_REQUIRES_DEV, |
81 | }; | 107 | }; |
@@ -233,27 +259,44 @@ static void ext4_put_nojournal(handle_t *handle) | |||
233 | * journal_end calls result in the superblock being marked dirty, so | 259 | * journal_end calls result in the superblock being marked dirty, so |
234 | * that sync() will call the filesystem's write_super callback if | 260 | * that sync() will call the filesystem's write_super callback if |
235 | * appropriate. | 261 | * appropriate. |
262 | * | ||
263 | * To avoid holding j_barrier in userspace when a user calls freeze(), | ||
264 | * ext4 prevents new handles from being started via s_frozen, which | ||
265 | * is checked in an upper layer. | ||
236 | */ | 266 | */ |
237 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) | 267 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) |
238 | { | 268 | { |
239 | journal_t *journal; | 269 | journal_t *journal; |
270 | handle_t *handle; | ||
240 | 271 | ||
241 | if (sb->s_flags & MS_RDONLY) | 272 | if (sb->s_flags & MS_RDONLY) |
242 | return ERR_PTR(-EROFS); | 273 | return ERR_PTR(-EROFS); |
243 | 274 | ||
244 | vfs_check_frozen(sb, SB_FREEZE_TRANS); | ||
245 | /* Special case here: if the journal has aborted behind our | ||
246 | * backs (eg. EIO in the commit thread), then we still need to | ||
247 | * take the FS itself readonly cleanly. */ | ||
248 | journal = EXT4_SB(sb)->s_journal; | 275 | journal = EXT4_SB(sb)->s_journal; |
249 | if (journal) { | 276 | handle = ext4_journal_current_handle(); |
250 | if (is_journal_aborted(journal)) { | 277 | |
251 | ext4_abort(sb, "Detected aborted journal"); | 278 | /* |
252 | return ERR_PTR(-EROFS); | 279 | * If a handle has been started, it should be allowed to |
253 | } | 280 | * finish, otherwise deadlock could happen between freeze |
254 | return jbd2_journal_start(journal, nblocks); | 281 | * and others (e.g. truncate) due to the restart of the |
282 | * journal handle if the filesystem is frozen and active | ||
283 | * handles are not stopped. | ||
284 | */ | ||
285 | if (!handle) | ||
286 | vfs_check_frozen(sb, SB_FREEZE_TRANS); | ||
287 | |||
288 | if (!journal) | ||
289 | return ext4_get_nojournal(); | ||
290 | /* | ||
291 | * Special case here: if the journal has aborted behind our | ||
292 | * backs (eg. EIO in the commit thread), then we still need to | ||
293 | * take the FS itself readonly cleanly. | ||
294 | */ | ||
295 | if (is_journal_aborted(journal)) { | ||
296 | ext4_abort(sb, "Detected aborted journal"); | ||
297 | return ERR_PTR(-EROFS); | ||
255 | } | 298 | } |
256 | return ext4_get_nojournal(); | 299 | return jbd2_journal_start(journal, nblocks); |
257 | } | 300 | } |
258 | 301 | ||
259 | /* | 302 | /* |
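The rewritten ext4_journal_start_sb() above consults the freeze barrier only when the task holds no handle yet: freeze waits for running handles to drain, so a handle-holding task that blocked in vfs_check_frozen() would deadlock against it. A toy single-threaded mock of that rule (sb_mock, current_handle and both function bodies are illustrative):

    #include <stdio.h>

    struct sb_mock { int frozen; };

    static int current_handle;  /* per-task handle count, mocking
                                   ext4_journal_current_handle() */

    static int journal_start(struct sb_mock *sb)
    {
        if (!current_handle && sb->frozen) {
            puts("no handle held: would wait for unfreeze");
            return -1;
        }
        current_handle++;       /* fresh or nested handle proceeds */
        printf("handle started (depth %d)\n", current_handle);
        return 0;
    }

    int main(void)
    {
        struct sb_mock sb = { .frozen = 1 };

        journal_start(&sb);     /* fresh start: held back by the freeze */
        current_handle = 1;     /* pretend truncate already holds one */
        journal_start(&sb);     /* restart under a handle: allowed */
        return 0;
    }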
@@ -381,13 +424,14 @@ static void ext4_handle_error(struct super_block *sb) | |||
381 | void __ext4_error(struct super_block *sb, const char *function, | 424 | void __ext4_error(struct super_block *sb, const char *function, |
382 | unsigned int line, const char *fmt, ...) | 425 | unsigned int line, const char *fmt, ...) |
383 | { | 426 | { |
427 | struct va_format vaf; | ||
384 | va_list args; | 428 | va_list args; |
385 | 429 | ||
386 | va_start(args, fmt); | 430 | va_start(args, fmt); |
387 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ", | 431 | vaf.fmt = fmt; |
388 | sb->s_id, function, line, current->comm); | 432 | vaf.va = &args; |
389 | vprintk(fmt, args); | 433 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", |
390 | printk("\n"); | 434 | sb->s_id, function, line, current->comm, &vaf); |
391 | va_end(args); | 435 | va_end(args); |
392 | 436 | ||
393 | ext4_handle_error(sb); | 437 | ext4_handle_error(sb); |
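The error-reporting conversions above and below drop the vprintk()-then-printk("\n") pattern in favor of one printk() that forwards the caller's format through struct va_format and the kernel-only %pV specifier, so each message goes out as a single line that concurrent printk traffic cannot split up. Portable C has no %pV; the equivalent two-step shape looks like this hedged sketch:

    #include <stdio.h>
    #include <stdarg.h>

    static void ext4_msg_like(const char *dev, const char *fmt, ...)
    {
        char body[256];
        va_list args;

        va_start(args, fmt);
        vsnprintf(body, sizeof(body), fmt, args);   /* the "%pV" part */
        va_end(args);

        printf("EXT4-fs (%s): %s\n", dev, body);    /* one whole line */
    }

    int main(void)
    {
        ext4_msg_like("sda1", "mounted filesystem with %s data mode",
                      "ordered");
        return 0;
    }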
@@ -398,28 +442,31 @@ void ext4_error_inode(struct inode *inode, const char *function, | |||
398 | const char *fmt, ...) | 442 | const char *fmt, ...) |
399 | { | 443 | { |
400 | va_list args; | 444 | va_list args; |
445 | struct va_format vaf; | ||
401 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | 446 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; |
402 | 447 | ||
403 | es->s_last_error_ino = cpu_to_le32(inode->i_ino); | 448 | es->s_last_error_ino = cpu_to_le32(inode->i_ino); |
404 | es->s_last_error_block = cpu_to_le64(block); | 449 | es->s_last_error_block = cpu_to_le64(block); |
405 | save_error_info(inode->i_sb, function, line); | 450 | save_error_info(inode->i_sb, function, line); |
406 | va_start(args, fmt); | 451 | va_start(args, fmt); |
452 | vaf.fmt = fmt; | ||
453 | vaf.va = &args; | ||
407 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ", | 454 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ", |
408 | inode->i_sb->s_id, function, line, inode->i_ino); | 455 | inode->i_sb->s_id, function, line, inode->i_ino); |
409 | if (block) | 456 | if (block) |
410 | printk("block %llu: ", block); | 457 | printk(KERN_CONT "block %llu: ", block); |
411 | printk("comm %s: ", current->comm); | 458 | printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf); |
412 | vprintk(fmt, args); | ||
413 | printk("\n"); | ||
414 | va_end(args); | 459 | va_end(args); |
415 | 460 | ||
416 | ext4_handle_error(inode->i_sb); | 461 | ext4_handle_error(inode->i_sb); |
417 | } | 462 | } |
418 | 463 | ||
419 | void ext4_error_file(struct file *file, const char *function, | 464 | void ext4_error_file(struct file *file, const char *function, |
420 | unsigned int line, const char *fmt, ...) | 465 | unsigned int line, ext4_fsblk_t block, |
466 | const char *fmt, ...) | ||
421 | { | 467 | { |
422 | va_list args; | 468 | va_list args; |
469 | struct va_format vaf; | ||
423 | struct ext4_super_block *es; | 470 | struct ext4_super_block *es; |
424 | struct inode *inode = file->f_dentry->d_inode; | 471 | struct inode *inode = file->f_dentry->d_inode; |
425 | char pathname[80], *path; | 472 | char pathname[80], *path; |
@@ -427,17 +474,18 @@ void ext4_error_file(struct file *file, const char *function, | |||
427 | es = EXT4_SB(inode->i_sb)->s_es; | 474 | es = EXT4_SB(inode->i_sb)->s_es; |
428 | es->s_last_error_ino = cpu_to_le32(inode->i_ino); | 475 | es->s_last_error_ino = cpu_to_le32(inode->i_ino); |
429 | save_error_info(inode->i_sb, function, line); | 476 | save_error_info(inode->i_sb, function, line); |
430 | va_start(args, fmt); | ||
431 | path = d_path(&(file->f_path), pathname, sizeof(pathname)); | 477 | path = d_path(&(file->f_path), pathname, sizeof(pathname)); |
432 | if (!path) | 478 | if (IS_ERR(path)) |
433 | path = "(unknown)"; | 479 | path = "(unknown)"; |
434 | printk(KERN_CRIT | 480 | printk(KERN_CRIT |
435 | "EXT4-fs error (device %s): %s:%d: inode #%lu " | 481 | "EXT4-fs error (device %s): %s:%d: inode #%lu: ", |
436 | "(comm %s path %s): ", | 482 | inode->i_sb->s_id, function, line, inode->i_ino); |
437 | inode->i_sb->s_id, function, line, inode->i_ino, | 483 | if (block) |
438 | current->comm, path); | 484 | printk(KERN_CONT "block %llu: ", block); |
439 | vprintk(fmt, args); | 485 | va_start(args, fmt); |
440 | printk("\n"); | 486 | vaf.fmt = fmt; |
487 | vaf.va = &args; | ||
488 | printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf); | ||
441 | va_end(args); | 489 | va_end(args); |
442 | 490 | ||
443 | ext4_handle_error(inode->i_sb); | 491 | ext4_handle_error(inode->i_sb); |
@@ -536,28 +584,29 @@ void __ext4_abort(struct super_block *sb, const char *function, | |||
536 | panic("EXT4-fs panic from previous error\n"); | 584 | panic("EXT4-fs panic from previous error\n"); |
537 | } | 585 | } |
538 | 586 | ||
539 | void ext4_msg (struct super_block * sb, const char *prefix, | 587 | void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...) |
540 | const char *fmt, ...) | ||
541 | { | 588 | { |
589 | struct va_format vaf; | ||
542 | va_list args; | 590 | va_list args; |
543 | 591 | ||
544 | va_start(args, fmt); | 592 | va_start(args, fmt); |
545 | printk("%sEXT4-fs (%s): ", prefix, sb->s_id); | 593 | vaf.fmt = fmt; |
546 | vprintk(fmt, args); | 594 | vaf.va = &args; |
547 | printk("\n"); | 595 | printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf); |
548 | va_end(args); | 596 | va_end(args); |
549 | } | 597 | } |
550 | 598 | ||
551 | void __ext4_warning(struct super_block *sb, const char *function, | 599 | void __ext4_warning(struct super_block *sb, const char *function, |
552 | unsigned int line, const char *fmt, ...) | 600 | unsigned int line, const char *fmt, ...) |
553 | { | 601 | { |
602 | struct va_format vaf; | ||
554 | va_list args; | 603 | va_list args; |
555 | 604 | ||
556 | va_start(args, fmt); | 605 | va_start(args, fmt); |
557 | printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ", | 606 | vaf.fmt = fmt; |
558 | sb->s_id, function, line); | 607 | vaf.va = &args; |
559 | vprintk(fmt, args); | 608 | printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n", |
560 | printk("\n"); | 609 | sb->s_id, function, line, &vaf); |
561 | va_end(args); | 610 | va_end(args); |
562 | } | 611 | } |
563 | 612 | ||
@@ -568,21 +617,25 @@ void __ext4_grp_locked_error(const char *function, unsigned int line, | |||
568 | __releases(bitlock) | 617 | __releases(bitlock) |
569 | __acquires(bitlock) | 618 | __acquires(bitlock) |
570 | { | 619 | { |
620 | struct va_format vaf; | ||
571 | va_list args; | 621 | va_list args; |
572 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | 622 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
573 | 623 | ||
574 | es->s_last_error_ino = cpu_to_le32(ino); | 624 | es->s_last_error_ino = cpu_to_le32(ino); |
575 | es->s_last_error_block = cpu_to_le64(block); | 625 | es->s_last_error_block = cpu_to_le64(block); |
576 | __save_error_info(sb, function, line); | 626 | __save_error_info(sb, function, line); |
627 | |||
577 | va_start(args, fmt); | 628 | va_start(args, fmt); |
578 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u", | 629 | |
630 | vaf.fmt = fmt; | ||
631 | vaf.va = &args; | ||
632 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", | ||
579 | sb->s_id, function, line, grp); | 633 | sb->s_id, function, line, grp); |
580 | if (ino) | 634 | if (ino) |
581 | printk("inode %lu: ", ino); | 635 | printk(KERN_CONT "inode %lu: ", ino); |
582 | if (block) | 636 | if (block) |
583 | printk("block %llu:", (unsigned long long) block); | 637 | printk(KERN_CONT "block %llu:", (unsigned long long) block); |
584 | vprintk(fmt, args); | 638 | printk(KERN_CONT "%pV\n", &vaf); |
585 | printk("\n"); | ||
586 | va_end(args); | 639 | va_end(args); |
587 | 640 | ||
588 | if (test_opt(sb, ERRORS_CONT)) { | 641 | if (test_opt(sb, ERRORS_CONT)) { |
@@ -598,7 +651,7 @@ __acquires(bitlock) | |||
598 | * filesystem will have already been marked read/only and the | 651 | * filesystem will have already been marked read/only and the |
599 | * journal has been aborted. We return 1 as a hint to callers | 652 | * journal has been aborted. We return 1 as a hint to callers |
600 | * who might want to use the return value from | 653 | * who might want to use the return value from |
601 | * ext4_grp_locked_error() to distinguish beween the | 654 | * ext4_grp_locked_error() to distinguish between the |
602 | * ERRORS_CONT and ERRORS_RO case, and perhaps return more | 655 | * ERRORS_CONT and ERRORS_RO case, and perhaps return more |
603 | * aggressively from the ext4 function in question, with a | 656 | * aggressively from the ext4 function in question, with a |
604 | * more appropriate error code. | 657 | * more appropriate error code. |
@@ -640,7 +693,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) | |||
640 | struct block_device *bdev; | 693 | struct block_device *bdev; |
641 | char b[BDEVNAME_SIZE]; | 694 | char b[BDEVNAME_SIZE]; |
642 | 695 | ||
643 | bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); | 696 | bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); |
644 | if (IS_ERR(bdev)) | 697 | if (IS_ERR(bdev)) |
645 | goto fail; | 698 | goto fail; |
646 | return bdev; | 699 | return bdev; |
@@ -656,8 +709,7 @@ fail: | |||
656 | */ | 709 | */ |
657 | static int ext4_blkdev_put(struct block_device *bdev) | 710 | static int ext4_blkdev_put(struct block_device *bdev) |
658 | { | 711 | { |
659 | bd_release(bdev); | 712 | return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); |
660 | return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); | ||
661 | } | 713 | } |
662 | 714 | ||
663 | static int ext4_blkdev_remove(struct ext4_sb_info *sbi) | 715 | static int ext4_blkdev_remove(struct ext4_sb_info *sbi) |
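Above, open_by_devnum() and the separate bd_release() give way to blkdev_get_by_dev()/blkdev_put() with FMODE_EXCL and the super block as holder, so the exclusive claim on the device is taken and dropped as part of open and close themselves. User space sees a related behavior: opening a block device with O_EXCL fails with EBUSY while the kernel holds it exclusively (for instance while it is mounted). The device path below is a placeholder:

    #include <stdio.h>
    #include <errno.h>
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("/dev/sdX", O_RDWR | O_EXCL); /* placeholder */

        if (fd < 0) {
            if (errno == EBUSY)
                fprintf(stderr, "device has an exclusive holder (mounted?)\n");
            else
                perror("open");
            return 1;
        }
        puts("acquired exclusive access");
        close(fd);
        return 0;
    }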
@@ -702,13 +754,13 @@ static void ext4_put_super(struct super_block *sb) | |||
702 | struct ext4_super_block *es = sbi->s_es; | 754 | struct ext4_super_block *es = sbi->s_es; |
703 | int i, err; | 755 | int i, err; |
704 | 756 | ||
757 | ext4_unregister_li_request(sb); | ||
705 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | 758 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); |
706 | 759 | ||
707 | flush_workqueue(sbi->dio_unwritten_wq); | 760 | flush_workqueue(sbi->dio_unwritten_wq); |
708 | destroy_workqueue(sbi->dio_unwritten_wq); | 761 | destroy_workqueue(sbi->dio_unwritten_wq); |
709 | 762 | ||
710 | lock_super(sb); | 763 | lock_super(sb); |
711 | lock_kernel(); | ||
712 | if (sb->s_dirt) | 764 | if (sb->s_dirt) |
713 | ext4_commit_super(sb, 1); | 765 | ext4_commit_super(sb, 1); |
714 | 766 | ||
@@ -719,6 +771,7 @@ static void ext4_put_super(struct super_block *sb) | |||
719 | ext4_abort(sb, "Couldn't clean up the journal"); | 771 | ext4_abort(sb, "Couldn't clean up the journal"); |
720 | } | 772 | } |
721 | 773 | ||
774 | del_timer(&sbi->s_err_report); | ||
722 | ext4_release_system_zone(sb); | 775 | ext4_release_system_zone(sb); |
723 | ext4_mb_release(sb); | 776 | ext4_mb_release(sb); |
724 | ext4_ext_release(sb); | 777 | ext4_ext_release(sb); |
@@ -770,12 +823,13 @@ static void ext4_put_super(struct super_block *sb) | |||
770 | invalidate_bdev(sbi->journal_bdev); | 823 | invalidate_bdev(sbi->journal_bdev); |
771 | ext4_blkdev_remove(sbi); | 824 | ext4_blkdev_remove(sbi); |
772 | } | 825 | } |
826 | if (sbi->s_mmp_tsk) | ||
827 | kthread_stop(sbi->s_mmp_tsk); | ||
773 | sb->s_fs_info = NULL; | 828 | sb->s_fs_info = NULL; |
774 | /* | 829 | /* |
775 | * Now that we are completely done shutting down the | 830 | * Now that we are completely done shutting down the |
776 | * superblock, we need to actually destroy the kobject. | 831 | * superblock, we need to actually destroy the kobject. |
777 | */ | 832 | */ |
778 | unlock_kernel(); | ||
779 | unlock_super(sb); | 833 | unlock_super(sb); |
780 | kobject_put(&sbi->s_kobj); | 834 | kobject_put(&sbi->s_kobj); |
781 | wait_for_completion(&sbi->s_kobj_unregister); | 835 | wait_for_completion(&sbi->s_kobj_unregister); |
@@ -801,32 +855,44 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
801 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); | 855 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); |
802 | INIT_LIST_HEAD(&ei->i_prealloc_list); | 856 | INIT_LIST_HEAD(&ei->i_prealloc_list); |
803 | spin_lock_init(&ei->i_prealloc_lock); | 857 | spin_lock_init(&ei->i_prealloc_lock); |
804 | /* | ||
805 | * Note: We can be called before EXT4_SB(sb)->s_journal is set, | ||
806 | * therefore it can be null here. Don't check it, just initialize | ||
807 | * jinode. | ||
808 | */ | ||
809 | jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); | ||
810 | ei->i_reserved_data_blocks = 0; | 858 | ei->i_reserved_data_blocks = 0; |
811 | ei->i_reserved_meta_blocks = 0; | 859 | ei->i_reserved_meta_blocks = 0; |
812 | ei->i_allocated_meta_blocks = 0; | 860 | ei->i_allocated_meta_blocks = 0; |
813 | ei->i_da_metadata_calc_len = 0; | 861 | ei->i_da_metadata_calc_len = 0; |
814 | ei->i_delalloc_reserved_flag = 0; | ||
815 | spin_lock_init(&(ei->i_block_reservation_lock)); | 862 | spin_lock_init(&(ei->i_block_reservation_lock)); |
816 | #ifdef CONFIG_QUOTA | 863 | #ifdef CONFIG_QUOTA |
817 | ei->i_reserved_quota = 0; | 864 | ei->i_reserved_quota = 0; |
818 | #endif | 865 | #endif |
866 | ei->jinode = NULL; | ||
819 | INIT_LIST_HEAD(&ei->i_completed_io_list); | 867 | INIT_LIST_HEAD(&ei->i_completed_io_list); |
820 | spin_lock_init(&ei->i_completed_io_lock); | 868 | spin_lock_init(&ei->i_completed_io_lock); |
821 | ei->cur_aio_dio = NULL; | 869 | ei->cur_aio_dio = NULL; |
822 | ei->i_sync_tid = 0; | 870 | ei->i_sync_tid = 0; |
823 | ei->i_datasync_tid = 0; | 871 | ei->i_datasync_tid = 0; |
872 | atomic_set(&ei->i_ioend_count, 0); | ||
873 | atomic_set(&ei->i_aiodio_unwritten, 0); | ||
824 | 874 | ||
825 | return &ei->vfs_inode; | 875 | return &ei->vfs_inode; |
826 | } | 876 | } |
827 | 877 | ||
878 | static int ext4_drop_inode(struct inode *inode) | ||
879 | { | ||
880 | int drop = generic_drop_inode(inode); | ||
881 | |||
882 | trace_ext4_drop_inode(inode, drop); | ||
883 | return drop; | ||
884 | } | ||
885 | |||
886 | static void ext4_i_callback(struct rcu_head *head) | ||
887 | { | ||
888 | struct inode *inode = container_of(head, struct inode, i_rcu); | ||
889 | INIT_LIST_HEAD(&inode->i_dentry); | ||
890 | kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); | ||
891 | } | ||
892 | |||
828 | static void ext4_destroy_inode(struct inode *inode) | 893 | static void ext4_destroy_inode(struct inode *inode) |
829 | { | 894 | { |
895 | ext4_ioend_wait(inode); | ||
830 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { | 896 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { |
831 | ext4_msg(inode->i_sb, KERN_ERR, | 897 | ext4_msg(inode->i_sb, KERN_ERR, |
832 | "Inode %lu (%p): orphan list check failed!", | 898 | "Inode %lu (%p): orphan list check failed!", |
@@ -836,7 +902,7 @@ static void ext4_destroy_inode(struct inode *inode) | |||
836 | true); | 902 | true); |
837 | dump_stack(); | 903 | dump_stack(); |
838 | } | 904 | } |
839 | kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); | 905 | call_rcu(&inode->i_rcu, ext4_i_callback); |
840 | } | 906 | } |
841 | 907 | ||
842 | static void init_once(void *foo) | 908 | static void init_once(void *foo) |
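With ext4_i_callback() above and the call_rcu() conversion in this hunk, ext4 no longer frees an inode synchronously in ->destroy_inode: the free is deferred past an RCU grace period so lock-free (RCU-walk) path lookups still holding the inode can finish, and the callback recovers the enclosing inode from the embedded rcu_head via container_of(). The user-space mock below shows that embedded-callback pattern; call_rcu_mock() runs the callback immediately, where the kernel would defer it:

    #include <stdio.h>
    #include <stdlib.h>
    #include <stddef.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct rcu_head { void (*func)(struct rcu_head *); };

    struct inode_like {
        unsigned long i_ino;
        struct rcu_head i_rcu;      /* embedded, like inode->i_rcu */
    };

    static void i_callback(struct rcu_head *head)
    {
        struct inode_like *inode =
            container_of(head, struct inode_like, i_rcu);

        printf("freeing inode %lu after grace period\n", inode->i_ino);
        free(inode);
    }

    /* stand-in for call_rcu(): the real thing defers func(head) */
    static void call_rcu_mock(struct rcu_head *head,
                              void (*func)(struct rcu_head *))
    {
        func(head);
    }

    int main(void)
    {
        struct inode_like *inode = malloc(sizeof(*inode));

        inode->i_ino = 12;
        call_rcu_mock(&inode->i_rcu, i_callback);
        return 0;
    }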
@@ -874,9 +940,12 @@ void ext4_clear_inode(struct inode *inode) | |||
874 | end_writeback(inode); | 940 | end_writeback(inode); |
875 | dquot_drop(inode); | 941 | dquot_drop(inode); |
876 | ext4_discard_preallocations(inode); | 942 | ext4_discard_preallocations(inode); |
877 | if (EXT4_JOURNAL(inode)) | 943 | if (EXT4_I(inode)->jinode) { |
878 | jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, | 944 | jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), |
879 | &EXT4_I(inode)->jinode); | 945 | EXT4_I(inode)->jinode); |
946 | jbd2_free_inode(EXT4_I(inode)->jinode); | ||
947 | EXT4_I(inode)->jinode = NULL; | ||
948 | } | ||
880 | } | 949 | } |
881 | 950 | ||
882 | static inline void ext4_show_quota_options(struct seq_file *seq, | 951 | static inline void ext4_show_quota_options(struct seq_file *seq, |
@@ -965,13 +1034,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
965 | if (test_opt(sb, OLDALLOC)) | 1034 | if (test_opt(sb, OLDALLOC)) |
966 | seq_puts(seq, ",oldalloc"); | 1035 | seq_puts(seq, ",oldalloc"); |
967 | #ifdef CONFIG_EXT4_FS_XATTR | 1036 | #ifdef CONFIG_EXT4_FS_XATTR |
968 | if (test_opt(sb, XATTR_USER) && | 1037 | if (test_opt(sb, XATTR_USER)) |
969 | !(def_mount_opts & EXT4_DEFM_XATTR_USER)) | ||
970 | seq_puts(seq, ",user_xattr"); | 1038 | seq_puts(seq, ",user_xattr"); |
971 | if (!test_opt(sb, XATTR_USER) && | 1039 | if (!test_opt(sb, XATTR_USER)) |
972 | (def_mount_opts & EXT4_DEFM_XATTR_USER)) { | ||
973 | seq_puts(seq, ",nouser_xattr"); | 1040 | seq_puts(seq, ",nouser_xattr"); |
974 | } | ||
975 | #endif | 1041 | #endif |
976 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 1042 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
977 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) | 1043 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) |
@@ -1009,6 +1075,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1009 | !(def_mount_opts & EXT4_DEFM_NODELALLOC)) | 1075 | !(def_mount_opts & EXT4_DEFM_NODELALLOC)) |
1010 | seq_puts(seq, ",nodelalloc"); | 1076 | seq_puts(seq, ",nodelalloc"); |
1011 | 1077 | ||
1078 | if (!test_opt(sb, MBLK_IO_SUBMIT)) | ||
1079 | seq_puts(seq, ",nomblk_io_submit"); | ||
1012 | if (sbi->s_stripe) | 1080 | if (sbi->s_stripe) |
1013 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); | 1081 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); |
1014 | /* | 1082 | /* |
@@ -1045,6 +1113,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1045 | !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) | 1113 | !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) |
1046 | seq_puts(seq, ",block_validity"); | 1114 | seq_puts(seq, ",block_validity"); |
1047 | 1115 | ||
1116 | if (!test_opt(sb, INIT_INODE_TABLE)) | ||
1117 | seq_puts(seq, ",noinit_inode_table"); | ||
1118 | else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT) | ||
1119 | seq_printf(seq, ",init_inode_table=%u", | ||
1120 | (unsigned) sbi->s_li_wait_mult); | ||
1121 | |||
1048 | ext4_show_quota_options(seq, sb); | 1122 | ext4_show_quota_options(seq, sb); |
1049 | 1123 | ||
1050 | return 0; | 1124 | return 0; |
@@ -1123,7 +1197,7 @@ static int ext4_release_dquot(struct dquot *dquot); | |||
1123 | static int ext4_mark_dquot_dirty(struct dquot *dquot); | 1197 | static int ext4_mark_dquot_dirty(struct dquot *dquot); |
1124 | static int ext4_write_info(struct super_block *sb, int type); | 1198 | static int ext4_write_info(struct super_block *sb, int type); |
1125 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, | 1199 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, |
1126 | char *path); | 1200 | struct path *path); |
1127 | static int ext4_quota_off(struct super_block *sb, int type); | 1201 | static int ext4_quota_off(struct super_block *sb, int type); |
1128 | static int ext4_quota_on_mount(struct super_block *sb, int type); | 1202 | static int ext4_quota_on_mount(struct super_block *sb, int type); |
1129 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | 1203 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, |
@@ -1132,9 +1206,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
1132 | const char *data, size_t len, loff_t off); | 1206 | const char *data, size_t len, loff_t off); |
1133 | 1207 | ||
1134 | static const struct dquot_operations ext4_quota_operations = { | 1208 | static const struct dquot_operations ext4_quota_operations = { |
1135 | #ifdef CONFIG_QUOTA | ||
1136 | .get_reserved_space = ext4_get_reserved_space, | 1209 | .get_reserved_space = ext4_get_reserved_space, |
1137 | #endif | ||
1138 | .write_dquot = ext4_write_dquot, | 1210 | .write_dquot = ext4_write_dquot, |
1139 | .acquire_dquot = ext4_acquire_dquot, | 1211 | .acquire_dquot = ext4_acquire_dquot, |
1140 | .release_dquot = ext4_release_dquot, | 1212 | .release_dquot = ext4_release_dquot, |
@@ -1160,6 +1232,7 @@ static const struct super_operations ext4_sops = { | |||
1160 | .destroy_inode = ext4_destroy_inode, | 1232 | .destroy_inode = ext4_destroy_inode, |
1161 | .write_inode = ext4_write_inode, | 1233 | .write_inode = ext4_write_inode, |
1162 | .dirty_inode = ext4_dirty_inode, | 1234 | .dirty_inode = ext4_dirty_inode, |
1235 | .drop_inode = ext4_drop_inode, | ||
1163 | .evict_inode = ext4_evict_inode, | 1236 | .evict_inode = ext4_evict_inode, |
1164 | .put_super = ext4_put_super, | 1237 | .put_super = ext4_put_super, |
1165 | .sync_fs = ext4_sync_fs, | 1238 | .sync_fs = ext4_sync_fs, |
@@ -1180,6 +1253,7 @@ static const struct super_operations ext4_nojournal_sops = { | |||
1180 | .destroy_inode = ext4_destroy_inode, | 1253 | .destroy_inode = ext4_destroy_inode, |
1181 | .write_inode = ext4_write_inode, | 1254 | .write_inode = ext4_write_inode, |
1182 | .dirty_inode = ext4_dirty_inode, | 1255 | .dirty_inode = ext4_dirty_inode, |
1256 | .drop_inode = ext4_drop_inode, | ||
1183 | .evict_inode = ext4_evict_inode, | 1257 | .evict_inode = ext4_evict_inode, |
1184 | .write_super = ext4_write_super, | 1258 | .write_super = ext4_write_super, |
1185 | .put_super = ext4_put_super, | 1259 | .put_super = ext4_put_super, |
@@ -1214,11 +1288,12 @@ enum { | |||
1214 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, | 1288 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, |
1215 | Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, | 1289 | Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, |
1216 | Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, | 1290 | Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, |
1217 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, | 1291 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, |
1218 | Opt_block_validity, Opt_noblock_validity, | 1292 | Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, |
1219 | Opt_inode_readahead_blks, Opt_journal_ioprio, | 1293 | Opt_inode_readahead_blks, Opt_journal_ioprio, |
1220 | Opt_dioread_nolock, Opt_dioread_lock, | 1294 | Opt_dioread_nolock, Opt_dioread_lock, |
1221 | Opt_discard, Opt_nodiscard, | 1295 | Opt_discard, Opt_nodiscard, |
1296 | Opt_init_inode_table, Opt_noinit_inode_table, | ||
1222 | }; | 1297 | }; |
1223 | 1298 | ||
1224 | static const match_table_t tokens = { | 1299 | static const match_table_t tokens = { |
@@ -1278,6 +1353,8 @@ static const match_table_t tokens = { | |||
1278 | {Opt_resize, "resize"}, | 1353 | {Opt_resize, "resize"}, |
1279 | {Opt_delalloc, "delalloc"}, | 1354 | {Opt_delalloc, "delalloc"}, |
1280 | {Opt_nodelalloc, "nodelalloc"}, | 1355 | {Opt_nodelalloc, "nodelalloc"}, |
1356 | {Opt_mblk_io_submit, "mblk_io_submit"}, | ||
1357 | {Opt_nomblk_io_submit, "nomblk_io_submit"}, | ||
1281 | {Opt_block_validity, "block_validity"}, | 1358 | {Opt_block_validity, "block_validity"}, |
1282 | {Opt_noblock_validity, "noblock_validity"}, | 1359 | {Opt_noblock_validity, "noblock_validity"}, |
1283 | {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, | 1360 | {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, |
@@ -1289,6 +1366,9 @@ static const match_table_t tokens = { | |||
1289 | {Opt_dioread_lock, "dioread_lock"}, | 1366 | {Opt_dioread_lock, "dioread_lock"}, |
1290 | {Opt_discard, "discard"}, | 1367 | {Opt_discard, "discard"}, |
1291 | {Opt_nodiscard, "nodiscard"}, | 1368 | {Opt_nodiscard, "nodiscard"}, |
1369 | {Opt_init_inode_table, "init_itable=%u"}, | ||
1370 | {Opt_init_inode_table, "init_itable"}, | ||
1371 | {Opt_noinit_inode_table, "noinit_itable"}, | ||
1292 | {Opt_err, NULL}, | 1372 | {Opt_err, NULL}, |
1293 | }; | 1373 | }; |
1294 | 1374 | ||
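The token table above maps both "init_itable=%u" and bare "init_itable" to the same Opt_init_inode_table case; the parser later distinguishes them by whether args[0].from was filled in, falling back to EXT4_DEF_LI_WAIT_MULT when no value is given. A self-contained sketch of that optional-argument shape (parse_init_itable() and the default value 10 are stand-ins, not the kernel's code):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define DEF_LI_WAIT_MULT 10     /* stand-in for EXT4_DEF_LI_WAIT_MULT */

    /* accept "init_itable" or "init_itable=<n>", mirroring the two
     * token-table entries that share Opt_init_inode_table */
    static int parse_init_itable(const char *opt, int *mult)
    {
        const char *prefix = "init_itable";
        size_t n = strlen(prefix);

        if (strncmp(opt, prefix, n) != 0)
            return 0;
        if (opt[n] == '\0') {           /* no argument: use the default */
            *mult = DEF_LI_WAIT_MULT;
            return 1;
        }
        if (opt[n] == '=') {
            long v = strtol(opt + n + 1, NULL, 10);
            if (v < 0)
                return 0;               /* negative multiplier rejected */
            *mult = (int)v;
            return 1;
        }
        return 0;
    }

    int main(void)
    {
        const char *tests[] = { "init_itable", "init_itable=20",
                                "init_itable=-1" };

        for (unsigned i = 0; i < 3; i++) {
            int mult = 0;

            if (parse_init_itable(tests[i], &mult))
                printf("%-14s -> wait multiplier %d\n", tests[i], mult);
            else
                printf("%-14s -> rejected\n", tests[i]);
        }
        return 0;
    }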
@@ -1353,7 +1433,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) | |||
1353 | sbi->s_qf_names[qtype] = NULL; | 1433 | sbi->s_qf_names[qtype] = NULL; |
1354 | return 0; | 1434 | return 0; |
1355 | } | 1435 | } |
1356 | set_opt(sbi->s_mount_opt, QUOTA); | 1436 | set_opt(sb, QUOTA); |
1357 | return 1; | 1437 | return 1; |
1358 | } | 1438 | } |
1359 | 1439 | ||
@@ -1403,26 +1483,26 @@ static int parse_options(char *options, struct super_block *sb, | |||
1403 | * Initialize args struct so we know whether arg was | 1483 | * Initialize args struct so we know whether arg was |
1404 | * found; some options take optional arguments. | 1484 | * found; some options take optional arguments. |
1405 | */ | 1485 | */ |
1406 | args[0].to = args[0].from = 0; | 1486 | args[0].to = args[0].from = NULL; |
1407 | token = match_token(p, tokens, args); | 1487 | token = match_token(p, tokens, args); |
1408 | switch (token) { | 1488 | switch (token) { |
1409 | case Opt_bsd_df: | 1489 | case Opt_bsd_df: |
1410 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | 1490 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); |
1411 | clear_opt(sbi->s_mount_opt, MINIX_DF); | 1491 | clear_opt(sb, MINIX_DF); |
1412 | break; | 1492 | break; |
1413 | case Opt_minix_df: | 1493 | case Opt_minix_df: |
1414 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | 1494 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); |
1415 | set_opt(sbi->s_mount_opt, MINIX_DF); | 1495 | set_opt(sb, MINIX_DF); |
1416 | 1496 | ||
1417 | break; | 1497 | break; |
1418 | case Opt_grpid: | 1498 | case Opt_grpid: |
1419 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | 1499 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); |
1420 | set_opt(sbi->s_mount_opt, GRPID); | 1500 | set_opt(sb, GRPID); |
1421 | 1501 | ||
1422 | break; | 1502 | break; |
1423 | case Opt_nogrpid: | 1503 | case Opt_nogrpid: |
1424 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | 1504 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); |
1425 | clear_opt(sbi->s_mount_opt, GRPID); | 1505 | clear_opt(sb, GRPID); |
1426 | 1506 | ||
1427 | break; | 1507 | break; |
1428 | case Opt_resuid: | 1508 | case Opt_resuid: |
@@ -1440,38 +1520,38 @@ static int parse_options(char *options, struct super_block *sb, | |||
1440 | /* *sb_block = match_int(&args[0]); */ | 1520 | /* *sb_block = match_int(&args[0]); */ |
1441 | break; | 1521 | break; |
1442 | case Opt_err_panic: | 1522 | case Opt_err_panic: |
1443 | clear_opt(sbi->s_mount_opt, ERRORS_CONT); | 1523 | clear_opt(sb, ERRORS_CONT); |
1444 | clear_opt(sbi->s_mount_opt, ERRORS_RO); | 1524 | clear_opt(sb, ERRORS_RO); |
1445 | set_opt(sbi->s_mount_opt, ERRORS_PANIC); | 1525 | set_opt(sb, ERRORS_PANIC); |
1446 | break; | 1526 | break; |
1447 | case Opt_err_ro: | 1527 | case Opt_err_ro: |
1448 | clear_opt(sbi->s_mount_opt, ERRORS_CONT); | 1528 | clear_opt(sb, ERRORS_CONT); |
1449 | clear_opt(sbi->s_mount_opt, ERRORS_PANIC); | 1529 | clear_opt(sb, ERRORS_PANIC); |
1450 | set_opt(sbi->s_mount_opt, ERRORS_RO); | 1530 | set_opt(sb, ERRORS_RO); |
1451 | break; | 1531 | break; |
1452 | case Opt_err_cont: | 1532 | case Opt_err_cont: |
1453 | clear_opt(sbi->s_mount_opt, ERRORS_RO); | 1533 | clear_opt(sb, ERRORS_RO); |
1454 | clear_opt(sbi->s_mount_opt, ERRORS_PANIC); | 1534 | clear_opt(sb, ERRORS_PANIC); |
1455 | set_opt(sbi->s_mount_opt, ERRORS_CONT); | 1535 | set_opt(sb, ERRORS_CONT); |
1456 | break; | 1536 | break; |
1457 | case Opt_nouid32: | 1537 | case Opt_nouid32: |
1458 | set_opt(sbi->s_mount_opt, NO_UID32); | 1538 | set_opt(sb, NO_UID32); |
1459 | break; | 1539 | break; |
1460 | case Opt_debug: | 1540 | case Opt_debug: |
1461 | set_opt(sbi->s_mount_opt, DEBUG); | 1541 | set_opt(sb, DEBUG); |
1462 | break; | 1542 | break; |
1463 | case Opt_oldalloc: | 1543 | case Opt_oldalloc: |
1464 | set_opt(sbi->s_mount_opt, OLDALLOC); | 1544 | set_opt(sb, OLDALLOC); |
1465 | break; | 1545 | break; |
1466 | case Opt_orlov: | 1546 | case Opt_orlov: |
1467 | clear_opt(sbi->s_mount_opt, OLDALLOC); | 1547 | clear_opt(sb, OLDALLOC); |
1468 | break; | 1548 | break; |
1469 | #ifdef CONFIG_EXT4_FS_XATTR | 1549 | #ifdef CONFIG_EXT4_FS_XATTR |
1470 | case Opt_user_xattr: | 1550 | case Opt_user_xattr: |
1471 | set_opt(sbi->s_mount_opt, XATTR_USER); | 1551 | set_opt(sb, XATTR_USER); |
1472 | break; | 1552 | break; |
1473 | case Opt_nouser_xattr: | 1553 | case Opt_nouser_xattr: |
1474 | clear_opt(sbi->s_mount_opt, XATTR_USER); | 1554 | clear_opt(sb, XATTR_USER); |
1475 | break; | 1555 | break; |
1476 | #else | 1556 | #else |
1477 | case Opt_user_xattr: | 1557 | case Opt_user_xattr: |
@@ -1481,10 +1561,10 @@ static int parse_options(char *options, struct super_block *sb, | |||
1481 | #endif | 1561 | #endif |
1482 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 1562 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
1483 | case Opt_acl: | 1563 | case Opt_acl: |
1484 | set_opt(sbi->s_mount_opt, POSIX_ACL); | 1564 | set_opt(sb, POSIX_ACL); |
1485 | break; | 1565 | break; |
1486 | case Opt_noacl: | 1566 | case Opt_noacl: |
1487 | clear_opt(sbi->s_mount_opt, POSIX_ACL); | 1567 | clear_opt(sb, POSIX_ACL); |
1488 | break; | 1568 | break; |
1489 | #else | 1569 | #else |
1490 | case Opt_acl: | 1570 | case Opt_acl: |
@@ -1503,7 +1583,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1503 | "Cannot specify journal on remount"); | 1583 | "Cannot specify journal on remount"); |
1504 | return 0; | 1584 | return 0; |
1505 | } | 1585 | } |
1506 | set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); | 1586 | set_opt(sb, UPDATE_JOURNAL); |
1507 | break; | 1587 | break; |
1508 | case Opt_journal_dev: | 1588 | case Opt_journal_dev: |
1509 | if (is_remount) { | 1589 | if (is_remount) { |
@@ -1516,14 +1596,14 @@ static int parse_options(char *options, struct super_block *sb, | |||
1516 | *journal_devnum = option; | 1596 | *journal_devnum = option; |
1517 | break; | 1597 | break; |
1518 | case Opt_journal_checksum: | 1598 | case Opt_journal_checksum: |
1519 | set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); | 1599 | set_opt(sb, JOURNAL_CHECKSUM); |
1520 | break; | 1600 | break; |
1521 | case Opt_journal_async_commit: | 1601 | case Opt_journal_async_commit: |
1522 | set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); | 1602 | set_opt(sb, JOURNAL_ASYNC_COMMIT); |
1523 | set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); | 1603 | set_opt(sb, JOURNAL_CHECKSUM); |
1524 | break; | 1604 | break; |
1525 | case Opt_noload: | 1605 | case Opt_noload: |
1526 | set_opt(sbi->s_mount_opt, NOLOAD); | 1606 | set_opt(sb, NOLOAD); |
1527 | break; | 1607 | break; |
1528 | case Opt_commit: | 1608 | case Opt_commit: |
1529 | if (match_int(&args[0], &option)) | 1609 | if (match_int(&args[0], &option)) |
@@ -1566,15 +1646,15 @@ static int parse_options(char *options, struct super_block *sb, | |||
1566 | return 0; | 1646 | return 0; |
1567 | } | 1647 | } |
1568 | } else { | 1648 | } else { |
1569 | clear_opt(sbi->s_mount_opt, DATA_FLAGS); | 1649 | clear_opt(sb, DATA_FLAGS); |
1570 | sbi->s_mount_opt |= data_opt; | 1650 | sbi->s_mount_opt |= data_opt; |
1571 | } | 1651 | } |
1572 | break; | 1652 | break; |
1573 | case Opt_data_err_abort: | 1653 | case Opt_data_err_abort: |
1574 | set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | 1654 | set_opt(sb, DATA_ERR_ABORT); |
1575 | break; | 1655 | break; |
1576 | case Opt_data_err_ignore: | 1656 | case Opt_data_err_ignore: |
1577 | clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | 1657 | clear_opt(sb, DATA_ERR_ABORT); |
1578 | break; | 1658 | break; |
1579 | #ifdef CONFIG_QUOTA | 1659 | #ifdef CONFIG_QUOTA |
1580 | case Opt_usrjquota: | 1660 | case Opt_usrjquota: |
@@ -1614,12 +1694,12 @@ set_qf_format: | |||
1614 | break; | 1694 | break; |
1615 | case Opt_quota: | 1695 | case Opt_quota: |
1616 | case Opt_usrquota: | 1696 | case Opt_usrquota: |
1617 | set_opt(sbi->s_mount_opt, QUOTA); | 1697 | set_opt(sb, QUOTA); |
1618 | set_opt(sbi->s_mount_opt, USRQUOTA); | 1698 | set_opt(sb, USRQUOTA); |
1619 | break; | 1699 | break; |
1620 | case Opt_grpquota: | 1700 | case Opt_grpquota: |
1621 | set_opt(sbi->s_mount_opt, QUOTA); | 1701 | set_opt(sb, QUOTA); |
1622 | set_opt(sbi->s_mount_opt, GRPQUOTA); | 1702 | set_opt(sb, GRPQUOTA); |
1623 | break; | 1703 | break; |
1624 | case Opt_noquota: | 1704 | case Opt_noquota: |
1625 | if (sb_any_quota_loaded(sb)) { | 1705 | if (sb_any_quota_loaded(sb)) { |
@@ -1627,9 +1707,9 @@ set_qf_format: | |||
1627 | "options when quota turned on"); | 1707 | "options when quota turned on"); |
1628 | return 0; | 1708 | return 0; |
1629 | } | 1709 | } |
1630 | clear_opt(sbi->s_mount_opt, QUOTA); | 1710 | clear_opt(sb, QUOTA); |
1631 | clear_opt(sbi->s_mount_opt, USRQUOTA); | 1711 | clear_opt(sb, USRQUOTA); |
1632 | clear_opt(sbi->s_mount_opt, GRPQUOTA); | 1712 | clear_opt(sb, GRPQUOTA); |
1633 | break; | 1713 | break; |
1634 | #else | 1714 | #else |
1635 | case Opt_quota: | 1715 | case Opt_quota: |
@@ -1655,7 +1735,7 @@ set_qf_format: | |||
1655 | sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; | 1735 | sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; |
1656 | break; | 1736 | break; |
1657 | case Opt_nobarrier: | 1737 | case Opt_nobarrier: |
1658 | clear_opt(sbi->s_mount_opt, BARRIER); | 1738 | clear_opt(sb, BARRIER); |
1659 | break; | 1739 | break; |
1660 | case Opt_barrier: | 1740 | case Opt_barrier: |
1661 | if (args[0].from) { | 1741 | if (args[0].from) { |
@@ -1664,9 +1744,9 @@ set_qf_format: | |||
1664 | } else | 1744 | } else |
1665 | option = 1; /* No argument, default to 1 */ | 1745 | option = 1; /* No argument, default to 1 */ |
1666 | if (option) | 1746 | if (option) |
1667 | set_opt(sbi->s_mount_opt, BARRIER); | 1747 | set_opt(sb, BARRIER); |
1668 | else | 1748 | else |
1669 | clear_opt(sbi->s_mount_opt, BARRIER); | 1749 | clear_opt(sb, BARRIER); |
1670 | break; | 1750 | break; |
1671 | case Opt_ignore: | 1751 | case Opt_ignore: |
1672 | break; | 1752 | break; |
@@ -1690,11 +1770,17 @@ set_qf_format: | |||
1690 | "Ignoring deprecated bh option"); | 1770 | "Ignoring deprecated bh option"); |
1691 | break; | 1771 | break; |
1692 | case Opt_i_version: | 1772 | case Opt_i_version: |
1693 | set_opt(sbi->s_mount_opt, I_VERSION); | 1773 | set_opt(sb, I_VERSION); |
1694 | sb->s_flags |= MS_I_VERSION; | 1774 | sb->s_flags |= MS_I_VERSION; |
1695 | break; | 1775 | break; |
1696 | case Opt_nodelalloc: | 1776 | case Opt_nodelalloc: |
1697 | clear_opt(sbi->s_mount_opt, DELALLOC); | 1777 | clear_opt(sb, DELALLOC); |
1778 | break; | ||
1779 | case Opt_mblk_io_submit: | ||
1780 | set_opt(sb, MBLK_IO_SUBMIT); | ||
1781 | break; | ||
1782 | case Opt_nomblk_io_submit: | ||
1783 | clear_opt(sb, MBLK_IO_SUBMIT); | ||
1698 | break; | 1784 | break; |
1699 | case Opt_stripe: | 1785 | case Opt_stripe: |
1700 | if (match_int(&args[0], &option)) | 1786 | if (match_int(&args[0], &option)) |
@@ -1704,20 +1790,20 @@ set_qf_format: | |||
1704 | sbi->s_stripe = option; | 1790 | sbi->s_stripe = option; |
1705 | break; | 1791 | break; |
1706 | case Opt_delalloc: | 1792 | case Opt_delalloc: |
1707 | set_opt(sbi->s_mount_opt, DELALLOC); | 1793 | set_opt(sb, DELALLOC); |
1708 | break; | 1794 | break; |
1709 | case Opt_block_validity: | 1795 | case Opt_block_validity: |
1710 | set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); | 1796 | set_opt(sb, BLOCK_VALIDITY); |
1711 | break; | 1797 | break; |
1712 | case Opt_noblock_validity: | 1798 | case Opt_noblock_validity: |
1713 | clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY); | 1799 | clear_opt(sb, BLOCK_VALIDITY); |
1714 | break; | 1800 | break; |
1715 | case Opt_inode_readahead_blks: | 1801 | case Opt_inode_readahead_blks: |
1716 | if (match_int(&args[0], &option)) | 1802 | if (match_int(&args[0], &option)) |
1717 | return 0; | 1803 | return 0; |
1718 | if (option < 0 || option > (1 << 30)) | 1804 | if (option < 0 || option > (1 << 30)) |
1719 | return 0; | 1805 | return 0; |
1720 | if (!is_power_of_2(option)) { | 1806 | if (option && !is_power_of_2(option)) { |
1721 | ext4_msg(sb, KERN_ERR, | 1807 | ext4_msg(sb, KERN_ERR, |
1722 | "EXT4-fs: inode_readahead_blks" | 1808 | "EXT4-fs: inode_readahead_blks" |
1723 | " must be a power of 2"); | 1809 | " must be a power of 2"); |
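The added `option &&` guard is what lets inode_readahead_blks=0 through as a way to disable readahead: is_power_of_2(0) is false, so without the guard a zero value would be rejected with the power-of-2 error. The kernel's helper is equivalent to:

	/* Equivalent of is_power_of_2() from <linux/log2.h> */
	static inline bool is_power_of_2(unsigned long n)
	{
		return n != 0 && ((n & (n - 1)) == 0);
	}

The same `t &&` relaxation is applied to the sysfs store path further down in this patch.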
@@ -1734,7 +1820,7 @@ set_qf_format: | |||
1734 | option); | 1820 | option); |
1735 | break; | 1821 | break; |
1736 | case Opt_noauto_da_alloc: | 1822 | case Opt_noauto_da_alloc: |
1737 | set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); | 1823 | set_opt(sb, NO_AUTO_DA_ALLOC); |
1738 | break; | 1824 | break; |
1739 | case Opt_auto_da_alloc: | 1825 | case Opt_auto_da_alloc: |
1740 | if (args[0].from) { | 1826 | if (args[0].from) { |
@@ -1743,21 +1829,35 @@ set_qf_format: | |||
1743 | } else | 1829 | } else |
1744 | option = 1; /* No argument, default to 1 */ | 1830 | option = 1; /* No argument, default to 1 */ |
1745 | if (option) | 1831 | if (option) |
1746 | clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); | 1832 | clear_opt(sb, NO_AUTO_DA_ALLOC); |
1747 | else | 1833 | else |
1748 | set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); | 1834 | set_opt(sb, NO_AUTO_DA_ALLOC); |
1749 | break; | 1835 | break; |
1750 | case Opt_discard: | 1836 | case Opt_discard: |
1751 | set_opt(sbi->s_mount_opt, DISCARD); | 1837 | set_opt(sb, DISCARD); |
1752 | break; | 1838 | break; |
1753 | case Opt_nodiscard: | 1839 | case Opt_nodiscard: |
1754 | clear_opt(sbi->s_mount_opt, DISCARD); | 1840 | clear_opt(sb, DISCARD); |
1755 | break; | 1841 | break; |
1756 | case Opt_dioread_nolock: | 1842 | case Opt_dioread_nolock: |
1757 | set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); | 1843 | set_opt(sb, DIOREAD_NOLOCK); |
1758 | break; | 1844 | break; |
1759 | case Opt_dioread_lock: | 1845 | case Opt_dioread_lock: |
1760 | clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); | 1846 | clear_opt(sb, DIOREAD_NOLOCK); |
1847 | break; | ||
1848 | case Opt_init_inode_table: | ||
1849 | set_opt(sb, INIT_INODE_TABLE); | ||
1850 | if (args[0].from) { | ||
1851 | if (match_int(&args[0], &option)) | ||
1852 | return 0; | ||
1853 | } else | ||
1854 | option = EXT4_DEF_LI_WAIT_MULT; | ||
1855 | if (option < 0) | ||
1856 | return 0; | ||
1857 | sbi->s_li_wait_mult = option; | ||
1858 | break; | ||
1859 | case Opt_noinit_inode_table: | ||
1860 | clear_opt(sb, INIT_INODE_TABLE); | ||
1761 | break; | 1861 | break; |
1762 | default: | 1862 | default: |
1763 | ext4_msg(sb, KERN_ERR, | 1863 | ext4_msg(sb, KERN_ERR, |
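The new init_inode_table cases above wire the optional argument into sbi->s_li_wait_mult, the throttle multiplier used by the lazy inode-table zeroing thread added later in this patch; with no argument the default EXT4_DEF_LI_WAIT_MULT is used, and negative values are rejected. Assuming the mount-option token is spelled init_itable (the token table sits outside this section), a mount line exercising it would look like:

	mount -t ext4 -o init_itable=10 /dev/sdXN /mnt

where /dev/sdXN is a placeholder device.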
@@ -1769,10 +1869,10 @@ set_qf_format: | |||
1769 | #ifdef CONFIG_QUOTA | 1869 | #ifdef CONFIG_QUOTA |
1770 | if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { | 1870 | if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { |
1771 | if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) | 1871 | if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) |
1772 | clear_opt(sbi->s_mount_opt, USRQUOTA); | 1872 | clear_opt(sb, USRQUOTA); |
1773 | 1873 | ||
1774 | if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) | 1874 | if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) |
1775 | clear_opt(sbi->s_mount_opt, GRPQUOTA); | 1875 | clear_opt(sb, GRPQUOTA); |
1776 | 1876 | ||
1777 | if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { | 1877 | if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { |
1778 | ext4_msg(sb, KERN_ERR, "old and new quota " | 1878 | ext4_msg(sb, KERN_ERR, "old and new quota " |
@@ -1817,7 +1917,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1817 | ext4_msg(sb, KERN_WARNING, | 1917 | ext4_msg(sb, KERN_WARNING, |
1818 | "warning: mounting fs with errors, " | 1918 | "warning: mounting fs with errors, " |
1819 | "running e2fsck is recommended"); | 1919 | "running e2fsck is recommended"); |
1820 | else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && | 1920 | else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 && |
1821 | le16_to_cpu(es->s_mnt_count) >= | 1921 | le16_to_cpu(es->s_mnt_count) >= |
1822 | (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) | 1922 | (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) |
1823 | ext4_msg(sb, KERN_WARNING, | 1923 | ext4_msg(sb, KERN_WARNING, |
@@ -1842,13 +1942,14 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1842 | ext4_commit_super(sb, 1); | 1942 | ext4_commit_super(sb, 1); |
1843 | if (test_opt(sb, DEBUG)) | 1943 | if (test_opt(sb, DEBUG)) |
1844 | printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " | 1944 | printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " |
1845 | "bpg=%lu, ipg=%lu, mo=%04x]\n", | 1945 | "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n", |
1846 | sb->s_blocksize, | 1946 | sb->s_blocksize, |
1847 | sbi->s_groups_count, | 1947 | sbi->s_groups_count, |
1848 | EXT4_BLOCKS_PER_GROUP(sb), | 1948 | EXT4_BLOCKS_PER_GROUP(sb), |
1849 | EXT4_INODES_PER_GROUP(sb), | 1949 | EXT4_INODES_PER_GROUP(sb), |
1850 | sbi->s_mount_opt); | 1950 | sbi->s_mount_opt, sbi->s_mount_opt2); |
1851 | 1951 | ||
1952 | cleancache_init_fs(sb); | ||
1852 | return res; | 1953 | return res; |
1853 | } | 1954 | } |
1854 | 1955 | ||
@@ -1877,14 +1978,13 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1877 | size = flex_group_count * sizeof(struct flex_groups); | 1978 | size = flex_group_count * sizeof(struct flex_groups); |
1878 | sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); | 1979 | sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); |
1879 | if (sbi->s_flex_groups == NULL) { | 1980 | if (sbi->s_flex_groups == NULL) { |
1880 | sbi->s_flex_groups = vmalloc(size); | 1981 | sbi->s_flex_groups = vzalloc(size); |
1881 | if (sbi->s_flex_groups) | 1982 | if (sbi->s_flex_groups == NULL) { |
1882 | memset(sbi->s_flex_groups, 0, size); | 1983 | ext4_msg(sb, KERN_ERR, |
1883 | } | 1984 | "not enough memory for %u flex groups", |
1884 | if (sbi->s_flex_groups == NULL) { | 1985 | flex_group_count); |
1885 | ext4_msg(sb, KERN_ERR, "not enough memory for " | 1986 | goto failed; |
1886 | "%u flex groups", flex_group_count); | 1987 | } |
1887 | goto failed; | ||
1888 | } | 1988 | } |
1889 | 1989 | ||
1890 | for (i = 0; i < sbi->s_groups_count; i++) { | 1990 | for (i = 0; i < sbi->s_groups_count; i++) { |
@@ -1942,7 +2042,8 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, | |||
1942 | } | 2042 | } |
1943 | 2043 | ||
1944 | /* Called at mount-time, super-block is locked */ | 2044 | /* Called at mount-time, super-block is locked */ |
1945 | static int ext4_check_descriptors(struct super_block *sb) | 2045 | static int ext4_check_descriptors(struct super_block *sb, |
2046 | ext4_group_t *first_not_zeroed) | ||
1946 | { | 2047 | { |
1947 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2048 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1948 | ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); | 2049 | ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); |
@@ -1951,7 +2052,7 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1951 | ext4_fsblk_t inode_bitmap; | 2052 | ext4_fsblk_t inode_bitmap; |
1952 | ext4_fsblk_t inode_table; | 2053 | ext4_fsblk_t inode_table; |
1953 | int flexbg_flag = 0; | 2054 | int flexbg_flag = 0; |
1954 | ext4_group_t i; | 2055 | ext4_group_t i, grp = sbi->s_groups_count; |
1955 | 2056 | ||
1956 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | 2057 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) |
1957 | flexbg_flag = 1; | 2058 | flexbg_flag = 1; |
@@ -1967,6 +2068,10 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1967 | last_block = first_block + | 2068 | last_block = first_block + |
1968 | (EXT4_BLOCKS_PER_GROUP(sb) - 1); | 2069 | (EXT4_BLOCKS_PER_GROUP(sb) - 1); |
1969 | 2070 | ||
2071 | if ((grp == sbi->s_groups_count) && | ||
2072 | !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) | ||
2073 | grp = i; | ||
2074 | |||
1970 | block_bitmap = ext4_block_bitmap(sb, gdp); | 2075 | block_bitmap = ext4_block_bitmap(sb, gdp); |
1971 | if (block_bitmap < first_block || block_bitmap > last_block) { | 2076 | if (block_bitmap < first_block || block_bitmap > last_block) { |
1972 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " | 2077 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
@@ -2004,6 +2109,8 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
2004 | if (!flexbg_flag) | 2109 | if (!flexbg_flag) |
2005 | first_block += EXT4_BLOCKS_PER_GROUP(sb); | 2110 | first_block += EXT4_BLOCKS_PER_GROUP(sb); |
2006 | } | 2111 | } |
2112 | if (NULL != first_not_zeroed) | ||
2113 | *first_not_zeroed = grp; | ||
2007 | 2114 | ||
2008 | ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); | 2115 | ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); |
2009 | sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); | 2116 | sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); |
@@ -2046,6 +2153,13 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2046 | return; | 2153 | return; |
2047 | } | 2154 | } |
2048 | 2155 | ||
2156 | /* Check if the feature set allows a r/w mount; if not, skip orphan cleanup */ | ||
2157 | if (!ext4_feature_set_ok(sb, 0)) { | ||
2158 | ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " | ||
2159 | "unknown ROCOMPAT features"); | ||
2160 | return; | ||
2161 | } | ||
2162 | |||
2049 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { | 2163 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { |
2050 | if (es->s_last_orphan) | 2164 | if (es->s_last_orphan) |
2051 | jbd_debug(1, "Errors on filesystem, " | 2165 | jbd_debug(1, "Errors on filesystem, " |
@@ -2129,6 +2243,12 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2129 | * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, | 2243 | * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, |
2130 | * so that won't be a limiting factor. | 2244 | * so that won't be a limiting factor. |
2131 | * | 2245 | * |
2246 | * However, there is another limiting factor. We store extents as a | ||
2247 | * starting block plus a length, so the length of an extent covering the | ||
2248 | * maximum file size must also fit into the on-disk format containers. | ||
2249 | * Since a length covering blocks 0..N is N + 1 units (block 0 counts | ||
2250 | * too), we have to lower s_maxbytes by one fs block. | ||
2251 | * | ||
2132 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. | 2252 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. |
2133 | */ | 2253 | */ |
2134 | static loff_t ext4_max_size(int blkbits, int has_huge_files) | 2254 | static loff_t ext4_max_size(int blkbits, int has_huge_files) |
@@ -2150,10 +2270,13 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files) | |||
2150 | upper_limit <<= blkbits; | 2270 | upper_limit <<= blkbits; |
2151 | } | 2271 | } |
2152 | 2272 | ||
2153 | /* 32-bit extent-start container, ee_block */ | 2273 | /* |
2154 | res = 1LL << 32; | 2274 | * 32-bit extent-start container, ee_block. We lower the maxbytes |
2275 | * by one fs block, so ee_len can cover the extent of maximum file | ||
2276 | * size | ||
2277 | */ | ||
2278 | res = (1LL << 32) - 1; | ||
2155 | res <<= blkbits; | 2279 | res <<= blkbits; |
2156 | res -= 1; | ||
2157 | 2280 | ||
2158 | /* Sanity check against vm- & vfs- imposed limits */ | 2281 | /* Sanity check against vm- & vfs- imposed limits */ |
2159 | if (res > upper_limit) | 2282 | if (res > upper_limit) |
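A worked instance of the new bound, for the common 4 KiB block size (blkbits = 12):

	/* Illustrative arithmetic, not from the source */
	long long res = (1LL << 32) - 1;  /* 4294967295 addressable blocks  */
	res <<= 12;                       /* 17592186040320 bytes           */
	                                  /* = 16 TiB minus one 4 KiB block */

The old code computed 2^32 blocks shifted up and then subtracted one byte; subtracting one whole block instead keeps ee_len able to describe an extent that reaches the last addressable block.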
@@ -2329,6 +2452,18 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, | |||
2329 | EXT4_SB(sb)->s_sectors_written_start) >> 1))); | 2452 | EXT4_SB(sb)->s_sectors_written_start) >> 1))); |
2330 | } | 2453 | } |
2331 | 2454 | ||
2455 | static ssize_t extent_cache_hits_show(struct ext4_attr *a, | ||
2456 | struct ext4_sb_info *sbi, char *buf) | ||
2457 | { | ||
2458 | return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_hits); | ||
2459 | } | ||
2460 | |||
2461 | static ssize_t extent_cache_misses_show(struct ext4_attr *a, | ||
2462 | struct ext4_sb_info *sbi, char *buf) | ||
2463 | { | ||
2464 | return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_misses); | ||
2465 | } | ||
2466 | |||
2332 | static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | 2467 | static ssize_t inode_readahead_blks_store(struct ext4_attr *a, |
2333 | struct ext4_sb_info *sbi, | 2468 | struct ext4_sb_info *sbi, |
2334 | const char *buf, size_t count) | 2469 | const char *buf, size_t count) |
@@ -2338,7 +2473,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | |||
2338 | if (parse_strtoul(buf, 0x40000000, &t)) | 2473 | if (parse_strtoul(buf, 0x40000000, &t)) |
2339 | return -EINVAL; | 2474 | return -EINVAL; |
2340 | 2475 | ||
2341 | if (!is_power_of_2(t)) | 2476 | if (t && !is_power_of_2(t)) |
2342 | return -EINVAL; | 2477 | return -EINVAL; |
2343 | 2478 | ||
2344 | sbi->s_inode_readahead_blks = t; | 2479 | sbi->s_inode_readahead_blks = t; |
@@ -2376,6 +2511,7 @@ static struct ext4_attr ext4_attr_##_name = { \ | |||
2376 | #define EXT4_ATTR(name, mode, show, store) \ | 2511 | #define EXT4_ATTR(name, mode, show, store) \ |
2377 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | 2512 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) |
2378 | 2513 | ||
2514 | #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) | ||
2379 | #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) | 2515 | #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) |
2380 | #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) | 2516 | #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) |
2381 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ | 2517 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ |
@@ -2385,6 +2521,8 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | |||
2385 | EXT4_RO_ATTR(delayed_allocation_blocks); | 2521 | EXT4_RO_ATTR(delayed_allocation_blocks); |
2386 | EXT4_RO_ATTR(session_write_kbytes); | 2522 | EXT4_RO_ATTR(session_write_kbytes); |
2387 | EXT4_RO_ATTR(lifetime_write_kbytes); | 2523 | EXT4_RO_ATTR(lifetime_write_kbytes); |
2524 | EXT4_RO_ATTR(extent_cache_hits); | ||
2525 | EXT4_RO_ATTR(extent_cache_misses); | ||
2388 | EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, | 2526 | EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, |
2389 | inode_readahead_blks_store, s_inode_readahead_blks); | 2527 | inode_readahead_blks_store, s_inode_readahead_blks); |
2390 | EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); | 2528 | EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); |
@@ -2400,6 +2538,8 @@ static struct attribute *ext4_attrs[] = { | |||
2400 | ATTR_LIST(delayed_allocation_blocks), | 2538 | ATTR_LIST(delayed_allocation_blocks), |
2401 | ATTR_LIST(session_write_kbytes), | 2539 | ATTR_LIST(session_write_kbytes), |
2402 | ATTR_LIST(lifetime_write_kbytes), | 2540 | ATTR_LIST(lifetime_write_kbytes), |
2541 | ATTR_LIST(extent_cache_hits), | ||
2542 | ATTR_LIST(extent_cache_misses), | ||
2403 | ATTR_LIST(inode_readahead_blks), | 2543 | ATTR_LIST(inode_readahead_blks), |
2404 | ATTR_LIST(inode_goal), | 2544 | ATTR_LIST(inode_goal), |
2405 | ATTR_LIST(mb_stats), | 2545 | ATTR_LIST(mb_stats), |
@@ -2412,6 +2552,16 @@ static struct attribute *ext4_attrs[] = { | |||
2412 | NULL, | 2552 | NULL, |
2413 | }; | 2553 | }; |
2414 | 2554 | ||
2555 | /* Features this copy of ext4 supports */ | ||
2556 | EXT4_INFO_ATTR(lazy_itable_init); | ||
2557 | EXT4_INFO_ATTR(batched_discard); | ||
2558 | |||
2559 | static struct attribute *ext4_feat_attrs[] = { | ||
2560 | ATTR_LIST(lazy_itable_init), | ||
2561 | ATTR_LIST(batched_discard), | ||
2562 | NULL, | ||
2563 | }; | ||
2564 | |||
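Both feature entries are EXT4_INFO_ATTR instances, which per the macros above expand to attributes with NULL show and store callbacks; the mere presence of the file in sysfs is what advertises the capability. Expanded by hand:

	/* EXT4_INFO_ATTR(lazy_itable_init) expands, via EXT4_ATTR, to: */
	static struct ext4_attr ext4_attr_lazy_itable_init =
		__ATTR(lazy_itable_init, 0444, NULL, NULL);

These hang off the separate ext4_feat_ktype defined a few hunks below, so they describe the driver as a whole rather than any one mounted filesystem; the kobject's registration happens at module init, outside this section.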
2415 | static ssize_t ext4_attr_show(struct kobject *kobj, | 2565 | static ssize_t ext4_attr_show(struct kobject *kobj, |
2416 | struct attribute *attr, char *buf) | 2566 | struct attribute *attr, char *buf) |
2417 | { | 2567 | { |
@@ -2440,7 +2590,6 @@ static void ext4_sb_release(struct kobject *kobj) | |||
2440 | complete(&sbi->s_kobj_unregister); | 2590 | complete(&sbi->s_kobj_unregister); |
2441 | } | 2591 | } |
2442 | 2592 | ||
2443 | |||
2444 | static const struct sysfs_ops ext4_attr_ops = { | 2593 | static const struct sysfs_ops ext4_attr_ops = { |
2445 | .show = ext4_attr_show, | 2594 | .show = ext4_attr_show, |
2446 | .store = ext4_attr_store, | 2595 | .store = ext4_attr_store, |
@@ -2452,6 +2601,17 @@ static struct kobj_type ext4_ktype = { | |||
2452 | .release = ext4_sb_release, | 2601 | .release = ext4_sb_release, |
2453 | }; | 2602 | }; |
2454 | 2603 | ||
2604 | static void ext4_feat_release(struct kobject *kobj) | ||
2605 | { | ||
2606 | complete(&ext4_feat->f_kobj_unregister); | ||
2607 | } | ||
2608 | |||
2609 | static struct kobj_type ext4_feat_ktype = { | ||
2610 | .default_attrs = ext4_feat_attrs, | ||
2611 | .sysfs_ops = &ext4_attr_ops, | ||
2612 | .release = ext4_feat_release, | ||
2613 | }; | ||
2614 | |||
2455 | /* | 2615 | /* |
2456 | * Check whether this filesystem can be mounted based on | 2616 | * Check whether this filesystem can be mounted based on |
2457 | * the features present and the RDONLY/RDWR mount requested. | 2617 | * the features present and the RDONLY/RDWR mount requested. |
@@ -2542,6 +2702,343 @@ static void print_daily_error_info(unsigned long arg) | |||
2542 | mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ | 2702 | mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ |
2543 | } | 2703 | } |
2544 | 2704 | ||
2705 | /* Find next suitable group and run ext4_init_inode_table */ | ||
2706 | static int ext4_run_li_request(struct ext4_li_request *elr) | ||
2707 | { | ||
2708 | struct ext4_group_desc *gdp = NULL; | ||
2709 | ext4_group_t group, ngroups; | ||
2710 | struct super_block *sb; | ||
2711 | unsigned long timeout = 0; | ||
2712 | int ret = 0; | ||
2713 | |||
2714 | sb = elr->lr_super; | ||
2715 | ngroups = EXT4_SB(sb)->s_groups_count; | ||
2716 | |||
2717 | for (group = elr->lr_next_group; group < ngroups; group++) { | ||
2718 | gdp = ext4_get_group_desc(sb, group, NULL); | ||
2719 | if (!gdp) { | ||
2720 | ret = 1; | ||
2721 | break; | ||
2722 | } | ||
2723 | |||
2724 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) | ||
2725 | break; | ||
2726 | } | ||
2727 | |||
2728 | if (group == ngroups) | ||
2729 | ret = 1; | ||
2730 | |||
2731 | if (!ret) { | ||
2732 | timeout = jiffies; | ||
2733 | ret = ext4_init_inode_table(sb, group, | ||
2734 | elr->lr_timeout ? 0 : 1); | ||
2735 | if (elr->lr_timeout == 0) { | ||
2736 | timeout = (jiffies - timeout) * | ||
2737 | elr->lr_sbi->s_li_wait_mult; | ||
2738 | elr->lr_timeout = timeout; | ||
2739 | } | ||
2740 | elr->lr_next_sched = jiffies + elr->lr_timeout; | ||
2741 | elr->lr_next_group = group + 1; | ||
2742 | } | ||
2743 | |||
2744 | return ret; | ||
2745 | } | ||
2746 | |||
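The pacing above is the core of the lazy-init throttle: the first zeroing pass through a group is timed, and lr_timeout becomes that elapsed time multiplied by s_li_wait_mult, so later passes sleep in proportion to how long the work takes on this particular device. With illustrative numbers:

	/* Sketch with made-up values, not from the source */
	unsigned long elapsed = 50;               /* jiffies for one group */
	unsigned long wait_mult = 10;             /* s_li_wait_mult        */
	unsigned long lr_timeout = elapsed * wait_mult;  /* sleep 500      */
	/* the next group runs at jiffies + 500, so the thread spends
	 * roughly 1/(wait_mult + 1) of wall time doing zeroing I/O */

Note that ret = 1 covers both the error case and "all groups done", since either way the request should be retired from the list.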
2747 | /* | ||
2748 | * Remove the request from the request list and free the | ||
2749 | * request structure. Should be called with li_list_mtx held. | ||
2750 | */ | ||
2751 | static void ext4_remove_li_request(struct ext4_li_request *elr) | ||
2752 | { | ||
2753 | struct ext4_sb_info *sbi; | ||
2754 | |||
2755 | if (!elr) | ||
2756 | return; | ||
2757 | |||
2758 | sbi = elr->lr_sbi; | ||
2759 | |||
2760 | list_del(&elr->lr_request); | ||
2761 | sbi->s_li_request = NULL; | ||
2762 | kfree(elr); | ||
2763 | } | ||
2764 | |||
2765 | static void ext4_unregister_li_request(struct super_block *sb) | ||
2766 | { | ||
2767 | mutex_lock(&ext4_li_mtx); | ||
2768 | if (!ext4_li_info) { | ||
2769 | mutex_unlock(&ext4_li_mtx); | ||
2770 | return; | ||
2771 | } | ||
2772 | |||
2773 | mutex_lock(&ext4_li_info->li_list_mtx); | ||
2774 | ext4_remove_li_request(EXT4_SB(sb)->s_li_request); | ||
2775 | mutex_unlock(&ext4_li_info->li_list_mtx); | ||
2776 | mutex_unlock(&ext4_li_mtx); | ||
2777 | } | ||
2778 | |||
2779 | static struct task_struct *ext4_lazyinit_task; | ||
2780 | |||
2781 | /* | ||
2782 | * This is the function where the ext4lazyinit thread lives. It walks | ||
2783 | * through the request list searching for the next scheduled filesystem. | ||
2784 | * When one is found, it runs the lazy initialization request | ||
2785 | * (ext4_run_li_request) and keeps track of the time spent in this | ||
2786 | * function. Based on that time we compute the next schedule time of | ||
2787 | * the request. When the walk through the list is complete, it computes | ||
2788 | * the next wake-up time and puts itself to sleep. | ||
2789 | */ | ||
2790 | static int ext4_lazyinit_thread(void *arg) | ||
2791 | { | ||
2792 | struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; | ||
2793 | struct list_head *pos, *n; | ||
2794 | struct ext4_li_request *elr; | ||
2795 | unsigned long next_wakeup, cur; | ||
2796 | |||
2797 | BUG_ON(NULL == eli); | ||
2798 | |||
2799 | cont_thread: | ||
2800 | while (true) { | ||
2801 | next_wakeup = MAX_JIFFY_OFFSET; | ||
2802 | |||
2803 | mutex_lock(&eli->li_list_mtx); | ||
2804 | if (list_empty(&eli->li_request_list)) { | ||
2805 | mutex_unlock(&eli->li_list_mtx); | ||
2806 | goto exit_thread; | ||
2807 | } | ||
2808 | |||
2809 | list_for_each_safe(pos, n, &eli->li_request_list) { | ||
2810 | elr = list_entry(pos, struct ext4_li_request, | ||
2811 | lr_request); | ||
2812 | |||
2813 | if (time_after_eq(jiffies, elr->lr_next_sched)) { | ||
2814 | if (ext4_run_li_request(elr) != 0) { | ||
2815 | /* error, remove the lazy_init job */ | ||
2816 | ext4_remove_li_request(elr); | ||
2817 | continue; | ||
2818 | } | ||
2819 | } | ||
2820 | |||
2821 | if (time_before(elr->lr_next_sched, next_wakeup)) | ||
2822 | next_wakeup = elr->lr_next_sched; | ||
2823 | } | ||
2824 | mutex_unlock(&eli->li_list_mtx); | ||
2825 | |||
2826 | if (freezing(current)) | ||
2827 | refrigerator(); | ||
2828 | |||
2829 | cur = jiffies; | ||
2830 | if ((time_after_eq(cur, next_wakeup)) || | ||
2831 | (MAX_JIFFY_OFFSET == next_wakeup)) { | ||
2832 | cond_resched(); | ||
2833 | continue; | ||
2834 | } | ||
2835 | |||
2836 | schedule_timeout_interruptible(next_wakeup - cur); | ||
2837 | |||
2838 | if (kthread_should_stop()) { | ||
2839 | ext4_clear_request_list(); | ||
2840 | goto exit_thread; | ||
2841 | } | ||
2842 | } | ||
2843 | |||
2844 | exit_thread: | ||
2845 | /* | ||
2846 | * It looks like the request list is empty, but we need | ||
2847 | * to check it under the li_list_mtx lock, to prevent any | ||
2848 | * additions into it, and of course we should lock ext4_li_mtx | ||
2849 | * to atomically free the list and ext4_li_info, because at | ||
2850 | * this point another ext4 filesystem could be registering | ||
2851 | * a new one. | ||
2852 | */ | ||
2853 | mutex_lock(&ext4_li_mtx); | ||
2854 | mutex_lock(&eli->li_list_mtx); | ||
2855 | if (!list_empty(&eli->li_request_list)) { | ||
2856 | mutex_unlock(&eli->li_list_mtx); | ||
2857 | mutex_unlock(&ext4_li_mtx); | ||
2858 | goto cont_thread; | ||
2859 | } | ||
2860 | mutex_unlock(&eli->li_list_mtx); | ||
2861 | kfree(ext4_li_info); | ||
2862 | ext4_li_info = NULL; | ||
2863 | mutex_unlock(&ext4_li_mtx); | ||
2864 | |||
2865 | return 0; | ||
2866 | } | ||
2867 | |||
2868 | static void ext4_clear_request_list(void) | ||
2869 | { | ||
2870 | struct list_head *pos, *n; | ||
2871 | struct ext4_li_request *elr; | ||
2872 | |||
2873 | mutex_lock(&ext4_li_info->li_list_mtx); | ||
2874 | list_for_each_safe(pos, n, &ext4_li_info->li_request_list) { | ||
2875 | elr = list_entry(pos, struct ext4_li_request, | ||
2876 | lr_request); | ||
2877 | ext4_remove_li_request(elr); | ||
2878 | } | ||
2879 | mutex_unlock(&ext4_li_info->li_list_mtx); | ||
2880 | } | ||
2881 | |||
2882 | static int ext4_run_lazyinit_thread(void) | ||
2883 | { | ||
2884 | ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread, | ||
2885 | ext4_li_info, "ext4lazyinit"); | ||
2886 | if (IS_ERR(ext4_lazyinit_task)) { | ||
2887 | int err = PTR_ERR(ext4_lazyinit_task); | ||
2888 | ext4_clear_request_list(); | ||
2889 | kfree(ext4_li_info); | ||
2890 | ext4_li_info = NULL; | ||
2891 | printk(KERN_CRIT "EXT4: error %d creating inode table " | ||
2892 | "initialization thread\n", | ||
2893 | err); | ||
2894 | return err; | ||
2895 | } | ||
2896 | ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING; | ||
2897 | return 0; | ||
2898 | } | ||
2899 | |||
2900 | /* | ||
2901 | * Check whether it makes sense to run the itable init thread. If there | ||
2902 | * is at least one uninitialized inode table, return the corresponding | ||
2903 | * group number; otherwise the loop goes through all groups and returns | ||
2904 | * the total number of groups. | ||
2905 | */ | ||
2906 | static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) | ||
2907 | { | ||
2908 | ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; | ||
2909 | struct ext4_group_desc *gdp = NULL; | ||
2910 | |||
2911 | for (group = 0; group < ngroups; group++) { | ||
2912 | gdp = ext4_get_group_desc(sb, group, NULL); | ||
2913 | if (!gdp) | ||
2914 | continue; | ||
2915 | |||
2916 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) | ||
2917 | break; | ||
2918 | } | ||
2919 | |||
2920 | return group; | ||
2921 | } | ||
2922 | |||
2923 | static int ext4_li_info_new(void) | ||
2924 | { | ||
2925 | struct ext4_lazy_init *eli = NULL; | ||
2926 | |||
2927 | eli = kzalloc(sizeof(*eli), GFP_KERNEL); | ||
2928 | if (!eli) | ||
2929 | return -ENOMEM; | ||
2930 | |||
2931 | INIT_LIST_HEAD(&eli->li_request_list); | ||
2932 | mutex_init(&eli->li_list_mtx); | ||
2933 | |||
2934 | eli->li_state |= EXT4_LAZYINIT_QUIT; | ||
2935 | |||
2936 | ext4_li_info = eli; | ||
2937 | |||
2938 | return 0; | ||
2939 | } | ||
2940 | |||
2941 | static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, | ||
2942 | ext4_group_t start) | ||
2943 | { | ||
2944 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2945 | struct ext4_li_request *elr; | ||
2946 | unsigned long rnd; | ||
2947 | |||
2948 | elr = kzalloc(sizeof(*elr), GFP_KERNEL); | ||
2949 | if (!elr) | ||
2950 | return NULL; | ||
2951 | |||
2952 | elr->lr_super = sb; | ||
2953 | elr->lr_sbi = sbi; | ||
2954 | elr->lr_next_group = start; | ||
2955 | |||
2956 | /* | ||
2957 | * Randomize the first schedule time of the request to | ||
2958 | * spread the inode table initialization requests | ||
2959 | * more evenly. | ||
2960 | */ | ||
2961 | get_random_bytes(&rnd, sizeof(rnd)); | ||
2962 | elr->lr_next_sched = jiffies + (unsigned long)rnd % | ||
2963 | (EXT4_DEF_LI_MAX_START_DELAY * HZ); | ||
2964 | |||
2965 | return elr; | ||
2966 | } | ||
2967 | |||
2968 | static int ext4_register_li_request(struct super_block *sb, | ||
2969 | ext4_group_t first_not_zeroed) | ||
2970 | { | ||
2971 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2972 | struct ext4_li_request *elr; | ||
2973 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | ||
2974 | int ret = 0; | ||
2975 | |||
2976 | if (sbi->s_li_request != NULL) { | ||
2977 | /* | ||
2978 | * Reset timeout so it can be computed again, because | ||
2979 | * s_li_wait_mult might have changed. | ||
2980 | */ | ||
2981 | sbi->s_li_request->lr_timeout = 0; | ||
2982 | return 0; | ||
2983 | } | ||
2984 | |||
2985 | if (first_not_zeroed == ngroups || | ||
2986 | (sb->s_flags & MS_RDONLY) || | ||
2987 | !test_opt(sb, INIT_INODE_TABLE)) | ||
2988 | return 0; | ||
2989 | |||
2990 | elr = ext4_li_request_new(sb, first_not_zeroed); | ||
2991 | if (!elr) | ||
2992 | return -ENOMEM; | ||
2993 | |||
2994 | mutex_lock(&ext4_li_mtx); | ||
2995 | |||
2996 | if (NULL == ext4_li_info) { | ||
2997 | ret = ext4_li_info_new(); | ||
2998 | if (ret) | ||
2999 | goto out; | ||
3000 | } | ||
3001 | |||
3002 | mutex_lock(&ext4_li_info->li_list_mtx); | ||
3003 | list_add(&elr->lr_request, &ext4_li_info->li_request_list); | ||
3004 | mutex_unlock(&ext4_li_info->li_list_mtx); | ||
3005 | |||
3006 | sbi->s_li_request = elr; | ||
3007 | /* | ||
3008 | * Set elr to NULL here since it has been inserted into | ||
3009 | * the request_list; its removal and freeing are handled | ||
3010 | * by ext4_clear_request_list from now on. | ||
3011 | */ | ||
3012 | elr = NULL; | ||
3013 | |||
3014 | if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { | ||
3015 | ret = ext4_run_lazyinit_thread(); | ||
3016 | if (ret) | ||
3017 | goto out; | ||
3018 | } | ||
3019 | out: | ||
3020 | mutex_unlock(&ext4_li_mtx); | ||
3021 | if (ret) | ||
3022 | kfree(elr); | ||
3023 | return ret; | ||
3024 | } | ||
3025 | |||
3026 | /* | ||
3027 | * We do not need to lock anything since this is called on | ||
3028 | * module unload. | ||
3029 | */ | ||
3030 | static void ext4_destroy_lazyinit_thread(void) | ||
3031 | { | ||
3032 | /* | ||
3033 | * If the thread exited earlier, | ||
3034 | * there's nothing to be done. | ||
3035 | */ | ||
3036 | if (!ext4_li_info || !ext4_lazyinit_task) | ||
3037 | return; | ||
3038 | |||
3039 | kthread_stop(ext4_lazyinit_task); | ||
3040 | } | ||
3041 | |||
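Shutdown relies on the standard kthread handshake: kthread_stop() wakes the task and blocks until it exits, while the thread checks kthread_should_stop() after each sleep (as seen in ext4_lazyinit_thread above), so teardown is bounded by one wake-up. The canonical loop shape the thread follows:

	/* Canonical kthread loop shape (generic sketch) */
	while (!kthread_should_stop()) {
		do_pending_work();                     /* hypothetical helper */
		schedule_timeout_interruptible(delay);
	}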
2545 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) | 3042 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
2546 | __releases(kernel_lock) | 3043 | __releases(kernel_lock) |
2547 | __acquires(kernel_lock) | 3044 | __acquires(kernel_lock) |
@@ -2567,6 +3064,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2567 | __u64 blocks_count; | 3064 | __u64 blocks_count; |
2568 | int err; | 3065 | int err; |
2569 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; | 3066 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; |
3067 | ext4_group_t first_not_zeroed; | ||
2570 | 3068 | ||
2571 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 3069 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
2572 | if (!sbi) | 3070 | if (!sbi) |
@@ -2588,8 +3086,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2588 | sbi->s_sectors_written_start = | 3086 | sbi->s_sectors_written_start = |
2589 | part_stat_read(sb->s_bdev->bd_part, sectors[1]); | 3087 | part_stat_read(sb->s_bdev->bd_part, sectors[1]); |
2590 | 3088 | ||
2591 | unlock_kernel(); | ||
2592 | |||
2593 | /* Cleanup superblock name */ | 3089 | /* Cleanup superblock name */ |
2594 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) | 3090 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) |
2595 | *cp = '!'; | 3091 | *cp = '!'; |
@@ -2629,40 +3125,41 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2629 | 3125 | ||
2630 | /* Set defaults before we parse the mount options */ | 3126 | /* Set defaults before we parse the mount options */ |
2631 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); | 3127 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); |
3128 | set_opt(sb, INIT_INODE_TABLE); | ||
2632 | if (def_mount_opts & EXT4_DEFM_DEBUG) | 3129 | if (def_mount_opts & EXT4_DEFM_DEBUG) |
2633 | set_opt(sbi->s_mount_opt, DEBUG); | 3130 | set_opt(sb, DEBUG); |
2634 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { | 3131 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { |
2635 | ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", | 3132 | ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", |
2636 | "2.6.38"); | 3133 | "2.6.38"); |
2637 | set_opt(sbi->s_mount_opt, GRPID); | 3134 | set_opt(sb, GRPID); |
2638 | } | 3135 | } |
2639 | if (def_mount_opts & EXT4_DEFM_UID16) | 3136 | if (def_mount_opts & EXT4_DEFM_UID16) |
2640 | set_opt(sbi->s_mount_opt, NO_UID32); | 3137 | set_opt(sb, NO_UID32); |
3138 | /* xattr user namespace & acls are now defaulted on */ | ||
2641 | #ifdef CONFIG_EXT4_FS_XATTR | 3139 | #ifdef CONFIG_EXT4_FS_XATTR |
2642 | if (def_mount_opts & EXT4_DEFM_XATTR_USER) | 3140 | set_opt(sb, XATTR_USER); |
2643 | set_opt(sbi->s_mount_opt, XATTR_USER); | ||
2644 | #endif | 3141 | #endif |
2645 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 3142 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
2646 | if (def_mount_opts & EXT4_DEFM_ACL) | 3143 | set_opt(sb, POSIX_ACL); |
2647 | set_opt(sbi->s_mount_opt, POSIX_ACL); | ||
2648 | #endif | 3144 | #endif |
3145 | set_opt(sb, MBLK_IO_SUBMIT); | ||
2649 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) | 3146 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) |
2650 | set_opt(sbi->s_mount_opt, JOURNAL_DATA); | 3147 | set_opt(sb, JOURNAL_DATA); |
2651 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) | 3148 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) |
2652 | set_opt(sbi->s_mount_opt, ORDERED_DATA); | 3149 | set_opt(sb, ORDERED_DATA); |
2653 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) | 3150 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) |
2654 | set_opt(sbi->s_mount_opt, WRITEBACK_DATA); | 3151 | set_opt(sb, WRITEBACK_DATA); |
2655 | 3152 | ||
2656 | if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) | 3153 | if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) |
2657 | set_opt(sbi->s_mount_opt, ERRORS_PANIC); | 3154 | set_opt(sb, ERRORS_PANIC); |
2658 | else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) | 3155 | else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) |
2659 | set_opt(sbi->s_mount_opt, ERRORS_CONT); | 3156 | set_opt(sb, ERRORS_CONT); |
2660 | else | 3157 | else |
2661 | set_opt(sbi->s_mount_opt, ERRORS_RO); | 3158 | set_opt(sb, ERRORS_RO); |
2662 | if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) | 3159 | if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) |
2663 | set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); | 3160 | set_opt(sb, BLOCK_VALIDITY); |
2664 | if (def_mount_opts & EXT4_DEFM_DISCARD) | 3161 | if (def_mount_opts & EXT4_DEFM_DISCARD) |
2665 | set_opt(sbi->s_mount_opt, DISCARD); | 3162 | set_opt(sb, DISCARD); |
2666 | 3163 | ||
2667 | sbi->s_resuid = le16_to_cpu(es->s_def_resuid); | 3164 | sbi->s_resuid = le16_to_cpu(es->s_def_resuid); |
2668 | sbi->s_resgid = le16_to_cpu(es->s_def_resgid); | 3165 | sbi->s_resgid = le16_to_cpu(es->s_def_resgid); |
@@ -2671,7 +3168,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2671 | sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; | 3168 | sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; |
2672 | 3169 | ||
2673 | if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) | 3170 | if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) |
2674 | set_opt(sbi->s_mount_opt, BARRIER); | 3171 | set_opt(sb, BARRIER); |
2675 | 3172 | ||
2676 | /* | 3173 | /* |
2677 | * enable delayed allocation by default | 3174 | * enable delayed allocation by default |
@@ -2679,7 +3176,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2679 | */ | 3176 | */ |
2680 | if (!IS_EXT3_SB(sb) && | 3177 | if (!IS_EXT3_SB(sb) && |
2681 | ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) | 3178 | ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) |
2682 | set_opt(sbi->s_mount_opt, DELALLOC); | 3179 | set_opt(sb, DELALLOC); |
3180 | |||
3181 | /* | ||
3182 | * set default s_li_wait_mult for lazyinit, for the case there is | ||
3183 | * no mount option specified. | ||
3184 | */ | ||
3185 | sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; | ||
2683 | 3186 | ||
2684 | if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, | 3187 | if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, |
2685 | &journal_devnum, &journal_ioprio, NULL, 0)) { | 3188 | &journal_devnum, &journal_ioprio, NULL, 0)) { |
@@ -2702,6 +3205,28 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2702 | "feature flags set on rev 0 fs, " | 3205 | "feature flags set on rev 0 fs, " |
2703 | "running e2fsck is recommended"); | 3206 | "running e2fsck is recommended"); |
2704 | 3207 | ||
3208 | if (IS_EXT2_SB(sb)) { | ||
3209 | if (ext2_feature_set_ok(sb)) | ||
3210 | ext4_msg(sb, KERN_INFO, "mounting ext2 file system " | ||
3211 | "using the ext4 subsystem"); | ||
3212 | else { | ||
3213 | ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " | ||
3214 | "to feature incompatibilities"); | ||
3215 | goto failed_mount; | ||
3216 | } | ||
3217 | } | ||
3218 | |||
3219 | if (IS_EXT3_SB(sb)) { | ||
3220 | if (ext3_feature_set_ok(sb)) | ||
3221 | ext4_msg(sb, KERN_INFO, "mounting ext3 file system " | ||
3222 | "using the ext4 subsystem"); | ||
3223 | else { | ||
3224 | ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " | ||
3225 | "to feature incompatibilities"); | ||
3226 | goto failed_mount; | ||
3227 | } | ||
3228 | } | ||
3229 | |||
2705 | /* | 3230 | /* |
2706 | * Check feature flags regardless of the revision level, since we | 3231 | * Check feature flags regardless of the revision level, since we |
2707 | * previously didn't change the revision level when setting the flags, | 3232 | * previously didn't change the revision level when setting the flags, |
@@ -2831,15 +3356,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2831 | * Test whether we have more sectors than will fit in sector_t, | 3356 | * Test whether we have more sectors than will fit in sector_t, |
2832 | * and whether the max offset is addressable by the page cache. | 3357 | * and whether the max offset is addressable by the page cache. |
2833 | */ | 3358 | */ |
2834 | if ((ext4_blocks_count(es) > | 3359 | err = generic_check_addressable(sb->s_blocksize_bits, |
2835 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || | 3360 | ext4_blocks_count(es)); |
2836 | (ext4_blocks_count(es) > | 3361 | if (err) { |
2837 | (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { | ||
2838 | ext4_msg(sb, KERN_ERR, "filesystem" | 3362 | ext4_msg(sb, KERN_ERR, "filesystem" |
2839 | " too large to mount safely on this system"); | 3363 | " too large to mount safely on this system"); |
2840 | if (sizeof(sector_t) < 8) | 3364 | if (sizeof(sector_t) < 8) |
2841 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); | 3365 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); |
2842 | ret = -EFBIG; | 3366 | ret = err; |
2843 | goto failed_mount; | 3367 | goto failed_mount; |
2844 | } | 3368 | } |
2845 | 3369 | ||
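generic_check_addressable() folds together the two open-coded overflow tests it replaces: the block count expressed as 512-byte sectors must fit in sector_t, and the byte range must be indexable by the page cache's pgoff_t. A sketch of the condition, assuming it mirrors the removed code (and that block size does not exceed page size, as holds for ext4):

	/* Sketch of the bounds the helper enforces (assumption) */
	u64 max_sectors = (sector_t)~0ULL >> (blkbits - 9);
	u64 max_pages   = (pgoff_t)~0ULL >> (PAGE_CACHE_SHIFT - blkbits);
	if (nblocks > max_sectors || nblocks > max_pages)
		return -EFBIG;

Propagating the helper's return value (ret = err) also replaces the previously hard-coded -EFBIG.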
@@ -2908,7 +3432,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2908 | goto failed_mount2; | 3432 | goto failed_mount2; |
2909 | } | 3433 | } |
2910 | } | 3434 | } |
2911 | if (!ext4_check_descriptors(sb)) { | 3435 | if (!ext4_check_descriptors(sb, &first_not_zeroed)) { |
2912 | ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); | 3436 | ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); |
2913 | goto failed_mount2; | 3437 | goto failed_mount2; |
2914 | } | 3438 | } |
@@ -2924,6 +3448,28 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2924 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); | 3448 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); |
2925 | spin_lock_init(&sbi->s_next_gen_lock); | 3449 | spin_lock_init(&sbi->s_next_gen_lock); |
2926 | 3450 | ||
3451 | init_timer(&sbi->s_err_report); | ||
3452 | sbi->s_err_report.function = print_daily_error_info; | ||
3453 | sbi->s_err_report.data = (unsigned long) sb; | ||
3454 | |||
3455 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | ||
3456 | ext4_count_free_blocks(sb)); | ||
3457 | if (!err) { | ||
3458 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | ||
3459 | ext4_count_free_inodes(sb)); | ||
3460 | } | ||
3461 | if (!err) { | ||
3462 | err = percpu_counter_init(&sbi->s_dirs_counter, | ||
3463 | ext4_count_dirs(sb)); | ||
3464 | } | ||
3465 | if (!err) { | ||
3466 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | ||
3467 | } | ||
3468 | if (err) { | ||
3469 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | ||
3470 | goto failed_mount3; | ||
3471 | } | ||
3472 | |||
2927 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 3473 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
2928 | sbi->s_max_writeback_mb_bump = 128; | 3474 | sbi->s_max_writeback_mb_bump = 128; |
2929 | 3475 | ||
@@ -2941,6 +3487,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2941 | sb->s_qcop = &ext4_qctl_operations; | 3487 | sb->s_qcop = &ext4_qctl_operations; |
2942 | sb->dq_op = &ext4_quota_operations; | 3488 | sb->dq_op = &ext4_quota_operations; |
2943 | #endif | 3489 | #endif |
3490 | memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); | ||
3491 | |||
2944 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ | 3492 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ |
2945 | mutex_init(&sbi->s_orphan_lock); | 3493 | mutex_init(&sbi->s_orphan_lock); |
2946 | mutex_init(&sbi->s_resize_lock); | 3494 | mutex_init(&sbi->s_resize_lock); |
@@ -2951,6 +3499,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2951 | EXT4_HAS_INCOMPAT_FEATURE(sb, | 3499 | EXT4_HAS_INCOMPAT_FEATURE(sb, |
2952 | EXT4_FEATURE_INCOMPAT_RECOVER)); | 3500 | EXT4_FEATURE_INCOMPAT_RECOVER)); |
2953 | 3501 | ||
3502 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && | ||
3503 | !(sb->s_flags & MS_RDONLY)) | ||
3504 | if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) | ||
3505 | goto failed_mount3; | ||
3506 | |||
2954 | /* | 3507 | /* |
2955 | * The first inode we look at is the journal inode. Don't try | 3508 | * The first inode we look at is the journal inode. Don't try |
2956 | * root first: it may be modified in the journal! | 3509 | * root first: it may be modified in the journal! |
@@ -2965,8 +3518,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2965 | "suppressed and not mounted read-only"); | 3518 | "suppressed and not mounted read-only"); |
2966 | goto failed_mount_wq; | 3519 | goto failed_mount_wq; |
2967 | } else { | 3520 | } else { |
2968 | clear_opt(sbi->s_mount_opt, DATA_FLAGS); | 3521 | clear_opt(sb, DATA_FLAGS); |
2969 | set_opt(sbi->s_mount_opt, WRITEBACK_DATA); | ||
2970 | sbi->s_journal = NULL; | 3522 | sbi->s_journal = NULL; |
2971 | needs_recovery = 0; | 3523 | needs_recovery = 0; |
2972 | goto no_journal; | 3524 | goto no_journal; |
@@ -3004,9 +3556,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3004 | */ | 3556 | */ |
3005 | if (jbd2_journal_check_available_features | 3557 | if (jbd2_journal_check_available_features |
3006 | (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) | 3558 | (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) |
3007 | set_opt(sbi->s_mount_opt, ORDERED_DATA); | 3559 | set_opt(sb, ORDERED_DATA); |
3008 | else | 3560 | else |
3009 | set_opt(sbi->s_mount_opt, JOURNAL_DATA); | 3561 | set_opt(sb, JOURNAL_DATA); |
3010 | break; | 3562 | break; |
3011 | 3563 | ||
3012 | case EXT4_MOUNT_ORDERED_DATA: | 3564 | case EXT4_MOUNT_ORDERED_DATA: |
@@ -3022,23 +3574,25 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3022 | } | 3574 | } |
3023 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); | 3575 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); |
3024 | 3576 | ||
3025 | no_journal: | 3577 | /* |
3026 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | 3578 | * The journal may have updated the bg summary counts, so we |
3027 | ext4_count_free_blocks(sb)); | 3579 | * need to update the global counters. |
3028 | if (!err) | 3580 | */ |
3029 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | 3581 | percpu_counter_set(&sbi->s_freeblocks_counter, |
3030 | ext4_count_free_inodes(sb)); | 3582 | ext4_count_free_blocks(sb)); |
3031 | if (!err) | 3583 | percpu_counter_set(&sbi->s_freeinodes_counter, |
3032 | err = percpu_counter_init(&sbi->s_dirs_counter, | 3584 | ext4_count_free_inodes(sb)); |
3033 | ext4_count_dirs(sb)); | 3585 | percpu_counter_set(&sbi->s_dirs_counter, |
3034 | if (!err) | 3586 | ext4_count_dirs(sb)); |
3035 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | 3587 | percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); |
3036 | if (err) { | ||
3037 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | ||
3038 | goto failed_mount_wq; | ||
3039 | } | ||
3040 | 3588 | ||
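Moving percpu_counter_init() up to mount time (see the earlier hunk in ext4_fill_super) and using percpu_counter_set() here reflects the two calls' different contracts: init allocates per-CPU storage and can fail, set merely rewrites the value. A sketch of the split, with a hypothetical wrapper name:

	/* Sketch (hypothetical helper), not quoted from the patch */
	static int counters_example(struct percpu_counter *c, s64 recovered)
	{
		int err = percpu_counter_init(c, 0); /* may fail: -ENOMEM */
		if (err)
			return err;
		/* after journal replay, refresh in place: cannot fail */
		percpu_counter_set(c, recovered);
		return 0;
	}

Doing the fallible allocation before the journal is loaded is what lets this post-recovery path drop its error handling entirely.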
3041 | EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); | 3589 | no_journal: |
3590 | /* | ||
3591 | * The maximum number of concurrent works can be high and | ||
3592 | * concurrency isn't really necessary. Limit it to 1. | ||
3593 | */ | ||
3594 | EXT4_SB(sb)->dio_unwritten_wq = | ||
3595 | alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); | ||
3042 | if (!EXT4_SB(sb)->dio_unwritten_wq) { | 3596 | if (!EXT4_SB(sb)->dio_unwritten_wq) { |
3043 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); | 3597 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); |
3044 | goto failed_mount_wq; | 3598 | goto failed_mount_wq; |
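The switch from create_workqueue() to alloc_workqueue() with max_active = 1 implements the comment above: unwritten-extent conversion gains nothing from concurrency, WQ_MEM_RECLAIM keeps a rescuer thread so forward progress is guaranteed under memory pressure, and WQ_UNBOUND avoids pinning work items to the submitting CPU. The general call shape is:

	/* General form; flags and max_active as used in the patch */
	struct workqueue_struct *wq =
		alloc_workqueue("name", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);

so the old per-CPU multithreaded queue was strictly more machinery than this path needs.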
@@ -3053,17 +3607,16 @@ no_journal: | |||
3053 | if (IS_ERR(root)) { | 3607 | if (IS_ERR(root)) { |
3054 | ext4_msg(sb, KERN_ERR, "get root inode failed"); | 3608 | ext4_msg(sb, KERN_ERR, "get root inode failed"); |
3055 | ret = PTR_ERR(root); | 3609 | ret = PTR_ERR(root); |
3610 | root = NULL; | ||
3056 | goto failed_mount4; | 3611 | goto failed_mount4; |
3057 | } | 3612 | } |
3058 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { | 3613 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { |
3059 | iput(root); | ||
3060 | ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); | 3614 | ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); |
3061 | goto failed_mount4; | 3615 | goto failed_mount4; |
3062 | } | 3616 | } |
3063 | sb->s_root = d_alloc_root(root); | 3617 | sb->s_root = d_alloc_root(root); |
3064 | if (!sb->s_root) { | 3618 | if (!sb->s_root) { |
3065 | ext4_msg(sb, KERN_ERR, "get root dentry failed"); | 3619 | ext4_msg(sb, KERN_ERR, "get root dentry failed"); |
3066 | iput(root); | ||
3067 | ret = -ENOMEM; | 3620 | ret = -ENOMEM; |
3068 | goto failed_mount4; | 3621 | goto failed_mount4; |
3069 | } | 3622 | } |
@@ -3099,18 +3652,18 @@ no_journal: | |||
3099 | (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { | 3652 | (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { |
3100 | ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " | 3653 | ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " |
3101 | "requested data journaling mode"); | 3654 | "requested data journaling mode"); |
3102 | clear_opt(sbi->s_mount_opt, DELALLOC); | 3655 | clear_opt(sb, DELALLOC); |
3103 | } | 3656 | } |
3104 | if (test_opt(sb, DIOREAD_NOLOCK)) { | 3657 | if (test_opt(sb, DIOREAD_NOLOCK)) { |
3105 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | 3658 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { |
3106 | ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " | 3659 | ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " |
3107 | "option - requested data journaling mode"); | 3660 | "option - requested data journaling mode"); |
3108 | clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); | 3661 | clear_opt(sb, DIOREAD_NOLOCK); |
3109 | } | 3662 | } |
3110 | if (sb->s_blocksize < PAGE_SIZE) { | 3663 | if (sb->s_blocksize < PAGE_SIZE) { |
3111 | ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " | 3664 | ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " |
3112 | "option - block size is too small"); | 3665 | "option - block size is too small"); |
3113 | clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); | 3666 | clear_opt(sb, DIOREAD_NOLOCK); |
3114 | } | 3667 | } |
3115 | } | 3668 | } |
3116 | 3669 | ||
@@ -3129,6 +3682,10 @@ no_journal: | |||
3129 | goto failed_mount4; | 3682 | goto failed_mount4; |
3130 | } | 3683 | } |
3131 | 3684 | ||
3685 | err = ext4_register_li_request(sb, first_not_zeroed); | ||
3686 | if (err) | ||
3687 | goto failed_mount4; | ||
3688 | |||
3132 | sbi->s_kobj.kset = ext4_kset; | 3689 | sbi->s_kobj.kset = ext4_kset; |
3133 | init_completion(&sbi->s_kobj_unregister); | 3690 | init_completion(&sbi->s_kobj_unregister); |
3134 | err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, | 3691 | err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, |
@@ -3160,13 +3717,9 @@ no_journal: | |||
3160 | "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, | 3717 | "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, |
3161 | *sbi->s_es->s_mount_opts ? "; " : "", orig_data); | 3718 | *sbi->s_es->s_mount_opts ? "; " : "", orig_data); |
3162 | 3719 | ||
3163 | init_timer(&sbi->s_err_report); | ||
3164 | sbi->s_err_report.function = print_daily_error_info; | ||
3165 | sbi->s_err_report.data = (unsigned long) sb; | ||
3166 | if (es->s_error_count) | 3720 | if (es->s_error_count) |
3167 | mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ | 3721 | mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ |
3168 | 3722 | ||
3169 | lock_kernel(); | ||
3170 | kfree(orig_data); | 3723 | kfree(orig_data); |
3171 | return 0; | 3724 | return 0; |
3172 | 3725 | ||
@@ -3176,6 +3729,8 @@ cantfind_ext4: | |||
3176 | goto failed_mount; | 3729 | goto failed_mount; |
3177 | 3730 | ||
3178 | failed_mount4: | 3731 | failed_mount4: |
3732 | iput(root); | ||
3733 | sb->s_root = NULL; | ||
3179 | ext4_msg(sb, KERN_ERR, "mount failed"); | 3734 | ext4_msg(sb, KERN_ERR, "mount failed"); |
3180 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); | 3735 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); |
3181 | failed_mount_wq: | 3736 | failed_mount_wq: |
@@ -3184,17 +3739,20 @@ failed_mount_wq: | |||
3184 | jbd2_journal_destroy(sbi->s_journal); | 3739 | jbd2_journal_destroy(sbi->s_journal); |
3185 | sbi->s_journal = NULL; | 3740 | sbi->s_journal = NULL; |
3186 | } | 3741 | } |
3187 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | ||
3188 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | ||
3189 | percpu_counter_destroy(&sbi->s_dirs_counter); | ||
3190 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
3191 | failed_mount3: | 3742 | failed_mount3: |
3743 | del_timer(&sbi->s_err_report); | ||
3192 | if (sbi->s_flex_groups) { | 3744 | if (sbi->s_flex_groups) { |
3193 | if (is_vmalloc_addr(sbi->s_flex_groups)) | 3745 | if (is_vmalloc_addr(sbi->s_flex_groups)) |
3194 | vfree(sbi->s_flex_groups); | 3746 | vfree(sbi->s_flex_groups); |
3195 | else | 3747 | else |
3196 | kfree(sbi->s_flex_groups); | 3748 | kfree(sbi->s_flex_groups); |
3197 | } | 3749 | } |
3750 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | ||
3751 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | ||
3752 | percpu_counter_destroy(&sbi->s_dirs_counter); | ||
3753 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
3754 | if (sbi->s_mmp_tsk) | ||
3755 | kthread_stop(sbi->s_mmp_tsk); | ||
3198 | failed_mount2: | 3756 | failed_mount2: |
3199 | for (i = 0; i < db_count; i++) | 3757 | for (i = 0; i < db_count; i++) |
3200 | brelse(sbi->s_group_desc[i]); | 3758 | brelse(sbi->s_group_desc[i]); |
@@ -3213,7 +3771,6 @@ out_fail: | |||
3213 | sb->s_fs_info = NULL; | 3771 | sb->s_fs_info = NULL; |
3214 | kfree(sbi->s_blockgroup_lock); | 3772 | kfree(sbi->s_blockgroup_lock); |
3215 | kfree(sbi); | 3773 | kfree(sbi); |
3216 | lock_kernel(); | ||
3217 | out_free_orig: | 3774 | out_free_orig: |
3218 | kfree(orig_data); | 3775 | kfree(orig_data); |
3219 | return ret; | 3776 | return ret; |
@@ -3306,13 +3863,6 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, | |||
3306 | if (bdev == NULL) | 3863 | if (bdev == NULL) |
3307 | return NULL; | 3864 | return NULL; |
3308 | 3865 | ||
3309 | if (bd_claim(bdev, sb)) { | ||
3310 | ext4_msg(sb, KERN_ERR, | ||
3311 | "failed to claim external journal device"); | ||
3312 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE); | ||
3313 | return NULL; | ||
3314 | } | ||
3315 | |||
3316 | blocksize = sb->s_blocksize; | 3866 | blocksize = sb->s_blocksize; |
3317 | hblock = bdev_logical_block_size(bdev); | 3867 | hblock = bdev_logical_block_size(bdev); |
3318 | if (blocksize < hblock) { | 3868 | if (blocksize < hblock) { |
@@ -3470,7 +4020,7 @@ static int ext4_load_journal(struct super_block *sb, | |||
3470 | EXT4_SB(sb)->s_journal = journal; | 4020 | EXT4_SB(sb)->s_journal = journal; |
3471 | ext4_clear_journal_err(sb, es); | 4021 | ext4_clear_journal_err(sb, es); |
3472 | 4022 | ||
3473 | if (journal_devnum && | 4023 | if (!really_read_only && journal_devnum && |
3474 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { | 4024 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { |
3475 | es->s_journal_dev = cpu_to_le32(journal_devnum); | 4025 | es->s_journal_dev = cpu_to_le32(journal_devnum); |
3476 | 4026 | ||
@@ -3524,9 +4074,10 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
3524 | es->s_kbytes_written = | 4074 | es->s_kbytes_written = |
3525 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); | 4075 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); |
3526 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( | 4076 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( |
3527 | &EXT4_SB(sb)->s_freeblocks_counter)); | 4077 | &EXT4_SB(sb)->s_freeblocks_counter)); |
3528 | es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( | 4078 | es->s_free_inodes_count = |
3529 | &EXT4_SB(sb)->s_freeinodes_counter)); | 4079 | cpu_to_le32(percpu_counter_sum_positive( |
4080 | &EXT4_SB(sb)->s_freeinodes_counter)); | ||
3530 | sb->s_dirt = 0; | 4081 | sb->s_dirt = 0; |
3531 | BUFFER_TRACE(sbh, "marking dirty"); | 4082 | BUFFER_TRACE(sbh, "marking dirty"); |
3532 | mark_buffer_dirty(sbh); | 4083 | mark_buffer_dirty(sbh); |
@@ -3658,6 +4209,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
3658 | /* | 4209 | /* |
3659 | * LVM calls this function before a (read-only) snapshot is created. This | 4210 | * LVM calls this function before a (read-only) snapshot is created. This |
3660 | * gives us a chance to flush the journal completely and mark the fs clean. | 4211 | * gives us a chance to flush the journal completely and mark the fs clean. |
4212 | * | ||
4213 | * Note that this function alone cannot bring the filesystem into a clean | ||
4214 | * state, because ext4 relies on @sb->s_frozen, which lives in an upper | ||
4215 | * layer, to prevent new handles from being started. It thus needs help | ||
4216 | * from the upper layer. | ||
3661 | */ | 4217 | */ |
3662 | static int ext4_freeze(struct super_block *sb) | 4218 | static int ext4_freeze(struct super_block *sb) |
3663 | { | 4219 | { |
@@ -3706,6 +4262,22 @@ static int ext4_unfreeze(struct super_block *sb) | |||
3706 | return 0; | 4262 | return 0; |
3707 | } | 4263 | } |
3708 | 4264 | ||
4265 | /* | ||
4266 | * Structure to save mount options for ext4_remount's benefit | ||
4267 | */ | ||
4268 | struct ext4_mount_options { | ||
4269 | unsigned long s_mount_opt; | ||
4270 | unsigned long s_mount_opt2; | ||
4271 | uid_t s_resuid; | ||
4272 | gid_t s_resgid; | ||
4273 | unsigned long s_commit_interval; | ||
4274 | u32 s_min_batch_time, s_max_batch_time; | ||
4275 | #ifdef CONFIG_QUOTA | ||
4276 | int s_jquota_fmt; | ||
4277 | char *s_qf_names[MAXQUOTAS]; | ||
4278 | #endif | ||
4279 | }; | ||
4280 | |||
3709 | static int ext4_remount(struct super_block *sb, int *flags, char *data) | 4281 | static int ext4_remount(struct super_block *sb, int *flags, char *data) |
3710 | { | 4282 | { |
3711 | struct ext4_super_block *es; | 4283 | struct ext4_super_block *es; |
@@ -3716,18 +4288,17 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3716 | int enable_quota = 0; | 4288 | int enable_quota = 0; |
3717 | ext4_group_t g; | 4289 | ext4_group_t g; |
3718 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; | 4290 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; |
3719 | int err; | 4291 | int err = 0; |
3720 | #ifdef CONFIG_QUOTA | 4292 | #ifdef CONFIG_QUOTA |
3721 | int i; | 4293 | int i; |
3722 | #endif | 4294 | #endif |
3723 | char *orig_data = kstrdup(data, GFP_KERNEL); | 4295 | char *orig_data = kstrdup(data, GFP_KERNEL); |
3724 | 4296 | ||
3725 | lock_kernel(); | ||
3726 | |||
3727 | /* Store the original options */ | 4297 | /* Store the original options */ |
3728 | lock_super(sb); | 4298 | lock_super(sb); |
3729 | old_sb_flags = sb->s_flags; | 4299 | old_sb_flags = sb->s_flags; |
3730 | old_opts.s_mount_opt = sbi->s_mount_opt; | 4300 | old_opts.s_mount_opt = sbi->s_mount_opt; |
4301 | old_opts.s_mount_opt2 = sbi->s_mount_opt2; | ||
3731 | old_opts.s_resuid = sbi->s_resuid; | 4302 | old_opts.s_resuid = sbi->s_resuid; |
3732 | old_opts.s_resgid = sbi->s_resgid; | 4303 | old_opts.s_resgid = sbi->s_resgid; |
3733 | old_opts.s_commit_interval = sbi->s_commit_interval; | 4304 | old_opts.s_commit_interval = sbi->s_commit_interval; |
@@ -3843,9 +4414,29 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3843 | goto restore_opts; | 4414 | goto restore_opts; |
3844 | if (!ext4_setup_super(sb, es, 0)) | 4415 | if (!ext4_setup_super(sb, es, 0)) |
3845 | sb->s_flags &= ~MS_RDONLY; | 4416 | sb->s_flags &= ~MS_RDONLY; |
4417 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
4418 | EXT4_FEATURE_INCOMPAT_MMP)) | ||
4419 | if (ext4_multi_mount_protect(sb, | ||
4420 | le64_to_cpu(es->s_mmp_block))) { | ||
4421 | err = -EROFS; | ||
4422 | goto restore_opts; | ||
4423 | } | ||
3846 | enable_quota = 1; | 4424 | enable_quota = 1; |
3847 | } | 4425 | } |
3848 | } | 4426 | } |
4427 | |||
4428 | /* | ||
4429 | * Reinitialize lazy itable initialization thread based on | ||
4430 | * current settings | ||
4431 | */ | ||
4432 | if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE)) | ||
4433 | ext4_unregister_li_request(sb); | ||
4434 | else { | ||
4435 | ext4_group_t first_not_zeroed; | ||
4436 | first_not_zeroed = ext4_has_uninit_itable(sb); | ||
4437 | ext4_register_li_request(sb, first_not_zeroed); | ||
4438 | } | ||
4439 | |||
3849 | ext4_setup_system_zone(sb); | 4440 | ext4_setup_system_zone(sb); |
3850 | if (sbi->s_journal == NULL) | 4441 | if (sbi->s_journal == NULL) |
3851 | ext4_commit_super(sb, 1); | 4442 | ext4_commit_super(sb, 1); |
@@ -3858,7 +4449,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3858 | kfree(old_opts.s_qf_names[i]); | 4449 | kfree(old_opts.s_qf_names[i]); |
3859 | #endif | 4450 | #endif |
3860 | unlock_super(sb); | 4451 | unlock_super(sb); |
3861 | unlock_kernel(); | ||
3862 | if (enable_quota) | 4452 | if (enable_quota) |
3863 | dquot_resume(sb, -1); | 4453 | dquot_resume(sb, -1); |
3864 | 4454 | ||
@@ -3869,6 +4459,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3869 | restore_opts: | 4459 | restore_opts: |
3870 | sb->s_flags = old_sb_flags; | 4460 | sb->s_flags = old_sb_flags; |
3871 | sbi->s_mount_opt = old_opts.s_mount_opt; | 4461 | sbi->s_mount_opt = old_opts.s_mount_opt; |
4462 | sbi->s_mount_opt2 = old_opts.s_mount_opt2; | ||
3872 | sbi->s_resuid = old_opts.s_resuid; | 4463 | sbi->s_resuid = old_opts.s_resuid; |
3873 | sbi->s_resgid = old_opts.s_resgid; | 4464 | sbi->s_resgid = old_opts.s_resgid; |
3874 | sbi->s_commit_interval = old_opts.s_commit_interval; | 4465 | sbi->s_commit_interval = old_opts.s_commit_interval; |
@@ -3884,7 +4475,6 @@ restore_opts: | |||
3884 | } | 4475 | } |
3885 | #endif | 4476 | #endif |
3886 | unlock_super(sb); | 4477 | unlock_super(sb); |
3887 | unlock_kernel(); | ||
3888 | kfree(orig_data); | 4478 | kfree(orig_data); |
3889 | return err; | 4479 | return err; |
3890 | } | 4480 | } |
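The remount path above follows a snapshot-and-restore idiom: the current options are copied into ext4_mount_options, the new options are applied, and restore_opts undoes everything on failure. A self-contained userspace distillation of that pattern (the struct and option string here are hypothetical):

#include <stdio.h>
#include <string.h>

struct opts { unsigned long mount_opt; unsigned int commit_interval; };

static int apply_new_opts(struct opts *o, const char *data)
{
	if (strcmp(data, "commit=5") == 0) {
		o->commit_interval = 5;
		return 0;
	}
	return -1;                  /* unknown option string */
}

static int remount(struct opts *cur, const char *data)
{
	struct opts old = *cur;     /* "Store the original options" */
	int err = apply_new_opts(cur, data);
	if (err)
		*cur = old;             /* restore_opts: undo partial changes */
	return err;
}

int main(void)
{
	struct opts o = { 0, 15 };
	printf("%d %u\n", remount(&o, "commit=5"), o.commit_interval); /* 0 5 */
	printf("%d %u\n", remount(&o, "bogus"), o.commit_interval);    /* -1 5 */
	return 0;
}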
@@ -3895,6 +4485,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
3895 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4485 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
3896 | struct ext4_super_block *es = sbi->s_es; | 4486 | struct ext4_super_block *es = sbi->s_es; |
3897 | u64 fsid; | 4487 | u64 fsid; |
4488 | s64 bfree; | ||
3898 | 4489 | ||
3899 | if (test_opt(sb, MINIX_DF)) { | 4490 | if (test_opt(sb, MINIX_DF)) { |
3900 | sbi->s_overhead_last = 0; | 4491 | sbi->s_overhead_last = 0; |
@@ -3938,8 +4529,10 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
3938 | buf->f_type = EXT4_SUPER_MAGIC; | 4529 | buf->f_type = EXT4_SUPER_MAGIC; |
3939 | buf->f_bsize = sb->s_blocksize; | 4530 | buf->f_bsize = sb->s_blocksize; |
3940 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; | 4531 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; |
3941 | buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - | 4532 | bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - |
3942 | percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); | 4533 | percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); |
4534 | /* prevent underflow in case little free space is available */ | ||
4535 | buf->f_bfree = max_t(s64, bfree, 0); | ||
3943 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); | 4536 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); |
3944 | if (buf->f_bfree < ext4_r_blocks_count(es)) | 4537 | if (buf->f_bfree < ext4_r_blocks_count(es)) |
3945 | buf->f_bavail = 0; | 4538 | buf->f_bavail = 0; |
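The bfree hunk guards against unsigned wrap-around: when dirty blocks transiently exceed free blocks, subtracting the two u64 sums would wrap to a huge value, whereas computing in s64 and clamping with max_t yields 0. A small demo of the difference (plain C, illustrative only):

#include <stdio.h>
#include <stdint.h>

static int64_t max_s64(int64_t a, int64_t b) { return a > b ? a : b; }

int main(void)
{
	uint64_t free_blocks = 100, dirty_blocks = 130;

	uint64_t wrapped = free_blocks - dirty_blocks;            /* wraps */
	int64_t bfree = (int64_t)free_blocks - (int64_t)dirty_blocks;

	printf("unsigned: %llu\n", (unsigned long long)wrapped);  /* huge */
	printf("clamped:  %lld\n", (long long)max_s64(bfree, 0)); /* 0 */
	return 0;
}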
@@ -4066,27 +4659,20 @@ static int ext4_quota_on_mount(struct super_block *sb, int type) | |||
4066 | * Standard function to be called on quota_on | 4659 | * Standard function to be called on quota_on |
4067 | */ | 4660 | */ |
4068 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, | 4661 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, |
4069 | char *name) | 4662 | struct path *path) |
4070 | { | 4663 | { |
4071 | int err; | 4664 | int err; |
4072 | struct path path; | ||
4073 | 4665 | ||
4074 | if (!test_opt(sb, QUOTA)) | 4666 | if (!test_opt(sb, QUOTA)) |
4075 | return -EINVAL; | 4667 | return -EINVAL; |
4076 | 4668 | ||
4077 | err = kern_path(name, LOOKUP_FOLLOW, &path); | ||
4078 | if (err) | ||
4079 | return err; | ||
4080 | |||
4081 | /* Quotafile not on the same filesystem? */ | 4669 | /* Quotafile not on the same filesystem? */ |
4082 | if (path.mnt->mnt_sb != sb) { | 4670 | if (path->mnt->mnt_sb != sb) |
4083 | path_put(&path); | ||
4084 | return -EXDEV; | 4671 | return -EXDEV; |
4085 | } | ||
4086 | /* Journaling quota? */ | 4672 | /* Journaling quota? */ |
4087 | if (EXT4_SB(sb)->s_qf_names[type]) { | 4673 | if (EXT4_SB(sb)->s_qf_names[type]) { |
4088 | /* Quotafile not in fs root? */ | 4674 | /* Quotafile not in fs root? */ |
4089 | if (path.dentry->d_parent != sb->s_root) | 4675 | if (path->dentry->d_parent != sb->s_root) |
4090 | ext4_msg(sb, KERN_WARNING, | 4676 | ext4_msg(sb, KERN_WARNING, |
4091 | "Quota file not on filesystem root. " | 4677 | "Quota file not on filesystem root. " |
4092 | "Journaled quota will not work"); | 4678 | "Journaled quota will not work"); |
@@ -4097,7 +4683,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
4097 | * all updates to the file when we bypass pagecache... | 4683 | * all updates to the file when we bypass pagecache... |
4098 | */ | 4684 | */ |
4099 | if (EXT4_SB(sb)->s_journal && | 4685 | if (EXT4_SB(sb)->s_journal && |
4100 | ext4_should_journal_data(path.dentry->d_inode)) { | 4686 | ext4_should_journal_data(path->dentry->d_inode)) { |
4101 | /* | 4687 | /* |
4102 | * We don't need to lock updates but journal_flush() could | 4688 | * We don't need to lock updates but journal_flush() could |
4103 | * otherwise be livelocked... | 4689 | * otherwise be livelocked... |
@@ -4105,32 +4691,42 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
4105 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 4691 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
4106 | err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 4692 | err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
4107 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 4693 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
4108 | if (err) { | 4694 | if (err) |
4109 | path_put(&path); | ||
4110 | return err; | 4695 | return err; |
4111 | } | ||
4112 | } | 4696 | } |
4113 | 4697 | ||
4114 | err = dquot_quota_on_path(sb, type, format_id, &path); | 4698 | return dquot_quota_on(sb, type, format_id, path); |
4115 | path_put(&path); | ||
4116 | return err; | ||
4117 | } | 4699 | } |
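With this change the VFS resolves the quota file path before calling ->quota_on, so the kern_path()/path_put() boilerplate drops out of ext4. From userspace this entry point is reached through quotactl(2); a hedged sketch (the device and quota-file paths below are examples):

#include <stdio.h>
#include <sys/types.h>
#include <sys/quota.h>

int main(void)
{
	/* QFMT_VFS_V0 corresponds to the vfsv0 journaled-quota format */
	if (quotactl(QCMD(Q_QUOTAON, USRQUOTA), "/dev/sda1",
	             QFMT_VFS_V0, (caddr_t) "/aquota.user") < 0)
		perror("quotactl(Q_QUOTAON)");
	return 0;
}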
4118 | 4700 | ||
4119 | static int ext4_quota_off(struct super_block *sb, int type) | 4701 | static int ext4_quota_off(struct super_block *sb, int type) |
4120 | { | 4702 | { |
4121 | /* Force all delayed allocation blocks to be allocated */ | 4703 | struct inode *inode = sb_dqopt(sb)->files[type]; |
4122 | if (test_opt(sb, DELALLOC)) { | 4704 | handle_t *handle; |
4123 | down_read(&sb->s_umount); | 4705 | |
4706 | /* Force all delayed allocation blocks to be allocated. | ||
4707 | * Caller already holds s_umount sem */ | ||
4708 | if (test_opt(sb, DELALLOC)) | ||
4124 | sync_filesystem(sb); | 4709 | sync_filesystem(sb); |
4125 | up_read(&sb->s_umount); | ||
4126 | } | ||
4127 | 4710 | ||
4711 | if (!inode) | ||
4712 | goto out; | ||
4713 | |||
4714 | /* Update modification times of quota files when userspace can | ||
4715 | * start looking at them */ | ||
4716 | handle = ext4_journal_start(inode, 1); | ||
4717 | if (IS_ERR(handle)) | ||
4718 | goto out; | ||
4719 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
4720 | ext4_mark_inode_dirty(handle, inode); | ||
4721 | ext4_journal_stop(handle); | ||
4722 | |||
4723 | out: | ||
4128 | return dquot_quota_off(sb, type); | 4724 | return dquot_quota_off(sb, type); |
4129 | } | 4725 | } |
4130 | 4726 | ||
4131 | /* Read data from quotafile - avoid pagecache and such because we cannot afford | 4727 | /* Read data from quotafile - avoid pagecache and such because we cannot afford |
4132 | * acquiring the locks... As quota files are never truncated and quota code | 4728 | * acquiring the locks... As quota files are never truncated and quota code |
4133 | * itself serializes the operations (and noone else should touch the files) | 4729 | * itself serializes the operations (and no one else should touch the files) |
4134 | * we don't have to be afraid of races */ | 4730 | * we don't have to be afraid of races */ |
4135 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | 4731 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, |
4136 | size_t len, loff_t off) | 4732 | size_t len, loff_t off) |
@@ -4220,30 +4816,21 @@ out: | |||
4220 | if (inode->i_size < off + len) { | 4816 | if (inode->i_size < off + len) { |
4221 | i_size_write(inode, off + len); | 4817 | i_size_write(inode, off + len); |
4222 | EXT4_I(inode)->i_disksize = inode->i_size; | 4818 | EXT4_I(inode)->i_disksize = inode->i_size; |
4819 | ext4_mark_inode_dirty(handle, inode); | ||
4223 | } | 4820 | } |
4224 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
4225 | ext4_mark_inode_dirty(handle, inode); | ||
4226 | mutex_unlock(&inode->i_mutex); | 4821 | mutex_unlock(&inode->i_mutex); |
4227 | return len; | 4822 | return len; |
4228 | } | 4823 | } |
4229 | 4824 | ||
4230 | #endif | 4825 | #endif |
4231 | 4826 | ||
4232 | static int ext4_get_sb(struct file_system_type *fs_type, int flags, | 4827 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, |
4233 | const char *dev_name, void *data, struct vfsmount *mnt) | 4828 | const char *dev_name, void *data) |
4234 | { | 4829 | { |
4235 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); | 4830 | return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super); |
4236 | } | 4831 | } |
4237 | 4832 | ||
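This is the 2.6.37-era conversion from .get_sb to .mount: the filesystem returns the root dentry and mount_bdev() handles the vfsmount plumbing. A hedged kernel-module sketch of the same pattern for a hypothetical "examplefs" (mount_bdev() and struct file_system_type are the real APIs; everything examplefs_* is made up):

#include <linux/fs.h>
#include <linux/module.h>

static int examplefs_fill_super(struct super_block *sb, void *data, int silent)
{
	return -EINVAL; /* stub: a real fs parses the on-disk super here */
}

static struct dentry *examplefs_mount(struct file_system_type *fs_type,
				      int flags, const char *dev_name,
				      void *data)
{
	/* mount_bdev() replaces the old get_sb_bdev()+vfsmount plumbing */
	return mount_bdev(fs_type, flags, dev_name, data, examplefs_fill_super);
}

static struct file_system_type examplefs_type = {
	.owner    = THIS_MODULE,
	.name     = "examplefs",
	.mount    = examplefs_mount,   /* was: .get_sb = ..., */
	.kill_sb  = kill_block_super,
	.fs_flags = FS_REQUIRES_DEV,
};

static int __init examplefs_init(void)
{
	return register_filesystem(&examplefs_type);
}
module_init(examplefs_init);
MODULE_LICENSE("GPL");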
4238 | #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | 4833 | #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) |
4239 | static struct file_system_type ext2_fs_type = { | ||
4240 | .owner = THIS_MODULE, | ||
4241 | .name = "ext2", | ||
4242 | .get_sb = ext4_get_sb, | ||
4243 | .kill_sb = kill_block_super, | ||
4244 | .fs_flags = FS_REQUIRES_DEV, | ||
4245 | }; | ||
4246 | |||
4247 | static inline void register_as_ext2(void) | 4834 | static inline void register_as_ext2(void) |
4248 | { | 4835 | { |
4249 | int err = register_filesystem(&ext2_fs_type); | 4836 | int err = register_filesystem(&ext2_fs_type); |
@@ -4256,10 +4843,22 @@ static inline void unregister_as_ext2(void) | |||
4256 | { | 4843 | { |
4257 | unregister_filesystem(&ext2_fs_type); | 4844 | unregister_filesystem(&ext2_fs_type); |
4258 | } | 4845 | } |
4846 | |||
4847 | static inline int ext2_feature_set_ok(struct super_block *sb) | ||
4848 | { | ||
4849 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP)) | ||
4850 | return 0; | ||
4851 | if (sb->s_flags & MS_RDONLY) | ||
4852 | return 1; | ||
4853 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP)) | ||
4854 | return 0; | ||
4855 | return 1; | ||
4856 | } | ||
4259 | MODULE_ALIAS("ext2"); | 4857 | MODULE_ALIAS("ext2"); |
4260 | #else | 4858 | #else |
4261 | static inline void register_as_ext2(void) { } | 4859 | static inline void register_as_ext2(void) { } |
4262 | static inline void unregister_as_ext2(void) { } | 4860 | static inline void unregister_as_ext2(void) { } |
4861 | static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; } | ||
4263 | #endif | 4862 | #endif |
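The new ext2_feature_set_ok()/ext3_feature_set_ok() helpers encode the usual three-mask rule: any unsupported incompat bit refuses the mount outright, while unsupported ro_compat bits only forbid read-write mounts. A compact userspace illustration with made-up masks:

#include <stdio.h>
#include <stdint.h>

#define SUPP_INCOMPAT   0x0003u
#define SUPP_RO_COMPAT  0x0007u

static int feature_set_ok(uint32_t incompat, uint32_t ro_compat, int rdonly)
{
	if (incompat & ~SUPP_INCOMPAT)
		return 0;               /* cannot mount at all */
	if (rdonly)
		return 1;               /* unknown ro_compat bits are fine ro */
	return (ro_compat & ~SUPP_RO_COMPAT) ? 0 : 1;
}

int main(void)
{
	printf("%d\n", feature_set_ok(0x0001, 0x0010, 0)); /* 0: rw refused */
	printf("%d\n", feature_set_ok(0x0001, 0x0010, 1)); /* 1: ro allowed */
	return 0;
}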
4264 | 4863 | ||
4265 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | 4864 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) |
@@ -4275,79 +4874,155 @@ static inline void unregister_as_ext3(void) | |||
4275 | { | 4874 | { |
4276 | unregister_filesystem(&ext3_fs_type); | 4875 | unregister_filesystem(&ext3_fs_type); |
4277 | } | 4876 | } |
4877 | |||
4878 | static inline int ext3_feature_set_ok(struct super_block *sb) | ||
4879 | { | ||
4880 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP)) | ||
4881 | return 0; | ||
4882 | if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) | ||
4883 | return 0; | ||
4884 | if (sb->s_flags & MS_RDONLY) | ||
4885 | return 1; | ||
4886 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) | ||
4887 | return 0; | ||
4888 | return 1; | ||
4889 | } | ||
4278 | MODULE_ALIAS("ext3"); | 4890 | MODULE_ALIAS("ext3"); |
4279 | #else | 4891 | #else |
4280 | static inline void register_as_ext3(void) { } | 4892 | static inline void register_as_ext3(void) { } |
4281 | static inline void unregister_as_ext3(void) { } | 4893 | static inline void unregister_as_ext3(void) { } |
4894 | static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; } | ||
4282 | #endif | 4895 | #endif |
4283 | 4896 | ||
4284 | static struct file_system_type ext4_fs_type = { | 4897 | static struct file_system_type ext4_fs_type = { |
4285 | .owner = THIS_MODULE, | 4898 | .owner = THIS_MODULE, |
4286 | .name = "ext4", | 4899 | .name = "ext4", |
4287 | .get_sb = ext4_get_sb, | 4900 | .mount = ext4_mount, |
4288 | .kill_sb = kill_block_super, | 4901 | .kill_sb = kill_block_super, |
4289 | .fs_flags = FS_REQUIRES_DEV, | 4902 | .fs_flags = FS_REQUIRES_DEV, |
4290 | }; | 4903 | }; |
4291 | 4904 | ||
4292 | static int __init init_ext4_fs(void) | 4905 | static int __init ext4_init_feat_adverts(void) |
4293 | { | 4906 | { |
4294 | int err; | 4907 | struct ext4_features *ef; |
4908 | int ret = -ENOMEM; | ||
4909 | |||
4910 | ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL); | ||
4911 | if (!ef) | ||
4912 | goto out; | ||
4913 | |||
4914 | ef->f_kobj.kset = ext4_kset; | ||
4915 | init_completion(&ef->f_kobj_unregister); | ||
4916 | ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL, | ||
4917 | "features"); | ||
4918 | if (ret) { | ||
4919 | kfree(ef); | ||
4920 | goto out; | ||
4921 | } | ||
4922 | |||
4923 | ext4_feat = ef; | ||
4924 | ret = 0; | ||
4925 | out: | ||
4926 | return ret; | ||
4927 | } | ||
4928 | |||
4929 | static void ext4_exit_feat_adverts(void) | ||
4930 | { | ||
4931 | kobject_put(&ext4_feat->f_kobj); | ||
4932 | wait_for_completion(&ext4_feat->f_kobj_unregister); | ||
4933 | kfree(ext4_feat); | ||
4934 | } | ||
4935 | |||
4936 | /* Shared across all ext4 file systems */ | ||
4937 | wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; | ||
4938 | struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; | ||
4939 | |||
4940 | static int __init ext4_init_fs(void) | ||
4941 | { | ||
4942 | int i, err; | ||
4295 | 4943 | ||
4296 | ext4_check_flag_values(); | 4944 | ext4_check_flag_values(); |
4297 | err = init_ext4_system_zone(); | 4945 | |
4946 | for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { | ||
4947 | mutex_init(&ext4__aio_mutex[i]); | ||
4948 | init_waitqueue_head(&ext4__ioend_wq[i]); | ||
4949 | } | ||
4950 | |||
4951 | err = ext4_init_pageio(); | ||
4298 | if (err) | 4952 | if (err) |
4299 | return err; | 4953 | return err; |
4954 | err = ext4_init_system_zone(); | ||
4955 | if (err) | ||
4956 | goto out7; | ||
4300 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); | 4957 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); |
4301 | if (!ext4_kset) | 4958 | if (!ext4_kset) |
4302 | goto out4; | 4959 | goto out6; |
4303 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | 4960 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); |
4304 | err = init_ext4_mballoc(); | 4961 | if (!ext4_proc_root) |
4962 | goto out5; | ||
4963 | |||
4964 | err = ext4_init_feat_adverts(); | ||
4965 | if (err) | ||
4966 | goto out4; | ||
4967 | |||
4968 | err = ext4_init_mballoc(); | ||
4305 | if (err) | 4969 | if (err) |
4306 | goto out3; | 4970 | goto out3; |
4307 | 4971 | ||
4308 | err = init_ext4_xattr(); | 4972 | err = ext4_init_xattr(); |
4309 | if (err) | 4973 | if (err) |
4310 | goto out2; | 4974 | goto out2; |
4311 | err = init_inodecache(); | 4975 | err = init_inodecache(); |
4312 | if (err) | 4976 | if (err) |
4313 | goto out1; | 4977 | goto out1; |
4314 | register_as_ext2(); | ||
4315 | register_as_ext3(); | 4978 | register_as_ext3(); |
4979 | register_as_ext2(); | ||
4316 | err = register_filesystem(&ext4_fs_type); | 4980 | err = register_filesystem(&ext4_fs_type); |
4317 | if (err) | 4981 | if (err) |
4318 | goto out; | 4982 | goto out; |
4983 | |||
4984 | ext4_li_info = NULL; | ||
4985 | mutex_init(&ext4_li_mtx); | ||
4319 | return 0; | 4986 | return 0; |
4320 | out: | 4987 | out: |
4321 | unregister_as_ext2(); | 4988 | unregister_as_ext2(); |
4322 | unregister_as_ext3(); | 4989 | unregister_as_ext3(); |
4323 | destroy_inodecache(); | 4990 | destroy_inodecache(); |
4324 | out1: | 4991 | out1: |
4325 | exit_ext4_xattr(); | 4992 | ext4_exit_xattr(); |
4326 | out2: | 4993 | out2: |
4327 | exit_ext4_mballoc(); | 4994 | ext4_exit_mballoc(); |
4328 | out3: | 4995 | out3: |
4996 | ext4_exit_feat_adverts(); | ||
4997 | out4: | ||
4329 | remove_proc_entry("fs/ext4", NULL); | 4998 | remove_proc_entry("fs/ext4", NULL); |
4999 | out5: | ||
4330 | kset_unregister(ext4_kset); | 5000 | kset_unregister(ext4_kset); |
4331 | out4: | 5001 | out6: |
4332 | exit_ext4_system_zone(); | 5002 | ext4_exit_system_zone(); |
5003 | out7: | ||
5004 | ext4_exit_pageio(); | ||
4333 | return err; | 5005 | return err; |
4334 | } | 5006 | } |
4335 | 5007 | ||
4336 | static void __exit exit_ext4_fs(void) | 5008 | static void __exit ext4_exit_fs(void) |
4337 | { | 5009 | { |
5010 | ext4_destroy_lazyinit_thread(); | ||
4338 | unregister_as_ext2(); | 5011 | unregister_as_ext2(); |
4339 | unregister_as_ext3(); | 5012 | unregister_as_ext3(); |
4340 | unregister_filesystem(&ext4_fs_type); | 5013 | unregister_filesystem(&ext4_fs_type); |
4341 | destroy_inodecache(); | 5014 | destroy_inodecache(); |
4342 | exit_ext4_xattr(); | 5015 | ext4_exit_xattr(); |
4343 | exit_ext4_mballoc(); | 5016 | ext4_exit_mballoc(); |
5017 | ext4_exit_feat_adverts(); | ||
4344 | remove_proc_entry("fs/ext4", NULL); | 5018 | remove_proc_entry("fs/ext4", NULL); |
4345 | kset_unregister(ext4_kset); | 5019 | kset_unregister(ext4_kset); |
4346 | exit_ext4_system_zone(); | 5020 | ext4_exit_system_zone(); |
5021 | ext4_exit_pageio(); | ||
4347 | } | 5022 | } |
4348 | 5023 | ||
4349 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); | 5024 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); |
4350 | MODULE_DESCRIPTION("Fourth Extended Filesystem"); | 5025 | MODULE_DESCRIPTION("Fourth Extended Filesystem"); |
4351 | MODULE_LICENSE("GPL"); | 5026 | MODULE_LICENSE("GPL"); |
4352 | module_init(init_ext4_fs) | 5027 | module_init(ext4_init_fs) |
4353 | module_exit(exit_ext4_fs) | 5028 | module_exit(ext4_exit_fs) |
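ext4_init_fs() above grows several new init steps (pageio, feature adverts, the proc-root check), all unwound through the out7..out label chain. The idiom, distilled to a runnable toy with generic step names: each successful step gets a matching teardown label, and a failure jumps to the label that undoes only what was already set up, in reverse order.

#include <stdio.h>

static int init_a(void) { return 0; }
static int init_b(void) { return -1; } /* simulate a failure */
static int init_c(void) { return 0; }
static void exit_a(void) { puts("undo a"); }
static void exit_b(void) { puts("undo b"); }

static int init_all(void)
{
	int err = init_a();
	if (err)
		goto out;
	err = init_b();
	if (err)
		goto out_a;
	err = init_c();
	if (err)
		goto out_b;
	return 0;
out_b:
	exit_b();
out_a:
	exit_a();
out:
	return err;
}

int main(void) { return init_all() ? 1 : 0; }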
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 3a8cd8dff1ad..c757adc97250 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -427,23 +427,23 @@ cleanup: | |||
427 | static int | 427 | static int |
428 | ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) | 428 | ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) |
429 | { | 429 | { |
430 | int i_error, b_error; | 430 | int ret, ret2; |
431 | 431 | ||
432 | down_read(&EXT4_I(dentry->d_inode)->xattr_sem); | 432 | down_read(&EXT4_I(dentry->d_inode)->xattr_sem); |
433 | i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size); | 433 | ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size); |
434 | if (i_error < 0) { | 434 | if (ret < 0) |
435 | b_error = 0; | 435 | goto errout; |
436 | } else { | 436 | if (buffer) { |
437 | if (buffer) { | 437 | buffer += ret; |
438 | buffer += i_error; | 438 | buffer_size -= ret; |
439 | buffer_size -= i_error; | ||
440 | } | ||
441 | b_error = ext4_xattr_block_list(dentry, buffer, buffer_size); | ||
442 | if (b_error < 0) | ||
443 | i_error = 0; | ||
444 | } | 439 | } |
440 | ret = ext4_xattr_block_list(dentry, buffer, buffer_size); | ||
441 | if (ret < 0) | ||
442 | goto errout; | ||
443 | ret += ret2; | ||
444 | errout: | ||
445 | up_read(&EXT4_I(dentry->d_inode)->xattr_sem); | 445 | up_read(&EXT4_I(dentry->d_inode)->xattr_sem); |
446 | return i_error + b_error; | 446 | return ret; |
447 | } | 447 | } |
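The rewritten ext4_xattr_list() is what backs listxattr(2): the in-inode and external-block name lists are concatenated into one NUL-separated buffer, and a negative return from either pass aborts the whole call. The standard two-pass usage from userspace:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : ".";
	ssize_t len = listxattr(path, NULL, 0);   /* pass 1: query size */
	if (len <= 0)
		return 0;
	char *buf = malloc(len);
	if (!buf)
		return 1;
	len = listxattr(path, buf, len);          /* pass 2: fetch names */
	for (char *p = buf; p < buf + len; p += strlen(p) + 1)
		puts(p);                              /* NUL-separated names */
	free(buf);
	return 0;
}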
448 | 448 | ||
449 | /* | 449 | /* |
@@ -735,7 +735,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, | |||
735 | int offset = (char *)s->here - bs->bh->b_data; | 735 | int offset = (char *)s->here - bs->bh->b_data; |
736 | 736 | ||
737 | unlock_buffer(bs->bh); | 737 | unlock_buffer(bs->bh); |
738 | jbd2_journal_release_buffer(handle, bs->bh); | 738 | ext4_handle_release_buffer(handle, bs->bh); |
739 | if (ce) { | 739 | if (ce) { |
740 | mb_cache_entry_release(ce); | 740 | mb_cache_entry_release(ce); |
741 | ce = NULL; | 741 | ce = NULL; |
@@ -820,8 +820,8 @@ inserted: | |||
820 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 820 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
821 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | 821 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; |
822 | 822 | ||
823 | block = ext4_new_meta_blocks(handle, inode, | 823 | block = ext4_new_meta_blocks(handle, inode, goal, 0, |
824 | goal, NULL, &error); | 824 | NULL, &error); |
825 | if (error) | 825 | if (error) |
826 | goto cleanup; | 826 | goto cleanup; |
827 | 827 | ||
@@ -833,7 +833,7 @@ inserted: | |||
833 | new_bh = sb_getblk(sb, block); | 833 | new_bh = sb_getblk(sb, block); |
834 | if (!new_bh) { | 834 | if (!new_bh) { |
835 | getblk_failed: | 835 | getblk_failed: |
836 | ext4_free_blocks(handle, inode, 0, block, 1, | 836 | ext4_free_blocks(handle, inode, NULL, block, 1, |
837 | EXT4_FREE_BLOCKS_METADATA); | 837 | EXT4_FREE_BLOCKS_METADATA); |
838 | error = -EIO; | 838 | error = -EIO; |
839 | goto cleanup; | 839 | goto cleanup; |
@@ -947,7 +947,7 @@ ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, | |||
947 | /* | 947 | /* |
948 | * ext4_xattr_set_handle() | 948 | * ext4_xattr_set_handle() |
949 | * | 949 | * |
950 | * Create, replace or remove an extended attribute for this inode. Buffer | 950 | * Create, replace or remove an extended attribute for this inode. Value |
951 | * is NULL to remove an existing extended attribute, and non-NULL to | 951 | * is NULL to remove an existing extended attribute, and non-NULL to |
952 | * either replace an existing extended attribute, or create a new extended | 952 | * either replace an existing extended attribute, or create a new extended |
953 | * attribute. The flags XATTR_REPLACE and XATTR_CREATE | 953 | * attribute. The flags XATTR_REPLACE and XATTR_CREATE |
@@ -1588,7 +1588,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header, | |||
1588 | #undef BLOCK_HASH_SHIFT | 1588 | #undef BLOCK_HASH_SHIFT |
1589 | 1589 | ||
1590 | int __init | 1590 | int __init |
1591 | init_ext4_xattr(void) | 1591 | ext4_init_xattr(void) |
1592 | { | 1592 | { |
1593 | ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); | 1593 | ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); |
1594 | if (!ext4_xattr_cache) | 1594 | if (!ext4_xattr_cache) |
@@ -1597,7 +1597,7 @@ init_ext4_xattr(void) | |||
1597 | } | 1597 | } |
1598 | 1598 | ||
1599 | void | 1599 | void |
1600 | exit_ext4_xattr(void) | 1600 | ext4_exit_xattr(void) |
1601 | { | 1601 | { |
1602 | if (ext4_xattr_cache) | 1602 | if (ext4_xattr_cache) |
1603 | mb_cache_destroy(ext4_xattr_cache); | 1603 | mb_cache_destroy(ext4_xattr_cache); |
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 518e96e43905..25b7387ff183 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
@@ -83,8 +83,8 @@ extern void ext4_xattr_put_super(struct super_block *); | |||
83 | extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | 83 | extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, |
84 | struct ext4_inode *raw_inode, handle_t *handle); | 84 | struct ext4_inode *raw_inode, handle_t *handle); |
85 | 85 | ||
86 | extern int init_ext4_xattr(void); | 86 | extern int __init ext4_init_xattr(void); |
87 | extern void exit_ext4_xattr(void); | 87 | extern void ext4_exit_xattr(void); |
88 | 88 | ||
89 | extern const struct xattr_handler *ext4_xattr_handlers[]; | 89 | extern const struct xattr_handler *ext4_xattr_handlers[]; |
90 | 90 | ||
@@ -121,14 +121,14 @@ ext4_xattr_put_super(struct super_block *sb) | |||
121 | { | 121 | { |
122 | } | 122 | } |
123 | 123 | ||
124 | static inline int | 124 | static __init inline int |
125 | init_ext4_xattr(void) | 125 | ext4_init_xattr(void) |
126 | { | 126 | { |
127 | return 0; | 127 | return 0; |
128 | } | 128 | } |
129 | 129 | ||
130 | static inline void | 130 | static inline void |
131 | exit_ext4_xattr(void) | 131 | ext4_exit_xattr(void) |
132 | { | 132 | { |
133 | } | 133 | } |
134 | 134 | ||
@@ -145,10 +145,10 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | |||
145 | 145 | ||
146 | #ifdef CONFIG_EXT4_FS_SECURITY | 146 | #ifdef CONFIG_EXT4_FS_SECURITY |
147 | extern int ext4_init_security(handle_t *handle, struct inode *inode, | 147 | extern int ext4_init_security(handle_t *handle, struct inode *inode, |
148 | struct inode *dir); | 148 | struct inode *dir, const struct qstr *qstr); |
149 | #else | 149 | #else |
150 | static inline int ext4_init_security(handle_t *handle, struct inode *inode, | 150 | static inline int ext4_init_security(handle_t *handle, struct inode *inode, |
151 | struct inode *dir) | 151 | struct inode *dir, const struct qstr *qstr) |
152 | { | 152 | { |
153 | return 0; | 153 | return 0; |
154 | } | 154 | } |
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 9b21268e121c..007c3bfbf094 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c | |||
@@ -49,14 +49,15 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name, | |||
49 | } | 49 | } |
50 | 50 | ||
51 | int | 51 | int |
52 | ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir) | 52 | ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir, |
53 | const struct qstr *qstr) | ||
53 | { | 54 | { |
54 | int err; | 55 | int err; |
55 | size_t len; | 56 | size_t len; |
56 | void *value; | 57 | void *value; |
57 | char *name; | 58 | char *name; |
58 | 59 | ||
59 | err = security_inode_init_security(inode, dir, &name, &value, &len); | 60 | err = security_inode_init_security(inode, dir, qstr, &name, &value, &len); |
60 | if (err) { | 61 | if (err) { |
61 | if (err == -EOPNOTSUPP) | 62 | if (err == -EOPNOTSUPP) |
62 | return 0; | 63 | return 0; |