diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-18 13:56:26 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-18 13:56:26 -0400 |
commit | 3530c1886291df061e3972c55590777ef1cb67f8 (patch) | |
tree | bd6755e533eb5a0f37ff600da6bc0d9d1ba33c17 /fs/ext4 | |
parent | 6952b61de9984073289859073e8195ad0bee8fd5 (diff) | |
parent | 1358870deaf11a752a84fbd89201749aa62498e8 (diff) |
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (64 commits)
ext4: Update documentation about quota mount options
ext4: replace MAX_DEFRAG_SIZE with EXT_MAX_BLOCK
ext4: Fix the alloc on close after a truncate heuristic
ext4: Add a tracepoint for ext4_alloc_da_blocks()
ext4: store EXT4_EXT_MIGRATE in i_state instead of i_flags
ext4: limit block allocations for indirect-block files to < 2^32
ext4: Fix different block exchange issue in EXT4_IOC_MOVE_EXT
ext4: Add null extent check to ext_get_path
ext4: Replace BUG_ON() with ext4_error() in move_extents.c
ext4: Replace get_ext_path macro with an inline function
ext4: Fix include/trace/events/ext4.h to work with Systemtap
ext4: Fix initialization of s_flex_groups
ext4: Always set dx_node's fake_dirent explicitly.
ext4: Fix async commit mode to be safe by using a barrier
ext4: Don't update superblock write time when filesystem is read-only
ext4: Clarify the locking details in mballoc
ext4: check for need init flag in ext4_mb_load_buddy
ext4: move ext4_mb_init_group() function earlier in the mballoc.c
ext4: Make non-journal fsync work properly
ext4: Assure that metadata blocks are written during fsync in no journal mode
...
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/Kconfig | 11 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 2 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 91 | ||||
-rw-r--r-- | fs/ext4/ext4_extents.h | 4 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.c | 9 | ||||
-rw-r--r-- | fs/ext4/extents.c | 112 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 13 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 2 | ||||
-rw-r--r-- | fs/ext4/inode.c | 150 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 7 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 429 | ||||
-rw-r--r-- | fs/ext4/mballoc.h | 22 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 22 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 334 | ||||
-rw-r--r-- | fs/ext4/namei.c | 22 | ||||
-rw-r--r-- | fs/ext4/resize.c | 7 | ||||
-rw-r--r-- | fs/ext4/super.c | 155 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 15 |
18 files changed, 863 insertions, 544 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 418b6f3b0ae8..d5c0ea2e8f2d 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig | |||
@@ -37,7 +37,7 @@ config EXT4DEV_COMPAT | |||
37 | 37 | ||
38 | To enable backwards compatibility so that systems that are | 38 | To enable backwards compatibility so that systems that are |
39 | still expecting to mount ext4 filesystems using ext4dev, | 39 | still expecting to mount ext4 filesystems using ext4dev, |
40 | chose Y here. This feature will go away by 2.6.31, so | 40 | choose Y here. This feature will go away by 2.6.31, so |
41 | please arrange to get your userspace programs fixed! | 41 | please arrange to get your userspace programs fixed! |
42 | 42 | ||
43 | config EXT4_FS_XATTR | 43 | config EXT4_FS_XATTR |
@@ -77,3 +77,12 @@ config EXT4_FS_SECURITY | |||
77 | 77 | ||
78 | If you are not using a security module that requires using | 78 | If you are not using a security module that requires using |
79 | extended attributes for file security labels, say N. | 79 | extended attributes for file security labels, say N. |
80 | |||
81 | config EXT4_DEBUG | ||
82 | bool "EXT4 debugging support" | ||
83 | depends on EXT4_FS | ||
84 | help | ||
85 | Enables run-time debugging support for the ext4 filesystem. | ||
86 | |||
87 | If you select Y here, then you will be able to turn on debugging | ||
88 | with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug" | ||
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index e2126d70dff5..1d0418980f8d 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -478,7 +478,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
478 | * new bitmap information | 478 | * new bitmap information |
479 | */ | 479 | */ |
480 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); | 480 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); |
481 | ext4_mb_update_group_info(grp, blocks_freed); | 481 | grp->bb_free += blocks_freed; |
482 | up_write(&grp->alloc_sem); | 482 | up_write(&grp->alloc_sem); |
483 | 483 | ||
484 | /* We dirtied the bitmap block */ | 484 | /* We dirtied the bitmap block */ |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9714db393efe..e227eea23f05 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -67,27 +67,29 @@ typedef unsigned int ext4_group_t; | |||
67 | 67 | ||
68 | 68 | ||
69 | /* prefer goal again. length */ | 69 | /* prefer goal again. length */ |
70 | #define EXT4_MB_HINT_MERGE 1 | 70 | #define EXT4_MB_HINT_MERGE 0x0001 |
71 | /* blocks already reserved */ | 71 | /* blocks already reserved */ |
72 | #define EXT4_MB_HINT_RESERVED 2 | 72 | #define EXT4_MB_HINT_RESERVED 0x0002 |
73 | /* metadata is being allocated */ | 73 | /* metadata is being allocated */ |
74 | #define EXT4_MB_HINT_METADATA 4 | 74 | #define EXT4_MB_HINT_METADATA 0x0004 |
75 | /* first blocks in the file */ | 75 | /* first blocks in the file */ |
76 | #define EXT4_MB_HINT_FIRST 8 | 76 | #define EXT4_MB_HINT_FIRST 0x0008 |
77 | /* search for the best chunk */ | 77 | /* search for the best chunk */ |
78 | #define EXT4_MB_HINT_BEST 16 | 78 | #define EXT4_MB_HINT_BEST 0x0010 |
79 | /* data is being allocated */ | 79 | /* data is being allocated */ |
80 | #define EXT4_MB_HINT_DATA 32 | 80 | #define EXT4_MB_HINT_DATA 0x0020 |
81 | /* don't preallocate (for tails) */ | 81 | /* don't preallocate (for tails) */ |
82 | #define EXT4_MB_HINT_NOPREALLOC 64 | 82 | #define EXT4_MB_HINT_NOPREALLOC 0x0040 |
83 | /* allocate for locality group */ | 83 | /* allocate for locality group */ |
84 | #define EXT4_MB_HINT_GROUP_ALLOC 128 | 84 | #define EXT4_MB_HINT_GROUP_ALLOC 0x0080 |
85 | /* allocate goal blocks or none */ | 85 | /* allocate goal blocks or none */ |
86 | #define EXT4_MB_HINT_GOAL_ONLY 256 | 86 | #define EXT4_MB_HINT_GOAL_ONLY 0x0100 |
87 | /* goal is meaningful */ | 87 | /* goal is meaningful */ |
88 | #define EXT4_MB_HINT_TRY_GOAL 512 | 88 | #define EXT4_MB_HINT_TRY_GOAL 0x0200 |
89 | /* blocks already pre-reserved by delayed allocation */ | 89 | /* blocks already pre-reserved by delayed allocation */ |
90 | #define EXT4_MB_DELALLOC_RESERVED 1024 | 90 | #define EXT4_MB_DELALLOC_RESERVED 0x0400 |
91 | /* We are doing stream allocation */ | ||
92 | #define EXT4_MB_STREAM_ALLOC 0x0800 | ||
91 | 93 | ||
92 | 94 | ||
93 | struct ext4_allocation_request { | 95 | struct ext4_allocation_request { |
@@ -112,6 +114,21 @@ struct ext4_allocation_request { | |||
112 | }; | 114 | }; |
113 | 115 | ||
114 | /* | 116 | /* |
117 | * For delayed allocation tracking | ||
118 | */ | ||
119 | struct mpage_da_data { | ||
120 | struct inode *inode; | ||
121 | sector_t b_blocknr; /* start block number of extent */ | ||
122 | size_t b_size; /* size of extent */ | ||
123 | unsigned long b_state; /* state of the extent */ | ||
124 | unsigned long first_page, next_page; /* extent of pages */ | ||
125 | struct writeback_control *wbc; | ||
126 | int io_done; | ||
127 | int pages_written; | ||
128 | int retval; | ||
129 | }; | ||
130 | |||
131 | /* | ||
115 | * Special inodes numbers | 132 | * Special inodes numbers |
116 | */ | 133 | */ |
117 | #define EXT4_BAD_INO 1 /* Bad blocks inode */ | 134 | #define EXT4_BAD_INO 1 /* Bad blocks inode */ |
@@ -251,7 +268,6 @@ struct flex_groups { | |||
251 | #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ | 268 | #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ |
252 | #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ | 269 | #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ |
253 | #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ | 270 | #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ |
254 | #define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */ | ||
255 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ | 271 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ |
256 | 272 | ||
257 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ | 273 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ |
@@ -289,6 +305,7 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) | |||
289 | #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ | 305 | #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ |
290 | #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ | 306 | #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ |
291 | #define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ | 307 | #define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ |
308 | #define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */ | ||
292 | 309 | ||
293 | /* Used to pass group descriptor data when online resize is done */ | 310 | /* Used to pass group descriptor data when online resize is done */ |
294 | struct ext4_new_group_input { | 311 | struct ext4_new_group_input { |
@@ -386,6 +403,9 @@ struct ext4_mount_options { | |||
386 | #endif | 403 | #endif |
387 | }; | 404 | }; |
388 | 405 | ||
406 | /* Max physical block we can addres w/o extents */ | ||
407 | #define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF | ||
408 | |||
389 | /* | 409 | /* |
390 | * Structure of an inode on the disk | 410 | * Structure of an inode on the disk |
391 | */ | 411 | */ |
@@ -456,7 +476,6 @@ struct move_extent { | |||
456 | __u64 len; /* block length to be moved */ | 476 | __u64 len; /* block length to be moved */ |
457 | __u64 moved_len; /* moved block length */ | 477 | __u64 moved_len; /* moved block length */ |
458 | }; | 478 | }; |
459 | #define MAX_DEFRAG_SIZE ((1UL<<31) - 1) | ||
460 | 479 | ||
461 | #define EXT4_EPOCH_BITS 2 | 480 | #define EXT4_EPOCH_BITS 2 |
462 | #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) | 481 | #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) |
@@ -694,7 +713,6 @@ struct ext4_inode_info { | |||
694 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ | 713 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ |
695 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ | 714 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ |
696 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ | 715 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ |
697 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ | ||
698 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 716 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
699 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | 717 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ |
700 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 718 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
@@ -841,6 +859,7 @@ struct ext4_sb_info { | |||
841 | unsigned long s_gdb_count; /* Number of group descriptor blocks */ | 859 | unsigned long s_gdb_count; /* Number of group descriptor blocks */ |
842 | unsigned long s_desc_per_block; /* Number of group descriptors per block */ | 860 | unsigned long s_desc_per_block; /* Number of group descriptors per block */ |
843 | ext4_group_t s_groups_count; /* Number of groups in the fs */ | 861 | ext4_group_t s_groups_count; /* Number of groups in the fs */ |
862 | ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */ | ||
844 | unsigned long s_overhead_last; /* Last calculated overhead */ | 863 | unsigned long s_overhead_last; /* Last calculated overhead */ |
845 | unsigned long s_blocks_last; /* Last seen block count */ | 864 | unsigned long s_blocks_last; /* Last seen block count */ |
846 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ | 865 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ |
@@ -950,6 +969,7 @@ struct ext4_sb_info { | |||
950 | atomic_t s_mb_lost_chunks; | 969 | atomic_t s_mb_lost_chunks; |
951 | atomic_t s_mb_preallocated; | 970 | atomic_t s_mb_preallocated; |
952 | atomic_t s_mb_discarded; | 971 | atomic_t s_mb_discarded; |
972 | atomic_t s_lock_busy; | ||
953 | 973 | ||
954 | /* locality groups */ | 974 | /* locality groups */ |
955 | struct ext4_locality_group *s_locality_groups; | 975 | struct ext4_locality_group *s_locality_groups; |
@@ -1340,8 +1360,6 @@ extern void ext4_mb_free_blocks(handle_t *, struct inode *, | |||
1340 | ext4_fsblk_t, unsigned long, int, unsigned long *); | 1360 | ext4_fsblk_t, unsigned long, int, unsigned long *); |
1341 | extern int ext4_mb_add_groupinfo(struct super_block *sb, | 1361 | extern int ext4_mb_add_groupinfo(struct super_block *sb, |
1342 | ext4_group_t i, struct ext4_group_desc *desc); | 1362 | ext4_group_t i, struct ext4_group_desc *desc); |
1343 | extern void ext4_mb_update_group_info(struct ext4_group_info *grp, | ||
1344 | ext4_grpblk_t add); | ||
1345 | extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); | 1363 | extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); |
1346 | extern void ext4_mb_put_buddy_cache_lock(struct super_block *, | 1364 | extern void ext4_mb_put_buddy_cache_lock(struct super_block *, |
1347 | ext4_group_t, int); | 1365 | ext4_group_t, int); |
@@ -1367,6 +1385,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int); | |||
1367 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 1385 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
1368 | extern int ext4_can_truncate(struct inode *inode); | 1386 | extern int ext4_can_truncate(struct inode *inode); |
1369 | extern void ext4_truncate(struct inode *); | 1387 | extern void ext4_truncate(struct inode *); |
1388 | extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); | ||
1370 | extern void ext4_set_inode_flags(struct inode *); | 1389 | extern void ext4_set_inode_flags(struct inode *); |
1371 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1390 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
1372 | extern int ext4_alloc_da_blocks(struct inode *inode); | 1391 | extern int ext4_alloc_da_blocks(struct inode *inode); |
@@ -1575,15 +1594,18 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) | |||
1575 | struct ext4_group_info { | 1594 | struct ext4_group_info { |
1576 | unsigned long bb_state; | 1595 | unsigned long bb_state; |
1577 | struct rb_root bb_free_root; | 1596 | struct rb_root bb_free_root; |
1578 | unsigned short bb_first_free; | 1597 | ext4_grpblk_t bb_first_free; /* first free block */ |
1579 | unsigned short bb_free; | 1598 | ext4_grpblk_t bb_free; /* total free blocks */ |
1580 | unsigned short bb_fragments; | 1599 | ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ |
1581 | struct list_head bb_prealloc_list; | 1600 | struct list_head bb_prealloc_list; |
1582 | #ifdef DOUBLE_CHECK | 1601 | #ifdef DOUBLE_CHECK |
1583 | void *bb_bitmap; | 1602 | void *bb_bitmap; |
1584 | #endif | 1603 | #endif |
1585 | struct rw_semaphore alloc_sem; | 1604 | struct rw_semaphore alloc_sem; |
1586 | unsigned short bb_counters[]; | 1605 | ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block |
1606 | * regions, index is order. | ||
1607 | * bb_counters[3] = 5 means | ||
1608 | * 5 free 8-block regions. */ | ||
1587 | }; | 1609 | }; |
1588 | 1610 | ||
1589 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 | 1611 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 |
@@ -1591,15 +1613,42 @@ struct ext4_group_info { | |||
1591 | #define EXT4_MB_GRP_NEED_INIT(grp) \ | 1613 | #define EXT4_MB_GRP_NEED_INIT(grp) \ |
1592 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) | 1614 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) |
1593 | 1615 | ||
1616 | #define EXT4_MAX_CONTENTION 8 | ||
1617 | #define EXT4_CONTENTION_THRESHOLD 2 | ||
1618 | |||
1594 | static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, | 1619 | static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, |
1595 | ext4_group_t group) | 1620 | ext4_group_t group) |
1596 | { | 1621 | { |
1597 | return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); | 1622 | return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); |
1598 | } | 1623 | } |
1599 | 1624 | ||
1625 | /* | ||
1626 | * Returns true if the filesystem is busy enough that attempts to | ||
1627 | * access the block group locks has run into contention. | ||
1628 | */ | ||
1629 | static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi) | ||
1630 | { | ||
1631 | return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD); | ||
1632 | } | ||
1633 | |||
1600 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | 1634 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) |
1601 | { | 1635 | { |
1602 | spin_lock(ext4_group_lock_ptr(sb, group)); | 1636 | spinlock_t *lock = ext4_group_lock_ptr(sb, group); |
1637 | if (spin_trylock(lock)) | ||
1638 | /* | ||
1639 | * We're able to grab the lock right away, so drop the | ||
1640 | * lock contention counter. | ||
1641 | */ | ||
1642 | atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0); | ||
1643 | else { | ||
1644 | /* | ||
1645 | * The lock is busy, so bump the contention counter, | ||
1646 | * and then wait on the spin lock. | ||
1647 | */ | ||
1648 | atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1, | ||
1649 | EXT4_MAX_CONTENTION); | ||
1650 | spin_lock(lock); | ||
1651 | } | ||
1603 | } | 1652 | } |
1604 | 1653 | ||
1605 | static inline void ext4_unlock_group(struct super_block *sb, | 1654 | static inline void ext4_unlock_group(struct super_block *sb, |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 20a84105a10b..61652f1d15e6 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -43,8 +43,7 @@ | |||
43 | #define CHECK_BINSEARCH__ | 43 | #define CHECK_BINSEARCH__ |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * If EXT_DEBUG is defined you can use the 'extdebug' mount option | 46 | * Turn on EXT_DEBUG to get lots of info about extents operations. |
47 | * to get lots of info about what's going on. | ||
48 | */ | 47 | */ |
49 | #define EXT_DEBUG__ | 48 | #define EXT_DEBUG__ |
50 | #ifdef EXT_DEBUG | 49 | #ifdef EXT_DEBUG |
@@ -138,6 +137,7 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, | |||
138 | #define EXT_BREAK 1 | 137 | #define EXT_BREAK 1 |
139 | #define EXT_REPEAT 2 | 138 | #define EXT_REPEAT 2 |
140 | 139 | ||
140 | /* Maximum logical block in a file; ext4_extent's ee_block is __le32 */ | ||
141 | #define EXT_MAX_BLOCK 0xffffffff | 141 | #define EXT_MAX_BLOCK 0xffffffff |
142 | 142 | ||
143 | /* | 143 | /* |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index eb27fd0f2ee8..6a9409920dee 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
@@ -44,7 +44,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle, | |||
44 | handle, err); | 44 | handle, err); |
45 | } | 45 | } |
46 | else | 46 | else |
47 | brelse(bh); | 47 | bforget(bh); |
48 | return err; | 48 | return err; |
49 | } | 49 | } |
50 | 50 | ||
@@ -60,7 +60,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle, | |||
60 | handle, err); | 60 | handle, err); |
61 | } | 61 | } |
62 | else | 62 | else |
63 | brelse(bh); | 63 | bforget(bh); |
64 | return err; | 64 | return err; |
65 | } | 65 | } |
66 | 66 | ||
@@ -89,7 +89,10 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, | |||
89 | ext4_journal_abort_handle(where, __func__, bh, | 89 | ext4_journal_abort_handle(where, __func__, bh, |
90 | handle, err); | 90 | handle, err); |
91 | } else { | 91 | } else { |
92 | mark_buffer_dirty(bh); | 92 | if (inode && bh) |
93 | mark_buffer_dirty_inode(bh, inode); | ||
94 | else | ||
95 | mark_buffer_dirty(bh); | ||
93 | if (inode && inode_needs_sync(inode)) { | 96 | if (inode && inode_needs_sync(inode)) { |
94 | sync_dirty_buffer(bh); | 97 | sync_dirty_buffer(bh); |
95 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | 98 | if (buffer_req(bh) && !buffer_uptodate(bh)) { |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 73ebfb44ad75..7a3832577923 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -93,7 +93,9 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) | |||
93 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); | 93 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); |
94 | } | 94 | } |
95 | 95 | ||
96 | static int ext4_ext_journal_restart(handle_t *handle, int needed) | 96 | static int ext4_ext_truncate_extend_restart(handle_t *handle, |
97 | struct inode *inode, | ||
98 | int needed) | ||
97 | { | 99 | { |
98 | int err; | 100 | int err; |
99 | 101 | ||
@@ -104,7 +106,14 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed) | |||
104 | err = ext4_journal_extend(handle, needed); | 106 | err = ext4_journal_extend(handle, needed); |
105 | if (err <= 0) | 107 | if (err <= 0) |
106 | return err; | 108 | return err; |
107 | return ext4_journal_restart(handle, needed); | 109 | err = ext4_truncate_restart_trans(handle, inode, needed); |
110 | /* | ||
111 | * We have dropped i_data_sem so someone might have cached again | ||
112 | * an extent we are going to truncate. | ||
113 | */ | ||
114 | ext4_ext_invalidate_cache(inode); | ||
115 | |||
116 | return err; | ||
108 | } | 117 | } |
109 | 118 | ||
110 | /* | 119 | /* |
@@ -220,57 +229,65 @@ ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, | |||
220 | return newblock; | 229 | return newblock; |
221 | } | 230 | } |
222 | 231 | ||
223 | static int ext4_ext_space_block(struct inode *inode) | 232 | static inline int ext4_ext_space_block(struct inode *inode, int check) |
224 | { | 233 | { |
225 | int size; | 234 | int size; |
226 | 235 | ||
227 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) | 236 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) |
228 | / sizeof(struct ext4_extent); | 237 | / sizeof(struct ext4_extent); |
238 | if (!check) { | ||
229 | #ifdef AGGRESSIVE_TEST | 239 | #ifdef AGGRESSIVE_TEST |
230 | if (size > 6) | 240 | if (size > 6) |
231 | size = 6; | 241 | size = 6; |
232 | #endif | 242 | #endif |
243 | } | ||
233 | return size; | 244 | return size; |
234 | } | 245 | } |
235 | 246 | ||
236 | static int ext4_ext_space_block_idx(struct inode *inode) | 247 | static inline int ext4_ext_space_block_idx(struct inode *inode, int check) |
237 | { | 248 | { |
238 | int size; | 249 | int size; |
239 | 250 | ||
240 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) | 251 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) |
241 | / sizeof(struct ext4_extent_idx); | 252 | / sizeof(struct ext4_extent_idx); |
253 | if (!check) { | ||
242 | #ifdef AGGRESSIVE_TEST | 254 | #ifdef AGGRESSIVE_TEST |
243 | if (size > 5) | 255 | if (size > 5) |
244 | size = 5; | 256 | size = 5; |
245 | #endif | 257 | #endif |
258 | } | ||
246 | return size; | 259 | return size; |
247 | } | 260 | } |
248 | 261 | ||
249 | static int ext4_ext_space_root(struct inode *inode) | 262 | static inline int ext4_ext_space_root(struct inode *inode, int check) |
250 | { | 263 | { |
251 | int size; | 264 | int size; |
252 | 265 | ||
253 | size = sizeof(EXT4_I(inode)->i_data); | 266 | size = sizeof(EXT4_I(inode)->i_data); |
254 | size -= sizeof(struct ext4_extent_header); | 267 | size -= sizeof(struct ext4_extent_header); |
255 | size /= sizeof(struct ext4_extent); | 268 | size /= sizeof(struct ext4_extent); |
269 | if (!check) { | ||
256 | #ifdef AGGRESSIVE_TEST | 270 | #ifdef AGGRESSIVE_TEST |
257 | if (size > 3) | 271 | if (size > 3) |
258 | size = 3; | 272 | size = 3; |
259 | #endif | 273 | #endif |
274 | } | ||
260 | return size; | 275 | return size; |
261 | } | 276 | } |
262 | 277 | ||
263 | static int ext4_ext_space_root_idx(struct inode *inode) | 278 | static inline int ext4_ext_space_root_idx(struct inode *inode, int check) |
264 | { | 279 | { |
265 | int size; | 280 | int size; |
266 | 281 | ||
267 | size = sizeof(EXT4_I(inode)->i_data); | 282 | size = sizeof(EXT4_I(inode)->i_data); |
268 | size -= sizeof(struct ext4_extent_header); | 283 | size -= sizeof(struct ext4_extent_header); |
269 | size /= sizeof(struct ext4_extent_idx); | 284 | size /= sizeof(struct ext4_extent_idx); |
285 | if (!check) { | ||
270 | #ifdef AGGRESSIVE_TEST | 286 | #ifdef AGGRESSIVE_TEST |
271 | if (size > 4) | 287 | if (size > 4) |
272 | size = 4; | 288 | size = 4; |
273 | #endif | 289 | #endif |
290 | } | ||
274 | return size; | 291 | return size; |
275 | } | 292 | } |
276 | 293 | ||
@@ -284,9 +301,9 @@ int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) | |||
284 | int lcap, icap, rcap, leafs, idxs, num; | 301 | int lcap, icap, rcap, leafs, idxs, num; |
285 | int newextents = blocks; | 302 | int newextents = blocks; |
286 | 303 | ||
287 | rcap = ext4_ext_space_root_idx(inode); | 304 | rcap = ext4_ext_space_root_idx(inode, 0); |
288 | lcap = ext4_ext_space_block(inode); | 305 | lcap = ext4_ext_space_block(inode, 0); |
289 | icap = ext4_ext_space_block_idx(inode); | 306 | icap = ext4_ext_space_block_idx(inode, 0); |
290 | 307 | ||
291 | /* number of new leaf blocks needed */ | 308 | /* number of new leaf blocks needed */ |
292 | num = leafs = (newextents + lcap - 1) / lcap; | 309 | num = leafs = (newextents + lcap - 1) / lcap; |
@@ -311,14 +328,14 @@ ext4_ext_max_entries(struct inode *inode, int depth) | |||
311 | 328 | ||
312 | if (depth == ext_depth(inode)) { | 329 | if (depth == ext_depth(inode)) { |
313 | if (depth == 0) | 330 | if (depth == 0) |
314 | max = ext4_ext_space_root(inode); | 331 | max = ext4_ext_space_root(inode, 1); |
315 | else | 332 | else |
316 | max = ext4_ext_space_root_idx(inode); | 333 | max = ext4_ext_space_root_idx(inode, 1); |
317 | } else { | 334 | } else { |
318 | if (depth == 0) | 335 | if (depth == 0) |
319 | max = ext4_ext_space_block(inode); | 336 | max = ext4_ext_space_block(inode, 1); |
320 | else | 337 | else |
321 | max = ext4_ext_space_block_idx(inode); | 338 | max = ext4_ext_space_block_idx(inode, 1); |
322 | } | 339 | } |
323 | 340 | ||
324 | return max; | 341 | return max; |
@@ -437,8 +454,9 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) | |||
437 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), | 454 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), |
438 | idx_pblock(path->p_idx)); | 455 | idx_pblock(path->p_idx)); |
439 | } else if (path->p_ext) { | 456 | } else if (path->p_ext) { |
440 | ext_debug(" %d:%d:%llu ", | 457 | ext_debug(" %d:[%d]%d:%llu ", |
441 | le32_to_cpu(path->p_ext->ee_block), | 458 | le32_to_cpu(path->p_ext->ee_block), |
459 | ext4_ext_is_uninitialized(path->p_ext), | ||
442 | ext4_ext_get_actual_len(path->p_ext), | 460 | ext4_ext_get_actual_len(path->p_ext), |
443 | ext_pblock(path->p_ext)); | 461 | ext_pblock(path->p_ext)); |
444 | } else | 462 | } else |
@@ -460,8 +478,11 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) | |||
460 | eh = path[depth].p_hdr; | 478 | eh = path[depth].p_hdr; |
461 | ex = EXT_FIRST_EXTENT(eh); | 479 | ex = EXT_FIRST_EXTENT(eh); |
462 | 480 | ||
481 | ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino); | ||
482 | |||
463 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { | 483 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { |
464 | ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block), | 484 | ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), |
485 | ext4_ext_is_uninitialized(ex), | ||
465 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 486 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); |
466 | } | 487 | } |
467 | ext_debug("\n"); | 488 | ext_debug("\n"); |
@@ -580,9 +601,10 @@ ext4_ext_binsearch(struct inode *inode, | |||
580 | } | 601 | } |
581 | 602 | ||
582 | path->p_ext = l - 1; | 603 | path->p_ext = l - 1; |
583 | ext_debug(" -> %d:%llu:%d ", | 604 | ext_debug(" -> %d:%llu:[%d]%d ", |
584 | le32_to_cpu(path->p_ext->ee_block), | 605 | le32_to_cpu(path->p_ext->ee_block), |
585 | ext_pblock(path->p_ext), | 606 | ext_pblock(path->p_ext), |
607 | ext4_ext_is_uninitialized(path->p_ext), | ||
586 | ext4_ext_get_actual_len(path->p_ext)); | 608 | ext4_ext_get_actual_len(path->p_ext)); |
587 | 609 | ||
588 | #ifdef CHECK_BINSEARCH | 610 | #ifdef CHECK_BINSEARCH |
@@ -612,7 +634,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode) | |||
612 | eh->eh_depth = 0; | 634 | eh->eh_depth = 0; |
613 | eh->eh_entries = 0; | 635 | eh->eh_entries = 0; |
614 | eh->eh_magic = EXT4_EXT_MAGIC; | 636 | eh->eh_magic = EXT4_EXT_MAGIC; |
615 | eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode)); | 637 | eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); |
616 | ext4_mark_inode_dirty(handle, inode); | 638 | ext4_mark_inode_dirty(handle, inode); |
617 | ext4_ext_invalidate_cache(inode); | 639 | ext4_ext_invalidate_cache(inode); |
618 | return 0; | 640 | return 0; |
@@ -837,7 +859,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
837 | 859 | ||
838 | neh = ext_block_hdr(bh); | 860 | neh = ext_block_hdr(bh); |
839 | neh->eh_entries = 0; | 861 | neh->eh_entries = 0; |
840 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); | 862 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); |
841 | neh->eh_magic = EXT4_EXT_MAGIC; | 863 | neh->eh_magic = EXT4_EXT_MAGIC; |
842 | neh->eh_depth = 0; | 864 | neh->eh_depth = 0; |
843 | ex = EXT_FIRST_EXTENT(neh); | 865 | ex = EXT_FIRST_EXTENT(neh); |
@@ -850,9 +872,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
850 | path[depth].p_ext++; | 872 | path[depth].p_ext++; |
851 | while (path[depth].p_ext <= | 873 | while (path[depth].p_ext <= |
852 | EXT_MAX_EXTENT(path[depth].p_hdr)) { | 874 | EXT_MAX_EXTENT(path[depth].p_hdr)) { |
853 | ext_debug("move %d:%llu:%d in new leaf %llu\n", | 875 | ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", |
854 | le32_to_cpu(path[depth].p_ext->ee_block), | 876 | le32_to_cpu(path[depth].p_ext->ee_block), |
855 | ext_pblock(path[depth].p_ext), | 877 | ext_pblock(path[depth].p_ext), |
878 | ext4_ext_is_uninitialized(path[depth].p_ext), | ||
856 | ext4_ext_get_actual_len(path[depth].p_ext), | 879 | ext4_ext_get_actual_len(path[depth].p_ext), |
857 | newblock); | 880 | newblock); |
858 | /*memmove(ex++, path[depth].p_ext++, | 881 | /*memmove(ex++, path[depth].p_ext++, |
@@ -912,7 +935,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
912 | neh = ext_block_hdr(bh); | 935 | neh = ext_block_hdr(bh); |
913 | neh->eh_entries = cpu_to_le16(1); | 936 | neh->eh_entries = cpu_to_le16(1); |
914 | neh->eh_magic = EXT4_EXT_MAGIC; | 937 | neh->eh_magic = EXT4_EXT_MAGIC; |
915 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); | 938 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); |
916 | neh->eh_depth = cpu_to_le16(depth - i); | 939 | neh->eh_depth = cpu_to_le16(depth - i); |
917 | fidx = EXT_FIRST_INDEX(neh); | 940 | fidx = EXT_FIRST_INDEX(neh); |
918 | fidx->ei_block = border; | 941 | fidx->ei_block = border; |
@@ -1037,9 +1060,9 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1037 | /* old root could have indexes or leaves | 1060 | /* old root could have indexes or leaves |
1038 | * so calculate e_max right way */ | 1061 | * so calculate e_max right way */ |
1039 | if (ext_depth(inode)) | 1062 | if (ext_depth(inode)) |
1040 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); | 1063 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); |
1041 | else | 1064 | else |
1042 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); | 1065 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); |
1043 | neh->eh_magic = EXT4_EXT_MAGIC; | 1066 | neh->eh_magic = EXT4_EXT_MAGIC; |
1044 | set_buffer_uptodate(bh); | 1067 | set_buffer_uptodate(bh); |
1045 | unlock_buffer(bh); | 1068 | unlock_buffer(bh); |
@@ -1054,7 +1077,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1054 | goto out; | 1077 | goto out; |
1055 | 1078 | ||
1056 | curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; | 1079 | curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; |
1057 | curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode)); | 1080 | curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0)); |
1058 | curp->p_hdr->eh_entries = cpu_to_le16(1); | 1081 | curp->p_hdr->eh_entries = cpu_to_le16(1); |
1059 | curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); | 1082 | curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); |
1060 | 1083 | ||
@@ -1580,9 +1603,11 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1580 | 1603 | ||
1581 | /* try to insert block into found extent and return */ | 1604 | /* try to insert block into found extent and return */ |
1582 | if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { | 1605 | if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { |
1583 | ext_debug("append %d block to %d:%d (from %llu)\n", | 1606 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", |
1607 | ext4_ext_is_uninitialized(newext), | ||
1584 | ext4_ext_get_actual_len(newext), | 1608 | ext4_ext_get_actual_len(newext), |
1585 | le32_to_cpu(ex->ee_block), | 1609 | le32_to_cpu(ex->ee_block), |
1610 | ext4_ext_is_uninitialized(ex), | ||
1586 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 1611 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); |
1587 | err = ext4_ext_get_access(handle, inode, path + depth); | 1612 | err = ext4_ext_get_access(handle, inode, path + depth); |
1588 | if (err) | 1613 | if (err) |
@@ -1651,9 +1676,10 @@ has_space: | |||
1651 | 1676 | ||
1652 | if (!nearex) { | 1677 | if (!nearex) { |
1653 | /* there is no extent in this leaf, create first one */ | 1678 | /* there is no extent in this leaf, create first one */ |
1654 | ext_debug("first extent in the leaf: %d:%llu:%d\n", | 1679 | ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", |
1655 | le32_to_cpu(newext->ee_block), | 1680 | le32_to_cpu(newext->ee_block), |
1656 | ext_pblock(newext), | 1681 | ext_pblock(newext), |
1682 | ext4_ext_is_uninitialized(newext), | ||
1657 | ext4_ext_get_actual_len(newext)); | 1683 | ext4_ext_get_actual_len(newext)); |
1658 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); | 1684 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); |
1659 | } else if (le32_to_cpu(newext->ee_block) | 1685 | } else if (le32_to_cpu(newext->ee_block) |
@@ -1663,10 +1689,11 @@ has_space: | |||
1663 | len = EXT_MAX_EXTENT(eh) - nearex; | 1689 | len = EXT_MAX_EXTENT(eh) - nearex; |
1664 | len = (len - 1) * sizeof(struct ext4_extent); | 1690 | len = (len - 1) * sizeof(struct ext4_extent); |
1665 | len = len < 0 ? 0 : len; | 1691 | len = len < 0 ? 0 : len; |
1666 | ext_debug("insert %d:%llu:%d after: nearest 0x%p, " | 1692 | ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " |
1667 | "move %d from 0x%p to 0x%p\n", | 1693 | "move %d from 0x%p to 0x%p\n", |
1668 | le32_to_cpu(newext->ee_block), | 1694 | le32_to_cpu(newext->ee_block), |
1669 | ext_pblock(newext), | 1695 | ext_pblock(newext), |
1696 | ext4_ext_is_uninitialized(newext), | ||
1670 | ext4_ext_get_actual_len(newext), | 1697 | ext4_ext_get_actual_len(newext), |
1671 | nearex, len, nearex + 1, nearex + 2); | 1698 | nearex, len, nearex + 1, nearex + 2); |
1672 | memmove(nearex + 2, nearex + 1, len); | 1699 | memmove(nearex + 2, nearex + 1, len); |
@@ -1676,10 +1703,11 @@ has_space: | |||
1676 | BUG_ON(newext->ee_block == nearex->ee_block); | 1703 | BUG_ON(newext->ee_block == nearex->ee_block); |
1677 | len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); | 1704 | len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); |
1678 | len = len < 0 ? 0 : len; | 1705 | len = len < 0 ? 0 : len; |
1679 | ext_debug("insert %d:%llu:%d before: nearest 0x%p, " | 1706 | ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " |
1680 | "move %d from 0x%p to 0x%p\n", | 1707 | "move %d from 0x%p to 0x%p\n", |
1681 | le32_to_cpu(newext->ee_block), | 1708 | le32_to_cpu(newext->ee_block), |
1682 | ext_pblock(newext), | 1709 | ext_pblock(newext), |
1710 | ext4_ext_is_uninitialized(newext), | ||
1683 | ext4_ext_get_actual_len(newext), | 1711 | ext4_ext_get_actual_len(newext), |
1684 | nearex, len, nearex + 1, nearex + 2); | 1712 | nearex, len, nearex + 1, nearex + 2); |
1685 | memmove(nearex + 1, nearex, len); | 1713 | memmove(nearex + 1, nearex, len); |
@@ -2094,7 +2122,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2094 | else | 2122 | else |
2095 | uninitialized = 0; | 2123 | uninitialized = 0; |
2096 | 2124 | ||
2097 | ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); | 2125 | ext_debug("remove ext %u:[%d]%d\n", ex_ee_block, |
2126 | uninitialized, ex_ee_len); | ||
2098 | path[depth].p_ext = ex; | 2127 | path[depth].p_ext = ex; |
2099 | 2128 | ||
2100 | a = ex_ee_block > start ? ex_ee_block : start; | 2129 | a = ex_ee_block > start ? ex_ee_block : start; |
@@ -2138,7 +2167,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2138 | } | 2167 | } |
2139 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | 2168 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); |
2140 | 2169 | ||
2141 | err = ext4_ext_journal_restart(handle, credits); | 2170 | err = ext4_ext_truncate_extend_restart(handle, inode, credits); |
2142 | if (err) | 2171 | if (err) |
2143 | goto out; | 2172 | goto out; |
2144 | 2173 | ||
@@ -2327,7 +2356,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
2327 | if (err == 0) { | 2356 | if (err == 0) { |
2328 | ext_inode_hdr(inode)->eh_depth = 0; | 2357 | ext_inode_hdr(inode)->eh_depth = 0; |
2329 | ext_inode_hdr(inode)->eh_max = | 2358 | ext_inode_hdr(inode)->eh_max = |
2330 | cpu_to_le16(ext4_ext_space_root(inode)); | 2359 | cpu_to_le16(ext4_ext_space_root(inode, 0)); |
2331 | err = ext4_ext_dirty(handle, inode, path); | 2360 | err = ext4_ext_dirty(handle, inode, path); |
2332 | } | 2361 | } |
2333 | } | 2362 | } |
@@ -2743,6 +2772,7 @@ insert: | |||
2743 | } else if (err) | 2772 | } else if (err) |
2744 | goto fix_extent_len; | 2773 | goto fix_extent_len; |
2745 | out: | 2774 | out: |
2775 | ext4_ext_show_leaf(inode, path); | ||
2746 | return err ? err : allocated; | 2776 | return err ? err : allocated; |
2747 | 2777 | ||
2748 | fix_extent_len: | 2778 | fix_extent_len: |
@@ -2786,7 +2816,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2786 | struct ext4_allocation_request ar; | 2816 | struct ext4_allocation_request ar; |
2787 | 2817 | ||
2788 | __clear_bit(BH_New, &bh_result->b_state); | 2818 | __clear_bit(BH_New, &bh_result->b_state); |
2789 | ext_debug("blocks %u/%u requested for inode %u\n", | 2819 | ext_debug("blocks %u/%u requested for inode %lu\n", |
2790 | iblock, max_blocks, inode->i_ino); | 2820 | iblock, max_blocks, inode->i_ino); |
2791 | 2821 | ||
2792 | /* check in cache */ | 2822 | /* check in cache */ |
@@ -2849,7 +2879,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2849 | newblock = iblock - ee_block + ee_start; | 2879 | newblock = iblock - ee_block + ee_start; |
2850 | /* number of remaining blocks in the extent */ | 2880 | /* number of remaining blocks in the extent */ |
2851 | allocated = ee_len - (iblock - ee_block); | 2881 | allocated = ee_len - (iblock - ee_block); |
2852 | ext_debug("%u fit into %lu:%d -> %llu\n", iblock, | 2882 | ext_debug("%u fit into %u:%d -> %llu\n", iblock, |
2853 | ee_block, ee_len, newblock); | 2883 | ee_block, ee_len, newblock); |
2854 | 2884 | ||
2855 | /* Do not put uninitialized extent in the cache */ | 2885 | /* Do not put uninitialized extent in the cache */ |
@@ -2950,7 +2980,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2950 | newblock = ext4_mb_new_blocks(handle, &ar, &err); | 2980 | newblock = ext4_mb_new_blocks(handle, &ar, &err); |
2951 | if (!newblock) | 2981 | if (!newblock) |
2952 | goto out2; | 2982 | goto out2; |
2953 | ext_debug("allocate new block: goal %llu, found %llu/%lu\n", | 2983 | ext_debug("allocate new block: goal %llu, found %llu/%u\n", |
2954 | ar.goal, newblock, allocated); | 2984 | ar.goal, newblock, allocated); |
2955 | 2985 | ||
2956 | /* try to insert new extent into found leaf and return */ | 2986 | /* try to insert new extent into found leaf and return */ |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 83cf6415f599..07475740b512 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -50,7 +50,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
50 | { | 50 | { |
51 | struct inode *inode = dentry->d_inode; | 51 | struct inode *inode = dentry->d_inode; |
52 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 52 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
53 | int ret = 0; | 53 | int err, ret = 0; |
54 | 54 | ||
55 | J_ASSERT(ext4_journal_current_handle() == NULL); | 55 | J_ASSERT(ext4_journal_current_handle() == NULL); |
56 | 56 | ||
@@ -79,6 +79,9 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
79 | goto out; | 79 | goto out; |
80 | } | 80 | } |
81 | 81 | ||
82 | if (!journal) | ||
83 | ret = sync_mapping_buffers(inode->i_mapping); | ||
84 | |||
82 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | 85 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) |
83 | goto out; | 86 | goto out; |
84 | 87 | ||
@@ -91,10 +94,12 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
91 | .sync_mode = WB_SYNC_ALL, | 94 | .sync_mode = WB_SYNC_ALL, |
92 | .nr_to_write = 0, /* sys_fsync did this */ | 95 | .nr_to_write = 0, /* sys_fsync did this */ |
93 | }; | 96 | }; |
94 | ret = sync_inode(inode, &wbc); | 97 | err = sync_inode(inode, &wbc); |
95 | if (journal && (journal->j_flags & JBD2_BARRIER)) | 98 | if (ret == 0) |
96 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 99 | ret = err; |
97 | } | 100 | } |
98 | out: | 101 | out: |
102 | if (journal && (journal->j_flags & JBD2_BARRIER)) | ||
103 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | ||
99 | return ret; | 104 | return ret; |
100 | } | 105 | } |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 29e6dc7299b8..f3624ead4f6c 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -1189,7 +1189,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) | |||
1189 | 1189 | ||
1190 | x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); | 1190 | x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); |
1191 | printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", | 1191 | printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", |
1192 | i, ext4_free_inodes_count(sb, gdp), x); | 1192 | (unsigned long) i, ext4_free_inodes_count(sb, gdp), x); |
1193 | bitmap_count += x; | 1193 | bitmap_count += x; |
1194 | } | 1194 | } |
1195 | brelse(bitmap_bh); | 1195 | brelse(bitmap_bh); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f9c642b22efa..4abd683b963d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -192,11 +192,24 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) | |||
192 | * so before we call here everything must be consistently dirtied against | 192 | * so before we call here everything must be consistently dirtied against |
193 | * this transaction. | 193 | * this transaction. |
194 | */ | 194 | */ |
195 | static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) | 195 | int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, |
196 | int nblocks) | ||
196 | { | 197 | { |
198 | int ret; | ||
199 | |||
200 | /* | ||
201 | * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this | ||
202 | * moment, get_block can be called only for blocks inside i_size since | ||
203 | * page cache has been already dropped and writes are blocked by | ||
204 | * i_mutex. So we can safely drop the i_data_sem here. | ||
205 | */ | ||
197 | BUG_ON(EXT4_JOURNAL(inode) == NULL); | 206 | BUG_ON(EXT4_JOURNAL(inode) == NULL); |
198 | jbd_debug(2, "restarting handle %p\n", handle); | 207 | jbd_debug(2, "restarting handle %p\n", handle); |
199 | return ext4_journal_restart(handle, blocks_for_truncate(inode)); | 208 | up_write(&EXT4_I(inode)->i_data_sem); |
209 | ret = ext4_journal_restart(handle, blocks_for_truncate(inode)); | ||
210 | down_write(&EXT4_I(inode)->i_data_sem); | ||
211 | |||
212 | return ret; | ||
200 | } | 213 | } |
201 | 214 | ||
202 | /* | 215 | /* |
@@ -341,9 +354,7 @@ static int ext4_block_to_path(struct inode *inode, | |||
341 | int n = 0; | 354 | int n = 0; |
342 | int final = 0; | 355 | int final = 0; |
343 | 356 | ||
344 | if (i_block < 0) { | 357 | if (i_block < direct_blocks) { |
345 | ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0"); | ||
346 | } else if (i_block < direct_blocks) { | ||
347 | offsets[n++] = i_block; | 358 | offsets[n++] = i_block; |
348 | final = direct_blocks; | 359 | final = direct_blocks; |
349 | } else if ((i_block -= direct_blocks) < indirect_blocks) { | 360 | } else if ((i_block -= direct_blocks) < indirect_blocks) { |
@@ -551,15 +562,21 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
551 | * | 562 | * |
552 | * Normally this function find the preferred place for block allocation, | 563 | * Normally this function find the preferred place for block allocation, |
553 | * returns it. | 564 | * returns it. |
565 | * Because this is only used for non-extent files, we limit the block nr | ||
566 | * to 32 bits. | ||
554 | */ | 567 | */ |
555 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | 568 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
556 | Indirect *partial) | 569 | Indirect *partial) |
557 | { | 570 | { |
571 | ext4_fsblk_t goal; | ||
572 | |||
558 | /* | 573 | /* |
559 | * XXX need to get goal block from mballoc's data structures | 574 | * XXX need to get goal block from mballoc's data structures |
560 | */ | 575 | */ |
561 | 576 | ||
562 | return ext4_find_near(inode, partial); | 577 | goal = ext4_find_near(inode, partial); |
578 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | ||
579 | return goal; | ||
563 | } | 580 | } |
564 | 581 | ||
565 | /** | 582 | /** |
@@ -640,6 +657,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
640 | if (*err) | 657 | if (*err) |
641 | goto failed_out; | 658 | goto failed_out; |
642 | 659 | ||
660 | BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS); | ||
661 | |||
643 | target -= count; | 662 | target -= count; |
644 | /* allocate blocks for indirect blocks */ | 663 | /* allocate blocks for indirect blocks */ |
645 | while (index < indirect_blks && count) { | 664 | while (index < indirect_blks && count) { |
@@ -674,6 +693,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
674 | ar.flags = EXT4_MB_HINT_DATA; | 693 | ar.flags = EXT4_MB_HINT_DATA; |
675 | 694 | ||
676 | current_block = ext4_mb_new_blocks(handle, &ar, err); | 695 | current_block = ext4_mb_new_blocks(handle, &ar, err); |
696 | BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS); | ||
677 | 697 | ||
678 | if (*err && (target == blks)) { | 698 | if (*err && (target == blks)) { |
679 | /* | 699 | /* |
@@ -762,8 +782,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
762 | BUFFER_TRACE(bh, "call get_create_access"); | 782 | BUFFER_TRACE(bh, "call get_create_access"); |
763 | err = ext4_journal_get_create_access(handle, bh); | 783 | err = ext4_journal_get_create_access(handle, bh); |
764 | if (err) { | 784 | if (err) { |
785 | /* Don't brelse(bh) here; it's done in | ||
786 | * ext4_journal_forget() below */ | ||
765 | unlock_buffer(bh); | 787 | unlock_buffer(bh); |
766 | brelse(bh); | ||
767 | goto failed; | 788 | goto failed; |
768 | } | 789 | } |
769 | 790 | ||
@@ -1109,16 +1130,15 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1109 | ext4_discard_preallocations(inode); | 1130 | ext4_discard_preallocations(inode); |
1110 | } | 1131 | } |
1111 | 1132 | ||
1112 | static int check_block_validity(struct inode *inode, sector_t logical, | 1133 | static int check_block_validity(struct inode *inode, const char *msg, |
1113 | sector_t phys, int len) | 1134 | sector_t logical, sector_t phys, int len) |
1114 | { | 1135 | { |
1115 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | 1136 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { |
1116 | ext4_error(inode->i_sb, "check_block_validity", | 1137 | ext4_error(inode->i_sb, msg, |
1117 | "inode #%lu logical block %llu mapped to %llu " | 1138 | "inode #%lu logical block %llu mapped to %llu " |
1118 | "(size %d)", inode->i_ino, | 1139 | "(size %d)", inode->i_ino, |
1119 | (unsigned long long) logical, | 1140 | (unsigned long long) logical, |
1120 | (unsigned long long) phys, len); | 1141 | (unsigned long long) phys, len); |
1121 | WARN_ON(1); | ||
1122 | return -EIO; | 1142 | return -EIO; |
1123 | } | 1143 | } |
1124 | return 0; | 1144 | return 0; |
@@ -1170,8 +1190,8 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1170 | up_read((&EXT4_I(inode)->i_data_sem)); | 1190 | up_read((&EXT4_I(inode)->i_data_sem)); |
1171 | 1191 | ||
1172 | if (retval > 0 && buffer_mapped(bh)) { | 1192 | if (retval > 0 && buffer_mapped(bh)) { |
1173 | int ret = check_block_validity(inode, block, | 1193 | int ret = check_block_validity(inode, "file system corruption", |
1174 | bh->b_blocknr, retval); | 1194 | block, bh->b_blocknr, retval); |
1175 | if (ret != 0) | 1195 | if (ret != 0) |
1176 | return ret; | 1196 | return ret; |
1177 | } | 1197 | } |
@@ -1235,8 +1255,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1235 | * i_data's format changing. Force the migrate | 1255 | * i_data's format changing. Force the migrate |
1236 | * to fail by clearing migrate flags | 1256 | * to fail by clearing migrate flags |
1237 | */ | 1257 | */ |
1238 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & | 1258 | EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; |
1239 | ~EXT4_EXT_MIGRATE; | ||
1240 | } | 1259 | } |
1241 | } | 1260 | } |
1242 | 1261 | ||
@@ -1252,8 +1271,9 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1252 | 1271 | ||
1253 | up_write((&EXT4_I(inode)->i_data_sem)); | 1272 | up_write((&EXT4_I(inode)->i_data_sem)); |
1254 | if (retval > 0 && buffer_mapped(bh)) { | 1273 | if (retval > 0 && buffer_mapped(bh)) { |
1255 | int ret = check_block_validity(inode, block, | 1274 | int ret = check_block_validity(inode, "file system " |
1256 | bh->b_blocknr, retval); | 1275 | "corruption after allocation", |
1276 | block, bh->b_blocknr, retval); | ||
1257 | if (ret != 0) | 1277 | if (ret != 0) |
1258 | return ret; | 1278 | return ret; |
1259 | } | 1279 | } |
@@ -1863,18 +1883,6 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1863 | * Delayed allocation stuff | 1883 | * Delayed allocation stuff |
1864 | */ | 1884 | */ |
1865 | 1885 | ||
1866 | struct mpage_da_data { | ||
1867 | struct inode *inode; | ||
1868 | sector_t b_blocknr; /* start block number of extent */ | ||
1869 | size_t b_size; /* size of extent */ | ||
1870 | unsigned long b_state; /* state of the extent */ | ||
1871 | unsigned long first_page, next_page; /* extent of pages */ | ||
1872 | struct writeback_control *wbc; | ||
1873 | int io_done; | ||
1874 | int pages_written; | ||
1875 | int retval; | ||
1876 | }; | ||
1877 | |||
1878 | /* | 1886 | /* |
1879 | * mpage_da_submit_io - walks through extent of pages and try to write | 1887 | * mpage_da_submit_io - walks through extent of pages and try to write |
1880 | * them with writepage() call back | 1888 | * them with writepage() call back |
@@ -2737,6 +2745,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2737 | long pages_skipped; | 2745 | long pages_skipped; |
2738 | int range_cyclic, cycled = 1, io_done = 0; | 2746 | int range_cyclic, cycled = 1, io_done = 0; |
2739 | int needed_blocks, ret = 0, nr_to_writebump = 0; | 2747 | int needed_blocks, ret = 0, nr_to_writebump = 0; |
2748 | loff_t range_start = wbc->range_start; | ||
2740 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2749 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2741 | 2750 | ||
2742 | trace_ext4_da_writepages(inode, wbc); | 2751 | trace_ext4_da_writepages(inode, wbc); |
@@ -2850,6 +2859,7 @@ retry: | |||
2850 | mpd.io_done = 1; | 2859 | mpd.io_done = 1; |
2851 | ret = MPAGE_DA_EXTENT_TAIL; | 2860 | ret = MPAGE_DA_EXTENT_TAIL; |
2852 | } | 2861 | } |
2862 | trace_ext4_da_write_pages(inode, &mpd); | ||
2853 | wbc->nr_to_write -= mpd.pages_written; | 2863 | wbc->nr_to_write -= mpd.pages_written; |
2854 | 2864 | ||
2855 | ext4_journal_stop(handle); | 2865 | ext4_journal_stop(handle); |
@@ -2905,6 +2915,7 @@ out_writepages: | |||
2905 | if (!no_nrwrite_index_update) | 2915 | if (!no_nrwrite_index_update) |
2906 | wbc->no_nrwrite_index_update = 0; | 2916 | wbc->no_nrwrite_index_update = 0; |
2907 | wbc->nr_to_write -= nr_to_writebump; | 2917 | wbc->nr_to_write -= nr_to_writebump; |
2918 | wbc->range_start = range_start; | ||
2908 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 2919 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); |
2909 | return ret; | 2920 | return ret; |
2910 | } | 2921 | } |
@@ -3117,6 +3128,8 @@ out: | |||
3117 | */ | 3128 | */ |
3118 | int ext4_alloc_da_blocks(struct inode *inode) | 3129 | int ext4_alloc_da_blocks(struct inode *inode) |
3119 | { | 3130 | { |
3131 | trace_ext4_alloc_da_blocks(inode); | ||
3132 | |||
3120 | if (!EXT4_I(inode)->i_reserved_data_blocks && | 3133 | if (!EXT4_I(inode)->i_reserved_data_blocks && |
3121 | !EXT4_I(inode)->i_reserved_meta_blocks) | 3134 | !EXT4_I(inode)->i_reserved_meta_blocks) |
3122 | return 0; | 3135 | return 0; |
@@ -3659,7 +3672,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
3659 | ext4_handle_dirty_metadata(handle, inode, bh); | 3672 | ext4_handle_dirty_metadata(handle, inode, bh); |
3660 | } | 3673 | } |
3661 | ext4_mark_inode_dirty(handle, inode); | 3674 | ext4_mark_inode_dirty(handle, inode); |
3662 | ext4_journal_test_restart(handle, inode); | 3675 | ext4_truncate_restart_trans(handle, inode, |
3676 | blocks_for_truncate(inode)); | ||
3663 | if (bh) { | 3677 | if (bh) { |
3664 | BUFFER_TRACE(bh, "retaking write access"); | 3678 | BUFFER_TRACE(bh, "retaking write access"); |
3665 | ext4_journal_get_write_access(handle, bh); | 3679 | ext4_journal_get_write_access(handle, bh); |
@@ -3870,7 +3884,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
3870 | return; | 3884 | return; |
3871 | if (try_to_extend_transaction(handle, inode)) { | 3885 | if (try_to_extend_transaction(handle, inode)) { |
3872 | ext4_mark_inode_dirty(handle, inode); | 3886 | ext4_mark_inode_dirty(handle, inode); |
3873 | ext4_journal_test_restart(handle, inode); | 3887 | ext4_truncate_restart_trans(handle, inode, |
3888 | blocks_for_truncate(inode)); | ||
3874 | } | 3889 | } |
3875 | 3890 | ||
3876 | ext4_free_blocks(handle, inode, nr, 1, 1); | 3891 | ext4_free_blocks(handle, inode, nr, 1, 1); |
@@ -3958,8 +3973,7 @@ void ext4_truncate(struct inode *inode) | |||
3958 | if (!ext4_can_truncate(inode)) | 3973 | if (!ext4_can_truncate(inode)) |
3959 | return; | 3974 | return; |
3960 | 3975 | ||
3961 | if (ei->i_disksize && inode->i_size == 0 && | 3976 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) |
3962 | !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | ||
3963 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; | 3977 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; |
3964 | 3978 | ||
3965 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 3979 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
@@ -4533,7 +4547,8 @@ static int ext4_inode_blocks_set(handle_t *handle, | |||
4533 | */ | 4547 | */ |
4534 | static int ext4_do_update_inode(handle_t *handle, | 4548 | static int ext4_do_update_inode(handle_t *handle, |
4535 | struct inode *inode, | 4549 | struct inode *inode, |
4536 | struct ext4_iloc *iloc) | 4550 | struct ext4_iloc *iloc, |
4551 | int do_sync) | ||
4537 | { | 4552 | { |
4538 | struct ext4_inode *raw_inode = ext4_raw_inode(iloc); | 4553 | struct ext4_inode *raw_inode = ext4_raw_inode(iloc); |
4539 | struct ext4_inode_info *ei = EXT4_I(inode); | 4554 | struct ext4_inode_info *ei = EXT4_I(inode); |
@@ -4581,8 +4596,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4581 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) | 4596 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) |
4582 | goto out_brelse; | 4597 | goto out_brelse; |
4583 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); | 4598 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); |
4584 | /* clear the migrate flag in the raw_inode */ | 4599 | raw_inode->i_flags = cpu_to_le32(ei->i_flags); |
4585 | raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE); | ||
4586 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 4600 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
4587 | cpu_to_le32(EXT4_OS_HURD)) | 4601 | cpu_to_le32(EXT4_OS_HURD)) |
4588 | raw_inode->i_file_acl_high = | 4602 | raw_inode->i_file_acl_high = |
@@ -4635,10 +4649,22 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4635 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); | 4649 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); |
4636 | } | 4650 | } |
4637 | 4651 | ||
4638 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4652 | /* |
4639 | rc = ext4_handle_dirty_metadata(handle, inode, bh); | 4653 | * If we're not using a journal and we were called from |
4640 | if (!err) | 4654 | * ext4_write_inode() to sync the inode (making do_sync true), |
4641 | err = rc; | 4655 | * we can just use sync_dirty_buffer() directly to do our dirty |
4656 | * work. Testing s_journal here is a bit redundant but it's | ||
4657 | * worth it to avoid potential future trouble. | ||
4658 | */ | ||
4659 | if (EXT4_SB(inode->i_sb)->s_journal == NULL && do_sync) { | ||
4660 | BUFFER_TRACE(bh, "call sync_dirty_buffer"); | ||
4661 | sync_dirty_buffer(bh); | ||
4662 | } else { | ||
4663 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
4664 | rc = ext4_handle_dirty_metadata(handle, inode, bh); | ||
4665 | if (!err) | ||
4666 | err = rc; | ||
4667 | } | ||
4642 | ei->i_state &= ~EXT4_STATE_NEW; | 4668 | ei->i_state &= ~EXT4_STATE_NEW; |
4643 | 4669 | ||
4644 | out_brelse: | 4670 | out_brelse: |
@@ -4684,19 +4710,32 @@ out_brelse: | |||
4684 | */ | 4710 | */ |
4685 | int ext4_write_inode(struct inode *inode, int wait) | 4711 | int ext4_write_inode(struct inode *inode, int wait) |
4686 | { | 4712 | { |
4713 | int err; | ||
4714 | |||
4687 | if (current->flags & PF_MEMALLOC) | 4715 | if (current->flags & PF_MEMALLOC) |
4688 | return 0; | 4716 | return 0; |
4689 | 4717 | ||
4690 | if (ext4_journal_current_handle()) { | 4718 | if (EXT4_SB(inode->i_sb)->s_journal) { |
4691 | jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); | 4719 | if (ext4_journal_current_handle()) { |
4692 | dump_stack(); | 4720 | jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); |
4693 | return -EIO; | 4721 | dump_stack(); |
4694 | } | 4722 | return -EIO; |
4723 | } | ||
4695 | 4724 | ||
4696 | if (!wait) | 4725 | if (!wait) |
4697 | return 0; | 4726 | return 0; |
4727 | |||
4728 | err = ext4_force_commit(inode->i_sb); | ||
4729 | } else { | ||
4730 | struct ext4_iloc iloc; | ||
4698 | 4731 | ||
4699 | return ext4_force_commit(inode->i_sb); | 4732 | err = ext4_get_inode_loc(inode, &iloc); |
4733 | if (err) | ||
4734 | return err; | ||
4735 | err = ext4_do_update_inode(EXT4_NOJOURNAL_HANDLE, | ||
4736 | inode, &iloc, wait); | ||
4737 | } | ||
4738 | return err; | ||
4700 | } | 4739 | } |
4701 | 4740 | ||
4702 | /* | 4741 | /* |
@@ -4990,7 +5029,7 @@ int ext4_mark_iloc_dirty(handle_t *handle, | |||
4990 | get_bh(iloc->bh); | 5029 | get_bh(iloc->bh); |
4991 | 5030 | ||
4992 | /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ | 5031 | /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ |
4993 | err = ext4_do_update_inode(handle, inode, iloc); | 5032 | err = ext4_do_update_inode(handle, inode, iloc, 0); |
4994 | put_bh(iloc->bh); | 5033 | put_bh(iloc->bh); |
4995 | return err; | 5034 | return err; |
4996 | } | 5035 | } |
@@ -5281,12 +5320,21 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5281 | else | 5320 | else |
5282 | len = PAGE_CACHE_SIZE; | 5321 | len = PAGE_CACHE_SIZE; |
5283 | 5322 | ||
5323 | lock_page(page); | ||
5324 | /* | ||
5325 | * return if we have all the buffers mapped. This avoid | ||
5326 | * the need to call write_begin/write_end which does a | ||
5327 | * journal_start/journal_stop which can block and take | ||
5328 | * long time | ||
5329 | */ | ||
5284 | if (page_has_buffers(page)) { | 5330 | if (page_has_buffers(page)) { |
5285 | /* return if we have all the buffers mapped */ | ||
5286 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | 5331 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, |
5287 | ext4_bh_unmapped)) | 5332 | ext4_bh_unmapped)) { |
5333 | unlock_page(page); | ||
5288 | goto out_unlock; | 5334 | goto out_unlock; |
5335 | } | ||
5289 | } | 5336 | } |
5337 | unlock_page(page); | ||
5290 | /* | 5338 | /* |
5291 | * OK, we need to fill the hole... Do write_begin write_end | 5339 | * OK, we need to fill the hole... Do write_begin write_end |
5292 | * to do block allocation/reservation.We are not holding | 5340 | * to do block allocation/reservation.We are not holding |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7050a9cd04a4..c1cdf613e725 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -243,10 +243,9 @@ setversion_out: | |||
243 | me.donor_start, me.len, &me.moved_len); | 243 | me.donor_start, me.len, &me.moved_len); |
244 | fput(donor_filp); | 244 | fput(donor_filp); |
245 | 245 | ||
246 | if (!err) | 246 | if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) |
247 | if (copy_to_user((struct move_extent *)arg, | 247 | return -EFAULT; |
248 | &me, sizeof(me))) | 248 | |
249 | return -EFAULT; | ||
250 | return err; | 249 | return err; |
251 | } | 250 | } |
252 | 251 | ||
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index cd258463e2a9..e9c61896d605 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -22,6 +22,7 @@ | |||
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include "mballoc.h" | 24 | #include "mballoc.h" |
25 | #include <linux/debugfs.h> | ||
25 | #include <trace/events/ext4.h> | 26 | #include <trace/events/ext4.h> |
26 | 27 | ||
27 | /* | 28 | /* |
@@ -622,13 +623,13 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
622 | 623 | ||
623 | /* FIXME!! need more doc */ | 624 | /* FIXME!! need more doc */ |
624 | static void ext4_mb_mark_free_simple(struct super_block *sb, | 625 | static void ext4_mb_mark_free_simple(struct super_block *sb, |
625 | void *buddy, unsigned first, int len, | 626 | void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, |
626 | struct ext4_group_info *grp) | 627 | struct ext4_group_info *grp) |
627 | { | 628 | { |
628 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 629 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
629 | unsigned short min; | 630 | ext4_grpblk_t min; |
630 | unsigned short max; | 631 | ext4_grpblk_t max; |
631 | unsigned short chunk; | 632 | ext4_grpblk_t chunk; |
632 | unsigned short border; | 633 | unsigned short border; |
633 | 634 | ||
634 | BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); | 635 | BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); |
@@ -662,10 +663,10 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
662 | void *buddy, void *bitmap, ext4_group_t group) | 663 | void *buddy, void *bitmap, ext4_group_t group) |
663 | { | 664 | { |
664 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 665 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
665 | unsigned short max = EXT4_BLOCKS_PER_GROUP(sb); | 666 | ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb); |
666 | unsigned short i = 0; | 667 | ext4_grpblk_t i = 0; |
667 | unsigned short first; | 668 | ext4_grpblk_t first; |
668 | unsigned short len; | 669 | ext4_grpblk_t len; |
669 | unsigned free = 0; | 670 | unsigned free = 0; |
670 | unsigned fragments = 0; | 671 | unsigned fragments = 0; |
671 | unsigned long long period = get_cycles(); | 672 | unsigned long long period = get_cycles(); |
@@ -743,7 +744,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
743 | char *data; | 744 | char *data; |
744 | char *bitmap; | 745 | char *bitmap; |
745 | 746 | ||
746 | mb_debug("init page %lu\n", page->index); | 747 | mb_debug(1, "init page %lu\n", page->index); |
747 | 748 | ||
748 | inode = page->mapping->host; | 749 | inode = page->mapping->host; |
749 | sb = inode->i_sb; | 750 | sb = inode->i_sb; |
@@ -822,7 +823,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
822 | set_bitmap_uptodate(bh[i]); | 823 | set_bitmap_uptodate(bh[i]); |
823 | bh[i]->b_end_io = end_buffer_read_sync; | 824 | bh[i]->b_end_io = end_buffer_read_sync; |
824 | submit_bh(READ, bh[i]); | 825 | submit_bh(READ, bh[i]); |
825 | mb_debug("read bitmap for group %u\n", first_group + i); | 826 | mb_debug(1, "read bitmap for group %u\n", first_group + i); |
826 | } | 827 | } |
827 | 828 | ||
828 | /* wait for I/O completion */ | 829 | /* wait for I/O completion */ |
@@ -862,12 +863,13 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
862 | if ((first_block + i) & 1) { | 863 | if ((first_block + i) & 1) { |
863 | /* this is block of buddy */ | 864 | /* this is block of buddy */ |
864 | BUG_ON(incore == NULL); | 865 | BUG_ON(incore == NULL); |
865 | mb_debug("put buddy for group %u in page %lu/%x\n", | 866 | mb_debug(1, "put buddy for group %u in page %lu/%x\n", |
866 | group, page->index, i * blocksize); | 867 | group, page->index, i * blocksize); |
867 | grinfo = ext4_get_group_info(sb, group); | 868 | grinfo = ext4_get_group_info(sb, group); |
868 | grinfo->bb_fragments = 0; | 869 | grinfo->bb_fragments = 0; |
869 | memset(grinfo->bb_counters, 0, | 870 | memset(grinfo->bb_counters, 0, |
870 | sizeof(unsigned short)*(sb->s_blocksize_bits+2)); | 871 | sizeof(*grinfo->bb_counters) * |
872 | (sb->s_blocksize_bits+2)); | ||
871 | /* | 873 | /* |
872 | * incore got set to the group block bitmap below | 874 | * incore got set to the group block bitmap below |
873 | */ | 875 | */ |
@@ -878,7 +880,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
878 | } else { | 880 | } else { |
879 | /* this is block of bitmap */ | 881 | /* this is block of bitmap */ |
880 | BUG_ON(incore != NULL); | 882 | BUG_ON(incore != NULL); |
881 | mb_debug("put bitmap for group %u in page %lu/%x\n", | 883 | mb_debug(1, "put bitmap for group %u in page %lu/%x\n", |
882 | group, page->index, i * blocksize); | 884 | group, page->index, i * blocksize); |
883 | 885 | ||
884 | /* see comments in ext4_mb_put_pa() */ | 886 | /* see comments in ext4_mb_put_pa() */ |
@@ -908,6 +910,100 @@ out: | |||
908 | return err; | 910 | return err; |
909 | } | 911 | } |
910 | 912 | ||
913 | static noinline_for_stack | ||
914 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | ||
915 | { | ||
916 | |||
917 | int ret = 0; | ||
918 | void *bitmap; | ||
919 | int blocks_per_page; | ||
920 | int block, pnum, poff; | ||
921 | int num_grp_locked = 0; | ||
922 | struct ext4_group_info *this_grp; | ||
923 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
924 | struct inode *inode = sbi->s_buddy_cache; | ||
925 | struct page *page = NULL, *bitmap_page = NULL; | ||
926 | |||
927 | mb_debug(1, "init group %u\n", group); | ||
928 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
929 | this_grp = ext4_get_group_info(sb, group); | ||
930 | /* | ||
931 | * This ensures that we don't reinit the buddy cache | ||
932 | * page which map to the group from which we are already | ||
933 | * allocating. If we are looking at the buddy cache we would | ||
934 | * have taken a reference using ext4_mb_load_buddy and that | ||
935 | * would have taken the alloc_sem lock. | ||
936 | */ | ||
937 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | ||
938 | if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | ||
939 | /* | ||
940 | * somebody initialized the group | ||
941 | * return without doing anything | ||
942 | */ | ||
943 | ret = 0; | ||
944 | goto err; | ||
945 | } | ||
946 | /* | ||
947 | * the buddy cache inode stores the block bitmap | ||
948 | * and buddy information in consecutive blocks. | ||
949 | * So for each group we need two blocks. | ||
950 | */ | ||
951 | block = group * 2; | ||
952 | pnum = block / blocks_per_page; | ||
953 | poff = block % blocks_per_page; | ||
954 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
955 | if (page) { | ||
956 | BUG_ON(page->mapping != inode->i_mapping); | ||
957 | ret = ext4_mb_init_cache(page, NULL); | ||
958 | if (ret) { | ||
959 | unlock_page(page); | ||
960 | goto err; | ||
961 | } | ||
962 | unlock_page(page); | ||
963 | } | ||
964 | if (page == NULL || !PageUptodate(page)) { | ||
965 | ret = -EIO; | ||
966 | goto err; | ||
967 | } | ||
968 | mark_page_accessed(page); | ||
969 | bitmap_page = page; | ||
970 | bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
971 | |||
972 | /* init buddy cache */ | ||
973 | block++; | ||
974 | pnum = block / blocks_per_page; | ||
975 | poff = block % blocks_per_page; | ||
976 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
977 | if (page == bitmap_page) { | ||
978 | /* | ||
979 | * If both the bitmap and buddy are in | ||
980 | * the same page we don't need to force | ||
981 | * init the buddy | ||
982 | */ | ||
983 | unlock_page(page); | ||
984 | } else if (page) { | ||
985 | BUG_ON(page->mapping != inode->i_mapping); | ||
986 | ret = ext4_mb_init_cache(page, bitmap); | ||
987 | if (ret) { | ||
988 | unlock_page(page); | ||
989 | goto err; | ||
990 | } | ||
991 | unlock_page(page); | ||
992 | } | ||
993 | if (page == NULL || !PageUptodate(page)) { | ||
994 | ret = -EIO; | ||
995 | goto err; | ||
996 | } | ||
997 | mark_page_accessed(page); | ||
998 | err: | ||
999 | ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | ||
1000 | if (bitmap_page) | ||
1001 | page_cache_release(bitmap_page); | ||
1002 | if (page) | ||
1003 | page_cache_release(page); | ||
1004 | return ret; | ||
1005 | } | ||
1006 | |||
911 | static noinline_for_stack int | 1007 | static noinline_for_stack int |
912 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | 1008 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, |
913 | struct ext4_buddy *e4b) | 1009 | struct ext4_buddy *e4b) |
@@ -922,7 +1018,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
922 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1018 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
923 | struct inode *inode = sbi->s_buddy_cache; | 1019 | struct inode *inode = sbi->s_buddy_cache; |
924 | 1020 | ||
925 | mb_debug("load group %u\n", group); | 1021 | mb_debug(1, "load group %u\n", group); |
926 | 1022 | ||
927 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | 1023 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; |
928 | grp = ext4_get_group_info(sb, group); | 1024 | grp = ext4_get_group_info(sb, group); |
@@ -941,8 +1037,26 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
941 | * groups mapped by the page is blocked | 1037 | * groups mapped by the page is blocked |
942 | * till we are done with allocation | 1038 | * till we are done with allocation |
943 | */ | 1039 | */ |
1040 | repeat_load_buddy: | ||
944 | down_read(e4b->alloc_semp); | 1041 | down_read(e4b->alloc_semp); |
945 | 1042 | ||
1043 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | ||
1044 | /* we need to check for group need init flag | ||
1045 | * with alloc_semp held so that we can be sure | ||
1046 | * that new blocks didn't get added to the group | ||
1047 | * when we are loading the buddy cache | ||
1048 | */ | ||
1049 | up_read(e4b->alloc_semp); | ||
1050 | /* | ||
1051 | * we need full data about the group | ||
1052 | * to make a good selection | ||
1053 | */ | ||
1054 | ret = ext4_mb_init_group(sb, group); | ||
1055 | if (ret) | ||
1056 | return ret; | ||
1057 | goto repeat_load_buddy; | ||
1058 | } | ||
1059 | |||
946 | /* | 1060 | /* |
947 | * the buddy cache inode stores the block bitmap | 1061 | * the buddy cache inode stores the block bitmap |
948 | * and buddy information in consecutive blocks. | 1062 | * and buddy information in consecutive blocks. |
@@ -1360,7 +1474,7 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, | |||
1360 | ac->alloc_semp = e4b->alloc_semp; | 1474 | ac->alloc_semp = e4b->alloc_semp; |
1361 | e4b->alloc_semp = NULL; | 1475 | e4b->alloc_semp = NULL; |
1362 | /* store last allocated for subsequent stream allocation */ | 1476 | /* store last allocated for subsequent stream allocation */ |
1363 | if ((ac->ac_flags & EXT4_MB_HINT_DATA)) { | 1477 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { |
1364 | spin_lock(&sbi->s_md_lock); | 1478 | spin_lock(&sbi->s_md_lock); |
1365 | sbi->s_mb_last_group = ac->ac_f_ex.fe_group; | 1479 | sbi->s_mb_last_group = ac->ac_f_ex.fe_group; |
1366 | sbi->s_mb_last_start = ac->ac_f_ex.fe_start; | 1480 | sbi->s_mb_last_start = ac->ac_f_ex.fe_start; |
@@ -1837,97 +1951,6 @@ void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | |||
1837 | 1951 | ||
1838 | } | 1952 | } |
1839 | 1953 | ||
1840 | static noinline_for_stack | ||
1841 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | ||
1842 | { | ||
1843 | |||
1844 | int ret; | ||
1845 | void *bitmap; | ||
1846 | int blocks_per_page; | ||
1847 | int block, pnum, poff; | ||
1848 | int num_grp_locked = 0; | ||
1849 | struct ext4_group_info *this_grp; | ||
1850 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1851 | struct inode *inode = sbi->s_buddy_cache; | ||
1852 | struct page *page = NULL, *bitmap_page = NULL; | ||
1853 | |||
1854 | mb_debug("init group %lu\n", group); | ||
1855 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1856 | this_grp = ext4_get_group_info(sb, group); | ||
1857 | /* | ||
1858 | * This ensures we don't add group | ||
1859 | * to this buddy cache via resize | ||
1860 | */ | ||
1861 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | ||
1862 | if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | ||
1863 | /* | ||
1864 | * somebody initialized the group | ||
1865 | * return without doing anything | ||
1866 | */ | ||
1867 | ret = 0; | ||
1868 | goto err; | ||
1869 | } | ||
1870 | /* | ||
1871 | * the buddy cache inode stores the block bitmap | ||
1872 | * and buddy information in consecutive blocks. | ||
1873 | * So for each group we need two blocks. | ||
1874 | */ | ||
1875 | block = group * 2; | ||
1876 | pnum = block / blocks_per_page; | ||
1877 | poff = block % blocks_per_page; | ||
1878 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1879 | if (page) { | ||
1880 | BUG_ON(page->mapping != inode->i_mapping); | ||
1881 | ret = ext4_mb_init_cache(page, NULL); | ||
1882 | if (ret) { | ||
1883 | unlock_page(page); | ||
1884 | goto err; | ||
1885 | } | ||
1886 | unlock_page(page); | ||
1887 | } | ||
1888 | if (page == NULL || !PageUptodate(page)) { | ||
1889 | ret = -EIO; | ||
1890 | goto err; | ||
1891 | } | ||
1892 | mark_page_accessed(page); | ||
1893 | bitmap_page = page; | ||
1894 | bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
1895 | |||
1896 | /* init buddy cache */ | ||
1897 | block++; | ||
1898 | pnum = block / blocks_per_page; | ||
1899 | poff = block % blocks_per_page; | ||
1900 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1901 | if (page == bitmap_page) { | ||
1902 | /* | ||
1903 | * If both the bitmap and buddy are in | ||
1904 | * the same page we don't need to force | ||
1905 | * init the buddy | ||
1906 | */ | ||
1907 | unlock_page(page); | ||
1908 | } else if (page) { | ||
1909 | BUG_ON(page->mapping != inode->i_mapping); | ||
1910 | ret = ext4_mb_init_cache(page, bitmap); | ||
1911 | if (ret) { | ||
1912 | unlock_page(page); | ||
1913 | goto err; | ||
1914 | } | ||
1915 | unlock_page(page); | ||
1916 | } | ||
1917 | if (page == NULL || !PageUptodate(page)) { | ||
1918 | ret = -EIO; | ||
1919 | goto err; | ||
1920 | } | ||
1921 | mark_page_accessed(page); | ||
1922 | err: | ||
1923 | ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | ||
1924 | if (bitmap_page) | ||
1925 | page_cache_release(bitmap_page); | ||
1926 | if (page) | ||
1927 | page_cache_release(page); | ||
1928 | return ret; | ||
1929 | } | ||
1930 | |||
1931 | static noinline_for_stack int | 1954 | static noinline_for_stack int |
1932 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | 1955 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
1933 | { | 1956 | { |
@@ -1938,11 +1961,14 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
1938 | struct ext4_sb_info *sbi; | 1961 | struct ext4_sb_info *sbi; |
1939 | struct super_block *sb; | 1962 | struct super_block *sb; |
1940 | struct ext4_buddy e4b; | 1963 | struct ext4_buddy e4b; |
1941 | loff_t size, isize; | ||
1942 | 1964 | ||
1943 | sb = ac->ac_sb; | 1965 | sb = ac->ac_sb; |
1944 | sbi = EXT4_SB(sb); | 1966 | sbi = EXT4_SB(sb); |
1945 | ngroups = ext4_get_groups_count(sb); | 1967 | ngroups = ext4_get_groups_count(sb); |
1968 | /* non-extent files are limited to low blocks/groups */ | ||
1969 | if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL)) | ||
1970 | ngroups = sbi->s_blockfile_groups; | ||
1971 | |||
1946 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); | 1972 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); |
1947 | 1973 | ||
1948 | /* first, try the goal */ | 1974 | /* first, try the goal */ |
@@ -1974,20 +2000,16 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
1974 | } | 2000 | } |
1975 | 2001 | ||
1976 | bsbits = ac->ac_sb->s_blocksize_bits; | 2002 | bsbits = ac->ac_sb->s_blocksize_bits; |
1977 | /* if stream allocation is enabled, use global goal */ | ||
1978 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; | ||
1979 | isize = i_size_read(ac->ac_inode) >> bsbits; | ||
1980 | if (size < isize) | ||
1981 | size = isize; | ||
1982 | 2003 | ||
1983 | if (size < sbi->s_mb_stream_request && | 2004 | /* if stream allocation is enabled, use global goal */ |
1984 | (ac->ac_flags & EXT4_MB_HINT_DATA)) { | 2005 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { |
1985 | /* TBD: may be hot point */ | 2006 | /* TBD: may be hot point */ |
1986 | spin_lock(&sbi->s_md_lock); | 2007 | spin_lock(&sbi->s_md_lock); |
1987 | ac->ac_g_ex.fe_group = sbi->s_mb_last_group; | 2008 | ac->ac_g_ex.fe_group = sbi->s_mb_last_group; |
1988 | ac->ac_g_ex.fe_start = sbi->s_mb_last_start; | 2009 | ac->ac_g_ex.fe_start = sbi->s_mb_last_start; |
1989 | spin_unlock(&sbi->s_md_lock); | 2010 | spin_unlock(&sbi->s_md_lock); |
1990 | } | 2011 | } |
2012 | |||
1991 | /* Let's just scan groups to find more-less suitable blocks */ | 2013 | /* Let's just scan groups to find more-less suitable blocks */ |
1992 | cr = ac->ac_2order ? 0 : 1; | 2014 | cr = ac->ac_2order ? 0 : 1; |
1993 | /* | 2015 | /* |
@@ -2015,27 +2037,6 @@ repeat: | |||
2015 | if (grp->bb_free == 0) | 2037 | if (grp->bb_free == 0) |
2016 | continue; | 2038 | continue; |
2017 | 2039 | ||
2018 | /* | ||
2019 | * if the group is already init we check whether it is | ||
2020 | * a good group and if not we don't load the buddy | ||
2021 | */ | ||
2022 | if (EXT4_MB_GRP_NEED_INIT(grp)) { | ||
2023 | /* | ||
2024 | * we need full data about the group | ||
2025 | * to make a good selection | ||
2026 | */ | ||
2027 | err = ext4_mb_init_group(sb, group); | ||
2028 | if (err) | ||
2029 | goto out; | ||
2030 | } | ||
2031 | |||
2032 | /* | ||
2033 | * If the particular group doesn't satisfy our | ||
2034 | * criteria we continue with the next group | ||
2035 | */ | ||
2036 | if (!ext4_mb_good_group(ac, group, cr)) | ||
2037 | continue; | ||
2038 | |||
2039 | err = ext4_mb_load_buddy(sb, group, &e4b); | 2040 | err = ext4_mb_load_buddy(sb, group, &e4b); |
2040 | if (err) | 2041 | if (err) |
2041 | goto out; | 2042 | goto out; |
@@ -2156,7 +2157,7 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) | |||
2156 | 2157 | ||
2157 | if (v == SEQ_START_TOKEN) { | 2158 | if (v == SEQ_START_TOKEN) { |
2158 | seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " | 2159 | seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " |
2159 | "%-5s %-2s %-5s %-5s %-5s %-6s\n", | 2160 | "%-5s %-2s %-6s %-5s %-5s %-6s\n", |
2160 | "pid", "inode", "original", "goal", "result", "found", | 2161 | "pid", "inode", "original", "goal", "result", "found", |
2161 | "grps", "cr", "flags", "merge", "tail", "broken"); | 2162 | "grps", "cr", "flags", "merge", "tail", "broken"); |
2162 | return 0; | 2163 | return 0; |
@@ -2164,7 +2165,7 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) | |||
2164 | 2165 | ||
2165 | if (hs->op == EXT4_MB_HISTORY_ALLOC) { | 2166 | if (hs->op == EXT4_MB_HISTORY_ALLOC) { |
2166 | fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " | 2167 | fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " |
2167 | "%-5u %-5s %-5u %-6u\n"; | 2168 | "0x%04x %-5s %-5u %-6u\n"; |
2168 | sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, | 2169 | sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, |
2169 | hs->result.fe_start, hs->result.fe_len, | 2170 | hs->result.fe_start, hs->result.fe_len, |
2170 | hs->result.fe_logical); | 2171 | hs->result.fe_logical); |
@@ -2205,7 +2206,7 @@ static void ext4_mb_seq_history_stop(struct seq_file *seq, void *v) | |||
2205 | { | 2206 | { |
2206 | } | 2207 | } |
2207 | 2208 | ||
2208 | static struct seq_operations ext4_mb_seq_history_ops = { | 2209 | static const struct seq_operations ext4_mb_seq_history_ops = { |
2209 | .start = ext4_mb_seq_history_start, | 2210 | .start = ext4_mb_seq_history_start, |
2210 | .next = ext4_mb_seq_history_next, | 2211 | .next = ext4_mb_seq_history_next, |
2211 | .stop = ext4_mb_seq_history_stop, | 2212 | .stop = ext4_mb_seq_history_stop, |
@@ -2287,7 +2288,7 @@ static ssize_t ext4_mb_seq_history_write(struct file *file, | |||
2287 | return count; | 2288 | return count; |
2288 | } | 2289 | } |
2289 | 2290 | ||
2290 | static struct file_operations ext4_mb_seq_history_fops = { | 2291 | static const struct file_operations ext4_mb_seq_history_fops = { |
2291 | .owner = THIS_MODULE, | 2292 | .owner = THIS_MODULE, |
2292 | .open = ext4_mb_seq_history_open, | 2293 | .open = ext4_mb_seq_history_open, |
2293 | .read = seq_read, | 2294 | .read = seq_read, |
@@ -2328,7 +2329,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) | |||
2328 | struct ext4_buddy e4b; | 2329 | struct ext4_buddy e4b; |
2329 | struct sg { | 2330 | struct sg { |
2330 | struct ext4_group_info info; | 2331 | struct ext4_group_info info; |
2331 | unsigned short counters[16]; | 2332 | ext4_grpblk_t counters[16]; |
2332 | } sg; | 2333 | } sg; |
2333 | 2334 | ||
2334 | group--; | 2335 | group--; |
@@ -2366,7 +2367,7 @@ static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v) | |||
2366 | { | 2367 | { |
2367 | } | 2368 | } |
2368 | 2369 | ||
2369 | static struct seq_operations ext4_mb_seq_groups_ops = { | 2370 | static const struct seq_operations ext4_mb_seq_groups_ops = { |
2370 | .start = ext4_mb_seq_groups_start, | 2371 | .start = ext4_mb_seq_groups_start, |
2371 | .next = ext4_mb_seq_groups_next, | 2372 | .next = ext4_mb_seq_groups_next, |
2372 | .stop = ext4_mb_seq_groups_stop, | 2373 | .stop = ext4_mb_seq_groups_stop, |
@@ -2387,7 +2388,7 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) | |||
2387 | 2388 | ||
2388 | } | 2389 | } |
2389 | 2390 | ||
2390 | static struct file_operations ext4_mb_seq_groups_fops = { | 2391 | static const struct file_operations ext4_mb_seq_groups_fops = { |
2391 | .owner = THIS_MODULE, | 2392 | .owner = THIS_MODULE, |
2392 | .open = ext4_mb_seq_groups_open, | 2393 | .open = ext4_mb_seq_groups_open, |
2393 | .read = seq_read, | 2394 | .read = seq_read, |
@@ -2532,7 +2533,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2532 | 2533 | ||
2533 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2534 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
2534 | init_rwsem(&meta_group_info[i]->alloc_sem); | 2535 | init_rwsem(&meta_group_info[i]->alloc_sem); |
2535 | meta_group_info[i]->bb_free_root.rb_node = NULL;; | 2536 | meta_group_info[i]->bb_free_root.rb_node = NULL; |
2536 | 2537 | ||
2537 | #ifdef DOUBLE_CHECK | 2538 | #ifdef DOUBLE_CHECK |
2538 | { | 2539 | { |
@@ -2558,26 +2559,15 @@ exit_meta_group_info: | |||
2558 | return -ENOMEM; | 2559 | return -ENOMEM; |
2559 | } /* ext4_mb_add_groupinfo */ | 2560 | } /* ext4_mb_add_groupinfo */ |
2560 | 2561 | ||
2561 | /* | ||
2562 | * Update an existing group. | ||
2563 | * This function is used for online resize | ||
2564 | */ | ||
2565 | void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add) | ||
2566 | { | ||
2567 | grp->bb_free += add; | ||
2568 | } | ||
2569 | |||
2570 | static int ext4_mb_init_backend(struct super_block *sb) | 2562 | static int ext4_mb_init_backend(struct super_block *sb) |
2571 | { | 2563 | { |
2572 | ext4_group_t ngroups = ext4_get_groups_count(sb); | 2564 | ext4_group_t ngroups = ext4_get_groups_count(sb); |
2573 | ext4_group_t i; | 2565 | ext4_group_t i; |
2574 | int metalen; | ||
2575 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2566 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2576 | struct ext4_super_block *es = sbi->s_es; | 2567 | struct ext4_super_block *es = sbi->s_es; |
2577 | int num_meta_group_infos; | 2568 | int num_meta_group_infos; |
2578 | int num_meta_group_infos_max; | 2569 | int num_meta_group_infos_max; |
2579 | int array_size; | 2570 | int array_size; |
2580 | struct ext4_group_info **meta_group_info; | ||
2581 | struct ext4_group_desc *desc; | 2571 | struct ext4_group_desc *desc; |
2582 | 2572 | ||
2583 | /* This is the number of blocks used by GDT */ | 2573 | /* This is the number of blocks used by GDT */ |
@@ -2622,22 +2612,6 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2622 | goto err_freesgi; | 2612 | goto err_freesgi; |
2623 | } | 2613 | } |
2624 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; | 2614 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; |
2625 | |||
2626 | metalen = sizeof(*meta_group_info) << EXT4_DESC_PER_BLOCK_BITS(sb); | ||
2627 | for (i = 0; i < num_meta_group_infos; i++) { | ||
2628 | if ((i + 1) == num_meta_group_infos) | ||
2629 | metalen = sizeof(*meta_group_info) * | ||
2630 | (ngroups - | ||
2631 | (i << EXT4_DESC_PER_BLOCK_BITS(sb))); | ||
2632 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | ||
2633 | if (meta_group_info == NULL) { | ||
2634 | printk(KERN_ERR "EXT4-fs: can't allocate mem for a " | ||
2635 | "buddy group\n"); | ||
2636 | goto err_freemeta; | ||
2637 | } | ||
2638 | sbi->s_group_info[i] = meta_group_info; | ||
2639 | } | ||
2640 | |||
2641 | for (i = 0; i < ngroups; i++) { | 2615 | for (i = 0; i < ngroups; i++) { |
2642 | desc = ext4_get_group_desc(sb, i, NULL); | 2616 | desc = ext4_get_group_desc(sb, i, NULL); |
2643 | if (desc == NULL) { | 2617 | if (desc == NULL) { |
@@ -2655,7 +2629,6 @@ err_freebuddy: | |||
2655 | while (i-- > 0) | 2629 | while (i-- > 0) |
2656 | kfree(ext4_get_group_info(sb, i)); | 2630 | kfree(ext4_get_group_info(sb, i)); |
2657 | i = num_meta_group_infos; | 2631 | i = num_meta_group_infos; |
2658 | err_freemeta: | ||
2659 | while (i-- > 0) | 2632 | while (i-- > 0) |
2660 | kfree(sbi->s_group_info[i]); | 2633 | kfree(sbi->s_group_info[i]); |
2661 | iput(sbi->s_buddy_cache); | 2634 | iput(sbi->s_buddy_cache); |
@@ -2672,14 +2645,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2672 | unsigned max; | 2645 | unsigned max; |
2673 | int ret; | 2646 | int ret; |
2674 | 2647 | ||
2675 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); | 2648 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); |
2676 | 2649 | ||
2677 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2650 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
2678 | if (sbi->s_mb_offsets == NULL) { | 2651 | if (sbi->s_mb_offsets == NULL) { |
2679 | return -ENOMEM; | 2652 | return -ENOMEM; |
2680 | } | 2653 | } |
2681 | 2654 | ||
2682 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int); | 2655 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); |
2683 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2656 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
2684 | if (sbi->s_mb_maxs == NULL) { | 2657 | if (sbi->s_mb_maxs == NULL) { |
2685 | kfree(sbi->s_mb_offsets); | 2658 | kfree(sbi->s_mb_offsets); |
@@ -2758,7 +2731,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) | |||
2758 | kmem_cache_free(ext4_pspace_cachep, pa); | 2731 | kmem_cache_free(ext4_pspace_cachep, pa); |
2759 | } | 2732 | } |
2760 | if (count) | 2733 | if (count) |
2761 | mb_debug("mballoc: %u PAs left\n", count); | 2734 | mb_debug(1, "mballoc: %u PAs left\n", count); |
2762 | 2735 | ||
2763 | } | 2736 | } |
2764 | 2737 | ||
@@ -2839,7 +2812,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2839 | list_for_each_safe(l, ltmp, &txn->t_private_list) { | 2812 | list_for_each_safe(l, ltmp, &txn->t_private_list) { |
2840 | entry = list_entry(l, struct ext4_free_data, list); | 2813 | entry = list_entry(l, struct ext4_free_data, list); |
2841 | 2814 | ||
2842 | mb_debug("gonna free %u blocks in group %u (0x%p):", | 2815 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", |
2843 | entry->count, entry->group, entry); | 2816 | entry->count, entry->group, entry); |
2844 | 2817 | ||
2845 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2818 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
@@ -2874,9 +2847,43 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2874 | ext4_mb_release_desc(&e4b); | 2847 | ext4_mb_release_desc(&e4b); |
2875 | } | 2848 | } |
2876 | 2849 | ||
2877 | mb_debug("freed %u blocks in %u structures\n", count, count2); | 2850 | mb_debug(1, "freed %u blocks in %u structures\n", count, count2); |
2851 | } | ||
2852 | |||
2853 | #ifdef CONFIG_EXT4_DEBUG | ||
2854 | u8 mb_enable_debug __read_mostly; | ||
2855 | |||
2856 | static struct dentry *debugfs_dir; | ||
2857 | static struct dentry *debugfs_debug; | ||
2858 | |||
2859 | static void __init ext4_create_debugfs_entry(void) | ||
2860 | { | ||
2861 | debugfs_dir = debugfs_create_dir("ext4", NULL); | ||
2862 | if (debugfs_dir) | ||
2863 | debugfs_debug = debugfs_create_u8("mballoc-debug", | ||
2864 | S_IRUGO | S_IWUSR, | ||
2865 | debugfs_dir, | ||
2866 | &mb_enable_debug); | ||
2867 | } | ||
2868 | |||
2869 | static void ext4_remove_debugfs_entry(void) | ||
2870 | { | ||
2871 | debugfs_remove(debugfs_debug); | ||
2872 | debugfs_remove(debugfs_dir); | ||
2878 | } | 2873 | } |
2879 | 2874 | ||
2875 | #else | ||
2876 | |||
2877 | static void __init ext4_create_debugfs_entry(void) | ||
2878 | { | ||
2879 | } | ||
2880 | |||
2881 | static void ext4_remove_debugfs_entry(void) | ||
2882 | { | ||
2883 | } | ||
2884 | |||
2885 | #endif | ||
2886 | |||
2880 | int __init init_ext4_mballoc(void) | 2887 | int __init init_ext4_mballoc(void) |
2881 | { | 2888 | { |
2882 | ext4_pspace_cachep = | 2889 | ext4_pspace_cachep = |
@@ -2904,6 +2911,7 @@ int __init init_ext4_mballoc(void) | |||
2904 | kmem_cache_destroy(ext4_ac_cachep); | 2911 | kmem_cache_destroy(ext4_ac_cachep); |
2905 | return -ENOMEM; | 2912 | return -ENOMEM; |
2906 | } | 2913 | } |
2914 | ext4_create_debugfs_entry(); | ||
2907 | return 0; | 2915 | return 0; |
2908 | } | 2916 | } |
2909 | 2917 | ||
@@ -2917,6 +2925,7 @@ void exit_ext4_mballoc(void) | |||
2917 | kmem_cache_destroy(ext4_pspace_cachep); | 2925 | kmem_cache_destroy(ext4_pspace_cachep); |
2918 | kmem_cache_destroy(ext4_ac_cachep); | 2926 | kmem_cache_destroy(ext4_ac_cachep); |
2919 | kmem_cache_destroy(ext4_free_ext_cachep); | 2927 | kmem_cache_destroy(ext4_free_ext_cachep); |
2928 | ext4_remove_debugfs_entry(); | ||
2920 | } | 2929 | } |
2921 | 2930 | ||
2922 | 2931 | ||
@@ -3061,7 +3070,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) | |||
3061 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; | 3070 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; |
3062 | else | 3071 | else |
3063 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; | 3072 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; |
3064 | mb_debug("#%u: goal %u blocks for locality group\n", | 3073 | mb_debug(1, "#%u: goal %u blocks for locality group\n", |
3065 | current->pid, ac->ac_g_ex.fe_len); | 3074 | current->pid, ac->ac_g_ex.fe_len); |
3066 | } | 3075 | } |
3067 | 3076 | ||
@@ -3180,23 +3189,18 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
3180 | BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || | 3189 | BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || |
3181 | ac->ac_o_ex.fe_logical < pa->pa_lstart)); | 3190 | ac->ac_o_ex.fe_logical < pa->pa_lstart)); |
3182 | 3191 | ||
3183 | /* skip PA normalized request doesn't overlap with */ | 3192 | /* skip PAs this normalized request doesn't overlap with */ |
3184 | if (pa->pa_lstart >= end) { | 3193 | if (pa->pa_lstart >= end || pa_end <= start) { |
3185 | spin_unlock(&pa->pa_lock); | ||
3186 | continue; | ||
3187 | } | ||
3188 | if (pa_end <= start) { | ||
3189 | spin_unlock(&pa->pa_lock); | 3194 | spin_unlock(&pa->pa_lock); |
3190 | continue; | 3195 | continue; |
3191 | } | 3196 | } |
3192 | BUG_ON(pa->pa_lstart <= start && pa_end >= end); | 3197 | BUG_ON(pa->pa_lstart <= start && pa_end >= end); |
3193 | 3198 | ||
3199 | /* adjust start or end to be adjacent to this pa */ | ||
3194 | if (pa_end <= ac->ac_o_ex.fe_logical) { | 3200 | if (pa_end <= ac->ac_o_ex.fe_logical) { |
3195 | BUG_ON(pa_end < start); | 3201 | BUG_ON(pa_end < start); |
3196 | start = pa_end; | 3202 | start = pa_end; |
3197 | } | 3203 | } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) { |
3198 | |||
3199 | if (pa->pa_lstart > ac->ac_o_ex.fe_logical) { | ||
3200 | BUG_ON(pa->pa_lstart > end); | 3204 | BUG_ON(pa->pa_lstart > end); |
3201 | end = pa->pa_lstart; | 3205 | end = pa->pa_lstart; |
3202 | } | 3206 | } |
@@ -3251,7 +3255,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
3251 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; | 3255 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; |
3252 | } | 3256 | } |
3253 | 3257 | ||
3254 | mb_debug("goal: %u(was %u) blocks at %u\n", (unsigned) size, | 3258 | mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size, |
3255 | (unsigned) orig_size, (unsigned) start); | 3259 | (unsigned) orig_size, (unsigned) start); |
3256 | } | 3260 | } |
3257 | 3261 | ||
@@ -3300,7 +3304,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, | |||
3300 | BUG_ON(pa->pa_free < len); | 3304 | BUG_ON(pa->pa_free < len); |
3301 | pa->pa_free -= len; | 3305 | pa->pa_free -= len; |
3302 | 3306 | ||
3303 | mb_debug("use %llu/%u from inode pa %p\n", start, len, pa); | 3307 | mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa); |
3304 | } | 3308 | } |
3305 | 3309 | ||
3306 | /* | 3310 | /* |
@@ -3324,7 +3328,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, | |||
3324 | * in on-disk bitmap -- see ext4_mb_release_context() | 3328 | * in on-disk bitmap -- see ext4_mb_release_context() |
3325 | * Other CPUs are prevented from allocating from this pa by lg_mutex | 3329 | * Other CPUs are prevented from allocating from this pa by lg_mutex |
3326 | */ | 3330 | */ |
3327 | mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); | 3331 | mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); |
3328 | } | 3332 | } |
3329 | 3333 | ||
3330 | /* | 3334 | /* |
@@ -3382,6 +3386,11 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3382 | ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) | 3386 | ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) |
3383 | continue; | 3387 | continue; |
3384 | 3388 | ||
3389 | /* non-extent files can't have physical blocks past 2^32 */ | ||
3390 | if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL) && | ||
3391 | pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) | ||
3392 | continue; | ||
3393 | |||
3385 | /* found preallocated blocks, use them */ | 3394 | /* found preallocated blocks, use them */ |
3386 | spin_lock(&pa->pa_lock); | 3395 | spin_lock(&pa->pa_lock); |
3387 | if (pa->pa_deleted == 0 && pa->pa_free) { | 3396 | if (pa->pa_deleted == 0 && pa->pa_free) { |
@@ -3503,7 +3512,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
3503 | preallocated += len; | 3512 | preallocated += len; |
3504 | count++; | 3513 | count++; |
3505 | } | 3514 | } |
3506 | mb_debug("prellocated %u for group %u\n", preallocated, group); | 3515 | mb_debug(1, "prellocated %u for group %u\n", preallocated, group); |
3507 | } | 3516 | } |
3508 | 3517 | ||
3509 | static void ext4_mb_pa_callback(struct rcu_head *head) | 3518 | static void ext4_mb_pa_callback(struct rcu_head *head) |
@@ -3638,7 +3647,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) | |||
3638 | pa->pa_deleted = 0; | 3647 | pa->pa_deleted = 0; |
3639 | pa->pa_type = MB_INODE_PA; | 3648 | pa->pa_type = MB_INODE_PA; |
3640 | 3649 | ||
3641 | mb_debug("new inode pa %p: %llu/%u for %u\n", pa, | 3650 | mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa, |
3642 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | 3651 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); |
3643 | trace_ext4_mb_new_inode_pa(ac, pa); | 3652 | trace_ext4_mb_new_inode_pa(ac, pa); |
3644 | 3653 | ||
@@ -3698,7 +3707,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) | |||
3698 | pa->pa_deleted = 0; | 3707 | pa->pa_deleted = 0; |
3699 | pa->pa_type = MB_GROUP_PA; | 3708 | pa->pa_type = MB_GROUP_PA; |
3700 | 3709 | ||
3701 | mb_debug("new group pa %p: %llu/%u for %u\n", pa, | 3710 | mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa, |
3702 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | 3711 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); |
3703 | trace_ext4_mb_new_group_pa(ac, pa); | 3712 | trace_ext4_mb_new_group_pa(ac, pa); |
3704 | 3713 | ||
@@ -3777,7 +3786,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3777 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); | 3786 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); |
3778 | start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + | 3787 | start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + |
3779 | le32_to_cpu(sbi->s_es->s_first_data_block); | 3788 | le32_to_cpu(sbi->s_es->s_first_data_block); |
3780 | mb_debug(" free preallocated %u/%u in group %u\n", | 3789 | mb_debug(1, " free preallocated %u/%u in group %u\n", |
3781 | (unsigned) start, (unsigned) next - bit, | 3790 | (unsigned) start, (unsigned) next - bit, |
3782 | (unsigned) group); | 3791 | (unsigned) group); |
3783 | free += next - bit; | 3792 | free += next - bit; |
@@ -3868,7 +3877,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3868 | int busy = 0; | 3877 | int busy = 0; |
3869 | int free = 0; | 3878 | int free = 0; |
3870 | 3879 | ||
3871 | mb_debug("discard preallocation for group %u\n", group); | 3880 | mb_debug(1, "discard preallocation for group %u\n", group); |
3872 | 3881 | ||
3873 | if (list_empty(&grp->bb_prealloc_list)) | 3882 | if (list_empty(&grp->bb_prealloc_list)) |
3874 | return 0; | 3883 | return 0; |
@@ -3992,7 +4001,7 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3992 | return; | 4001 | return; |
3993 | } | 4002 | } |
3994 | 4003 | ||
3995 | mb_debug("discard preallocation for inode %lu\n", inode->i_ino); | 4004 | mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino); |
3996 | trace_ext4_discard_preallocations(inode); | 4005 | trace_ext4_discard_preallocations(inode); |
3997 | 4006 | ||
3998 | INIT_LIST_HEAD(&list); | 4007 | INIT_LIST_HEAD(&list); |
@@ -4097,7 +4106,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode, | |||
4097 | { | 4106 | { |
4098 | BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list)); | 4107 | BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list)); |
4099 | } | 4108 | } |
4100 | #ifdef MB_DEBUG | 4109 | #ifdef CONFIG_EXT4_DEBUG |
4101 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | 4110 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) |
4102 | { | 4111 | { |
4103 | struct super_block *sb = ac->ac_sb; | 4112 | struct super_block *sb = ac->ac_sb; |
@@ -4139,14 +4148,14 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
4139 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, | 4148 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, |
4140 | NULL, &start); | 4149 | NULL, &start); |
4141 | spin_unlock(&pa->pa_lock); | 4150 | spin_unlock(&pa->pa_lock); |
4142 | printk(KERN_ERR "PA:%lu:%d:%u \n", i, | 4151 | printk(KERN_ERR "PA:%u:%d:%u \n", i, |
4143 | start, pa->pa_len); | 4152 | start, pa->pa_len); |
4144 | } | 4153 | } |
4145 | ext4_unlock_group(sb, i); | 4154 | ext4_unlock_group(sb, i); |
4146 | 4155 | ||
4147 | if (grp->bb_free == 0) | 4156 | if (grp->bb_free == 0) |
4148 | continue; | 4157 | continue; |
4149 | printk(KERN_ERR "%lu: %d/%d \n", | 4158 | printk(KERN_ERR "%u: %d/%d \n", |
4150 | i, grp->bb_free, grp->bb_fragments); | 4159 | i, grp->bb_free, grp->bb_fragments); |
4151 | } | 4160 | } |
4152 | printk(KERN_ERR "\n"); | 4161 | printk(KERN_ERR "\n"); |
@@ -4174,16 +4183,26 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
4174 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) | 4183 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) |
4175 | return; | 4184 | return; |
4176 | 4185 | ||
4186 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) | ||
4187 | return; | ||
4188 | |||
4177 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; | 4189 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; |
4178 | isize = i_size_read(ac->ac_inode) >> bsbits; | 4190 | isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) |
4191 | >> bsbits; | ||
4179 | size = max(size, isize); | 4192 | size = max(size, isize); |
4180 | 4193 | ||
4181 | /* don't use group allocation for large files */ | 4194 | if ((size == isize) && |
4182 | if (size >= sbi->s_mb_stream_request) | 4195 | !ext4_fs_is_busy(sbi) && |
4196 | (atomic_read(&ac->ac_inode->i_writecount) == 0)) { | ||
4197 | ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC; | ||
4183 | return; | 4198 | return; |
4199 | } | ||
4184 | 4200 | ||
4185 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) | 4201 | /* don't use group allocation for large files */ |
4202 | if (size >= sbi->s_mb_stream_request) { | ||
4203 | ac->ac_flags |= EXT4_MB_STREAM_ALLOC; | ||
4186 | return; | 4204 | return; |
4205 | } | ||
4187 | 4206 | ||
4188 | BUG_ON(ac->ac_lg != NULL); | 4207 | BUG_ON(ac->ac_lg != NULL); |
4189 | /* | 4208 | /* |
@@ -4246,7 +4265,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, | |||
4246 | * locality group. this is a policy, actually */ | 4265 | * locality group. this is a policy, actually */ |
4247 | ext4_mb_group_or_file(ac); | 4266 | ext4_mb_group_or_file(ac); |
4248 | 4267 | ||
4249 | mb_debug("init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, " | 4268 | mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, " |
4250 | "left: %u/%u, right %u/%u to %swritable\n", | 4269 | "left: %u/%u, right %u/%u to %swritable\n", |
4251 | (unsigned) ar->len, (unsigned) ar->logical, | 4270 | (unsigned) ar->len, (unsigned) ar->logical, |
4252 | (unsigned) ar->goal, ac->ac_flags, ac->ac_2order, | 4271 | (unsigned) ar->goal, ac->ac_flags, ac->ac_2order, |
@@ -4268,7 +4287,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4268 | struct ext4_prealloc_space *pa, *tmp; | 4287 | struct ext4_prealloc_space *pa, *tmp; |
4269 | struct ext4_allocation_context *ac; | 4288 | struct ext4_allocation_context *ac; |
4270 | 4289 | ||
4271 | mb_debug("discard locality group preallocation\n"); | 4290 | mb_debug(1, "discard locality group preallocation\n"); |
4272 | 4291 | ||
4273 | INIT_LIST_HEAD(&discard_list); | 4292 | INIT_LIST_HEAD(&discard_list); |
4274 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 4293 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index c96bb19f58f9..188d3d709b24 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -37,11 +37,19 @@ | |||
37 | 37 | ||
38 | /* | 38 | /* |
39 | */ | 39 | */ |
40 | #define MB_DEBUG__ | 40 | #ifdef CONFIG_EXT4_DEBUG |
41 | #ifdef MB_DEBUG | 41 | extern u8 mb_enable_debug; |
42 | #define mb_debug(fmt, a...) printk(fmt, ##a) | 42 | |
43 | #define mb_debug(n, fmt, a...) \ | ||
44 | do { \ | ||
45 | if ((n) <= mb_enable_debug) { \ | ||
46 | printk(KERN_DEBUG "(%s, %d): %s: ", \ | ||
47 | __FILE__, __LINE__, __func__); \ | ||
48 | printk(fmt, ## a); \ | ||
49 | } \ | ||
50 | } while (0) | ||
43 | #else | 51 | #else |
44 | #define mb_debug(fmt, a...) | 52 | #define mb_debug(n, fmt, a...) |
45 | #endif | 53 | #endif |
46 | 54 | ||
47 | /* | 55 | /* |
@@ -128,8 +136,8 @@ struct ext4_prealloc_space { | |||
128 | unsigned pa_deleted; | 136 | unsigned pa_deleted; |
129 | ext4_fsblk_t pa_pstart; /* phys. block */ | 137 | ext4_fsblk_t pa_pstart; /* phys. block */ |
130 | ext4_lblk_t pa_lstart; /* log. block */ | 138 | ext4_lblk_t pa_lstart; /* log. block */ |
131 | unsigned short pa_len; /* len of preallocated chunk */ | 139 | ext4_grpblk_t pa_len; /* len of preallocated chunk */ |
132 | unsigned short pa_free; /* how many blocks are free */ | 140 | ext4_grpblk_t pa_free; /* how many blocks are free */ |
133 | unsigned short pa_type; /* pa type. inode or group */ | 141 | unsigned short pa_type; /* pa type. inode or group */ |
134 | spinlock_t *pa_obj_lock; | 142 | spinlock_t *pa_obj_lock; |
135 | struct inode *pa_inode; /* hack, for history only */ | 143 | struct inode *pa_inode; /* hack, for history only */ |
@@ -144,7 +152,7 @@ struct ext4_free_extent { | |||
144 | ext4_lblk_t fe_logical; | 152 | ext4_lblk_t fe_logical; |
145 | ext4_grpblk_t fe_start; | 153 | ext4_grpblk_t fe_start; |
146 | ext4_group_t fe_group; | 154 | ext4_group_t fe_group; |
147 | int fe_len; | 155 | ext4_grpblk_t fe_len; |
148 | }; | 156 | }; |
149 | 157 | ||
150 | /* | 158 | /* |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 313a50b39741..bf519f239ae6 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -353,17 +353,16 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | |||
353 | 353 | ||
354 | down_write(&EXT4_I(inode)->i_data_sem); | 354 | down_write(&EXT4_I(inode)->i_data_sem); |
355 | /* | 355 | /* |
356 | * if EXT4_EXT_MIGRATE is cleared a block allocation | 356 | * if EXT4_STATE_EXT_MIGRATE is cleared a block allocation |
357 | * happened after we started the migrate. We need to | 357 | * happened after we started the migrate. We need to |
358 | * fail the migrate | 358 | * fail the migrate |
359 | */ | 359 | */ |
360 | if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) { | 360 | if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) { |
361 | retval = -EAGAIN; | 361 | retval = -EAGAIN; |
362 | up_write(&EXT4_I(inode)->i_data_sem); | 362 | up_write(&EXT4_I(inode)->i_data_sem); |
363 | goto err_out; | 363 | goto err_out; |
364 | } else | 364 | } else |
365 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & | 365 | EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; |
366 | ~EXT4_EXT_MIGRATE; | ||
367 | /* | 366 | /* |
368 | * We have the extent map build with the tmp inode. | 367 | * We have the extent map build with the tmp inode. |
369 | * Now copy the i_data across | 368 | * Now copy the i_data across |
@@ -517,14 +516,15 @@ int ext4_ext_migrate(struct inode *inode) | |||
517 | * when we add extents we extent the journal | 516 | * when we add extents we extent the journal |
518 | */ | 517 | */ |
519 | /* | 518 | /* |
520 | * Even though we take i_mutex we can still cause block allocation | 519 | * Even though we take i_mutex we can still cause block |
521 | * via mmap write to holes. If we have allocated new blocks we fail | 520 | * allocation via mmap write to holes. If we have allocated |
522 | * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. | 521 | * new blocks we fail migrate. New block allocation will |
523 | * The flag is updated with i_data_sem held to prevent racing with | 522 | * clear EXT4_STATE_EXT_MIGRATE flag. The flag is updated |
524 | * block allocation. | 523 | * with i_data_sem held to prevent racing with block |
524 | * allocation. | ||
525 | */ | 525 | */ |
526 | down_read((&EXT4_I(inode)->i_data_sem)); | 526 | down_read((&EXT4_I(inode)->i_data_sem)); |
527 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE; | 527 | EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE; |
528 | up_read((&EXT4_I(inode)->i_data_sem)); | 528 | up_read((&EXT4_I(inode)->i_data_sem)); |
529 | 529 | ||
530 | handle = ext4_journal_start(inode, 1); | 530 | handle = ext4_journal_start(inode, 1); |
@@ -618,7 +618,7 @@ err_out: | |||
618 | tmp_inode->i_nlink = 0; | 618 | tmp_inode->i_nlink = 0; |
619 | 619 | ||
620 | ext4_journal_stop(handle); | 620 | ext4_journal_stop(handle); |
621 | 621 | unlock_new_inode(tmp_inode); | |
622 | iput(tmp_inode); | 622 | iput(tmp_inode); |
623 | 623 | ||
624 | return retval; | 624 | return retval; |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index bbf2dd9404dc..c07a2915e40b 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -19,14 +19,31 @@ | |||
19 | #include "ext4_extents.h" | 19 | #include "ext4_extents.h" |
20 | #include "ext4.h" | 20 | #include "ext4.h" |
21 | 21 | ||
22 | #define get_ext_path(path, inode, block, ret) \ | 22 | /** |
23 | do { \ | 23 | * get_ext_path - Find an extent path for designated logical block number. |
24 | path = ext4_ext_find_extent(inode, block, path); \ | 24 | * |
25 | if (IS_ERR(path)) { \ | 25 | * @inode: an inode which is searched |
26 | ret = PTR_ERR(path); \ | 26 | * @lblock: logical block number to find an extent path |
27 | path = NULL; \ | 27 | * @path: pointer to an extent path pointer (for output) |
28 | } \ | 28 | * |
29 | } while (0) | 29 | * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value |
30 | * on failure. | ||
31 | */ | ||
32 | static inline int | ||
33 | get_ext_path(struct inode *inode, ext4_lblk_t lblock, | ||
34 | struct ext4_ext_path **path) | ||
35 | { | ||
36 | int ret = 0; | ||
37 | |||
38 | *path = ext4_ext_find_extent(inode, lblock, *path); | ||
39 | if (IS_ERR(*path)) { | ||
40 | ret = PTR_ERR(*path); | ||
41 | *path = NULL; | ||
42 | } else if ((*path)[ext_depth(inode)].p_ext == NULL) | ||
43 | ret = -ENODATA; | ||
44 | |||
45 | return ret; | ||
46 | } | ||
30 | 47 | ||
31 | /** | 48 | /** |
32 | * copy_extent_status - Copy the extent's initialization status | 49 | * copy_extent_status - Copy the extent's initialization status |
@@ -113,6 +130,31 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
113 | } | 130 | } |
114 | 131 | ||
115 | /** | 132 | /** |
133 | * mext_check_null_inode - NULL check for two inodes | ||
134 | * | ||
135 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. | ||
136 | */ | ||
137 | static int | ||
138 | mext_check_null_inode(struct inode *inode1, struct inode *inode2, | ||
139 | const char *function) | ||
140 | { | ||
141 | int ret = 0; | ||
142 | |||
143 | if (inode1 == NULL) { | ||
144 | ext4_error(inode2->i_sb, function, | ||
145 | "Both inodes should not be NULL: " | ||
146 | "inode1 NULL inode2 %lu", inode2->i_ino); | ||
147 | ret = -EIO; | ||
148 | } else if (inode2 == NULL) { | ||
149 | ext4_error(inode1->i_sb, function, | ||
150 | "Both inodes should not be NULL: " | ||
151 | "inode1 %lu inode2 NULL", inode1->i_ino); | ||
152 | ret = -EIO; | ||
153 | } | ||
154 | return ret; | ||
155 | } | ||
156 | |||
157 | /** | ||
116 | * mext_double_down_read - Acquire two inodes' read semaphore | 158 | * mext_double_down_read - Acquire two inodes' read semaphore |
117 | * | 159 | * |
118 | * @orig_inode: original inode structure | 160 | * @orig_inode: original inode structure |
@@ -124,8 +166,6 @@ mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode) | |||
124 | { | 166 | { |
125 | struct inode *first = orig_inode, *second = donor_inode; | 167 | struct inode *first = orig_inode, *second = donor_inode; |
126 | 168 | ||
127 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
128 | |||
129 | /* | 169 | /* |
130 | * Use the inode number to provide the stable locking order instead | 170 | * Use the inode number to provide the stable locking order instead |
131 | * of its address, because the C language doesn't guarantee you can | 171 | * of its address, because the C language doesn't guarantee you can |
@@ -152,8 +192,6 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) | |||
152 | { | 192 | { |
153 | struct inode *first = orig_inode, *second = donor_inode; | 193 | struct inode *first = orig_inode, *second = donor_inode; |
154 | 194 | ||
155 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
156 | |||
157 | /* | 195 | /* |
158 | * Use the inode number to provide the stable locking order instead | 196 | * Use the inode number to provide the stable locking order instead |
159 | * of its address, because the C language doesn't guarantee you can | 197 | * of its address, because the C language doesn't guarantee you can |
@@ -178,8 +216,6 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) | |||
178 | static void | 216 | static void |
179 | mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) | 217 | mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) |
180 | { | 218 | { |
181 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
182 | |||
183 | up_read(&EXT4_I(orig_inode)->i_data_sem); | 219 | up_read(&EXT4_I(orig_inode)->i_data_sem); |
184 | up_read(&EXT4_I(donor_inode)->i_data_sem); | 220 | up_read(&EXT4_I(donor_inode)->i_data_sem); |
185 | } | 221 | } |
@@ -194,8 +230,6 @@ mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) | |||
194 | static void | 230 | static void |
195 | mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) | 231 | mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) |
196 | { | 232 | { |
197 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
198 | |||
199 | up_write(&EXT4_I(orig_inode)->i_data_sem); | 233 | up_write(&EXT4_I(orig_inode)->i_data_sem); |
200 | up_write(&EXT4_I(donor_inode)->i_data_sem); | 234 | up_write(&EXT4_I(donor_inode)->i_data_sem); |
201 | } | 235 | } |
@@ -283,8 +317,8 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
283 | } | 317 | } |
284 | 318 | ||
285 | if (new_flag) { | 319 | if (new_flag) { |
286 | get_ext_path(orig_path, orig_inode, eblock, err); | 320 | err = get_ext_path(orig_inode, eblock, &orig_path); |
287 | if (orig_path == NULL) | 321 | if (err) |
288 | goto out; | 322 | goto out; |
289 | 323 | ||
290 | if (ext4_ext_insert_extent(handle, orig_inode, | 324 | if (ext4_ext_insert_extent(handle, orig_inode, |
@@ -293,9 +327,9 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
293 | } | 327 | } |
294 | 328 | ||
295 | if (end_flag) { | 329 | if (end_flag) { |
296 | get_ext_path(orig_path, orig_inode, | 330 | err = get_ext_path(orig_inode, |
297 | le32_to_cpu(end_ext->ee_block) - 1, err); | 331 | le32_to_cpu(end_ext->ee_block) - 1, &orig_path); |
298 | if (orig_path == NULL) | 332 | if (err) |
299 | goto out; | 333 | goto out; |
300 | 334 | ||
301 | if (ext4_ext_insert_extent(handle, orig_inode, | 335 | if (ext4_ext_insert_extent(handle, orig_inode, |
@@ -519,7 +553,15 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
519 | * oext |-----------| | 553 | * oext |-----------| |
520 | * new_ext |-------| | 554 | * new_ext |-------| |
521 | */ | 555 | */ |
522 | BUG_ON(le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end); | 556 | if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { |
557 | ext4_error(orig_inode->i_sb, __func__, | ||
558 | "new_ext_end(%u) should be less than or equal to " | ||
559 | "oext->ee_block(%u) + oext_alen(%d) - 1", | ||
560 | new_ext_end, le32_to_cpu(oext->ee_block), | ||
561 | oext_alen); | ||
562 | ret = -EIO; | ||
563 | goto out; | ||
564 | } | ||
523 | 565 | ||
524 | /* | 566 | /* |
525 | * Case: new_ext is smaller than original extent | 567 | * Case: new_ext is smaller than original extent |
@@ -543,6 +585,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
543 | 585 | ||
544 | ret = mext_insert_extents(handle, orig_inode, orig_path, o_start, | 586 | ret = mext_insert_extents(handle, orig_inode, orig_path, o_start, |
545 | o_end, &start_ext, &new_ext, &end_ext); | 587 | o_end, &start_ext, &new_ext, &end_ext); |
588 | out: | ||
546 | return ret; | 589 | return ret; |
547 | } | 590 | } |
548 | 591 | ||
@@ -554,8 +597,10 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
554 | * @orig_off: block offset of original inode | 597 | * @orig_off: block offset of original inode |
555 | * @donor_off: block offset of donor inode | 598 | * @donor_off: block offset of donor inode |
556 | * @max_count: the maximun length of extents | 599 | * @max_count: the maximun length of extents |
600 | * | ||
601 | * Return 0 on success, or a negative error value on failure. | ||
557 | */ | 602 | */ |
558 | static void | 603 | static int |
559 | mext_calc_swap_extents(struct ext4_extent *tmp_dext, | 604 | mext_calc_swap_extents(struct ext4_extent *tmp_dext, |
560 | struct ext4_extent *tmp_oext, | 605 | struct ext4_extent *tmp_oext, |
561 | ext4_lblk_t orig_off, ext4_lblk_t donor_off, | 606 | ext4_lblk_t orig_off, ext4_lblk_t donor_off, |
@@ -564,6 +609,19 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
564 | ext4_lblk_t diff, orig_diff; | 609 | ext4_lblk_t diff, orig_diff; |
565 | struct ext4_extent dext_old, oext_old; | 610 | struct ext4_extent dext_old, oext_old; |
566 | 611 | ||
612 | BUG_ON(orig_off != donor_off); | ||
613 | |||
614 | /* original and donor extents have to cover the same block offset */ | ||
615 | if (orig_off < le32_to_cpu(tmp_oext->ee_block) || | ||
616 | le32_to_cpu(tmp_oext->ee_block) + | ||
617 | ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off) | ||
618 | return -ENODATA; | ||
619 | |||
620 | if (orig_off < le32_to_cpu(tmp_dext->ee_block) || | ||
621 | le32_to_cpu(tmp_dext->ee_block) + | ||
622 | ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off) | ||
623 | return -ENODATA; | ||
624 | |||
567 | dext_old = *tmp_dext; | 625 | dext_old = *tmp_dext; |
568 | oext_old = *tmp_oext; | 626 | oext_old = *tmp_oext; |
569 | 627 | ||
@@ -591,6 +649,8 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
591 | 649 | ||
592 | copy_extent_status(&oext_old, tmp_dext); | 650 | copy_extent_status(&oext_old, tmp_dext); |
593 | copy_extent_status(&dext_old, tmp_oext); | 651 | copy_extent_status(&dext_old, tmp_oext); |
652 | |||
653 | return 0; | ||
594 | } | 654 | } |
595 | 655 | ||
596 | /** | 656 | /** |
@@ -631,13 +691,13 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
631 | mext_double_down_write(orig_inode, donor_inode); | 691 | mext_double_down_write(orig_inode, donor_inode); |
632 | 692 | ||
633 | /* Get the original extent for the block "orig_off" */ | 693 | /* Get the original extent for the block "orig_off" */ |
634 | get_ext_path(orig_path, orig_inode, orig_off, err); | 694 | err = get_ext_path(orig_inode, orig_off, &orig_path); |
635 | if (orig_path == NULL) | 695 | if (err) |
636 | goto out; | 696 | goto out; |
637 | 697 | ||
638 | /* Get the donor extent for the head */ | 698 | /* Get the donor extent for the head */ |
639 | get_ext_path(donor_path, donor_inode, donor_off, err); | 699 | err = get_ext_path(donor_inode, donor_off, &donor_path); |
640 | if (donor_path == NULL) | 700 | if (err) |
641 | goto out; | 701 | goto out; |
642 | depth = ext_depth(orig_inode); | 702 | depth = ext_depth(orig_inode); |
643 | oext = orig_path[depth].p_ext; | 703 | oext = orig_path[depth].p_ext; |
@@ -647,13 +707,28 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
647 | dext = donor_path[depth].p_ext; | 707 | dext = donor_path[depth].p_ext; |
648 | tmp_dext = *dext; | 708 | tmp_dext = *dext; |
649 | 709 | ||
650 | mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | 710 | err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, |
651 | donor_off, count); | 711 | donor_off, count); |
712 | if (err) | ||
713 | goto out; | ||
652 | 714 | ||
653 | /* Loop for the donor extents */ | 715 | /* Loop for the donor extents */ |
654 | while (1) { | 716 | while (1) { |
655 | /* The extent for donor must be found. */ | 717 | /* The extent for donor must be found. */ |
656 | BUG_ON(!dext || donor_off != le32_to_cpu(tmp_dext.ee_block)); | 718 | if (!dext) { |
719 | ext4_error(donor_inode->i_sb, __func__, | ||
720 | "The extent for donor must be found"); | ||
721 | err = -EIO; | ||
722 | goto out; | ||
723 | } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { | ||
724 | ext4_error(donor_inode->i_sb, __func__, | ||
725 | "Donor offset(%u) and the first block of donor " | ||
726 | "extent(%u) should be equal", | ||
727 | donor_off, | ||
728 | le32_to_cpu(tmp_dext.ee_block)); | ||
729 | err = -EIO; | ||
730 | goto out; | ||
731 | } | ||
657 | 732 | ||
658 | /* Set donor extent to orig extent */ | 733 | /* Set donor extent to orig extent */ |
659 | err = mext_leaf_block(handle, orig_inode, | 734 | err = mext_leaf_block(handle, orig_inode, |
@@ -678,8 +753,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
678 | 753 | ||
679 | if (orig_path) | 754 | if (orig_path) |
680 | ext4_ext_drop_refs(orig_path); | 755 | ext4_ext_drop_refs(orig_path); |
681 | get_ext_path(orig_path, orig_inode, orig_off, err); | 756 | err = get_ext_path(orig_inode, orig_off, &orig_path); |
682 | if (orig_path == NULL) | 757 | if (err) |
683 | goto out; | 758 | goto out; |
684 | depth = ext_depth(orig_inode); | 759 | depth = ext_depth(orig_inode); |
685 | oext = orig_path[depth].p_ext; | 760 | oext = orig_path[depth].p_ext; |
@@ -692,9 +767,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
692 | 767 | ||
693 | if (donor_path) | 768 | if (donor_path) |
694 | ext4_ext_drop_refs(donor_path); | 769 | ext4_ext_drop_refs(donor_path); |
695 | get_ext_path(donor_path, donor_inode, | 770 | err = get_ext_path(donor_inode, donor_off, &donor_path); |
696 | donor_off, err); | 771 | if (err) |
697 | if (donor_path == NULL) | ||
698 | goto out; | 772 | goto out; |
699 | depth = ext_depth(donor_inode); | 773 | depth = ext_depth(donor_inode); |
700 | dext = donor_path[depth].p_ext; | 774 | dext = donor_path[depth].p_ext; |
@@ -705,9 +779,10 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
705 | } | 779 | } |
706 | tmp_dext = *dext; | 780 | tmp_dext = *dext; |
707 | 781 | ||
708 | mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | 782 | err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, |
709 | donor_off, | 783 | donor_off, count - replaced_count); |
710 | count - replaced_count); | 784 | if (err) |
785 | goto out; | ||
711 | } | 786 | } |
712 | 787 | ||
713 | out: | 788 | out: |
@@ -740,7 +815,7 @@ out: | |||
740 | * on success, or a negative error value on failure. | 815 | * on success, or a negative error value on failure. |
741 | */ | 816 | */ |
742 | static int | 817 | static int |
743 | move_extent_par_page(struct file *o_filp, struct inode *donor_inode, | 818 | move_extent_per_page(struct file *o_filp, struct inode *donor_inode, |
744 | pgoff_t orig_page_offset, int data_offset_in_page, | 819 | pgoff_t orig_page_offset, int data_offset_in_page, |
745 | int block_len_in_page, int uninit) | 820 | int block_len_in_page, int uninit) |
746 | { | 821 | { |
@@ -871,6 +946,7 @@ out: | |||
871 | if (PageLocked(page)) | 946 | if (PageLocked(page)) |
872 | unlock_page(page); | 947 | unlock_page(page); |
873 | page_cache_release(page); | 948 | page_cache_release(page); |
949 | ext4_journal_stop(handle); | ||
874 | } | 950 | } |
875 | out2: | 951 | out2: |
876 | ext4_journal_stop(handle); | 952 | ext4_journal_stop(handle); |
@@ -897,6 +973,10 @@ mext_check_arguments(struct inode *orig_inode, | |||
897 | struct inode *donor_inode, __u64 orig_start, | 973 | struct inode *donor_inode, __u64 orig_start, |
898 | __u64 donor_start, __u64 *len, __u64 moved_len) | 974 | __u64 donor_start, __u64 *len, __u64 moved_len) |
899 | { | 975 | { |
976 | ext4_lblk_t orig_blocks, donor_blocks; | ||
977 | unsigned int blkbits = orig_inode->i_blkbits; | ||
978 | unsigned int blocksize = 1 << blkbits; | ||
979 | |||
900 | /* Regular file check */ | 980 | /* Regular file check */ |
901 | if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { | 981 | if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { |
902 | ext4_debug("ext4 move extent: The argument files should be " | 982 | ext4_debug("ext4 move extent: The argument files should be " |
@@ -960,54 +1040,58 @@ mext_check_arguments(struct inode *orig_inode, | |||
960 | return -EINVAL; | 1040 | return -EINVAL; |
961 | } | 1041 | } |
962 | 1042 | ||
963 | if ((orig_start > MAX_DEFRAG_SIZE) || | 1043 | if ((orig_start > EXT_MAX_BLOCK) || |
964 | (donor_start > MAX_DEFRAG_SIZE) || | 1044 | (donor_start > EXT_MAX_BLOCK) || |
965 | (*len > MAX_DEFRAG_SIZE) || | 1045 | (*len > EXT_MAX_BLOCK) || |
966 | (orig_start + *len > MAX_DEFRAG_SIZE)) { | 1046 | (orig_start + *len > EXT_MAX_BLOCK)) { |
967 | ext4_debug("ext4 move extent: Can't handle over [%lu] blocks " | 1047 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " |
968 | "[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE, | 1048 | "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK, |
969 | orig_inode->i_ino, donor_inode->i_ino); | 1049 | orig_inode->i_ino, donor_inode->i_ino); |
970 | return -EINVAL; | 1050 | return -EINVAL; |
971 | } | 1051 | } |
972 | 1052 | ||
973 | if (orig_inode->i_size > donor_inode->i_size) { | 1053 | if (orig_inode->i_size > donor_inode->i_size) { |
974 | if (orig_start >= donor_inode->i_size) { | 1054 | donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits; |
1055 | /* TODO: eliminate this artificial restriction */ | ||
1056 | if (orig_start >= donor_blocks) { | ||
975 | ext4_debug("ext4 move extent: orig start offset " | 1057 | ext4_debug("ext4 move extent: orig start offset " |
976 | "[%llu] should be less than donor file size " | 1058 | "[%llu] should be less than donor file blocks " |
977 | "[%lld] [ino:orig %lu, donor_inode %lu]\n", | 1059 | "[%u] [ino:orig %lu, donor %lu]\n", |
978 | orig_start, donor_inode->i_size, | 1060 | orig_start, donor_blocks, |
979 | orig_inode->i_ino, donor_inode->i_ino); | 1061 | orig_inode->i_ino, donor_inode->i_ino); |
980 | return -EINVAL; | 1062 | return -EINVAL; |
981 | } | 1063 | } |
982 | 1064 | ||
983 | if (orig_start + *len > donor_inode->i_size) { | 1065 | /* TODO: eliminate this artificial restriction */ |
1066 | if (orig_start + *len > donor_blocks) { | ||
984 | ext4_debug("ext4 move extent: End offset [%llu] should " | 1067 | ext4_debug("ext4 move extent: End offset [%llu] should " |
985 | "be less than donor file size [%lld]." | 1068 | "be less than donor file blocks [%u]." |
986 | "So adjust length from %llu to %lld " | 1069 | "So adjust length from %llu to %llu " |
987 | "[ino:orig %lu, donor %lu]\n", | 1070 | "[ino:orig %lu, donor %lu]\n", |
988 | orig_start + *len, donor_inode->i_size, | 1071 | orig_start + *len, donor_blocks, |
989 | *len, donor_inode->i_size - orig_start, | 1072 | *len, donor_blocks - orig_start, |
990 | orig_inode->i_ino, donor_inode->i_ino); | 1073 | orig_inode->i_ino, donor_inode->i_ino); |
991 | *len = donor_inode->i_size - orig_start; | 1074 | *len = donor_blocks - orig_start; |
992 | } | 1075 | } |
993 | } else { | 1076 | } else { |
994 | if (orig_start >= orig_inode->i_size) { | 1077 | orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits; |
1078 | if (orig_start >= orig_blocks) { | ||
995 | ext4_debug("ext4 move extent: start offset [%llu] " | 1079 | ext4_debug("ext4 move extent: start offset [%llu] " |
996 | "should be less than original file size " | 1080 | "should be less than original file blocks " |
997 | "[%lld] [inode:orig %lu, donor %lu]\n", | 1081 | "[%u] [ino:orig %lu, donor %lu]\n", |
998 | orig_start, orig_inode->i_size, | 1082 | orig_start, orig_blocks, |
999 | orig_inode->i_ino, donor_inode->i_ino); | 1083 | orig_inode->i_ino, donor_inode->i_ino); |
1000 | return -EINVAL; | 1084 | return -EINVAL; |
1001 | } | 1085 | } |
1002 | 1086 | ||
1003 | if (orig_start + *len > orig_inode->i_size) { | 1087 | if (orig_start + *len > orig_blocks) { |
1004 | ext4_debug("ext4 move extent: Adjust length " | 1088 | ext4_debug("ext4 move extent: Adjust length " |
1005 | "from %llu to %lld. Because it should be " | 1089 | "from %llu to %llu. Because it should be " |
1006 | "less than original file size " | 1090 | "less than original file blocks " |
1007 | "[ino:orig %lu, donor %lu]\n", | 1091 | "[ino:orig %lu, donor %lu]\n", |
1008 | *len, orig_inode->i_size - orig_start, | 1092 | *len, orig_blocks - orig_start, |
1009 | orig_inode->i_ino, donor_inode->i_ino); | 1093 | orig_inode->i_ino, donor_inode->i_ino); |
1010 | *len = orig_inode->i_size - orig_start; | 1094 | *len = orig_blocks - orig_start; |
1011 | } | 1095 | } |
1012 | } | 1096 | } |
1013 | 1097 | ||
@@ -1027,18 +1111,23 @@ mext_check_arguments(struct inode *orig_inode, | |||
1027 | * @inode1: the inode structure | 1111 | * @inode1: the inode structure |
1028 | * @inode2: the inode structure | 1112 | * @inode2: the inode structure |
1029 | * | 1113 | * |
1030 | * Lock two inodes' i_mutex by i_ino order. This function is moved from | 1114 | * Lock two inodes' i_mutex by i_ino order. |
1031 | * fs/inode.c. | 1115 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. |
1032 | */ | 1116 | */ |
1033 | static void | 1117 | static int |
1034 | mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | 1118 | mext_inode_double_lock(struct inode *inode1, struct inode *inode2) |
1035 | { | 1119 | { |
1036 | if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { | 1120 | int ret = 0; |
1037 | if (inode1) | 1121 | |
1038 | mutex_lock(&inode1->i_mutex); | 1122 | BUG_ON(inode1 == NULL && inode2 == NULL); |
1039 | else if (inode2) | 1123 | |
1040 | mutex_lock(&inode2->i_mutex); | 1124 | ret = mext_check_null_inode(inode1, inode2, __func__); |
1041 | return; | 1125 | if (ret < 0) |
1126 | goto out; | ||
1127 | |||
1128 | if (inode1 == inode2) { | ||
1129 | mutex_lock(&inode1->i_mutex); | ||
1130 | goto out; | ||
1042 | } | 1131 | } |
1043 | 1132 | ||
1044 | if (inode1->i_ino < inode2->i_ino) { | 1133 | if (inode1->i_ino < inode2->i_ino) { |
@@ -1048,6 +1137,9 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | |||
1048 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); | 1137 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); |
1049 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); | 1138 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); |
1050 | } | 1139 | } |
1140 | |||
1141 | out: | ||
1142 | return ret; | ||
1051 | } | 1143 | } |
1052 | 1144 | ||
1053 | /** | 1145 | /** |
@@ -1056,17 +1148,28 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | |||
1056 | * @inode1: the inode that is released first | 1148 | * @inode1: the inode that is released first |
1057 | * @inode2: the inode that is released second | 1149 | * @inode2: the inode that is released second |
1058 | * | 1150 | * |
1059 | * This function is moved from fs/inode.c. | 1151 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. |
1060 | */ | 1152 | */ |
1061 | 1153 | ||
1062 | static void | 1154 | static int |
1063 | mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) | 1155 | mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) |
1064 | { | 1156 | { |
1157 | int ret = 0; | ||
1158 | |||
1159 | BUG_ON(inode1 == NULL && inode2 == NULL); | ||
1160 | |||
1161 | ret = mext_check_null_inode(inode1, inode2, __func__); | ||
1162 | if (ret < 0) | ||
1163 | goto out; | ||
1164 | |||
1065 | if (inode1) | 1165 | if (inode1) |
1066 | mutex_unlock(&inode1->i_mutex); | 1166 | mutex_unlock(&inode1->i_mutex); |
1067 | 1167 | ||
1068 | if (inode2 && inode2 != inode1) | 1168 | if (inode2 && inode2 != inode1) |
1069 | mutex_unlock(&inode2->i_mutex); | 1169 | mutex_unlock(&inode2->i_mutex); |
1170 | |||
1171 | out: | ||
1172 | return ret; | ||
1070 | } | 1173 | } |
1071 | 1174 | ||
1072 | /** | 1175 | /** |
@@ -1123,70 +1226,76 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1123 | ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; | 1226 | ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; |
1124 | ext4_lblk_t rest_blocks; | 1227 | ext4_lblk_t rest_blocks; |
1125 | pgoff_t orig_page_offset = 0, seq_end_page; | 1228 | pgoff_t orig_page_offset = 0, seq_end_page; |
1126 | int ret, depth, last_extent = 0; | 1229 | int ret1, ret2, depth, last_extent = 0; |
1127 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 1230 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
1128 | int data_offset_in_page; | 1231 | int data_offset_in_page; |
1129 | int block_len_in_page; | 1232 | int block_len_in_page; |
1130 | int uninit; | 1233 | int uninit; |
1131 | 1234 | ||
1132 | /* protect orig and donor against a truncate */ | 1235 | /* protect orig and donor against a truncate */ |
1133 | mext_inode_double_lock(orig_inode, donor_inode); | 1236 | ret1 = mext_inode_double_lock(orig_inode, donor_inode); |
1237 | if (ret1 < 0) | ||
1238 | return ret1; | ||
1134 | 1239 | ||
1135 | mext_double_down_read(orig_inode, donor_inode); | 1240 | mext_double_down_read(orig_inode, donor_inode); |
1136 | /* Check the filesystem environment whether move_extent can be done */ | 1241 | /* Check the filesystem environment whether move_extent can be done */ |
1137 | ret = mext_check_arguments(orig_inode, donor_inode, orig_start, | 1242 | ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, |
1138 | donor_start, &len, *moved_len); | 1243 | donor_start, &len, *moved_len); |
1139 | mext_double_up_read(orig_inode, donor_inode); | 1244 | mext_double_up_read(orig_inode, donor_inode); |
1140 | if (ret) | 1245 | if (ret1) |
1141 | goto out2; | 1246 | goto out; |
1142 | 1247 | ||
1143 | file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; | 1248 | file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; |
1144 | block_end = block_start + len - 1; | 1249 | block_end = block_start + len - 1; |
1145 | if (file_end < block_end) | 1250 | if (file_end < block_end) |
1146 | len -= block_end - file_end; | 1251 | len -= block_end - file_end; |
1147 | 1252 | ||
1148 | get_ext_path(orig_path, orig_inode, block_start, ret); | 1253 | ret1 = get_ext_path(orig_inode, block_start, &orig_path); |
1149 | if (orig_path == NULL) | 1254 | if (ret1) |
1150 | goto out2; | 1255 | goto out; |
1151 | 1256 | ||
1152 | /* Get path structure to check the hole */ | 1257 | /* Get path structure to check the hole */ |
1153 | get_ext_path(holecheck_path, orig_inode, block_start, ret); | 1258 | ret1 = get_ext_path(orig_inode, block_start, &holecheck_path); |
1154 | if (holecheck_path == NULL) | 1259 | if (ret1) |
1155 | goto out; | 1260 | goto out; |
1156 | 1261 | ||
1157 | depth = ext_depth(orig_inode); | 1262 | depth = ext_depth(orig_inode); |
1158 | ext_cur = holecheck_path[depth].p_ext; | 1263 | ext_cur = holecheck_path[depth].p_ext; |
1159 | if (ext_cur == NULL) { | ||
1160 | ret = -EINVAL; | ||
1161 | goto out; | ||
1162 | } | ||
1163 | 1264 | ||
1164 | /* | 1265 | /* |
1165 | * Get proper extent whose ee_block is beyond block_start | 1266 | * Get proper starting location of block replacement if block_start was |
1166 | * if block_start was within the hole. | 1267 | * within the hole. |
1167 | */ | 1268 | */ |
1168 | if (le32_to_cpu(ext_cur->ee_block) + | 1269 | if (le32_to_cpu(ext_cur->ee_block) + |
1169 | ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { | 1270 | ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { |
1271 | /* | ||
1272 | * The hole exists between extents or the tail of | ||
1273 | * original file. | ||
1274 | */ | ||
1170 | last_extent = mext_next_extent(orig_inode, | 1275 | last_extent = mext_next_extent(orig_inode, |
1171 | holecheck_path, &ext_cur); | 1276 | holecheck_path, &ext_cur); |
1172 | if (last_extent < 0) { | 1277 | if (last_extent < 0) { |
1173 | ret = last_extent; | 1278 | ret1 = last_extent; |
1174 | goto out; | 1279 | goto out; |
1175 | } | 1280 | } |
1176 | last_extent = mext_next_extent(orig_inode, orig_path, | 1281 | last_extent = mext_next_extent(orig_inode, orig_path, |
1177 | &ext_dummy); | 1282 | &ext_dummy); |
1178 | if (last_extent < 0) { | 1283 | if (last_extent < 0) { |
1179 | ret = last_extent; | 1284 | ret1 = last_extent; |
1180 | goto out; | 1285 | goto out; |
1181 | } | 1286 | } |
1182 | } | 1287 | seq_start = le32_to_cpu(ext_cur->ee_block); |
1183 | seq_start = block_start; | 1288 | } else if (le32_to_cpu(ext_cur->ee_block) > block_start) |
1289 | /* The hole exists at the beginning of original file. */ | ||
1290 | seq_start = le32_to_cpu(ext_cur->ee_block); | ||
1291 | else | ||
1292 | seq_start = block_start; | ||
1184 | 1293 | ||
1185 | /* No blocks within the specified range. */ | 1294 | /* No blocks within the specified range. */ |
1186 | if (le32_to_cpu(ext_cur->ee_block) > block_end) { | 1295 | if (le32_to_cpu(ext_cur->ee_block) > block_end) { |
1187 | ext4_debug("ext4 move extent: The specified range of file " | 1296 | ext4_debug("ext4 move extent: The specified range of file " |
1188 | "may be the hole\n"); | 1297 | "may be the hole\n"); |
1189 | ret = -EINVAL; | 1298 | ret1 = -EINVAL; |
1190 | goto out; | 1299 | goto out; |
1191 | } | 1300 | } |
1192 | 1301 | ||
@@ -1206,7 +1315,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1206 | last_extent = mext_next_extent(orig_inode, holecheck_path, | 1315 | last_extent = mext_next_extent(orig_inode, holecheck_path, |
1207 | &ext_cur); | 1316 | &ext_cur); |
1208 | if (last_extent < 0) { | 1317 | if (last_extent < 0) { |
1209 | ret = last_extent; | 1318 | ret1 = last_extent; |
1210 | break; | 1319 | break; |
1211 | } | 1320 | } |
1212 | add_blocks = ext4_ext_get_actual_len(ext_cur); | 1321 | add_blocks = ext4_ext_get_actual_len(ext_cur); |
@@ -1258,16 +1367,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1258 | while (orig_page_offset <= seq_end_page) { | 1367 | while (orig_page_offset <= seq_end_page) { |
1259 | 1368 | ||
1260 | /* Swap original branches with new branches */ | 1369 | /* Swap original branches with new branches */ |
1261 | ret = move_extent_par_page(o_filp, donor_inode, | 1370 | ret1 = move_extent_per_page(o_filp, donor_inode, |
1262 | orig_page_offset, | 1371 | orig_page_offset, |
1263 | data_offset_in_page, | 1372 | data_offset_in_page, |
1264 | block_len_in_page, uninit); | 1373 | block_len_in_page, uninit); |
1265 | if (ret < 0) | 1374 | if (ret1 < 0) |
1266 | goto out; | 1375 | goto out; |
1267 | orig_page_offset++; | 1376 | orig_page_offset++; |
1268 | /* Count how many blocks we have exchanged */ | 1377 | /* Count how many blocks we have exchanged */ |
1269 | *moved_len += block_len_in_page; | 1378 | *moved_len += block_len_in_page; |
1270 | BUG_ON(*moved_len > len); | 1379 | if (*moved_len > len) { |
1380 | ext4_error(orig_inode->i_sb, __func__, | ||
1381 | "We replaced blocks too much! " | ||
1382 | "sum of replaced: %llu requested: %llu", | ||
1383 | *moved_len, len); | ||
1384 | ret1 = -EIO; | ||
1385 | goto out; | ||
1386 | } | ||
1271 | 1387 | ||
1272 | data_offset_in_page = 0; | 1388 | data_offset_in_page = 0; |
1273 | rest_blocks -= block_len_in_page; | 1389 | rest_blocks -= block_len_in_page; |
@@ -1280,17 +1396,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1280 | /* Decrease buffer counter */ | 1396 | /* Decrease buffer counter */ |
1281 | if (holecheck_path) | 1397 | if (holecheck_path) |
1282 | ext4_ext_drop_refs(holecheck_path); | 1398 | ext4_ext_drop_refs(holecheck_path); |
1283 | get_ext_path(holecheck_path, orig_inode, | 1399 | ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path); |
1284 | seq_start, ret); | 1400 | if (ret1) |
1285 | if (holecheck_path == NULL) | ||
1286 | break; | 1401 | break; |
1287 | depth = holecheck_path->p_depth; | 1402 | depth = holecheck_path->p_depth; |
1288 | 1403 | ||
1289 | /* Decrease buffer counter */ | 1404 | /* Decrease buffer counter */ |
1290 | if (orig_path) | 1405 | if (orig_path) |
1291 | ext4_ext_drop_refs(orig_path); | 1406 | ext4_ext_drop_refs(orig_path); |
1292 | get_ext_path(orig_path, orig_inode, seq_start, ret); | 1407 | ret1 = get_ext_path(orig_inode, seq_start, &orig_path); |
1293 | if (orig_path == NULL) | 1408 | if (ret1) |
1294 | break; | 1409 | break; |
1295 | 1410 | ||
1296 | ext_cur = holecheck_path[depth].p_ext; | 1411 | ext_cur = holecheck_path[depth].p_ext; |
@@ -1307,14 +1422,13 @@ out: | |||
1307 | ext4_ext_drop_refs(holecheck_path); | 1422 | ext4_ext_drop_refs(holecheck_path); |
1308 | kfree(holecheck_path); | 1423 | kfree(holecheck_path); |
1309 | } | 1424 | } |
1310 | out2: | ||
1311 | mext_inode_double_unlock(orig_inode, donor_inode); | ||
1312 | 1425 | ||
1313 | if (ret) | 1426 | ret2 = mext_inode_double_unlock(orig_inode, donor_inode); |
1314 | return ret; | ||
1315 | 1427 | ||
1316 | /* All of the specified blocks must be exchanged in succeed */ | 1428 | if (ret1) |
1317 | BUG_ON(*moved_len != len); | 1429 | return ret1; |
1430 | else if (ret2) | ||
1431 | return ret2; | ||
1318 | 1432 | ||
1319 | return 0; | 1433 | return 0; |
1320 | } | 1434 | } |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 114abe5d2c1d..42f81d285cd5 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -1518,8 +1518,12 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1518 | return retval; | 1518 | return retval; |
1519 | 1519 | ||
1520 | if (blocks == 1 && !dx_fallback && | 1520 | if (blocks == 1 && !dx_fallback && |
1521 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) | 1521 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { |
1522 | return make_indexed_dir(handle, dentry, inode, bh); | 1522 | retval = make_indexed_dir(handle, dentry, inode, bh); |
1523 | if (retval == -ENOSPC) | ||
1524 | brelse(bh); | ||
1525 | return retval; | ||
1526 | } | ||
1523 | brelse(bh); | 1527 | brelse(bh); |
1524 | } | 1528 | } |
1525 | bh = ext4_append(handle, dir, &block, &retval); | 1529 | bh = ext4_append(handle, dir, &block, &retval); |
@@ -1528,7 +1532,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1528 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 1532 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1529 | de->inode = 0; | 1533 | de->inode = 0; |
1530 | de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); | 1534 | de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); |
1531 | return add_dirent_to_buf(handle, dentry, inode, de, bh); | 1535 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); |
1536 | if (retval == -ENOSPC) | ||
1537 | brelse(bh); | ||
1538 | return retval; | ||
1532 | } | 1539 | } |
1533 | 1540 | ||
1534 | /* | 1541 | /* |
@@ -1590,9 +1597,9 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1590 | goto cleanup; | 1597 | goto cleanup; |
1591 | node2 = (struct dx_node *)(bh2->b_data); | 1598 | node2 = (struct dx_node *)(bh2->b_data); |
1592 | entries2 = node2->entries; | 1599 | entries2 = node2->entries; |
1600 | memset(&node2->fake, 0, sizeof(struct fake_dirent)); | ||
1593 | node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize, | 1601 | node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize, |
1594 | sb->s_blocksize); | 1602 | sb->s_blocksize); |
1595 | node2->fake.inode = 0; | ||
1596 | BUFFER_TRACE(frame->bh, "get_write_access"); | 1603 | BUFFER_TRACE(frame->bh, "get_write_access"); |
1597 | err = ext4_journal_get_write_access(handle, frame->bh); | 1604 | err = ext4_journal_get_write_access(handle, frame->bh); |
1598 | if (err) | 1605 | if (err) |
@@ -1657,7 +1664,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1657 | if (!de) | 1664 | if (!de) |
1658 | goto cleanup; | 1665 | goto cleanup; |
1659 | err = add_dirent_to_buf(handle, dentry, inode, de, bh); | 1666 | err = add_dirent_to_buf(handle, dentry, inode, de, bh); |
1660 | bh = NULL; | 1667 | if (err != -ENOSPC) |
1668 | bh = NULL; | ||
1661 | goto cleanup; | 1669 | goto cleanup; |
1662 | 1670 | ||
1663 | journal_error: | 1671 | journal_error: |
@@ -2310,7 +2318,7 @@ static int ext4_link(struct dentry *old_dentry, | |||
2310 | struct inode *inode = old_dentry->d_inode; | 2318 | struct inode *inode = old_dentry->d_inode; |
2311 | int err, retries = 0; | 2319 | int err, retries = 0; |
2312 | 2320 | ||
2313 | if (EXT4_DIR_LINK_MAX(inode)) | 2321 | if (inode->i_nlink >= EXT4_LINK_MAX) |
2314 | return -EMLINK; | 2322 | return -EMLINK; |
2315 | 2323 | ||
2316 | /* | 2324 | /* |
@@ -2413,7 +2421,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2413 | goto end_rename; | 2421 | goto end_rename; |
2414 | retval = -EMLINK; | 2422 | retval = -EMLINK; |
2415 | if (!new_inode && new_dir != old_dir && | 2423 | if (!new_inode && new_dir != old_dir && |
2416 | new_dir->i_nlink >= EXT4_LINK_MAX) | 2424 | EXT4_DIR_LINK_MAX(new_dir)) |
2417 | goto end_rename; | 2425 | goto end_rename; |
2418 | } | 2426 | } |
2419 | if (!new_bh) { | 2427 | if (!new_bh) { |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 68b0351fc647..3cfc343c41b5 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -746,7 +746,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
746 | struct inode *inode = NULL; | 746 | struct inode *inode = NULL; |
747 | handle_t *handle; | 747 | handle_t *handle; |
748 | int gdb_off, gdb_num; | 748 | int gdb_off, gdb_num; |
749 | int num_grp_locked = 0; | ||
750 | int err, err2; | 749 | int err, err2; |
751 | 750 | ||
752 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); | 751 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); |
@@ -856,7 +855,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
856 | * using the new disk blocks. | 855 | * using the new disk blocks. |
857 | */ | 856 | */ |
858 | 857 | ||
859 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group); | ||
860 | /* Update group descriptor block for new group */ | 858 | /* Update group descriptor block for new group */ |
861 | gdp = (struct ext4_group_desc *)((char *)primary->b_data + | 859 | gdp = (struct ext4_group_desc *)((char *)primary->b_data + |
862 | gdb_off * EXT4_DESC_SIZE(sb)); | 860 | gdb_off * EXT4_DESC_SIZE(sb)); |
@@ -875,10 +873,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
875 | * descriptor | 873 | * descriptor |
876 | */ | 874 | */ |
877 | err = ext4_mb_add_groupinfo(sb, input->group, gdp); | 875 | err = ext4_mb_add_groupinfo(sb, input->group, gdp); |
878 | if (err) { | 876 | if (err) |
879 | ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked); | ||
880 | goto exit_journal; | 877 | goto exit_journal; |
881 | } | ||
882 | 878 | ||
883 | /* | 879 | /* |
884 | * Make the new blocks and inodes valid next. We do this before | 880 | * Make the new blocks and inodes valid next. We do this before |
@@ -920,7 +916,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
920 | 916 | ||
921 | /* Update the global fs size fields */ | 917 | /* Update the global fs size fields */ |
922 | sbi->s_groups_count++; | 918 | sbi->s_groups_count++; |
923 | ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked); | ||
924 | 919 | ||
925 | ext4_handle_dirty_metadata(handle, NULL, primary); | 920 | ext4_handle_dirty_metadata(handle, NULL, primary); |
926 | 921 | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8f4f079e6b9a..a6b1ab734728 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include "ext4_jbd2.h" | 45 | #include "ext4_jbd2.h" |
46 | #include "xattr.h" | 46 | #include "xattr.h" |
47 | #include "acl.h" | 47 | #include "acl.h" |
48 | #include "mballoc.h" | ||
48 | 49 | ||
49 | #define CREATE_TRACE_POINTS | 50 | #define CREATE_TRACE_POINTS |
50 | #include <trace/events/ext4.h> | 51 | #include <trace/events/ext4.h> |
@@ -344,7 +345,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno, | |||
344 | errstr = "Out of memory"; | 345 | errstr = "Out of memory"; |
345 | break; | 346 | break; |
346 | case -EROFS: | 347 | case -EROFS: |
347 | if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) | 348 | if (!sb || (EXT4_SB(sb)->s_journal && |
349 | EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) | ||
348 | errstr = "Journal has aborted"; | 350 | errstr = "Journal has aborted"; |
349 | else | 351 | else |
350 | errstr = "Readonly filesystem"; | 352 | errstr = "Readonly filesystem"; |
@@ -1279,11 +1281,9 @@ static int parse_options(char *options, struct super_block *sb, | |||
1279 | *journal_devnum = option; | 1281 | *journal_devnum = option; |
1280 | break; | 1282 | break; |
1281 | case Opt_journal_checksum: | 1283 | case Opt_journal_checksum: |
1282 | set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); | 1284 | break; /* Kept for backwards compatibility */ |
1283 | break; | ||
1284 | case Opt_journal_async_commit: | 1285 | case Opt_journal_async_commit: |
1285 | set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); | 1286 | set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); |
1286 | set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); | ||
1287 | break; | 1287 | break; |
1288 | case Opt_noload: | 1288 | case Opt_noload: |
1289 | set_opt(sbi->s_mount_opt, NOLOAD); | 1289 | set_opt(sbi->s_mount_opt, NOLOAD); |
@@ -1695,12 +1695,12 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1695 | gdp = ext4_get_group_desc(sb, i, NULL); | 1695 | gdp = ext4_get_group_desc(sb, i, NULL); |
1696 | 1696 | ||
1697 | flex_group = ext4_flex_group(sbi, i); | 1697 | flex_group = ext4_flex_group(sbi, i); |
1698 | atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, | 1698 | atomic_add(ext4_free_inodes_count(sb, gdp), |
1699 | ext4_free_inodes_count(sb, gdp)); | 1699 | &sbi->s_flex_groups[flex_group].free_inodes); |
1700 | atomic_set(&sbi->s_flex_groups[flex_group].free_blocks, | 1700 | atomic_add(ext4_free_blks_count(sb, gdp), |
1701 | ext4_free_blks_count(sb, gdp)); | 1701 | &sbi->s_flex_groups[flex_group].free_blocks); |
1702 | atomic_set(&sbi->s_flex_groups[flex_group].used_dirs, | 1702 | atomic_add(ext4_used_dirs_count(sb, gdp), |
1703 | ext4_used_dirs_count(sb, gdp)); | 1703 | &sbi->s_flex_groups[flex_group].used_dirs); |
1704 | } | 1704 | } |
1705 | 1705 | ||
1706 | return 1; | 1706 | return 1; |
@@ -2253,6 +2253,49 @@ static struct kobj_type ext4_ktype = { | |||
2253 | .release = ext4_sb_release, | 2253 | .release = ext4_sb_release, |
2254 | }; | 2254 | }; |
2255 | 2255 | ||
2256 | /* | ||
2257 | * Check whether this filesystem can be mounted based on | ||
2258 | * the features present and the RDONLY/RDWR mount requested. | ||
2259 | * Returns 1 if this filesystem can be mounted as requested, | ||
2260 | * 0 if it cannot be. | ||
2261 | */ | ||
2262 | static int ext4_feature_set_ok(struct super_block *sb, int readonly) | ||
2263 | { | ||
2264 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { | ||
2265 | ext4_msg(sb, KERN_ERR, | ||
2266 | "Couldn't mount because of " | ||
2267 | "unsupported optional features (%x)", | ||
2268 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & | ||
2269 | ~EXT4_FEATURE_INCOMPAT_SUPP)); | ||
2270 | return 0; | ||
2271 | } | ||
2272 | |||
2273 | if (readonly) | ||
2274 | return 1; | ||
2275 | |||
2276 | /* Check that feature set is OK for a read-write mount */ | ||
2277 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { | ||
2278 | ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " | ||
2279 | "unsupported optional features (%x)", | ||
2280 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & | ||
2281 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | ||
2282 | return 0; | ||
2283 | } | ||
2284 | /* | ||
2285 | * Large file size enabled file system can only be mounted | ||
2286 | * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF | ||
2287 | */ | ||
2288 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { | ||
2289 | if (sizeof(blkcnt_t) < sizeof(u64)) { | ||
2290 | ext4_msg(sb, KERN_ERR, "Filesystem with huge files " | ||
2291 | "cannot be mounted RDWR without " | ||
2292 | "CONFIG_LBDAF"); | ||
2293 | return 0; | ||
2294 | } | ||
2295 | } | ||
2296 | return 1; | ||
2297 | } | ||
2298 | |||
2256 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) | 2299 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
2257 | __releases(kernel_lock) | 2300 | __releases(kernel_lock) |
2258 | __acquires(kernel_lock) | 2301 | __acquires(kernel_lock) |
@@ -2274,7 +2317,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2274 | unsigned int db_count; | 2317 | unsigned int db_count; |
2275 | unsigned int i; | 2318 | unsigned int i; |
2276 | int needs_recovery, has_huge_files; | 2319 | int needs_recovery, has_huge_files; |
2277 | int features; | ||
2278 | __u64 blocks_count; | 2320 | __u64 blocks_count; |
2279 | int err; | 2321 | int err; |
2280 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; | 2322 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; |
@@ -2401,39 +2443,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2401 | * previously didn't change the revision level when setting the flags, | 2443 | * previously didn't change the revision level when setting the flags, |
2402 | * so there is a chance incompat flags are set on a rev 0 filesystem. | 2444 | * so there is a chance incompat flags are set on a rev 0 filesystem. |
2403 | */ | 2445 | */ |
2404 | features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); | 2446 | if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) |
2405 | if (features) { | ||
2406 | ext4_msg(sb, KERN_ERR, | ||
2407 | "Couldn't mount because of " | ||
2408 | "unsupported optional features (%x)", | ||
2409 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & | ||
2410 | ~EXT4_FEATURE_INCOMPAT_SUPP)); | ||
2411 | goto failed_mount; | ||
2412 | } | ||
2413 | features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); | ||
2414 | if (!(sb->s_flags & MS_RDONLY) && features) { | ||
2415 | ext4_msg(sb, KERN_ERR, | ||
2416 | "Couldn't mount RDWR because of " | ||
2417 | "unsupported optional features (%x)", | ||
2418 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & | ||
2419 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | ||
2420 | goto failed_mount; | 2447 | goto failed_mount; |
2421 | } | 2448 | |
2422 | has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
2423 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
2424 | if (has_huge_files) { | ||
2425 | /* | ||
2426 | * Large file size enabled file system can only be | ||
2427 | * mount if kernel is build with CONFIG_LBDAF | ||
2428 | */ | ||
2429 | if (sizeof(root->i_blocks) < sizeof(u64) && | ||
2430 | !(sb->s_flags & MS_RDONLY)) { | ||
2431 | ext4_msg(sb, KERN_ERR, "Filesystem with huge " | ||
2432 | "files cannot be mounted read-write " | ||
2433 | "without CONFIG_LBDAF"); | ||
2434 | goto failed_mount; | ||
2435 | } | ||
2436 | } | ||
2437 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); | 2449 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); |
2438 | 2450 | ||
2439 | if (blocksize < EXT4_MIN_BLOCK_SIZE || | 2451 | if (blocksize < EXT4_MIN_BLOCK_SIZE || |
@@ -2469,6 +2481,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2469 | } | 2481 | } |
2470 | } | 2482 | } |
2471 | 2483 | ||
2484 | has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
2485 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
2472 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, | 2486 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, |
2473 | has_huge_files); | 2487 | has_huge_files); |
2474 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); | 2488 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); |
@@ -2549,12 +2563,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2549 | goto failed_mount; | 2563 | goto failed_mount; |
2550 | } | 2564 | } |
2551 | 2565 | ||
2552 | if (ext4_blocks_count(es) > | 2566 | /* |
2553 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { | 2567 | * Test whether we have more sectors than will fit in sector_t, |
2568 | * and whether the max offset is addressable by the page cache. | ||
2569 | */ | ||
2570 | if ((ext4_blocks_count(es) > | ||
2571 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || | ||
2572 | (ext4_blocks_count(es) > | ||
2573 | (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { | ||
2554 | ext4_msg(sb, KERN_ERR, "filesystem" | 2574 | ext4_msg(sb, KERN_ERR, "filesystem" |
2555 | " too large to mount safely"); | 2575 | " too large to mount safely on this system"); |
2556 | if (sizeof(sector_t) < 8) | 2576 | if (sizeof(sector_t) < 8) |
2557 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); | 2577 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); |
2578 | ret = -EFBIG; | ||
2558 | goto failed_mount; | 2579 | goto failed_mount; |
2559 | } | 2580 | } |
2560 | 2581 | ||
@@ -2595,6 +2616,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2595 | goto failed_mount; | 2616 | goto failed_mount; |
2596 | } | 2617 | } |
2597 | sbi->s_groups_count = blocks_count; | 2618 | sbi->s_groups_count = blocks_count; |
2619 | sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, | ||
2620 | (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); | ||
2598 | db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / | 2621 | db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / |
2599 | EXT4_DESC_PER_BLOCK(sb); | 2622 | EXT4_DESC_PER_BLOCK(sb); |
2600 | sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), | 2623 | sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), |
@@ -2729,20 +2752,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2729 | goto failed_mount4; | 2752 | goto failed_mount4; |
2730 | } | 2753 | } |
2731 | 2754 | ||
2732 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { | 2755 | jbd2_journal_set_features(sbi->s_journal, |
2733 | jbd2_journal_set_features(sbi->s_journal, | 2756 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); |
2734 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | 2757 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) |
2758 | jbd2_journal_set_features(sbi->s_journal, 0, 0, | ||
2735 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | 2759 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); |
2736 | } else if (test_opt(sb, JOURNAL_CHECKSUM)) { | 2760 | else |
2737 | jbd2_journal_set_features(sbi->s_journal, | ||
2738 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); | ||
2739 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, | 2761 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, |
2740 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | 2762 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); |
2741 | } else { | ||
2742 | jbd2_journal_clear_features(sbi->s_journal, | ||
2743 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | ||
2744 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | ||
2745 | } | ||
2746 | 2763 | ||
2747 | /* We have now updated the journal if required, so we can | 2764 | /* We have now updated the journal if required, so we can |
2748 | * validate the data journaling mode. */ | 2765 | * validate the data journaling mode. */ |
@@ -3208,7 +3225,18 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
3208 | clear_buffer_write_io_error(sbh); | 3225 | clear_buffer_write_io_error(sbh); |
3209 | set_buffer_uptodate(sbh); | 3226 | set_buffer_uptodate(sbh); |
3210 | } | 3227 | } |
3211 | es->s_wtime = cpu_to_le32(get_seconds()); | 3228 | /* |
3229 | * If the file system is mounted read-only, don't update the | ||
3230 | * superblock write time. This avoids updating the superblock | ||
3231 | * write time when we are mounting the root file system | ||
3232 | * read/only but we need to replay the journal; at that point, | ||
3233 | * for people who are east of GMT and who make their clock | ||
3234 | * tick in localtime for Windows bug-for-bug compatibility, | ||
3235 | * the clock is set in the future, and this will cause e2fsck | ||
3236 | * to complain and force a full file system check. | ||
3237 | */ | ||
3238 | if (!(sb->s_flags & MS_RDONLY)) | ||
3239 | es->s_wtime = cpu_to_le32(get_seconds()); | ||
3212 | es->s_kbytes_written = | 3240 | es->s_kbytes_written = |
3213 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + | 3241 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + |
3214 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 3242 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
@@ -3477,18 +3505,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3477 | if (sbi->s_journal) | 3505 | if (sbi->s_journal) |
3478 | ext4_mark_recovery_complete(sb, es); | 3506 | ext4_mark_recovery_complete(sb, es); |
3479 | } else { | 3507 | } else { |
3480 | int ret; | 3508 | /* Make sure we can mount this feature set readwrite */ |
3481 | if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3509 | if (!ext4_feature_set_ok(sb, 0)) { |
3482 | ~EXT4_FEATURE_RO_COMPAT_SUPP))) { | ||
3483 | ext4_msg(sb, KERN_WARNING, "couldn't " | ||
3484 | "remount RDWR because of unsupported " | ||
3485 | "optional features (%x)", | ||
3486 | (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & | ||
3487 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | ||
3488 | err = -EROFS; | 3510 | err = -EROFS; |
3489 | goto restore_opts; | 3511 | goto restore_opts; |
3490 | } | 3512 | } |
3491 | |||
3492 | /* | 3513 | /* |
3493 | * Make sure the group descriptor checksums | 3514 | * Make sure the group descriptor checksums |
3494 | * are sane. If they aren't, refuse to remount r/w. | 3515 | * are sane. If they aren't, refuse to remount r/w. |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 62b31c246994..fed5b01d7a8d 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -810,12 +810,23 @@ inserted: | |||
810 | get_bh(new_bh); | 810 | get_bh(new_bh); |
811 | } else { | 811 | } else { |
812 | /* We need to allocate a new block */ | 812 | /* We need to allocate a new block */ |
813 | ext4_fsblk_t goal = ext4_group_first_block_no(sb, | 813 | ext4_fsblk_t goal, block; |
814 | |||
815 | goal = ext4_group_first_block_no(sb, | ||
814 | EXT4_I(inode)->i_block_group); | 816 | EXT4_I(inode)->i_block_group); |
815 | ext4_fsblk_t block = ext4_new_meta_blocks(handle, inode, | 817 | |
818 | /* non-extent files can't have physical blocks past 2^32 */ | ||
819 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
820 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | ||
821 | |||
822 | block = ext4_new_meta_blocks(handle, inode, | ||
816 | goal, NULL, &error); | 823 | goal, NULL, &error); |
817 | if (error) | 824 | if (error) |
818 | goto cleanup; | 825 | goto cleanup; |
826 | |||
827 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
828 | BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); | ||
829 | |||
819 | ea_idebug(inode, "creating block %d", block); | 830 | ea_idebug(inode, "creating block %d", block); |
820 | 831 | ||
821 | new_bh = sb_getblk(sb, block); | 832 | new_bh = sb_getblk(sb, block); |