| author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-18 13:56:26 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-18 13:56:26 -0400 |
| commit | 3530c1886291df061e3972c55590777ef1cb67f8 (patch) | |
| tree | bd6755e533eb5a0f37ff600da6bc0d9d1ba33c17 | |
| parent | 6952b61de9984073289859073e8195ad0bee8fd5 (diff) | |
| parent | 1358870deaf11a752a84fbd89201749aa62498e8 (diff) | |
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (64 commits)
ext4: Update documentation about quota mount options
ext4: replace MAX_DEFRAG_SIZE with EXT_MAX_BLOCK
ext4: Fix the alloc on close after a truncate heuristic
ext4: Add a tracepoint for ext4_alloc_da_blocks()
ext4: store EXT4_EXT_MIGRATE in i_state instead of i_flags
ext4: limit block allocations for indirect-block files to < 2^32
ext4: Fix different block exchange issue in EXT4_IOC_MOVE_EXT
ext4: Add null extent check to ext_get_path
ext4: Replace BUG_ON() with ext4_error() in move_extents.c
ext4: Replace get_ext_path macro with an inline function
ext4: Fix include/trace/events/ext4.h to work with Systemtap
ext4: Fix initialization of s_flex_groups
ext4: Always set dx_node's fake_dirent explicitly.
ext4: Fix async commit mode to be safe by using a barrier
ext4: Don't update superblock write time when filesystem is read-only
ext4: Clarify the locking details in mballoc
ext4: check for need init flag in ext4_mb_load_buddy
ext4: move ext4_mb_init_group() function earlier in the mballoc.c
ext4: Make non-journal fsync work properly
ext4: Assure that metadata blocks are written during fsync in no journal mode
...
| -rw-r--r-- | Documentation/filesystems/ext4.txt | 24 |
| -rw-r--r-- | fs/ext4/Kconfig | 11 |
| -rw-r--r-- | fs/ext4/balloc.c | 2 |
| -rw-r--r-- | fs/ext4/ext4.h | 91 |
| -rw-r--r-- | fs/ext4/ext4_extents.h | 4 |
| -rw-r--r-- | fs/ext4/ext4_jbd2.c | 9 |
| -rw-r--r-- | fs/ext4/extents.c | 112 |
| -rw-r--r-- | fs/ext4/fsync.c | 13 |
| -rw-r--r-- | fs/ext4/ialloc.c | 2 |
| -rw-r--r-- | fs/ext4/inode.c | 150 |
| -rw-r--r-- | fs/ext4/ioctl.c | 7 |
| -rw-r--r-- | fs/ext4/mballoc.c | 429 |
| -rw-r--r-- | fs/ext4/mballoc.h | 22 |
| -rw-r--r-- | fs/ext4/migrate.c | 22 |
| -rw-r--r-- | fs/ext4/move_extent.c | 334 |
| -rw-r--r-- | fs/ext4/namei.c | 22 |
| -rw-r--r-- | fs/ext4/resize.c | 7 |
| -rw-r--r-- | fs/ext4/super.c | 155 |
| -rw-r--r-- | fs/ext4/xattr.c | 15 |
| -rw-r--r-- | fs/jbd2/commit.c | 11 |
| -rw-r--r-- | fs/jbd2/journal.c | 6 |
| -rw-r--r-- | fs/jbd2/transaction.c | 7 |
| -rw-r--r-- | include/linux/jbd2.h | 2 |
| -rw-r--r-- | include/trace/events/ext4.h | 142 |
| -rw-r--r-- | include/trace/events/jbd2.h | 2 |
25 files changed, 1000 insertions, 601 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 7be02ac5fa36..18b5ec8cea45 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
| @@ -134,15 +134,9 @@ ro Mount filesystem read only. Note that ext4 will | |||
| 134 | mount options "ro,noload" can be used to prevent | 134 | mount options "ro,noload" can be used to prevent |
| 135 | writes to the filesystem. | 135 | writes to the filesystem. |
| 136 | 136 | ||
| 137 | journal_checksum Enable checksumming of the journal transactions. | ||
| 138 | This will allow the recovery code in e2fsck and the | ||
| 139 | kernel to detect corruption in the kernel. It is a | ||
| 140 | compatible change and will be ignored by older kernels. | ||
| 141 | |||
| 142 | journal_async_commit Commit block can be written to disk without waiting | 137 | journal_async_commit Commit block can be written to disk without waiting |
| 143 | for descriptor blocks. If enabled older kernels cannot | 138 | for descriptor blocks. If enabled older kernels cannot |
| 144 | mount the device. This will enable 'journal_checksum' | 139 | mount the device. |
| 145 | internally. | ||
| 146 | 140 | ||
| 147 | journal=update Update the ext4 file system's journal to the current | 141 | journal=update Update the ext4 file system's journal to the current |
| 148 | format. | 142 | format. |
| @@ -263,10 +257,18 @@ resuid=n The user ID which may use the reserved blocks. | |||
| 263 | 257 | ||
| 264 | sb=n Use alternate superblock at this location. | 258 | sb=n Use alternate superblock at this location. |
| 265 | 259 | ||
| 266 | quota | 260 | quota These options are ignored by the filesystem. They |
| 267 | noquota | 261 | noquota are used only by quota tools to recognize volumes |
| 268 | grpquota | 262 | grpquota where quota should be turned on. See documentation |
| 269 | usrquota | 263 | usrquota in the quota-tools package for more details |
| 264 | (http://sourceforge.net/projects/linuxquota). | ||
| 265 | |||
| 266 | jqfmt=<quota type> These options tell filesystem details about quota | ||
| 267 | usrjquota=<file> so that quota information can be properly updated | ||
| 268 | grpjquota=<file> during journal replay. They replace the above | ||
| 269 | quota options. See documentation in the quota-tools | ||
| 270 | package for more details | ||
| 271 | (http://sourceforge.net/projects/linuxquota). | ||
| 270 | 272 | ||
| 271 | bh (*) ext4 associates buffer heads to data pages to | 273 | bh (*) ext4 associates buffer heads to data pages to |
| 272 | nobh (a) cache disk block mapping information | 274 | nobh (a) cache disk block mapping information |
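
As an aside, a minimal userspace sketch of how the journaled quota options described in the documentation hunk above might be passed at mount time. The device, mount point, and quota file names below are illustrative assumptions, not taken from the patch:

```c
/*
 * Illustrative only: mount ext4 with journaled quota options so that quota
 * information can be updated during journal replay, as the documentation
 * above describes. Device, mount point, and quota file names are assumptions.
 */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	const char *opts =
		"jqfmt=vfsv0,usrjquota=aquota.user,grpjquota=aquota.group";

	if (mount("/dev/sdb1", "/mnt/data", "ext4", 0, opts) != 0) {
		perror("mount");
		return 1;
	}
	return 0;
}
```
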
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 418b6f3b0ae8..d5c0ea2e8f2d 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
| @@ -37,7 +37,7 @@ config EXT4DEV_COMPAT | |||
| 37 | 37 | ||
| 38 | To enable backwards compatibility so that systems that are | 38 | To enable backwards compatibility so that systems that are |
| 39 | still expecting to mount ext4 filesystems using ext4dev, | 39 | still expecting to mount ext4 filesystems using ext4dev, |
| 40 | chose Y here. This feature will go away by 2.6.31, so | 40 | choose Y here. This feature will go away by 2.6.31, so |
| 41 | please arrange to get your userspace programs fixed! | 41 | please arrange to get your userspace programs fixed! |
| 42 | 42 | ||
| 43 | config EXT4_FS_XATTR | 43 | config EXT4_FS_XATTR |
| @@ -77,3 +77,12 @@ config EXT4_FS_SECURITY | |||
| 77 | 77 | ||
| 78 | If you are not using a security module that requires using | 78 | If you are not using a security module that requires using |
| 79 | extended attributes for file security labels, say N. | 79 | extended attributes for file security labels, say N. |
| 80 | |||
| 81 | config EXT4_DEBUG | ||
| 82 | bool "EXT4 debugging support" | ||
| 83 | depends on EXT4_FS | ||
| 84 | help | ||
| 85 | Enables run-time debugging support for the ext4 filesystem. | ||
| 86 | |||
| 87 | If you select Y here, then you will be able to turn on debugging | ||
| 88 | with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug" | ||
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e2126d70dff5..1d0418980f8d 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
| @@ -478,7 +478,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
| 478 | * new bitmap information | 478 | * new bitmap information |
| 479 | */ | 479 | */ |
| 480 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); | 480 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); |
| 481 | ext4_mb_update_group_info(grp, blocks_freed); | 481 | grp->bb_free += blocks_freed; |
| 482 | up_write(&grp->alloc_sem); | 482 | up_write(&grp->alloc_sem); |
| 483 | 483 | ||
| 484 | /* We dirtied the bitmap block */ | 484 | /* We dirtied the bitmap block */ |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9714db393efe..e227eea23f05 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
| @@ -67,27 +67,29 @@ typedef unsigned int ext4_group_t; | |||
| 67 | 67 | ||
| 68 | 68 | ||
| 69 | /* prefer goal again. length */ | 69 | /* prefer goal again. length */ |
| 70 | #define EXT4_MB_HINT_MERGE 1 | 70 | #define EXT4_MB_HINT_MERGE 0x0001 |
| 71 | /* blocks already reserved */ | 71 | /* blocks already reserved */ |
| 72 | #define EXT4_MB_HINT_RESERVED 2 | 72 | #define EXT4_MB_HINT_RESERVED 0x0002 |
| 73 | /* metadata is being allocated */ | 73 | /* metadata is being allocated */ |
| 74 | #define EXT4_MB_HINT_METADATA 4 | 74 | #define EXT4_MB_HINT_METADATA 0x0004 |
| 75 | /* first blocks in the file */ | 75 | /* first blocks in the file */ |
| 76 | #define EXT4_MB_HINT_FIRST 8 | 76 | #define EXT4_MB_HINT_FIRST 0x0008 |
| 77 | /* search for the best chunk */ | 77 | /* search for the best chunk */ |
| 78 | #define EXT4_MB_HINT_BEST 16 | 78 | #define EXT4_MB_HINT_BEST 0x0010 |
| 79 | /* data is being allocated */ | 79 | /* data is being allocated */ |
| 80 | #define EXT4_MB_HINT_DATA 32 | 80 | #define EXT4_MB_HINT_DATA 0x0020 |
| 81 | /* don't preallocate (for tails) */ | 81 | /* don't preallocate (for tails) */ |
| 82 | #define EXT4_MB_HINT_NOPREALLOC 64 | 82 | #define EXT4_MB_HINT_NOPREALLOC 0x0040 |
| 83 | /* allocate for locality group */ | 83 | /* allocate for locality group */ |
| 84 | #define EXT4_MB_HINT_GROUP_ALLOC 128 | 84 | #define EXT4_MB_HINT_GROUP_ALLOC 0x0080 |
| 85 | /* allocate goal blocks or none */ | 85 | /* allocate goal blocks or none */ |
| 86 | #define EXT4_MB_HINT_GOAL_ONLY 256 | 86 | #define EXT4_MB_HINT_GOAL_ONLY 0x0100 |
| 87 | /* goal is meaningful */ | 87 | /* goal is meaningful */ |
| 88 | #define EXT4_MB_HINT_TRY_GOAL 512 | 88 | #define EXT4_MB_HINT_TRY_GOAL 0x0200 |
| 89 | /* blocks already pre-reserved by delayed allocation */ | 89 | /* blocks already pre-reserved by delayed allocation */ |
| 90 | #define EXT4_MB_DELALLOC_RESERVED 1024 | 90 | #define EXT4_MB_DELALLOC_RESERVED 0x0400 |
| 91 | /* We are doing stream allocation */ | ||
| 92 | #define EXT4_MB_STREAM_ALLOC 0x0800 | ||
| 91 | 93 | ||
| 92 | 94 | ||
| 93 | struct ext4_allocation_request { | 95 | struct ext4_allocation_request { |
| @@ -112,6 +114,21 @@ struct ext4_allocation_request { | |||
| 112 | }; | 114 | }; |
| 113 | 115 | ||
| 114 | /* | 116 | /* |
| 117 | * For delayed allocation tracking | ||
| 118 | */ | ||
| 119 | struct mpage_da_data { | ||
| 120 | struct inode *inode; | ||
| 121 | sector_t b_blocknr; /* start block number of extent */ | ||
| 122 | size_t b_size; /* size of extent */ | ||
| 123 | unsigned long b_state; /* state of the extent */ | ||
| 124 | unsigned long first_page, next_page; /* extent of pages */ | ||
| 125 | struct writeback_control *wbc; | ||
| 126 | int io_done; | ||
| 127 | int pages_written; | ||
| 128 | int retval; | ||
| 129 | }; | ||
| 130 | |||
| 131 | /* | ||
| 115 | * Special inodes numbers | 132 | * Special inodes numbers |
| 116 | */ | 133 | */ |
| 117 | #define EXT4_BAD_INO 1 /* Bad blocks inode */ | 134 | #define EXT4_BAD_INO 1 /* Bad blocks inode */ |
| @@ -251,7 +268,6 @@ struct flex_groups { | |||
| 251 | #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ | 268 | #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ |
| 252 | #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ | 269 | #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ |
| 253 | #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ | 270 | #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ |
| 254 | #define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */ | ||
| 255 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ | 271 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ |
| 256 | 272 | ||
| 257 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ | 273 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ |
| @@ -289,6 +305,7 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) | |||
| 289 | #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ | 305 | #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ |
| 290 | #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ | 306 | #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ |
| 291 | #define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ | 307 | #define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ |
| 308 | #define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */ | ||
| 292 | 309 | ||
| 293 | /* Used to pass group descriptor data when online resize is done */ | 310 | /* Used to pass group descriptor data when online resize is done */ |
| 294 | struct ext4_new_group_input { | 311 | struct ext4_new_group_input { |
| @@ -386,6 +403,9 @@ struct ext4_mount_options { | |||
| 386 | #endif | 403 | #endif |
| 387 | }; | 404 | }; |
| 388 | 405 | ||
| 406 | /* Max physical block we can addres w/o extents */ | ||
| 407 | #define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF | ||
| 408 | |||
| 389 | /* | 409 | /* |
| 390 | * Structure of an inode on the disk | 410 | * Structure of an inode on the disk |
| 391 | */ | 411 | */ |
| @@ -456,7 +476,6 @@ struct move_extent { | |||
| 456 | __u64 len; /* block length to be moved */ | 476 | __u64 len; /* block length to be moved */ |
| 457 | __u64 moved_len; /* moved block length */ | 477 | __u64 moved_len; /* moved block length */ |
| 458 | }; | 478 | }; |
| 459 | #define MAX_DEFRAG_SIZE ((1UL<<31) - 1) | ||
| 460 | 479 | ||
| 461 | #define EXT4_EPOCH_BITS 2 | 480 | #define EXT4_EPOCH_BITS 2 |
| 462 | #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) | 481 | #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) |
| @@ -694,7 +713,6 @@ struct ext4_inode_info { | |||
| 694 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ | 713 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ |
| 695 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ | 714 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ |
| 696 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ | 715 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ |
| 697 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ | ||
| 698 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 716 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
| 699 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | 717 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ |
| 700 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 718 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
| @@ -841,6 +859,7 @@ struct ext4_sb_info { | |||
| 841 | unsigned long s_gdb_count; /* Number of group descriptor blocks */ | 859 | unsigned long s_gdb_count; /* Number of group descriptor blocks */ |
| 842 | unsigned long s_desc_per_block; /* Number of group descriptors per block */ | 860 | unsigned long s_desc_per_block; /* Number of group descriptors per block */ |
| 843 | ext4_group_t s_groups_count; /* Number of groups in the fs */ | 861 | ext4_group_t s_groups_count; /* Number of groups in the fs */ |
| 862 | ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */ | ||
| 844 | unsigned long s_overhead_last; /* Last calculated overhead */ | 863 | unsigned long s_overhead_last; /* Last calculated overhead */ |
| 845 | unsigned long s_blocks_last; /* Last seen block count */ | 864 | unsigned long s_blocks_last; /* Last seen block count */ |
| 846 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ | 865 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ |
| @@ -950,6 +969,7 @@ struct ext4_sb_info { | |||
| 950 | atomic_t s_mb_lost_chunks; | 969 | atomic_t s_mb_lost_chunks; |
| 951 | atomic_t s_mb_preallocated; | 970 | atomic_t s_mb_preallocated; |
| 952 | atomic_t s_mb_discarded; | 971 | atomic_t s_mb_discarded; |
| 972 | atomic_t s_lock_busy; | ||
| 953 | 973 | ||
| 954 | /* locality groups */ | 974 | /* locality groups */ |
| 955 | struct ext4_locality_group *s_locality_groups; | 975 | struct ext4_locality_group *s_locality_groups; |
| @@ -1340,8 +1360,6 @@ extern void ext4_mb_free_blocks(handle_t *, struct inode *, | |||
| 1340 | ext4_fsblk_t, unsigned long, int, unsigned long *); | 1360 | ext4_fsblk_t, unsigned long, int, unsigned long *); |
| 1341 | extern int ext4_mb_add_groupinfo(struct super_block *sb, | 1361 | extern int ext4_mb_add_groupinfo(struct super_block *sb, |
| 1342 | ext4_group_t i, struct ext4_group_desc *desc); | 1362 | ext4_group_t i, struct ext4_group_desc *desc); |
| 1343 | extern void ext4_mb_update_group_info(struct ext4_group_info *grp, | ||
| 1344 | ext4_grpblk_t add); | ||
| 1345 | extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); | 1363 | extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); |
| 1346 | extern void ext4_mb_put_buddy_cache_lock(struct super_block *, | 1364 | extern void ext4_mb_put_buddy_cache_lock(struct super_block *, |
| 1347 | ext4_group_t, int); | 1365 | ext4_group_t, int); |
| @@ -1367,6 +1385,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int); | |||
| 1367 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 1385 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
| 1368 | extern int ext4_can_truncate(struct inode *inode); | 1386 | extern int ext4_can_truncate(struct inode *inode); |
| 1369 | extern void ext4_truncate(struct inode *); | 1387 | extern void ext4_truncate(struct inode *); |
| 1388 | extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); | ||
| 1370 | extern void ext4_set_inode_flags(struct inode *); | 1389 | extern void ext4_set_inode_flags(struct inode *); |
| 1371 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1390 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
| 1372 | extern int ext4_alloc_da_blocks(struct inode *inode); | 1391 | extern int ext4_alloc_da_blocks(struct inode *inode); |
| @@ -1575,15 +1594,18 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) | |||
| 1575 | struct ext4_group_info { | 1594 | struct ext4_group_info { |
| 1576 | unsigned long bb_state; | 1595 | unsigned long bb_state; |
| 1577 | struct rb_root bb_free_root; | 1596 | struct rb_root bb_free_root; |
| 1578 | unsigned short bb_first_free; | 1597 | ext4_grpblk_t bb_first_free; /* first free block */ |
| 1579 | unsigned short bb_free; | 1598 | ext4_grpblk_t bb_free; /* total free blocks */ |
| 1580 | unsigned short bb_fragments; | 1599 | ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ |
| 1581 | struct list_head bb_prealloc_list; | 1600 | struct list_head bb_prealloc_list; |
| 1582 | #ifdef DOUBLE_CHECK | 1601 | #ifdef DOUBLE_CHECK |
| 1583 | void *bb_bitmap; | 1602 | void *bb_bitmap; |
| 1584 | #endif | 1603 | #endif |
| 1585 | struct rw_semaphore alloc_sem; | 1604 | struct rw_semaphore alloc_sem; |
| 1586 | unsigned short bb_counters[]; | 1605 | ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block |
| 1606 | * regions, index is order. | ||
| 1607 | * bb_counters[3] = 5 means | ||
| 1608 | * 5 free 8-block regions. */ | ||
| 1587 | }; | 1609 | }; |
| 1588 | 1610 | ||
| 1589 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 | 1611 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 |
| @@ -1591,15 +1613,42 @@ struct ext4_group_info { | |||
| 1591 | #define EXT4_MB_GRP_NEED_INIT(grp) \ | 1613 | #define EXT4_MB_GRP_NEED_INIT(grp) \ |
| 1592 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) | 1614 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) |
| 1593 | 1615 | ||
| 1616 | #define EXT4_MAX_CONTENTION 8 | ||
| 1617 | #define EXT4_CONTENTION_THRESHOLD 2 | ||
| 1618 | |||
| 1594 | static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, | 1619 | static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, |
| 1595 | ext4_group_t group) | 1620 | ext4_group_t group) |
| 1596 | { | 1621 | { |
| 1597 | return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); | 1622 | return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); |
| 1598 | } | 1623 | } |
| 1599 | 1624 | ||
| 1625 | /* | ||
| 1626 | * Returns true if the filesystem is busy enough that attempts to | ||
| 1627 | * access the block group locks has run into contention. | ||
| 1628 | */ | ||
| 1629 | static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi) | ||
| 1630 | { | ||
| 1631 | return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD); | ||
| 1632 | } | ||
| 1633 | |||
| 1600 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | 1634 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) |
| 1601 | { | 1635 | { |
| 1602 | spin_lock(ext4_group_lock_ptr(sb, group)); | 1636 | spinlock_t *lock = ext4_group_lock_ptr(sb, group); |
| 1637 | if (spin_trylock(lock)) | ||
| 1638 | /* | ||
| 1639 | * We're able to grab the lock right away, so drop the | ||
| 1640 | * lock contention counter. | ||
| 1641 | */ | ||
| 1642 | atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0); | ||
| 1643 | else { | ||
| 1644 | /* | ||
| 1645 | * The lock is busy, so bump the contention counter, | ||
| 1646 | * and then wait on the spin lock. | ||
| 1647 | */ | ||
| 1648 | atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1, | ||
| 1649 | EXT4_MAX_CONTENTION); | ||
| 1650 | spin_lock(lock); | ||
| 1651 | } | ||
| 1603 | } | 1652 | } |
| 1604 | 1653 | ||
| 1605 | static inline void ext4_unlock_group(struct super_block *sb, | 1654 | static inline void ext4_unlock_group(struct super_block *sb, |
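
The ext4_lock_group() change above adds an adaptive contention heuristic: take the group spinlock with a trylock first, and keep a clamped per-filesystem counter (s_lock_busy) that ext4_fs_is_busy() compares against a threshold. Below is a simplified, standalone userspace sketch of the same pattern, using a pthread spinlock and C11 atomics as stand-ins for the kernel primitives; it is not the kernel code itself:

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define MAX_CONTENTION       8   /* mirrors EXT4_MAX_CONTENTION */
#define CONTENTION_THRESHOLD 2   /* mirrors EXT4_CONTENTION_THRESHOLD */

static atomic_int lock_busy;     /* mirrors sbi->s_lock_busy */

/* Add 'delta' to *v unless it already equals 'unless' -- a userspace
 * rendering of the kernel's atomic_add_unless(). */
static void add_unless(atomic_int *v, int delta, int unless)
{
	int old = atomic_load(v);

	while (old != unless &&
	       !atomic_compare_exchange_weak(v, &old, old + delta))
		;
}

static void adaptive_lock(pthread_spinlock_t *lock)
{
	if (pthread_spin_trylock(lock) == 0) {
		/* Got it right away: decay the contention counter toward 0. */
		add_unless(&lock_busy, -1, 0);
	} else {
		/* Lock is busy: bump the counter (clamped), then wait. */
		add_unless(&lock_busy, 1, MAX_CONTENTION);
		pthread_spin_lock(lock);
	}
}

static int fs_is_busy(void)
{
	return atomic_load(&lock_busy) > CONTENTION_THRESHOLD;
}

int main(void)
{
	pthread_spinlock_t lock;

	pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
	adaptive_lock(&lock);
	printf("fs busy: %d\n", fs_is_busy());
	pthread_spin_unlock(&lock);
	pthread_spin_destroy(&lock);
	return 0;
}
```

The intent is that callers (mballoc, in this series) can consult the busy indication and back off from more expensive behaviour while the group locks are heavily contended.
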
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 20a84105a10b..61652f1d15e6 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
| @@ -43,8 +43,7 @@ | |||
| 43 | #define CHECK_BINSEARCH__ | 43 | #define CHECK_BINSEARCH__ |
| 44 | 44 | ||
| 45 | /* | 45 | /* |
| 46 | * If EXT_DEBUG is defined you can use the 'extdebug' mount option | 46 | * Turn on EXT_DEBUG to get lots of info about extents operations. |
| 47 | * to get lots of info about what's going on. | ||
| 48 | */ | 47 | */ |
| 49 | #define EXT_DEBUG__ | 48 | #define EXT_DEBUG__ |
| 50 | #ifdef EXT_DEBUG | 49 | #ifdef EXT_DEBUG |
| @@ -138,6 +137,7 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, | |||
| 138 | #define EXT_BREAK 1 | 137 | #define EXT_BREAK 1 |
| 139 | #define EXT_REPEAT 2 | 138 | #define EXT_REPEAT 2 |
| 140 | 139 | ||
| 140 | /* Maximum logical block in a file; ext4_extent's ee_block is __le32 */ | ||
| 141 | #define EXT_MAX_BLOCK 0xffffffff | 141 | #define EXT_MAX_BLOCK 0xffffffff |
| 142 | 142 | ||
| 143 | /* | 143 | /* |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index eb27fd0f2ee8..6a9409920dee 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
| @@ -44,7 +44,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle, | |||
| 44 | handle, err); | 44 | handle, err); |
| 45 | } | 45 | } |
| 46 | else | 46 | else |
| 47 | brelse(bh); | 47 | bforget(bh); |
| 48 | return err; | 48 | return err; |
| 49 | } | 49 | } |
| 50 | 50 | ||
| @@ -60,7 +60,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle, | |||
| 60 | handle, err); | 60 | handle, err); |
| 61 | } | 61 | } |
| 62 | else | 62 | else |
| 63 | brelse(bh); | 63 | bforget(bh); |
| 64 | return err; | 64 | return err; |
| 65 | } | 65 | } |
| 66 | 66 | ||
| @@ -89,7 +89,10 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, | |||
| 89 | ext4_journal_abort_handle(where, __func__, bh, | 89 | ext4_journal_abort_handle(where, __func__, bh, |
| 90 | handle, err); | 90 | handle, err); |
| 91 | } else { | 91 | } else { |
| 92 | mark_buffer_dirty(bh); | 92 | if (inode && bh) |
| 93 | mark_buffer_dirty_inode(bh, inode); | ||
| 94 | else | ||
| 95 | mark_buffer_dirty(bh); | ||
| 93 | if (inode && inode_needs_sync(inode)) { | 96 | if (inode && inode_needs_sync(inode)) { |
| 94 | sync_dirty_buffer(bh); | 97 | sync_dirty_buffer(bh); |
| 95 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | 98 | if (buffer_req(bh) && !buffer_uptodate(bh)) { |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 73ebfb44ad75..7a3832577923 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
| @@ -93,7 +93,9 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) | |||
| 93 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); | 93 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); |
| 94 | } | 94 | } |
| 95 | 95 | ||
| 96 | static int ext4_ext_journal_restart(handle_t *handle, int needed) | 96 | static int ext4_ext_truncate_extend_restart(handle_t *handle, |
| 97 | struct inode *inode, | ||
| 98 | int needed) | ||
| 97 | { | 99 | { |
| 98 | int err; | 100 | int err; |
| 99 | 101 | ||
| @@ -104,7 +106,14 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed) | |||
| 104 | err = ext4_journal_extend(handle, needed); | 106 | err = ext4_journal_extend(handle, needed); |
| 105 | if (err <= 0) | 107 | if (err <= 0) |
| 106 | return err; | 108 | return err; |
| 107 | return ext4_journal_restart(handle, needed); | 109 | err = ext4_truncate_restart_trans(handle, inode, needed); |
| 110 | /* | ||
| 111 | * We have dropped i_data_sem so someone might have cached again | ||
| 112 | * an extent we are going to truncate. | ||
| 113 | */ | ||
| 114 | ext4_ext_invalidate_cache(inode); | ||
| 115 | |||
| 116 | return err; | ||
| 108 | } | 117 | } |
| 109 | 118 | ||
| 110 | /* | 119 | /* |
| @@ -220,57 +229,65 @@ ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, | |||
| 220 | return newblock; | 229 | return newblock; |
| 221 | } | 230 | } |
| 222 | 231 | ||
| 223 | static int ext4_ext_space_block(struct inode *inode) | 232 | static inline int ext4_ext_space_block(struct inode *inode, int check) |
| 224 | { | 233 | { |
| 225 | int size; | 234 | int size; |
| 226 | 235 | ||
| 227 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) | 236 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) |
| 228 | / sizeof(struct ext4_extent); | 237 | / sizeof(struct ext4_extent); |
| 238 | if (!check) { | ||
| 229 | #ifdef AGGRESSIVE_TEST | 239 | #ifdef AGGRESSIVE_TEST |
| 230 | if (size > 6) | 240 | if (size > 6) |
| 231 | size = 6; | 241 | size = 6; |
| 232 | #endif | 242 | #endif |
| 243 | } | ||
| 233 | return size; | 244 | return size; |
| 234 | } | 245 | } |
| 235 | 246 | ||
| 236 | static int ext4_ext_space_block_idx(struct inode *inode) | 247 | static inline int ext4_ext_space_block_idx(struct inode *inode, int check) |
| 237 | { | 248 | { |
| 238 | int size; | 249 | int size; |
| 239 | 250 | ||
| 240 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) | 251 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) |
| 241 | / sizeof(struct ext4_extent_idx); | 252 | / sizeof(struct ext4_extent_idx); |
| 253 | if (!check) { | ||
| 242 | #ifdef AGGRESSIVE_TEST | 254 | #ifdef AGGRESSIVE_TEST |
| 243 | if (size > 5) | 255 | if (size > 5) |
| 244 | size = 5; | 256 | size = 5; |
| 245 | #endif | 257 | #endif |
| 258 | } | ||
| 246 | return size; | 259 | return size; |
| 247 | } | 260 | } |
| 248 | 261 | ||
| 249 | static int ext4_ext_space_root(struct inode *inode) | 262 | static inline int ext4_ext_space_root(struct inode *inode, int check) |
| 250 | { | 263 | { |
| 251 | int size; | 264 | int size; |
| 252 | 265 | ||
| 253 | size = sizeof(EXT4_I(inode)->i_data); | 266 | size = sizeof(EXT4_I(inode)->i_data); |
| 254 | size -= sizeof(struct ext4_extent_header); | 267 | size -= sizeof(struct ext4_extent_header); |
| 255 | size /= sizeof(struct ext4_extent); | 268 | size /= sizeof(struct ext4_extent); |
| 269 | if (!check) { | ||
| 256 | #ifdef AGGRESSIVE_TEST | 270 | #ifdef AGGRESSIVE_TEST |
| 257 | if (size > 3) | 271 | if (size > 3) |
| 258 | size = 3; | 272 | size = 3; |
| 259 | #endif | 273 | #endif |
| 274 | } | ||
| 260 | return size; | 275 | return size; |
| 261 | } | 276 | } |
| 262 | 277 | ||
| 263 | static int ext4_ext_space_root_idx(struct inode *inode) | 278 | static inline int ext4_ext_space_root_idx(struct inode *inode, int check) |
| 264 | { | 279 | { |
| 265 | int size; | 280 | int size; |
| 266 | 281 | ||
| 267 | size = sizeof(EXT4_I(inode)->i_data); | 282 | size = sizeof(EXT4_I(inode)->i_data); |
| 268 | size -= sizeof(struct ext4_extent_header); | 283 | size -= sizeof(struct ext4_extent_header); |
| 269 | size /= sizeof(struct ext4_extent_idx); | 284 | size /= sizeof(struct ext4_extent_idx); |
| 285 | if (!check) { | ||
| 270 | #ifdef AGGRESSIVE_TEST | 286 | #ifdef AGGRESSIVE_TEST |
| 271 | if (size > 4) | 287 | if (size > 4) |
| 272 | size = 4; | 288 | size = 4; |
| 273 | #endif | 289 | #endif |
| 290 | } | ||
| 274 | return size; | 291 | return size; |
| 275 | } | 292 | } |
| 276 | 293 | ||
| @@ -284,9 +301,9 @@ int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) | |||
| 284 | int lcap, icap, rcap, leafs, idxs, num; | 301 | int lcap, icap, rcap, leafs, idxs, num; |
| 285 | int newextents = blocks; | 302 | int newextents = blocks; |
| 286 | 303 | ||
| 287 | rcap = ext4_ext_space_root_idx(inode); | 304 | rcap = ext4_ext_space_root_idx(inode, 0); |
| 288 | lcap = ext4_ext_space_block(inode); | 305 | lcap = ext4_ext_space_block(inode, 0); |
| 289 | icap = ext4_ext_space_block_idx(inode); | 306 | icap = ext4_ext_space_block_idx(inode, 0); |
| 290 | 307 | ||
| 291 | /* number of new leaf blocks needed */ | 308 | /* number of new leaf blocks needed */ |
| 292 | num = leafs = (newextents + lcap - 1) / lcap; | 309 | num = leafs = (newextents + lcap - 1) / lcap; |
| @@ -311,14 +328,14 @@ ext4_ext_max_entries(struct inode *inode, int depth) | |||
| 311 | 328 | ||
| 312 | if (depth == ext_depth(inode)) { | 329 | if (depth == ext_depth(inode)) { |
| 313 | if (depth == 0) | 330 | if (depth == 0) |
| 314 | max = ext4_ext_space_root(inode); | 331 | max = ext4_ext_space_root(inode, 1); |
| 315 | else | 332 | else |
| 316 | max = ext4_ext_space_root_idx(inode); | 333 | max = ext4_ext_space_root_idx(inode, 1); |
| 317 | } else { | 334 | } else { |
| 318 | if (depth == 0) | 335 | if (depth == 0) |
| 319 | max = ext4_ext_space_block(inode); | 336 | max = ext4_ext_space_block(inode, 1); |
| 320 | else | 337 | else |
| 321 | max = ext4_ext_space_block_idx(inode); | 338 | max = ext4_ext_space_block_idx(inode, 1); |
| 322 | } | 339 | } |
| 323 | 340 | ||
| 324 | return max; | 341 | return max; |
| @@ -437,8 +454,9 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) | |||
| 437 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), | 454 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), |
| 438 | idx_pblock(path->p_idx)); | 455 | idx_pblock(path->p_idx)); |
| 439 | } else if (path->p_ext) { | 456 | } else if (path->p_ext) { |
| 440 | ext_debug(" %d:%d:%llu ", | 457 | ext_debug(" %d:[%d]%d:%llu ", |
| 441 | le32_to_cpu(path->p_ext->ee_block), | 458 | le32_to_cpu(path->p_ext->ee_block), |
| 459 | ext4_ext_is_uninitialized(path->p_ext), | ||
| 442 | ext4_ext_get_actual_len(path->p_ext), | 460 | ext4_ext_get_actual_len(path->p_ext), |
| 443 | ext_pblock(path->p_ext)); | 461 | ext_pblock(path->p_ext)); |
| 444 | } else | 462 | } else |
| @@ -460,8 +478,11 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) | |||
| 460 | eh = path[depth].p_hdr; | 478 | eh = path[depth].p_hdr; |
| 461 | ex = EXT_FIRST_EXTENT(eh); | 479 | ex = EXT_FIRST_EXTENT(eh); |
| 462 | 480 | ||
| 481 | ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino); | ||
| 482 | |||
| 463 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { | 483 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { |
| 464 | ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block), | 484 | ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), |
| 485 | ext4_ext_is_uninitialized(ex), | ||
| 465 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 486 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); |
| 466 | } | 487 | } |
| 467 | ext_debug("\n"); | 488 | ext_debug("\n"); |
| @@ -580,9 +601,10 @@ ext4_ext_binsearch(struct inode *inode, | |||
| 580 | } | 601 | } |
| 581 | 602 | ||
| 582 | path->p_ext = l - 1; | 603 | path->p_ext = l - 1; |
| 583 | ext_debug(" -> %d:%llu:%d ", | 604 | ext_debug(" -> %d:%llu:[%d]%d ", |
| 584 | le32_to_cpu(path->p_ext->ee_block), | 605 | le32_to_cpu(path->p_ext->ee_block), |
| 585 | ext_pblock(path->p_ext), | 606 | ext_pblock(path->p_ext), |
| 607 | ext4_ext_is_uninitialized(path->p_ext), | ||
| 586 | ext4_ext_get_actual_len(path->p_ext)); | 608 | ext4_ext_get_actual_len(path->p_ext)); |
| 587 | 609 | ||
| 588 | #ifdef CHECK_BINSEARCH | 610 | #ifdef CHECK_BINSEARCH |
| @@ -612,7 +634,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode) | |||
| 612 | eh->eh_depth = 0; | 634 | eh->eh_depth = 0; |
| 613 | eh->eh_entries = 0; | 635 | eh->eh_entries = 0; |
| 614 | eh->eh_magic = EXT4_EXT_MAGIC; | 636 | eh->eh_magic = EXT4_EXT_MAGIC; |
| 615 | eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode)); | 637 | eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); |
| 616 | ext4_mark_inode_dirty(handle, inode); | 638 | ext4_mark_inode_dirty(handle, inode); |
| 617 | ext4_ext_invalidate_cache(inode); | 639 | ext4_ext_invalidate_cache(inode); |
| 618 | return 0; | 640 | return 0; |
| @@ -837,7 +859,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
| 837 | 859 | ||
| 838 | neh = ext_block_hdr(bh); | 860 | neh = ext_block_hdr(bh); |
| 839 | neh->eh_entries = 0; | 861 | neh->eh_entries = 0; |
| 840 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); | 862 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); |
| 841 | neh->eh_magic = EXT4_EXT_MAGIC; | 863 | neh->eh_magic = EXT4_EXT_MAGIC; |
| 842 | neh->eh_depth = 0; | 864 | neh->eh_depth = 0; |
| 843 | ex = EXT_FIRST_EXTENT(neh); | 865 | ex = EXT_FIRST_EXTENT(neh); |
| @@ -850,9 +872,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
| 850 | path[depth].p_ext++; | 872 | path[depth].p_ext++; |
| 851 | while (path[depth].p_ext <= | 873 | while (path[depth].p_ext <= |
| 852 | EXT_MAX_EXTENT(path[depth].p_hdr)) { | 874 | EXT_MAX_EXTENT(path[depth].p_hdr)) { |
| 853 | ext_debug("move %d:%llu:%d in new leaf %llu\n", | 875 | ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", |
| 854 | le32_to_cpu(path[depth].p_ext->ee_block), | 876 | le32_to_cpu(path[depth].p_ext->ee_block), |
| 855 | ext_pblock(path[depth].p_ext), | 877 | ext_pblock(path[depth].p_ext), |
| 878 | ext4_ext_is_uninitialized(path[depth].p_ext), | ||
| 856 | ext4_ext_get_actual_len(path[depth].p_ext), | 879 | ext4_ext_get_actual_len(path[depth].p_ext), |
| 857 | newblock); | 880 | newblock); |
| 858 | /*memmove(ex++, path[depth].p_ext++, | 881 | /*memmove(ex++, path[depth].p_ext++, |
| @@ -912,7 +935,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
| 912 | neh = ext_block_hdr(bh); | 935 | neh = ext_block_hdr(bh); |
| 913 | neh->eh_entries = cpu_to_le16(1); | 936 | neh->eh_entries = cpu_to_le16(1); |
| 914 | neh->eh_magic = EXT4_EXT_MAGIC; | 937 | neh->eh_magic = EXT4_EXT_MAGIC; |
| 915 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); | 938 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); |
| 916 | neh->eh_depth = cpu_to_le16(depth - i); | 939 | neh->eh_depth = cpu_to_le16(depth - i); |
| 917 | fidx = EXT_FIRST_INDEX(neh); | 940 | fidx = EXT_FIRST_INDEX(neh); |
| 918 | fidx->ei_block = border; | 941 | fidx->ei_block = border; |
| @@ -1037,9 +1060,9 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
| 1037 | /* old root could have indexes or leaves | 1060 | /* old root could have indexes or leaves |
| 1038 | * so calculate e_max right way */ | 1061 | * so calculate e_max right way */ |
| 1039 | if (ext_depth(inode)) | 1062 | if (ext_depth(inode)) |
| 1040 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); | 1063 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); |
| 1041 | else | 1064 | else |
| 1042 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); | 1065 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); |
| 1043 | neh->eh_magic = EXT4_EXT_MAGIC; | 1066 | neh->eh_magic = EXT4_EXT_MAGIC; |
| 1044 | set_buffer_uptodate(bh); | 1067 | set_buffer_uptodate(bh); |
| 1045 | unlock_buffer(bh); | 1068 | unlock_buffer(bh); |
| @@ -1054,7 +1077,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
| 1054 | goto out; | 1077 | goto out; |
| 1055 | 1078 | ||
| 1056 | curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; | 1079 | curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; |
| 1057 | curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode)); | 1080 | curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0)); |
| 1058 | curp->p_hdr->eh_entries = cpu_to_le16(1); | 1081 | curp->p_hdr->eh_entries = cpu_to_le16(1); |
| 1059 | curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); | 1082 | curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); |
| 1060 | 1083 | ||
| @@ -1580,9 +1603,11 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
| 1580 | 1603 | ||
| 1581 | /* try to insert block into found extent and return */ | 1604 | /* try to insert block into found extent and return */ |
| 1582 | if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { | 1605 | if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { |
| 1583 | ext_debug("append %d block to %d:%d (from %llu)\n", | 1606 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", |
| 1607 | ext4_ext_is_uninitialized(newext), | ||
| 1584 | ext4_ext_get_actual_len(newext), | 1608 | ext4_ext_get_actual_len(newext), |
| 1585 | le32_to_cpu(ex->ee_block), | 1609 | le32_to_cpu(ex->ee_block), |
| 1610 | ext4_ext_is_uninitialized(ex), | ||
| 1586 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 1611 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); |
| 1587 | err = ext4_ext_get_access(handle, inode, path + depth); | 1612 | err = ext4_ext_get_access(handle, inode, path + depth); |
| 1588 | if (err) | 1613 | if (err) |
| @@ -1651,9 +1676,10 @@ has_space: | |||
| 1651 | 1676 | ||
| 1652 | if (!nearex) { | 1677 | if (!nearex) { |
| 1653 | /* there is no extent in this leaf, create first one */ | 1678 | /* there is no extent in this leaf, create first one */ |
| 1654 | ext_debug("first extent in the leaf: %d:%llu:%d\n", | 1679 | ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", |
| 1655 | le32_to_cpu(newext->ee_block), | 1680 | le32_to_cpu(newext->ee_block), |
| 1656 | ext_pblock(newext), | 1681 | ext_pblock(newext), |
| 1682 | ext4_ext_is_uninitialized(newext), | ||
| 1657 | ext4_ext_get_actual_len(newext)); | 1683 | ext4_ext_get_actual_len(newext)); |
| 1658 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); | 1684 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); |
| 1659 | } else if (le32_to_cpu(newext->ee_block) | 1685 | } else if (le32_to_cpu(newext->ee_block) |
| @@ -1663,10 +1689,11 @@ has_space: | |||
| 1663 | len = EXT_MAX_EXTENT(eh) - nearex; | 1689 | len = EXT_MAX_EXTENT(eh) - nearex; |
| 1664 | len = (len - 1) * sizeof(struct ext4_extent); | 1690 | len = (len - 1) * sizeof(struct ext4_extent); |
| 1665 | len = len < 0 ? 0 : len; | 1691 | len = len < 0 ? 0 : len; |
| 1666 | ext_debug("insert %d:%llu:%d after: nearest 0x%p, " | 1692 | ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " |
| 1667 | "move %d from 0x%p to 0x%p\n", | 1693 | "move %d from 0x%p to 0x%p\n", |
| 1668 | le32_to_cpu(newext->ee_block), | 1694 | le32_to_cpu(newext->ee_block), |
| 1669 | ext_pblock(newext), | 1695 | ext_pblock(newext), |
| 1696 | ext4_ext_is_uninitialized(newext), | ||
| 1670 | ext4_ext_get_actual_len(newext), | 1697 | ext4_ext_get_actual_len(newext), |
| 1671 | nearex, len, nearex + 1, nearex + 2); | 1698 | nearex, len, nearex + 1, nearex + 2); |
| 1672 | memmove(nearex + 2, nearex + 1, len); | 1699 | memmove(nearex + 2, nearex + 1, len); |
| @@ -1676,10 +1703,11 @@ has_space: | |||
| 1676 | BUG_ON(newext->ee_block == nearex->ee_block); | 1703 | BUG_ON(newext->ee_block == nearex->ee_block); |
| 1677 | len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); | 1704 | len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); |
| 1678 | len = len < 0 ? 0 : len; | 1705 | len = len < 0 ? 0 : len; |
| 1679 | ext_debug("insert %d:%llu:%d before: nearest 0x%p, " | 1706 | ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " |
| 1680 | "move %d from 0x%p to 0x%p\n", | 1707 | "move %d from 0x%p to 0x%p\n", |
| 1681 | le32_to_cpu(newext->ee_block), | 1708 | le32_to_cpu(newext->ee_block), |
| 1682 | ext_pblock(newext), | 1709 | ext_pblock(newext), |
| 1710 | ext4_ext_is_uninitialized(newext), | ||
| 1683 | ext4_ext_get_actual_len(newext), | 1711 | ext4_ext_get_actual_len(newext), |
| 1684 | nearex, len, nearex + 1, nearex + 2); | 1712 | nearex, len, nearex + 1, nearex + 2); |
| 1685 | memmove(nearex + 1, nearex, len); | 1713 | memmove(nearex + 1, nearex, len); |
| @@ -2094,7 +2122,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 2094 | else | 2122 | else |
| 2095 | uninitialized = 0; | 2123 | uninitialized = 0; |
| 2096 | 2124 | ||
| 2097 | ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); | 2125 | ext_debug("remove ext %u:[%d]%d\n", ex_ee_block, |
| 2126 | uninitialized, ex_ee_len); | ||
| 2098 | path[depth].p_ext = ex; | 2127 | path[depth].p_ext = ex; |
| 2099 | 2128 | ||
| 2100 | a = ex_ee_block > start ? ex_ee_block : start; | 2129 | a = ex_ee_block > start ? ex_ee_block : start; |
| @@ -2138,7 +2167,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 2138 | } | 2167 | } |
| 2139 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | 2168 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); |
| 2140 | 2169 | ||
| 2141 | err = ext4_ext_journal_restart(handle, credits); | 2170 | err = ext4_ext_truncate_extend_restart(handle, inode, credits); |
| 2142 | if (err) | 2171 | if (err) |
| 2143 | goto out; | 2172 | goto out; |
| 2144 | 2173 | ||
| @@ -2327,7 +2356,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
| 2327 | if (err == 0) { | 2356 | if (err == 0) { |
| 2328 | ext_inode_hdr(inode)->eh_depth = 0; | 2357 | ext_inode_hdr(inode)->eh_depth = 0; |
| 2329 | ext_inode_hdr(inode)->eh_max = | 2358 | ext_inode_hdr(inode)->eh_max = |
| 2330 | cpu_to_le16(ext4_ext_space_root(inode)); | 2359 | cpu_to_le16(ext4_ext_space_root(inode, 0)); |
| 2331 | err = ext4_ext_dirty(handle, inode, path); | 2360 | err = ext4_ext_dirty(handle, inode, path); |
| 2332 | } | 2361 | } |
| 2333 | } | 2362 | } |
| @@ -2743,6 +2772,7 @@ insert: | |||
| 2743 | } else if (err) | 2772 | } else if (err) |
| 2744 | goto fix_extent_len; | 2773 | goto fix_extent_len; |
| 2745 | out: | 2774 | out: |
| 2775 | ext4_ext_show_leaf(inode, path); | ||
| 2746 | return err ? err : allocated; | 2776 | return err ? err : allocated; |
| 2747 | 2777 | ||
| 2748 | fix_extent_len: | 2778 | fix_extent_len: |
| @@ -2786,7 +2816,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2786 | struct ext4_allocation_request ar; | 2816 | struct ext4_allocation_request ar; |
| 2787 | 2817 | ||
| 2788 | __clear_bit(BH_New, &bh_result->b_state); | 2818 | __clear_bit(BH_New, &bh_result->b_state); |
| 2789 | ext_debug("blocks %u/%u requested for inode %u\n", | 2819 | ext_debug("blocks %u/%u requested for inode %lu\n", |
| 2790 | iblock, max_blocks, inode->i_ino); | 2820 | iblock, max_blocks, inode->i_ino); |
| 2791 | 2821 | ||
| 2792 | /* check in cache */ | 2822 | /* check in cache */ |
| @@ -2849,7 +2879,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2849 | newblock = iblock - ee_block + ee_start; | 2879 | newblock = iblock - ee_block + ee_start; |
| 2850 | /* number of remaining blocks in the extent */ | 2880 | /* number of remaining blocks in the extent */ |
| 2851 | allocated = ee_len - (iblock - ee_block); | 2881 | allocated = ee_len - (iblock - ee_block); |
| 2852 | ext_debug("%u fit into %lu:%d -> %llu\n", iblock, | 2882 | ext_debug("%u fit into %u:%d -> %llu\n", iblock, |
| 2853 | ee_block, ee_len, newblock); | 2883 | ee_block, ee_len, newblock); |
| 2854 | 2884 | ||
| 2855 | /* Do not put uninitialized extent in the cache */ | 2885 | /* Do not put uninitialized extent in the cache */ |
| @@ -2950,7 +2980,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2950 | newblock = ext4_mb_new_blocks(handle, &ar, &err); | 2980 | newblock = ext4_mb_new_blocks(handle, &ar, &err); |
| 2951 | if (!newblock) | 2981 | if (!newblock) |
| 2952 | goto out2; | 2982 | goto out2; |
| 2953 | ext_debug("allocate new block: goal %llu, found %llu/%lu\n", | 2983 | ext_debug("allocate new block: goal %llu, found %llu/%u\n", |
| 2954 | ar.goal, newblock, allocated); | 2984 | ar.goal, newblock, allocated); |
| 2955 | 2985 | ||
| 2956 | /* try to insert new extent into found leaf and return */ | 2986 | /* try to insert new extent into found leaf and return */ |
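
A note on the restart helper above: ext4_ext_truncate_extend_restart() restarts the running handle via ext4_truncate_restart_trans() (defined in the inode.c hunk further below, which drops and retakes i_data_sem around the restart) and then invalidates the extent cache, since another task may have repopulated it while the semaphore was released. A rough userspace sketch of that lock-drop-and-revalidate pattern, with hypothetical stand-ins for the journal calls:

```c
#include <pthread.h>

/* Stand-ins (assumptions, not the kernel API) for the journal restart and
 * the extent-cache invalidation done by the real helpers. */
static int journal_restart(int nblocks) { (void)nblocks; return 0; }
static void invalidate_extent_cache(void) { }

static pthread_rwlock_t i_data_sem = PTHREAD_RWLOCK_INITIALIZER;

/* Caller holds i_data_sem for writing, as the truncate path does. */
static int truncate_restart_trans(int nblocks)
{
	int ret;

	pthread_rwlock_unlock(&i_data_sem);   /* up_write(&...->i_data_sem)   */
	ret = journal_restart(nblocks);       /* may block waiting on commit  */
	pthread_rwlock_wrlock(&i_data_sem);   /* down_write(&...->i_data_sem) */

	/*
	 * While the lock was dropped, get_block may have cached an extent we
	 * are about to truncate, so any cached lookup state is now stale.
	 */
	invalidate_extent_cache();
	return ret;
}

int main(void)
{
	pthread_rwlock_wrlock(&i_data_sem);
	truncate_restart_trans(8);
	pthread_rwlock_unlock(&i_data_sem);
	return 0;
}
```
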
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 83cf6415f599..07475740b512 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
| @@ -50,7 +50,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
| 50 | { | 50 | { |
| 51 | struct inode *inode = dentry->d_inode; | 51 | struct inode *inode = dentry->d_inode; |
| 52 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 52 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
| 53 | int ret = 0; | 53 | int err, ret = 0; |
| 54 | 54 | ||
| 55 | J_ASSERT(ext4_journal_current_handle() == NULL); | 55 | J_ASSERT(ext4_journal_current_handle() == NULL); |
| 56 | 56 | ||
| @@ -79,6 +79,9 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
| 79 | goto out; | 79 | goto out; |
| 80 | } | 80 | } |
| 81 | 81 | ||
| 82 | if (!journal) | ||
| 83 | ret = sync_mapping_buffers(inode->i_mapping); | ||
| 84 | |||
| 82 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | 85 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) |
| 83 | goto out; | 86 | goto out; |
| 84 | 87 | ||
| @@ -91,10 +94,12 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
| 91 | .sync_mode = WB_SYNC_ALL, | 94 | .sync_mode = WB_SYNC_ALL, |
| 92 | .nr_to_write = 0, /* sys_fsync did this */ | 95 | .nr_to_write = 0, /* sys_fsync did this */ |
| 93 | }; | 96 | }; |
| 94 | ret = sync_inode(inode, &wbc); | 97 | err = sync_inode(inode, &wbc); |
| 95 | if (journal && (journal->j_flags & JBD2_BARRIER)) | 98 | if (ret == 0) |
| 96 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 99 | ret = err; |
| 97 | } | 100 | } |
| 98 | out: | 101 | out: |
| 102 | if (journal && (journal->j_flags & JBD2_BARRIER)) | ||
| 103 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | ||
| 99 | return ret; | 104 | return ret; |
| 100 | } | 105 | } |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 29e6dc7299b8..f3624ead4f6c 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
| @@ -1189,7 +1189,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) | |||
| 1189 | 1189 | ||
| 1190 | x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); | 1190 | x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); |
| 1191 | printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", | 1191 | printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", |
| 1192 | i, ext4_free_inodes_count(sb, gdp), x); | 1192 | (unsigned long) i, ext4_free_inodes_count(sb, gdp), x); |
| 1193 | bitmap_count += x; | 1193 | bitmap_count += x; |
| 1194 | } | 1194 | } |
| 1195 | brelse(bitmap_bh); | 1195 | brelse(bitmap_bh); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f9c642b22efa..4abd683b963d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
| @@ -192,11 +192,24 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) | |||
| 192 | * so before we call here everything must be consistently dirtied against | 192 | * so before we call here everything must be consistently dirtied against |
| 193 | * this transaction. | 193 | * this transaction. |
| 194 | */ | 194 | */ |
| 195 | static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) | 195 | int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, |
| 196 | int nblocks) | ||
| 196 | { | 197 | { |
| 198 | int ret; | ||
| 199 | |||
| 200 | /* | ||
| 201 | * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this | ||
| 202 | * moment, get_block can be called only for blocks inside i_size since | ||
| 203 | * page cache has been already dropped and writes are blocked by | ||
| 204 | * i_mutex. So we can safely drop the i_data_sem here. | ||
| 205 | */ | ||
| 197 | BUG_ON(EXT4_JOURNAL(inode) == NULL); | 206 | BUG_ON(EXT4_JOURNAL(inode) == NULL); |
| 198 | jbd_debug(2, "restarting handle %p\n", handle); | 207 | jbd_debug(2, "restarting handle %p\n", handle); |
| 199 | return ext4_journal_restart(handle, blocks_for_truncate(inode)); | 208 | up_write(&EXT4_I(inode)->i_data_sem); |
| 209 | ret = ext4_journal_restart(handle, blocks_for_truncate(inode)); | ||
| 210 | down_write(&EXT4_I(inode)->i_data_sem); | ||
| 211 | |||
| 212 | return ret; | ||
| 200 | } | 213 | } |
| 201 | 214 | ||
| 202 | /* | 215 | /* |
| @@ -341,9 +354,7 @@ static int ext4_block_to_path(struct inode *inode, | |||
| 341 | int n = 0; | 354 | int n = 0; |
| 342 | int final = 0; | 355 | int final = 0; |
| 343 | 356 | ||
| 344 | if (i_block < 0) { | 357 | if (i_block < direct_blocks) { |
| 345 | ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0"); | ||
| 346 | } else if (i_block < direct_blocks) { | ||
| 347 | offsets[n++] = i_block; | 358 | offsets[n++] = i_block; |
| 348 | final = direct_blocks; | 359 | final = direct_blocks; |
| 349 | } else if ((i_block -= direct_blocks) < indirect_blocks) { | 360 | } else if ((i_block -= direct_blocks) < indirect_blocks) { |
| @@ -551,15 +562,21 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
| 551 | * | 562 | * |
| 552 | * Normally this function find the preferred place for block allocation, | 563 | * Normally this function find the preferred place for block allocation, |
| 553 | * returns it. | 564 | * returns it. |
| 565 | * Because this is only used for non-extent files, we limit the block nr | ||
| 566 | * to 32 bits. | ||
| 554 | */ | 567 | */ |
| 555 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | 568 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
| 556 | Indirect *partial) | 569 | Indirect *partial) |
| 557 | { | 570 | { |
| 571 | ext4_fsblk_t goal; | ||
| 572 | |||
| 558 | /* | 573 | /* |
| 559 | * XXX need to get goal block from mballoc's data structures | 574 | * XXX need to get goal block from mballoc's data structures |
| 560 | */ | 575 | */ |
| 561 | 576 | ||
| 562 | return ext4_find_near(inode, partial); | 577 | goal = ext4_find_near(inode, partial); |
| 578 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | ||
| 579 | return goal; | ||
| 563 | } | 580 | } |
| 564 | 581 | ||
| 565 | /** | 582 | /** |
| @@ -640,6 +657,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
| 640 | if (*err) | 657 | if (*err) |
| 641 | goto failed_out; | 658 | goto failed_out; |
| 642 | 659 | ||
| 660 | BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS); | ||
| 661 | |||
| 643 | target -= count; | 662 | target -= count; |
| 644 | /* allocate blocks for indirect blocks */ | 663 | /* allocate blocks for indirect blocks */ |
| 645 | while (index < indirect_blks && count) { | 664 | while (index < indirect_blks && count) { |
| @@ -674,6 +693,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
| 674 | ar.flags = EXT4_MB_HINT_DATA; | 693 | ar.flags = EXT4_MB_HINT_DATA; |
| 675 | 694 | ||
| 676 | current_block = ext4_mb_new_blocks(handle, &ar, err); | 695 | current_block = ext4_mb_new_blocks(handle, &ar, err); |
| 696 | BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS); | ||
| 677 | 697 | ||
| 678 | if (*err && (target == blks)) { | 698 | if (*err && (target == blks)) { |
| 679 | /* | 699 | /* |
| @@ -762,8 +782,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
| 762 | BUFFER_TRACE(bh, "call get_create_access"); | 782 | BUFFER_TRACE(bh, "call get_create_access"); |
| 763 | err = ext4_journal_get_create_access(handle, bh); | 783 | err = ext4_journal_get_create_access(handle, bh); |
| 764 | if (err) { | 784 | if (err) { |
| 785 | /* Don't brelse(bh) here; it's done in | ||
| 786 | * ext4_journal_forget() below */ | ||
| 765 | unlock_buffer(bh); | 787 | unlock_buffer(bh); |
| 766 | brelse(bh); | ||
| 767 | goto failed; | 788 | goto failed; |
| 768 | } | 789 | } |
| 769 | 790 | ||
| @@ -1109,16 +1130,15 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
| 1109 | ext4_discard_preallocations(inode); | 1130 | ext4_discard_preallocations(inode); |
| 1110 | } | 1131 | } |
| 1111 | 1132 | ||
| 1112 | static int check_block_validity(struct inode *inode, sector_t logical, | 1133 | static int check_block_validity(struct inode *inode, const char *msg, |
| 1113 | sector_t phys, int len) | 1134 | sector_t logical, sector_t phys, int len) |
| 1114 | { | 1135 | { |
| 1115 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | 1136 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { |
| 1116 | ext4_error(inode->i_sb, "check_block_validity", | 1137 | ext4_error(inode->i_sb, msg, |
| 1117 | "inode #%lu logical block %llu mapped to %llu " | 1138 | "inode #%lu logical block %llu mapped to %llu " |
| 1118 | "(size %d)", inode->i_ino, | 1139 | "(size %d)", inode->i_ino, |
| 1119 | (unsigned long long) logical, | 1140 | (unsigned long long) logical, |
| 1120 | (unsigned long long) phys, len); | 1141 | (unsigned long long) phys, len); |
| 1121 | WARN_ON(1); | ||
| 1122 | return -EIO; | 1142 | return -EIO; |
| 1123 | } | 1143 | } |
| 1124 | return 0; | 1144 | return 0; |
| @@ -1170,8 +1190,8 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
| 1170 | up_read((&EXT4_I(inode)->i_data_sem)); | 1190 | up_read((&EXT4_I(inode)->i_data_sem)); |
| 1171 | 1191 | ||
| 1172 | if (retval > 0 && buffer_mapped(bh)) { | 1192 | if (retval > 0 && buffer_mapped(bh)) { |
| 1173 | int ret = check_block_validity(inode, block, | 1193 | int ret = check_block_validity(inode, "file system corruption", |
| 1174 | bh->b_blocknr, retval); | 1194 | block, bh->b_blocknr, retval); |
| 1175 | if (ret != 0) | 1195 | if (ret != 0) |
| 1176 | return ret; | 1196 | return ret; |
| 1177 | } | 1197 | } |
| @@ -1235,8 +1255,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
| 1235 | * i_data's format changing. Force the migrate | 1255 | * i_data's format changing. Force the migrate |
| 1236 | * to fail by clearing migrate flags | 1256 | * to fail by clearing migrate flags |
| 1237 | */ | 1257 | */ |
| 1238 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & | 1258 | EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; |
| 1239 | ~EXT4_EXT_MIGRATE; | ||
| 1240 | } | 1259 | } |
| 1241 | } | 1260 | } |
| 1242 | 1261 | ||
| @@ -1252,8 +1271,9 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
| 1252 | 1271 | ||
| 1253 | up_write((&EXT4_I(inode)->i_data_sem)); | 1272 | up_write((&EXT4_I(inode)->i_data_sem)); |
| 1254 | if (retval > 0 && buffer_mapped(bh)) { | 1273 | if (retval > 0 && buffer_mapped(bh)) { |
| 1255 | int ret = check_block_validity(inode, block, | 1274 | int ret = check_block_validity(inode, "file system " |
| 1256 | bh->b_blocknr, retval); | 1275 | "corruption after allocation", |
| 1276 | block, bh->b_blocknr, retval); | ||
| 1257 | if (ret != 0) | 1277 | if (ret != 0) |
| 1258 | return ret; | 1278 | return ret; |
| 1259 | } | 1279 | } |
| @@ -1863,18 +1883,6 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
| 1863 | * Delayed allocation stuff | 1883 | * Delayed allocation stuff |
| 1864 | */ | 1884 | */ |
| 1865 | 1885 | ||
| 1866 | struct mpage_da_data { | ||
| 1867 | struct inode *inode; | ||
| 1868 | sector_t b_blocknr; /* start block number of extent */ | ||
| 1869 | size_t b_size; /* size of extent */ | ||
| 1870 | unsigned long b_state; /* state of the extent */ | ||
| 1871 | unsigned long first_page, next_page; /* extent of pages */ | ||
| 1872 | struct writeback_control *wbc; | ||
| 1873 | int io_done; | ||
| 1874 | int pages_written; | ||
| 1875 | int retval; | ||
| 1876 | }; | ||
| 1877 | |||
| 1878 | /* | 1886 | /* |
| 1879 | * mpage_da_submit_io - walks through extent of pages and try to write | 1887 | * mpage_da_submit_io - walks through extent of pages and try to write |
| 1880 | * them with writepage() call back | 1888 | * them with writepage() call back |
| @@ -2737,6 +2745,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
| 2737 | long pages_skipped; | 2745 | long pages_skipped; |
| 2738 | int range_cyclic, cycled = 1, io_done = 0; | 2746 | int range_cyclic, cycled = 1, io_done = 0; |
| 2739 | int needed_blocks, ret = 0, nr_to_writebump = 0; | 2747 | int needed_blocks, ret = 0, nr_to_writebump = 0; |
| 2748 | loff_t range_start = wbc->range_start; | ||
| 2740 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2749 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
| 2741 | 2750 | ||
| 2742 | trace_ext4_da_writepages(inode, wbc); | 2751 | trace_ext4_da_writepages(inode, wbc); |
| @@ -2850,6 +2859,7 @@ retry: | |||
| 2850 | mpd.io_done = 1; | 2859 | mpd.io_done = 1; |
| 2851 | ret = MPAGE_DA_EXTENT_TAIL; | 2860 | ret = MPAGE_DA_EXTENT_TAIL; |
| 2852 | } | 2861 | } |
| 2862 | trace_ext4_da_write_pages(inode, &mpd); | ||
| 2853 | wbc->nr_to_write -= mpd.pages_written; | 2863 | wbc->nr_to_write -= mpd.pages_written; |
| 2854 | 2864 | ||
| 2855 | ext4_journal_stop(handle); | 2865 | ext4_journal_stop(handle); |
| @@ -2905,6 +2915,7 @@ out_writepages: | |||
| 2905 | if (!no_nrwrite_index_update) | 2915 | if (!no_nrwrite_index_update) |
| 2906 | wbc->no_nrwrite_index_update = 0; | 2916 | wbc->no_nrwrite_index_update = 0; |
| 2907 | wbc->nr_to_write -= nr_to_writebump; | 2917 | wbc->nr_to_write -= nr_to_writebump; |
| 2918 | wbc->range_start = range_start; | ||
| 2908 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 2919 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); |
| 2909 | return ret; | 2920 | return ret; |
| 2910 | } | 2921 | } |
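
For context on the `range_start` lines added above: the writeback loop advances `wbc->range_start` as pages go out, so the function snapshots it on entry and restores it at `out_writepages` so the caller sees the value it passed in. Below is a small illustrative sketch of that save/restore idiom, not kernel code; `struct wb_control` and `process_range()` are made-up stand-ins.

```c
#include <stdio.h>

/*
 * Illustrative sketch: save a caller-visible field before a worker
 * consumes it, restore it on the way out, mirroring the range_start
 * handling added to ext4_da_writepages() above.
 */
struct wb_control {
	long long range_start;
	long long range_end;
	long nr_to_write;
};

static void process_range(struct wb_control *wbc)
{
	/* a worker that consumes the range by advancing range_start */
	while (wbc->range_start < wbc->range_end && wbc->nr_to_write > 0) {
		wbc->range_start += 4096;
		wbc->nr_to_write--;
	}
}

static void writepages(struct wb_control *wbc)
{
	long long range_start = wbc->range_start;	/* save on entry */

	process_range(wbc);

	wbc->range_start = range_start;			/* restore for the caller */
}

int main(void)
{
	struct wb_control wbc = { 0, 65536, 4 };

	writepages(&wbc);
	printf("range_start=%lld nr_to_write=%ld\n",
	       wbc.range_start, wbc.nr_to_write);
	return 0;
}
```
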
| @@ -3117,6 +3128,8 @@ out: | |||
| 3117 | */ | 3128 | */ |
| 3118 | int ext4_alloc_da_blocks(struct inode *inode) | 3129 | int ext4_alloc_da_blocks(struct inode *inode) |
| 3119 | { | 3130 | { |
| 3131 | trace_ext4_alloc_da_blocks(inode); | ||
| 3132 | |||
| 3120 | if (!EXT4_I(inode)->i_reserved_data_blocks && | 3133 | if (!EXT4_I(inode)->i_reserved_data_blocks && |
| 3121 | !EXT4_I(inode)->i_reserved_meta_blocks) | 3134 | !EXT4_I(inode)->i_reserved_meta_blocks) |
| 3122 | return 0; | 3135 | return 0; |
| @@ -3659,7 +3672,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
| 3659 | ext4_handle_dirty_metadata(handle, inode, bh); | 3672 | ext4_handle_dirty_metadata(handle, inode, bh); |
| 3660 | } | 3673 | } |
| 3661 | ext4_mark_inode_dirty(handle, inode); | 3674 | ext4_mark_inode_dirty(handle, inode); |
| 3662 | ext4_journal_test_restart(handle, inode); | 3675 | ext4_truncate_restart_trans(handle, inode, |
| 3676 | blocks_for_truncate(inode)); | ||
| 3663 | if (bh) { | 3677 | if (bh) { |
| 3664 | BUFFER_TRACE(bh, "retaking write access"); | 3678 | BUFFER_TRACE(bh, "retaking write access"); |
| 3665 | ext4_journal_get_write_access(handle, bh); | 3679 | ext4_journal_get_write_access(handle, bh); |
| @@ -3870,7 +3884,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
| 3870 | return; | 3884 | return; |
| 3871 | if (try_to_extend_transaction(handle, inode)) { | 3885 | if (try_to_extend_transaction(handle, inode)) { |
| 3872 | ext4_mark_inode_dirty(handle, inode); | 3886 | ext4_mark_inode_dirty(handle, inode); |
| 3873 | ext4_journal_test_restart(handle, inode); | 3887 | ext4_truncate_restart_trans(handle, inode, |
| 3888 | blocks_for_truncate(inode)); | ||
| 3874 | } | 3889 | } |
| 3875 | 3890 | ||
| 3876 | ext4_free_blocks(handle, inode, nr, 1, 1); | 3891 | ext4_free_blocks(handle, inode, nr, 1, 1); |
| @@ -3958,8 +3973,7 @@ void ext4_truncate(struct inode *inode) | |||
| 3958 | if (!ext4_can_truncate(inode)) | 3973 | if (!ext4_can_truncate(inode)) |
| 3959 | return; | 3974 | return; |
| 3960 | 3975 | ||
| 3961 | if (ei->i_disksize && inode->i_size == 0 && | 3976 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) |
| 3962 | !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | ||
| 3963 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; | 3977 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; |
| 3964 | 3978 | ||
| 3965 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 3979 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
| @@ -4533,7 +4547,8 @@ static int ext4_inode_blocks_set(handle_t *handle, | |||
| 4533 | */ | 4547 | */ |
| 4534 | static int ext4_do_update_inode(handle_t *handle, | 4548 | static int ext4_do_update_inode(handle_t *handle, |
| 4535 | struct inode *inode, | 4549 | struct inode *inode, |
| 4536 | struct ext4_iloc *iloc) | 4550 | struct ext4_iloc *iloc, |
| 4551 | int do_sync) | ||
| 4537 | { | 4552 | { |
| 4538 | struct ext4_inode *raw_inode = ext4_raw_inode(iloc); | 4553 | struct ext4_inode *raw_inode = ext4_raw_inode(iloc); |
| 4539 | struct ext4_inode_info *ei = EXT4_I(inode); | 4554 | struct ext4_inode_info *ei = EXT4_I(inode); |
| @@ -4581,8 +4596,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
| 4581 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) | 4596 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) |
| 4582 | goto out_brelse; | 4597 | goto out_brelse; |
| 4583 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); | 4598 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); |
| 4584 | /* clear the migrate flag in the raw_inode */ | 4599 | raw_inode->i_flags = cpu_to_le32(ei->i_flags); |
| 4585 | raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE); | ||
| 4586 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 4600 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
| 4587 | cpu_to_le32(EXT4_OS_HURD)) | 4601 | cpu_to_le32(EXT4_OS_HURD)) |
| 4588 | raw_inode->i_file_acl_high = | 4602 | raw_inode->i_file_acl_high = |
| @@ -4635,10 +4649,22 @@ static int ext4_do_update_inode(handle_t *handle, | |||
| 4635 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); | 4649 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); |
| 4636 | } | 4650 | } |
| 4637 | 4651 | ||
| 4638 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4652 | /* |
| 4639 | rc = ext4_handle_dirty_metadata(handle, inode, bh); | 4653 | * If we're not using a journal and we were called from |
| 4640 | if (!err) | 4654 | * ext4_write_inode() to sync the inode (making do_sync true), |
| 4641 | err = rc; | 4655 | * we can just use sync_dirty_buffer() directly to do our dirty |
| 4656 | * work. Testing s_journal here is a bit redundant but it's | ||
| 4657 | * worth it to avoid potential future trouble. | ||
| 4658 | */ | ||
| 4659 | if (EXT4_SB(inode->i_sb)->s_journal == NULL && do_sync) { | ||
| 4660 | BUFFER_TRACE(bh, "call sync_dirty_buffer"); | ||
| 4661 | sync_dirty_buffer(bh); | ||
| 4662 | } else { | ||
| 4663 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
| 4664 | rc = ext4_handle_dirty_metadata(handle, inode, bh); | ||
| 4665 | if (!err) | ||
| 4666 | err = rc; | ||
| 4667 | } | ||
| 4642 | ei->i_state &= ~EXT4_STATE_NEW; | 4668 | ei->i_state &= ~EXT4_STATE_NEW; |
| 4643 | 4669 | ||
| 4644 | out_brelse: | 4670 | out_brelse: |
| @@ -4684,19 +4710,32 @@ out_brelse: | |||
| 4684 | */ | 4710 | */ |
| 4685 | int ext4_write_inode(struct inode *inode, int wait) | 4711 | int ext4_write_inode(struct inode *inode, int wait) |
| 4686 | { | 4712 | { |
| 4713 | int err; | ||
| 4714 | |||
| 4687 | if (current->flags & PF_MEMALLOC) | 4715 | if (current->flags & PF_MEMALLOC) |
| 4688 | return 0; | 4716 | return 0; |
| 4689 | 4717 | ||
| 4690 | if (ext4_journal_current_handle()) { | 4718 | if (EXT4_SB(inode->i_sb)->s_journal) { |
| 4691 | jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); | 4719 | if (ext4_journal_current_handle()) { |
| 4692 | dump_stack(); | 4720 | jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); |
| 4693 | return -EIO; | 4721 | dump_stack(); |
| 4694 | } | 4722 | return -EIO; |
| 4723 | } | ||
| 4695 | 4724 | ||
| 4696 | if (!wait) | 4725 | if (!wait) |
| 4697 | return 0; | 4726 | return 0; |
| 4727 | |||
| 4728 | err = ext4_force_commit(inode->i_sb); | ||
| 4729 | } else { | ||
| 4730 | struct ext4_iloc iloc; | ||
| 4698 | 4731 | ||
| 4699 | return ext4_force_commit(inode->i_sb); | 4732 | err = ext4_get_inode_loc(inode, &iloc); |
| 4733 | if (err) | ||
| 4734 | return err; | ||
| 4735 | err = ext4_do_update_inode(EXT4_NOJOURNAL_HANDLE, | ||
| 4736 | inode, &iloc, wait); | ||
| 4737 | } | ||
| 4738 | return err; | ||
| 4700 | } | 4739 | } |
| 4701 | 4740 | ||
| 4702 | /* | 4741 | /* |
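
The rework above gives `ext4_write_inode()` two paths: with a journal, syncing an inode means forcing a commit; without one, the raw inode buffer is located and written out directly (and `ext4_do_update_inode()` then uses `sync_dirty_buffer()` for the `do_sync` case). The sketch below is not the kernel code, just a userspace outline of that dispatch; all functions in it are stand-ins.

```c
#include <stdio.h>
#include <stdbool.h>

/*
 * Illustrative sketch of the two write_inode() paths above:
 * journalled filesystems sync by committing, no-journal filesystems
 * push the inode buffer themselves.
 */
static bool has_journal;

static int force_commit(void)
{
	printf("journal: forcing a commit\n");
	return 0;
}

static int update_raw_inode(int do_sync)
{
	printf("no journal: writing raw inode (%s)\n",
	       do_sync ? "synchronously" : "just dirtied");
	return 0;
}

static int write_inode(int wait)
{
	if (has_journal) {
		if (!wait)
			return 0;	/* the next commit will pick it up */
		return force_commit();
	}
	/* no journal: write the inode buffer ourselves */
	return update_raw_inode(wait);
}

int main(void)
{
	has_journal = true;
	write_inode(1);

	has_journal = false;
	write_inode(1);
	return 0;
}
```
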
| @@ -4990,7 +5029,7 @@ int ext4_mark_iloc_dirty(handle_t *handle, | |||
| 4990 | get_bh(iloc->bh); | 5029 | get_bh(iloc->bh); |
| 4991 | 5030 | ||
| 4992 | /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ | 5031 | /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ |
| 4993 | err = ext4_do_update_inode(handle, inode, iloc); | 5032 | err = ext4_do_update_inode(handle, inode, iloc, 0); |
| 4994 | put_bh(iloc->bh); | 5033 | put_bh(iloc->bh); |
| 4995 | return err; | 5034 | return err; |
| 4996 | } | 5035 | } |
| @@ -5281,12 +5320,21 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 5281 | else | 5320 | else |
| 5282 | len = PAGE_CACHE_SIZE; | 5321 | len = PAGE_CACHE_SIZE; |
| 5283 | 5322 | ||
| 5323 | lock_page(page); | ||
| 5324 | /* | ||
| 5325 | * return if we have all the buffers mapped. This avoid | ||
| 5325 | * return if we have all the buffers mapped. This avoids | ||
| 5326 | * the need to call write_begin/write_end which does a | ||
| 5327 | * journal_start/journal_stop which can block and take | ||
| 5328 | * a long time | ||
| 5329 | */ | ||
| 5284 | if (page_has_buffers(page)) { | 5330 | if (page_has_buffers(page)) { |
| 5285 | /* return if we have all the buffers mapped */ | ||
| 5286 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | 5331 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, |
| 5287 | ext4_bh_unmapped)) | 5332 | ext4_bh_unmapped)) { |
| 5333 | unlock_page(page); | ||
| 5288 | goto out_unlock; | 5334 | goto out_unlock; |
| 5335 | } | ||
| 5289 | } | 5336 | } |
| 5337 | unlock_page(page); | ||
| 5290 | /* | 5338 | /* |
| 5291 | * OK, we need to fill the hole... Do write_begin write_end | 5339 | * OK, we need to fill the hole... Do write_begin write_end |
| 5292 | * to do block allocation/reservation.We are not holding | 5340 | * to do block allocation/reservation.We are not holding |
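
The page_mkwrite change above takes the page lock before inspecting the buffers and bails out early when everything is already mapped, which skips the costly write_begin/write_end (journal start/stop) path. Here is an illustrative fast-path sketch of the same control flow; it is not the kernel code, and `struct fake_page` and its fields are hypothetical.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Illustrative sketch of the fast path above: check under the page
 * lock whether every buffer is already mapped, and only fall through
 * to the expensive allocation path when it is not.
 */
struct fake_page {
	pthread_mutex_t lock;		/* stands in for lock_page() */
	bool has_buffers;
	int nr_buffers;
	bool mapped[8];
};

static bool all_buffers_mapped(const struct fake_page *p)
{
	for (int i = 0; i < p->nr_buffers; i++)
		if (!p->mapped[i])
			return false;
	return true;
}

static int page_mkwrite(struct fake_page *p)
{
	pthread_mutex_lock(&p->lock);			/* lock_page() */
	if (p->has_buffers && all_buffers_mapped(p)) {
		/* fast path: nothing to allocate, skip the journal */
		pthread_mutex_unlock(&p->lock);
		return 0;
	}
	pthread_mutex_unlock(&p->lock);			/* drop before blocking work */

	/* slow path: block allocation/reservation would happen here */
	return 1;
}

int main(void)
{
	struct fake_page p = { PTHREAD_MUTEX_INITIALIZER, true, 2,
			       { true, true } };

	printf("fast path taken: %s\n", page_mkwrite(&p) == 0 ? "yes" : "no");
	return 0;
}
```
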
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7050a9cd04a4..c1cdf613e725 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
| @@ -243,10 +243,9 @@ setversion_out: | |||
| 243 | me.donor_start, me.len, &me.moved_len); | 243 | me.donor_start, me.len, &me.moved_len); |
| 244 | fput(donor_filp); | 244 | fput(donor_filp); |
| 245 | 245 | ||
| 246 | if (!err) | 246 | if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) |
| 247 | if (copy_to_user((struct move_extent *)arg, | 247 | return -EFAULT; |
| 248 | &me, sizeof(me))) | 248 | |
| 249 | return -EFAULT; | ||
| 250 | return err; | 249 | return err; |
| 251 | } | 250 | } |
| 252 | 251 | ||
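
The ioctl fix above copies the `move_extent` result back to userspace unconditionally, so the caller learns how much was actually moved even when the operation failed, and a failed copy turns into `-EFAULT`. A hedged userspace sketch of that "always report progress" shape follows; `copy_out()` stands in for `copy_to_user()` and cannot fault here.

```c
#include <errno.h>
#include <stdio.h>
#include <string.h>

/*
 * Illustrative sketch: the result structure is copied out even when
 * the operation itself returned an error, mirroring the ioctl hunk
 * above.  Everything here is a userspace stand-in.
 */
struct move_extent {
	unsigned long long len;
	unsigned long long moved_len;
};

static int do_move(struct move_extent *me)
{
	me->moved_len = me->len / 2;	/* pretend we moved half, then failed */
	return -EBUSY;
}

static int copy_out(void *dst, const void *src, size_t n)
{
	memcpy(dst, src, n);		/* copy_to_user() stand-in */
	return 0;
}

static int ioctl_move_ext(struct move_extent *user_arg)
{
	struct move_extent me = *user_arg;
	int err = do_move(&me);

	/* copy the result out unconditionally, as in the hunk above */
	if (copy_out(user_arg, &me, sizeof(me)))
		return -EFAULT;
	return err;
}

int main(void)
{
	struct move_extent arg = { .len = 100, .moved_len = 0 };
	int err = ioctl_move_ext(&arg);

	printf("err=%d moved_len=%llu\n", err, arg.moved_len);
	return 0;
}
```
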
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index cd258463e2a9..e9c61896d605 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | */ | 22 | */ |
| 23 | 23 | ||
| 24 | #include "mballoc.h" | 24 | #include "mballoc.h" |
| 25 | #include <linux/debugfs.h> | ||
| 25 | #include <trace/events/ext4.h> | 26 | #include <trace/events/ext4.h> |
| 26 | 27 | ||
| 27 | /* | 28 | /* |
| @@ -622,13 +623,13 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
| 622 | 623 | ||
| 623 | /* FIXME!! need more doc */ | 624 | /* FIXME!! need more doc */ |
| 624 | static void ext4_mb_mark_free_simple(struct super_block *sb, | 625 | static void ext4_mb_mark_free_simple(struct super_block *sb, |
| 625 | void *buddy, unsigned first, int len, | 626 | void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, |
| 626 | struct ext4_group_info *grp) | 627 | struct ext4_group_info *grp) |
| 627 | { | 628 | { |
| 628 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 629 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 629 | unsigned short min; | 630 | ext4_grpblk_t min; |
| 630 | unsigned short max; | 631 | ext4_grpblk_t max; |
| 631 | unsigned short chunk; | 632 | ext4_grpblk_t chunk; |
| 632 | unsigned short border; | 633 | unsigned short border; |
| 633 | 634 | ||
| 634 | BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); | 635 | BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); |
| @@ -662,10 +663,10 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
| 662 | void *buddy, void *bitmap, ext4_group_t group) | 663 | void *buddy, void *bitmap, ext4_group_t group) |
| 663 | { | 664 | { |
| 664 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 665 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
| 665 | unsigned short max = EXT4_BLOCKS_PER_GROUP(sb); | 666 | ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb); |
| 666 | unsigned short i = 0; | 667 | ext4_grpblk_t i = 0; |
| 667 | unsigned short first; | 668 | ext4_grpblk_t first; |
| 668 | unsigned short len; | 669 | ext4_grpblk_t len; |
| 669 | unsigned free = 0; | 670 | unsigned free = 0; |
| 670 | unsigned fragments = 0; | 671 | unsigned fragments = 0; |
| 671 | unsigned long long period = get_cycles(); | 672 | unsigned long long period = get_cycles(); |
| @@ -743,7 +744,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
| 743 | char *data; | 744 | char *data; |
| 744 | char *bitmap; | 745 | char *bitmap; |
| 745 | 746 | ||
| 746 | mb_debug("init page %lu\n", page->index); | 747 | mb_debug(1, "init page %lu\n", page->index); |
| 747 | 748 | ||
| 748 | inode = page->mapping->host; | 749 | inode = page->mapping->host; |
| 749 | sb = inode->i_sb; | 750 | sb = inode->i_sb; |
| @@ -822,7 +823,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
| 822 | set_bitmap_uptodate(bh[i]); | 823 | set_bitmap_uptodate(bh[i]); |
| 823 | bh[i]->b_end_io = end_buffer_read_sync; | 824 | bh[i]->b_end_io = end_buffer_read_sync; |
| 824 | submit_bh(READ, bh[i]); | 825 | submit_bh(READ, bh[i]); |
| 825 | mb_debug("read bitmap for group %u\n", first_group + i); | 826 | mb_debug(1, "read bitmap for group %u\n", first_group + i); |
| 826 | } | 827 | } |
| 827 | 828 | ||
| 828 | /* wait for I/O completion */ | 829 | /* wait for I/O completion */ |
| @@ -862,12 +863,13 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
| 862 | if ((first_block + i) & 1) { | 863 | if ((first_block + i) & 1) { |
| 863 | /* this is block of buddy */ | 864 | /* this is block of buddy */ |
| 864 | BUG_ON(incore == NULL); | 865 | BUG_ON(incore == NULL); |
| 865 | mb_debug("put buddy for group %u in page %lu/%x\n", | 866 | mb_debug(1, "put buddy for group %u in page %lu/%x\n", |
| 866 | group, page->index, i * blocksize); | 867 | group, page->index, i * blocksize); |
| 867 | grinfo = ext4_get_group_info(sb, group); | 868 | grinfo = ext4_get_group_info(sb, group); |
| 868 | grinfo->bb_fragments = 0; | 869 | grinfo->bb_fragments = 0; |
| 869 | memset(grinfo->bb_counters, 0, | 870 | memset(grinfo->bb_counters, 0, |
| 870 | sizeof(unsigned short)*(sb->s_blocksize_bits+2)); | 871 | sizeof(*grinfo->bb_counters) * |
| 872 | (sb->s_blocksize_bits+2)); | ||
| 871 | /* | 873 | /* |
| 872 | * incore got set to the group block bitmap below | 874 | * incore got set to the group block bitmap below |
| 873 | */ | 875 | */ |
| @@ -878,7 +880,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
| 878 | } else { | 880 | } else { |
| 879 | /* this is block of bitmap */ | 881 | /* this is block of bitmap */ |
| 880 | BUG_ON(incore != NULL); | 882 | BUG_ON(incore != NULL); |
| 881 | mb_debug("put bitmap for group %u in page %lu/%x\n", | 883 | mb_debug(1, "put bitmap for group %u in page %lu/%x\n", |
| 882 | group, page->index, i * blocksize); | 884 | group, page->index, i * blocksize); |
| 883 | 885 | ||
| 884 | /* see comments in ext4_mb_put_pa() */ | 886 | /* see comments in ext4_mb_put_pa() */ |
| @@ -908,6 +910,100 @@ out: | |||
| 908 | return err; | 910 | return err; |
| 909 | } | 911 | } |
| 910 | 912 | ||
| 913 | static noinline_for_stack | ||
| 914 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | ||
| 915 | { | ||
| 916 | |||
| 917 | int ret = 0; | ||
| 918 | void *bitmap; | ||
| 919 | int blocks_per_page; | ||
| 920 | int block, pnum, poff; | ||
| 921 | int num_grp_locked = 0; | ||
| 922 | struct ext4_group_info *this_grp; | ||
| 923 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 924 | struct inode *inode = sbi->s_buddy_cache; | ||
| 925 | struct page *page = NULL, *bitmap_page = NULL; | ||
| 926 | |||
| 927 | mb_debug(1, "init group %u\n", group); | ||
| 928 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
| 929 | this_grp = ext4_get_group_info(sb, group); | ||
| 930 | /* | ||
| 931 | * This ensures that we don't reinit the buddy cache | ||
| 932 | * page which map to the group from which we are already | ||
| 933 | * allocating. If we are looking at the buddy cache we would | ||
| 934 | * have taken a reference using ext4_mb_load_buddy and that | ||
| 935 | * would have taken the alloc_sem lock. | ||
| 936 | */ | ||
| 937 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | ||
| 938 | if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | ||
| 939 | /* | ||
| 940 | * somebody initialized the group | ||
| 941 | * return without doing anything | ||
| 942 | */ | ||
| 943 | ret = 0; | ||
| 944 | goto err; | ||
| 945 | } | ||
| 946 | /* | ||
| 947 | * the buddy cache inode stores the block bitmap | ||
| 948 | * and buddy information in consecutive blocks. | ||
| 949 | * So for each group we need two blocks. | ||
| 950 | */ | ||
| 951 | block = group * 2; | ||
| 952 | pnum = block / blocks_per_page; | ||
| 953 | poff = block % blocks_per_page; | ||
| 954 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
| 955 | if (page) { | ||
| 956 | BUG_ON(page->mapping != inode->i_mapping); | ||
| 957 | ret = ext4_mb_init_cache(page, NULL); | ||
| 958 | if (ret) { | ||
| 959 | unlock_page(page); | ||
| 960 | goto err; | ||
| 961 | } | ||
| 962 | unlock_page(page); | ||
| 963 | } | ||
| 964 | if (page == NULL || !PageUptodate(page)) { | ||
| 965 | ret = -EIO; | ||
| 966 | goto err; | ||
| 967 | } | ||
| 968 | mark_page_accessed(page); | ||
| 969 | bitmap_page = page; | ||
| 970 | bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
| 971 | |||
| 972 | /* init buddy cache */ | ||
| 973 | block++; | ||
| 974 | pnum = block / blocks_per_page; | ||
| 975 | poff = block % blocks_per_page; | ||
| 976 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
| 977 | if (page == bitmap_page) { | ||
| 978 | /* | ||
| 979 | * If both the bitmap and buddy are in | ||
| 980 | * the same page we don't need to force | ||
| 981 | * init the buddy | ||
| 982 | */ | ||
| 983 | unlock_page(page); | ||
| 984 | } else if (page) { | ||
| 985 | BUG_ON(page->mapping != inode->i_mapping); | ||
| 986 | ret = ext4_mb_init_cache(page, bitmap); | ||
| 987 | if (ret) { | ||
| 988 | unlock_page(page); | ||
| 989 | goto err; | ||
| 990 | } | ||
| 991 | unlock_page(page); | ||
| 992 | } | ||
| 993 | if (page == NULL || !PageUptodate(page)) { | ||
| 994 | ret = -EIO; | ||
| 995 | goto err; | ||
| 996 | } | ||
| 997 | mark_page_accessed(page); | ||
| 998 | err: | ||
| 999 | ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | ||
| 1000 | if (bitmap_page) | ||
| 1001 | page_cache_release(bitmap_page); | ||
| 1002 | if (page) | ||
| 1003 | page_cache_release(page); | ||
| 1004 | return ret; | ||
| 1005 | } | ||
| 1006 | |||
| 911 | static noinline_for_stack int | 1007 | static noinline_for_stack int |
| 912 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | 1008 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, |
| 913 | struct ext4_buddy *e4b) | 1009 | struct ext4_buddy *e4b) |
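
As the comment in `ext4_mb_init_group()` above says, the buddy-cache inode stores each group's block bitmap and buddy map in two consecutive blocksize chunks, so the page number and in-page offset follow directly from the group number. The small program below works that indexing out with example geometry; it is an illustration, not kernel code.

```c
#include <stdio.h>

/*
 * Illustrative sketch of the buddy-cache indexing used above:
 * block = group * 2 (+1 for the buddy half), pnum = block / chunks
 * per page, poff = block % chunks per page.  The sizes are example
 * parameters only.
 */
#define PAGE_CACHE_SIZE	4096u
#define BLK_SIZE	1024u	/* 1k blocks: 4 chunks per page */

static void locate(unsigned int group, const char *what, unsigned int offset)
{
	unsigned int blocks_per_page = PAGE_CACHE_SIZE / BLK_SIZE;
	unsigned int block = group * 2 + offset;	/* 0 = bitmap, 1 = buddy */
	unsigned int pnum = block / blocks_per_page;	/* page index */
	unsigned int poff = block % blocks_per_page;	/* chunk within page */

	printf("group %u %-6s -> page %u, offset %u (byte %u)\n",
	       group, what, pnum, poff, poff * BLK_SIZE);
}

int main(void)
{
	for (unsigned int group = 0; group < 4; group++) {
		locate(group, "bitmap", 0);
		locate(group, "buddy", 1);
	}
	/* with 4 chunks per page a group's bitmap and buddy can land in
	 * the same page, which is why the function above checks
	 * page == bitmap_page before initializing the buddy half. */
	return 0;
}
```
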
| @@ -922,7 +1018,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
| 922 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1018 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 923 | struct inode *inode = sbi->s_buddy_cache; | 1019 | struct inode *inode = sbi->s_buddy_cache; |
| 924 | 1020 | ||
| 925 | mb_debug("load group %u\n", group); | 1021 | mb_debug(1, "load group %u\n", group); |
| 926 | 1022 | ||
| 927 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | 1023 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; |
| 928 | grp = ext4_get_group_info(sb, group); | 1024 | grp = ext4_get_group_info(sb, group); |
| @@ -941,8 +1037,26 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
| 941 | * groups mapped by the page is blocked | 1037 | * groups mapped by the page is blocked |
| 942 | * till we are done with allocation | 1038 | * till we are done with allocation |
| 943 | */ | 1039 | */ |
| 1040 | repeat_load_buddy: | ||
| 944 | down_read(e4b->alloc_semp); | 1041 | down_read(e4b->alloc_semp); |
| 945 | 1042 | ||
| 1043 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | ||
| 1044 | /* we need to check for group need init flag | ||
| 1045 | * with alloc_semp held so that we can be sure | ||
| 1046 | * that new blocks didn't get added to the group | ||
| 1047 | * when we are loading the buddy cache | ||
| 1048 | */ | ||
| 1049 | up_read(e4b->alloc_semp); | ||
| 1050 | /* | ||
| 1051 | * we need full data about the group | ||
| 1052 | * to make a good selection | ||
| 1053 | */ | ||
| 1054 | ret = ext4_mb_init_group(sb, group); | ||
| 1055 | if (ret) | ||
| 1056 | return ret; | ||
| 1057 | goto repeat_load_buddy; | ||
| 1058 | } | ||
| 1059 | |||
| 946 | /* | 1060 | /* |
| 947 | * the buddy cache inode stores the block bitmap | 1061 | * the buddy cache inode stores the block bitmap |
| 948 | * and buddy information in consecutive blocks. | 1062 | * and buddy information in consecutive blocks. |
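
The `repeat_load_buddy:` hunk above re-tests the need-init flag while holding `alloc_sem`, and if the group still needs initialization it drops the lock, initializes, and retries so the flag is always rechecked under the lock. Here is an illustrative userspace sketch of that check/drop/retry pattern; the struct, lock and `init_group()` are stand-ins, and the real code keeps the semaphore held for the rest of the allocation.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative sketch of the retry-under-lock pattern above. */
struct group {
	pthread_rwlock_t alloc_sem;
	bool need_init;
};

static int init_group(struct group *g)
{
	g->need_init = false;	/* heavyweight setup would happen here */
	return 0;
}

static int load_buddy(struct group *g)
{
	int ret;

repeat_load_buddy:
	pthread_rwlock_rdlock(&g->alloc_sem);
	if (g->need_init) {
		/* can't do blocking init while holding the read lock */
		pthread_rwlock_unlock(&g->alloc_sem);
		ret = init_group(g);
		if (ret)
			return ret;
		goto repeat_load_buddy;	/* re-check under the lock */
	}

	/* ... use the buddy data while alloc_sem is held ... */
	pthread_rwlock_unlock(&g->alloc_sem);
	return 0;
}

int main(void)
{
	struct group g;

	pthread_rwlock_init(&g.alloc_sem, NULL);
	g.need_init = true;

	printf("load_buddy: %d (need_init now %d)\n",
	       load_buddy(&g), g.need_init);
	return 0;
}
```
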
| @@ -1360,7 +1474,7 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, | |||
| 1360 | ac->alloc_semp = e4b->alloc_semp; | 1474 | ac->alloc_semp = e4b->alloc_semp; |
| 1361 | e4b->alloc_semp = NULL; | 1475 | e4b->alloc_semp = NULL; |
| 1362 | /* store last allocated for subsequent stream allocation */ | 1476 | /* store last allocated for subsequent stream allocation */ |
| 1363 | if ((ac->ac_flags & EXT4_MB_HINT_DATA)) { | 1477 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { |
| 1364 | spin_lock(&sbi->s_md_lock); | 1478 | spin_lock(&sbi->s_md_lock); |
| 1365 | sbi->s_mb_last_group = ac->ac_f_ex.fe_group; | 1479 | sbi->s_mb_last_group = ac->ac_f_ex.fe_group; |
| 1366 | sbi->s_mb_last_start = ac->ac_f_ex.fe_start; | 1480 | sbi->s_mb_last_start = ac->ac_f_ex.fe_start; |
| @@ -1837,97 +1951,6 @@ void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | |||
| 1837 | 1951 | ||
| 1838 | } | 1952 | } |
| 1839 | 1953 | ||
| 1840 | static noinline_for_stack | ||
| 1841 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | ||
| 1842 | { | ||
| 1843 | |||
| 1844 | int ret; | ||
| 1845 | void *bitmap; | ||
| 1846 | int blocks_per_page; | ||
| 1847 | int block, pnum, poff; | ||
| 1848 | int num_grp_locked = 0; | ||
| 1849 | struct ext4_group_info *this_grp; | ||
| 1850 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 1851 | struct inode *inode = sbi->s_buddy_cache; | ||
| 1852 | struct page *page = NULL, *bitmap_page = NULL; | ||
| 1853 | |||
| 1854 | mb_debug("init group %lu\n", group); | ||
| 1855 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
| 1856 | this_grp = ext4_get_group_info(sb, group); | ||
| 1857 | /* | ||
| 1858 | * This ensures we don't add group | ||
| 1859 | * to this buddy cache via resize | ||
| 1860 | */ | ||
| 1861 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | ||
| 1862 | if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | ||
| 1863 | /* | ||
| 1864 | * somebody initialized the group | ||
| 1865 | * return without doing anything | ||
| 1866 | */ | ||
| 1867 | ret = 0; | ||
| 1868 | goto err; | ||
| 1869 | } | ||
| 1870 | /* | ||
| 1871 | * the buddy cache inode stores the block bitmap | ||
| 1872 | * and buddy information in consecutive blocks. | ||
| 1873 | * So for each group we need two blocks. | ||
| 1874 | */ | ||
| 1875 | block = group * 2; | ||
| 1876 | pnum = block / blocks_per_page; | ||
| 1877 | poff = block % blocks_per_page; | ||
| 1878 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
| 1879 | if (page) { | ||
| 1880 | BUG_ON(page->mapping != inode->i_mapping); | ||
| 1881 | ret = ext4_mb_init_cache(page, NULL); | ||
| 1882 | if (ret) { | ||
| 1883 | unlock_page(page); | ||
| 1884 | goto err; | ||
| 1885 | } | ||
| 1886 | unlock_page(page); | ||
| 1887 | } | ||
| 1888 | if (page == NULL || !PageUptodate(page)) { | ||
| 1889 | ret = -EIO; | ||
| 1890 | goto err; | ||
| 1891 | } | ||
| 1892 | mark_page_accessed(page); | ||
| 1893 | bitmap_page = page; | ||
| 1894 | bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
| 1895 | |||
| 1896 | /* init buddy cache */ | ||
| 1897 | block++; | ||
| 1898 | pnum = block / blocks_per_page; | ||
| 1899 | poff = block % blocks_per_page; | ||
| 1900 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
| 1901 | if (page == bitmap_page) { | ||
| 1902 | /* | ||
| 1903 | * If both the bitmap and buddy are in | ||
| 1904 | * the same page we don't need to force | ||
| 1905 | * init the buddy | ||
| 1906 | */ | ||
| 1907 | unlock_page(page); | ||
| 1908 | } else if (page) { | ||
| 1909 | BUG_ON(page->mapping != inode->i_mapping); | ||
| 1910 | ret = ext4_mb_init_cache(page, bitmap); | ||
| 1911 | if (ret) { | ||
| 1912 | unlock_page(page); | ||
| 1913 | goto err; | ||
| 1914 | } | ||
| 1915 | unlock_page(page); | ||
| 1916 | } | ||
| 1917 | if (page == NULL || !PageUptodate(page)) { | ||
| 1918 | ret = -EIO; | ||
| 1919 | goto err; | ||
| 1920 | } | ||
| 1921 | mark_page_accessed(page); | ||
| 1922 | err: | ||
| 1923 | ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | ||
| 1924 | if (bitmap_page) | ||
| 1925 | page_cache_release(bitmap_page); | ||
| 1926 | if (page) | ||
| 1927 | page_cache_release(page); | ||
| 1928 | return ret; | ||
| 1929 | } | ||
| 1930 | |||
| 1931 | static noinline_for_stack int | 1954 | static noinline_for_stack int |
| 1932 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | 1955 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
| 1933 | { | 1956 | { |
| @@ -1938,11 +1961,14 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
| 1938 | struct ext4_sb_info *sbi; | 1961 | struct ext4_sb_info *sbi; |
| 1939 | struct super_block *sb; | 1962 | struct super_block *sb; |
| 1940 | struct ext4_buddy e4b; | 1963 | struct ext4_buddy e4b; |
| 1941 | loff_t size, isize; | ||
| 1942 | 1964 | ||
| 1943 | sb = ac->ac_sb; | 1965 | sb = ac->ac_sb; |
| 1944 | sbi = EXT4_SB(sb); | 1966 | sbi = EXT4_SB(sb); |
| 1945 | ngroups = ext4_get_groups_count(sb); | 1967 | ngroups = ext4_get_groups_count(sb); |
| 1968 | /* non-extent files are limited to low blocks/groups */ | ||
| 1969 | if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL)) | ||
| 1970 | ngroups = sbi->s_blockfile_groups; | ||
| 1971 | |||
| 1946 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); | 1972 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); |
| 1947 | 1973 | ||
| 1948 | /* first, try the goal */ | 1974 | /* first, try the goal */ |
| @@ -1974,20 +2000,16 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
| 1974 | } | 2000 | } |
| 1975 | 2001 | ||
| 1976 | bsbits = ac->ac_sb->s_blocksize_bits; | 2002 | bsbits = ac->ac_sb->s_blocksize_bits; |
| 1977 | /* if stream allocation is enabled, use global goal */ | ||
| 1978 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; | ||
| 1979 | isize = i_size_read(ac->ac_inode) >> bsbits; | ||
| 1980 | if (size < isize) | ||
| 1981 | size = isize; | ||
| 1982 | 2003 | ||
| 1983 | if (size < sbi->s_mb_stream_request && | 2004 | /* if stream allocation is enabled, use global goal */ |
| 1984 | (ac->ac_flags & EXT4_MB_HINT_DATA)) { | 2005 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { |
| 1985 | /* TBD: may be hot point */ | 2006 | /* TBD: may be hot point */ |
| 1986 | spin_lock(&sbi->s_md_lock); | 2007 | spin_lock(&sbi->s_md_lock); |
| 1987 | ac->ac_g_ex.fe_group = sbi->s_mb_last_group; | 2008 | ac->ac_g_ex.fe_group = sbi->s_mb_last_group; |
| 1988 | ac->ac_g_ex.fe_start = sbi->s_mb_last_start; | 2009 | ac->ac_g_ex.fe_start = sbi->s_mb_last_start; |
| 1989 | spin_unlock(&sbi->s_md_lock); | 2010 | spin_unlock(&sbi->s_md_lock); |
| 1990 | } | 2011 | } |
| 2012 | |||
| 1991 | /* Let's just scan groups to find more-less suitable blocks */ | 2013 | /* Let's just scan groups to find more-less suitable blocks */ |
| 1992 | cr = ac->ac_2order ? 0 : 1; | 2014 | cr = ac->ac_2order ? 0 : 1; |
| 1993 | /* | 2015 | /* |
| @@ -2015,27 +2037,6 @@ repeat: | |||
| 2015 | if (grp->bb_free == 0) | 2037 | if (grp->bb_free == 0) |
| 2016 | continue; | 2038 | continue; |
| 2017 | 2039 | ||
| 2018 | /* | ||
| 2019 | * if the group is already init we check whether it is | ||
| 2020 | * a good group and if not we don't load the buddy | ||
| 2021 | */ | ||
| 2022 | if (EXT4_MB_GRP_NEED_INIT(grp)) { | ||
| 2023 | /* | ||
| 2024 | * we need full data about the group | ||
| 2025 | * to make a good selection | ||
| 2026 | */ | ||
| 2027 | err = ext4_mb_init_group(sb, group); | ||
| 2028 | if (err) | ||
| 2029 | goto out; | ||
| 2030 | } | ||
| 2031 | |||
| 2032 | /* | ||
| 2033 | * If the particular group doesn't satisfy our | ||
| 2034 | * criteria we continue with the next group | ||
| 2035 | */ | ||
| 2036 | if (!ext4_mb_good_group(ac, group, cr)) | ||
| 2037 | continue; | ||
| 2038 | |||
| 2039 | err = ext4_mb_load_buddy(sb, group, &e4b); | 2040 | err = ext4_mb_load_buddy(sb, group, &e4b); |
| 2040 | if (err) | 2041 | if (err) |
| 2041 | goto out; | 2042 | goto out; |
| @@ -2156,7 +2157,7 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) | |||
| 2156 | 2157 | ||
| 2157 | if (v == SEQ_START_TOKEN) { | 2158 | if (v == SEQ_START_TOKEN) { |
| 2158 | seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " | 2159 | seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " |
| 2159 | "%-5s %-2s %-5s %-5s %-5s %-6s\n", | 2160 | "%-5s %-2s %-6s %-5s %-5s %-6s\n", |
| 2160 | "pid", "inode", "original", "goal", "result", "found", | 2161 | "pid", "inode", "original", "goal", "result", "found", |
| 2161 | "grps", "cr", "flags", "merge", "tail", "broken"); | 2162 | "grps", "cr", "flags", "merge", "tail", "broken"); |
| 2162 | return 0; | 2163 | return 0; |
| @@ -2164,7 +2165,7 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) | |||
| 2164 | 2165 | ||
| 2165 | if (hs->op == EXT4_MB_HISTORY_ALLOC) { | 2166 | if (hs->op == EXT4_MB_HISTORY_ALLOC) { |
| 2166 | fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " | 2167 | fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " |
| 2167 | "%-5u %-5s %-5u %-6u\n"; | 2168 | "0x%04x %-5s %-5u %-6u\n"; |
| 2168 | sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, | 2169 | sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, |
| 2169 | hs->result.fe_start, hs->result.fe_len, | 2170 | hs->result.fe_start, hs->result.fe_len, |
| 2170 | hs->result.fe_logical); | 2171 | hs->result.fe_logical); |
| @@ -2205,7 +2206,7 @@ static void ext4_mb_seq_history_stop(struct seq_file *seq, void *v) | |||
| 2205 | { | 2206 | { |
| 2206 | } | 2207 | } |
| 2207 | 2208 | ||
| 2208 | static struct seq_operations ext4_mb_seq_history_ops = { | 2209 | static const struct seq_operations ext4_mb_seq_history_ops = { |
| 2209 | .start = ext4_mb_seq_history_start, | 2210 | .start = ext4_mb_seq_history_start, |
| 2210 | .next = ext4_mb_seq_history_next, | 2211 | .next = ext4_mb_seq_history_next, |
| 2211 | .stop = ext4_mb_seq_history_stop, | 2212 | .stop = ext4_mb_seq_history_stop, |
| @@ -2287,7 +2288,7 @@ static ssize_t ext4_mb_seq_history_write(struct file *file, | |||
| 2287 | return count; | 2288 | return count; |
| 2288 | } | 2289 | } |
| 2289 | 2290 | ||
| 2290 | static struct file_operations ext4_mb_seq_history_fops = { | 2291 | static const struct file_operations ext4_mb_seq_history_fops = { |
| 2291 | .owner = THIS_MODULE, | 2292 | .owner = THIS_MODULE, |
| 2292 | .open = ext4_mb_seq_history_open, | 2293 | .open = ext4_mb_seq_history_open, |
| 2293 | .read = seq_read, | 2294 | .read = seq_read, |
| @@ -2328,7 +2329,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) | |||
| 2328 | struct ext4_buddy e4b; | 2329 | struct ext4_buddy e4b; |
| 2329 | struct sg { | 2330 | struct sg { |
| 2330 | struct ext4_group_info info; | 2331 | struct ext4_group_info info; |
| 2331 | unsigned short counters[16]; | 2332 | ext4_grpblk_t counters[16]; |
| 2332 | } sg; | 2333 | } sg; |
| 2333 | 2334 | ||
| 2334 | group--; | 2335 | group--; |
| @@ -2366,7 +2367,7 @@ static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v) | |||
| 2366 | { | 2367 | { |
| 2367 | } | 2368 | } |
| 2368 | 2369 | ||
| 2369 | static struct seq_operations ext4_mb_seq_groups_ops = { | 2370 | static const struct seq_operations ext4_mb_seq_groups_ops = { |
| 2370 | .start = ext4_mb_seq_groups_start, | 2371 | .start = ext4_mb_seq_groups_start, |
| 2371 | .next = ext4_mb_seq_groups_next, | 2372 | .next = ext4_mb_seq_groups_next, |
| 2372 | .stop = ext4_mb_seq_groups_stop, | 2373 | .stop = ext4_mb_seq_groups_stop, |
| @@ -2387,7 +2388,7 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) | |||
| 2387 | 2388 | ||
| 2388 | } | 2389 | } |
| 2389 | 2390 | ||
| 2390 | static struct file_operations ext4_mb_seq_groups_fops = { | 2391 | static const struct file_operations ext4_mb_seq_groups_fops = { |
| 2391 | .owner = THIS_MODULE, | 2392 | .owner = THIS_MODULE, |
| 2392 | .open = ext4_mb_seq_groups_open, | 2393 | .open = ext4_mb_seq_groups_open, |
| 2393 | .read = seq_read, | 2394 | .read = seq_read, |
| @@ -2532,7 +2533,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
| 2532 | 2533 | ||
| 2533 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2534 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
| 2534 | init_rwsem(&meta_group_info[i]->alloc_sem); | 2535 | init_rwsem(&meta_group_info[i]->alloc_sem); |
| 2535 | meta_group_info[i]->bb_free_root.rb_node = NULL;; | 2536 | meta_group_info[i]->bb_free_root.rb_node = NULL; |
| 2536 | 2537 | ||
| 2537 | #ifdef DOUBLE_CHECK | 2538 | #ifdef DOUBLE_CHECK |
| 2538 | { | 2539 | { |
| @@ -2558,26 +2559,15 @@ exit_meta_group_info: | |||
| 2558 | return -ENOMEM; | 2559 | return -ENOMEM; |
| 2559 | } /* ext4_mb_add_groupinfo */ | 2560 | } /* ext4_mb_add_groupinfo */ |
| 2560 | 2561 | ||
| 2561 | /* | ||
| 2562 | * Update an existing group. | ||
| 2563 | * This function is used for online resize | ||
| 2564 | */ | ||
| 2565 | void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add) | ||
| 2566 | { | ||
| 2567 | grp->bb_free += add; | ||
| 2568 | } | ||
| 2569 | |||
| 2570 | static int ext4_mb_init_backend(struct super_block *sb) | 2562 | static int ext4_mb_init_backend(struct super_block *sb) |
| 2571 | { | 2563 | { |
| 2572 | ext4_group_t ngroups = ext4_get_groups_count(sb); | 2564 | ext4_group_t ngroups = ext4_get_groups_count(sb); |
| 2573 | ext4_group_t i; | 2565 | ext4_group_t i; |
| 2574 | int metalen; | ||
| 2575 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2566 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 2576 | struct ext4_super_block *es = sbi->s_es; | 2567 | struct ext4_super_block *es = sbi->s_es; |
| 2577 | int num_meta_group_infos; | 2568 | int num_meta_group_infos; |
| 2578 | int num_meta_group_infos_max; | 2569 | int num_meta_group_infos_max; |
| 2579 | int array_size; | 2570 | int array_size; |
| 2580 | struct ext4_group_info **meta_group_info; | ||
| 2581 | struct ext4_group_desc *desc; | 2571 | struct ext4_group_desc *desc; |
| 2582 | 2572 | ||
| 2583 | /* This is the number of blocks used by GDT */ | 2573 | /* This is the number of blocks used by GDT */ |
| @@ -2622,22 +2612,6 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
| 2622 | goto err_freesgi; | 2612 | goto err_freesgi; |
| 2623 | } | 2613 | } |
| 2624 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; | 2614 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; |
| 2625 | |||
| 2626 | metalen = sizeof(*meta_group_info) << EXT4_DESC_PER_BLOCK_BITS(sb); | ||
| 2627 | for (i = 0; i < num_meta_group_infos; i++) { | ||
| 2628 | if ((i + 1) == num_meta_group_infos) | ||
| 2629 | metalen = sizeof(*meta_group_info) * | ||
| 2630 | (ngroups - | ||
| 2631 | (i << EXT4_DESC_PER_BLOCK_BITS(sb))); | ||
| 2632 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | ||
| 2633 | if (meta_group_info == NULL) { | ||
| 2634 | printk(KERN_ERR "EXT4-fs: can't allocate mem for a " | ||
| 2635 | "buddy group\n"); | ||
| 2636 | goto err_freemeta; | ||
| 2637 | } | ||
| 2638 | sbi->s_group_info[i] = meta_group_info; | ||
| 2639 | } | ||
| 2640 | |||
| 2641 | for (i = 0; i < ngroups; i++) { | 2615 | for (i = 0; i < ngroups; i++) { |
| 2642 | desc = ext4_get_group_desc(sb, i, NULL); | 2616 | desc = ext4_get_group_desc(sb, i, NULL); |
| 2643 | if (desc == NULL) { | 2617 | if (desc == NULL) { |
| @@ -2655,7 +2629,6 @@ err_freebuddy: | |||
| 2655 | while (i-- > 0) | 2629 | while (i-- > 0) |
| 2656 | kfree(ext4_get_group_info(sb, i)); | 2630 | kfree(ext4_get_group_info(sb, i)); |
| 2657 | i = num_meta_group_infos; | 2631 | i = num_meta_group_infos; |
| 2658 | err_freemeta: | ||
| 2659 | while (i-- > 0) | 2632 | while (i-- > 0) |
| 2660 | kfree(sbi->s_group_info[i]); | 2633 | kfree(sbi->s_group_info[i]); |
| 2661 | iput(sbi->s_buddy_cache); | 2634 | iput(sbi->s_buddy_cache); |
| @@ -2672,14 +2645,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
| 2672 | unsigned max; | 2645 | unsigned max; |
| 2673 | int ret; | 2646 | int ret; |
| 2674 | 2647 | ||
| 2675 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); | 2648 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); |
| 2676 | 2649 | ||
| 2677 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2650 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
| 2678 | if (sbi->s_mb_offsets == NULL) { | 2651 | if (sbi->s_mb_offsets == NULL) { |
| 2679 | return -ENOMEM; | 2652 | return -ENOMEM; |
| 2680 | } | 2653 | } |
| 2681 | 2654 | ||
| 2682 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int); | 2655 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); |
| 2683 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2656 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
| 2684 | if (sbi->s_mb_maxs == NULL) { | 2657 | if (sbi->s_mb_maxs == NULL) { |
| 2685 | kfree(sbi->s_mb_offsets); | 2658 | kfree(sbi->s_mb_offsets); |
| @@ -2758,7 +2731,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) | |||
| 2758 | kmem_cache_free(ext4_pspace_cachep, pa); | 2731 | kmem_cache_free(ext4_pspace_cachep, pa); |
| 2759 | } | 2732 | } |
| 2760 | if (count) | 2733 | if (count) |
| 2761 | mb_debug("mballoc: %u PAs left\n", count); | 2734 | mb_debug(1, "mballoc: %u PAs left\n", count); |
| 2762 | 2735 | ||
| 2763 | } | 2736 | } |
| 2764 | 2737 | ||
| @@ -2839,7 +2812,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
| 2839 | list_for_each_safe(l, ltmp, &txn->t_private_list) { | 2812 | list_for_each_safe(l, ltmp, &txn->t_private_list) { |
| 2840 | entry = list_entry(l, struct ext4_free_data, list); | 2813 | entry = list_entry(l, struct ext4_free_data, list); |
| 2841 | 2814 | ||
| 2842 | mb_debug("gonna free %u blocks in group %u (0x%p):", | 2815 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", |
| 2843 | entry->count, entry->group, entry); | 2816 | entry->count, entry->group, entry); |
| 2844 | 2817 | ||
| 2845 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2818 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
| @@ -2874,9 +2847,43 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
| 2874 | ext4_mb_release_desc(&e4b); | 2847 | ext4_mb_release_desc(&e4b); |
| 2875 | } | 2848 | } |
| 2876 | 2849 | ||
| 2877 | mb_debug("freed %u blocks in %u structures\n", count, count2); | 2850 | mb_debug(1, "freed %u blocks in %u structures\n", count, count2); |
| 2851 | } | ||
| 2852 | |||
| 2853 | #ifdef CONFIG_EXT4_DEBUG | ||
| 2854 | u8 mb_enable_debug __read_mostly; | ||
| 2855 | |||
| 2856 | static struct dentry *debugfs_dir; | ||
| 2857 | static struct dentry *debugfs_debug; | ||
| 2858 | |||
| 2859 | static void __init ext4_create_debugfs_entry(void) | ||
| 2860 | { | ||
| 2861 | debugfs_dir = debugfs_create_dir("ext4", NULL); | ||
| 2862 | if (debugfs_dir) | ||
| 2863 | debugfs_debug = debugfs_create_u8("mballoc-debug", | ||
| 2864 | S_IRUGO | S_IWUSR, | ||
| 2865 | debugfs_dir, | ||
| 2866 | &mb_enable_debug); | ||
| 2867 | } | ||
| 2868 | |||
| 2869 | static void ext4_remove_debugfs_entry(void) | ||
| 2870 | { | ||
| 2871 | debugfs_remove(debugfs_debug); | ||
| 2872 | debugfs_remove(debugfs_dir); | ||
| 2878 | } | 2873 | } |
| 2879 | 2874 | ||
| 2875 | #else | ||
| 2876 | |||
| 2877 | static void __init ext4_create_debugfs_entry(void) | ||
| 2878 | { | ||
| 2879 | } | ||
| 2880 | |||
| 2881 | static void ext4_remove_debugfs_entry(void) | ||
| 2882 | { | ||
| 2883 | } | ||
| 2884 | |||
| 2885 | #endif | ||
| 2886 | |||
| 2880 | int __init init_ext4_mballoc(void) | 2887 | int __init init_ext4_mballoc(void) |
| 2881 | { | 2888 | { |
| 2882 | ext4_pspace_cachep = | 2889 | ext4_pspace_cachep = |
| @@ -2904,6 +2911,7 @@ int __init init_ext4_mballoc(void) | |||
| 2904 | kmem_cache_destroy(ext4_ac_cachep); | 2911 | kmem_cache_destroy(ext4_ac_cachep); |
| 2905 | return -ENOMEM; | 2912 | return -ENOMEM; |
| 2906 | } | 2913 | } |
| 2914 | ext4_create_debugfs_entry(); | ||
| 2907 | return 0; | 2915 | return 0; |
| 2908 | } | 2916 | } |
| 2909 | 2917 | ||
| @@ -2917,6 +2925,7 @@ void exit_ext4_mballoc(void) | |||
| 2917 | kmem_cache_destroy(ext4_pspace_cachep); | 2925 | kmem_cache_destroy(ext4_pspace_cachep); |
| 2918 | kmem_cache_destroy(ext4_ac_cachep); | 2926 | kmem_cache_destroy(ext4_ac_cachep); |
| 2919 | kmem_cache_destroy(ext4_free_ext_cachep); | 2927 | kmem_cache_destroy(ext4_free_ext_cachep); |
| 2928 | ext4_remove_debugfs_entry(); | ||
| 2920 | } | 2929 | } |
| 2921 | 2930 | ||
| 2922 | 2931 | ||
| @@ -3061,7 +3070,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) | |||
| 3061 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; | 3070 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; |
| 3062 | else | 3071 | else |
| 3063 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; | 3072 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; |
| 3064 | mb_debug("#%u: goal %u blocks for locality group\n", | 3073 | mb_debug(1, "#%u: goal %u blocks for locality group\n", |
| 3065 | current->pid, ac->ac_g_ex.fe_len); | 3074 | current->pid, ac->ac_g_ex.fe_len); |
| 3066 | } | 3075 | } |
| 3067 | 3076 | ||
| @@ -3180,23 +3189,18 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
| 3180 | BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || | 3189 | BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || |
| 3181 | ac->ac_o_ex.fe_logical < pa->pa_lstart)); | 3190 | ac->ac_o_ex.fe_logical < pa->pa_lstart)); |
| 3182 | 3191 | ||
| 3183 | /* skip PA normalized request doesn't overlap with */ | 3192 | /* skip PAs this normalized request doesn't overlap with */ |
| 3184 | if (pa->pa_lstart >= end) { | 3193 | if (pa->pa_lstart >= end || pa_end <= start) { |
| 3185 | spin_unlock(&pa->pa_lock); | ||
| 3186 | continue; | ||
| 3187 | } | ||
| 3188 | if (pa_end <= start) { | ||
| 3189 | spin_unlock(&pa->pa_lock); | 3194 | spin_unlock(&pa->pa_lock); |
| 3190 | continue; | 3195 | continue; |
| 3191 | } | 3196 | } |
| 3192 | BUG_ON(pa->pa_lstart <= start && pa_end >= end); | 3197 | BUG_ON(pa->pa_lstart <= start && pa_end >= end); |
| 3193 | 3198 | ||
| 3199 | /* adjust start or end to be adjacent to this pa */ | ||
| 3194 | if (pa_end <= ac->ac_o_ex.fe_logical) { | 3200 | if (pa_end <= ac->ac_o_ex.fe_logical) { |
| 3195 | BUG_ON(pa_end < start); | 3201 | BUG_ON(pa_end < start); |
| 3196 | start = pa_end; | 3202 | start = pa_end; |
| 3197 | } | 3203 | } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) { |
| 3198 | |||
| 3199 | if (pa->pa_lstart > ac->ac_o_ex.fe_logical) { | ||
| 3200 | BUG_ON(pa->pa_lstart > end); | 3204 | BUG_ON(pa->pa_lstart > end); |
| 3201 | end = pa->pa_lstart; | 3205 | end = pa->pa_lstart; |
| 3202 | } | 3206 | } |
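
The simplified branches above trim the normalized request so it never overlaps an existing preallocation: a PA that ends before the original goal block pushes `start` up, one that begins after it pulls `end` down, and non-overlapping PAs are skipped in a single test. The sketch below reproduces only that trimming arithmetic, without the locking or the BUG_ON checks; plain integers stand in for logical block numbers.

```c
#include <stdio.h>

/* Illustrative sketch of the request-trimming logic above. */
struct pa {
	unsigned long lstart;	/* logical start of the preallocation */
	unsigned long len;	/* length in blocks */
};

static void trim_request(unsigned long *start, unsigned long *end,
			 unsigned long goal,
			 const struct pa *pas, int npas)
{
	for (int i = 0; i < npas; i++) {
		unsigned long pa_end = pas[i].lstart + pas[i].len;

		/* skip PAs this normalized request doesn't overlap with */
		if (pas[i].lstart >= *end || pa_end <= *start)
			continue;

		/* adjust start or end to be adjacent to this PA */
		if (pa_end <= goal)
			*start = pa_end;
		else if (pas[i].lstart > goal)
			*end = pas[i].lstart;
	}
}

int main(void)
{
	/* request blocks [0, 64) around a goal at block 20 */
	unsigned long start = 0, end = 64, goal = 20;
	struct pa pas[] = { { 0, 8 }, { 40, 8 } };	/* existing PAs */

	trim_request(&start, &end, goal, pas, 2);
	printf("trimmed request: [%lu, %lu)\n", start, end);	/* [8, 40) */
	return 0;
}
```
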
| @@ -3251,7 +3255,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
| 3251 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; | 3255 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; |
| 3252 | } | 3256 | } |
| 3253 | 3257 | ||
| 3254 | mb_debug("goal: %u(was %u) blocks at %u\n", (unsigned) size, | 3258 | mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size, |
| 3255 | (unsigned) orig_size, (unsigned) start); | 3259 | (unsigned) orig_size, (unsigned) start); |
| 3256 | } | 3260 | } |
| 3257 | 3261 | ||
| @@ -3300,7 +3304,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, | |||
| 3300 | BUG_ON(pa->pa_free < len); | 3304 | BUG_ON(pa->pa_free < len); |
| 3301 | pa->pa_free -= len; | 3305 | pa->pa_free -= len; |
| 3302 | 3306 | ||
| 3303 | mb_debug("use %llu/%u from inode pa %p\n", start, len, pa); | 3307 | mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa); |
| 3304 | } | 3308 | } |
| 3305 | 3309 | ||
| 3306 | /* | 3310 | /* |
| @@ -3324,7 +3328,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, | |||
| 3324 | * in on-disk bitmap -- see ext4_mb_release_context() | 3328 | * in on-disk bitmap -- see ext4_mb_release_context() |
| 3325 | * Other CPUs are prevented from allocating from this pa by lg_mutex | 3329 | * Other CPUs are prevented from allocating from this pa by lg_mutex |
| 3326 | */ | 3330 | */ |
| 3327 | mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); | 3331 | mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); |
| 3328 | } | 3332 | } |
| 3329 | 3333 | ||
| 3330 | /* | 3334 | /* |
| @@ -3382,6 +3386,11 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
| 3382 | ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) | 3386 | ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) |
| 3383 | continue; | 3387 | continue; |
| 3384 | 3388 | ||
| 3389 | /* non-extent files can't have physical blocks past 2^32 */ | ||
| 3390 | if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL) && | ||
| 3391 | pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) | ||
| 3392 | continue; | ||
| 3393 | |||
| 3385 | /* found preallocated blocks, use them */ | 3394 | /* found preallocated blocks, use them */ |
| 3386 | spin_lock(&pa->pa_lock); | 3395 | spin_lock(&pa->pa_lock); |
| 3387 | if (pa->pa_deleted == 0 && pa->pa_free) { | 3396 | if (pa->pa_deleted == 0 && pa->pa_free) { |
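
The new filter above keeps indirect-block files away from preallocations that reach past 2^32 - 1, since their on-disk block pointers are only 32 bits wide. A minimal sketch of that check follows; the struct and the constant mirror, but are not, the kernel definitions.

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative sketch of the 32-bit physical-block filter above. */
#define MAX_BLOCK_FILE_PHYS 0xffffffffULL

struct prealloc {
	unsigned long long pstart;	/* first physical block */
	unsigned int len;		/* length in blocks */
};

static bool pa_usable(const struct prealloc *pa, bool file_uses_extents)
{
	if (!file_uses_extents &&
	    pa->pstart + pa->len > MAX_BLOCK_FILE_PHYS)
		return false;	/* would not fit in a 32-bit block pointer */
	return true;
}

int main(void)
{
	struct prealloc low = { 100000, 8 };
	struct prealloc high = { 0x100000000ULL - 4, 8 };

	printf("low  pa, indirect file: %d\n", pa_usable(&low, false));
	printf("high pa, indirect file: %d\n", pa_usable(&high, false));
	printf("high pa, extent file  : %d\n", pa_usable(&high, true));
	return 0;
}
```
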
| @@ -3503,7 +3512,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
| 3503 | preallocated += len; | 3512 | preallocated += len; |
| 3504 | count++; | 3513 | count++; |
| 3505 | } | 3514 | } |
| 3506 | mb_debug("prellocated %u for group %u\n", preallocated, group); | 3515 | mb_debug(1, "prellocated %u for group %u\n", preallocated, group); |
| 3507 | } | 3516 | } |
| 3508 | 3517 | ||
| 3509 | static void ext4_mb_pa_callback(struct rcu_head *head) | 3518 | static void ext4_mb_pa_callback(struct rcu_head *head) |
| @@ -3638,7 +3647,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) | |||
| 3638 | pa->pa_deleted = 0; | 3647 | pa->pa_deleted = 0; |
| 3639 | pa->pa_type = MB_INODE_PA; | 3648 | pa->pa_type = MB_INODE_PA; |
| 3640 | 3649 | ||
| 3641 | mb_debug("new inode pa %p: %llu/%u for %u\n", pa, | 3650 | mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa, |
| 3642 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | 3651 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); |
| 3643 | trace_ext4_mb_new_inode_pa(ac, pa); | 3652 | trace_ext4_mb_new_inode_pa(ac, pa); |
| 3644 | 3653 | ||
| @@ -3698,7 +3707,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) | |||
| 3698 | pa->pa_deleted = 0; | 3707 | pa->pa_deleted = 0; |
| 3699 | pa->pa_type = MB_GROUP_PA; | 3708 | pa->pa_type = MB_GROUP_PA; |
| 3700 | 3709 | ||
| 3701 | mb_debug("new group pa %p: %llu/%u for %u\n", pa, | 3710 | mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa, |
| 3702 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | 3711 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); |
| 3703 | trace_ext4_mb_new_group_pa(ac, pa); | 3712 | trace_ext4_mb_new_group_pa(ac, pa); |
| 3704 | 3713 | ||
| @@ -3777,7 +3786,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
| 3777 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); | 3786 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); |
| 3778 | start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + | 3787 | start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + |
| 3779 | le32_to_cpu(sbi->s_es->s_first_data_block); | 3788 | le32_to_cpu(sbi->s_es->s_first_data_block); |
| 3780 | mb_debug(" free preallocated %u/%u in group %u\n", | 3789 | mb_debug(1, " free preallocated %u/%u in group %u\n", |
| 3781 | (unsigned) start, (unsigned) next - bit, | 3790 | (unsigned) start, (unsigned) next - bit, |
| 3782 | (unsigned) group); | 3791 | (unsigned) group); |
| 3783 | free += next - bit; | 3792 | free += next - bit; |
| @@ -3868,7 +3877,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
| 3868 | int busy = 0; | 3877 | int busy = 0; |
| 3869 | int free = 0; | 3878 | int free = 0; |
| 3870 | 3879 | ||
| 3871 | mb_debug("discard preallocation for group %u\n", group); | 3880 | mb_debug(1, "discard preallocation for group %u\n", group); |
| 3872 | 3881 | ||
| 3873 | if (list_empty(&grp->bb_prealloc_list)) | 3882 | if (list_empty(&grp->bb_prealloc_list)) |
| 3874 | return 0; | 3883 | return 0; |
| @@ -3992,7 +4001,7 @@ void ext4_discard_preallocations(struct inode *inode) | |||
| 3992 | return; | 4001 | return; |
| 3993 | } | 4002 | } |
| 3994 | 4003 | ||
| 3995 | mb_debug("discard preallocation for inode %lu\n", inode->i_ino); | 4004 | mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino); |
| 3996 | trace_ext4_discard_preallocations(inode); | 4005 | trace_ext4_discard_preallocations(inode); |
| 3997 | 4006 | ||
| 3998 | INIT_LIST_HEAD(&list); | 4007 | INIT_LIST_HEAD(&list); |
| @@ -4097,7 +4106,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode, | |||
| 4097 | { | 4106 | { |
| 4098 | BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list)); | 4107 | BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list)); |
| 4099 | } | 4108 | } |
| 4100 | #ifdef MB_DEBUG | 4109 | #ifdef CONFIG_EXT4_DEBUG |
| 4101 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | 4110 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) |
| 4102 | { | 4111 | { |
| 4103 | struct super_block *sb = ac->ac_sb; | 4112 | struct super_block *sb = ac->ac_sb; |
| @@ -4139,14 +4148,14 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
| 4139 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, | 4148 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, |
| 4140 | NULL, &start); | 4149 | NULL, &start); |
| 4141 | spin_unlock(&pa->pa_lock); | 4150 | spin_unlock(&pa->pa_lock); |
| 4142 | printk(KERN_ERR "PA:%lu:%d:%u \n", i, | 4151 | printk(KERN_ERR "PA:%u:%d:%u \n", i, |
| 4143 | start, pa->pa_len); | 4152 | start, pa->pa_len); |
| 4144 | } | 4153 | } |
| 4145 | ext4_unlock_group(sb, i); | 4154 | ext4_unlock_group(sb, i); |
| 4146 | 4155 | ||
| 4147 | if (grp->bb_free == 0) | 4156 | if (grp->bb_free == 0) |
| 4148 | continue; | 4157 | continue; |
| 4149 | printk(KERN_ERR "%lu: %d/%d \n", | 4158 | printk(KERN_ERR "%u: %d/%d \n", |
| 4150 | i, grp->bb_free, grp->bb_fragments); | 4159 | i, grp->bb_free, grp->bb_fragments); |
| 4151 | } | 4160 | } |
| 4152 | printk(KERN_ERR "\n"); | 4161 | printk(KERN_ERR "\n"); |
| @@ -4174,16 +4183,26 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
| 4174 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) | 4183 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) |
| 4175 | return; | 4184 | return; |
| 4176 | 4185 | ||
| 4186 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) | ||
| 4187 | return; | ||
| 4188 | |||
| 4177 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; | 4189 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; |
| 4178 | isize = i_size_read(ac->ac_inode) >> bsbits; | 4190 | isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) |
| 4191 | >> bsbits; | ||
| 4179 | size = max(size, isize); | 4192 | size = max(size, isize); |
| 4180 | 4193 | ||
| 4181 | /* don't use group allocation for large files */ | 4194 | if ((size == isize) && |
| 4182 | if (size >= sbi->s_mb_stream_request) | 4195 | !ext4_fs_is_busy(sbi) && |
| 4196 | (atomic_read(&ac->ac_inode->i_writecount) == 0)) { | ||
| 4197 | ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC; | ||
| 4183 | return; | 4198 | return; |
| 4199 | } | ||
| 4184 | 4200 | ||
| 4185 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) | 4201 | /* don't use group allocation for large files */ |
| 4202 | if (size >= sbi->s_mb_stream_request) { | ||
| 4203 | ac->ac_flags |= EXT4_MB_STREAM_ALLOC; | ||
| 4186 | return; | 4204 | return; |
| 4205 | } | ||
| 4187 | 4206 | ||
| 4188 | BUG_ON(ac->ac_lg != NULL); | 4207 | BUG_ON(ac->ac_lg != NULL); |
| 4189 | /* | 4208 | /* |
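
The reworked policy above rounds the file size up to a block boundary, skips preallocation entirely when the request looks like an alloc-on-close (request reaches i_size, no writers, filesystem not busy), and marks large files for stream allocation instead of locality-group allocation. The sketch below restates that decision tree with local flag names; it is a hedged illustration, not the kernel function, and omits the GOAL_ONLY early return and the locality-group setup.

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative sketch of the allocation-policy choice above. */
#define HINT_NOPREALLOC	0x1
#define STREAM_ALLOC	0x2
#define GROUP_ALLOC	0x4

static unsigned int choose_policy(unsigned long long i_size,
				  unsigned long long request_end_blk,
				  unsigned int blkbits,
				  unsigned long long stream_request,
				  bool fs_busy, int writecount)
{
	/* request end and file size, both expressed in blocks */
	unsigned long long size = request_end_blk;
	unsigned long long isize = (i_size + (1ULL << blkbits) - 1) >> blkbits;

	if (isize > size)
		size = isize;

	if (size == isize && !fs_busy && writecount == 0)
		return HINT_NOPREALLOC;		/* alloc-on-close case */

	if (size >= stream_request)
		return STREAM_ALLOC;		/* large file: global goal */

	return GROUP_ALLOC;			/* small file: locality group */
}

int main(void)
{
	unsigned int blkbits = 12;		/* 4k blocks */

	printf("small open file : %u\n",
	       choose_policy(8 << 12, 8, blkbits, 16, false, 1));
	printf("large file      : %u\n",
	       choose_policy(64 << 12, 64, blkbits, 16, false, 1));
	printf("closed file     : %u\n",
	       choose_policy(8 << 12, 8, blkbits, 16, false, 0));
	return 0;
}
```
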
| @@ -4246,7 +4265,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, | |||
| 4246 | * locality group. this is a policy, actually */ | 4265 | * locality group. this is a policy, actually */ |
| 4247 | ext4_mb_group_or_file(ac); | 4266 | ext4_mb_group_or_file(ac); |
| 4248 | 4267 | ||
| 4249 | mb_debug("init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, " | 4268 | mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, " |
| 4250 | "left: %u/%u, right %u/%u to %swritable\n", | 4269 | "left: %u/%u, right %u/%u to %swritable\n", |
| 4251 | (unsigned) ar->len, (unsigned) ar->logical, | 4270 | (unsigned) ar->len, (unsigned) ar->logical, |
| 4252 | (unsigned) ar->goal, ac->ac_flags, ac->ac_2order, | 4271 | (unsigned) ar->goal, ac->ac_flags, ac->ac_2order, |
| @@ -4268,7 +4287,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
| 4268 | struct ext4_prealloc_space *pa, *tmp; | 4287 | struct ext4_prealloc_space *pa, *tmp; |
| 4269 | struct ext4_allocation_context *ac; | 4288 | struct ext4_allocation_context *ac; |
| 4270 | 4289 | ||
| 4271 | mb_debug("discard locality group preallocation\n"); | 4290 | mb_debug(1, "discard locality group preallocation\n"); |
| 4272 | 4291 | ||
| 4273 | INIT_LIST_HEAD(&discard_list); | 4292 | INIT_LIST_HEAD(&discard_list); |
| 4274 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 4293 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index c96bb19f58f9..188d3d709b24 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
| @@ -37,11 +37,19 @@ | |||
| 37 | 37 | ||
| 38 | /* | 38 | /* |
| 39 | */ | 39 | */ |
| 40 | #define MB_DEBUG__ | 40 | #ifdef CONFIG_EXT4_DEBUG |
| 41 | #ifdef MB_DEBUG | 41 | extern u8 mb_enable_debug; |
| 42 | #define mb_debug(fmt, a...) printk(fmt, ##a) | 42 | |
| 43 | #define mb_debug(n, fmt, a...) \ | ||
| 44 | do { \ | ||
| 45 | if ((n) <= mb_enable_debug) { \ | ||
| 46 | printk(KERN_DEBUG "(%s, %d): %s: ", \ | ||
| 47 | __FILE__, __LINE__, __func__); \ | ||
| 48 | printk(fmt, ## a); \ | ||
| 49 | } \ | ||
| 50 | } while (0) | ||
| 43 | #else | 51 | #else |
| 44 | #define mb_debug(fmt, a...) | 52 | #define mb_debug(n, fmt, a...) |
| 45 | #endif | 53 | #endif |
| 46 | 54 | ||
| 47 | /* | 55 | /* |
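
The new `mb_debug(n, ...)` above only prints when the message level is at or below the runtime knob `mb_enable_debug`, which the mballoc.c hunk earlier exposes through debugfs as `ext4/mballoc-debug`. Here is an illustrative userspace version of the same leveled-debug macro, with a plain global standing in for the debugfs knob; it uses the GNU `##__VA_ARGS__` extension just as the kernel macro uses `a...`.

```c
#include <stdio.h>

/* Illustrative userspace sketch of a leveled debug macro. */
static unsigned char mb_enable_debug;	/* debugfs knob stand-in */

#define mb_debug(n, fmt, ...)						\
	do {								\
		if ((n) <= mb_enable_debug)				\
			fprintf(stderr, "(%s, %d): %s: " fmt,		\
				__FILE__, __LINE__, __func__,		\
				##__VA_ARGS__);				\
	} while (0)

int main(void)
{
	mb_debug(1, "suppressed: debug level is %u\n", mb_enable_debug);

	mb_enable_debug = 1;	/* like: echo 1 > .../ext4/mballoc-debug */
	mb_debug(1, "load group %u\n", 5);
	mb_debug(2, "still suppressed at level 2\n");
	return 0;
}
```
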
| @@ -128,8 +136,8 @@ struct ext4_prealloc_space { | |||
| 128 | unsigned pa_deleted; | 136 | unsigned pa_deleted; |
| 129 | ext4_fsblk_t pa_pstart; /* phys. block */ | 137 | ext4_fsblk_t pa_pstart; /* phys. block */ |
| 130 | ext4_lblk_t pa_lstart; /* log. block */ | 138 | ext4_lblk_t pa_lstart; /* log. block */ |
| 131 | unsigned short pa_len; /* len of preallocated chunk */ | 139 | ext4_grpblk_t pa_len; /* len of preallocated chunk */ |
| 132 | unsigned short pa_free; /* how many blocks are free */ | 140 | ext4_grpblk_t pa_free; /* how many blocks are free */ |
| 133 | unsigned short pa_type; /* pa type. inode or group */ | 141 | unsigned short pa_type; /* pa type. inode or group */ |
| 134 | spinlock_t *pa_obj_lock; | 142 | spinlock_t *pa_obj_lock; |
| 135 | struct inode *pa_inode; /* hack, for history only */ | 143 | struct inode *pa_inode; /* hack, for history only */ |
| @@ -144,7 +152,7 @@ struct ext4_free_extent { | |||
| 144 | ext4_lblk_t fe_logical; | 152 | ext4_lblk_t fe_logical; |
| 145 | ext4_grpblk_t fe_start; | 153 | ext4_grpblk_t fe_start; |
| 146 | ext4_group_t fe_group; | 154 | ext4_group_t fe_group; |
| 147 | int fe_len; | 155 | ext4_grpblk_t fe_len; |
| 148 | }; | 156 | }; |
| 149 | 157 | ||
| 150 | /* | 158 | /* |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 313a50b39741..bf519f239ae6 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
| @@ -353,17 +353,16 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | |||
| 353 | 353 | ||
| 354 | down_write(&EXT4_I(inode)->i_data_sem); | 354 | down_write(&EXT4_I(inode)->i_data_sem); |
| 355 | /* | 355 | /* |
| 356 | * if EXT4_EXT_MIGRATE is cleared a block allocation | 356 | * if EXT4_STATE_EXT_MIGRATE is cleared a block allocation |
| 357 | * happened after we started the migrate. We need to | 357 | * happened after we started the migrate. We need to |
| 358 | * fail the migrate | 358 | * fail the migrate |
| 359 | */ | 359 | */ |
| 360 | if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) { | 360 | if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) { |
| 361 | retval = -EAGAIN; | 361 | retval = -EAGAIN; |
| 362 | up_write(&EXT4_I(inode)->i_data_sem); | 362 | up_write(&EXT4_I(inode)->i_data_sem); |
| 363 | goto err_out; | 363 | goto err_out; |
| 364 | } else | 364 | } else |
| 365 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & | 365 | EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; |
| 366 | ~EXT4_EXT_MIGRATE; | ||
| 367 | /* | 366 | /* |
| 368 | * We have the extent map built with the tmp inode. | 367 | * We have the extent map built with the tmp inode. |
| 369 | * Now copy the i_data across | 368 | * Now copy the i_data across |
| @@ -517,14 +516,15 @@ int ext4_ext_migrate(struct inode *inode) | |||
| 517 | * when we add extents we extend the journal | 516 | */ |
| 518 | */ | 517 | */ |
| 519 | /* | 518 | /* |
| 520 | * Even though we take i_mutex we can still cause block allocation | 519 | * Even though we take i_mutex we can still cause block |
| 521 | * via mmap write to holes. If we have allocated new blocks we fail | 520 | * allocation via mmap write to holes. If we have allocated |
| 522 | * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. | 521 | * new blocks we fail migrate. New block allocation will |
| 523 | * The flag is updated with i_data_sem held to prevent racing with | 522 | * clear EXT4_STATE_EXT_MIGRATE flag. The flag is updated |
| 524 | * block allocation. | 523 | * with i_data_sem held to prevent racing with block |
| 524 | * allocation. | ||
| 525 | */ | 525 | */ |
| 526 | down_read((&EXT4_I(inode)->i_data_sem)); | 526 | down_read((&EXT4_I(inode)->i_data_sem)); |
| 527 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE; | 527 | EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE; |
| 528 | up_read((&EXT4_I(inode)->i_data_sem)); | 528 | up_read((&EXT4_I(inode)->i_data_sem)); |
| 529 | 529 | ||
| 530 | handle = ext4_journal_start(inode, 1); | 530 | handle = ext4_journal_start(inode, 1); |
| @@ -618,7 +618,7 @@ err_out: | |||
| 618 | tmp_inode->i_nlink = 0; | 618 | tmp_inode->i_nlink = 0; |
| 619 | 619 | ||
| 620 | ext4_journal_stop(handle); | 620 | ext4_journal_stop(handle); |
| 621 | 621 | unlock_new_inode(tmp_inode); | |
| 622 | iput(tmp_inode); | 622 | iput(tmp_inode); |
| 623 | 623 | ||
| 624 | return retval; | 624 | return retval; |
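The migrate.c hunks move the migration marker from the on-disk-visible i_flags into the in-memory i_state (EXT4_STATE_EXT_MIGRATE) while keeping the same i_data_sem discipline: the flag is set under the read side before the temporary extent tree is built, block allocation clears it, and the final swap tests it under the write side and fails with -EAGAIN if it is gone. A userspace model of that handshake, with a pthread rwlock standing in for i_data_sem; this is a sketch of the logic only, not kernel code:

```c
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define STATE_EXT_MIGRATE 0x1	/* stands in for EXT4_STATE_EXT_MIGRATE */

static pthread_rwlock_t data_sem = PTHREAD_RWLOCK_INITIALIZER;
static unsigned int i_state;

static void start_migrate(void)
{
	/* The real code takes i_data_sem for read here; only this path sets the bit. */
	pthread_rwlock_rdlock(&data_sem);
	i_state |= STATE_EXT_MIGRATE;
	pthread_rwlock_unlock(&data_sem);
}

static void block_allocated(void)
{
	pthread_rwlock_wrlock(&data_sem);
	i_state &= ~STATE_EXT_MIGRATE;	/* an allocation invalidates the migration */
	pthread_rwlock_unlock(&data_sem);
}

static int swap_inode_data(void)
{
	int ret = 0;

	pthread_rwlock_wrlock(&data_sem);
	if (!(i_state & STATE_EXT_MIGRATE))
		ret = -EAGAIN;		/* a block allocation raced us: fail the migrate */
	else
		i_state &= ~STATE_EXT_MIGRATE;
	pthread_rwlock_unlock(&data_sem);
	return ret;
}

int main(void)
{
	start_migrate();
	block_allocated();
	printf("swap returns %d (expect -EAGAIN = %d)\n", swap_inode_data(), -EAGAIN);
	return 0;
}
```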
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index bbf2dd9404dc..c07a2915e40b 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
| @@ -19,14 +19,31 @@ | |||
| 19 | #include "ext4_extents.h" | 19 | #include "ext4_extents.h" |
| 20 | #include "ext4.h" | 20 | #include "ext4.h" |
| 21 | 21 | ||
| 22 | #define get_ext_path(path, inode, block, ret) \ | 22 | /** |
| 23 | do { \ | 23 | * get_ext_path - Find an extent path for designated logical block number. |
| 24 | path = ext4_ext_find_extent(inode, block, path); \ | 24 | * |
| 25 | if (IS_ERR(path)) { \ | 25 | * @inode: an inode which is searched |
| 26 | ret = PTR_ERR(path); \ | 26 | * @lblock: logical block number to find an extent path |
| 27 | path = NULL; \ | 27 | * @path: pointer to an extent path pointer (for output) |
| 28 | } \ | 28 | * |
| 29 | } while (0) | 29 | * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value |
| 30 | * on failure. | ||
| 31 | */ | ||
| 32 | static inline int | ||
| 33 | get_ext_path(struct inode *inode, ext4_lblk_t lblock, | ||
| 34 | struct ext4_ext_path **path) | ||
| 35 | { | ||
| 36 | int ret = 0; | ||
| 37 | |||
| 38 | *path = ext4_ext_find_extent(inode, lblock, *path); | ||
| 39 | if (IS_ERR(*path)) { | ||
| 40 | ret = PTR_ERR(*path); | ||
| 41 | *path = NULL; | ||
| 42 | } else if ((*path)[ext_depth(inode)].p_ext == NULL) | ||
| 43 | ret = -ENODATA; | ||
| 44 | |||
| 45 | return ret; | ||
| 46 | } | ||
| 30 | 47 | ||
| 31 | /** | 48 | /** |
| 32 | * copy_extent_status - Copy the extent's initialization status | 49 | * copy_extent_status - Copy the extent's initialization status |
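Replacing the get_ext_path() macro with a real function changes the calling convention: the extent path comes back through a pointer-to-pointer, an ERR_PTR result from ext4_ext_find_extent() is folded into the int return (with the path left NULL), and a lookup whose leaf holds no extent now reports -ENODATA, which is what lets the later null-extent checks drop out of the callers. A self-contained sketch of that convention; IS_ERR/PTR_ERR/ERR_PTR here are simplified stand-ins for the kernel helpers and find_extent() is invented for the example:

```c
#include <errno.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's error-pointer helpers. */
#define IS_ERR(p)   ((unsigned long)(p) >= (unsigned long)-4095)
#define PTR_ERR(p)  ((long)(p))
#define ERR_PTR(e)  ((void *)(long)(e))

struct path { int has_leaf_extent; };

/* Pretend lookup: returns an ERR_PTR on failure, a real path otherwise. */
static struct path *find_extent(int block, struct path *reuse)
{
	static struct path found = { .has_leaf_extent = 1 };
	(void)reuse;
	return block < 0 ? ERR_PTR(-EIO) : &found;
}

/* Same shape as the new get_ext_path(): 0 on success, negative errno on
 * failure, and *path is left NULL when the lookup itself failed. */
static int get_path(int block, struct path **path)
{
	int ret = 0;

	*path = find_extent(block, *path);
	if (IS_ERR(*path)) {
		ret = PTR_ERR(*path);
		*path = NULL;
	} else if (!(*path)->has_leaf_extent) {
		ret = -ENODATA;
	}
	return ret;
}

int main(void)
{
	struct path *p = NULL;
	int err;

	printf("ok lookup: %d\n", get_path(10, &p));
	err = get_path(-1, &p);
	printf("failed lookup: %d (path %s)\n", err, p ? "kept" : "NULL");
	return 0;
}
```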
| @@ -113,6 +130,31 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
| 113 | } | 130 | } |
| 114 | 131 | ||
| 115 | /** | 132 | /** |
| 133 | * mext_check_null_inode - NULL check for two inodes | ||
| 134 | * | ||
| 135 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. | ||
| 136 | */ | ||
| 137 | static int | ||
| 138 | mext_check_null_inode(struct inode *inode1, struct inode *inode2, | ||
| 139 | const char *function) | ||
| 140 | { | ||
| 141 | int ret = 0; | ||
| 142 | |||
| 143 | if (inode1 == NULL) { | ||
| 144 | ext4_error(inode2->i_sb, function, | ||
| 145 | "Both inodes should not be NULL: " | ||
| 146 | "inode1 NULL inode2 %lu", inode2->i_ino); | ||
| 147 | ret = -EIO; | ||
| 148 | } else if (inode2 == NULL) { | ||
| 149 | ext4_error(inode1->i_sb, function, | ||
| 150 | "Both inodes should not be NULL: " | ||
| 151 | "inode1 %lu inode2 NULL", inode1->i_ino); | ||
| 152 | ret = -EIO; | ||
| 153 | } | ||
| 154 | return ret; | ||
| 155 | } | ||
| 156 | |||
| 157 | /** | ||
| 116 | * mext_double_down_read - Acquire two inodes' read semaphore | 158 | * mext_double_down_read - Acquire two inodes' read semaphore |
| 117 | * | 159 | * |
| 118 | * @orig_inode: original inode structure | 160 | * @orig_inode: original inode structure |
| @@ -124,8 +166,6 @@ mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode) | |||
| 124 | { | 166 | { |
| 125 | struct inode *first = orig_inode, *second = donor_inode; | 167 | struct inode *first = orig_inode, *second = donor_inode; |
| 126 | 168 | ||
| 127 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
| 128 | |||
| 129 | /* | 169 | /* |
| 130 | * Use the inode number to provide the stable locking order instead | 170 | * Use the inode number to provide the stable locking order instead |
| 131 | * of its address, because the C language doesn't guarantee you can | 171 | * of its address, because the C language doesn't guarantee you can |
| @@ -152,8 +192,6 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) | |||
| 152 | { | 192 | { |
| 153 | struct inode *first = orig_inode, *second = donor_inode; | 193 | struct inode *first = orig_inode, *second = donor_inode; |
| 154 | 194 | ||
| 155 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
| 156 | |||
| 157 | /* | 195 | /* |
| 158 | * Use the inode number to provide the stable locking order instead | 196 | * Use the inode number to provide the stable locking order instead |
| 159 | * of its address, because the C language doesn't guarantee you can | 197 | * of its address, because the C language doesn't guarantee you can |
| @@ -178,8 +216,6 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) | |||
| 178 | static void | 216 | static void |
| 179 | mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) | 217 | mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) |
| 180 | { | 218 | { |
| 181 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
| 182 | |||
| 183 | up_read(&EXT4_I(orig_inode)->i_data_sem); | 219 | up_read(&EXT4_I(orig_inode)->i_data_sem); |
| 184 | up_read(&EXT4_I(donor_inode)->i_data_sem); | 220 | up_read(&EXT4_I(donor_inode)->i_data_sem); |
| 185 | } | 221 | } |
| @@ -194,8 +230,6 @@ mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) | |||
| 194 | static void | 230 | static void |
| 195 | mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) | 231 | mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) |
| 196 | { | 232 | { |
| 197 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
| 198 | |||
| 199 | up_write(&EXT4_I(orig_inode)->i_data_sem); | 233 | up_write(&EXT4_I(orig_inode)->i_data_sem); |
| 200 | up_write(&EXT4_I(donor_inode)->i_data_sem); | 234 | up_write(&EXT4_I(donor_inode)->i_data_sem); |
| 201 | } | 235 | } |
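The dropped BUG_ON(orig_inode == NULL || donor_inode == NULL) checks are superseded by the mext_check_null_inode() reporting added earlier; the double-lock helpers themselves keep the rule spelled out in their comments, namely to take the two i_data_sem semaphores in inode-number order so concurrent moves on the same pair of inodes cannot deadlock. A userspace model of ordered double locking, with pthread mutexes standing in for the rw_semaphores:

```c
#include <pthread.h>
#include <stdio.h>

struct fake_inode {
	unsigned long ino;
	pthread_mutex_t lock;
};

/* Always lock the lower inode number first, like mext_double_down_write():
 * a stable global order prevents AB/BA deadlock between two callers. */
static void double_lock(struct fake_inode *a, struct fake_inode *b)
{
	struct fake_inode *first = a, *second = b;

	if (first->ino > second->ino) {
		first = b;
		second = a;
	}
	pthread_mutex_lock(&first->lock);
	pthread_mutex_lock(&second->lock);
}

static void double_unlock(struct fake_inode *a, struct fake_inode *b)
{
	pthread_mutex_unlock(&a->lock);
	pthread_mutex_unlock(&b->lock);
}

int main(void)
{
	struct fake_inode orig  = { 12, PTHREAD_MUTEX_INITIALIZER };
	struct fake_inode donor = {  7, PTHREAD_MUTEX_INITIALIZER };

	double_lock(&orig, &donor);	/* takes ino 7 then 12, whatever the call order */
	puts("both inodes locked in ino order");
	double_unlock(&orig, &donor);
	return 0;
}
```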
| @@ -283,8 +317,8 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
| 283 | } | 317 | } |
| 284 | 318 | ||
| 285 | if (new_flag) { | 319 | if (new_flag) { |
| 286 | get_ext_path(orig_path, orig_inode, eblock, err); | 320 | err = get_ext_path(orig_inode, eblock, &orig_path); |
| 287 | if (orig_path == NULL) | 321 | if (err) |
| 288 | goto out; | 322 | goto out; |
| 289 | 323 | ||
| 290 | if (ext4_ext_insert_extent(handle, orig_inode, | 324 | if (ext4_ext_insert_extent(handle, orig_inode, |
| @@ -293,9 +327,9 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
| 293 | } | 327 | } |
| 294 | 328 | ||
| 295 | if (end_flag) { | 329 | if (end_flag) { |
| 296 | get_ext_path(orig_path, orig_inode, | 330 | err = get_ext_path(orig_inode, |
| 297 | le32_to_cpu(end_ext->ee_block) - 1, err); | 331 | le32_to_cpu(end_ext->ee_block) - 1, &orig_path); |
| 298 | if (orig_path == NULL) | 332 | if (err) |
| 299 | goto out; | 333 | goto out; |
| 300 | 334 | ||
| 301 | if (ext4_ext_insert_extent(handle, orig_inode, | 335 | if (ext4_ext_insert_extent(handle, orig_inode, |
| @@ -519,7 +553,15 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
| 519 | * oext |-----------| | 553 | * oext |-----------| |
| 520 | * new_ext |-------| | 554 | * new_ext |-------| |
| 521 | */ | 555 | */ |
| 522 | BUG_ON(le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end); | 556 | if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { |
| 557 | ext4_error(orig_inode->i_sb, __func__, | ||
| 558 | "new_ext_end(%u) should be less than or equal to " | ||
| 559 | "oext->ee_block(%u) + oext_alen(%d) - 1", | ||
| 560 | new_ext_end, le32_to_cpu(oext->ee_block), | ||
| 561 | oext_alen); | ||
| 562 | ret = -EIO; | ||
| 563 | goto out; | ||
| 564 | } | ||
| 523 | 565 | ||
| 524 | /* | 566 | /* |
| 525 | * Case: new_ext is smaller than original extent | 567 | * Case: new_ext is smaller than original extent |
| @@ -543,6 +585,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
| 543 | 585 | ||
| 544 | ret = mext_insert_extents(handle, orig_inode, orig_path, o_start, | 586 | ret = mext_insert_extents(handle, orig_inode, orig_path, o_start, |
| 545 | o_end, &start_ext, &new_ext, &end_ext); | 587 | o_end, &start_ext, &new_ext, &end_ext); |
| 588 | out: | ||
| 546 | return ret; | 589 | return ret; |
| 547 | } | 590 | } |
| 548 | 591 | ||
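This hunk shows the pattern used throughout move_extent.c in this series: a BUG_ON() that would take the machine down on a bad extent layout becomes an ext4_error() report plus an -EIO return through the new out: label, so a corrupted filesystem only fails the EXT4_IOC_MOVE_EXT request. The same shape in standalone form; report() stands in for ext4_error() and the extent bookkeeping is reduced to two numbers:

```c
#include <errno.h>
#include <stdio.h>

static void report(const char *func, const char *fmt, unsigned a, unsigned b)
{
	fprintf(stderr, "error in %s: ", func);
	fprintf(stderr, fmt, a, b);
	fputc('\n', stderr);
}

/* Instead of asserting, validate the invariant and return -EIO so the
 * caller can abort the extent swap cleanly. */
static int leaf_block(unsigned oext_end, unsigned new_ext_end)
{
	int ret = 0;

	if (oext_end < new_ext_end) {
		report(__func__, "new_ext_end(%u) should not pass oext end(%u)",
		       new_ext_end, oext_end);
		ret = -EIO;
		goto out;
	}
	/* ... the real function would split and insert extents here ... */
out:
	return ret;
}

int main(void)
{
	printf("valid: %d, invalid: %d\n", leaf_block(100, 90), leaf_block(80, 90));
	return 0;
}
```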
| @@ -554,8 +597,10 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
| 554 | * @orig_off: block offset of original inode | 597 | * @orig_off: block offset of original inode |
| 555 | * @donor_off: block offset of donor inode | 598 | * @donor_off: block offset of donor inode |
| 556 | * @max_count: the maximum length of extents | 599 | * @max_count: the maximum length of extents |
| 600 | * | ||
| 601 | * Return 0 on success, or a negative error value on failure. | ||
| 557 | */ | 602 | */ |
| 558 | static void | 603 | static int |
| 559 | mext_calc_swap_extents(struct ext4_extent *tmp_dext, | 604 | mext_calc_swap_extents(struct ext4_extent *tmp_dext, |
| 560 | struct ext4_extent *tmp_oext, | 605 | struct ext4_extent *tmp_oext, |
| 561 | ext4_lblk_t orig_off, ext4_lblk_t donor_off, | 606 | ext4_lblk_t orig_off, ext4_lblk_t donor_off, |
| @@ -564,6 +609,19 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
| 564 | ext4_lblk_t diff, orig_diff; | 609 | ext4_lblk_t diff, orig_diff; |
| 565 | struct ext4_extent dext_old, oext_old; | 610 | struct ext4_extent dext_old, oext_old; |
| 566 | 611 | ||
| 612 | BUG_ON(orig_off != donor_off); | ||
| 613 | |||
| 614 | /* original and donor extents have to cover the same block offset */ | ||
| 615 | if (orig_off < le32_to_cpu(tmp_oext->ee_block) || | ||
| 616 | le32_to_cpu(tmp_oext->ee_block) + | ||
| 617 | ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off) | ||
| 618 | return -ENODATA; | ||
| 619 | |||
| 620 | if (orig_off < le32_to_cpu(tmp_dext->ee_block) || | ||
| 621 | le32_to_cpu(tmp_dext->ee_block) + | ||
| 622 | ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off) | ||
| 623 | return -ENODATA; | ||
| 624 | |||
| 567 | dext_old = *tmp_dext; | 625 | dext_old = *tmp_dext; |
| 568 | oext_old = *tmp_oext; | 626 | oext_old = *tmp_oext; |
| 569 | 627 | ||
| @@ -591,6 +649,8 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
| 591 | 649 | ||
| 592 | copy_extent_status(&oext_old, tmp_dext); | 650 | copy_extent_status(&oext_old, tmp_dext); |
| 593 | copy_extent_status(&dext_old, tmp_oext); | 651 | copy_extent_status(&dext_old, tmp_oext); |
| 652 | |||
| 653 | return 0; | ||
| 594 | } | 654 | } |
| 595 | 655 | ||
| 596 | /** | 656 | /** |
| @@ -631,13 +691,13 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
| 631 | mext_double_down_write(orig_inode, donor_inode); | 691 | mext_double_down_write(orig_inode, donor_inode); |
| 632 | 692 | ||
| 633 | /* Get the original extent for the block "orig_off" */ | 693 | /* Get the original extent for the block "orig_off" */ |
| 634 | get_ext_path(orig_path, orig_inode, orig_off, err); | 694 | err = get_ext_path(orig_inode, orig_off, &orig_path); |
| 635 | if (orig_path == NULL) | 695 | if (err) |
| 636 | goto out; | 696 | goto out; |
| 637 | 697 | ||
| 638 | /* Get the donor extent for the head */ | 698 | /* Get the donor extent for the head */ |
| 639 | get_ext_path(donor_path, donor_inode, donor_off, err); | 699 | err = get_ext_path(donor_inode, donor_off, &donor_path); |
| 640 | if (donor_path == NULL) | 700 | if (err) |
| 641 | goto out; | 701 | goto out; |
| 642 | depth = ext_depth(orig_inode); | 702 | depth = ext_depth(orig_inode); |
| 643 | oext = orig_path[depth].p_ext; | 703 | oext = orig_path[depth].p_ext; |
| @@ -647,13 +707,28 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
| 647 | dext = donor_path[depth].p_ext; | 707 | dext = donor_path[depth].p_ext; |
| 648 | tmp_dext = *dext; | 708 | tmp_dext = *dext; |
| 649 | 709 | ||
| 650 | mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | 710 | err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, |
| 651 | donor_off, count); | 711 | donor_off, count); |
| 712 | if (err) | ||
| 713 | goto out; | ||
| 652 | 714 | ||
| 653 | /* Loop for the donor extents */ | 715 | /* Loop for the donor extents */ |
| 654 | while (1) { | 716 | while (1) { |
| 655 | /* The extent for donor must be found. */ | 717 | /* The extent for donor must be found. */ |
| 656 | BUG_ON(!dext || donor_off != le32_to_cpu(tmp_dext.ee_block)); | 718 | if (!dext) { |
| 719 | ext4_error(donor_inode->i_sb, __func__, | ||
| 720 | "The extent for donor must be found"); | ||
| 721 | err = -EIO; | ||
| 722 | goto out; | ||
| 723 | } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { | ||
| 724 | ext4_error(donor_inode->i_sb, __func__, | ||
| 725 | "Donor offset(%u) and the first block of donor " | ||
| 726 | "extent(%u) should be equal", | ||
| 727 | donor_off, | ||
| 728 | le32_to_cpu(tmp_dext.ee_block)); | ||
| 729 | err = -EIO; | ||
| 730 | goto out; | ||
| 731 | } | ||
| 657 | 732 | ||
| 658 | /* Set donor extent to orig extent */ | 733 | /* Set donor extent to orig extent */ |
| 659 | err = mext_leaf_block(handle, orig_inode, | 734 | err = mext_leaf_block(handle, orig_inode, |
| @@ -678,8 +753,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
| 678 | 753 | ||
| 679 | if (orig_path) | 754 | if (orig_path) |
| 680 | ext4_ext_drop_refs(orig_path); | 755 | ext4_ext_drop_refs(orig_path); |
| 681 | get_ext_path(orig_path, orig_inode, orig_off, err); | 756 | err = get_ext_path(orig_inode, orig_off, &orig_path); |
| 682 | if (orig_path == NULL) | 757 | if (err) |
| 683 | goto out; | 758 | goto out; |
| 684 | depth = ext_depth(orig_inode); | 759 | depth = ext_depth(orig_inode); |
| 685 | oext = orig_path[depth].p_ext; | 760 | oext = orig_path[depth].p_ext; |
| @@ -692,9 +767,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
| 692 | 767 | ||
| 693 | if (donor_path) | 768 | if (donor_path) |
| 694 | ext4_ext_drop_refs(donor_path); | 769 | ext4_ext_drop_refs(donor_path); |
| 695 | get_ext_path(donor_path, donor_inode, | 770 | err = get_ext_path(donor_inode, donor_off, &donor_path); |
| 696 | donor_off, err); | 771 | if (err) |
| 697 | if (donor_path == NULL) | ||
| 698 | goto out; | 772 | goto out; |
| 699 | depth = ext_depth(donor_inode); | 773 | depth = ext_depth(donor_inode); |
| 700 | dext = donor_path[depth].p_ext; | 774 | dext = donor_path[depth].p_ext; |
| @@ -705,9 +779,10 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
| 705 | } | 779 | } |
| 706 | tmp_dext = *dext; | 780 | tmp_dext = *dext; |
| 707 | 781 | ||
| 708 | mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | 782 | err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, |
| 709 | donor_off, | 783 | donor_off, count - replaced_count); |
| 710 | count - replaced_count); | 784 | if (err) |
| 785 | goto out; | ||
| 711 | } | 786 | } |
| 712 | 787 | ||
| 713 | out: | 788 | out: |
| @@ -740,7 +815,7 @@ out: | |||
| 740 | * on success, or a negative error value on failure. | 815 | * on success, or a negative error value on failure. |
| 741 | */ | 816 | */ |
| 742 | static int | 817 | static int |
| 743 | move_extent_par_page(struct file *o_filp, struct inode *donor_inode, | 818 | move_extent_per_page(struct file *o_filp, struct inode *donor_inode, |
| 744 | pgoff_t orig_page_offset, int data_offset_in_page, | 819 | pgoff_t orig_page_offset, int data_offset_in_page, |
| 745 | int block_len_in_page, int uninit) | 820 | int block_len_in_page, int uninit) |
| 746 | { | 821 | { |
| @@ -871,6 +946,7 @@ out: | |||
| 871 | if (PageLocked(page)) | 946 | if (PageLocked(page)) |
| 872 | unlock_page(page); | 947 | unlock_page(page); |
| 873 | page_cache_release(page); | 948 | page_cache_release(page); |
| 949 | ext4_journal_stop(handle); | ||
| 874 | } | 950 | } |
| 875 | out2: | 951 | out2: |
| 876 | ext4_journal_stop(handle); | 952 | ext4_journal_stop(handle); |
| @@ -897,6 +973,10 @@ mext_check_arguments(struct inode *orig_inode, | |||
| 897 | struct inode *donor_inode, __u64 orig_start, | 973 | struct inode *donor_inode, __u64 orig_start, |
| 898 | __u64 donor_start, __u64 *len, __u64 moved_len) | 974 | __u64 donor_start, __u64 *len, __u64 moved_len) |
| 899 | { | 975 | { |
| 976 | ext4_lblk_t orig_blocks, donor_blocks; | ||
| 977 | unsigned int blkbits = orig_inode->i_blkbits; | ||
| 978 | unsigned int blocksize = 1 << blkbits; | ||
| 979 | |||
| 900 | /* Regular file check */ | 980 | /* Regular file check */ |
| 901 | if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { | 981 | if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { |
| 902 | ext4_debug("ext4 move extent: The argument files should be " | 982 | ext4_debug("ext4 move extent: The argument files should be " |
| @@ -960,54 +1040,58 @@ mext_check_arguments(struct inode *orig_inode, | |||
| 960 | return -EINVAL; | 1040 | return -EINVAL; |
| 961 | } | 1041 | } |
| 962 | 1042 | ||
| 963 | if ((orig_start > MAX_DEFRAG_SIZE) || | 1043 | if ((orig_start > EXT_MAX_BLOCK) || |
| 964 | (donor_start > MAX_DEFRAG_SIZE) || | 1044 | (donor_start > EXT_MAX_BLOCK) || |
| 965 | (*len > MAX_DEFRAG_SIZE) || | 1045 | (*len > EXT_MAX_BLOCK) || |
| 966 | (orig_start + *len > MAX_DEFRAG_SIZE)) { | 1046 | (orig_start + *len > EXT_MAX_BLOCK)) { |
| 967 | ext4_debug("ext4 move extent: Can't handle over [%lu] blocks " | 1047 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " |
| 968 | "[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE, | 1048 | "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK, |
| 969 | orig_inode->i_ino, donor_inode->i_ino); | 1049 | orig_inode->i_ino, donor_inode->i_ino); |
| 970 | return -EINVAL; | 1050 | return -EINVAL; |
| 971 | } | 1051 | } |
| 972 | 1052 | ||
| 973 | if (orig_inode->i_size > donor_inode->i_size) { | 1053 | if (orig_inode->i_size > donor_inode->i_size) { |
| 974 | if (orig_start >= donor_inode->i_size) { | 1054 | donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits; |
| 1055 | /* TODO: eliminate this artificial restriction */ | ||
| 1056 | if (orig_start >= donor_blocks) { | ||
| 975 | ext4_debug("ext4 move extent: orig start offset " | 1057 | ext4_debug("ext4 move extent: orig start offset " |
| 976 | "[%llu] should be less than donor file size " | 1058 | "[%llu] should be less than donor file blocks " |
| 977 | "[%lld] [ino:orig %lu, donor_inode %lu]\n", | 1059 | "[%u] [ino:orig %lu, donor %lu]\n", |
| 978 | orig_start, donor_inode->i_size, | 1060 | orig_start, donor_blocks, |
| 979 | orig_inode->i_ino, donor_inode->i_ino); | 1061 | orig_inode->i_ino, donor_inode->i_ino); |
| 980 | return -EINVAL; | 1062 | return -EINVAL; |
| 981 | } | 1063 | } |
| 982 | 1064 | ||
| 983 | if (orig_start + *len > donor_inode->i_size) { | 1065 | /* TODO: eliminate this artificial restriction */ |
| 1066 | if (orig_start + *len > donor_blocks) { | ||
| 984 | ext4_debug("ext4 move extent: End offset [%llu] should " | 1067 | ext4_debug("ext4 move extent: End offset [%llu] should " |
| 985 | "be less than donor file size [%lld]." | 1068 | "be less than donor file blocks [%u]." |
| 986 | "So adjust length from %llu to %lld " | 1069 | "So adjust length from %llu to %llu " |
| 987 | "[ino:orig %lu, donor %lu]\n", | 1070 | "[ino:orig %lu, donor %lu]\n", |
| 988 | orig_start + *len, donor_inode->i_size, | 1071 | orig_start + *len, donor_blocks, |
| 989 | *len, donor_inode->i_size - orig_start, | 1072 | *len, donor_blocks - orig_start, |
| 990 | orig_inode->i_ino, donor_inode->i_ino); | 1073 | orig_inode->i_ino, donor_inode->i_ino); |
| 991 | *len = donor_inode->i_size - orig_start; | 1074 | *len = donor_blocks - orig_start; |
| 992 | } | 1075 | } |
| 993 | } else { | 1076 | } else { |
| 994 | if (orig_start >= orig_inode->i_size) { | 1077 | orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits; |
| 1078 | if (orig_start >= orig_blocks) { | ||
| 995 | ext4_debug("ext4 move extent: start offset [%llu] " | 1079 | ext4_debug("ext4 move extent: start offset [%llu] " |
| 996 | "should be less than original file size " | 1080 | "should be less than original file blocks " |
| 997 | "[%lld] [inode:orig %lu, donor %lu]\n", | 1081 | "[%u] [ino:orig %lu, donor %lu]\n", |
| 998 | orig_start, orig_inode->i_size, | 1082 | orig_start, orig_blocks, |
| 999 | orig_inode->i_ino, donor_inode->i_ino); | 1083 | orig_inode->i_ino, donor_inode->i_ino); |
| 1000 | return -EINVAL; | 1084 | return -EINVAL; |
| 1001 | } | 1085 | } |
| 1002 | 1086 | ||
| 1003 | if (orig_start + *len > orig_inode->i_size) { | 1087 | if (orig_start + *len > orig_blocks) { |
| 1004 | ext4_debug("ext4 move extent: Adjust length " | 1088 | ext4_debug("ext4 move extent: Adjust length " |
| 1005 | "from %llu to %lld. Because it should be " | 1089 | "from %llu to %llu. Because it should be " |
| 1006 | "less than original file size " | 1090 | "less than original file blocks " |
| 1007 | "[ino:orig %lu, donor %lu]\n", | 1091 | "[ino:orig %lu, donor %lu]\n", |
| 1008 | *len, orig_inode->i_size - orig_start, | 1092 | *len, orig_blocks - orig_start, |
| 1009 | orig_inode->i_ino, donor_inode->i_ino); | 1093 | orig_inode->i_ino, donor_inode->i_ino); |
| 1010 | *len = orig_inode->i_size - orig_start; | 1094 | *len = orig_blocks - orig_start; |
| 1011 | } | 1095 | } |
| 1012 | } | 1096 | } |
| 1013 | 1097 | ||
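mext_check_arguments() now converts both files' i_size into block counts before validating the block-offset arguments against them, instead of comparing block offsets with byte sizes. The conversion is the usual round-up shift, (i_size + blocksize - 1) >> blkbits:

```c
#include <stdint.h>
#include <stdio.h>

/* Round a byte size up to whole filesystem blocks, as mext_check_arguments()
 * now does before comparing against orig_start and len. */
static uint64_t size_to_blocks(uint64_t i_size, unsigned int blkbits)
{
	uint64_t blocksize = 1ULL << blkbits;

	return (i_size + blocksize - 1) >> blkbits;
}

int main(void)
{
	/* 4 KiB blocks (blkbits = 12): 1 byte -> 1 block, 8192 -> 2, 8193 -> 3 */
	printf("%llu %llu %llu\n",
	       (unsigned long long)size_to_blocks(1, 12),
	       (unsigned long long)size_to_blocks(8192, 12),
	       (unsigned long long)size_to_blocks(8193, 12));
	return 0;
}
```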
| @@ -1027,18 +1111,23 @@ mext_check_arguments(struct inode *orig_inode, | |||
| 1027 | * @inode1: the inode structure | 1111 | * @inode1: the inode structure |
| 1028 | * @inode2: the inode structure | 1112 | * @inode2: the inode structure |
| 1029 | * | 1113 | * |
| 1030 | * Lock two inodes' i_mutex by i_ino order. This function is moved from | 1114 | * Lock two inodes' i_mutex by i_ino order. |
| 1031 | * fs/inode.c. | 1115 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. |
| 1032 | */ | 1116 | */ |
| 1033 | static void | 1117 | static int |
| 1034 | mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | 1118 | mext_inode_double_lock(struct inode *inode1, struct inode *inode2) |
| 1035 | { | 1119 | { |
| 1036 | if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { | 1120 | int ret = 0; |
| 1037 | if (inode1) | 1121 | |
| 1038 | mutex_lock(&inode1->i_mutex); | 1122 | BUG_ON(inode1 == NULL && inode2 == NULL); |
| 1039 | else if (inode2) | 1123 | |
| 1040 | mutex_lock(&inode2->i_mutex); | 1124 | ret = mext_check_null_inode(inode1, inode2, __func__); |
| 1041 | return; | 1125 | if (ret < 0) |
| 1126 | goto out; | ||
| 1127 | |||
| 1128 | if (inode1 == inode2) { | ||
| 1129 | mutex_lock(&inode1->i_mutex); | ||
| 1130 | goto out; | ||
| 1042 | } | 1131 | } |
| 1043 | 1132 | ||
| 1044 | if (inode1->i_ino < inode2->i_ino) { | 1133 | if (inode1->i_ino < inode2->i_ino) { |
| @@ -1048,6 +1137,9 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | |||
| 1048 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); | 1137 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); |
| 1049 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); | 1138 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); |
| 1050 | } | 1139 | } |
| 1140 | |||
| 1141 | out: | ||
| 1142 | return ret; | ||
| 1051 | } | 1143 | } |
| 1052 | 1144 | ||
| 1053 | /** | 1145 | /** |
| @@ -1056,17 +1148,28 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | |||
| 1056 | * @inode1: the inode that is released first | 1148 | * @inode1: the inode that is released first |
| 1057 | * @inode2: the inode that is released second | 1149 | * @inode2: the inode that is released second |
| 1058 | * | 1150 | * |
| 1059 | * This function is moved from fs/inode.c. | 1151 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. |
| 1060 | */ | 1152 | */ |
| 1061 | 1153 | ||
| 1062 | static void | 1154 | static int |
| 1063 | mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) | 1155 | mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) |
| 1064 | { | 1156 | { |
| 1157 | int ret = 0; | ||
| 1158 | |||
| 1159 | BUG_ON(inode1 == NULL && inode2 == NULL); | ||
| 1160 | |||
| 1161 | ret = mext_check_null_inode(inode1, inode2, __func__); | ||
| 1162 | if (ret < 0) | ||
| 1163 | goto out; | ||
| 1164 | |||
| 1065 | if (inode1) | 1165 | if (inode1) |
| 1066 | mutex_unlock(&inode1->i_mutex); | 1166 | mutex_unlock(&inode1->i_mutex); |
| 1067 | 1167 | ||
| 1068 | if (inode2 && inode2 != inode1) | 1168 | if (inode2 && inode2 != inode1) |
| 1069 | mutex_unlock(&inode2->i_mutex); | 1169 | mutex_unlock(&inode2->i_mutex); |
| 1170 | |||
| 1171 | out: | ||
| 1172 | return ret; | ||
| 1070 | } | 1173 | } |
| 1071 | 1174 | ||
| 1072 | /** | 1175 | /** |
| @@ -1123,70 +1226,76 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
| 1123 | ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; | 1226 | ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; |
| 1124 | ext4_lblk_t rest_blocks; | 1227 | ext4_lblk_t rest_blocks; |
| 1125 | pgoff_t orig_page_offset = 0, seq_end_page; | 1228 | pgoff_t orig_page_offset = 0, seq_end_page; |
| 1126 | int ret, depth, last_extent = 0; | 1229 | int ret1, ret2, depth, last_extent = 0; |
| 1127 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 1230 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
| 1128 | int data_offset_in_page; | 1231 | int data_offset_in_page; |
| 1129 | int block_len_in_page; | 1232 | int block_len_in_page; |
| 1130 | int uninit; | 1233 | int uninit; |
| 1131 | 1234 | ||
| 1132 | /* protect orig and donor against a truncate */ | 1235 | /* protect orig and donor against a truncate */ |
| 1133 | mext_inode_double_lock(orig_inode, donor_inode); | 1236 | ret1 = mext_inode_double_lock(orig_inode, donor_inode); |
| 1237 | if (ret1 < 0) | ||
| 1238 | return ret1; | ||
| 1134 | 1239 | ||
| 1135 | mext_double_down_read(orig_inode, donor_inode); | 1240 | mext_double_down_read(orig_inode, donor_inode); |
| 1136 | /* Check the filesystem environment whether move_extent can be done */ | 1241 | /* Check the filesystem environment whether move_extent can be done */ |
| 1137 | ret = mext_check_arguments(orig_inode, donor_inode, orig_start, | 1242 | ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, |
| 1138 | donor_start, &len, *moved_len); | 1243 | donor_start, &len, *moved_len); |
| 1139 | mext_double_up_read(orig_inode, donor_inode); | 1244 | mext_double_up_read(orig_inode, donor_inode); |
| 1140 | if (ret) | 1245 | if (ret1) |
| 1141 | goto out2; | 1246 | goto out; |
| 1142 | 1247 | ||
| 1143 | file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; | 1248 | file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; |
| 1144 | block_end = block_start + len - 1; | 1249 | block_end = block_start + len - 1; |
| 1145 | if (file_end < block_end) | 1250 | if (file_end < block_end) |
| 1146 | len -= block_end - file_end; | 1251 | len -= block_end - file_end; |
| 1147 | 1252 | ||
| 1148 | get_ext_path(orig_path, orig_inode, block_start, ret); | 1253 | ret1 = get_ext_path(orig_inode, block_start, &orig_path); |
| 1149 | if (orig_path == NULL) | 1254 | if (ret1) |
| 1150 | goto out2; | 1255 | goto out; |
| 1151 | 1256 | ||
| 1152 | /* Get path structure to check the hole */ | 1257 | /* Get path structure to check the hole */ |
| 1153 | get_ext_path(holecheck_path, orig_inode, block_start, ret); | 1258 | ret1 = get_ext_path(orig_inode, block_start, &holecheck_path); |
| 1154 | if (holecheck_path == NULL) | 1259 | if (ret1) |
| 1155 | goto out; | 1260 | goto out; |
| 1156 | 1261 | ||
| 1157 | depth = ext_depth(orig_inode); | 1262 | depth = ext_depth(orig_inode); |
| 1158 | ext_cur = holecheck_path[depth].p_ext; | 1263 | ext_cur = holecheck_path[depth].p_ext; |
| 1159 | if (ext_cur == NULL) { | ||
| 1160 | ret = -EINVAL; | ||
| 1161 | goto out; | ||
| 1162 | } | ||
| 1163 | 1264 | ||
| 1164 | /* | 1265 | /* |
| 1165 | * Get proper extent whose ee_block is beyond block_start | 1266 | * Get proper starting location of block replacement if block_start was |
| 1166 | * if block_start was within the hole. | 1267 | * within the hole. |
| 1167 | */ | 1268 | */ |
| 1168 | if (le32_to_cpu(ext_cur->ee_block) + | 1269 | if (le32_to_cpu(ext_cur->ee_block) + |
| 1169 | ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { | 1270 | ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { |
| 1271 | /* | ||
| 1272 | * The hole exists between extents or the tail of | ||
| 1273 | * original file. | ||
| 1274 | */ | ||
| 1170 | last_extent = mext_next_extent(orig_inode, | 1275 | last_extent = mext_next_extent(orig_inode, |
| 1171 | holecheck_path, &ext_cur); | 1276 | holecheck_path, &ext_cur); |
| 1172 | if (last_extent < 0) { | 1277 | if (last_extent < 0) { |
| 1173 | ret = last_extent; | 1278 | ret1 = last_extent; |
| 1174 | goto out; | 1279 | goto out; |
| 1175 | } | 1280 | } |
| 1176 | last_extent = mext_next_extent(orig_inode, orig_path, | 1281 | last_extent = mext_next_extent(orig_inode, orig_path, |
| 1177 | &ext_dummy); | 1282 | &ext_dummy); |
| 1178 | if (last_extent < 0) { | 1283 | if (last_extent < 0) { |
| 1179 | ret = last_extent; | 1284 | ret1 = last_extent; |
| 1180 | goto out; | 1285 | goto out; |
| 1181 | } | 1286 | } |
| 1182 | } | 1287 | seq_start = le32_to_cpu(ext_cur->ee_block); |
| 1183 | seq_start = block_start; | 1288 | } else if (le32_to_cpu(ext_cur->ee_block) > block_start) |
| 1289 | /* The hole exists at the beginning of original file. */ | ||
| 1290 | seq_start = le32_to_cpu(ext_cur->ee_block); | ||
| 1291 | else | ||
| 1292 | seq_start = block_start; | ||
| 1184 | 1293 | ||
| 1185 | /* No blocks within the specified range. */ | 1294 | /* No blocks within the specified range. */ |
| 1186 | if (le32_to_cpu(ext_cur->ee_block) > block_end) { | 1295 | if (le32_to_cpu(ext_cur->ee_block) > block_end) { |
| 1187 | ext4_debug("ext4 move extent: The specified range of file " | 1296 | ext4_debug("ext4 move extent: The specified range of file " |
| 1188 | "may be the hole\n"); | 1297 | "may be the hole\n"); |
| 1189 | ret = -EINVAL; | 1298 | ret1 = -EINVAL; |
| 1190 | goto out; | 1299 | goto out; |
| 1191 | } | 1300 | } |
| 1192 | 1301 | ||
| @@ -1206,7 +1315,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
| 1206 | last_extent = mext_next_extent(orig_inode, holecheck_path, | 1315 | last_extent = mext_next_extent(orig_inode, holecheck_path, |
| 1207 | &ext_cur); | 1316 | &ext_cur); |
| 1208 | if (last_extent < 0) { | 1317 | if (last_extent < 0) { |
| 1209 | ret = last_extent; | 1318 | ret1 = last_extent; |
| 1210 | break; | 1319 | break; |
| 1211 | } | 1320 | } |
| 1212 | add_blocks = ext4_ext_get_actual_len(ext_cur); | 1321 | add_blocks = ext4_ext_get_actual_len(ext_cur); |
| @@ -1258,16 +1367,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
| 1258 | while (orig_page_offset <= seq_end_page) { | 1367 | while (orig_page_offset <= seq_end_page) { |
| 1259 | 1368 | ||
| 1260 | /* Swap original branches with new branches */ | 1369 | /* Swap original branches with new branches */ |
| 1261 | ret = move_extent_par_page(o_filp, donor_inode, | 1370 | ret1 = move_extent_per_page(o_filp, donor_inode, |
| 1262 | orig_page_offset, | 1371 | orig_page_offset, |
| 1263 | data_offset_in_page, | 1372 | data_offset_in_page, |
| 1264 | block_len_in_page, uninit); | 1373 | block_len_in_page, uninit); |
| 1265 | if (ret < 0) | 1374 | if (ret1 < 0) |
| 1266 | goto out; | 1375 | goto out; |
| 1267 | orig_page_offset++; | 1376 | orig_page_offset++; |
| 1268 | /* Count how many blocks we have exchanged */ | 1377 | /* Count how many blocks we have exchanged */ |
| 1269 | *moved_len += block_len_in_page; | 1378 | *moved_len += block_len_in_page; |
| 1270 | BUG_ON(*moved_len > len); | 1379 | if (*moved_len > len) { |
| 1380 | ext4_error(orig_inode->i_sb, __func__, | ||
| 1381 | "We replaced blocks too much! " | ||
| 1382 | "sum of replaced: %llu requested: %llu", | ||
| 1383 | *moved_len, len); | ||
| 1384 | ret1 = -EIO; | ||
| 1385 | goto out; | ||
| 1386 | } | ||
| 1271 | 1387 | ||
| 1272 | data_offset_in_page = 0; | 1388 | data_offset_in_page = 0; |
| 1273 | rest_blocks -= block_len_in_page; | 1389 | rest_blocks -= block_len_in_page; |
| @@ -1280,17 +1396,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
| 1280 | /* Decrease buffer counter */ | 1396 | /* Decrease buffer counter */ |
| 1281 | if (holecheck_path) | 1397 | if (holecheck_path) |
| 1282 | ext4_ext_drop_refs(holecheck_path); | 1398 | ext4_ext_drop_refs(holecheck_path); |
| 1283 | get_ext_path(holecheck_path, orig_inode, | 1399 | ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path); |
| 1284 | seq_start, ret); | 1400 | if (ret1) |
| 1285 | if (holecheck_path == NULL) | ||
| 1286 | break; | 1401 | break; |
| 1287 | depth = holecheck_path->p_depth; | 1402 | depth = holecheck_path->p_depth; |
| 1288 | 1403 | ||
| 1289 | /* Decrease buffer counter */ | 1404 | /* Decrease buffer counter */ |
| 1290 | if (orig_path) | 1405 | if (orig_path) |
| 1291 | ext4_ext_drop_refs(orig_path); | 1406 | ext4_ext_drop_refs(orig_path); |
| 1292 | get_ext_path(orig_path, orig_inode, seq_start, ret); | 1407 | ret1 = get_ext_path(orig_inode, seq_start, &orig_path); |
| 1293 | if (orig_path == NULL) | 1408 | if (ret1) |
| 1294 | break; | 1409 | break; |
| 1295 | 1410 | ||
| 1296 | ext_cur = holecheck_path[depth].p_ext; | 1411 | ext_cur = holecheck_path[depth].p_ext; |
| @@ -1307,14 +1422,13 @@ out: | |||
| 1307 | ext4_ext_drop_refs(holecheck_path); | 1422 | ext4_ext_drop_refs(holecheck_path); |
| 1308 | kfree(holecheck_path); | 1423 | kfree(holecheck_path); |
| 1309 | } | 1424 | } |
| 1310 | out2: | ||
| 1311 | mext_inode_double_unlock(orig_inode, donor_inode); | ||
| 1312 | 1425 | ||
| 1313 | if (ret) | 1426 | ret2 = mext_inode_double_unlock(orig_inode, donor_inode); |
| 1314 | return ret; | ||
| 1315 | 1427 | ||
| 1316 | /* All of the specified blocks must be exchanged in succeed */ | 1428 | if (ret1) |
| 1317 | BUG_ON(*moved_len != len); | 1429 | return ret1; |
| 1430 | else if (ret2) | ||
| 1431 | return ret2; | ||
| 1318 | 1432 | ||
| 1319 | return 0; | 1433 | return 0; |
| 1320 | } | 1434 | } |
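With mext_inode_double_lock()/mext_inode_double_unlock() now returning errors, ext4_move_extents() finishes by combining two status values: ret1 from the move itself and ret2 from the unlock, with the move's result taking precedence; the old BUG_ON(*moved_len != len) is gone in favour of the in-loop ext4_error() check. That combination rule on its own:

```c
#include <errno.h>
#include <stdio.h>

/* The main operation's status wins; an error from the cleanup step is only
 * surfaced when the operation itself succeeded. */
static int combine_status(int ret1, int ret2)
{
	if (ret1)
		return ret1;
	if (ret2)
		return ret2;
	return 0;
}

int main(void)
{
	printf("%d %d %d\n",
	       combine_status(0, 0),		/* both fine          -> 0       */
	       combine_status(-EINVAL, -EIO),	/* work failed        -> -EINVAL */
	       combine_status(0, -EIO));	/* only unlock failed -> -EIO    */
	return 0;
}
```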
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 114abe5d2c1d..42f81d285cd5 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
| @@ -1518,8 +1518,12 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 1518 | return retval; | 1518 | return retval; |
| 1519 | 1519 | ||
| 1520 | if (blocks == 1 && !dx_fallback && | 1520 | if (blocks == 1 && !dx_fallback && |
| 1521 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) | 1521 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { |
| 1522 | return make_indexed_dir(handle, dentry, inode, bh); | 1522 | retval = make_indexed_dir(handle, dentry, inode, bh); |
| 1523 | if (retval == -ENOSPC) | ||
| 1524 | brelse(bh); | ||
| 1525 | return retval; | ||
| 1526 | } | ||
| 1523 | brelse(bh); | 1527 | brelse(bh); |
| 1524 | } | 1528 | } |
| 1525 | bh = ext4_append(handle, dir, &block, &retval); | 1529 | bh = ext4_append(handle, dir, &block, &retval); |
| @@ -1528,7 +1532,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 1528 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 1532 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
| 1529 | de->inode = 0; | 1533 | de->inode = 0; |
| 1530 | de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); | 1534 | de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); |
| 1531 | return add_dirent_to_buf(handle, dentry, inode, de, bh); | 1535 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); |
| 1536 | if (retval == -ENOSPC) | ||
| 1537 | brelse(bh); | ||
| 1538 | return retval; | ||
| 1532 | } | 1539 | } |
| 1533 | 1540 | ||
| 1534 | /* | 1541 | /* |
| @@ -1590,9 +1597,9 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 1590 | goto cleanup; | 1597 | goto cleanup; |
| 1591 | node2 = (struct dx_node *)(bh2->b_data); | 1598 | node2 = (struct dx_node *)(bh2->b_data); |
| 1592 | entries2 = node2->entries; | 1599 | entries2 = node2->entries; |
| 1600 | memset(&node2->fake, 0, sizeof(struct fake_dirent)); | ||
| 1593 | node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize, | 1601 | node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize, |
| 1594 | sb->s_blocksize); | 1602 | sb->s_blocksize); |
| 1595 | node2->fake.inode = 0; | ||
| 1596 | BUFFER_TRACE(frame->bh, "get_write_access"); | 1603 | BUFFER_TRACE(frame->bh, "get_write_access"); |
| 1597 | err = ext4_journal_get_write_access(handle, frame->bh); | 1604 | err = ext4_journal_get_write_access(handle, frame->bh); |
| 1598 | if (err) | 1605 | if (err) |
| @@ -1657,7 +1664,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 1657 | if (!de) | 1664 | if (!de) |
| 1658 | goto cleanup; | 1665 | goto cleanup; |
| 1659 | err = add_dirent_to_buf(handle, dentry, inode, de, bh); | 1666 | err = add_dirent_to_buf(handle, dentry, inode, de, bh); |
| 1660 | bh = NULL; | 1667 | if (err != -ENOSPC) |
| 1668 | bh = NULL; | ||
| 1661 | goto cleanup; | 1669 | goto cleanup; |
| 1662 | 1670 | ||
| 1663 | journal_error: | 1671 | journal_error: |
| @@ -2310,7 +2318,7 @@ static int ext4_link(struct dentry *old_dentry, | |||
| 2310 | struct inode *inode = old_dentry->d_inode; | 2318 | struct inode *inode = old_dentry->d_inode; |
| 2311 | int err, retries = 0; | 2319 | int err, retries = 0; |
| 2312 | 2320 | ||
| 2313 | if (EXT4_DIR_LINK_MAX(inode)) | 2321 | if (inode->i_nlink >= EXT4_LINK_MAX) |
| 2314 | return -EMLINK; | 2322 | return -EMLINK; |
| 2315 | 2323 | ||
| 2316 | /* | 2324 | /* |
| @@ -2413,7 +2421,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 2413 | goto end_rename; | 2421 | goto end_rename; |
| 2414 | retval = -EMLINK; | 2422 | retval = -EMLINK; |
| 2415 | if (!new_inode && new_dir != old_dir && | 2423 | if (!new_inode && new_dir != old_dir && |
| 2416 | new_dir->i_nlink >= EXT4_LINK_MAX) | 2424 | EXT4_DIR_LINK_MAX(new_dir)) |
| 2417 | goto end_rename; | 2425 | goto end_rename; |
| 2418 | } | 2426 | } |
| 2419 | if (!new_bh) { | 2427 | if (!new_bh) { |
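The namei.c hunks plug a buffer_head leak by pinning down the ownership rule the callers now assume: add_dirent_to_buf() (and make_indexed_dir(), which is treated the same way) consumes the bh reference on every outcome except -ENOSPC, so ext4_add_entry() releases it only in that case and ext4_dx_add_entry() keeps bh for its cleanup path only when -ENOSPC came back. A toy refcount version of that rule; put_buf, add_entry_to_buf and the have_space flag are invented for the illustration:

```c
#include <errno.h>
#include <stdio.h>

struct buf { int refcount; };

static void put_buf(struct buf *b)	/* stands in for brelse() */
{
	b->refcount--;
}

/* Consumes the caller's reference on every outcome except -ENOSPC,
 * mirroring what the callers above now assume about add_dirent_to_buf(). */
static int add_entry_to_buf(struct buf *b, int have_space)
{
	if (!have_space)
		return -ENOSPC;		/* the reference stays with the caller */
	put_buf(b);			/* consumed on success */
	return 0;
}

static int caller(struct buf *b, int have_space)
{
	int ret = add_entry_to_buf(b, have_space);

	if (ret == -ENOSPC)
		put_buf(b);		/* the one case where we still own the ref */
	return ret;
}

int main(void)
{
	struct buf a = { .refcount = 1 }, b = { .refcount = 1 };

	caller(&a, 1);
	caller(&b, 0);
	printf("refcounts after: %d %d (both should be 0)\n", a.refcount, b.refcount);
	return 0;
}
```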
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 68b0351fc647..3cfc343c41b5 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
| @@ -746,7 +746,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
| 746 | struct inode *inode = NULL; | 746 | struct inode *inode = NULL; |
| 747 | handle_t *handle; | 747 | handle_t *handle; |
| 748 | int gdb_off, gdb_num; | 748 | int gdb_off, gdb_num; |
| 749 | int num_grp_locked = 0; | ||
| 750 | int err, err2; | 749 | int err, err2; |
| 751 | 750 | ||
| 752 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); | 751 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); |
| @@ -856,7 +855,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
| 856 | * using the new disk blocks. | 855 | * using the new disk blocks. |
| 857 | */ | 856 | */ |
| 858 | 857 | ||
| 859 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group); | ||
| 860 | /* Update group descriptor block for new group */ | 858 | /* Update group descriptor block for new group */ |
| 861 | gdp = (struct ext4_group_desc *)((char *)primary->b_data + | 859 | gdp = (struct ext4_group_desc *)((char *)primary->b_data + |
| 862 | gdb_off * EXT4_DESC_SIZE(sb)); | 860 | gdb_off * EXT4_DESC_SIZE(sb)); |
| @@ -875,10 +873,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
| 875 | * descriptor | 873 | * descriptor |
| 876 | */ | 874 | */ |
| 877 | err = ext4_mb_add_groupinfo(sb, input->group, gdp); | 875 | err = ext4_mb_add_groupinfo(sb, input->group, gdp); |
| 878 | if (err) { | 876 | if (err) |
| 879 | ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked); | ||
| 880 | goto exit_journal; | 877 | goto exit_journal; |
| 881 | } | ||
| 882 | 878 | ||
| 883 | /* | 879 | /* |
| 884 | * Make the new blocks and inodes valid next. We do this before | 880 | * Make the new blocks and inodes valid next. We do this before |
| @@ -920,7 +916,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
| 920 | 916 | ||
| 921 | /* Update the global fs size fields */ | 917 | /* Update the global fs size fields */ |
| 922 | sbi->s_groups_count++; | 918 | sbi->s_groups_count++; |
| 923 | ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked); | ||
| 924 | 919 | ||
| 925 | ext4_handle_dirty_metadata(handle, NULL, primary); | 920 | ext4_handle_dirty_metadata(handle, NULL, primary); |
| 926 | 921 | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8f4f079e6b9a..a6b1ab734728 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
| @@ -45,6 +45,7 @@ | |||
| 45 | #include "ext4_jbd2.h" | 45 | #include "ext4_jbd2.h" |
| 46 | #include "xattr.h" | 46 | #include "xattr.h" |
| 47 | #include "acl.h" | 47 | #include "acl.h" |
| 48 | #include "mballoc.h" | ||
| 48 | 49 | ||
| 49 | #define CREATE_TRACE_POINTS | 50 | #define CREATE_TRACE_POINTS |
| 50 | #include <trace/events/ext4.h> | 51 | #include <trace/events/ext4.h> |
| @@ -344,7 +345,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno, | |||
| 344 | errstr = "Out of memory"; | 345 | errstr = "Out of memory"; |
| 345 | break; | 346 | break; |
| 346 | case -EROFS: | 347 | case -EROFS: |
| 347 | if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) | 348 | if (!sb || (EXT4_SB(sb)->s_journal && |
| 349 | EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) | ||
| 348 | errstr = "Journal has aborted"; | 350 | errstr = "Journal has aborted"; |
| 349 | else | 351 | else |
| 350 | errstr = "Readonly filesystem"; | 352 | errstr = "Readonly filesystem"; |
| @@ -1279,11 +1281,9 @@ static int parse_options(char *options, struct super_block *sb, | |||
| 1279 | *journal_devnum = option; | 1281 | *journal_devnum = option; |
| 1280 | break; | 1282 | break; |
| 1281 | case Opt_journal_checksum: | 1283 | case Opt_journal_checksum: |
| 1282 | set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); | 1284 | break; /* Kept for backwards compatibility */ |
| 1283 | break; | ||
| 1284 | case Opt_journal_async_commit: | 1285 | case Opt_journal_async_commit: |
| 1285 | set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); | 1286 | set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); |
| 1286 | set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); | ||
| 1287 | break; | 1287 | break; |
| 1288 | case Opt_noload: | 1288 | case Opt_noload: |
| 1289 | set_opt(sbi->s_mount_opt, NOLOAD); | 1289 | set_opt(sbi->s_mount_opt, NOLOAD); |
| @@ -1695,12 +1695,12 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
| 1695 | gdp = ext4_get_group_desc(sb, i, NULL); | 1695 | gdp = ext4_get_group_desc(sb, i, NULL); |
| 1696 | 1696 | ||
| 1697 | flex_group = ext4_flex_group(sbi, i); | 1697 | flex_group = ext4_flex_group(sbi, i); |
| 1698 | atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, | 1698 | atomic_add(ext4_free_inodes_count(sb, gdp), |
| 1699 | ext4_free_inodes_count(sb, gdp)); | 1699 | &sbi->s_flex_groups[flex_group].free_inodes); |
| 1700 | atomic_set(&sbi->s_flex_groups[flex_group].free_blocks, | 1700 | atomic_add(ext4_free_blks_count(sb, gdp), |
| 1701 | ext4_free_blks_count(sb, gdp)); | 1701 | &sbi->s_flex_groups[flex_group].free_blocks); |
| 1702 | atomic_set(&sbi->s_flex_groups[flex_group].used_dirs, | 1702 | atomic_add(ext4_used_dirs_count(sb, gdp), |
| 1703 | ext4_used_dirs_count(sb, gdp)); | 1703 | &sbi->s_flex_groups[flex_group].used_dirs); |
| 1704 | } | 1704 | } |
| 1705 | 1705 | ||
| 1706 | return 1; | 1706 | return 1; |
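The ext4_fill_flex_info() fix switches the per-flex-group counters from atomic_set() to atomic_add(): the loop runs over every block group and several block groups map to the same flex_group index, so overwriting left each flex group holding only its last member's free inode, free block and used directory counts. A small C11 demonstration of why the accumulation matters; the group counts and GROUPS_PER_FLEX value are made up for the example:

```c
#include <stdatomic.h>
#include <stdio.h>

#define GROUPS_PER_FLEX 4	/* illustrative stand-in for 1 << s_log_groups_per_flex */

int main(void)
{
	atomic_uint flex_free_inodes[2];
	unsigned int per_group_free[8] = { 10, 20, 30, 40, 50, 60, 70, 80 };

	atomic_init(&flex_free_inodes[0], 0);
	atomic_init(&flex_free_inodes[1], 0);

	/* Each block group contributes to its flex group's total; an
	 * atomic_set here would keep only the last group's value, which is
	 * the bug the hunk above corrects. */
	for (unsigned int i = 0; i < 8; i++)
		atomic_fetch_add(&flex_free_inodes[i / GROUPS_PER_FLEX],
				 per_group_free[i]);

	printf("flex 0: %u, flex 1: %u\n",
	       atomic_load(&flex_free_inodes[0]),
	       atomic_load(&flex_free_inodes[1]));
	return 0;
}
```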
| @@ -2253,6 +2253,49 @@ static struct kobj_type ext4_ktype = { | |||
| 2253 | .release = ext4_sb_release, | 2253 | .release = ext4_sb_release, |
| 2254 | }; | 2254 | }; |
| 2255 | 2255 | ||
| 2256 | /* | ||
| 2257 | * Check whether this filesystem can be mounted based on | ||
| 2258 | * the features present and the RDONLY/RDWR mount requested. | ||
| 2259 | * Returns 1 if this filesystem can be mounted as requested, | ||
| 2260 | * 0 if it cannot be. | ||
| 2261 | */ | ||
| 2262 | static int ext4_feature_set_ok(struct super_block *sb, int readonly) | ||
| 2263 | { | ||
| 2264 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { | ||
| 2265 | ext4_msg(sb, KERN_ERR, | ||
| 2266 | "Couldn't mount because of " | ||
| 2267 | "unsupported optional features (%x)", | ||
| 2268 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & | ||
| 2269 | ~EXT4_FEATURE_INCOMPAT_SUPP)); | ||
| 2270 | return 0; | ||
| 2271 | } | ||
| 2272 | |||
| 2273 | if (readonly) | ||
| 2274 | return 1; | ||
| 2275 | |||
| 2276 | /* Check that feature set is OK for a read-write mount */ | ||
| 2277 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { | ||
| 2278 | ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " | ||
| 2279 | "unsupported optional features (%x)", | ||
| 2280 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & | ||
| 2281 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | ||
| 2282 | return 0; | ||
| 2283 | } | ||
| 2284 | /* | ||
| 2285 | * Large file size enabled file system can only be mounted | ||
| 2286 | * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF | ||
| 2287 | */ | ||
| 2288 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { | ||
| 2289 | if (sizeof(blkcnt_t) < sizeof(u64)) { | ||
| 2290 | ext4_msg(sb, KERN_ERR, "Filesystem with huge files " | ||
| 2291 | "cannot be mounted RDWR without " | ||
| 2292 | "CONFIG_LBDAF"); | ||
| 2293 | return 0; | ||
| 2294 | } | ||
| 2295 | } | ||
| 2296 | return 1; | ||
| 2297 | } | ||
| 2298 | |||
| 2256 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) | 2299 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
| 2257 | __releases(kernel_lock) | 2300 | __releases(kernel_lock) |
| 2258 | __acquires(kernel_lock) | 2301 | __acquires(kernel_lock) |
| @@ -2274,7 +2317,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 2274 | unsigned int db_count; | 2317 | unsigned int db_count; |
| 2275 | unsigned int i; | 2318 | unsigned int i; |
| 2276 | int needs_recovery, has_huge_files; | 2319 | int needs_recovery, has_huge_files; |
| 2277 | int features; | ||
| 2278 | __u64 blocks_count; | 2320 | __u64 blocks_count; |
| 2279 | int err; | 2321 | int err; |
| 2280 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; | 2322 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; |
| @@ -2401,39 +2443,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 2401 | * previously didn't change the revision level when setting the flags, | 2443 | * previously didn't change the revision level when setting the flags, |
| 2402 | * so there is a chance incompat flags are set on a rev 0 filesystem. | 2444 | * so there is a chance incompat flags are set on a rev 0 filesystem. |
| 2403 | */ | 2445 | */ |
| 2404 | features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); | 2446 | if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) |
| 2405 | if (features) { | ||
| 2406 | ext4_msg(sb, KERN_ERR, | ||
| 2407 | "Couldn't mount because of " | ||
| 2408 | "unsupported optional features (%x)", | ||
| 2409 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & | ||
| 2410 | ~EXT4_FEATURE_INCOMPAT_SUPP)); | ||
| 2411 | goto failed_mount; | ||
| 2412 | } | ||
| 2413 | features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); | ||
| 2414 | if (!(sb->s_flags & MS_RDONLY) && features) { | ||
| 2415 | ext4_msg(sb, KERN_ERR, | ||
| 2416 | "Couldn't mount RDWR because of " | ||
| 2417 | "unsupported optional features (%x)", | ||
| 2418 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & | ||
| 2419 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | ||
| 2420 | goto failed_mount; | 2447 | goto failed_mount; |
| 2421 | } | 2448 | |
| 2422 | has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
| 2423 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
| 2424 | if (has_huge_files) { | ||
| 2425 | /* | ||
| 2426 | * Large file size enabled file system can only be | ||
| 2427 | * mount if kernel is build with CONFIG_LBDAF | ||
| 2428 | */ | ||
| 2429 | if (sizeof(root->i_blocks) < sizeof(u64) && | ||
| 2430 | !(sb->s_flags & MS_RDONLY)) { | ||
| 2431 | ext4_msg(sb, KERN_ERR, "Filesystem with huge " | ||
| 2432 | "files cannot be mounted read-write " | ||
| 2433 | "without CONFIG_LBDAF"); | ||
| 2434 | goto failed_mount; | ||
| 2435 | } | ||
| 2436 | } | ||
| 2437 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); | 2449 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); |
| 2438 | 2450 | ||
| 2439 | if (blocksize < EXT4_MIN_BLOCK_SIZE || | 2451 | if (blocksize < EXT4_MIN_BLOCK_SIZE || |
| @@ -2469,6 +2481,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 2469 | } | 2481 | } |
| 2470 | } | 2482 | } |
| 2471 | 2483 | ||
| 2484 | has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
| 2485 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
| 2472 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, | 2486 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, |
| 2473 | has_huge_files); | 2487 | has_huge_files); |
| 2474 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); | 2488 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); |
| @@ -2549,12 +2563,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 2549 | goto failed_mount; | 2563 | goto failed_mount; |
| 2550 | } | 2564 | } |
| 2551 | 2565 | ||
| 2552 | if (ext4_blocks_count(es) > | 2566 | /* |
| 2553 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { | 2567 | * Test whether we have more sectors than will fit in sector_t, |
| 2568 | * and whether the max offset is addressable by the page cache. | ||
| 2569 | */ | ||
| 2570 | if ((ext4_blocks_count(es) > | ||
| 2571 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || | ||
| 2572 | (ext4_blocks_count(es) > | ||
| 2573 | (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { | ||
| 2554 | ext4_msg(sb, KERN_ERR, "filesystem" | 2574 | ext4_msg(sb, KERN_ERR, "filesystem" |
| 2555 | " too large to mount safely"); | 2575 | " too large to mount safely on this system"); |
| 2556 | if (sizeof(sector_t) < 8) | 2576 | if (sizeof(sector_t) < 8) |
| 2557 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); | 2577 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); |
| 2578 | ret = -EFBIG; | ||
| 2558 | goto failed_mount; | 2579 | goto failed_mount; |
| 2559 | } | 2580 | } |
| 2560 | 2581 | ||
| @@ -2595,6 +2616,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 2595 | goto failed_mount; | 2616 | goto failed_mount; |
| 2596 | } | 2617 | } |
| 2597 | sbi->s_groups_count = blocks_count; | 2618 | sbi->s_groups_count = blocks_count; |
| 2619 | sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, | ||
| 2620 | (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); | ||
| 2598 | db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / | 2621 | db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / |
| 2599 | EXT4_DESC_PER_BLOCK(sb); | 2622 | EXT4_DESC_PER_BLOCK(sb); |
| 2600 | sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), | 2623 | sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), |
| @@ -2729,20 +2752,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 2729 | goto failed_mount4; | 2752 | goto failed_mount4; |
| 2730 | } | 2753 | } |
| 2731 | 2754 | ||
| 2732 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { | 2755 | jbd2_journal_set_features(sbi->s_journal, |
| 2733 | jbd2_journal_set_features(sbi->s_journal, | 2756 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); |
| 2734 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | 2757 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) |
| 2758 | jbd2_journal_set_features(sbi->s_journal, 0, 0, | ||
| 2735 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | 2759 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); |
| 2736 | } else if (test_opt(sb, JOURNAL_CHECKSUM)) { | 2760 | else |
| 2737 | jbd2_journal_set_features(sbi->s_journal, | ||
| 2738 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); | ||
| 2739 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, | 2761 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, |
| 2740 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | 2762 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); |
| 2741 | } else { | ||
| 2742 | jbd2_journal_clear_features(sbi->s_journal, | ||
| 2743 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | ||
| 2744 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | ||
| 2745 | } | ||
| 2746 | 2763 | ||
| 2747 | /* We have now updated the journal if required, so we can | 2764 | /* We have now updated the journal if required, so we can |
| 2748 | * validate the data journaling mode. */ | 2765 | * validate the data journaling mode. */ |
| @@ -3208,7 +3225,18 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
| 3208 | clear_buffer_write_io_error(sbh); | 3225 | clear_buffer_write_io_error(sbh); |
| 3209 | set_buffer_uptodate(sbh); | 3226 | set_buffer_uptodate(sbh); |
| 3210 | } | 3227 | } |
| 3211 | es->s_wtime = cpu_to_le32(get_seconds()); | 3228 | /* |
| 3229 | * If the file system is mounted read-only, don't update the | ||
| 3230 | * superblock write time. This avoids updating the superblock | ||
| 3231 | * write time when we are mounting the root file system | ||
| 3232 | * read/only but we need to replay the journal; at that point, | ||
| 3233 | * for people who are east of GMT and who make their clock | ||
| 3234 | * tick in localtime for Windows bug-for-bug compatibility, | ||
| 3235 | * the clock is set in the future, and this will cause e2fsck | ||
| 3236 | * to complain and force a full file system check. | ||
| 3237 | */ | ||
| 3238 | if (!(sb->s_flags & MS_RDONLY)) | ||
| 3239 | es->s_wtime = cpu_to_le32(get_seconds()); | ||
| 3212 | es->s_kbytes_written = | 3240 | es->s_kbytes_written = |
| 3213 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + | 3241 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + |
| 3214 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 3242 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
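The added guard above skips updating s_wtime on read-only mounts. The failure mode it avoids: if the hardware clock runs in localtime east of GMT, the early-boot system time can sit hours in the future while the root filesystem is still mounted read-only for journal replay, and a superblock write time stamped then makes a later e2fsck force a full check. A small standalone illustration of the apparent skew (all numbers assumed):

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        time_t real_utc  = 1253300000;              /* assumed true time */
        long   tz_offset = 9 * 3600;                /* RTC kept in localtime at UTC+9 (assumed) */
        time_t boot_view = real_utc + tz_offset;    /* what the clock reports early in boot */

        printf("s_wtime would lead real time by %ld seconds\n",
               (long) (boot_view - real_utc));
        return 0;
    }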
| @@ -3477,18 +3505,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
| 3477 | if (sbi->s_journal) | 3505 | if (sbi->s_journal) |
| 3478 | ext4_mark_recovery_complete(sb, es); | 3506 | ext4_mark_recovery_complete(sb, es); |
| 3479 | } else { | 3507 | } else { |
| 3480 | int ret; | 3508 | /* Make sure we can mount this feature set readwrite */ |
| 3481 | if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3509 | if (!ext4_feature_set_ok(sb, 0)) { |
| 3482 | ~EXT4_FEATURE_RO_COMPAT_SUPP))) { | ||
| 3483 | ext4_msg(sb, KERN_WARNING, "couldn't " | ||
| 3484 | "remount RDWR because of unsupported " | ||
| 3485 | "optional features (%x)", | ||
| 3486 | (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & | ||
| 3487 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | ||
| 3488 | err = -EROFS; | 3510 | err = -EROFS; |
| 3489 | goto restore_opts; | 3511 | goto restore_opts; |
| 3490 | } | 3512 | } |
| 3491 | |||
| 3492 | /* | 3513 | /* |
| 3493 | * Make sure the group descriptor checksums | 3514 | * Make sure the group descriptor checksums |
| 3494 | * are sane. If they aren't, refuse to remount r/w. | 3515 | * are sane. If they aren't, refuse to remount r/w. |
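In the remount path the open-coded RO_COMPAT test is replaced by a call to ext4_feature_set_ok(sb, 0). A sketch of the rule that check enforces for the read-write case, with an illustrative helper name and signature (not the kernel function): unknown RO_COMPAT bits are tolerated while mounted read-only but must all be recognized before going read-write.

    #include <stdint.h>

    static int ro_compat_ok_for_rw(uint32_t fs_ro_compat, uint32_t supported)
    {
        /* every ro-compat bit set on disk must be one we understand */
        return (fs_ro_compat & ~supported) == 0;
    }

    int main(void) { return ro_compat_ok_for_rw(0x3, 0x7) ? 0 : 1; }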
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 62b31c246994..fed5b01d7a8d 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
| @@ -810,12 +810,23 @@ inserted: | |||
| 810 | get_bh(new_bh); | 810 | get_bh(new_bh); |
| 811 | } else { | 811 | } else { |
| 812 | /* We need to allocate a new block */ | 812 | /* We need to allocate a new block */ |
| 813 | ext4_fsblk_t goal = ext4_group_first_block_no(sb, | 813 | ext4_fsblk_t goal, block; |
| 814 | |||
| 815 | goal = ext4_group_first_block_no(sb, | ||
| 814 | EXT4_I(inode)->i_block_group); | 816 | EXT4_I(inode)->i_block_group); |
| 815 | ext4_fsblk_t block = ext4_new_meta_blocks(handle, inode, | 817 | |
| 818 | /* non-extent files can't have physical blocks past 2^32 */ | ||
| 819 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
| 820 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | ||
| 821 | |||
| 822 | block = ext4_new_meta_blocks(handle, inode, | ||
| 816 | goal, NULL, &error); | 823 | goal, NULL, &error); |
| 817 | if (error) | 824 | if (error) |
| 818 | goto cleanup; | 825 | goto cleanup; |
| 826 | |||
| 827 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
| 828 | BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); | ||
| 829 | |||
| 819 | ea_idebug(inode, "creating block %d", block); | 830 | ea_idebug(inode, "creating block %d", block); |
| 820 | 831 | ||
| 821 | new_bh = sb_getblk(sb, block); | 832 | new_bh = sb_getblk(sb, block); |
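The xattr change above applies the same 2^32 physical-block limit to extended-attribute blocks of indirect-block files: the allocation goal is masked with EXT4_MAX_BLOCK_FILE_PHYS before calling the allocator, and the result is sanity-checked afterwards. A standalone sketch of the masking, assuming the macro's value is 0xFFFFFFFF (an assumption here, matching the series' 32-bit limit):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t max_block_file_phys = 0xFFFFFFFFULL;   /* assumed macro value */
        uint64_t goal = 0x123456789ULL;                        /* illustrative goal above 2^32 */

        /* keep only the low 32 bits; the goal is just an allocation hint */
        printf("masked goal: %llu\n",
               (unsigned long long) (goal & max_block_file_phys));
        return 0;
    }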
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0df600e9162d..26d991ddc1e6 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #include <linux/writeback.h> | 25 | #include <linux/writeback.h> |
| 26 | #include <linux/backing-dev.h> | 26 | #include <linux/backing-dev.h> |
| 27 | #include <linux/bio.h> | 27 | #include <linux/bio.h> |
| 28 | #include <linux/blkdev.h> | ||
| 28 | #include <trace/events/jbd2.h> | 29 | #include <trace/events/jbd2.h> |
| 29 | 30 | ||
| 30 | /* | 31 | /* |
| @@ -133,8 +134,8 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 133 | bh->b_end_io = journal_end_buffer_io_sync; | 134 | bh->b_end_io = journal_end_buffer_io_sync; |
| 134 | 135 | ||
| 135 | if (journal->j_flags & JBD2_BARRIER && | 136 | if (journal->j_flags & JBD2_BARRIER && |
| 136 | !JBD2_HAS_INCOMPAT_FEATURE(journal, | 137 | !JBD2_HAS_INCOMPAT_FEATURE(journal, |
| 137 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 138 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
| 138 | set_buffer_ordered(bh); | 139 | set_buffer_ordered(bh); |
| 139 | barrier_done = 1; | 140 | barrier_done = 1; |
| 140 | } | 141 | } |
| @@ -706,11 +707,13 @@ start_journal_io: | |||
| 706 | /* Done it all: now write the commit record asynchronously. */ | 707 | /* Done it all: now write the commit record asynchronously. */ |
| 707 | 708 | ||
| 708 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | 709 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, |
| 709 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 710 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
| 710 | err = journal_submit_commit_record(journal, commit_transaction, | 711 | err = journal_submit_commit_record(journal, commit_transaction, |
| 711 | &cbh, crc32_sum); | 712 | &cbh, crc32_sum); |
| 712 | if (err) | 713 | if (err) |
| 713 | __jbd2_journal_abort_hard(journal); | 714 | __jbd2_journal_abort_hard(journal); |
| 715 | if (journal->j_flags & JBD2_BARRIER) | ||
| 716 | blkdev_issue_flush(journal->j_dev, NULL); | ||
| 714 | } | 717 | } |
| 715 | 718 | ||
| 716 | /* | 719 | /* |
| @@ -833,7 +836,7 @@ wait_for_iobuf: | |||
| 833 | jbd_debug(3, "JBD: commit phase 5\n"); | 836 | jbd_debug(3, "JBD: commit phase 5\n"); |
| 834 | 837 | ||
| 835 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | 838 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
| 836 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 839 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
| 837 | err = journal_submit_commit_record(journal, commit_transaction, | 840 | err = journal_submit_commit_record(journal, commit_transaction, |
| 838 | &cbh, crc32_sum); | 841 | &cbh, crc32_sum); |
| 839 | if (err) | 842 | if (err) |
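The commit-path change above makes async commit safer with barriers: after the commit record is submitted asynchronously, an explicit device cache flush is issued whenever the journal is using barriers, so the record is not left sitting in a volatile write cache. A deliberately tiny sketch of just that decision, with made-up names (the real call in the hunk is blkdev_issue_flush()):

    #include <stdio.h>

    static int need_explicit_flush(int async_commit, int barriers_enabled)
    {
        return async_commit && barriers_enabled;
    }

    int main(void)
    {
        printf("flush after async commit record: %d\n", need_explicit_flush(1, 1));
        return 0;
    }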
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e378cb383979..a8a358bc0f21 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
| @@ -1187,6 +1187,12 @@ static int journal_reset(journal_t *journal) | |||
| 1187 | 1187 | ||
| 1188 | first = be32_to_cpu(sb->s_first); | 1188 | first = be32_to_cpu(sb->s_first); |
| 1189 | last = be32_to_cpu(sb->s_maxlen); | 1189 | last = be32_to_cpu(sb->s_maxlen); |
| 1190 | if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { | ||
| 1191 | printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n", | ||
| 1192 | first, last); | ||
| 1193 | journal_fail_superblock(journal); | ||
| 1194 | return -EINVAL; | ||
| 1195 | } | ||
| 1190 | 1196 | ||
| 1191 | journal->j_first = first; | 1197 | journal->j_first = first; |
| 1192 | journal->j_last = last; | 1198 | journal->j_last = last; |
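journal_reset() now refuses a journal whose on-disk extent (s_first through s_maxlen) cannot hold the minimum number of blocks, failing the superblock instead of proceeding. A standalone sketch of the bounds check, with the minimum passed in as an illustrative parameter rather than the JBD2_MIN_JOURNAL_BLOCKS constant:

    #include <stdint.h>
    #include <stdio.h>

    static int journal_too_short(uint32_t first, uint32_t last, uint32_t min_blocks)
    {
        /* usable blocks are first..last inclusive, i.e. last - first + 1 of them */
        return first + min_blocks > last + 1;
    }

    int main(void)
    {
        printf("%d\n", journal_too_short(1, 100, 1024));    /* 1: too short, rejected */
        printf("%d\n", journal_too_short(1, 2048, 1024));   /* 0: long enough */
        return 0;
    }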
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 6213ac728f30..a0512700542f 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
| @@ -57,7 +57,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
| 57 | INIT_LIST_HEAD(&transaction->t_private_list); | 57 | INIT_LIST_HEAD(&transaction->t_private_list); |
| 58 | 58 | ||
| 59 | /* Set up the commit timer for the new transaction. */ | 59 | /* Set up the commit timer for the new transaction. */ |
| 60 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); | 60 | journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires); |
| 61 | add_timer(&journal->j_commit_timer); | 61 | add_timer(&journal->j_commit_timer); |
| 62 | 62 | ||
| 63 | J_ASSERT(journal->j_running_transaction == NULL); | 63 | J_ASSERT(journal->j_running_transaction == NULL); |
| @@ -238,6 +238,8 @@ repeat_locked: | |||
| 238 | __jbd2_log_space_left(journal)); | 238 | __jbd2_log_space_left(journal)); |
| 239 | spin_unlock(&transaction->t_handle_lock); | 239 | spin_unlock(&transaction->t_handle_lock); |
| 240 | spin_unlock(&journal->j_state_lock); | 240 | spin_unlock(&journal->j_state_lock); |
| 241 | |||
| 242 | lock_map_acquire(&handle->h_lockdep_map); | ||
| 241 | out: | 243 | out: |
| 242 | if (unlikely(new_transaction)) /* It's usually NULL */ | 244 | if (unlikely(new_transaction)) /* It's usually NULL */ |
| 243 | kfree(new_transaction); | 245 | kfree(new_transaction); |
| @@ -303,8 +305,6 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks) | |||
| 303 | handle = ERR_PTR(err); | 305 | handle = ERR_PTR(err); |
| 304 | goto out; | 306 | goto out; |
| 305 | } | 307 | } |
| 306 | |||
| 307 | lock_map_acquire(&handle->h_lockdep_map); | ||
| 308 | out: | 308 | out: |
| 309 | return handle; | 309 | return handle; |
| 310 | } | 310 | } |
| @@ -426,6 +426,7 @@ int jbd2_journal_restart(handle_t *handle, int nblocks) | |||
| 426 | __jbd2_log_start_commit(journal, transaction->t_tid); | 426 | __jbd2_log_start_commit(journal, transaction->t_tid); |
| 427 | spin_unlock(&journal->j_state_lock); | 427 | spin_unlock(&journal->j_state_lock); |
| 428 | 428 | ||
| 429 | lock_map_release(&handle->h_lockdep_map); | ||
| 429 | handle->h_buffer_credits = nblocks; | 430 | handle->h_buffer_credits = nblocks; |
| 430 | ret = start_this_handle(journal, handle); | 431 | ret = start_this_handle(journal, handle); |
| 431 | return ret; | 432 | return ret; |
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d97eb652d6ca..52695d3dfd0b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
| @@ -652,7 +652,7 @@ struct transaction_s | |||
| 652 | * This transaction is being forced and some process is | 652 | * This transaction is being forced and some process is |
| 653 | * waiting for it to finish. | 653 | * waiting for it to finish. |
| 654 | */ | 654 | */ |
| 655 | int t_synchronous_commit:1; | 655 | unsigned int t_synchronous_commit:1; |
| 656 | 656 | ||
| 657 | /* | 657 | /* |
| 658 | * For use by the filesystem to store fs-specific data | 658 | * For use by the filesystem to store fs-specific data |
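The one-word change above makes t_synchronous_commit an unsigned bitfield. A signed one-bit field can only represent 0 and -1, so storing 1 in it is implementation-defined at best and trips static checkers. Minimal standalone demonstration (the struct and field names here are made up):

    #include <stdio.h>

    struct flags {
        int          signed_bit   : 1;
        unsigned int unsigned_bit : 1;
    };

    int main(void)
    {
        struct flags f = { .signed_bit = 1, .unsigned_bit = 1 };
        /* on typical compilers this prints -1 and 1 */
        printf("signed 1-bit field: %d, unsigned 1-bit field: %u\n",
               f.signed_bit, f.unsigned_bit);
        return 0;
    }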
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 8d433c4e3709..c1bd8f1e8b94 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h | |||
| @@ -5,10 +5,15 @@ | |||
| 5 | #define _TRACE_EXT4_H | 5 | #define _TRACE_EXT4_H |
| 6 | 6 | ||
| 7 | #include <linux/writeback.h> | 7 | #include <linux/writeback.h> |
| 8 | #include "../../../fs/ext4/ext4.h" | ||
| 9 | #include "../../../fs/ext4/mballoc.h" | ||
| 10 | #include <linux/tracepoint.h> | 8 | #include <linux/tracepoint.h> |
| 11 | 9 | ||
| 10 | struct ext4_allocation_context; | ||
| 11 | struct ext4_allocation_request; | ||
| 12 | struct ext4_prealloc_space; | ||
| 13 | struct ext4_inode_info; | ||
| 14 | |||
| 15 | #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) | ||
| 16 | |||
| 12 | TRACE_EVENT(ext4_free_inode, | 17 | TRACE_EVENT(ext4_free_inode, |
| 13 | TP_PROTO(struct inode *inode), | 18 | TP_PROTO(struct inode *inode), |
| 14 | 19 | ||
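The include hunk above drops the relative includes of the private ext4 headers and gets by with forward declarations plus a local EXT4_I() definition. That works because the event declarations only ever pass pointers to these structures around; the full definitions are needed only in the translation unit that actually expands the assignment bodies. A minimal illustration of the same C property (names invented):

    struct opaque_thing;                             /* forward declaration only */

    static void note_thing(struct opaque_thing *t)   /* pointer use needs no definition */
    {
        (void) t;
    }

    int main(void)
    {
        note_thing((struct opaque_thing *) 0);
        return 0;
    }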
| @@ -33,8 +38,8 @@ TRACE_EVENT(ext4_free_inode, | |||
| 33 | ), | 38 | ), |
| 34 | 39 | ||
| 35 | TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu", | 40 | TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu", |
| 36 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->mode, | 41 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 37 | __entry->uid, __entry->gid, | 42 | __entry->mode, __entry->uid, __entry->gid, |
| 38 | (unsigned long long) __entry->blocks) | 43 | (unsigned long long) __entry->blocks) |
| 39 | ); | 44 | ); |
| 40 | 45 | ||
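This hunk, and the matching TP_printk() changes through the rest of the file, cast ino_t (and similar fields) to unsigned long before handing them to a %lu conversion: ino_t is not the same width on every architecture, so the explicit cast is what keeps the format string portable. A minimal illustration, with a narrow typedef assumed for the sake of the example:

    #include <stdio.h>

    typedef unsigned int demo_ino_t;      /* stand-in for an arch where ino_t is 32-bit */

    int main(void)
    {
        demo_ino_t ino = 12345;
        printf("ino %lu\n", (unsigned long) ino);   /* cast matches the %lu conversion */
        return 0;
    }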
| @@ -56,7 +61,8 @@ TRACE_EVENT(ext4_request_inode, | |||
| 56 | ), | 61 | ), |
| 57 | 62 | ||
| 58 | TP_printk("dev %s dir %lu mode %d", | 63 | TP_printk("dev %s dir %lu mode %d", |
| 59 | jbd2_dev_to_name(__entry->dev), __entry->dir, __entry->mode) | 64 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->dir, |
| 65 | __entry->mode) | ||
| 60 | ); | 66 | ); |
| 61 | 67 | ||
| 62 | TRACE_EVENT(ext4_allocate_inode, | 68 | TRACE_EVENT(ext4_allocate_inode, |
| @@ -79,7 +85,8 @@ TRACE_EVENT(ext4_allocate_inode, | |||
| 79 | ), | 85 | ), |
| 80 | 86 | ||
| 81 | TP_printk("dev %s ino %lu dir %lu mode %d", | 87 | TP_printk("dev %s ino %lu dir %lu mode %d", |
| 82 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->dir, __entry->mode) | 88 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 89 | (unsigned long) __entry->dir, __entry->mode) | ||
| 83 | ); | 90 | ); |
| 84 | 91 | ||
| 85 | TRACE_EVENT(ext4_write_begin, | 92 | TRACE_EVENT(ext4_write_begin, |
| @@ -106,8 +113,8 @@ TRACE_EVENT(ext4_write_begin, | |||
| 106 | ), | 113 | ), |
| 107 | 114 | ||
| 108 | TP_printk("dev %s ino %lu pos %llu len %u flags %u", | 115 | TP_printk("dev %s ino %lu pos %llu len %u flags %u", |
| 109 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, | 116 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 110 | __entry->flags) | 117 | __entry->pos, __entry->len, __entry->flags) |
| 111 | ); | 118 | ); |
| 112 | 119 | ||
| 113 | TRACE_EVENT(ext4_ordered_write_end, | 120 | TRACE_EVENT(ext4_ordered_write_end, |
| @@ -133,8 +140,8 @@ TRACE_EVENT(ext4_ordered_write_end, | |||
| 133 | ), | 140 | ), |
| 134 | 141 | ||
| 135 | TP_printk("dev %s ino %lu pos %llu len %u copied %u", | 142 | TP_printk("dev %s ino %lu pos %llu len %u copied %u", |
| 136 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, | 143 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 137 | __entry->copied) | 144 | __entry->pos, __entry->len, __entry->copied) |
| 138 | ); | 145 | ); |
| 139 | 146 | ||
| 140 | TRACE_EVENT(ext4_writeback_write_end, | 147 | TRACE_EVENT(ext4_writeback_write_end, |
| @@ -160,8 +167,8 @@ TRACE_EVENT(ext4_writeback_write_end, | |||
| 160 | ), | 167 | ), |
| 161 | 168 | ||
| 162 | TP_printk("dev %s ino %lu pos %llu len %u copied %u", | 169 | TP_printk("dev %s ino %lu pos %llu len %u copied %u", |
| 163 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, | 170 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 164 | __entry->copied) | 171 | __entry->pos, __entry->len, __entry->copied) |
| 165 | ); | 172 | ); |
| 166 | 173 | ||
| 167 | TRACE_EVENT(ext4_journalled_write_end, | 174 | TRACE_EVENT(ext4_journalled_write_end, |
| @@ -186,8 +193,8 @@ TRACE_EVENT(ext4_journalled_write_end, | |||
| 186 | ), | 193 | ), |
| 187 | 194 | ||
| 188 | TP_printk("dev %s ino %lu pos %llu len %u copied %u", | 195 | TP_printk("dev %s ino %lu pos %llu len %u copied %u", |
| 189 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, | 196 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 190 | __entry->copied) | 197 | __entry->pos, __entry->len, __entry->copied) |
| 191 | ); | 198 | ); |
| 192 | 199 | ||
| 193 | TRACE_EVENT(ext4_writepage, | 200 | TRACE_EVENT(ext4_writepage, |
| @@ -209,7 +216,8 @@ TRACE_EVENT(ext4_writepage, | |||
| 209 | ), | 216 | ), |
| 210 | 217 | ||
| 211 | TP_printk("dev %s ino %lu page_index %lu", | 218 | TP_printk("dev %s ino %lu page_index %lu", |
| 212 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index) | 219 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 220 | __entry->index) | ||
| 213 | ); | 221 | ); |
| 214 | 222 | ||
| 215 | TRACE_EVENT(ext4_da_writepages, | 223 | TRACE_EVENT(ext4_da_writepages, |
| @@ -243,14 +251,49 @@ TRACE_EVENT(ext4_da_writepages, | |||
| 243 | __entry->range_cyclic = wbc->range_cyclic; | 251 | __entry->range_cyclic = wbc->range_cyclic; |
| 244 | ), | 252 | ), |
| 245 | 253 | ||
| 246 | TP_printk("dev %s ino %lu nr_t_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", | 254 | TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", |
| 247 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write, | 255 | jbd2_dev_to_name(__entry->dev), |
| 256 | (unsigned long) __entry->ino, __entry->nr_to_write, | ||
| 248 | __entry->pages_skipped, __entry->range_start, | 257 | __entry->pages_skipped, __entry->range_start, |
| 249 | __entry->range_end, __entry->nonblocking, | 258 | __entry->range_end, __entry->nonblocking, |
| 250 | __entry->for_kupdate, __entry->for_reclaim, | 259 | __entry->for_kupdate, __entry->for_reclaim, |
| 251 | __entry->range_cyclic) | 260 | __entry->range_cyclic) |
| 252 | ); | 261 | ); |
| 253 | 262 | ||
| 263 | TRACE_EVENT(ext4_da_write_pages, | ||
| 264 | TP_PROTO(struct inode *inode, struct mpage_da_data *mpd), | ||
| 265 | |||
| 266 | TP_ARGS(inode, mpd), | ||
| 267 | |||
| 268 | TP_STRUCT__entry( | ||
| 269 | __field( dev_t, dev ) | ||
| 270 | __field( ino_t, ino ) | ||
| 271 | __field( __u64, b_blocknr ) | ||
| 272 | __field( __u32, b_size ) | ||
| 273 | __field( __u32, b_state ) | ||
| 274 | __field( unsigned long, first_page ) | ||
| 275 | __field( int, io_done ) | ||
| 276 | __field( int, pages_written ) | ||
| 277 | ), | ||
| 278 | |||
| 279 | TP_fast_assign( | ||
| 280 | __entry->dev = inode->i_sb->s_dev; | ||
| 281 | __entry->ino = inode->i_ino; | ||
| 282 | __entry->b_blocknr = mpd->b_blocknr; | ||
| 283 | __entry->b_size = mpd->b_size; | ||
| 284 | __entry->b_state = mpd->b_state; | ||
| 285 | __entry->first_page = mpd->first_page; | ||
| 286 | __entry->io_done = mpd->io_done; | ||
| 287 | __entry->pages_written = mpd->pages_written; | ||
| 288 | ), | ||
| 289 | |||
| 290 | TP_printk("dev %s ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d", | ||
| 291 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, | ||
| 292 | __entry->b_blocknr, __entry->b_size, | ||
| 293 | __entry->b_state, __entry->first_page, | ||
| 294 | __entry->io_done, __entry->pages_written) | ||
| 295 | ); | ||
| 296 | |||
| 254 | TRACE_EVENT(ext4_da_writepages_result, | 297 | TRACE_EVENT(ext4_da_writepages_result, |
| 255 | TP_PROTO(struct inode *inode, struct writeback_control *wbc, | 298 | TP_PROTO(struct inode *inode, struct writeback_control *wbc, |
| 256 | int ret, int pages_written), | 299 | int ret, int pages_written), |
| @@ -280,7 +323,8 @@ TRACE_EVENT(ext4_da_writepages_result, | |||
| 280 | ), | 323 | ), |
| 281 | 324 | ||
| 282 | TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d", | 325 | TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d", |
| 283 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->ret, | 326 | jbd2_dev_to_name(__entry->dev), |
| 327 | (unsigned long) __entry->ino, __entry->ret, | ||
| 284 | __entry->pages_written, __entry->pages_skipped, | 328 | __entry->pages_written, __entry->pages_skipped, |
| 285 | __entry->encountered_congestion, __entry->more_io, | 329 | __entry->encountered_congestion, __entry->more_io, |
| 286 | __entry->no_nrwrite_index_update) | 330 | __entry->no_nrwrite_index_update) |
| @@ -309,8 +353,8 @@ TRACE_EVENT(ext4_da_write_begin, | |||
| 309 | ), | 353 | ), |
| 310 | 354 | ||
| 311 | TP_printk("dev %s ino %lu pos %llu len %u flags %u", | 355 | TP_printk("dev %s ino %lu pos %llu len %u flags %u", |
| 312 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, | 356 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 313 | __entry->flags) | 357 | __entry->pos, __entry->len, __entry->flags) |
| 314 | ); | 358 | ); |
| 315 | 359 | ||
| 316 | TRACE_EVENT(ext4_da_write_end, | 360 | TRACE_EVENT(ext4_da_write_end, |
| @@ -336,8 +380,8 @@ TRACE_EVENT(ext4_da_write_end, | |||
| 336 | ), | 380 | ), |
| 337 | 381 | ||
| 338 | TP_printk("dev %s ino %lu pos %llu len %u copied %u", | 382 | TP_printk("dev %s ino %lu pos %llu len %u copied %u", |
| 339 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, | 383 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 340 | __entry->copied) | 384 | __entry->pos, __entry->len, __entry->copied) |
| 341 | ); | 385 | ); |
| 342 | 386 | ||
| 343 | TRACE_EVENT(ext4_discard_blocks, | 387 | TRACE_EVENT(ext4_discard_blocks, |
| @@ -387,8 +431,8 @@ TRACE_EVENT(ext4_mb_new_inode_pa, | |||
| 387 | ), | 431 | ), |
| 388 | 432 | ||
| 389 | TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", | 433 | TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", |
| 390 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart, | 434 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 391 | __entry->pa_len, __entry->pa_lstart) | 435 | __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart) |
| 392 | ); | 436 | ); |
| 393 | 437 | ||
| 394 | TRACE_EVENT(ext4_mb_new_group_pa, | 438 | TRACE_EVENT(ext4_mb_new_group_pa, |
| @@ -415,8 +459,8 @@ TRACE_EVENT(ext4_mb_new_group_pa, | |||
| 415 | ), | 459 | ), |
| 416 | 460 | ||
| 417 | TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", | 461 | TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", |
| 418 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart, | 462 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 419 | __entry->pa_len, __entry->pa_lstart) | 463 | __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart) |
| 420 | ); | 464 | ); |
| 421 | 465 | ||
| 422 | TRACE_EVENT(ext4_mb_release_inode_pa, | 466 | TRACE_EVENT(ext4_mb_release_inode_pa, |
| @@ -442,8 +486,8 @@ TRACE_EVENT(ext4_mb_release_inode_pa, | |||
| 442 | ), | 486 | ), |
| 443 | 487 | ||
| 444 | TP_printk("dev %s ino %lu block %llu count %u", | 488 | TP_printk("dev %s ino %lu block %llu count %u", |
| 445 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block, | 489 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 446 | __entry->count) | 490 | __entry->block, __entry->count) |
| 447 | ); | 491 | ); |
| 448 | 492 | ||
| 449 | TRACE_EVENT(ext4_mb_release_group_pa, | 493 | TRACE_EVENT(ext4_mb_release_group_pa, |
| @@ -488,7 +532,7 @@ TRACE_EVENT(ext4_discard_preallocations, | |||
| 488 | ), | 532 | ), |
| 489 | 533 | ||
| 490 | TP_printk("dev %s ino %lu", | 534 | TP_printk("dev %s ino %lu", |
| 491 | jbd2_dev_to_name(__entry->dev), __entry->ino) | 535 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) |
| 492 | ); | 536 | ); |
| 493 | 537 | ||
| 494 | TRACE_EVENT(ext4_mb_discard_preallocations, | 538 | TRACE_EVENT(ext4_mb_discard_preallocations, |
| @@ -543,8 +587,8 @@ TRACE_EVENT(ext4_request_blocks, | |||
| 543 | ), | 587 | ), |
| 544 | 588 | ||
| 545 | TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", | 589 | TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", |
| 546 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags, | 590 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 547 | __entry->len, | 591 | __entry->flags, __entry->len, |
| 548 | (unsigned long long) __entry->logical, | 592 | (unsigned long long) __entry->logical, |
| 549 | (unsigned long long) __entry->goal, | 593 | (unsigned long long) __entry->goal, |
| 550 | (unsigned long long) __entry->lleft, | 594 | (unsigned long long) __entry->lleft, |
| @@ -587,8 +631,8 @@ TRACE_EVENT(ext4_allocate_blocks, | |||
| 587 | ), | 631 | ), |
| 588 | 632 | ||
| 589 | TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", | 633 | TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", |
| 590 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags, | 634 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 591 | __entry->len, __entry->block, | 635 | __entry->flags, __entry->len, __entry->block, |
| 592 | (unsigned long long) __entry->logical, | 636 | (unsigned long long) __entry->logical, |
| 593 | (unsigned long long) __entry->goal, | 637 | (unsigned long long) __entry->goal, |
| 594 | (unsigned long long) __entry->lleft, | 638 | (unsigned long long) __entry->lleft, |
| @@ -621,8 +665,8 @@ TRACE_EVENT(ext4_free_blocks, | |||
| 621 | ), | 665 | ), |
| 622 | 666 | ||
| 623 | TP_printk("dev %s ino %lu block %llu count %lu metadata %d", | 667 | TP_printk("dev %s ino %lu block %llu count %lu metadata %d", |
| 624 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block, | 668 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 625 | __entry->count, __entry->metadata) | 669 | __entry->block, __entry->count, __entry->metadata) |
| 626 | ); | 670 | ); |
| 627 | 671 | ||
| 628 | TRACE_EVENT(ext4_sync_file, | 672 | TRACE_EVENT(ext4_sync_file, |
| @@ -645,8 +689,8 @@ TRACE_EVENT(ext4_sync_file, | |||
| 645 | ), | 689 | ), |
| 646 | 690 | ||
| 647 | TP_printk("dev %s ino %ld parent %ld datasync %d ", | 691 | TP_printk("dev %s ino %ld parent %ld datasync %d ", |
| 648 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->parent, | 692 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, |
| 649 | __entry->datasync) | 693 | (unsigned long) __entry->parent, __entry->datasync) |
| 650 | ); | 694 | ); |
| 651 | 695 | ||
| 652 | TRACE_EVENT(ext4_sync_fs, | 696 | TRACE_EVENT(ext4_sync_fs, |
| @@ -669,6 +713,30 @@ TRACE_EVENT(ext4_sync_fs, | |||
| 669 | __entry->wait) | 713 | __entry->wait) |
| 670 | ); | 714 | ); |
| 671 | 715 | ||
| 716 | TRACE_EVENT(ext4_alloc_da_blocks, | ||
| 717 | TP_PROTO(struct inode *inode), | ||
| 718 | |||
| 719 | TP_ARGS(inode), | ||
| 720 | |||
| 721 | TP_STRUCT__entry( | ||
| 722 | __field( dev_t, dev ) | ||
| 723 | __field( ino_t, ino ) | ||
| 724 | __field( unsigned int, data_blocks ) | ||
| 725 | __field( unsigned int, meta_blocks ) | ||
| 726 | ), | ||
| 727 | |||
| 728 | TP_fast_assign( | ||
| 729 | __entry->dev = inode->i_sb->s_dev; | ||
| 730 | __entry->ino = inode->i_ino; | ||
| 731 | __entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks; | ||
| 732 | __entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; | ||
| 733 | ), | ||
| 734 | |||
| 735 | TP_printk("dev %s ino %lu data_blocks %u meta_blocks %u", | ||
| 736 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, | ||
| 737 | __entry->data_blocks, __entry->meta_blocks) | ||
| 738 | ); | ||
| 739 | |||
| 672 | #endif /* _TRACE_EXT4_H */ | 740 | #endif /* _TRACE_EXT4_H */ |
| 673 | 741 | ||
| 674 | /* This part must be outside protection */ | 742 | /* This part must be outside protection */ |
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 10813fa0c8d0..b851f0b4701c 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h | |||
| @@ -159,7 +159,7 @@ TRACE_EVENT(jbd2_submit_inode_data, | |||
| 159 | ), | 159 | ), |
| 160 | 160 | ||
| 161 | TP_printk("dev %s ino %lu", | 161 | TP_printk("dev %s ino %lu", |
| 162 | jbd2_dev_to_name(__entry->dev), __entry->ino) | 162 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) |
| 163 | ); | 163 | ); |
| 164 | 164 | ||
| 165 | #endif /* _TRACE_JBD2_H */ | 165 | #endif /* _TRACE_JBD2_H */ |
