diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-18 17:07:46 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-18 17:07:46 -0400 |
commit | 0732f87761dbe417cb6e084b712d07e879e876ef (patch) | |
tree | afed6ca0368fd3e121fd4f43b11e32aa1e5139c0 /fs/ext4 | |
parent | 15fc204afc6feb915c400159546f646eca8ba1d9 (diff) | |
parent | 536fc240e7147858255bdb08e7a999a3351a9fb4 (diff) |
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
jbd2: clean up jbd2_journal_try_to_free_buffers()
ext4: Don't update ctime for non-extent-mapped inodes
ext4: Fix up whitespace issues in fs/ext4/inode.c
ext4: Fix 64-bit block type problem on 32-bit platforms
ext4: teach the inode allocator to use a goal inode number
ext4: Use a hash of the topdir directory name for the Orlov parent group
ext4: document the "abort" mount option
ext4: move the abort flag from s_mount_opts to s_mount_flags
ext4: update the s_last_mounted field in the superblock
ext4: change s_mount_opt to be an unsigned int
ext4: online defrag -- Add EXT4_IOC_MOVE_EXT ioctl
ext4: avoid unnecessary spinlock in critical POSIX ACL path
ext3: avoid unnecessary spinlock in critical POSIX ACL path
ext4: convert instrumentation from markers to tracepoints
jbd2: convert instrumentation from markers to tracepoints
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/Makefile | 2 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 39 | ||||
-rw-r--r-- | fs/ext4/ext4_extents.h | 4 | ||||
-rw-r--r-- | fs/ext4/extents.c | 4 | ||||
-rw-r--r-- | fs/ext4/file.c | 36 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 8 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 48 | ||||
-rw-r--r-- | fs/ext4/inode.c | 281 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 36 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 85 | ||||
-rw-r--r-- | fs/ext4/mballoc.h | 1 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 8 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 1320 | ||||
-rw-r--r-- | fs/ext4/namei.c | 10 | ||||
-rw-r--r-- | fs/ext4/super.c | 20 |
15 files changed, 1636 insertions, 266 deletions
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 8a34710ecf40..8867b2a1e5fe 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile | |||
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o | |||
6 | 6 | ||
7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ | 7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ |
8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ | 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ |
9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o | 9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o |
10 | 10 | ||
11 | ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o | 11 | ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o |
12 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o | 12 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index cc7d5edc38c9..17b9998680e3 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -352,6 +352,7 @@ struct ext4_new_group_data { | |||
352 | /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */ | 352 | /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */ |
353 | /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ | 353 | /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ |
354 | #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) | 354 | #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) |
355 | #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) | ||
355 | 356 | ||
356 | /* | 357 | /* |
357 | * ioctl commands in 32 bit emulation | 358 | * ioctl commands in 32 bit emulation |
@@ -447,6 +448,15 @@ struct ext4_inode { | |||
447 | __le32 i_version_hi; /* high 32 bits for 64-bit version */ | 448 | __le32 i_version_hi; /* high 32 bits for 64-bit version */ |
448 | }; | 449 | }; |
449 | 450 | ||
451 | struct move_extent { | ||
452 | __u32 reserved; /* should be zero */ | ||
453 | __u32 donor_fd; /* donor file descriptor */ | ||
454 | __u64 orig_start; /* logical start offset in block for orig */ | ||
455 | __u64 donor_start; /* logical start offset in block for donor */ | ||
456 | __u64 len; /* block length to be moved */ | ||
457 | __u64 moved_len; /* moved block length */ | ||
458 | }; | ||
459 | #define MAX_DEFRAG_SIZE ((1UL<<31) - 1) | ||
450 | 460 | ||
451 | #define EXT4_EPOCH_BITS 2 | 461 | #define EXT4_EPOCH_BITS 2 |
452 | #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) | 462 | #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) |
@@ -674,7 +684,6 @@ struct ext4_inode_info { | |||
674 | #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ | 684 | #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ |
675 | #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ | 685 | #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ |
676 | #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ | 686 | #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ |
677 | #define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */ | ||
678 | #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ | 687 | #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ |
679 | #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ | 688 | #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ |
680 | #define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ | 689 | #define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ |
@@ -696,17 +705,10 @@ struct ext4_inode_info { | |||
696 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | 705 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ |
697 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ | 706 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ |
698 | 707 | ||
699 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ | ||
700 | #ifndef _LINUX_EXT2_FS_H | ||
701 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt | 708 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt |
702 | #define set_opt(o, opt) o |= EXT4_MOUNT_##opt | 709 | #define set_opt(o, opt) o |= EXT4_MOUNT_##opt |
703 | #define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ | 710 | #define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ |
704 | EXT4_MOUNT_##opt) | 711 | EXT4_MOUNT_##opt) |
705 | #else | ||
706 | #define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD | ||
707 | #define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT | ||
708 | #define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS | ||
709 | #endif | ||
710 | 712 | ||
711 | #define ext4_set_bit ext2_set_bit | 713 | #define ext4_set_bit ext2_set_bit |
712 | #define ext4_set_bit_atomic ext2_set_bit_atomic | 714 | #define ext4_set_bit_atomic ext2_set_bit_atomic |
@@ -824,6 +826,13 @@ struct ext4_super_block { | |||
824 | }; | 826 | }; |
825 | 827 | ||
826 | #ifdef __KERNEL__ | 828 | #ifdef __KERNEL__ |
829 | |||
830 | /* | ||
831 | * run-time mount flags | ||
832 | */ | ||
833 | #define EXT4_MF_MNTDIR_SAMPLED 0x0001 | ||
834 | #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ | ||
835 | |||
827 | /* | 836 | /* |
828 | * fourth extended-fs super-block data in memory | 837 | * fourth extended-fs super-block data in memory |
829 | */ | 838 | */ |
@@ -842,7 +851,8 @@ struct ext4_sb_info { | |||
842 | struct buffer_head * s_sbh; /* Buffer containing the super block */ | 851 | struct buffer_head * s_sbh; /* Buffer containing the super block */ |
843 | struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ | 852 | struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ |
844 | struct buffer_head **s_group_desc; | 853 | struct buffer_head **s_group_desc; |
845 | unsigned long s_mount_opt; | 854 | unsigned int s_mount_opt; |
855 | unsigned int s_mount_flags; | ||
846 | ext4_fsblk_t s_sb_block; | 856 | ext4_fsblk_t s_sb_block; |
847 | uid_t s_resuid; | 857 | uid_t s_resuid; |
848 | gid_t s_resgid; | 858 | gid_t s_resgid; |
@@ -853,6 +863,7 @@ struct ext4_sb_info { | |||
853 | int s_inode_size; | 863 | int s_inode_size; |
854 | int s_first_ino; | 864 | int s_first_ino; |
855 | unsigned int s_inode_readahead_blks; | 865 | unsigned int s_inode_readahead_blks; |
866 | unsigned int s_inode_goal; | ||
856 | spinlock_t s_next_gen_lock; | 867 | spinlock_t s_next_gen_lock; |
857 | u32 s_next_generation; | 868 | u32 s_next_generation; |
858 | u32 s_hash_seed[4]; | 869 | u32 s_hash_seed[4]; |
@@ -1305,7 +1316,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct | |||
1305 | dx_hash_info *hinfo); | 1316 | dx_hash_info *hinfo); |
1306 | 1317 | ||
1307 | /* ialloc.c */ | 1318 | /* ialloc.c */ |
1308 | extern struct inode * ext4_new_inode(handle_t *, struct inode *, int); | 1319 | extern struct inode *ext4_new_inode(handle_t *, struct inode *, int, |
1320 | const struct qstr *qstr, __u32 goal); | ||
1309 | extern void ext4_free_inode(handle_t *, struct inode *); | 1321 | extern void ext4_free_inode(handle_t *, struct inode *); |
1310 | extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); | 1322 | extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); |
1311 | extern unsigned long ext4_count_free_inodes(struct super_block *); | 1323 | extern unsigned long ext4_count_free_inodes(struct super_block *); |
@@ -1329,7 +1341,7 @@ extern void ext4_discard_preallocations(struct inode *); | |||
1329 | extern int __init init_ext4_mballoc(void); | 1341 | extern int __init init_ext4_mballoc(void); |
1330 | extern void exit_ext4_mballoc(void); | 1342 | extern void exit_ext4_mballoc(void); |
1331 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, | 1343 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, |
1332 | unsigned long, unsigned long, int, unsigned long *); | 1344 | ext4_fsblk_t, unsigned long, int, unsigned long *); |
1333 | extern int ext4_mb_add_groupinfo(struct super_block *sb, | 1345 | extern int ext4_mb_add_groupinfo(struct super_block *sb, |
1334 | ext4_group_t i, struct ext4_group_desc *desc); | 1346 | ext4_group_t i, struct ext4_group_desc *desc); |
1335 | extern void ext4_mb_update_group_info(struct ext4_group_info *grp, | 1347 | extern void ext4_mb_update_group_info(struct ext4_group_info *grp, |
@@ -1647,6 +1659,11 @@ extern int ext4_get_blocks(handle_t *handle, struct inode *inode, | |||
1647 | struct buffer_head *bh, int flags); | 1659 | struct buffer_head *bh, int flags); |
1648 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 1660 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
1649 | __u64 start, __u64 len); | 1661 | __u64 start, __u64 len); |
1662 | /* move_extent.c */ | ||
1663 | extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | ||
1664 | __u64 start_orig, __u64 start_donor, | ||
1665 | __u64 len, __u64 *moved_len); | ||
1666 | |||
1650 | 1667 | ||
1651 | /* | 1668 | /* |
1652 | * Add new method to test whether block and inode bitmaps are properly | 1669 | * Add new method to test whether block and inode bitmaps are properly |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index f0c3ec85bd48..20a84105a10b 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -221,12 +221,16 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) | |||
221 | } | 221 | } |
222 | 222 | ||
223 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); | 223 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); |
224 | extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); | ||
224 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); | 225 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); |
225 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); | 226 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); |
226 | extern int ext4_extent_tree_init(handle_t *, struct inode *); | 227 | extern int ext4_extent_tree_init(handle_t *, struct inode *); |
227 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, | 228 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, |
228 | int num, | 229 | int num, |
229 | struct ext4_ext_path *path); | 230 | struct ext4_ext_path *path); |
231 | extern int ext4_can_extents_be_merged(struct inode *inode, | ||
232 | struct ext4_extent *ex1, | ||
233 | struct ext4_extent *ex2); | ||
230 | extern int ext4_ext_try_to_merge(struct inode *inode, | 234 | extern int ext4_ext_try_to_merge(struct inode *inode, |
231 | struct ext4_ext_path *path, | 235 | struct ext4_ext_path *path, |
232 | struct ext4_extent *); | 236 | struct ext4_extent *); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2593f748c3a4..50322a09bd01 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -49,7 +49,7 @@ | |||
49 | * ext_pblock: | 49 | * ext_pblock: |
50 | * combine low and high parts of physical block number into ext4_fsblk_t | 50 | * combine low and high parts of physical block number into ext4_fsblk_t |
51 | */ | 51 | */ |
52 | static ext4_fsblk_t ext_pblock(struct ext4_extent *ex) | 52 | ext4_fsblk_t ext_pblock(struct ext4_extent *ex) |
53 | { | 53 | { |
54 | ext4_fsblk_t block; | 54 | ext4_fsblk_t block; |
55 | 55 | ||
@@ -1417,7 +1417,7 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, | |||
1417 | return err; | 1417 | return err; |
1418 | } | 1418 | } |
1419 | 1419 | ||
1420 | static int | 1420 | int |
1421 | ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | 1421 | ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, |
1422 | struct ext4_extent *ex2) | 1422 | struct ext4_extent *ex2) |
1423 | { | 1423 | { |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 588af8c77246..3f1873fef1c6 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -21,6 +21,8 @@ | |||
21 | #include <linux/time.h> | 21 | #include <linux/time.h> |
22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
23 | #include <linux/jbd2.h> | 23 | #include <linux/jbd2.h> |
24 | #include <linux/mount.h> | ||
25 | #include <linux/path.h> | ||
24 | #include "ext4.h" | 26 | #include "ext4.h" |
25 | #include "ext4_jbd2.h" | 27 | #include "ext4_jbd2.h" |
26 | #include "xattr.h" | 28 | #include "xattr.h" |
@@ -145,6 +147,38 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
145 | return 0; | 147 | return 0; |
146 | } | 148 | } |
147 | 149 | ||
150 | static int ext4_file_open(struct inode * inode, struct file * filp) | ||
151 | { | ||
152 | struct super_block *sb = inode->i_sb; | ||
153 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
154 | struct vfsmount *mnt = filp->f_path.mnt; | ||
155 | struct path path; | ||
156 | char buf[64], *cp; | ||
157 | |||
158 | if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) && | ||
159 | !(sb->s_flags & MS_RDONLY))) { | ||
160 | sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED; | ||
161 | /* | ||
162 | * Sample where the filesystem has been mounted and | ||
163 | * store it in the superblock for sysadmin convenience | ||
164 | * when trying to sort through large numbers of block | ||
165 | * devices or filesystem images. | ||
166 | */ | ||
167 | memset(buf, 0, sizeof(buf)); | ||
168 | path.mnt = mnt->mnt_parent; | ||
169 | path.dentry = mnt->mnt_mountpoint; | ||
170 | path_get(&path); | ||
171 | cp = d_path(&path, buf, sizeof(buf)); | ||
172 | path_put(&path); | ||
173 | if (!IS_ERR(cp)) { | ||
174 | memcpy(sbi->s_es->s_last_mounted, cp, | ||
175 | sizeof(sbi->s_es->s_last_mounted)); | ||
176 | sb->s_dirt = 1; | ||
177 | } | ||
178 | } | ||
179 | return generic_file_open(inode, filp); | ||
180 | } | ||
181 | |||
148 | const struct file_operations ext4_file_operations = { | 182 | const struct file_operations ext4_file_operations = { |
149 | .llseek = generic_file_llseek, | 183 | .llseek = generic_file_llseek, |
150 | .read = do_sync_read, | 184 | .read = do_sync_read, |
@@ -156,7 +190,7 @@ const struct file_operations ext4_file_operations = { | |||
156 | .compat_ioctl = ext4_compat_ioctl, | 190 | .compat_ioctl = ext4_compat_ioctl, |
157 | #endif | 191 | #endif |
158 | .mmap = ext4_file_mmap, | 192 | .mmap = ext4_file_mmap, |
159 | .open = generic_file_open, | 193 | .open = ext4_file_open, |
160 | .release = ext4_release_file, | 194 | .release = ext4_release_file, |
161 | .fsync = ext4_sync_file, | 195 | .fsync = ext4_sync_file, |
162 | .splice_read = generic_file_splice_read, | 196 | .splice_read = generic_file_splice_read, |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 5afe4370840b..83cf6415f599 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -28,10 +28,12 @@ | |||
28 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
29 | #include <linux/jbd2.h> | 29 | #include <linux/jbd2.h> |
30 | #include <linux/blkdev.h> | 30 | #include <linux/blkdev.h> |
31 | #include <linux/marker.h> | 31 | |
32 | #include "ext4.h" | 32 | #include "ext4.h" |
33 | #include "ext4_jbd2.h" | 33 | #include "ext4_jbd2.h" |
34 | 34 | ||
35 | #include <trace/events/ext4.h> | ||
36 | |||
35 | /* | 37 | /* |
36 | * akpm: A new design for ext4_sync_file(). | 38 | * akpm: A new design for ext4_sync_file(). |
37 | * | 39 | * |
@@ -52,9 +54,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
52 | 54 | ||
53 | J_ASSERT(ext4_journal_current_handle() == NULL); | 55 | J_ASSERT(ext4_journal_current_handle() == NULL); |
54 | 56 | ||
55 | trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld", | 57 | trace_ext4_sync_file(file, dentry, datasync); |
56 | inode->i_sb->s_id, datasync, inode->i_ino, | ||
57 | dentry->d_parent->d_inode->i_ino); | ||
58 | 58 | ||
59 | /* | 59 | /* |
60 | * data=writeback: | 60 | * data=writeback: |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 3743bd849bce..2f645732e3b7 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -23,11 +23,14 @@ | |||
23 | #include <linux/bitops.h> | 23 | #include <linux/bitops.h> |
24 | #include <linux/blkdev.h> | 24 | #include <linux/blkdev.h> |
25 | #include <asm/byteorder.h> | 25 | #include <asm/byteorder.h> |
26 | |||
26 | #include "ext4.h" | 27 | #include "ext4.h" |
27 | #include "ext4_jbd2.h" | 28 | #include "ext4_jbd2.h" |
28 | #include "xattr.h" | 29 | #include "xattr.h" |
29 | #include "acl.h" | 30 | #include "acl.h" |
30 | 31 | ||
32 | #include <trace/events/ext4.h> | ||
33 | |||
31 | /* | 34 | /* |
32 | * ialloc.c contains the inodes allocation and deallocation routines | 35 | * ialloc.c contains the inodes allocation and deallocation routines |
33 | */ | 36 | */ |
@@ -208,11 +211,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
208 | 211 | ||
209 | ino = inode->i_ino; | 212 | ino = inode->i_ino; |
210 | ext4_debug("freeing inode %lu\n", ino); | 213 | ext4_debug("freeing inode %lu\n", ino); |
211 | trace_mark(ext4_free_inode, | 214 | trace_ext4_free_inode(inode); |
212 | "dev %s ino %lu mode %d uid %lu gid %lu bocks %llu", | ||
213 | sb->s_id, inode->i_ino, inode->i_mode, | ||
214 | (unsigned long) inode->i_uid, (unsigned long) inode->i_gid, | ||
215 | (unsigned long long) inode->i_blocks); | ||
216 | 215 | ||
217 | /* | 216 | /* |
218 | * Note: we must free any quota before locking the superblock, | 217 | * Note: we must free any quota before locking the superblock, |
@@ -471,7 +470,8 @@ void get_orlov_stats(struct super_block *sb, ext4_group_t g, | |||
471 | */ | 470 | */ |
472 | 471 | ||
473 | static int find_group_orlov(struct super_block *sb, struct inode *parent, | 472 | static int find_group_orlov(struct super_block *sb, struct inode *parent, |
474 | ext4_group_t *group, int mode) | 473 | ext4_group_t *group, int mode, |
474 | const struct qstr *qstr) | ||
475 | { | 475 | { |
476 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | 476 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; |
477 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 477 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -486,6 +486,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
486 | struct ext4_group_desc *desc; | 486 | struct ext4_group_desc *desc; |
487 | struct orlov_stats stats; | 487 | struct orlov_stats stats; |
488 | int flex_size = ext4_flex_bg_size(sbi); | 488 | int flex_size = ext4_flex_bg_size(sbi); |
489 | struct dx_hash_info hinfo; | ||
489 | 490 | ||
490 | ngroups = real_ngroups; | 491 | ngroups = real_ngroups; |
491 | if (flex_size > 1) { | 492 | if (flex_size > 1) { |
@@ -507,7 +508,13 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
507 | int best_ndir = inodes_per_group; | 508 | int best_ndir = inodes_per_group; |
508 | int ret = -1; | 509 | int ret = -1; |
509 | 510 | ||
510 | get_random_bytes(&grp, sizeof(grp)); | 511 | if (qstr) { |
512 | hinfo.hash_version = DX_HASH_HALF_MD4; | ||
513 | hinfo.seed = sbi->s_hash_seed; | ||
514 | ext4fs_dirhash(qstr->name, qstr->len, &hinfo); | ||
515 | grp = hinfo.hash; | ||
516 | } else | ||
517 | get_random_bytes(&grp, sizeof(grp)); | ||
511 | parent_group = (unsigned)grp % ngroups; | 518 | parent_group = (unsigned)grp % ngroups; |
512 | for (i = 0; i < ngroups; i++) { | 519 | for (i = 0; i < ngroups; i++) { |
513 | g = (parent_group + i) % ngroups; | 520 | g = (parent_group + i) % ngroups; |
@@ -650,7 +657,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
650 | *group = parent_group + flex_size; | 657 | *group = parent_group + flex_size; |
651 | if (*group > ngroups) | 658 | if (*group > ngroups) |
652 | *group = 0; | 659 | *group = 0; |
653 | return find_group_orlov(sb, parent, group, mode); | 660 | return find_group_orlov(sb, parent, group, mode, 0); |
654 | } | 661 | } |
655 | 662 | ||
656 | /* | 663 | /* |
@@ -791,7 +798,8 @@ err_ret: | |||
791 | * For other inodes, search forward from the parent directory's block | 798 | * For other inodes, search forward from the parent directory's block |
792 | * group to find a free inode. | 799 | * group to find a free inode. |
793 | */ | 800 | */ |
794 | struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) | 801 | struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, |
802 | const struct qstr *qstr, __u32 goal) | ||
795 | { | 803 | { |
796 | struct super_block *sb; | 804 | struct super_block *sb; |
797 | struct buffer_head *inode_bitmap_bh = NULL; | 805 | struct buffer_head *inode_bitmap_bh = NULL; |
@@ -815,14 +823,23 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) | |||
815 | 823 | ||
816 | sb = dir->i_sb; | 824 | sb = dir->i_sb; |
817 | ngroups = ext4_get_groups_count(sb); | 825 | ngroups = ext4_get_groups_count(sb); |
818 | trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, | 826 | trace_ext4_request_inode(dir, mode); |
819 | dir->i_ino, mode); | ||
820 | inode = new_inode(sb); | 827 | inode = new_inode(sb); |
821 | if (!inode) | 828 | if (!inode) |
822 | return ERR_PTR(-ENOMEM); | 829 | return ERR_PTR(-ENOMEM); |
823 | ei = EXT4_I(inode); | 830 | ei = EXT4_I(inode); |
824 | sbi = EXT4_SB(sb); | 831 | sbi = EXT4_SB(sb); |
825 | 832 | ||
833 | if (!goal) | ||
834 | goal = sbi->s_inode_goal; | ||
835 | |||
836 | if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) { | ||
837 | group = (goal - 1) / EXT4_INODES_PER_GROUP(sb); | ||
838 | ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb); | ||
839 | ret2 = 0; | ||
840 | goto got_group; | ||
841 | } | ||
842 | |||
826 | if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { | 843 | if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { |
827 | ret2 = find_group_flex(sb, dir, &group); | 844 | ret2 = find_group_flex(sb, dir, &group); |
828 | if (ret2 == -1) { | 845 | if (ret2 == -1) { |
@@ -841,7 +858,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) | |||
841 | if (test_opt(sb, OLDALLOC)) | 858 | if (test_opt(sb, OLDALLOC)) |
842 | ret2 = find_group_dir(sb, dir, &group); | 859 | ret2 = find_group_dir(sb, dir, &group); |
843 | else | 860 | else |
844 | ret2 = find_group_orlov(sb, dir, &group, mode); | 861 | ret2 = find_group_orlov(sb, dir, &group, mode, qstr); |
845 | } else | 862 | } else |
846 | ret2 = find_group_other(sb, dir, &group, mode); | 863 | ret2 = find_group_other(sb, dir, &group, mode); |
847 | 864 | ||
@@ -851,7 +868,7 @@ got_group: | |||
851 | if (ret2 == -1) | 868 | if (ret2 == -1) |
852 | goto out; | 869 | goto out; |
853 | 870 | ||
854 | for (i = 0; i < ngroups; i++) { | 871 | for (i = 0; i < ngroups; i++, ino = 0) { |
855 | err = -EIO; | 872 | err = -EIO; |
856 | 873 | ||
857 | gdp = ext4_get_group_desc(sb, group, &group_desc_bh); | 874 | gdp = ext4_get_group_desc(sb, group, &group_desc_bh); |
@@ -863,8 +880,6 @@ got_group: | |||
863 | if (!inode_bitmap_bh) | 880 | if (!inode_bitmap_bh) |
864 | goto fail; | 881 | goto fail; |
865 | 882 | ||
866 | ino = 0; | ||
867 | |||
868 | repeat_in_this_group: | 883 | repeat_in_this_group: |
869 | ino = ext4_find_next_zero_bit((unsigned long *) | 884 | ino = ext4_find_next_zero_bit((unsigned long *) |
870 | inode_bitmap_bh->b_data, | 885 | inode_bitmap_bh->b_data, |
@@ -1047,8 +1062,7 @@ got: | |||
1047 | } | 1062 | } |
1048 | 1063 | ||
1049 | ext4_debug("allocating inode %lu\n", inode->i_ino); | 1064 | ext4_debug("allocating inode %lu\n", inode->i_ino); |
1050 | trace_mark(ext4_allocate_inode, "dev %s ino %lu dir %lu mode %d", | 1065 | trace_ext4_allocate_inode(inode, dir, mode); |
1051 | sb->s_id, inode->i_ino, dir->i_ino, mode); | ||
1052 | goto really_out; | 1066 | goto really_out; |
1053 | fail: | 1067 | fail: |
1054 | ext4_std_error(sb, err); | 1068 | ext4_std_error(sb, err); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 875db944b22f..7c17ae275af4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -37,11 +37,14 @@ | |||
37 | #include <linux/namei.h> | 37 | #include <linux/namei.h> |
38 | #include <linux/uio.h> | 38 | #include <linux/uio.h> |
39 | #include <linux/bio.h> | 39 | #include <linux/bio.h> |
40 | |||
40 | #include "ext4_jbd2.h" | 41 | #include "ext4_jbd2.h" |
41 | #include "xattr.h" | 42 | #include "xattr.h" |
42 | #include "acl.h" | 43 | #include "acl.h" |
43 | #include "ext4_extents.h" | 44 | #include "ext4_extents.h" |
44 | 45 | ||
46 | #include <trace/events/ext4.h> | ||
47 | |||
45 | #define MPAGE_DA_EXTENT_TAIL 0x01 | 48 | #define MPAGE_DA_EXTENT_TAIL 0x01 |
46 | 49 | ||
47 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | 50 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
@@ -78,7 +81,7 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) | |||
78 | * If the handle isn't valid we're not journaling so there's nothing to do. | 81 | * If the handle isn't valid we're not journaling so there's nothing to do. |
79 | */ | 82 | */ |
80 | int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, | 83 | int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, |
81 | struct buffer_head *bh, ext4_fsblk_t blocknr) | 84 | struct buffer_head *bh, ext4_fsblk_t blocknr) |
82 | { | 85 | { |
83 | int err; | 86 | int err; |
84 | 87 | ||
@@ -90,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, | |||
90 | BUFFER_TRACE(bh, "enter"); | 93 | BUFFER_TRACE(bh, "enter"); |
91 | 94 | ||
92 | jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " | 95 | jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " |
93 | "data mode %lx\n", | 96 | "data mode %x\n", |
94 | bh, is_metadata, inode->i_mode, | 97 | bh, is_metadata, inode->i_mode, |
95 | test_opt(inode->i_sb, DATA_FLAGS)); | 98 | test_opt(inode->i_sb, DATA_FLAGS)); |
96 | 99 | ||
@@ -329,8 +332,8 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) | |||
329 | */ | 332 | */ |
330 | 333 | ||
331 | static int ext4_block_to_path(struct inode *inode, | 334 | static int ext4_block_to_path(struct inode *inode, |
332 | ext4_lblk_t i_block, | 335 | ext4_lblk_t i_block, |
333 | ext4_lblk_t offsets[4], int *boundary) | 336 | ext4_lblk_t offsets[4], int *boundary) |
334 | { | 337 | { |
335 | int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); | 338 | int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); |
336 | int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); | 339 | int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); |
@@ -362,9 +365,9 @@ static int ext4_block_to_path(struct inode *inode, | |||
362 | final = ptrs; | 365 | final = ptrs; |
363 | } else { | 366 | } else { |
364 | ext4_warning(inode->i_sb, "ext4_block_to_path", | 367 | ext4_warning(inode->i_sb, "ext4_block_to_path", |
365 | "block %lu > max in inode %lu", | 368 | "block %lu > max in inode %lu", |
366 | i_block + direct_blocks + | 369 | i_block + direct_blocks + |
367 | indirect_blocks + double_blocks, inode->i_ino); | 370 | indirect_blocks + double_blocks, inode->i_ino); |
368 | } | 371 | } |
369 | if (boundary) | 372 | if (boundary) |
370 | *boundary = final - 1 - (i_block & (ptrs - 1)); | 373 | *boundary = final - 1 - (i_block & (ptrs - 1)); |
@@ -379,25 +382,25 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
379 | 382 | ||
380 | while (bref < p+max) { | 383 | while (bref < p+max) { |
381 | blk = le32_to_cpu(*bref++); | 384 | blk = le32_to_cpu(*bref++); |
382 | if (blk && | 385 | if (blk && |
383 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 386 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
384 | blk, 1))) { | 387 | blk, 1))) { |
385 | ext4_error(inode->i_sb, function, | 388 | ext4_error(inode->i_sb, function, |
386 | "invalid block reference %u " | 389 | "invalid block reference %u " |
387 | "in inode #%lu", blk, inode->i_ino); | 390 | "in inode #%lu", blk, inode->i_ino); |
388 | return -EIO; | 391 | return -EIO; |
389 | } | 392 | } |
390 | } | 393 | } |
391 | return 0; | 394 | return 0; |
392 | } | 395 | } |
393 | 396 | ||
394 | 397 | ||
395 | #define ext4_check_indirect_blockref(inode, bh) \ | 398 | #define ext4_check_indirect_blockref(inode, bh) \ |
396 | __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \ | 399 | __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \ |
397 | EXT4_ADDR_PER_BLOCK((inode)->i_sb)) | 400 | EXT4_ADDR_PER_BLOCK((inode)->i_sb)) |
398 | 401 | ||
399 | #define ext4_check_inode_blockref(inode) \ | 402 | #define ext4_check_inode_blockref(inode) \ |
400 | __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \ | 403 | __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \ |
401 | EXT4_NDIR_BLOCKS) | 404 | EXT4_NDIR_BLOCKS) |
402 | 405 | ||
403 | /** | 406 | /** |
@@ -447,7 +450,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, | |||
447 | bh = sb_getblk(sb, le32_to_cpu(p->key)); | 450 | bh = sb_getblk(sb, le32_to_cpu(p->key)); |
448 | if (unlikely(!bh)) | 451 | if (unlikely(!bh)) |
449 | goto failure; | 452 | goto failure; |
450 | 453 | ||
451 | if (!bh_uptodate_or_lock(bh)) { | 454 | if (!bh_uptodate_or_lock(bh)) { |
452 | if (bh_submit_read(bh) < 0) { | 455 | if (bh_submit_read(bh) < 0) { |
453 | put_bh(bh); | 456 | put_bh(bh); |
@@ -459,7 +462,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, | |||
459 | goto failure; | 462 | goto failure; |
460 | } | 463 | } |
461 | } | 464 | } |
462 | 465 | ||
463 | add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); | 466 | add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); |
464 | /* Reader: end */ | 467 | /* Reader: end */ |
465 | if (!p->key) | 468 | if (!p->key) |
@@ -552,7 +555,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
552 | * returns it. | 555 | * returns it. |
553 | */ | 556 | */ |
554 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | 557 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
555 | Indirect *partial) | 558 | Indirect *partial) |
556 | { | 559 | { |
557 | /* | 560 | /* |
558 | * XXX need to get goal block from mballoc's data structures | 561 | * XXX need to get goal block from mballoc's data structures |
@@ -574,7 +577,7 @@ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | |||
574 | * direct and indirect blocks. | 577 | * direct and indirect blocks. |
575 | */ | 578 | */ |
576 | static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, | 579 | static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, |
577 | int blocks_to_boundary) | 580 | int blocks_to_boundary) |
578 | { | 581 | { |
579 | unsigned int count = 0; | 582 | unsigned int count = 0; |
580 | 583 | ||
@@ -610,9 +613,9 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, | |||
610 | * direct blocks | 613 | * direct blocks |
611 | */ | 614 | */ |
612 | static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | 615 | static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, |
613 | ext4_lblk_t iblock, ext4_fsblk_t goal, | 616 | ext4_lblk_t iblock, ext4_fsblk_t goal, |
614 | int indirect_blks, int blks, | 617 | int indirect_blks, int blks, |
615 | ext4_fsblk_t new_blocks[4], int *err) | 618 | ext4_fsblk_t new_blocks[4], int *err) |
616 | { | 619 | { |
617 | struct ext4_allocation_request ar; | 620 | struct ext4_allocation_request ar; |
618 | int target, i; | 621 | int target, i; |
@@ -683,10 +686,10 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
683 | } | 686 | } |
684 | if (!*err) { | 687 | if (!*err) { |
685 | if (target == blks) { | 688 | if (target == blks) { |
686 | /* | 689 | /* |
687 | * save the new block number | 690 | * save the new block number |
688 | * for the first direct block | 691 | * for the first direct block |
689 | */ | 692 | */ |
690 | new_blocks[index] = current_block; | 693 | new_blocks[index] = current_block; |
691 | } | 694 | } |
692 | blk_allocated += ar.len; | 695 | blk_allocated += ar.len; |
@@ -728,9 +731,9 @@ failed_out: | |||
728 | * as described above and return 0. | 731 | * as described above and return 0. |
729 | */ | 732 | */ |
730 | static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | 733 | static int ext4_alloc_branch(handle_t *handle, struct inode *inode, |
731 | ext4_lblk_t iblock, int indirect_blks, | 734 | ext4_lblk_t iblock, int indirect_blks, |
732 | int *blks, ext4_fsblk_t goal, | 735 | int *blks, ext4_fsblk_t goal, |
733 | ext4_lblk_t *offsets, Indirect *branch) | 736 | ext4_lblk_t *offsets, Indirect *branch) |
734 | { | 737 | { |
735 | int blocksize = inode->i_sb->s_blocksize; | 738 | int blocksize = inode->i_sb->s_blocksize; |
736 | int i, n = 0; | 739 | int i, n = 0; |
@@ -777,7 +780,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
777 | * the chain to point to the new allocated | 780 | * the chain to point to the new allocated |
778 | * data blocks numbers | 781 | * data blocks numbers |
779 | */ | 782 | */ |
780 | for (i=1; i < num; i++) | 783 | for (i = 1; i < num; i++) |
781 | *(branch[n].p + i) = cpu_to_le32(++current_block); | 784 | *(branch[n].p + i) = cpu_to_le32(++current_block); |
782 | } | 785 | } |
783 | BUFFER_TRACE(bh, "marking uptodate"); | 786 | BUFFER_TRACE(bh, "marking uptodate"); |
@@ -820,7 +823,8 @@ failed: | |||
820 | * chain to new block and return 0. | 823 | * chain to new block and return 0. |
821 | */ | 824 | */ |
822 | static int ext4_splice_branch(handle_t *handle, struct inode *inode, | 825 | static int ext4_splice_branch(handle_t *handle, struct inode *inode, |
823 | ext4_lblk_t block, Indirect *where, int num, int blks) | 826 | ext4_lblk_t block, Indirect *where, int num, |
827 | int blks) | ||
824 | { | 828 | { |
825 | int i; | 829 | int i; |
826 | int err = 0; | 830 | int err = 0; |
@@ -852,10 +856,6 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
852 | } | 856 | } |
853 | 857 | ||
854 | /* We are done with atomic stuff, now do the rest of housekeeping */ | 858 | /* We are done with atomic stuff, now do the rest of housekeeping */ |
855 | |||
856 | inode->i_ctime = ext4_current_time(inode); | ||
857 | ext4_mark_inode_dirty(handle, inode); | ||
858 | |||
859 | /* had we spliced it onto indirect block? */ | 859 | /* had we spliced it onto indirect block? */ |
860 | if (where->bh) { | 860 | if (where->bh) { |
861 | /* | 861 | /* |
@@ -874,8 +874,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
874 | } else { | 874 | } else { |
875 | /* | 875 | /* |
876 | * OK, we spliced it into the inode itself on a direct block. | 876 | * OK, we spliced it into the inode itself on a direct block. |
877 | * Inode was dirtied above. | ||
878 | */ | 877 | */ |
878 | ext4_mark_inode_dirty(handle, inode); | ||
879 | jbd_debug(5, "splicing direct\n"); | 879 | jbd_debug(5, "splicing direct\n"); |
880 | } | 880 | } |
881 | return err; | 881 | return err; |
@@ -921,9 +921,9 @@ err_out: | |||
921 | * blocks. | 921 | * blocks. |
922 | */ | 922 | */ |
923 | static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | 923 | static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, |
924 | ext4_lblk_t iblock, unsigned int maxblocks, | 924 | ext4_lblk_t iblock, unsigned int maxblocks, |
925 | struct buffer_head *bh_result, | 925 | struct buffer_head *bh_result, |
926 | int flags) | 926 | int flags) |
927 | { | 927 | { |
928 | int err = -EIO; | 928 | int err = -EIO; |
929 | ext4_lblk_t offsets[4]; | 929 | ext4_lblk_t offsets[4]; |
@@ -939,7 +939,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
939 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); | 939 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); |
940 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); | 940 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); |
941 | depth = ext4_block_to_path(inode, iblock, offsets, | 941 | depth = ext4_block_to_path(inode, iblock, offsets, |
942 | &blocks_to_boundary); | 942 | &blocks_to_boundary); |
943 | 943 | ||
944 | if (depth == 0) | 944 | if (depth == 0) |
945 | goto out; | 945 | goto out; |
@@ -987,8 +987,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
987 | * Block out ext4_truncate while we alter the tree | 987 | * Block out ext4_truncate while we alter the tree |
988 | */ | 988 | */ |
989 | err = ext4_alloc_branch(handle, inode, iblock, indirect_blks, | 989 | err = ext4_alloc_branch(handle, inode, iblock, indirect_blks, |
990 | &count, goal, | 990 | &count, goal, |
991 | offsets + (partial - chain), partial); | 991 | offsets + (partial - chain), partial); |
992 | 992 | ||
993 | /* | 993 | /* |
994 | * The ext4_splice_branch call will free and forget any buffers | 994 | * The ext4_splice_branch call will free and forget any buffers |
@@ -999,8 +999,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
999 | */ | 999 | */ |
1000 | if (!err) | 1000 | if (!err) |
1001 | err = ext4_splice_branch(handle, inode, iblock, | 1001 | err = ext4_splice_branch(handle, inode, iblock, |
1002 | partial, indirect_blks, count); | 1002 | partial, indirect_blks, count); |
1003 | else | 1003 | else |
1004 | goto cleanup; | 1004 | goto cleanup; |
1005 | 1005 | ||
1006 | set_buffer_new(bh_result); | 1006 | set_buffer_new(bh_result); |
@@ -1172,7 +1172,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1172 | up_read((&EXT4_I(inode)->i_data_sem)); | 1172 | up_read((&EXT4_I(inode)->i_data_sem)); |
1173 | 1173 | ||
1174 | if (retval > 0 && buffer_mapped(bh)) { | 1174 | if (retval > 0 && buffer_mapped(bh)) { |
1175 | int ret = check_block_validity(inode, block, | 1175 | int ret = check_block_validity(inode, block, |
1176 | bh->b_blocknr, retval); | 1176 | bh->b_blocknr, retval); |
1177 | if (ret != 0) | 1177 | if (ret != 0) |
1178 | return ret; | 1178 | return ret; |
@@ -1254,7 +1254,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1254 | 1254 | ||
1255 | up_write((&EXT4_I(inode)->i_data_sem)); | 1255 | up_write((&EXT4_I(inode)->i_data_sem)); |
1256 | if (retval > 0 && buffer_mapped(bh)) { | 1256 | if (retval > 0 && buffer_mapped(bh)) { |
1257 | int ret = check_block_validity(inode, block, | 1257 | int ret = check_block_validity(inode, block, |
1258 | bh->b_blocknr, retval); | 1258 | bh->b_blocknr, retval); |
1259 | if (ret != 0) | 1259 | if (ret != 0) |
1260 | return ret; | 1260 | return ret; |
@@ -1405,8 +1405,7 @@ static int walk_page_buffers(handle_t *handle, | |||
1405 | 1405 | ||
1406 | for (bh = head, block_start = 0; | 1406 | for (bh = head, block_start = 0; |
1407 | ret == 0 && (bh != head || !block_start); | 1407 | ret == 0 && (bh != head || !block_start); |
1408 | block_start = block_end, bh = next) | 1408 | block_start = block_end, bh = next) { |
1409 | { | ||
1410 | next = bh->b_this_page; | 1409 | next = bh->b_this_page; |
1411 | block_end = block_start + blocksize; | 1410 | block_end = block_start + blocksize; |
1412 | if (block_end <= from || block_start >= to) { | 1411 | if (block_end <= from || block_start >= to) { |
@@ -1447,7 +1446,7 @@ static int walk_page_buffers(handle_t *handle, | |||
1447 | * write. | 1446 | * write. |
1448 | */ | 1447 | */ |
1449 | static int do_journal_get_write_access(handle_t *handle, | 1448 | static int do_journal_get_write_access(handle_t *handle, |
1450 | struct buffer_head *bh) | 1449 | struct buffer_head *bh) |
1451 | { | 1450 | { |
1452 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 1451 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
1453 | return 0; | 1452 | return 0; |
@@ -1455,27 +1454,24 @@ static int do_journal_get_write_access(handle_t *handle, | |||
1455 | } | 1454 | } |
1456 | 1455 | ||
1457 | static int ext4_write_begin(struct file *file, struct address_space *mapping, | 1456 | static int ext4_write_begin(struct file *file, struct address_space *mapping, |
1458 | loff_t pos, unsigned len, unsigned flags, | 1457 | loff_t pos, unsigned len, unsigned flags, |
1459 | struct page **pagep, void **fsdata) | 1458 | struct page **pagep, void **fsdata) |
1460 | { | 1459 | { |
1461 | struct inode *inode = mapping->host; | 1460 | struct inode *inode = mapping->host; |
1462 | int ret, needed_blocks; | 1461 | int ret, needed_blocks; |
1463 | handle_t *handle; | 1462 | handle_t *handle; |
1464 | int retries = 0; | 1463 | int retries = 0; |
1465 | struct page *page; | 1464 | struct page *page; |
1466 | pgoff_t index; | 1465 | pgoff_t index; |
1467 | unsigned from, to; | 1466 | unsigned from, to; |
1468 | 1467 | ||
1469 | trace_mark(ext4_write_begin, | 1468 | trace_ext4_write_begin(inode, pos, len, flags); |
1470 | "dev %s ino %lu pos %llu len %u flags %u", | ||
1471 | inode->i_sb->s_id, inode->i_ino, | ||
1472 | (unsigned long long) pos, len, flags); | ||
1473 | /* | 1469 | /* |
1474 | * Reserve one block more for addition to orphan list in case | 1470 | * Reserve one block more for addition to orphan list in case |
1475 | * we allocate blocks but write fails for some reason | 1471 | * we allocate blocks but write fails for some reason |
1476 | */ | 1472 | */ |
1477 | needed_blocks = ext4_writepage_trans_blocks(inode) + 1; | 1473 | needed_blocks = ext4_writepage_trans_blocks(inode) + 1; |
1478 | index = pos >> PAGE_CACHE_SHIFT; | 1474 | index = pos >> PAGE_CACHE_SHIFT; |
1479 | from = pos & (PAGE_CACHE_SIZE - 1); | 1475 | from = pos & (PAGE_CACHE_SIZE - 1); |
1480 | to = from + len; | 1476 | to = from + len; |
1481 | 1477 | ||
@@ -1523,7 +1519,7 @@ retry: | |||
1523 | ext4_journal_stop(handle); | 1519 | ext4_journal_stop(handle); |
1524 | if (pos + len > inode->i_size) { | 1520 | if (pos + len > inode->i_size) { |
1525 | vmtruncate(inode, inode->i_size); | 1521 | vmtruncate(inode, inode->i_size); |
1526 | /* | 1522 | /* |
1527 | * If vmtruncate failed early the inode might | 1523 | * If vmtruncate failed early the inode might |
1528 | * still be on the orphan list; we need to | 1524 | * still be on the orphan list; we need to |
1529 | * make sure the inode is removed from the | 1525 | * make sure the inode is removed from the |
@@ -1550,9 +1546,9 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh) | |||
1550 | } | 1546 | } |
1551 | 1547 | ||
1552 | static int ext4_generic_write_end(struct file *file, | 1548 | static int ext4_generic_write_end(struct file *file, |
1553 | struct address_space *mapping, | 1549 | struct address_space *mapping, |
1554 | loff_t pos, unsigned len, unsigned copied, | 1550 | loff_t pos, unsigned len, unsigned copied, |
1555 | struct page *page, void *fsdata) | 1551 | struct page *page, void *fsdata) |
1556 | { | 1552 | { |
1557 | int i_size_changed = 0; | 1553 | int i_size_changed = 0; |
1558 | struct inode *inode = mapping->host; | 1554 | struct inode *inode = mapping->host; |
@@ -1603,18 +1599,15 @@ static int ext4_generic_write_end(struct file *file, | |||
1603 | * buffers are managed internally. | 1599 | * buffers are managed internally. |
1604 | */ | 1600 | */ |
1605 | static int ext4_ordered_write_end(struct file *file, | 1601 | static int ext4_ordered_write_end(struct file *file, |
1606 | struct address_space *mapping, | 1602 | struct address_space *mapping, |
1607 | loff_t pos, unsigned len, unsigned copied, | 1603 | loff_t pos, unsigned len, unsigned copied, |
1608 | struct page *page, void *fsdata) | 1604 | struct page *page, void *fsdata) |
1609 | { | 1605 | { |
1610 | handle_t *handle = ext4_journal_current_handle(); | 1606 | handle_t *handle = ext4_journal_current_handle(); |
1611 | struct inode *inode = mapping->host; | 1607 | struct inode *inode = mapping->host; |
1612 | int ret = 0, ret2; | 1608 | int ret = 0, ret2; |
1613 | 1609 | ||
1614 | trace_mark(ext4_ordered_write_end, | 1610 | trace_ext4_ordered_write_end(inode, pos, len, copied); |
1615 | "dev %s ino %lu pos %llu len %u copied %u", | ||
1616 | inode->i_sb->s_id, inode->i_ino, | ||
1617 | (unsigned long long) pos, len, copied); | ||
1618 | ret = ext4_jbd2_file_inode(handle, inode); | 1611 | ret = ext4_jbd2_file_inode(handle, inode); |
1619 | 1612 | ||
1620 | if (ret == 0) { | 1613 | if (ret == 0) { |
@@ -1636,7 +1629,7 @@ static int ext4_ordered_write_end(struct file *file, | |||
1636 | 1629 | ||
1637 | if (pos + len > inode->i_size) { | 1630 | if (pos + len > inode->i_size) { |
1638 | vmtruncate(inode, inode->i_size); | 1631 | vmtruncate(inode, inode->i_size); |
1639 | /* | 1632 | /* |
1640 | * If vmtruncate failed early the inode might still be | 1633 | * If vmtruncate failed early the inode might still be |
1641 | * on the orphan list; we need to make sure the inode | 1634 | * on the orphan list; we need to make sure the inode |
1642 | * is removed from the orphan list in that case. | 1635 | * is removed from the orphan list in that case. |
@@ -1650,18 +1643,15 @@ static int ext4_ordered_write_end(struct file *file, | |||
1650 | } | 1643 | } |
1651 | 1644 | ||
1652 | static int ext4_writeback_write_end(struct file *file, | 1645 | static int ext4_writeback_write_end(struct file *file, |
1653 | struct address_space *mapping, | 1646 | struct address_space *mapping, |
1654 | loff_t pos, unsigned len, unsigned copied, | 1647 | loff_t pos, unsigned len, unsigned copied, |
1655 | struct page *page, void *fsdata) | 1648 | struct page *page, void *fsdata) |
1656 | { | 1649 | { |
1657 | handle_t *handle = ext4_journal_current_handle(); | 1650 | handle_t *handle = ext4_journal_current_handle(); |
1658 | struct inode *inode = mapping->host; | 1651 | struct inode *inode = mapping->host; |
1659 | int ret = 0, ret2; | 1652 | int ret = 0, ret2; |
1660 | 1653 | ||
1661 | trace_mark(ext4_writeback_write_end, | 1654 | trace_ext4_writeback_write_end(inode, pos, len, copied); |
1662 | "dev %s ino %lu pos %llu len %u copied %u", | ||
1663 | inode->i_sb->s_id, inode->i_ino, | ||
1664 | (unsigned long long) pos, len, copied); | ||
1665 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, | 1655 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, |
1666 | page, fsdata); | 1656 | page, fsdata); |
1667 | copied = ret2; | 1657 | copied = ret2; |
@@ -1681,7 +1671,7 @@ static int ext4_writeback_write_end(struct file *file, | |||
1681 | 1671 | ||
1682 | if (pos + len > inode->i_size) { | 1672 | if (pos + len > inode->i_size) { |
1683 | vmtruncate(inode, inode->i_size); | 1673 | vmtruncate(inode, inode->i_size); |
1684 | /* | 1674 | /* |
1685 | * If vmtruncate failed early the inode might still be | 1675 | * If vmtruncate failed early the inode might still be |
1686 | * on the orphan list; we need to make sure the inode | 1676 | * on the orphan list; we need to make sure the inode |
1687 | * is removed from the orphan list in that case. | 1677 | * is removed from the orphan list in that case. |
@@ -1694,9 +1684,9 @@ static int ext4_writeback_write_end(struct file *file, | |||
1694 | } | 1684 | } |
1695 | 1685 | ||
1696 | static int ext4_journalled_write_end(struct file *file, | 1686 | static int ext4_journalled_write_end(struct file *file, |
1697 | struct address_space *mapping, | 1687 | struct address_space *mapping, |
1698 | loff_t pos, unsigned len, unsigned copied, | 1688 | loff_t pos, unsigned len, unsigned copied, |
1699 | struct page *page, void *fsdata) | 1689 | struct page *page, void *fsdata) |
1700 | { | 1690 | { |
1701 | handle_t *handle = ext4_journal_current_handle(); | 1691 | handle_t *handle = ext4_journal_current_handle(); |
1702 | struct inode *inode = mapping->host; | 1692 | struct inode *inode = mapping->host; |
@@ -1705,10 +1695,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1705 | unsigned from, to; | 1695 | unsigned from, to; |
1706 | loff_t new_i_size; | 1696 | loff_t new_i_size; |
1707 | 1697 | ||
1708 | trace_mark(ext4_journalled_write_end, | 1698 | trace_ext4_journalled_write_end(inode, pos, len, copied); |
1709 | "dev %s ino %lu pos %llu len %u copied %u", | ||
1710 | inode->i_sb->s_id, inode->i_ino, | ||
1711 | (unsigned long long) pos, len, copied); | ||
1712 | from = pos & (PAGE_CACHE_SIZE - 1); | 1699 | from = pos & (PAGE_CACHE_SIZE - 1); |
1713 | to = from + len; | 1700 | to = from + len; |
1714 | 1701 | ||
@@ -1747,7 +1734,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1747 | ret = ret2; | 1734 | ret = ret2; |
1748 | if (pos + len > inode->i_size) { | 1735 | if (pos + len > inode->i_size) { |
1749 | vmtruncate(inode, inode->i_size); | 1736 | vmtruncate(inode, inode->i_size); |
1750 | /* | 1737 | /* |
1751 | * If vmtruncate failed early the inode might still be | 1738 | * If vmtruncate failed early the inode might still be |
1752 | * on the orphan list; we need to make sure the inode | 1739 | * on the orphan list; we need to make sure the inode |
1753 | * is removed from the orphan list in that case. | 1740 | * is removed from the orphan list in that case. |
@@ -1854,7 +1841,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1854 | } | 1841 | } |
1855 | 1842 | ||
1856 | static void ext4_da_page_release_reservation(struct page *page, | 1843 | static void ext4_da_page_release_reservation(struct page *page, |
1857 | unsigned long offset) | 1844 | unsigned long offset) |
1858 | { | 1845 | { |
1859 | int to_release = 0; | 1846 | int to_release = 0; |
1860 | struct buffer_head *head, *bh; | 1847 | struct buffer_head *head, *bh; |
@@ -2554,9 +2541,7 @@ static int ext4_da_writepage(struct page *page, | |||
2554 | struct buffer_head *page_bufs; | 2541 | struct buffer_head *page_bufs; |
2555 | struct inode *inode = page->mapping->host; | 2542 | struct inode *inode = page->mapping->host; |
2556 | 2543 | ||
2557 | trace_mark(ext4_da_writepage, | 2544 | trace_ext4_da_writepage(inode, page); |
2558 | "dev %s ino %lu page_index %lu", | ||
2559 | inode->i_sb->s_id, inode->i_ino, page->index); | ||
2560 | size = i_size_read(inode); | 2545 | size = i_size_read(inode); |
2561 | if (page->index == size >> PAGE_CACHE_SHIFT) | 2546 | if (page->index == size >> PAGE_CACHE_SHIFT) |
2562 | len = size & ~PAGE_CACHE_MASK; | 2547 | len = size & ~PAGE_CACHE_MASK; |
@@ -2667,19 +2652,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2667 | int needed_blocks, ret = 0, nr_to_writebump = 0; | 2652 | int needed_blocks, ret = 0, nr_to_writebump = 0; |
2668 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2653 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2669 | 2654 | ||
2670 | trace_mark(ext4_da_writepages, | 2655 | trace_ext4_da_writepages(inode, wbc); |
2671 | "dev %s ino %lu nr_t_write %ld " | ||
2672 | "pages_skipped %ld range_start %llu " | ||
2673 | "range_end %llu nonblocking %d " | ||
2674 | "for_kupdate %d for_reclaim %d " | ||
2675 | "for_writepages %d range_cyclic %d", | ||
2676 | inode->i_sb->s_id, inode->i_ino, | ||
2677 | wbc->nr_to_write, wbc->pages_skipped, | ||
2678 | (unsigned long long) wbc->range_start, | ||
2679 | (unsigned long long) wbc->range_end, | ||
2680 | wbc->nonblocking, wbc->for_kupdate, | ||
2681 | wbc->for_reclaim, wbc->for_writepages, | ||
2682 | wbc->range_cyclic); | ||
2683 | 2656 | ||
2684 | /* | 2657 | /* |
2685 | * No pages to write? This is mainly a kludge to avoid starting | 2658 | * No pages to write? This is mainly a kludge to avoid starting |
@@ -2693,13 +2666,13 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2693 | * If the filesystem has aborted, it is read-only, so return | 2666 | * If the filesystem has aborted, it is read-only, so return |
2694 | * right away instead of dumping stack traces later on that | 2667 | * right away instead of dumping stack traces later on that |
2695 | * will obscure the real source of the problem. We test | 2668 | * will obscure the real source of the problem. We test |
2696 | * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because | 2669 | * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because |
2697 | * the latter could be true if the filesystem is mounted | 2670 | * the latter could be true if the filesystem is mounted |
2698 | * read-only, and in that case, ext4_da_writepages should | 2671 | * read-only, and in that case, ext4_da_writepages should |
2699 | * *never* be called, so if that ever happens, we would want | 2672 | * *never* be called, so if that ever happens, we would want |
2700 | * the stack trace. | 2673 | * the stack trace. |
2701 | */ | 2674 | */ |
2702 | if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT)) | 2675 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) |
2703 | return -EROFS; | 2676 | return -EROFS; |
2704 | 2677 | ||
2705 | /* | 2678 | /* |
@@ -2845,14 +2818,7 @@ out_writepages: | |||
2845 | if (!no_nrwrite_index_update) | 2818 | if (!no_nrwrite_index_update) |
2846 | wbc->no_nrwrite_index_update = 0; | 2819 | wbc->no_nrwrite_index_update = 0; |
2847 | wbc->nr_to_write -= nr_to_writebump; | 2820 | wbc->nr_to_write -= nr_to_writebump; |
2848 | trace_mark(ext4_da_writepage_result, | 2821 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); |
2849 | "dev %s ino %lu ret %d pages_written %d " | ||
2850 | "pages_skipped %ld congestion %d " | ||
2851 | "more_io %d no_nrwrite_index_update %d", | ||
2852 | inode->i_sb->s_id, inode->i_ino, ret, | ||
2853 | pages_written, wbc->pages_skipped, | ||
2854 | wbc->encountered_congestion, wbc->more_io, | ||
2855 | wbc->no_nrwrite_index_update); | ||
2856 | return ret; | 2822 | return ret; |
2857 | } | 2823 | } |
2858 | 2824 | ||
@@ -2884,8 +2850,8 @@ static int ext4_nonda_switch(struct super_block *sb) | |||
2884 | } | 2850 | } |
2885 | 2851 | ||
2886 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | 2852 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, |
2887 | loff_t pos, unsigned len, unsigned flags, | 2853 | loff_t pos, unsigned len, unsigned flags, |
2888 | struct page **pagep, void **fsdata) | 2854 | struct page **pagep, void **fsdata) |
2889 | { | 2855 | { |
2890 | int ret, retries = 0; | 2856 | int ret, retries = 0; |
2891 | struct page *page; | 2857 | struct page *page; |
@@ -2904,11 +2870,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
2904 | len, flags, pagep, fsdata); | 2870 | len, flags, pagep, fsdata); |
2905 | } | 2871 | } |
2906 | *fsdata = (void *)0; | 2872 | *fsdata = (void *)0; |
2907 | 2873 | trace_ext4_da_write_begin(inode, pos, len, flags); | |
2908 | trace_mark(ext4_da_write_begin, | ||
2909 | "dev %s ino %lu pos %llu len %u flags %u", | ||
2910 | inode->i_sb->s_id, inode->i_ino, | ||
2911 | (unsigned long long) pos, len, flags); | ||
2912 | retry: | 2874 | retry: |
2913 | /* | 2875 | /* |
2914 | * With delayed allocation, we don't log the i_disksize update | 2876 | * With delayed allocation, we don't log the i_disksize update |
@@ -2959,7 +2921,7 @@ out: | |||
2959 | * when write to the end of file but not require block allocation | 2921 | * when write to the end of file but not require block allocation |
2960 | */ | 2922 | */ |
2961 | static int ext4_da_should_update_i_disksize(struct page *page, | 2923 | static int ext4_da_should_update_i_disksize(struct page *page, |
2962 | unsigned long offset) | 2924 | unsigned long offset) |
2963 | { | 2925 | { |
2964 | struct buffer_head *bh; | 2926 | struct buffer_head *bh; |
2965 | struct inode *inode = page->mapping->host; | 2927 | struct inode *inode = page->mapping->host; |
@@ -2978,9 +2940,9 @@ static int ext4_da_should_update_i_disksize(struct page *page, | |||
2978 | } | 2940 | } |
2979 | 2941 | ||
2980 | static int ext4_da_write_end(struct file *file, | 2942 | static int ext4_da_write_end(struct file *file, |
2981 | struct address_space *mapping, | 2943 | struct address_space *mapping, |
2982 | loff_t pos, unsigned len, unsigned copied, | 2944 | loff_t pos, unsigned len, unsigned copied, |
2983 | struct page *page, void *fsdata) | 2945 | struct page *page, void *fsdata) |
2984 | { | 2946 | { |
2985 | struct inode *inode = mapping->host; | 2947 | struct inode *inode = mapping->host; |
2986 | int ret = 0, ret2; | 2948 | int ret = 0, ret2; |
@@ -3001,10 +2963,7 @@ static int ext4_da_write_end(struct file *file, | |||
3001 | } | 2963 | } |
3002 | } | 2964 | } |
3003 | 2965 | ||
3004 | trace_mark(ext4_da_write_end, | 2966 | trace_ext4_da_write_end(inode, pos, len, copied); |
3005 | "dev %s ino %lu pos %llu len %u copied %u", | ||
3006 | inode->i_sb->s_id, inode->i_ino, | ||
3007 | (unsigned long long) pos, len, copied); | ||
3008 | start = pos & (PAGE_CACHE_SIZE - 1); | 2967 | start = pos & (PAGE_CACHE_SIZE - 1); |
3009 | end = start + copied - 1; | 2968 | end = start + copied - 1; |
3010 | 2969 | ||
@@ -3081,7 +3040,7 @@ int ext4_alloc_da_blocks(struct inode *inode) | |||
3081 | * not strictly speaking necessary (and for users of | 3040 | * not strictly speaking necessary (and for users of |
3082 | * laptop_mode, not even desirable). However, to do otherwise | 3041 | * laptop_mode, not even desirable). However, to do otherwise |
3083 | * would require replicating code paths in: | 3042 | * would require replicating code paths in: |
3084 | * | 3043 | * |
3085 | * ext4_da_writepages() -> | 3044 | * ext4_da_writepages() -> |
3086 | * write_cache_pages() ---> (via passed in callback function) | 3045 | * write_cache_pages() ---> (via passed in callback function) |
3087 | * __mpage_da_writepage() --> | 3046 | * __mpage_da_writepage() --> |
@@ -3101,7 +3060,7 @@ int ext4_alloc_da_blocks(struct inode *inode) | |||
3101 | * write out the pages, but rather only collect contiguous | 3060 | * write out the pages, but rather only collect contiguous |
3102 | * logical block extents, call the multi-block allocator, and | 3061 | * logical block extents, call the multi-block allocator, and |
3103 | * then update the buffer heads with the block allocations. | 3062 | * then update the buffer heads with the block allocations. |
3104 | * | 3063 | * |
3105 | * For now, though, we'll cheat by calling filemap_flush(), | 3064 | * For now, though, we'll cheat by calling filemap_flush(), |
3106 | * which will map the blocks, and start the I/O, but not | 3065 | * which will map the blocks, and start the I/O, but not |
3107 | * actually wait for the I/O to complete. | 3066 | * actually wait for the I/O to complete. |
@@ -3237,7 +3196,7 @@ static int bput_one(handle_t *handle, struct buffer_head *bh) | |||
3237 | * | 3196 | * |
3238 | */ | 3197 | */ |
3239 | static int __ext4_normal_writepage(struct page *page, | 3198 | static int __ext4_normal_writepage(struct page *page, |
3240 | struct writeback_control *wbc) | 3199 | struct writeback_control *wbc) |
3241 | { | 3200 | { |
3242 | struct inode *inode = page->mapping->host; | 3201 | struct inode *inode = page->mapping->host; |
3243 | 3202 | ||
@@ -3249,15 +3208,13 @@ static int __ext4_normal_writepage(struct page *page, | |||
3249 | } | 3208 | } |
3250 | 3209 | ||
3251 | static int ext4_normal_writepage(struct page *page, | 3210 | static int ext4_normal_writepage(struct page *page, |
3252 | struct writeback_control *wbc) | 3211 | struct writeback_control *wbc) |
3253 | { | 3212 | { |
3254 | struct inode *inode = page->mapping->host; | 3213 | struct inode *inode = page->mapping->host; |
3255 | loff_t size = i_size_read(inode); | 3214 | loff_t size = i_size_read(inode); |
3256 | loff_t len; | 3215 | loff_t len; |
3257 | 3216 | ||
3258 | trace_mark(ext4_normal_writepage, | 3217 | trace_ext4_normal_writepage(inode, page); |
3259 | "dev %s ino %lu page_index %lu", | ||
3260 | inode->i_sb->s_id, inode->i_ino, page->index); | ||
3261 | J_ASSERT(PageLocked(page)); | 3218 | J_ASSERT(PageLocked(page)); |
3262 | if (page->index == size >> PAGE_CACHE_SHIFT) | 3219 | if (page->index == size >> PAGE_CACHE_SHIFT) |
3263 | len = size & ~PAGE_CACHE_MASK; | 3220 | len = size & ~PAGE_CACHE_MASK; |
@@ -3287,7 +3244,7 @@ static int ext4_normal_writepage(struct page *page, | |||
3287 | } | 3244 | } |
3288 | 3245 | ||
3289 | static int __ext4_journalled_writepage(struct page *page, | 3246 | static int __ext4_journalled_writepage(struct page *page, |
3290 | struct writeback_control *wbc) | 3247 | struct writeback_control *wbc) |
3291 | { | 3248 | { |
3292 | struct address_space *mapping = page->mapping; | 3249 | struct address_space *mapping = page->mapping; |
3293 | struct inode *inode = mapping->host; | 3250 | struct inode *inode = mapping->host; |
@@ -3337,15 +3294,13 @@ out: | |||
3337 | } | 3294 | } |
3338 | 3295 | ||
3339 | static int ext4_journalled_writepage(struct page *page, | 3296 | static int ext4_journalled_writepage(struct page *page, |
3340 | struct writeback_control *wbc) | 3297 | struct writeback_control *wbc) |
3341 | { | 3298 | { |
3342 | struct inode *inode = page->mapping->host; | 3299 | struct inode *inode = page->mapping->host; |
3343 | loff_t size = i_size_read(inode); | 3300 | loff_t size = i_size_read(inode); |
3344 | loff_t len; | 3301 | loff_t len; |
3345 | 3302 | ||
3346 | trace_mark(ext4_journalled_writepage, | 3303 | trace_ext4_journalled_writepage(inode, page); |
3347 | "dev %s ino %lu page_index %lu", | ||
3348 | inode->i_sb->s_id, inode->i_ino, page->index); | ||
3349 | J_ASSERT(PageLocked(page)); | 3304 | J_ASSERT(PageLocked(page)); |
3350 | if (page->index == size >> PAGE_CACHE_SHIFT) | 3305 | if (page->index == size >> PAGE_CACHE_SHIFT) |
3351 | len = size & ~PAGE_CACHE_MASK; | 3306 | len = size & ~PAGE_CACHE_MASK; |
@@ -3442,8 +3397,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
3442 | * VFS code falls back into buffered path in that case so we are safe. | 3397 | * VFS code falls back into buffered path in that case so we are safe. |
3443 | */ | 3398 | */ |
3444 | static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | 3399 | static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, |
3445 | const struct iovec *iov, loff_t offset, | 3400 | const struct iovec *iov, loff_t offset, |
3446 | unsigned long nr_segs) | 3401 | unsigned long nr_segs) |
3447 | { | 3402 | { |
3448 | struct file *file = iocb->ki_filp; | 3403 | struct file *file = iocb->ki_filp; |
3449 | struct inode *inode = file->f_mapping->host; | 3404 | struct inode *inode = file->f_mapping->host; |
@@ -3763,7 +3718,8 @@ static inline int all_zeroes(__le32 *p, __le32 *q) | |||
3763 | * (no partially truncated stuff there). */ | 3718 | * (no partially truncated stuff there). */ |
3764 | 3719 | ||
3765 | static Indirect *ext4_find_shared(struct inode *inode, int depth, | 3720 | static Indirect *ext4_find_shared(struct inode *inode, int depth, |
3766 | ext4_lblk_t offsets[4], Indirect chain[4], __le32 *top) | 3721 | ext4_lblk_t offsets[4], Indirect chain[4], |
3722 | __le32 *top) | ||
3767 | { | 3723 | { |
3768 | Indirect *partial, *p; | 3724 | Indirect *partial, *p; |
3769 | int k, err; | 3725 | int k, err; |
@@ -3819,8 +3775,10 @@ no_top: | |||
3819 | * than `count' because there can be holes in there. | 3775 | * than `count' because there can be holes in there. |
3820 | */ | 3776 | */ |
3821 | static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | 3777 | static void ext4_clear_blocks(handle_t *handle, struct inode *inode, |
3822 | struct buffer_head *bh, ext4_fsblk_t block_to_free, | 3778 | struct buffer_head *bh, |
3823 | unsigned long count, __le32 *first, __le32 *last) | 3779 | ext4_fsblk_t block_to_free, |
3780 | unsigned long count, __le32 *first, | ||
3781 | __le32 *last) | ||
3824 | { | 3782 | { |
3825 | __le32 *p; | 3783 | __le32 *p; |
3826 | if (try_to_extend_transaction(handle, inode)) { | 3784 | if (try_to_extend_transaction(handle, inode)) { |
@@ -3837,10 +3795,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
3837 | } | 3795 | } |
3838 | 3796 | ||
3839 | /* | 3797 | /* |
3840 | * Any buffers which are on the journal will be in memory. We find | 3798 | * Any buffers which are on the journal will be in memory. We |
3841 | * them on the hash table so jbd2_journal_revoke() will run jbd2_journal_forget() | 3799 | * find them on the hash table so jbd2_journal_revoke() will |
3842 | * on them. We've already detached each block from the file, so | 3800 | * run jbd2_journal_forget() on them. We've already detached |
3843 | * bforget() in jbd2_journal_forget() should be safe. | 3801 | * each block from the file, so bforget() in |
3802 | * jbd2_journal_forget() should be safe. | ||
3844 | * | 3803 | * |
3845 | * AKPM: turn on bforget in jbd2_journal_forget()!!! | 3804 | * AKPM: turn on bforget in jbd2_journal_forget()!!! |
3846 | */ | 3805 | */ |
@@ -4212,7 +4171,7 @@ void ext4_truncate(struct inode *inode) | |||
4212 | (__le32*)partial->bh->b_data+addr_per_block, | 4171 | (__le32*)partial->bh->b_data+addr_per_block, |
4213 | (chain+n-1) - partial); | 4172 | (chain+n-1) - partial); |
4214 | BUFFER_TRACE(partial->bh, "call brelse"); | 4173 | BUFFER_TRACE(partial->bh, "call brelse"); |
4215 | brelse (partial->bh); | 4174 | brelse(partial->bh); |
4216 | partial--; | 4175 | partial--; |
4217 | } | 4176 | } |
4218 | do_indirects: | 4177 | do_indirects: |
@@ -4453,8 +4412,9 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei) | |||
4453 | if (flags & S_DIRSYNC) | 4412 | if (flags & S_DIRSYNC) |
4454 | ei->i_flags |= EXT4_DIRSYNC_FL; | 4413 | ei->i_flags |= EXT4_DIRSYNC_FL; |
4455 | } | 4414 | } |
4415 | |||
4456 | static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, | 4416 | static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, |
4457 | struct ext4_inode_info *ei) | 4417 | struct ext4_inode_info *ei) |
4458 | { | 4418 | { |
4459 | blkcnt_t i_blocks ; | 4419 | blkcnt_t i_blocks ; |
4460 | struct inode *inode = &(ei->vfs_inode); | 4420 | struct inode *inode = &(ei->vfs_inode); |
@@ -4569,7 +4529,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4569 | EXT4_GOOD_OLD_INODE_SIZE + | 4529 | EXT4_GOOD_OLD_INODE_SIZE + |
4570 | ei->i_extra_isize; | 4530 | ei->i_extra_isize; |
4571 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) | 4531 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) |
4572 | ei->i_state |= EXT4_STATE_XATTR; | 4532 | ei->i_state |= EXT4_STATE_XATTR; |
4573 | } | 4533 | } |
4574 | } else | 4534 | } else |
4575 | ei->i_extra_isize = 0; | 4535 | ei->i_extra_isize = 0; |
@@ -4588,7 +4548,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4588 | 4548 | ||
4589 | ret = 0; | 4549 | ret = 0; |
4590 | if (ei->i_file_acl && | 4550 | if (ei->i_file_acl && |
4591 | ((ei->i_file_acl < | 4551 | ((ei->i_file_acl < |
4592 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + | 4552 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + |
4593 | EXT4_SB(sb)->s_gdb_count)) || | 4553 | EXT4_SB(sb)->s_gdb_count)) || |
4594 | (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { | 4554 | (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { |
@@ -4603,15 +4563,15 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4603 | !ext4_inode_is_fast_symlink(inode))) | 4563 | !ext4_inode_is_fast_symlink(inode))) |
4604 | /* Validate extent which is part of inode */ | 4564 | /* Validate extent which is part of inode */ |
4605 | ret = ext4_ext_check_inode(inode); | 4565 | ret = ext4_ext_check_inode(inode); |
4606 | } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 4566 | } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
4607 | (S_ISLNK(inode->i_mode) && | 4567 | (S_ISLNK(inode->i_mode) && |
4608 | !ext4_inode_is_fast_symlink(inode))) { | 4568 | !ext4_inode_is_fast_symlink(inode))) { |
4609 | /* Validate block references which are part of inode */ | 4569 | /* Validate block references which are part of inode */ |
4610 | ret = ext4_check_inode_blockref(inode); | 4570 | ret = ext4_check_inode_blockref(inode); |
4611 | } | 4571 | } |
4612 | if (ret) { | 4572 | if (ret) { |
4613 | brelse(bh); | 4573 | brelse(bh); |
4614 | goto bad_inode; | 4574 | goto bad_inode; |
4615 | } | 4575 | } |
4616 | 4576 | ||
4617 | if (S_ISREG(inode->i_mode)) { | 4577 | if (S_ISREG(inode->i_mode)) { |
@@ -4642,7 +4602,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4642 | } else { | 4602 | } else { |
4643 | brelse(bh); | 4603 | brelse(bh); |
4644 | ret = -EIO; | 4604 | ret = -EIO; |
4645 | ext4_error(inode->i_sb, __func__, | 4605 | ext4_error(inode->i_sb, __func__, |
4646 | "bogus i_mode (%o) for inode=%lu", | 4606 | "bogus i_mode (%o) for inode=%lu", |
4647 | inode->i_mode, inode->i_ino); | 4607 | inode->i_mode, inode->i_ino); |
4648 | goto bad_inode; | 4608 | goto bad_inode; |
@@ -4795,8 +4755,9 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4795 | cpu_to_le32(new_encode_dev(inode->i_rdev)); | 4755 | cpu_to_le32(new_encode_dev(inode->i_rdev)); |
4796 | raw_inode->i_block[2] = 0; | 4756 | raw_inode->i_block[2] = 0; |
4797 | } | 4757 | } |
4798 | } else for (block = 0; block < EXT4_N_BLOCKS; block++) | 4758 | } else |
4799 | raw_inode->i_block[block] = ei->i_data[block]; | 4759 | for (block = 0; block < EXT4_N_BLOCKS; block++) |
4760 | raw_inode->i_block[block] = ei->i_data[block]; | ||
4800 | 4761 | ||
4801 | raw_inode->i_disk_version = cpu_to_le32(inode->i_version); | 4762 | raw_inode->i_disk_version = cpu_to_le32(inode->i_version); |
4802 | if (ei->i_extra_isize) { | 4763 | if (ei->i_extra_isize) { |
@@ -5150,7 +5111,7 @@ int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks) | |||
5150 | * Give this, we know that the caller already has write access to iloc->bh. | 5111 | * Give this, we know that the caller already has write access to iloc->bh. |
5151 | */ | 5112 | */ |
5152 | int ext4_mark_iloc_dirty(handle_t *handle, | 5113 | int ext4_mark_iloc_dirty(handle_t *handle, |
5153 | struct inode *inode, struct ext4_iloc *iloc) | 5114 | struct inode *inode, struct ext4_iloc *iloc) |
5154 | { | 5115 | { |
5155 | int err = 0; | 5116 | int err = 0; |
5156 | 5117 | ||
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 91e75f7a9e73..bb415408fdb6 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/compat.h> | 14 | #include <linux/compat.h> |
15 | #include <linux/smp_lock.h> | 15 | #include <linux/smp_lock.h> |
16 | #include <linux/mount.h> | 16 | #include <linux/mount.h> |
17 | #include <linux/file.h> | ||
17 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
18 | #include "ext4_jbd2.h" | 19 | #include "ext4_jbd2.h" |
19 | #include "ext4.h" | 20 | #include "ext4.h" |
@@ -213,6 +214,41 @@ setversion_out: | |||
213 | 214 | ||
214 | return err; | 215 | return err; |
215 | } | 216 | } |
217 | |||
218 | case EXT4_IOC_MOVE_EXT: { | ||
219 | struct move_extent me; | ||
220 | struct file *donor_filp; | ||
221 | int err; | ||
222 | |||
223 | if (copy_from_user(&me, | ||
224 | (struct move_extent __user *)arg, sizeof(me))) | ||
225 | return -EFAULT; | ||
226 | |||
227 | donor_filp = fget(me.donor_fd); | ||
228 | if (!donor_filp) | ||
229 | return -EBADF; | ||
230 | |||
231 | if (!capable(CAP_DAC_OVERRIDE)) { | ||
232 | if ((current->real_cred->fsuid != inode->i_uid) || | ||
233 | !(inode->i_mode & S_IRUSR) || | ||
234 | !(donor_filp->f_dentry->d_inode->i_mode & | ||
235 | S_IRUSR)) { | ||
236 | fput(donor_filp); | ||
237 | return -EACCES; | ||
238 | } | ||
239 | } | ||
240 | |||
241 | err = ext4_move_extents(filp, donor_filp, me.orig_start, | ||
242 | me.donor_start, me.len, &me.moved_len); | ||
243 | fput(donor_filp); | ||
244 | |||
245 | if (!err) | ||
246 | if (copy_to_user((struct move_extent *)arg, | ||
247 | &me, sizeof(me))) | ||
248 | return -EFAULT; | ||
249 | return err; | ||
250 | } | ||
251 | |||
216 | case EXT4_IOC_GROUP_ADD: { | 252 | case EXT4_IOC_GROUP_ADD: { |
217 | struct ext4_new_group_data input; | 253 | struct ext4_new_group_data input; |
218 | struct super_block *sb = inode->i_sb; | 254 | struct super_block *sb = inode->i_sb; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index ed8482e22c0e..519a0a686d94 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -22,6 +22,8 @@ | |||
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include "mballoc.h" | 24 | #include "mballoc.h" |
25 | #include <trace/events/ext4.h> | ||
26 | |||
25 | /* | 27 | /* |
26 | * MUSTDO: | 28 | * MUSTDO: |
27 | * - test ext4_ext_search_left() and ext4_ext_search_right() | 29 | * - test ext4_ext_search_left() and ext4_ext_search_right() |
@@ -340,8 +342,6 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | |||
340 | ext4_group_t group); | 342 | ext4_group_t group); |
341 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); | 343 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); |
342 | 344 | ||
343 | |||
344 | |||
345 | static inline void *mb_correct_addr_and_bit(int *bit, void *addr) | 345 | static inline void *mb_correct_addr_and_bit(int *bit, void *addr) |
346 | { | 346 | { |
347 | #if BITS_PER_LONG == 64 | 347 | #if BITS_PER_LONG == 64 |
@@ -2859,9 +2859,8 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2859 | discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) | 2859 | discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) |
2860 | + entry->start_blk | 2860 | + entry->start_blk |
2861 | + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 2861 | + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
2862 | trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", | 2862 | trace_ext4_discard_blocks(sb, (unsigned long long)discard_block, |
2863 | sb->s_id, (unsigned long long) discard_block, | 2863 | entry->count); |
2864 | entry->count); | ||
2865 | sb_issue_discard(sb, discard_block, entry->count); | 2864 | sb_issue_discard(sb, discard_block, entry->count); |
2866 | 2865 | ||
2867 | kmem_cache_free(ext4_free_ext_cachep, entry); | 2866 | kmem_cache_free(ext4_free_ext_cachep, entry); |
@@ -3629,10 +3628,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) | |||
3629 | 3628 | ||
3630 | mb_debug("new inode pa %p: %llu/%u for %u\n", pa, | 3629 | mb_debug("new inode pa %p: %llu/%u for %u\n", pa, |
3631 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | 3630 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); |
3632 | trace_mark(ext4_mb_new_inode_pa, | 3631 | trace_ext4_mb_new_inode_pa(ac, pa); |
3633 | "dev %s ino %lu pstart %llu len %u lstart %u", | ||
3634 | sb->s_id, ac->ac_inode->i_ino, | ||
3635 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | ||
3636 | 3632 | ||
3637 | ext4_mb_use_inode_pa(ac, pa); | 3633 | ext4_mb_use_inode_pa(ac, pa); |
3638 | atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); | 3634 | atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); |
@@ -3691,9 +3687,8 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) | |||
3691 | pa->pa_type = MB_GROUP_PA; | 3687 | pa->pa_type = MB_GROUP_PA; |
3692 | 3688 | ||
3693 | mb_debug("new group pa %p: %llu/%u for %u\n", pa, | 3689 | mb_debug("new group pa %p: %llu/%u for %u\n", pa, |
3694 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | 3690 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); |
3695 | trace_mark(ext4_mb_new_group_pa, "dev %s pstart %llu len %u lstart %u", | 3691 | trace_ext4_mb_new_group_pa(ac, pa); |
3696 | sb->s_id, pa->pa_pstart, pa->pa_len, pa->pa_lstart); | ||
3697 | 3692 | ||
3698 | ext4_mb_use_group_pa(ac, pa); | 3693 | ext4_mb_use_group_pa(ac, pa); |
3699 | atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); | 3694 | atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); |
@@ -3783,10 +3778,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3783 | ext4_mb_store_history(ac); | 3778 | ext4_mb_store_history(ac); |
3784 | } | 3779 | } |
3785 | 3780 | ||
3786 | trace_mark(ext4_mb_release_inode_pa, | 3781 | trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit, |
3787 | "dev %s ino %lu block %llu count %u", | 3782 | next - bit); |
3788 | sb->s_id, pa->pa_inode->i_ino, grp_blk_start + bit, | ||
3789 | next - bit); | ||
3790 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); | 3783 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); |
3791 | bit = next + 1; | 3784 | bit = next + 1; |
3792 | } | 3785 | } |
@@ -3820,8 +3813,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, | |||
3820 | if (ac) | 3813 | if (ac) |
3821 | ac->ac_op = EXT4_MB_HISTORY_DISCARD; | 3814 | ac->ac_op = EXT4_MB_HISTORY_DISCARD; |
3822 | 3815 | ||
3823 | trace_mark(ext4_mb_release_group_pa, "dev %s pstart %llu len %d", | 3816 | trace_ext4_mb_release_group_pa(ac, pa); |
3824 | sb->s_id, pa->pa_pstart, pa->pa_len); | ||
3825 | BUG_ON(pa->pa_deleted == 0); | 3817 | BUG_ON(pa->pa_deleted == 0); |
3826 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3818 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
3827 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3819 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
@@ -3889,6 +3881,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3889 | 3881 | ||
3890 | INIT_LIST_HEAD(&list); | 3882 | INIT_LIST_HEAD(&list); |
3891 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 3883 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); |
3884 | if (ac) | ||
3885 | ac->ac_sb = sb; | ||
3892 | repeat: | 3886 | repeat: |
3893 | ext4_lock_group(sb, group); | 3887 | ext4_lock_group(sb, group); |
3894 | list_for_each_entry_safe(pa, tmp, | 3888 | list_for_each_entry_safe(pa, tmp, |
@@ -3987,12 +3981,15 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3987 | } | 3981 | } |
3988 | 3982 | ||
3989 | mb_debug("discard preallocation for inode %lu\n", inode->i_ino); | 3983 | mb_debug("discard preallocation for inode %lu\n", inode->i_ino); |
3990 | trace_mark(ext4_discard_preallocations, "dev %s ino %lu", sb->s_id, | 3984 | trace_ext4_discard_preallocations(inode); |
3991 | inode->i_ino); | ||
3992 | 3985 | ||
3993 | INIT_LIST_HEAD(&list); | 3986 | INIT_LIST_HEAD(&list); |
3994 | 3987 | ||
3995 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 3988 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); |
3989 | if (ac) { | ||
3990 | ac->ac_sb = sb; | ||
3991 | ac->ac_inode = inode; | ||
3992 | } | ||
3996 | repeat: | 3993 | repeat: |
3997 | /* first, collect all pa's in the inode */ | 3994 | /* first, collect all pa's in the inode */ |
3998 | spin_lock(&ei->i_prealloc_lock); | 3995 | spin_lock(&ei->i_prealloc_lock); |
@@ -4276,6 +4273,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4276 | 4273 | ||
4277 | INIT_LIST_HEAD(&discard_list); | 4274 | INIT_LIST_HEAD(&discard_list); |
4278 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 4275 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); |
4276 | if (ac) | ||
4277 | ac->ac_sb = sb; | ||
4279 | 4278 | ||
4280 | spin_lock(&lg->lg_prealloc_lock); | 4279 | spin_lock(&lg->lg_prealloc_lock); |
4281 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], | 4280 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], |
@@ -4445,8 +4444,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) | |||
4445 | int ret; | 4444 | int ret; |
4446 | int freed = 0; | 4445 | int freed = 0; |
4447 | 4446 | ||
4448 | trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", | 4447 | trace_ext4_mb_discard_preallocations(sb, needed); |
4449 | sb->s_id, needed); | ||
4450 | for (i = 0; i < ngroups && needed > 0; i++) { | 4448 | for (i = 0; i < ngroups && needed > 0; i++) { |
4451 | ret = ext4_mb_discard_group_preallocations(sb, i, needed); | 4449 | ret = ext4_mb_discard_group_preallocations(sb, i, needed); |
4452 | freed += ret; | 4450 | freed += ret; |
@@ -4475,17 +4473,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4475 | sb = ar->inode->i_sb; | 4473 | sb = ar->inode->i_sb; |
4476 | sbi = EXT4_SB(sb); | 4474 | sbi = EXT4_SB(sb); |
4477 | 4475 | ||
4478 | trace_mark(ext4_request_blocks, "dev %s flags %u len %u ino %lu " | 4476 | trace_ext4_request_blocks(ar); |
4479 | "lblk %llu goal %llu lleft %llu lright %llu " | ||
4480 | "pleft %llu pright %llu ", | ||
4481 | sb->s_id, ar->flags, ar->len, | ||
4482 | ar->inode ? ar->inode->i_ino : 0, | ||
4483 | (unsigned long long) ar->logical, | ||
4484 | (unsigned long long) ar->goal, | ||
4485 | (unsigned long long) ar->lleft, | ||
4486 | (unsigned long long) ar->lright, | ||
4487 | (unsigned long long) ar->pleft, | ||
4488 | (unsigned long long) ar->pright); | ||
4489 | 4477 | ||
4490 | /* | 4478 | /* |
4491 | * For delayed allocation, we could skip the ENOSPC and | 4479 | * For delayed allocation, we could skip the ENOSPC and |
@@ -4521,7 +4509,10 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4521 | } | 4509 | } |
4522 | 4510 | ||
4523 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 4511 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); |
4524 | if (!ac) { | 4512 | if (ac) { |
4513 | ac->ac_sb = sb; | ||
4514 | ac->ac_inode = ar->inode; | ||
4515 | } else { | ||
4525 | ar->len = 0; | 4516 | ar->len = 0; |
4526 | *errp = -ENOMEM; | 4517 | *errp = -ENOMEM; |
4527 | goto out1; | 4518 | goto out1; |
@@ -4594,18 +4585,7 @@ out3: | |||
4594 | reserv_blks); | 4585 | reserv_blks); |
4595 | } | 4586 | } |
4596 | 4587 | ||
4597 | trace_mark(ext4_allocate_blocks, | 4588 | trace_ext4_allocate_blocks(ar, (unsigned long long)block); |
4598 | "dev %s block %llu flags %u len %u ino %lu " | ||
4599 | "logical %llu goal %llu lleft %llu lright %llu " | ||
4600 | "pleft %llu pright %llu ", | ||
4601 | sb->s_id, (unsigned long long) block, | ||
4602 | ar->flags, ar->len, ar->inode ? ar->inode->i_ino : 0, | ||
4603 | (unsigned long long) ar->logical, | ||
4604 | (unsigned long long) ar->goal, | ||
4605 | (unsigned long long) ar->lleft, | ||
4606 | (unsigned long long) ar->lright, | ||
4607 | (unsigned long long) ar->pleft, | ||
4608 | (unsigned long long) ar->pright); | ||
4609 | 4589 | ||
4610 | return block; | 4590 | return block; |
4611 | } | 4591 | } |
@@ -4709,7 +4689,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4709 | * Main entry point into mballoc to free blocks | 4689 | * Main entry point into mballoc to free blocks |
4710 | */ | 4690 | */ |
4711 | void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, | 4691 | void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, |
4712 | unsigned long block, unsigned long count, | 4692 | ext4_fsblk_t block, unsigned long count, |
4713 | int metadata, unsigned long *freed) | 4693 | int metadata, unsigned long *freed) |
4714 | { | 4694 | { |
4715 | struct buffer_head *bitmap_bh = NULL; | 4695 | struct buffer_head *bitmap_bh = NULL; |
@@ -4735,15 +4715,12 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, | |||
4735 | block + count > ext4_blocks_count(es)) { | 4715 | block + count > ext4_blocks_count(es)) { |
4736 | ext4_error(sb, __func__, | 4716 | ext4_error(sb, __func__, |
4737 | "Freeing blocks not in datazone - " | 4717 | "Freeing blocks not in datazone - " |
4738 | "block = %lu, count = %lu", block, count); | 4718 | "block = %llu, count = %lu", block, count); |
4739 | goto error_return; | 4719 | goto error_return; |
4740 | } | 4720 | } |
4741 | 4721 | ||
4742 | ext4_debug("freeing block %lu\n", block); | 4722 | ext4_debug("freeing block %llu\n", block); |
4743 | trace_mark(ext4_free_blocks, | 4723 | trace_ext4_free_blocks(inode, block, count, metadata); |
4744 | "dev %s block %llu count %lu metadata %d ino %lu", | ||
4745 | sb->s_id, (unsigned long long) block, count, metadata, | ||
4746 | inode ? inode->i_ino : 0); | ||
4747 | 4724 | ||
4748 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 4725 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); |
4749 | if (ac) { | 4726 | if (ac) { |
@@ -4784,7 +4761,7 @@ do_more: | |||
4784 | 4761 | ||
4785 | ext4_error(sb, __func__, | 4762 | ext4_error(sb, __func__, |
4786 | "Freeing blocks in system zone - " | 4763 | "Freeing blocks in system zone - " |
4787 | "Block = %lu, count = %lu", block, count); | 4764 | "Block = %llu, count = %lu", block, count); |
4788 | /* err = 0. ext4_std_error should be a no op */ | 4765 | /* err = 0. ext4_std_error should be a no op */ |
4789 | goto error_return; | 4766 | goto error_return; |
4790 | } | 4767 | } |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 75e34f69215b..c96bb19f58f9 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
20 | #include <linux/version.h> | 20 | #include <linux/version.h> |
21 | #include <linux/blkdev.h> | 21 | #include <linux/blkdev.h> |
22 | #include <linux/marker.h> | ||
23 | #include <linux/mutex.h> | 22 | #include <linux/mutex.h> |
24 | #include "ext4_jbd2.h" | 23 | #include "ext4_jbd2.h" |
25 | #include "ext4.h" | 24 | #include "ext4.h" |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index fe64d9f79852..313a50b39741 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -458,6 +458,7 @@ int ext4_ext_migrate(struct inode *inode) | |||
458 | struct inode *tmp_inode = NULL; | 458 | struct inode *tmp_inode = NULL; |
459 | struct list_blocks_struct lb; | 459 | struct list_blocks_struct lb; |
460 | unsigned long max_entries; | 460 | unsigned long max_entries; |
461 | __u32 goal; | ||
461 | 462 | ||
462 | /* | 463 | /* |
463 | * If the filesystem does not support extents, or the inode | 464 | * If the filesystem does not support extents, or the inode |
@@ -483,9 +484,10 @@ int ext4_ext_migrate(struct inode *inode) | |||
483 | retval = PTR_ERR(handle); | 484 | retval = PTR_ERR(handle); |
484 | return retval; | 485 | return retval; |
485 | } | 486 | } |
486 | tmp_inode = ext4_new_inode(handle, | 487 | goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * |
487 | inode->i_sb->s_root->d_inode, | 488 | EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; |
488 | S_IFREG); | 489 | tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, |
490 | S_IFREG, 0, goal); | ||
489 | if (IS_ERR(tmp_inode)) { | 491 | if (IS_ERR(tmp_inode)) { |
490 | retval = -ENOMEM; | 492 | retval = -ENOMEM; |
491 | ext4_journal_stop(handle); | 493 | ext4_journal_stop(handle); |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c new file mode 100644 index 000000000000..bbf2dd9404dc --- /dev/null +++ b/fs/ext4/move_extent.c | |||
@@ -0,0 +1,1320 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd. | ||
3 | * Written by Takashi Sato <t-sato@yk.jp.nec.com> | ||
4 | * Akira Fujita <a-fujita@rs.jp.nec.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of version 2.1 of the GNU Lesser General Public License | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/fs.h> | ||
17 | #include <linux/quotaops.h> | ||
18 | #include "ext4_jbd2.h" | ||
19 | #include "ext4_extents.h" | ||
20 | #include "ext4.h" | ||
21 | |||
22 | #define get_ext_path(path, inode, block, ret) \ | ||
23 | do { \ | ||
24 | path = ext4_ext_find_extent(inode, block, path); \ | ||
25 | if (IS_ERR(path)) { \ | ||
26 | ret = PTR_ERR(path); \ | ||
27 | path = NULL; \ | ||
28 | } \ | ||
29 | } while (0) | ||
30 | |||
31 | /** | ||
32 | * copy_extent_status - Copy the extent's initialization status | ||
33 | * | ||
34 | * @src: an extent for getting initialize status | ||
35 | * @dest: an extent to be set the status | ||
36 | */ | ||
37 | static void | ||
38 | copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest) | ||
39 | { | ||
40 | if (ext4_ext_is_uninitialized(src)) | ||
41 | ext4_ext_mark_uninitialized(dest); | ||
42 | else | ||
43 | dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest)); | ||
44 | } | ||
45 | |||
46 | /** | ||
47 | * mext_next_extent - Search for the next extent and set it to "extent" | ||
48 | * | ||
49 | * @inode: inode which is searched | ||
50 | * @path: this will obtain data for the next extent | ||
51 | * @extent: pointer to the next extent we have just gotten | ||
52 | * | ||
53 | * Search the next extent in the array of ext4_ext_path structure (@path) | ||
54 | * and set it to ext4_extent structure (@extent). In addition, the member of | ||
55 | * @path (->p_ext) also points the next extent. Return 0 on success, 1 if | ||
56 | * ext4_ext_path structure refers to the last extent, or a negative error | ||
57 | * value on failure. | ||
58 | */ | ||
59 | static int | ||
60 | mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | ||
61 | struct ext4_extent **extent) | ||
62 | { | ||
63 | int ppos, leaf_ppos = path->p_depth; | ||
64 | |||
65 | ppos = leaf_ppos; | ||
66 | if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { | ||
67 | /* leaf block */ | ||
68 | *extent = ++path[ppos].p_ext; | ||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | while (--ppos >= 0) { | ||
73 | if (EXT_LAST_INDEX(path[ppos].p_hdr) > | ||
74 | path[ppos].p_idx) { | ||
75 | int cur_ppos = ppos; | ||
76 | |||
77 | /* index block */ | ||
78 | path[ppos].p_idx++; | ||
79 | path[ppos].p_block = idx_pblock(path[ppos].p_idx); | ||
80 | if (path[ppos+1].p_bh) | ||
81 | brelse(path[ppos+1].p_bh); | ||
82 | path[ppos+1].p_bh = | ||
83 | sb_bread(inode->i_sb, path[ppos].p_block); | ||
84 | if (!path[ppos+1].p_bh) | ||
85 | return -EIO; | ||
86 | path[ppos+1].p_hdr = | ||
87 | ext_block_hdr(path[ppos+1].p_bh); | ||
88 | |||
89 | /* Halfway index block */ | ||
90 | while (++cur_ppos < leaf_ppos) { | ||
91 | path[cur_ppos].p_idx = | ||
92 | EXT_FIRST_INDEX(path[cur_ppos].p_hdr); | ||
93 | path[cur_ppos].p_block = | ||
94 | idx_pblock(path[cur_ppos].p_idx); | ||
95 | if (path[cur_ppos+1].p_bh) | ||
96 | brelse(path[cur_ppos+1].p_bh); | ||
97 | path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, | ||
98 | path[cur_ppos].p_block); | ||
99 | if (!path[cur_ppos+1].p_bh) | ||
100 | return -EIO; | ||
101 | path[cur_ppos+1].p_hdr = | ||
102 | ext_block_hdr(path[cur_ppos+1].p_bh); | ||
103 | } | ||
104 | |||
105 | /* leaf block */ | ||
106 | path[leaf_ppos].p_ext = *extent = | ||
107 | EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); | ||
108 | return 0; | ||
109 | } | ||
110 | } | ||
111 | /* We found the last extent */ | ||
112 | return 1; | ||
113 | } | ||
114 | |||
115 | /** | ||
116 | * mext_double_down_read - Acquire two inodes' read semaphore | ||
117 | * | ||
118 | * @orig_inode: original inode structure | ||
119 | * @donor_inode: donor inode structure | ||
120 | * Acquire read semaphore of the two inodes (orig and donor) by i_ino order. | ||
121 | */ | ||
122 | static void | ||
123 | mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode) | ||
124 | { | ||
125 | struct inode *first = orig_inode, *second = donor_inode; | ||
126 | |||
127 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
128 | |||
129 | /* | ||
130 | * Use the inode number to provide the stable locking order instead | ||
131 | * of its address, because the C language doesn't guarantee you can | ||
132 | * compare pointers that don't come from the same array. | ||
133 | */ | ||
134 | if (donor_inode->i_ino < orig_inode->i_ino) { | ||
135 | first = donor_inode; | ||
136 | second = orig_inode; | ||
137 | } | ||
138 | |||
139 | down_read(&EXT4_I(first)->i_data_sem); | ||
140 | down_read(&EXT4_I(second)->i_data_sem); | ||
141 | } | ||
142 | |||
143 | /** | ||
144 | * mext_double_down_write - Acquire two inodes' write semaphore | ||
145 | * | ||
146 | * @orig_inode: original inode structure | ||
147 | * @donor_inode: donor inode structure | ||
148 | * Acquire write semaphore of the two inodes (orig and donor) by i_ino order. | ||
149 | */ | ||
150 | static void | ||
151 | mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) | ||
152 | { | ||
153 | struct inode *first = orig_inode, *second = donor_inode; | ||
154 | |||
155 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
156 | |||
157 | /* | ||
158 | * Use the inode number to provide the stable locking order instead | ||
159 | * of its address, because the C language doesn't guarantee you can | ||
160 | * compare pointers that don't come from the same array. | ||
161 | */ | ||
162 | if (donor_inode->i_ino < orig_inode->i_ino) { | ||
163 | first = donor_inode; | ||
164 | second = orig_inode; | ||
165 | } | ||
166 | |||
167 | down_write(&EXT4_I(first)->i_data_sem); | ||
168 | down_write(&EXT4_I(second)->i_data_sem); | ||
169 | } | ||
170 | |||
171 | /** | ||
172 | * mext_double_up_read - Release two inodes' read semaphore | ||
173 | * | ||
174 | * @orig_inode: original inode structure to be released its lock first | ||
175 | * @donor_inode: donor inode structure to be released its lock second | ||
176 | * Release read semaphore of two inodes (orig and donor). | ||
177 | */ | ||
178 | static void | ||
179 | mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) | ||
180 | { | ||
181 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
182 | |||
183 | up_read(&EXT4_I(orig_inode)->i_data_sem); | ||
184 | up_read(&EXT4_I(donor_inode)->i_data_sem); | ||
185 | } | ||
186 | |||
187 | /** | ||
188 | * mext_double_up_write - Release two inodes' write semaphore | ||
189 | * | ||
190 | * @orig_inode: original inode structure to be released its lock first | ||
191 | * @donor_inode: donor inode structure to be released its lock second | ||
192 | * Release write semaphore of two inodes (orig and donor). | ||
193 | */ | ||
194 | static void | ||
195 | mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) | ||
196 | { | ||
197 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
198 | |||
199 | up_write(&EXT4_I(orig_inode)->i_data_sem); | ||
200 | up_write(&EXT4_I(donor_inode)->i_data_sem); | ||
201 | } | ||
202 | |||
203 | /** | ||
204 | * mext_insert_across_blocks - Insert extents across leaf block | ||
205 | * | ||
206 | * @handle: journal handle | ||
207 | * @orig_inode: original inode | ||
208 | * @o_start: first original extent to be changed | ||
209 | * @o_end: last original extent to be changed | ||
210 | * @start_ext: first new extent to be inserted | ||
211 | * @new_ext: middle of new extent to be inserted | ||
212 | * @end_ext: last new extent to be inserted | ||
213 | * | ||
214 | * Allocate a new leaf block and insert extents into it. Return 0 on success, | ||
215 | * or a negative error value on failure. | ||
216 | */ | ||
217 | static int | ||
218 | mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | ||
219 | struct ext4_extent *o_start, struct ext4_extent *o_end, | ||
220 | struct ext4_extent *start_ext, struct ext4_extent *new_ext, | ||
221 | struct ext4_extent *end_ext) | ||
222 | { | ||
223 | struct ext4_ext_path *orig_path = NULL; | ||
224 | ext4_lblk_t eblock = 0; | ||
225 | int new_flag = 0; | ||
226 | int end_flag = 0; | ||
227 | int err = 0; | ||
228 | |||
229 | if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) { | ||
230 | if (o_start == o_end) { | ||
231 | |||
232 | /* start_ext new_ext end_ext | ||
233 | * donor |---------|-----------|--------| | ||
234 | * orig |------------------------------| | ||
235 | */ | ||
236 | end_flag = 1; | ||
237 | } else { | ||
238 | |||
239 | /* start_ext new_ext end_ext | ||
240 | * donor |---------|----------|---------| | ||
241 | * orig |---------------|--------------| | ||
242 | */ | ||
243 | o_end->ee_block = end_ext->ee_block; | ||
244 | o_end->ee_len = end_ext->ee_len; | ||
245 | ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); | ||
246 | } | ||
247 | |||
248 | o_start->ee_len = start_ext->ee_len; | ||
249 | new_flag = 1; | ||
250 | |||
251 | } else if (start_ext->ee_len && new_ext->ee_len && | ||
252 | !end_ext->ee_len && o_start == o_end) { | ||
253 | |||
254 | /* start_ext new_ext | ||
255 | * donor |--------------|---------------| | ||
256 | * orig |------------------------------| | ||
257 | */ | ||
258 | o_start->ee_len = start_ext->ee_len; | ||
259 | new_flag = 1; | ||
260 | |||
261 | } else if (!start_ext->ee_len && new_ext->ee_len && | ||
262 | end_ext->ee_len && o_start == o_end) { | ||
263 | |||
264 | /* new_ext end_ext | ||
265 | * donor |--------------|---------------| | ||
266 | * orig |------------------------------| | ||
267 | */ | ||
268 | o_end->ee_block = end_ext->ee_block; | ||
269 | o_end->ee_len = end_ext->ee_len; | ||
270 | ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); | ||
271 | |||
272 | /* | ||
273 | * Set 0 to the extent block if new_ext was | ||
274 | * the first block. | ||
275 | */ | ||
276 | if (new_ext->ee_block) | ||
277 | eblock = le32_to_cpu(new_ext->ee_block); | ||
278 | |||
279 | new_flag = 1; | ||
280 | } else { | ||
281 | ext4_debug("ext4 move extent: Unexpected insert case\n"); | ||
282 | return -EIO; | ||
283 | } | ||
284 | |||
285 | if (new_flag) { | ||
286 | get_ext_path(orig_path, orig_inode, eblock, err); | ||
287 | if (orig_path == NULL) | ||
288 | goto out; | ||
289 | |||
290 | if (ext4_ext_insert_extent(handle, orig_inode, | ||
291 | orig_path, new_ext)) | ||
292 | goto out; | ||
293 | } | ||
294 | |||
295 | if (end_flag) { | ||
296 | get_ext_path(orig_path, orig_inode, | ||
297 | le32_to_cpu(end_ext->ee_block) - 1, err); | ||
298 | if (orig_path == NULL) | ||
299 | goto out; | ||
300 | |||
301 | if (ext4_ext_insert_extent(handle, orig_inode, | ||
302 | orig_path, end_ext)) | ||
303 | goto out; | ||
304 | } | ||
305 | out: | ||
306 | if (orig_path) { | ||
307 | ext4_ext_drop_refs(orig_path); | ||
308 | kfree(orig_path); | ||
309 | } | ||
310 | |||
311 | return err; | ||
312 | |||
313 | } | ||
314 | |||
315 | /** | ||
316 | * mext_insert_inside_block - Insert new extent to the extent block | ||
317 | * | ||
318 | * @o_start: first original extent to be moved | ||
319 | * @o_end: last original extent to be moved | ||
320 | * @start_ext: first new extent to be inserted | ||
321 | * @new_ext: middle of new extent to be inserted | ||
322 | * @end_ext: last new extent to be inserted | ||
323 | * @eh: extent header of target leaf block | ||
324 | * @range_to_move: used to decide how to insert extent | ||
325 | * | ||
326 | * Insert extents into the leaf block. The extent (@o_start) is overwritten | ||
327 | * by inserted extents. | ||
328 | */ | ||
329 | static void | ||
330 | mext_insert_inside_block(struct ext4_extent *o_start, | ||
331 | struct ext4_extent *o_end, | ||
332 | struct ext4_extent *start_ext, | ||
333 | struct ext4_extent *new_ext, | ||
334 | struct ext4_extent *end_ext, | ||
335 | struct ext4_extent_header *eh, | ||
336 | int range_to_move) | ||
337 | { | ||
338 | int i = 0; | ||
339 | unsigned long len; | ||
340 | |||
341 | /* Move the existing extents */ | ||
342 | if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) { | ||
343 | len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) - | ||
344 | (unsigned long)(o_end + 1); | ||
345 | memmove(o_end + 1 + range_to_move, o_end + 1, len); | ||
346 | } | ||
347 | |||
348 | /* Insert start entry */ | ||
349 | if (start_ext->ee_len) | ||
350 | o_start[i++].ee_len = start_ext->ee_len; | ||
351 | |||
352 | /* Insert new entry */ | ||
353 | if (new_ext->ee_len) { | ||
354 | o_start[i] = *new_ext; | ||
355 | ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext)); | ||
356 | } | ||
357 | |||
358 | /* Insert end entry */ | ||
359 | if (end_ext->ee_len) | ||
360 | o_start[i] = *end_ext; | ||
361 | |||
362 | /* Increment the total entries counter on the extent block */ | ||
363 | le16_add_cpu(&eh->eh_entries, range_to_move); | ||
364 | } | ||
365 | |||
366 | /** | ||
367 | * mext_insert_extents - Insert new extent | ||
368 | * | ||
369 | * @handle: journal handle | ||
370 | * @orig_inode: original inode | ||
371 | * @orig_path: path indicates first extent to be changed | ||
372 | * @o_start: first original extent to be changed | ||
373 | * @o_end: last original extent to be changed | ||
374 | * @start_ext: first new extent to be inserted | ||
375 | * @new_ext: middle of new extent to be inserted | ||
376 | * @end_ext: last new extent to be inserted | ||
377 | * | ||
378 | * Call the function to insert extents. If we cannot add more extents into | ||
379 | * the leaf block, we call mext_insert_across_blocks() to create a | ||
380 | * new leaf block. Otherwise call mext_insert_inside_block(). Return 0 | ||
381 | * on success, or a negative error value on failure. | ||
382 | */ | ||
383 | static int | ||
384 | mext_insert_extents(handle_t *handle, struct inode *orig_inode, | ||
385 | struct ext4_ext_path *orig_path, | ||
386 | struct ext4_extent *o_start, | ||
387 | struct ext4_extent *o_end, | ||
388 | struct ext4_extent *start_ext, | ||
389 | struct ext4_extent *new_ext, | ||
390 | struct ext4_extent *end_ext) | ||
391 | { | ||
392 | struct ext4_extent_header *eh; | ||
393 | unsigned long need_slots, slots_range; | ||
394 | int range_to_move, depth, ret; | ||
395 | |||
396 | /* | ||
397 | * The extents need to be inserted | ||
398 | * start_extent + new_extent + end_extent. | ||
399 | */ | ||
400 | need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) + | ||
401 | (new_ext->ee_len ? 1 : 0); | ||
402 | |||
403 | /* The number of slots between start and end */ | ||
404 | slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1) | ||
405 | / sizeof(struct ext4_extent); | ||
406 | |||
407 | /* Range to move the end of extent */ | ||
408 | range_to_move = need_slots - slots_range; | ||
409 | depth = orig_path->p_depth; | ||
410 | orig_path += depth; | ||
411 | eh = orig_path->p_hdr; | ||
412 | |||
413 | if (depth) { | ||
414 | /* Register to journal */ | ||
415 | ret = ext4_journal_get_write_access(handle, orig_path->p_bh); | ||
416 | if (ret) | ||
417 | return ret; | ||
418 | } | ||
419 | |||
420 | /* Expansion */ | ||
421 | if (range_to_move > 0 && | ||
422 | (range_to_move > le16_to_cpu(eh->eh_max) | ||
423 | - le16_to_cpu(eh->eh_entries))) { | ||
424 | |||
425 | ret = mext_insert_across_blocks(handle, orig_inode, o_start, | ||
426 | o_end, start_ext, new_ext, end_ext); | ||
427 | if (ret < 0) | ||
428 | return ret; | ||
429 | } else | ||
430 | mext_insert_inside_block(o_start, o_end, start_ext, new_ext, | ||
431 | end_ext, eh, range_to_move); | ||
432 | |||
433 | if (depth) { | ||
434 | ret = ext4_handle_dirty_metadata(handle, orig_inode, | ||
435 | orig_path->p_bh); | ||
436 | if (ret) | ||
437 | return ret; | ||
438 | } else { | ||
439 | ret = ext4_mark_inode_dirty(handle, orig_inode); | ||
440 | if (ret < 0) | ||
441 | return ret; | ||
442 | } | ||
443 | |||
444 | return 0; | ||
445 | } | ||
446 | |||
447 | /** | ||
448 | * mext_leaf_block - Move one leaf extent block into the inode. | ||
449 | * | ||
450 | * @handle: journal handle | ||
451 | * @orig_inode: original inode | ||
452 | * @orig_path: path indicates first extent to be changed | ||
453 | * @dext: donor extent | ||
454 | * @from: start offset on the target file | ||
455 | * | ||
456 | * In order to insert extents into the leaf block, we must divide the extent | ||
457 | * in the leaf block into three extents. The one is located to be inserted | ||
458 | * extents, and the others are located around it. | ||
459 | * | ||
460 | * Therefore, this function creates structures to save extents of the leaf | ||
461 | * block, and inserts extents by calling mext_insert_extents() with | ||
462 | * created extents. Return 0 on success, or a negative error value on failure. | ||
463 | */ | ||
464 | static int | ||
465 | mext_leaf_block(handle_t *handle, struct inode *orig_inode, | ||
466 | struct ext4_ext_path *orig_path, struct ext4_extent *dext, | ||
467 | ext4_lblk_t *from) | ||
468 | { | ||
469 | struct ext4_extent *oext, *o_start, *o_end, *prev_ext; | ||
470 | struct ext4_extent new_ext, start_ext, end_ext; | ||
471 | ext4_lblk_t new_ext_end; | ||
472 | ext4_fsblk_t new_phys_end; | ||
473 | int oext_alen, new_ext_alen, end_ext_alen; | ||
474 | int depth = ext_depth(orig_inode); | ||
475 | int ret; | ||
476 | |||
477 | o_start = o_end = oext = orig_path[depth].p_ext; | ||
478 | oext_alen = ext4_ext_get_actual_len(oext); | ||
479 | start_ext.ee_len = end_ext.ee_len = 0; | ||
480 | |||
481 | new_ext.ee_block = cpu_to_le32(*from); | ||
482 | ext4_ext_store_pblock(&new_ext, ext_pblock(dext)); | ||
483 | new_ext.ee_len = dext->ee_len; | ||
484 | new_ext_alen = ext4_ext_get_actual_len(&new_ext); | ||
485 | new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; | ||
486 | new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1; | ||
487 | |||
488 | /* | ||
489 | * Case: original extent is first | ||
490 | * oext |--------| | ||
491 | * new_ext |--| | ||
492 | * start_ext |--| | ||
493 | */ | ||
494 | if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) && | ||
495 | le32_to_cpu(new_ext.ee_block) < | ||
496 | le32_to_cpu(oext->ee_block) + oext_alen) { | ||
497 | start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - | ||
498 | le32_to_cpu(oext->ee_block)); | ||
499 | copy_extent_status(oext, &start_ext); | ||
500 | } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { | ||
501 | prev_ext = oext - 1; | ||
502 | /* | ||
503 | * We can merge new_ext into previous extent, | ||
504 | * if these are contiguous and same extent type. | ||
505 | */ | ||
506 | if (ext4_can_extents_be_merged(orig_inode, prev_ext, | ||
507 | &new_ext)) { | ||
508 | o_start = prev_ext; | ||
509 | start_ext.ee_len = cpu_to_le16( | ||
510 | ext4_ext_get_actual_len(prev_ext) + | ||
511 | new_ext_alen); | ||
512 | copy_extent_status(prev_ext, &start_ext); | ||
513 | new_ext.ee_len = 0; | ||
514 | } | ||
515 | } | ||
516 | |||
517 | /* | ||
518 | * Case: new_ext_end must be less than oext | ||
519 | * oext |-----------| | ||
520 | * new_ext |-------| | ||
521 | */ | ||
522 | BUG_ON(le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end); | ||
523 | |||
524 | /* | ||
525 | * Case: new_ext is smaller than original extent | ||
526 | * oext |---------------| | ||
527 | * new_ext |-----------| | ||
528 | * end_ext |---| | ||
529 | */ | ||
530 | if (le32_to_cpu(oext->ee_block) <= new_ext_end && | ||
531 | new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) { | ||
532 | end_ext.ee_len = | ||
533 | cpu_to_le16(le32_to_cpu(oext->ee_block) + | ||
534 | oext_alen - 1 - new_ext_end); | ||
535 | copy_extent_status(oext, &end_ext); | ||
536 | end_ext_alen = ext4_ext_get_actual_len(&end_ext); | ||
537 | ext4_ext_store_pblock(&end_ext, | ||
538 | (ext_pblock(o_end) + oext_alen - end_ext_alen)); | ||
539 | end_ext.ee_block = | ||
540 | cpu_to_le32(le32_to_cpu(o_end->ee_block) + | ||
541 | oext_alen - end_ext_alen); | ||
542 | } | ||
543 | |||
544 | ret = mext_insert_extents(handle, orig_inode, orig_path, o_start, | ||
545 | o_end, &start_ext, &new_ext, &end_ext); | ||
546 | return ret; | ||
547 | } | ||
548 | |||
549 | /** | ||
550 | * mext_calc_swap_extents - Calculate extents for extent swapping. | ||
551 | * | ||
552 | * @tmp_dext: the extent that will belong to the original inode | ||
553 | * @tmp_oext: the extent that will belong to the donor inode | ||
554 | * @orig_off: block offset of original inode | ||
555 | * @donor_off: block offset of donor inode | ||
556 | * @max_count: the maximun length of extents | ||
557 | */ | ||
558 | static void | ||
559 | mext_calc_swap_extents(struct ext4_extent *tmp_dext, | ||
560 | struct ext4_extent *tmp_oext, | ||
561 | ext4_lblk_t orig_off, ext4_lblk_t donor_off, | ||
562 | ext4_lblk_t max_count) | ||
563 | { | ||
564 | ext4_lblk_t diff, orig_diff; | ||
565 | struct ext4_extent dext_old, oext_old; | ||
566 | |||
567 | dext_old = *tmp_dext; | ||
568 | oext_old = *tmp_oext; | ||
569 | |||
570 | /* When tmp_dext is too large, pick up the target range. */ | ||
571 | diff = donor_off - le32_to_cpu(tmp_dext->ee_block); | ||
572 | |||
573 | ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff); | ||
574 | tmp_dext->ee_block = | ||
575 | cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); | ||
576 | tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); | ||
577 | |||
578 | if (max_count < ext4_ext_get_actual_len(tmp_dext)) | ||
579 | tmp_dext->ee_len = cpu_to_le16(max_count); | ||
580 | |||
581 | orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); | ||
582 | ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff); | ||
583 | |||
584 | /* Adjust extent length if donor extent is larger than orig */ | ||
585 | if (ext4_ext_get_actual_len(tmp_dext) > | ||
586 | ext4_ext_get_actual_len(tmp_oext) - orig_diff) | ||
587 | tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) - | ||
588 | orig_diff); | ||
589 | |||
590 | tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext)); | ||
591 | |||
592 | copy_extent_status(&oext_old, tmp_dext); | ||
593 | copy_extent_status(&dext_old, tmp_oext); | ||
594 | } | ||
595 | |||
596 | /** | ||
597 | * mext_replace_branches - Replace original extents with new extents | ||
598 | * | ||
599 | * @handle: journal handle | ||
600 | * @orig_inode: original inode | ||
601 | * @donor_inode: donor inode | ||
602 | * @from: block offset of orig_inode | ||
603 | * @count: block count to be replaced | ||
604 | * | ||
605 | * Replace original inode extents and donor inode extents page by page. | ||
606 | * We implement this replacement in the following three steps: | ||
607 | * 1. Save the block information of original and donor inodes into | ||
608 | * dummy extents. | ||
609 | * 2. Change the block information of original inode to point at the | ||
610 | * donor inode blocks. | ||
611 | * 3. Change the block information of donor inode to point at the saved | ||
612 | * original inode blocks in the dummy extents. | ||
613 | * | ||
614 | * Return 0 on success, or a negative error value on failure. | ||
615 | */ | ||
616 | static int | ||
617 | mext_replace_branches(handle_t *handle, struct inode *orig_inode, | ||
618 | struct inode *donor_inode, ext4_lblk_t from, | ||
619 | ext4_lblk_t count) | ||
620 | { | ||
621 | struct ext4_ext_path *orig_path = NULL; | ||
622 | struct ext4_ext_path *donor_path = NULL; | ||
623 | struct ext4_extent *oext, *dext; | ||
624 | struct ext4_extent tmp_dext, tmp_oext; | ||
625 | ext4_lblk_t orig_off = from, donor_off = from; | ||
626 | int err = 0; | ||
627 | int depth; | ||
628 | int replaced_count = 0; | ||
629 | int dext_alen; | ||
630 | |||
631 | mext_double_down_write(orig_inode, donor_inode); | ||
632 | |||
633 | /* Get the original extent for the block "orig_off" */ | ||
634 | get_ext_path(orig_path, orig_inode, orig_off, err); | ||
635 | if (orig_path == NULL) | ||
636 | goto out; | ||
637 | |||
638 | /* Get the donor extent for the head */ | ||
639 | get_ext_path(donor_path, donor_inode, donor_off, err); | ||
640 | if (donor_path == NULL) | ||
641 | goto out; | ||
642 | depth = ext_depth(orig_inode); | ||
643 | oext = orig_path[depth].p_ext; | ||
644 | tmp_oext = *oext; | ||
645 | |||
646 | depth = ext_depth(donor_inode); | ||
647 | dext = donor_path[depth].p_ext; | ||
648 | tmp_dext = *dext; | ||
649 | |||
650 | mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | ||
651 | donor_off, count); | ||
652 | |||
653 | /* Loop for the donor extents */ | ||
654 | while (1) { | ||
655 | /* The extent for donor must be found. */ | ||
656 | BUG_ON(!dext || donor_off != le32_to_cpu(tmp_dext.ee_block)); | ||
657 | |||
658 | /* Set donor extent to orig extent */ | ||
659 | err = mext_leaf_block(handle, orig_inode, | ||
660 | orig_path, &tmp_dext, &orig_off); | ||
661 | if (err < 0) | ||
662 | goto out; | ||
663 | |||
664 | /* Set orig extent to donor extent */ | ||
665 | err = mext_leaf_block(handle, donor_inode, | ||
666 | donor_path, &tmp_oext, &donor_off); | ||
667 | if (err < 0) | ||
668 | goto out; | ||
669 | |||
670 | dext_alen = ext4_ext_get_actual_len(&tmp_dext); | ||
671 | replaced_count += dext_alen; | ||
672 | donor_off += dext_alen; | ||
673 | orig_off += dext_alen; | ||
674 | |||
675 | /* Already moved the expected blocks */ | ||
676 | if (replaced_count >= count) | ||
677 | break; | ||
678 | |||
679 | if (orig_path) | ||
680 | ext4_ext_drop_refs(orig_path); | ||
681 | get_ext_path(orig_path, orig_inode, orig_off, err); | ||
682 | if (orig_path == NULL) | ||
683 | goto out; | ||
684 | depth = ext_depth(orig_inode); | ||
685 | oext = orig_path[depth].p_ext; | ||
686 | if (le32_to_cpu(oext->ee_block) + | ||
687 | ext4_ext_get_actual_len(oext) <= orig_off) { | ||
688 | err = 0; | ||
689 | goto out; | ||
690 | } | ||
691 | tmp_oext = *oext; | ||
692 | |||
693 | if (donor_path) | ||
694 | ext4_ext_drop_refs(donor_path); | ||
695 | get_ext_path(donor_path, donor_inode, | ||
696 | donor_off, err); | ||
697 | if (donor_path == NULL) | ||
698 | goto out; | ||
699 | depth = ext_depth(donor_inode); | ||
700 | dext = donor_path[depth].p_ext; | ||
701 | if (le32_to_cpu(dext->ee_block) + | ||
702 | ext4_ext_get_actual_len(dext) <= donor_off) { | ||
703 | err = 0; | ||
704 | goto out; | ||
705 | } | ||
706 | tmp_dext = *dext; | ||
707 | |||
708 | mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | ||
709 | donor_off, | ||
710 | count - replaced_count); | ||
711 | } | ||
712 | |||
713 | out: | ||
714 | if (orig_path) { | ||
715 | ext4_ext_drop_refs(orig_path); | ||
716 | kfree(orig_path); | ||
717 | } | ||
718 | if (donor_path) { | ||
719 | ext4_ext_drop_refs(donor_path); | ||
720 | kfree(donor_path); | ||
721 | } | ||
722 | |||
723 | mext_double_up_write(orig_inode, donor_inode); | ||
724 | return err; | ||
725 | } | ||
726 | |||
727 | /** | ||
728 | * move_extent_per_page - Move extent data per page | ||
729 | * | ||
730 | * @o_filp: file structure of original file | ||
731 | * @donor_inode: donor inode | ||
732 | * @orig_page_offset: page index on original file | ||
733 | * @data_offset_in_page: block index where data swapping starts | ||
734 | * @block_len_in_page: the number of blocks to be swapped | ||
735 | * @uninit: orig extent is uninitialized or not | ||
736 | * | ||
737 | * Save the data in original inode blocks and replace original inode extents | ||
738 | * with donor inode extents by calling mext_replace_branches(). | ||
739 | * Finally, write out the saved data in new original inode blocks. Return 0 | ||
740 | * on success, or a negative error value on failure. | ||
741 | */ | ||
742 | static int | ||
743 | move_extent_par_page(struct file *o_filp, struct inode *donor_inode, | ||
744 | pgoff_t orig_page_offset, int data_offset_in_page, | ||
745 | int block_len_in_page, int uninit) | ||
746 | { | ||
747 | struct inode *orig_inode = o_filp->f_dentry->d_inode; | ||
748 | struct address_space *mapping = orig_inode->i_mapping; | ||
749 | struct buffer_head *bh; | ||
750 | struct page *page = NULL; | ||
751 | const struct address_space_operations *a_ops = mapping->a_ops; | ||
752 | handle_t *handle; | ||
753 | ext4_lblk_t orig_blk_offset; | ||
754 | long long offs = orig_page_offset << PAGE_CACHE_SHIFT; | ||
755 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; | ||
756 | unsigned int w_flags = 0; | ||
757 | unsigned int tmp_data_len, data_len; | ||
758 | void *fsdata; | ||
759 | int ret, i, jblocks; | ||
760 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | ||
761 | |||
762 | /* | ||
763 | * It needs twice the amount of ordinary journal buffers because | ||
764 | * inode and donor_inode may change each different metadata blocks. | ||
765 | */ | ||
766 | jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; | ||
767 | handle = ext4_journal_start(orig_inode, jblocks); | ||
768 | if (IS_ERR(handle)) { | ||
769 | ret = PTR_ERR(handle); | ||
770 | return ret; | ||
771 | } | ||
772 | |||
773 | if (segment_eq(get_fs(), KERNEL_DS)) | ||
774 | w_flags |= AOP_FLAG_UNINTERRUPTIBLE; | ||
775 | |||
776 | orig_blk_offset = orig_page_offset * blocks_per_page + | ||
777 | data_offset_in_page; | ||
778 | |||
779 | /* | ||
780 | * If orig extent is uninitialized one, | ||
781 | * it's not necessary force the page into memory | ||
782 | * and then force it to be written out again. | ||
783 | * Just swap data blocks between orig and donor. | ||
784 | */ | ||
785 | if (uninit) { | ||
786 | ret = mext_replace_branches(handle, orig_inode, | ||
787 | donor_inode, orig_blk_offset, | ||
788 | block_len_in_page); | ||
789 | |||
790 | /* Clear the inode cache not to refer to the old data */ | ||
791 | ext4_ext_invalidate_cache(orig_inode); | ||
792 | ext4_ext_invalidate_cache(donor_inode); | ||
793 | goto out2; | ||
794 | } | ||
795 | |||
796 | offs = (long long)orig_blk_offset << orig_inode->i_blkbits; | ||
797 | |||
798 | /* Calculate data_len */ | ||
799 | if ((orig_blk_offset + block_len_in_page - 1) == | ||
800 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { | ||
801 | /* Replace the last block */ | ||
802 | tmp_data_len = orig_inode->i_size & (blocksize - 1); | ||
803 | /* | ||
804 | * If data_len equal zero, it shows data_len is multiples of | ||
805 | * blocksize. So we set appropriate value. | ||
806 | */ | ||
807 | if (tmp_data_len == 0) | ||
808 | tmp_data_len = blocksize; | ||
809 | |||
810 | data_len = tmp_data_len + | ||
811 | ((block_len_in_page - 1) << orig_inode->i_blkbits); | ||
812 | } else { | ||
813 | data_len = block_len_in_page << orig_inode->i_blkbits; | ||
814 | } | ||
815 | |||
816 | ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags, | ||
817 | &page, &fsdata); | ||
818 | if (unlikely(ret < 0)) | ||
819 | goto out; | ||
820 | |||
821 | if (!PageUptodate(page)) { | ||
822 | mapping->a_ops->readpage(o_filp, page); | ||
823 | lock_page(page); | ||
824 | } | ||
825 | |||
826 | /* | ||
827 | * try_to_release_page() doesn't call releasepage in writeback mode. | ||
828 | * We should care about the order of writing to the same file | ||
829 | * by multiple move extent processes. | ||
830 | * It needs to call wait_on_page_writeback() to wait for the | ||
831 | * writeback of the page. | ||
832 | */ | ||
833 | if (PageWriteback(page)) | ||
834 | wait_on_page_writeback(page); | ||
835 | |||
836 | /* Release old bh and drop refs */ | ||
837 | try_to_release_page(page, 0); | ||
838 | |||
839 | ret = mext_replace_branches(handle, orig_inode, donor_inode, | ||
840 | orig_blk_offset, block_len_in_page); | ||
841 | if (ret < 0) | ||
842 | goto out; | ||
843 | |||
844 | /* Clear the inode cache not to refer to the old data */ | ||
845 | ext4_ext_invalidate_cache(orig_inode); | ||
846 | ext4_ext_invalidate_cache(donor_inode); | ||
847 | |||
848 | if (!page_has_buffers(page)) | ||
849 | create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); | ||
850 | |||
851 | bh = page_buffers(page); | ||
852 | for (i = 0; i < data_offset_in_page; i++) | ||
853 | bh = bh->b_this_page; | ||
854 | |||
855 | for (i = 0; i < block_len_in_page; i++) { | ||
856 | ret = ext4_get_block(orig_inode, | ||
857 | (sector_t)(orig_blk_offset + i), bh, 0); | ||
858 | if (ret < 0) | ||
859 | goto out; | ||
860 | |||
861 | if (bh->b_this_page != NULL) | ||
862 | bh = bh->b_this_page; | ||
863 | } | ||
864 | |||
865 | ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len, | ||
866 | page, fsdata); | ||
867 | page = NULL; | ||
868 | |||
869 | out: | ||
870 | if (unlikely(page)) { | ||
871 | if (PageLocked(page)) | ||
872 | unlock_page(page); | ||
873 | page_cache_release(page); | ||
874 | } | ||
875 | out2: | ||
876 | ext4_journal_stop(handle); | ||
877 | |||
878 | return ret < 0 ? ret : 0; | ||
879 | } | ||
880 | |||
881 | /** | ||
882 | * mext_check_argumants - Check whether move extent can be done | ||
883 | * | ||
884 | * @orig_inode: original inode | ||
885 | * @donor_inode: donor inode | ||
886 | * @orig_start: logical start offset in block for orig | ||
887 | * @donor_start: logical start offset in block for donor | ||
888 | * @len: the number of blocks to be moved | ||
889 | * @moved_len: moved block length | ||
890 | * | ||
891 | * Check the arguments of ext4_move_extents() whether the files can be | ||
892 | * exchanged with each other. | ||
893 | * Return 0 on success, or a negative error value on failure. | ||
894 | */ | ||
895 | static int | ||
896 | mext_check_arguments(struct inode *orig_inode, | ||
897 | struct inode *donor_inode, __u64 orig_start, | ||
898 | __u64 donor_start, __u64 *len, __u64 moved_len) | ||
899 | { | ||
900 | /* Regular file check */ | ||
901 | if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { | ||
902 | ext4_debug("ext4 move extent: The argument files should be " | ||
903 | "regular file [ino:orig %lu, donor %lu]\n", | ||
904 | orig_inode->i_ino, donor_inode->i_ino); | ||
905 | return -EINVAL; | ||
906 | } | ||
907 | |||
908 | /* Ext4 move extent does not support swapfile */ | ||
909 | if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { | ||
910 | ext4_debug("ext4 move extent: The argument files should " | ||
911 | "not be swapfile [ino:orig %lu, donor %lu]\n", | ||
912 | orig_inode->i_ino, donor_inode->i_ino); | ||
913 | return -EINVAL; | ||
914 | } | ||
915 | |||
916 | /* Files should be in the same ext4 FS */ | ||
917 | if (orig_inode->i_sb != donor_inode->i_sb) { | ||
918 | ext4_debug("ext4 move extent: The argument files " | ||
919 | "should be in same FS [ino:orig %lu, donor %lu]\n", | ||
920 | orig_inode->i_ino, donor_inode->i_ino); | ||
921 | return -EINVAL; | ||
922 | } | ||
923 | |||
924 | /* orig and donor should be different file */ | ||
925 | if (orig_inode->i_ino == donor_inode->i_ino) { | ||
926 | ext4_debug("ext4 move extent: The argument files should not " | ||
927 | "be same file [ino:orig %lu, donor %lu]\n", | ||
928 | orig_inode->i_ino, donor_inode->i_ino); | ||
929 | return -EINVAL; | ||
930 | } | ||
931 | |||
932 | /* Ext4 move extent supports only extent based file */ | ||
933 | if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) { | ||
934 | ext4_debug("ext4 move extent: orig file is not extents " | ||
935 | "based file [ino:orig %lu]\n", orig_inode->i_ino); | ||
936 | return -EOPNOTSUPP; | ||
937 | } else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) { | ||
938 | ext4_debug("ext4 move extent: donor file is not extents " | ||
939 | "based file [ino:donor %lu]\n", donor_inode->i_ino); | ||
940 | return -EOPNOTSUPP; | ||
941 | } | ||
942 | |||
943 | if ((!orig_inode->i_size) || (!donor_inode->i_size)) { | ||
944 | ext4_debug("ext4 move extent: File size is 0 byte\n"); | ||
945 | return -EINVAL; | ||
946 | } | ||
947 | |||
948 | /* Start offset should be same */ | ||
949 | if (orig_start != donor_start) { | ||
950 | ext4_debug("ext4 move extent: orig and donor's start " | ||
951 | "offset are not same [ino:orig %lu, donor %lu]\n", | ||
952 | orig_inode->i_ino, donor_inode->i_ino); | ||
953 | return -EINVAL; | ||
954 | } | ||
955 | |||
956 | if (moved_len) { | ||
957 | ext4_debug("ext4 move extent: moved_len should be 0 " | ||
958 | "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, | ||
959 | donor_inode->i_ino); | ||
960 | return -EINVAL; | ||
961 | } | ||
962 | |||
963 | if ((orig_start > MAX_DEFRAG_SIZE) || | ||
964 | (donor_start > MAX_DEFRAG_SIZE) || | ||
965 | (*len > MAX_DEFRAG_SIZE) || | ||
966 | (orig_start + *len > MAX_DEFRAG_SIZE)) { | ||
967 | ext4_debug("ext4 move extent: Can't handle over [%lu] blocks " | ||
968 | "[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE, | ||
969 | orig_inode->i_ino, donor_inode->i_ino); | ||
970 | return -EINVAL; | ||
971 | } | ||
972 | |||
973 | if (orig_inode->i_size > donor_inode->i_size) { | ||
974 | if (orig_start >= donor_inode->i_size) { | ||
975 | ext4_debug("ext4 move extent: orig start offset " | ||
976 | "[%llu] should be less than donor file size " | ||
977 | "[%lld] [ino:orig %lu, donor_inode %lu]\n", | ||
978 | orig_start, donor_inode->i_size, | ||
979 | orig_inode->i_ino, donor_inode->i_ino); | ||
980 | return -EINVAL; | ||
981 | } | ||
982 | |||
983 | if (orig_start + *len > donor_inode->i_size) { | ||
984 | ext4_debug("ext4 move extent: End offset [%llu] should " | ||
985 | "be less than donor file size [%lld]." | ||
986 | "So adjust length from %llu to %lld " | ||
987 | "[ino:orig %lu, donor %lu]\n", | ||
988 | orig_start + *len, donor_inode->i_size, | ||
989 | *len, donor_inode->i_size - orig_start, | ||
990 | orig_inode->i_ino, donor_inode->i_ino); | ||
991 | *len = donor_inode->i_size - orig_start; | ||
992 | } | ||
993 | } else { | ||
994 | if (orig_start >= orig_inode->i_size) { | ||
995 | ext4_debug("ext4 move extent: start offset [%llu] " | ||
996 | "should be less than original file size " | ||
997 | "[%lld] [inode:orig %lu, donor %lu]\n", | ||
998 | orig_start, orig_inode->i_size, | ||
999 | orig_inode->i_ino, donor_inode->i_ino); | ||
1000 | return -EINVAL; | ||
1001 | } | ||
1002 | |||
1003 | if (orig_start + *len > orig_inode->i_size) { | ||
1004 | ext4_debug("ext4 move extent: Adjust length " | ||
1005 | "from %llu to %lld. Because it should be " | ||
1006 | "less than original file size " | ||
1007 | "[ino:orig %lu, donor %lu]\n", | ||
1008 | *len, orig_inode->i_size - orig_start, | ||
1009 | orig_inode->i_ino, donor_inode->i_ino); | ||
1010 | *len = orig_inode->i_size - orig_start; | ||
1011 | } | ||
1012 | } | ||
1013 | |||
1014 | if (!*len) { | ||
1015 | ext4_debug("ext4 move extent: len shoudld not be 0 " | ||
1016 | "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, | ||
1017 | donor_inode->i_ino); | ||
1018 | return -EINVAL; | ||
1019 | } | ||
1020 | |||
1021 | return 0; | ||
1022 | } | ||
1023 | |||
1024 | /** | ||
1025 | * mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2 | ||
1026 | * | ||
1027 | * @inode1: the inode structure | ||
1028 | * @inode2: the inode structure | ||
1029 | * | ||
1030 | * Lock two inodes' i_mutex by i_ino order. This function is moved from | ||
1031 | * fs/inode.c. | ||
1032 | */ | ||
1033 | static void | ||
1034 | mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | ||
1035 | { | ||
1036 | if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { | ||
1037 | if (inode1) | ||
1038 | mutex_lock(&inode1->i_mutex); | ||
1039 | else if (inode2) | ||
1040 | mutex_lock(&inode2->i_mutex); | ||
1041 | return; | ||
1042 | } | ||
1043 | |||
1044 | if (inode1->i_ino < inode2->i_ino) { | ||
1045 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); | ||
1046 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); | ||
1047 | } else { | ||
1048 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); | ||
1049 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); | ||
1050 | } | ||
1051 | } | ||
1052 | |||
1053 | /** | ||
1054 | * mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2 | ||
1055 | * | ||
1056 | * @inode1: the inode that is released first | ||
1057 | * @inode2: the inode that is released second | ||
1058 | * | ||
1059 | * This function is moved from fs/inode.c. | ||
1060 | */ | ||
1061 | |||
1062 | static void | ||
1063 | mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) | ||
1064 | { | ||
1065 | if (inode1) | ||
1066 | mutex_unlock(&inode1->i_mutex); | ||
1067 | |||
1068 | if (inode2 && inode2 != inode1) | ||
1069 | mutex_unlock(&inode2->i_mutex); | ||
1070 | } | ||
1071 | |||
1072 | /** | ||
1073 | * ext4_move_extents - Exchange the specified range of a file | ||
1074 | * | ||
1075 | * @o_filp: file structure of the original file | ||
1076 | * @d_filp: file structure of the donor file | ||
1077 | * @orig_start: start offset in block for orig | ||
1078 | * @donor_start: start offset in block for donor | ||
1079 | * @len: the number of blocks to be moved | ||
1080 | * @moved_len: moved block length | ||
1081 | * | ||
1082 | * This function returns 0 and moved block length is set in moved_len | ||
1083 | * if succeed, otherwise returns error value. | ||
1084 | * | ||
1085 | * Note: ext4_move_extents() proceeds the following order. | ||
1086 | * 1:ext4_move_extents() calculates the last block number of moving extent | ||
1087 | * function by the start block number (orig_start) and the number of blocks | ||
1088 | * to be moved (len) specified as arguments. | ||
1089 | * If the {orig, donor}_start points a hole, the extent's start offset | ||
1090 | * pointed by ext_cur (current extent), holecheck_path, orig_path are set | ||
1091 | * after hole behind. | ||
1092 | * 2:Continue step 3 to step 5, until the holecheck_path points to last_extent | ||
1093 | * or the ext_cur exceeds the block_end which is last logical block number. | ||
1094 | * 3:To get the length of continues area, call mext_next_extent() | ||
1095 | * specified with the ext_cur (initial value is holecheck_path) re-cursive, | ||
1096 | * until find un-continuous extent, the start logical block number exceeds | ||
1097 | * the block_end or the extent points to the last extent. | ||
1098 | * 4:Exchange the original inode data with donor inode data | ||
1099 | * from orig_page_offset to seq_end_page. | ||
1100 | * The start indexes of data are specified as arguments. | ||
1101 | * That of the original inode is orig_page_offset, | ||
1102 | * and the donor inode is also orig_page_offset | ||
1103 | * (To easily handle blocksize != pagesize case, the offset for the | ||
1104 | * donor inode is block unit). | ||
1105 | * 5:Update holecheck_path and orig_path to points a next proceeding extent, | ||
1106 | * then returns to step 2. | ||
1107 | * 6:Release holecheck_path, orig_path and set the len to moved_len | ||
1108 | * which shows the number of moved blocks. | ||
1109 | * The moved_len is useful for the command to calculate the file offset | ||
1110 | * for starting next move extent ioctl. | ||
1111 | * 7:Return 0 on success, or a negative error value on failure. | ||
1112 | */ | ||
1113 | int | ||
1114 | ext4_move_extents(struct file *o_filp, struct file *d_filp, | ||
1115 | __u64 orig_start, __u64 donor_start, __u64 len, | ||
1116 | __u64 *moved_len) | ||
1117 | { | ||
1118 | struct inode *orig_inode = o_filp->f_dentry->d_inode; | ||
1119 | struct inode *donor_inode = d_filp->f_dentry->d_inode; | ||
1120 | struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL; | ||
1121 | struct ext4_extent *ext_prev, *ext_cur, *ext_dummy; | ||
1122 | ext4_lblk_t block_start = orig_start; | ||
1123 | ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; | ||
1124 | ext4_lblk_t rest_blocks; | ||
1125 | pgoff_t orig_page_offset = 0, seq_end_page; | ||
1126 | int ret, depth, last_extent = 0; | ||
1127 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | ||
1128 | int data_offset_in_page; | ||
1129 | int block_len_in_page; | ||
1130 | int uninit; | ||
1131 | |||
1132 | /* protect orig and donor against a truncate */ | ||
1133 | mext_inode_double_lock(orig_inode, donor_inode); | ||
1134 | |||
1135 | mext_double_down_read(orig_inode, donor_inode); | ||
1136 | /* Check the filesystem environment whether move_extent can be done */ | ||
1137 | ret = mext_check_arguments(orig_inode, donor_inode, orig_start, | ||
1138 | donor_start, &len, *moved_len); | ||
1139 | mext_double_up_read(orig_inode, donor_inode); | ||
1140 | if (ret) | ||
1141 | goto out2; | ||
1142 | |||
1143 | file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; | ||
1144 | block_end = block_start + len - 1; | ||
1145 | if (file_end < block_end) | ||
1146 | len -= block_end - file_end; | ||
1147 | |||
1148 | get_ext_path(orig_path, orig_inode, block_start, ret); | ||
1149 | if (orig_path == NULL) | ||
1150 | goto out2; | ||
1151 | |||
1152 | /* Get path structure to check the hole */ | ||
1153 | get_ext_path(holecheck_path, orig_inode, block_start, ret); | ||
1154 | if (holecheck_path == NULL) | ||
1155 | goto out; | ||
1156 | |||
1157 | depth = ext_depth(orig_inode); | ||
1158 | ext_cur = holecheck_path[depth].p_ext; | ||
1159 | if (ext_cur == NULL) { | ||
1160 | ret = -EINVAL; | ||
1161 | goto out; | ||
1162 | } | ||
1163 | |||
1164 | /* | ||
1165 | * Get proper extent whose ee_block is beyond block_start | ||
1166 | * if block_start was within the hole. | ||
1167 | */ | ||
1168 | if (le32_to_cpu(ext_cur->ee_block) + | ||
1169 | ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { | ||
1170 | last_extent = mext_next_extent(orig_inode, | ||
1171 | holecheck_path, &ext_cur); | ||
1172 | if (last_extent < 0) { | ||
1173 | ret = last_extent; | ||
1174 | goto out; | ||
1175 | } | ||
1176 | last_extent = mext_next_extent(orig_inode, orig_path, | ||
1177 | &ext_dummy); | ||
1178 | if (last_extent < 0) { | ||
1179 | ret = last_extent; | ||
1180 | goto out; | ||
1181 | } | ||
1182 | } | ||
1183 | seq_start = block_start; | ||
1184 | |||
1185 | /* No blocks within the specified range. */ | ||
1186 | if (le32_to_cpu(ext_cur->ee_block) > block_end) { | ||
1187 | ext4_debug("ext4 move extent: The specified range of file " | ||
1188 | "may be the hole\n"); | ||
1189 | ret = -EINVAL; | ||
1190 | goto out; | ||
1191 | } | ||
1192 | |||
1193 | /* Adjust start blocks */ | ||
1194 | add_blocks = min(le32_to_cpu(ext_cur->ee_block) + | ||
1195 | ext4_ext_get_actual_len(ext_cur), block_end + 1) - | ||
1196 | max(le32_to_cpu(ext_cur->ee_block), block_start); | ||
1197 | |||
1198 | while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) { | ||
1199 | seq_blocks += add_blocks; | ||
1200 | |||
1201 | /* Adjust tail blocks */ | ||
1202 | if (seq_start + seq_blocks - 1 > block_end) | ||
1203 | seq_blocks = block_end - seq_start + 1; | ||
1204 | |||
1205 | ext_prev = ext_cur; | ||
1206 | last_extent = mext_next_extent(orig_inode, holecheck_path, | ||
1207 | &ext_cur); | ||
1208 | if (last_extent < 0) { | ||
1209 | ret = last_extent; | ||
1210 | break; | ||
1211 | } | ||
1212 | add_blocks = ext4_ext_get_actual_len(ext_cur); | ||
1213 | |||
1214 | /* | ||
1215 | * Extend the length of contiguous block (seq_blocks) | ||
1216 | * if extents are contiguous. | ||
1217 | */ | ||
1218 | if (ext4_can_extents_be_merged(orig_inode, | ||
1219 | ext_prev, ext_cur) && | ||
1220 | block_end >= le32_to_cpu(ext_cur->ee_block) && | ||
1221 | !last_extent) | ||
1222 | continue; | ||
1223 | |||
1224 | /* Is original extent is uninitialized */ | ||
1225 | uninit = ext4_ext_is_uninitialized(ext_prev); | ||
1226 | |||
1227 | data_offset_in_page = seq_start % blocks_per_page; | ||
1228 | |||
1229 | /* | ||
1230 | * Calculate data blocks count that should be swapped | ||
1231 | * at the first page. | ||
1232 | */ | ||
1233 | if (data_offset_in_page + seq_blocks > blocks_per_page) { | ||
1234 | /* Swapped blocks are across pages */ | ||
1235 | block_len_in_page = | ||
1236 | blocks_per_page - data_offset_in_page; | ||
1237 | } else { | ||
1238 | /* Swapped blocks are in a page */ | ||
1239 | block_len_in_page = seq_blocks; | ||
1240 | } | ||
1241 | |||
1242 | orig_page_offset = seq_start >> | ||
1243 | (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); | ||
1244 | seq_end_page = (seq_start + seq_blocks - 1) >> | ||
1245 | (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); | ||
1246 | seq_start = le32_to_cpu(ext_cur->ee_block); | ||
1247 | rest_blocks = seq_blocks; | ||
1248 | |||
1249 | /* Discard preallocations of two inodes */ | ||
1250 | down_write(&EXT4_I(orig_inode)->i_data_sem); | ||
1251 | ext4_discard_preallocations(orig_inode); | ||
1252 | up_write(&EXT4_I(orig_inode)->i_data_sem); | ||
1253 | |||
1254 | down_write(&EXT4_I(donor_inode)->i_data_sem); | ||
1255 | ext4_discard_preallocations(donor_inode); | ||
1256 | up_write(&EXT4_I(donor_inode)->i_data_sem); | ||
1257 | |||
1258 | while (orig_page_offset <= seq_end_page) { | ||
1259 | |||
1260 | /* Swap original branches with new branches */ | ||
1261 | ret = move_extent_par_page(o_filp, donor_inode, | ||
1262 | orig_page_offset, | ||
1263 | data_offset_in_page, | ||
1264 | block_len_in_page, uninit); | ||
1265 | if (ret < 0) | ||
1266 | goto out; | ||
1267 | orig_page_offset++; | ||
1268 | /* Count how many blocks we have exchanged */ | ||
1269 | *moved_len += block_len_in_page; | ||
1270 | BUG_ON(*moved_len > len); | ||
1271 | |||
1272 | data_offset_in_page = 0; | ||
1273 | rest_blocks -= block_len_in_page; | ||
1274 | if (rest_blocks > blocks_per_page) | ||
1275 | block_len_in_page = blocks_per_page; | ||
1276 | else | ||
1277 | block_len_in_page = rest_blocks; | ||
1278 | } | ||
1279 | |||
1280 | /* Decrease buffer counter */ | ||
1281 | if (holecheck_path) | ||
1282 | ext4_ext_drop_refs(holecheck_path); | ||
1283 | get_ext_path(holecheck_path, orig_inode, | ||
1284 | seq_start, ret); | ||
1285 | if (holecheck_path == NULL) | ||
1286 | break; | ||
1287 | depth = holecheck_path->p_depth; | ||
1288 | |||
1289 | /* Decrease buffer counter */ | ||
1290 | if (orig_path) | ||
1291 | ext4_ext_drop_refs(orig_path); | ||
1292 | get_ext_path(orig_path, orig_inode, seq_start, ret); | ||
1293 | if (orig_path == NULL) | ||
1294 | break; | ||
1295 | |||
1296 | ext_cur = holecheck_path[depth].p_ext; | ||
1297 | add_blocks = ext4_ext_get_actual_len(ext_cur); | ||
1298 | seq_blocks = 0; | ||
1299 | |||
1300 | } | ||
1301 | out: | ||
1302 | if (orig_path) { | ||
1303 | ext4_ext_drop_refs(orig_path); | ||
1304 | kfree(orig_path); | ||
1305 | } | ||
1306 | if (holecheck_path) { | ||
1307 | ext4_ext_drop_refs(holecheck_path); | ||
1308 | kfree(holecheck_path); | ||
1309 | } | ||
1310 | out2: | ||
1311 | mext_inode_double_unlock(orig_inode, donor_inode); | ||
1312 | |||
1313 | if (ret) | ||
1314 | return ret; | ||
1315 | |||
1316 | /* All of the specified blocks must be exchanged in succeed */ | ||
1317 | BUG_ON(*moved_len != len); | ||
1318 | |||
1319 | return 0; | ||
1320 | } | ||
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 07eb6649e4fa..de04013d16ff 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -1782,7 +1782,7 @@ retry: | |||
1782 | if (IS_DIRSYNC(dir)) | 1782 | if (IS_DIRSYNC(dir)) |
1783 | ext4_handle_sync(handle); | 1783 | ext4_handle_sync(handle); |
1784 | 1784 | ||
1785 | inode = ext4_new_inode (handle, dir, mode); | 1785 | inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0); |
1786 | err = PTR_ERR(inode); | 1786 | err = PTR_ERR(inode); |
1787 | if (!IS_ERR(inode)) { | 1787 | if (!IS_ERR(inode)) { |
1788 | inode->i_op = &ext4_file_inode_operations; | 1788 | inode->i_op = &ext4_file_inode_operations; |
@@ -1816,7 +1816,7 @@ retry: | |||
1816 | if (IS_DIRSYNC(dir)) | 1816 | if (IS_DIRSYNC(dir)) |
1817 | ext4_handle_sync(handle); | 1817 | ext4_handle_sync(handle); |
1818 | 1818 | ||
1819 | inode = ext4_new_inode(handle, dir, mode); | 1819 | inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0); |
1820 | err = PTR_ERR(inode); | 1820 | err = PTR_ERR(inode); |
1821 | if (!IS_ERR(inode)) { | 1821 | if (!IS_ERR(inode)) { |
1822 | init_special_inode(inode, inode->i_mode, rdev); | 1822 | init_special_inode(inode, inode->i_mode, rdev); |
@@ -1853,7 +1853,8 @@ retry: | |||
1853 | if (IS_DIRSYNC(dir)) | 1853 | if (IS_DIRSYNC(dir)) |
1854 | ext4_handle_sync(handle); | 1854 | ext4_handle_sync(handle); |
1855 | 1855 | ||
1856 | inode = ext4_new_inode(handle, dir, S_IFDIR | mode); | 1856 | inode = ext4_new_inode(handle, dir, S_IFDIR | mode, |
1857 | &dentry->d_name, 0); | ||
1857 | err = PTR_ERR(inode); | 1858 | err = PTR_ERR(inode); |
1858 | if (IS_ERR(inode)) | 1859 | if (IS_ERR(inode)) |
1859 | goto out_stop; | 1860 | goto out_stop; |
@@ -2264,7 +2265,8 @@ retry: | |||
2264 | if (IS_DIRSYNC(dir)) | 2265 | if (IS_DIRSYNC(dir)) |
2265 | ext4_handle_sync(handle); | 2266 | ext4_handle_sync(handle); |
2266 | 2267 | ||
2267 | inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); | 2268 | inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO, |
2269 | &dentry->d_name, 0); | ||
2268 | err = PTR_ERR(inode); | 2270 | err = PTR_ERR(inode); |
2269 | if (IS_ERR(inode)) | 2271 | if (IS_ERR(inode)) |
2270 | goto out_stop; | 2272 | goto out_stop; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 012c4251397e..23013d303f81 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -37,7 +37,6 @@ | |||
37 | #include <linux/seq_file.h> | 37 | #include <linux/seq_file.h> |
38 | #include <linux/proc_fs.h> | 38 | #include <linux/proc_fs.h> |
39 | #include <linux/ctype.h> | 39 | #include <linux/ctype.h> |
40 | #include <linux/marker.h> | ||
41 | #include <linux/log2.h> | 40 | #include <linux/log2.h> |
42 | #include <linux/crc16.h> | 41 | #include <linux/crc16.h> |
43 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
@@ -47,6 +46,9 @@ | |||
47 | #include "xattr.h" | 46 | #include "xattr.h" |
48 | #include "acl.h" | 47 | #include "acl.h" |
49 | 48 | ||
49 | #define CREATE_TRACE_POINTS | ||
50 | #include <trace/events/ext4.h> | ||
51 | |||
50 | static int default_mb_history_length = 1000; | 52 | static int default_mb_history_length = 1000; |
51 | 53 | ||
52 | module_param_named(default_mb_history_length, default_mb_history_length, | 54 | module_param_named(default_mb_history_length, default_mb_history_length, |
@@ -301,7 +303,7 @@ static void ext4_handle_error(struct super_block *sb) | |||
301 | if (!test_opt(sb, ERRORS_CONT)) { | 303 | if (!test_opt(sb, ERRORS_CONT)) { |
302 | journal_t *journal = EXT4_SB(sb)->s_journal; | 304 | journal_t *journal = EXT4_SB(sb)->s_journal; |
303 | 305 | ||
304 | EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; | 306 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; |
305 | if (journal) | 307 | if (journal) |
306 | jbd2_journal_abort(journal, -EIO); | 308 | jbd2_journal_abort(journal, -EIO); |
307 | } | 309 | } |
@@ -414,7 +416,7 @@ void ext4_abort(struct super_block *sb, const char *function, | |||
414 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | 416 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
415 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | 417 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; |
416 | sb->s_flags |= MS_RDONLY; | 418 | sb->s_flags |= MS_RDONLY; |
417 | EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; | 419 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; |
418 | if (EXT4_SB(sb)->s_journal) | 420 | if (EXT4_SB(sb)->s_journal) |
419 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); | 421 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); |
420 | } | 422 | } |
@@ -1474,7 +1476,7 @@ set_qf_format: | |||
1474 | break; | 1476 | break; |
1475 | #endif | 1477 | #endif |
1476 | case Opt_abort: | 1478 | case Opt_abort: |
1477 | set_opt(sbi->s_mount_opt, ABORT); | 1479 | sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; |
1478 | break; | 1480 | break; |
1479 | case Opt_nobarrier: | 1481 | case Opt_nobarrier: |
1480 | clear_opt(sbi->s_mount_opt, BARRIER); | 1482 | clear_opt(sbi->s_mount_opt, BARRIER); |
@@ -1653,7 +1655,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1653 | ext4_commit_super(sb, 1); | 1655 | ext4_commit_super(sb, 1); |
1654 | if (test_opt(sb, DEBUG)) | 1656 | if (test_opt(sb, DEBUG)) |
1655 | printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " | 1657 | printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " |
1656 | "bpg=%lu, ipg=%lu, mo=%04lx]\n", | 1658 | "bpg=%lu, ipg=%lu, mo=%04x]\n", |
1657 | sb->s_blocksize, | 1659 | sb->s_blocksize, |
1658 | sbi->s_groups_count, | 1660 | sbi->s_groups_count, |
1659 | EXT4_BLOCKS_PER_GROUP(sb), | 1661 | EXT4_BLOCKS_PER_GROUP(sb), |
@@ -2204,6 +2206,7 @@ EXT4_RO_ATTR(session_write_kbytes); | |||
2204 | EXT4_RO_ATTR(lifetime_write_kbytes); | 2206 | EXT4_RO_ATTR(lifetime_write_kbytes); |
2205 | EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, | 2207 | EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, |
2206 | inode_readahead_blks_store, s_inode_readahead_blks); | 2208 | inode_readahead_blks_store, s_inode_readahead_blks); |
2209 | EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); | ||
2207 | EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); | 2210 | EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); |
2208 | EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); | 2211 | EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); |
2209 | EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); | 2212 | EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); |
@@ -2216,6 +2219,7 @@ static struct attribute *ext4_attrs[] = { | |||
2216 | ATTR_LIST(session_write_kbytes), | 2219 | ATTR_LIST(session_write_kbytes), |
2217 | ATTR_LIST(lifetime_write_kbytes), | 2220 | ATTR_LIST(lifetime_write_kbytes), |
2218 | ATTR_LIST(inode_readahead_blks), | 2221 | ATTR_LIST(inode_readahead_blks), |
2222 | ATTR_LIST(inode_goal), | ||
2219 | ATTR_LIST(mb_stats), | 2223 | ATTR_LIST(mb_stats), |
2220 | ATTR_LIST(mb_max_to_scan), | 2224 | ATTR_LIST(mb_max_to_scan), |
2221 | ATTR_LIST(mb_min_to_scan), | 2225 | ATTR_LIST(mb_min_to_scan), |
@@ -3346,7 +3350,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
3346 | int ret = 0; | 3350 | int ret = 0; |
3347 | tid_t target; | 3351 | tid_t target; |
3348 | 3352 | ||
3349 | trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); | 3353 | trace_ext4_sync_fs(sb, wait); |
3350 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { | 3354 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { |
3351 | if (wait) | 3355 | if (wait) |
3352 | jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); | 3356 | jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); |
@@ -3450,7 +3454,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3450 | goto restore_opts; | 3454 | goto restore_opts; |
3451 | } | 3455 | } |
3452 | 3456 | ||
3453 | if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) | 3457 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) |
3454 | ext4_abort(sb, __func__, "Abort forced by user"); | 3458 | ext4_abort(sb, __func__, "Abort forced by user"); |
3455 | 3459 | ||
3456 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | 3460 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | |
@@ -3465,7 +3469,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3465 | 3469 | ||
3466 | if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || | 3470 | if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || |
3467 | n_blocks_count > ext4_blocks_count(es)) { | 3471 | n_blocks_count > ext4_blocks_count(es)) { |
3468 | if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { | 3472 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { |
3469 | err = -EROFS; | 3473 | err = -EROFS; |
3470 | goto restore_opts; | 3474 | goto restore_opts; |
3471 | } | 3475 | } |