author     Linus Torvalds <torvalds@linux-foundation.org>   2009-06-18 17:07:46 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2009-06-18 17:07:46 -0400
commit     0732f87761dbe417cb6e084b712d07e879e876ef (patch)
tree       afed6ca0368fd3e121fd4f43b11e32aa1e5139c0
parent     15fc204afc6feb915c400159546f646eca8ba1d9 (diff)
parent     536fc240e7147858255bdb08e7a999a3351a9fb4 (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  jbd2: clean up jbd2_journal_try_to_free_buffers()
  ext4: Don't update ctime for non-extent-mapped inodes
  ext4: Fix up whitespace issues in fs/ext4/inode.c
  ext4: Fix 64-bit block type problem on 32-bit platforms
  ext4: teach the inode allocator to use a goal inode number
  ext4: Use a hash of the topdir directory name for the Orlov parent group
  ext4: document the "abort" mount option
  ext4: move the abort flag from s_mount_opts to s_mount_flags
  ext4: update the s_last_mounted field in the superblock
  ext4: change s_mount_opt to be an unsigned int
  ext4: online defrag -- Add EXT4_IOC_MOVE_EXT ioctl
  ext4: avoid unnecessary spinlock in critical POSIX ACL path
  ext3: avoid unnecessary spinlock in critical POSIX ACL path
  ext4: convert instrumentation from markers to tracepoints
  jbd2: convert instrumentation from markers to tracepoints
-rw-r--r--  Documentation/ABI/testing/sysfs-fs-ext4      10
-rw-r--r--  Documentation/filesystems/ext4.txt            4
-rw-r--r--  fs/ext4/Makefile                              2
-rw-r--r--  fs/ext4/ext4.h                               39
-rw-r--r--  fs/ext4/ext4_extents.h                        4
-rw-r--r--  fs/ext4/extents.c                             4
-rw-r--r--  fs/ext4/file.c                               36
-rw-r--r--  fs/ext4/fsync.c                               8
-rw-r--r--  fs/ext4/ialloc.c                             48
-rw-r--r--  fs/ext4/inode.c                             281
-rw-r--r--  fs/ext4/ioctl.c                              36
-rw-r--r--  fs/ext4/mballoc.c                            85
-rw-r--r--  fs/ext4/mballoc.h                             1
-rw-r--r--  fs/ext4/migrate.c                             8
-rw-r--r--  fs/ext4/move_extent.c                      1320
-rw-r--r--  fs/ext4/namei.c                              10
-rw-r--r--  fs/ext4/super.c                              20
-rw-r--r--  fs/jbd2/checkpoint.c                          5
-rw-r--r--  fs/jbd2/commit.c                             13
-rw-r--r--  fs/jbd2/journal.c                            69
-rw-r--r--  fs/jbd2/transaction.c                        49
-rw-r--r--  include/linux/jbd2.h                          6
-rw-r--r--  include/trace/events/ext4.h                 719
-rw-r--r--  include/trace/events/jbd2.h                 168
24 files changed, 2621 insertions, 324 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-ext4 b/Documentation/ABI/testing/sysfs-fs-ext4
index 4e79074de282..5fb709997d96 100644
--- a/Documentation/ABI/testing/sysfs-fs-ext4
+++ b/Documentation/ABI/testing/sysfs-fs-ext4
@@ -79,3 +79,13 @@ Description:
 		This file is read-only and shows the number of
 		kilobytes of data that have been written to this
 		filesystem since it was mounted.
+
+What:		/sys/fs/ext4/<disk>/inode_goal
+Date:		June 2008
+Contact:	"Theodore Ts'o" <tytso@mit.edu>
+Description:
+		Tuning parameter which (if non-zero) controls the goal
+		inode used by the inode allocator in preference to
+		all other allocation heuristics. This is intended for
+		debugging use only, and should be 0 on production
+		systems.
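
(For illustration only: a minimal user-space sketch of setting this tunable through sysfs. The device name "sda1" and the goal value 8193 are placeholders, not part of the patch; writing 0 restores the default allocator behaviour.)

#include <stdio.h>

/* Sketch: write a goal inode number into /sys/fs/ext4/<disk>/inode_goal. */
int main(void)
{
	FILE *f = fopen("/sys/fs/ext4/sda1/inode_goal", "w");

	if (!f) {
		perror("inode_goal");
		return 1;
	}
	fprintf(f, "%u\n", 8193u);	/* subsequent allocations aim for inode 8193 */
	fclose(f);
	return 0;
}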
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 608fdba97b72..7be02ac5fa36 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -235,6 +235,10 @@ minixdf Make 'df' act like Minix.
 
 debug			Extra debugging information is sent to syslog.
 
+abort			Simulate the effects of calling ext4_abort() for
+			debugging purposes.  This is normally used while
+			remounting a filesystem which is already mounted.
+
 errors=remount-ro	Remount the filesystem read-only on an error.
 errors=continue		Keep going on a filesystem error.
 errors=panic		Panic and halt the machine if an error occurs.
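
(For illustration only: a hedged sketch of how the newly documented option is typically exercised — remounting an already-mounted ext4 filesystem with "abort". The device and mount point are placeholders, and the call requires CAP_SYS_ADMIN.)

#include <stdio.h>
#include <sys/mount.h>

/* Sketch: simulate ext4_abort() on a mounted filesystem for debugging.
 * "/dev/sdb1" and "/mnt/test" are placeholders; run as root. */
int main(void)
{
	if (mount("/dev/sdb1", "/mnt/test", "ext4", MS_REMOUNT, "abort")) {
		perror("remount with -o abort");
		return 1;
	}
	return 0;
}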
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8a34710ecf40..8867b2a1e5fe 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
 
 ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
 		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-		ext4_jbd2.o migrate.o mballoc.o block_validity.o
+		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
 
 ext4-$(CONFIG_EXT4_FS_XATTR)		+= xattr.o xattr_user.o xattr_trusted.o
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cc7d5edc38c9..17b9998680e3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -352,6 +352,7 @@ struct ext4_new_group_data {
 /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
 #define EXT4_IOC_ALLOC_DA_BLKS		_IO('f', 12)
+#define EXT4_IOC_MOVE_EXT		_IOWR('f', 15, struct move_extent)
 
 /*
  * ioctl commands in 32 bit emulation
@@ -447,6 +448,15 @@ struct ext4_inode {
 	__le32  i_version_hi;	/* high 32 bits for 64-bit version */
 };
 
+struct move_extent {
+	__u32 reserved;		/* should be zero */
+	__u32 donor_fd;		/* donor file descriptor */
+	__u64 orig_start;	/* logical start offset in block for orig */
+	__u64 donor_start;	/* logical start offset in block for donor */
+	__u64 len;		/* block length to be moved */
+	__u64 moved_len;	/* moved block length */
+};
+#define MAX_DEFRAG_SIZE		((1UL<<31) - 1)
 
 #define EXT4_EPOCH_BITS 2
 #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
@@ -674,7 +684,6 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_ERRORS_PANIC		0x00040	/* Panic on errors */
 #define EXT4_MOUNT_MINIX_DF		0x00080	/* Mimics the Minix statfs */
 #define EXT4_MOUNT_NOLOAD		0x00100	/* Don't use existing journal*/
-#define EXT4_MOUNT_ABORT		0x00200	/* Fatal error detected */
 #define EXT4_MOUNT_DATA_FLAGS		0x00C00	/* Mode for data writes: */
 #define EXT4_MOUNT_JOURNAL_DATA		0x00400	/* Write data to journal */
 #define EXT4_MOUNT_ORDERED_DATA		0x00800	/* Flush data before commit */
@@ -696,17 +705,10 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
 #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
 
-/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
-#ifndef _LINUX_EXT2_FS_H
 #define clear_opt(o, opt)		o &= ~EXT4_MOUNT_##opt
 #define set_opt(o, opt)			o |= EXT4_MOUNT_##opt
 #define test_opt(sb, opt)		(EXT4_SB(sb)->s_mount_opt & \
 					 EXT4_MOUNT_##opt)
-#else
-#define EXT2_MOUNT_NOLOAD		EXT4_MOUNT_NOLOAD
-#define EXT2_MOUNT_ABORT		EXT4_MOUNT_ABORT
-#define EXT2_MOUNT_DATA_FLAGS		EXT4_MOUNT_DATA_FLAGS
-#endif
 
 #define ext4_set_bit			ext2_set_bit
 #define ext4_set_bit_atomic		ext2_set_bit_atomic
@@ -824,6 +826,13 @@ struct ext4_super_block {
 };
 
 #ifdef __KERNEL__
+
+/*
+ * run-time mount flags
+ */
+#define EXT4_MF_MNTDIR_SAMPLED	0x0001
+#define EXT4_MF_FS_ABORTED	0x0002	/* Fatal error detected */
+
 /*
  * fourth extended-fs super-block data in memory
  */
@@ -842,7 +851,8 @@ struct ext4_sb_info {
 	struct buffer_head * s_sbh;	/* Buffer containing the super block */
 	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
 	struct buffer_head **s_group_desc;
-	unsigned long s_mount_opt;
+	unsigned int s_mount_opt;
+	unsigned int s_mount_flags;
 	ext4_fsblk_t s_sb_block;
 	uid_t s_resuid;
 	gid_t s_resgid;
@@ -853,6 +863,7 @@ struct ext4_sb_info {
 	int s_inode_size;
 	int s_first_ino;
 	unsigned int s_inode_readahead_blks;
+	unsigned int s_inode_goal;
 	spinlock_t s_next_gen_lock;
 	u32 s_next_generation;
 	u32 s_hash_seed[4];
@@ -1305,7 +1316,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
 			  dx_hash_info *hinfo);
 
 /* ialloc.c */
-extern struct inode * ext4_new_inode(handle_t *, struct inode *, int);
+extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
+				    const struct qstr *qstr, __u32 goal);
 extern void ext4_free_inode(handle_t *, struct inode *);
 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -1329,7 +1341,7 @@ extern void ext4_discard_preallocations(struct inode *);
 extern int __init init_ext4_mballoc(void);
 extern void exit_ext4_mballoc(void);
 extern void ext4_mb_free_blocks(handle_t *, struct inode *,
-		unsigned long, unsigned long, int, unsigned long *);
+		ext4_fsblk_t, unsigned long, int, unsigned long *);
 extern int ext4_mb_add_groupinfo(struct super_block *sb,
 			ext4_group_t i, struct ext4_group_desc *desc);
 extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
@@ -1647,6 +1659,11 @@ extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
 			   struct buffer_head *bh, int flags);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		       __u64 start, __u64 len);
+/* move_extent.c */
+extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
+			     __u64 start_orig, __u64 start_donor,
+			     __u64 len, __u64 *moved_len);
+
 
 /*
  * Add new method to test wether block and inode bitmaps are properly
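
(For illustration only: a minimal user-space sketch of driving the new online-defrag ioctl declared above. The struct mirrors the ABI added to ext4.h in this merge; the file names, offsets and length are placeholders, and a real defragmenter would first populate the donor file with better-placed extents.)

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* User-space mirror of the ABI added above. */
struct move_extent {
	uint32_t reserved;	/* should be zero */
	uint32_t donor_fd;	/* donor file descriptor */
	uint64_t orig_start;	/* logical start offset in block for orig */
	uint64_t donor_start;	/* logical start offset in block for donor */
	uint64_t len;		/* block length to be moved */
	uint64_t moved_len;	/* moved block length */
};
#define EXT4_IOC_MOVE_EXT	_IOWR('f', 15, struct move_extent)

int main(void)
{
	/* "orig.dat" and "donor.dat" are placeholder files that must live
	 * on the same ext4 filesystem; the donor supplies the new extents. */
	int orig_fd = open("orig.dat", O_RDWR);
	int donor_fd = open("donor.dat", O_RDWR);
	struct move_extent me;

	if (orig_fd < 0 || donor_fd < 0)
		return 1;

	memset(&me, 0, sizeof(me));
	me.donor_fd = donor_fd;		/* exchange blocks with this file */
	me.orig_start = 0;		/* offsets and length in fs blocks */
	me.donor_start = 0;
	me.len = 16;

	if (ioctl(orig_fd, EXT4_IOC_MOVE_EXT, &me) < 0) {
		perror("EXT4_IOC_MOVE_EXT");
		return 1;
	}
	printf("moved %llu blocks\n", (unsigned long long)me.moved_len);

	close(donor_fd);
	close(orig_fd);
	return 0;
}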
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index f0c3ec85bd48..20a84105a10b 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -221,12 +221,16 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
 }
 
 extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
+extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
 extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
 extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
 extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
						   int num,
						   struct ext4_ext_path *path);
+extern int ext4_can_extents_be_merged(struct inode *inode,
+				      struct ext4_extent *ex1,
+				      struct ext4_extent *ex2);
 extern int ext4_ext_try_to_merge(struct inode *inode,
				 struct ext4_ext_path *path,
				 struct ext4_extent *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2593f748c3a4..50322a09bd01 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -49,7 +49,7 @@
  * ext_pblock:
  * combine low and high parts of physical block number into ext4_fsblk_t
  */
-static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
+ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
 {
 	ext4_fsblk_t block;
 
@@ -1417,7 +1417,7 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
 	return err;
 }
 
-static int
+int
 ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 		struct ext4_extent *ex2)
 {
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 588af8c77246..3f1873fef1c6 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -21,6 +21,8 @@
 #include <linux/time.h>
 #include <linux/fs.h>
 #include <linux/jbd2.h>
+#include <linux/mount.h>
+#include <linux/path.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -145,6 +147,38 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
+static int ext4_file_open(struct inode * inode, struct file * filp)
+{
+	struct super_block *sb = inode->i_sb;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct vfsmount *mnt = filp->f_path.mnt;
+	struct path path;
+	char buf[64], *cp;
+
+	if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
+		     !(sb->s_flags & MS_RDONLY))) {
+		sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
+		/*
+		 * Sample where the filesystem has been mounted and
+		 * store it in the superblock for sysadmin convenience
+		 * when trying to sort through large numbers of block
+		 * devices or filesystem images.
+		 */
+		memset(buf, 0, sizeof(buf));
+		path.mnt = mnt->mnt_parent;
+		path.dentry = mnt->mnt_mountpoint;
+		path_get(&path);
+		cp = d_path(&path, buf, sizeof(buf));
+		path_put(&path);
+		if (!IS_ERR(cp)) {
+			memcpy(sbi->s_es->s_last_mounted, cp,
+			       sizeof(sbi->s_es->s_last_mounted));
+			sb->s_dirt = 1;
+		}
+	}
+	return generic_file_open(inode, filp);
+}
+
 const struct file_operations ext4_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
@@ -156,7 +190,7 @@ const struct file_operations ext4_file_operations = {
 	.compat_ioctl	= ext4_compat_ioctl,
 #endif
 	.mmap		= ext4_file_mmap,
-	.open		= generic_file_open,
+	.open		= ext4_file_open,
 	.release	= ext4_release_file,
 	.fsync		= ext4_sync_file,
 	.splice_read	= generic_file_splice_read,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 5afe4370840b..83cf6415f599 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -28,10 +28,12 @@
 #include <linux/writeback.h>
 #include <linux/jbd2.h>
 #include <linux/blkdev.h>
-#include <linux/marker.h>
+
 #include "ext4.h"
 #include "ext4_jbd2.h"
 
+#include <trace/events/ext4.h>
+
 /*
  * akpm: A new design for ext4_sync_file().
  *
@@ -52,9 +54,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
 
 	J_ASSERT(ext4_journal_current_handle() == NULL);
 
-	trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
-		   inode->i_sb->s_id, datasync, inode->i_ino,
-		   dentry->d_parent->d_inode->i_ino);
+	trace_ext4_sync_file(file, dentry, datasync);
 
 	/*
	 * data=writeback:
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3743bd849bce..2f645732e3b7 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -23,11 +23,14 @@
 #include <linux/bitops.h>
 #include <linux/blkdev.h>
 #include <asm/byteorder.h>
+
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
 
+#include <trace/events/ext4.h>
+
 /*
  * ialloc.c contains the inodes allocation and deallocation routines
  */
@@ -208,11 +211,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 
 	ino = inode->i_ino;
 	ext4_debug("freeing inode %lu\n", ino);
-	trace_mark(ext4_free_inode,
-		   "dev %s ino %lu mode %d uid %lu gid %lu bocks %llu",
-		   sb->s_id, inode->i_ino, inode->i_mode,
-		   (unsigned long) inode->i_uid, (unsigned long) inode->i_gid,
-		   (unsigned long long) inode->i_blocks);
+	trace_ext4_free_inode(inode);
 
 	/*
	 * Note: we must free any quota before locking the superblock,
@@ -471,7 +470,8 @@ void get_orlov_stats(struct super_block *sb, ext4_group_t g,
  */
 
 static int find_group_orlov(struct super_block *sb, struct inode *parent,
-			    ext4_group_t *group, int mode)
+			    ext4_group_t *group, int mode,
+			    const struct qstr *qstr)
 {
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -486,6 +486,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 	struct ext4_group_desc *desc;
 	struct orlov_stats stats;
 	int flex_size = ext4_flex_bg_size(sbi);
+	struct dx_hash_info hinfo;
 
 	ngroups = real_ngroups;
 	if (flex_size > 1) {
@@ -507,7 +508,13 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 		int best_ndir = inodes_per_group;
 		int ret = -1;
 
-		get_random_bytes(&grp, sizeof(grp));
+		if (qstr) {
+			hinfo.hash_version = DX_HASH_HALF_MD4;
+			hinfo.seed = sbi->s_hash_seed;
+			ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
+			grp = hinfo.hash;
+		} else
+			get_random_bytes(&grp, sizeof(grp));
 		parent_group = (unsigned)grp % ngroups;
 		for (i = 0; i < ngroups; i++) {
 			g = (parent_group + i) % ngroups;
@@ -650,7 +657,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 		*group = parent_group + flex_size;
 		if (*group > ngroups)
 			*group = 0;
-		return find_group_orlov(sb, parent, group, mode);
+		return find_group_orlov(sb, parent, group, mode, 0);
 	}
 
 	/*
@@ -791,7 +798,8 @@ err_ret:
  * For other inodes, search forward from the parent directory's block
  * group to find a free inode.
  */
-struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
+struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
+			     const struct qstr *qstr, __u32 goal)
 {
 	struct super_block *sb;
 	struct buffer_head *inode_bitmap_bh = NULL;
@@ -815,14 +823,23 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 
 	sb = dir->i_sb;
 	ngroups = ext4_get_groups_count(sb);
-	trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
-		   dir->i_ino, mode);
+	trace_ext4_request_inode(dir, mode);
 	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	ei = EXT4_I(inode);
 	sbi = EXT4_SB(sb);
 
+	if (!goal)
+		goal = sbi->s_inode_goal;
+
+	if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) {
+		group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
+		ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
+		ret2 = 0;
+		goto got_group;
+	}
+
 	if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
 		ret2 = find_group_flex(sb, dir, &group);
 		if (ret2 == -1) {
@@ -841,7 +858,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 		if (test_opt(sb, OLDALLOC))
 			ret2 = find_group_dir(sb, dir, &group);
 		else
-			ret2 = find_group_orlov(sb, dir, &group, mode);
+			ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
 	} else
 		ret2 = find_group_other(sb, dir, &group, mode);
 
@@ -851,7 +868,7 @@ got_group:
 	if (ret2 == -1)
 		goto out;
 
-	for (i = 0; i < ngroups; i++) {
+	for (i = 0; i < ngroups; i++, ino = 0) {
 		err = -EIO;
 
 		gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -863,8 +880,6 @@ got_group:
 		if (!inode_bitmap_bh)
 			goto fail;
 
-		ino = 0;
-
 repeat_in_this_group:
 		ino = ext4_find_next_zero_bit((unsigned long *)
 					      inode_bitmap_bh->b_data,
@@ -1047,8 +1062,7 @@ got:
 	}
 
 	ext4_debug("allocating inode %lu\n", inode->i_ino);
-	trace_mark(ext4_allocate_inode, "dev %s ino %lu dir %lu mode %d",
-		   sb->s_id, inode->i_ino, dir->i_ino, mode);
+	trace_ext4_allocate_inode(inode, dir, mode);
 	goto really_out;
 fail:
 	ext4_std_error(sb, err);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 875db944b22f..7c17ae275af4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,11 +37,14 @@
 #include <linux/namei.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
+
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
 #include "ext4_extents.h"
 
+#include <trace/events/ext4.h>
+
 #define MPAGE_DA_EXTENT_TAIL 0x01
 
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
@@ -78,7 +81,7 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
  * If the handle isn't valid we're not journaling so there's nothing to do.
  */
 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
 		struct buffer_head *bh, ext4_fsblk_t blocknr)
 {
 	int err;
 
@@ -90,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
 	BUFFER_TRACE(bh, "enter");
 
 	jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
-		  "data mode %lx\n",
+		  "data mode %x\n",
 		  bh, is_metadata, inode->i_mode,
 		  test_opt(inode->i_sb, DATA_FLAGS));
 
@@ -329,8 +332,8 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
  */
 
 static int ext4_block_to_path(struct inode *inode,
 			      ext4_lblk_t i_block,
 			      ext4_lblk_t offsets[4], int *boundary)
 {
 	int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
 	int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
@@ -362,9 +365,9 @@ static int ext4_block_to_path(struct inode *inode,
 		final = ptrs;
 	} else {
 		ext4_warning(inode->i_sb, "ext4_block_to_path",
 			     "block %lu > max in inode %lu",
 			     i_block + direct_blocks +
 			     indirect_blocks + double_blocks, inode->i_ino);
 	}
 	if (boundary)
 		*boundary = final - 1 - (i_block & (ptrs - 1));
@@ -379,25 +382,25 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
 
 	while (bref < p+max) {
 		blk = le32_to_cpu(*bref++);
 		if (blk &&
 		    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
 						    blk, 1))) {
 			ext4_error(inode->i_sb, function,
 				   "invalid block reference %u "
 				   "in inode #%lu", blk, inode->i_ino);
 			return -EIO;
 		}
 	}
 	return 0;
 }
 
 
 #define ext4_check_indirect_blockref(inode, bh) \
 	__ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \
 			      EXT4_ADDR_PER_BLOCK((inode)->i_sb))
 
 #define ext4_check_inode_blockref(inode) \
 	__ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \
 			      EXT4_NDIR_BLOCKS)
 
 /**
@@ -447,7 +450,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
 		bh = sb_getblk(sb, le32_to_cpu(p->key));
 		if (unlikely(!bh))
 			goto failure;
 
 		if (!bh_uptodate_or_lock(bh)) {
 			if (bh_submit_read(bh) < 0) {
 				put_bh(bh);
@@ -459,7 +462,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
 				goto failure;
 			}
 		}
 
 		add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
 		/* Reader: end */
 		if (!p->key)
@@ -552,7 +555,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
  * returns it.
  */
 static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
 				   Indirect *partial)
 {
 	/*
	 * XXX need to get goal block from mballoc's data structures
@@ -574,7 +577,7 @@ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
  * direct and indirect blocks.
  */
 static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
 				 int blocks_to_boundary)
 {
 	unsigned int count = 0;
 
@@ -610,9 +613,9 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
  *		direct blocks
  */
 static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
 			     ext4_lblk_t iblock, ext4_fsblk_t goal,
 			     int indirect_blks, int blks,
 			     ext4_fsblk_t new_blocks[4], int *err)
 {
 	struct ext4_allocation_request ar;
 	int target, i;
@@ -683,10 +686,10 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
 	}
 	if (!*err) {
 		if (target == blks) {
 			/*
			 * save the new block number
			 * for the first direct block
			 */
 			new_blocks[index] = current_block;
 		}
 		blk_allocated += ar.len;
@@ -728,9 +731,9 @@ failed_out:
  *	as described above and return 0.
  */
 static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 			     ext4_lblk_t iblock, int indirect_blks,
 			     int *blks, ext4_fsblk_t goal,
 			     ext4_lblk_t *offsets, Indirect *branch)
 {
 	int blocksize = inode->i_sb->s_blocksize;
 	int i, n = 0;
@@ -777,7 +780,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 			 * the chain to point to the new allocated
			 * data blocks numbers
			 */
-			for (i=1; i < num; i++)
+			for (i = 1; i < num; i++)
 				*(branch[n].p + i) = cpu_to_le32(++current_block);
 		}
 		BUFFER_TRACE(bh, "marking uptodate");
@@ -820,7 +823,8 @@ failed:
  * chain to new block and return 0.
  */
 static int ext4_splice_branch(handle_t *handle, struct inode *inode,
-			      ext4_lblk_t block, Indirect *where, int num, int blks)
+			      ext4_lblk_t block, Indirect *where, int num,
+			      int blks)
 {
 	int i;
 	int err = 0;
@@ -852,10 +856,6 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 	}
 
 	/* We are done with atomic stuff, now do the rest of housekeeping */
-
-	inode->i_ctime = ext4_current_time(inode);
-	ext4_mark_inode_dirty(handle, inode);
-
 	/* had we spliced it onto indirect block? */
 	if (where->bh) {
 		/*
@@ -874,8 +874,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 	} else {
 		/*
		 * OK, we spliced it into the inode itself on a direct block.
-		 * Inode was dirtied above.
		 */
+		ext4_mark_inode_dirty(handle, inode);
 		jbd_debug(5, "splicing direct\n");
 	}
 	return err;
@@ -921,9 +921,9 @@ err_out:
  *	blocks.
  */
 static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 			       ext4_lblk_t iblock, unsigned int maxblocks,
 			       struct buffer_head *bh_result,
 			       int flags)
 {
 	int err = -EIO;
 	ext4_lblk_t offsets[4];
@@ -939,7 +939,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 	J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
 	J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
 	depth = ext4_block_to_path(inode, iblock, offsets,
 				   &blocks_to_boundary);
 
 	if (depth == 0)
 		goto out;
@@ -987,8 +987,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 	 * Block out ext4_truncate while we alter the tree
	 */
 	err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
 				&count, goal,
 				offsets + (partial - chain), partial);
 
 	/*
	 * The ext4_splice_branch call will free and forget any buffers
@@ -999,8 +999,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 	 */
 	if (!err)
 		err = ext4_splice_branch(handle, inode, iblock,
 					 partial, indirect_blks, count);
 	else
 		goto cleanup;
 
 	set_buffer_new(bh_result);
@@ -1172,7 +1172,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 	up_read((&EXT4_I(inode)->i_data_sem));
 
 	if (retval > 0 && buffer_mapped(bh)) {
 		int ret = check_block_validity(inode, block,
 					       bh->b_blocknr, retval);
 		if (ret != 0)
 			return ret;
@@ -1254,7 +1254,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 
 	up_write((&EXT4_I(inode)->i_data_sem));
 	if (retval > 0 && buffer_mapped(bh)) {
 		int ret = check_block_validity(inode, block,
 					       bh->b_blocknr, retval);
 		if (ret != 0)
 			return ret;
@@ -1405,8 +1405,7 @@ static int walk_page_buffers(handle_t *handle,
 
 	for (bh = head, block_start = 0;
 	     ret == 0 && (bh != head || !block_start);
-	     block_start = block_end, bh = next)
-	{
+	     block_start = block_end, bh = next) {
 		next = bh->b_this_page;
 		block_end = block_start + blocksize;
 		if (block_end <= from || block_start >= to) {
@@ -1447,7 +1446,7 @@ static int walk_page_buffers(handle_t *handle,
  * write.
  */
 static int do_journal_get_write_access(handle_t *handle,
 				       struct buffer_head *bh)
 {
 	if (!buffer_mapped(bh) || buffer_freed(bh))
 		return 0;
@@ -1455,27 +1454,24 @@ static int do_journal_get_write_access(handle_t *handle,
 }
 
 static int ext4_write_begin(struct file *file, struct address_space *mapping,
 			    loff_t pos, unsigned len, unsigned flags,
 			    struct page **pagep, void **fsdata)
 {
 	struct inode *inode = mapping->host;
 	int ret, needed_blocks;
 	handle_t *handle;
 	int retries = 0;
 	struct page *page;
 	pgoff_t index;
 	unsigned from, to;
 
-	trace_mark(ext4_write_begin,
-		   "dev %s ino %lu pos %llu len %u flags %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, flags);
+	trace_ext4_write_begin(inode, pos, len, flags);
 	/*
	 * Reserve one block more for addition to orphan list in case
	 * we allocate blocks but write fails for some reason
	 */
 	needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
 	index = pos >> PAGE_CACHE_SHIFT;
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
 
@@ -1523,7 +1519,7 @@ retry:
 		ext4_journal_stop(handle);
 		if (pos + len > inode->i_size) {
 			vmtruncate(inode, inode->i_size);
 			/*
			 * If vmtruncate failed early the inode might
			 * still be on the orphan list; we need to
			 * make sure the inode is removed from the
@@ -1550,9 +1546,9 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
 }
 
 static int ext4_generic_write_end(struct file *file,
 				  struct address_space *mapping,
 				  loff_t pos, unsigned len, unsigned copied,
 				  struct page *page, void *fsdata)
 {
 	int i_size_changed = 0;
 	struct inode *inode = mapping->host;
@@ -1603,18 +1599,15 @@ static int ext4_generic_write_end(struct file *file,
  * buffers are managed internally.
  */
 static int ext4_ordered_write_end(struct file *file,
 				  struct address_space *mapping,
 				  loff_t pos, unsigned len, unsigned copied,
 				  struct page *page, void *fsdata)
 {
 	handle_t *handle = ext4_journal_current_handle();
 	struct inode *inode = mapping->host;
 	int ret = 0, ret2;
 
-	trace_mark(ext4_ordered_write_end,
-		   "dev %s ino %lu pos %llu len %u copied %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, copied);
+	trace_ext4_ordered_write_end(inode, pos, len, copied);
 	ret = ext4_jbd2_file_inode(handle, inode);
 
 	if (ret == 0) {
@@ -1636,7 +1629,7 @@ static int ext4_ordered_write_end(struct file *file,
 
 	if (pos + len > inode->i_size) {
 		vmtruncate(inode, inode->i_size);
 		/*
		 * If vmtruncate failed early the inode might still be
		 * on the orphan list; we need to make sure the inode
		 * is removed from the orphan list in that case.
@@ -1650,18 +1643,15 @@ static int ext4_ordered_write_end(struct file *file,
 }
 
 static int ext4_writeback_write_end(struct file *file,
 				    struct address_space *mapping,
 				    loff_t pos, unsigned len, unsigned copied,
 				    struct page *page, void *fsdata)
 {
 	handle_t *handle = ext4_journal_current_handle();
 	struct inode *inode = mapping->host;
 	int ret = 0, ret2;
 
-	trace_mark(ext4_writeback_write_end,
-		   "dev %s ino %lu pos %llu len %u copied %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, copied);
+	trace_ext4_writeback_write_end(inode, pos, len, copied);
 	ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
 				      page, fsdata);
 	copied = ret2;
@@ -1681,7 +1671,7 @@ static int ext4_writeback_write_end(struct file *file,
 
 	if (pos + len > inode->i_size) {
 		vmtruncate(inode, inode->i_size);
 		/*
		 * If vmtruncate failed early the inode might still be
		 * on the orphan list; we need to make sure the inode
		 * is removed from the orphan list in that case.
@@ -1694,9 +1684,9 @@ static int ext4_writeback_write_end(struct file *file,
 }
 
 static int ext4_journalled_write_end(struct file *file,
 				     struct address_space *mapping,
 				     loff_t pos, unsigned len, unsigned copied,
 				     struct page *page, void *fsdata)
 {
 	handle_t *handle = ext4_journal_current_handle();
 	struct inode *inode = mapping->host;
@@ -1705,10 +1695,7 @@ static int ext4_journalled_write_end(struct file *file,
 	unsigned from, to;
 	loff_t new_i_size;
 
-	trace_mark(ext4_journalled_write_end,
-		   "dev %s ino %lu pos %llu len %u copied %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, copied);
+	trace_ext4_journalled_write_end(inode, pos, len, copied);
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
 
@@ -1747,7 +1734,7 @@ static int ext4_journalled_write_end(struct file *file,
 		ret = ret2;
 	if (pos + len > inode->i_size) {
 		vmtruncate(inode, inode->i_size);
 		/*
		 * If vmtruncate failed early the inode might still be
		 * on the orphan list; we need to make sure the inode
		 * is removed from the orphan list in that case.
@@ -1854,7 +1841,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 }
 
 static void ext4_da_page_release_reservation(struct page *page,
 					     unsigned long offset)
 {
 	int to_release = 0;
 	struct buffer_head *head, *bh;
@@ -2554,9 +2541,7 @@ static int ext4_da_writepage(struct page *page,
 	struct buffer_head *page_bufs;
 	struct inode *inode = page->mapping->host;
 
-	trace_mark(ext4_da_writepage,
-		   "dev %s ino %lu page_index %lu",
-		   inode->i_sb->s_id, inode->i_ino, page->index);
+	trace_ext4_da_writepage(inode, page);
 	size = i_size_read(inode);
 	if (page->index == size >> PAGE_CACHE_SHIFT)
 		len = size & ~PAGE_CACHE_MASK;
@@ -2667,19 +2652,7 @@ static int ext4_da_writepages(struct address_space *mapping,
 	int needed_blocks, ret = 0, nr_to_writebump = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
 
-	trace_mark(ext4_da_writepages,
-		   "dev %s ino %lu nr_t_write %ld "
-		   "pages_skipped %ld range_start %llu "
-		   "range_end %llu nonblocking %d "
-		   "for_kupdate %d for_reclaim %d "
-		   "for_writepages %d range_cyclic %d",
-		   inode->i_sb->s_id, inode->i_ino,
-		   wbc->nr_to_write, wbc->pages_skipped,
-		   (unsigned long long) wbc->range_start,
-		   (unsigned long long) wbc->range_end,
-		   wbc->nonblocking, wbc->for_kupdate,
-		   wbc->for_reclaim, wbc->for_writepages,
-		   wbc->range_cyclic);
+	trace_ext4_da_writepages(inode, wbc);
 
 	/*
	 * No pages to write? This is mainly a kludge to avoid starting
@@ -2693,13 +2666,13 @@ static int ext4_da_writepages(struct address_space *mapping,
 	 * If the filesystem has aborted, it is read-only, so return
	 * right away instead of dumping stack traces later on that
	 * will obscure the real source of the problem.  We test
-	 * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because
+	 * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because
	 * the latter could be true if the filesystem is mounted
	 * read-only, and in that case, ext4_da_writepages should
	 * *never* be called, so if that ever happens, we would want
	 * the stack trace.
	 */
-	if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT))
+	if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
 		return -EROFS;
 
 	/*
@@ -2845,14 +2818,7 @@ out_writepages:
 	if (!no_nrwrite_index_update)
 		wbc->no_nrwrite_index_update = 0;
 	wbc->nr_to_write -= nr_to_writebump;
-	trace_mark(ext4_da_writepage_result,
-		   "dev %s ino %lu ret %d pages_written %d "
-		   "pages_skipped %ld congestion %d "
-		   "more_io %d no_nrwrite_index_update %d",
-		   inode->i_sb->s_id, inode->i_ino, ret,
-		   pages_written, wbc->pages_skipped,
-		   wbc->encountered_congestion, wbc->more_io,
-		   wbc->no_nrwrite_index_update);
+	trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
 	return ret;
 }
 
@@ -2884,8 +2850,8 @@ static int ext4_nonda_switch(struct super_block *sb)
 }
 
 static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 			       loff_t pos, unsigned len, unsigned flags,
 			       struct page **pagep, void **fsdata)
 {
 	int ret, retries = 0;
 	struct page *page;
@@ -2904,11 +2870,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 					len, flags, pagep, fsdata);
 	}
 	*fsdata = (void *)0;
-
-	trace_mark(ext4_da_write_begin,
-		   "dev %s ino %lu pos %llu len %u flags %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, flags);
+	trace_ext4_da_write_begin(inode, pos, len, flags);
 retry:
 	/*
	 * With delayed allocation, we don't log the i_disksize update
@@ -2959,7 +2921,7 @@ out:
  * when write to the end of file but not require block allocation
  */
 static int ext4_da_should_update_i_disksize(struct page *page,
 					    unsigned long offset)
 {
 	struct buffer_head *bh;
 	struct inode *inode = page->mapping->host;
@@ -2978,9 +2940,9 @@ static int ext4_da_should_update_i_disksize(struct page *page,
 }
 
 static int ext4_da_write_end(struct file *file,
 			     struct address_space *mapping,
 			     loff_t pos, unsigned len, unsigned copied,
 			     struct page *page, void *fsdata)
 {
 	struct inode *inode = mapping->host;
 	int ret = 0, ret2;
@@ -3001,10 +2963,7 @@ static int ext4_da_write_end(struct file *file,
 		}
 	}
 
-	trace_mark(ext4_da_write_end,
-		   "dev %s ino %lu pos %llu len %u copied %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, copied);
+	trace_ext4_da_write_end(inode, pos, len, copied);
 	start = pos & (PAGE_CACHE_SIZE - 1);
 	end = start + copied - 1;
 
@@ -3081,7 +3040,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
 	 * not strictly speaking necessary (and for users of
	 * laptop_mode, not even desirable).  However, to do otherwise
	 * would require replicating code paths in:
	 *
	 * ext4_da_writepages() ->
	 *    write_cache_pages() ---> (via passed in callback function)
	 *       __mpage_da_writepage() -->
@@ -3101,7 +3060,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
 	 * write out the pages, but rather only collect contiguous
	 * logical block extents, call the multi-block allocator, and
	 * then update the buffer heads with the block allocations.
	 *
	 * For now, though, we'll cheat by calling filemap_flush(),
	 * which will map the blocks, and start the I/O, but not
	 * actually wait for the I/O to complete.
@@ -3237,7 +3196,7 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
  *
  */
 static int __ext4_normal_writepage(struct page *page,
 				   struct writeback_control *wbc)
 {
 	struct inode *inode = page->mapping->host;
 
@@ -3249,15 +3208,13 @@ static int __ext4_normal_writepage(struct page *page,
 }
 
 static int ext4_normal_writepage(struct page *page,
 				 struct writeback_control *wbc)
 {
 	struct inode *inode = page->mapping->host;
 	loff_t size = i_size_read(inode);
 	loff_t len;
 
-	trace_mark(ext4_normal_writepage,
-		   "dev %s ino %lu page_index %lu",
-		   inode->i_sb->s_id, inode->i_ino, page->index);
+	trace_ext4_normal_writepage(inode, page);
 	J_ASSERT(PageLocked(page));
 	if (page->index == size >> PAGE_CACHE_SHIFT)
 		len = size & ~PAGE_CACHE_MASK;
@@ -3287,7 +3244,7 @@ static int ext4_normal_writepage(struct page *page,
 }
 
 static int __ext4_journalled_writepage(struct page *page,
 				       struct writeback_control *wbc)
 {
 	struct address_space *mapping = page->mapping;
 	struct inode *inode = mapping->host;
@@ -3337,15 +3294,13 @@ out:
3337} 3294}
3338 3295
3339static int ext4_journalled_writepage(struct page *page, 3296static int ext4_journalled_writepage(struct page *page,
3340 struct writeback_control *wbc) 3297 struct writeback_control *wbc)
3341{ 3298{
3342 struct inode *inode = page->mapping->host; 3299 struct inode *inode = page->mapping->host;
3343 loff_t size = i_size_read(inode); 3300 loff_t size = i_size_read(inode);
3344 loff_t len; 3301 loff_t len;
3345 3302
3346 trace_mark(ext4_journalled_writepage, 3303 trace_ext4_journalled_writepage(inode, page);
3347 "dev %s ino %lu page_index %lu",
3348 inode->i_sb->s_id, inode->i_ino, page->index);
3349 J_ASSERT(PageLocked(page)); 3304 J_ASSERT(PageLocked(page));
3350 if (page->index == size >> PAGE_CACHE_SHIFT) 3305 if (page->index == size >> PAGE_CACHE_SHIFT)
3351 len = size & ~PAGE_CACHE_MASK; 3306 len = size & ~PAGE_CACHE_MASK;
@@ -3442,8 +3397,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
3442 * VFS code falls back into buffered path in that case so we are safe. 3397 * VFS code falls back into buffered path in that case so we are safe.
3443 */ 3398 */
3444static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, 3399static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
3445 const struct iovec *iov, loff_t offset, 3400 const struct iovec *iov, loff_t offset,
3446 unsigned long nr_segs) 3401 unsigned long nr_segs)
3447{ 3402{
3448 struct file *file = iocb->ki_filp; 3403 struct file *file = iocb->ki_filp;
3449 struct inode *inode = file->f_mapping->host; 3404 struct inode *inode = file->f_mapping->host;
@@ -3763,7 +3718,8 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
3763 * (no partially truncated stuff there). */ 3718 * (no partially truncated stuff there). */
3764 3719
3765static Indirect *ext4_find_shared(struct inode *inode, int depth, 3720static Indirect *ext4_find_shared(struct inode *inode, int depth,
3766 ext4_lblk_t offsets[4], Indirect chain[4], __le32 *top) 3721 ext4_lblk_t offsets[4], Indirect chain[4],
3722 __le32 *top)
3767{ 3723{
3768 Indirect *partial, *p; 3724 Indirect *partial, *p;
3769 int k, err; 3725 int k, err;
@@ -3819,8 +3775,10 @@ no_top:
3819 * than `count' because there can be holes in there. 3775 * than `count' because there can be holes in there.
3820 */ 3776 */
3821static void ext4_clear_blocks(handle_t *handle, struct inode *inode, 3777static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
3822 struct buffer_head *bh, ext4_fsblk_t block_to_free, 3778 struct buffer_head *bh,
3823 unsigned long count, __le32 *first, __le32 *last) 3779 ext4_fsblk_t block_to_free,
3780 unsigned long count, __le32 *first,
3781 __le32 *last)
3824{ 3782{
3825 __le32 *p; 3783 __le32 *p;
3826 if (try_to_extend_transaction(handle, inode)) { 3784 if (try_to_extend_transaction(handle, inode)) {
@@ -3837,10 +3795,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
3837 } 3795 }
3838 3796
3839 /* 3797 /*
3840 * Any buffers which are on the journal will be in memory. We find 3798 * Any buffers which are on the journal will be in memory. We
3841 * them on the hash table so jbd2_journal_revoke() will run jbd2_journal_forget() 3799 * find them on the hash table so jbd2_journal_revoke() will
3842 * on them. We've already detached each block from the file, so 3800 * run jbd2_journal_forget() on them. We've already detached
3843 * bforget() in jbd2_journal_forget() should be safe. 3801 * each block from the file, so bforget() in
3802 * jbd2_journal_forget() should be safe.
3844 * 3803 *
3845 * AKPM: turn on bforget in jbd2_journal_forget()!!! 3804 * AKPM: turn on bforget in jbd2_journal_forget()!!!
3846 */ 3805 */
@@ -4212,7 +4171,7 @@ void ext4_truncate(struct inode *inode)
4212 (__le32*)partial->bh->b_data+addr_per_block, 4171 (__le32*)partial->bh->b_data+addr_per_block,
4213 (chain+n-1) - partial); 4172 (chain+n-1) - partial);
4214 BUFFER_TRACE(partial->bh, "call brelse"); 4173 BUFFER_TRACE(partial->bh, "call brelse");
4215 brelse (partial->bh); 4174 brelse(partial->bh);
4216 partial--; 4175 partial--;
4217 } 4176 }
4218do_indirects: 4177do_indirects:
@@ -4453,8 +4412,9 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
4453 if (flags & S_DIRSYNC) 4412 if (flags & S_DIRSYNC)
4454 ei->i_flags |= EXT4_DIRSYNC_FL; 4413 ei->i_flags |= EXT4_DIRSYNC_FL;
4455} 4414}
4415
4456static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, 4416static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
4457 struct ext4_inode_info *ei) 4417 struct ext4_inode_info *ei)
4458{ 4418{
4459 blkcnt_t i_blocks ; 4419 blkcnt_t i_blocks ;
4460 struct inode *inode = &(ei->vfs_inode); 4420 struct inode *inode = &(ei->vfs_inode);
@@ -4569,7 +4529,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4569 EXT4_GOOD_OLD_INODE_SIZE + 4529 EXT4_GOOD_OLD_INODE_SIZE +
4570 ei->i_extra_isize; 4530 ei->i_extra_isize;
4571 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) 4531 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
4572 ei->i_state |= EXT4_STATE_XATTR; 4532 ei->i_state |= EXT4_STATE_XATTR;
4573 } 4533 }
4574 } else 4534 } else
4575 ei->i_extra_isize = 0; 4535 ei->i_extra_isize = 0;
@@ -4588,7 +4548,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4588 4548
4589 ret = 0; 4549 ret = 0;
4590 if (ei->i_file_acl && 4550 if (ei->i_file_acl &&
4591 ((ei->i_file_acl < 4551 ((ei->i_file_acl <
4592 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + 4552 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
4593 EXT4_SB(sb)->s_gdb_count)) || 4553 EXT4_SB(sb)->s_gdb_count)) ||
4594 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { 4554 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
@@ -4603,15 +4563,15 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4603 !ext4_inode_is_fast_symlink(inode))) 4563 !ext4_inode_is_fast_symlink(inode)))
4604 /* Validate extent which is part of inode */ 4564 /* Validate extent which is part of inode */
4605 ret = ext4_ext_check_inode(inode); 4565 ret = ext4_ext_check_inode(inode);
4606 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 4566 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
4607 (S_ISLNK(inode->i_mode) && 4567 (S_ISLNK(inode->i_mode) &&
4608 !ext4_inode_is_fast_symlink(inode))) { 4568 !ext4_inode_is_fast_symlink(inode))) {
4609 /* Validate block references which are part of inode */ 4569 /* Validate block references which are part of inode */
4610 ret = ext4_check_inode_blockref(inode); 4570 ret = ext4_check_inode_blockref(inode);
4611 } 4571 }
4612 if (ret) { 4572 if (ret) {
4613 brelse(bh); 4573 brelse(bh);
4614 goto bad_inode; 4574 goto bad_inode;
4615 } 4575 }
4616 4576
4617 if (S_ISREG(inode->i_mode)) { 4577 if (S_ISREG(inode->i_mode)) {
@@ -4642,7 +4602,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4642 } else { 4602 } else {
4643 brelse(bh); 4603 brelse(bh);
4644 ret = -EIO; 4604 ret = -EIO;
4645 ext4_error(inode->i_sb, __func__, 4605 ext4_error(inode->i_sb, __func__,
4646 "bogus i_mode (%o) for inode=%lu", 4606 "bogus i_mode (%o) for inode=%lu",
4647 inode->i_mode, inode->i_ino); 4607 inode->i_mode, inode->i_ino);
4648 goto bad_inode; 4608 goto bad_inode;
@@ -4795,8 +4755,9 @@ static int ext4_do_update_inode(handle_t *handle,
4795 cpu_to_le32(new_encode_dev(inode->i_rdev)); 4755 cpu_to_le32(new_encode_dev(inode->i_rdev));
4796 raw_inode->i_block[2] = 0; 4756 raw_inode->i_block[2] = 0;
4797 } 4757 }
4798 } else for (block = 0; block < EXT4_N_BLOCKS; block++) 4758 } else
4799 raw_inode->i_block[block] = ei->i_data[block]; 4759 for (block = 0; block < EXT4_N_BLOCKS; block++)
4760 raw_inode->i_block[block] = ei->i_data[block];
4800 4761
4801 raw_inode->i_disk_version = cpu_to_le32(inode->i_version); 4762 raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
4802 if (ei->i_extra_isize) { 4763 if (ei->i_extra_isize) {
@@ -5150,7 +5111,7 @@ int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
5150 * Give this, we know that the caller already has write access to iloc->bh. 5111 * Give this, we know that the caller already has write access to iloc->bh.
5151 */ 5112 */
5152int ext4_mark_iloc_dirty(handle_t *handle, 5113int ext4_mark_iloc_dirty(handle_t *handle,
5153 struct inode *inode, struct ext4_iloc *iloc) 5114 struct inode *inode, struct ext4_iloc *iloc)
5154{ 5115{
5155 int err = 0; 5116 int err = 0;
5156 5117
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 91e75f7a9e73..bb415408fdb6 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -14,6 +14,7 @@
14#include <linux/compat.h> 14#include <linux/compat.h>
15#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
17#include <linux/file.h>
17#include <asm/uaccess.h> 18#include <asm/uaccess.h>
18#include "ext4_jbd2.h" 19#include "ext4_jbd2.h"
19#include "ext4.h" 20#include "ext4.h"
@@ -213,6 +214,41 @@ setversion_out:
213 214
214 return err; 215 return err;
215 } 216 }
217
218 case EXT4_IOC_MOVE_EXT: {
219 struct move_extent me;
220 struct file *donor_filp;
221 int err;
222
223 if (copy_from_user(&me,
224 (struct move_extent __user *)arg, sizeof(me)))
225 return -EFAULT;
226
227 donor_filp = fget(me.donor_fd);
228 if (!donor_filp)
229 return -EBADF;
230
231 if (!capable(CAP_DAC_OVERRIDE)) {
232 if ((current->real_cred->fsuid != inode->i_uid) ||
233 !(inode->i_mode & S_IRUSR) ||
234 !(donor_filp->f_dentry->d_inode->i_mode &
235 S_IRUSR)) {
236 fput(donor_filp);
237 return -EACCES;
238 }
239 }
240
241 err = ext4_move_extents(filp, donor_filp, me.orig_start,
242 me.donor_start, me.len, &me.moved_len);
243 fput(donor_filp);
244
245 if (!err)
246 if (copy_to_user((struct move_extent *)arg,
247 &me, sizeof(me)))
248 return -EFAULT;
249 return err;
250 }
251
216 case EXT4_IOC_GROUP_ADD: { 252 case EXT4_IOC_GROUP_ADD: {
217 struct ext4_new_group_data input; 253 struct ext4_new_group_data input;
218 struct super_block *sb = inode->i_sb; 254 struct super_block *sb = inode->i_sb;
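The EXT4_IOC_MOVE_EXT handler above reads a struct move_extent that carries a donor file descriptor and a block range, and hands the number of blocks actually moved back through moved_len. A minimal user-space sketch of driving the ioctl follows; the struct layout and the _IOWR('f', 15, ...) request number are assumptions that have to match the kernel's private definitions in fs/ext4/ext4.h, since this tree does not export them in a uapi header.

/*
 * Hypothetical user-space sketch of EXT4_IOC_MOVE_EXT usage.  The struct
 * layout and the ioctl number below are assumptions; they must agree with
 * the kernel's private definitions in fs/ext4/ext4.h.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>

struct move_extent {
	uint32_t reserved;	/* assumed reserved field, keep it 0 */
	uint32_t donor_fd;	/* donor file descriptor */
	uint64_t orig_start;	/* logical start block of the original file */
	uint64_t donor_start;	/* logical start block of the donor file */
	uint64_t len;		/* number of blocks to move */
	uint64_t moved_len;	/* filled in by the kernel: blocks moved */
};

#define EXT4_IOC_MOVE_EXT	_IOWR('f', 15, struct move_extent)	/* assumed */

int main(int argc, char **argv)
{
	struct move_extent me = { 0 };
	int orig_fd, donor_fd;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <orig> <donor>\n", argv[0]);
		return 1;
	}
	orig_fd = open(argv[1], O_RDWR);
	donor_fd = open(argv[2], O_RDWR);
	if (orig_fd < 0 || donor_fd < 0) {
		perror("open");
		return 1;
	}
	me.donor_fd = donor_fd;
	me.orig_start = 0;	/* orig_start and donor_start must be equal */
	me.donor_start = 0;
	me.len = 64;		/* move the first 64 blocks */
	me.moved_len = 0;	/* must be 0 on entry */

	if (ioctl(orig_fd, EXT4_IOC_MOVE_EXT, &me) < 0) {
		perror("EXT4_IOC_MOVE_EXT");
		return 1;
	}
	printf("moved %llu blocks\n", (unsigned long long)me.moved_len);
	return 0;
}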
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index ed8482e22c0e..519a0a686d94 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -22,6 +22,8 @@
22 */ 22 */
23 23
24#include "mballoc.h" 24#include "mballoc.h"
25#include <trace/events/ext4.h>
26
25/* 27/*
26 * MUSTDO: 28 * MUSTDO:
27 * - test ext4_ext_search_left() and ext4_ext_search_right() 29 * - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -340,8 +342,6 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
340 ext4_group_t group); 342 ext4_group_t group);
341static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); 343static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
342 344
343
344
345static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 345static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
346{ 346{
347#if BITS_PER_LONG == 64 347#if BITS_PER_LONG == 64
@@ -2859,9 +2859,8 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2859 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) 2859 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
2860 + entry->start_blk 2860 + entry->start_blk
2861 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 2861 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
2862 trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", 2862 trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
2863 sb->s_id, (unsigned long long) discard_block, 2863 entry->count);
2864 entry->count);
2865 sb_issue_discard(sb, discard_block, entry->count); 2864 sb_issue_discard(sb, discard_block, entry->count);
2866 2865
2867 kmem_cache_free(ext4_free_ext_cachep, entry); 2866 kmem_cache_free(ext4_free_ext_cachep, entry);
@@ -3629,10 +3628,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3629 3628
3630 mb_debug("new inode pa %p: %llu/%u for %u\n", pa, 3629 mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
3631 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3630 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3632 trace_mark(ext4_mb_new_inode_pa, 3631 trace_ext4_mb_new_inode_pa(ac, pa);
3633 "dev %s ino %lu pstart %llu len %u lstart %u",
3634 sb->s_id, ac->ac_inode->i_ino,
3635 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3636 3632
3637 ext4_mb_use_inode_pa(ac, pa); 3633 ext4_mb_use_inode_pa(ac, pa);
3638 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); 3634 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3691,9 +3687,8 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3691 pa->pa_type = MB_GROUP_PA; 3687 pa->pa_type = MB_GROUP_PA;
3692 3688
3693 mb_debug("new group pa %p: %llu/%u for %u\n", pa, 3689 mb_debug("new group pa %p: %llu/%u for %u\n", pa,
3694 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3690 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3695 trace_mark(ext4_mb_new_group_pa, "dev %s pstart %llu len %u lstart %u", 3691 trace_ext4_mb_new_group_pa(ac, pa);
3696 sb->s_id, pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3697 3692
3698 ext4_mb_use_group_pa(ac, pa); 3693 ext4_mb_use_group_pa(ac, pa);
3699 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); 3694 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3783,10 +3778,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3783 ext4_mb_store_history(ac); 3778 ext4_mb_store_history(ac);
3784 } 3779 }
3785 3780
3786 trace_mark(ext4_mb_release_inode_pa, 3781 trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit,
3787 "dev %s ino %lu block %llu count %u", 3782 next - bit);
3788 sb->s_id, pa->pa_inode->i_ino, grp_blk_start + bit,
3789 next - bit);
3790 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3783 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3791 bit = next + 1; 3784 bit = next + 1;
3792 } 3785 }
@@ -3820,8 +3813,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3820 if (ac) 3813 if (ac)
3821 ac->ac_op = EXT4_MB_HISTORY_DISCARD; 3814 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3822 3815
3823 trace_mark(ext4_mb_release_group_pa, "dev %s pstart %llu len %d", 3816 trace_ext4_mb_release_group_pa(ac, pa);
3824 sb->s_id, pa->pa_pstart, pa->pa_len);
3825 BUG_ON(pa->pa_deleted == 0); 3817 BUG_ON(pa->pa_deleted == 0);
3826 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3818 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3827 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3819 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@@ -3889,6 +3881,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3889 3881
3890 INIT_LIST_HEAD(&list); 3882 INIT_LIST_HEAD(&list);
3891 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 3883 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3884 if (ac)
3885 ac->ac_sb = sb;
3892repeat: 3886repeat:
3893 ext4_lock_group(sb, group); 3887 ext4_lock_group(sb, group);
3894 list_for_each_entry_safe(pa, tmp, 3888 list_for_each_entry_safe(pa, tmp,
@@ -3987,12 +3981,15 @@ void ext4_discard_preallocations(struct inode *inode)
3987 } 3981 }
3988 3982
3989 mb_debug("discard preallocation for inode %lu\n", inode->i_ino); 3983 mb_debug("discard preallocation for inode %lu\n", inode->i_ino);
3990 trace_mark(ext4_discard_preallocations, "dev %s ino %lu", sb->s_id, 3984 trace_ext4_discard_preallocations(inode);
3991 inode->i_ino);
3992 3985
3993 INIT_LIST_HEAD(&list); 3986 INIT_LIST_HEAD(&list);
3994 3987
3995 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 3988 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3989 if (ac) {
3990 ac->ac_sb = sb;
3991 ac->ac_inode = inode;
3992 }
3996repeat: 3993repeat:
3997 /* first, collect all pa's in the inode */ 3994 /* first, collect all pa's in the inode */
3998 spin_lock(&ei->i_prealloc_lock); 3995 spin_lock(&ei->i_prealloc_lock);
@@ -4276,6 +4273,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4276 4273
4277 INIT_LIST_HEAD(&discard_list); 4274 INIT_LIST_HEAD(&discard_list);
4278 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4275 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4276 if (ac)
4277 ac->ac_sb = sb;
4279 4278
4280 spin_lock(&lg->lg_prealloc_lock); 4279 spin_lock(&lg->lg_prealloc_lock);
4281 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], 4280 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
@@ -4445,8 +4444,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4445 int ret; 4444 int ret;
4446 int freed = 0; 4445 int freed = 0;
4447 4446
4448 trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", 4447 trace_ext4_mb_discard_preallocations(sb, needed);
4449 sb->s_id, needed);
4450 for (i = 0; i < ngroups && needed > 0; i++) { 4448 for (i = 0; i < ngroups && needed > 0; i++) {
4451 ret = ext4_mb_discard_group_preallocations(sb, i, needed); 4449 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4452 freed += ret; 4450 freed += ret;
@@ -4475,17 +4473,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4475 sb = ar->inode->i_sb; 4473 sb = ar->inode->i_sb;
4476 sbi = EXT4_SB(sb); 4474 sbi = EXT4_SB(sb);
4477 4475
4478 trace_mark(ext4_request_blocks, "dev %s flags %u len %u ino %lu " 4476 trace_ext4_request_blocks(ar);
4479 "lblk %llu goal %llu lleft %llu lright %llu "
4480 "pleft %llu pright %llu ",
4481 sb->s_id, ar->flags, ar->len,
4482 ar->inode ? ar->inode->i_ino : 0,
4483 (unsigned long long) ar->logical,
4484 (unsigned long long) ar->goal,
4485 (unsigned long long) ar->lleft,
4486 (unsigned long long) ar->lright,
4487 (unsigned long long) ar->pleft,
4488 (unsigned long long) ar->pright);
4489 4477
4490 /* 4478 /*
4491 * For delayed allocation, we could skip the ENOSPC and 4479 * For delayed allocation, we could skip the ENOSPC and
@@ -4521,7 +4509,10 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4521 } 4509 }
4522 4510
4523 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4511 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4524 if (!ac) { 4512 if (ac) {
4513 ac->ac_sb = sb;
4514 ac->ac_inode = ar->inode;
4515 } else {
4525 ar->len = 0; 4516 ar->len = 0;
4526 *errp = -ENOMEM; 4517 *errp = -ENOMEM;
4527 goto out1; 4518 goto out1;
@@ -4594,18 +4585,7 @@ out3:
4594 reserv_blks); 4585 reserv_blks);
4595 } 4586 }
4596 4587
4597 trace_mark(ext4_allocate_blocks, 4588 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4598 "dev %s block %llu flags %u len %u ino %lu "
4599 "logical %llu goal %llu lleft %llu lright %llu "
4600 "pleft %llu pright %llu ",
4601 sb->s_id, (unsigned long long) block,
4602 ar->flags, ar->len, ar->inode ? ar->inode->i_ino : 0,
4603 (unsigned long long) ar->logical,
4604 (unsigned long long) ar->goal,
4605 (unsigned long long) ar->lleft,
4606 (unsigned long long) ar->lright,
4607 (unsigned long long) ar->pleft,
4608 (unsigned long long) ar->pright);
4609 4589
4610 return block; 4590 return block;
4611} 4591}
@@ -4709,7 +4689,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4709 * Main entry point into mballoc to free blocks 4689 * Main entry point into mballoc to free blocks
4710 */ 4690 */
4711void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, 4691void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4712 unsigned long block, unsigned long count, 4692 ext4_fsblk_t block, unsigned long count,
4713 int metadata, unsigned long *freed) 4693 int metadata, unsigned long *freed)
4714{ 4694{
4715 struct buffer_head *bitmap_bh = NULL; 4695 struct buffer_head *bitmap_bh = NULL;
@@ -4735,15 +4715,12 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4735 block + count > ext4_blocks_count(es)) { 4715 block + count > ext4_blocks_count(es)) {
4736 ext4_error(sb, __func__, 4716 ext4_error(sb, __func__,
4737 "Freeing blocks not in datazone - " 4717 "Freeing blocks not in datazone - "
4738 "block = %lu, count = %lu", block, count); 4718 "block = %llu, count = %lu", block, count);
4739 goto error_return; 4719 goto error_return;
4740 } 4720 }
4741 4721
4742 ext4_debug("freeing block %lu\n", block); 4722 ext4_debug("freeing block %llu\n", block);
4743 trace_mark(ext4_free_blocks, 4723 trace_ext4_free_blocks(inode, block, count, metadata);
4744 "dev %s block %llu count %lu metadata %d ino %lu",
4745 sb->s_id, (unsigned long long) block, count, metadata,
4746 inode ? inode->i_ino : 0);
4747 4724
4748 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4725 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4749 if (ac) { 4726 if (ac) {
@@ -4784,7 +4761,7 @@ do_more:
4784 4761
4785 ext4_error(sb, __func__, 4762 ext4_error(sb, __func__,
4786 "Freeing blocks in system zone - " 4763 "Freeing blocks in system zone - "
4787 "Block = %lu, count = %lu", block, count); 4764 "Block = %llu, count = %lu", block, count);
4788 /* err = 0. ext4_std_error should be a no op */ 4765 /* err = 0. ext4_std_error should be a no op */
4789 goto error_return; 4766 goto error_return;
4790 } 4767 }
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 75e34f69215b..c96bb19f58f9 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -19,7 +19,6 @@
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/version.h> 20#include <linux/version.h>
21#include <linux/blkdev.h> 21#include <linux/blkdev.h>
22#include <linux/marker.h>
23#include <linux/mutex.h> 22#include <linux/mutex.h>
24#include "ext4_jbd2.h" 23#include "ext4_jbd2.h"
25#include "ext4.h" 24#include "ext4.h"
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index fe64d9f79852..313a50b39741 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -458,6 +458,7 @@ int ext4_ext_migrate(struct inode *inode)
458 struct inode *tmp_inode = NULL; 458 struct inode *tmp_inode = NULL;
459 struct list_blocks_struct lb; 459 struct list_blocks_struct lb;
460 unsigned long max_entries; 460 unsigned long max_entries;
461 __u32 goal;
461 462
462 /* 463 /*
463 * If the filesystem does not support extents, or the inode 464 * If the filesystem does not support extents, or the inode
@@ -483,9 +484,10 @@ int ext4_ext_migrate(struct inode *inode)
483 retval = PTR_ERR(handle); 484 retval = PTR_ERR(handle);
484 return retval; 485 return retval;
485 } 486 }
486 tmp_inode = ext4_new_inode(handle, 487 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
487 inode->i_sb->s_root->d_inode, 488 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
488 S_IFREG); 489 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
490 S_IFREG, 0, goal);
489 if (IS_ERR(tmp_inode)) { 491 if (IS_ERR(tmp_inode)) {
490 retval = -ENOMEM; 492 retval = -ENOMEM;
491 ext4_journal_stop(handle); 493 ext4_journal_stop(handle);
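The goal computed above is just the first inode number of the block group that holds the inode being migrated, so the temporary extent-format inode is allocated near the original. A stand-alone sketch of the same arithmetic; the 8192 inodes-per-group value and the sample inode number are made up for illustration.

#include <stdio.h>

/*
 * Mirror of the goal calculation in ext4_ext_migrate(): map an inode
 * number to the first inode of its block group (inode numbers start at 1).
 */
static unsigned long first_inode_of_group(unsigned long ino,
					  unsigned long inodes_per_group)
{
	return ((ino - 1) / inodes_per_group) * inodes_per_group + 1;
}

int main(void)
{
	unsigned long ipg = 8192;	/* example inodes-per-group value */
	unsigned long ino = 50000;	/* example inode number */

	/* inode 50000 is in group 6, which starts at inode 49153 */
	printf("ino %lu -> goal %lu\n", ino, first_inode_of_group(ino, ipg));
	return 0;
}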
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
new file mode 100644
index 000000000000..bbf2dd9404dc
--- /dev/null
+++ b/fs/ext4/move_extent.c
@@ -0,0 +1,1320 @@
1/*
2 * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
3 * Written by Takashi Sato <t-sato@yk.jp.nec.com>
4 * Akira Fujita <a-fujita@rs.jp.nec.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of version 2.1 of the GNU Lesser General Public License
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16#include <linux/fs.h>
17#include <linux/quotaops.h>
18#include "ext4_jbd2.h"
19#include "ext4_extents.h"
20#include "ext4.h"
21
22#define get_ext_path(path, inode, block, ret) \
23 do { \
24 path = ext4_ext_find_extent(inode, block, path); \
25 if (IS_ERR(path)) { \
26 ret = PTR_ERR(path); \
27 path = NULL; \
28 } \
29 } while (0)
30
31/**
32 * copy_extent_status - Copy the extent's initialization status
33 *
34 * @src: the extent from which the initialization status is taken
35 * @dest: the extent whose status is set
36 */
37static void
38copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
39{
40 if (ext4_ext_is_uninitialized(src))
41 ext4_ext_mark_uninitialized(dest);
42 else
43 dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
44}
45
46/**
47 * mext_next_extent - Search for the next extent and set it to "extent"
48 *
49 * @inode: inode which is searched
50 * @path: this will obtain data for the next extent
51 * @extent: pointer to the next extent we have just gotten
52 *
53 * Search the next extent in the array of ext4_ext_path structure (@path)
54 * and set it to ext4_extent structure (@extent). In addition, the member of
55 * @path (->p_ext) also points to the next extent. Return 0 on success, 1 if
56 * ext4_ext_path structure refers to the last extent, or a negative error
57 * value on failure.
58 */
59static int
60mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
61 struct ext4_extent **extent)
62{
63 int ppos, leaf_ppos = path->p_depth;
64
65 ppos = leaf_ppos;
66 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
67 /* leaf block */
68 *extent = ++path[ppos].p_ext;
69 return 0;
70 }
71
72 while (--ppos >= 0) {
73 if (EXT_LAST_INDEX(path[ppos].p_hdr) >
74 path[ppos].p_idx) {
75 int cur_ppos = ppos;
76
77 /* index block */
78 path[ppos].p_idx++;
79 path[ppos].p_block = idx_pblock(path[ppos].p_idx);
80 if (path[ppos+1].p_bh)
81 brelse(path[ppos+1].p_bh);
82 path[ppos+1].p_bh =
83 sb_bread(inode->i_sb, path[ppos].p_block);
84 if (!path[ppos+1].p_bh)
85 return -EIO;
86 path[ppos+1].p_hdr =
87 ext_block_hdr(path[ppos+1].p_bh);
88
89 /* Halfway index block */
90 while (++cur_ppos < leaf_ppos) {
91 path[cur_ppos].p_idx =
92 EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
93 path[cur_ppos].p_block =
94 idx_pblock(path[cur_ppos].p_idx);
95 if (path[cur_ppos+1].p_bh)
96 brelse(path[cur_ppos+1].p_bh);
97 path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
98 path[cur_ppos].p_block);
99 if (!path[cur_ppos+1].p_bh)
100 return -EIO;
101 path[cur_ppos+1].p_hdr =
102 ext_block_hdr(path[cur_ppos+1].p_bh);
103 }
104
105 /* leaf block */
106 path[leaf_ppos].p_ext = *extent =
107 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
108 return 0;
109 }
110 }
111 /* We found the last extent */
112 return 1;
113}
114
115/**
116 * mext_double_down_read - Acquire two inodes' read semaphore
117 *
118 * @orig_inode: original inode structure
119 * @donor_inode: donor inode structure
120 * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
121 */
122static void
123mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
124{
125 struct inode *first = orig_inode, *second = donor_inode;
126
127 BUG_ON(orig_inode == NULL || donor_inode == NULL);
128
129 /*
130 * Use the inode number to provide the stable locking order instead
131 * of its address, because the C language doesn't guarantee you can
132 * compare pointers that don't come from the same array.
133 */
134 if (donor_inode->i_ino < orig_inode->i_ino) {
135 first = donor_inode;
136 second = orig_inode;
137 }
138
139 down_read(&EXT4_I(first)->i_data_sem);
140 down_read(&EXT4_I(second)->i_data_sem);
141}
142
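The comment above describes a general deadlock-avoidance rule: when two locks of the same class must be held, take them in an order derived from a stable key (here the inode number) rather than by comparing pointers. A minimal user-space sketch of the same idea, using pthread rwlocks and hypothetical object and field names:

#include <pthread.h>

/* Hypothetical object with its own lock and a stable, unique id. */
struct obj {
	unsigned long id;
	pthread_rwlock_t lock;
};

/*
 * Take the read locks of two objects in a globally consistent order so
 * that concurrent callers locking the same pair cannot deadlock.
 */
static void double_down_read(struct obj *a, struct obj *b)
{
	struct obj *first = a, *second = b;

	/* order by the stable id, never by pointer address */
	if (b->id < a->id) {
		first = b;
		second = a;
	}
	pthread_rwlock_rdlock(&first->lock);
	pthread_rwlock_rdlock(&second->lock);
}

static void double_up_read(struct obj *a, struct obj *b)
{
	pthread_rwlock_unlock(&a->lock);
	pthread_rwlock_unlock(&b->lock);
}

int main(void)
{
	struct obj x = { 1, PTHREAD_RWLOCK_INITIALIZER };
	struct obj y = { 2, PTHREAD_RWLOCK_INITIALIZER };

	double_down_read(&y, &x);	/* always locks x (id 1) first */
	double_up_read(&y, &x);
	return 0;
}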
143/**
144 * mext_double_down_write - Acquire two inodes' write semaphore
145 *
146 * @orig_inode: original inode structure
147 * @donor_inode: donor inode structure
148 * Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
149 */
150static void
151mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
152{
153 struct inode *first = orig_inode, *second = donor_inode;
154
155 BUG_ON(orig_inode == NULL || donor_inode == NULL);
156
157 /*
158 * Use the inode number to provide the stable locking order instead
159 * of its address, because the C language doesn't guarantee you can
160 * compare pointers that don't come from the same array.
161 */
162 if (donor_inode->i_ino < orig_inode->i_ino) {
163 first = donor_inode;
164 second = orig_inode;
165 }
166
167 down_write(&EXT4_I(first)->i_data_sem);
168 down_write(&EXT4_I(second)->i_data_sem);
169}
170
171/**
172 * mext_double_up_read - Release two inodes' read semaphore
173 *
174 * @orig_inode: original inode structure whose lock is released first
175 * @donor_inode: donor inode structure whose lock is released second
176 * Release read semaphore of two inodes (orig and donor).
177 */
178static void
179mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
180{
181 BUG_ON(orig_inode == NULL || donor_inode == NULL);
182
183 up_read(&EXT4_I(orig_inode)->i_data_sem);
184 up_read(&EXT4_I(donor_inode)->i_data_sem);
185}
186
187/**
188 * mext_double_up_write - Release two inodes' write semaphore
189 *
190 * @orig_inode: original inode structure whose lock is released first
191 * @donor_inode: donor inode structure whose lock is released second
192 * Release write semaphore of two inodes (orig and donor).
193 */
194static void
195mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
196{
197 BUG_ON(orig_inode == NULL || donor_inode == NULL);
198
199 up_write(&EXT4_I(orig_inode)->i_data_sem);
200 up_write(&EXT4_I(donor_inode)->i_data_sem);
201}
202
203/**
204 * mext_insert_across_blocks - Insert extents across leaf block
205 *
206 * @handle: journal handle
207 * @orig_inode: original inode
208 * @o_start: first original extent to be changed
209 * @o_end: last original extent to be changed
210 * @start_ext: first new extent to be inserted
211 * @new_ext: middle of new extent to be inserted
212 * @end_ext: last new extent to be inserted
213 *
214 * Allocate a new leaf block and insert extents into it. Return 0 on success,
215 * or a negative error value on failure.
216 */
217static int
218mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
219 struct ext4_extent *o_start, struct ext4_extent *o_end,
220 struct ext4_extent *start_ext, struct ext4_extent *new_ext,
221 struct ext4_extent *end_ext)
222{
223 struct ext4_ext_path *orig_path = NULL;
224 ext4_lblk_t eblock = 0;
225 int new_flag = 0;
226 int end_flag = 0;
227 int err = 0;
228
229 if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) {
230 if (o_start == o_end) {
231
232 /* start_ext new_ext end_ext
233 * donor |---------|-----------|--------|
234 * orig |------------------------------|
235 */
236 end_flag = 1;
237 } else {
238
239 /* start_ext new_ext end_ext
240 * donor |---------|----------|---------|
241 * orig |---------------|--------------|
242 */
243 o_end->ee_block = end_ext->ee_block;
244 o_end->ee_len = end_ext->ee_len;
245 ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
246 }
247
248 o_start->ee_len = start_ext->ee_len;
249 new_flag = 1;
250
251 } else if (start_ext->ee_len && new_ext->ee_len &&
252 !end_ext->ee_len && o_start == o_end) {
253
254 /* start_ext new_ext
255 * donor |--------------|---------------|
256 * orig |------------------------------|
257 */
258 o_start->ee_len = start_ext->ee_len;
259 new_flag = 1;
260
261 } else if (!start_ext->ee_len && new_ext->ee_len &&
262 end_ext->ee_len && o_start == o_end) {
263
264 /* new_ext end_ext
265 * donor |--------------|---------------|
266 * orig |------------------------------|
267 */
268 o_end->ee_block = end_ext->ee_block;
269 o_end->ee_len = end_ext->ee_len;
270 ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
271
272 /*
273 * eblock stays 0 when new_ext starts at the first block;
274 * otherwise look up the path at new_ext's start block.
275 */
276 if (new_ext->ee_block)
277 eblock = le32_to_cpu(new_ext->ee_block);
278
279 new_flag = 1;
280 } else {
281 ext4_debug("ext4 move extent: Unexpected insert case\n");
282 return -EIO;
283 }
284
285 if (new_flag) {
286 get_ext_path(orig_path, orig_inode, eblock, err);
287 if (orig_path == NULL)
288 goto out;
289
290 if (ext4_ext_insert_extent(handle, orig_inode,
291 orig_path, new_ext))
292 goto out;
293 }
294
295 if (end_flag) {
296 get_ext_path(orig_path, orig_inode,
297 le32_to_cpu(end_ext->ee_block) - 1, err);
298 if (orig_path == NULL)
299 goto out;
300
301 if (ext4_ext_insert_extent(handle, orig_inode,
302 orig_path, end_ext))
303 goto out;
304 }
305out:
306 if (orig_path) {
307 ext4_ext_drop_refs(orig_path);
308 kfree(orig_path);
309 }
310
311 return err;
312
313}
314
315/**
316 * mext_insert_inside_block - Insert new extent to the extent block
317 *
318 * @o_start: first original extent to be moved
319 * @o_end: last original extent to be moved
320 * @start_ext: first new extent to be inserted
321 * @new_ext: middle of new extent to be inserted
322 * @end_ext: last new extent to be inserted
323 * @eh: extent header of target leaf block
324 * @range_to_move: used to decide how to insert extent
325 *
326 * Insert extents into the leaf block. The extent (@o_start) is overwritten
327 * by inserted extents.
328 */
329static void
330mext_insert_inside_block(struct ext4_extent *o_start,
331 struct ext4_extent *o_end,
332 struct ext4_extent *start_ext,
333 struct ext4_extent *new_ext,
334 struct ext4_extent *end_ext,
335 struct ext4_extent_header *eh,
336 int range_to_move)
337{
338 int i = 0;
339 unsigned long len;
340
341 /* Move the existing extents */
342 if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
343 len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
344 (unsigned long)(o_end + 1);
345 memmove(o_end + 1 + range_to_move, o_end + 1, len);
346 }
347
348 /* Insert start entry */
349 if (start_ext->ee_len)
350 o_start[i++].ee_len = start_ext->ee_len;
351
352 /* Insert new entry */
353 if (new_ext->ee_len) {
354 o_start[i] = *new_ext;
355 ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
356 }
357
358 /* Insert end entry */
359 if (end_ext->ee_len)
360 o_start[i] = *end_ext;
361
362 /* Increment the total entries counter on the extent block */
363 le16_add_cpu(&eh->eh_entries, range_to_move);
364}
365
366/**
367 * mext_insert_extents - Insert new extent
368 *
369 * @handle: journal handle
370 * @orig_inode: original inode
371 * @orig_path: path indicates first extent to be changed
372 * @o_start: first original extent to be changed
373 * @o_end: last original extent to be changed
374 * @start_ext: first new extent to be inserted
375 * @new_ext: middle of new extent to be inserted
376 * @end_ext: last new extent to be inserted
377 *
378 * Call the function to insert extents. If we cannot add more extents into
379 * the leaf block, we call mext_insert_across_blocks() to create a
380 * new leaf block. Otherwise call mext_insert_inside_block(). Return 0
381 * on success, or a negative error value on failure.
382 */
383static int
384mext_insert_extents(handle_t *handle, struct inode *orig_inode,
385 struct ext4_ext_path *orig_path,
386 struct ext4_extent *o_start,
387 struct ext4_extent *o_end,
388 struct ext4_extent *start_ext,
389 struct ext4_extent *new_ext,
390 struct ext4_extent *end_ext)
391{
392 struct ext4_extent_header *eh;
393 unsigned long need_slots, slots_range;
394 int range_to_move, depth, ret;
395
396 /*
397 * The extents that need to be inserted are
398 * start_extent + new_extent + end_extent.
399 */
400 need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
401 (new_ext->ee_len ? 1 : 0);
402
403 /* The number of slots between start and end */
404 slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
405 / sizeof(struct ext4_extent);
406
407 /* Range to move the end of extent */
408 range_to_move = need_slots - slots_range;
409 depth = orig_path->p_depth;
410 orig_path += depth;
411 eh = orig_path->p_hdr;
412
413 if (depth) {
414 /* Register to journal */
415 ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
416 if (ret)
417 return ret;
418 }
419
420 /* Expansion */
421 if (range_to_move > 0 &&
422 (range_to_move > le16_to_cpu(eh->eh_max)
423 - le16_to_cpu(eh->eh_entries))) {
424
425 ret = mext_insert_across_blocks(handle, orig_inode, o_start,
426 o_end, start_ext, new_ext, end_ext);
427 if (ret < 0)
428 return ret;
429 } else
430 mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
431 end_ext, eh, range_to_move);
432
433 if (depth) {
434 ret = ext4_handle_dirty_metadata(handle, orig_inode,
435 orig_path->p_bh);
436 if (ret)
437 return ret;
438 } else {
439 ret = ext4_mark_inode_dirty(handle, orig_inode);
440 if (ret < 0)
441 return ret;
442 }
443
444 return 0;
445}
446
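The slot arithmetic above decides whether the replacement extents still fit in the current leaf: need_slots counts how many of start_ext, new_ext and end_ext are non-empty, slots_range counts the slots already occupied from o_start to o_end, and a positive difference is the distance the tail of the leaf must shift right (or, if the leaf lacks that many free entries, a new leaf is created via mext_insert_across_blocks()). A stand-alone illustration with made-up lengths:

#include <stdio.h>

int main(void)
{
	/*
	 * Example: all three replacement extents are non-empty, but the
	 * original range being replaced covers a single slot, i.e.
	 * o_start == o_end (the "donor splits one orig extent" case).
	 */
	int start_len = 10, new_len = 20, end_len = 5;	/* made-up lengths */
	int existing_slots = 1;				/* o_start == o_end */

	int need_slots = (start_len ? 1 : 0) + (new_len ? 1 : 0) +
			 (end_len ? 1 : 0);
	int range_to_move = need_slots - existing_slots;

	/* Here range_to_move is 2: everything after o_end shifts right by
	 * two slots, provided the leaf still has two free entries. */
	printf("need %d slots, have %d, shift tail by %d\n",
	       need_slots, existing_slots, range_to_move);
	return 0;
}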
447/**
448 * mext_leaf_block - Move one leaf extent block into the inode.
449 *
450 * @handle: journal handle
451 * @orig_inode: original inode
452 * @orig_path: path indicates first extent to be changed
453 * @dext: donor extent
454 * @from: start offset on the target file
455 *
456 * In order to insert extents into the leaf block, we must divide the extent
457 * in the leaf block into three extents: one covers the range where the new
458 * extents are inserted, and the others lie on either side of it.
459 *
460 * Therefore, this function creates structures to save extents of the leaf
461 * block, and inserts extents by calling mext_insert_extents() with
462 * created extents. Return 0 on success, or a negative error value on failure.
463 */
464static int
465mext_leaf_block(handle_t *handle, struct inode *orig_inode,
466 struct ext4_ext_path *orig_path, struct ext4_extent *dext,
467 ext4_lblk_t *from)
468{
469 struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
470 struct ext4_extent new_ext, start_ext, end_ext;
471 ext4_lblk_t new_ext_end;
472 ext4_fsblk_t new_phys_end;
473 int oext_alen, new_ext_alen, end_ext_alen;
474 int depth = ext_depth(orig_inode);
475 int ret;
476
477 o_start = o_end = oext = orig_path[depth].p_ext;
478 oext_alen = ext4_ext_get_actual_len(oext);
479 start_ext.ee_len = end_ext.ee_len = 0;
480
481 new_ext.ee_block = cpu_to_le32(*from);
482 ext4_ext_store_pblock(&new_ext, ext_pblock(dext));
483 new_ext.ee_len = dext->ee_len;
484 new_ext_alen = ext4_ext_get_actual_len(&new_ext);
485 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
486 new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1;
487
488 /*
489 * Case: original extent is first
490 * oext |--------|
491 * new_ext |--|
492 * start_ext |--|
493 */
494 if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
495 le32_to_cpu(new_ext.ee_block) <
496 le32_to_cpu(oext->ee_block) + oext_alen) {
497 start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
498 le32_to_cpu(oext->ee_block));
499 copy_extent_status(oext, &start_ext);
500 } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
501 prev_ext = oext - 1;
502 /*
503 * We can merge new_ext into previous extent,
504 * if these are contiguous and same extent type.
505 */
506 if (ext4_can_extents_be_merged(orig_inode, prev_ext,
507 &new_ext)) {
508 o_start = prev_ext;
509 start_ext.ee_len = cpu_to_le16(
510 ext4_ext_get_actual_len(prev_ext) +
511 new_ext_alen);
512 copy_extent_status(prev_ext, &start_ext);
513 new_ext.ee_len = 0;
514 }
515 }
516
517 /*
518 * Case: new_ext_end must be less than oext
519 * oext |-----------|
520 * new_ext |-------|
521 */
522 BUG_ON(le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end);
523
524 /*
525 * Case: new_ext is smaller than original extent
526 * oext |---------------|
527 * new_ext |-----------|
528 * end_ext |---|
529 */
530 if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
531 new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
532 end_ext.ee_len =
533 cpu_to_le16(le32_to_cpu(oext->ee_block) +
534 oext_alen - 1 - new_ext_end);
535 copy_extent_status(oext, &end_ext);
536 end_ext_alen = ext4_ext_get_actual_len(&end_ext);
537 ext4_ext_store_pblock(&end_ext,
538 (ext_pblock(o_end) + oext_alen - end_ext_alen));
539 end_ext.ee_block =
540 cpu_to_le32(le32_to_cpu(o_end->ee_block) +
541 oext_alen - end_ext_alen);
542 }
543
544 ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
545 o_end, &start_ext, &new_ext, &end_ext);
546 return ret;
547}
548
549/**
550 * mext_calc_swap_extents - Calculate extents for extent swapping.
551 *
552 * @tmp_dext: the extent that will belong to the original inode
553 * @tmp_oext: the extent that will belong to the donor inode
554 * @orig_off: block offset of original inode
555 * @donor_off: block offset of donor inode
556 * @max_count: the maximum length of extents
557 */
558static void
559mext_calc_swap_extents(struct ext4_extent *tmp_dext,
560 struct ext4_extent *tmp_oext,
561 ext4_lblk_t orig_off, ext4_lblk_t donor_off,
562 ext4_lblk_t max_count)
563{
564 ext4_lblk_t diff, orig_diff;
565 struct ext4_extent dext_old, oext_old;
566
567 dext_old = *tmp_dext;
568 oext_old = *tmp_oext;
569
570 /* When tmp_dext is too large, pick up the target range. */
571 diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
572
573 ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff);
574 tmp_dext->ee_block =
575 cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
576 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
577
578 if (max_count < ext4_ext_get_actual_len(tmp_dext))
579 tmp_dext->ee_len = cpu_to_le16(max_count);
580
581 orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
582 ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff);
583
584 /* Adjust extent length if donor extent is larger than orig */
585 if (ext4_ext_get_actual_len(tmp_dext) >
586 ext4_ext_get_actual_len(tmp_oext) - orig_diff)
587 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) -
588 orig_diff);
589
590 tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext));
591
592 copy_extent_status(&oext_old, tmp_dext);
593 copy_extent_status(&dext_old, tmp_oext);
594}
595
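mext_calc_swap_extents() trims the donor extent so that it starts exactly at donor_off, then clamps its length to max_count and to what remains of the original extent, and finally gives the original extent the same length. The sketch below redoes that arithmetic on plain integers, ignoring the little-endian on-disk encoding and the uninitialized-status copy; the struct and field names are hypothetical.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical in-memory extent: logical start, physical start, length. */
struct ext {
	uint32_t lblk;
	uint64_t pblk;
	uint16_t len;
};

/* Mirror of the trimming and clamping done by mext_calc_swap_extents(). */
static void calc_swap(struct ext *d, struct ext *o,
		      uint32_t orig_off, uint32_t donor_off,
		      uint32_t max_count)
{
	uint32_t diff = donor_off - d->lblk;	/* skip the donor extent's head */
	uint32_t orig_diff = orig_off - o->lblk;

	d->lblk += diff;
	d->pblk += diff;
	d->len  -= diff;
	if (max_count < d->len)			/* clamp to the requested count */
		d->len = max_count;

	o->pblk += orig_diff;			/* same trim on the orig side */
	if (d->len > o->len - orig_diff)	/* clamp to what is left of orig */
		d->len = o->len - orig_diff;

	o->len = d->len;			/* both sides swap the same length */
}

int main(void)
{
	struct ext donor = { 100, 5000, 80 };	/* made-up extents */
	struct ext orig  = { 200, 9000, 50 };

	/* trims 10 blocks off the donor head and clamps the swap to 32 blocks */
	calc_swap(&donor, &orig, 210, 110, 32);
	printf("swap %u blocks: donor %u@%llu, orig pblk %llu\n",
	       donor.len, donor.lblk, (unsigned long long)donor.pblk,
	       (unsigned long long)orig.pblk);
	return 0;
}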
596/**
597 * mext_replace_branches - Replace original extents with new extents
598 *
599 * @handle: journal handle
600 * @orig_inode: original inode
601 * @donor_inode: donor inode
602 * @from: block offset of orig_inode
603 * @count: block count to be replaced
604 *
605 * Replace original inode extents and donor inode extents page by page.
606 * We implement this replacement in the following three steps:
607 * 1. Save the block information of original and donor inodes into
608 * dummy extents.
609 * 2. Change the block information of original inode to point at the
610 * donor inode blocks.
611 * 3. Change the block information of donor inode to point at the saved
612 * original inode blocks in the dummy extents.
613 *
614 * Return 0 on success, or a negative error value on failure.
615 */
616static int
617mext_replace_branches(handle_t *handle, struct inode *orig_inode,
618 struct inode *donor_inode, ext4_lblk_t from,
619 ext4_lblk_t count)
620{
621 struct ext4_ext_path *orig_path = NULL;
622 struct ext4_ext_path *donor_path = NULL;
623 struct ext4_extent *oext, *dext;
624 struct ext4_extent tmp_dext, tmp_oext;
625 ext4_lblk_t orig_off = from, donor_off = from;
626 int err = 0;
627 int depth;
628 int replaced_count = 0;
629 int dext_alen;
630
631 mext_double_down_write(orig_inode, donor_inode);
632
633 /* Get the original extent for the block "orig_off" */
634 get_ext_path(orig_path, orig_inode, orig_off, err);
635 if (orig_path == NULL)
636 goto out;
637
638 /* Get the donor extent for the head */
639 get_ext_path(donor_path, donor_inode, donor_off, err);
640 if (donor_path == NULL)
641 goto out;
642 depth = ext_depth(orig_inode);
643 oext = orig_path[depth].p_ext;
644 tmp_oext = *oext;
645
646 depth = ext_depth(donor_inode);
647 dext = donor_path[depth].p_ext;
648 tmp_dext = *dext;
649
650 mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
651 donor_off, count);
652
653 /* Loop for the donor extents */
654 while (1) {
655 /* The extent for donor must be found. */
656 BUG_ON(!dext || donor_off != le32_to_cpu(tmp_dext.ee_block));
657
658 /* Set donor extent to orig extent */
659 err = mext_leaf_block(handle, orig_inode,
660 orig_path, &tmp_dext, &orig_off);
661 if (err < 0)
662 goto out;
663
664 /* Set orig extent to donor extent */
665 err = mext_leaf_block(handle, donor_inode,
666 donor_path, &tmp_oext, &donor_off);
667 if (err < 0)
668 goto out;
669
670 dext_alen = ext4_ext_get_actual_len(&tmp_dext);
671 replaced_count += dext_alen;
672 donor_off += dext_alen;
673 orig_off += dext_alen;
674
675 /* Already moved the expected blocks */
676 if (replaced_count >= count)
677 break;
678
679 if (orig_path)
680 ext4_ext_drop_refs(orig_path);
681 get_ext_path(orig_path, orig_inode, orig_off, err);
682 if (orig_path == NULL)
683 goto out;
684 depth = ext_depth(orig_inode);
685 oext = orig_path[depth].p_ext;
686 if (le32_to_cpu(oext->ee_block) +
687 ext4_ext_get_actual_len(oext) <= orig_off) {
688 err = 0;
689 goto out;
690 }
691 tmp_oext = *oext;
692
693 if (donor_path)
694 ext4_ext_drop_refs(donor_path);
695 get_ext_path(donor_path, donor_inode,
696 donor_off, err);
697 if (donor_path == NULL)
698 goto out;
699 depth = ext_depth(donor_inode);
700 dext = donor_path[depth].p_ext;
701 if (le32_to_cpu(dext->ee_block) +
702 ext4_ext_get_actual_len(dext) <= donor_off) {
703 err = 0;
704 goto out;
705 }
706 tmp_dext = *dext;
707
708 mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
709 donor_off,
710 count - replaced_count);
711 }
712
713out:
714 if (orig_path) {
715 ext4_ext_drop_refs(orig_path);
716 kfree(orig_path);
717 }
718 if (donor_path) {
719 ext4_ext_drop_refs(donor_path);
720 kfree(donor_path);
721 }
722
723 mext_double_up_write(orig_inode, donor_inode);
724 return err;
725}
726
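Stripped of the extent-tree mechanics, the three steps described above are a swap of two equal-length block mappings through a saved copy: remember where the original range pointed, redirect it to the donor blocks, then point the donor at the saved blocks. A purely illustrative sketch with toy arrays standing in for the two block maps:

#include <stdio.h>

#define NBLOCKS 4

int main(void)
{
	/* toy "logical block -> physical block" maps for orig and donor */
	unsigned long orig_map[NBLOCKS]  = { 100, 101, 102, 103 };
	unsigned long donor_map[NBLOCKS] = { 900, 901, 902, 903 };
	unsigned long saved[NBLOCKS];
	int i;

	for (i = 0; i < NBLOCKS; i++) {
		saved[i] = orig_map[i];		/* 1. save orig's blocks */
		orig_map[i] = donor_map[i];	/* 2. orig now points at donor blocks */
		donor_map[i] = saved[i];	/* 3. donor points at the saved blocks */
	}

	for (i = 0; i < NBLOCKS; i++)
		printf("lblk %d: orig->%lu donor->%lu\n",
		       i, orig_map[i], donor_map[i]);
	return 0;
}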
727/**
728 * move_extent_per_page - Move extent data per page
729 *
730 * @o_filp: file structure of original file
731 * @donor_inode: donor inode
732 * @orig_page_offset: page index on original file
733 * @data_offset_in_page: block index where data swapping starts
734 * @block_len_in_page: the number of blocks to be swapped
735 * @uninit: orig extent is uninitialized or not
736 *
737 * Save the data in original inode blocks and replace original inode extents
738 * with donor inode extents by calling mext_replace_branches().
739 * Finally, write out the saved data in new original inode blocks. Return 0
740 * on success, or a negative error value on failure.
741 */
742static int
743move_extent_par_page(struct file *o_filp, struct inode *donor_inode,
744 pgoff_t orig_page_offset, int data_offset_in_page,
745 int block_len_in_page, int uninit)
746{
747 struct inode *orig_inode = o_filp->f_dentry->d_inode;
748 struct address_space *mapping = orig_inode->i_mapping;
749 struct buffer_head *bh;
750 struct page *page = NULL;
751 const struct address_space_operations *a_ops = mapping->a_ops;
752 handle_t *handle;
753 ext4_lblk_t orig_blk_offset;
754 long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
755 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
756 unsigned int w_flags = 0;
757 unsigned int tmp_data_len, data_len;
758 void *fsdata;
759 int ret, i, jblocks;
760 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
761
762 /*
763 * Twice the usual number of journal buffers is needed because the
764 * original inode and donor_inode may each modify different metadata blocks.
765 */
766 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
767 handle = ext4_journal_start(orig_inode, jblocks);
768 if (IS_ERR(handle)) {
769 ret = PTR_ERR(handle);
770 return ret;
771 }
772
773 if (segment_eq(get_fs(), KERNEL_DS))
774 w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
775
776 orig_blk_offset = orig_page_offset * blocks_per_page +
777 data_offset_in_page;
778
779 /*
780 * If the orig extent is an uninitialized one, there is no need
781 * to force the page into memory and then force it to be
782 * written out again.
783 * Just swap data blocks between orig and donor.
784 */
785 if (uninit) {
786 ret = mext_replace_branches(handle, orig_inode,
787 donor_inode, orig_blk_offset,
788 block_len_in_page);
789
790 /* Clear the inode cache not to refer to the old data */
791 ext4_ext_invalidate_cache(orig_inode);
792 ext4_ext_invalidate_cache(donor_inode);
793 goto out2;
794 }
795
796 offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
797
798 /* Calculate data_len */
799 if ((orig_blk_offset + block_len_in_page - 1) ==
800 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
801 /* Replace the last block */
802 tmp_data_len = orig_inode->i_size & (blocksize - 1);
803 /*
804 * A length of zero means i_size is an exact multiple of the
805 * blocksize, so the last block is in fact a full block.
806 */
807 if (tmp_data_len == 0)
808 tmp_data_len = blocksize;
809
810 data_len = tmp_data_len +
811 ((block_len_in_page - 1) << orig_inode->i_blkbits);
812 } else {
813 data_len = block_len_in_page << orig_inode->i_blkbits;
814 }
815
816 ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
817 &page, &fsdata);
818 if (unlikely(ret < 0))
819 goto out;
820
821 if (!PageUptodate(page)) {
822 mapping->a_ops->readpage(o_filp, page);
823 lock_page(page);
824 }
825
826 /*
827 * try_to_release_page() does not call releasepage() while the page
828 * is under writeback. Since multiple move-extent processes may write
829 * to the same file, the write ordering must be preserved, so call
830 * wait_on_page_writeback() to wait for the page's writeback to
831 * complete.
832 */
833 if (PageWriteback(page))
834 wait_on_page_writeback(page);
835
836 /* Release old bh and drop refs */
837 try_to_release_page(page, 0);
838
839 ret = mext_replace_branches(handle, orig_inode, donor_inode,
840 orig_blk_offset, block_len_in_page);
841 if (ret < 0)
842 goto out;
843
844 /* Clear the inode cache not to refer to the old data */
845 ext4_ext_invalidate_cache(orig_inode);
846 ext4_ext_invalidate_cache(donor_inode);
847
848 if (!page_has_buffers(page))
849 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
850
851 bh = page_buffers(page);
852 for (i = 0; i < data_offset_in_page; i++)
853 bh = bh->b_this_page;
854
855 for (i = 0; i < block_len_in_page; i++) {
856 ret = ext4_get_block(orig_inode,
857 (sector_t)(orig_blk_offset + i), bh, 0);
858 if (ret < 0)
859 goto out;
860
861 if (bh->b_this_page != NULL)
862 bh = bh->b_this_page;
863 }
864
865 ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
866 page, fsdata);
867 page = NULL;
868
869out:
870 if (unlikely(page)) {
871 if (PageLocked(page))
872 unlock_page(page);
873 page_cache_release(page);
874 }
875out2:
876 ext4_journal_stop(handle);
877
878 return ret < 0 ? ret : 0;
879}
880
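When the swapped range ends at the file's last block, the write_begin()/write_end() length has to stop at i_size instead of at a block boundary, which is what the data_len computation above does. A stand-alone rerun of that arithmetic with example numbers (4096-byte blocks and a 10000-byte file, both made up):

#include <stdio.h>

int main(void)
{
	unsigned long long i_size = 10000;	/* example file size */
	unsigned int blocksize = 4096;		/* example block size */
	unsigned int blkbits = 12;		/* log2(blocksize) */
	unsigned int block_len_in_page = 3;	/* blocks swapped in this page */

	/* bytes used in the final block; 0 would mean "exactly full" */
	unsigned int tmp_data_len = i_size & (blocksize - 1);

	if (tmp_data_len == 0)
		tmp_data_len = blocksize;

	/* full blocks before the last one, plus the partial tail */
	unsigned int data_len = tmp_data_len +
				((block_len_in_page - 1) << blkbits);

	/* 10000 & 4095 = 1808, so data_len = 1808 + 2*4096 = 10000 */
	printf("data_len = %u\n", data_len);
	return 0;
}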
881/**
882 * mext_check_arguments - Check whether move extent can be done
883 *
884 * @orig_inode: original inode
885 * @donor_inode: donor inode
886 * @orig_start: logical start offset in block for orig
887 * @donor_start: logical start offset in block for donor
888 * @len: the number of blocks to be moved
889 * @moved_len: moved block length
890 *
891 * Check the arguments of ext4_move_extents() to see whether the files
892 * can be exchanged with each other.
893 * Return 0 on success, or a negative error value on failure.
894 */
895static int
896mext_check_arguments(struct inode *orig_inode,
897 struct inode *donor_inode, __u64 orig_start,
898 __u64 donor_start, __u64 *len, __u64 moved_len)
899{
900 /* Regular file check */
901 if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
902 ext4_debug("ext4 move extent: The argument files should be "
903 "regular file [ino:orig %lu, donor %lu]\n",
904 orig_inode->i_ino, donor_inode->i_ino);
905 return -EINVAL;
906 }
907
908 /* Ext4 move extent does not support swapfile */
909 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
910 ext4_debug("ext4 move extent: The argument files should "
911 "not be swapfile [ino:orig %lu, donor %lu]\n",
912 orig_inode->i_ino, donor_inode->i_ino);
913 return -EINVAL;
914 }
915
916 /* Files should be in the same ext4 FS */
917 if (orig_inode->i_sb != donor_inode->i_sb) {
918 ext4_debug("ext4 move extent: The argument files "
919 "should be in same FS [ino:orig %lu, donor %lu]\n",
920 orig_inode->i_ino, donor_inode->i_ino);
921 return -EINVAL;
922 }
923
924 /* orig and donor should be different file */
925 if (orig_inode->i_ino == donor_inode->i_ino) {
926 ext4_debug("ext4 move extent: The argument files should not "
927 "be same file [ino:orig %lu, donor %lu]\n",
928 orig_inode->i_ino, donor_inode->i_ino);
929 return -EINVAL;
930 }
931
932 /* Ext4 move extent supports only extent based file */
933 if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) {
934 ext4_debug("ext4 move extent: orig file is not extents "
935 "based file [ino:orig %lu]\n", orig_inode->i_ino);
936 return -EOPNOTSUPP;
937 } else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) {
938 ext4_debug("ext4 move extent: donor file is not extents "
939 "based file [ino:donor %lu]\n", donor_inode->i_ino);
940 return -EOPNOTSUPP;
941 }
942
943 if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
944 ext4_debug("ext4 move extent: File size is 0 byte\n");
945 return -EINVAL;
946 }
947
948 /* Start offset should be same */
949 if (orig_start != donor_start) {
950 ext4_debug("ext4 move extent: orig and donor's start "
951 "offset are not same [ino:orig %lu, donor %lu]\n",
952 orig_inode->i_ino, donor_inode->i_ino);
953 return -EINVAL;
954 }
955
956 if (moved_len) {
957 ext4_debug("ext4 move extent: moved_len should be 0 "
958 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
959 donor_inode->i_ino);
960 return -EINVAL;
961 }
962
963 if ((orig_start > MAX_DEFRAG_SIZE) ||
964 (donor_start > MAX_DEFRAG_SIZE) ||
965 (*len > MAX_DEFRAG_SIZE) ||
966 (orig_start + *len > MAX_DEFRAG_SIZE)) {
967 ext4_debug("ext4 move extent: Can't handle over [%lu] blocks "
968 "[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE,
969 orig_inode->i_ino, donor_inode->i_ino);
970 return -EINVAL;
971 }
972
973 if (orig_inode->i_size > donor_inode->i_size) {
974 if (orig_start >= donor_inode->i_size) {
975 ext4_debug("ext4 move extent: orig start offset "
976 "[%llu] should be less than donor file size "
977 "[%lld] [ino:orig %lu, donor_inode %lu]\n",
978 orig_start, donor_inode->i_size,
979 orig_inode->i_ino, donor_inode->i_ino);
980 return -EINVAL;
981 }
982
983 if (orig_start + *len > donor_inode->i_size) {
984 ext4_debug("ext4 move extent: End offset [%llu] should "
985 "be less than donor file size [%lld]."
986 "So adjust length from %llu to %lld "
987 "[ino:orig %lu, donor %lu]\n",
988 orig_start + *len, donor_inode->i_size,
989 *len, donor_inode->i_size - orig_start,
990 orig_inode->i_ino, donor_inode->i_ino);
991 *len = donor_inode->i_size - orig_start;
992 }
993 } else {
994 if (orig_start >= orig_inode->i_size) {
995 ext4_debug("ext4 move extent: start offset [%llu] "
996 "should be less than original file size "
997 "[%lld] [inode:orig %lu, donor %lu]\n",
998 orig_start, orig_inode->i_size,
999 orig_inode->i_ino, donor_inode->i_ino);
1000 return -EINVAL;
1001 }
1002
1003 if (orig_start + *len > orig_inode->i_size) {
1004 ext4_debug("ext4 move extent: Adjust length "
1005 "from %llu to %lld. Because it should be "
1006 "less than original file size "
1007 "[ino:orig %lu, donor %lu]\n",
1008 *len, orig_inode->i_size - orig_start,
1009 orig_inode->i_ino, donor_inode->i_ino);
1010 *len = orig_inode->i_size - orig_start;
1011 }
1012 }
1013
1014 if (!*len) {
1015 ext4_debug("ext4 move extent: len should not be 0 "
1016 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1017 donor_inode->i_ino);
1018 return -EINVAL;
1019 }
1020
1021 return 0;
1022}
1023
1024/**
1025 * mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
1026 *
1027 * @inode1: the inode structure
1028 * @inode2: the inode structure
1029 *
1030 * Lock two inodes' i_mutex by i_ino order. This function is moved from
1031 * fs/inode.c.
1032 */
1033static void
1034mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
1035{
1036 if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
1037 if (inode1)
1038 mutex_lock(&inode1->i_mutex);
1039 else if (inode2)
1040 mutex_lock(&inode2->i_mutex);
1041 return;
1042 }
1043
1044 if (inode1->i_ino < inode2->i_ino) {
1045 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
1046 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
1047 } else {
1048 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
1049 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
1050 }
1051}
1052
1053/**
1054 * mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
1055 *
1056 * @inode1: the inode that is released first
1057 * @inode2: the inode that is released second
1058 *
1059 * This function is moved from fs/inode.c.
1060 */
1061
1062static void
1063mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
1064{
1065 if (inode1)
1066 mutex_unlock(&inode1->i_mutex);
1067
1068 if (inode2 && inode2 != inode1)
1069 mutex_unlock(&inode2->i_mutex);
1070}
1071
1072/**
1073 * ext4_move_extents - Exchange the specified range of a file
1074 *
1075 * @o_filp: file structure of the original file
1076 * @d_filp: file structure of the donor file
1077 * @orig_start: start offset in block for orig
1078 * @donor_start: start offset in block for donor
1079 * @len: the number of blocks to be moved
1080 * @moved_len: moved block length
1081 *
1082 * This function returns 0 and the moved block length is set in moved_len
1083 * on success; otherwise it returns an error value.
1084 *
1085 * Note: ext4_move_extents() proceeds in the following order:
1086 * 1:ext4_move_extents() calculates the last block number of the extents to
1087 * be moved from the start block number (orig_start) and the number of
1088 * blocks to be moved (len) specified as arguments.
1089 * If {orig, donor}_start points to a hole, ext_cur (the current extent),
1090 * holecheck_path and orig_path are set to the first extent
1091 * behind the hole.
1092 * 2:Repeat step 3 to step 5 until holecheck_path points to last_extent
1093 * or ext_cur exceeds block_end, which is the last logical block number.
1094 * 3:To get the length of the contiguous area, call mext_next_extent()
1095 * repeatedly with ext_cur (whose initial value comes from holecheck_path)
1096 * until a non-contiguous extent is found, the start logical block number
1097 * exceeds block_end, or the extent points to the last extent.
1098 * 4:Exchange the original inode data with the donor inode data
1099 * from orig_page_offset to seq_end_page.
1100 * The start indexes of the data are specified as arguments.
1101 * That of the original inode is orig_page_offset,
1102 * and that of the donor inode is also orig_page_offset
1103 * (to easily handle the blocksize != pagesize case, the offset for the
1104 * donor inode is kept in block units).
1105 * 5:Update holecheck_path and orig_path to point to the next extent,
1106 * then return to step 2.
1107 * 6:Release holecheck_path and orig_path, and set len in moved_len,
1108 * which is the number of moved blocks.
1109 * moved_len lets the caller compute the file offset at which to start
1110 * the next move extent ioctl.
1111 * 7:Return 0 on success, or a negative error value on failure.
1112 */
1113int
1114ext4_move_extents(struct file *o_filp, struct file *d_filp,
1115 __u64 orig_start, __u64 donor_start, __u64 len,
1116 __u64 *moved_len)
1117{
1118 struct inode *orig_inode = o_filp->f_dentry->d_inode;
1119 struct inode *donor_inode = d_filp->f_dentry->d_inode;
1120 struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
1121 struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
1122 ext4_lblk_t block_start = orig_start;
1123 ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
1124 ext4_lblk_t rest_blocks;
1125 pgoff_t orig_page_offset = 0, seq_end_page;
1126 int ret, depth, last_extent = 0;
1127 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
1128 int data_offset_in_page;
1129 int block_len_in_page;
1130 int uninit;
1131
1132 /* protect orig and donor against a truncate */
1133 mext_inode_double_lock(orig_inode, donor_inode);
1134
1135 mext_double_down_read(orig_inode, donor_inode);
1136 /* Check whether the filesystem environment allows move_extent to be done */
1137 ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
1138 donor_start, &len, *moved_len);
1139 mext_double_up_read(orig_inode, donor_inode);
1140 if (ret)
1141 goto out2;
1142
1143 file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
1144 block_end = block_start + len - 1;
1145 if (file_end < block_end)
1146 len -= block_end - file_end;
1147
1148 get_ext_path(orig_path, orig_inode, block_start, ret);
1149 if (orig_path == NULL)
1150 goto out2;
1151
1152 /* Get path structure to check the hole */
1153 get_ext_path(holecheck_path, orig_inode, block_start, ret);
1154 if (holecheck_path == NULL)
1155 goto out;
1156
1157 depth = ext_depth(orig_inode);
1158 ext_cur = holecheck_path[depth].p_ext;
1159 if (ext_cur == NULL) {
1160 ret = -EINVAL;
1161 goto out;
1162 }
1163
1164 /*
1165 * Get proper extent whose ee_block is beyond block_start
1166 * if block_start was within the hole.
1167 */
1168 if (le32_to_cpu(ext_cur->ee_block) +
1169 ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
1170 last_extent = mext_next_extent(orig_inode,
1171 holecheck_path, &ext_cur);
1172 if (last_extent < 0) {
1173 ret = last_extent;
1174 goto out;
1175 }
1176 last_extent = mext_next_extent(orig_inode, orig_path,
1177 &ext_dummy);
1178 if (last_extent < 0) {
1179 ret = last_extent;
1180 goto out;
1181 }
1182 }
1183 seq_start = block_start;
1184
1185 /* No blocks within the specified range. */
1186 if (le32_to_cpu(ext_cur->ee_block) > block_end) {
1187 ext4_debug("ext4 move extent: The specified range of the file "
1188 "may be a hole\n");
1189 ret = -EINVAL;
1190 goto out;
1191 }
1192
1193 /* Adjust start blocks */
1194 add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
1195 ext4_ext_get_actual_len(ext_cur), block_end + 1) -
1196 max(le32_to_cpu(ext_cur->ee_block), block_start);
1197
1198 while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
1199 seq_blocks += add_blocks;
1200
1201 /* Adjust tail blocks */
1202 if (seq_start + seq_blocks - 1 > block_end)
1203 seq_blocks = block_end - seq_start + 1;
1204
1205 ext_prev = ext_cur;
1206 last_extent = mext_next_extent(orig_inode, holecheck_path,
1207 &ext_cur);
1208 if (last_extent < 0) {
1209 ret = last_extent;
1210 break;
1211 }
1212 add_blocks = ext4_ext_get_actual_len(ext_cur);
1213
1214 /*
1215 * Extend the length of contiguous block (seq_blocks)
1216 * if extents are contiguous.
1217 */
1218 if (ext4_can_extents_be_merged(orig_inode,
1219 ext_prev, ext_cur) &&
1220 block_end >= le32_to_cpu(ext_cur->ee_block) &&
1221 !last_extent)
1222 continue;
1223
1224 /* Check whether the original extent is uninitialized */
1225 uninit = ext4_ext_is_uninitialized(ext_prev);
1226
1227 data_offset_in_page = seq_start % blocks_per_page;
1228
1229 /*
1230 * Calculate data blocks count that should be swapped
1231 * at the first page.
1232 */
1233 if (data_offset_in_page + seq_blocks > blocks_per_page) {
1234 /* Swapped blocks are across pages */
1235 block_len_in_page =
1236 blocks_per_page - data_offset_in_page;
1237 } else {
1238 /* Swapped blocks are in a page */
1239 block_len_in_page = seq_blocks;
1240 }
1241
1242 orig_page_offset = seq_start >>
1243 (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
1244 seq_end_page = (seq_start + seq_blocks - 1) >>
1245 (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
1246 seq_start = le32_to_cpu(ext_cur->ee_block);
1247 rest_blocks = seq_blocks;
1248
1249 /* Discard preallocations of two inodes */
1250 down_write(&EXT4_I(orig_inode)->i_data_sem);
1251 ext4_discard_preallocations(orig_inode);
1252 up_write(&EXT4_I(orig_inode)->i_data_sem);
1253
1254 down_write(&EXT4_I(donor_inode)->i_data_sem);
1255 ext4_discard_preallocations(donor_inode);
1256 up_write(&EXT4_I(donor_inode)->i_data_sem);
1257
1258 while (orig_page_offset <= seq_end_page) {
1259
1260 /* Swap original branches with new branches */
1261 ret = move_extent_par_page(o_filp, donor_inode,
1262 orig_page_offset,
1263 data_offset_in_page,
1264 block_len_in_page, uninit);
1265 if (ret < 0)
1266 goto out;
1267 orig_page_offset++;
1268 /* Count how many blocks we have exchanged */
1269 *moved_len += block_len_in_page;
1270 BUG_ON(*moved_len > len);
1271
1272 data_offset_in_page = 0;
1273 rest_blocks -= block_len_in_page;
1274 if (rest_blocks > blocks_per_page)
1275 block_len_in_page = blocks_per_page;
1276 else
1277 block_len_in_page = rest_blocks;
1278 }
1279
1280 /* Decrease buffer counter */
1281 if (holecheck_path)
1282 ext4_ext_drop_refs(holecheck_path);
1283 get_ext_path(holecheck_path, orig_inode,
1284 seq_start, ret);
1285 if (holecheck_path == NULL)
1286 break;
1287 depth = holecheck_path->p_depth;
1288
1289 /* Decrease buffer counter */
1290 if (orig_path)
1291 ext4_ext_drop_refs(orig_path);
1292 get_ext_path(orig_path, orig_inode, seq_start, ret);
1293 if (orig_path == NULL)
1294 break;
1295
1296 ext_cur = holecheck_path[depth].p_ext;
1297 add_blocks = ext4_ext_get_actual_len(ext_cur);
1298 seq_blocks = 0;
1299
1300 }
1301out:
1302 if (orig_path) {
1303 ext4_ext_drop_refs(orig_path);
1304 kfree(orig_path);
1305 }
1306 if (holecheck_path) {
1307 ext4_ext_drop_refs(holecheck_path);
1308 kfree(holecheck_path);
1309 }
1310out2:
1311 mext_inode_double_unlock(orig_inode, donor_inode);
1312
1313 if (ret)
1314 return ret;
1315
1316 /* On success, all of the specified blocks must have been exchanged */
1317 BUG_ON(*moved_len != len);
1318
1319 return 0;
1320}
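
[Editorial note] ext4_move_extents() above is what the new EXT4_IOC_MOVE_EXT ioctl ends up calling. The userspace sketch below is illustrative only: the struct move_extent layout and the _IOWR('f', 15, ...) request number are assumptions that must be checked against the ext4 headers of a kernel carrying this patch, and the helper defrag_range() is hypothetical.

/* Illustrative userspace driver for EXT4_IOC_MOVE_EXT (not part of the patch). */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

/* Assumed layout, mirroring the ext4_move_extents() arguments above;
 * take the real definition from the kernel headers. */
struct move_extent {
        uint32_t reserved;      /* should be zero */
        uint32_t donor_fd;      /* donor file descriptor */
        uint64_t orig_start;    /* logical start block in the original file */
        uint64_t donor_start;   /* logical start block in the donor file */
        uint64_t len;           /* number of blocks to move */
        uint64_t moved_len;     /* filled in by the kernel: blocks exchanged */
};

#ifndef EXT4_IOC_MOVE_EXT
#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)  /* assumed value */
#endif

static int defrag_range(const char *orig, const char *donor,
                        uint64_t start, uint64_t len)
{
        struct move_extent me = { 0 };
        int ofd = open(orig, O_RDWR);
        int dfd = open(donor, O_RDWR);

        if (ofd < 0 || dfd < 0) {
                perror("open");
                return -1;
        }
        me.donor_fd = dfd;
        me.orig_start = start;
        me.donor_start = start;
        me.len = len;

        /* On success, me.moved_len reports how many blocks were exchanged. */
        if (ioctl(ofd, EXT4_IOC_MOVE_EXT, &me) < 0) {
                perror("EXT4_IOC_MOVE_EXT");
                close(ofd);
                close(dfd);
                return -1;
        }
        printf("moved %llu blocks\n", (unsigned long long)me.moved_len);
        close(ofd);
        close(dfd);
        return 0;
}
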
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 07eb6649e4fa..de04013d16ff 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1782,7 +1782,7 @@ retry:
1782 if (IS_DIRSYNC(dir)) 1782 if (IS_DIRSYNC(dir))
1783 ext4_handle_sync(handle); 1783 ext4_handle_sync(handle);
1784 1784
1785 inode = ext4_new_inode (handle, dir, mode); 1785 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
1786 err = PTR_ERR(inode); 1786 err = PTR_ERR(inode);
1787 if (!IS_ERR(inode)) { 1787 if (!IS_ERR(inode)) {
1788 inode->i_op = &ext4_file_inode_operations; 1788 inode->i_op = &ext4_file_inode_operations;
@@ -1816,7 +1816,7 @@ retry:
1816 if (IS_DIRSYNC(dir)) 1816 if (IS_DIRSYNC(dir))
1817 ext4_handle_sync(handle); 1817 ext4_handle_sync(handle);
1818 1818
1819 inode = ext4_new_inode(handle, dir, mode); 1819 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
1820 err = PTR_ERR(inode); 1820 err = PTR_ERR(inode);
1821 if (!IS_ERR(inode)) { 1821 if (!IS_ERR(inode)) {
1822 init_special_inode(inode, inode->i_mode, rdev); 1822 init_special_inode(inode, inode->i_mode, rdev);
@@ -1853,7 +1853,8 @@ retry:
1853 if (IS_DIRSYNC(dir)) 1853 if (IS_DIRSYNC(dir))
1854 ext4_handle_sync(handle); 1854 ext4_handle_sync(handle);
1855 1855
1856 inode = ext4_new_inode(handle, dir, S_IFDIR | mode); 1856 inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
1857 &dentry->d_name, 0);
1857 err = PTR_ERR(inode); 1858 err = PTR_ERR(inode);
1858 if (IS_ERR(inode)) 1859 if (IS_ERR(inode))
1859 goto out_stop; 1860 goto out_stop;
@@ -2264,7 +2265,8 @@ retry:
2264 if (IS_DIRSYNC(dir)) 2265 if (IS_DIRSYNC(dir))
2265 ext4_handle_sync(handle); 2266 ext4_handle_sync(handle);
2266 2267
2267 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); 2268 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
2269 &dentry->d_name, 0);
2268 err = PTR_ERR(inode); 2270 err = PTR_ERR(inode);
2269 if (IS_ERR(inode)) 2271 if (IS_ERR(inode))
2270 goto out_stop; 2272 goto out_stop;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 012c4251397e..23013d303f81 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -37,7 +37,6 @@
37#include <linux/seq_file.h> 37#include <linux/seq_file.h>
38#include <linux/proc_fs.h> 38#include <linux/proc_fs.h>
39#include <linux/ctype.h> 39#include <linux/ctype.h>
40#include <linux/marker.h>
41#include <linux/log2.h> 40#include <linux/log2.h>
42#include <linux/crc16.h> 41#include <linux/crc16.h>
43#include <asm/uaccess.h> 42#include <asm/uaccess.h>
@@ -47,6 +46,9 @@
47#include "xattr.h" 46#include "xattr.h"
48#include "acl.h" 47#include "acl.h"
49 48
49#define CREATE_TRACE_POINTS
50#include <trace/events/ext4.h>
51
50static int default_mb_history_length = 1000; 52static int default_mb_history_length = 1000;
51 53
52module_param_named(default_mb_history_length, default_mb_history_length, 54module_param_named(default_mb_history_length, default_mb_history_length,
@@ -301,7 +303,7 @@ static void ext4_handle_error(struct super_block *sb)
301 if (!test_opt(sb, ERRORS_CONT)) { 303 if (!test_opt(sb, ERRORS_CONT)) {
302 journal_t *journal = EXT4_SB(sb)->s_journal; 304 journal_t *journal = EXT4_SB(sb)->s_journal;
303 305
304 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 306 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
305 if (journal) 307 if (journal)
306 jbd2_journal_abort(journal, -EIO); 308 jbd2_journal_abort(journal, -EIO);
307 } 309 }
@@ -414,7 +416,7 @@ void ext4_abort(struct super_block *sb, const char *function,
414 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 416 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
415 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 417 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
416 sb->s_flags |= MS_RDONLY; 418 sb->s_flags |= MS_RDONLY;
417 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 419 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
418 if (EXT4_SB(sb)->s_journal) 420 if (EXT4_SB(sb)->s_journal)
419 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 421 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
420} 422}
@@ -1474,7 +1476,7 @@ set_qf_format:
1474 break; 1476 break;
1475#endif 1477#endif
1476 case Opt_abort: 1478 case Opt_abort:
1477 set_opt(sbi->s_mount_opt, ABORT); 1479 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1478 break; 1480 break;
1479 case Opt_nobarrier: 1481 case Opt_nobarrier:
1480 clear_opt(sbi->s_mount_opt, BARRIER); 1482 clear_opt(sbi->s_mount_opt, BARRIER);
@@ -1653,7 +1655,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1653 ext4_commit_super(sb, 1); 1655 ext4_commit_super(sb, 1);
1654 if (test_opt(sb, DEBUG)) 1656 if (test_opt(sb, DEBUG))
1655 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1657 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1656 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1658 "bpg=%lu, ipg=%lu, mo=%04x]\n",
1657 sb->s_blocksize, 1659 sb->s_blocksize,
1658 sbi->s_groups_count, 1660 sbi->s_groups_count,
1659 EXT4_BLOCKS_PER_GROUP(sb), 1661 EXT4_BLOCKS_PER_GROUP(sb),
@@ -2204,6 +2206,7 @@ EXT4_RO_ATTR(session_write_kbytes);
2204EXT4_RO_ATTR(lifetime_write_kbytes); 2206EXT4_RO_ATTR(lifetime_write_kbytes);
2205EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 2207EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2206 inode_readahead_blks_store, s_inode_readahead_blks); 2208 inode_readahead_blks_store, s_inode_readahead_blks);
2209EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
2207EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); 2210EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2208EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); 2211EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2209EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); 2212EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
@@ -2216,6 +2219,7 @@ static struct attribute *ext4_attrs[] = {
2216 ATTR_LIST(session_write_kbytes), 2219 ATTR_LIST(session_write_kbytes),
2217 ATTR_LIST(lifetime_write_kbytes), 2220 ATTR_LIST(lifetime_write_kbytes),
2218 ATTR_LIST(inode_readahead_blks), 2221 ATTR_LIST(inode_readahead_blks),
2222 ATTR_LIST(inode_goal),
2219 ATTR_LIST(mb_stats), 2223 ATTR_LIST(mb_stats),
2220 ATTR_LIST(mb_max_to_scan), 2224 ATTR_LIST(mb_max_to_scan),
2221 ATTR_LIST(mb_min_to_scan), 2225 ATTR_LIST(mb_min_to_scan),
@@ -3346,7 +3350,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
3346 int ret = 0; 3350 int ret = 0;
3347 tid_t target; 3351 tid_t target;
3348 3352
3349 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 3353 trace_ext4_sync_fs(sb, wait);
3350 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { 3354 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
3351 if (wait) 3355 if (wait)
3352 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); 3356 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
@@ -3450,7 +3454,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3450 goto restore_opts; 3454 goto restore_opts;
3451 } 3455 }
3452 3456
3453 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 3457 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
3454 ext4_abort(sb, __func__, "Abort forced by user"); 3458 ext4_abort(sb, __func__, "Abort forced by user");
3455 3459
3456 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3460 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -3465,7 +3469,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3465 3469
3466 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 3470 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
3467 n_blocks_count > ext4_blocks_count(es)) { 3471 n_blocks_count > ext4_blocks_count(es)) {
3468 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { 3472 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
3469 err = -EROFS; 3473 err = -EROFS;
3470 goto restore_opts; 3474 goto restore_opts;
3471 } 3475 }
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 17159cacbd9e..5d70b3e6d49b 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -20,9 +20,9 @@
20#include <linux/time.h> 20#include <linux/time.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/jbd2.h> 22#include <linux/jbd2.h>
23#include <linux/marker.h>
24#include <linux/errno.h> 23#include <linux/errno.h>
25#include <linux/slab.h> 24#include <linux/slab.h>
25#include <trace/events/jbd2.h>
26 26
27/* 27/*
28 * Unlink a buffer from a transaction checkpoint list. 28 * Unlink a buffer from a transaction checkpoint list.
@@ -358,8 +358,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
358 * journal straight away. 358 * journal straight away.
359 */ 359 */
360 result = jbd2_cleanup_journal_tail(journal); 360 result = jbd2_cleanup_journal_tail(journal);
361 trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", 361 trace_jbd2_checkpoint(journal, result);
362 journal->j_devname, result);
363 jbd_debug(1, "cleanup_journal_tail returned %d\n", result); 362 jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
364 if (result <= 0) 363 if (result <= 0)
365 return result; 364 return result;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 0b7d3b8226fd..7b4088b2364d 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -16,7 +16,6 @@
16#include <linux/time.h> 16#include <linux/time.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/jbd2.h> 18#include <linux/jbd2.h>
19#include <linux/marker.h>
20#include <linux/errno.h> 19#include <linux/errno.h>
21#include <linux/slab.h> 20#include <linux/slab.h>
22#include <linux/mm.h> 21#include <linux/mm.h>
@@ -26,6 +25,7 @@
26#include <linux/writeback.h> 25#include <linux/writeback.h>
27#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
28#include <linux/bio.h> 27#include <linux/bio.h>
28#include <trace/events/jbd2.h>
29 29
30/* 30/*
31 * Default IO end handler for temporary BJ_IO buffer_heads. 31 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -253,6 +253,7 @@ static int journal_submit_data_buffers(journal_t *journal,
253 * block allocation with delalloc. We need to write 253 * block allocation with delalloc. We need to write
254 * only allocated blocks here. 254 * only allocated blocks here.
255 */ 255 */
256 trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
256 err = journal_submit_inode_data_buffers(mapping); 257 err = journal_submit_inode_data_buffers(mapping);
257 if (!ret) 258 if (!ret)
258 ret = err; 259 ret = err;
@@ -394,8 +395,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
394 commit_transaction = journal->j_running_transaction; 395 commit_transaction = journal->j_running_transaction;
395 J_ASSERT(commit_transaction->t_state == T_RUNNING); 396 J_ASSERT(commit_transaction->t_state == T_RUNNING);
396 397
397 trace_mark(jbd2_start_commit, "dev %s transaction %d", 398 trace_jbd2_start_commit(journal, commit_transaction);
398 journal->j_devname, commit_transaction->t_tid);
399 jbd_debug(1, "JBD: starting commit of transaction %d\n", 399 jbd_debug(1, "JBD: starting commit of transaction %d\n",
400 commit_transaction->t_tid); 400 commit_transaction->t_tid);
401 401
@@ -409,6 +409,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
409 */ 409 */
410 if (commit_transaction->t_synchronous_commit) 410 if (commit_transaction->t_synchronous_commit)
411 write_op = WRITE_SYNC_PLUG; 411 write_op = WRITE_SYNC_PLUG;
412 trace_jbd2_commit_locking(journal, commit_transaction);
412 stats.u.run.rs_wait = commit_transaction->t_max_wait; 413 stats.u.run.rs_wait = commit_transaction->t_max_wait;
413 stats.u.run.rs_locked = jiffies; 414 stats.u.run.rs_locked = jiffies;
414 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, 415 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
@@ -484,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
484 */ 485 */
485 jbd2_journal_switch_revoke_table(journal); 486 jbd2_journal_switch_revoke_table(journal);
486 487
488 trace_jbd2_commit_flushing(journal, commit_transaction);
487 stats.u.run.rs_flushing = jiffies; 489 stats.u.run.rs_flushing = jiffies;
488 stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked, 490 stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked,
489 stats.u.run.rs_flushing); 491 stats.u.run.rs_flushing);
@@ -520,6 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
520 commit_transaction->t_state = T_COMMIT; 522 commit_transaction->t_state = T_COMMIT;
521 spin_unlock(&journal->j_state_lock); 523 spin_unlock(&journal->j_state_lock);
522 524
525 trace_jbd2_commit_logging(journal, commit_transaction);
523 stats.u.run.rs_logging = jiffies; 526 stats.u.run.rs_logging = jiffies;
524 stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, 527 stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
525 stats.u.run.rs_logging); 528 stats.u.run.rs_logging);
@@ -1054,9 +1057,7 @@ restart_loop:
1054 if (journal->j_commit_callback) 1057 if (journal->j_commit_callback)
1055 journal->j_commit_callback(journal, commit_transaction); 1058 journal->j_commit_callback(journal, commit_transaction);
1056 1059
1057 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", 1060 trace_jbd2_end_commit(journal, commit_transaction);
1058 journal->j_devname, commit_transaction->t_tid,
1059 journal->j_tail_sequence);
1060 jbd_debug(1, "JBD: commit %d complete, head %d\n", 1061 jbd_debug(1, "JBD: commit %d complete, head %d\n",
1061 journal->j_commit_sequence, journal->j_tail_sequence); 1062 journal->j_commit_sequence, journal->j_tail_sequence);
1062 if (to_free) 1063 if (to_free)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 62be7d294ec2..18bfd5dab642 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -38,6 +38,10 @@
38#include <linux/debugfs.h> 38#include <linux/debugfs.h>
39#include <linux/seq_file.h> 39#include <linux/seq_file.h>
40#include <linux/math64.h> 40#include <linux/math64.h>
41#include <linux/hash.h>
42
43#define CREATE_TRACE_POINTS
44#include <trace/events/jbd2.h>
41 45
42#include <asm/uaccess.h> 46#include <asm/uaccess.h>
43#include <asm/page.h> 47#include <asm/page.h>
@@ -2377,6 +2381,71 @@ static void __exit journal_exit(void)
2377 jbd2_journal_destroy_caches(); 2381 jbd2_journal_destroy_caches();
2378} 2382}
2379 2383
2384/*
2385 * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
2386 * tracing infrastructure to map a dev_t to a device name.
2387 *
2388 * The caller should use rcu_read_lock() in order to make sure the
2389 * device name stays valid until it is done with it. We use
2390 * rcu_read_lock() as well to make sure we're safe in case the caller
2391 * gets sloppy, and because rcu_read_lock() is cheap and can be safely
2392 * nested.
2393 */
2394struct devname_cache {
2395 struct rcu_head rcu;
2396 dev_t device;
2397 char devname[BDEVNAME_SIZE];
2398};
2399#define CACHE_SIZE_BITS 6
2400static struct devname_cache *devcache[1 << CACHE_SIZE_BITS];
2401static DEFINE_SPINLOCK(devname_cache_lock);
2402
2403static void free_devcache(struct rcu_head *rcu)
2404{
2405 kfree(rcu);
2406}
2407
2408const char *jbd2_dev_to_name(dev_t device)
2409{
2410 int i = hash_32(device, CACHE_SIZE_BITS);
2411 char *ret;
2412 struct block_device *bd;
2413
2414 rcu_read_lock();
2415 if (devcache[i] && devcache[i]->device == device) {
2416 ret = devcache[i]->devname;
2417 rcu_read_unlock();
2418 return ret;
2419 }
2420 rcu_read_unlock();
2421
2422 spin_lock(&devname_cache_lock);
2423 if (devcache[i]) {
2424 if (devcache[i]->device == device) {
2425 ret = devcache[i]->devname;
2426 spin_unlock(&devname_cache_lock);
2427 return ret;
2428 }
2429 call_rcu(&devcache[i]->rcu, free_devcache);
2430 }
2431 devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
2432 if (!devcache[i]) {
2433 spin_unlock(&devname_cache_lock);
2434 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
2435 }
2436 devcache[i]->device = device;
2437 bd = bdget(device);
2438 if (bd) {
2439 bdevname(bd, devcache[i]->devname);
2440 bdput(bd);
2441 } else
2442 __bdevname(device, devcache[i]->devname);
2443 ret = devcache[i]->devname;
2444 spin_unlock(&devname_cache_lock);
2445 return ret;
2446}
2447EXPORT_SYMBOL(jbd2_dev_to_name);
2448
2380MODULE_LICENSE("GPL"); 2449MODULE_LICENSE("GPL");
2381module_init(journal_init); 2450module_init(journal_init);
2382module_exit(journal_exit); 2451module_exit(journal_exit);
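
[Editorial note] The comment above asks callers of jbd2_dev_to_name() to hold rcu_read_lock() for as long as they use the returned string, because a colliding cache entry may be freed through call_rcu(). A minimal caller-side sketch, not part of the patch (report_journal_device() is a hypothetical helper):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/jbd2.h>

static void report_journal_device(journal_t *journal)
{
        rcu_read_lock();
        /* The cached name may be freed via call_rcu() once we unlock. */
        printk(KERN_INFO "jbd2: journal device is %s\n",
               jbd2_dev_to_name(journal->j_fs_dev->bd_dev));
        rcu_read_unlock();
}
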
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 996ffda06bf3..494501edba6b 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1547,36 +1547,6 @@ out:
1547 return; 1547 return;
1548} 1548}
1549 1549
1550/*
1551 * jbd2_journal_try_to_free_buffers() could race with
1552 * jbd2_journal_commit_transaction(). The later might still hold the
1553 * reference count to the buffers when inspecting them on
1554 * t_syncdata_list or t_locked_list.
1555 *
1556 * jbd2_journal_try_to_free_buffers() will call this function to
1557 * wait for the current transaction to finish syncing data buffers, before
1558 * try to free that buffer.
1559 *
1560 * Called with journal->j_state_lock hold.
1561 */
1562static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal)
1563{
1564 transaction_t *transaction;
1565 tid_t tid;
1566
1567 spin_lock(&journal->j_state_lock);
1568 transaction = journal->j_committing_transaction;
1569
1570 if (!transaction) {
1571 spin_unlock(&journal->j_state_lock);
1572 return;
1573 }
1574
1575 tid = transaction->t_tid;
1576 spin_unlock(&journal->j_state_lock);
1577 jbd2_log_wait_commit(journal, tid);
1578}
1579
1580/** 1550/**
1581 * int jbd2_journal_try_to_free_buffers() - try to free page buffers. 1551 * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
1582 * @journal: journal for operation 1552 * @journal: journal for operation
@@ -1649,25 +1619,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
1649 1619
1650 ret = try_to_free_buffers(page); 1620 ret = try_to_free_buffers(page);
1651 1621
1652 /*
1653 * There are a number of places where jbd2_journal_try_to_free_buffers()
1654 * could race with jbd2_journal_commit_transaction(), the later still
1655 * holds the reference to the buffers to free while processing them.
1656 * try_to_free_buffers() failed to free those buffers. Some of the
1657 * caller of releasepage() request page buffers to be dropped, otherwise
1658 * treat the fail-to-free as errors (such as generic_file_direct_IO())
1659 *
1660 * So, if the caller of try_to_release_page() wants the synchronous
1661 * behaviour(i.e make sure buffers are dropped upon return),
1662 * let's wait for the current transaction to finish flush of
1663 * dirty data buffers, then try to free those buffers again,
1664 * with the journal locked.
1665 */
1666 if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1667 jbd2_journal_wait_for_transaction_sync_data(journal);
1668 ret = try_to_free_buffers(page);
1669 }
1670
1671busy: 1622busy:
1672 return ret; 1623 return ret;
1673} 1624}
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index cc02393bfce8..d97eb652d6ca 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1315,6 +1315,12 @@ extern int jbd_blocks_per_page(struct inode *inode);
1315#define BUFFER_TRACE2(bh, bh2, info) do {} while (0) 1315#define BUFFER_TRACE2(bh, bh2, info) do {} while (0)
1316#define JBUFFER_TRACE(jh, info) do {} while (0) 1316#define JBUFFER_TRACE(jh, info) do {} while (0)
1317 1317
1318/*
1319 * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
1320 * tracing infrastructure to map a dev_t to a device name.
1321 */
1322extern const char *jbd2_dev_to_name(dev_t device);
1323
1318#endif /* __KERNEL__ */ 1324#endif /* __KERNEL__ */
1319 1325
1320#endif /* _LINUX_JBD2_H */ 1326#endif /* _LINUX_JBD2_H */
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
new file mode 100644
index 000000000000..acf4cc9cd36d
--- /dev/null
+++ b/include/trace/events/ext4.h
@@ -0,0 +1,719 @@
1#if !defined(_TRACE_EXT4_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_EXT4_H
3
4#undef TRACE_SYSTEM
5#define TRACE_SYSTEM ext4
6
7#include <linux/writeback.h>
8#include "../../../fs/ext4/ext4.h"
9#include "../../../fs/ext4/mballoc.h"
10#include <linux/tracepoint.h>
11
12TRACE_EVENT(ext4_free_inode,
13 TP_PROTO(struct inode *inode),
14
15 TP_ARGS(inode),
16
17 TP_STRUCT__entry(
18 __field( dev_t, dev )
19 __field( ino_t, ino )
20 __field( umode_t, mode )
21 __field( uid_t, uid )
22 __field( gid_t, gid )
23 __field( blkcnt_t, blocks )
24 ),
25
26 TP_fast_assign(
27 __entry->dev = inode->i_sb->s_dev;
28 __entry->ino = inode->i_ino;
29 __entry->mode = inode->i_mode;
30 __entry->uid = inode->i_uid;
31 __entry->gid = inode->i_gid;
32 __entry->blocks = inode->i_blocks;
33 ),
34
35 TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu",
36 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->mode,
37 __entry->uid, __entry->gid, __entry->blocks)
38);
39
40TRACE_EVENT(ext4_request_inode,
41 TP_PROTO(struct inode *dir, int mode),
42
43 TP_ARGS(dir, mode),
44
45 TP_STRUCT__entry(
46 __field( dev_t, dev )
47 __field( ino_t, dir )
48 __field( umode_t, mode )
49 ),
50
51 TP_fast_assign(
52 __entry->dev = dir->i_sb->s_dev;
53 __entry->dir = dir->i_ino;
54 __entry->mode = mode;
55 ),
56
57 TP_printk("dev %s dir %lu mode %d",
58 jbd2_dev_to_name(__entry->dev), __entry->dir, __entry->mode)
59);
60
61TRACE_EVENT(ext4_allocate_inode,
62 TP_PROTO(struct inode *inode, struct inode *dir, int mode),
63
64 TP_ARGS(inode, dir, mode),
65
66 TP_STRUCT__entry(
67 __field( dev_t, dev )
68 __field( ino_t, ino )
69 __field( ino_t, dir )
70 __field( umode_t, mode )
71 ),
72
73 TP_fast_assign(
74 __entry->dev = inode->i_sb->s_dev;
75 __entry->ino = inode->i_ino;
76 __entry->dir = dir->i_ino;
77 __entry->mode = mode;
78 ),
79
80 TP_printk("dev %s ino %lu dir %lu mode %d",
81 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->dir, __entry->mode)
82);
83
84TRACE_EVENT(ext4_write_begin,
85
86 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
87 unsigned int flags),
88
89 TP_ARGS(inode, pos, len, flags),
90
91 TP_STRUCT__entry(
92 __field( dev_t, dev )
93 __field( ino_t, ino )
94 __field( loff_t, pos )
95 __field( unsigned int, len )
96 __field( unsigned int, flags )
97 ),
98
99 TP_fast_assign(
100 __entry->dev = inode->i_sb->s_dev;
101 __entry->ino = inode->i_ino;
102 __entry->pos = pos;
103 __entry->len = len;
104 __entry->flags = flags;
105 ),
106
107 TP_printk("dev %s ino %lu pos %llu len %u flags %u",
108 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
109 __entry->flags)
110);
111
112TRACE_EVENT(ext4_ordered_write_end,
113 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
114 unsigned int copied),
115
116 TP_ARGS(inode, pos, len, copied),
117
118 TP_STRUCT__entry(
119 __field( dev_t, dev )
120 __field( ino_t, ino )
121 __field( loff_t, pos )
122 __field( unsigned int, len )
123 __field( unsigned int, copied )
124 ),
125
126 TP_fast_assign(
127 __entry->dev = inode->i_sb->s_dev;
128 __entry->ino = inode->i_ino;
129 __entry->pos = pos;
130 __entry->len = len;
131 __entry->copied = copied;
132 ),
133
134 TP_printk("dev %s ino %lu pos %llu len %u copied %u",
135 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
136 __entry->copied)
137);
138
139TRACE_EVENT(ext4_writeback_write_end,
140 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
141 unsigned int copied),
142
143 TP_ARGS(inode, pos, len, copied),
144
145 TP_STRUCT__entry(
146 __field( dev_t, dev )
147 __field( ino_t, ino )
148 __field( loff_t, pos )
149 __field( unsigned int, len )
150 __field( unsigned int, copied )
151 ),
152
153 TP_fast_assign(
154 __entry->dev = inode->i_sb->s_dev;
155 __entry->ino = inode->i_ino;
156 __entry->pos = pos;
157 __entry->len = len;
158 __entry->copied = copied;
159 ),
160
161 TP_printk("dev %s ino %lu pos %llu len %u copied %u",
162 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
163 __entry->copied)
164);
165
166TRACE_EVENT(ext4_journalled_write_end,
167 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
168 unsigned int copied),
169 TP_ARGS(inode, pos, len, copied),
170
171 TP_STRUCT__entry(
172 __field( dev_t, dev )
173 __field( ino_t, ino )
174 __field( loff_t, pos )
175 __field( unsigned int, len )
176 __field( unsigned int, copied )
177 ),
178
179 TP_fast_assign(
180 __entry->dev = inode->i_sb->s_dev;
181 __entry->ino = inode->i_ino;
182 __entry->pos = pos;
183 __entry->len = len;
184 __entry->copied = copied;
185 ),
186
187 TP_printk("dev %s ino %lu pos %llu len %u copied %u",
188 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
189 __entry->copied)
190);
191
192TRACE_EVENT(ext4_da_writepage,
193 TP_PROTO(struct inode *inode, struct page *page),
194
195 TP_ARGS(inode, page),
196
197 TP_STRUCT__entry(
198 __field( dev_t, dev )
199 __field( ino_t, ino )
200 __field( pgoff_t, index )
201
202 ),
203
204 TP_fast_assign(
205 __entry->dev = inode->i_sb->s_dev;
206 __entry->ino = inode->i_ino;
207 __entry->index = page->index;
208 ),
209
210 TP_printk("dev %s ino %lu page_index %lu",
211 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index)
212);
213
214TRACE_EVENT(ext4_da_writepages,
215 TP_PROTO(struct inode *inode, struct writeback_control *wbc),
216
217 TP_ARGS(inode, wbc),
218
219 TP_STRUCT__entry(
220 __field( dev_t, dev )
221 __field( ino_t, ino )
222 __field( long, nr_to_write )
223 __field( long, pages_skipped )
224 __field( loff_t, range_start )
225 __field( loff_t, range_end )
226 __field( char, nonblocking )
227 __field( char, for_kupdate )
228 __field( char, for_reclaim )
229 __field( char, for_writepages )
230 __field( char, range_cyclic )
231 ),
232
233 TP_fast_assign(
234 __entry->dev = inode->i_sb->s_dev;
235 __entry->ino = inode->i_ino;
236 __entry->nr_to_write = wbc->nr_to_write;
237 __entry->pages_skipped = wbc->pages_skipped;
238 __entry->range_start = wbc->range_start;
239 __entry->range_end = wbc->range_end;
240 __entry->nonblocking = wbc->nonblocking;
241 __entry->for_kupdate = wbc->for_kupdate;
242 __entry->for_reclaim = wbc->for_reclaim;
243 __entry->for_writepages = wbc->for_writepages;
244 __entry->range_cyclic = wbc->range_cyclic;
245 ),
246
247 TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d for_writepages %d range_cyclic %d",
248 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write,
249 __entry->pages_skipped, __entry->range_start,
250 __entry->range_end, __entry->nonblocking,
251 __entry->for_kupdate, __entry->for_reclaim,
252 __entry->for_writepages, __entry->range_cyclic)
253);
254
255TRACE_EVENT(ext4_da_writepages_result,
256 TP_PROTO(struct inode *inode, struct writeback_control *wbc,
257 int ret, int pages_written),
258
259 TP_ARGS(inode, wbc, ret, pages_written),
260
261 TP_STRUCT__entry(
262 __field( dev_t, dev )
263 __field( ino_t, ino )
264 __field( int, ret )
265 __field( int, pages_written )
266 __field( long, pages_skipped )
267 __field( char, encountered_congestion )
268 __field( char, more_io )
269 __field( char, no_nrwrite_index_update )
270 ),
271
272 TP_fast_assign(
273 __entry->dev = inode->i_sb->s_dev;
274 __entry->ino = inode->i_ino;
275 __entry->ret = ret;
276 __entry->pages_written = pages_written;
277 __entry->pages_skipped = wbc->pages_skipped;
278 __entry->encountered_congestion = wbc->encountered_congestion;
279 __entry->more_io = wbc->more_io;
280 __entry->no_nrwrite_index_update = wbc->no_nrwrite_index_update;
281 ),
282
283 TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d",
284 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->ret,
285 __entry->pages_written, __entry->pages_skipped,
286 __entry->encountered_congestion, __entry->more_io,
287 __entry->no_nrwrite_index_update)
288);
289
290TRACE_EVENT(ext4_da_write_begin,
291 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
292 unsigned int flags),
293
294 TP_ARGS(inode, pos, len, flags),
295
296 TP_STRUCT__entry(
297 __field( dev_t, dev )
298 __field( ino_t, ino )
299 __field( loff_t, pos )
300 __field( unsigned int, len )
301 __field( unsigned int, flags )
302 ),
303
304 TP_fast_assign(
305 __entry->dev = inode->i_sb->s_dev;
306 __entry->ino = inode->i_ino;
307 __entry->pos = pos;
308 __entry->len = len;
309 __entry->flags = flags;
310 ),
311
312 TP_printk("dev %s ino %lu pos %llu len %u flags %u",
313 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
314 __entry->flags)
315);
316
317TRACE_EVENT(ext4_da_write_end,
318 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
319 unsigned int copied),
320
321 TP_ARGS(inode, pos, len, copied),
322
323 TP_STRUCT__entry(
324 __field( dev_t, dev )
325 __field( ino_t, ino )
326 __field( loff_t, pos )
327 __field( unsigned int, len )
328 __field( unsigned int, copied )
329 ),
330
331 TP_fast_assign(
332 __entry->dev = inode->i_sb->s_dev;
333 __entry->ino = inode->i_ino;
334 __entry->pos = pos;
335 __entry->len = len;
336 __entry->copied = copied;
337 ),
338
339 TP_printk("dev %s ino %lu pos %llu len %u copied %u",
340 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
341 __entry->copied)
342);
343
344TRACE_EVENT(ext4_normal_writepage,
345 TP_PROTO(struct inode *inode, struct page *page),
346
347 TP_ARGS(inode, page),
348
349 TP_STRUCT__entry(
350 __field( dev_t, dev )
351 __field( ino_t, ino )
352 __field( pgoff_t, index )
353 ),
354
355 TP_fast_assign(
356 __entry->dev = inode->i_sb->s_dev;
357 __entry->ino = inode->i_ino;
358 __entry->index = page->index;
359 ),
360
361 TP_printk("dev %s ino %lu page_index %lu",
362 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index)
363);
364
365TRACE_EVENT(ext4_journalled_writepage,
366 TP_PROTO(struct inode *inode, struct page *page),
367
368 TP_ARGS(inode, page),
369
370 TP_STRUCT__entry(
371 __field( dev_t, dev )
372 __field( ino_t, ino )
373 __field( pgoff_t, index )
374
375 ),
376
377 TP_fast_assign(
378 __entry->dev = inode->i_sb->s_dev;
379 __entry->ino = inode->i_ino;
380 __entry->index = page->index;
381 ),
382
383 TP_printk("dev %s ino %lu page_index %lu",
384 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index)
385);
386
387TRACE_EVENT(ext4_discard_blocks,
388 TP_PROTO(struct super_block *sb, unsigned long long blk,
389 unsigned long long count),
390
391 TP_ARGS(sb, blk, count),
392
393 TP_STRUCT__entry(
394 __field( dev_t, dev )
395 __field( __u64, blk )
396 __field( __u64, count )
397
398 ),
399
400 TP_fast_assign(
401 __entry->dev = sb->s_dev;
402 __entry->blk = blk;
403 __entry->count = count;
404 ),
405
406 TP_printk("dev %s blk %llu count %llu",
407 jbd2_dev_to_name(__entry->dev), __entry->blk, __entry->count)
408);
409
410TRACE_EVENT(ext4_mb_new_inode_pa,
411 TP_PROTO(struct ext4_allocation_context *ac,
412 struct ext4_prealloc_space *pa),
413
414 TP_ARGS(ac, pa),
415
416 TP_STRUCT__entry(
417 __field( dev_t, dev )
418 __field( ino_t, ino )
419 __field( __u64, pa_pstart )
420 __field( __u32, pa_len )
421 __field( __u64, pa_lstart )
422
423 ),
424
425 TP_fast_assign(
426 __entry->dev = ac->ac_sb->s_dev;
427 __entry->ino = ac->ac_inode->i_ino;
428 __entry->pa_pstart = pa->pa_pstart;
429 __entry->pa_len = pa->pa_len;
430 __entry->pa_lstart = pa->pa_lstart;
431 ),
432
433 TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu",
434 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart,
435 __entry->pa_len, __entry->pa_lstart)
436);
437
438TRACE_EVENT(ext4_mb_new_group_pa,
439 TP_PROTO(struct ext4_allocation_context *ac,
440 struct ext4_prealloc_space *pa),
441
442 TP_ARGS(ac, pa),
443
444 TP_STRUCT__entry(
445 __field( dev_t, dev )
446 __field( ino_t, ino )
447 __field( __u64, pa_pstart )
448 __field( __u32, pa_len )
449 __field( __u64, pa_lstart )
450
451 ),
452
453 TP_fast_assign(
454 __entry->dev = ac->ac_sb->s_dev;
455 __entry->ino = ac->ac_inode->i_ino;
456 __entry->pa_pstart = pa->pa_pstart;
457 __entry->pa_len = pa->pa_len;
458 __entry->pa_lstart = pa->pa_lstart;
459 ),
460
461 TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu",
462 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart,
463 __entry->pa_len, __entry->pa_lstart)
464);
465
466TRACE_EVENT(ext4_mb_release_inode_pa,
467 TP_PROTO(struct ext4_allocation_context *ac,
468 struct ext4_prealloc_space *pa,
469 unsigned long long block, unsigned int count),
470
471 TP_ARGS(ac, pa, block, count),
472
473 TP_STRUCT__entry(
474 __field( dev_t, dev )
475 __field( ino_t, ino )
476 __field( __u64, block )
477 __field( __u32, count )
478
479 ),
480
481 TP_fast_assign(
482 __entry->dev = ac->ac_sb->s_dev;
483 __entry->ino = ac->ac_inode->i_ino;
484 __entry->block = block;
485 __entry->count = count;
486 ),
487
488 TP_printk("dev %s ino %lu block %llu count %u",
489 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block,
490 __entry->count)
491);
492
493TRACE_EVENT(ext4_mb_release_group_pa,
494 TP_PROTO(struct ext4_allocation_context *ac,
495 struct ext4_prealloc_space *pa),
496
497 TP_ARGS(ac, pa),
498
499 TP_STRUCT__entry(
500 __field( dev_t, dev )
501 __field( ino_t, ino )
502 __field( __u64, pa_pstart )
503 __field( __u32, pa_len )
504
505 ),
506
507 TP_fast_assign(
508 __entry->dev = ac->ac_sb->s_dev;
509 __entry->ino = ac->ac_inode->i_ino;
510 __entry->pa_pstart = pa->pa_pstart;
511 __entry->pa_len = pa->pa_len;
512 ),
513
514 TP_printk("dev %s pstart %llu len %u",
515 jbd2_dev_to_name(__entry->dev), __entry->pa_pstart, __entry->pa_len)
516);
517
518TRACE_EVENT(ext4_discard_preallocations,
519 TP_PROTO(struct inode *inode),
520
521 TP_ARGS(inode),
522
523 TP_STRUCT__entry(
524 __field( dev_t, dev )
525 __field( ino_t, ino )
526
527 ),
528
529 TP_fast_assign(
530 __entry->dev = inode->i_sb->s_dev;
531 __entry->ino = inode->i_ino;
532 ),
533
534 TP_printk("dev %s ino %lu",
535 jbd2_dev_to_name(__entry->dev), __entry->ino)
536);
537
538TRACE_EVENT(ext4_mb_discard_preallocations,
539 TP_PROTO(struct super_block *sb, int needed),
540
541 TP_ARGS(sb, needed),
542
543 TP_STRUCT__entry(
544 __field( dev_t, dev )
545 __field( int, needed )
546
547 ),
548
549 TP_fast_assign(
550 __entry->dev = sb->s_dev;
551 __entry->needed = needed;
552 ),
553
554 TP_printk("dev %s needed %d",
555 jbd2_dev_to_name(__entry->dev), __entry->needed)
556);
557
558TRACE_EVENT(ext4_request_blocks,
559 TP_PROTO(struct ext4_allocation_request *ar),
560
561 TP_ARGS(ar),
562
563 TP_STRUCT__entry(
564 __field( dev_t, dev )
565 __field( ino_t, ino )
566 __field( unsigned int, flags )
567 __field( unsigned int, len )
568 __field( __u64, logical )
569 __field( __u64, goal )
570 __field( __u64, lleft )
571 __field( __u64, lright )
572 __field( __u64, pleft )
573 __field( __u64, pright )
574 ),
575
576 TP_fast_assign(
577 __entry->dev = ar->inode->i_sb->s_dev;
578 __entry->ino = ar->inode->i_ino;
579 __entry->flags = ar->flags;
580 __entry->len = ar->len;
581 __entry->logical = ar->logical;
582 __entry->goal = ar->goal;
583 __entry->lleft = ar->lleft;
584 __entry->lright = ar->lright;
585 __entry->pleft = ar->pleft;
586 __entry->pright = ar->pright;
587 ),
588
589 TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
590 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags,
591 __entry->len,
592 (unsigned long long) __entry->logical,
593 (unsigned long long) __entry->goal,
594 (unsigned long long) __entry->lleft,
595 (unsigned long long) __entry->lright,
596 (unsigned long long) __entry->pleft,
597 (unsigned long long) __entry->pright)
598);
599
600TRACE_EVENT(ext4_allocate_blocks,
601 TP_PROTO(struct ext4_allocation_request *ar, unsigned long long block),
602
603 TP_ARGS(ar, block),
604
605 TP_STRUCT__entry(
606 __field( dev_t, dev )
607 __field( ino_t, ino )
608 __field( __u64, block )
609 __field( unsigned int, flags )
610 __field( unsigned int, len )
611 __field( __u64, logical )
612 __field( __u64, goal )
613 __field( __u64, lleft )
614 __field( __u64, lright )
615 __field( __u64, pleft )
616 __field( __u64, pright )
617 ),
618
619 TP_fast_assign(
620 __entry->dev = ar->inode->i_sb->s_dev;
621 __entry->ino = ar->inode->i_ino;
622 __entry->block = block;
623 __entry->flags = ar->flags;
624 __entry->len = ar->len;
625 __entry->logical = ar->logical;
626 __entry->goal = ar->goal;
627 __entry->lleft = ar->lleft;
628 __entry->lright = ar->lright;
629 __entry->pleft = ar->pleft;
630 __entry->pright = ar->pright;
631 ),
632
633 TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
634 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags,
635 __entry->len, __entry->block,
636 (unsigned long long) __entry->logical,
637 (unsigned long long) __entry->goal,
638 (unsigned long long) __entry->lleft,
639 (unsigned long long) __entry->lright,
640 (unsigned long long) __entry->pleft,
641 (unsigned long long) __entry->pright)
642);
643
644TRACE_EVENT(ext4_free_blocks,
645 TP_PROTO(struct inode *inode, __u64 block, unsigned long count,
646 int metadata),
647
648 TP_ARGS(inode, block, count, metadata),
649
650 TP_STRUCT__entry(
651 __field( dev_t, dev )
652 __field( ino_t, ino )
653 __field( __u64, block )
654 __field( unsigned long, count )
655 __field( int, metadata )
656
657 ),
658
659 TP_fast_assign(
660 __entry->dev = inode->i_sb->s_dev;
661 __entry->ino = inode->i_ino;
662 __entry->block = block;
663 __entry->count = count;
664 __entry->metadata = metadata;
665 ),
666
667 TP_printk("dev %s ino %lu block %llu count %lu metadata %d",
668 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block,
669 __entry->count, __entry->metadata)
670);
671
672TRACE_EVENT(ext4_sync_file,
673 TP_PROTO(struct file *file, struct dentry *dentry, int datasync),
674
675 TP_ARGS(file, dentry, datasync),
676
677 TP_STRUCT__entry(
678 __field( dev_t, dev )
679 __field( ino_t, ino )
680 __field( ino_t, parent )
681 __field( int, datasync )
682 ),
683
684 TP_fast_assign(
685 __entry->dev = dentry->d_inode->i_sb->s_dev;
686 __entry->ino = dentry->d_inode->i_ino;
687 __entry->datasync = datasync;
688 __entry->parent = dentry->d_parent->d_inode->i_ino;
689 ),
690
691 TP_printk("dev %s ino %ld parent %ld datasync %d ",
692 jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->parent,
693 __entry->datasync)
694);
695
696TRACE_EVENT(ext4_sync_fs,
697 TP_PROTO(struct super_block *sb, int wait),
698
699 TP_ARGS(sb, wait),
700
701 TP_STRUCT__entry(
702 __field( dev_t, dev )
703 __field( int, wait )
704
705 ),
706
707 TP_fast_assign(
708 __entry->dev = sb->s_dev;
709 __entry->wait = wait;
710 ),
711
712 TP_printk("dev %s wait %d", jbd2_dev_to_name(__entry->dev),
713 __entry->wait)
714);
715
716#endif /* _TRACE_EXT4_H */
717
718/* This part must be outside protection */
719#include <trace/define_trace.h>
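
[Editorial note] Once CREATE_TRACE_POINTS pulls this header in from fs/ext4/super.c, each TRACE_EVENT above appears as an event directory under the tracing debugfs tree. A hedged userspace sketch for switching one of them on, assuming debugfs is mounted at /sys/kernel/debug and using ext4_sync_fs as the example; enable_ext4_event() is a hypothetical helper:

#include <stdio.h>

/* Enable one of the ext4 tracepoints defined above by writing '1' to its
 * "enable" file; the event directory name matches the TRACE_EVENT name. */
static int enable_ext4_event(const char *event)
{
        char path[256];
        FILE *f;

        snprintf(path, sizeof(path),
                 "/sys/kernel/debug/tracing/events/ext4/%s/enable", event);
        f = fopen(path, "w");
        if (!f) {
                perror(path);
                return -1;
        }
        fputs("1\n", f);
        fclose(f);
        return 0;
}

int main(void)
{
        /* Hits then show up in /sys/kernel/debug/tracing/trace. */
        return enable_ext4_event("ext4_sync_fs");
}
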
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
new file mode 100644
index 000000000000..845b0b4b48fd
--- /dev/null
+++ b/include/trace/events/jbd2.h
@@ -0,0 +1,168 @@
1#if !defined(_TRACE_JBD2_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_JBD2_H
3
4#include <linux/jbd2.h>
5#include <linux/tracepoint.h>
6
7#undef TRACE_SYSTEM
8#define TRACE_SYSTEM jbd2
9
10TRACE_EVENT(jbd2_checkpoint,
11
12 TP_PROTO(journal_t *journal, int result),
13
14 TP_ARGS(journal, result),
15
16 TP_STRUCT__entry(
17 __field( dev_t, dev )
18 __field( int, result )
19 ),
20
21 TP_fast_assign(
22 __entry->dev = journal->j_fs_dev->bd_dev;
23 __entry->result = result;
24 ),
25
26 TP_printk("dev %s result %d",
27 jbd2_dev_to_name(__entry->dev), __entry->result)
28);
29
30TRACE_EVENT(jbd2_start_commit,
31
32 TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
33
34 TP_ARGS(journal, commit_transaction),
35
36 TP_STRUCT__entry(
37 __field( dev_t, dev )
38 __field( char, sync_commit )
39 __field( int, transaction )
40 ),
41
42 TP_fast_assign(
43 __entry->dev = journal->j_fs_dev->bd_dev;
44 __entry->sync_commit = commit_transaction->t_synchronous_commit;
45 __entry->transaction = commit_transaction->t_tid;
46 ),
47
48 TP_printk("dev %s transaction %d sync %d",
49 jbd2_dev_to_name(__entry->dev), __entry->transaction,
50 __entry->sync_commit)
51);
52
53TRACE_EVENT(jbd2_commit_locking,
54
55 TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
56
57 TP_ARGS(journal, commit_transaction),
58
59 TP_STRUCT__entry(
60 __field( dev_t, dev )
61 __field( char, sync_commit )
62 __field( int, transaction )
63 ),
64
65 TP_fast_assign(
66 __entry->dev = journal->j_fs_dev->bd_dev;
67 __entry->sync_commit = commit_transaction->t_synchronous_commit;
68 __entry->transaction = commit_transaction->t_tid;
69 ),
70
71 TP_printk("dev %s transaction %d sync %d",
72 jbd2_dev_to_name(__entry->dev), __entry->transaction,
73 __entry->sync_commit)
74);
75
76TRACE_EVENT(jbd2_commit_flushing,
77
78 TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
79
80 TP_ARGS(journal, commit_transaction),
81
82 TP_STRUCT__entry(
83 __field( dev_t, dev )
84 __field( char, sync_commit )
85 __field( int, transaction )
86 ),
87
88 TP_fast_assign(
89 __entry->dev = journal->j_fs_dev->bd_dev;
90 __entry->sync_commit = commit_transaction->t_synchronous_commit;
91 __entry->transaction = commit_transaction->t_tid;
92 ),
93
94 TP_printk("dev %s transaction %d sync %d",
95 jbd2_dev_to_name(__entry->dev), __entry->transaction,
96 __entry->sync_commit)
97);
98
99TRACE_EVENT(jbd2_commit_logging,
100
101 TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
102
103 TP_ARGS(journal, commit_transaction),
104
105 TP_STRUCT__entry(
106 __field( dev_t, dev )
107 __field( char, sync_commit )
108 __field( int, transaction )
109 ),
110
111 TP_fast_assign(
112 __entry->dev = journal->j_fs_dev->bd_dev;
113 __entry->sync_commit = commit_transaction->t_synchronous_commit;
114 __entry->transaction = commit_transaction->t_tid;
115 ),
116
117 TP_printk("dev %s transaction %d sync %d",
118 jbd2_dev_to_name(__entry->dev), __entry->transaction,
119 __entry->sync_commit)
120);
121
122TRACE_EVENT(jbd2_end_commit,
123 TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
124
125 TP_ARGS(journal, commit_transaction),
126
127 TP_STRUCT__entry(
128 __field( dev_t, dev )
129 __field( char, sync_commit )
130 __field( int, transaction )
131 __field( int, head )
132 ),
133
134 TP_fast_assign(
135 __entry->dev = journal->j_fs_dev->bd_dev;
136 __entry->sync_commit = commit_transaction->t_synchronous_commit;
137 __entry->transaction = commit_transaction->t_tid;
138 __entry->head = journal->j_tail_sequence;
139 ),
140
141 TP_printk("dev %s transaction %d sync %d head %d",
142 jbd2_dev_to_name(__entry->dev), __entry->transaction,
143 __entry->sync_commit, __entry->head)
144);
145
146TRACE_EVENT(jbd2_submit_inode_data,
147 TP_PROTO(struct inode *inode),
148
149 TP_ARGS(inode),
150
151 TP_STRUCT__entry(
152 __field( dev_t, dev )
153 __field( ino_t, ino )
154 ),
155
156 TP_fast_assign(
157 __entry->dev = inode->i_sb->s_dev;
158 __entry->ino = inode->i_ino;
159 ),
160
161 TP_printk("dev %s ino %lu",
162 jbd2_dev_to_name(__entry->dev), __entry->ino)
163);
164
165#endif /* _TRACE_JBD2_H */
166
167/* This part must be outside protection */
168#include <trace/define_trace.h>