author	Linus Torvalds <torvalds@linux-foundation.org>	2014-04-04 18:39:39 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-04-04 18:39:39 -0400
commit	24e7ea3bea94fe05eae5019f5f12bcdc98fc5157 (patch)
tree	6e527053ad73b737b5450c52d14ddf53ad4ba9a2
parent	8e343c8b5c2e3c93d9eebea7702c89d81753c495 (diff)
parent	ad6599ab3ac98a4474544086e048ce86ec15a4d1 (diff)
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
 "Major changes for 3.14 include support for the newly added ZERO_RANGE
  and COLLAPSE_RANGE fallocate operations, and scalability improvements
  in the jbd2 layer and in xattr handling when the extended attributes
  spill over into an external block.

  Other than that, the usual clean ups and minor bug fixes"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (42 commits)
  ext4: fix premature freeing of partial clusters split across leaf blocks
  ext4: remove unneeded test of ret variable
  ext4: fix comment typo
  ext4: make ext4_block_zero_page_range static
  ext4: atomically set inode->i_flags in ext4_set_inode_flags()
  ext4: optimize Hurd tests when reading/writing inodes
  ext4: kill i_version support for Hurd-castrated file systems
  ext4: each filesystem creates and uses its own mb_cache
  fs/mbcache.c: decouple the locking of local from global data
  fs/mbcache.c: change block and index hash chain to hlist_bl_node
  ext4: Introduce FALLOC_FL_ZERO_RANGE flag for fallocate
  ext4: refactor ext4_fallocate code
  ext4: Update inode i_size after the preallocation
  ext4: fix partial cluster handling for bigalloc file systems
  ext4: delete path dealloc code in ext4_ext_handle_uninitialized_extents
  ext4: only call sync_filesystem() when remounting read-only
  fs: push sync_filesystem() down to the file system's remount_fs()
  jbd2: improve error messages for inconsistent journal heads
  jbd2: minimize region locked by j_list_lock in jbd2_journal_forget()
  jbd2: minimize region locked by j_list_lock in journal_get_create_access()
  ...
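[Editor's note: the two headline features are reached from userspace through fallocate(2). Below is a minimal, hedged sketch of how the new modes are invoked; the scratch path, the file sizes, and the assumption of a 4096-byte filesystem block size are illustrative only and are not part of this merge.]

/*
 * Sketch: exercising FALLOC_FL_ZERO_RANGE and FALLOC_FL_COLLAPSE_RANGE.
 * Assumes a 4096-byte fs block size; /tmp/testfile is a scratch path.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/testfile", O_RDWR | O_CREAT, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Give the file some size so the collapse below stays inside EOF. */
	if (ftruncate(fd, 1 << 20) < 0)
		perror("ftruncate");

	/* Zero out 64 KiB at offset 0 without changing i_size. */
	if (fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE,
		      0, 65536) < 0)
		perror("FALLOC_FL_ZERO_RANGE");

	/*
	 * Remove one block-aligned 4 KiB range and shift the rest of the
	 * file down.  offset and len must be fs-block aligned and the
	 * range must lie strictly inside the file, otherwise the kernel
	 * returns -EINVAL (see ext4_collapse_range() in the diff below).
	 */
	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 4096, 4096) < 0)
		perror("FALLOC_FL_COLLAPSE_RANGE");

	close(fd);
	return 0;
}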
-rw-r--r--  fs/adfs/super.c               |    1
-rw-r--r--  fs/affs/super.c               |    1
-rw-r--r--  fs/befs/linuxvfs.c            |    1
-rw-r--r--  fs/btrfs/super.c              |    1
-rw-r--r--  fs/cifs/cifsfs.c              |    1
-rw-r--r--  fs/coda/inode.c               |    1
-rw-r--r--  fs/cramfs/inode.c             |    1
-rw-r--r--  fs/debugfs/inode.c            |    1
-rw-r--r--  fs/devpts/inode.c             |    1
-rw-r--r--  fs/efs/super.c                |    1
-rw-r--r--  fs/ext2/super.c               |    1
-rw-r--r--  fs/ext3/super.c               |    2
-rw-r--r--  fs/ext4/ext4.h                |   11
-rw-r--r--  fs/ext4/ext4_jbd2.c           |   10
-rw-r--r--  fs/ext4/extents.c             |  818
-rw-r--r--  fs/ext4/extents_status.c      |   28
-rw-r--r--  fs/ext4/extents_status.h      |    9
-rw-r--r--  fs/ext4/inode.c               |  120
-rw-r--r--  fs/ext4/ioctl.c               |   24
-rw-r--r--  fs/ext4/mballoc.c             |    7
-rw-r--r--  fs/ext4/mballoc.h             |    4
-rw-r--r--  fs/ext4/move_extent.c         |    5
-rw-r--r--  fs/ext4/super.c               |   40
-rw-r--r--  fs/ext4/xattr.c               |   59
-rw-r--r--  fs/ext4/xattr.h               |    6
-rw-r--r--  fs/f2fs/super.c               |    2
-rw-r--r--  fs/fat/inode.c                |    2
-rw-r--r--  fs/freevxfs/vxfs_super.c      |    1
-rw-r--r--  fs/fuse/inode.c               |    1
-rw-r--r--  fs/gfs2/super.c               |    2
-rw-r--r--  fs/hfs/super.c                |    1
-rw-r--r--  fs/hfsplus/super.c            |    1
-rw-r--r--  fs/hpfs/super.c               |    2
-rw-r--r--  fs/inode.c                    |   31
-rw-r--r--  fs/isofs/inode.c              |    1
-rw-r--r--  fs/jbd2/commit.c              |   77
-rw-r--r--  fs/jbd2/journal.c             |   10
-rw-r--r--  fs/jbd2/transaction.c         |   46
-rw-r--r--  fs/jffs2/super.c              |    1
-rw-r--r--  fs/jfs/super.c                |    1
-rw-r--r--  fs/mbcache.c                  |  540
-rw-r--r--  fs/minix/inode.c              |    1
-rw-r--r--  fs/ncpfs/inode.c              |    1
-rw-r--r--  fs/nfs/super.c                |    2
-rw-r--r--  fs/nilfs2/super.c             |    1
-rw-r--r--  fs/ntfs/super.c               |    2
-rw-r--r--  fs/ocfs2/super.c              |    2
-rw-r--r--  fs/openpromfs/inode.c         |    1
-rw-r--r--  fs/proc/root.c                |    2
-rw-r--r--  fs/pstore/inode.c             |    1
-rw-r--r--  fs/qnx4/inode.c               |    1
-rw-r--r--  fs/qnx6/inode.c               |    1
-rw-r--r--  fs/reiserfs/super.c           |    1
-rw-r--r--  fs/romfs/super.c              |    1
-rw-r--r--  fs/squashfs/super.c           |    1
-rw-r--r--  fs/super.c                    |    2
-rw-r--r--  fs/sysv/inode.c               |    1
-rw-r--r--  fs/ubifs/super.c              |    1
-rw-r--r--  fs/udf/super.c                |    1
-rw-r--r--  fs/ufs/super.c                |    1
-rw-r--r--  fs/xfs/xfs_super.c            |    1
-rw-r--r--  include/linux/fs.h            |    3
-rw-r--r--  include/linux/mbcache.h       |   12
-rw-r--r--  include/trace/events/ext4.h   |  102
64 files changed, 1514 insertions, 501 deletions
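[Editor's note: most of the one-line hunks that follow come from the series commit "fs: push sync_filesystem() down to the file system's remount_fs()": the unconditional sync_filesystem() call moves out of the VFS remount path and into the top of each filesystem's remount handler, which then lets ext4 skip the sync except when remounting read-only. A hedged sketch of the resulting per-filesystem pattern; "examplefs" and its option parser are hypothetical stand-ins, while sync_filesystem() and MS_NODIRATIME are the real kernel API of this era.]

#include <linux/fs.h>

static int examplefs_remount(struct super_block *sb, int *flags, char *data)
{
	/* Flush dirty data/metadata before mount options or RO/RW state change. */
	sync_filesystem(sb);

	*flags |= MS_NODIRATIME;
	return examplefs_parse_options(sb, data);	/* hypothetical helper */
}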
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 7b3003cb6f1b..952aeb048349 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -212,6 +212,7 @@ static int parse_options(struct super_block *sb, char *options)
 
 static int adfs_remount(struct super_block *sb, int *flags, char *data)
 {
+	sync_filesystem(sb);
 	*flags |= MS_NODIRATIME;
 	return parse_options(sb, data);
 }
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d098731b82ff..307453086c3f 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -530,6 +530,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 
 	pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data);
 
+	sync_filesystem(sb);
 	*flags |= MS_NODIRATIME;
 
 	memcpy(volume, sbi->s_volume, 32);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 5188f1222987..d626756ff721 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -913,6 +913,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 static int
 befs_remount(struct super_block *sb, int *flags, char *data)
 {
+	sync_filesystem(sb);
 	if (!(*flags & MS_RDONLY))
 		return -EINVAL;
 	return 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d4878ddba87a..9dbf42395153 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1380,6 +1380,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 	unsigned int old_metadata_ratio = fs_info->metadata_ratio;
 	int ret;
 
+	sync_filesystem(sb);
 	btrfs_remount_prepare(fs_info);
 
 	ret = btrfs_parse_options(root, data);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ab8ad2546c3e..2c70cbe35d39 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -541,6 +541,7 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root)
 
 static int cifs_remount(struct super_block *sb, int *flags, char *data)
 {
+	sync_filesystem(sb);
 	*flags |= MS_NODIRATIME;
 	return 0;
 }
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 626abc02b694..d9c7751f10ac 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -96,6 +96,7 @@ void coda_destroy_inodecache(void)
 
 static int coda_remount(struct super_block *sb, int *flags, char *data)
 {
+	sync_filesystem(sb);
 	*flags |= MS_NOATIME;
 	return 0;
 }
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index a1f801c14fbc..ddcfe590b8a8 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -243,6 +243,7 @@ static void cramfs_kill_sb(struct super_block *sb)
 
 static int cramfs_remount(struct super_block *sb, int *flags, char *data)
 {
+	sync_filesystem(sb);
 	*flags |= MS_RDONLY;
 	return 0;
 }
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index ca4a08f38374..8c41b52da358 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -218,6 +218,7 @@ static int debugfs_remount(struct super_block *sb, int *flags, char *data)
 	int err;
 	struct debugfs_fs_info *fsi = sb->s_fs_info;
 
+	sync_filesystem(sb);
 	err = debugfs_parse_options(data, &fsi->mount_opts);
 	if (err)
 		goto fail;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index a726b9f29cb7..c71038079b47 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -313,6 +313,7 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
 	struct pts_fs_info *fsi = DEVPTS_SB(sb);
 	struct pts_mount_opts *opts = &fsi->mount_opts;
 
+	sync_filesystem(sb);
 	err = parse_mount_options(data, PARSE_REMOUNT, opts);
 
 	/*
diff --git a/fs/efs/super.c b/fs/efs/super.c
index f8def1acf08c..3befcc9f5d63 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -114,6 +114,7 @@ static void destroy_inodecache(void)
 
 static int efs_remount(struct super_block *sb, int *flags, char *data)
 {
+	sync_filesystem(sb);
 	*flags |= MS_RDONLY;
 	return 0;
 }
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 20d6697bd638..d260115c0350 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1254,6 +1254,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 	unsigned long old_sb_flags;
 	int err;
 
+	sync_filesystem(sb);
 	spin_lock(&sbi->s_lock);
 
 	/* Store the old options */
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 37fd31ed16e7..95c6c5a6d0c5 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2649,6 +2649,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 	int i;
 #endif
 
+	sync_filesystem(sb);
+
 	/* Store the original options */
 	old_sb_flags = sb->s_flags;
 	old_opts.s_mount_opt = sbi->s_mount_opt;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d3a534fdc5ff..f1c65dc7cc0a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -31,6 +31,7 @@
 #include <linux/percpu_counter.h>
 #include <linux/ratelimit.h>
 #include <crypto/hash.h>
+#include <linux/falloc.h>
 #ifdef __KERNEL__
 #include <linux/compat.h>
 #endif
@@ -567,6 +568,8 @@ enum {
 #define EXT4_GET_BLOCKS_NO_LOCK			0x0100
 	/* Do not put hole in extent cache */
 #define EXT4_GET_BLOCKS_NO_PUT_HOLE		0x0200
+	/* Convert written extents to unwritten */
+#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN	0x0400
 
 /*
  * The bit position of these flags must not overlap with any of the
@@ -998,6 +1001,8 @@ struct ext4_inode_info {
 #define EXT4_MOUNT2_STD_GROUP_SIZE	0x00000002 /* We have standard group
						      size of blocksize * 8
						      blocks */
+#define EXT4_MOUNT2_HURD_COMPAT		0x00000004 /* Support HURD-castrated
						      file systems */
 
 #define clear_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt &= \
						~EXT4_MOUNT_##opt
@@ -1326,6 +1331,7 @@ struct ext4_sb_info {
 	struct list_head s_es_lru;
 	unsigned long s_es_last_sorted;
 	struct percpu_counter s_extent_cache_cnt;
+	struct mb_cache *s_mb_cache;
 	spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
 
 	/* Ratelimit ext4 messages. */
@@ -2133,8 +2139,6 @@ extern int ext4_writepage_trans_blocks(struct inode *);
 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_block_truncate_page(handle_t *handle,
 		struct address_space *mapping, loff_t from);
-extern int ext4_block_zero_page_range(handle_t *handle,
-		struct address_space *mapping, loff_t from, loff_t length);
 extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
 			     loff_t lstart, loff_t lend);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
@@ -2757,6 +2761,7 @@ extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			__u64 start, __u64 len);
 extern int ext4_ext_precache(struct inode *inode);
+extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
 
 /* move_extent.c */
 extern void ext4_double_down_write_data_sem(struct inode *first,
@@ -2766,6 +2771,8 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
 extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 			     __u64 start_orig, __u64 start_donor,
 			     __u64 len, __u64 *moved_len);
+extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
+			    struct ext4_extent **extent);
 
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 3fe29de832c8..c3fb607413ed 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -259,6 +259,16 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 	if (WARN_ON_ONCE(err)) {
 		ext4_journal_abort_handle(where, line, __func__, bh,
 					  handle, err);
+		if (inode == NULL) {
+			pr_err("EXT4: jbd2_journal_dirty_metadata "
+			       "failed: handle type %u started at "
+			       "line %u, credits %u/%u, errcode %d",
+			       handle->h_type,
+			       handle->h_line_no,
+			       handle->h_requested_credits,
+			       handle->h_buffer_credits, err);
+			return err;
+		}
 		ext4_error_inode(inode, where, line,
 				 bh->b_blocknr,
 				 "journal_dirty_metadata failed: "
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 74bc2d549c58..82df3ce9874a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -37,7 +37,6 @@
 #include <linux/quotaops.h>
 #include <linux/string.h>
 #include <linux/slab.h>
-#include <linux/falloc.h>
 #include <asm/uaccess.h>
 #include <linux/fiemap.h>
 #include "ext4_jbd2.h"
@@ -1691,7 +1690,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
  * the extent that was written properly split out and conversion to
  * initialized is trivial.
  */
-	if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2))
+	if (ext4_ext_is_uninitialized(ex1) != ext4_ext_is_uninitialized(ex2))
 		return 0;
 
 	ext1_ee_len = ext4_ext_get_actual_len(ex1);
@@ -1708,6 +1707,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 	 */
 	if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
 		return 0;
+	if (ext4_ext_is_uninitialized(ex1) &&
+	    (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) ||
+	     atomic_read(&EXT4_I(inode)->i_unwritten) ||
+	     (ext1_ee_len + ext2_ee_len > EXT_UNINIT_MAX_LEN)))
+		return 0;
 #ifdef AGGRESSIVE_TEST
 	if (ext1_ee_len >= 4)
 		return 0;
@@ -1731,7 +1735,7 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
 {
 	struct ext4_extent_header *eh;
 	unsigned int depth, len;
-	int merge_done = 0;
+	int merge_done = 0, uninit;
 
 	depth = ext_depth(inode);
 	BUG_ON(path[depth].p_hdr == NULL);
@@ -1741,8 +1745,11 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
 		if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
 			break;
 		/* merge with next extent! */
+		uninit = ext4_ext_is_uninitialized(ex);
 		ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
 				+ ext4_ext_get_actual_len(ex + 1));
+		if (uninit)
+			ext4_ext_mark_uninitialized(ex);
 
 		if (ex + 1 < EXT_LAST_EXTENT(eh)) {
 			len = (EXT_LAST_EXTENT(eh) - ex - 1)
@@ -1896,7 +1903,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	struct ext4_ext_path *npath = NULL;
 	int depth, len, err;
 	ext4_lblk_t next;
-	int mb_flags = 0;
+	int mb_flags = 0, uninit;
 
 	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
 		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
@@ -1946,9 +1953,11 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 						  path + depth);
 			if (err)
 				return err;
-
+			uninit = ext4_ext_is_uninitialized(ex);
 			ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
 					+ ext4_ext_get_actual_len(newext));
+			if (uninit)
+				ext4_ext_mark_uninitialized(ex);
 			eh = path[depth].p_hdr;
 			nearex = ex;
 			goto merge;
@@ -1971,10 +1980,13 @@ prepend:
 		if (err)
 			return err;
 
+		uninit = ext4_ext_is_uninitialized(ex);
 		ex->ee_block = newext->ee_block;
 		ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
 		ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
 				+ ext4_ext_get_actual_len(newext));
+		if (uninit)
+			ext4_ext_mark_uninitialized(ex);
 		eh = path[depth].p_hdr;
 		nearex = ex;
 		goto merge;
@@ -2585,6 +2597,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	ex_ee_block = le32_to_cpu(ex->ee_block);
 	ex_ee_len = ext4_ext_get_actual_len(ex);
 
+	/*
+	 * If we're starting with an extent other than the last one in the
+	 * node, we need to see if it shares a cluster with the extent to
+	 * the right (towards the end of the file). If its leftmost cluster
+	 * is this extent's rightmost cluster and it is not cluster aligned,
+	 * we'll mark it as a partial that is not to be deallocated.
+	 */
+
+	if (ex != EXT_LAST_EXTENT(eh)) {
+		ext4_fsblk_t current_pblk, right_pblk;
+		long long current_cluster, right_cluster;
+
+		current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
+		current_cluster = (long long)EXT4_B2C(sbi, current_pblk);
+		right_pblk = ext4_ext_pblock(ex + 1);
+		right_cluster = (long long)EXT4_B2C(sbi, right_pblk);
+		if (current_cluster == right_cluster &&
+			EXT4_PBLK_COFF(sbi, right_pblk))
+			*partial_cluster = -right_cluster;
+	}
+
 	trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
 
 	while (ex >= EXT_FIRST_EXTENT(eh) &&
@@ -2710,10 +2743,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		err = ext4_ext_correct_indexes(handle, inode, path);
 
 	/*
-	 * Free the partial cluster only if the current extent does not
-	 * reference it. Otherwise we might free used cluster.
+	 * If there's a partial cluster and at least one extent remains in
+	 * the leaf, free the partial cluster if it isn't shared with the
+	 * current extent.  If there's a partial cluster and no extents
+	 * remain in the leaf, it can't be freed here.  It can only be
+	 * freed when it's possible to determine if it's not shared with
+	 * any other extent - when the next leaf is processed or when space
+	 * removal is complete.
 	 */
-	if (*partial_cluster > 0 &&
+	if (*partial_cluster > 0 && eh->eh_entries &&
 	    (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
 	     *partial_cluster)) {
 		int flags = get_default_free_blocks_flags(inode);
@@ -3569,6 +3607,8 @@ out:
  * b> Splits in two extents: Write is happening at either end of the extent
  * c> Splits in three extents: Somone is writing in middle of the extent
  *
+ * This works the same way in the case of initialized -> unwritten conversion.
+ *
  * One of more index blocks maybe needed if the extent tree grow after
  * the uninitialized extent split. To prevent ENOSPC occur at the IO
  * complete, we need to split the uninitialized extent before DIO submit
@@ -3579,7 +3619,7 @@ out:
  *
  * Returns the size of uninitialized extent to be written on success.
  */
-static int ext4_split_unwritten_extents(handle_t *handle,
+static int ext4_split_convert_extents(handle_t *handle,
 			struct inode *inode,
 			struct ext4_map_blocks *map,
 			struct ext4_ext_path *path,
@@ -3591,9 +3631,9 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	unsigned int ee_len;
 	int split_flag = 0, depth;
 
-	ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
-		"block %llu, max_blocks %u\n", inode->i_ino,
+	ext_debug("%s: inode %lu, logical block %llu, max_blocks %u\n",
+		  __func__, inode->i_ino,
 		  (unsigned long long)map->m_lblk, map->m_len);
 
 	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
 		inode->i_sb->s_blocksize_bits;
@@ -3608,14 +3648,73 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	ee_block = le32_to_cpu(ex->ee_block);
 	ee_len = ext4_ext_get_actual_len(ex);
 
-	split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
-	split_flag |= EXT4_EXT_MARK_UNINIT2;
-	if (flags & EXT4_GET_BLOCKS_CONVERT)
-		split_flag |= EXT4_EXT_DATA_VALID2;
+	/* Convert to unwritten */
+	if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
+		split_flag |= EXT4_EXT_DATA_VALID1;
+	/* Convert to initialized */
+	} else if (flags & EXT4_GET_BLOCKS_CONVERT) {
+		split_flag |= ee_block + ee_len <= eof_block ?
+			      EXT4_EXT_MAY_ZEROOUT : 0;
+		split_flag |= (EXT4_EXT_MARK_UNINIT2 | EXT4_EXT_DATA_VALID2);
+	}
 	flags |= EXT4_GET_BLOCKS_PRE_IO;
 	return ext4_split_extent(handle, inode, path, map, split_flag, flags);
 }
 
+static int ext4_convert_initialized_extents(handle_t *handle,
+					    struct inode *inode,
+					    struct ext4_map_blocks *map,
+					    struct ext4_ext_path *path)
+{
+	struct ext4_extent *ex;
+	ext4_lblk_t ee_block;
+	unsigned int ee_len;
+	int depth;
+	int err = 0;
+
+	depth = ext_depth(inode);
+	ex = path[depth].p_ext;
+	ee_block = le32_to_cpu(ex->ee_block);
+	ee_len = ext4_ext_get_actual_len(ex);
+
+	ext_debug("%s: inode %lu, logical "
+		"block %llu, max_blocks %u\n", __func__, inode->i_ino,
+		(unsigned long long)ee_block, ee_len);
+
+	if (ee_block != map->m_lblk || ee_len > map->m_len) {
+		err = ext4_split_convert_extents(handle, inode, map, path,
+				EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
+		if (err < 0)
+			goto out;
+		ext4_ext_drop_refs(path);
+		path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
+		if (IS_ERR(path)) {
+			err = PTR_ERR(path);
+			goto out;
+		}
+		depth = ext_depth(inode);
+		ex = path[depth].p_ext;
+	}
+
+	err = ext4_ext_get_access(handle, inode, path + depth);
+	if (err)
+		goto out;
+	/* first mark the extent as uninitialized */
+	ext4_ext_mark_uninitialized(ex);
+
+	/* note: ext4_ext_correct_indexes() isn't needed here because
+	 * borders are not changed
+	 */
+	ext4_ext_try_to_merge(handle, inode, path, ex);
+
+	/* Mark modified extent as dirty */
+	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+out:
+	ext4_ext_show_leaf(inode, path);
+	return err;
+}
+
+
 static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 						struct inode *inode,
 						struct ext4_map_blocks *map,
@@ -3649,8 +3748,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 		  inode->i_ino, (unsigned long long)ee_block, ee_len,
 		  (unsigned long long)map->m_lblk, map->m_len);
 #endif
-	err = ext4_split_unwritten_extents(handle, inode, map, path,
-					   EXT4_GET_BLOCKS_CONVERT);
+	err = ext4_split_convert_extents(handle, inode, map, path,
+					 EXT4_GET_BLOCKS_CONVERT);
 	if (err < 0)
 		goto out;
 	ext4_ext_drop_refs(path);
@@ -3851,6 +3950,38 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
 }
 
 static int
+ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
+				    struct ext4_map_blocks *map,
+				    struct ext4_ext_path *path, int flags,
+				    unsigned int allocated, ext4_fsblk_t newblock)
+{
+	int ret = 0;
+	int err = 0;
+
+	/*
+	 * Make sure that the extent is no bigger than we support with
+	 * uninitialized extent
+	 */
+	if (map->m_len > EXT_UNINIT_MAX_LEN)
+		map->m_len = EXT_UNINIT_MAX_LEN / 2;
+
+	ret = ext4_convert_initialized_extents(handle, inode, map,
+						path);
+	if (ret >= 0) {
+		ext4_update_inode_fsync_trans(handle, inode, 1);
+		err = check_eofblocks_fl(handle, inode, map->m_lblk,
+					 path, map->m_len);
+	} else
+		err = ret;
+	map->m_flags |= EXT4_MAP_UNWRITTEN;
+	if (allocated > map->m_len)
+		allocated = map->m_len;
+	map->m_len = allocated;
+
+	return err ? err : allocated;
+}
+
+static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 			struct ext4_map_blocks *map,
 			struct ext4_ext_path *path, int flags,
@@ -3877,8 +4008,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 
 	/* get_block() before submit the IO, split the extent */
 	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-		ret = ext4_split_unwritten_extents(handle, inode, map,
-						   path, flags);
+		ret = ext4_split_convert_extents(handle, inode, map,
+				path, flags | EXT4_GET_BLOCKS_CONVERT);
 		if (ret <= 0)
 			goto out;
 		/*
@@ -3993,10 +4124,6 @@ out1:
 	map->m_pblk = newblock;
 	map->m_len = allocated;
 out2:
-	if (path) {
-		ext4_ext_drop_refs(path);
-		kfree(path);
-	}
 	return err ? err : allocated;
 }
 
@@ -4128,7 +4255,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	struct ext4_extent newex, *ex, *ex2;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	ext4_fsblk_t newblock = 0;
-	int free_on_err = 0, err = 0, depth;
+	int free_on_err = 0, err = 0, depth, ret;
 	unsigned int allocated = 0, offset = 0;
 	unsigned int allocated_clusters = 0;
 	struct ext4_allocation_request ar;
@@ -4170,6 +4297,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
 		unsigned short ee_len;
 
+
 		/*
 		 * Uninitialized extents are treated as holes, except that
 		 * we split out initialized portions during a write.
@@ -4186,13 +4314,27 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
 			  ee_block, ee_len, newblock);
 
-		if (!ext4_ext_is_uninitialized(ex))
+		/*
+		 * If the extent is initialized check whether the
+		 * caller wants to convert it to unwritten.
+		 */
+		if ((!ext4_ext_is_uninitialized(ex)) &&
+		    (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
+			allocated = ext4_ext_convert_initialized_extent(
+				handle, inode, map, path, flags,
+				allocated, newblock);
+			goto out2;
+		} else if (!ext4_ext_is_uninitialized(ex))
 			goto out;
 
-		allocated = ext4_ext_handle_uninitialized_extents(
+		ret = ext4_ext_handle_uninitialized_extents(
 			handle, inode, map, path, flags,
 			allocated, newblock);
-		goto out3;
+		if (ret < 0)
+			err = ret;
+		else
+			allocated = ret;
+		goto out2;
 	}
 }
 
@@ -4473,7 +4615,6 @@ out2:
 		kfree(path);
 	}
 
-out3:
 	trace_ext4_ext_map_blocks_exit(inode, flags, map,
 				       err ? err : allocated);
 	ext4_es_lru_add(inode);
@@ -4514,34 +4655,200 @@ retry:
 	ext4_std_error(inode->i_sb, err);
 }
 
-static void ext4_falloc_update_inode(struct inode *inode,
-				     int mode, loff_t new_size, int update_ctime)
+static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
+				  ext4_lblk_t len, int flags, int mode)
 {
-	struct timespec now;
+	struct inode *inode = file_inode(file);
+	handle_t *handle;
+	int ret = 0;
+	int ret2 = 0;
+	int retries = 0;
+	struct ext4_map_blocks map;
+	unsigned int credits;
 
-	if (update_ctime) {
-		now = current_fs_time(inode->i_sb);
-		if (!timespec_equal(&inode->i_ctime, &now))
-			inode->i_ctime = now;
+	map.m_lblk = offset;
+	/*
+	 * Don't normalize the request if it can fit in one extent so
+	 * that it doesn't get unnecessarily split into multiple
+	 * extents.
+	 */
+	if (len <= EXT_UNINIT_MAX_LEN)
+		flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
+
+	/*
+	 * credits to insert 1 extent into extent tree
+	 */
+	credits = ext4_chunk_trans_blocks(inode, len);
+
+retry:
+	while (ret >= 0 && ret < len) {
+		map.m_lblk = map.m_lblk + ret;
+		map.m_len = len = len - ret;
+		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
+					    credits);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			break;
+		}
+		ret = ext4_map_blocks(handle, inode, &map, flags);
+		if (ret <= 0) {
+			ext4_debug("inode #%lu: block %u: len %u: "
+				   "ext4_ext_map_blocks returned %d",
+				   inode->i_ino, map.m_lblk,
+				   map.m_len, ret);
+			ext4_mark_inode_dirty(handle, inode);
+			ret2 = ext4_journal_stop(handle);
+			break;
+		}
+		ret2 = ext4_journal_stop(handle);
+		if (ret2)
+			break;
+	}
+	if (ret == -ENOSPC &&
+	    ext4_should_retry_alloc(inode->i_sb, &retries)) {
+		ret = 0;
+		goto retry;
 	}
 
+	return ret > 0 ? ret2 : ret;
+}
+
+static long ext4_zero_range(struct file *file, loff_t offset,
+			    loff_t len, int mode)
+{
+	struct inode *inode = file_inode(file);
+	handle_t *handle = NULL;
+	unsigned int max_blocks;
+	loff_t new_size = 0;
+	int ret = 0;
+	int flags;
+	int partial;
+	loff_t start, end;
+	ext4_lblk_t lblk;
+	struct address_space *mapping = inode->i_mapping;
+	unsigned int blkbits = inode->i_blkbits;
+
+	trace_ext4_zero_range(inode, offset, len, mode);
+
+	/*
+	 * Write out all dirty pages to avoid race conditions
+	 * Then release them.
+	 */
+	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+		ret = filemap_write_and_wait_range(mapping, offset,
+						   offset + len - 1);
+		if (ret)
+			return ret;
+	}
+
 	/*
-	 * Update only when preallocation was requested beyond
-	 * the file size.
+	 * Round up offset. This is not fallocate, we need to zero out
+	 * blocks, so convert interior block aligned part of the range to
+	 * unwritten and possibly manually zero out unaligned parts of the
+	 * range.
 	 */
-	if (!(mode & FALLOC_FL_KEEP_SIZE)) {
+	start = round_up(offset, 1 << blkbits);
+	end = round_down((offset + len), 1 << blkbits);
+
+	if (start < offset || end > offset + len)
+		return -EINVAL;
+	partial = (offset + len) & ((1 << blkbits) - 1);
+
+	lblk = start >> blkbits;
+	max_blocks = (end >> blkbits);
+	if (max_blocks < lblk)
+		max_blocks = 0;
+	else
+		max_blocks -= lblk;
+
+	flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT |
+		EXT4_GET_BLOCKS_CONVERT_UNWRITTEN;
+	if (mode & FALLOC_FL_KEEP_SIZE)
+		flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
+
+	mutex_lock(&inode->i_mutex);
+
+	/*
+	 * Indirect files do not support unwritten extents
+	 */
+	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
+		ret = -EOPNOTSUPP;
+		goto out_mutex;
+	}
+
+	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+	     offset + len > i_size_read(inode)) {
+		new_size = offset + len;
+		ret = inode_newsize_ok(inode, new_size);
+		if (ret)
+			goto out_mutex;
+		/*
+		 * If we have a partial block after EOF we have to allocate
+		 * the entire block.
+		 */
+		if (partial)
+			max_blocks += 1;
+	}
+
+	if (max_blocks > 0) {
+
+		/* Now release the pages and zero block aligned part of pages*/
+		truncate_pagecache_range(inode, start, end - 1);
+
+		/* Wait all existing dio workers, newcomers will block on i_mutex */
+		ext4_inode_block_unlocked_dio(inode);
+		inode_dio_wait(inode);
+
+		/*
+		 * Remove entire range from the extent status tree.
+		 */
+		ret = ext4_es_remove_extent(inode, lblk, max_blocks);
+		if (ret)
+			goto out_dio;
+
+		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags,
+					     mode);
+		if (ret)
+			goto out_dio;
+	}
+
+	handle = ext4_journal_start(inode, EXT4_HT_MISC, 4);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		ext4_std_error(inode->i_sb, ret);
+		goto out_dio;
+	}
+
+	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+
+	if (new_size) {
 		if (new_size > i_size_read(inode))
 			i_size_write(inode, new_size);
 		if (new_size > EXT4_I(inode)->i_disksize)
 			ext4_update_i_disksize(inode, new_size);
 	} else {
 		/*
 		 * Mark that we allocate beyond EOF so the subsequent truncate
 		 * can proceed even if the new size is the same as i_size.
 		 */
-		if (new_size > i_size_read(inode))
+		if ((offset + len) > i_size_read(inode))
 			ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
 	}
 
+	ext4_mark_inode_dirty(handle, inode);
+
+	/* Zero out partial block at the edges of the range */
+	ret = ext4_zero_partial_blocks(handle, inode, offset, len);
+
+	if (file->f_flags & O_SYNC)
+		ext4_handle_sync(handle);
+
+	ext4_journal_stop(handle);
+out_dio:
+	ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+	mutex_unlock(&inode->i_mutex);
+	return ret;
 }
 
 /*
@@ -4555,22 +4862,25 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
 	struct inode *inode = file_inode(file);
 	handle_t *handle;
-	loff_t new_size;
+	loff_t new_size = 0;
 	unsigned int max_blocks;
 	int ret = 0;
-	int ret2 = 0;
-	int retries = 0;
 	int flags;
-	struct ext4_map_blocks map;
-	unsigned int credits, blkbits = inode->i_blkbits;
+	ext4_lblk_t lblk;
+	struct timespec tv;
+	unsigned int blkbits = inode->i_blkbits;
 
 	/* Return error if mode is not supported */
-	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
 		return -EOPNOTSUPP;
 
 	if (mode & FALLOC_FL_PUNCH_HOLE)
 		return ext4_punch_hole(inode, offset, len);
 
+	if (mode & FALLOC_FL_COLLAPSE_RANGE)
+		return ext4_collapse_range(inode, offset, len);
+
 	ret = ext4_convert_inline_data(inode);
 	if (ret)
 		return ret;
@@ -4582,83 +4892,66 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 		return -EOPNOTSUPP;
 
+	if (mode & FALLOC_FL_ZERO_RANGE)
+		return ext4_zero_range(file, offset, len, mode);
+
 	trace_ext4_fallocate_enter(inode, offset, len, mode);
-	map.m_lblk = offset >> blkbits;
+	lblk = offset >> blkbits;
 	/*
 	 * We can't just convert len to max_blocks because
 	 * If blocksize = 4096 offset = 3072 and len = 2048
 	 */
 	max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
-		- map.m_lblk;
-	/*
-	 * credits to insert 1 extent into extent tree
-	 */
-	credits = ext4_chunk_trans_blocks(inode, max_blocks);
-	mutex_lock(&inode->i_mutex);
-	ret = inode_newsize_ok(inode, (len + offset));
-	if (ret) {
-		mutex_unlock(&inode->i_mutex);
-		trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
-		return ret;
-	}
+		- lblk;
+
 	flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT;
 	if (mode & FALLOC_FL_KEEP_SIZE)
 		flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
-	/*
-	 * Don't normalize the request if it can fit in one extent so
-	 * that it doesn't get unnecessarily split into multiple
-	 * extents.
-	 */
-	if (len <= EXT_UNINIT_MAX_LEN << blkbits)
-		flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
 
-retry:
-	while (ret >= 0 && ret < max_blocks) {
-		map.m_lblk = map.m_lblk + ret;
-		map.m_len = max_blocks = max_blocks - ret;
-		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
-					    credits);
-		if (IS_ERR(handle)) {
-			ret = PTR_ERR(handle);
-			break;
-		}
-		ret = ext4_map_blocks(handle, inode, &map, flags);
-		if (ret <= 0) {
-#ifdef EXT4FS_DEBUG
-			ext4_warning(inode->i_sb,
-				     "inode #%lu: block %u: len %u: "
-				     "ext4_ext_map_blocks returned %d",
-				     inode->i_ino, map.m_lblk,
-				     map.m_len, ret);
-#endif
-			ext4_mark_inode_dirty(handle, inode);
-			ret2 = ext4_journal_stop(handle);
-			break;
-		}
-		if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
-						blkbits) >> blkbits))
-			new_size = offset + len;
-		else
-			new_size = ((loff_t) map.m_lblk + ret) << blkbits;
+	mutex_lock(&inode->i_mutex);
 
-		ext4_falloc_update_inode(inode, mode, new_size,
-					 (map.m_flags & EXT4_MAP_NEW));
-		ext4_mark_inode_dirty(handle, inode);
-		if ((file->f_flags & O_SYNC) && ret >= max_blocks)
-			ext4_handle_sync(handle);
-		ret2 = ext4_journal_stop(handle);
-		if (ret2)
-			break;
+	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+	     offset + len > i_size_read(inode)) {
+		new_size = offset + len;
+		ret = inode_newsize_ok(inode, new_size);
+		if (ret)
+			goto out;
 	}
-	if (ret == -ENOSPC &&
-	    ext4_should_retry_alloc(inode->i_sb, &retries)) {
-		ret = 0;
-		goto retry;
+
+	ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode);
+	if (ret)
+		goto out;
+
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+	if (IS_ERR(handle))
+		goto out;
+
+	tv = inode->i_ctime = ext4_current_time(inode);
+
+	if (new_size) {
+		if (new_size > i_size_read(inode)) {
+			i_size_write(inode, new_size);
+			inode->i_mtime = tv;
+		}
+		if (new_size > EXT4_I(inode)->i_disksize)
+			ext4_update_i_disksize(inode, new_size);
+	} else {
+		/*
+		 * Mark that we allocate beyond EOF so the subsequent truncate
+		 * can proceed even if the new size is the same as i_size.
+		 */
+		if ((offset + len) > i_size_read(inode))
+			ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
 	}
+	ext4_mark_inode_dirty(handle, inode);
+	if (file->f_flags & O_SYNC)
+		ext4_handle_sync(handle);
+
+	ext4_journal_stop(handle);
+out:
 	mutex_unlock(&inode->i_mutex);
-	trace_ext4_fallocate_exit(inode, offset, max_blocks,
-				  ret > 0 ? ret2 : ret);
-	return ret > 0 ? ret2 : ret;
+	trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
+	return ret;
 }
 
 /*
@@ -4869,3 +5162,304 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	ext4_es_lru_add(inode);
 	return error;
 }
+
+/*
+ * ext4_access_path:
+ * Function to access the path buffer for marking it dirty.
+ * It also checks if there are sufficient credits left in the journal handle
+ * to update path.
+ */
+static int
+ext4_access_path(handle_t *handle, struct inode *inode,
+		struct ext4_ext_path *path)
+{
+	int credits, err;
+
+	if (!ext4_handle_valid(handle))
+		return 0;
+
+	/*
+	 * Check if need to extend journal credits
+	 * 3 for leaf, sb, and inode plus 2 (bmap and group
+	 * descriptor) for each block group; assume two block
+	 * groups
+	 */
+	if (handle->h_buffer_credits < 7) {
+		credits = ext4_writepage_trans_blocks(inode);
+		err = ext4_ext_truncate_extend_restart(handle, inode, credits);
+		/* EAGAIN is success */
+		if (err && err != -EAGAIN)
+			return err;
+	}
+
+	err = ext4_ext_get_access(handle, inode, path);
+	return err;
+}
+
+/*
+ * ext4_ext_shift_path_extents:
+ * Shift the extents of a path structure lying between path[depth].p_ext
+ * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift
+ * from starting block for each extent.
+ */
+static int
+ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
+			    struct inode *inode, handle_t *handle,
+			    ext4_lblk_t *start)
+{
+	int depth, err = 0;
+	struct ext4_extent *ex_start, *ex_last;
+	bool update = 0;
+	depth = path->p_depth;
+
+	while (depth >= 0) {
+		if (depth == path->p_depth) {
+			ex_start = path[depth].p_ext;
+			if (!ex_start)
+				return -EIO;
+
+			ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
+			if (!ex_last)
+				return -EIO;
+
+			err = ext4_access_path(handle, inode, path + depth);
+			if (err)
+				goto out;
+
+			if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
+				update = 1;
+
+			*start = ex_last->ee_block +
+				ext4_ext_get_actual_len(ex_last);
+
+			while (ex_start <= ex_last) {
+				ex_start->ee_block -= shift;
+				if (ex_start >
+					EXT_FIRST_EXTENT(path[depth].p_hdr)) {
+					if (ext4_ext_try_to_merge_right(inode,
+						path, ex_start - 1))
+						ex_last--;
+				}
+				ex_start++;
+			}
+			err = ext4_ext_dirty(handle, inode, path + depth);
+			if (err)
+				goto out;
+
+			if (--depth < 0 || !update)
+				break;
+		}
+
+		/* Update index too */
+		err = ext4_access_path(handle, inode, path + depth);
+		if (err)
+			goto out;
+
+		path[depth].p_idx->ei_block -= shift;
+		err = ext4_ext_dirty(handle, inode, path + depth);
+		if (err)
+			goto out;
+
+		/* we are done if current index is not a starting index */
+		if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr))
+			break;
+
+		depth--;
+	}
+
+out:
+	return err;
+}
+
+/*
+ * ext4_ext_shift_extents:
+ * All the extents which lie in the range from start to the last allocated
+ * block for the file are shifted downwards by shift blocks.
+ * On success, 0 is returned, error otherwise.
+ */
+static int
+ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
+		       ext4_lblk_t start, ext4_lblk_t shift)
+{
+	struct ext4_ext_path *path;
+	int ret = 0, depth;
+	struct ext4_extent *extent;
+	ext4_lblk_t stop_block, current_block;
+	ext4_lblk_t ex_start, ex_end;
+
+	/* Let path point to the last extent */
+	path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	depth = path->p_depth;
+	extent = path[depth].p_ext;
+	if (!extent) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+		return ret;
+	}
+
+	stop_block = extent->ee_block + ext4_ext_get_actual_len(extent);
+	ext4_ext_drop_refs(path);
+	kfree(path);
+
+	/* Nothing to shift, if hole is at the end of file */
+	if (start >= stop_block)
+		return ret;
+
+	/*
+	 * Don't start shifting extents until we make sure the hole is big
+	 * enough to accommodate the shift.
+	 */
+	path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
+	depth = path->p_depth;
+	extent = path[depth].p_ext;
+	ex_start = extent->ee_block;
+	ex_end = extent->ee_block + ext4_ext_get_actual_len(extent);
+	ext4_ext_drop_refs(path);
+	kfree(path);
+
+	if ((start == ex_start && shift > ex_start) ||
+	    (shift > start - ex_end))
+		return -EINVAL;
+
+	/* It's safe to start updating extents */
+	while (start < stop_block) {
+		path = ext4_ext_find_extent(inode, start, NULL, 0);
+		if (IS_ERR(path))
+			return PTR_ERR(path);
+		depth = path->p_depth;
+		extent = path[depth].p_ext;
+		current_block = extent->ee_block;
+		if (start > current_block) {
+			/* Hole, move to the next extent */
+			ret = mext_next_extent(inode, path, &extent);
+			if (ret != 0) {
+				ext4_ext_drop_refs(path);
+				kfree(path);
+				if (ret == 1)
+					ret = 0;
+				break;
+			}
+		}
+		ret = ext4_ext_shift_path_extents(path, shift, inode,
+						  handle, &start);
+		ext4_ext_drop_refs(path);
+		kfree(path);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * ext4_collapse_range:
+ * This implements the fallocate's collapse range functionality for ext4
+ * Returns: 0 on success and non-zero on error.
+ */
+int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+{
+	struct super_block *sb = inode->i_sb;
+	ext4_lblk_t punch_start, punch_stop;
+	handle_t *handle;
+	unsigned int credits;
+	loff_t new_size;
+	int ret;
+
+	BUG_ON(offset + len > i_size_read(inode));
+
+	/* Collapse range works only on fs block size aligned offsets. */
+	if (offset & (EXT4_BLOCK_SIZE(sb) - 1) ||
+	    len & (EXT4_BLOCK_SIZE(sb) - 1))
+		return -EINVAL;
+
+	if (!S_ISREG(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	trace_ext4_collapse_range(inode, offset, len);
+
+	punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
+	punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
+
+	/* Write out all dirty pages */
+	ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1);
+	if (ret)
+		return ret;
+
+	/* Take mutex lock */
+	mutex_lock(&inode->i_mutex);
+
+	/* It's not possible to punch hole on append only file */
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+		ret = -EPERM;
+		goto out_mutex;
+	}
+
+	if (IS_SWAPFILE(inode)) {
+		ret = -ETXTBSY;
+		goto out_mutex;
+	}
+
+	/* Currently just for extent based files */
+	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+		ret = -EOPNOTSUPP;
+		goto out_mutex;
+	}
+
+	truncate_pagecache_range(inode, offset, -1);
+
+	/* Wait for existing dio to complete */
+	ext4_inode_block_unlocked_dio(inode);
+	inode_dio_wait(inode);
+
+	credits = ext4_writepage_trans_blocks(inode);
+	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out_dio;
+	}
+
+	down_write(&EXT4_I(inode)->i_data_sem);
+	ext4_discard_preallocations(inode);
+
+	ret = ext4_es_remove_extent(inode, punch_start,
+				    EXT_MAX_BLOCKS - punch_start - 1);
+	if (ret) {
+		up_write(&EXT4_I(inode)->i_data_sem);
+		goto out_stop;
+	}
+
+	ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
+	if (ret) {
+		up_write(&EXT4_I(inode)->i_data_sem);
+		goto out_stop;
+	}
+
+	ret = ext4_ext_shift_extents(inode, handle, punch_stop,
+				     punch_stop - punch_start);
+	if (ret) {
+		up_write(&EXT4_I(inode)->i_data_sem);
+		goto out_stop;
+	}
+
+	new_size = i_size_read(inode) - len;
+	truncate_setsize(inode, new_size);
+	EXT4_I(inode)->i_disksize = new_size;
+
+	ext4_discard_preallocations(inode);
+	up_write(&EXT4_I(inode)->i_data_sem);
+	if (IS_SYNC(inode))
+		ext4_handle_sync(handle);
+	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+	ext4_mark_inode_dirty(handle, inode);
+
+out_stop:
+	ext4_journal_stop(handle);
+out_dio:
+	ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 3981ff783950..0a014a7194b2 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -184,7 +184,7 @@ static void ext4_es_print_tree(struct inode *inode)
 	while (node) {
 		struct extent_status *es;
 		es = rb_entry(node, struct extent_status, rb_node);
-		printk(KERN_DEBUG " [%u/%u) %llu %llx",
+		printk(KERN_DEBUG " [%u/%u) %llu %x",
 		       es->es_lblk, es->es_len,
 		       ext4_es_pblock(es), ext4_es_status(es));
 		node = rb_next(node);
@@ -445,8 +445,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
 			pr_warn("ES insert assertion failed for "
 				"inode: %lu we can find an extent "
 				"at block [%d/%d/%llu/%c], but we "
-				"want to add an delayed/hole extent "
-				"[%d/%d/%llu/%llx]\n",
+				"want to add a delayed/hole extent "
+				"[%d/%d/%llu/%x]\n",
 				inode->i_ino, ee_block, ee_len,
 				ee_start, ee_status ? 'u' : 'w',
 				es->es_lblk, es->es_len,
@@ -486,8 +486,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
 		if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
 			pr_warn("ES insert assertion failed for inode: %lu "
 				"can't find an extent at block %d but we want "
-				"to add an written/unwritten extent "
-				"[%d/%d/%llu/%llx]\n", inode->i_ino,
+				"to add a written/unwritten extent "
+				"[%d/%d/%llu/%x]\n", inode->i_ino,
 				es->es_lblk, es->es_lblk, es->es_len,
 				ext4_es_pblock(es), ext4_es_status(es));
 		}
@@ -524,7 +524,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
524 */ 524 */
525 pr_warn("ES insert assertion failed for inode: %lu " 525 pr_warn("ES insert assertion failed for inode: %lu "
526 "We can find blocks but we want to add a " 526 "We can find blocks but we want to add a "
527 "delayed/hole extent [%d/%d/%llu/%llx]\n", 527 "delayed/hole extent [%d/%d/%llu/%x]\n",
528 inode->i_ino, es->es_lblk, es->es_len, 528 inode->i_ino, es->es_lblk, es->es_len,
529 ext4_es_pblock(es), ext4_es_status(es)); 529 ext4_es_pblock(es), ext4_es_status(es));
530 return; 530 return;
@@ -554,7 +554,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
554 if (ext4_es_is_written(es)) { 554 if (ext4_es_is_written(es)) {
555 pr_warn("ES insert assertion failed for inode: %lu " 555 pr_warn("ES insert assertion failed for inode: %lu "
556 "We can't find the block but we want to add " 556 "We can't find the block but we want to add "
557 "an written extent [%d/%d/%llu/%llx]\n", 557 "a written extent [%d/%d/%llu/%x]\n",
558 inode->i_ino, es->es_lblk, es->es_len, 558 inode->i_ino, es->es_lblk, es->es_len,
559 ext4_es_pblock(es), ext4_es_status(es)); 559 ext4_es_pblock(es), ext4_es_status(es));
560 return; 560 return;
@@ -658,8 +658,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
658 658
659 newes.es_lblk = lblk; 659 newes.es_lblk = lblk;
660 newes.es_len = len; 660 newes.es_len = len;
661 ext4_es_store_pblock(&newes, pblk); 661 ext4_es_store_pblock_status(&newes, pblk, status);
662 ext4_es_store_status(&newes, status);
663 trace_ext4_es_insert_extent(inode, &newes); 662 trace_ext4_es_insert_extent(inode, &newes);
664 663
665 ext4_es_insert_extent_check(inode, &newes); 664 ext4_es_insert_extent_check(inode, &newes);
@@ -699,8 +698,7 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
699 698
700 newes.es_lblk = lblk; 699 newes.es_lblk = lblk;
701 newes.es_len = len; 700 newes.es_len = len;
702 ext4_es_store_pblock(&newes, pblk); 701 ext4_es_store_pblock_status(&newes, pblk, status);
703 ext4_es_store_status(&newes, status);
704 trace_ext4_es_cache_extent(inode, &newes); 702 trace_ext4_es_cache_extent(inode, &newes);
705 703
706 if (!len) 704 if (!len)
@@ -812,13 +810,13 @@ retry:
812 810
813 newes.es_lblk = end + 1; 811 newes.es_lblk = end + 1;
814 newes.es_len = len2; 812 newes.es_len = len2;
813 block = 0x7FDEADBEEF;
815 if (ext4_es_is_written(&orig_es) || 814 if (ext4_es_is_written(&orig_es) ||
816 ext4_es_is_unwritten(&orig_es)) { 815 ext4_es_is_unwritten(&orig_es))
817 block = ext4_es_pblock(&orig_es) + 816 block = ext4_es_pblock(&orig_es) +
818 orig_es.es_len - len2; 817 orig_es.es_len - len2;
819 ext4_es_store_pblock(&newes, block); 818 ext4_es_store_pblock_status(&newes, block,
820 } 819 ext4_es_status(&orig_es));
821 ext4_es_store_status(&newes, ext4_es_status(&orig_es));
822 err = __es_insert_extent(inode, &newes); 820 err = __es_insert_extent(inode, &newes);
823 if (err) { 821 if (err) {
824 es->es_lblk = orig_es.es_lblk; 822 es->es_lblk = orig_es.es_lblk;
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 167f4ab8ecc3..f1b62a419920 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -129,6 +129,15 @@ static inline void ext4_es_store_status(struct extent_status *es,
129 (es->es_pblk & ~ES_MASK)); 129 (es->es_pblk & ~ES_MASK));
130} 130}
131 131
132static inline void ext4_es_store_pblock_status(struct extent_status *es,
133 ext4_fsblk_t pb,
134 unsigned int status)
135{
136 es->es_pblk = (((ext4_fsblk_t)
137 (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) |
138 (pb & ~ES_MASK));
139}
140
132extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); 141extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi);
133extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); 142extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
134extern void ext4_es_lru_add(struct inode *inode); 143extern void ext4_es_lru_add(struct inode *inode);
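
[editor's note] The new ext4_es_store_pblock_status() writes the status bits and the physical block into es_pblk with a single store, where the old pair of helpers left a window in which the field held a block number combined with stale status bits. A userspace sketch of the same packing, using illustrative shift/mask values rather than the real ES_SHIFT, ES_MASK, and EXTENT_STATUS_FLAGS:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative constants only; the real values live in
     * fs/ext4/extents_status.h. */
    #define DEMO_ES_SHIFT     60
    #define DEMO_ES_MASK      (~((1ULL << DEMO_ES_SHIFT) - 1))
    #define DEMO_STATUS_FLAGS 0xfu

    static uint64_t store_pblock_status(uint64_t pblk, unsigned int status)
    {
            /* One store sets both fields, so a reader never sees a block
             * number paired with stale status bits, or vice versa. */
            return ((uint64_t)(status & DEMO_STATUS_FLAGS) << DEMO_ES_SHIFT) |
                   (pblk & ~DEMO_ES_MASK);
    }

    int main(void)
    {
            uint64_t es_pblk = store_pblock_status(123456, 0x8);

            printf("pblk=%llu status=%x\n",
                   (unsigned long long)(es_pblk & ~DEMO_ES_MASK),
                   (unsigned int)(es_pblk >> DEMO_ES_SHIFT));
            return 0;
    }
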
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 175c3f933816..5b0d2c7d5408 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -504,6 +504,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
504{ 504{
505 struct extent_status es; 505 struct extent_status es;
506 int retval; 506 int retval;
507 int ret = 0;
507#ifdef ES_AGGRESSIVE_TEST 508#ifdef ES_AGGRESSIVE_TEST
508 struct ext4_map_blocks orig_map; 509 struct ext4_map_blocks orig_map;
509 510
@@ -515,6 +516,12 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
515 "logical block %lu\n", inode->i_ino, flags, map->m_len, 516 "logical block %lu\n", inode->i_ino, flags, map->m_len,
516 (unsigned long) map->m_lblk); 517 (unsigned long) map->m_lblk);
517 518
519 /*
520 * ext4_map_blocks returns an int, and m_len is an unsigned int
521 */
522 if (unlikely(map->m_len > INT_MAX))
523 map->m_len = INT_MAX;
524
518 /* Lookup extent status tree firstly */ 525 /* Lookup extent status tree firstly */
519 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { 526 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
520 ext4_es_lru_add(inode); 527 ext4_es_lru_add(inode);
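
[editor's note] The INT_MAX clamp added above exists because ext4_map_blocks() reports the mapped length through its int return value while m_len is an unsigned int; without the clamp, a large enough request would come back looking like a negative error code. A compact demonstration:

    #include <limits.h>
    #include <stdio.h>

    /* Why the clamp matters: a count above INT_MAX, squeezed through an
     * int return value, would appear negative to the caller. */
    static int map_blocks_len(unsigned int m_len)
    {
            if (m_len > INT_MAX)    /* same clamp as the hunk above */
                    m_len = INT_MAX;
            return (int)m_len;      /* now always non-negative */
    }

    int main(void)
    {
            printf("%d\n", map_blocks_len(3000000000u)); /* 2147483647 */
            return 0;
    }
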
@@ -553,7 +560,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
553 EXT4_GET_BLOCKS_KEEP_SIZE); 560 EXT4_GET_BLOCKS_KEEP_SIZE);
554 } 561 }
555 if (retval > 0) { 562 if (retval > 0) {
556 int ret;
557 unsigned int status; 563 unsigned int status;
558 564
559 if (unlikely(retval != map->m_len)) { 565 if (unlikely(retval != map->m_len)) {
@@ -580,7 +586,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
580 586
581found: 587found:
582 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 588 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
583 int ret = check_block_validity(inode, map); 589 ret = check_block_validity(inode, map);
584 if (ret != 0) 590 if (ret != 0)
585 return ret; 591 return ret;
586 } 592 }
@@ -597,7 +603,13 @@ found:
597 * with buffer head unmapped. 603 * with buffer head unmapped.
598 */ 604 */
599 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) 605 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
600 return retval; 606 /*
607 * If we need to convert extent to unwritten
608 * we continue and do the actual work in
609 * ext4_ext_map_blocks()
610 */
611 if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN))
612 return retval;
601 613
602 /* 614 /*
603 * Here we clear m_flags because after allocating a new extent, 615
@@ -653,7 +665,6 @@ found:
653 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); 665 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
654 666
655 if (retval > 0) { 667 if (retval > 0) {
656 int ret;
657 unsigned int status; 668 unsigned int status;
658 669
659 if (unlikely(retval != map->m_len)) { 670 if (unlikely(retval != map->m_len)) {
@@ -688,7 +699,7 @@ found:
688has_zeroout: 699has_zeroout:
689 up_write((&EXT4_I(inode)->i_data_sem)); 700 up_write((&EXT4_I(inode)->i_data_sem));
690 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 701 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
691 int ret = check_block_validity(inode, map); 702 ret = check_block_validity(inode, map);
692 if (ret != 0) 703 if (ret != 0)
693 return ret; 704 return ret;
694 } 705 }
@@ -3313,33 +3324,13 @@ void ext4_set_aops(struct inode *inode)
3313} 3324}
3314 3325
3315/* 3326/*
3316 * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
3317 * up to the end of the block which corresponds to `from'.
3318 * This is required during truncate. We need to physically zero the tail end
3319 * of that block so it doesn't yield old data if the file is later grown.
3320 */
3321int ext4_block_truncate_page(handle_t *handle,
3322 struct address_space *mapping, loff_t from)
3323{
3324 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3325 unsigned length;
3326 unsigned blocksize;
3327 struct inode *inode = mapping->host;
3328
3329 blocksize = inode->i_sb->s_blocksize;
3330 length = blocksize - (offset & (blocksize - 1));
3331
3332 return ext4_block_zero_page_range(handle, mapping, from, length);
3333}
3334
3335/*
3336 * ext4_block_zero_page_range() zeros out a mapping of length 'length' 3327 * ext4_block_zero_page_range() zeros out a mapping of length 'length'
3337 * starting from file offset 'from'. The range to be zero'd must 3328 * starting from file offset 'from'. The range to be zero'd must
3338 * be contained within one block. If the specified range exceeds 3329 * be contained within one block. If the specified range exceeds
3339 * the end of the block it will be shortened to the end of the block 3330 * the end of the block it will be shortened to the end of the block
3340 * that corresponds to 'from' 3331 * that corresponds to 'from'
3341 */ 3332 */
3342int ext4_block_zero_page_range(handle_t *handle, 3333static int ext4_block_zero_page_range(handle_t *handle,
3343 struct address_space *mapping, loff_t from, loff_t length) 3334 struct address_space *mapping, loff_t from, loff_t length)
3344{ 3335{
3345 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; 3336 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
@@ -3429,6 +3420,26 @@ unlock:
3429 return err; 3420 return err;
3430} 3421}
3431 3422
3423/*
3424 * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
3425 * up to the end of the block which corresponds to `from'.
3426 * This is required during truncate. We need to physically zero the tail end
3427 * of that block so it doesn't yield old data if the file is later grown.
3428 */
3429int ext4_block_truncate_page(handle_t *handle,
3430 struct address_space *mapping, loff_t from)
3431{
3432 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3433 unsigned length;
3434 unsigned blocksize;
3435 struct inode *inode = mapping->host;
3436
3437 blocksize = inode->i_sb->s_blocksize;
3438 length = blocksize - (offset & (blocksize - 1));
3439
3440 return ext4_block_zero_page_range(handle, mapping, from, length);
3441}
3442
3432int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, 3443int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
3433 loff_t lstart, loff_t length) 3444 loff_t lstart, loff_t length)
3434{ 3445{
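
[editor's note] For the tail zeroing that ext4_block_truncate_page() delegates to ext4_block_zero_page_range(), the arithmetic is worth seeing with numbers. A sketch assuming a hypothetical 4096-byte block size equal to the page size (so the page mask and block mask coincide) and a truncation offset of 5000:

    #include <stdio.h>

    /* Tail computation from ext4_block_truncate_page(), worked through
     * for blocksize == pagesize == 4096 and offset 5000. */
    int main(void)
    {
            unsigned int blocksize = 4096;
            unsigned long long from = 5000;
            unsigned int offset = from & (blocksize - 1);           /* 904 */
            unsigned int length = blocksize - (offset & (blocksize - 1));

            /* Bytes [5000, 8192) are zeroed, so growing the file later
             * cannot expose stale data from the old tail. */
            printf("offset=%u length=%u\n", offset, length); /* 904, 3192 */
            return 0;
    }
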
@@ -3502,7 +3513,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3502 if (!S_ISREG(inode->i_mode)) 3513 if (!S_ISREG(inode->i_mode))
3503 return -EOPNOTSUPP; 3514 return -EOPNOTSUPP;
3504 3515
3505 trace_ext4_punch_hole(inode, offset, length); 3516 trace_ext4_punch_hole(inode, offset, length, 0);
3506 3517
3507 /* 3518 /*
3508 * Write out all dirty pages to avoid race conditions 3519 * Write out all dirty pages to avoid race conditions
@@ -3609,6 +3620,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3609 up_write(&EXT4_I(inode)->i_data_sem); 3620 up_write(&EXT4_I(inode)->i_data_sem);
3610 if (IS_SYNC(inode)) 3621 if (IS_SYNC(inode))
3611 ext4_handle_sync(handle); 3622 ext4_handle_sync(handle);
3623
3624 /* Now release the pages again to reduce race window */
3625 if (last_block_offset > first_block_offset)
3626 truncate_pagecache_range(inode, first_block_offset,
3627 last_block_offset);
3628
3612 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 3629 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
3613 ext4_mark_inode_dirty(handle, inode); 3630 ext4_mark_inode_dirty(handle, inode);
3614out_stop: 3631out_stop:
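
[editor's note] The extra truncate_pagecache_range() above narrows the window in which pages faulted back in during the punch could expose stale data. From userspace this path is exercised with fallocate(2); a minimal sketch, again with a hypothetical path, noting that PUNCH_HOLE is only valid together with KEEP_SIZE, so i_size never changes:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <linux/falloc.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/mnt/ext4/testfile", O_RDWR);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* Deallocate bytes [4096, 12288); reads there return zeros. */
            if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                          4096, 8192) < 0)
                    perror("fallocate(PUNCH_HOLE)");
            close(fd);
            return 0;
    }
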
@@ -3682,7 +3699,7 @@ void ext4_truncate(struct inode *inode)
3682 3699
3683 /* 3700 /*
3684 * There is a possibility that we're either freeing the inode 3701 * There is a possibility that we're either freeing the inode
3685 * or it completely new indode. In those cases we might not 3702 * or it's a completely new inode. In those cases we might not
3686 * have i_mutex locked because it's not necessary. 3703 * have i_mutex locked because it's not necessary.
3687 */ 3704 */
3688 if (!(inode->i_state & (I_NEW|I_FREEING))) 3705 if (!(inode->i_state & (I_NEW|I_FREEING)))
@@ -3934,8 +3951,8 @@ void ext4_set_inode_flags(struct inode *inode)
3934 new_fl |= S_NOATIME; 3951 new_fl |= S_NOATIME;
3935 if (flags & EXT4_DIRSYNC_FL) 3952 if (flags & EXT4_DIRSYNC_FL)
3936 new_fl |= S_DIRSYNC; 3953 new_fl |= S_DIRSYNC;
3937 set_mask_bits(&inode->i_flags, 3954 inode_set_flags(inode, new_fl,
3938 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); 3955 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
3939} 3956}
3940 3957
3941/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ 3958/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
@@ -4154,11 +4171,13 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4154 EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); 4171 EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
4155 EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); 4172 EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
4156 4173
4157 inode->i_version = le32_to_cpu(raw_inode->i_disk_version); 4174 if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) {
4158 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 4175 inode->i_version = le32_to_cpu(raw_inode->i_disk_version);
4159 if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) 4176 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
4160 inode->i_version |= 4177 if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
4161 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; 4178 inode->i_version |=
4179 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
4180 }
4162 } 4181 }
4163 4182
4164 ret = 0; 4183 ret = 0;
@@ -4328,8 +4347,7 @@ static int ext4_do_update_inode(handle_t *handle,
4328 goto out_brelse; 4347 goto out_brelse;
4329 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 4348 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
4330 raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); 4349 raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
4331 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 4350 if (likely(!test_opt2(inode->i_sb, HURD_COMPAT)))
4332 cpu_to_le32(EXT4_OS_HURD))
4333 raw_inode->i_file_acl_high = 4351 raw_inode->i_file_acl_high =
4334 cpu_to_le16(ei->i_file_acl >> 32); 4352 cpu_to_le16(ei->i_file_acl >> 32);
4335 raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); 4353 raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
@@ -4374,12 +4392,15 @@ static int ext4_do_update_inode(handle_t *handle,
4374 raw_inode->i_block[block] = ei->i_data[block]; 4392 raw_inode->i_block[block] = ei->i_data[block];
4375 } 4393 }
4376 4394
4377 raw_inode->i_disk_version = cpu_to_le32(inode->i_version); 4395 if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) {
4378 if (ei->i_extra_isize) { 4396 raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
4379 if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) 4397 if (ei->i_extra_isize) {
4380 raw_inode->i_version_hi = 4398 if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
4381 cpu_to_le32(inode->i_version >> 32); 4399 raw_inode->i_version_hi =
4382 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); 4400 cpu_to_le32(inode->i_version >> 32);
4401 raw_inode->i_extra_isize =
4402 cpu_to_le16(ei->i_extra_isize);
4403 }
4383 } 4404 }
4384 4405
4385 ext4_inode_csum_set(inode, raw_inode, ei); 4406 ext4_inode_csum_set(inode, raw_inode, ei);
@@ -4446,7 +4467,12 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
4446 return -EIO; 4467 return -EIO;
4447 } 4468 }
4448 4469
4449 if (wbc->sync_mode != WB_SYNC_ALL) 4470 /*
4471 * No need to force transaction in WB_SYNC_NONE mode. Also
4472 * ext4_sync_fs() will force the commit after everything is
4473 * written.
4474 */
4475 if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
4450 return 0; 4476 return 0;
4451 4477
4452 err = ext4_force_commit(inode->i_sb); 4478 err = ext4_force_commit(inode->i_sb);
@@ -4456,7 +4482,11 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
4456 err = __ext4_get_inode_loc(inode, &iloc, 0); 4482 err = __ext4_get_inode_loc(inode, &iloc, 0);
4457 if (err) 4483 if (err)
4458 return err; 4484 return err;
4459 if (wbc->sync_mode == WB_SYNC_ALL) 4485 /*
4486 * sync(2) will flush the whole buffer cache. No need to do
4487 * it here separately for each inode.
4488 */
4489 if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
4460 sync_dirty_buffer(iloc.bh); 4490 sync_dirty_buffer(iloc.bh);
4461 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { 4491 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
4462 EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, 4492 EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
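
[editor's note] Taken together, the two wbc->for_sync tests mean ext4_write_inode() no longer forces a journal commit or a synchronous buffer write for inodes written out on behalf of sync(2), since ext4_sync_fs() commits once at the end. A sketch of the resulting decision; the case labels are an interpretation of the writeback callers, not traced through every path:

    #include <stdbool.h>
    #include <stdio.h>

    /* "force" covers both the forced commit and sync_dirty_buffer(). */
    static const char *write_inode_action(bool wb_sync_all, bool for_sync)
    {
            if (!wb_sync_all || for_sync)
                    return "skip the forced commit";
            return "force commit and sync the buffer";
    }

    int main(void)
    {
            printf("SYNC_NONE:          %s\n", write_inode_action(false, false));
            printf("SYNC_ALL, sync(2):  %s\n", write_inode_action(true, true));
            printf("SYNC_ALL, other:    %s\n", write_inode_action(true, false));
            return 0;
    }
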
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a2a837f00407..0f2252ec274d 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -104,21 +104,15 @@ static long swap_inode_boot_loader(struct super_block *sb,
104 struct ext4_inode_info *ei_bl; 104 struct ext4_inode_info *ei_bl;
105 struct ext4_sb_info *sbi = EXT4_SB(sb); 105 struct ext4_sb_info *sbi = EXT4_SB(sb);
106 106
107 if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) { 107 if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode))
108 err = -EINVAL; 108 return -EINVAL;
109 goto swap_boot_out;
110 }
111 109
112 if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) { 110 if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN))
113 err = -EPERM; 111 return -EPERM;
114 goto swap_boot_out;
115 }
116 112
117 inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO); 113 inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO);
118 if (IS_ERR(inode_bl)) { 114 if (IS_ERR(inode_bl))
119 err = PTR_ERR(inode_bl); 115 return PTR_ERR(inode_bl);
120 goto swap_boot_out;
121 }
122 ei_bl = EXT4_I(inode_bl); 116 ei_bl = EXT4_I(inode_bl);
123 117
124 filemap_flush(inode->i_mapping); 118 filemap_flush(inode->i_mapping);
@@ -193,20 +187,14 @@ static long swap_inode_boot_loader(struct super_block *sb,
193 ext4_mark_inode_dirty(handle, inode); 187 ext4_mark_inode_dirty(handle, inode);
194 } 188 }
195 } 189 }
196
197 ext4_journal_stop(handle); 190 ext4_journal_stop(handle);
198
199 ext4_double_up_write_data_sem(inode, inode_bl); 191 ext4_double_up_write_data_sem(inode, inode_bl);
200 192
201journal_err_out: 193journal_err_out:
202 ext4_inode_resume_unlocked_dio(inode); 194 ext4_inode_resume_unlocked_dio(inode);
203 ext4_inode_resume_unlocked_dio(inode_bl); 195 ext4_inode_resume_unlocked_dio(inode_bl);
204
205 unlock_two_nondirectories(inode, inode_bl); 196 unlock_two_nondirectories(inode, inode_bl);
206
207 iput(inode_bl); 197 iput(inode_bl);
208
209swap_boot_out:
210 return err; 198 return err;
211} 199}
212 200
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 04a5c7504be9..a888cac76e9c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1808,6 +1808,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1808 ext4_lock_group(ac->ac_sb, group); 1808 ext4_lock_group(ac->ac_sb, group);
1809 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, 1809 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
1810 ac->ac_g_ex.fe_len, &ex); 1810 ac->ac_g_ex.fe_len, &ex);
1811 ex.fe_logical = 0xDEADFA11; /* debug value */
1811 1812
1812 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { 1813 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
1813 ext4_fsblk_t start; 1814 ext4_fsblk_t start;
@@ -1936,7 +1937,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1936 */ 1937 */
1937 break; 1938 break;
1938 } 1939 }
1939 1940 ex.fe_logical = 0xDEADC0DE; /* debug value */
1940 ext4_mb_measure_extent(ac, &ex, e4b); 1941 ext4_mb_measure_extent(ac, &ex, e4b);
1941 1942
1942 i += ex.fe_len; 1943 i += ex.fe_len;
@@ -1977,6 +1978,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1977 max = mb_find_extent(e4b, i, sbi->s_stripe, &ex); 1978 max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
1978 if (max >= sbi->s_stripe) { 1979 if (max >= sbi->s_stripe) {
1979 ac->ac_found++; 1980 ac->ac_found++;
1981 ex.fe_logical = 0xDEADF00D; /* debug value */
1980 ac->ac_b_ex = ex; 1982 ac->ac_b_ex = ex;
1981 ext4_mb_use_best_found(ac, e4b); 1983 ext4_mb_use_best_found(ac, e4b);
1982 break; 1984 break;
@@ -4006,8 +4008,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4006 (unsigned long)ac->ac_b_ex.fe_len, 4008 (unsigned long)ac->ac_b_ex.fe_len,
4007 (unsigned long)ac->ac_b_ex.fe_logical, 4009 (unsigned long)ac->ac_b_ex.fe_logical,
4008 (int)ac->ac_criteria); 4010 (int)ac->ac_criteria);
4009 ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found", 4011 ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found);
4010 ac->ac_ex_scanned, ac->ac_found);
4011 ext4_msg(ac->ac_sb, KERN_ERR, "groups: "); 4012 ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
4012 ngroups = ext4_get_groups_count(sb); 4013 ngroups = ext4_get_groups_count(sb);
4013 for (i = 0; i < ngroups; i++) { 4014 for (i = 0; i < ngroups; i++) {
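
[editor's note] The fe_logical assignments above (0xDEADFA11, 0xDEADC0DE, 0xDEADF00D) are debug poison: mb_find_extent() deliberately leaves that field unset, and a recognizable constant turns any accidental use into an obvious bug instead of silent garbage. A toy version of the idiom, with an illustrative constant:

    #include <stdio.h>

    #define POISON_LOGICAL 0xDEADF00Du   /* illustrative poison value */

    struct free_extent { unsigned int fe_start, fe_len, fe_logical; };

    int main(void)
    {
            struct free_extent ex = { .fe_start = 7, .fe_len = 3 };

            ex.fe_logical = POISON_LOGICAL;   /* not meaningful yet */
            if (ex.fe_logical == POISON_LOGICAL)
                    printf("fe_logical used before being assigned\n");
            return 0;
    }
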
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 08481ee84cd5..d634e183b4d4 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -48,7 +48,7 @@ extern ushort ext4_mballoc_debug;
48 } \ 48 } \
49 } while (0) 49 } while (0)
50#else 50#else
51#define mb_debug(n, fmt, a...) 51#define mb_debug(n, fmt, a...) no_printk(fmt, ## a)
52#endif 52#endif
53 53
54#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ 54#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
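
[editor's note] Defining the disabled mb_debug() as no_printk() rather than as nothing keeps the compiler's printf-format checking (and avoids set-but-unused warnings) while still emitting no output. A userspace analogue of the pattern, with no_printk reimplemented locally:

    #include <stdio.h>

    /* Local stand-in for the kernel's no_printk(): never emits anything,
     * but the format attribute keeps argument checking alive. */
    static inline __attribute__((format(printf, 1, 2)))
    int no_printk(const char *fmt, ...)
    {
            (void)fmt;
            return 0;
    }

    #ifdef MB_DEBUG
    #define mb_debug(fmt, ...) printf(fmt, ##__VA_ARGS__)
    #else
    #define mb_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
    #endif

    int main(void)
    {
            int groups = 4;

            /* Format-checked in both builds; with an empty #define,
             * "groups" would also trigger a set-but-unused warning. */
            mb_debug("scanning %d groups\n", groups);
            return 0;
    }
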
@@ -175,8 +175,6 @@ struct ext4_allocation_context {
175 /* copy of the best found extent taken before preallocation efforts */ 175 /* copy of the best found extent taken before preallocation efforts */
176 struct ext4_free_extent ac_f_ex; 176 struct ext4_free_extent ac_f_ex;
177 177
178 /* number of iterations done. we have to track to limit searching */
179 unsigned long ac_ex_scanned;
180 __u16 ac_groups_scanned; 178 __u16 ac_groups_scanned;
181 __u16 ac_found; 179 __u16 ac_found;
182 __u16 ac_tail; 180 __u16 ac_tail;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 773b503bd18c..58ee7dc87669 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -76,7 +76,7 @@ copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
76 * ext4_ext_path structure refers to the last extent, or a negative error 76 * ext4_ext_path structure refers to the last extent, or a negative error
77 * value on failure. 77 * value on failure.
78 */ 78 */
79static int 79int
80mext_next_extent(struct inode *inode, struct ext4_ext_path *path, 80mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
81 struct ext4_extent **extent) 81 struct ext4_extent **extent)
82{ 82{
@@ -861,8 +861,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
861 } 861 }
862 if (!buffer_mapped(bh)) { 862 if (!buffer_mapped(bh)) {
863 zero_user(page, block_start, blocksize); 863 zero_user(page, block_start, blocksize);
864 if (!err) 864 set_buffer_uptodate(bh);
865 set_buffer_uptodate(bh);
866 continue; 865 continue;
867 } 866 }
868 } 867 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 710fed2377d4..f3c667091618 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -59,6 +59,7 @@ static struct kset *ext4_kset;
59static struct ext4_lazy_init *ext4_li_info; 59static struct ext4_lazy_init *ext4_li_info;
60static struct mutex ext4_li_mtx; 60static struct mutex ext4_li_mtx;
61static struct ext4_features *ext4_feat; 61static struct ext4_features *ext4_feat;
62static int ext4_mballoc_ready;
62 63
63static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 64static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
64 unsigned long journal_devnum); 65 unsigned long journal_devnum);
@@ -845,6 +846,10 @@ static void ext4_put_super(struct super_block *sb)
845 invalidate_bdev(sbi->journal_bdev); 846 invalidate_bdev(sbi->journal_bdev);
846 ext4_blkdev_remove(sbi); 847 ext4_blkdev_remove(sbi);
847 } 848 }
849 if (sbi->s_mb_cache) {
850 ext4_xattr_destroy_cache(sbi->s_mb_cache);
851 sbi->s_mb_cache = NULL;
852 }
848 if (sbi->s_mmp_tsk) 853 if (sbi->s_mmp_tsk)
849 kthread_stop(sbi->s_mmp_tsk); 854 kthread_stop(sbi->s_mmp_tsk);
850 sb->s_fs_info = NULL; 855 sb->s_fs_info = NULL;
@@ -940,7 +945,7 @@ static void init_once(void *foo)
940 inode_init_once(&ei->vfs_inode); 945 inode_init_once(&ei->vfs_inode);
941} 946}
942 947
943static int init_inodecache(void) 948static int __init init_inodecache(void)
944{ 949{
945 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 950 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
946 sizeof(struct ext4_inode_info), 951 sizeof(struct ext4_inode_info),
@@ -3575,6 +3580,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3575 "feature flags set on rev 0 fs, " 3580 "feature flags set on rev 0 fs, "
3576 "running e2fsck is recommended"); 3581 "running e2fsck is recommended");
3577 3582
3583 if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
3584 set_opt2(sb, HURD_COMPAT);
3585 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
3586 EXT4_FEATURE_INCOMPAT_64BIT)) {
3587 ext4_msg(sb, KERN_ERR,
3588 "The Hurd can't support 64-bit file systems");
3589 goto failed_mount;
3590 }
3591 }
3592
3578 if (IS_EXT2_SB(sb)) { 3593 if (IS_EXT2_SB(sb)) {
3579 if (ext2_feature_set_ok(sb)) 3594 if (ext2_feature_set_ok(sb))
3580 ext4_msg(sb, KERN_INFO, "mounting ext2 file system " 3595 ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
@@ -4010,6 +4025,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
4010 percpu_counter_set(&sbi->s_dirtyclusters_counter, 0); 4025 percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
4011 4026
4012no_journal: 4027no_journal:
4028 if (ext4_mballoc_ready) {
4029 sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
4030 if (!sbi->s_mb_cache) {
4031 ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
4032 goto failed_mount_wq;
4033 }
4034 }
4035
4013 /* 4036 /*
4014 * Get the # of file system overhead blocks from the 4037 * Get the # of file system overhead blocks from the
4015 * superblock if present. 4038 * superblock if present.
@@ -4835,6 +4858,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4835 } 4858 }
4836 4859
4837 if (*flags & MS_RDONLY) { 4860 if (*flags & MS_RDONLY) {
4861 err = sync_filesystem(sb);
4862 if (err < 0)
4863 goto restore_opts;
4838 err = dquot_suspend(sb, -1); 4864 err = dquot_suspend(sb, -1);
4839 if (err < 0) 4865 if (err < 0)
4840 goto restore_opts; 4866 goto restore_opts;
@@ -5516,11 +5542,9 @@ static int __init ext4_init_fs(void)
5516 5542
5517 err = ext4_init_mballoc(); 5543 err = ext4_init_mballoc();
5518 if (err) 5544 if (err)
5519 goto out3;
5520
5521 err = ext4_init_xattr();
5522 if (err)
5523 goto out2; 5545 goto out2;
5546 else
5547 ext4_mballoc_ready = 1;
5524 err = init_inodecache(); 5548 err = init_inodecache();
5525 if (err) 5549 if (err)
5526 goto out1; 5550 goto out1;
@@ -5536,10 +5560,9 @@ out:
5536 unregister_as_ext3(); 5560 unregister_as_ext3();
5537 destroy_inodecache(); 5561 destroy_inodecache();
5538out1: 5562out1:
5539 ext4_exit_xattr(); 5563 ext4_mballoc_ready = 0;
5540out2:
5541 ext4_exit_mballoc(); 5564 ext4_exit_mballoc();
5542out3: 5565out2:
5543 ext4_exit_feat_adverts(); 5566 ext4_exit_feat_adverts();
5544out4: 5567out4:
5545 if (ext4_proc_root) 5568 if (ext4_proc_root)
@@ -5562,7 +5585,6 @@ static void __exit ext4_exit_fs(void)
5562 unregister_as_ext3(); 5585 unregister_as_ext3();
5563 unregister_filesystem(&ext4_fs_type); 5586 unregister_filesystem(&ext4_fs_type);
5564 destroy_inodecache(); 5587 destroy_inodecache();
5565 ext4_exit_xattr();
5566 ext4_exit_mballoc(); 5588 ext4_exit_mballoc();
5567 ext4_exit_feat_adverts(); 5589 ext4_exit_feat_adverts();
5568 remove_proc_entry("fs/ext4", NULL); 5590 remove_proc_entry("fs/ext4", NULL);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e175e94116ac..1f5cf5880718 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -81,7 +81,7 @@
81# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) 81# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
82#endif 82#endif
83 83
84static void ext4_xattr_cache_insert(struct buffer_head *); 84static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
85static struct buffer_head *ext4_xattr_cache_find(struct inode *, 85static struct buffer_head *ext4_xattr_cache_find(struct inode *,
86 struct ext4_xattr_header *, 86 struct ext4_xattr_header *,
87 struct mb_cache_entry **); 87 struct mb_cache_entry **);
@@ -90,8 +90,6 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *,
90static int ext4_xattr_list(struct dentry *dentry, char *buffer, 90static int ext4_xattr_list(struct dentry *dentry, char *buffer,
91 size_t buffer_size); 91 size_t buffer_size);
92 92
93static struct mb_cache *ext4_xattr_cache;
94
95static const struct xattr_handler *ext4_xattr_handler_map[] = { 93static const struct xattr_handler *ext4_xattr_handler_map[] = {
96 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, 94 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler,
97#ifdef CONFIG_EXT4_FS_POSIX_ACL 95#ifdef CONFIG_EXT4_FS_POSIX_ACL
@@ -117,6 +115,9 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
117 NULL 115 NULL
118}; 116};
119 117
118#define EXT4_GET_MB_CACHE(inode) (((struct ext4_sb_info *) \
119 inode->i_sb->s_fs_info)->s_mb_cache)
120
120static __le32 ext4_xattr_block_csum(struct inode *inode, 121static __le32 ext4_xattr_block_csum(struct inode *inode,
121 sector_t block_nr, 122 sector_t block_nr,
122 struct ext4_xattr_header *hdr) 123 struct ext4_xattr_header *hdr)
@@ -265,6 +266,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
265 struct ext4_xattr_entry *entry; 266 struct ext4_xattr_entry *entry;
266 size_t size; 267 size_t size;
267 int error; 268 int error;
269 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
268 270
269 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", 271 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
270 name_index, name, buffer, (long)buffer_size); 272 name_index, name, buffer, (long)buffer_size);
@@ -286,7 +288,7 @@ bad_block:
286 error = -EIO; 288 error = -EIO;
287 goto cleanup; 289 goto cleanup;
288 } 290 }
289 ext4_xattr_cache_insert(bh); 291 ext4_xattr_cache_insert(ext4_mb_cache, bh);
290 entry = BFIRST(bh); 292 entry = BFIRST(bh);
291 error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); 293 error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
292 if (error == -EIO) 294 if (error == -EIO)
@@ -409,6 +411,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
409 struct inode *inode = dentry->d_inode; 411 struct inode *inode = dentry->d_inode;
410 struct buffer_head *bh = NULL; 412 struct buffer_head *bh = NULL;
411 int error; 413 int error;
414 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
412 415
413 ea_idebug(inode, "buffer=%p, buffer_size=%ld", 416 ea_idebug(inode, "buffer=%p, buffer_size=%ld",
414 buffer, (long)buffer_size); 417 buffer, (long)buffer_size);
@@ -430,7 +433,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
430 error = -EIO; 433 error = -EIO;
431 goto cleanup; 434 goto cleanup;
432 } 435 }
433 ext4_xattr_cache_insert(bh); 436 ext4_xattr_cache_insert(ext4_mb_cache, bh);
434 error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); 437 error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
435 438
436cleanup: 439cleanup:
@@ -526,8 +529,9 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
526{ 529{
527 struct mb_cache_entry *ce = NULL; 530 struct mb_cache_entry *ce = NULL;
528 int error = 0; 531 int error = 0;
532 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
529 533
530 ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr); 534 ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr);
531 error = ext4_journal_get_write_access(handle, bh); 535 error = ext4_journal_get_write_access(handle, bh);
532 if (error) 536 if (error)
533 goto out; 537 goto out;
@@ -567,12 +571,13 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
567 size_t *min_offs, void *base, int *total) 571 size_t *min_offs, void *base, int *total)
568{ 572{
569 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 573 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
570 *total += EXT4_XATTR_LEN(last->e_name_len);
571 if (!last->e_value_block && last->e_value_size) { 574 if (!last->e_value_block && last->e_value_size) {
572 size_t offs = le16_to_cpu(last->e_value_offs); 575 size_t offs = le16_to_cpu(last->e_value_offs);
573 if (offs < *min_offs) 576 if (offs < *min_offs)
574 *min_offs = offs; 577 *min_offs = offs;
575 } 578 }
579 if (total)
580 *total += EXT4_XATTR_LEN(last->e_name_len);
576 } 581 }
577 return (*min_offs - ((void *)last - base) - sizeof(__u32)); 582 return (*min_offs - ((void *)last - base) - sizeof(__u32));
578} 583}
@@ -745,13 +750,14 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
745 struct ext4_xattr_search *s = &bs->s; 750 struct ext4_xattr_search *s = &bs->s;
746 struct mb_cache_entry *ce = NULL; 751 struct mb_cache_entry *ce = NULL;
747 int error = 0; 752 int error = 0;
753 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
748 754
749#define header(x) ((struct ext4_xattr_header *)(x)) 755#define header(x) ((struct ext4_xattr_header *)(x))
750 756
751 if (i->value && i->value_len > sb->s_blocksize) 757 if (i->value && i->value_len > sb->s_blocksize)
752 return -ENOSPC; 758 return -ENOSPC;
753 if (s->base) { 759 if (s->base) {
754 ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev, 760 ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev,
755 bs->bh->b_blocknr); 761 bs->bh->b_blocknr);
756 error = ext4_journal_get_write_access(handle, bs->bh); 762 error = ext4_journal_get_write_access(handle, bs->bh);
757 if (error) 763 if (error)
@@ -769,7 +775,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
769 if (!IS_LAST_ENTRY(s->first)) 775 if (!IS_LAST_ENTRY(s->first))
770 ext4_xattr_rehash(header(s->base), 776 ext4_xattr_rehash(header(s->base),
771 s->here); 777 s->here);
772 ext4_xattr_cache_insert(bs->bh); 778 ext4_xattr_cache_insert(ext4_mb_cache,
779 bs->bh);
773 } 780 }
774 unlock_buffer(bs->bh); 781 unlock_buffer(bs->bh);
775 if (error == -EIO) 782 if (error == -EIO)
@@ -905,7 +912,7 @@ getblk_failed:
905 memcpy(new_bh->b_data, s->base, new_bh->b_size); 912 memcpy(new_bh->b_data, s->base, new_bh->b_size);
906 set_buffer_uptodate(new_bh); 913 set_buffer_uptodate(new_bh);
907 unlock_buffer(new_bh); 914 unlock_buffer(new_bh);
908 ext4_xattr_cache_insert(new_bh); 915 ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
909 error = ext4_handle_dirty_xattr_block(handle, 916 error = ext4_handle_dirty_xattr_block(handle,
910 inode, new_bh); 917 inode, new_bh);
911 if (error) 918 if (error)
@@ -1228,7 +1235,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
1228 struct ext4_xattr_block_find *bs = NULL; 1235 struct ext4_xattr_block_find *bs = NULL;
1229 char *buffer = NULL, *b_entry_name = NULL; 1236 char *buffer = NULL, *b_entry_name = NULL;
1230 size_t min_offs, free; 1237 size_t min_offs, free;
1231 int total_ino, total_blk; 1238 int total_ino;
1232 void *base, *start, *end; 1239 void *base, *start, *end;
1233 int extra_isize = 0, error = 0, tried_min_extra_isize = 0; 1240 int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
1234 int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); 1241 int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
@@ -1286,8 +1293,7 @@ retry:
1286 first = BFIRST(bh); 1293 first = BFIRST(bh);
1287 end = bh->b_data + bh->b_size; 1294 end = bh->b_data + bh->b_size;
1288 min_offs = end - base; 1295 min_offs = end - base;
1289 free = ext4_xattr_free_space(first, &min_offs, base, 1296 free = ext4_xattr_free_space(first, &min_offs, base, NULL);
1290 &total_blk);
1291 if (free < new_extra_isize) { 1297 if (free < new_extra_isize) {
1292 if (!tried_min_extra_isize && s_min_extra_isize) { 1298 if (!tried_min_extra_isize && s_min_extra_isize) {
1293 tried_min_extra_isize++; 1299 tried_min_extra_isize++;
@@ -1495,13 +1501,13 @@ ext4_xattr_put_super(struct super_block *sb)
1495 * Returns 0, or a negative error number on failure. 1501 * Returns 0, or a negative error number on failure.
1496 */ 1502 */
1497static void 1503static void
1498ext4_xattr_cache_insert(struct buffer_head *bh) 1504ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
1499{ 1505{
1500 __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); 1506 __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
1501 struct mb_cache_entry *ce; 1507 struct mb_cache_entry *ce;
1502 int error; 1508 int error;
1503 1509
1504 ce = mb_cache_entry_alloc(ext4_xattr_cache, GFP_NOFS); 1510 ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS);
1505 if (!ce) { 1511 if (!ce) {
1506 ea_bdebug(bh, "out of memory"); 1512 ea_bdebug(bh, "out of memory");
1507 return; 1513 return;
@@ -1573,12 +1579,13 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
1573{ 1579{
1574 __u32 hash = le32_to_cpu(header->h_hash); 1580 __u32 hash = le32_to_cpu(header->h_hash);
1575 struct mb_cache_entry *ce; 1581 struct mb_cache_entry *ce;
1582 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
1576 1583
1577 if (!header->h_hash) 1584 if (!header->h_hash)
1578 return NULL; /* never share */ 1585 return NULL; /* never share */
1579 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); 1586 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1580again: 1587again:
1581 ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev, 1588 ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev,
1582 hash); 1589 hash);
1583 while (ce) { 1590 while (ce) {
1584 struct buffer_head *bh; 1591 struct buffer_head *bh;
@@ -1676,19 +1683,17 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
1676 1683
1677#undef BLOCK_HASH_SHIFT 1684#undef BLOCK_HASH_SHIFT
1678 1685
1679int __init 1686#define HASH_BUCKET_BITS 10
1680ext4_init_xattr(void) 1687
1688struct mb_cache *
1689ext4_xattr_create_cache(char *name)
1681{ 1690{
1682 ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); 1691 return mb_cache_create(name, HASH_BUCKET_BITS);
1683 if (!ext4_xattr_cache)
1684 return -ENOMEM;
1685 return 0;
1686} 1692}
1687 1693
1688void 1694void ext4_xattr_destroy_cache(struct mb_cache *cache)
1689ext4_exit_xattr(void)
1690{ 1695{
1691 if (ext4_xattr_cache) 1696 if (cache)
1692 mb_cache_destroy(ext4_xattr_cache); 1697 mb_cache_destroy(cache);
1693 ext4_xattr_cache = NULL;
1694} 1698}
1699
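
[editor's note] Two sizes are worth comparing here. The removed global cache was created with mb_cache_create("ext4_xattr", 6), while each per-filesystem cache now uses HASH_BUCKET_BITS = 10:

    old, shared:  1 << 6  = 64 hash buckets for all mounts together
    new, per sb:  1 << 10 = 1024 hash buckets for each mount

so a box with several busy ext4 filesystems gets both more buckets and no cross-mount contention on one hash table.
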
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 819d6398833f..29bedf5589f6 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -110,9 +110,6 @@ extern void ext4_xattr_put_super(struct super_block *);
110extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, 110extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
111 struct ext4_inode *raw_inode, handle_t *handle); 111 struct ext4_inode *raw_inode, handle_t *handle);
112 112
113extern int __init ext4_init_xattr(void);
114extern void ext4_exit_xattr(void);
115
116extern const struct xattr_handler *ext4_xattr_handlers[]; 113extern const struct xattr_handler *ext4_xattr_handlers[];
117 114
118extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, 115extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
@@ -124,6 +121,9 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
124 struct ext4_xattr_info *i, 121 struct ext4_xattr_info *i,
125 struct ext4_xattr_ibody_find *is); 122 struct ext4_xattr_ibody_find *is);
126 123
124extern struct mb_cache *ext4_xattr_create_cache(char *name);
125extern void ext4_xattr_destroy_cache(struct mb_cache *);
126
127#ifdef CONFIG_EXT4_FS_SECURITY 127#ifdef CONFIG_EXT4_FS_SECURITY
128extern int ext4_init_security(handle_t *handle, struct inode *inode, 128extern int ext4_init_security(handle_t *handle, struct inode *inode,
129 struct inode *dir, const struct qstr *qstr); 129 struct inode *dir, const struct qstr *qstr);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 1a85f83abd53..856bdf994c0a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -568,6 +568,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
568 struct f2fs_mount_info org_mount_opt; 568 struct f2fs_mount_info org_mount_opt;
569 int err, active_logs; 569 int err, active_logs;
570 570
571 sync_filesystem(sb);
572
571 /* 573 /*
572 * Save the old mount options in case we 574 * Save the old mount options in case we
573 * need to restore them. 575 * need to restore them.
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index c68d9f27135e..b3361fe2bcb5 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -635,6 +635,8 @@ static int fat_remount(struct super_block *sb, int *flags, char *data)
635 struct msdos_sb_info *sbi = MSDOS_SB(sb); 635 struct msdos_sb_info *sbi = MSDOS_SB(sb);
636 *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); 636 *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME);
637 637
638 sync_filesystem(sb);
639
638 /* make sure we update state on remount. */ 640 /* make sure we update state on remount. */
639 new_rdonly = *flags & MS_RDONLY; 641 new_rdonly = *flags & MS_RDONLY;
640 if (new_rdonly != (sb->s_flags & MS_RDONLY)) { 642 if (new_rdonly != (sb->s_flags & MS_RDONLY)) {
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index e37eb274e492..7ca8c75d50d3 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -124,6 +124,7 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp)
124 124
125static int vxfs_remount(struct super_block *sb, int *flags, char *data) 125static int vxfs_remount(struct super_block *sb, int *flags, char *data)
126{ 126{
127 sync_filesystem(sb);
127 *flags |= MS_RDONLY; 128 *flags |= MS_RDONLY;
128 return 0; 129 return 0;
129} 130}
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index b4bff1b15028..8d611696fcad 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -135,6 +135,7 @@ static void fuse_evict_inode(struct inode *inode)
135 135
136static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) 136static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
137{ 137{
138 sync_filesystem(sb);
138 if (*flags & MS_MANDLOCK) 139 if (*flags & MS_MANDLOCK)
139 return -EINVAL; 140 return -EINVAL;
140 141
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 033ee975a895..de8afad89e51 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1167,6 +1167,8 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
1167 struct gfs2_tune *gt = &sdp->sd_tune; 1167 struct gfs2_tune *gt = &sdp->sd_tune;
1168 int error; 1168 int error;
1169 1169
1170 sync_filesystem(sb);
1171
1170 spin_lock(&gt->gt_spin); 1172 spin_lock(&gt->gt_spin);
1171 args.ar_commit = gt->gt_logd_secs; 1173 args.ar_commit = gt->gt_logd_secs;
1172 args.ar_quota_quantum = gt->gt_quota_quantum; 1174 args.ar_quota_quantum = gt->gt_quota_quantum;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 2d2039e754cd..eee7206c38d1 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -112,6 +112,7 @@ static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf)
112 112
113static int hfs_remount(struct super_block *sb, int *flags, char *data) 113static int hfs_remount(struct super_block *sb, int *flags, char *data)
114{ 114{
115 sync_filesystem(sb);
115 *flags |= MS_NODIRATIME; 116 *flags |= MS_NODIRATIME;
116 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 117 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
117 return 0; 118 return 0;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index a6abf87d79d0..a513d2d36be9 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -323,6 +323,7 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
323 323
324static int hfsplus_remount(struct super_block *sb, int *flags, char *data) 324static int hfsplus_remount(struct super_block *sb, int *flags, char *data)
325{ 325{
326 sync_filesystem(sb);
326 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 327 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
327 return 0; 328 return 0;
328 if (!(*flags & MS_RDONLY)) { 329 if (!(*flags & MS_RDONLY)) {
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 4534ff688b76..fe3463a43236 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -421,6 +421,8 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
421 struct hpfs_sb_info *sbi = hpfs_sb(s); 421 struct hpfs_sb_info *sbi = hpfs_sb(s);
422 char *new_opts = kstrdup(data, GFP_KERNEL); 422 char *new_opts = kstrdup(data, GFP_KERNEL);
423 423
424 sync_filesystem(s);
425
424 *flags |= MS_NOATIME; 426 *flags |= MS_NOATIME;
425 427
426 hpfs_lock(s); 428 hpfs_lock(s);
diff --git a/fs/inode.c b/fs/inode.c
index fb59ba7967f1..f96d2a6f88cc 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1898,3 +1898,34 @@ void inode_dio_done(struct inode *inode)
1898 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); 1898 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
1899} 1899}
1900EXPORT_SYMBOL(inode_dio_done); 1900EXPORT_SYMBOL(inode_dio_done);
1901
1902/*
1903 * inode_set_flags - atomically set some inode flags
1904 *
1905 * Note: the caller should be holding i_mutex, or else be sure that
1906 * they have exclusive access to the inode structure (i.e., while the
1907 * inode is being instantiated). The reason for the cmpxchg() loop
1908 * --- which wouldn't be necessary if all code paths which modify
1909 * i_flags actually followed this rule --- is that there is at least one
1910 * code path which doesn't today --- for example,
1911 * __generic_file_aio_write() calls file_remove_suid() without holding
1912 * i_mutex --- so we use cmpxchg() out of an abundance of caution.
1913 *
1914 * In the long run, i_mutex is overkill, and we should probably look
1915 * at using the i_lock spinlock to protect i_flags, and then make sure
1916 * it is so documented in include/linux/fs.h and that all code follows
1917 * the locking convention!!
1918 */
1919void inode_set_flags(struct inode *inode, unsigned int flags,
1920 unsigned int mask)
1921{
1922 unsigned int old_flags, new_flags;
1923
1924 WARN_ON_ONCE(flags & ~mask);
1925 do {
1926 old_flags = ACCESS_ONCE(inode->i_flags);
1927 new_flags = (old_flags & ~mask) | flags;
1928 } while (unlikely(cmpxchg(&inode->i_flags, old_flags,
1929 new_flags) != old_flags));
1930}
1931EXPORT_SYMBOL(inode_set_flags);
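
[editor's note] The cmpxchg() loop above is the classic lock-free read-modify-write: read, recompute, retry if another writer raced in between. A self-contained userspace sketch of the same pattern, with the GCC/Clang __sync_val_compare_and_swap() builtin standing in for the kernel's cmpxchg():

    #include <stdio.h>

    static unsigned int i_flags_demo;

    static void set_mask_bits_demo(unsigned int *p, unsigned int mask,
                                   unsigned int flags)
    {
            unsigned int old, new;

            do {
                    old = *(volatile unsigned int *)p; /* ACCESS_ONCE()-ish */
                    new = (old & ~mask) | flags;
                    /* retry if another thread changed *p in between */
            } while (__sync_val_compare_and_swap(p, old, new) != old);
    }

    int main(void)
    {
            i_flags_demo = 0xf0;
            set_mask_bits_demo(&i_flags_demo, 0x0f, 0x05); /* low nibble */
            printf("0x%x\n", i_flags_demo);                /* prints 0xf5 */
            return 0;
    }
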
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 4a9e10ea13f2..6af66ee56390 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -117,6 +117,7 @@ static void destroy_inodecache(void)
117 117
118static int isofs_remount(struct super_block *sb, int *flags, char *data) 118static int isofs_remount(struct super_block *sb, int *flags, char *data)
119{ 119{
120 sync_filesystem(sb);
120 if (!(*flags & MS_RDONLY)) 121 if (!(*flags & MS_RDONLY))
121 return -EROFS; 122 return -EROFS;
122 return 0; 123 return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index cf2fc0594063..5f26139a165a 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -555,7 +555,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
555 blk_start_plug(&plug); 555 blk_start_plug(&plug);
556 jbd2_journal_write_revoke_records(journal, commit_transaction, 556 jbd2_journal_write_revoke_records(journal, commit_transaction,
557 &log_bufs, WRITE_SYNC); 557 &log_bufs, WRITE_SYNC);
558 blk_finish_plug(&plug);
559 558
560 jbd_debug(3, "JBD2: commit phase 2b\n"); 559 jbd_debug(3, "JBD2: commit phase 2b\n");
561 560
@@ -582,7 +581,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
582 err = 0; 581 err = 0;
583 bufs = 0; 582 bufs = 0;
584 descriptor = NULL; 583 descriptor = NULL;
585 blk_start_plug(&plug);
586 while (commit_transaction->t_buffers) { 584 while (commit_transaction->t_buffers) {
587 585
588 /* Find the next buffer to be journaled... */ 586 /* Find the next buffer to be journaled... */
@@ -1067,6 +1065,25 @@ restart_loop:
1067 goto restart_loop; 1065 goto restart_loop;
1068 } 1066 }
1069 1067
1068 /* Add the transaction to the checkpoint list
1069 * __journal_remove_checkpoint() can not destroy transaction
1070 * under us because it is not marked as T_FINISHED yet */
1071 if (journal->j_checkpoint_transactions == NULL) {
1072 journal->j_checkpoint_transactions = commit_transaction;
1073 commit_transaction->t_cpnext = commit_transaction;
1074 commit_transaction->t_cpprev = commit_transaction;
1075 } else {
1076 commit_transaction->t_cpnext =
1077 journal->j_checkpoint_transactions;
1078 commit_transaction->t_cpprev =
1079 commit_transaction->t_cpnext->t_cpprev;
1080 commit_transaction->t_cpnext->t_cpprev =
1081 commit_transaction;
1082 commit_transaction->t_cpprev->t_cpnext =
1083 commit_transaction;
1084 }
1085 spin_unlock(&journal->j_list_lock);
1086
1070 /* Done with this transaction! */ 1087 /* Done with this transaction! */
1071 1088
1072 jbd_debug(3, "JBD2: commit phase 7\n"); 1089 jbd_debug(3, "JBD2: commit phase 7\n");
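
[editor's note] The block added above splices the committing transaction into the circular checkpoint list while j_list_lock is still held and before the transaction is marked T_FINISHED, so the checkpoint code cannot free it underneath the commit path. The splice itself is a standard circular doubly-linked insertion; a toy standalone version with a struct standing in for transaction_t:

    #include <stdio.h>

    struct txn { int tid; struct txn *cpnext, *cpprev; };

    static void checkpoint_add(struct txn **head, struct txn *t)
    {
            if (*head == NULL) {
                    *head = t;
                    t->cpnext = t->cpprev = t;   /* a ring of one */
            } else {
                    t->cpnext = *head;
                    t->cpprev = (*head)->cpprev;
                    t->cpnext->cpprev = t;
                    t->cpprev->cpnext = t;       /* spliced before head */
            }
    }

    int main(void)
    {
            struct txn *head = NULL, a = { .tid = 1 }, b = { .tid = 2 };

            checkpoint_add(&head, &a);
            checkpoint_add(&head, &b);
            printf("%d -> %d -> %d\n", head->tid, head->cpnext->tid,
                   head->cpnext->cpnext->tid);   /* 1 -> 2 -> 1 */
            return 0;
    }
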
@@ -1085,24 +1102,7 @@ restart_loop:
1085 atomic_read(&commit_transaction->t_handle_count); 1102 atomic_read(&commit_transaction->t_handle_count);
1086 trace_jbd2_run_stats(journal->j_fs_dev->bd_dev, 1103 trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
1087 commit_transaction->t_tid, &stats.run); 1104 commit_transaction->t_tid, &stats.run);
1088 1105 stats.ts_requested = (commit_transaction->t_requested) ? 1 : 0;
1089 /*
1090 * Calculate overall stats
1091 */
1092 spin_lock(&journal->j_history_lock);
1093 journal->j_stats.ts_tid++;
1094 if (commit_transaction->t_requested)
1095 journal->j_stats.ts_requested++;
1096 journal->j_stats.run.rs_wait += stats.run.rs_wait;
1097 journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
1098 journal->j_stats.run.rs_running += stats.run.rs_running;
1099 journal->j_stats.run.rs_locked += stats.run.rs_locked;
1100 journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
1101 journal->j_stats.run.rs_logging += stats.run.rs_logging;
1102 journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count;
1103 journal->j_stats.run.rs_blocks += stats.run.rs_blocks;
1104 journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
1105 spin_unlock(&journal->j_history_lock);
1106 1106
1107 commit_transaction->t_state = T_COMMIT_CALLBACK; 1107 commit_transaction->t_state = T_COMMIT_CALLBACK;
1108 J_ASSERT(commit_transaction == journal->j_committing_transaction); 1108 J_ASSERT(commit_transaction == journal->j_committing_transaction);
@@ -1122,24 +1122,6 @@ restart_loop:
1122 1122
1123 write_unlock(&journal->j_state_lock); 1123 write_unlock(&journal->j_state_lock);
1124 1124
1125 if (journal->j_checkpoint_transactions == NULL) {
1126 journal->j_checkpoint_transactions = commit_transaction;
1127 commit_transaction->t_cpnext = commit_transaction;
1128 commit_transaction->t_cpprev = commit_transaction;
1129 } else {
1130 commit_transaction->t_cpnext =
1131 journal->j_checkpoint_transactions;
1132 commit_transaction->t_cpprev =
1133 commit_transaction->t_cpnext->t_cpprev;
1134 commit_transaction->t_cpnext->t_cpprev =
1135 commit_transaction;
1136 commit_transaction->t_cpprev->t_cpnext =
1137 commit_transaction;
1138 }
1139 spin_unlock(&journal->j_list_lock);
1140 /* Drop all spin_locks because commit_callback may be block.
1141 * __journal_remove_checkpoint() can not destroy transaction
1142 * under us because it is not marked as T_FINISHED yet */
1143 if (journal->j_commit_callback) 1125 if (journal->j_commit_callback)
1144 journal->j_commit_callback(journal, commit_transaction); 1126 journal->j_commit_callback(journal, commit_transaction);
1145 1127
@@ -1150,7 +1132,7 @@ restart_loop:
1150 write_lock(&journal->j_state_lock); 1132 write_lock(&journal->j_state_lock);
1151 spin_lock(&journal->j_list_lock); 1133 spin_lock(&journal->j_list_lock);
1152 commit_transaction->t_state = T_FINISHED; 1134 commit_transaction->t_state = T_FINISHED;
1153 /* Recheck checkpoint lists after j_list_lock was dropped */ 1135 /* Check if the transaction can be dropped now that we are finished */
1154 if (commit_transaction->t_checkpoint_list == NULL && 1136 if (commit_transaction->t_checkpoint_list == NULL &&
1155 commit_transaction->t_checkpoint_io_list == NULL) { 1137 commit_transaction->t_checkpoint_io_list == NULL) {
1156 __jbd2_journal_drop_transaction(journal, commit_transaction); 1138 __jbd2_journal_drop_transaction(journal, commit_transaction);
@@ -1159,4 +1141,21 @@ restart_loop:
1159 spin_unlock(&journal->j_list_lock); 1141 spin_unlock(&journal->j_list_lock);
1160 write_unlock(&journal->j_state_lock); 1142 write_unlock(&journal->j_state_lock);
1161 wake_up(&journal->j_wait_done_commit); 1143 wake_up(&journal->j_wait_done_commit);
1144
1145 /*
1146 * Calculate overall stats
1147 */
1148 spin_lock(&journal->j_history_lock);
1149 journal->j_stats.ts_tid++;
1150 journal->j_stats.ts_requested += stats.ts_requested;
1151 journal->j_stats.run.rs_wait += stats.run.rs_wait;
1152 journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
1153 journal->j_stats.run.rs_running += stats.run.rs_running;
1154 journal->j_stats.run.rs_locked += stats.run.rs_locked;
1155 journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
1156 journal->j_stats.run.rs_logging += stats.run.rs_logging;
1157 journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count;
1158 journal->j_stats.run.rs_blocks += stats.run.rs_blocks;
1159 journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
1160 spin_unlock(&journal->j_history_lock);
1162} 1161}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 5fa344afb49a..67b8e303946c 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -122,7 +122,7 @@ EXPORT_SYMBOL(__jbd2_debug);
122#endif 122#endif
123 123
124/* Checksumming functions */ 124/* Checksumming functions */
125int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) 125static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
126{ 126{
127 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 127 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
128 return 1; 128 return 1;
@@ -143,7 +143,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
143 return cpu_to_be32(csum); 143 return cpu_to_be32(csum);
144} 144}
145 145
146int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) 146static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
147{ 147{
148 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 148 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
149 return 1; 149 return 1;
@@ -151,7 +151,7 @@ int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
151 return sb->s_checksum == jbd2_superblock_csum(j, sb); 151 return sb->s_checksum == jbd2_superblock_csum(j, sb);
152} 152}
153 153
154void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) 154static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
155{ 155{
156 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 156 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
157 return; 157 return;
@@ -302,8 +302,8 @@ static void journal_kill_thread(journal_t *journal)
302 journal->j_flags |= JBD2_UNMOUNT; 302 journal->j_flags |= JBD2_UNMOUNT;
303 303
304 while (journal->j_task) { 304 while (journal->j_task) {
305 wake_up(&journal->j_wait_commit);
306 write_unlock(&journal->j_state_lock); 305 write_unlock(&journal->j_state_lock);
306 wake_up(&journal->j_wait_commit);
307 wait_event(journal->j_wait_done_commit, journal->j_task == NULL); 307 wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
308 write_lock(&journal->j_state_lock); 308 write_lock(&journal->j_state_lock);
309 } 309 }
@@ -710,8 +710,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
710 while (tid_gt(tid, journal->j_commit_sequence)) { 710 while (tid_gt(tid, journal->j_commit_sequence)) {
711 jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", 711 jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n",
712 tid, journal->j_commit_sequence); 712 tid, journal->j_commit_sequence);
713 wake_up(&journal->j_wait_commit);
714 read_unlock(&journal->j_state_lock); 713 read_unlock(&journal->j_state_lock);
714 wake_up(&journal->j_wait_commit);
715 wait_event(journal->j_wait_done_commit, 715 wait_event(journal->j_wait_done_commit,
716 !tid_gt(tid, journal->j_commit_sequence)); 716 !tid_gt(tid, journal->j_commit_sequence));
717 read_lock(&journal->j_state_lock); 717 read_lock(&journal->j_state_lock);
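
[editor's note] Both jbd2 hunks above move the wake_up() outside the lock: waking a thread that will immediately block on the lock the waker still holds just costs an extra context switch. A pthread sketch of the same ordering, assuming (as jbd2's waiters do) that the predicate is rechecked under the lock, which is what makes signalling after the unlock safe:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
    static int done;

    static void *waiter(void *arg)
    {
            (void)arg;
            pthread_mutex_lock(&lock);
            while (!done)                     /* predicate under the lock */
                    pthread_cond_wait(&cond, &lock);
            pthread_mutex_unlock(&lock);
            printf("commit observed\n");
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, waiter, NULL);
            pthread_mutex_lock(&lock);
            done = 1;
            pthread_mutex_unlock(&lock);
            pthread_cond_signal(&cond);   /* after unlock, as in the patch */
            pthread_join(t, NULL);
            return 0;
    }
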
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 60bb365f54a5..38cfcf5f6fce 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1073,7 +1073,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
1073 * reused here. 1073 * reused here.
1074 */ 1074 */
1075 jbd_lock_bh_state(bh); 1075 jbd_lock_bh_state(bh);
1076 spin_lock(&journal->j_list_lock);
1077 J_ASSERT_JH(jh, (jh->b_transaction == transaction || 1076 J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
1078 jh->b_transaction == NULL || 1077 jh->b_transaction == NULL ||
1079 (jh->b_transaction == journal->j_committing_transaction && 1078 (jh->b_transaction == journal->j_committing_transaction &&
@@ -1096,12 +1095,14 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
1096 jh->b_modified = 0; 1095 jh->b_modified = 0;
1097 1096
1098 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 1097 JBUFFER_TRACE(jh, "file as BJ_Reserved");
1098 spin_lock(&journal->j_list_lock);
1099 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); 1099 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
1100 } else if (jh->b_transaction == journal->j_committing_transaction) { 1100 } else if (jh->b_transaction == journal->j_committing_transaction) {
1101 /* first access by this transaction */ 1101 /* first access by this transaction */
1102 jh->b_modified = 0; 1102 jh->b_modified = 0;
1103 1103
1104 JBUFFER_TRACE(jh, "set next transaction"); 1104 JBUFFER_TRACE(jh, "set next transaction");
1105 spin_lock(&journal->j_list_lock);
1105 jh->b_next_transaction = transaction; 1106 jh->b_next_transaction = transaction;
1106 } 1107 }
1107 spin_unlock(&journal->j_list_lock); 1108 spin_unlock(&journal->j_list_lock);
@@ -1312,7 +1313,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1312 journal->j_running_transaction)) { 1313 journal->j_running_transaction)) {
1313 printk(KERN_ERR "JBD2: %s: " 1314 printk(KERN_ERR "JBD2: %s: "
1314 "jh->b_transaction (%llu, %p, %u) != " 1315 "jh->b_transaction (%llu, %p, %u) != "
1315 "journal->j_running_transaction (%p, %u)", 1316 "journal->j_running_transaction (%p, %u)\n",
1316 journal->j_devname, 1317 journal->j_devname,
1317 (unsigned long long) bh->b_blocknr, 1318 (unsigned long long) bh->b_blocknr,
1318 jh->b_transaction, 1319 jh->b_transaction,
@@ -1335,30 +1336,25 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1335 */ 1336 */
1336 if (jh->b_transaction != transaction) { 1337 if (jh->b_transaction != transaction) {
1337 JBUFFER_TRACE(jh, "already on other transaction"); 1338 JBUFFER_TRACE(jh, "already on other transaction");
1338 if (unlikely(jh->b_transaction != 1339 if (unlikely(((jh->b_transaction !=
1339 journal->j_committing_transaction)) { 1340 journal->j_committing_transaction)) ||
1340 printk(KERN_ERR "JBD2: %s: " 1341 (jh->b_next_transaction != transaction))) {
1341 "jh->b_transaction (%llu, %p, %u) != " 1342 printk(KERN_ERR "jbd2_journal_dirty_metadata: %s: "
1342 "journal->j_committing_transaction (%p, %u)", 1343 "bad jh for block %llu: "
1344 "transaction (%p, %u), "
1345 "jh->b_transaction (%p, %u), "
1346 "jh->b_next_transaction (%p, %u), jlist %u\n",
1343 journal->j_devname, 1347 journal->j_devname,
1344 (unsigned long long) bh->b_blocknr, 1348 (unsigned long long) bh->b_blocknr,
1349 transaction, transaction->t_tid,
1345 jh->b_transaction, 1350 jh->b_transaction,
1346 jh->b_transaction ? jh->b_transaction->t_tid : 0, 1351 jh->b_transaction ?
1347 journal->j_committing_transaction, 1352 jh->b_transaction->t_tid : 0,
1348 journal->j_committing_transaction ?
1349 journal->j_committing_transaction->t_tid : 0);
1350 ret = -EINVAL;
1351 }
1352 if (unlikely(jh->b_next_transaction != transaction)) {
1353 printk(KERN_ERR "JBD2: %s: "
1354 "jh->b_next_transaction (%llu, %p, %u) != "
1355 "transaction (%p, %u)",
1356 journal->j_devname,
1357 (unsigned long long) bh->b_blocknr,
1358 jh->b_next_transaction, 1353 jh->b_next_transaction,
1359 jh->b_next_transaction ? 1354 jh->b_next_transaction ?
1360 jh->b_next_transaction->t_tid : 0, 1355 jh->b_next_transaction->t_tid : 0,
1361 transaction, transaction->t_tid); 1356 jh->b_jlist);
1357 WARN_ON(1);
1362 ret = -EINVAL; 1358 ret = -EINVAL;
1363 } 1359 }
1364 /* And this case is illegal: we can't reuse another 1360 /* And this case is illegal: we can't reuse another
@@ -1415,7 +1411,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1415 BUFFER_TRACE(bh, "entry"); 1411 BUFFER_TRACE(bh, "entry");
1416 1412
1417 jbd_lock_bh_state(bh); 1413 jbd_lock_bh_state(bh);
1418 spin_lock(&journal->j_list_lock);
1419 1414
1420 if (!buffer_jbd(bh)) 1415 if (!buffer_jbd(bh))
1421 goto not_jbd; 1416 goto not_jbd;
@@ -1468,6 +1463,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1468 * we know to remove the checkpoint after we commit. 1463 * we know to remove the checkpoint after we commit.
1469 */ 1464 */
1470 1465
1466 spin_lock(&journal->j_list_lock);
1471 if (jh->b_cp_transaction) { 1467 if (jh->b_cp_transaction) {
1472 __jbd2_journal_temp_unlink_buffer(jh); 1468 __jbd2_journal_temp_unlink_buffer(jh);
1473 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); 1469 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
@@ -1480,6 +1476,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1480 goto drop; 1476 goto drop;
1481 } 1477 }
1482 } 1478 }
1479 spin_unlock(&journal->j_list_lock);
1483 } else if (jh->b_transaction) { 1480 } else if (jh->b_transaction) {
1484 J_ASSERT_JH(jh, (jh->b_transaction == 1481 J_ASSERT_JH(jh, (jh->b_transaction ==
1485 journal->j_committing_transaction)); 1482 journal->j_committing_transaction));
@@ -1491,7 +1488,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1491 1488
1492 if (jh->b_next_transaction) { 1489 if (jh->b_next_transaction) {
1493 J_ASSERT(jh->b_next_transaction == transaction); 1490 J_ASSERT(jh->b_next_transaction == transaction);
1491 spin_lock(&journal->j_list_lock);
1494 jh->b_next_transaction = NULL; 1492 jh->b_next_transaction = NULL;
1493 spin_unlock(&journal->j_list_lock);
1495 1494
1496 /* 1495 /*
1497 * only drop a reference if this transaction modified 1496 * only drop a reference if this transaction modified
@@ -1503,7 +1502,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1503 } 1502 }
1504 1503
1505not_jbd: 1504not_jbd:
1506 spin_unlock(&journal->j_list_lock);
1507 jbd_unlock_bh_state(bh); 1505 jbd_unlock_bh_state(bh);
1508 __brelse(bh); 1506 __brelse(bh);
1509drop: 1507drop:
@@ -1821,11 +1819,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1821 if (buffer_locked(bh) || buffer_dirty(bh)) 1819 if (buffer_locked(bh) || buffer_dirty(bh))
1822 goto out; 1820 goto out;
1823 1821
1824 if (jh->b_next_transaction != NULL) 1822 if (jh->b_next_transaction != NULL || jh->b_transaction != NULL)
1825 goto out; 1823 goto out;
1826 1824
1827 spin_lock(&journal->j_list_lock); 1825 spin_lock(&journal->j_list_lock);
1828 if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { 1826 if (jh->b_cp_transaction != NULL) {
1829 /* written-back checkpointed metadata buffer */ 1827 /* written-back checkpointed metadata buffer */
1830 JBUFFER_TRACE(jh, "remove from checkpoint list"); 1828 JBUFFER_TRACE(jh, "remove from checkpoint list");
1831 __jbd2_journal_remove_checkpoint(jh); 1829 __jbd2_journal_remove_checkpoint(jh);
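
Note on the transaction.c hunks above: they shrink the j_list_lock critical sections. Instead of taking the lock at the top of jbd2_journal_get_create_access() and jbd2_journal_forget(), it is now acquired only around the actual list manipulations, while per-buffer state stays protected by jbd_lock_bh_state(). A userspace sketch of the same lock-scope reduction (list_lock, head, and file_node are illustrative names):

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
        pthread_mutex_t lock;   /* per-node state lock (cf. bh_state lock) */
        struct node *next;
        bool filed;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *head;

static struct node example = { PTHREAD_MUTEX_INITIALIZER, NULL, false };

void file_node(struct node *n)
{
        pthread_mutex_lock(&n->lock);           /* private state checks */
        if (!n->filed) {
                pthread_mutex_lock(&list_lock); /* list lock only here */
                n->next = head;
                head = n;
                pthread_mutex_unlock(&list_lock);
                n->filed = true;
        }
        pthread_mutex_unlock(&n->lock);
}

The shared list lock is held only for the pointer updates, so unrelated threads filing other nodes serialize for as short a window as possible.
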
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 0defb1cc2a35..0918f0e2e266 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -243,6 +243,7 @@ static int jffs2_remount_fs(struct super_block *sb, int *flags, char *data)
243 struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); 243 struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
244 int err; 244 int err;
245 245
246 sync_filesystem(sb);
246 err = jffs2_parse_options(c, data); 247 err = jffs2_parse_options(c, data);
247 if (err) 248 if (err)
248 return -EINVAL; 249 return -EINVAL;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index e2b7483444fd..97f7fda51890 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -418,6 +418,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
418 int flag = JFS_SBI(sb)->flag; 418 int flag = JFS_SBI(sb)->flag;
419 int ret; 419 int ret;
420 420
421 sync_filesystem(sb);
421 if (!parse_options(data, sb, &newLVSize, &flag)) { 422 if (!parse_options(data, sb, &newLVSize, &flag)) {
422 return -EINVAL; 423 return -EINVAL;
423 } 424 }
diff --git a/fs/mbcache.c b/fs/mbcache.c
index e519e45bf673..bf166e388f0d 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -26,6 +26,41 @@
26 * back on the lru list. 26 * back on the lru list.
27 */ 27 */
28 28
29/*
30 * Lock descriptions and usage:
31 *
32 * Each hash chain of both the block and index hash tables now contains
33 * a built-in lock used to serialize accesses to the hash chain.
34 *
35 * Accesses to global data structures mb_cache_list and mb_cache_lru_list
36 * are serialized via the global spinlock mb_cache_spinlock.
37 *
38 * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize
39 * accesses to its local data, such as e_used and e_queued.
40 *
41 * Lock ordering:
42 *
43 * Each block hash chain's lock has the highest lock order, followed by an
44 * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's
45 * lock), and mb_cache_spinlock, with the lowest order. While holding
46 * either a block or index hash chain lock, a thread can acquire an
47 * mb_cache_bg_lock, which in turn can also acquire mb_cache_spinlock.
48 *
49 * Synchronization:
50 *
51 * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and
52 * index hash chains, they need to lock the corresponding hash chain. For each
53 * mb_cache_entry within the chain, the entry itself must be locked to
54 * prevent any simultaneous release or free of the entry and also
55 * to serialize accesses to its e_used and e_queued members.
56 *
57 * To avoid having a dangling reference to an already freed
58 * mb_cache_entry, an mb_cache_entry is only freed when it is not on a
59 * block hash chain and is no longer referenced, i.e. both e_used
60 * and e_queued are zero. When an mb_cache_entry is explicitly freed, it is
61 * first removed from its block hash chain.
62 */
63
29#include <linux/kernel.h> 64#include <linux/kernel.h>
30#include <linux/module.h> 65#include <linux/module.h>
31 66
@@ -34,9 +69,10 @@
34#include <linux/mm.h> 69#include <linux/mm.h>
35#include <linux/slab.h> 70#include <linux/slab.h>
36#include <linux/sched.h> 71#include <linux/sched.h>
37#include <linux/init.h> 72#include <linux/list_bl.h>
38#include <linux/mbcache.h> 73#include <linux/mbcache.h>
39 74#include <linux/init.h>
75#include <linux/blockgroup_lock.h>
40 76
41#ifdef MB_CACHE_DEBUG 77#ifdef MB_CACHE_DEBUG
42# define mb_debug(f...) do { \ 78# define mb_debug(f...) do { \
@@ -57,8 +93,14 @@
57 93
58#define MB_CACHE_WRITER ((unsigned short)~0U >> 1) 94#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
59 95
96#define MB_CACHE_ENTRY_LOCK_BITS __builtin_log2(NR_BG_LOCKS)
97#define MB_CACHE_ENTRY_LOCK_INDEX(ce) \
98 (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS))
99
60static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); 100static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
61 101static struct blockgroup_lock *mb_cache_bg_lock;
102static struct kmem_cache *mb_cache_kmem_cache;
103
62MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>"); 104MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
63MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); 105MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
64MODULE_LICENSE("GPL"); 106MODULE_LICENSE("GPL");
@@ -86,58 +128,110 @@ static LIST_HEAD(mb_cache_list);
86static LIST_HEAD(mb_cache_lru_list); 128static LIST_HEAD(mb_cache_lru_list);
87static DEFINE_SPINLOCK(mb_cache_spinlock); 129static DEFINE_SPINLOCK(mb_cache_spinlock);
88 130
131static inline void
132__spin_lock_mb_cache_entry(struct mb_cache_entry *ce)
133{
134 spin_lock(bgl_lock_ptr(mb_cache_bg_lock,
135 MB_CACHE_ENTRY_LOCK_INDEX(ce)));
136}
137
138static inline void
139__spin_unlock_mb_cache_entry(struct mb_cache_entry *ce)
140{
141 spin_unlock(bgl_lock_ptr(mb_cache_bg_lock,
142 MB_CACHE_ENTRY_LOCK_INDEX(ce)));
143}
144
89static inline int 145static inline int
90__mb_cache_entry_is_hashed(struct mb_cache_entry *ce) 146__mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce)
91{ 147{
92 return !list_empty(&ce->e_block_list); 148 return !hlist_bl_unhashed(&ce->e_block_list);
93} 149}
94 150
95 151
96static void 152static inline void
97__mb_cache_entry_unhash(struct mb_cache_entry *ce) 153__mb_cache_entry_unhash_block(struct mb_cache_entry *ce)
98{ 154{
99 if (__mb_cache_entry_is_hashed(ce)) { 155 if (__mb_cache_entry_is_block_hashed(ce))
100 list_del_init(&ce->e_block_list); 156 hlist_bl_del_init(&ce->e_block_list);
101 list_del(&ce->e_index.o_list);
102 }
103} 157}
104 158
159static inline int
160__mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce)
161{
162 return !hlist_bl_unhashed(&ce->e_index.o_list);
163}
164
165static inline void
166__mb_cache_entry_unhash_index(struct mb_cache_entry *ce)
167{
168 if (__mb_cache_entry_is_index_hashed(ce))
169 hlist_bl_del_init(&ce->e_index.o_list);
170}
171
172/*
173 * __mb_cache_entry_unhash_unlock()
174 *
175 * This function is called to unhash the entry from both the block and
176 * index hash chains.
177 * It assumes both hash chains are locked upon entry.
178 * It also unlocks both hash chains upon exit.
179 */
180static inline void
181__mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce)
182{
183 __mb_cache_entry_unhash_index(ce);
184 hlist_bl_unlock(ce->e_index_hash_p);
185 __mb_cache_entry_unhash_block(ce);
186 hlist_bl_unlock(ce->e_block_hash_p);
187}
105 188
106static void 189static void
107__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) 190__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
108{ 191{
109 struct mb_cache *cache = ce->e_cache; 192 struct mb_cache *cache = ce->e_cache;
110 193
111 mb_assert(!(ce->e_used || ce->e_queued)); 194 mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)));
112 kmem_cache_free(cache->c_entry_cache, ce); 195 kmem_cache_free(cache->c_entry_cache, ce);
113 atomic_dec(&cache->c_entry_count); 196 atomic_dec(&cache->c_entry_count);
114} 197}
115 198
116
117static void 199static void
118__mb_cache_entry_release_unlock(struct mb_cache_entry *ce) 200__mb_cache_entry_release(struct mb_cache_entry *ce)
119 __releases(mb_cache_spinlock)
120{ 201{
202 /* First lock the entry to serialize access to its local data. */
203 __spin_lock_mb_cache_entry(ce);
121 /* Wake up all processes queuing for this cache entry. */ 204 /* Wake up all processes queuing for this cache entry. */
122 if (ce->e_queued) 205 if (ce->e_queued)
123 wake_up_all(&mb_cache_queue); 206 wake_up_all(&mb_cache_queue);
124 if (ce->e_used >= MB_CACHE_WRITER) 207 if (ce->e_used >= MB_CACHE_WRITER)
125 ce->e_used -= MB_CACHE_WRITER; 208 ce->e_used -= MB_CACHE_WRITER;
209 /*
210 * Make sure that all cache entries on lru_list have
211 * both e_used and e_queued equal to zero.
212 */
126 ce->e_used--; 213 ce->e_used--;
127 if (!(ce->e_used || ce->e_queued)) { 214 if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) {
128 if (!__mb_cache_entry_is_hashed(ce)) 215 if (!__mb_cache_entry_is_block_hashed(ce)) {
216 __spin_unlock_mb_cache_entry(ce);
129 goto forget; 217 goto forget;
130 mb_assert(list_empty(&ce->e_lru_list)); 218 }
131 list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); 219 /*
220 * Need access to lru list, first drop entry lock,
221 * then reacquire the lock in the proper order.
222 */
223 spin_lock(&mb_cache_spinlock);
224 if (list_empty(&ce->e_lru_list))
225 list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
226 spin_unlock(&mb_cache_spinlock);
132 } 227 }
133 spin_unlock(&mb_cache_spinlock); 228 __spin_unlock_mb_cache_entry(ce);
134 return; 229 return;
135forget: 230forget:
136 spin_unlock(&mb_cache_spinlock); 231 mb_assert(list_empty(&ce->e_lru_list));
137 __mb_cache_entry_forget(ce, GFP_KERNEL); 232 __mb_cache_entry_forget(ce, GFP_KERNEL);
138} 233}
139 234
140
141/* 235/*
142 * mb_cache_shrink_scan() memory pressure callback 236 * mb_cache_shrink_scan() memory pressure callback
143 * 237 *
@@ -160,17 +254,34 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
160 254
161 mb_debug("trying to free %d entries", nr_to_scan); 255 mb_debug("trying to free %d entries", nr_to_scan);
162 spin_lock(&mb_cache_spinlock); 256 spin_lock(&mb_cache_spinlock);
163 while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) { 257 while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) {
164 struct mb_cache_entry *ce = 258 struct mb_cache_entry *ce =
165 list_entry(mb_cache_lru_list.next, 259 list_entry(mb_cache_lru_list.next,
166 struct mb_cache_entry, e_lru_list); 260 struct mb_cache_entry, e_lru_list);
167 list_move_tail(&ce->e_lru_list, &free_list); 261 list_del_init(&ce->e_lru_list);
168 __mb_cache_entry_unhash(ce); 262 if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))
169 freed++; 263 continue;
264 spin_unlock(&mb_cache_spinlock);
265 /* Prevent any find or get operation on the entry */
266 hlist_bl_lock(ce->e_block_hash_p);
267 hlist_bl_lock(ce->e_index_hash_p);
268 /* Ignore if it is touched by a find/get */
269 if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) ||
270 !list_empty(&ce->e_lru_list)) {
271 hlist_bl_unlock(ce->e_index_hash_p);
272 hlist_bl_unlock(ce->e_block_hash_p);
273 spin_lock(&mb_cache_spinlock);
274 continue;
275 }
276 __mb_cache_entry_unhash_unlock(ce);
277 list_add_tail(&ce->e_lru_list, &free_list);
278 spin_lock(&mb_cache_spinlock);
170 } 279 }
171 spin_unlock(&mb_cache_spinlock); 280 spin_unlock(&mb_cache_spinlock);
281
172 list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { 282 list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
173 __mb_cache_entry_forget(entry, gfp_mask); 283 __mb_cache_entry_forget(entry, gfp_mask);
284 freed++;
174 } 285 }
175 return freed; 286 return freed;
176} 287}
@@ -215,29 +326,40 @@ mb_cache_create(const char *name, int bucket_bits)
215 int n, bucket_count = 1 << bucket_bits; 326 int n, bucket_count = 1 << bucket_bits;
216 struct mb_cache *cache = NULL; 327 struct mb_cache *cache = NULL;
217 328
329 if (!mb_cache_bg_lock) {
330 mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock),
331 GFP_KERNEL);
332 if (!mb_cache_bg_lock)
333 return NULL;
334 bgl_lock_init(mb_cache_bg_lock);
335 }
336
218 cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); 337 cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
219 if (!cache) 338 if (!cache)
220 return NULL; 339 return NULL;
221 cache->c_name = name; 340 cache->c_name = name;
222 atomic_set(&cache->c_entry_count, 0); 341 atomic_set(&cache->c_entry_count, 0);
223 cache->c_bucket_bits = bucket_bits; 342 cache->c_bucket_bits = bucket_bits;
224 cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), 343 cache->c_block_hash = kmalloc(bucket_count *
225 GFP_KERNEL); 344 sizeof(struct hlist_bl_head), GFP_KERNEL);
226 if (!cache->c_block_hash) 345 if (!cache->c_block_hash)
227 goto fail; 346 goto fail;
228 for (n=0; n<bucket_count; n++) 347 for (n=0; n<bucket_count; n++)
229 INIT_LIST_HEAD(&cache->c_block_hash[n]); 348 INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]);
230 cache->c_index_hash = kmalloc(bucket_count * sizeof(struct list_head), 349 cache->c_index_hash = kmalloc(bucket_count *
231 GFP_KERNEL); 350 sizeof(struct hlist_bl_head), GFP_KERNEL);
232 if (!cache->c_index_hash) 351 if (!cache->c_index_hash)
233 goto fail; 352 goto fail;
234 for (n=0; n<bucket_count; n++) 353 for (n=0; n<bucket_count; n++)
235 INIT_LIST_HEAD(&cache->c_index_hash[n]); 354 INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]);
236 cache->c_entry_cache = kmem_cache_create(name, 355 if (!mb_cache_kmem_cache) {
237 sizeof(struct mb_cache_entry), 0, 356 mb_cache_kmem_cache = kmem_cache_create(name,
238 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); 357 sizeof(struct mb_cache_entry), 0,
239 if (!cache->c_entry_cache) 358 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
240 goto fail2; 359 if (!mb_cache_kmem_cache)
360 goto fail2;
361 }
362 cache->c_entry_cache = mb_cache_kmem_cache;
241 363
242 /* 364 /*
243 * Set an upper limit on the number of cache entries so that the hash 365 * Set an upper limit on the number of cache entries so that the hash
@@ -273,21 +395,47 @@ void
273mb_cache_shrink(struct block_device *bdev) 395mb_cache_shrink(struct block_device *bdev)
274{ 396{
275 LIST_HEAD(free_list); 397 LIST_HEAD(free_list);
276 struct list_head *l, *ltmp; 398 struct list_head *l;
399 struct mb_cache_entry *ce, *tmp;
277 400
401 l = &mb_cache_lru_list;
278 spin_lock(&mb_cache_spinlock); 402 spin_lock(&mb_cache_spinlock);
279 list_for_each_safe(l, ltmp, &mb_cache_lru_list) { 403 while (!list_is_last(l, &mb_cache_lru_list)) {
280 struct mb_cache_entry *ce = 404 l = l->next;
281 list_entry(l, struct mb_cache_entry, e_lru_list); 405 ce = list_entry(l, struct mb_cache_entry, e_lru_list);
282 if (ce->e_bdev == bdev) { 406 if (ce->e_bdev == bdev) {
283 list_move_tail(&ce->e_lru_list, &free_list); 407 list_del_init(&ce->e_lru_list);
284 __mb_cache_entry_unhash(ce); 408 if (ce->e_used || ce->e_queued ||
409 atomic_read(&ce->e_refcnt))
410 continue;
411 spin_unlock(&mb_cache_spinlock);
412 /*
413 * Prevent any find or get operation on the entry.
414 */
415 hlist_bl_lock(ce->e_block_hash_p);
416 hlist_bl_lock(ce->e_index_hash_p);
417 /* Ignore if it is touched by a find/get */
418 if (ce->e_used || ce->e_queued ||
419 atomic_read(&ce->e_refcnt) ||
420 !list_empty(&ce->e_lru_list)) {
421 hlist_bl_unlock(ce->e_index_hash_p);
422 hlist_bl_unlock(ce->e_block_hash_p);
423 l = &mb_cache_lru_list;
424 spin_lock(&mb_cache_spinlock);
425 continue;
426 }
427 __mb_cache_entry_unhash_unlock(ce);
428 mb_assert(!(ce->e_used || ce->e_queued ||
429 atomic_read(&ce->e_refcnt)));
430 list_add_tail(&ce->e_lru_list, &free_list);
431 l = &mb_cache_lru_list;
432 spin_lock(&mb_cache_spinlock);
285 } 433 }
286 } 434 }
287 spin_unlock(&mb_cache_spinlock); 435 spin_unlock(&mb_cache_spinlock);
288 list_for_each_safe(l, ltmp, &free_list) { 436
289 __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, 437 list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
290 e_lru_list), GFP_KERNEL); 438 __mb_cache_entry_forget(ce, GFP_KERNEL);
291 } 439 }
292} 440}
293 441
@@ -303,23 +451,27 @@ void
303mb_cache_destroy(struct mb_cache *cache) 451mb_cache_destroy(struct mb_cache *cache)
304{ 452{
305 LIST_HEAD(free_list); 453 LIST_HEAD(free_list);
306 struct list_head *l, *ltmp; 454 struct mb_cache_entry *ce, *tmp;
307 455
308 spin_lock(&mb_cache_spinlock); 456 spin_lock(&mb_cache_spinlock);
309 list_for_each_safe(l, ltmp, &mb_cache_lru_list) { 457 list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) {
310 struct mb_cache_entry *ce = 458 if (ce->e_cache == cache)
311 list_entry(l, struct mb_cache_entry, e_lru_list);
312 if (ce->e_cache == cache) {
313 list_move_tail(&ce->e_lru_list, &free_list); 459 list_move_tail(&ce->e_lru_list, &free_list);
314 __mb_cache_entry_unhash(ce);
315 }
316 } 460 }
317 list_del(&cache->c_cache_list); 461 list_del(&cache->c_cache_list);
318 spin_unlock(&mb_cache_spinlock); 462 spin_unlock(&mb_cache_spinlock);
319 463
320 list_for_each_safe(l, ltmp, &free_list) { 464 list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
321 __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, 465 list_del_init(&ce->e_lru_list);
322 e_lru_list), GFP_KERNEL); 466 /*
467 * Prevent any find or get operation on the entry.
468 */
469 hlist_bl_lock(ce->e_block_hash_p);
470 hlist_bl_lock(ce->e_index_hash_p);
471 mb_assert(!(ce->e_used || ce->e_queued ||
472 atomic_read(&ce->e_refcnt)));
473 __mb_cache_entry_unhash_unlock(ce);
474 __mb_cache_entry_forget(ce, GFP_KERNEL);
323 } 475 }
324 476
325 if (atomic_read(&cache->c_entry_count) > 0) { 477 if (atomic_read(&cache->c_entry_count) > 0) {
@@ -328,8 +480,10 @@ mb_cache_destroy(struct mb_cache *cache)
328 atomic_read(&cache->c_entry_count)); 480 atomic_read(&cache->c_entry_count));
329 } 481 }
330 482
331 kmem_cache_destroy(cache->c_entry_cache); 483 if (list_empty(&mb_cache_list)) {
332 484 kmem_cache_destroy(mb_cache_kmem_cache);
485 mb_cache_kmem_cache = NULL;
486 }
333 kfree(cache->c_index_hash); 487 kfree(cache->c_index_hash);
334 kfree(cache->c_block_hash); 488 kfree(cache->c_block_hash);
335 kfree(cache); 489 kfree(cache);
@@ -346,28 +500,61 @@ mb_cache_destroy(struct mb_cache *cache)
346struct mb_cache_entry * 500struct mb_cache_entry *
347mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) 501mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
348{ 502{
349 struct mb_cache_entry *ce = NULL; 503 struct mb_cache_entry *ce;
350 504
351 if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { 505 if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
506 struct list_head *l;
507
508 l = &mb_cache_lru_list;
352 spin_lock(&mb_cache_spinlock); 509 spin_lock(&mb_cache_spinlock);
353 if (!list_empty(&mb_cache_lru_list)) { 510 while (!list_is_last(l, &mb_cache_lru_list)) {
354 ce = list_entry(mb_cache_lru_list.next, 511 l = l->next;
355 struct mb_cache_entry, e_lru_list); 512 ce = list_entry(l, struct mb_cache_entry, e_lru_list);
356 list_del_init(&ce->e_lru_list); 513 if (ce->e_cache == cache) {
357 __mb_cache_entry_unhash(ce); 514 list_del_init(&ce->e_lru_list);
515 if (ce->e_used || ce->e_queued ||
516 atomic_read(&ce->e_refcnt))
517 continue;
518 spin_unlock(&mb_cache_spinlock);
519 /*
520 * Prevent any find or get operation on the
521 * entry.
522 */
523 hlist_bl_lock(ce->e_block_hash_p);
524 hlist_bl_lock(ce->e_index_hash_p);
525 /* Ignore if it is touched by a find/get */
526 if (ce->e_used || ce->e_queued ||
527 atomic_read(&ce->e_refcnt) ||
528 !list_empty(&ce->e_lru_list)) {
529 hlist_bl_unlock(ce->e_index_hash_p);
530 hlist_bl_unlock(ce->e_block_hash_p);
531 l = &mb_cache_lru_list;
532 spin_lock(&mb_cache_spinlock);
533 continue;
534 }
535 mb_assert(list_empty(&ce->e_lru_list));
536 mb_assert(!(ce->e_used || ce->e_queued ||
537 atomic_read(&ce->e_refcnt)));
538 __mb_cache_entry_unhash_unlock(ce);
539 goto found;
540 }
358 } 541 }
359 spin_unlock(&mb_cache_spinlock); 542 spin_unlock(&mb_cache_spinlock);
360 } 543 }
361 if (!ce) { 544
362 ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); 545 ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
363 if (!ce) 546 if (!ce)
364 return NULL; 547 return NULL;
365 atomic_inc(&cache->c_entry_count); 548 atomic_inc(&cache->c_entry_count);
366 INIT_LIST_HEAD(&ce->e_lru_list); 549 INIT_LIST_HEAD(&ce->e_lru_list);
367 INIT_LIST_HEAD(&ce->e_block_list); 550 INIT_HLIST_BL_NODE(&ce->e_block_list);
368 ce->e_cache = cache; 551 INIT_HLIST_BL_NODE(&ce->e_index.o_list);
369 ce->e_queued = 0; 552 ce->e_cache = cache;
370 } 553 ce->e_queued = 0;
554 atomic_set(&ce->e_refcnt, 0);
555found:
556 ce->e_block_hash_p = &cache->c_block_hash[0];
557 ce->e_index_hash_p = &cache->c_index_hash[0];
371 ce->e_used = 1 + MB_CACHE_WRITER; 558 ce->e_used = 1 + MB_CACHE_WRITER;
372 return ce; 559 return ce;
373} 560}
@@ -393,29 +580,38 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
393{ 580{
394 struct mb_cache *cache = ce->e_cache; 581 struct mb_cache *cache = ce->e_cache;
395 unsigned int bucket; 582 unsigned int bucket;
396 struct list_head *l; 583 struct hlist_bl_node *l;
397 int error = -EBUSY; 584 struct hlist_bl_head *block_hash_p;
585 struct hlist_bl_head *index_hash_p;
586 struct mb_cache_entry *lce;
398 587
588 mb_assert(ce);
399 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), 589 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
400 cache->c_bucket_bits); 590 cache->c_bucket_bits);
401 spin_lock(&mb_cache_spinlock); 591 block_hash_p = &cache->c_block_hash[bucket];
402 list_for_each_prev(l, &cache->c_block_hash[bucket]) { 592 hlist_bl_lock(block_hash_p);
403 struct mb_cache_entry *ce = 593 hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) {
404 list_entry(l, struct mb_cache_entry, e_block_list); 594 if (lce->e_bdev == bdev && lce->e_block == block) {
405 if (ce->e_bdev == bdev && ce->e_block == block) 595 hlist_bl_unlock(block_hash_p);
406 goto out; 596 return -EBUSY;
597 }
407 } 598 }
408 __mb_cache_entry_unhash(ce); 599 mb_assert(!__mb_cache_entry_is_block_hashed(ce));
600 __mb_cache_entry_unhash_block(ce);
601 __mb_cache_entry_unhash_index(ce);
409 ce->e_bdev = bdev; 602 ce->e_bdev = bdev;
410 ce->e_block = block; 603 ce->e_block = block;
411 list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); 604 ce->e_block_hash_p = block_hash_p;
412 ce->e_index.o_key = key; 605 ce->e_index.o_key = key;
606 hlist_bl_add_head(&ce->e_block_list, block_hash_p);
607 hlist_bl_unlock(block_hash_p);
413 bucket = hash_long(key, cache->c_bucket_bits); 608 bucket = hash_long(key, cache->c_bucket_bits);
414 list_add(&ce->e_index.o_list, &cache->c_index_hash[bucket]); 609 index_hash_p = &cache->c_index_hash[bucket];
415 error = 0; 610 hlist_bl_lock(index_hash_p);
416out: 611 ce->e_index_hash_p = index_hash_p;
417 spin_unlock(&mb_cache_spinlock); 612 hlist_bl_add_head(&ce->e_index.o_list, index_hash_p);
418 return error; 613 hlist_bl_unlock(index_hash_p);
614 return 0;
419} 615}
420 616
421 617
@@ -429,24 +625,26 @@ out:
429void 625void
430mb_cache_entry_release(struct mb_cache_entry *ce) 626mb_cache_entry_release(struct mb_cache_entry *ce)
431{ 627{
432 spin_lock(&mb_cache_spinlock); 628 __mb_cache_entry_release(ce);
433 __mb_cache_entry_release_unlock(ce);
434} 629}
435 630
436 631
437/* 632/*
438 * mb_cache_entry_free() 633 * mb_cache_entry_free()
439 * 634 *
440 * This is equivalent to the sequence mb_cache_entry_takeout() --
441 * mb_cache_entry_release().
442 */ 635 */
443void 636void
444mb_cache_entry_free(struct mb_cache_entry *ce) 637mb_cache_entry_free(struct mb_cache_entry *ce)
445{ 638{
446 spin_lock(&mb_cache_spinlock); 639 mb_assert(ce);
447 mb_assert(list_empty(&ce->e_lru_list)); 640 mb_assert(list_empty(&ce->e_lru_list));
448 __mb_cache_entry_unhash(ce); 641 hlist_bl_lock(ce->e_index_hash_p);
449 __mb_cache_entry_release_unlock(ce); 642 __mb_cache_entry_unhash_index(ce);
643 hlist_bl_unlock(ce->e_index_hash_p);
644 hlist_bl_lock(ce->e_block_hash_p);
645 __mb_cache_entry_unhash_block(ce);
646 hlist_bl_unlock(ce->e_block_hash_p);
647 __mb_cache_entry_release(ce);
450} 648}
451 649
452 650
@@ -463,84 +661,110 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
463 sector_t block) 661 sector_t block)
464{ 662{
465 unsigned int bucket; 663 unsigned int bucket;
466 struct list_head *l; 664 struct hlist_bl_node *l;
467 struct mb_cache_entry *ce; 665 struct mb_cache_entry *ce;
666 struct hlist_bl_head *block_hash_p;
468 667
469 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), 668 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
470 cache->c_bucket_bits); 669 cache->c_bucket_bits);
471 spin_lock(&mb_cache_spinlock); 670 block_hash_p = &cache->c_block_hash[bucket];
472 list_for_each(l, &cache->c_block_hash[bucket]) { 671 /* First serialize access to the corresponding block hash chain. */
473 ce = list_entry(l, struct mb_cache_entry, e_block_list); 672 hlist_bl_lock(block_hash_p);
673 hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) {
674 mb_assert(ce->e_block_hash_p == block_hash_p);
474 if (ce->e_bdev == bdev && ce->e_block == block) { 675 if (ce->e_bdev == bdev && ce->e_block == block) {
475 DEFINE_WAIT(wait); 676 /*
677 * Prevent a free from removing the entry.
678 */
679 atomic_inc(&ce->e_refcnt);
680 hlist_bl_unlock(block_hash_p);
681 __spin_lock_mb_cache_entry(ce);
682 atomic_dec(&ce->e_refcnt);
683 if (ce->e_used > 0) {
684 DEFINE_WAIT(wait);
685 while (ce->e_used > 0) {
686 ce->e_queued++;
687 prepare_to_wait(&mb_cache_queue, &wait,
688 TASK_UNINTERRUPTIBLE);
689 __spin_unlock_mb_cache_entry(ce);
690 schedule();
691 __spin_lock_mb_cache_entry(ce);
692 ce->e_queued--;
693 }
694 finish_wait(&mb_cache_queue, &wait);
695 }
696 ce->e_used += 1 + MB_CACHE_WRITER;
697 __spin_unlock_mb_cache_entry(ce);
476 698
477 if (!list_empty(&ce->e_lru_list)) 699 if (!list_empty(&ce->e_lru_list)) {
700 spin_lock(&mb_cache_spinlock);
478 list_del_init(&ce->e_lru_list); 701 list_del_init(&ce->e_lru_list);
479
480 while (ce->e_used > 0) {
481 ce->e_queued++;
482 prepare_to_wait(&mb_cache_queue, &wait,
483 TASK_UNINTERRUPTIBLE);
484 spin_unlock(&mb_cache_spinlock); 702 spin_unlock(&mb_cache_spinlock);
485 schedule();
486 spin_lock(&mb_cache_spinlock);
487 ce->e_queued--;
488 } 703 }
489 finish_wait(&mb_cache_queue, &wait); 704 if (!__mb_cache_entry_is_block_hashed(ce)) {
490 ce->e_used += 1 + MB_CACHE_WRITER; 705 __mb_cache_entry_release(ce);
491
492 if (!__mb_cache_entry_is_hashed(ce)) {
493 __mb_cache_entry_release_unlock(ce);
494 return NULL; 706 return NULL;
495 } 707 }
496 goto cleanup; 708 return ce;
497 } 709 }
498 } 710 }
499 ce = NULL; 711 hlist_bl_unlock(block_hash_p);
500 712 return NULL;
501cleanup:
502 spin_unlock(&mb_cache_spinlock);
503 return ce;
504} 713}
505 714
506#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) 715#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
507 716
508static struct mb_cache_entry * 717static struct mb_cache_entry *
509__mb_cache_entry_find(struct list_head *l, struct list_head *head, 718__mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head,
510 struct block_device *bdev, unsigned int key) 719 struct block_device *bdev, unsigned int key)
511{ 720{
512 while (l != head) { 721
722 /* The index hash chain is alredy acquire by caller. */
723 while (l != NULL) {
513 struct mb_cache_entry *ce = 724 struct mb_cache_entry *ce =
514 list_entry(l, struct mb_cache_entry, e_index.o_list); 725 hlist_bl_entry(l, struct mb_cache_entry,
726 e_index.o_list);
727 mb_assert(ce->e_index_hash_p == head);
515 if (ce->e_bdev == bdev && ce->e_index.o_key == key) { 728 if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
516 DEFINE_WAIT(wait); 729 /*
517 730 * Prevent a free from removing the entry.
518 if (!list_empty(&ce->e_lru_list)) 731 */
519 list_del_init(&ce->e_lru_list); 732 atomic_inc(&ce->e_refcnt);
520 733 hlist_bl_unlock(head);
734 __spin_lock_mb_cache_entry(ce);
735 atomic_dec(&ce->e_refcnt);
736 ce->e_used++;
521 /* Incrementing before holding the lock gives readers 737 /* Incrementing before holding the lock gives readers
522 priority over writers. */ 738 priority over writers. */
523 ce->e_used++; 739 if (ce->e_used >= MB_CACHE_WRITER) {
524 while (ce->e_used >= MB_CACHE_WRITER) { 740 DEFINE_WAIT(wait);
525 ce->e_queued++; 741
526 prepare_to_wait(&mb_cache_queue, &wait, 742 while (ce->e_used >= MB_CACHE_WRITER) {
527 TASK_UNINTERRUPTIBLE); 743 ce->e_queued++;
528 spin_unlock(&mb_cache_spinlock); 744 prepare_to_wait(&mb_cache_queue, &wait,
529 schedule(); 745 TASK_UNINTERRUPTIBLE);
530 spin_lock(&mb_cache_spinlock); 746 __spin_unlock_mb_cache_entry(ce);
531 ce->e_queued--; 747 schedule();
748 __spin_lock_mb_cache_entry(ce);
749 ce->e_queued--;
750 }
751 finish_wait(&mb_cache_queue, &wait);
532 } 752 }
533 finish_wait(&mb_cache_queue, &wait); 753 __spin_unlock_mb_cache_entry(ce);
534 754 if (!list_empty(&ce->e_lru_list)) {
535 if (!__mb_cache_entry_is_hashed(ce)) {
536 __mb_cache_entry_release_unlock(ce);
537 spin_lock(&mb_cache_spinlock); 755 spin_lock(&mb_cache_spinlock);
756 list_del_init(&ce->e_lru_list);
757 spin_unlock(&mb_cache_spinlock);
758 }
759 if (!__mb_cache_entry_is_block_hashed(ce)) {
760 __mb_cache_entry_release(ce);
538 return ERR_PTR(-EAGAIN); 761 return ERR_PTR(-EAGAIN);
539 } 762 }
540 return ce; 763 return ce;
541 } 764 }
542 l = l->next; 765 l = l->next;
543 } 766 }
767 hlist_bl_unlock(head);
544 return NULL; 768 return NULL;
545} 769}
546 770
@@ -562,13 +786,17 @@ mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
562 unsigned int key) 786 unsigned int key)
563{ 787{
564 unsigned int bucket = hash_long(key, cache->c_bucket_bits); 788 unsigned int bucket = hash_long(key, cache->c_bucket_bits);
565 struct list_head *l; 789 struct hlist_bl_node *l;
566 struct mb_cache_entry *ce; 790 struct mb_cache_entry *ce = NULL;
567 791 struct hlist_bl_head *index_hash_p;
568 spin_lock(&mb_cache_spinlock); 792
569 l = cache->c_index_hash[bucket].next; 793 index_hash_p = &cache->c_index_hash[bucket];
570 ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); 794 hlist_bl_lock(index_hash_p);
571 spin_unlock(&mb_cache_spinlock); 795 if (!hlist_bl_empty(index_hash_p)) {
796 l = hlist_bl_first(index_hash_p);
797 ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
798 } else
799 hlist_bl_unlock(index_hash_p);
572 return ce; 800 return ce;
573} 801}
574 802
@@ -597,13 +825,17 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev,
597{ 825{
598 struct mb_cache *cache = prev->e_cache; 826 struct mb_cache *cache = prev->e_cache;
599 unsigned int bucket = hash_long(key, cache->c_bucket_bits); 827 unsigned int bucket = hash_long(key, cache->c_bucket_bits);
600 struct list_head *l; 828 struct hlist_bl_node *l;
601 struct mb_cache_entry *ce; 829 struct mb_cache_entry *ce;
830 struct hlist_bl_head *index_hash_p;
602 831
603 spin_lock(&mb_cache_spinlock); 832 index_hash_p = &cache->c_index_hash[bucket];
833 mb_assert(prev->e_index_hash_p == index_hash_p);
834 hlist_bl_lock(index_hash_p);
835 mb_assert(!hlist_bl_empty(index_hash_p));
604 l = prev->e_index.o_list.next; 836 l = prev->e_index.o_list.next;
605 ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); 837 ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
606 __mb_cache_entry_release_unlock(prev); 838 __mb_cache_entry_release(prev);
607 return ce; 839 return ce;
608} 840}
609 841
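
Note on the mbcache.c rework above: the comment block added at the top of the file fixes the lock hierarchy as block hash chain lock, then index hash chain lock, then the per-entry lock (backed by mb_cache_bg_lock), then the global mb_cache_spinlock. A userspace sketch of code that respects that order (all mutex names illustrative):

#include <pthread.h>

static pthread_mutex_t block_hash_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t index_hash_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t entry_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;

/* Unhashing touches both chains: take both chain locks, highest first. */
void unhash_entry(void)
{
        pthread_mutex_lock(&block_hash_lock);
        pthread_mutex_lock(&index_hash_lock);
        /* ...remove the entry from both chains... */
        pthread_mutex_unlock(&index_hash_lock);
        pthread_mutex_unlock(&block_hash_lock);
}

/* Releasing an entry may need the lru list: the global (lowest-order)
 * lock nests inside the entry lock, never the other way around. */
void release_entry(void)
{
        pthread_mutex_lock(&entry_lock);
        /* ...drop e_used; if the entry is now unused... */
        pthread_mutex_lock(&lru_lock);
        /* ...put it back on the lru list... */
        pthread_mutex_unlock(&lru_lock);
        pthread_mutex_unlock(&entry_lock);
}

As long as every path acquires in this one order, no two threads can hold the locks in a cycle, which is what makes the per-chain locking safe without the old single spinlock.
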
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 0ad2ec9601de..f007a3355570 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -123,6 +123,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data)
123 struct minix_sb_info * sbi = minix_sb(sb); 123 struct minix_sb_info * sbi = minix_sb(sb);
124 struct minix_super_block * ms; 124 struct minix_super_block * ms;
125 125
126 sync_filesystem(sb);
126 ms = sbi->s_ms; 127 ms = sbi->s_ms;
127 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 128 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
128 return 0; 129 return 0;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index ee59d35ff069..647d86d2db39 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -99,6 +99,7 @@ static void destroy_inodecache(void)
99 99
100static int ncp_remount(struct super_block *sb, int *flags, char* data) 100static int ncp_remount(struct super_block *sb, int *flags, char* data)
101{ 101{
102 sync_filesystem(sb);
102 *flags |= MS_NODIRATIME; 103 *flags |= MS_NODIRATIME;
103 return 0; 104 return 0;
104} 105}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 910ed906eb82..2cb56943e232 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2215,6 +2215,8 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
2215 struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; 2215 struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data;
2216 u32 nfsvers = nfss->nfs_client->rpc_ops->version; 2216 u32 nfsvers = nfss->nfs_client->rpc_ops->version;
2217 2217
2218 sync_filesystem(sb);
2219
2218 /* 2220 /*
2219 * Userspace mount programs that send binary options generally send 2221 * Userspace mount programs that send binary options generally send
2220 * them populated with default values. We have no way to know which 2222 * them populated with default values. We have no way to know which
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 7ac2a122ca1d..8c532b2ca3ab 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1129,6 +1129,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1129 unsigned long old_mount_opt; 1129 unsigned long old_mount_opt;
1130 int err; 1130 int err;
1131 1131
1132 sync_filesystem(sb);
1132 old_sb_flags = sb->s_flags; 1133 old_sb_flags = sb->s_flags;
1133 old_mount_opt = nilfs->ns_mount_opt; 1134 old_mount_opt = nilfs->ns_mount_opt;
1134 1135
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 82650d52d916..bd5610d48242 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -468,6 +468,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
468 468
469 ntfs_debug("Entering with remount options string: %s", opt); 469 ntfs_debug("Entering with remount options string: %s", opt);
470 470
471 sync_filesystem(sb);
472
471#ifndef NTFS_RW 473#ifndef NTFS_RW
472 /* For read-only compiled driver, enforce read-only flag. */ 474 /* For read-only compiled driver, enforce read-only flag. */
473 *flags |= MS_RDONLY; 475 *flags |= MS_RDONLY;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 1aecd626e645..a7cdd56f4c79 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -634,6 +634,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
634 struct ocfs2_super *osb = OCFS2_SB(sb); 634 struct ocfs2_super *osb = OCFS2_SB(sb);
635 u32 tmp; 635 u32 tmp;
636 636
637 sync_filesystem(sb);
638
637 if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || 639 if (!ocfs2_parse_options(sb, data, &parsed_options, 1) ||
638 !ocfs2_check_set_options(sb, &parsed_options)) { 640 !ocfs2_check_set_options(sb, &parsed_options)) {
639 ret = -EINVAL; 641 ret = -EINVAL;
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 8c0ceb8dd1f7..15e4500cda3e 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -368,6 +368,7 @@ static struct inode *openprom_iget(struct super_block *sb, ino_t ino)
368 368
369static int openprom_remount(struct super_block *sb, int *flags, char *data) 369static int openprom_remount(struct super_block *sb, int *flags, char *data)
370{ 370{
371 sync_filesystem(sb);
371 *flags |= MS_NOATIME; 372 *flags |= MS_NOATIME;
372 return 0; 373 return 0;
373} 374}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 7bbeb5257af1..5dbadecb234d 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -92,6 +92,8 @@ static int proc_parse_options(char *options, struct pid_namespace *pid)
92int proc_remount(struct super_block *sb, int *flags, char *data) 92int proc_remount(struct super_block *sb, int *flags, char *data)
93{ 93{
94 struct pid_namespace *pid = sb->s_fs_info; 94 struct pid_namespace *pid = sb->s_fs_info;
95
96 sync_filesystem(sb);
95 return !proc_parse_options(data, pid); 97 return !proc_parse_options(data, pid);
96} 98}
97 99
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 12823845d324..192297b0090d 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -249,6 +249,7 @@ static void parse_options(char *options)
249 249
250static int pstore_remount(struct super_block *sb, int *flags, char *data) 250static int pstore_remount(struct super_block *sb, int *flags, char *data)
251{ 251{
252 sync_filesystem(sb);
252 parse_options(data); 253 parse_options(data);
253 254
254 return 0; 255 return 0;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 89558810381c..c4bcb778886e 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -44,6 +44,7 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data)
44{ 44{
45 struct qnx4_sb_info *qs; 45 struct qnx4_sb_info *qs;
46 46
47 sync_filesystem(sb);
47 qs = qnx4_sb(sb); 48 qs = qnx4_sb(sb);
48 qs->Version = QNX4_VERSION; 49 qs->Version = QNX4_VERSION;
49 *flags |= MS_RDONLY; 50 *flags |= MS_RDONLY;
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 8d941edfefa1..65cdaab3ed49 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -55,6 +55,7 @@ static int qnx6_show_options(struct seq_file *seq, struct dentry *root)
55 55
56static int qnx6_remount(struct super_block *sb, int *flags, char *data) 56static int qnx6_remount(struct super_block *sb, int *flags, char *data)
57{ 57{
58 sync_filesystem(sb);
58 *flags |= MS_RDONLY; 59 *flags |= MS_RDONLY;
59 return 0; 60 return 0;
60} 61}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index ed54a04c33bd..9fb20426005e 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1318,6 +1318,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1318 int i; 1318 int i;
1319#endif 1319#endif
1320 1320
1321 sync_filesystem(s);
1321 reiserfs_write_lock(s); 1322 reiserfs_write_lock(s);
1322 1323
1323#ifdef CONFIG_QUOTA 1324#ifdef CONFIG_QUOTA
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index d8418782862b..ef90e8bca95a 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -432,6 +432,7 @@ static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf)
432 */ 432 */
433static int romfs_remount(struct super_block *sb, int *flags, char *data) 433static int romfs_remount(struct super_block *sb, int *flags, char *data)
434{ 434{
435 sync_filesystem(sb);
435 *flags |= MS_RDONLY; 436 *flags |= MS_RDONLY;
436 return 0; 437 return 0;
437} 438}
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 202df6312d4e..031c8d67fd51 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -371,6 +371,7 @@ static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf)
371 371
372static int squashfs_remount(struct super_block *sb, int *flags, char *data) 372static int squashfs_remount(struct super_block *sb, int *flags, char *data)
373{ 373{
374 sync_filesystem(sb);
374 *flags |= MS_RDONLY; 375 *flags |= MS_RDONLY;
375 return 0; 376 return 0;
376} 377}
diff --git a/fs/super.c b/fs/super.c
index 80d5cf2ca765..e9dc3c3fe159 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -719,8 +719,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
719 } 719 }
720 } 720 }
721 721
722 sync_filesystem(sb);
723
724 if (sb->s_op->remount_fs) { 722 if (sb->s_op->remount_fs) {
725 retval = sb->s_op->remount_fs(sb, &flags, data); 723 retval = sb->s_op->remount_fs(sb, &flags, data);
726 if (retval) { 724 if (retval) {
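
Note on the fs/super.c hunk above: it removes the unconditional sync_filesystem() from do_remount_sb(), and the matching one-line additions throughout this diff push the call into each filesystem's remount handler, so a filesystem can make the flush conditional (elsewhere in this series ext4 only syncs when remounting read-only). A compilable userspace sketch of the dispatch change (fsops, sync_fs, and the handler names are illustrative):

#include <stdbool.h>
#include <stdio.h>

struct fsops {
        int (*remount)(bool to_readonly);
};

static void sync_fs(void)
{
        puts("flushing dirty data");
}

/* ext4-style handler: flush only when going read-only. */
static int conditional_remount(bool to_readonly)
{
        if (to_readonly)
                sync_fs();
        return 0;
}

/* Most handlers keep the old behaviour by calling the flush first. */
static int simple_remount(bool to_readonly)
{
        (void)to_readonly;
        sync_fs();
        return 0;
}

/* The dispatcher no longer flushes; each handler decides. */
static int do_remount(const struct fsops *ops, bool to_readonly)
{
        return ops->remount(to_readonly);
}

int main(void)
{
        const struct fsops ext4_like = { conditional_remount };
        const struct fsops simple = { simple_remount };
        do_remount(&ext4_like, false);  /* no flush */
        do_remount(&simple, false);     /* flush */
        return 0;
}
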
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 5625ca920f5e..88956309cc86 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -60,6 +60,7 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data)
60{ 60{
61 struct sysv_sb_info *sbi = SYSV_SB(sb); 61 struct sysv_sb_info *sbi = SYSV_SB(sb);
62 62
63 sync_filesystem(sb);
63 if (sbi->s_forced_ro) 64 if (sbi->s_forced_ro)
64 *flags |= MS_RDONLY; 65 *flags |= MS_RDONLY;
65 return 0; 66 return 0;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 48f943f7f5d5..a1266089eca1 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1827,6 +1827,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
1827 int err; 1827 int err;
1828 struct ubifs_info *c = sb->s_fs_info; 1828 struct ubifs_info *c = sb->s_fs_info;
1829 1829
1830 sync_filesystem(sb);
1830 dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); 1831 dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags);
1831 1832
1832 err = ubifs_parse_options(c, data, 1); 1833 err = ubifs_parse_options(c, data, 1);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 3306b9f69bed..64f2b7334d08 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -646,6 +646,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
646 int error = 0; 646 int error = 0;
647 struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb); 647 struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb);
648 648
649 sync_filesystem(sb);
649 if (lvidiu) { 650 if (lvidiu) {
650 int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev); 651 int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev);
651 if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY)) 652 if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY))
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 329f2f53b7ed..b8c6791f046f 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1280,6 +1280,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1280 unsigned new_mount_opt, ufstype; 1280 unsigned new_mount_opt, ufstype;
1281 unsigned flags; 1281 unsigned flags;
1282 1282
1283 sync_filesystem(sb);
1283 lock_ufs(sb); 1284 lock_ufs(sb);
1284 mutex_lock(&UFS_SB(sb)->s_lock); 1285 mutex_lock(&UFS_SB(sb)->s_lock);
1285 uspi = UFS_SB(sb)->s_uspi; 1286 uspi = UFS_SB(sb)->s_uspi;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 0ef599218991..205376776377 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1197,6 +1197,7 @@ xfs_fs_remount(
1197 char *p; 1197 char *p;
1198 int error; 1198 int error;
1199 1199
1200 sync_filesystem(sb);
1200 while ((p = strsep(&options, ",")) != NULL) { 1201 while ((p = strsep(&options, ",")) != NULL) {
1201 int token; 1202 int token;
1202 1203
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a877ed3f389f..ea80f1cdff06 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2572,6 +2572,9 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
2572void inode_dio_wait(struct inode *inode); 2572void inode_dio_wait(struct inode *inode);
2573void inode_dio_done(struct inode *inode); 2573void inode_dio_done(struct inode *inode);
2574 2574
2575extern void inode_set_flags(struct inode *inode, unsigned int flags,
2576 unsigned int mask);
2577
2575extern const struct file_operations generic_ro_fops; 2578extern const struct file_operations generic_ro_fops;
2576 2579
2577#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) 2580#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index 5525d370701d..6a392e7a723a 100644
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -3,19 +3,21 @@
3 3
4 (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> 4 (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
5*/ 5*/
6
7struct mb_cache_entry { 6struct mb_cache_entry {
8 struct list_head e_lru_list; 7 struct list_head e_lru_list;
9 struct mb_cache *e_cache; 8 struct mb_cache *e_cache;
10 unsigned short e_used; 9 unsigned short e_used;
11 unsigned short e_queued; 10 unsigned short e_queued;
11 atomic_t e_refcnt;
12 struct block_device *e_bdev; 12 struct block_device *e_bdev;
13 sector_t e_block; 13 sector_t e_block;
14 struct list_head e_block_list; 14 struct hlist_bl_node e_block_list;
15 struct { 15 struct {
16 struct list_head o_list; 16 struct hlist_bl_node o_list;
17 unsigned int o_key; 17 unsigned int o_key;
18 } e_index; 18 } e_index;
19 struct hlist_bl_head *e_block_hash_p;
20 struct hlist_bl_head *e_index_hash_p;
19}; 21};
20 22
21struct mb_cache { 23struct mb_cache {
@@ -25,8 +27,8 @@ struct mb_cache {
25 int c_max_entries; 27 int c_max_entries;
26 int c_bucket_bits; 28 int c_bucket_bits;
27 struct kmem_cache *c_entry_cache; 29 struct kmem_cache *c_entry_cache;
28 struct list_head *c_block_hash; 30 struct hlist_bl_head *c_block_hash;
29 struct list_head *c_index_hash; 31 struct hlist_bl_head *c_index_hash;
30}; 32};
31 33
32/* Functions on caches */ 34/* Functions on caches */
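
Note on the mbcache.h hunk above: both hash chains move from list_head to hlist_bl_node. An hlist_bl head is a single pointer whose least-significant bit doubles as a spinlock, so every hash bucket carries its own lock at no extra space cost. A self-contained sketch of that bit-lock idea using C11 atomics (bl_head, bl_lock, and bl_unlock are illustrative, not the kernel API):

#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>

/* Nodes are at least 2-byte aligned, so bit 0 of the head pointer is
 * free to serve as a per-bucket lock bit. */
struct bl_head {
        _Atomic uintptr_t first;
};

static void bl_lock(struct bl_head *h)
{
        for (;;) {
                uintptr_t old = atomic_load(&h->first);
                if (!(old & 1) &&
                    atomic_compare_exchange_weak(&h->first, &old, old | 1))
                        return;         /* bit 0 acquired */
                sched_yield();          /* crude backoff */
        }
}

static void bl_unlock(struct bl_head *h)
{
        atomic_fetch_and(&h->first, ~(uintptr_t)1);
}
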
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 197d3125df2a..010ea89eeb0e 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -16,6 +16,15 @@ struct mpage_da_data;
16struct ext4_map_blocks; 16struct ext4_map_blocks;
17struct extent_status; 17struct extent_status;
18 18
19/* shim until we merge in the xfs_collapse_range branch */
20#ifndef FALLOC_FL_COLLAPSE_RANGE
21#define FALLOC_FL_COLLAPSE_RANGE 0x08
22#endif
23
24#ifndef FALLOC_FL_ZERO_RANGE
25#define FALLOC_FL_ZERO_RANGE 0x10
26#endif
27
19#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) 28#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))
20 29
21#define show_mballoc_flags(flags) __print_flags(flags, "|", \ 30#define show_mballoc_flags(flags) __print_flags(flags, "|", \
@@ -68,6 +77,13 @@ struct extent_status;
68 { EXTENT_STATUS_DELAYED, "D" }, \ 77 { EXTENT_STATUS_DELAYED, "D" }, \
69 { EXTENT_STATUS_HOLE, "H" }) 78 { EXTENT_STATUS_HOLE, "H" })
70 79
80#define show_falloc_mode(mode) __print_flags(mode, "|", \
81 { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \
82 { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \
83 { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \
84 { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \
85 { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})
86
71 87
72TRACE_EVENT(ext4_free_inode, 88TRACE_EVENT(ext4_free_inode,
73 TP_PROTO(struct inode *inode), 89 TP_PROTO(struct inode *inode),
@@ -1328,7 +1344,7 @@ TRACE_EVENT(ext4_direct_IO_exit,
1328 __entry->rw, __entry->ret) 1344 __entry->rw, __entry->ret)
1329); 1345);
1330 1346
1331TRACE_EVENT(ext4_fallocate_enter, 1347DECLARE_EVENT_CLASS(ext4__fallocate_mode,
1332 TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), 1348 TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode),
1333 1349
1334 TP_ARGS(inode, offset, len, mode), 1350 TP_ARGS(inode, offset, len, mode),
@@ -1336,23 +1352,45 @@ TRACE_EVENT(ext4_fallocate_enter,
1336 TP_STRUCT__entry( 1352 TP_STRUCT__entry(
1337 __field( dev_t, dev ) 1353 __field( dev_t, dev )
1338 __field( ino_t, ino ) 1354 __field( ino_t, ino )
1339 __field( loff_t, pos ) 1355 __field( loff_t, offset )
1340 __field( loff_t, len ) 1356 __field( loff_t, len )
1341 __field( int, mode ) 1357 __field( int, mode )
1342 ), 1358 ),
1343 1359
1344 TP_fast_assign( 1360 TP_fast_assign(
1345 __entry->dev = inode->i_sb->s_dev; 1361 __entry->dev = inode->i_sb->s_dev;
1346 __entry->ino = inode->i_ino; 1362 __entry->ino = inode->i_ino;
1347 __entry->pos = offset; 1363 __entry->offset = offset;
1348 __entry->len = len; 1364 __entry->len = len;
1349 __entry->mode = mode; 1365 __entry->mode = mode;
1350 ), 1366 ),
1351 1367
1352 TP_printk("dev %d,%d ino %lu pos %lld len %lld mode %d", 1368 TP_printk("dev %d,%d ino %lu offset %lld len %lld mode %s",
1353 MAJOR(__entry->dev), MINOR(__entry->dev), 1369 MAJOR(__entry->dev), MINOR(__entry->dev),
1354 (unsigned long) __entry->ino, __entry->pos, 1370 (unsigned long) __entry->ino,
1355 __entry->len, __entry->mode) 1371 __entry->offset, __entry->len,
1372 show_falloc_mode(__entry->mode))
1373);
1374
1375DEFINE_EVENT(ext4__fallocate_mode, ext4_fallocate_enter,
1376
1377 TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode),
1378
1379 TP_ARGS(inode, offset, len, mode)
1380);
1381
1382DEFINE_EVENT(ext4__fallocate_mode, ext4_punch_hole,
1383
1384 TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode),
1385
1386 TP_ARGS(inode, offset, len, mode)
1387);
1388
1389DEFINE_EVENT(ext4__fallocate_mode, ext4_zero_range,
1390
1391 TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode),
1392
1393 TP_ARGS(inode, offset, len, mode)
1356); 1394);
1357 1395
1358TRACE_EVENT(ext4_fallocate_exit, 1396TRACE_EVENT(ext4_fallocate_exit,
@@ -1384,31 +1422,6 @@ TRACE_EVENT(ext4_fallocate_exit,
1384 __entry->ret) 1422 __entry->ret)
1385); 1423);
1386 1424
1387TRACE_EVENT(ext4_punch_hole,
1388 TP_PROTO(struct inode *inode, loff_t offset, loff_t len),
1389
1390 TP_ARGS(inode, offset, len),
1391
1392 TP_STRUCT__entry(
1393 __field( dev_t, dev )
1394 __field( ino_t, ino )
1395 __field( loff_t, offset )
1396 __field( loff_t, len )
1397 ),
1398
1399 TP_fast_assign(
1400 __entry->dev = inode->i_sb->s_dev;
1401 __entry->ino = inode->i_ino;
1402 __entry->offset = offset;
1403 __entry->len = len;
1404 ),
1405
1406 TP_printk("dev %d,%d ino %lu offset %lld len %lld",
1407 MAJOR(__entry->dev), MINOR(__entry->dev),
1408 (unsigned long) __entry->ino,
1409 __entry->offset, __entry->len)
1410);
1411
1412TRACE_EVENT(ext4_unlink_enter, 1425TRACE_EVENT(ext4_unlink_enter,
1413 TP_PROTO(struct inode *parent, struct dentry *dentry), 1426 TP_PROTO(struct inode *parent, struct dentry *dentry),
1414 1427
@@ -2410,6 +2423,31 @@ TRACE_EVENT(ext4_es_shrink_exit,
2410 __entry->shrunk_nr, __entry->cache_cnt) 2423 __entry->shrunk_nr, __entry->cache_cnt)
2411); 2424);
2412 2425
2426TRACE_EVENT(ext4_collapse_range,
2427 TP_PROTO(struct inode *inode, loff_t offset, loff_t len),
2428
2429 TP_ARGS(inode, offset, len),
2430
2431 TP_STRUCT__entry(
2432 __field(dev_t, dev)
2433 __field(ino_t, ino)
2434 __field(loff_t, offset)
2435 __field(loff_t, len)
2436 ),
2437
2438 TP_fast_assign(
2439 __entry->dev = inode->i_sb->s_dev;
2440 __entry->ino = inode->i_ino;
2441 __entry->offset = offset;
2442 __entry->len = len;
2443 ),
2444
2445 TP_printk("dev %d,%d ino %lu offset %lld len %lld",
2446 MAJOR(__entry->dev), MINOR(__entry->dev),
2447 (unsigned long) __entry->ino,
2448 __entry->offset, __entry->len)
2449);
2450
2413#endif /* _TRACE_EXT4_H */ 2451#endif /* _TRACE_EXT4_H */
2414 2452
2415/* This part must be outside protection */ 2453/* This part must be outside protection */
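
Note on the trace-header changes above: the shim defines FALLOC_FL_COLLAPSE_RANGE (0x08) and FALLOC_FL_ZERO_RANGE (0x10) so the new tracepoints build before the flag definitions land, and the DECLARE_EVENT_CLASS/DEFINE_EVENT rework lets ext4_fallocate_enter, ext4_punch_hole, and ext4_zero_range share one event template. These tracepoints fire from fallocate(2); a minimal userspace exercise of the two new modes (the path /tmp/testfile is illustrative, and both calls need a kernel and filesystem that support them):

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/tmp/testfile", O_RDWR | O_CREAT, 0644);
        if (fd < 0) { perror("open"); return 1; }
        if (ftruncate(fd, 1 << 20)) { perror("ftruncate"); return 1; }

        /* Zero 256 KiB in the middle without writing zero pages. */
        if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 256 * 1024, 256 * 1024))
                perror("FALLOC_FL_ZERO_RANGE");

        /* Cut 256 KiB out and shift the tail down; offset and length
         * must be multiples of the filesystem block size. */
        if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 256 * 1024, 256 * 1024))
                perror("FALLOC_FL_COLLAPSE_RANGE");

        return close(fd);
}
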