aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-04-04 18:39:39 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-04 18:39:39 -0400
commit24e7ea3bea94fe05eae5019f5f12bcdc98fc5157 (patch)
tree6e527053ad73b737b5450c52d14ddf53ad4ba9a2 /fs/ext4
parent8e343c8b5c2e3c93d9eebea7702c89d81753c495 (diff)
parentad6599ab3ac98a4474544086e048ce86ec15a4d1 (diff)
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o: "Major changes for 3.14 include support for the newly added ZERO_RANGE and COLLAPSE_RANGE fallocate operations, and scalability improvements in the jbd2 layer and in xattr handling when the extended attributes spill over into an external block. Other than that, the usual clean ups and minor bug fixes" * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (42 commits) ext4: fix premature freeing of partial clusters split across leaf blocks ext4: remove unneeded test of ret variable ext4: fix comment typo ext4: make ext4_block_zero_page_range static ext4: atomically set inode->i_flags in ext4_set_inode_flags() ext4: optimize Hurd tests when reading/writing inodes ext4: kill i_version support for Hurd-castrated file systems ext4: each filesystem creates and uses its own mb_cache fs/mbcache.c: doucple the locking of local from global data fs/mbcache.c: change block and index hash chain to hlist_bl_node ext4: Introduce FALLOC_FL_ZERO_RANGE flag for fallocate ext4: refactor ext4_fallocate code ext4: Update inode i_size after the preallocation ext4: fix partial cluster handling for bigalloc file systems ext4: delete path dealloc code in ext4_ext_handle_uninitialized_extents ext4: only call sync_filesystm() when remounting read-only fs: push sync_filesystem() down to the file system's remount_fs() jbd2: improve error messages for inconsistent journal heads jbd2: minimize region locked by j_list_lock in jbd2_journal_forget() jbd2: minimize region locked by j_list_lock in journal_get_create_access() ...
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/ext4.h11
-rw-r--r--fs/ext4/ext4_jbd2.c10
-rw-r--r--fs/ext4/extents.c818
-rw-r--r--fs/ext4/extents_status.c28
-rw-r--r--fs/ext4/extents_status.h9
-rw-r--r--fs/ext4/inode.c120
-rw-r--r--fs/ext4/ioctl.c24
-rw-r--r--fs/ext4/mballoc.c7
-rw-r--r--fs/ext4/mballoc.h4
-rw-r--r--fs/ext4/move_extent.c5
-rw-r--r--fs/ext4/super.c40
-rw-r--r--fs/ext4/xattr.c59
-rw-r--r--fs/ext4/xattr.h6
13 files changed, 901 insertions, 240 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d3a534fdc5ff..f1c65dc7cc0a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -31,6 +31,7 @@
31#include <linux/percpu_counter.h> 31#include <linux/percpu_counter.h>
32#include <linux/ratelimit.h> 32#include <linux/ratelimit.h>
33#include <crypto/hash.h> 33#include <crypto/hash.h>
34#include <linux/falloc.h>
34#ifdef __KERNEL__ 35#ifdef __KERNEL__
35#include <linux/compat.h> 36#include <linux/compat.h>
36#endif 37#endif
@@ -567,6 +568,8 @@ enum {
567#define EXT4_GET_BLOCKS_NO_LOCK 0x0100 568#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
568 /* Do not put hole in extent cache */ 569 /* Do not put hole in extent cache */
569#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200 570#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200
571 /* Convert written extents to unwritten */
572#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0400
570 573
571/* 574/*
572 * The bit position of these flags must not overlap with any of the 575 * The bit position of these flags must not overlap with any of the
@@ -998,6 +1001,8 @@ struct ext4_inode_info {
998#define EXT4_MOUNT2_STD_GROUP_SIZE 0x00000002 /* We have standard group 1001#define EXT4_MOUNT2_STD_GROUP_SIZE 0x00000002 /* We have standard group
999 size of blocksize * 8 1002 size of blocksize * 8
1000 blocks */ 1003 blocks */
1004#define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated
1005 file systems */
1001 1006
1002#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ 1007#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
1003 ~EXT4_MOUNT_##opt 1008 ~EXT4_MOUNT_##opt
@@ -1326,6 +1331,7 @@ struct ext4_sb_info {
1326 struct list_head s_es_lru; 1331 struct list_head s_es_lru;
1327 unsigned long s_es_last_sorted; 1332 unsigned long s_es_last_sorted;
1328 struct percpu_counter s_extent_cache_cnt; 1333 struct percpu_counter s_extent_cache_cnt;
1334 struct mb_cache *s_mb_cache;
1329 spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; 1335 spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
1330 1336
1331 /* Ratelimit ext4 messages. */ 1337 /* Ratelimit ext4 messages. */
@@ -2133,8 +2139,6 @@ extern int ext4_writepage_trans_blocks(struct inode *);
2133extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); 2139extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
2134extern int ext4_block_truncate_page(handle_t *handle, 2140extern int ext4_block_truncate_page(handle_t *handle,
2135 struct address_space *mapping, loff_t from); 2141 struct address_space *mapping, loff_t from);
2136extern int ext4_block_zero_page_range(handle_t *handle,
2137 struct address_space *mapping, loff_t from, loff_t length);
2138extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, 2142extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
2139 loff_t lstart, loff_t lend); 2143 loff_t lstart, loff_t lend);
2140extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 2144extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
@@ -2757,6 +2761,7 @@ extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
2757extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2761extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2758 __u64 start, __u64 len); 2762 __u64 start, __u64 len);
2759extern int ext4_ext_precache(struct inode *inode); 2763extern int ext4_ext_precache(struct inode *inode);
2764extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
2760 2765
2761/* move_extent.c */ 2766/* move_extent.c */
2762extern void ext4_double_down_write_data_sem(struct inode *first, 2767extern void ext4_double_down_write_data_sem(struct inode *first,
@@ -2766,6 +2771,8 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
2766extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, 2771extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2767 __u64 start_orig, __u64 start_donor, 2772 __u64 start_orig, __u64 start_donor,
2768 __u64 len, __u64 *moved_len); 2773 __u64 len, __u64 *moved_len);
2774extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
2775 struct ext4_extent **extent);
2769 2776
2770/* page-io.c */ 2777/* page-io.c */
2771extern int __init ext4_init_pageio(void); 2778extern int __init ext4_init_pageio(void);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 3fe29de832c8..c3fb607413ed 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -259,6 +259,16 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
259 if (WARN_ON_ONCE(err)) { 259 if (WARN_ON_ONCE(err)) {
260 ext4_journal_abort_handle(where, line, __func__, bh, 260 ext4_journal_abort_handle(where, line, __func__, bh,
261 handle, err); 261 handle, err);
262 if (inode == NULL) {
263 pr_err("EXT4: jbd2_journal_dirty_metadata "
264 "failed: handle type %u started at "
265 "line %u, credits %u/%u, errcode %d",
266 handle->h_type,
267 handle->h_line_no,
268 handle->h_requested_credits,
269 handle->h_buffer_credits, err);
270 return err;
271 }
262 ext4_error_inode(inode, where, line, 272 ext4_error_inode(inode, where, line,
263 bh->b_blocknr, 273 bh->b_blocknr,
264 "journal_dirty_metadata failed: " 274 "journal_dirty_metadata failed: "
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 74bc2d549c58..82df3ce9874a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -37,7 +37,6 @@
37#include <linux/quotaops.h> 37#include <linux/quotaops.h>
38#include <linux/string.h> 38#include <linux/string.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/falloc.h>
41#include <asm/uaccess.h> 40#include <asm/uaccess.h>
42#include <linux/fiemap.h> 41#include <linux/fiemap.h>
43#include "ext4_jbd2.h" 42#include "ext4_jbd2.h"
@@ -1691,7 +1690,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1691 * the extent that was written properly split out and conversion to 1690 * the extent that was written properly split out and conversion to
1692 * initialized is trivial. 1691 * initialized is trivial.
1693 */ 1692 */
1694 if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) 1693 if (ext4_ext_is_uninitialized(ex1) != ext4_ext_is_uninitialized(ex2))
1695 return 0; 1694 return 0;
1696 1695
1697 ext1_ee_len = ext4_ext_get_actual_len(ex1); 1696 ext1_ee_len = ext4_ext_get_actual_len(ex1);
@@ -1708,6 +1707,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1708 */ 1707 */
1709 if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) 1708 if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
1710 return 0; 1709 return 0;
1710 if (ext4_ext_is_uninitialized(ex1) &&
1711 (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) ||
1712 atomic_read(&EXT4_I(inode)->i_unwritten) ||
1713 (ext1_ee_len + ext2_ee_len > EXT_UNINIT_MAX_LEN)))
1714 return 0;
1711#ifdef AGGRESSIVE_TEST 1715#ifdef AGGRESSIVE_TEST
1712 if (ext1_ee_len >= 4) 1716 if (ext1_ee_len >= 4)
1713 return 0; 1717 return 0;
@@ -1731,7 +1735,7 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
1731{ 1735{
1732 struct ext4_extent_header *eh; 1736 struct ext4_extent_header *eh;
1733 unsigned int depth, len; 1737 unsigned int depth, len;
1734 int merge_done = 0; 1738 int merge_done = 0, uninit;
1735 1739
1736 depth = ext_depth(inode); 1740 depth = ext_depth(inode);
1737 BUG_ON(path[depth].p_hdr == NULL); 1741 BUG_ON(path[depth].p_hdr == NULL);
@@ -1741,8 +1745,11 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
1741 if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) 1745 if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
1742 break; 1746 break;
1743 /* merge with next extent! */ 1747 /* merge with next extent! */
1748 uninit = ext4_ext_is_uninitialized(ex);
1744 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) 1749 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
1745 + ext4_ext_get_actual_len(ex + 1)); 1750 + ext4_ext_get_actual_len(ex + 1));
1751 if (uninit)
1752 ext4_ext_mark_uninitialized(ex);
1746 1753
1747 if (ex + 1 < EXT_LAST_EXTENT(eh)) { 1754 if (ex + 1 < EXT_LAST_EXTENT(eh)) {
1748 len = (EXT_LAST_EXTENT(eh) - ex - 1) 1755 len = (EXT_LAST_EXTENT(eh) - ex - 1)
@@ -1896,7 +1903,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1896 struct ext4_ext_path *npath = NULL; 1903 struct ext4_ext_path *npath = NULL;
1897 int depth, len, err; 1904 int depth, len, err;
1898 ext4_lblk_t next; 1905 ext4_lblk_t next;
1899 int mb_flags = 0; 1906 int mb_flags = 0, uninit;
1900 1907
1901 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { 1908 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
1902 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); 1909 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
@@ -1946,9 +1953,11 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1946 path + depth); 1953 path + depth);
1947 if (err) 1954 if (err)
1948 return err; 1955 return err;
1949 1956 uninit = ext4_ext_is_uninitialized(ex);
1950 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) 1957 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
1951 + ext4_ext_get_actual_len(newext)); 1958 + ext4_ext_get_actual_len(newext));
1959 if (uninit)
1960 ext4_ext_mark_uninitialized(ex);
1952 eh = path[depth].p_hdr; 1961 eh = path[depth].p_hdr;
1953 nearex = ex; 1962 nearex = ex;
1954 goto merge; 1963 goto merge;
@@ -1971,10 +1980,13 @@ prepend:
1971 if (err) 1980 if (err)
1972 return err; 1981 return err;
1973 1982
1983 uninit = ext4_ext_is_uninitialized(ex);
1974 ex->ee_block = newext->ee_block; 1984 ex->ee_block = newext->ee_block;
1975 ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); 1985 ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
1976 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) 1986 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
1977 + ext4_ext_get_actual_len(newext)); 1987 + ext4_ext_get_actual_len(newext));
1988 if (uninit)
1989 ext4_ext_mark_uninitialized(ex);
1978 eh = path[depth].p_hdr; 1990 eh = path[depth].p_hdr;
1979 nearex = ex; 1991 nearex = ex;
1980 goto merge; 1992 goto merge;
@@ -2585,6 +2597,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2585 ex_ee_block = le32_to_cpu(ex->ee_block); 2597 ex_ee_block = le32_to_cpu(ex->ee_block);
2586 ex_ee_len = ext4_ext_get_actual_len(ex); 2598 ex_ee_len = ext4_ext_get_actual_len(ex);
2587 2599
2600 /*
2601 * If we're starting with an extent other than the last one in the
2602 * node, we need to see if it shares a cluster with the extent to
2603 * the right (towards the end of the file). If its leftmost cluster
2604 * is this extent's rightmost cluster and it is not cluster aligned,
2605 * we'll mark it as a partial that is not to be deallocated.
2606 */
2607
2608 if (ex != EXT_LAST_EXTENT(eh)) {
2609 ext4_fsblk_t current_pblk, right_pblk;
2610 long long current_cluster, right_cluster;
2611
2612 current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
2613 current_cluster = (long long)EXT4_B2C(sbi, current_pblk);
2614 right_pblk = ext4_ext_pblock(ex + 1);
2615 right_cluster = (long long)EXT4_B2C(sbi, right_pblk);
2616 if (current_cluster == right_cluster &&
2617 EXT4_PBLK_COFF(sbi, right_pblk))
2618 *partial_cluster = -right_cluster;
2619 }
2620
2588 trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); 2621 trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
2589 2622
2590 while (ex >= EXT_FIRST_EXTENT(eh) && 2623 while (ex >= EXT_FIRST_EXTENT(eh) &&
@@ -2710,10 +2743,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2710 err = ext4_ext_correct_indexes(handle, inode, path); 2743 err = ext4_ext_correct_indexes(handle, inode, path);
2711 2744
2712 /* 2745 /*
2713 * Free the partial cluster only if the current extent does not 2746 * If there's a partial cluster and at least one extent remains in
2714 * reference it. Otherwise we might free used cluster. 2747 * the leaf, free the partial cluster if it isn't shared with the
2748 * current extent. If there's a partial cluster and no extents
2749 * remain in the leaf, it can't be freed here. It can only be
2750 * freed when it's possible to determine if it's not shared with
2751 * any other extent - when the next leaf is processed or when space
2752 * removal is complete.
2715 */ 2753 */
2716 if (*partial_cluster > 0 && 2754 if (*partial_cluster > 0 && eh->eh_entries &&
2717 (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != 2755 (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
2718 *partial_cluster)) { 2756 *partial_cluster)) {
2719 int flags = get_default_free_blocks_flags(inode); 2757 int flags = get_default_free_blocks_flags(inode);
@@ -3569,6 +3607,8 @@ out:
3569 * b> Splits in two extents: Write is happening at either end of the extent 3607 * b> Splits in two extents: Write is happening at either end of the extent
3570 * c> Splits in three extents: Someone is writing in middle of the extent 3608 * c> Splits in three extents: Someone is writing in middle of the extent
3571 * 3609 *
3610 * This works the same way in the case of initialized -> unwritten conversion.
3611 *
3572 * One or more index blocks may be needed if the extent tree grows after 3612 * One or more index blocks may be needed if the extent tree grows after
3573 * the uninitialized extent split. To prevent ENOSPC occur at the IO 3613 * the uninitialized extent split. To prevent ENOSPC occur at the IO
3574 * complete, we need to split the uninitialized extent before DIO submit 3614 * complete, we need to split the uninitialized extent before DIO submit
@@ -3579,7 +3619,7 @@ out:
3579 * 3619 *
3580 * Returns the size of uninitialized extent to be written on success. 3620 * Returns the size of uninitialized extent to be written on success.
3581 */ 3621 */
3582static int ext4_split_unwritten_extents(handle_t *handle, 3622static int ext4_split_convert_extents(handle_t *handle,
3583 struct inode *inode, 3623 struct inode *inode,
3584 struct ext4_map_blocks *map, 3624 struct ext4_map_blocks *map,
3585 struct ext4_ext_path *path, 3625 struct ext4_ext_path *path,
@@ -3591,9 +3631,9 @@ static int ext4_split_unwritten_extents(handle_t *handle,
3591 unsigned int ee_len; 3631 unsigned int ee_len;
3592 int split_flag = 0, depth; 3632 int split_flag = 0, depth;
3593 3633
3594 ext_debug("ext4_split_unwritten_extents: inode %lu, logical" 3634 ext_debug("%s: inode %lu, logical block %llu, max_blocks %u\n",
3595 "block %llu, max_blocks %u\n", inode->i_ino, 3635 __func__, inode->i_ino,
3596 (unsigned long long)map->m_lblk, map->m_len); 3636 (unsigned long long)map->m_lblk, map->m_len);
3597 3637
3598 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 3638 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
3599 inode->i_sb->s_blocksize_bits; 3639 inode->i_sb->s_blocksize_bits;
@@ -3608,14 +3648,73 @@ static int ext4_split_unwritten_extents(handle_t *handle,
3608 ee_block = le32_to_cpu(ex->ee_block); 3648 ee_block = le32_to_cpu(ex->ee_block);
3609 ee_len = ext4_ext_get_actual_len(ex); 3649 ee_len = ext4_ext_get_actual_len(ex);
3610 3650
3611 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; 3651 /* Convert to unwritten */
3612 split_flag |= EXT4_EXT_MARK_UNINIT2; 3652 if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
3613 if (flags & EXT4_GET_BLOCKS_CONVERT) 3653 split_flag |= EXT4_EXT_DATA_VALID1;
3614 split_flag |= EXT4_EXT_DATA_VALID2; 3654 /* Convert to initialized */
3655 } else if (flags & EXT4_GET_BLOCKS_CONVERT) {
3656 split_flag |= ee_block + ee_len <= eof_block ?
3657 EXT4_EXT_MAY_ZEROOUT : 0;
3658 split_flag |= (EXT4_EXT_MARK_UNINIT2 | EXT4_EXT_DATA_VALID2);
3659 }
3615 flags |= EXT4_GET_BLOCKS_PRE_IO; 3660 flags |= EXT4_GET_BLOCKS_PRE_IO;
3616 return ext4_split_extent(handle, inode, path, map, split_flag, flags); 3661 return ext4_split_extent(handle, inode, path, map, split_flag, flags);
3617} 3662}
3618 3663
3664static int ext4_convert_initialized_extents(handle_t *handle,
3665 struct inode *inode,
3666 struct ext4_map_blocks *map,
3667 struct ext4_ext_path *path)
3668{
3669 struct ext4_extent *ex;
3670 ext4_lblk_t ee_block;
3671 unsigned int ee_len;
3672 int depth;
3673 int err = 0;
3674
3675 depth = ext_depth(inode);
3676 ex = path[depth].p_ext;
3677 ee_block = le32_to_cpu(ex->ee_block);
3678 ee_len = ext4_ext_get_actual_len(ex);
3679
3680 ext_debug("%s: inode %lu, logical"
3681 "block %llu, max_blocks %u\n", __func__, inode->i_ino,
3682 (unsigned long long)ee_block, ee_len);
3683
3684 if (ee_block != map->m_lblk || ee_len > map->m_len) {
3685 err = ext4_split_convert_extents(handle, inode, map, path,
3686 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
3687 if (err < 0)
3688 goto out;
3689 ext4_ext_drop_refs(path);
3690 path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
3691 if (IS_ERR(path)) {
3692 err = PTR_ERR(path);
3693 goto out;
3694 }
3695 depth = ext_depth(inode);
3696 ex = path[depth].p_ext;
3697 }
3698
3699 err = ext4_ext_get_access(handle, inode, path + depth);
3700 if (err)
3701 goto out;
3702 /* first mark the extent as uninitialized */
3703 ext4_ext_mark_uninitialized(ex);
3704
3705 /* note: ext4_ext_correct_indexes() isn't needed here because
3706 * borders are not changed
3707 */
3708 ext4_ext_try_to_merge(handle, inode, path, ex);
3709
3710 /* Mark modified extent as dirty */
3711 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3712out:
3713 ext4_ext_show_leaf(inode, path);
3714 return err;
3715}
3716
3717
3619static int ext4_convert_unwritten_extents_endio(handle_t *handle, 3718static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3620 struct inode *inode, 3719 struct inode *inode,
3621 struct ext4_map_blocks *map, 3720 struct ext4_map_blocks *map,
@@ -3649,8 +3748,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3649 inode->i_ino, (unsigned long long)ee_block, ee_len, 3748 inode->i_ino, (unsigned long long)ee_block, ee_len,
3650 (unsigned long long)map->m_lblk, map->m_len); 3749 (unsigned long long)map->m_lblk, map->m_len);
3651#endif 3750#endif
3652 err = ext4_split_unwritten_extents(handle, inode, map, path, 3751 err = ext4_split_convert_extents(handle, inode, map, path,
3653 EXT4_GET_BLOCKS_CONVERT); 3752 EXT4_GET_BLOCKS_CONVERT);
3654 if (err < 0) 3753 if (err < 0)
3655 goto out; 3754 goto out;
3656 ext4_ext_drop_refs(path); 3755 ext4_ext_drop_refs(path);
@@ -3851,6 +3950,38 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
3851} 3950}
3852 3951
3853static int 3952static int
3953ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
3954 struct ext4_map_blocks *map,
3955 struct ext4_ext_path *path, int flags,
3956 unsigned int allocated, ext4_fsblk_t newblock)
3957{
3958 int ret = 0;
3959 int err = 0;
3960
3961 /*
3962 * Make sure that the extent is no bigger than we support with
3963 * uninitialized extent
3964 */
3965 if (map->m_len > EXT_UNINIT_MAX_LEN)
3966 map->m_len = EXT_UNINIT_MAX_LEN / 2;
3967
3968 ret = ext4_convert_initialized_extents(handle, inode, map,
3969 path);
3970 if (ret >= 0) {
3971 ext4_update_inode_fsync_trans(handle, inode, 1);
3972 err = check_eofblocks_fl(handle, inode, map->m_lblk,
3973 path, map->m_len);
3974 } else
3975 err = ret;
3976 map->m_flags |= EXT4_MAP_UNWRITTEN;
3977 if (allocated > map->m_len)
3978 allocated = map->m_len;
3979 map->m_len = allocated;
3980
3981 return err ? err : allocated;
3982}
3983
3984static int
3854ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3985ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3855 struct ext4_map_blocks *map, 3986 struct ext4_map_blocks *map,
3856 struct ext4_ext_path *path, int flags, 3987 struct ext4_ext_path *path, int flags,
@@ -3877,8 +4008,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3877 4008
3878 /* get_block() before submit the IO, split the extent */ 4009 /* get_block() before submit the IO, split the extent */
3879 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 4010 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3880 ret = ext4_split_unwritten_extents(handle, inode, map, 4011 ret = ext4_split_convert_extents(handle, inode, map,
3881 path, flags); 4012 path, flags | EXT4_GET_BLOCKS_CONVERT);
3882 if (ret <= 0) 4013 if (ret <= 0)
3883 goto out; 4014 goto out;
3884 /* 4015 /*
@@ -3993,10 +4124,6 @@ out1:
3993 map->m_pblk = newblock; 4124 map->m_pblk = newblock;
3994 map->m_len = allocated; 4125 map->m_len = allocated;
3995out2: 4126out2:
3996 if (path) {
3997 ext4_ext_drop_refs(path);
3998 kfree(path);
3999 }
4000 return err ? err : allocated; 4127 return err ? err : allocated;
4001} 4128}
4002 4129
@@ -4128,7 +4255,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4128 struct ext4_extent newex, *ex, *ex2; 4255 struct ext4_extent newex, *ex, *ex2;
4129 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 4256 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4130 ext4_fsblk_t newblock = 0; 4257 ext4_fsblk_t newblock = 0;
4131 int free_on_err = 0, err = 0, depth; 4258 int free_on_err = 0, err = 0, depth, ret;
4132 unsigned int allocated = 0, offset = 0; 4259 unsigned int allocated = 0, offset = 0;
4133 unsigned int allocated_clusters = 0; 4260 unsigned int allocated_clusters = 0;
4134 struct ext4_allocation_request ar; 4261 struct ext4_allocation_request ar;
@@ -4170,6 +4297,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4170 ext4_fsblk_t ee_start = ext4_ext_pblock(ex); 4297 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
4171 unsigned short ee_len; 4298 unsigned short ee_len;
4172 4299
4300
4173 /* 4301 /*
4174 * Uninitialized extents are treated as holes, except that 4302 * Uninitialized extents are treated as holes, except that
4175 * we split out initialized portions during a write. 4303 * we split out initialized portions during a write.
@@ -4186,13 +4314,27 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4186 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, 4314 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
4187 ee_block, ee_len, newblock); 4315 ee_block, ee_len, newblock);
4188 4316
4189 if (!ext4_ext_is_uninitialized(ex)) 4317 /*
4318 * If the extent is initialized check whether the
4319 * caller wants to convert it to unwritten.
4320 */
4321 if ((!ext4_ext_is_uninitialized(ex)) &&
4322 (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
4323 allocated = ext4_ext_convert_initialized_extent(
4324 handle, inode, map, path, flags,
4325 allocated, newblock);
4326 goto out2;
4327 } else if (!ext4_ext_is_uninitialized(ex))
4190 goto out; 4328 goto out;
4191 4329
4192 allocated = ext4_ext_handle_uninitialized_extents( 4330 ret = ext4_ext_handle_uninitialized_extents(
4193 handle, inode, map, path, flags, 4331 handle, inode, map, path, flags,
4194 allocated, newblock); 4332 allocated, newblock);
4195 goto out3; 4333 if (ret < 0)
4334 err = ret;
4335 else
4336 allocated = ret;
4337 goto out2;
4196 } 4338 }
4197 } 4339 }
4198 4340
@@ -4473,7 +4615,6 @@ out2:
4473 kfree(path); 4615 kfree(path);
4474 } 4616 }
4475 4617
4476out3:
4477 trace_ext4_ext_map_blocks_exit(inode, flags, map, 4618 trace_ext4_ext_map_blocks_exit(inode, flags, map,
4478 err ? err : allocated); 4619 err ? err : allocated);
4479 ext4_es_lru_add(inode); 4620 ext4_es_lru_add(inode);
@@ -4514,34 +4655,200 @@ retry:
4514 ext4_std_error(inode->i_sb, err); 4655 ext4_std_error(inode->i_sb, err);
4515} 4656}
4516 4657
4517static void ext4_falloc_update_inode(struct inode *inode, 4658static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4518 int mode, loff_t new_size, int update_ctime) 4659 ext4_lblk_t len, int flags, int mode)
4519{ 4660{
4520 struct timespec now; 4661 struct inode *inode = file_inode(file);
4662 handle_t *handle;
4663 int ret = 0;
4664 int ret2 = 0;
4665 int retries = 0;
4666 struct ext4_map_blocks map;
4667 unsigned int credits;
4521 4668
4522 if (update_ctime) { 4669 map.m_lblk = offset;
4523 now = current_fs_time(inode->i_sb); 4670 /*
4524 if (!timespec_equal(&inode->i_ctime, &now)) 4671 * Don't normalize the request if it can fit in one extent so
4525 inode->i_ctime = now; 4672 * that it doesn't get unnecessarily split into multiple
4673 * extents.
4674 */
4675 if (len <= EXT_UNINIT_MAX_LEN)
4676 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
4677
4678 /*
4679 * credits to insert 1 extent into extent tree
4680 */
4681 credits = ext4_chunk_trans_blocks(inode, len);
4682
4683retry:
4684 while (ret >= 0 && ret < len) {
4685 map.m_lblk = map.m_lblk + ret;
4686 map.m_len = len = len - ret;
4687 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4688 credits);
4689 if (IS_ERR(handle)) {
4690 ret = PTR_ERR(handle);
4691 break;
4692 }
4693 ret = ext4_map_blocks(handle, inode, &map, flags);
4694 if (ret <= 0) {
4695 ext4_debug("inode #%lu: block %u: len %u: "
4696 "ext4_ext_map_blocks returned %d",
4697 inode->i_ino, map.m_lblk,
4698 map.m_len, ret);
4699 ext4_mark_inode_dirty(handle, inode);
4700 ret2 = ext4_journal_stop(handle);
4701 break;
4702 }
4703 ret2 = ext4_journal_stop(handle);
4704 if (ret2)
4705 break;
4706 }
4707 if (ret == -ENOSPC &&
4708 ext4_should_retry_alloc(inode->i_sb, &retries)) {
4709 ret = 0;
4710 goto retry;
4526 } 4711 }
4712
4713 return ret > 0 ? ret2 : ret;
4714}
4715
4716static long ext4_zero_range(struct file *file, loff_t offset,
4717 loff_t len, int mode)
4718{
4719 struct inode *inode = file_inode(file);
4720 handle_t *handle = NULL;
4721 unsigned int max_blocks;
4722 loff_t new_size = 0;
4723 int ret = 0;
4724 int flags;
4725 int partial;
4726 loff_t start, end;
4727 ext4_lblk_t lblk;
4728 struct address_space *mapping = inode->i_mapping;
4729 unsigned int blkbits = inode->i_blkbits;
4730
4731 trace_ext4_zero_range(inode, offset, len, mode);
4732
4733 /*
4734 * Write out all dirty pages to avoid race conditions
4735 * Then release them.
4736 */
4737 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
4738 ret = filemap_write_and_wait_range(mapping, offset,
4739 offset + len - 1);
4740 if (ret)
4741 return ret;
4742 }
4743
4527 /* 4744 /*
4528 * Update only when preallocation was requested beyond 4745 * Round up offset. This is not fallocate, we need to zero out
4529 * the file size. 4746 * blocks, so convert interior block aligned part of the range to
4747 * unwritten and possibly manually zero out unaligned parts of the
4748 * range.
4530 */ 4749 */
4531 if (!(mode & FALLOC_FL_KEEP_SIZE)) { 4750 start = round_up(offset, 1 << blkbits);
4751 end = round_down((offset + len), 1 << blkbits);
4752
4753 if (start < offset || end > offset + len)
4754 return -EINVAL;
4755 partial = (offset + len) & ((1 << blkbits) - 1);
4756
4757 lblk = start >> blkbits;
4758 max_blocks = (end >> blkbits);
4759 if (max_blocks < lblk)
4760 max_blocks = 0;
4761 else
4762 max_blocks -= lblk;
4763
4764 flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT |
4765 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN;
4766 if (mode & FALLOC_FL_KEEP_SIZE)
4767 flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
4768
4769 mutex_lock(&inode->i_mutex);
4770
4771 /*
4772 * Indirect files do not support unwritten extents
4773 */
4774 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4775 ret = -EOPNOTSUPP;
4776 goto out_mutex;
4777 }
4778
4779 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
4780 offset + len > i_size_read(inode)) {
4781 new_size = offset + len;
4782 ret = inode_newsize_ok(inode, new_size);
4783 if (ret)
4784 goto out_mutex;
4785 /*
4786 * If we have a partial block after EOF we have to allocate
4787 * the entire block.
4788 */
4789 if (partial)
4790 max_blocks += 1;
4791 }
4792
4793 if (max_blocks > 0) {
4794
4795 /* Now release the pages and zero block aligned part of pages*/
4796 truncate_pagecache_range(inode, start, end - 1);
4797
4798 /* Wait all existing dio workers, newcomers will block on i_mutex */
4799 ext4_inode_block_unlocked_dio(inode);
4800 inode_dio_wait(inode);
4801
4802 /*
4803 * Remove entire range from the extent status tree.
4804 */
4805 ret = ext4_es_remove_extent(inode, lblk, max_blocks);
4806 if (ret)
4807 goto out_dio;
4808
4809 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags,
4810 mode);
4811 if (ret)
4812 goto out_dio;
4813 }
4814
4815 handle = ext4_journal_start(inode, EXT4_HT_MISC, 4);
4816 if (IS_ERR(handle)) {
4817 ret = PTR_ERR(handle);
4818 ext4_std_error(inode->i_sb, ret);
4819 goto out_dio;
4820 }
4821
4822 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4823
4824 if (new_size) {
4532 if (new_size > i_size_read(inode)) 4825 if (new_size > i_size_read(inode))
4533 i_size_write(inode, new_size); 4826 i_size_write(inode, new_size);
4534 if (new_size > EXT4_I(inode)->i_disksize) 4827 if (new_size > EXT4_I(inode)->i_disksize)
4535 ext4_update_i_disksize(inode, new_size); 4828 ext4_update_i_disksize(inode, new_size);
4536 } else { 4829 } else {
4537 /* 4830 /*
4538 * Mark that we allocate beyond EOF so the subsequent truncate 4831 * Mark that we allocate beyond EOF so the subsequent truncate
4539 * can proceed even if the new size is the same as i_size. 4832 * can proceed even if the new size is the same as i_size.
4540 */ 4833 */
4541 if (new_size > i_size_read(inode)) 4834 if ((offset + len) > i_size_read(inode))
4542 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 4835 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
4543 } 4836 }
4544 4837
4838 ext4_mark_inode_dirty(handle, inode);
4839
4840 /* Zero out partial block at the edges of the range */
4841 ret = ext4_zero_partial_blocks(handle, inode, offset, len);
4842
4843 if (file->f_flags & O_SYNC)
4844 ext4_handle_sync(handle);
4845
4846 ext4_journal_stop(handle);
4847out_dio:
4848 ext4_inode_resume_unlocked_dio(inode);
4849out_mutex:
4850 mutex_unlock(&inode->i_mutex);
4851 return ret;
4545} 4852}
4546 4853
4547/* 4854/*
@@ -4555,22 +4862,25 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4555{ 4862{
4556 struct inode *inode = file_inode(file); 4863 struct inode *inode = file_inode(file);
4557 handle_t *handle; 4864 handle_t *handle;
4558 loff_t new_size; 4865 loff_t new_size = 0;
4559 unsigned int max_blocks; 4866 unsigned int max_blocks;
4560 int ret = 0; 4867 int ret = 0;
4561 int ret2 = 0;
4562 int retries = 0;
4563 int flags; 4868 int flags;
4564 struct ext4_map_blocks map; 4869 ext4_lblk_t lblk;
4565 unsigned int credits, blkbits = inode->i_blkbits; 4870 struct timespec tv;
4871 unsigned int blkbits = inode->i_blkbits;
4566 4872
4567 /* Return error if mode is not supported */ 4873 /* Return error if mode is not supported */
4568 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 4874 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
4875 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
4569 return -EOPNOTSUPP; 4876 return -EOPNOTSUPP;
4570 4877
4571 if (mode & FALLOC_FL_PUNCH_HOLE) 4878 if (mode & FALLOC_FL_PUNCH_HOLE)
4572 return ext4_punch_hole(inode, offset, len); 4879 return ext4_punch_hole(inode, offset, len);
4573 4880
4881 if (mode & FALLOC_FL_COLLAPSE_RANGE)
4882 return ext4_collapse_range(inode, offset, len);
4883
4574 ret = ext4_convert_inline_data(inode); 4884 ret = ext4_convert_inline_data(inode);
4575 if (ret) 4885 if (ret)
4576 return ret; 4886 return ret;
@@ -4582,83 +4892,66 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4582 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 4892 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4583 return -EOPNOTSUPP; 4893 return -EOPNOTSUPP;
4584 4894
4895 if (mode & FALLOC_FL_ZERO_RANGE)
4896 return ext4_zero_range(file, offset, len, mode);
4897
4585 trace_ext4_fallocate_enter(inode, offset, len, mode); 4898 trace_ext4_fallocate_enter(inode, offset, len, mode);
4586 map.m_lblk = offset >> blkbits; 4899 lblk = offset >> blkbits;
4587 /* 4900 /*
4588 * We can't just convert len to max_blocks because 4901 * We can't just convert len to max_blocks because
4589 * If blocksize = 4096 offset = 3072 and len = 2048 4902 * If blocksize = 4096 offset = 3072 and len = 2048
4590 */ 4903 */
4591 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 4904 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
4592 - map.m_lblk; 4905 - lblk;
4593 /* 4906
4594 * credits to insert 1 extent into extent tree
4595 */
4596 credits = ext4_chunk_trans_blocks(inode, max_blocks);
4597 mutex_lock(&inode->i_mutex);
4598 ret = inode_newsize_ok(inode, (len + offset));
4599 if (ret) {
4600 mutex_unlock(&inode->i_mutex);
4601 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
4602 return ret;
4603 }
4604 flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; 4907 flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT;
4605 if (mode & FALLOC_FL_KEEP_SIZE) 4908 if (mode & FALLOC_FL_KEEP_SIZE)
4606 flags |= EXT4_GET_BLOCKS_KEEP_SIZE; 4909 flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
4607 /*
4608 * Don't normalize the request if it can fit in one extent so
4609 * that it doesn't get unnecessarily split into multiple
4610 * extents.
4611 */
4612 if (len <= EXT_UNINIT_MAX_LEN << blkbits)
4613 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
4614 4910
4615retry: 4911 mutex_lock(&inode->i_mutex);
4616 while (ret >= 0 && ret < max_blocks) {
4617 map.m_lblk = map.m_lblk + ret;
4618 map.m_len = max_blocks = max_blocks - ret;
4619 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4620 credits);
4621 if (IS_ERR(handle)) {
4622 ret = PTR_ERR(handle);
4623 break;
4624 }
4625 ret = ext4_map_blocks(handle, inode, &map, flags);
4626 if (ret <= 0) {
4627#ifdef EXT4FS_DEBUG
4628 ext4_warning(inode->i_sb,
4629 "inode #%lu: block %u: len %u: "
4630 "ext4_ext_map_blocks returned %d",
4631 inode->i_ino, map.m_lblk,
4632 map.m_len, ret);
4633#endif
4634 ext4_mark_inode_dirty(handle, inode);
4635 ret2 = ext4_journal_stop(handle);
4636 break;
4637 }
4638 if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
4639 blkbits) >> blkbits))
4640 new_size = offset + len;
4641 else
4642 new_size = ((loff_t) map.m_lblk + ret) << blkbits;
4643 4912
4644 ext4_falloc_update_inode(inode, mode, new_size, 4913 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
4645 (map.m_flags & EXT4_MAP_NEW)); 4914 offset + len > i_size_read(inode)) {
4646 ext4_mark_inode_dirty(handle, inode); 4915 new_size = offset + len;
4647 if ((file->f_flags & O_SYNC) && ret >= max_blocks) 4916 ret = inode_newsize_ok(inode, new_size);
4648 ext4_handle_sync(handle); 4917 if (ret)
4649 ret2 = ext4_journal_stop(handle); 4918 goto out;
4650 if (ret2)
4651 break;
4652 } 4919 }
4653 if (ret == -ENOSPC && 4920
4654 ext4_should_retry_alloc(inode->i_sb, &retries)) { 4921 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode);
4655 ret = 0; 4922 if (ret)
4656 goto retry; 4923 goto out;
4924
4925 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
4926 if (IS_ERR(handle))
4927 goto out;
4928
4929 tv = inode->i_ctime = ext4_current_time(inode);
4930
4931 if (new_size) {
4932 if (new_size > i_size_read(inode)) {
4933 i_size_write(inode, new_size);
4934 inode->i_mtime = tv;
4935 }
4936 if (new_size > EXT4_I(inode)->i_disksize)
4937 ext4_update_i_disksize(inode, new_size);
4938 } else {
4939 /*
4940 * Mark that we allocate beyond EOF so the subsequent truncate
4941 * can proceed even if the new size is the same as i_size.
4942 */
4943 if ((offset + len) > i_size_read(inode))
4944 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
4657 } 4945 }
4946 ext4_mark_inode_dirty(handle, inode);
4947 if (file->f_flags & O_SYNC)
4948 ext4_handle_sync(handle);
4949
4950 ext4_journal_stop(handle);
4951out:
4658 mutex_unlock(&inode->i_mutex); 4952 mutex_unlock(&inode->i_mutex);
4659 trace_ext4_fallocate_exit(inode, offset, max_blocks, 4953 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
4660 ret > 0 ? ret2 : ret); 4954 return ret;
4661 return ret > 0 ? ret2 : ret;
4662} 4955}
4663 4956
4664/* 4957/*
@@ -4869,3 +5162,304 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4869 ext4_es_lru_add(inode); 5162 ext4_es_lru_add(inode);
4870 return error; 5163 return error;
4871} 5164}
5165
5166/*
5167 * ext4_access_path:
5168 * Function to access the path buffer for marking it dirty.
5169 * It also checks if there are sufficient credits left in the journal handle
5170 * to update path.
5171 */
5172static int
5173ext4_access_path(handle_t *handle, struct inode *inode,
5174 struct ext4_ext_path *path)
5175{
5176 int credits, err;
5177
5178 if (!ext4_handle_valid(handle))
5179 return 0;
5180
5181 /*
5182 * Check if need to extend journal credits
5183 * 3 for leaf, sb, and inode plus 2 (bmap and group
5184 * descriptor) for each block group; assume two block
5185 * groups
5186 */
5187 if (handle->h_buffer_credits < 7) {
5188 credits = ext4_writepage_trans_blocks(inode);
5189 err = ext4_ext_truncate_extend_restart(handle, inode, credits);
5190 /* EAGAIN is success */
5191 if (err && err != -EAGAIN)
5192 return err;
5193 }
5194
5195 err = ext4_ext_get_access(handle, inode, path);
5196 return err;
5197}
5198
5199/*
5200 * ext4_ext_shift_path_extents:
5201 * Shift the extents of a path structure lying between path[depth].p_ext
5202 * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift
5203 * from starting block for each extent.
5204 */
5205static int
5206ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
5207 struct inode *inode, handle_t *handle,
5208 ext4_lblk_t *start)
5209{
5210 int depth, err = 0;
5211 struct ext4_extent *ex_start, *ex_last;
5212 bool update = 0;
5213 depth = path->p_depth;
5214
5215 while (depth >= 0) {
5216 if (depth == path->p_depth) {
5217 ex_start = path[depth].p_ext;
5218 if (!ex_start)
5219 return -EIO;
5220
5221 ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
5222 if (!ex_last)
5223 return -EIO;
5224
5225 err = ext4_access_path(handle, inode, path + depth);
5226 if (err)
5227 goto out;
5228
5229 if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
5230 update = 1;
5231
5232 *start = ex_last->ee_block +
5233 ext4_ext_get_actual_len(ex_last);
5234
5235 while (ex_start <= ex_last) {
5236 ex_start->ee_block -= shift;
5237 if (ex_start >
5238 EXT_FIRST_EXTENT(path[depth].p_hdr)) {
5239 if (ext4_ext_try_to_merge_right(inode,
5240 path, ex_start - 1))
5241 ex_last--;
5242 }
5243 ex_start++;
5244 }
5245 err = ext4_ext_dirty(handle, inode, path + depth);
5246 if (err)
5247 goto out;
5248
5249 if (--depth < 0 || !update)
5250 break;
5251 }
5252
5253 /* Update index too */
5254 err = ext4_access_path(handle, inode, path + depth);
5255 if (err)
5256 goto out;
5257
5258 path[depth].p_idx->ei_block -= shift;
5259 err = ext4_ext_dirty(handle, inode, path + depth);
5260 if (err)
5261 goto out;
5262
5263 /* we are done if current index is not a starting index */
5264 if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr))
5265 break;
5266
5267 depth--;
5268 }
5269
5270out:
5271 return err;
5272}
5273
5274/*
5275 * ext4_ext_shift_extents:
5276 * All the extents which lies in the range from start to the last allocated
5277 * block for the file are shifted downwards by shift blocks.
5278 * On success, 0 is returned, error otherwise.
5279 */
5280static int
5281ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5282 ext4_lblk_t start, ext4_lblk_t shift)
5283{
5284 struct ext4_ext_path *path;
5285 int ret = 0, depth;
5286 struct ext4_extent *extent;
5287 ext4_lblk_t stop_block, current_block;
5288 ext4_lblk_t ex_start, ex_end;
5289
5290 /* Let path point to the last extent */
5291 path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
5292 if (IS_ERR(path))
5293 return PTR_ERR(path);
5294
5295 depth = path->p_depth;
5296 extent = path[depth].p_ext;
5297 if (!extent) {
5298 ext4_ext_drop_refs(path);
5299 kfree(path);
5300 return ret;
5301 }
5302
5303 stop_block = extent->ee_block + ext4_ext_get_actual_len(extent);
5304 ext4_ext_drop_refs(path);
5305 kfree(path);
5306
5307 /* Nothing to shift, if hole is at the end of file */
5308 if (start >= stop_block)
5309 return ret;
5310
5311 /*
5312 * Don't start shifting extents until we make sure the hole is big
5313 * enough to accomodate the shift.
5314 */
5315 path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
5316 depth = path->p_depth;
5317 extent = path[depth].p_ext;
5318 ex_start = extent->ee_block;
5319 ex_end = extent->ee_block + ext4_ext_get_actual_len(extent);
5320 ext4_ext_drop_refs(path);
5321 kfree(path);
5322
5323 if ((start == ex_start && shift > ex_start) ||
5324 (shift > start - ex_end))
5325 return -EINVAL;
5326
5327 /* Its safe to start updating extents */
5328 while (start < stop_block) {
5329 path = ext4_ext_find_extent(inode, start, NULL, 0);
5330 if (IS_ERR(path))
5331 return PTR_ERR(path);
5332 depth = path->p_depth;
5333 extent = path[depth].p_ext;
5334 current_block = extent->ee_block;
5335 if (start > current_block) {
5336 /* Hole, move to the next extent */
5337 ret = mext_next_extent(inode, path, &extent);
5338 if (ret != 0) {
5339 ext4_ext_drop_refs(path);
5340 kfree(path);
5341 if (ret == 1)
5342 ret = 0;
5343 break;
5344 }
5345 }
5346 ret = ext4_ext_shift_path_extents(path, shift, inode,
5347 handle, &start);
5348 ext4_ext_drop_refs(path);
5349 kfree(path);
5350 if (ret)
5351 break;
5352 }
5353
5354 return ret;
5355}
5356
5357/*
5358 * ext4_collapse_range:
5359 * This implements the fallocate's collapse range functionality for ext4
5360 * Returns: 0 and non-zero on error.
5361 */
5362int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5363{
5364 struct super_block *sb = inode->i_sb;
5365 ext4_lblk_t punch_start, punch_stop;
5366 handle_t *handle;
5367 unsigned int credits;
5368 loff_t new_size;
5369 int ret;
5370
5371 BUG_ON(offset + len > i_size_read(inode));
5372
5373 /* Collapse range works only on fs block size aligned offsets. */
5374 if (offset & (EXT4_BLOCK_SIZE(sb) - 1) ||
5375 len & (EXT4_BLOCK_SIZE(sb) - 1))
5376 return -EINVAL;
5377
5378 if (!S_ISREG(inode->i_mode))
5379 return -EOPNOTSUPP;
5380
5381 trace_ext4_collapse_range(inode, offset, len);
5382
5383 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5384 punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
5385
5386 /* Write out all dirty pages */
5387 ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1);
5388 if (ret)
5389 return ret;
5390
5391 /* Take mutex lock */
5392 mutex_lock(&inode->i_mutex);
5393
5394 /* It's not possible punch hole on append only file */
5395 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
5396 ret = -EPERM;
5397 goto out_mutex;
5398 }
5399
5400 if (IS_SWAPFILE(inode)) {
5401 ret = -ETXTBSY;
5402 goto out_mutex;
5403 }
5404
5405 /* Currently just for extent based files */
5406 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
5407 ret = -EOPNOTSUPP;
5408 goto out_mutex;
5409 }
5410
5411 truncate_pagecache_range(inode, offset, -1);
5412
5413 /* Wait for existing dio to complete */
5414 ext4_inode_block_unlocked_dio(inode);
5415 inode_dio_wait(inode);
5416
5417 credits = ext4_writepage_trans_blocks(inode);
5418 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
5419 if (IS_ERR(handle)) {
5420 ret = PTR_ERR(handle);
5421 goto out_dio;
5422 }
5423
5424 down_write(&EXT4_I(inode)->i_data_sem);
5425 ext4_discard_preallocations(inode);
5426
5427 ret = ext4_es_remove_extent(inode, punch_start,
5428 EXT_MAX_BLOCKS - punch_start - 1);
5429 if (ret) {
5430 up_write(&EXT4_I(inode)->i_data_sem);
5431 goto out_stop;
5432 }
5433
5434 ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
5435 if (ret) {
5436 up_write(&EXT4_I(inode)->i_data_sem);
5437 goto out_stop;
5438 }
5439
5440 ret = ext4_ext_shift_extents(inode, handle, punch_stop,
5441 punch_stop - punch_start);
5442 if (ret) {
5443 up_write(&EXT4_I(inode)->i_data_sem);
5444 goto out_stop;
5445 }
5446
5447 new_size = i_size_read(inode) - len;
5448 truncate_setsize(inode, new_size);
5449 EXT4_I(inode)->i_disksize = new_size;
5450
5451 ext4_discard_preallocations(inode);
5452 up_write(&EXT4_I(inode)->i_data_sem);
5453 if (IS_SYNC(inode))
5454 ext4_handle_sync(handle);
5455 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
5456 ext4_mark_inode_dirty(handle, inode);
5457
5458out_stop:
5459 ext4_journal_stop(handle);
5460out_dio:
5461 ext4_inode_resume_unlocked_dio(inode);
5462out_mutex:
5463 mutex_unlock(&inode->i_mutex);
5464 return ret;
5465}
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 3981ff783950..0a014a7194b2 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -184,7 +184,7 @@ static void ext4_es_print_tree(struct inode *inode)
184 while (node) { 184 while (node) {
185 struct extent_status *es; 185 struct extent_status *es;
186 es = rb_entry(node, struct extent_status, rb_node); 186 es = rb_entry(node, struct extent_status, rb_node);
187 printk(KERN_DEBUG " [%u/%u) %llu %llx", 187 printk(KERN_DEBUG " [%u/%u) %llu %x",
188 es->es_lblk, es->es_len, 188 es->es_lblk, es->es_len,
189 ext4_es_pblock(es), ext4_es_status(es)); 189 ext4_es_pblock(es), ext4_es_status(es));
190 node = rb_next(node); 190 node = rb_next(node);
@@ -445,8 +445,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
445 pr_warn("ES insert assertion failed for " 445 pr_warn("ES insert assertion failed for "
446 "inode: %lu we can find an extent " 446 "inode: %lu we can find an extent "
447 "at block [%d/%d/%llu/%c], but we " 447 "at block [%d/%d/%llu/%c], but we "
448 "want to add an delayed/hole extent " 448 "want to add a delayed/hole extent "
449 "[%d/%d/%llu/%llx]\n", 449 "[%d/%d/%llu/%x]\n",
450 inode->i_ino, ee_block, ee_len, 450 inode->i_ino, ee_block, ee_len,
451 ee_start, ee_status ? 'u' : 'w', 451 ee_start, ee_status ? 'u' : 'w',
452 es->es_lblk, es->es_len, 452 es->es_lblk, es->es_len,
@@ -486,8 +486,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
486 if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { 486 if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
487 pr_warn("ES insert assertion failed for inode: %lu " 487 pr_warn("ES insert assertion failed for inode: %lu "
488 "can't find an extent at block %d but we want " 488 "can't find an extent at block %d but we want "
489 "to add an written/unwritten extent " 489 "to add a written/unwritten extent "
490 "[%d/%d/%llu/%llx]\n", inode->i_ino, 490 "[%d/%d/%llu/%x]\n", inode->i_ino,
491 es->es_lblk, es->es_lblk, es->es_len, 491 es->es_lblk, es->es_lblk, es->es_len,
492 ext4_es_pblock(es), ext4_es_status(es)); 492 ext4_es_pblock(es), ext4_es_status(es));
493 } 493 }
@@ -524,7 +524,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
524 */ 524 */
525 pr_warn("ES insert assertion failed for inode: %lu " 525 pr_warn("ES insert assertion failed for inode: %lu "
526 "We can find blocks but we want to add a " 526 "We can find blocks but we want to add a "
527 "delayed/hole extent [%d/%d/%llu/%llx]\n", 527 "delayed/hole extent [%d/%d/%llu/%x]\n",
528 inode->i_ino, es->es_lblk, es->es_len, 528 inode->i_ino, es->es_lblk, es->es_len,
529 ext4_es_pblock(es), ext4_es_status(es)); 529 ext4_es_pblock(es), ext4_es_status(es));
530 return; 530 return;
@@ -554,7 +554,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
554 if (ext4_es_is_written(es)) { 554 if (ext4_es_is_written(es)) {
555 pr_warn("ES insert assertion failed for inode: %lu " 555 pr_warn("ES insert assertion failed for inode: %lu "
556 "We can't find the block but we want to add " 556 "We can't find the block but we want to add "
557 "an written extent [%d/%d/%llu/%llx]\n", 557 "a written extent [%d/%d/%llu/%x]\n",
558 inode->i_ino, es->es_lblk, es->es_len, 558 inode->i_ino, es->es_lblk, es->es_len,
559 ext4_es_pblock(es), ext4_es_status(es)); 559 ext4_es_pblock(es), ext4_es_status(es));
560 return; 560 return;
@@ -658,8 +658,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
658 658
659 newes.es_lblk = lblk; 659 newes.es_lblk = lblk;
660 newes.es_len = len; 660 newes.es_len = len;
661 ext4_es_store_pblock(&newes, pblk); 661 ext4_es_store_pblock_status(&newes, pblk, status);
662 ext4_es_store_status(&newes, status);
663 trace_ext4_es_insert_extent(inode, &newes); 662 trace_ext4_es_insert_extent(inode, &newes);
664 663
665 ext4_es_insert_extent_check(inode, &newes); 664 ext4_es_insert_extent_check(inode, &newes);
@@ -699,8 +698,7 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
699 698
700 newes.es_lblk = lblk; 699 newes.es_lblk = lblk;
701 newes.es_len = len; 700 newes.es_len = len;
702 ext4_es_store_pblock(&newes, pblk); 701 ext4_es_store_pblock_status(&newes, pblk, status);
703 ext4_es_store_status(&newes, status);
704 trace_ext4_es_cache_extent(inode, &newes); 702 trace_ext4_es_cache_extent(inode, &newes);
705 703
706 if (!len) 704 if (!len)
@@ -812,13 +810,13 @@ retry:
812 810
813 newes.es_lblk = end + 1; 811 newes.es_lblk = end + 1;
814 newes.es_len = len2; 812 newes.es_len = len2;
813 block = 0x7FDEADBEEF;
815 if (ext4_es_is_written(&orig_es) || 814 if (ext4_es_is_written(&orig_es) ||
816 ext4_es_is_unwritten(&orig_es)) { 815 ext4_es_is_unwritten(&orig_es))
817 block = ext4_es_pblock(&orig_es) + 816 block = ext4_es_pblock(&orig_es) +
818 orig_es.es_len - len2; 817 orig_es.es_len - len2;
819 ext4_es_store_pblock(&newes, block); 818 ext4_es_store_pblock_status(&newes, block,
820 } 819 ext4_es_status(&orig_es));
821 ext4_es_store_status(&newes, ext4_es_status(&orig_es));
822 err = __es_insert_extent(inode, &newes); 820 err = __es_insert_extent(inode, &newes);
823 if (err) { 821 if (err) {
824 es->es_lblk = orig_es.es_lblk; 822 es->es_lblk = orig_es.es_lblk;
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 167f4ab8ecc3..f1b62a419920 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -129,6 +129,15 @@ static inline void ext4_es_store_status(struct extent_status *es,
129 (es->es_pblk & ~ES_MASK)); 129 (es->es_pblk & ~ES_MASK));
130} 130}
131 131
132static inline void ext4_es_store_pblock_status(struct extent_status *es,
133 ext4_fsblk_t pb,
134 unsigned int status)
135{
136 es->es_pblk = (((ext4_fsblk_t)
137 (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) |
138 (pb & ~ES_MASK));
139}
140
132extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); 141extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi);
133extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); 142extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
134extern void ext4_es_lru_add(struct inode *inode); 143extern void ext4_es_lru_add(struct inode *inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 175c3f933816..5b0d2c7d5408 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -504,6 +504,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
504{ 504{
505 struct extent_status es; 505 struct extent_status es;
506 int retval; 506 int retval;
507 int ret = 0;
507#ifdef ES_AGGRESSIVE_TEST 508#ifdef ES_AGGRESSIVE_TEST
508 struct ext4_map_blocks orig_map; 509 struct ext4_map_blocks orig_map;
509 510
@@ -515,6 +516,12 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
515 "logical block %lu\n", inode->i_ino, flags, map->m_len, 516 "logical block %lu\n", inode->i_ino, flags, map->m_len,
516 (unsigned long) map->m_lblk); 517 (unsigned long) map->m_lblk);
517 518
519 /*
520 * ext4_map_blocks returns an int, and m_len is an unsigned int
521 */
522 if (unlikely(map->m_len > INT_MAX))
523 map->m_len = INT_MAX;
524
518 /* Lookup extent status tree firstly */ 525 /* Lookup extent status tree firstly */
519 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { 526 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
520 ext4_es_lru_add(inode); 527 ext4_es_lru_add(inode);
@@ -553,7 +560,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
553 EXT4_GET_BLOCKS_KEEP_SIZE); 560 EXT4_GET_BLOCKS_KEEP_SIZE);
554 } 561 }
555 if (retval > 0) { 562 if (retval > 0) {
556 int ret;
557 unsigned int status; 563 unsigned int status;
558 564
559 if (unlikely(retval != map->m_len)) { 565 if (unlikely(retval != map->m_len)) {
@@ -580,7 +586,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
580 586
581found: 587found:
582 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 588 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
583 int ret = check_block_validity(inode, map); 589 ret = check_block_validity(inode, map);
584 if (ret != 0) 590 if (ret != 0)
585 return ret; 591 return ret;
586 } 592 }
@@ -597,7 +603,13 @@ found:
597 * with buffer head unmapped. 603 * with buffer head unmapped.
598 */ 604 */
599 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) 605 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
600 return retval; 606 /*
607 * If we need to convert extent to unwritten
608 * we continue and do the actual work in
609 * ext4_ext_map_blocks()
610 */
611 if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN))
612 return retval;
601 613
602 /* 614 /*
603 * Here we clear m_flags because after allocating an new extent, 615 * Here we clear m_flags because after allocating an new extent,
@@ -653,7 +665,6 @@ found:
653 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); 665 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
654 666
655 if (retval > 0) { 667 if (retval > 0) {
656 int ret;
657 unsigned int status; 668 unsigned int status;
658 669
659 if (unlikely(retval != map->m_len)) { 670 if (unlikely(retval != map->m_len)) {
@@ -688,7 +699,7 @@ found:
688has_zeroout: 699has_zeroout:
689 up_write((&EXT4_I(inode)->i_data_sem)); 700 up_write((&EXT4_I(inode)->i_data_sem));
690 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 701 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
691 int ret = check_block_validity(inode, map); 702 ret = check_block_validity(inode, map);
692 if (ret != 0) 703 if (ret != 0)
693 return ret; 704 return ret;
694 } 705 }
@@ -3313,33 +3324,13 @@ void ext4_set_aops(struct inode *inode)
3313} 3324}
3314 3325
3315/* 3326/*
3316 * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
3317 * up to the end of the block which corresponds to `from'.
3318 * This required during truncate. We need to physically zero the tail end
3319 * of that block so it doesn't yield old data if the file is later grown.
3320 */
3321int ext4_block_truncate_page(handle_t *handle,
3322 struct address_space *mapping, loff_t from)
3323{
3324 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3325 unsigned length;
3326 unsigned blocksize;
3327 struct inode *inode = mapping->host;
3328
3329 blocksize = inode->i_sb->s_blocksize;
3330 length = blocksize - (offset & (blocksize - 1));
3331
3332 return ext4_block_zero_page_range(handle, mapping, from, length);
3333}
3334
3335/*
3336 * ext4_block_zero_page_range() zeros out a mapping of length 'length' 3327 * ext4_block_zero_page_range() zeros out a mapping of length 'length'
3337 * starting from file offset 'from'. The range to be zero'd must 3328 * starting from file offset 'from'. The range to be zero'd must
3338 * be contained within one block. If the specified range exceeds 3329 * be contained within one block. If the specified range exceeds
3339 * the end of the block it will be shortened to end of the block 3330 * the end of the block it will be shortened to end of the block
3340 * that corresponds to 'from' 3331 * that corresponds to 'from'
3341 */ 3332 */
3342int ext4_block_zero_page_range(handle_t *handle, 3333static int ext4_block_zero_page_range(handle_t *handle,
3343 struct address_space *mapping, loff_t from, loff_t length) 3334 struct address_space *mapping, loff_t from, loff_t length)
3344{ 3335{
3345 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; 3336 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
@@ -3429,6 +3420,26 @@ unlock:
3429 return err; 3420 return err;
3430} 3421}
3431 3422
3423/*
3424 * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
3425 * up to the end of the block which corresponds to `from'.
3426 * This required during truncate. We need to physically zero the tail end
3427 * of that block so it doesn't yield old data if the file is later grown.
3428 */
3429int ext4_block_truncate_page(handle_t *handle,
3430 struct address_space *mapping, loff_t from)
3431{
3432 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3433 unsigned length;
3434 unsigned blocksize;
3435 struct inode *inode = mapping->host;
3436
3437 blocksize = inode->i_sb->s_blocksize;
3438 length = blocksize - (offset & (blocksize - 1));
3439
3440 return ext4_block_zero_page_range(handle, mapping, from, length);
3441}
3442
3432int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, 3443int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
3433 loff_t lstart, loff_t length) 3444 loff_t lstart, loff_t length)
3434{ 3445{
@@ -3502,7 +3513,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3502 if (!S_ISREG(inode->i_mode)) 3513 if (!S_ISREG(inode->i_mode))
3503 return -EOPNOTSUPP; 3514 return -EOPNOTSUPP;
3504 3515
3505 trace_ext4_punch_hole(inode, offset, length); 3516 trace_ext4_punch_hole(inode, offset, length, 0);
3506 3517
3507 /* 3518 /*
3508 * Write out all dirty pages to avoid race conditions 3519 * Write out all dirty pages to avoid race conditions
@@ -3609,6 +3620,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3609 up_write(&EXT4_I(inode)->i_data_sem); 3620 up_write(&EXT4_I(inode)->i_data_sem);
3610 if (IS_SYNC(inode)) 3621 if (IS_SYNC(inode))
3611 ext4_handle_sync(handle); 3622 ext4_handle_sync(handle);
3623
3624 /* Now release the pages again to reduce race window */
3625 if (last_block_offset > first_block_offset)
3626 truncate_pagecache_range(inode, first_block_offset,
3627 last_block_offset);
3628
3612 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 3629 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
3613 ext4_mark_inode_dirty(handle, inode); 3630 ext4_mark_inode_dirty(handle, inode);
3614out_stop: 3631out_stop:
@@ -3682,7 +3699,7 @@ void ext4_truncate(struct inode *inode)
3682 3699
3683 /* 3700 /*
3684 * There is a possibility that we're either freeing the inode 3701 * There is a possibility that we're either freeing the inode
3685 * or it completely new indode. In those cases we might not 3702 * or it's a completely new inode. In those cases we might not
3686 * have i_mutex locked because it's not necessary. 3703 * have i_mutex locked because it's not necessary.
3687 */ 3704 */
3688 if (!(inode->i_state & (I_NEW|I_FREEING))) 3705 if (!(inode->i_state & (I_NEW|I_FREEING)))
@@ -3934,8 +3951,8 @@ void ext4_set_inode_flags(struct inode *inode)
3934 new_fl |= S_NOATIME; 3951 new_fl |= S_NOATIME;
3935 if (flags & EXT4_DIRSYNC_FL) 3952 if (flags & EXT4_DIRSYNC_FL)
3936 new_fl |= S_DIRSYNC; 3953 new_fl |= S_DIRSYNC;
3937 set_mask_bits(&inode->i_flags, 3954 inode_set_flags(inode, new_fl,
3938 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); 3955 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
3939} 3956}
3940 3957
3941/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ 3958/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
@@ -4154,11 +4171,13 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4154 EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); 4171 EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
4155 EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); 4172 EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
4156 4173
4157 inode->i_version = le32_to_cpu(raw_inode->i_disk_version); 4174 if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) {
4158 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 4175 inode->i_version = le32_to_cpu(raw_inode->i_disk_version);
4159 if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) 4176 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
4160 inode->i_version |= 4177 if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
4161 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; 4178 inode->i_version |=
4179 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
4180 }
4162 } 4181 }
4163 4182
4164 ret = 0; 4183 ret = 0;
@@ -4328,8 +4347,7 @@ static int ext4_do_update_inode(handle_t *handle,
4328 goto out_brelse; 4347 goto out_brelse;
4329 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 4348 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
4330 raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); 4349 raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
4331 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 4350 if (likely(!test_opt2(inode->i_sb, HURD_COMPAT)))
4332 cpu_to_le32(EXT4_OS_HURD))
4333 raw_inode->i_file_acl_high = 4351 raw_inode->i_file_acl_high =
4334 cpu_to_le16(ei->i_file_acl >> 32); 4352 cpu_to_le16(ei->i_file_acl >> 32);
4335 raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); 4353 raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
@@ -4374,12 +4392,15 @@ static int ext4_do_update_inode(handle_t *handle,
4374 raw_inode->i_block[block] = ei->i_data[block]; 4392 raw_inode->i_block[block] = ei->i_data[block];
4375 } 4393 }
4376 4394
4377 raw_inode->i_disk_version = cpu_to_le32(inode->i_version); 4395 if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) {
4378 if (ei->i_extra_isize) { 4396 raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
4379 if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) 4397 if (ei->i_extra_isize) {
4380 raw_inode->i_version_hi = 4398 if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
4381 cpu_to_le32(inode->i_version >> 32); 4399 raw_inode->i_version_hi =
4382 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); 4400 cpu_to_le32(inode->i_version >> 32);
4401 raw_inode->i_extra_isize =
4402 cpu_to_le16(ei->i_extra_isize);
4403 }
4383 } 4404 }
4384 4405
4385 ext4_inode_csum_set(inode, raw_inode, ei); 4406 ext4_inode_csum_set(inode, raw_inode, ei);
@@ -4446,7 +4467,12 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
4446 return -EIO; 4467 return -EIO;
4447 } 4468 }
4448 4469
4449 if (wbc->sync_mode != WB_SYNC_ALL) 4470 /*
4471 * No need to force transaction in WB_SYNC_NONE mode. Also
4472 * ext4_sync_fs() will force the commit after everything is
4473 * written.
4474 */
4475 if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
4450 return 0; 4476 return 0;
4451 4477
4452 err = ext4_force_commit(inode->i_sb); 4478 err = ext4_force_commit(inode->i_sb);
@@ -4456,7 +4482,11 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
4456 err = __ext4_get_inode_loc(inode, &iloc, 0); 4482 err = __ext4_get_inode_loc(inode, &iloc, 0);
4457 if (err) 4483 if (err)
4458 return err; 4484 return err;
4459 if (wbc->sync_mode == WB_SYNC_ALL) 4485 /*
4486 * sync(2) will flush the whole buffer cache. No need to do
4487 * it here separately for each inode.
4488 */
4489 if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
4460 sync_dirty_buffer(iloc.bh); 4490 sync_dirty_buffer(iloc.bh);
4461 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { 4491 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
4462 EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, 4492 EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a2a837f00407..0f2252ec274d 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -104,21 +104,15 @@ static long swap_inode_boot_loader(struct super_block *sb,
104 struct ext4_inode_info *ei_bl; 104 struct ext4_inode_info *ei_bl;
105 struct ext4_sb_info *sbi = EXT4_SB(sb); 105 struct ext4_sb_info *sbi = EXT4_SB(sb);
106 106
107 if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) { 107 if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode))
108 err = -EINVAL; 108 return -EINVAL;
109 goto swap_boot_out;
110 }
111 109
112 if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) { 110 if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN))
113 err = -EPERM; 111 return -EPERM;
114 goto swap_boot_out;
115 }
116 112
117 inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO); 113 inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO);
118 if (IS_ERR(inode_bl)) { 114 if (IS_ERR(inode_bl))
119 err = PTR_ERR(inode_bl); 115 return PTR_ERR(inode_bl);
120 goto swap_boot_out;
121 }
122 ei_bl = EXT4_I(inode_bl); 116 ei_bl = EXT4_I(inode_bl);
123 117
124 filemap_flush(inode->i_mapping); 118 filemap_flush(inode->i_mapping);
@@ -193,20 +187,14 @@ static long swap_inode_boot_loader(struct super_block *sb,
193 ext4_mark_inode_dirty(handle, inode); 187 ext4_mark_inode_dirty(handle, inode);
194 } 188 }
195 } 189 }
196
197 ext4_journal_stop(handle); 190 ext4_journal_stop(handle);
198
199 ext4_double_up_write_data_sem(inode, inode_bl); 191 ext4_double_up_write_data_sem(inode, inode_bl);
200 192
201journal_err_out: 193journal_err_out:
202 ext4_inode_resume_unlocked_dio(inode); 194 ext4_inode_resume_unlocked_dio(inode);
203 ext4_inode_resume_unlocked_dio(inode_bl); 195 ext4_inode_resume_unlocked_dio(inode_bl);
204
205 unlock_two_nondirectories(inode, inode_bl); 196 unlock_two_nondirectories(inode, inode_bl);
206
207 iput(inode_bl); 197 iput(inode_bl);
208
209swap_boot_out:
210 return err; 198 return err;
211} 199}
212 200
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 04a5c7504be9..a888cac76e9c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1808,6 +1808,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1808 ext4_lock_group(ac->ac_sb, group); 1808 ext4_lock_group(ac->ac_sb, group);
1809 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, 1809 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
1810 ac->ac_g_ex.fe_len, &ex); 1810 ac->ac_g_ex.fe_len, &ex);
1811 ex.fe_logical = 0xDEADFA11; /* debug value */
1811 1812
1812 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { 1813 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
1813 ext4_fsblk_t start; 1814 ext4_fsblk_t start;
@@ -1936,7 +1937,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1936 */ 1937 */
1937 break; 1938 break;
1938 } 1939 }
1939 1940 ex.fe_logical = 0xDEADC0DE; /* debug value */
1940 ext4_mb_measure_extent(ac, &ex, e4b); 1941 ext4_mb_measure_extent(ac, &ex, e4b);
1941 1942
1942 i += ex.fe_len; 1943 i += ex.fe_len;
@@ -1977,6 +1978,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1977 max = mb_find_extent(e4b, i, sbi->s_stripe, &ex); 1978 max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
1978 if (max >= sbi->s_stripe) { 1979 if (max >= sbi->s_stripe) {
1979 ac->ac_found++; 1980 ac->ac_found++;
1981 ex.fe_logical = 0xDEADF00D; /* debug value */
1980 ac->ac_b_ex = ex; 1982 ac->ac_b_ex = ex;
1981 ext4_mb_use_best_found(ac, e4b); 1983 ext4_mb_use_best_found(ac, e4b);
1982 break; 1984 break;
@@ -4006,8 +4008,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4006 (unsigned long)ac->ac_b_ex.fe_len, 4008 (unsigned long)ac->ac_b_ex.fe_len,
4007 (unsigned long)ac->ac_b_ex.fe_logical, 4009 (unsigned long)ac->ac_b_ex.fe_logical,
4008 (int)ac->ac_criteria); 4010 (int)ac->ac_criteria);
4009 ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found", 4011 ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found);
4010 ac->ac_ex_scanned, ac->ac_found);
4011 ext4_msg(ac->ac_sb, KERN_ERR, "groups: "); 4012 ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
4012 ngroups = ext4_get_groups_count(sb); 4013 ngroups = ext4_get_groups_count(sb);
4013 for (i = 0; i < ngroups; i++) { 4014 for (i = 0; i < ngroups; i++) {
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 08481ee84cd5..d634e183b4d4 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -48,7 +48,7 @@ extern ushort ext4_mballoc_debug;
48 } \ 48 } \
49 } while (0) 49 } while (0)
50#else 50#else
51#define mb_debug(n, fmt, a...) 51#define mb_debug(n, fmt, a...) no_printk(fmt, ## a)
52#endif 52#endif
53 53
54#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ 54#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
@@ -175,8 +175,6 @@ struct ext4_allocation_context {
175 /* copy of the best found extent taken before preallocation efforts */ 175 /* copy of the best found extent taken before preallocation efforts */
176 struct ext4_free_extent ac_f_ex; 176 struct ext4_free_extent ac_f_ex;
177 177
178 /* number of iterations done. we have to track to limit searching */
179 unsigned long ac_ex_scanned;
180 __u16 ac_groups_scanned; 178 __u16 ac_groups_scanned;
181 __u16 ac_found; 179 __u16 ac_found;
182 __u16 ac_tail; 180 __u16 ac_tail;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 773b503bd18c..58ee7dc87669 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -76,7 +76,7 @@ copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
76 * ext4_ext_path structure refers to the last extent, or a negative error 76 * ext4_ext_path structure refers to the last extent, or a negative error
77 * value on failure. 77 * value on failure.
78 */ 78 */
79static int 79int
80mext_next_extent(struct inode *inode, struct ext4_ext_path *path, 80mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
81 struct ext4_extent **extent) 81 struct ext4_extent **extent)
82{ 82{
@@ -861,8 +861,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
861 } 861 }
862 if (!buffer_mapped(bh)) { 862 if (!buffer_mapped(bh)) {
863 zero_user(page, block_start, blocksize); 863 zero_user(page, block_start, blocksize);
864 if (!err) 864 set_buffer_uptodate(bh);
865 set_buffer_uptodate(bh);
866 continue; 865 continue;
867 } 866 }
868 } 867 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 710fed2377d4..f3c667091618 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -59,6 +59,7 @@ static struct kset *ext4_kset;
59static struct ext4_lazy_init *ext4_li_info; 59static struct ext4_lazy_init *ext4_li_info;
60static struct mutex ext4_li_mtx; 60static struct mutex ext4_li_mtx;
61static struct ext4_features *ext4_feat; 61static struct ext4_features *ext4_feat;
62static int ext4_mballoc_ready;
62 63
63static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 64static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
64 unsigned long journal_devnum); 65 unsigned long journal_devnum);
@@ -845,6 +846,10 @@ static void ext4_put_super(struct super_block *sb)
845 invalidate_bdev(sbi->journal_bdev); 846 invalidate_bdev(sbi->journal_bdev);
846 ext4_blkdev_remove(sbi); 847 ext4_blkdev_remove(sbi);
847 } 848 }
849 if (sbi->s_mb_cache) {
850 ext4_xattr_destroy_cache(sbi->s_mb_cache);
851 sbi->s_mb_cache = NULL;
852 }
848 if (sbi->s_mmp_tsk) 853 if (sbi->s_mmp_tsk)
849 kthread_stop(sbi->s_mmp_tsk); 854 kthread_stop(sbi->s_mmp_tsk);
850 sb->s_fs_info = NULL; 855 sb->s_fs_info = NULL;
@@ -940,7 +945,7 @@ static void init_once(void *foo)
940 inode_init_once(&ei->vfs_inode); 945 inode_init_once(&ei->vfs_inode);
941} 946}
942 947
943static int init_inodecache(void) 948static int __init init_inodecache(void)
944{ 949{
945 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 950 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
946 sizeof(struct ext4_inode_info), 951 sizeof(struct ext4_inode_info),
@@ -3575,6 +3580,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3575 "feature flags set on rev 0 fs, " 3580 "feature flags set on rev 0 fs, "
3576 "running e2fsck is recommended"); 3581 "running e2fsck is recommended");
3577 3582
3583 if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
3584 set_opt2(sb, HURD_COMPAT);
3585 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
3586 EXT4_FEATURE_INCOMPAT_64BIT)) {
3587 ext4_msg(sb, KERN_ERR,
3588 "The Hurd can't support 64-bit file systems");
3589 goto failed_mount;
3590 }
3591 }
3592
3578 if (IS_EXT2_SB(sb)) { 3593 if (IS_EXT2_SB(sb)) {
3579 if (ext2_feature_set_ok(sb)) 3594 if (ext2_feature_set_ok(sb))
3580 ext4_msg(sb, KERN_INFO, "mounting ext2 file system " 3595 ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
@@ -4010,6 +4025,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
4010 percpu_counter_set(&sbi->s_dirtyclusters_counter, 0); 4025 percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
4011 4026
4012no_journal: 4027no_journal:
4028 if (ext4_mballoc_ready) {
4029 sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
4030 if (!sbi->s_mb_cache) {
4031 ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
4032 goto failed_mount_wq;
4033 }
4034 }
4035
4013 /* 4036 /*
4014 * Get the # of file system overhead blocks from the 4037 * Get the # of file system overhead blocks from the
4015 * superblock if present. 4038 * superblock if present.
@@ -4835,6 +4858,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4835 } 4858 }
4836 4859
4837 if (*flags & MS_RDONLY) { 4860 if (*flags & MS_RDONLY) {
4861 err = sync_filesystem(sb);
4862 if (err < 0)
4863 goto restore_opts;
4838 err = dquot_suspend(sb, -1); 4864 err = dquot_suspend(sb, -1);
4839 if (err < 0) 4865 if (err < 0)
4840 goto restore_opts; 4866 goto restore_opts;
@@ -5516,11 +5542,9 @@ static int __init ext4_init_fs(void)
5516 5542
5517 err = ext4_init_mballoc(); 5543 err = ext4_init_mballoc();
5518 if (err) 5544 if (err)
5519 goto out3;
5520
5521 err = ext4_init_xattr();
5522 if (err)
5523 goto out2; 5545 goto out2;
5546 else
5547 ext4_mballoc_ready = 1;
5524 err = init_inodecache(); 5548 err = init_inodecache();
5525 if (err) 5549 if (err)
5526 goto out1; 5550 goto out1;
@@ -5536,10 +5560,9 @@ out:
5536 unregister_as_ext3(); 5560 unregister_as_ext3();
5537 destroy_inodecache(); 5561 destroy_inodecache();
5538out1: 5562out1:
5539 ext4_exit_xattr(); 5563 ext4_mballoc_ready = 0;
5540out2:
5541 ext4_exit_mballoc(); 5564 ext4_exit_mballoc();
5542out3: 5565out2:
5543 ext4_exit_feat_adverts(); 5566 ext4_exit_feat_adverts();
5544out4: 5567out4:
5545 if (ext4_proc_root) 5568 if (ext4_proc_root)
@@ -5562,7 +5585,6 @@ static void __exit ext4_exit_fs(void)
5562 unregister_as_ext3(); 5585 unregister_as_ext3();
5563 unregister_filesystem(&ext4_fs_type); 5586 unregister_filesystem(&ext4_fs_type);
5564 destroy_inodecache(); 5587 destroy_inodecache();
5565 ext4_exit_xattr();
5566 ext4_exit_mballoc(); 5588 ext4_exit_mballoc();
5567 ext4_exit_feat_adverts(); 5589 ext4_exit_feat_adverts();
5568 remove_proc_entry("fs/ext4", NULL); 5590 remove_proc_entry("fs/ext4", NULL);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e175e94116ac..1f5cf5880718 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -81,7 +81,7 @@
81# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) 81# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
82#endif 82#endif
83 83
84static void ext4_xattr_cache_insert(struct buffer_head *); 84static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
85static struct buffer_head *ext4_xattr_cache_find(struct inode *, 85static struct buffer_head *ext4_xattr_cache_find(struct inode *,
86 struct ext4_xattr_header *, 86 struct ext4_xattr_header *,
87 struct mb_cache_entry **); 87 struct mb_cache_entry **);
@@ -90,8 +90,6 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *,
90static int ext4_xattr_list(struct dentry *dentry, char *buffer, 90static int ext4_xattr_list(struct dentry *dentry, char *buffer,
91 size_t buffer_size); 91 size_t buffer_size);
92 92
93static struct mb_cache *ext4_xattr_cache;
94
95static const struct xattr_handler *ext4_xattr_handler_map[] = { 93static const struct xattr_handler *ext4_xattr_handler_map[] = {
96 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, 94 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler,
97#ifdef CONFIG_EXT4_FS_POSIX_ACL 95#ifdef CONFIG_EXT4_FS_POSIX_ACL
@@ -117,6 +115,9 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
117 NULL 115 NULL
118}; 116};
119 117
118#define EXT4_GET_MB_CACHE(inode) (((struct ext4_sb_info *) \
119 inode->i_sb->s_fs_info)->s_mb_cache)
120
120static __le32 ext4_xattr_block_csum(struct inode *inode, 121static __le32 ext4_xattr_block_csum(struct inode *inode,
121 sector_t block_nr, 122 sector_t block_nr,
122 struct ext4_xattr_header *hdr) 123 struct ext4_xattr_header *hdr)
@@ -265,6 +266,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
265 struct ext4_xattr_entry *entry; 266 struct ext4_xattr_entry *entry;
266 size_t size; 267 size_t size;
267 int error; 268 int error;
269 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
268 270
269 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", 271 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
270 name_index, name, buffer, (long)buffer_size); 272 name_index, name, buffer, (long)buffer_size);
@@ -286,7 +288,7 @@ bad_block:
286 error = -EIO; 288 error = -EIO;
287 goto cleanup; 289 goto cleanup;
288 } 290 }
289 ext4_xattr_cache_insert(bh); 291 ext4_xattr_cache_insert(ext4_mb_cache, bh);
290 entry = BFIRST(bh); 292 entry = BFIRST(bh);
291 error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); 293 error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
292 if (error == -EIO) 294 if (error == -EIO)
@@ -409,6 +411,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
409 struct inode *inode = dentry->d_inode; 411 struct inode *inode = dentry->d_inode;
410 struct buffer_head *bh = NULL; 412 struct buffer_head *bh = NULL;
411 int error; 413 int error;
414 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
412 415
413 ea_idebug(inode, "buffer=%p, buffer_size=%ld", 416 ea_idebug(inode, "buffer=%p, buffer_size=%ld",
414 buffer, (long)buffer_size); 417 buffer, (long)buffer_size);
@@ -430,7 +433,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
430 error = -EIO; 433 error = -EIO;
431 goto cleanup; 434 goto cleanup;
432 } 435 }
433 ext4_xattr_cache_insert(bh); 436 ext4_xattr_cache_insert(ext4_mb_cache, bh);
434 error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); 437 error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
435 438
436cleanup: 439cleanup:
@@ -526,8 +529,9 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
526{ 529{
527 struct mb_cache_entry *ce = NULL; 530 struct mb_cache_entry *ce = NULL;
528 int error = 0; 531 int error = 0;
532 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
529 533
530 ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr); 534 ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr);
531 error = ext4_journal_get_write_access(handle, bh); 535 error = ext4_journal_get_write_access(handle, bh);
532 if (error) 536 if (error)
533 goto out; 537 goto out;
@@ -567,12 +571,13 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
567 size_t *min_offs, void *base, int *total) 571 size_t *min_offs, void *base, int *total)
568{ 572{
569 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 573 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
570 *total += EXT4_XATTR_LEN(last->e_name_len);
571 if (!last->e_value_block && last->e_value_size) { 574 if (!last->e_value_block && last->e_value_size) {
572 size_t offs = le16_to_cpu(last->e_value_offs); 575 size_t offs = le16_to_cpu(last->e_value_offs);
573 if (offs < *min_offs) 576 if (offs < *min_offs)
574 *min_offs = offs; 577 *min_offs = offs;
575 } 578 }
579 if (total)
580 *total += EXT4_XATTR_LEN(last->e_name_len);
576 } 581 }
577 return (*min_offs - ((void *)last - base) - sizeof(__u32)); 582 return (*min_offs - ((void *)last - base) - sizeof(__u32));
578} 583}
@@ -745,13 +750,14 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
745 struct ext4_xattr_search *s = &bs->s; 750 struct ext4_xattr_search *s = &bs->s;
746 struct mb_cache_entry *ce = NULL; 751 struct mb_cache_entry *ce = NULL;
747 int error = 0; 752 int error = 0;
753 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
748 754
749#define header(x) ((struct ext4_xattr_header *)(x)) 755#define header(x) ((struct ext4_xattr_header *)(x))
750 756
751 if (i->value && i->value_len > sb->s_blocksize) 757 if (i->value && i->value_len > sb->s_blocksize)
752 return -ENOSPC; 758 return -ENOSPC;
753 if (s->base) { 759 if (s->base) {
754 ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev, 760 ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev,
755 bs->bh->b_blocknr); 761 bs->bh->b_blocknr);
756 error = ext4_journal_get_write_access(handle, bs->bh); 762 error = ext4_journal_get_write_access(handle, bs->bh);
757 if (error) 763 if (error)
@@ -769,7 +775,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
769 if (!IS_LAST_ENTRY(s->first)) 775 if (!IS_LAST_ENTRY(s->first))
770 ext4_xattr_rehash(header(s->base), 776 ext4_xattr_rehash(header(s->base),
771 s->here); 777 s->here);
772 ext4_xattr_cache_insert(bs->bh); 778 ext4_xattr_cache_insert(ext4_mb_cache,
779 bs->bh);
773 } 780 }
774 unlock_buffer(bs->bh); 781 unlock_buffer(bs->bh);
775 if (error == -EIO) 782 if (error == -EIO)
@@ -905,7 +912,7 @@ getblk_failed:
905 memcpy(new_bh->b_data, s->base, new_bh->b_size); 912 memcpy(new_bh->b_data, s->base, new_bh->b_size);
906 set_buffer_uptodate(new_bh); 913 set_buffer_uptodate(new_bh);
907 unlock_buffer(new_bh); 914 unlock_buffer(new_bh);
908 ext4_xattr_cache_insert(new_bh); 915 ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
909 error = ext4_handle_dirty_xattr_block(handle, 916 error = ext4_handle_dirty_xattr_block(handle,
910 inode, new_bh); 917 inode, new_bh);
911 if (error) 918 if (error)
@@ -1228,7 +1235,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
1228 struct ext4_xattr_block_find *bs = NULL; 1235 struct ext4_xattr_block_find *bs = NULL;
1229 char *buffer = NULL, *b_entry_name = NULL; 1236 char *buffer = NULL, *b_entry_name = NULL;
1230 size_t min_offs, free; 1237 size_t min_offs, free;
1231 int total_ino, total_blk; 1238 int total_ino;
1232 void *base, *start, *end; 1239 void *base, *start, *end;
1233 int extra_isize = 0, error = 0, tried_min_extra_isize = 0; 1240 int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
1234 int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); 1241 int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
@@ -1286,8 +1293,7 @@ retry:
1286 first = BFIRST(bh); 1293 first = BFIRST(bh);
1287 end = bh->b_data + bh->b_size; 1294 end = bh->b_data + bh->b_size;
1288 min_offs = end - base; 1295 min_offs = end - base;
1289 free = ext4_xattr_free_space(first, &min_offs, base, 1296 free = ext4_xattr_free_space(first, &min_offs, base, NULL);
1290 &total_blk);
1291 if (free < new_extra_isize) { 1297 if (free < new_extra_isize) {
1292 if (!tried_min_extra_isize && s_min_extra_isize) { 1298 if (!tried_min_extra_isize && s_min_extra_isize) {
1293 tried_min_extra_isize++; 1299 tried_min_extra_isize++;
@@ -1495,13 +1501,13 @@ ext4_xattr_put_super(struct super_block *sb)
1495 * Returns 0, or a negative error number on failure. 1501 * Returns 0, or a negative error number on failure.
1496 */ 1502 */
1497static void 1503static void
1498ext4_xattr_cache_insert(struct buffer_head *bh) 1504ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
1499{ 1505{
1500 __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); 1506 __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
1501 struct mb_cache_entry *ce; 1507 struct mb_cache_entry *ce;
1502 int error; 1508 int error;
1503 1509
1504 ce = mb_cache_entry_alloc(ext4_xattr_cache, GFP_NOFS); 1510 ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS);
1505 if (!ce) { 1511 if (!ce) {
1506 ea_bdebug(bh, "out of memory"); 1512 ea_bdebug(bh, "out of memory");
1507 return; 1513 return;
@@ -1573,12 +1579,13 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
1573{ 1579{
1574 __u32 hash = le32_to_cpu(header->h_hash); 1580 __u32 hash = le32_to_cpu(header->h_hash);
1575 struct mb_cache_entry *ce; 1581 struct mb_cache_entry *ce;
1582 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
1576 1583
1577 if (!header->h_hash) 1584 if (!header->h_hash)
1578 return NULL; /* never share */ 1585 return NULL; /* never share */
1579 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); 1586 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1580again: 1587again:
1581 ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev, 1588 ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev,
1582 hash); 1589 hash);
1583 while (ce) { 1590 while (ce) {
1584 struct buffer_head *bh; 1591 struct buffer_head *bh;
@@ -1676,19 +1683,17 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
1676 1683
1677#undef BLOCK_HASH_SHIFT 1684#undef BLOCK_HASH_SHIFT
1678 1685
1679int __init 1686#define HASH_BUCKET_BITS 10
1680ext4_init_xattr(void) 1687
1688struct mb_cache *
1689ext4_xattr_create_cache(char *name)
1681{ 1690{
1682 ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); 1691 return mb_cache_create(name, HASH_BUCKET_BITS);
1683 if (!ext4_xattr_cache)
1684 return -ENOMEM;
1685 return 0;
1686} 1692}
1687 1693
1688void 1694void ext4_xattr_destroy_cache(struct mb_cache *cache)
1689ext4_exit_xattr(void)
1690{ 1695{
1691 if (ext4_xattr_cache) 1696 if (cache)
1692 mb_cache_destroy(ext4_xattr_cache); 1697 mb_cache_destroy(cache);
1693 ext4_xattr_cache = NULL;
1694} 1698}
1699
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 819d6398833f..29bedf5589f6 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -110,9 +110,6 @@ extern void ext4_xattr_put_super(struct super_block *);
110extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, 110extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
111 struct ext4_inode *raw_inode, handle_t *handle); 111 struct ext4_inode *raw_inode, handle_t *handle);
112 112
113extern int __init ext4_init_xattr(void);
114extern void ext4_exit_xattr(void);
115
116extern const struct xattr_handler *ext4_xattr_handlers[]; 113extern const struct xattr_handler *ext4_xattr_handlers[];
117 114
118extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, 115extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
@@ -124,6 +121,9 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
124 struct ext4_xattr_info *i, 121 struct ext4_xattr_info *i,
125 struct ext4_xattr_ibody_find *is); 122 struct ext4_xattr_ibody_find *is);
126 123
124extern struct mb_cache *ext4_xattr_create_cache(char *name);
125extern void ext4_xattr_destroy_cache(struct mb_cache *);
126
127#ifdef CONFIG_EXT4_FS_SECURITY 127#ifdef CONFIG_EXT4_FS_SECURITY
128extern int ext4_init_security(handle_t *handle, struct inode *inode, 128extern int ext4_init_security(handle_t *handle, struct inode *inode,
129 struct inode *dir, const struct qstr *qstr); 129 struct inode *dir, const struct qstr *qstr);