about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fs/ext4/crypto.c 6
-rw-r--r-- fs/ext4/crypto_key.c 4
-rw-r--r-- fs/ext4/ext4.h 99
-rw-r--r-- fs/ext4/extents.c 153
-rw-r--r-- fs/ext4/file.c 82
-rw-r--r-- fs/ext4/ialloc.c 7
-rw-r--r-- fs/ext4/inline.c 10
-rw-r--r-- fs/ext4/inode.c 268
-rw-r--r-- fs/ext4/ioctl.c 376
-rw-r--r-- fs/ext4/namei.c 34
-rw-r--r-- fs/ext4/super.c 97
-rw-r--r-- fs/ext4/truncate.h 2
-rw-r--r-- include/trace/events/ext4.h 2
-rw-r--r-- include/uapi/linux/fs.h 31
14 files changed, 895 insertions, 276 deletions
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index 1a0835073663..c8021208a7eb 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -384,14 +384,12 @@ int ext4_decrypt(struct page *page)
384 EXT4_DECRYPT, page->index, page, page); 384 EXT4_DECRYPT, page->index, page, page);
385} 385}
386 386
387int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) 387int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
388 ext4_fsblk_t pblk, ext4_lblk_t len)
388{ 389{
389 struct ext4_crypto_ctx *ctx; 390 struct ext4_crypto_ctx *ctx;
390 struct page *ciphertext_page = NULL; 391 struct page *ciphertext_page = NULL;
391 struct bio *bio; 392 struct bio *bio;
392 ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
393 ext4_fsblk_t pblk = ext4_ext_pblock(ex);
394 unsigned int len = ext4_ext_get_actual_len(ex);
395 int ret, err = 0; 393 int ret, err = 0;
396 394
397#if 0 395#if 0
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index c5882b36e558..9a16d1e75a49 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -213,9 +213,11 @@ retry:
213 res = -ENOKEY; 213 res = -ENOKEY;
214 goto out; 214 goto out;
215 } 215 }
216 down_read(&keyring_key->sem);
216 ukp = user_key_payload(keyring_key); 217 ukp = user_key_payload(keyring_key);
217 if (ukp->datalen != sizeof(struct ext4_encryption_key)) { 218 if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
218 res = -EINVAL; 219 res = -EINVAL;
220 up_read(&keyring_key->sem);
219 goto out; 221 goto out;
220 } 222 }
221 master_key = (struct ext4_encryption_key *)ukp->data; 223 master_key = (struct ext4_encryption_key *)ukp->data;
@@ -226,10 +228,12 @@ retry:
226 "ext4: key size incorrect: %d\n", 228 "ext4: key size incorrect: %d\n",
227 master_key->size); 229 master_key->size);
228 res = -ENOKEY; 230 res = -ENOKEY;
231 up_read(&keyring_key->sem);
229 goto out; 232 goto out;
230 } 233 }
231 res = ext4_derive_key_aes(ctx.nonce, master_key->raw, 234 res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
232 raw_key); 235 raw_key);
236 up_read(&keyring_key->sem);
233 if (res) 237 if (res)
234 goto out; 238 goto out;
235got_key: 239got_key:
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cc7ca4e87144..1c127213363a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -378,14 +378,22 @@ struct flex_groups {
378#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ 378#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
379#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ 379#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
380 380
381#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */ 381#define EXT4_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
382#define EXT4_FL_USER_MODIFIABLE 0x004380FF /* User modifiable flags */ 382#define EXT4_FL_USER_MODIFIABLE 0x204380FF /* User modifiable flags */
383
384#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
385 EXT4_IMMUTABLE_FL | \
386 EXT4_APPEND_FL | \
387 EXT4_NODUMP_FL | \
388 EXT4_NOATIME_FL | \
389 EXT4_PROJINHERIT_FL)
383 390
384/* Flags that should be inherited by new inodes from their parent. */ 391/* Flags that should be inherited by new inodes from their parent. */
385#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ 392#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
386 EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\ 393 EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
387 EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ 394 EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
388 EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL) 395 EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
396 EXT4_PROJINHERIT_FL)
389 397
390/* Flags that are appropriate for regular files (all but dir-specific ones). */ 398/* Flags that are appropriate for regular files (all but dir-specific ones). */
391#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL)) 399#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
@@ -555,10 +563,12 @@ enum {
555#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 563#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
556 /* Request will not result in inode size update (used for fallocate) */ 564 /* Request will not result in inode size update (used for fallocate) */
557#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 565#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
558 /* Do not take i_data_sem locking in ext4_map_blocks */
559#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
560 /* Convert written extents to unwritten */ 566 /* Convert written extents to unwritten */
561#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0200 567#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0100
568 /* Write zeros to newly created written extents */
569#define EXT4_GET_BLOCKS_ZERO 0x0200
570#define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\
571 EXT4_GET_BLOCKS_ZERO)
562 572
563/* 573/*
564 * The bit position of these flags must not overlap with any of the 574 * The bit position of these flags must not overlap with any of the
@@ -616,6 +626,46 @@ enum {
616#define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) 626#define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
617#define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy) 627#define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy)
618 628
629#ifndef FS_IOC_FSGETXATTR
630/* Until the uapi changes get merged for project quota... */
631
632#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr)
633#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
634
635/*
636 * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
637 */
638struct fsxattr {
639 __u32 fsx_xflags; /* xflags field value (get/set) */
640 __u32 fsx_extsize; /* extsize field value (get/set)*/
641 __u32 fsx_nextents; /* nextents field value (get) */
642 __u32 fsx_projid; /* project identifier (get/set) */
643 unsigned char fsx_pad[12];
644};
645
646/*
647 * Flags for the fsx_xflags field
648 */
649#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
650#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
651#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
652#define FS_XFLAG_APPEND 0x00000010 /* all writes append */
653#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
654#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */
655#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
656#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
657#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
658#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
659#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
660#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
661#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
662#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
663#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
664#endif /* !defined(FS_IOC_FSGETXATTR) */
665
666#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR
667#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR
668
619#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 669#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
620/* 670/*
621 * ioctl commands in 32 bit emulation 671 * ioctl commands in 32 bit emulation
@@ -910,6 +960,15 @@ struct ext4_inode_info {
910 * by other means, so we have i_data_sem. 960 * by other means, so we have i_data_sem.
911 */ 961 */
912 struct rw_semaphore i_data_sem; 962 struct rw_semaphore i_data_sem;
963 /*
964 * i_mmap_sem is for serializing page faults with truncate / punch hole
965 * operations. We have to make sure that new page cannot be faulted in
966 * a section of the inode that is being punched. We cannot easily use
967 * i_data_sem for this since we need protection for the whole punch
968 * operation and i_data_sem ranks below transaction start so we have
969 * to occasionally drop it.
970 */
971 struct rw_semaphore i_mmap_sem;
913 struct inode vfs_inode; 972 struct inode vfs_inode;
914 struct jbd2_inode *jinode; 973 struct jbd2_inode *jinode;
915 974
@@ -993,6 +1052,7 @@ struct ext4_inode_info {
993 /* Encryption params */ 1052 /* Encryption params */
994 struct ext4_crypt_info *i_crypt_info; 1053 struct ext4_crypt_info *i_crypt_info;
995#endif 1054#endif
1055 kprojid_t i_projid;
996}; 1056};
997 1057
998/* 1058/*
@@ -1248,7 +1308,7 @@ struct ext4_super_block {
1248#endif 1308#endif
1249 1309
1250/* Number of quota types we support */ 1310/* Number of quota types we support */
1251#define EXT4_MAXQUOTAS 2 1311#define EXT4_MAXQUOTAS 3
1252 1312
1253/* 1313/*
1254 * fourth extended-fs super-block data in memory 1314 * fourth extended-fs super-block data in memory
@@ -1754,7 +1814,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
1754 EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\ 1814 EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
1755 EXT4_FEATURE_RO_COMPAT_BIGALLOC |\ 1815 EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
1756 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\ 1816 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
1757 EXT4_FEATURE_RO_COMPAT_QUOTA) 1817 EXT4_FEATURE_RO_COMPAT_QUOTA |\
1818 EXT4_FEATURE_RO_COMPAT_PROJECT)
1758 1819
1759#define EXTN_FEATURE_FUNCS(ver) \ 1820#define EXTN_FEATURE_FUNCS(ver) \
1760static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \ 1821static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -1796,6 +1857,11 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
1796#define EXT4_DEF_RESUID 0 1857#define EXT4_DEF_RESUID 0
1797#define EXT4_DEF_RESGID 0 1858#define EXT4_DEF_RESGID 0
1798 1859
1860/*
1861 * Default project ID
1862 */
1863#define EXT4_DEF_PROJID 0
1864
1799#define EXT4_DEF_INODE_READAHEAD_BLKS 32 1865#define EXT4_DEF_INODE_READAHEAD_BLKS 32
1800 1866
1801/* 1867/*
@@ -2234,7 +2300,8 @@ void ext4_restore_control_page(struct page *data_page);
2234struct page *ext4_encrypt(struct inode *inode, 2300struct page *ext4_encrypt(struct inode *inode,
2235 struct page *plaintext_page); 2301 struct page *plaintext_page);
2236int ext4_decrypt(struct page *page); 2302int ext4_decrypt(struct page *page);
2237int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex); 2303int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
2304 ext4_fsblk_t pblk, ext4_lblk_t len);
2238 2305
2239#ifdef CONFIG_EXT4_FS_ENCRYPTION 2306#ifdef CONFIG_EXT4_FS_ENCRYPTION
2240int ext4_init_crypto(void); 2307int ext4_init_crypto(void);
@@ -2440,8 +2507,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
2440struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); 2507struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
2441int ext4_get_block_write(struct inode *inode, sector_t iblock, 2508int ext4_get_block_write(struct inode *inode, sector_t iblock,
2442 struct buffer_head *bh_result, int create); 2509 struct buffer_head *bh_result, int create);
2443int ext4_get_block_dax(struct inode *inode, sector_t iblock, 2510int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
2444 struct buffer_head *bh_result, int create); 2511 struct buffer_head *bh_result, int create);
2445int ext4_get_block(struct inode *inode, sector_t iblock, 2512int ext4_get_block(struct inode *inode, sector_t iblock,
2446 struct buffer_head *bh_result, int create); 2513 struct buffer_head *bh_result, int create);
2447int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, 2514int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
@@ -2484,9 +2551,13 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
2484extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, 2551extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
2485 loff_t lstart, loff_t lend); 2552 loff_t lstart, loff_t lend);
2486extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 2553extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2554extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
2487extern qsize_t *ext4_get_reserved_space(struct inode *inode); 2555extern qsize_t *ext4_get_reserved_space(struct inode *inode);
2556extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
2488extern void ext4_da_update_reserve_space(struct inode *inode, 2557extern void ext4_da_update_reserve_space(struct inode *inode,
2489 int used, int quota_claim); 2558 int used, int quota_claim);
2559extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
2560 ext4_fsblk_t pblk, ext4_lblk_t len);
2490 2561
2491/* indirect.c */ 2562/* indirect.c */
2492extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, 2563extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -2848,6 +2919,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
2848 return changed; 2919 return changed;
2849} 2920}
2850 2921
2922int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
2923 loff_t len);
2924
2851struct ext4_group_info { 2925struct ext4_group_info {
2852 unsigned long bb_state; 2926 unsigned long bb_state;
2853 struct rb_root bb_free_root; 2927 struct rb_root bb_free_root;
@@ -2986,8 +3060,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
2986 struct page *page); 3060 struct page *page);
2987extern int ext4_try_add_inline_entry(handle_t *handle, 3061extern int ext4_try_add_inline_entry(handle_t *handle,
2988 struct ext4_filename *fname, 3062 struct ext4_filename *fname,
2989 struct dentry *dentry, 3063 struct inode *dir, struct inode *inode);
2990 struct inode *inode);
2991extern int ext4_try_create_inline_dir(handle_t *handle, 3064extern int ext4_try_create_inline_dir(handle_t *handle,
2992 struct inode *parent, 3065 struct inode *parent,
2993 struct inode *inode); 3066 struct inode *inode);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 551353b1b17a..b52fea3b7219 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3119,19 +3119,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
3119{ 3119{
3120 ext4_fsblk_t ee_pblock; 3120 ext4_fsblk_t ee_pblock;
3121 unsigned int ee_len; 3121 unsigned int ee_len;
3122 int ret;
3123 3122
3124 ee_len = ext4_ext_get_actual_len(ex); 3123 ee_len = ext4_ext_get_actual_len(ex);
3125 ee_pblock = ext4_ext_pblock(ex); 3124 ee_pblock = ext4_ext_pblock(ex);
3126 3125 return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
3127 if (ext4_encrypted_inode(inode)) 3126 ee_len);
3128 return ext4_encrypted_zeroout(inode, ex);
3129
3130 ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
3131 if (ret > 0)
3132 ret = 0;
3133
3134 return ret;
3135} 3127}
3136 3128
3137/* 3129/*
@@ -4052,6 +4044,14 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
4052 } 4044 }
4053 /* IO end_io complete, convert the filled extent to written */ 4045 /* IO end_io complete, convert the filled extent to written */
4054 if (flags & EXT4_GET_BLOCKS_CONVERT) { 4046 if (flags & EXT4_GET_BLOCKS_CONVERT) {
4047 if (flags & EXT4_GET_BLOCKS_ZERO) {
4048 if (allocated > map->m_len)
4049 allocated = map->m_len;
4050 err = ext4_issue_zeroout(inode, map->m_lblk, newblock,
4051 allocated);
4052 if (err < 0)
4053 goto out2;
4054 }
4055 ret = ext4_convert_unwritten_extents_endio(handle, inode, map, 4055 ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
4056 ppath); 4056 ppath);
4057 if (ret >= 0) { 4057 if (ret >= 0) {
@@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4685 if (len <= EXT_UNWRITTEN_MAX_LEN) 4685 if (len <= EXT_UNWRITTEN_MAX_LEN)
4686 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; 4686 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
4687 4687
4688 /* Wait all existing dio workers, newcomers will block on i_mutex */
4689 ext4_inode_block_unlocked_dio(inode);
4690 inode_dio_wait(inode);
4691
4692 /* 4688 /*
4693 * credits to insert 1 extent into extent tree 4689 * credits to insert 1 extent into extent tree
4694 */ 4690 */
@@ -4752,8 +4748,6 @@ retry:
4752 goto retry; 4748 goto retry;
4753 } 4749 }
4754 4750
4755 ext4_inode_resume_unlocked_dio(inode);
4756
4757 return ret > 0 ? ret2 : ret; 4751 return ret > 0 ? ret2 : ret;
4758} 4752}
4759 4753
@@ -4770,7 +4764,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4770 int partial_begin, partial_end; 4764 int partial_begin, partial_end;
4771 loff_t start, end; 4765 loff_t start, end;
4772 ext4_lblk_t lblk; 4766 ext4_lblk_t lblk;
4773 struct address_space *mapping = inode->i_mapping;
4774 unsigned int blkbits = inode->i_blkbits; 4767 unsigned int blkbits = inode->i_blkbits;
4775 4768
4776 trace_ext4_zero_range(inode, offset, len, mode); 4769 trace_ext4_zero_range(inode, offset, len, mode);
@@ -4786,17 +4779,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4786 } 4779 }
4787 4780
4788 /* 4781 /*
4789 * Write out all dirty pages to avoid race conditions
4790 * Then release them.
4791 */
4792 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
4793 ret = filemap_write_and_wait_range(mapping, offset,
4794 offset + len - 1);
4795 if (ret)
4796 return ret;
4797 }
4798
4799 /*
4800 * Round up offset. This is not fallocate, we need to zero out 4782 * Round up offset. This is not fallocate, we need to zero out
4801 * blocks, so convert interior block aligned part of the range to 4783 * blocks, so convert interior block aligned part of the range to
4802 * unwritten and possibly manually zero out unaligned parts of the 4784 * unwritten and possibly manually zero out unaligned parts of the
@@ -4839,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4839 if (mode & FALLOC_FL_KEEP_SIZE) 4821 if (mode & FALLOC_FL_KEEP_SIZE)
4840 flags |= EXT4_GET_BLOCKS_KEEP_SIZE; 4822 flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
4841 4823
4824 /* Wait all existing dio workers, newcomers will block on i_mutex */
4825 ext4_inode_block_unlocked_dio(inode);
4826 inode_dio_wait(inode);
4827
4842 /* Preallocate the range including the unaligned edges */ 4828 /* Preallocate the range including the unaligned edges */
4843 if (partial_begin || partial_end) { 4829 if (partial_begin || partial_end) {
4844 ret = ext4_alloc_file_blocks(file, 4830 ret = ext4_alloc_file_blocks(file,
@@ -4847,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4847 round_down(offset, 1 << blkbits)) >> blkbits, 4833 round_down(offset, 1 << blkbits)) >> blkbits,
4848 new_size, flags, mode); 4834 new_size, flags, mode);
4849 if (ret) 4835 if (ret)
4850 goto out_mutex; 4836 goto out_dio;
4851 4837
4852 } 4838 }
4853 4839
@@ -4856,16 +4842,23 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4856 flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | 4842 flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
4857 EXT4_EX_NOCACHE); 4843 EXT4_EX_NOCACHE);
4858 4844
4859 /* Now release the pages and zero block aligned part of pages*/ 4845 /*
4846 * Prevent page faults from reinstantiating pages we have
4847 * released from page cache.
4848 */
4849 down_write(&EXT4_I(inode)->i_mmap_sem);
4850 ret = ext4_update_disksize_before_punch(inode, offset, len);
4851 if (ret) {
4852 up_write(&EXT4_I(inode)->i_mmap_sem);
4853 goto out_dio;
4854 }
4855 /* Now release the pages and zero block aligned part of pages */
4860 truncate_pagecache_range(inode, start, end - 1); 4856 truncate_pagecache_range(inode, start, end - 1);
4861 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 4857 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4862 4858
4863 /* Wait all existing dio workers, newcomers will block on i_mutex */
4864 ext4_inode_block_unlocked_dio(inode);
4865 inode_dio_wait(inode);
4866
4867 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, 4859 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4868 flags, mode); 4860 flags, mode);
4861 up_write(&EXT4_I(inode)->i_mmap_sem);
4869 if (ret) 4862 if (ret)
4870 goto out_dio; 4863 goto out_dio;
4871 } 4864 }
@@ -4998,8 +4991,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4998 goto out; 4991 goto out;
4999 } 4992 }
5000 4993
4994 /* Wait all existing dio workers, newcomers will block on i_mutex */
4995 ext4_inode_block_unlocked_dio(inode);
4996 inode_dio_wait(inode);
4997
5001 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, 4998 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
5002 flags, mode); 4999 flags, mode);
5000 ext4_inode_resume_unlocked_dio(inode);
5003 if (ret) 5001 if (ret)
5004 goto out; 5002 goto out;
5005 5003
@@ -5494,21 +5492,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5494 return ret; 5492 return ret;
5495 } 5493 }
5496 5494
5497 /*
5498 * Need to round down offset to be aligned with page size boundary
5499 * for page size > block size.
5500 */
5501 ioffset = round_down(offset, PAGE_SIZE);
5502
5503 /* Write out all dirty pages */
5504 ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
5505 LLONG_MAX);
5506 if (ret)
5507 return ret;
5508
5509 /* Take mutex lock */
5510 mutex_lock(&inode->i_mutex); 5495 mutex_lock(&inode->i_mutex);
5511
5512 /* 5496 /*
5513 * There is no need to overlap collapse range with EOF, in which case 5497 * There is no need to overlap collapse range with EOF, in which case
5514 * it is effectively a truncate operation 5498 * it is effectively a truncate operation
@@ -5524,17 +5508,43 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5524 goto out_mutex; 5508 goto out_mutex;
5525 } 5509 }
5526 5510
5527 truncate_pagecache(inode, ioffset);
5528
5529 /* Wait for existing dio to complete */ 5511 /* Wait for existing dio to complete */
5530 ext4_inode_block_unlocked_dio(inode); 5512 ext4_inode_block_unlocked_dio(inode);
5531 inode_dio_wait(inode); 5513 inode_dio_wait(inode);
5532 5514
5515 /*
5516 * Prevent page faults from reinstantiating pages we have released from
5517 * page cache.
5518 */
5519 down_write(&EXT4_I(inode)->i_mmap_sem);
5520 /*
5521 * Need to round down offset to be aligned with page size boundary
5522 * for page size > block size.
5523 */
5524 ioffset = round_down(offset, PAGE_SIZE);
5525 /*
5526 * Write tail of the last page before removed range since it will get
5527 * removed from the page cache below.
5528 */
5529 ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
5530 if (ret)
5531 goto out_mmap;
5532 /*
5533 * Write data that will be shifted to preserve them when discarding
5534 * page cache below. We are also protected from pages becoming dirty
5535 * by i_mmap_sem.
5536 */
5537 ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
5538 LLONG_MAX);
5539 if (ret)
5540 goto out_mmap;
5541 truncate_pagecache(inode, ioffset);
5542
5533 credits = ext4_writepage_trans_blocks(inode); 5543 credits = ext4_writepage_trans_blocks(inode);
5534 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); 5544 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
5535 if (IS_ERR(handle)) { 5545 if (IS_ERR(handle)) {
5536 ret = PTR_ERR(handle); 5546 ret = PTR_ERR(handle);
5537 goto out_dio; 5547 goto out_mmap;
5538 } 5548 }
5539 5549
5540 down_write(&EXT4_I(inode)->i_data_sem); 5550 down_write(&EXT4_I(inode)->i_data_sem);
@@ -5573,7 +5583,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5573 5583
5574out_stop: 5584out_stop:
5575 ext4_journal_stop(handle); 5585 ext4_journal_stop(handle);
5576out_dio: 5586out_mmap:
5587 up_write(&EXT4_I(inode)->i_mmap_sem);
5577 ext4_inode_resume_unlocked_dio(inode); 5588 ext4_inode_resume_unlocked_dio(inode);
5578out_mutex: 5589out_mutex:
5579 mutex_unlock(&inode->i_mutex); 5590 mutex_unlock(&inode->i_mutex);
@@ -5627,21 +5638,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
5627 return ret; 5638 return ret;
5628 } 5639 }
5629 5640
5630 /*
5631 * Need to round down to align start offset to page size boundary
5632 * for page size > block size.
5633 */
5634 ioffset = round_down(offset, PAGE_SIZE);
5635
5636 /* Write out all dirty pages */
5637 ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
5638 LLONG_MAX);
5639 if (ret)
5640 return ret;
5641
5642 /* Take mutex lock */
5643 mutex_lock(&inode->i_mutex); 5641 mutex_lock(&inode->i_mutex);
5644
5645 /* Currently just for extent based files */ 5642 /* Currently just for extent based files */
5646 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 5643 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
5647 ret = -EOPNOTSUPP; 5644 ret = -EOPNOTSUPP;
@@ -5660,17 +5657,32 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
5660 goto out_mutex; 5657 goto out_mutex;
5661 } 5658 }
5662 5659
5663 truncate_pagecache(inode, ioffset);
5664
5665 /* Wait for existing dio to complete */ 5660 /* Wait for existing dio to complete */
5666 ext4_inode_block_unlocked_dio(inode); 5661 ext4_inode_block_unlocked_dio(inode);
5667 inode_dio_wait(inode); 5662 inode_dio_wait(inode);
5668 5663
5664 /*
5665 * Prevent page faults from reinstantiating pages we have released from
5666 * page cache.
5667 */
5668 down_write(&EXT4_I(inode)->i_mmap_sem);
5669 /*
5670 * Need to round down to align start offset to page size boundary
5671 * for page size > block size.
5672 */
5673 ioffset = round_down(offset, PAGE_SIZE);
5674 /* Write out all dirty pages */
5675 ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
5676 LLONG_MAX);
5677 if (ret)
5678 goto out_mmap;
5679 truncate_pagecache(inode, ioffset);
5680
5669 credits = ext4_writepage_trans_blocks(inode); 5681 credits = ext4_writepage_trans_blocks(inode);
5670 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); 5682 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
5671 if (IS_ERR(handle)) { 5683 if (IS_ERR(handle)) {
5672 ret = PTR_ERR(handle); 5684 ret = PTR_ERR(handle);
5673 goto out_dio; 5685 goto out_mmap;
5674 } 5686 }
5675 5687
5676 /* Expand file to avoid data loss if there is error while shifting */ 5688 /* Expand file to avoid data loss if there is error while shifting */
@@ -5741,7 +5753,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
5741 5753
5742out_stop: 5754out_stop:
5743 ext4_journal_stop(handle); 5755 ext4_journal_stop(handle);
5744out_dio: 5756out_mmap:
5757 up_write(&EXT4_I(inode)->i_mmap_sem);
5745 ext4_inode_resume_unlocked_dio(inode); 5758 ext4_inode_resume_unlocked_dio(inode);
5746out_mutex: 5759out_mutex:
5747 mutex_unlock(&inode->i_mutex); 5760 mutex_unlock(&inode->i_mutex);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 113837e7ba98..749b222e6498 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -193,43 +193,35 @@ out:
193} 193}
194 194
195#ifdef CONFIG_FS_DAX 195#ifdef CONFIG_FS_DAX
196static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
197{
198 struct inode *inode = bh->b_assoc_map->host;
199 /* XXX: breaks on 32-bit > 16TB. Is that even supported? */
200 loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
201 int err;
202 if (!uptodate)
203 return;
204 WARN_ON(!buffer_unwritten(bh));
205 err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
206}
207
208static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 196static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
209{ 197{
210 int result; 198 int result;
211 handle_t *handle = NULL; 199 handle_t *handle = NULL;
212 struct super_block *sb = file_inode(vma->vm_file)->i_sb; 200 struct inode *inode = file_inode(vma->vm_file);
201 struct super_block *sb = inode->i_sb;
213 bool write = vmf->flags & FAULT_FLAG_WRITE; 202 bool write = vmf->flags & FAULT_FLAG_WRITE;
214 203
215 if (write) { 204 if (write) {
216 sb_start_pagefault(sb); 205 sb_start_pagefault(sb);
217 file_update_time(vma->vm_file); 206 file_update_time(vma->vm_file);
207 down_read(&EXT4_I(inode)->i_mmap_sem);
218 handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, 208 handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
219 EXT4_DATA_TRANS_BLOCKS(sb)); 209 EXT4_DATA_TRANS_BLOCKS(sb));
220 } 210 } else
211 down_read(&EXT4_I(inode)->i_mmap_sem);
221 212
222 if (IS_ERR(handle)) 213 if (IS_ERR(handle))
223 result = VM_FAULT_SIGBUS; 214 result = VM_FAULT_SIGBUS;
224 else 215 else
225 result = __dax_fault(vma, vmf, ext4_get_block_dax, 216 result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);
226 ext4_end_io_unwritten);
227 217
228 if (write) { 218 if (write) {
229 if (!IS_ERR(handle)) 219 if (!IS_ERR(handle))
230 ext4_journal_stop(handle); 220 ext4_journal_stop(handle);
221 up_read(&EXT4_I(inode)->i_mmap_sem);
231 sb_end_pagefault(sb); 222 sb_end_pagefault(sb);
232 } 223 } else
224 up_read(&EXT4_I(inode)->i_mmap_sem);
233 225
234 return result; 226 return result;
235} 227}
@@ -246,44 +238,86 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
246 if (write) { 238 if (write) {
247 sb_start_pagefault(sb); 239 sb_start_pagefault(sb);
248 file_update_time(vma->vm_file); 240 file_update_time(vma->vm_file);
241 down_read(&EXT4_I(inode)->i_mmap_sem);
249 handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, 242 handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
250 ext4_chunk_trans_blocks(inode, 243 ext4_chunk_trans_blocks(inode,
251 PMD_SIZE / PAGE_SIZE)); 244 PMD_SIZE / PAGE_SIZE));
252 } 245 } else
246 down_read(&EXT4_I(inode)->i_mmap_sem);
253 247
254 if (IS_ERR(handle)) 248 if (IS_ERR(handle))
255 result = VM_FAULT_SIGBUS; 249 result = VM_FAULT_SIGBUS;
256 else 250 else
257 result = __dax_pmd_fault(vma, addr, pmd, flags, 251 result = __dax_pmd_fault(vma, addr, pmd, flags,
258 ext4_get_block_dax, ext4_end_io_unwritten); 252 ext4_dax_mmap_get_block, NULL);
259 253
260 if (write) { 254 if (write) {
261 if (!IS_ERR(handle)) 255 if (!IS_ERR(handle))
262 ext4_journal_stop(handle); 256 ext4_journal_stop(handle);
257 up_read(&EXT4_I(inode)->i_mmap_sem);
263 sb_end_pagefault(sb); 258 sb_end_pagefault(sb);
264 } 259 } else
260 up_read(&EXT4_I(inode)->i_mmap_sem);
265 261
266 return result; 262 return result;
267} 263}
268 264
269static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 265static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
270{ 266{
271 return dax_mkwrite(vma, vmf, ext4_get_block_dax, 267 int err;
272 ext4_end_io_unwritten); 268 struct inode *inode = file_inode(vma->vm_file);
269
270 sb_start_pagefault(inode->i_sb);
271 file_update_time(vma->vm_file);
272 down_read(&EXT4_I(inode)->i_mmap_sem);
273 err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL);
274 up_read(&EXT4_I(inode)->i_mmap_sem);
275 sb_end_pagefault(inode->i_sb);
276
277 return err;
278}
279
280/*
281 * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
282 * handler we check for races against truncate. Note that since we cycle through
283 * i_mmap_sem, we are sure that also any hole punching that began before we
284 * were called is finished by now and so if it included part of the file we
285 * are working on, our pte will get unmapped and the check for pte_same() in
286 * wp_pfn_shared() fails. Thus fault gets retried and things work out as
287 * desired.
288 */
289static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
290 struct vm_fault *vmf)
291{
292 struct inode *inode = file_inode(vma->vm_file);
293 struct super_block *sb = inode->i_sb;
294 int ret = VM_FAULT_NOPAGE;
295 loff_t size;
296
297 sb_start_pagefault(sb);
298 file_update_time(vma->vm_file);
299 down_read(&EXT4_I(inode)->i_mmap_sem);
300 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
301 if (vmf->pgoff >= size)
302 ret = VM_FAULT_SIGBUS;
303 up_read(&EXT4_I(inode)->i_mmap_sem);
304 sb_end_pagefault(sb);
305
306 return ret;
273} 307}
274 308
275static const struct vm_operations_struct ext4_dax_vm_ops = { 309static const struct vm_operations_struct ext4_dax_vm_ops = {
276 .fault = ext4_dax_fault, 310 .fault = ext4_dax_fault,
277 .pmd_fault = ext4_dax_pmd_fault, 311 .pmd_fault = ext4_dax_pmd_fault,
278 .page_mkwrite = ext4_dax_mkwrite, 312 .page_mkwrite = ext4_dax_mkwrite,
279 .pfn_mkwrite = dax_pfn_mkwrite, 313 .pfn_mkwrite = ext4_dax_pfn_mkwrite,
280}; 314};
281#else 315#else
282#define ext4_dax_vm_ops ext4_file_vm_ops 316#define ext4_dax_vm_ops ext4_file_vm_ops
283#endif 317#endif
284 318
285static const struct vm_operations_struct ext4_file_vm_ops = { 319static const struct vm_operations_struct ext4_file_vm_ops = {
286 .fault = filemap_fault, 320 .fault = ext4_filemap_fault,
287 .map_pages = filemap_map_pages, 321 .map_pages = filemap_map_pages,
288 .page_mkwrite = ext4_page_mkwrite, 322 .page_mkwrite = ext4_page_mkwrite,
289}; 323};
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 1b8024d26f65..3fcfd50a2e8a 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -799,6 +799,13 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
799 inode->i_gid = dir->i_gid; 799 inode->i_gid = dir->i_gid;
800 } else 800 } else
801 inode_init_owner(inode, dir, mode); 801 inode_init_owner(inode, dir, mode);
802
803 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
804 ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
805 ei->i_projid = EXT4_I(dir)->i_projid;
806 else
807 ei->i_projid = make_kprojid(&init_user_ns, EXT4_DEF_PROJID);
808
802 err = dquot_initialize(inode); 809 err = dquot_initialize(inode);
803 if (err) 810 if (err)
804 goto out; 811 goto out;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index d884989cc83d..dfe3b9bafc0d 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -995,12 +995,11 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
995 */ 995 */
996static int ext4_add_dirent_to_inline(handle_t *handle, 996static int ext4_add_dirent_to_inline(handle_t *handle,
997 struct ext4_filename *fname, 997 struct ext4_filename *fname,
998 struct dentry *dentry, 998 struct inode *dir,
999 struct inode *inode, 999 struct inode *inode,
1000 struct ext4_iloc *iloc, 1000 struct ext4_iloc *iloc,
1001 void *inline_start, int inline_size) 1001 void *inline_start, int inline_size)
1002{ 1002{
1003 struct inode *dir = d_inode(dentry->d_parent);
1004 int err; 1003 int err;
1005 struct ext4_dir_entry_2 *de; 1004 struct ext4_dir_entry_2 *de;
1006 1005
@@ -1245,12 +1244,11 @@ out:
1245 * the new created block. 1244 * the new created block.
1246 */ 1245 */
1247int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, 1246int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
1248 struct dentry *dentry, struct inode *inode) 1247 struct inode *dir, struct inode *inode)
1249{ 1248{
1250 int ret, inline_size; 1249 int ret, inline_size;
1251 void *inline_start; 1250 void *inline_start;
1252 struct ext4_iloc iloc; 1251 struct ext4_iloc iloc;
1253 struct inode *dir = d_inode(dentry->d_parent);
1254 1252
1255 ret = ext4_get_inode_loc(dir, &iloc); 1253 ret = ext4_get_inode_loc(dir, &iloc);
1256 if (ret) 1254 if (ret)
@@ -1264,7 +1262,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
1264 EXT4_INLINE_DOTDOT_SIZE; 1262 EXT4_INLINE_DOTDOT_SIZE;
1265 inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; 1263 inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
1266 1264
1267 ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc, 1265 ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc,
1268 inline_start, inline_size); 1266 inline_start, inline_size);
1269 if (ret != -ENOSPC) 1267 if (ret != -ENOSPC)
1270 goto out; 1268 goto out;
@@ -1285,7 +1283,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
1285 if (inline_size) { 1283 if (inline_size) {
1286 inline_start = ext4_get_inline_xattr_pos(dir, &iloc); 1284 inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
1287 1285
1288 ret = ext4_add_dirent_to_inline(handle, fname, dentry, 1286 ret = ext4_add_dirent_to_inline(handle, fname, dir,
1289 inode, &iloc, inline_start, 1287 inode, &iloc, inline_start,
1290 inline_size); 1288 inline_size);
1291 1289
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b3bd912df6bf..d964195ea0e2 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -383,6 +383,21 @@ static int __check_block_validity(struct inode *inode, const char *func,
383 return 0; 383 return 0;
384} 384}
385 385
386int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
387 ext4_lblk_t len)
388{
389 int ret;
390
391 if (ext4_encrypted_inode(inode))
392 return ext4_encrypted_zeroout(inode, lblk, pblk, len);
393
394 ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
395 if (ret > 0)
396 ret = 0;
397
398 return ret;
399}
400
386#define check_block_validity(inode, map) \ 401#define check_block_validity(inode, map) \
387 __check_block_validity((inode), __func__, __LINE__, (map)) 402 __check_block_validity((inode), __func__, __LINE__, (map))
388 403
@@ -403,8 +418,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
403 * out taking i_data_sem. So at the time the unwritten extent 418 * out taking i_data_sem. So at the time the unwritten extent
404 * could be converted. 419 * could be converted.
405 */ 420 */
406 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) 421 down_read(&EXT4_I(inode)->i_data_sem);
407 down_read(&EXT4_I(inode)->i_data_sem);
408 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 422 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
409 retval = ext4_ext_map_blocks(handle, inode, map, flags & 423 retval = ext4_ext_map_blocks(handle, inode, map, flags &
410 EXT4_GET_BLOCKS_KEEP_SIZE); 424 EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -412,8 +426,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
412 retval = ext4_ind_map_blocks(handle, inode, map, flags & 426 retval = ext4_ind_map_blocks(handle, inode, map, flags &
413 EXT4_GET_BLOCKS_KEEP_SIZE); 427 EXT4_GET_BLOCKS_KEEP_SIZE);
414 } 428 }
415 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) 429 up_read((&EXT4_I(inode)->i_data_sem));
416 up_read((&EXT4_I(inode)->i_data_sem));
417 430
418 /* 431 /*
419 * We don't check m_len because extent will be collpased in status 432 * We don't check m_len because extent will be collpased in status
@@ -509,8 +522,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
509 * Try to see if we can get the block without requesting a new 522 * Try to see if we can get the block without requesting a new
510 * file system block. 523 * file system block.
511 */ 524 */
512 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) 525 down_read(&EXT4_I(inode)->i_data_sem);
513 down_read(&EXT4_I(inode)->i_data_sem);
514 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 526 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
515 retval = ext4_ext_map_blocks(handle, inode, map, flags & 527 retval = ext4_ext_map_blocks(handle, inode, map, flags &
516 EXT4_GET_BLOCKS_KEEP_SIZE); 528 EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -541,8 +553,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
541 if (ret < 0) 553 if (ret < 0)
542 retval = ret; 554 retval = ret;
543 } 555 }
544 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) 556 up_read((&EXT4_I(inode)->i_data_sem));
545 up_read((&EXT4_I(inode)->i_data_sem));
546 557
547found: 558found:
548 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 559 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
@@ -626,13 +637,29 @@ found:
626 } 637 }
627 638
628 /* 639 /*
640 * We have to zeroout blocks before inserting them into extent
641 * status tree. Otherwise someone could look them up there and
642 * use them before they are really zeroed.
643 */
644 if (flags & EXT4_GET_BLOCKS_ZERO &&
645 map->m_flags & EXT4_MAP_MAPPED &&
646 map->m_flags & EXT4_MAP_NEW) {
647 ret = ext4_issue_zeroout(inode, map->m_lblk,
648 map->m_pblk, map->m_len);
649 if (ret) {
650 retval = ret;
651 goto out_sem;
652 }
653 }
654
655 /*
629 * If the extent has been zeroed out, we don't need to update 656 * If the extent has been zeroed out, we don't need to update
630 * extent status tree. 657 * extent status tree.
631 */ 658 */
632 if ((flags & EXT4_GET_BLOCKS_PRE_IO) && 659 if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
633 ext4_es_lookup_extent(inode, map->m_lblk, &es)) { 660 ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
634 if (ext4_es_is_written(&es)) 661 if (ext4_es_is_written(&es))
635 goto has_zeroout; 662 goto out_sem;
636 } 663 }
637 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 664 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
638 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 665 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -643,11 +670,13 @@ found:
643 status |= EXTENT_STATUS_DELAYED; 670 status |= EXTENT_STATUS_DELAYED;
644 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 671 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
645 map->m_pblk, status); 672 map->m_pblk, status);
646 if (ret < 0) 673 if (ret < 0) {
647 retval = ret; 674 retval = ret;
675 goto out_sem;
676 }
648 } 677 }
649 678
650has_zeroout: 679out_sem:
651 up_write((&EXT4_I(inode)->i_data_sem)); 680 up_write((&EXT4_I(inode)->i_data_sem));
652 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 681 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
653 ret = check_block_validity(inode, map); 682 ret = check_block_validity(inode, map);
@@ -674,7 +703,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
674 map.m_lblk = iblock; 703 map.m_lblk = iblock;
675 map.m_len = bh->b_size >> inode->i_blkbits; 704 map.m_len = bh->b_size >> inode->i_blkbits;
676 705
677 if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) { 706 if (flags && !handle) {
678 /* Direct IO write... */ 707 /* Direct IO write... */
679 if (map.m_len > DIO_MAX_BLOCKS) 708 if (map.m_len > DIO_MAX_BLOCKS)
680 map.m_len = DIO_MAX_BLOCKS; 709 map.m_len = DIO_MAX_BLOCKS;
@@ -694,16 +723,6 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
694 723
695 map_bh(bh, inode->i_sb, map.m_pblk); 724 map_bh(bh, inode->i_sb, map.m_pblk);
696 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; 725 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
697 if (IS_DAX(inode) && buffer_unwritten(bh)) {
698 /*
699 * dgc: I suspect unwritten conversion on ext4+DAX is
700 * fundamentally broken here when there are concurrent
701 * read/write in progress on this inode.
702 */
703 WARN_ON_ONCE(io_end);
704 bh->b_assoc_map = inode->i_mapping;
705 bh->b_private = (void *)(unsigned long)iblock;
706 }
707 if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) 726 if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
708 set_buffer_defer_completion(bh); 727 set_buffer_defer_completion(bh);
709 bh->b_size = inode->i_sb->s_blocksize * map.m_len; 728 bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@ -879,9 +898,6 @@ int do_journal_get_write_access(handle_t *handle,
879 return ret; 898 return ret;
880} 899}
881 900
882static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
883 struct buffer_head *bh_result, int create);
884
885#ifdef CONFIG_EXT4_FS_ENCRYPTION 901#ifdef CONFIG_EXT4_FS_ENCRYPTION
886static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, 902static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
887 get_block_t *get_block) 903 get_block_t *get_block)
@@ -3054,25 +3070,96 @@ int ext4_get_block_write(struct inode *inode, sector_t iblock,
3054 EXT4_GET_BLOCKS_IO_CREATE_EXT); 3070 EXT4_GET_BLOCKS_IO_CREATE_EXT);
3055} 3071}
3056 3072
3057static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, 3073static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock,
3058 struct buffer_head *bh_result, int create) 3074 struct buffer_head *bh_result, int create)
3059{ 3075{
3060 ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n", 3076 int ret;
3077
3078 ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n",
3061 inode->i_ino, create); 3079 inode->i_ino, create);
3062 return _ext4_get_block(inode, iblock, bh_result, 3080 ret = _ext4_get_block(inode, iblock, bh_result, 0);
3063 EXT4_GET_BLOCKS_NO_LOCK); 3081 /*
3082 * Blocks should have been preallocated! ext4_file_write_iter() checks
3083 * that.
3084 */
3085 WARN_ON_ONCE(!buffer_mapped(bh_result));
3086
3087 return ret;
3064} 3088}
3065 3089
3066int ext4_get_block_dax(struct inode *inode, sector_t iblock, 3090#ifdef CONFIG_FS_DAX
3067 struct buffer_head *bh_result, int create) 3091int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
3092 struct buffer_head *bh_result, int create)
3068{ 3093{
3069 int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT; 3094 int ret, err;
3070 if (create) 3095 int credits;
3071 flags |= EXT4_GET_BLOCKS_CREATE; 3096 struct ext4_map_blocks map;
3072 ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n", 3097 handle_t *handle = NULL;
3098 int flags = 0;
3099
3100 ext4_debug("ext4_dax_mmap_get_block: inode %lu, create flag %d\n",
3073 inode->i_ino, create); 3101 inode->i_ino, create);
3074 return _ext4_get_block(inode, iblock, bh_result, flags); 3102 map.m_lblk = iblock;
3103 map.m_len = bh_result->b_size >> inode->i_blkbits;
3104 credits = ext4_chunk_trans_blocks(inode, map.m_len);
3105 if (create) {
3106 flags |= EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_CREATE_ZERO;
3107 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
3108 if (IS_ERR(handle)) {
3109 ret = PTR_ERR(handle);
3110 return ret;
3111 }
3112 }
3113
3114 ret = ext4_map_blocks(handle, inode, &map, flags);
3115 if (create) {
3116 err = ext4_journal_stop(handle);
3117 if (ret >= 0 && err < 0)
3118 ret = err;
3119 }
3120 if (ret <= 0)
3121 goto out;
3122 if (map.m_flags & EXT4_MAP_UNWRITTEN) {
3123 int err2;
3124
3125 /*
3126 * We are protected by i_mmap_sem so we know block cannot go
3127 * away from under us even though we dropped i_data_sem.
3128 * Convert extent to written and write zeros there.
3129 *
3130 * Note: We may get here even when create == 0.
3131 */
3132 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
3133 if (IS_ERR(handle)) {
3134 ret = PTR_ERR(handle);
3135 goto out;
3136 }
3137
3138 err = ext4_map_blocks(handle, inode, &map,
3139 EXT4_GET_BLOCKS_CONVERT | EXT4_GET_BLOCKS_CREATE_ZERO);
3140 if (err < 0)
3141 ret = err;
3142 err2 = ext4_journal_stop(handle);
3143 if (err2 < 0 && ret > 0)
3144 ret = err2;
3145 }
3146out:
3147 WARN_ON_ONCE(ret == 0 && create);
3148 if (ret > 0) {
3149 map_bh(bh_result, inode->i_sb, map.m_pblk);
3150 bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
3151 map.m_flags;
3152 /*
3153 * At least for now we have to clear BH_New so that DAX code
3154 * doesn't attempt to zero blocks again in a racy way.
3155 */
3156 bh_result->b_state &= ~(1 << BH_New);
3157 bh_result->b_size = map.m_len << inode->i_blkbits;
3158 ret = 0;
3159 }
3160 return ret;
3075} 3161}
3162#endif
3076 3163
3077static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, 3164static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3078 ssize_t size, void *private) 3165 ssize_t size, void *private)
@@ -3143,10 +3230,8 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
3143 /* If we do a overwrite dio, i_mutex locking can be released */ 3230 /* If we do a overwrite dio, i_mutex locking can be released */
3144 overwrite = *((int *)iocb->private); 3231 overwrite = *((int *)iocb->private);
3145 3232
3146 if (overwrite) { 3233 if (overwrite)
3147 down_read(&EXT4_I(inode)->i_data_sem);
3148 mutex_unlock(&inode->i_mutex); 3234 mutex_unlock(&inode->i_mutex);
3149 }
3150 3235
3151 /* 3236 /*
3152 * We could direct write to holes and fallocate. 3237 * We could direct write to holes and fallocate.
@@ -3189,7 +3274,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
3189 } 3274 }
3190 3275
3191 if (overwrite) { 3276 if (overwrite) {
3192 get_block_func = ext4_get_block_write_nolock; 3277 get_block_func = ext4_get_block_overwrite;
3193 } else { 3278 } else {
3194 get_block_func = ext4_get_block_write; 3279 get_block_func = ext4_get_block_write;
3195 dio_flags = DIO_LOCKING; 3280 dio_flags = DIO_LOCKING;
@@ -3245,10 +3330,8 @@ retake_lock:
3245 if (iov_iter_rw(iter) == WRITE) 3330 if (iov_iter_rw(iter) == WRITE)
3246 inode_dio_end(inode); 3331 inode_dio_end(inode);
3247 /* take i_mutex locking again if we do a ovewrite dio */ 3332 /* take i_mutex locking again if we do a ovewrite dio */
3248 if (overwrite) { 3333 if (overwrite)
3249 up_read(&EXT4_I(inode)->i_data_sem);
3250 mutex_lock(&inode->i_mutex); 3334 mutex_lock(&inode->i_mutex);
3251 }
3252 3335
3253 return ret; 3336 return ret;
3254} 3337}
@@ -3559,6 +3642,35 @@ int ext4_can_truncate(struct inode *inode)
3559} 3642}
3560 3643
3561/* 3644/*
3645 * We have to make sure i_disksize gets properly updated before we truncate
3646 * page cache due to hole punching or zero range. Otherwise i_disksize update
3647 * can get lost as it may have been postponed to submission of writeback but
3648 * that will never happen after we truncate page cache.
3649 */
3650int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
3651 loff_t len)
3652{
3653 handle_t *handle;
3654 loff_t size = i_size_read(inode);
3655
3656 WARN_ON(!mutex_is_locked(&inode->i_mutex));
3657 if (offset > size || offset + len < size)
3658 return 0;
3659
3660 if (EXT4_I(inode)->i_disksize >= size)
3661 return 0;
3662
3663 handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
3664 if (IS_ERR(handle))
3665 return PTR_ERR(handle);
3666 ext4_update_i_disksize(inode, size);
3667 ext4_mark_inode_dirty(handle, inode);
3668 ext4_journal_stop(handle);
3669
3670 return 0;
3671}
3672
3673/*
3562 * ext4_punch_hole: punches a hole in a file by releaseing the blocks 3674 * ext4_punch_hole: punches a hole in a file by releaseing the blocks
3563 * associated with the given offset and length 3675 * associated with the given offset and length
3564 * 3676 *
@@ -3623,17 +3735,26 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3623 3735
3624 } 3736 }
3625 3737
3738 /* Wait all existing dio workers, newcomers will block on i_mutex */
3739 ext4_inode_block_unlocked_dio(inode);
3740 inode_dio_wait(inode);
3741
3742 /*
3743 * Prevent page faults from reinstantiating pages we have released from
3744 * page cache.
3745 */
3746 down_write(&EXT4_I(inode)->i_mmap_sem);
3626 first_block_offset = round_up(offset, sb->s_blocksize); 3747 first_block_offset = round_up(offset, sb->s_blocksize);
3627 last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; 3748 last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
3628 3749
3629 /* Now release the pages and zero block aligned part of pages*/ 3750 /* Now release the pages and zero block aligned part of pages*/
3630 if (last_block_offset > first_block_offset) 3751 if (last_block_offset > first_block_offset) {
3752 ret = ext4_update_disksize_before_punch(inode, offset, length);
3753 if (ret)
3754 goto out_dio;
3631 truncate_pagecache_range(inode, first_block_offset, 3755 truncate_pagecache_range(inode, first_block_offset,
3632 last_block_offset); 3756 last_block_offset);
3633 3757 }
3634 /* Wait all existing dio workers, newcomers will block on i_mutex */
3635 ext4_inode_block_unlocked_dio(inode);
3636 inode_dio_wait(inode);
3637 3758
3638 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3759 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3639 credits = ext4_writepage_trans_blocks(inode); 3760 credits = ext4_writepage_trans_blocks(inode);
@@ -3680,16 +3801,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3680 if (IS_SYNC(inode)) 3801 if (IS_SYNC(inode))
3681 ext4_handle_sync(handle); 3802 ext4_handle_sync(handle);
3682 3803
3683 /* Now release the pages again to reduce race window */
3684 if (last_block_offset > first_block_offset)
3685 truncate_pagecache_range(inode, first_block_offset,
3686 last_block_offset);
3687
3688 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 3804 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
3689 ext4_mark_inode_dirty(handle, inode); 3805 ext4_mark_inode_dirty(handle, inode);
3690out_stop: 3806out_stop:
3691 ext4_journal_stop(handle); 3807 ext4_journal_stop(handle);
3692out_dio: 3808out_dio:
3809 up_write(&EXT4_I(inode)->i_mmap_sem);
3693 ext4_inode_resume_unlocked_dio(inode); 3810 ext4_inode_resume_unlocked_dio(inode);
3694out_mutex: 3811out_mutex:
3695 mutex_unlock(&inode->i_mutex); 3812 mutex_unlock(&inode->i_mutex);
@@ -4076,6 +4193,14 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
4076 EXT4_I(inode)->i_inline_off = 0; 4193 EXT4_I(inode)->i_inline_off = 0;
4077} 4194}
4078 4195
4196int ext4_get_projid(struct inode *inode, kprojid_t *projid)
4197{
4198 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT))
4199 return -EOPNOTSUPP;
4200 *projid = EXT4_I(inode)->i_projid;
4201 return 0;
4202}
4203
4079struct inode *ext4_iget(struct super_block *sb, unsigned long ino) 4204struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4080{ 4205{
4081 struct ext4_iloc iloc; 4206 struct ext4_iloc iloc;
@@ -4087,6 +4212,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4087 int block; 4212 int block;
4088 uid_t i_uid; 4213 uid_t i_uid;
4089 gid_t i_gid; 4214 gid_t i_gid;
4215 projid_t i_projid;
4090 4216
4091 inode = iget_locked(sb, ino); 4217 inode = iget_locked(sb, ino);
4092 if (!inode) 4218 if (!inode)
@@ -4136,12 +4262,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4136 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 4262 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
4137 i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); 4263 i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
4138 i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); 4264 i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
4265 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
4266 EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
4267 EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
4268 i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
4269 else
4270 i_projid = EXT4_DEF_PROJID;
4271
4139 if (!(test_opt(inode->i_sb, NO_UID32))) { 4272 if (!(test_opt(inode->i_sb, NO_UID32))) {
4140 i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 4273 i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
4141 i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 4274 i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
4142 } 4275 }
4143 i_uid_write(inode, i_uid); 4276 i_uid_write(inode, i_uid);
4144 i_gid_write(inode, i_gid); 4277 i_gid_write(inode, i_gid);
4278 ei->i_projid = make_kprojid(&init_user_ns, i_projid);
4145 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); 4279 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
4146 4280
4147 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ 4281 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
@@ -4440,6 +4574,7 @@ static int ext4_do_update_inode(handle_t *handle,
4440 int need_datasync = 0, set_large_file = 0; 4574 int need_datasync = 0, set_large_file = 0;
4441 uid_t i_uid; 4575 uid_t i_uid;
4442 gid_t i_gid; 4576 gid_t i_gid;
4577 projid_t i_projid;
4443 4578
4444 spin_lock(&ei->i_raw_lock); 4579 spin_lock(&ei->i_raw_lock);
4445 4580
@@ -4452,6 +4587,7 @@ static int ext4_do_update_inode(handle_t *handle,
4452 raw_inode->i_mode = cpu_to_le16(inode->i_mode); 4587 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
4453 i_uid = i_uid_read(inode); 4588 i_uid = i_uid_read(inode);
4454 i_gid = i_gid_read(inode); 4589 i_gid = i_gid_read(inode);
4590 i_projid = from_kprojid(&init_user_ns, ei->i_projid);
4455 if (!(test_opt(inode->i_sb, NO_UID32))) { 4591 if (!(test_opt(inode->i_sb, NO_UID32))) {
4456 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); 4592 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
4457 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); 4593 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
@@ -4529,6 +4665,15 @@ static int ext4_do_update_inode(handle_t *handle,
4529 cpu_to_le16(ei->i_extra_isize); 4665 cpu_to_le16(ei->i_extra_isize);
4530 } 4666 }
4531 } 4667 }
4668
4669 BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
4670 EXT4_FEATURE_RO_COMPAT_PROJECT) &&
4671 i_projid != EXT4_DEF_PROJID);
4672
4673 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
4674 EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
4675 raw_inode->i_projid = cpu_to_le32(i_projid);
4676
4532 ext4_inode_csum_set(inode, raw_inode, ei); 4677 ext4_inode_csum_set(inode, raw_inode, ei);
4533 spin_unlock(&ei->i_raw_lock); 4678 spin_unlock(&ei->i_raw_lock);
4534 if (inode->i_sb->s_flags & MS_LAZYTIME) 4679 if (inode->i_sb->s_flags & MS_LAZYTIME)
@@ -4824,6 +4969,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4824 } else 4969 } else
4825 ext4_wait_for_tail_page_commit(inode); 4970 ext4_wait_for_tail_page_commit(inode);
4826 } 4971 }
4972 down_write(&EXT4_I(inode)->i_mmap_sem);
4827 /* 4973 /*
4828 * Truncate pagecache after we've waited for commit 4974 * Truncate pagecache after we've waited for commit
4829 * in data=journal mode to make pages freeable. 4975 * in data=journal mode to make pages freeable.
@@ -4831,6 +4977,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4831 truncate_pagecache(inode, inode->i_size); 4977 truncate_pagecache(inode, inode->i_size);
4832 if (shrink) 4978 if (shrink)
4833 ext4_truncate(inode); 4979 ext4_truncate(inode);
4980 up_write(&EXT4_I(inode)->i_mmap_sem);
4834 } 4981 }
4835 4982
4836 if (!rc) { 4983 if (!rc) {
@@ -5279,6 +5426,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5279 5426
5280 sb_start_pagefault(inode->i_sb); 5427 sb_start_pagefault(inode->i_sb);
5281 file_update_time(vma->vm_file); 5428 file_update_time(vma->vm_file);
5429
5430 down_read(&EXT4_I(inode)->i_mmap_sem);
5282 /* Delalloc case is easy... */ 5431 /* Delalloc case is easy... */
5283 if (test_opt(inode->i_sb, DELALLOC) && 5432 if (test_opt(inode->i_sb, DELALLOC) &&
5284 !ext4_should_journal_data(inode) && 5433 !ext4_should_journal_data(inode) &&
@@ -5348,6 +5497,19 @@ retry_alloc:
5348out_ret: 5497out_ret:
5349 ret = block_page_mkwrite_return(ret); 5498 ret = block_page_mkwrite_return(ret);
5350out: 5499out:
5500 up_read(&EXT4_I(inode)->i_mmap_sem);
5351 sb_end_pagefault(inode->i_sb); 5501 sb_end_pagefault(inode->i_sb);
5352 return ret; 5502 return ret;
5353} 5503}
5504
5505int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
5506{
5507 struct inode *inode = file_inode(vma->vm_file);
5508 int err;
5509
5510 down_read(&EXT4_I(inode)->i_mmap_sem);
5511 err = filemap_fault(vma, vmf);
5512 up_read(&EXT4_I(inode)->i_mmap_sem);
5513
5514 return err;
5515}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5e872fd40e5e..2b0cb84255eb 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -14,6 +14,7 @@
14#include <linux/mount.h> 14#include <linux/mount.h>
15#include <linux/file.h> 15#include <linux/file.h>
16#include <linux/random.h> 16#include <linux/random.h>
17#include <linux/quotaops.h>
17#include <asm/uaccess.h> 18#include <asm/uaccess.h>
18#include "ext4_jbd2.h" 19#include "ext4_jbd2.h"
19#include "ext4.h" 20#include "ext4.h"
@@ -202,6 +203,238 @@ static int uuid_is_zero(__u8 u[16])
202 return 1; 203 return 1;
203} 204}
204 205
206static int ext4_ioctl_setflags(struct inode *inode,
207 unsigned int flags)
208{
209 struct ext4_inode_info *ei = EXT4_I(inode);
210 handle_t *handle = NULL;
211 int err = EPERM, migrate = 0;
212 struct ext4_iloc iloc;
213 unsigned int oldflags, mask, i;
214 unsigned int jflag;
215
216 /* Is it quota file? Do not allow user to mess with it */
217 if (IS_NOQUOTA(inode))
218 goto flags_out;
219
220 oldflags = ei->i_flags;
221
222 /* The JOURNAL_DATA flag is modifiable only by root */
223 jflag = flags & EXT4_JOURNAL_DATA_FL;
224
225 /*
226 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
227 * the relevant capability.
228 *
229 * This test looks nicer. Thanks to Pauline Middelink
230 */
231 if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
232 if (!capable(CAP_LINUX_IMMUTABLE))
233 goto flags_out;
234 }
235
236 /*
237 * The JOURNAL_DATA flag can only be changed by
238 * the relevant capability.
239 */
240 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
241 if (!capable(CAP_SYS_RESOURCE))
242 goto flags_out;
243 }
244 if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
245 migrate = 1;
246
247 if (flags & EXT4_EOFBLOCKS_FL) {
248 /* we don't support adding EOFBLOCKS flag */
249 if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
250 err = -EOPNOTSUPP;
251 goto flags_out;
252 }
253 } else if (oldflags & EXT4_EOFBLOCKS_FL)
254 ext4_truncate(inode);
255
256 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
257 if (IS_ERR(handle)) {
258 err = PTR_ERR(handle);
259 goto flags_out;
260 }
261 if (IS_SYNC(inode))
262 ext4_handle_sync(handle);
263 err = ext4_reserve_inode_write(handle, inode, &iloc);
264 if (err)
265 goto flags_err;
266
267 for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
268 if (!(mask & EXT4_FL_USER_MODIFIABLE))
269 continue;
270 if (mask & flags)
271 ext4_set_inode_flag(inode, i);
272 else
273 ext4_clear_inode_flag(inode, i);
274 }
275
276 ext4_set_inode_flags(inode);
277 inode->i_ctime = ext4_current_time(inode);
278
279 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
280flags_err:
281 ext4_journal_stop(handle);
282 if (err)
283 goto flags_out;
284
285 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
286 err = ext4_change_inode_journal_flag(inode, jflag);
287 if (err)
288 goto flags_out;
289 if (migrate) {
290 if (flags & EXT4_EXTENTS_FL)
291 err = ext4_ext_migrate(inode);
292 else
293 err = ext4_ind_migrate(inode);
294 }
295
296flags_out:
297 return err;
298}
299
300#ifdef CONFIG_QUOTA
301static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
302{
303 struct inode *inode = file_inode(filp);
304 struct super_block *sb = inode->i_sb;
305 struct ext4_inode_info *ei = EXT4_I(inode);
306 int err, rc;
307 handle_t *handle;
308 kprojid_t kprojid;
309 struct ext4_iloc iloc;
310 struct ext4_inode *raw_inode;
311
312 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
313 EXT4_FEATURE_RO_COMPAT_PROJECT)) {
314 if (projid != EXT4_DEF_PROJID)
315 return -EOPNOTSUPP;
316 else
317 return 0;
318 }
319
320 if (EXT4_INODE_SIZE(sb) <= EXT4_GOOD_OLD_INODE_SIZE)
321 return -EOPNOTSUPP;
322
323 kprojid = make_kprojid(&init_user_ns, (projid_t)projid);
324
325 if (projid_eq(kprojid, EXT4_I(inode)->i_projid))
326 return 0;
327
328 err = mnt_want_write_file(filp);
329 if (err)
330 return err;
331
332 err = -EPERM;
333 mutex_lock(&inode->i_mutex);
334 /* Is it quota file? Do not allow user to mess with it */
335 if (IS_NOQUOTA(inode))
336 goto out_unlock;
337
338 err = ext4_get_inode_loc(inode, &iloc);
339 if (err)
340 goto out_unlock;
341
342 raw_inode = ext4_raw_inode(&iloc);
343 if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) {
344 err = -EOVERFLOW;
345 brelse(iloc.bh);
346 goto out_unlock;
347 }
348 brelse(iloc.bh);
349
350 dquot_initialize(inode);
351
352 handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
353 EXT4_QUOTA_INIT_BLOCKS(sb) +
354 EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
355 if (IS_ERR(handle)) {
356 err = PTR_ERR(handle);
357 goto out_unlock;
358 }
359
360 err = ext4_reserve_inode_write(handle, inode, &iloc);
361 if (err)
362 goto out_stop;
363
364 if (sb_has_quota_limits_enabled(sb, PRJQUOTA)) {
365 struct dquot *transfer_to[MAXQUOTAS] = { };
366
367 transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
368 if (transfer_to[PRJQUOTA]) {
369 err = __dquot_transfer(inode, transfer_to);
370 dqput(transfer_to[PRJQUOTA]);
371 if (err)
372 goto out_dirty;
373 }
374 }
375 EXT4_I(inode)->i_projid = kprojid;
376 inode->i_ctime = ext4_current_time(inode);
377out_dirty:
378 rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
379 if (!err)
380 err = rc;
381out_stop:
382 ext4_journal_stop(handle);
383out_unlock:
384 mutex_unlock(&inode->i_mutex);
385 mnt_drop_write_file(filp);
386 return err;
387}
388#else
389static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
390{
391 if (projid != EXT4_DEF_PROJID)
392 return -EOPNOTSUPP;
393 return 0;
394}
395#endif
396
397/* Transfer internal flags to xflags */
398static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)
399{
400 __u32 xflags = 0;
401
402 if (iflags & EXT4_SYNC_FL)
403 xflags |= FS_XFLAG_SYNC;
404 if (iflags & EXT4_IMMUTABLE_FL)
405 xflags |= FS_XFLAG_IMMUTABLE;
406 if (iflags & EXT4_APPEND_FL)
407 xflags |= FS_XFLAG_APPEND;
408 if (iflags & EXT4_NODUMP_FL)
409 xflags |= FS_XFLAG_NODUMP;
410 if (iflags & EXT4_NOATIME_FL)
411 xflags |= FS_XFLAG_NOATIME;
412 if (iflags & EXT4_PROJINHERIT_FL)
413 xflags |= FS_XFLAG_PROJINHERIT;
414 return xflags;
415}
416
417/* Transfer xflags flags to internal */
418static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
419{
420 unsigned long iflags = 0;
421
422 if (xflags & FS_XFLAG_SYNC)
423 iflags |= EXT4_SYNC_FL;
424 if (xflags & FS_XFLAG_IMMUTABLE)
425 iflags |= EXT4_IMMUTABLE_FL;
426 if (xflags & FS_XFLAG_APPEND)
427 iflags |= EXT4_APPEND_FL;
428 if (xflags & FS_XFLAG_NODUMP)
429 iflags |= EXT4_NODUMP_FL;
430 if (xflags & FS_XFLAG_NOATIME)
431 iflags |= EXT4_NOATIME_FL;
432 if (xflags & FS_XFLAG_PROJINHERIT)
433 iflags |= EXT4_PROJINHERIT_FL;
434
435 return iflags;
436}
437
205long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 438long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
206{ 439{
207 struct inode *inode = file_inode(filp); 440 struct inode *inode = file_inode(filp);
@@ -217,11 +450,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
217 flags = ei->i_flags & EXT4_FL_USER_VISIBLE; 450 flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
218 return put_user(flags, (int __user *) arg); 451 return put_user(flags, (int __user *) arg);
219 case EXT4_IOC_SETFLAGS: { 452 case EXT4_IOC_SETFLAGS: {
220 handle_t *handle = NULL; 453 int err;
221 int err, migrate = 0;
222 struct ext4_iloc iloc;
223 unsigned int oldflags, mask, i;
224 unsigned int jflag;
225 454
226 if (!inode_owner_or_capable(inode)) 455 if (!inode_owner_or_capable(inode))
227 return -EACCES; 456 return -EACCES;
@@ -235,89 +464,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
235 464
236 flags = ext4_mask_flags(inode->i_mode, flags); 465 flags = ext4_mask_flags(inode->i_mode, flags);
237 466
238 err = -EPERM;
239 mutex_lock(&inode->i_mutex); 467 mutex_lock(&inode->i_mutex);
240 /* Is it quota file? Do not allow user to mess with it */ 468 err = ext4_ioctl_setflags(inode, flags);
241 if (IS_NOQUOTA(inode))
242 goto flags_out;
243
244 oldflags = ei->i_flags;
245
246 /* The JOURNAL_DATA flag is modifiable only by root */
247 jflag = flags & EXT4_JOURNAL_DATA_FL;
248
249 /*
250 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
251 * the relevant capability.
252 *
253 * This test looks nicer. Thanks to Pauline Middelink
254 */
255 if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
256 if (!capable(CAP_LINUX_IMMUTABLE))
257 goto flags_out;
258 }
259
260 /*
261 * The JOURNAL_DATA flag can only be changed by
262 * the relevant capability.
263 */
264 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
265 if (!capable(CAP_SYS_RESOURCE))
266 goto flags_out;
267 }
268 if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
269 migrate = 1;
270
271 if (flags & EXT4_EOFBLOCKS_FL) {
272 /* we don't support adding EOFBLOCKS flag */
273 if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
274 err = -EOPNOTSUPP;
275 goto flags_out;
276 }
277 } else if (oldflags & EXT4_EOFBLOCKS_FL)
278 ext4_truncate(inode);
279
280 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
281 if (IS_ERR(handle)) {
282 err = PTR_ERR(handle);
283 goto flags_out;
284 }
285 if (IS_SYNC(inode))
286 ext4_handle_sync(handle);
287 err = ext4_reserve_inode_write(handle, inode, &iloc);
288 if (err)
289 goto flags_err;
290
291 for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
292 if (!(mask & EXT4_FL_USER_MODIFIABLE))
293 continue;
294 if (mask & flags)
295 ext4_set_inode_flag(inode, i);
296 else
297 ext4_clear_inode_flag(inode, i);
298 }
299
300 ext4_set_inode_flags(inode);
301 inode->i_ctime = ext4_current_time(inode);
302
303 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
304flags_err:
305 ext4_journal_stop(handle);
306 if (err)
307 goto flags_out;
308
309 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
310 err = ext4_change_inode_journal_flag(inode, jflag);
311 if (err)
312 goto flags_out;
313 if (migrate) {
314 if (flags & EXT4_EXTENTS_FL)
315 err = ext4_ext_migrate(inode);
316 else
317 err = ext4_ind_migrate(inode);
318 }
319
320flags_out:
321 mutex_unlock(&inode->i_mutex); 469 mutex_unlock(&inode->i_mutex);
322 mnt_drop_write_file(filp); 470 mnt_drop_write_file(filp);
323 return err; 471 return err;
@@ -689,6 +837,60 @@ encryption_policy_out:
689 return -EOPNOTSUPP; 837 return -EOPNOTSUPP;
690#endif 838#endif
691 } 839 }
840 case EXT4_IOC_FSGETXATTR:
841 {
842 struct fsxattr fa;
843
844 memset(&fa, 0, sizeof(struct fsxattr));
845 ext4_get_inode_flags(ei);
846 fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE);
847
848 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
849 EXT4_FEATURE_RO_COMPAT_PROJECT)) {
850 fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
851 EXT4_I(inode)->i_projid);
852 }
853
854 if (copy_to_user((struct fsxattr __user *)arg,
855 &fa, sizeof(fa)))
856 return -EFAULT;
857 return 0;
858 }
859 case EXT4_IOC_FSSETXATTR:
860 {
861 struct fsxattr fa;
862 int err;
863
864 if (copy_from_user(&fa, (struct fsxattr __user *)arg,
865 sizeof(fa)))
866 return -EFAULT;
867
868 /* Make sure caller has proper permission */
869 if (!inode_owner_or_capable(inode))
870 return -EACCES;
871
872 err = mnt_want_write_file(filp);
873 if (err)
874 return err;
875
876 flags = ext4_xflags_to_iflags(fa.fsx_xflags);
877 flags = ext4_mask_flags(inode->i_mode, flags);
878
879 mutex_lock(&inode->i_mutex);
880 flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
881 (flags & EXT4_FL_XFLAG_VISIBLE);
882 err = ext4_ioctl_setflags(inode, flags);
883 mutex_unlock(&inode->i_mutex);
884 mnt_drop_write_file(filp);
885 if (err)
886 return err;
887
888 err = ext4_ioctl_setproject(filp, fa.fsx_projid);
889 if (err)
890 return err;
891
892 return 0;
893 }
692 default: 894 default:
693 return -ENOTTY; 895 return -ENOTTY;
694 } 896 }
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index f27e0c2598c5..854f75de4599 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -273,7 +273,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
273 struct ext4_filename *fname, 273 struct ext4_filename *fname,
274 struct ext4_dir_entry_2 **res_dir); 274 struct ext4_dir_entry_2 **res_dir);
275static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, 275static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
276 struct dentry *dentry, struct inode *inode); 276 struct inode *dir, struct inode *inode);
277 277
278/* checksumming functions */ 278/* checksumming functions */
279void initialize_dirent_tail(struct ext4_dir_entry_tail *t, 279void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
@@ -1928,10 +1928,9 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
1928 * directory, and adds the dentry to the indexed directory. 1928 * directory, and adds the dentry to the indexed directory.
1929 */ 1929 */
1930static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, 1930static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
1931 struct dentry *dentry, 1931 struct inode *dir,
1932 struct inode *inode, struct buffer_head *bh) 1932 struct inode *inode, struct buffer_head *bh)
1933{ 1933{
1934 struct inode *dir = d_inode(dentry->d_parent);
1935 struct buffer_head *bh2; 1934 struct buffer_head *bh2;
1936 struct dx_root *root; 1935 struct dx_root *root;
1937 struct dx_frame frames[2], *frame; 1936 struct dx_frame frames[2], *frame;
@@ -2086,8 +2085,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
2086 return retval; 2085 return retval;
2087 2086
2088 if (ext4_has_inline_data(dir)) { 2087 if (ext4_has_inline_data(dir)) {
2089 retval = ext4_try_add_inline_entry(handle, &fname, 2088 retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
2090 dentry, inode);
2091 if (retval < 0) 2089 if (retval < 0)
2092 goto out; 2090 goto out;
2093 if (retval == 1) { 2091 if (retval == 1) {
@@ -2097,7 +2095,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
2097 } 2095 }
2098 2096
2099 if (is_dx(dir)) { 2097 if (is_dx(dir)) {
2100 retval = ext4_dx_add_entry(handle, &fname, dentry, inode); 2098 retval = ext4_dx_add_entry(handle, &fname, dir, inode);
2101 if (!retval || (retval != ERR_BAD_DX_DIR)) 2099 if (!retval || (retval != ERR_BAD_DX_DIR))
2102 goto out; 2100 goto out;
2103 ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); 2101 ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
@@ -2119,7 +2117,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
2119 2117
2120 if (blocks == 1 && !dx_fallback && 2118 if (blocks == 1 && !dx_fallback &&
2121 ext4_has_feature_dir_index(sb)) { 2119 ext4_has_feature_dir_index(sb)) {
2122 retval = make_indexed_dir(handle, &fname, dentry, 2120 retval = make_indexed_dir(handle, &fname, dir,
2123 inode, bh); 2121 inode, bh);
2124 bh = NULL; /* make_indexed_dir releases bh */ 2122 bh = NULL; /* make_indexed_dir releases bh */
2125 goto out; 2123 goto out;
@@ -2154,12 +2152,11 @@ out:
2154 * Returns 0 for success, or a negative error value 2152 * Returns 0 for success, or a negative error value
2155 */ 2153 */
2156static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, 2154static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
2157 struct dentry *dentry, struct inode *inode) 2155 struct inode *dir, struct inode *inode)
2158{ 2156{
2159 struct dx_frame frames[2], *frame; 2157 struct dx_frame frames[2], *frame;
2160 struct dx_entry *entries, *at; 2158 struct dx_entry *entries, *at;
2161 struct buffer_head *bh; 2159 struct buffer_head *bh;
2162 struct inode *dir = d_inode(dentry->d_parent);
2163 struct super_block *sb = dir->i_sb; 2160 struct super_block *sb = dir->i_sb;
2164 struct ext4_dir_entry_2 *de; 2161 struct ext4_dir_entry_2 *de;
2165 int err; 2162 int err;
@@ -3212,6 +3209,12 @@ static int ext4_link(struct dentry *old_dentry,
3212 if (ext4_encrypted_inode(dir) && 3209 if (ext4_encrypted_inode(dir) &&
3213 !ext4_is_child_context_consistent_with_parent(dir, inode)) 3210 !ext4_is_child_context_consistent_with_parent(dir, inode))
3214 return -EPERM; 3211 return -EPERM;
3212
3213 if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
3214 (!projid_eq(EXT4_I(dir)->i_projid,
3215 EXT4_I(old_dentry->d_inode)->i_projid)))
3216 return -EXDEV;
3217
3215 err = dquot_initialize(dir); 3218 err = dquot_initialize(dir);
3216 if (err) 3219 if (err)
3217 return err; 3220 return err;
@@ -3492,6 +3495,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3492 int credits; 3495 int credits;
3493 u8 old_file_type; 3496 u8 old_file_type;
3494 3497
3498 if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
3499 (!projid_eq(EXT4_I(new_dir)->i_projid,
3500 EXT4_I(old_dentry->d_inode)->i_projid)))
3501 return -EXDEV;
3502
3495 retval = dquot_initialize(old.dir); 3503 retval = dquot_initialize(old.dir);
3496 if (retval) 3504 if (retval)
3497 return retval; 3505 return retval;
@@ -3701,6 +3709,14 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
3701 new.inode))) 3709 new.inode)))
3702 return -EPERM; 3710 return -EPERM;
3703 3711
3712 if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
3713 !projid_eq(EXT4_I(new_dir)->i_projid,
3714 EXT4_I(old_dentry->d_inode)->i_projid)) ||
3715 (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
3716 !projid_eq(EXT4_I(old_dir)->i_projid,
3717 EXT4_I(new_dentry->d_inode)->i_projid)))
3718 return -EXDEV;
3719
3704 retval = dquot_initialize(old.dir); 3720 retval = dquot_initialize(old.dir);
3705 if (retval) 3721 if (retval)
3706 return retval; 3722 return retval;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f1b56ff01208..00c98fab6333 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -80,6 +80,36 @@ static void ext4_destroy_lazyinit_thread(void);
80static void ext4_unregister_li_request(struct super_block *sb); 80static void ext4_unregister_li_request(struct super_block *sb);
81static void ext4_clear_request_list(void); 81static void ext4_clear_request_list(void);
82 82
83/*
84 * Lock ordering
85 *
86 * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
87 * i_mmap_rwsem (inode->i_mmap_rwsem)!
88 *
89 * page fault path:
90 * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
91 * page lock -> i_data_sem (rw)
92 *
93 * buffered write path:
94 * sb_start_write -> i_mutex -> mmap_sem
95 * sb_start_write -> i_mutex -> transaction start -> page lock ->
96 * i_data_sem (rw)
97 *
98 * truncate:
99 * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
100 * i_mmap_rwsem (w) -> page lock
101 * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
102 * transaction start -> i_data_sem (rw)
103 *
104 * direct IO:
105 * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
106 * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
107 * transaction start -> i_data_sem (rw)
108 *
109 * writepages:
110 * transaction start -> page lock(s) -> i_data_sem (rw)
111 */
112
83#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) 113#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
84static struct file_system_type ext2_fs_type = { 114static struct file_system_type ext2_fs_type = {
85 .owner = THIS_MODULE, 115 .owner = THIS_MODULE,
@@ -958,6 +988,7 @@ static void init_once(void *foo)
958 INIT_LIST_HEAD(&ei->i_orphan); 988 INIT_LIST_HEAD(&ei->i_orphan);
959 init_rwsem(&ei->xattr_sem); 989 init_rwsem(&ei->xattr_sem);
960 init_rwsem(&ei->i_data_sem); 990 init_rwsem(&ei->i_data_sem);
991 init_rwsem(&ei->i_mmap_sem);
961 inode_init_once(&ei->vfs_inode); 992 inode_init_once(&ei->vfs_inode);
962} 993}
963 994
@@ -1066,8 +1097,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
1066} 1097}
1067 1098
1068#ifdef CONFIG_QUOTA 1099#ifdef CONFIG_QUOTA
1069#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") 1100static char *quotatypes[] = INITQFNAMES;
1070#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 1101#define QTYPE2NAME(t) (quotatypes[t])
1071 1102
1072static int ext4_write_dquot(struct dquot *dquot); 1103static int ext4_write_dquot(struct dquot *dquot);
1073static int ext4_acquire_dquot(struct dquot *dquot); 1104static int ext4_acquire_dquot(struct dquot *dquot);
@@ -1100,6 +1131,7 @@ static const struct dquot_operations ext4_quota_operations = {
1100 .write_info = ext4_write_info, 1131 .write_info = ext4_write_info,
1101 .alloc_dquot = dquot_alloc, 1132 .alloc_dquot = dquot_alloc,
1102 .destroy_dquot = dquot_destroy, 1133 .destroy_dquot = dquot_destroy,
1134 .get_projid = ext4_get_projid,
1103}; 1135};
1104 1136
1105static const struct quotactl_ops ext4_qctl_operations = { 1137static const struct quotactl_ops ext4_qctl_operations = {
@@ -2526,6 +2558,12 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
2526 "without CONFIG_QUOTA"); 2558 "without CONFIG_QUOTA");
2527 return 0; 2559 return 0;
2528 } 2560 }
2561 if (ext4_has_feature_project(sb) && !readonly) {
2562 ext4_msg(sb, KERN_ERR,
2563 "Filesystem with project quota feature cannot be mounted RDWR "
2564 "without CONFIG_QUOTA");
2565 return 0;
2566 }
2529#endif /* CONFIG_QUOTA */ 2567#endif /* CONFIG_QUOTA */
2530 return 1; 2568 return 1;
2531} 2569}
@@ -3654,7 +3692,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3654 sb->s_qcop = &dquot_quotactl_sysfile_ops; 3692 sb->s_qcop = &dquot_quotactl_sysfile_ops;
3655 else 3693 else
3656 sb->s_qcop = &ext4_qctl_operations; 3694 sb->s_qcop = &ext4_qctl_operations;
3657 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; 3695 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
3658#endif 3696#endif
3659 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); 3697 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
3660 3698
@@ -4790,6 +4828,48 @@ restore_opts:
4790 return err; 4828 return err;
4791} 4829}
4792 4830
4831#ifdef CONFIG_QUOTA
4832static int ext4_statfs_project(struct super_block *sb,
4833 kprojid_t projid, struct kstatfs *buf)
4834{
4835 struct kqid qid;
4836 struct dquot *dquot;
4837 u64 limit;
4838 u64 curblock;
4839
4840 qid = make_kqid_projid(projid);
4841 dquot = dqget(sb, qid);
4842 if (IS_ERR(dquot))
4843 return PTR_ERR(dquot);
4844 spin_lock(&dq_data_lock);
4845
4846 limit = (dquot->dq_dqb.dqb_bsoftlimit ?
4847 dquot->dq_dqb.dqb_bsoftlimit :
4848 dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
4849 if (limit && buf->f_blocks > limit) {
4850 curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
4851 buf->f_blocks = limit;
4852 buf->f_bfree = buf->f_bavail =
4853 (buf->f_blocks > curblock) ?
4854 (buf->f_blocks - curblock) : 0;
4855 }
4856
4857 limit = dquot->dq_dqb.dqb_isoftlimit ?
4858 dquot->dq_dqb.dqb_isoftlimit :
4859 dquot->dq_dqb.dqb_ihardlimit;
4860 if (limit && buf->f_files > limit) {
4861 buf->f_files = limit;
4862 buf->f_ffree =
4863 (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
4864 (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
4865 }
4866
4867 spin_unlock(&dq_data_lock);
4868 dqput(dquot);
4869 return 0;
4870}
4871#endif
4872
4793static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 4873static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4794{ 4874{
4795 struct super_block *sb = dentry->d_sb; 4875 struct super_block *sb = dentry->d_sb;
@@ -4822,6 +4902,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4822 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 4902 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
4823 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 4903 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
4824 4904
4905#ifdef CONFIG_QUOTA
4906 if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
4907 sb_has_quota_limits_enabled(sb, PRJQUOTA))
4908 ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
4909#endif
4825 return 0; 4910 return 0;
4826} 4911}
4827 4912
@@ -4986,7 +5071,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
4986 struct inode *qf_inode; 5071 struct inode *qf_inode;
4987 unsigned long qf_inums[EXT4_MAXQUOTAS] = { 5072 unsigned long qf_inums[EXT4_MAXQUOTAS] = {
4988 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), 5073 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
4989 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) 5074 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
5075 le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
4990 }; 5076 };
4991 5077
4992 BUG_ON(!ext4_has_feature_quota(sb)); 5078 BUG_ON(!ext4_has_feature_quota(sb));
@@ -5014,7 +5100,8 @@ static int ext4_enable_quotas(struct super_block *sb)
5014 int type, err = 0; 5100 int type, err = 0;
5015 unsigned long qf_inums[EXT4_MAXQUOTAS] = { 5101 unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5016 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), 5102 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5017 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) 5103 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
5104 le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
5018 }; 5105 };
5019 5106
5020 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; 5107 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index 011ba6670d99..c70d06a383e2 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -10,8 +10,10 @@
10 */ 10 */
11static inline void ext4_truncate_failed_write(struct inode *inode) 11static inline void ext4_truncate_failed_write(struct inode *inode)
12{ 12{
13 down_write(&EXT4_I(inode)->i_mmap_sem);
13 truncate_inode_pages(inode->i_mapping, inode->i_size); 14 truncate_inode_pages(inode->i_mapping, inode->i_size);
14 ext4_truncate(inode); 15 ext4_truncate(inode);
16 up_write(&EXT4_I(inode)->i_mmap_sem);
15} 17}
16 18
17/* 19/*
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 594b4b29a224..4e4b2fa78609 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -43,7 +43,7 @@ struct extent_status;
43 { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ 43 { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \
44 { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ 44 { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \
45 { EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \ 45 { EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \
46 { EXT4_GET_BLOCKS_NO_LOCK, "NO_LOCK" }) 46 { EXT4_GET_BLOCKS_ZERO, "ZERO" })
47 47
48#define show_mflags(flags) __print_flags(flags, "", \ 48#define show_mflags(flags) __print_flags(flags, "", \
49 { EXT4_MAP_NEW, "N" }, \ 49 { EXT4_MAP_NEW, "N" }, \
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 8c8451f76633..41e0433b4a83 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -2,8 +2,11 @@
2#define _UAPI_LINUX_FS_H 2#define _UAPI_LINUX_FS_H
3 3
4/* 4/*
5 * This file has definitions for some important file table 5 * This file has definitions for some important file table structures
6 * structures etc. 6 * and constants and structures used by various generic file system
7 * ioctl's. Please do not make any changes in this file before
8 * sending patches for review to linux-fsdevel@vger.kernel.org and
9 * linux-api@vger.kernel.org.
7 */ 10 */
8 11
9#include <linux/limits.h> 12#include <linux/limits.h>
@@ -246,6 +249,23 @@ struct fsxattr {
246 249
247/* 250/*
248 * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) 251 * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
252 *
253 * Note: for historical reasons, these flags were originally used and
254 * defined for use by ext2/ext3, and then other file systems started
255 * using these flags so they wouldn't need to write their own version
256 * of chattr/lsattr (which was shipped as part of e2fsprogs). You
257 * should think twice before trying to use these flags in new
258 * contexts, or trying to assign these flags, since they are used both
259 * as the UAPI and the on-disk encoding for ext2/3/4. Also, we are
260 * almost out of 32-bit flags. :-)
261 *
262 * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
263 * XFS to the generic FS level interface. This uses a structure that
264 * has padding and hence has more room to grow, so it may be more
265 * appropriate for many new use cases.
266 *
267 * Please do not change these flags or interfaces before checking with
268 * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
249 */ 269 */
250#define FS_SECRM_FL 0x00000001 /* Secure deletion */ 270#define FS_SECRM_FL 0x00000001 /* Secure deletion */
251#define FS_UNRM_FL 0x00000002 /* Undelete */ 271#define FS_UNRM_FL 0x00000002 /* Undelete */
@@ -259,8 +279,8 @@ struct fsxattr {
259#define FS_DIRTY_FL 0x00000100 279#define FS_DIRTY_FL 0x00000100
260#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ 280#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
261#define FS_NOCOMP_FL 0x00000400 /* Don't compress */ 281#define FS_NOCOMP_FL 0x00000400 /* Don't compress */
262#define FS_ECOMPR_FL 0x00000800 /* Compression error */
263/* End compression flags --- maybe not all used */ 282/* End compression flags --- maybe not all used */
283#define FS_ENCRYPT_FL 0x00000800 /* Encrypted file */
264#define FS_BTREE_FL 0x00001000 /* btree format dir */ 284#define FS_BTREE_FL 0x00001000 /* btree format dir */
265#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ 285#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */
266#define FS_IMAGIC_FL 0x00002000 /* AFS directory */ 286#define FS_IMAGIC_FL 0x00002000 /* AFS directory */
@@ -268,9 +288,12 @@ struct fsxattr {
268#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ 288#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */
269#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ 289#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
270#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ 290#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
291#define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */
271#define FS_EXTENT_FL 0x00080000 /* Extents */ 292#define FS_EXTENT_FL 0x00080000 /* Extents */
272#define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ 293#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
294#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
273#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ 295#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
296#define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */
274#define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ 297#define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
275#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ 298#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
276 299