diff options
-rw-r--r-- | fs/ext4/crypto.c | 6 | ||||
-rw-r--r-- | fs/ext4/crypto_key.c | 4 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 99 | ||||
-rw-r--r-- | fs/ext4/extents.c | 153 | ||||
-rw-r--r-- | fs/ext4/file.c | 82 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 7 | ||||
-rw-r--r-- | fs/ext4/inline.c | 10 | ||||
-rw-r--r-- | fs/ext4/inode.c | 268 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 376 | ||||
-rw-r--r-- | fs/ext4/namei.c | 34 | ||||
-rw-r--r-- | fs/ext4/super.c | 97 | ||||
-rw-r--r-- | fs/ext4/truncate.h | 2 | ||||
-rw-r--r-- | include/trace/events/ext4.h | 2 | ||||
-rw-r--r-- | include/uapi/linux/fs.h | 31 |
14 files changed, 895 insertions, 276 deletions
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 1a0835073663..c8021208a7eb 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c | |||
@@ -384,14 +384,12 @@ int ext4_decrypt(struct page *page) | |||
384 | EXT4_DECRYPT, page->index, page, page); | 384 | EXT4_DECRYPT, page->index, page, page); |
385 | } | 385 | } |
386 | 386 | ||
387 | int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) | 387 | int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, |
388 | ext4_fsblk_t pblk, ext4_lblk_t len) | ||
388 | { | 389 | { |
389 | struct ext4_crypto_ctx *ctx; | 390 | struct ext4_crypto_ctx *ctx; |
390 | struct page *ciphertext_page = NULL; | 391 | struct page *ciphertext_page = NULL; |
391 | struct bio *bio; | 392 | struct bio *bio; |
392 | ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); | ||
393 | ext4_fsblk_t pblk = ext4_ext_pblock(ex); | ||
394 | unsigned int len = ext4_ext_get_actual_len(ex); | ||
395 | int ret, err = 0; | 393 | int ret, err = 0; |
396 | 394 | ||
397 | #if 0 | 395 | #if 0 |
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c index c5882b36e558..9a16d1e75a49 100644 --- a/fs/ext4/crypto_key.c +++ b/fs/ext4/crypto_key.c | |||
@@ -213,9 +213,11 @@ retry: | |||
213 | res = -ENOKEY; | 213 | res = -ENOKEY; |
214 | goto out; | 214 | goto out; |
215 | } | 215 | } |
216 | down_read(&keyring_key->sem); | ||
216 | ukp = user_key_payload(keyring_key); | 217 | ukp = user_key_payload(keyring_key); |
217 | if (ukp->datalen != sizeof(struct ext4_encryption_key)) { | 218 | if (ukp->datalen != sizeof(struct ext4_encryption_key)) { |
218 | res = -EINVAL; | 219 | res = -EINVAL; |
220 | up_read(&keyring_key->sem); | ||
219 | goto out; | 221 | goto out; |
220 | } | 222 | } |
221 | master_key = (struct ext4_encryption_key *)ukp->data; | 223 | master_key = (struct ext4_encryption_key *)ukp->data; |
@@ -226,10 +228,12 @@ retry: | |||
226 | "ext4: key size incorrect: %d\n", | 228 | "ext4: key size incorrect: %d\n", |
227 | master_key->size); | 229 | master_key->size); |
228 | res = -ENOKEY; | 230 | res = -ENOKEY; |
231 | up_read(&keyring_key->sem); | ||
229 | goto out; | 232 | goto out; |
230 | } | 233 | } |
231 | res = ext4_derive_key_aes(ctx.nonce, master_key->raw, | 234 | res = ext4_derive_key_aes(ctx.nonce, master_key->raw, |
232 | raw_key); | 235 | raw_key); |
236 | up_read(&keyring_key->sem); | ||
233 | if (res) | 237 | if (res) |
234 | goto out; | 238 | goto out; |
235 | got_key: | 239 | got_key: |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index cc7ca4e87144..1c127213363a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -378,14 +378,22 @@ struct flex_groups { | |||
378 | #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ | 378 | #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ |
379 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ | 379 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ |
380 | 380 | ||
381 | #define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */ | 381 | #define EXT4_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */ |
382 | #define EXT4_FL_USER_MODIFIABLE 0x004380FF /* User modifiable flags */ | 382 | #define EXT4_FL_USER_MODIFIABLE 0x204380FF /* User modifiable flags */ |
383 | |||
384 | #define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \ | ||
385 | EXT4_IMMUTABLE_FL | \ | ||
386 | EXT4_APPEND_FL | \ | ||
387 | EXT4_NODUMP_FL | \ | ||
388 | EXT4_NOATIME_FL | \ | ||
389 | EXT4_PROJINHERIT_FL) | ||
383 | 390 | ||
384 | /* Flags that should be inherited by new inodes from their parent. */ | 391 | /* Flags that should be inherited by new inodes from their parent. */ |
385 | #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ | 392 | #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ |
386 | EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\ | 393 | EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\ |
387 | EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ | 394 | EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ |
388 | EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL) | 395 | EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\ |
396 | EXT4_PROJINHERIT_FL) | ||
389 | 397 | ||
390 | /* Flags that are appropriate for regular files (all but dir-specific ones). */ | 398 | /* Flags that are appropriate for regular files (all but dir-specific ones). */ |
391 | #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL)) | 399 | #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL)) |
@@ -555,10 +563,12 @@ enum { | |||
555 | #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 | 563 | #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 |
556 | /* Request will not result in inode size update (user for fallocate) */ | 564 | /* Request will not result in inode size update (user for fallocate) */ |
557 | #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 | 565 | #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 |
558 | /* Do not take i_data_sem locking in ext4_map_blocks */ | ||
559 | #define EXT4_GET_BLOCKS_NO_LOCK 0x0100 | ||
560 | /* Convert written extents to unwritten */ | 566 | /* Convert written extents to unwritten */ |
561 | #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0200 | 567 | #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0100 |
568 | /* Write zeros to newly created written extents */ | ||
569 | #define EXT4_GET_BLOCKS_ZERO 0x0200 | ||
570 | #define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\ | ||
571 | EXT4_GET_BLOCKS_ZERO) | ||
562 | 572 | ||
563 | /* | 573 | /* |
564 | * The bit position of these flags must not overlap with any of the | 574 | * The bit position of these flags must not overlap with any of the |
@@ -616,6 +626,46 @@ enum { | |||
616 | #define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) | 626 | #define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) |
617 | #define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy) | 627 | #define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy) |
618 | 628 | ||
629 | #ifndef FS_IOC_FSGETXATTR | ||
630 | /* Until the uapi changes get merged for project quota... */ | ||
631 | |||
632 | #define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr) | ||
633 | #define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr) | ||
634 | |||
635 | /* | ||
636 | * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR. | ||
637 | */ | ||
638 | struct fsxattr { | ||
639 | __u32 fsx_xflags; /* xflags field value (get/set) */ | ||
640 | __u32 fsx_extsize; /* extsize field value (get/set)*/ | ||
641 | __u32 fsx_nextents; /* nextents field value (get) */ | ||
642 | __u32 fsx_projid; /* project identifier (get/set) */ | ||
643 | unsigned char fsx_pad[12]; | ||
644 | }; | ||
645 | |||
646 | /* | ||
647 | * Flags for the fsx_xflags field | ||
648 | */ | ||
649 | #define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */ | ||
650 | #define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */ | ||
651 | #define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */ | ||
652 | #define FS_XFLAG_APPEND 0x00000010 /* all writes append */ | ||
653 | #define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */ | ||
654 | #define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */ | ||
655 | #define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */ | ||
656 | #define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */ | ||
657 | #define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ | ||
658 | #define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ | ||
659 | #define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ | ||
660 | #define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ | ||
661 | #define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ | ||
662 | #define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ | ||
663 | #define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ | ||
664 | #endif /* !defined(FS_IOC_FSGETXATTR) */ | ||
665 | |||
666 | #define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR | ||
667 | #define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR | ||
668 | |||
619 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | 669 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) |
620 | /* | 670 | /* |
621 | * ioctl commands in 32 bit emulation | 671 | * ioctl commands in 32 bit emulation |
@@ -910,6 +960,15 @@ struct ext4_inode_info { | |||
910 | * by other means, so we have i_data_sem. | 960 | * by other means, so we have i_data_sem. |
911 | */ | 961 | */ |
912 | struct rw_semaphore i_data_sem; | 962 | struct rw_semaphore i_data_sem; |
963 | /* | ||
964 | * i_mmap_sem is for serializing page faults with truncate / punch hole | ||
965 | * operations. We have to make sure that new page cannot be faulted in | ||
966 | * a section of the inode that is being punched. We cannot easily use | ||
967 | * i_data_sem for this since we need protection for the whole punch | ||
968 | * operation and i_data_sem ranks below transaction start so we have | ||
969 | * to occasionally drop it. | ||
970 | */ | ||
971 | struct rw_semaphore i_mmap_sem; | ||
913 | struct inode vfs_inode; | 972 | struct inode vfs_inode; |
914 | struct jbd2_inode *jinode; | 973 | struct jbd2_inode *jinode; |
915 | 974 | ||
@@ -993,6 +1052,7 @@ struct ext4_inode_info { | |||
993 | /* Encryption params */ | 1052 | /* Encryption params */ |
994 | struct ext4_crypt_info *i_crypt_info; | 1053 | struct ext4_crypt_info *i_crypt_info; |
995 | #endif | 1054 | #endif |
1055 | kprojid_t i_projid; | ||
996 | }; | 1056 | }; |
997 | 1057 | ||
998 | /* | 1058 | /* |
@@ -1248,7 +1308,7 @@ struct ext4_super_block { | |||
1248 | #endif | 1308 | #endif |
1249 | 1309 | ||
1250 | /* Number of quota types we support */ | 1310 | /* Number of quota types we support */ |
1251 | #define EXT4_MAXQUOTAS 2 | 1311 | #define EXT4_MAXQUOTAS 3 |
1252 | 1312 | ||
1253 | /* | 1313 | /* |
1254 | * fourth extended-fs super-block data in memory | 1314 | * fourth extended-fs super-block data in memory |
@@ -1754,7 +1814,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT) | |||
1754 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\ | 1814 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\ |
1755 | EXT4_FEATURE_RO_COMPAT_BIGALLOC |\ | 1815 | EXT4_FEATURE_RO_COMPAT_BIGALLOC |\ |
1756 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\ | 1816 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\ |
1757 | EXT4_FEATURE_RO_COMPAT_QUOTA) | 1817 | EXT4_FEATURE_RO_COMPAT_QUOTA |\ |
1818 | EXT4_FEATURE_RO_COMPAT_PROJECT) | ||
1758 | 1819 | ||
1759 | #define EXTN_FEATURE_FUNCS(ver) \ | 1820 | #define EXTN_FEATURE_FUNCS(ver) \ |
1760 | static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \ | 1821 | static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \ |
@@ -1796,6 +1857,11 @@ static inline bool ext4_has_incompat_features(struct super_block *sb) | |||
1796 | #define EXT4_DEF_RESUID 0 | 1857 | #define EXT4_DEF_RESUID 0 |
1797 | #define EXT4_DEF_RESGID 0 | 1858 | #define EXT4_DEF_RESGID 0 |
1798 | 1859 | ||
1860 | /* | ||
1861 | * Default project ID | ||
1862 | */ | ||
1863 | #define EXT4_DEF_PROJID 0 | ||
1864 | |||
1799 | #define EXT4_DEF_INODE_READAHEAD_BLKS 32 | 1865 | #define EXT4_DEF_INODE_READAHEAD_BLKS 32 |
1800 | 1866 | ||
1801 | /* | 1867 | /* |
@@ -2234,7 +2300,8 @@ void ext4_restore_control_page(struct page *data_page); | |||
2234 | struct page *ext4_encrypt(struct inode *inode, | 2300 | struct page *ext4_encrypt(struct inode *inode, |
2235 | struct page *plaintext_page); | 2301 | struct page *plaintext_page); |
2236 | int ext4_decrypt(struct page *page); | 2302 | int ext4_decrypt(struct page *page); |
2237 | int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex); | 2303 | int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, |
2304 | ext4_fsblk_t pblk, ext4_lblk_t len); | ||
2238 | 2305 | ||
2239 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | 2306 | #ifdef CONFIG_EXT4_FS_ENCRYPTION |
2240 | int ext4_init_crypto(void); | 2307 | int ext4_init_crypto(void); |
@@ -2440,8 +2507,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); | |||
2440 | struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); | 2507 | struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); |
2441 | int ext4_get_block_write(struct inode *inode, sector_t iblock, | 2508 | int ext4_get_block_write(struct inode *inode, sector_t iblock, |
2442 | struct buffer_head *bh_result, int create); | 2509 | struct buffer_head *bh_result, int create); |
2443 | int ext4_get_block_dax(struct inode *inode, sector_t iblock, | 2510 | int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock, |
2444 | struct buffer_head *bh_result, int create); | 2511 | struct buffer_head *bh_result, int create); |
2445 | int ext4_get_block(struct inode *inode, sector_t iblock, | 2512 | int ext4_get_block(struct inode *inode, sector_t iblock, |
2446 | struct buffer_head *bh_result, int create); | 2513 | struct buffer_head *bh_result, int create); |
2447 | int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | 2514 | int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, |
@@ -2484,9 +2551,13 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | |||
2484 | extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, | 2551 | extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, |
2485 | loff_t lstart, loff_t lend); | 2552 | loff_t lstart, loff_t lend); |
2486 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 2553 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
2554 | extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf); | ||
2487 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); | 2555 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); |
2556 | extern int ext4_get_projid(struct inode *inode, kprojid_t *projid); | ||
2488 | extern void ext4_da_update_reserve_space(struct inode *inode, | 2557 | extern void ext4_da_update_reserve_space(struct inode *inode, |
2489 | int used, int quota_claim); | 2558 | int used, int quota_claim); |
2559 | extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, | ||
2560 | ext4_fsblk_t pblk, ext4_lblk_t len); | ||
2490 | 2561 | ||
2491 | /* indirect.c */ | 2562 | /* indirect.c */ |
2492 | extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | 2563 | extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, |
@@ -2848,6 +2919,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize) | |||
2848 | return changed; | 2919 | return changed; |
2849 | } | 2920 | } |
2850 | 2921 | ||
2922 | int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, | ||
2923 | loff_t len); | ||
2924 | |||
2851 | struct ext4_group_info { | 2925 | struct ext4_group_info { |
2852 | unsigned long bb_state; | 2926 | unsigned long bb_state; |
2853 | struct rb_root bb_free_root; | 2927 | struct rb_root bb_free_root; |
@@ -2986,8 +3060,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, | |||
2986 | struct page *page); | 3060 | struct page *page); |
2987 | extern int ext4_try_add_inline_entry(handle_t *handle, | 3061 | extern int ext4_try_add_inline_entry(handle_t *handle, |
2988 | struct ext4_filename *fname, | 3062 | struct ext4_filename *fname, |
2989 | struct dentry *dentry, | 3063 | struct inode *dir, struct inode *inode); |
2990 | struct inode *inode); | ||
2991 | extern int ext4_try_create_inline_dir(handle_t *handle, | 3064 | extern int ext4_try_create_inline_dir(handle_t *handle, |
2992 | struct inode *parent, | 3065 | struct inode *parent, |
2993 | struct inode *inode); | 3066 | struct inode *inode); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 551353b1b17a..b52fea3b7219 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -3119,19 +3119,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
3119 | { | 3119 | { |
3120 | ext4_fsblk_t ee_pblock; | 3120 | ext4_fsblk_t ee_pblock; |
3121 | unsigned int ee_len; | 3121 | unsigned int ee_len; |
3122 | int ret; | ||
3123 | 3122 | ||
3124 | ee_len = ext4_ext_get_actual_len(ex); | 3123 | ee_len = ext4_ext_get_actual_len(ex); |
3125 | ee_pblock = ext4_ext_pblock(ex); | 3124 | ee_pblock = ext4_ext_pblock(ex); |
3126 | 3125 | return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock, | |
3127 | if (ext4_encrypted_inode(inode)) | 3126 | ee_len); |
3128 | return ext4_encrypted_zeroout(inode, ex); | ||
3129 | |||
3130 | ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS); | ||
3131 | if (ret > 0) | ||
3132 | ret = 0; | ||
3133 | |||
3134 | return ret; | ||
3135 | } | 3127 | } |
3136 | 3128 | ||
3137 | /* | 3129 | /* |
@@ -4052,6 +4044,14 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, | |||
4052 | } | 4044 | } |
4053 | /* IO end_io complete, convert the filled extent to written */ | 4045 | /* IO end_io complete, convert the filled extent to written */ |
4054 | if (flags & EXT4_GET_BLOCKS_CONVERT) { | 4046 | if (flags & EXT4_GET_BLOCKS_CONVERT) { |
4047 | if (flags & EXT4_GET_BLOCKS_ZERO) { | ||
4048 | if (allocated > map->m_len) | ||
4049 | allocated = map->m_len; | ||
4050 | err = ext4_issue_zeroout(inode, map->m_lblk, newblock, | ||
4051 | allocated); | ||
4052 | if (err < 0) | ||
4053 | goto out2; | ||
4054 | } | ||
4055 | ret = ext4_convert_unwritten_extents_endio(handle, inode, map, | 4055 | ret = ext4_convert_unwritten_extents_endio(handle, inode, map, |
4056 | ppath); | 4056 | ppath); |
4057 | if (ret >= 0) { | 4057 | if (ret >= 0) { |
@@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, | |||
4685 | if (len <= EXT_UNWRITTEN_MAX_LEN) | 4685 | if (len <= EXT_UNWRITTEN_MAX_LEN) |
4686 | flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; | 4686 | flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; |
4687 | 4687 | ||
4688 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | ||
4689 | ext4_inode_block_unlocked_dio(inode); | ||
4690 | inode_dio_wait(inode); | ||
4691 | |||
4692 | /* | 4688 | /* |
4693 | * credits to insert 1 extent into extent tree | 4689 | * credits to insert 1 extent into extent tree |
4694 | */ | 4690 | */ |
@@ -4752,8 +4748,6 @@ retry: | |||
4752 | goto retry; | 4748 | goto retry; |
4753 | } | 4749 | } |
4754 | 4750 | ||
4755 | ext4_inode_resume_unlocked_dio(inode); | ||
4756 | |||
4757 | return ret > 0 ? ret2 : ret; | 4751 | return ret > 0 ? ret2 : ret; |
4758 | } | 4752 | } |
4759 | 4753 | ||
@@ -4770,7 +4764,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4770 | int partial_begin, partial_end; | 4764 | int partial_begin, partial_end; |
4771 | loff_t start, end; | 4765 | loff_t start, end; |
4772 | ext4_lblk_t lblk; | 4766 | ext4_lblk_t lblk; |
4773 | struct address_space *mapping = inode->i_mapping; | ||
4774 | unsigned int blkbits = inode->i_blkbits; | 4767 | unsigned int blkbits = inode->i_blkbits; |
4775 | 4768 | ||
4776 | trace_ext4_zero_range(inode, offset, len, mode); | 4769 | trace_ext4_zero_range(inode, offset, len, mode); |
@@ -4786,17 +4779,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4786 | } | 4779 | } |
4787 | 4780 | ||
4788 | /* | 4781 | /* |
4789 | * Write out all dirty pages to avoid race conditions | ||
4790 | * Then release them. | ||
4791 | */ | ||
4792 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
4793 | ret = filemap_write_and_wait_range(mapping, offset, | ||
4794 | offset + len - 1); | ||
4795 | if (ret) | ||
4796 | return ret; | ||
4797 | } | ||
4798 | |||
4799 | /* | ||
4800 | * Round up offset. This is not fallocate, we neet to zero out | 4782 | * Round up offset. This is not fallocate, we neet to zero out |
4801 | * blocks, so convert interior block aligned part of the range to | 4783 | * blocks, so convert interior block aligned part of the range to |
4802 | * unwritten and possibly manually zero out unaligned parts of the | 4784 | * unwritten and possibly manually zero out unaligned parts of the |
@@ -4839,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4839 | if (mode & FALLOC_FL_KEEP_SIZE) | 4821 | if (mode & FALLOC_FL_KEEP_SIZE) |
4840 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; | 4822 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; |
4841 | 4823 | ||
4824 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | ||
4825 | ext4_inode_block_unlocked_dio(inode); | ||
4826 | inode_dio_wait(inode); | ||
4827 | |||
4842 | /* Preallocate the range including the unaligned edges */ | 4828 | /* Preallocate the range including the unaligned edges */ |
4843 | if (partial_begin || partial_end) { | 4829 | if (partial_begin || partial_end) { |
4844 | ret = ext4_alloc_file_blocks(file, | 4830 | ret = ext4_alloc_file_blocks(file, |
@@ -4847,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4847 | round_down(offset, 1 << blkbits)) >> blkbits, | 4833 | round_down(offset, 1 << blkbits)) >> blkbits, |
4848 | new_size, flags, mode); | 4834 | new_size, flags, mode); |
4849 | if (ret) | 4835 | if (ret) |
4850 | goto out_mutex; | 4836 | goto out_dio; |
4851 | 4837 | ||
4852 | } | 4838 | } |
4853 | 4839 | ||
@@ -4856,16 +4842,23 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4856 | flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | | 4842 | flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | |
4857 | EXT4_EX_NOCACHE); | 4843 | EXT4_EX_NOCACHE); |
4858 | 4844 | ||
4859 | /* Now release the pages and zero block aligned part of pages*/ | 4845 | /* |
4846 | * Prevent page faults from reinstantiating pages we have | ||
4847 | * released from page cache. | ||
4848 | */ | ||
4849 | down_write(&EXT4_I(inode)->i_mmap_sem); | ||
4850 | ret = ext4_update_disksize_before_punch(inode, offset, len); | ||
4851 | if (ret) { | ||
4852 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
4853 | goto out_dio; | ||
4854 | } | ||
4855 | /* Now release the pages and zero block aligned part of pages */ | ||
4860 | truncate_pagecache_range(inode, start, end - 1); | 4856 | truncate_pagecache_range(inode, start, end - 1); |
4861 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 4857 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
4862 | 4858 | ||
4863 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | ||
4864 | ext4_inode_block_unlocked_dio(inode); | ||
4865 | inode_dio_wait(inode); | ||
4866 | |||
4867 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, | 4859 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, |
4868 | flags, mode); | 4860 | flags, mode); |
4861 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
4869 | if (ret) | 4862 | if (ret) |
4870 | goto out_dio; | 4863 | goto out_dio; |
4871 | } | 4864 | } |
@@ -4998,8 +4991,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
4998 | goto out; | 4991 | goto out; |
4999 | } | 4992 | } |
5000 | 4993 | ||
4994 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | ||
4995 | ext4_inode_block_unlocked_dio(inode); | ||
4996 | inode_dio_wait(inode); | ||
4997 | |||
5001 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, | 4998 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, |
5002 | flags, mode); | 4999 | flags, mode); |
5000 | ext4_inode_resume_unlocked_dio(inode); | ||
5003 | if (ret) | 5001 | if (ret) |
5004 | goto out; | 5002 | goto out; |
5005 | 5003 | ||
@@ -5494,21 +5492,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) | |||
5494 | return ret; | 5492 | return ret; |
5495 | } | 5493 | } |
5496 | 5494 | ||
5497 | /* | ||
5498 | * Need to round down offset to be aligned with page size boundary | ||
5499 | * for page size > block size. | ||
5500 | */ | ||
5501 | ioffset = round_down(offset, PAGE_SIZE); | ||
5502 | |||
5503 | /* Write out all dirty pages */ | ||
5504 | ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, | ||
5505 | LLONG_MAX); | ||
5506 | if (ret) | ||
5507 | return ret; | ||
5508 | |||
5509 | /* Take mutex lock */ | ||
5510 | mutex_lock(&inode->i_mutex); | 5495 | mutex_lock(&inode->i_mutex); |
5511 | |||
5512 | /* | 5496 | /* |
5513 | * There is no need to overlap collapse range with EOF, in which case | 5497 | * There is no need to overlap collapse range with EOF, in which case |
5514 | * it is effectively a truncate operation | 5498 | * it is effectively a truncate operation |
@@ -5524,17 +5508,43 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) | |||
5524 | goto out_mutex; | 5508 | goto out_mutex; |
5525 | } | 5509 | } |
5526 | 5510 | ||
5527 | truncate_pagecache(inode, ioffset); | ||
5528 | |||
5529 | /* Wait for existing dio to complete */ | 5511 | /* Wait for existing dio to complete */ |
5530 | ext4_inode_block_unlocked_dio(inode); | 5512 | ext4_inode_block_unlocked_dio(inode); |
5531 | inode_dio_wait(inode); | 5513 | inode_dio_wait(inode); |
5532 | 5514 | ||
5515 | /* | ||
5516 | * Prevent page faults from reinstantiating pages we have released from | ||
5517 | * page cache. | ||
5518 | */ | ||
5519 | down_write(&EXT4_I(inode)->i_mmap_sem); | ||
5520 | /* | ||
5521 | * Need to round down offset to be aligned with page size boundary | ||
5522 | * for page size > block size. | ||
5523 | */ | ||
5524 | ioffset = round_down(offset, PAGE_SIZE); | ||
5525 | /* | ||
5526 | * Write tail of the last page before removed range since it will get | ||
5527 | * removed from the page cache below. | ||
5528 | */ | ||
5529 | ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset); | ||
5530 | if (ret) | ||
5531 | goto out_mmap; | ||
5532 | /* | ||
5533 | * Write data that will be shifted to preserve them when discarding | ||
5534 | * page cache below. We are also protected from pages becoming dirty | ||
5535 | * by i_mmap_sem. | ||
5536 | */ | ||
5537 | ret = filemap_write_and_wait_range(inode->i_mapping, offset + len, | ||
5538 | LLONG_MAX); | ||
5539 | if (ret) | ||
5540 | goto out_mmap; | ||
5541 | truncate_pagecache(inode, ioffset); | ||
5542 | |||
5533 | credits = ext4_writepage_trans_blocks(inode); | 5543 | credits = ext4_writepage_trans_blocks(inode); |
5534 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); | 5544 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); |
5535 | if (IS_ERR(handle)) { | 5545 | if (IS_ERR(handle)) { |
5536 | ret = PTR_ERR(handle); | 5546 | ret = PTR_ERR(handle); |
5537 | goto out_dio; | 5547 | goto out_mmap; |
5538 | } | 5548 | } |
5539 | 5549 | ||
5540 | down_write(&EXT4_I(inode)->i_data_sem); | 5550 | down_write(&EXT4_I(inode)->i_data_sem); |
@@ -5573,7 +5583,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) | |||
5573 | 5583 | ||
5574 | out_stop: | 5584 | out_stop: |
5575 | ext4_journal_stop(handle); | 5585 | ext4_journal_stop(handle); |
5576 | out_dio: | 5586 | out_mmap: |
5587 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
5577 | ext4_inode_resume_unlocked_dio(inode); | 5588 | ext4_inode_resume_unlocked_dio(inode); |
5578 | out_mutex: | 5589 | out_mutex: |
5579 | mutex_unlock(&inode->i_mutex); | 5590 | mutex_unlock(&inode->i_mutex); |
@@ -5627,21 +5638,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) | |||
5627 | return ret; | 5638 | return ret; |
5628 | } | 5639 | } |
5629 | 5640 | ||
5630 | /* | ||
5631 | * Need to round down to align start offset to page size boundary | ||
5632 | * for page size > block size. | ||
5633 | */ | ||
5634 | ioffset = round_down(offset, PAGE_SIZE); | ||
5635 | |||
5636 | /* Write out all dirty pages */ | ||
5637 | ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, | ||
5638 | LLONG_MAX); | ||
5639 | if (ret) | ||
5640 | return ret; | ||
5641 | |||
5642 | /* Take mutex lock */ | ||
5643 | mutex_lock(&inode->i_mutex); | 5641 | mutex_lock(&inode->i_mutex); |
5644 | |||
5645 | /* Currently just for extent based files */ | 5642 | /* Currently just for extent based files */ |
5646 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 5643 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
5647 | ret = -EOPNOTSUPP; | 5644 | ret = -EOPNOTSUPP; |
@@ -5660,17 +5657,32 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) | |||
5660 | goto out_mutex; | 5657 | goto out_mutex; |
5661 | } | 5658 | } |
5662 | 5659 | ||
5663 | truncate_pagecache(inode, ioffset); | ||
5664 | |||
5665 | /* Wait for existing dio to complete */ | 5660 | /* Wait for existing dio to complete */ |
5666 | ext4_inode_block_unlocked_dio(inode); | 5661 | ext4_inode_block_unlocked_dio(inode); |
5667 | inode_dio_wait(inode); | 5662 | inode_dio_wait(inode); |
5668 | 5663 | ||
5664 | /* | ||
5665 | * Prevent page faults from reinstantiating pages we have released from | ||
5666 | * page cache. | ||
5667 | */ | ||
5668 | down_write(&EXT4_I(inode)->i_mmap_sem); | ||
5669 | /* | ||
5670 | * Need to round down to align start offset to page size boundary | ||
5671 | * for page size > block size. | ||
5672 | */ | ||
5673 | ioffset = round_down(offset, PAGE_SIZE); | ||
5674 | /* Write out all dirty pages */ | ||
5675 | ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, | ||
5676 | LLONG_MAX); | ||
5677 | if (ret) | ||
5678 | goto out_mmap; | ||
5679 | truncate_pagecache(inode, ioffset); | ||
5680 | |||
5669 | credits = ext4_writepage_trans_blocks(inode); | 5681 | credits = ext4_writepage_trans_blocks(inode); |
5670 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); | 5682 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); |
5671 | if (IS_ERR(handle)) { | 5683 | if (IS_ERR(handle)) { |
5672 | ret = PTR_ERR(handle); | 5684 | ret = PTR_ERR(handle); |
5673 | goto out_dio; | 5685 | goto out_mmap; |
5674 | } | 5686 | } |
5675 | 5687 | ||
5676 | /* Expand file to avoid data loss if there is error while shifting */ | 5688 | /* Expand file to avoid data loss if there is error while shifting */ |
@@ -5741,7 +5753,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) | |||
5741 | 5753 | ||
5742 | out_stop: | 5754 | out_stop: |
5743 | ext4_journal_stop(handle); | 5755 | ext4_journal_stop(handle); |
5744 | out_dio: | 5756 | out_mmap: |
5757 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
5745 | ext4_inode_resume_unlocked_dio(inode); | 5758 | ext4_inode_resume_unlocked_dio(inode); |
5746 | out_mutex: | 5759 | out_mutex: |
5747 | mutex_unlock(&inode->i_mutex); | 5760 | mutex_unlock(&inode->i_mutex); |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 113837e7ba98..749b222e6498 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -193,43 +193,35 @@ out: | |||
193 | } | 193 | } |
194 | 194 | ||
195 | #ifdef CONFIG_FS_DAX | 195 | #ifdef CONFIG_FS_DAX |
196 | static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate) | ||
197 | { | ||
198 | struct inode *inode = bh->b_assoc_map->host; | ||
199 | /* XXX: breaks on 32-bit > 16TB. Is that even supported? */ | ||
200 | loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits; | ||
201 | int err; | ||
202 | if (!uptodate) | ||
203 | return; | ||
204 | WARN_ON(!buffer_unwritten(bh)); | ||
205 | err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size); | ||
206 | } | ||
207 | |||
208 | static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 196 | static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
209 | { | 197 | { |
210 | int result; | 198 | int result; |
211 | handle_t *handle = NULL; | 199 | handle_t *handle = NULL; |
212 | struct super_block *sb = file_inode(vma->vm_file)->i_sb; | 200 | struct inode *inode = file_inode(vma->vm_file); |
201 | struct super_block *sb = inode->i_sb; | ||
213 | bool write = vmf->flags & FAULT_FLAG_WRITE; | 202 | bool write = vmf->flags & FAULT_FLAG_WRITE; |
214 | 203 | ||
215 | if (write) { | 204 | if (write) { |
216 | sb_start_pagefault(sb); | 205 | sb_start_pagefault(sb); |
217 | file_update_time(vma->vm_file); | 206 | file_update_time(vma->vm_file); |
207 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
218 | handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, | 208 | handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, |
219 | EXT4_DATA_TRANS_BLOCKS(sb)); | 209 | EXT4_DATA_TRANS_BLOCKS(sb)); |
220 | } | 210 | } else |
211 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
221 | 212 | ||
222 | if (IS_ERR(handle)) | 213 | if (IS_ERR(handle)) |
223 | result = VM_FAULT_SIGBUS; | 214 | result = VM_FAULT_SIGBUS; |
224 | else | 215 | else |
225 | result = __dax_fault(vma, vmf, ext4_get_block_dax, | 216 | result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL); |
226 | ext4_end_io_unwritten); | ||
227 | 217 | ||
228 | if (write) { | 218 | if (write) { |
229 | if (!IS_ERR(handle)) | 219 | if (!IS_ERR(handle)) |
230 | ext4_journal_stop(handle); | 220 | ext4_journal_stop(handle); |
221 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
231 | sb_end_pagefault(sb); | 222 | sb_end_pagefault(sb); |
232 | } | 223 | } else |
224 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
233 | 225 | ||
234 | return result; | 226 | return result; |
235 | } | 227 | } |
@@ -246,44 +238,86 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, | |||
246 | if (write) { | 238 | if (write) { |
247 | sb_start_pagefault(sb); | 239 | sb_start_pagefault(sb); |
248 | file_update_time(vma->vm_file); | 240 | file_update_time(vma->vm_file); |
241 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
249 | handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, | 242 | handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, |
250 | ext4_chunk_trans_blocks(inode, | 243 | ext4_chunk_trans_blocks(inode, |
251 | PMD_SIZE / PAGE_SIZE)); | 244 | PMD_SIZE / PAGE_SIZE)); |
252 | } | 245 | } else |
246 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
253 | 247 | ||
254 | if (IS_ERR(handle)) | 248 | if (IS_ERR(handle)) |
255 | result = VM_FAULT_SIGBUS; | 249 | result = VM_FAULT_SIGBUS; |
256 | else | 250 | else |
257 | result = __dax_pmd_fault(vma, addr, pmd, flags, | 251 | result = __dax_pmd_fault(vma, addr, pmd, flags, |
258 | ext4_get_block_dax, ext4_end_io_unwritten); | 252 | ext4_dax_mmap_get_block, NULL); |
259 | 253 | ||
260 | if (write) { | 254 | if (write) { |
261 | if (!IS_ERR(handle)) | 255 | if (!IS_ERR(handle)) |
262 | ext4_journal_stop(handle); | 256 | ext4_journal_stop(handle); |
257 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
263 | sb_end_pagefault(sb); | 258 | sb_end_pagefault(sb); |
264 | } | 259 | } else |
260 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
265 | 261 | ||
266 | return result; | 262 | return result; |
267 | } | 263 | } |
268 | 264 | ||
269 | static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | 265 | static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) |
270 | { | 266 | { |
271 | return dax_mkwrite(vma, vmf, ext4_get_block_dax, | 267 | int err; |
272 | ext4_end_io_unwritten); | 268 | struct inode *inode = file_inode(vma->vm_file); |
269 | |||
270 | sb_start_pagefault(inode->i_sb); | ||
271 | file_update_time(vma->vm_file); | ||
272 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
273 | err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL); | ||
274 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
275 | sb_end_pagefault(inode->i_sb); | ||
276 | |||
277 | return err; | ||
278 | } | ||
279 | |||
280 | /* | ||
281 | * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite() | ||
282 | * handler we check for races against truncate. Note that since we cycle through | ||
283 | * i_mmap_sem, we are sure that also any hole punching that began before we | ||
284 | * were called is finished by now and so if it included part of the file we | ||
285 | * are working on, our pte will get unmapped and the check for pte_same() in | ||
286 | * wp_pfn_shared() fails. Thus fault gets retried and things work out as | ||
287 | * desired. | ||
288 | */ | ||
289 | static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma, | ||
290 | struct vm_fault *vmf) | ||
291 | { | ||
292 | struct inode *inode = file_inode(vma->vm_file); | ||
293 | struct super_block *sb = inode->i_sb; | ||
294 | int ret = VM_FAULT_NOPAGE; | ||
295 | loff_t size; | ||
296 | |||
297 | sb_start_pagefault(sb); | ||
298 | file_update_time(vma->vm_file); | ||
299 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
300 | size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
301 | if (vmf->pgoff >= size) | ||
302 | ret = VM_FAULT_SIGBUS; | ||
303 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
304 | sb_end_pagefault(sb); | ||
305 | |||
306 | return ret; | ||
273 | } | 307 | } |
274 | 308 | ||
275 | static const struct vm_operations_struct ext4_dax_vm_ops = { | 309 | static const struct vm_operations_struct ext4_dax_vm_ops = { |
276 | .fault = ext4_dax_fault, | 310 | .fault = ext4_dax_fault, |
277 | .pmd_fault = ext4_dax_pmd_fault, | 311 | .pmd_fault = ext4_dax_pmd_fault, |
278 | .page_mkwrite = ext4_dax_mkwrite, | 312 | .page_mkwrite = ext4_dax_mkwrite, |
279 | .pfn_mkwrite = dax_pfn_mkwrite, | 313 | .pfn_mkwrite = ext4_dax_pfn_mkwrite, |
280 | }; | 314 | }; |
281 | #else | 315 | #else |
282 | #define ext4_dax_vm_ops ext4_file_vm_ops | 316 | #define ext4_dax_vm_ops ext4_file_vm_ops |
283 | #endif | 317 | #endif |
284 | 318 | ||
285 | static const struct vm_operations_struct ext4_file_vm_ops = { | 319 | static const struct vm_operations_struct ext4_file_vm_ops = { |
286 | .fault = filemap_fault, | 320 | .fault = ext4_filemap_fault, |
287 | .map_pages = filemap_map_pages, | 321 | .map_pages = filemap_map_pages, |
288 | .page_mkwrite = ext4_page_mkwrite, | 322 | .page_mkwrite = ext4_page_mkwrite, |
289 | }; | 323 | }; |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 1b8024d26f65..3fcfd50a2e8a 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -799,6 +799,13 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, | |||
799 | inode->i_gid = dir->i_gid; | 799 | inode->i_gid = dir->i_gid; |
800 | } else | 800 | } else |
801 | inode_init_owner(inode, dir, mode); | 801 | inode_init_owner(inode, dir, mode); |
802 | |||
803 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) && | ||
804 | ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) | ||
805 | ei->i_projid = EXT4_I(dir)->i_projid; | ||
806 | else | ||
807 | ei->i_projid = make_kprojid(&init_user_ns, EXT4_DEF_PROJID); | ||
808 | |||
802 | err = dquot_initialize(inode); | 809 | err = dquot_initialize(inode); |
803 | if (err) | 810 | if (err) |
804 | goto out; | 811 | goto out; |
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index d884989cc83d..dfe3b9bafc0d 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c | |||
@@ -995,12 +995,11 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, | |||
995 | */ | 995 | */ |
996 | static int ext4_add_dirent_to_inline(handle_t *handle, | 996 | static int ext4_add_dirent_to_inline(handle_t *handle, |
997 | struct ext4_filename *fname, | 997 | struct ext4_filename *fname, |
998 | struct dentry *dentry, | 998 | struct inode *dir, |
999 | struct inode *inode, | 999 | struct inode *inode, |
1000 | struct ext4_iloc *iloc, | 1000 | struct ext4_iloc *iloc, |
1001 | void *inline_start, int inline_size) | 1001 | void *inline_start, int inline_size) |
1002 | { | 1002 | { |
1003 | struct inode *dir = d_inode(dentry->d_parent); | ||
1004 | int err; | 1003 | int err; |
1005 | struct ext4_dir_entry_2 *de; | 1004 | struct ext4_dir_entry_2 *de; |
1006 | 1005 | ||
@@ -1245,12 +1244,11 @@ out: | |||
1245 | * the new created block. | 1244 | * the new created block. |
1246 | */ | 1245 | */ |
1247 | int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, | 1246 | int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, |
1248 | struct dentry *dentry, struct inode *inode) | 1247 | struct inode *dir, struct inode *inode) |
1249 | { | 1248 | { |
1250 | int ret, inline_size; | 1249 | int ret, inline_size; |
1251 | void *inline_start; | 1250 | void *inline_start; |
1252 | struct ext4_iloc iloc; | 1251 | struct ext4_iloc iloc; |
1253 | struct inode *dir = d_inode(dentry->d_parent); | ||
1254 | 1252 | ||
1255 | ret = ext4_get_inode_loc(dir, &iloc); | 1253 | ret = ext4_get_inode_loc(dir, &iloc); |
1256 | if (ret) | 1254 | if (ret) |
@@ -1264,7 +1262,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, | |||
1264 | EXT4_INLINE_DOTDOT_SIZE; | 1262 | EXT4_INLINE_DOTDOT_SIZE; |
1265 | inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; | 1263 | inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; |
1266 | 1264 | ||
1267 | ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc, | 1265 | ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc, |
1268 | inline_start, inline_size); | 1266 | inline_start, inline_size); |
1269 | if (ret != -ENOSPC) | 1267 | if (ret != -ENOSPC) |
1270 | goto out; | 1268 | goto out; |
@@ -1285,7 +1283,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, | |||
1285 | if (inline_size) { | 1283 | if (inline_size) { |
1286 | inline_start = ext4_get_inline_xattr_pos(dir, &iloc); | 1284 | inline_start = ext4_get_inline_xattr_pos(dir, &iloc); |
1287 | 1285 | ||
1288 | ret = ext4_add_dirent_to_inline(handle, fname, dentry, | 1286 | ret = ext4_add_dirent_to_inline(handle, fname, dir, |
1289 | inode, &iloc, inline_start, | 1287 | inode, &iloc, inline_start, |
1290 | inline_size); | 1288 | inline_size); |
1291 | 1289 | ||
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b3bd912df6bf..d964195ea0e2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -383,6 +383,21 @@ static int __check_block_validity(struct inode *inode, const char *func, | |||
383 | return 0; | 383 | return 0; |
384 | } | 384 | } |
385 | 385 | ||
386 | int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, | ||
387 | ext4_lblk_t len) | ||
388 | { | ||
389 | int ret; | ||
390 | |||
391 | if (ext4_encrypted_inode(inode)) | ||
392 | return ext4_encrypted_zeroout(inode, lblk, pblk, len); | ||
393 | |||
394 | ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS); | ||
395 | if (ret > 0) | ||
396 | ret = 0; | ||
397 | |||
398 | return ret; | ||
399 | } | ||
400 | |||
386 | #define check_block_validity(inode, map) \ | 401 | #define check_block_validity(inode, map) \ |
387 | __check_block_validity((inode), __func__, __LINE__, (map)) | 402 | __check_block_validity((inode), __func__, __LINE__, (map)) |
388 | 403 | ||
@@ -403,8 +418,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, | |||
403 | * out taking i_data_sem. So at the time the unwritten extent | 418 | * out taking i_data_sem. So at the time the unwritten extent |
404 | * could be converted. | 419 | * could be converted. |
405 | */ | 420 | */ |
406 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) | 421 | down_read(&EXT4_I(inode)->i_data_sem); |
407 | down_read(&EXT4_I(inode)->i_data_sem); | ||
408 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 422 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
409 | retval = ext4_ext_map_blocks(handle, inode, map, flags & | 423 | retval = ext4_ext_map_blocks(handle, inode, map, flags & |
410 | EXT4_GET_BLOCKS_KEEP_SIZE); | 424 | EXT4_GET_BLOCKS_KEEP_SIZE); |
@@ -412,8 +426,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, | |||
412 | retval = ext4_ind_map_blocks(handle, inode, map, flags & | 426 | retval = ext4_ind_map_blocks(handle, inode, map, flags & |
413 | EXT4_GET_BLOCKS_KEEP_SIZE); | 427 | EXT4_GET_BLOCKS_KEEP_SIZE); |
414 | } | 428 | } |
415 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) | 429 | up_read((&EXT4_I(inode)->i_data_sem)); |
416 | up_read((&EXT4_I(inode)->i_data_sem)); | ||
417 | 430 | ||
418 | /* | 431 | /* |
419 | * We don't check m_len because extent will be collapsed in status | 432 | * We don't check m_len because extent will be collapsed in status |
@@ -509,8 +522,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
509 | * Try to see if we can get the block without requesting a new | 522 | * Try to see if we can get the block without requesting a new |
510 | * file system block. | 523 | * file system block. |
511 | */ | 524 | */ |
512 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) | 525 | down_read(&EXT4_I(inode)->i_data_sem); |
513 | down_read(&EXT4_I(inode)->i_data_sem); | ||
514 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 526 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
515 | retval = ext4_ext_map_blocks(handle, inode, map, flags & | 527 | retval = ext4_ext_map_blocks(handle, inode, map, flags & |
516 | EXT4_GET_BLOCKS_KEEP_SIZE); | 528 | EXT4_GET_BLOCKS_KEEP_SIZE); |
@@ -541,8 +553,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
541 | if (ret < 0) | 553 | if (ret < 0) |
542 | retval = ret; | 554 | retval = ret; |
543 | } | 555 | } |
544 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) | 556 | up_read((&EXT4_I(inode)->i_data_sem)); |
545 | up_read((&EXT4_I(inode)->i_data_sem)); | ||
546 | 557 | ||
547 | found: | 558 | found: |
548 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 559 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
@@ -626,13 +637,29 @@ found: | |||
626 | } | 637 | } |
627 | 638 | ||
628 | /* | 639 | /* |
640 | * We have to zeroout blocks before inserting them into extent | ||
641 | * status tree. Otherwise someone could look them up there and | ||
642 | * use them before they are really zeroed. | ||
643 | */ | ||
644 | if (flags & EXT4_GET_BLOCKS_ZERO && | ||
645 | map->m_flags & EXT4_MAP_MAPPED && | ||
646 | map->m_flags & EXT4_MAP_NEW) { | ||
647 | ret = ext4_issue_zeroout(inode, map->m_lblk, | ||
648 | map->m_pblk, map->m_len); | ||
649 | if (ret) { | ||
650 | retval = ret; | ||
651 | goto out_sem; | ||
652 | } | ||
653 | } | ||
654 | |||
655 | /* | ||
629 | * If the extent has been zeroed out, we don't need to update | 656 | * If the extent has been zeroed out, we don't need to update |
630 | * extent status tree. | 657 | * extent status tree. |
631 | */ | 658 | */ |
632 | if ((flags & EXT4_GET_BLOCKS_PRE_IO) && | 659 | if ((flags & EXT4_GET_BLOCKS_PRE_IO) && |
633 | ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | 660 | ext4_es_lookup_extent(inode, map->m_lblk, &es)) { |
634 | if (ext4_es_is_written(&es)) | 661 | if (ext4_es_is_written(&es)) |
635 | goto has_zeroout; | 662 | goto out_sem; |
636 | } | 663 | } |
637 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? | 664 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? |
638 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; | 665 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; |
@@ -643,11 +670,13 @@ found: | |||
643 | status |= EXTENT_STATUS_DELAYED; | 670 | status |= EXTENT_STATUS_DELAYED; |
644 | ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, | 671 | ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, |
645 | map->m_pblk, status); | 672 | map->m_pblk, status); |
646 | if (ret < 0) | 673 | if (ret < 0) { |
647 | retval = ret; | 674 | retval = ret; |
675 | goto out_sem; | ||
676 | } | ||
648 | } | 677 | } |
649 | 678 | ||
650 | has_zeroout: | 679 | out_sem: |
651 | up_write((&EXT4_I(inode)->i_data_sem)); | 680 | up_write((&EXT4_I(inode)->i_data_sem)); |
652 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 681 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
653 | ret = check_block_validity(inode, map); | 682 | ret = check_block_validity(inode, map); |
@@ -674,7 +703,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, | |||
674 | map.m_lblk = iblock; | 703 | map.m_lblk = iblock; |
675 | map.m_len = bh->b_size >> inode->i_blkbits; | 704 | map.m_len = bh->b_size >> inode->i_blkbits; |
676 | 705 | ||
677 | if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) { | 706 | if (flags && !handle) { |
678 | /* Direct IO write... */ | 707 | /* Direct IO write... */ |
679 | if (map.m_len > DIO_MAX_BLOCKS) | 708 | if (map.m_len > DIO_MAX_BLOCKS) |
680 | map.m_len = DIO_MAX_BLOCKS; | 709 | map.m_len = DIO_MAX_BLOCKS; |
@@ -694,16 +723,6 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, | |||
694 | 723 | ||
695 | map_bh(bh, inode->i_sb, map.m_pblk); | 724 | map_bh(bh, inode->i_sb, map.m_pblk); |
696 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 725 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; |
697 | if (IS_DAX(inode) && buffer_unwritten(bh)) { | ||
698 | /* | ||
699 | * dgc: I suspect unwritten conversion on ext4+DAX is | ||
700 | * fundamentally broken here when there are concurrent | ||
701 | * read/write in progress on this inode. | ||
702 | */ | ||
703 | WARN_ON_ONCE(io_end); | ||
704 | bh->b_assoc_map = inode->i_mapping; | ||
705 | bh->b_private = (void *)(unsigned long)iblock; | ||
706 | } | ||
707 | if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) | 726 | if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) |
708 | set_buffer_defer_completion(bh); | 727 | set_buffer_defer_completion(bh); |
709 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; | 728 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; |
@@ -879,9 +898,6 @@ int do_journal_get_write_access(handle_t *handle, | |||
879 | return ret; | 898 | return ret; |
880 | } | 899 | } |
881 | 900 | ||
882 | static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, | ||
883 | struct buffer_head *bh_result, int create); | ||
884 | |||
885 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | 901 | #ifdef CONFIG_EXT4_FS_ENCRYPTION |
886 | static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, | 902 | static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, |
887 | get_block_t *get_block) | 903 | get_block_t *get_block) |
@@ -3054,25 +3070,96 @@ int ext4_get_block_write(struct inode *inode, sector_t iblock, | |||
3054 | EXT4_GET_BLOCKS_IO_CREATE_EXT); | 3070 | EXT4_GET_BLOCKS_IO_CREATE_EXT); |
3055 | } | 3071 | } |
3056 | 3072 | ||
3057 | static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, | 3073 | static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock, |
3058 | struct buffer_head *bh_result, int create) | 3074 | struct buffer_head *bh_result, int create) |
3059 | { | 3075 | { |
3060 | ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n", | 3076 | int ret; |
3077 | |||
3078 | ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n", | ||
3061 | inode->i_ino, create); | 3079 | inode->i_ino, create); |
3062 | return _ext4_get_block(inode, iblock, bh_result, | 3080 | ret = _ext4_get_block(inode, iblock, bh_result, 0); |
3063 | EXT4_GET_BLOCKS_NO_LOCK); | 3081 | /* |
3082 | * Blocks should have been preallocated! ext4_file_write_iter() checks | ||
3083 | * that. | ||
3084 | */ | ||
3085 | WARN_ON_ONCE(!buffer_mapped(bh_result)); | ||
3086 | |||
3087 | return ret; | ||
3064 | } | 3088 | } |
3065 | 3089 | ||
3066 | int ext4_get_block_dax(struct inode *inode, sector_t iblock, | 3090 | #ifdef CONFIG_FS_DAX |
3067 | struct buffer_head *bh_result, int create) | 3091 | int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock, |
3092 | struct buffer_head *bh_result, int create) | ||
3068 | { | 3093 | { |
3069 | int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT; | 3094 | int ret, err; |
3070 | if (create) | 3095 | int credits; |
3071 | flags |= EXT4_GET_BLOCKS_CREATE; | 3096 | struct ext4_map_blocks map; |
3072 | ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n", | 3097 | handle_t *handle = NULL; |
3098 | int flags = 0; | ||
3099 | |||
3100 | ext4_debug("ext4_dax_mmap_get_block: inode %lu, create flag %d\n", | ||
3073 | inode->i_ino, create); | 3101 | inode->i_ino, create); |
3074 | return _ext4_get_block(inode, iblock, bh_result, flags); | 3102 | map.m_lblk = iblock; |
3103 | map.m_len = bh_result->b_size >> inode->i_blkbits; | ||
3104 | credits = ext4_chunk_trans_blocks(inode, map.m_len); | ||
3105 | if (create) { | ||
3106 | flags |= EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_CREATE_ZERO; | ||
3107 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); | ||
3108 | if (IS_ERR(handle)) { | ||
3109 | ret = PTR_ERR(handle); | ||
3110 | return ret; | ||
3111 | } | ||
3112 | } | ||
3113 | |||
3114 | ret = ext4_map_blocks(handle, inode, &map, flags); | ||
3115 | if (create) { | ||
3116 | err = ext4_journal_stop(handle); | ||
3117 | if (ret >= 0 && err < 0) | ||
3118 | ret = err; | ||
3119 | } | ||
3120 | if (ret <= 0) | ||
3121 | goto out; | ||
3122 | if (map.m_flags & EXT4_MAP_UNWRITTEN) { | ||
3123 | int err2; | ||
3124 | |||
3125 | /* | ||
3126 | * We are protected by i_mmap_sem so we know block cannot go | ||
3127 | * away from under us even though we dropped i_data_sem. | ||
3128 | * Convert extent to written and write zeros there. | ||
3129 | * | ||
3130 | * Note: We may get here even when create == 0. | ||
3131 | */ | ||
3132 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); | ||
3133 | if (IS_ERR(handle)) { | ||
3134 | ret = PTR_ERR(handle); | ||
3135 | goto out; | ||
3136 | } | ||
3137 | |||
3138 | err = ext4_map_blocks(handle, inode, &map, | ||
3139 | EXT4_GET_BLOCKS_CONVERT | EXT4_GET_BLOCKS_CREATE_ZERO); | ||
3140 | if (err < 0) | ||
3141 | ret = err; | ||
3142 | err2 = ext4_journal_stop(handle); | ||
3143 | if (err2 < 0 && ret > 0) | ||
3144 | ret = err2; | ||
3145 | } | ||
3146 | out: | ||
3147 | WARN_ON_ONCE(ret == 0 && create); | ||
3148 | if (ret > 0) { | ||
3149 | map_bh(bh_result, inode->i_sb, map.m_pblk); | ||
3150 | bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) | | ||
3151 | map.m_flags; | ||
3152 | /* | ||
3153 | * At least for now we have to clear BH_New so that DAX code | ||
3154 | * doesn't attempt to zero blocks again in a racy way. | ||
3155 | */ | ||
3156 | bh_result->b_state &= ~(1 << BH_New); | ||
3157 | bh_result->b_size = map.m_len << inode->i_blkbits; | ||
3158 | ret = 0; | ||
3159 | } | ||
3160 | return ret; | ||
3075 | } | 3161 | } |
3162 | #endif | ||
3076 | 3163 | ||
3077 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | 3164 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, |
3078 | ssize_t size, void *private) | 3165 | ssize_t size, void *private) |
@@ -3143,10 +3230,8 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, | |||
3143 | /* If we do an overwrite dio, i_mutex locking can be released */ | 3230 | /* If we do an overwrite dio, i_mutex locking can be released */ |
3144 | overwrite = *((int *)iocb->private); | 3231 | overwrite = *((int *)iocb->private); |
3145 | 3232 | ||
3146 | if (overwrite) { | 3233 | if (overwrite) |
3147 | down_read(&EXT4_I(inode)->i_data_sem); | ||
3148 | mutex_unlock(&inode->i_mutex); | 3234 | mutex_unlock(&inode->i_mutex); |
3149 | } | ||
3150 | 3235 | ||
3151 | /* | 3236 | /* |
3152 | * We could direct write to holes and fallocate. | 3237 | * We could direct write to holes and fallocate. |
@@ -3189,7 +3274,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, | |||
3189 | } | 3274 | } |
3190 | 3275 | ||
3191 | if (overwrite) { | 3276 | if (overwrite) { |
3192 | get_block_func = ext4_get_block_write_nolock; | 3277 | get_block_func = ext4_get_block_overwrite; |
3193 | } else { | 3278 | } else { |
3194 | get_block_func = ext4_get_block_write; | 3279 | get_block_func = ext4_get_block_write; |
3195 | dio_flags = DIO_LOCKING; | 3280 | dio_flags = DIO_LOCKING; |
@@ -3245,10 +3330,8 @@ retake_lock: | |||
3245 | if (iov_iter_rw(iter) == WRITE) | 3330 | if (iov_iter_rw(iter) == WRITE) |
3246 | inode_dio_end(inode); | 3331 | inode_dio_end(inode); |
3247 | /* take i_mutex locking again if we do an overwrite dio */ | 3332 | /* take i_mutex locking again if we do an overwrite dio */ |
3248 | if (overwrite) { | 3333 | if (overwrite) |
3249 | up_read(&EXT4_I(inode)->i_data_sem); | ||
3250 | mutex_lock(&inode->i_mutex); | 3334 | mutex_lock(&inode->i_mutex); |
3251 | } | ||
3252 | 3335 | ||
3253 | return ret; | 3336 | return ret; |
3254 | } | 3337 | } |
@@ -3559,6 +3642,35 @@ int ext4_can_truncate(struct inode *inode) | |||
3559 | } | 3642 | } |
3560 | 3643 | ||
3561 | /* | 3644 | /* |
3645 | * We have to make sure i_disksize gets properly updated before we truncate | ||
3646 | * page cache due to hole punching or zero range. Otherwise i_disksize update | ||
3647 | * can get lost as it may have been postponed to submission of writeback but | ||
3648 | * that will never happen after we truncate page cache. | ||
3649 | */ | ||
3650 | int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, | ||
3651 | loff_t len) | ||
3652 | { | ||
3653 | handle_t *handle; | ||
3654 | loff_t size = i_size_read(inode); | ||
3655 | |||
3656 | WARN_ON(!mutex_is_locked(&inode->i_mutex)); | ||
3657 | if (offset > size || offset + len < size) | ||
3658 | return 0; | ||
3659 | |||
3660 | if (EXT4_I(inode)->i_disksize >= size) | ||
3661 | return 0; | ||
3662 | |||
3663 | handle = ext4_journal_start(inode, EXT4_HT_MISC, 1); | ||
3664 | if (IS_ERR(handle)) | ||
3665 | return PTR_ERR(handle); | ||
3666 | ext4_update_i_disksize(inode, size); | ||
3667 | ext4_mark_inode_dirty(handle, inode); | ||
3668 | ext4_journal_stop(handle); | ||
3669 | |||
3670 | return 0; | ||
3671 | } | ||
3672 | |||
3673 | /* | ||
3562 | * ext4_punch_hole: punches a hole in a file by releasing the blocks | 3674 | * ext4_punch_hole: punches a hole in a file by releasing the blocks |
3563 | * associated with the given offset and length | 3675 | * associated with the given offset and length |
3564 | * | 3676 | * |
@@ -3623,17 +3735,26 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) | |||
3623 | 3735 | ||
3624 | } | 3736 | } |
3625 | 3737 | ||
3738 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | ||
3739 | ext4_inode_block_unlocked_dio(inode); | ||
3740 | inode_dio_wait(inode); | ||
3741 | |||
3742 | /* | ||
3743 | * Prevent page faults from reinstantiating pages we have released from | ||
3744 | * page cache. | ||
3745 | */ | ||
3746 | down_write(&EXT4_I(inode)->i_mmap_sem); | ||
3626 | first_block_offset = round_up(offset, sb->s_blocksize); | 3747 | first_block_offset = round_up(offset, sb->s_blocksize); |
3627 | last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; | 3748 | last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; |
3628 | 3749 | ||
3629 | /* Now release the pages and zero block aligned part of pages*/ | 3750 | /* Now release the pages and zero block aligned part of pages*/ |
3630 | if (last_block_offset > first_block_offset) | 3751 | if (last_block_offset > first_block_offset) { |
3752 | ret = ext4_update_disksize_before_punch(inode, offset, length); | ||
3753 | if (ret) | ||
3754 | goto out_dio; | ||
3631 | truncate_pagecache_range(inode, first_block_offset, | 3755 | truncate_pagecache_range(inode, first_block_offset, |
3632 | last_block_offset); | 3756 | last_block_offset); |
3633 | 3757 | } | |
3634 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | ||
3635 | ext4_inode_block_unlocked_dio(inode); | ||
3636 | inode_dio_wait(inode); | ||
3637 | 3758 | ||
3638 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3759 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3639 | credits = ext4_writepage_trans_blocks(inode); | 3760 | credits = ext4_writepage_trans_blocks(inode); |
@@ -3680,16 +3801,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) | |||
3680 | if (IS_SYNC(inode)) | 3801 | if (IS_SYNC(inode)) |
3681 | ext4_handle_sync(handle); | 3802 | ext4_handle_sync(handle); |
3682 | 3803 | ||
3683 | /* Now release the pages again to reduce race window */ | ||
3684 | if (last_block_offset > first_block_offset) | ||
3685 | truncate_pagecache_range(inode, first_block_offset, | ||
3686 | last_block_offset); | ||
3687 | |||
3688 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 3804 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
3689 | ext4_mark_inode_dirty(handle, inode); | 3805 | ext4_mark_inode_dirty(handle, inode); |
3690 | out_stop: | 3806 | out_stop: |
3691 | ext4_journal_stop(handle); | 3807 | ext4_journal_stop(handle); |
3692 | out_dio: | 3808 | out_dio: |
3809 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
3693 | ext4_inode_resume_unlocked_dio(inode); | 3810 | ext4_inode_resume_unlocked_dio(inode); |
3694 | out_mutex: | 3811 | out_mutex: |
3695 | mutex_unlock(&inode->i_mutex); | 3812 | mutex_unlock(&inode->i_mutex); |
@@ -4076,6 +4193,14 @@ static inline void ext4_iget_extra_inode(struct inode *inode, | |||
4076 | EXT4_I(inode)->i_inline_off = 0; | 4193 | EXT4_I(inode)->i_inline_off = 0; |
4077 | } | 4194 | } |
4078 | 4195 | ||
4196 | int ext4_get_projid(struct inode *inode, kprojid_t *projid) | ||
4197 | { | ||
4198 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT)) | ||
4199 | return -EOPNOTSUPP; | ||
4200 | *projid = EXT4_I(inode)->i_projid; | ||
4201 | return 0; | ||
4202 | } | ||
4203 | |||
4079 | struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | 4204 | struct inode *ext4_iget(struct super_block *sb, unsigned long ino) |
4080 | { | 4205 | { |
4081 | struct ext4_iloc iloc; | 4206 | struct ext4_iloc iloc; |
@@ -4087,6 +4212,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4087 | int block; | 4212 | int block; |
4088 | uid_t i_uid; | 4213 | uid_t i_uid; |
4089 | gid_t i_gid; | 4214 | gid_t i_gid; |
4215 | projid_t i_projid; | ||
4090 | 4216 | ||
4091 | inode = iget_locked(sb, ino); | 4217 | inode = iget_locked(sb, ino); |
4092 | if (!inode) | 4218 | if (!inode) |
@@ -4136,12 +4262,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4136 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 4262 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); |
4137 | i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); | 4263 | i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); |
4138 | i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); | 4264 | i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); |
4265 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) && | ||
4266 | EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE && | ||
4267 | EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) | ||
4268 | i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid); | ||
4269 | else | ||
4270 | i_projid = EXT4_DEF_PROJID; | ||
4271 | |||
4139 | if (!(test_opt(inode->i_sb, NO_UID32))) { | 4272 | if (!(test_opt(inode->i_sb, NO_UID32))) { |
4140 | i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; | 4273 | i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; |
4141 | i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; | 4274 | i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; |
4142 | } | 4275 | } |
4143 | i_uid_write(inode, i_uid); | 4276 | i_uid_write(inode, i_uid); |
4144 | i_gid_write(inode, i_gid); | 4277 | i_gid_write(inode, i_gid); |
4278 | ei->i_projid = make_kprojid(&init_user_ns, i_projid); | ||
4145 | set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); | 4279 | set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); |
4146 | 4280 | ||
4147 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ | 4281 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ |
@@ -4440,6 +4574,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4440 | int need_datasync = 0, set_large_file = 0; | 4574 | int need_datasync = 0, set_large_file = 0; |
4441 | uid_t i_uid; | 4575 | uid_t i_uid; |
4442 | gid_t i_gid; | 4576 | gid_t i_gid; |
4577 | projid_t i_projid; | ||
4443 | 4578 | ||
4444 | spin_lock(&ei->i_raw_lock); | 4579 | spin_lock(&ei->i_raw_lock); |
4445 | 4580 | ||
@@ -4452,6 +4587,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4452 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); | 4587 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); |
4453 | i_uid = i_uid_read(inode); | 4588 | i_uid = i_uid_read(inode); |
4454 | i_gid = i_gid_read(inode); | 4589 | i_gid = i_gid_read(inode); |
4590 | i_projid = from_kprojid(&init_user_ns, ei->i_projid); | ||
4455 | if (!(test_opt(inode->i_sb, NO_UID32))) { | 4591 | if (!(test_opt(inode->i_sb, NO_UID32))) { |
4456 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); | 4592 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); |
4457 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); | 4593 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); |
@@ -4529,6 +4665,15 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4529 | cpu_to_le16(ei->i_extra_isize); | 4665 | cpu_to_le16(ei->i_extra_isize); |
4530 | } | 4666 | } |
4531 | } | 4667 | } |
4668 | |||
4669 | BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
4670 | EXT4_FEATURE_RO_COMPAT_PROJECT) && | ||
4671 | i_projid != EXT4_DEF_PROJID); | ||
4672 | |||
4673 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && | ||
4674 | EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) | ||
4675 | raw_inode->i_projid = cpu_to_le32(i_projid); | ||
4676 | |||
4532 | ext4_inode_csum_set(inode, raw_inode, ei); | 4677 | ext4_inode_csum_set(inode, raw_inode, ei); |
4533 | spin_unlock(&ei->i_raw_lock); | 4678 | spin_unlock(&ei->i_raw_lock); |
4534 | if (inode->i_sb->s_flags & MS_LAZYTIME) | 4679 | if (inode->i_sb->s_flags & MS_LAZYTIME) |
@@ -4824,6 +4969,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4824 | } else | 4969 | } else |
4825 | ext4_wait_for_tail_page_commit(inode); | 4970 | ext4_wait_for_tail_page_commit(inode); |
4826 | } | 4971 | } |
4972 | down_write(&EXT4_I(inode)->i_mmap_sem); | ||
4827 | /* | 4973 | /* |
4828 | * Truncate pagecache after we've waited for commit | 4974 | * Truncate pagecache after we've waited for commit |
4829 | * in data=journal mode to make pages freeable. | 4975 | * in data=journal mode to make pages freeable. |
@@ -4831,6 +4977,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4831 | truncate_pagecache(inode, inode->i_size); | 4977 | truncate_pagecache(inode, inode->i_size); |
4832 | if (shrink) | 4978 | if (shrink) |
4833 | ext4_truncate(inode); | 4979 | ext4_truncate(inode); |
4980 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
4834 | } | 4981 | } |
4835 | 4982 | ||
4836 | if (!rc) { | 4983 | if (!rc) { |
@@ -5279,6 +5426,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5279 | 5426 | ||
5280 | sb_start_pagefault(inode->i_sb); | 5427 | sb_start_pagefault(inode->i_sb); |
5281 | file_update_time(vma->vm_file); | 5428 | file_update_time(vma->vm_file); |
5429 | |||
5430 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
5282 | /* Delalloc case is easy... */ | 5431 | /* Delalloc case is easy... */ |
5283 | if (test_opt(inode->i_sb, DELALLOC) && | 5432 | if (test_opt(inode->i_sb, DELALLOC) && |
5284 | !ext4_should_journal_data(inode) && | 5433 | !ext4_should_journal_data(inode) && |
@@ -5348,6 +5497,19 @@ retry_alloc: | |||
5348 | out_ret: | 5497 | out_ret: |
5349 | ret = block_page_mkwrite_return(ret); | 5498 | ret = block_page_mkwrite_return(ret); |
5350 | out: | 5499 | out: |
5500 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
5351 | sb_end_pagefault(inode->i_sb); | 5501 | sb_end_pagefault(inode->i_sb); |
5352 | return ret; | 5502 | return ret; |
5353 | } | 5503 | } |
5504 | |||
5505 | int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
5506 | { | ||
5507 | struct inode *inode = file_inode(vma->vm_file); | ||
5508 | int err; | ||
5509 | |||
5510 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
5511 | err = filemap_fault(vma, vmf); | ||
5512 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
5513 | |||
5514 | return err; | ||
5515 | } | ||
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 5e872fd40e5e..2b0cb84255eb 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/mount.h> | 14 | #include <linux/mount.h> |
15 | #include <linux/file.h> | 15 | #include <linux/file.h> |
16 | #include <linux/random.h> | 16 | #include <linux/random.h> |
17 | #include <linux/quotaops.h> | ||
17 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
18 | #include "ext4_jbd2.h" | 19 | #include "ext4_jbd2.h" |
19 | #include "ext4.h" | 20 | #include "ext4.h" |
@@ -202,6 +203,238 @@ static int uuid_is_zero(__u8 u[16]) | |||
202 | return 1; | 203 | return 1; |
203 | } | 204 | } |
204 | 205 | ||
206 | static int ext4_ioctl_setflags(struct inode *inode, | ||
207 | unsigned int flags) | ||
208 | { | ||
209 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
210 | handle_t *handle = NULL; | ||
211 | int err = EPERM, migrate = 0; | ||
212 | struct ext4_iloc iloc; | ||
213 | unsigned int oldflags, mask, i; | ||
214 | unsigned int jflag; | ||
215 | |||
216 | /* Is it quota file? Do not allow user to mess with it */ | ||
217 | if (IS_NOQUOTA(inode)) | ||
218 | goto flags_out; | ||
219 | |||
220 | oldflags = ei->i_flags; | ||
221 | |||
222 | /* The JOURNAL_DATA flag is modifiable only by root */ | ||
223 | jflag = flags & EXT4_JOURNAL_DATA_FL; | ||
224 | |||
225 | /* | ||
226 | * The IMMUTABLE and APPEND_ONLY flags can only be changed by | ||
227 | * the relevant capability. | ||
228 | * | ||
229 | * This test looks nicer. Thanks to Pauline Middelink | ||
230 | */ | ||
231 | if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { | ||
232 | if (!capable(CAP_LINUX_IMMUTABLE)) | ||
233 | goto flags_out; | ||
234 | } | ||
235 | |||
236 | /* | ||
237 | * The JOURNAL_DATA flag can only be changed by | ||
238 | * the relevant capability. | ||
239 | */ | ||
240 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { | ||
241 | if (!capable(CAP_SYS_RESOURCE)) | ||
242 | goto flags_out; | ||
243 | } | ||
244 | if ((flags ^ oldflags) & EXT4_EXTENTS_FL) | ||
245 | migrate = 1; | ||
246 | |||
247 | if (flags & EXT4_EOFBLOCKS_FL) { | ||
248 | /* we don't support adding EOFBLOCKS flag */ | ||
249 | if (!(oldflags & EXT4_EOFBLOCKS_FL)) { | ||
250 | err = -EOPNOTSUPP; | ||
251 | goto flags_out; | ||
252 | } | ||
253 | } else if (oldflags & EXT4_EOFBLOCKS_FL) | ||
254 | ext4_truncate(inode); | ||
255 | |||
256 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); | ||
257 | if (IS_ERR(handle)) { | ||
258 | err = PTR_ERR(handle); | ||
259 | goto flags_out; | ||
260 | } | ||
261 | if (IS_SYNC(inode)) | ||
262 | ext4_handle_sync(handle); | ||
263 | err = ext4_reserve_inode_write(handle, inode, &iloc); | ||
264 | if (err) | ||
265 | goto flags_err; | ||
266 | |||
267 | for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { | ||
268 | if (!(mask & EXT4_FL_USER_MODIFIABLE)) | ||
269 | continue; | ||
270 | if (mask & flags) | ||
271 | ext4_set_inode_flag(inode, i); | ||
272 | else | ||
273 | ext4_clear_inode_flag(inode, i); | ||
274 | } | ||
275 | |||
276 | ext4_set_inode_flags(inode); | ||
277 | inode->i_ctime = ext4_current_time(inode); | ||
278 | |||
279 | err = ext4_mark_iloc_dirty(handle, inode, &iloc); | ||
280 | flags_err: | ||
281 | ext4_journal_stop(handle); | ||
282 | if (err) | ||
283 | goto flags_out; | ||
284 | |||
285 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) | ||
286 | err = ext4_change_inode_journal_flag(inode, jflag); | ||
287 | if (err) | ||
288 | goto flags_out; | ||
289 | if (migrate) { | ||
290 | if (flags & EXT4_EXTENTS_FL) | ||
291 | err = ext4_ext_migrate(inode); | ||
292 | else | ||
293 | err = ext4_ind_migrate(inode); | ||
294 | } | ||
295 | |||
296 | flags_out: | ||
297 | return err; | ||
298 | } | ||
299 | |||
300 | #ifdef CONFIG_QUOTA | ||
301 | static int ext4_ioctl_setproject(struct file *filp, __u32 projid) | ||
302 | { | ||
303 | struct inode *inode = file_inode(filp); | ||
304 | struct super_block *sb = inode->i_sb; | ||
305 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
306 | int err, rc; | ||
307 | handle_t *handle; | ||
308 | kprojid_t kprojid; | ||
309 | struct ext4_iloc iloc; | ||
310 | struct ext4_inode *raw_inode; | ||
311 | |||
312 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
313 | EXT4_FEATURE_RO_COMPAT_PROJECT)) { | ||
314 | if (projid != EXT4_DEF_PROJID) | ||
315 | return -EOPNOTSUPP; | ||
316 | else | ||
317 | return 0; | ||
318 | } | ||
319 | |||
320 | if (EXT4_INODE_SIZE(sb) <= EXT4_GOOD_OLD_INODE_SIZE) | ||
321 | return -EOPNOTSUPP; | ||
322 | |||
323 | kprojid = make_kprojid(&init_user_ns, (projid_t)projid); | ||
324 | |||
325 | if (projid_eq(kprojid, EXT4_I(inode)->i_projid)) | ||
326 | return 0; | ||
327 | |||
328 | err = mnt_want_write_file(filp); | ||
329 | if (err) | ||
330 | return err; | ||
331 | |||
332 | err = -EPERM; | ||
333 | mutex_lock(&inode->i_mutex); | ||
334 | /* Is it quota file? Do not allow user to mess with it */ | ||
335 | if (IS_NOQUOTA(inode)) | ||
336 | goto out_unlock; | ||
337 | |||
338 | err = ext4_get_inode_loc(inode, &iloc); | ||
339 | if (err) | ||
340 | goto out_unlock; | ||
341 | |||
342 | raw_inode = ext4_raw_inode(&iloc); | ||
343 | if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) { | ||
344 | err = -EOVERFLOW; | ||
345 | brelse(iloc.bh); | ||
346 | goto out_unlock; | ||
347 | } | ||
348 | brelse(iloc.bh); | ||
349 | |||
350 | dquot_initialize(inode); | ||
351 | |||
352 | handle = ext4_journal_start(inode, EXT4_HT_QUOTA, | ||
353 | EXT4_QUOTA_INIT_BLOCKS(sb) + | ||
354 | EXT4_QUOTA_DEL_BLOCKS(sb) + 3); | ||
355 | if (IS_ERR(handle)) { | ||
356 | err = PTR_ERR(handle); | ||
357 | goto out_unlock; | ||
358 | } | ||
359 | |||
360 | err = ext4_reserve_inode_write(handle, inode, &iloc); | ||
361 | if (err) | ||
362 | goto out_stop; | ||
363 | |||
364 | if (sb_has_quota_limits_enabled(sb, PRJQUOTA)) { | ||
365 | struct dquot *transfer_to[MAXQUOTAS] = { }; | ||
366 | |||
367 | transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); | ||
368 | if (transfer_to[PRJQUOTA]) { | ||
369 | err = __dquot_transfer(inode, transfer_to); | ||
370 | dqput(transfer_to[PRJQUOTA]); | ||
371 | if (err) | ||
372 | goto out_dirty; | ||
373 | } | ||
374 | } | ||
375 | EXT4_I(inode)->i_projid = kprojid; | ||
376 | inode->i_ctime = ext4_current_time(inode); | ||
377 | out_dirty: | ||
378 | rc = ext4_mark_iloc_dirty(handle, inode, &iloc); | ||
379 | if (!err) | ||
380 | err = rc; | ||
381 | out_stop: | ||
382 | ext4_journal_stop(handle); | ||
383 | out_unlock: | ||
384 | mutex_unlock(&inode->i_mutex); | ||
385 | mnt_drop_write_file(filp); | ||
386 | return err; | ||
387 | } | ||
388 | #else | ||
389 | static int ext4_ioctl_setproject(struct file *filp, __u32 projid) | ||
390 | { | ||
391 | if (projid != EXT4_DEF_PROJID) | ||
392 | return -EOPNOTSUPP; | ||
393 | return 0; | ||
394 | } | ||
395 | #endif | ||
396 | |||
397 | /* Transfer internal flags to xflags */ | ||
398 | static inline __u32 ext4_iflags_to_xflags(unsigned long iflags) | ||
399 | { | ||
400 | __u32 xflags = 0; | ||
401 | |||
402 | if (iflags & EXT4_SYNC_FL) | ||
403 | xflags |= FS_XFLAG_SYNC; | ||
404 | if (iflags & EXT4_IMMUTABLE_FL) | ||
405 | xflags |= FS_XFLAG_IMMUTABLE; | ||
406 | if (iflags & EXT4_APPEND_FL) | ||
407 | xflags |= FS_XFLAG_APPEND; | ||
408 | if (iflags & EXT4_NODUMP_FL) | ||
409 | xflags |= FS_XFLAG_NODUMP; | ||
410 | if (iflags & EXT4_NOATIME_FL) | ||
411 | xflags |= FS_XFLAG_NOATIME; | ||
412 | if (iflags & EXT4_PROJINHERIT_FL) | ||
413 | xflags |= FS_XFLAG_PROJINHERIT; | ||
414 | return xflags; | ||
415 | } | ||
416 | |||
417 | /* Transfer xflags flags to internal */ | ||
418 | static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) | ||
419 | { | ||
420 | unsigned long iflags = 0; | ||
421 | |||
422 | if (xflags & FS_XFLAG_SYNC) | ||
423 | iflags |= EXT4_SYNC_FL; | ||
424 | if (xflags & FS_XFLAG_IMMUTABLE) | ||
425 | iflags |= EXT4_IMMUTABLE_FL; | ||
426 | if (xflags & FS_XFLAG_APPEND) | ||
427 | iflags |= EXT4_APPEND_FL; | ||
428 | if (xflags & FS_XFLAG_NODUMP) | ||
429 | iflags |= EXT4_NODUMP_FL; | ||
430 | if (xflags & FS_XFLAG_NOATIME) | ||
431 | iflags |= EXT4_NOATIME_FL; | ||
432 | if (xflags & FS_XFLAG_PROJINHERIT) | ||
433 | iflags |= EXT4_PROJINHERIT_FL; | ||
434 | |||
435 | return iflags; | ||
436 | } | ||
437 | |||
205 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 438 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
206 | { | 439 | { |
207 | struct inode *inode = file_inode(filp); | 440 | struct inode *inode = file_inode(filp); |
@@ -217,11 +450,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
217 | flags = ei->i_flags & EXT4_FL_USER_VISIBLE; | 450 | flags = ei->i_flags & EXT4_FL_USER_VISIBLE; |
218 | return put_user(flags, (int __user *) arg); | 451 | return put_user(flags, (int __user *) arg); |
219 | case EXT4_IOC_SETFLAGS: { | 452 | case EXT4_IOC_SETFLAGS: { |
220 | handle_t *handle = NULL; | 453 | int err; |
221 | int err, migrate = 0; | ||
222 | struct ext4_iloc iloc; | ||
223 | unsigned int oldflags, mask, i; | ||
224 | unsigned int jflag; | ||
225 | 454 | ||
226 | if (!inode_owner_or_capable(inode)) | 455 | if (!inode_owner_or_capable(inode)) |
227 | return -EACCES; | 456 | return -EACCES; |
@@ -235,89 +464,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
235 | 464 | ||
236 | flags = ext4_mask_flags(inode->i_mode, flags); | 465 | flags = ext4_mask_flags(inode->i_mode, flags); |
237 | 466 | ||
238 | err = -EPERM; | ||
239 | mutex_lock(&inode->i_mutex); | 467 | mutex_lock(&inode->i_mutex); |
240 | /* Is it quota file? Do not allow user to mess with it */ | 468 | err = ext4_ioctl_setflags(inode, flags); |
241 | if (IS_NOQUOTA(inode)) | ||
242 | goto flags_out; | ||
243 | |||
244 | oldflags = ei->i_flags; | ||
245 | |||
246 | /* The JOURNAL_DATA flag is modifiable only by root */ | ||
247 | jflag = flags & EXT4_JOURNAL_DATA_FL; | ||
248 | |||
249 | /* | ||
250 | * The IMMUTABLE and APPEND_ONLY flags can only be changed by | ||
251 | * the relevant capability. | ||
252 | * | ||
253 | * This test looks nicer. Thanks to Pauline Middelink | ||
254 | */ | ||
255 | if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { | ||
256 | if (!capable(CAP_LINUX_IMMUTABLE)) | ||
257 | goto flags_out; | ||
258 | } | ||
259 | |||
260 | /* | ||
261 | * The JOURNAL_DATA flag can only be changed by | ||
262 | * the relevant capability. | ||
263 | */ | ||
264 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { | ||
265 | if (!capable(CAP_SYS_RESOURCE)) | ||
266 | goto flags_out; | ||
267 | } | ||
268 | if ((flags ^ oldflags) & EXT4_EXTENTS_FL) | ||
269 | migrate = 1; | ||
270 | |||
271 | if (flags & EXT4_EOFBLOCKS_FL) { | ||
272 | /* we don't support adding EOFBLOCKS flag */ | ||
273 | if (!(oldflags & EXT4_EOFBLOCKS_FL)) { | ||
274 | err = -EOPNOTSUPP; | ||
275 | goto flags_out; | ||
276 | } | ||
277 | } else if (oldflags & EXT4_EOFBLOCKS_FL) | ||
278 | ext4_truncate(inode); | ||
279 | |||
280 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); | ||
281 | if (IS_ERR(handle)) { | ||
282 | err = PTR_ERR(handle); | ||
283 | goto flags_out; | ||
284 | } | ||
285 | if (IS_SYNC(inode)) | ||
286 | ext4_handle_sync(handle); | ||
287 | err = ext4_reserve_inode_write(handle, inode, &iloc); | ||
288 | if (err) | ||
289 | goto flags_err; | ||
290 | |||
291 | for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { | ||
292 | if (!(mask & EXT4_FL_USER_MODIFIABLE)) | ||
293 | continue; | ||
294 | if (mask & flags) | ||
295 | ext4_set_inode_flag(inode, i); | ||
296 | else | ||
297 | ext4_clear_inode_flag(inode, i); | ||
298 | } | ||
299 | |||
300 | ext4_set_inode_flags(inode); | ||
301 | inode->i_ctime = ext4_current_time(inode); | ||
302 | |||
303 | err = ext4_mark_iloc_dirty(handle, inode, &iloc); | ||
304 | flags_err: | ||
305 | ext4_journal_stop(handle); | ||
306 | if (err) | ||
307 | goto flags_out; | ||
308 | |||
309 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) | ||
310 | err = ext4_change_inode_journal_flag(inode, jflag); | ||
311 | if (err) | ||
312 | goto flags_out; | ||
313 | if (migrate) { | ||
314 | if (flags & EXT4_EXTENTS_FL) | ||
315 | err = ext4_ext_migrate(inode); | ||
316 | else | ||
317 | err = ext4_ind_migrate(inode); | ||
318 | } | ||
319 | |||
320 | flags_out: | ||
321 | mutex_unlock(&inode->i_mutex); | 469 | mutex_unlock(&inode->i_mutex); |
322 | mnt_drop_write_file(filp); | 470 | mnt_drop_write_file(filp); |
323 | return err; | 471 | return err; |
@@ -689,6 +837,60 @@ encryption_policy_out: | |||
689 | return -EOPNOTSUPP; | 837 | return -EOPNOTSUPP; |
690 | #endif | 838 | #endif |
691 | } | 839 | } |
840 | case EXT4_IOC_FSGETXATTR: | ||
841 | { | ||
842 | struct fsxattr fa; | ||
843 | |||
844 | memset(&fa, 0, sizeof(struct fsxattr)); | ||
845 | ext4_get_inode_flags(ei); | ||
846 | fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE); | ||
847 | |||
848 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
849 | EXT4_FEATURE_RO_COMPAT_PROJECT)) { | ||
850 | fa.fsx_projid = (__u32)from_kprojid(&init_user_ns, | ||
851 | EXT4_I(inode)->i_projid); | ||
852 | } | ||
853 | |||
854 | if (copy_to_user((struct fsxattr __user *)arg, | ||
855 | &fa, sizeof(fa))) | ||
856 | return -EFAULT; | ||
857 | return 0; | ||
858 | } | ||
859 | case EXT4_IOC_FSSETXATTR: | ||
860 | { | ||
861 | struct fsxattr fa; | ||
862 | int err; | ||
863 | |||
864 | if (copy_from_user(&fa, (struct fsxattr __user *)arg, | ||
865 | sizeof(fa))) | ||
866 | return -EFAULT; | ||
867 | |||
868 | /* Make sure caller has proper permission */ | ||
869 | if (!inode_owner_or_capable(inode)) | ||
870 | return -EACCES; | ||
871 | |||
872 | err = mnt_want_write_file(filp); | ||
873 | if (err) | ||
874 | return err; | ||
875 | |||
876 | flags = ext4_xflags_to_iflags(fa.fsx_xflags); | ||
877 | flags = ext4_mask_flags(inode->i_mode, flags); | ||
878 | |||
879 | mutex_lock(&inode->i_mutex); | ||
880 | flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) | | ||
881 | (flags & EXT4_FL_XFLAG_VISIBLE); | ||
882 | err = ext4_ioctl_setflags(inode, flags); | ||
883 | mutex_unlock(&inode->i_mutex); | ||
884 | mnt_drop_write_file(filp); | ||
885 | if (err) | ||
886 | return err; | ||
887 | |||
888 | err = ext4_ioctl_setproject(filp, fa.fsx_projid); | ||
889 | if (err) | ||
890 | return err; | ||
891 | |||
892 | return 0; | ||
893 | } | ||
692 | default: | 894 | default: |
693 | return -ENOTTY; | 895 | return -ENOTTY; |
694 | } | 896 | } |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index f27e0c2598c5..854f75de4599 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -273,7 +273,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, | |||
273 | struct ext4_filename *fname, | 273 | struct ext4_filename *fname, |
274 | struct ext4_dir_entry_2 **res_dir); | 274 | struct ext4_dir_entry_2 **res_dir); |
275 | static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, | 275 | static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, |
276 | struct dentry *dentry, struct inode *inode); | 276 | struct inode *dir, struct inode *inode); |
277 | 277 | ||
278 | /* checksumming functions */ | 278 | /* checksumming functions */ |
279 | void initialize_dirent_tail(struct ext4_dir_entry_tail *t, | 279 | void initialize_dirent_tail(struct ext4_dir_entry_tail *t, |
@@ -1928,10 +1928,9 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, | |||
1928 | * directory, and adds the dentry to the indexed directory. | 1928 | * directory, and adds the dentry to the indexed directory. |
1929 | */ | 1929 | */ |
1930 | static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, | 1930 | static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, |
1931 | struct dentry *dentry, | 1931 | struct inode *dir, |
1932 | struct inode *inode, struct buffer_head *bh) | 1932 | struct inode *inode, struct buffer_head *bh) |
1933 | { | 1933 | { |
1934 | struct inode *dir = d_inode(dentry->d_parent); | ||
1935 | struct buffer_head *bh2; | 1934 | struct buffer_head *bh2; |
1936 | struct dx_root *root; | 1935 | struct dx_root *root; |
1937 | struct dx_frame frames[2], *frame; | 1936 | struct dx_frame frames[2], *frame; |
@@ -2086,8 +2085,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
2086 | return retval; | 2085 | return retval; |
2087 | 2086 | ||
2088 | if (ext4_has_inline_data(dir)) { | 2087 | if (ext4_has_inline_data(dir)) { |
2089 | retval = ext4_try_add_inline_entry(handle, &fname, | 2088 | retval = ext4_try_add_inline_entry(handle, &fname, dir, inode); |
2090 | dentry, inode); | ||
2091 | if (retval < 0) | 2089 | if (retval < 0) |
2092 | goto out; | 2090 | goto out; |
2093 | if (retval == 1) { | 2091 | if (retval == 1) { |
@@ -2097,7 +2095,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
2097 | } | 2095 | } |
2098 | 2096 | ||
2099 | if (is_dx(dir)) { | 2097 | if (is_dx(dir)) { |
2100 | retval = ext4_dx_add_entry(handle, &fname, dentry, inode); | 2098 | retval = ext4_dx_add_entry(handle, &fname, dir, inode); |
2101 | if (!retval || (retval != ERR_BAD_DX_DIR)) | 2099 | if (!retval || (retval != ERR_BAD_DX_DIR)) |
2102 | goto out; | 2100 | goto out; |
2103 | ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); | 2101 | ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); |
@@ -2119,7 +2117,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
2119 | 2117 | ||
2120 | if (blocks == 1 && !dx_fallback && | 2118 | if (blocks == 1 && !dx_fallback && |
2121 | ext4_has_feature_dir_index(sb)) { | 2119 | ext4_has_feature_dir_index(sb)) { |
2122 | retval = make_indexed_dir(handle, &fname, dentry, | 2120 | retval = make_indexed_dir(handle, &fname, dir, |
2123 | inode, bh); | 2121 | inode, bh); |
2124 | bh = NULL; /* make_indexed_dir releases bh */ | 2122 | bh = NULL; /* make_indexed_dir releases bh */ |
2125 | goto out; | 2123 | goto out; |
@@ -2154,12 +2152,11 @@ out: | |||
2154 | * Returns 0 for success, or a negative error value | 2152 | * Returns 0 for success, or a negative error value |
2155 | */ | 2153 | */ |
2156 | static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, | 2154 | static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, |
2157 | struct dentry *dentry, struct inode *inode) | 2155 | struct inode *dir, struct inode *inode) |
2158 | { | 2156 | { |
2159 | struct dx_frame frames[2], *frame; | 2157 | struct dx_frame frames[2], *frame; |
2160 | struct dx_entry *entries, *at; | 2158 | struct dx_entry *entries, *at; |
2161 | struct buffer_head *bh; | 2159 | struct buffer_head *bh; |
2162 | struct inode *dir = d_inode(dentry->d_parent); | ||
2163 | struct super_block *sb = dir->i_sb; | 2160 | struct super_block *sb = dir->i_sb; |
2164 | struct ext4_dir_entry_2 *de; | 2161 | struct ext4_dir_entry_2 *de; |
2165 | int err; | 2162 | int err; |
@@ -3212,6 +3209,12 @@ static int ext4_link(struct dentry *old_dentry, | |||
3212 | if (ext4_encrypted_inode(dir) && | 3209 | if (ext4_encrypted_inode(dir) && |
3213 | !ext4_is_child_context_consistent_with_parent(dir, inode)) | 3210 | !ext4_is_child_context_consistent_with_parent(dir, inode)) |
3214 | return -EPERM; | 3211 | return -EPERM; |
3212 | |||
3213 | if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) && | ||
3214 | (!projid_eq(EXT4_I(dir)->i_projid, | ||
3215 | EXT4_I(old_dentry->d_inode)->i_projid))) | ||
3216 | return -EXDEV; | ||
3217 | |||
3215 | err = dquot_initialize(dir); | 3218 | err = dquot_initialize(dir); |
3216 | if (err) | 3219 | if (err) |
3217 | return err; | 3220 | return err; |
@@ -3492,6 +3495,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3492 | int credits; | 3495 | int credits; |
3493 | u8 old_file_type; | 3496 | u8 old_file_type; |
3494 | 3497 | ||
3498 | if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) && | ||
3499 | (!projid_eq(EXT4_I(new_dir)->i_projid, | ||
3500 | EXT4_I(old_dentry->d_inode)->i_projid))) | ||
3501 | return -EXDEV; | ||
3502 | |||
3495 | retval = dquot_initialize(old.dir); | 3503 | retval = dquot_initialize(old.dir); |
3496 | if (retval) | 3504 | if (retval) |
3497 | return retval; | 3505 | return retval; |
@@ -3701,6 +3709,14 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3701 | new.inode))) | 3709 | new.inode))) |
3702 | return -EPERM; | 3710 | return -EPERM; |
3703 | 3711 | ||
3712 | if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) && | ||
3713 | !projid_eq(EXT4_I(new_dir)->i_projid, | ||
3714 | EXT4_I(old_dentry->d_inode)->i_projid)) || | ||
3715 | (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) && | ||
3716 | !projid_eq(EXT4_I(old_dir)->i_projid, | ||
3717 | EXT4_I(new_dentry->d_inode)->i_projid))) | ||
3718 | return -EXDEV; | ||
3719 | |||
3704 | retval = dquot_initialize(old.dir); | 3720 | retval = dquot_initialize(old.dir); |
3705 | if (retval) | 3721 | if (retval) |
3706 | return retval; | 3722 | return retval; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f1b56ff01208..00c98fab6333 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -80,6 +80,36 @@ static void ext4_destroy_lazyinit_thread(void); | |||
80 | static void ext4_unregister_li_request(struct super_block *sb); | 80 | static void ext4_unregister_li_request(struct super_block *sb); |
81 | static void ext4_clear_request_list(void); | 81 | static void ext4_clear_request_list(void); |
82 | 82 | ||
83 | /* | ||
84 | * Lock ordering | ||
85 | * | ||
86 | * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and | ||
87 | * i_mmap_rwsem (inode->i_mmap_rwsem)! | ||
88 | * | ||
89 | * page fault path: | ||
90 | * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start -> | ||
91 | * page lock -> i_data_sem (rw) | ||
92 | * | ||
93 | * buffered write path: | ||
94 | * sb_start_write -> i_mutex -> mmap_sem | ||
95 | * sb_start_write -> i_mutex -> transaction start -> page lock -> | ||
96 | * i_data_sem (rw) | ||
97 | * | ||
98 | * truncate: | ||
99 | * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) -> | ||
100 | * i_mmap_rwsem (w) -> page lock | ||
101 | * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) -> | ||
102 | * transaction start -> i_data_sem (rw) | ||
103 | * | ||
104 | * direct IO: | ||
105 | * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem | ||
106 | * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> | ||
107 | * transaction start -> i_data_sem (rw) | ||
108 | * | ||
109 | * writepages: | ||
110 | * transaction start -> page lock(s) -> i_data_sem (rw) | ||
111 | */ | ||
112 | |||
83 | #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) | 113 | #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) |
84 | static struct file_system_type ext2_fs_type = { | 114 | static struct file_system_type ext2_fs_type = { |
85 | .owner = THIS_MODULE, | 115 | .owner = THIS_MODULE, |
@@ -958,6 +988,7 @@ static void init_once(void *foo) | |||
958 | INIT_LIST_HEAD(&ei->i_orphan); | 988 | INIT_LIST_HEAD(&ei->i_orphan); |
959 | init_rwsem(&ei->xattr_sem); | 989 | init_rwsem(&ei->xattr_sem); |
960 | init_rwsem(&ei->i_data_sem); | 990 | init_rwsem(&ei->i_data_sem); |
991 | init_rwsem(&ei->i_mmap_sem); | ||
961 | inode_init_once(&ei->vfs_inode); | 992 | inode_init_once(&ei->vfs_inode); |
962 | } | 993 | } |
963 | 994 | ||
@@ -1066,8 +1097,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page, | |||
1066 | } | 1097 | } |
1067 | 1098 | ||
1068 | #ifdef CONFIG_QUOTA | 1099 | #ifdef CONFIG_QUOTA |
1069 | #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") | 1100 | static char *quotatypes[] = INITQFNAMES; |
1070 | #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) | 1101 | #define QTYPE2NAME(t) (quotatypes[t]) |
1071 | 1102 | ||
1072 | static int ext4_write_dquot(struct dquot *dquot); | 1103 | static int ext4_write_dquot(struct dquot *dquot); |
1073 | static int ext4_acquire_dquot(struct dquot *dquot); | 1104 | static int ext4_acquire_dquot(struct dquot *dquot); |
@@ -1100,6 +1131,7 @@ static const struct dquot_operations ext4_quota_operations = { | |||
1100 | .write_info = ext4_write_info, | 1131 | .write_info = ext4_write_info, |
1101 | .alloc_dquot = dquot_alloc, | 1132 | .alloc_dquot = dquot_alloc, |
1102 | .destroy_dquot = dquot_destroy, | 1133 | .destroy_dquot = dquot_destroy, |
1134 | .get_projid = ext4_get_projid, | ||
1103 | }; | 1135 | }; |
1104 | 1136 | ||
1105 | static const struct quotactl_ops ext4_qctl_operations = { | 1137 | static const struct quotactl_ops ext4_qctl_operations = { |
@@ -2526,6 +2558,12 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) | |||
2526 | "without CONFIG_QUOTA"); | 2558 | "without CONFIG_QUOTA"); |
2527 | return 0; | 2559 | return 0; |
2528 | } | 2560 | } |
2561 | if (ext4_has_feature_project(sb) && !readonly) { | ||
2562 | ext4_msg(sb, KERN_ERR, | ||
2563 | "Filesystem with project quota feature cannot be mounted RDWR " | ||
2564 | "without CONFIG_QUOTA"); | ||
2565 | return 0; | ||
2566 | } | ||
2529 | #endif /* CONFIG_QUOTA */ | 2567 | #endif /* CONFIG_QUOTA */ |
2530 | return 1; | 2568 | return 1; |
2531 | } | 2569 | } |
@@ -3654,7 +3692,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3654 | sb->s_qcop = &dquot_quotactl_sysfile_ops; | 3692 | sb->s_qcop = &dquot_quotactl_sysfile_ops; |
3655 | else | 3693 | else |
3656 | sb->s_qcop = &ext4_qctl_operations; | 3694 | sb->s_qcop = &ext4_qctl_operations; |
3657 | sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; | 3695 | sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; |
3658 | #endif | 3696 | #endif |
3659 | memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); | 3697 | memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); |
3660 | 3698 | ||
@@ -4790,6 +4828,48 @@ restore_opts: | |||
4790 | return err; | 4828 | return err; |
4791 | } | 4829 | } |
4792 | 4830 | ||
#ifdef CONFIG_QUOTA
/*
 * Clamp the block and inode totals in @buf to the quota limits of project
 * @projid on @sb.
 *
 * For blocks and for inodes separately, the soft limit is used when it is
 * non-zero and the hard limit otherwise. A total is only rewritten when that
 * limit is non-zero and smaller than the value already in @buf; the free
 * counts are then recomputed from the dquot's current usage, floored at 0.
 *
 * Returns 0 on success, or the error encoded in the pointer returned by
 * dqget().
 */
static int ext4_statfs_project(struct super_block *sb,
			       kprojid_t projid, struct kstatfs *buf)
{
	struct dquot *dquot = dqget(sb, make_kqid_projid(projid));
	u64 limit;
	u64 used;

	if (IS_ERR(dquot))
		return PTR_ERR(dquot);

	spin_lock(&dq_data_lock);

	/* Block limit: stored in bytes, converted to filesystem blocks. */
	limit = (dquot->dq_dqb.dqb_bsoftlimit ?
		 dquot->dq_dqb.dqb_bsoftlimit :
		 dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
	if (limit && buf->f_blocks > limit) {
		used = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
		buf->f_blocks = limit;
		if (buf->f_blocks > used)
			buf->f_bfree = buf->f_bavail = buf->f_blocks - used;
		else
			buf->f_bfree = buf->f_bavail = 0;
	}

	/* Inode limit. */
	limit = dquot->dq_dqb.dqb_isoftlimit ?
		dquot->dq_dqb.dqb_isoftlimit :
		dquot->dq_dqb.dqb_ihardlimit;
	if (limit && buf->f_files > limit) {
		buf->f_files = limit;
		if (buf->f_files > dquot->dq_dqb.dqb_curinodes)
			buf->f_ffree = buf->f_files -
				       dquot->dq_dqb.dqb_curinodes;
		else
			buf->f_ffree = 0;
	}

	spin_unlock(&dq_data_lock);
	dqput(dquot);
	return 0;
}
#endif
4872 | |||
4793 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | 4873 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) |
4794 | { | 4874 | { |
4795 | struct super_block *sb = dentry->d_sb; | 4875 | struct super_block *sb = dentry->d_sb; |
@@ -4822,6 +4902,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
4822 | buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; | 4902 | buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; |
4823 | buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; | 4903 | buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; |
4824 | 4904 | ||
4905 | #ifdef CONFIG_QUOTA | ||
4906 | if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) && | ||
4907 | sb_has_quota_limits_enabled(sb, PRJQUOTA)) | ||
4908 | ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf); | ||
4909 | #endif | ||
4825 | return 0; | 4910 | return 0; |
4826 | } | 4911 | } |
4827 | 4912 | ||
@@ -4986,7 +5071,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, | |||
4986 | struct inode *qf_inode; | 5071 | struct inode *qf_inode; |
4987 | unsigned long qf_inums[EXT4_MAXQUOTAS] = { | 5072 | unsigned long qf_inums[EXT4_MAXQUOTAS] = { |
4988 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), | 5073 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), |
4989 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) | 5074 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum), |
5075 | le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum) | ||
4990 | }; | 5076 | }; |
4991 | 5077 | ||
4992 | BUG_ON(!ext4_has_feature_quota(sb)); | 5078 | BUG_ON(!ext4_has_feature_quota(sb)); |
@@ -5014,7 +5100,8 @@ static int ext4_enable_quotas(struct super_block *sb) | |||
5014 | int type, err = 0; | 5100 | int type, err = 0; |
5015 | unsigned long qf_inums[EXT4_MAXQUOTAS] = { | 5101 | unsigned long qf_inums[EXT4_MAXQUOTAS] = { |
5016 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), | 5102 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), |
5017 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) | 5103 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum), |
5104 | le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum) | ||
5018 | }; | 5105 | }; |
5019 | 5106 | ||
5020 | sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; | 5107 | sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; |
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h index 011ba6670d99..c70d06a383e2 100644 --- a/fs/ext4/truncate.h +++ b/fs/ext4/truncate.h | |||
@@ -10,8 +10,10 @@ | |||
10 | */ | 10 | */ |
11 | static inline void ext4_truncate_failed_write(struct inode *inode) | 11 | static inline void ext4_truncate_failed_write(struct inode *inode) |
12 | { | 12 | { |
13 | down_write(&EXT4_I(inode)->i_mmap_sem); | ||
13 | truncate_inode_pages(inode->i_mapping, inode->i_size); | 14 | truncate_inode_pages(inode->i_mapping, inode->i_size); |
14 | ext4_truncate(inode); | 15 | ext4_truncate(inode); |
16 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
15 | } | 17 | } |
16 | 18 | ||
17 | /* | 19 | /* |
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 594b4b29a224..4e4b2fa78609 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h | |||
@@ -43,7 +43,7 @@ struct extent_status; | |||
43 | { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ | 43 | { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ |
44 | { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ | 44 | { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ |
45 | { EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \ | 45 | { EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \ |
46 | { EXT4_GET_BLOCKS_NO_LOCK, "NO_LOCK" }) | 46 | { EXT4_GET_BLOCKS_ZERO, "ZERO" }) |
47 | 47 | ||
48 | #define show_mflags(flags) __print_flags(flags, "", \ | 48 | #define show_mflags(flags) __print_flags(flags, "", \ |
49 | { EXT4_MAP_NEW, "N" }, \ | 49 | { EXT4_MAP_NEW, "N" }, \ |
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 8c8451f76633..41e0433b4a83 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h | |||
@@ -2,8 +2,11 @@ | |||
2 | #define _UAPI_LINUX_FS_H | 2 | #define _UAPI_LINUX_FS_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * This file has definitions for some important file table | 5 | * This file has definitions for some important file table structures |
6 | * structures etc. | 6 | * and constants and structures used by various generic file system |
7 | * ioctl's. Please do not make any changes in this file before | ||
8 | * sending patches for review to linux-fsdevel@vger.kernel.org and | ||
9 | * linux-api@vger.kernel.org. | ||
7 | */ | 10 | */ |
8 | 11 | ||
9 | #include <linux/limits.h> | 12 | #include <linux/limits.h> |
@@ -246,6 +249,23 @@ struct fsxattr { | |||
246 | 249 | ||
247 | /* | 250 | /* |
248 | * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) | 251 | * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) |
252 | * | ||
253 | * Note: for historical reasons, these flags were originally used and | ||
254 | * defined for use by ext2/ext3, and then other file systems started | ||
255 | * using these flags so they wouldn't need to write their own version | ||
256 | * of chattr/lsattr (which was shipped as part of e2fsprogs). You | ||
257 | * should think twice before trying to use these flags in new | ||
258 | * contexts, or trying to assign these flags, since they are used both | ||
259 | * as the UAPI and the on-disk encoding for ext2/3/4. Also, we are | ||
260 | * almost out of 32-bit flags. :-) | ||
261 | * | ||
262 | * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from | ||
263 | * XFS to the generic FS level interface. This uses a structure that | ||
264 | * has padding and hence has more room to grow, so it may be more | ||
265 | * appropriate for many new use cases. | ||
266 | * | ||
267 | * Please do not change these flags or interfaces before checking with | ||
268 | * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org. | ||
249 | */ | 269 | */ |
250 | #define FS_SECRM_FL 0x00000001 /* Secure deletion */ | 270 | #define FS_SECRM_FL 0x00000001 /* Secure deletion */ |
251 | #define FS_UNRM_FL 0x00000002 /* Undelete */ | 271 | #define FS_UNRM_FL 0x00000002 /* Undelete */ |
@@ -259,8 +279,8 @@ struct fsxattr { | |||
259 | #define FS_DIRTY_FL 0x00000100 | 279 | #define FS_DIRTY_FL 0x00000100 |
260 | #define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ | 280 | #define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ |
261 | #define FS_NOCOMP_FL 0x00000400 /* Don't compress */ | 281 | #define FS_NOCOMP_FL 0x00000400 /* Don't compress */ |
262 | #define FS_ECOMPR_FL 0x00000800 /* Compression error */ | ||
263 | /* End compression flags --- maybe not all used */ | 282 | /* End compression flags --- maybe not all used */ |
283 | #define FS_ENCRYPT_FL 0x00000800 /* Encrypted file */ | ||
264 | #define FS_BTREE_FL 0x00001000 /* btree format dir */ | 284 | #define FS_BTREE_FL 0x00001000 /* btree format dir */ |
265 | #define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ | 285 | #define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ |
266 | #define FS_IMAGIC_FL 0x00002000 /* AFS directory */ | 286 | #define FS_IMAGIC_FL 0x00002000 /* AFS directory */ |
@@ -268,9 +288,12 @@ struct fsxattr { | |||
268 | #define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ | 288 | #define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ |
269 | #define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ | 289 | #define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ |
270 | #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ | 290 | #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ |
291 | #define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */ | ||
271 | #define FS_EXTENT_FL 0x00080000 /* Extents */ | 292 | #define FS_EXTENT_FL 0x00080000 /* Extents */ |
272 | #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ | 293 | #define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ |
294 | #define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */ | ||
273 | #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ | 295 | #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ |
296 | #define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */ | ||
274 | #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ | 297 | #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ |
275 | #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ | 298 | #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ |
276 | 299 | ||