From fbbbad4bc2101e452b24e6e65d3d5e11314a0b5f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 16 Feb 2015 15:58:53 -0800 Subject: vfs,ext2: introduce IS_DAX(inode) Use an inode flag to tag inodes which should avoid using the page cache. Convert ext2 to use it instead of mapping_is_xip(). Prevent I/Os to files tagged with the DAX flag from falling back to buffered I/O. Signed-off-by: Matthew Wilcox Reviewed-by: Jan Kara Reviewed-by: Mathieu Desnoyers Cc: Andreas Dilger Cc: Boaz Harrosh Cc: Christoph Hellwig Cc: Dave Chinner Cc: Jens Axboe Cc: Kirill A. Shutemov Cc: Randy Dunlap Cc: Ross Zwisler Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index e49f10cc8a73..fb373bb5cf03 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1677,6 +1677,11 @@ struct super_operations { #define S_IMA 1024 /* Inode has an associated IMA struct */ #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ #define S_NOSEC 4096 /* no suid or xattr security attributes */ +#ifdef CONFIG_FS_XIP +#define S_DAX 8192 /* Direct Access, avoiding the page cache */ +#else +#define S_DAX 0 /* Make all the DAX code disappear */ +#endif /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -1714,6 +1719,7 @@ struct super_operations { #define IS_IMA(inode) ((inode)->i_flags & S_IMA) #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) +#define IS_DAX(inode) ((inode)->i_flags & S_DAX) #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) -- cgit v1.2.2 From d475c6346a38aef3058eba96867bfa726a3cc940 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 16 Feb 2015 15:58:56 -0800 Subject: dax,ext2: replace XIP read and write with DAX I/O Use the generic AIO infrastructure instead of custom read and write methods. In addition to giving us support for AIO, this adds the missing locking between read() and truncate(). Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Andreas Dilger Cc: Boaz Harrosh Cc: Christoph Hellwig Cc: Dave Chinner Cc: Jens Axboe Cc: Kirill A. Shutemov Cc: Mathieu Desnoyers Cc: Randy Dunlap Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index fb373bb5cf03..241c3c030fb5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2587,12 +2587,11 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset, extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); +ssize_t dax_do_io(int rw, struct kiocb *, struct inode *, struct iov_iter *, + loff_t, get_block_t, dio_iodone_t, int flags); + #ifdef CONFIG_FS_XIP -extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, - loff_t *ppos); extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); -extern ssize_t xip_file_write(struct file *filp, const char __user *buf, - size_t len, loff_t *ppos); extern int xip_truncate_page(struct address_space *mapping, loff_t from); #else static inline int xip_truncate_page(struct address_space *mapping, loff_t from) @@ -2756,6 +2755,11 @@ extern int generic_show_options(struct seq_file *m, struct dentry *root); extern void save_mount_options(struct super_block *sb, char *options); extern void replace_mount_options(struct super_block *sb, char *options); +static inline bool io_is_direct(struct file *filp) +{ + return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp)); +} + static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; -- cgit v1.2.2 From 289c6aedac981533331428bc933fff21ae332c9e Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 16 Feb 2015 15:58:59 -0800 Subject: dax,ext2: replace ext2_clear_xip_target with dax_clear_blocks This is practically generic code; other filesystems will want to call it from other places, but there's nothing ext2-specific about it. Make it a little more generic by allowing it to take a count of the number of bytes to zero rather than fixing it to a single page. Thanks to Dave Hansen for suggesting that I need to call cond_resched() if zeroing more than one page. Signed-off-by: Matthew Wilcox Cc: Andreas Dilger Cc: Boaz Harrosh Cc: Christoph Hellwig Cc: Dave Chinner Cc: Jan Kara Cc: Jens Axboe Cc: Kirill A. Shutemov Cc: Mathieu Desnoyers Cc: Randy Dunlap Cc: Ross Zwisler Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 241c3c030fb5..8084934a5676 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2589,6 +2589,7 @@ extern int nonseekable_open(struct inode * inode, struct file * filp); ssize_t dax_do_io(int rw, struct kiocb *, struct inode *, struct iov_iter *, loff_t, get_block_t, dio_iodone_t, int flags); +int dax_clear_blocks(struct inode *, sector_t block, long size); #ifdef CONFIG_FS_XIP extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); -- cgit v1.2.2 From f7ca90b160307d63aaedab8bd451c24a182db20f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 16 Feb 2015 15:59:02 -0800 Subject: dax,ext2: replace the XIP page fault handler with the DAX page fault handler Instead of calling aops->get_xip_mem from the fault handler, the filesystem passes a get_block_t that is used to find the appropriate blocks. This requires that all architectures implement copy_user_page(). At the time of writing, mips and arm do not. Patches exist and are in progress. [akpm@linux-foundation.org: remap_file_pages went away] Signed-off-by: Matthew Wilcox Reviewed-by: Jan Kara Cc: Andreas Dilger Cc: Boaz Harrosh Cc: Christoph Hellwig Cc: Dave Chinner Cc: Jens Axboe Cc: Kirill A. Shutemov Cc: Mathieu Desnoyers Cc: Randy Dunlap Cc: Ross Zwisler Cc: Theodore Ts'o Cc: Russell King Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8084934a5676..6bad6d4c579b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -51,6 +51,7 @@ struct swap_info_struct; struct seq_file; struct workqueue_struct; struct iov_iter; +struct vm_fault; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -2590,9 +2591,10 @@ extern int nonseekable_open(struct inode * inode, struct file * filp); ssize_t dax_do_io(int rw, struct kiocb *, struct inode *, struct iov_iter *, loff_t, get_block_t, dio_iodone_t, int flags); int dax_clear_blocks(struct inode *, sector_t block, long size); +int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); +#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb) #ifdef CONFIG_FS_XIP -extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); extern int xip_truncate_page(struct address_space *mapping, loff_t from); #else static inline int xip_truncate_page(struct address_space *mapping, loff_t from) -- cgit v1.2.2 From 4c0ccfef2e9f7418a6eb0bf07a2fc8f216365b18 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 16 Feb 2015 15:59:06 -0800 Subject: dax,ext2: replace xip_truncate_page with dax_truncate_page It takes a get_block parameter just like nobh_truncate_page() and block_truncate_page() Signed-off-by: Matthew Wilcox Reviewed-by: Mathieu Desnoyers Cc: Andreas Dilger Cc: Boaz Harrosh Cc: Christoph Hellwig Cc: Dave Chinner Cc: Jan Kara Cc: Jens Axboe Cc: Kirill A. Shutemov Cc: Randy Dunlap Cc: Ross Zwisler Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6bad6d4c579b..2c8f9055af38 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2591,18 +2591,10 @@ extern int nonseekable_open(struct inode * inode, struct file * filp); ssize_t dax_do_io(int rw, struct kiocb *, struct inode *, struct iov_iter *, loff_t, get_block_t, dio_iodone_t, int flags); int dax_clear_blocks(struct inode *, sector_t block, long size); +int dax_truncate_page(struct inode *, loff_t from, get_block_t); int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); #define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb) -#ifdef CONFIG_FS_XIP -extern int xip_truncate_page(struct address_space *mapping, loff_t from); -#else -static inline int xip_truncate_page(struct address_space *mapping, loff_t from) -{ - return 0; -} -#endif - #ifdef CONFIG_BLOCK typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, loff_t file_offset); -- cgit v1.2.2 From e748dcd095ddee50e7a7deda2e26247715318a2e Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 16 Feb 2015 15:59:12 -0800 Subject: vfs: remove get_xip_mem All callers of get_xip_mem() are now gone. Remove checks for it, initialisers of it, documentation of it and the only implementation of it. Also remove mm/filemap_xip.c as it is now empty. Also remove documentation of the long-gone get_xip_page(). Signed-off-by: Matthew Wilcox Cc: Andreas Dilger Cc: Boaz Harrosh Cc: Christoph Hellwig Cc: Dave Chinner Cc: Jan Kara Cc: Jens Axboe Cc: Kirill A. Shutemov Cc: Mathieu Desnoyers Cc: Randy Dunlap Cc: Ross Zwisler Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c8f9055af38..9772d655f444 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -362,8 +362,6 @@ struct address_space_operations { int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); - int (*get_xip_mem)(struct address_space *, pgoff_t, int, - void **, unsigned long *); /* * migrate the contents of a page to the specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. -- cgit v1.2.2 From 6cd176a51e52e5218b1aa97e1ec916bac25a9b7e Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 16 Feb 2015 15:59:25 -0800 Subject: vfs,ext2: remove CONFIG_EXT2_FS_XIP and rename CONFIG_FS_XIP to CONFIG_FS_DAX The fewer Kconfig options we have the better. Use the generic CONFIG_FS_DAX to enable XIP support in ext2 as well as in the core. Signed-off-by: Matthew Wilcox Cc: Andreas Dilger Cc: Boaz Harrosh Cc: Christoph Hellwig Cc: Dave Chinner Cc: Jan Kara Cc: Jens Axboe Cc: Kirill A. Shutemov Cc: Mathieu Desnoyers Cc: Randy Dunlap Cc: Ross Zwisler Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9772d655f444..d46f8fe6a0ea 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1676,7 +1676,7 @@ struct super_operations { #define S_IMA 1024 /* Inode has an associated IMA struct */ #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ #define S_NOSEC 4096 /* no suid or xattr security attributes */ -#ifdef CONFIG_FS_XIP +#ifdef CONFIG_FS_DAX #define S_DAX 8192 /* Direct Access, avoiding the page cache */ #else #define S_DAX 0 /* Make all the DAX code disappear */ -- cgit v1.2.2 From 25726bc15731d42112b579cf73f30edbc43d3973 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 16 Feb 2015 15:59:35 -0800 Subject: dax: add dax_zero_page_range This new function allows us to support hole-punch for DAX files by zeroing a partial page, as opposed to the dax_truncate_page() function which can only truncate to the end of the page. Reimplement dax_truncate_page() to call dax_zero_page_range(). [ross.zwisler@linux.intel.com: ported to 3.13-rc2] [akpm@linux-foundation.org: fix typos in comments] Signed-off-by: Matthew Wilcox Signed-off-by: Ross Zwisler Cc: Andreas Dilger Cc: Boaz Harrosh Cc: Christoph Hellwig Cc: Dave Chinner Cc: Jan Kara Cc: Jens Axboe Cc: Kirill A. Shutemov Cc: Mathieu Desnoyers Cc: Randy Dunlap Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index d46f8fe6a0ea..ed5a0900b94d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2589,6 +2589,7 @@ extern int nonseekable_open(struct inode * inode, struct file * filp); ssize_t dax_do_io(int rw, struct kiocb *, struct inode *, struct iov_iter *, loff_t, get_block_t, dio_iodone_t, int flags); int dax_clear_blocks(struct inode *, sector_t block, long size); +int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); #define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb) -- cgit v1.2.2