diff options
author | Ross Zwisler <ross.zwisler@linux.intel.com> | 2015-02-16 18:59:38 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-16 20:56:04 -0500 |
commit | 923ae0ff9250430133b3310fe62c47538cf1cbc1 (patch) | |
tree | 3c4194cbedbe77e719bf7c8762fba5362ec26032 /fs/ext4/inode.c | |
parent | 25726bc15731d42112b579cf73f30edbc43d3973 (diff) |
ext4: add DAX functionality
This is a port of the DAX functionality found in the current version of
ext2.
[matthew.r.wilcox@intel.com: heavily tweaked]
[akpm@linux-foundation.org: remap_pages went away]
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
Cc: Boaz Harrosh <boaz@plexistor.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 89 |
1 files changed, 62 insertions, 27 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5653fa42930b..28555f191b62 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -657,6 +657,18 @@ has_zeroout: | |||
657 | return retval; | 657 | return retval; |
658 | } | 658 | } |
659 | 659 | ||
660 | static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate) | ||
661 | { | ||
662 | struct inode *inode = bh->b_assoc_map->host; | ||
663 | /* XXX: breaks on 32-bit > 16GB. Is that even supported? */ | ||
664 | loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits; | ||
665 | int err; | ||
666 | if (!uptodate) | ||
667 | return; | ||
668 | WARN_ON(!buffer_unwritten(bh)); | ||
669 | err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size); | ||
670 | } | ||
671 | |||
660 | /* Maximum number of blocks we map for direct IO at once. */ | 672 | /* Maximum number of blocks we map for direct IO at once. */ |
661 | #define DIO_MAX_BLOCKS 4096 | 673 | #define DIO_MAX_BLOCKS 4096 |
662 | 674 | ||
@@ -694,6 +706,11 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, | |||
694 | 706 | ||
695 | map_bh(bh, inode->i_sb, map.m_pblk); | 707 | map_bh(bh, inode->i_sb, map.m_pblk); |
696 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 708 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; |
709 | if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) { | ||
710 | bh->b_assoc_map = inode->i_mapping; | ||
711 | bh->b_private = (void *)(unsigned long)iblock; | ||
712 | bh->b_end_io = ext4_end_io_unwritten; | ||
713 | } | ||
697 | if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) | 714 | if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) |
698 | set_buffer_defer_completion(bh); | 715 | set_buffer_defer_completion(bh); |
699 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; | 716 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; |
@@ -3010,13 +3027,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3010 | get_block_func = ext4_get_block_write; | 3027 | get_block_func = ext4_get_block_write; |
3011 | dio_flags = DIO_LOCKING; | 3028 | dio_flags = DIO_LOCKING; |
3012 | } | 3029 | } |
3013 | ret = __blockdev_direct_IO(rw, iocb, inode, | 3030 | if (IS_DAX(inode)) |
3014 | inode->i_sb->s_bdev, iter, | 3031 | ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func, |
3015 | offset, | 3032 | ext4_end_io_dio, dio_flags); |
3016 | get_block_func, | 3033 | else |
3017 | ext4_end_io_dio, | 3034 | ret = __blockdev_direct_IO(rw, iocb, inode, |
3018 | NULL, | 3035 | inode->i_sb->s_bdev, iter, offset, |
3019 | dio_flags); | 3036 | get_block_func, |
3037 | ext4_end_io_dio, NULL, dio_flags); | ||
3020 | 3038 | ||
3021 | /* | 3039 | /* |
3022 | * Put our reference to io_end. This can free the io_end structure e.g. | 3040 | * Put our reference to io_end. This can free the io_end structure e.g. |
@@ -3180,19 +3198,12 @@ void ext4_set_aops(struct inode *inode) | |||
3180 | inode->i_mapping->a_ops = &ext4_aops; | 3198 | inode->i_mapping->a_ops = &ext4_aops; |
3181 | } | 3199 | } |
3182 | 3200 | ||
3183 | /* | 3201 | static int __ext4_block_zero_page_range(handle_t *handle, |
3184 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' | ||
3185 | * starting from file offset 'from'. The range to be zero'd must | ||
3186 | * be contained with in one block. If the specified range exceeds | ||
3187 | * the end of the block it will be shortened to end of the block | ||
3188 | * that cooresponds to 'from' | ||
3189 | */ | ||
3190 | static int ext4_block_zero_page_range(handle_t *handle, | ||
3191 | struct address_space *mapping, loff_t from, loff_t length) | 3202 | struct address_space *mapping, loff_t from, loff_t length) |
3192 | { | 3203 | { |
3193 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 3204 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
3194 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 3205 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
3195 | unsigned blocksize, max, pos; | 3206 | unsigned blocksize, pos; |
3196 | ext4_lblk_t iblock; | 3207 | ext4_lblk_t iblock; |
3197 | struct inode *inode = mapping->host; | 3208 | struct inode *inode = mapping->host; |
3198 | struct buffer_head *bh; | 3209 | struct buffer_head *bh; |
@@ -3205,14 +3216,6 @@ static int ext4_block_zero_page_range(handle_t *handle, | |||
3205 | return -ENOMEM; | 3216 | return -ENOMEM; |
3206 | 3217 | ||
3207 | blocksize = inode->i_sb->s_blocksize; | 3218 | blocksize = inode->i_sb->s_blocksize; |
3208 | max = blocksize - (offset & (blocksize - 1)); | ||
3209 | |||
3210 | /* | ||
3211 | * correct length if it does not fall between | ||
3212 | * 'from' and the end of the block | ||
3213 | */ | ||
3214 | if (length > max || length < 0) | ||
3215 | length = max; | ||
3216 | 3219 | ||
3217 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 3220 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
3218 | 3221 | ||
@@ -3278,6 +3281,33 @@ unlock: | |||
3278 | } | 3281 | } |
3279 | 3282 | ||
3280 | /* | 3283 | /* |
3284 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' | ||
3285 | * starting from file offset 'from'. The range to be zero'd must | ||
3286 | * be contained with in one block. If the specified range exceeds | ||
3287 | * the end of the block it will be shortened to end of the block | ||
3288 | * that cooresponds to 'from' | ||
3289 | */ | ||
3290 | static int ext4_block_zero_page_range(handle_t *handle, | ||
3291 | struct address_space *mapping, loff_t from, loff_t length) | ||
3292 | { | ||
3293 | struct inode *inode = mapping->host; | ||
3294 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
3295 | unsigned blocksize = inode->i_sb->s_blocksize; | ||
3296 | unsigned max = blocksize - (offset & (blocksize - 1)); | ||
3297 | |||
3298 | /* | ||
3299 | * correct length if it does not fall between | ||
3300 | * 'from' and the end of the block | ||
3301 | */ | ||
3302 | if (length > max || length < 0) | ||
3303 | length = max; | ||
3304 | |||
3305 | if (IS_DAX(inode)) | ||
3306 | return dax_zero_page_range(inode, from, length, ext4_get_block); | ||
3307 | return __ext4_block_zero_page_range(handle, mapping, from, length); | ||
3308 | } | ||
3309 | |||
3310 | /* | ||
3281 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' | 3311 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' |
3282 | * up to the end of the block which corresponds to `from'. | 3312 | * up to the end of the block which corresponds to `from'. |
3283 | * This required during truncate. We need to physically zero the tail end | 3313 | * This required during truncate. We need to physically zero the tail end |
@@ -3798,8 +3828,10 @@ void ext4_set_inode_flags(struct inode *inode) | |||
3798 | new_fl |= S_NOATIME; | 3828 | new_fl |= S_NOATIME; |
3799 | if (flags & EXT4_DIRSYNC_FL) | 3829 | if (flags & EXT4_DIRSYNC_FL) |
3800 | new_fl |= S_DIRSYNC; | 3830 | new_fl |= S_DIRSYNC; |
3831 | if (test_opt(inode->i_sb, DAX)) | ||
3832 | new_fl |= S_DAX; | ||
3801 | inode_set_flags(inode, new_fl, | 3833 | inode_set_flags(inode, new_fl, |
3802 | S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); | 3834 | S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); |
3803 | } | 3835 | } |
3804 | 3836 | ||
3805 | /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ | 3837 | /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ |
@@ -4052,7 +4084,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4052 | 4084 | ||
4053 | if (S_ISREG(inode->i_mode)) { | 4085 | if (S_ISREG(inode->i_mode)) { |
4054 | inode->i_op = &ext4_file_inode_operations; | 4086 | inode->i_op = &ext4_file_inode_operations; |
4055 | inode->i_fop = &ext4_file_operations; | 4087 | if (test_opt(inode->i_sb, DAX)) |
4088 | inode->i_fop = &ext4_dax_file_operations; | ||
4089 | else | ||
4090 | inode->i_fop = &ext4_file_operations; | ||
4056 | ext4_set_aops(inode); | 4091 | ext4_set_aops(inode); |
4057 | } else if (S_ISDIR(inode->i_mode)) { | 4092 | } else if (S_ISDIR(inode->i_mode)) { |
4058 | inode->i_op = &ext4_dir_inode_operations; | 4093 | inode->i_op = &ext4_dir_inode_operations; |
@@ -4534,7 +4569,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4534 | * Truncate pagecache after we've waited for commit | 4569 | * Truncate pagecache after we've waited for commit |
4535 | * in data=journal mode to make pages freeable. | 4570 | * in data=journal mode to make pages freeable. |
4536 | */ | 4571 | */ |
4537 | truncate_pagecache(inode, inode->i_size); | 4572 | truncate_pagecache(inode, inode->i_size); |
4538 | } | 4573 | } |
4539 | /* | 4574 | /* |
4540 | * We want to call ext4_truncate() even if attr->ia_size == | 4575 | * We want to call ext4_truncate() even if attr->ia_size == |